#! /usr/bin/env perl
# Copyright 2009-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# SHA1 block procedure for PA-RISC.

# June 2009.
#
# On PA-7100LC performance is >30% better than gcc 3.2 generated code
# for aligned input and >50% better for unaligned. Compared to vendor
# compiler on PA-8600 it's almost 60% faster in 64-bit build and just
# a few percent faster in 32-bit one (this is for aligned input; data
# for unaligned input is not available).
#
# Special thanks to polarhome.com for providing HP-UX account.

# Command line: <flavour> [<output file>].
# A flavour matching /64/ selects the 64-bit parameters below; anything
# else (including no flavour at all) selects the 32-bit ones.
$flavour = shift;
$output  = shift;

# Redirect STDOUT to the output file.  The three-argument form keeps
# characters in the file name from being interpreted as an open mode, and
# a failed open is fatal instead of silently discarding the generated
# code.  With no output file the code goes to the inherited STDOUT.
if (defined($output) && $output ne "") {
	open STDOUT,">",$output or die "can't open $output: $!";
}

# ABI-dependent parameters used by the code template:
#   $LEVEL         operand of the .LEVEL directive
#   $SIZE_T        size in bytes of one saved-register slot
#   $FRAME_MARKER  size in bytes of the frame marker part of the frame
#   $SAVED_RP      %r2 is stored at -$SAVED_RP(%sp) on entry
#   $PUSH/$POP     store/load mnemonics for one register-sized slot
#   $PUSHMA/$POPMB store/load mnemonics that also adjust the base register
if ($flavour =~ /64/) {
	$LEVEL		="2.0W";
	$SIZE_T		=8;
	$FRAME_MARKER	=80;
	$SAVED_RP	=16;
	$PUSH		="std";
	$PUSHMA		="std,ma";
	$POP		="ldd";
	$POPMB		="ldd,mb";
} else {
	$LEVEL		="1.0";
	$SIZE_T		=4;
	$FRAME_MARKER	=48;
	$SAVED_RP	=20;
	$PUSH		="stw";
	$PUSHMA		="stwm";
	$POP		="ldw";
	$POPMB		="ldwm";
}

$FRAME=14*$SIZE_T+$FRAME_MARKER;# 14 saved regs + frame marker
				#                 [+ argument transfer]
$ctx="%r26";		# arg0
$inp="%r25";		# arg1
$num="%r24";		# arg2

# Scratch registers and the register holding the current round constant.
$t0="%r28";
$t1="%r29";
$K="%r31";

# @X[0..15] hold the sixteen 32-bit message words; @X[16] aliases $t0 and
# receives the seventeenth word that is loaded only for unaligned input.
@X=("%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7", "%r8",
    "%r9", "%r10","%r11","%r12","%r13","%r14","%r15","%r16",$t0);

# The five working variables; @V is rotated after every round so that the
# round generators always receive them in (a,b,c,d,e) order.
@V=($A,$B,$C,$D,$E)=("%r19","%r20","%r21","%r22","%r23");

# Append the instructions for one of rounds 0..19 to the global $code.
#
# Arguments: ($i,$a,$b,$c,$d,$e) - the round number followed by the names
# of the registers currently acting as the five working variables.
# Reads the globals $K, $t0, $t1 and @X.
#
# The emitted code adds $K, message word @X[$i%16], $a rotated (shd by 27)
# and (b AND c) OR (d AND NOT b) into $e, and rotates $b (shd by 2).
# From round 15 on it also interleaves the update of the message word
# needed by the next round ($j=$i+1): that word is XORed with the words
# at offsets +2, +8 and +13 (mod 16) and then rotated (shd by 31).
sub BODY_00_19 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;

    # Rounds 0..14: the next word comes straight from the input block.
    $code.=<<___ if ($i<15);
	addl	$K,$e,$e		; $i
	shd	$a,$a,27,$t1
	addl	@X[$i],$e,$e
	and	$c,$b,$t0
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	addl	$t0,$e,$e
___
    $code.=<<___ if ($i>=15);	# with forward Xupdate
	addl	$K,$e,$e		; $i
	shd	$a,$a,27,$t1
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	and	$c,$b,$t0
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	andcm	$d,$b,$t1
	shd	$b,$b,2,$b
	or	$t1,$t0,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	add	$t0,$e,$e
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
___
}

# Append the instructions for one of rounds 20..39 or 60..79 to the
# global $code (the main program calls this sub for both ranges).
#
# Arguments: ($i,$a,$b,$c,$d,$e) - the round number followed by the names
# of the registers currently acting as the five working variables.
# Reads the globals $K, $t0, $t1, $ctx and @X.
#
# The emitted code adds $K, message word @X[$i%16], $a rotated (shd by 27)
# and b XOR c XOR d into $e, and rotates $b (shd by 2).  For every round
# but the last it also interleaves the update of the message word needed
# by the next round ($j=$i+1).  Round 79 has no next word to prepare, so
# those slots are used to load the five context words from $ctx into
# @X[0..4] instead; the word it consumes, @X[15], is not among them.
sub BODY_20_39 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;

    $code.=<<___ if ($i<79);
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t0,$e,$e
___
    $code.=<<___ if ($i==79);	# with context load
	ldw	0($ctx),@X[0]	; $i
	addl	$K,$e,$e
	shd	$a,$a,27,$t1
	ldw	4($ctx),@X[1]
	addl	@X[$i%16],$e,$e
	xor	$b,$c,$t0
	ldw	8($ctx),@X[2]
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	$d,$t0,$t0
	ldw	12($ctx),@X[3]
	addl	$t0,$e,$e
	ldw	16($ctx),@X[4]
___
}

# Append the instructions for one of rounds 40..59 to the global $code.
#
# Arguments: ($i,$a,$b,$c,$d,$e) - the round number followed by the names
# of the registers currently acting as the five working variables.
# Reads the globals $K, $t0, $t1 and @X.
#
# The emitted code adds $K, message word @X[$i%16] and $a rotated (shd by
# 27) into $e, then adds the two terms  b AND (c XOR d)  and  c AND d
# separately; the two terms never have a bit set in the same position, so
# adding both is the same as ORing them.  $b is rotated (shd by 2), and
# the update of the message word needed by the next round ($j=$i+1) is
# interleaved with the round itself.
sub BODY_40_59 {
    my ($i,$a,$b,$c,$d,$e)=@_;
    my $j=$i+1;

    $code.=<<___;
	shd	$a,$a,27,$t1	; $i
	addl	$K,$e,$e
	xor	@X[($j+2)%16],@X[$j%16],@X[$j%16]
	xor	$d,$c,$t0
	addl	@X[$i%16],$e,$e
	xor	@X[($j+8)%16],@X[$j%16],@X[$j%16]
	and	$b,$t0,$t0
	addl	$t1,$e,$e
	shd	$b,$b,2,$b
	xor	@X[($j+13)%16],@X[$j%16],@X[$j%16]
	addl	$t0,$e,$e
	and	$d,$c,$t1
	shd	@X[$j%16],@X[$j%16],31,@X[$j%16]
	addl	$t1,$e,$e
___
}

# Assemble the whole routine in $code.  Labels (sha1_block_data_order,
# L$oop, L$aligned) start in column 0; everything else is tab-indented.
# Back-quoted expressions are evaluated by the post-processing step at the
# bottom of the file.
#
# Prologue: store %r2 at -$SAVED_RP(%sp), allocate $FRAME bytes while
# storing %r3, and store %r4..%r16 in the following $SIZE_T-sized slots.
# Then load the five context words and program the shift amount register
# with 32-8*(inp&3) for the vshd instructions that realign the input.
$code=<<___;
	.LEVEL	$LEVEL
	.SPACE	\$TEXT\$
	.SUBSPA	\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY

	.EXPORT	sha1_block_data_order,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
sha1_block_data_order
	.PROC
	.CALLINFO	FRAME=`$FRAME-14*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=16
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
	$PUSH	%r7,`-$FRAME+4*$SIZE_T`(%sp)
	$PUSH	%r8,`-$FRAME+5*$SIZE_T`(%sp)
	$PUSH	%r9,`-$FRAME+6*$SIZE_T`(%sp)
	$PUSH	%r10,`-$FRAME+7*$SIZE_T`(%sp)
	$PUSH	%r11,`-$FRAME+8*$SIZE_T`(%sp)
	$PUSH	%r12,`-$FRAME+9*$SIZE_T`(%sp)
	$PUSH	%r13,`-$FRAME+10*$SIZE_T`(%sp)
	$PUSH	%r14,`-$FRAME+11*$SIZE_T`(%sp)
	$PUSH	%r15,`-$FRAME+12*$SIZE_T`(%sp)
	$PUSH	%r16,`-$FRAME+13*$SIZE_T`(%sp)

	ldw	0($ctx),$A
	ldw	4($ctx),$B
	ldw	8($ctx),$C
	ldw	12($ctx),$D
	ldw	16($ctx),$E

	extru	$inp,31,2,$t0		; t0=inp&3;
	sh3addl	$t0,%r0,$t0		; t0*=8;
	subi	32,$t0,$t0		; t0=32-t0;
	mtctl	$t0,%cr11		; %sar=t0;

L\$oop
	ldi	3,$t0
	andcm	$inp,$t0,$t0		; 64-bit neutral
___
# $t0 now holds $inp with its two low bits cleared; load the first 15
# words of the block from that word-aligned address.
for ($i=0;$i<15;$i++) {		# load input block
    $code.="\tldw	`4*$i`($t0),@X[$i]\n";
}
# If $inp was already word-aligned, branch to L$aligned past the
# realignment code.  The 16th word is loaded by the instruction that
# directly follows the branch; the 17th load overwrites $t0 itself
# (@X[16] is $t0), so it has to be the last one using $t0 as the base.
$code.=<<___;
	cmpb,*=	$inp,$t0,L\$aligned
	ldw	60($t0),@X[15]
	ldw	64($t0),@X[16]
___
for ($i=0;$i<16;$i++) {		# align input
    $code.="\tvshd	@X[$i],@X[$i+1],@X[$i]\n";
}
$code.=<<___;
L\$aligned
	ldil	L'0x5a827000,$K		; K_00_19
	ldo	0x999($K),$K
___
# Each group of 20 rounds is preceded by loading its constant into $K.
# @V is rotated by one position after every round; after all 80 rounds it
# is back in its initial order, so $A..$E name the final working variables.
for ($i=0;$i<20;$i++)	{ &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0x6ed9e000,$K		; K_20_39
	ldo	0xba1($K),$K
___

for (;$i<40;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0x8f1bb000,$K		; K_40_59
	ldo	0xcdc($K),$K
___

for (;$i<60;$i++)	{ &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
$code.=<<___;
	ldil	L'0xca62c000,$K		; K_60_79
	ldo	0x1d6($K),$K
___
for (;$i<80;$i++)	{ &BODY_20_39($i,@V); unshift(@V,pop(@V)); }

# Round 79 left the previous context words in @X[0..4]: add them to the
# working variables, store the result back, and loop until $num reaches
# zero, advancing $inp by 64 bytes per block.  The epilogue reloads %r2
# and %r4..%r16 and releases the frame while reloading %r3.
$code.=<<___;
	addl	@X[0],$A,$A
	addl	@X[1],$B,$B
	addl	@X[2],$C,$C
	addl	@X[3],$D,$D
	addl	@X[4],$E,$E
	stw	$A,0($ctx)
	stw	$B,4($ctx)
	stw	$C,8($ctx)
	stw	$D,12($ctx)
	stw	$E,16($ctx)
	addib,*<>	-1,$num,L\$oop
	ldo	64($inp),$inp

	$POP	`-$FRAME-$SAVED_RP`(%sp),%r2	; standard epilogue
	$POP	`-$FRAME+1*$SIZE_T`(%sp),%r4
	$POP	`-$FRAME+2*$SIZE_T`(%sp),%r5
	$POP	`-$FRAME+3*$SIZE_T`(%sp),%r6
	$POP	`-$FRAME+4*$SIZE_T`(%sp),%r7
	$POP	`-$FRAME+5*$SIZE_T`(%sp),%r8
	$POP	`-$FRAME+6*$SIZE_T`(%sp),%r9
	$POP	`-$FRAME+7*$SIZE_T`(%sp),%r10
	$POP	`-$FRAME+8*$SIZE_T`(%sp),%r11
	$POP	`-$FRAME+9*$SIZE_T`(%sp),%r12
	$POP	`-$FRAME+10*$SIZE_T`(%sp),%r13
	$POP	`-$FRAME+11*$SIZE_T`(%sp),%r14
	$POP	`-$FRAME+12*$SIZE_T`(%sp),%r15
	$POP	`-$FRAME+13*$SIZE_T`(%sp),%r16
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA1 block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___

# Post-process the generated text:
#  - replace every back-quoted expression with its evaluated value;
#  - 32-bit build: drop the '*' that follows a comma in cmpb,*= and
#    addib,*<>;
#  - 64-bit build: turn the bv return instruction into bve.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/,\*/,/gm		if ($SIZE_T==4);
$code =~ s/\bbv\b/bve/gm	if ($SIZE_T==8);
print $code;
# Output is buffered, so a write error (e.g. a full disk) may only show up
# here; fail loudly rather than leave a truncated assembly file behind.
close STDOUT or die "error closing STDOUT: $!";