VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.3/crypto/sha/asm/sha512-parisc.pl@ 102427

Last change on this file since 102427 was 101211, checked in by vboxsync, 17 months ago

openssl-3.1.3: Applied and adjusted our OpenSSL changes to 3.1.2. bugref:10527

  • Property svn:executable set to *
File size: 21.6 KB
Line 
1#! /usr/bin/env perl
2# Copyright 2009-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16
17# SHA256/512 block procedure for PA-RISC.
18
19# June 2009.
20#
21# SHA256 performance is >75% better than gcc 3.2 generated code on
22# PA-7100LC. Compared to code generated by vendor compiler this
23# implementation is almost 70% faster in 64-bit build, but delivers
24# virtually same performance in 32-bit build on PA-8600.
25#
26# SHA512 performance is >2.9x better than gcc 3.2 generated code on
27# PA-7100LC, PA-RISC 1.1 processor. Then implementation detects if the
28# code is executed on PA-RISC 2.0 processor and switches to 64-bit
29# code path delivering adequate performance even in "blended" 32-bit
30# build. Though 64-bit code is not any faster than code generated by
31# vendor compiler on PA-8600...
32#
33# Special thanks to polarhome.com for providing HP-UX account.
34
35# $output is the last argument if it looks like a file (it has an extension)
36# $flavour is the first argument if it doesn't look like a file
$output  = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop(@ARGV)   : undef;
$flavour = (@ARGV && $ARGV[0]  !~ m|\.|)     ? shift(@ARGV) : undef;

# Redirect STDOUT to the requested file.  The three-argument form keeps
# characters in the file name from being interpreted as an open mode, and
# a failure is reported instead of silently writing to the old STDOUT.
if ($output) {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}
41
# ABI-dependent parameters.  A flavour containing "64" selects the 64-bit
# settings (PA-RISC 2.0W, 8-byte size_t, doubleword push/pop); anything
# else, including no flavour at all, selects the 32-bit settings.
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP, $PUSH, $PUSHMA, $POP, $POPMB) =
	($flavour =~ /64/)
		? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb")
		: ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm");
61
# Hash-variant parameters.  An output file name containing "512" selects
# SHA-512 (8-byte words, 80 rounds); otherwise SHA-256 (4-byte words,
# 64 rounds) is generated.  $LAST10BITS is compared against the low ten
# bits of a table word to detect the end of the table.
($func, $SZ, $rounds, $LAST10BITS, $LD, $LDM, $ST) =
	($output =~ /512/)
		? ("sha512_block_data_order", 8, 80, 0x017, "ldd", "ldd,ma", "std")
		: ("sha256_block_data_order", 4, 64, 0x0f2, "ldw", "ldwm",   "stw");

# Rotate/shift amounts of the Sigma/sigma functions for the chosen word size.
if ($SZ == 8) {
	@Sigma0 = (28, 34, 39);
	@Sigma1 = (14, 18, 41);
	@sigma0 = ( 1,  8,  7);
	@sigma1 = (19, 61,  6);
}
else {
	@Sigma0 = ( 2, 13, 22);
	@Sigma1 = ( 6, 11, 25);
	@sigma0 = ( 7, 18,  3);
	@sigma1 = (17, 19, 10);
}
87
# Stack frame layout:
#   16 saved registers + frame marker [+ argument transfer] + local variables.
$XOFF  = 16*$SZ + 32;				# size of the local variable area
$FRAME = 16*$SIZE_T + $FRAME_MARKER + $XOFF;
$XOFF += $FRAME_MARKER;				# distance between %sp and local variables

# Incoming arguments.  Their registers double as scratch registers below,
# which is why the arguments are saved on the stack by the prologue.
($ctx, $inp, $num) = ("%r26", "%r25", "%r24");	# zapped by $a0, $a1, $t0

# Scratch registers and the table pointer.
($a0, $a1, $t0, $t1, $Tbl) = ("%r26", "%r25", "%r24", "%r29", "%r31");

# The eight working variables.
($A,$B,$C,$D,$E,$F,$G,$H) =
	("%r17","%r18","%r19","%r20","%r21","%r22","%r23","%r28");
@V = ($A,$B,$C,$D,$E,$F,$G,$H);

# Message schedule registers %r1..%r16, plus $inp as a 17th slot that
# receives the extra word loaded for input that is not word-aligned.
@X = ((map { "%r$_" } 1..16), $inp);
108
# Append the code for one round to $code.
#
#   $i       round index; selects the schedule register X[$i%16] and
#            decides whether the next table word is fetched
#   $a..$h   registers currently holding the eight working variables
#
# The template starts by adding $t1 to h, so $t1 must already hold the
# word fetched from the table at $Tbl for this round.  For $i<15 the next
# table word is fetched into $t1 near the end of the round.  Expressions
# between backticks are evaluated later by the post-processing loop.
sub ROUND_00_15 {
	my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
	my $Xi = $X[$i%16];

	# h += table word + Ch(e,f,g) + X[i] + Sigma1(e)
	$code .= <<___;
	_ror	$e,$Sigma1[0],$a0
	and	$f,$e,$t0
	_ror	$e,$Sigma1[1],$a1
	addl	$t1,$h,$h
	andcm	$g,$e,$t1
	xor	$a1,$a0,$a0
	_ror	$a1,`$Sigma1[2]-$Sigma1[1]`,$a1
	or	$t0,$t1,$t1		; Ch(e,f,g)
	addl	$Xi,$h,$h
	xor	$a0,$a1,$a1		; Sigma1(e)
	addl	$t1,$h,$h
	_ror	$a,$Sigma0[0],$a0
	addl	$a1,$h,$h

___

	# d += h;  h += Sigma0(a) + Maj(a,b,c)
	$code .= <<___;
	_ror	$a,$Sigma0[1],$a1
	and	$a,$b,$t0
	and	$a,$c,$t1
	xor	$a1,$a0,$a0
	_ror	$a1,`$Sigma0[2]-$Sigma0[1]`,$a1
	xor	$t1,$t0,$t0
	and	$b,$c,$t1
	xor	$a0,$a1,$a1		; Sigma0(a)
	addl	$h,$d,$d
	xor	$t1,$t0,$t0		; Maj(a,b,c)
	`"$LDM	$SZ($Tbl),$t1" if ($i<15)`
	addl	$a1,$h,$h
	addl	$t0,$h,$h

___
}
142
# Append the code for one round with message-schedule update to $code.
#
#   $i       round index as counted by the caller (16 and up); the
#            schedule slot updated is $i-16
#   rest     registers currently holding the working variables a..h
#
# The slot is updated in place as
#   X[j] += sigma0(X[(j+1)%16]) + sigma1(X[(j+14)%16]) + X[(j+9)%16]
# the next table word is fetched into $t1, and the round body is then
# produced by ROUND_00_15.  For slot 15 the low ten bits of the fetched
# word are compared with $LAST10BITS and, on a match, $Tbl is bumped by
# one to flag the end of the table.
sub ROUND_16_xx {
	my ($i,@regs) = @_;
	my $slot = $i-16;
	my ($Xi,$X1,$X9,$X14) =
		@X[$slot, ($slot+1)%16, ($slot+9)%16, ($slot+14)%16];

	$code .= <<___;
	_ror	$X1,$sigma0[0],$a0
	_ror	$X1,$sigma0[1],$a1
	addl	$X9,$Xi,$Xi
	_ror	$X14,$sigma1[0],$t0
	_ror	$X14,$sigma1[1],$t1
	xor	$a1,$a0,$a0
	_shr	$X1,$sigma0[2],$a1
	xor	$t1,$t0,$t0
	_shr	$X14,$sigma1[2],$t1
	xor	$a1,$a0,$a0		; sigma0(X[(i+1)&0x0f])
	xor	$t1,$t0,$t0		; sigma1(X[(i+14)&0x0f])
	$LDM	$SZ($Tbl),$t1
	addl	$a0,$Xi,$Xi
	addl	$t0,$Xi,$Xi
___

	if ($slot == 15) {
		$code .= <<___;
	extru	$t1,31,10,$a1
	comiclr,<>	$LAST10BITS,$a1,%r0
	ldo	1($Tbl),$Tbl		; signal end of $Tbl
___
	}

	ROUND_00_15($i,@regs);
}
169
# Assembler preamble: architecture level, text space/subspace and the
# 64-byte aligned label that the constant table is emitted under.
$code = join("\n",
	"\t.LEVEL\t$LEVEL",
	"\t.SPACE\t\$TEXT\$",
	"\t.SUBSPA\t\$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY",
	"",
	"\t.ALIGN\t64",
	"L\$table",
	"");
178$code.=<<___ if ($SZ==8);
179 .WORD 0x428a2f98,0xd728ae22,0x71374491,0x23ef65cd
180 .WORD 0xb5c0fbcf,0xec4d3b2f,0xe9b5dba5,0x8189dbbc
181 .WORD 0x3956c25b,0xf348b538,0x59f111f1,0xb605d019
182 .WORD 0x923f82a4,0xaf194f9b,0xab1c5ed5,0xda6d8118
183 .WORD 0xd807aa98,0xa3030242,0x12835b01,0x45706fbe
184 .WORD 0x243185be,0x4ee4b28c,0x550c7dc3,0xd5ffb4e2
185 .WORD 0x72be5d74,0xf27b896f,0x80deb1fe,0x3b1696b1
186 .WORD 0x9bdc06a7,0x25c71235,0xc19bf174,0xcf692694
187 .WORD 0xe49b69c1,0x9ef14ad2,0xefbe4786,0x384f25e3
188 .WORD 0x0fc19dc6,0x8b8cd5b5,0x240ca1cc,0x77ac9c65
189 .WORD 0x2de92c6f,0x592b0275,0x4a7484aa,0x6ea6e483
190 .WORD 0x5cb0a9dc,0xbd41fbd4,0x76f988da,0x831153b5
191 .WORD 0x983e5152,0xee66dfab,0xa831c66d,0x2db43210
192 .WORD 0xb00327c8,0x98fb213f,0xbf597fc7,0xbeef0ee4
193 .WORD 0xc6e00bf3,0x3da88fc2,0xd5a79147,0x930aa725
194 .WORD 0x06ca6351,0xe003826f,0x14292967,0x0a0e6e70
195 .WORD 0x27b70a85,0x46d22ffc,0x2e1b2138,0x5c26c926
196 .WORD 0x4d2c6dfc,0x5ac42aed,0x53380d13,0x9d95b3df
197 .WORD 0x650a7354,0x8baf63de,0x766a0abb,0x3c77b2a8
198 .WORD 0x81c2c92e,0x47edaee6,0x92722c85,0x1482353b
199 .WORD 0xa2bfe8a1,0x4cf10364,0xa81a664b,0xbc423001
200 .WORD 0xc24b8b70,0xd0f89791,0xc76c51a3,0x0654be30
201 .WORD 0xd192e819,0xd6ef5218,0xd6990624,0x5565a910
202 .WORD 0xf40e3585,0x5771202a,0x106aa070,0x32bbd1b8
203 .WORD 0x19a4c116,0xb8d2d0c8,0x1e376c08,0x5141ab53
204 .WORD 0x2748774c,0xdf8eeb99,0x34b0bcb5,0xe19b48a8
205 .WORD 0x391c0cb3,0xc5c95a63,0x4ed8aa4a,0xe3418acb
206 .WORD 0x5b9cca4f,0x7763e373,0x682e6ff3,0xd6b2b8a3
207 .WORD 0x748f82ee,0x5defb2fc,0x78a5636f,0x43172f60
208 .WORD 0x84c87814,0xa1f0ab72,0x8cc70208,0x1a6439ec
209 .WORD 0x90befffa,0x23631e28,0xa4506ceb,0xde82bde9
210 .WORD 0xbef9a3f7,0xb2c67915,0xc67178f2,0xe372532b
211 .WORD 0xca273ece,0xea26619c,0xd186b8c7,0x21c0c207
212 .WORD 0xeada7dd6,0xcde0eb1e,0xf57d4f7f,0xee6ed178
213 .WORD 0x06f067aa,0x72176fba,0x0a637dc5,0xa2c898a6
214 .WORD 0x113f9804,0xbef90dae,0x1b710b35,0x131c471b
215 .WORD 0x28db77f5,0x23047d84,0x32caab7b,0x40c72493
216 .WORD 0x3c9ebe0a,0x15c9bebc,0x431d67c4,0x9c100d4c
217 .WORD 0x4cc5d4be,0xcb3e42b6,0x597f299c,0xfc657e2a
218 .WORD 0x5fcb6fab,0x3ad6faec,0x6c44198c,0x4a475817
219___
220$code.=<<___ if ($SZ==4);
221 .WORD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
222 .WORD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
223 .WORD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
224 .WORD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
225 .WORD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
226 .WORD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
227 .WORD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
228 .WORD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
229 .WORD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
230 .WORD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
231 .WORD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
232 .WORD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
233 .WORD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
234 .WORD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
235 .WORD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
236 .WORD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
237___
# Function header and the first two prologue stores: the return pointer,
# then %r3 with a post-modify that allocates the frame.
$code .= <<___;

	.EXPORT	$func,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
	.ALIGN	64
$func
	.PROC
	.CALLINFO	FRAME=`$FRAME-16*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=18
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)		; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
___

# Save %r4..%r18 in consecutive slots of the register save area.
for my $slot (1 .. 15) {
	my $reg = $slot + 3;
	$code .= "\t$PUSH\t%r$reg,`-$FRAME+$slot*$SIZE_T`(%sp)\n";
}

# Turn the block count into an end-of-input pointer, stash the arguments
# (their registers are reused as scratch), and locate the constant table
# relative to the current instruction address.
$code .= <<___;

	_shl	$num,`log(16*$SZ)/log(2)`,$num
	addl	$inp,$num,$num		; $num to point at the end of $inp

	$PUSH	$num,`-$FRAME_MARKER-4*$SIZE_T`(%sp)	; save arguments
	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)
	$PUSH	$ctx,`-$FRAME_MARKER-2*$SIZE_T`(%sp)

	blr	%r0,$Tbl
	ldi	3,$t1
L\$pic
	andcm	$Tbl,$t1,$Tbl		; wipe privilege level
	ldo	L\$table-L\$pic($Tbl),$Tbl
___
# SHA-512 in a 32-bit build only: run-time check for a PA-RISC 2.0
# processor.  The extrd instruction nullifies the branch that follows it
# on such a processor; otherwise the branch to the 32-bit-only code path
# at L$parisc1 is taken.
if ($SZ==8 && $SIZE_T==4) {
	$code .= <<___;
	ldi	31,$t1
	mtctl	$t1,%cr11
	extrd,u,*=	$t1,%sar,1,$t1	; executes on PA-RISC 1.0
	b	L\$parisc1
	nop
___
}
# Load the eight chaining values from the context into the working
# registers.
for my $k (0 .. 7) {
	$code .= "\t$LD\t`$k*$SZ`($ctx),$V[$k]"
	       . ($k == 0 ? "\t; load context" : "")
	       . "\n";
}

# Derive the shift amount used to realign unaligned input and load it
# into %sar; then open the per-block loop by fetching the first table
# word and computing the aligned input address.
$code .= <<___;

	extru	$inp,31,`log($SZ)/log(2)`,$t0
	sh3addl	$t0,%r0,$t0
	subi	`8*$SZ`,$t0,$t0
	mtctl	$t0,%cr11		; load %sar with align factor

L\$oop
	ldi	`$SZ-1`,$t0
	$LDM	$SZ($Tbl),$t1
	andcm	$inp,$t0,$t0		; align $inp
___

# Load the first fifteen words of the input block.
for my $k (0 .. 14) {
	$code .= "\t$LD\t`$SZ*$k`($t0),$X[$k]\n";
}

# The sixteenth word is loaded in the delay slot of the alignment test;
# a seventeenth is needed only when the input was not aligned.
$code .= <<___;
	cmpb,*=	$inp,$t0,L\$aligned
	$LD	`$SZ*15`($t0),$X[15]
	$LD	`$SZ*16`($t0),$X[16]
___

# Merge neighbouring words to undo the misalignment.
for my $k (0 .. 15) {
	$code .= "\t_align\t$X[$k],$X[$k+1],$X[$k]\n";
}

$code .= <<___;
L\$aligned
	nop	; otherwise /usr/ccs/bin/as is confused by below .WORD
___
317
# Unroll the first sixteen rounds.  After every round the register list
# is rotated right by one so the next round sees the renamed variables.
$i = 0;
while ($i < 16) {
	ROUND_00_15($i, @V);
	@V = @V[$#V, 0 .. $#V-1];
	$i++;
}

# Sixteen rounds with schedule update; the generated code loops back to
# this label until the end of the table has been signalled.
$code .= "L\$rounds\n"
       . "\tnop	; otherwise /usr/ccs/bin/as is confused by below .WORD\n";

while ($i < 32) {
	ROUND_16_xx($i, @V);
	@V = @V[$#V, 0 .. $#V-1];
	$i++;
}
# Loop back for the next sixteen rounds unless the end of the table was
# signalled, then restore the saved arguments and rewind the table
# pointer (the extra 1 undoes the end-of-table flag).
$code .= <<___;
	bb,>=	$Tbl,31,L\$rounds	; end of $Tbl signalled?
	nop

	$POP	`-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx	; restore arguments
	$POP	`-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
	$POP	`-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
	ldo	`-$rounds*$SZ-1`($Tbl),$Tbl		; rewind $Tbl

___

# Reload the previous chaining values into the schedule registers and
# start accumulating them; the input pointer is advanced on the way.
$code .= <<___;
	$LD	`0*$SZ`($ctx),$X[0]			; load context
	$LD	`1*$SZ`($ctx),$X[1]
	$LD	`2*$SZ`($ctx),$X[2]
	$LD	`3*$SZ`($ctx),$X[3]
	$LD	`4*$SZ`($ctx),$X[4]
	$LD	`5*$SZ`($ctx),$X[5]
	addl	$X[0],$A,$A
	$LD	`6*$SZ`($ctx),$X[6]
	addl	$X[1],$B,$B
	$LD	`7*$SZ`($ctx),$X[7]
	ldo	`16*$SZ`($inp),$inp		; advance $inp

___

# Finish the additions interleaved with the stores of the new chaining
# values, then continue with the next block while input remains.
$code .= <<___;
	$ST	$A,`0*$SZ`($ctx)			; save context
	addl	$X[2],$C,$C
	$ST	$B,`1*$SZ`($ctx)
	addl	$X[3],$D,$D
	$ST	$C,`2*$SZ`($ctx)
	addl	$X[4],$E,$E
	$ST	$D,`3*$SZ`($ctx)
	addl	$X[5],$F,$F
	$ST	$E,`4*$SZ`($ctx)
	addl	$X[6],$G,$G
	$ST	$F,`5*$SZ`($ctx)
	addl	$X[7],$H,$H
	$ST	$G,`6*$SZ`($ctx)
	$ST	$H,`7*$SZ`($ctx)

	cmpb,*<>,n	$inp,$num,L\$oop
	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)	; save $inp
___
363if ($SZ==8 && $SIZE_T==4) # SHA512 for 32-bit PA-RISC 1.0
364{{
# Close the preceding code path with a jump to the common epilogue and
# open the 32-bit-only SHA-512 path.
$code .= <<___;
	b	L\$done
	nop

	.ALIGN	64
L\$parisc1
___

# Register map for this path: every 64-bit working variable lives in a
# hi/lo pair of 32-bit registers, %r1..%r16.
@V = ($Ahi,$Alo,$Bhi,$Blo,$Chi,$Clo,$Dhi,$Dlo,
      $Ehi,$Elo,$Fhi,$Flo,$Ghi,$Glo,$Hhi,$Hlo) = map { "%r$_" } 1 .. 16;

# Scratch registers and the table pointer.
($a0, $a1, $a2, $a3) = ("%r17", "%r18", "%r19", "%r20");
($t0, $t1, $t2, $t3) = ("%r21", "%r22", "%r28", "%r29");
$Tbl = "%r31";

# Two hi/lo pairs for schedule words: the current one and the next one.
@X = ("%r23", "%r24", "%r25", "%r26");	# zaps $num,$inp,$ctx
388
# Append the code for one SHA-512 round, built from 32-bit instructions,
# to $code.
#
#   $i      round index within the current group of sixteen
#   next    sixteen registers: hi/lo pairs currently holding a..h
#   $flag   true when called from ROUND_16_xx_pa1, which has already
#           loaded X[i+1]; also enables the end-of-table test for $i==15
#
# The first pair in @X holds X[i] and is reused for K[i] once X[i] has
# been added; the second pair receives X[i+1].  On return @X has been
# rotated by two so that X[i+1] becomes the current pair.
sub ROUND_00_15_pa1 {
	my ($i,$ahi,$alo,$bhi,$blo,$chi,$clo,$dhi,$dlo,
	    $ehi,$elo,$fhi,$flo,$ghi,$glo,$hhi,$hlo,$flag) = @_;
	my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;

	unless ($flag) {
		$code .= <<___;
	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1]
___
	}

	$code .= <<___;
	shd	$ehi,$elo,$Sigma1[0],$t0
	add	$Xlo,$hlo,$hlo
	shd	$elo,$ehi,$Sigma1[0],$t1
	addc	$Xhi,$hhi,$hhi		; h += X[i]
	shd	$ehi,$elo,$Sigma1[1],$t2
	ldwm	8($Tbl),$Xhi
	shd	$elo,$ehi,$Sigma1[1],$t3
	ldw	-4($Tbl),$Xlo		; load K[i]
	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1
	and	$flo,$elo,$a0
	and	$fhi,$ehi,$a1
	shd	$ehi,$elo,$Sigma1[2],$t2
	andcm	$glo,$elo,$a2
	shd	$elo,$ehi,$Sigma1[2],$t3
	andcm	$ghi,$ehi,$a3
	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1		; Sigma1(e)
	add	$Xlo,$hlo,$hlo
	xor	$a2,$a0,$a0
	addc	$Xhi,$hhi,$hhi		; h += K[i]
	xor	$a3,$a1,$a1		; Ch(e,f,g)

	add	$t0,$hlo,$hlo
	shd	$ahi,$alo,$Sigma0[0],$t0
	addc	$t1,$hhi,$hhi		; h += Sigma1(e)
	shd	$alo,$ahi,$Sigma0[0],$t1
	add	$a0,$hlo,$hlo
	shd	$ahi,$alo,$Sigma0[1],$t2
	addc	$a1,$hhi,$hhi		; h += Ch(e,f,g)
	shd	$alo,$ahi,$Sigma0[1],$t3

	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1
	shd	$ahi,$alo,$Sigma0[2],$t2
	and	$alo,$blo,$a0
	shd	$alo,$ahi,$Sigma0[2],$t3
	and	$ahi,$bhi,$a1
	xor	$t2,$t0,$t0
	xor	$t3,$t1,$t1		; Sigma0(a)

	and	$alo,$clo,$a2
	and	$ahi,$chi,$a3
	xor	$a2,$a0,$a0
	add	$hlo,$dlo,$dlo
	xor	$a3,$a1,$a1
	addc	$hhi,$dhi,$dhi		; d += h
	and	$blo,$clo,$a2
	add	$t0,$hlo,$hlo
	and	$bhi,$chi,$a3
	addc	$t1,$hhi,$hhi		; h += Sigma0(a)
	xor	$a2,$a0,$a0
	add	$a0,$hlo,$hlo
	xor	$a3,$a1,$a1		; Maj(a,b,c)
	addc	$a1,$hhi,$hhi		; h += Maj(a,b,c)

___

	# Last round of a schedule group: keep looping unless the low ten
	# bits of the K word just used mark the end of the table.
	if ($i==15 && $flag) {
		$code .= <<___;
	extru	$Xlo,31,10,$Xlo
	comiclr,=	$LAST10BITS,$Xlo,%r0
	b	L\$rounds_pa1
	nop
___
	}

	# Make the X[i+1] pair the current pair for the next round.
	push(@X, splice(@X, 0, 2));
}
464
# Append the code for one SHA-512 round with message-schedule update,
# built from 32-bit instructions, to $code.
#
#   $i      round index as counted by the caller (16 and up); the
#           schedule slot updated is $i-16
#   rest    sixteen registers: hi/lo pairs currently holding a..h
#
# The schedule lives on the stack at -$XOFF(%sp), eight bytes per word,
# high half first.  The current pair of @X holds X[i]; X[i+1], X[i+9] and
# X[i+14] are loaded, X[i] += sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9]
# is computed and stored back, and the round body is then produced by
# ROUND_00_15_pa1 with its flag argument set.
#
# The emitted comments name the function each xor chain computes; the
# second chain uses the @sigma1 amounts and is labelled accordingly.
sub ROUND_16_xx_pa1 {
	my ($Xhi,$Xlo,$Xnhi,$Xnlo) = @X;
	my ($i,@regs) = @_;
	$i -= 16;

	$code .= <<___;
	ldw	`-$XOFF+8*(($i+1)%16)`(%sp),$Xnhi
	ldw	`-$XOFF+8*(($i+1)%16)+4`(%sp),$Xnlo	; load X[i+1]
	ldw	`-$XOFF+8*(($i+9)%16)`(%sp),$a1
	ldw	`-$XOFF+8*(($i+9)%16)+4`(%sp),$a0	; load X[i+9]
	ldw	`-$XOFF+8*(($i+14)%16)`(%sp),$a3
	ldw	`-$XOFF+8*(($i+14)%16)+4`(%sp),$a2	; load X[i+14]
	shd	$Xnhi,$Xnlo,$sigma0[0],$t0
	shd	$Xnlo,$Xnhi,$sigma0[0],$t1
	add	$a0,$Xlo,$Xlo
	shd	$Xnhi,$Xnlo,$sigma0[1],$t2
	addc	$a1,$Xhi,$Xhi
	shd	$Xnlo,$Xnhi,$sigma0[1],$t3
	xor	$t2,$t0,$t0
	shd	$Xnhi,$Xnlo,$sigma0[2],$t2
	xor	$t3,$t1,$t1
	extru	$Xnhi,`31-$sigma0[2]`,`32-$sigma0[2]`,$t3
	xor	$t2,$t0,$t0
	shd	$a3,$a2,$sigma1[0],$a0
	xor	$t3,$t1,$t1		; sigma0(X[(i+1)&0x0f])
	shd	$a2,$a3,$sigma1[0],$a1
	add	$t0,$Xlo,$Xlo
	shd	$a3,$a2,$sigma1[1],$t2
	addc	$t1,$Xhi,$Xhi
	shd	$a2,$a3,$sigma1[1],$t3
	xor	$t2,$a0,$a0
	shd	$a3,$a2,$sigma1[2],$t2
	xor	$t3,$a1,$a1
	extru	$a3,`31-$sigma1[2]`,`32-$sigma1[2]`,$t3
	xor	$t2,$a0,$a0
	xor	$t3,$a1,$a1		; sigma1(X[(i+14)&0x0f])
	add	$a0,$Xlo,$Xlo
	addc	$a1,$Xhi,$Xhi

	stw	$Xhi,`-$XOFF+8*($i%16)`(%sp)
	stw	$Xlo,`-$XOFF+8*($i%16)+4`(%sp)
___

	ROUND_00_15_pa1($i,@regs,1);
}
# Load the sixteen 32-bit halves of the chaining values.
for my $k (0 .. 15) {
	$code .= "\tldw\t`$k*4`($ctx),$V[$k]"
	       . ($k == 0 ? "\t; load context" : "")
	       . "\n";
}

# Derive the shift amount for unaligned input and load it into %sar.
# At the top of the per-block loop, aligned input branches to the plain
# copy; the subtraction in the delay slot rounds $inp down to a word
# boundary.
$code .= <<___;

	extru	$inp,31,2,$t0
	sh3addl	$t0,%r0,$t0
	subi	32,$t0,$t0
	mtctl	$t0,%cr11		; load %sar with align factor

L\$oop_pa1
	extru	$inp,31,2,$a3
	comib,=	0,$a3,L\$aligned_pa1
	sub	$inp,$a3,$inp

___

# Unaligned input: start a software pipeline that loads words, merges
# neighbours with vshd and stores the result to the schedule area.
$code .= <<___;
	ldw	`0*4`($inp),$X[0]
	ldw	`1*4`($inp),$X[1]
	ldw	`2*4`($inp),$t2
	ldw	`3*4`($inp),$t3
	ldw	`4*4`($inp),$a0
	ldw	`5*4`($inp),$a1
	ldw	`6*4`($inp),$a2
	ldw	`7*4`($inp),$a3
	vshd	$X[0],$X[1],$X[0]
	vshd	$X[1],$t2,$X[1]
	stw	$X[0],`-$XOFF+0*4`(%sp)
	ldw	`8*4`($inp),$t0
	vshd	$t2,$t3,$t2
	stw	$X[1],`-$XOFF+1*4`(%sp)
	ldw	`9*4`($inp),$t1
	vshd	$t3,$a0,$t3
___
{
# Remainder of the unaligned-input pipeline.  @pipe is the ring of eight
# registers holding words in flight; its head is always the word that is
# ready to be stored.
my @pipe = ($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);

# Steady state: store one word, load a new one, merge the next pair.
$i = 2;
while ($i <= (128/4-8)) {
	$code .= <<___;
	stw	$pipe[0],`-$XOFF+$i*4`(%sp)
	ldw	`(8+$i)*4`($inp),$pipe[0]
	vshd	$pipe[1],$pipe[2],$pipe[1]
___
	@pipe = (@pipe[1 .. $#pipe], $pipe[0]);
	$i++;
}

# Drain: nothing left to load, keep storing and merging.
while ($i < (128/4-1)) {
	$code .= <<___;
	stw	$pipe[0],`-$XOFF+$i*4`(%sp)
	vshd	$pipe[1],$pipe[2],$pipe[1]
___
	@pipe = (@pipe[1 .. $#pipe], $pipe[0]);
	$i++;
}

# The last word is stored in the delay slot of the branch that skips the
# aligned-input copy.
$code .= <<___;
	b	L\$collected_pa1
	stw	$pipe[0],`-$XOFF+$i*4`(%sp)

___
}
# Aligned input: copy the 32 words of the block to the schedule area
# through a ring of registers, keeping several loads in flight.
$code .= <<___;
L\$aligned_pa1
	ldw	`0*4`($inp),$X[0]
	ldw	`1*4`($inp),$X[1]
	ldw	`2*4`($inp),$t2
	ldw	`3*4`($inp),$t3
	ldw	`4*4`($inp),$a0
	ldw	`5*4`($inp),$a1
	ldw	`6*4`($inp),$a2
	ldw	`7*4`($inp),$a3
	stw	$X[0],`-$XOFF+0*4`(%sp)
	ldw	`8*4`($inp),$t0
	stw	$X[1],`-$XOFF+1*4`(%sp)
	ldw	`9*4`($inp),$t1
___
{
my @pipe = ($t2,$t3,$a0,$a1,$a2,$a3,$t0,$t1);

# Steady state: store the head of the ring and refill it.
$i = 2;
while ($i < (128/4-8)) {
	$code .= <<___;
	stw	$pipe[0],`-$XOFF+$i*4`(%sp)
	ldw	`(8+$i)*4`($inp),$pipe[0]
___
	@pipe = (@pipe[1 .. $#pipe], $pipe[0]);
	$i++;
}

# Drain the eight words still held in registers.
while ($i < 128/4) {
	$code .= <<___;
	stw	$pipe[0],`-$XOFF+$i*4`(%sp)
___
	@pipe = (@pipe[1 .. $#pipe], $pipe[0]);
	$i++;
}

# Both the aligned and the unaligned path continue from here.
$code .= "L\$collected_pa1\n";
}
608
# Unroll the first sixteen rounds.  Each working variable occupies two
# registers, so the list is rotated right by two after every round.
$i = 0;
while ($i < 16) {
	ROUND_00_15_pa1($i, @V);
	@V = (@V[-2, -1], @V[0 .. $#V-2]);
	$i++;
}

# Sixteen rounds with schedule update; the generated code loops back to
# this label until the end of the table is reached.
$code .= "L\$rounds_pa1\n";
while ($i < 32) {
	ROUND_16_xx_pa1($i, @V);
	@V = (@V[-2, -1], @V[0 .. $#V-2]);
	$i++;
}
612
# Restore the saved arguments and rewind the table pointer.
$code .= <<___;

	$POP	`-$FRAME_MARKER-2*$SIZE_T`(%sp),$ctx	; restore arguments
	$POP	`-$FRAME_MARKER-3*$SIZE_T`(%sp),$inp
	$POP	`-$FRAME_MARKER-4*$SIZE_T`(%sp),$num
	ldo	`-$rounds*$SZ`($Tbl),$Tbl		; rewind $Tbl

___

# Add the previous chaining values to the working variables as 64-bit
# quantities (add on the low halves, addc on the high halves), with the
# loads and stores interleaved between the additions.  Afterwards either
# leave through L$done or save the advanced input pointer and loop.
$code .= <<___;
	ldw	`0*4`($ctx),$t1		; update context
	ldw	`1*4`($ctx),$t0
	ldw	`2*4`($ctx),$t3
	ldw	`3*4`($ctx),$t2
	ldw	`4*4`($ctx),$a1
	ldw	`5*4`($ctx),$a0
	ldw	`6*4`($ctx),$a3
	add	$t0,$Alo,$Alo
	ldw	`7*4`($ctx),$a2
	addc	$t1,$Ahi,$Ahi
	ldw	`8*4`($ctx),$t1
	add	$t2,$Blo,$Blo
	ldw	`9*4`($ctx),$t0
	addc	$t3,$Bhi,$Bhi
	ldw	`10*4`($ctx),$t3
	add	$a0,$Clo,$Clo
	ldw	`11*4`($ctx),$t2
	addc	$a1,$Chi,$Chi
	ldw	`12*4`($ctx),$a1
	add	$a2,$Dlo,$Dlo
	ldw	`13*4`($ctx),$a0
	addc	$a3,$Dhi,$Dhi
	ldw	`14*4`($ctx),$a3
	add	$t0,$Elo,$Elo
	ldw	`15*4`($ctx),$a2
	addc	$t1,$Ehi,$Ehi
	stw	$Ahi,`0*4`($ctx)
	add	$t2,$Flo,$Flo
	stw	$Alo,`1*4`($ctx)
	addc	$t3,$Fhi,$Fhi
	stw	$Bhi,`2*4`($ctx)
	add	$a0,$Glo,$Glo
	stw	$Blo,`3*4`($ctx)
	addc	$a1,$Ghi,$Ghi
	stw	$Chi,`4*4`($ctx)
	add	$a2,$Hlo,$Hlo
	stw	$Clo,`5*4`($ctx)
	addc	$a3,$Hhi,$Hhi
	stw	$Dhi,`6*4`($ctx)
	ldo	`16*$SZ`($inp),$inp	; advance $inp
	stw	$Dlo,`7*4`($ctx)
	stw	$Ehi,`8*4`($ctx)
	stw	$Elo,`9*4`($ctx)
	stw	$Fhi,`10*4`($ctx)
	stw	$Flo,`11*4`($ctx)
	stw	$Ghi,`12*4`($ctx)
	stw	$Glo,`13*4`($ctx)
	stw	$Hhi,`14*4`($ctx)
	comb,=	$inp,$num,L\$done
	stw	$Hlo,`15*4`($ctx)
	b	L\$oop_pa1
	$PUSH	$inp,`-$FRAME_MARKER-3*$SIZE_T`(%sp)	; save $inp
L\$done
___
673}}
# Epilogue: reload the return pointer, restore %r4..%r18 from the
# register save area, and return; %r3 is restored with a pre-modify in
# the delay slot of the return branch, which also releases the frame.
$code .= "\t$POP\t`-$FRAME-$SAVED_RP`(%sp),%r2\t\t; standard epilogue\n";

for my $slot (1 .. 15) {
	my $reg = $slot + 3;
	$code .= "\t$POP\t`-$FRAME+$slot*$SIZE_T`(%sp),%r$reg\n";
}

$code .= <<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND
	.STRINGZ "SHA`64*$SZ` block transform for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___
697
698# Explicitly encode PA-RISC 2.0 instructions used in this module, so
699# that it can be compiled with .LEVEL 1.0. It should be noted that I
700# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
701# directive...
702
# Encoder for "ldd".
#
#   $mod    completer string following the mnemonic ("" if none)
#   $args   operand string
#
# Operands of the form "disp(%rB),%rT" are turned into a .WORD directive
# carrying the instruction word, with the original text kept as a
# trailing comment.  A completer starting with ",m" sets bit 3 and one
# starting with ",mb" additionally sets bit 2.  Any other operand form is
# returned as plain assembly text.
my $ldd = sub {
	my ($mod,$args) = @_;
	my $text = "ldd$mod\t$args";

	my ($disp,$base,$target) =
		$args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/	# format 3 suffices
		or return "\t".$text;

	my $opcode = (0x14<<26) | ($base<<21) | ($target<<16)
		   | (($disp&0x1FF8)<<1) | (($disp>>13)&1);
	$opcode |= (1<<3) if ($mod =~ /^,m/);
	$opcode |= (1<<2) if ($mod =~ /^,mb/);

	return sprintf("\t.WORD\t0x%08x\t; %s",$opcode,$text);
};
715
# Encoder for "std".
#
#   $mod    completer string following the mnemonic ("" if none); it is
#           kept in the comment text but not encoded
#   $args   operand string
#
# Operands of the form "%rS,disp(%rB)" are turned into a .WORD directive
# carrying the instruction word, with the original text kept as a
# trailing comment.  Any other operand form is returned as plain
# assembly text.
my $std = sub {
	my ($mod,$args) = @_;
	my $text = "std$mod\t$args";

	my ($source,$disp,$base) =
		$args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/	# format 3 suffices
		or return "\t".$text;

	my $opcode = (0x1c<<26) | ($base<<21) | ($source<<16)
		   | (($disp&0x1FF8)<<1) | (($disp>>13)&1);

	return sprintf("\t.WORD\t0x%08x\t; %s",$opcode,$text);
};
726
# Encoder for "extrd".
#
#   $mod    completer string following the mnemonic
#   $args   operand string
#
# Two operand forms are turned into a .WORD directive, with the original
# text kept as a trailing comment:
#   "%rS,pos,len,%rT"    fixed position (format 15)
#   "%rS,%sar,len,%rT"   position taken from %sar (format 12); a
#                        completer containing "=" or "*=" sets bit 13
# Any other operand form is returned as plain assembly text.
my $extrd = sub {
	my ($mod,$args) = @_;
	my $text = "extrd$mod\t$args";
	my $opcode;

	# I only have ",u" completer, it's implicitly encoded...
	if (my ($source,$pos,$width,$target) =
		$args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/)	# format 15
	{
		my $len = 32-$width;
		$opcode  = (0x36<<26) | ($source<<21) | ($target<<16);
		$opcode |= (($pos&0x20)<<6) | (($pos&0x1f)<<5);		# encode pos
		$opcode |= (($len&0x20)<<7) | ($len&0x1f);		# encode len
	}
	elsif (($source,$width,$target) =
		$args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/)		# format 12
	{
		my $len = 32-$width;
		$opcode  = (0x34<<26) | ($source<<21) | ($target<<16) | (2<<11) | (1<<9);
		$opcode |= (($len&0x20)<<3) | ($len&0x1f);		# encode len
		$opcode |= (1<<13) if ($mod =~ /,\**=/);
	}
	else
	{
		return "\t".$text;
	}

	return sprintf("\t.WORD\t0x%08x\t; %s",$opcode,$text);
};
748
# Encoder for "shrpd".
#
#   $mod    completer string following the mnemonic ("" if none); it is
#           kept in the comment text but not encoded
#   $args   operand string
#
# Two operand forms are turned into a .WORD directive, with the original
# text kept as a trailing comment:
#   "%rA,%rB,sa,%rT"     fixed shift amount (format 14)
#   "%rA,%rB,%sar,%rT"   shift amount taken from %sar (format 11)
# Any other operand form is returned as plain assembly text.
my $shrpd = sub {
	my ($mod,$args) = @_;
	my $text = "shrpd$mod\t$args";
	my $opcode;

	if (my ($first,$second,$amount,$target) =
		$args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/)	# format 14
	{
		my $cpos = 63-$amount;
		$opcode  = (0x34<<26) | ($second<<21) | ($first<<16) | (1<<10) | $target;
		$opcode |= (($cpos&0x20)<<6) | (($cpos&0x1f)<<5);	# encode sa
	}
	elsif (($first,$second,$target) =
		$args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/)	# format 11
	{
		$opcode = (0x34<<26) | ($second<<21) | ($first<<16) | (1<<9) | $target;
	}
	else
	{
		return "\t".$text;
	}

	return sprintf("\t.WORD\t0x%08x\t; %s",$opcode,$text);
};
765
# Render one instruction.
#
#   $mnemonic   instruction name
#   $mod        completer string following the mnemonic ("" if none)
#   $args       operand string
#
# Mnemonics that have a hand-written encoder are passed to it; every
# other instruction is returned as plain "\tmnemonic<mod>\targs" text.
#
# The encoder is looked up in an explicit table rather than by
# string-evaluating a variable name built from the mnemonic, so text
# taken from the instruction stream is never compiled as Perl code.
sub assemble {
	my ($mnemonic,$mod,$args) = @_;

	my %encoder_for = (
		ldd   => $ldd,
		std   => $std,
		extrd => $extrd,
		shrpd => $shrpd,
	);
	my $encoder = $encoder_for{$mnemonic};

	return ref($encoder) eq 'CODE'
		? $encoder->($mod,$args)
		: "\t$mnemonic$mod\t$args";
}
772
# Ask the configured compiler which assembler it drives and remember
# whether that is the GNU assembler.  Without a compiler in $ENV{CC}
# there is nothing to probe; skipping the probe avoids handing the shell
# a command line that starts with "-Wa,-v".
if (defined($ENV{CC}) && $ENV{CC} ne ""
    && `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
       =~ /GNU assembler/) {
    $gnuas = 1;
}
777
# Post-process the generated text line by line and print it.
foreach my $line (split("\n",$code)) {
	# Evaluate the expressions the templates left between backticks.
	$line =~ s/\`([^\`]*)\`/eval $1/ge;

	# At most one of the following rewrites applies to a line.  The first
	# maps shd shift amounts above 31 onto the swapped register pair.
	$line =~ s/shd\s+(%r[0-9]+),(%r[0-9]+),([0-9]+)/
		$3>31 ? sprintf("shd\t%$2,%$1,%d",$3-32)	# rotation for >=32
		:       sprintf("shd\t%$1,%$2,%d",$3)/e			or
	# translate made up instructions: _ror, _shr, _align, _shl
	$line =~ s/_ror(\s+)(%r[0-9]+),/
		($SZ==4 ? "shd" : "shrpd")."$1$2,$2,"/e			or

	$line =~ s/_shr(\s+%r[0-9]+),([0-9]+),/
		$SZ==4 ? sprintf("extru%s,%d,%d,",$1,31-$2,32-$2)
		       : sprintf("extrd,u%s,%d,%d,",$1,63-$2,64-$2)/e	or

	$line =~ s/_align(\s+%r[0-9]+,%r[0-9]+),/
		($SZ==4 ? "vshd$1," : "shrpd$1,%sar,")/e			or

	$line =~ s/_shl(\s+%r[0-9]+),([0-9]+),/
		$SIZE_T==4 ? sprintf("zdep%s,%d,%d,",$1,31-$2,32-$2)
		           : sprintf("depd,z%s,%d,%d,",$1,63-$2,64-$2)/e;

	# 32-bit builds: give every instruction to assemble(), which encodes
	# the ones it has encoders for as .WORD directives.
	if ($SIZE_T==4) {
		$line =~ s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
	}

	# Directive spellings for the GNU assembler in 64-bit builds.
	if ($gnuas && $SIZE_T==8) {
		$line =~ s/(\.LEVEL\s+2\.0)W/$1w/;
		$line =~ s/\.SPACE\s+\$TEXT\$/.text/;
		$line =~ s/\.SUBSPA.*//;
	}

	# Mnemonic spellings that depend on the ABI.
	$line =~ s/cmpb,\*/comb,/	if ($SIZE_T==4);
	$line =~ s/\bbv\b/bve/		if ($SIZE_T==8);

	print $line,"\n";
}

# Buffered write errors only surface when the handle is closed.
close STDOUT or die "error closing STDOUT: $!";
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette