VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.7/crypto/modes/asm/ghash-parisc.pl@ 97371

Last change on this file since 97371 was 94082, checked in by vboxsync, 3 years ago

libs/openssl-3.0.1: started applying and adjusting our OpenSSL changes to 3.0.1. bugref:10128

File size: 16.9 KB
Line 
1#! /usr/bin/env perl
2# Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9#
10# ====================================================================
11# Written by Andy Polyakov <[email protected]> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16#
17# April 2010
18#
19# The module implements "4-bit" GCM GHASH function and underlying
20# single multiplication operation in GF(2^128). "4-bit" means that it
21# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
22# it processes one byte in 19.6 cycles, which is more than twice as
23# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
24# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
25# processed byte. This is ~2.2x faster than 64-bit code generated by
26# vendor compiler (which used to be very hard to beat:-).
27#
28# Special thanks to polarhome.com for providing HP-UX account.
29
# Command-line handling.
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output  = (@ARGV && $ARGV[-1] =~ m|\.\w+$|) ? pop   : undef;
$flavour = (@ARGV && $ARGV[0]  !~ m|\.|)     ? shift : undef;

# Redirect the generated assembler to $output when one was given.  Use the
# three-argument form of open (the file name is never parsed for a mode) and
# fail loudly: continuing after a failed redirect would send the module to the
# caller's stdout and leave a missing or stale output file behind.
if ($output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}
36
# ABI-dependent parameters.  A flavour containing "64" selects the first
# row, anything else (including no flavour at all) selects the second.
#
#   $LEVEL         argument of the .LEVEL directive
#   $SIZE_T        register save-slot size in bytes
#   $FRAME_MARKER  bytes added to the save area to form $FRAME
#   $SAVED_RP      offset below %sp at which %r2 is stored in the prologue
#   $PUSH/$PUSHMA  store / store-with-modify mnemonics used by the prologue
#   $POP/$POPMB    load / load-with-modify mnemonics used by the epilogue
#   $NREGS         value substituted for ENTRY_GR in .CALLINFO
#
# (The 32-bit level was once meant to read "1.0\n\t.ALLOW\t2.0".)
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP,
 $PUSH,  $PUSHMA, $POP,          $POPMB,    $NREGS) =
    $flavour =~ /64/
	? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb",  6)
	: ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm",   11);

# Frame size: ten $SIZE_T-wide save slots (a fixed count, independent of
# $NREGS) plus the frame marker [+ argument transfer].
$FRAME = $FRAME_MARKER + 10*$SIZE_T;
61
################# volatile registers
# argument block
($Xi, $Htbl, $inp, $len) = ("%r26", "%r25", "%r24", "%r23");
# variables; $Hhh is an alias for the $Htbl argument register
($Hhh, $Hll)             = ($Htbl, "%r22");
($Zhh, $Zll)             = ("%r21", "%r20");
$cnt                     = "%r19";
($rem_4bit, $rem)        = ("%r28", "%r29");
$mask0xf0                = "%r31";

################# preserved registers
($Thh, $Tll)             = ("%r1", "%r2");
($nlo, $nhi, $byte)      = ("%r3", "%r4", "%r5");
# The middle 32-bit words only get registers of their own in the 32-bit build.
if ($SIZE_T==4) {
	($Zhl, $Zlh) = ("%r6",  "%r7");
	($Hhl, $Hlh) = ("%r8",  "%r9");
	($Thl, $Tlh) = ("%r10", "%r11");
}
# used in PA-RISC 2.0 code; note that it names the same register as $Zhl
$rem2 = "%r6";
91
92$code.=<<___;
93 .LEVEL $LEVEL
94 .SPACE \$TEXT\$
95 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
96
97 .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
98 .ALIGN 64
99gcm_gmult_4bit
100 .PROC
101 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
102 .ENTRY
103 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
104 $PUSHMA %r3,$FRAME(%sp)
105 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
106 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
107 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
108___
109$code.=<<___ if ($SIZE_T==4);
110 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
111 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
112 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
113 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
114 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
115___
116$code.=<<___;
117 blr %r0,$rem_4bit
118 ldi 3,$rem
119L\$pic_gmult
120 andcm $rem_4bit,$rem,$rem_4bit
121 addl $inp,$len,$len
122 ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
123 ldi 0xf0,$mask0xf0
124___
125$code.=<<___ if ($SIZE_T==4);
126 ldi 31,$rem
127 mtctl $rem,%cr11
128 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
129 b L\$parisc1_gmult
130 nop
131___
132
133
134$code.=<<___;
135 ldb 15($Xi),$nlo
136 ldo 8($Htbl),$Hll
137
138 and $mask0xf0,$nlo,$nhi
139 depd,z $nlo,59,4,$nlo
140
141 ldd $nlo($Hll),$Zll
142 ldd $nlo($Hhh),$Zhh
143
144 depd,z $Zll,60,4,$rem
145 shrpd $Zhh,$Zll,4,$Zll
146 extrd,u $Zhh,59,60,$Zhh
147 ldb 14($Xi),$nlo
148
149 ldd $nhi($Hll),$Tll
150 ldd $nhi($Hhh),$Thh
151 and $mask0xf0,$nlo,$nhi
152 depd,z $nlo,59,4,$nlo
153
154 xor $Tll,$Zll,$Zll
155 xor $Thh,$Zhh,$Zhh
156 ldd $rem($rem_4bit),$rem
157 b L\$oop_gmult_pa2
158 ldi 13,$cnt
159
160 .ALIGN 8
161L\$oop_gmult_pa2
162 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
163 depd,z $Zll,60,4,$rem
164
165 shrpd $Zhh,$Zll,4,$Zll
166 extrd,u $Zhh,59,60,$Zhh
167 ldd $nlo($Hll),$Tll
168 ldd $nlo($Hhh),$Thh
169
170 xor $Tll,$Zll,$Zll
171 xor $Thh,$Zhh,$Zhh
172 ldd $rem($rem_4bit),$rem
173
174 xor $rem,$Zhh,$Zhh
175 depd,z $Zll,60,4,$rem
176 ldbx $cnt($Xi),$nlo
177
178 shrpd $Zhh,$Zll,4,$Zll
179 extrd,u $Zhh,59,60,$Zhh
180 ldd $nhi($Hll),$Tll
181 ldd $nhi($Hhh),$Thh
182
183 and $mask0xf0,$nlo,$nhi
184 depd,z $nlo,59,4,$nlo
185 ldd $rem($rem_4bit),$rem
186
187 xor $Tll,$Zll,$Zll
188 addib,uv -1,$cnt,L\$oop_gmult_pa2
189 xor $Thh,$Zhh,$Zhh
190
191 xor $rem,$Zhh,$Zhh
192 depd,z $Zll,60,4,$rem
193
194 shrpd $Zhh,$Zll,4,$Zll
195 extrd,u $Zhh,59,60,$Zhh
196 ldd $nlo($Hll),$Tll
197 ldd $nlo($Hhh),$Thh
198
199 xor $Tll,$Zll,$Zll
200 xor $Thh,$Zhh,$Zhh
201 ldd $rem($rem_4bit),$rem
202
203 xor $rem,$Zhh,$Zhh
204 depd,z $Zll,60,4,$rem
205
206 shrpd $Zhh,$Zll,4,$Zll
207 extrd,u $Zhh,59,60,$Zhh
208 ldd $nhi($Hll),$Tll
209 ldd $nhi($Hhh),$Thh
210
211 xor $Tll,$Zll,$Zll
212 xor $Thh,$Zhh,$Zhh
213 ldd $rem($rem_4bit),$rem
214
215 xor $rem,$Zhh,$Zhh
216 std $Zll,8($Xi)
217 std $Zhh,0($Xi)
218___
219
220
221$code.=<<___ if ($SIZE_T==4);
222 b L\$done_gmult
223 nop
224
225L\$parisc1_gmult
226 ldb 15($Xi),$nlo
227 ldo 12($Htbl),$Hll
228 ldo 8($Htbl),$Hlh
229 ldo 4($Htbl),$Hhl
230
231 and $mask0xf0,$nlo,$nhi
232 zdep $nlo,27,4,$nlo
233
234 ldwx $nlo($Hll),$Zll
235 ldwx $nlo($Hlh),$Zlh
236 ldwx $nlo($Hhl),$Zhl
237 ldwx $nlo($Hhh),$Zhh
238 zdep $Zll,28,4,$rem
239 ldb 14($Xi),$nlo
240 ldwx $rem($rem_4bit),$rem
241 shrpw $Zlh,$Zll,4,$Zll
242 ldwx $nhi($Hll),$Tll
243 shrpw $Zhl,$Zlh,4,$Zlh
244 ldwx $nhi($Hlh),$Tlh
245 shrpw $Zhh,$Zhl,4,$Zhl
246 ldwx $nhi($Hhl),$Thl
247 extru $Zhh,27,28,$Zhh
248 ldwx $nhi($Hhh),$Thh
249 xor $rem,$Zhh,$Zhh
250 and $mask0xf0,$nlo,$nhi
251 zdep $nlo,27,4,$nlo
252
253 xor $Tll,$Zll,$Zll
254 ldwx $nlo($Hll),$Tll
255 xor $Tlh,$Zlh,$Zlh
256 ldwx $nlo($Hlh),$Tlh
257 xor $Thl,$Zhl,$Zhl
258 b L\$oop_gmult_pa1
259 ldi 13,$cnt
260
261 .ALIGN 8
262L\$oop_gmult_pa1
263 zdep $Zll,28,4,$rem
264 ldwx $nlo($Hhl),$Thl
265 xor $Thh,$Zhh,$Zhh
266 ldwx $rem($rem_4bit),$rem
267 shrpw $Zlh,$Zll,4,$Zll
268 ldwx $nlo($Hhh),$Thh
269 shrpw $Zhl,$Zlh,4,$Zlh
270 ldbx $cnt($Xi),$nlo
271 xor $Tll,$Zll,$Zll
272 ldwx $nhi($Hll),$Tll
273 shrpw $Zhh,$Zhl,4,$Zhl
274 xor $Tlh,$Zlh,$Zlh
275 ldwx $nhi($Hlh),$Tlh
276 extru $Zhh,27,28,$Zhh
277 xor $Thl,$Zhl,$Zhl
278 ldwx $nhi($Hhl),$Thl
279 xor $rem,$Zhh,$Zhh
280 zdep $Zll,28,4,$rem
281 xor $Thh,$Zhh,$Zhh
282 ldwx $nhi($Hhh),$Thh
283 shrpw $Zlh,$Zll,4,$Zll
284 ldwx $rem($rem_4bit),$rem
285 shrpw $Zhl,$Zlh,4,$Zlh
286 shrpw $Zhh,$Zhl,4,$Zhl
287 and $mask0xf0,$nlo,$nhi
288 extru $Zhh,27,28,$Zhh
289 zdep $nlo,27,4,$nlo
290 xor $Tll,$Zll,$Zll
291 ldwx $nlo($Hll),$Tll
292 xor $Tlh,$Zlh,$Zlh
293 ldwx $nlo($Hlh),$Tlh
294 xor $rem,$Zhh,$Zhh
295 addib,uv -1,$cnt,L\$oop_gmult_pa1
296 xor $Thl,$Zhl,$Zhl
297
298 zdep $Zll,28,4,$rem
299 ldwx $nlo($Hhl),$Thl
300 xor $Thh,$Zhh,$Zhh
301 ldwx $rem($rem_4bit),$rem
302 shrpw $Zlh,$Zll,4,$Zll
303 ldwx $nlo($Hhh),$Thh
304 shrpw $Zhl,$Zlh,4,$Zlh
305 xor $Tll,$Zll,$Zll
306 ldwx $nhi($Hll),$Tll
307 shrpw $Zhh,$Zhl,4,$Zhl
308 xor $Tlh,$Zlh,$Zlh
309 ldwx $nhi($Hlh),$Tlh
310 extru $Zhh,27,28,$Zhh
311 xor $rem,$Zhh,$Zhh
312 xor $Thl,$Zhl,$Zhl
313 ldwx $nhi($Hhl),$Thl
314 xor $Thh,$Zhh,$Zhh
315 ldwx $nhi($Hhh),$Thh
316 zdep $Zll,28,4,$rem
317 ldwx $rem($rem_4bit),$rem
318 shrpw $Zlh,$Zll,4,$Zll
319 shrpw $Zhl,$Zlh,4,$Zlh
320 shrpw $Zhh,$Zhl,4,$Zhl
321 extru $Zhh,27,28,$Zhh
322 xor $Tll,$Zll,$Zll
323 xor $Tlh,$Zlh,$Zlh
324 xor $rem,$Zhh,$Zhh
325 stw $Zll,12($Xi)
326 xor $Thl,$Zhl,$Zhl
327 stw $Zlh,8($Xi)
328 xor $Thh,$Zhh,$Zhh
329 stw $Zhl,4($Xi)
330 stw $Zhh,0($Xi)
331___
332$code.=<<___;
333L\$done_gmult
334 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
335 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
336 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
337 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
338___
339$code.=<<___ if ($SIZE_T==4);
340 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
341 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
342 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
343 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
344 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
345___
# Tail of gcm_gmult_4bit (return, with the %r3 restore in the branch delay
# slot) followed by the export and prologue of gcm_ghash_4bit.
#
# ENTRY_GR now uses $NREGS, exactly as gcm_gmult_4bit's .CALLINFO does.  The
# previous hard-coded 11 did not match the 64-bit prologue, which stores only
# %r3..%r6 (the %r7..%r11 stores are emitted for $SIZE_T==4 only).
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.EXPORT	gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
	.ALIGN	64
gcm_ghash_4bit
	.PROC
	.CALLINFO	FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
	.ENTRY
	$PUSH	%r2,-$SAVED_RP(%sp)	; standard prologue
	$PUSHMA	%r3,$FRAME(%sp)
	$PUSH	%r4,`-$FRAME+1*$SIZE_T`(%sp)
	$PUSH	%r5,`-$FRAME+2*$SIZE_T`(%sp)
	$PUSH	%r6,`-$FRAME+3*$SIZE_T`(%sp)
___
364$code.=<<___ if ($SIZE_T==4);
365 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
366 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
367 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
368 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
369 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
370___
371$code.=<<___;
372 blr %r0,$rem_4bit
373 ldi 3,$rem
374L\$pic_ghash
375 andcm $rem_4bit,$rem,$rem_4bit
376 addl $inp,$len,$len
377 ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
378 ldi 0xf0,$mask0xf0
379___
380$code.=<<___ if ($SIZE_T==4);
381 ldi 31,$rem
382 mtctl $rem,%cr11
383 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
384 b L\$parisc1_ghash
385 nop
386___
387
388
389
390$code.=<<___;
391 ldb 15($Xi),$nlo
392 ldo 8($Htbl),$Hll
393
394L\$outer_ghash_pa2
395 ldb 15($inp),$nhi
396 xor $nhi,$nlo,$nlo
397 and $mask0xf0,$nlo,$nhi
398 depd,z $nlo,59,4,$nlo
399
400 ldd $nlo($Hll),$Zll
401 ldd $nlo($Hhh),$Zhh
402
403 depd,z $Zll,60,4,$rem
404 shrpd $Zhh,$Zll,4,$Zll
405 extrd,u $Zhh,59,60,$Zhh
406 ldb 14($Xi),$nlo
407 ldb 14($inp),$byte
408
409 ldd $nhi($Hll),$Tll
410 ldd $nhi($Hhh),$Thh
411 xor $byte,$nlo,$nlo
412 and $mask0xf0,$nlo,$nhi
413 depd,z $nlo,59,4,$nlo
414
415 xor $Tll,$Zll,$Zll
416 xor $Thh,$Zhh,$Zhh
417 ldd $rem($rem_4bit),$rem
418 b L\$oop_ghash_pa2
419 ldi 13,$cnt
420
421 .ALIGN 8
422L\$oop_ghash_pa2
423 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
424 depd,z $Zll,60,4,$rem2
425
426 shrpd $Zhh,$Zll,4,$Zll
427 extrd,u $Zhh,59,60,$Zhh
428 ldd $nlo($Hll),$Tll
429 ldd $nlo($Hhh),$Thh
430
431 xor $Tll,$Zll,$Zll
432 xor $Thh,$Zhh,$Zhh
433 ldbx $cnt($Xi),$nlo
434 ldbx $cnt($inp),$byte
435
436 depd,z $Zll,60,4,$rem
437 shrpd $Zhh,$Zll,4,$Zll
438 ldd $rem2($rem_4bit),$rem2
439
440 xor $rem2,$Zhh,$Zhh
441 xor $byte,$nlo,$nlo
442 ldd $nhi($Hll),$Tll
443 ldd $nhi($Hhh),$Thh
444
445 and $mask0xf0,$nlo,$nhi
446 depd,z $nlo,59,4,$nlo
447
448 extrd,u $Zhh,59,60,$Zhh
449 xor $Tll,$Zll,$Zll
450
451 ldd $rem($rem_4bit),$rem
452 addib,uv -1,$cnt,L\$oop_ghash_pa2
453 xor $Thh,$Zhh,$Zhh
454
455 xor $rem,$Zhh,$Zhh
456 depd,z $Zll,60,4,$rem2
457
458 shrpd $Zhh,$Zll,4,$Zll
459 extrd,u $Zhh,59,60,$Zhh
460 ldd $nlo($Hll),$Tll
461 ldd $nlo($Hhh),$Thh
462
463 xor $Tll,$Zll,$Zll
464 xor $Thh,$Zhh,$Zhh
465
466 depd,z $Zll,60,4,$rem
467 shrpd $Zhh,$Zll,4,$Zll
468 ldd $rem2($rem_4bit),$rem2
469
470 xor $rem2,$Zhh,$Zhh
471 ldd $nhi($Hll),$Tll
472 ldd $nhi($Hhh),$Thh
473
474 extrd,u $Zhh,59,60,$Zhh
475 xor $Tll,$Zll,$Zll
476 xor $Thh,$Zhh,$Zhh
477 ldd $rem($rem_4bit),$rem
478
479 xor $rem,$Zhh,$Zhh
480 std $Zll,8($Xi)
481 ldo 16($inp),$inp
482 std $Zhh,0($Xi)
483 cmpb,*<> $inp,$len,L\$outer_ghash_pa2
484 copy $Zll,$nlo
485___
486
487
488$code.=<<___ if ($SIZE_T==4);
489 b L\$done_ghash
490 nop
491
492L\$parisc1_ghash
493 ldb 15($Xi),$nlo
494 ldo 12($Htbl),$Hll
495 ldo 8($Htbl),$Hlh
496 ldo 4($Htbl),$Hhl
497
498L\$outer_ghash_pa1
499 ldb 15($inp),$byte
500 xor $byte,$nlo,$nlo
501 and $mask0xf0,$nlo,$nhi
502 zdep $nlo,27,4,$nlo
503
504 ldwx $nlo($Hll),$Zll
505 ldwx $nlo($Hlh),$Zlh
506 ldwx $nlo($Hhl),$Zhl
507 ldwx $nlo($Hhh),$Zhh
508 zdep $Zll,28,4,$rem
509 ldb 14($Xi),$nlo
510 ldb 14($inp),$byte
511 ldwx $rem($rem_4bit),$rem
512 shrpw $Zlh,$Zll,4,$Zll
513 ldwx $nhi($Hll),$Tll
514 shrpw $Zhl,$Zlh,4,$Zlh
515 ldwx $nhi($Hlh),$Tlh
516 shrpw $Zhh,$Zhl,4,$Zhl
517 ldwx $nhi($Hhl),$Thl
518 extru $Zhh,27,28,$Zhh
519 ldwx $nhi($Hhh),$Thh
520 xor $byte,$nlo,$nlo
521 xor $rem,$Zhh,$Zhh
522 and $mask0xf0,$nlo,$nhi
523 zdep $nlo,27,4,$nlo
524
525 xor $Tll,$Zll,$Zll
526 ldwx $nlo($Hll),$Tll
527 xor $Tlh,$Zlh,$Zlh
528 ldwx $nlo($Hlh),$Tlh
529 xor $Thl,$Zhl,$Zhl
530 b L\$oop_ghash_pa1
531 ldi 13,$cnt
532
533 .ALIGN 8
534L\$oop_ghash_pa1
535 zdep $Zll,28,4,$rem
536 ldwx $nlo($Hhl),$Thl
537 xor $Thh,$Zhh,$Zhh
538 ldwx $rem($rem_4bit),$rem
539 shrpw $Zlh,$Zll,4,$Zll
540 ldwx $nlo($Hhh),$Thh
541 shrpw $Zhl,$Zlh,4,$Zlh
542 ldbx $cnt($Xi),$nlo
543 xor $Tll,$Zll,$Zll
544 ldwx $nhi($Hll),$Tll
545 shrpw $Zhh,$Zhl,4,$Zhl
546 ldbx $cnt($inp),$byte
547 xor $Tlh,$Zlh,$Zlh
548 ldwx $nhi($Hlh),$Tlh
549 extru $Zhh,27,28,$Zhh
550 xor $Thl,$Zhl,$Zhl
551 ldwx $nhi($Hhl),$Thl
552 xor $rem,$Zhh,$Zhh
553 zdep $Zll,28,4,$rem
554 xor $Thh,$Zhh,$Zhh
555 ldwx $nhi($Hhh),$Thh
556 shrpw $Zlh,$Zll,4,$Zll
557 ldwx $rem($rem_4bit),$rem
558 shrpw $Zhl,$Zlh,4,$Zlh
559 xor $byte,$nlo,$nlo
560 shrpw $Zhh,$Zhl,4,$Zhl
561 and $mask0xf0,$nlo,$nhi
562 extru $Zhh,27,28,$Zhh
563 zdep $nlo,27,4,$nlo
564 xor $Tll,$Zll,$Zll
565 ldwx $nlo($Hll),$Tll
566 xor $Tlh,$Zlh,$Zlh
567 ldwx $nlo($Hlh),$Tlh
568 xor $rem,$Zhh,$Zhh
569 addib,uv -1,$cnt,L\$oop_ghash_pa1
570 xor $Thl,$Zhl,$Zhl
571
572 zdep $Zll,28,4,$rem
573 ldwx $nlo($Hhl),$Thl
574 xor $Thh,$Zhh,$Zhh
575 ldwx $rem($rem_4bit),$rem
576 shrpw $Zlh,$Zll,4,$Zll
577 ldwx $nlo($Hhh),$Thh
578 shrpw $Zhl,$Zlh,4,$Zlh
579 xor $Tll,$Zll,$Zll
580 ldwx $nhi($Hll),$Tll
581 shrpw $Zhh,$Zhl,4,$Zhl
582 xor $Tlh,$Zlh,$Zlh
583 ldwx $nhi($Hlh),$Tlh
584 extru $Zhh,27,28,$Zhh
585 xor $rem,$Zhh,$Zhh
586 xor $Thl,$Zhl,$Zhl
587 ldwx $nhi($Hhl),$Thl
588 xor $Thh,$Zhh,$Zhh
589 ldwx $nhi($Hhh),$Thh
590 zdep $Zll,28,4,$rem
591 ldwx $rem($rem_4bit),$rem
592 shrpw $Zlh,$Zll,4,$Zll
593 shrpw $Zhl,$Zlh,4,$Zlh
594 shrpw $Zhh,$Zhl,4,$Zhl
595 extru $Zhh,27,28,$Zhh
596 xor $Tll,$Zll,$Zll
597 xor $Tlh,$Zlh,$Zlh
598 xor $rem,$Zhh,$Zhh
599 stw $Zll,12($Xi)
600 xor $Thl,$Zhl,$Zhl
601 stw $Zlh,8($Xi)
602 xor $Thh,$Zhh,$Zhh
603 stw $Zhl,4($Xi)
604 ldo 16($inp),$inp
605 stw $Zhh,0($Xi)
606 comb,<> $inp,$len,L\$outer_ghash_pa1
607 copy $Zll,$nlo
608___
609$code.=<<___;
610L\$done_ghash
611 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
612 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
613 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
614 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
615___
616$code.=<<___ if ($SIZE_T==4);
617 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
618 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
619 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
620 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
621 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
622___
# Tail of gcm_ghash_4bit (return, with the %r3 restore in the branch delay
# slot), then the shared L$rem_4bit table and the module's identification
# string.  Each table entry is written as two words: a 16-bit constant
# shifted into the upper half of the first word, followed by a zero word.
# The identification string previously misspelled "CRYPTOGAMS" as
# "GRYPTOGAMS".
$code.=<<___;
	bv	(%r2)
	.EXIT
	$POPMB	-$FRAME(%sp),%r3
	.PROCEND

	.ALIGN	64
L\$rem_4bit
	.WORD	`0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
	.WORD	`0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
	.WORD	`0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
	.WORD	`0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
	.STRINGZ "GHASH for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
	.ALIGN	64
___
638
639# Explicitly encode PA-RISC 2.0 instructions used in this module, so
640# that it can be compiled with .LEVEL 1.0. It should be noted that I
641# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
642# directive...
643
# Hand-encoder for "ldd".
#   $mod  - completer text that followed the mnemonic (may be empty)
#   $args - operand text
# Returns one line of assembler: a raw ".WORD" carrying the opcode, with the
# original instruction kept as a trailing comment, when the operands match
# one of the two supported shapes; otherwise the instruction unchanged.
# Only numbered registers (%rN) are recognised.
my $ldd = sub {
    my ($mod,$args) = @_;
    my $asm = "ldd$mod\t$args";

    # format 4: %rX(%rB),%rT
    if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {
	my ($index,$base,$target) = ($1,$2,$3);
	my $word = (0x03<<26)|($base<<21)|($index<<16)|(3<<6)|$target;
	return sprintf "\t.WORD\t0x%08x\t; %s",$word,$asm;
    }

    # format 5: disp(%rB),%rT
    if ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {
	my ($disp,$base,$target) = ($1,$2,$3);
	my $word = (0x03<<26)|($base<<21)|(1<<12)|(3<<6)|$target;
	# encode offset: low four bits go to bit 17 upwards, bit 4 to bit 16
	$word |= (($disp&0xF)<<17)|(($disp&0x10)<<12);
	# any ",m..." completer sets bit 5; ",mb" additionally sets bit 13
	$word |= (1<<5)  if ($mod =~ /^,m/);
	$word |= (1<<13) if ($mod =~ /^,mb/);
	return sprintf "\t.WORD\t0x%08x\t; %s",$word,$asm;
    }

    return "\t".$asm;
};
661
# Hand-encoder for "std".
#   $mod  - completer text that followed the mnemonic (may be empty)
#   $args - operand text, expected as %rS,disp(%rB)
# Returns a raw ".WORD" line with the original instruction as a trailing
# comment, or the instruction unchanged when the operands do not match
# (for instance when the base is not written as a numbered %rN register).
my $std = sub {
    my ($mod,$args) = @_;
    my $asm = "std$mod\t$args";

    return "\t".$asm
	unless $args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/;

    # format 3 suffices
    my ($source,$disp,$base) = ($1,$2,$3);
    my $word = (0x1c<<26)|($base<<21)|($source<<16);
    $word |= (($disp&0x1FF8)<<1)|(($disp>>13)&1);	# encode displacement
    return sprintf "\t.WORD\t0x%08x\t; %s",$word,$asm;
};
672
# Hand-encoder for "extrd".
#   $mod  - completer text that followed the mnemonic
#   $args - operand text: %rS,pos,len,%rT or %rS,%sar,len,%rT
# Returns a raw ".WORD" line with the original instruction as a trailing
# comment, or the instruction unchanged when neither shape matches.
# I only have ",u" completer, it's implicitly encoded...
my $extrd = sub {
    my ($mod,$args) = @_;
    my $asm = "extrd$mod\t$args";

    # format 15: fixed position
    if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {
	my ($source,$pos,$width,$target) = ($1,$2,$3,$4);
	my $len  = 32-$width;
	my $word = (0x36<<26)|($source<<21)|($target<<16);
	$word |= (($pos&0x20)<<6)|(($pos&0x1f)<<5);	# encode pos
	$word |= (($len&0x20)<<7)|($len&0x1f);		# encode len
	return sprintf "\t.WORD\t0x%08x\t; %s",$word,$asm;
    }

    # format 12: position taken from %sar
    if ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {
	my ($source,$width,$target) = ($1,$2,$3);
	my $len  = 32-$width;
	my $word = (0x34<<26)|($source<<21)|($target<<16)|(2<<11)|(1<<9);
	$word |= (($len&0x20)<<3)|($len&0x1f);		# encode len
	# a "=" condition, optionally written with "*", sets bit 13
	$word |= (1<<13) if ($mod =~ /,\**=/);
	return sprintf "\t.WORD\t0x%08x\t; %s",$word,$asm;
    }

    return "\t".$asm;
};
694
# Hand-encoder for "shrpd".
#   $mod  - completer text that followed the mnemonic (may be empty)
#   $args - operand text: %rA,%rB,sa,%rT or %rA,%rB,%sar,%rT
# Returns a raw ".WORD" line with the original instruction as a trailing
# comment, or the instruction unchanged when neither shape matches.
my $shrpd = sub {
    my ($mod,$args) = @_;
    my $asm = "shrpd$mod\t$args";

    # format 14: immediate shift amount
    if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {
	my ($first,$second,$shift,$target) = ($1,$2,$3,$4);
	my $cpos = 63-$shift;
	my $word = (0x34<<26)|($second<<21)|($first<<16)|(1<<10)|$target;
	$word |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);	# encode sa
	return sprintf "\t.WORD\t0x%08x\t; %s",$word,$asm;
    }

    # format 11: shift amount taken from %sar
    if ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {
	my ($first,$second,$target) = ($1,$2,$3);
	my $word = (0x34<<26)|($second<<21)|($first<<16)|(1<<9)|$target;
	return sprintf "\t.WORD\t0x%08x\t; %s",$word,$asm;
    }

    return "\t".$asm;
};
711
# Hand-encoder for "depd".
#   $mod  - completer text that followed the mnemonic
#   $args - operand text, expected as %rS,pos,len,%rT
# Returns a raw ".WORD" line with the original instruction as a trailing
# comment, or the instruction unchanged when the operands do not match.
# I only have ",z" completer, it's implicitly encoded...
my $depd = sub {
    my ($mod,$args) = @_;
    my $asm = "depd$mod\t$args";

    return "\t".$asm
	unless $args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/;

    # format 16
    my ($source,$pos,$width,$target) = ($1,$2,$3,$4);
    my $cpos = 63-$pos;
    my $len  = 32-$width;
    my $word = (0x3c<<26)|($target<<21)|($source<<16);
    $word |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);	# encode pos
    $word |= (($len&0x20)<<7)|($len&0x1f);		# encode len
    return sprintf "\t.WORD\t0x%08x\t; %s",$word,$asm;
};
727
# Translate one instruction into assembler text.
#   $mnemonic - instruction name (lower-case letters)
#   $mod      - completer text that followed the mnemonic (may be empty)
#   $args     - operand text
# Instructions that have a hand-encoder in this file (ldd, std, extrd, shrpd,
# depd) are passed to it; every other instruction is returned unchanged as
# "\t<mnemonic><mod>\t<args>".
#
# The encoder is found through an explicit table rather than by string-eval
# of "\$<mnemonic>", so text taken from the input line is never compiled as
# Perl and only the intended encoders can ever be selected.
sub assemble {
    my ($mnemonic,$mod,$args)=@_;
    my %encoder_for = (
	ldd   => $ldd,
	std   => $std,
	extrd => $extrd,
	shrpd => $shrpd,
	depd  => $depd,
    );
    my $encoder = $encoder_for{$mnemonic};

    return ref($encoder) eq 'CODE'
	? $encoder->($mod,$args)
	: "\t$mnemonic$mod\t$args";
}
734
# Run the compiler named by $ENV{CC} on an empty assembler input with the
# assembler's -v flag and remember whether its output mentions GNU as.
$gnuas = 1
    if `$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
       =~ /GNU assembler/;

# Post-process the accumulated module line by line and print it.
for my $line (split("\n",$code)) {
	# Replace every `expr` with the value of evaluating it as Perl.
	$line =~ s/\`([^\`]*)\`/eval $1/ge;

	if ($SIZE_T==4) {
		# 32-bit build: pass each instruction through assemble(), which
		# hand-encodes the ones it has encoders for, then rewrite
		# "cmpb,*" to "comb," and drop a "*" that follows a comma.
		$line =~ s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
		$line =~ s/cmpb,\*/comb,/;
		$line =~ s/,\*/,/;
	}

	if ($SIZE_T==8) {
		if ($gnuas) {
			# Directive rewrites applied only when GNU as was detected.
			$line =~ s/(\.LEVEL\s+2\.0)W/$1w/;
			$line =~ s/\.SPACE\s+\$TEXT\$/.text/;
			$line =~ s/\.SUBSPA.*//;
		}
		# 64-bit build returns with "bve" instead of "bv".
		$line =~ s/\bbv\b/bve/;
	}

	print $line,"\n";
}

# Buffered write errors only surface here, so check the close.
close STDOUT or die "error closing STDOUT: $!";
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette