VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.0g/crypto/modes/asm/ghash-parisc.pl@ 69881

Last change on this file since 69881 was 69881, checked in by vboxsync, 7 years ago

Update OpenSSL to 1.1.0g.
bugref:8070: src/libs maintenance

  • Property svn:eol-style set to LF
  • Property svn:executable set to *
File size: 16.4 KB
Line 
1#! /usr/bin/env perl
2# Copyright 2010-2016 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9#
10# ====================================================================
11# Written by Andy Polyakov <[email protected]> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16#
17# April 2010
18#
19# The module implements "4-bit" GCM GHASH function and underlying
20# single multiplication operation in GF(2^128). "4-bit" means that it
21# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
22# it processes one byte in 19.6 cycles, which is more than twice as
23# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
24# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
25# processed byte. This is ~2.2x faster than 64-bit code generated by
26# vendor compiler (which used to be very hard to beat:-).
27#
28# Special thanks to polarhome.com for providing HP-UX account.
29
# Command line: <flavour> [<output file>].
#   $flavour - selects the 64-bit parameter set when it contains "64",
#              the 32-bit one otherwise (see the if/else that follows);
#   $output  - file that receives the generated assembler text.
$flavour = shift;
$output  = shift;

# Redirect STDOUT only when an output file was actually named, so that
# running the script without one writes the result to the inherited
# STDOUT instead of attempting to open an empty file name.  The
# three-argument form keeps characters in the file name from being
# interpreted as part of the open mode, and a failure is reported
# instead of being silently ignored.
if (defined($output) && $output ne "") {
    open(STDOUT, ">", $output) or die "can't open $output: $!";
}
33
# ABI-dependent parameters.  A flavour containing "64" selects the first
# value list, anything else the second one.  In order:
#   $LEVEL         argument of the .LEVEL directive
#                  (32-bit value could carry "\n\t.ALLOW\t2.0")
#   $SIZE_T        register/pointer size in bytes
#   $FRAME_MARKER  size of the frame marker
#   $SAVED_RP      offset below %sp at which %r2 is saved
#   $PUSH/$PUSHMA  store / store-with-modify mnemonics
#   $POP/$POPMB    load / load-with-modify mnemonics
#   $NREGS         value used for ENTRY_GR in .CALLINFO
($LEVEL, $SIZE_T, $FRAME_MARKER, $SAVED_RP,
 $PUSH,  $PUSHMA, $POP,          $POPMB,    $NREGS) =
    ($flavour =~ /64/)
        ? ("2.0W", 8, 80, 16, "std", "std,ma", "ldd", "ldd,mb", 6)
        : ("1.0",  4, 48, 20, "stw", "stwm",   "ldw", "ldwm",   11);

# Frame size: room for saved registers (ten slots of $SIZE_T bytes)
# plus the frame marker [+ argument transfer].
$FRAME = $FRAME_MARKER + 10*$SIZE_T;
58
################# volatile registers
# argument block
($Xi, $Htbl, $inp, $len) = ("%r26", "%r25", "%r24", "%r23");
# variables; $Hhh deliberately shares the register holding $Htbl
$Hhh = $Htbl;
($Hll, $Zhh, $Zll, $cnt)      = ("%r22", "%r21", "%r20", "%r19");
($rem_4bit, $rem, $mask0xf0)  = ("%r28", "%r29", "%r31");

################# preserved registers
($Thh, $Tll, $nlo, $nhi, $byte) = map { "%r$_" } (1..5);
# the extra 32-bit halves exist only when registers are 4 bytes wide
($Zhl, $Zlh, $Hhl, $Hlh, $Thl, $Tlh) = map { "%r$_" } (6..11)
    if ($SIZE_T==4);
# used in PA-RISC 2.0 code; same register number as $Zhl above
$rem2 = "%r6";
88
89$code.=<<___;
90 .LEVEL $LEVEL
91 .SPACE \$TEXT\$
92 .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
93
94 .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
95 .ALIGN 64
96gcm_gmult_4bit
97 .PROC
98 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
99 .ENTRY
100 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
101 $PUSHMA %r3,$FRAME(%sp)
102 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
103 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
104 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
105___
106$code.=<<___ if ($SIZE_T==4);
107 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
108 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
109 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
110 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
111 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
112___
# gcm_gmult_4bit, after the prologue: load the address of the
# L$rem_4bit table into $rem_4bit, expressed relative to the
# L$pic_gmult label, and set $mask0xf0 to 0xf0.
# $rem is loaded with 3 and and-complemented into the address obtained
# by blr, i.e. the two low bits are cleared before the table offset is
# added -- NOTE(review): presumably these are privilege-level bits;
# confirm against the architecture manual.
# NOTE(review): the addl on $inp/$len mirrors gcm_ghash_4bit; the
# .EXPORT line of gcm_gmult_4bit declares only two arguments, so its
# result looks unused here -- confirm.
113$code.=<<___;
114 blr %r0,$rem_4bit
115 ldi 3,$rem
116L\$pic_gmult
117 andcm $rem_4bit,$rem,$rem_4bit
118 addl $inp,$len,$len
119 ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
120 ldi 0xf0,$mask0xf0
121___
# Emitted only when $SIZE_T is 4: run-time selection between the
# PA-RISC 2.0 code that follows and the code at L$parisc1_gmult.
# NOTE(review): presumably the "*=" condition of extrd nullifies the
# branch on 2.0 processors so they fall through, while 1.0 processors
# take the branch -- confirm against the architecture manual.
122$code.=<<___ if ($SIZE_T==4);
123 ldi 31,$rem
124 mtctl $rem,%cr11
125 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
126 b L\$parisc1_gmult
127 nop
128___
129
130
131$code.=<<___;
132 ldb 15($Xi),$nlo
133 ldo 8($Htbl),$Hll
134
135 and $mask0xf0,$nlo,$nhi
136 depd,z $nlo,59,4,$nlo
137
138 ldd $nlo($Hll),$Zll
139 ldd $nlo($Hhh),$Zhh
140
141 depd,z $Zll,60,4,$rem
142 shrpd $Zhh,$Zll,4,$Zll
143 extrd,u $Zhh,59,60,$Zhh
144 ldb 14($Xi),$nlo
145
146 ldd $nhi($Hll),$Tll
147 ldd $nhi($Hhh),$Thh
148 and $mask0xf0,$nlo,$nhi
149 depd,z $nlo,59,4,$nlo
150
151 xor $Tll,$Zll,$Zll
152 xor $Thh,$Zhh,$Zhh
153 ldd $rem($rem_4bit),$rem
154 b L\$oop_gmult_pa2
155 ldi 13,$cnt
156
157 .ALIGN 8
158L\$oop_gmult_pa2
159 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
160 depd,z $Zll,60,4,$rem
161
162 shrpd $Zhh,$Zll,4,$Zll
163 extrd,u $Zhh,59,60,$Zhh
164 ldd $nlo($Hll),$Tll
165 ldd $nlo($Hhh),$Thh
166
167 xor $Tll,$Zll,$Zll
168 xor $Thh,$Zhh,$Zhh
169 ldd $rem($rem_4bit),$rem
170
171 xor $rem,$Zhh,$Zhh
172 depd,z $Zll,60,4,$rem
173 ldbx $cnt($Xi),$nlo
174
175 shrpd $Zhh,$Zll,4,$Zll
176 extrd,u $Zhh,59,60,$Zhh
177 ldd $nhi($Hll),$Tll
178 ldd $nhi($Hhh),$Thh
179
180 and $mask0xf0,$nlo,$nhi
181 depd,z $nlo,59,4,$nlo
182 ldd $rem($rem_4bit),$rem
183
184 xor $Tll,$Zll,$Zll
185 addib,uv -1,$cnt,L\$oop_gmult_pa2
186 xor $Thh,$Zhh,$Zhh
187
188 xor $rem,$Zhh,$Zhh
189 depd,z $Zll,60,4,$rem
190
191 shrpd $Zhh,$Zll,4,$Zll
192 extrd,u $Zhh,59,60,$Zhh
193 ldd $nlo($Hll),$Tll
194 ldd $nlo($Hhh),$Thh
195
196 xor $Tll,$Zll,$Zll
197 xor $Thh,$Zhh,$Zhh
198 ldd $rem($rem_4bit),$rem
199
200 xor $rem,$Zhh,$Zhh
201 depd,z $Zll,60,4,$rem
202
203 shrpd $Zhh,$Zll,4,$Zll
204 extrd,u $Zhh,59,60,$Zhh
205 ldd $nhi($Hll),$Tll
206 ldd $nhi($Hhh),$Thh
207
208 xor $Tll,$Zll,$Zll
209 xor $Thh,$Zhh,$Zhh
210 ldd $rem($rem_4bit),$rem
211
212 xor $rem,$Zhh,$Zhh
213 std $Zll,8($Xi)
214 std $Zhh,0($Xi)
215___
216
217
218$code.=<<___ if ($SIZE_T==4);
219 b L\$done_gmult
220 nop
221
222L\$parisc1_gmult
223 ldb 15($Xi),$nlo
224 ldo 12($Htbl),$Hll
225 ldo 8($Htbl),$Hlh
226 ldo 4($Htbl),$Hhl
227
228 and $mask0xf0,$nlo,$nhi
229 zdep $nlo,27,4,$nlo
230
231 ldwx $nlo($Hll),$Zll
232 ldwx $nlo($Hlh),$Zlh
233 ldwx $nlo($Hhl),$Zhl
234 ldwx $nlo($Hhh),$Zhh
235 zdep $Zll,28,4,$rem
236 ldb 14($Xi),$nlo
237 ldwx $rem($rem_4bit),$rem
238 shrpw $Zlh,$Zll,4,$Zll
239 ldwx $nhi($Hll),$Tll
240 shrpw $Zhl,$Zlh,4,$Zlh
241 ldwx $nhi($Hlh),$Tlh
242 shrpw $Zhh,$Zhl,4,$Zhl
243 ldwx $nhi($Hhl),$Thl
244 extru $Zhh,27,28,$Zhh
245 ldwx $nhi($Hhh),$Thh
246 xor $rem,$Zhh,$Zhh
247 and $mask0xf0,$nlo,$nhi
248 zdep $nlo,27,4,$nlo
249
250 xor $Tll,$Zll,$Zll
251 ldwx $nlo($Hll),$Tll
252 xor $Tlh,$Zlh,$Zlh
253 ldwx $nlo($Hlh),$Tlh
254 xor $Thl,$Zhl,$Zhl
255 b L\$oop_gmult_pa1
256 ldi 13,$cnt
257
258 .ALIGN 8
259L\$oop_gmult_pa1
260 zdep $Zll,28,4,$rem
261 ldwx $nlo($Hhl),$Thl
262 xor $Thh,$Zhh,$Zhh
263 ldwx $rem($rem_4bit),$rem
264 shrpw $Zlh,$Zll,4,$Zll
265 ldwx $nlo($Hhh),$Thh
266 shrpw $Zhl,$Zlh,4,$Zlh
267 ldbx $cnt($Xi),$nlo
268 xor $Tll,$Zll,$Zll
269 ldwx $nhi($Hll),$Tll
270 shrpw $Zhh,$Zhl,4,$Zhl
271 xor $Tlh,$Zlh,$Zlh
272 ldwx $nhi($Hlh),$Tlh
273 extru $Zhh,27,28,$Zhh
274 xor $Thl,$Zhl,$Zhl
275 ldwx $nhi($Hhl),$Thl
276 xor $rem,$Zhh,$Zhh
277 zdep $Zll,28,4,$rem
278 xor $Thh,$Zhh,$Zhh
279 ldwx $nhi($Hhh),$Thh
280 shrpw $Zlh,$Zll,4,$Zll
281 ldwx $rem($rem_4bit),$rem
282 shrpw $Zhl,$Zlh,4,$Zlh
283 shrpw $Zhh,$Zhl,4,$Zhl
284 and $mask0xf0,$nlo,$nhi
285 extru $Zhh,27,28,$Zhh
286 zdep $nlo,27,4,$nlo
287 xor $Tll,$Zll,$Zll
288 ldwx $nlo($Hll),$Tll
289 xor $Tlh,$Zlh,$Zlh
290 ldwx $nlo($Hlh),$Tlh
291 xor $rem,$Zhh,$Zhh
292 addib,uv -1,$cnt,L\$oop_gmult_pa1
293 xor $Thl,$Zhl,$Zhl
294
295 zdep $Zll,28,4,$rem
296 ldwx $nlo($Hhl),$Thl
297 xor $Thh,$Zhh,$Zhh
298 ldwx $rem($rem_4bit),$rem
299 shrpw $Zlh,$Zll,4,$Zll
300 ldwx $nlo($Hhh),$Thh
301 shrpw $Zhl,$Zlh,4,$Zlh
302 xor $Tll,$Zll,$Zll
303 ldwx $nhi($Hll),$Tll
304 shrpw $Zhh,$Zhl,4,$Zhl
305 xor $Tlh,$Zlh,$Zlh
306 ldwx $nhi($Hlh),$Tlh
307 extru $Zhh,27,28,$Zhh
308 xor $rem,$Zhh,$Zhh
309 xor $Thl,$Zhl,$Zhl
310 ldwx $nhi($Hhl),$Thl
311 xor $Thh,$Zhh,$Zhh
312 ldwx $nhi($Hhh),$Thh
313 zdep $Zll,28,4,$rem
314 ldwx $rem($rem_4bit),$rem
315 shrpw $Zlh,$Zll,4,$Zll
316 shrpw $Zhl,$Zlh,4,$Zlh
317 shrpw $Zhh,$Zhl,4,$Zhl
318 extru $Zhh,27,28,$Zhh
319 xor $Tll,$Zll,$Zll
320 xor $Tlh,$Zlh,$Zlh
321 xor $rem,$Zhh,$Zhh
322 stw $Zll,12($Xi)
323 xor $Thl,$Zhl,$Zhl
324 stw $Zlh,8($Xi)
325 xor $Thh,$Zhh,$Zhh
326 stw $Zhl,4($Xi)
327 stw $Zhh,0($Xi)
328___
329$code.=<<___;
330L\$done_gmult
331 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
332 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
333 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
334 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
335___
336$code.=<<___ if ($SIZE_T==4);
337 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
338 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
339 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
340 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
341 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
342___
343$code.=<<___;
344 bv (%r2)
345 .EXIT
346 $POPMB -$FRAME(%sp),%r3
347 .PROCEND
348
349 .EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
350 .ALIGN 64
351gcm_ghash_4bit
352 .PROC
353 .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11
354 .ENTRY
355 $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
356 $PUSHMA %r3,$FRAME(%sp)
357 $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
358 $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
359 $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
360___
361$code.=<<___ if ($SIZE_T==4);
362 $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
363 $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
364 $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
365 $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
366 $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
367___
368$code.=<<___;
369 blr %r0,$rem_4bit
370 ldi 3,$rem
371L\$pic_ghash
372 andcm $rem_4bit,$rem,$rem_4bit
373 addl $inp,$len,$len
374 ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
375 ldi 0xf0,$mask0xf0
376___
377$code.=<<___ if ($SIZE_T==4);
378 ldi 31,$rem
379 mtctl $rem,%cr11
380 extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
381 b L\$parisc1_ghash
382 nop
383___
384
385
386
387$code.=<<___;
388 ldb 15($Xi),$nlo
389 ldo 8($Htbl),$Hll
390
391L\$outer_ghash_pa2
392 ldb 15($inp),$nhi
393 xor $nhi,$nlo,$nlo
394 and $mask0xf0,$nlo,$nhi
395 depd,z $nlo,59,4,$nlo
396
397 ldd $nlo($Hll),$Zll
398 ldd $nlo($Hhh),$Zhh
399
400 depd,z $Zll,60,4,$rem
401 shrpd $Zhh,$Zll,4,$Zll
402 extrd,u $Zhh,59,60,$Zhh
403 ldb 14($Xi),$nlo
404 ldb 14($inp),$byte
405
406 ldd $nhi($Hll),$Tll
407 ldd $nhi($Hhh),$Thh
408 xor $byte,$nlo,$nlo
409 and $mask0xf0,$nlo,$nhi
410 depd,z $nlo,59,4,$nlo
411
412 xor $Tll,$Zll,$Zll
413 xor $Thh,$Zhh,$Zhh
414 ldd $rem($rem_4bit),$rem
415 b L\$oop_ghash_pa2
416 ldi 13,$cnt
417
418 .ALIGN 8
419L\$oop_ghash_pa2
420 xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
421 depd,z $Zll,60,4,$rem2
422
423 shrpd $Zhh,$Zll,4,$Zll
424 extrd,u $Zhh,59,60,$Zhh
425 ldd $nlo($Hll),$Tll
426 ldd $nlo($Hhh),$Thh
427
428 xor $Tll,$Zll,$Zll
429 xor $Thh,$Zhh,$Zhh
430 ldbx $cnt($Xi),$nlo
431 ldbx $cnt($inp),$byte
432
433 depd,z $Zll,60,4,$rem
434 shrpd $Zhh,$Zll,4,$Zll
435 ldd $rem2($rem_4bit),$rem2
436
437 xor $rem2,$Zhh,$Zhh
438 xor $byte,$nlo,$nlo
439 ldd $nhi($Hll),$Tll
440 ldd $nhi($Hhh),$Thh
441
442 and $mask0xf0,$nlo,$nhi
443 depd,z $nlo,59,4,$nlo
444
445 extrd,u $Zhh,59,60,$Zhh
446 xor $Tll,$Zll,$Zll
447
448 ldd $rem($rem_4bit),$rem
449 addib,uv -1,$cnt,L\$oop_ghash_pa2
450 xor $Thh,$Zhh,$Zhh
451
452 xor $rem,$Zhh,$Zhh
453 depd,z $Zll,60,4,$rem2
454
455 shrpd $Zhh,$Zll,4,$Zll
456 extrd,u $Zhh,59,60,$Zhh
457 ldd $nlo($Hll),$Tll
458 ldd $nlo($Hhh),$Thh
459
460 xor $Tll,$Zll,$Zll
461 xor $Thh,$Zhh,$Zhh
462
463 depd,z $Zll,60,4,$rem
464 shrpd $Zhh,$Zll,4,$Zll
465 ldd $rem2($rem_4bit),$rem2
466
467 xor $rem2,$Zhh,$Zhh
468 ldd $nhi($Hll),$Tll
469 ldd $nhi($Hhh),$Thh
470
471 extrd,u $Zhh,59,60,$Zhh
472 xor $Tll,$Zll,$Zll
473 xor $Thh,$Zhh,$Zhh
474 ldd $rem($rem_4bit),$rem
475
476 xor $rem,$Zhh,$Zhh
477 std $Zll,8($Xi)
478 ldo 16($inp),$inp
479 std $Zhh,0($Xi)
480 cmpb,*<> $inp,$len,L\$outer_ghash_pa2
481 copy $Zll,$nlo
482___
483
484
485$code.=<<___ if ($SIZE_T==4);
486 b L\$done_ghash
487 nop
488
489L\$parisc1_ghash
490 ldb 15($Xi),$nlo
491 ldo 12($Htbl),$Hll
492 ldo 8($Htbl),$Hlh
493 ldo 4($Htbl),$Hhl
494
495L\$outer_ghash_pa1
496 ldb 15($inp),$byte
497 xor $byte,$nlo,$nlo
498 and $mask0xf0,$nlo,$nhi
499 zdep $nlo,27,4,$nlo
500
501 ldwx $nlo($Hll),$Zll
502 ldwx $nlo($Hlh),$Zlh
503 ldwx $nlo($Hhl),$Zhl
504 ldwx $nlo($Hhh),$Zhh
505 zdep $Zll,28,4,$rem
506 ldb 14($Xi),$nlo
507 ldb 14($inp),$byte
508 ldwx $rem($rem_4bit),$rem
509 shrpw $Zlh,$Zll,4,$Zll
510 ldwx $nhi($Hll),$Tll
511 shrpw $Zhl,$Zlh,4,$Zlh
512 ldwx $nhi($Hlh),$Tlh
513 shrpw $Zhh,$Zhl,4,$Zhl
514 ldwx $nhi($Hhl),$Thl
515 extru $Zhh,27,28,$Zhh
516 ldwx $nhi($Hhh),$Thh
517 xor $byte,$nlo,$nlo
518 xor $rem,$Zhh,$Zhh
519 and $mask0xf0,$nlo,$nhi
520 zdep $nlo,27,4,$nlo
521
522 xor $Tll,$Zll,$Zll
523 ldwx $nlo($Hll),$Tll
524 xor $Tlh,$Zlh,$Zlh
525 ldwx $nlo($Hlh),$Tlh
526 xor $Thl,$Zhl,$Zhl
527 b L\$oop_ghash_pa1
528 ldi 13,$cnt
529
530 .ALIGN 8
531L\$oop_ghash_pa1
532 zdep $Zll,28,4,$rem
533 ldwx $nlo($Hhl),$Thl
534 xor $Thh,$Zhh,$Zhh
535 ldwx $rem($rem_4bit),$rem
536 shrpw $Zlh,$Zll,4,$Zll
537 ldwx $nlo($Hhh),$Thh
538 shrpw $Zhl,$Zlh,4,$Zlh
539 ldbx $cnt($Xi),$nlo
540 xor $Tll,$Zll,$Zll
541 ldwx $nhi($Hll),$Tll
542 shrpw $Zhh,$Zhl,4,$Zhl
543 ldbx $cnt($inp),$byte
544 xor $Tlh,$Zlh,$Zlh
545 ldwx $nhi($Hlh),$Tlh
546 extru $Zhh,27,28,$Zhh
547 xor $Thl,$Zhl,$Zhl
548 ldwx $nhi($Hhl),$Thl
549 xor $rem,$Zhh,$Zhh
550 zdep $Zll,28,4,$rem
551 xor $Thh,$Zhh,$Zhh
552 ldwx $nhi($Hhh),$Thh
553 shrpw $Zlh,$Zll,4,$Zll
554 ldwx $rem($rem_4bit),$rem
555 shrpw $Zhl,$Zlh,4,$Zlh
556 xor $byte,$nlo,$nlo
557 shrpw $Zhh,$Zhl,4,$Zhl
558 and $mask0xf0,$nlo,$nhi
559 extru $Zhh,27,28,$Zhh
560 zdep $nlo,27,4,$nlo
561 xor $Tll,$Zll,$Zll
562 ldwx $nlo($Hll),$Tll
563 xor $Tlh,$Zlh,$Zlh
564 ldwx $nlo($Hlh),$Tlh
565 xor $rem,$Zhh,$Zhh
566 addib,uv -1,$cnt,L\$oop_ghash_pa1
567 xor $Thl,$Zhl,$Zhl
568
569 zdep $Zll,28,4,$rem
570 ldwx $nlo($Hhl),$Thl
571 xor $Thh,$Zhh,$Zhh
572 ldwx $rem($rem_4bit),$rem
573 shrpw $Zlh,$Zll,4,$Zll
574 ldwx $nlo($Hhh),$Thh
575 shrpw $Zhl,$Zlh,4,$Zlh
576 xor $Tll,$Zll,$Zll
577 ldwx $nhi($Hll),$Tll
578 shrpw $Zhh,$Zhl,4,$Zhl
579 xor $Tlh,$Zlh,$Zlh
580 ldwx $nhi($Hlh),$Tlh
581 extru $Zhh,27,28,$Zhh
582 xor $rem,$Zhh,$Zhh
583 xor $Thl,$Zhl,$Zhl
584 ldwx $nhi($Hhl),$Thl
585 xor $Thh,$Zhh,$Zhh
586 ldwx $nhi($Hhh),$Thh
587 zdep $Zll,28,4,$rem
588 ldwx $rem($rem_4bit),$rem
589 shrpw $Zlh,$Zll,4,$Zll
590 shrpw $Zhl,$Zlh,4,$Zlh
591 shrpw $Zhh,$Zhl,4,$Zhl
592 extru $Zhh,27,28,$Zhh
593 xor $Tll,$Zll,$Zll
594 xor $Tlh,$Zlh,$Zlh
595 xor $rem,$Zhh,$Zhh
596 stw $Zll,12($Xi)
597 xor $Thl,$Zhl,$Zhl
598 stw $Zlh,8($Xi)
599 xor $Thh,$Zhh,$Zhh
600 stw $Zhl,4($Xi)
601 ldo 16($inp),$inp
602 stw $Zhh,0($Xi)
603 comb,<> $inp,$len,L\$outer_ghash_pa1
604 copy $Zll,$nlo
605___
606$code.=<<___;
607L\$done_ghash
608 $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
609 $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
610 $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
611 $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
612___
613$code.=<<___ if ($SIZE_T==4);
614 $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
615 $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
616 $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
617 $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
618 $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
619___
# Last template chunk: the return sequence that ends gcm_ghash_4bit
# (restoring %r3 with $POPMB), followed by the L$rem_4bit table.
# The table holds sixteen entries, each emitted as a pair of .WORDs
# (constant<<16 followed by 0); the `...` expressions are evaluated by
# the post-processing loop at the end of this script.  Both functions
# index it as $rem($rem_4bit) / $rem2($rem_4bit).  An identification
# string is emitted after the table.
620$code.=<<___;
621 bv (%r2)
622 .EXIT
623 $POPMB -$FRAME(%sp),%r3
624 .PROCEND
625
626 .ALIGN 64
627L\$rem_4bit
628 .WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
629 .WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
630 .WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
631 .WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
632 .STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by <appro\@openssl.org>"
633 .ALIGN 64
634___
635
636# Explicitly encode PA-RISC 2.0 instructions used in this module, so
637# that it can be compiled with .LEVEL 1.0. It should be noted that I
638# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
639# directive...
640
# Encoder for "ldd".  Takes the completer string that followed the
# mnemonic and the operand string; returns one line of assembler text.
# Two operand shapes are turned into a raw .WORD (with the original
# instruction kept as a trailing comment):
#   %rX(%rB),%rT   - register-indexed load (format 4)
#   disp(%rB),%rT  - short-displacement load (format 5); ",m..." and
#                    ",mb" completers set additional opcode bits
# Anything else is returned as a plain "ldd" line.
my $ldd = sub {
    my ($completer,$operands) = @_;
    my $text = "ldd$completer\t$operands";
    my $word;

    if ($operands =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {        # format 4
        my ($index,$base,$target) = ($1,$2,$3);
        $word = (0x03<<26)|($base<<21)|($index<<16)|(3<<6)|$target;
    }
    elsif ($operands =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) {    # format 5
        my ($disp,$base,$target) = ($1,$2,$3);
        $word  = (0x03<<26)|($base<<21)|(1<<12)|(3<<6)|$target;
        $word |= (($disp&0xF)<<17)|(($disp&0x10)<<12);               # encode offset
        $word |= (1<<5)  if ($completer =~ /^,m/);
        $word |= (1<<13) if ($completer =~ /^,mb/);
    }

    return defined($word) ? sprintf("\t.WORD\t0x%08x\t; %s",$word,$text)
                          : "\t".$text;
};
658
# Encoder for "std".  Takes the completer string and the operand string;
# returns one line of assembler text.  Operands of the shape
# "%rS,disp(%rB)" are emitted as a raw .WORD with the original
# instruction kept as a trailing comment; any other shape is returned
# as a plain "std" line.
my $std = sub {
    my ($completer,$operands) = @_;
    my $text = "std$completer\t$operands";

    return "\t".$text
        unless ($operands =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/);

    # format 3 suffices
    my ($source,$disp,$base) = ($1,$2,$3);
    my $word = (0x1c<<26)|($base<<21)|($source<<16)
             | (($disp&0x1FF8)<<1)|(($disp>>13)&1);
    return sprintf("\t.WORD\t0x%08x\t; %s",$word,$text);
};
669
# Encoder for "extrd".  Takes the completer string and the operand
# string; returns one line of assembler text.  Only the ",u" completer
# is used by this module, so it is implicitly part of the encoding.
#   %rS,pos,len,%rT   - fixed position (format 15)
#   %rS,%sar,len,%rT  - position taken from %sar (format 12); a
#                       completer matching /,\**=/ sets one extra bit
# Anything else is returned as a plain "extrd" line.
my $extrd = sub {
    my ($completer,$operands) = @_;
    my $text = "extrd$completer\t$operands";
    my $word;

    if ($operands =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) {    # format 15
        my ($source,$pos,$width,$target) = ($1,$2,$3,$4);
        my $len = 32-$width;
        $word  = (0x36<<26)|($source<<21)|($target<<16);
        $word |= (($pos&0x20)<<6)|(($pos&0x1f)<<5);                  # encode pos
        $word |= (($len&0x20)<<7)|($len&0x1f);                       # encode len
    }
    elsif ($operands =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) {     # format 12
        my ($source,$width,$target) = ($1,$2,$3);
        my $len = 32-$width;
        $word  = (0x34<<26)|($source<<21)|($target<<16)|(2<<11)|(1<<9);
        $word |= (($len&0x20)<<3)|($len&0x1f);                       # encode len
        $word |= (1<<13) if ($completer =~ /,\**=/);
    }

    return defined($word) ? sprintf("\t.WORD\t0x%08x\t; %s",$word,$text)
                          : "\t".$text;
};
691
# Encoder for "shrpd".  Takes the completer string and the operand
# string; returns one line of assembler text.
#   %rA,%rB,sa,%rT    - fixed shift amount (format 14)
#   %rA,%rB,%sar,%rT  - shift amount taken from %sar (format 11)
# Anything else is returned as a plain "shrpd" line.
my $shrpd = sub {
    my ($completer,$operands) = @_;
    my $text = "shrpd$completer\t$operands";
    my $word;

    if ($operands =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) {  # format 14
        my ($first,$second,$shift,$target) = ($1,$2,$3,$4);
        my $cpos = 63-$shift;
        $word  = (0x34<<26)|($second<<21)|($first<<16)|(1<<10)|$target;
        $word |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);                # encode sa
    }
    elsif ($operands =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) {   # format 11
        my ($first,$second,$target) = ($1,$2,$3);
        $word = (0x34<<26)|($second<<21)|($first<<16)|(1<<9)|$target;
    }

    return defined($word) ? sprintf("\t.WORD\t0x%08x\t; %s",$word,$text)
                          : "\t".$text;
};
708
# Encoder for "depd".  Takes the completer string and the operand
# string; returns one line of assembler text.  Only the ",z" completer
# is used by this module, so it is implicitly part of the encoding.
# Operands of the shape "%rS,pos,len,%rT" (format 16) are emitted as a
# raw .WORD with the original instruction kept as a trailing comment;
# any other shape is returned as a plain "depd" line.
my $depd = sub {
    my ($completer,$operands) = @_;
    my $text = "depd$completer\t$operands";

    return "\t".$text
        unless ($operands =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/);

    # format 16
    my ($source,$pos,$width,$target) = ($1,$2,$3,$4);
    my $cpos = 63-$pos;
    my $len  = 32-$width;
    my $word = (0x3c<<26)|($target<<21)|($source<<16);
    $word |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5);                    # encode pos
    $word |= (($len&0x20)<<7)|($len&0x1f);                           # encode len
    return sprintf("\t.WORD\t0x%08x\t; %s",$word,$text);
};
724
# Translate one instruction into output text.
#   $mnemonic - instruction name (lower-case letters)
#   $mod      - completer text that directly followed the mnemonic
#   $args     - operand string
# If a scalar named after the mnemonic holds a code reference (such as
# the encoder closures defined above), the instruction is handed to it;
# otherwise it is re-emitted as "\t<mnemonic><mod>\t<args>".
sub assemble {
    my ($mnemonic,$mod,$args)=@_;
    # look the mnemonic up by name: "ldd" -> $ldd, and so on
    my $encoder = eval("\$$mnemonic");

    return $encoder->($mod,$args) if (ref($encoder) eq 'CODE');
    return "\t$mnemonic$mod\t$args";
}
731
# Post-process the accumulated template in $code line by line and print
# the result to STDOUT.
foreach my $line (split("\n",$code)) {
    # replace every `expr` with the value of evaluating expr as Perl
    $line =~ s/\`([^\`]*)\`/eval $1/ge;
    if ($SIZE_T==4) {
        # 32-bit build: hand each instruction to assemble(), which
        # replaces the PA-RISC 2.0 instructions with raw .WORDs, then
        # rewrite the remaining 64-bit spellings to their 32-bit forms
        $line =~ s/^\s+([a-z]+)([\S]*)\s+([\S]*)/assemble($1,$2,$3)/e;
        $line =~ s/cmpb,\*/comb,/;
        $line =~ s/,\*/,/;
    }
    # 64-bit build: returns are emitted as "bve" instead of "bv"
    $line =~ s/\bbv\b/bve/ if ($SIZE_T==8);
    print $line,"\n";
}

# Buffered write errors (full disk, closed pipe, ...) only surface when
# the handle is closed, so a failing close must not go unnoticed:
# otherwise a truncated assembler file would be left behind with a
# successful exit status.
close STDOUT or die "error closing STDOUT: $!";
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette