source: vbox/trunk/src/libs/openssl-3.0.1/crypto/aes/asm/aesp8-ppc.pl@94082

Last change on this file: r94082, checked in by vboxsync, 3 years ago

libs/openssl-3.0.1: started applying and adjusting our OpenSSL changes to 3.0.1. bugref:10128

1#! /usr/bin/env perl
2# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9#
10# ====================================================================
11# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12# project. The module is, however, dual licensed under OpenSSL and
13# CRYPTOGAMS licenses depending on where you obtain it. For further
14# details see http://www.openssl.org/~appro/cryptogams/.
15# ====================================================================
16#
17# This module implements support for AES instructions as per PowerISA
18# specification version 2.07, first implemented by POWER8 processor.
19# The module is endian-agnostic in the sense that it supports both big-
20# and little-endian cases. Data alignment in parallelizable modes is
21# handled with VSX loads and stores, which implies the MSR.VSX flag being
22# set. It should also be noted that the ISA specification doesn't prohibit
23# alignment exceptions for these instructions on page boundaries.
24# Initially alignment was handled in a pure AltiVec/VMX way [data was
25# aligned programmatically, which in turn guarantees exception-
26# free execution], but that turned out to hamper performance when vcipher
27# instructions are interleaved. It's reckoned that eventual
28# misalignment penalties at page boundaries are on average lower
29# than the additional overhead of the pure AltiVec approach.
30#
31# May 2016
32#
33# Added XTS subroutine; a 9x improvement on little-endian and a 12x
34# improvement on big-endian systems was measured.
35#
36######################################################################
37# Current large-block performance in cycles per byte processed with
38# 128-bit key (less is better).
39#
40#              CBC en-/decrypt   CTR    XTS
41# POWER8[le]   3.96/0.72         0.74   1.1
42# POWER8[be]   3.75/0.65         0.66   1.0
43# POWER9[le]   4.02/0.86         0.84   1.05
44# POWER9[be]   3.99/0.78         0.79   0.97
45
46# $output is the last argument if it looks like a file (it has an extension)
47# $flavour is the first argument if it doesn't look like a file
48$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
49$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
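# Illustrative invocation (a sketch; the flavour names are those accepted
# by ppc-xlate.pl, and the exact set depends on the build configuration):
#
#   perl aesp8-ppc.pl linux64le aesp8-ppc.s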
50
51if ($flavour =~ /64/) {
52 $SIZE_T =8;
53 $LRSAVE =2*$SIZE_T;
54 $STU ="stdu";
55 $POP ="ld";
56 $PUSH ="std";
57 $UCMP ="cmpld";
58 $SHL ="sldi";
59} elsif ($flavour =~ /32/) {
60 $SIZE_T =4;
61 $LRSAVE =$SIZE_T;
62 $STU ="stwu";
63 $POP ="lwz";
64 $PUSH ="stw";
65 $UCMP ="cmplw";
66 $SHL ="slwi";
67} else { die "nonsense $flavour"; }
68
69$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
70
71$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
72( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
73( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
74die "can't locate ppc-xlate.pl";
75
76open STDOUT,"| $^X $xlate $flavour \"$output\""
77 or die "can't call $xlate: $!";
78
79$FRAME=8*$SIZE_T;
80$prefix="aes_p8";
81
82$sp="r1";
83$vrsave="r12";
84
85#########################################################################
86{{{ # Key setup procedures #
87my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
88my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
89my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
90
91$code.=<<___;
92.machine "any"
93
94.text
95
96.align 7
97rcon:
98.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
99.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
100.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
101.long 0,0,0,0 ?asis
102Lconsts:
103 mflr r0
104 bcl 20,31,\$+4
105 mflr $ptr # LR = address of this mflr (set by bcl above)
106 addi $ptr,$ptr,-0x48 # rcon sits 0x48 bytes back from here
107 mtlr r0
108 blr
109 .long 0
110 .byte 0,12,0x14,0,0,0,0,0
111.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
112
113.globl .${prefix}_set_encrypt_key
114.align 5
115.${prefix}_set_encrypt_key:
116Lset_encrypt_key:
117 mflr r11
118 $PUSH r11,$LRSAVE($sp)
119
120 li $ptr,-1
121 ${UCMP}i $inp,0
122 beq- Lenc_key_abort # if ($inp==0) return -1;
123 ${UCMP}i $out,0
124 beq- Lenc_key_abort # if ($out==0) return -1;
125 li $ptr,-2
126 cmpwi $bits,128
127 blt- Lenc_key_abort
128 cmpwi $bits,256
129 bgt- Lenc_key_abort
130 andi. r0,$bits,0x3f
131 bne- Lenc_key_abort
132
133 lis r0,0xfff0
134 mfspr $vrsave,256
135 mtspr 256,r0
136
137 bl Lconsts
138 mtlr r11
139
140 neg r9,$inp
141 lvx $in0,0,$inp
142 addi $inp,$inp,15 # 15 is not typo
143 lvsr $key,0,r9 # borrow $key
144 li r8,0x20
145 cmpwi $bits,192
146 lvx $in1,0,$inp
147 le?vspltisb $mask,0x0f # borrow $mask
148 lvx $rcon,0,$ptr
149 le?vxor $key,$key,$mask # adjust for byte swap
150 lvx $mask,r8,$ptr
151 addi $ptr,$ptr,0x10
152 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
153 li $cnt,8
154 vxor $zero,$zero,$zero
155 mtctr $cnt
156
157 ?lvsr $outperm,0,$out
158 vspltisb $outmask,-1
159 lvx $outhead,0,$out
160 ?vperm $outmask,$zero,$outmask,$outperm
161
162 blt Loop128
163 addi $inp,$inp,8
164 beq L192
165 addi $inp,$inp,8
166 b L256
167
168.align 4
169Loop128:
170 vperm $key,$in0,$in0,$mask # rotate-n-splat
171 vsldoi $tmp,$zero,$in0,12 # >>32
172 vperm $outtail,$in0,$in0,$outperm # rotate
173 vsel $stage,$outhead,$outtail,$outmask
174 vmr $outhead,$outtail
175 vcipherlast $key,$key,$rcon
176 stvx $stage,0,$out
177 addi $out,$out,16
178
179 vxor $in0,$in0,$tmp
180 vsldoi $tmp,$zero,$tmp,12 # >>32
181 vxor $in0,$in0,$tmp
182 vsldoi $tmp,$zero,$tmp,12 # >>32
183 vxor $in0,$in0,$tmp
184 vadduwm $rcon,$rcon,$rcon
185 vxor $in0,$in0,$key
186 bdnz Loop128
187
188 lvx $rcon,0,$ptr # last two round keys
189
190 vperm $key,$in0,$in0,$mask # rotate-n-splat
191 vsldoi $tmp,$zero,$in0,12 # >>32
192 vperm $outtail,$in0,$in0,$outperm # rotate
193 vsel $stage,$outhead,$outtail,$outmask
194 vmr $outhead,$outtail
195 vcipherlast $key,$key,$rcon
196 stvx $stage,0,$out
197 addi $out,$out,16
198
199 vxor $in0,$in0,$tmp
200 vsldoi $tmp,$zero,$tmp,12 # >>32
201 vxor $in0,$in0,$tmp
202 vsldoi $tmp,$zero,$tmp,12 # >>32
203 vxor $in0,$in0,$tmp
204 vadduwm $rcon,$rcon,$rcon
205 vxor $in0,$in0,$key
206
207 vperm $key,$in0,$in0,$mask # rotate-n-splat
208 vsldoi $tmp,$zero,$in0,12 # >>32
209 vperm $outtail,$in0,$in0,$outperm # rotate
210 vsel $stage,$outhead,$outtail,$outmask
211 vmr $outhead,$outtail
212 vcipherlast $key,$key,$rcon
213 stvx $stage,0,$out
214 addi $out,$out,16
215
216 vxor $in0,$in0,$tmp
217 vsldoi $tmp,$zero,$tmp,12 # >>32
218 vxor $in0,$in0,$tmp
219 vsldoi $tmp,$zero,$tmp,12 # >>32
220 vxor $in0,$in0,$tmp
221 vxor $in0,$in0,$key
222 vperm $outtail,$in0,$in0,$outperm # rotate
223 vsel $stage,$outhead,$outtail,$outmask
224 vmr $outhead,$outtail
225 stvx $stage,0,$out
226
227 addi $inp,$out,15 # 15 is not typo
228 addi $out,$out,0x50
229
230 li $rounds,10
231 b Ldone
232
233.align 4
234L192:
235 lvx $tmp,0,$inp
236 li $cnt,4
237 vperm $outtail,$in0,$in0,$outperm # rotate
238 vsel $stage,$outhead,$outtail,$outmask
239 vmr $outhead,$outtail
240 stvx $stage,0,$out
241 addi $out,$out,16
242 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
243 vspltisb $key,8 # borrow $key
244 mtctr $cnt
245 vsububm $mask,$mask,$key # adjust the mask
246
247Loop192:
248 vperm $key,$in1,$in1,$mask # rotate-n-splat
249 vsldoi $tmp,$zero,$in0,12 # >>32
250 vcipherlast $key,$key,$rcon
251
252 vxor $in0,$in0,$tmp
253 vsldoi $tmp,$zero,$tmp,12 # >>32
254 vxor $in0,$in0,$tmp
255 vsldoi $tmp,$zero,$tmp,12 # >>32
256 vxor $in0,$in0,$tmp
257
258 vsldoi $stage,$zero,$in1,8
259 vspltw $tmp,$in0,3
260 vxor $tmp,$tmp,$in1
261 vsldoi $in1,$zero,$in1,12 # >>32
262 vadduwm $rcon,$rcon,$rcon
263 vxor $in1,$in1,$tmp
264 vxor $in0,$in0,$key
265 vxor $in1,$in1,$key
266 vsldoi $stage,$stage,$in0,8
267
268 vperm $key,$in1,$in1,$mask # rotate-n-splat
269 vsldoi $tmp,$zero,$in0,12 # >>32
270 vperm $outtail,$stage,$stage,$outperm # rotate
271 vsel $stage,$outhead,$outtail,$outmask
272 vmr $outhead,$outtail
273 vcipherlast $key,$key,$rcon
274 stvx $stage,0,$out
275 addi $out,$out,16
276
277 vsldoi $stage,$in0,$in1,8
278 vxor $in0,$in0,$tmp
279 vsldoi $tmp,$zero,$tmp,12 # >>32
280 vperm $outtail,$stage,$stage,$outperm # rotate
281 vsel $stage,$outhead,$outtail,$outmask
282 vmr $outhead,$outtail
283 vxor $in0,$in0,$tmp
284 vsldoi $tmp,$zero,$tmp,12 # >>32
285 vxor $in0,$in0,$tmp
286 stvx $stage,0,$out
287 addi $out,$out,16
288
289 vspltw $tmp,$in0,3
290 vxor $tmp,$tmp,$in1
291 vsldoi $in1,$zero,$in1,12 # >>32
292 vadduwm $rcon,$rcon,$rcon
293 vxor $in1,$in1,$tmp
294 vxor $in0,$in0,$key
295 vxor $in1,$in1,$key
296 vperm $outtail,$in0,$in0,$outperm # rotate
297 vsel $stage,$outhead,$outtail,$outmask
298 vmr $outhead,$outtail
299 stvx $stage,0,$out
300 addi $inp,$out,15 # 15 is not typo
301 addi $out,$out,16
302 bdnz Loop192
303
304 li $rounds,12
305 addi $out,$out,0x20
306 b Ldone
307
308.align 4
309L256:
310 lvx $tmp,0,$inp
311 li $cnt,7
312 li $rounds,14
313 vperm $outtail,$in0,$in0,$outperm # rotate
314 vsel $stage,$outhead,$outtail,$outmask
315 vmr $outhead,$outtail
316 stvx $stage,0,$out
317 addi $out,$out,16
318 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
319 mtctr $cnt
320
321Loop256:
322 vperm $key,$in1,$in1,$mask # rotate-n-splat
323 vsldoi $tmp,$zero,$in0,12 # >>32
324 vperm $outtail,$in1,$in1,$outperm # rotate
325 vsel $stage,$outhead,$outtail,$outmask
326 vmr $outhead,$outtail
327 vcipherlast $key,$key,$rcon
328 stvx $stage,0,$out
329 addi $out,$out,16
330
331 vxor $in0,$in0,$tmp
332 vsldoi $tmp,$zero,$tmp,12 # >>32
333 vxor $in0,$in0,$tmp
334 vsldoi $tmp,$zero,$tmp,12 # >>32
335 vxor $in0,$in0,$tmp
336 vadduwm $rcon,$rcon,$rcon
337 vxor $in0,$in0,$key
338 vperm $outtail,$in0,$in0,$outperm # rotate
339 vsel $stage,$outhead,$outtail,$outmask
340 vmr $outhead,$outtail
341 stvx $stage,0,$out
342 addi $inp,$out,15 # 15 is not typo
343 addi $out,$out,16
344 bdz Ldone
345
346 vspltw $key,$in0,3 # just splat
347 vsldoi $tmp,$zero,$in1,12 # >>32
348 vsbox $key,$key
349
350 vxor $in1,$in1,$tmp
351 vsldoi $tmp,$zero,$tmp,12 # >>32
352 vxor $in1,$in1,$tmp
353 vsldoi $tmp,$zero,$tmp,12 # >>32
354 vxor $in1,$in1,$tmp
355
356 vxor $in1,$in1,$key
357 b Loop256
358
359.align 4
360Ldone:
361 lvx $in1,0,$inp # redundant in aligned case
362 vsel $in1,$outhead,$in1,$outmask
363 stvx $in1,0,$inp
364 li $ptr,0
365 mtspr 256,$vrsave
366 stw $rounds,0($out)
367
368Lenc_key_abort:
369 mr r3,$ptr
370 blr
371 .long 0
372 .byte 0,12,0x14,1,0,0,3,0
373 .long 0
374.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
375
376.globl .${prefix}_set_decrypt_key
377.align 5
378.${prefix}_set_decrypt_key:
379 $STU $sp,-$FRAME($sp)
380 mflr r10
381 $PUSH r10,$FRAME+$LRSAVE($sp)
382 bl Lset_encrypt_key
383 mtlr r10
384
385 cmpwi r3,0
386 bne- Ldec_key_abort
387
388 slwi $cnt,$rounds,4
389 subi $inp,$out,240 # first round key
390 srwi $rounds,$rounds,1
391 add $out,$inp,$cnt # last round key
392 mtctr $rounds
393
394Ldeckey:
395 lwz r0, 0($inp)
396 lwz r6, 4($inp)
397 lwz r7, 8($inp)
398 lwz r8, 12($inp)
399 addi $inp,$inp,16
400 lwz r9, 0($out)
401 lwz r10,4($out)
402 lwz r11,8($out)
403 lwz r12,12($out)
404 stw r0, 0($out)
405 stw r6, 4($out)
406 stw r7, 8($out)
407 stw r8, 12($out)
408 subi $out,$out,16
409 stw r9, -16($inp)
410 stw r10,-12($inp)
411 stw r11,-8($inp)
412 stw r12,-4($inp)
413 bdnz Ldeckey
414
415 xor r3,r3,r3 # return value
416Ldec_key_abort:
417 addi $sp,$sp,$FRAME
418 blr
419 .long 0
420 .byte 0,12,4,1,0x80,0,3,0
421 .long 0
422.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
423___
424}}}
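# The key-setup loops above vectorize the classic word-oriented AES key
# expansion: the "rotate-n-splat" vperm arranges the last round-key word so
# that vcipherlast applies the S-box, with the rcon vector folded in by
# vcipherlast's final xor. A minimal scalar sketch of one AES-128 step for
# reference ($subword stands in for a caller-supplied S-box substitution on
# a 32-bit word; illustrative only, not used by this module):
sub expand128_step {
    my ($w, $rcon, $subword) = @_;  # $w: ref to [w0,w1,w2,w3] 32-bit words
    my $t = $w->[3];
    $t = (($t << 8) | ($t >> 24)) & 0xffffffff;  # RotWord
    $t = $subword->($t) ^ $rcon;                 # SubWord + round constant
    my @n = ($w->[0] ^ $t);
    push @n, $n[-1] ^ $w->[$_] for 1 .. 3;       # chain the remaining words
    return \@n;                                  # next four round-key words
}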
425#########################################################################
426{{{ # Single block en- and decrypt procedures #
427sub gen_block () {
428my $dir = shift;
429my $n = $dir eq "de" ? "n" : "";
430my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
431
432$code.=<<___;
433.globl .${prefix}_${dir}crypt
434.align 5
435.${prefix}_${dir}crypt:
436 lwz $rounds,240($key)
437 lis r0,0xfc00
438 mfspr $vrsave,256
439 li $idx,15 # 15 is not typo
440 mtspr 256,r0
441
442 lvx v0,0,$inp
443 neg r11,$out
444 lvx v1,$idx,$inp
445 lvsl v2,0,$inp # inpperm
446 le?vspltisb v4,0x0f
447 ?lvsl v3,0,r11 # outperm
448 le?vxor v2,v2,v4
449 li $idx,16
450 vperm v0,v0,v1,v2 # align [and byte swap in LE]
451 lvx v1,0,$key
452 ?lvsl v5,0,$key # keyperm
453 srwi $rounds,$rounds,1
454 lvx v2,$idx,$key
455 addi $idx,$idx,16
456 subi $rounds,$rounds,1
457 ?vperm v1,v1,v2,v5 # align round key
458
459 vxor v0,v0,v1
460 lvx v1,$idx,$key
461 addi $idx,$idx,16
462 mtctr $rounds
463
464Loop_${dir}c:
465 ?vperm v2,v2,v1,v5
466 v${n}cipher v0,v0,v2
467 lvx v2,$idx,$key
468 addi $idx,$idx,16
469 ?vperm v1,v1,v2,v5
470 v${n}cipher v0,v0,v1
471 lvx v1,$idx,$key
472 addi $idx,$idx,16
473 bdnz Loop_${dir}c
474
475 ?vperm v2,v2,v1,v5
476 v${n}cipher v0,v0,v2
477 lvx v2,$idx,$key
478 ?vperm v1,v1,v2,v5
479 v${n}cipherlast v0,v0,v1
480
481 vspltisb v2,-1
482 vxor v1,v1,v1
483 li $idx,15 # 15 is not typo
484 ?vperm v2,v1,v2,v3 # outmask
485 le?vxor v3,v3,v4
486 lvx v1,0,$out # outhead
487 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
488 vsel v1,v1,v0,v2
489 lvx v4,$idx,$out
490 stvx v1,0,$out
491 vsel v0,v0,v4,v2
492 stvx v0,$idx,$out
493
494 mtspr 256,$vrsave
495 blr
496 .long 0
497 .byte 0,12,0x14,0,0,0,3,0
498 .long 0
499.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
500___
501}
502&gen_block("en");
503&gen_block("de");
504}}}
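# Note the loop structure shared by Loop_enc/Loop_dec above (and by the CBC,
# CTR and XTS loops below): $rounds is halved and decremented because each
# iteration applies two rounds with a ping-ponging $rndkey pair, and the
# final round pair is peeled off after the loop, ending in v(n)cipherlast.
# A sketch of that schedule (illustrative only, not used by this module):
sub round_schedule {
    my $nr = shift;                          # 10/12/14 for AES-128/192/256
    my @pairs = map { [ 2*$_ - 1, 2*$_ ] } 1 .. ($nr/2 - 1);  # loop body
    return (\@pairs, [ $nr - 1, $nr ]);      # plus the peeled final pair
}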
505#########################################################################
506{{{ # CBC en- and decrypt procedures #
507my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
508my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
509my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
510 map("v$_",(4..10));
511$code.=<<___;
512.globl .${prefix}_cbc_encrypt
513.align 5
514.${prefix}_cbc_encrypt:
515 ${UCMP}i $len,16
516 bltlr-
517
518 cmpwi $enc,0 # test direction
519 lis r0,0xffe0
520 mfspr $vrsave,256
521 mtspr 256,r0
522
523 li $idx,15
524 vxor $rndkey0,$rndkey0,$rndkey0
525 le?vspltisb $tmp,0x0f
526
527 lvx $ivec,0,$ivp # load [unaligned] iv
528 lvsl $inpperm,0,$ivp
529 lvx $inptail,$idx,$ivp
530 le?vxor $inpperm,$inpperm,$tmp
531 vperm $ivec,$ivec,$inptail,$inpperm
532
533 neg r11,$inp
534 ?lvsl $keyperm,0,$key # prepare for unaligned key
535 lwz $rounds,240($key)
536
537 lvsr $inpperm,0,r11 # prepare for unaligned load
538 lvx $inptail,0,$inp
539 addi $inp,$inp,15 # 15 is not typo
540 le?vxor $inpperm,$inpperm,$tmp
541
542 ?lvsr $outperm,0,$out # prepare for unaligned store
543 vspltisb $outmask,-1
544 lvx $outhead,0,$out
545 ?vperm $outmask,$rndkey0,$outmask,$outperm
546 le?vxor $outperm,$outperm,$tmp
547
548 srwi $rounds,$rounds,1
549 li $idx,16
550 subi $rounds,$rounds,1
551 beq Lcbc_dec
552
553Lcbc_enc:
554 vmr $inout,$inptail
555 lvx $inptail,0,$inp
556 addi $inp,$inp,16
557 mtctr $rounds
558 subi $len,$len,16 # len-=16
559
560 lvx $rndkey0,0,$key
561 vperm $inout,$inout,$inptail,$inpperm
562 lvx $rndkey1,$idx,$key
563 addi $idx,$idx,16
564 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
565 vxor $inout,$inout,$rndkey0
566 lvx $rndkey0,$idx,$key
567 addi $idx,$idx,16
568 vxor $inout,$inout,$ivec
569
570Loop_cbc_enc:
571 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
572 vcipher $inout,$inout,$rndkey1
573 lvx $rndkey1,$idx,$key
574 addi $idx,$idx,16
575 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
576 vcipher $inout,$inout,$rndkey0
577 lvx $rndkey0,$idx,$key
578 addi $idx,$idx,16
579 bdnz Loop_cbc_enc
580
581 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
582 vcipher $inout,$inout,$rndkey1
583 lvx $rndkey1,$idx,$key
584 li $idx,16
585 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
586 vcipherlast $ivec,$inout,$rndkey0
587 ${UCMP}i $len,16
588
589 vperm $tmp,$ivec,$ivec,$outperm
590 vsel $inout,$outhead,$tmp,$outmask
591 vmr $outhead,$tmp
592 stvx $inout,0,$out
593 addi $out,$out,16
594 bge Lcbc_enc
595
596 b Lcbc_done
597
598.align 4
599Lcbc_dec:
600 ${UCMP}i $len,128
601 bge _aesp8_cbc_decrypt8x
602 vmr $tmp,$inptail
603 lvx $inptail,0,$inp
604 addi $inp,$inp,16
605 mtctr $rounds
606 subi $len,$len,16 # len-=16
607
608 lvx $rndkey0,0,$key
609 vperm $tmp,$tmp,$inptail,$inpperm
610 lvx $rndkey1,$idx,$key
611 addi $idx,$idx,16
612 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
613 vxor $inout,$tmp,$rndkey0
614 lvx $rndkey0,$idx,$key
615 addi $idx,$idx,16
616
617Loop_cbc_dec:
618 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
619 vncipher $inout,$inout,$rndkey1
620 lvx $rndkey1,$idx,$key
621 addi $idx,$idx,16
622 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
623 vncipher $inout,$inout,$rndkey0
624 lvx $rndkey0,$idx,$key
625 addi $idx,$idx,16
626 bdnz Loop_cbc_dec
627
628 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
629 vncipher $inout,$inout,$rndkey1
630 lvx $rndkey1,$idx,$key
631 li $idx,16
632 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
633 vncipherlast $inout,$inout,$rndkey0
634 ${UCMP}i $len,16
635
636 vxor $inout,$inout,$ivec
637 vmr $ivec,$tmp
638 vperm $tmp,$inout,$inout,$outperm
639 vsel $inout,$outhead,$tmp,$outmask
640 vmr $outhead,$tmp
641 stvx $inout,0,$out
642 addi $out,$out,16
643 bge Lcbc_dec
644
645Lcbc_done:
646 addi $out,$out,-1
647 lvx $inout,0,$out # redundant in aligned case
648 vsel $inout,$outhead,$inout,$outmask
649 stvx $inout,0,$out
650
651 neg $enc,$ivp # write [unaligned] iv
652 li $idx,15 # 15 is not typo
653 vxor $rndkey0,$rndkey0,$rndkey0
654 vspltisb $outmask,-1
655 le?vspltisb $tmp,0x0f
656 ?lvsl $outperm,0,$enc
657 ?vperm $outmask,$rndkey0,$outmask,$outperm
658 le?vxor $outperm,$outperm,$tmp
659 lvx $outhead,0,$ivp
660 vperm $ivec,$ivec,$ivec,$outperm
661 vsel $inout,$outhead,$ivec,$outmask
662 lvx $inptail,$idx,$ivp
663 stvx $inout,0,$ivp
664 vsel $inout,$ivec,$inptail,$outmask
665 stvx $inout,$idx,$ivp
666
667 mtspr 256,$vrsave
668 blr
669 .long 0
670 .byte 0,12,0x14,0,0,0,6,0
671 .long 0
672___
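# Why only CBC *decryption* gets an 8x-interleaved path: each plaintext
# block depends on just two ciphertext blocks, so the vncipher chains are
# independent across blocks, whereas CBC encryption is inherently serial.
# A scalar sketch of that dataflow ($decrypt is an assumed single-block
# AES-decrypt coderef over 16-byte strings; illustrative only):
sub cbc_decrypt_blocks {
    my ($decrypt, $iv, @c) = @_;             # IV and ciphertext blocks
    my @p;
    for my $i (0 .. $#c) {
        my $prev = $i ? $c[$i-1] : $iv;      # no dependence on $p[$i-1] ...
        push @p, $decrypt->($c[$i]) ^ $prev; # ... so iterations can overlap
    }
    return ($c[-1], @p);                     # next IV plus plaintext blocks
}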
673#########################################################################
674{{ # Optimized CBC decrypt procedure #
675my $key_="r11";
676my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
677 $x00=0 if ($flavour =~ /osx/);
678my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
679my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
680my $rndkey0="v23"; # v24-v25 rotating buffer for the first round keys
681 # v26-v31 last 6 round keys
682my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
683
684$code.=<<___;
685.align 5
686_aesp8_cbc_decrypt8x:
687 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
688 li r10,`$FRAME+8*16+15`
689 li r11,`$FRAME+8*16+31`
690 stvx v20,r10,$sp # ABI says so
691 addi r10,r10,32
692 stvx v21,r11,$sp
693 addi r11,r11,32
694 stvx v22,r10,$sp
695 addi r10,r10,32
696 stvx v23,r11,$sp
697 addi r11,r11,32
698 stvx v24,r10,$sp
699 addi r10,r10,32
700 stvx v25,r11,$sp
701 addi r11,r11,32
702 stvx v26,r10,$sp
703 addi r10,r10,32
704 stvx v27,r11,$sp
705 addi r11,r11,32
706 stvx v28,r10,$sp
707 addi r10,r10,32
708 stvx v29,r11,$sp
709 addi r11,r11,32
710 stvx v30,r10,$sp
711 stvx v31,r11,$sp
712 li r0,-1
713 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
714 li $x10,0x10
715 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
716 li $x20,0x20
717 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
718 li $x30,0x30
719 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
720 li $x40,0x40
721 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
722 li $x50,0x50
723 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
724 li $x60,0x60
725 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
726 li $x70,0x70
727 mtspr 256,r0
728
729 subi $rounds,$rounds,3 # -4 in total
730 subi $len,$len,128 # bias
731
732 lvx $rndkey0,$x00,$key # load key schedule
733 lvx v30,$x10,$key
734 addi $key,$key,0x20
735 lvx v31,$x00,$key
736 ?vperm $rndkey0,$rndkey0,v30,$keyperm
737 addi $key_,$sp,$FRAME+15
738 mtctr $rounds
739
740Load_cbc_dec_key:
741 ?vperm v24,v30,v31,$keyperm
742 lvx v30,$x10,$key
743 addi $key,$key,0x20
744 stvx v24,$x00,$key_ # off-load round[1]
745 ?vperm v25,v31,v30,$keyperm
746 lvx v31,$x00,$key
747 stvx v25,$x10,$key_ # off-load round[2]
748 addi $key_,$key_,0x20
749 bdnz Load_cbc_dec_key
750
751 lvx v26,$x10,$key
752 ?vperm v24,v30,v31,$keyperm
753 lvx v27,$x20,$key
754 stvx v24,$x00,$key_ # off-load round[3]
755 ?vperm v25,v31,v26,$keyperm
756 lvx v28,$x30,$key
757 stvx v25,$x10,$key_ # off-load round[4]
758 addi $key_,$sp,$FRAME+15 # rewind $key_
759 ?vperm v26,v26,v27,$keyperm
760 lvx v29,$x40,$key
761 ?vperm v27,v27,v28,$keyperm
762 lvx v30,$x50,$key
763 ?vperm v28,v28,v29,$keyperm
764 lvx v31,$x60,$key
765 ?vperm v29,v29,v30,$keyperm
766 lvx $out0,$x70,$key # borrow $out0
767 ?vperm v30,v30,v31,$keyperm
768 lvx v24,$x00,$key_ # pre-load round[1]
769 ?vperm v31,v31,$out0,$keyperm
770 lvx v25,$x10,$key_ # pre-load round[2]
771
772 #lvx $inptail,0,$inp # "caller" already did this
773 #addi $inp,$inp,15 # 15 is not typo
774 subi $inp,$inp,15 # undo "caller"
775
776 le?li $idx,8
777 lvx_u $in0,$x00,$inp # load first 8 "words"
778 le?lvsl $inpperm,0,$idx
779 le?vspltisb $tmp,0x0f
780 lvx_u $in1,$x10,$inp
781 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
782 lvx_u $in2,$x20,$inp
783 le?vperm $in0,$in0,$in0,$inpperm
784 lvx_u $in3,$x30,$inp
785 le?vperm $in1,$in1,$in1,$inpperm
786 lvx_u $in4,$x40,$inp
787 le?vperm $in2,$in2,$in2,$inpperm
788 vxor $out0,$in0,$rndkey0
789 lvx_u $in5,$x50,$inp
790 le?vperm $in3,$in3,$in3,$inpperm
791 vxor $out1,$in1,$rndkey0
792 lvx_u $in6,$x60,$inp
793 le?vperm $in4,$in4,$in4,$inpperm
794 vxor $out2,$in2,$rndkey0
795 lvx_u $in7,$x70,$inp
796 addi $inp,$inp,0x80
797 le?vperm $in5,$in5,$in5,$inpperm
798 vxor $out3,$in3,$rndkey0
799 le?vperm $in6,$in6,$in6,$inpperm
800 vxor $out4,$in4,$rndkey0
801 le?vperm $in7,$in7,$in7,$inpperm
802 vxor $out5,$in5,$rndkey0
803 vxor $out6,$in6,$rndkey0
804 vxor $out7,$in7,$rndkey0
805
806 mtctr $rounds
807 b Loop_cbc_dec8x
808.align 5
809Loop_cbc_dec8x:
810 vncipher $out0,$out0,v24
811 vncipher $out1,$out1,v24
812 vncipher $out2,$out2,v24
813 vncipher $out3,$out3,v24
814 vncipher $out4,$out4,v24
815 vncipher $out5,$out5,v24
816 vncipher $out6,$out6,v24
817 vncipher $out7,$out7,v24
818 lvx v24,$x20,$key_ # round[3]
819 addi $key_,$key_,0x20
820
821 vncipher $out0,$out0,v25
822 vncipher $out1,$out1,v25
823 vncipher $out2,$out2,v25
824 vncipher $out3,$out3,v25
825 vncipher $out4,$out4,v25
826 vncipher $out5,$out5,v25
827 vncipher $out6,$out6,v25
828 vncipher $out7,$out7,v25
829 lvx v25,$x10,$key_ # round[4]
830 bdnz Loop_cbc_dec8x
831
832 subic $len,$len,128 # $len-=128
833 vncipher $out0,$out0,v24
834 vncipher $out1,$out1,v24
835 vncipher $out2,$out2,v24
836 vncipher $out3,$out3,v24
837 vncipher $out4,$out4,v24
838 vncipher $out5,$out5,v24
839 vncipher $out6,$out6,v24
840 vncipher $out7,$out7,v24
841
842 subfe. r0,r0,r0 # borrow?-1:0
843 vncipher $out0,$out0,v25
844 vncipher $out1,$out1,v25
845 vncipher $out2,$out2,v25
846 vncipher $out3,$out3,v25
847 vncipher $out4,$out4,v25
848 vncipher $out5,$out5,v25
849 vncipher $out6,$out6,v25
850 vncipher $out7,$out7,v25
851
852 and r0,r0,$len
853 vncipher $out0,$out0,v26
854 vncipher $out1,$out1,v26
855 vncipher $out2,$out2,v26
856 vncipher $out3,$out3,v26
857 vncipher $out4,$out4,v26
858 vncipher $out5,$out5,v26
859 vncipher $out6,$out6,v26
860 vncipher $out7,$out7,v26
861
862 add $inp,$inp,r0 # $inp is adjusted in such a
863 # way that at exit from the
864 # loop inX-in7 are loaded
865 # with the last "words"
866 vncipher $out0,$out0,v27
867 vncipher $out1,$out1,v27
868 vncipher $out2,$out2,v27
869 vncipher $out3,$out3,v27
870 vncipher $out4,$out4,v27
871 vncipher $out5,$out5,v27
872 vncipher $out6,$out6,v27
873 vncipher $out7,$out7,v27
874
875 addi $key_,$sp,$FRAME+15 # rewind $key_
876 vncipher $out0,$out0,v28
877 vncipher $out1,$out1,v28
878 vncipher $out2,$out2,v28
879 vncipher $out3,$out3,v28
880 vncipher $out4,$out4,v28
881 vncipher $out5,$out5,v28
882 vncipher $out6,$out6,v28
883 vncipher $out7,$out7,v28
884 lvx v24,$x00,$key_ # re-pre-load round[1]
885
886 vncipher $out0,$out0,v29
887 vncipher $out1,$out1,v29
888 vncipher $out2,$out2,v29
889 vncipher $out3,$out3,v29
890 vncipher $out4,$out4,v29
891 vncipher $out5,$out5,v29
892 vncipher $out6,$out6,v29
893 vncipher $out7,$out7,v29
894 lvx v25,$x10,$key_ # re-pre-load round[2]
895
896 vncipher $out0,$out0,v30
897 vxor $ivec,$ivec,v31 # xor with last round key
898 vncipher $out1,$out1,v30
899 vxor $in0,$in0,v31
900 vncipher $out2,$out2,v30
901 vxor $in1,$in1,v31
902 vncipher $out3,$out3,v30
903 vxor $in2,$in2,v31
904 vncipher $out4,$out4,v30
905 vxor $in3,$in3,v31
906 vncipher $out5,$out5,v30
907 vxor $in4,$in4,v31
908 vncipher $out6,$out6,v30
909 vxor $in5,$in5,v31
910 vncipher $out7,$out7,v30
911 vxor $in6,$in6,v31
912
913 vncipherlast $out0,$out0,$ivec
914 vncipherlast $out1,$out1,$in0
915 lvx_u $in0,$x00,$inp # load next input block
916 vncipherlast $out2,$out2,$in1
917 lvx_u $in1,$x10,$inp
918 vncipherlast $out3,$out3,$in2
919 le?vperm $in0,$in0,$in0,$inpperm
920 lvx_u $in2,$x20,$inp
921 vncipherlast $out4,$out4,$in3
922 le?vperm $in1,$in1,$in1,$inpperm
923 lvx_u $in3,$x30,$inp
924 vncipherlast $out5,$out5,$in4
925 le?vperm $in2,$in2,$in2,$inpperm
926 lvx_u $in4,$x40,$inp
927 vncipherlast $out6,$out6,$in5
928 le?vperm $in3,$in3,$in3,$inpperm
929 lvx_u $in5,$x50,$inp
930 vncipherlast $out7,$out7,$in6
931 le?vperm $in4,$in4,$in4,$inpperm
932 lvx_u $in6,$x60,$inp
933 vmr $ivec,$in7
934 le?vperm $in5,$in5,$in5,$inpperm
935 lvx_u $in7,$x70,$inp
936 addi $inp,$inp,0x80
937
938 le?vperm $out0,$out0,$out0,$inpperm
939 le?vperm $out1,$out1,$out1,$inpperm
940 stvx_u $out0,$x00,$out
941 le?vperm $in6,$in6,$in6,$inpperm
942 vxor $out0,$in0,$rndkey0
943 le?vperm $out2,$out2,$out2,$inpperm
944 stvx_u $out1,$x10,$out
945 le?vperm $in7,$in7,$in7,$inpperm
946 vxor $out1,$in1,$rndkey0
947 le?vperm $out3,$out3,$out3,$inpperm
948 stvx_u $out2,$x20,$out
949 vxor $out2,$in2,$rndkey0
950 le?vperm $out4,$out4,$out4,$inpperm
951 stvx_u $out3,$x30,$out
952 vxor $out3,$in3,$rndkey0
953 le?vperm $out5,$out5,$out5,$inpperm
954 stvx_u $out4,$x40,$out
955 vxor $out4,$in4,$rndkey0
956 le?vperm $out6,$out6,$out6,$inpperm
957 stvx_u $out5,$x50,$out
958 vxor $out5,$in5,$rndkey0
959 le?vperm $out7,$out7,$out7,$inpperm
960 stvx_u $out6,$x60,$out
961 vxor $out6,$in6,$rndkey0
962 stvx_u $out7,$x70,$out
963 addi $out,$out,0x80
964 vxor $out7,$in7,$rndkey0
965
966 mtctr $rounds
967 beq Loop_cbc_dec8x # did $len-=128 borrow?
968
969 addic. $len,$len,128
970 beq Lcbc_dec8x_done
971 nop
972 nop
973
974Loop_cbc_dec8x_tail: # up to 7 "words" tail...
975 vncipher $out1,$out1,v24
976 vncipher $out2,$out2,v24
977 vncipher $out3,$out3,v24
978 vncipher $out4,$out4,v24
979 vncipher $out5,$out5,v24
980 vncipher $out6,$out6,v24
981 vncipher $out7,$out7,v24
982 lvx v24,$x20,$key_ # round[3]
983 addi $key_,$key_,0x20
984
985 vncipher $out1,$out1,v25
986 vncipher $out2,$out2,v25
987 vncipher $out3,$out3,v25
988 vncipher $out4,$out4,v25
989 vncipher $out5,$out5,v25
990 vncipher $out6,$out6,v25
991 vncipher $out7,$out7,v25
992 lvx v25,$x10,$key_ # round[4]
993 bdnz Loop_cbc_dec8x_tail
994
995 vncipher $out1,$out1,v24
996 vncipher $out2,$out2,v24
997 vncipher $out3,$out3,v24
998 vncipher $out4,$out4,v24
999 vncipher $out5,$out5,v24
1000 vncipher $out6,$out6,v24
1001 vncipher $out7,$out7,v24
1002
1003 vncipher $out1,$out1,v25
1004 vncipher $out2,$out2,v25
1005 vncipher $out3,$out3,v25
1006 vncipher $out4,$out4,v25
1007 vncipher $out5,$out5,v25
1008 vncipher $out6,$out6,v25
1009 vncipher $out7,$out7,v25
1010
1011 vncipher $out1,$out1,v26
1012 vncipher $out2,$out2,v26
1013 vncipher $out3,$out3,v26
1014 vncipher $out4,$out4,v26
1015 vncipher $out5,$out5,v26
1016 vncipher $out6,$out6,v26
1017 vncipher $out7,$out7,v26
1018
1019 vncipher $out1,$out1,v27
1020 vncipher $out2,$out2,v27
1021 vncipher $out3,$out3,v27
1022 vncipher $out4,$out4,v27
1023 vncipher $out5,$out5,v27
1024 vncipher $out6,$out6,v27
1025 vncipher $out7,$out7,v27
1026
1027 vncipher $out1,$out1,v28
1028 vncipher $out2,$out2,v28
1029 vncipher $out3,$out3,v28
1030 vncipher $out4,$out4,v28
1031 vncipher $out5,$out5,v28
1032 vncipher $out6,$out6,v28
1033 vncipher $out7,$out7,v28
1034
1035 vncipher $out1,$out1,v29
1036 vncipher $out2,$out2,v29
1037 vncipher $out3,$out3,v29
1038 vncipher $out4,$out4,v29
1039 vncipher $out5,$out5,v29
1040 vncipher $out6,$out6,v29
1041 vncipher $out7,$out7,v29
1042
1043 vncipher $out1,$out1,v30
1044 vxor $ivec,$ivec,v31 # last round key
1045 vncipher $out2,$out2,v30
1046 vxor $in1,$in1,v31
1047 vncipher $out3,$out3,v30
1048 vxor $in2,$in2,v31
1049 vncipher $out4,$out4,v30
1050 vxor $in3,$in3,v31
1051 vncipher $out5,$out5,v30
1052 vxor $in4,$in4,v31
1053 vncipher $out6,$out6,v30
1054 vxor $in5,$in5,v31
1055 vncipher $out7,$out7,v30
1056 vxor $in6,$in6,v31
1057
1058 cmplwi $len,32 # switch($len)
1059 blt Lcbc_dec8x_one
1060 nop
1061 beq Lcbc_dec8x_two
1062 cmplwi $len,64
1063 blt Lcbc_dec8x_three
1064 nop
1065 beq Lcbc_dec8x_four
1066 cmplwi $len,96
1067 blt Lcbc_dec8x_five
1068 nop
1069 beq Lcbc_dec8x_six
1070
1071Lcbc_dec8x_seven:
1072 vncipherlast $out1,$out1,$ivec
1073 vncipherlast $out2,$out2,$in1
1074 vncipherlast $out3,$out3,$in2
1075 vncipherlast $out4,$out4,$in3
1076 vncipherlast $out5,$out5,$in4
1077 vncipherlast $out6,$out6,$in5
1078 vncipherlast $out7,$out7,$in6
1079 vmr $ivec,$in7
1080
1081 le?vperm $out1,$out1,$out1,$inpperm
1082 le?vperm $out2,$out2,$out2,$inpperm
1083 stvx_u $out1,$x00,$out
1084 le?vperm $out3,$out3,$out3,$inpperm
1085 stvx_u $out2,$x10,$out
1086 le?vperm $out4,$out4,$out4,$inpperm
1087 stvx_u $out3,$x20,$out
1088 le?vperm $out5,$out5,$out5,$inpperm
1089 stvx_u $out4,$x30,$out
1090 le?vperm $out6,$out6,$out6,$inpperm
1091 stvx_u $out5,$x40,$out
1092 le?vperm $out7,$out7,$out7,$inpperm
1093 stvx_u $out6,$x50,$out
1094 stvx_u $out7,$x60,$out
1095 addi $out,$out,0x70
1096 b Lcbc_dec8x_done
1097
1098.align 5
1099Lcbc_dec8x_six:
1100 vncipherlast $out2,$out2,$ivec
1101 vncipherlast $out3,$out3,$in2
1102 vncipherlast $out4,$out4,$in3
1103 vncipherlast $out5,$out5,$in4
1104 vncipherlast $out6,$out6,$in5
1105 vncipherlast $out7,$out7,$in6
1106 vmr $ivec,$in7
1107
1108 le?vperm $out2,$out2,$out2,$inpperm
1109 le?vperm $out3,$out3,$out3,$inpperm
1110 stvx_u $out2,$x00,$out
1111 le?vperm $out4,$out4,$out4,$inpperm
1112 stvx_u $out3,$x10,$out
1113 le?vperm $out5,$out5,$out5,$inpperm
1114 stvx_u $out4,$x20,$out
1115 le?vperm $out6,$out6,$out6,$inpperm
1116 stvx_u $out5,$x30,$out
1117 le?vperm $out7,$out7,$out7,$inpperm
1118 stvx_u $out6,$x40,$out
1119 stvx_u $out7,$x50,$out
1120 addi $out,$out,0x60
1121 b Lcbc_dec8x_done
1122
1123.align 5
1124Lcbc_dec8x_five:
1125 vncipherlast $out3,$out3,$ivec
1126 vncipherlast $out4,$out4,$in3
1127 vncipherlast $out5,$out5,$in4
1128 vncipherlast $out6,$out6,$in5
1129 vncipherlast $out7,$out7,$in6
1130 vmr $ivec,$in7
1131
1132 le?vperm $out3,$out3,$out3,$inpperm
1133 le?vperm $out4,$out4,$out4,$inpperm
1134 stvx_u $out3,$x00,$out
1135 le?vperm $out5,$out5,$out5,$inpperm
1136 stvx_u $out4,$x10,$out
1137 le?vperm $out6,$out6,$out6,$inpperm
1138 stvx_u $out5,$x20,$out
1139 le?vperm $out7,$out7,$out7,$inpperm
1140 stvx_u $out6,$x30,$out
1141 stvx_u $out7,$x40,$out
1142 addi $out,$out,0x50
1143 b Lcbc_dec8x_done
1144
1145.align 5
1146Lcbc_dec8x_four:
1147 vncipherlast $out4,$out4,$ivec
1148 vncipherlast $out5,$out5,$in4
1149 vncipherlast $out6,$out6,$in5
1150 vncipherlast $out7,$out7,$in6
1151 vmr $ivec,$in7
1152
1153 le?vperm $out4,$out4,$out4,$inpperm
1154 le?vperm $out5,$out5,$out5,$inpperm
1155 stvx_u $out4,$x00,$out
1156 le?vperm $out6,$out6,$out6,$inpperm
1157 stvx_u $out5,$x10,$out
1158 le?vperm $out7,$out7,$out7,$inpperm
1159 stvx_u $out6,$x20,$out
1160 stvx_u $out7,$x30,$out
1161 addi $out,$out,0x40
1162 b Lcbc_dec8x_done
1163
1164.align 5
1165Lcbc_dec8x_three:
1166 vncipherlast $out5,$out5,$ivec
1167 vncipherlast $out6,$out6,$in5
1168 vncipherlast $out7,$out7,$in6
1169 vmr $ivec,$in7
1170
1171 le?vperm $out5,$out5,$out5,$inpperm
1172 le?vperm $out6,$out6,$out6,$inpperm
1173 stvx_u $out5,$x00,$out
1174 le?vperm $out7,$out7,$out7,$inpperm
1175 stvx_u $out6,$x10,$out
1176 stvx_u $out7,$x20,$out
1177 addi $out,$out,0x30
1178 b Lcbc_dec8x_done
1179
1180.align 5
1181Lcbc_dec8x_two:
1182 vncipherlast $out6,$out6,$ivec
1183 vncipherlast $out7,$out7,$in6
1184 vmr $ivec,$in7
1185
1186 le?vperm $out6,$out6,$out6,$inpperm
1187 le?vperm $out7,$out7,$out7,$inpperm
1188 stvx_u $out6,$x00,$out
1189 stvx_u $out7,$x10,$out
1190 addi $out,$out,0x20
1191 b Lcbc_dec8x_done
1192
1193.align 5
1194Lcbc_dec8x_one:
1195 vncipherlast $out7,$out7,$ivec
1196 vmr $ivec,$in7
1197
1198 le?vperm $out7,$out7,$out7,$inpperm
1199 stvx_u $out7,0,$out
1200 addi $out,$out,0x10
1201
1202Lcbc_dec8x_done:
1203 le?vperm $ivec,$ivec,$ivec,$inpperm
1204 stvx_u $ivec,0,$ivp # write [unaligned] iv
1205
1206 li r10,`$FRAME+15`
1207 li r11,`$FRAME+31`
1208 stvx $inpperm,r10,$sp # wipe copies of round keys
1209 addi r10,r10,32
1210 stvx $inpperm,r11,$sp
1211 addi r11,r11,32
1212 stvx $inpperm,r10,$sp
1213 addi r10,r10,32
1214 stvx $inpperm,r11,$sp
1215 addi r11,r11,32
1216 stvx $inpperm,r10,$sp
1217 addi r10,r10,32
1218 stvx $inpperm,r11,$sp
1219 addi r11,r11,32
1220 stvx $inpperm,r10,$sp
1221 addi r10,r10,32
1222 stvx $inpperm,r11,$sp
1223 addi r11,r11,32
1224
1225 mtspr 256,$vrsave
1226 lvx v20,r10,$sp # ABI says so
1227 addi r10,r10,32
1228 lvx v21,r11,$sp
1229 addi r11,r11,32
1230 lvx v22,r10,$sp
1231 addi r10,r10,32
1232 lvx v23,r11,$sp
1233 addi r11,r11,32
1234 lvx v24,r10,$sp
1235 addi r10,r10,32
1236 lvx v25,r11,$sp
1237 addi r11,r11,32
1238 lvx v26,r10,$sp
1239 addi r10,r10,32
1240 lvx v27,r11,$sp
1241 addi r11,r11,32
1242 lvx v28,r10,$sp
1243 addi r10,r10,32
1244 lvx v29,r11,$sp
1245 addi r11,r11,32
1246 lvx v30,r10,$sp
1247 lvx v31,r11,$sp
1248 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1249 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1250 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1251 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1252 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1253 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1254 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1255 blr
1256 .long 0
1257 .byte 0,12,0x04,0,0x80,6,6,0
1258 .long 0
1259.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1260___
1261}} }}}
1262
1263#########################################################################
1264{{{ # CTR procedure[s] #
1265my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1266my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1267my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1268 map("v$_",(4..11));
1269my $dat=$tmp;
1270
1271$code.=<<___;
1272.globl .${prefix}_ctr32_encrypt_blocks
1273.align 5
1274.${prefix}_ctr32_encrypt_blocks:
1275 ${UCMP}i $len,1
1276 bltlr-
1277
1278 lis r0,0xfff0
1279 mfspr $vrsave,256
1280 mtspr 256,r0
1281
1282 li $idx,15
1283 vxor $rndkey0,$rndkey0,$rndkey0
1284 le?vspltisb $tmp,0x0f
1285
1286 lvx $ivec,0,$ivp # load [unaligned] iv
1287 lvsl $inpperm,0,$ivp
1288 lvx $inptail,$idx,$ivp
1289 vspltisb $one,1
1290 le?vxor $inpperm,$inpperm,$tmp
1291 vperm $ivec,$ivec,$inptail,$inpperm
1292 vsldoi $one,$rndkey0,$one,1
1293
1294 neg r11,$inp
1295 ?lvsl $keyperm,0,$key # prepare for unaligned key
1296 lwz $rounds,240($key)
1297
1298 lvsr $inpperm,0,r11 # prepare for unaligned load
1299 lvx $inptail,0,$inp
1300 addi $inp,$inp,15 # 15 is not typo
1301 le?vxor $inpperm,$inpperm,$tmp
1302
1303 srwi $rounds,$rounds,1
1304 li $idx,16
1305 subi $rounds,$rounds,1
1306
1307 ${UCMP}i $len,8
1308 bge _aesp8_ctr32_encrypt8x
1309
1310 ?lvsr $outperm,0,$out # prepare for unaligned store
1311 vspltisb $outmask,-1
1312 lvx $outhead,0,$out
1313 ?vperm $outmask,$rndkey0,$outmask,$outperm
1314 le?vxor $outperm,$outperm,$tmp
1315
1316 lvx $rndkey0,0,$key
1317 mtctr $rounds
1318 lvx $rndkey1,$idx,$key
1319 addi $idx,$idx,16
1320 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1321 vxor $inout,$ivec,$rndkey0
1322 lvx $rndkey0,$idx,$key
1323 addi $idx,$idx,16
1324 b Loop_ctr32_enc
1325
1326.align 5
1327Loop_ctr32_enc:
1328 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1329 vcipher $inout,$inout,$rndkey1
1330 lvx $rndkey1,$idx,$key
1331 addi $idx,$idx,16
1332 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1333 vcipher $inout,$inout,$rndkey0
1334 lvx $rndkey0,$idx,$key
1335 addi $idx,$idx,16
1336 bdnz Loop_ctr32_enc
1337
1338 vadduwm $ivec,$ivec,$one
1339 vmr $dat,$inptail
1340 lvx $inptail,0,$inp
1341 addi $inp,$inp,16
1342 subic. $len,$len,1 # blocks--
1343
1344 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1345 vcipher $inout,$inout,$rndkey1
1346 lvx $rndkey1,$idx,$key
1347 vperm $dat,$dat,$inptail,$inpperm
1348 li $idx,16
1349 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1350 lvx $rndkey0,0,$key
1351 vxor $dat,$dat,$rndkey1 # last round key
1352 vcipherlast $inout,$inout,$dat
1353
1354 lvx $rndkey1,$idx,$key
1355 addi $idx,$idx,16
1356 vperm $inout,$inout,$inout,$outperm
1357 vsel $dat,$outhead,$inout,$outmask
1358 mtctr $rounds
1359 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1360 vmr $outhead,$inout
1361 vxor $inout,$ivec,$rndkey0
1362 lvx $rndkey0,$idx,$key
1363 addi $idx,$idx,16
1364 stvx $dat,0,$out
1365 addi $out,$out,16
1366 bne Loop_ctr32_enc
1367
1368 addi $out,$out,-1
1369 lvx $inout,0,$out # redundant in aligned case
1370 vsel $inout,$outhead,$inout,$outmask
1371 stvx $inout,0,$out
1372
1373 mtspr 256,$vrsave
1374 blr
1375 .long 0
1376 .byte 0,12,0x14,0,0,0,6,0
1377 .long 0
1378___
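# ctr32 semantics for reference: only the last 32-bit word of the 16-byte
# counter block is incremented, as a big-endian integer with no carry into
# the rest of the IV -- which is what the {0,0,0,1} vector built with
# vspltisb/vsldoi and added with vadduwm above achieves. A scalar sketch
# (illustrative only, not used by this module):
sub ctr32_inc {
    my $block = shift;                           # 16-byte counter block
    my $c = unpack("N", substr($block, 12, 4));  # big-endian low word
    substr($block, 12, 4) = pack("N", ($c + 1) & 0xFFFFFFFF);
    return $block;
}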
1379#########################################################################
1380{{ # Optimized CTR procedure #
1381my $key_="r11";
1382my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1383 $x00=0 if ($flavour =~ /osx/);
1384my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1385my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1386my $rndkey0="v23"; # v24-v25 rotating buffer for the first round keys
1387 # v26-v31 last 6 round keys
1388my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1389my ($two,$three,$four)=($outhead,$outperm,$outmask);
1390
1391$code.=<<___;
1392.align 5
1393_aesp8_ctr32_encrypt8x:
1394 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1395 li r10,`$FRAME+8*16+15`
1396 li r11,`$FRAME+8*16+31`
1397 stvx v20,r10,$sp # ABI says so
1398 addi r10,r10,32
1399 stvx v21,r11,$sp
1400 addi r11,r11,32
1401 stvx v22,r10,$sp
1402 addi r10,r10,32
1403 stvx v23,r11,$sp
1404 addi r11,r11,32
1405 stvx v24,r10,$sp
1406 addi r10,r10,32
1407 stvx v25,r11,$sp
1408 addi r11,r11,32
1409 stvx v26,r10,$sp
1410 addi r10,r10,32
1411 stvx v27,r11,$sp
1412 addi r11,r11,32
1413 stvx v28,r10,$sp
1414 addi r10,r10,32
1415 stvx v29,r11,$sp
1416 addi r11,r11,32
1417 stvx v30,r10,$sp
1418 stvx v31,r11,$sp
1419 li r0,-1
1420 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1421 li $x10,0x10
1422 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1423 li $x20,0x20
1424 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1425 li $x30,0x30
1426 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1427 li $x40,0x40
1428 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1429 li $x50,0x50
1430 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1431 li $x60,0x60
1432 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1433 li $x70,0x70
1434 mtspr 256,r0
1435
1436 subi $rounds,$rounds,3 # -4 in total
1437
1438 lvx $rndkey0,$x00,$key # load key schedule
1439 lvx v30,$x10,$key
1440 addi $key,$key,0x20
1441 lvx v31,$x00,$key
1442 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1443 addi $key_,$sp,$FRAME+15
1444 mtctr $rounds
1445
1446Load_ctr32_enc_key:
1447 ?vperm v24,v30,v31,$keyperm
1448 lvx v30,$x10,$key
1449 addi $key,$key,0x20
1450 stvx v24,$x00,$key_ # off-load round[1]
1451 ?vperm v25,v31,v30,$keyperm
1452 lvx v31,$x00,$key
1453 stvx v25,$x10,$key_ # off-load round[2]
1454 addi $key_,$key_,0x20
1455 bdnz Load_ctr32_enc_key
1456
1457 lvx v26,$x10,$key
1458 ?vperm v24,v30,v31,$keyperm
1459 lvx v27,$x20,$key
1460 stvx v24,$x00,$key_ # off-load round[3]
1461 ?vperm v25,v31,v26,$keyperm
1462 lvx v28,$x30,$key
1463 stvx v25,$x10,$key_ # off-load round[4]
1464 addi $key_,$sp,$FRAME+15 # rewind $key_
1465 ?vperm v26,v26,v27,$keyperm
1466 lvx v29,$x40,$key
1467 ?vperm v27,v27,v28,$keyperm
1468 lvx v30,$x50,$key
1469 ?vperm v28,v28,v29,$keyperm
1470 lvx v31,$x60,$key
1471 ?vperm v29,v29,v30,$keyperm
1472 lvx $out0,$x70,$key # borrow $out0
1473 ?vperm v30,v30,v31,$keyperm
1474 lvx v24,$x00,$key_ # pre-load round[1]
1475 ?vperm v31,v31,$out0,$keyperm
1476 lvx v25,$x10,$key_ # pre-load round[2]
1477
1478 vadduwm $two,$one,$one
1479 subi $inp,$inp,15 # undo "caller"
1480 $SHL $len,$len,4
1481
1482 vadduwm $out1,$ivec,$one # counter values ...
1483 vadduwm $out2,$ivec,$two
1484 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1485 le?li $idx,8
1486 vadduwm $out3,$out1,$two
1487 vxor $out1,$out1,$rndkey0
1488 le?lvsl $inpperm,0,$idx
1489 vadduwm $out4,$out2,$two
1490 vxor $out2,$out2,$rndkey0
1491 le?vspltisb $tmp,0x0f
1492 vadduwm $out5,$out3,$two
1493 vxor $out3,$out3,$rndkey0
1494 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1495 vadduwm $out6,$out4,$two
1496 vxor $out4,$out4,$rndkey0
1497 vadduwm $out7,$out5,$two
1498 vxor $out5,$out5,$rndkey0
1499 vadduwm $ivec,$out6,$two # next counter value
1500 vxor $out6,$out6,$rndkey0
1501 vxor $out7,$out7,$rndkey0
1502
1503 mtctr $rounds
1504 b Loop_ctr32_enc8x
1505.align 5
1506Loop_ctr32_enc8x:
1507 vcipher $out0,$out0,v24
1508 vcipher $out1,$out1,v24
1509 vcipher $out2,$out2,v24
1510 vcipher $out3,$out3,v24
1511 vcipher $out4,$out4,v24
1512 vcipher $out5,$out5,v24
1513 vcipher $out6,$out6,v24
1514 vcipher $out7,$out7,v24
1515Loop_ctr32_enc8x_middle:
1516 lvx v24,$x20,$key_ # round[3]
1517 addi $key_,$key_,0x20
1518
1519 vcipher $out0,$out0,v25
1520 vcipher $out1,$out1,v25
1521 vcipher $out2,$out2,v25
1522 vcipher $out3,$out3,v25
1523 vcipher $out4,$out4,v25
1524 vcipher $out5,$out5,v25
1525 vcipher $out6,$out6,v25
1526 vcipher $out7,$out7,v25
1527 lvx v25,$x10,$key_ # round[4]
1528 bdnz Loop_ctr32_enc8x
1529
1530 subic r11,$len,256 # $len-256, borrow $key_
1531 vcipher $out0,$out0,v24
1532 vcipher $out1,$out1,v24
1533 vcipher $out2,$out2,v24
1534 vcipher $out3,$out3,v24
1535 vcipher $out4,$out4,v24
1536 vcipher $out5,$out5,v24
1537 vcipher $out6,$out6,v24
1538 vcipher $out7,$out7,v24
1539
1540 subfe r0,r0,r0 # borrow?-1:0
1541 vcipher $out0,$out0,v25
1542 vcipher $out1,$out1,v25
1543 vcipher $out2,$out2,v25
1544 vcipher $out3,$out3,v25
1545 vcipher $out4,$out4,v25
1546 vcipher $out5,$out5,v25
1547 vcipher $out6,$out6,v25
1548 vcipher $out7,$out7,v25
1549
1550 and r0,r0,r11
1551 addi $key_,$sp,$FRAME+15 # rewind $key_
1552 vcipher $out0,$out0,v26
1553 vcipher $out1,$out1,v26
1554 vcipher $out2,$out2,v26
1555 vcipher $out3,$out3,v26
1556 vcipher $out4,$out4,v26
1557 vcipher $out5,$out5,v26
1558 vcipher $out6,$out6,v26
1559 vcipher $out7,$out7,v26
1560 lvx v24,$x00,$key_ # re-pre-load round[1]
1561
1562 subic $len,$len,129 # $len-=129
1563 vcipher $out0,$out0,v27
1564 addi $len,$len,1 # $len-=128 really
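 # (129 rather than a plain 128: the borrow captured
 # by subfe. below then fires exactly when $len<=128,
 # i.e. when this is the final batch of up to 8 blocks)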
1565 vcipher $out1,$out1,v27
1566 vcipher $out2,$out2,v27
1567 vcipher $out3,$out3,v27
1568 vcipher $out4,$out4,v27
1569 vcipher $out5,$out5,v27
1570 vcipher $out6,$out6,v27
1571 vcipher $out7,$out7,v27
1572 lvx v25,$x10,$key_ # re-pre-load round[2]
1573
1574 vcipher $out0,$out0,v28
1575 lvx_u $in0,$x00,$inp # load input
1576 vcipher $out1,$out1,v28
1577 lvx_u $in1,$x10,$inp
1578 vcipher $out2,$out2,v28
1579 lvx_u $in2,$x20,$inp
1580 vcipher $out3,$out3,v28
1581 lvx_u $in3,$x30,$inp
1582 vcipher $out4,$out4,v28
1583 lvx_u $in4,$x40,$inp
1584 vcipher $out5,$out5,v28
1585 lvx_u $in5,$x50,$inp
1586 vcipher $out6,$out6,v28
1587 lvx_u $in6,$x60,$inp
1588 vcipher $out7,$out7,v28
1589 lvx_u $in7,$x70,$inp
1590 addi $inp,$inp,0x80
1591
1592 vcipher $out0,$out0,v29
1593 le?vperm $in0,$in0,$in0,$inpperm
1594 vcipher $out1,$out1,v29
1595 le?vperm $in1,$in1,$in1,$inpperm
1596 vcipher $out2,$out2,v29
1597 le?vperm $in2,$in2,$in2,$inpperm
1598 vcipher $out3,$out3,v29
1599 le?vperm $in3,$in3,$in3,$inpperm
1600 vcipher $out4,$out4,v29
1601 le?vperm $in4,$in4,$in4,$inpperm
1602 vcipher $out5,$out5,v29
1603 le?vperm $in5,$in5,$in5,$inpperm
1604 vcipher $out6,$out6,v29
1605 le?vperm $in6,$in6,$in6,$inpperm
1606 vcipher $out7,$out7,v29
1607 le?vperm $in7,$in7,$in7,$inpperm
1608
1609 add $inp,$inp,r0 # $inp is adjusted in such a
1610 # way that at exit from the
1611 # loop inX-in7 are loaded
1612 # with the last "words"
1613 subfe. r0,r0,r0 # borrow?-1:0
1614 vcipher $out0,$out0,v30
1615 vxor $in0,$in0,v31 # xor with last round key
1616 vcipher $out1,$out1,v30
1617 vxor $in1,$in1,v31
1618 vcipher $out2,$out2,v30
1619 vxor $in2,$in2,v31
1620 vcipher $out3,$out3,v30
1621 vxor $in3,$in3,v31
1622 vcipher $out4,$out4,v30
1623 vxor $in4,$in4,v31
1624 vcipher $out5,$out5,v30
1625 vxor $in5,$in5,v31
1626 vcipher $out6,$out6,v30
1627 vxor $in6,$in6,v31
1628 vcipher $out7,$out7,v30
1629 vxor $in7,$in7,v31
1630
1631 bne Lctr32_enc8x_break # did $len-129 borrow?
1632
1633 vcipherlast $in0,$out0,$in0
1634 vcipherlast $in1,$out1,$in1
1635 vadduwm $out1,$ivec,$one # counter values ...
1636 vcipherlast $in2,$out2,$in2
1637 vadduwm $out2,$ivec,$two
1638 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1639 vcipherlast $in3,$out3,$in3
1640 vadduwm $out3,$out1,$two
1641 vxor $out1,$out1,$rndkey0
1642 vcipherlast $in4,$out4,$in4
1643 vadduwm $out4,$out2,$two
1644 vxor $out2,$out2,$rndkey0
1645 vcipherlast $in5,$out5,$in5
1646 vadduwm $out5,$out3,$two
1647 vxor $out3,$out3,$rndkey0
1648 vcipherlast $in6,$out6,$in6
1649 vadduwm $out6,$out4,$two
1650 vxor $out4,$out4,$rndkey0
1651 vcipherlast $in7,$out7,$in7
1652 vadduwm $out7,$out5,$two
1653 vxor $out5,$out5,$rndkey0
1654 le?vperm $in0,$in0,$in0,$inpperm
1655 vadduwm $ivec,$out6,$two # next counter value
1656 vxor $out6,$out6,$rndkey0
1657 le?vperm $in1,$in1,$in1,$inpperm
1658 vxor $out7,$out7,$rndkey0
1659 mtctr $rounds
1660
1661 vcipher $out0,$out0,v24
1662 stvx_u $in0,$x00,$out
1663 le?vperm $in2,$in2,$in2,$inpperm
1664 vcipher $out1,$out1,v24
1665 stvx_u $in1,$x10,$out
1666 le?vperm $in3,$in3,$in3,$inpperm
1667 vcipher $out2,$out2,v24
1668 stvx_u $in2,$x20,$out
1669 le?vperm $in4,$in4,$in4,$inpperm
1670 vcipher $out3,$out3,v24
1671 stvx_u $in3,$x30,$out
1672 le?vperm $in5,$in5,$in5,$inpperm
1673 vcipher $out4,$out4,v24
1674 stvx_u $in4,$x40,$out
1675 le?vperm $in6,$in6,$in6,$inpperm
1676 vcipher $out5,$out5,v24
1677 stvx_u $in5,$x50,$out
1678 le?vperm $in7,$in7,$in7,$inpperm
1679 vcipher $out6,$out6,v24
1680 stvx_u $in6,$x60,$out
1681 vcipher $out7,$out7,v24
1682 stvx_u $in7,$x70,$out
1683 addi $out,$out,0x80
1684
1685 b Loop_ctr32_enc8x_middle
1686
1687.align 5
1688Lctr32_enc8x_break:
1689 cmpwi $len,-0x60
1690 blt Lctr32_enc8x_one
1691 nop
1692 beq Lctr32_enc8x_two
1693 cmpwi $len,-0x40
1694 blt Lctr32_enc8x_three
1695 nop
1696 beq Lctr32_enc8x_four
1697 cmpwi $len,-0x20
1698 blt Lctr32_enc8x_five
1699 nop
1700 beq Lctr32_enc8x_six
1701 cmpwi $len,0x00
1702 blt Lctr32_enc8x_seven
1703
1704Lctr32_enc8x_eight:
1705 vcipherlast $out0,$out0,$in0
1706 vcipherlast $out1,$out1,$in1
1707 vcipherlast $out2,$out2,$in2
1708 vcipherlast $out3,$out3,$in3
1709 vcipherlast $out4,$out4,$in4
1710 vcipherlast $out5,$out5,$in5
1711 vcipherlast $out6,$out6,$in6
1712 vcipherlast $out7,$out7,$in7
1713
1714 le?vperm $out0,$out0,$out0,$inpperm
1715 le?vperm $out1,$out1,$out1,$inpperm
1716 stvx_u $out0,$x00,$out
1717 le?vperm $out2,$out2,$out2,$inpperm
1718 stvx_u $out1,$x10,$out
1719 le?vperm $out3,$out3,$out3,$inpperm
1720 stvx_u $out2,$x20,$out
1721 le?vperm $out4,$out4,$out4,$inpperm
1722 stvx_u $out3,$x30,$out
1723 le?vperm $out5,$out5,$out5,$inpperm
1724 stvx_u $out4,$x40,$out
1725 le?vperm $out6,$out6,$out6,$inpperm
1726 stvx_u $out5,$x50,$out
1727 le?vperm $out7,$out7,$out7,$inpperm
1728 stvx_u $out6,$x60,$out
1729 stvx_u $out7,$x70,$out
1730 addi $out,$out,0x80
1731 b Lctr32_enc8x_done
1732
1733.align 5
1734Lctr32_enc8x_seven:
1735 vcipherlast $out0,$out0,$in1
1736 vcipherlast $out1,$out1,$in2
1737 vcipherlast $out2,$out2,$in3
1738 vcipherlast $out3,$out3,$in4
1739 vcipherlast $out4,$out4,$in5
1740 vcipherlast $out5,$out5,$in6
1741 vcipherlast $out6,$out6,$in7
1742
1743 le?vperm $out0,$out0,$out0,$inpperm
1744 le?vperm $out1,$out1,$out1,$inpperm
1745 stvx_u $out0,$x00,$out
1746 le?vperm $out2,$out2,$out2,$inpperm
1747 stvx_u $out1,$x10,$out
1748 le?vperm $out3,$out3,$out3,$inpperm
1749 stvx_u $out2,$x20,$out
1750 le?vperm $out4,$out4,$out4,$inpperm
1751 stvx_u $out3,$x30,$out
1752 le?vperm $out5,$out5,$out5,$inpperm
1753 stvx_u $out4,$x40,$out
1754 le?vperm $out6,$out6,$out6,$inpperm
1755 stvx_u $out5,$x50,$out
1756 stvx_u $out6,$x60,$out
1757 addi $out,$out,0x70
1758 b Lctr32_enc8x_done
1759
1760.align 5
1761Lctr32_enc8x_six:
1762 vcipherlast $out0,$out0,$in2
1763 vcipherlast $out1,$out1,$in3
1764 vcipherlast $out2,$out2,$in4
1765 vcipherlast $out3,$out3,$in5
1766 vcipherlast $out4,$out4,$in6
1767 vcipherlast $out5,$out5,$in7
1768
1769 le?vperm $out0,$out0,$out0,$inpperm
1770 le?vperm $out1,$out1,$out1,$inpperm
1771 stvx_u $out0,$x00,$out
1772 le?vperm $out2,$out2,$out2,$inpperm
1773 stvx_u $out1,$x10,$out
1774 le?vperm $out3,$out3,$out3,$inpperm
1775 stvx_u $out2,$x20,$out
1776 le?vperm $out4,$out4,$out4,$inpperm
1777 stvx_u $out3,$x30,$out
1778 le?vperm $out5,$out5,$out5,$inpperm
1779 stvx_u $out4,$x40,$out
1780 stvx_u $out5,$x50,$out
1781 addi $out,$out,0x60
1782 b Lctr32_enc8x_done
1783
1784.align 5
1785Lctr32_enc8x_five:
1786 vcipherlast $out0,$out0,$in3
1787 vcipherlast $out1,$out1,$in4
1788 vcipherlast $out2,$out2,$in5
1789 vcipherlast $out3,$out3,$in6
1790 vcipherlast $out4,$out4,$in7
1791
1792 le?vperm $out0,$out0,$out0,$inpperm
1793 le?vperm $out1,$out1,$out1,$inpperm
1794 stvx_u $out0,$x00,$out
1795 le?vperm $out2,$out2,$out2,$inpperm
1796 stvx_u $out1,$x10,$out
1797 le?vperm $out3,$out3,$out3,$inpperm
1798 stvx_u $out2,$x20,$out
1799 le?vperm $out4,$out4,$out4,$inpperm
1800 stvx_u $out3,$x30,$out
1801 stvx_u $out4,$x40,$out
1802 addi $out,$out,0x50
1803 b Lctr32_enc8x_done
1804
1805.align 5
1806Lctr32_enc8x_four:
1807 vcipherlast $out0,$out0,$in4
1808 vcipherlast $out1,$out1,$in5
1809 vcipherlast $out2,$out2,$in6
1810 vcipherlast $out3,$out3,$in7
1811
1812 le?vperm $out0,$out0,$out0,$inpperm
1813 le?vperm $out1,$out1,$out1,$inpperm
1814 stvx_u $out0,$x00,$out
1815 le?vperm $out2,$out2,$out2,$inpperm
1816 stvx_u $out1,$x10,$out
1817 le?vperm $out3,$out3,$out3,$inpperm
1818 stvx_u $out2,$x20,$out
1819 stvx_u $out3,$x30,$out
1820 addi $out,$out,0x40
1821 b Lctr32_enc8x_done
1822
1823.align 5
1824Lctr32_enc8x_three:
1825 vcipherlast $out0,$out0,$in5
1826 vcipherlast $out1,$out1,$in6
1827 vcipherlast $out2,$out2,$in7
1828
1829 le?vperm $out0,$out0,$out0,$inpperm
1830 le?vperm $out1,$out1,$out1,$inpperm
1831 stvx_u $out0,$x00,$out
1832 le?vperm $out2,$out2,$out2,$inpperm
1833 stvx_u $out1,$x10,$out
1834 stvx_u $out2,$x20,$out
1835 addi $out,$out,0x30
1836 b Lctr32_enc8x_done
1837
1838.align 5
1839Lctr32_enc8x_two:
1840 vcipherlast $out0,$out0,$in6
1841 vcipherlast $out1,$out1,$in7
1842
1843 le?vperm $out0,$out0,$out0,$inpperm
1844 le?vperm $out1,$out1,$out1,$inpperm
1845 stvx_u $out0,$x00,$out
1846 stvx_u $out1,$x10,$out
1847 addi $out,$out,0x20
1848 b Lctr32_enc8x_done
1849
1850.align 5
1851Lctr32_enc8x_one:
1852 vcipherlast $out0,$out0,$in7
1853
1854 le?vperm $out0,$out0,$out0,$inpperm
1855 stvx_u $out0,0,$out
1856 addi $out,$out,0x10
1857
1858Lctr32_enc8x_done:
1859 li r10,`$FRAME+15`
1860 li r11,`$FRAME+31`
1861 stvx $inpperm,r10,$sp # wipe copies of round keys
1862 addi r10,r10,32
1863 stvx $inpperm,r11,$sp
1864 addi r11,r11,32
1865 stvx $inpperm,r10,$sp
1866 addi r10,r10,32
1867 stvx $inpperm,r11,$sp
1868 addi r11,r11,32
1869 stvx $inpperm,r10,$sp
1870 addi r10,r10,32
1871 stvx $inpperm,r11,$sp
1872 addi r11,r11,32
1873 stvx $inpperm,r10,$sp
1874 addi r10,r10,32
1875 stvx $inpperm,r11,$sp
1876 addi r11,r11,32
1877
1878 mtspr 256,$vrsave
1879 lvx v20,r10,$sp # ABI says so
1880 addi r10,r10,32
1881 lvx v21,r11,$sp
1882 addi r11,r11,32
1883 lvx v22,r10,$sp
1884 addi r10,r10,32
1885 lvx v23,r11,$sp
1886 addi r11,r11,32
1887 lvx v24,r10,$sp
1888 addi r10,r10,32
1889 lvx v25,r11,$sp
1890 addi r11,r11,32
1891 lvx v26,r10,$sp
1892 addi r10,r10,32
1893 lvx v27,r11,$sp
1894 addi r11,r11,32
1895 lvx v28,r10,$sp
1896 addi r10,r10,32
1897 lvx v29,r11,$sp
1898 addi r11,r11,32
1899 lvx v30,r10,$sp
1900 lvx v31,r11,$sp
1901 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1902 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1903 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1904 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1905 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1906 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1907 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1908 blr
1909 .long 0
1910 .byte 0,12,0x04,0,0x80,6,6,0
1911 .long 0
1912.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1913___
1914}} }}}
1915
1916#########################################################################
1917{{{ # XTS procedures #
1918# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
1919# const AES_KEY *key1, const AES_KEY *key2, #
1920# [const] unsigned char iv[16]); #
1921# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
1922# the input tweak value is assumed to be encrypted already, and the #
1923# last tweak value, suitable for a consecutive call on the same chunk #
1924# of data, is written back to the original buffer. In addition, in #
1925# "tweak chaining" mode only complete input blocks are processed. #
1926
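# For reference, the tweak update below (the vsrab/vaddubm/vsldoi/vand/vxor
# sequence) is the GF(2^128) "multiply by x" of XTS. A scalar sketch,
# assuming the tweak is held least-significant-byte first (illustrative
# only, not used by this module):
sub xts_next_tweak {
    my @t = unpack("C16", shift);            # 16 tweak bytes, LSB first
    my $carry = ($t[15] & 0x80) ? 0x87 : 0;  # x^128 = x^7 + x^2 + x + 1
    for (my $i = 15; $i > 0; $i--) {
        $t[$i] = (($t[$i] << 1) | ($t[$i-1] >> 7)) & 0xff;
    }
    $t[0] = (($t[0] << 1) & 0xff) ^ $carry;
    return pack("C16", @t);
}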
1927my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
1928my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
1929my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
1930my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
1931my $taillen = $key2;
1932
1933 ($inp,$idx) = ($idx,$inp); # reassign
1934
1935$code.=<<___;
1936.globl .${prefix}_xts_encrypt
1937.align 5
1938.${prefix}_xts_encrypt:
1939 mr $inp,r3 # reassign
1940 li r3,-1
1941 ${UCMP}i $len,16
1942 bltlr-
1943
1944 lis r0,0xfff0
1945 mfspr r12,256 # save vrsave
1946 li r11,0
1947 mtspr 256,r0
1948
1949 vspltisb $seven,0x07 # 0x070707..07
1950 le?lvsl $leperm,r11,r11
1951 le?vspltisb $tmp,0x0f
1952 le?vxor $leperm,$leperm,$seven
1953
1954 li $idx,15
1955 lvx $tweak,0,$ivp # load [unaligned] iv
1956 lvsl $inpperm,0,$ivp
1957 lvx $inptail,$idx,$ivp
1958 le?vxor $inpperm,$inpperm,$tmp
1959 vperm $tweak,$tweak,$inptail,$inpperm
1960
1961 neg r11,$inp
1962 lvsr $inpperm,0,r11 # prepare for unaligned load
1963 lvx $inout,0,$inp
1964 addi $inp,$inp,15 # 15 is not typo
1965 le?vxor $inpperm,$inpperm,$tmp
1966
1967 ${UCMP}i $key2,0 # key2==NULL?
1968 beq Lxts_enc_no_key2
1969
1970 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
1971 lwz $rounds,240($key2)
1972 srwi $rounds,$rounds,1
1973 subi $rounds,$rounds,1
1974 li $idx,16
1975
1976 lvx $rndkey0,0,$key2
1977 lvx $rndkey1,$idx,$key2
1978 addi $idx,$idx,16
1979 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1980 vxor $tweak,$tweak,$rndkey0
1981 lvx $rndkey0,$idx,$key2
1982 addi $idx,$idx,16
1983 mtctr $rounds
1984
1985Ltweak_xts_enc:
1986 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1987 vcipher $tweak,$tweak,$rndkey1
1988 lvx $rndkey1,$idx,$key2
1989 addi $idx,$idx,16
1990 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1991 vcipher $tweak,$tweak,$rndkey0
1992 lvx $rndkey0,$idx,$key2
1993 addi $idx,$idx,16
1994 bdnz Ltweak_xts_enc
1995
1996 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1997 vcipher $tweak,$tweak,$rndkey1
1998 lvx $rndkey1,$idx,$key2
1999 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2000 vcipherlast $tweak,$tweak,$rndkey0
2001
2002 li $ivp,0 # don't chain the tweak
2003 b Lxts_enc
2004
2005Lxts_enc_no_key2:
2006 li $idx,-16
2007 and $len,$len,$idx # in "tweak chaining"
2008 # mode only complete
2009 # blocks are processed
2010Lxts_enc:
2011 lvx $inptail,0,$inp
2012 addi $inp,$inp,16
2013
2014 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2015 lwz $rounds,240($key1)
2016 srwi $rounds,$rounds,1
2017 subi $rounds,$rounds,1
2018 li $idx,16
2019
2020 vslb $eighty7,$seven,$seven # 0x808080..80
2021 vor $eighty7,$eighty7,$seven # 0x878787..87
2022 vspltisb $tmp,1 # 0x010101..01
2023 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2024
2025 ${UCMP}i $len,96
2026 bge _aesp8_xts_encrypt6x
2027
2028 andi. $taillen,$len,15
2029 subic r0,$len,32
2030 subi $taillen,$taillen,16
2031 subfe r0,r0,r0
2032 and r0,r0,$taillen
2033 add $inp,$inp,r0
2034
2035 lvx $rndkey0,0,$key1
2036 lvx $rndkey1,$idx,$key1
2037 addi $idx,$idx,16
2038 vperm $inout,$inout,$inptail,$inpperm
2039 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2040 vxor $inout,$inout,$tweak
2041 vxor $inout,$inout,$rndkey0
2042 lvx $rndkey0,$idx,$key1
2043 addi $idx,$idx,16
2044 mtctr $rounds
2045 b Loop_xts_enc
2046
2047.align 5
2048Loop_xts_enc:
2049 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2050 vcipher $inout,$inout,$rndkey1
2051 lvx $rndkey1,$idx,$key1
2052 addi $idx,$idx,16
2053 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2054 vcipher $inout,$inout,$rndkey0
2055 lvx $rndkey0,$idx,$key1
2056 addi $idx,$idx,16
2057 bdnz Loop_xts_enc
2058
2059 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2060 vcipher $inout,$inout,$rndkey1
2061 lvx $rndkey1,$idx,$key1
2062 li $idx,16
2063 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2064 vxor $rndkey0,$rndkey0,$tweak
2065 vcipherlast $output,$inout,$rndkey0
2066
2067 le?vperm $tmp,$output,$output,$leperm
2068 be?nop
2069 le?stvx_u $tmp,0,$out
2070 be?stvx_u $output,0,$out
2071 addi $out,$out,16
2072
2073 subic. $len,$len,16
2074 beq Lxts_enc_done
2075
2076 vmr $inout,$inptail
2077 lvx $inptail,0,$inp
2078 addi $inp,$inp,16
2079 lvx $rndkey0,0,$key1
2080 lvx $rndkey1,$idx,$key1
2081 addi $idx,$idx,16
2082
2083 subic r0,$len,32
2084 subfe r0,r0,r0
2085 and r0,r0,$taillen
2086 add $inp,$inp,r0
2087
2088 vsrab $tmp,$tweak,$seven # next tweak value
2089 vaddubm $tweak,$tweak,$tweak
2090 vsldoi $tmp,$tmp,$tmp,15
2091 vand $tmp,$tmp,$eighty7
2092 vxor $tweak,$tweak,$tmp
2093
2094 vperm $inout,$inout,$inptail,$inpperm
2095 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2096 vxor $inout,$inout,$tweak
2097 vxor $output,$output,$rndkey0 # just in case $len<16
2098 vxor $inout,$inout,$rndkey0
2099 lvx $rndkey0,$idx,$key1
2100 addi $idx,$idx,16
2101
2102 mtctr $rounds
2103 ${UCMP}i $len,16
2104 bge Loop_xts_enc
2105
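	# Tail (ciphertext stealing): lvsr/vperm build a mask whose first
	# $len bytes are 0x00 and the rest 0xff, vsel splices the partial
	# plaintext tail over the previous ciphertext block, the byte
	# loop copies $len ciphertext bytes forward as the short final
	# output block, and the merged block is encrypted once more.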
2106 vxor $output,$output,$tweak
2107 lvsr $inpperm,0,$len # $inpperm is no longer needed
2108 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2109 vspltisb $tmp,-1
2110 vperm $inptail,$inptail,$tmp,$inpperm
2111 vsel $inout,$inout,$output,$inptail
2112
2113 subi r11,$out,17
2114 subi $out,$out,16
2115 mtctr $len
2116 li $len,16
2117Loop_xts_enc_steal:
2118 lbzu r0,1(r11)
2119 stb r0,16(r11)
2120 bdnz Loop_xts_enc_steal
2121
2122 mtctr $rounds
2123 b Loop_xts_enc # one more time...
2124
2125Lxts_enc_done:
2126 ${UCMP}i $ivp,0
2127 beq Lxts_enc_ret
2128
2129 vsrab $tmp,$tweak,$seven # next tweak value
2130 vaddubm $tweak,$tweak,$tweak
2131 vsldoi $tmp,$tmp,$tmp,15
2132 vand $tmp,$tmp,$eighty7
2133 vxor $tweak,$tweak,$tmp
2134
2135 le?vperm $tweak,$tweak,$tweak,$leperm
2136 stvx_u $tweak,0,$ivp
2137
2138Lxts_enc_ret:
2139 mtspr 256,r12 # restore vrsave
2140 li r3,0
2141 blr
2142 .long 0
2143 .byte 0,12,0x04,0,0x80,6,6,0
2144 .long 0
2145.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
2146
2147.globl .${prefix}_xts_decrypt
2148.align 5
2149.${prefix}_xts_decrypt:
2150 mr $inp,r3 # reassign
2151 li r3,-1
2152 ${UCMP}i $len,16
2153 bltlr-
2154
2155 lis r0,0xfff8
2156 mfspr r12,256 # save vrsave
2157 li r11,0
2158 mtspr 256,r0
2159
2160 andi. r0,$len,15
2161 neg r0,r0
2162 andi. r0,r0,16
2163 sub $len,$len,r0
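	# If a partial tail exists, hold one full block back (len -= 16):
	# XTS decryption steals from the next-to-last block, which has to
	# be processed together with the tail (see Ltail_xts_dec).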
2164
2165 vspltisb $seven,0x07 # 0x070707..07
2166 le?lvsl $leperm,r11,r11
2167 le?vspltisb $tmp,0x0f
2168 le?vxor $leperm,$leperm,$seven
2169
2170 li $idx,15
2171 lvx $tweak,0,$ivp # load [unaligned] iv
2172 lvsl $inpperm,0,$ivp
2173 lvx $inptail,$idx,$ivp
2174 le?vxor $inpperm,$inpperm,$tmp
2175 vperm $tweak,$tweak,$inptail,$inpperm
2176
2177 neg r11,$inp
2178 lvsr $inpperm,0,r11 # prepare for unaligned load
2179 lvx $inout,0,$inp
2180 addi $inp,$inp,15 # 15 is not a typo
2181 le?vxor $inpperm,$inpperm,$tmp
2182
2183 ${UCMP}i $key2,0 # key2==NULL?
2184 beq Lxts_dec_no_key2
2185
2186 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
2187 lwz $rounds,240($key2)
2188 srwi $rounds,$rounds,1
2189 subi $rounds,$rounds,1
2190 li $idx,16
2191
2192 lvx $rndkey0,0,$key2
2193 lvx $rndkey1,$idx,$key2
2194 addi $idx,$idx,16
2195 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2196 vxor $tweak,$tweak,$rndkey0
2197 lvx $rndkey0,$idx,$key2
2198 addi $idx,$idx,16
2199 mtctr $rounds
2200
2201Ltweak_xts_dec:
2202 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2203 vcipher $tweak,$tweak,$rndkey1
2204 lvx $rndkey1,$idx,$key2
2205 addi $idx,$idx,16
2206 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2207 vcipher $tweak,$tweak,$rndkey0
2208 lvx $rndkey0,$idx,$key2
2209 addi $idx,$idx,16
2210 bdnz Ltweak_xts_dec
2211
2212 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2213 vcipher $tweak,$tweak,$rndkey1
2214 lvx $rndkey1,$idx,$key2
2215 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2216 vcipherlast $tweak,$tweak,$rndkey0
2217
2218 li $ivp,0 # don't chain the tweak
2219 b Lxts_dec
2220
2221Lxts_dec_no_key2:
2222 neg $idx,$len
2223 andi. $idx,$idx,15
2224 add $len,$len,$idx # in "tweak chaining"
2225 # mode only complete
2226 # blocks are processed
2227Lxts_dec:
2228 lvx $inptail,0,$inp
2229 addi $inp,$inp,16
2230
2231 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2232 lwz $rounds,240($key1)
2233 srwi $rounds,$rounds,1
2234 subi $rounds,$rounds,1
2235 li $idx,16
2236
2237 vslb $eighty7,$seven,$seven # 0x808080..80
2238 vor $eighty7,$eighty7,$seven # 0x878787..87
2239 vspltisb $tmp,1 # 0x010101..01
2240 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2241
2242 ${UCMP}i $len,96
2243 bge _aesp8_xts_decrypt6x
2244
2245 lvx $rndkey0,0,$key1
2246 lvx $rndkey1,$idx,$key1
2247 addi $idx,$idx,16
2248 vperm $inout,$inout,$inptail,$inpperm
2249 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2250 vxor $inout,$inout,$tweak
2251 vxor $inout,$inout,$rndkey0
2252 lvx $rndkey0,$idx,$key1
2253 addi $idx,$idx,16
2254 mtctr $rounds
2255
2256 ${UCMP}i $len,16
2257 blt Ltail_xts_dec
2258 be?b Loop_xts_dec
2259
2260.align 5
2261Loop_xts_dec:
2262 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2263 vncipher $inout,$inout,$rndkey1
2264 lvx $rndkey1,$idx,$key1
2265 addi $idx,$idx,16
2266 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2267 vncipher $inout,$inout,$rndkey0
2268 lvx $rndkey0,$idx,$key1
2269 addi $idx,$idx,16
2270 bdnz Loop_xts_dec
2271
2272 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2273 vncipher $inout,$inout,$rndkey1
2274 lvx $rndkey1,$idx,$key1
2275 li $idx,16
2276 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2277 vxor $rndkey0,$rndkey0,$tweak
2278 vncipherlast $output,$inout,$rndkey0
2279
2280 le?vperm $tmp,$output,$output,$leperm
2281 be?nop
2282 le?stvx_u $tmp,0,$out
2283 be?stvx_u $output,0,$out
2284 addi $out,$out,16
2285
2286 subic. $len,$len,16
2287 beq Lxts_dec_done
2288
2289 vmr $inout,$inptail
2290 lvx $inptail,0,$inp
2291 addi $inp,$inp,16
2292 lvx $rndkey0,0,$key1
2293 lvx $rndkey1,$idx,$key1
2294 addi $idx,$idx,16
2295
2296 vsrab $tmp,$tweak,$seven # next tweak value
2297 vaddubm $tweak,$tweak,$tweak
2298 vsldoi $tmp,$tmp,$tmp,15
2299 vand $tmp,$tmp,$eighty7
2300 vxor $tweak,$tweak,$tmp
2301
2302 vperm $inout,$inout,$inptail,$inpperm
2303 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2304 vxor $inout,$inout,$tweak
2305 vxor $inout,$inout,$rndkey0
2306 lvx $rndkey0,$idx,$key1
2307 addi $idx,$idx,16
2308
2309 mtctr $rounds
2310 ${UCMP}i $len,16
2311 bge Loop_xts_dec
2312
2313Ltail_xts_dec:
2314 vsrab $tmp,$tweak,$seven # next tweak value
2315 vaddubm $tweak1,$tweak,$tweak
2316 vsldoi $tmp,$tmp,$tmp,15
2317 vand $tmp,$tmp,$eighty7
2318 vxor $tweak1,$tweak1,$tmp
2319
2320 subi $inp,$inp,16
2321 add $inp,$inp,$len
2322
2323 vxor $inout,$inout,$tweak # :-(
2324 vxor $inout,$inout,$tweak1 # :-)
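	# Decrypt-side stealing processes tweaks out of order: the block
	# above must be deciphered with the *next* tweak ($tweak1), hence
	# the xor-out/xor-in pair swapping $tweak for $tweak1 on the
	# already-whitened block; the short tail is handled with $tweak.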
2325
2326Loop_xts_dec_short:
2327 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2328 vncipher $inout,$inout,$rndkey1
2329 lvx $rndkey1,$idx,$key1
2330 addi $idx,$idx,16
2331 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2332 vncipher $inout,$inout,$rndkey0
2333 lvx $rndkey0,$idx,$key1
2334 addi $idx,$idx,16
2335 bdnz Loop_xts_dec_short
2336
2337 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2338 vncipher $inout,$inout,$rndkey1
2339 lvx $rndkey1,$idx,$key1
2340 li $idx,16
2341 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2342 vxor $rndkey0,$rndkey0,$tweak1
2343 vncipherlast $output,$inout,$rndkey0
2344
2345 le?vperm $tmp,$output,$output,$leperm
2346 be?nop
2347 le?stvx_u $tmp,0,$out
2348 be?stvx_u $output,0,$out
2349
2350 vmr $inout,$inptail
2351 lvx $inptail,0,$inp
2352 #addi $inp,$inp,16
2353 lvx $rndkey0,0,$key1
2354 lvx $rndkey1,$idx,$key1
2355 addi $idx,$idx,16
2356 vperm $inout,$inout,$inptail,$inpperm
2357 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2358
2359 lvsr $inpperm,0,$len # $inpperm is no longer needed
2360 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2361 vspltisb $tmp,-1
2362 vperm $inptail,$inptail,$tmp,$inpperm
2363 vsel $inout,$inout,$output,$inptail
2364
2365 vxor $rndkey0,$rndkey0,$tweak
2366 vxor $inout,$inout,$rndkey0
2367 lvx $rndkey0,$idx,$key1
2368 addi $idx,$idx,16
2369
2370 subi r11,$out,1
2371 mtctr $len
2372 li $len,16
2373Loop_xts_dec_steal:
2374 lbzu r0,1(r11)
2375 stb r0,16(r11)
2376 bdnz Loop_xts_dec_steal
2377
2378 mtctr $rounds
2379 b Loop_xts_dec # one more time...
2380
2381Lxts_dec_done:
2382 ${UCMP}i $ivp,0
2383 beq Lxts_dec_ret
2384
2385 vsrab $tmp,$tweak,$seven # next tweak value
2386 vaddubm $tweak,$tweak,$tweak
2387 vsldoi $tmp,$tmp,$tmp,15
2388 vand $tmp,$tmp,$eighty7
2389 vxor $tweak,$tweak,$tmp
2390
2391 le?vperm $tweak,$tweak,$tweak,$leperm
2392 stvx_u $tweak,0,$ivp
2393
2394Lxts_dec_ret:
2395 mtspr 256,r12 # restore vrsave
2396 li r3,0
2397 blr
2398 .long 0
2399 .byte 0,12,0x04,0,0x80,6,6,0
2400 .long 0
2401.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2402___
2403#########################################################################
2404{{ # Optimized XTS procedures #
2405my $key_=$key2;
2406my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
2407 $x00=0 if ($flavour =~ /osx/);
2408my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2409my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2410my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2411my $rndkey0="v23"; # v24-v25 rotating buffer for streamed round keys
2412 # v26-v31 last 6 round keys
2413my ($keyperm)=($out0); # aliases with "caller", redundant assignment
2414my $taillen=$x70;
2415
2416$code.=<<___;
2417.align 5
2418_aesp8_xts_encrypt6x:
2419 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2420 mflr r11
2421 li r7,`$FRAME+8*16+15`
2422 li r3,`$FRAME+8*16+31`
2423 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2424 stvx v20,r7,$sp # ABI says so
2425 addi r7,r7,32
2426 stvx v21,r3,$sp
2427 addi r3,r3,32
2428 stvx v22,r7,$sp
2429 addi r7,r7,32
2430 stvx v23,r3,$sp
2431 addi r3,r3,32
2432 stvx v24,r7,$sp
2433 addi r7,r7,32
2434 stvx v25,r3,$sp
2435 addi r3,r3,32
2436 stvx v26,r7,$sp
2437 addi r7,r7,32
2438 stvx v27,r3,$sp
2439 addi r3,r3,32
2440 stvx v28,r7,$sp
2441 addi r7,r7,32
2442 stvx v29,r3,$sp
2443 addi r3,r3,32
2444 stvx v30,r7,$sp
2445 stvx v31,r3,$sp
2446 li r0,-1
2447 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2448 li $x10,0x10
2449 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2450 li $x20,0x20
2451 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2452 li $x30,0x30
2453 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2454 li $x40,0x40
2455 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2456 li $x50,0x50
2457 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2458 li $x60,0x60
2459 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2460 li $x70,0x70
2461 mtspr 256,r0
2462
2463 subi $rounds,$rounds,3 # -4 in total
2464
2465 lvx $rndkey0,$x00,$key1 # load key schedule
2466 lvx v30,$x10,$key1
2467 addi $key1,$key1,0x20
2468 lvx v31,$x00,$key1
2469 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2470 addi $key_,$sp,$FRAME+15
2471 mtctr $rounds
2472
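	# The key schedule is byte-swizzled once with ?vperm and parked
	# in an aligned stack buffer at $key_, so the main loops can
	# re-stream the round keys with plain lvx.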
2473Load_xts_enc_key:
2474 ?vperm v24,v30,v31,$keyperm
2475 lvx v30,$x10,$key1
2476 addi $key1,$key1,0x20
2477 stvx v24,$x00,$key_ # off-load round[1]
2478 ?vperm v25,v31,v30,$keyperm
2479 lvx v31,$x00,$key1
2480 stvx v25,$x10,$key_ # off-load round[2]
2481 addi $key_,$key_,0x20
2482 bdnz Load_xts_enc_key
2483
2484 lvx v26,$x10,$key1
2485 ?vperm v24,v30,v31,$keyperm
2486 lvx v27,$x20,$key1
2487 stvx v24,$x00,$key_ # off-load round[3]
2488 ?vperm v25,v31,v26,$keyperm
2489 lvx v28,$x30,$key1
2490 stvx v25,$x10,$key_ # off-load round[4]
2491 addi $key_,$sp,$FRAME+15 # rewind $key_
2492 ?vperm v26,v26,v27,$keyperm
2493 lvx v29,$x40,$key1
2494 ?vperm v27,v27,v28,$keyperm
2495 lvx v30,$x50,$key1
2496 ?vperm v28,v28,v29,$keyperm
2497 lvx v31,$x60,$key1
2498 ?vperm v29,v29,v30,$keyperm
2499 lvx $twk5,$x70,$key1 # borrow $twk5
2500 ?vperm v30,v30,v31,$keyperm
2501 lvx v24,$x00,$key_ # pre-load round[1]
2502 ?vperm v31,v31,$twk5,$keyperm
2503 lvx v25,$x10,$key_ # pre-load round[2]
2504
2505 vperm $in0,$inout,$inptail,$inpperm
2506 subi $inp,$inp,31 # undo "caller"
2507 vxor $twk0,$tweak,$rndkey0
2508 vsrab $tmp,$tweak,$seven # next tweak value
2509 vaddubm $tweak,$tweak,$tweak
2510 vsldoi $tmp,$tmp,$tmp,15
2511 vand $tmp,$tmp,$eighty7
2512 vxor $out0,$in0,$twk0
2513 vxor $tweak,$tweak,$tmp
2514
2515 lvx_u $in1,$x10,$inp
2516 vxor $twk1,$tweak,$rndkey0
2517 vsrab $tmp,$tweak,$seven # next tweak value
2518 vaddubm $tweak,$tweak,$tweak
2519 vsldoi $tmp,$tmp,$tmp,15
2520 le?vperm $in1,$in1,$in1,$leperm
2521 vand $tmp,$tmp,$eighty7
2522 vxor $out1,$in1,$twk1
2523 vxor $tweak,$tweak,$tmp
2524
2525 lvx_u $in2,$x20,$inp
2526 andi. $taillen,$len,15
2527 vxor $twk2,$tweak,$rndkey0
2528 vsrab $tmp,$tweak,$seven # next tweak value
2529 vaddubm $tweak,$tweak,$tweak
2530 vsldoi $tmp,$tmp,$tmp,15
2531 le?vperm $in2,$in2,$in2,$leperm
2532 vand $tmp,$tmp,$eighty7
2533 vxor $out2,$in2,$twk2
2534 vxor $tweak,$tweak,$tmp
2535
2536 lvx_u $in3,$x30,$inp
2537 sub $len,$len,$taillen
2538 vxor $twk3,$tweak,$rndkey0
2539 vsrab $tmp,$tweak,$seven # next tweak value
2540 vaddubm $tweak,$tweak,$tweak
2541 vsldoi $tmp,$tmp,$tmp,15
2542 le?vperm $in3,$in3,$in3,$leperm
2543 vand $tmp,$tmp,$eighty7
2544 vxor $out3,$in3,$twk3
2545 vxor $tweak,$tweak,$tmp
2546
2547 lvx_u $in4,$x40,$inp
2548 subi $len,$len,0x60
2549 vxor $twk4,$tweak,$rndkey0
2550 vsrab $tmp,$tweak,$seven # next tweak value
2551 vaddubm $tweak,$tweak,$tweak
2552 vsldoi $tmp,$tmp,$tmp,15
2553 le?vperm $in4,$in4,$in4,$leperm
2554 vand $tmp,$tmp,$eighty7
2555 vxor $out4,$in4,$twk4
2556 vxor $tweak,$tweak,$tmp
2557
2558 lvx_u $in5,$x50,$inp
2559 addi $inp,$inp,0x60
2560 vxor $twk5,$tweak,$rndkey0
2561 vsrab $tmp,$tweak,$seven # next tweak value
2562 vaddubm $tweak,$tweak,$tweak
2563 vsldoi $tmp,$tmp,$tmp,15
2564 le?vperm $in5,$in5,$in5,$leperm
2565 vand $tmp,$tmp,$eighty7
2566 vxor $out5,$in5,$twk5
2567 vxor $tweak,$tweak,$tmp
2568
2569 vxor v31,v31,$rndkey0
2570 mtctr $rounds
2571 b Loop_xts_enc6x
2572
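	# Main 6x loop: six independent cipher streams hide vcipher
	# latency, and generation of the next six tweaks is interleaved
	# between the rounds instead of being computed up front.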
2573.align 5
2574Loop_xts_enc6x:
2575 vcipher $out0,$out0,v24
2576 vcipher $out1,$out1,v24
2577 vcipher $out2,$out2,v24
2578 vcipher $out3,$out3,v24
2579 vcipher $out4,$out4,v24
2580 vcipher $out5,$out5,v24
2581 lvx v24,$x20,$key_ # round[3]
2582 addi $key_,$key_,0x20
2583
2584 vcipher $out0,$out0,v25
2585 vcipher $out1,$out1,v25
2586 vcipher $out2,$out2,v25
2587 vcipher $out3,$out3,v25
2588 vcipher $out4,$out4,v25
2589 vcipher $out5,$out5,v25
2590 lvx v25,$x10,$key_ # round[4]
2591 bdnz Loop_xts_enc6x
2592
2593 subic $len,$len,96 # $len-=96
2594 vxor $in0,$twk0,v31 # xor with last round key
2595 vcipher $out0,$out0,v24
2596 vcipher $out1,$out1,v24
2597 vsrab $tmp,$tweak,$seven # next tweak value
2598 vxor $twk0,$tweak,$rndkey0
2599 vaddubm $tweak,$tweak,$tweak
2600 vcipher $out2,$out2,v24
2601 vcipher $out3,$out3,v24
2602 vsldoi $tmp,$tmp,$tmp,15
2603 vcipher $out4,$out4,v24
2604 vcipher $out5,$out5,v24
2605
2606 subfe. r0,r0,r0 # borrow?-1:0
2607 vand $tmp,$tmp,$eighty7
2608 vcipher $out0,$out0,v25
2609 vcipher $out1,$out1,v25
2610 vxor $tweak,$tweak,$tmp
2611 vcipher $out2,$out2,v25
2612 vcipher $out3,$out3,v25
2613 vxor $in1,$twk1,v31
2614 vsrab $tmp,$tweak,$seven # next tweak value
2615 vxor $twk1,$tweak,$rndkey0
2616 vcipher $out4,$out4,v25
2617 vcipher $out5,$out5,v25
2618
2619 and r0,r0,$len
2620 vaddubm $tweak,$tweak,$tweak
2621 vsldoi $tmp,$tmp,$tmp,15
2622 vcipher $out0,$out0,v26
2623 vcipher $out1,$out1,v26
2624 vand $tmp,$tmp,$eighty7
2625 vcipher $out2,$out2,v26
2626 vcipher $out3,$out3,v26
2627 vxor $tweak,$tweak,$tmp
2628 vcipher $out4,$out4,v26
2629 vcipher $out5,$out5,v26
2630
2631 add $inp,$inp,r0 # $inp is adjusted in such a
2632 # way that at exit from the
2633 # loop inX-in5 are loaded
2634 # with the last "words"
2635 vxor $in2,$twk2,v31
2636 vsrab $tmp,$tweak,$seven # next tweak value
2637 vxor $twk2,$tweak,$rndkey0
2638 vaddubm $tweak,$tweak,$tweak
2639 vcipher $out0,$out0,v27
2640 vcipher $out1,$out1,v27
2641 vsldoi $tmp,$tmp,$tmp,15
2642 vcipher $out2,$out2,v27
2643 vcipher $out3,$out3,v27
2644 vand $tmp,$tmp,$eighty7
2645 vcipher $out4,$out4,v27
2646 vcipher $out5,$out5,v27
2647
2648 addi $key_,$sp,$FRAME+15 # rewind $key_
2649 vxor $tweak,$tweak,$tmp
2650 vcipher $out0,$out0,v28
2651 vcipher $out1,$out1,v28
2652 vxor $in3,$twk3,v31
2653 vsrab $tmp,$tweak,$seven # next tweak value
2654 vxor $twk3,$tweak,$rndkey0
2655 vcipher $out2,$out2,v28
2656 vcipher $out3,$out3,v28
2657 vaddubm $tweak,$tweak,$tweak
2658 vsldoi $tmp,$tmp,$tmp,15
2659 vcipher $out4,$out4,v28
2660 vcipher $out5,$out5,v28
2661 lvx v24,$x00,$key_ # re-pre-load round[1]
2662 vand $tmp,$tmp,$eighty7
2663
2664 vcipher $out0,$out0,v29
2665 vcipher $out1,$out1,v29
2666 vxor $tweak,$tweak,$tmp
2667 vcipher $out2,$out2,v29
2668 vcipher $out3,$out3,v29
2669 vxor $in4,$twk4,v31
2670 vsrab $tmp,$tweak,$seven # next tweak value
2671 vxor $twk4,$tweak,$rndkey0
2672 vcipher $out4,$out4,v29
2673 vcipher $out5,$out5,v29
2674 lvx v25,$x10,$key_ # re-pre-load round[2]
2675 vaddubm $tweak,$tweak,$tweak
2676 vsldoi $tmp,$tmp,$tmp,15
2677
2678 vcipher $out0,$out0,v30
2679 vcipher $out1,$out1,v30
2680 vand $tmp,$tmp,$eighty7
2681 vcipher $out2,$out2,v30
2682 vcipher $out3,$out3,v30
2683 vxor $tweak,$tweak,$tmp
2684 vcipher $out4,$out4,v30
2685 vcipher $out5,$out5,v30
2686 vxor $in5,$twk5,v31
2687 vsrab $tmp,$tweak,$seven # next tweak value
2688 vxor $twk5,$tweak,$rndkey0
2689
2690 vcipherlast $out0,$out0,$in0
2691 lvx_u $in0,$x00,$inp # load next input block
2692 vaddubm $tweak,$tweak,$tweak
2693 vsldoi $tmp,$tmp,$tmp,15
2694 vcipherlast $out1,$out1,$in1
2695 lvx_u $in1,$x10,$inp
2696 vcipherlast $out2,$out2,$in2
2697 le?vperm $in0,$in0,$in0,$leperm
2698 lvx_u $in2,$x20,$inp
2699 vand $tmp,$tmp,$eighty7
2700 vcipherlast $out3,$out3,$in3
2701 le?vperm $in1,$in1,$in1,$leperm
2702 lvx_u $in3,$x30,$inp
2703 vcipherlast $out4,$out4,$in4
2704 le?vperm $in2,$in2,$in2,$leperm
2705 lvx_u $in4,$x40,$inp
2706 vxor $tweak,$tweak,$tmp
2707 vcipherlast $tmp,$out5,$in5 # last block might be needed
2708 # in stealing mode
2709 le?vperm $in3,$in3,$in3,$leperm
2710 lvx_u $in5,$x50,$inp
2711 addi $inp,$inp,0x60
2712 le?vperm $in4,$in4,$in4,$leperm
2713 le?vperm $in5,$in5,$in5,$leperm
2714
2715 le?vperm $out0,$out0,$out0,$leperm
2716 le?vperm $out1,$out1,$out1,$leperm
2717 stvx_u $out0,$x00,$out # store output
2718 vxor $out0,$in0,$twk0
2719 le?vperm $out2,$out2,$out2,$leperm
2720 stvx_u $out1,$x10,$out
2721 vxor $out1,$in1,$twk1
2722 le?vperm $out3,$out3,$out3,$leperm
2723 stvx_u $out2,$x20,$out
2724 vxor $out2,$in2,$twk2
2725 le?vperm $out4,$out4,$out4,$leperm
2726 stvx_u $out3,$x30,$out
2727 vxor $out3,$in3,$twk3
2728 le?vperm $out5,$tmp,$tmp,$leperm
2729 stvx_u $out4,$x40,$out
2730 vxor $out4,$in4,$twk4
2731 le?stvx_u $out5,$x50,$out
2732 be?stvx_u $tmp, $x50,$out
2733 vxor $out5,$in5,$twk5
2734 addi $out,$out,0x60
2735
2736 mtctr $rounds
2737 beq Loop_xts_enc6x # did $len-=96 borrow?
2738
2739 addic. $len,$len,0x60
2740 beq Lxts_enc6x_zero
2741 cmpwi $len,0x20
2742 blt Lxts_enc6x_one
2743 nop
2744 beq Lxts_enc6x_two
2745 cmpwi $len,0x40
2746 blt Lxts_enc6x_three
2747 nop
2748 beq Lxts_enc6x_four
2749
2750Lxts_enc6x_five:
2751 vxor $out0,$in1,$twk0
2752 vxor $out1,$in2,$twk1
2753 vxor $out2,$in3,$twk2
2754 vxor $out3,$in4,$twk3
2755 vxor $out4,$in5,$twk4
2756
2757 bl _aesp8_xts_enc5x
2758
2759 le?vperm $out0,$out0,$out0,$leperm
2760 vmr $twk0,$twk5 # unused tweak
2761 le?vperm $out1,$out1,$out1,$leperm
2762 stvx_u $out0,$x00,$out # store output
2763 le?vperm $out2,$out2,$out2,$leperm
2764 stvx_u $out1,$x10,$out
2765 le?vperm $out3,$out3,$out3,$leperm
2766 stvx_u $out2,$x20,$out
2767 vxor $tmp,$out4,$twk5 # last block prep for stealing
2768 le?vperm $out4,$out4,$out4,$leperm
2769 stvx_u $out3,$x30,$out
2770 stvx_u $out4,$x40,$out
2771 addi $out,$out,0x50
2772 bne Lxts_enc6x_steal
2773 b Lxts_enc6x_done
2774
2775.align 4
2776Lxts_enc6x_four:
2777 vxor $out0,$in2,$twk0
2778 vxor $out1,$in3,$twk1
2779 vxor $out2,$in4,$twk2
2780 vxor $out3,$in5,$twk3
2781 vxor $out4,$out4,$out4
2782
2783 bl _aesp8_xts_enc5x
2784
2785 le?vperm $out0,$out0,$out0,$leperm
2786 vmr $twk0,$twk4 # unused tweak
2787 le?vperm $out1,$out1,$out1,$leperm
2788 stvx_u $out0,$x00,$out # store output
2789 le?vperm $out2,$out2,$out2,$leperm
2790 stvx_u $out1,$x10,$out
2791 vxor $tmp,$out3,$twk4 # last block prep for stealing
2792 le?vperm $out3,$out3,$out3,$leperm
2793 stvx_u $out2,$x20,$out
2794 stvx_u $out3,$x30,$out
2795 addi $out,$out,0x40
2796 bne Lxts_enc6x_steal
2797 b Lxts_enc6x_done
2798
2799.align 4
2800Lxts_enc6x_three:
2801 vxor $out0,$in3,$twk0
2802 vxor $out1,$in4,$twk1
2803 vxor $out2,$in5,$twk2
2804 vxor $out3,$out3,$out3
2805 vxor $out4,$out4,$out4
2806
2807 bl _aesp8_xts_enc5x
2808
2809 le?vperm $out0,$out0,$out0,$leperm
2810 vmr $twk0,$twk3 # unused tweak
2811 le?vperm $out1,$out1,$out1,$leperm
2812 stvx_u $out0,$x00,$out # store output
2813 vxor $tmp,$out2,$twk3 # last block prep for stealing
2814 le?vperm $out2,$out2,$out2,$leperm
2815 stvx_u $out1,$x10,$out
2816 stvx_u $out2,$x20,$out
2817 addi $out,$out,0x30
2818 bne Lxts_enc6x_steal
2819 b Lxts_enc6x_done
2820
2821.align 4
2822Lxts_enc6x_two:
2823 vxor $out0,$in4,$twk0
2824 vxor $out1,$in5,$twk1
2825 vxor $out2,$out2,$out2
2826 vxor $out3,$out3,$out3
2827 vxor $out4,$out4,$out4
2828
2829 bl _aesp8_xts_enc5x
2830
2831 le?vperm $out0,$out0,$out0,$leperm
2832 vmr $twk0,$twk2 # unused tweak
2833 vxor $tmp,$out1,$twk2 # last block prep for stealing
2834 le?vperm $out1,$out1,$out1,$leperm
2835 stvx_u $out0,$x00,$out # store output
2836 stvx_u $out1,$x10,$out
2837 addi $out,$out,0x20
2838 bne Lxts_enc6x_steal
2839 b Lxts_enc6x_done
2840
2841.align 4
2842Lxts_enc6x_one:
2843 vxor $out0,$in5,$twk0
2844 nop
2845Loop_xts_enc1x:
2846 vcipher $out0,$out0,v24
2847 lvx v24,$x20,$key_ # round[3]
2848 addi $key_,$key_,0x20
2849
2850 vcipher $out0,$out0,v25
2851 lvx v25,$x10,$key_ # round[4]
2852 bdnz Loop_xts_enc1x
2853
2854 add $inp,$inp,$taillen
2855 cmpwi $taillen,0
2856 vcipher $out0,$out0,v24
2857
2858 subi $inp,$inp,16
2859 vcipher $out0,$out0,v25
2860
2861 lvsr $inpperm,0,$taillen
2862 vcipher $out0,$out0,v26
2863
2864 lvx_u $in0,0,$inp
2865 vcipher $out0,$out0,v27
2866
2867 addi $key_,$sp,$FRAME+15 # rewind $key_
2868 vcipher $out0,$out0,v28
2869 lvx v24,$x00,$key_ # re-pre-load round[1]
2870
2871 vcipher $out0,$out0,v29
2872 lvx v25,$x10,$key_ # re-pre-load round[2]
2873 vxor $twk0,$twk0,v31
2874
2875 le?vperm $in0,$in0,$in0,$leperm
2876 vcipher $out0,$out0,v30
2877
2878 vperm $in0,$in0,$in0,$inpperm
2879 vcipherlast $out0,$out0,$twk0
2880
2881 vmr $twk0,$twk1 # unused tweak
2882 vxor $tmp,$out0,$twk1 # last block prep for stealing
2883 le?vperm $out0,$out0,$out0,$leperm
2884 stvx_u $out0,$x00,$out # store output
2885 addi $out,$out,0x10
2886 bne Lxts_enc6x_steal
2887 b Lxts_enc6x_done
2888
2889.align 4
2890Lxts_enc6x_zero:
2891 cmpwi $taillen,0
2892 beq Lxts_enc6x_done
2893
2894 add $inp,$inp,$taillen
2895 subi $inp,$inp,16
2896 lvx_u $in0,0,$inp
2897 lvsr $inpperm,0,$taillen # $in5 is no more
2898 le?vperm $in0,$in0,$in0,$leperm
2899 vperm $in0,$in0,$in0,$inpperm
2900 vxor $tmp,$tmp,$twk0
2901Lxts_enc6x_steal:
2902 vxor $in0,$in0,$twk0
2903 vxor $out0,$out0,$out0
2904 vspltisb $out1,-1
2905 vperm $out0,$out0,$out1,$inpperm
2906 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2907
2908 subi r30,$out,17
2909 subi $out,$out,16
2910 mtctr $taillen
2911Loop_xts_enc6x_steal:
2912 lbzu r0,1(r30)
2913 stb r0,16(r30)
2914 bdnz Loop_xts_enc6x_steal
2915
2916 li $taillen,0
2917 mtctr $rounds
2918 b Loop_xts_enc1x # one more time...
2919
2920.align 4
2921Lxts_enc6x_done:
2922 ${UCMP}i $ivp,0
2923 beq Lxts_enc6x_ret
2924
2925 vxor $tweak,$twk0,$rndkey0
2926 le?vperm $tweak,$tweak,$tweak,$leperm
2927 stvx_u $tweak,0,$ivp
2928
2929Lxts_enc6x_ret:
2930 mtlr r11
2931 li r10,`$FRAME+15`
2932 li r11,`$FRAME+31`
2933 stvx $seven,r10,$sp # wipe copies of round keys
2934 addi r10,r10,32
2935 stvx $seven,r11,$sp
2936 addi r11,r11,32
2937 stvx $seven,r10,$sp
2938 addi r10,r10,32
2939 stvx $seven,r11,$sp
2940 addi r11,r11,32
2941 stvx $seven,r10,$sp
2942 addi r10,r10,32
2943 stvx $seven,r11,$sp
2944 addi r11,r11,32
2945 stvx $seven,r10,$sp
2946 addi r10,r10,32
2947 stvx $seven,r11,$sp
2948 addi r11,r11,32
2949
2950 mtspr 256,$vrsave
2951 lvx v20,r10,$sp # ABI says so
2952 addi r10,r10,32
2953 lvx v21,r11,$sp
2954 addi r11,r11,32
2955 lvx v22,r10,$sp
2956 addi r10,r10,32
2957 lvx v23,r11,$sp
2958 addi r11,r11,32
2959 lvx v24,r10,$sp
2960 addi r10,r10,32
2961 lvx v25,r11,$sp
2962 addi r11,r11,32
2963 lvx v26,r10,$sp
2964 addi r10,r10,32
2965 lvx v27,r11,$sp
2966 addi r11,r11,32
2967 lvx v28,r10,$sp
2968 addi r10,r10,32
2969 lvx v29,r11,$sp
2970 addi r11,r11,32
2971 lvx v30,r10,$sp
2972 lvx v31,r11,$sp
2973 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2974 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2975 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2976 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2977 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2978 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2979 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2980 blr
2981 .long 0
2982 .byte 0,12,0x04,1,0x80,6,6,0
2983 .long 0
2984
2985.align 5
2986_aesp8_xts_enc5x:
2987 vcipher $out0,$out0,v24
2988 vcipher $out1,$out1,v24
2989 vcipher $out2,$out2,v24
2990 vcipher $out3,$out3,v24
2991 vcipher $out4,$out4,v24
2992 lvx v24,$x20,$key_ # round[3]
2993 addi $key_,$key_,0x20
2994
2995 vcipher $out0,$out0,v25
2996 vcipher $out1,$out1,v25
2997 vcipher $out2,$out2,v25
2998 vcipher $out3,$out3,v25
2999 vcipher $out4,$out4,v25
3000 lvx v25,$x10,$key_ # round[4]
3001 bdnz _aesp8_xts_enc5x
3002
3003 add $inp,$inp,$taillen
3004 cmpwi $taillen,0
3005 vcipher $out0,$out0,v24
3006 vcipher $out1,$out1,v24
3007 vcipher $out2,$out2,v24
3008 vcipher $out3,$out3,v24
3009 vcipher $out4,$out4,v24
3010
3011 subi $inp,$inp,16
3012 vcipher $out0,$out0,v25
3013 vcipher $out1,$out1,v25
3014 vcipher $out2,$out2,v25
3015 vcipher $out3,$out3,v25
3016 vcipher $out4,$out4,v25
3017 vxor $twk0,$twk0,v31
3018
3019 vcipher $out0,$out0,v26
3020 lvsr $inpperm,0,$taillen # $in5 is no more
3021 vcipher $out1,$out1,v26
3022 vcipher $out2,$out2,v26
3023 vcipher $out3,$out3,v26
3024 vcipher $out4,$out4,v26
3025 vxor $in1,$twk1,v31
3026
3027 vcipher $out0,$out0,v27
3028 lvx_u $in0,0,$inp
3029 vcipher $out1,$out1,v27
3030 vcipher $out2,$out2,v27
3031 vcipher $out3,$out3,v27
3032 vcipher $out4,$out4,v27
3033 vxor $in2,$twk2,v31
3034
3035 addi $key_,$sp,$FRAME+15 # rewind $key_
3036 vcipher $out0,$out0,v28
3037 vcipher $out1,$out1,v28
3038 vcipher $out2,$out2,v28
3039 vcipher $out3,$out3,v28
3040 vcipher $out4,$out4,v28
3041 lvx v24,$x00,$key_ # re-pre-load round[1]
3042 vxor $in3,$twk3,v31
3043
3044 vcipher $out0,$out0,v29
3045 le?vperm $in0,$in0,$in0,$leperm
3046 vcipher $out1,$out1,v29
3047 vcipher $out2,$out2,v29
3048 vcipher $out3,$out3,v29
3049 vcipher $out4,$out4,v29
3050 lvx v25,$x10,$key_ # re-pre-load round[2]
3051 vxor $in4,$twk4,v31
3052
3053 vcipher $out0,$out0,v30
3054 vperm $in0,$in0,$in0,$inpperm
3055 vcipher $out1,$out1,v30
3056 vcipher $out2,$out2,v30
3057 vcipher $out3,$out3,v30
3058 vcipher $out4,$out4,v30
3059
3060 vcipherlast $out0,$out0,$twk0
3061 vcipherlast $out1,$out1,$in1
3062 vcipherlast $out2,$out2,$in2
3063 vcipherlast $out3,$out3,$in3
3064 vcipherlast $out4,$out4,$in4
3065 blr
3066 .long 0
3067 .byte 0,12,0x14,0,0,0,0,0
3068
3069.align 5
3070_aesp8_xts_decrypt6x:
3071 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3072 mflr r11
3073 li r7,`$FRAME+8*16+15`
3074 li r3,`$FRAME+8*16+31`
3075 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3076 stvx v20,r7,$sp # ABI says so
3077 addi r7,r7,32
3078 stvx v21,r3,$sp
3079 addi r3,r3,32
3080 stvx v22,r7,$sp
3081 addi r7,r7,32
3082 stvx v23,r3,$sp
3083 addi r3,r3,32
3084 stvx v24,r7,$sp
3085 addi r7,r7,32
3086 stvx v25,r3,$sp
3087 addi r3,r3,32
3088 stvx v26,r7,$sp
3089 addi r7,r7,32
3090 stvx v27,r3,$sp
3091 addi r3,r3,32
3092 stvx v28,r7,$sp
3093 addi r7,r7,32
3094 stvx v29,r3,$sp
3095 addi r3,r3,32
3096 stvx v30,r7,$sp
3097 stvx v31,r3,$sp
3098 li r0,-1
3099 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3100 li $x10,0x10
3101 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3102 li $x20,0x20
3103 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3104 li $x30,0x30
3105 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3106 li $x40,0x40
3107 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3108 li $x50,0x50
3109 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3110 li $x60,0x60
3111 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3112 li $x70,0x70
3113 mtspr 256,r0
3114
3115 subi $rounds,$rounds,3 # -4 in total
3116
3117 lvx $rndkey0,$x00,$key1 # load key schedule
3118 lvx v30,$x10,$key1
3119 addi $key1,$key1,0x20
3120 lvx v31,$x00,$key1
3121 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3122 addi $key_,$sp,$FRAME+15
3123 mtctr $rounds
3124
3125Load_xts_dec_key:
3126 ?vperm v24,v30,v31,$keyperm
3127 lvx v30,$x10,$key1
3128 addi $key1,$key1,0x20
3129 stvx v24,$x00,$key_ # off-load round[1]
3130 ?vperm v25,v31,v30,$keyperm
3131 lvx v31,$x00,$key1
3132 stvx v25,$x10,$key_ # off-load round[2]
3133 addi $key_,$key_,0x20
3134 bdnz Load_xts_dec_key
3135
3136 lvx v26,$x10,$key1
3137 ?vperm v24,v30,v31,$keyperm
3138 lvx v27,$x20,$key1
3139 stvx v24,$x00,$key_ # off-load round[3]
3140 ?vperm v25,v31,v26,$keyperm
3141 lvx v28,$x30,$key1
3142 stvx v25,$x10,$key_ # off-load round[4]
3143 addi $key_,$sp,$FRAME+15 # rewind $key_
3144 ?vperm v26,v26,v27,$keyperm
3145 lvx v29,$x40,$key1
3146 ?vperm v27,v27,v28,$keyperm
3147 lvx v30,$x50,$key1
3148 ?vperm v28,v28,v29,$keyperm
3149 lvx v31,$x60,$key1
3150 ?vperm v29,v29,v30,$keyperm
3151 lvx $twk5,$x70,$key1 # borrow $twk5
3152 ?vperm v30,v30,v31,$keyperm
3153 lvx v24,$x00,$key_ # pre-load round[1]
3154 ?vperm v31,v31,$twk5,$keyperm
3155 lvx v25,$x10,$key_ # pre-load round[2]
3156
3157 vperm $in0,$inout,$inptail,$inpperm
3158 subi $inp,$inp,31 # undo "caller"
3159 vxor $twk0,$tweak,$rndkey0
3160 vsrab $tmp,$tweak,$seven # next tweak value
3161 vaddubm $tweak,$tweak,$tweak
3162 vsldoi $tmp,$tmp,$tmp,15
3163 vand $tmp,$tmp,$eighty7
3164 vxor $out0,$in0,$twk0
3165 vxor $tweak,$tweak,$tmp
3166
3167 lvx_u $in1,$x10,$inp
3168 vxor $twk1,$tweak,$rndkey0
3169 vsrab $tmp,$tweak,$seven # next tweak value
3170 vaddubm $tweak,$tweak,$tweak
3171 vsldoi $tmp,$tmp,$tmp,15
3172 le?vperm $in1,$in1,$in1,$leperm
3173 vand $tmp,$tmp,$eighty7
3174 vxor $out1,$in1,$twk1
3175 vxor $tweak,$tweak,$tmp
3176
3177 lvx_u $in2,$x20,$inp
3178 andi. $taillen,$len,15
3179 vxor $twk2,$tweak,$rndkey0
3180 vsrab $tmp,$tweak,$seven # next tweak value
3181 vaddubm $tweak,$tweak,$tweak
3182 vsldoi $tmp,$tmp,$tmp,15
3183 le?vperm $in2,$in2,$in2,$leperm
3184 vand $tmp,$tmp,$eighty7
3185 vxor $out2,$in2,$twk2
3186 vxor $tweak,$tweak,$tmp
3187
3188 lvx_u $in3,$x30,$inp
3189 sub $len,$len,$taillen
3190 vxor $twk3,$tweak,$rndkey0
3191 vsrab $tmp,$tweak,$seven # next tweak value
3192 vaddubm $tweak,$tweak,$tweak
3193 vsldoi $tmp,$tmp,$tmp,15
3194 le?vperm $in3,$in3,$in3,$leperm
3195 vand $tmp,$tmp,$eighty7
3196 vxor $out3,$in3,$twk3
3197 vxor $tweak,$tweak,$tmp
3198
3199 lvx_u $in4,$x40,$inp
3200 subi $len,$len,0x60
3201 vxor $twk4,$tweak,$rndkey0
3202 vsrab $tmp,$tweak,$seven # next tweak value
3203 vaddubm $tweak,$tweak,$tweak
3204 vsldoi $tmp,$tmp,$tmp,15
3205 le?vperm $in4,$in4,$in4,$leperm
3206 vand $tmp,$tmp,$eighty7
3207 vxor $out4,$in4,$twk4
3208 vxor $tweak,$tweak,$tmp
3209
3210 lvx_u $in5,$x50,$inp
3211 addi $inp,$inp,0x60
3212 vxor $twk5,$tweak,$rndkey0
3213 vsrab $tmp,$tweak,$seven # next tweak value
3214 vaddubm $tweak,$tweak,$tweak
3215 vsldoi $tmp,$tmp,$tmp,15
3216 le?vperm $in5,$in5,$in5,$leperm
3217 vand $tmp,$tmp,$eighty7
3218 vxor $out5,$in5,$twk5
3219 vxor $tweak,$tweak,$tmp
3220
3221 vxor v31,v31,$rndkey0
3222 mtctr $rounds
3223 b Loop_xts_dec6x
3224
3225.align 5
3226Loop_xts_dec6x:
3227 vncipher $out0,$out0,v24
3228 vncipher $out1,$out1,v24
3229 vncipher $out2,$out2,v24
3230 vncipher $out3,$out3,v24
3231 vncipher $out4,$out4,v24
3232 vncipher $out5,$out5,v24
3233 lvx v24,$x20,$key_ # round[3]
3234 addi $key_,$key_,0x20
3235
3236 vncipher $out0,$out0,v25
3237 vncipher $out1,$out1,v25
3238 vncipher $out2,$out2,v25
3239 vncipher $out3,$out3,v25
3240 vncipher $out4,$out4,v25
3241 vncipher $out5,$out5,v25
3242 lvx v25,$x10,$key_ # round[4]
3243 bdnz Loop_xts_dec6x
3244
3245 subic $len,$len,96 # $len-=96
3246 vxor $in0,$twk0,v31 # xor with last round key
3247 vncipher $out0,$out0,v24
3248 vncipher $out1,$out1,v24
3249 vsrab $tmp,$tweak,$seven # next tweak value
3250 vxor $twk0,$tweak,$rndkey0
3251 vaddubm $tweak,$tweak,$tweak
3252 vncipher $out2,$out2,v24
3253 vncipher $out3,$out3,v24
3254 vsldoi $tmp,$tmp,$tmp,15
3255 vncipher $out4,$out4,v24
3256 vncipher $out5,$out5,v24
3257
3258 subfe. r0,r0,r0 # borrow?-1:0
3259 vand $tmp,$tmp,$eighty7
3260 vncipher $out0,$out0,v25
3261 vncipher $out1,$out1,v25
3262 vxor $tweak,$tweak,$tmp
3263 vncipher $out2,$out2,v25
3264 vncipher $out3,$out3,v25
3265 vxor $in1,$twk1,v31
3266 vsrab $tmp,$tweak,$seven # next tweak value
3267 vxor $twk1,$tweak,$rndkey0
3268 vncipher $out4,$out4,v25
3269 vncipher $out5,$out5,v25
3270
3271 and r0,r0,$len
3272 vaddubm $tweak,$tweak,$tweak
3273 vsldoi $tmp,$tmp,$tmp,15
3274 vncipher $out0,$out0,v26
3275 vncipher $out1,$out1,v26
3276 vand $tmp,$tmp,$eighty7
3277 vncipher $out2,$out2,v26
3278 vncipher $out3,$out3,v26
3279 vxor $tweak,$tweak,$tmp
3280 vncipher $out4,$out4,v26
3281 vncipher $out5,$out5,v26
3282
3283 add $inp,$inp,r0 # $inp is adjusted in such a
3284 # way that at exit from the
3285 # loop inX-in5 are loaded
3286 # with the last "words"
3287 vxor $in2,$twk2,v31
3288 vsrab $tmp,$tweak,$seven # next tweak value
3289 vxor $twk2,$tweak,$rndkey0
3290 vaddubm $tweak,$tweak,$tweak
3291 vncipher $out0,$out0,v27
3292 vncipher $out1,$out1,v27
3293 vsldoi $tmp,$tmp,$tmp,15
3294 vncipher $out2,$out2,v27
3295 vncipher $out3,$out3,v27
3296 vand $tmp,$tmp,$eighty7
3297 vncipher $out4,$out4,v27
3298 vncipher $out5,$out5,v27
3299
3300 addi $key_,$sp,$FRAME+15 # rewind $key_
3301 vxor $tweak,$tweak,$tmp
3302 vncipher $out0,$out0,v28
3303 vncipher $out1,$out1,v28
3304 vxor $in3,$twk3,v31
3305 vsrab $tmp,$tweak,$seven # next tweak value
3306 vxor $twk3,$tweak,$rndkey0
3307 vncipher $out2,$out2,v28
3308 vncipher $out3,$out3,v28
3309 vaddubm $tweak,$tweak,$tweak
3310 vsldoi $tmp,$tmp,$tmp,15
3311 vncipher $out4,$out4,v28
3312 vncipher $out5,$out5,v28
3313 lvx v24,$x00,$key_ # re-pre-load round[1]
3314 vand $tmp,$tmp,$eighty7
3315
3316 vncipher $out0,$out0,v29
3317 vncipher $out1,$out1,v29
3318 vxor $tweak,$tweak,$tmp
3319 vncipher $out2,$out2,v29
3320 vncipher $out3,$out3,v29
3321 vxor $in4,$twk4,v31
3322 vsrab $tmp,$tweak,$seven # next tweak value
3323 vxor $twk4,$tweak,$rndkey0
3324 vncipher $out4,$out4,v29
3325 vncipher $out5,$out5,v29
3326 lvx v25,$x10,$key_ # re-pre-load round[2]
3327 vaddubm $tweak,$tweak,$tweak
3328 vsldoi $tmp,$tmp,$tmp,15
3329
3330 vncipher $out0,$out0,v30
3331 vncipher $out1,$out1,v30
3332 vand $tmp,$tmp,$eighty7
3333 vncipher $out2,$out2,v30
3334 vncipher $out3,$out3,v30
3335 vxor $tweak,$tweak,$tmp
3336 vncipher $out4,$out4,v30
3337 vncipher $out5,$out5,v30
3338 vxor $in5,$twk5,v31
3339 vsrab $tmp,$tweak,$seven # next tweak value
3340 vxor $twk5,$tweak,$rndkey0
3341
3342 vncipherlast $out0,$out0,$in0
3343 lvx_u $in0,$x00,$inp # load next input block
3344 vaddubm $tweak,$tweak,$tweak
3345 vsldoi $tmp,$tmp,$tmp,15
3346 vncipherlast $out1,$out1,$in1
3347 lvx_u $in1,$x10,$inp
3348 vncipherlast $out2,$out2,$in2
3349 le?vperm $in0,$in0,$in0,$leperm
3350 lvx_u $in2,$x20,$inp
3351 vand $tmp,$tmp,$eighty7
3352 vncipherlast $out3,$out3,$in3
3353 le?vperm $in1,$in1,$in1,$leperm
3354 lvx_u $in3,$x30,$inp
3355 vncipherlast $out4,$out4,$in4
3356 le?vperm $in2,$in2,$in2,$leperm
3357 lvx_u $in4,$x40,$inp
3358 vxor $tweak,$tweak,$tmp
3359 vncipherlast $out5,$out5,$in5
3360 le?vperm $in3,$in3,$in3,$leperm
3361 lvx_u $in5,$x50,$inp
3362 addi $inp,$inp,0x60
3363 le?vperm $in4,$in4,$in4,$leperm
3364 le?vperm $in5,$in5,$in5,$leperm
3365
3366 le?vperm $out0,$out0,$out0,$leperm
3367 le?vperm $out1,$out1,$out1,$leperm
3368 stvx_u $out0,$x00,$out # store output
3369 vxor $out0,$in0,$twk0
3370 le?vperm $out2,$out2,$out2,$leperm
3371 stvx_u $out1,$x10,$out
3372 vxor $out1,$in1,$twk1
3373 le?vperm $out3,$out3,$out3,$leperm
3374 stvx_u $out2,$x20,$out
3375 vxor $out2,$in2,$twk2
3376 le?vperm $out4,$out4,$out4,$leperm
3377 stvx_u $out3,$x30,$out
3378 vxor $out3,$in3,$twk3
3379 le?vperm $out5,$out5,$out5,$leperm
3380 stvx_u $out4,$x40,$out
3381 vxor $out4,$in4,$twk4
3382 stvx_u $out5,$x50,$out
3383 vxor $out5,$in5,$twk5
3384 addi $out,$out,0x60
3385
3386 mtctr $rounds
3387 beq Loop_xts_dec6x # did $len-=96 borrow?
3388
3389 addic. $len,$len,0x60
3390 beq Lxts_dec6x_zero
3391 cmpwi $len,0x20
3392 blt Lxts_dec6x_one
3393 nop
3394 beq Lxts_dec6x_two
3395 cmpwi $len,0x40
3396 blt Lxts_dec6x_three
3397 nop
3398 beq Lxts_dec6x_four
3399
3400Lxts_dec6x_five:
3401 vxor $out0,$in1,$twk0
3402 vxor $out1,$in2,$twk1
3403 vxor $out2,$in3,$twk2
3404 vxor $out3,$in4,$twk3
3405 vxor $out4,$in5,$twk4
3406
3407 bl _aesp8_xts_dec5x
3408
3409 le?vperm $out0,$out0,$out0,$leperm
3410 vmr $twk0,$twk5 # unused tweak
3411 vxor $twk1,$tweak,$rndkey0
3412 le?vperm $out1,$out1,$out1,$leperm
3413 stvx_u $out0,$x00,$out # store output
3414 vxor $out0,$in0,$twk1
3415 le?vperm $out2,$out2,$out2,$leperm
3416 stvx_u $out1,$x10,$out
3417 le?vperm $out3,$out3,$out3,$leperm
3418 stvx_u $out2,$x20,$out
3419 le?vperm $out4,$out4,$out4,$leperm
3420 stvx_u $out3,$x30,$out
3421 stvx_u $out4,$x40,$out
3422 addi $out,$out,0x50
3423 bne Lxts_dec6x_steal
3424 b Lxts_dec6x_done
3425
3426.align 4
3427Lxts_dec6x_four:
3428 vxor $out0,$in2,$twk0
3429 vxor $out1,$in3,$twk1
3430 vxor $out2,$in4,$twk2
3431 vxor $out3,$in5,$twk3
3432 vxor $out4,$out4,$out4
3433
3434 bl _aesp8_xts_dec5x
3435
3436 le?vperm $out0,$out0,$out0,$leperm
3437 vmr $twk0,$twk4 # unused tweak
3438 vmr $twk1,$twk5
3439 le?vperm $out1,$out1,$out1,$leperm
3440 stvx_u $out0,$x00,$out # store output
3441 vxor $out0,$in0,$twk5
3442 le?vperm $out2,$out2,$out2,$leperm
3443 stvx_u $out1,$x10,$out
3444 le?vperm $out3,$out3,$out3,$leperm
3445 stvx_u $out2,$x20,$out
3446 stvx_u $out3,$x30,$out
3447 addi $out,$out,0x40
3448 bne Lxts_dec6x_steal
3449 b Lxts_dec6x_done
3450
3451.align 4
3452Lxts_dec6x_three:
3453 vxor $out0,$in3,$twk0
3454 vxor $out1,$in4,$twk1
3455 vxor $out2,$in5,$twk2
3456 vxor $out3,$out3,$out3
3457 vxor $out4,$out4,$out4
3458
3459 bl _aesp8_xts_dec5x
3460
3461 le?vperm $out0,$out0,$out0,$leperm
3462 vmr $twk0,$twk3 # unused tweak
3463 vmr $twk1,$twk4
3464 le?vperm $out1,$out1,$out1,$leperm
3465 stvx_u $out0,$x00,$out # store output
3466 vxor $out0,$in0,$twk4
3467 le?vperm $out2,$out2,$out2,$leperm
3468 stvx_u $out1,$x10,$out
3469 stvx_u $out2,$x20,$out
3470 addi $out,$out,0x30
3471 bne Lxts_dec6x_steal
3472 b Lxts_dec6x_done
3473
3474.align 4
3475Lxts_dec6x_two:
3476 vxor $out0,$in4,$twk0
3477 vxor $out1,$in5,$twk1
3478 vxor $out2,$out2,$out2
3479 vxor $out3,$out3,$out3
3480 vxor $out4,$out4,$out4
3481
3482 bl _aesp8_xts_dec5x
3483
3484 le?vperm $out0,$out0,$out0,$leperm
3485 vmr $twk0,$twk2 # unused tweak
3486 vmr $twk1,$twk3
3487 le?vperm $out1,$out1,$out1,$leperm
3488 stvx_u $out0,$x00,$out # store output
3489 vxor $out0,$in0,$twk3
3490 stvx_u $out1,$x10,$out
3491 addi $out,$out,0x20
3492 bne Lxts_dec6x_steal
3493 b Lxts_dec6x_done
3494
3495.align 4
3496Lxts_dec6x_one:
3497 vxor $out0,$in5,$twk0
3498 nop
3499Loop_xts_dec1x:
3500 vncipher $out0,$out0,v24
3501 lvx v24,$x20,$key_ # round[3]
3502 addi $key_,$key_,0x20
3503
3504 vncipher $out0,$out0,v25
3505 lvx v25,$x10,$key_ # round[4]
3506 bdnz Loop_xts_dec1x
3507
3508 subi r0,$taillen,1
3509 vncipher $out0,$out0,v24
3510
3511 andi. r0,r0,16
3512 cmpwi $taillen,0
3513 vncipher $out0,$out0,v25
3514
3515 sub $inp,$inp,r0
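	# r0 = ($taillen==0 ? 16 : 0): with no partial tail, back $inp up
	# a block so the speculative lvx_u below stays within the buffer.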
3516 vncipher $out0,$out0,v26
3517
3518 lvx_u $in0,0,$inp
3519 vncipher $out0,$out0,v27
3520
3521 addi $key_,$sp,$FRAME+15 # rewind $key_
3522 vncipher $out0,$out0,v28
3523 lvx v24,$x00,$key_ # re-pre-load round[1]
3524
3525 vncipher $out0,$out0,v29
3526 lvx v25,$x10,$key_ # re-pre-load round[2]
3527 vxor $twk0,$twk0,v31
3528
3529 le?vperm $in0,$in0,$in0,$leperm
3530 vncipher $out0,$out0,v30
3531
3532 mtctr $rounds
3533 vncipherlast $out0,$out0,$twk0
3534
3535 vmr $twk0,$twk1 # unused tweak
3536 vmr $twk1,$twk2
3537 le?vperm $out0,$out0,$out0,$leperm
3538 stvx_u $out0,$x00,$out # store output
3539 addi $out,$out,0x10
3540 vxor $out0,$in0,$twk2
3541 bne Lxts_dec6x_steal
3542 b Lxts_dec6x_done
3543
3544.align 4
3545Lxts_dec6x_zero:
3546 cmpwi $taillen,0
3547 beq Lxts_dec6x_done
3548
3549 lvx_u $in0,0,$inp
3550 le?vperm $in0,$in0,$in0,$leperm
3551 vxor $out0,$in0,$twk1
3552Lxts_dec6x_steal:
3553 vncipher $out0,$out0,v24
3554 lvx v24,$x20,$key_ # round[3]
3555 addi $key_,$key_,0x20
3556
3557 vncipher $out0,$out0,v25
3558 lvx v25,$x10,$key_ # round[4]
3559 bdnz Lxts_dec6x_steal
3560
3561 add $inp,$inp,$taillen
3562 vncipher $out0,$out0,v24
3563
3564 cmpwi $taillen,0
3565 vncipher $out0,$out0,v25
3566
3567 lvx_u $in0,0,$inp
3568 vncipher $out0,$out0,v26
3569
3570 lvsr $inpperm,0,$taillen # $in5 is no more
3571 vncipher $out0,$out0,v27
3572
3573 addi $key_,$sp,$FRAME+15 # rewind $key_
3574 vncipher $out0,$out0,v28
3575 lvx v24,$x00,$key_ # re-pre-load round[1]
3576
3577 vncipher $out0,$out0,v29
3578 lvx v25,$x10,$key_ # re-pre-load round[2]
3579 vxor $twk1,$twk1,v31
3580
3581 le?vperm $in0,$in0,$in0,$leperm
3582 vncipher $out0,$out0,v30
3583
3584 vperm $in0,$in0,$in0,$inpperm
3585 vncipherlast $tmp,$out0,$twk1
3586
3587 le?vperm $out0,$tmp,$tmp,$leperm
3588 le?stvx_u $out0,0,$out
3589 be?stvx_u $tmp,0,$out
3590
3591 vxor $out0,$out0,$out0
3592 vspltisb $out1,-1
3593 vperm $out0,$out0,$out1,$inpperm
3594 vsel $out0,$in0,$tmp,$out0
3595 vxor $out0,$out0,$twk0
3596
3597 subi r30,$out,1
3598 mtctr $taillen
3599Loop_xts_dec6x_steal:
3600 lbzu r0,1(r30)
3601 stb r0,16(r30)
3602 bdnz Loop_xts_dec6x_steal
3603
3604 li $taillen,0
3605 mtctr $rounds
3606 b Loop_xts_dec1x # one more time...
3607
3608.align 4
3609Lxts_dec6x_done:
3610 ${UCMP}i $ivp,0
3611 beq Lxts_dec6x_ret
3612
3613 vxor $tweak,$twk0,$rndkey0
3614 le?vperm $tweak,$tweak,$tweak,$leperm
3615 stvx_u $tweak,0,$ivp
3616
3617Lxts_dec6x_ret:
3618 mtlr r11
3619 li r10,`$FRAME+15`
3620 li r11,`$FRAME+31`
3621 stvx $seven,r10,$sp # wipe copies of round keys
3622 addi r10,r10,32
3623 stvx $seven,r11,$sp
3624 addi r11,r11,32
3625 stvx $seven,r10,$sp
3626 addi r10,r10,32
3627 stvx $seven,r11,$sp
3628 addi r11,r11,32
3629 stvx $seven,r10,$sp
3630 addi r10,r10,32
3631 stvx $seven,r11,$sp
3632 addi r11,r11,32
3633 stvx $seven,r10,$sp
3634 addi r10,r10,32
3635 stvx $seven,r11,$sp
3636 addi r11,r11,32
3637
3638 mtspr 256,$vrsave
3639 lvx v20,r10,$sp # ABI says so
3640 addi r10,r10,32
3641 lvx v21,r11,$sp
3642 addi r11,r11,32
3643 lvx v22,r10,$sp
3644 addi r10,r10,32
3645 lvx v23,r11,$sp
3646 addi r11,r11,32
3647 lvx v24,r10,$sp
3648 addi r10,r10,32
3649 lvx v25,r11,$sp
3650 addi r11,r11,32
3651 lvx v26,r10,$sp
3652 addi r10,r10,32
3653 lvx v27,r11,$sp
3654 addi r11,r11,32
3655 lvx v28,r10,$sp
3656 addi r10,r10,32
3657 lvx v29,r11,$sp
3658 addi r11,r11,32
3659 lvx v30,r10,$sp
3660 lvx v31,r11,$sp
3661 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3662 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3663 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3664 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3665 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3666 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3667 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3668 blr
3669 .long 0
3670 .byte 0,12,0x04,1,0x80,6,6,0
3671 .long 0
3672
3673.align 5
3674_aesp8_xts_dec5x:
3675 vncipher $out0,$out0,v24
3676 vncipher $out1,$out1,v24
3677 vncipher $out2,$out2,v24
3678 vncipher $out3,$out3,v24
3679 vncipher $out4,$out4,v24
3680 lvx v24,$x20,$key_ # round[3]
3681 addi $key_,$key_,0x20
3682
3683 vncipher $out0,$out0,v25
3684 vncipher $out1,$out1,v25
3685 vncipher $out2,$out2,v25
3686 vncipher $out3,$out3,v25
3687 vncipher $out4,$out4,v25
3688 lvx v25,$x10,$key_ # round[4]
3689 bdnz _aesp8_xts_dec5x
3690
3691 subi r0,$taillen,1
3692 vncipher $out0,$out0,v24
3693 vncipher $out1,$out1,v24
3694 vncipher $out2,$out2,v24
3695 vncipher $out3,$out3,v24
3696 vncipher $out4,$out4,v24
3697
3698 andi. r0,r0,16
3699 cmpwi $taillen,0
3700 vncipher $out0,$out0,v25
3701 vncipher $out1,$out1,v25
3702 vncipher $out2,$out2,v25
3703 vncipher $out3,$out3,v25
3704 vncipher $out4,$out4,v25
3705 vxor $twk0,$twk0,v31
3706
3707 sub $inp,$inp,r0
3708 vncipher $out0,$out0,v26
3709 vncipher $out1,$out1,v26
3710 vncipher $out2,$out2,v26
3711 vncipher $out3,$out3,v26
3712 vncipher $out4,$out4,v26
3713 vxor $in1,$twk1,v31
3714
3715 vncipher $out0,$out0,v27
3716 lvx_u $in0,0,$inp
3717 vncipher $out1,$out1,v27
3718 vncipher $out2,$out2,v27
3719 vncipher $out3,$out3,v27
3720 vncipher $out4,$out4,v27
3721 vxor $in2,$twk2,v31
3722
3723 addi $key_,$sp,$FRAME+15 # rewind $key_
3724 vncipher $out0,$out0,v28
3725 vncipher $out1,$out1,v28
3726 vncipher $out2,$out2,v28
3727 vncipher $out3,$out3,v28
3728 vncipher $out4,$out4,v28
3729 lvx v24,$x00,$key_ # re-pre-load round[1]
3730 vxor $in3,$twk3,v31
3731
3732 vncipher $out0,$out0,v29
3733 le?vperm $in0,$in0,$in0,$leperm
3734 vncipher $out1,$out1,v29
3735 vncipher $out2,$out2,v29
3736 vncipher $out3,$out3,v29
3737 vncipher $out4,$out4,v29
3738 lvx v25,$x10,$key_ # re-pre-load round[2]
3739 vxor $in4,$twk4,v31
3740
3741 vncipher $out0,$out0,v30
3742 vncipher $out1,$out1,v30
3743 vncipher $out2,$out2,v30
3744 vncipher $out3,$out3,v30
3745 vncipher $out4,$out4,v30
3746
3747 vncipherlast $out0,$out0,$twk0
3748 vncipherlast $out1,$out1,$in1
3749 vncipherlast $out2,$out2,$in2
3750 vncipherlast $out3,$out3,$in3
3751 vncipherlast $out4,$out4,$in4
3752 mtctr $rounds
3753 blr
3754 .long 0
3755 .byte 0,12,0x14,0,0,0,0,0
3756___
3757}} }}}
3758
3759my $consts=1;
3760foreach(split("\n",$code)) {
3761 s/\`([^\`]*)\`/eval($1)/geo;
3762
3763 # constants table endian-specific conversion
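 # e.g. ".long 0x01020304 ?rev" is emitted as an equivalent .byte
 # sequence, reversed on little-endian targets; "?inv" XORs each
 # byte with 0xf (used for permute-index constants).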
3764 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3765 my $conv=$3;
3766 my @bytes=();
3767
3768 # convert to endian-agnostic format
3769 if ($1 eq "long") {
3770 foreach (split(/,\s*/,$2)) {
3771 my $l = /^0/?oct:int;
3772 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3773 }
3774 } else {
3775 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3776 }
3777
3778 # little-endian conversion
3779 if ($flavour =~ /le$/o) {
3780 SWITCH: for($conv) {
3781 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3782 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3783 }
3784 }
3785
3786 # emit
3787 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3788 next;
3789 }
3790 $consts=0 if (m/Lconsts:/o); # end of table
3791
3792 # instructions prefixed with '?' are endian-specific and need
3793 # to be adjusted accordingly...
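 # e.g. on little-endian '?lvsl' is emitted as lvsr (and vice
 # versa) and the middle two source operands of '?vperm' are
 # swapped to match the byte-reversed lanes; on big-endian the
 # '?' prefix is simply dropped.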
3794 if ($flavour =~ /le$/o) { # little-endian
3795 s/le\?//o or
3796 s/be\?/#be#/o or
3797 s/\?lvsr/lvsl/o or
3798 s/\?lvsl/lvsr/o or
3799 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3800 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3801 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3802 } else { # big-endian
3803 s/le\?/#le#/o or
3804 s/be\?//o or
3805 s/\?([a-z]+)/$1/o;
3806 }
3807
3808 print $_,"\n";
3809}
3810
3811close STDOUT or die "error closing STDOUT: $!";