VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.1j/crypto/aes/asm/vpaes-ppc.pl@ 88461

Last change on this file since 88461 was 87984, checked in by vboxsync, 4 years ago

openssl-1.1.1j: Applied and adjusted our OpenSSL changes to 1.1.1j. bugref:9963

File size: 41.8 KB
1#! /usr/bin/env perl
2# Copyright 2013-2020 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the OpenSSL license (the "License"). You may not use
5# this file except in compliance with the License. You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8
9
10######################################################################
11## Constant-time SSSE3 AES core implementation.
12## version 0.1
13##
14## By Mike Hamburg (Stanford University), 2009
15## Public domain.
16##
17## For details see http://shiftleft.org/papers/vector_aes/ and
18## http://crypto.stanford.edu/vpaes/.
19
20# CBC encrypt/decrypt performance in cycles per byte processed with
21# 128-bit key.
22#
23# aes-ppc.pl this
24# PPC74x0/G4e 35.5/52.1/(23.8) 11.9(*)/15.4
25# PPC970/G5 37.9/55.0/(28.5) 22.2/28.5
26# POWER6 42.7/54.3/(28.2) 63.0/92.8(**)
27# POWER7 32.3/42.9/(18.4) 18.5/23.3
28#
29# (*) This is ~10% worse than reported in the paper. The reason is
30# twofold. First, this module makes no assumptions about key
31# schedule (or data, for that matter) alignment and handles it
32# in-line. Second, being transliterated from vpaes-x86_64.pl, it
33# relies on "nested inversion", which is better suited to Intel
34# CPUs.
35# (**) Inadequate POWER6 performance is due to astronomic AltiVec
36# latency, 9 cycles per simple logical operation.
37
38$flavour = shift;
39
40if ($flavour =~ /64/) {
41 $SIZE_T =8;
42 $LRSAVE =2*$SIZE_T;
43 $STU ="stdu";
44 $POP ="ld";
45 $PUSH ="std";
46 $UCMP ="cmpld";
47} elsif ($flavour =~ /32/) {
48 $SIZE_T =4;
49 $LRSAVE =$SIZE_T;
50 $STU ="stwu";
51 $POP ="lwz";
52 $PUSH ="stw";
53 $UCMP ="cmplw";
54} else { die "nonsense $flavour"; }
55
56$sp="r1";
57$FRAME=6*$SIZE_T+13*16; # 13*16 is for v20-v31 offload
58
59$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
60( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
61( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
62die "can't locate ppc-xlate.pl";
63
64open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
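# Usage sketch (an assumed invocation for illustration only; in practice the
# build system drives this script): the first argument is the "flavour"
# matched above (32- vs 64-bit ABI, endianness), and the remaining argument
# is handed on to ppc-xlate.pl as the output file, e.g.
#
#   perl vpaes-ppc.pl linux64le vpaes-ppc.s
#   perl vpaes-ppc.pl linux32   vpaes-ppc.s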
65
66$code.=<<___;
67.machine "any"
68
69.text
70
71.align 7 # totally strategic alignment
72_vpaes_consts:
73Lk_mc_forward: # mc_forward
74 .long 0x01020300, 0x05060704, 0x090a0b08, 0x0d0e0f0c ?inv
75 .long 0x05060704, 0x090a0b08, 0x0d0e0f0c, 0x01020300 ?inv
76 .long 0x090a0b08, 0x0d0e0f0c, 0x01020300, 0x05060704 ?inv
77 .long 0x0d0e0f0c, 0x01020300, 0x05060704, 0x090a0b08 ?inv
78Lk_mc_backward: # mc_backward
79 .long 0x03000102, 0x07040506, 0x0b08090a, 0x0f0c0d0e ?inv
80 .long 0x0f0c0d0e, 0x03000102, 0x07040506, 0x0b08090a ?inv
81 .long 0x0b08090a, 0x0f0c0d0e, 0x03000102, 0x07040506 ?inv
82 .long 0x07040506, 0x0b08090a, 0x0f0c0d0e, 0x03000102 ?inv
83Lk_sr: # sr
84 .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f ?inv
85 .long 0x00050a0f, 0x04090e03, 0x080d0207, 0x0c01060b ?inv
86 .long 0x0009020b, 0x040d060f, 0x08010a03, 0x0c050e07 ?inv
87 .long 0x000d0a07, 0x04010e0b, 0x0805020f, 0x0c090603 ?inv
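##
## The ?inv/?rev/?asis suffixes above and below are not assembler syntax;
## the Perl loop at the bottom of this file turns each such row of longs
## into a string of bytes and, for little-endian flavours, either XORs
## every byte with 0xf (?inv), reverses the 16 bytes (?rev) or leaves them
## untouched (?asis).
##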
88
89##
90## "Hot" constants
91##
92Lk_inv: # inv, inva
93 .long 0xf001080d, 0x0f06050e, 0x020c0b0a, 0x09030704 ?rev
94 .long 0xf0070b0f, 0x060a0401, 0x09080502, 0x0c0e0d03 ?rev
95Lk_ipt: # input transform (lo, hi)
96 .long 0x00702a5a, 0x98e8b2c2, 0x08782252, 0x90e0baca ?rev
97 .long 0x004d7c31, 0x7d30014c, 0x81ccfdb0, 0xfcb180cd ?rev
98Lk_sbo: # sbou, sbot
99 .long 0x00c7bd6f, 0x176dd2d0, 0x78a802c5, 0x7abfaa15 ?rev
100 .long 0x006abb5f, 0xa574e4cf, 0xfa352b41, 0xd1901e8e ?rev
101Lk_sb1: # sb1u, sb1t
102 .long 0x0023e2fa, 0x15d41836, 0xefd92e0d, 0xc1ccf73b ?rev
103 .long 0x003e50cb, 0x8fe19bb1, 0x44f52a14, 0x6e7adfa5 ?rev
104Lk_sb2: # sb2u, sb2t
105 .long 0x0029e10a, 0x4088eb69, 0x4a2382ab, 0xc863a1c2 ?rev
106 .long 0x0024710b, 0xc6937ae2, 0xcd2f98bc, 0x55e9b75e ?rev
107
108##
109## Decryption stuff
110##
111Lk_dipt: # decryption input transform
112 .long 0x005f540b, 0x045b500f, 0x1a454e11, 0x1e414a15 ?rev
113 .long 0x00650560, 0xe683e386, 0x94f191f4, 0x72177712 ?rev
114Lk_dsbo: # decryption sbox final output
115 .long 0x0040f97e, 0x53ea8713, 0x2d3e94d4, 0xb96daac7 ?rev
116 .long 0x001d4493, 0x0f56d712, 0x9c8ec5d8, 0x59814bca ?rev
117Lk_dsb9: # decryption sbox output *9*u, *9*t
118 .long 0x00d6869a, 0x53031c85, 0xc94c994f, 0x501fd5ca ?rev
119 .long 0x0049d7ec, 0x89173bc0, 0x65a5fbb2, 0x9e2c5e72 ?rev
120Lk_dsbd: # decryption sbox output *D*u, *D*t
121 .long 0x00a2b1e6, 0xdfcc577d, 0x39442a88, 0x139b6ef5 ?rev
122 .long 0x00cbc624, 0xf7fae23c, 0xd3efde15, 0x0d183129 ?rev
123Lk_dsbb: # decryption sbox output *B*u, *B*t
124 .long 0x0042b496, 0x926422d0, 0x04d4f2b0, 0xf6462660 ?rev
125 .long 0x006759cd, 0xa69894c1, 0x6baa5532, 0x3e0cfff3 ?rev
126Lk_dsbe: # decryption sbox output *E*u, *E*t
127 .long 0x00d0d426, 0x9692f246, 0xb0f6b464, 0x04604222 ?rev
128 .long 0x00c1aaff, 0xcda6550c, 0x323e5998, 0x6bf36794 ?rev
129
130##
131## Key schedule constants
132##
133Lk_dksd: # decryption key schedule: invskew x*D
134 .long 0x0047e4a3, 0x5d1ab9fe, 0xf9be1d5a, 0xa4e34007 ?rev
135 .long 0x008336b5, 0xf477c241, 0x1e9d28ab, 0xea69dc5f ?rev
136Lk_dksb: # decryption key schedule: invskew x*B
137 .long 0x00d55085, 0x1fca4f9a, 0x994cc91c, 0x8653d603 ?rev
138 .long 0x004afcb6, 0xa7ed5b11, 0xc882347e, 0x6f2593d9 ?rev
139Lk_dkse: # decryption key schedule: invskew x*E + 0x63
140 .long 0x00d6c91f, 0xca1c03d5, 0x86504f99, 0x4c9a8553 ?rev
141 .long 0xe87bdc4f, 0x059631a2, 0x8714b320, 0x6af95ecd ?rev
142Lk_dks9: # decryption key schedule: invskew x*9
143 .long 0x00a7d97e, 0xc86f11b6, 0xfc5b2582, 0x3493ed4a ?rev
144 .long 0x00331427, 0x62517645, 0xcefddae9, 0xac9fb88b ?rev
145
146Lk_rcon: # rcon
147 .long 0xb6ee9daf, 0xb991831f, 0x817d7c4d, 0x08982a70 ?asis
148Lk_s63:
149 .long 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b, 0x5b5b5b5b ?asis
150
151Lk_opt: # output transform
152 .long 0x0060b6d6, 0x29499fff, 0x0868bede, 0x214197f7 ?rev
153 .long 0x00ecbc50, 0x51bded01, 0xe00c5cb0, 0xb15d0de1 ?rev
154Lk_deskew: # deskew tables: inverts the sbox's "skew"
155 .long 0x00e3a447, 0x40a3e407, 0x1af9be5d, 0x5ab9fe1d ?rev
156 .long 0x0069ea83, 0xdcb5365f, 0x771e9df4, 0xabc24128 ?rev
157.align 5
158Lconsts:
159 mflr r0
160 bcl 20,31,\$+4
161 mflr r12 #vvvvv "distance between . and _vpaes_consts
162 addi r12,r12,-0x308
163 mtlr r0
164 blr
165 .long 0
166 .byte 0,12,0x14,0,0,0,0,0
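##
## How Lconsts works (the numbers follow from the table layout above): the
## "bcl 20,31" sets the link register to the address of the following
## mflr, which lies 0x308 bytes past _vpaes_consts (0x300 bytes of tables
## plus the two instructions before it), so subtracting 0x308 from r12
## leaves the base of the tables; callers then index r12 with the
## per-table offsets seen in the preheat routines (0xc0 for Lk_inv, 0xe0
## for Lk_ipt, and so on).
##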
167.asciz "Vector Permutation AES for AltiVec, Mike Hamburg (Stanford University)"
168.align 6
169___
170
171
172my ($inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm) = map("v$_",(26..31));
173{
174my ($inp,$out,$key) = map("r$_",(3..5));
175
176my ($invlo,$invhi,$iptlo,$ipthi,$sbou,$sbot) = map("v$_",(10..15));
177my ($sb1u,$sb1t,$sb2u,$sb2t) = map("v$_",(16..19));
178my ($sb9u,$sb9t,$sbdu,$sbdt,$sbbu,$sbbt,$sbeu,$sbet)=map("v$_",(16..23));
179
180$code.=<<___;
181##
182## _aes_preheat
183##
184## Fills register %r10 -> .aes_consts (so you can -fPIC)
185## and %xmm9-%xmm15 as specified below.
186##
187.align 4
188_vpaes_encrypt_preheat:
189 mflr r8
190 bl Lconsts
191 mtlr r8
192 li r11, 0xc0 # Lk_inv
193 li r10, 0xd0
194 li r9, 0xe0 # Lk_ipt
195 li r8, 0xf0
196 vxor v7, v7, v7 # 0x00..00
197 vspltisb v8,4 # 0x04..04
198 vspltisb v9,0x0f # 0x0f..0f
199 lvx $invlo, r12, r11
200 li r11, 0x100
201 lvx $invhi, r12, r10
202 li r10, 0x110
203 lvx $iptlo, r12, r9
204 li r9, 0x120
205 lvx $ipthi, r12, r8
206 li r8, 0x130
207 lvx $sbou, r12, r11
208 li r11, 0x140
209 lvx $sbot, r12, r10
210 li r10, 0x150
211 lvx $sb1u, r12, r9
212 lvx $sb1t, r12, r8
213 lvx $sb2u, r12, r11
214 lvx $sb2t, r12, r10
215 blr
216 .long 0
217 .byte 0,12,0x14,0,0,0,0,0
218
219##
220## _aes_encrypt_core
221##
222## AES-encrypt %xmm0.
223##
224## Inputs:
225## %xmm0 = input
226## %xmm9-%xmm15 as in _vpaes_preheat
227## (%rdx) = scheduled keys
228##
229## Output in %xmm0
230## Clobbers %xmm1-%xmm6, %r9, %r10, %r11, %rax
231##
232##
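##
## In this PPC transliteration the data block travels in v0 (the %xmm0 of
## the x86 original), the scheduled keys are read through the key pointer
## argument, and the comments on the right-hand side of each instruction
## preserve the vpaes-x86_64.pl code the line was transliterated from.
##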
233.align 5
234_vpaes_encrypt_core:
235 lwz r8, 240($key) # pull rounds
236 li r9, 16
237 lvx v5, 0, $key # vmovdqu (%r9), %xmm5 # round0 key
238 li r11, 0x10
239 lvx v6, r9, $key
240 addi r9, r9, 16
241 ?vperm v5, v5, v6, $keyperm # align round key
242 addi r10, r11, 0x40
243 vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
244 vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm1
245 vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm3, %xmm2
246 vxor v0, v0, v5 # vpxor %xmm5, %xmm1, %xmm0
247 vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0
248 mtctr r8
249 b Lenc_entry
250
251.align 4
252Lenc_loop:
253 # middle of middle round
254 vperm v4, $sb1t, v7, v2 # vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u
255 lvx v1, r12, r11 # vmovdqa -0x40(%r11,%r10), %xmm1 # .Lk_mc_forward[]
256 addi r11, r11, 16
257 vperm v0, $sb1u, v7, v3 # vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t
258 vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k
259 andi. r11, r11, 0x30 # and \$0x30, %r11 # ... mod 4
260 vperm v5, $sb2t, v7, v2 # vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u
261 vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A
262 vperm v2, $sb2u, v7, v3 # vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t
263 lvx v4, r12, r10 # vmovdqa (%r11,%r10), %xmm4 # .Lk_mc_backward[]
264 addi r10, r11, 0x40
265 vperm v3, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm3 # 0 = B
266 vxor v2, v2, v5 # vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A
267 vperm v0, v0, v7, v4 # vpshufb %xmm4, %xmm0, %xmm0 # 3 = D
268 vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B
269 vperm v4, v3, v7, v1 # vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C
270 vxor v0, v0, v3 # vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D
271 vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D
272
273Lenc_entry:
274 # top of round
275 vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i
276 vperm v5, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k
277 vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j
278 vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i
279 vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
280 vand v0, v0, v9
281 vxor v3, v3, v5 # vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
282 vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
283 vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak
284 vmr v5, v6
285 lvx v6, r9, $key # vmovdqu (%r9), %xmm5
286 vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak
287 addi r9, r9, 16
288 vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io
289 ?vperm v5, v5, v6, $keyperm # align round key
290 vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
291 bdnz Lenc_loop
292
293 # middle of last round
294 addi r10, r11, 0x80
295 # vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo
296 # vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16
297 vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou
298 lvx v1, r12, r10 # vmovdqa 0x40(%r11,%r10), %xmm1 # .Lk_sr[]
299 vperm v0, $sbot, v7, v3 # vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t
300 vxor v4, v4, v5 # vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k
301 vxor v0, v0, v4 # vpxor %xmm4, %xmm0, %xmm0 # 0 = A
302 vperm v0, v0, v7, v1 # vpshufb %xmm1, %xmm0, %xmm0
303 blr
304 .long 0
305 .byte 0,12,0x14,0,0,0,0,0
306
307.globl .vpaes_encrypt
308.align 5
309.vpaes_encrypt:
310 $STU $sp,-$FRAME($sp)
311 li r10,`15+6*$SIZE_T`
312 li r11,`31+6*$SIZE_T`
313 mflr r6
314 mfspr r7, 256 # save vrsave
315 stvx v20,r10,$sp
316 addi r10,r10,32
317 stvx v21,r11,$sp
318 addi r11,r11,32
319 stvx v22,r10,$sp
320 addi r10,r10,32
321 stvx v23,r11,$sp
322 addi r11,r11,32
323 stvx v24,r10,$sp
324 addi r10,r10,32
325 stvx v25,r11,$sp
326 addi r11,r11,32
327 stvx v26,r10,$sp
328 addi r10,r10,32
329 stvx v27,r11,$sp
330 addi r11,r11,32
331 stvx v28,r10,$sp
332 addi r10,r10,32
333 stvx v29,r11,$sp
334 addi r11,r11,32
335 stvx v30,r10,$sp
336 stvx v31,r11,$sp
337 stw r7,`$FRAME-4`($sp) # save vrsave
338 li r0, -1
339 $PUSH r6,`$FRAME+$LRSAVE`($sp)
340 mtspr 256, r0 # preserve all AltiVec registers
341
342 bl _vpaes_encrypt_preheat
343
344 ?lvsl $inpperm, 0, $inp # prepare for unaligned access
345 lvx v0, 0, $inp
346 addi $inp, $inp, 15 # 15 is not a typo
347 ?lvsr $outperm, 0, $out
348 ?lvsl $keyperm, 0, $key # prepare for unaligned access
349 lvx $inptail, 0, $inp # redundant in aligned case
350 ?vperm v0, v0, $inptail, $inpperm
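	# The two lvx loads above fetch the 16-byte-aligned quadwords that
	# contain the first and the last byte of the (possibly unaligned)
	# input block; bumping the pointer by 15 rather than 16 is what makes
	# the second load land on the quadword holding the last byte, and the
	# vperm merges the two into the actual input. When the input is
	# already aligned, both loads hit the same quadword.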
351
352 bl _vpaes_encrypt_core
353
354 andi. r8, $out, 15
355 li r9, 16
356 beq Lenc_out_aligned
357
358 vperm v0, v0, v0, $outperm # rotate right/left
359 mtctr r9
360Lenc_out_unaligned:
361 stvebx v0, 0, $out
362 addi $out, $out, 1
363 bdnz Lenc_out_unaligned
364 b Lenc_done
365
366.align 4
367Lenc_out_aligned:
368 stvx v0, 0, $out
369Lenc_done:
370
371 li r10,`15+6*$SIZE_T`
372 li r11,`31+6*$SIZE_T`
373 mtlr r6
374 mtspr 256, r7 # restore vrsave
375 lvx v20,r10,$sp
376 addi r10,r10,32
377 lvx v21,r11,$sp
378 addi r11,r11,32
379 lvx v22,r10,$sp
380 addi r10,r10,32
381 lvx v23,r11,$sp
382 addi r11,r11,32
383 lvx v24,r10,$sp
384 addi r10,r10,32
385 lvx v25,r11,$sp
386 addi r11,r11,32
387 lvx v26,r10,$sp
388 addi r10,r10,32
389 lvx v27,r11,$sp
390 addi r11,r11,32
391 lvx v28,r10,$sp
392 addi r10,r10,32
393 lvx v29,r11,$sp
394 addi r11,r11,32
395 lvx v30,r10,$sp
396 lvx v31,r11,$sp
397 addi $sp,$sp,$FRAME
398 blr
399 .long 0
400 .byte 0,12,0x04,1,0x80,0,3,0
401 .long 0
402.size .vpaes_encrypt,.-.vpaes_encrypt
403
404.align 4
405_vpaes_decrypt_preheat:
406 mflr r8
407 bl Lconsts
408 mtlr r8
409 li r11, 0xc0 # Lk_inv
410 li r10, 0xd0
411 li r9, 0x160 # Ldipt
412 li r8, 0x170
413 vxor v7, v7, v7 # 0x00..00
414 vspltisb v8,4 # 0x04..04
415 vspltisb v9,0x0f # 0x0f..0f
416 lvx $invlo, r12, r11
417 li r11, 0x180
418 lvx $invhi, r12, r10
419 li r10, 0x190
420 lvx $iptlo, r12, r9
421 li r9, 0x1a0
422 lvx $ipthi, r12, r8
423 li r8, 0x1b0
424 lvx $sbou, r12, r11
425 li r11, 0x1c0
426 lvx $sbot, r12, r10
427 li r10, 0x1d0
428 lvx $sb9u, r12, r9
429 li r9, 0x1e0
430 lvx $sb9t, r12, r8
431 li r8, 0x1f0
432 lvx $sbdu, r12, r11
433 li r11, 0x200
434 lvx $sbdt, r12, r10
435 li r10, 0x210
436 lvx $sbbu, r12, r9
437 lvx $sbbt, r12, r8
438 lvx $sbeu, r12, r11
439 lvx $sbet, r12, r10
440 blr
441 .long 0
442 .byte 0,12,0x14,0,0,0,0,0
443
444##
445## Decryption core
446##
447## Same API as encryption core.
448##
449.align 4
450_vpaes_decrypt_core:
451 lwz r8, 240($key) # pull rounds
452 li r9, 16
453 lvx v5, 0, $key # vmovdqu (%r9), %xmm4 # round0 key
454 li r11, 0x30
455 lvx v6, r9, $key
456 addi r9, r9, 16
457 ?vperm v5, v5, v6, $keyperm # align round key
458 vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
459 vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2
460 vperm v1, $ipthi, $ipthi, v1 # vpshufb %xmm0, %xmm1, %xmm0
461 vxor v0, v0, v5 # vpxor %xmm4, %xmm2, %xmm2
462 vxor v0, v0, v1 # vpxor %xmm2, %xmm0, %xmm0
463 mtctr r8
464 b Ldec_entry
465
466.align 4
467Ldec_loop:
468#
469# Inverse mix columns
470#
471 lvx v0, r12, r11 # v5 and v0 are flipped
472 # vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u
473 # vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t
474 vperm v4, $sb9u, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u
475 subi r11, r11, 16
476 vperm v1, $sb9t, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t
477 andi. r11, r11, 0x30
478 vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0
479 # vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu
480 vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
481 # vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt
482
483 vperm v4, $sbdu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu
484 vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch
485 vperm v1, $sbdt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt
486 vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
487 # vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu
488 vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
489 # vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt
490
491 vperm v4, $sbbu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu
492 vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch
493 vperm v1, $sbbt, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt
494 vxor v5, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
495 # vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu
496 vxor v5, v5, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
497 # vmovdqa 0x50(%r10), %xmm1 # 0 : sbet
498
499 vperm v4, $sbeu, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu
500 vperm v5, v5, v7, v0 # vpshufb %xmm5, %xmm0, %xmm0 # MC ch
501 vperm v1, $sbet, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet
502 vxor v0, v5, v4 # vpxor %xmm4, %xmm0, %xmm0 # 4 = ch
503 vxor v0, v0, v1 # vpxor %xmm1, %xmm0, %xmm0 # 0 = ch
504
505Ldec_entry:
506 # top of round
507 vsrb v1, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i
508 vperm v2, $invhi, $invhi, v0 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k
509 vxor v0, v0, v1 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j
510 vperm v3, $invlo, $invlo, v1 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i
511 vperm v4, $invlo, $invlo, v0 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
512 vand v0, v0, v9
513 vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
514 vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
515 vperm v2, $invlo, v7, v3 # vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak
516 vmr v5, v6
517 lvx v6, r9, $key # vmovdqu (%r9), %xmm0
518 vperm v3, $invlo, v7, v4 # vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak
519 addi r9, r9, 16
520 vxor v2, v2, v0 # vpxor %xmm1, %xmm2, %xmm2 # 2 = io
521 ?vperm v5, v5, v6, $keyperm # align round key
522 vxor v3, v3, v1 # vpxor %xmm0, %xmm3, %xmm3 # 3 = jo
523 bdnz Ldec_loop
524
525 # middle of last round
526 addi r10, r11, 0x80
527 # vmovdqa 0x60(%r10), %xmm4 # 3 : sbou
528 vperm v4, $sbou, v7, v2 # vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou
529 # vmovdqa 0x70(%r10), %xmm1 # 0 : sbot
530 lvx v2, r12, r10 # vmovdqa -0x160(%r11), %xmm2 # .Lk_sr-.Lk_dsbd=-0x160
531 vperm v1, $sbot, v7, v3 # vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t
532 vxor v4, v4, v5 # vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k
533 vxor v0, v1, v4 # vpxor %xmm4, %xmm1, %xmm0 # 0 = A
534 vperm v0, v0, v7, v2 # vpshufb %xmm2, %xmm0, %xmm0
535 blr
536 .long 0
537 .byte 0,12,0x14,0,0,0,0,0
538
539.globl .vpaes_decrypt
540.align 5
541.vpaes_decrypt:
542 $STU $sp,-$FRAME($sp)
543 li r10,`15+6*$SIZE_T`
544 li r11,`31+6*$SIZE_T`
545 mflr r6
546 mfspr r7, 256 # save vrsave
547 stvx v20,r10,$sp
548 addi r10,r10,32
549 stvx v21,r11,$sp
550 addi r11,r11,32
551 stvx v22,r10,$sp
552 addi r10,r10,32
553 stvx v23,r11,$sp
554 addi r11,r11,32
555 stvx v24,r10,$sp
556 addi r10,r10,32
557 stvx v25,r11,$sp
558 addi r11,r11,32
559 stvx v26,r10,$sp
560 addi r10,r10,32
561 stvx v27,r11,$sp
562 addi r11,r11,32
563 stvx v28,r10,$sp
564 addi r10,r10,32
565 stvx v29,r11,$sp
566 addi r11,r11,32
567 stvx v30,r10,$sp
568 stvx v31,r11,$sp
569 stw r7,`$FRAME-4`($sp) # save vrsave
570 li r0, -1
571 $PUSH r6,`$FRAME+$LRSAVE`($sp)
572 mtspr 256, r0 # preserve all AltiVec registers
573
574 bl _vpaes_decrypt_preheat
575
576 ?lvsl $inpperm, 0, $inp # prepare for unaligned access
577 lvx v0, 0, $inp
578 addi $inp, $inp, 15 # 15 is not a typo
579 ?lvsr $outperm, 0, $out
580 ?lvsl $keyperm, 0, $key
581 lvx $inptail, 0, $inp # redundant in aligned case
582 ?vperm v0, v0, $inptail, $inpperm
583
584 bl _vpaes_decrypt_core
585
586 andi. r8, $out, 15
587 li r9, 16
588 beq Ldec_out_aligned
589
590 vperm v0, v0, v0, $outperm # rotate right/left
591 mtctr r9
592Ldec_out_unaligned:
593 stvebx v0, 0, $out
594 addi $out, $out, 1
595 bdnz Ldec_out_unaligned
596 b Ldec_done
597
598.align 4
599Ldec_out_aligned:
600 stvx v0, 0, $out
601Ldec_done:
602
603 li r10,`15+6*$SIZE_T`
604 li r11,`31+6*$SIZE_T`
605 mtlr r6
606 mtspr 256, r7 # restore vrsave
607 lvx v20,r10,$sp
608 addi r10,r10,32
609 lvx v21,r11,$sp
610 addi r11,r11,32
611 lvx v22,r10,$sp
612 addi r10,r10,32
613 lvx v23,r11,$sp
614 addi r11,r11,32
615 lvx v24,r10,$sp
616 addi r10,r10,32
617 lvx v25,r11,$sp
618 addi r11,r11,32
619 lvx v26,r10,$sp
620 addi r10,r10,32
621 lvx v27,r11,$sp
622 addi r11,r11,32
623 lvx v28,r10,$sp
624 addi r10,r10,32
625 lvx v29,r11,$sp
626 addi r11,r11,32
627 lvx v30,r10,$sp
628 lvx v31,r11,$sp
629 addi $sp,$sp,$FRAME
630 blr
631 .long 0
632 .byte 0,12,0x04,1,0x80,0,3,0
633 .long 0
634.size .vpaes_decrypt,.-.vpaes_decrypt
635
636.globl .vpaes_cbc_encrypt
637.align 5
638.vpaes_cbc_encrypt:
639 ${UCMP}i r5,16
640 bltlr-
641
642 $STU $sp,-`($FRAME+2*$SIZE_T)`($sp)
643 mflr r0
644 li r10,`15+6*$SIZE_T`
645 li r11,`31+6*$SIZE_T`
646 mfspr r12, 256
647 stvx v20,r10,$sp
648 addi r10,r10,32
649 stvx v21,r11,$sp
650 addi r11,r11,32
651 stvx v22,r10,$sp
652 addi r10,r10,32
653 stvx v23,r11,$sp
654 addi r11,r11,32
655 stvx v24,r10,$sp
656 addi r10,r10,32
657 stvx v25,r11,$sp
658 addi r11,r11,32
659 stvx v26,r10,$sp
660 addi r10,r10,32
661 stvx v27,r11,$sp
662 addi r11,r11,32
663 stvx v28,r10,$sp
664 addi r10,r10,32
665 stvx v29,r11,$sp
666 addi r11,r11,32
667 stvx v30,r10,$sp
668 stvx v31,r11,$sp
669 stw r12,`$FRAME-4`($sp) # save vrsave
670 $PUSH r30,`$FRAME+$SIZE_T*0`($sp)
671 $PUSH r31,`$FRAME+$SIZE_T*1`($sp)
672 li r9, -16
673 $PUSH r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
674
675 and r30, r5, r9 # copy length&-16
676 andi. r9, $out, 15 # is $out aligned?
677 mr r5, r6 # copy pointer to key
678 mr r31, r7 # copy pointer to iv
679 li r6, -1
680 mcrf cr1, cr0 # put aside $out alignment flag
681 mr r7, r12 # copy vrsave
682 mtspr 256, r6 # preserve all AltiVec registers
683
684 lvx v24, 0, r31 # load [potentially unaligned] iv
685 li r9, 15
686 ?lvsl $inpperm, 0, r31
687 lvx v25, r9, r31
688 ?vperm v24, v24, v25, $inpperm
689
690 cmpwi r8, 0 # test direction
691 neg r8, $inp # prepare for unaligned access
692 vxor v7, v7, v7
693 ?lvsl $keyperm, 0, $key
694 ?lvsr $outperm, 0, $out
695 ?lvsr $inpperm, 0, r8 # -$inp
696 vnor $outmask, v7, v7 # 0xff..ff
697 lvx $inptail, 0, $inp
698 ?vperm $outmask, v7, $outmask, $outperm
699 addi $inp, $inp, 15 # 15 is not a typo
700
701 beq Lcbc_decrypt
702
703 bl _vpaes_encrypt_preheat
704 li r0, 16
705
706 beq cr1, Lcbc_enc_loop # $out is aligned
707
708 vmr v0, $inptail
709 lvx $inptail, 0, $inp
710 addi $inp, $inp, 16
711 ?vperm v0, v0, $inptail, $inpperm
712 vxor v0, v0, v24 # ^= iv
713
714 bl _vpaes_encrypt_core
715
716 andi. r8, $out, 15
717 vmr v24, v0 # put aside iv
718 sub r9, $out, r8
719 vperm $outhead, v0, v0, $outperm # rotate right/left
720
721Lcbc_enc_head:
722 stvebx $outhead, r8, r9
723 cmpwi r8, 15
724 addi r8, r8, 1
725 bne Lcbc_enc_head
726
727 sub. r30, r30, r0 # len -= 16
728 addi $out, $out, 16
729 beq Lcbc_unaligned_done
730
731Lcbc_enc_loop:
732 vmr v0, $inptail
733 lvx $inptail, 0, $inp
734 addi $inp, $inp, 16
735 ?vperm v0, v0, $inptail, $inpperm
736 vxor v0, v0, v24 # ^= iv
737
738 bl _vpaes_encrypt_core
739
740 vmr v24, v0 # put aside iv
741 sub. r30, r30, r0 # len -= 16
742 vperm v0, v0, v0, $outperm # rotate right/left
743 vsel v1, $outhead, v0, $outmask
744 vmr $outhead, v0
745 stvx v1, 0, $out
746 addi $out, $out, 16
747 bne Lcbc_enc_loop
748
749 b Lcbc_done
750
751.align 5
752Lcbc_decrypt:
753 bl _vpaes_decrypt_preheat
754 li r0, 16
755
756 beq cr1, Lcbc_dec_loop # $out is aligned
757
758 vmr v0, $inptail
759 lvx $inptail, 0, $inp
760 addi $inp, $inp, 16
761 ?vperm v0, v0, $inptail, $inpperm
762 vmr v25, v0 # put aside input
763
764 bl _vpaes_decrypt_core
765
766 andi. r8, $out, 15
767 vxor v0, v0, v24 # ^= iv
768 vmr v24, v25
769 sub r9, $out, r8
770 vperm $outhead, v0, v0, $outperm # rotate right/left
771
772Lcbc_dec_head:
773 stvebx $outhead, r8, r9
774 cmpwi r8, 15
775 addi r8, r8, 1
776 bne Lcbc_dec_head
777
778 sub. r30, r30, r0 # len -= 16
779 addi $out, $out, 16
780 beq Lcbc_unaligned_done
781
782Lcbc_dec_loop:
783 vmr v0, $inptail
784 lvx $inptail, 0, $inp
785 addi $inp, $inp, 16
786 ?vperm v0, v0, $inptail, $inpperm
787 vmr v25, v0 # put aside input
788
789 bl _vpaes_decrypt_core
790
791 vxor v0, v0, v24 # ^= iv
792 vmr v24, v25
793 sub. r30, r30, r0 # len -= 16
794 vperm v0, v0, v0, $outperm # rotate right/left
795 vsel v1, $outhead, v0, $outmask
796 vmr $outhead, v0
797 stvx v1, 0, $out
798 addi $out, $out, 16
799 bne Lcbc_dec_loop
800
801Lcbc_done:
802 beq cr1, Lcbc_write_iv # $out is aligned
803
804Lcbc_unaligned_done:
805 andi. r8, $out, 15
806 sub $out, $out, r8
807 li r9, 0
808Lcbc_tail:
809 stvebx $outhead, r9, $out
810 addi r9, r9, 1
811 cmpw r9, r8
812 bne Lcbc_tail
813
814Lcbc_write_iv:
815 neg r8, r31 # write [potentially unaligned] iv
816 li r10, 4
817 ?lvsl $outperm, 0, r8
818 li r11, 8
819 li r12, 12
820 vperm v24, v24, v24, $outperm # rotate right/left
821 stvewx v24, 0, r31 # ivp is at least 32-bit aligned
822 stvewx v24, r10, r31
823 stvewx v24, r11, r31
824 stvewx v24, r12, r31
825
826 mtspr 256, r7 # restore vrsave
827 li r10,`15+6*$SIZE_T`
828 li r11,`31+6*$SIZE_T`
829 lvx v20,r10,$sp
830 addi r10,r10,32
831 lvx v21,r11,$sp
832 addi r11,r11,32
833 lvx v22,r10,$sp
834 addi r10,r10,32
835 lvx v23,r11,$sp
836 addi r11,r11,32
837 lvx v24,r10,$sp
838 addi r10,r10,32
839 lvx v25,r11,$sp
840 addi r11,r11,32
841 lvx v26,r10,$sp
842 addi r10,r10,32
843 lvx v27,r11,$sp
844 addi r11,r11,32
845 lvx v28,r10,$sp
846 addi r10,r10,32
847 lvx v29,r11,$sp
848 addi r11,r11,32
849 lvx v30,r10,$sp
850 lvx v31,r11,$sp
851Lcbc_abort:
852 $POP r0, `$FRAME+$SIZE_T*2+$LRSAVE`($sp)
853 $POP r30,`$FRAME+$SIZE_T*0`($sp)
854 $POP r31,`$FRAME+$SIZE_T*1`($sp)
855 mtlr r0
856 addi $sp,$sp,`$FRAME+$SIZE_T*2`
857 blr
858 .long 0
859 .byte 0,12,0x04,1,0x80,2,6,0
860 .long 0
861.size .vpaes_cbc_encrypt,.-.vpaes_cbc_encrypt
862___
863}
864
865{
866my ($inp,$bits,$out)=map("r$_",(3..5));
867my $dir="cr1";
868my ($invlo,$invhi,$iptlo,$ipthi,$rcon) = map("v$_",(10..13,24));
869
870$code.=<<___;
871########################################################
872## ##
873## AES key schedule ##
874## ##
875########################################################
876.align 4
877_vpaes_key_preheat:
878 mflr r8
879 bl Lconsts
880 mtlr r8
881 li r11, 0xc0 # Lk_inv
882 li r10, 0xd0
883 li r9, 0xe0 # L_ipt
884 li r8, 0xf0
885
886 vspltisb v8,4 # 0x04..04
887 vxor v9,v9,v9 # 0x00..00
888 lvx $invlo, r12, r11 # Lk_inv
889 li r11, 0x120
890 lvx $invhi, r12, r10
891 li r10, 0x130
892 lvx $iptlo, r12, r9 # Lk_ipt
893 li r9, 0x220
894 lvx $ipthi, r12, r8
895 li r8, 0x230
896
897 lvx v14, r12, r11 # Lk_sb1
898 li r11, 0x240
899 lvx v15, r12, r10
900 li r10, 0x250
901
902 lvx v16, r12, r9 # Lk_dksd
903 li r9, 0x260
904 lvx v17, r12, r8
905 li r8, 0x270
906 lvx v18, r12, r11 # Lk_dksb
907 li r11, 0x280
908 lvx v19, r12, r10
909 li r10, 0x290
910 lvx v20, r12, r9 # Lk_dkse
911 li r9, 0x2a0
912 lvx v21, r12, r8
913 li r8, 0x2b0
914 lvx v22, r12, r11 # Lk_dks9
915 lvx v23, r12, r10
916
917 lvx v24, r12, r9 # Lk_rcon
918 lvx v25, 0, r12 # Lk_mc_forward[0]
919 lvx v26, r12, r8 # Lks63
920 blr
921 .long 0
922 .byte 0,12,0x14,0,0,0,0,0
923
924.align 4
925_vpaes_schedule_core:
926 mflr r7
927
928 bl _vpaes_key_preheat # load the tables
929
930 #lvx v0, 0, $inp # vmovdqu (%rdi), %xmm0 # load key (unaligned)
931 neg r8, $inp # prepare for unaligned access
932 lvx v0, 0, $inp
933 addi $inp, $inp, 15 # 15 is not a typo
934 ?lvsr $inpperm, 0, r8 # -$inp
935 lvx v6, 0, $inp # v6 serves as inptail
936 addi $inp, $inp, 8
937 ?vperm v0, v0, v6, $inpperm
938
939 # input transform
940 vmr v3, v0 # vmovdqa %xmm0, %xmm3
941 bl _vpaes_schedule_transform
942 vmr v7, v0 # vmovdqa %xmm0, %xmm7
943
944 bne $dir, Lschedule_am_decrypting
945
946 # encrypting, output zeroth round key after transform
947 li r8, 0x30 # mov \$0x30,%r8d
948 li r9, 4
949 li r10, 8
950 li r11, 12
951
952 ?lvsr $outperm, 0, $out # prepare for unaligned access
953 vnor $outmask, v9, v9 # 0xff..ff
954 ?vperm $outmask, v9, $outmask, $outperm
955
956 #stvx v0, 0, $out # vmovdqu %xmm0, (%rdx)
957 vperm $outhead, v0, v0, $outperm # rotate right/left
958 stvewx $outhead, 0, $out # some are superfluous
959 stvewx $outhead, r9, $out
960 stvewx $outhead, r10, $out
961 addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
962 stvewx $outhead, r11, $out
963 b Lschedule_go
964
965Lschedule_am_decrypting:
966 srwi r8, $bits, 1 # shr \$1,%r8d
967 andi. r8, r8, 32 # and \$32,%r8d
968 xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32
969 addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
970 # decrypting, output zeroth round key after shiftrows
971 lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1
972 li r9, 4
973 li r10, 8
974 li r11, 12
975 vperm v4, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
976
977 neg r0, $out # prepare for unaligned access
978 ?lvsl $outperm, 0, r0
979 vnor $outmask, v9, v9 # 0xff..ff
980 ?vperm $outmask, $outmask, v9, $outperm
981
982 #stvx v4, 0, $out # vmovdqu %xmm3, (%rdx)
983 vperm $outhead, v4, v4, $outperm # rotate right/left
984 stvewx $outhead, 0, $out # some are superfluous
985 stvewx $outhead, r9, $out
986 stvewx $outhead, r10, $out
987 addi r10, r12, 0x80 # lea .Lk_sr(%rip),%r10
988 stvewx $outhead, r11, $out
989 addi $out, $out, 15 # 15 is not a typo
990 xori r8, r8, 0x30 # xor \$0x30, %r8
991
992Lschedule_go:
993 cmplwi $bits, 192 # cmp \$192, %esi
994 bgt Lschedule_256
995 beq Lschedule_192
996 # 128: fall through
997
998##
999## .schedule_128
1000##
1001## 128-bit specific part of key schedule.
1002##
1003## This schedule is really simple, because all its parts
1004## are accomplished by the subroutines.
1005##
1006Lschedule_128:
1007 li r0, 10 # mov \$10, %esi
1008 mtctr r0
1009
1010Loop_schedule_128:
1011 bl _vpaes_schedule_round
1012 bdz Lschedule_mangle_last # dec %esi
1013 bl _vpaes_schedule_mangle # write output
1014 b Loop_schedule_128
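##
## With the counter preloaded with 10, the loop above calls
## _vpaes_schedule_round ten times; the first nine results are written out
## by _vpaes_schedule_mangle and the tenth by Lschedule_mangle_last, which
## together with the zeroth key stored before the loop gives the 11 round
## keys of AES-128.
##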
1015
1016##
1017## .aes_schedule_192
1018##
1019## 192-bit specific part of key schedule.
1020##
1021## The main body of this schedule is the same as the 128-bit
1022## schedule, but with more smearing. The long, high side is
1023## stored in %xmm7 as before, and the short, low side is in
1024## the high bits of %xmm6.
1025##
1026## This schedule is somewhat nastier, however, because each
1027## round produces 192 bits of key material, or 1.5 round keys.
1028## Therefore, on each cycle we do 2 rounds and produce 3 round
1029## keys.
1030##
1031.align 4
1032Lschedule_192:
1033 li r0, 4 # mov \$4, %esi
1034 lvx v0, 0, $inp
1035 ?vperm v0, v6, v0, $inpperm
1036 ?vsldoi v0, v3, v0, 8 # vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned)
1037 bl _vpaes_schedule_transform # input transform
1038 ?vsldoi v6, v0, v9, 8
1039 ?vsldoi v6, v9, v6, 8 # clobber "low" side with zeros
1040 mtctr r0
1041
1042Loop_schedule_192:
1043 bl _vpaes_schedule_round
1044 ?vsldoi v0, v6, v0, 8 # vpalignr \$8,%xmm6,%xmm0,%xmm0
1045 bl _vpaes_schedule_mangle # save key n
1046 bl _vpaes_schedule_192_smear
1047 bl _vpaes_schedule_mangle # save key n+1
1048 bl _vpaes_schedule_round
1049 bdz Lschedule_mangle_last # dec %esi
1050 bl _vpaes_schedule_mangle # save key n+2
1051 bl _vpaes_schedule_192_smear
1052 b Loop_schedule_192
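##
## Each full pass through the loop above produces three round keys (two
## calls to _vpaes_schedule_round plus the 192-bit smears); the counter is
## preloaded with 4, and on the last pass the second schedule round falls
## through to Lschedule_mangle_last, so together with the zeroth key stored
## earlier this yields the 13 round keys of AES-192.
##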
1053
1054##
1055## .aes_schedule_256
1056##
1057## 256-bit specific part of key schedule.
1058##
1059## The structure here is very similar to the 128-bit
1060## schedule, but with an additional "low side" in
1061## %xmm6. The low side's rounds are the same as the
1062## high side's, except no rcon and no rotation.
1063##
1064.align 4
1065Lschedule_256:
1066 li r0, 7 # mov \$7, %esi
1067 addi $inp, $inp, 8
1068 lvx v0, 0, $inp # vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned)
1069 ?vperm v0, v6, v0, $inpperm
1070 bl _vpaes_schedule_transform # input transform
1071 mtctr r0
1072
1073Loop_schedule_256:
1074 bl _vpaes_schedule_mangle # output low result
1075 vmr v6, v0 # vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6
1076
1077 # high round
1078 bl _vpaes_schedule_round
1079 bdz Lschedule_mangle_last # dec %esi
1080 bl _vpaes_schedule_mangle
1081
1082 # low round. swap xmm7 and xmm6
1083 ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0
1084 vmr v5, v7 # vmovdqa %xmm7, %xmm5
1085 vmr v7, v6 # vmovdqa %xmm6, %xmm7
1086 bl _vpaes_schedule_low_round
1087 vmr v7, v5 # vmovdqa %xmm5, %xmm7
1088
1089 b Loop_schedule_256
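##
## Each pass through the loop above writes two round keys, one for the
## high side and one for the low side; the counter is preloaded with 7,
## and on the last pass the high round falls through to
## Lschedule_mangle_last, so together with the zeroth key stored earlier
## this yields the 15 round keys of AES-256.
##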
1090##
1091## .aes_schedule_mangle_last
1092##
1093## Mangler for last round of key schedule
1094## Mangles %xmm0
1095## when encrypting, outputs out(%xmm0) ^ 63
1096## when decrypting, outputs unskew(%xmm0)
1097##
1098## Always called right before return... jumps to cleanup and exits
1099##
1100.align 4
1101Lschedule_mangle_last:
1102 # schedule last round key from xmm0
1103 li r11, 0x2e0 # lea .Lk_deskew(%rip),%r11
1104 li r9, 0x2f0
1105 bne $dir, Lschedule_mangle_last_dec
1106
1107 # encrypting
1108 lvx v1, r8, r10 # vmovdqa (%r8,%r10),%xmm1
1109 li r11, 0x2c0 # lea .Lk_opt(%rip), %r11 # prepare to output transform
1110 li r9, 0x2d0 # prepare to output transform
1111 vperm v0, v0, v0, v1 # vpshufb %xmm1, %xmm0, %xmm0 # output permute
1112
1113 lvx $iptlo, r11, r12 # reload $ipt
1114 lvx $ipthi, r9, r12
1115 addi $out, $out, 16 # add \$16, %rdx
1116 vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0
1117 bl _vpaes_schedule_transform # output transform
1118
1119 #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key
1120 vperm v0, v0, v0, $outperm # rotate right/left
1121 li r10, 4
1122 vsel v2, $outhead, v0, $outmask
1123 li r11, 8
1124 stvx v2, 0, $out
1125 li r12, 12
1126 stvewx v0, 0, $out # some (or all) are redundant
1127 stvewx v0, r10, $out
1128 stvewx v0, r11, $out
1129 stvewx v0, r12, $out
1130 b Lschedule_mangle_done
1131
1132.align 4
1133Lschedule_mangle_last_dec:
1134 lvx $iptlo, r11, r12 # reload $ipt
1135 lvx $ipthi, r9, r12
1136 addi $out, $out, -16 # add \$-16, %rdx
1137 vxor v0, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm0
1138 bl _vpaes_schedule_transform # output transform
1139
1140 #stvx v0, r0, $out # vmovdqu %xmm0, (%rdx) # save last key
1141 addi r9, $out, -15 # -15 is not a typo
1142 vperm v0, v0, v0, $outperm # rotate right/left
1143 li r10, 4
1144 vsel v2, $outhead, v0, $outmask
1145 li r11, 8
1146 stvx v2, 0, $out
1147 li r12, 12
1148 stvewx v0, 0, r9 # some (or all) are redundant
1149 stvewx v0, r10, r9
1150 stvewx v0, r11, r9
1151 stvewx v0, r12, r9
1152
1153
1154Lschedule_mangle_done:
1155 mtlr r7
1156 # cleanup
1157 vxor v0, v0, v0 # vpxor %xmm0, %xmm0, %xmm0
1158 vxor v1, v1, v1 # vpxor %xmm1, %xmm1, %xmm1
1159 vxor v2, v2, v2 # vpxor %xmm2, %xmm2, %xmm2
1160 vxor v3, v3, v3 # vpxor %xmm3, %xmm3, %xmm3
1161 vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4
1162 vxor v5, v5, v5 # vpxor %xmm5, %xmm5, %xmm5
1163 vxor v6, v6, v6 # vpxor %xmm6, %xmm6, %xmm6
1164 vxor v7, v7, v7 # vpxor %xmm7, %xmm7, %xmm7
1165
1166 blr
1167 .long 0
1168 .byte 0,12,0x14,0,0,0,0,0
1169
1170##
1171## .aes_schedule_192_smear
1172##
1173## Smear the short, low side in the 192-bit key schedule.
1174##
1175## Inputs:
1176## %xmm7: high side, b a x y
1177## %xmm6: low side, d c 0 0
1178## %xmm13: 0
1179##
1180## Outputs:
1181## %xmm6: b+c+d b+c 0 0
1182## %xmm0: b+c+d b+c b a
1183##
1184.align 4
1185_vpaes_schedule_192_smear:
1186 ?vspltw v0, v7, 3
1187 ?vsldoi v1, v9, v6, 12 # vpshufd \$0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0
1188 ?vsldoi v0, v7, v0, 8 # vpshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a
1189 vxor v6, v6, v1 # vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0
1190 vxor v6, v6, v0 # vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a
1191 vmr v0, v6
1192 ?vsldoi v6, v6, v9, 8
1193 ?vsldoi v6, v9, v6, 8 # clobber low side with zeros
1194 blr
1195 .long 0
1196 .byte 0,12,0x14,0,0,0,0,0
1197
1198##
1199## .aes_schedule_round
1200##
1201## Runs one main round of the key schedule on %xmm0, %xmm7
1202##
1203## Specifically, runs subbytes on the high dword of %xmm0
1204## then rotates it by one byte and xors into the low dword of
1205## %xmm7.
1206##
1207## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
1208## next rcon.
1209##
1210## Smears the dwords of %xmm7 by xoring the low into the
1211## second low, result into third, result into highest.
1212##
1213## Returns results in %xmm7 = %xmm0.
1214## Clobbers %xmm1-%xmm4, %r11.
1215##
1216.align 4
1217_vpaes_schedule_round:
1218 # extract rcon from xmm8
1219 #vxor v4, v4, v4 # vpxor %xmm4, %xmm4, %xmm4
1220 ?vsldoi v1, $rcon, v9, 15 # vpalignr \$15, %xmm8, %xmm4, %xmm1
1221 ?vsldoi $rcon, $rcon, $rcon, 15 # vpalignr \$15, %xmm8, %xmm8, %xmm8
1222 vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7
1223
1224 # rotate
1225 ?vspltw v0, v0, 3 # vpshufd \$0xFF, %xmm0, %xmm0
1226 ?vsldoi v0, v0, v0, 1 # vpalignr \$1, %xmm0, %xmm0, %xmm0
1227
1228 # fall through...
1229
1230 # low round: same as high round, but no rotation and no rcon.
1231_vpaes_schedule_low_round:
1232 # smear xmm7
1233 ?vsldoi v1, v9, v7, 12 # vpslldq \$4, %xmm7, %xmm1
1234 vxor v7, v7, v1 # vpxor %xmm1, %xmm7, %xmm7
1235 vspltisb v1, 0x0f # 0x0f..0f
1236 ?vsldoi v4, v9, v7, 8 # vpslldq \$8, %xmm7, %xmm4
1237
1238 # subbytes
1239 vand v1, v1, v0 # vpand %xmm9, %xmm0, %xmm1 # 0 = k
1240 vsrb v0, v0, v8 # vpsrlb \$4, %xmm0, %xmm0 # 1 = i
1241 vxor v7, v7, v4 # vpxor %xmm4, %xmm7, %xmm7
1242 vperm v2, $invhi, v9, v1 # vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k
1243 vxor v1, v1, v0 # vpxor %xmm0, %xmm1, %xmm1 # 0 = j
1244 vperm v3, $invlo, v9, v0 # vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i
1245 vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k
1246 vperm v4, $invlo, v9, v1 # vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j
1247 vxor v7, v7, v26 # vpxor .Lk_s63(%rip), %xmm7, %xmm7
1248 vperm v3, $invlo, v9, v3 # vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak
1249 vxor v4, v4, v2 # vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k
1250 vperm v2, $invlo, v9, v4 # vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak
1251 vxor v3, v3, v1 # vpxor %xmm1, %xmm3, %xmm3 # 2 = io
1252 vxor v2, v2, v0 # vpxor %xmm0, %xmm2, %xmm2 # 3 = jo
1253 vperm v4, v15, v9, v3 # vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou
1254 vperm v1, v14, v9, v2 # vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t
1255 vxor v1, v1, v4 # vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output
1256
1257 # add in smeared stuff
1258 vxor v0, v1, v7 # vpxor %xmm7, %xmm1, %xmm0
1259 vxor v7, v1, v7 # vmovdqa %xmm0, %xmm7
1260 blr
1261 .long 0
1262 .byte 0,12,0x14,0,0,0,0,0
1263
1264##
1265## .aes_schedule_transform
1266##
1267## Linear-transform %xmm0 according to tables at (%r11)
1268##
1269## Requires that %xmm9 = 0x0F0F... as in preheat
1270## Output in %xmm0
1271## Clobbers %xmm2
1272##
1273.align 4
1274_vpaes_schedule_transform:
1275 #vand v1, v0, v9 # vpand %xmm9, %xmm0, %xmm1
1276 vsrb v2, v0, v8 # vpsrlb \$4, %xmm0, %xmm0
1277 # vmovdqa (%r11), %xmm2 # lo
1278 vperm v0, $iptlo, $iptlo, v0 # vpshufb %xmm1, %xmm2, %xmm2
1279 # vmovdqa 16(%r11), %xmm1 # hi
1280 vperm v2, $ipthi, $ipthi, v2 # vpshufb %xmm0, %xmm1, %xmm0
1281 vxor v0, v0, v2 # vpxor %xmm2, %xmm0, %xmm0
1282 blr
1283 .long 0
1284 .byte 0,12,0x14,0,0,0,0,0
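##
## The transform above is the generic vpaes 4-bit-table lookup: the low
## nibble of each byte indexes the "lo" table and the high nibble (after
## the vsrb) indexes the "hi" table, and the two lookups are xored.
## Because vperm only looks at the low five bits of each index byte and
## the same 16-byte table is supplied as both source operands, the
## unmasked byte works as an index, which is why the explicit vand mask
## is commented out.
##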
1285
1286##
1287## .aes_schedule_mangle
1288##
1289## Mangle xmm0 from (basis-transformed) standard version
1290## to our version.
1291##
1292## On encrypt,
1293## xor with 0x63
1294## multiply by circulant 0,1,1,1
1295## apply shiftrows transform
1296##
1297## On decrypt,
1298## xor with 0x63
1299## multiply by "inverse mixcolumns" circulant E,B,D,9
1300## deskew
1301## apply shiftrows transform
1302##
1303##
1304## Writes out to (%rdx), and increments or decrements it
1305## Keeps track of round number mod 4 in %r8
1306## Preserves xmm0
1307## Clobbers xmm1-xmm5
1308##
1309.align 4
1310_vpaes_schedule_mangle:
1311 #vmr v4, v0 # vmovdqa %xmm0, %xmm4 # save xmm0 for later
1312 # vmovdqa .Lk_mc_forward(%rip),%xmm5
1313 bne $dir, Lschedule_mangle_dec
1314
1315 # encrypting
1316 vxor v4, v0, v26 # vpxor .Lk_s63(%rip), %xmm0, %xmm4
1317 addi $out, $out, 16 # add \$16, %rdx
1318 vperm v4, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm4
1319 vperm v1, v4, v4, v25 # vpshufb %xmm5, %xmm4, %xmm1
1320 vperm v3, v1, v1, v25 # vpshufb %xmm5, %xmm1, %xmm3
1321 vxor v4, v4, v1 # vpxor %xmm1, %xmm4, %xmm4
1322 lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1
1323 vxor v3, v3, v4 # vpxor %xmm4, %xmm3, %xmm3
1324
1325 vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
1326 addi r8, r8, -16 # add \$-16, %r8
1327 andi. r8, r8, 0x30 # and \$0x30, %r8
1328
1329 #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx)
1330 vperm v1, v3, v3, $outperm # rotate right/left
1331 vsel v2, $outhead, v1, $outmask
1332 vmr $outhead, v1
1333 stvx v2, 0, $out
1334 blr
1335
1336.align 4
1337Lschedule_mangle_dec:
1338 # inverse mix columns
1339 # lea .Lk_dksd(%rip),%r11
1340 vsrb v1, v0, v8 # vpsrlb \$4, %xmm4, %xmm1 # 1 = hi
1341 #and v4, v0, v9 # vpand %xmm9, %xmm4, %xmm4 # 4 = lo
1342
1343 # vmovdqa 0x00(%r11), %xmm2
1344 vperm v2, v16, v16, v0 # vpshufb %xmm4, %xmm2, %xmm2
1345 # vmovdqa 0x10(%r11), %xmm3
1346 vperm v3, v17, v17, v1 # vpshufb %xmm1, %xmm3, %xmm3
1347 vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3
1348 vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3
1349
1350 # vmovdqa 0x20(%r11), %xmm2
1351 vperm v2, v18, v18, v0 # vpshufb %xmm4, %xmm2, %xmm2
1352 vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2
1353 # vmovdqa 0x30(%r11), %xmm3
1354 vperm v3, v19, v19, v1 # vpshufb %xmm1, %xmm3, %xmm3
1355 vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3
1356 vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3
1357
1358 # vmovdqa 0x40(%r11), %xmm2
1359 vperm v2, v20, v20, v0 # vpshufb %xmm4, %xmm2, %xmm2
1360 vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2
1361 # vmovdqa 0x50(%r11), %xmm3
1362 vperm v3, v21, v21, v1 # vpshufb %xmm1, %xmm3, %xmm3
1363 vxor v3, v3, v2 # vpxor %xmm2, %xmm3, %xmm3
1364
1365 # vmovdqa 0x60(%r11), %xmm2
1366 vperm v2, v22, v22, v0 # vpshufb %xmm4, %xmm2, %xmm2
1367 vperm v3, v3, v9, v25 # vpshufb %xmm5, %xmm3, %xmm3
1368 # vmovdqa 0x70(%r11), %xmm4
1369 vperm v4, v23, v23, v1 # vpshufb %xmm1, %xmm4, %xmm4
1370 lvx v1, r8, r10 # vmovdqa (%r8,%r10), %xmm1
1371 vxor v2, v2, v3 # vpxor %xmm3, %xmm2, %xmm2
1372 vxor v3, v4, v2 # vpxor %xmm2, %xmm4, %xmm3
1373
1374 addi $out, $out, -16 # add \$-16, %rdx
1375
1376 vperm v3, v3, v3, v1 # vpshufb %xmm1, %xmm3, %xmm3
1377 addi r8, r8, -16 # add \$-16, %r8
1378 andi. r8, r8, 0x30 # and \$0x30, %r8
1379
1380 #stvx v3, 0, $out # vmovdqu %xmm3, (%rdx)
1381 vperm v1, v3, v3, $outperm # rotate right/left
1382 vsel v2, $outhead, v1, $outmask
1383 vmr $outhead, v1
1384 stvx v2, 0, $out
1385 blr
1386 .long 0
1387 .byte 0,12,0x14,0,0,0,0,0
1388
1389.globl .vpaes_set_encrypt_key
1390.align 5
1391.vpaes_set_encrypt_key:
1392 $STU $sp,-$FRAME($sp)
1393 li r10,`15+6*$SIZE_T`
1394 li r11,`31+6*$SIZE_T`
1395 mflr r0
1396 mfspr r6, 256 # save vrsave
1397 stvx v20,r10,$sp
1398 addi r10,r10,32
1399 stvx v21,r11,$sp
1400 addi r11,r11,32
1401 stvx v22,r10,$sp
1402 addi r10,r10,32
1403 stvx v23,r11,$sp
1404 addi r11,r11,32
1405 stvx v24,r10,$sp
1406 addi r10,r10,32
1407 stvx v25,r11,$sp
1408 addi r11,r11,32
1409 stvx v26,r10,$sp
1410 addi r10,r10,32
1411 stvx v27,r11,$sp
1412 addi r11,r11,32
1413 stvx v28,r10,$sp
1414 addi r10,r10,32
1415 stvx v29,r11,$sp
1416 addi r11,r11,32
1417 stvx v30,r10,$sp
1418 stvx v31,r11,$sp
1419 stw r6,`$FRAME-4`($sp) # save vrsave
1420 li r7, -1
1421 $PUSH r0, `$FRAME+$LRSAVE`($sp)
1422 mtspr 256, r7 # preserve all AltiVec registers
1423
1424 srwi r9, $bits, 5 # shr \$5,%eax
1425 addi r9, r9, 6 # add \$5,%eax
1426 stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;
1427
1428 cmplw $dir, $bits, $bits # set encrypt direction
1429 li r8, 0x30 # mov \$0x30,%r8d
1430 bl _vpaes_schedule_core
1431
1432 $POP r0, `$FRAME+$LRSAVE`($sp)
1433 li r10,`15+6*$SIZE_T`
1434 li r11,`31+6*$SIZE_T`
1435 mtspr 256, r6 # restore vrsave
1436 mtlr r0
1437 xor r3, r3, r3
1438 lvx v20,r10,$sp
1439 addi r10,r10,32
1440 lvx v21,r11,$sp
1441 addi r11,r11,32
1442 lvx v22,r10,$sp
1443 addi r10,r10,32
1444 lvx v23,r11,$sp
1445 addi r11,r11,32
1446 lvx v24,r10,$sp
1447 addi r10,r10,32
1448 lvx v25,r11,$sp
1449 addi r11,r11,32
1450 lvx v26,r10,$sp
1451 addi r10,r10,32
1452 lvx v27,r11,$sp
1453 addi r11,r11,32
1454 lvx v28,r10,$sp
1455 addi r10,r10,32
1456 lvx v29,r11,$sp
1457 addi r11,r11,32
1458 lvx v30,r10,$sp
1459 lvx v31,r11,$sp
1460 addi $sp,$sp,$FRAME
1461 blr
1462 .long 0
1463 .byte 0,12,0x04,1,0x80,0,3,0
1464 .long 0
1465.size .vpaes_set_encrypt_key,.-.vpaes_set_encrypt_key
1466
1467.globl .vpaes_set_decrypt_key
1468.align 4
1469.vpaes_set_decrypt_key:
1470 $STU $sp,-$FRAME($sp)
1471 li r10,`15+6*$SIZE_T`
1472 li r11,`31+6*$SIZE_T`
1473 mflr r0
1474 mfspr r6, 256 # save vrsave
1475 stvx v20,r10,$sp
1476 addi r10,r10,32
1477 stvx v21,r11,$sp
1478 addi r11,r11,32
1479 stvx v22,r10,$sp
1480 addi r10,r10,32
1481 stvx v23,r11,$sp
1482 addi r11,r11,32
1483 stvx v24,r10,$sp
1484 addi r10,r10,32
1485 stvx v25,r11,$sp
1486 addi r11,r11,32
1487 stvx v26,r10,$sp
1488 addi r10,r10,32
1489 stvx v27,r11,$sp
1490 addi r11,r11,32
1491 stvx v28,r10,$sp
1492 addi r10,r10,32
1493 stvx v29,r11,$sp
1494 addi r11,r11,32
1495 stvx v30,r10,$sp
1496 stvx v31,r11,$sp
1497 stw r6,`$FRAME-4`($sp) # save vrsave
1498 li r7, -1
1499 $PUSH r0, `$FRAME+$LRSAVE`($sp)
1500 mtspr 256, r7 # preserve all AltiVec registers
1501
1502 srwi r9, $bits, 5 # shr \$5,%eax
1503 addi r9, r9, 6 # add \$5,%eax
1504 stw r9, 240($out) # mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5;
1505
1506 slwi r9, r9, 4 # shl \$4,%eax
1507 add $out, $out, r9 # lea (%rdx,%rax),%rdx
1508
1509 cmplwi $dir, $bits, 0 # set decrypt direction
1510 srwi r8, $bits, 1 # shr \$1,%r8d
1511 andi. r8, r8, 32 # and \$32,%r8d
1512 xori r8, r8, 32 # xor \$32,%r8d # nbits==192?0:32
1513 bl _vpaes_schedule_core
1514
1515 $POP r0, `$FRAME+$LRSAVE`($sp)
1516 li r10,`15+6*$SIZE_T`
1517 li r11,`31+6*$SIZE_T`
1518 mtspr 256, r6 # restore vrsave
1519 mtlr r0
1520 xor r3, r3, r3
1521 lvx v20,r10,$sp
1522 addi r10,r10,32
1523 lvx v21,r11,$sp
1524 addi r11,r11,32
1525 lvx v22,r10,$sp
1526 addi r10,r10,32
1527 lvx v23,r11,$sp
1528 addi r11,r11,32
1529 lvx v24,r10,$sp
1530 addi r10,r10,32
1531 lvx v25,r11,$sp
1532 addi r11,r11,32
1533 lvx v26,r10,$sp
1534 addi r10,r10,32
1535 lvx v27,r11,$sp
1536 addi r11,r11,32
1537 lvx v28,r10,$sp
1538 addi r10,r10,32
1539 lvx v29,r11,$sp
1540 addi r11,r11,32
1541 lvx v30,r10,$sp
1542 lvx v31,r11,$sp
1543 addi $sp,$sp,$FRAME
1544 blr
1545 .long 0
1546 .byte 0,12,0x04,1,0x80,0,3,0
1547 .long 0
1548.size .vpaes_set_decrypt_key,.-.vpaes_set_decrypt_key
1549___
1550}
1551
1552my $consts=1;
1553foreach (split("\n",$code)) {
1554 s/\`([^\`]*)\`/eval $1/geo;
1555
1556 # constants table endian-specific conversion
1557 if ($consts && m/\.long\s+(.+)\s+(\?[a-z]*)$/o) {
1558 my $conv=$2;
1559 my @bytes=();
1560
1561 # convert to endian-agnostic format
1562 foreach (split(/,\s+/,$1)) {
1563 my $l = /^0/?oct:int;
1564 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
1565 }
1566
1567 # little-endian conversion
1568 if ($flavour =~ /le$/o) {
1569 SWITCH: for($conv) {
1570 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
1571 /\?rev/ && do { @bytes=reverse(@bytes); last; };
1572 }
1573 }
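 # Worked example of the rules above: the row
 #   .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f ?inv
 # becomes the bytes 0x00,0x01,...,0x0f; on a little-endian flavour
 # ?inv xors each byte with 0xf (0x0f,0x0e,...,0x00), ?rev reverses
 # the 16 bytes, and ?asis leaves them untouched; big-endian flavours
 # always keep the bytes as-is.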
1574
1575 #emit
1576 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
1577 next;
1578 }
1579 $consts=0 if (m/Lconsts:/o); # end of table
1580
1581 # instructions prefixed with '?' are endian-specific and need
1582 # to be adjusted accordingly...
1583 if ($flavour =~ /le$/o) { # little-endian
1584 s/\?lvsr/lvsl/o or
1585 s/\?lvsl/lvsr/o or
1586 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
1587 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
1588 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
1589 } else { # big-endian
1590 s/\?([a-z]+)/$1/o;
1591 }
1592
1593 print $_,"\n";
1594}
1595
1596close STDOUT or die "error closing STDOUT: $!";