#! /usr/bin/env perl
# Copyright 2012-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by David S. Miller and Andy Polyakov.
# The module is licensed under 2-clause BSD license. October 2012.
# All rights reserved.
# ====================================================================

######################################################################
# AES for SPARC T4.
#
# AES round instructions complete in 3 cycles and can be issued every
# cycle. This means that round calculations should take 4*rounds cycles,
# because any given round instruction depends on the result of *both*
# previous instructions:
#
#   |0 |1 |2 |3 |4
#   |01|01|01|
#      |23|23|23|
#               |01|01|...
#                  |23|...
#
# Provided that fxor [with IV] takes 3 cycles to complete, the critical
# path length for CBC encrypt would be 3+4*rounds, or in other words
# it should process one byte in at least (3+4*rounds)/16 cycles. This
# estimate doesn't account for "collateral" instructions, such as
# fetching input from memory, xor-ing it with the zero-round key and
# storing the result. Yet, *measured* performance [for data aligned
# at a 64-bit boundary!] deviates from this equation by less than 0.5%:
#
#               128-bit key    192-         256-
# CBC encrypt   2.70/2.90(*)   3.20/3.40    3.70/3.90
#                       (*) numbers after slash are for
#                           misaligned data.
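#
# For reference, AES uses 10/12/14 rounds for 128/192/256-bit keys, so
# the estimate above works out to (3+4*10)/16=2.69, (3+4*12)/16=3.19 and
# (3+4*14)/16=3.69 cycles per processed byte, in line with the measured
# aligned-data figures.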
#
# The out-of-order execution logic managed to fully overlap "collateral"
# instructions with those on the critical path. Amazing!
#
# As with Intel AES-NI, the question is whether it's possible to improve
# performance of parallelizable modes by interleaving round
# instructions. Given the round instruction latency and throughput, the
# optimal interleave factor is 2. But can we expect a 2x performance
# improvement? Well, as round instructions can be issued one per
# cycle, they don't saturate the 2-way issue pipeline and therefore
# there is room for "collateral" calculations... Yet, a 2x speed-up
# over CBC encrypt remains unattainable:
#
#               128-bit key    192-         256-
# CBC decrypt   1.64/2.11      1.89/2.37    2.23/2.61
# CTR           1.64/2.08(*)   1.89/2.33    2.23/2.61
#                       (*) numbers after slash are for
#                           misaligned data.
#
# Estimates based on the instruction count, under the assumption that
# round instructions are not pairable with any other instruction,
# suggest that the latter is indeed the case and the pipeline runs
# underutilized. It should be noted that the T4 out-of-order execution
# logic is so capable that the performance gain from the 2x interleave
# is not even impressive, ~7-13% over non-interleaved code, largest
# for 256-bit keys.
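#
# For comparison, if the 2x interleave could hide all round-instruction
# latency, the throughput limit of one round instruction per cycle would
# give about 4*rounds cycles per pair of blocks, i.e. rounds/8 =
# 1.25/1.50/1.75 cycles per byte; the measured figures sit well above
# that bound, which is consistent with the round instructions not
# pairing with the "collateral" ones.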

# To anchor to something else, the software implementation processes
# one byte in 29 cycles with a 128-bit key on the same processor. Intel
# Sandy Bridge encrypts a byte in 5.07 cycles in CBC mode and decrypts
# it in 0.93, naturally with AES-NI.

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

$output = pop;
open STDOUT,">$output" or die "can't open $output: $!";

$::evp=1;   # if $evp is set to 0, the script generates a module with
# AES_[en|de]crypt, AES_set_[en|de]crypt_key and AES_cbc_encrypt entry
# points. These, however, are not fully compatible with openssl/aes.h,
# because they expect AES_KEY to be aligned on a 64-bit boundary. When
# used through EVP, alignment is arranged at the EVP layer. The second
# thing arranged by EVP is at least 32-bit alignment of the IV.
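# Note that when $::evp is 0, the AES_set_[en|de]crypt_key glue at the
# end of this file enforces the 64-bit alignment requirement by returning
# -1 for a misaligned AES_KEY pointer.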

######################################################################
# single-round subroutines
#
{
my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));

$code.=<<___;
#include "sparc_arch.h"

#ifdef __arch64__
.register %g2,#scratch
.register %g3,#scratch
#endif

.text

.globl aes_t4_encrypt
.align 32
aes_t4_encrypt:
        andcc $inp, 7, %g1 ! is input aligned?
        andn $inp, 7, $inp

        ldx [$key + 0], %g4
        ldx [$key + 8], %g5

        ldx [$inp + 0], %o4
        bz,pt %icc, 1f
        ldx [$inp + 8], %o5
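        ! Input is misaligned: reassemble the 16 input bytes from three
        ! aligned doublewords by shifting and merging.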
        ldx [$inp + 16], $inp
        sll %g1, 3, %g1
        sub %g0, %g1, %o3
        sllx %o4, %g1, %o4
        sllx %o5, %g1, %g1
        srlx %o5, %o3, %o5
        srlx $inp, %o3, %o3
        or %o5, %o4, %o4
        or %o3, %g1, %o5
1:
        ld [$key + 240], $rounds
        ldd [$key + 16], %f12
        ldd [$key + 24], %f14
        xor %g4, %o4, %o4
        xor %g5, %o5, %o5
        movxtod %o4, %f0
        movxtod %o5, %f2
        srl $rounds, 1, $rounds
        ldd [$key + 32], %f16
        sub $rounds, 1, $rounds
        ldd [$key + 40], %f18
        add $key, 48, $key

.Lenc:
        aes_eround01 %f12, %f0, %f2, %f4
        aes_eround23 %f14, %f0, %f2, %f2
        ldd [$key + 0], %f12
        ldd [$key + 8], %f14
        sub $rounds,1,$rounds
        aes_eround01 %f16, %f4, %f2, %f0
        aes_eround23 %f18, %f4, %f2, %f2
        ldd [$key + 16], %f16
        ldd [$key + 24], %f18
        brnz,pt $rounds, .Lenc
        add $key, 32, $key

        andcc $out, 7, $tmp ! is output aligned?
        aes_eround01 %f12, %f0, %f2, %f4
        aes_eround23 %f14, %f0, %f2, %f2
        aes_eround01_l %f16, %f4, %f2, %f0
        aes_eround23_l %f18, %f4, %f2, %f2

        bnz,pn %icc, 2f
        nop

        std %f0, [$out + 0]
        retl
        std %f2, [$out + 8]

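        ! Output is misaligned: rotate the result with faligndata and use
        ! partial stores, so no bytes outside the 16-byte output are touched.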
2:      alignaddrl $out, %g0, $out
        mov 0xff, $mask
        srl $mask, $tmp, $mask

        faligndata %f0, %f0, %f4
        faligndata %f0, %f2, %f6
        faligndata %f2, %f2, %f8

        stda %f4, [$out + $mask]0xc0 ! partial store
        std %f6, [$out + 8]
        add $out, 16, $out
        orn %g0, $mask, $mask
        retl
        stda %f8, [$out + $mask]0xc0 ! partial store
.type aes_t4_encrypt,#function
.size aes_t4_encrypt,.-aes_t4_encrypt

.globl aes_t4_decrypt
.align 32
aes_t4_decrypt:
        andcc $inp, 7, %g1 ! is input aligned?
        andn $inp, 7, $inp

        ldx [$key + 0], %g4
        ldx [$key + 8], %g5

        ldx [$inp + 0], %o4
        bz,pt %icc, 1f
        ldx [$inp + 8], %o5
        ldx [$inp + 16], $inp
        sll %g1, 3, %g1
        sub %g0, %g1, %o3
        sllx %o4, %g1, %o4
        sllx %o5, %g1, %g1
        srlx %o5, %o3, %o5
        srlx $inp, %o3, %o3
        or %o5, %o4, %o4
        or %o3, %g1, %o5
1:
        ld [$key + 240], $rounds
        ldd [$key + 16], %f12
        ldd [$key + 24], %f14
        xor %g4, %o4, %o4
        xor %g5, %o5, %o5
        movxtod %o4, %f0
        movxtod %o5, %f2
        srl $rounds, 1, $rounds
        ldd [$key + 32], %f16
        sub $rounds, 1, $rounds
        ldd [$key + 40], %f18
        add $key, 48, $key

.Ldec:
        aes_dround01 %f12, %f0, %f2, %f4
        aes_dround23 %f14, %f0, %f2, %f2
        ldd [$key + 0], %f12
        ldd [$key + 8], %f14
        sub $rounds,1,$rounds
        aes_dround01 %f16, %f4, %f2, %f0
        aes_dround23 %f18, %f4, %f2, %f2
        ldd [$key + 16], %f16
        ldd [$key + 24], %f18
        brnz,pt $rounds, .Ldec
        add $key, 32, $key

        andcc $out, 7, $tmp ! is output aligned?
        aes_dround01 %f12, %f0, %f2, %f4
        aes_dround23 %f14, %f0, %f2, %f2
        aes_dround01_l %f16, %f4, %f2, %f0
        aes_dround23_l %f18, %f4, %f2, %f2

        bnz,pn %icc, 2f
        nop

        std %f0, [$out + 0]
        retl
        std %f2, [$out + 8]

2:      alignaddrl $out, %g0, $out
        mov 0xff, $mask
        srl $mask, $tmp, $mask

        faligndata %f0, %f0, %f4
        faligndata %f0, %f2, %f6
        faligndata %f2, %f2, %f8

        stda %f4, [$out + $mask]0xc0 ! partial store
        std %f6, [$out + 8]
        add $out, 16, $out
        orn %g0, $mask, $mask
        retl
        stda %f8, [$out + $mask]0xc0 ! partial store
.type aes_t4_decrypt,#function
.size aes_t4_decrypt,.-aes_t4_decrypt
___
}

######################################################################
# key setup subroutines
#
{
my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5));
$code.=<<___;
.globl aes_t4_set_encrypt_key
.align 32
aes_t4_set_encrypt_key:
.Lset_encrypt_key:
        and $inp, 7, $tmp
        alignaddr $inp, %g0, $inp
        cmp $bits, 192
        ldd [$inp + 0], %f0
        bl,pt %icc,.L128
        ldd [$inp + 8], %f2

        be,pt %icc,.L192
        ldd [$inp + 16], %f4
        brz,pt $tmp, .L256aligned
        ldd [$inp + 24], %f6

        ldd [$inp + 32], %f8
        faligndata %f0, %f2, %f0
        faligndata %f2, %f4, %f2
        faligndata %f4, %f6, %f4
        faligndata %f6, %f8, %f6
.L256aligned:
___
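# Each pass of the loop below stores the current four round-key
# doublewords (32 bytes) and derives the next ones with aes_kexpand{0,1,2};
# six passes plus the tail lay down 240 bytes, i.e. the 15 round keys of
# AES-256, and the round count 14 is kept at offset 240.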
for ($i=0; $i<6; $i++) {
$code.=<<___;
        std %f0, [$out + `32*$i+0`]
        aes_kexpand1 %f0, %f6, $i, %f0
        std %f2, [$out + `32*$i+8`]
        aes_kexpand2 %f2, %f0, %f2
        std %f4, [$out + `32*$i+16`]
        aes_kexpand0 %f4, %f2, %f4
        std %f6, [$out + `32*$i+24`]
        aes_kexpand2 %f6, %f4, %f6
___
}
$code.=<<___;
        std %f0, [$out + `32*$i+0`]
        aes_kexpand1 %f0, %f6, $i, %f0
        std %f2, [$out + `32*$i+8`]
        aes_kexpand2 %f2, %f0, %f2
        std %f4, [$out + `32*$i+16`]
        std %f6, [$out + `32*$i+24`]
        std %f0, [$out + `32*$i+32`]
        std %f2, [$out + `32*$i+40`]

        mov 14, $tmp
        st $tmp, [$out + 240]
        retl
        xor %o0, %o0, %o0

.align 16
.L192:
        brz,pt $tmp, .L192aligned
        nop

        ldd [$inp + 24], %f6
        faligndata %f0, %f2, %f0
        faligndata %f2, %f4, %f2
        faligndata %f4, %f6, %f4
.L192aligned:
___
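# Seven passes plus the tail store 208 bytes, i.e. the 13 round keys of
# AES-192, with the round count 12 kept at offset 240.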
for ($i=0; $i<7; $i++) {
$code.=<<___;
        std %f0, [$out + `24*$i+0`]
        aes_kexpand1 %f0, %f4, $i, %f0
        std %f2, [$out + `24*$i+8`]
        aes_kexpand2 %f2, %f0, %f2
        std %f4, [$out + `24*$i+16`]
        aes_kexpand2 %f4, %f2, %f4
___
}
$code.=<<___;
        std %f0, [$out + `24*$i+0`]
        aes_kexpand1 %f0, %f4, $i, %f0
        std %f2, [$out + `24*$i+8`]
        aes_kexpand2 %f2, %f0, %f2
        std %f4, [$out + `24*$i+16`]
        std %f0, [$out + `24*$i+24`]
        std %f2, [$out + `24*$i+32`]

        mov 12, $tmp
        st $tmp, [$out + 240]
        retl
        xor %o0, %o0, %o0

.align 16
.L128:
        brz,pt $tmp, .L128aligned
        nop

        ldd [$inp + 16], %f4
        faligndata %f0, %f2, %f0
        faligndata %f2, %f4, %f2
.L128aligned:
___
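# Ten passes plus the tail store 176 bytes, i.e. the 11 round keys of
# AES-128, with the round count 10 kept at offset 240.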
for ($i=0; $i<10; $i++) {
$code.=<<___;
        std %f0, [$out + `16*$i+0`]
        aes_kexpand1 %f0, %f2, $i, %f0
        std %f2, [$out + `16*$i+8`]
        aes_kexpand2 %f2, %f0, %f2
___
}
$code.=<<___;
        std %f0, [$out + `16*$i+0`]
        std %f2, [$out + `16*$i+8`]

        mov 10, $tmp
        st $tmp, [$out + 240]
        retl
        xor %o0, %o0, %o0
.type aes_t4_set_encrypt_key,#function
.size aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key

.globl aes_t4_set_decrypt_key
.align 32
aes_t4_set_decrypt_key:
        mov %o7, %o5
        call .Lset_encrypt_key
        nop

        mov %o5, %o7
        sll $tmp, 4, $inp ! $tmp is number of rounds
        add $tmp, 2, $tmp
        add $out, $inp, $inp ! $inp=$out+16*rounds
        srl $tmp, 2, $tmp ! $tmp=(rounds+2)/4

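        ! Reverse the order of the round keys in place, two 16-byte round
        ! keys at a time from each end, to obtain the decryption schedule.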
.Lkey_flip:
        ldd [$out + 0], %f0
        ldd [$out + 8], %f2
        ldd [$out + 16], %f4
        ldd [$out + 24], %f6
        ldd [$inp + 0], %f8
        ldd [$inp + 8], %f10
        ldd [$inp - 16], %f12
        ldd [$inp - 8], %f14
        sub $tmp, 1, $tmp
        std %f0, [$inp + 0]
        std %f2, [$inp + 8]
        std %f4, [$inp - 16]
        std %f6, [$inp - 8]
        std %f8, [$out + 0]
        std %f10, [$out + 8]
        std %f12, [$out + 16]
        std %f14, [$out + 24]
        add $out, 32, $out
        brnz $tmp, .Lkey_flip
        sub $inp, 32, $inp

        retl
        xor %o0, %o0, %o0
.type aes_t4_set_decrypt_key,#function
.size aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
___
}

{{{
my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));

$code.=<<___;
.align 32
_aes128_encrypt_1x:
___
for ($i=0; $i<4; $i++) {
$code.=<<___;
        aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4
        aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2
        aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0
        aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
        aes_eround01 %f48, %f0, %f2, %f4
        aes_eround23 %f50, %f0, %f2, %f2
        aes_eround01_l %f52, %f4, %f2, %f0
        retl
        aes_eround23_l %f54, %f4, %f2, %f2
.type _aes128_encrypt_1x,#function
.size _aes128_encrypt_1x,.-_aes128_encrypt_1x

.align 32
_aes128_encrypt_2x:
___
for ($i=0; $i<4; $i++) {
$code.=<<___;
        aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8
        aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2
        aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10
        aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6
        aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0
        aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2
        aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4
        aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
        aes_eround01 %f48, %f0, %f2, %f8
        aes_eround23 %f50, %f0, %f2, %f2
        aes_eround01 %f48, %f4, %f6, %f10
        aes_eround23 %f50, %f4, %f6, %f6
        aes_eround01_l %f52, %f8, %f2, %f0
        aes_eround23_l %f54, %f8, %f2, %f2
        aes_eround01_l %f52, %f10, %f6, %f4
        retl
        aes_eround23_l %f54, %f10, %f6, %f6
.type _aes128_encrypt_2x,#function
.size _aes128_encrypt_2x,.-_aes128_encrypt_2x

.align 32
_aes128_loadkey:
        ldx [$key + 0], %g4
        ldx [$key + 8], %g5
___
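# Preload the complete AES-128 key schedule: the 0-round key goes to
# %g4/%g5 and the remaining 160 bytes to %f16-%f54, so the mode loops
# generated below never have to reload it.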
for ($i=2; $i<22;$i++) { # load key schedule
$code.=<<___;
        ldd [$key + `8*$i`], %f`12+2*$i`
___
}
$code.=<<___;
        retl
        nop
.type _aes128_loadkey,#function
.size _aes128_loadkey,.-_aes128_loadkey
_aes128_load_enckey=_aes128_loadkey
_aes128_load_deckey=_aes128_loadkey

___

&alg_cbc_encrypt_implement("aes",128);
if ($::evp) {
    &alg_ctr32_implement("aes",128);
    &alg_xts_implement("aes",128,"en");
    &alg_xts_implement("aes",128,"de");
}
&alg_cbc_decrypt_implement("aes",128);

$code.=<<___;
.align 32
_aes128_decrypt_1x:
___
for ($i=0; $i<4; $i++) {
$code.=<<___;
        aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4
        aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
        aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0
        aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
        aes_dround01 %f48, %f0, %f2, %f4
        aes_dround23 %f50, %f0, %f2, %f2
        aes_dround01_l %f52, %f4, %f2, %f0
        retl
        aes_dround23_l %f54, %f4, %f2, %f2
.type _aes128_decrypt_1x,#function
.size _aes128_decrypt_1x,.-_aes128_decrypt_1x

.align 32
_aes128_decrypt_2x:
___
for ($i=0; $i<4; $i++) {
$code.=<<___;
        aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8
        aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
        aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10
        aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6
        aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0
        aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2
        aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4
        aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
        aes_dround01 %f48, %f0, %f2, %f8
        aes_dround23 %f50, %f0, %f2, %f2
        aes_dround01 %f48, %f4, %f6, %f10
        aes_dround23 %f50, %f4, %f6, %f6
        aes_dround01_l %f52, %f8, %f2, %f0
        aes_dround23_l %f54, %f8, %f2, %f2
        aes_dround01_l %f52, %f10, %f6, %f4
        retl
        aes_dround23_l %f54, %f10, %f6, %f6
.type _aes128_decrypt_2x,#function
.size _aes128_decrypt_2x,.-_aes128_decrypt_2x
___

$code.=<<___;
.align 32
_aes192_encrypt_1x:
___
for ($i=0; $i<5; $i++) {
$code.=<<___;
        aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4
        aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2
        aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0
        aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
        aes_eround01 %f56, %f0, %f2, %f4
        aes_eround23 %f58, %f0, %f2, %f2
        aes_eround01_l %f60, %f4, %f2, %f0
        retl
        aes_eround23_l %f62, %f4, %f2, %f2
.type _aes192_encrypt_1x,#function
.size _aes192_encrypt_1x,.-_aes192_encrypt_1x

.align 32
_aes192_encrypt_2x:
___
for ($i=0; $i<5; $i++) {
$code.=<<___;
        aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8
        aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2
        aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10
        aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6
        aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0
        aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2
        aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4
        aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
        aes_eround01 %f56, %f0, %f2, %f8
        aes_eround23 %f58, %f0, %f2, %f2
        aes_eround01 %f56, %f4, %f6, %f10
        aes_eround23 %f58, %f4, %f6, %f6
        aes_eround01_l %f60, %f8, %f2, %f0
        aes_eround23_l %f62, %f8, %f2, %f2
        aes_eround01_l %f60, %f10, %f6, %f4
        retl
        aes_eround23_l %f62, %f10, %f6, %f6
.type _aes192_encrypt_2x,#function
.size _aes192_encrypt_2x,.-_aes192_encrypt_2x

.align 32
_aes256_encrypt_1x:
        aes_eround01 %f16, %f0, %f2, %f4
        aes_eround23 %f18, %f0, %f2, %f2
        ldd [$key + 208], %f16
        ldd [$key + 216], %f18
        aes_eround01 %f20, %f4, %f2, %f0
        aes_eround23 %f22, %f4, %f2, %f2
        ldd [$key + 224], %f20
        ldd [$key + 232], %f22
___
for ($i=1; $i<6; $i++) {
$code.=<<___;
        aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4
        aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2
        aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0
        aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
        aes_eround01 %f16, %f0, %f2, %f4
        aes_eround23 %f18, %f0, %f2, %f2
        ldd [$key + 16], %f16
        ldd [$key + 24], %f18
        aes_eround01_l %f20, %f4, %f2, %f0
        aes_eround23_l %f22, %f4, %f2, %f2
        ldd [$key + 32], %f20
        retl
        ldd [$key + 40], %f22
.type _aes256_encrypt_1x,#function
.size _aes256_encrypt_1x,.-_aes256_encrypt_1x

.align 32
_aes256_encrypt_2x:
        aes_eround01 %f16, %f0, %f2, %f8
        aes_eround23 %f18, %f0, %f2, %f2
        aes_eround01 %f16, %f4, %f6, %f10
        aes_eround23 %f18, %f4, %f6, %f6
        ldd [$key + 208], %f16
        ldd [$key + 216], %f18
        aes_eround01 %f20, %f8, %f2, %f0
        aes_eround23 %f22, %f8, %f2, %f2
        aes_eround01 %f20, %f10, %f6, %f4
        aes_eround23 %f22, %f10, %f6, %f6
        ldd [$key + 224], %f20
        ldd [$key + 232], %f22
___
for ($i=1; $i<6; $i++) {
$code.=<<___;
        aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8
        aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2
        aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10
        aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6
        aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0
        aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2
        aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4
        aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
        aes_eround01 %f16, %f0, %f2, %f8
        aes_eround23 %f18, %f0, %f2, %f2
        aes_eround01 %f16, %f4, %f6, %f10
        aes_eround23 %f18, %f4, %f6, %f6
        ldd [$key + 16], %f16
        ldd [$key + 24], %f18
        aes_eround01_l %f20, %f8, %f2, %f0
        aes_eround23_l %f22, %f8, %f2, %f2
        aes_eround01_l %f20, %f10, %f6, %f4
        aes_eround23_l %f22, %f10, %f6, %f6
        ldd [$key + 32], %f20
        retl
        ldd [$key + 40], %f22
.type _aes256_encrypt_2x,#function
.size _aes256_encrypt_2x,.-_aes256_encrypt_2x

.align 32
_aes192_loadkey:
        ldx [$key + 0], %g4
        ldx [$key + 8], %g5
___
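# Preload 208 bytes of key schedule: the 0-round key in %g4/%g5 and the
# next 192 bytes in %f16-%f62. That is the whole AES-192 schedule; for
# AES-256 the last two round keys stay in memory and are fetched from
# [key+208..232] on the fly by _aes256_{en,de}crypt_{1,2}x, which restore
# %f16-%f22 afterwards.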
for ($i=2; $i<26;$i++) { # load key schedule
$code.=<<___;
        ldd [$key + `8*$i`], %f`12+2*$i`
___
}
$code.=<<___;
        retl
        nop
.type _aes192_loadkey,#function
.size _aes192_loadkey,.-_aes192_loadkey
_aes256_loadkey=_aes192_loadkey
_aes192_load_enckey=_aes192_loadkey
_aes192_load_deckey=_aes192_loadkey
_aes256_load_enckey=_aes192_loadkey
_aes256_load_deckey=_aes192_loadkey
___

&alg_cbc_encrypt_implement("aes",256);
&alg_cbc_encrypt_implement("aes",192);
if ($::evp) {
    &alg_ctr32_implement("aes",256);
    &alg_xts_implement("aes",256,"en");
    &alg_xts_implement("aes",256,"de");
    &alg_ctr32_implement("aes",192);
}
&alg_cbc_decrypt_implement("aes",192);
&alg_cbc_decrypt_implement("aes",256);

$code.=<<___;
.align 32
_aes256_decrypt_1x:
        aes_dround01 %f16, %f0, %f2, %f4
        aes_dround23 %f18, %f0, %f2, %f2
        ldd [$key + 208], %f16
        ldd [$key + 216], %f18
        aes_dround01 %f20, %f4, %f2, %f0
        aes_dround23 %f22, %f4, %f2, %f2
        ldd [$key + 224], %f20
        ldd [$key + 232], %f22
___
for ($i=1; $i<6; $i++) {
$code.=<<___;
        aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4
        aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
        aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0
        aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
        aes_dround01 %f16, %f0, %f2, %f4
        aes_dround23 %f18, %f0, %f2, %f2
        ldd [$key + 16], %f16
        ldd [$key + 24], %f18
        aes_dround01_l %f20, %f4, %f2, %f0
        aes_dround23_l %f22, %f4, %f2, %f2
        ldd [$key + 32], %f20
        retl
        ldd [$key + 40], %f22
.type _aes256_decrypt_1x,#function
.size _aes256_decrypt_1x,.-_aes256_decrypt_1x

.align 32
_aes256_decrypt_2x:
        aes_dround01 %f16, %f0, %f2, %f8
        aes_dround23 %f18, %f0, %f2, %f2
        aes_dround01 %f16, %f4, %f6, %f10
        aes_dround23 %f18, %f4, %f6, %f6
        ldd [$key + 208], %f16
        ldd [$key + 216], %f18
        aes_dround01 %f20, %f8, %f2, %f0
        aes_dround23 %f22, %f8, %f2, %f2
        aes_dround01 %f20, %f10, %f6, %f4
        aes_dround23 %f22, %f10, %f6, %f6
        ldd [$key + 224], %f20
        ldd [$key + 232], %f22
___
for ($i=1; $i<6; $i++) {
$code.=<<___;
        aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8
        aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
        aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10
        aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6
        aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0
        aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2
        aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4
        aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
        aes_dround01 %f16, %f0, %f2, %f8
        aes_dround23 %f18, %f0, %f2, %f2
        aes_dround01 %f16, %f4, %f6, %f10
        aes_dround23 %f18, %f4, %f6, %f6
        ldd [$key + 16], %f16
        ldd [$key + 24], %f18
        aes_dround01_l %f20, %f8, %f2, %f0
        aes_dround23_l %f22, %f8, %f2, %f2
        aes_dround01_l %f20, %f10, %f6, %f4
        aes_dround23_l %f22, %f10, %f6, %f6
        ldd [$key + 32], %f20
        retl
        ldd [$key + 40], %f22
.type _aes256_decrypt_2x,#function
.size _aes256_decrypt_2x,.-_aes256_decrypt_2x

.align 32
_aes192_decrypt_1x:
___
for ($i=0; $i<5; $i++) {
$code.=<<___;
        aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4
        aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
        aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0
        aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
        aes_dround01 %f56, %f0, %f2, %f4
        aes_dround23 %f58, %f0, %f2, %f2
        aes_dround01_l %f60, %f4, %f2, %f0
        retl
        aes_dround23_l %f62, %f4, %f2, %f2
.type _aes192_decrypt_1x,#function
.size _aes192_decrypt_1x,.-_aes192_decrypt_1x

.align 32
_aes192_decrypt_2x:
___
for ($i=0; $i<5; $i++) {
$code.=<<___;
        aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8
        aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2
        aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10
        aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6
        aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0
        aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2
        aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4
        aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
        aes_dround01 %f56, %f0, %f2, %f8
        aes_dround23 %f58, %f0, %f2, %f2
        aes_dround01 %f56, %f4, %f6, %f10
        aes_dround23 %f58, %f4, %f6, %f6
        aes_dround01_l %f60, %f8, %f2, %f0
        aes_dround23_l %f62, %f8, %f2, %f2
        aes_dround01_l %f60, %f10, %f6, %f4
        retl
        aes_dround23_l %f62, %f10, %f6, %f6
.type _aes192_decrypt_2x,#function
.size _aes192_decrypt_2x,.-_aes192_decrypt_2x
___
}}}

if (!$::evp) {
$code.=<<___;
.global AES_encrypt
AES_encrypt=aes_t4_encrypt
.global AES_decrypt
AES_decrypt=aes_t4_decrypt
.global AES_set_encrypt_key
.align 32
AES_set_encrypt_key:
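        ! Sanity-check the arguments roughly the way AES_set_encrypt_key
        ! does: -1 for NULL or misaligned pointers, -2 for an unsupported
        ! number of bits.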
        andcc %o2, 7, %g0 ! check alignment
        bnz,a,pn %icc, 1f
        mov -1, %o0
        brz,a,pn %o0, 1f
        mov -1, %o0
        brz,a,pn %o2, 1f
        mov -1, %o0
        andncc %o1, 0x1c0, %g0
        bnz,a,pn %icc, 1f
        mov -2, %o0
        cmp %o1, 128
        bl,a,pn %icc, 1f
        mov -2, %o0
        b aes_t4_set_encrypt_key
        nop
1:      retl
        nop
.type AES_set_encrypt_key,#function
.size AES_set_encrypt_key,.-AES_set_encrypt_key

.global AES_set_decrypt_key
.align 32
AES_set_decrypt_key:
        andcc %o2, 7, %g0 ! check alignment
        bnz,a,pn %icc, 1f
        mov -1, %o0
        brz,a,pn %o0, 1f
        mov -1, %o0
        brz,a,pn %o2, 1f
        mov -1, %o0
        andncc %o1, 0x1c0, %g0
        bnz,a,pn %icc, 1f
        mov -2, %o0
        cmp %o1, 128
        bl,a,pn %icc, 1f
        mov -2, %o0
        b aes_t4_set_decrypt_key
        nop
1:      retl
        nop
.type AES_set_decrypt_key,#function
.size AES_set_decrypt_key,.-AES_set_decrypt_key
___

my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));

$code.=<<___;
.globl AES_cbc_encrypt
.align 32
AES_cbc_encrypt:
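        ! Dispatch on the round count at offset 240: 10 rounds selects the
        ! 128-bit path, 12 the 192-bit path and 14 the 256-bit path.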
        ld [$key + 240], %g1
        nop
        brz $enc, .Lcbc_decrypt
        cmp %g1, 12

        bl,pt %icc, aes128_t4_cbc_encrypt
        nop
        be,pn %icc, aes192_t4_cbc_encrypt
        nop
        ba aes256_t4_cbc_encrypt
        nop

.Lcbc_decrypt:
        bl,pt %icc, aes128_t4_cbc_decrypt
        nop
        be,pn %icc, aes192_t4_cbc_decrypt
        nop
        ba aes256_t4_cbc_decrypt
        nop
.type AES_cbc_encrypt,#function
.size AES_cbc_encrypt,.-AES_cbc_encrypt
___
}
$code.=<<___;
.asciz "AES for SPARC T4, David S. Miller, Andy Polyakov"
.align 4
___

&emit_assembler();

close STDOUT or die "error closing STDOUT: $!";