#! /usr/bin/env perl
# Copyright 2012-2021 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by David S. Miller and Andy Polyakov.
# The module is licensed under the 2-clause BSD license. October 2012.
# All rights reserved.
# ====================================================================

######################################################################
# AES for SPARC T4.
#
# AES round instructions complete in 3 cycles and can be issued every
# cycle. This means that the round calculations should take 4*rounds
# cycles, because any given round instruction depends on the results
# of *both* previous instructions:
#
#	|0 |1 |2 |3 |4
#	|01|01|01|
#	   |23|23|23|
#	            |01|01|...
#	               |23|...
#
# Provided that fxor [with IV] takes 3 cycles to complete, the
# critical path length for CBC encrypt would be 3+4*rounds, or in
# other words it should process one byte in at least (3+4*rounds)/16
# cycles. This estimate doesn't account for "collateral" instructions,
# such as fetching input from memory, xor-ing it with the zero-round
# key and storing the result. Yet, *measured* performance [for data
# aligned at a 64-bit boundary!] deviates from this equation by less
# than 0.5%:
#
#		128-bit key	192-		256-
# CBC encrypt	2.70/2.90(*)	3.20/3.40	3.70/3.90
#			(*) numbers after slash are for
#			    misaligned data.
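#
# For reference, the model above evaluates to (a quick sketch, with
# rounds = 10/12/14 for 128/192/256-bit keys):
#
#	perl -e 'printf("%.2f\n",(3+4*$_)/16) for (10,12,14)'
#
# which prints 2.69, 3.19 and 3.69 cycles per byte, within 0.5% of
# the aligned-data figures above.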
#
# Out-of-order execution logic managed to fully overlap "collateral"
# instructions with those on the critical path. Amazing!
#
# As with Intel AES-NI, the question is whether it's possible to
# improve performance of parallelizable modes by interleaving round
# instructions. Given the round instruction latency and throughput,
# the optimal interleave factor is 2. But can we expect a 2x
# performance improvement? Well, as round instructions can be issued
# one per cycle, they don't saturate the 2-way issue pipeline and
# therefore there is room for "collateral" calculations... Yet, a 2x
# speed-up over CBC encrypt remains unattainable:
#
#		128-bit key	192-		256-
# CBC decrypt	1.64/2.11	1.89/2.37	2.23/2.61
# CTR		1.64/2.08(*)	1.89/2.33	2.23/2.61
#			(*) numbers after slash are for
#			    misaligned data.
#
# Estimates based on the number of instructions, under the assumption
# that round instructions are not pairable with any other instruction,
# suggest that the latter is indeed the case and the pipeline runs
# underutilized. It should be noted that the T4 out-of-order execution
# logic is so capable that the performance gain from 2x interleave is
# not even impressive, ~7-13% over non-interleaved code, largest for
# 256-bit keys.
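#
# Comparing the two tables directly (again just a sketch):
#
#	perl -e 'printf("%.2fx\n",$_->[0]/$_->[1]) for ([2.70,1.64],[3.20,1.89],[3.70,2.23])'
#
# gives 1.65x/1.69x/1.66x for aligned-data CBC decrypt over CBC
# encrypt, well short of the theoretical 2x.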

# To anchor it to something else, the software implementation
# processes one byte in 29 cycles with a 128-bit key on the same
# processor. Intel Sandy Bridge encrypts a byte in 5.07 cycles in CBC
# mode and decrypts one in 0.93, naturally with AES-NI.

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

$output = pop and open STDOUT,">$output";
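# the output file is expected as the last command-line argument, so a
# typical invocation is just "perl <this-script> <output>.S"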

$::evp=1;	# if $evp is set to 0, the script generates a module with
# AES_[en|de]crypt, AES_set_[en|de]crypt_key and AES_cbc_encrypt entry
# points. These, however, are not fully compatible with openssl/aes.h,
# because they expect AES_KEY to be aligned at a 64-bit boundary. When
# used through EVP, alignment is arranged at the EVP layer. The second
# thing arranged by EVP is at least 32-bit alignment of the IV.

######################################################################
# single-round subroutines
#
{
my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5));

$code.=<<___;
#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

.text

.globl	aes_t4_encrypt
.align	32
aes_t4_encrypt:
	andcc		$inp, 7, %g1		! is input aligned?
	andn		$inp, 7, $inp

	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5

	ldx		[$inp + 0], %o4
	bz,pt		%icc, 1f
	ldx		[$inp + 8], %o5
	ldx		[$inp + 16], $inp
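	! input was misaligned: shift the three doublewords loaded above
	! by 8*(addr&7) bits and OR the halves together, in effect a
	! faligndata done on the integer side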
	sll		%g1, 3, %g1
	sub		%g0, %g1, %o3
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %o3, %o5
	srlx		$inp, %o3, %o3
	or		%o5, %o4, %o4
	or		%o3, %g1, %o5
1:
	ld		[$key + 240], $rounds
	ldd		[$key + 16], %f12
	ldd		[$key + 24], %f14
	xor		%g4, %o4, %o4
	xor		%g5, %o5, %o5
	movxtod		%o4, %f0
	movxtod		%o5, %f2
	srl		$rounds, 1, $rounds
	ldd		[$key + 32], %f16
	sub		$rounds, 1, $rounds
	ldd		[$key + 40], %f18
	add		$key, 48, $key

.Lenc:
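	! two rounds per iteration, reloading the next two round keys on
	! the fly; $rounds was set to Nr/2-1 above, and the final two
	! rounds are done after the loop, the very last one by the _l
	! flavour of the round instructions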
	aes_eround01	%f12, %f0, %f2, %f4
	aes_eround23	%f14, %f0, %f2, %f2
	ldd		[$key + 0], %f12
	ldd		[$key + 8], %f14
	sub		$rounds,1,$rounds
	aes_eround01	%f16, %f4, %f2, %f0
	aes_eround23	%f18, %f4, %f2, %f2
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	brnz,pt		$rounds, .Lenc
	add		$key, 32, $key

	andcc		$out, 7, $tmp		! is output aligned?
	aes_eround01	%f12, %f0, %f2, %f4
	aes_eround23	%f14, %f0, %f2, %f2
	aes_eround01_l	%f16, %f4, %f2, %f0
	aes_eround23_l	%f18, %f4, %f2, %f2

	bnz,pn		%icc, 2f
	nop

	std		%f0, [$out + 0]
	retl
	std		%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov		0xff, $mask
	srl		$mask, $tmp, $mask
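	! misaligned output: rotate the result with faligndata and write
	! the edge bytes with mask-controlled partial stores (ASI 0xc0),
	! so no byte outside the 16-byte destination is touched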

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda		%f4, [$out + $mask]0xc0	! partial store
	std		%f6, [$out + 8]
	add		$out, 16, $out
	orn		%g0, $mask, $mask
	retl
	stda		%f8, [$out + $mask]0xc0	! partial store
.type	aes_t4_encrypt,#function
.size	aes_t4_encrypt,.-aes_t4_encrypt

.globl	aes_t4_decrypt
.align	32
aes_t4_decrypt:
	andcc		$inp, 7, %g1		! is input aligned?
	andn		$inp, 7, $inp

	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5

	ldx		[$inp + 0], %o4
	bz,pt		%icc, 1f
	ldx		[$inp + 8], %o5
	ldx		[$inp + 16], $inp
	sll		%g1, 3, %g1
	sub		%g0, %g1, %o3
	sllx		%o4, %g1, %o4
	sllx		%o5, %g1, %g1
	srlx		%o5, %o3, %o5
	srlx		$inp, %o3, %o3
	or		%o5, %o4, %o4
	or		%o3, %g1, %o5
1:
	ld		[$key + 240], $rounds
	ldd		[$key + 16], %f12
	ldd		[$key + 24], %f14
	xor		%g4, %o4, %o4
	xor		%g5, %o5, %o5
	movxtod		%o4, %f0
	movxtod		%o5, %f2
	srl		$rounds, 1, $rounds
	ldd		[$key + 32], %f16
	sub		$rounds, 1, $rounds
	ldd		[$key + 40], %f18
	add		$key, 48, $key

.Ldec:
	aes_dround01	%f12, %f0, %f2, %f4
	aes_dround23	%f14, %f0, %f2, %f2
	ldd		[$key + 0], %f12
	ldd		[$key + 8], %f14
	sub		$rounds,1,$rounds
	aes_dround01	%f16, %f4, %f2, %f0
	aes_dround23	%f18, %f4, %f2, %f2
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	brnz,pt		$rounds, .Ldec
	add		$key, 32, $key

	andcc		$out, 7, $tmp		! is output aligned?
	aes_dround01	%f12, %f0, %f2, %f4
	aes_dround23	%f14, %f0, %f2, %f2
	aes_dround01_l	%f16, %f4, %f2, %f0
	aes_dround23_l	%f18, %f4, %f2, %f2

	bnz,pn		%icc, 2f
	nop

	std		%f0, [$out + 0]
	retl
	std		%f2, [$out + 8]

2:	alignaddrl	$out, %g0, $out
	mov		0xff, $mask
	srl		$mask, $tmp, $mask

	faligndata	%f0, %f0, %f4
	faligndata	%f0, %f2, %f6
	faligndata	%f2, %f2, %f8

	stda		%f4, [$out + $mask]0xc0	! partial store
	std		%f6, [$out + 8]
	add		$out, 16, $out
	orn		%g0, $mask, $mask
	retl
	stda		%f8, [$out + $mask]0xc0	! partial store
.type	aes_t4_decrypt,#function
.size	aes_t4_decrypt,.-aes_t4_decrypt
___
}

######################################################################
# key setup subroutines
#
{
my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5));
$code.=<<___;
.globl	aes_t4_set_encrypt_key
.align	32
aes_t4_set_encrypt_key:
.Lset_encrypt_key:
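	! dispatch on key length: <192 bits -> .L128, =192 -> .L192,
	! otherwise fall through to the 256-bit path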
	and		$inp, 7, $tmp
	alignaddr	$inp, %g0, $inp
	cmp		$bits, 192
	ldd		[$inp + 0], %f0
	bl,pt		%icc,.L128
	ldd		[$inp + 8], %f2

	be,pt		%icc,.L192
	ldd		[$inp + 16], %f4
	brz,pt		$tmp, .L256aligned
	ldd		[$inp + 24], %f6

	ldd		[$inp + 32], %f8
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
	faligndata	%f6, %f8, %f6
.L256aligned:
___
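# six iterations, each storing the current 32 bytes of schedule and
# expanding the next two round keys: aes_kexpand1 performs the
# rcon-dependent step (hence the round-constant index $i), while
# aes_kexpand0 handles the rcon-free SubWord step that only 256-bit
# schedules need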
for ($i=0; $i<6; $i++) {
    $code.=<<___;
	std		%f0, [$out + `32*$i+0`]
	aes_kexpand1	%f0, %f6, $i, %f0
	std		%f2, [$out + `32*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
	std		%f4, [$out + `32*$i+16`]
	aes_kexpand0	%f4, %f2, %f4
	std		%f6, [$out + `32*$i+24`]
	aes_kexpand2	%f6, %f4, %f6
___
}
$code.=<<___;
	std		%f0, [$out + `32*$i+0`]
	aes_kexpand1	%f0, %f6, $i, %f0
	std		%f2, [$out + `32*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
	std		%f4, [$out + `32*$i+16`]
	std		%f6, [$out + `32*$i+24`]
	std		%f0, [$out + `32*$i+32`]
	std		%f2, [$out + `32*$i+40`]

	mov		14, $tmp
	st		$tmp, [$out + 240]
	retl
	xor		%o0, %o0, %o0

.align	16
.L192:
	brz,pt		$tmp, .L192aligned
	nop

	ldd		[$inp + 24], %f6
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
	faligndata	%f4, %f6, %f4
.L192aligned:
___
for ($i=0; $i<7; $i++) {
    $code.=<<___;
	std		%f0, [$out + `24*$i+0`]
	aes_kexpand1	%f0, %f4, $i, %f0
	std		%f2, [$out + `24*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
	std		%f4, [$out + `24*$i+16`]
	aes_kexpand2	%f4, %f2, %f4
___
}
$code.=<<___;
	std		%f0, [$out + `24*$i+0`]
	aes_kexpand1	%f0, %f4, $i, %f0
	std		%f2, [$out + `24*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
	std		%f4, [$out + `24*$i+16`]
	std		%f0, [$out + `24*$i+24`]
	std		%f2, [$out + `24*$i+32`]

	mov		12, $tmp
	st		$tmp, [$out + 240]
	retl
	xor		%o0, %o0, %o0

.align	16
.L128:
	brz,pt		$tmp, .L128aligned
	nop

	ldd		[$inp + 16], %f4
	faligndata	%f0, %f2, %f0
	faligndata	%f2, %f4, %f2
.L128aligned:
___
for ($i=0; $i<10; $i++) {
    $code.=<<___;
	std		%f0, [$out + `16*$i+0`]
	aes_kexpand1	%f0, %f2, $i, %f0
	std		%f2, [$out + `16*$i+8`]
	aes_kexpand2	%f2, %f0, %f2
___
}
$code.=<<___;
	std		%f0, [$out + `16*$i+0`]
	std		%f2, [$out + `16*$i+8`]

	mov		10, $tmp
	st		$tmp, [$out + 240]
	retl
	xor		%o0, %o0, %o0
.type	aes_t4_set_encrypt_key,#function
.size	aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key

.globl	aes_t4_set_decrypt_key
.align	32
aes_t4_set_decrypt_key:
	mov		%o7, %o5
	call		.Lset_encrypt_key
	nop

	mov		%o5, %o7
	sll		$tmp, 4, $inp		! $tmp is number of rounds
	add		$tmp, 2, $tmp
	add		$out, $inp, $inp	! $inp=$out+16*rounds
	srl		$tmp, 2, $tmp		! $tmp=(rounds+2)/4

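	! the decryption schedule is the encryption schedule with the
	! 16-byte round keys in reverse order: each pass below swaps two
	! round keys at the front with two at the back, (rounds+2)/4
	! passes in total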
.Lkey_flip:
	ldd		[$out + 0], %f0
	ldd		[$out + 8], %f2
	ldd		[$out + 16], %f4
	ldd		[$out + 24], %f6
	ldd		[$inp + 0], %f8
	ldd		[$inp + 8], %f10
	ldd		[$inp - 16], %f12
	ldd		[$inp - 8], %f14
	sub		$tmp, 1, $tmp
	std		%f0, [$inp + 0]
	std		%f2, [$inp + 8]
	std		%f4, [$inp - 16]
	std		%f6, [$inp - 8]
	std		%f8, [$out + 0]
	std		%f10, [$out + 8]
	std		%f12, [$out + 16]
	std		%f14, [$out + 24]
	add		$out, 32, $out
	brnz		$tmp, .Lkey_flip
	sub		$inp, 32, $inp

	retl
	xor		%o0, %o0, %o0
.type	aes_t4_set_decrypt_key,#function
.size	aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key
___
}

{{{
my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5));
my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7));

$code.=<<___;
.align	32
_aes128_encrypt_1x:
___
for ($i=0; $i<4; $i++) {
    $code.=<<___;
	aes_eround01	%f`16+8*$i+0`, %f0, %f2, %f4
	aes_eround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_eround01	%f`16+8*$i+4`, %f4, %f2, %f0
	aes_eround23	%f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
	aes_eround01	%f48, %f0, %f2, %f4
	aes_eround23	%f50, %f0, %f2, %f2
	aes_eround01_l	%f52, %f4, %f2, %f0
	retl
	aes_eround23_l	%f54, %f4, %f2, %f2
.type	_aes128_encrypt_1x,#function
.size	_aes128_encrypt_1x,.-_aes128_encrypt_1x

.align	32
_aes128_encrypt_2x:
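	! two independent blocks, in %f0:%f2 and %f4:%f6, go through the
	! rounds with interleaved instructions, the 2x interleave
	! discussed at the top of the file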
___
for ($i=0; $i<4; $i++) {
    $code.=<<___;
	aes_eround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_eround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_eround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_eround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_eround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_eround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_eround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_eround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_eround01	%f48, %f0, %f2, %f8
	aes_eround23	%f50, %f0, %f2, %f2
	aes_eround01	%f48, %f4, %f6, %f10
	aes_eround23	%f50, %f4, %f6, %f6
	aes_eround01_l	%f52, %f8, %f2, %f0
	aes_eround23_l	%f54, %f8, %f2, %f2
	aes_eround01_l	%f52, %f10, %f6, %f4
	retl
	aes_eround23_l	%f54, %f10, %f6, %f6
.type	_aes128_encrypt_2x,#function
.size	_aes128_encrypt_2x,.-_aes128_encrypt_2x

.align	32
_aes128_loadkey:
	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5
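	! the round-0 key stays in %g4:%g5 for the integer-side input
	! xor (as in aes_t4_encrypt above); the rest of the schedule
	! goes into the floating-point registers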
___
for ($i=2; $i<22;$i++) {			# load key schedule
    $code.=<<___;
	ldd		[$key + `8*$i`], %f`12+2*$i`
___
}
$code.=<<___;
	retl
	nop
.type	_aes128_loadkey,#function
.size	_aes128_loadkey,.-_aes128_loadkey
_aes128_load_enckey=_aes128_loadkey
_aes128_load_deckey=_aes128_loadkey

___

&alg_cbc_encrypt_implement("aes",128);
if ($::evp) {
    &alg_ctr32_implement("aes",128);
    &alg_xts_implement("aes",128,"en");
    &alg_xts_implement("aes",128,"de");
}
&alg_cbc_decrypt_implement("aes",128);
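# (the mode entry points above are emitted by the alg_*_implement
# helpers from sparcv9_modes.pl, wrapping the _aes128_* primitives
# defined in this file)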

$code.=<<___;
.align	32
_aes128_decrypt_1x:
___
for ($i=0; $i<4; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f4
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f4, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
	aes_dround01	%f48, %f0, %f2, %f4
	aes_dround23	%f50, %f0, %f2, %f2
	aes_dround01_l	%f52, %f4, %f2, %f0
	retl
	aes_dround23_l	%f54, %f4, %f2, %f2
.type	_aes128_decrypt_1x,#function
.size	_aes128_decrypt_1x,.-_aes128_decrypt_1x

.align	32
_aes128_decrypt_2x:
___
for ($i=0; $i<4; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_dround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_dround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_dround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_dround01	%f48, %f0, %f2, %f8
	aes_dround23	%f50, %f0, %f2, %f2
	aes_dround01	%f48, %f4, %f6, %f10
	aes_dround23	%f50, %f4, %f6, %f6
	aes_dround01_l	%f52, %f8, %f2, %f0
	aes_dround23_l	%f54, %f8, %f2, %f2
	aes_dround01_l	%f52, %f10, %f6, %f4
	retl
	aes_dround23_l	%f54, %f10, %f6, %f6
.type	_aes128_decrypt_2x,#function
.size	_aes128_decrypt_2x,.-_aes128_decrypt_2x
___

$code.=<<___;
.align	32
_aes192_encrypt_1x:
___
for ($i=0; $i<5; $i++) {
    $code.=<<___;
	aes_eround01	%f`16+8*$i+0`, %f0, %f2, %f4
	aes_eround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_eround01	%f`16+8*$i+4`, %f4, %f2, %f0
	aes_eround23	%f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
	aes_eround01	%f56, %f0, %f2, %f4
	aes_eround23	%f58, %f0, %f2, %f2
	aes_eround01_l	%f60, %f4, %f2, %f0
	retl
	aes_eround23_l	%f62, %f4, %f2, %f2
.type	_aes192_encrypt_1x,#function
.size	_aes192_encrypt_1x,.-_aes192_encrypt_1x

.align	32
_aes192_encrypt_2x:
___
for ($i=0; $i<5; $i++) {
    $code.=<<___;
	aes_eround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_eround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_eround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_eround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_eround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_eround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_eround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_eround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_eround01	%f56, %f0, %f2, %f8
	aes_eround23	%f58, %f0, %f2, %f2
	aes_eround01	%f56, %f4, %f6, %f10
	aes_eround23	%f58, %f4, %f6, %f6
	aes_eround01_l	%f60, %f8, %f2, %f0
	aes_eround23_l	%f62, %f8, %f2, %f2
	aes_eround01_l	%f60, %f10, %f6, %f4
	retl
	aes_eround23_l	%f62, %f10, %f6, %f6
.type	_aes192_encrypt_2x,#function
.size	_aes192_encrypt_2x,.-_aes192_encrypt_2x

.align	32
_aes256_encrypt_1x:
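	! the 256-bit schedule does not fit in the FP register bank, so
	! %f16-%f22 are recycled: reloaded here with the last two rounds'
	! keys from [$key + 208..232], then restored from [$key + 16..40]
	! on the way out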
	aes_eround01	%f16, %f0, %f2, %f4
	aes_eround23	%f18, %f0, %f2, %f2
	ldd		[$key + 208], %f16
	ldd		[$key + 216], %f18
	aes_eround01	%f20, %f4, %f2, %f0
	aes_eround23	%f22, %f4, %f2, %f2
	ldd		[$key + 224], %f20
	ldd		[$key + 232], %f22
___
for ($i=1; $i<6; $i++) {
    $code.=<<___;
	aes_eround01	%f`16+8*$i+0`, %f0, %f2, %f4
	aes_eround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_eround01	%f`16+8*$i+4`, %f4, %f2, %f0
	aes_eround23	%f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
	aes_eround01	%f16, %f0, %f2, %f4
	aes_eround23	%f18, %f0, %f2, %f2
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	aes_eround01_l	%f20, %f4, %f2, %f0
	aes_eround23_l	%f22, %f4, %f2, %f2
	ldd		[$key + 32], %f20
	retl
	ldd		[$key + 40], %f22
.type	_aes256_encrypt_1x,#function
.size	_aes256_encrypt_1x,.-_aes256_encrypt_1x

.align	32
_aes256_encrypt_2x:
	aes_eround01	%f16, %f0, %f2, %f8
	aes_eround23	%f18, %f0, %f2, %f2
	aes_eround01	%f16, %f4, %f6, %f10
	aes_eround23	%f18, %f4, %f6, %f6
	ldd		[$key + 208], %f16
	ldd		[$key + 216], %f18
	aes_eround01	%f20, %f8, %f2, %f0
	aes_eround23	%f22, %f8, %f2, %f2
	aes_eround01	%f20, %f10, %f6, %f4
	aes_eround23	%f22, %f10, %f6, %f6
	ldd		[$key + 224], %f20
	ldd		[$key + 232], %f22
___
for ($i=1; $i<6; $i++) {
    $code.=<<___;
	aes_eround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_eround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_eround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_eround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_eround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_eround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_eround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_eround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_eround01	%f16, %f0, %f2, %f8
	aes_eround23	%f18, %f0, %f2, %f2
	aes_eround01	%f16, %f4, %f6, %f10
	aes_eround23	%f18, %f4, %f6, %f6
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	aes_eround01_l	%f20, %f8, %f2, %f0
	aes_eround23_l	%f22, %f8, %f2, %f2
	aes_eround01_l	%f20, %f10, %f6, %f4
	aes_eround23_l	%f22, %f10, %f6, %f6
	ldd		[$key + 32], %f20
	retl
	ldd		[$key + 40], %f22
.type	_aes256_encrypt_2x,#function
.size	_aes256_encrypt_2x,.-_aes256_encrypt_2x

.align	32
_aes192_loadkey:
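	! shared by the 192- and 256-bit paths: fills %g4:%g5 plus
	! %f16-%f62, as much of the key schedule as fits in registers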
	ldx		[$key + 0], %g4
	ldx		[$key + 8], %g5
___
for ($i=2; $i<26;$i++) {			# load key schedule
    $code.=<<___;
	ldd		[$key + `8*$i`], %f`12+2*$i`
___
}
$code.=<<___;
	retl
	nop
.type	_aes192_loadkey,#function
.size	_aes192_loadkey,.-_aes192_loadkey
_aes256_loadkey=_aes192_loadkey
_aes192_load_enckey=_aes192_loadkey
_aes192_load_deckey=_aes192_loadkey
_aes256_load_enckey=_aes192_loadkey
_aes256_load_deckey=_aes192_loadkey
___

&alg_cbc_encrypt_implement("aes",256);
&alg_cbc_encrypt_implement("aes",192);
if ($::evp) {
    &alg_ctr32_implement("aes",256);
    &alg_xts_implement("aes",256,"en");
    &alg_xts_implement("aes",256,"de");
    &alg_ctr32_implement("aes",192);
}
&alg_cbc_decrypt_implement("aes",192);
&alg_cbc_decrypt_implement("aes",256);

$code.=<<___;
.align	32
_aes256_decrypt_1x:
	aes_dround01	%f16, %f0, %f2, %f4
	aes_dround23	%f18, %f0, %f2, %f2
	ldd		[$key + 208], %f16
	ldd		[$key + 216], %f18
	aes_dround01	%f20, %f4, %f2, %f0
	aes_dround23	%f22, %f4, %f2, %f2
	ldd		[$key + 224], %f20
	ldd		[$key + 232], %f22
___
for ($i=1; $i<6; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f4
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f4, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
	aes_dround01	%f16, %f0, %f2, %f4
	aes_dround23	%f18, %f0, %f2, %f2
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	aes_dround01_l	%f20, %f4, %f2, %f0
	aes_dround23_l	%f22, %f4, %f2, %f2
	ldd		[$key + 32], %f20
	retl
	ldd		[$key + 40], %f22
.type	_aes256_decrypt_1x,#function
.size	_aes256_decrypt_1x,.-_aes256_decrypt_1x

.align	32
_aes256_decrypt_2x:
	aes_dround01	%f16, %f0, %f2, %f8
	aes_dround23	%f18, %f0, %f2, %f2
	aes_dround01	%f16, %f4, %f6, %f10
	aes_dround23	%f18, %f4, %f6, %f6
	ldd		[$key + 208], %f16
	ldd		[$key + 216], %f18
	aes_dround01	%f20, %f8, %f2, %f0
	aes_dround23	%f22, %f8, %f2, %f2
	aes_dround01	%f20, %f10, %f6, %f4
	aes_dround23	%f22, %f10, %f6, %f6
	ldd		[$key + 224], %f20
	ldd		[$key + 232], %f22
___
for ($i=1; $i<6; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_dround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_dround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_dround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_dround01	%f16, %f0, %f2, %f8
	aes_dround23	%f18, %f0, %f2, %f2
	aes_dround01	%f16, %f4, %f6, %f10
	aes_dround23	%f18, %f4, %f6, %f6
	ldd		[$key + 16], %f16
	ldd		[$key + 24], %f18
	aes_dround01_l	%f20, %f8, %f2, %f0
	aes_dround23_l	%f22, %f8, %f2, %f2
	aes_dround01_l	%f20, %f10, %f6, %f4
	aes_dround23_l	%f22, %f10, %f6, %f6
	ldd		[$key + 32], %f20
	retl
	ldd		[$key + 40], %f22
.type	_aes256_decrypt_2x,#function
.size	_aes256_decrypt_2x,.-_aes256_decrypt_2x

.align	32
_aes192_decrypt_1x:
___
for ($i=0; $i<5; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f4
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f4, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f4, %f2, %f2
___
}
$code.=<<___;
	aes_dround01	%f56, %f0, %f2, %f4
	aes_dround23	%f58, %f0, %f2, %f2
	aes_dround01_l	%f60, %f4, %f2, %f0
	retl
	aes_dround23_l	%f62, %f4, %f2, %f2
.type	_aes192_decrypt_1x,#function
.size	_aes192_decrypt_1x,.-_aes192_decrypt_1x

.align	32
_aes192_decrypt_2x:
___
for ($i=0; $i<5; $i++) {
    $code.=<<___;
	aes_dround01	%f`16+8*$i+0`, %f0, %f2, %f8
	aes_dround23	%f`16+8*$i+2`, %f0, %f2, %f2
	aes_dround01	%f`16+8*$i+0`, %f4, %f6, %f10
	aes_dround23	%f`16+8*$i+2`, %f4, %f6, %f6
	aes_dround01	%f`16+8*$i+4`, %f8, %f2, %f0
	aes_dround23	%f`16+8*$i+6`, %f8, %f2, %f2
	aes_dround01	%f`16+8*$i+4`, %f10, %f6, %f4
	aes_dround23	%f`16+8*$i+6`, %f10, %f6, %f6
___
}
$code.=<<___;
	aes_dround01	%f56, %f0, %f2, %f8
	aes_dround23	%f58, %f0, %f2, %f2
	aes_dround01	%f56, %f4, %f6, %f10
	aes_dround23	%f58, %f4, %f6, %f6
	aes_dround01_l	%f60, %f8, %f2, %f0
	aes_dround23_l	%f62, %f8, %f2, %f2
	aes_dround01_l	%f60, %f10, %f6, %f4
	retl
	aes_dround23_l	%f62, %f10, %f6, %f6
.type	_aes192_decrypt_2x,#function
.size	_aes192_decrypt_2x,.-_aes192_decrypt_2x
___
}}}

if (!$::evp) {
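# non-EVP build: provide openssl/aes.h-style entry points on top of the
# T4 routines; see the $::evp note near the top for alignment caveats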
$code.=<<___;
.global	AES_encrypt
AES_encrypt=aes_t4_encrypt
.global	AES_decrypt
AES_decrypt=aes_t4_decrypt
.global	AES_set_encrypt_key
.align	32
AES_set_encrypt_key:
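	! non-EVP entry: return -1 for NULL or misaligned key arguments,
	! -2 when the bit length cannot be one of 128/192/256 (a rough
	! check), otherwise tail-call the T4 key setup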
	andcc		%o2, 7, %g0		! check alignment
	bnz,a,pn	%icc, 1f
	mov		-1, %o0
	brz,a,pn	%o0, 1f
	mov		-1, %o0
	brz,a,pn	%o2, 1f
	mov		-1, %o0
	andncc		%o1, 0x1c0, %g0
	bnz,a,pn	%icc, 1f
	mov		-2, %o0
	cmp		%o1, 128
	bl,a,pn		%icc, 1f
	mov		-2, %o0
	b		aes_t4_set_encrypt_key
	nop
1:	retl
	nop
.type	AES_set_encrypt_key,#function
.size	AES_set_encrypt_key,.-AES_set_encrypt_key

.global	AES_set_decrypt_key
.align	32
AES_set_decrypt_key:
	andcc		%o2, 7, %g0		! check alignment
	bnz,a,pn	%icc, 1f
	mov		-1, %o0
	brz,a,pn	%o0, 1f
	mov		-1, %o0
	brz,a,pn	%o2, 1f
	mov		-1, %o0
	andncc		%o1, 0x1c0, %g0
	bnz,a,pn	%icc, 1f
	mov		-2, %o0
	cmp		%o1, 128
	bl,a,pn		%icc, 1f
	mov		-2, %o0
	b		aes_t4_set_decrypt_key
	nop
1:	retl
	nop
.type	AES_set_decrypt_key,#function
.size	AES_set_decrypt_key,.-AES_set_decrypt_key
___

my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5));

$code.=<<___;
.globl	AES_cbc_encrypt
.align	32
AES_cbc_encrypt:
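	! dispatch on the round count at key+240: 10/12/14 rounds select
	! the aes128/aes192/aes256 code path, encrypt or decrypt per $enc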
	ld		[$key + 240], %g1
	nop
	brz		$enc, .Lcbc_decrypt
	cmp		%g1, 12

	bl,pt		%icc, aes128_t4_cbc_encrypt
	nop
	be,pn		%icc, aes192_t4_cbc_encrypt
	nop
	ba		aes256_t4_cbc_encrypt
	nop

.Lcbc_decrypt:
	bl,pt		%icc, aes128_t4_cbc_decrypt
	nop
	be,pn		%icc, aes192_t4_cbc_decrypt
	nop
	ba		aes256_t4_cbc_decrypt
	nop
.type	AES_cbc_encrypt,#function
.size	AES_cbc_encrypt,.-AES_cbc_encrypt
___
}
$code.=<<___;
.asciz	"AES for SPARC T4, David S. Miller, Andy Polyakov"
.align	4
___

&emit_assembler();

close STDOUT or die "error closing STDOUT: $!";