#! /usr/bin/env perl
# Copyright 2013-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by David S. Miller and Andy Polyakov.
# The module is licensed under 2-clause BSD
# license. March 2013. All rights reserved.
# ====================================================================

######################################################################
# DES for SPARC T4.
#
# As with other hardware-assisted ciphers CBC encrypt results [for
# aligned data] are virtually identical to critical path lengths:
#
#		DES		Triple-DES
# CBC encrypt	4.14/4.15(*)	11.7/11.7
# CBC decrypt	1.77/4.11(**)	6.42/7.47
#
#			 (*)	numbers after slash are for
#				misaligned data;
#			 (**)	this is result for largest
#				block size, unlike all other
#				cases smaller blocks results
#				are better[?];

# Work out the directory this script lives in (everything up to and
# including the last '/' or '\' of $0) so that the shared perlasm helper
# can be located no matter what the current working directory is.  When
# $0 carries no directory component, fall back to an empty prefix instead
# of relying on whatever $1 happened to hold.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# The last command-line argument names the output file.  Use the
# three-argument form of open so the name is never parsed for mode
# characters, and fail loudly if the file cannot be created.  When no
# argument was supplied, leave STDOUT alone so the generated assembly is
# simply written to the existing standard output.
$output=pop;
if (defined $output) {
    open STDOUT,'>',$output or die "can't open $output: $!";
}

# Common preamble of the generated assembly: pull in the architecture
# macros, declare %g2/%g3 as scratch registers for 64-bit builds and
# switch to the text section.
$code.=<<___;
#include "sparc_arch.h"

#ifdef	__arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

.text
___

# des_t4_key_expand
#
# Register arguments of the generated routine:
#   $inp (%o0) - address of the 8-byte DES key; it may be misaligned, in
#                which case a second doubleword is loaded and the key is
#                extracted with alignaddr/faligndata.
#   $out (%o1) - address of the expanded key schedule; sixteen doublewords
#                are stored at offsets 0x00 through 0x78 (128 bytes).
#
# The schedule is derived with the des_kexpand instruction; stores are
# interleaved with the expansion steps, which is why the offsets are not
# written in strictly ascending order.
{ my ($inp,$out)=("%o0","%o1");

$code.=<<___;
.align	32
.globl	des_t4_key_expand
.type	des_t4_key_expand,#function
des_t4_key_expand:
	andcc		$inp, 0x7, %g0
	alignaddr	$inp, %g0, $inp
	bz,pt		%icc, 1f
	ldd		[$inp + 0x00], %f0
	ldd		[$inp + 0x08], %f2
	faligndata	%f0, %f2, %f0
1:	des_kexpand	%f0, 0, %f0
	des_kexpand	%f0, 1, %f2
	std		%f0, [$out + 0x00]
	des_kexpand	%f2, 3, %f6
	std		%f2, [$out + 0x08]
	des_kexpand	%f2, 2, %f4
	des_kexpand	%f6, 3, %f10
	std		%f6, [$out + 0x18]
	des_kexpand	%f6, 2, %f8
	std		%f4, [$out + 0x10]
	des_kexpand	%f10, 3, %f14
	std		%f10, [$out + 0x28]
	des_kexpand	%f10, 2, %f12
	std		%f8, [$out + 0x20]
	des_kexpand	%f14, 1, %f16
	std		%f14, [$out + 0x38]
	des_kexpand	%f16, 3, %f20
	std		%f12, [$out + 0x30]
	des_kexpand	%f16, 2, %f18
	std		%f16, [$out + 0x40]
	des_kexpand	%f20, 3, %f24
	std		%f20, [$out + 0x50]
	des_kexpand	%f20, 2, %f22
	std		%f18, [$out + 0x48]
	des_kexpand	%f24, 3, %f28
	std		%f24, [$out + 0x60]
	des_kexpand	%f24, 2, %f26
	std		%f22, [$out + 0x58]
	des_kexpand	%f28, 1, %f30
	std		%f28, [$out + 0x70]
	std		%f26, [$out + 0x68]
	retl
	std		%f30, [$out + 0x78]
.size	des_t4_key_expand,.-des_t4_key_expand
___
}
# CBC-mode entry points: des_t4_cbc_encrypt, des_t4_cbc_decrypt,
# des_t4_ede3_cbc_encrypt and des_t4_ede3_cbc_decrypt.
#
# Register arguments shared by all four generated routines:
#   $inp  (%o0) - input buffer, any alignment
#   $out  (%o1) - output buffer, any alignment
#   $len  (%o2) - length in bytes; converted to a count of 8-byte blocks
#                 with "srlx $len, 3".  Only len == 0 is rejected (branch
#                 to .Lcbc_abort); NOTE(review): a non-zero length below
#                 8 is not checked here, so callers presumably always
#                 pass whole blocks - confirm against the callers.
#   $key  (%o3) - expanded key schedule: 0x80 bytes for single DES,
#                 three consecutive 0x80-byte schedules for EDE3
#   $ivec (%o4) - 8-byte IV, read on entry and written back on exit
#
# $::size_t_cc is not set in this file; presumably it is provided by
# sparcv9_modes.pl - confirm.
#
# Scratch registers:
#   $ileft  (%g1) - input misalignment in bits, (inp & 7) * 8
#   $iright (%g2) - negated $ileft, used as the complementary shift count
#   $omask  (%g3) - byte mask for partial stores, 0xff >> (out & 7);
#                   forced to zero when the output is 8-byte aligned
#
# Misaligned input is assembled from two aligned 64-bit loads that are
# shifted and or-ed together.  Misaligned output takes the "2:" path,
# which rotates the result with faligndata and writes it with two
# partial stores.  That path re-enters the loop at "<loop label>+4",
# i.e. past the first ldx, because the ldxa at "2:" has already loaded
# the next input doubleword into %g4.
#
# The decrypt routines walk the key schedule in the opposite direction
# and keep the previous ciphertext block in %f2 as the running IV.
{ my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
  my ($ileft,$iright,$omask) = map("%g$_",(1..3));

$code.=<<___;
.globl	des_t4_cbc_encrypt
.align	32
des_t4_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.Lcbc_abort:
	retl
	nop

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_cbc_encrypt,#function
.size	des_t4_cbc_encrypt,.-des_t4_cbc_encrypt

.globl	des_t4_cbc_decrypt
.align	32
des_t4_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x78], %f4	! load key schedule
	ldd		[$key + 0x70], %f6
	ldd		[$key + 0x68], %f8
	ldd		[$key + 0x60], %f10
	ldd		[$key + 0x58], %f12
	ldd		[$key + 0x50], %f14
	ldd		[$key + 0x48], %f16
	ldd		[$key + 0x40], %f18
	ldd		[$key + 0x38], %f20
	ldd		[$key + 0x30], %f22
	ldd		[$key + 0x28], %f24
	ldd		[$key + 0x20], %f26
	ldd		[$key + 0x18], %f28
	ldd		[$key + 0x10], %f30
	ldd		[$key + 0x08], %f32
	ldd		[$key + 0x00], %f34

.Ldes_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_cbc_decrypt,#function
.size	des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
___

# One might wonder why does one have back-to-back des_iip/des_ip
# pairs between EDE passes. Indeed, aren't they inverse of each other?
# They almost are. Outcome of the pair is 32-bit words being swapped
# in target register. Consider pair of des_iip/des_ip as a way to
# perform the due swap, it's actually fastest way in this case.
#
# Key schedule usage in the EDE3 routines: only the schedule for the
# first pass is loaded ahead of the loop (into %f4-%f34); the schedules
# for the second and third passes are reloaded into %f36-%f62 inside the
# loop, interleaved with the rounds.  Encryption reads offsets
# 0x00..0x78 ascending, then 0xf8..0x80 descending, then 0x100..0x178
# ascending; decryption reads 0x178..0x100 descending, then 0x80..0xf8
# ascending, then 0x78..0x00 descending.

$code.=<<___;
.globl	des_t4_ede3_cbc_encrypt
.align	32
des_t4_ede3_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_ede3_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x100-0x08], %f36
	ldd		[$key + 0x100-0x10], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x100-0x18], %f40
	ldd		[$key + 0x100-0x20], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x100-0x28], %f44
	ldd		[$key + 0x100-0x30], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x100-0x38], %f48
	ldd		[$key + 0x100-0x40], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x100-0x48], %f52
	ldd		[$key + 0x100-0x50], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x100-0x58], %f56
	ldd		[$key + 0x100-0x60], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x100-0x68], %f60
	ldd		[$key + 0x100-0x70], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100-0x78], %f36
	ldd		[$key + 0x100-0x80], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x100+0x00], %f40
	ldd		[$key + 0x100+0x08], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x100+0x10], %f44
	ldd		[$key + 0x100+0x18], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x100+0x20], %f48
	ldd		[$key + 0x100+0x28], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x100+0x30], %f52
	ldd		[$key + 0x100+0x38], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100+0x40], %f56
	ldd		[$key + 0x100+0x48], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x100+0x50], %f60
	ldd		[$key + 0x100+0x58], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x100+0x60], %f36
	ldd		[$key + 0x100+0x68], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x100+0x70], %f40
	ldd		[$key + 0x100+0x78], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~2x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_ede3_cbc_encrypt,#function
.size	des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt

.globl	des_t4_ede3_cbc_decrypt
.align	32
des_t4_ede3_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x100+0x78], %f4	! load key schedule
	ldd		[$key + 0x100+0x70], %f6
	ldd		[$key + 0x100+0x68], %f8
	ldd		[$key + 0x100+0x60], %f10
	ldd		[$key + 0x100+0x58], %f12
	ldd		[$key + 0x100+0x50], %f14
	ldd		[$key + 0x100+0x48], %f16
	ldd		[$key + 0x100+0x40], %f18
	ldd		[$key + 0x100+0x38], %f20
	ldd		[$key + 0x100+0x30], %f22
	ldd		[$key + 0x100+0x28], %f24
	ldd		[$key + 0x100+0x20], %f26
	ldd		[$key + 0x100+0x18], %f28
	ldd		[$key + 0x100+0x10], %f30
	ldd		[$key + 0x100+0x08], %f32
	ldd		[$key + 0x100+0x00], %f34

.Ldes_ede3_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x80+0x00], %f36
	ldd		[$key + 0x80+0x08], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x80+0x10], %f40
	ldd		[$key + 0x80+0x18], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x80+0x20], %f44
	ldd		[$key + 0x80+0x28], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x80+0x30], %f48
	ldd		[$key + 0x80+0x38], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x80+0x40], %f52
	ldd		[$key + 0x80+0x48], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x80+0x50], %f56
	ldd		[$key + 0x80+0x58], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x80+0x60], %f60
	ldd		[$key + 0x80+0x68], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80+0x70], %f36
	ldd		[$key + 0x80+0x78], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x80-0x08], %f40
	ldd		[$key + 0x80-0x10], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x80-0x18], %f44
	ldd		[$key + 0x80-0x20], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x80-0x28], %f48
	ldd		[$key + 0x80-0x30], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x80-0x38], %f52
	ldd		[$key + 0x80-0x40], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80-0x48], %f56
	ldd		[$key + 0x80-0x50], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x80-0x58], %f60
	ldd		[$key + 0x80-0x60], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x80-0x68], %f36
	ldd		[$key + 0x80-0x70], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x80-0x78], %f40
	ldd		[$key + 0x80-0x80], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_ede3_cbc_decrypt,#function
.size	des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
___
}
# Identification string embedded in the generated assembly, followed by
# a 4-byte alignment directive.
$code.=<<___;
.asciz	"DES for SPARC T4, David S. Miller, Andy Polyakov"
.align	4
___

# emit_assembler() is not defined in this file; presumably it comes from
# the sparcv9_modes.pl helper required at the top of the script and
# writes the accumulated $code to STDOUT - confirm against that helper.
emit_assembler();

# Buffered write errors only surface when the handle is closed, so a
# failed close has to abort the build.
close STDOUT or die "error closing STDOUT: $!";