#! /usr/bin/env perl
# Copyright 2013-2021 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by David S. Miller and Andy Polyakov.
# The module is licensed under 2-clause BSD
# license. March 2013. All rights reserved.
# ====================================================================

######################################################################
# DES for SPARC T4.
#
# As with other hardware-assisted ciphers CBC encrypt results [for
# aligned data] are virtually identical to critical path lengths:
#
#		DES		Triple-DES
# CBC encrypt	4.14/4.15(*)	11.7/11.7
# CBC decrypt	1.77/4.11(**)	6.42/7.47
#
#			 (*)	numbers after slash are for
#				misaligned data;
#			 (**)	this is the result for the largest
#				block size; unlike all other
#				cases, results for smaller
#				blocks are better[?];

# Work out the directory this script was invoked from so the shared perlasm
# helpers can be located independently of the current working directory.
# When $0 carries no directory component the match fails; fall back to an
# empty prefix instead of reusing whatever $1 held before.
# NOTE(review): $dir and $output are left as package variables because the
# file does not enable strict and the required helper may look at main::
# globals - confirm before converting them to lexicals.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
push(@INC, "${dir}", "${dir}../../perlasm");
require "sparcv9_modes.pl";

# The last command-line argument, when present, names the file that receives
# the generated assembly; without it the output goes to the inherited STDOUT.
# Three-argument open keeps odd characters in the name from being read as a
# mode, and a failed open is fatal rather than silently writing elsewhere.
if ($output = pop) {
    open(STDOUT, ">", $output) or die "can't open $output: $!";
}

# Preamble of the generated assembly file: make sure __ASSEMBLER__ is
# defined before pulling in crypto/sparc_arch.h, declare %g2/%g3 as scratch
# registers for 64-bit builds, and switch to the text section.
# $code is a package variable on purpose: the text accumulated here is
# handed to emit_assembler() at the end of the script.
$code.=<<___;
#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

#ifdef	__arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

.text
___

# des_t4_key_expand: expand one 64-bit DES key into the full key schedule.
#
#   %o0 (inp)  pointer to the 8-byte key, any alignment
#   %o1 (out)  pointer to the schedule buffer; sixteen doublewords are
#              stored at offsets 0x00..0x78 with std, i.e. 128 bytes
#
# The first ldd sits in the delay slot of the bz and therefore always
# executes; the second ldd and the faligndata merge are only reached when
# inp is not 8-byte aligned.  The schedule is produced by a chain of
# des_kexpand steps and the stores are interleaved with them, so the store
# order does not follow the offsets.  This is a leaf routine (retl) and the
# last store is issued from the return delay slot.
{ my ($inp,$out)=("%o0","%o1");

$code.=<<___;
.align	32
.globl	des_t4_key_expand
.type	des_t4_key_expand,#function
des_t4_key_expand:
	andcc		$inp, 0x7, %g0
	alignaddr	$inp, %g0, $inp
	bz,pt		%icc, 1f
	ldd		[$inp + 0x00], %f0
	ldd		[$inp + 0x08], %f2
	faligndata	%f0, %f2, %f0
1:	des_kexpand	%f0, 0, %f0
	des_kexpand	%f0, 1, %f2
	std		%f0, [$out + 0x00]
	des_kexpand	%f2, 3, %f6
	std		%f2, [$out + 0x08]
	des_kexpand	%f2, 2, %f4
	des_kexpand	%f6, 3, %f10
	std		%f6, [$out + 0x18]
	des_kexpand	%f6, 2, %f8
	std		%f4, [$out + 0x10]
	des_kexpand	%f10, 3, %f14
	std		%f10, [$out + 0x28]
	des_kexpand	%f10, 2, %f12
	std		%f8, [$out + 0x20]
	des_kexpand	%f14, 1, %f16
	std		%f14, [$out + 0x38]
	des_kexpand	%f16, 3, %f20
	std		%f12, [$out + 0x30]
	des_kexpand	%f16, 2, %f18
	std		%f16, [$out + 0x40]
	des_kexpand	%f20, 3, %f24
	std		%f20, [$out + 0x50]
	des_kexpand	%f20, 2, %f22
	std		%f18, [$out + 0x48]
	des_kexpand	%f24, 3, %f28
	std		%f24, [$out + 0x60]
	des_kexpand	%f24, 2, %f26
	std		%f22, [$out + 0x58]
	des_kexpand	%f28, 1, %f30
	std		%f28, [$out + 0x70]
	std		%f26, [$out + 0x68]
	retl
	std		%f30, [$out + 0x78]
.size	des_t4_key_expand,.-des_t4_key_expand
___
}
# Register assignment shared by all four CBC routines in this scope:
# arguments arrive in %o0-%o4, and %g1-%g3 hold the alignment bookkeeping.
{ my ($inp,$out,$len,$key,$ivec) = map("%o$_",(0..4));
  my ($ileft,$iright,$omask) = map("%g$_",(1..3));

# des_t4_cbc_encrypt: single-DES CBC encryption.
#
#   inp   source buffer, any alignment
#   out   destination buffer, any alignment
#   len   length in bytes; it is shifted right by 3 to get the number of
#         8-byte blocks, so a trailing partial block is not processed
#   key   key schedule, sixteen doublewords at 0x00..0x78, kept resident
#         in %f4-%f34 for the whole call
#   ivec  8-byte chaining value, read on entry and written back on exit
#
# A zero len returns immediately through .Lcbc_abort without touching
# ivec.  That label lives in this routine and is also the early exit of
# the other three CBC routines below.
#
# Misaligned input: inp is rounded down to 8 bytes, ileft is the bit offset
# and iright its negation; each block is then assembled from two ldx loads
# with a shift pair.  Misaligned output: out is rounded down via alignaddrl,
# omask is the byte mask for the first partial store and its complement
# (orn) the mask for the second; omask is zero when out is aligned and the
# plain std path is taken.
#
# On the partial-store path the next input doubleword is fetched with ldxa
# before the stores, and the loop is re-entered at .Ldes_cbc_enc_loop+4,
# i.e. one instruction past the ldx that would otherwise reload it.
$code.=<<___;
.globl	des_t4_cbc_encrypt
.align	32
des_t4_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.Lcbc_abort:
	retl
	nop

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_cbc_encrypt,#function
.size	des_t4_cbc_encrypt,.-des_t4_cbc_encrypt

! des_t4_cbc_decrypt: single-DES CBC decryption.
! Arguments arrive in %o0-%o4 as inp, out, len, key, ivec; len is in
! bytes and is shifted right by 3 to get the 8-byte block count.
! The key schedule is loaded in reverse order (key+0x78 down to key+0x00)
! into %f4-%f34, so the same eight double-round steps undo the cipher.
! %f2 carries the chaining value: it is xored into the decrypted block
! and then replaced by the ciphertext doubleword still held in %g4.
! A zero len leaves through .Lcbc_abort, which is defined inside
! des_t4_cbc_encrypt, so both routines must stay in the same file.
.globl	des_t4_cbc_decrypt
.align	32
des_t4_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x78], %f4	! load key schedule
	ldd		[$key + 0x70], %f6
	ldd		[$key + 0x68], %f8
	ldd		[$key + 0x60], %f10
	ldd		[$key + 0x58], %f12
	ldd		[$key + 0x50], %f14
	ldd		[$key + 0x48], %f16
	ldd		[$key + 0x40], %f18
	ldd		[$key + 0x38], %f20
	ldd		[$key + 0x30], %f22
	ldd		[$key + 0x28], %f24
	ldd		[$key + 0x20], %f26
	ldd		[$key + 0x18], %f28
	ldd		[$key + 0x10], %f30
	ldd		[$key + 0x08], %f32
	ldd		[$key + 0x00], %f34

.Ldes_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	des_round	%f20, %f22, %f0, %f0
	des_round	%f24, %f26, %f0, %f0
	des_round	%f28, %f30, %f0, %f0
	des_round	%f32, %f34, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~4x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_cbc_decrypt,#function
.size	des_t4_cbc_decrypt,.-des_t4_cbc_decrypt
___

# One might wonder why there are back-to-back des_iip/des_ip pairs
# between EDE passes. Indeed, aren't they inverses of each other?
# They almost are. The outcome of the pair is the 32-bit words being
# swapped in the target register. Consider the des_iip/des_ip pair as
# a way to perform the due swap; it's actually the fastest way in this
# case.

# des_t4_ede3_cbc_encrypt: Triple-DES (EDE3) CBC encryption.
#
# Arguments, block counting (len >> 3), chaining value handling and the
# input/output misalignment scheme are the same as in des_t4_cbc_encrypt;
# a zero len leaves through .Lcbc_abort defined there.  Each block goes
# through three DES passes, and the key argument is addressed as three
# consecutive 0x80-byte schedules:
#
#   pass 1: key+0x000 .. key+0x078, ascending,  resident in %f4-%f34
#   pass 2: key+0x0f8 .. key+0x080, descending, via %f36-%f62
#   pass 3: key+0x100 .. key+0x178, ascending,  via %f36-%f62
#
# The round keys for passes 2 and 3 are reloaded inside the loop for every
# block, with the ldd instructions interleaved between the rounds; %f36-%f42
# are reused within a pass once their first contents have been consumed.
$code.=<<___;
.globl	des_t4_ede3_cbc_encrypt
.align	32
des_t4_ede3_cbc_encrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f0	! load ivec
	ld		[$ivec + 4], %f1

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x00], %f4	! load key schedule
	ldd		[$key + 0x08], %f6
	ldd		[$key + 0x10], %f8
	ldd		[$key + 0x18], %f10
	ldd		[$key + 0x20], %f12
	ldd		[$key + 0x28], %f14
	ldd		[$key + 0x30], %f16
	ldd		[$key + 0x38], %f18
	ldd		[$key + 0x40], %f20
	ldd		[$key + 0x48], %f22
	ldd		[$key + 0x50], %f24
	ldd		[$key + 0x58], %f26
	ldd		[$key + 0x60], %f28
	ldd		[$key + 0x68], %f30
	ldd		[$key + 0x70], %f32
	ldd		[$key + 0x78], %f34

.Ldes_ede3_cbc_enc_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f2
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	fxor		%f2, %f0, %f0		! ^= ivec
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x100-0x08], %f36
	ldd		[$key + 0x100-0x10], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x100-0x18], %f40
	ldd		[$key + 0x100-0x20], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x100-0x28], %f44
	ldd		[$key + 0x100-0x30], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x100-0x38], %f48
	ldd		[$key + 0x100-0x40], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x100-0x48], %f52
	ldd		[$key + 0x100-0x50], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x100-0x58], %f56
	ldd		[$key + 0x100-0x60], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x100-0x68], %f60
	ldd		[$key + 0x100-0x70], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100-0x78], %f36
	ldd		[$key + 0x100-0x80], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x100+0x00], %f40
	ldd		[$key + 0x100+0x08], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x100+0x10], %f44
	ldd		[$key + 0x100+0x18], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x100+0x20], %f48
	ldd		[$key + 0x100+0x28], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x100+0x30], %f52
	ldd		[$key + 0x100+0x38], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x100+0x40], %f56
	ldd		[$key + 0x100+0x48], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x100+0x50], %f60
	ldd		[$key + 0x100+0x58], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x100+0x60], %f36
	ldd		[$key + 0x100+0x68], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x100+0x70], %f40
	ldd		[$key + 0x100+0x78], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop
	add		$out, 8, $out

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~2x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f2		! handle unaligned output

	stda		%f2, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f2, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_enc_loop+4
	orn		%g0, $omask, $omask

	st		%f0, [$ivec + 0]	! write out ivec
	retl
	st		%f1, [$ivec + 4]
.type	des_t4_ede3_cbc_encrypt,#function
.size	des_t4_ede3_cbc_encrypt,.-des_t4_ede3_cbc_encrypt

! des_t4_ede3_cbc_decrypt: Triple-DES (EDE3) CBC decryption.
! Arguments arrive in %o0-%o4 as inp, out, len, key, ivec; len is in
! bytes and is shifted right by 3 to get the 8-byte block count.
! The three 0x80-byte schedules behind key are walked in the opposite
! order and direction to the EDE3 encrypt routine:
!   pass 1: key+0x178 down to key+0x100, resident in %f4-%f34
!   pass 2: key+0x080 up to key+0x0f8, reloaded for every block
!   pass 3: key+0x078 down to key+0x000, reloaded for every block
! %f2 carries the chaining value: it is xored into the decrypted block
! and then replaced by the ciphertext doubleword still held in %g4.
! A zero len leaves through .Lcbc_abort, which is defined inside
! des_t4_cbc_encrypt, so both routines must stay in the same file.
.globl	des_t4_ede3_cbc_decrypt
.align	32
des_t4_ede3_cbc_decrypt:
	cmp		$len, 0
	be,pn		$::size_t_cc, .Lcbc_abort
	srln		$len, 0, $len		! needed on v8+, "nop" on v9
	ld		[$ivec + 0], %f2	! load ivec
	ld		[$ivec + 4], %f3

	and		$inp, 7, $ileft
	andn		$inp, 7, $inp
	sll		$ileft, 3, $ileft
	mov		0xff, $omask
	prefetch	[$inp], 20
	prefetch	[$inp + 63], 20
	sub		%g0, $ileft, $iright
	and		$out, 7, %g4
	alignaddrl	$out, %g0, $out
	srl		$omask, %g4, $omask
	srlx		$len, 3, $len
	movrz		%g4, 0, $omask
	prefetch	[$out], 22

	ldd		[$key + 0x100+0x78], %f4	! load key schedule
	ldd		[$key + 0x100+0x70], %f6
	ldd		[$key + 0x100+0x68], %f8
	ldd		[$key + 0x100+0x60], %f10
	ldd		[$key + 0x100+0x58], %f12
	ldd		[$key + 0x100+0x50], %f14
	ldd		[$key + 0x100+0x48], %f16
	ldd		[$key + 0x100+0x40], %f18
	ldd		[$key + 0x100+0x38], %f20
	ldd		[$key + 0x100+0x30], %f22
	ldd		[$key + 0x100+0x28], %f24
	ldd		[$key + 0x100+0x20], %f26
	ldd		[$key + 0x100+0x18], %f28
	ldd		[$key + 0x100+0x10], %f30
	ldd		[$key + 0x100+0x08], %f32
	ldd		[$key + 0x100+0x00], %f34

.Ldes_ede3_cbc_dec_loop:
	ldx		[$inp + 0], %g4
	brz,pt		$ileft, 4f
	nop

	ldx		[$inp + 8], %g5
	sllx		%g4, $ileft, %g4
	srlx		%g5, $iright, %g5
	or		%g5, %g4, %g4
4:
	movxtod		%g4, %f0
	prefetch	[$inp + 8+63], 20
	add		$inp, 8, $inp
	prefetch	[$out + 63], 22

	des_ip		%f0, %f0
	des_round	%f4, %f6, %f0, %f0
	des_round	%f8, %f10, %f0, %f0
	des_round	%f12, %f14, %f0, %f0
	des_round	%f16, %f18, %f0, %f0
	ldd		[$key + 0x80+0x00], %f36
	ldd		[$key + 0x80+0x08], %f38
	des_round	%f20, %f22, %f0, %f0
	ldd		[$key + 0x80+0x10], %f40
	ldd		[$key + 0x80+0x18], %f42
	des_round	%f24, %f26, %f0, %f0
	ldd		[$key + 0x80+0x20], %f44
	ldd		[$key + 0x80+0x28], %f46
	des_round	%f28, %f30, %f0, %f0
	ldd		[$key + 0x80+0x30], %f48
	ldd		[$key + 0x80+0x38], %f50
	des_round	%f32, %f34, %f0, %f0
	ldd		[$key + 0x80+0x40], %f52
	ldd		[$key + 0x80+0x48], %f54
	des_iip		%f0, %f0

	ldd		[$key + 0x80+0x50], %f56
	ldd		[$key + 0x80+0x58], %f58
	des_ip		%f0, %f0
	ldd		[$key + 0x80+0x60], %f60
	ldd		[$key + 0x80+0x68], %f62
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80+0x70], %f36
	ldd		[$key + 0x80+0x78], %f38
	des_round	%f40, %f42, %f0, %f0
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	ldd		[$key + 0x80-0x08], %f40
	ldd		[$key + 0x80-0x10], %f42
	des_round	%f52, %f54, %f0, %f0
	ldd		[$key + 0x80-0x18], %f44
	ldd		[$key + 0x80-0x20], %f46
	des_round	%f56, %f58, %f0, %f0
	ldd		[$key + 0x80-0x28], %f48
	ldd		[$key + 0x80-0x30], %f50
	des_round	%f60, %f62, %f0, %f0
	ldd		[$key + 0x80-0x38], %f52
	ldd		[$key + 0x80-0x40], %f54
	des_round	%f36, %f38, %f0, %f0
	ldd		[$key + 0x80-0x48], %f56
	ldd		[$key + 0x80-0x50], %f58
	des_iip		%f0, %f0

	ldd		[$key + 0x80-0x58], %f60
	ldd		[$key + 0x80-0x60], %f62
	des_ip		%f0, %f0
	ldd		[$key + 0x80-0x68], %f36
	ldd		[$key + 0x80-0x70], %f38
	des_round	%f40, %f42, %f0, %f0
	ldd		[$key + 0x80-0x78], %f40
	ldd		[$key + 0x80-0x80], %f42
	des_round	%f44, %f46, %f0, %f0
	des_round	%f48, %f50, %f0, %f0
	des_round	%f52, %f54, %f0, %f0
	des_round	%f56, %f58, %f0, %f0
	des_round	%f60, %f62, %f0, %f0
	des_round	%f36, %f38, %f0, %f0
	des_round	%f40, %f42, %f0, %f0
	des_iip		%f0, %f0

	fxor		%f2, %f0, %f0		! ^= ivec
	movxtod		%g4, %f2

	brnz,pn		$omask, 2f
	sub		$len, 1, $len

	std		%f0, [$out + 0]
	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop
	add		$out, 8, $out

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]

.align	16
2:	ldxa		[$inp]0x82, %g4		! avoid read-after-write hazard
						! and ~3x deterioration
						! in inp==out case
	faligndata	%f0, %f0, %f0		! handle unaligned output

	stda		%f0, [$out + $omask]0xc0	! partial store
	add		$out, 8, $out
	orn		%g0, $omask, $omask
	stda		%f0, [$out + $omask]0xc0	! partial store

	brnz,pt		$len, .Ldes_ede3_cbc_dec_loop+4
	orn		%g0, $omask, $omask

	st		%f2, [$ivec + 0]	! write out ivec
	retl
	st		%f3, [$ivec + 4]
.type	des_t4_ede3_cbc_decrypt,#function
.size	des_t4_ede3_cbc_decrypt,.-des_t4_ede3_cbc_decrypt
___
}
# Identification string embedded in the object file, followed by realignment
# to a 4-byte boundary after the variable-length string.
$code.=<<___;
.asciz	"DES for SPARC T4, David S. Miller, Andy Polyakov"
.align	4
___

# Hand the accumulated text to the helper loaded from sparcv9_modes.pl.
# NOTE(review): presumably this post-processes $code and prints it to
# STDOUT - confirm against sparcv9_modes.pl.
emit_assembler();

# Buffered write errors only surface at close time, so treat them as fatal.
close STDOUT or die "error closing STDOUT: $!";