1 | ! Copyright 2000-2019 The OpenSSL Project Authors. All Rights Reserved.
|
---|
2 | !
|
---|
3 | ! Licensed under the OpenSSL license (the "License"). You may not use
|
---|
4 | ! this file except in compliance with the License. You can obtain a copy
|
---|
5 | ! in the file LICENSE in the source distribution or at
|
---|
6 | ! https://www.openssl.org/source/license.html
|
---|
7 | !
|
---|
8 | ! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
|
---|
9 | !
|
---|
10 | ! Global registers 1 to 5 are used. This is the same as done by the
|
---|
11 | ! cc compiler. The UltraSPARC load/store little endian feature is used.
|
---|
12 | !
|
---|
13 | ! Instruction grouping often refers to one CPU cycle.
|
---|
14 | !
|
---|
15 | ! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
|
---|
16 | !
|
---|
17 | ! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
|
---|
18 | !
|
---|
19 | ! Performance improvement according to './apps/openssl speed des'
|
---|
20 | !
|
---|
21 | ! 32-bit build:
|
---|
22 | ! 23% faster than cc-5.2 -xarch=v8plus -xO5
|
---|
23 | ! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
|
---|
24 | ! 64-bit build:
|
---|
25 | ! 50% faster than cc-5.2 -xarch=v9 -xO5
|
---|
26 | ! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
|
---|
27 | !
|
---|
28 |
|
---|
29 | .ident "des_enc.m4 2.1"
|
---|
30 | .file "des_enc-sparc.S"
|
---|
31 |
|
---|
32 | #include <openssl/opensslconf.h>
|
---|
33 |
|
---|
34 | #if defined(__SUNPRO_C) && defined(__sparcv9)
|
---|
35 | # define ABI64 /* They've said -xarch=v9 at command line */
|
---|
36 | #elif defined(__GNUC__) && defined(__arch64__)
|
---|
37 | # define ABI64 /* They've said -m64 at command line */
|
---|
38 | #endif
|
---|
39 |
|
---|
40 | #ifdef ABI64
|
---|
41 | .register %g2,#scratch
|
---|
42 | .register %g3,#scratch
|
---|
43 | # define FRAME -192 /* NOTE(review): presumably the stack frame size in 64-bit builds - confirm at use sites */
|
---|
44 | # define BIAS 2047 /* NOTE(review): presumably the 64-bit ABI stack bias - confirm at use sites */
|
---|
45 | # define LDPTR ldx /* pointer load, 64-bit form (used for KS2, KS3, INPUT, OUTPUT in the macros) */
|
---|
46 | # define STPTR stx /* pointer store, 64-bit form */
|
---|
47 | # define ARG0 128 /* NOTE(review): presumably offset of the first argument slot - confirm at use sites */
|
---|
48 | # define ARGSZ 8 /* NOTE(review): presumably size of one argument slot - confirm at use sites */
|
---|
49 | #else
|
---|
50 | # define FRAME -96 /* 32-bit counterpart of the values above */
|
---|
51 | # define BIAS 0
|
---|
52 | # define LDPTR ld /* pointer load, 32-bit form */
|
---|
53 | # define STPTR st /* pointer store, 32-bit form */
|
---|
54 | # define ARG0 68
|
---|
55 | # define ARGSZ 4
|
---|
56 | #endif
|
---|
57 |
|
---|
58 | #define LOOPS 7 /* NOTE(review): presumably the count of two-round loop passes - confirm where the loop counter is stored */
|
---|
59 |
|
---|
60 | #define global0 %g0
|
---|
61 | #define global1 %g1
|
---|
62 | #define global2 %g2
|
---|
63 | #define global3 %g3
|
---|
64 | #define global4 %g4
|
---|
65 | #define global5 %g5
|
---|
66 |
|
---|
67 | #define local0 %l0
|
---|
68 | #define local1 %l1
|
---|
69 | #define local2 %l2
|
---|
70 | #define local3 %l3
|
---|
71 | #define local4 %l4
|
---|
72 | #define local5 %l5
|
---|
73 | #define local7 %l6 /* note: local7 maps to %l6 and local6 maps to %l7 (crossed) */
|
---|
74 | #define local6 %l7 /* note: crossed with local7, see the line above */
|
---|
75 |
|
---|
76 | #define in0 %i0
|
---|
77 | #define in1 %i1
|
---|
78 | #define in2 %i2
|
---|
79 | #define in3 %i3
|
---|
80 | #define in4 %i4
|
---|
81 | #define in5 %i5
|
---|
82 | #define in6 %i6
|
---|
83 | #define in7 %i7
|
---|
84 |
|
---|
85 | #define out0 %o0
|
---|
86 | #define out1 %o1
|
---|
87 | #define out2 %o2
|
---|
88 | #define out3 %o3
|
---|
89 | #define out4 %o4
|
---|
90 | #define out5 %o5
|
---|
91 | #define out6 %o6
|
---|
92 | #define out7 %o7
|
---|
93 |
|
---|
94 | #define stub stb /* the byte stores in the macros are written as stub and assemble as stb */
|
---|
95 |
|
---|
96 | changequote({,})
|
---|
97 |
|
---|
98 |
|
---|
99 | ! Macro definitions:
|
---|
100 |
|
---|
101 |
|
---|
102 | ! {ip_macro}
|
---|
103 | !
|
---|
104 | ! The logic used in initial and final permutations is the same as in
|
---|
105 | ! the C code. The permutations are done with a clever shift, xor, and
|
---|
106 | ! mask technique.
|
---|
107 | !
|
---|
108 | ! The macro also loads address sbox 1 to 5 to global 1 to 5, address
|
---|
109 | ! sbox 6 to local6, and address sbox 8 to out3.
|
---|
110 | !
|
---|
111 | ! Rotates the halves 3 left to bring the sbox bits in convenient positions.
|
---|
112 | !
|
---|
113 | ! Loads key first round from address in parameter 5 to out0, out1.
|
---|
114 | !
|
---|
115 | ! After the original LibDES initial permutation, the resulting left
|
---|
116 | ! is in the variable initially used for right and vice versa. The macro
|
---|
117 | ! implements the possibility to keep the halves in the original registers.
|
---|
118 | !
|
---|
119 | ! parameter 1 left
|
---|
120 | ! parameter 2 right
|
---|
121 | ! parameter 3 result left (modify in first round)
|
---|
122 | ! parameter 4 result right (use in first round)
|
---|
123 | ! parameter 5 key address
|
---|
124 | ! parameter 6 1/2 for include encryption/decryption
|
---|
125 | ! parameter 7 1 for move in1 to in3
|
---|
126 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
|
---|
127 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3
|
---|
128 |
|
---|
129 | define(ip_macro, {
|
---|
130 |
|
---|
131 | ! {ip_macro} initial permutation, sbox addresses and first round key
|
---|
132 | ! $1 $2 $4 $3 $5 $6 $7 $8 $9
|
---|
133 |
|
---|
134 | ld [out2+256], local1 ! mask for the swap step with distance 4
|
---|
135 | srl $2, 4, local4
|
---|
136 |
|
---|
137 | xor local4, $1, local4
|
---|
138 | ifelse($7,1,{mov in1, in3},{nop})
|
---|
139 |
|
---|
140 | ld [out2+260], local2 ! mask for the swap step with distance 16
|
---|
141 | and local4, local1, local4
|
---|
142 | ifelse($8,1,{mov in3, in4},{})
|
---|
143 | ifelse($8,2,{mov in4, in3},{})
|
---|
144 |
|
---|
145 | ld [out2+280], out4 ! loop counter
|
---|
146 | sll local4, 4, local1
|
---|
147 | xor $1, local4, $1
|
---|
148 |
|
---|
149 | ld [out2+264], local3 ! mask for the swap step with distance 2
|
---|
150 | srl $1, 16, local4
|
---|
151 | xor $2, local1, $2
|
---|
152 |
|
---|
153 | ifelse($9,1,{LDPTR KS3, in4},{})
|
---|
154 | xor local4, $2, local4
|
---|
155 | nop !sethi %hi(DES_SPtrans), global1 ! sbox addr
|
---|
156 |
|
---|
157 | ifelse($9,1,{LDPTR KS2, in3},{})
|
---|
158 | and local4, local2, local4
|
---|
159 | nop !or global1, %lo(DES_SPtrans), global1 ! sbox addr
|
---|
160 |
|
---|
161 | sll local4, 16, local1
|
---|
162 | xor $2, local4, $2
|
---|
163 |
|
---|
164 | srl $2, 2, local4
|
---|
165 | xor $1, local1, $1
|
---|
166 |
|
---|
167 | sethi %hi(16711680), local5 ! 16711680 is 0x00ff0000
|
---|
168 | xor local4, $1, local4
|
---|
169 |
|
---|
170 | and local4, local3, local4
|
---|
171 | or local5, 255, local5 ! local5 is now 0x00ff00ff, mask for the swap step with distance 8
|
---|
172 |
|
---|
173 | sll local4, 2, local2
|
---|
174 | xor $1, local4, $1
|
---|
175 |
|
---|
176 | srl $1, 8, local4
|
---|
177 | xor $2, local2, $2
|
---|
178 |
|
---|
179 | xor local4, $2, local4
|
---|
180 | add global1, 768, global4 ! address sbox 4, NOTE(review): global1 presumably holds the sbox table base on entry since the sethi above is disabled
|
---|
181 |
|
---|
182 | and local4, local5, local4
|
---|
183 | add global1, 1024, global5 ! address sbox 5
|
---|
184 |
|
---|
185 | ld [out2+272], local7 ! mask for the swap step with distance 1
|
---|
186 | sll local4, 8, local1
|
---|
187 | xor $2, local4, $2
|
---|
188 |
|
---|
189 | srl $2, 1, local4
|
---|
190 | xor $1, local1, $1
|
---|
191 |
|
---|
192 | ld [$5], out0 ! key 7531
|
---|
193 | xor local4, $1, local4
|
---|
194 | add global1, 256, global2 ! address sbox 2
|
---|
195 |
|
---|
196 | ld [$5+4], out1 ! key 8642
|
---|
197 | and local4, local7, local4
|
---|
198 | add global1, 512, global3 ! address sbox 3
|
---|
199 |
|
---|
200 | sll local4, 1, local1
|
---|
201 | xor $1, local4, $1
|
---|
202 |
|
---|
203 | sll $1, 3, local3 ! start rotate 3 left of parameter 1
|
---|
204 | xor $2, local1, $2
|
---|
205 |
|
---|
206 | sll $2, 3, local2 ! start rotate 3 left of parameter 2
|
---|
207 | add global1, 1280, local6 ! address sbox 6
|
---|
208 |
|
---|
209 | srl $1, 29, local4
|
---|
210 | add global1, 1792, out3 ! address sbox 8
|
---|
211 |
|
---|
212 | srl $2, 29, local1
|
---|
213 | or local4, local3, $4 ! parameter 4 gets parameter 1 rotated 3 left
|
---|
214 |
|
---|
215 | or local2, local1, $3 ! parameter 3 gets parameter 2 rotated 3 left
|
---|
216 |
|
---|
217 | ifelse($6, 1, {
|
---|
218 |
|
---|
219 | ld [out2+284], local5 ! 0x0000FC00 used in the rounds
|
---|
220 | or local2, local1, $3 ! same or as just before this conditional part
|
---|
221 | xor $4, out0, local1 ! start of the sbox 1 lookup for the first round
|
---|
222 |
|
---|
223 | call .des_enc.1
|
---|
224 | and local1, 252, local1 ! delay slot: keep the 6 bits for sbox 1
|
---|
225 |
|
---|
226 | },{})
|
---|
227 |
|
---|
228 | ifelse($6, 2, {
|
---|
229 |
|
---|
230 | ld [out2+284], local5 ! 0x0000FC00 used in the rounds
|
---|
231 | or local2, local1, $3 ! same or as just before this conditional part
|
---|
232 | xor $4, out0, local1 ! start of the sbox 1 lookup for the first round
|
---|
233 |
|
---|
234 | call .des_dec.1
|
---|
235 | and local1, 252, local1 ! delay slot: keep the 6 bits for sbox 1
|
---|
236 |
|
---|
237 | },{})
|
---|
238 | })
|
---|
239 |
|
---|
240 |
|
---|
241 | ! {rounds_macro}
|
---|
242 | !
|
---|
243 | ! The logic used in the DES rounds is the same as in the C code,
|
---|
244 | ! except that calculations for sbox 1 and sbox 5 begin before
|
---|
245 | ! the previous round is finished.
|
---|
246 | !
|
---|
247 | ! In each round one half (work) is modified based on key and the
|
---|
248 | ! other half (use).
|
---|
249 | !
|
---|
250 | ! In this version we do two rounds in a loop repeated 7 times
|
---|
251 | ! and two rounds separately.
|
---|
252 | !
|
---|
253 | ! One half has the bits for the sboxes in the following positions:
|
---|
254 | !
|
---|
255 | ! 777777xx555555xx333333xx111111xx
|
---|
256 | !
|
---|
257 | ! 88xx666666xx444444xx222222xx8888
|
---|
258 | !
|
---|
259 | ! The bits for each sbox are xor-ed with the key bits for that box.
|
---|
260 | ! The above xx bits are cleared, and the result used for lookup in
|
---|
261 | ! the sbox table. Each sbox entry contains the 4 output bits permuted
|
---|
262 | ! into 32 bits according to the P permutation.
|
---|
263 | !
|
---|
264 | ! In the description of DES, left and right are switched after
|
---|
265 | ! each round, except after last round. In this code the original
|
---|
266 | ! left and right are kept in the same register in all rounds, meaning
|
---|
267 | ! that after the 16 rounds the result for right is in the register
|
---|
268 | ! originally used for left.
|
---|
269 | !
|
---|
270 | ! parameter 1 first work (left in first round)
|
---|
271 | ! parameter 2 first use (right in first round)
|
---|
272 | ! parameter 3 enc/dec 1/-1
|
---|
273 | ! parameter 4 loop label
|
---|
274 | ! parameter 5 key address register
|
---|
275 | ! parameter 6 optional address for key next encryption/decryption
|
---|
276 | ! parameter 7 not empty for include retl
|
---|
277 | !
|
---|
278 | ! also compares in2 to 8
|
---|
279 |
|
---|
280 | define(rounds_macro, {
|
---|
281 |
|
---|
282 | ! {rounds_macro} two rounds per loop pass, then two more rounds after the loop
|
---|
283 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9
|
---|
284 |
|
---|
285 | xor $2, out0, local1 ! start of the sbox 1 lookup: use half xor key 7531
|
---|
286 |
|
---|
287 | ld [out2+284], local5 ! 0x0000FC00
|
---|
288 | ba $4 ! enter the round loop
|
---|
289 | and local1, 252, local1 ! delay slot: keep the 6 bits for sbox 1
|
---|
290 |
|
---|
291 | .align 32
|
---|
292 |
|
---|
293 | $4:
|
---|
294 | ! local6 is address sbox 6
|
---|
295 | ! out3 is address sbox 8
|
---|
296 | ! out4 is loop counter
|
---|
297 |
|
---|
298 | ld [global1+local1], local1 ! 1
|
---|
299 | xor $2, out1, out1 ! 8642
|
---|
300 | xor $2, out0, out0 ! 7531
|
---|
301 | ! fmovs %f0, %f0 ! fxor used for alignment
|
---|
302 |
|
---|
303 | srl out1, 4, local0 ! rotate 4 right
|
---|
304 | and out0, local5, local3 ! 3
|
---|
305 | ! fmovs %f0, %f0
|
---|
306 |
|
---|
307 | ld [$5+$3*8], local7 ! key 7531 next round
|
---|
308 | srl local3, 8, local3 ! 3
|
---|
309 | and local0, 252, local2 ! 2
|
---|
310 | ! fmovs %f0, %f0
|
---|
311 |
|
---|
312 | ld [global3+local3],local3 ! 3
|
---|
313 | sll out1, 28, out1 ! rotate
|
---|
314 | xor $1, local1, $1 ! 1 finished, local1 now sbox 7
|
---|
315 |
|
---|
316 | ld [global2+local2], local2 ! 2
|
---|
317 | srl out0, 24, local1 ! 7
|
---|
318 | or out1, local0, out1 ! rotate
|
---|
319 |
|
---|
320 | ldub [out2+local1], local1 ! 7 (and 0xFC)
|
---|
321 | srl out1, 24, local0 ! 8
|
---|
322 | and out1, local5, local4 ! 4
|
---|
323 |
|
---|
324 | ldub [out2+local0], local0 ! 8 (and 0xFC)
|
---|
325 | srl local4, 8, local4 ! 4
|
---|
326 | xor $1, local2, $1 ! 2 finished local2 now sbox 6
|
---|
327 |
|
---|
328 | ld [global4+local4],local4 ! 4
|
---|
329 | srl out1, 16, local2 ! 6
|
---|
330 | xor $1, local3, $1 ! 3 finished local3 now sbox 5
|
---|
331 |
|
---|
332 | ld [out3+local0],local0 ! 8
|
---|
333 | and local2, 252, local2 ! 6
|
---|
334 | add global1, 1536, local5 ! address sbox 7
|
---|
335 |
|
---|
336 | ld [local6+local2], local2 ! 6
|
---|
337 | srl out0, 16, local3 ! 5
|
---|
338 | xor $1, local4, $1 ! 4 finished
|
---|
339 |
|
---|
340 | ld [local5+local1],local1 ! 7
|
---|
341 | and local3, 252, local3 ! 5
|
---|
342 | xor $1, local0, $1 ! 8 finished
|
---|
343 |
|
---|
344 | ld [global5+local3],local3 ! 5
|
---|
345 | xor $1, local2, $1 ! 6 finished
|
---|
346 | subcc out4, 1, out4 ! count down the loop counter, flags are tested by the bne at the loop end
|
---|
347 |
|
---|
348 | ld [$5+$3*8+4], out0 ! key 8642 next round
|
---|
349 | xor $1, local7, local2 ! sbox 5 next round
|
---|
350 | xor $1, local1, $1 ! 7 finished
|
---|
351 |
|
---|
352 | srl local2, 16, local2 ! sbox 5 next round
|
---|
353 | xor $1, local3, $1 ! 5 finished
|
---|
354 |
|
---|
355 | ld [$5+$3*16+4], out1 ! key 8642 next round again
|
---|
356 | and local2, 252, local2 ! sbox5 next round
|
---|
357 | ! next round
|
---|
358 | xor $1, local7, local7 ! 7531
|
---|
359 |
|
---|
360 | ld [global5+local2], local2 ! 5
|
---|
361 | srl local7, 24, local3 ! 7
|
---|
362 | xor $1, out0, out0 ! 8642
|
---|
363 |
|
---|
364 | ldub [out2+local3], local3 ! 7 (and 0xFC)
|
---|
365 | srl out0, 4, local0 ! rotate 4 right
|
---|
366 | and local7, 252, local1 ! 1
|
---|
367 |
|
---|
368 | sll out0, 28, out0 ! rotate
|
---|
369 | xor $2, local2, $2 ! 5 finished local2 used
|
---|
370 |
|
---|
371 | srl local0, 8, local4 ! 4
|
---|
372 | and local0, 252, local2 ! 2
|
---|
373 | ld [local5+local3], local3 ! 7
|
---|
374 |
|
---|
375 | srl local0, 16, local5 ! 6
|
---|
376 | or out0, local0, out0 ! rotate
|
---|
377 | ld [global2+local2], local2 ! 2
|
---|
378 |
|
---|
379 | srl out0, 24, local0 ! 8
|
---|
380 | ld [$5+$3*16], out0 ! key 7531 next round
|
---|
381 | and local4, 252, local4 ! 4
|
---|
382 |
|
---|
383 | and local5, 252, local5 ! 6
|
---|
384 | ld [global4+local4], local4 ! 4
|
---|
385 | xor $2, local3, $2 ! 7 finished local3 used
|
---|
386 |
|
---|
387 | and local0, 252, local0 ! 8
|
---|
388 | ld [local6+local5], local5 ! 6
|
---|
389 | xor $2, local2, $2 ! 2 finished local2 now sbox 3
|
---|
390 |
|
---|
391 | srl local7, 8, local2 ! 3 start
|
---|
392 | ld [out3+local0], local0 ! 8
|
---|
393 | xor $2, local4, $2 ! 4 finished
|
---|
394 |
|
---|
395 | and local2, 252, local2 ! 3
|
---|
396 | ld [global1+local1], local1 ! 1
|
---|
397 | xor $2, local5, $2 ! 6 finished local5 used
|
---|
398 |
|
---|
399 | ld [global3+local2], local2 ! 3
|
---|
400 | xor $2, local0, $2 ! 8 finished
|
---|
401 | add $5, $3*16, $5 ! enc add 16, dec add -16 to key pointer (two rounds of 8 key bytes)
|
---|
402 |
|
---|
403 | ld [out2+284], local5 ! 0x0000FC00
|
---|
404 | xor $2, out0, local4 ! sbox 1 next round
|
---|
405 | xor $2, local1, $2 ! 1 finished
|
---|
406 |
|
---|
407 | xor $2, local2, $2 ! 3 finished
|
---|
408 | bne $4 ! another pass while the loop counter is not zero
|
---|
409 | and local4, 252, local1 ! sbox 1 next round
|
---|
410 |
|
---|
411 | ! two rounds more:
|
---|
412 |
|
---|
413 | ld [global1+local1], local1 ! 1
|
---|
414 | xor $2, out1, out1 ! 8642
|
---|
415 | xor $2, out0, out0 ! 7531
|
---|
416 |
|
---|
417 | srl out1, 4, local0 ! rotate
|
---|
418 | and out0, local5, local3 ! 3
|
---|
419 |
|
---|
420 | ld [$5+$3*8], local7 ! key 7531
|
---|
421 | srl local3, 8, local3 ! 3
|
---|
422 | and local0, 252, local2 ! 2
|
---|
423 |
|
---|
424 | ld [global3+local3],local3 ! 3
|
---|
425 | sll out1, 28, out1 ! rotate
|
---|
426 | xor $1, local1, $1 ! 1 finished, local1 now sbox 7
|
---|
427 |
|
---|
428 | ld [global2+local2], local2 ! 2
|
---|
429 | srl out0, 24, local1 ! 7
|
---|
430 | or out1, local0, out1 ! rotate
|
---|
431 |
|
---|
432 | ldub [out2+local1], local1 ! 7 (and 0xFC)
|
---|
433 | srl out1, 24, local0 ! 8
|
---|
434 | and out1, local5, local4 ! 4
|
---|
435 |
|
---|
436 | ldub [out2+local0], local0 ! 8 (and 0xFC)
|
---|
437 | srl local4, 8, local4 ! 4
|
---|
438 | xor $1, local2, $1 ! 2 finished local2 now sbox 6
|
---|
439 |
|
---|
440 | ld [global4+local4],local4 ! 4
|
---|
441 | srl out1, 16, local2 ! 6
|
---|
442 | xor $1, local3, $1 ! 3 finished local3 now sbox 5
|
---|
443 |
|
---|
444 | ld [out3+local0],local0 ! 8
|
---|
445 | and local2, 252, local2 ! 6
|
---|
446 | add global1, 1536, local5 ! address sbox 7
|
---|
447 |
|
---|
448 | ld [local6+local2], local2 ! 6
|
---|
449 | srl out0, 16, local3 ! 5
|
---|
450 | xor $1, local4, $1 ! 4 finished
|
---|
451 |
|
---|
452 | ld [local5+local1],local1 ! 7
|
---|
453 | and local3, 252, local3 ! 5
|
---|
454 | xor $1, local0, $1 ! 8 finished
|
---|
455 |
|
---|
456 | ld [global5+local3],local3 ! 5
|
---|
457 | xor $1, local2, $1 ! 6 finished
|
---|
458 | cmp in2, 8 ! compare in2 to 8, flags are left for the code that follows the macro
|
---|
459 |
|
---|
460 | ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
|
---|
461 | xor $1, local7, local2 ! sbox 5 next round
|
---|
462 | xor $1, local1, $1 ! 7 finished
|
---|
463 |
|
---|
464 | ld [$5+$3*8+4], out0 ! key 8642
|
---|
465 | srl local2, 16, local2 ! sbox 5 next round
|
---|
466 | xor $1, local3, $1 ! 5 finished
|
---|
467 |
|
---|
468 | and local2, 252, local2 ! sbox 5 next round
|
---|
469 | ! next round (two rounds more)
|
---|
470 | xor $1, local7, local7 ! 7531
|
---|
471 |
|
---|
472 | ld [global5+local2], local2 ! 5
|
---|
473 | srl local7, 24, local3 ! 7
|
---|
474 | xor $1, out0, out0 ! 8642
|
---|
475 |
|
---|
476 | ldub [out2+local3], local3 ! 7 (and 0xFC)
|
---|
477 | srl out0, 4, local0 ! rotate
|
---|
478 | and local7, 252, local1 ! 1
|
---|
479 |
|
---|
480 | sll out0, 28, out0 ! rotate
|
---|
481 | xor $2, local2, $2 ! 5 finished local2 used
|
---|
482 |
|
---|
483 | srl local0, 8, local4 ! 4
|
---|
484 | and local0, 252, local2 ! 2
|
---|
485 | ld [local5+local3], local3 ! 7
|
---|
486 |
|
---|
487 | srl local0, 16, local5 ! 6
|
---|
488 | or out0, local0, out0 ! rotate
|
---|
489 | ld [global2+local2], local2 ! 2
|
---|
490 |
|
---|
491 | srl out0, 24, local0 ! 8
|
---|
492 | ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
|
---|
493 | and local4, 252, local4 ! 4
|
---|
494 |
|
---|
495 | and local5, 252, local5 ! 6
|
---|
496 | ld [global4+local4], local4 ! 4
|
---|
497 | xor $2, local3, $2 ! 7 finished local3 used
|
---|
498 |
|
---|
499 | and local0, 252, local0 ! 8
|
---|
500 | ld [local6+local5], local5 ! 6
|
---|
501 | xor $2, local2, $2 ! 2 finished local2 now sbox 3
|
---|
502 |
|
---|
503 | srl local7, 8, local2 ! 3 start
|
---|
504 | ld [out3+local0], local0 ! 8
|
---|
505 | xor $2, local4, $2 ! 4 finished
|
---|
506 |
|
---|
507 | and local2, 252, local2 ! 3
|
---|
508 | ld [global1+local1], local1 ! 1
|
---|
509 | xor $2, local5, $2 ! 6 finished local5 used
|
---|
510 |
|
---|
511 | ld [global3+local2], local2 ! 3
|
---|
512 | srl $1, 3, local3 ! parameter 1 moved 3 right, combined later by the final permutation
|
---|
513 | xor $2, local0, $2 ! 8 finished
|
---|
514 |
|
---|
515 | ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
|
---|
516 | sll $1, 29, local4 ! parameter 1 moved 29 left, combined later by the final permutation
|
---|
517 | xor $2, local1, $2 ! 1 finished
|
---|
518 |
|
---|
519 | ifelse($7,{}, {}, {retl})
|
---|
520 | xor $2, local2, $2 ! 3 finished, sits in the delay slot when retl is emitted
|
---|
521 | })
|
---|
522 |
|
---|
523 |
|
---|
524 | ! {fp_macro}
|
---|
525 | !
|
---|
526 | ! parameter 1 right (original left)
|
---|
527 | ! parameter 2 left (original right)
|
---|
528 | ! parameter 3 1 for optional store to [in0]
|
---|
529 | ! parameter 4 1 for load input/output address to local5/7
|
---|
530 | !
|
---|
531 | ! The final permutation logic switches the halves, meaning that
|
---|
532 | ! left and right end up in the registers originally used.
|
---|
533 |
|
---|
534 | define(fp_macro, {
|
---|
535 |
|
---|
536 | ! {fp_macro} final permutation with optional store to in0
|
---|
537 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9
|
---|
538 |
|
---|
539 | ! initially undo the rotate 3 left done after initial permutation
|
---|
540 | ! original left is received shifted 3 right and 29 left in local3/4
|
---|
541 |
|
---|
542 | sll $2, 29, local1
|
---|
543 | or local3, local4, $1 ! parameter 1 rotated back 3 right
|
---|
544 |
|
---|
545 | srl $2, 3, $2
|
---|
546 | sethi %hi(0x55555555), local2
|
---|
547 |
|
---|
548 | or $2, local1, $2 ! parameter 2 rotated back 3 right
|
---|
549 | or local2, %lo(0x55555555), local2
|
---|
550 |
|
---|
551 | srl $2, 1, local3
|
---|
552 | sethi %hi(0x00ff00ff), local1
|
---|
553 | xor local3, $1, local3
|
---|
554 | or local1, %lo(0x00ff00ff), local1
|
---|
555 | and local3, local2, local3 ! swap step with distance 1, mask 0x55555555
|
---|
556 | sethi %hi(0x33333333), local4
|
---|
557 | sll local3, 1, local2
|
---|
558 |
|
---|
559 | xor $1, local3, $1
|
---|
560 |
|
---|
561 | srl $1, 8, local3
|
---|
562 | xor $2, local2, $2
|
---|
563 | xor local3, $2, local3
|
---|
564 | or local4, %lo(0x33333333), local4
|
---|
565 | and local3, local1, local3 ! swap step with distance 8, mask 0x00ff00ff
|
---|
566 | sethi %hi(0x0000ffff), local1
|
---|
567 | sll local3, 8, local2
|
---|
568 |
|
---|
569 | xor $2, local3, $2
|
---|
570 |
|
---|
571 | srl $2, 2, local3
|
---|
572 | xor $1, local2, $1
|
---|
573 | xor local3, $1, local3
|
---|
574 | or local1, %lo(0x0000ffff), local1
|
---|
575 | and local3, local4, local3 ! swap step with distance 2, mask 0x33333333
|
---|
576 | sethi %hi(0x0f0f0f0f), local4
|
---|
577 | sll local3, 2, local2
|
---|
578 |
|
---|
579 | ifelse($4,1, {LDPTR INPUT, local5})
|
---|
580 | xor $1, local3, $1
|
---|
581 |
|
---|
582 | ifelse($4,1, {LDPTR OUTPUT, local7})
|
---|
583 | srl $1, 16, local3
|
---|
584 | xor $2, local2, $2
|
---|
585 | xor local3, $2, local3
|
---|
586 | or local4, %lo(0x0f0f0f0f), local4
|
---|
587 | and local3, local1, local3 ! swap step with distance 16, mask 0x0000ffff
|
---|
588 | sll local3, 16, local2
|
---|
589 |
|
---|
590 | xor $2, local3, local1 ! parameter 2 is continued in local1 from here
|
---|
591 |
|
---|
592 | srl local1, 4, local3
|
---|
593 | xor $1, local2, $1
|
---|
594 | xor local3, $1, local3
|
---|
595 | and local3, local4, local3 ! swap step with distance 4, mask 0x0f0f0f0f
|
---|
596 | sll local3, 4, local2
|
---|
597 |
|
---|
598 | xor $1, local3, $1 ! parameter 1 finished
|
---|
599 |
|
---|
600 | ! optional store:
|
---|
601 |
|
---|
602 | ifelse($3,1, {st $1, [in0]})
|
---|
603 |
|
---|
604 | xor local1, local2, $2 ! parameter 2 finished
|
---|
605 |
|
---|
606 | ifelse($3,1, {st $2, [in0+4]})
|
---|
607 |
|
---|
608 | })
|
---|
609 |
|
---|
610 |
|
---|
611 | ! {fp_ip_macro}
|
---|
612 | !
|
---|
613 | ! Does initial permutation for next block mixed with
|
---|
614 | ! final permutation for current block.
|
---|
615 | !
|
---|
616 | ! parameter 1 original left
|
---|
617 | ! parameter 2 original right
|
---|
618 | ! parameter 3 left ip
|
---|
619 | ! parameter 4 right ip
|
---|
620 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
|
---|
621 | ! 2: mov in4 to in3
|
---|
622 | !
|
---|
623 | ! also adds -8 to length in2 and loads loop counter to out4
|
---|
624 |
|
---|
625 | define(fp_ip_macro, {
|
---|
626 |
|
---|
627 | ! {fp_ip_macro} final permutation of the current block interleaved with initial permutation of the next block
|
---|
628 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9
|
---|
629 |
|
---|
630 | define({temp1},{out4})
|
---|
631 | define({temp2},{local3})
|
---|
632 |
|
---|
633 | define({ip1},{local1})
|
---|
634 | define({ip2},{local2})
|
---|
635 | define({ip4},{local4})
|
---|
636 | define({ip5},{local5})
|
---|
637 |
|
---|
638 | ! $1 in local3, local4
|
---|
639 |
|
---|
640 | ld [out2+256], ip1 ! mask for the swap step with distance 4
|
---|
641 | sll out5, 29, temp1 ! NOTE(review): the second half is read from out5 here and the rotated value is written to parameter 2
|
---|
642 | or local3, local4, $1 ! parameter 1 rotated back 3 right
|
---|
643 |
|
---|
644 | srl out5, 3, $2
|
---|
645 | ifelse($5,2,{mov in4, in3})
|
---|
646 |
|
---|
647 | ld [out2+272], ip5 ! mask for the swap step with distance 1
|
---|
648 | srl $4, 4, local0
|
---|
649 | or $2, temp1, $2
|
---|
650 |
|
---|
651 | srl $2, 1, temp1
|
---|
652 | xor temp1, $1, temp1
|
---|
653 |
|
---|
654 | and temp1, ip5, temp1
|
---|
655 | xor local0, $3, local0
|
---|
656 |
|
---|
657 | sll temp1, 1, temp2
|
---|
658 | xor $1, temp1, $1
|
---|
659 |
|
---|
660 | and local0, ip1, local0
|
---|
661 | add in2, -8, in2 ! length in in2 minus 8
|
---|
662 |
|
---|
663 | sll local0, 4, local7
|
---|
664 | xor $3, local0, $3
|
---|
665 |
|
---|
666 | ld [out2+268], ip4 ! mask for the swap step with distance 8
|
---|
667 | srl $1, 8, temp1
|
---|
668 | xor $2, temp2, $2
|
---|
669 | ld [out2+260], ip2 ! mask for the swap step with distance 16
|
---|
670 | srl $3, 16, local0
|
---|
671 | xor $4, local7, $4
|
---|
672 | xor temp1, $2, temp1
|
---|
673 | xor local0, $4, local0
|
---|
674 | and temp1, ip4, temp1
|
---|
675 | and local0, ip2, local0
|
---|
676 | sll temp1, 8, temp2
|
---|
677 | xor $2, temp1, $2
|
---|
678 | sll local0, 16, local7
|
---|
679 | xor $4, local0, $4
|
---|
680 |
|
---|
681 | srl $2, 2, temp1
|
---|
682 | xor $1, temp2, $1
|
---|
683 |
|
---|
684 | ld [out2+264], temp2 ! ip3
|
---|
685 | srl $4, 2, local0
|
---|
686 | xor $3, local7, $3
|
---|
687 | xor temp1, $1, temp1
|
---|
688 | xor local0, $3, local0
|
---|
689 | and temp1, temp2, temp1
|
---|
690 | and local0, temp2, local0
|
---|
691 | sll temp1, 2, temp2
|
---|
692 | xor $1, temp1, $1
|
---|
693 | sll local0, 2, local7
|
---|
694 | xor $3, local0, $3
|
---|
695 |
|
---|
696 | srl $1, 16, temp1
|
---|
697 | xor $2, temp2, $2
|
---|
698 | srl $3, 8, local0
|
---|
699 | xor $4, local7, $4
|
---|
700 | xor temp1, $2, temp1
|
---|
701 | xor local0, $4, local0
|
---|
702 | and temp1, ip2, temp1
|
---|
703 | and local0, ip4, local0
|
---|
704 | sll temp1, 16, temp2
|
---|
705 | xor $2, temp1, local4 ! parameter 2 is continued in local4 from here
|
---|
706 | sll local0, 8, local7
|
---|
707 | xor $4, local0, $4
|
---|
708 |
|
---|
709 | srl $4, 1, local0
|
---|
710 | xor $3, local7, $3
|
---|
711 |
|
---|
712 | srl local4, 4, temp1
|
---|
713 | xor local0, $3, local0
|
---|
714 |
|
---|
715 | xor $1, temp2, $1
|
---|
716 | and local0, ip5, local0
|
---|
717 |
|
---|
718 | sll local0, 1, local7
|
---|
719 | xor temp1, $1, temp1
|
---|
720 |
|
---|
721 | xor $3, local0, $3
|
---|
722 | xor $4, local7, $4
|
---|
723 |
|
---|
724 | sll $3, 3, local5 ! start rotate 3 left of parameter 3
|
---|
725 | and temp1, ip1, temp1
|
---|
726 |
|
---|
727 | sll temp1, 4, temp2
|
---|
728 | xor $1, temp1, $1 ! final permutation of parameter 1 finished
|
---|
729 |
|
---|
730 | ifelse($5,1,{LDPTR KS2, in4})
|
---|
731 | sll $4, 3, local2 ! start rotate 3 left of parameter 4
|
---|
732 | xor local4, temp2, $2 ! final permutation of parameter 2 finished
|
---|
733 |
|
---|
734 | ! reload since used as temporary:
|
---|
735 |
|
---|
736 | ld [out2+280], out4 ! loop counter
|
---|
737 |
|
---|
738 | srl $3, 29, local0
|
---|
739 | ifelse($5,1,{add in4, 120, in4})
|
---|
740 |
|
---|
741 | ifelse($5,1,{LDPTR KS1, in3})
|
---|
742 | srl $4, 29, local7
|
---|
743 |
|
---|
744 | or local0, local5, $4 ! parameter 4 gets parameter 3 rotated 3 left
|
---|
745 | or local2, local7, $3 ! parameter 3 gets parameter 4 rotated 3 left
|
---|
746 |
|
---|
747 | })
|
---|
748 |
|
---|
749 |
|
---|
750 |
|
---|
751 | ! {load_little_endian}
|
---|
752 | !
|
---|
753 | ! parameter 1 address
|
---|
754 | ! parameter 2 destination left
|
---|
755 | ! parameter 3 destination right
|
---|
756 | ! parameter 4 temporary
|
---|
757 | ! parameter 5 label
|
---|
758 |
|
---|
759 | define(load_little_endian, {
|
---|
760 |
|
---|
761 | ! {load_little_endian} byte by byte load of two 32-bit words, lowest address ends up in the lowest register bits
|
---|
762 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9
|
---|
763 |
|
---|
764 | ! first in memory to rightmost in register
|
---|
765 |
|
---|
766 | $5:
|
---|
767 | ldub [$1+3], $2 ! byte 3 becomes the top byte of left
|
---|
768 |
|
---|
769 | ldub [$1+2], $4
|
---|
770 | sll $2, 8, $2
|
---|
771 | or $2, $4, $2
|
---|
772 |
|
---|
773 | ldub [$1+1], $4
|
---|
774 | sll $2, 8, $2
|
---|
775 | or $2, $4, $2
|
---|
776 |
|
---|
777 | ldub [$1+0], $4
|
---|
778 | sll $2, 8, $2
|
---|
779 | or $2, $4, $2 ! left complete
|
---|
780 |
|
---|
781 |
|
---|
782 | ldub [$1+3+4], $3 ! byte 7 becomes the top byte of right
|
---|
783 |
|
---|
784 | ldub [$1+2+4], $4
|
---|
785 | sll $3, 8, $3
|
---|
786 | or $3, $4, $3
|
---|
787 |
|
---|
788 | ldub [$1+1+4], $4
|
---|
789 | sll $3, 8, $3
|
---|
790 | or $3, $4, $3
|
---|
791 |
|
---|
792 | ldub [$1+0+4], $4
|
---|
793 | sll $3, 8, $3
|
---|
794 | or $3, $4, $3 ! right complete
|
---|
795 | $5a:
|
---|
796 |
|
---|
797 | })
|
---|
798 |
|
---|
799 |
|
---|
800 | ! {load_little_endian_inc}
|
---|
801 | !
|
---|
802 | ! parameter 1 address
|
---|
803 | ! parameter 2 destination left
|
---|
804 | ! parameter 3 destination right
|
---|
805 | ! parameter 4 temporary
|
---|
806 | ! parameter 5 label
|
---|
807 | !
|
---|
808 | ! adds 8 to address
|
---|
809 |
|
---|
810 | define(load_little_endian_inc, {
|
---|
811 |
|
---|
812 | ! {load_little_endian_inc} same byte by byte load as the plain variant, and the address register is advanced by 8
|
---|
813 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9
|
---|
814 |
|
---|
815 | ! first in memory to rightmost in register
|
---|
816 |
|
---|
817 | $5:
|
---|
818 | ldub [$1+3], $2 ! byte 3 becomes the top byte of left
|
---|
819 |
|
---|
820 | ldub [$1+2], $4
|
---|
821 | sll $2, 8, $2
|
---|
822 | or $2, $4, $2
|
---|
823 |
|
---|
824 | ldub [$1+1], $4
|
---|
825 | sll $2, 8, $2
|
---|
826 | or $2, $4, $2
|
---|
827 |
|
---|
828 | ldub [$1+0], $4
|
---|
829 | sll $2, 8, $2
|
---|
830 | or $2, $4, $2 ! left complete
|
---|
831 |
|
---|
832 | ldub [$1+3+4], $3 ! byte 7 becomes the top byte of right
|
---|
833 | add $1, 8, $1 ! advance the address, the loads below compensate with -8
|
---|
834 |
|
---|
835 | ldub [$1+2+4-8], $4
|
---|
836 | sll $3, 8, $3
|
---|
837 | or $3, $4, $3
|
---|
838 |
|
---|
839 | ldub [$1+1+4-8], $4
|
---|
840 | sll $3, 8, $3
|
---|
841 | or $3, $4, $3
|
---|
842 |
|
---|
843 | ldub [$1+0+4-8], $4
|
---|
844 | sll $3, 8, $3
|
---|
845 | or $3, $4, $3 ! right complete
|
---|
846 | $5a:
|
---|
847 |
|
---|
848 | })
|
---|
849 |
|
---|
850 |
|
---|
851 | ! {load_n_bytes}
|
---|
852 | !
|
---|
853 | ! Loads 1 to 7 bytes little endian
|
---|
854 | ! Remaining bytes are zeroed.
|
---|
855 | !
|
---|
856 | ! parameter 1 address
|
---|
857 | ! parameter 2 length
|
---|
858 | ! parameter 3 destination register left
|
---|
859 | ! parameter 4 destination register right
|
---|
860 | ! parameter 5 temp
|
---|
861 | ! parameter 6 temp2
|
---|
862 | ! parameter 7 label
|
---|
863 | ! parameter 8 return label
|
---|
864 |
|
---|
865 | define(load_n_bytes, {
|
---|
866 |
|
---|
867 | ! {load_n_bytes} jump table dispatch on the length, then falls through the byte loads from the highest byte down
|
---|
868 | ! $1 $2 $5 $6 $7 $8 $7 $8 $9
|
---|
869 |
|
---|
870 | $7.0: call .+8 ! call to the next instruction pair, leaves the address of this call in %o7
|
---|
871 | sll $2, 2, $6 ! delay slot: length times 4 is the byte offset into the jump table
|
---|
872 |
|
---|
873 | add %o7,$7.jmp.table-$7.0,$5 ! address of the jump table
|
---|
874 |
|
---|
875 | add $5, $6, $5 ! address of the table entry for this length
|
---|
876 | mov 0, $4 ! clear parameter 4, it receives bytes 0 to 3
|
---|
877 |
|
---|
878 | ld [$5], $5 ! entry is the distance from the .0 label to the target
|
---|
879 |
|
---|
880 | jmp %o7+$5
|
---|
881 | mov 0, $3 ! delay slot: clear parameter 3, it receives bytes 4 to 6
|
---|
882 |
|
---|
883 | $7.7:
|
---|
884 | ldub [$1+6], $5 ! NOTE(review): bytes 4 to 6 go to parameter 3 and bytes 0 to 3 to parameter 4, the reverse of the 8-byte load macro above - confirm against the call sites
|
---|
885 | sll $5, 16, $5
|
---|
886 | or $3, $5, $3
|
---|
887 | $7.6:
|
---|
888 | ldub [$1+5], $5
|
---|
889 | sll $5, 8, $5
|
---|
890 | or $3, $5, $3
|
---|
891 | $7.5:
|
---|
892 | ldub [$1+4], $5
|
---|
893 | or $3, $5, $3
|
---|
894 | $7.4:
|
---|
895 | ldub [$1+3], $5
|
---|
896 | sll $5, 24, $5
|
---|
897 | or $4, $5, $4
|
---|
898 | $7.3:
|
---|
899 | ldub [$1+2], $5
|
---|
900 | sll $5, 16, $5
|
---|
901 | or $4, $5, $4
|
---|
902 | $7.2:
|
---|
903 | ldub [$1+1], $5
|
---|
904 | sll $5, 8, $5
|
---|
905 | or $4, $5, $4
|
---|
906 | $7.1:
|
---|
907 | ldub [$1+0], $5
|
---|
908 | ba $8 ! continue at the return label
|
---|
909 | or $4, $5, $4 ! delay slot: merge byte 0
|
---|
910 |
|
---|
911 | .align 4
|
---|
912 |
|
---|
913 | $7.jmp.table:
|
---|
914 | .word 0 ! length 0: distance 0 leads back to the .0 label, NOTE(review): callers presumably never pass length 0
|
---|
915 | .word $7.1-$7.0
|
---|
916 | .word $7.2-$7.0
|
---|
917 | .word $7.3-$7.0
|
---|
918 | .word $7.4-$7.0
|
---|
919 | .word $7.5-$7.0
|
---|
920 | .word $7.6-$7.0
|
---|
921 | .word $7.7-$7.0
|
---|
922 | })
|
---|
923 |
|
---|
924 |
|
---|
925 | ! {store_little_endian}
|
---|
926 | !
|
---|
927 | ! parameter 1 address
|
---|
928 | ! parameter 2 source left
|
---|
929 | ! parameter 3 source right
|
---|
930 | ! parameter 4 temporary (parameter 5 label)
|
---|
931 |
|
---|
932 | define(store_little_endian, {
|
---|
933 |
|
---|
934 | ! {store_little_endian} byte by byte store of two 32-bit words, lowest register bits go to the lowest address
|
---|
935 | ! $1 $2 $3 $4 $5 $6 $7 $8 $9
|
---|
936 |
|
---|
937 | ! rightmost in register to first in memory
|
---|
938 |
|
---|
939 | $5:
|
---|
940 | and $2, 255, $4 ! lowest byte of left
|
---|
941 | stub $4, [$1+0]
|
---|
942 |
|
---|
943 | srl $2, 8, $4
|
---|
944 | and $4, 255, $4
|
---|
945 | stub $4, [$1+1]
|
---|
946 |
|
---|
947 | srl $2, 16, $4
|
---|
948 | and $4, 255, $4
|
---|
949 | stub $4, [$1+2]
|
---|
950 |
|
---|
951 | srl $2, 24, $4 ! top byte of left, no mask needed after the logical move right by 24
|
---|
952 | stub $4, [$1+3]
|
---|
953 |
|
---|
954 |
|
---|
955 | and $3, 255, $4 ! lowest byte of right
|
---|
956 | stub $4, [$1+0+4]
|
---|
957 |
|
---|
958 | srl $3, 8, $4
|
---|
959 | and $4, 255, $4
|
---|
960 | stub $4, [$1+1+4]
|
---|
961 |
|
---|
962 | srl $3, 16, $4
|
---|
963 | and $4, 255, $4
|
---|
964 | stub $4, [$1+2+4]
|
---|
965 |
|
---|
966 | srl $3, 24, $4 ! top byte of right
|
---|
967 | stub $4, [$1+3+4]
|
---|
968 |
|
---|
969 | $5a:
|
---|
970 |
|
---|
971 | })
|
---|
972 |
|
---|
973 |
|
---|
974 | ! {store_n_bytes}
|
---|
975 | !
|
---|
976 | ! Stores 1 to 7 bytes little endian
|
---|
977 | !
|
---|
978 | ! parameter 1 address
|
---|
979 | ! parameter 2 length
|
---|
980 | ! parameter 3 source register left
|
---|
981 | ! parameter 4 source register right
|
---|
982 | ! parameter 5 temp
|
---|
983 | ! parameter 6 temp2
|
---|
984 | ! parameter 7 label
|
---|
985 | ! parameter 8 return label
|
---|
986 |
|
---|
987 | define(store_n_bytes, {
|
---|
988 |
|
---|
989 | ! {store_n_bytes} jump table dispatch on the length, then falls through the byte stores from the highest byte down
|
---|
990 | ! $1 $2 $5 $6 $7 $8 $7 $8 $9
|
---|
991 |
|
---|
992 | $7.0: call .+8 ! call to the next instruction pair, leaves the address of this call in %o7
|
---|
993 | sll $2, 2, $6 ! delay slot: length times 4 is the byte offset into the jump table
|
---|
994 |
|
---|
995 | add %o7,$7.jmp.table-$7.0,$5 ! address of the jump table
|
---|
996 |
|
---|
997 | add $5, $6, $5 ! address of the table entry for this length
|
---|
998 |
|
---|
999 | ld [$5], $5 ! entry is the distance from the .0 label to the target
|
---|
1000 |
|
---|
1001 | jmp %o7+$5
|
---|
1002 | nop
|
---|
1003 |
|
---|
1004 | $7.7:
|
---|
1005 | srl $3, 16, $5 ! NOTE(review): bytes 4 to 6 come from parameter 3 and bytes 0 to 3 from parameter 4, the reverse of the 8-byte store macro above - confirm against the call sites
|
---|
1006 | and $5, 0xff, $5
|
---|
1007 | stub $5, [$1+6]
|
---|
1008 | $7.6:
|
---|
1009 | srl $3, 8, $5
|
---|
1010 | and $5, 0xff, $5
|
---|
1011 | stub $5, [$1+5]
|
---|
1012 | $7.5:
|
---|
1013 | and $3, 0xff, $5
|
---|
1014 | stub $5, [$1+4]
|
---|
1015 | $7.4:
|
---|
1016 | srl $4, 24, $5
|
---|
1017 | stub $5, [$1+3]
|
---|
1018 | $7.3:
|
---|
1019 | srl $4, 16, $5
|
---|
1020 | and $5, 0xff, $5
|
---|
1021 | stub $5, [$1+2]
|
---|
1022 | $7.2:
|
---|
1023 | srl $4, 8, $5
|
---|
1024 | and $5, 0xff, $5
|
---|
1025 | stub $5, [$1+1]
|
---|
1026 | $7.1:
|
---|
1027 | and $4, 0xff, $5
|
---|
1028 |
|
---|
1029 |
|
---|
1030 | ba $8 ! continue at the return label
|
---|
1031 | stub $5, [$1] ! delay slot: store byte 0
|
---|
1032 |
|
---|
1033 | .align 4
|
---|
1034 |
|
---|
1035 | $7.jmp.table:
|
---|
1036 |
|
---|
1037 | .word 0 ! length 0: distance 0 leads back to the .0 label, NOTE(review): callers presumably never pass length 0
|
---|
1038 | .word $7.1-$7.0
|
---|
1039 | .word $7.2-$7.0
|
---|
1040 | .word $7.3-$7.0
|
---|
1041 | .word $7.4-$7.0
|
---|
1042 | .word $7.5-$7.0
|
---|
1043 | .word $7.6-$7.0
|
---|
1044 | .word $7.7-$7.0
|
---|
1045 | })
|
---|
1046 |
|
---|
1047 |
|
---|
1048 | define(testvalue,{1})
|
---|
1049 | ! {register_init} puts {testvalue} in local0 and copies it into other registers.
|
---|
1050 | define(register_init, {
|
---|
1051 |
|
---|
1052 | ! For test purposes:
|
---|
1053 |
|
---|
1054 | sethi %hi(testvalue), local0
|
---|
1055 | or local0, %lo(testvalue), local0
|
---|
1056 | ! optional macro arguments 1 to 8 name extra registers to fill; empty ones emit nothing
|
---|
1057 | ifelse($1,{},{}, {mov local0, $1})
|
---|
1058 | ifelse($2,{},{}, {mov local0, $2})
|
---|
1059 | ifelse($3,{},{}, {mov local0, $3})
|
---|
1060 | ifelse($4,{},{}, {mov local0, $4})
|
---|
1061 | ifelse($5,{},{}, {mov local0, $5})
|
---|
1062 | ifelse($6,{},{}, {mov local0, $6})
|
---|
1063 | ifelse($7,{},{}, {mov local0, $7})
|
---|
1064 | ifelse($8,{},{}, {mov local0, $8})
|
---|
1065 | ! registers that are always filled
|
---|
1066 | mov local0, local1
|
---|
1067 | mov local0, local2
|
---|
1068 | mov local0, local3
|
---|
1069 | mov local0, local4
|
---|
1070 | mov local0, local5
|
---|
1071 | mov local0, local7
|
---|
1072 | mov local0, local6
|
---|
1073 | mov local0, out0
|
---|
1074 | mov local0, out1
|
---|
1075 | mov local0, out2
|
---|
1076 | mov local0, out3
|
---|
1077 | mov local0, out4
|
---|
1078 | mov local0, out5
|
---|
1079 | mov local0, global1
|
---|
1080 | mov local0, global2
|
---|
1081 | mov local0, global3
|
---|
1082 | mov local0, global4
|
---|
1083 | mov local0, global5
|
---|
1084 |
|
---|
1085 | })
|
---|
1086 |
|
---|
1087 | .section ".text"
|
---|
1088 |
|
---|
1089 | .align 32
|
---|
1090 | ! Local subroutine reached with call from the functions below; retl is the last macro argument.
|
---|
1091 | .des_enc:
|
---|
1092 |
|
---|
1093 | ! key address in3
|
---|
1094 | ! loads key next encryption/decryption first round from [in4]
|
---|
1095 |
|
---|
1096 | rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)
|
---|
1097 |
|
---|
1098 |
|
---|
1099 | .align 32
|
---|
1100 | ! Local decryption subroutine reached with call; its third macro argument is -1 where .des_enc passes 1.
|
---|
1101 | .des_dec:
|
---|
1102 |
|
---|
1103 | ! implemented with out5 as first parameter to avoid
|
---|
1104 | ! register exchange in ede modes
|
---|
1105 |
|
---|
1106 | ! key address in4
|
---|
1107 | ! loads key next encryption/decryption first round from [in3]
|
---|
1108 |
|
---|
1109 | rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)
|
---|
1110 |
|
---|
1111 |
|
---|
1112 |
|
---|
1113 | ! void DES_encrypt1(data, ks, enc)
|
---|
1114 | ! *******************************
|
---|
1115 | ! in0 = data (two 32 bit words), in1 = ks, in2 = enc; enc equal to 0 selects the decryption path
|
---|
1116 | .align 32
|
---|
1117 | .global DES_encrypt1
|
---|
1118 | .type DES_encrypt1,#function
|
---|
1119 |
|
---|
1120 | DES_encrypt1:
|
---|
1121 |
|
---|
1122 | save %sp, FRAME, %sp
|
---|
1123 | ! position independent setup: global1 = address of .PIC.DES_SPtrans, out2 = address of .des_and
|
---|
1124 | sethi %hi(.PIC.DES_SPtrans-1f),global1
|
---|
1125 | or global1,%lo(.PIC.DES_SPtrans-1f),global1
|
---|
1126 | 1: call .+8
|
---|
1127 | add %o7,global1,global1 ! delay slot: %o7 holds the address of label 1
|
---|
1128 | sub global1,.PIC.DES_SPtrans-.des_and,out2
|
---|
1129 |
|
---|
1130 | ld [in0], in5 ! left
|
---|
1131 | cmp in2, 0 ! enc
|
---|
1132 |
|
---|
1133 | be .encrypt.dec
|
---|
1134 | ld [in0+4], out5 ! right
|
---|
1135 |
|
---|
1136 | ! parameter 6 1/2 for include encryption/decryption
|
---|
1137 | ! parameter 7 1 for move in1 to in3
|
---|
1138 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
|
---|
1139 |
|
---|
1140 | ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)
|
---|
1141 |
|
---|
1142 | rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4) ! in4 not used
|
---|
1143 |
|
---|
1144 | fp_macro(in5, out5, 1) ! 1 for store to [in0]
|
---|
1145 |
|
---|
1146 | ret
|
---|
1147 | restore
|
---|
1148 |
|
---|
1149 | .encrypt.dec:
|
---|
1150 | ! reached when enc is 0
|
---|
1151 | add in1, 120, in3 ! use last subkey for first round
|
---|
1152 |
|
---|
1153 | ! parameter 6 1/2 for include encryption/decryption
|
---|
1154 | ! parameter 7 1 for move in1 to in3
|
---|
1155 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
|
---|
1156 |
|
---|
1157 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include dec, ks in4
|
---|
1158 |
|
---|
1159 | fp_macro(out5, in5, 1) ! 1 for store to [in0]
|
---|
1160 |
|
---|
1161 | ret
|
---|
1162 | restore
|
---|
1163 |
|
---|
1164 | .DES_encrypt1.end:
|
---|
1165 | .size DES_encrypt1,.DES_encrypt1.end-DES_encrypt1
|
---|
1166 |
|
---|
1167 |
|
---|
1168 | ! void DES_encrypt2(data, ks, enc)
|
---|
1169 | !*********************************
|
---|
1170 |
|
---|
1171 | ! encrypts/decrypts without initial/final permutation
|
---|
1172 | ! in0 = data (two 32 bit words), in1 = ks, in2 = enc; enc equal to 0 selects the decryption path
|
---|
1173 | .align 32
|
---|
1174 | .global DES_encrypt2
|
---|
1175 | .type DES_encrypt2,#function
|
---|
1176 |
|
---|
1177 | DES_encrypt2:
|
---|
1178 |
|
---|
1179 | save %sp, FRAME, %sp
|
---|
1180 | ! position independent setup: global1 = address of .PIC.DES_SPtrans, out2 = address of .des_and
|
---|
1181 | sethi %hi(.PIC.DES_SPtrans-1f),global1
|
---|
1182 | or global1,%lo(.PIC.DES_SPtrans-1f),global1
|
---|
1183 | 1: call .+8
|
---|
1184 | add %o7,global1,global1 ! delay slot: %o7 holds the address of label 1
|
---|
1185 | sub global1,.PIC.DES_SPtrans-.des_and,out2
|
---|
1186 |
|
---|
1187 | ! Set sbox address 1 to 6 and rotate halves 3 left
|
---|
1188 | ! Errors caught by destest? Yes. Still? *NO*
|
---|
1189 |
|
---|
1190 | !sethi %hi(DES_SPtrans), global1 ! address sbox 1
|
---|
1191 |
|
---|
1192 | !or global1, %lo(DES_SPtrans), global1 ! sbox 1
|
---|
1193 |
|
---|
1194 | add global1, 256, global2 ! sbox 2
|
---|
1195 | add global1, 512, global3 ! sbox 3
|
---|
1196 |
|
---|
1197 | ld [in0], out5 ! right
|
---|
1198 | add global1, 768, global4 ! sbox 4
|
---|
1199 | add global1, 1024, global5 ! sbox 5
|
---|
1200 |
|
---|
1201 | ld [in0+4], in5 ! left
|
---|
1202 | add global1, 1280, local6 ! sbox 6
|
---|
1203 | add global1, 1792, out3 ! sbox 8
|
---|
1204 |
|
---|
1205 | ! rotate both halves left by 3 bits (sll 3 combined with srl 29)
|
---|
1206 |
|
---|
1207 | sll in5, 3, local5
|
---|
1208 | mov in1, in3 ! key address to in3
|
---|
1209 |
|
---|
1210 | sll out5, 3, local7
|
---|
1211 | srl in5, 29, in5
|
---|
1212 |
|
---|
1213 | srl out5, 29, out5
|
---|
1214 | add in5, local5, in5
|
---|
1215 |
|
---|
1216 | add out5, local7, out5
|
---|
1217 | cmp in2, 0
|
---|
1218 |
|
---|
1219 | ! we use our own stackframe
|
---|
1220 |
|
---|
1221 | be .encrypt2.dec
|
---|
1222 | STPTR in0, [%sp+BIAS+ARG0+0*ARGSZ] ! delay slot: data pointer is saved on both paths, in0 is overwritten below
|
---|
1223 |
|
---|
1224 | ld [in3], out0 ! key 7531 first round
|
---|
1225 | mov LOOPS, out4 ! loop counter
|
---|
1226 |
|
---|
1227 | ld [in3+4], out1 ! key 8642 first round
|
---|
1228 | sethi %hi(0x0000FC00), local5
|
---|
1229 |
|
---|
1230 | call .des_enc
|
---|
1231 | mov in3, in4 ! delay slot
|
---|
1232 |
|
---|
1233 | ! rotate both halves right by 3 bits (sll 29 combined with srl 3; st keeps the low 32 bits)
|
---|
1234 | sll in5, 29, in0
|
---|
1235 | srl in5, 3, in5
|
---|
1236 | sll out5, 29, in1
|
---|
1237 | add in5, in0, in5
|
---|
1238 | srl out5, 3, out5
|
---|
1239 | LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
|
---|
1240 | add out5, in1, out5
|
---|
1241 | st in5, [in0]
|
---|
1242 | st out5, [in0+4]
|
---|
1243 |
|
---|
1244 | ret
|
---|
1245 | restore
|
---|
1246 |
|
---|
1247 |
|
---|
1248 | .encrypt2.dec:
|
---|
1249 | ! reached when enc is 0; in4 points 120 bytes into the key schedule
|
---|
1250 | add in3, 120, in4
|
---|
1251 |
|
---|
1252 | ld [in4], out0 ! key 7531 first round
|
---|
1253 | mov LOOPS, out4 ! loop counter
|
---|
1254 |
|
---|
1255 | ld [in4+4], out1 ! key 8642 first round
|
---|
1256 | sethi %hi(0x0000FC00), local5
|
---|
1257 |
|
---|
1258 | mov in5, local1 ! left expected in out5
|
---|
1259 | mov out5, in5
|
---|
1260 |
|
---|
1261 | call .des_dec
|
---|
1262 | mov local1, out5 ! delay slot: completes the exchange of the two halves
|
---|
1263 |
|
---|
1264 | .encrypt2.finish:
|
---|
1265 |
|
---|
1266 | ! rotate both halves right by 3 bits (sll 29 combined with srl 3; st keeps the low 32 bits)
|
---|
1267 | sll in5, 29, in0
|
---|
1268 | srl in5, 3, in5
|
---|
1269 | sll out5, 29, in1
|
---|
1270 | add in5, in0, in5
|
---|
1271 | srl out5, 3, out5
|
---|
1272 | LDPTR [%sp+BIAS+ARG0+0*ARGSZ], in0
|
---|
1273 | add out5, in1, out5
|
---|
1274 | st out5, [in0]
|
---|
1275 | st in5, [in0+4]
|
---|
1276 |
|
---|
1277 | ret
|
---|
1278 | restore
|
---|
1279 |
|
---|
1280 | .DES_encrypt2.end:
|
---|
1281 | .size DES_encrypt2, .DES_encrypt2.end-DES_encrypt2
|
---|
1282 |
|
---|
1283 |
|
---|
1284 | ! void DES_encrypt3(data, ks1, ks2, ks3)
|
---|
1285 | ! **************************************
|
---|
1286 | ! in0 = data (two 32 bit words), in1 = ks1, in2 = ks2, in3 = ks3
|
---|
1287 | .align 32
|
---|
1288 | .global DES_encrypt3
|
---|
1289 | .type DES_encrypt3,#function
|
---|
1290 |
|
---|
1291 | DES_encrypt3:
|
---|
1292 |
|
---|
1293 | save %sp, FRAME, %sp
|
---|
1294 | ! position independent setup: global1 = address of .PIC.DES_SPtrans, out2 = address of .des_and
|
---|
1295 | sethi %hi(.PIC.DES_SPtrans-1f),global1
|
---|
1296 | or global1,%lo(.PIC.DES_SPtrans-1f),global1
|
---|
1297 | 1: call .+8
|
---|
1298 | add %o7,global1,global1 ! delay slot: %o7 holds the address of label 1
|
---|
1299 | sub global1,.PIC.DES_SPtrans-.des_and,out2
|
---|
1300 |
|
---|
1301 | ld [in0], in5 ! left
|
---|
1302 | add in2, 120, in4 ! ks2
|
---|
1303 |
|
---|
1304 | ld [in0+4], out5 ! right
|
---|
1305 | mov in3, in2 ! save ks3
|
---|
1306 |
|
---|
1307 | ! parameter 6 1/2 for include encryption/decryption
|
---|
1308 | ! parameter 7 1 for mov in1 to in3
|
---|
1309 | ! parameter 8 1 for mov in3 to in4
|
---|
1310 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3
|
---|
1311 |
|
---|
1312 | ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)
|
---|
1313 | ! next: decryption pass with ks2 (in4 = ks2 + 120), then encryption pass with ks3 (in3)
|
---|
1314 | call .des_dec
|
---|
1315 | mov in2, in3 ! preload ks3
|
---|
1316 |
|
---|
1317 | call .des_enc
|
---|
1318 | nop
|
---|
1319 |
|
---|
1320 | fp_macro(in5, out5, 1)
|
---|
1321 |
|
---|
1322 | ret
|
---|
1323 | restore
|
---|
1324 |
|
---|
1325 | .DES_encrypt3.end:
|
---|
1326 | .size DES_encrypt3,.DES_encrypt3.end-DES_encrypt3
|
---|
1327 |
|
---|
1328 |
|
---|
1329 | ! void DES_decrypt3(data, ks1, ks2, ks3)
|
---|
1330 | ! **************************************
|
---|
1331 | ! in0 = data (two 32 bit words), in1 = ks1, in2 = ks2, in3 = ks3
|
---|
1332 | .align 32
|
---|
1333 | .global DES_decrypt3
|
---|
1334 | .type DES_decrypt3,#function
|
---|
1335 |
|
---|
1336 | DES_decrypt3:
|
---|
1337 |
|
---|
1338 | save %sp, FRAME, %sp
|
---|
1339 | ! position independent setup: global1 = address of .PIC.DES_SPtrans, out2 = address of .des_and
|
---|
1340 | sethi %hi(.PIC.DES_SPtrans-1f),global1
|
---|
1341 | or global1,%lo(.PIC.DES_SPtrans-1f),global1
|
---|
1342 | 1: call .+8
|
---|
1343 | add %o7,global1,global1 ! delay slot: %o7 holds the address of label 1
|
---|
1344 | sub global1,.PIC.DES_SPtrans-.des_and,out2
|
---|
1345 |
|
---|
1346 | ld [in0], in5 ! left
|
---|
1347 | add in3, 120, in4 ! ks3
|
---|
1348 |
|
---|
1349 | ld [in0+4], out5 ! right
|
---|
1350 | mov in2, in3 ! ks2
|
---|
1351 |
|
---|
1352 | ! parameter 6 1/2 for include encryption/decryption
|
---|
1353 | ! parameter 7 1 for mov in1 to in3
|
---|
1354 | ! parameter 8 1 for mov in3 to in4
|
---|
1355 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3
|
---|
1356 |
|
---|
1357 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)
|
---|
1358 | ! next: encryption pass with ks2 (in3), then decryption pass with ks1 (in4 = ks1 + 120)
|
---|
1359 | call .des_enc
|
---|
1360 | add in1, 120, in4 ! preload ks1
|
---|
1361 |
|
---|
1362 | call .des_dec
|
---|
1363 | nop
|
---|
1364 |
|
---|
1365 | fp_macro(out5, in5, 1)
|
---|
1366 |
|
---|
1367 | ret
|
---|
1368 | restore
|
---|
1369 |
|
---|
1370 | .DES_decrypt3.end:
|
---|
1371 | .size DES_decrypt3,.DES_decrypt3.end-DES_decrypt3
|
---|
1372 |
|
---|
1373 | ! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
|
---|
1374 | ! *****************************************************************
|
---|
1375 | ! in0 = input, in1 = output, in2 = length, in3 = schedule, in4 = ivec, in5 = enc
|
---|
1376 |
|
---|
1377 | .align 32
|
---|
1378 | .global DES_ncbc_encrypt
|
---|
1379 | .type DES_ncbc_encrypt,#function
|
---|
1380 |
|
---|
1381 | DES_ncbc_encrypt:
|
---|
1382 |
|
---|
1383 | save %sp, FRAME, %sp
|
---|
1384 | ! m4 names for stack slots in this frame that hold pointers
|
---|
1385 | define({INPUT}, { [%sp+BIAS+ARG0+0*ARGSZ] })
|
---|
1386 | define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
|
---|
1387 | define({IVEC}, { [%sp+BIAS+ARG0+4*ARGSZ] })
|
---|
1388 | ! position independent setup: global1 = address of .PIC.DES_SPtrans, out2 = address of .des_and
|
---|
1389 | sethi %hi(.PIC.DES_SPtrans-1f),global1
|
---|
1390 | or global1,%lo(.PIC.DES_SPtrans-1f),global1
|
---|
1391 | 1: call .+8
|
---|
1392 | add %o7,global1,global1 ! delay slot: %o7 holds the address of label 1
|
---|
1393 | sub global1,.PIC.DES_SPtrans-.des_and,out2
|
---|
1394 |
|
---|
1395 | cmp in5, 0 ! enc
|
---|
1396 |
|
---|
1397 | be .ncbc.dec
|
---|
1398 | STPTR in4, IVEC ! delay slot: the ivec pointer is saved on both paths
|
---|
1399 |
|
---|
1400 | ! addr left right temp label
|
---|
1401 | load_little_endian(in4, in5, out5, local3, .LLE1) ! iv
|
---|
1402 |
|
---|
1403 | addcc in2, -8, in2 ! bytes missing when first block done
|
---|
1404 |
|
---|
1405 | bl .ncbc.enc.seven.or.less
|
---|
1406 | mov in3, in4 ! schedule
|
---|
1407 |
|
---|
1408 | .ncbc.enc.next.block:
|
---|
1409 |
|
---|
1410 | load_little_endian(in0, out4, global4, local3, .LLE2) ! block
|
---|
1411 |
|
---|
1412 | .ncbc.enc.next.block_1:
|
---|
1413 |
|
---|
1414 | xor in5, out4, in5 ! iv xor
|
---|
1415 | xor out5, global4, out5 ! iv xor
|
---|
1416 |
|
---|
1417 | ! parameter 8 1 for move in3 to in4, 2 for move in4 to in3
|
---|
1418 | ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)
|
---|
1419 |
|
---|
1420 | .ncbc.enc.next.block_2:
|
---|
1421 |
|
---|
1422 | !// call .des_enc ! compares in2 to 8
|
---|
1423 | ! rounds inlined for alignment purposes
|
---|
1424 |
|
---|
1425 | add global1, 768, global4 ! address sbox 4 since register used below
|
---|
1426 |
|
---|
1427 | rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4) ! include encryption ks in3
|
---|
1428 |
|
---|
1429 | bl .ncbc.enc.next.block_fp
|
---|
1430 | add in0, 8, in0 ! input address
|
---|
1431 |
|
---|
1432 | ! If 8 or more bytes are to be encrypted after this block,
|
---|
1433 | ! we combine final permutation for this block with initial
|
---|
1434 | ! permutation for next block. Load next block:
|
---|
1435 |
|
---|
1436 | load_little_endian(in0, global3, global4, local5, .LLE12)
|
---|
1437 |
|
---|
1438 | ! parameter 1 original left
|
---|
1439 | ! parameter 2 original right
|
---|
1440 | ! parameter 3 left ip
|
---|
1441 | ! parameter 4 right ip
|
---|
1442 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
|
---|
1443 | ! 2: mov in4 to in3
|
---|
1444 | !
|
---|
1445 | ! also adds -8 to length in2 and loads loop counter to out4
|
---|
1446 |
|
---|
1447 | fp_ip_macro(out0, out1, global3, global4, 2)
|
---|
1448 |
|
---|
1449 | store_little_endian(in1, out0, out1, local3, .SLE10) ! block
|
---|
1450 |
|
---|
1451 | ld [in3], out0 ! key 7531 first round next block
|
---|
1452 | mov in5, local1
|
---|
1453 | xor global3, out5, in5 ! iv xor next block
|
---|
1454 |
|
---|
1455 | ld [in3+4], out1 ! key 8642
|
---|
1456 | add global1, 512, global3 ! address sbox 3 since register used
|
---|
1457 | xor global4, local1, out5 ! iv xor next block
|
---|
1458 |
|
---|
1459 | ba .ncbc.enc.next.block_2
|
---|
1460 | add in1, 8, in1 ! output address
|
---|
1461 |
|
---|
1462 | .ncbc.enc.next.block_fp:
|
---|
1463 |
|
---|
1464 | fp_macro(in5, out5)
|
---|
1465 |
|
---|
1466 | store_little_endian(in1, in5, out5, local3, .SLE1) ! block
|
---|
1467 |
|
---|
1468 | addcc in2, -8, in2 ! bytes missing when next block done
|
---|
1469 |
|
---|
1470 | bpos .ncbc.enc.next.block
|
---|
1471 | add in1, 8, in1
|
---|
1472 |
|
---|
1473 | .ncbc.enc.seven.or.less:
|
---|
1474 | ! in2 is the remaining byte count minus 8 here; -8 or less means no bytes are left
|
---|
1475 | cmp in2, -8
|
---|
1476 |
|
---|
1477 | ble .ncbc.enc.finish
|
---|
1478 | nop
|
---|
1479 |
|
---|
1480 | add in2, 8, local1 ! bytes to load
|
---|
1481 |
|
---|
1482 | ! addr, length, dest left, dest right, temp, temp2, label, ret label
|
---|
1483 | load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)
|
---|
1484 |
|
---|
1485 | ! Loads 1 to 7 bytes little endian to global4, out4
|
---|
1486 |
|
---|
1487 |
|
---|
1488 | .ncbc.enc.finish:
|
---|
1489 | ! write the chaining value held in in5 and out5 back through the saved ivec pointer
|
---|
1490 | LDPTR IVEC, local4
|
---|
1491 | store_little_endian(local4, in5, out5, local5, .SLE2) ! ivec
|
---|
1492 |
|
---|
1493 | ret
|
---|
1494 | restore
|
---|
1495 |
|
---|
1496 |
|
---|
1497 | .ncbc.dec:
|
---|
1498 | ! reached when enc is 0; the schedule pointer is advanced by 120 for this path
|
---|
1499 | STPTR in0, INPUT
|
---|
1500 | cmp in2, 0 ! length
|
---|
1501 | add in3, 120, in3
|
---|
1502 |
|
---|
1503 | LDPTR IVEC, local7 ! ivec
|
---|
1504 | ble .ncbc.dec.finish
|
---|
1505 | mov in3, in4 ! schedule
|
---|
1506 |
|
---|
1507 | STPTR in1, OUTPUT
|
---|
1508 | mov in0, local5 ! input
|
---|
1509 |
|
---|
1510 | load_little_endian(local7, in0, in1, local3, .LLE3) ! ivec
|
---|
1511 |
|
---|
1512 | .ncbc.dec.next.block:
|
---|
1513 |
|
---|
1514 | load_little_endian(local5, in5, out5, local3, .LLE4) ! block
|
---|
1515 |
|
---|
1516 | ! parameter 6 1/2 for include encryption/decryption
|
---|
1517 | ! parameter 7 1 for mov in1 to in3
|
---|
1518 | ! parameter 8 1 for mov in3 to in4
|
---|
1519 |
|
---|
1520 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 1) ! include decryption ks in4
|
---|
1521 |
|
---|
1522 | fp_macro(out5, in5, 0, 1) ! 1 for input and output address to local5/7
|
---|
1523 |
|
---|
1524 | ! in2 is bytes left to be stored
|
---|
1525 | ! in2 is compared to 8 in the rounds
|
---|
1526 |
|
---|
1527 | xor out5, in0, out4 ! iv xor
|
---|
1528 | bl .ncbc.dec.seven.or.less
|
---|
1529 | xor in5, in1, global4 ! iv xor
|
---|
1530 |
|
---|
1531 | ! Load ivec next block now, since input and output address might be the same.
|
---|
1532 |
|
---|
1533 | load_little_endian_inc(local5, in0, in1, local3, .LLE5) ! iv
|
---|
1534 |
|
---|
1535 | store_little_endian(local7, out4, global4, local3, .SLE3)
|
---|
1536 |
|
---|
1537 | STPTR local5, INPUT
|
---|
1538 | add local7, 8, local7
|
---|
1539 | addcc in2, -8, in2
|
---|
1540 |
|
---|
1541 | bg .ncbc.dec.next.block
|
---|
1542 | STPTR local7, OUTPUT
|
---|
1543 |
|
---|
1544 |
|
---|
1545 | .ncbc.dec.store.iv:
|
---|
1546 | ! in0 and in1 hold the block last loaded from the input; it is stored as the new iv
|
---|
1547 | LDPTR IVEC, local4 ! ivec
|
---|
1548 | store_little_endian(local4, in0, in1, local5, .SLE4)
|
---|
1549 |
|
---|
1550 | .ncbc.dec.finish:
|
---|
1551 |
|
---|
1552 | ret
|
---|
1553 | restore
|
---|
1554 |
|
---|
1555 | .ncbc.dec.seven.or.less:
|
---|
1556 | ! fewer than 8 bytes remain: load the next iv, then store only in2 bytes of the result
|
---|
1557 | load_little_endian_inc(local5, in0, in1, local3, .LLE13) ! ivec
|
---|
1558 |
|
---|
1559 | store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)
|
---|
1560 |
|
---|
1561 |
|
---|
1562 | .DES_ncbc_encrypt.end:
|
---|
1563 | .size DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt
|
---|
1564 |
|
---|
1565 |
|
---|
1566 | ! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
|
---|
1567 | ! **************************************************************************
|
---|
1568 | ! in0 = input, in1 = output, in2 = length, in3 = ks1, in4 = ks2, in5 = ks3
|
---|
1569 | ! ivec and enc are loaded through %fp from argument slots 6 and 7 below
|
---|
1570 | .align 32
|
---|
1571 | .global DES_ede3_cbc_encrypt
|
---|
1572 | .type DES_ede3_cbc_encrypt,#function
|
---|
1573 |
|
---|
1574 | DES_ede3_cbc_encrypt:
|
---|
1575 |
|
---|
1576 | save %sp, FRAME, %sp
|
---|
1577 | ! m4 names for the stack slots in this frame that keep the three key schedule pointers
|
---|
1578 | define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
|
---|
1579 | define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
|
---|
1580 | define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })
|
---|
1581 | ! position independent setup: global1 = address of .PIC.DES_SPtrans, out2 = address of .des_and
|
---|
1582 | sethi %hi(.PIC.DES_SPtrans-1f),global1
|
---|
1583 | or global1,%lo(.PIC.DES_SPtrans-1f),global1
|
---|
1584 | 1: call .+8
|
---|
1585 | add %o7,global1,global1 ! delay slot: %o7 holds the address of label 1
|
---|
1586 | sub global1,.PIC.DES_SPtrans-.des_and,out2
|
---|
1587 |
|
---|
1588 | LDPTR [%fp+BIAS+ARG0+7*ARGSZ], local3 ! enc
|
---|
1589 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
|
---|
1590 | cmp local3, 0 ! enc
|
---|
1591 |
|
---|
1592 | be .ede3.dec
|
---|
1593 | STPTR in4, KS2 ! delay slot: the ks2 pointer is saved on both paths
|
---|
1594 |
|
---|
1595 | STPTR in5, KS3
|
---|
1596 |
|
---|
1597 | load_little_endian(local4, in5, out5, local3, .LLE6) ! ivec
|
---|
1598 |
|
---|
1599 | addcc in2, -8, in2 ! bytes missing after next block
|
---|
1600 |
|
---|
1601 | bl .ede3.enc.seven.or.less
|
---|
1602 | STPTR in3, KS1
|
---|
1603 |
|
---|
1604 | .ede3.enc.next.block:
|
---|
1605 |
|
---|
1606 | load_little_endian(in0, out4, global4, local3, .LLE7)
|
---|
1607 |
|
---|
1608 | .ede3.enc.next.block_1:
|
---|
1609 |
|
---|
1610 | LDPTR KS2, in4
|
---|
1611 | xor in5, out4, in5 ! iv xor
|
---|
1612 | xor out5, global4, out5 ! iv xor
|
---|
1613 |
|
---|
1614 | LDPTR KS1, in3
|
---|
1615 | add in4, 120, in4 ! for decryption we use last subkey first
|
---|
1616 | nop
|
---|
1617 |
|
---|
1618 | ip_macro(in5, out5, in5, out5, in3)
|
---|
1619 |
|
---|
1620 | .ede3.enc.next.block_2:
|
---|
1621 |
|
---|
1622 | call .des_enc ! ks1 in3
|
---|
1623 | nop
|
---|
1624 |
|
---|
1625 | call .des_dec ! ks2 in4
|
---|
1626 | LDPTR KS3, in3
|
---|
1627 |
|
---|
1628 | call .des_enc ! ks3 in3 compares in2 to 8
|
---|
1629 | nop
|
---|
1630 |
|
---|
1631 | bl .ede3.enc.next.block_fp
|
---|
1632 | add in0, 8, in0
|
---|
1633 |
|
---|
1634 | ! If 8 or more bytes are to be encrypted after this block,
|
---|
1635 | ! we combine final permutation for this block with initial
|
---|
1636 | ! permutation for next block. Load next block:
|
---|
1637 |
|
---|
1638 | load_little_endian(in0, global3, global4, local5, .LLE11)
|
---|
1639 |
|
---|
1640 | ! parameter 1 original left
|
---|
1641 | ! parameter 2 original right
|
---|
1642 | ! parameter 3 left ip
|
---|
1643 | ! parameter 4 right ip
|
---|
1644 | ! parameter 5 1: load ks1/ks2 to in3/in4, add 120 to in4
|
---|
1645 | ! 2: mov in4 to in3
|
---|
1646 | !
|
---|
1647 | ! also adds -8 to length in2 and loads loop counter to out4
|
---|
1648 |
|
---|
1649 | fp_ip_macro(out0, out1, global3, global4, 1)
|
---|
1650 |
|
---|
1651 | store_little_endian(in1, out0, out1, local3, .SLE9) ! block
|
---|
1652 |
|
---|
1653 | mov in5, local1
|
---|
1654 | xor global3, out5, in5 ! iv xor next block
|
---|
1655 |
|
---|
1656 | ld [in3], out0 ! key 7531
|
---|
1657 | add global1, 512, global3 ! address sbox 3
|
---|
1658 | xor global4, local1, out5 ! iv xor next block
|
---|
1659 |
|
---|
1660 | ld [in3+4], out1 ! key 8642
|
---|
1661 | add global1, 768, global4 ! address sbox 4
|
---|
1662 | ba .ede3.enc.next.block_2
|
---|
1663 | add in1, 8, in1
|
---|
1664 |
|
---|
1665 | .ede3.enc.next.block_fp:
|
---|
1666 |
|
---|
1667 | fp_macro(in5, out5)
|
---|
1668 |
|
---|
1669 | store_little_endian(in1, in5, out5, local3, .SLE5) ! block
|
---|
1670 |
|
---|
1671 | addcc in2, -8, in2 ! bytes missing when next block done
|
---|
1672 |
|
---|
1673 | bpos .ede3.enc.next.block
|
---|
1674 | add in1, 8, in1
|
---|
1675 |
|
---|
1676 | .ede3.enc.seven.or.less:
|
---|
1677 | ! in2 is the remaining byte count minus 8 here; -8 or less means no bytes are left
|
---|
1678 | cmp in2, -8
|
---|
1679 |
|
---|
1680 | ble .ede3.enc.finish
|
---|
1681 | nop
|
---|
1682 |
|
---|
1683 | add in2, 8, local1 ! bytes to load
|
---|
1684 |
|
---|
1685 | ! addr, length, dest left, dest right, temp, temp2, label, ret label
|
---|
1686 | load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)
|
---|
1687 |
|
---|
1688 | .ede3.enc.finish:
|
---|
1689 | ! write the chaining value held in in5 and out5 back through the ivec pointer
|
---|
1690 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
|
---|
1691 | store_little_endian(local4, in5, out5, local5, .SLE6) ! ivec
|
---|
1692 |
|
---|
1693 | ret
|
---|
1694 | restore
|
---|
1695 |
|
---|
1696 | .ede3.dec:
|
---|
1697 | ! reached when enc is 0; the ks1 and ks3 pointers are advanced by 120 before they are saved
|
---|
1698 | STPTR in0, INPUT
|
---|
1699 | add in5, 120, in5
|
---|
1700 |
|
---|
1701 | STPTR in1, OUTPUT
|
---|
1702 | mov in0, local5
|
---|
1703 | add in3, 120, in3
|
---|
1704 |
|
---|
1705 | STPTR in3, KS1
|
---|
1706 | cmp in2, 0
|
---|
1707 |
|
---|
1708 | ble .ede3.dec.finish
|
---|
1709 | STPTR in5, KS3 ! delay slot
|
---|
1710 |
|
---|
1711 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local7 ! iv
|
---|
1712 | load_little_endian(local7, in0, in1, local3, .LLE8)
|
---|
1713 |
|
---|
1714 | .ede3.dec.next.block:
|
---|
1715 |
|
---|
1716 | load_little_endian(local5, in5, out5, local3, .LLE9)
|
---|
1717 |
|
---|
1718 | ! parameter 6 1/2 for include encryption/decryption
|
---|
1719 | ! parameter 7 1 for mov in1 to in3
|
---|
1720 | ! parameter 8 1 for mov in3 to in4
|
---|
1721 | ! parameter 9 1 for load ks3 and ks2 to in4 and in3
|
---|
1722 |
|
---|
1723 | ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1) ! inc .des_dec ks3 in4
|
---|
1724 |
|
---|
1725 | call .des_enc ! ks2 in3
|
---|
1726 | LDPTR KS1, in4
|
---|
1727 |
|
---|
1728 | call .des_dec ! ks1 in4
|
---|
1729 | nop
|
---|
1730 |
|
---|
1731 | fp_macro(out5, in5, 0, 1) ! 1 for input and output address local5/7
|
---|
1732 |
|
---|
1733 | ! in2 is bytes left to be stored
|
---|
1734 | ! in2 is compared to 8 in the rounds
|
---|
1735 |
|
---|
1736 | xor out5, in0, out4
|
---|
1737 | bl .ede3.dec.seven.or.less
|
---|
1738 | xor in5, in1, global4
|
---|
1739 |
|
---|
1740 | load_little_endian_inc(local5, in0, in1, local3, .LLE10) ! iv next block
|
---|
1741 |
|
---|
1742 | store_little_endian(local7, out4, global4, local3, .SLE7) ! block
|
---|
1743 |
|
---|
1744 | STPTR local5, INPUT
|
---|
1745 | addcc in2, -8, in2
|
---|
1746 | add local7, 8, local7
|
---|
1747 |
|
---|
1748 | bg .ede3.dec.next.block
|
---|
1749 | STPTR local7, OUTPUT
|
---|
1750 |
|
---|
1751 | .ede3.dec.store.iv:
|
---|
1752 | ! in0 and in1 hold the block last loaded from the input; it is stored as the new iv
|
---|
1753 | LDPTR [%fp+BIAS+ARG0+6*ARGSZ], local4 ! ivec
|
---|
1754 | store_little_endian(local4, in0, in1, local5, .SLE8) ! ivec
|
---|
1755 |
|
---|
1756 | .ede3.dec.finish:
|
---|
1757 |
|
---|
1758 | ret
|
---|
1759 | restore
|
---|
1760 |
|
---|
1761 | .ede3.dec.seven.or.less:
|
---|
1762 | ! fewer than 8 bytes remain: load the next iv, then store only in2 bytes of the result
|
---|
1763 | load_little_endian_inc(local5, in0, in1, local3, .LLE14) ! iv
|
---|
1764 |
|
---|
1765 | store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)
|
---|
1766 |
|
---|
1767 |
|
---|
1768 | .DES_ede3_cbc_encrypt.end:
|
---|
1769 | .size DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt
|
---|
1770 |
|
---|
1771 | .align 256
|
---|
1772 | .type .des_and,#object
|
---|
1773 | .size .des_and,288
|
---|
1774 | ! size is 256 table bytes plus the 8 words that follow: 288 bytes (last word covers 284 to 287)
|
---|
1775 | .des_and:
|
---|
1776 |
|
---|
1777 | ! This table is used for AND 0xFC when it is known that register
|
---|
1778 | ! bits 8-31 are zero. Makes it possible to do three arithmetic
|
---|
1779 | ! operations in one cycle.
|
---|
1780 | ! Entry i holds i with its two low bits cleared, for i from 0 to 255.
|
---|
1781 | .byte 0, 0, 0, 0, 4, 4, 4, 4
|
---|
1782 | .byte 8, 8, 8, 8, 12, 12, 12, 12
|
---|
1783 | .byte 16, 16, 16, 16, 20, 20, 20, 20
|
---|
1784 | .byte 24, 24, 24, 24, 28, 28, 28, 28
|
---|
1785 | .byte 32, 32, 32, 32, 36, 36, 36, 36
|
---|
1786 | .byte 40, 40, 40, 40, 44, 44, 44, 44
|
---|
1787 | .byte 48, 48, 48, 48, 52, 52, 52, 52
|
---|
1788 | .byte 56, 56, 56, 56, 60, 60, 60, 60
|
---|
1789 | .byte 64, 64, 64, 64, 68, 68, 68, 68
|
---|
1790 | .byte 72, 72, 72, 72, 76, 76, 76, 76
|
---|
1791 | .byte 80, 80, 80, 80, 84, 84, 84, 84
|
---|
1792 | .byte 88, 88, 88, 88, 92, 92, 92, 92
|
---|
1793 | .byte 96, 96, 96, 96, 100, 100, 100, 100
|
---|
1794 | .byte 104, 104, 104, 104, 108, 108, 108, 108
|
---|
1795 | .byte 112, 112, 112, 112, 116, 116, 116, 116
|
---|
1796 | .byte 120, 120, 120, 120, 124, 124, 124, 124
|
---|
1797 | .byte 128, 128, 128, 128, 132, 132, 132, 132
|
---|
1798 | .byte 136, 136, 136, 136, 140, 140, 140, 140
|
---|
1799 | .byte 144, 144, 144, 144, 148, 148, 148, 148
|
---|
1800 | .byte 152, 152, 152, 152, 156, 156, 156, 156
|
---|
1801 | .byte 160, 160, 160, 160, 164, 164, 164, 164
|
---|
1802 | .byte 168, 168, 168, 168, 172, 172, 172, 172
|
---|
1803 | .byte 176, 176, 176, 176, 180, 180, 180, 180
|
---|
1804 | .byte 184, 184, 184, 184, 188, 188, 188, 188
|
---|
1805 | .byte 192, 192, 192, 192, 196, 196, 196, 196
|
---|
1806 | .byte 200, 200, 200, 200, 204, 204, 204, 204
|
---|
1807 | .byte 208, 208, 208, 208, 212, 212, 212, 212
|
---|
1808 | .byte 216, 216, 216, 216, 220, 220, 220, 220
|
---|
1809 | .byte 224, 224, 224, 224, 228, 228, 228, 228
|
---|
1810 | .byte 232, 232, 232, 232, 236, 236, 236, 236
|
---|
1811 | .byte 240, 240, 240, 240, 244, 244, 244, 244
|
---|
1812 | .byte 248, 248, 248, 248, 252, 252, 252, 252
|
---|
1813 |
|
---|
1814 | ! 5 numbers for initial/final permutation
|
---|
1815 |
|
---|
1816 | .word 0x0f0f0f0f ! offset 256
|
---|
1817 | .word 0x0000ffff ! 260
|
---|
1818 | .word 0x33333333 ! 264
|
---|
1819 | .word 0x00ff00ff ! 268
|
---|
1820 | .word 0x55555555 ! 272
|
---|
1821 | ! three further words; the trailing numbers are byte offsets from .des_and
|
---|
1822 | .word 0 ! 276
|
---|
1823 | .word LOOPS ! 280
|
---|
1824 | .word 0x0000FC00 ! 284
|
---|
1825 |
|
---|
1826 | .global DES_SPtrans
|
---|
1827 | .type DES_SPtrans,#object
|
---|
1828 | .size DES_SPtrans,2048
|
---|
1829 | .align 64
|
---|
1830 | DES_SPtrans:
|
---|
1831 | .PIC.DES_SPtrans:
|
---|
1832 | ! nibble 0
|
---|
1833 | .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
|
---|
1834 | .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
|
---|
1835 | .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
|
---|
1836 | .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
|
---|
1837 | .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
|
---|
1838 | .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
|
---|
1839 | .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
|
---|
1840 | .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
|
---|
1841 | .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
|
---|
1842 | .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
|
---|
1843 | .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
|
---|
1844 | .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
|
---|
1845 | .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
|
---|
1846 | .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
|
---|
1847 | .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
|
---|
1848 | .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
|
---|
1849 | ! nibble 1
|
---|
1850 | .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
|
---|
1851 | .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
|
---|
1852 | .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
|
---|
1853 | .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
|
---|
1854 | .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
|
---|
1855 | .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
|
---|
1856 | .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
|
---|
1857 | .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
|
---|
1858 | .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
|
---|
1859 | .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
|
---|
1860 | .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
|
---|
1861 | .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
|
---|
1862 | .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
|
---|
1863 | .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
|
---|
1864 | .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
|
---|
1865 | .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
|
---|
1866 | ! nibble 2
|
---|
1867 | .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
|
---|
1868 | .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
|
---|
1869 | .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
|
---|
1870 | .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
|
---|
1871 | .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
|
---|
1872 | .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
|
---|
1873 | .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
|
---|
1874 | .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
|
---|
1875 | .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
|
---|
1876 | .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
|
---|
1877 | .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
|
---|
1878 | .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
|
---|
1879 | .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
|
---|
1880 | .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
|
---|
1881 | .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
|
---|
1882 | .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
|
---|
1883 | ! nibble 3
|
---|
1884 | .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
|
---|
1885 | .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
|
---|
1886 | .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
|
---|
1887 | .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
|
---|
1888 | .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
|
---|
1889 | .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
|
---|
1890 | .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
|
---|
1891 | .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
|
---|
1892 | .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
|
---|
1893 | .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
|
---|
1894 | .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
|
---|
1895 | .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
|
---|
1896 | .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
|
---|
1897 | .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
|
---|
1898 | .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
|
---|
1899 | .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
|
---|
1900 | ! nibble 4
|
---|
1901 | .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
|
---|
1902 | .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
|
---|
1903 | .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
|
---|
1904 | .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
|
---|
1905 | .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
|
---|
1906 | .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
|
---|
1907 | .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
|
---|
1908 | .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
|
---|
1909 | .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
|
---|
1910 | .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
|
---|
1911 | .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
|
---|
1912 | .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
|
---|
1913 | .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
|
---|
1914 | .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
|
---|
1915 | .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
|
---|
1916 | .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
|
---|
1917 | ! nibble 5
|
---|
1918 | .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
|
---|
1919 | .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
|
---|
1920 | .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
|
---|
1921 | .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
|
---|
1922 | .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
|
---|
1923 | .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
|
---|
1924 | .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
|
---|
1925 | .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
|
---|
1926 | .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
|
---|
1927 | .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
|
---|
1928 | .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
|
---|
1929 | .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
|
---|
1930 | .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
|
---|
1931 | .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
|
---|
1932 | .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
|
---|
1933 | .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
|
---|
1934 | ! nibble 6
|
---|
1935 | .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
|
---|
1936 | .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
|
---|
1937 | .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
|
---|
1938 | .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
|
---|
1939 | .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
|
---|
1940 | .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
|
---|
1941 | .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
|
---|
1942 | .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
|
---|
1943 | .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
|
---|
1944 | .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
|
---|
1945 | .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
|
---|
1946 | .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
|
---|
1947 | .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
|
---|
1948 | .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
|
---|
1949 | .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
|
---|
1950 | .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
|
---|
1951 | ! nibble 7
|
---|
1952 | .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
|
---|
1953 | .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
|
---|
1954 | .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
|
---|
1955 | .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
|
---|
1956 | .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
|
---|
1957 | .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
|
---|
1958 | .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
|
---|
1959 | .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
|
---|
1960 | .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
|
---|
1961 | .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
|
---|
1962 | .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
|
---|
1963 | .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
|
---|
1964 | .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
|
---|
1965 | .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
|
---|
1966 | .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
|
---|
1967 | .word 0x20000000, 0x20800080, 0x00020000, 0x00820080
|
---|
1968 |
|
---|