! Copyright 2000-2018 The OpenSSL Project Authors. All Rights Reserved.
!
! Licensed under the Apache License 2.0 (the "License"). You may not use
! this file except in compliance with the License. You can obtain a copy
! in the file LICENSE in the source distribution or at
! https://www.openssl.org/source/license.html
!
! To expand the m4 macros: m4 -B 8192 des_enc.m4 > des_enc.S
!
! Global registers 1 to 5 are used. This is the same as done by the
! cc compiler. The UltraSPARC load/store little endian feature is used.
!
! Instruction grouping often refers to one CPU cycle.
!
! Assemble through gcc: gcc -c -mcpu=ultrasparc -o des_enc.o des_enc.S
!
! Assemble through cc: cc -c -xarch=v8plusa -o des_enc.o des_enc.S
!
! Performance improvement according to './apps/openssl speed des'
!
! 32-bit build:
! 23% faster than cc-5.2 -xarch=v8plus -xO5
! 115% faster than gcc-3.2.1 -m32 -mcpu=ultrasparc -O5
! 64-bit build:
! 50% faster than cc-5.2 -xarch=v9 -xO5
! 100% faster than gcc-3.2.1 -m64 -mcpu=ultrasparc -O5
!
28 |
|
---|
29 | .ident "des_enc.m4 2.1"
|
---|
30 | .file "des_enc-sparc.S"
|
---|
31 |
|
---|
32 | #if defined(__SUNPRO_C) && defined(__sparcv9)
|
---|
33 | # define ABI64 /* They've said -xarch=v9 at command line */
|
---|
34 | #elif defined(__GNUC__) && defined(__arch64__)
|
---|
35 | # define ABI64 /* They've said -m64 at command line */
|
---|
36 | #endif
|
---|
37 |
|
---|
38 | #ifdef ABI64
|
---|
39 | .register %g2,#scratch
|
---|
40 | .register %g3,#scratch
|
---|
41 | # define FRAME -192
|
---|
42 | # define BIAS 2047
|
---|
43 | # define LDPTR ldx
|
---|
44 | # define STPTR stx
|
---|
45 | # define ARG0 128
|
---|
46 | # define ARGSZ 8
|
---|
47 | #else
|
---|
48 | # define FRAME -96
|
---|
49 | # define BIAS 0
|
---|
50 | # define LDPTR ld
|
---|
51 | # define STPTR st
|
---|
52 | # define ARG0 68
|
---|
53 | # define ARGSZ 4
|
---|
54 | #endif
|
---|
55 |
|
---|
56 | #define LOOPS 7
|
---|
57 |
|
---|
58 | #define global0 %g0
|
---|
59 | #define global1 %g1
|
---|
60 | #define global2 %g2
|
---|
61 | #define global3 %g3
|
---|
62 | #define global4 %g4
|
---|
63 | #define global5 %g5
|
---|
64 |
|
---|
65 | #define local0 %l0
|
---|
66 | #define local1 %l1
|
---|
67 | #define local2 %l2
|
---|
68 | #define local3 %l3
|
---|
69 | #define local4 %l4
|
---|
70 | #define local5 %l5
|
---|
71 | #define local7 %l6
|
---|
72 | #define local6 %l7
|
---|
73 |
|
---|
74 | #define in0 %i0
|
---|
75 | #define in1 %i1
|
---|
76 | #define in2 %i2
|
---|
77 | #define in3 %i3
|
---|
78 | #define in4 %i4
|
---|
79 | #define in5 %i5
|
---|
80 | #define in6 %i6
|
---|
81 | #define in7 %i7
|
---|
82 |
|
---|
83 | #define out0 %o0
|
---|
84 | #define out1 %o1
|
---|
85 | #define out2 %o2
|
---|
86 | #define out3 %o3
|
---|
87 | #define out4 %o4
|
---|
88 | #define out5 %o5
|
---|
89 | #define out6 %o6
|
---|
90 | #define out7 %o7
|
---|
91 |
|
---|
92 | #define stub stb
|
---|
93 |
|
---|
94 | changequote({,})
|
---|
95 |
|
---|
96 |
|
---|
97 | ! Macro definitions:
|
---|
98 |
|
---|
99 |
|
---|
! {ip_macro}
!
! The logic used in initial and final permutations is the same as in
! the C code. The permutations are done with a clever shift, xor, and
! mask (bitwise and) technique.
!
! The macro expects the address of sbox 1 in global1 (the sethi/or pair
! that once loaded it here is reduced to nops). From it the macro derives
! the addresses of sbox 2 to 5 in global 2 to 5, of sbox 6 in local6, and
! of sbox 8 in out3.
!
! out2 must address the constant table: permutation masks are read at
! offsets 256, 260, 264 and 272, the loop counter at 280 (to out4) and
! the round mask 0x0000FC00 at 284 (to local5, parameter 6 = 1 or 2 only).
!
! Rotates the halves 3 left to bring the sbox bits in convenient positions.
!
! Loads key first round from address in parameter 5 to out0, out1.
!
! After the original LibDES initial permutation, the resulting left
! is in the variable initially used for right and vice versa. The macro
! implements the possibility to keep the halves in the original registers.
!
! parameter 1  left
! parameter 2  right
! parameter 3  result left  (modify in first round)
! parameter 4  result right (use in first round)
! parameter 5  key address
! parameter 6  1/2 for include encryption/decryption
!              (the expansion then ends with a call to .des_enc.1 or
!              .des_dec.1 and the first sbox 1 lookup value in local1)
! parameter 7  1 for move in1 to in3 (otherwise a nop is emitted)
! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
! parameter 9  1 for load ks3 and ks2 to in4 and in3
!
! Scratch registers: local1 to local5 and local7.

define(ip_macro, {

! {ip_macro}
! $1 $2 $4 $3 $5 $6 $7 $8 $9

	ld	[out2+256], local1
	srl	$2, 4, local4             ! step 1: distance 4, mask [out2+256]

	xor	local4, $1, local4
	ifelse($7,1,{mov in1, in3},{nop})

	ld	[out2+260], local2
	and	local4, local1, local4
	ifelse($8,1,{mov in3, in4},{})
	ifelse($8,2,{mov in4, in3},{})

	ld	[out2+280], out4          ! loop counter
	sll	local4, 4, local1
	xor	$1, local4, $1

	ld	[out2+264], local3
	srl	$1, 16, local4            ! step 2: distance 16, mask [out2+260]
	xor	$2, local1, $2

	ifelse($9,1,{LDPTR KS3, in4},{})
	xor	local4, $2, local4
	nop	!sethi	%hi(DES_SPtrans), global1 ! sbox addr

	ifelse($9,1,{LDPTR KS2, in3},{})
	and	local4, local2, local4
	nop	!or	global1, %lo(DES_SPtrans), global1   ! sbox addr

	sll	local4, 16, local1
	xor	$2, local4, $2

	srl	$2, 2, local4             ! step 3: distance 2, mask [out2+264]
	xor	$1, local1, $1

	sethi	%hi(16711680), local5     ! 0x00FF0000
	xor	local4, $1, local4

	and	local4, local3, local4
	or	local5, 255, local5       ! 0x00FF00FF, mask for step 4

	sll	local4, 2, local2
	xor	$1, local4, $1

	srl	$1, 8, local4             ! step 4: distance 8
	xor	$2, local2, $2

	xor	local4, $2, local4
	add	global1, 768, global4     ! address sbox 4

	and	local4, local5, local4
	add	global1, 1024, global5    ! address sbox 5

	ld	[out2+272], local7
	sll	local4, 8, local1
	xor	$2, local4, $2

	srl	$2, 1, local4             ! step 5: distance 1, mask [out2+272]
	xor	$1, local1, $1

	ld	[$5], out0                ! key 7531
	xor	local4, $1, local4
	add	global1, 256, global2     ! address sbox 2

	ld	[$5+4], out1              ! key 8642
	and	local4, local7, local4
	add	global1, 512, global3     ! address sbox 3

	sll	local4, 1, local1
	xor	$1, local4, $1

	sll	$1, 3, local3             ! rotate both halves 3 left
	xor	$2, local1, $2

	sll	$2, 3, local2
	add	global1, 1280, local6     ! address sbox 6

	srl	$1, 29, local4
	add	global1, 1792, out3       ! address sbox 8

	srl	$2, 29, local1
	or	local4, local3, $4

	or	local2, local1, $3


	ifelse($6, 1, {

		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
		or	local2, local1, $3     ! same value as stored just above
		xor	$4, out0, local1

		call .des_enc.1
		and	local1, 252, local1    ! delay slot: sbox 1 offset, first round

	},{})

	ifelse($6, 2, {

		ld	[out2+284], local5     ! 0x0000FC00 used in the rounds
		or	local2, local1, $3     ! same value as stored just above
		xor	$4, out0, local1

		call .des_dec.1
		and	local1, 252, local1    ! delay slot: sbox 1 offset, first round

	},{})
})
237 |
|
---|
238 |
|
---|
! {rounds_macro}
!
! The logic used in the DES rounds is the same as in the C code,
! except that calculations for sbox 1 and sbox 5 begin before
! the previous round is finished.
!
! In each round one half (work) is modified based on key and the
! other half (use).
!
! In this version we do two rounds in a loop repeated 7 times
! and two rounds separately.
!
! One half has the bits for the sboxes in the following positions:
!
!	777777xx555555xx333333xx111111xx
!
!	88xx666666xx444444xx222222xx8888
!
! The bits for each sbox are xor-ed with the key bits for that box.
! The above xx bits are cleared, and the result used for lookup in
! the sbox table. Each sbox entry contains the 4 output bits permuted
! into 32 bits according to the P permutation.
!
! In the description of DES, left and right are switched after
! each round, except after last round. In this code the original
! left and right are kept in the same register in all rounds, meaning
! that after the 16 rounds the result for right is in the register
! originally used for left.
!
! parameter 1  first work (left in first round)
! parameter 2  first use (right in first round)
! parameter 3  enc/dec  1/-1
! parameter 4  loop label
! parameter 5  key address register
! parameter 6  optional address for key next encryption/decryption
! parameter 7  not empty for include retl
!
! also compares in2 to 8
!
! Registers read without being set up here: out0 and out1 (key words of
! the first round), out4 (loop counter), out2 (constant table: byte lookup
! in the first 256 bytes, loop counter at 280, 0x0000FC00 at 284),
! global1 to global5, local6 and out3 (sbox addresses).
!
! The key address register is advanced by 16 times parameter 3 on every
! pass of the loop.
!
! On exit local3 and local4 hold parameter 1 shifted 3 right and 29 left.
! With parameter 6 given, out0 and out1 hold the two key words found at
! that address and out4 is reloaded with the loop counter. With
! parameter 7 given, the last instruction sits in the delay slot of retl.

define(rounds_macro, {

! {rounds_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	xor	$2, out0, local1

	ld	[out2+284], local5        ! 0x0000FC00
	ba	$4
	and	local1, 252, local1       ! delay slot: sbox 1 offset

	.align 32

$4:
	! local6 is address sbox 6
	! out3   is address sbox 8
	! out4   is loop counter

	ld	[global1+local1], local1
	xor	$2, out1, out1            ! 8642
	xor	$2, out0, out0            ! 7531
	! fmovs	%f0, %f0                  ! fxor used for alignment

	srl	out1, 4, local0           ! rotate 4 right
	and	out0, local5, local3      ! 3
	! fmovs	%f0, %f0

	ld	[$5+$3*8], local7         ! key 7531 next round
	srl	local3, 8, local3         ! 3
	and	local0, 252, local2       ! 2
	! fmovs	%f0, %f0

	ld	[global3+local3],local3   ! 3
	sll	out1, 28, out1            ! rotate
	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7

	ld	[global2+local2], local2  ! 2
	srl	out0, 24, local1          ! 7
	or	out1, local0, out1        ! rotate

	ldub	[out2+local1], local1     ! 7 (and 0xFC)
	srl	out1, 24, local0          ! 8
	and	out1, local5, local4      ! 4

	ldub	[out2+local0], local0     ! 8 (and 0xFC)
	srl	local4, 8, local4         ! 4
	xor	$1, local2, $1            ! 2 finished local2 now sbox 6

	ld	[global4+local4],local4   ! 4
	srl	out1, 16, local2          ! 6
	xor	$1, local3, $1            ! 3 finished local3 now sbox 5

	ld	[out3+local0],local0      ! 8
	and	local2, 252, local2       ! 6
	add	global1, 1536, local5     ! address sbox 7

	ld	[local6+local2], local2   ! 6
	srl	out0, 16, local3          ! 5
	xor	$1, local4, $1            ! 4 finished

	ld	[local5+local1],local1    ! 7
	and	local3, 252, local3       ! 5
	xor	$1, local0, $1            ! 8 finished

	ld	[global5+local3],local3   ! 5
	xor	$1, local2, $1            ! 6 finished
	subcc	out4, 1, out4             ! sets the condition codes for the bne below

	ld	[$5+$3*8+4], out0         ! key 8642 next round
	xor	$1, local7, local2        ! sbox 5 next round
	xor	$1, local1, $1            ! 7 finished

	srl	local2, 16, local2        ! sbox 5 next round
	xor	$1, local3, $1            ! 5 finished

	ld	[$5+$3*16+4], out1        ! key 8642 next round again
	and	local2, 252, local2       ! sbox5 next round
! next round
	xor	$1, local7, local7        ! 7531

	ld	[global5+local2], local2  ! 5
	srl	local7, 24, local3        ! 7
	xor	$1, out0, out0            ! 8642

	ldub	[out2+local3], local3     ! 7 (and 0xFC)
	srl	out0, 4, local0           ! rotate 4 right
	and	local7, 252, local1       ! 1

	sll	out0, 28, out0            ! rotate
	xor	$2, local2, $2            ! 5 finished local2 used

	srl	local0, 8, local4         ! 4
	and	local0, 252, local2       ! 2
	ld	[local5+local3], local3   ! 7

	srl	local0, 16, local5        ! 6
	or	out0, local0, out0        ! rotate
	ld	[global2+local2], local2  ! 2

	srl	out0, 24, local0
	ld	[$5+$3*16], out0          ! key 7531 next round
	and	local4, 252, local4       ! 4

	and	local5, 252, local5       ! 6
	ld	[global4+local4], local4  ! 4
	xor	$2, local3, $2            ! 7 finished local3 used

	and	local0, 252, local0       ! 8
	ld	[local6+local5], local5   ! 6
	xor	$2, local2, $2            ! 2 finished local2 now sbox 3

	srl	local7, 8, local2         ! 3 start
	ld	[out3+local0], local0     ! 8
	xor	$2, local4, $2            ! 4 finished

	and	local2, 252, local2       ! 3
	ld	[global1+local1], local1  ! 1
	xor	$2, local5, $2            ! 6 finished local5 used

	ld	[global3+local2], local2  ! 3
	xor	$2, local0, $2            ! 8 finished
	add	$5, $3*16, $5             ! enc add 16, dec add -16 to key pointer

	ld	[out2+284], local5        ! 0x0000FC00
	xor	$2, out0, local4          ! sbox 1 next round
	xor	$2, local1, $2            ! 1 finished

	xor	$2, local2, $2            ! 3 finished
	bne	$4
	and	local4, 252, local1       ! sbox 1 next round

! two rounds more:

	ld	[global1+local1], local1
	xor	$2, out1, out1
	xor	$2, out0, out0

	srl	out1, 4, local0           ! rotate
	and	out0, local5, local3

	ld	[$5+$3*8], local7         ! key 7531
	srl	local3, 8, local3
	and	local0, 252, local2

	ld	[global3+local3],local3
	sll	out1, 28, out1            ! rotate
	xor	$1, local1, $1            ! 1 finished, local1 now sbox 7

	ld	[global2+local2], local2
	srl	out0, 24, local1
	or	out1, local0, out1        ! rotate

	ldub	[out2+local1], local1
	srl	out1, 24, local0
	and	out1, local5, local4

	ldub	[out2+local0], local0
	srl	local4, 8, local4
	xor	$1, local2, $1            ! 2 finished local2 now sbox 6

	ld	[global4+local4],local4
	srl	out1, 16, local2
	xor	$1, local3, $1            ! 3 finished local3 now sbox 5

	ld	[out3+local0],local0
	and	local2, 252, local2
	add	global1, 1536, local5     ! address sbox 7

	ld	[local6+local2], local2
	srl	out0, 16, local3
	xor	$1, local4, $1            ! 4 finished

	ld	[local5+local1],local1
	and	local3, 252, local3
	xor	$1, local0, $1

	ld	[global5+local3],local3
	xor	$1, local2, $1            ! 6 finished
	cmp	in2, 8

	ifelse($6,{}, {}, {ld [out2+280], out4}) ! loop counter
	xor	$1, local7, local2        ! sbox 5 next round
	xor	$1, local1, $1            ! 7 finished

	ld	[$5+$3*8+4], out0
	srl	local2, 16, local2        ! sbox 5 next round
	xor	$1, local3, $1            ! 5 finished

	and	local2, 252, local2
! next round (two rounds more)
	xor	$1, local7, local7        ! 7531

	ld	[global5+local2], local2
	srl	local7, 24, local3
	xor	$1, out0, out0            ! 8642

	ldub	[out2+local3], local3
	srl	out0, 4, local0           ! rotate
	and	local7, 252, local1

	sll	out0, 28, out0            ! rotate
	xor	$2, local2, $2            ! 5 finished local2 used

	srl	local0, 8, local4
	and	local0, 252, local2
	ld	[local5+local3], local3

	srl	local0, 16, local5
	or	out0, local0, out0        ! rotate
	ld	[global2+local2], local2

	srl	out0, 24, local0
	ifelse($6,{}, {}, {ld [$6], out0}) ! key next encryption/decryption
	and	local4, 252, local4

	and	local5, 252, local5
	ld	[global4+local4], local4
	xor	$2, local3, $2            ! 7 finished local3 used

	and	local0, 252, local0
	ld	[local6+local5], local5
	xor	$2, local2, $2            ! 2 finished local2 now sbox 3

	srl	local7, 8, local2         ! 3 start
	ld	[out3+local0], local0
	xor	$2, local4, $2

	and	local2, 252, local2
	ld	[global1+local1], local1
	xor	$2, local5, $2            ! 6 finished local5 used

	ld	[global3+local2], local2
	srl	$1, 3, local3             ! work half shifted 3 right, kept for the final permutation
	xor	$2, local0, $2

	ifelse($6,{}, {}, {ld [$6+4], out1}) ! key next encryption/decryption
	sll	$1, 29, local4            ! work half shifted 29 left, kept for the final permutation
	xor	$2, local1, $2

	ifelse($7,{}, {}, {retl})
	xor	$2, local2, $2
})
520 |
|
---|
521 |
|
---|
! {fp_macro}
!
! parameter 1  right (original left)
! parameter 2  left (original right)
! parameter 3  1 for optional store to [in0]
! parameter 4  1 for load input/output address to local5/7
!
! The final permutation logic switches the halves, meaning that
! left and right end up in the registers originally used.
!
! On entry local3 and local4 must hold the original left shifted 3 right
! and 29 left; parameter 1 is rebuilt from them and is not read.
! The five masks are built in registers with sethi/or and the exchange
! steps run with the distances 1, 8, 2, 16 and 4, in that order.
!
! Scratch registers: local1 to local4, plus local5 and local7 when
! parameter 4 is 1.

define(fp_macro, {

! {fp_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! initially undo the rotate 3 left done after initial permutation
	! original left is received shifted 3 right and 29 left in local3/4

	sll	$2, 29, local1
	or	local3, local4, $1

	srl	$2, 3, $2
	sethi	%hi(0x55555555), local2

	or	$2, local1, $2
	or	local2, %lo(0x55555555), local2

	srl	$2, 1, local3             ! step 1: distance 1, mask 0x55555555
	sethi	%hi(0x00ff00ff), local1
	xor	local3, $1, local3
	or	local1, %lo(0x00ff00ff), local1
	and	local3, local2, local3
	sethi	%hi(0x33333333), local4
	sll	local3, 1, local2

	xor	$1, local3, $1

	srl	$1, 8, local3             ! step 2: distance 8, mask 0x00ff00ff
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x33333333), local4
	and	local3, local1, local3
	sethi	%hi(0x0000ffff), local1
	sll	local3, 8, local2

	xor	$2, local3, $2

	srl	$2, 2, local3             ! step 3: distance 2, mask 0x33333333
	xor	$1, local2, $1
	xor	local3, $1, local3
	or	local1, %lo(0x0000ffff), local1
	and	local3, local4, local3
	sethi	%hi(0x0f0f0f0f), local4
	sll	local3, 2, local2

	ifelse($4,1, {LDPTR INPUT, local5})
	xor	$1, local3, $1

	ifelse($4,1, {LDPTR OUTPUT, local7})
	srl	$1, 16, local3            ! step 4: distance 16, mask 0x0000ffff
	xor	$2, local2, $2
	xor	local3, $2, local3
	or	local4, %lo(0x0f0f0f0f), local4
	and	local3, local1, local3
	sll	local3, 16, local2

	xor	$2, local3, local1        ! second half continues in local1

	srl	local1, 4, local3         ! step 5: distance 4, mask 0x0f0f0f0f
	xor	$1, local2, $1
	xor	local3, $1, local3
	and	local3, local4, local3
	sll	local3, 4, local2

	xor	$1, local3, $1

	! optional store:

	ifelse($3,1, {st $1, [in0]})

	xor	local1, local2, $2

	ifelse($3,1, {st $2, [in0+4]})

})
607 |
|
---|
608 |
|
---|
! {fp_ip_macro}
!
! Does initial permutation for next block mixed with
! final permutation for current block.
!
! parameter 1  original left
! parameter 2  original right
! parameter 3  left ip
! parameter 4  right ip
! parameter 5  1: load ks1/ks2 to in3/in4, add 120 to in4
!              2: mov in4 to in3
!
! also adds -8 to length in2 and loads loop counter to out4
!
! On entry local3 and local4 must hold the original left shifted 3 right
! and 29 left. The original right is always read from out5, whatever
! register is passed as parameter 2; parameter 2 only receives the result.
!
! All permutation masks are read from the table addressed by out2
! (offsets 256, 260, 264, 268 and 272).
!
! The first expansion creates m4 aliases for the scratch registers
! (two temporaries and four mask registers); they stay defined afterwards.
!
! Scratch registers: local0 to local5, local7 and out4 (reloaded with the
! loop counter before the end).

define(fp_ip_macro, {

! {fp_ip_macro}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	define({temp1},{out4})
	define({temp2},{local3})

	define({ip1},{local1})
	define({ip2},{local2})
	define({ip4},{local4})
	define({ip5},{local5})

	! $1 in local3, local4

	ld	[out2+256], ip1
	sll	out5, 29, temp1
	or	local3, local4, $1

	srl	out5, 3, $2
	ifelse($5,2,{mov in4, in3})

	ld	[out2+272], ip5
	srl	$4, 4, local0
	or	$2, temp1, $2

	srl	$2, 1, temp1
	xor	temp1, $1, temp1

	and	temp1, ip5, temp1
	xor	local0, $3, local0

	sll	temp1, 1, temp2
	xor	$1, temp1, $1

	and	local0, ip1, local0
	add	in2, -8, in2              ! length minus one block

	sll	local0, 4, local7
	xor	$3, local0, $3

	ld	[out2+268], ip4
	srl	$1, 8, temp1
	xor	$2, temp2, $2
	ld	[out2+260], ip2
	srl	$3, 16, local0
	xor	$4, local7, $4
	xor	temp1, $2, temp1
	xor	local0, $4, local0
	and	temp1, ip4, temp1
	and	local0, ip2, local0
	sll	temp1, 8, temp2
	xor	$2, temp1, $2
	sll	local0, 16, local7
	xor	$4, local0, $4

	srl	$2, 2, temp1
	xor	$1, temp2, $1

	ld	[out2+264], temp2         ! ip3
	srl	$4, 2, local0
	xor	$3, local7, $3
	xor	temp1, $1, temp1
	xor	local0, $3, local0
	and	temp1, temp2, temp1
	and	local0, temp2, local0
	sll	temp1, 2, temp2
	xor	$1, temp1, $1
	sll	local0, 2, local7
	xor	$3, local0, $3

	srl	$1, 16, temp1
	xor	$2, temp2, $2
	srl	$3, 8, local0
	xor	$4, local7, $4
	xor	temp1, $2, temp1
	xor	local0, $4, local0
	and	temp1, ip2, temp1
	and	local0, ip4, local0
	sll	temp1, 16, temp2
	xor	$2, temp1, local4
	sll	local0, 8, local7
	xor	$4, local0, $4

	srl	$4, 1, local0
	xor	$3, local7, $3

	srl	local4, 4, temp1
	xor	local0, $3, local0

	xor	$1, temp2, $1
	and	local0, ip5, local0

	sll	local0, 1, local7
	xor	temp1, $1, temp1

	xor	$3, local0, $3
	xor	$4, local7, $4

	sll	$3, 3, local5
	and	temp1, ip1, temp1

	sll	temp1, 4, temp2
	xor	$1, temp1, $1

	ifelse($5,1,{LDPTR KS2, in4})
	sll	$4, 3, local2
	xor	local4, temp2, $2

	! reload since used as temporary:

	ld	[out2+280], out4          ! loop counter

	srl	$3, 29, local0
	ifelse($5,1,{add in4, 120, in4})

	ifelse($5,1,{LDPTR KS1, in3})
	srl	$4, 29, local7

	or	local0, local5, $4
	or	local2, local7, $3

})
746 |
|
---|
747 |
|
---|
748 |
|
---|
! {load_little_endian}
!
! Loads 8 bytes with byte loads, so no alignment of the address is needed.
! Bytes 0 to 3 form the first word and bytes 4 to 7 the second word, each
! with the lowest address in the least significant byte.
!
! parameter 1  address
! parameter 2  destination left  (bytes 0 to 3)
! parameter 3  destination right (bytes 4 to 7)
! parameter 4  temporary
! parameter 5  label
!
! Two labels are emitted: parameter 5 at the start and parameter 5
! followed by the letter a at the end.

define(load_little_endian, {

! {load_little_endian}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! first in memory to rightmost in register

$5:
	ldub	[$1+3], $2

	ldub	[$1+2], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+1], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+0], $4
	sll	$2, 8, $2
	or	$2, $4, $2


	ldub	[$1+3+4], $3

	ldub	[$1+2+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+1+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+0+4], $4
	sll	$3, 8, $3
	or	$3, $4, $3
$5a:

})
796 |
|
---|
797 |
|
---|
! {load_little_endian_inc}
!
! Same byte-wise load as {load_little_endian}, but the address register
! is advanced while the second word is being loaded.
!
! parameter 1  address
! parameter 2  destination left  (bytes 0 to 3)
! parameter 3  destination right (bytes 4 to 7)
! parameter 4  temporary
! parameter 5  label
!
! adds 8 to address
!
! Two labels are emitted: parameter 5 at the start and parameter 5
! followed by the letter a at the end.

define(load_little_endian_inc, {

! {load_little_endian_inc}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! first in memory to rightmost in register

$5:
	ldub	[$1+3], $2

	ldub	[$1+2], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+1], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+0], $4
	sll	$2, 8, $2
	or	$2, $4, $2

	ldub	[$1+3+4], $3
	add	$1, 8, $1                 ! the offsets below compensate with -8

	ldub	[$1+2+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+1+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3

	ldub	[$1+0+4-8], $4
	sll	$3, 8, $3
	or	$3, $4, $3
$5a:

})
847 |
|
---|
848 |
|
---|
! {load_n_bytes}
!
! Loads 1 to 7 bytes little endian
! Remaining bytes are zeroed.
!
! parameter 1  address
! parameter 2  length
! parameter 3  destination register left
! parameter 4  destination register right
! parameter 5  temp
! parameter 6  temp2
! parameter 7  label
! parameter 8  return label
!
! Bytes 0 to 3 are assembled in parameter 4 and bytes 4 to 6 in
! parameter 3. The length selects an entry of a jump table placed after
! the code; the entries are offsets relative to the first label, whose
! address is obtained in %o7 by a call to the next instruction pair.
! %o7 is therefore overwritten. Control leaves through a branch to
! parameter 8.
!
! NOTE(review): the table entry for length 0 is the offset 0, which jumps
! back to the first label of the expansion. Callers are presumably
! expected to pass 1 to 7 only - confirm at the call sites.

define(load_n_bytes, {

! {load_n_bytes}
! $1 $2 $5 $6 $7 $8 $7 $8 $9

$7.0:	call	.+8                       ! %o7 = address of this call
	sll	$2, 2, $6                 ! delay slot: length times 4 = table offset

	add	%o7,$7.jmp.table-$7.0,$5

	add	$5, $6, $5
	mov	0, $4

	ld	[$5], $5

	jmp	%o7+$5
	mov	0, $3                     ! delay slot

$7.7:
	ldub	[$1+6], $5
	sll	$5, 16, $5
	or	$3, $5, $3
$7.6:
	ldub	[$1+5], $5
	sll	$5, 8, $5
	or	$3, $5, $3
$7.5:
	ldub	[$1+4], $5
	or	$3, $5, $3
$7.4:
	ldub	[$1+3], $5
	sll	$5, 24, $5
	or	$4, $5, $4
$7.3:
	ldub	[$1+2], $5
	sll	$5, 16, $5
	or	$4, $5, $4
$7.2:
	ldub	[$1+1], $5
	sll	$5, 8, $5
	or	$4, $5, $4
$7.1:
	ldub	[$1+0], $5
	ba	$8
	or	$4, $5, $4                ! delay slot

	.align 4

$7.jmp.table:
	.word	0
	.word	$7.1-$7.0
	.word	$7.2-$7.0
	.word	$7.3-$7.0
	.word	$7.4-$7.0
	.word	$7.5-$7.0
	.word	$7.6-$7.0
	.word	$7.7-$7.0
})
921 |
|
---|
922 |
|
---|
! {store_little_endian}
!
! Stores two words as 8 single bytes, least significant byte of each word
! at the lowest address, so no alignment of the address is needed.
!
! parameter 1  address
! parameter 2  source left  (stored to bytes 0 to 3)
! parameter 3  source right (stored to bytes 4 to 7)
! parameter 4  temporary
! parameter 5  label
!
! Two labels are emitted: parameter 5 at the start and parameter 5
! followed by the letter a at the end.

define(store_little_endian, {

! {store_little_endian}
! $1 $2 $3 $4 $5 $6 $7 $8 $9

	! rightmost in register to first in memory

$5:
	and	$2, 255, $4
	stub	$4, [$1+0]

	srl	$2, 8, $4
	and	$4, 255, $4
	stub	$4, [$1+1]

	srl	$2, 16, $4
	and	$4, 255, $4
	stub	$4, [$1+2]

	srl	$2, 24, $4
	stub	$4, [$1+3]


	and	$3, 255, $4
	stub	$4, [$1+0+4]

	srl	$3, 8, $4
	and	$4, 255, $4
	stub	$4, [$1+1+4]

	srl	$3, 16, $4
	and	$4, 255, $4
	stub	$4, [$1+2+4]

	srl	$3, 24, $4
	stub	$4, [$1+3+4]

$5a:

})
970 |
|
---|
971 |
|
---|
! {store_n_bytes}
!
! Stores 1 to 7 bytes little endian
!
! parameter 1  address
! parameter 2  length
! parameter 3  source register left
! parameter 4  source register right
! parameter 5  temp
! parameter 6  temp2
! parameter 7  label
! parameter 8  return label
!
! Bytes 0 to 3 are taken from parameter 4 and bytes 4 to 6 from
! parameter 3. The length selects an entry of a jump table placed after
! the code; the entries are offsets relative to the first label, whose
! address is obtained in %o7 by a call to the next instruction pair.
! %o7 is therefore overwritten. Control leaves through a branch to
! parameter 8.
!
! NOTE(review): the table entry for length 0 is the offset 0, which jumps
! back to the first label of the expansion. Callers are presumably
! expected to pass 1 to 7 only - confirm at the call sites.

define(store_n_bytes, {

! {store_n_bytes}
! $1 $2 $5 $6 $7 $8 $7 $8 $9

$7.0:	call	.+8                       ! %o7 = address of this call
	sll	$2, 2, $6                 ! delay slot: length times 4 = table offset

	add	%o7,$7.jmp.table-$7.0,$5

	add	$5, $6, $5

	ld	[$5], $5

	jmp	%o7+$5
	nop

$7.7:
	srl	$3, 16, $5
	and	$5, 0xff, $5
	stub	$5, [$1+6]
$7.6:
	srl	$3, 8, $5
	and	$5, 0xff, $5
	stub	$5, [$1+5]
$7.5:
	and	$3, 0xff, $5
	stub	$5, [$1+4]
$7.4:
	srl	$4, 24, $5
	stub	$5, [$1+3]
$7.3:
	srl	$4, 16, $5
	and	$5, 0xff, $5
	stub	$5, [$1+2]
$7.2:
	srl	$4, 8, $5
	and	$5, 0xff, $5
	stub	$5, [$1+1]
$7.1:
	and	$4, 0xff, $5


	ba	$8
	stub	$5, [$1]                  ! delay slot

	.align 4

$7.jmp.table:

	.word	0
	.word	$7.1-$7.0
	.word	$7.2-$7.0
	.word	$7.3-$7.0
	.word	$7.4-$7.0
	.word	$7.5-$7.0
	.word	$7.6-$7.0
	.word	$7.7-$7.0
})
1044 |
|
---|
1045 |
|
---|
! {testvalue} is the constant that {register_init} copies around.

define(testvalue,{1})

! {register_init}
!
! For test purposes. Builds {testvalue} in local0, copies it into
! every register passed as parameter 1 to 8 that is not empty, and
! then into local1 to local7, out0 to out5 and global1 to global5.

define(register_init, {

! For test purposes:

	sethi	%hi(testvalue), local0
	or	local0, %lo(testvalue), local0

	! optional registers named by the caller

	ifelse($1,{},{}, {mov	local0, $1})
	ifelse($2,{},{}, {mov	local0, $2})
	ifelse($3,{},{}, {mov	local0, $3})
	ifelse($4,{},{}, {mov	local0, $4})
	ifelse($5,{},{}, {mov	local0, $5})
	ifelse($6,{},{}, {mov	local0, $6})
	ifelse($7,{},{}, {mov	local0, $7})
	ifelse($8,{},{}, {mov	local0, $8})

	! fixed set of work registers

	mov	local0, local1
	mov	local0, local2
	mov	local0, local3
	mov	local0, local4
	mov	local0, local5
	mov	local0, local6
	mov	local0, local7

	mov	local0, out0
	mov	local0, out1
	mov	local0, out2
	mov	local0, out3
	mov	local0, out4
	mov	local0, out5

	mov	local0, global1
	mov	local0, global2
	mov	local0, global3
	mov	local0, global4
	mov	local0, global5

})

! .des_enc
!
! Internal encryption rounds, entered with a call instruction from the
! functions further down. The body is a single expansion of
! {rounds_macro} with direction argument 1.
!
! key address in3
! the key for the first round of the next encryption/decryption is
! loaded from [in4]
!
! NOTE(review): the last macro argument is retl, presumably the
! instruction used to return to the caller - the macro definition is
! outside this part of the file, confirm there.

	.section	".text"

	.align 32

.des_enc:

	rounds_macro(in5, out5, 1, .des_enc.1, in3, in4, retl)


! .des_dec
!
! Internal decryption rounds, entered with a call instruction from the
! functions further down. The body is a single expansion of
! {rounds_macro} with direction argument -1.
!
! Implemented with out5 as first parameter to avoid a register
! exchange in the ede modes.
!
! key address in4
! the key for the first round of the next encryption/decryption is
! loaded from [in3]

	.align 32

.des_dec:

	rounds_macro(out5, in5, -1, .des_dec.1, in4, in3, retl)



! void DES_encrypt1(data, ks, enc)
! *******************************
!
! Processes the two 32-bit words at data in place with key schedule ks.
! enc equal to 0 selects the decryption path, any other value the
! encryption path.
!
! in0 data, in1 ks, in2 enc

	.align 32
	.global DES_encrypt1
	.type	 DES_encrypt1,#function

DES_encrypt1:

	save	%sp, FRAME, %sp

	! Position independent address setup: after these five
	! instructions global1 holds the address of .PIC.DES_SPtrans
	! and out2 the address of .des_and.

	sethi	%hi(.PIC.DES_SPtrans-1f), global1
	or	global1, %lo(.PIC.DES_SPtrans-1f), global1
1:	call	.+8
	add	%o7, global1, global1
	sub	global1, .PIC.DES_SPtrans-.des_and, out2

	ld	[in0], in5                ! left
	cmp	in2, 0                    ! enc

	! the second word is loaded in the delay slot on both paths

	be	.encrypt.dec
	ld	[in0+4], out5             ! right

	! encryption path

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for move in1 to in3
	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3

	ip_macro(in5, out5, in5, out5, in3, 0, 1, 1)

	! in4 not used
	rounds_macro(in5, out5, 1, .des_encrypt1.1, in3, in4)

	! 1 for store to [in0]
	fp_macro(in5, out5, 1)

	ret
	restore

.encrypt.dec:

	! decryption path

	add	in1, 120, in3             ! use last subkey for first round

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for move in1 to in3
	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3

	! decryption rounds included by the macro, ks in4
	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1)

	! 1 for store to [in0]
	fp_macro(out5, in5, 1)

	ret
	restore

.DES_encrypt1.end:
	.size	 DES_encrypt1,.DES_encrypt1.end-DES_encrypt1


! void DES_encrypt2(data, ks, enc)
!*********************************
!
! encrypts/decrypts without initial/final permutation
!
! Processes the two 32-bit words at data in place. Both words are
! rotated left by 3 before the rounds and right by 3 afterwards.
! enc equal to 0 selects the decryption path.
!
! in0 data, in1 ks, in2 enc

	.align 32
	.global DES_encrypt2
	.type	 DES_encrypt2,#function

DES_encrypt2:

	save	%sp, FRAME, %sp

	! Position independent address setup: after these five
	! instructions global1 holds the address of .PIC.DES_SPtrans
	! and out2 the address of .des_and.

	sethi	%hi(.PIC.DES_SPtrans-1f), global1
	or	global1, %lo(.PIC.DES_SPtrans-1f), global1
1:	call	.+8
	add	%o7, global1, global1
	sub	global1, .PIC.DES_SPtrans-.des_and, out2

	! Set sbox address 1 to 6 and rotate halves 3 left
	! Errors caught by destest? Yes. Still? *NO*
	!
	! sbox 1 is global1 itself. It used to be loaded here with an
	! absolute sethi/or pair on DES_SPtrans, which the position
	! independent sequence above replaces.

	add	global1, 256, global2     ! sbox 2
	add	global1, 512, global3     ! sbox 3

	ld	[in0], out5               ! right
	add	global1, 768, global4     ! sbox 4
	add	global1, 1024, global5    ! sbox 5

	ld	[in0+4], in5              ! left
	add	global1, 1280, local6     ! sbox 6
	add	global1, 1792, out3       ! sbox 8

	! rotate both halves left by 3: sll by 3 plus srl by 29

	sll	in5, 3, local5
	mov	in1, in3                  ! key address to in3

	sll	out5, 3, local7
	srl	in5, 29, in5

	srl	out5, 29, out5
	add	in5, local5, in5

	add	out5, local7, out5
	cmp	in2, 0

	! we use our own stackframe
	! the data pointer is saved in the delay slot, on both paths

	be	.encrypt2.dec
	STPTR	in0, [%sp+BIAS+ARG0+0*ARGSZ]

	! encryption path

	ld	[in3], out0               ! key 7531 first round
	mov	LOOPS, out4               ! loop counter

	ld	[in3+4], out1             ! key 8642 first round
	sethi	%hi(0x0000FC00), local5

	call	.des_enc
	mov	in3, in4

	! rotate both halves right by 3, reload the data pointer, store

	sll	in5, 29, in0
	srl	in5, 3, in5
	sll	out5, 29, in1
	add	in5, in0, in5
	srl	out5, 3, out5
	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0
	add	out5, in1, out5
	st	in5, [in0]
	st	out5, [in0+4]

	ret
	restore


.encrypt2.dec:

	! decryption path, the key schedule is walked from its last entry

	add	in3, 120, in4

	ld	[in4], out0               ! key 7531 first round
	mov	LOOPS, out4               ! loop counter

	ld	[in4+4], out1             ! key 8642 first round
	sethi	%hi(0x0000FC00), local5

	! exchange in5 and out5 through local1

	mov	in5, local1               ! left expected in out5
	mov	out5, in5

	call	.des_dec
	mov	local1, out5

.encrypt2.finish:

	! rotate both halves right by 3, reload the data pointer, store
	! the words go back in the opposite order to the encryption path

	sll	in5, 29, in0
	srl	in5, 3, in5
	sll	out5, 29, in1
	add	in5, in0, in5
	srl	out5, 3, out5
	LDPTR	[%sp+BIAS+ARG0+0*ARGSZ], in0
	add	out5, in1, out5
	st	out5, [in0]
	st	in5, [in0+4]

	ret
	restore

.DES_encrypt2.end:
	.size	 DES_encrypt2, .DES_encrypt2.end-DES_encrypt2


! void DES_encrypt3(data, ks1, ks2, ks3)
! **************************************
!
! Triple DES on the two 32-bit words at data, in place: encryption
! rounds with ks1, decryption rounds with ks2, encryption rounds with
! ks3.
!
! in0 data, in1 ks1, in2 ks2, in3 ks3

	.align 32
	.global DES_encrypt3
	.type	 DES_encrypt3,#function

DES_encrypt3:

	save	%sp, FRAME, %sp

	! Position independent address setup: after these five
	! instructions global1 holds the address of .PIC.DES_SPtrans
	! and out2 the address of .des_and.

	sethi	%hi(.PIC.DES_SPtrans-1f), global1
	or	global1, %lo(.PIC.DES_SPtrans-1f), global1
1:	call	.+8
	add	%o7, global1, global1
	sub	global1, .PIC.DES_SPtrans-.des_and, out2

	ld	[in0], in5                ! left
	add	in2, 120, in4             ! ks2

	ld	[in0+4], out5             ! right
	mov	in3, in2                  ! save ks3

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4
	! parameter 9  1 for load ks3 and ks2 to in4 and in3

	ip_macro(in5, out5, in5, out5, in3, 1, 1, 0, 0)

	! second pass, ks3 is placed in in3 in the delay slot

	call	.des_dec
	mov	in2, in3                  ! preload ks3

	! third pass

	call	.des_enc
	nop

	fp_macro(in5, out5, 1)

	ret
	restore

.DES_encrypt3.end:
	.size	 DES_encrypt3,.DES_encrypt3.end-DES_encrypt3


! void DES_decrypt3(data, ks1, ks2, ks3)
! **************************************
!
! Triple DES on the two 32-bit words at data, in place: decryption
! rounds with ks3, encryption rounds with ks2, decryption rounds with
! ks1.
!
! in0 data, in1 ks1, in2 ks2, in3 ks3

	.align 32
	.global DES_decrypt3
	.type	 DES_decrypt3,#function

DES_decrypt3:

	save	%sp, FRAME, %sp

	! Position independent address setup: after these five
	! instructions global1 holds the address of .PIC.DES_SPtrans
	! and out2 the address of .des_and.

	sethi	%hi(.PIC.DES_SPtrans-1f), global1
	or	global1, %lo(.PIC.DES_SPtrans-1f), global1
1:	call	.+8
	add	%o7, global1, global1
	sub	global1, .PIC.DES_SPtrans-.des_and, out2

	ld	[in0], in5                ! left
	add	in3, 120, in4             ! ks3

	ld	[in0+4], out5             ! right
	mov	in2, in3                  ! ks2

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4
	! parameter 9  1 for load ks3 and ks2 to in4 and in3

	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 0)

	! second pass, the end of ks1 is placed in in4 in the delay slot

	call	.des_enc
	add	in1, 120, in4             ! preload ks1

	! third pass

	call	.des_dec
	nop

	fp_macro(out5, in5, 1)

	ret
	restore

.DES_decrypt3.end:
	.size	 DES_decrypt3,.DES_decrypt3.end-DES_decrypt3

! void DES_ncbc_encrypt(input, output, length, schedule, ivec, enc)
! *****************************************************************
!
! CBC mode with a single key schedule. enc equal to 0 selects the
! decryption path. Blocks and the iv are read and written byte by
! byte, little endian. A trailing partial block of 1 to 7 bytes is
! handled on both paths. The final chaining value is written back to
! ivec, except on the decryption path when length is not above 0.
!
! in0 input, in1 output, in2 length, in3 schedule, in4 ivec, in5 enc


	.align 32
	.global DES_ncbc_encrypt
	.type	 DES_ncbc_encrypt,#function

DES_ncbc_encrypt:

	save	%sp, FRAME, %sp

	! Stack slots used to keep pointers across the rounds. The first
	! two names are used again by the ede3 cbc function further down.

	define({INPUT},  { [%sp+BIAS+ARG0+0*ARGSZ] })
	define({OUTPUT}, { [%sp+BIAS+ARG0+1*ARGSZ] })
	define({IVEC},   { [%sp+BIAS+ARG0+4*ARGSZ] })

	! Position independent address setup: after these five
	! instructions global1 holds the address of .PIC.DES_SPtrans
	! and out2 the address of .des_and.

	sethi	%hi(.PIC.DES_SPtrans-1f), global1
	or	global1, %lo(.PIC.DES_SPtrans-1f), global1
1:	call	.+8
	add	%o7, global1, global1
	sub	global1, .PIC.DES_SPtrans-.des_and, out2

	cmp	in5, 0                    ! enc

	! the ivec pointer is saved in the delay slot, on both paths

	be	.ncbc.dec
	STPTR	in4, IVEC

	! ---- encryption ----

	! addr  left  right  temp  label
	load_little_endian(in4, in5, out5, local3, .LLE1)  ! iv

	addcc	in2, -8, in2              ! bytes missing when first block done

	bl	.ncbc.enc.seven.or.less
	mov	in3, in4                  ! schedule

.ncbc.enc.next.block:

	load_little_endian(in0, out4, global4, local3, .LLE2)  ! block

.ncbc.enc.next.block_1:

	xor	in5, out4, in5            ! iv xor
	xor	out5, global4, out5       ! iv xor

	! parameter 8  1 for move in3 to in4, 2 for move in4 to in3
	ip_macro(in5, out5, in5, out5, in3, 0, 0, 2)

.ncbc.enc.next.block_2:

	! A call to .des_enc would do here as well, it compares in2 to 8.
	! The rounds are inlined for alignment purposes.

	add	global1, 768, global4     ! address sbox 4 since register used below

	! encryption rounds, ks in3
	rounds_macro(in5, out5, 1, .ncbc.enc.1, in3, in4)

	bl	.ncbc.enc.next.block_fp
	add	in0, 8, in0               ! input address

	! If 8 or more bytes are to be encrypted after this block,
	! we combine final permutation for this block with initial
	! permutation for next block. Load next block:

	load_little_endian(in0, global3, global4, local5, .LLE12)

	!  parameter 1   original left
	!  parameter 2   original right
	!  parameter 3   left ip
	!  parameter 4   right ip
	!  parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
	!                2: mov in4 to in3
	!
	! also adds -8 to length in2 and loads loop counter to out4

	fp_ip_macro(out0, out1, global3, global4, 2)

	store_little_endian(in1, out0, out1, local3, .SLE10)  ! block

	ld	[in3], out0               ! key 7531 first round next block
	mov	in5, local1
	xor	global3, out5, in5        ! iv xor next block

	ld	[in3+4], out1             ! key 8642
	add	global1, 512, global3     ! address sbox 3 since register used
	xor	global4, local1, out5     ! iv xor next block

	ba	.ncbc.enc.next.block_2
	add	in1, 8, in1               ! output address

.ncbc.enc.next.block_fp:

	! fewer than 8 bytes follow, finish this block on its own

	fp_macro(in5, out5)

	store_little_endian(in1, in5, out5, local3, .SLE1)  ! block

	addcc	in2, -8, in2              ! bytes missing when next block done

	bpos	.ncbc.enc.next.block
	add	in1, 8, in1

.ncbc.enc.seven.or.less:

	! in2 is the number of remaining bytes minus 8 at this point

	cmp	in2, -8

	ble	.ncbc.enc.finish
	nop

	add	in2, 8, local1            ! bytes to load

	! Loads 1 to 7 bytes little endian to global4, out4 and
	! continues at the iv xor above.

	! addr, length, dest left, dest right, temp, temp2, label, ret label
	load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB1, .ncbc.enc.next.block_1)


.ncbc.enc.finish:

	LDPTR	IVEC, local4
	store_little_endian(local4, in5, out5, local5, .SLE2)  ! ivec

	ret
	restore


.ncbc.dec:

	! ---- decryption ----

	STPTR	in0, INPUT
	cmp	in2, 0                    ! length
	add	in3, 120, in3

	LDPTR	IVEC, local7              ! ivec
	ble	.ncbc.dec.finish
	mov	in3, in4                  ! schedule

	STPTR	in1, OUTPUT
	mov	in0, local5               ! input

	load_little_endian(local7, in0, in1, local3, .LLE3)   ! ivec

.ncbc.dec.next.block:

	load_little_endian(local5, in5, out5, local3, .LLE4)  ! block

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4

	! decryption rounds included by the macro, ks in4
	ip_macro(in5, out5, out5, in5, in4, 2, 0, 1)

	! 1 for input and output address to local5/7
	fp_macro(out5, in5, 0, 1)

	! in2 is bytes left to be stored
	! in2 is compared to 8 in the rounds

	xor	out5, in0, out4           ! iv xor
	bl	.ncbc.dec.seven.or.less
	xor	in5, in1, global4         ! iv xor

	! Load ivec next block now, since input and output address might be the same.

	load_little_endian_inc(local5, in0, in1, local3, .LLE5)  ! iv

	store_little_endian(local7, out4, global4, local3, .SLE3)

	STPTR	local5, INPUT
	add	local7, 8, local7
	addcc	in2, -8, in2

	bg	.ncbc.dec.next.block
	STPTR	local7, OUTPUT


.ncbc.dec.store.iv:

	LDPTR	IVEC, local4              ! ivec
	store_little_endian(local4, in0, in1, local5, .SLE4)

.ncbc.dec.finish:

	ret
	restore

.ncbc.dec.seven.or.less:

	! last block, only in2 bytes of the result are written

	load_little_endian_inc(local5, in0, in1, local3, .LLE13)  ! ivec

	store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB1, .ncbc.dec.store.iv)


.DES_ncbc_encrypt.end:
	.size	 DES_ncbc_encrypt, .DES_ncbc_encrypt.end-DES_ncbc_encrypt


! void DES_ede3_cbc_encrypt(input, output, length, ks1, ks2, ks3, ivec, enc)
! **************************************************************************
!
! CBC mode with three key schedules. enc equal to 0 selects the
! decryption path. Blocks and the iv are read and written byte by
! byte, little endian. A trailing partial block of 1 to 7 bytes is
! handled on both paths. The final chaining value is written back to
! ivec, except on the decryption path when length is not above 0.
!
! in0 input, in1 output, in2 length, in3 ks1, in4 ks2, in5 ks3
! ivec and enc are read from the argument area of the caller frame.
! The stack slots for the input and output pointers use the m4 names
! set up in the ncbc function above.


	.align 32
	.global DES_ede3_cbc_encrypt
	.type	 DES_ede3_cbc_encrypt,#function

DES_ede3_cbc_encrypt:

	save	%sp, FRAME, %sp

	! stack slots that hold the three key schedule pointers

	define({KS1}, { [%sp+BIAS+ARG0+3*ARGSZ] })
	define({KS2}, { [%sp+BIAS+ARG0+4*ARGSZ] })
	define({KS3}, { [%sp+BIAS+ARG0+5*ARGSZ] })

	! Position independent address setup: after these five
	! instructions global1 holds the address of .PIC.DES_SPtrans
	! and out2 the address of .des_and.

	sethi	%hi(.PIC.DES_SPtrans-1f), global1
	or	global1, %lo(.PIC.DES_SPtrans-1f), global1
1:	call	.+8
	add	%o7, global1, global1
	sub	global1, .PIC.DES_SPtrans-.des_and, out2

	LDPTR	[%fp+BIAS+ARG0+7*ARGSZ], local3          ! enc
	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
	cmp	local3, 0                 ! enc

	! ks2 is saved in the delay slot, on both paths

	be	.ede3.dec
	STPTR	in4, KS2

	! ---- encryption ----

	STPTR	in5, KS3

	load_little_endian(local4, in5, out5, local3, .LLE6)  ! ivec

	addcc	in2, -8, in2              ! bytes missing after next block

	bl	.ede3.enc.seven.or.less
	STPTR	in3, KS1

.ede3.enc.next.block:

	load_little_endian(in0, out4, global4, local3, .LLE7)

.ede3.enc.next.block_1:

	LDPTR	KS2, in4
	xor	in5, out4, in5            ! iv xor
	xor	out5, global4, out5       ! iv xor

	LDPTR	KS1, in3
	add	in4, 120, in4             ! for decryption we use last subkey first
	nop

	ip_macro(in5, out5, in5, out5, in3)

.ede3.enc.next.block_2:

	call	.des_enc                  ! ks1 in3
	nop

	call	.des_dec                  ! ks2 in4
	LDPTR	KS3, in3

	call	.des_enc                  ! ks3 in3  compares in2 to 8
	nop

	bl	.ede3.enc.next.block_fp
	add	in0, 8, in0

	! If 8 or more bytes are to be encrypted after this block,
	! we combine final permutation for this block with initial
	! permutation for next block. Load next block:

	load_little_endian(in0, global3, global4, local5, .LLE11)

	!  parameter 1   original left
	!  parameter 2   original right
	!  parameter 3   left ip
	!  parameter 4   right ip
	!  parameter 5   1: load ks1/ks2 to in3/in4, add 120 to in4
	!                2: mov in4 to in3
	!
	! also adds -8 to length in2 and loads loop counter to out4

	fp_ip_macro(out0, out1, global3, global4, 1)

	store_little_endian(in1, out0, out1, local3, .SLE9)  ! block

	mov	in5, local1
	xor	global3, out5, in5        ! iv xor next block

	ld	[in3], out0               ! key 7531
	add	global1, 512, global3     ! address sbox 3
	xor	global4, local1, out5     ! iv xor next block

	ld	[in3+4], out1             ! key 8642
	add	global1, 768, global4     ! address sbox 4
	ba	.ede3.enc.next.block_2
	add	in1, 8, in1

.ede3.enc.next.block_fp:

	! fewer than 8 bytes follow, finish this block on its own

	fp_macro(in5, out5)

	store_little_endian(in1, in5, out5, local3, .SLE5)  ! block

	addcc	in2, -8, in2              ! bytes missing when next block done

	bpos	.ede3.enc.next.block
	add	in1, 8, in1

.ede3.enc.seven.or.less:

	! in2 is the number of remaining bytes minus 8 at this point

	cmp	in2, -8

	ble	.ede3.enc.finish
	nop

	add	in2, 8, local1            ! bytes to load

	! addr, length, dest left, dest right, temp, temp2, label, ret label
	load_n_bytes(in0, local1, global4, out4, local2, local3, .LNB2, .ede3.enc.next.block_1)

.ede3.enc.finish:

	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
	store_little_endian(local4, in5, out5, local5, .SLE6)  ! ivec

	ret
	restore

.ede3.dec:

	! ---- decryption ----
	! ks1 and ks3 are kept as pointers to their last entry

	STPTR	in0, INPUT
	add	in5, 120, in5

	STPTR	in1, OUTPUT
	mov	in0, local5
	add	in3, 120, in3

	STPTR	in3, KS1
	cmp	in2, 0

	ble	.ede3.dec.finish
	STPTR	in5, KS3

	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local7          ! iv
	load_little_endian(local7, in0, in1, local3, .LLE8)

.ede3.dec.next.block:

	load_little_endian(local5, in5, out5, local3, .LLE9)

	! parameter 6  1/2 for include encryption/decryption
	! parameter 7  1 for mov in1 to in3
	! parameter 8  1 for mov in3 to in4
	! parameter 9  1 for load ks3 and ks2 to in4 and in3

	! decryption rounds included by the macro, ks3 in4
	ip_macro(in5, out5, out5, in5, in4, 2, 0, 0, 1)

	call	.des_enc                  ! ks2 in3
	LDPTR	KS1, in4

	call	.des_dec                  ! ks1 in4
	nop

	! 1 for input and output address local5/7
	fp_macro(out5, in5, 0, 1)

	! in2 is bytes left to be stored
	! in2 is compared to 8 in the rounds

	xor	out5, in0, out4
	bl	.ede3.dec.seven.or.less
	xor	in5, in1, global4

	load_little_endian_inc(local5, in0, in1, local3, .LLE10)   ! iv next block

	store_little_endian(local7, out4, global4, local3, .SLE7)  ! block

	STPTR	local5, INPUT
	addcc	in2, -8, in2
	add	local7, 8, local7

	bg	.ede3.dec.next.block
	STPTR	local7, OUTPUT

.ede3.dec.store.iv:

	LDPTR	[%fp+BIAS+ARG0+6*ARGSZ], local4          ! ivec
	store_little_endian(local4, in0, in1, local5, .SLE8)  ! ivec

.ede3.dec.finish:

	ret
	restore

.ede3.dec.seven.or.less:

	! last block, only in2 bytes of the result are written

	load_little_endian_inc(local5, in0, in1, local3, .LLE14)     ! iv

	store_n_bytes(local7, in2, global4, out4, local3, local4, .SNB2, .ede3.dec.store.iv)


.DES_ede3_cbc_encrypt.end:
	.size	 DES_ede3_cbc_encrypt,.DES_ede3_cbc_encrypt.end-DES_ede3_cbc_encrypt

! .des_and
!
! Constant data reached through out2 by the functions above: a 256
! byte lookup table followed by eight 32-bit words.
!
! The declared size is 288 bytes: 256 table bytes plus 8 words of
! 4 bytes each. The last word starts at offset 284 and ends at 288.

	.align	256
	.type	 .des_and,#object
	.size	 .des_and,288

.des_and:

! This table is used for AND 0xFC when it is known that register
! bits 8-31 are zero. Makes it possible to do three arithmetic
! operations in one cycle. The byte at offset n holds n AND 0xFC.

	.byte  0, 0, 0, 0, 4, 4, 4, 4
	.byte  8, 8, 8, 8, 12, 12, 12, 12
	.byte  16, 16, 16, 16, 20, 20, 20, 20
	.byte  24, 24, 24, 24, 28, 28, 28, 28
	.byte  32, 32, 32, 32, 36, 36, 36, 36
	.byte  40, 40, 40, 40, 44, 44, 44, 44
	.byte  48, 48, 48, 48, 52, 52, 52, 52
	.byte  56, 56, 56, 56, 60, 60, 60, 60
	.byte  64, 64, 64, 64, 68, 68, 68, 68
	.byte  72, 72, 72, 72, 76, 76, 76, 76
	.byte  80, 80, 80, 80, 84, 84, 84, 84
	.byte  88, 88, 88, 88, 92, 92, 92, 92
	.byte  96, 96, 96, 96, 100, 100, 100, 100
	.byte  104, 104, 104, 104, 108, 108, 108, 108
	.byte  112, 112, 112, 112, 116, 116, 116, 116
	.byte  120, 120, 120, 120, 124, 124, 124, 124
	.byte  128, 128, 128, 128, 132, 132, 132, 132
	.byte  136, 136, 136, 136, 140, 140, 140, 140
	.byte  144, 144, 144, 144, 148, 148, 148, 148
	.byte  152, 152, 152, 152, 156, 156, 156, 156
	.byte  160, 160, 160, 160, 164, 164, 164, 164
	.byte  168, 168, 168, 168, 172, 172, 172, 172
	.byte  176, 176, 176, 176, 180, 180, 180, 180
	.byte  184, 184, 184, 184, 188, 188, 188, 188
	.byte  192, 192, 192, 192, 196, 196, 196, 196
	.byte  200, 200, 200, 200, 204, 204, 204, 204
	.byte  208, 208, 208, 208, 212, 212, 212, 212
	.byte  216, 216, 216, 216, 220, 220, 220, 220
	.byte  224, 224, 224, 224, 228, 228, 228, 228
	.byte  232, 232, 232, 232, 236, 236, 236, 236
	.byte  240, 240, 240, 240, 244, 244, 244, 244
	.byte  248, 248, 248, 248, 252, 252, 252, 252

	! 5 numbers for initial/final permutation

	.word   0x0f0f0f0f                ! offset 256
	.word	0x0000ffff                !        260
	.word	0x33333333                !        264
	.word	0x00ff00ff                !        268
	.word	0x55555555                !        272

	! three further words

	.word	0                         !        276
	.word	LOOPS                     !        280
	.word	0x0000FC00                !        284, ends at 288

1824 | .global DES_SPtrans
|
---|
1825 | .type DES_SPtrans,#object
|
---|
1826 | .size DES_SPtrans,2048
|
---|
1827 | .align 64
|
---|
1828 | DES_SPtrans:
|
---|
1829 | .PIC.DES_SPtrans:
|
---|
1830 | ! nibble 0
|
---|
1831 | .word 0x02080800, 0x00080000, 0x02000002, 0x02080802
|
---|
1832 | .word 0x02000000, 0x00080802, 0x00080002, 0x02000002
|
---|
1833 | .word 0x00080802, 0x02080800, 0x02080000, 0x00000802
|
---|
1834 | .word 0x02000802, 0x02000000, 0x00000000, 0x00080002
|
---|
1835 | .word 0x00080000, 0x00000002, 0x02000800, 0x00080800
|
---|
1836 | .word 0x02080802, 0x02080000, 0x00000802, 0x02000800
|
---|
1837 | .word 0x00000002, 0x00000800, 0x00080800, 0x02080002
|
---|
1838 | .word 0x00000800, 0x02000802, 0x02080002, 0x00000000
|
---|
1839 | .word 0x00000000, 0x02080802, 0x02000800, 0x00080002
|
---|
1840 | .word 0x02080800, 0x00080000, 0x00000802, 0x02000800
|
---|
1841 | .word 0x02080002, 0x00000800, 0x00080800, 0x02000002
|
---|
1842 | .word 0x00080802, 0x00000002, 0x02000002, 0x02080000
|
---|
1843 | .word 0x02080802, 0x00080800, 0x02080000, 0x02000802
|
---|
1844 | .word 0x02000000, 0x00000802, 0x00080002, 0x00000000
|
---|
1845 | .word 0x00080000, 0x02000000, 0x02000802, 0x02080800
|
---|
1846 | .word 0x00000002, 0x02080002, 0x00000800, 0x00080802
|
---|
1847 | ! nibble 1
|
---|
1848 | .word 0x40108010, 0x00000000, 0x00108000, 0x40100000
|
---|
1849 | .word 0x40000010, 0x00008010, 0x40008000, 0x00108000
|
---|
1850 | .word 0x00008000, 0x40100010, 0x00000010, 0x40008000
|
---|
1851 | .word 0x00100010, 0x40108000, 0x40100000, 0x00000010
|
---|
1852 | .word 0x00100000, 0x40008010, 0x40100010, 0x00008000
|
---|
1853 | .word 0x00108010, 0x40000000, 0x00000000, 0x00100010
|
---|
1854 | .word 0x40008010, 0x00108010, 0x40108000, 0x40000010
|
---|
1855 | .word 0x40000000, 0x00100000, 0x00008010, 0x40108010
|
---|
1856 | .word 0x00100010, 0x40108000, 0x40008000, 0x00108010
|
---|
1857 | .word 0x40108010, 0x00100010, 0x40000010, 0x00000000
|
---|
1858 | .word 0x40000000, 0x00008010, 0x00100000, 0x40100010
|
---|
1859 | .word 0x00008000, 0x40000000, 0x00108010, 0x40008010
|
---|
1860 | .word 0x40108000, 0x00008000, 0x00000000, 0x40000010
|
---|
1861 | .word 0x00000010, 0x40108010, 0x00108000, 0x40100000
|
---|
1862 | .word 0x40100010, 0x00100000, 0x00008010, 0x40008000
|
---|
1863 | .word 0x40008010, 0x00000010, 0x40100000, 0x00108000
|
---|
1864 | ! nibble 2
|
---|
1865 | .word 0x04000001, 0x04040100, 0x00000100, 0x04000101
|
---|
1866 | .word 0x00040001, 0x04000000, 0x04000101, 0x00040100
|
---|
1867 | .word 0x04000100, 0x00040000, 0x04040000, 0x00000001
|
---|
1868 | .word 0x04040101, 0x00000101, 0x00000001, 0x04040001
|
---|
1869 | .word 0x00000000, 0x00040001, 0x04040100, 0x00000100
|
---|
1870 | .word 0x00000101, 0x04040101, 0x00040000, 0x04000001
|
---|
1871 | .word 0x04040001, 0x04000100, 0x00040101, 0x04040000
|
---|
1872 | .word 0x00040100, 0x00000000, 0x04000000, 0x00040101
|
---|
1873 | .word 0x04040100, 0x00000100, 0x00000001, 0x00040000
|
---|
1874 | .word 0x00000101, 0x00040001, 0x04040000, 0x04000101
|
---|
1875 | .word 0x00000000, 0x04040100, 0x00040100, 0x04040001
|
---|
1876 | .word 0x00040001, 0x04000000, 0x04040101, 0x00000001
|
---|
1877 | .word 0x00040101, 0x04000001, 0x04000000, 0x04040101
|
---|
1878 | .word 0x00040000, 0x04000100, 0x04000101, 0x00040100
|
---|
1879 | .word 0x04000100, 0x00000000, 0x04040001, 0x00000101
|
---|
1880 | .word 0x04000001, 0x00040101, 0x00000100, 0x04040000
|
---|
1881 | ! nibble 3
|
---|
1882 | .word 0x00401008, 0x10001000, 0x00000008, 0x10401008
|
---|
1883 | .word 0x00000000, 0x10400000, 0x10001008, 0x00400008
|
---|
1884 | .word 0x10401000, 0x10000008, 0x10000000, 0x00001008
|
---|
1885 | .word 0x10000008, 0x00401008, 0x00400000, 0x10000000
|
---|
1886 | .word 0x10400008, 0x00401000, 0x00001000, 0x00000008
|
---|
1887 | .word 0x00401000, 0x10001008, 0x10400000, 0x00001000
|
---|
1888 | .word 0x00001008, 0x00000000, 0x00400008, 0x10401000
|
---|
1889 | .word 0x10001000, 0x10400008, 0x10401008, 0x00400000
|
---|
1890 | .word 0x10400008, 0x00001008, 0x00400000, 0x10000008
|
---|
1891 | .word 0x00401000, 0x10001000, 0x00000008, 0x10400000
|
---|
1892 | .word 0x10001008, 0x00000000, 0x00001000, 0x00400008
|
---|
1893 | .word 0x00000000, 0x10400008, 0x10401000, 0x00001000
|
---|
1894 | .word 0x10000000, 0x10401008, 0x00401008, 0x00400000
|
---|
1895 | .word 0x10401008, 0x00000008, 0x10001000, 0x00401008
|
---|
1896 | .word 0x00400008, 0x00401000, 0x10400000, 0x10001008
|
---|
1897 | .word 0x00001008, 0x10000000, 0x10000008, 0x10401000
|
---|
1898 | ! nibble 4
|
---|
1899 | .word 0x08000000, 0x00010000, 0x00000400, 0x08010420
|
---|
1900 | .word 0x08010020, 0x08000400, 0x00010420, 0x08010000
|
---|
1901 | .word 0x00010000, 0x00000020, 0x08000020, 0x00010400
|
---|
1902 | .word 0x08000420, 0x08010020, 0x08010400, 0x00000000
|
---|
1903 | .word 0x00010400, 0x08000000, 0x00010020, 0x00000420
|
---|
1904 | .word 0x08000400, 0x00010420, 0x00000000, 0x08000020
|
---|
1905 | .word 0x00000020, 0x08000420, 0x08010420, 0x00010020
|
---|
1906 | .word 0x08010000, 0x00000400, 0x00000420, 0x08010400
|
---|
1907 | .word 0x08010400, 0x08000420, 0x00010020, 0x08010000
|
---|
1908 | .word 0x00010000, 0x00000020, 0x08000020, 0x08000400
|
---|
1909 | .word 0x08000000, 0x00010400, 0x08010420, 0x00000000
|
---|
1910 | .word 0x00010420, 0x08000000, 0x00000400, 0x00010020
|
---|
1911 | .word 0x08000420, 0x00000400, 0x00000000, 0x08010420
|
---|
1912 | .word 0x08010020, 0x08010400, 0x00000420, 0x00010000
|
---|
1913 | .word 0x00010400, 0x08010020, 0x08000400, 0x00000420
|
---|
1914 | .word 0x00000020, 0x00010420, 0x08010000, 0x08000020
|
---|
1915 | ! nibble 5
|
---|
1916 | .word 0x80000040, 0x00200040, 0x00000000, 0x80202000
|
---|
1917 | .word 0x00200040, 0x00002000, 0x80002040, 0x00200000
|
---|
1918 | .word 0x00002040, 0x80202040, 0x00202000, 0x80000000
|
---|
1919 | .word 0x80002000, 0x80000040, 0x80200000, 0x00202040
|
---|
1920 | .word 0x00200000, 0x80002040, 0x80200040, 0x00000000
|
---|
1921 | .word 0x00002000, 0x00000040, 0x80202000, 0x80200040
|
---|
1922 | .word 0x80202040, 0x80200000, 0x80000000, 0x00002040
|
---|
1923 | .word 0x00000040, 0x00202000, 0x00202040, 0x80002000
|
---|
1924 | .word 0x00002040, 0x80000000, 0x80002000, 0x00202040
|
---|
1925 | .word 0x80202000, 0x00200040, 0x00000000, 0x80002000
|
---|
1926 | .word 0x80000000, 0x00002000, 0x80200040, 0x00200000
|
---|
1927 | .word 0x00200040, 0x80202040, 0x00202000, 0x00000040
|
---|
1928 | .word 0x80202040, 0x00202000, 0x00200000, 0x80002040
|
---|
1929 | .word 0x80000040, 0x80200000, 0x00202040, 0x00000000
|
---|
1930 | .word 0x00002000, 0x80000040, 0x80002040, 0x80202000
|
---|
1931 | .word 0x80200000, 0x00002040, 0x00000040, 0x80200040
|
---|
1932 | ! nibble 6
|
---|
1933 | .word 0x00004000, 0x00000200, 0x01000200, 0x01000004
|
---|
1934 | .word 0x01004204, 0x00004004, 0x00004200, 0x00000000
|
---|
1935 | .word 0x01000000, 0x01000204, 0x00000204, 0x01004000
|
---|
1936 | .word 0x00000004, 0x01004200, 0x01004000, 0x00000204
|
---|
1937 | .word 0x01000204, 0x00004000, 0x00004004, 0x01004204
|
---|
1938 | .word 0x00000000, 0x01000200, 0x01000004, 0x00004200
|
---|
1939 | .word 0x01004004, 0x00004204, 0x01004200, 0x00000004
|
---|
1940 | .word 0x00004204, 0x01004004, 0x00000200, 0x01000000
|
---|
1941 | .word 0x00004204, 0x01004000, 0x01004004, 0x00000204
|
---|
1942 | .word 0x00004000, 0x00000200, 0x01000000, 0x01004004
|
---|
1943 | .word 0x01000204, 0x00004204, 0x00004200, 0x00000000
|
---|
1944 | .word 0x00000200, 0x01000004, 0x00000004, 0x01000200
|
---|
1945 | .word 0x00000000, 0x01000204, 0x01000200, 0x00004200
|
---|
1946 | .word 0x00000204, 0x00004000, 0x01004204, 0x01000000
|
---|
1947 | .word 0x01004200, 0x00000004, 0x00004004, 0x01004204
|
---|
1948 | .word 0x01000004, 0x01004200, 0x01004000, 0x00004004
|
---|
1949 | ! nibble 7
|
---|
1950 | .word 0x20800080, 0x20820000, 0x00020080, 0x00000000
|
---|
1951 | .word 0x20020000, 0x00800080, 0x20800000, 0x20820080
|
---|
1952 | .word 0x00000080, 0x20000000, 0x00820000, 0x00020080
|
---|
1953 | .word 0x00820080, 0x20020080, 0x20000080, 0x20800000
|
---|
1954 | .word 0x00020000, 0x00820080, 0x00800080, 0x20020000
|
---|
1955 | .word 0x20820080, 0x20000080, 0x00000000, 0x00820000
|
---|
1956 | .word 0x20000000, 0x00800000, 0x20020080, 0x20800080
|
---|
1957 | .word 0x00800000, 0x00020000, 0x20820000, 0x00000080
|
---|
1958 | .word 0x00800000, 0x00020000, 0x20000080, 0x20820080
|
---|
1959 | .word 0x00020080, 0x20000000, 0x00000000, 0x00820000
|
---|
1960 | .word 0x20800080, 0x20020080, 0x20020000, 0x00800080
|
---|
1961 | .word 0x20820000, 0x00000080, 0x00800080, 0x20020000
|
---|
1962 | .word 0x20820080, 0x00800000, 0x20800000, 0x20000080
|
---|
1963 | .word 0x00820000, 0x00020080, 0x20020080, 0x20800000
|
---|
1964 | .word 0x00000080, 0x20820000, 0x00820080, 0x00000000
|
---|
1965 | .word 0x20000000, 0x20800080, 0x00020000, 0x00820080
|
---|
1966 |
|
---|