; VirtualBox
;
; source: vbox/trunk/src/libs/openssl-3.0.7/crypto/genasm-nasm/bsaes-x86_64.S @ 98024
;
; Last change on this file since 98024 was 94083, checked in by vboxsync, 3 years ago
;
; libs/openssl-3.0.1: Recreate asm files, bugref:10128
;
; File size: 50.8 KB
; (Source-browser page header; its "Line" column heading is omitted here.)
; NASM syntax, x86-64.
; Symbol-only memory operands (e.g. [$L$BS0]) are assembled RIP-relative.
default rel
; The operands in this file carry XMMWORD/YMMWORD/ZMMWORD size keywords;
; defining them as empty macros makes them expand to nothing.
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64


; External single-block routines, called by the one-block-at-a-time paths
; of the CBC and CTR entry points in this file.
EXTERN asm_AES_encrypt
EXTERN asm_AES_decrypt
10
11
ALIGN 64
;-----------------------------------------------------------------------
; _bsaes_encrypt8 -- file-internal helper with a register-based
;                    (non-ABI) interface; processes eight 128-bit
;                    blocks in parallel.
;
; In:    xmm15, xmm0, xmm1 .. xmm6 = the eight input blocks
;        rax  = key material, 16-byte aligned (read with movdqa):
;               16 bytes, then 128 bytes per loop pass, then 16 bytes,
;               i.e. 128*r10d - 96 bytes in total
;        r10d = round count; the S-box stage below runs r10d times
; Out:   eight result blocks, in the register order
;        xmm15, xmm0, xmm3, xmm5, xmm2, xmm6, xmm1, xmm4
;        (the order in which the CTR caller in this file pairs them
;        with its input blocks)
; Clobb: rax, r10d, r11, xmm7 .. xmm14, flags
;
; _bsaes_encrypt8_bitslice is a second entry point that skips the
; initial key XOR and byte shuffle.  A caller using it must have set
; r11 = address of $L$BS0 itself, because the code from that label on
; addresses all constants relative to r11.
;-----------------------------------------------------------------------
_bsaes_encrypt8:

	lea r11,[$L$BS0]

	; XOR the first 16 key bytes into every block and permute the bytes
	; of each block with the shuffle mask at [80+r11].
	movdqa xmm8,XMMWORD[rax]
	lea rax,[16+rax]
	movdqa xmm7,XMMWORD[80+r11]
	pxor xmm15,xmm8
	pxor xmm0,xmm8
	pxor xmm1,xmm8
	pxor xmm2,xmm8
DB 102,68,15,56,0,255			; pshufb xmm15,xmm7
DB 102,15,56,0,199			; pshufb xmm0,xmm7
	pxor xmm3,xmm8
	pxor xmm4,xmm8
DB 102,15,56,0,207			; pshufb xmm1,xmm7
DB 102,15,56,0,215			; pshufb xmm2,xmm7
	pxor xmm5,xmm8
	pxor xmm6,xmm8
DB 102,15,56,0,223			; pshufb xmm3,xmm7
DB 102,15,56,0,231			; pshufb xmm4,xmm7
DB 102,15,56,0,239			; pshufb xmm5,xmm7
DB 102,15,56,0,247			; pshufb xmm6,xmm7
_bsaes_encrypt8_bitslice:
	; Convert to bit-sliced form: three passes of masked bit exchanges
	; between register pairs.  Each exchange of (a,b) with shift n and
	; mask m computes t = ((a >> n) ^ b) & m; b ^= t; a ^= t << n.
	; Pass 1: shift 1, mask [r11].
	movdqa xmm7,XMMWORD[r11]
	movdqa xmm8,XMMWORD[16+r11]
	movdqa xmm9,xmm5
	psrlq xmm5,1
	movdqa xmm10,xmm3
	psrlq xmm3,1
	pxor xmm5,xmm6
	pxor xmm3,xmm4
	pand xmm5,xmm7
	pand xmm3,xmm7
	pxor xmm6,xmm5
	psllq xmm5,1
	pxor xmm4,xmm3
	psllq xmm3,1
	pxor xmm5,xmm9
	pxor xmm3,xmm10
	movdqa xmm9,xmm1
	psrlq xmm1,1
	movdqa xmm10,xmm15
	psrlq xmm15,1
	pxor xmm1,xmm2
	pxor xmm15,xmm0
	pand xmm1,xmm7
	pand xmm15,xmm7
	pxor xmm2,xmm1
	psllq xmm1,1
	pxor xmm0,xmm15
	psllq xmm15,1
	pxor xmm1,xmm9
	pxor xmm15,xmm10
	; Pass 2: shift 2, mask [16+r11] (already in xmm8); xmm7 is reloaded
	; with the pass-3 mask.
	movdqa xmm7,XMMWORD[32+r11]
	movdqa xmm9,xmm4
	psrlq xmm4,2
	movdqa xmm10,xmm3
	psrlq xmm3,2
	pxor xmm4,xmm6
	pxor xmm3,xmm5
	pand xmm4,xmm8
	pand xmm3,xmm8
	pxor xmm6,xmm4
	psllq xmm4,2
	pxor xmm5,xmm3
	psllq xmm3,2
	pxor xmm4,xmm9
	pxor xmm3,xmm10
	movdqa xmm9,xmm0
	psrlq xmm0,2
	movdqa xmm10,xmm15
	psrlq xmm15,2
	pxor xmm0,xmm2
	pxor xmm15,xmm1
	pand xmm0,xmm8
	pand xmm15,xmm8
	pxor xmm2,xmm0
	psllq xmm0,2
	pxor xmm1,xmm15
	psllq xmm15,2
	pxor xmm0,xmm9
	pxor xmm15,xmm10
	; Pass 3: shift 4, mask [32+r11].
	movdqa xmm9,xmm2
	psrlq xmm2,4
	movdqa xmm10,xmm1
	psrlq xmm1,4
	pxor xmm2,xmm6
	pxor xmm1,xmm5
	pand xmm2,xmm7
	pand xmm1,xmm7
	pxor xmm6,xmm2
	psllq xmm2,4
	pxor xmm5,xmm1
	psllq xmm1,4
	pxor xmm2,xmm9
	pxor xmm1,xmm10
	movdqa xmm9,xmm0
	psrlq xmm0,4
	movdqa xmm10,xmm15
	psrlq xmm15,4
	pxor xmm0,xmm4
	pxor xmm15,xmm3
	pand xmm0,xmm7
	pand xmm15,xmm7
	pxor xmm4,xmm0
	psllq xmm0,4
	pxor xmm3,xmm15
	psllq xmm15,4
	pxor xmm0,xmm9
	pxor xmm15,xmm10
	; The first pass enters at the S-box stage: the key XOR and shuffle at
	; the loop top were already done above (or by the _bitslice caller).
	dec r10d
	jmp NEAR $L$enc_sbox
ALIGN 16
$L$enc_loop:
	; Loop top: XOR in the next 128 bytes of key material and apply the
	; byte shuffle selected at the bottom of the previous pass (xmm7).
	pxor xmm15,XMMWORD[rax]
	pxor xmm0,XMMWORD[16+rax]
	pxor xmm1,XMMWORD[32+rax]
	pxor xmm2,XMMWORD[48+rax]
DB 102,68,15,56,0,255			; pshufb xmm15,xmm7
DB 102,15,56,0,199			; pshufb xmm0,xmm7
	pxor xmm3,XMMWORD[64+rax]
	pxor xmm4,XMMWORD[80+rax]
DB 102,15,56,0,207			; pshufb xmm1,xmm7
DB 102,15,56,0,215			; pshufb xmm2,xmm7
	pxor xmm5,XMMWORD[96+rax]
	pxor xmm6,XMMWORD[112+rax]
DB 102,15,56,0,223			; pshufb xmm3,xmm7
DB 102,15,56,0,231			; pshufb xmm4,xmm7
DB 102,15,56,0,239			; pshufb xmm5,xmm7
DB 102,15,56,0,247			; pshufb xmm6,xmm7
	lea rax,[128+rax]
$L$enc_sbox:
	; S-box stage (named by the label): a fixed network of pxor/pand/por
	; over xmm0-xmm6 and xmm15, with xmm7-xmm14 as temporaries.  It
	; touches no memory and no general-purpose register.
	pxor xmm4,xmm5
	pxor xmm1,xmm0
	pxor xmm2,xmm15
	pxor xmm5,xmm1
	pxor xmm4,xmm15

	pxor xmm5,xmm2
	pxor xmm2,xmm6
	pxor xmm6,xmm4
	pxor xmm2,xmm3
	pxor xmm3,xmm4
	pxor xmm2,xmm0

	pxor xmm1,xmm6
	pxor xmm0,xmm4
	movdqa xmm10,xmm6
	movdqa xmm9,xmm0
	movdqa xmm8,xmm4
	movdqa xmm12,xmm1
	movdqa xmm11,xmm5

	pxor xmm10,xmm3
	pxor xmm9,xmm1
	pxor xmm8,xmm2
	movdqa xmm13,xmm10
	pxor xmm12,xmm3
	movdqa xmm7,xmm9
	pxor xmm11,xmm15
	movdqa xmm14,xmm10

	por xmm9,xmm8
	por xmm10,xmm11
	pxor xmm14,xmm7
	pand xmm13,xmm11
	pxor xmm11,xmm8
	pand xmm7,xmm8
	pand xmm14,xmm11
	movdqa xmm11,xmm2
	pxor xmm11,xmm15
	pand xmm12,xmm11
	pxor xmm10,xmm12
	pxor xmm9,xmm12
	movdqa xmm12,xmm6
	movdqa xmm11,xmm4
	pxor xmm12,xmm0
	pxor xmm11,xmm5
	movdqa xmm8,xmm12
	pand xmm12,xmm11
	por xmm8,xmm11
	pxor xmm7,xmm12
	pxor xmm10,xmm14
	pxor xmm9,xmm13
	pxor xmm8,xmm14
	movdqa xmm11,xmm1
	pxor xmm7,xmm13
	movdqa xmm12,xmm3
	pxor xmm8,xmm13
	movdqa xmm13,xmm0
	pand xmm11,xmm2
	movdqa xmm14,xmm6
	pand xmm12,xmm15
	pand xmm13,xmm4
	por xmm14,xmm5
	pxor xmm10,xmm11
	pxor xmm9,xmm12
	pxor xmm8,xmm13
	pxor xmm7,xmm14





	movdqa xmm11,xmm10
	pand xmm10,xmm8
	pxor xmm11,xmm9

	movdqa xmm13,xmm7
	movdqa xmm14,xmm11
	pxor xmm13,xmm10
	pand xmm14,xmm13

	movdqa xmm12,xmm8
	pxor xmm14,xmm9
	pxor xmm12,xmm7

	pxor xmm10,xmm9

	pand xmm12,xmm10

	movdqa xmm9,xmm13
	pxor xmm12,xmm7

	pxor xmm9,xmm12
	pxor xmm8,xmm12

	pand xmm9,xmm7

	pxor xmm13,xmm9
	pxor xmm8,xmm9

	pand xmm13,xmm14

	pxor xmm13,xmm11
	movdqa xmm11,xmm5
	movdqa xmm7,xmm4
	movdqa xmm9,xmm14
	pxor xmm9,xmm13
	pand xmm9,xmm5
	pxor xmm5,xmm4
	pand xmm4,xmm14
	pand xmm5,xmm13
	pxor xmm5,xmm4
	pxor xmm4,xmm9
	pxor xmm11,xmm15
	pxor xmm7,xmm2
	pxor xmm14,xmm12
	pxor xmm13,xmm8
	movdqa xmm10,xmm14
	movdqa xmm9,xmm12
	pxor xmm10,xmm13
	pxor xmm9,xmm8
	pand xmm10,xmm11
	pand xmm9,xmm15
	pxor xmm11,xmm7
	pxor xmm15,xmm2
	pand xmm7,xmm14
	pand xmm2,xmm12
	pand xmm11,xmm13
	pand xmm15,xmm8
	pxor xmm7,xmm11
	pxor xmm15,xmm2
	pxor xmm11,xmm10
	pxor xmm2,xmm9
	pxor xmm5,xmm11
	pxor xmm15,xmm11
	pxor xmm4,xmm7
	pxor xmm2,xmm7

	movdqa xmm11,xmm6
	movdqa xmm7,xmm0
	pxor xmm11,xmm3
	pxor xmm7,xmm1
	movdqa xmm10,xmm14
	movdqa xmm9,xmm12
	pxor xmm10,xmm13
	pxor xmm9,xmm8
	pand xmm10,xmm11
	pand xmm9,xmm3
	pxor xmm11,xmm7
	pxor xmm3,xmm1
	pand xmm7,xmm14
	pand xmm1,xmm12
	pand xmm11,xmm13
	pand xmm3,xmm8
	pxor xmm7,xmm11
	pxor xmm3,xmm1
	pxor xmm11,xmm10
	pxor xmm1,xmm9
	pxor xmm14,xmm12
	pxor xmm13,xmm8
	movdqa xmm10,xmm14
	pxor xmm10,xmm13
	pand xmm10,xmm6
	pxor xmm6,xmm0
	pand xmm0,xmm14
	pand xmm6,xmm13
	pxor xmm6,xmm0
	pxor xmm0,xmm10
	pxor xmm6,xmm11
	pxor xmm3,xmm11
	pxor xmm0,xmm7
	pxor xmm1,xmm7
	pxor xmm6,xmm15
	pxor xmm0,xmm5
	pxor xmm3,xmm6
	pxor xmm5,xmm15
	pxor xmm15,xmm0

	pxor xmm0,xmm4
	pxor xmm4,xmm1
	pxor xmm1,xmm2
	pxor xmm2,xmm4
	pxor xmm3,xmm4

	pxor xmm5,xmm2
	; Round counter.  The flags set here are consumed twice: by the jl
	; directly below (counter went negative: last pass is finished) and by
	; the jnz after the linear stage; pshufd/pxor/movdqa in between do
	; not modify flags.
	dec r10d
	jl NEAR $L$enc_done
	; Linear stage: dword rotations (pshufd 0x93 / 0x4E) combined by XOR.
	; NOTE(review): presumably the bit-sliced MixColumns step; confirm
	; against the upstream generator script.
	pshufd xmm7,xmm15,0x93
	pshufd xmm8,xmm0,0x93
	pxor xmm15,xmm7
	pshufd xmm9,xmm3,0x93
	pxor xmm0,xmm8
	pshufd xmm10,xmm5,0x93
	pxor xmm3,xmm9
	pshufd xmm11,xmm2,0x93
	pxor xmm5,xmm10
	pshufd xmm12,xmm6,0x93
	pxor xmm2,xmm11
	pshufd xmm13,xmm1,0x93
	pxor xmm6,xmm12
	pshufd xmm14,xmm4,0x93
	pxor xmm1,xmm13
	pxor xmm4,xmm14

	pxor xmm8,xmm15
	pxor xmm7,xmm4
	pxor xmm8,xmm4
	pshufd xmm15,xmm15,0x4E
	pxor xmm9,xmm0
	pshufd xmm0,xmm0,0x4E
	pxor xmm12,xmm2
	pxor xmm15,xmm7
	pxor xmm13,xmm6
	pxor xmm0,xmm8
	pxor xmm11,xmm5
	pshufd xmm7,xmm2,0x4E
	pxor xmm14,xmm1
	pshufd xmm8,xmm6,0x4E
	pxor xmm10,xmm3
	pshufd xmm2,xmm5,0x4E
	pxor xmm10,xmm4
	pshufd xmm6,xmm4,0x4E
	pxor xmm11,xmm4
	pshufd xmm5,xmm1,0x4E
	pxor xmm7,xmm11
	pshufd xmm1,xmm3,0x4E
	pxor xmm8,xmm12
	pxor xmm2,xmm10
	pxor xmm6,xmm14
	pxor xmm5,xmm13
	movdqa xmm3,xmm7
	pxor xmm1,xmm9
	movdqa xmm4,xmm8
	; Select the byte-shuffle mask for the next loop pass: [48+r11] while
	; the counter is still non-zero, [64+r11] once it has reached zero
	; (i.e. for the pass that precedes the final S-box stage).
	movdqa xmm7,XMMWORD[48+r11]
	jnz NEAR $L$enc_loop
	movdqa xmm7,XMMWORD[64+r11]
	jmp NEAR $L$enc_loop
ALIGN 16
$L$enc_done:
	; Convert back from bit-sliced form: the same three masked-exchange
	; passes (shift 1, 2, 4), applied to the post-S-box register layout.
	movdqa xmm7,XMMWORD[r11]
	movdqa xmm8,XMMWORD[16+r11]
	movdqa xmm9,xmm1
	psrlq xmm1,1
	movdqa xmm10,xmm2
	psrlq xmm2,1
	pxor xmm1,xmm4
	pxor xmm2,xmm6
	pand xmm1,xmm7
	pand xmm2,xmm7
	pxor xmm4,xmm1
	psllq xmm1,1
	pxor xmm6,xmm2
	psllq xmm2,1
	pxor xmm1,xmm9
	pxor xmm2,xmm10
	movdqa xmm9,xmm3
	psrlq xmm3,1
	movdqa xmm10,xmm15
	psrlq xmm15,1
	pxor xmm3,xmm5
	pxor xmm15,xmm0
	pand xmm3,xmm7
	pand xmm15,xmm7
	pxor xmm5,xmm3
	psllq xmm3,1
	pxor xmm0,xmm15
	psllq xmm15,1
	pxor xmm3,xmm9
	pxor xmm15,xmm10
	movdqa xmm7,XMMWORD[32+r11]
	movdqa xmm9,xmm6
	psrlq xmm6,2
	movdqa xmm10,xmm2
	psrlq xmm2,2
	pxor xmm6,xmm4
	pxor xmm2,xmm1
	pand xmm6,xmm8
	pand xmm2,xmm8
	pxor xmm4,xmm6
	psllq xmm6,2
	pxor xmm1,xmm2
	psllq xmm2,2
	pxor xmm6,xmm9
	pxor xmm2,xmm10
	movdqa xmm9,xmm0
	psrlq xmm0,2
	movdqa xmm10,xmm15
	psrlq xmm15,2
	pxor xmm0,xmm5
	pxor xmm15,xmm3
	pand xmm0,xmm8
	pand xmm15,xmm8
	pxor xmm5,xmm0
	psllq xmm0,2
	pxor xmm3,xmm15
	psllq xmm15,2
	pxor xmm0,xmm9
	pxor xmm15,xmm10
	movdqa xmm9,xmm5
	psrlq xmm5,4
	movdqa xmm10,xmm3
	psrlq xmm3,4
	pxor xmm5,xmm4
	pxor xmm3,xmm1
	pand xmm5,xmm7
	pand xmm3,xmm7
	pxor xmm4,xmm5
	psllq xmm5,4
	pxor xmm1,xmm3
	psllq xmm3,4
	pxor xmm5,xmm9
	pxor xmm3,xmm10
	movdqa xmm9,xmm0
	psrlq xmm0,4
	movdqa xmm10,xmm15
	psrlq xmm15,4
	pxor xmm0,xmm6
	pxor xmm15,xmm2
	pand xmm0,xmm7
	pand xmm15,xmm7
	pxor xmm6,xmm0
	psllq xmm0,4
	pxor xmm2,xmm15
	psllq xmm15,4
	pxor xmm0,xmm9
	pxor xmm15,xmm10
	; XOR the final 16 key bytes into all eight results.
	movdqa xmm7,XMMWORD[rax]
	pxor xmm3,xmm7
	pxor xmm5,xmm7
	pxor xmm2,xmm7
	pxor xmm6,xmm7
	pxor xmm1,xmm7
	pxor xmm4,xmm7
	pxor xmm15,xmm7
	pxor xmm0,xmm7
	DB 0F3h,0C3h ;repret
482
483
484
485
ALIGN 64
;-----------------------------------------------------------------------
; _bsaes_decrypt8 -- file-internal helper with a register-based
;                    (non-ABI) interface; processes eight 128-bit
;                    blocks in parallel (decryption direction).
;
; In:    xmm15, xmm0, xmm1 .. xmm6 = the eight input blocks
;        rax  = key material, 16-byte aligned (read with movdqa):
;               16 bytes, then 128 bytes per loop pass, then 16 bytes,
;               i.e. 128*r10d - 96 bytes in total
;        r10d = round count; the S-box stage below runs r10d times
; Out:   eight result blocks, in the register order
;        xmm15, xmm0, xmm5, xmm3, xmm1, xmm6, xmm2, xmm4
;        (the order in which the CBC caller in this file pairs them
;        with the IV / preceding input blocks)
; Clobb: rax, r10d, r11, xmm7 .. xmm14, flags
;
; Constants are addressed relative to $L$BS0: bit-exchange masks at
; [r11], [16+r11], [32+r11]; byte-shuffle masks at [-48+r11] (initial),
; [-16+r11] (loop passes) and [-32+r11] (pass before the final S-box).
;-----------------------------------------------------------------------
_bsaes_decrypt8:

	lea r11,[$L$BS0]

	; XOR the first 16 key bytes into every block and permute the bytes
	; of each block with the shuffle mask at [-48+r11].
	movdqa xmm8,XMMWORD[rax]
	lea rax,[16+rax]
	movdqa xmm7,XMMWORD[((-48))+r11]
	pxor xmm15,xmm8
	pxor xmm0,xmm8
	pxor xmm1,xmm8
	pxor xmm2,xmm8
DB 102,68,15,56,0,255			; pshufb xmm15,xmm7
DB 102,15,56,0,199			; pshufb xmm0,xmm7
	pxor xmm3,xmm8
	pxor xmm4,xmm8
DB 102,15,56,0,207			; pshufb xmm1,xmm7
DB 102,15,56,0,215			; pshufb xmm2,xmm7
	pxor xmm5,xmm8
	pxor xmm6,xmm8
DB 102,15,56,0,223			; pshufb xmm3,xmm7
DB 102,15,56,0,231			; pshufb xmm4,xmm7
DB 102,15,56,0,239			; pshufb xmm5,xmm7
DB 102,15,56,0,247			; pshufb xmm6,xmm7
	; Convert to bit-sliced form: three passes of masked bit exchanges
	; between register pairs.  Each exchange of (a,b) with shift n and
	; mask m computes t = ((a >> n) ^ b) & m; b ^= t; a ^= t << n.
	; Pass 1: shift 1, mask [r11].
	movdqa xmm7,XMMWORD[r11]
	movdqa xmm8,XMMWORD[16+r11]
	movdqa xmm9,xmm5
	psrlq xmm5,1
	movdqa xmm10,xmm3
	psrlq xmm3,1
	pxor xmm5,xmm6
	pxor xmm3,xmm4
	pand xmm5,xmm7
	pand xmm3,xmm7
	pxor xmm6,xmm5
	psllq xmm5,1
	pxor xmm4,xmm3
	psllq xmm3,1
	pxor xmm5,xmm9
	pxor xmm3,xmm10
	movdqa xmm9,xmm1
	psrlq xmm1,1
	movdqa xmm10,xmm15
	psrlq xmm15,1
	pxor xmm1,xmm2
	pxor xmm15,xmm0
	pand xmm1,xmm7
	pand xmm15,xmm7
	pxor xmm2,xmm1
	psllq xmm1,1
	pxor xmm0,xmm15
	psllq xmm15,1
	pxor xmm1,xmm9
	pxor xmm15,xmm10
	; Pass 2: shift 2, mask [16+r11] (already in xmm8); xmm7 is reloaded
	; with the pass-3 mask.
	movdqa xmm7,XMMWORD[32+r11]
	movdqa xmm9,xmm4
	psrlq xmm4,2
	movdqa xmm10,xmm3
	psrlq xmm3,2
	pxor xmm4,xmm6
	pxor xmm3,xmm5
	pand xmm4,xmm8
	pand xmm3,xmm8
	pxor xmm6,xmm4
	psllq xmm4,2
	pxor xmm5,xmm3
	psllq xmm3,2
	pxor xmm4,xmm9
	pxor xmm3,xmm10
	movdqa xmm9,xmm0
	psrlq xmm0,2
	movdqa xmm10,xmm15
	psrlq xmm15,2
	pxor xmm0,xmm2
	pxor xmm15,xmm1
	pand xmm0,xmm8
	pand xmm15,xmm8
	pxor xmm2,xmm0
	psllq xmm0,2
	pxor xmm1,xmm15
	psllq xmm15,2
	pxor xmm0,xmm9
	pxor xmm15,xmm10
	; Pass 3: shift 4, mask [32+r11].
	movdqa xmm9,xmm2
	psrlq xmm2,4
	movdqa xmm10,xmm1
	psrlq xmm1,4
	pxor xmm2,xmm6
	pxor xmm1,xmm5
	pand xmm2,xmm7
	pand xmm1,xmm7
	pxor xmm6,xmm2
	psllq xmm2,4
	pxor xmm5,xmm1
	psllq xmm1,4
	pxor xmm2,xmm9
	pxor xmm1,xmm10
	movdqa xmm9,xmm0
	psrlq xmm0,4
	movdqa xmm10,xmm15
	psrlq xmm15,4
	pxor xmm0,xmm4
	pxor xmm15,xmm3
	pand xmm0,xmm7
	pand xmm15,xmm7
	pxor xmm4,xmm0
	psllq xmm0,4
	pxor xmm3,xmm15
	psllq xmm15,4
	pxor xmm0,xmm9
	pxor xmm15,xmm10
	; The first pass enters at the S-box stage: the key XOR and shuffle at
	; the loop top were already done above.
	dec r10d
	jmp NEAR $L$dec_sbox
ALIGN 16
$L$dec_loop:
	; Loop top: XOR in the next 128 bytes of key material and apply the
	; byte shuffle selected at the bottom of the previous pass (xmm7).
	pxor xmm15,XMMWORD[rax]
	pxor xmm0,XMMWORD[16+rax]
	pxor xmm1,XMMWORD[32+rax]
	pxor xmm2,XMMWORD[48+rax]
DB 102,68,15,56,0,255			; pshufb xmm15,xmm7
DB 102,15,56,0,199			; pshufb xmm0,xmm7
	pxor xmm3,XMMWORD[64+rax]
	pxor xmm4,XMMWORD[80+rax]
DB 102,15,56,0,207			; pshufb xmm1,xmm7
DB 102,15,56,0,215			; pshufb xmm2,xmm7
	pxor xmm5,XMMWORD[96+rax]
	pxor xmm6,XMMWORD[112+rax]
DB 102,15,56,0,223			; pshufb xmm3,xmm7
DB 102,15,56,0,231			; pshufb xmm4,xmm7
DB 102,15,56,0,239			; pshufb xmm5,xmm7
DB 102,15,56,0,247			; pshufb xmm6,xmm7
	lea rax,[128+rax]
$L$dec_sbox:
	; S-box stage (named by the label): a fixed network of pxor/pand/por
	; over xmm0-xmm6 and xmm15, with xmm7-xmm14 as temporaries.  It
	; touches no memory and no general-purpose register.
	pxor xmm2,xmm3

	pxor xmm3,xmm6
	pxor xmm1,xmm6
	pxor xmm5,xmm3
	pxor xmm6,xmm5
	pxor xmm0,xmm6

	pxor xmm15,xmm0
	pxor xmm1,xmm4
	pxor xmm2,xmm15
	pxor xmm4,xmm15
	pxor xmm0,xmm2
	movdqa xmm10,xmm2
	movdqa xmm9,xmm6
	movdqa xmm8,xmm0
	movdqa xmm12,xmm3
	movdqa xmm11,xmm4

	pxor xmm10,xmm15
	pxor xmm9,xmm3
	pxor xmm8,xmm5
	movdqa xmm13,xmm10
	pxor xmm12,xmm15
	movdqa xmm7,xmm9
	pxor xmm11,xmm1
	movdqa xmm14,xmm10

	por xmm9,xmm8
	por xmm10,xmm11
	pxor xmm14,xmm7
	pand xmm13,xmm11
	pxor xmm11,xmm8
	pand xmm7,xmm8
	pand xmm14,xmm11
	movdqa xmm11,xmm5
	pxor xmm11,xmm1
	pand xmm12,xmm11
	pxor xmm10,xmm12
	pxor xmm9,xmm12
	movdqa xmm12,xmm2
	movdqa xmm11,xmm0
	pxor xmm12,xmm6
	pxor xmm11,xmm4
	movdqa xmm8,xmm12
	pand xmm12,xmm11
	por xmm8,xmm11
	pxor xmm7,xmm12
	pxor xmm10,xmm14
	pxor xmm9,xmm13
	pxor xmm8,xmm14
	movdqa xmm11,xmm3
	pxor xmm7,xmm13
	movdqa xmm12,xmm15
	pxor xmm8,xmm13
	movdqa xmm13,xmm6
	pand xmm11,xmm5
	movdqa xmm14,xmm2
	pand xmm12,xmm1
	pand xmm13,xmm0
	por xmm14,xmm4
	pxor xmm10,xmm11
	pxor xmm9,xmm12
	pxor xmm8,xmm13
	pxor xmm7,xmm14





	movdqa xmm11,xmm10
	pand xmm10,xmm8
	pxor xmm11,xmm9

	movdqa xmm13,xmm7
	movdqa xmm14,xmm11
	pxor xmm13,xmm10
	pand xmm14,xmm13

	movdqa xmm12,xmm8
	pxor xmm14,xmm9
	pxor xmm12,xmm7

	pxor xmm10,xmm9

	pand xmm12,xmm10

	movdqa xmm9,xmm13
	pxor xmm12,xmm7

	pxor xmm9,xmm12
	pxor xmm8,xmm12

	pand xmm9,xmm7

	pxor xmm13,xmm9
	pxor xmm8,xmm9

	pand xmm13,xmm14

	pxor xmm13,xmm11
	movdqa xmm11,xmm4
	movdqa xmm7,xmm0
	movdqa xmm9,xmm14
	pxor xmm9,xmm13
	pand xmm9,xmm4
	pxor xmm4,xmm0
	pand xmm0,xmm14
	pand xmm4,xmm13
	pxor xmm4,xmm0
	pxor xmm0,xmm9
	pxor xmm11,xmm1
	pxor xmm7,xmm5
	pxor xmm14,xmm12
	pxor xmm13,xmm8
	movdqa xmm10,xmm14
	movdqa xmm9,xmm12
	pxor xmm10,xmm13
	pxor xmm9,xmm8
	pand xmm10,xmm11
	pand xmm9,xmm1
	pxor xmm11,xmm7
	pxor xmm1,xmm5
	pand xmm7,xmm14
	pand xmm5,xmm12
	pand xmm11,xmm13
	pand xmm1,xmm8
	pxor xmm7,xmm11
	pxor xmm1,xmm5
	pxor xmm11,xmm10
	pxor xmm5,xmm9
	pxor xmm4,xmm11
	pxor xmm1,xmm11
	pxor xmm0,xmm7
	pxor xmm5,xmm7

	movdqa xmm11,xmm2
	movdqa xmm7,xmm6
	pxor xmm11,xmm15
	pxor xmm7,xmm3
	movdqa xmm10,xmm14
	movdqa xmm9,xmm12
	pxor xmm10,xmm13
	pxor xmm9,xmm8
	pand xmm10,xmm11
	pand xmm9,xmm15
	pxor xmm11,xmm7
	pxor xmm15,xmm3
	pand xmm7,xmm14
	pand xmm3,xmm12
	pand xmm11,xmm13
	pand xmm15,xmm8
	pxor xmm7,xmm11
	pxor xmm15,xmm3
	pxor xmm11,xmm10
	pxor xmm3,xmm9
	pxor xmm14,xmm12
	pxor xmm13,xmm8
	movdqa xmm10,xmm14
	pxor xmm10,xmm13
	pand xmm10,xmm2
	pxor xmm2,xmm6
	pand xmm6,xmm14
	pand xmm2,xmm13
	pxor xmm2,xmm6
	pxor xmm6,xmm10
	pxor xmm2,xmm11
	pxor xmm15,xmm11
	pxor xmm6,xmm7
	pxor xmm3,xmm7
	pxor xmm0,xmm6
	pxor xmm5,xmm4

	pxor xmm3,xmm0
	pxor xmm1,xmm6
	pxor xmm4,xmm6
	pxor xmm3,xmm1
	pxor xmm6,xmm15
	pxor xmm3,xmm4
	pxor xmm2,xmm5
	pxor xmm5,xmm0
	pxor xmm2,xmm3

	pxor xmm3,xmm15
	pxor xmm6,xmm2
	; Round counter.  The flags set here are consumed twice: by the jl
	; directly below (counter went negative: last pass is finished) and by
	; the jnz after the linear stage; pshufd/pxor/movdqa in between do
	; not modify flags.
	dec r10d
	jl NEAR $L$dec_done

	; Linear stage, part 1: qword swaps (pshufd 0x4E) combined by XOR.
	; NOTE(review): parts 1 and 2 together are presumably the bit-sliced
	; InvMixColumns step; confirm against the upstream generator script.
	pshufd xmm7,xmm15,0x4E
	pshufd xmm13,xmm2,0x4E
	pxor xmm7,xmm15
	pshufd xmm14,xmm4,0x4E
	pxor xmm13,xmm2
	pshufd xmm8,xmm0,0x4E
	pxor xmm14,xmm4
	pshufd xmm9,xmm5,0x4E
	pxor xmm8,xmm0
	pshufd xmm10,xmm3,0x4E
	pxor xmm9,xmm5
	pxor xmm15,xmm13
	pxor xmm0,xmm13
	pshufd xmm11,xmm1,0x4E
	pxor xmm10,xmm3
	pxor xmm5,xmm7
	pxor xmm3,xmm8
	pshufd xmm12,xmm6,0x4E
	pxor xmm11,xmm1
	pxor xmm0,xmm14
	pxor xmm1,xmm9
	pxor xmm12,xmm6

	pxor xmm5,xmm14
	pxor xmm3,xmm13
	pxor xmm1,xmm13
	pxor xmm6,xmm10
	pxor xmm2,xmm11
	pxor xmm1,xmm14
	pxor xmm6,xmm14
	pxor xmm4,xmm12
	; Linear stage, part 2: dword rotations (pshufd 0x93) and qword swaps
	; (pshufd 0x4E) combined by XOR.
	pshufd xmm7,xmm15,0x93
	pshufd xmm8,xmm0,0x93
	pxor xmm15,xmm7
	pshufd xmm9,xmm5,0x93
	pxor xmm0,xmm8
	pshufd xmm10,xmm3,0x93
	pxor xmm5,xmm9
	pshufd xmm11,xmm1,0x93
	pxor xmm3,xmm10
	pshufd xmm12,xmm6,0x93
	pxor xmm1,xmm11
	pshufd xmm13,xmm2,0x93
	pxor xmm6,xmm12
	pshufd xmm14,xmm4,0x93
	pxor xmm2,xmm13
	pxor xmm4,xmm14

	pxor xmm8,xmm15
	pxor xmm7,xmm4
	pxor xmm8,xmm4
	pshufd xmm15,xmm15,0x4E
	pxor xmm9,xmm0
	pshufd xmm0,xmm0,0x4E
	pxor xmm12,xmm1
	pxor xmm15,xmm7
	pxor xmm13,xmm6
	pxor xmm0,xmm8
	pxor xmm11,xmm3
	pshufd xmm7,xmm1,0x4E
	pxor xmm14,xmm2
	pshufd xmm8,xmm6,0x4E
	pxor xmm10,xmm5
	pshufd xmm1,xmm3,0x4E
	pxor xmm10,xmm4
	pshufd xmm6,xmm4,0x4E
	pxor xmm11,xmm4
	pshufd xmm3,xmm2,0x4E
	pxor xmm7,xmm11
	pshufd xmm2,xmm5,0x4E
	pxor xmm8,xmm12
	pxor xmm10,xmm1
	pxor xmm6,xmm14
	pxor xmm13,xmm3
	movdqa xmm3,xmm7
	pxor xmm2,xmm9
	movdqa xmm5,xmm13
	movdqa xmm4,xmm8
	movdqa xmm1,xmm2
	movdqa xmm2,xmm10
	; Select the byte-shuffle mask for the next loop pass: [-16+r11] while
	; the counter is still non-zero, [-32+r11] once it has reached zero
	; (i.e. for the pass that precedes the final S-box stage).
	movdqa xmm7,XMMWORD[((-16))+r11]
	jnz NEAR $L$dec_loop
	movdqa xmm7,XMMWORD[((-32))+r11]
	jmp NEAR $L$dec_loop
ALIGN 16
$L$dec_done:
	; Convert back from bit-sliced form: the same three masked-exchange
	; passes (shift 1, 2, 4), applied to the post-S-box register layout.
	movdqa xmm7,XMMWORD[r11]
	movdqa xmm8,XMMWORD[16+r11]
	movdqa xmm9,xmm2
	psrlq xmm2,1
	movdqa xmm10,xmm1
	psrlq xmm1,1
	pxor xmm2,xmm4
	pxor xmm1,xmm6
	pand xmm2,xmm7
	pand xmm1,xmm7
	pxor xmm4,xmm2
	psllq xmm2,1
	pxor xmm6,xmm1
	psllq xmm1,1
	pxor xmm2,xmm9
	pxor xmm1,xmm10
	movdqa xmm9,xmm5
	psrlq xmm5,1
	movdqa xmm10,xmm15
	psrlq xmm15,1
	pxor xmm5,xmm3
	pxor xmm15,xmm0
	pand xmm5,xmm7
	pand xmm15,xmm7
	pxor xmm3,xmm5
	psllq xmm5,1
	pxor xmm0,xmm15
	psllq xmm15,1
	pxor xmm5,xmm9
	pxor xmm15,xmm10
	movdqa xmm7,XMMWORD[32+r11]
	movdqa xmm9,xmm6
	psrlq xmm6,2
	movdqa xmm10,xmm1
	psrlq xmm1,2
	pxor xmm6,xmm4
	pxor xmm1,xmm2
	pand xmm6,xmm8
	pand xmm1,xmm8
	pxor xmm4,xmm6
	psllq xmm6,2
	pxor xmm2,xmm1
	psllq xmm1,2
	pxor xmm6,xmm9
	pxor xmm1,xmm10
	movdqa xmm9,xmm0
	psrlq xmm0,2
	movdqa xmm10,xmm15
	psrlq xmm15,2
	pxor xmm0,xmm3
	pxor xmm15,xmm5
	pand xmm0,xmm8
	pand xmm15,xmm8
	pxor xmm3,xmm0
	psllq xmm0,2
	pxor xmm5,xmm15
	psllq xmm15,2
	pxor xmm0,xmm9
	pxor xmm15,xmm10
	movdqa xmm9,xmm3
	psrlq xmm3,4
	movdqa xmm10,xmm5
	psrlq xmm5,4
	pxor xmm3,xmm4
	pxor xmm5,xmm2
	pand xmm3,xmm7
	pand xmm5,xmm7
	pxor xmm4,xmm3
	psllq xmm3,4
	pxor xmm2,xmm5
	psllq xmm5,4
	pxor xmm3,xmm9
	pxor xmm5,xmm10
	movdqa xmm9,xmm0
	psrlq xmm0,4
	movdqa xmm10,xmm15
	psrlq xmm15,4
	pxor xmm0,xmm6
	pxor xmm15,xmm1
	pand xmm0,xmm7
	pand xmm15,xmm7
	pxor xmm6,xmm0
	psllq xmm0,4
	pxor xmm1,xmm15
	psllq xmm15,4
	pxor xmm0,xmm9
	pxor xmm15,xmm10
	; XOR the final 16 key bytes into all eight results.
	movdqa xmm7,XMMWORD[rax]
	pxor xmm5,xmm7
	pxor xmm3,xmm7
	pxor xmm1,xmm7
	pxor xmm6,xmm7
	pxor xmm2,xmm7
	pxor xmm4,xmm7
	pxor xmm15,xmm7
	pxor xmm0,xmm7
	DB 0F3h,0C3h ;repret
990
991
992
ALIGN 16
;-----------------------------------------------------------------------
; _bsaes_key_convert -- file-internal helper with a register-based
;                       (non-ABI) interface; expands a sequence of
;                       16-byte round keys into bit-plane form.
;
; In:    rcx  = source round keys (read unaligned, 16 bytes each);
;               16*(r10d+1) bytes are read
;        rax  = destination, 16-byte aligned (written with movdqa);
;               16 + 128*(r10d-1) bytes are written
;        r10d = round count.  Must be >= 2: the counter is decremented
;               once before the dec/jnz loop, so 1 would wrap around.
; Out:   [dest]        = first source key, copied unchanged
;        [dest+16 ...] = one 128-byte group per middle key: eight
;                        16-byte planes whose bytes are 0xFF where the
;                        tested key bit is set and 0x00 where it is
;                        clear; the planes at offsets 0, 16, 80 and 96
;                        are complemented
;        rax  = first byte after the last group written
;        rcx  = address of the last source key
;        xmm6 = last source key, loaded but NOT converted
;        xmm7 = constant at [80 + $L$masks]
;        (both callers in this file combine xmm6/xmm7 with the stored
;        schedule themselves)
; Clobb: r10d, r11, xmm0 .. xmm5, xmm8 .. xmm15, flags
;
; NOTE(review): complementing planes 0, 1, 5 and 6 presumably folds the
; S-box affine constant 0x63 into the key; confirm against the upstream
; generator script.
;-----------------------------------------------------------------------
_bsaes_key_convert:

	lea r11,[$L$masks]
	movdqu xmm7,XMMWORD[rcx]		; first round key
	lea rcx,[16+rcx]
	movdqa xmm0,XMMWORD[r11]		; xmm0..xmm3 = four bit-select masks
	movdqa xmm1,XMMWORD[16+r11]
	movdqa xmm2,XMMWORD[32+r11]
	movdqa xmm3,XMMWORD[48+r11]
	movdqa xmm4,XMMWORD[64+r11]		; byte-shuffle mask for pshufb below
	pcmpeqd xmm5,xmm5			; xmm5 = all ones (for complementing)

	movdqu xmm6,XMMWORD[rcx]		; preload second round key
	movdqa XMMWORD[rax],xmm7		; first key is stored as-is
	lea rax,[16+rax]
	dec r10d
	jmp NEAR $L$key_loop
ALIGN 16
$L$key_loop:
DB 102,15,56,0,244			; pshufb xmm6,xmm4

	; For each mask: (key & mask) == mask, byte-wise -> 0xFF / 0x00.
	movdqa xmm8,xmm0
	movdqa xmm9,xmm1

	pand xmm8,xmm6
	pand xmm9,xmm6
	movdqa xmm10,xmm2
	pcmpeqb xmm8,xmm0
	psllq xmm0,4				; masks << 4 select the other four bits;
	movdqa xmm11,xmm3
	pcmpeqb xmm9,xmm1
	psllq xmm1,4				; each is shifted back after its use

	pand xmm10,xmm6
	pand xmm11,xmm6
	movdqa xmm12,xmm0
	pcmpeqb xmm10,xmm2
	psllq xmm2,4
	movdqa xmm13,xmm1
	pcmpeqb xmm11,xmm3
	psllq xmm3,4

	movdqa xmm14,xmm2
	movdqa xmm15,xmm3
	pxor xmm8,xmm5				; complement planes 0 and 1
	pxor xmm9,xmm5

	pand xmm12,xmm6
	pand xmm13,xmm6
	movdqa XMMWORD[rax],xmm8
	pcmpeqb xmm12,xmm0
	psrlq xmm0,4
	movdqa XMMWORD[16+rax],xmm9
	pcmpeqb xmm13,xmm1
	psrlq xmm1,4
	lea rcx,[16+rcx]

	pand xmm14,xmm6
	pand xmm15,xmm6
	movdqa XMMWORD[32+rax],xmm10
	pcmpeqb xmm14,xmm2
	psrlq xmm2,4
	movdqa XMMWORD[48+rax],xmm11
	pcmpeqb xmm15,xmm3
	psrlq xmm3,4
	movdqu xmm6,XMMWORD[rcx]		; next round key (last one on final pass)

	pxor xmm13,xmm5				; complement planes 5 and 6
	pxor xmm14,xmm5
	movdqa XMMWORD[64+rax],xmm12
	movdqa XMMWORD[80+rax],xmm13
	movdqa XMMWORD[96+rax],xmm14
	movdqa XMMWORD[112+rax],xmm15
	lea rax,[128+rax]
	dec r10d
	jnz NEAR $L$key_loop

	movdqa xmm7,XMMWORD[80+r11]		; constant handed back to the caller

	DB 0F3h,0C3h ;repret
1074
1075
EXTERN asm_AES_cbc_encrypt
global ossl_bsaes_cbc_encrypt

ALIGN 16
;-----------------------------------------------------------------------
; ossl_bsaes_cbc_encrypt -- CBC decryption, eight blocks per pass.
;
; ABI:   Microsoft x64 (args in rcx, rdx, r8, r9, then stack; xmm6-xmm15
;        and rbx/rbp/r12-r15 are saved and restored here).
;        NOTE(review): presumably the C prototype is
;          void ossl_bsaes_cbc_encrypt(const unsigned char *in,
;                                      unsigned char *out, size_t len,
;                                      const AES_KEY *key,
;                                      unsigned char ivec[16], int enc);
;        confirm against the C header.
; In:    rcx      = input pointer
;        rdx      = output pointer
;        r8       = length in bytes (shifted right by 4 to count blocks)
;        r9       = key structure; DWORD at offset 240 is the round count
;        [rsp+40] = pointer to the 16-byte IV (read, then updated)
;        [rsp+48] = DWORD flag; non-zero => tail-jump to
;                   asm_AES_cbc_encrypt (this routine only decrypts)
;        A length below 128 bytes is also handed to asm_AES_cbc_encrypt.
; Out:   output buffer filled; IV buffer = last input block processed.
;
; Frame (offsets from rbp, which equals entry rsp - 280):
;        below rbp   bit-sliced key schedule, 128*rounds - 96 bytes,
;                    zeroed before return
;        +32         16-byte scratch: IV for the current batch, or the
;                    output of asm_AES_decrypt in the one-block path
;        +64 .. +223 saved xmm6 .. xmm15
;        +232.. +279 saved r15, r14, r13, r12, rbx, rbp
;
; The $L$cbc_dec_prologue/body/tail/epilogue labels are not referenced
; inside this routine; presumably they delimit regions for an unwind
; handler defined elsewhere in the file.
;-----------------------------------------------------------------------
ossl_bsaes_cbc_encrypt:

DB 243,15,30,250			; endbr64
	mov r11d,DWORD[48+rsp]			; sixth argument
	cmp r11d,0
	jne NEAR asm_AES_cbc_encrypt
	cmp r8,128
	jb NEAR asm_AES_cbc_encrypt

	mov rax,rsp
$L$cbc_dec_prologue:
	push rbp

	push rbx

	push r12

	push r13

	push r14

	push r15

	lea rsp,[((-72))+rsp]

	; rsp is now entry rsp - 120, so [160+rsp] is the fifth argument.
	mov r10,QWORD[160+rsp]
	lea rsp,[((-160))+rsp]
	movaps XMMWORD[64+rsp],xmm6
	movaps XMMWORD[80+rsp],xmm7
	movaps XMMWORD[96+rsp],xmm8
	movaps XMMWORD[112+rsp],xmm9
	movaps XMMWORD[128+rsp],xmm10
	movaps XMMWORD[144+rsp],xmm11
	movaps XMMWORD[160+rsp],xmm12
	movaps XMMWORD[176+rsp],xmm13
	movaps XMMWORD[192+rsp],xmm14
	movaps XMMWORD[208+rsp],xmm15
$L$cbc_dec_body:
	mov rbp,rsp

	mov eax,DWORD[240+r9]			; round count
	mov r12,rcx				; r12 = input cursor
	mov r13,rdx				; r13 = output cursor
	mov r14,r8
	mov r15,r9				; r15 = key structure
	mov rbx,r10				; rbx = IV pointer
	shr r14,4				; r14 = number of 16-byte blocks

	; Reserve 128*rounds - 96 bytes below the frame for the converted key.
	mov edx,eax
	shl rax,7
	sub rax,96
	sub rsp,rax

	mov rax,rsp
	mov rcx,r15
	mov r10d,edx
	call _bsaes_key_convert
	; Finish the schedule: fold the returned constant (xmm7) into the
	; first stored key, and append the unconverted last key (xmm6).
	pxor xmm7,XMMWORD[rsp]
	movdqa XMMWORD[rax],xmm6
	movdqa XMMWORD[rsp],xmm7

	movdqu xmm14,XMMWORD[rbx]		; xmm14 = current IV
	sub r14,8
$L$cbc_dec_loop:
	; Full batch of eight blocks.
	movdqu xmm15,XMMWORD[r12]
	movdqu xmm0,XMMWORD[16+r12]
	movdqu xmm1,XMMWORD[32+r12]
	movdqu xmm2,XMMWORD[48+r12]
	movdqu xmm3,XMMWORD[64+r12]
	movdqu xmm4,XMMWORD[80+r12]
	mov rax,rsp
	movdqu xmm5,XMMWORD[96+r12]
	mov r10d,edx
	movdqu xmm6,XMMWORD[112+r12]
	movdqa XMMWORD[32+rbp],xmm14		; the helper clobbers xmm14

	call _bsaes_decrypt8

	; Chain: result 0 ^= IV, result i ^= input block i-1.  Results come
	; back in the order xmm15, xmm0, xmm5, xmm3, xmm1, xmm6, xmm2, xmm4.
	pxor xmm15,XMMWORD[32+rbp]
	movdqu xmm7,XMMWORD[r12]
	movdqu xmm8,XMMWORD[16+r12]
	pxor xmm0,xmm7
	movdqu xmm9,XMMWORD[32+r12]
	pxor xmm5,xmm8
	movdqu xmm10,XMMWORD[48+r12]
	pxor xmm3,xmm9
	movdqu xmm11,XMMWORD[64+r12]
	pxor xmm1,xmm10
	movdqu xmm12,XMMWORD[80+r12]
	pxor xmm6,xmm11
	movdqu xmm13,XMMWORD[96+r12]
	pxor xmm2,xmm12
	movdqu xmm14,XMMWORD[112+r12]		; last input block = next IV
	pxor xmm4,xmm13
	movdqu XMMWORD[r13],xmm15
	lea r12,[128+r12]
	movdqu XMMWORD[16+r13],xmm0
	movdqu XMMWORD[32+r13],xmm5
	movdqu XMMWORD[48+r13],xmm3
	movdqu XMMWORD[64+r13],xmm1
	movdqu XMMWORD[80+r13],xmm6
	movdqu XMMWORD[96+r13],xmm2
	movdqu XMMWORD[112+r13],xmm4
	lea r13,[128+r13]
	sub r14,8
	jnc NEAR $L$cbc_dec_loop

	; Tail: r14 + 8 = remaining blocks (0..7).
	add r14,8
	jz NEAR $L$cbc_dec_done

	; Load only as many blocks as remain; each cmp serves both the jb and
	; the following je because movdqu does not modify flags.
	movdqu xmm15,XMMWORD[r12]
	mov rax,rsp
	mov r10d,edx
	cmp r14,2
	jb NEAR $L$cbc_dec_one
	movdqu xmm0,XMMWORD[16+r12]
	je NEAR $L$cbc_dec_two
	movdqu xmm1,XMMWORD[32+r12]
	cmp r14,4
	jb NEAR $L$cbc_dec_three
	movdqu xmm2,XMMWORD[48+r12]
	je NEAR $L$cbc_dec_four
	movdqu xmm3,XMMWORD[64+r12]
	cmp r14,6
	jb NEAR $L$cbc_dec_five
	movdqu xmm4,XMMWORD[80+r12]
	je NEAR $L$cbc_dec_six
	; Seven blocks remain.
	movdqu xmm5,XMMWORD[96+r12]
	movdqa XMMWORD[32+rbp],xmm14
	call _bsaes_decrypt8
	pxor xmm15,XMMWORD[32+rbp]
	movdqu xmm7,XMMWORD[r12]
	movdqu xmm8,XMMWORD[16+r12]
	pxor xmm0,xmm7
	movdqu xmm9,XMMWORD[32+r12]
	pxor xmm5,xmm8
	movdqu xmm10,XMMWORD[48+r12]
	pxor xmm3,xmm9
	movdqu xmm11,XMMWORD[64+r12]
	pxor xmm1,xmm10
	movdqu xmm12,XMMWORD[80+r12]
	pxor xmm6,xmm11
	movdqu xmm14,XMMWORD[96+r12]
	pxor xmm2,xmm12
	movdqu XMMWORD[r13],xmm15
	movdqu XMMWORD[16+r13],xmm0
	movdqu XMMWORD[32+r13],xmm5
	movdqu XMMWORD[48+r13],xmm3
	movdqu XMMWORD[64+r13],xmm1
	movdqu XMMWORD[80+r13],xmm6
	movdqu XMMWORD[96+r13],xmm2
	jmp NEAR $L$cbc_dec_done
ALIGN 16
$L$cbc_dec_six:
	movdqa XMMWORD[32+rbp],xmm14
	call _bsaes_decrypt8
	pxor xmm15,XMMWORD[32+rbp]
	movdqu xmm7,XMMWORD[r12]
	movdqu xmm8,XMMWORD[16+r12]
	pxor xmm0,xmm7
	movdqu xmm9,XMMWORD[32+r12]
	pxor xmm5,xmm8
	movdqu xmm10,XMMWORD[48+r12]
	pxor xmm3,xmm9
	movdqu xmm11,XMMWORD[64+r12]
	pxor xmm1,xmm10
	movdqu xmm14,XMMWORD[80+r12]
	pxor xmm6,xmm11
	movdqu XMMWORD[r13],xmm15
	movdqu XMMWORD[16+r13],xmm0
	movdqu XMMWORD[32+r13],xmm5
	movdqu XMMWORD[48+r13],xmm3
	movdqu XMMWORD[64+r13],xmm1
	movdqu XMMWORD[80+r13],xmm6
	jmp NEAR $L$cbc_dec_done
ALIGN 16
$L$cbc_dec_five:
	movdqa XMMWORD[32+rbp],xmm14
	call _bsaes_decrypt8
	pxor xmm15,XMMWORD[32+rbp]
	movdqu xmm7,XMMWORD[r12]
	movdqu xmm8,XMMWORD[16+r12]
	pxor xmm0,xmm7
	movdqu xmm9,XMMWORD[32+r12]
	pxor xmm5,xmm8
	movdqu xmm10,XMMWORD[48+r12]
	pxor xmm3,xmm9
	movdqu xmm14,XMMWORD[64+r12]
	pxor xmm1,xmm10
	movdqu XMMWORD[r13],xmm15
	movdqu XMMWORD[16+r13],xmm0
	movdqu XMMWORD[32+r13],xmm5
	movdqu XMMWORD[48+r13],xmm3
	movdqu XMMWORD[64+r13],xmm1
	jmp NEAR $L$cbc_dec_done
ALIGN 16
$L$cbc_dec_four:
	movdqa XMMWORD[32+rbp],xmm14
	call _bsaes_decrypt8
	pxor xmm15,XMMWORD[32+rbp]
	movdqu xmm7,XMMWORD[r12]
	movdqu xmm8,XMMWORD[16+r12]
	pxor xmm0,xmm7
	movdqu xmm9,XMMWORD[32+r12]
	pxor xmm5,xmm8
	movdqu xmm14,XMMWORD[48+r12]
	pxor xmm3,xmm9
	movdqu XMMWORD[r13],xmm15
	movdqu XMMWORD[16+r13],xmm0
	movdqu XMMWORD[32+r13],xmm5
	movdqu XMMWORD[48+r13],xmm3
	jmp NEAR $L$cbc_dec_done
ALIGN 16
$L$cbc_dec_three:
	movdqa XMMWORD[32+rbp],xmm14
	call _bsaes_decrypt8
	pxor xmm15,XMMWORD[32+rbp]
	movdqu xmm7,XMMWORD[r12]
	movdqu xmm8,XMMWORD[16+r12]
	pxor xmm0,xmm7
	movdqu xmm14,XMMWORD[32+r12]
	pxor xmm5,xmm8
	movdqu XMMWORD[r13],xmm15
	movdqu XMMWORD[16+r13],xmm0
	movdqu XMMWORD[32+r13],xmm5
	jmp NEAR $L$cbc_dec_done
ALIGN 16
$L$cbc_dec_two:
	movdqa XMMWORD[32+rbp],xmm14
	call _bsaes_decrypt8
	pxor xmm15,XMMWORD[32+rbp]
	movdqu xmm7,XMMWORD[r12]
	movdqu xmm14,XMMWORD[16+r12]
	pxor xmm0,xmm7
	movdqu XMMWORD[r13],xmm15
	movdqu XMMWORD[16+r13],xmm0
	jmp NEAR $L$cbc_dec_done
ALIGN 16
$L$cbc_dec_one:
	; Single block: use the external routine with the original key
	; structure; its output lands in the scratch slot at [32+rbp].
	; xmm14 (IV) and xmm15 (input block) are read again after the call,
	; which relies on the callee preserving them (they are callee-saved
	; under the Microsoft x64 convention).
	lea rcx,[r12]
	lea rdx,[32+rbp]
	lea r8,[r15]
	call asm_AES_decrypt
	pxor xmm14,XMMWORD[32+rbp]
	movdqu XMMWORD[r13],xmm14
	movdqa xmm14,xmm15			; input block becomes the next IV

$L$cbc_dec_done:
	movdqu XMMWORD[rbx],xmm14		; write the updated IV back
	; Wipe the converted key schedule: zero [rsp, rbp) in 32-byte steps.
	lea rax,[rsp]
	pxor xmm0,xmm0
$L$cbc_dec_bzero:
	movdqa XMMWORD[rax],xmm0
	movdqa XMMWORD[16+rax],xmm0
	lea rax,[32+rax]
	cmp rbp,rax
	ja NEAR $L$cbc_dec_bzero

	lea rax,[120+rbp]

	movaps xmm6,XMMWORD[64+rbp]
	movaps xmm7,XMMWORD[80+rbp]
	movaps xmm8,XMMWORD[96+rbp]
	movaps xmm9,XMMWORD[112+rbp]
	movaps xmm10,XMMWORD[128+rbp]
	movaps xmm11,XMMWORD[144+rbp]
	movaps xmm12,XMMWORD[160+rbp]
	movaps xmm13,XMMWORD[176+rbp]
	movaps xmm14,XMMWORD[192+rbp]
	movaps xmm15,XMMWORD[208+rbp]
	lea rax,[160+rax]			; rax = rbp + 280 = rsp at entry
$L$cbc_dec_tail:
	mov r15,QWORD[((-48))+rax]

	mov r14,QWORD[((-40))+rax]

	mov r13,QWORD[((-32))+rax]

	mov r12,QWORD[((-24))+rax]

	mov rbx,QWORD[((-16))+rax]

	mov rbp,QWORD[((-8))+rax]

	lea rsp,[rax]

$L$cbc_dec_epilogue:
	DB 0F3h,0C3h ;repret
1368
1369
1370
1371global ossl_bsaes_ctr32_encrypt_blocks
1372
1373ALIGN 16
1374ossl_bsaes_ctr32_encrypt_blocks:
1375
1376DB 243,15,30,250
1377 mov rax,rsp
1378$L$ctr_enc_prologue:
1379 push rbp
1380
1381 push rbx
1382
1383 push r12
1384
1385 push r13
1386
1387 push r14
1388
1389 push r15
1390
1391 lea rsp,[((-72))+rsp]
1392
1393 mov r10,QWORD[160+rsp]
1394 lea rsp,[((-160))+rsp]
1395 movaps XMMWORD[64+rsp],xmm6
1396 movaps XMMWORD[80+rsp],xmm7
1397 movaps XMMWORD[96+rsp],xmm8
1398 movaps XMMWORD[112+rsp],xmm9
1399 movaps XMMWORD[128+rsp],xmm10
1400 movaps XMMWORD[144+rsp],xmm11
1401 movaps XMMWORD[160+rsp],xmm12
1402 movaps XMMWORD[176+rsp],xmm13
1403 movaps XMMWORD[192+rsp],xmm14
1404 movaps XMMWORD[208+rsp],xmm15
1405$L$ctr_enc_body:
1406 mov rbp,rsp
1407
1408 movdqu xmm0,XMMWORD[r10]
1409 mov eax,DWORD[240+r9]
1410 mov r12,rcx
1411 mov r13,rdx
1412 mov r14,r8
1413 mov r15,r9
1414 movdqa XMMWORD[32+rbp],xmm0
1415 cmp r8,8
1416 jb NEAR $L$ctr_enc_short
1417
1418 mov ebx,eax
1419 shl rax,7
1420 sub rax,96
1421 sub rsp,rax
1422
1423 mov rax,rsp
1424 mov rcx,r15
1425 mov r10d,ebx
1426 call _bsaes_key_convert
1427 pxor xmm7,xmm6
1428 movdqa XMMWORD[rax],xmm7
1429
1430 movdqa xmm8,XMMWORD[rsp]
1431 lea r11,[$L$ADD1]
1432 movdqa xmm15,XMMWORD[32+rbp]
1433 movdqa xmm7,XMMWORD[((-32))+r11]
1434DB 102,68,15,56,0,199
1435DB 102,68,15,56,0,255
1436 movdqa XMMWORD[rsp],xmm8
1437 jmp NEAR $L$ctr_enc_loop
1438ALIGN 16
1439$L$ctr_enc_loop:
1440 movdqa XMMWORD[32+rbp],xmm15
1441 movdqa xmm0,xmm15
1442 movdqa xmm1,xmm15
1443 paddd xmm0,XMMWORD[r11]
1444 movdqa xmm2,xmm15
1445 paddd xmm1,XMMWORD[16+r11]
1446 movdqa xmm3,xmm15
1447 paddd xmm2,XMMWORD[32+r11]
1448 movdqa xmm4,xmm15
1449 paddd xmm3,XMMWORD[48+r11]
1450 movdqa xmm5,xmm15
1451 paddd xmm4,XMMWORD[64+r11]
1452 movdqa xmm6,xmm15
1453 paddd xmm5,XMMWORD[80+r11]
1454 paddd xmm6,XMMWORD[96+r11]
1455
1456
1457
1458 movdqa xmm8,XMMWORD[rsp]
1459 lea rax,[16+rsp]
1460 movdqa xmm7,XMMWORD[((-16))+r11]
1461 pxor xmm15,xmm8
1462 pxor xmm0,xmm8
1463 pxor xmm1,xmm8
1464 pxor xmm2,xmm8
1465DB 102,68,15,56,0,255
1466DB 102,15,56,0,199
1467 pxor xmm3,xmm8
1468 pxor xmm4,xmm8
1469DB 102,15,56,0,207
1470DB 102,15,56,0,215
1471 pxor xmm5,xmm8
1472 pxor xmm6,xmm8
1473DB 102,15,56,0,223
1474DB 102,15,56,0,231
1475DB 102,15,56,0,239
1476DB 102,15,56,0,247
1477 lea r11,[$L$BS0]
1478 mov r10d,ebx
1479
1480 call _bsaes_encrypt8_bitslice
1481
1482 sub r14,8
1483 jc NEAR $L$ctr_enc_loop_done
1484
1485 movdqu xmm7,XMMWORD[r12]
1486 movdqu xmm8,XMMWORD[16+r12]
1487 movdqu xmm9,XMMWORD[32+r12]
1488 movdqu xmm10,XMMWORD[48+r12]
1489 movdqu xmm11,XMMWORD[64+r12]
1490 movdqu xmm12,XMMWORD[80+r12]
1491 movdqu xmm13,XMMWORD[96+r12]
1492 movdqu xmm14,XMMWORD[112+r12]
1493 lea r12,[128+r12]
1494 pxor xmm7,xmm15
1495 movdqa xmm15,XMMWORD[32+rbp]
1496 pxor xmm0,xmm8
1497 movdqu XMMWORD[r13],xmm7
1498 pxor xmm3,xmm9
1499 movdqu XMMWORD[16+r13],xmm0
1500 pxor xmm5,xmm10
1501 movdqu XMMWORD[32+r13],xmm3
1502 pxor xmm2,xmm11
1503 movdqu XMMWORD[48+r13],xmm5
1504 pxor xmm6,xmm12
1505 movdqu XMMWORD[64+r13],xmm2
1506 pxor xmm1,xmm13
1507 movdqu XMMWORD[80+r13],xmm6
1508 pxor xmm4,xmm14
1509 movdqu XMMWORD[96+r13],xmm1
1510 lea r11,[$L$ADD1]
1511 movdqu XMMWORD[112+r13],xmm4
1512 lea r13,[128+r13]
1513 paddd xmm15,XMMWORD[112+r11]
1514 jnz NEAR $L$ctr_enc_loop
1515
1516 jmp NEAR $L$ctr_enc_done
1517ALIGN 16
1518$L$ctr_enc_loop_done:
1519 add r14,8
1520 movdqu xmm7,XMMWORD[r12]
1521 pxor xmm15,xmm7
1522 movdqu XMMWORD[r13],xmm15
1523 cmp r14,2
1524 jb NEAR $L$ctr_enc_done
1525 movdqu xmm8,XMMWORD[16+r12]
1526 pxor xmm0,xmm8
1527 movdqu XMMWORD[16+r13],xmm0
1528 je NEAR $L$ctr_enc_done
1529 movdqu xmm9,XMMWORD[32+r12]
1530 pxor xmm3,xmm9
1531 movdqu XMMWORD[32+r13],xmm3
1532 cmp r14,4
1533 jb NEAR $L$ctr_enc_done
1534 movdqu xmm10,XMMWORD[48+r12]
1535 pxor xmm5,xmm10
1536 movdqu XMMWORD[48+r13],xmm5
1537 je NEAR $L$ctr_enc_done
1538 movdqu xmm11,XMMWORD[64+r12]
1539 pxor xmm2,xmm11
1540 movdqu XMMWORD[64+r13],xmm2
1541 cmp r14,6
1542 jb NEAR $L$ctr_enc_done
1543 movdqu xmm12,XMMWORD[80+r12]
1544 pxor xmm6,xmm12
1545 movdqu XMMWORD[80+r13],xmm6
1546 je NEAR $L$ctr_enc_done
1547 movdqu xmm13,XMMWORD[96+r12]
1548 pxor xmm1,xmm13
1549 movdqu XMMWORD[96+r13],xmm1
1550 jmp NEAR $L$ctr_enc_done
1551
1552ALIGN 16
1553$L$ctr_enc_short:
1554 lea rcx,[32+rbp]
1555 lea rdx,[48+rbp]
1556 lea r8,[r15]
1557 call asm_AES_encrypt
1558 movdqu xmm0,XMMWORD[r12]
1559 lea r12,[16+r12]
1560 mov eax,DWORD[44+rbp]
1561 bswap eax
1562 pxor xmm0,XMMWORD[48+rbp]
1563 inc eax
1564 movdqu XMMWORD[r13],xmm0
1565 bswap eax
1566 lea r13,[16+r13]
1567 mov DWORD[44+rsp],eax
1568 dec r14
1569 jnz NEAR $L$ctr_enc_short
1570
1571$L$ctr_enc_done:
1572 lea rax,[rsp]
1573 pxor xmm0,xmm0
1574$L$ctr_enc_bzero:
1575 movdqa XMMWORD[rax],xmm0
1576 movdqa XMMWORD[16+rax],xmm0
1577 lea rax,[32+rax]
1578 cmp rbp,rax
1579 ja NEAR $L$ctr_enc_bzero
1580
1581 lea rax,[120+rbp]
1582
1583 movaps xmm6,XMMWORD[64+rbp]
1584 movaps xmm7,XMMWORD[80+rbp]
1585 movaps xmm8,XMMWORD[96+rbp]
1586 movaps xmm9,XMMWORD[112+rbp]
1587 movaps xmm10,XMMWORD[128+rbp]
1588 movaps xmm11,XMMWORD[144+rbp]
1589 movaps xmm12,XMMWORD[160+rbp]
1590 movaps xmm13,XMMWORD[176+rbp]
1591 movaps xmm14,XMMWORD[192+rbp]
1592 movaps xmm15,XMMWORD[208+rbp]
1593 lea rax,[160+rax]
1594$L$ctr_enc_tail:
1595 mov r15,QWORD[((-48))+rax]
1596
1597 mov r14,QWORD[((-40))+rax]
1598
1599 mov r13,QWORD[((-32))+rax]
1600
1601 mov r12,QWORD[((-24))+rax]
1602
1603 mov rbx,QWORD[((-16))+rax]
1604
1605 mov rbp,QWORD[((-8))+rax]
1606
1607 lea rsp,[rax]
1608
1609$L$ctr_enc_epilogue:
1610 DB 0F3h,0C3h ;repret
1611
1612
; ---------------------------------------------------------------------
; ossl_bsaes_xts_encrypt -- XTS encryption, eight 16-byte blocks per
; pass through the bit-sliced core (NASM syntax, Win64 register usage).
;
; Register roles as set up by the code below:
;   rcx -> r12 : source pointer, consumed 16 bytes at a time
;   rdx -> r13 : destination pointer
;   r8  -> r14 : byte count (an unmodified copy stays in rbx)
;   r9  -> r15 : key structure; DWORD[240+r15] is read as the round count
;   stack slot at entry+40 -> r10 : key argument of the first
;                                   asm_AES_encrypt call
;   stack slot at entry+48 -> r11 : 16-byte input of that call; its
;                                   output at [32+rbp] is the first tweak
; NOTE(review): presumably the C arguments are (inp, out, len, key1,
; key2, iv) -- confirm against the prototype, which is not in this file.
;
; Frame: rbp = entry rsp - 280. xmm6-xmm15 are spilled at [64+rbp]..
; [208+rbp], [32+rbp] is a one-block scratch slot, the converted key
; schedule and eight tweak slots are carved out below rbp.
; ---------------------------------------------------------------------
1613global ossl_bsaes_xts_encrypt
1614
1615ALIGN 16
1616ossl_bsaes_xts_encrypt:
1617
1618 mov rax,rsp
1619$L$xts_enc_prologue:
; Callee-saved GPRs: rbp lands at [entry-8], r15 at [entry-48].
1620 push rbp
1621
1622 push rbx
1623
1624 push r12
1625
1626 push r13
1627
1628 push r14
1629
1630 push r15
1631
1632 lea rsp,[((-72))+rsp]
1633
; rsp = entry-120 here, so these read the stack slots at entry+40 and entry+48.
1634 mov r10,QWORD[160+rsp]
1635 mov r11,QWORD[168+rsp]
1636 lea rsp,[((-160))+rsp]
; Spill xmm6-xmm15; the epilogue reloads them from the same offsets off rbp.
1637 movaps XMMWORD[64+rsp],xmm6
1638 movaps XMMWORD[80+rsp],xmm7
1639 movaps XMMWORD[96+rsp],xmm8
1640 movaps XMMWORD[112+rsp],xmm9
1641 movaps XMMWORD[128+rsp],xmm10
1642 movaps XMMWORD[144+rsp],xmm11
1643 movaps XMMWORD[160+rsp],xmm12
1644 movaps XMMWORD[176+rsp],xmm13
1645 movaps XMMWORD[192+rsp],xmm14
1646 movaps XMMWORD[208+rsp],xmm15
1647$L$xts_enc_body:
; rbp is the fixed frame base from here on; rsp keeps moving below it.
1648 mov rbp,rsp
1649
1650 mov r12,rcx
1651 mov r13,rdx
1652 mov r14,r8
1653 mov r15,r9
1654
; asm_AES_encrypt(in = r11, out = [32+rbp], key = r10): produces the initial tweak.
1655 lea rcx,[r11]
1656 lea rdx,[32+rbp]
1657 lea r8,[r10]
1658 call asm_AES_encrypt
1659
1660 mov eax,DWORD[240+r15]
1661 mov rbx,r14
1662
; Reserve rounds*128 - 96 bytes below the frame for the converted key schedule.
1663 mov edx,eax
1664 shl rax,7
1665 sub rax,96
1666 sub rsp,rax
1667
; _bsaes_key_convert is entered with rax = destination, rcx = key, r10d = rounds.
1668 mov rax,rsp
1669 mov rcx,r15
1670 mov r10d,edx
1671 call _bsaes_key_convert
; Store xmm7^xmm6 at the address the helper left in rax.
; NOTE(review): presumably the final round-key fix-up -- the helper is outside this chunk.
1672 pxor xmm7,xmm6
1673 movdqa XMMWORD[rax],xmm7
1674
; r14 = length rounded down to whole blocks; rbx still holds the raw length.
1675 and r14,-16
; Eight 16-byte tweak slots at [rsp]..[112+rsp]; the key schedule is now at [128+rsp].
1676 sub rsp,0x80
1677 movdqa xmm6,XMMWORD[32+rbp]
1678
; xmm14 = per-dword sign mask of the tweak, xmm12 = $L$xts_magic (0x87,0,1,0).
1679 pxor xmm14,xmm14
1680 movdqa xmm12,XMMWORD[$L$xts_magic]
1681 pcmpgtd xmm14,xmm6
1682
1683 sub r14,0x80
1684 jc NEAR $L$xts_enc_short
1685 jmp NEAR $L$xts_enc_loop
1686
1687ALIGN 16
1688$L$xts_enc_loop:
; Main loop: 128 bytes per iteration. Each 8-instruction group copies the
; current tweak into a data register and its stack slot, then doubles it:
; paddq shifts both 64-bit halves left by one, and the shuffled sign mask
; ANDed with $L$xts_magic re-injects the bit carried from the low half (1)
; and folds the bit shifted out of the top with 0x87. Input loads and the
; input^tweak XORs are interleaved with the following groups.
1689 pshufd xmm13,xmm14,0x13
1690 pxor xmm14,xmm14
1691 movdqa xmm15,xmm6
1692 movdqa XMMWORD[rsp],xmm6
1693 paddq xmm6,xmm6
1694 pand xmm13,xmm12
1695 pcmpgtd xmm14,xmm6
1696 pxor xmm6,xmm13
1697 pshufd xmm13,xmm14,0x13
1698 pxor xmm14,xmm14
1699 movdqa xmm0,xmm6
1700 movdqa XMMWORD[16+rsp],xmm6
1701 paddq xmm6,xmm6
1702 pand xmm13,xmm12
1703 pcmpgtd xmm14,xmm6
1704 pxor xmm6,xmm13
1705 movdqu xmm7,XMMWORD[r12]
1706 pshufd xmm13,xmm14,0x13
1707 pxor xmm14,xmm14
1708 movdqa xmm1,xmm6
1709 movdqa XMMWORD[32+rsp],xmm6
1710 paddq xmm6,xmm6
1711 pand xmm13,xmm12
1712 pcmpgtd xmm14,xmm6
1713 pxor xmm6,xmm13
1714 movdqu xmm8,XMMWORD[16+r12]
1715 pxor xmm15,xmm7
1716 pshufd xmm13,xmm14,0x13
1717 pxor xmm14,xmm14
1718 movdqa xmm2,xmm6
1719 movdqa XMMWORD[48+rsp],xmm6
1720 paddq xmm6,xmm6
1721 pand xmm13,xmm12
1722 pcmpgtd xmm14,xmm6
1723 pxor xmm6,xmm13
1724 movdqu xmm9,XMMWORD[32+r12]
1725 pxor xmm0,xmm8
1726 pshufd xmm13,xmm14,0x13
1727 pxor xmm14,xmm14
1728 movdqa xmm3,xmm6
1729 movdqa XMMWORD[64+rsp],xmm6
1730 paddq xmm6,xmm6
1731 pand xmm13,xmm12
1732 pcmpgtd xmm14,xmm6
1733 pxor xmm6,xmm13
1734 movdqu xmm10,XMMWORD[48+r12]
1735 pxor xmm1,xmm9
1736 pshufd xmm13,xmm14,0x13
1737 pxor xmm14,xmm14
1738 movdqa xmm4,xmm6
1739 movdqa XMMWORD[80+rsp],xmm6
1740 paddq xmm6,xmm6
1741 pand xmm13,xmm12
1742 pcmpgtd xmm14,xmm6
1743 pxor xmm6,xmm13
1744 movdqu xmm11,XMMWORD[64+r12]
1745 pxor xmm2,xmm10
1746 pshufd xmm13,xmm14,0x13
1747 pxor xmm14,xmm14
1748 movdqa xmm5,xmm6
1749 movdqa XMMWORD[96+rsp],xmm6
1750 paddq xmm6,xmm6
1751 pand xmm13,xmm12
1752 pcmpgtd xmm14,xmm6
1753 pxor xmm6,xmm13
; From here xmm12-xmm14 are reused for input data; the magic mask is reloaded after the call.
1754 movdqu xmm12,XMMWORD[80+r12]
1755 pxor xmm3,xmm11
1756 movdqu xmm13,XMMWORD[96+r12]
1757 pxor xmm4,xmm12
1758 movdqu xmm14,XMMWORD[112+r12]
1759 lea r12,[128+r12]
1760 movdqa XMMWORD[112+rsp],xmm6
1761 pxor xmm5,xmm13
1762 lea rax,[128+rsp]
1763 pxor xmm6,xmm14
1764 mov r10d,edx
1765
; Inputs in xmm15,xmm0..xmm6; rax = key schedule, r10d = rounds.
1766 call _bsaes_encrypt8
1767
; Blocks 0..7 are taken from xmm15,xmm0,xmm3,xmm5,xmm2,xmm6,xmm1,xmm4 and
; XORed with their saved tweaks before being stored.
1768 pxor xmm15,XMMWORD[rsp]
1769 pxor xmm0,XMMWORD[16+rsp]
1770 movdqu XMMWORD[r13],xmm15
1771 pxor xmm3,XMMWORD[32+rsp]
1772 movdqu XMMWORD[16+r13],xmm0
1773 pxor xmm5,XMMWORD[48+rsp]
1774 movdqu XMMWORD[32+r13],xmm3
1775 pxor xmm2,XMMWORD[64+rsp]
1776 movdqu XMMWORD[48+r13],xmm5
1777 pxor xmm6,XMMWORD[80+rsp]
1778 movdqu XMMWORD[64+r13],xmm2
1779 pxor xmm1,XMMWORD[96+rsp]
1780 movdqu XMMWORD[80+r13],xmm6
1781 pxor xmm4,XMMWORD[112+rsp]
1782 movdqu XMMWORD[96+r13],xmm1
1783 movdqu XMMWORD[112+r13],xmm4
1784 lea r13,[128+r13]
1785
; Reload the eighth tweak and double it once to get the next iteration's first tweak.
1786 movdqa xmm6,XMMWORD[112+rsp]
1787 pxor xmm14,xmm14
1788 movdqa xmm12,XMMWORD[$L$xts_magic]
1789 pcmpgtd xmm14,xmm6
1790 pshufd xmm13,xmm14,0x13
1791 pxor xmm14,xmm14
1792 paddq xmm6,xmm6
1793 pand xmm13,xmm12
1794 pcmpgtd xmm14,xmm6
1795 pxor xmm6,xmm13
1796
1797 sub r14,0x80
1798 jnc NEAR $L$xts_enc_loop
1799
1800$L$xts_enc_short:
; Fewer than 128 bytes of whole blocks remain. r14 becomes the remaining
; byte count (0, 16, ..., 112); tweaks are generated as in the main loop
; with an early exit to $L$xts_enc_N once N blocks are covered.
1801 add r14,0x80
1802 jz NEAR $L$xts_enc_done
1803 pshufd xmm13,xmm14,0x13
1804 pxor xmm14,xmm14
1805 movdqa xmm15,xmm6
1806 movdqa XMMWORD[rsp],xmm6
1807 paddq xmm6,xmm6
1808 pand xmm13,xmm12
1809 pcmpgtd xmm14,xmm6
1810 pxor xmm6,xmm13
1811 pshufd xmm13,xmm14,0x13
1812 pxor xmm14,xmm14
1813 movdqa xmm0,xmm6
1814 movdqa XMMWORD[16+rsp],xmm6
1815 paddq xmm6,xmm6
1816 pand xmm13,xmm12
1817 pcmpgtd xmm14,xmm6
1818 pxor xmm6,xmm13
1819 movdqu xmm7,XMMWORD[r12]
1820 cmp r14,16
1821 je NEAR $L$xts_enc_1
1822 pshufd xmm13,xmm14,0x13
1823 pxor xmm14,xmm14
1824 movdqa xmm1,xmm6
1825 movdqa XMMWORD[32+rsp],xmm6
1826 paddq xmm6,xmm6
1827 pand xmm13,xmm12
1828 pcmpgtd xmm14,xmm6
1829 pxor xmm6,xmm13
1830 movdqu xmm8,XMMWORD[16+r12]
1831 cmp r14,32
1832 je NEAR $L$xts_enc_2
1833 pxor xmm15,xmm7
1834 pshufd xmm13,xmm14,0x13
1835 pxor xmm14,xmm14
1836 movdqa xmm2,xmm6
1837 movdqa XMMWORD[48+rsp],xmm6
1838 paddq xmm6,xmm6
1839 pand xmm13,xmm12
1840 pcmpgtd xmm14,xmm6
1841 pxor xmm6,xmm13
1842 movdqu xmm9,XMMWORD[32+r12]
1843 cmp r14,48
1844 je NEAR $L$xts_enc_3
1845 pxor xmm0,xmm8
1846 pshufd xmm13,xmm14,0x13
1847 pxor xmm14,xmm14
1848 movdqa xmm3,xmm6
1849 movdqa XMMWORD[64+rsp],xmm6
1850 paddq xmm6,xmm6
1851 pand xmm13,xmm12
1852 pcmpgtd xmm14,xmm6
1853 pxor xmm6,xmm13
1854 movdqu xmm10,XMMWORD[48+r12]
1855 cmp r14,64
1856 je NEAR $L$xts_enc_4
1857 pxor xmm1,xmm9
1858 pshufd xmm13,xmm14,0x13
1859 pxor xmm14,xmm14
1860 movdqa xmm4,xmm6
1861 movdqa XMMWORD[80+rsp],xmm6
1862 paddq xmm6,xmm6
1863 pand xmm13,xmm12
1864 pcmpgtd xmm14,xmm6
1865 pxor xmm6,xmm13
1866 movdqu xmm11,XMMWORD[64+r12]
1867 cmp r14,80
1868 je NEAR $L$xts_enc_5
1869 pxor xmm2,xmm10
1870 pshufd xmm13,xmm14,0x13
1871 pxor xmm14,xmm14
1872 movdqa xmm5,xmm6
1873 movdqa XMMWORD[96+rsp],xmm6
1874 paddq xmm6,xmm6
1875 pand xmm13,xmm12
1876 pcmpgtd xmm14,xmm6
1877 pxor xmm6,xmm13
1878 movdqu xmm12,XMMWORD[80+r12]
1879 cmp r14,96
1880 je NEAR $L$xts_enc_6
; Seven blocks remain: all eight lanes are encrypted, only seven results are stored.
1881 pxor xmm3,xmm11
1882 movdqu xmm13,XMMWORD[96+r12]
1883 pxor xmm4,xmm12
1884 movdqa XMMWORD[112+rsp],xmm6
1885 lea r12,[112+r12]
1886 pxor xmm5,xmm13
1887 lea rax,[128+rsp]
1888 mov r10d,edx
1889
1890 call _bsaes_encrypt8
1891
1892 pxor xmm15,XMMWORD[rsp]
1893 pxor xmm0,XMMWORD[16+rsp]
1894 movdqu XMMWORD[r13],xmm15
1895 pxor xmm3,XMMWORD[32+rsp]
1896 movdqu XMMWORD[16+r13],xmm0
1897 pxor xmm5,XMMWORD[48+rsp]
1898 movdqu XMMWORD[32+r13],xmm3
1899 pxor xmm2,XMMWORD[64+rsp]
1900 movdqu XMMWORD[48+r13],xmm5
1901 pxor xmm6,XMMWORD[80+rsp]
1902 movdqu XMMWORD[64+r13],xmm2
1903 pxor xmm1,XMMWORD[96+rsp]
1904 movdqu XMMWORD[80+r13],xmm6
1905 movdqu XMMWORD[96+r13],xmm1
1906 lea r13,[112+r13]
1907
; xmm6 = first unused tweak, needed by the stealing step at $L$xts_enc_done.
1908 movdqa xmm6,XMMWORD[112+rsp]
1909 jmp NEAR $L$xts_enc_done
1910ALIGN 16
1911$L$xts_enc_6:
; Six blocks remain: finish the two pending input^tweak XORs, store six results.
1912 pxor xmm3,xmm11
1913 lea r12,[96+r12]
1914 pxor xmm4,xmm12
1915 lea rax,[128+rsp]
1916 mov r10d,edx
1917
1918 call _bsaes_encrypt8
1919
1920 pxor xmm15,XMMWORD[rsp]
1921 pxor xmm0,XMMWORD[16+rsp]
1922 movdqu XMMWORD[r13],xmm15
1923 pxor xmm3,XMMWORD[32+rsp]
1924 movdqu XMMWORD[16+r13],xmm0
1925 pxor xmm5,XMMWORD[48+rsp]
1926 movdqu XMMWORD[32+r13],xmm3
1927 pxor xmm2,XMMWORD[64+rsp]
1928 movdqu XMMWORD[48+r13],xmm5
1929 pxor xmm6,XMMWORD[80+rsp]
1930 movdqu XMMWORD[64+r13],xmm2
1931 movdqu XMMWORD[80+r13],xmm6
1932 lea r13,[96+r13]
1933
1934 movdqa xmm6,XMMWORD[96+rsp]
1935 jmp NEAR $L$xts_enc_done
1936ALIGN 16
1937$L$xts_enc_5:
; Five blocks remain.
1938 pxor xmm2,xmm10
1939 lea r12,[80+r12]
1940 pxor xmm3,xmm11
1941 lea rax,[128+rsp]
1942 mov r10d,edx
1943
1944 call _bsaes_encrypt8
1945
1946 pxor xmm15,XMMWORD[rsp]
1947 pxor xmm0,XMMWORD[16+rsp]
1948 movdqu XMMWORD[r13],xmm15
1949 pxor xmm3,XMMWORD[32+rsp]
1950 movdqu XMMWORD[16+r13],xmm0
1951 pxor xmm5,XMMWORD[48+rsp]
1952 movdqu XMMWORD[32+r13],xmm3
1953 pxor xmm2,XMMWORD[64+rsp]
1954 movdqu XMMWORD[48+r13],xmm5
1955 movdqu XMMWORD[64+r13],xmm2
1956 lea r13,[80+r13]
1957
1958 movdqa xmm6,XMMWORD[80+rsp]
1959 jmp NEAR $L$xts_enc_done
1960ALIGN 16
1961$L$xts_enc_4:
; Four blocks remain.
1962 pxor xmm1,xmm9
1963 lea r12,[64+r12]
1964 pxor xmm2,xmm10
1965 lea rax,[128+rsp]
1966 mov r10d,edx
1967
1968 call _bsaes_encrypt8
1969
1970 pxor xmm15,XMMWORD[rsp]
1971 pxor xmm0,XMMWORD[16+rsp]
1972 movdqu XMMWORD[r13],xmm15
1973 pxor xmm3,XMMWORD[32+rsp]
1974 movdqu XMMWORD[16+r13],xmm0
1975 pxor xmm5,XMMWORD[48+rsp]
1976 movdqu XMMWORD[32+r13],xmm3
1977 movdqu XMMWORD[48+r13],xmm5
1978 lea r13,[64+r13]
1979
1980 movdqa xmm6,XMMWORD[64+rsp]
1981 jmp NEAR $L$xts_enc_done
1982ALIGN 16
1983$L$xts_enc_3:
; Three blocks remain.
1984 pxor xmm0,xmm8
1985 lea r12,[48+r12]
1986 pxor xmm1,xmm9
1987 lea rax,[128+rsp]
1988 mov r10d,edx
1989
1990 call _bsaes_encrypt8
1991
1992 pxor xmm15,XMMWORD[rsp]
1993 pxor xmm0,XMMWORD[16+rsp]
1994 movdqu XMMWORD[r13],xmm15
1995 pxor xmm3,XMMWORD[32+rsp]
1996 movdqu XMMWORD[16+r13],xmm0
1997 movdqu XMMWORD[32+r13],xmm3
1998 lea r13,[48+r13]
1999
2000 movdqa xmm6,XMMWORD[48+rsp]
2001 jmp NEAR $L$xts_enc_done
2002ALIGN 16
2003$L$xts_enc_2:
; Two blocks remain.
2004 pxor xmm15,xmm7
2005 lea r12,[32+r12]
2006 pxor xmm0,xmm8
2007 lea rax,[128+rsp]
2008 mov r10d,edx
2009
2010 call _bsaes_encrypt8
2011
2012 pxor xmm15,XMMWORD[rsp]
2013 pxor xmm0,XMMWORD[16+rsp]
2014 movdqu XMMWORD[r13],xmm15
2015 movdqu XMMWORD[16+r13],xmm0
2016 lea r13,[32+r13]
2017
2018 movdqa xmm6,XMMWORD[32+rsp]
2019 jmp NEAR $L$xts_enc_done
2020ALIGN 16
2021$L$xts_enc_1:
; One block remains: input^tweak goes through asm_AES_encrypt in place at
; [32+rbp] (key = r15), then the tweak still held in xmm15 is XORed back in.
2022 pxor xmm7,xmm15
2023 lea r12,[16+r12]
2024 movdqa XMMWORD[32+rbp],xmm7
2025 lea rcx,[32+rbp]
2026 lea rdx,[32+rbp]
2027 lea r8,[r15]
2028 call asm_AES_encrypt
2029 pxor xmm15,XMMWORD[32+rbp]
2030
2031
2032
2033
2034
2035 movdqu XMMWORD[r13],xmm15
2036 lea r13,[16+r13]
2037
2038 movdqa xmm6,XMMWORD[16+rsp]
2039
2040$L$xts_enc_done:
; ebx = length mod 16. Zero means no partial final block.
2041 and ebx,15
2042 jz NEAR $L$xts_enc_ret
2043 mov rdx,r13
2044
2045$L$xts_enc_steal:
; Per trailing byte: the output byte of the last full block moves to the
; partial block at [rdx], and the input byte replaces it at [rdx-16].
2046 movzx eax,BYTE[r12]
2047 movzx ecx,BYTE[((-16))+rdx]
2048 lea r12,[1+r12]
2049 mov BYTE[((-16))+rdx],al
2050 mov BYTE[rdx],cl
2051 lea rdx,[1+rdx]
2052 sub ebx,1
2053 jnz NEAR $L$xts_enc_steal
2054
; Encrypt the patched block at [r13-16] with tweak xmm6 and store it back.
2055 movdqu xmm15,XMMWORD[((-16))+r13]
2056 lea rcx,[32+rbp]
2057 pxor xmm15,xmm6
2058 lea rdx,[32+rbp]
2059 movdqa XMMWORD[32+rbp],xmm15
2060 lea r8,[r15]
2061 call asm_AES_encrypt
2062 pxor xmm6,XMMWORD[32+rbp]
2063 movdqu XMMWORD[(-16)+r13],xmm6
2064
2065$L$xts_enc_ret:
; Zero the stack from rsp up to rbp (tweak slots and key schedule), 32 bytes per step.
2066 lea rax,[rsp]
2067 pxor xmm0,xmm0
2068$L$xts_enc_bzero:
2069 movdqa XMMWORD[rax],xmm0
2070 movdqa XMMWORD[16+rax],xmm0
2071 lea rax,[32+rax]
2072 cmp rbp,rax
2073 ja NEAR $L$xts_enc_bzero
2074
2075 lea rax,[120+rbp]
2076
2077 movaps xmm6,XMMWORD[64+rbp]
2078 movaps xmm7,XMMWORD[80+rbp]
2079 movaps xmm8,XMMWORD[96+rbp]
2080 movaps xmm9,XMMWORD[112+rbp]
2081 movaps xmm10,XMMWORD[128+rbp]
2082 movaps xmm11,XMMWORD[144+rbp]
2083 movaps xmm12,XMMWORD[160+rbp]
2084 movaps xmm13,XMMWORD[176+rbp]
2085 movaps xmm14,XMMWORD[192+rbp]
2086 movaps xmm15,XMMWORD[208+rbp]
; rax = rbp + 280 = rsp value at function entry.
2087 lea rax,[160+rax]
2088$L$xts_enc_tail:
2089 mov r15,QWORD[((-48))+rax]
2090
2091 mov r14,QWORD[((-40))+rax]
2092
2093 mov r13,QWORD[((-32))+rax]
2094
2095 mov r12,QWORD[((-24))+rax]
2096
2097 mov rbx,QWORD[((-16))+rax]
2098
2099 mov rbp,QWORD[((-8))+rax]
2100
2101 lea rsp,[rax]
2102
2103$L$xts_enc_epilogue:
2104 DB 0F3h,0C3h ;repret
2105
2106
2107
; ---------------------------------------------------------------------
; ossl_bsaes_xts_decrypt -- XTS decryption, eight 16-byte blocks per
; pass through the bit-sliced core (NASM syntax, Win64 register usage).
;
; Register roles as set up by the code below:
;   rcx -> r12 : source pointer, consumed 16 bytes at a time
;   rdx -> r13 : destination pointer
;   r8  -> r14 : byte count (an unmodified copy stays in rbx)
;   r9  -> r15 : key structure; DWORD[240+r15] is read as the round count
;   stack slot at entry+40 -> r10 : key argument of the first
;                                   asm_AES_encrypt call
;   stack slot at entry+48 -> r11 : 16-byte input of that call; its
;                                   output at [32+rbp] is the first tweak
; NOTE(review): presumably the C arguments are (inp, out, len, key1,
; key2, iv) -- confirm against the prototype, which is not in this file.
;
; Frame layout matches ossl_bsaes_xts_encrypt: rbp = entry rsp - 280,
; xmm6-xmm15 at [64+rbp]..[208+rbp], scratch block at [32+rbp].
; When the length is not a multiple of 16, the last full block is held
; back from the bulk path and handled together with the partial block.
; ---------------------------------------------------------------------
2108global ossl_bsaes_xts_decrypt
2109
2110ALIGN 16
2111ossl_bsaes_xts_decrypt:
2112
2113 mov rax,rsp
2114$L$xts_dec_prologue:
; Callee-saved GPRs: rbp lands at [entry-8], r15 at [entry-48].
2115 push rbp
2116
2117 push rbx
2118
2119 push r12
2120
2121 push r13
2122
2123 push r14
2124
2125 push r15
2126
2127 lea rsp,[((-72))+rsp]
2128
; rsp = entry-120 here, so these read the stack slots at entry+40 and entry+48.
2129 mov r10,QWORD[160+rsp]
2130 mov r11,QWORD[168+rsp]
2131 lea rsp,[((-160))+rsp]
; Spill xmm6-xmm15; the epilogue reloads them from the same offsets off rbp.
2132 movaps XMMWORD[64+rsp],xmm6
2133 movaps XMMWORD[80+rsp],xmm7
2134 movaps XMMWORD[96+rsp],xmm8
2135 movaps XMMWORD[112+rsp],xmm9
2136 movaps XMMWORD[128+rsp],xmm10
2137 movaps XMMWORD[144+rsp],xmm11
2138 movaps XMMWORD[160+rsp],xmm12
2139 movaps XMMWORD[176+rsp],xmm13
2140 movaps XMMWORD[192+rsp],xmm14
2141 movaps XMMWORD[208+rsp],xmm15
2142$L$xts_dec_body:
; rbp is the fixed frame base from here on; rsp keeps moving below it.
2143 mov rbp,rsp
2144 mov r12,rcx
2145 mov r13,rdx
2146 mov r14,r8
2147 mov r15,r9
2148
; The tweak is produced with asm_AES_encrypt even on the decrypt path.
2149 lea rcx,[r11]
2150 lea rdx,[32+rbp]
2151 lea r8,[r10]
2152 call asm_AES_encrypt
2153
2154 mov eax,DWORD[240+r15]
2155 mov rbx,r14
2156
; Reserve rounds*128 - 96 bytes below the frame for the converted key schedule.
2157 mov edx,eax
2158 shl rax,7
2159 sub rax,96
2160 sub rsp,rax
2161
; _bsaes_key_convert is entered with rax = destination, rcx = key, r10d = rounds.
2162 mov rax,rsp
2163 mov rcx,r15
2164 mov r10d,edx
2165 call _bsaes_key_convert
; xmm6 is stored at the address the helper left in rax, and the first
; schedule slot at [rsp] is replaced by itself XOR xmm7.
; NOTE(review): presumably the decrypt-direction round-key fix-up -- the helper is outside this chunk.
2166 pxor xmm7,XMMWORD[rsp]
2167 movdqa XMMWORD[rax],xmm6
2168 movdqa XMMWORD[rsp],xmm7
2169
; r14 = whole-block length, minus 16 more when the raw length (ebx) has a remainder.
2170 xor eax,eax
2171 and r14,-16
2172 test ebx,15
2173 setnz al
2174 shl rax,4
2175 sub r14,rax
2176
; Eight 16-byte tweak slots at [rsp]..[112+rsp]; the key schedule is now at [128+rsp].
2177 sub rsp,0x80
2178 movdqa xmm6,XMMWORD[32+rbp]
2179
; xmm14 = per-dword sign mask of the tweak, xmm12 = $L$xts_magic (0x87,0,1,0).
2180 pxor xmm14,xmm14
2181 movdqa xmm12,XMMWORD[$L$xts_magic]
2182 pcmpgtd xmm14,xmm6
2183
2184 sub r14,0x80
2185 jc NEAR $L$xts_dec_short
2186 jmp NEAR $L$xts_dec_loop
2187
2188ALIGN 16
2189$L$xts_dec_loop:
; Main loop: 128 bytes per iteration. Each 8-instruction group copies the
; current tweak into a data register and its stack slot, then doubles it:
; paddq shifts both 64-bit halves left by one, and the shuffled sign mask
; ANDed with $L$xts_magic re-injects the bit carried from the low half (1)
; and folds the bit shifted out of the top with 0x87. Input loads and the
; input^tweak XORs are interleaved with the following groups.
2190 pshufd xmm13,xmm14,0x13
2191 pxor xmm14,xmm14
2192 movdqa xmm15,xmm6
2193 movdqa XMMWORD[rsp],xmm6
2194 paddq xmm6,xmm6
2195 pand xmm13,xmm12
2196 pcmpgtd xmm14,xmm6
2197 pxor xmm6,xmm13
2198 pshufd xmm13,xmm14,0x13
2199 pxor xmm14,xmm14
2200 movdqa xmm0,xmm6
2201 movdqa XMMWORD[16+rsp],xmm6
2202 paddq xmm6,xmm6
2203 pand xmm13,xmm12
2204 pcmpgtd xmm14,xmm6
2205 pxor xmm6,xmm13
2206 movdqu xmm7,XMMWORD[r12]
2207 pshufd xmm13,xmm14,0x13
2208 pxor xmm14,xmm14
2209 movdqa xmm1,xmm6
2210 movdqa XMMWORD[32+rsp],xmm6
2211 paddq xmm6,xmm6
2212 pand xmm13,xmm12
2213 pcmpgtd xmm14,xmm6
2214 pxor xmm6,xmm13
2215 movdqu xmm8,XMMWORD[16+r12]
2216 pxor xmm15,xmm7
2217 pshufd xmm13,xmm14,0x13
2218 pxor xmm14,xmm14
2219 movdqa xmm2,xmm6
2220 movdqa XMMWORD[48+rsp],xmm6
2221 paddq xmm6,xmm6
2222 pand xmm13,xmm12
2223 pcmpgtd xmm14,xmm6
2224 pxor xmm6,xmm13
2225 movdqu xmm9,XMMWORD[32+r12]
2226 pxor xmm0,xmm8
2227 pshufd xmm13,xmm14,0x13
2228 pxor xmm14,xmm14
2229 movdqa xmm3,xmm6
2230 movdqa XMMWORD[64+rsp],xmm6
2231 paddq xmm6,xmm6
2232 pand xmm13,xmm12
2233 pcmpgtd xmm14,xmm6
2234 pxor xmm6,xmm13
2235 movdqu xmm10,XMMWORD[48+r12]
2236 pxor xmm1,xmm9
2237 pshufd xmm13,xmm14,0x13
2238 pxor xmm14,xmm14
2239 movdqa xmm4,xmm6
2240 movdqa XMMWORD[80+rsp],xmm6
2241 paddq xmm6,xmm6
2242 pand xmm13,xmm12
2243 pcmpgtd xmm14,xmm6
2244 pxor xmm6,xmm13
2245 movdqu xmm11,XMMWORD[64+r12]
2246 pxor xmm2,xmm10
2247 pshufd xmm13,xmm14,0x13
2248 pxor xmm14,xmm14
2249 movdqa xmm5,xmm6
2250 movdqa XMMWORD[96+rsp],xmm6
2251 paddq xmm6,xmm6
2252 pand xmm13,xmm12
2253 pcmpgtd xmm14,xmm6
2254 pxor xmm6,xmm13
; From here xmm12-xmm14 are reused for input data; the magic mask is reloaded after the call.
2255 movdqu xmm12,XMMWORD[80+r12]
2256 pxor xmm3,xmm11
2257 movdqu xmm13,XMMWORD[96+r12]
2258 pxor xmm4,xmm12
2259 movdqu xmm14,XMMWORD[112+r12]
2260 lea r12,[128+r12]
2261 movdqa XMMWORD[112+rsp],xmm6
2262 pxor xmm5,xmm13
2263 lea rax,[128+rsp]
2264 pxor xmm6,xmm14
2265 mov r10d,edx
2266
; Inputs in xmm15,xmm0..xmm6; rax = key schedule, r10d = rounds.
2267 call _bsaes_decrypt8
2268
; Blocks 0..7 are taken from xmm15,xmm0,xmm5,xmm3,xmm1,xmm6,xmm2,xmm4
; (a different lane order than the encrypt path) and XORed with their tweaks.
2269 pxor xmm15,XMMWORD[rsp]
2270 pxor xmm0,XMMWORD[16+rsp]
2271 movdqu XMMWORD[r13],xmm15
2272 pxor xmm5,XMMWORD[32+rsp]
2273 movdqu XMMWORD[16+r13],xmm0
2274 pxor xmm3,XMMWORD[48+rsp]
2275 movdqu XMMWORD[32+r13],xmm5
2276 pxor xmm1,XMMWORD[64+rsp]
2277 movdqu XMMWORD[48+r13],xmm3
2278 pxor xmm6,XMMWORD[80+rsp]
2279 movdqu XMMWORD[64+r13],xmm1
2280 pxor xmm2,XMMWORD[96+rsp]
2281 movdqu XMMWORD[80+r13],xmm6
2282 pxor xmm4,XMMWORD[112+rsp]
2283 movdqu XMMWORD[96+r13],xmm2
2284 movdqu XMMWORD[112+r13],xmm4
2285 lea r13,[128+r13]
2286
; Reload the eighth tweak and double it once to get the next iteration's first tweak.
2287 movdqa xmm6,XMMWORD[112+rsp]
2288 pxor xmm14,xmm14
2289 movdqa xmm12,XMMWORD[$L$xts_magic]
2290 pcmpgtd xmm14,xmm6
2291 pshufd xmm13,xmm14,0x13
2292 pxor xmm14,xmm14
2293 paddq xmm6,xmm6
2294 pand xmm13,xmm12
2295 pcmpgtd xmm14,xmm6
2296 pxor xmm6,xmm13
2297
2298 sub r14,0x80
2299 jnc NEAR $L$xts_dec_loop
2300
2301$L$xts_dec_short:
; Fewer than 128 bytes of bulk blocks remain. r14 becomes the remaining
; byte count (0, 16, ..., 112); tweaks are generated as in the main loop
; with an early exit to $L$xts_dec_N once N blocks are covered.
2302 add r14,0x80
2303 jz NEAR $L$xts_dec_done
2304 pshufd xmm13,xmm14,0x13
2305 pxor xmm14,xmm14
2306 movdqa xmm15,xmm6
2307 movdqa XMMWORD[rsp],xmm6
2308 paddq xmm6,xmm6
2309 pand xmm13,xmm12
2310 pcmpgtd xmm14,xmm6
2311 pxor xmm6,xmm13
2312 pshufd xmm13,xmm14,0x13
2313 pxor xmm14,xmm14
2314 movdqa xmm0,xmm6
2315 movdqa XMMWORD[16+rsp],xmm6
2316 paddq xmm6,xmm6
2317 pand xmm13,xmm12
2318 pcmpgtd xmm14,xmm6
2319 pxor xmm6,xmm13
2320 movdqu xmm7,XMMWORD[r12]
2321 cmp r14,16
2322 je NEAR $L$xts_dec_1
2323 pshufd xmm13,xmm14,0x13
2324 pxor xmm14,xmm14
2325 movdqa xmm1,xmm6
2326 movdqa XMMWORD[32+rsp],xmm6
2327 paddq xmm6,xmm6
2328 pand xmm13,xmm12
2329 pcmpgtd xmm14,xmm6
2330 pxor xmm6,xmm13
2331 movdqu xmm8,XMMWORD[16+r12]
2332 cmp r14,32
2333 je NEAR $L$xts_dec_2
2334 pxor xmm15,xmm7
2335 pshufd xmm13,xmm14,0x13
2336 pxor xmm14,xmm14
2337 movdqa xmm2,xmm6
2338 movdqa XMMWORD[48+rsp],xmm6
2339 paddq xmm6,xmm6
2340 pand xmm13,xmm12
2341 pcmpgtd xmm14,xmm6
2342 pxor xmm6,xmm13
2343 movdqu xmm9,XMMWORD[32+r12]
2344 cmp r14,48
2345 je NEAR $L$xts_dec_3
2346 pxor xmm0,xmm8
2347 pshufd xmm13,xmm14,0x13
2348 pxor xmm14,xmm14
2349 movdqa xmm3,xmm6
2350 movdqa XMMWORD[64+rsp],xmm6
2351 paddq xmm6,xmm6
2352 pand xmm13,xmm12
2353 pcmpgtd xmm14,xmm6
2354 pxor xmm6,xmm13
2355 movdqu xmm10,XMMWORD[48+r12]
2356 cmp r14,64
2357 je NEAR $L$xts_dec_4
2358 pxor xmm1,xmm9
2359 pshufd xmm13,xmm14,0x13
2360 pxor xmm14,xmm14
2361 movdqa xmm4,xmm6
2362 movdqa XMMWORD[80+rsp],xmm6
2363 paddq xmm6,xmm6
2364 pand xmm13,xmm12
2365 pcmpgtd xmm14,xmm6
2366 pxor xmm6,xmm13
2367 movdqu xmm11,XMMWORD[64+r12]
2368 cmp r14,80
2369 je NEAR $L$xts_dec_5
2370 pxor xmm2,xmm10
2371 pshufd xmm13,xmm14,0x13
2372 pxor xmm14,xmm14
2373 movdqa xmm5,xmm6
2374 movdqa XMMWORD[96+rsp],xmm6
2375 paddq xmm6,xmm6
2376 pand xmm13,xmm12
2377 pcmpgtd xmm14,xmm6
2378 pxor xmm6,xmm13
2379 movdqu xmm12,XMMWORD[80+r12]
2380 cmp r14,96
2381 je NEAR $L$xts_dec_6
; Seven blocks remain: all eight lanes are decrypted, only seven results are stored.
2382 pxor xmm3,xmm11
2383 movdqu xmm13,XMMWORD[96+r12]
2384 pxor xmm4,xmm12
2385 movdqa XMMWORD[112+rsp],xmm6
2386 lea r12,[112+r12]
2387 pxor xmm5,xmm13
2388 lea rax,[128+rsp]
2389 mov r10d,edx
2390
2391 call _bsaes_decrypt8
2392
2393 pxor xmm15,XMMWORD[rsp]
2394 pxor xmm0,XMMWORD[16+rsp]
2395 movdqu XMMWORD[r13],xmm15
2396 pxor xmm5,XMMWORD[32+rsp]
2397 movdqu XMMWORD[16+r13],xmm0
2398 pxor xmm3,XMMWORD[48+rsp]
2399 movdqu XMMWORD[32+r13],xmm5
2400 pxor xmm1,XMMWORD[64+rsp]
2401 movdqu XMMWORD[48+r13],xmm3
2402 pxor xmm6,XMMWORD[80+rsp]
2403 movdqu XMMWORD[64+r13],xmm1
2404 pxor xmm2,XMMWORD[96+rsp]
2405 movdqu XMMWORD[80+r13],xmm6
2406 movdqu XMMWORD[96+r13],xmm2
2407 lea r13,[112+r13]
2408
; xmm6 = first unused tweak, needed by the stealing step at $L$xts_dec_done.
2409 movdqa xmm6,XMMWORD[112+rsp]
2410 jmp NEAR $L$xts_dec_done
2411ALIGN 16
2412$L$xts_dec_6:
; Six blocks remain: finish the two pending input^tweak XORs, store six results.
2413 pxor xmm3,xmm11
2414 lea r12,[96+r12]
2415 pxor xmm4,xmm12
2416 lea rax,[128+rsp]
2417 mov r10d,edx
2418
2419 call _bsaes_decrypt8
2420
2421 pxor xmm15,XMMWORD[rsp]
2422 pxor xmm0,XMMWORD[16+rsp]
2423 movdqu XMMWORD[r13],xmm15
2424 pxor xmm5,XMMWORD[32+rsp]
2425 movdqu XMMWORD[16+r13],xmm0
2426 pxor xmm3,XMMWORD[48+rsp]
2427 movdqu XMMWORD[32+r13],xmm5
2428 pxor xmm1,XMMWORD[64+rsp]
2429 movdqu XMMWORD[48+r13],xmm3
2430 pxor xmm6,XMMWORD[80+rsp]
2431 movdqu XMMWORD[64+r13],xmm1
2432 movdqu XMMWORD[80+r13],xmm6
2433 lea r13,[96+r13]
2434
2435 movdqa xmm6,XMMWORD[96+rsp]
2436 jmp NEAR $L$xts_dec_done
2437ALIGN 16
2438$L$xts_dec_5:
; Five blocks remain.
2439 pxor xmm2,xmm10
2440 lea r12,[80+r12]
2441 pxor xmm3,xmm11
2442 lea rax,[128+rsp]
2443 mov r10d,edx
2444
2445 call _bsaes_decrypt8
2446
2447 pxor xmm15,XMMWORD[rsp]
2448 pxor xmm0,XMMWORD[16+rsp]
2449 movdqu XMMWORD[r13],xmm15
2450 pxor xmm5,XMMWORD[32+rsp]
2451 movdqu XMMWORD[16+r13],xmm0
2452 pxor xmm3,XMMWORD[48+rsp]
2453 movdqu XMMWORD[32+r13],xmm5
2454 pxor xmm1,XMMWORD[64+rsp]
2455 movdqu XMMWORD[48+r13],xmm3
2456 movdqu XMMWORD[64+r13],xmm1
2457 lea r13,[80+r13]
2458
2459 movdqa xmm6,XMMWORD[80+rsp]
2460 jmp NEAR $L$xts_dec_done
2461ALIGN 16
2462$L$xts_dec_4:
; Four blocks remain.
2463 pxor xmm1,xmm9
2464 lea r12,[64+r12]
2465 pxor xmm2,xmm10
2466 lea rax,[128+rsp]
2467 mov r10d,edx
2468
2469 call _bsaes_decrypt8
2470
2471 pxor xmm15,XMMWORD[rsp]
2472 pxor xmm0,XMMWORD[16+rsp]
2473 movdqu XMMWORD[r13],xmm15
2474 pxor xmm5,XMMWORD[32+rsp]
2475 movdqu XMMWORD[16+r13],xmm0
2476 pxor xmm3,XMMWORD[48+rsp]
2477 movdqu XMMWORD[32+r13],xmm5
2478 movdqu XMMWORD[48+r13],xmm3
2479 lea r13,[64+r13]
2480
2481 movdqa xmm6,XMMWORD[64+rsp]
2482 jmp NEAR $L$xts_dec_done
2483ALIGN 16
2484$L$xts_dec_3:
; Three blocks remain.
2485 pxor xmm0,xmm8
2486 lea r12,[48+r12]
2487 pxor xmm1,xmm9
2488 lea rax,[128+rsp]
2489 mov r10d,edx
2490
2491 call _bsaes_decrypt8
2492
2493 pxor xmm15,XMMWORD[rsp]
2494 pxor xmm0,XMMWORD[16+rsp]
2495 movdqu XMMWORD[r13],xmm15
2496 pxor xmm5,XMMWORD[32+rsp]
2497 movdqu XMMWORD[16+r13],xmm0
2498 movdqu XMMWORD[32+r13],xmm5
2499 lea r13,[48+r13]
2500
2501 movdqa xmm6,XMMWORD[48+rsp]
2502 jmp NEAR $L$xts_dec_done
2503ALIGN 16
2504$L$xts_dec_2:
; Two blocks remain.
2505 pxor xmm15,xmm7
2506 lea r12,[32+r12]
2507 pxor xmm0,xmm8
2508 lea rax,[128+rsp]
2509 mov r10d,edx
2510
2511 call _bsaes_decrypt8
2512
2513 pxor xmm15,XMMWORD[rsp]
2514 pxor xmm0,XMMWORD[16+rsp]
2515 movdqu XMMWORD[r13],xmm15
2516 movdqu XMMWORD[16+r13],xmm0
2517 lea r13,[32+r13]
2518
2519 movdqa xmm6,XMMWORD[32+rsp]
2520 jmp NEAR $L$xts_dec_done
2521ALIGN 16
2522$L$xts_dec_1:
; One block remains: input^tweak goes through asm_AES_decrypt in place at
; [32+rbp] (key = r15), then the tweak still held in xmm15 is XORed back in.
2523 pxor xmm7,xmm15
2524 lea r12,[16+r12]
2525 movdqa XMMWORD[32+rbp],xmm7
2526 lea rcx,[32+rbp]
2527 lea rdx,[32+rbp]
2528 lea r8,[r15]
2529 call asm_AES_decrypt
2530 pxor xmm15,XMMWORD[32+rbp]
2531
2532
2533
2534
2535
2536 movdqu XMMWORD[r13],xmm15
2537 lea r13,[16+r13]
2538
2539 movdqa xmm6,XMMWORD[16+rsp]
2540
2541$L$xts_dec_done:
; ebx = length mod 16. Zero means no partial final block.
2542 and ebx,15
2543 jz NEAR $L$xts_dec_ret
2544
; Keep the current tweak in xmm5 and advance xmm6 by one doubling: the
; held-back full block at [r12] is decrypted with the later tweak first.
2545 pxor xmm14,xmm14
2546 movdqa xmm12,XMMWORD[$L$xts_magic]
2547 pcmpgtd xmm14,xmm6
2548 pshufd xmm13,xmm14,0x13
2549 movdqa xmm5,xmm6
2550 paddq xmm6,xmm6
2551 pand xmm13,xmm12
2552 movdqu xmm15,XMMWORD[r12]
2553 pxor xmm6,xmm13
2554
2555 lea rcx,[32+rbp]
2556 pxor xmm15,xmm6
2557 lea rdx,[32+rbp]
2558 movdqa XMMWORD[32+rbp],xmm15
2559 lea r8,[r15]
2560 call asm_AES_decrypt
2561 pxor xmm6,XMMWORD[32+rbp]
2562 mov rdx,r13
2563 movdqu XMMWORD[r13],xmm6
2564
2565$L$xts_dec_steal:
; Per trailing byte: the just-written output byte at [rdx] moves to the
; partial block at [rdx+16], and the input byte at [r12+16] replaces it.
2566 movzx eax,BYTE[16+r12]
2567 movzx ecx,BYTE[rdx]
2568 lea r12,[1+r12]
2569 mov BYTE[rdx],al
2570 mov BYTE[16+rdx],cl
2571 lea rdx,[1+rdx]
2572 sub ebx,1
2573 jnz NEAR $L$xts_dec_steal
2574
; Decrypt the patched block at [r13] with the earlier tweak saved in xmm5.
2575 movdqu xmm15,XMMWORD[r13]
2576 lea rcx,[32+rbp]
2577 pxor xmm15,xmm5
2578 lea rdx,[32+rbp]
2579 movdqa XMMWORD[32+rbp],xmm15
2580 lea r8,[r15]
2581 call asm_AES_decrypt
2582 pxor xmm5,XMMWORD[32+rbp]
2583 movdqu XMMWORD[r13],xmm5
2584
2585$L$xts_dec_ret:
; Zero the stack from rsp up to rbp (tweak slots and key schedule), 32 bytes per step.
2586 lea rax,[rsp]
2587 pxor xmm0,xmm0
2588$L$xts_dec_bzero:
2589 movdqa XMMWORD[rax],xmm0
2590 movdqa XMMWORD[16+rax],xmm0
2591 lea rax,[32+rax]
2592 cmp rbp,rax
2593 ja NEAR $L$xts_dec_bzero
2594
2595 lea rax,[120+rbp]
2596
2597 movaps xmm6,XMMWORD[64+rbp]
2598 movaps xmm7,XMMWORD[80+rbp]
2599 movaps xmm8,XMMWORD[96+rbp]
2600 movaps xmm9,XMMWORD[112+rbp]
2601 movaps xmm10,XMMWORD[128+rbp]
2602 movaps xmm11,XMMWORD[144+rbp]
2603 movaps xmm12,XMMWORD[160+rbp]
2604 movaps xmm13,XMMWORD[176+rbp]
2605 movaps xmm14,XMMWORD[192+rbp]
2606 movaps xmm15,XMMWORD[208+rbp]
; rax = rbp + 280 = rsp value at function entry.
2607 lea rax,[160+rax]
2608$L$xts_dec_tail:
2609 mov r15,QWORD[((-48))+rax]
2610
2611 mov r14,QWORD[((-40))+rax]
2612
2613 mov r13,QWORD[((-32))+rax]
2614
2615 mov r12,QWORD[((-24))+rax]
2616
2617 mov rbx,QWORD[((-16))+rax]
2618
2619 mov rbp,QWORD[((-8))+rax]
2620
2621 lea rsp,[rax]
2622
2623$L$xts_dec_epilogue:
2624 DB 0F3h,0C3h ;repret
2625
2626
2627
; ---------------------------------------------------------------------
; _bsaes_const -- read-only constant pool shared by the routines in this
; file. Entries are 16 bytes each and are addressed both by label and by
; fixed offsets from a neighbouring label (e.g. [80+$L$BS0] reaches
; $L$M0SR, [((-16))+$L$ADD1] reaches $L$SWPUPM0SR, [112+$L$ADD1] reaches
; $L$ADD8), so their order and size must not change.
; ---------------------------------------------------------------------
2628ALIGN 64
2629_bsaes_const:
; 16-byte permutation tables. $L$M0SR is used as a pshufb mask by
; _bsaes_encrypt8; NOTE(review): the other tables here are presumably
; pshufb masks for code outside this chunk -- confirm.
2630$L$M0ISR:
2631 DQ 0x0a0e0206070b0f03,0x0004080c0d010509
2632$L$ISRM0:
2633 DQ 0x01040b0e0205080f,0x0306090c00070a0d
2634$L$ISR:
2635 DQ 0x0504070602010003,0x0f0e0d0c080b0a09
; Bit masks selecting every other bit, bit pair and nibble; the bitslice
; step pairs them with shifts by 1, 2 and (presumably) 4.
2636$L$BS0:
2637 DQ 0x5555555555555555,0x5555555555555555
2638$L$BS1:
2639 DQ 0x3333333333333333,0x3333333333333333
2640$L$BS2:
2641 DQ 0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f
2642$L$SR:
2643 DQ 0x0504070600030201,0x0f0e0d0c0a09080b
2644$L$SRM0:
2645 DQ 0x0304090e00050a0f,0x01060b0c0207080d
2646$L$M0SR:
2647 DQ 0x0a0e02060f03070b,0x0004080c05090d01
2648$L$SWPUP:
2649 DQ 0x0706050403020100,0x0c0d0e0f0b0a0908
2650$L$SWPUPM0SR:
2651 DQ 0x0a0d02060c03070b,0x0004080f05090e01
; Counter increments 1..8 held in the top dword; the CTR code applies them with paddd.
2652$L$ADD1:
2653 DQ 0x0000000000000000,0x0000000100000000
2654$L$ADD2:
2655 DQ 0x0000000000000000,0x0000000200000000
2656$L$ADD3:
2657 DQ 0x0000000000000000,0x0000000300000000
2658$L$ADD4:
2659 DQ 0x0000000000000000,0x0000000400000000
2660$L$ADD5:
2661 DQ 0x0000000000000000,0x0000000500000000
2662$L$ADD6:
2663 DQ 0x0000000000000000,0x0000000600000000
2664$L$ADD7:
2665 DQ 0x0000000000000000,0x0000000700000000
2666$L$ADD8:
2667 DQ 0x0000000000000000,0x0000000800000000
; XTS tweak-doubling mask: 0x87 is folded into the low qword when the top
; bit is shifted out, 1 carries bit 63 into the high qword.
2668$L$xts_magic:
2669 DD 0x87,0,1,0
; Single-bit byte masks (0x01, 0x02, 0x04, 0x08 replicated).
; NOTE(review): presumably consumed by _bsaes_key_convert -- not visible here.
2670$L$masks:
2671 DQ 0x0101010101010101,0x0101010101010101
2672 DQ 0x0202020202020202,0x0202020202020202
2673 DQ 0x0404040404040404,0x0404040404040404
2674 DQ 0x0808080808080808,0x0808080808080808
2675$L$M0:
2676 DQ 0x02060a0e03070b0f,0x0004080c0105090d
2677$L$63:
2678 DQ 0x6363636363636363,0x6363636363636363
; NUL-terminated UTF-8 credit string:
; "Bit-sliced AES for x86_64/SSSE3, Emilia Käsper, Peter Schwabe, Andy Polyakov"
2679DB 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102
2680DB 111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44
2681DB 32,69,109,105,108,105,97,32,75,195,164,115,112,101,114,44
2682DB 32,80,101,116,101,114,32,83,99,104,119,97,98,101,44,32
2683DB 65,110,100,121,32,80,111,108,121,97,107,111,118,0
2684ALIGN 64
2685
2686EXTERN __imp_RtlVirtualUnwind
2687
; ---------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler referenced by
; every UNWIND_INFO record in .xdata below.
;
; In:  r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (rcx/rdx are not used).
;      HandlerData holds three RVAs: body label, epilogue label, tail label.
; Out: eax = 1 (ExceptionContinueSearch) after the CONTEXT has been
;      rewound to the caller's frame and RtlVirtualUnwind has been called.
;
; Cases, by faulting Rip:
;   Rip <= body or Rip >= epilogue : only Rsp is rewritten, from Rax
;   Rip >= tail                    : Rax already holds the entry rsp
;   otherwise (function body)      : Rbp is the frame base; xmm6-xmm15
;                                    are copied from [Rbp+64] and the
;                                    entry rsp is Rbp + 160 + 120
;
; Fix: the saved GPRs are now read with the same offsets the function
; epilogues use (r15 at [-48] ... rbp at [-8], matching the push order
; rbp,rbx,r12,r13,r14,r15). The previous code read them in the opposite
; order and so stored swapped values into the CONTEXT.
; ---------------------------------------------------------------------
2688ALIGN 16
2689se_handler:
2690 push rsi
2691 push rdi
2692 push rbx
2693 push rbp
2694 push r12
2695 push r13
2696 push r14
2697 push r15
2698 pushfq
; 32 bytes of shadow space plus four stack argument slots for RtlVirtualUnwind.
2699 sub rsp,64
2700
; rax = context->Rax, rbx = context->Rip.
2701 mov rax,QWORD[120+r8]
2702 mov rbx,QWORD[248+r8]
2703
; rsi = disp->ImageBase, r11 = disp->HandlerData.
2704 mov rsi,QWORD[8+r9]
2705 mov r11,QWORD[56+r9]
2706
; HandlerData[0]: end of prologue (body label).
2707 mov r10d,DWORD[r11]
2708 lea r10,[r10*1+rsi]
2709 cmp rbx,r10
2710 jbe NEAR $L$in_prologue
2711
; HandlerData[1]: epilogue label.
2712 mov r10d,DWORD[4+r11]
2713 lea r10,[r10*1+rsi]
2714 cmp rbx,r10
2715 jae NEAR $L$in_prologue
2716
; HandlerData[2]: tail label, where rax already equals the entry rsp.
2717 mov r10d,DWORD[8+r11]
2718 lea r10,[r10*1+rsi]
2719 cmp rbx,r10
2720 jae NEAR $L$in_tail
2721
; Function body: rax = context->Rbp, the frame base.
2722 mov rax,QWORD[160+r8]
2723
; Copy the ten saved xmm registers (20 qwords) into context->Xmm6..Xmm15.
2724 lea rsi,[64+rax]
2725 lea rdi,[512+r8]
2726 mov ecx,20
2727 DD 0xa548f3fc ; cld; rep movsq
2728 lea rax,[((160+120))+rax]
2729
2730$L$in_tail:
; rax = rsp at function entry; saved GPRs sit just below it.
2731 mov r15,QWORD[((-48))+rax]
2732 mov r14,QWORD[((-40))+rax]
2733 mov r13,QWORD[((-32))+rax]
2734 mov r12,QWORD[((-24))+rax]
2735 mov rbx,QWORD[((-16))+rax]
2736 mov rbp,QWORD[((-8))+rax]
; Write them back to context->Rbx, Rbp, R12..R15.
2737 mov QWORD[144+r8],rbx
2738 mov QWORD[160+r8],rbp
2739 mov QWORD[216+r8],r12
2740 mov QWORD[224+r8],r13
2741 mov QWORD[232+r8],r14
2742 mov QWORD[240+r8],r15
2743
2744$L$in_prologue:
; context->Rsp = rax.
2745 mov QWORD[152+r8],rax
2746
; Copy the whole CONTEXT (154 qwords) into disp->ContextRecord.
2747 mov rdi,QWORD[40+r9]
2748 mov rsi,r8
2749 mov ecx,154
2750 DD 0xa548f3fc ; cld; rep movsq
2751
; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry, ContextRecord,
;                  &HandlerData, &EstablisherFrame, NULL)
2752 mov rsi,r9
2753 xor rcx,rcx
2754 mov rdx,QWORD[8+rsi]
2755 mov r8,QWORD[rsi]
2756 mov r9,QWORD[16+rsi]
2757 mov r10,QWORD[40+rsi]
2758 lea r11,[56+rsi]
2759 lea r12,[24+rsi]
2760 mov QWORD[32+rsp],r10
2761 mov QWORD[40+rsp],r11
2762 mov QWORD[48+rsp],r12
2763 mov QWORD[56+rsp],rcx
2764 call QWORD[__imp_RtlVirtualUnwind]
2765
; Return ExceptionContinueSearch.
2766 mov eax,1
2767 add rsp,64
2768 popfq
2769 pop r15
2770 pop r14
2771 pop r13
2772 pop r12
2773 pop rbp
2774 pop rbx
2775 pop rdi
2776 pop rsi
2777 DB 0F3h,0C3h ;repret
2778
2779
; Win64 function table: one three-DWORD entry per exported routine, each
; giving the image-relative addresses of the covered range start, range
; end, and the matching unwind-info record in .xdata.
2780section .pdata rdata align=4
2781ALIGN 4
2782 DD $L$cbc_dec_prologue wrt ..imagebase
2783 DD $L$cbc_dec_epilogue wrt ..imagebase
2784 DD $L$cbc_dec_info wrt ..imagebase
2785
2786 DD $L$ctr_enc_prologue wrt ..imagebase
2787 DD $L$ctr_enc_epilogue wrt ..imagebase
2788 DD $L$ctr_enc_info wrt ..imagebase
2789
2790 DD $L$xts_enc_prologue wrt ..imagebase
2791 DD $L$xts_enc_epilogue wrt ..imagebase
2792 DD $L$xts_enc_info wrt ..imagebase
2793
2794 DD $L$xts_dec_prologue wrt ..imagebase
2795 DD $L$xts_dec_epilogue wrt ..imagebase
2796 DD $L$xts_dec_info wrt ..imagebase
2797
; Unwind-info records referenced from .pdata. Each record is:
;   DB 9,0,0,0  -- header byte 9 (version 1 with the exception-handler
;                  flag set), prologue size 0, no unwind codes, no frame
;                  register
;   DD handler  -- image-relative address of se_handler
;   DD body, epilogue, tail -- the three RVAs se_handler reads from
;                  HandlerData at offsets 0, 4 and 8
;   DD 0        -- trailing zero dword, not read by se_handler
2798section .xdata rdata align=8
2799ALIGN 8
2800$L$cbc_dec_info:
2801DB 9,0,0,0
2802 DD se_handler wrt ..imagebase
2803 DD $L$cbc_dec_body wrt ..imagebase,$L$cbc_dec_epilogue wrt ..imagebase
2804 DD $L$cbc_dec_tail wrt ..imagebase
2805 DD 0
2806$L$ctr_enc_info:
2807DB 9,0,0,0
2808 DD se_handler wrt ..imagebase
2809 DD $L$ctr_enc_body wrt ..imagebase,$L$ctr_enc_epilogue wrt ..imagebase
2810 DD $L$ctr_enc_tail wrt ..imagebase
2811 DD 0
2812$L$xts_enc_info:
2813DB 9,0,0,0
2814 DD se_handler wrt ..imagebase
2815 DD $L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
2816 DD $L$xts_enc_tail wrt ..imagebase
2817 DD 0
2818$L$xts_dec_info:
2819DB 9,0,0,0
2820 DD se_handler wrt ..imagebase
2821 DD $L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase
2822 DD $L$xts_dec_tail wrt ..imagebase
2823 DD 0
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette