VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.1j/crypto/genasm-nasm/aesni-x86_64.S@ 88461

Last change on this file since 88461 was 83531, checked in by vboxsync, 5 years ago

setting svn:sync-process=export for openssl-1.1.1f, all files except tests

File size: 95.5 KB
Line 
; File preamble (NASM syntax, Intel operand order).
; "default rel" makes memory references that name a symbol RIP-relative
; unless stated otherwise.
1default rel
; XMMWORD / YMMWORD / ZMMWORD are defined as empty, so a size-tagged
; operand such as XMMWORD[rcx] reduces to plain [rcx] when assembled.
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
; All code below goes into the .text section, aligned to 64 bytes.
5section .text code align=64
6
; Symbol defined in another object; this file reads a DWORD at
; OPENSSL_ia32cap_P+4 inside aesni_ctr32_encrypt_blocks.
7EXTERN OPENSSL_ia32cap_P
;-----------------------------------------------------------------------
; aesni_encrypt -- encrypt one 16-byte block with the AES-NI instructions.
; In:    rcx = source block (16 bytes, read with an unaligned load)
;        rdx = destination block (16 bytes, unaligned store)
;        r8  = key schedule: 16-byte round keys starting at offset 0 and
;              a DWORD loop count at offset 240
; Out:   16 bytes written to [rdx]
; Clobb: eax, r8, flags, xmm0-xmm2 (all three xmm registers are zeroed
;        before returning)
; Note:  AES instructions are emitted as raw opcode bytes (DB ...); the
;        decoded mnemonic is given in the comment above each one.
;-----------------------------------------------------------------------
8global aesni_encrypt
9
10ALIGN 16
11aesni_encrypt:
12
; xmm2 = input block, eax = loop count, xmm0/xmm1 = round keys 0 and 1
13 movups xmm2,XMMWORD[rcx]
14 mov eax,DWORD[240+r8]
15 movups xmm0,XMMWORD[r8]
16 movups xmm1,XMMWORD[16+r8]
; r8 now points at round key 2
17 lea r8,[32+r8]
; initial whitening with round key 0
18 xorps xmm2,xmm0
; One round per iteration. jnz tests ZF produced by "dec eax"; the movups
; and lea in between do not modify flags.
19$L$oop_enc1_1:
; aesenc xmm2,xmm1
20DB 102,15,56,220,209
21 dec eax
22 movups xmm1,XMMWORD[r8]
23 lea r8,[16+r8]
24 jnz NEAR $L$oop_enc1_1
; aesenclast xmm2,xmm1
25DB 102,15,56,221,209
; wipe the round keys held in xmm0/xmm1, store the result, then wipe xmm2
26 pxor xmm0,xmm0
27 pxor xmm1,xmm1
28 movups XMMWORD[rdx],xmm2
29 pxor xmm2,xmm2
; two-byte "rep ret"
30 DB 0F3h,0C3h ;repret
31
32
33
;-----------------------------------------------------------------------
; aesni_decrypt -- decrypt one 16-byte block with the AES-NI instructions.
; In:    rcx = source block (16 bytes, read with an unaligned load)
;        rdx = destination block (16 bytes, unaligned store)
;        r8  = key schedule: 16-byte round keys starting at offset 0 and
;              a DWORD loop count at offset 240
;              (presumably a decryption schedule -- confirm with caller)
; Out:   16 bytes written to [rdx]
; Clobb: eax, r8, flags, xmm0-xmm2 (all three xmm registers are zeroed
;        before returning)
; Note:  AES instructions are emitted as raw opcode bytes (DB ...); the
;        decoded mnemonic is given in the comment above each one.
;-----------------------------------------------------------------------
34global aesni_decrypt
35
36ALIGN 16
37aesni_decrypt:
38
; xmm2 = input block, eax = loop count, xmm0/xmm1 = round keys 0 and 1
39 movups xmm2,XMMWORD[rcx]
40 mov eax,DWORD[240+r8]
41 movups xmm0,XMMWORD[r8]
42 movups xmm1,XMMWORD[16+r8]
; r8 now points at round key 2
43 lea r8,[32+r8]
; initial whitening with round key 0
44 xorps xmm2,xmm0
; One round per iteration. jnz tests ZF produced by "dec eax"; the movups
; and lea in between do not modify flags.
45$L$oop_dec1_2:
; aesdec xmm2,xmm1
46DB 102,15,56,222,209
47 dec eax
48 movups xmm1,XMMWORD[r8]
49 lea r8,[16+r8]
50 jnz NEAR $L$oop_dec1_2
; aesdeclast xmm2,xmm1
51DB 102,15,56,223,209
; wipe the round keys held in xmm0/xmm1, store the result, then wipe xmm2
52 pxor xmm0,xmm0
53 pxor xmm1,xmm1
54 movups XMMWORD[rdx],xmm2
55 pxor xmm2,xmm2
; two-byte "rep ret"
56 DB 0F3h,0C3h ;repret
57
58
59
;-----------------------------------------------------------------------
; _aesni_encrypt2 -- internal helper (not declared global): run the AES
; encryption rounds over two blocks held in registers, interleaved.
; In:    rcx = key schedule (16-byte round keys from offset 0)
;        eax = loop count as stored at offset 240 of the key schedule
;        xmm2, xmm3 = the two input blocks
; Out:   xmm2, xmm3 = the two processed blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
60ALIGN 16
61_aesni_encrypt2:
62
; xmm0 = round key 0
63 movups xmm0,XMMWORD[rcx]
; rax = 16 * count (the 32-bit shift also clears the upper half of rax)
64 shl eax,4
; xmm1 = round key 1
65 movups xmm1,XMMWORD[16+rcx]
; whitening: xor both blocks with round key 0
66 xorps xmm2,xmm0
67 xorps xmm3,xmm0
; xmm0 = round key 2
68 movups xmm0,XMMWORD[32+rcx]
; rcx = key + 32 + 16*count and rax = 16 - 16*count, so [rax+rcx] starts at
; round key 3 and rax is stepped by 32 until it reaches zero.
69 lea rcx,[32+rax*1+rcx]
70 neg rax
71 add rax,16
72
; Two rounds per iteration. jnz tests ZF from "add rax,32"; the DB-encoded
; AES instructions and the movups after it do not modify flags.
73$L$enc_loop2:
; aesenc xmm2,xmm1 ; aesenc xmm3,xmm1
74DB 102,15,56,220,209
75DB 102,15,56,220,217
76 movups xmm1,XMMWORD[rax*1+rcx]
77 add rax,32
; aesenc xmm2,xmm0 ; aesenc xmm3,xmm0
78DB 102,15,56,220,208
79DB 102,15,56,220,216
80 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
81 jnz NEAR $L$enc_loop2
82
; tail: aesenc xmm2,xmm1 ; aesenc xmm3,xmm1
83DB 102,15,56,220,209
84DB 102,15,56,220,217
; aesenclast xmm2,xmm0 ; aesenclast xmm3,xmm0
85DB 102,15,56,221,208
86DB 102,15,56,221,216
87 DB 0F3h,0C3h ;repret
88
89
90
;-----------------------------------------------------------------------
; _aesni_decrypt2 -- internal helper (not declared global): run the AES
; decryption rounds over two blocks held in registers, interleaved.
; In:    rcx = key schedule (16-byte round keys from offset 0)
;        eax = loop count as stored at offset 240 of the key schedule
;        xmm2, xmm3 = the two input blocks
; Out:   xmm2, xmm3 = the two processed blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
91ALIGN 16
92_aesni_decrypt2:
93
; xmm0 = round key 0
94 movups xmm0,XMMWORD[rcx]
; rax = 16 * count (the 32-bit shift also clears the upper half of rax)
95 shl eax,4
; xmm1 = round key 1
96 movups xmm1,XMMWORD[16+rcx]
; whitening: xor both blocks with round key 0
97 xorps xmm2,xmm0
98 xorps xmm3,xmm0
; xmm0 = round key 2
99 movups xmm0,XMMWORD[32+rcx]
; rcx = key + 32 + 16*count and rax = 16 - 16*count, so [rax+rcx] starts at
; round key 3 and rax is stepped by 32 until it reaches zero.
100 lea rcx,[32+rax*1+rcx]
101 neg rax
102 add rax,16
103
; Two rounds per iteration. jnz tests ZF from "add rax,32"; the DB-encoded
; AES instructions and the movups after it do not modify flags.
104$L$dec_loop2:
; aesdec xmm2,xmm1 ; aesdec xmm3,xmm1
105DB 102,15,56,222,209
106DB 102,15,56,222,217
107 movups xmm1,XMMWORD[rax*1+rcx]
108 add rax,32
; aesdec xmm2,xmm0 ; aesdec xmm3,xmm0
109DB 102,15,56,222,208
110DB 102,15,56,222,216
111 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
112 jnz NEAR $L$dec_loop2
113
; tail: aesdec xmm2,xmm1 ; aesdec xmm3,xmm1
114DB 102,15,56,222,209
115DB 102,15,56,222,217
; aesdeclast xmm2,xmm0 ; aesdeclast xmm3,xmm0
116DB 102,15,56,223,208
117DB 102,15,56,223,216
118 DB 0F3h,0C3h ;repret
119
120
121
;-----------------------------------------------------------------------
; _aesni_encrypt3 -- internal helper (not declared global): run the AES
; encryption rounds over three blocks held in registers, interleaved.
; In:    rcx = key schedule (16-byte round keys from offset 0)
;        eax = loop count as stored at offset 240 of the key schedule
;        xmm2, xmm3, xmm4 = the three input blocks
; Out:   xmm2, xmm3, xmm4 = the three processed blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
122ALIGN 16
123_aesni_encrypt3:
124
; xmm0 = round key 0
125 movups xmm0,XMMWORD[rcx]
; rax = 16 * count (the 32-bit shift also clears the upper half of rax)
126 shl eax,4
; xmm1 = round key 1
127 movups xmm1,XMMWORD[16+rcx]
; whitening: xor every block with round key 0
128 xorps xmm2,xmm0
129 xorps xmm3,xmm0
130 xorps xmm4,xmm0
; xmm0 = round key 2
131 movups xmm0,XMMWORD[32+rcx]
; rcx = key + 32 + 16*count and rax = 16 - 16*count, so [rax+rcx] starts at
; round key 3 and rax is stepped by 32 until it reaches zero.
132 lea rcx,[32+rax*1+rcx]
133 neg rax
134 add rax,16
135
; Two rounds per iteration. jnz tests ZF from "add rax,32"; the DB-encoded
; AES instructions and the movups after it do not modify flags.
136$L$enc_loop3:
; aesenc xmm2..xmm4 with xmm1
137DB 102,15,56,220,209
138DB 102,15,56,220,217
139DB 102,15,56,220,225
140 movups xmm1,XMMWORD[rax*1+rcx]
141 add rax,32
; aesenc xmm2..xmm4 with xmm0
142DB 102,15,56,220,208
143DB 102,15,56,220,216
144DB 102,15,56,220,224
145 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
146 jnz NEAR $L$enc_loop3
147
; tail: aesenc xmm2..xmm4 with xmm1
148DB 102,15,56,220,209
149DB 102,15,56,220,217
150DB 102,15,56,220,225
; aesenclast xmm2..xmm4 with xmm0
151DB 102,15,56,221,208
152DB 102,15,56,221,216
153DB 102,15,56,221,224
154 DB 0F3h,0C3h ;repret
155
156
157
;-----------------------------------------------------------------------
; _aesni_decrypt3 -- internal helper (not declared global): run the AES
; decryption rounds over three blocks held in registers, interleaved.
; In:    rcx = key schedule (16-byte round keys from offset 0)
;        eax = loop count as stored at offset 240 of the key schedule
;        xmm2, xmm3, xmm4 = the three input blocks
; Out:   xmm2, xmm3, xmm4 = the three processed blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
158ALIGN 16
159_aesni_decrypt3:
160
; xmm0 = round key 0
161 movups xmm0,XMMWORD[rcx]
; rax = 16 * count (the 32-bit shift also clears the upper half of rax)
162 shl eax,4
; xmm1 = round key 1
163 movups xmm1,XMMWORD[16+rcx]
; whitening: xor every block with round key 0
164 xorps xmm2,xmm0
165 xorps xmm3,xmm0
166 xorps xmm4,xmm0
; xmm0 = round key 2
167 movups xmm0,XMMWORD[32+rcx]
; rcx = key + 32 + 16*count and rax = 16 - 16*count, so [rax+rcx] starts at
; round key 3 and rax is stepped by 32 until it reaches zero.
168 lea rcx,[32+rax*1+rcx]
169 neg rax
170 add rax,16
171
; Two rounds per iteration. jnz tests ZF from "add rax,32"; the DB-encoded
; AES instructions and the movups after it do not modify flags.
172$L$dec_loop3:
; aesdec xmm2..xmm4 with xmm1
173DB 102,15,56,222,209
174DB 102,15,56,222,217
175DB 102,15,56,222,225
176 movups xmm1,XMMWORD[rax*1+rcx]
177 add rax,32
; aesdec xmm2..xmm4 with xmm0
178DB 102,15,56,222,208
179DB 102,15,56,222,216
180DB 102,15,56,222,224
181 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
182 jnz NEAR $L$dec_loop3
183
; tail: aesdec xmm2..xmm4 with xmm1
184DB 102,15,56,222,209
185DB 102,15,56,222,217
186DB 102,15,56,222,225
; aesdeclast xmm2..xmm4 with xmm0
187DB 102,15,56,223,208
188DB 102,15,56,223,216
189DB 102,15,56,223,224
190 DB 0F3h,0C3h ;repret
191
192
193
;-----------------------------------------------------------------------
; _aesni_encrypt4 -- internal helper (not declared global): run the AES
; encryption rounds over four blocks held in registers, interleaved.
; In:    rcx = key schedule (16-byte round keys from offset 0)
;        eax = loop count as stored at offset 240 of the key schedule
;        xmm2..xmm5 = the four input blocks
; Out:   xmm2..xmm5 = the four processed blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
194ALIGN 16
195_aesni_encrypt4:
196
; xmm0 = round key 0
197 movups xmm0,XMMWORD[rcx]
; rax = 16 * count (the 32-bit shift also clears the upper half of rax)
198 shl eax,4
; xmm1 = round key 1
199 movups xmm1,XMMWORD[16+rcx]
; whitening: xor every block with round key 0
200 xorps xmm2,xmm0
201 xorps xmm3,xmm0
202 xorps xmm4,xmm0
203 xorps xmm5,xmm0
; xmm0 = round key 2
204 movups xmm0,XMMWORD[32+rcx]
; rcx = key + 32 + 16*count and rax = 16 - 16*count, so [rax+rcx] starts at
; round key 3 and rax is stepped by 32 until it reaches zero.
205 lea rcx,[32+rax*1+rcx]
206 neg rax
; 3-byte NOP (0F 1F 00), padding only
207DB 0x0f,0x1f,0x00
208 add rax,16
209
; Two rounds per iteration. jnz tests ZF from "add rax,32"; the DB-encoded
; AES instructions and the movups after it do not modify flags.
210$L$enc_loop4:
; aesenc xmm2..xmm5 with xmm1
211DB 102,15,56,220,209
212DB 102,15,56,220,217
213DB 102,15,56,220,225
214DB 102,15,56,220,233
215 movups xmm1,XMMWORD[rax*1+rcx]
216 add rax,32
; aesenc xmm2..xmm5 with xmm0
217DB 102,15,56,220,208
218DB 102,15,56,220,216
219DB 102,15,56,220,224
220DB 102,15,56,220,232
221 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
222 jnz NEAR $L$enc_loop4
223
; tail: aesenc xmm2..xmm5 with xmm1
224DB 102,15,56,220,209
225DB 102,15,56,220,217
226DB 102,15,56,220,225
227DB 102,15,56,220,233
; aesenclast xmm2..xmm5 with xmm0
228DB 102,15,56,221,208
229DB 102,15,56,221,216
230DB 102,15,56,221,224
231DB 102,15,56,221,232
232 DB 0F3h,0C3h ;repret
233
234
235
;-----------------------------------------------------------------------
; _aesni_decrypt4 -- internal helper (not declared global): run the AES
; decryption rounds over four blocks held in registers, interleaved.
; In:    rcx = key schedule (16-byte round keys from offset 0)
;        eax = loop count as stored at offset 240 of the key schedule
;        xmm2..xmm5 = the four input blocks
; Out:   xmm2..xmm5 = the four processed blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
;-----------------------------------------------------------------------
236ALIGN 16
237_aesni_decrypt4:
238
; xmm0 = round key 0
239 movups xmm0,XMMWORD[rcx]
; rax = 16 * count (the 32-bit shift also clears the upper half of rax)
240 shl eax,4
; xmm1 = round key 1
241 movups xmm1,XMMWORD[16+rcx]
; whitening: xor every block with round key 0
242 xorps xmm2,xmm0
243 xorps xmm3,xmm0
244 xorps xmm4,xmm0
245 xorps xmm5,xmm0
; xmm0 = round key 2
246 movups xmm0,XMMWORD[32+rcx]
; rcx = key + 32 + 16*count and rax = 16 - 16*count, so [rax+rcx] starts at
; round key 3 and rax is stepped by 32 until it reaches zero.
247 lea rcx,[32+rax*1+rcx]
248 neg rax
; 3-byte NOP (0F 1F 00), padding only
249DB 0x0f,0x1f,0x00
250 add rax,16
251
; Two rounds per iteration. jnz tests ZF from "add rax,32"; the DB-encoded
; AES instructions and the movups after it do not modify flags.
252$L$dec_loop4:
; aesdec xmm2..xmm5 with xmm1
253DB 102,15,56,222,209
254DB 102,15,56,222,217
255DB 102,15,56,222,225
256DB 102,15,56,222,233
257 movups xmm1,XMMWORD[rax*1+rcx]
258 add rax,32
; aesdec xmm2..xmm5 with xmm0
259DB 102,15,56,222,208
260DB 102,15,56,222,216
261DB 102,15,56,222,224
262DB 102,15,56,222,232
263 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
264 jnz NEAR $L$dec_loop4
265
; tail: aesdec xmm2..xmm5 with xmm1
266DB 102,15,56,222,209
267DB 102,15,56,222,217
268DB 102,15,56,222,225
269DB 102,15,56,222,233
; aesdeclast xmm2..xmm5 with xmm0
270DB 102,15,56,223,208
271DB 102,15,56,223,216
272DB 102,15,56,223,224
273DB 102,15,56,223,232
274 DB 0F3h,0C3h ;repret
275
276
277
;-----------------------------------------------------------------------
; _aesni_encrypt6 -- internal helper (not declared global): run the AES
; encryption rounds over six blocks held in registers, interleaved.
; In:    rcx = key schedule (16-byte round keys from offset 0)
;        eax = loop count as stored at offset 240 of the key schedule
;        xmm2..xmm7 = the six input blocks
; Out:   xmm2..xmm7 = the six processed blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; Note:  the first round for xmm2..xmm4 is folded into the set-up code,
;        so the loop is entered in the middle at $L$enc_loop6_enter.
;        $L$enc_loop6 is also a call target from
;        aesni_ctr32_encrypt_blocks in this file, so the label and the
;        register state it expects must stay as they are.
;-----------------------------------------------------------------------
278ALIGN 16
279_aesni_encrypt6:
280
; xmm0 = round key 0
281 movups xmm0,XMMWORD[rcx]
; rax = 16 * count (the 32-bit shift also clears the upper half of rax)
282 shl eax,4
; xmm1 = round key 1
283 movups xmm1,XMMWORD[16+rcx]
; whitening of xmm2..xmm4 with round key 0
284 xorps xmm2,xmm0
285 pxor xmm3,xmm0
286 pxor xmm4,xmm0
; aesenc xmm2,xmm1
287DB 102,15,56,220,209
; rcx = key + 32 + 16*count, rax = -16*count
288 lea rcx,[32+rax*1+rcx]
289 neg rax
; aesenc xmm3,xmm1
290DB 102,15,56,220,217
; whitening of xmm5, xmm6
291 pxor xmm5,xmm0
292 pxor xmm6,xmm0
; aesenc xmm4,xmm1
293DB 102,15,56,220,225
; whitening of xmm7, then xmm0 = round key 2 ([rax+rcx] = key + 32)
294 pxor xmm7,xmm0
295 movups xmm0,XMMWORD[rax*1+rcx]
; rax = 16 - 16*count, stepped by 32 in the loop until it reaches zero
296 add rax,16
297 jmp NEAR $L$enc_loop6_enter
298ALIGN 16
; Two rounds per iteration. jnz tests ZF from "add rax,32"; the DB-encoded
; AES instructions and the movups after it do not modify flags.
299$L$enc_loop6:
; aesenc xmm2..xmm4 with xmm1
300DB 102,15,56,220,209
301DB 102,15,56,220,217
302DB 102,15,56,220,225
303$L$enc_loop6_enter:
; aesenc xmm5..xmm7 with xmm1
304DB 102,15,56,220,233
305DB 102,15,56,220,241
306DB 102,15,56,220,249
307 movups xmm1,XMMWORD[rax*1+rcx]
308 add rax,32
; aesenc xmm2..xmm7 with xmm0
309DB 102,15,56,220,208
310DB 102,15,56,220,216
311DB 102,15,56,220,224
312DB 102,15,56,220,232
313DB 102,15,56,220,240
314DB 102,15,56,220,248
315 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
316 jnz NEAR $L$enc_loop6
317
; tail: aesenc xmm2..xmm7 with xmm1
318DB 102,15,56,220,209
319DB 102,15,56,220,217
320DB 102,15,56,220,225
321DB 102,15,56,220,233
322DB 102,15,56,220,241
323DB 102,15,56,220,249
; aesenclast xmm2..xmm7 with xmm0
324DB 102,15,56,221,208
325DB 102,15,56,221,216
326DB 102,15,56,221,224
327DB 102,15,56,221,232
328DB 102,15,56,221,240
329DB 102,15,56,221,248
330 DB 0F3h,0C3h ;repret
331
332
333
;-----------------------------------------------------------------------
; _aesni_decrypt6 -- internal helper (not declared global): run the AES
; decryption rounds over six blocks held in registers, interleaved.
; In:    rcx = key schedule (16-byte round keys from offset 0)
;        eax = loop count as stored at offset 240 of the key schedule
;        xmm2..xmm7 = the six input blocks
; Out:   xmm2..xmm7 = the six processed blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; Note:  the first round for xmm2..xmm4 is folded into the set-up code,
;        so the loop is entered in the middle at $L$dec_loop6_enter.
;-----------------------------------------------------------------------
334ALIGN 16
335_aesni_decrypt6:
336
; xmm0 = round key 0
337 movups xmm0,XMMWORD[rcx]
; rax = 16 * count (the 32-bit shift also clears the upper half of rax)
338 shl eax,4
; xmm1 = round key 1
339 movups xmm1,XMMWORD[16+rcx]
; whitening of xmm2..xmm4 with round key 0
340 xorps xmm2,xmm0
341 pxor xmm3,xmm0
342 pxor xmm4,xmm0
; aesdec xmm2,xmm1
343DB 102,15,56,222,209
; rcx = key + 32 + 16*count, rax = -16*count
344 lea rcx,[32+rax*1+rcx]
345 neg rax
; aesdec xmm3,xmm1
346DB 102,15,56,222,217
; whitening of xmm5, xmm6
347 pxor xmm5,xmm0
348 pxor xmm6,xmm0
; aesdec xmm4,xmm1
349DB 102,15,56,222,225
; whitening of xmm7, then xmm0 = round key 2 ([rax+rcx] = key + 32)
350 pxor xmm7,xmm0
351 movups xmm0,XMMWORD[rax*1+rcx]
; rax = 16 - 16*count, stepped by 32 in the loop until it reaches zero
352 add rax,16
353 jmp NEAR $L$dec_loop6_enter
354ALIGN 16
; Two rounds per iteration. jnz tests ZF from "add rax,32"; the DB-encoded
; AES instructions and the movups after it do not modify flags.
355$L$dec_loop6:
; aesdec xmm2..xmm4 with xmm1
356DB 102,15,56,222,209
357DB 102,15,56,222,217
358DB 102,15,56,222,225
359$L$dec_loop6_enter:
; aesdec xmm5..xmm7 with xmm1
360DB 102,15,56,222,233
361DB 102,15,56,222,241
362DB 102,15,56,222,249
363 movups xmm1,XMMWORD[rax*1+rcx]
364 add rax,32
; aesdec xmm2..xmm7 with xmm0
365DB 102,15,56,222,208
366DB 102,15,56,222,216
367DB 102,15,56,222,224
368DB 102,15,56,222,232
369DB 102,15,56,222,240
370DB 102,15,56,222,248
371 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
372 jnz NEAR $L$dec_loop6
373
; tail: aesdec xmm2..xmm7 with xmm1
374DB 102,15,56,222,209
375DB 102,15,56,222,217
376DB 102,15,56,222,225
377DB 102,15,56,222,233
378DB 102,15,56,222,241
379DB 102,15,56,222,249
; aesdeclast xmm2..xmm7 with xmm0
380DB 102,15,56,223,208
381DB 102,15,56,223,216
382DB 102,15,56,223,224
383DB 102,15,56,223,232
384DB 102,15,56,223,240
385DB 102,15,56,223,248
386 DB 0F3h,0C3h ;repret
387
388
389
;-----------------------------------------------------------------------
; _aesni_encrypt8 -- internal helper (not declared global): run the AES
; encryption rounds over eight blocks held in registers, interleaved.
; In:    rcx = key schedule (16-byte round keys from offset 0)
;        eax = loop count as stored at offset 240 of the key schedule
;        xmm2..xmm9 = the eight input blocks
; Out:   xmm2..xmm9 = the eight processed blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; Note:  the first round for xmm2 and xmm3 is folded into the set-up
;        code, so the loop is entered in the middle at
;        $L$enc_loop8_inner. Instructions on xmm8/xmm9 carry a REX
;        prefix byte (68) in their DB encoding.
;-----------------------------------------------------------------------
390ALIGN 16
391_aesni_encrypt8:
392
; xmm0 = round key 0
393 movups xmm0,XMMWORD[rcx]
; rax = 16 * count (the 32-bit shift also clears the upper half of rax)
394 shl eax,4
; xmm1 = round key 1
395 movups xmm1,XMMWORD[16+rcx]
; whitening of xmm2..xmm6 with round key 0
396 xorps xmm2,xmm0
397 xorps xmm3,xmm0
398 pxor xmm4,xmm0
399 pxor xmm5,xmm0
400 pxor xmm6,xmm0
; rcx = key + 32 + 16*count, rax = -16*count
401 lea rcx,[32+rax*1+rcx]
402 neg rax
; aesenc xmm2,xmm1
403DB 102,15,56,220,209
; whitening of xmm7, xmm8
404 pxor xmm7,xmm0
405 pxor xmm8,xmm0
; aesenc xmm3,xmm1
406DB 102,15,56,220,217
; whitening of xmm9, then xmm0 = round key 2 ([rax+rcx] = key + 32)
407 pxor xmm9,xmm0
408 movups xmm0,XMMWORD[rax*1+rcx]
; rax = 16 - 16*count, stepped by 32 in the loop until it reaches zero
409 add rax,16
410 jmp NEAR $L$enc_loop8_inner
411ALIGN 16
; Two rounds per iteration. jnz tests ZF from "add rax,32"; the DB-encoded
; AES instructions and the movups after it do not modify flags.
412$L$enc_loop8:
; aesenc xmm2,xmm1 ; aesenc xmm3,xmm1
413DB 102,15,56,220,209
414DB 102,15,56,220,217
415$L$enc_loop8_inner:
; aesenc xmm4..xmm9 with xmm1
416DB 102,15,56,220,225
417DB 102,15,56,220,233
418DB 102,15,56,220,241
419DB 102,15,56,220,249
420DB 102,68,15,56,220,193
421DB 102,68,15,56,220,201
; NOTE(review): no jump to $L$enc_loop8_enter appears in the part of the
; file reviewed here -- check for users elsewhere before removing it.
422$L$enc_loop8_enter:
423 movups xmm1,XMMWORD[rax*1+rcx]
424 add rax,32
; aesenc xmm2..xmm9 with xmm0
425DB 102,15,56,220,208
426DB 102,15,56,220,216
427DB 102,15,56,220,224
428DB 102,15,56,220,232
429DB 102,15,56,220,240
430DB 102,15,56,220,248
431DB 102,68,15,56,220,192
432DB 102,68,15,56,220,200
433 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
434 jnz NEAR $L$enc_loop8
435
; tail: aesenc xmm2..xmm9 with xmm1
436DB 102,15,56,220,209
437DB 102,15,56,220,217
438DB 102,15,56,220,225
439DB 102,15,56,220,233
440DB 102,15,56,220,241
441DB 102,15,56,220,249
442DB 102,68,15,56,220,193
443DB 102,68,15,56,220,201
; aesenclast xmm2..xmm9 with xmm0
444DB 102,15,56,221,208
445DB 102,15,56,221,216
446DB 102,15,56,221,224
447DB 102,15,56,221,232
448DB 102,15,56,221,240
449DB 102,15,56,221,248
450DB 102,68,15,56,221,192
451DB 102,68,15,56,221,200
452 DB 0F3h,0C3h ;repret
453
454
455
;-----------------------------------------------------------------------
; _aesni_decrypt8 -- internal helper (not declared global): run the AES
; decryption rounds over eight blocks held in registers, interleaved.
; In:    rcx = key schedule (16-byte round keys from offset 0)
;        eax = loop count as stored at offset 240 of the key schedule
;        xmm2..xmm9 = the eight input blocks
; Out:   xmm2..xmm9 = the eight processed blocks
; Clobb: rax, rcx, xmm0, xmm1, flags
; Note:  the first round for xmm2 and xmm3 is folded into the set-up
;        code, so the loop is entered in the middle at
;        $L$dec_loop8_inner. Instructions on xmm8/xmm9 carry a REX
;        prefix byte (68) in their DB encoding.
;-----------------------------------------------------------------------
456ALIGN 16
457_aesni_decrypt8:
458
; xmm0 = round key 0
459 movups xmm0,XMMWORD[rcx]
; rax = 16 * count (the 32-bit shift also clears the upper half of rax)
460 shl eax,4
; xmm1 = round key 1
461 movups xmm1,XMMWORD[16+rcx]
; whitening of xmm2..xmm6 with round key 0
462 xorps xmm2,xmm0
463 xorps xmm3,xmm0
464 pxor xmm4,xmm0
465 pxor xmm5,xmm0
466 pxor xmm6,xmm0
; rcx = key + 32 + 16*count, rax = -16*count
467 lea rcx,[32+rax*1+rcx]
468 neg rax
; aesdec xmm2,xmm1
469DB 102,15,56,222,209
; whitening of xmm7, xmm8
470 pxor xmm7,xmm0
471 pxor xmm8,xmm0
; aesdec xmm3,xmm1
472DB 102,15,56,222,217
; whitening of xmm9, then xmm0 = round key 2 ([rax+rcx] = key + 32)
473 pxor xmm9,xmm0
474 movups xmm0,XMMWORD[rax*1+rcx]
; rax = 16 - 16*count, stepped by 32 in the loop until it reaches zero
475 add rax,16
476 jmp NEAR $L$dec_loop8_inner
477ALIGN 16
; Two rounds per iteration. jnz tests ZF from "add rax,32"; the DB-encoded
; AES instructions and the movups after it do not modify flags.
478$L$dec_loop8:
; aesdec xmm2,xmm1 ; aesdec xmm3,xmm1
479DB 102,15,56,222,209
480DB 102,15,56,222,217
481$L$dec_loop8_inner:
; aesdec xmm4..xmm9 with xmm1
482DB 102,15,56,222,225
483DB 102,15,56,222,233
484DB 102,15,56,222,241
485DB 102,15,56,222,249
486DB 102,68,15,56,222,193
487DB 102,68,15,56,222,201
; NOTE(review): no jump to $L$dec_loop8_enter appears in the part of the
; file reviewed here -- check for users elsewhere before removing it.
488$L$dec_loop8_enter:
489 movups xmm1,XMMWORD[rax*1+rcx]
490 add rax,32
; aesdec xmm2..xmm9 with xmm0
491DB 102,15,56,222,208
492DB 102,15,56,222,216
493DB 102,15,56,222,224
494DB 102,15,56,222,232
495DB 102,15,56,222,240
496DB 102,15,56,222,248
497DB 102,68,15,56,222,192
498DB 102,68,15,56,222,200
499 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
500 jnz NEAR $L$dec_loop8
501
; tail: aesdec xmm2..xmm9 with xmm1
502DB 102,15,56,222,209
503DB 102,15,56,222,217
504DB 102,15,56,222,225
505DB 102,15,56,222,233
506DB 102,15,56,222,241
507DB 102,15,56,222,249
508DB 102,68,15,56,222,193
509DB 102,68,15,56,222,201
; aesdeclast xmm2..xmm9 with xmm0
510DB 102,15,56,223,208
511DB 102,15,56,223,216
512DB 102,15,56,223,224
513DB 102,15,56,223,232
514DB 102,15,56,223,240
515DB 102,15,56,223,248
516DB 102,68,15,56,223,192
517DB 102,68,15,56,223,200
518 DB 0F3h,0C3h ;repret
519
520
;-----------------------------------------------------------------------
; aesni_ecb_encrypt -- ECB-mode bulk encryption / decryption.
; ABI:   Win64 entry. The four register arguments (rcx, rdx, r8, r9) and
;        the fifth argument at [40+rsp] are moved into
;        rdi/rsi/rdx/rcx/r8 right after entry.
; After that register shuffle:
;        rdi = input pointer
;        rsi = output pointer
;        rdx = byte length (rounded down to a multiple of 16 below;
;              nothing is done when the result is zero)
;        rcx = key schedule (round keys from offset 0, DWORD loop count
;              at offset 240)
;        r8d = direction: non-zero selects the encrypt path, zero the
;              decrypt path
; Frame: 88 bytes; xmm6-xmm9 are spilled to [rsp]..[48+rsp], restored at
;        $L$ecb_ret, and their stack slots are then overwritten with
;        zero.
; Regs:  r11 = copy of the key pointer, r10d = copy of the loop count;
;        both are used to reload rcx/eax between helper calls because
;        the _aesni_*N helpers modify rcx and rax.
; Shape: an 8-blocks-per-iteration main loop followed by a tail that
;        handles 1..7 remaining blocks. The decrypt path additionally
;        zeroes the data registers after storing results.
;-----------------------------------------------------------------------
521global aesni_ecb_encrypt
522
523ALIGN 16
524aesni_ecb_encrypt:
; save rdi/rsi (callee-saved on Win64) in the caller-provided home space
525 mov QWORD[8+rsp],rdi ;WIN64 prologue
526 mov QWORD[16+rsp],rsi
; rax = rsp at entry (presumably for the unwind handler -- confirm against
; the SEH tables, which are not part of the code shown here)
527 mov rax,rsp
528$L$SEH_begin_aesni_ecb_encrypt:
529 mov rdi,rcx
530 mov rsi,rdx
531 mov rdx,r8
532 mov rcx,r9
; fifth argument from the stack
533 mov r8,QWORD[40+rsp]
534
535
536
; allocate the frame and spill xmm6-xmm9
537 lea rsp,[((-88))+rsp]
538 movaps XMMWORD[rsp],xmm6
539 movaps XMMWORD[16+rsp],xmm7
540 movaps XMMWORD[32+rsp],xmm8
541 movaps XMMWORD[48+rsp],xmm9
542$L$ecb_enc_body:
; drop any partial trailing block; return if nothing is left
543 and rdx,-16
544 jz NEAR $L$ecb_ret
545
; eax = loop count, xmm0 = round key 0; keep copies in r11 / r10d
546 mov eax,DWORD[240+rcx]
547 movups xmm0,XMMWORD[rcx]
548 mov r11,rcx
549 mov r10d,eax
; choose direction
550 test r8d,r8d
551 jz NEAR $L$ecb_decrypt
552
; ---- encrypt path ----
; fewer than 8 blocks (128 bytes): go straight to the tail
553 cmp rdx,0x80
554 jb NEAR $L$ecb_enc_tail
555
; load the first 8 input blocks and enter the loop at its call site
556 movdqu xmm2,XMMWORD[rdi]
557 movdqu xmm3,XMMWORD[16+rdi]
558 movdqu xmm4,XMMWORD[32+rdi]
559 movdqu xmm5,XMMWORD[48+rdi]
560 movdqu xmm6,XMMWORD[64+rdi]
561 movdqu xmm7,XMMWORD[80+rdi]
562 movdqu xmm8,XMMWORD[96+rdi]
563 movdqu xmm9,XMMWORD[112+rdi]
564 lea rdi,[128+rdi]
565 sub rdx,0x80
566 jmp NEAR $L$ecb_enc_loop8_enter
567ALIGN 16
; Loop body: store the 8 results of the previous call while loading the
; next 8 inputs, and reload rcx/eax from r11/r10d for the helper.
568$L$ecb_enc_loop8:
569 movups XMMWORD[rsi],xmm2
570 mov rcx,r11
571 movdqu xmm2,XMMWORD[rdi]
572 mov eax,r10d
573 movups XMMWORD[16+rsi],xmm3
574 movdqu xmm3,XMMWORD[16+rdi]
575 movups XMMWORD[32+rsi],xmm4
576 movdqu xmm4,XMMWORD[32+rdi]
577 movups XMMWORD[48+rsi],xmm5
578 movdqu xmm5,XMMWORD[48+rdi]
579 movups XMMWORD[64+rsi],xmm6
580 movdqu xmm6,XMMWORD[64+rdi]
581 movups XMMWORD[80+rsi],xmm7
582 movdqu xmm7,XMMWORD[80+rdi]
583 movups XMMWORD[96+rsi],xmm8
584 movdqu xmm8,XMMWORD[96+rdi]
585 movups XMMWORD[112+rsi],xmm9
586 lea rsi,[128+rsi]
587 movdqu xmm9,XMMWORD[112+rdi]
588 lea rdi,[128+rdi]
589$L$ecb_enc_loop8_enter:
590
591 call _aesni_encrypt8
592
; keep looping while at least another 128 bytes remain (no borrow)
593 sub rdx,0x80
594 jnc NEAR $L$ecb_enc_loop8
595
; store the last full group of 8 and restore rcx/eax for the tail
596 movups XMMWORD[rsi],xmm2
597 mov rcx,r11
598 movups XMMWORD[16+rsi],xmm3
599 mov eax,r10d
600 movups XMMWORD[32+rsi],xmm4
601 movups XMMWORD[48+rsi],xmm5
602 movups XMMWORD[64+rsi],xmm6
603 movups XMMWORD[80+rsi],xmm7
604 movups XMMWORD[96+rsi],xmm8
605 movups XMMWORD[112+rsi],xmm9
606 lea rsi,[128+rsi]
; undo the last subtraction: rdx = bytes still to do (0..112)
607 add rdx,0x80
608 jz NEAR $L$ecb_ret
609
; Tail dispatch on the remaining byte count. Each cmp feeds both the jb and
; the following je; the movups between them does not modify flags.
610$L$ecb_enc_tail:
611 movups xmm2,XMMWORD[rdi]
612 cmp rdx,0x20
613 jb NEAR $L$ecb_enc_one
614 movups xmm3,XMMWORD[16+rdi]
615 je NEAR $L$ecb_enc_two
616 movups xmm4,XMMWORD[32+rdi]
617 cmp rdx,0x40
618 jb NEAR $L$ecb_enc_three
619 movups xmm5,XMMWORD[48+rdi]
620 je NEAR $L$ecb_enc_four
621 movups xmm6,XMMWORD[64+rdi]
622 cmp rdx,0x60
623 jb NEAR $L$ecb_enc_five
624 movups xmm7,XMMWORD[80+rdi]
625 je NEAR $L$ecb_enc_six
; seven blocks: use the 8-block helper with a zeroed eighth lane (xmm9),
; whose result is not stored
626 movdqu xmm8,XMMWORD[96+rdi]
627 xorps xmm9,xmm9
628 call _aesni_encrypt8
629 movups XMMWORD[rsi],xmm2
630 movups XMMWORD[16+rsi],xmm3
631 movups XMMWORD[32+rsi],xmm4
632 movups XMMWORD[48+rsi],xmm5
633 movups XMMWORD[64+rsi],xmm6
634 movups XMMWORD[80+rsi],xmm7
635 movups XMMWORD[96+rsi],xmm8
636 jmp NEAR $L$ecb_ret
637ALIGN 16
; one block: inline single-block round loop (one round per iteration)
638$L$ecb_enc_one:
639 movups xmm0,XMMWORD[rcx]
640 movups xmm1,XMMWORD[16+rcx]
641 lea rcx,[32+rcx]
642 xorps xmm2,xmm0
643$L$oop_enc1_3:
; aesenc xmm2,xmm1
644DB 102,15,56,220,209
645 dec eax
646 movups xmm1,XMMWORD[rcx]
647 lea rcx,[16+rcx]
648 jnz NEAR $L$oop_enc1_3
; aesenclast xmm2,xmm1
649DB 102,15,56,221,209
650 movups XMMWORD[rsi],xmm2
651 jmp NEAR $L$ecb_ret
652ALIGN 16
653$L$ecb_enc_two:
654 call _aesni_encrypt2
655 movups XMMWORD[rsi],xmm2
656 movups XMMWORD[16+rsi],xmm3
657 jmp NEAR $L$ecb_ret
658ALIGN 16
659$L$ecb_enc_three:
660 call _aesni_encrypt3
661 movups XMMWORD[rsi],xmm2
662 movups XMMWORD[16+rsi],xmm3
663 movups XMMWORD[32+rsi],xmm4
664 jmp NEAR $L$ecb_ret
665ALIGN 16
666$L$ecb_enc_four:
667 call _aesni_encrypt4
668 movups XMMWORD[rsi],xmm2
669 movups XMMWORD[16+rsi],xmm3
670 movups XMMWORD[32+rsi],xmm4
671 movups XMMWORD[48+rsi],xmm5
672 jmp NEAR $L$ecb_ret
673ALIGN 16
; five blocks: use the 6-block helper with a zeroed sixth lane (xmm7),
; whose result is not stored
674$L$ecb_enc_five:
675 xorps xmm7,xmm7
676 call _aesni_encrypt6
677 movups XMMWORD[rsi],xmm2
678 movups XMMWORD[16+rsi],xmm3
679 movups XMMWORD[32+rsi],xmm4
680 movups XMMWORD[48+rsi],xmm5
681 movups XMMWORD[64+rsi],xmm6
682 jmp NEAR $L$ecb_ret
683ALIGN 16
684$L$ecb_enc_six:
685 call _aesni_encrypt6
686 movups XMMWORD[rsi],xmm2
687 movups XMMWORD[16+rsi],xmm3
688 movups XMMWORD[32+rsi],xmm4
689 movups XMMWORD[48+rsi],xmm5
690 movups XMMWORD[64+rsi],xmm6
691 movups XMMWORD[80+rsi],xmm7
692 jmp NEAR $L$ecb_ret
693
694ALIGN 16
; ---- decrypt path: same structure as the encrypt path ----
695$L$ecb_decrypt:
696 cmp rdx,0x80
697 jb NEAR $L$ecb_dec_tail
698
; load the first 8 input blocks and enter the loop at its call site
699 movdqu xmm2,XMMWORD[rdi]
700 movdqu xmm3,XMMWORD[16+rdi]
701 movdqu xmm4,XMMWORD[32+rdi]
702 movdqu xmm5,XMMWORD[48+rdi]
703 movdqu xmm6,XMMWORD[64+rdi]
704 movdqu xmm7,XMMWORD[80+rdi]
705 movdqu xmm8,XMMWORD[96+rdi]
706 movdqu xmm9,XMMWORD[112+rdi]
707 lea rdi,[128+rdi]
708 sub rdx,0x80
709 jmp NEAR $L$ecb_dec_loop8_enter
710ALIGN 16
; Loop body: store the 8 results of the previous call while loading the
; next 8 inputs, and reload rcx/eax from r11/r10d for the helper.
711$L$ecb_dec_loop8:
712 movups XMMWORD[rsi],xmm2
713 mov rcx,r11
714 movdqu xmm2,XMMWORD[rdi]
715 mov eax,r10d
716 movups XMMWORD[16+rsi],xmm3
717 movdqu xmm3,XMMWORD[16+rdi]
718 movups XMMWORD[32+rsi],xmm4
719 movdqu xmm4,XMMWORD[32+rdi]
720 movups XMMWORD[48+rsi],xmm5
721 movdqu xmm5,XMMWORD[48+rdi]
722 movups XMMWORD[64+rsi],xmm6
723 movdqu xmm6,XMMWORD[64+rdi]
724 movups XMMWORD[80+rsi],xmm7
725 movdqu xmm7,XMMWORD[80+rdi]
726 movups XMMWORD[96+rsi],xmm8
727 movdqu xmm8,XMMWORD[96+rdi]
728 movups XMMWORD[112+rsi],xmm9
729 lea rsi,[128+rsi]
730 movdqu xmm9,XMMWORD[112+rdi]
731 lea rdi,[128+rdi]
732$L$ecb_dec_loop8_enter:
733
734 call _aesni_decrypt8
735
; reload round key 0 into xmm0 after the helper call
736 movups xmm0,XMMWORD[r11]
; keep looping while at least another 128 bytes remain (no borrow)
737 sub rdx,0x80
738 jnc NEAR $L$ecb_dec_loop8
739
; store the last full group of 8, zeroing each data register right after
; its store, and restore rcx/eax for the tail
740 movups XMMWORD[rsi],xmm2
741 pxor xmm2,xmm2
742 mov rcx,r11
743 movups XMMWORD[16+rsi],xmm3
744 pxor xmm3,xmm3
745 mov eax,r10d
746 movups XMMWORD[32+rsi],xmm4
747 pxor xmm4,xmm4
748 movups XMMWORD[48+rsi],xmm5
749 pxor xmm5,xmm5
750 movups XMMWORD[64+rsi],xmm6
751 pxor xmm6,xmm6
752 movups XMMWORD[80+rsi],xmm7
753 pxor xmm7,xmm7
754 movups XMMWORD[96+rsi],xmm8
755 pxor xmm8,xmm8
756 movups XMMWORD[112+rsi],xmm9
757 pxor xmm9,xmm9
758 lea rsi,[128+rsi]
; undo the last subtraction: rdx = bytes still to do (0..112)
759 add rdx,0x80
760 jz NEAR $L$ecb_ret
761
; Tail dispatch on the remaining byte count. Each cmp feeds both the jb and
; the following je; the movups between them does not modify flags.
762$L$ecb_dec_tail:
763 movups xmm2,XMMWORD[rdi]
764 cmp rdx,0x20
765 jb NEAR $L$ecb_dec_one
766 movups xmm3,XMMWORD[16+rdi]
767 je NEAR $L$ecb_dec_two
768 movups xmm4,XMMWORD[32+rdi]
769 cmp rdx,0x40
770 jb NEAR $L$ecb_dec_three
771 movups xmm5,XMMWORD[48+rdi]
772 je NEAR $L$ecb_dec_four
773 movups xmm6,XMMWORD[64+rdi]
774 cmp rdx,0x60
775 jb NEAR $L$ecb_dec_five
776 movups xmm7,XMMWORD[80+rdi]
777 je NEAR $L$ecb_dec_six
; seven blocks: use the 8-block helper with a zeroed eighth lane (xmm9),
; whose result is not stored
778 movups xmm8,XMMWORD[96+rdi]
779 movups xmm0,XMMWORD[rcx]
780 xorps xmm9,xmm9
781 call _aesni_decrypt8
782 movups XMMWORD[rsi],xmm2
783 pxor xmm2,xmm2
784 movups XMMWORD[16+rsi],xmm3
785 pxor xmm3,xmm3
786 movups XMMWORD[32+rsi],xmm4
787 pxor xmm4,xmm4
788 movups XMMWORD[48+rsi],xmm5
789 pxor xmm5,xmm5
790 movups XMMWORD[64+rsi],xmm6
791 pxor xmm6,xmm6
792 movups XMMWORD[80+rsi],xmm7
793 pxor xmm7,xmm7
794 movups XMMWORD[96+rsi],xmm8
795 pxor xmm8,xmm8
796 pxor xmm9,xmm9
797 jmp NEAR $L$ecb_ret
798ALIGN 16
; one block: inline single-block round loop (one round per iteration)
799$L$ecb_dec_one:
800 movups xmm0,XMMWORD[rcx]
801 movups xmm1,XMMWORD[16+rcx]
802 lea rcx,[32+rcx]
803 xorps xmm2,xmm0
804$L$oop_dec1_4:
; aesdec xmm2,xmm1
805DB 102,15,56,222,209
806 dec eax
807 movups xmm1,XMMWORD[rcx]
808 lea rcx,[16+rcx]
809 jnz NEAR $L$oop_dec1_4
; aesdeclast xmm2,xmm1
810DB 102,15,56,223,209
811 movups XMMWORD[rsi],xmm2
812 pxor xmm2,xmm2
813 jmp NEAR $L$ecb_ret
814ALIGN 16
815$L$ecb_dec_two:
816 call _aesni_decrypt2
817 movups XMMWORD[rsi],xmm2
818 pxor xmm2,xmm2
819 movups XMMWORD[16+rsi],xmm3
820 pxor xmm3,xmm3
821 jmp NEAR $L$ecb_ret
822ALIGN 16
823$L$ecb_dec_three:
824 call _aesni_decrypt3
825 movups XMMWORD[rsi],xmm2
826 pxor xmm2,xmm2
827 movups XMMWORD[16+rsi],xmm3
828 pxor xmm3,xmm3
829 movups XMMWORD[32+rsi],xmm4
830 pxor xmm4,xmm4
831 jmp NEAR $L$ecb_ret
832ALIGN 16
833$L$ecb_dec_four:
834 call _aesni_decrypt4
835 movups XMMWORD[rsi],xmm2
836 pxor xmm2,xmm2
837 movups XMMWORD[16+rsi],xmm3
838 pxor xmm3,xmm3
839 movups XMMWORD[32+rsi],xmm4
840 pxor xmm4,xmm4
841 movups XMMWORD[48+rsi],xmm5
842 pxor xmm5,xmm5
843 jmp NEAR $L$ecb_ret
844ALIGN 16
; five blocks: use the 6-block helper with a zeroed sixth lane (xmm7),
; whose result is not stored
845$L$ecb_dec_five:
846 xorps xmm7,xmm7
847 call _aesni_decrypt6
848 movups XMMWORD[rsi],xmm2
849 pxor xmm2,xmm2
850 movups XMMWORD[16+rsi],xmm3
851 pxor xmm3,xmm3
852 movups XMMWORD[32+rsi],xmm4
853 pxor xmm4,xmm4
854 movups XMMWORD[48+rsi],xmm5
855 pxor xmm5,xmm5
856 movups XMMWORD[64+rsi],xmm6
857 pxor xmm6,xmm6
858 pxor xmm7,xmm7
859 jmp NEAR $L$ecb_ret
860ALIGN 16
; six blocks: falls through into $L$ecb_ret after the last store
861$L$ecb_dec_six:
862 call _aesni_decrypt6
863 movups XMMWORD[rsi],xmm2
864 pxor xmm2,xmm2
865 movups XMMWORD[16+rsi],xmm3
866 pxor xmm3,xmm3
867 movups XMMWORD[32+rsi],xmm4
868 pxor xmm4,xmm4
869 movups XMMWORD[48+rsi],xmm5
870 pxor xmm5,xmm5
871 movups XMMWORD[64+rsi],xmm6
872 pxor xmm6,xmm6
873 movups XMMWORD[80+rsi],xmm7
874 pxor xmm7,xmm7
875
; Common exit: zero xmm0/xmm1, restore xmm6-xmm9 from the frame and
; overwrite each spill slot with the zeroed xmm0, then release the frame.
876$L$ecb_ret:
877 xorps xmm0,xmm0
878 pxor xmm1,xmm1
879 movaps xmm6,XMMWORD[rsp]
880 movaps XMMWORD[rsp],xmm0
881 movaps xmm7,XMMWORD[16+rsp]
882 movaps XMMWORD[16+rsp],xmm0
883 movaps xmm8,XMMWORD[32+rsp]
884 movaps XMMWORD[32+rsp],xmm0
885 movaps xmm9,XMMWORD[48+rsp]
886 movaps XMMWORD[48+rsp],xmm0
887 lea rsp,[88+rsp]
888$L$ecb_enc_ret:
; restore rdi/rsi from the home space written in the prologue
889 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
890 mov rsi,QWORD[16+rsp]
891 DB 0F3h,0C3h ;repret
892
893$L$SEH_end_aesni_ecb_encrypt:
;-----------------------------------------------------------------------
; aesni_ccm64_encrypt_blocks -- per 16-byte block, encrypt a counter
; block and a running second block (loaded from and written back to [r9];
; presumably the CCM MAC -- confirm with caller) side by side, xor the
; input with the encrypted counter to form the output, and fold the input
; into the second block before it is encrypted.
; ABI:   Win64 entry. rcx, rdx, r8, r9 and the stack arguments at
;        [40+rsp] and [48+rsp] are moved into rdi/rsi/rdx/rcx/r8/r9.
; After that register shuffle:
;        rdi = input pointer, rsi = output pointer
;        rdx = number of 16-byte blocks; must be non-zero, because the
;              loop decrements and tests it only after processing a block
;        rcx = key schedule (round keys from offset 0, DWORD loop count
;              at offset 240)
;        r8  = pointer to the 16-byte counter block (only read here)
;        r9  = pointer to the 16-byte second block (read, then updated)
; Regs:  xmm6 = counter after pshufb with xmm7; incremented with paddq
;        xmm2 = counter block to encrypt, xmm3 = running second block
;        xmm8 = current input block
;        xmm9 = $L$increment64, xmm7 = $L$bswap_mask (both constants are
;        defined outside the code shown; presumably a 64-bit increment
;        and a byte-reversal shuffle mask -- confirm at their definition)
;        r11 = key pointer, r10 = 16 - 16*count (loop start offset)
; Frame: 88 bytes; xmm6-xmm9 are spilled, restored on exit, and their
;        stack slots overwritten with zero.
;-----------------------------------------------------------------------
894global aesni_ccm64_encrypt_blocks
895
896ALIGN 16
897aesni_ccm64_encrypt_blocks:
; save rdi/rsi (callee-saved on Win64) in the caller-provided home space
898 mov QWORD[8+rsp],rdi ;WIN64 prologue
899 mov QWORD[16+rsp],rsi
900 mov rax,rsp
901$L$SEH_begin_aesni_ccm64_encrypt_blocks:
902 mov rdi,rcx
903 mov rsi,rdx
904 mov rdx,r8
905 mov rcx,r9
; fifth and sixth arguments from the stack
906 mov r8,QWORD[40+rsp]
907 mov r9,QWORD[48+rsp]
908
909
; allocate the frame and spill xmm6-xmm9
910 lea rsp,[((-88))+rsp]
911 movaps XMMWORD[rsp],xmm6
912 movaps XMMWORD[16+rsp],xmm7
913 movaps XMMWORD[32+rsp],xmm8
914 movaps XMMWORD[48+rsp],xmm9
915$L$ccm64_enc_body:
; eax = loop count, xmm6 = counter block, xmm9/xmm7 = constants
916 mov eax,DWORD[240+rcx]
917 movdqu xmm6,XMMWORD[r8]
918 movdqa xmm9,XMMWORD[$L$increment64]
919 movdqa xmm7,XMMWORD[$L$bswap_mask]
920
; rax = 16*count, r10 = 16 - rax, r11 = key, rcx = key + 32 + 16*count
921 shl eax,4
922 mov r10d,16
923 lea r11,[rcx]
; xmm3 = second block from [r9]; xmm2 = counter block as loaded
924 movdqu xmm3,XMMWORD[r9]
925 movdqa xmm2,xmm6
926 lea rcx,[32+rax*1+rcx]
; pshufb xmm6,xmm7
927DB 102,15,56,0,247
928 sub r10,rax
929 jmp NEAR $L$ccm64_enc_outer
930ALIGN 16
; One input block per outer iteration.
931$L$ccm64_enc_outer:
; xmm0 = round key 0, rax = loop start offset, xmm8 = input block
932 movups xmm0,XMMWORD[r11]
933 mov rax,r10
934 movups xmm8,XMMWORD[rdi]
935
; xmm2 = counter ^ key0; xmm3 ^= (input ^ key0), i.e. the input is folded
; into the second block together with the whitening
936 xorps xmm2,xmm0
937 movups xmm1,XMMWORD[16+r11]
938 xorps xmm0,xmm8
939 xorps xmm3,xmm0
940 movups xmm0,XMMWORD[32+r11]
941
; Two rounds per iteration on xmm2 and xmm3. jnz tests ZF from "add rax,32".
942$L$ccm64_enc2_loop:
; aesenc xmm2,xmm1 ; aesenc xmm3,xmm1
943DB 102,15,56,220,209
944DB 102,15,56,220,217
945 movups xmm1,XMMWORD[rax*1+rcx]
946 add rax,32
; aesenc xmm2,xmm0 ; aesenc xmm3,xmm0
947DB 102,15,56,220,208
948DB 102,15,56,220,216
949 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
950 jnz NEAR $L$ccm64_enc2_loop
; aesenc xmm2,xmm1 ; aesenc xmm3,xmm1
951DB 102,15,56,220,209
952DB 102,15,56,220,217
; advance the counter; "dec rdx" sets ZF for the jnz at the bottom of the
; outer loop (nothing between here and that jnz modifies flags)
953 paddq xmm6,xmm9
954 dec rdx
; aesenclast xmm2,xmm0 ; aesenclast xmm3,xmm0
955DB 102,15,56,221,208
956DB 102,15,56,221,216
957
; output = input ^ encrypted counter; prepare the next counter block
958 lea rdi,[16+rdi]
959 xorps xmm8,xmm2
960 movdqa xmm2,xmm6
961 movups XMMWORD[rsi],xmm8
; pshufb xmm2,xmm7
962DB 102,15,56,0,215
963 lea rsi,[16+rsi]
964 jnz NEAR $L$ccm64_enc_outer
965
; write the second block back to [r9] and zero the working registers
966 pxor xmm0,xmm0
967 pxor xmm1,xmm1
968 pxor xmm2,xmm2
969 movups XMMWORD[r9],xmm3
970 pxor xmm3,xmm3
971 pxor xmm8,xmm8
972 pxor xmm6,xmm6
; restore xmm6-xmm9 and overwrite their spill slots with the zeroed xmm0
973 movaps xmm6,XMMWORD[rsp]
974 movaps XMMWORD[rsp],xmm0
975 movaps xmm7,XMMWORD[16+rsp]
976 movaps XMMWORD[16+rsp],xmm0
977 movaps xmm8,XMMWORD[32+rsp]
978 movaps XMMWORD[32+rsp],xmm0
979 movaps xmm9,XMMWORD[48+rsp]
980 movaps XMMWORD[48+rsp],xmm0
981 lea rsp,[88+rsp]
982$L$ccm64_enc_ret:
; restore rdi/rsi from the home space written in the prologue
983 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
984 mov rsi,QWORD[16+rsp]
985 DB 0F3h,0C3h ;repret
986$L$SEH_end_aesni_ccm64_encrypt_blocks:
;-----------------------------------------------------------------------
; aesni_ccm64_decrypt_blocks -- per 16-byte block, xor the input with an
; encrypted counter block to form the output, and fold that output into
; a running second block (loaded from and written back to [r9];
; presumably the CCM MAC -- confirm with caller), which is then encrypted.
; ABI:   Win64 entry. rcx, rdx, r8, r9 and the stack arguments at
;        [40+rsp] and [48+rsp] are moved into rdi/rsi/rdx/rcx/r8/r9.
; After that register shuffle:
;        rdi = input pointer, rsi = output pointer
;        rdx = number of 16-byte blocks; must be non-zero, because the
;              first block is processed before rdx is tested
;        rcx = key schedule (round keys from offset 0, DWORD loop count
;              at offset 240)
;        r8  = pointer to the 16-byte counter block (only read here)
;        r9  = pointer to the 16-byte second block (read, then updated)
; Regs:  xmm6 = counter after pshufb with xmm7; incremented with paddq
;        xmm2 = counter block to encrypt, xmm3 = running second block
;        xmm8 = current input / output block
;        xmm9 = $L$increment64, xmm7 = $L$bswap_mask (both constants are
;        defined outside the code shown; presumably a 64-bit increment
;        and a byte-reversal shuffle mask -- confirm at their definition)
;        r11 = key pointer, r10 = 16 - 16*count (loop start offset)
; Shape: the first counter block is encrypted alone up front; afterwards
;        each iteration encrypts the next counter block and the second
;        block in parallel; the final second-block encryption is done
;        alone at $L$ccm64_dec_break.
; Frame: 88 bytes; xmm6-xmm9 are spilled, restored on exit, and their
;        stack slots overwritten with zero.
;-----------------------------------------------------------------------
987global aesni_ccm64_decrypt_blocks
988
989ALIGN 16
990aesni_ccm64_decrypt_blocks:
; save rdi/rsi (callee-saved on Win64) in the caller-provided home space
991 mov QWORD[8+rsp],rdi ;WIN64 prologue
992 mov QWORD[16+rsp],rsi
993 mov rax,rsp
994$L$SEH_begin_aesni_ccm64_decrypt_blocks:
995 mov rdi,rcx
996 mov rsi,rdx
997 mov rdx,r8
998 mov rcx,r9
; fifth and sixth arguments from the stack
999 mov r8,QWORD[40+rsp]
1000 mov r9,QWORD[48+rsp]
1001
1002
; allocate the frame and spill xmm6-xmm9
1003 lea rsp,[((-88))+rsp]
1004 movaps XMMWORD[rsp],xmm6
1005 movaps XMMWORD[16+rsp],xmm7
1006 movaps XMMWORD[32+rsp],xmm8
1007 movaps XMMWORD[48+rsp],xmm9
1008$L$ccm64_dec_body:
; eax = loop count, xmm6 = counter block, xmm3 = second block, constants
1009 mov eax,DWORD[240+rcx]
1010 movups xmm6,XMMWORD[r8]
1011 movdqu xmm3,XMMWORD[r9]
1012 movdqa xmm9,XMMWORD[$L$increment64]
1013 movdqa xmm7,XMMWORD[$L$bswap_mask]
1014
; xmm2 = counter block as loaded; keep copies of count (r10d) and key (r11)
1015 movaps xmm2,xmm6
1016 mov r10d,eax
1017 mov r11,rcx
; pshufb xmm6,xmm7
1018DB 102,15,56,0,247
; encrypt the first counter block alone (one round per iteration)
1019 movups xmm0,XMMWORD[rcx]
1020 movups xmm1,XMMWORD[16+rcx]
1021 lea rcx,[32+rcx]
1022 xorps xmm2,xmm0
1023$L$oop_enc1_5:
; aesenc xmm2,xmm1
1024DB 102,15,56,220,209
1025 dec eax
1026 movups xmm1,XMMWORD[rcx]
1027 lea rcx,[16+rcx]
1028 jnz NEAR $L$oop_enc1_5
; aesenclast xmm2,xmm1
1029DB 102,15,56,221,209
; r10 = 16*count; load the first input block and advance the counter
1030 shl r10d,4
1031 mov eax,16
1032 movups xmm8,XMMWORD[rdi]
1033 paddq xmm6,xmm9
1034 lea rdi,[16+rdi]
; rax = 16 - 16*count; rcx = key + 32 + 16*count; r10 keeps the offset
1035 sub rax,r10
1036 lea rcx,[32+r10*1+r11]
1037 mov r10,rax
1038 jmp NEAR $L$ccm64_dec_outer
1039ALIGN 16
; One block per outer iteration.
1040$L$ccm64_dec_outer:
; output = input ^ encrypted counter; store it; prepare next counter block
1041 xorps xmm8,xmm2
1042 movdqa xmm2,xmm6
1043 movups XMMWORD[rsi],xmm8
1044 lea rsi,[16+rsi]
; pshufb xmm2,xmm7
1045DB 102,15,56,0,215
1046
; last block done? then finish the second block at $L$ccm64_dec_break
1047 sub rdx,1
1048 jz NEAR $L$ccm64_dec_break
1049
; xmm8 = output ^ key0; xmm2 = counter ^ key0; xmm3 ^= xmm8 (folds the
; output block and the whitening into the second block)
1050 movups xmm0,XMMWORD[r11]
1051 mov rax,r10
1052 movups xmm1,XMMWORD[16+r11]
1053 xorps xmm8,xmm0
1054 xorps xmm2,xmm0
1055 xorps xmm3,xmm8
1056 movups xmm0,XMMWORD[32+r11]
1057 jmp NEAR $L$ccm64_dec2_loop
1058ALIGN 16
; Two rounds per iteration on xmm2 and xmm3. jnz tests ZF from "add rax,32".
1059$L$ccm64_dec2_loop:
; aesenc xmm2,xmm1 ; aesenc xmm3,xmm1
1060DB 102,15,56,220,209
1061DB 102,15,56,220,217
1062 movups xmm1,XMMWORD[rax*1+rcx]
1063 add rax,32
; aesenc xmm2,xmm0 ; aesenc xmm3,xmm0
1064DB 102,15,56,220,208
1065DB 102,15,56,220,216
1066 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
1067 jnz NEAR $L$ccm64_dec2_loop
; load the next input block and advance the counter while the last rounds
; complete
1068 movups xmm8,XMMWORD[rdi]
1069 paddq xmm6,xmm9
; aesenc xmm2,xmm1 ; aesenc xmm3,xmm1
1070DB 102,15,56,220,209
1071DB 102,15,56,220,217
; aesenclast xmm2,xmm0 ; aesenclast xmm3,xmm0
1072DB 102,15,56,221,208
1073DB 102,15,56,221,216
1074 lea rdi,[16+rdi]
1075 jmp NEAR $L$ccm64_dec_outer
1076
1077ALIGN 16
; Final step: fold the last output block into xmm3 and encrypt xmm3 alone,
; walking the round keys through r11 (one round per iteration).
1078$L$ccm64_dec_break:
1079
1080 mov eax,DWORD[240+r11]
1081 movups xmm0,XMMWORD[r11]
1082 movups xmm1,XMMWORD[16+r11]
1083 xorps xmm8,xmm0
1084 lea r11,[32+r11]
1085 xorps xmm3,xmm8
1086$L$oop_enc1_6:
; aesenc xmm3,xmm1
1087DB 102,15,56,220,217
1088 dec eax
1089 movups xmm1,XMMWORD[r11]
1090 lea r11,[16+r11]
1091 jnz NEAR $L$oop_enc1_6
; aesenclast xmm3,xmm1
1092DB 102,15,56,221,217
; write the second block back to [r9] and zero the working registers
1093 pxor xmm0,xmm0
1094 pxor xmm1,xmm1
1095 pxor xmm2,xmm2
1096 movups XMMWORD[r9],xmm3
1097 pxor xmm3,xmm3
1098 pxor xmm8,xmm8
1099 pxor xmm6,xmm6
; restore xmm6-xmm9 and overwrite their spill slots with the zeroed xmm0
1100 movaps xmm6,XMMWORD[rsp]
1101 movaps XMMWORD[rsp],xmm0
1102 movaps xmm7,XMMWORD[16+rsp]
1103 movaps XMMWORD[16+rsp],xmm0
1104 movaps xmm8,XMMWORD[32+rsp]
1105 movaps XMMWORD[32+rsp],xmm0
1106 movaps xmm9,XMMWORD[48+rsp]
1107 movaps XMMWORD[48+rsp],xmm0
1108 lea rsp,[88+rsp]
1109$L$ccm64_dec_ret:
; restore rdi/rsi from the home space written in the prologue
1110 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1111 mov rsi,QWORD[16+rsp]
1112 DB 0F3h,0C3h ;repret
1113$L$SEH_end_aesni_ccm64_decrypt_blocks:
1114global aesni_ctr32_encrypt_blocks
1115
1116ALIGN 16
1117aesni_ctr32_encrypt_blocks:
1118 mov QWORD[8+rsp],rdi ;WIN64 prologue
1119 mov QWORD[16+rsp],rsi
1120 mov rax,rsp
1121$L$SEH_begin_aesni_ctr32_encrypt_blocks:
1122 mov rdi,rcx
1123 mov rsi,rdx
1124 mov rdx,r8
1125 mov rcx,r9
1126 mov r8,QWORD[40+rsp]
1127
1128
1129
1130 cmp rdx,1
1131 jne NEAR $L$ctr32_bulk
1132
1133
1134
1135 movups xmm2,XMMWORD[r8]
1136 movups xmm3,XMMWORD[rdi]
1137 mov edx,DWORD[240+rcx]
1138 movups xmm0,XMMWORD[rcx]
1139 movups xmm1,XMMWORD[16+rcx]
1140 lea rcx,[32+rcx]
1141 xorps xmm2,xmm0
1142$L$oop_enc1_7:
1143DB 102,15,56,220,209
1144 dec edx
1145 movups xmm1,XMMWORD[rcx]
1146 lea rcx,[16+rcx]
1147 jnz NEAR $L$oop_enc1_7
1148DB 102,15,56,221,209
1149 pxor xmm0,xmm0
1150 pxor xmm1,xmm1
1151 xorps xmm2,xmm3
1152 pxor xmm3,xmm3
1153 movups XMMWORD[rsi],xmm2
1154 xorps xmm2,xmm2
1155 jmp NEAR $L$ctr32_epilogue
1156
1157ALIGN 16
1158$L$ctr32_bulk:
1159 lea r11,[rsp]
1160
1161 push rbp
1162
1163 sub rsp,288
1164 and rsp,-16
1165 movaps XMMWORD[(-168)+r11],xmm6
1166 movaps XMMWORD[(-152)+r11],xmm7
1167 movaps XMMWORD[(-136)+r11],xmm8
1168 movaps XMMWORD[(-120)+r11],xmm9
1169 movaps XMMWORD[(-104)+r11],xmm10
1170 movaps XMMWORD[(-88)+r11],xmm11
1171 movaps XMMWORD[(-72)+r11],xmm12
1172 movaps XMMWORD[(-56)+r11],xmm13
1173 movaps XMMWORD[(-40)+r11],xmm14
1174 movaps XMMWORD[(-24)+r11],xmm15
1175$L$ctr32_body:
1176
1177
1178
1179
1180 movdqu xmm2,XMMWORD[r8]
1181 movdqu xmm0,XMMWORD[rcx]
1182 mov r8d,DWORD[12+r8]
1183 pxor xmm2,xmm0
1184 mov ebp,DWORD[12+rcx]
1185 movdqa XMMWORD[rsp],xmm2
1186 bswap r8d
1187 movdqa xmm3,xmm2
1188 movdqa xmm4,xmm2
1189 movdqa xmm5,xmm2
1190 movdqa XMMWORD[64+rsp],xmm2
1191 movdqa XMMWORD[80+rsp],xmm2
1192 movdqa XMMWORD[96+rsp],xmm2
1193 mov r10,rdx
1194 movdqa XMMWORD[112+rsp],xmm2
1195
1196 lea rax,[1+r8]
1197 lea rdx,[2+r8]
1198 bswap eax
1199 bswap edx
1200 xor eax,ebp
1201 xor edx,ebp
1202DB 102,15,58,34,216,3
1203 lea rax,[3+r8]
1204 movdqa XMMWORD[16+rsp],xmm3
1205DB 102,15,58,34,226,3
1206 bswap eax
1207 mov rdx,r10
1208 lea r10,[4+r8]
1209 movdqa XMMWORD[32+rsp],xmm4
1210 xor eax,ebp
1211 bswap r10d
1212DB 102,15,58,34,232,3
1213 xor r10d,ebp
1214 movdqa XMMWORD[48+rsp],xmm5
1215 lea r9,[5+r8]
1216 mov DWORD[((64+12))+rsp],r10d
1217 bswap r9d
1218 lea r10,[6+r8]
1219 mov eax,DWORD[240+rcx]
1220 xor r9d,ebp
1221 bswap r10d
1222 mov DWORD[((80+12))+rsp],r9d
1223 xor r10d,ebp
1224 lea r9,[7+r8]
1225 mov DWORD[((96+12))+rsp],r10d
1226 bswap r9d
1227 mov r10d,DWORD[((OPENSSL_ia32cap_P+4))]
1228 xor r9d,ebp
1229 and r10d,71303168
1230 mov DWORD[((112+12))+rsp],r9d
1231
1232 movups xmm1,XMMWORD[16+rcx]
1233
1234 movdqa xmm6,XMMWORD[64+rsp]
1235 movdqa xmm7,XMMWORD[80+rsp]
1236
1237 cmp rdx,8
1238 jb NEAR $L$ctr32_tail
1239
1240 sub rdx,6
1241 cmp r10d,4194304
1242 je NEAR $L$ctr32_6x
1243
1244 lea rcx,[128+rcx]
1245 sub rdx,2
1246 jmp NEAR $L$ctr32_loop8
1247
1248ALIGN 16
1249$L$ctr32_6x:
1250 shl eax,4
1251 mov r10d,48
1252 bswap ebp
1253 lea rcx,[32+rax*1+rcx]
1254 sub r10,rax
1255 jmp NEAR $L$ctr32_loop6
1256
1257ALIGN 16
1258$L$ctr32_loop6:
1259 add r8d,6
1260 movups xmm0,XMMWORD[((-48))+r10*1+rcx]
1261DB 102,15,56,220,209
1262 mov eax,r8d
1263 xor eax,ebp
1264DB 102,15,56,220,217
1265DB 0x0f,0x38,0xf1,0x44,0x24,12
1266 lea eax,[1+r8]
1267DB 102,15,56,220,225
1268 xor eax,ebp
1269DB 0x0f,0x38,0xf1,0x44,0x24,28
1270DB 102,15,56,220,233
1271 lea eax,[2+r8]
1272 xor eax,ebp
1273DB 102,15,56,220,241
1274DB 0x0f,0x38,0xf1,0x44,0x24,44
1275 lea eax,[3+r8]
1276DB 102,15,56,220,249
1277 movups xmm1,XMMWORD[((-32))+r10*1+rcx]
1278 xor eax,ebp
1279
1280DB 102,15,56,220,208
1281DB 0x0f,0x38,0xf1,0x44,0x24,60
1282 lea eax,[4+r8]
1283DB 102,15,56,220,216
1284 xor eax,ebp
1285DB 0x0f,0x38,0xf1,0x44,0x24,76
1286DB 102,15,56,220,224
1287 lea eax,[5+r8]
1288 xor eax,ebp
1289DB 102,15,56,220,232
1290DB 0x0f,0x38,0xf1,0x44,0x24,92
1291 mov rax,r10
1292DB 102,15,56,220,240
1293DB 102,15,56,220,248
1294 movups xmm0,XMMWORD[((-16))+r10*1+rcx]
1295
1296 call $L$enc_loop6
1297
1298 movdqu xmm8,XMMWORD[rdi]
1299 movdqu xmm9,XMMWORD[16+rdi]
1300 movdqu xmm10,XMMWORD[32+rdi]
1301 movdqu xmm11,XMMWORD[48+rdi]
1302 movdqu xmm12,XMMWORD[64+rdi]
1303 movdqu xmm13,XMMWORD[80+rdi]
1304 lea rdi,[96+rdi]
1305 movups xmm1,XMMWORD[((-64))+r10*1+rcx]
1306 pxor xmm8,xmm2
1307 movaps xmm2,XMMWORD[rsp]
1308 pxor xmm9,xmm3
1309 movaps xmm3,XMMWORD[16+rsp]
1310 pxor xmm10,xmm4
1311 movaps xmm4,XMMWORD[32+rsp]
1312 pxor xmm11,xmm5
1313 movaps xmm5,XMMWORD[48+rsp]
1314 pxor xmm12,xmm6
1315 movaps xmm6,XMMWORD[64+rsp]
1316 pxor xmm13,xmm7
1317 movaps xmm7,XMMWORD[80+rsp]
1318 movdqu XMMWORD[rsi],xmm8
1319 movdqu XMMWORD[16+rsi],xmm9
1320 movdqu XMMWORD[32+rsi],xmm10
1321 movdqu XMMWORD[48+rsi],xmm11
1322 movdqu XMMWORD[64+rsi],xmm12
1323 movdqu XMMWORD[80+rsi],xmm13
1324 lea rsi,[96+rsi]
1325
1326 sub rdx,6
1327 jnc NEAR $L$ctr32_loop6
1328
1329 add rdx,6
1330 jz NEAR $L$ctr32_done
1331
1332 lea eax,[((-48))+r10]
1333 lea rcx,[((-80))+r10*1+rcx]
1334 neg eax
1335 shr eax,4
1336 jmp NEAR $L$ctr32_tail
1337
1338ALIGN 32
1339$L$ctr32_loop8:
1340 add r8d,8
1341 movdqa xmm8,XMMWORD[96+rsp]
1342DB 102,15,56,220,209
1343 mov r9d,r8d
1344 movdqa xmm9,XMMWORD[112+rsp]
1345DB 102,15,56,220,217
1346 bswap r9d
1347 movups xmm0,XMMWORD[((32-128))+rcx]
1348DB 102,15,56,220,225
1349 xor r9d,ebp
1350 nop
1351DB 102,15,56,220,233
1352 mov DWORD[((0+12))+rsp],r9d
1353 lea r9,[1+r8]
1354DB 102,15,56,220,241
1355DB 102,15,56,220,249
1356DB 102,68,15,56,220,193
1357DB 102,68,15,56,220,201
1358 movups xmm1,XMMWORD[((48-128))+rcx]
1359 bswap r9d
1360DB 102,15,56,220,208
1361DB 102,15,56,220,216
1362 xor r9d,ebp
1363DB 0x66,0x90
1364DB 102,15,56,220,224
1365DB 102,15,56,220,232
1366 mov DWORD[((16+12))+rsp],r9d
1367 lea r9,[2+r8]
1368DB 102,15,56,220,240
1369DB 102,15,56,220,248
1370DB 102,68,15,56,220,192
1371DB 102,68,15,56,220,200
1372 movups xmm0,XMMWORD[((64-128))+rcx]
1373 bswap r9d
1374DB 102,15,56,220,209
1375DB 102,15,56,220,217
1376 xor r9d,ebp
1377DB 0x66,0x90
1378DB 102,15,56,220,225
1379DB 102,15,56,220,233
1380 mov DWORD[((32+12))+rsp],r9d
1381 lea r9,[3+r8]
1382DB 102,15,56,220,241
1383DB 102,15,56,220,249
1384DB 102,68,15,56,220,193
1385DB 102,68,15,56,220,201
1386 movups xmm1,XMMWORD[((80-128))+rcx]
1387 bswap r9d
1388DB 102,15,56,220,208
1389DB 102,15,56,220,216
1390 xor r9d,ebp
1391DB 0x66,0x90
1392DB 102,15,56,220,224
1393DB 102,15,56,220,232
1394 mov DWORD[((48+12))+rsp],r9d
1395 lea r9,[4+r8]
1396DB 102,15,56,220,240
1397DB 102,15,56,220,248
1398DB 102,68,15,56,220,192
1399DB 102,68,15,56,220,200
1400 movups xmm0,XMMWORD[((96-128))+rcx]
1401 bswap r9d
1402DB 102,15,56,220,209
1403DB 102,15,56,220,217
1404 xor r9d,ebp
1405DB 0x66,0x90
1406DB 102,15,56,220,225
1407DB 102,15,56,220,233
1408 mov DWORD[((64+12))+rsp],r9d
1409 lea r9,[5+r8]
1410DB 102,15,56,220,241
1411DB 102,15,56,220,249
1412DB 102,68,15,56,220,193
1413DB 102,68,15,56,220,201
1414 movups xmm1,XMMWORD[((112-128))+rcx]
1415 bswap r9d
1416DB 102,15,56,220,208
1417DB 102,15,56,220,216
1418 xor r9d,ebp
1419DB 0x66,0x90
1420DB 102,15,56,220,224
1421DB 102,15,56,220,232
1422 mov DWORD[((80+12))+rsp],r9d
1423 lea r9,[6+r8]
1424DB 102,15,56,220,240
1425DB 102,15,56,220,248
1426DB 102,68,15,56,220,192
1427DB 102,68,15,56,220,200
1428 movups xmm0,XMMWORD[((128-128))+rcx]
1429 bswap r9d
1430DB 102,15,56,220,209
1431DB 102,15,56,220,217
1432 xor r9d,ebp
1433DB 0x66,0x90
1434DB 102,15,56,220,225
1435DB 102,15,56,220,233
1436 mov DWORD[((96+12))+rsp],r9d
1437 lea r9,[7+r8]
1438DB 102,15,56,220,241
1439DB 102,15,56,220,249
1440DB 102,68,15,56,220,193
1441DB 102,68,15,56,220,201
1442 movups xmm1,XMMWORD[((144-128))+rcx]
1443 bswap r9d
1444DB 102,15,56,220,208
1445DB 102,15,56,220,216
1446DB 102,15,56,220,224
1447 xor r9d,ebp
1448 movdqu xmm10,XMMWORD[rdi]
1449DB 102,15,56,220,232
1450 mov DWORD[((112+12))+rsp],r9d
1451 cmp eax,11
1452DB 102,15,56,220,240
1453DB 102,15,56,220,248
1454DB 102,68,15,56,220,192
1455DB 102,68,15,56,220,200
1456 movups xmm0,XMMWORD[((160-128))+rcx]
1457
1458 jb NEAR $L$ctr32_enc_done
1459
1460DB 102,15,56,220,209
1461DB 102,15,56,220,217
1462DB 102,15,56,220,225
1463DB 102,15,56,220,233
1464DB 102,15,56,220,241
1465DB 102,15,56,220,249
1466DB 102,68,15,56,220,193
1467DB 102,68,15,56,220,201
1468 movups xmm1,XMMWORD[((176-128))+rcx]
1469
1470DB 102,15,56,220,208
1471DB 102,15,56,220,216
1472DB 102,15,56,220,224
1473DB 102,15,56,220,232
1474DB 102,15,56,220,240
1475DB 102,15,56,220,248
1476DB 102,68,15,56,220,192
1477DB 102,68,15,56,220,200
1478 movups xmm0,XMMWORD[((192-128))+rcx]
1479 je NEAR $L$ctr32_enc_done
1480
1481DB 102,15,56,220,209
1482DB 102,15,56,220,217
1483DB 102,15,56,220,225
1484DB 102,15,56,220,233
1485DB 102,15,56,220,241
1486DB 102,15,56,220,249
1487DB 102,68,15,56,220,193
1488DB 102,68,15,56,220,201
1489 movups xmm1,XMMWORD[((208-128))+rcx]
1490
1491DB 102,15,56,220,208
1492DB 102,15,56,220,216
1493DB 102,15,56,220,224
1494DB 102,15,56,220,232
1495DB 102,15,56,220,240
1496DB 102,15,56,220,248
1497DB 102,68,15,56,220,192
1498DB 102,68,15,56,220,200
1499 movups xmm0,XMMWORD[((224-128))+rcx]
1500 jmp NEAR $L$ctr32_enc_done
1501
1502ALIGN 16
1503$L$ctr32_enc_done:
1504 movdqu xmm11,XMMWORD[16+rdi]
1505 pxor xmm10,xmm0
1506 movdqu xmm12,XMMWORD[32+rdi]
1507 pxor xmm11,xmm0
1508 movdqu xmm13,XMMWORD[48+rdi]
1509 pxor xmm12,xmm0
1510 movdqu xmm14,XMMWORD[64+rdi]
1511 pxor xmm13,xmm0
1512 movdqu xmm15,XMMWORD[80+rdi]
1513 pxor xmm14,xmm0
1514 pxor xmm15,xmm0
1515DB 102,15,56,220,209
1516DB 102,15,56,220,217
1517DB 102,15,56,220,225
1518DB 102,15,56,220,233
1519DB 102,15,56,220,241
1520DB 102,15,56,220,249
1521DB 102,68,15,56,220,193
1522DB 102,68,15,56,220,201
1523 movdqu xmm1,XMMWORD[96+rdi]
1524 lea rdi,[128+rdi]
1525
1526DB 102,65,15,56,221,210
1527 pxor xmm1,xmm0
1528 movdqu xmm10,XMMWORD[((112-128))+rdi]
1529DB 102,65,15,56,221,219
1530 pxor xmm10,xmm0
1531 movdqa xmm11,XMMWORD[rsp]
1532DB 102,65,15,56,221,228
1533DB 102,65,15,56,221,237
1534 movdqa xmm12,XMMWORD[16+rsp]
1535 movdqa xmm13,XMMWORD[32+rsp]
1536DB 102,65,15,56,221,246
1537DB 102,65,15,56,221,255
1538 movdqa xmm14,XMMWORD[48+rsp]
1539 movdqa xmm15,XMMWORD[64+rsp]
1540DB 102,68,15,56,221,193
1541 movdqa xmm0,XMMWORD[80+rsp]
1542 movups xmm1,XMMWORD[((16-128))+rcx]
1543DB 102,69,15,56,221,202
1544
1545 movups XMMWORD[rsi],xmm2
1546 movdqa xmm2,xmm11
1547 movups XMMWORD[16+rsi],xmm3
1548 movdqa xmm3,xmm12
1549 movups XMMWORD[32+rsi],xmm4
1550 movdqa xmm4,xmm13
1551 movups XMMWORD[48+rsi],xmm5
1552 movdqa xmm5,xmm14
1553 movups XMMWORD[64+rsi],xmm6
1554 movdqa xmm6,xmm15
1555 movups XMMWORD[80+rsi],xmm7
1556 movdqa xmm7,xmm0
1557 movups XMMWORD[96+rsi],xmm8
1558 movups XMMWORD[112+rsi],xmm9
1559 lea rsi,[128+rsi]
1560
1561 sub rdx,8
1562 jnc NEAR $L$ctr32_loop8
1563
1564 add rdx,8
1565 jz NEAR $L$ctr32_done
1566 lea rcx,[((-128))+rcx]
1567
1568$L$ctr32_tail:
1569
1570
1571 lea rcx,[16+rcx]
1572 cmp rdx,4
1573 jb NEAR $L$ctr32_loop3
1574 je NEAR $L$ctr32_loop4
1575
1576
1577 shl eax,4
1578 movdqa xmm8,XMMWORD[96+rsp]
1579 pxor xmm9,xmm9
1580
1581 movups xmm0,XMMWORD[16+rcx]
1582DB 102,15,56,220,209
1583DB 102,15,56,220,217
1584 lea rcx,[((32-16))+rax*1+rcx]
1585 neg rax
1586DB 102,15,56,220,225
1587 add rax,16
1588 movups xmm10,XMMWORD[rdi]
1589DB 102,15,56,220,233
1590DB 102,15,56,220,241
1591 movups xmm11,XMMWORD[16+rdi]
1592 movups xmm12,XMMWORD[32+rdi]
1593DB 102,15,56,220,249
1594DB 102,68,15,56,220,193
1595
1596 call $L$enc_loop8_enter
1597
1598 movdqu xmm13,XMMWORD[48+rdi]
1599 pxor xmm2,xmm10
1600 movdqu xmm10,XMMWORD[64+rdi]
1601 pxor xmm3,xmm11
1602 movdqu XMMWORD[rsi],xmm2
1603 pxor xmm4,xmm12
1604 movdqu XMMWORD[16+rsi],xmm3
1605 pxor xmm5,xmm13
1606 movdqu XMMWORD[32+rsi],xmm4
1607 pxor xmm6,xmm10
1608 movdqu XMMWORD[48+rsi],xmm5
1609 movdqu XMMWORD[64+rsi],xmm6
1610 cmp rdx,6
1611 jb NEAR $L$ctr32_done
1612
1613 movups xmm11,XMMWORD[80+rdi]
1614 xorps xmm7,xmm11
1615 movups XMMWORD[80+rsi],xmm7
1616 je NEAR $L$ctr32_done
1617
1618 movups xmm12,XMMWORD[96+rdi]
1619 xorps xmm8,xmm12
1620 movups XMMWORD[96+rsi],xmm8
1621 jmp NEAR $L$ctr32_done
1622
1623ALIGN 32
1624$L$ctr32_loop4:
1625DB 102,15,56,220,209
1626 lea rcx,[16+rcx]
1627 dec eax
1628DB 102,15,56,220,217
1629DB 102,15,56,220,225
1630DB 102,15,56,220,233
1631 movups xmm1,XMMWORD[rcx]
1632 jnz NEAR $L$ctr32_loop4
1633DB 102,15,56,221,209
1634DB 102,15,56,221,217
1635 movups xmm10,XMMWORD[rdi]
1636 movups xmm11,XMMWORD[16+rdi]
1637DB 102,15,56,221,225
1638DB 102,15,56,221,233
1639 movups xmm12,XMMWORD[32+rdi]
1640 movups xmm13,XMMWORD[48+rdi]
1641
1642 xorps xmm2,xmm10
1643 movups XMMWORD[rsi],xmm2
1644 xorps xmm3,xmm11
1645 movups XMMWORD[16+rsi],xmm3
1646 pxor xmm4,xmm12
1647 movdqu XMMWORD[32+rsi],xmm4
1648 pxor xmm5,xmm13
1649 movdqu XMMWORD[48+rsi],xmm5
1650 jmp NEAR $L$ctr32_done
1651
1652ALIGN 32
1653$L$ctr32_loop3:
1654DB 102,15,56,220,209
1655 lea rcx,[16+rcx]
1656 dec eax
1657DB 102,15,56,220,217
1658DB 102,15,56,220,225
1659 movups xmm1,XMMWORD[rcx]
1660 jnz NEAR $L$ctr32_loop3
1661DB 102,15,56,221,209
1662DB 102,15,56,221,217
1663DB 102,15,56,221,225
1664
1665 movups xmm10,XMMWORD[rdi]
1666 xorps xmm2,xmm10
1667 movups XMMWORD[rsi],xmm2
1668 cmp rdx,2
1669 jb NEAR $L$ctr32_done
1670
1671 movups xmm11,XMMWORD[16+rdi]
1672 xorps xmm3,xmm11
1673 movups XMMWORD[16+rsi],xmm3
1674 je NEAR $L$ctr32_done
1675
1676 movups xmm12,XMMWORD[32+rdi]
1677 xorps xmm4,xmm12
1678 movups XMMWORD[32+rsi],xmm4
1679
1680$L$ctr32_done:
1681 xorps xmm0,xmm0
1682 xor ebp,ebp
1683 pxor xmm1,xmm1
1684 pxor xmm2,xmm2
1685 pxor xmm3,xmm3
1686 pxor xmm4,xmm4
1687 pxor xmm5,xmm5
1688 movaps xmm6,XMMWORD[((-168))+r11]
1689 movaps XMMWORD[(-168)+r11],xmm0
1690 movaps xmm7,XMMWORD[((-152))+r11]
1691 movaps XMMWORD[(-152)+r11],xmm0
1692 movaps xmm8,XMMWORD[((-136))+r11]
1693 movaps XMMWORD[(-136)+r11],xmm0
1694 movaps xmm9,XMMWORD[((-120))+r11]
1695 movaps XMMWORD[(-120)+r11],xmm0
1696 movaps xmm10,XMMWORD[((-104))+r11]
1697 movaps XMMWORD[(-104)+r11],xmm0
1698 movaps xmm11,XMMWORD[((-88))+r11]
1699 movaps XMMWORD[(-88)+r11],xmm0
1700 movaps xmm12,XMMWORD[((-72))+r11]
1701 movaps XMMWORD[(-72)+r11],xmm0
1702 movaps xmm13,XMMWORD[((-56))+r11]
1703 movaps XMMWORD[(-56)+r11],xmm0
1704 movaps xmm14,XMMWORD[((-40))+r11]
1705 movaps XMMWORD[(-40)+r11],xmm0
1706 movaps xmm15,XMMWORD[((-24))+r11]
1707 movaps XMMWORD[(-24)+r11],xmm0
1708 movaps XMMWORD[rsp],xmm0
1709 movaps XMMWORD[16+rsp],xmm0
1710 movaps XMMWORD[32+rsp],xmm0
1711 movaps XMMWORD[48+rsp],xmm0
1712 movaps XMMWORD[64+rsp],xmm0
1713 movaps XMMWORD[80+rsp],xmm0
1714 movaps XMMWORD[96+rsp],xmm0
1715 movaps XMMWORD[112+rsp],xmm0
1716 mov rbp,QWORD[((-8))+r11]
1717
1718 lea rsp,[r11]
1719
1720$L$ctr32_epilogue:
1721 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1722 mov rsi,QWORD[16+rsp]
1723 DB 0F3h,0C3h ;repret
1724
1725$L$SEH_end_aesni_ctr32_encrypt_blocks:
; ---------------------------------------------------------------------------
; aesni_xts_encrypt -- Win64 entry point; encrypts a buffer with per-block
; tweaks (XTS-style, judging by the labels and the tweak-doubling code below).
;
; Register roles after the argument shuffle in the prologue:
;   rdi = source pointer (blocks are loaded from [rdi])
;   rsi = destination pointer (blocks are stored to [rsi])
;   rdx = byte length
;   rcx = key schedule used for the data blocks (count field at offset 240)
;   r8  = key schedule used to encrypt the initial tweak (count at offset 240)
;   r9  = pointer to the 16-byte value the initial tweak is derived from
; NOTE(review): presumably these are inp/out/len/key1/key2/iv of OpenSSL's C
; prototype -- confirm against the C header.
;
; Opcode bytes emitted with DB:
;   DB 102,15,56,220,modrm = 66 0F 38 DC /r = AESENC
;   DB 102,15,56,221,modrm = 66 0F 38 DD /r = AESENCLAST
;   e.g. modrm 209 (0xD1) selects xmm2 <- xmm1, modrm 208 (0xD0) xmm2 <- xmm0.
;
; NOTE(review): the decimal number at the start of every line below appears to
; be a line number fused in by the source viewer this listing was exported
; from; it is not NASM syntax.
; ---------------------------------------------------------------------------
1726global aesni_xts_encrypt
1727
1728ALIGN 16
1729aesni_xts_encrypt:
; Save rdi/rsi (callee-saved on Win64) in the slots above the return address,
; then move the Win64 arguments (rcx, rdx, r8, r9, two stack slots) into the
; registers the body uses.
1730 mov QWORD[8+rsp],rdi ;WIN64 prologue
1731 mov QWORD[16+rsp],rsi
1732 mov rax,rsp
1733$L$SEH_begin_aesni_xts_encrypt:
1734 mov rdi,rcx
1735 mov rsi,rdx
1736 mov rdx,r8
1737 mov rcx,r9
1738 mov r8,QWORD[40+rsp]
1739 mov r9,QWORD[48+rsp]
1740
1741
1742
; r11 keeps the entry rsp for the whole function; every save slot below is
; addressed relative to it and the epilogue restores rsp from it.
1743 lea r11,[rsp]
1744
1745 push rbp
1746
; Reserve the frame and align rsp to 16 so movdqa/movaps on [rsp+..] is legal.
1747 sub rsp,272
1748 and rsp,-16
; Save xmm6-xmm15 (callee-saved on Win64) just below the pushed rbp.
; movaps needs these addresses 16-byte aligned, which holds when rsp was
; 8 (mod 16) at entry.
1749 movaps XMMWORD[(-168)+r11],xmm6
1750 movaps XMMWORD[(-152)+r11],xmm7
1751 movaps XMMWORD[(-136)+r11],xmm8
1752 movaps XMMWORD[(-120)+r11],xmm9
1753 movaps XMMWORD[(-104)+r11],xmm10
1754 movaps XMMWORD[(-88)+r11],xmm11
1755 movaps XMMWORD[(-72)+r11],xmm12
1756 movaps XMMWORD[(-56)+r11],xmm13
1757 movaps XMMWORD[(-40)+r11],xmm14
1758 movaps XMMWORD[(-24)+r11],xmm15
1759$L$xts_enc_body:
; Encrypt the 16 bytes at [r9] with the key schedule at r8: xor with round
; key 0, then eax AESENC rounds and one AESENCLAST. Result (xmm2) is the
; first tweak. r10d picks up the count field of the data key meanwhile.
1760 movups xmm2,XMMWORD[r9]
1761 mov eax,DWORD[240+r8]
1762 mov r10d,DWORD[240+rcx]
1763 movups xmm0,XMMWORD[r8]
1764 movups xmm1,XMMWORD[16+r8]
1765 lea r8,[32+r8]
1766 xorps xmm2,xmm0
1767$L$oop_enc1_8:
1768DB 102,15,56,220,209
1769 dec eax
1770 movups xmm1,XMMWORD[r8]
1771 lea r8,[16+r8]
1772 jnz NEAR $L$oop_enc1_8
1773DB 102,15,56,221,209
; From here: xmm0 = round key 0 of the data key, rbp = data key pointer,
; eax = count field, r10d = count*16, r9 = original length (its low 4 bits
; are tested at $L$xts_enc_done), rdx = length rounded down to whole blocks.
1774 movups xmm0,XMMWORD[rcx]
1775 mov rbp,rcx
1776 mov eax,r10d
1777 shl r10d,4
1778 mov r9,rdx
1779 and rdx,-16
1780
; xmm1 = last round key; it is xored with round key 0 a few lines down and
; parked at [96+rsp].
1781 movups xmm1,XMMWORD[16+r10*1+rcx]
1782
; Build the first batch of tweaks. xmm15 is the running tweak; each step
; copies it (xored with round key 0) into xmm10..xmm14 and then doubles it:
; paddq doubles both 64-bit halves, and a mask derived from the sign bits
; (pshufd 0x5f + psrad 31, and-ed with the $L$xts_magic constant) is xored in.
; NOTE(review): $L$xts_magic is defined outside this chunk; presumably it
; carries the reduction constant and the inter-half carry bit -- confirm.
1783 movdqa xmm8,XMMWORD[$L$xts_magic]
1784 movdqa xmm15,xmm2
1785 pshufd xmm9,xmm2,0x5f
1786 pxor xmm1,xmm0
1787 movdqa xmm14,xmm9
1788 paddd xmm9,xmm9
1789 movdqa xmm10,xmm15
1790 psrad xmm14,31
1791 paddq xmm15,xmm15
1792 pand xmm14,xmm8
1793 pxor xmm10,xmm0
1794 pxor xmm15,xmm14
1795 movdqa xmm14,xmm9
1796 paddd xmm9,xmm9
1797 movdqa xmm11,xmm15
1798 psrad xmm14,31
1799 paddq xmm15,xmm15
1800 pand xmm14,xmm8
1801 pxor xmm11,xmm0
1802 pxor xmm15,xmm14
1803 movdqa xmm14,xmm9
1804 paddd xmm9,xmm9
1805 movdqa xmm12,xmm15
1806 psrad xmm14,31
1807 paddq xmm15,xmm15
1808 pand xmm14,xmm8
1809 pxor xmm12,xmm0
1810 pxor xmm15,xmm14
1811 movdqa xmm14,xmm9
1812 paddd xmm9,xmm9
1813 movdqa xmm13,xmm15
1814 psrad xmm14,31
1815 paddq xmm15,xmm15
1816 pand xmm14,xmm8
1817 pxor xmm13,xmm0
1818 pxor xmm15,xmm14
1819 movdqa xmm14,xmm15
1820 psrad xmm9,31
1821 paddq xmm15,xmm15
1822 pand xmm9,xmm8
1823 pxor xmm14,xmm0
1824 pxor xmm15,xmm9
; [96+rsp] = round key 0 xor last round key (used by the 6-block loop).
1825 movaps XMMWORD[96+rsp],xmm1
1826
; Fewer than 6 whole blocks? Go straight to the short path.
1827 sub rdx,16*6
1828 jc NEAR $L$xts_enc_short
1829
; Set up the 6-block loop: rcx points past the end of the key schedule
; (key + 32 + count*16), rax = 112 - count*16 is a negative index that the
; inner loop steps up to zero; r10 keeps a copy to reload rax each pass.
; r8 now points at $L$xts_magic.
1830 mov eax,16+96
1831 lea rcx,[32+r10*1+rbp]
1832 sub rax,r10
1833 movups xmm1,XMMWORD[16+rbp]
1834 mov r10,rax
1835 lea r8,[$L$xts_magic]
1836 jmp NEAR $L$xts_enc_grandloop
1837
1838ALIGN 32
1839$L$xts_enc_grandloop:
; One pass = 6 blocks. On entry xmm0 = round key 0, xmm1 = round key 1,
; xmm10..xmm14 = tweak[0..4] xor round key 0, xmm15 = tweak[5].
; Blocks are loaded into xmm2..xmm7 and xored with their tweak (which also
; applies round key 0); the first AESENC rounds are interleaved.
; Each tweak is additionally xored with [96+rsp] (round key 0 xor last round
; key), giving tweak xor last-round-key, and stored at [0..80+rsp]; the final
; AESENCLAST reads those slots as its key operand.
1840 movdqu xmm2,XMMWORD[rdi]
1841 movdqa xmm8,xmm0
1842 movdqu xmm3,XMMWORD[16+rdi]
1843 pxor xmm2,xmm10
1844 movdqu xmm4,XMMWORD[32+rdi]
1845 pxor xmm3,xmm11
1846DB 102,15,56,220,209
1847 movdqu xmm5,XMMWORD[48+rdi]
1848 pxor xmm4,xmm12
1849DB 102,15,56,220,217
1850 movdqu xmm6,XMMWORD[64+rdi]
1851 pxor xmm5,xmm13
1852DB 102,15,56,220,225
1853 movdqu xmm7,XMMWORD[80+rdi]
1854 pxor xmm8,xmm15
1855 movdqa xmm9,XMMWORD[96+rsp]
1856 pxor xmm6,xmm14
1857DB 102,15,56,220,233
1858 movups xmm0,XMMWORD[32+rbp]
1859 lea rdi,[96+rdi]
1860 pxor xmm7,xmm8
1861
1862 pxor xmm10,xmm9
1863DB 102,15,56,220,241
1864 pxor xmm11,xmm9
1865 movdqa XMMWORD[rsp],xmm10
1866DB 102,15,56,220,249
1867 movups xmm1,XMMWORD[48+rbp]
1868 pxor xmm12,xmm9
1869
1870DB 102,15,56,220,208
1871 pxor xmm13,xmm9
1872 movdqa XMMWORD[16+rsp],xmm11
1873DB 102,15,56,220,216
1874 pxor xmm14,xmm9
1875 movdqa XMMWORD[32+rsp],xmm12
1876DB 102,15,56,220,224
1877DB 102,15,56,220,232
1878 pxor xmm8,xmm9
1879 movdqa XMMWORD[64+rsp],xmm14
1880DB 102,15,56,220,240
1881DB 102,15,56,220,248
1882 movups xmm0,XMMWORD[64+rbp]
1883 movdqa XMMWORD[80+rsp],xmm8
; xmm9 = sign-source dwords of the running tweak for the next batch.
1884 pshufd xmm9,xmm15,0x5f
1885 jmp NEAR $L$xts_enc_loop6
1886ALIGN 32
1887$L$xts_enc_loop6:
; Two rounds per iteration over all six blocks, alternating xmm1/xmm0 as the
; round key; rax climbs by 32 and the loop ends when it reaches zero.
1888DB 102,15,56,220,209
1889DB 102,15,56,220,217
1890DB 102,15,56,220,225
1891DB 102,15,56,220,233
1892DB 102,15,56,220,241
1893DB 102,15,56,220,249
1894 movups xmm1,XMMWORD[((-64))+rax*1+rcx]
1895 add rax,32
1896
1897DB 102,15,56,220,208
1898DB 102,15,56,220,216
1899DB 102,15,56,220,224
1900DB 102,15,56,220,232
1901DB 102,15,56,220,240
1902DB 102,15,56,220,248
1903 movups xmm0,XMMWORD[((-80))+rax*1+rcx]
1904 jnz NEAR $L$xts_enc_loop6
1905
; Remaining rounds, interleaved with generating the next six tweaks:
; xmm8 = magic constant reloaded via r8; xmm10..xmm14 are rebuilt as
; round key 0 (from [rbp]) xor successive doublings of xmm15.
1906 movdqa xmm8,XMMWORD[r8]
1907 movdqa xmm14,xmm9
1908 paddd xmm9,xmm9
1909DB 102,15,56,220,209
1910 paddq xmm15,xmm15
1911 psrad xmm14,31
1912DB 102,15,56,220,217
1913 pand xmm14,xmm8
1914 movups xmm10,XMMWORD[rbp]
1915DB 102,15,56,220,225
1916DB 102,15,56,220,233
1917DB 102,15,56,220,241
1918 pxor xmm15,xmm14
1919 movaps xmm11,xmm10
1920DB 102,15,56,220,249
1921 movups xmm1,XMMWORD[((-64))+rcx]
1922
1923 movdqa xmm14,xmm9
1924DB 102,15,56,220,208
1925 paddd xmm9,xmm9
1926 pxor xmm10,xmm15
1927DB 102,15,56,220,216
1928 psrad xmm14,31
1929 paddq xmm15,xmm15
1930DB 102,15,56,220,224
1931DB 102,15,56,220,232
1932 pand xmm14,xmm8
1933 movaps xmm12,xmm11
1934DB 102,15,56,220,240
1935 pxor xmm15,xmm14
1936 movdqa xmm14,xmm9
1937DB 102,15,56,220,248
1938 movups xmm0,XMMWORD[((-48))+rcx]
1939
1940 paddd xmm9,xmm9
1941DB 102,15,56,220,209
1942 pxor xmm11,xmm15
1943 psrad xmm14,31
1944DB 102,15,56,220,217
1945 paddq xmm15,xmm15
1946 pand xmm14,xmm8
1947DB 102,15,56,220,225
1948DB 102,15,56,220,233
; Late store of the 4th slot: xmm13 still holds the current batch's value
; (tweak xor last round key) until it is overwritten on the next lines.
1949 movdqa XMMWORD[48+rsp],xmm13
1950 pxor xmm15,xmm14
1951DB 102,15,56,220,241
1952 movaps xmm13,xmm12
1953 movdqa xmm14,xmm9
1954DB 102,15,56,220,249
1955 movups xmm1,XMMWORD[((-32))+rcx]
1956
1957 paddd xmm9,xmm9
1958DB 102,15,56,220,208
1959 pxor xmm12,xmm15
1960 psrad xmm14,31
1961DB 102,15,56,220,216
1962 paddq xmm15,xmm15
1963 pand xmm14,xmm8
1964DB 102,15,56,220,224
1965DB 102,15,56,220,232
1966DB 102,15,56,220,240
1967 pxor xmm15,xmm14
1968 movaps xmm14,xmm13
1969DB 102,15,56,220,248
1970
1971 movdqa xmm0,xmm9
1972 paddd xmm9,xmm9
1973DB 102,15,56,220,209
1974 pxor xmm13,xmm15
1975 psrad xmm0,31
1976DB 102,15,56,220,217
1977 paddq xmm15,xmm15
1978 pand xmm0,xmm8
1979DB 102,15,56,220,225
1980DB 102,15,56,220,233
1981 pxor xmm15,xmm0
; Reload round keys 0 and 1 so the next pass (or the short path) finds them
; in xmm0/xmm1.
1982 movups xmm0,XMMWORD[rbp]
1983DB 102,15,56,220,241
1984DB 102,15,56,220,249
1985 movups xmm1,XMMWORD[16+rbp]
1986
; Final round: the DB sequences with 7 bytes are AESENCLAST with a memory
; operand, e.g. 102,15,56,221,84,36,0 = aesenclast xmm2,[rsp+0], and so on
; for xmm3..xmm7 with [rsp+16] .. [rsp+80].
1987 pxor xmm14,xmm15
1988DB 102,15,56,221,84,36,0
1989 psrad xmm9,31
1990 paddq xmm15,xmm15
1991DB 102,15,56,221,92,36,16
1992DB 102,15,56,221,100,36,32
1993 pand xmm9,xmm8
; Reload the negative round index for the next pass.
1994 mov rax,r10
1995DB 102,15,56,221,108,36,48
1996DB 102,15,56,221,116,36,64
1997DB 102,15,56,221,124,36,80
1998 pxor xmm15,xmm9
1999
; Store the six output blocks and loop while at least 96 bytes remain.
2000 lea rsi,[96+rsi]
2001 movups XMMWORD[(-96)+rsi],xmm2
2002 movups XMMWORD[(-80)+rsi],xmm3
2003 movups XMMWORD[(-64)+rsi],xmm4
2004 movups XMMWORD[(-48)+rsi],xmm5
2005 movups XMMWORD[(-32)+rsi],xmm6
2006 movups XMMWORD[(-16)+rsi],xmm7
2007 sub rdx,16*6
2008 jnc NEAR $L$xts_enc_grandloop
2009
; Undo the loop setup: eax = 112 - r10 = count*16, shifted back to the count
; field value; rcx = start of the data key schedule again.
2010 mov eax,16+96
2011 sub eax,r10d
2012 mov rcx,rbp
2013 shr eax,4
2014
2015$L$xts_enc_short:
; 0..5 whole blocks left. r10d keeps the count field (eax is consumed by the
; helpers / one-block loops). Each pxor with xmm0 strips round key 0 from a
; tweak again, leaving the plain tweak; this is done lazily, only for as many
; tweaks as the selected case needs. rdx (after the add) = remaining whole
; bytes: 0, 16, 32, 48, 64 or 80.
2016
2017 mov r10d,eax
2018 pxor xmm10,xmm0
2019 add rdx,16*6
2020 jz NEAR $L$xts_enc_done
2021
2022 pxor xmm11,xmm0
2023 cmp rdx,0x20
2024 jb NEAR $L$xts_enc_one
2025 pxor xmm12,xmm0
2026 je NEAR $L$xts_enc_two
2027
2028 pxor xmm13,xmm0
2029 cmp rdx,0x40
2030 jb NEAR $L$xts_enc_three
2031 pxor xmm14,xmm0
2032 je NEAR $L$xts_enc_four
2033
; Five blocks: run the 6-block helper with the sixth lane (xmm7) zeroed.
2034 movdqu xmm2,XMMWORD[rdi]
2035 movdqu xmm3,XMMWORD[16+rdi]
2036 movdqu xmm4,XMMWORD[32+rdi]
2037 pxor xmm2,xmm10
2038 movdqu xmm5,XMMWORD[48+rdi]
2039 pxor xmm3,xmm11
2040 movdqu xmm6,XMMWORD[64+rdi]
2041 lea rdi,[80+rdi]
2042 pxor xmm4,xmm12
2043 pxor xmm5,xmm13
2044 pxor xmm6,xmm14
2045 pxor xmm7,xmm7
2046
2047 call _aesni_encrypt6
2048
; Post-whiten with the same tweaks; xmm10 = next unused tweak (xmm15), which
; the stealing code at $L$xts_enc_done uses.
2049 xorps xmm2,xmm10
2050 movdqa xmm10,xmm15
2051 xorps xmm3,xmm11
2052 xorps xmm4,xmm12
2053 movdqu XMMWORD[rsi],xmm2
2054 xorps xmm5,xmm13
2055 movdqu XMMWORD[16+rsi],xmm3
2056 xorps xmm6,xmm14
2057 movdqu XMMWORD[32+rsi],xmm4
2058 movdqu XMMWORD[48+rsi],xmm5
2059 movdqu XMMWORD[64+rsi],xmm6
2060 lea rsi,[80+rsi]
2061 jmp NEAR $L$xts_enc_done
2062
2063ALIGN 16
2064$L$xts_enc_one:
; One block: tweak xor, inline single-block encryption (eax rounds), tweak
; xor again; xmm10 then becomes the next tweak.
2065 movups xmm2,XMMWORD[rdi]
2066 lea rdi,[16+rdi]
2067 xorps xmm2,xmm10
2068 movups xmm0,XMMWORD[rcx]
2069 movups xmm1,XMMWORD[16+rcx]
2070 lea rcx,[32+rcx]
2071 xorps xmm2,xmm0
2072$L$oop_enc1_9:
2073DB 102,15,56,220,209
2074 dec eax
2075 movups xmm1,XMMWORD[rcx]
2076 lea rcx,[16+rcx]
2077 jnz NEAR $L$oop_enc1_9
2078DB 102,15,56,221,209
2079 xorps xmm2,xmm10
2080 movdqa xmm10,xmm11
2081 movups XMMWORD[rsi],xmm2
2082 lea rsi,[16+rsi]
2083 jmp NEAR $L$xts_enc_done
2084
2085ALIGN 16
2086$L$xts_enc_two:
; Two blocks via _aesni_encrypt2; next tweak (xmm12) moves to xmm10.
2087 movups xmm2,XMMWORD[rdi]
2088 movups xmm3,XMMWORD[16+rdi]
2089 lea rdi,[32+rdi]
2090 xorps xmm2,xmm10
2091 xorps xmm3,xmm11
2092
2093 call _aesni_encrypt2
2094
2095 xorps xmm2,xmm10
2096 movdqa xmm10,xmm12
2097 xorps xmm3,xmm11
2098 movups XMMWORD[rsi],xmm2
2099 movups XMMWORD[16+rsi],xmm3
2100 lea rsi,[32+rsi]
2101 jmp NEAR $L$xts_enc_done
2102
2103ALIGN 16
2104$L$xts_enc_three:
; Three blocks via _aesni_encrypt3; next tweak (xmm13) moves to xmm10.
2105 movups xmm2,XMMWORD[rdi]
2106 movups xmm3,XMMWORD[16+rdi]
2107 movups xmm4,XMMWORD[32+rdi]
2108 lea rdi,[48+rdi]
2109 xorps xmm2,xmm10
2110 xorps xmm3,xmm11
2111 xorps xmm4,xmm12
2112
2113 call _aesni_encrypt3
2114
2115 xorps xmm2,xmm10
2116 movdqa xmm10,xmm13
2117 xorps xmm3,xmm11
2118 xorps xmm4,xmm12
2119 movups XMMWORD[rsi],xmm2
2120 movups XMMWORD[16+rsi],xmm3
2121 movups XMMWORD[32+rsi],xmm4
2122 lea rsi,[48+rsi]
2123 jmp NEAR $L$xts_enc_done
2124
2125ALIGN 16
2126$L$xts_enc_four:
; Four blocks via _aesni_encrypt4; next tweak (xmm14) moves to xmm10.
2127 movups xmm2,XMMWORD[rdi]
2128 movups xmm3,XMMWORD[16+rdi]
2129 movups xmm4,XMMWORD[32+rdi]
2130 xorps xmm2,xmm10
2131 movups xmm5,XMMWORD[48+rdi]
2132 lea rdi,[64+rdi]
2133 xorps xmm3,xmm11
2134 xorps xmm4,xmm12
2135 xorps xmm5,xmm13
2136
2137 call _aesni_encrypt4
2138
2139 pxor xmm2,xmm10
2140 movdqa xmm10,xmm14
2141 pxor xmm3,xmm11
2142 pxor xmm4,xmm12
2143 movdqu XMMWORD[rsi],xmm2
2144 pxor xmm5,xmm13
2145 movdqu XMMWORD[16+rsi],xmm3
2146 movdqu XMMWORD[32+rsi],xmm4
2147 movdqu XMMWORD[48+rsi],xmm5
2148 lea rsi,[64+rsi]
2149 jmp NEAR $L$xts_enc_done
2150
2151ALIGN 16
2152$L$xts_enc_done:
; All whole blocks are written. r9 & 15 = number of trailing bytes; if zero
; we are finished, otherwise fall into the stealing loop with rdx = that
; byte count and xmm10 = the next unused tweak.
2153 and r9,15
2154 jz NEAR $L$xts_enc_ret
2155 mov rdx,r9
2156
2157$L$xts_enc_steal:
; Byte-wise swap: the trailing input byte replaces the corresponding byte of
; the last full output block ([-16+rsi]), and the byte it displaced becomes
; the trailing output byte at [rsi].
2158 movzx eax,BYTE[rdi]
2159 movzx ecx,BYTE[((-16))+rsi]
2160 lea rdi,[1+rdi]
2161 mov BYTE[((-16))+rsi],al
2162 mov BYTE[rsi],cl
2163 lea rsi,[1+rsi]
2164 sub rdx,1
2165 jnz NEAR $L$xts_enc_steal
2166
; Rewind rsi and restore key pointer / round count (clobbered above), then
; encrypt the patched block at [-16+rsi] in place with tweak xmm10.
2167 sub rsi,r9
2168 mov rcx,rbp
2169 mov eax,r10d
2170
2171 movups xmm2,XMMWORD[((-16))+rsi]
2172 xorps xmm2,xmm10
2173 movups xmm0,XMMWORD[rcx]
2174 movups xmm1,XMMWORD[16+rcx]
2175 lea rcx,[32+rcx]
2176 xorps xmm2,xmm0
2177$L$oop_enc1_10:
2178DB 102,15,56,220,209
2179 dec eax
2180 movups xmm1,XMMWORD[rcx]
2181 lea rcx,[16+rcx]
2182 jnz NEAR $L$oop_enc1_10
2183DB 102,15,56,221,209
2184 xorps xmm2,xmm10
2185 movups XMMWORD[(-16)+rsi],xmm2
2186
2187$L$xts_enc_ret:
; Epilogue: zero xmm0-xmm5, restore xmm6-xmm15 from the save area while
; overwriting each save slot with zero, zero the scratch slots [0..96+rsp],
; restore rbp and rsp (from r11), then rdi/rsi.
2188 xorps xmm0,xmm0
2189 pxor xmm1,xmm1
2190 pxor xmm2,xmm2
2191 pxor xmm3,xmm3
2192 pxor xmm4,xmm4
2193 pxor xmm5,xmm5
2194 movaps xmm6,XMMWORD[((-168))+r11]
2195 movaps XMMWORD[(-168)+r11],xmm0
2196 movaps xmm7,XMMWORD[((-152))+r11]
2197 movaps XMMWORD[(-152)+r11],xmm0
2198 movaps xmm8,XMMWORD[((-136))+r11]
2199 movaps XMMWORD[(-136)+r11],xmm0
2200 movaps xmm9,XMMWORD[((-120))+r11]
2201 movaps XMMWORD[(-120)+r11],xmm0
2202 movaps xmm10,XMMWORD[((-104))+r11]
2203 movaps XMMWORD[(-104)+r11],xmm0
2204 movaps xmm11,XMMWORD[((-88))+r11]
2205 movaps XMMWORD[(-88)+r11],xmm0
2206 movaps xmm12,XMMWORD[((-72))+r11]
2207 movaps XMMWORD[(-72)+r11],xmm0
2208 movaps xmm13,XMMWORD[((-56))+r11]
2209 movaps XMMWORD[(-56)+r11],xmm0
2210 movaps xmm14,XMMWORD[((-40))+r11]
2211 movaps XMMWORD[(-40)+r11],xmm0
2212 movaps xmm15,XMMWORD[((-24))+r11]
2213 movaps XMMWORD[(-24)+r11],xmm0
2214 movaps XMMWORD[rsp],xmm0
2215 movaps XMMWORD[16+rsp],xmm0
2216 movaps XMMWORD[32+rsp],xmm0
2217 movaps XMMWORD[48+rsp],xmm0
2218 movaps XMMWORD[64+rsp],xmm0
2219 movaps XMMWORD[80+rsp],xmm0
2220 movaps XMMWORD[96+rsp],xmm0
2221 mov rbp,QWORD[((-8))+r11]
2222
2223 lea rsp,[r11]
2224
2225$L$xts_enc_epilogue:
2226 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2227 mov rsi,QWORD[16+rsp]
2228 DB 0F3h,0C3h ;repret
2229
2230$L$SEH_end_aesni_xts_encrypt:
; ---------------------------------------------------------------------------
; aesni_xts_decrypt -- Win64 entry point; decrypts a buffer with per-block
; tweaks (XTS-style, judging by the labels and the tweak-doubling code below).
;
; Register roles after the argument shuffle in the prologue:
;   rdi = source pointer, rsi = destination pointer, rdx = byte length
;   rcx = key schedule used for the data blocks (count field at offset 240)
;   r8  = key schedule used to encrypt the initial tweak (count at offset 240)
;   r9  = pointer to the 16-byte value the initial tweak is derived from
; NOTE(review): presumably inp/out/len/key1/key2/iv of OpenSSL's C prototype
; -- confirm against the C header.
;
; Opcode bytes emitted with DB:
;   DB 102,15,56,220,.. = 66 0F 38 DC /r = AESENC      (tweak setup only)
;   DB 102,15,56,221,.. = 66 0F 38 DD /r = AESENCLAST  (tweak setup only)
;   DB 102,15,56,222,.. = 66 0F 38 DE /r = AESDEC
;   DB 102,15,56,223,.. = 66 0F 38 DF /r = AESDECLAST
;
; NOTE(review): the decimal number at the start of every line below appears to
; be a line number fused in by the source viewer this listing was exported
; from; it is not NASM syntax.
; ---------------------------------------------------------------------------
2231global aesni_xts_decrypt
2232
2233ALIGN 16
2234aesni_xts_decrypt:
; Save rdi/rsi (callee-saved on Win64) above the return address, then move
; the Win64 arguments into the registers the body uses.
2235 mov QWORD[8+rsp],rdi ;WIN64 prologue
2236 mov QWORD[16+rsp],rsi
2237 mov rax,rsp
2238$L$SEH_begin_aesni_xts_decrypt:
2239 mov rdi,rcx
2240 mov rsi,rdx
2241 mov rdx,r8
2242 mov rcx,r9
2243 mov r8,QWORD[40+rsp]
2244 mov r9,QWORD[48+rsp]
2245
2246
2247
; r11 keeps the entry rsp; save slots are addressed relative to it and the
; epilogue restores rsp from it.
2248 lea r11,[rsp]
2249
2250 push rbp
2251
; Reserve the frame and align rsp to 16 for the movdqa/movaps scratch slots.
2252 sub rsp,272
2253 and rsp,-16
; Save xmm6-xmm15 (callee-saved on Win64) just below the pushed rbp.
2254 movaps XMMWORD[(-168)+r11],xmm6
2255 movaps XMMWORD[(-152)+r11],xmm7
2256 movaps XMMWORD[(-136)+r11],xmm8
2257 movaps XMMWORD[(-120)+r11],xmm9
2258 movaps XMMWORD[(-104)+r11],xmm10
2259 movaps XMMWORD[(-88)+r11],xmm11
2260 movaps XMMWORD[(-72)+r11],xmm12
2261 movaps XMMWORD[(-56)+r11],xmm13
2262 movaps XMMWORD[(-40)+r11],xmm14
2263 movaps XMMWORD[(-24)+r11],xmm15
2264$L$xts_dec_body:
; The initial tweak is produced by ENCRYPTING the 16 bytes at [r9] with the
; key schedule at r8 (AESENC/AESENCLAST), even though this is the decrypt
; routine. r10d picks up the count field of the data key meanwhile.
2265 movups xmm2,XMMWORD[r9]
2266 mov eax,DWORD[240+r8]
2267 mov r10d,DWORD[240+rcx]
2268 movups xmm0,XMMWORD[r8]
2269 movups xmm1,XMMWORD[16+r8]
2270 lea r8,[32+r8]
2271 xorps xmm2,xmm0
2272$L$oop_enc1_11:
2273DB 102,15,56,220,209
2274 dec eax
2275 movups xmm1,XMMWORD[r8]
2276 lea r8,[16+r8]
2277 jnz NEAR $L$oop_enc1_11
2278DB 102,15,56,221,209
; If the length is not a multiple of 16, hold back one extra full block:
; rax = 16 when (rdx & 15) != 0, else 0, and it is subtracted from rdx.
; That block is handled together with the partial tail at $L$xts_dec_done2.
2279 xor eax,eax
2280 test rdx,15
2281 setnz al
2282 shl rax,4
2283 sub rdx,rax
2284
; From here: xmm0 = round key 0 of the data key, rbp = data key pointer,
; eax = count field, r10d = count*16, r9 = adjusted length (low 4 bits are
; tested later), rdx = adjusted length rounded down to whole blocks.
2285 movups xmm0,XMMWORD[rcx]
2286 mov rbp,rcx
2287 mov eax,r10d
2288 shl r10d,4
2289 mov r9,rdx
2290 and rdx,-16
2291
; xmm1 = last round key; xored with round key 0 below and parked at [96+rsp].
2292 movups xmm1,XMMWORD[16+r10*1+rcx]
2293
; Build the first batch of tweaks. xmm15 is the running tweak; each step
; copies it (xored with round key 0) into xmm10..xmm14 and then doubles it:
; paddq doubles both 64-bit halves, and a mask derived from the sign bits
; (pshufd 0x5f + psrad 31, and-ed with the $L$xts_magic constant) is xored in.
; NOTE(review): $L$xts_magic is defined outside this chunk; presumably it
; carries the reduction constant and the inter-half carry bit -- confirm.
2294 movdqa xmm8,XMMWORD[$L$xts_magic]
2295 movdqa xmm15,xmm2
2296 pshufd xmm9,xmm2,0x5f
2297 pxor xmm1,xmm0
2298 movdqa xmm14,xmm9
2299 paddd xmm9,xmm9
2300 movdqa xmm10,xmm15
2301 psrad xmm14,31
2302 paddq xmm15,xmm15
2303 pand xmm14,xmm8
2304 pxor xmm10,xmm0
2305 pxor xmm15,xmm14
2306 movdqa xmm14,xmm9
2307 paddd xmm9,xmm9
2308 movdqa xmm11,xmm15
2309 psrad xmm14,31
2310 paddq xmm15,xmm15
2311 pand xmm14,xmm8
2312 pxor xmm11,xmm0
2313 pxor xmm15,xmm14
2314 movdqa xmm14,xmm9
2315 paddd xmm9,xmm9
2316 movdqa xmm12,xmm15
2317 psrad xmm14,31
2318 paddq xmm15,xmm15
2319 pand xmm14,xmm8
2320 pxor xmm12,xmm0
2321 pxor xmm15,xmm14
2322 movdqa xmm14,xmm9
2323 paddd xmm9,xmm9
2324 movdqa xmm13,xmm15
2325 psrad xmm14,31
2326 paddq xmm15,xmm15
2327 pand xmm14,xmm8
2328 pxor xmm13,xmm0
2329 pxor xmm15,xmm14
2330 movdqa xmm14,xmm15
2331 psrad xmm9,31
2332 paddq xmm15,xmm15
2333 pand xmm9,xmm8
2334 pxor xmm14,xmm0
2335 pxor xmm15,xmm9
; [96+rsp] = round key 0 xor last round key (used by the 6-block loop).
2336 movaps XMMWORD[96+rsp],xmm1
2337
; Fewer than 6 whole blocks? Go straight to the short path.
2338 sub rdx,16*6
2339 jc NEAR $L$xts_dec_short
2340
; Set up the 6-block loop: rcx points past the end of the key schedule
; (key + 32 + count*16), rax = 112 - count*16 is a negative index stepped up
; to zero by the inner loop; r10 keeps a copy. r8 now points at $L$xts_magic.
2341 mov eax,16+96
2342 lea rcx,[32+r10*1+rbp]
2343 sub rax,r10
2344 movups xmm1,XMMWORD[16+rbp]
2345 mov r10,rax
2346 lea r8,[$L$xts_magic]
2347 jmp NEAR $L$xts_dec_grandloop
2348
2349ALIGN 32
2350$L$xts_dec_grandloop:
; One pass = 6 blocks. On entry xmm0 = round key 0, xmm1 = round key 1,
; xmm10..xmm14 = tweak[0..4] xor round key 0, xmm15 = tweak[5].
; Blocks go into xmm2..xmm7, xored with their tweak (which also applies
; round key 0), with the first AESDEC rounds interleaved. Each tweak is also
; xored with [96+rsp] and stored at [0..80+rsp] so the final AESDECLAST can
; use "tweak xor last round key" as its memory operand.
2351 movdqu xmm2,XMMWORD[rdi]
2352 movdqa xmm8,xmm0
2353 movdqu xmm3,XMMWORD[16+rdi]
2354 pxor xmm2,xmm10
2355 movdqu xmm4,XMMWORD[32+rdi]
2356 pxor xmm3,xmm11
2357DB 102,15,56,222,209
2358 movdqu xmm5,XMMWORD[48+rdi]
2359 pxor xmm4,xmm12
2360DB 102,15,56,222,217
2361 movdqu xmm6,XMMWORD[64+rdi]
2362 pxor xmm5,xmm13
2363DB 102,15,56,222,225
2364 movdqu xmm7,XMMWORD[80+rdi]
2365 pxor xmm8,xmm15
2366 movdqa xmm9,XMMWORD[96+rsp]
2367 pxor xmm6,xmm14
2368DB 102,15,56,222,233
2369 movups xmm0,XMMWORD[32+rbp]
2370 lea rdi,[96+rdi]
2371 pxor xmm7,xmm8
2372
2373 pxor xmm10,xmm9
2374DB 102,15,56,222,241
2375 pxor xmm11,xmm9
2376 movdqa XMMWORD[rsp],xmm10
2377DB 102,15,56,222,249
2378 movups xmm1,XMMWORD[48+rbp]
2379 pxor xmm12,xmm9
2380
2381DB 102,15,56,222,208
2382 pxor xmm13,xmm9
2383 movdqa XMMWORD[16+rsp],xmm11
2384DB 102,15,56,222,216
2385 pxor xmm14,xmm9
2386 movdqa XMMWORD[32+rsp],xmm12
2387DB 102,15,56,222,224
2388DB 102,15,56,222,232
2389 pxor xmm8,xmm9
2390 movdqa XMMWORD[64+rsp],xmm14
2391DB 102,15,56,222,240
2392DB 102,15,56,222,248
2393 movups xmm0,XMMWORD[64+rbp]
2394 movdqa XMMWORD[80+rsp],xmm8
; xmm9 = sign-source dwords of the running tweak for the next batch.
2395 pshufd xmm9,xmm15,0x5f
2396 jmp NEAR $L$xts_dec_loop6
2397ALIGN 32
2398$L$xts_dec_loop6:
; Two rounds per iteration over all six blocks, alternating xmm1/xmm0 as the
; round key; rax climbs by 32 and the loop ends when it reaches zero.
2399DB 102,15,56,222,209
2400DB 102,15,56,222,217
2401DB 102,15,56,222,225
2402DB 102,15,56,222,233
2403DB 102,15,56,222,241
2404DB 102,15,56,222,249
2405 movups xmm1,XMMWORD[((-64))+rax*1+rcx]
2406 add rax,32
2407
2408DB 102,15,56,222,208
2409DB 102,15,56,222,216
2410DB 102,15,56,222,224
2411DB 102,15,56,222,232
2412DB 102,15,56,222,240
2413DB 102,15,56,222,248
2414 movups xmm0,XMMWORD[((-80))+rax*1+rcx]
2415 jnz NEAR $L$xts_dec_loop6
2416
; Remaining rounds, interleaved with generating the next six tweaks:
; xmm8 = magic constant reloaded via r8; xmm10..xmm14 are rebuilt as
; round key 0 (from [rbp]) xor successive doublings of xmm15.
2417 movdqa xmm8,XMMWORD[r8]
2418 movdqa xmm14,xmm9
2419 paddd xmm9,xmm9
2420DB 102,15,56,222,209
2421 paddq xmm15,xmm15
2422 psrad xmm14,31
2423DB 102,15,56,222,217
2424 pand xmm14,xmm8
2425 movups xmm10,XMMWORD[rbp]
2426DB 102,15,56,222,225
2427DB 102,15,56,222,233
2428DB 102,15,56,222,241
2429 pxor xmm15,xmm14
2430 movaps xmm11,xmm10
2431DB 102,15,56,222,249
2432 movups xmm1,XMMWORD[((-64))+rcx]
2433
2434 movdqa xmm14,xmm9
2435DB 102,15,56,222,208
2436 paddd xmm9,xmm9
2437 pxor xmm10,xmm15
2438DB 102,15,56,222,216
2439 psrad xmm14,31
2440 paddq xmm15,xmm15
2441DB 102,15,56,222,224
2442DB 102,15,56,222,232
2443 pand xmm14,xmm8
2444 movaps xmm12,xmm11
2445DB 102,15,56,222,240
2446 pxor xmm15,xmm14
2447 movdqa xmm14,xmm9
2448DB 102,15,56,222,248
2449 movups xmm0,XMMWORD[((-48))+rcx]
2450
2451 paddd xmm9,xmm9
2452DB 102,15,56,222,209
2453 pxor xmm11,xmm15
2454 psrad xmm14,31
2455DB 102,15,56,222,217
2456 paddq xmm15,xmm15
2457 pand xmm14,xmm8
2458DB 102,15,56,222,225
2459DB 102,15,56,222,233
; Late store of the 4th slot: xmm13 still holds the current batch's value
; (tweak xor last round key) until it is overwritten on the next lines.
2460 movdqa XMMWORD[48+rsp],xmm13
2461 pxor xmm15,xmm14
2462DB 102,15,56,222,241
2463 movaps xmm13,xmm12
2464 movdqa xmm14,xmm9
2465DB 102,15,56,222,249
2466 movups xmm1,XMMWORD[((-32))+rcx]
2467
2468 paddd xmm9,xmm9
2469DB 102,15,56,222,208
2470 pxor xmm12,xmm15
2471 psrad xmm14,31
2472DB 102,15,56,222,216
2473 paddq xmm15,xmm15
2474 pand xmm14,xmm8
2475DB 102,15,56,222,224
2476DB 102,15,56,222,232
2477DB 102,15,56,222,240
2478 pxor xmm15,xmm14
2479 movaps xmm14,xmm13
2480DB 102,15,56,222,248
2481
2482 movdqa xmm0,xmm9
2483 paddd xmm9,xmm9
2484DB 102,15,56,222,209
2485 pxor xmm13,xmm15
2486 psrad xmm0,31
2487DB 102,15,56,222,217
2488 paddq xmm15,xmm15
2489 pand xmm0,xmm8
2490DB 102,15,56,222,225
2491DB 102,15,56,222,233
2492 pxor xmm15,xmm0
; Reload round keys 0 and 1 so the next pass (or the short path) finds them
; in xmm0/xmm1.
2493 movups xmm0,XMMWORD[rbp]
2494DB 102,15,56,222,241
2495DB 102,15,56,222,249
2496 movups xmm1,XMMWORD[16+rbp]
2497
; Final round: the 7-byte DB sequences are AESDECLAST with a memory operand,
; e.g. 102,15,56,223,84,36,0 = aesdeclast xmm2,[rsp+0], and so on for
; xmm3..xmm7 with [rsp+16] .. [rsp+80].
2498 pxor xmm14,xmm15
2499DB 102,15,56,223,84,36,0
2500 psrad xmm9,31
2501 paddq xmm15,xmm15
2502DB 102,15,56,223,92,36,16
2503DB 102,15,56,223,100,36,32
2504 pand xmm9,xmm8
; Reload the negative round index for the next pass.
2505 mov rax,r10
2506DB 102,15,56,223,108,36,48
2507DB 102,15,56,223,116,36,64
2508DB 102,15,56,223,124,36,80
2509 pxor xmm15,xmm9
2510
; Store the six output blocks and loop while at least 96 bytes remain.
2511 lea rsi,[96+rsi]
2512 movups XMMWORD[(-96)+rsi],xmm2
2513 movups XMMWORD[(-80)+rsi],xmm3
2514 movups XMMWORD[(-64)+rsi],xmm4
2515 movups XMMWORD[(-48)+rsi],xmm5
2516 movups XMMWORD[(-32)+rsi],xmm6
2517 movups XMMWORD[(-16)+rsi],xmm7
2518 sub rdx,16*6
2519 jnc NEAR $L$xts_dec_grandloop
2520
; Undo the loop setup: eax = 112 - r10 = count*16, shifted back to the count
; field value; rcx = start of the data key schedule again.
2521 mov eax,16+96
2522 sub eax,r10d
2523 mov rcx,rbp
2524 shr eax,4
2525
2526$L$xts_dec_short:
; 0..5 whole blocks left. r10d keeps the count field. Each pxor with xmm0
; strips round key 0 from a tweak, leaving the plain tweak. Unlike the
; encrypt routine, one tweak MORE than the block count is always unmasked,
; because the stealing code at $L$xts_dec_done2 needs two tweaks
; (xmm10 and xmm11).
2527
2528 mov r10d,eax
2529 pxor xmm10,xmm0
2530 pxor xmm11,xmm0
2531 add rdx,16*6
2532 jz NEAR $L$xts_dec_done
2533
2534 pxor xmm12,xmm0
2535 cmp rdx,0x20
2536 jb NEAR $L$xts_dec_one
2537 pxor xmm13,xmm0
2538 je NEAR $L$xts_dec_two
2539
2540 pxor xmm14,xmm0
2541 cmp rdx,0x40
2542 jb NEAR $L$xts_dec_three
2543 je NEAR $L$xts_dec_four
2544
; Five blocks via the 6-block helper. Note xmm7 is not cleared here; its
; result lane is simply never stored.
2545 movdqu xmm2,XMMWORD[rdi]
2546 movdqu xmm3,XMMWORD[16+rdi]
2547 movdqu xmm4,XMMWORD[32+rdi]
2548 pxor xmm2,xmm10
2549 movdqu xmm5,XMMWORD[48+rdi]
2550 pxor xmm3,xmm11
2551 movdqu xmm6,XMMWORD[64+rdi]
2552 lea rdi,[80+rdi]
2553 pxor xmm4,xmm12
2554 pxor xmm5,xmm13
2555 pxor xmm6,xmm14
2556
2557 call _aesni_decrypt6
2558
; Post-whiten and store. Interleaved with the stores, xmm14 is turned into a
; per-dword sign mask of xmm15 (0 > xmm15) and shuffled into xmm11 as the
; carry mask for one more tweak doubling, needed only if a tail exists.
2559 xorps xmm2,xmm10
2560 xorps xmm3,xmm11
2561 xorps xmm4,xmm12
2562 movdqu XMMWORD[rsi],xmm2
2563 xorps xmm5,xmm13
2564 movdqu XMMWORD[16+rsi],xmm3
2565 xorps xmm6,xmm14
2566 movdqu XMMWORD[32+rsi],xmm4
2567 pxor xmm14,xmm14
2568 movdqu XMMWORD[48+rsi],xmm5
2569 pcmpgtd xmm14,xmm15
2570 movdqu XMMWORD[64+rsi],xmm6
2571 lea rsi,[80+rsi]
2572 pshufd xmm11,xmm14,0x13
2573 and r9,15
2574 jz NEAR $L$xts_dec_ret
2575
; Tail present: xmm10 = current tweak (xmm15), xmm11 = that tweak doubled
; (paddq plus masked constant from xmm8).
; NOTE(review): relies on xmm8 still holding the $L$xts_magic value after
; the call to _aesni_decrypt6, which is not visible here -- confirm.
2576 movdqa xmm10,xmm15
2577 paddq xmm15,xmm15
2578 pand xmm11,xmm8
2579 pxor xmm11,xmm15
2580 jmp NEAR $L$xts_dec_done2
2581
2582ALIGN 16
2583$L$xts_dec_one:
; One block, decrypted inline (eax rounds). Afterwards the tweak pair is
; shifted down: xmm10 = next tweak, xmm11 = the one after.
2584 movups xmm2,XMMWORD[rdi]
2585 lea rdi,[16+rdi]
2586 xorps xmm2,xmm10
2587 movups xmm0,XMMWORD[rcx]
2588 movups xmm1,XMMWORD[16+rcx]
2589 lea rcx,[32+rcx]
2590 xorps xmm2,xmm0
2591$L$oop_dec1_12:
2592DB 102,15,56,222,209
2593 dec eax
2594 movups xmm1,XMMWORD[rcx]
2595 lea rcx,[16+rcx]
2596 jnz NEAR $L$oop_dec1_12
2597DB 102,15,56,223,209
2598 xorps xmm2,xmm10
2599 movdqa xmm10,xmm11
2600 movups XMMWORD[rsi],xmm2
2601 movdqa xmm11,xmm12
2602 lea rsi,[16+rsi]
2603 jmp NEAR $L$xts_dec_done
2604
2605ALIGN 16
2606$L$xts_dec_two:
; Two blocks via _aesni_decrypt2; then xmm10/xmm11 = next two tweaks.
2607 movups xmm2,XMMWORD[rdi]
2608 movups xmm3,XMMWORD[16+rdi]
2609 lea rdi,[32+rdi]
2610 xorps xmm2,xmm10
2611 xorps xmm3,xmm11
2612
2613 call _aesni_decrypt2
2614
2615 xorps xmm2,xmm10
2616 movdqa xmm10,xmm12
2617 xorps xmm3,xmm11
2618 movdqa xmm11,xmm13
2619 movups XMMWORD[rsi],xmm2
2620 movups XMMWORD[16+rsi],xmm3
2621 lea rsi,[32+rsi]
2622 jmp NEAR $L$xts_dec_done
2623
2624ALIGN 16
2625$L$xts_dec_three:
; Three blocks via _aesni_decrypt3; then xmm10/xmm11 = next two tweaks.
2626 movups xmm2,XMMWORD[rdi]
2627 movups xmm3,XMMWORD[16+rdi]
2628 movups xmm4,XMMWORD[32+rdi]
2629 lea rdi,[48+rdi]
2630 xorps xmm2,xmm10
2631 xorps xmm3,xmm11
2632 xorps xmm4,xmm12
2633
2634 call _aesni_decrypt3
2635
2636 xorps xmm2,xmm10
2637 movdqa xmm10,xmm13
2638 xorps xmm3,xmm11
2639 movdqa xmm11,xmm14
2640 xorps xmm4,xmm12
2641 movups XMMWORD[rsi],xmm2
2642 movups XMMWORD[16+rsi],xmm3
2643 movups XMMWORD[32+rsi],xmm4
2644 lea rsi,[48+rsi]
2645 jmp NEAR $L$xts_dec_done
2646
2647ALIGN 16
2648$L$xts_dec_four:
; Four blocks via _aesni_decrypt4; then xmm10/xmm11 = next two tweaks
; (xmm14 and the running tweak xmm15).
2649 movups xmm2,XMMWORD[rdi]
2650 movups xmm3,XMMWORD[16+rdi]
2651 movups xmm4,XMMWORD[32+rdi]
2652 xorps xmm2,xmm10
2653 movups xmm5,XMMWORD[48+rdi]
2654 lea rdi,[64+rdi]
2655 xorps xmm3,xmm11
2656 xorps xmm4,xmm12
2657 xorps xmm5,xmm13
2658
2659 call _aesni_decrypt4
2660
2661 pxor xmm2,xmm10
2662 movdqa xmm10,xmm14
2663 pxor xmm3,xmm11
2664 movdqa xmm11,xmm15
2665 pxor xmm4,xmm12
2666 movdqu XMMWORD[rsi],xmm2
2667 pxor xmm5,xmm13
2668 movdqu XMMWORD[16+rsi],xmm3
2669 movdqu XMMWORD[32+rsi],xmm4
2670 movdqu XMMWORD[48+rsi],xmm5
2671 lea rsi,[64+rsi]
2672 jmp NEAR $L$xts_dec_done
2673
2674ALIGN 16
2675$L$xts_dec_done:
; r9 & 15 = number of trailing bytes; zero means nothing left to do.
2676 and r9,15
2677 jz NEAR $L$xts_dec_ret
2678$L$xts_dec_done2:
; Tail handling, step 1: decrypt the held-back full block at [rdi] using the
; LATER tweak (xmm11) and store it at [rsi]. rdx = trailing byte count;
; rcx/eax are reloaded because earlier code consumed them.
2679 mov rdx,r9
2680 mov rcx,rbp
2681 mov eax,r10d
2682
2683 movups xmm2,XMMWORD[rdi]
2684 xorps xmm2,xmm11
2685 movups xmm0,XMMWORD[rcx]
2686 movups xmm1,XMMWORD[16+rcx]
2687 lea rcx,[32+rcx]
2688 xorps xmm2,xmm0
2689$L$oop_dec1_13:
2690DB 102,15,56,222,209
2691 dec eax
2692 movups xmm1,XMMWORD[rcx]
2693 lea rcx,[16+rcx]
2694 jnz NEAR $L$oop_dec1_13
2695DB 102,15,56,223,209
2696 xorps xmm2,xmm11
2697 movups XMMWORD[rsi],xmm2
2698
2699$L$xts_dec_steal:
; Step 2, byte-wise swap: each trailing input byte ([16+rdi]) replaces the
; corresponding byte of the block just written at [rsi], and the byte it
; displaced becomes the trailing output byte at [16+rsi].
2700 movzx eax,BYTE[16+rdi]
2701 movzx ecx,BYTE[rsi]
2702 lea rdi,[1+rdi]
2703 mov BYTE[rsi],al
2704 mov BYTE[16+rsi],cl
2705 lea rsi,[1+rsi]
2706 sub rdx,1
2707 jnz NEAR $L$xts_dec_steal
2708
; Step 3: rewind rsi and decrypt the patched block at [rsi] in place with the
; EARLIER tweak (xmm10).
2709 sub rsi,r9
2710 mov rcx,rbp
2711 mov eax,r10d
2712
2713 movups xmm2,XMMWORD[rsi]
2714 xorps xmm2,xmm10
2715 movups xmm0,XMMWORD[rcx]
2716 movups xmm1,XMMWORD[16+rcx]
2717 lea rcx,[32+rcx]
2718 xorps xmm2,xmm0
2719$L$oop_dec1_14:
2720DB 102,15,56,222,209
2721 dec eax
2722 movups xmm1,XMMWORD[rcx]
2723 lea rcx,[16+rcx]
2724 jnz NEAR $L$oop_dec1_14
2725DB 102,15,56,223,209
2726 xorps xmm2,xmm10
2727 movups XMMWORD[rsi],xmm2
2728
2729$L$xts_dec_ret:
; Epilogue: zero xmm0-xmm5, restore xmm6-xmm15 from the save area while
; overwriting each save slot with zero, zero the scratch slots [0..96+rsp],
; restore rbp and rsp (from r11), then rdi/rsi.
2730 xorps xmm0,xmm0
2731 pxor xmm1,xmm1
2732 pxor xmm2,xmm2
2733 pxor xmm3,xmm3
2734 pxor xmm4,xmm4
2735 pxor xmm5,xmm5
2736 movaps xmm6,XMMWORD[((-168))+r11]
2737 movaps XMMWORD[(-168)+r11],xmm0
2738 movaps xmm7,XMMWORD[((-152))+r11]
2739 movaps XMMWORD[(-152)+r11],xmm0
2740 movaps xmm8,XMMWORD[((-136))+r11]
2741 movaps XMMWORD[(-136)+r11],xmm0
2742 movaps xmm9,XMMWORD[((-120))+r11]
2743 movaps XMMWORD[(-120)+r11],xmm0
2744 movaps xmm10,XMMWORD[((-104))+r11]
2745 movaps XMMWORD[(-104)+r11],xmm0
2746 movaps xmm11,XMMWORD[((-88))+r11]
2747 movaps XMMWORD[(-88)+r11],xmm0
2748 movaps xmm12,XMMWORD[((-72))+r11]
2749 movaps XMMWORD[(-72)+r11],xmm0
2750 movaps xmm13,XMMWORD[((-56))+r11]
2751 movaps XMMWORD[(-56)+r11],xmm0
2752 movaps xmm14,XMMWORD[((-40))+r11]
2753 movaps XMMWORD[(-40)+r11],xmm0
2754 movaps xmm15,XMMWORD[((-24))+r11]
2755 movaps XMMWORD[(-24)+r11],xmm0
2756 movaps XMMWORD[rsp],xmm0
2757 movaps XMMWORD[16+rsp],xmm0
2758 movaps XMMWORD[32+rsp],xmm0
2759 movaps XMMWORD[48+rsp],xmm0
2760 movaps XMMWORD[64+rsp],xmm0
2761 movaps XMMWORD[80+rsp],xmm0
2762 movaps XMMWORD[96+rsp],xmm0
2763 mov rbp,QWORD[((-8))+r11]
2764
2765 lea rsp,[r11]
2766
2767$L$xts_dec_epilogue:
2768 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2769 mov rsi,QWORD[16+rsp]
2770 DB 0F3h,0C3h ;repret
2771
2772$L$SEH_end_aesni_xts_decrypt:
;-----------------------------------------------------------------------
; aesni_ocb_encrypt -- bulk OCB-style encryption of whole 16-byte blocks
; using AES-NI (Win64 entry point).
;
; Win64 arguments as consumed below (they are shuffled into
; rdi/rsi/rdx/rcx/r8/r9 first):
;   rcx      -> rdi : input pointer (blocks are loaded from [rdi])
;   rdx      -> rsi : output pointer (blocks are stored to [rsi])
;   r8       -> rdx : number of 16-byte blocks (counted down)
;   r9       -> rcx : key schedule; a 32-bit round count is read at +240
;   [40+rsp] -> r8  : number of the first block (its trailing-zero count
;                     selects table entries)
;   [48+rsp] -> r9  : 16-byte running offset, read on entry, written back
;   [56+rsp] -> rbx : table of 16-byte entries indexed by 16*ntz(block#)
;   [64+rsp] -> rbp : 16-byte checksum, read on entry, written back
; NOTE(review): this presumably matches OpenSSL's C prototype
;   aesni_ocb_encrypt(in, out, blocks, key, start_block_num, offset_i,
;                     L_, checksum) -- confirm against the C caller.
;
; Register roles in the body:
;   xmm8  = checksum accumulator
;   xmm9  = round key 0 xor final round key
;   xmm10 = table entry 0 ([rbx])
;   xmm15 = running offset xor final round key
;   r11   = key schedule base, rcx = key schedule + 32 + 16*rounds
;   r10   = 48 - 16*rounds (loop index reload value for rax)
;
; Saves/restores rbx, rbp, r12-r14, rdi, rsi and xmm6-xmm15; the xmm save
; area and xmm0-xmm5 are zeroed before returning.
;
; NOTE(review): a block count of 0 is not handled -- with an even block
; number one block is still processed, and with an odd one the final
; "pxor xmm15,xmm0" uses an xmm0 that no helper has loaded. Callers
; presumably guarantee blocks is at least 1 -- confirm.
; NOTE(review): bsf leaves its destination undefined for a zero source,
; so the first block number is presumably never 0 -- confirm.
;-----------------------------------------------------------------------
2773global aesni_ocb_encrypt
2774
2775ALIGN 32
2776aesni_ocb_encrypt:
; Stash rdi/rsi (callee-saved on Win64) in the caller-provided home
; slots above the return address; they are reloaded in the epilogue.
2777 mov QWORD[8+rsp],rdi ;WIN64 prologue
2778 mov QWORD[16+rsp],rsi
2779 mov rax,rsp
2780$L$SEH_begin_aesni_ocb_encrypt:
; Move the Win64 argument registers / stack slots into the register
; assignment used by the rest of the routine.
2781 mov rdi,rcx
2782 mov rsi,rdx
2783 mov rdx,r8
2784 mov rcx,r9
2785 mov r8,QWORD[40+rsp]
2786 mov r9,QWORD[48+rsp]
2787
2788
2789
; rax = rsp at entry; used below to reach the 7th and 8th arguments.
2790 lea rax,[rsp]
2791 push rbx
2792
2793 push rbp
2794
2795 push r12
2796
2797 push r13
2798
2799 push r14
2800
; Reserve 160 bytes and save xmm6-xmm15. Five 8-byte pushes plus the
; return address keep rsp 16-byte aligned here, as movaps requires.
2801 lea rsp,[((-160))+rsp]
2802 movaps XMMWORD[rsp],xmm6
2803 movaps XMMWORD[16+rsp],xmm7
2804 movaps XMMWORD[32+rsp],xmm8
2805 movaps XMMWORD[48+rsp],xmm9
2806 movaps XMMWORD[64+rsp],xmm10
2807 movaps XMMWORD[80+rsp],xmm11
2808 movaps XMMWORD[96+rsp],xmm12
2809 movaps XMMWORD[112+rsp],xmm13
2810 movaps XMMWORD[128+rsp],xmm14
2811 movaps XMMWORD[144+rsp],xmm15
2812$L$ocb_enc_body:
; 7th and 8th arguments: table base and checksum pointer.
2813 mov rbx,QWORD[56+rax]
2814 mov rbp,QWORD[((56+8))+rax]
2815
; r10 = 16*rounds; xmm9 = round key 0, xmm1 = key at 16+16*rounds
; (the final round key, see the helpers' loop exit).
2816 mov r10d,DWORD[240+rcx]
2817 mov r11,rcx
2818 shl r10d,4
2819 movups xmm9,XMMWORD[rcx]
2820 movups xmm1,XMMWORD[16+r10*1+rcx]
2821
; Fold the final round key into both the key-0 value and the running
; offset so the helpers can apply the offset via aesenclast.
2822 movdqu xmm15,XMMWORD[r9]
2823 pxor xmm9,xmm1
2824 pxor xmm15,xmm1
2825
; rax = r10 = 48 - 16*rounds, rcx = key + 32 + 16*rounds: the helpers
; index round keys as [rax+rcx] while rax counts up to zero.
; xmm1 is left holding the key at [16+r11] for the helpers' first round.
2826 mov eax,16+32
2827 lea rcx,[32+r10*1+r11]
2828 movups xmm1,XMMWORD[16+r11]
2829 sub rax,r10
2830 mov r10,rax
2831
; xmm10 = table entry 0, xmm8 = incoming checksum.
2832 movdqu xmm10,XMMWORD[rbx]
2833 movdqu xmm8,XMMWORD[rbp]
2834
; If the block number is even, process one block on its own so that
; every following batch starts on an odd block number.
2835 test r8,1
2836 jnz NEAR $L$ocb_enc_odd
2837
; r12 = 16 * (index of lowest set bit of r8): table byte offset.
2838 bsf r12,r8
2839 add r8,1
2840 shl r12,4
2841 movdqu xmm7,XMMWORD[r12*1+rbx]
2842 movdqu xmm2,XMMWORD[rdi]
2843 lea rdi,[16+rdi]
2844
2845 call __ocb_encrypt1
2846
; xmm7 returns the new offset (xor final round key); make it current.
2847 movdqa xmm15,xmm7
2848 movups XMMWORD[rsi],xmm2
2849 lea rsi,[16+rsi]
2850 sub rdx,1
2851 jz NEAR $L$ocb_enc_done
2852
2853$L$ocb_enc_odd:
; r8 is odd here. Blocks r8+1, r8+3 and r8+5 are the even-numbered ones
; in the next batch of six; precompute their table byte offsets in
; r12/r13/r14 and advance r8 by six.
2854 lea r12,[1+r8]
2855 lea r13,[3+r8]
2856 lea r14,[5+r8]
2857 lea r8,[6+r8]
2858 bsf r12,r12
2859 bsf r13,r13
2860 bsf r14,r14
2861 shl r12,4
2862 shl r13,4
2863 shl r14,4
2864
; Fewer than six blocks left -> tail handling.
2865 sub rdx,6
2866 jc NEAR $L$ocb_enc_short
2867 jmp NEAR $L$ocb_enc_grandloop
2868
2869ALIGN 32
2870$L$ocb_enc_grandloop:
; Main loop: six blocks per iteration.
2871 movdqu xmm2,XMMWORD[rdi]
2872 movdqu xmm3,XMMWORD[16+rdi]
2873 movdqu xmm4,XMMWORD[32+rdi]
2874 movdqu xmm5,XMMWORD[48+rdi]
2875 movdqu xmm6,XMMWORD[64+rdi]
2876 movdqu xmm7,XMMWORD[80+rdi]
2877 lea rdi,[96+rdi]
2878
; The helper also updates xmm8, xmm15, r8 and r12-r14 for the next pass.
2879 call __ocb_encrypt6
2880
2881 movups XMMWORD[rsi],xmm2
2882 movups XMMWORD[16+rsi],xmm3
2883 movups XMMWORD[32+rsi],xmm4
2884 movups XMMWORD[48+rsi],xmm5
2885 movups XMMWORD[64+rsi],xmm6
2886 movups XMMWORD[80+rsi],xmm7
2887 lea rsi,[96+rsi]
2888 sub rdx,6
2889 jnc NEAR $L$ocb_enc_grandloop
2890
2891$L$ocb_enc_short:
; Undo the last subtraction: rdx = number of remaining blocks (0..5).
2892 add rdx,6
2893 jz NEAR $L$ocb_enc_done
2894
; Dispatch on 1..5 remaining blocks. The flags set by each cmp are still
; live at the following je because movdqu does not modify flags.
2895 movdqu xmm2,XMMWORD[rdi]
2896 cmp rdx,2
2897 jb NEAR $L$ocb_enc_one
2898 movdqu xmm3,XMMWORD[16+rdi]
2899 je NEAR $L$ocb_enc_two
2900
2901 movdqu xmm4,XMMWORD[32+rdi]
2902 cmp rdx,4
2903 jb NEAR $L$ocb_enc_three
2904 movdqu xmm5,XMMWORD[48+rdi]
2905 je NEAR $L$ocb_enc_four
2906
; Five blocks: run the six-block helper with a zero sixth block (a zero
; block leaves the checksum unchanged) and discard the sixth result.
2907 movdqu xmm6,XMMWORD[64+rdi]
2908 pxor xmm7,xmm7
2909
2910 call __ocb_encrypt6
2911
; xmm14 is the fifth block's offset; it becomes the running offset.
2912 movdqa xmm15,xmm14
2913 movups XMMWORD[rsi],xmm2
2914 movups XMMWORD[16+rsi],xmm3
2915 movups XMMWORD[32+rsi],xmm4
2916 movups XMMWORD[48+rsi],xmm5
2917 movups XMMWORD[64+rsi],xmm6
2918
2919 jmp NEAR $L$ocb_enc_done
2920
2921ALIGN 16
2922$L$ocb_enc_one:
; One block on an odd block number: its table entry is entry 0 (xmm10).
2923 movdqa xmm7,xmm10
2924
2925 call __ocb_encrypt1
2926
2927 movdqa xmm15,xmm7
2928 movups XMMWORD[rsi],xmm2
2929 jmp NEAR $L$ocb_enc_done
2930
2931ALIGN 16
2932$L$ocb_enc_two:
; Two blocks: pad the four-block helper with zero blocks; the second
; block's offset (xmm11) becomes the running offset.
2933 pxor xmm4,xmm4
2934 pxor xmm5,xmm5
2935
2936 call __ocb_encrypt4
2937
2938 movdqa xmm15,xmm11
2939 movups XMMWORD[rsi],xmm2
2940 movups XMMWORD[16+rsi],xmm3
2941
2942 jmp NEAR $L$ocb_enc_done
2943
2944ALIGN 16
2945$L$ocb_enc_three:
; Three blocks: one zero pad block; running offset = xmm12.
2946 pxor xmm5,xmm5
2947
2948 call __ocb_encrypt4
2949
2950 movdqa xmm15,xmm12
2951 movups XMMWORD[rsi],xmm2
2952 movups XMMWORD[16+rsi],xmm3
2953 movups XMMWORD[32+rsi],xmm4
2954
2955 jmp NEAR $L$ocb_enc_done
2956
2957ALIGN 16
2958$L$ocb_enc_four:
; Four blocks: running offset = xmm13.
2959 call __ocb_encrypt4
2960
2961 movdqa xmm15,xmm13
2962 movups XMMWORD[rsi],xmm2
2963 movups XMMWORD[16+rsi],xmm3
2964 movups XMMWORD[32+rsi],xmm4
2965 movups XMMWORD[48+rsi],xmm5
2966
2967$L$ocb_enc_done:
; The helpers leave the final round key in xmm0; xor it back out of the
; running offset, then write the checksum and offset to the caller.
2968 pxor xmm15,xmm0
2969 movdqu XMMWORD[rbp],xmm8
2970 movdqu XMMWORD[r9],xmm15
2971
; Clear the volatile xmm registers, then restore xmm6-xmm15 while
; overwriting each stack save slot with zero.
2972 xorps xmm0,xmm0
2973 pxor xmm1,xmm1
2974 pxor xmm2,xmm2
2975 pxor xmm3,xmm3
2976 pxor xmm4,xmm4
2977 pxor xmm5,xmm5
2978 movaps xmm6,XMMWORD[rsp]
2979 movaps XMMWORD[rsp],xmm0
2980 movaps xmm7,XMMWORD[16+rsp]
2981 movaps XMMWORD[16+rsp],xmm0
2982 movaps xmm8,XMMWORD[32+rsp]
2983 movaps XMMWORD[32+rsp],xmm0
2984 movaps xmm9,XMMWORD[48+rsp]
2985 movaps XMMWORD[48+rsp],xmm0
2986 movaps xmm10,XMMWORD[64+rsp]
2987 movaps XMMWORD[64+rsp],xmm0
2988 movaps xmm11,XMMWORD[80+rsp]
2989 movaps XMMWORD[80+rsp],xmm0
2990 movaps xmm12,XMMWORD[96+rsp]
2991 movaps XMMWORD[96+rsp],xmm0
2992 movaps xmm13,XMMWORD[112+rsp]
2993 movaps XMMWORD[112+rsp],xmm0
2994 movaps xmm14,XMMWORD[128+rsp]
2995 movaps XMMWORD[128+rsp],xmm0
2996 movaps xmm15,XMMWORD[144+rsp]
2997 movaps XMMWORD[144+rsp],xmm0
; rax = rsp + 160 (xmm area) + 40 (five pushes) = rsp at entry.
2998 lea rax,[((160+40))+rsp]
2999$L$ocb_enc_pop:
3000 mov r14,QWORD[((-40))+rax]
3001
3002 mov r13,QWORD[((-32))+rax]
3003
3004 mov r12,QWORD[((-24))+rax]
3005
3006 mov rbp,QWORD[((-16))+rax]
3007
3008 mov rbx,QWORD[((-8))+rax]
3009
3010 lea rsp,[rax]
3011
3012$L$ocb_enc_epilogue:
; Reload rdi/rsi from the home slots written in the prologue.
3013 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
3014 mov rsi,QWORD[16+rsp]
; F3 C3 = "rep ret".
3015 DB 0F3h,0C3h ;repret
3016
3017$L$SEH_end_aesni_ocb_encrypt:
3018
3019
3020ALIGN 32
;-----------------------------------------------------------------------
; __ocb_encrypt6 -- internal helper: process six blocks in parallel.
;
; In:   xmm2..xmm7 = six input blocks
;       xmm15 = running offset xor final round key
;       xmm9  = round key 0 xor final round key
;       xmm10 = table entry 0 ([rbx]); xmm8 = checksum accumulator
;       xmm1  = key at [16+r11]
;       r12/r13/r14 = table byte offsets for the 2nd/4th/6th block
;       r8 = block counter; rax = r10 = negative round-key index
;       r11 = key schedule; rcx = base for [rax+rcx] round-key loads
; Out:  xmm2..xmm7 = encrypted blocks
;       xmm11..xmm15 = offsets of blocks 2..6 xor final round key
;                      (xmm15 is the new running offset)
;       xmm10 = table entry 0 (reloaded), xmm8 updated with all six inputs
;       xmm0 = last key loaded by the loop ([-16+rcx]), xmm1 = [16+r11]
;       r8 advanced by 6; r12/r13/r14 recomputed from the new r8;
;       rax = r10
;
; Instruction encodings used below:
;   DB 102,15,56,220,N     = aesenc     (N: 209..249 = xmm2..xmm7,xmm1;
;                                           208..248 = xmm2..xmm7,xmm0)
;   DB 102,65,15,56,221,N  = aesenclast xmm2..xmm7 with xmm10..xmm15
;-----------------------------------------------------------------------
3021__ocb_encrypt6:
; xmm15 = running offset xor round key 0 (the final key cancels out).
3022 pxor xmm15,xmm9
; Build the six per-block offsets in xmm10..xmm15: each is the previous
; offset xor a table entry (entry 0, [r12], entry 0, [r13], entry 0,
; [r14]). Interleaved with that, each input block is first xored into
; the checksum (xmm8) and then xored with its offset (which still
; contains round key 0, so this also applies the initial key).
3023 movdqu xmm11,XMMWORD[r12*1+rbx]
3024 movdqa xmm12,xmm10
3025 movdqu xmm13,XMMWORD[r13*1+rbx]
3026 movdqa xmm14,xmm10
3027 pxor xmm10,xmm15
3028 movdqu xmm15,XMMWORD[r14*1+rbx]
3029 pxor xmm11,xmm10
3030 pxor xmm8,xmm2
3031 pxor xmm2,xmm10
3032 pxor xmm12,xmm11
3033 pxor xmm8,xmm3
3034 pxor xmm3,xmm11
3035 pxor xmm13,xmm12
3036 pxor xmm8,xmm4
3037 pxor xmm4,xmm12
3038 pxor xmm14,xmm13
3039 pxor xmm8,xmm5
3040 pxor xmm5,xmm13
3041 pxor xmm15,xmm14
3042 pxor xmm8,xmm6
3043 pxor xmm6,xmm14
3044 pxor xmm8,xmm7
3045 pxor xmm7,xmm15
3046 movups xmm0,XMMWORD[32+r11]
3047
; Start computing the table indices for the next batch of six while the
; AES rounds are in flight.
3048 lea r12,[1+r8]
3049 lea r13,[3+r8]
3050 lea r14,[5+r8]
3051 add r8,6
; Xoring with xmm9 swaps round key 0 for the final round key inside each
; offset, ready for use as the aesenclast operand.
3052 pxor xmm10,xmm9
3053 bsf r12,r12
3054 bsf r13,r13
3055 bsf r14,r14
3056
; Round with the key at [16+r11] (xmm1).
3057DB 102,15,56,220,209
3058DB 102,15,56,220,217
3059DB 102,15,56,220,225
3060DB 102,15,56,220,233
3061 pxor xmm11,xmm9
3062 pxor xmm12,xmm9
3063DB 102,15,56,220,241
3064 pxor xmm13,xmm9
3065 pxor xmm14,xmm9
3066DB 102,15,56,220,249
3067 movups xmm1,XMMWORD[48+r11]
3068 pxor xmm15,xmm9
3069
; Round with the key at [32+r11] (xmm0).
3070DB 102,15,56,220,208
3071DB 102,15,56,220,216
3072DB 102,15,56,220,224
3073DB 102,15,56,220,232
3074DB 102,15,56,220,240
3075DB 102,15,56,220,248
3076 movups xmm0,XMMWORD[64+r11]
3077 shl r12,4
3078 shl r13,4
3079 jmp NEAR $L$ocb_enc_loop6
3080
3081ALIGN 32
3082$L$ocb_enc_loop6:
; Two rounds per iteration, alternating xmm1/xmm0 as the key register.
; rax steps by 32 from r10 up to zero; the jnz tests the flags of
; "add rax,32" (neither the DB-encoded AES ops nor movups touch flags).
3083DB 102,15,56,220,209
3084DB 102,15,56,220,217
3085DB 102,15,56,220,225
3086DB 102,15,56,220,233
3087DB 102,15,56,220,241
3088DB 102,15,56,220,249
3089 movups xmm1,XMMWORD[rax*1+rcx]
3090 add rax,32
3091
3092DB 102,15,56,220,208
3093DB 102,15,56,220,216
3094DB 102,15,56,220,224
3095DB 102,15,56,220,232
3096DB 102,15,56,220,240
3097DB 102,15,56,220,248
3098 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3099 jnz NEAR $L$ocb_enc_loop6
3100
; Last aesenc round; xmm0 now holds the key at [-16+rcx] and is left
; untouched for the caller. xmm1 is reset to [16+r11] for the next call.
3101DB 102,15,56,220,209
3102DB 102,15,56,220,217
3103DB 102,15,56,220,225
3104DB 102,15,56,220,233
3105DB 102,15,56,220,241
3106DB 102,15,56,220,249
3107 movups xmm1,XMMWORD[16+r11]
3108 shl r14,4
3109
; Final round: aesenclast with (offset xor final key) as the key operand,
; which applies the last round key and the output offset in one step.
; xmm10 is reloaded with table entry 0 once it has been consumed, and
; rax is reset to the loop start value.
3110DB 102,65,15,56,221,210
3111 movdqu xmm10,XMMWORD[rbx]
3112 mov rax,r10
3113DB 102,65,15,56,221,219
3114DB 102,65,15,56,221,228
3115DB 102,65,15,56,221,237
3116DB 102,65,15,56,221,246
3117DB 102,65,15,56,221,255
3118 DB 0F3h,0C3h ;repret
3119
3120
3121
3122ALIGN 32
;-----------------------------------------------------------------------
; __ocb_encrypt4 -- internal helper: process four blocks in parallel.
;
; In:   xmm2..xmm5 = four input blocks
;       xmm15 = running offset xor final round key
;       xmm9  = round key 0 xor final round key
;       xmm10 = table entry 0; xmm8 = checksum accumulator
;       xmm1  = key at [16+r11]
;       r12/r13 = table byte offsets for the 2nd/4th block
;       rax = r10 = negative round-key index; r11, rcx = key pointers
; Out:  xmm2..xmm5 = encrypted blocks
;       xmm10..xmm13 = offsets of blocks 1..4 xor final round key
;       xmm8 updated with all four inputs
;       xmm0 = last key loaded by the loop ([-16+rcx]), xmm1 = [16+r11]
;       rax = r10
; This helper does not write r8, r12, r13 or r14, does not reload xmm10
; from the table, and leaves xmm15 holding (old offset xor round key 0);
; the caller picks the new running offset from xmm11..xmm13.
;
; DB 102,15,56,220,N    = aesenc xmm2..xmm5 with xmm1 (209..233) or
;                         xmm0 (208..232)
; DB 102,65,15,56,221,N = aesenclast xmm2..xmm5 with xmm10..xmm13
;-----------------------------------------------------------------------
3123__ocb_encrypt4:
; xmm15 = running offset xor round key 0.
3124 pxor xmm15,xmm9
; Chain the four offsets (entry 0, [r12], entry 0, [r13]); xor each
; input into the checksum, then with its offset.
3125 movdqu xmm11,XMMWORD[r12*1+rbx]
3126 movdqa xmm12,xmm10
3127 movdqu xmm13,XMMWORD[r13*1+rbx]
3128 pxor xmm10,xmm15
3129 pxor xmm11,xmm10
3130 pxor xmm8,xmm2
3131 pxor xmm2,xmm10
3132 pxor xmm12,xmm11
3133 pxor xmm8,xmm3
3134 pxor xmm3,xmm11
3135 pxor xmm13,xmm12
3136 pxor xmm8,xmm4
3137 pxor xmm4,xmm12
3138 pxor xmm8,xmm5
3139 pxor xmm5,xmm13
3140 movups xmm0,XMMWORD[32+r11]
3141
; Swap round key 0 for the final round key inside each offset.
3142 pxor xmm10,xmm9
3143 pxor xmm11,xmm9
3144 pxor xmm12,xmm9
3145 pxor xmm13,xmm9
3146
; Rounds with the keys at [16+r11] and [32+r11].
3147DB 102,15,56,220,209
3148DB 102,15,56,220,217
3149DB 102,15,56,220,225
3150DB 102,15,56,220,233
3151 movups xmm1,XMMWORD[48+r11]
3152
3153DB 102,15,56,220,208
3154DB 102,15,56,220,216
3155DB 102,15,56,220,224
3156DB 102,15,56,220,232
3157 movups xmm0,XMMWORD[64+r11]
3158 jmp NEAR $L$ocb_enc_loop4
3159
3160ALIGN 32
3161$L$ocb_enc_loop4:
; Two rounds per iteration; rax steps by 32 from r10 up to zero and the
; jnz consumes the flags of "add rax,32".
3162DB 102,15,56,220,209
3163DB 102,15,56,220,217
3164DB 102,15,56,220,225
3165DB 102,15,56,220,233
3166 movups xmm1,XMMWORD[rax*1+rcx]
3167 add rax,32
3168
3169DB 102,15,56,220,208
3170DB 102,15,56,220,216
3171DB 102,15,56,220,224
3172DB 102,15,56,220,232
3173 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3174 jnz NEAR $L$ocb_enc_loop4
3175
; Last aesenc round, then restore xmm1 and rax for the next helper call.
3176DB 102,15,56,220,209
3177DB 102,15,56,220,217
3178DB 102,15,56,220,225
3179DB 102,15,56,220,233
3180 movups xmm1,XMMWORD[16+r11]
3181 mov rax,r10
3182
; aesenclast with (offset xor final key) finishes each block.
3183DB 102,65,15,56,221,210
3184DB 102,65,15,56,221,219
3185DB 102,65,15,56,221,228
3186DB 102,65,15,56,221,237
3187 DB 0F3h,0C3h ;repret
3188
3189
3190
3191ALIGN 32
;-----------------------------------------------------------------------
; __ocb_encrypt1 -- internal helper: process a single block.
;
; In:   xmm2  = input block
;       xmm7  = table entry for this block
;       xmm15 = running offset xor final round key
;       xmm9  = round key 0 xor final round key
;       xmm8  = checksum accumulator; xmm1 = key at [16+r11]
;       rax = r10 = negative round-key index; r11, rcx = key pointers
; Out:  xmm2 = encrypted block
;       xmm7 = new offset xor final round key (caller copies it to xmm15)
;       xmm8 updated with the input block
;       xmm0 = last key loaded by the loop ([-16+rcx]), xmm1 = [16+r11]
;       rax = r10
;
; DB 102,15,56,220,209 = aesenc xmm2,xmm1
; DB 102,15,56,220,208 = aesenc xmm2,xmm0
; DB 102,15,56,221,215 = aesenclast xmm2,xmm7
;-----------------------------------------------------------------------
3192__ocb_encrypt1:
; xmm7 = table entry xor running offset xor round key 0 (the two copies
; of the final round key cancel).
3193 pxor xmm7,xmm15
3194 pxor xmm7,xmm9
; Accumulate the input into the checksum, then apply offset + key 0.
3195 pxor xmm8,xmm2
3196 pxor xmm2,xmm7
3197 movups xmm0,XMMWORD[32+r11]
3198
3199DB 102,15,56,220,209
3200 movups xmm1,XMMWORD[48+r11]
; xmm7 = new offset xor final round key, for the aesenclast below.
3201 pxor xmm7,xmm9
3202
3203DB 102,15,56,220,208
3204 movups xmm0,XMMWORD[64+r11]
3205 jmp NEAR $L$ocb_enc_loop1
3206
3207ALIGN 32
3208$L$ocb_enc_loop1:
; Two rounds per iteration; rax steps by 32 from r10 up to zero.
3209DB 102,15,56,220,209
3210 movups xmm1,XMMWORD[rax*1+rcx]
3211 add rax,32
3212
3213DB 102,15,56,220,208
3214 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3215 jnz NEAR $L$ocb_enc_loop1
3216
; Last aesenc round, restore xmm1/rax, then finish with aesenclast.
3217DB 102,15,56,220,209
3218 movups xmm1,XMMWORD[16+r11]
3219 mov rax,r10
3220
3221DB 102,15,56,221,215
3222 DB 0F3h,0C3h ;repret
3223
3224
;-----------------------------------------------------------------------
; aesni_ocb_decrypt -- bulk OCB-style decryption of whole 16-byte blocks
; using AES-NI (Win64 entry point).
;
; Win64 arguments as consumed below (they are shuffled into
; rdi/rsi/rdx/rcx/r8/r9 first):
;   rcx      -> rdi : input pointer (blocks are loaded from [rdi])
;   rdx      -> rsi : output pointer (blocks are stored to [rsi])
;   r8       -> rdx : number of 16-byte blocks (counted down)
;   r9       -> rcx : key schedule; a 32-bit round count is read at +240
;   [40+rsp] -> r8  : number of the first block
;   [48+rsp] -> r9  : 16-byte running offset, read on entry, written back
;   [56+rsp] -> rbx : table of 16-byte entries indexed by 16*ntz(block#)
;   [64+rsp] -> rbp : 16-byte checksum, read on entry, written back
; NOTE(review): presumably mirrors OpenSSL's C prototype for
;   aesni_ocb_decrypt and expects a decryption key schedule -- confirm
;   against the C caller.
;
; Differs from the encrypt entry point only in that the helpers use
; aesdec/aesdeclast and the checksum (xmm8) is accumulated here, from the
; helpers' output blocks, rather than inside the helpers.
;
; Register roles in the body:
;   xmm8  = checksum accumulator
;   xmm9  = round key 0 xor final round key
;   xmm10 = table entry 0 ([rbx])
;   xmm15 = running offset xor final round key
;   r11   = key schedule base, rcx = key schedule + 32 + 16*rounds
;   r10   = 48 - 16*rounds (loop index reload value for rax)
;
; Saves/restores rbx, rbp, r12-r14, rdi, rsi and xmm6-xmm15; the xmm save
; area and xmm0-xmm5 are zeroed before returning.
;
; NOTE(review): a block count of 0 is not handled (see the final
; "pxor xmm15,xmm0", which relies on a helper having loaded xmm0);
; callers presumably guarantee blocks is at least 1 -- confirm.
; NOTE(review): bsf leaves its destination undefined for a zero source,
; so the first block number is presumably never 0 -- confirm.
;-----------------------------------------------------------------------
3225global aesni_ocb_decrypt
3226
3227ALIGN 32
3228aesni_ocb_decrypt:
; Stash rdi/rsi (callee-saved on Win64) in the caller-provided home
; slots above the return address; they are reloaded in the epilogue.
3229 mov QWORD[8+rsp],rdi ;WIN64 prologue
3230 mov QWORD[16+rsp],rsi
3231 mov rax,rsp
3232$L$SEH_begin_aesni_ocb_decrypt:
; Move the Win64 argument registers / stack slots into the register
; assignment used by the rest of the routine.
3233 mov rdi,rcx
3234 mov rsi,rdx
3235 mov rdx,r8
3236 mov rcx,r9
3237 mov r8,QWORD[40+rsp]
3238 mov r9,QWORD[48+rsp]
3239
3240
3241
; rax = rsp at entry; used below to reach the 7th and 8th arguments.
3242 lea rax,[rsp]
3243 push rbx
3244
3245 push rbp
3246
3247 push r12
3248
3249 push r13
3250
3251 push r14
3252
; Reserve 160 bytes and save xmm6-xmm15 (rsp is 16-byte aligned here).
3253 lea rsp,[((-160))+rsp]
3254 movaps XMMWORD[rsp],xmm6
3255 movaps XMMWORD[16+rsp],xmm7
3256 movaps XMMWORD[32+rsp],xmm8
3257 movaps XMMWORD[48+rsp],xmm9
3258 movaps XMMWORD[64+rsp],xmm10
3259 movaps XMMWORD[80+rsp],xmm11
3260 movaps XMMWORD[96+rsp],xmm12
3261 movaps XMMWORD[112+rsp],xmm13
3262 movaps XMMWORD[128+rsp],xmm14
3263 movaps XMMWORD[144+rsp],xmm15
3264$L$ocb_dec_body:
; 7th and 8th arguments: table base and checksum pointer.
3265 mov rbx,QWORD[56+rax]
3266 mov rbp,QWORD[((56+8))+rax]
3267
; r10 = 16*rounds; xmm9 = round key 0, xmm1 = key at 16+16*rounds
; (the final round key, see the helpers' loop exit).
3268 mov r10d,DWORD[240+rcx]
3269 mov r11,rcx
3270 shl r10d,4
3271 movups xmm9,XMMWORD[rcx]
3272 movups xmm1,XMMWORD[16+r10*1+rcx]
3273
; Fold the final round key into both the key-0 value and the running
; offset so the helpers can apply the offset via aesdeclast.
3274 movdqu xmm15,XMMWORD[r9]
3275 pxor xmm9,xmm1
3276 pxor xmm15,xmm1
3277
; rax = r10 = 48 - 16*rounds, rcx = key + 32 + 16*rounds: the helpers
; index round keys as [rax+rcx] while rax counts up to zero.
3278 mov eax,16+32
3279 lea rcx,[32+r10*1+r11]
3280 movups xmm1,XMMWORD[16+r11]
3281 sub rax,r10
3282 mov r10,rax
3283
; xmm10 = table entry 0, xmm8 = incoming checksum.
3284 movdqu xmm10,XMMWORD[rbx]
3285 movdqu xmm8,XMMWORD[rbp]
3286
; If the block number is even, process one block on its own so that
; every following batch starts on an odd block number.
3287 test r8,1
3288 jnz NEAR $L$ocb_dec_odd
3289
; r12 = 16 * (index of lowest set bit of r8): table byte offset.
3290 bsf r12,r8
3291 add r8,1
3292 shl r12,4
3293 movdqu xmm7,XMMWORD[r12*1+rbx]
3294 movdqu xmm2,XMMWORD[rdi]
3295 lea rdi,[16+rdi]
3296
3297 call __ocb_decrypt1
3298
; New running offset from xmm7; the decrypted block goes to the output
; and into the checksum.
3299 movdqa xmm15,xmm7
3300 movups XMMWORD[rsi],xmm2
3301 xorps xmm8,xmm2
3302 lea rsi,[16+rsi]
3303 sub rdx,1
3304 jz NEAR $L$ocb_dec_done
3305
3306$L$ocb_dec_odd:
; r8 is odd here. Precompute the table byte offsets for blocks r8+1,
; r8+3 and r8+5 in r12/r13/r14 and advance r8 by six.
3307 lea r12,[1+r8]
3308 lea r13,[3+r8]
3309 lea r14,[5+r8]
3310 lea r8,[6+r8]
3311 bsf r12,r12
3312 bsf r13,r13
3313 bsf r14,r14
3314 shl r12,4
3315 shl r13,4
3316 shl r14,4
3317
; Fewer than six blocks left -> tail handling.
3318 sub rdx,6
3319 jc NEAR $L$ocb_dec_short
3320 jmp NEAR $L$ocb_dec_grandloop
3321
3322ALIGN 32
3323$L$ocb_dec_grandloop:
; Main loop: six blocks per iteration.
3324 movdqu xmm2,XMMWORD[rdi]
3325 movdqu xmm3,XMMWORD[16+rdi]
3326 movdqu xmm4,XMMWORD[32+rdi]
3327 movdqu xmm5,XMMWORD[48+rdi]
3328 movdqu xmm6,XMMWORD[64+rdi]
3329 movdqu xmm7,XMMWORD[80+rdi]
3330 lea rdi,[96+rdi]
3331
; The helper also updates xmm15, r8 and r12-r14 for the next pass.
3332 call __ocb_decrypt6
3333
; Store each decrypted block and xor it into the checksum.
3334 movups XMMWORD[rsi],xmm2
3335 pxor xmm8,xmm2
3336 movups XMMWORD[16+rsi],xmm3
3337 pxor xmm8,xmm3
3338 movups XMMWORD[32+rsi],xmm4
3339 pxor xmm8,xmm4
3340 movups XMMWORD[48+rsi],xmm5
3341 pxor xmm8,xmm5
3342 movups XMMWORD[64+rsi],xmm6
3343 pxor xmm8,xmm6
3344 movups XMMWORD[80+rsi],xmm7
3345 pxor xmm8,xmm7
3346 lea rsi,[96+rsi]
3347 sub rdx,6
3348 jnc NEAR $L$ocb_dec_grandloop
3349
3350$L$ocb_dec_short:
; Undo the last subtraction: rdx = number of remaining blocks (0..5).
3351 add rdx,6
3352 jz NEAR $L$ocb_dec_done
3353
; Dispatch on 1..5 remaining blocks. The flags set by each cmp are still
; live at the following je because movdqu does not modify flags.
3354 movdqu xmm2,XMMWORD[rdi]
3355 cmp rdx,2
3356 jb NEAR $L$ocb_dec_one
3357 movdqu xmm3,XMMWORD[16+rdi]
3358 je NEAR $L$ocb_dec_two
3359
3360 movdqu xmm4,XMMWORD[32+rdi]
3361 cmp rdx,4
3362 jb NEAR $L$ocb_dec_three
3363 movdqu xmm5,XMMWORD[48+rdi]
3364 je NEAR $L$ocb_dec_four
3365
; Five blocks: run the six-block helper with a zero sixth block; its
; result (xmm7) is neither stored nor added to the checksum.
3366 movdqu xmm6,XMMWORD[64+rdi]
3367 pxor xmm7,xmm7
3368
3369 call __ocb_decrypt6
3370
; xmm14 is the fifth block's offset; it becomes the running offset.
3371 movdqa xmm15,xmm14
3372 movups XMMWORD[rsi],xmm2
3373 pxor xmm8,xmm2
3374 movups XMMWORD[16+rsi],xmm3
3375 pxor xmm8,xmm3
3376 movups XMMWORD[32+rsi],xmm4
3377 pxor xmm8,xmm4
3378 movups XMMWORD[48+rsi],xmm5
3379 pxor xmm8,xmm5
3380 movups XMMWORD[64+rsi],xmm6
3381 pxor xmm8,xmm6
3382
3383 jmp NEAR $L$ocb_dec_done
3384
3385ALIGN 16
3386$L$ocb_dec_one:
; One block on an odd block number: its table entry is entry 0 (xmm10).
3387 movdqa xmm7,xmm10
3388
3389 call __ocb_decrypt1
3390
3391 movdqa xmm15,xmm7
3392 movups XMMWORD[rsi],xmm2
3393 xorps xmm8,xmm2
3394 jmp NEAR $L$ocb_dec_done
3395
3396ALIGN 16
3397$L$ocb_dec_two:
; Two blocks: pad the four-block helper with zero blocks; only the two
; real results are stored and checksummed. Running offset = xmm11.
3398 pxor xmm4,xmm4
3399 pxor xmm5,xmm5
3400
3401 call __ocb_decrypt4
3402
3403 movdqa xmm15,xmm11
3404 movups XMMWORD[rsi],xmm2
3405 xorps xmm8,xmm2
3406 movups XMMWORD[16+rsi],xmm3
3407 xorps xmm8,xmm3
3408
3409 jmp NEAR $L$ocb_dec_done
3410
3411ALIGN 16
3412$L$ocb_dec_three:
; Three blocks: one zero pad block; running offset = xmm12.
3413 pxor xmm5,xmm5
3414
3415 call __ocb_decrypt4
3416
3417 movdqa xmm15,xmm12
3418 movups XMMWORD[rsi],xmm2
3419 xorps xmm8,xmm2
3420 movups XMMWORD[16+rsi],xmm3
3421 xorps xmm8,xmm3
3422 movups XMMWORD[32+rsi],xmm4
3423 xorps xmm8,xmm4
3424
3425 jmp NEAR $L$ocb_dec_done
3426
3427ALIGN 16
3428$L$ocb_dec_four:
; Four blocks: running offset = xmm13.
3429 call __ocb_decrypt4
3430
3431 movdqa xmm15,xmm13
3432 movups XMMWORD[rsi],xmm2
3433 pxor xmm8,xmm2
3434 movups XMMWORD[16+rsi],xmm3
3435 pxor xmm8,xmm3
3436 movups XMMWORD[32+rsi],xmm4
3437 pxor xmm8,xmm4
3438 movups XMMWORD[48+rsi],xmm5
3439 pxor xmm8,xmm5
3440
3441$L$ocb_dec_done:
; The helpers leave the final round key in xmm0; xor it back out of the
; running offset, then write the checksum and offset to the caller.
3442 pxor xmm15,xmm0
3443 movdqu XMMWORD[rbp],xmm8
3444 movdqu XMMWORD[r9],xmm15
3445
; Clear the volatile xmm registers, then restore xmm6-xmm15 while
; overwriting each stack save slot with zero.
3446 xorps xmm0,xmm0
3447 pxor xmm1,xmm1
3448 pxor xmm2,xmm2
3449 pxor xmm3,xmm3
3450 pxor xmm4,xmm4
3451 pxor xmm5,xmm5
3452 movaps xmm6,XMMWORD[rsp]
3453 movaps XMMWORD[rsp],xmm0
3454 movaps xmm7,XMMWORD[16+rsp]
3455 movaps XMMWORD[16+rsp],xmm0
3456 movaps xmm8,XMMWORD[32+rsp]
3457 movaps XMMWORD[32+rsp],xmm0
3458 movaps xmm9,XMMWORD[48+rsp]
3459 movaps XMMWORD[48+rsp],xmm0
3460 movaps xmm10,XMMWORD[64+rsp]
3461 movaps XMMWORD[64+rsp],xmm0
3462 movaps xmm11,XMMWORD[80+rsp]
3463 movaps XMMWORD[80+rsp],xmm0
3464 movaps xmm12,XMMWORD[96+rsp]
3465 movaps XMMWORD[96+rsp],xmm0
3466 movaps xmm13,XMMWORD[112+rsp]
3467 movaps XMMWORD[112+rsp],xmm0
3468 movaps xmm14,XMMWORD[128+rsp]
3469 movaps XMMWORD[128+rsp],xmm0
3470 movaps xmm15,XMMWORD[144+rsp]
3471 movaps XMMWORD[144+rsp],xmm0
; rax = rsp + 160 (xmm area) + 40 (five pushes) = rsp at entry.
3472 lea rax,[((160+40))+rsp]
3473$L$ocb_dec_pop:
3474 mov r14,QWORD[((-40))+rax]
3475
3476 mov r13,QWORD[((-32))+rax]
3477
3478 mov r12,QWORD[((-24))+rax]
3479
3480 mov rbp,QWORD[((-16))+rax]
3481
3482 mov rbx,QWORD[((-8))+rax]
3483
3484 lea rsp,[rax]
3485
3486$L$ocb_dec_epilogue:
; Reload rdi/rsi from the home slots written in the prologue.
3487 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
3488 mov rsi,QWORD[16+rsp]
; F3 C3 = "rep ret".
3489 DB 0F3h,0C3h ;repret
3490
3491$L$SEH_end_aesni_ocb_decrypt:
3492
3493
3494ALIGN 32
;-----------------------------------------------------------------------
; __ocb_decrypt6 -- internal helper: process six blocks in parallel.
;
; In:   xmm2..xmm7 = six input blocks
;       xmm15 = running offset xor final round key
;       xmm9  = round key 0 xor final round key
;       xmm10 = table entry 0 ([rbx]); xmm1 = key at [16+r11]
;       r12/r13/r14 = table byte offsets for the 2nd/4th/6th block
;       r8 = block counter; rax = r10 = negative round-key index
;       r11 = key schedule; rcx = base for [rax+rcx] round-key loads
; Out:  xmm2..xmm7 = decrypted blocks
;       xmm11..xmm15 = offsets of blocks 2..6 xor final round key
;                      (xmm15 is the new running offset)
;       xmm10 = table entry 0 (reloaded)
;       xmm0 = last key loaded by the loop ([-16+rcx]), xmm1 = [16+r11]
;       r8 advanced by 6; r12/r13/r14 recomputed from the new r8;
;       rax = r10
; xmm8 (the checksum) is not touched here; the caller accumulates it
; from the output blocks.
;
; Instruction encodings used below:
;   DB 102,15,56,222,N     = aesdec     (N: 209..249 = xmm2..xmm7,xmm1;
;                                           208..248 = xmm2..xmm7,xmm0)
;   DB 102,65,15,56,223,N  = aesdeclast xmm2..xmm7 with xmm10..xmm15
;-----------------------------------------------------------------------
3495__ocb_decrypt6:
; xmm15 = running offset xor round key 0 (the final key cancels out).
3496 pxor xmm15,xmm9
; Build the six per-block offsets in xmm10..xmm15 (each is the previous
; offset xor table entry 0, [r12], entry 0, [r13], entry 0, [r14]) and
; xor each input block with its offset, which also applies round key 0.
3497 movdqu xmm11,XMMWORD[r12*1+rbx]
3498 movdqa xmm12,xmm10
3499 movdqu xmm13,XMMWORD[r13*1+rbx]
3500 movdqa xmm14,xmm10
3501 pxor xmm10,xmm15
3502 movdqu xmm15,XMMWORD[r14*1+rbx]
3503 pxor xmm11,xmm10
3504 pxor xmm2,xmm10
3505 pxor xmm12,xmm11
3506 pxor xmm3,xmm11
3507 pxor xmm13,xmm12
3508 pxor xmm4,xmm12
3509 pxor xmm14,xmm13
3510 pxor xmm5,xmm13
3511 pxor xmm15,xmm14
3512 pxor xmm6,xmm14
3513 pxor xmm7,xmm15
3514 movups xmm0,XMMWORD[32+r11]
3515
; Start computing the table indices for the next batch of six.
3516 lea r12,[1+r8]
3517 lea r13,[3+r8]
3518 lea r14,[5+r8]
3519 add r8,6
; Xoring with xmm9 swaps round key 0 for the final round key inside each
; offset, ready for use as the aesdeclast operand.
3520 pxor xmm10,xmm9
3521 bsf r12,r12
3522 bsf r13,r13
3523 bsf r14,r14
3524
; Round with the key at [16+r11] (xmm1).
3525DB 102,15,56,222,209
3526DB 102,15,56,222,217
3527DB 102,15,56,222,225
3528DB 102,15,56,222,233
3529 pxor xmm11,xmm9
3530 pxor xmm12,xmm9
3531DB 102,15,56,222,241
3532 pxor xmm13,xmm9
3533 pxor xmm14,xmm9
3534DB 102,15,56,222,249
3535 movups xmm1,XMMWORD[48+r11]
3536 pxor xmm15,xmm9
3537
; Round with the key at [32+r11] (xmm0).
3538DB 102,15,56,222,208
3539DB 102,15,56,222,216
3540DB 102,15,56,222,224
3541DB 102,15,56,222,232
3542DB 102,15,56,222,240
3543DB 102,15,56,222,248
3544 movups xmm0,XMMWORD[64+r11]
3545 shl r12,4
3546 shl r13,4
3547 jmp NEAR $L$ocb_dec_loop6
3548
3549ALIGN 32
3550$L$ocb_dec_loop6:
; Two rounds per iteration, alternating xmm1/xmm0 as the key register.
; rax steps by 32 from r10 up to zero; the jnz tests the flags of
; "add rax,32" (neither the DB-encoded AES ops nor movups touch flags).
3551DB 102,15,56,222,209
3552DB 102,15,56,222,217
3553DB 102,15,56,222,225
3554DB 102,15,56,222,233
3555DB 102,15,56,222,241
3556DB 102,15,56,222,249
3557 movups xmm1,XMMWORD[rax*1+rcx]
3558 add rax,32
3559
3560DB 102,15,56,222,208
3561DB 102,15,56,222,216
3562DB 102,15,56,222,224
3563DB 102,15,56,222,232
3564DB 102,15,56,222,240
3565DB 102,15,56,222,248
3566 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3567 jnz NEAR $L$ocb_dec_loop6
3568
; Last aesdec round; xmm0 now holds the key at [-16+rcx] and is left
; untouched for the caller. xmm1 is reset to [16+r11] for the next call.
3569DB 102,15,56,222,209
3570DB 102,15,56,222,217
3571DB 102,15,56,222,225
3572DB 102,15,56,222,233
3573DB 102,15,56,222,241
3574DB 102,15,56,222,249
3575 movups xmm1,XMMWORD[16+r11]
3576 shl r14,4
3577
; Final round: aesdeclast with (offset xor final key) as the key operand.
; xmm10 is reloaded with table entry 0 once it has been consumed, and
; rax is reset to the loop start value.
3578DB 102,65,15,56,223,210
3579 movdqu xmm10,XMMWORD[rbx]
3580 mov rax,r10
3581DB 102,65,15,56,223,219
3582DB 102,65,15,56,223,228
3583DB 102,65,15,56,223,237
3584DB 102,65,15,56,223,246
3585DB 102,65,15,56,223,255
3586 DB 0F3h,0C3h ;repret
3587
3588
3589
3590ALIGN 32
;-----------------------------------------------------------------------
; __ocb_decrypt4 -- internal helper: process four blocks in parallel.
;
; In:   xmm2..xmm5 = four input blocks
;       xmm15 = running offset xor final round key
;       xmm9  = round key 0 xor final round key
;       xmm10 = table entry 0; xmm1 = key at [16+r11]
;       r12/r13 = table byte offsets for the 2nd/4th block
;       rax = r10 = negative round-key index; r11, rcx = key pointers
; Out:  xmm2..xmm5 = decrypted blocks
;       xmm10..xmm13 = offsets of blocks 1..4 xor final round key
;       xmm0 = last key loaded by the loop ([-16+rcx]), xmm1 = [16+r11]
;       rax = r10
; This helper does not write r8, r12, r13, r14 or xmm8, does not reload
; xmm10 from the table, and leaves xmm15 holding (old offset xor round
; key 0); the caller picks the new running offset from xmm11..xmm13.
;
; DB 102,15,56,222,N    = aesdec xmm2..xmm5 with xmm1 (209..233) or
;                         xmm0 (208..232)
; DB 102,65,15,56,223,N = aesdeclast xmm2..xmm5 with xmm10..xmm13
;-----------------------------------------------------------------------
3591__ocb_decrypt4:
; xmm15 = running offset xor round key 0.
3592 pxor xmm15,xmm9
; Chain the four offsets (entry 0, [r12], entry 0, [r13]) and xor each
; input block with its offset.
3593 movdqu xmm11,XMMWORD[r12*1+rbx]
3594 movdqa xmm12,xmm10
3595 movdqu xmm13,XMMWORD[r13*1+rbx]
3596 pxor xmm10,xmm15
3597 pxor xmm11,xmm10
3598 pxor xmm2,xmm10
3599 pxor xmm12,xmm11
3600 pxor xmm3,xmm11
3601 pxor xmm13,xmm12
3602 pxor xmm4,xmm12
3603 pxor xmm5,xmm13
3604 movups xmm0,XMMWORD[32+r11]
3605
; Swap round key 0 for the final round key inside each offset.
3606 pxor xmm10,xmm9
3607 pxor xmm11,xmm9
3608 pxor xmm12,xmm9
3609 pxor xmm13,xmm9
3610
; Rounds with the keys at [16+r11] and [32+r11].
3611DB 102,15,56,222,209
3612DB 102,15,56,222,217
3613DB 102,15,56,222,225
3614DB 102,15,56,222,233
3615 movups xmm1,XMMWORD[48+r11]
3616
3617DB 102,15,56,222,208
3618DB 102,15,56,222,216
3619DB 102,15,56,222,224
3620DB 102,15,56,222,232
3621 movups xmm0,XMMWORD[64+r11]
3622 jmp NEAR $L$ocb_dec_loop4
3623
3624ALIGN 32
3625$L$ocb_dec_loop4:
; Two rounds per iteration; rax steps by 32 from r10 up to zero and the
; jnz consumes the flags of "add rax,32".
3626DB 102,15,56,222,209
3627DB 102,15,56,222,217
3628DB 102,15,56,222,225
3629DB 102,15,56,222,233
3630 movups xmm1,XMMWORD[rax*1+rcx]
3631 add rax,32
3632
3633DB 102,15,56,222,208
3634DB 102,15,56,222,216
3635DB 102,15,56,222,224
3636DB 102,15,56,222,232
3637 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3638 jnz NEAR $L$ocb_dec_loop4
3639
; Last aesdec round, then restore xmm1 and rax for the next helper call.
3640DB 102,15,56,222,209
3641DB 102,15,56,222,217
3642DB 102,15,56,222,225
3643DB 102,15,56,222,233
3644 movups xmm1,XMMWORD[16+r11]
3645 mov rax,r10
3646
; aesdeclast with (offset xor final key) finishes each block.
3647DB 102,65,15,56,223,210
3648DB 102,65,15,56,223,219
3649DB 102,65,15,56,223,228
3650DB 102,65,15,56,223,237
3651 DB 0F3h,0C3h ;repret
3652
3653
3654
3655ALIGN 32
;-----------------------------------------------------------------------
; __ocb_decrypt1 -- internal helper: process a single block.
;
; In:   xmm2  = input block
;       xmm7  = table entry for this block
;       xmm15 = running offset xor final round key
;       xmm9  = round key 0 xor final round key
;       xmm1  = key at [16+r11]
;       rax = r10 = negative round-key index; r11, rcx = key pointers
; Out:  xmm2 = decrypted block
;       xmm7 = new offset xor final round key (caller copies it to xmm15)
;       xmm0 = last key loaded by the loop ([-16+rcx]), xmm1 = [16+r11]
;       rax = r10
; xmm8 (the checksum) is not touched here; the caller updates it.
;
; DB 102,15,56,222,209 = aesdec xmm2,xmm1
; DB 102,15,56,222,208 = aesdec xmm2,xmm0
; DB 102,15,56,223,215 = aesdeclast xmm2,xmm7
;-----------------------------------------------------------------------
3656__ocb_decrypt1:
; xmm7 = table entry xor running offset xor round key 0 (the two copies
; of the final round key cancel); then apply it to the input block.
3657 pxor xmm7,xmm15
3658 pxor xmm7,xmm9
3659 pxor xmm2,xmm7
3660 movups xmm0,XMMWORD[32+r11]
3661
3662DB 102,15,56,222,209
3663 movups xmm1,XMMWORD[48+r11]
; xmm7 = new offset xor final round key, for the aesdeclast below.
3664 pxor xmm7,xmm9
3665
3666DB 102,15,56,222,208
3667 movups xmm0,XMMWORD[64+r11]
3668 jmp NEAR $L$ocb_dec_loop1
3669
3670ALIGN 32
3671$L$ocb_dec_loop1:
; Two rounds per iteration; rax steps by 32 from r10 up to zero.
3672DB 102,15,56,222,209
3673 movups xmm1,XMMWORD[rax*1+rcx]
3674 add rax,32
3675
3676DB 102,15,56,222,208
3677 movups xmm0,XMMWORD[((-16))+rax*1+rcx]
3678 jnz NEAR $L$ocb_dec_loop1
3679
; Last aesdec round, restore xmm1/rax, then finish with aesdeclast.
3680DB 102,15,56,222,209
3681 movups xmm1,XMMWORD[16+r11]
3682 mov rax,r10
3683
3684DB 102,15,56,223,215
3685 DB 0F3h,0C3h ;repret
3686
3687global aesni_cbc_encrypt
3688
3689ALIGN 16
3690aesni_cbc_encrypt:
3691 mov QWORD[8+rsp],rdi ;WIN64 prologue
3692 mov QWORD[16+rsp],rsi
3693 mov rax,rsp
3694$L$SEH_begin_aesni_cbc_encrypt:
3695 mov rdi,rcx
3696 mov rsi,rdx
3697 mov rdx,r8
3698 mov rcx,r9
3699 mov r8,QWORD[40+rsp]
3700 mov r9,QWORD[48+rsp]
3701
3702
3703
3704 test rdx,rdx
3705 jz NEAR $L$cbc_ret
3706
3707 mov r10d,DWORD[240+rcx]
3708 mov r11,rcx
3709 test r9d,r9d
3710 jz NEAR $L$cbc_decrypt
3711
3712 movups xmm2,XMMWORD[r8]
3713 mov eax,r10d
3714 cmp rdx,16
3715 jb NEAR $L$cbc_enc_tail
3716 sub rdx,16
3717 jmp NEAR $L$cbc_enc_loop
3718ALIGN 16
3719$L$cbc_enc_loop:
3720 movups xmm3,XMMWORD[rdi]
3721 lea rdi,[16+rdi]
3722
3723 movups xmm0,XMMWORD[rcx]
3724 movups xmm1,XMMWORD[16+rcx]
3725 xorps xmm3,xmm0
3726 lea rcx,[32+rcx]
3727 xorps xmm2,xmm3
3728$L$oop_enc1_15:
3729DB 102,15,56,220,209
3730 dec eax
3731 movups xmm1,XMMWORD[rcx]
3732 lea rcx,[16+rcx]
3733 jnz NEAR $L$oop_enc1_15
3734DB 102,15,56,221,209
3735 mov eax,r10d
3736 mov rcx,r11
3737 movups XMMWORD[rsi],xmm2
3738 lea rsi,[16+rsi]
3739 sub rdx,16
3740 jnc NEAR $L$cbc_enc_loop
3741 add rdx,16
3742 jnz NEAR $L$cbc_enc_tail
3743 pxor xmm0,xmm0
3744 pxor xmm1,xmm1
3745 movups XMMWORD[r8],xmm2
3746 pxor xmm2,xmm2
3747 pxor xmm3,xmm3
3748 jmp NEAR $L$cbc_ret
3749
3750$L$cbc_enc_tail:
3751 mov rcx,rdx
3752 xchg rsi,rdi
3753 DD 0x9066A4F3
3754 mov ecx,16
3755 sub rcx,rdx
3756 xor eax,eax
3757 DD 0x9066AAF3
3758 lea rdi,[((-16))+rdi]
3759 mov eax,r10d
3760 mov rsi,rdi
3761 mov rcx,r11
3762 xor rdx,rdx
3763 jmp NEAR $L$cbc_enc_loop
3764
3765ALIGN 16
3766$L$cbc_decrypt:
3767 cmp rdx,16
3768 jne NEAR $L$cbc_decrypt_bulk
3769
3770
3771
3772 movdqu xmm2,XMMWORD[rdi]
3773 movdqu xmm3,XMMWORD[r8]
3774 movdqa xmm4,xmm2
3775 movups xmm0,XMMWORD[rcx]
3776 movups xmm1,XMMWORD[16+rcx]
3777 lea rcx,[32+rcx]
3778 xorps xmm2,xmm0
3779$L$oop_dec1_16:
3780DB 102,15,56,222,209
3781 dec r10d
3782 movups xmm1,XMMWORD[rcx]
3783 lea rcx,[16+rcx]
3784 jnz NEAR $L$oop_dec1_16
3785DB 102,15,56,223,209
3786 pxor xmm0,xmm0
3787 pxor xmm1,xmm1
3788 movdqu XMMWORD[r8],xmm4
3789 xorps xmm2,xmm3
3790 pxor xmm3,xmm3
3791 movups XMMWORD[rsi],xmm2
3792 pxor xmm2,xmm2
3793 jmp NEAR $L$cbc_ret
3794ALIGN 16
3795$L$cbc_decrypt_bulk:
3796 lea r11,[rsp]
3797
3798 push rbp
3799
3800 sub rsp,176
3801 and rsp,-16
3802 movaps XMMWORD[16+rsp],xmm6
3803 movaps XMMWORD[32+rsp],xmm7
3804 movaps XMMWORD[48+rsp],xmm8
3805 movaps XMMWORD[64+rsp],xmm9
3806 movaps XMMWORD[80+rsp],xmm10
3807 movaps XMMWORD[96+rsp],xmm11
3808 movaps XMMWORD[112+rsp],xmm12
3809 movaps XMMWORD[128+rsp],xmm13
3810 movaps XMMWORD[144+rsp],xmm14
3811 movaps XMMWORD[160+rsp],xmm15
3812$L$cbc_decrypt_body:
3813 mov rbp,rcx
3814 movups xmm10,XMMWORD[r8]
3815 mov eax,r10d
3816 cmp rdx,0x50
3817 jbe NEAR $L$cbc_dec_tail
3818
3819 movups xmm0,XMMWORD[rcx]
3820 movdqu xmm2,XMMWORD[rdi]
3821 movdqu xmm3,XMMWORD[16+rdi]
3822 movdqa xmm11,xmm2
3823 movdqu xmm4,XMMWORD[32+rdi]
3824 movdqa xmm12,xmm3
3825 movdqu xmm5,XMMWORD[48+rdi]
3826 movdqa xmm13,xmm4
3827 movdqu xmm6,XMMWORD[64+rdi]
3828 movdqa xmm14,xmm5
3829 movdqu xmm7,XMMWORD[80+rdi]
3830 movdqa xmm15,xmm6
3831 mov r9d,DWORD[((OPENSSL_ia32cap_P+4))]
3832 cmp rdx,0x70
3833 jbe NEAR $L$cbc_dec_six_or_seven
3834
3835 and r9d,71303168
3836 sub rdx,0x50
3837 cmp r9d,4194304
3838 je NEAR $L$cbc_dec_loop6_enter
3839 sub rdx,0x20
3840 lea rcx,[112+rcx]
3841 jmp NEAR $L$cbc_dec_loop8_enter
3842ALIGN 16
3843$L$cbc_dec_loop8:
3844 movups XMMWORD[rsi],xmm9
3845 lea rsi,[16+rsi]
3846$L$cbc_dec_loop8_enter:
3847 movdqu xmm8,XMMWORD[96+rdi]
3848 pxor xmm2,xmm0
3849 movdqu xmm9,XMMWORD[112+rdi]
3850 pxor xmm3,xmm0
3851 movups xmm1,XMMWORD[((16-112))+rcx]
3852 pxor xmm4,xmm0
3853 mov rbp,-1
3854 cmp rdx,0x70
3855 pxor xmm5,xmm0
3856 pxor xmm6,xmm0
3857 pxor xmm7,xmm0
3858 pxor xmm8,xmm0
3859
3860DB 102,15,56,222,209
3861 pxor xmm9,xmm0
3862 movups xmm0,XMMWORD[((32-112))+rcx]
3863DB 102,15,56,222,217
3864DB 102,15,56,222,225
3865DB 102,15,56,222,233
3866DB 102,15,56,222,241
3867DB 102,15,56,222,249
3868DB 102,68,15,56,222,193
3869 adc rbp,0
3870 and rbp,128
3871DB 102,68,15,56,222,201
3872 add rbp,rdi
3873 movups xmm1,XMMWORD[((48-112))+rcx]
3874DB 102,15,56,222,208
3875DB 102,15,56,222,216
3876DB 102,15,56,222,224
3877DB 102,15,56,222,232
3878DB 102,15,56,222,240
3879DB 102,15,56,222,248
3880DB 102,68,15,56,222,192
3881DB 102,68,15,56,222,200
3882 movups xmm0,XMMWORD[((64-112))+rcx]
3883 nop
3884DB 102,15,56,222,209
3885DB 102,15,56,222,217
3886DB 102,15,56,222,225
3887DB 102,15,56,222,233
3888DB 102,15,56,222,241
3889DB 102,15,56,222,249
3890DB 102,68,15,56,222,193
3891DB 102,68,15,56,222,201
3892 movups xmm1,XMMWORD[((80-112))+rcx]
3893 nop
3894DB 102,15,56,222,208
3895DB 102,15,56,222,216
3896DB 102,15,56,222,224
3897DB 102,15,56,222,232
3898DB 102,15,56,222,240
3899DB 102,15,56,222,248
3900DB 102,68,15,56,222,192
3901DB 102,68,15,56,222,200
3902 movups xmm0,XMMWORD[((96-112))+rcx]
3903 nop
3904DB 102,15,56,222,209
3905DB 102,15,56,222,217
3906DB 102,15,56,222,225
3907DB 102,15,56,222,233
3908DB 102,15,56,222,241
3909DB 102,15,56,222,249
3910DB 102,68,15,56,222,193
3911DB 102,68,15,56,222,201
3912 movups xmm1,XMMWORD[((112-112))+rcx]
3913 nop
3914DB 102,15,56,222,208
3915DB 102,15,56,222,216
3916DB 102,15,56,222,224
3917DB 102,15,56,222,232
3918DB 102,15,56,222,240
3919DB 102,15,56,222,248
3920DB 102,68,15,56,222,192
3921DB 102,68,15,56,222,200
3922 movups xmm0,XMMWORD[((128-112))+rcx]
3923 nop
3924DB 102,15,56,222,209
3925DB 102,15,56,222,217
3926DB 102,15,56,222,225
3927DB 102,15,56,222,233
3928DB 102,15,56,222,241
3929DB 102,15,56,222,249
3930DB 102,68,15,56,222,193
3931DB 102,68,15,56,222,201
3932 movups xmm1,XMMWORD[((144-112))+rcx]
3933 cmp eax,11
3934DB 102,15,56,222,208
3935DB 102,15,56,222,216
3936DB 102,15,56,222,224
3937DB 102,15,56,222,232
3938DB 102,15,56,222,240
3939DB 102,15,56,222,248
3940DB 102,68,15,56,222,192
3941DB 102,68,15,56,222,200
3942 movups xmm0,XMMWORD[((160-112))+rcx]
3943 jb NEAR $L$cbc_dec_done
3944DB 102,15,56,222,209
3945DB 102,15,56,222,217
3946DB 102,15,56,222,225
3947DB 102,15,56,222,233
3948DB 102,15,56,222,241
3949DB 102,15,56,222,249
3950DB 102,68,15,56,222,193
3951DB 102,68,15,56,222,201
3952 movups xmm1,XMMWORD[((176-112))+rcx]
3953 nop
3954DB 102,15,56,222,208
3955DB 102,15,56,222,216
3956DB 102,15,56,222,224
3957DB 102,15,56,222,232
3958DB 102,15,56,222,240
3959DB 102,15,56,222,248
3960DB 102,68,15,56,222,192
3961DB 102,68,15,56,222,200
3962 movups xmm0,XMMWORD[((192-112))+rcx]
3963 je NEAR $L$cbc_dec_done
3964DB 102,15,56,222,209
3965DB 102,15,56,222,217
3966DB 102,15,56,222,225
3967DB 102,15,56,222,233
3968DB 102,15,56,222,241
3969DB 102,15,56,222,249
3970DB 102,68,15,56,222,193
3971DB 102,68,15,56,222,201
3972 movups xmm1,XMMWORD[((208-112))+rcx]
3973 nop
3974DB 102,15,56,222,208
3975DB 102,15,56,222,216
3976DB 102,15,56,222,224
3977DB 102,15,56,222,232
3978DB 102,15,56,222,240
3979DB 102,15,56,222,248
3980DB 102,68,15,56,222,192
3981DB 102,68,15,56,222,200
3982 movups xmm0,XMMWORD[((224-112))+rcx]
3983 jmp NEAR $L$cbc_dec_done
3984ALIGN 16
3985$L$cbc_dec_done:
3986DB 102,15,56,222,209
3987DB 102,15,56,222,217
3988 pxor xmm10,xmm0
3989 pxor xmm11,xmm0
3990DB 102,15,56,222,225
3991DB 102,15,56,222,233
3992 pxor xmm12,xmm0
3993 pxor xmm13,xmm0
3994DB 102,15,56,222,241
3995DB 102,15,56,222,249
3996 pxor xmm14,xmm0
3997 pxor xmm15,xmm0
3998DB 102,68,15,56,222,193
3999DB 102,68,15,56,222,201
4000 movdqu xmm1,XMMWORD[80+rdi]
4001
4002DB 102,65,15,56,223,210
4003 movdqu xmm10,XMMWORD[96+rdi]
4004 pxor xmm1,xmm0
4005DB 102,65,15,56,223,219
4006 pxor xmm10,xmm0
4007 movdqu xmm0,XMMWORD[112+rdi]
4008DB 102,65,15,56,223,228
4009 lea rdi,[128+rdi]
4010 movdqu xmm11,XMMWORD[rbp]
4011DB 102,65,15,56,223,237
4012DB 102,65,15,56,223,246
4013 movdqu xmm12,XMMWORD[16+rbp]
4014 movdqu xmm13,XMMWORD[32+rbp]
4015DB 102,65,15,56,223,255
4016DB 102,68,15,56,223,193
4017 movdqu xmm14,XMMWORD[48+rbp]
4018 movdqu xmm15,XMMWORD[64+rbp]
4019DB 102,69,15,56,223,202
4020 movdqa xmm10,xmm0
4021 movdqu xmm1,XMMWORD[80+rbp]
4022 movups xmm0,XMMWORD[((-112))+rcx]
4023
4024 movups XMMWORD[rsi],xmm2
4025 movdqa xmm2,xmm11
4026 movups XMMWORD[16+rsi],xmm3
4027 movdqa xmm3,xmm12
4028 movups XMMWORD[32+rsi],xmm4
4029 movdqa xmm4,xmm13
4030 movups XMMWORD[48+rsi],xmm5
4031 movdqa xmm5,xmm14
4032 movups XMMWORD[64+rsi],xmm6
4033 movdqa xmm6,xmm15
4034 movups XMMWORD[80+rsi],xmm7
4035 movdqa xmm7,xmm1
4036 movups XMMWORD[96+rsi],xmm8
4037 lea rsi,[112+rsi]
4038
4039 sub rdx,0x80
4040 ja NEAR $L$cbc_dec_loop8
4041
4042 movaps xmm2,xmm9
4043 lea rcx,[((-112))+rcx]
4044 add rdx,0x70
4045 jle NEAR $L$cbc_dec_clear_tail_collected
4046 movups XMMWORD[rsi],xmm9
4047 lea rsi,[16+rsi]
4048 cmp rdx,0x50
4049 jbe NEAR $L$cbc_dec_tail
4050
4051 movaps xmm2,xmm11
4052$L$cbc_dec_six_or_seven:
4053 cmp rdx,0x60
4054 ja NEAR $L$cbc_dec_seven
4055
4056 movaps xmm8,xmm7
4057 call _aesni_decrypt6
4058 pxor xmm2,xmm10
4059 movaps xmm10,xmm8
4060 pxor xmm3,xmm11
4061 movdqu XMMWORD[rsi],xmm2
4062 pxor xmm4,xmm12
4063 movdqu XMMWORD[16+rsi],xmm3
4064 pxor xmm3,xmm3
4065 pxor xmm5,xmm13
4066 movdqu XMMWORD[32+rsi],xmm4
4067 pxor xmm4,xmm4
4068 pxor xmm6,xmm14
4069 movdqu XMMWORD[48+rsi],xmm5
4070 pxor xmm5,xmm5
4071 pxor xmm7,xmm15
4072 movdqu XMMWORD[64+rsi],xmm6
4073 pxor xmm6,xmm6
4074 lea rsi,[80+rsi]
4075 movdqa xmm2,xmm7
4076 pxor xmm7,xmm7
4077 jmp NEAR $L$cbc_dec_tail_collected
4078
4079ALIGN 16
4080$L$cbc_dec_seven:
4081 movups xmm8,XMMWORD[96+rdi]
4082 xorps xmm9,xmm9
4083 call _aesni_decrypt8
4084 movups xmm9,XMMWORD[80+rdi]
4085 pxor xmm2,xmm10
4086 movups xmm10,XMMWORD[96+rdi]
4087 pxor xmm3,xmm11
4088 movdqu XMMWORD[rsi],xmm2
4089 pxor xmm4,xmm12
4090 movdqu XMMWORD[16+rsi],xmm3
4091 pxor xmm3,xmm3
4092 pxor xmm5,xmm13
4093 movdqu XMMWORD[32+rsi],xmm4
4094 pxor xmm4,xmm4
4095 pxor xmm6,xmm14
4096 movdqu XMMWORD[48+rsi],xmm5
4097 pxor xmm5,xmm5
4098 pxor xmm7,xmm15
4099 movdqu XMMWORD[64+rsi],xmm6
4100 pxor xmm6,xmm6
4101 pxor xmm8,xmm9
4102 movdqu XMMWORD[80+rsi],xmm7
4103 pxor xmm7,xmm7
4104 lea rsi,[96+rsi]
4105 movdqa xmm2,xmm8
4106 pxor xmm8,xmm8
4107 pxor xmm9,xmm9
4108 jmp NEAR $L$cbc_dec_tail_collected
4109
4110ALIGN 16
4111$L$cbc_dec_loop6:
4112 movups XMMWORD[rsi],xmm7
4113 lea rsi,[16+rsi]
4114 movdqu xmm2,XMMWORD[rdi]
4115 movdqu xmm3,XMMWORD[16+rdi]
4116 movdqa xmm11,xmm2
4117 movdqu xmm4,XMMWORD[32+rdi]
4118 movdqa xmm12,xmm3
4119 movdqu xmm5,XMMWORD[48+rdi]
4120 movdqa xmm13,xmm4
4121 movdqu xmm6,XMMWORD[64+rdi]
4122 movdqa xmm14,xmm5
4123 movdqu xmm7,XMMWORD[80+rdi]
4124 movdqa xmm15,xmm6
4125$L$cbc_dec_loop6_enter:
4126 lea rdi,[96+rdi]
4127 movdqa xmm8,xmm7
4128
4129 call _aesni_decrypt6
4130
4131 pxor xmm2,xmm10
4132 movdqa xmm10,xmm8
4133 pxor xmm3,xmm11
4134 movdqu XMMWORD[rsi],xmm2
4135 pxor xmm4,xmm12
4136 movdqu XMMWORD[16+rsi],xmm3
4137 pxor xmm5,xmm13
4138 movdqu XMMWORD[32+rsi],xmm4
4139 pxor xmm6,xmm14
4140 mov rcx,rbp
4141 movdqu XMMWORD[48+rsi],xmm5
4142 pxor xmm7,xmm15
4143 mov eax,r10d
4144 movdqu XMMWORD[64+rsi],xmm6
4145 lea rsi,[80+rsi]
4146 sub rdx,0x60
4147 ja NEAR $L$cbc_dec_loop6
4148
4149 movdqa xmm2,xmm7
4150 add rdx,0x50
4151 jle NEAR $L$cbc_dec_clear_tail_collected
4152 movups XMMWORD[rsi],xmm7
4153 lea rsi,[16+rsi]
4154
4155$L$cbc_dec_tail:
4156 movups xmm2,XMMWORD[rdi]
4157 sub rdx,0x10
4158 jbe NEAR $L$cbc_dec_one
4159
4160 movups xmm3,XMMWORD[16+rdi]
4161 movaps xmm11,xmm2
4162 sub rdx,0x10
4163 jbe NEAR $L$cbc_dec_two
4164
4165 movups xmm4,XMMWORD[32+rdi]
4166 movaps xmm12,xmm3
4167 sub rdx,0x10
4168 jbe NEAR $L$cbc_dec_three
4169
4170 movups xmm5,XMMWORD[48+rdi]
4171 movaps xmm13,xmm4
4172 sub rdx,0x10
4173 jbe NEAR $L$cbc_dec_four
4174
4175 movups xmm6,XMMWORD[64+rdi]
4176 movaps xmm14,xmm5
4177 movaps xmm15,xmm6
4178 xorps xmm7,xmm7
4179 call _aesni_decrypt6
4180 pxor xmm2,xmm10
4181 movaps xmm10,xmm15
4182 pxor xmm3,xmm11
4183 movdqu XMMWORD[rsi],xmm2
4184 pxor xmm4,xmm12
4185 movdqu XMMWORD[16+rsi],xmm3
4186 pxor xmm3,xmm3
4187 pxor xmm5,xmm13
4188 movdqu XMMWORD[32+rsi],xmm4
4189 pxor xmm4,xmm4
4190 pxor xmm6,xmm14
4191 movdqu XMMWORD[48+rsi],xmm5
4192 pxor xmm5,xmm5
4193 lea rsi,[64+rsi]
4194 movdqa xmm2,xmm6
4195 pxor xmm6,xmm6
4196 pxor xmm7,xmm7
4197 sub rdx,0x10
4198 jmp NEAR $L$cbc_dec_tail_collected
4199
4200ALIGN 16
4201$L$cbc_dec_one:
4202 movaps xmm11,xmm2
4203 movups xmm0,XMMWORD[rcx]
4204 movups xmm1,XMMWORD[16+rcx]
4205 lea rcx,[32+rcx]
4206 xorps xmm2,xmm0
4207$L$oop_dec1_17:
4208DB 102,15,56,222,209
4209 dec eax
4210 movups xmm1,XMMWORD[rcx]
4211 lea rcx,[16+rcx]
4212 jnz NEAR $L$oop_dec1_17
4213DB 102,15,56,223,209
4214 xorps xmm2,xmm10
4215 movaps xmm10,xmm11
4216 jmp NEAR $L$cbc_dec_tail_collected
4217ALIGN 16
4218$L$cbc_dec_two:
4219 movaps xmm12,xmm3
4220 call _aesni_decrypt2
4221 pxor xmm2,xmm10
4222 movaps xmm10,xmm12
4223 pxor xmm3,xmm11
4224 movdqu XMMWORD[rsi],xmm2
4225 movdqa xmm2,xmm3
4226 pxor xmm3,xmm3
4227 lea rsi,[16+rsi]
4228 jmp NEAR $L$cbc_dec_tail_collected
4229ALIGN 16
4230$L$cbc_dec_three:
4231 movaps xmm13,xmm4
4232 call _aesni_decrypt3
4233 pxor xmm2,xmm10
4234 movaps xmm10,xmm13
4235 pxor xmm3,xmm11
4236 movdqu XMMWORD[rsi],xmm2
4237 pxor xmm4,xmm12
4238 movdqu XMMWORD[16+rsi],xmm3
4239 pxor xmm3,xmm3
4240 movdqa xmm2,xmm4
4241 pxor xmm4,xmm4
4242 lea rsi,[32+rsi]
4243 jmp NEAR $L$cbc_dec_tail_collected
4244ALIGN 16
4245$L$cbc_dec_four:
4246 movaps xmm14,xmm5
4247 call _aesni_decrypt4
4248 pxor xmm2,xmm10
4249 movaps xmm10,xmm14
4250 pxor xmm3,xmm11
4251 movdqu XMMWORD[rsi],xmm2
4252 pxor xmm4,xmm12
4253 movdqu XMMWORD[16+rsi],xmm3
4254 pxor xmm3,xmm3
4255 pxor xmm5,xmm13
4256 movdqu XMMWORD[32+rsi],xmm4
4257 pxor xmm4,xmm4
4258 movdqa xmm2,xmm5
4259 pxor xmm5,xmm5
4260 lea rsi,[48+rsi]
4261 jmp NEAR $L$cbc_dec_tail_collected
4262
4263ALIGN 16
4264$L$cbc_dec_clear_tail_collected:
4265 pxor xmm3,xmm3
4266 pxor xmm4,xmm4
4267 pxor xmm5,xmm5
4268$L$cbc_dec_tail_collected:
4269 movups XMMWORD[r8],xmm10
4270 and rdx,15
4271 jnz NEAR $L$cbc_dec_tail_partial
4272 movups XMMWORD[rsi],xmm2
4273 pxor xmm2,xmm2
4274 jmp NEAR $L$cbc_dec_ret
4275ALIGN 16
4276$L$cbc_dec_tail_partial:
4277 movaps XMMWORD[rsp],xmm2
4278 pxor xmm2,xmm2
4279 mov rcx,16
4280 mov rdi,rsi
4281 sub rcx,rdx
4282 lea rsi,[rsp]
4283 DD 0x9066A4F3
4284 movdqa XMMWORD[rsp],xmm2
4285
4286$L$cbc_dec_ret:
4287 xorps xmm0,xmm0
4288 pxor xmm1,xmm1
4289 movaps xmm6,XMMWORD[16+rsp]
4290 movaps XMMWORD[16+rsp],xmm0
4291 movaps xmm7,XMMWORD[32+rsp]
4292 movaps XMMWORD[32+rsp],xmm0
4293 movaps xmm8,XMMWORD[48+rsp]
4294 movaps XMMWORD[48+rsp],xmm0
4295 movaps xmm9,XMMWORD[64+rsp]
4296 movaps XMMWORD[64+rsp],xmm0
4297 movaps xmm10,XMMWORD[80+rsp]
4298 movaps XMMWORD[80+rsp],xmm0
4299 movaps xmm11,XMMWORD[96+rsp]
4300 movaps XMMWORD[96+rsp],xmm0
4301 movaps xmm12,XMMWORD[112+rsp]
4302 movaps XMMWORD[112+rsp],xmm0
4303 movaps xmm13,XMMWORD[128+rsp]
4304 movaps XMMWORD[128+rsp],xmm0
4305 movaps xmm14,XMMWORD[144+rsp]
4306 movaps XMMWORD[144+rsp],xmm0
4307 movaps xmm15,XMMWORD[160+rsp]
4308 movaps XMMWORD[160+rsp],xmm0
4309 mov rbp,QWORD[((-8))+r11]
4310
4311 lea rsp,[r11]
4312
4313$L$cbc_ret:
4314 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
4315 mov rsi,QWORD[16+rsp]
4316 DB 0F3h,0C3h ;repret
4317
4318$L$SEH_end_aesni_cbc_encrypt:
;-----------------------------------------------------------------------
; aesni_set_decrypt_key
; Builds an AES decryption key schedule in place.
;
; In (passed through unchanged to __aesni_set_encrypt_key):
;   rcx = user key, edx = key length in bits, r8 = key schedule to fill
; Out:
;   eax = status returned by __aesni_set_encrypt_key (0 = success);
;         on a non-zero status the schedule is not post-processed.
;
; Steps after a successful expansion:
;   1. swap the first and the last round key,
;   2. walk inwards from both ends, swapping each pair and applying
;      AESIMC to both members,
;   3. apply AESIMC to the middle round key.
; xmm0/xmm1 are zeroed on the success path before returning.
;
; NOTE(review): the decimal prefix on each line is line-number residue
; from the repository-browser export, not NASM syntax.
;-----------------------------------------------------------------------
4319global aesni_set_decrypt_key
4320
4321ALIGN 16
4322aesni_set_decrypt_key:
4323
4324DB 0x48,0x83,0xEC,0x08 ; sub rsp,8 (undone by "add rsp,8" at $L$dec_key_ret)
4325
4326 call __aesni_set_encrypt_key
4327 shl edx,4 ; edx (9/11/13 on success) -> byte offset, 16 bytes per round key
4328 test eax,eax
4329 jnz NEAR $L$dec_key_ret ; expansion failed: return its status as-is
4330 lea rcx,[16+rdx*1+r8] ; rcx = address of the last round key
4331
; Swap first and last round key without transforming them.
4332 movups xmm0,XMMWORD[r8]
4333 movups xmm1,XMMWORD[rcx]
4334 movups XMMWORD[rcx],xmm0
4335 movups XMMWORD[r8],xmm1
4336 lea r8,[16+r8]
4337 lea rcx,[((-16))+rcx]
4338
; r8 walks up, rcx walks down; loop runs while rcx is still above r8.
4339$L$dec_key_inverse:
4340 movups xmm0,XMMWORD[r8]
4341 movups xmm1,XMMWORD[rcx]
4342DB 102,15,56,219,192 ; aesimc xmm0,xmm0
4343DB 102,15,56,219,201 ; aesimc xmm1,xmm1
4344 lea r8,[16+r8]
4345 lea rcx,[((-16))+rcx]
4346 movups XMMWORD[16+rcx],xmm0 ; 16+rcx = slot xmm1 was loaded from
4347 movups XMMWORD[(-16)+r8],xmm1 ; -16+r8 = slot xmm0 was loaded from
4348 cmp rcx,r8
4349 ja NEAR $L$dec_key_inverse
4350
; Middle round key: transform in place (it is loaded via r8 and stored via rcx).
4351 movups xmm0,XMMWORD[r8]
4352DB 102,15,56,219,192 ; aesimc xmm0,xmm0
4353 pxor xmm1,xmm1 ; wipe key material from xmm1
4354 movups XMMWORD[rcx],xmm0
4355 pxor xmm0,xmm0 ; wipe key material from xmm0
4356$L$dec_key_ret:
4357 add rsp,8
4358
4359 DB 0F3h,0C3h ;repret
4360
4361$L$SEH_end_set_decrypt_key:
4362
;-----------------------------------------------------------------------
; aesni_set_encrypt_key / __aesni_set_encrypt_key
; Expands a 128-, 192- or 256-bit user key into an AES key schedule.
; Both labels name the same entry point; only the first is exported.
;
; In:   rcx = user key, edx = key length in bits, r8 = schedule to fill
; Out:  rax =  0 on success
;             -1 if rcx or r8 is NULL
;             -2 if edx is not 128, 192 or 256
;       edx = 9 / 11 / 13 on success; the same value is stored as a
;             DWORD at offset 240 from r8.
; Uses: rax (write cursor into the schedule), r10d, xmm0-xmm5.
;       xmm0-xmm5 are zeroed on every exit path. r8 is never written.
;
; Two implementations exist per key size. r10d holds
; OPENSSL_ia32cap_P[1] masked with 0x10000800; when it equals exactly
; 0x10000000 the "_alt" variant (pshufb + aesenclast) is taken,
; otherwise the aeskeygenassist variant.
; NOTE(review): bits 28 and 11 of that word are presumably AVX and XOP
; per the OPENSSL_ia32cap documentation -- confirm.
;
; NOTE(review): the decimal prefix on each line is line-number residue
; from the repository-browser export, not NASM syntax.
;-----------------------------------------------------------------------
4363global aesni_set_encrypt_key
4364
4365ALIGN 16
4366aesni_set_encrypt_key:
4367__aesni_set_encrypt_key:
4368
4369DB 0x48,0x83,0xEC,0x08 ; sub rsp,8 (undone at $L$enc_key_ret)
4370
4371 mov rax,-1 ; status for the NULL-pointer exits below
4372 test rcx,rcx
4373 jz NEAR $L$enc_key_ret
4374 test r8,r8
4375 jz NEAR $L$enc_key_ret
4376
4377 mov r10d,268437504 ; 0x10000800 capability mask
4378 movups xmm0,XMMWORD[rcx] ; xmm0 = first 16 bytes of the user key
4379 xorps xmm4,xmm4 ; the key_expansion helpers need xmm4 dword 0 == 0
4380 and r10d,DWORD[((OPENSSL_ia32cap_P+4))]
4381 lea rax,[16+r8] ; rax = where the next round key goes
4382 cmp edx,256
4383 je NEAR $L$14rounds
4384 cmp edx,192
4385 je NEAR $L$12rounds
4386 cmp edx,128
4387 jne NEAR $L$bad_keybits
4388
; ---- 128-bit key, aeskeygenassist variant ----
4389$L$10rounds:
4390 mov edx,9
4391 cmp r10d,268435456 ; == 0x10000000 ?
4392 je NEAR $L$10rounds_alt
4393
4394 movups XMMWORD[r8],xmm0 ; round key 0 = user key
; Each DB below is "aeskeygenassist xmm1,xmm0,imm8"; the last byte is
; the round constant (1,2,4,...,128,27,54).
4395DB 102,15,58,223,200,1
4396 call $L$key_expansion_128_cold
4397DB 102,15,58,223,200,2
4398 call $L$key_expansion_128
4399DB 102,15,58,223,200,4
4400 call $L$key_expansion_128
4401DB 102,15,58,223,200,8
4402 call $L$key_expansion_128
4403DB 102,15,58,223,200,16
4404 call $L$key_expansion_128
4405DB 102,15,58,223,200,32
4406 call $L$key_expansion_128
4407DB 102,15,58,223,200,64
4408 call $L$key_expansion_128
4409DB 102,15,58,223,200,128
4410 call $L$key_expansion_128
4411DB 102,15,58,223,200,27
4412 call $L$key_expansion_128
4413DB 102,15,58,223,200,54
4414 call $L$key_expansion_128
4415 movups XMMWORD[rax],xmm0 ; last round key (rax = r8+160 here)
4416 mov DWORD[80+rax],edx ; r8+240
4417 xor eax,eax
4418 jmp NEAR $L$enc_key_ret
4419
; ---- 128-bit key, pshufb/aesenclast variant ----
4420ALIGN 16
4421$L$10rounds_alt:
4422 movdqa xmm5,XMMWORD[$L$key_rotate]
4423 mov r10d,8 ; eight loop iterations, two more rounds unrolled after
4424 movdqa xmm4,XMMWORD[$L$key_rcon1]
4425 movdqa xmm2,xmm0 ; xmm2 = previous round key
4426 movdqu XMMWORD[r8],xmm0
4427 jmp NEAR $L$oop_key128
4428
4429ALIGN 16
4430$L$oop_key128:
4431DB 102,15,56,0,197 ; pshufb xmm0,xmm5
4432DB 102,15,56,221,196 ; aesenclast xmm0,xmm4
4433 pslld xmm4,1 ; double the round constant
4434 lea rax,[16+rax]
4435
; xmm2 = xor of xmm2 with itself shifted left by 4, 8 and 12 bytes
4436 movdqa xmm3,xmm2
4437 pslldq xmm2,4
4438 pxor xmm3,xmm2
4439 pslldq xmm2,4
4440 pxor xmm3,xmm2
4441 pslldq xmm2,4
4442 pxor xmm2,xmm3
4443
4444 pxor xmm0,xmm2
4445 movdqu XMMWORD[(-16)+rax],xmm0
4446 movdqa xmm2,xmm0
4447
4448 dec r10d
4449 jnz NEAR $L$oop_key128
4450
4451 movdqa xmm4,XMMWORD[$L$key_rcon1b] ; switch to round constant 0x1b
4452
4453DB 102,15,56,0,197 ; pshufb xmm0,xmm5
4454DB 102,15,56,221,196 ; aesenclast xmm0,xmm4
4455 pslld xmm4,1 ; 0x1b -> 0x36 for the final round
4456
4457 movdqa xmm3,xmm2
4458 pslldq xmm2,4
4459 pxor xmm3,xmm2
4460 pslldq xmm2,4
4461 pxor xmm3,xmm2
4462 pslldq xmm2,4
4463 pxor xmm2,xmm3
4464
4465 pxor xmm0,xmm2
4466 movdqu XMMWORD[rax],xmm0
4467
4468 movdqa xmm2,xmm0
4469DB 102,15,56,0,197 ; pshufb xmm0,xmm5
4470DB 102,15,56,221,196 ; aesenclast xmm0,xmm4
4471
4472 movdqa xmm3,xmm2
4473 pslldq xmm2,4
4474 pxor xmm3,xmm2
4475 pslldq xmm2,4
4476 pxor xmm3,xmm2
4477 pslldq xmm2,4
4478 pxor xmm2,xmm3
4479
4480 pxor xmm0,xmm2
4481 movdqu XMMWORD[16+rax],xmm0
4482
4483 mov DWORD[96+rax],edx ; r8+240 (rax = r8+144 here)
4484 xor eax,eax
4485 jmp NEAR $L$enc_key_ret
4486
; ---- 192-bit key, aeskeygenassist variant ----
4487ALIGN 16
4488$L$12rounds:
4489 movq xmm2,QWORD[16+rcx] ; xmm2 = user key bytes 16..23
4490 mov edx,11
4491 cmp r10d,268435456 ; == 0x10000000 ?
4492 je NEAR $L$12rounds_alt
4493
4494 movups XMMWORD[r8],xmm0
; Each DB below is "aeskeygenassist xmm1,xmm2,imm8" (imm8 = 1..128).
4495DB 102,15,58,223,202,1
4496 call $L$key_expansion_192a_cold
4497DB 102,15,58,223,202,2
4498 call $L$key_expansion_192b
4499DB 102,15,58,223,202,4
4500 call $L$key_expansion_192a
4501DB 102,15,58,223,202,8
4502 call $L$key_expansion_192b
4503DB 102,15,58,223,202,16
4504 call $L$key_expansion_192a
4505DB 102,15,58,223,202,32
4506 call $L$key_expansion_192b
4507DB 102,15,58,223,202,64
4508 call $L$key_expansion_192a
4509DB 102,15,58,223,202,128
4510 call $L$key_expansion_192b
4511 movups XMMWORD[rax],xmm0 ; last round key (rax = r8+192 here)
4512 mov DWORD[48+rax],edx ; r8+240
4513 xor rax,rax
4514 jmp NEAR $L$enc_key_ret
4515
; ---- 192-bit key, pshufb/aesenclast variant ----
4516ALIGN 16
4517$L$12rounds_alt:
4518 movdqa xmm5,XMMWORD[$L$key_rotate192]
4519 movdqa xmm4,XMMWORD[$L$key_rcon1]
4520 mov r10d,8 ; eight iterations, 24 bytes of schedule each
4521 movdqu XMMWORD[r8],xmm0
4522 jmp NEAR $L$oop_key192
4523
4524ALIGN 16
4525$L$oop_key192:
4526 movq QWORD[rax],xmm2 ; store the low 8 bytes of xmm2
4527 movdqa xmm1,xmm2
4528DB 102,15,56,0,213 ; pshufb xmm2,xmm5
4529DB 102,15,56,221,212 ; aesenclast xmm2,xmm4
4530 pslld xmm4,1 ; double the round constant
4531 lea rax,[24+rax]
4532
; xmm0 = xor of xmm0 with itself shifted left by 4, 8 and 12 bytes
4533 movdqa xmm3,xmm0
4534 pslldq xmm0,4
4535 pxor xmm3,xmm0
4536 pslldq xmm0,4
4537 pxor xmm3,xmm0
4538 pslldq xmm0,4
4539 pxor xmm0,xmm3
4540
4541 pshufd xmm3,xmm0,0xff ; broadcast dword 3 of xmm0
4542 pxor xmm3,xmm1
4543 pslldq xmm1,4
4544 pxor xmm3,xmm1
4545
4546 pxor xmm0,xmm2
4547 pxor xmm2,xmm3
4548 movdqu XMMWORD[(-16)+rax],xmm0
4549
4550 dec r10d
4551 jnz NEAR $L$oop_key192
4552
4553 mov DWORD[32+rax],edx ; r8+240 (rax = r8+208 here)
4554 xor eax,eax
4555 jmp NEAR $L$enc_key_ret
4556
; ---- 256-bit key, aeskeygenassist variant ----
4557ALIGN 16
4558$L$14rounds:
4559 movups xmm2,XMMWORD[16+rcx] ; xmm2 = second 16 bytes of the user key
4560 mov edx,13
4561 lea rax,[16+rax] ; rax = r8+32: first two round keys are the user key
4562 cmp r10d,268435456 ; == 0x10000000 ?
4563 je NEAR $L$14rounds_alt
4564
4565 movups XMMWORD[r8],xmm0
4566 movups XMMWORD[16+r8],xmm2
; DB ...,202,imm = aeskeygenassist xmm1,xmm2,imm8 (feeds 256a)
; DB ...,200,imm = aeskeygenassist xmm1,xmm0,imm8 (feeds 256b)
4567DB 102,15,58,223,202,1
4568 call $L$key_expansion_256a_cold
4569DB 102,15,58,223,200,1
4570 call $L$key_expansion_256b
4571DB 102,15,58,223,202,2
4572 call $L$key_expansion_256a
4573DB 102,15,58,223,200,2
4574 call $L$key_expansion_256b
4575DB 102,15,58,223,202,4
4576 call $L$key_expansion_256a
4577DB 102,15,58,223,200,4
4578 call $L$key_expansion_256b
4579DB 102,15,58,223,202,8
4580 call $L$key_expansion_256a
4581DB 102,15,58,223,200,8
4582 call $L$key_expansion_256b
4583DB 102,15,58,223,202,16
4584 call $L$key_expansion_256a
4585DB 102,15,58,223,200,16
4586 call $L$key_expansion_256b
4587DB 102,15,58,223,202,32
4588 call $L$key_expansion_256a
4589DB 102,15,58,223,200,32
4590 call $L$key_expansion_256b
4591DB 102,15,58,223,202,64
4592 call $L$key_expansion_256a
4593 movups XMMWORD[rax],xmm0 ; last round key (rax = r8+224 here)
4594 mov DWORD[16+rax],edx ; r8+240
4595 xor rax,rax
4596 jmp NEAR $L$enc_key_ret
4597
; ---- 256-bit key, pshufb/aesenclast variant ----
4598ALIGN 16
4599$L$14rounds_alt:
4600 movdqa xmm5,XMMWORD[$L$key_rotate]
4601 movdqa xmm4,XMMWORD[$L$key_rcon1]
4602 mov r10d,7 ; seven passes; the last one exits mid-loop
4603 movdqu XMMWORD[r8],xmm0
4604 movdqa xmm1,xmm2
4605 movdqu XMMWORD[16+r8],xmm2
4606 jmp NEAR $L$oop_key256
4607
4608ALIGN 16
4609$L$oop_key256:
4610DB 102,15,56,0,213 ; pshufb xmm2,xmm5
4611DB 102,15,56,221,212 ; aesenclast xmm2,xmm4
4612
4613 movdqa xmm3,xmm0
4614 pslldq xmm0,4
4615 pxor xmm3,xmm0
4616 pslldq xmm0,4
4617 pxor xmm3,xmm0
4618 pslldq xmm0,4
4619 pxor xmm0,xmm3
4620 pslld xmm4,1 ; double the round constant
4621
4622 pxor xmm0,xmm2
4623 movdqu XMMWORD[rax],xmm0
4624
4625 dec r10d
4626 jz NEAR $L$done_key256 ; the final pass stores only the even round key
4627
4628 pshufd xmm2,xmm0,0xff ; broadcast dword 3 of xmm0
4629 pxor xmm3,xmm3 ; all-zero second operand for aesenclast
4630DB 102,15,56,221,211 ; aesenclast xmm2,xmm3
4631
4632 movdqa xmm3,xmm1
4633 pslldq xmm1,4
4634 pxor xmm3,xmm1
4635 pslldq xmm1,4
4636 pxor xmm3,xmm1
4637 pslldq xmm1,4
4638 pxor xmm1,xmm3
4639
4640 pxor xmm2,xmm1
4641 movdqu XMMWORD[16+rax],xmm2
4642 lea rax,[32+rax]
4643 movdqa xmm1,xmm2
4644
4645 jmp NEAR $L$oop_key256
4646
4647$L$done_key256:
4648 mov DWORD[16+rax],edx ; r8+240 (rax = r8+224 here)
4649 xor eax,eax
4650 jmp NEAR $L$enc_key_ret
4651
4652ALIGN 16
4653$L$bad_keybits:
4654 mov rax,-2 ; unsupported key length
; Common exit: wipe every xmm register that may hold key material.
4655$L$enc_key_ret:
4656 pxor xmm0,xmm0
4657 pxor xmm1,xmm1
4658 pxor xmm2,xmm2
4659 pxor xmm3,xmm3
4660 pxor xmm4,xmm4
4661 pxor xmm5,xmm5
4662 add rsp,8
4663
4664 DB 0F3h,0C3h ;repret
4665
4666$L$SEH_end_set_encrypt_key:
4667
;-----------------------------------------------------------------------
; $L$key_expansion_128 / $L$key_expansion_128_cold
; One step of the 128-bit key expansion (aeskeygenassist variant).
;
; In:   xmm0 = current round key
;       xmm1 = aeskeygenassist result for xmm0
;       xmm4 = scratch whose dword 0 must be zero (both shuffles below
;              copy xmm4 dword 0 back into dword 0, so it stays zero)
;       rax  = write cursor (main entry only)
; Out:  xmm0 = next round key; the main entry first stores the incoming
;       xmm0 at [rax] and advances rax by 16, the _cold entry does not.
;-----------------------------------------------------------------------
4668ALIGN 16
4669$L$key_expansion_128:
4670 movups XMMWORD[rax],xmm0
4671 lea rax,[16+rax]
4672$L$key_expansion_128_cold:
4673 shufps xmm4,xmm0,16 ; xmm4 = (0, 0, k1, k0) with k = dwords of xmm0
4674 xorps xmm0,xmm4
4675 shufps xmm4,xmm0,140 ; xmm4 = (0, k0, k0, k1^k2)
4676 xorps xmm0,xmm4 ; xmm0 = running xor of k0..k3
4677 shufps xmm1,xmm1,255 ; broadcast dword 3 of xmm1
4678 xorps xmm0,xmm1
4679 DB 0F3h,0C3h ;repret
4680
;-----------------------------------------------------------------------
; $L$key_expansion_192a / _192a_cold / _192b_warm / $L$key_expansion_192b
; Steps of the 192-bit key expansion (aeskeygenassist variant).
;
; Shared state: xmm0 and xmm2 hold the working key material, xmm1 the
; aeskeygenassist result, xmm4 a scratch register whose dword 0 is zero,
; xmm5 a copy of xmm2 saved by the 192a path for the next 192b call,
; rax the write cursor.
;
;   192a      : stores xmm0 at [rax], rax += 16, then falls into _cold.
;   192a_cold : saves xmm2 in xmm5, then falls into _warm.
;   192b      : stores 32 bytes assembled from xmm5/xmm0/xmm2 at [rax],
;               rax += 32, then jumps to _warm.
;   192b_warm : common update of xmm0 and xmm2.
;-----------------------------------------------------------------------
4681ALIGN 16
4682$L$key_expansion_192a:
4683 movups XMMWORD[rax],xmm0
4684 lea rax,[16+rax]
4685$L$key_expansion_192a_cold:
4686 movaps xmm5,xmm2
4687$L$key_expansion_192b_warm:
4688 shufps xmm4,xmm0,16
4689 movdqa xmm3,xmm2
4690 xorps xmm0,xmm4
4691 shufps xmm4,xmm0,140
4692 pslldq xmm3,4 ; xmm3 = xmm2 shifted left by one dword
4693 xorps xmm0,xmm4 ; xmm0 = running xor of its own dwords
4694 pshufd xmm1,xmm1,85 ; broadcast dword 1 of xmm1
4695 pxor xmm2,xmm3
4696 pxor xmm0,xmm1
4697 pshufd xmm3,xmm0,255 ; broadcast dword 3 of the new xmm0
4698 pxor xmm2,xmm3
4699 DB 0F3h,0C3h ;repret
4700
4701ALIGN 16
4702$L$key_expansion_192b:
4703 movaps xmm3,xmm0
4704 shufps xmm5,xmm0,68 ; xmm5 = (xmm5[0], xmm5[1], xmm0[0], xmm0[1])
4705 movups XMMWORD[rax],xmm5
4706 shufps xmm3,xmm2,78 ; xmm3 = (xmm0[2], xmm0[3], xmm2[0], xmm2[1])
4707 movups XMMWORD[16+rax],xmm3
4708 lea rax,[32+rax]
4709 jmp NEAR $L$key_expansion_192b_warm
4710
;-----------------------------------------------------------------------
; $L$key_expansion_256a / _256a_cold / $L$key_expansion_256b
; Steps of the 256-bit key expansion (aeskeygenassist variant).
;
; xmm0 and xmm2 hold the two most recent round keys, xmm1 the
; aeskeygenassist result, xmm4 a scratch register whose dword 0 is
; zero, rax the write cursor.
;
;   256a      : stores xmm2 at [rax], rax += 16, then updates xmm0.
;   256a_cold : updates xmm0 only (no store).
;   256b      : stores xmm0 at [rax], rax += 16, then updates xmm2.
; 256a mixes in dword 3 of xmm1, 256b mixes in dword 2 of xmm1.
;-----------------------------------------------------------------------
4711ALIGN 16
4712$L$key_expansion_256a:
4713 movups XMMWORD[rax],xmm2
4714 lea rax,[16+rax]
4715$L$key_expansion_256a_cold:
4716 shufps xmm4,xmm0,16
4717 xorps xmm0,xmm4
4718 shufps xmm4,xmm0,140
4719 xorps xmm0,xmm4 ; xmm0 = running xor of its own dwords
4720 shufps xmm1,xmm1,255 ; broadcast dword 3 of xmm1
4721 xorps xmm0,xmm1
4722 DB 0F3h,0C3h ;repret
4723
4724ALIGN 16
4725$L$key_expansion_256b:
4726 movups XMMWORD[rax],xmm0
4727 lea rax,[16+rax]
4728
4729 shufps xmm4,xmm2,16
4730 xorps xmm2,xmm4
4731 shufps xmm4,xmm2,140
4732 xorps xmm2,xmm4 ; xmm2 = running xor of its own dwords
4733 shufps xmm1,xmm1,170 ; broadcast dword 2 of xmm1
4734 xorps xmm2,xmm1
4735 DB 0F3h,0C3h ;repret
4736
4737
;-----------------------------------------------------------------------
; Read-only constants, 64-byte aligned. $L$key_rotate, $L$key_rotate192,
; $L$key_rcon1 and $L$key_rcon1b are loaded by the "_alt" paths of
; aesni_set_encrypt_key; the remaining labels are referenced from parts
; of the file outside this section.
;-----------------------------------------------------------------------
4738ALIGN 64
4739$L$bswap_mask:
; byte indices 15..0, i.e. a full 16-byte reversal when used as a pshufb mask
4740DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
4741$L$increment32:
4742 DD 6,6,6,0
4743$L$increment64:
4744 DD 1,0,0,0
4745$L$xts_magic:
4746 DD 0x87,0,1,0
4747$L$increment1:
4748DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
4749$L$key_rotate:
; pshufb mask: every dword selects source bytes 13,14,15,12
4750 DD 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
4751$L$key_rotate192:
; pshufb mask: every dword selects source bytes 5,6,7,4
4752 DD 0x04070605,0x04070605,0x04070605,0x04070605
4753$L$key_rcon1:
; initial round constant 1 in every dword
4754 DD 1,1,1,1
4755$L$key_rcon1b:
; round constant 0x1b in every dword
4756 DD 0x1b,0x1b,0x1b,0x1b
4757
; NUL-terminated ASCII string:
; "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
4758DB 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
4759DB 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
4760DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
4761DB 115,108,46,111,114,103,62,0
4762ALIGN 64
4763EXTERN __imp_RtlVirtualUnwind
4764
;-----------------------------------------------------------------------
; ecb_ccm64_se_handler
; Win64 language-specific exception handler, installed through the
; .xdata records $L$SEH_info_ecb, $L$SEH_info_ccm64_enc and
; $L$SEH_info_ccm64_dec.
;
; In:   r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT
;       (offsets below follow the Win64 CONTEXT / DISPATCHER_CONTEXT
;       layouts: 120 = Rax, 152 = Rsp, 248 = Rip, 512 = Xmm6;
;       8 = ImageBase, 56 = HandlerData)
; HandlerData[0] / HandlerData[1] are image-relative addresses of the
; end-of-prologue and start-of-epilogue labels of the faulting function.
;
; Chooses the value to hand to $L$common_seh_tail in rax:
;   Rip <  HandlerData[0] : rax = context Rax
;   Rip >= HandlerData[1] : rax = context Rsp
;   otherwise             : copies 64 bytes (4 xmm registers) from
;                           [Rsp] into the context starting at Xmm6
;                           and sets rax = Rsp + 88.
;-----------------------------------------------------------------------
4765ALIGN 16
4766ecb_ccm64_se_handler:
4767 push rsi
4768 push rdi
4769 push rbx
4770 push rbp
4771 push r12
4772 push r13
4773 push r14
4774 push r15
4775 pushfq
4776 sub rsp,64 ; 32-byte home area + 4 stack arguments for RtlVirtualUnwind
4777
4778 mov rax,QWORD[120+r8] ; context Rax
4779 mov rbx,QWORD[248+r8] ; context Rip
4780
4781 mov rsi,QWORD[8+r9] ; ImageBase
4782 mov r11,QWORD[56+r9] ; HandlerData
4783
4784 mov r10d,DWORD[r11] ; HandlerData[0]
4785 lea r10,[r10*1+rsi] ; -> absolute address
4786 cmp rbx,r10
4787 jb NEAR $L$common_seh_tail ; fault before the body label
4788
4789 mov rax,QWORD[152+r8] ; context Rsp
4790
4791 mov r10d,DWORD[4+r11] ; HandlerData[1]
4792 lea r10,[r10*1+rsi]
4793 cmp rbx,r10
4794 jae NEAR $L$common_seh_tail ; fault at or after the ret label
4795
4796 lea rsi,[rax] ; source: xmm save area at [Rsp]
4797 lea rdi,[512+r8] ; destination: context Xmm6
4798 mov ecx,8 ; 8 qwords = 4 xmm registers
4799 DD 0xa548f3fc ; cld; rep movsq
4800 lea rax,[88+rax] ; step over the function's stack frame
4801
4802 jmp NEAR $L$common_seh_tail
4803
4804
4805
;-----------------------------------------------------------------------
; ctr_xts_se_handler
; Win64 language-specific exception handler, installed through the
; .xdata records $L$SEH_info_ctr32, $L$SEH_info_xts_enc and
; $L$SEH_info_xts_dec.
;
; In:   r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT
;       (Win64 layout offsets: 120 = Rax, 152 = Rsp, 160 = Rbp,
;       208 = R11, 248 = Rip, 512 = Xmm6; 8 = ImageBase,
;       56 = HandlerData)
;
; Chooses the value to hand to $L$common_seh_tail in rax:
;   Rip <  HandlerData[0] : rax = context Rax
;   Rip >= HandlerData[1] : rax = context Rsp
;   otherwise             : rax = context R11; copies 160 bytes
;                           (10 xmm registers) from [R11-168] into the
;                           context starting at Xmm6 and restores the
;                           context Rbp from [R11-8].
;-----------------------------------------------------------------------
4806ALIGN 16
4807ctr_xts_se_handler:
4808 push rsi
4809 push rdi
4810 push rbx
4811 push rbp
4812 push r12
4813 push r13
4814 push r14
4815 push r15
4816 pushfq
4817 sub rsp,64 ; 32-byte home area + 4 stack arguments for RtlVirtualUnwind
4818
4819 mov rax,QWORD[120+r8] ; context Rax
4820 mov rbx,QWORD[248+r8] ; context Rip
4821
4822 mov rsi,QWORD[8+r9] ; ImageBase
4823 mov r11,QWORD[56+r9] ; HandlerData
4824
4825 mov r10d,DWORD[r11] ; HandlerData[0]
4826 lea r10,[r10*1+rsi]
4827 cmp rbx,r10
4828 jb NEAR $L$common_seh_tail ; fault before the body label
4829
4830 mov rax,QWORD[152+r8] ; context Rsp
4831
4832 mov r10d,DWORD[4+r11] ; HandlerData[1]
4833 lea r10,[r10*1+rsi]
4834 cmp rbx,r10
4835 jae NEAR $L$common_seh_tail ; fault at or after the epilogue label
4836
4837 mov rax,QWORD[208+r8] ; context R11
4838
4839 lea rsi,[((-168))+rax] ; source: xmm save area below R11
4840 lea rdi,[512+r8] ; destination: context Xmm6
4841 mov ecx,20 ; 20 qwords = 10 xmm registers
4842 DD 0xa548f3fc ; cld; rep movsq
4843
4844 mov rbp,QWORD[((-8))+rax] ; saved rbp sits just below R11
4845 mov QWORD[160+r8],rbp ; context Rbp
4846 jmp NEAR $L$common_seh_tail
4847
4848
4849
;-----------------------------------------------------------------------
; ocb_se_handler
; Win64 language-specific exception handler, installed through the
; .xdata records $L$SEH_info_ocb_enc and $L$SEH_info_ocb_dec, which
; supply three labels as handler data (body, epilogue, pop).
;
; In:   r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT
;       (Win64 layout offsets: 120 = Rax, 144 = Rbx, 152 = Rsp,
;       160 = Rbp, 216 = R12, 224 = R13, 232 = R14, 248 = Rip,
;       512 = Xmm6; 8 = ImageBase, 56 = HandlerData)
;
;   Rip <  HandlerData[0] or Rip >= HandlerData[1]
;        : straight to $L$common_seh_tail with rax = context Rax.
;   Rip >= HandlerData[2]
;        : xmm registers are not copied; rax stays context Rax.
;   otherwise
;        : copies 160 bytes (10 xmm registers) from [Rsp] into the
;          context starting at Xmm6 and sets rax = Rsp + 160 + 40.
; In the last two cases rbx, rbp, r12, r13, r14 are then reloaded from
; the five qwords just below rax and written into the context.
;-----------------------------------------------------------------------
4850ALIGN 16
4851ocb_se_handler:
4852 push rsi
4853 push rdi
4854 push rbx
4855 push rbp
4856 push r12
4857 push r13
4858 push r14
4859 push r15
4860 pushfq
4861 sub rsp,64 ; 32-byte home area + 4 stack arguments for RtlVirtualUnwind
4862
4863 mov rax,QWORD[120+r8] ; context Rax
4864 mov rbx,QWORD[248+r8] ; context Rip
4865
4866 mov rsi,QWORD[8+r9] ; ImageBase
4867 mov r11,QWORD[56+r9] ; HandlerData
4868
4869 mov r10d,DWORD[r11] ; HandlerData[0]
4870 lea r10,[r10*1+rsi]
4871 cmp rbx,r10
4872 jb NEAR $L$common_seh_tail ; fault before the body label
4873
4874 mov r10d,DWORD[4+r11] ; HandlerData[1]
4875 lea r10,[r10*1+rsi]
4876 cmp rbx,r10
4877 jae NEAR $L$common_seh_tail ; fault at or after the epilogue label
4878
4879 mov r10d,DWORD[8+r11] ; HandlerData[2]
4880 lea r10,[r10*1+rsi]
4881 cmp rbx,r10
4882 jae NEAR $L$ocb_no_xmm ; fault at or after the pop label
4883
4884 mov rax,QWORD[152+r8] ; context Rsp
4885
4886 lea rsi,[rax] ; source: xmm save area at [Rsp]
4887 lea rdi,[512+r8] ; destination: context Xmm6
4888 mov ecx,20 ; 20 qwords = 10 xmm registers
4889 DD 0xa548f3fc ; cld; rep movsq
4890 lea rax,[((160+40))+rax] ; skip xmm area (160) + saved GPRs (40)
4891
4892$L$ocb_no_xmm:
4893 mov rbx,QWORD[((-8))+rax]
4894 mov rbp,QWORD[((-16))+rax]
4895 mov r12,QWORD[((-24))+rax]
4896 mov r13,QWORD[((-32))+rax]
4897 mov r14,QWORD[((-40))+rax]
4898
4899 mov QWORD[144+r8],rbx ; context Rbx
4900 mov QWORD[160+r8],rbp ; context Rbp
4901 mov QWORD[216+r8],r12 ; context R12
4902 mov QWORD[224+r8],r13 ; context R13
4903 mov QWORD[232+r8],r14 ; context R14
4904
4905 jmp NEAR $L$common_seh_tail
4906
4907
;-----------------------------------------------------------------------
; cbc_se_handler  (+ shared $L$common_seh_tail)
; Win64 language-specific exception handler installed through the
; .xdata record $L$SEH_info_cbc. Unlike the other handlers in this
; file it carries no handler data and compares Rip against code labels
; directly.
;
; In:   r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT
;       (Win64 layout offsets: 120 = Rax, 152 = Rsp, 160 = Rbp,
;       168 = Rsi, 176 = Rdi, 208 = R11, 248 = Rip, 512 = Xmm6)
;
;   Rip <  $L$cbc_decrypt_bulk : rax = context Rsp
;   Rip <  $L$cbc_decrypt_body : rax = context Rax
;   Rip >= $L$cbc_ret          : rax = context Rsp
;   otherwise : copies 160 bytes (10 xmm registers) from [Rsp+16] into
;               the context starting at Xmm6, sets rax = context R11
;               and restores the context Rbp from [R11-8].
;
; $L$common_seh_tail is the join point for every handler in this file.
; On entry rax is the stack pointer value the faulting function had at
; its entry. The tail reloads rdi/rsi from [rax+8]/[rax+16], updates
; the context, copies it to the dispatcher's ContextRecord, calls
; RtlVirtualUnwind and returns eax = 1.
;-----------------------------------------------------------------------
4908ALIGN 16
4909cbc_se_handler:
4910 push rsi
4911 push rdi
4912 push rbx
4913 push rbp
4914 push r12
4915 push r13
4916 push r14
4917 push r15
4918 pushfq
4919 sub rsp,64 ; 32-byte home area + 4 stack arguments for RtlVirtualUnwind
4920
4921 mov rax,QWORD[152+r8] ; context Rsp
4922 mov rbx,QWORD[248+r8] ; context Rip
4923
4924 lea r10,[$L$cbc_decrypt_bulk]
4925 cmp rbx,r10
4926 jb NEAR $L$common_seh_tail
4927
4928 mov rax,QWORD[120+r8] ; context Rax
4929
4930 lea r10,[$L$cbc_decrypt_body]
4931 cmp rbx,r10
4932 jb NEAR $L$common_seh_tail
4933
4934 mov rax,QWORD[152+r8] ; context Rsp
4935
4936 lea r10,[$L$cbc_ret]
4937 cmp rbx,r10
4938 jae NEAR $L$common_seh_tail
4939
4940 lea rsi,[16+rax] ; source: xmm save area at [Rsp+16]
4941 lea rdi,[512+r8] ; destination: context Xmm6
4942 mov ecx,20 ; 20 qwords = 10 xmm registers
4943 DD 0xa548f3fc ; cld; rep movsq
4944
4945 mov rax,QWORD[208+r8] ; context R11
4946
4947 mov rbp,QWORD[((-8))+rax] ; saved rbp sits just below R11
4948 mov QWORD[160+r8],rbp ; context Rbp
4949
4950$L$common_seh_tail:
4951 mov rdi,QWORD[8+rax] ; rdi/rsi as saved in the caller-provided slots
4952 mov rsi,QWORD[16+rax]
4953 mov QWORD[152+r8],rax ; context Rsp
4954 mov QWORD[168+r8],rsi ; context Rsi
4955 mov QWORD[176+r8],rdi ; context Rdi
4956
4957 mov rdi,QWORD[40+r9] ; destination: dispatcher ContextRecord
4958 mov rsi,r8 ; source: the context we just patched
4959 mov ecx,154 ; 154 qwords = 1232 bytes
4960 DD 0xa548f3fc ; cld; rep movsq
4961
; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
4962 mov rsi,r9
4963 xor rcx,rcx ; arg1 = 0
4964 mov rdx,QWORD[8+rsi] ; arg2 = ImageBase
4965 mov r8,QWORD[rsi] ; arg3 = ControlPc
4966 mov r9,QWORD[16+rsi] ; arg4 = FunctionEntry
4967 mov r10,QWORD[40+rsi] ; ContextRecord
4968 lea r11,[56+rsi] ; &HandlerData
4969 lea r12,[24+rsi] ; &EstablisherFrame
4970 mov QWORD[32+rsp],r10 ; arg5
4971 mov QWORD[40+rsp],r11 ; arg6
4972 mov QWORD[48+rsp],r12 ; arg7
4973 mov QWORD[56+rsp],rcx ; arg8 = NULL
4974 call QWORD[__imp_RtlVirtualUnwind]
4975
4976 mov eax,1 ; handler result 1 (ExceptionContinueSearch)
4977 add rsp,64
4978 popfq
4979 pop r15
4980 pop r14
4981 pop r13
4982 pop r12
4983 pop rbp
4984 pop rbx
4985 pop rdi
4986 pop rsi
4987 DB 0F3h,0C3h ;repret
4988
4989
;-----------------------------------------------------------------------
; Win64 unwind tables.
;
; .pdata: one record of three image-relative addresses per function:
;         function start, function end, unwind-info record in .xdata.
; .xdata: unwind-info records. "DB 9,0,0,0" encodes version 1 with the
;         exception-handler flag set, an empty prologue and no unwind
;         codes; it is followed by the handler's address and then the
;         handler data words that the handler reads as HandlerData[n].
;-----------------------------------------------------------------------
4990section .pdata rdata align=4
4991ALIGN 4
4992 DD $L$SEH_begin_aesni_ecb_encrypt wrt ..imagebase
4993 DD $L$SEH_end_aesni_ecb_encrypt wrt ..imagebase
4994 DD $L$SEH_info_ecb wrt ..imagebase
4995
4996 DD $L$SEH_begin_aesni_ccm64_encrypt_blocks wrt ..imagebase
4997 DD $L$SEH_end_aesni_ccm64_encrypt_blocks wrt ..imagebase
4998 DD $L$SEH_info_ccm64_enc wrt ..imagebase
4999
5000 DD $L$SEH_begin_aesni_ccm64_decrypt_blocks wrt ..imagebase
5001 DD $L$SEH_end_aesni_ccm64_decrypt_blocks wrt ..imagebase
5002 DD $L$SEH_info_ccm64_dec wrt ..imagebase
5003
5004 DD $L$SEH_begin_aesni_ctr32_encrypt_blocks wrt ..imagebase
5005 DD $L$SEH_end_aesni_ctr32_encrypt_blocks wrt ..imagebase
5006 DD $L$SEH_info_ctr32 wrt ..imagebase
5007
5008 DD $L$SEH_begin_aesni_xts_encrypt wrt ..imagebase
5009 DD $L$SEH_end_aesni_xts_encrypt wrt ..imagebase
5010 DD $L$SEH_info_xts_enc wrt ..imagebase
5011
5012 DD $L$SEH_begin_aesni_xts_decrypt wrt ..imagebase
5013 DD $L$SEH_end_aesni_xts_decrypt wrt ..imagebase
5014 DD $L$SEH_info_xts_dec wrt ..imagebase
5015
5016 DD $L$SEH_begin_aesni_ocb_encrypt wrt ..imagebase
5017 DD $L$SEH_end_aesni_ocb_encrypt wrt ..imagebase
5018 DD $L$SEH_info_ocb_enc wrt ..imagebase
5019
5020 DD $L$SEH_begin_aesni_ocb_decrypt wrt ..imagebase
5021 DD $L$SEH_end_aesni_ocb_decrypt wrt ..imagebase
5022 DD $L$SEH_info_ocb_dec wrt ..imagebase
5023 DD $L$SEH_begin_aesni_cbc_encrypt wrt ..imagebase
5024 DD $L$SEH_end_aesni_cbc_encrypt wrt ..imagebase
5025 DD $L$SEH_info_cbc wrt ..imagebase
5026
; The two key-setup routines share one unwind record ($L$SEH_info_key).
5027 DD aesni_set_decrypt_key wrt ..imagebase
5028 DD $L$SEH_end_set_decrypt_key wrt ..imagebase
5029 DD $L$SEH_info_key wrt ..imagebase
5030
5031 DD aesni_set_encrypt_key wrt ..imagebase
5032 DD $L$SEH_end_set_encrypt_key wrt ..imagebase
5033 DD $L$SEH_info_key wrt ..imagebase
5034section .xdata rdata align=8
5035ALIGN 8
5036$L$SEH_info_ecb:
5037DB 9,0,0,0
5038 DD ecb_ccm64_se_handler wrt ..imagebase
5039 DD $L$ecb_enc_body wrt ..imagebase,$L$ecb_enc_ret wrt ..imagebase
5040$L$SEH_info_ccm64_enc:
5041DB 9,0,0,0
5042 DD ecb_ccm64_se_handler wrt ..imagebase
5043 DD $L$ccm64_enc_body wrt ..imagebase,$L$ccm64_enc_ret wrt ..imagebase
5044$L$SEH_info_ccm64_dec:
5045DB 9,0,0,0
5046 DD ecb_ccm64_se_handler wrt ..imagebase
5047 DD $L$ccm64_dec_body wrt ..imagebase,$L$ccm64_dec_ret wrt ..imagebase
5048$L$SEH_info_ctr32:
5049DB 9,0,0,0
5050 DD ctr_xts_se_handler wrt ..imagebase
5051 DD $L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase
5052$L$SEH_info_xts_enc:
5053DB 9,0,0,0
5054 DD ctr_xts_se_handler wrt ..imagebase
5055 DD $L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
5056$L$SEH_info_xts_dec:
5057DB 9,0,0,0
5058 DD ctr_xts_se_handler wrt ..imagebase
5059 DD $L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase
; The OCB records carry a third label (the "pop" point) plus a zero word.
5060$L$SEH_info_ocb_enc:
5061DB 9,0,0,0
5062 DD ocb_se_handler wrt ..imagebase
5063 DD $L$ocb_enc_body wrt ..imagebase,$L$ocb_enc_epilogue wrt ..imagebase
5064 DD $L$ocb_enc_pop wrt ..imagebase
5065 DD 0
5066$L$SEH_info_ocb_dec:
5067DB 9,0,0,0
5068 DD ocb_se_handler wrt ..imagebase
5069 DD $L$ocb_dec_body wrt ..imagebase,$L$ocb_dec_epilogue wrt ..imagebase
5070 DD $L$ocb_dec_pop wrt ..imagebase
5071 DD 0
; No handler data here: cbc_se_handler compares against labels directly.
5072$L$SEH_info_cbc:
5073DB 9,0,0,0
5074 DD cbc_se_handler wrt ..imagebase
; Plain unwind record without a handler: version 1, 4-byte prologue,
; one unwind code = "allocate 8 bytes at prologue offset 4" (sub rsp,8).
5075$L$SEH_info_key:
5076DB 0x01,0x04,0x01,0x00
5077DB 0x04,0x02,0x00,0x00
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette