VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/aesni-sha256-x86_64.S@ 95218

Last change on this file since 95218 was 94083, checked in by vboxsync, 3 years ago

libs/openssl-3.0.1: Recreate asm files, bugref:10128

File size: 86.1 KB
Line 
1default rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
6
7
8EXTERN OPENSSL_ia32cap_P
9global aesni_cbc_sha256_enc
10
11ALIGN 16
;-----------------------------------------------------------------------
; aesni_cbc_sha256_enc -- capability dispatcher.
; ABI:  Microsoft x64 (first integer argument arrives in rcx).
; In:   rcx = first argument; if it is zero the routine only probes.
; Out:  eax = 1 when rcx == 0 (probe request), otherwise control is
;       transferred with a jump (arguments untouched) to one of the
;       shaext / xop / avx2 / avx variants.
; Uses: r10, r11, eax, flags.
; Selection is driven by bits of OPENSSL_ia32cap_P:
;   qword at offset 4, bit 61 (= bit 29 of the dword at offset 8) -> shaext
;   dword at offset 4, bit 11 (2048)                               -> xop
;   dword at offset 8, bits 3,5,8 all set (296 = 0x128)            -> avx2
;   dword at offset 4, bit 28 (268435456 = 0x10000000)             -> avx
; If none matches, execution reaches ud2 and traps.
;-----------------------------------------------------------------------
12aesni_cbc_sha256_enc:
13
14 lea r11,[OPENSSL_ia32cap_P]
; Probe path: a zero first argument returns 1 without touching anything else.
15 mov eax,1
16 cmp rcx,0
17 je NEAR $L$probe
18 mov eax,DWORD[r11]
; r10 = capability dwords at offsets 4 (low half) and 8 (high half).
19 mov r10,QWORD[4+r11]
20 bt r10,61
21 jc NEAR aesni_cbc_sha256_enc_shaext
; r11d = high half of r10, i.e. the dword at offset 8.
22 mov r11,r10
23 shr r11,32
24
25 test r10d,2048
26 jnz NEAR aesni_cbc_sha256_enc_xop
; All three bits of the 296 mask must be set for the avx2 variant.
27 and r11d,296
28 cmp r11d,296
29 je NEAR aesni_cbc_sha256_enc_avx2
30 and r10d,268435456
31 jnz NEAR aesni_cbc_sha256_enc_avx
; No usable variant: trap.
32 ud2
; The four instructions below follow an unconditional ud2 and carry no label,
; so they cannot be reached by fall-through or by any jump visible here.
33 xor eax,eax
34 cmp rcx,0
35 je NEAR $L$probe
36 ud2
37$L$probe:
38 DB 0F3h,0C3h ;repret
39
40
41
42ALIGN 64
43
;-----------------------------------------------------------------------
; K256 -- constant pool shared by the code in this file.
;   +0   .. +511 : the 64 SHA-256 round constants, four per 16-byte row;
;                  every row is emitted twice, so distinct rows sit 32
;                  bytes apart (the code reads them at +0/+32/+64/+96
;                  from its running table pointer).
;   +512 .. +543 : byte-order shuffle mask 0x00010203,... (row emitted
;                  twice); used with vpshufb on freshly loaded message
;                  words.  Byte +515 is 0x00, which the round loop uses
;                  as its end-of-table sentinel (cmp BYTE[131+rbp],0).
;   +544 .. +623 : 0/-1 select masks, indexed by (rounds field - 9)*8;
;                  three consecutive 16-byte loads pick exactly one
;                  all-ones mask among three candidates.
;   then         : NUL-terminated ASCII identification string.
;-----------------------------------------------------------------------
44K256:
45 DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
46 DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
47 DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
48 DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
49 DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
50 DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
51 DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
52 DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
53 DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
54 DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
55 DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
56 DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
57 DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
58 DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
59 DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
60 DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
61 DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
62 DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
63 DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
64 DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
65 DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
66 DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
67 DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
68 DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
69 DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
70 DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
71 DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
72 DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
73 DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
74 DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
75 DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
76 DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
77
; K256+512: byte-order shuffle mask (two copies).
78 DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
79 DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
; K256+544: select masks (20 dwords, only dwords 8..11 are all-ones).
80 DD 0,0,0,0,0,0,0,0,-1,-1,-1,-1
81 DD 0,0,0,0,0,0,0,0
; ASCII: "AESNI-CBC+SHA256 stitch for x86_64, CRYPTOGAMS by <appro@openssl.org>",0
82DB 65,69,83,78,73,45,67,66,67,43,83,72,65,50,53,54
83DB 32,115,116,105,116,99,104,32,102,111,114,32,120,56,54,95
84DB 54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98
85DB 121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108
86DB 46,111,114,103,62,0
87ALIGN 64
88
89ALIGN 64
;-----------------------------------------------------------------------
; aesni_cbc_sha256_enc_xop -- entry and stack-frame setup.
; ABI:  Microsoft x64.  The seven arguments are moved into the
;       rdi/rsi/rdx/rcx/r8/r9 (+r10) register layout the body uses.
; Args as the body uses them (Win64 position -> internal register):
;   1 rcx        -> rdi : AES input pointer (16-byte blocks are read here)
;   2 rdx        -> rsi : AES output pointer (kept as output-input delta)
;   3 r8         -> rdx : count of 64-byte blocks (shifted left by 6)
;   4 r9         -> rcx : AES key structure (round keys from offset 0,
;                         a 32-bit rounds field at offset 240)
;   5 [40+rsp]   -> r8  : 16-byte chaining value, read at start and
;                         written back at the end
;   6 [48+rsp]   -> r9  : eight 32-bit hash state words, updated in place
;   7 [56+rsp]   -> r10 : data fed to the hash (kept as delta to arg 1)
; Frame after "and rsp,-64" (offsets from the aligned rsp):
;   0..63   four 16-byte slots of pre-added (message word + constant)
;   64+0    current AES input pointer (written inside the round loop)
;   64+8    output - input
;   64+16   end-of-input pointer (input + 64*count)
;   64+32   chaining-value pointer
;   64+40   hash state pointer
;   64+48   hash-data - input
;   120     caller's rsp at entry (used to restore registers and rsp)
;   128..287 saved xmm6..xmm15 (callee-saved under this ABI)
;-----------------------------------------------------------------------
90aesni_cbc_sha256_enc_xop:
; Caller's rdi/rsi go into the home slots; the epilogue reloads them from there.
91 mov QWORD[8+rsp],rdi ;WIN64 prologue
92 mov QWORD[16+rsp],rsi
93 mov rax,rsp
94$L$SEH_begin_aesni_cbc_sha256_enc_xop:
95 mov rdi,rcx
96 mov rsi,rdx
97 mov rdx,r8
98 mov rcx,r9
99 mov r8,QWORD[40+rsp]
100 mov r9,QWORD[48+rsp]
101
102
103
104$L$xop_shortcut:
; Seventh argument, then remember the entry rsp before any push.
105 mov r10,QWORD[56+rsp]
106 mov rax,rsp
107
108 push rbx
109
110 push rbp
111
112 push r12
113
114 push r13
115
116 push r14
117
118 push r15
119
; Reserve the frame and align it to 64 bytes so the aligned vector
; stores to [rsp+...] below are legal.
120 sub rsp,288
121 and rsp,-64
122
; rdx = input + 64*count (end pointer); rsi and r10 become deltas
; relative to the input pointer so one running pointer serves all three.
123 shl rdx,6
124 sub rsi,rdi
125 sub r10,rdi
126 add rdx,rdi
127
128
129 mov QWORD[((64+8))+rsp],rsi
130 mov QWORD[((64+16))+rsp],rdx
131
132 mov QWORD[((64+32))+rsp],r8
133 mov QWORD[((64+40))+rsp],r9
134 mov QWORD[((64+48))+rsp],r10
135 mov QWORD[120+rsp],rax
136
; Preserve the callee-saved vector registers.
137 movaps XMMWORD[128+rsp],xmm6
138 movaps XMMWORD[144+rsp],xmm7
139 movaps XMMWORD[160+rsp],xmm8
140 movaps XMMWORD[176+rsp],xmm9
141 movaps XMMWORD[192+rsp],xmm10
142 movaps XMMWORD[208+rsp],xmm11
143 movaps XMMWORD[224+rsp],xmm12
144 movaps XMMWORD[240+rsp],xmm13
145 movaps XMMWORD[256+rsp],xmm14
146 movaps XMMWORD[272+rsp],xmm15
; $L$prologue_xop -- load the working registers for the main loop.
;   r12        = running AES input pointer
;   rdi        = key pointer + 128 (round keys are then addressed as
;                [(n*16-128)+rdi], the rounds field as [(240-128)+rdi])
;   r13        = K256+544, base of the 0/-1 select masks
;   r14        = rounds field - 9, scaled by 8 to index that mask table
;   r15        = hash state pointer, rsi = hash-data - input delta
;   xmm8       = initial chaining value
;   eax,ebx,ecx,edx,r8d,r9d,r10d,r11d = the eight hash state words
;   xmm14/13/12 = masks applied to the three vaesenclast candidates
;   xmm10      = first round key
147$L$prologue_xop:
148 vzeroall
149
150 mov r12,rdi
151 lea rdi,[128+rcx]
152 lea r13,[((K256+544))]
153 mov r14d,DWORD[((240-128))+rdi]
154 mov r15,r9
155 mov rsi,r10
156 vmovdqu xmm8,XMMWORD[r8]
157 sub r14,9
158
159 mov eax,DWORD[r15]
160 mov ebx,DWORD[4+r15]
161 mov ecx,DWORD[8+r15]
162 mov edx,DWORD[12+r15]
163 mov r8d,DWORD[16+r15]
164 mov r9d,DWORD[20+r15]
165 mov r10d,DWORD[24+r15]
166 mov r11d,DWORD[28+r15]
167
; Aligned loads: these rely on (rounds field - 9)*8 being a multiple of 16.
168 vmovdqa xmm14,XMMWORD[r14*8+r13]
169 vmovdqa xmm13,XMMWORD[16+r14*8+r13]
170 vmovdqa xmm12,XMMWORD[32+r14*8+r13]
171 vmovdqu xmm10,XMMWORD[((0-128))+rdi]
172 jmp NEAR $L$loop_xop
173ALIGN 16
; $L$loop_xop -- per-64-byte-block setup (outer loop head).
; Loads 64 bytes of hash input from [r12+rsi], byte-swaps each 32-bit
; word with the shuffle mask at K256+512, and stores (word + constant)
; for the first 16 rounds into the four 16-byte slots at [rsp].
; On exit: xmm0..xmm3 = the 16 message words, rbp = K256,
;          r14d = copy of eax, esi = ebx xor ecx, r13d = copy of r8d
;          (the seeds the round code expects in those registers).
174$L$loop_xop:
175 vmovdqa xmm7,XMMWORD[((K256+512))]
176 vmovdqu xmm0,XMMWORD[r12*1+rsi]
177 vmovdqu xmm1,XMMWORD[16+r12*1+rsi]
178 vmovdqu xmm2,XMMWORD[32+r12*1+rsi]
179 vmovdqu xmm3,XMMWORD[48+r12*1+rsi]
180 vpshufb xmm0,xmm0,xmm7
181 lea rbp,[K256]
182 vpshufb xmm1,xmm1,xmm7
183 vpshufb xmm2,xmm2,xmm7
; Constant rows are 32 bytes apart because each row is stored twice in K256.
184 vpaddd xmm4,xmm0,XMMWORD[rbp]
185 vpshufb xmm3,xmm3,xmm7
186 vpaddd xmm5,xmm1,XMMWORD[32+rbp]
187 vpaddd xmm6,xmm2,XMMWORD[64+rbp]
188 vpaddd xmm7,xmm3,XMMWORD[96+rbp]
189 vmovdqa XMMWORD[rsp],xmm4
190 mov r14d,eax
191 vmovdqa XMMWORD[16+rsp],xmm5
192 mov esi,ebx
193 vmovdqa XMMWORD[32+rsp],xmm6
194 xor esi,ecx
195 vmovdqa XMMWORD[48+rsp],xmm7
196 mov r13d,r8d
197 jmp NEAR $L$xop_00_47
198
199ALIGN 16
;-----------------------------------------------------------------------
; $L$xop_00_47 -- 16 hash rounds with message-schedule expansion,
; interleaved with the encryption of ONE 16-byte AES-CBC block.
; Executed three times per 64-byte hash block: rbp advances by 128
; (16*2*4) on every pass and the loop ends when BYTE[131+rbp] is zero,
; which first happens when rbp = K256+384 (the byte at K256+515 lies in
; the shuffle mask and is 0x00).
;
; Scalar round pattern (register roles rotate every round):
;   r13d : ror 14, xor e, ror 5, xor e, ror 6  = e ror 6 ^ e ror 11 ^ e ror 25
;   r14d : ror 9,  xor a, ror 11, xor a, ror 2 = a ror 2 ^ a ror 13 ^ a ror 22
;          (its addition into the new "a" is completed by the
;          "add r14d,x / mov x,r14d" pair spanning two rounds)
;   r12d : ((f xor g) and e) xor g
;   esi / r15d (alternating) : ((a xor b) and (b xor c)) xor b
;   add <h>,DWORD[n+rsp] adds the pre-computed (message word + constant).
;
; Vector side: xmm0..xmm3 hold the 16 current message words; each
; quarter produces four new words and stores (words + next constants)
; back into its [rsp] slot for the following pass.
; "DB 143,232,120,194,<modrm>,<imm>" lines are raw-byte encodings of the
; AMD XOP instruction vprotd (0x8F escape, map 8, opcode 0xC2):
; ModRM.reg = destination xmm, ModRM.rm = source xmm, last byte = rotate
; count.  E.g. DB 143,232,120,194,236,14 is vprotd xmm5,xmm4,14.
;
; AES side: xmm9 = block being encrypted, xmm10 = next round key,
; xmm8 = previous ciphertext (chaining value), xmm11 = vaesenclast
; candidate; xmm12/xmm13/xmm14 select which candidate is kept.
;-----------------------------------------------------------------------
200$L$xop_00_47:
201 sub rbp,-16*2*4
; Fetch the next 16-byte AES input block and remember the pointer
; (r12d is reused as a scalar temporary by the rounds below).
202 vmovdqu xmm9,XMMWORD[r12]
203 mov QWORD[((64+0))+rsp],r12
204 vpalignr xmm4,xmm1,xmm0,4
205 ror r13d,14
206 mov eax,r14d
207 vpalignr xmm7,xmm3,xmm2,4
208 mov r12d,r9d
209 xor r13d,r8d
210DB 143,232,120,194,236,14
211 ror r14d,9
212 xor r12d,r10d
213 vpsrld xmm4,xmm4,3
214 ror r13d,5
215 xor r14d,eax
216 vpaddd xmm0,xmm0,xmm7
217 and r12d,r8d
; AES: xor the input block with the first round key.
218 vpxor xmm9,xmm9,xmm10
219 vmovdqu xmm10,XMMWORD[((16-128))+rdi]
220 xor r13d,r8d
221 add r11d,DWORD[rsp]
222 mov r15d,eax
223DB 143,232,120,194,245,11
224 ror r14d,11
225 xor r12d,r10d
226 vpxor xmm4,xmm4,xmm5
227 xor r15d,ebx
228 ror r13d,6
229 add r11d,r12d
230 and esi,r15d
231DB 143,232,120,194,251,13
232 xor r14d,eax
233 add r11d,r13d
234 vpxor xmm4,xmm4,xmm6
235 xor esi,ebx
236 add edx,r11d
237 vpsrld xmm6,xmm3,10
238 ror r14d,2
239 add r11d,esi
240 vpaddd xmm0,xmm0,xmm4
241 mov r13d,edx
242 add r14d,r11d
243DB 143,232,120,194,239,2
244 ror r13d,14
245 mov r11d,r14d
246 vpxor xmm7,xmm7,xmm6
247 mov r12d,r8d
248 xor r13d,edx
249 ror r14d,9
250 xor r12d,r9d
251 vpxor xmm7,xmm7,xmm5
252 ror r13d,5
253 xor r14d,r11d
254 and r12d,edx
; AES-CBC chaining: xor in the previous ciphertext block (xmm8).
255 vpxor xmm9,xmm9,xmm8
256 xor r13d,edx
257 vpsrldq xmm7,xmm7,8
258 add r10d,DWORD[4+rsp]
259 mov esi,r11d
260 ror r14d,11
261 xor r12d,r9d
262 vpaddd xmm0,xmm0,xmm7
263 xor esi,eax
264 ror r13d,6
265 add r10d,r12d
266 and r15d,esi
267DB 143,232,120,194,248,13
268 xor r14d,r11d
269 add r10d,r13d
270 vpsrld xmm6,xmm0,10
271 xor r15d,eax
272 add ecx,r10d
273DB 143,232,120,194,239,2
274 ror r14d,2
275 add r10d,r15d
276 vpxor xmm7,xmm7,xmm6
277 mov r13d,ecx
278 add r14d,r10d
279 ror r13d,14
280 mov r10d,r14d
281 vpxor xmm7,xmm7,xmm5
282 mov r12d,edx
283 xor r13d,ecx
284 ror r14d,9
285 xor r12d,r8d
286 vpslldq xmm7,xmm7,8
287 ror r13d,5
288 xor r14d,r10d
289 and r12d,ecx
290 vaesenc xmm9,xmm9,xmm10
291 vmovdqu xmm10,XMMWORD[((32-128))+rdi]
292 xor r13d,ecx
293 vpaddd xmm0,xmm0,xmm7
294 add r9d,DWORD[8+rsp]
295 mov r15d,r10d
296 ror r14d,11
297 xor r12d,r8d
; New words in xmm0 plus the constants for the next pass.
298 vpaddd xmm6,xmm0,XMMWORD[rbp]
299 xor r15d,r11d
300 ror r13d,6
301 add r9d,r12d
302 and esi,r15d
303 xor r14d,r10d
304 add r9d,r13d
305 xor esi,r11d
306 add ebx,r9d
307 ror r14d,2
308 add r9d,esi
309 mov r13d,ebx
310 add r14d,r9d
311 ror r13d,14
312 mov r9d,r14d
313 mov r12d,ecx
314 xor r13d,ebx
315 ror r14d,9
316 xor r12d,edx
317 ror r13d,5
318 xor r14d,r9d
319 and r12d,ebx
320 vaesenc xmm9,xmm9,xmm10
321 vmovdqu xmm10,XMMWORD[((48-128))+rdi]
322 xor r13d,ebx
323 add r8d,DWORD[12+rsp]
324 mov esi,r9d
325 ror r14d,11
326 xor r12d,edx
327 xor esi,r10d
328 ror r13d,6
329 add r8d,r12d
330 and r15d,esi
331 xor r14d,r9d
332 add r8d,r13d
333 xor r15d,r10d
334 add eax,r8d
335 ror r14d,2
336 add r8d,r15d
337 mov r13d,eax
338 add r14d,r8d
339 vmovdqa XMMWORD[rsp],xmm6
; Second quarter: expands xmm1, consumes stack slot [16+rsp].
340 vpalignr xmm4,xmm2,xmm1,4
341 ror r13d,14
342 mov r8d,r14d
343 vpalignr xmm7,xmm0,xmm3,4
344 mov r12d,ebx
345 xor r13d,eax
346DB 143,232,120,194,236,14
347 ror r14d,9
348 xor r12d,ecx
349 vpsrld xmm4,xmm4,3
350 ror r13d,5
351 xor r14d,r8d
352 vpaddd xmm1,xmm1,xmm7
353 and r12d,eax
354 vaesenc xmm9,xmm9,xmm10
355 vmovdqu xmm10,XMMWORD[((64-128))+rdi]
356 xor r13d,eax
357 add edx,DWORD[16+rsp]
358 mov r15d,r8d
359DB 143,232,120,194,245,11
360 ror r14d,11
361 xor r12d,ecx
362 vpxor xmm4,xmm4,xmm5
363 xor r15d,r9d
364 ror r13d,6
365 add edx,r12d
366 and esi,r15d
367DB 143,232,120,194,248,13
368 xor r14d,r8d
369 add edx,r13d
370 vpxor xmm4,xmm4,xmm6
371 xor esi,r9d
372 add r11d,edx
373 vpsrld xmm6,xmm0,10
374 ror r14d,2
375 add edx,esi
376 vpaddd xmm1,xmm1,xmm4
377 mov r13d,r11d
378 add r14d,edx
379DB 143,232,120,194,239,2
380 ror r13d,14
381 mov edx,r14d
382 vpxor xmm7,xmm7,xmm6
383 mov r12d,eax
384 xor r13d,r11d
385 ror r14d,9
386 xor r12d,ebx
387 vpxor xmm7,xmm7,xmm5
388 ror r13d,5
389 xor r14d,edx
390 and r12d,r11d
391 vaesenc xmm9,xmm9,xmm10
392 vmovdqu xmm10,XMMWORD[((80-128))+rdi]
393 xor r13d,r11d
394 vpsrldq xmm7,xmm7,8
395 add ecx,DWORD[20+rsp]
396 mov esi,edx
397 ror r14d,11
398 xor r12d,ebx
399 vpaddd xmm1,xmm1,xmm7
400 xor esi,r8d
401 ror r13d,6
402 add ecx,r12d
403 and r15d,esi
404DB 143,232,120,194,249,13
405 xor r14d,edx
406 add ecx,r13d
407 vpsrld xmm6,xmm1,10
408 xor r15d,r8d
409 add r10d,ecx
410DB 143,232,120,194,239,2
411 ror r14d,2
412 add ecx,r15d
413 vpxor xmm7,xmm7,xmm6
414 mov r13d,r10d
415 add r14d,ecx
416 ror r13d,14
417 mov ecx,r14d
418 vpxor xmm7,xmm7,xmm5
419 mov r12d,r11d
420 xor r13d,r10d
421 ror r14d,9
422 xor r12d,eax
423 vpslldq xmm7,xmm7,8
424 ror r13d,5
425 xor r14d,ecx
426 and r12d,r10d
427 vaesenc xmm9,xmm9,xmm10
428 vmovdqu xmm10,XMMWORD[((96-128))+rdi]
429 xor r13d,r10d
430 vpaddd xmm1,xmm1,xmm7
431 add ebx,DWORD[24+rsp]
432 mov r15d,ecx
433 ror r14d,11
434 xor r12d,eax
435 vpaddd xmm6,xmm1,XMMWORD[32+rbp]
436 xor r15d,edx
437 ror r13d,6
438 add ebx,r12d
439 and esi,r15d
440 xor r14d,ecx
441 add ebx,r13d
442 xor esi,edx
443 add r9d,ebx
444 ror r14d,2
445 add ebx,esi
446 mov r13d,r9d
447 add r14d,ebx
448 ror r13d,14
449 mov ebx,r14d
450 mov r12d,r10d
451 xor r13d,r9d
452 ror r14d,9
453 xor r12d,r11d
454 ror r13d,5
455 xor r14d,ebx
456 and r12d,r9d
457 vaesenc xmm9,xmm9,xmm10
458 vmovdqu xmm10,XMMWORD[((112-128))+rdi]
459 xor r13d,r9d
460 add eax,DWORD[28+rsp]
461 mov esi,ebx
462 ror r14d,11
463 xor r12d,r11d
464 xor esi,ecx
465 ror r13d,6
466 add eax,r12d
467 and r15d,esi
468 xor r14d,ebx
469 add eax,r13d
470 xor r15d,ecx
471 add r8d,eax
472 ror r14d,2
473 add eax,r15d
474 mov r13d,r8d
475 add r14d,eax
476 vmovdqa XMMWORD[16+rsp],xmm6
; Third quarter: expands xmm2, consumes stack slot [32+rsp].
477 vpalignr xmm4,xmm3,xmm2,4
478 ror r13d,14
479 mov eax,r14d
480 vpalignr xmm7,xmm1,xmm0,4
481 mov r12d,r9d
482 xor r13d,r8d
483DB 143,232,120,194,236,14
484 ror r14d,9
485 xor r12d,r10d
486 vpsrld xmm4,xmm4,3
487 ror r13d,5
488 xor r14d,eax
489 vpaddd xmm2,xmm2,xmm7
490 and r12d,r8d
491 vaesenc xmm9,xmm9,xmm10
492 vmovdqu xmm10,XMMWORD[((128-128))+rdi]
493 xor r13d,r8d
494 add r11d,DWORD[32+rsp]
495 mov r15d,eax
496DB 143,232,120,194,245,11
497 ror r14d,11
498 xor r12d,r10d
499 vpxor xmm4,xmm4,xmm5
500 xor r15d,ebx
501 ror r13d,6
502 add r11d,r12d
503 and esi,r15d
504DB 143,232,120,194,249,13
505 xor r14d,eax
506 add r11d,r13d
507 vpxor xmm4,xmm4,xmm6
508 xor esi,ebx
509 add edx,r11d
510 vpsrld xmm6,xmm1,10
511 ror r14d,2
512 add r11d,esi
513 vpaddd xmm2,xmm2,xmm4
514 mov r13d,edx
515 add r14d,r11d
516DB 143,232,120,194,239,2
517 ror r13d,14
518 mov r11d,r14d
519 vpxor xmm7,xmm7,xmm6
520 mov r12d,r8d
521 xor r13d,edx
522 ror r14d,9
523 xor r12d,r9d
524 vpxor xmm7,xmm7,xmm5
525 ror r13d,5
526 xor r14d,r11d
527 and r12d,edx
528 vaesenc xmm9,xmm9,xmm10
529 vmovdqu xmm10,XMMWORD[((144-128))+rdi]
530 xor r13d,edx
531 vpsrldq xmm7,xmm7,8
532 add r10d,DWORD[36+rsp]
533 mov esi,r11d
534 ror r14d,11
535 xor r12d,r9d
536 vpaddd xmm2,xmm2,xmm7
537 xor esi,eax
538 ror r13d,6
539 add r10d,r12d
540 and r15d,esi
541DB 143,232,120,194,250,13
542 xor r14d,r11d
543 add r10d,r13d
544 vpsrld xmm6,xmm2,10
545 xor r15d,eax
546 add ecx,r10d
547DB 143,232,120,194,239,2
548 ror r14d,2
549 add r10d,r15d
550 vpxor xmm7,xmm7,xmm6
551 mov r13d,ecx
552 add r14d,r10d
553 ror r13d,14
554 mov r10d,r14d
555 vpxor xmm7,xmm7,xmm5
556 mov r12d,edx
557 xor r13d,ecx
558 ror r14d,9
559 xor r12d,r8d
560 vpslldq xmm7,xmm7,8
561 ror r13d,5
562 xor r14d,r10d
563 and r12d,ecx
564 vaesenc xmm9,xmm9,xmm10
565 vmovdqu xmm10,XMMWORD[((160-128))+rdi]
566 xor r13d,ecx
567 vpaddd xmm2,xmm2,xmm7
568 add r9d,DWORD[40+rsp]
569 mov r15d,r10d
570 ror r14d,11
571 xor r12d,r8d
572 vpaddd xmm6,xmm2,XMMWORD[64+rbp]
573 xor r15d,r11d
574 ror r13d,6
575 add r9d,r12d
576 and esi,r15d
577 xor r14d,r10d
578 add r9d,r13d
579 xor esi,r11d
580 add ebx,r9d
581 ror r14d,2
582 add r9d,esi
583 mov r13d,ebx
584 add r14d,r9d
585 ror r13d,14
586 mov r9d,r14d
587 mov r12d,ecx
588 xor r13d,ebx
589 ror r14d,9
590 xor r12d,edx
591 ror r13d,5
592 xor r14d,r9d
593 and r12d,ebx
; First final-round candidate (key at offset 160) is taken into xmm11
; while the regular round with the same key continues in xmm9.
594 vaesenclast xmm11,xmm9,xmm10
595 vaesenc xmm9,xmm9,xmm10
596 vmovdqu xmm10,XMMWORD[((176-128))+rdi]
597 xor r13d,ebx
598 add r8d,DWORD[44+rsp]
599 mov esi,r9d
600 ror r14d,11
601 xor r12d,edx
602 xor esi,r10d
603 ror r13d,6
604 add r8d,r12d
605 and r15d,esi
606 xor r14d,r9d
607 add r8d,r13d
608 xor r15d,r10d
609 add eax,r8d
610 ror r14d,2
611 add r8d,r15d
612 mov r13d,eax
613 add r14d,r8d
614 vmovdqa XMMWORD[32+rsp],xmm6
; Fourth quarter: expands xmm3, consumes stack slot [48+rsp].
615 vpalignr xmm4,xmm0,xmm3,4
616 ror r13d,14
617 mov r8d,r14d
618 vpalignr xmm7,xmm2,xmm1,4
619 mov r12d,ebx
620 xor r13d,eax
621DB 143,232,120,194,236,14
622 ror r14d,9
623 xor r12d,ecx
624 vpsrld xmm4,xmm4,3
625 ror r13d,5
626 xor r14d,r8d
627 vpaddd xmm3,xmm3,xmm7
628 and r12d,eax
; Keep candidate 1 only where mask xmm12 is set.
629 vpand xmm8,xmm11,xmm12
630 vaesenc xmm9,xmm9,xmm10
631 vmovdqu xmm10,XMMWORD[((192-128))+rdi]
632 xor r13d,eax
633 add edx,DWORD[48+rsp]
634 mov r15d,r8d
635DB 143,232,120,194,245,11
636 ror r14d,11
637 xor r12d,ecx
638 vpxor xmm4,xmm4,xmm5
639 xor r15d,r9d
640 ror r13d,6
641 add edx,r12d
642 and esi,r15d
643DB 143,232,120,194,250,13
644 xor r14d,r8d
645 add edx,r13d
646 vpxor xmm4,xmm4,xmm6
647 xor esi,r9d
648 add r11d,edx
649 vpsrld xmm6,xmm2,10
650 ror r14d,2
651 add edx,esi
652 vpaddd xmm3,xmm3,xmm4
653 mov r13d,r11d
654 add r14d,edx
655DB 143,232,120,194,239,2
656 ror r13d,14
657 mov edx,r14d
658 vpxor xmm7,xmm7,xmm6
659 mov r12d,eax
660 xor r13d,r11d
661 ror r14d,9
662 xor r12d,ebx
663 vpxor xmm7,xmm7,xmm5
664 ror r13d,5
665 xor r14d,edx
666 and r12d,r11d
; Second final-round candidate (key at offset 192).
667 vaesenclast xmm11,xmm9,xmm10
668 vaesenc xmm9,xmm9,xmm10
669 vmovdqu xmm10,XMMWORD[((208-128))+rdi]
670 xor r13d,r11d
671 vpsrldq xmm7,xmm7,8
672 add ecx,DWORD[52+rsp]
673 mov esi,edx
674 ror r14d,11
675 xor r12d,ebx
676 vpaddd xmm3,xmm3,xmm7
677 xor esi,r8d
678 ror r13d,6
679 add ecx,r12d
680 and r15d,esi
681DB 143,232,120,194,251,13
682 xor r14d,edx
683 add ecx,r13d
684 vpsrld xmm6,xmm3,10
685 xor r15d,r8d
686 add r10d,ecx
687DB 143,232,120,194,239,2
688 ror r14d,2
689 add ecx,r15d
690 vpxor xmm7,xmm7,xmm6
691 mov r13d,r10d
692 add r14d,ecx
693 ror r13d,14
694 mov ecx,r14d
695 vpxor xmm7,xmm7,xmm5
696 mov r12d,r11d
697 xor r13d,r10d
698 ror r14d,9
699 xor r12d,eax
700 vpslldq xmm7,xmm7,8
701 ror r13d,5
702 xor r14d,ecx
703 and r12d,r10d
; Keep candidate 2 only where mask xmm13 is set.
704 vpand xmm11,xmm11,xmm13
705 vaesenc xmm9,xmm9,xmm10
706 vmovdqu xmm10,XMMWORD[((224-128))+rdi]
707 xor r13d,r10d
708 vpaddd xmm3,xmm3,xmm7
709 add ebx,DWORD[56+rsp]
710 mov r15d,ecx
711 ror r14d,11
712 xor r12d,eax
713 vpaddd xmm6,xmm3,XMMWORD[96+rbp]
714 xor r15d,edx
715 ror r13d,6
716 add ebx,r12d
717 and esi,r15d
718 xor r14d,ecx
719 add ebx,r13d
720 xor esi,edx
721 add r9d,ebx
722 ror r14d,2
723 add ebx,esi
724 mov r13d,r9d
725 add r14d,ebx
726 ror r13d,14
727 mov ebx,r14d
728 mov r12d,r10d
729 xor r13d,r9d
730 ror r14d,9
731 xor r12d,r11d
732 ror r13d,5
733 xor r14d,ebx
734 and r12d,r9d
735 vpor xmm8,xmm8,xmm11
; Third final-round candidate (key at offset 224); then reload the first
; round key for the next AES block.
736 vaesenclast xmm11,xmm9,xmm10
737 vmovdqu xmm10,XMMWORD[((0-128))+rdi]
738 xor r13d,r9d
739 add eax,DWORD[60+rsp]
740 mov esi,ebx
741 ror r14d,11
742 xor r12d,r11d
743 xor esi,ecx
744 ror r13d,6
745 add eax,r12d
746 and r15d,esi
747 xor r14d,ebx
748 add eax,r13d
749 xor r15d,ecx
750 add r8d,eax
751 ror r14d,2
752 add eax,r15d
753 mov r13d,r8d
754 add r14d,eax
755 vmovdqa XMMWORD[48+rsp],xmm6
; Merge candidate 3 (mask xmm14) into xmm8 and store the ciphertext block
; at input pointer + (output - input); then advance the pointer by 16.
756 mov r12,QWORD[((64+0))+rsp]
757 vpand xmm11,xmm11,xmm14
758 mov r15,QWORD[((64+8))+rsp]
759 vpor xmm8,xmm8,xmm11
760 vmovdqu XMMWORD[r12*1+r15],xmm8
761 lea r12,[16+r12]
; Loop until the sentinel byte inside the shuffle mask (K256+515) is reached.
762 cmp BYTE[131+rbp],0
763 jne NEAR $L$xop_00_47
;-----------------------------------------------------------------------
; Fall-through tail of the round loop: the last 16 hash rounds of the
; current 64-byte block.  No message-schedule expansion happens here;
; the rounds only consume the (word + constant) values already stored at
; [rsp]..[60+rsp].  One more 16-byte AES-CBC block is encrypted in
; parallel, using the same three-candidate vaesenclast / mask scheme
; (xmm12, xmm13, xmm14) as the loop above it.
; Scalar register use is unchanged: eax,ebx,ecx,edx,r8d..r11d are the
; eight working words; r13d/r14d accumulate the two rotate-xor sums,
; r12d and esi/r15d the two bitwise selection terms.
; After the rounds the working words are added to the eight state words
; at [r15], stored back, and the outer loop repeats while the running
; input pointer is below the end pointer saved at [(64+16)+rsp].
;-----------------------------------------------------------------------
764 vmovdqu xmm9,XMMWORD[r12]
765 mov QWORD[((64+0))+rsp],r12
766 ror r13d,14
767 mov eax,r14d
768 mov r12d,r9d
769 xor r13d,r8d
770 ror r14d,9
771 xor r12d,r10d
772 ror r13d,5
773 xor r14d,eax
774 and r12d,r8d
; AES: xor the input block with the first round key.
775 vpxor xmm9,xmm9,xmm10
776 vmovdqu xmm10,XMMWORD[((16-128))+rdi]
777 xor r13d,r8d
778 add r11d,DWORD[rsp]
779 mov r15d,eax
780 ror r14d,11
781 xor r12d,r10d
782 xor r15d,ebx
783 ror r13d,6
784 add r11d,r12d
785 and esi,r15d
786 xor r14d,eax
787 add r11d,r13d
788 xor esi,ebx
789 add edx,r11d
790 ror r14d,2
791 add r11d,esi
792 mov r13d,edx
793 add r14d,r11d
794 ror r13d,14
795 mov r11d,r14d
796 mov r12d,r8d
797 xor r13d,edx
798 ror r14d,9
799 xor r12d,r9d
800 ror r13d,5
801 xor r14d,r11d
802 and r12d,edx
; AES-CBC chaining: xor in the previous ciphertext block (xmm8).
803 vpxor xmm9,xmm9,xmm8
804 xor r13d,edx
805 add r10d,DWORD[4+rsp]
806 mov esi,r11d
807 ror r14d,11
808 xor r12d,r9d
809 xor esi,eax
810 ror r13d,6
811 add r10d,r12d
812 and r15d,esi
813 xor r14d,r11d
814 add r10d,r13d
815 xor r15d,eax
816 add ecx,r10d
817 ror r14d,2
818 add r10d,r15d
819 mov r13d,ecx
820 add r14d,r10d
821 ror r13d,14
822 mov r10d,r14d
823 mov r12d,edx
824 xor r13d,ecx
825 ror r14d,9
826 xor r12d,r8d
827 ror r13d,5
828 xor r14d,r10d
829 and r12d,ecx
830 vaesenc xmm9,xmm9,xmm10
831 vmovdqu xmm10,XMMWORD[((32-128))+rdi]
832 xor r13d,ecx
833 add r9d,DWORD[8+rsp]
834 mov r15d,r10d
835 ror r14d,11
836 xor r12d,r8d
837 xor r15d,r11d
838 ror r13d,6
839 add r9d,r12d
840 and esi,r15d
841 xor r14d,r10d
842 add r9d,r13d
843 xor esi,r11d
844 add ebx,r9d
845 ror r14d,2
846 add r9d,esi
847 mov r13d,ebx
848 add r14d,r9d
849 ror r13d,14
850 mov r9d,r14d
851 mov r12d,ecx
852 xor r13d,ebx
853 ror r14d,9
854 xor r12d,edx
855 ror r13d,5
856 xor r14d,r9d
857 and r12d,ebx
858 vaesenc xmm9,xmm9,xmm10
859 vmovdqu xmm10,XMMWORD[((48-128))+rdi]
860 xor r13d,ebx
861 add r8d,DWORD[12+rsp]
862 mov esi,r9d
863 ror r14d,11
864 xor r12d,edx
865 xor esi,r10d
866 ror r13d,6
867 add r8d,r12d
868 and r15d,esi
869 xor r14d,r9d
870 add r8d,r13d
871 xor r15d,r10d
872 add eax,r8d
873 ror r14d,2
874 add r8d,r15d
875 mov r13d,eax
876 add r14d,r8d
877 ror r13d,14
878 mov r8d,r14d
879 mov r12d,ebx
880 xor r13d,eax
881 ror r14d,9
882 xor r12d,ecx
883 ror r13d,5
884 xor r14d,r8d
885 and r12d,eax
886 vaesenc xmm9,xmm9,xmm10
887 vmovdqu xmm10,XMMWORD[((64-128))+rdi]
888 xor r13d,eax
889 add edx,DWORD[16+rsp]
890 mov r15d,r8d
891 ror r14d,11
892 xor r12d,ecx
893 xor r15d,r9d
894 ror r13d,6
895 add edx,r12d
896 and esi,r15d
897 xor r14d,r8d
898 add edx,r13d
899 xor esi,r9d
900 add r11d,edx
901 ror r14d,2
902 add edx,esi
903 mov r13d,r11d
904 add r14d,edx
905 ror r13d,14
906 mov edx,r14d
907 mov r12d,eax
908 xor r13d,r11d
909 ror r14d,9
910 xor r12d,ebx
911 ror r13d,5
912 xor r14d,edx
913 and r12d,r11d
914 vaesenc xmm9,xmm9,xmm10
915 vmovdqu xmm10,XMMWORD[((80-128))+rdi]
916 xor r13d,r11d
917 add ecx,DWORD[20+rsp]
918 mov esi,edx
919 ror r14d,11
920 xor r12d,ebx
921 xor esi,r8d
922 ror r13d,6
923 add ecx,r12d
924 and r15d,esi
925 xor r14d,edx
926 add ecx,r13d
927 xor r15d,r8d
928 add r10d,ecx
929 ror r14d,2
930 add ecx,r15d
931 mov r13d,r10d
932 add r14d,ecx
933 ror r13d,14
934 mov ecx,r14d
935 mov r12d,r11d
936 xor r13d,r10d
937 ror r14d,9
938 xor r12d,eax
939 ror r13d,5
940 xor r14d,ecx
941 and r12d,r10d
942 vaesenc xmm9,xmm9,xmm10
943 vmovdqu xmm10,XMMWORD[((96-128))+rdi]
944 xor r13d,r10d
945 add ebx,DWORD[24+rsp]
946 mov r15d,ecx
947 ror r14d,11
948 xor r12d,eax
949 xor r15d,edx
950 ror r13d,6
951 add ebx,r12d
952 and esi,r15d
953 xor r14d,ecx
954 add ebx,r13d
955 xor esi,edx
956 add r9d,ebx
957 ror r14d,2
958 add ebx,esi
959 mov r13d,r9d
960 add r14d,ebx
961 ror r13d,14
962 mov ebx,r14d
963 mov r12d,r10d
964 xor r13d,r9d
965 ror r14d,9
966 xor r12d,r11d
967 ror r13d,5
968 xor r14d,ebx
969 and r12d,r9d
970 vaesenc xmm9,xmm9,xmm10
971 vmovdqu xmm10,XMMWORD[((112-128))+rdi]
972 xor r13d,r9d
973 add eax,DWORD[28+rsp]
974 mov esi,ebx
975 ror r14d,11
976 xor r12d,r11d
977 xor esi,ecx
978 ror r13d,6
979 add eax,r12d
980 and r15d,esi
981 xor r14d,ebx
982 add eax,r13d
983 xor r15d,ecx
984 add r8d,eax
985 ror r14d,2
986 add eax,r15d
987 mov r13d,r8d
988 add r14d,eax
989 ror r13d,14
990 mov eax,r14d
991 mov r12d,r9d
992 xor r13d,r8d
993 ror r14d,9
994 xor r12d,r10d
995 ror r13d,5
996 xor r14d,eax
997 and r12d,r8d
998 vaesenc xmm9,xmm9,xmm10
999 vmovdqu xmm10,XMMWORD[((128-128))+rdi]
1000 xor r13d,r8d
1001 add r11d,DWORD[32+rsp]
1002 mov r15d,eax
1003 ror r14d,11
1004 xor r12d,r10d
1005 xor r15d,ebx
1006 ror r13d,6
1007 add r11d,r12d
1008 and esi,r15d
1009 xor r14d,eax
1010 add r11d,r13d
1011 xor esi,ebx
1012 add edx,r11d
1013 ror r14d,2
1014 add r11d,esi
1015 mov r13d,edx
1016 add r14d,r11d
1017 ror r13d,14
1018 mov r11d,r14d
1019 mov r12d,r8d
1020 xor r13d,edx
1021 ror r14d,9
1022 xor r12d,r9d
1023 ror r13d,5
1024 xor r14d,r11d
1025 and r12d,edx
1026 vaesenc xmm9,xmm9,xmm10
1027 vmovdqu xmm10,XMMWORD[((144-128))+rdi]
1028 xor r13d,edx
1029 add r10d,DWORD[36+rsp]
1030 mov esi,r11d
1031 ror r14d,11
1032 xor r12d,r9d
1033 xor esi,eax
1034 ror r13d,6
1035 add r10d,r12d
1036 and r15d,esi
1037 xor r14d,r11d
1038 add r10d,r13d
1039 xor r15d,eax
1040 add ecx,r10d
1041 ror r14d,2
1042 add r10d,r15d
1043 mov r13d,ecx
1044 add r14d,r10d
1045 ror r13d,14
1046 mov r10d,r14d
1047 mov r12d,edx
1048 xor r13d,ecx
1049 ror r14d,9
1050 xor r12d,r8d
1051 ror r13d,5
1052 xor r14d,r10d
1053 and r12d,ecx
1054 vaesenc xmm9,xmm9,xmm10
1055 vmovdqu xmm10,XMMWORD[((160-128))+rdi]
1056 xor r13d,ecx
1057 add r9d,DWORD[40+rsp]
1058 mov r15d,r10d
1059 ror r14d,11
1060 xor r12d,r8d
1061 xor r15d,r11d
1062 ror r13d,6
1063 add r9d,r12d
1064 and esi,r15d
1065 xor r14d,r10d
1066 add r9d,r13d
1067 xor esi,r11d
1068 add ebx,r9d
1069 ror r14d,2
1070 add r9d,esi
1071 mov r13d,ebx
1072 add r14d,r9d
1073 ror r13d,14
1074 mov r9d,r14d
1075 mov r12d,ecx
1076 xor r13d,ebx
1077 ror r14d,9
1078 xor r12d,edx
1079 ror r13d,5
1080 xor r14d,r9d
1081 and r12d,ebx
; First final-round candidate (key at offset 160).
1082 vaesenclast xmm11,xmm9,xmm10
1083 vaesenc xmm9,xmm9,xmm10
1084 vmovdqu xmm10,XMMWORD[((176-128))+rdi]
1085 xor r13d,ebx
1086 add r8d,DWORD[44+rsp]
1087 mov esi,r9d
1088 ror r14d,11
1089 xor r12d,edx
1090 xor esi,r10d
1091 ror r13d,6
1092 add r8d,r12d
1093 and r15d,esi
1094 xor r14d,r9d
1095 add r8d,r13d
1096 xor r15d,r10d
1097 add eax,r8d
1098 ror r14d,2
1099 add r8d,r15d
1100 mov r13d,eax
1101 add r14d,r8d
1102 ror r13d,14
1103 mov r8d,r14d
1104 mov r12d,ebx
1105 xor r13d,eax
1106 ror r14d,9
1107 xor r12d,ecx
1108 ror r13d,5
1109 xor r14d,r8d
1110 and r12d,eax
1111 vpand xmm8,xmm11,xmm12
1112 vaesenc xmm9,xmm9,xmm10
1113 vmovdqu xmm10,XMMWORD[((192-128))+rdi]
1114 xor r13d,eax
1115 add edx,DWORD[48+rsp]
1116 mov r15d,r8d
1117 ror r14d,11
1118 xor r12d,ecx
1119 xor r15d,r9d
1120 ror r13d,6
1121 add edx,r12d
1122 and esi,r15d
1123 xor r14d,r8d
1124 add edx,r13d
1125 xor esi,r9d
1126 add r11d,edx
1127 ror r14d,2
1128 add edx,esi
1129 mov r13d,r11d
1130 add r14d,edx
1131 ror r13d,14
1132 mov edx,r14d
1133 mov r12d,eax
1134 xor r13d,r11d
1135 ror r14d,9
1136 xor r12d,ebx
1137 ror r13d,5
1138 xor r14d,edx
1139 and r12d,r11d
; Second final-round candidate (key at offset 192).
1140 vaesenclast xmm11,xmm9,xmm10
1141 vaesenc xmm9,xmm9,xmm10
1142 vmovdqu xmm10,XMMWORD[((208-128))+rdi]
1143 xor r13d,r11d
1144 add ecx,DWORD[52+rsp]
1145 mov esi,edx
1146 ror r14d,11
1147 xor r12d,ebx
1148 xor esi,r8d
1149 ror r13d,6
1150 add ecx,r12d
1151 and r15d,esi
1152 xor r14d,edx
1153 add ecx,r13d
1154 xor r15d,r8d
1155 add r10d,ecx
1156 ror r14d,2
1157 add ecx,r15d
1158 mov r13d,r10d
1159 add r14d,ecx
1160 ror r13d,14
1161 mov ecx,r14d
1162 mov r12d,r11d
1163 xor r13d,r10d
1164 ror r14d,9
1165 xor r12d,eax
1166 ror r13d,5
1167 xor r14d,ecx
1168 and r12d,r10d
1169 vpand xmm11,xmm11,xmm13
1170 vaesenc xmm9,xmm9,xmm10
1171 vmovdqu xmm10,XMMWORD[((224-128))+rdi]
1172 xor r13d,r10d
1173 add ebx,DWORD[56+rsp]
1174 mov r15d,ecx
1175 ror r14d,11
1176 xor r12d,eax
1177 xor r15d,edx
1178 ror r13d,6
1179 add ebx,r12d
1180 and esi,r15d
1181 xor r14d,ecx
1182 add ebx,r13d
1183 xor esi,edx
1184 add r9d,ebx
1185 ror r14d,2
1186 add ebx,esi
1187 mov r13d,r9d
1188 add r14d,ebx
1189 ror r13d,14
1190 mov ebx,r14d
1191 mov r12d,r10d
1192 xor r13d,r9d
1193 ror r14d,9
1194 xor r12d,r11d
1195 ror r13d,5
1196 xor r14d,ebx
1197 and r12d,r9d
1198 vpor xmm8,xmm8,xmm11
; Third final-round candidate (key at offset 224); then reload the first
; round key for the next AES block.
1199 vaesenclast xmm11,xmm9,xmm10
1200 vmovdqu xmm10,XMMWORD[((0-128))+rdi]
1201 xor r13d,r9d
1202 add eax,DWORD[60+rsp]
1203 mov esi,ebx
1204 ror r14d,11
1205 xor r12d,r11d
1206 xor esi,ecx
1207 ror r13d,6
1208 add eax,r12d
1209 and r15d,esi
1210 xor r14d,ebx
1211 add eax,r13d
1212 xor r15d,ecx
1213 add r8d,eax
1214 ror r14d,2
1215 add eax,r15d
1216 mov r13d,r8d
1217 add r14d,eax
; Reload the pointers the rounds displaced: r12 = AES input pointer,
; r13 = output - input, r15 = hash state, rsi = hash-data - input.
1218 mov r12,QWORD[((64+0))+rsp]
1219 mov r13,QWORD[((64+8))+rsp]
1220 mov r15,QWORD[((64+40))+rsp]
1221 mov rsi,QWORD[((64+48))+rsp]
1222
; Merge the third candidate, store the ciphertext block, advance by 16.
; "mov eax,r14d" completes the pending update of the first working word.
1223 vpand xmm11,xmm11,xmm14
1224 mov eax,r14d
1225 vpor xmm8,xmm8,xmm11
1226 vmovdqu XMMWORD[r13*1+r12],xmm8
1227 lea r12,[16+r12]
1228
; Add the previous state words to the working words.
1229 add eax,DWORD[r15]
1230 add ebx,DWORD[4+r15]
1231 add ecx,DWORD[8+r15]
1232 add edx,DWORD[12+r15]
1233 add r8d,DWORD[16+r15]
1234 add r9d,DWORD[20+r15]
1235 add r10d,DWORD[24+r15]
1236 add r11d,DWORD[28+r15]
1237
; Compare first, store afterwards: the mov stores below leave the flags
; intact for the jb at the end.
1238 cmp r12,QWORD[((64+16))+rsp]
1239
1240 mov DWORD[r15],eax
1241 mov DWORD[4+r15],ebx
1242 mov DWORD[8+r15],ecx
1243 mov DWORD[12+r15],edx
1244 mov DWORD[16+r15],r8d
1245 mov DWORD[20+r15],r9d
1246 mov DWORD[24+r15],r10d
1247 mov DWORD[28+r15],r11d
1248
1249 jb NEAR $L$loop_xop
1250
; Function exit for aesni_cbc_sha256_enc_xop.
;  - writes the last ciphertext block (xmm8) back through the
;    chaining-value pointer saved at [(64+32)+rsp];
;  - clears all vector registers with vzeroall, then restores the
;    callee-saved xmm6..xmm15 from the frame;
;  - restores r15,r14,r13,r12,rbp,rbx from just below the entry rsp
;    (saved at [120+rsp]), which is where the six pushes placed them;
;  - restores rsp, then rdi/rsi from the caller's home slots.
1251 mov r8,QWORD[((64+32))+rsp]
1252 mov rsi,QWORD[120+rsp]
1253
1254 vmovdqu XMMWORD[r8],xmm8
1255 vzeroall
1256 movaps xmm6,XMMWORD[128+rsp]
1257 movaps xmm7,XMMWORD[144+rsp]
1258 movaps xmm8,XMMWORD[160+rsp]
1259 movaps xmm9,XMMWORD[176+rsp]
1260 movaps xmm10,XMMWORD[192+rsp]
1261 movaps xmm11,XMMWORD[208+rsp]
1262 movaps xmm12,XMMWORD[224+rsp]
1263 movaps xmm13,XMMWORD[240+rsp]
1264 movaps xmm14,XMMWORD[256+rsp]
1265 movaps xmm15,XMMWORD[272+rsp]
; rsi = rsp value at entry; pushed registers sit at rsi-8 .. rsi-48.
1266 mov r15,QWORD[((-48))+rsi]
1267
1268 mov r14,QWORD[((-40))+rsi]
1269
1270 mov r13,QWORD[((-32))+rsi]
1271
1272 mov r12,QWORD[((-24))+rsi]
1273
1274 mov rbp,QWORD[((-16))+rsi]
1275
1276 mov rbx,QWORD[((-8))+rsi]
1277
1278 lea rsp,[rsi]
1279
1280$L$epilogue_xop:
1281 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1282 mov rsi,QWORD[16+rsp]
; Two-byte "rep ret" emitted as raw bytes.
1283 DB 0F3h,0C3h ;repret
1284
1285$L$SEH_end_aesni_cbc_sha256_enc_xop:
1286
1287ALIGN 64
1288aesni_cbc_sha256_enc_avx:
1289 mov QWORD[8+rsp],rdi ;WIN64 prologue
1290 mov QWORD[16+rsp],rsi
1291 mov rax,rsp
1292$L$SEH_begin_aesni_cbc_sha256_enc_avx:
1293 mov rdi,rcx
1294 mov rsi,rdx
1295 mov rdx,r8
1296 mov rcx,r9
1297 mov r8,QWORD[40+rsp]
1298 mov r9,QWORD[48+rsp]
1299
1300
1301
1302$L$avx_shortcut:
1303 mov r10,QWORD[56+rsp]
1304 mov rax,rsp
1305
1306 push rbx
1307
1308 push rbp
1309
1310 push r12
1311
1312 push r13
1313
1314 push r14
1315
1316 push r15
1317
1318 sub rsp,288
1319 and rsp,-64
1320
1321 shl rdx,6
1322 sub rsi,rdi
1323 sub r10,rdi
1324 add rdx,rdi
1325
1326
1327 mov QWORD[((64+8))+rsp],rsi
1328 mov QWORD[((64+16))+rsp],rdx
1329
1330 mov QWORD[((64+32))+rsp],r8
1331 mov QWORD[((64+40))+rsp],r9
1332 mov QWORD[((64+48))+rsp],r10
1333 mov QWORD[120+rsp],rax
1334
1335 movaps XMMWORD[128+rsp],xmm6
1336 movaps XMMWORD[144+rsp],xmm7
1337 movaps XMMWORD[160+rsp],xmm8
1338 movaps XMMWORD[176+rsp],xmm9
1339 movaps XMMWORD[192+rsp],xmm10
1340 movaps XMMWORD[208+rsp],xmm11
1341 movaps XMMWORD[224+rsp],xmm12
1342 movaps XMMWORD[240+rsp],xmm13
1343 movaps XMMWORD[256+rsp],xmm14
1344 movaps XMMWORD[272+rsp],xmm15
1345$L$prologue_avx:
1346 vzeroall
1347
1348 mov r12,rdi
1349 lea rdi,[128+rcx]
1350 lea r13,[((K256+544))]
1351 mov r14d,DWORD[((240-128))+rdi]
1352 mov r15,r9
1353 mov rsi,r10
1354 vmovdqu xmm8,XMMWORD[r8]
1355 sub r14,9
1356
1357 mov eax,DWORD[r15]
1358 mov ebx,DWORD[4+r15]
1359 mov ecx,DWORD[8+r15]
1360 mov edx,DWORD[12+r15]
1361 mov r8d,DWORD[16+r15]
1362 mov r9d,DWORD[20+r15]
1363 mov r10d,DWORD[24+r15]
1364 mov r11d,DWORD[28+r15]
1365
1366 vmovdqa xmm14,XMMWORD[r14*8+r13]
1367 vmovdqa xmm13,XMMWORD[16+r14*8+r13]
1368 vmovdqa xmm12,XMMWORD[32+r14*8+r13]
1369 vmovdqu xmm10,XMMWORD[((0-128))+rdi]
1370 jmp NEAR $L$loop_avx
1371ALIGN 16
1372$L$loop_avx:
1373 vmovdqa xmm7,XMMWORD[((K256+512))]
1374 vmovdqu xmm0,XMMWORD[r12*1+rsi]
1375 vmovdqu xmm1,XMMWORD[16+r12*1+rsi]
1376 vmovdqu xmm2,XMMWORD[32+r12*1+rsi]
1377 vmovdqu xmm3,XMMWORD[48+r12*1+rsi]
1378 vpshufb xmm0,xmm0,xmm7
1379 lea rbp,[K256]
1380 vpshufb xmm1,xmm1,xmm7
1381 vpshufb xmm2,xmm2,xmm7
1382 vpaddd xmm4,xmm0,XMMWORD[rbp]
1383 vpshufb xmm3,xmm3,xmm7
1384 vpaddd xmm5,xmm1,XMMWORD[32+rbp]
1385 vpaddd xmm6,xmm2,XMMWORD[64+rbp]
1386 vpaddd xmm7,xmm3,XMMWORD[96+rbp]
1387 vmovdqa XMMWORD[rsp],xmm4
1388 mov r14d,eax
1389 vmovdqa XMMWORD[16+rsp],xmm5
1390 mov esi,ebx
1391 vmovdqa XMMWORD[32+rsp],xmm6
1392 xor esi,ecx
1393 vmovdqa XMMWORD[48+rsp],xmm7
1394 mov r13d,r8d
1395 jmp NEAR $L$avx_00_47
1396
1397ALIGN 16
1398$L$avx_00_47:
1399 sub rbp,-16*2*4
1400 vmovdqu xmm9,XMMWORD[r12]
1401 mov QWORD[((64+0))+rsp],r12
1402 vpalignr xmm4,xmm1,xmm0,4
1403 shrd r13d,r13d,14
1404 mov eax,r14d
1405 mov r12d,r9d
1406 vpalignr xmm7,xmm3,xmm2,4
1407 xor r13d,r8d
1408 shrd r14d,r14d,9
1409 xor r12d,r10d
1410 vpsrld xmm6,xmm4,7
1411 shrd r13d,r13d,5
1412 xor r14d,eax
1413 and r12d,r8d
1414 vpaddd xmm0,xmm0,xmm7
1415 vpxor xmm9,xmm9,xmm10
1416 vmovdqu xmm10,XMMWORD[((16-128))+rdi]
1417 xor r13d,r8d
1418 add r11d,DWORD[rsp]
1419 mov r15d,eax
1420 vpsrld xmm7,xmm4,3
1421 shrd r14d,r14d,11
1422 xor r12d,r10d
1423 xor r15d,ebx
1424 vpslld xmm5,xmm4,14
1425 shrd r13d,r13d,6
1426 add r11d,r12d
1427 and esi,r15d
1428 vpxor xmm4,xmm7,xmm6
1429 xor r14d,eax
1430 add r11d,r13d
1431 xor esi,ebx
1432 vpshufd xmm7,xmm3,250
1433 add edx,r11d
1434 shrd r14d,r14d,2
1435 add r11d,esi
1436 vpsrld xmm6,xmm6,11
1437 mov r13d,edx
1438 add r14d,r11d
1439 shrd r13d,r13d,14
1440 vpxor xmm4,xmm4,xmm5
1441 mov r11d,r14d
1442 mov r12d,r8d
1443 xor r13d,edx
1444 vpslld xmm5,xmm5,11
1445 shrd r14d,r14d,9
1446 xor r12d,r9d
1447 shrd r13d,r13d,5
1448 vpxor xmm4,xmm4,xmm6
1449 xor r14d,r11d
1450 and r12d,edx
1451 vpxor xmm9,xmm9,xmm8
1452 xor r13d,edx
1453 vpsrld xmm6,xmm7,10
1454 add r10d,DWORD[4+rsp]
1455 mov esi,r11d
1456 shrd r14d,r14d,11
1457 vpxor xmm4,xmm4,xmm5
1458 xor r12d,r9d
1459 xor esi,eax
1460 shrd r13d,r13d,6
1461 vpsrlq xmm7,xmm7,17
1462 add r10d,r12d
1463 and r15d,esi
1464 xor r14d,r11d
1465 vpaddd xmm0,xmm0,xmm4
1466 add r10d,r13d
1467 xor r15d,eax
1468 add ecx,r10d
1469 vpxor xmm6,xmm6,xmm7
1470 shrd r14d,r14d,2
1471 add r10d,r15d
1472 mov r13d,ecx
1473 vpsrlq xmm7,xmm7,2
1474 add r14d,r10d
1475 shrd r13d,r13d,14
1476 mov r10d,r14d
1477 vpxor xmm6,xmm6,xmm7
1478 mov r12d,edx
1479 xor r13d,ecx
1480 shrd r14d,r14d,9
1481 vpshufd xmm6,xmm6,132
1482 xor r12d,r8d
1483 shrd r13d,r13d,5
1484 xor r14d,r10d
1485 vpsrldq xmm6,xmm6,8
1486 and r12d,ecx
1487 vaesenc xmm9,xmm9,xmm10
1488 vmovdqu xmm10,XMMWORD[((32-128))+rdi]
1489 xor r13d,ecx
1490 add r9d,DWORD[8+rsp]
1491 vpaddd xmm0,xmm0,xmm6
1492 mov r15d,r10d
1493 shrd r14d,r14d,11
1494 xor r12d,r8d
1495 vpshufd xmm7,xmm0,80
1496 xor r15d,r11d
1497 shrd r13d,r13d,6
1498 add r9d,r12d
1499 vpsrld xmm6,xmm7,10
1500 and esi,r15d
1501 xor r14d,r10d
1502 add r9d,r13d
1503 vpsrlq xmm7,xmm7,17
1504 xor esi,r11d
1505 add ebx,r9d
1506 shrd r14d,r14d,2
1507 vpxor xmm6,xmm6,xmm7
1508 add r9d,esi
1509 mov r13d,ebx
1510 add r14d,r9d
1511 vpsrlq xmm7,xmm7,2
1512 shrd r13d,r13d,14
1513 mov r9d,r14d
1514 mov r12d,ecx
1515 vpxor xmm6,xmm6,xmm7
1516 xor r13d,ebx
1517 shrd r14d,r14d,9
1518 xor r12d,edx
1519 vpshufd xmm6,xmm6,232
1520 shrd r13d,r13d,5
1521 xor r14d,r9d
1522 and r12d,ebx
1523 vpslldq xmm6,xmm6,8
1524 vaesenc xmm9,xmm9,xmm10
1525 vmovdqu xmm10,XMMWORD[((48-128))+rdi]
1526 xor r13d,ebx
1527 add r8d,DWORD[12+rsp]
1528 mov esi,r9d
1529 vpaddd xmm0,xmm0,xmm6
1530 shrd r14d,r14d,11
1531 xor r12d,edx
1532 xor esi,r10d
1533 vpaddd xmm6,xmm0,XMMWORD[rbp]
1534 shrd r13d,r13d,6
1535 add r8d,r12d
1536 and r15d,esi
1537 xor r14d,r9d
1538 add r8d,r13d
1539 xor r15d,r10d
1540 add eax,r8d
1541 shrd r14d,r14d,2
1542 add r8d,r15d
1543 mov r13d,eax
1544 add r14d,r8d
1545 vmovdqa XMMWORD[rsp],xmm6
1546 vpalignr xmm4,xmm2,xmm1,4
1547 shrd r13d,r13d,14
1548 mov r8d,r14d
1549 mov r12d,ebx
1550 vpalignr xmm7,xmm0,xmm3,4
1551 xor r13d,eax
1552 shrd r14d,r14d,9
1553 xor r12d,ecx
1554 vpsrld xmm6,xmm4,7
1555 shrd r13d,r13d,5
1556 xor r14d,r8d
1557 and r12d,eax
1558 vpaddd xmm1,xmm1,xmm7
1559 vaesenc xmm9,xmm9,xmm10
1560 vmovdqu xmm10,XMMWORD[((64-128))+rdi]
1561 xor r13d,eax
1562 add edx,DWORD[16+rsp]
1563 mov r15d,r8d
1564 vpsrld xmm7,xmm4,3
1565 shrd r14d,r14d,11
1566 xor r12d,ecx
1567 xor r15d,r9d
1568 vpslld xmm5,xmm4,14
1569 shrd r13d,r13d,6
1570 add edx,r12d
1571 and esi,r15d
1572 vpxor xmm4,xmm7,xmm6
1573 xor r14d,r8d
1574 add edx,r13d
1575 xor esi,r9d
1576 vpshufd xmm7,xmm0,250
1577 add r11d,edx
1578 shrd r14d,r14d,2
1579 add edx,esi
1580 vpsrld xmm6,xmm6,11
1581 mov r13d,r11d
1582 add r14d,edx
1583 shrd r13d,r13d,14
1584 vpxor xmm4,xmm4,xmm5
1585 mov edx,r14d
1586 mov r12d,eax
1587 xor r13d,r11d
1588 vpslld xmm5,xmm5,11
1589 shrd r14d,r14d,9
1590 xor r12d,ebx
1591 shrd r13d,r13d,5
1592 vpxor xmm4,xmm4,xmm6
1593 xor r14d,edx
1594 and r12d,r11d
1595 vaesenc xmm9,xmm9,xmm10
1596 vmovdqu xmm10,XMMWORD[((80-128))+rdi]
1597 xor r13d,r11d
1598 vpsrld xmm6,xmm7,10
1599 add ecx,DWORD[20+rsp]
1600 mov esi,edx
1601 shrd r14d,r14d,11
1602 vpxor xmm4,xmm4,xmm5
1603 xor r12d,ebx
1604 xor esi,r8d
1605 shrd r13d,r13d,6
1606 vpsrlq xmm7,xmm7,17
1607 add ecx,r12d
1608 and r15d,esi
1609 xor r14d,edx
1610 vpaddd xmm1,xmm1,xmm4
1611 add ecx,r13d
1612 xor r15d,r8d
1613 add r10d,ecx
1614 vpxor xmm6,xmm6,xmm7
1615 shrd r14d,r14d,2
1616 add ecx,r15d
1617 mov r13d,r10d
1618 vpsrlq xmm7,xmm7,2
1619 add r14d,ecx
1620 shrd r13d,r13d,14
1621 mov ecx,r14d
1622 vpxor xmm6,xmm6,xmm7
1623 mov r12d,r11d
1624 xor r13d,r10d
1625 shrd r14d,r14d,9
1626 vpshufd xmm6,xmm6,132
1627 xor r12d,eax
1628 shrd r13d,r13d,5
1629 xor r14d,ecx
1630 vpsrldq xmm6,xmm6,8
1631 and r12d,r10d
1632 vaesenc xmm9,xmm9,xmm10
1633 vmovdqu xmm10,XMMWORD[((96-128))+rdi]
1634 xor r13d,r10d
1635 add ebx,DWORD[24+rsp]
1636 vpaddd xmm1,xmm1,xmm6
1637 mov r15d,ecx
1638 shrd r14d,r14d,11
1639 xor r12d,eax
1640 vpshufd xmm7,xmm1,80
1641 xor r15d,edx
1642 shrd r13d,r13d,6
1643 add ebx,r12d
1644 vpsrld xmm6,xmm7,10
1645 and esi,r15d
1646 xor r14d,ecx
1647 add ebx,r13d
1648 vpsrlq xmm7,xmm7,17
1649 xor esi,edx
1650 add r9d,ebx
1651 shrd r14d,r14d,2
1652 vpxor xmm6,xmm6,xmm7
1653 add ebx,esi
1654 mov r13d,r9d
1655 add r14d,ebx
1656 vpsrlq xmm7,xmm7,2
1657 shrd r13d,r13d,14
1658 mov ebx,r14d
1659 mov r12d,r10d
1660 vpxor xmm6,xmm6,xmm7
1661 xor r13d,r9d
1662 shrd r14d,r14d,9
1663 xor r12d,r11d
1664 vpshufd xmm6,xmm6,232
1665 shrd r13d,r13d,5
1666 xor r14d,ebx
1667 and r12d,r9d
1668 vpslldq xmm6,xmm6,8
1669 vaesenc xmm9,xmm9,xmm10
1670 vmovdqu xmm10,XMMWORD[((112-128))+rdi]
1671 xor r13d,r9d
1672 add eax,DWORD[28+rsp]
1673 mov esi,ebx
1674 vpaddd xmm1,xmm1,xmm6
1675 shrd r14d,r14d,11
1676 xor r12d,r11d
1677 xor esi,ecx
1678 vpaddd xmm6,xmm1,XMMWORD[32+rbp]
1679 shrd r13d,r13d,6
1680 add eax,r12d
1681 and r15d,esi
1682 xor r14d,ebx
1683 add eax,r13d
1684 xor r15d,ecx
1685 add r8d,eax
1686 shrd r14d,r14d,2
1687 add eax,r15d
1688 mov r13d,r8d
1689 add r14d,eax
1690 vmovdqa XMMWORD[16+rsp],xmm6
1691 vpalignr xmm4,xmm3,xmm2,4
1692 shrd r13d,r13d,14
1693 mov eax,r14d
1694 mov r12d,r9d
1695 vpalignr xmm7,xmm1,xmm0,4
1696 xor r13d,r8d
1697 shrd r14d,r14d,9
1698 xor r12d,r10d
1699 vpsrld xmm6,xmm4,7
1700 shrd r13d,r13d,5
1701 xor r14d,eax
1702 and r12d,r8d
1703 vpaddd xmm2,xmm2,xmm7
1704 vaesenc xmm9,xmm9,xmm10
1705 vmovdqu xmm10,XMMWORD[((128-128))+rdi]
1706 xor r13d,r8d
1707 add r11d,DWORD[32+rsp]
1708 mov r15d,eax
1709 vpsrld xmm7,xmm4,3
1710 shrd r14d,r14d,11
1711 xor r12d,r10d
1712 xor r15d,ebx
1713 vpslld xmm5,xmm4,14
1714 shrd r13d,r13d,6
1715 add r11d,r12d
1716 and esi,r15d
1717 vpxor xmm4,xmm7,xmm6
1718 xor r14d,eax
1719 add r11d,r13d
1720 xor esi,ebx
1721 vpshufd xmm7,xmm1,250
1722 add edx,r11d
1723 shrd r14d,r14d,2
1724 add r11d,esi
1725 vpsrld xmm6,xmm6,11
1726 mov r13d,edx
1727 add r14d,r11d
1728 shrd r13d,r13d,14
1729 vpxor xmm4,xmm4,xmm5
1730 mov r11d,r14d
1731 mov r12d,r8d
1732 xor r13d,edx
1733 vpslld xmm5,xmm5,11
1734 shrd r14d,r14d,9
1735 xor r12d,r9d
1736 shrd r13d,r13d,5
1737 vpxor xmm4,xmm4,xmm6
1738 xor r14d,r11d
1739 and r12d,edx
1740 vaesenc xmm9,xmm9,xmm10
1741 vmovdqu xmm10,XMMWORD[((144-128))+rdi]
1742 xor r13d,edx
1743 vpsrld xmm6,xmm7,10
1744 add r10d,DWORD[36+rsp]
1745 mov esi,r11d
1746 shrd r14d,r14d,11
1747 vpxor xmm4,xmm4,xmm5
1748 xor r12d,r9d
1749 xor esi,eax
1750 shrd r13d,r13d,6
1751 vpsrlq xmm7,xmm7,17
1752 add r10d,r12d
1753 and r15d,esi
1754 xor r14d,r11d
1755 vpaddd xmm2,xmm2,xmm4
1756 add r10d,r13d
1757 xor r15d,eax
1758 add ecx,r10d
1759 vpxor xmm6,xmm6,xmm7
1760 shrd r14d,r14d,2
1761 add r10d,r15d
1762 mov r13d,ecx
1763 vpsrlq xmm7,xmm7,2
1764 add r14d,r10d
1765 shrd r13d,r13d,14
1766 mov r10d,r14d
1767 vpxor xmm6,xmm6,xmm7
1768 mov r12d,edx
1769 xor r13d,ecx
1770 shrd r14d,r14d,9
1771 vpshufd xmm6,xmm6,132
1772 xor r12d,r8d
1773 shrd r13d,r13d,5
1774 xor r14d,r10d
1775 vpsrldq xmm6,xmm6,8
1776 and r12d,ecx
1777 vaesenc xmm9,xmm9,xmm10
1778 vmovdqu xmm10,XMMWORD[((160-128))+rdi]
1779 xor r13d,ecx
1780 add r9d,DWORD[40+rsp]
1781 vpaddd xmm2,xmm2,xmm6
1782 mov r15d,r10d
1783 shrd r14d,r14d,11
1784 xor r12d,r8d
1785 vpshufd xmm7,xmm2,80
1786 xor r15d,r11d
1787 shrd r13d,r13d,6
1788 add r9d,r12d
1789 vpsrld xmm6,xmm7,10
1790 and esi,r15d
1791 xor r14d,r10d
1792 add r9d,r13d
1793 vpsrlq xmm7,xmm7,17
1794 xor esi,r11d
1795 add ebx,r9d
1796 shrd r14d,r14d,2
1797 vpxor xmm6,xmm6,xmm7
1798 add r9d,esi
1799 mov r13d,ebx
1800 add r14d,r9d
1801 vpsrlq xmm7,xmm7,2
1802 shrd r13d,r13d,14
1803 mov r9d,r14d
1804 mov r12d,ecx
1805 vpxor xmm6,xmm6,xmm7
1806 xor r13d,ebx
1807 shrd r14d,r14d,9
1808 xor r12d,edx
1809 vpshufd xmm6,xmm6,232
1810 shrd r13d,r13d,5
1811 xor r14d,r9d
1812 and r12d,ebx
1813 vpslldq xmm6,xmm6,8
1814 vaesenclast xmm11,xmm9,xmm10
1815 vaesenc xmm9,xmm9,xmm10
1816 vmovdqu xmm10,XMMWORD[((176-128))+rdi]
1817 xor r13d,ebx
1818 add r8d,DWORD[44+rsp]
1819 mov esi,r9d
1820 vpaddd xmm2,xmm2,xmm6
1821 shrd r14d,r14d,11
1822 xor r12d,edx
1823 xor esi,r10d
1824 vpaddd xmm6,xmm2,XMMWORD[64+rbp]
1825 shrd r13d,r13d,6
1826 add r8d,r12d
1827 and r15d,esi
1828 xor r14d,r9d
1829 add r8d,r13d
1830 xor r15d,r10d
1831 add eax,r8d
1832 shrd r14d,r14d,2
1833 add r8d,r15d
1834 mov r13d,eax
1835 add r14d,r8d
1836 vmovdqa XMMWORD[32+rsp],xmm6
1837 vpalignr xmm4,xmm0,xmm3,4
1838 shrd r13d,r13d,14
1839 mov r8d,r14d
1840 mov r12d,ebx
1841 vpalignr xmm7,xmm2,xmm1,4
1842 xor r13d,eax
1843 shrd r14d,r14d,9
1844 xor r12d,ecx
1845 vpsrld xmm6,xmm4,7
1846 shrd r13d,r13d,5
1847 xor r14d,r8d
1848 and r12d,eax
1849 vpaddd xmm3,xmm3,xmm7
1850 vpand xmm8,xmm11,xmm12
1851 vaesenc xmm9,xmm9,xmm10
1852 vmovdqu xmm10,XMMWORD[((192-128))+rdi]
1853 xor r13d,eax
1854 add edx,DWORD[48+rsp]
1855 mov r15d,r8d
1856 vpsrld xmm7,xmm4,3
1857 shrd r14d,r14d,11
1858 xor r12d,ecx
1859 xor r15d,r9d
1860 vpslld xmm5,xmm4,14
1861 shrd r13d,r13d,6
1862 add edx,r12d
1863 and esi,r15d
1864 vpxor xmm4,xmm7,xmm6
1865 xor r14d,r8d
1866 add edx,r13d
1867 xor esi,r9d
1868 vpshufd xmm7,xmm2,250
1869 add r11d,edx
1870 shrd r14d,r14d,2
1871 add edx,esi
1872 vpsrld xmm6,xmm6,11
1873 mov r13d,r11d
1874 add r14d,edx
1875 shrd r13d,r13d,14
1876 vpxor xmm4,xmm4,xmm5
1877 mov edx,r14d
1878 mov r12d,eax
1879 xor r13d,r11d
1880 vpslld xmm5,xmm5,11
1881 shrd r14d,r14d,9
1882 xor r12d,ebx
1883 shrd r13d,r13d,5
1884 vpxor xmm4,xmm4,xmm6
1885 xor r14d,edx
1886 and r12d,r11d
1887 vaesenclast xmm11,xmm9,xmm10
1888 vaesenc xmm9,xmm9,xmm10
1889 vmovdqu xmm10,XMMWORD[((208-128))+rdi]
1890 xor r13d,r11d
1891 vpsrld xmm6,xmm7,10
1892 add ecx,DWORD[52+rsp]
1893 mov esi,edx
1894 shrd r14d,r14d,11
1895 vpxor xmm4,xmm4,xmm5
1896 xor r12d,ebx
1897 xor esi,r8d
1898 shrd r13d,r13d,6
1899 vpsrlq xmm7,xmm7,17
1900 add ecx,r12d
1901 and r15d,esi
1902 xor r14d,edx
1903 vpaddd xmm3,xmm3,xmm4
1904 add ecx,r13d
1905 xor r15d,r8d
1906 add r10d,ecx
1907 vpxor xmm6,xmm6,xmm7
1908 shrd r14d,r14d,2
1909 add ecx,r15d
1910 mov r13d,r10d
1911 vpsrlq xmm7,xmm7,2
1912 add r14d,ecx
1913 shrd r13d,r13d,14
1914 mov ecx,r14d
1915 vpxor xmm6,xmm6,xmm7
1916 mov r12d,r11d
1917 xor r13d,r10d
1918 shrd r14d,r14d,9
1919 vpshufd xmm6,xmm6,132
1920 xor r12d,eax
1921 shrd r13d,r13d,5
1922 xor r14d,ecx
1923 vpsrldq xmm6,xmm6,8
1924 and r12d,r10d
1925 vpand xmm11,xmm11,xmm13
1926 vaesenc xmm9,xmm9,xmm10
1927 vmovdqu xmm10,XMMWORD[((224-128))+rdi]
1928 xor r13d,r10d
1929 add ebx,DWORD[56+rsp]
1930 vpaddd xmm3,xmm3,xmm6
1931 mov r15d,ecx
1932 shrd r14d,r14d,11
1933 xor r12d,eax
1934 vpshufd xmm7,xmm3,80
1935 xor r15d,edx
1936 shrd r13d,r13d,6
1937 add ebx,r12d
1938 vpsrld xmm6,xmm7,10
1939 and esi,r15d
1940 xor r14d,ecx
1941 add ebx,r13d
1942 vpsrlq xmm7,xmm7,17
1943 xor esi,edx
1944 add r9d,ebx
1945 shrd r14d,r14d,2
1946 vpxor xmm6,xmm6,xmm7
1947 add ebx,esi
1948 mov r13d,r9d
1949 add r14d,ebx
1950 vpsrlq xmm7,xmm7,2
1951 shrd r13d,r13d,14
1952 mov ebx,r14d
1953 mov r12d,r10d
1954 vpxor xmm6,xmm6,xmm7
1955 xor r13d,r9d
1956 shrd r14d,r14d,9
1957 xor r12d,r11d
1958 vpshufd xmm6,xmm6,232
1959 shrd r13d,r13d,5
1960 xor r14d,ebx
1961 and r12d,r9d
1962 vpslldq xmm6,xmm6,8
1963 vpor xmm8,xmm8,xmm11
1964 vaesenclast xmm11,xmm9,xmm10
1965 vmovdqu xmm10,XMMWORD[((0-128))+rdi]
1966 xor r13d,r9d
1967 add eax,DWORD[60+rsp]
1968 mov esi,ebx
1969 vpaddd xmm3,xmm3,xmm6
1970 shrd r14d,r14d,11
1971 xor r12d,r11d
1972 xor esi,ecx
1973 vpaddd xmm6,xmm3,XMMWORD[96+rbp]
1974 shrd r13d,r13d,6
1975 add eax,r12d
1976 and r15d,esi
1977 xor r14d,ebx
1978 add eax,r13d
1979 xor r15d,ecx
1980 add r8d,eax
1981 shrd r14d,r14d,2
1982 add eax,r15d
1983 mov r13d,r8d
1984 add r14d,eax
1985 vmovdqa XMMWORD[48+rsp],xmm6
1986 mov r12,QWORD[((64+0))+rsp]
1987 vpand xmm11,xmm11,xmm14
1988 mov r15,QWORD[((64+8))+rsp]
1989 vpor xmm8,xmm8,xmm11
1990 vmovdqu XMMWORD[r12*1+r15],xmm8
1991 lea r12,[16+r12]
1992 cmp BYTE[131+rbp],0
1993 jne NEAR $L$avx_00_47
1994 vmovdqu xmm9,XMMWORD[r12]
1995 mov QWORD[((64+0))+rsp],r12
1996 shrd r13d,r13d,14
1997 mov eax,r14d
1998 mov r12d,r9d
1999 xor r13d,r8d
2000 shrd r14d,r14d,9
2001 xor r12d,r10d
2002 shrd r13d,r13d,5
2003 xor r14d,eax
2004 and r12d,r8d
2005 vpxor xmm9,xmm9,xmm10
2006 vmovdqu xmm10,XMMWORD[((16-128))+rdi]
2007 xor r13d,r8d
2008 add r11d,DWORD[rsp]
2009 mov r15d,eax
2010 shrd r14d,r14d,11
2011 xor r12d,r10d
2012 xor r15d,ebx
2013 shrd r13d,r13d,6
2014 add r11d,r12d
2015 and esi,r15d
2016 xor r14d,eax
2017 add r11d,r13d
2018 xor esi,ebx
2019 add edx,r11d
2020 shrd r14d,r14d,2
2021 add r11d,esi
2022 mov r13d,edx
2023 add r14d,r11d
2024 shrd r13d,r13d,14
2025 mov r11d,r14d
2026 mov r12d,r8d
2027 xor r13d,edx
2028 shrd r14d,r14d,9
2029 xor r12d,r9d
2030 shrd r13d,r13d,5
2031 xor r14d,r11d
2032 and r12d,edx
2033 vpxor xmm9,xmm9,xmm8
2034 xor r13d,edx
2035 add r10d,DWORD[4+rsp]
2036 mov esi,r11d
2037 shrd r14d,r14d,11
2038 xor r12d,r9d
2039 xor esi,eax
2040 shrd r13d,r13d,6
2041 add r10d,r12d
2042 and r15d,esi
2043 xor r14d,r11d
2044 add r10d,r13d
2045 xor r15d,eax
2046 add ecx,r10d
2047 shrd r14d,r14d,2
2048 add r10d,r15d
2049 mov r13d,ecx
2050 add r14d,r10d
2051 shrd r13d,r13d,14
2052 mov r10d,r14d
2053 mov r12d,edx
2054 xor r13d,ecx
2055 shrd r14d,r14d,9
2056 xor r12d,r8d
2057 shrd r13d,r13d,5
2058 xor r14d,r10d
2059 and r12d,ecx
2060 vaesenc xmm9,xmm9,xmm10
2061 vmovdqu xmm10,XMMWORD[((32-128))+rdi]
2062 xor r13d,ecx
2063 add r9d,DWORD[8+rsp]
2064 mov r15d,r10d
2065 shrd r14d,r14d,11
2066 xor r12d,r8d
2067 xor r15d,r11d
2068 shrd r13d,r13d,6
2069 add r9d,r12d
2070 and esi,r15d
2071 xor r14d,r10d
2072 add r9d,r13d
2073 xor esi,r11d
2074 add ebx,r9d
2075 shrd r14d,r14d,2
2076 add r9d,esi
2077 mov r13d,ebx
2078 add r14d,r9d
2079 shrd r13d,r13d,14
2080 mov r9d,r14d
2081 mov r12d,ecx
2082 xor r13d,ebx
2083 shrd r14d,r14d,9
2084 xor r12d,edx
2085 shrd r13d,r13d,5
2086 xor r14d,r9d
2087 and r12d,ebx
2088 vaesenc xmm9,xmm9,xmm10
2089 vmovdqu xmm10,XMMWORD[((48-128))+rdi]
2090 xor r13d,ebx
2091 add r8d,DWORD[12+rsp]
2092 mov esi,r9d
2093 shrd r14d,r14d,11
2094 xor r12d,edx
2095 xor esi,r10d
2096 shrd r13d,r13d,6
2097 add r8d,r12d
2098 and r15d,esi
2099 xor r14d,r9d
2100 add r8d,r13d
2101 xor r15d,r10d
2102 add eax,r8d
2103 shrd r14d,r14d,2
2104 add r8d,r15d
2105 mov r13d,eax
2106 add r14d,r8d
2107 shrd r13d,r13d,14
2108 mov r8d,r14d
2109 mov r12d,ebx
2110 xor r13d,eax
2111 shrd r14d,r14d,9
2112 xor r12d,ecx
2113 shrd r13d,r13d,5
2114 xor r14d,r8d
2115 and r12d,eax
2116 vaesenc xmm9,xmm9,xmm10
2117 vmovdqu xmm10,XMMWORD[((64-128))+rdi]
2118 xor r13d,eax
2119 add edx,DWORD[16+rsp]
2120 mov r15d,r8d
2121 shrd r14d,r14d,11
2122 xor r12d,ecx
2123 xor r15d,r9d
2124 shrd r13d,r13d,6
2125 add edx,r12d
2126 and esi,r15d
2127 xor r14d,r8d
2128 add edx,r13d
2129 xor esi,r9d
2130 add r11d,edx
2131 shrd r14d,r14d,2
2132 add edx,esi
2133 mov r13d,r11d
2134 add r14d,edx
2135 shrd r13d,r13d,14
2136 mov edx,r14d
2137 mov r12d,eax
2138 xor r13d,r11d
2139 shrd r14d,r14d,9
2140 xor r12d,ebx
2141 shrd r13d,r13d,5
2142 xor r14d,edx
2143 and r12d,r11d
2144 vaesenc xmm9,xmm9,xmm10
2145 vmovdqu xmm10,XMMWORD[((80-128))+rdi]
2146 xor r13d,r11d
2147 add ecx,DWORD[20+rsp]
2148 mov esi,edx
2149 shrd r14d,r14d,11
2150 xor r12d,ebx
2151 xor esi,r8d
2152 shrd r13d,r13d,6
2153 add ecx,r12d
2154 and r15d,esi
2155 xor r14d,edx
2156 add ecx,r13d
2157 xor r15d,r8d
2158 add r10d,ecx
2159 shrd r14d,r14d,2
2160 add ecx,r15d
2161 mov r13d,r10d
2162 add r14d,ecx
2163 shrd r13d,r13d,14
2164 mov ecx,r14d
2165 mov r12d,r11d
2166 xor r13d,r10d
2167 shrd r14d,r14d,9
2168 xor r12d,eax
2169 shrd r13d,r13d,5
2170 xor r14d,ecx
2171 and r12d,r10d
2172 vaesenc xmm9,xmm9,xmm10
2173 vmovdqu xmm10,XMMWORD[((96-128))+rdi]
2174 xor r13d,r10d
2175 add ebx,DWORD[24+rsp]
2176 mov r15d,ecx
2177 shrd r14d,r14d,11
2178 xor r12d,eax
2179 xor r15d,edx
2180 shrd r13d,r13d,6
2181 add ebx,r12d
2182 and esi,r15d
2183 xor r14d,ecx
2184 add ebx,r13d
2185 xor esi,edx
2186 add r9d,ebx
2187 shrd r14d,r14d,2
2188 add ebx,esi
2189 mov r13d,r9d
2190 add r14d,ebx
2191 shrd r13d,r13d,14
2192 mov ebx,r14d
2193 mov r12d,r10d
2194 xor r13d,r9d
2195 shrd r14d,r14d,9
2196 xor r12d,r11d
2197 shrd r13d,r13d,5
2198 xor r14d,ebx
2199 and r12d,r9d
2200 vaesenc xmm9,xmm9,xmm10
2201 vmovdqu xmm10,XMMWORD[((112-128))+rdi]
2202 xor r13d,r9d
2203 add eax,DWORD[28+rsp]
2204 mov esi,ebx
2205 shrd r14d,r14d,11
2206 xor r12d,r11d
2207 xor esi,ecx
2208 shrd r13d,r13d,6
2209 add eax,r12d
2210 and r15d,esi
2211 xor r14d,ebx
2212 add eax,r13d
2213 xor r15d,ecx
2214 add r8d,eax
2215 shrd r14d,r14d,2
2216 add eax,r15d
2217 mov r13d,r8d
2218 add r14d,eax
2219 shrd r13d,r13d,14
2220 mov eax,r14d
2221 mov r12d,r9d
2222 xor r13d,r8d
2223 shrd r14d,r14d,9
2224 xor r12d,r10d
2225 shrd r13d,r13d,5
2226 xor r14d,eax
2227 and r12d,r8d
2228 vaesenc xmm9,xmm9,xmm10
2229 vmovdqu xmm10,XMMWORD[((128-128))+rdi]
2230 xor r13d,r8d
2231 add r11d,DWORD[32+rsp]
2232 mov r15d,eax
2233 shrd r14d,r14d,11
2234 xor r12d,r10d
2235 xor r15d,ebx
2236 shrd r13d,r13d,6
2237 add r11d,r12d
2238 and esi,r15d
2239 xor r14d,eax
2240 add r11d,r13d
2241 xor esi,ebx
2242 add edx,r11d
2243 shrd r14d,r14d,2
2244 add r11d,esi
2245 mov r13d,edx
2246 add r14d,r11d
2247 shrd r13d,r13d,14
2248 mov r11d,r14d
2249 mov r12d,r8d
2250 xor r13d,edx
2251 shrd r14d,r14d,9
2252 xor r12d,r9d
2253 shrd r13d,r13d,5
2254 xor r14d,r11d
2255 and r12d,edx
2256 vaesenc xmm9,xmm9,xmm10
2257 vmovdqu xmm10,XMMWORD[((144-128))+rdi]
2258 xor r13d,edx
2259 add r10d,DWORD[36+rsp]
2260 mov esi,r11d
2261 shrd r14d,r14d,11
2262 xor r12d,r9d
2263 xor esi,eax
2264 shrd r13d,r13d,6
2265 add r10d,r12d
2266 and r15d,esi
2267 xor r14d,r11d
2268 add r10d,r13d
2269 xor r15d,eax
2270 add ecx,r10d
2271 shrd r14d,r14d,2
2272 add r10d,r15d
2273 mov r13d,ecx
2274 add r14d,r10d
2275 shrd r13d,r13d,14
2276 mov r10d,r14d
2277 mov r12d,edx
2278 xor r13d,ecx
2279 shrd r14d,r14d,9
2280 xor r12d,r8d
2281 shrd r13d,r13d,5
2282 xor r14d,r10d
2283 and r12d,ecx
2284 vaesenc xmm9,xmm9,xmm10
2285 vmovdqu xmm10,XMMWORD[((160-128))+rdi]
2286 xor r13d,ecx
2287 add r9d,DWORD[40+rsp]
2288 mov r15d,r10d
2289 shrd r14d,r14d,11
2290 xor r12d,r8d
2291 xor r15d,r11d
2292 shrd r13d,r13d,6
2293 add r9d,r12d
2294 and esi,r15d
2295 xor r14d,r10d
2296 add r9d,r13d
2297 xor esi,r11d
2298 add ebx,r9d
2299 shrd r14d,r14d,2
2300 add r9d,esi
2301 mov r13d,ebx
2302 add r14d,r9d
2303 shrd r13d,r13d,14
2304 mov r9d,r14d
2305 mov r12d,ecx
2306 xor r13d,ebx
2307 shrd r14d,r14d,9
2308 xor r12d,edx
2309 shrd r13d,r13d,5
2310 xor r14d,r9d
2311 and r12d,ebx
2312 vaesenclast xmm11,xmm9,xmm10
2313 vaesenc xmm9,xmm9,xmm10
2314 vmovdqu xmm10,XMMWORD[((176-128))+rdi]
2315 xor r13d,ebx
2316 add r8d,DWORD[44+rsp]
2317 mov esi,r9d
2318 shrd r14d,r14d,11
2319 xor r12d,edx
2320 xor esi,r10d
2321 shrd r13d,r13d,6
2322 add r8d,r12d
2323 and r15d,esi
2324 xor r14d,r9d
2325 add r8d,r13d
2326 xor r15d,r10d
2327 add eax,r8d
2328 shrd r14d,r14d,2
2329 add r8d,r15d
2330 mov r13d,eax
2331 add r14d,r8d
2332 shrd r13d,r13d,14
2333 mov r8d,r14d
2334 mov r12d,ebx
2335 xor r13d,eax
2336 shrd r14d,r14d,9
2337 xor r12d,ecx
2338 shrd r13d,r13d,5
2339 xor r14d,r8d
2340 and r12d,eax
2341 vpand xmm8,xmm11,xmm12
2342 vaesenc xmm9,xmm9,xmm10
2343 vmovdqu xmm10,XMMWORD[((192-128))+rdi]
2344 xor r13d,eax
2345 add edx,DWORD[48+rsp]
2346 mov r15d,r8d
2347 shrd r14d,r14d,11
2348 xor r12d,ecx
2349 xor r15d,r9d
2350 shrd r13d,r13d,6
2351 add edx,r12d
2352 and esi,r15d
2353 xor r14d,r8d
2354 add edx,r13d
2355 xor esi,r9d
2356 add r11d,edx
2357 shrd r14d,r14d,2
2358 add edx,esi
2359 mov r13d,r11d
2360 add r14d,edx
2361 shrd r13d,r13d,14
2362 mov edx,r14d
2363 mov r12d,eax
2364 xor r13d,r11d
2365 shrd r14d,r14d,9
2366 xor r12d,ebx
2367 shrd r13d,r13d,5
2368 xor r14d,edx
2369 and r12d,r11d
2370 vaesenclast xmm11,xmm9,xmm10
2371 vaesenc xmm9,xmm9,xmm10
2372 vmovdqu xmm10,XMMWORD[((208-128))+rdi]
2373 xor r13d,r11d
2374 add ecx,DWORD[52+rsp]
2375 mov esi,edx
2376 shrd r14d,r14d,11
2377 xor r12d,ebx
2378 xor esi,r8d
2379 shrd r13d,r13d,6
2380 add ecx,r12d
2381 and r15d,esi
2382 xor r14d,edx
2383 add ecx,r13d
2384 xor r15d,r8d
2385 add r10d,ecx
2386 shrd r14d,r14d,2
2387 add ecx,r15d
2388 mov r13d,r10d
2389 add r14d,ecx
2390 shrd r13d,r13d,14
2391 mov ecx,r14d
2392 mov r12d,r11d
2393 xor r13d,r10d
2394 shrd r14d,r14d,9
2395 xor r12d,eax
2396 shrd r13d,r13d,5
2397 xor r14d,ecx
2398 and r12d,r10d
2399 vpand xmm11,xmm11,xmm13
2400 vaesenc xmm9,xmm9,xmm10
2401 vmovdqu xmm10,XMMWORD[((224-128))+rdi]
2402 xor r13d,r10d
2403 add ebx,DWORD[56+rsp]
2404 mov r15d,ecx
2405 shrd r14d,r14d,11
2406 xor r12d,eax
2407 xor r15d,edx
2408 shrd r13d,r13d,6
2409 add ebx,r12d
2410 and esi,r15d
2411 xor r14d,ecx
2412 add ebx,r13d
2413 xor esi,edx
2414 add r9d,ebx
2415 shrd r14d,r14d,2
2416 add ebx,esi
2417 mov r13d,r9d
2418 add r14d,ebx
2419 shrd r13d,r13d,14
2420 mov ebx,r14d
2421 mov r12d,r10d
2422 xor r13d,r9d
2423 shrd r14d,r14d,9
2424 xor r12d,r11d
2425 shrd r13d,r13d,5
2426 xor r14d,ebx
2427 and r12d,r9d
2428 vpor xmm8,xmm8,xmm11
2429 vaesenclast xmm11,xmm9,xmm10
2430 vmovdqu xmm10,XMMWORD[((0-128))+rdi]
2431 xor r13d,r9d
2432 add eax,DWORD[60+rsp]
2433 mov esi,ebx
2434 shrd r14d,r14d,11
2435 xor r12d,r11d
2436 xor esi,ecx
2437 shrd r13d,r13d,6
2438 add eax,r12d
2439 and r15d,esi
2440 xor r14d,ebx
2441 add eax,r13d
2442 xor r15d,ecx
2443 add r8d,eax
2444 shrd r14d,r14d,2
2445 add eax,r15d
2446 mov r13d,r8d
2447 add r14d,eax
2448 mov r12,QWORD[((64+0))+rsp]
2449 mov r13,QWORD[((64+8))+rsp]
2450 mov r15,QWORD[((64+40))+rsp]
2451 mov rsi,QWORD[((64+48))+rsp]
2452
2453 vpand xmm11,xmm11,xmm14
2454 mov eax,r14d
2455 vpor xmm8,xmm8,xmm11
2456 vmovdqu XMMWORD[r13*1+r12],xmm8
2457 lea r12,[16+r12]
2458
2459 add eax,DWORD[r15]
2460 add ebx,DWORD[4+r15]
2461 add ecx,DWORD[8+r15]
2462 add edx,DWORD[12+r15]
2463 add r8d,DWORD[16+r15]
2464 add r9d,DWORD[20+r15]
2465 add r10d,DWORD[24+r15]
2466 add r11d,DWORD[28+r15]
2467
2468 cmp r12,QWORD[((64+16))+rsp]
2469
2470 mov DWORD[r15],eax
2471 mov DWORD[4+r15],ebx
2472 mov DWORD[8+r15],ecx
2473 mov DWORD[12+r15],edx
2474 mov DWORD[16+r15],r8d
2475 mov DWORD[20+r15],r9d
2476 mov DWORD[24+r15],r10d
2477 mov DWORD[28+r15],r11d
2478 jb NEAR $L$loop_avx
2479
2480 mov r8,QWORD[((64+32))+rsp]
2481 mov rsi,QWORD[120+rsp]
2482
2483 vmovdqu XMMWORD[r8],xmm8
2484 vzeroall
2485 movaps xmm6,XMMWORD[128+rsp]
2486 movaps xmm7,XMMWORD[144+rsp]
2487 movaps xmm8,XMMWORD[160+rsp]
2488 movaps xmm9,XMMWORD[176+rsp]
2489 movaps xmm10,XMMWORD[192+rsp]
2490 movaps xmm11,XMMWORD[208+rsp]
2491 movaps xmm12,XMMWORD[224+rsp]
2492 movaps xmm13,XMMWORD[240+rsp]
2493 movaps xmm14,XMMWORD[256+rsp]
2494 movaps xmm15,XMMWORD[272+rsp]
2495 mov r15,QWORD[((-48))+rsi]
2496
2497 mov r14,QWORD[((-40))+rsi]
2498
2499 mov r13,QWORD[((-32))+rsi]
2500
2501 mov r12,QWORD[((-24))+rsi]
2502
2503 mov rbp,QWORD[((-16))+rsi]
2504
2505 mov rbx,QWORD[((-8))+rsi]
2506
2507 lea rsp,[rsi]
2508
; Return path of the AVX variant: reload rdi and rsi from the two qword
; slots directly above the return address, then return.
; NOTE(review): the stores into these slots are outside this excerpt;
; presumably the AVX entry saves the caller's rdi/rsi there the same way
; the AVX2 entry does (mov QWORD[8+rsp],rdi / mov QWORD[16+rsp],rsi).
2509$L$epilogue_avx:
2510 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2511 mov rsi,QWORD[16+rsp]
; The return is emitted as raw bytes F3 C3, i.e. a REP-prefixed RET.
2512 DB 0F3h,0C3h ;repret
2513
2514$L$SEH_end_aesni_cbc_sha256_enc_avx:
2515
2516ALIGN 64
; AVX2 variant, entered with the Microsoft x64 calling convention.
; This entry stub preserves rdi/rsi and re-homes the six leading arguments
; into the registers the body uses (rdi, rsi, rdx, rcx, r8, r9).
2517aesni_cbc_sha256_enc_avx2:
; rdi and rsi are callee-saved on Win64; park them in the caller-provided
; shadow space just above the return address.
2518 mov QWORD[8+rsp],rdi ;WIN64 prologue
2519 mov QWORD[16+rsp],rsi
2520 mov rax,rsp
2521$L$SEH_begin_aesni_cbc_sha256_enc_avx2:
; Register arguments 1-4 arrive in rcx, rdx, r8, r9.
2522 mov rdi,rcx
2523 mov rsi,rdx
2524 mov rdx,r8
2525 mov rcx,r9
; Arguments 5 and 6 live on the stack, past the return address and the
; 32-byte shadow area.
2526 mov r8,QWORD[40+rsp]
2527 mov r9,QWORD[48+rsp]
2528
2529
2530
; Frame setup for the AVX2 body: fetch the seventh argument, save the
; callee-saved registers, carve out an aligned scratch frame, convert the
; pointer arguments to the form the loop uses, and spill xmm6-xmm15.
2531$L$avx2_shortcut:
; rsp is still the entry value here, so [56+rsp] is the stack argument that
; follows the ones read from [40+rsp] and [48+rsp] (i.e. argument 7).
2532 mov r10,QWORD[56+rsp]
; rax keeps the entry rsp; it is stored in the frame below.
2533 mov rax,rsp
2534
2535 push rbx
2536
2537 push rbp
2538
2539 push r12
2540
2541 push r13
2542
2543 push r14
2544
2545 push r15
2546
; Reserve 736 bytes, round rsp down to a 1024-byte boundary, then step back
; up by 448. Net effect: rsp = (1024-aligned address) + 448 and at least
; 736-448 = 288 bytes lie between the new rsp and the pushed registers,
; which is exactly the extent of the stores below (last one ends at 272+16).
2547 sub rsp,736
2548 and rsp,-256*4
2549 add rsp,448
2550
; rdx is scaled by 64 and added to rdi, giving an end-of-input pointer
; (presumably rdx arrives as a count of 64-byte blocks -- confirm against
; the caller). rsi and r10 are turned into offsets relative to rdi.
2551 shl rdx,6
2552 sub rsi,rdi
2553 sub r10,rdi
2554 add rdx,rdi
2555
2556
2557
; Frame slots written here:
;   [64+16+rsp] end-of-input pointer
;   [64+32+rsp] r8  (argument 5)
;   [64+40+rsp] r9  (argument 6)
;   [64+48+rsp] r10 (argument 7, as an offset from rdi)
;   [120+rsp]   entry rsp
2558 mov QWORD[((64+16))+rsp],rdx
2559
2560 mov QWORD[((64+32))+rsp],r8
2561 mov QWORD[((64+40))+rsp],r9
2562 mov QWORD[((64+48))+rsp],r10
2563 mov QWORD[120+rsp],rax
2564
; xmm6-xmm15 are callee-saved under the Microsoft x64 ABI. movaps is usable
; because rsp and every offset here are multiples of 16.
2565 movaps XMMWORD[128+rsp],xmm6
2566 movaps XMMWORD[144+rsp],xmm7
2567 movaps XMMWORD[160+rsp],xmm8
2568 movaps XMMWORD[176+rsp],xmm9
2569 movaps XMMWORD[192+rsp],xmm10
2570 movaps XMMWORD[208+rsp],xmm11
2571 movaps XMMWORD[224+rsp],xmm12
2572 movaps XMMWORD[240+rsp],xmm13
2573 movaps XMMWORD[256+rsp],xmm14
2574 movaps XMMWORD[272+rsp],xmm15
; One-time register setup before the AVX2 main loop.
; On exit: r13 = input pointer + 64, rsi = argument-7 offset, r12 = source
; address for the upper 128-bit lanes, rdi = rcx + 128, r15 = argument 6,
; eax/ebx/ecx/edx/r8d-r11d = eight dwords loaded from [r15], xmm8 = 16 bytes
; loaded from [r8], xmm10 = 16 bytes at rcx+0, xmm12-xmm14 = table entries.
2575$L$prologue_avx2:
2576 vzeroall
2577
2578 mov r13,rdi
; Keep the rsi offset (argument 2 minus argument 1) in the high qword of
; xmm15, since rsi is reassigned just below.
2579 vpinsrq xmm15,xmm15,rsi,1
; Bias the rcx pointer by 128 so later accesses use (N-128)+rdi displacements.
2580 lea rdi,[128+rcx]
2581 lea r12,[((K256+544))]
; Dword at offset 240 of the structure rcx points to (presumably the AES
; round count -- confirm against the key-schedule layout).
2582 mov r14d,DWORD[((240-128))+rdi]
2583 mov r15,r9
2584 mov rsi,r10
; 16 bytes from argument 5 (presumably the CBC IV -- confirm).
2585 vmovdqu xmm8,XMMWORD[r8]
2586 lea r14,[((-9))+r14]
2587
; Three consecutive 16-byte entries selected by (value-9)*8 from K256+544.
; NOTE(review): in the AVX path visible earlier in this file xmm12-xmm14
; are used as vpand masks on vaesenclast results; presumably the same here.
2588 vmovdqa xmm14,XMMWORD[r14*8+r12]
2589 vmovdqa xmm13,XMMWORD[16+r14*8+r12]
2590 vmovdqa xmm12,XMMWORD[32+r14*8+r12]
2591
; r13 += 64 (written as subtracting -64).
2592 sub r13,-16*4
; The eight dword loads from [r15] are interleaved with the pointer work.
2593 mov eax,DWORD[r15]
; r12 = address 64 bytes past the block the loop reads via [-64+r13+rsi].
2594 lea r12,[r13*1+rsi]
2595 mov ebx,DWORD[4+r15]
; If the advanced pointer already equals the end pointer there is no
; following block, so point r12 at the stack frame instead; the upper-lane
; loads in the loop then read frame memory rather than past the input.
2596 cmp r13,rdx
2597 mov ecx,DWORD[8+r15]
2598 cmove r12,rsp
; rdx (end pointer) is overwritten here; its value was already saved at
; [64+16+rsp] during frame setup.
2599 mov edx,DWORD[12+r15]
2600 mov r8d,DWORD[16+r15]
2601 mov r9d,DWORD[20+r15]
2602 mov r10d,DWORD[24+r15]
2603 mov r11d,DWORD[28+r15]
; First 16 bytes of the structure rcx pointed to (rdi is biased by 128).
2604 vmovdqu xmm10,XMMWORD[((0-128))+rdi]
2605 jmp NEAR $L$oop_avx2
2606ALIGN 16
; Top of the AVX2 loop: load 64 bytes into the low lanes of ymm0-ymm3 and
; 64 bytes from [r12] into the high lanes, shuffle bytes, add the first four
; 32-byte rows of K256, spill the sums to the stack, and seed the scalar
; temporaries consumed at $L$avx2_00_47.
2607$L$oop_avx2:
; Shuffle control for the vpshufb instructions below (presumably a
; big-endian byte swap of each dword -- the table contents at K256+512 are
; not visible in this excerpt).
2608 vmovdqa ymm7,YMMWORD[((K256+512))]
; Low lanes: the 64 bytes at r13+rsi-64.
2609 vmovdqu xmm0,XMMWORD[((-64+0))+r13*1+rsi]
2610 vmovdqu xmm1,XMMWORD[((-64+16))+r13*1+rsi]
2611 vmovdqu xmm2,XMMWORD[((-64+32))+r13*1+rsi]
2612 vmovdqu xmm3,XMMWORD[((-64+48))+r13*1+rsi]
2613
; High lanes: the 64 bytes at r12.
2614 vinserti128 ymm0,ymm0,XMMWORD[r12],1
2615 vinserti128 ymm1,ymm1,XMMWORD[16+r12],1
2616 vpshufb ymm0,ymm0,ymm7
2617 vinserti128 ymm2,ymm2,XMMWORD[32+r12],1
2618 vpshufb ymm1,ymm1,ymm7
2619 vinserti128 ymm3,ymm3,XMMWORD[48+r12],1
2620
; rbp walks the K256 table; each row of four constants is stored twice there,
; so one 32-byte load supplies the same constants to both lanes.
2621 lea rbp,[K256]
2622 vpshufb ymm2,ymm2,ymm7
; Undo the +64 bias on r13 (r13 -= 64).
2623 lea r13,[((-64))+r13]
2624 vpaddd ymm4,ymm0,YMMWORD[rbp]
2625 vpshufb ymm3,ymm3,ymm7
2626 vpaddd ymm5,ymm1,YMMWORD[32+rbp]
2627 vpaddd ymm6,ymm2,YMMWORD[64+rbp]
; ymm7 (the shuffle control) is no longer needed and is reused as a result.
2628 vpaddd ymm7,ymm3,YMMWORD[96+rbp]
; Spill the four sums: two at the current rsp, then rsp drops by 64 and
; the other two go below them. rsp stays 32-byte aligned throughout, as the
; aligned ymm stores require.
2629 vmovdqa YMMWORD[rsp],ymm4
2630 xor r14d,r14d
2631 vmovdqa YMMWORD[32+rsp],ymm5
2632 lea rsp,[((-64))+rsp]
; rsi is done serving as an address offset; esi now becomes a scalar
; temporary holding ebx xor ecx.
2633 mov esi,ebx
2634 vmovdqa YMMWORD[rsp],ymm6
2635 xor esi,ecx
2636 vmovdqa YMMWORD[32+rsp],ymm7
; r12 is likewise done as a pointer; r12d is seeded with r9d.
2637 mov r12d,r9d
; rbp += 128: advance past the four rows just consumed.
2638 sub rbp,-16*2*4
2639 jmp NEAR $L$avx2_00_47
2640
2641ALIGN 16
2642$L$avx2_00_47:
2643 vmovdqu xmm9,XMMWORD[r13]
2644 vpinsrq xmm15,xmm15,r13,0
2645 lea rsp,[((-64))+rsp]
2646 vpalignr ymm4,ymm1,ymm0,4
2647 add r11d,DWORD[((0+128))+rsp]
2648 and r12d,r8d
2649 rorx r13d,r8d,25
2650 vpalignr ymm7,ymm3,ymm2,4
2651 rorx r15d,r8d,11
2652 lea eax,[r14*1+rax]
2653 lea r11d,[r12*1+r11]
2654 vpsrld ymm6,ymm4,7
2655 andn r12d,r8d,r10d
2656 xor r13d,r15d
2657 rorx r14d,r8d,6
2658 vpaddd ymm0,ymm0,ymm7
2659 lea r11d,[r12*1+r11]
2660 xor r13d,r14d
2661 mov r15d,eax
2662 vpsrld ymm7,ymm4,3
2663 rorx r12d,eax,22
2664 lea r11d,[r13*1+r11]
2665 xor r15d,ebx
2666 vpslld ymm5,ymm4,14
2667 rorx r14d,eax,13
2668 rorx r13d,eax,2
2669 lea edx,[r11*1+rdx]
2670 vpxor ymm4,ymm7,ymm6
2671 and esi,r15d
2672 vpxor xmm9,xmm9,xmm10
2673 vmovdqu xmm10,XMMWORD[((16-128))+rdi]
2674 xor r14d,r12d
2675 xor esi,ebx
2676 vpshufd ymm7,ymm3,250
2677 xor r14d,r13d
2678 lea r11d,[rsi*1+r11]
2679 mov r12d,r8d
2680 vpsrld ymm6,ymm6,11
2681 add r10d,DWORD[((4+128))+rsp]
2682 and r12d,edx
2683 rorx r13d,edx,25
2684 vpxor ymm4,ymm4,ymm5
2685 rorx esi,edx,11
2686 lea r11d,[r14*1+r11]
2687 lea r10d,[r12*1+r10]
2688 vpslld ymm5,ymm5,11
2689 andn r12d,edx,r9d
2690 xor r13d,esi
2691 rorx r14d,edx,6
2692 vpxor ymm4,ymm4,ymm6
2693 lea r10d,[r12*1+r10]
2694 xor r13d,r14d
2695 mov esi,r11d
2696 vpsrld ymm6,ymm7,10
2697 rorx r12d,r11d,22
2698 lea r10d,[r13*1+r10]
2699 xor esi,eax
2700 vpxor ymm4,ymm4,ymm5
2701 rorx r14d,r11d,13
2702 rorx r13d,r11d,2
2703 lea ecx,[r10*1+rcx]
2704 vpsrlq ymm7,ymm7,17
2705 and r15d,esi
2706 vpxor xmm9,xmm9,xmm8
2707 xor r14d,r12d
2708 xor r15d,eax
2709 vpaddd ymm0,ymm0,ymm4
2710 xor r14d,r13d
2711 lea r10d,[r15*1+r10]
2712 mov r12d,edx
2713 vpxor ymm6,ymm6,ymm7
2714 add r9d,DWORD[((8+128))+rsp]
2715 and r12d,ecx
2716 rorx r13d,ecx,25
2717 vpsrlq ymm7,ymm7,2
2718 rorx r15d,ecx,11
2719 lea r10d,[r14*1+r10]
2720 lea r9d,[r12*1+r9]
2721 vpxor ymm6,ymm6,ymm7
2722 andn r12d,ecx,r8d
2723 xor r13d,r15d
2724 rorx r14d,ecx,6
2725 vpshufd ymm6,ymm6,132
2726 lea r9d,[r12*1+r9]
2727 xor r13d,r14d
2728 mov r15d,r10d
2729 vpsrldq ymm6,ymm6,8
2730 rorx r12d,r10d,22
2731 lea r9d,[r13*1+r9]
2732 xor r15d,r11d
2733 vpaddd ymm0,ymm0,ymm6
2734 rorx r14d,r10d,13
2735 rorx r13d,r10d,2
2736 lea ebx,[r9*1+rbx]
2737 vpshufd ymm7,ymm0,80
2738 and esi,r15d
2739 vaesenc xmm9,xmm9,xmm10
2740 vmovdqu xmm10,XMMWORD[((32-128))+rdi]
2741 xor r14d,r12d
2742 xor esi,r11d
2743 vpsrld ymm6,ymm7,10
2744 xor r14d,r13d
2745 lea r9d,[rsi*1+r9]
2746 mov r12d,ecx
2747 vpsrlq ymm7,ymm7,17
2748 add r8d,DWORD[((12+128))+rsp]
2749 and r12d,ebx
2750 rorx r13d,ebx,25
2751 vpxor ymm6,ymm6,ymm7
2752 rorx esi,ebx,11
2753 lea r9d,[r14*1+r9]
2754 lea r8d,[r12*1+r8]
2755 vpsrlq ymm7,ymm7,2
2756 andn r12d,ebx,edx
2757 xor r13d,esi
2758 rorx r14d,ebx,6
2759 vpxor ymm6,ymm6,ymm7
2760 lea r8d,[r12*1+r8]
2761 xor r13d,r14d
2762 mov esi,r9d
2763 vpshufd ymm6,ymm6,232
2764 rorx r12d,r9d,22
2765 lea r8d,[r13*1+r8]
2766 xor esi,r10d
2767 vpslldq ymm6,ymm6,8
2768 rorx r14d,r9d,13
2769 rorx r13d,r9d,2
2770 lea eax,[r8*1+rax]
2771 vpaddd ymm0,ymm0,ymm6
2772 and r15d,esi
2773 vaesenc xmm9,xmm9,xmm10
2774 vmovdqu xmm10,XMMWORD[((48-128))+rdi]
2775 xor r14d,r12d
2776 xor r15d,r10d
2777 vpaddd ymm6,ymm0,YMMWORD[rbp]
2778 xor r14d,r13d
2779 lea r8d,[r15*1+r8]
2780 mov r12d,ebx
2781 vmovdqa YMMWORD[rsp],ymm6
2782 vpalignr ymm4,ymm2,ymm1,4
2783 add edx,DWORD[((32+128))+rsp]
2784 and r12d,eax
2785 rorx r13d,eax,25
2786 vpalignr ymm7,ymm0,ymm3,4
2787 rorx r15d,eax,11
2788 lea r8d,[r14*1+r8]
2789 lea edx,[r12*1+rdx]
2790 vpsrld ymm6,ymm4,7
2791 andn r12d,eax,ecx
2792 xor r13d,r15d
2793 rorx r14d,eax,6
2794 vpaddd ymm1,ymm1,ymm7
2795 lea edx,[r12*1+rdx]
2796 xor r13d,r14d
2797 mov r15d,r8d
2798 vpsrld ymm7,ymm4,3
2799 rorx r12d,r8d,22
2800 lea edx,[r13*1+rdx]
2801 xor r15d,r9d
2802 vpslld ymm5,ymm4,14
2803 rorx r14d,r8d,13
2804 rorx r13d,r8d,2
2805 lea r11d,[rdx*1+r11]
2806 vpxor ymm4,ymm7,ymm6
2807 and esi,r15d
2808 vaesenc xmm9,xmm9,xmm10
2809 vmovdqu xmm10,XMMWORD[((64-128))+rdi]
2810 xor r14d,r12d
2811 xor esi,r9d
2812 vpshufd ymm7,ymm0,250
2813 xor r14d,r13d
2814 lea edx,[rsi*1+rdx]
2815 mov r12d,eax
2816 vpsrld ymm6,ymm6,11
2817 add ecx,DWORD[((36+128))+rsp]
2818 and r12d,r11d
2819 rorx r13d,r11d,25
2820 vpxor ymm4,ymm4,ymm5
2821 rorx esi,r11d,11
2822 lea edx,[r14*1+rdx]
2823 lea ecx,[r12*1+rcx]
2824 vpslld ymm5,ymm5,11
2825 andn r12d,r11d,ebx
2826 xor r13d,esi
2827 rorx r14d,r11d,6
2828 vpxor ymm4,ymm4,ymm6
2829 lea ecx,[r12*1+rcx]
2830 xor r13d,r14d
2831 mov esi,edx
2832 vpsrld ymm6,ymm7,10
2833 rorx r12d,edx,22
2834 lea ecx,[r13*1+rcx]
2835 xor esi,r8d
2836 vpxor ymm4,ymm4,ymm5
2837 rorx r14d,edx,13
2838 rorx r13d,edx,2
2839 lea r10d,[rcx*1+r10]
2840 vpsrlq ymm7,ymm7,17
2841 and r15d,esi
2842 vaesenc xmm9,xmm9,xmm10
2843 vmovdqu xmm10,XMMWORD[((80-128))+rdi]
2844 xor r14d,r12d
2845 xor r15d,r8d
2846 vpaddd ymm1,ymm1,ymm4
2847 xor r14d,r13d
2848 lea ecx,[r15*1+rcx]
2849 mov r12d,r11d
2850 vpxor ymm6,ymm6,ymm7
2851 add ebx,DWORD[((40+128))+rsp]
2852 and r12d,r10d
2853 rorx r13d,r10d,25
2854 vpsrlq ymm7,ymm7,2
2855 rorx r15d,r10d,11
2856 lea ecx,[r14*1+rcx]
2857 lea ebx,[r12*1+rbx]
2858 vpxor ymm6,ymm6,ymm7
2859 andn r12d,r10d,eax
2860 xor r13d,r15d
2861 rorx r14d,r10d,6
2862 vpshufd ymm6,ymm6,132
2863 lea ebx,[r12*1+rbx]
2864 xor r13d,r14d
2865 mov r15d,ecx
2866 vpsrldq ymm6,ymm6,8
2867 rorx r12d,ecx,22
2868 lea ebx,[r13*1+rbx]
2869 xor r15d,edx
2870 vpaddd ymm1,ymm1,ymm6
2871 rorx r14d,ecx,13
2872 rorx r13d,ecx,2
2873 lea r9d,[rbx*1+r9]
2874 vpshufd ymm7,ymm1,80
2875 and esi,r15d
2876 vaesenc xmm9,xmm9,xmm10
2877 vmovdqu xmm10,XMMWORD[((96-128))+rdi]
2878 xor r14d,r12d
2879 xor esi,edx
2880 vpsrld ymm6,ymm7,10
2881 xor r14d,r13d
2882 lea ebx,[rsi*1+rbx]
2883 mov r12d,r10d
2884 vpsrlq ymm7,ymm7,17
2885 add eax,DWORD[((44+128))+rsp]
2886 and r12d,r9d
2887 rorx r13d,r9d,25
2888 vpxor ymm6,ymm6,ymm7
2889 rorx esi,r9d,11
2890 lea ebx,[r14*1+rbx]
2891 lea eax,[r12*1+rax]
2892 vpsrlq ymm7,ymm7,2
2893 andn r12d,r9d,r11d
2894 xor r13d,esi
2895 rorx r14d,r9d,6
2896 vpxor ymm6,ymm6,ymm7
2897 lea eax,[r12*1+rax]
2898 xor r13d,r14d
2899 mov esi,ebx
2900 vpshufd ymm6,ymm6,232
2901 rorx r12d,ebx,22
2902 lea eax,[r13*1+rax]
2903 xor esi,ecx
2904 vpslldq ymm6,ymm6,8
2905 rorx r14d,ebx,13
2906 rorx r13d,ebx,2
2907 lea r8d,[rax*1+r8]
2908 vpaddd ymm1,ymm1,ymm6
2909 and r15d,esi
2910 vaesenc xmm9,xmm9,xmm10
2911 vmovdqu xmm10,XMMWORD[((112-128))+rdi]
2912 xor r14d,r12d
2913 xor r15d,ecx
2914 vpaddd ymm6,ymm1,YMMWORD[32+rbp]
2915 xor r14d,r13d
2916 lea eax,[r15*1+rax]
2917 mov r12d,r9d
2918 vmovdqa YMMWORD[32+rsp],ymm6
2919 lea rsp,[((-64))+rsp]
2920 vpalignr ymm4,ymm3,ymm2,4
2921 add r11d,DWORD[((0+128))+rsp]
2922 and r12d,r8d
2923 rorx r13d,r8d,25
2924 vpalignr ymm7,ymm1,ymm0,4
2925 rorx r15d,r8d,11
2926 lea eax,[r14*1+rax]
2927 lea r11d,[r12*1+r11]
2928 vpsrld ymm6,ymm4,7
2929 andn r12d,r8d,r10d
2930 xor r13d,r15d
2931 rorx r14d,r8d,6
2932 vpaddd ymm2,ymm2,ymm7
2933 lea r11d,[r12*1+r11]
2934 xor r13d,r14d
2935 mov r15d,eax
2936 vpsrld ymm7,ymm4,3
2937 rorx r12d,eax,22
2938 lea r11d,[r13*1+r11]
2939 xor r15d,ebx
2940 vpslld ymm5,ymm4,14
2941 rorx r14d,eax,13
2942 rorx r13d,eax,2
2943 lea edx,[r11*1+rdx]
2944 vpxor ymm4,ymm7,ymm6
2945 and esi,r15d
2946 vaesenc xmm9,xmm9,xmm10
2947 vmovdqu xmm10,XMMWORD[((128-128))+rdi]
2948 xor r14d,r12d
2949 xor esi,ebx
2950 vpshufd ymm7,ymm1,250
2951 xor r14d,r13d
2952 lea r11d,[rsi*1+r11]
2953 mov r12d,r8d
2954 vpsrld ymm6,ymm6,11
2955 add r10d,DWORD[((4+128))+rsp]
2956 and r12d,edx
2957 rorx r13d,edx,25
2958 vpxor ymm4,ymm4,ymm5
2959 rorx esi,edx,11
2960 lea r11d,[r14*1+r11]
2961 lea r10d,[r12*1+r10]
2962 vpslld ymm5,ymm5,11
2963 andn r12d,edx,r9d
2964 xor r13d,esi
2965 rorx r14d,edx,6
2966 vpxor ymm4,ymm4,ymm6
2967 lea r10d,[r12*1+r10]
2968 xor r13d,r14d
2969 mov esi,r11d
2970 vpsrld ymm6,ymm7,10
2971 rorx r12d,r11d,22
2972 lea r10d,[r13*1+r10]
2973 xor esi,eax
2974 vpxor ymm4,ymm4,ymm5
2975 rorx r14d,r11d,13
2976 rorx r13d,r11d,2
2977 lea ecx,[r10*1+rcx]
2978 vpsrlq ymm7,ymm7,17
2979 and r15d,esi
2980 vaesenc xmm9,xmm9,xmm10
2981 vmovdqu xmm10,XMMWORD[((144-128))+rdi]
2982 xor r14d,r12d
2983 xor r15d,eax
2984 vpaddd ymm2,ymm2,ymm4
2985 xor r14d,r13d
2986 lea r10d,[r15*1+r10]
2987 mov r12d,edx
2988 vpxor ymm6,ymm6,ymm7
2989 add r9d,DWORD[((8+128))+rsp]
2990 and r12d,ecx
2991 rorx r13d,ecx,25
2992 vpsrlq ymm7,ymm7,2
2993 rorx r15d,ecx,11
2994 lea r10d,[r14*1+r10]
2995 lea r9d,[r12*1+r9]
2996 vpxor ymm6,ymm6,ymm7
2997 andn r12d,ecx,r8d
2998 xor r13d,r15d
2999 rorx r14d,ecx,6
3000 vpshufd ymm6,ymm6,132
3001 lea r9d,[r12*1+r9]
3002 xor r13d,r14d
3003 mov r15d,r10d
3004 vpsrldq ymm6,ymm6,8
3005 rorx r12d,r10d,22
3006 lea r9d,[r13*1+r9]
3007 xor r15d,r11d
3008 vpaddd ymm2,ymm2,ymm6
3009 rorx r14d,r10d,13
3010 rorx r13d,r10d,2
3011 lea ebx,[r9*1+rbx]
3012 vpshufd ymm7,ymm2,80
3013 and esi,r15d
3014 vaesenc xmm9,xmm9,xmm10
3015 vmovdqu xmm10,XMMWORD[((160-128))+rdi]
3016 xor r14d,r12d
3017 xor esi,r11d
3018 vpsrld ymm6,ymm7,10
3019 xor r14d,r13d
3020 lea r9d,[rsi*1+r9]
3021 mov r12d,ecx
3022 vpsrlq ymm7,ymm7,17
3023 add r8d,DWORD[((12+128))+rsp]
3024 and r12d,ebx
3025 rorx r13d,ebx,25
3026 vpxor ymm6,ymm6,ymm7
3027 rorx esi,ebx,11
3028 lea r9d,[r14*1+r9]
3029 lea r8d,[r12*1+r8]
3030 vpsrlq ymm7,ymm7,2
3031 andn r12d,ebx,edx
3032 xor r13d,esi
3033 rorx r14d,ebx,6
3034 vpxor ymm6,ymm6,ymm7
3035 lea r8d,[r12*1+r8]
3036 xor r13d,r14d
3037 mov esi,r9d
3038 vpshufd ymm6,ymm6,232
3039 rorx r12d,r9d,22
3040 lea r8d,[r13*1+r8]
3041 xor esi,r10d
3042 vpslldq ymm6,ymm6,8
3043 rorx r14d,r9d,13
3044 rorx r13d,r9d,2
3045 lea eax,[r8*1+rax]
3046 vpaddd ymm2,ymm2,ymm6
3047 and r15d,esi
3048 vaesenclast xmm11,xmm9,xmm10
3049 vaesenc xmm9,xmm9,xmm10
3050 vmovdqu xmm10,XMMWORD[((176-128))+rdi]
3051 xor r14d,r12d
3052 xor r15d,r10d
3053 vpaddd ymm6,ymm2,YMMWORD[64+rbp]
3054 xor r14d,r13d
3055 lea r8d,[r15*1+r8]
3056 mov r12d,ebx
3057 vmovdqa YMMWORD[rsp],ymm6
3058 vpalignr ymm4,ymm0,ymm3,4
3059 add edx,DWORD[((32+128))+rsp]
3060 and r12d,eax
3061 rorx r13d,eax,25
3062 vpalignr ymm7,ymm2,ymm1,4
3063 rorx r15d,eax,11
3064 lea r8d,[r14*1+r8]
3065 lea edx,[r12*1+rdx]
3066 vpsrld ymm6,ymm4,7
3067 andn r12d,eax,ecx
3068 xor r13d,r15d
3069 rorx r14d,eax,6
3070 vpaddd ymm3,ymm3,ymm7
3071 lea edx,[r12*1+rdx]
3072 xor r13d,r14d
3073 mov r15d,r8d
3074 vpsrld ymm7,ymm4,3
3075 rorx r12d,r8d,22
3076 lea edx,[r13*1+rdx]
3077 xor r15d,r9d
3078 vpslld ymm5,ymm4,14
3079 rorx r14d,r8d,13
3080 rorx r13d,r8d,2
3081 lea r11d,[rdx*1+r11]
3082 vpxor ymm4,ymm7,ymm6
3083 and esi,r15d
3084 vpand xmm8,xmm11,xmm12
3085 vaesenc xmm9,xmm9,xmm10
3086 vmovdqu xmm10,XMMWORD[((192-128))+rdi]
3087 xor r14d,r12d
3088 xor esi,r9d
3089 vpshufd ymm7,ymm2,250
3090 xor r14d,r13d
3091 lea edx,[rsi*1+rdx]
3092 mov r12d,eax
3093 vpsrld ymm6,ymm6,11
3094 add ecx,DWORD[((36+128))+rsp]
3095 and r12d,r11d
3096 rorx r13d,r11d,25
3097 vpxor ymm4,ymm4,ymm5
3098 rorx esi,r11d,11
3099 lea edx,[r14*1+rdx]
3100 lea ecx,[r12*1+rcx]
3101 vpslld ymm5,ymm5,11
3102 andn r12d,r11d,ebx
3103 xor r13d,esi
3104 rorx r14d,r11d,6
3105 vpxor ymm4,ymm4,ymm6
3106 lea ecx,[r12*1+rcx]
3107 xor r13d,r14d
3108 mov esi,edx
3109 vpsrld ymm6,ymm7,10
3110 rorx r12d,edx,22
3111 lea ecx,[r13*1+rcx]
3112 xor esi,r8d
3113 vpxor ymm4,ymm4,ymm5
3114 rorx r14d,edx,13
3115 rorx r13d,edx,2
3116 lea r10d,[rcx*1+r10]
3117 vpsrlq ymm7,ymm7,17
3118 and r15d,esi
3119 vaesenclast xmm11,xmm9,xmm10
3120 vaesenc xmm9,xmm9,xmm10
3121 vmovdqu xmm10,XMMWORD[((208-128))+rdi]
3122 xor r14d,r12d
3123 xor r15d,r8d
3124 vpaddd ymm3,ymm3,ymm4
3125 xor r14d,r13d
3126 lea ecx,[r15*1+rcx]
3127 mov r12d,r11d
3128 vpxor ymm6,ymm6,ymm7
3129 add ebx,DWORD[((40+128))+rsp]
3130 and r12d,r10d
3131 rorx r13d,r10d,25
3132 vpsrlq ymm7,ymm7,2
3133 rorx r15d,r10d,11
3134 lea ecx,[r14*1+rcx]
3135 lea ebx,[r12*1+rbx]
3136 vpxor ymm6,ymm6,ymm7
3137 andn r12d,r10d,eax
3138 xor r13d,r15d
3139 rorx r14d,r10d,6
3140 vpshufd ymm6,ymm6,132
3141 lea ebx,[r12*1+rbx]
3142 xor r13d,r14d
3143 mov r15d,ecx
3144 vpsrldq ymm6,ymm6,8
3145 rorx r12d,ecx,22
3146 lea ebx,[r13*1+rbx]
3147 xor r15d,edx
3148 vpaddd ymm3,ymm3,ymm6
3149 rorx r14d,ecx,13
3150 rorx r13d,ecx,2
3151 lea r9d,[rbx*1+r9]
3152 vpshufd ymm7,ymm3,80
3153 and esi,r15d
3154 vpand xmm11,xmm11,xmm13
3155 vaesenc xmm9,xmm9,xmm10
3156 vmovdqu xmm10,XMMWORD[((224-128))+rdi]
3157 xor r14d,r12d
3158 xor esi,edx
3159 vpsrld ymm6,ymm7,10
3160 xor r14d,r13d
3161 lea ebx,[rsi*1+rbx]
3162 mov r12d,r10d
3163 vpsrlq ymm7,ymm7,17
3164 add eax,DWORD[((44+128))+rsp]
3165 and r12d,r9d
3166 rorx r13d,r9d,25
3167 vpxor ymm6,ymm6,ymm7
3168 rorx esi,r9d,11
3169 lea ebx,[r14*1+rbx]
3170 lea eax,[r12*1+rax]
3171 vpsrlq ymm7,ymm7,2
3172 andn r12d,r9d,r11d
3173 xor r13d,esi
3174 rorx r14d,r9d,6
3175 vpxor ymm6,ymm6,ymm7
3176 lea eax,[r12*1+rax]
3177 xor r13d,r14d
3178 mov esi,ebx
3179 vpshufd ymm6,ymm6,232
3180 rorx r12d,ebx,22
3181 lea eax,[r13*1+rax]
3182 xor esi,ecx
3183 vpslldq ymm6,ymm6,8
3184 rorx r14d,ebx,13
3185 rorx r13d,ebx,2
3186 lea r8d,[rax*1+r8]
3187 vpaddd ymm3,ymm3,ymm6
3188 and r15d,esi
3189 vpor xmm8,xmm8,xmm11
3190 vaesenclast xmm11,xmm9,xmm10
3191 vmovdqu xmm10,XMMWORD[((0-128))+rdi]
3192 xor r14d,r12d
3193 xor r15d,ecx
3194 vpaddd ymm6,ymm3,YMMWORD[96+rbp]
3195 xor r14d,r13d
3196 lea eax,[r15*1+rax]
3197 mov r12d,r9d
3198 vmovdqa YMMWORD[32+rsp],ymm6
3199 vmovq r13,xmm15
3200 vpextrq r15,xmm15,1
3201 vpand xmm11,xmm11,xmm14
3202 vpor xmm8,xmm8,xmm11
3203 vmovdqu XMMWORD[r13*1+r15],xmm8
3204 lea r13,[16+r13]
3205 lea rbp,[128+rbp]
3206 cmp BYTE[3+rbp],0
3207 jne NEAR $L$avx2_00_47
3208 vmovdqu xmm9,XMMWORD[r13]
3209 vpinsrq xmm15,xmm15,r13,0
3210 add r11d,DWORD[((0+64))+rsp]
3211 and r12d,r8d
3212 rorx r13d,r8d,25
3213 rorx r15d,r8d,11
3214 lea eax,[r14*1+rax]
3215 lea r11d,[r12*1+r11]
3216 andn r12d,r8d,r10d
3217 xor r13d,r15d
3218 rorx r14d,r8d,6
3219 lea r11d,[r12*1+r11]
3220 xor r13d,r14d
3221 mov r15d,eax
3222 rorx r12d,eax,22
3223 lea r11d,[r13*1+r11]
3224 xor r15d,ebx
3225 rorx r14d,eax,13
3226 rorx r13d,eax,2
3227 lea edx,[r11*1+rdx]
3228 and esi,r15d
3229 vpxor xmm9,xmm9,xmm10
3230 vmovdqu xmm10,XMMWORD[((16-128))+rdi]
3231 xor r14d,r12d
3232 xor esi,ebx
3233 xor r14d,r13d
3234 lea r11d,[rsi*1+r11]
3235 mov r12d,r8d
3236 add r10d,DWORD[((4+64))+rsp]
3237 and r12d,edx
3238 rorx r13d,edx,25
3239 rorx esi,edx,11
3240 lea r11d,[r14*1+r11]
3241 lea r10d,[r12*1+r10]
3242 andn r12d,edx,r9d
3243 xor r13d,esi
3244 rorx r14d,edx,6
3245 lea r10d,[r12*1+r10]
3246 xor r13d,r14d
3247 mov esi,r11d
3248 rorx r12d,r11d,22
3249 lea r10d,[r13*1+r10]
3250 xor esi,eax
3251 rorx r14d,r11d,13
3252 rorx r13d,r11d,2
3253 lea ecx,[r10*1+rcx]
3254 and r15d,esi
3255 vpxor xmm9,xmm9,xmm8
3256 xor r14d,r12d
3257 xor r15d,eax
3258 xor r14d,r13d
3259 lea r10d,[r15*1+r10]
3260 mov r12d,edx
3261 add r9d,DWORD[((8+64))+rsp]
3262 and r12d,ecx
3263 rorx r13d,ecx,25
3264 rorx r15d,ecx,11
3265 lea r10d,[r14*1+r10]
3266 lea r9d,[r12*1+r9]
3267 andn r12d,ecx,r8d
3268 xor r13d,r15d
3269 rorx r14d,ecx,6
3270 lea r9d,[r12*1+r9]
3271 xor r13d,r14d
3272 mov r15d,r10d
3273 rorx r12d,r10d,22
3274 lea r9d,[r13*1+r9]
3275 xor r15d,r11d
3276 rorx r14d,r10d,13
3277 rorx r13d,r10d,2
3278 lea ebx,[r9*1+rbx]
3279 and esi,r15d
3280 vaesenc xmm9,xmm9,xmm10
3281 vmovdqu xmm10,XMMWORD[((32-128))+rdi]
3282 xor r14d,r12d
3283 xor esi,r11d
3284 xor r14d,r13d
3285 lea r9d,[rsi*1+r9]
3286 mov r12d,ecx
3287 add r8d,DWORD[((12+64))+rsp]
3288 and r12d,ebx
3289 rorx r13d,ebx,25
3290 rorx esi,ebx,11
3291 lea r9d,[r14*1+r9]
3292 lea r8d,[r12*1+r8]
3293 andn r12d,ebx,edx
3294 xor r13d,esi
3295 rorx r14d,ebx,6
3296 lea r8d,[r12*1+r8]
3297 xor r13d,r14d
3298 mov esi,r9d
3299 rorx r12d,r9d,22
3300 lea r8d,[r13*1+r8]
3301 xor esi,r10d
3302 rorx r14d,r9d,13
3303 rorx r13d,r9d,2
3304 lea eax,[r8*1+rax]
3305 and r15d,esi
3306 vaesenc xmm9,xmm9,xmm10
3307 vmovdqu xmm10,XMMWORD[((48-128))+rdi]
3308 xor r14d,r12d
3309 xor r15d,r10d
3310 xor r14d,r13d
3311 lea r8d,[r15*1+r8]
3312 mov r12d,ebx
3313 add edx,DWORD[((32+64))+rsp]
3314 and r12d,eax
3315 rorx r13d,eax,25
3316 rorx r15d,eax,11
3317 lea r8d,[r14*1+r8]
3318 lea edx,[r12*1+rdx]
3319 andn r12d,eax,ecx
3320 xor r13d,r15d
3321 rorx r14d,eax,6
3322 lea edx,[r12*1+rdx]
3323 xor r13d,r14d
3324 mov r15d,r8d
3325 rorx r12d,r8d,22
3326 lea edx,[r13*1+rdx]
3327 xor r15d,r9d
3328 rorx r14d,r8d,13
3329 rorx r13d,r8d,2
3330 lea r11d,[rdx*1+r11]
3331 and esi,r15d
3332 vaesenc xmm9,xmm9,xmm10
3333 vmovdqu xmm10,XMMWORD[((64-128))+rdi]
3334 xor r14d,r12d
3335 xor esi,r9d
3336 xor r14d,r13d
3337 lea edx,[rsi*1+rdx]
3338 mov r12d,eax
3339 add ecx,DWORD[((36+64))+rsp]
3340 and r12d,r11d
3341 rorx r13d,r11d,25
3342 rorx esi,r11d,11
3343 lea edx,[r14*1+rdx]
3344 lea ecx,[r12*1+rcx]
3345 andn r12d,r11d,ebx
3346 xor r13d,esi
3347 rorx r14d,r11d,6
3348 lea ecx,[r12*1+rcx]
3349 xor r13d,r14d
3350 mov esi,edx
3351 rorx r12d,edx,22
3352 lea ecx,[r13*1+rcx]
3353 xor esi,r8d
3354 rorx r14d,edx,13
3355 rorx r13d,edx,2
3356 lea r10d,[rcx*1+r10]
3357 and r15d,esi
3358 vaesenc xmm9,xmm9,xmm10
3359 vmovdqu xmm10,XMMWORD[((80-128))+rdi]
3360 xor r14d,r12d
3361 xor r15d,r8d
3362 xor r14d,r13d
3363 lea ecx,[r15*1+rcx]
3364 mov r12d,r11d
3365 add ebx,DWORD[((40+64))+rsp]
3366 and r12d,r10d
3367 rorx r13d,r10d,25
3368 rorx r15d,r10d,11
3369 lea ecx,[r14*1+rcx]
3370 lea ebx,[r12*1+rbx]
3371 andn r12d,r10d,eax
3372 xor r13d,r15d
3373 rorx r14d,r10d,6
3374 lea ebx,[r12*1+rbx]
3375 xor r13d,r14d
3376 mov r15d,ecx
3377 rorx r12d,ecx,22
3378 lea ebx,[r13*1+rbx]
3379 xor r15d,edx
3380 rorx r14d,ecx,13
3381 rorx r13d,ecx,2
3382 lea r9d,[rbx*1+r9]
3383 and esi,r15d
3384 vaesenc xmm9,xmm9,xmm10
3385 vmovdqu xmm10,XMMWORD[((96-128))+rdi]
3386 xor r14d,r12d
3387 xor esi,edx
3388 xor r14d,r13d
3389 lea ebx,[rsi*1+rbx]
3390 mov r12d,r10d
3391 add eax,DWORD[((44+64))+rsp]
3392 and r12d,r9d
3393 rorx r13d,r9d,25
3394 rorx esi,r9d,11
3395 lea ebx,[r14*1+rbx]
3396 lea eax,[r12*1+rax]
3397 andn r12d,r9d,r11d
3398 xor r13d,esi
3399 rorx r14d,r9d,6
3400 lea eax,[r12*1+rax]
3401 xor r13d,r14d
3402 mov esi,ebx
3403 rorx r12d,ebx,22
3404 lea eax,[r13*1+rax]
3405 xor esi,ecx
3406 rorx r14d,ebx,13
3407 rorx r13d,ebx,2
3408 lea r8d,[rax*1+r8]
3409 and r15d,esi
3410 vaesenc xmm9,xmm9,xmm10
3411 vmovdqu xmm10,XMMWORD[((112-128))+rdi]
3412 xor r14d,r12d
3413 xor r15d,ecx
3414 xor r14d,r13d
3415 lea eax,[r15*1+rax]
3416 mov r12d,r9d
3417 add r11d,DWORD[rsp]
3418 and r12d,r8d
3419 rorx r13d,r8d,25
3420 rorx r15d,r8d,11
3421 lea eax,[r14*1+rax]
3422 lea r11d,[r12*1+r11]
3423 andn r12d,r8d,r10d
3424 xor r13d,r15d
3425 rorx r14d,r8d,6
3426 lea r11d,[r12*1+r11]
3427 xor r13d,r14d
3428 mov r15d,eax
3429 rorx r12d,eax,22
3430 lea r11d,[r13*1+r11]
3431 xor r15d,ebx
3432 rorx r14d,eax,13
3433 rorx r13d,eax,2
3434 lea edx,[r11*1+rdx]
3435 and esi,r15d
3436 vaesenc xmm9,xmm9,xmm10
3437 vmovdqu xmm10,XMMWORD[((128-128))+rdi]
3438 xor r14d,r12d
3439 xor esi,ebx
3440 xor r14d,r13d
3441 lea r11d,[rsi*1+r11]
3442 mov r12d,r8d
3443 add r10d,DWORD[4+rsp]
3444 and r12d,edx
3445 rorx r13d,edx,25
3446 rorx esi,edx,11
3447 lea r11d,[r14*1+r11]
3448 lea r10d,[r12*1+r10]
3449 andn r12d,edx,r9d
3450 xor r13d,esi
3451 rorx r14d,edx,6
3452 lea r10d,[r12*1+r10]
3453 xor r13d,r14d
3454 mov esi,r11d
3455 rorx r12d,r11d,22
3456 lea r10d,[r13*1+r10]
3457 xor esi,eax
3458 rorx r14d,r11d,13
3459 rorx r13d,r11d,2
3460 lea ecx,[r10*1+rcx]
3461 and r15d,esi
3462 vaesenc xmm9,xmm9,xmm10
3463 vmovdqu xmm10,XMMWORD[((144-128))+rdi]
3464 xor r14d,r12d
3465 xor r15d,eax
3466 xor r14d,r13d
3467 lea r10d,[r15*1+r10]
3468 mov r12d,edx
3469 add r9d,DWORD[8+rsp]
3470 and r12d,ecx
3471 rorx r13d,ecx,25
3472 rorx r15d,ecx,11
3473 lea r10d,[r14*1+r10]
3474 lea r9d,[r12*1+r9]
3475 andn r12d,ecx,r8d
3476 xor r13d,r15d
3477 rorx r14d,ecx,6
3478 lea r9d,[r12*1+r9]
3479 xor r13d,r14d
3480 mov r15d,r10d
3481 rorx r12d,r10d,22
3482 lea r9d,[r13*1+r9]
3483 xor r15d,r11d
3484 rorx r14d,r10d,13
3485 rorx r13d,r10d,2
3486 lea ebx,[r9*1+rbx]
3487 and esi,r15d
3488 vaesenc xmm9,xmm9,xmm10
3489 vmovdqu xmm10,XMMWORD[((160-128))+rdi]
3490 xor r14d,r12d
3491 xor esi,r11d
3492 xor r14d,r13d
3493 lea r9d,[rsi*1+r9]
3494 mov r12d,ecx
3495 add r8d,DWORD[12+rsp]
3496 and r12d,ebx
3497 rorx r13d,ebx,25
3498 rorx esi,ebx,11
3499 lea r9d,[r14*1+r9]
3500 lea r8d,[r12*1+r8]
3501 andn r12d,ebx,edx
3502 xor r13d,esi
3503 rorx r14d,ebx,6
3504 lea r8d,[r12*1+r8]
3505 xor r13d,r14d
3506 mov esi,r9d
3507 rorx r12d,r9d,22
3508 lea r8d,[r13*1+r8]
3509 xor esi,r10d
3510 rorx r14d,r9d,13
3511 rorx r13d,r9d,2
3512 lea eax,[r8*1+rax]
3513 and r15d,esi
3514 vaesenclast xmm11,xmm9,xmm10
3515 vaesenc xmm9,xmm9,xmm10
3516 vmovdqu xmm10,XMMWORD[((176-128))+rdi]
3517 xor r14d,r12d
3518 xor r15d,r10d
3519 xor r14d,r13d
3520 lea r8d,[r15*1+r8]
3521 mov r12d,ebx
3522 add edx,DWORD[32+rsp]
3523 and r12d,eax
3524 rorx r13d,eax,25
3525 rorx r15d,eax,11
3526 lea r8d,[r14*1+r8]
3527 lea edx,[r12*1+rdx]
3528 andn r12d,eax,ecx
3529 xor r13d,r15d
3530 rorx r14d,eax,6
3531 lea edx,[r12*1+rdx]
3532 xor r13d,r14d
3533 mov r15d,r8d
3534 rorx r12d,r8d,22
3535 lea edx,[r13*1+rdx]
3536 xor r15d,r9d
3537 rorx r14d,r8d,13
3538 rorx r13d,r8d,2
3539 lea r11d,[rdx*1+r11]
3540 and esi,r15d
3541 vpand xmm8,xmm11,xmm12
3542 vaesenc xmm9,xmm9,xmm10
3543 vmovdqu xmm10,XMMWORD[((192-128))+rdi]
3544 xor r14d,r12d
3545 xor esi,r9d
3546 xor r14d,r13d
3547 lea edx,[rsi*1+rdx]
3548 mov r12d,eax
3549 add ecx,DWORD[36+rsp]
3550 and r12d,r11d
3551 rorx r13d,r11d,25
3552 rorx esi,r11d,11
3553 lea edx,[r14*1+rdx]
3554 lea ecx,[r12*1+rcx]
3555 andn r12d,r11d,ebx
3556 xor r13d,esi
3557 rorx r14d,r11d,6
3558 lea ecx,[r12*1+rcx]
3559 xor r13d,r14d
3560 mov esi,edx
3561 rorx r12d,edx,22
3562 lea ecx,[r13*1+rcx]
3563 xor esi,r8d
3564 rorx r14d,edx,13
3565 rorx r13d,edx,2
3566 lea r10d,[rcx*1+r10]
3567 and r15d,esi
3568 vaesenclast xmm11,xmm9,xmm10
3569 vaesenc xmm9,xmm9,xmm10
3570 vmovdqu xmm10,XMMWORD[((208-128))+rdi]
3571 xor r14d,r12d
3572 xor r15d,r8d
3573 xor r14d,r13d
3574 lea ecx,[r15*1+rcx]
3575 mov r12d,r11d
3576 add ebx,DWORD[40+rsp]
3577 and r12d,r10d
3578 rorx r13d,r10d,25
3579 rorx r15d,r10d,11
3580 lea ecx,[r14*1+rcx]
3581 lea ebx,[r12*1+rbx]
3582 andn r12d,r10d,eax
3583 xor r13d,r15d
3584 rorx r14d,r10d,6
3585 lea ebx,[r12*1+rbx]
3586 xor r13d,r14d
3587 mov r15d,ecx
3588 rorx r12d,ecx,22
3589 lea ebx,[r13*1+rbx]
3590 xor r15d,edx
3591 rorx r14d,ecx,13
3592 rorx r13d,ecx,2
3593 lea r9d,[rbx*1+r9]
3594 and esi,r15d
3595 vpand xmm11,xmm11,xmm13
3596 vaesenc xmm9,xmm9,xmm10
3597 vmovdqu xmm10,XMMWORD[((224-128))+rdi]
3598 xor r14d,r12d
3599 xor esi,edx
3600 xor r14d,r13d
3601 lea ebx,[rsi*1+rbx]
3602 mov r12d,r10d
3603 add eax,DWORD[44+rsp]
3604 and r12d,r9d
3605 rorx r13d,r9d,25
3606 rorx esi,r9d,11
3607 lea ebx,[r14*1+rbx]
3608 lea eax,[r12*1+rax]
3609 andn r12d,r9d,r11d
3610 xor r13d,esi
3611 rorx r14d,r9d,6
3612 lea eax,[r12*1+rax]
3613 xor r13d,r14d
3614 mov esi,ebx
3615 rorx r12d,ebx,22
3616 lea eax,[r13*1+rax]
3617 xor esi,ecx
3618 rorx r14d,ebx,13
3619 rorx r13d,ebx,2
3620 lea r8d,[rax*1+r8]
3621 and r15d,esi
3622 vpor xmm8,xmm8,xmm11
3623 vaesenclast xmm11,xmm9,xmm10
3624 vmovdqu xmm10,XMMWORD[((0-128))+rdi]
3625 xor r14d,r12d
3626 xor r15d,ecx
3627 xor r14d,r13d
3628 lea eax,[r15*1+rax]
3629 mov r12d,r9d
3630 vpextrq r12,xmm15,1
3631 vmovq r13,xmm15
3632 mov r15,QWORD[552+rsp]
3633 add eax,r14d
3634 lea rbp,[448+rsp]
3635
3636 vpand xmm11,xmm11,xmm14
3637 vpor xmm8,xmm8,xmm11
3638 vmovdqu XMMWORD[r13*1+r12],xmm8
3639 lea r13,[16+r13]
3640
3641 add eax,DWORD[r15]
3642 add ebx,DWORD[4+r15]
3643 add ecx,DWORD[8+r15]
3644 add edx,DWORD[12+r15]
3645 add r8d,DWORD[16+r15]
3646 add r9d,DWORD[20+r15]
3647 add r10d,DWORD[24+r15]
3648 add r11d,DWORD[28+r15]
3649
3650 mov DWORD[r15],eax
3651 mov DWORD[4+r15],ebx
3652 mov DWORD[8+r15],ecx
3653 mov DWORD[12+r15],edx
3654 mov DWORD[16+r15],r8d
3655 mov DWORD[20+r15],r9d
3656 mov DWORD[24+r15],r10d
3657 mov DWORD[28+r15],r11d
3658
3659 cmp r13,QWORD[80+rbp]
3660 je NEAR $L$done_avx2
3661
3662 xor r14d,r14d
3663 mov esi,ebx
3664 mov r12d,r9d
3665 xor esi,ecx
3666 jmp NEAR $L$ower_avx2
; ----------------------------------------------------------------------
; $L$ower_avx2 -- 16 scalar SHA-256 rounds per pass, interleaved with the
; AES-CBC encryption of one 16-byte block.
;
; Round inputs are dwords read from the frame at offsets 16..28 and
; 48..60 from rbp, i.e. the upper 16 bytes of each 32-byte slot
; (presumably the X+K values of the second interleaved block -- confirm
; against the schedule code earlier in the function).  rbp steps down by
; 64 twice per pass and the loop repeats while rbp >= rsp (unsigned);
; with the entry value rbp = 448+rsp set before the jump here, that is
; four passes (64 rounds, four AES blocks).
;
; Loop invariants at the top of every pass:
;   eax,ebx,ecx,edx,r8d,r9d,r10d,r11d = working variables a..h
;   esi  = b XOR c        r12d = f
;   r14d = rotate term of the previous round, not yet added to eax
;   r12d,r13d,r14d,r15d,esi are scratch inside a round; the a..h roles
;   rotate by one register every round.
;   xmm8  = previous CBC output block     xmm9  = AES state
;   xmm10 = next AES round key, fetched as (N-128)+rdi
;   xmm15 = low qword: parked copy of r13; high qword: added to r13 to
;           form the store address (presumably out - in)
;   xmm12/xmm13/xmm14 = masks applied to the 10/12/14-round results
;           (presumably exactly one is all-ones; set up before this chunk)
; ----------------------------------------------------------------------
3667ALIGN 16
3668$L$ower_avx2:
3669 vmovdqu xmm9,XMMWORD[r13] ; xmm9 = next 16 input bytes for AES
3670 vpinsrq xmm15,xmm15,r13,0 ; park r13 in xmm15[63:0]; r13d is scratch below
3671 add r11d,DWORD[((0+16))+rbp] ; round 1/16: h (r11d) += scheduled dword
3672 and r12d,r8d ; r12d = f AND e
3673 rorx r13d,r8d,25
3674 rorx r15d,r8d,11
3675 lea eax,[r14*1+rax] ; add the rotate term left pending in r14d
3676 lea r11d,[r12*1+r11]
3677 andn r12d,r8d,r10d ; r12d = (NOT e) AND g
3678 xor r13d,r15d
3679 rorx r14d,r8d,6
3680 lea r11d,[r12*1+r11]
3681 xor r13d,r14d ; r13d = (e ror 25) ^ (e ror 11) ^ (e ror 6)
3682 mov r15d,eax
3683 rorx r12d,eax,22
3684 lea r11d,[r13*1+r11]
3685 xor r15d,ebx ; r15d = a ^ b (serves as b ^ c in the next round)
3686 rorx r14d,eax,13
3687 rorx r13d,eax,2
3688 lea edx,[r11*1+rdx] ; d += h
3689 and esi,r15d ; esi = (b ^ c) AND (a ^ b)
3690 vpxor xmm9,xmm9,xmm10 ; AES: input ^ round key loaded from (0-128)+rdi
3691 vmovdqu xmm10,XMMWORD[((16-128))+rdi]
3692 xor r14d,r12d
3693 xor esi,ebx ; esi = ((b ^ c) AND (a ^ b)) ^ b = majority(a,b,c)
3694 xor r14d,r13d ; r14d = (a ror 22) ^ (a ror 13) ^ (a ror 2); added next round
3695 lea r11d,[rsi*1+r11]
3696 mov r12d,r8d ; preload r12d with the next round's f
3697 add r10d,DWORD[((4+16))+rbp] ; round 2/16: h is now r10d
3698 and r12d,edx
3699 rorx r13d,edx,25
3700 rorx esi,edx,11
3701 lea r11d,[r14*1+r11]
3702 lea r10d,[r12*1+r10]
3703 andn r12d,edx,r9d
3704 xor r13d,esi
3705 rorx r14d,edx,6
3706 lea r10d,[r12*1+r10]
3707 xor r13d,r14d
3708 mov esi,r11d
3709 rorx r12d,r11d,22
3710 lea r10d,[r13*1+r10]
3711 xor esi,eax
3712 rorx r14d,r11d,13
3713 rorx r13d,r11d,2
3714 lea ecx,[r10*1+rcx]
3715 and r15d,esi
3716 vpxor xmm9,xmm9,xmm8 ; CBC chaining: xor in the previous output block
3717 xor r14d,r12d
3718 xor r15d,eax
3719 xor r14d,r13d
3720 lea r10d,[r15*1+r10]
3721 mov r12d,edx
3722 add r9d,DWORD[((8+16))+rbp] ; round 3/16: h is now r9d
3723 and r12d,ecx
3724 rorx r13d,ecx,25
3725 rorx r15d,ecx,11
3726 lea r10d,[r14*1+r10]
3727 lea r9d,[r12*1+r9]
3728 andn r12d,ecx,r8d
3729 xor r13d,r15d
3730 rorx r14d,ecx,6
3731 lea r9d,[r12*1+r9]
3732 xor r13d,r14d
3733 mov r15d,r10d
3734 rorx r12d,r10d,22
3735 lea r9d,[r13*1+r9]
3736 xor r15d,r11d
3737 rorx r14d,r10d,13
3738 rorx r13d,r10d,2
3739 lea ebx,[r9*1+rbx]
3740 and esi,r15d
3741 vaesenc xmm9,xmm9,xmm10 ; AES round with the key at offset 16
3742 vmovdqu xmm10,XMMWORD[((32-128))+rdi]
3743 xor r14d,r12d
3744 xor esi,r11d
3745 xor r14d,r13d
3746 lea r9d,[rsi*1+r9]
3747 mov r12d,ecx
3748 add r8d,DWORD[((12+16))+rbp] ; round 4/16: h is now r8d
3749 and r12d,ebx
3750 rorx r13d,ebx,25
3751 rorx esi,ebx,11
3752 lea r9d,[r14*1+r9]
3753 lea r8d,[r12*1+r8]
3754 andn r12d,ebx,edx
3755 xor r13d,esi
3756 rorx r14d,ebx,6
3757 lea r8d,[r12*1+r8]
3758 xor r13d,r14d
3759 mov esi,r9d
3760 rorx r12d,r9d,22
3761 lea r8d,[r13*1+r8]
3762 xor esi,r10d
3763 rorx r14d,r9d,13
3764 rorx r13d,r9d,2
3765 lea eax,[r8*1+rax]
3766 and r15d,esi
3767 vaesenc xmm9,xmm9,xmm10
3768 vmovdqu xmm10,XMMWORD[((48-128))+rdi]
3769 xor r14d,r12d
3770 xor r15d,r10d
3771 xor r14d,r13d
3772 lea r8d,[r15*1+r8]
3773 mov r12d,ebx
3774 add edx,DWORD[((32+16))+rbp] ; round 5/16: h is now edx; next 32-byte slot
3775 and r12d,eax
3776 rorx r13d,eax,25
3777 rorx r15d,eax,11
3778 lea r8d,[r14*1+r8]
3779 lea edx,[r12*1+rdx]
3780 andn r12d,eax,ecx
3781 xor r13d,r15d
3782 rorx r14d,eax,6
3783 lea edx,[r12*1+rdx]
3784 xor r13d,r14d
3785 mov r15d,r8d
3786 rorx r12d,r8d,22
3787 lea edx,[r13*1+rdx]
3788 xor r15d,r9d
3789 rorx r14d,r8d,13
3790 rorx r13d,r8d,2
3791 lea r11d,[rdx*1+r11]
3792 and esi,r15d
3793 vaesenc xmm9,xmm9,xmm10
3794 vmovdqu xmm10,XMMWORD[((64-128))+rdi]
3795 xor r14d,r12d
3796 xor esi,r9d
3797 xor r14d,r13d
3798 lea edx,[rsi*1+rdx]
3799 mov r12d,eax
3800 add ecx,DWORD[((36+16))+rbp] ; round 6/16: h is now ecx
3801 and r12d,r11d
3802 rorx r13d,r11d,25
3803 rorx esi,r11d,11
3804 lea edx,[r14*1+rdx]
3805 lea ecx,[r12*1+rcx]
3806 andn r12d,r11d,ebx
3807 xor r13d,esi
3808 rorx r14d,r11d,6
3809 lea ecx,[r12*1+rcx]
3810 xor r13d,r14d
3811 mov esi,edx
3812 rorx r12d,edx,22
3813 lea ecx,[r13*1+rcx]
3814 xor esi,r8d
3815 rorx r14d,edx,13
3816 rorx r13d,edx,2
3817 lea r10d,[rcx*1+r10]
3818 and r15d,esi
3819 vaesenc xmm9,xmm9,xmm10
3820 vmovdqu xmm10,XMMWORD[((80-128))+rdi]
3821 xor r14d,r12d
3822 xor r15d,r8d
3823 xor r14d,r13d
3824 lea ecx,[r15*1+rcx]
3825 mov r12d,r11d
3826 add ebx,DWORD[((40+16))+rbp] ; round 7/16: h is now ebx
3827 and r12d,r10d
3828 rorx r13d,r10d,25
3829 rorx r15d,r10d,11
3830 lea ecx,[r14*1+rcx]
3831 lea ebx,[r12*1+rbx]
3832 andn r12d,r10d,eax
3833 xor r13d,r15d
3834 rorx r14d,r10d,6
3835 lea ebx,[r12*1+rbx]
3836 xor r13d,r14d
3837 mov r15d,ecx
3838 rorx r12d,ecx,22
3839 lea ebx,[r13*1+rbx]
3840 xor r15d,edx
3841 rorx r14d,ecx,13
3842 rorx r13d,ecx,2
3843 lea r9d,[rbx*1+r9]
3844 and esi,r15d
3845 vaesenc xmm9,xmm9,xmm10
3846 vmovdqu xmm10,XMMWORD[((96-128))+rdi]
3847 xor r14d,r12d
3848 xor esi,edx
3849 xor r14d,r13d
3850 lea ebx,[rsi*1+rbx]
3851 mov r12d,r10d
3852 add eax,DWORD[((44+16))+rbp] ; round 8/16: h is now eax
3853 and r12d,r9d
3854 rorx r13d,r9d,25
3855 rorx esi,r9d,11
3856 lea ebx,[r14*1+rbx]
3857 lea eax,[r12*1+rax]
3858 andn r12d,r9d,r11d
3859 xor r13d,esi
3860 rorx r14d,r9d,6
3861 lea eax,[r12*1+rax]
3862 xor r13d,r14d
3863 mov esi,ebx
3864 rorx r12d,ebx,22
3865 lea eax,[r13*1+rax]
3866 xor esi,ecx
3867 rorx r14d,ebx,13
3868 rorx r13d,ebx,2
3869 lea r8d,[rax*1+r8]
3870 and r15d,esi
3871 vaesenc xmm9,xmm9,xmm10
3872 vmovdqu xmm10,XMMWORD[((112-128))+rdi]
3873 xor r14d,r12d
3874 xor r15d,ecx
3875 xor r14d,r13d
3876 lea eax,[r15*1+rax]
3877 mov r12d,r9d
3878 lea rbp,[((-64))+rbp] ; step down to the next 64 bytes of round inputs
3879 add r11d,DWORD[((0+16))+rbp] ; round 9/16: roles are back to h = r11d
3880 and r12d,r8d
3881 rorx r13d,r8d,25
3882 rorx r15d,r8d,11
3883 lea eax,[r14*1+rax]
3884 lea r11d,[r12*1+r11]
3885 andn r12d,r8d,r10d
3886 xor r13d,r15d
3887 rorx r14d,r8d,6
3888 lea r11d,[r12*1+r11]
3889 xor r13d,r14d
3890 mov r15d,eax
3891 rorx r12d,eax,22
3892 lea r11d,[r13*1+r11]
3893 xor r15d,ebx
3894 rorx r14d,eax,13
3895 rorx r13d,eax,2
3896 lea edx,[r11*1+rdx]
3897 and esi,r15d
3898 vaesenc xmm9,xmm9,xmm10
3899 vmovdqu xmm10,XMMWORD[((128-128))+rdi]
3900 xor r14d,r12d
3901 xor esi,ebx
3902 xor r14d,r13d
3903 lea r11d,[rsi*1+r11]
3904 mov r12d,r8d
3905 add r10d,DWORD[((4+16))+rbp] ; round 10/16
3906 and r12d,edx
3907 rorx r13d,edx,25
3908 rorx esi,edx,11
3909 lea r11d,[r14*1+r11]
3910 lea r10d,[r12*1+r10]
3911 andn r12d,edx,r9d
3912 xor r13d,esi
3913 rorx r14d,edx,6
3914 lea r10d,[r12*1+r10]
3915 xor r13d,r14d
3916 mov esi,r11d
3917 rorx r12d,r11d,22
3918 lea r10d,[r13*1+r10]
3919 xor esi,eax
3920 rorx r14d,r11d,13
3921 rorx r13d,r11d,2
3922 lea ecx,[r10*1+rcx]
3923 and r15d,esi
3924 vaesenc xmm9,xmm9,xmm10
3925 vmovdqu xmm10,XMMWORD[((144-128))+rdi]
3926 xor r14d,r12d
3927 xor r15d,eax
3928 xor r14d,r13d
3929 lea r10d,[r15*1+r10]
3930 mov r12d,edx
3931 add r9d,DWORD[((8+16))+rbp] ; round 11/16
3932 and r12d,ecx
3933 rorx r13d,ecx,25
3934 rorx r15d,ecx,11
3935 lea r10d,[r14*1+r10]
3936 lea r9d,[r12*1+r9]
3937 andn r12d,ecx,r8d
3938 xor r13d,r15d
3939 rorx r14d,ecx,6
3940 lea r9d,[r12*1+r9]
3941 xor r13d,r14d
3942 mov r15d,r10d
3943 rorx r12d,r10d,22
3944 lea r9d,[r13*1+r9]
3945 xor r15d,r11d
3946 rorx r14d,r10d,13
3947 rorx r13d,r10d,2
3948 lea ebx,[r9*1+rbx]
3949 and esi,r15d
3950 vaesenc xmm9,xmm9,xmm10
3951 vmovdqu xmm10,XMMWORD[((160-128))+rdi]
3952 xor r14d,r12d
3953 xor esi,r11d
3954 xor r14d,r13d
3955 lea r9d,[rsi*1+r9]
3956 mov r12d,ecx
3957 add r8d,DWORD[((12+16))+rbp] ; round 12/16
3958 and r12d,ebx
3959 rorx r13d,ebx,25
3960 rorx esi,ebx,11
3961 lea r9d,[r14*1+r9]
3962 lea r8d,[r12*1+r8]
3963 andn r12d,ebx,edx
3964 xor r13d,esi
3965 rorx r14d,ebx,6
3966 lea r8d,[r12*1+r8]
3967 xor r13d,r14d
3968 mov esi,r9d
3969 rorx r12d,r9d,22
3970 lea r8d,[r13*1+r8]
3971 xor esi,r10d
3972 rorx r14d,r9d,13
3973 rorx r13d,r9d,2
3974 lea eax,[r8*1+rax]
3975 and r15d,esi
3976 vaesenclast xmm11,xmm9,xmm10 ; candidate result if the key at offset 160 is the last (10 rounds)
3977 vaesenc xmm9,xmm9,xmm10 ; ...and keep going as if it is not
3978 vmovdqu xmm10,XMMWORD[((176-128))+rdi]
3979 xor r14d,r12d
3980 xor r15d,r10d
3981 xor r14d,r13d
3982 lea r8d,[r15*1+r8]
3983 mov r12d,ebx
3984 add edx,DWORD[((32+16))+rbp] ; round 13/16
3985 and r12d,eax
3986 rorx r13d,eax,25
3987 rorx r15d,eax,11
3988 lea r8d,[r14*1+r8]
3989 lea edx,[r12*1+rdx]
3990 andn r12d,eax,ecx
3991 xor r13d,r15d
3992 rorx r14d,eax,6
3993 lea edx,[r12*1+rdx]
3994 xor r13d,r14d
3995 mov r15d,r8d
3996 rorx r12d,r8d,22
3997 lea edx,[r13*1+rdx]
3998 xor r15d,r9d
3999 rorx r14d,r8d,13
4000 rorx r13d,r8d,2
4001 lea r11d,[rdx*1+r11]
4002 and esi,r15d
4003 vpand xmm8,xmm11,xmm12 ; keep the 10-round candidate only where mask xmm12 is set
4004 vaesenc xmm9,xmm9,xmm10
4005 vmovdqu xmm10,XMMWORD[((192-128))+rdi]
4006 xor r14d,r12d
4007 xor esi,r9d
4008 xor r14d,r13d
4009 lea edx,[rsi*1+rdx]
4010 mov r12d,eax
4011 add ecx,DWORD[((36+16))+rbp] ; round 14/16
4012 and r12d,r11d
4013 rorx r13d,r11d,25
4014 rorx esi,r11d,11
4015 lea edx,[r14*1+rdx]
4016 lea ecx,[r12*1+rcx]
4017 andn r12d,r11d,ebx
4018 xor r13d,esi
4019 rorx r14d,r11d,6
4020 lea ecx,[r12*1+rcx]
4021 xor r13d,r14d
4022 mov esi,edx
4023 rorx r12d,edx,22
4024 lea ecx,[r13*1+rcx]
4025 xor esi,r8d
4026 rorx r14d,edx,13
4027 rorx r13d,edx,2
4028 lea r10d,[rcx*1+r10]
4029 and r15d,esi
4030 vaesenclast xmm11,xmm9,xmm10 ; candidate result for the key at offset 192 (12 rounds)
4031 vaesenc xmm9,xmm9,xmm10
4032 vmovdqu xmm10,XMMWORD[((208-128))+rdi]
4033 xor r14d,r12d
4034 xor r15d,r8d
4035 xor r14d,r13d
4036 lea ecx,[r15*1+rcx]
4037 mov r12d,r11d
4038 add ebx,DWORD[((40+16))+rbp] ; round 15/16
4039 and r12d,r10d
4040 rorx r13d,r10d,25
4041 rorx r15d,r10d,11
4042 lea ecx,[r14*1+rcx]
4043 lea ebx,[r12*1+rbx]
4044 andn r12d,r10d,eax
4045 xor r13d,r15d
4046 rorx r14d,r10d,6
4047 lea ebx,[r12*1+rbx]
4048 xor r13d,r14d
4049 mov r15d,ecx
4050 rorx r12d,ecx,22
4051 lea ebx,[r13*1+rbx]
4052 xor r15d,edx
4053 rorx r14d,ecx,13
4054 rorx r13d,ecx,2
4055 lea r9d,[rbx*1+r9]
4056 and esi,r15d
4057 vpand xmm11,xmm11,xmm13 ; mask the 12-round candidate with xmm13
4058 vaesenc xmm9,xmm9,xmm10
4059 vmovdqu xmm10,XMMWORD[((224-128))+rdi]
4060 xor r14d,r12d
4061 xor esi,edx
4062 xor r14d,r13d
4063 lea ebx,[rsi*1+rbx]
4064 mov r12d,r10d
4065 add eax,DWORD[((44+16))+rbp] ; round 16/16
4066 and r12d,r9d
4067 rorx r13d,r9d,25
4068 rorx esi,r9d,11
4069 lea ebx,[r14*1+rbx]
4070 lea eax,[r12*1+rax]
4071 andn r12d,r9d,r11d
4072 xor r13d,esi
4073 rorx r14d,r9d,6
4074 lea eax,[r12*1+rax]
4075 xor r13d,r14d
4076 mov esi,ebx
4077 rorx r12d,ebx,22
4078 lea eax,[r13*1+rax]
4079 xor esi,ecx ; esi = b ^ c again for the next pass
4080 rorx r14d,ebx,13
4081 rorx r13d,ebx,2
4082 lea r8d,[rax*1+r8]
4083 and r15d,esi
4084 vpor xmm8,xmm8,xmm11 ; merge the masked 10- and 12-round candidates
4085 vaesenclast xmm11,xmm9,xmm10 ; candidate result for the key at offset 224 (14 rounds)
4086 vmovdqu xmm10,XMMWORD[((0-128))+rdi] ; reload the key at offset 0 for the next block
4087 xor r14d,r12d
4088 xor r15d,ecx
4089 xor r14d,r13d ; r14d stays pending; added at the top of the next pass or after the loop
4090 lea eax,[r15*1+rax]
4091 mov r12d,r9d ; r12d = f again for the next pass
4092 vmovq r13,xmm15 ; recover the parked r13
4093 vpextrq r15,xmm15,1 ; r15 = high qword of xmm15 (offset added to r13 for the store)
4094 vpand xmm11,xmm11,xmm14 ; mask the 14-round candidate with xmm14
4095 vpor xmm8,xmm8,xmm11 ; xmm8 = selected AES output block (next CBC chain value)
4096 lea rbp,[((-64))+rbp] ; second 64-byte step: 128 bytes of inputs used per pass
4097 vmovdqu XMMWORD[r13*1+r15],xmm8 ; store the output block at r13 + r15
4098 lea r13,[16+r13] ; advance r13 by one AES block
4099 cmp rbp,rsp
4100 jae NEAR $L$ower_avx2 ; unsigned: loop while rbp has not dropped below rsp
4101
4102 mov r15,QWORD[552+rsp] ; r15 = pointer to the eight-dword state updated below
4103 lea r13,[64+r13] ; r13 += 64
4104 mov rsi,QWORD[560+rsp] ; rsi = frame slot 560 (used as the base in r13+rsi below)
4105 add eax,r14d ; add the rotate term still pending from the final round
4106 lea rsp,[448+rsp] ; release the 448-byte round-input area
4107
4108 add eax,DWORD[r15] ; a..h += state[0..7]
4109 add ebx,DWORD[4+r15]
4110 add ecx,DWORD[8+r15]
4111 add edx,DWORD[12+r15]
4112 add r8d,DWORD[16+r15]
4113 add r9d,DWORD[20+r15]
4114 add r10d,DWORD[24+r15]
4115 lea r12,[r13*1+rsi] ; r12 = r13 + rsi (lea leaves the flags alone)
4116 add r11d,DWORD[28+r15]
4117
4118 cmp r13,QWORD[((64+16))+rsp] ; compare r13 with the limit kept at frame+80 (presumably end of input)
4119
4120 mov DWORD[r15],eax ; write state[0..7] back; mov does not modify the cmp flags
4121 cmove r12,rsp ; r13 == limit: point r12 at rsp instead (presumably so a later load stays in bounds)
4122 mov DWORD[4+r15],ebx
4123 mov DWORD[8+r15],ecx
4124 mov DWORD[12+r15],edx
4125 mov DWORD[16+r15],r8d
4126 mov DWORD[20+r15],r9d
4127 mov DWORD[24+r15],r10d
4128 mov DWORD[28+r15],r11d
4129
4130 jbe NEAR $L$oop_avx2 ; still the flags of the cmp above: loop while r13 is at or below the limit (unsigned)
4131 lea rbp,[rsp] ; $L$done_avx2 addresses the frame through rbp
4132
4133
4134
4135
; ----------------------------------------------------------------------
; $L$done_avx2 / $L$epilogue_avx2 -- common exit of the AVX2 path.
; On entry rbp is the base of the frame and xmm8 holds the last AES
; output block.  Writes that block out, clears the vector registers,
; restores the Win64 callee-saved registers and returns.
; ----------------------------------------------------------------------
4136$L$done_avx2:
4137 mov r8,QWORD[((64+32))+rbp] ; r8 = pointer kept at frame+96 (presumably the caller's IV buffer)
4138 mov rsi,QWORD[((64+56))+rbp] ; rsi = stack pointer value kept at frame+120 (becomes rsp below)
4139
4140 vmovdqu XMMWORD[r8],xmm8 ; store the final output block through r8
4141 vzeroall ; zero every vector register before reloading the saved ones
4142 movaps xmm6,XMMWORD[128+rbp] ; reload xmm6..xmm15 (callee-saved on Win64) from frame+128..
4143 movaps xmm7,XMMWORD[144+rbp]
4144 movaps xmm8,XMMWORD[160+rbp]
4145 movaps xmm9,XMMWORD[176+rbp]
4146 movaps xmm10,XMMWORD[192+rbp]
4147 movaps xmm11,XMMWORD[208+rbp]
4148 movaps xmm12,XMMWORD[224+rbp]
4149 movaps xmm13,XMMWORD[240+rbp]
4150 movaps xmm14,XMMWORD[256+rbp]
4151 movaps xmm15,XMMWORD[272+rbp]
4152 mov r15,QWORD[((-48))+rsi] ; reload the six general registers stored just below rsi
4153
4154 mov r14,QWORD[((-40))+rsi]
4155
4156 mov r13,QWORD[((-32))+rsi]
4157
4158 mov r12,QWORD[((-24))+rsi]
4159
4160 mov rbp,QWORD[((-16))+rsi] ; rbp (the frame base) is not used after this point
4161
4162 mov rbx,QWORD[((-8))+rsi]
4163
4164 lea rsp,[rsi] ; drop the whole frame
4165
4166$L$epilogue_avx2:
4167 mov rdi,QWORD[8+rsp] ;WIN64 epilogue: reload rdi from the slot at rsp+8
4168 mov rsi,QWORD[16+rsp] ; reload rsi from the slot at rsp+16
4169 DB 0F3h,0C3h ;repret
4170
4171$L$SEH_end_aesni_cbc_sha256_enc_avx2:
4172
; ----------------------------------------------------------------------
; aesni_cbc_sha256_enc_shaext -- entry and setup of the SHA-extension
; path (Win64 entry: arguments arrive in rcx, rdx, r8, r9 and on the
; stack).  This part saves the callee-saved registers, moves the seven
; arguments into rdi, rsi, rdx, rcx, r8, r9, r10, loads the initial
; AES/SHA values and jumps into $L$oop_shaext.
;
; Argument roles as far as the visible code uses them (confirm against
; the C prototype):
;   rdi = AES input            rsi = AES output (turned into out - in)
;   rdx = third argument, not touched in this setup
;   rcx = AES key schedule (dword at +240 is later compared with 11)
;   r8  = 16-byte chaining value (IV)
;   r9  = eight-dword hash state
;   r10 = data pointer read 64 bytes at a time by the loop
; ----------------------------------------------------------------------
4173ALIGN 32
4174aesni_cbc_sha256_enc_shaext:
4175 mov QWORD[8+rsp],rdi ;WIN64 prologue: keep rdi in the caller-provided slot at rsp+8
4176 mov QWORD[16+rsp],rsi ; keep rsi in the slot at rsp+16
4177 mov rax,rsp ; rax = entry rsp, base for the xmm save area below
4178$L$SEH_begin_aesni_cbc_sha256_enc_shaext:
4179 mov rdi,rcx ; arg1
4180 mov rsi,rdx ; arg2
4181 mov rdx,r8 ; arg3
4182 mov rcx,r9 ; arg4
4183 mov r8,QWORD[40+rsp] ; arg5: first stack argument, above return address + 32-byte shadow space
4184 mov r9,QWORD[48+rsp] ; arg6
4185
4186
4187
4188 mov r10,QWORD[56+rsp] ; arg7
4189 lea rsp,[((-168))+rsp] ; 160 bytes for xmm6..xmm15 plus 8 so rsp becomes 16-byte aligned
4190 movaps XMMWORD[(-8-160)+rax],xmm6 ; save xmm6..xmm15 (callee-saved on Win64)
4191 movaps XMMWORD[(-8-144)+rax],xmm7
4192 movaps XMMWORD[(-8-128)+rax],xmm8
4193 movaps XMMWORD[(-8-112)+rax],xmm9
4194 movaps XMMWORD[(-8-96)+rax],xmm10
4195 movaps XMMWORD[(-8-80)+rax],xmm11
4196 movaps XMMWORD[(-8-64)+rax],xmm12
4197 movaps XMMWORD[(-8-48)+rax],xmm13
4198 movaps XMMWORD[(-8-32)+rax],xmm14
4199 movaps XMMWORD[(-8-16)+rax],xmm15
4200$L$prologue_shaext:
4201 lea rax,[((K256+128))] ; rax = K256+128; the table is addressed as (N-128)+rax
4202 movdqu xmm1,XMMWORD[r9] ; xmm1 = state dwords 0..3
4203 movdqu xmm2,XMMWORD[16+r9] ; xmm2 = state dwords 4..7
4204 movdqa xmm3,XMMWORD[((512-128))+rax] ; 16 bytes at K256+512; the loop uses xmm3 as a pshufb control
4205
4206 mov r11d,DWORD[240+rcx] ; r11d = dword at key+240 (presumably the AES round count)
4207 sub rsi,rdi ; rsi = out - in, so stores can use [rdi+rsi]
4208 movups xmm15,XMMWORD[rcx] ; xmm15 = first round key
4209 movups xmm6,XMMWORD[r8] ; xmm6 = initial chaining value
4210 movups xmm4,XMMWORD[16+rcx] ; xmm4 = second round key
4211 lea rcx,[112+rcx] ; bias rcx by 112; later keys are fetched at -80..+112 from it
4212
4213 pshufd xmm0,xmm1,0x1b ; xmm0 = dwords of xmm1 in reverse order
4214 pshufd xmm1,xmm1,0xb1 ; swap the two dwords inside each 64-bit half of xmm1
4215 pshufd xmm2,xmm2,0x1b ; reverse the dword order of xmm2
4216 movdqa xmm7,xmm3 ; keep a copy of the pshufb control in xmm7
4217DB 102,15,58,15,202,8 ; palignr xmm1,xmm2,8: xmm1 = (high qword of xmm2, low qword of xmm1)
4218 punpcklqdq xmm2,xmm0 ; xmm2 = (low qword of xmm2, low qword of xmm0); presumably the layout sha256rnds2 expects
4219
4220 jmp NEAR $L$oop_shaext
4221
4222ALIGN 16
4223$L$oop_shaext:
4224 movdqu xmm10,XMMWORD[r10]
4225 movdqu xmm11,XMMWORD[16+r10]
4226 movdqu xmm12,XMMWORD[32+r10]
4227DB 102,68,15,56,0,211
4228 movdqu xmm13,XMMWORD[48+r10]
4229
4230 movdqa xmm0,XMMWORD[((0-128))+rax]
4231 paddd xmm0,xmm10
4232DB 102,68,15,56,0,219
4233 movdqa xmm9,xmm2
4234 movdqa xmm8,xmm1
4235 movups xmm14,XMMWORD[rdi]
4236 xorps xmm14,xmm15
4237 xorps xmm6,xmm14
4238 movups xmm5,XMMWORD[((-80))+rcx]
4239 aesenc xmm6,xmm4
4240DB 15,56,203,209
4241 pshufd xmm0,xmm0,0x0e
4242 movups xmm4,XMMWORD[((-64))+rcx]
4243 aesenc xmm6,xmm5
4244DB 15,56,203,202
4245
4246 movdqa xmm0,XMMWORD[((32-128))+rax]
4247 paddd xmm0,xmm11
4248DB 102,68,15,56,0,227
4249 lea r10,[64+r10]
4250 movups xmm5,XMMWORD[((-48))+rcx]
4251 aesenc xmm6,xmm4
4252DB 15,56,203,209
4253 pshufd xmm0,xmm0,0x0e
4254 movups xmm4,XMMWORD[((-32))+rcx]
4255 aesenc xmm6,xmm5
4256DB 15,56,203,202
4257
4258 movdqa xmm0,XMMWORD[((64-128))+rax]
4259 paddd xmm0,xmm12
4260DB 102,68,15,56,0,235
4261DB 69,15,56,204,211
4262 movups xmm5,XMMWORD[((-16))+rcx]
4263 aesenc xmm6,xmm4
4264DB 15,56,203,209
4265 pshufd xmm0,xmm0,0x0e
4266 movdqa xmm3,xmm13
4267DB 102,65,15,58,15,220,4
4268 paddd xmm10,xmm3
4269 movups xmm4,XMMWORD[rcx]
4270 aesenc xmm6,xmm5
4271DB 15,56,203,202
4272
4273 movdqa xmm0,XMMWORD[((96-128))+rax]
4274 paddd xmm0,xmm13
4275DB 69,15,56,205,213
4276DB 69,15,56,204,220
4277 movups xmm5,XMMWORD[16+rcx]
4278 aesenc xmm6,xmm4
4279DB 15,56,203,209
4280 pshufd xmm0,xmm0,0x0e
4281 movups xmm4,XMMWORD[32+rcx]
4282 aesenc xmm6,xmm5
4283 movdqa xmm3,xmm10
4284DB 102,65,15,58,15,221,4
4285 paddd xmm11,xmm3
4286DB 15,56,203,202
4287 movdqa xmm0,XMMWORD[((128-128))+rax]
4288 paddd xmm0,xmm10
4289DB 69,15,56,205,218
4290DB 69,15,56,204,229
4291 movups xmm5,XMMWORD[48+rcx]
4292 aesenc xmm6,xmm4
4293DB 15,56,203,209
4294 pshufd xmm0,xmm0,0x0e
4295 movdqa xmm3,xmm11
4296DB 102,65,15,58,15,218,4
4297 paddd xmm12,xmm3
4298 cmp r11d,11
4299 jb NEAR $L$aesenclast1
4300 movups xmm4,XMMWORD[64+rcx]
4301 aesenc xmm6,xmm5
4302 movups xmm5,XMMWORD[80+rcx]
4303 aesenc xmm6,xmm4
4304 je NEAR $L$aesenclast1
4305 movups xmm4,XMMWORD[96+rcx]
4306 aesenc xmm6,xmm5
4307 movups xmm5,XMMWORD[112+rcx]
4308 aesenc xmm6,xmm4
4309$L$aesenclast1:
4310 aesenclast xmm6,xmm5
4311 movups xmm4,XMMWORD[((16-112))+rcx]
4312 nop
4313DB 15,56,203,202
4314 movups xmm14,XMMWORD[16+rdi]
4315 xorps xmm14,xmm15
4316 movups XMMWORD[rdi*1+rsi],xmm6
4317 xorps xmm6,xmm14
4318 movups xmm5,XMMWORD[((-80))+rcx]
4319 aesenc xmm6,xmm4
4320 movdqa xmm0,XMMWORD[((160-128))+rax]
4321 paddd xmm0,xmm11
4322DB 69,15,56,205,227
4323DB 69,15,56,204,234
4324 movups xmm4,XMMWORD[((-64))+rcx]
4325 aesenc xmm6,xmm5
4326DB 15,56,203,209
4327 pshufd xmm0,xmm0,0x0e
4328 movdqa xmm3,xmm12
4329DB 102,65,15,58,15,219,4
4330 paddd xmm13,xmm3
4331 movups xmm5,XMMWORD[((-48))+rcx]
4332 aesenc xmm6,xmm4
4333DB 15,56,203,202
4334 movdqa xmm0,XMMWORD[((192-128))+rax]
4335 paddd xmm0,xmm12
4336DB 69,15,56,205,236
4337DB 69,15,56,204,211
4338 movups xmm4,XMMWORD[((-32))+rcx]
4339 aesenc xmm6,xmm5
4340DB 15,56,203,209
4341 pshufd xmm0,xmm0,0x0e
4342 movdqa xmm3,xmm13
4343DB 102,65,15,58,15,220,4
4344 paddd xmm10,xmm3
4345 movups xmm5,XMMWORD[((-16))+rcx]
4346 aesenc xmm6,xmm4
4347DB 15,56,203,202
4348 movdqa xmm0,XMMWORD[((224-128))+rax]
4349 paddd xmm0,xmm13
4350DB 69,15,56,205,213
4351DB 69,15,56,204,220
4352 movups xmm4,XMMWORD[rcx]
4353 aesenc xmm6,xmm5
4354DB 15,56,203,209
4355 pshufd xmm0,xmm0,0x0e
4356 movdqa xmm3,xmm10
4357DB 102,65,15,58,15,221,4
4358 paddd xmm11,xmm3
4359 movups xmm5,XMMWORD[16+rcx]
4360 aesenc xmm6,xmm4
4361DB 15,56,203,202
4362 movdqa xmm0,XMMWORD[((256-128))+rax]
4363 paddd xmm0,xmm10
4364DB 69,15,56,205,218
4365DB 69,15,56,204,229
4366 movups xmm4,XMMWORD[32+rcx]
4367 aesenc xmm6,xmm5
4368DB 15,56,203,209
4369 pshufd xmm0,xmm0,0x0e
4370 movdqa xmm3,xmm11
4371DB 102,65,15,58,15,218,4
4372 paddd xmm12,xmm3
4373 movups xmm5,XMMWORD[48+rcx]
4374 aesenc xmm6,xmm4
4375 cmp r11d,11
4376 jb NEAR $L$aesenclast2
4377 movups xmm4,XMMWORD[64+rcx]
4378 aesenc xmm6,xmm5
4379 movups xmm5,XMMWORD[80+rcx]
4380 aesenc xmm6,xmm4
4381 je NEAR $L$aesenclast2
4382 movups xmm4,XMMWORD[96+rcx]
4383 aesenc xmm6,xmm5
4384 movups xmm5,XMMWORD[112+rcx]
4385 aesenc xmm6,xmm4
4386$L$aesenclast2:
4387 aesenclast xmm6,xmm5
4388 movups xmm4,XMMWORD[((16-112))+rcx]
4389 nop
4390DB 15,56,203,202
4391 movups xmm14,XMMWORD[32+rdi]
4392 xorps xmm14,xmm15
4393 movups XMMWORD[16+rdi*1+rsi],xmm6
4394 xorps xmm6,xmm14
4395 movups xmm5,XMMWORD[((-80))+rcx]
4396 aesenc xmm6,xmm4
4397 movdqa xmm0,XMMWORD[((288-128))+rax]
4398 paddd xmm0,xmm11
4399DB 69,15,56,205,227
4400DB 69,15,56,204,234
4401 movups xmm4,XMMWORD[((-64))+rcx]
4402 aesenc xmm6,xmm5
4403DB 15,56,203,209
4404 pshufd xmm0,xmm0,0x0e
4405 movdqa xmm3,xmm12
4406DB 102,65,15,58,15,219,4
4407 paddd xmm13,xmm3
4408 movups xmm5,XMMWORD[((-48))+rcx]
4409 aesenc xmm6,xmm4
4410DB 15,56,203,202
4411 movdqa xmm0,XMMWORD[((320-128))+rax]
4412 paddd xmm0,xmm12
4413DB 69,15,56,205,236
4414DB 69,15,56,204,211
4415 movups xmm4,XMMWORD[((-32))+rcx]
4416 aesenc xmm6,xmm5
4417DB 15,56,203,209
4418 pshufd xmm0,xmm0,0x0e
4419 movdqa xmm3,xmm13
4420DB 102,65,15,58,15,220,4
4421 paddd xmm10,xmm3
4422 movups xmm5,XMMWORD[((-16))+rcx]
4423 aesenc xmm6,xmm4
4424DB 15,56,203,202
4425 movdqa xmm0,XMMWORD[((352-128))+rax]
4426 paddd xmm0,xmm13
4427DB 69,15,56,205,213
4428DB 69,15,56,204,220
4429 movups xmm4,XMMWORD[rcx]
4430 aesenc xmm6,xmm5
4431DB 15,56,203,209
4432 pshufd xmm0,xmm0,0x0e
4433 movdqa xmm3,xmm10
4434DB 102,65,15,58,15,221,4
4435 paddd xmm11,xmm3
4436 movups xmm5,XMMWORD[16+rcx]
4437 aesenc xmm6,xmm4
4438DB 15,56,203,202
4439 movdqa xmm0,XMMWORD[((384-128))+rax]
4440 paddd xmm0,xmm10
4441DB 69,15,56,205,218
4442DB 69,15,56,204,229
4443 movups xmm4,XMMWORD[32+rcx]
4444 aesenc xmm6,xmm5
4445DB 15,56,203,209
4446 pshufd xmm0,xmm0,0x0e
4447 movdqa xmm3,xmm11
4448DB 102,65,15,58,15,218,4
4449 paddd xmm12,xmm3
4450 movups xmm5,XMMWORD[48+rcx]
4451 aesenc xmm6,xmm4
4452DB 15,56,203,202
4453 movdqa xmm0,XMMWORD[((416-128))+rax]
4454 paddd xmm0,xmm11
4455DB 69,15,56,205,227
4456DB 69,15,56,204,234
4457 cmp r11d,11
4458 jb NEAR $L$aesenclast3
4459 movups xmm4,XMMWORD[64+rcx]
4460 aesenc xmm6,xmm5
4461 movups xmm5,XMMWORD[80+rcx]
4462 aesenc xmm6,xmm4
4463 je NEAR $L$aesenclast3
4464 movups xmm4,XMMWORD[96+rcx]
4465 aesenc xmm6,xmm5
4466 movups xmm5,XMMWORD[112+rcx]
4467 aesenc xmm6,xmm4
4468$L$aesenclast3:
4469 aesenclast xmm6,xmm5
4470 movups xmm4,XMMWORD[((16-112))+rcx]
4471 nop
4472DB 15,56,203,209
4473 pshufd xmm0,xmm0,0x0e
4474 movdqa xmm3,xmm12
4475DB 102,65,15,58,15,219,4
4476 paddd xmm13,xmm3
4477 movups xmm14,XMMWORD[48+rdi]
4478 xorps xmm14,xmm15
4479 movups XMMWORD[32+rdi*1+rsi],xmm6
4480 xorps xmm6,xmm14
4481 movups xmm5,XMMWORD[((-80))+rcx]
4482 aesenc xmm6,xmm4
4483 movups xmm4,XMMWORD[((-64))+rcx]
4484 aesenc xmm6,xmm5
4485DB 15,56,203,202
4486
4487 movdqa xmm0,XMMWORD[((448-128))+rax]
4488 paddd xmm0,xmm12
4489DB 69,15,56,205,236
4490 movdqa xmm3,xmm7
4491 movups xmm5,XMMWORD[((-48))+rcx]
4492 aesenc xmm6,xmm4
4493DB 15,56,203,209
4494 pshufd xmm0,xmm0,0x0e
4495 movups xmm4,XMMWORD[((-32))+rcx]
4496 aesenc xmm6,xmm5
4497DB 15,56,203,202
4498
4499 movdqa xmm0,XMMWORD[((480-128))+rax]
4500 paddd xmm0,xmm13
4501 movups xmm5,XMMWORD[((-16))+rcx]
4502 aesenc xmm6,xmm4
4503 movups xmm4,XMMWORD[rcx]
4504 aesenc xmm6,xmm5
4505DB 15,56,203,209
4506 pshufd xmm0,xmm0,0x0e
4507 movups xmm5,XMMWORD[16+rcx]
4508 aesenc xmm6,xmm4
4509DB 15,56,203,202
4510
4511 movups xmm4,XMMWORD[32+rcx]
4512 aesenc xmm6,xmm5
4513 movups xmm5,XMMWORD[48+rcx]
4514 aesenc xmm6,xmm4
4515 cmp r11d,11
4516 jb NEAR $L$aesenclast4
4517 movups xmm4,XMMWORD[64+rcx]
4518 aesenc xmm6,xmm5
4519 movups xmm5,XMMWORD[80+rcx]
4520 aesenc xmm6,xmm4
4521 je NEAR $L$aesenclast4
4522 movups xmm4,XMMWORD[96+rcx]
4523 aesenc xmm6,xmm5
4524 movups xmm5,XMMWORD[112+rcx]
4525 aesenc xmm6,xmm4
4526$L$aesenclast4:
4527 aesenclast xmm6,xmm5
4528 movups xmm4,XMMWORD[((16-112))+rcx]
4529 nop
4530
4531 paddd xmm2,xmm9
4532 paddd xmm1,xmm8
4533
4534 dec rdx
4535 movups XMMWORD[48+rdi*1+rsi],xmm6
4536 lea rdi,[64+rdi]
4537 jnz NEAR $L$oop_shaext
4538
4539 pshufd xmm2,xmm2,0xb1
4540 pshufd xmm3,xmm1,0x1b
4541 pshufd xmm1,xmm1,0xb1
4542 punpckhqdq xmm1,xmm2
4543DB 102,15,58,15,211,8
4544
4545 movups XMMWORD[r8],xmm6
4546 movdqu XMMWORD[r9],xmm1
4547 movdqu XMMWORD[16+r9],xmm2
4548 movaps xmm6,XMMWORD[rsp]
4549 movaps xmm7,XMMWORD[16+rsp]
4550 movaps xmm8,XMMWORD[32+rsp]
4551 movaps xmm9,XMMWORD[48+rsp]
4552 movaps xmm10,XMMWORD[64+rsp]
4553 movaps xmm11,XMMWORD[80+rsp]
4554 movaps xmm12,XMMWORD[96+rsp]
4555 movaps xmm13,XMMWORD[112+rsp]
4556 movaps xmm14,XMMWORD[128+rsp]
4557 movaps xmm15,XMMWORD[144+rsp]
4558 lea rsp,[((8+160))+rsp]
4559$L$epilogue_shaext:
4560 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
4561 mov rsi,QWORD[16+rsp]
4562 DB 0F3h,0C3h ;repret
4563
4564$L$SEH_end_aesni_cbc_sha256_enc_shaext:
4565EXTERN __imp_RtlVirtualUnwind
4566
4567ALIGN 16
;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler referenced by
; every $L$SEH_info_* record in .xdata of this file.
;
; In (as used below):
;   r8 = pointer to the CONTEXT record being unwound
;   r9 = pointer to the DISPATCHER_CONTEXT
;   (field names in the comments follow the Windows SDK x64 layouts)
; Out:
;   eax = 1 (ExceptionContinueSearch)
;
; The handler repairs the context's Rsp, Rsi, Rdi and, when the fault lies
; inside a function body, Rbx/Rbp/R12-R15 and the 160-byte Xmm6.. area,
; then copies the context to disp->ContextRecord and calls
; RtlVirtualUnwind.  All registers pushed here are popped before return.
;-----------------------------------------------------------------------
4568se_handler:
4569 push rsi
4570 push rdi
4571 push rbx
4572 push rbp
4573 push r12
4574 push r13
4575 push r14
4576 push r15
4577 pushfq
4578 sub rsp,64	; 32-byte shadow space + 4 stack args for the call below; 9 pushes + 64 keeps rsp 16-byte aligned
4579
4580 mov rax,QWORD[120+r8]	; context->Rax
4581 mov rbx,QWORD[248+r8]	; context->Rip (faulting address)
4582
4583 mov rsi,QWORD[8+r9]	; disp->ImageBase
4584 mov r11,QWORD[56+r9]	; disp->HandlerData (the two DDs after the handler RVA in .xdata)
4585
4586 mov r10d,DWORD[r11]	; HandlerData[0]: image-relative prologue label
4587 lea r10,[r10*1+rsi]	; -> absolute address
4588 cmp rbx,r10
4589 jb NEAR $L$in_prologue	; Rip before the prologue label: only rdi/rsi need fixing
4590
4591 mov rax,QWORD[152+r8]	; context->Rsp
4592
4593 mov r10d,DWORD[4+r11]	; HandlerData[1]: image-relative epilogue label
4594 lea r10,[r10*1+rsi]	; -> absolute address
4595 cmp rbx,r10
4596 jae NEAR $L$in_prologue	; Rip at/after the epilogue label: frame already torn down
4597 lea r10,[aesni_cbc_sha256_enc_shaext]
4598 cmp rbx,r10
4599 jb NEAR $L$not_in_shaext	; unsigned compare: Rip below the shaext entry point
4600
	; SHA-extension path: its frame is just the 10 saved XMM registers at
	; [rsp] plus 8 bytes (the epilogue does lea rsp,[8+160+rsp]).
4601 lea rsi,[rax]	; source = context Rsp
4602 lea rdi,[512+r8]	; destination = &context->Xmm6
4603 mov ecx,20	; 20 qwords = 160 bytes (xmm6..xmm15)
4604 DD 0xa548f3fc	; raw bytes FC F3 48 A5 = cld; rep movsq
4605 lea rax,[168+rax]	; step over the 168-byte frame
4606 jmp NEAR $L$in_prologue
4607$L$not_in_shaext:
4608 lea r10,[$L$avx2_shortcut]
4609 cmp rbx,r10
4610 jb NEAR $L$not_in_avx2
4611
	; NOTE(review): the AVX2 prologue is outside this chunk; this presumably
	; re-derives its frame base from Rsp -- confirm against that prologue.
4612 and rax,-256*4	; round Rsp down to a 1024-byte boundary
4613 add rax,448
4614$L$not_in_avx2:
4615 mov rsi,rax	; keep the frame base for the XMM copy below
4616 mov rax,QWORD[((64+56))+rax]	; qword at frame+120; presumably the stack pointer saved by the prologue (not visible here)
4617
	; Six qwords just below that pointer are copied into the context's
	; callee-saved integer register slots.
4618 mov rbx,QWORD[((-8))+rax]
4619 mov rbp,QWORD[((-16))+rax]
4620 mov r12,QWORD[((-24))+rax]
4621 mov r13,QWORD[((-32))+rax]
4622 mov r14,QWORD[((-40))+rax]
4623 mov r15,QWORD[((-48))+rax]
4624 mov QWORD[144+r8],rbx	; context->Rbx
4625 mov QWORD[160+r8],rbp	; context->Rbp
4626 mov QWORD[216+r8],r12	; context->R12
4627 mov QWORD[224+r8],r13	; context->R13
4628 mov QWORD[232+r8],r14	; context->R14
4629 mov QWORD[240+r8],r15	; context->R15
4630
4631 lea rsi,[((64+64))+rsi]	; frame+128: source of the XMM save area
4632 lea rdi,[512+r8]	; destination = &context->Xmm6
4633 mov ecx,20	; 20 qwords = 160 bytes
4634 DD 0xa548f3fc	; cld; rep movsq
4635
4636$L$in_prologue:
	; rax is the stack pointer as it was at function entry; the WIN64
	; prologue/epilogue keep rdi and rsi at [8+rsp] and [16+rsp].
4637 mov rdi,QWORD[8+rax]
4638 mov rsi,QWORD[16+rax]
4639 mov QWORD[152+r8],rax	; context->Rsp
4640 mov QWORD[168+r8],rsi	; context->Rsi
4641 mov QWORD[176+r8],rdi	; context->Rdi
4642
4643 mov rdi,QWORD[40+r9]	; destination = disp->ContextRecord
4644 mov rsi,r8	; source = the context we just patched
4645 mov ecx,154	; 154 qwords = 1232 bytes
4646 DD 0xa548f3fc	; cld; rep movsq
4647
	; RtlVirtualUnwind(HandlerType, ImageBase, ControlPc, FunctionEntry,
	;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
4648 mov rsi,r9
4649 xor rcx,rcx	; arg1 = 0 (UNW_FLAG_NHANDLER)
4650 mov rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
4651 mov r8,QWORD[rsi]	; arg3 = disp->ControlPc
4652 mov r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
4653 mov r10,QWORD[40+rsi]	; disp->ContextRecord
4654 lea r11,[56+rsi]	; &disp->HandlerData
4655 lea r12,[24+rsi]	; &disp->EstablisherFrame
4656 mov QWORD[32+rsp],r10	; arg5
4657 mov QWORD[40+rsp],r11	; arg6
4658 mov QWORD[48+rsp],r12	; arg7
4659 mov QWORD[56+rsp],rcx	; arg8 = NULL
4660 call QWORD[__imp_RtlVirtualUnwind]
4661
4662 mov eax,1	; ExceptionContinueSearch
4663 add rsp,64
4664 popfq
4665 pop r15
4666 pop r14
4667 pop r13
4668 pop r12
4669 pop rbp
4670 pop rbx
4671 pop rdi
4672 pop rsi
4673 DB 0F3h,0C3h ;repret
4674
4675
; Win64 function table: one entry per code path, each consisting of three
; image-relative DWORDs -- begin label, end label, and the matching
; $L$SEH_info_* unwind record in .xdata (the RUNTIME_FUNCTION layout).
4676section .pdata rdata align=4
4677 DD $L$SEH_begin_aesni_cbc_sha256_enc_xop wrt ..imagebase
4678 DD $L$SEH_end_aesni_cbc_sha256_enc_xop wrt ..imagebase
4679 DD $L$SEH_info_aesni_cbc_sha256_enc_xop wrt ..imagebase
4680
4681 DD $L$SEH_begin_aesni_cbc_sha256_enc_avx wrt ..imagebase
4682 DD $L$SEH_end_aesni_cbc_sha256_enc_avx wrt ..imagebase
4683 DD $L$SEH_info_aesni_cbc_sha256_enc_avx wrt ..imagebase
	; AVX2 entry
4684 DD $L$SEH_begin_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
4685 DD $L$SEH_end_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
4686 DD $L$SEH_info_aesni_cbc_sha256_enc_avx2 wrt ..imagebase
	; SHA-extension entry
4687 DD $L$SEH_begin_aesni_cbc_sha256_enc_shaext wrt ..imagebase
4688 DD $L$SEH_end_aesni_cbc_sha256_enc_shaext wrt ..imagebase
4689 DD $L$SEH_info_aesni_cbc_sha256_enc_shaext wrt ..imagebase
; Unwind records referenced from .pdata.  Every record has the same shape:
;   DB 9,0,0,0   header bytes; 9 = version 1 | (UNW_FLAG_EHANDLER << 3),
;                with prolog size, unwind-code count and frame register 0
;   DD handler   image-relative address of se_handler
;   DD pro,epi   handler data: image-relative prologue and epilogue labels,
;                which se_handler reads as the DWORDs at [r11] and [4+r11]
4690section .xdata rdata align=8
4691ALIGN 8
4692$L$SEH_info_aesni_cbc_sha256_enc_xop:
4693DB 9,0,0,0
4694 DD se_handler wrt ..imagebase
4695 DD $L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase
4696
4697$L$SEH_info_aesni_cbc_sha256_enc_avx:
4698DB 9,0,0,0
4699 DD se_handler wrt ..imagebase
4700 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
4701$L$SEH_info_aesni_cbc_sha256_enc_avx2:
4702DB 9,0,0,0
4703 DD se_handler wrt ..imagebase
4704 DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase
4705$L$SEH_info_aesni_cbc_sha256_enc_shaext:
4706DB 9,0,0,0
4707 DD se_handler wrt ..imagebase
4708 DD $L$prologue_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette