VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.7/crypto/genasm-nasm/sha256-mb-x86_64.S@ 107835

Last change on this file since 107835 was 99371, checked in by vboxsync, 23 months ago

openssl-3.1.0: After generating headers and asm (kmk recreate-headers recreate-headers)

File size: 149.5 KB
Line 
; Assembler setup for sha256_multi_block (NASM syntax).
; default rel: register-less memory operands are encoded RIP-relative.
1default rel
; XMMWORD/YMMWORD/ZMMWORD are defined as empty macros, so an operand written
; as XMMWORD[...] expands to a plain [...] memory reference.
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
; Code is placed in .text, with the section aligned to 64 bytes.
5section .text code align=64
6
7
; External symbol defined outside this file; the entry point loads the qword
; at offset 4 from it and tests bits of that value to choose a code path.
8EXTERN OPENSSL_ia32cap_P
9
; Export the entry point label defined below.
10global sha256_multi_block
11
12ALIGN 32
13sha256_multi_block:
14 mov QWORD[8+rsp],rdi ;WIN64 prologue
15 mov QWORD[16+rsp],rsi
16 mov rax,rsp
17$L$SEH_begin_sha256_multi_block:
18 mov rdi,rcx
19 mov rsi,rdx
20 mov rdx,r8
21
22
23
24 mov rcx,QWORD[((OPENSSL_ia32cap_P+4))]
25 bt rcx,61
26 jc NEAR _shaext_shortcut
27 test ecx,268435456
28 jnz NEAR _avx_shortcut
29 mov rax,rsp
30
31 push rbx
32
33 push rbp
34
35 lea rsp,[((-168))+rsp]
36 movaps XMMWORD[rsp],xmm6
37 movaps XMMWORD[16+rsp],xmm7
38 movaps XMMWORD[32+rsp],xmm8
39 movaps XMMWORD[48+rsp],xmm9
40 movaps XMMWORD[(-120)+rax],xmm10
41 movaps XMMWORD[(-104)+rax],xmm11
42 movaps XMMWORD[(-88)+rax],xmm12
43 movaps XMMWORD[(-72)+rax],xmm13
44 movaps XMMWORD[(-56)+rax],xmm14
45 movaps XMMWORD[(-40)+rax],xmm15
46 sub rsp,288
47 and rsp,-256
48 mov QWORD[272+rsp],rax
49
50$L$body:
51 lea rbp,[((K256+128))]
52 lea rbx,[256+rsp]
53 lea rdi,[128+rdi]
54
55$L$oop_grande:
56 mov DWORD[280+rsp],edx
57 xor edx,edx
58
59 mov r8,QWORD[rsi]
60
61 mov ecx,DWORD[8+rsi]
62 cmp ecx,edx
63 cmovg edx,ecx
64 test ecx,ecx
65 mov DWORD[rbx],ecx
66 cmovle r8,rbp
67
68 mov r9,QWORD[16+rsi]
69
70 mov ecx,DWORD[24+rsi]
71 cmp ecx,edx
72 cmovg edx,ecx
73 test ecx,ecx
74 mov DWORD[4+rbx],ecx
75 cmovle r9,rbp
76
77 mov r10,QWORD[32+rsi]
78
79 mov ecx,DWORD[40+rsi]
80 cmp ecx,edx
81 cmovg edx,ecx
82 test ecx,ecx
83 mov DWORD[8+rbx],ecx
84 cmovle r10,rbp
85
86 mov r11,QWORD[48+rsi]
87
88 mov ecx,DWORD[56+rsi]
89 cmp ecx,edx
90 cmovg edx,ecx
91 test ecx,ecx
92 mov DWORD[12+rbx],ecx
93 cmovle r11,rbp
94 test edx,edx
95 jz NEAR $L$done
96
97 movdqu xmm8,XMMWORD[((0-128))+rdi]
98 lea rax,[128+rsp]
99 movdqu xmm9,XMMWORD[((32-128))+rdi]
100 movdqu xmm10,XMMWORD[((64-128))+rdi]
101 movdqu xmm11,XMMWORD[((96-128))+rdi]
102 movdqu xmm12,XMMWORD[((128-128))+rdi]
103 movdqu xmm13,XMMWORD[((160-128))+rdi]
104 movdqu xmm14,XMMWORD[((192-128))+rdi]
105 movdqu xmm15,XMMWORD[((224-128))+rdi]
106 movdqu xmm6,XMMWORD[$L$pbswap]
107 jmp NEAR $L$oop
108
109ALIGN 32
110$L$oop:
111 movdqa xmm4,xmm10
112 pxor xmm4,xmm9
113 movd xmm5,DWORD[r8]
114 movd xmm0,DWORD[r9]
115 movd xmm1,DWORD[r10]
116 movd xmm2,DWORD[r11]
117 punpckldq xmm5,xmm1
118 punpckldq xmm0,xmm2
119 punpckldq xmm5,xmm0
120 movdqa xmm7,xmm12
121DB 102,15,56,0,238
122 movdqa xmm2,xmm12
123
124 psrld xmm7,6
125 movdqa xmm1,xmm12
126 pslld xmm2,7
127 movdqa XMMWORD[(0-128)+rax],xmm5
128 paddd xmm5,xmm15
129
130 psrld xmm1,11
131 pxor xmm7,xmm2
132 pslld xmm2,21-7
133 paddd xmm5,XMMWORD[((-128))+rbp]
134 pxor xmm7,xmm1
135
136 psrld xmm1,25-11
137 movdqa xmm0,xmm12
138
139 pxor xmm7,xmm2
140 movdqa xmm3,xmm12
141 pslld xmm2,26-21
142 pandn xmm0,xmm14
143 pand xmm3,xmm13
144 pxor xmm7,xmm1
145
146
147 movdqa xmm1,xmm8
148 pxor xmm7,xmm2
149 movdqa xmm2,xmm8
150 psrld xmm1,2
151 paddd xmm5,xmm7
152 pxor xmm0,xmm3
153 movdqa xmm3,xmm9
154 movdqa xmm7,xmm8
155 pslld xmm2,10
156 pxor xmm3,xmm8
157
158
159 psrld xmm7,13
160 pxor xmm1,xmm2
161 paddd xmm5,xmm0
162 pslld xmm2,19-10
163 pand xmm4,xmm3
164 pxor xmm1,xmm7
165
166
167 psrld xmm7,22-13
168 pxor xmm1,xmm2
169 movdqa xmm15,xmm9
170 pslld xmm2,30-19
171 pxor xmm7,xmm1
172 pxor xmm15,xmm4
173 paddd xmm11,xmm5
174 pxor xmm7,xmm2
175
176 paddd xmm15,xmm5
177 paddd xmm15,xmm7
178 movd xmm5,DWORD[4+r8]
179 movd xmm0,DWORD[4+r9]
180 movd xmm1,DWORD[4+r10]
181 movd xmm2,DWORD[4+r11]
182 punpckldq xmm5,xmm1
183 punpckldq xmm0,xmm2
184 punpckldq xmm5,xmm0
185 movdqa xmm7,xmm11
186
187 movdqa xmm2,xmm11
188DB 102,15,56,0,238
189 psrld xmm7,6
190 movdqa xmm1,xmm11
191 pslld xmm2,7
192 movdqa XMMWORD[(16-128)+rax],xmm5
193 paddd xmm5,xmm14
194
195 psrld xmm1,11
196 pxor xmm7,xmm2
197 pslld xmm2,21-7
198 paddd xmm5,XMMWORD[((-96))+rbp]
199 pxor xmm7,xmm1
200
201 psrld xmm1,25-11
202 movdqa xmm0,xmm11
203
204 pxor xmm7,xmm2
205 movdqa xmm4,xmm11
206 pslld xmm2,26-21
207 pandn xmm0,xmm13
208 pand xmm4,xmm12
209 pxor xmm7,xmm1
210
211
212 movdqa xmm1,xmm15
213 pxor xmm7,xmm2
214 movdqa xmm2,xmm15
215 psrld xmm1,2
216 paddd xmm5,xmm7
217 pxor xmm0,xmm4
218 movdqa xmm4,xmm8
219 movdqa xmm7,xmm15
220 pslld xmm2,10
221 pxor xmm4,xmm15
222
223
224 psrld xmm7,13
225 pxor xmm1,xmm2
226 paddd xmm5,xmm0
227 pslld xmm2,19-10
228 pand xmm3,xmm4
229 pxor xmm1,xmm7
230
231
232 psrld xmm7,22-13
233 pxor xmm1,xmm2
234 movdqa xmm14,xmm8
235 pslld xmm2,30-19
236 pxor xmm7,xmm1
237 pxor xmm14,xmm3
238 paddd xmm10,xmm5
239 pxor xmm7,xmm2
240
241 paddd xmm14,xmm5
242 paddd xmm14,xmm7
243 movd xmm5,DWORD[8+r8]
244 movd xmm0,DWORD[8+r9]
245 movd xmm1,DWORD[8+r10]
246 movd xmm2,DWORD[8+r11]
247 punpckldq xmm5,xmm1
248 punpckldq xmm0,xmm2
249 punpckldq xmm5,xmm0
250 movdqa xmm7,xmm10
251DB 102,15,56,0,238
252 movdqa xmm2,xmm10
253
254 psrld xmm7,6
255 movdqa xmm1,xmm10
256 pslld xmm2,7
257 movdqa XMMWORD[(32-128)+rax],xmm5
258 paddd xmm5,xmm13
259
260 psrld xmm1,11
261 pxor xmm7,xmm2
262 pslld xmm2,21-7
263 paddd xmm5,XMMWORD[((-64))+rbp]
264 pxor xmm7,xmm1
265
266 psrld xmm1,25-11
267 movdqa xmm0,xmm10
268
269 pxor xmm7,xmm2
270 movdqa xmm3,xmm10
271 pslld xmm2,26-21
272 pandn xmm0,xmm12
273 pand xmm3,xmm11
274 pxor xmm7,xmm1
275
276
277 movdqa xmm1,xmm14
278 pxor xmm7,xmm2
279 movdqa xmm2,xmm14
280 psrld xmm1,2
281 paddd xmm5,xmm7
282 pxor xmm0,xmm3
283 movdqa xmm3,xmm15
284 movdqa xmm7,xmm14
285 pslld xmm2,10
286 pxor xmm3,xmm14
287
288
289 psrld xmm7,13
290 pxor xmm1,xmm2
291 paddd xmm5,xmm0
292 pslld xmm2,19-10
293 pand xmm4,xmm3
294 pxor xmm1,xmm7
295
296
297 psrld xmm7,22-13
298 pxor xmm1,xmm2
299 movdqa xmm13,xmm15
300 pslld xmm2,30-19
301 pxor xmm7,xmm1
302 pxor xmm13,xmm4
303 paddd xmm9,xmm5
304 pxor xmm7,xmm2
305
306 paddd xmm13,xmm5
307 paddd xmm13,xmm7
308 movd xmm5,DWORD[12+r8]
309 movd xmm0,DWORD[12+r9]
310 movd xmm1,DWORD[12+r10]
311 movd xmm2,DWORD[12+r11]
312 punpckldq xmm5,xmm1
313 punpckldq xmm0,xmm2
314 punpckldq xmm5,xmm0
315 movdqa xmm7,xmm9
316
317 movdqa xmm2,xmm9
318DB 102,15,56,0,238
319 psrld xmm7,6
320 movdqa xmm1,xmm9
321 pslld xmm2,7
322 movdqa XMMWORD[(48-128)+rax],xmm5
323 paddd xmm5,xmm12
324
325 psrld xmm1,11
326 pxor xmm7,xmm2
327 pslld xmm2,21-7
328 paddd xmm5,XMMWORD[((-32))+rbp]
329 pxor xmm7,xmm1
330
331 psrld xmm1,25-11
332 movdqa xmm0,xmm9
333
334 pxor xmm7,xmm2
335 movdqa xmm4,xmm9
336 pslld xmm2,26-21
337 pandn xmm0,xmm11
338 pand xmm4,xmm10
339 pxor xmm7,xmm1
340
341
342 movdqa xmm1,xmm13
343 pxor xmm7,xmm2
344 movdqa xmm2,xmm13
345 psrld xmm1,2
346 paddd xmm5,xmm7
347 pxor xmm0,xmm4
348 movdqa xmm4,xmm14
349 movdqa xmm7,xmm13
350 pslld xmm2,10
351 pxor xmm4,xmm13
352
353
354 psrld xmm7,13
355 pxor xmm1,xmm2
356 paddd xmm5,xmm0
357 pslld xmm2,19-10
358 pand xmm3,xmm4
359 pxor xmm1,xmm7
360
361
362 psrld xmm7,22-13
363 pxor xmm1,xmm2
364 movdqa xmm12,xmm14
365 pslld xmm2,30-19
366 pxor xmm7,xmm1
367 pxor xmm12,xmm3
368 paddd xmm8,xmm5
369 pxor xmm7,xmm2
370
371 paddd xmm12,xmm5
372 paddd xmm12,xmm7
373 movd xmm5,DWORD[16+r8]
374 movd xmm0,DWORD[16+r9]
375 movd xmm1,DWORD[16+r10]
376 movd xmm2,DWORD[16+r11]
377 punpckldq xmm5,xmm1
378 punpckldq xmm0,xmm2
379 punpckldq xmm5,xmm0
380 movdqa xmm7,xmm8
381DB 102,15,56,0,238
382 movdqa xmm2,xmm8
383
384 psrld xmm7,6
385 movdqa xmm1,xmm8
386 pslld xmm2,7
387 movdqa XMMWORD[(64-128)+rax],xmm5
388 paddd xmm5,xmm11
389
390 psrld xmm1,11
391 pxor xmm7,xmm2
392 pslld xmm2,21-7
393 paddd xmm5,XMMWORD[rbp]
394 pxor xmm7,xmm1
395
396 psrld xmm1,25-11
397 movdqa xmm0,xmm8
398
399 pxor xmm7,xmm2
400 movdqa xmm3,xmm8
401 pslld xmm2,26-21
402 pandn xmm0,xmm10
403 pand xmm3,xmm9
404 pxor xmm7,xmm1
405
406
407 movdqa xmm1,xmm12
408 pxor xmm7,xmm2
409 movdqa xmm2,xmm12
410 psrld xmm1,2
411 paddd xmm5,xmm7
412 pxor xmm0,xmm3
413 movdqa xmm3,xmm13
414 movdqa xmm7,xmm12
415 pslld xmm2,10
416 pxor xmm3,xmm12
417
418
419 psrld xmm7,13
420 pxor xmm1,xmm2
421 paddd xmm5,xmm0
422 pslld xmm2,19-10
423 pand xmm4,xmm3
424 pxor xmm1,xmm7
425
426
427 psrld xmm7,22-13
428 pxor xmm1,xmm2
429 movdqa xmm11,xmm13
430 pslld xmm2,30-19
431 pxor xmm7,xmm1
432 pxor xmm11,xmm4
433 paddd xmm15,xmm5
434 pxor xmm7,xmm2
435
436 paddd xmm11,xmm5
437 paddd xmm11,xmm7
438 movd xmm5,DWORD[20+r8]
439 movd xmm0,DWORD[20+r9]
440 movd xmm1,DWORD[20+r10]
441 movd xmm2,DWORD[20+r11]
442 punpckldq xmm5,xmm1
443 punpckldq xmm0,xmm2
444 punpckldq xmm5,xmm0
445 movdqa xmm7,xmm15
446
447 movdqa xmm2,xmm15
448DB 102,15,56,0,238
449 psrld xmm7,6
450 movdqa xmm1,xmm15
451 pslld xmm2,7
452 movdqa XMMWORD[(80-128)+rax],xmm5
453 paddd xmm5,xmm10
454
455 psrld xmm1,11
456 pxor xmm7,xmm2
457 pslld xmm2,21-7
458 paddd xmm5,XMMWORD[32+rbp]
459 pxor xmm7,xmm1
460
461 psrld xmm1,25-11
462 movdqa xmm0,xmm15
463
464 pxor xmm7,xmm2
465 movdqa xmm4,xmm15
466 pslld xmm2,26-21
467 pandn xmm0,xmm9
468 pand xmm4,xmm8
469 pxor xmm7,xmm1
470
471
472 movdqa xmm1,xmm11
473 pxor xmm7,xmm2
474 movdqa xmm2,xmm11
475 psrld xmm1,2
476 paddd xmm5,xmm7
477 pxor xmm0,xmm4
478 movdqa xmm4,xmm12
479 movdqa xmm7,xmm11
480 pslld xmm2,10
481 pxor xmm4,xmm11
482
483
484 psrld xmm7,13
485 pxor xmm1,xmm2
486 paddd xmm5,xmm0
487 pslld xmm2,19-10
488 pand xmm3,xmm4
489 pxor xmm1,xmm7
490
491
492 psrld xmm7,22-13
493 pxor xmm1,xmm2
494 movdqa xmm10,xmm12
495 pslld xmm2,30-19
496 pxor xmm7,xmm1
497 pxor xmm10,xmm3
498 paddd xmm14,xmm5
499 pxor xmm7,xmm2
500
501 paddd xmm10,xmm5
502 paddd xmm10,xmm7
503 movd xmm5,DWORD[24+r8]
504 movd xmm0,DWORD[24+r9]
505 movd xmm1,DWORD[24+r10]
506 movd xmm2,DWORD[24+r11]
507 punpckldq xmm5,xmm1
508 punpckldq xmm0,xmm2
509 punpckldq xmm5,xmm0
510 movdqa xmm7,xmm14
511DB 102,15,56,0,238
512 movdqa xmm2,xmm14
513
514 psrld xmm7,6
515 movdqa xmm1,xmm14
516 pslld xmm2,7
517 movdqa XMMWORD[(96-128)+rax],xmm5
518 paddd xmm5,xmm9
519
520 psrld xmm1,11
521 pxor xmm7,xmm2
522 pslld xmm2,21-7
523 paddd xmm5,XMMWORD[64+rbp]
524 pxor xmm7,xmm1
525
526 psrld xmm1,25-11
527 movdqa xmm0,xmm14
528
529 pxor xmm7,xmm2
530 movdqa xmm3,xmm14
531 pslld xmm2,26-21
532 pandn xmm0,xmm8
533 pand xmm3,xmm15
534 pxor xmm7,xmm1
535
536
537 movdqa xmm1,xmm10
538 pxor xmm7,xmm2
539 movdqa xmm2,xmm10
540 psrld xmm1,2
541 paddd xmm5,xmm7
542 pxor xmm0,xmm3
543 movdqa xmm3,xmm11
544 movdqa xmm7,xmm10
545 pslld xmm2,10
546 pxor xmm3,xmm10
547
548
549 psrld xmm7,13
550 pxor xmm1,xmm2
551 paddd xmm5,xmm0
552 pslld xmm2,19-10
553 pand xmm4,xmm3
554 pxor xmm1,xmm7
555
556
557 psrld xmm7,22-13
558 pxor xmm1,xmm2
559 movdqa xmm9,xmm11
560 pslld xmm2,30-19
561 pxor xmm7,xmm1
562 pxor xmm9,xmm4
563 paddd xmm13,xmm5
564 pxor xmm7,xmm2
565
566 paddd xmm9,xmm5
567 paddd xmm9,xmm7
568 movd xmm5,DWORD[28+r8]
569 movd xmm0,DWORD[28+r9]
570 movd xmm1,DWORD[28+r10]
571 movd xmm2,DWORD[28+r11]
572 punpckldq xmm5,xmm1
573 punpckldq xmm0,xmm2
574 punpckldq xmm5,xmm0
575 movdqa xmm7,xmm13
576
577 movdqa xmm2,xmm13
578DB 102,15,56,0,238
579 psrld xmm7,6
580 movdqa xmm1,xmm13
581 pslld xmm2,7
582 movdqa XMMWORD[(112-128)+rax],xmm5
583 paddd xmm5,xmm8
584
585 psrld xmm1,11
586 pxor xmm7,xmm2
587 pslld xmm2,21-7
588 paddd xmm5,XMMWORD[96+rbp]
589 pxor xmm7,xmm1
590
591 psrld xmm1,25-11
592 movdqa xmm0,xmm13
593
594 pxor xmm7,xmm2
595 movdqa xmm4,xmm13
596 pslld xmm2,26-21
597 pandn xmm0,xmm15
598 pand xmm4,xmm14
599 pxor xmm7,xmm1
600
601
602 movdqa xmm1,xmm9
603 pxor xmm7,xmm2
604 movdqa xmm2,xmm9
605 psrld xmm1,2
606 paddd xmm5,xmm7
607 pxor xmm0,xmm4
608 movdqa xmm4,xmm10
609 movdqa xmm7,xmm9
610 pslld xmm2,10
611 pxor xmm4,xmm9
612
613
614 psrld xmm7,13
615 pxor xmm1,xmm2
616 paddd xmm5,xmm0
617 pslld xmm2,19-10
618 pand xmm3,xmm4
619 pxor xmm1,xmm7
620
621
622 psrld xmm7,22-13
623 pxor xmm1,xmm2
624 movdqa xmm8,xmm10
625 pslld xmm2,30-19
626 pxor xmm7,xmm1
627 pxor xmm8,xmm3
628 paddd xmm12,xmm5
629 pxor xmm7,xmm2
630
631 paddd xmm8,xmm5
632 paddd xmm8,xmm7
633 lea rbp,[256+rbp]
634 movd xmm5,DWORD[32+r8]
635 movd xmm0,DWORD[32+r9]
636 movd xmm1,DWORD[32+r10]
637 movd xmm2,DWORD[32+r11]
638 punpckldq xmm5,xmm1
639 punpckldq xmm0,xmm2
640 punpckldq xmm5,xmm0
641 movdqa xmm7,xmm12
642DB 102,15,56,0,238
643 movdqa xmm2,xmm12
644
645 psrld xmm7,6
646 movdqa xmm1,xmm12
647 pslld xmm2,7
648 movdqa XMMWORD[(128-128)+rax],xmm5
649 paddd xmm5,xmm15
650
651 psrld xmm1,11
652 pxor xmm7,xmm2
653 pslld xmm2,21-7
654 paddd xmm5,XMMWORD[((-128))+rbp]
655 pxor xmm7,xmm1
656
657 psrld xmm1,25-11
658 movdqa xmm0,xmm12
659
660 pxor xmm7,xmm2
661 movdqa xmm3,xmm12
662 pslld xmm2,26-21
663 pandn xmm0,xmm14
664 pand xmm3,xmm13
665 pxor xmm7,xmm1
666
667
668 movdqa xmm1,xmm8
669 pxor xmm7,xmm2
670 movdqa xmm2,xmm8
671 psrld xmm1,2
672 paddd xmm5,xmm7
673 pxor xmm0,xmm3
674 movdqa xmm3,xmm9
675 movdqa xmm7,xmm8
676 pslld xmm2,10
677 pxor xmm3,xmm8
678
679
680 psrld xmm7,13
681 pxor xmm1,xmm2
682 paddd xmm5,xmm0
683 pslld xmm2,19-10
684 pand xmm4,xmm3
685 pxor xmm1,xmm7
686
687
688 psrld xmm7,22-13
689 pxor xmm1,xmm2
690 movdqa xmm15,xmm9
691 pslld xmm2,30-19
692 pxor xmm7,xmm1
693 pxor xmm15,xmm4
694 paddd xmm11,xmm5
695 pxor xmm7,xmm2
696
697 paddd xmm15,xmm5
698 paddd xmm15,xmm7
699 movd xmm5,DWORD[36+r8]
700 movd xmm0,DWORD[36+r9]
701 movd xmm1,DWORD[36+r10]
702 movd xmm2,DWORD[36+r11]
703 punpckldq xmm5,xmm1
704 punpckldq xmm0,xmm2
705 punpckldq xmm5,xmm0
706 movdqa xmm7,xmm11
707
708 movdqa xmm2,xmm11
709DB 102,15,56,0,238
710 psrld xmm7,6
711 movdqa xmm1,xmm11
712 pslld xmm2,7
713 movdqa XMMWORD[(144-128)+rax],xmm5
714 paddd xmm5,xmm14
715
716 psrld xmm1,11
717 pxor xmm7,xmm2
718 pslld xmm2,21-7
719 paddd xmm5,XMMWORD[((-96))+rbp]
720 pxor xmm7,xmm1
721
722 psrld xmm1,25-11
723 movdqa xmm0,xmm11
724
725 pxor xmm7,xmm2
726 movdqa xmm4,xmm11
727 pslld xmm2,26-21
728 pandn xmm0,xmm13
729 pand xmm4,xmm12
730 pxor xmm7,xmm1
731
732
733 movdqa xmm1,xmm15
734 pxor xmm7,xmm2
735 movdqa xmm2,xmm15
736 psrld xmm1,2
737 paddd xmm5,xmm7
738 pxor xmm0,xmm4
739 movdqa xmm4,xmm8
740 movdqa xmm7,xmm15
741 pslld xmm2,10
742 pxor xmm4,xmm15
743
744
745 psrld xmm7,13
746 pxor xmm1,xmm2
747 paddd xmm5,xmm0
748 pslld xmm2,19-10
749 pand xmm3,xmm4
750 pxor xmm1,xmm7
751
752
753 psrld xmm7,22-13
754 pxor xmm1,xmm2
755 movdqa xmm14,xmm8
756 pslld xmm2,30-19
757 pxor xmm7,xmm1
758 pxor xmm14,xmm3
759 paddd xmm10,xmm5
760 pxor xmm7,xmm2
761
762 paddd xmm14,xmm5
763 paddd xmm14,xmm7
764 movd xmm5,DWORD[40+r8]
765 movd xmm0,DWORD[40+r9]
766 movd xmm1,DWORD[40+r10]
767 movd xmm2,DWORD[40+r11]
768 punpckldq xmm5,xmm1
769 punpckldq xmm0,xmm2
770 punpckldq xmm5,xmm0
771 movdqa xmm7,xmm10
772DB 102,15,56,0,238
773 movdqa xmm2,xmm10
774
775 psrld xmm7,6
776 movdqa xmm1,xmm10
777 pslld xmm2,7
778 movdqa XMMWORD[(160-128)+rax],xmm5
779 paddd xmm5,xmm13
780
781 psrld xmm1,11
782 pxor xmm7,xmm2
783 pslld xmm2,21-7
784 paddd xmm5,XMMWORD[((-64))+rbp]
785 pxor xmm7,xmm1
786
787 psrld xmm1,25-11
788 movdqa xmm0,xmm10
789
790 pxor xmm7,xmm2
791 movdqa xmm3,xmm10
792 pslld xmm2,26-21
793 pandn xmm0,xmm12
794 pand xmm3,xmm11
795 pxor xmm7,xmm1
796
797
798 movdqa xmm1,xmm14
799 pxor xmm7,xmm2
800 movdqa xmm2,xmm14
801 psrld xmm1,2
802 paddd xmm5,xmm7
803 pxor xmm0,xmm3
804 movdqa xmm3,xmm15
805 movdqa xmm7,xmm14
806 pslld xmm2,10
807 pxor xmm3,xmm14
808
809
810 psrld xmm7,13
811 pxor xmm1,xmm2
812 paddd xmm5,xmm0
813 pslld xmm2,19-10
814 pand xmm4,xmm3
815 pxor xmm1,xmm7
816
817
818 psrld xmm7,22-13
819 pxor xmm1,xmm2
820 movdqa xmm13,xmm15
821 pslld xmm2,30-19
822 pxor xmm7,xmm1
823 pxor xmm13,xmm4
824 paddd xmm9,xmm5
825 pxor xmm7,xmm2
826
827 paddd xmm13,xmm5
828 paddd xmm13,xmm7
829 movd xmm5,DWORD[44+r8]
830 movd xmm0,DWORD[44+r9]
831 movd xmm1,DWORD[44+r10]
832 movd xmm2,DWORD[44+r11]
833 punpckldq xmm5,xmm1
834 punpckldq xmm0,xmm2
835 punpckldq xmm5,xmm0
836 movdqa xmm7,xmm9
837
838 movdqa xmm2,xmm9
839DB 102,15,56,0,238
840 psrld xmm7,6
841 movdqa xmm1,xmm9
842 pslld xmm2,7
843 movdqa XMMWORD[(176-128)+rax],xmm5
844 paddd xmm5,xmm12
845
846 psrld xmm1,11
847 pxor xmm7,xmm2
848 pslld xmm2,21-7
849 paddd xmm5,XMMWORD[((-32))+rbp]
850 pxor xmm7,xmm1
851
852 psrld xmm1,25-11
853 movdqa xmm0,xmm9
854
855 pxor xmm7,xmm2
856 movdqa xmm4,xmm9
857 pslld xmm2,26-21
858 pandn xmm0,xmm11
859 pand xmm4,xmm10
860 pxor xmm7,xmm1
861
862
863 movdqa xmm1,xmm13
864 pxor xmm7,xmm2
865 movdqa xmm2,xmm13
866 psrld xmm1,2
867 paddd xmm5,xmm7
868 pxor xmm0,xmm4
869 movdqa xmm4,xmm14
870 movdqa xmm7,xmm13
871 pslld xmm2,10
872 pxor xmm4,xmm13
873
874
875 psrld xmm7,13
876 pxor xmm1,xmm2
877 paddd xmm5,xmm0
878 pslld xmm2,19-10
879 pand xmm3,xmm4
880 pxor xmm1,xmm7
881
882
883 psrld xmm7,22-13
884 pxor xmm1,xmm2
885 movdqa xmm12,xmm14
886 pslld xmm2,30-19
887 pxor xmm7,xmm1
888 pxor xmm12,xmm3
889 paddd xmm8,xmm5
890 pxor xmm7,xmm2
891
892 paddd xmm12,xmm5
893 paddd xmm12,xmm7
894 movd xmm5,DWORD[48+r8]
895 movd xmm0,DWORD[48+r9]
896 movd xmm1,DWORD[48+r10]
897 movd xmm2,DWORD[48+r11]
898 punpckldq xmm5,xmm1
899 punpckldq xmm0,xmm2
900 punpckldq xmm5,xmm0
901 movdqa xmm7,xmm8
902DB 102,15,56,0,238
903 movdqa xmm2,xmm8
904
905 psrld xmm7,6
906 movdqa xmm1,xmm8
907 pslld xmm2,7
908 movdqa XMMWORD[(192-128)+rax],xmm5
909 paddd xmm5,xmm11
910
911 psrld xmm1,11
912 pxor xmm7,xmm2
913 pslld xmm2,21-7
914 paddd xmm5,XMMWORD[rbp]
915 pxor xmm7,xmm1
916
917 psrld xmm1,25-11
918 movdqa xmm0,xmm8
919
920 pxor xmm7,xmm2
921 movdqa xmm3,xmm8
922 pslld xmm2,26-21
923 pandn xmm0,xmm10
924 pand xmm3,xmm9
925 pxor xmm7,xmm1
926
927
928 movdqa xmm1,xmm12
929 pxor xmm7,xmm2
930 movdqa xmm2,xmm12
931 psrld xmm1,2
932 paddd xmm5,xmm7
933 pxor xmm0,xmm3
934 movdqa xmm3,xmm13
935 movdqa xmm7,xmm12
936 pslld xmm2,10
937 pxor xmm3,xmm12
938
939
940 psrld xmm7,13
941 pxor xmm1,xmm2
942 paddd xmm5,xmm0
943 pslld xmm2,19-10
944 pand xmm4,xmm3
945 pxor xmm1,xmm7
946
947
948 psrld xmm7,22-13
949 pxor xmm1,xmm2
950 movdqa xmm11,xmm13
951 pslld xmm2,30-19
952 pxor xmm7,xmm1
953 pxor xmm11,xmm4
954 paddd xmm15,xmm5
955 pxor xmm7,xmm2
956
957 paddd xmm11,xmm5
958 paddd xmm11,xmm7
959 movd xmm5,DWORD[52+r8]
960 movd xmm0,DWORD[52+r9]
961 movd xmm1,DWORD[52+r10]
962 movd xmm2,DWORD[52+r11]
963 punpckldq xmm5,xmm1
964 punpckldq xmm0,xmm2
965 punpckldq xmm5,xmm0
966 movdqa xmm7,xmm15
967
968 movdqa xmm2,xmm15
969DB 102,15,56,0,238
970 psrld xmm7,6
971 movdqa xmm1,xmm15
972 pslld xmm2,7
973 movdqa XMMWORD[(208-128)+rax],xmm5
974 paddd xmm5,xmm10
975
976 psrld xmm1,11
977 pxor xmm7,xmm2
978 pslld xmm2,21-7
979 paddd xmm5,XMMWORD[32+rbp]
980 pxor xmm7,xmm1
981
982 psrld xmm1,25-11
983 movdqa xmm0,xmm15
984
985 pxor xmm7,xmm2
986 movdqa xmm4,xmm15
987 pslld xmm2,26-21
988 pandn xmm0,xmm9
989 pand xmm4,xmm8
990 pxor xmm7,xmm1
991
992
993 movdqa xmm1,xmm11
994 pxor xmm7,xmm2
995 movdqa xmm2,xmm11
996 psrld xmm1,2
997 paddd xmm5,xmm7
998 pxor xmm0,xmm4
999 movdqa xmm4,xmm12
1000 movdqa xmm7,xmm11
1001 pslld xmm2,10
1002 pxor xmm4,xmm11
1003
1004
1005 psrld xmm7,13
1006 pxor xmm1,xmm2
1007 paddd xmm5,xmm0
1008 pslld xmm2,19-10
1009 pand xmm3,xmm4
1010 pxor xmm1,xmm7
1011
1012
1013 psrld xmm7,22-13
1014 pxor xmm1,xmm2
1015 movdqa xmm10,xmm12
1016 pslld xmm2,30-19
1017 pxor xmm7,xmm1
1018 pxor xmm10,xmm3
1019 paddd xmm14,xmm5
1020 pxor xmm7,xmm2
1021
1022 paddd xmm10,xmm5
1023 paddd xmm10,xmm7
1024 movd xmm5,DWORD[56+r8]
1025 movd xmm0,DWORD[56+r9]
1026 movd xmm1,DWORD[56+r10]
1027 movd xmm2,DWORD[56+r11]
1028 punpckldq xmm5,xmm1
1029 punpckldq xmm0,xmm2
1030 punpckldq xmm5,xmm0
1031 movdqa xmm7,xmm14
1032DB 102,15,56,0,238
1033 movdqa xmm2,xmm14
1034
1035 psrld xmm7,6
1036 movdqa xmm1,xmm14
1037 pslld xmm2,7
1038 movdqa XMMWORD[(224-128)+rax],xmm5
1039 paddd xmm5,xmm9
1040
1041 psrld xmm1,11
1042 pxor xmm7,xmm2
1043 pslld xmm2,21-7
1044 paddd xmm5,XMMWORD[64+rbp]
1045 pxor xmm7,xmm1
1046
1047 psrld xmm1,25-11
1048 movdqa xmm0,xmm14
1049
1050 pxor xmm7,xmm2
1051 movdqa xmm3,xmm14
1052 pslld xmm2,26-21
1053 pandn xmm0,xmm8
1054 pand xmm3,xmm15
1055 pxor xmm7,xmm1
1056
1057
1058 movdqa xmm1,xmm10
1059 pxor xmm7,xmm2
1060 movdqa xmm2,xmm10
1061 psrld xmm1,2
1062 paddd xmm5,xmm7
1063 pxor xmm0,xmm3
1064 movdqa xmm3,xmm11
1065 movdqa xmm7,xmm10
1066 pslld xmm2,10
1067 pxor xmm3,xmm10
1068
1069
1070 psrld xmm7,13
1071 pxor xmm1,xmm2
1072 paddd xmm5,xmm0
1073 pslld xmm2,19-10
1074 pand xmm4,xmm3
1075 pxor xmm1,xmm7
1076
1077
1078 psrld xmm7,22-13
1079 pxor xmm1,xmm2
1080 movdqa xmm9,xmm11
1081 pslld xmm2,30-19
1082 pxor xmm7,xmm1
1083 pxor xmm9,xmm4
1084 paddd xmm13,xmm5
1085 pxor xmm7,xmm2
1086
1087 paddd xmm9,xmm5
1088 paddd xmm9,xmm7
1089 movd xmm5,DWORD[60+r8]
1090 lea r8,[64+r8]
1091 movd xmm0,DWORD[60+r9]
1092 lea r9,[64+r9]
1093 movd xmm1,DWORD[60+r10]
1094 lea r10,[64+r10]
1095 movd xmm2,DWORD[60+r11]
1096 lea r11,[64+r11]
1097 punpckldq xmm5,xmm1
1098 punpckldq xmm0,xmm2
1099 punpckldq xmm5,xmm0
1100 movdqa xmm7,xmm13
1101
1102 movdqa xmm2,xmm13
1103DB 102,15,56,0,238
1104 psrld xmm7,6
1105 movdqa xmm1,xmm13
1106 pslld xmm2,7
1107 movdqa XMMWORD[(240-128)+rax],xmm5
1108 paddd xmm5,xmm8
1109
1110 psrld xmm1,11
1111 pxor xmm7,xmm2
1112 pslld xmm2,21-7
1113 paddd xmm5,XMMWORD[96+rbp]
1114 pxor xmm7,xmm1
1115
1116 psrld xmm1,25-11
1117 movdqa xmm0,xmm13
1118 prefetcht0 [63+r8]
1119 pxor xmm7,xmm2
1120 movdqa xmm4,xmm13
1121 pslld xmm2,26-21
1122 pandn xmm0,xmm15
1123 pand xmm4,xmm14
1124 pxor xmm7,xmm1
1125
1126 prefetcht0 [63+r9]
1127 movdqa xmm1,xmm9
1128 pxor xmm7,xmm2
1129 movdqa xmm2,xmm9
1130 psrld xmm1,2
1131 paddd xmm5,xmm7
1132 pxor xmm0,xmm4
1133 movdqa xmm4,xmm10
1134 movdqa xmm7,xmm9
1135 pslld xmm2,10
1136 pxor xmm4,xmm9
1137
1138 prefetcht0 [63+r10]
1139 psrld xmm7,13
1140 pxor xmm1,xmm2
1141 paddd xmm5,xmm0
1142 pslld xmm2,19-10
1143 pand xmm3,xmm4
1144 pxor xmm1,xmm7
1145
1146 prefetcht0 [63+r11]
1147 psrld xmm7,22-13
1148 pxor xmm1,xmm2
1149 movdqa xmm8,xmm10
1150 pslld xmm2,30-19
1151 pxor xmm7,xmm1
1152 pxor xmm8,xmm3
1153 paddd xmm12,xmm5
1154 pxor xmm7,xmm2
1155
1156 paddd xmm8,xmm5
1157 paddd xmm8,xmm7
1158 lea rbp,[256+rbp]
1159 movdqu xmm5,XMMWORD[((0-128))+rax]
1160 mov ecx,3
1161 jmp NEAR $L$oop_16_xx
1162ALIGN 32
1163$L$oop_16_xx:
1164 movdqa xmm6,XMMWORD[((16-128))+rax]
1165 paddd xmm5,XMMWORD[((144-128))+rax]
1166
1167 movdqa xmm7,xmm6
1168 movdqa xmm1,xmm6
1169 psrld xmm7,3
1170 movdqa xmm2,xmm6
1171
1172 psrld xmm1,7
1173 movdqa xmm0,XMMWORD[((224-128))+rax]
1174 pslld xmm2,14
1175 pxor xmm7,xmm1
1176 psrld xmm1,18-7
1177 movdqa xmm3,xmm0
1178 pxor xmm7,xmm2
1179 pslld xmm2,25-14
1180 pxor xmm7,xmm1
1181 psrld xmm0,10
1182 movdqa xmm1,xmm3
1183
1184 psrld xmm3,17
1185 pxor xmm7,xmm2
1186 pslld xmm1,13
1187 paddd xmm5,xmm7
1188 pxor xmm0,xmm3
1189 psrld xmm3,19-17
1190 pxor xmm0,xmm1
1191 pslld xmm1,15-13
1192 pxor xmm0,xmm3
1193 pxor xmm0,xmm1
1194 paddd xmm5,xmm0
1195 movdqa xmm7,xmm12
1196
1197 movdqa xmm2,xmm12
1198
1199 psrld xmm7,6
1200 movdqa xmm1,xmm12
1201 pslld xmm2,7
1202 movdqa XMMWORD[(0-128)+rax],xmm5
1203 paddd xmm5,xmm15
1204
1205 psrld xmm1,11
1206 pxor xmm7,xmm2
1207 pslld xmm2,21-7
1208 paddd xmm5,XMMWORD[((-128))+rbp]
1209 pxor xmm7,xmm1
1210
1211 psrld xmm1,25-11
1212 movdqa xmm0,xmm12
1213
1214 pxor xmm7,xmm2
1215 movdqa xmm3,xmm12
1216 pslld xmm2,26-21
1217 pandn xmm0,xmm14
1218 pand xmm3,xmm13
1219 pxor xmm7,xmm1
1220
1221
1222 movdqa xmm1,xmm8
1223 pxor xmm7,xmm2
1224 movdqa xmm2,xmm8
1225 psrld xmm1,2
1226 paddd xmm5,xmm7
1227 pxor xmm0,xmm3
1228 movdqa xmm3,xmm9
1229 movdqa xmm7,xmm8
1230 pslld xmm2,10
1231 pxor xmm3,xmm8
1232
1233
1234 psrld xmm7,13
1235 pxor xmm1,xmm2
1236 paddd xmm5,xmm0
1237 pslld xmm2,19-10
1238 pand xmm4,xmm3
1239 pxor xmm1,xmm7
1240
1241
1242 psrld xmm7,22-13
1243 pxor xmm1,xmm2
1244 movdqa xmm15,xmm9
1245 pslld xmm2,30-19
1246 pxor xmm7,xmm1
1247 pxor xmm15,xmm4
1248 paddd xmm11,xmm5
1249 pxor xmm7,xmm2
1250
1251 paddd xmm15,xmm5
1252 paddd xmm15,xmm7
1253 movdqa xmm5,XMMWORD[((32-128))+rax]
1254 paddd xmm6,XMMWORD[((160-128))+rax]
1255
1256 movdqa xmm7,xmm5
1257 movdqa xmm1,xmm5
1258 psrld xmm7,3
1259 movdqa xmm2,xmm5
1260
1261 psrld xmm1,7
1262 movdqa xmm0,XMMWORD[((240-128))+rax]
1263 pslld xmm2,14
1264 pxor xmm7,xmm1
1265 psrld xmm1,18-7
1266 movdqa xmm4,xmm0
1267 pxor xmm7,xmm2
1268 pslld xmm2,25-14
1269 pxor xmm7,xmm1
1270 psrld xmm0,10
1271 movdqa xmm1,xmm4
1272
1273 psrld xmm4,17
1274 pxor xmm7,xmm2
1275 pslld xmm1,13
1276 paddd xmm6,xmm7
1277 pxor xmm0,xmm4
1278 psrld xmm4,19-17
1279 pxor xmm0,xmm1
1280 pslld xmm1,15-13
1281 pxor xmm0,xmm4
1282 pxor xmm0,xmm1
1283 paddd xmm6,xmm0
1284 movdqa xmm7,xmm11
1285
1286 movdqa xmm2,xmm11
1287
1288 psrld xmm7,6
1289 movdqa xmm1,xmm11
1290 pslld xmm2,7
1291 movdqa XMMWORD[(16-128)+rax],xmm6
1292 paddd xmm6,xmm14
1293
1294 psrld xmm1,11
1295 pxor xmm7,xmm2
1296 pslld xmm2,21-7
1297 paddd xmm6,XMMWORD[((-96))+rbp]
1298 pxor xmm7,xmm1
1299
1300 psrld xmm1,25-11
1301 movdqa xmm0,xmm11
1302
1303 pxor xmm7,xmm2
1304 movdqa xmm4,xmm11
1305 pslld xmm2,26-21
1306 pandn xmm0,xmm13
1307 pand xmm4,xmm12
1308 pxor xmm7,xmm1
1309
1310
1311 movdqa xmm1,xmm15
1312 pxor xmm7,xmm2
1313 movdqa xmm2,xmm15
1314 psrld xmm1,2
1315 paddd xmm6,xmm7
1316 pxor xmm0,xmm4
1317 movdqa xmm4,xmm8
1318 movdqa xmm7,xmm15
1319 pslld xmm2,10
1320 pxor xmm4,xmm15
1321
1322
1323 psrld xmm7,13
1324 pxor xmm1,xmm2
1325 paddd xmm6,xmm0
1326 pslld xmm2,19-10
1327 pand xmm3,xmm4
1328 pxor xmm1,xmm7
1329
1330
1331 psrld xmm7,22-13
1332 pxor xmm1,xmm2
1333 movdqa xmm14,xmm8
1334 pslld xmm2,30-19
1335 pxor xmm7,xmm1
1336 pxor xmm14,xmm3
1337 paddd xmm10,xmm6
1338 pxor xmm7,xmm2
1339
1340 paddd xmm14,xmm6
1341 paddd xmm14,xmm7
1342 movdqa xmm6,XMMWORD[((48-128))+rax]
1343 paddd xmm5,XMMWORD[((176-128))+rax]
1344
1345 movdqa xmm7,xmm6
1346 movdqa xmm1,xmm6
1347 psrld xmm7,3
1348 movdqa xmm2,xmm6
1349
1350 psrld xmm1,7
1351 movdqa xmm0,XMMWORD[((0-128))+rax]
1352 pslld xmm2,14
1353 pxor xmm7,xmm1
1354 psrld xmm1,18-7
1355 movdqa xmm3,xmm0
1356 pxor xmm7,xmm2
1357 pslld xmm2,25-14
1358 pxor xmm7,xmm1
1359 psrld xmm0,10
1360 movdqa xmm1,xmm3
1361
1362 psrld xmm3,17
1363 pxor xmm7,xmm2
1364 pslld xmm1,13
1365 paddd xmm5,xmm7
1366 pxor xmm0,xmm3
1367 psrld xmm3,19-17
1368 pxor xmm0,xmm1
1369 pslld xmm1,15-13
1370 pxor xmm0,xmm3
1371 pxor xmm0,xmm1
1372 paddd xmm5,xmm0
1373 movdqa xmm7,xmm10
1374
1375 movdqa xmm2,xmm10
1376
1377 psrld xmm7,6
1378 movdqa xmm1,xmm10
1379 pslld xmm2,7
1380 movdqa XMMWORD[(32-128)+rax],xmm5
1381 paddd xmm5,xmm13
1382
1383 psrld xmm1,11
1384 pxor xmm7,xmm2
1385 pslld xmm2,21-7
1386 paddd xmm5,XMMWORD[((-64))+rbp]
1387 pxor xmm7,xmm1
1388
1389 psrld xmm1,25-11
1390 movdqa xmm0,xmm10
1391
1392 pxor xmm7,xmm2
1393 movdqa xmm3,xmm10
1394 pslld xmm2,26-21
1395 pandn xmm0,xmm12
1396 pand xmm3,xmm11
1397 pxor xmm7,xmm1
1398
1399
1400 movdqa xmm1,xmm14
1401 pxor xmm7,xmm2
1402 movdqa xmm2,xmm14
1403 psrld xmm1,2
1404 paddd xmm5,xmm7
1405 pxor xmm0,xmm3
1406 movdqa xmm3,xmm15
1407 movdqa xmm7,xmm14
1408 pslld xmm2,10
1409 pxor xmm3,xmm14
1410
1411
1412 psrld xmm7,13
1413 pxor xmm1,xmm2
1414 paddd xmm5,xmm0
1415 pslld xmm2,19-10
1416 pand xmm4,xmm3
1417 pxor xmm1,xmm7
1418
1419
1420 psrld xmm7,22-13
1421 pxor xmm1,xmm2
1422 movdqa xmm13,xmm15
1423 pslld xmm2,30-19
1424 pxor xmm7,xmm1
1425 pxor xmm13,xmm4
1426 paddd xmm9,xmm5
1427 pxor xmm7,xmm2
1428
1429 paddd xmm13,xmm5
1430 paddd xmm13,xmm7
1431 movdqa xmm5,XMMWORD[((64-128))+rax]
1432 paddd xmm6,XMMWORD[((192-128))+rax]
1433
1434 movdqa xmm7,xmm5
1435 movdqa xmm1,xmm5
1436 psrld xmm7,3
1437 movdqa xmm2,xmm5
1438
1439 psrld xmm1,7
1440 movdqa xmm0,XMMWORD[((16-128))+rax]
1441 pslld xmm2,14
1442 pxor xmm7,xmm1
1443 psrld xmm1,18-7
1444 movdqa xmm4,xmm0
1445 pxor xmm7,xmm2
1446 pslld xmm2,25-14
1447 pxor xmm7,xmm1
1448 psrld xmm0,10
1449 movdqa xmm1,xmm4
1450
1451 psrld xmm4,17
1452 pxor xmm7,xmm2
1453 pslld xmm1,13
1454 paddd xmm6,xmm7
1455 pxor xmm0,xmm4
1456 psrld xmm4,19-17
1457 pxor xmm0,xmm1
1458 pslld xmm1,15-13
1459 pxor xmm0,xmm4
1460 pxor xmm0,xmm1
1461 paddd xmm6,xmm0
1462 movdqa xmm7,xmm9
1463
1464 movdqa xmm2,xmm9
1465
1466 psrld xmm7,6
1467 movdqa xmm1,xmm9
1468 pslld xmm2,7
1469 movdqa XMMWORD[(48-128)+rax],xmm6
1470 paddd xmm6,xmm12
1471
1472 psrld xmm1,11
1473 pxor xmm7,xmm2
1474 pslld xmm2,21-7
1475 paddd xmm6,XMMWORD[((-32))+rbp]
1476 pxor xmm7,xmm1
1477
1478 psrld xmm1,25-11
1479 movdqa xmm0,xmm9
1480
1481 pxor xmm7,xmm2
1482 movdqa xmm4,xmm9
1483 pslld xmm2,26-21
1484 pandn xmm0,xmm11
1485 pand xmm4,xmm10
1486 pxor xmm7,xmm1
1487
1488
1489 movdqa xmm1,xmm13
1490 pxor xmm7,xmm2
1491 movdqa xmm2,xmm13
1492 psrld xmm1,2
1493 paddd xmm6,xmm7
1494 pxor xmm0,xmm4
1495 movdqa xmm4,xmm14
1496 movdqa xmm7,xmm13
1497 pslld xmm2,10
1498 pxor xmm4,xmm13
1499
1500
1501 psrld xmm7,13
1502 pxor xmm1,xmm2
1503 paddd xmm6,xmm0
1504 pslld xmm2,19-10
1505 pand xmm3,xmm4
1506 pxor xmm1,xmm7
1507
1508
1509 psrld xmm7,22-13
1510 pxor xmm1,xmm2
1511 movdqa xmm12,xmm14
1512 pslld xmm2,30-19
1513 pxor xmm7,xmm1
1514 pxor xmm12,xmm3
1515 paddd xmm8,xmm6
1516 pxor xmm7,xmm2
1517
1518 paddd xmm12,xmm6
1519 paddd xmm12,xmm7
1520 movdqa xmm6,XMMWORD[((80-128))+rax]
1521 paddd xmm5,XMMWORD[((208-128))+rax]
1522
1523 movdqa xmm7,xmm6
1524 movdqa xmm1,xmm6
1525 psrld xmm7,3
1526 movdqa xmm2,xmm6
1527
1528 psrld xmm1,7
1529 movdqa xmm0,XMMWORD[((32-128))+rax]
1530 pslld xmm2,14
1531 pxor xmm7,xmm1
1532 psrld xmm1,18-7
1533 movdqa xmm3,xmm0
1534 pxor xmm7,xmm2
1535 pslld xmm2,25-14
1536 pxor xmm7,xmm1
1537 psrld xmm0,10
1538 movdqa xmm1,xmm3
1539
1540 psrld xmm3,17
1541 pxor xmm7,xmm2
1542 pslld xmm1,13
1543 paddd xmm5,xmm7
1544 pxor xmm0,xmm3
1545 psrld xmm3,19-17
1546 pxor xmm0,xmm1
1547 pslld xmm1,15-13
1548 pxor xmm0,xmm3
1549 pxor xmm0,xmm1
1550 paddd xmm5,xmm0
1551 movdqa xmm7,xmm8
1552
1553 movdqa xmm2,xmm8
1554
1555 psrld xmm7,6
1556 movdqa xmm1,xmm8
1557 pslld xmm2,7
1558 movdqa XMMWORD[(64-128)+rax],xmm5
1559 paddd xmm5,xmm11
1560
1561 psrld xmm1,11
1562 pxor xmm7,xmm2
1563 pslld xmm2,21-7
1564 paddd xmm5,XMMWORD[rbp]
1565 pxor xmm7,xmm1
1566
1567 psrld xmm1,25-11
1568 movdqa xmm0,xmm8
1569
1570 pxor xmm7,xmm2
1571 movdqa xmm3,xmm8
1572 pslld xmm2,26-21
1573 pandn xmm0,xmm10
1574 pand xmm3,xmm9
1575 pxor xmm7,xmm1
1576
1577
1578 movdqa xmm1,xmm12
1579 pxor xmm7,xmm2
1580 movdqa xmm2,xmm12
1581 psrld xmm1,2
1582 paddd xmm5,xmm7
1583 pxor xmm0,xmm3
1584 movdqa xmm3,xmm13
1585 movdqa xmm7,xmm12
1586 pslld xmm2,10
1587 pxor xmm3,xmm12
1588
1589
1590 psrld xmm7,13
1591 pxor xmm1,xmm2
1592 paddd xmm5,xmm0
1593 pslld xmm2,19-10
1594 pand xmm4,xmm3
1595 pxor xmm1,xmm7
1596
1597
1598 psrld xmm7,22-13
1599 pxor xmm1,xmm2
1600 movdqa xmm11,xmm13
1601 pslld xmm2,30-19
1602 pxor xmm7,xmm1
1603 pxor xmm11,xmm4
1604 paddd xmm15,xmm5
1605 pxor xmm7,xmm2
1606
1607 paddd xmm11,xmm5
1608 paddd xmm11,xmm7
1609 movdqa xmm5,XMMWORD[((96-128))+rax]
1610 paddd xmm6,XMMWORD[((224-128))+rax]
1611
1612 movdqa xmm7,xmm5
1613 movdqa xmm1,xmm5
1614 psrld xmm7,3
1615 movdqa xmm2,xmm5
1616
1617 psrld xmm1,7
1618 movdqa xmm0,XMMWORD[((48-128))+rax]
1619 pslld xmm2,14
1620 pxor xmm7,xmm1
1621 psrld xmm1,18-7
1622 movdqa xmm4,xmm0
1623 pxor xmm7,xmm2
1624 pslld xmm2,25-14
1625 pxor xmm7,xmm1
1626 psrld xmm0,10
1627 movdqa xmm1,xmm4
1628
1629 psrld xmm4,17
1630 pxor xmm7,xmm2
1631 pslld xmm1,13
1632 paddd xmm6,xmm7
1633 pxor xmm0,xmm4
1634 psrld xmm4,19-17
1635 pxor xmm0,xmm1
1636 pslld xmm1,15-13
1637 pxor xmm0,xmm4
1638 pxor xmm0,xmm1
1639 paddd xmm6,xmm0
1640 movdqa xmm7,xmm15
1641
1642 movdqa xmm2,xmm15
1643
1644 psrld xmm7,6
1645 movdqa xmm1,xmm15
1646 pslld xmm2,7
1647 movdqa XMMWORD[(80-128)+rax],xmm6
1648 paddd xmm6,xmm10
1649
1650 psrld xmm1,11
1651 pxor xmm7,xmm2
1652 pslld xmm2,21-7
1653 paddd xmm6,XMMWORD[32+rbp]
1654 pxor xmm7,xmm1
1655
1656 psrld xmm1,25-11
1657 movdqa xmm0,xmm15
1658
1659 pxor xmm7,xmm2
1660 movdqa xmm4,xmm15
1661 pslld xmm2,26-21
1662 pandn xmm0,xmm9
1663 pand xmm4,xmm8
1664 pxor xmm7,xmm1
1665
1666
1667 movdqa xmm1,xmm11
1668 pxor xmm7,xmm2
1669 movdqa xmm2,xmm11
1670 psrld xmm1,2
1671 paddd xmm6,xmm7
1672 pxor xmm0,xmm4
1673 movdqa xmm4,xmm12
1674 movdqa xmm7,xmm11
1675 pslld xmm2,10
1676 pxor xmm4,xmm11
1677
1678
1679 psrld xmm7,13
1680 pxor xmm1,xmm2
1681 paddd xmm6,xmm0
1682 pslld xmm2,19-10
1683 pand xmm3,xmm4
1684 pxor xmm1,xmm7
1685
1686
1687 psrld xmm7,22-13
1688 pxor xmm1,xmm2
1689 movdqa xmm10,xmm12
1690 pslld xmm2,30-19
1691 pxor xmm7,xmm1
1692 pxor xmm10,xmm3
1693 paddd xmm14,xmm6
1694 pxor xmm7,xmm2
1695
1696 paddd xmm10,xmm6
1697 paddd xmm10,xmm7
1698 movdqa xmm6,XMMWORD[((112-128))+rax]
1699 paddd xmm5,XMMWORD[((240-128))+rax]
1700
1701 movdqa xmm7,xmm6
1702 movdqa xmm1,xmm6
1703 psrld xmm7,3
1704 movdqa xmm2,xmm6
1705
1706 psrld xmm1,7
1707 movdqa xmm0,XMMWORD[((64-128))+rax]
1708 pslld xmm2,14
1709 pxor xmm7,xmm1
1710 psrld xmm1,18-7
1711 movdqa xmm3,xmm0
1712 pxor xmm7,xmm2
1713 pslld xmm2,25-14
1714 pxor xmm7,xmm1
1715 psrld xmm0,10
1716 movdqa xmm1,xmm3
1717
1718 psrld xmm3,17
1719 pxor xmm7,xmm2
1720 pslld xmm1,13
1721 paddd xmm5,xmm7
1722 pxor xmm0,xmm3
1723 psrld xmm3,19-17
1724 pxor xmm0,xmm1
1725 pslld xmm1,15-13
1726 pxor xmm0,xmm3
1727 pxor xmm0,xmm1
1728 paddd xmm5,xmm0
1729 movdqa xmm7,xmm14
1730
1731 movdqa xmm2,xmm14
1732
1733 psrld xmm7,6
1734 movdqa xmm1,xmm14
1735 pslld xmm2,7
1736 movdqa XMMWORD[(96-128)+rax],xmm5
1737 paddd xmm5,xmm9
1738
1739 psrld xmm1,11
1740 pxor xmm7,xmm2
1741 pslld xmm2,21-7
1742 paddd xmm5,XMMWORD[64+rbp]
1743 pxor xmm7,xmm1
1744
1745 psrld xmm1,25-11
1746 movdqa xmm0,xmm14
1747
1748 pxor xmm7,xmm2
1749 movdqa xmm3,xmm14
1750 pslld xmm2,26-21
1751 pandn xmm0,xmm8
1752 pand xmm3,xmm15
1753 pxor xmm7,xmm1
1754
1755
1756 movdqa xmm1,xmm10
1757 pxor xmm7,xmm2
1758 movdqa xmm2,xmm10
1759 psrld xmm1,2
1760 paddd xmm5,xmm7
1761 pxor xmm0,xmm3
1762 movdqa xmm3,xmm11
1763 movdqa xmm7,xmm10
1764 pslld xmm2,10
1765 pxor xmm3,xmm10
1766
1767
1768 psrld xmm7,13
1769 pxor xmm1,xmm2
1770 paddd xmm5,xmm0
1771 pslld xmm2,19-10
1772 pand xmm4,xmm3
1773 pxor xmm1,xmm7
1774
1775
1776 psrld xmm7,22-13
1777 pxor xmm1,xmm2
1778 movdqa xmm9,xmm11
1779 pslld xmm2,30-19
1780 pxor xmm7,xmm1
1781 pxor xmm9,xmm4
1782 paddd xmm13,xmm5
1783 pxor xmm7,xmm2
1784
1785 paddd xmm9,xmm5
1786 paddd xmm9,xmm7
1787 movdqa xmm5,XMMWORD[((128-128))+rax]
1788 paddd xmm6,XMMWORD[((0-128))+rax]
1789
1790 movdqa xmm7,xmm5
1791 movdqa xmm1,xmm5
1792 psrld xmm7,3
1793 movdqa xmm2,xmm5
1794
1795 psrld xmm1,7
1796 movdqa xmm0,XMMWORD[((80-128))+rax]
1797 pslld xmm2,14
1798 pxor xmm7,xmm1
1799 psrld xmm1,18-7
1800 movdqa xmm4,xmm0
1801 pxor xmm7,xmm2
1802 pslld xmm2,25-14
1803 pxor xmm7,xmm1
1804 psrld xmm0,10
1805 movdqa xmm1,xmm4
1806
1807 psrld xmm4,17
1808 pxor xmm7,xmm2
1809 pslld xmm1,13
1810 paddd xmm6,xmm7
1811 pxor xmm0,xmm4
1812 psrld xmm4,19-17
1813 pxor xmm0,xmm1
1814 pslld xmm1,15-13
1815 pxor xmm0,xmm4
1816 pxor xmm0,xmm1
1817 paddd xmm6,xmm0
1818 movdqa xmm7,xmm13
1819
1820 movdqa xmm2,xmm13
1821
1822 psrld xmm7,6
1823 movdqa xmm1,xmm13
1824 pslld xmm2,7
1825 movdqa XMMWORD[(112-128)+rax],xmm6
1826 paddd xmm6,xmm8
1827
1828 psrld xmm1,11
1829 pxor xmm7,xmm2
1830 pslld xmm2,21-7
1831 paddd xmm6,XMMWORD[96+rbp]
1832 pxor xmm7,xmm1
1833
1834 psrld xmm1,25-11
1835 movdqa xmm0,xmm13
1836
1837 pxor xmm7,xmm2
1838 movdqa xmm4,xmm13
1839 pslld xmm2,26-21
1840 pandn xmm0,xmm15
1841 pand xmm4,xmm14
1842 pxor xmm7,xmm1
1843
1844
1845 movdqa xmm1,xmm9
1846 pxor xmm7,xmm2
1847 movdqa xmm2,xmm9
1848 psrld xmm1,2
1849 paddd xmm6,xmm7
1850 pxor xmm0,xmm4
1851 movdqa xmm4,xmm10
1852 movdqa xmm7,xmm9
1853 pslld xmm2,10
1854 pxor xmm4,xmm9
1855
1856
1857 psrld xmm7,13
1858 pxor xmm1,xmm2
1859 paddd xmm6,xmm0
1860 pslld xmm2,19-10
1861 pand xmm3,xmm4
1862 pxor xmm1,xmm7
1863
1864
1865 psrld xmm7,22-13
1866 pxor xmm1,xmm2
1867 movdqa xmm8,xmm10
1868 pslld xmm2,30-19
1869 pxor xmm7,xmm1
1870 pxor xmm8,xmm3
1871 paddd xmm12,xmm6
1872 pxor xmm7,xmm2
1873
1874 paddd xmm8,xmm6
1875 paddd xmm8,xmm7
1876 lea rbp,[256+rbp]
1877 movdqa xmm6,XMMWORD[((144-128))+rax]
1878 paddd xmm5,XMMWORD[((16-128))+rax]
1879
1880 movdqa xmm7,xmm6
1881 movdqa xmm1,xmm6
1882 psrld xmm7,3
1883 movdqa xmm2,xmm6
1884
1885 psrld xmm1,7
1886 movdqa xmm0,XMMWORD[((96-128))+rax]
1887 pslld xmm2,14
1888 pxor xmm7,xmm1
1889 psrld xmm1,18-7
1890 movdqa xmm3,xmm0
1891 pxor xmm7,xmm2
1892 pslld xmm2,25-14
1893 pxor xmm7,xmm1
1894 psrld xmm0,10
1895 movdqa xmm1,xmm3
1896
1897 psrld xmm3,17
1898 pxor xmm7,xmm2
1899 pslld xmm1,13
1900 paddd xmm5,xmm7
1901 pxor xmm0,xmm3
1902 psrld xmm3,19-17
1903 pxor xmm0,xmm1
1904 pslld xmm1,15-13
1905 pxor xmm0,xmm3
1906 pxor xmm0,xmm1
1907 paddd xmm5,xmm0
1908 movdqa xmm7,xmm12
1909
1910 movdqa xmm2,xmm12
1911
1912 psrld xmm7,6
1913 movdqa xmm1,xmm12
1914 pslld xmm2,7
1915 movdqa XMMWORD[(128-128)+rax],xmm5
1916 paddd xmm5,xmm15
1917
1918 psrld xmm1,11
1919 pxor xmm7,xmm2
1920 pslld xmm2,21-7
1921 paddd xmm5,XMMWORD[((-128))+rbp]
1922 pxor xmm7,xmm1
1923
1924 psrld xmm1,25-11
1925 movdqa xmm0,xmm12
1926
1927 pxor xmm7,xmm2
1928 movdqa xmm3,xmm12
1929 pslld xmm2,26-21
1930 pandn xmm0,xmm14
1931 pand xmm3,xmm13
1932 pxor xmm7,xmm1
1933
1934
1935 movdqa xmm1,xmm8
1936 pxor xmm7,xmm2
1937 movdqa xmm2,xmm8
1938 psrld xmm1,2
1939 paddd xmm5,xmm7
1940 pxor xmm0,xmm3
1941 movdqa xmm3,xmm9
1942 movdqa xmm7,xmm8
1943 pslld xmm2,10
1944 pxor xmm3,xmm8
1945
1946
1947 psrld xmm7,13
1948 pxor xmm1,xmm2
1949 paddd xmm5,xmm0
1950 pslld xmm2,19-10
1951 pand xmm4,xmm3
1952 pxor xmm1,xmm7
1953
1954
1955 psrld xmm7,22-13
1956 pxor xmm1,xmm2
1957 movdqa xmm15,xmm9
1958 pslld xmm2,30-19
1959 pxor xmm7,xmm1
1960 pxor xmm15,xmm4
1961 paddd xmm11,xmm5
1962 pxor xmm7,xmm2
1963
1964 paddd xmm15,xmm5
1965 paddd xmm15,xmm7
1966 movdqa xmm5,XMMWORD[((160-128))+rax]
1967 paddd xmm6,XMMWORD[((32-128))+rax]
1968
1969 movdqa xmm7,xmm5
1970 movdqa xmm1,xmm5
1971 psrld xmm7,3
1972 movdqa xmm2,xmm5
1973
1974 psrld xmm1,7
1975 movdqa xmm0,XMMWORD[((112-128))+rax]
1976 pslld xmm2,14
1977 pxor xmm7,xmm1
1978 psrld xmm1,18-7
1979 movdqa xmm4,xmm0
1980 pxor xmm7,xmm2
1981 pslld xmm2,25-14
1982 pxor xmm7,xmm1
1983 psrld xmm0,10
1984 movdqa xmm1,xmm4
1985
1986 psrld xmm4,17
1987 pxor xmm7,xmm2
1988 pslld xmm1,13
1989 paddd xmm6,xmm7
1990 pxor xmm0,xmm4
1991 psrld xmm4,19-17
1992 pxor xmm0,xmm1
1993 pslld xmm1,15-13
1994 pxor xmm0,xmm4
1995 pxor xmm0,xmm1
1996 paddd xmm6,xmm0
1997 movdqa xmm7,xmm11
1998
1999 movdqa xmm2,xmm11
2000
2001 psrld xmm7,6
2002 movdqa xmm1,xmm11
2003 pslld xmm2,7
2004 movdqa XMMWORD[(144-128)+rax],xmm6
2005 paddd xmm6,xmm14
2006
2007 psrld xmm1,11
2008 pxor xmm7,xmm2
2009 pslld xmm2,21-7
2010 paddd xmm6,XMMWORD[((-96))+rbp]
2011 pxor xmm7,xmm1
2012
2013 psrld xmm1,25-11
2014 movdqa xmm0,xmm11
2015
2016 pxor xmm7,xmm2
2017 movdqa xmm4,xmm11
2018 pslld xmm2,26-21
2019 pandn xmm0,xmm13
2020 pand xmm4,xmm12
2021 pxor xmm7,xmm1
2022
2023
2024 movdqa xmm1,xmm15
2025 pxor xmm7,xmm2
2026 movdqa xmm2,xmm15
2027 psrld xmm1,2
2028 paddd xmm6,xmm7
2029 pxor xmm0,xmm4
2030 movdqa xmm4,xmm8
2031 movdqa xmm7,xmm15
2032 pslld xmm2,10
2033 pxor xmm4,xmm15
2034
2035
2036 psrld xmm7,13
2037 pxor xmm1,xmm2
2038 paddd xmm6,xmm0
2039 pslld xmm2,19-10
2040 pand xmm3,xmm4
2041 pxor xmm1,xmm7
2042
2043
2044 psrld xmm7,22-13
2045 pxor xmm1,xmm2
2046 movdqa xmm14,xmm8
2047 pslld xmm2,30-19
2048 pxor xmm7,xmm1
2049 pxor xmm14,xmm3
2050 paddd xmm10,xmm6
2051 pxor xmm7,xmm2
2052
2053 paddd xmm14,xmm6
2054 paddd xmm14,xmm7
2055 movdqa xmm6,XMMWORD[((176-128))+rax]
2056 paddd xmm5,XMMWORD[((48-128))+rax]
2057
2058 movdqa xmm7,xmm6
2059 movdqa xmm1,xmm6
2060 psrld xmm7,3
2061 movdqa xmm2,xmm6
2062
2063 psrld xmm1,7
2064 movdqa xmm0,XMMWORD[((128-128))+rax]
2065 pslld xmm2,14
2066 pxor xmm7,xmm1
2067 psrld xmm1,18-7
2068 movdqa xmm3,xmm0
2069 pxor xmm7,xmm2
2070 pslld xmm2,25-14
2071 pxor xmm7,xmm1
2072 psrld xmm0,10
2073 movdqa xmm1,xmm3
2074
2075 psrld xmm3,17
2076 pxor xmm7,xmm2
2077 pslld xmm1,13
2078 paddd xmm5,xmm7
2079 pxor xmm0,xmm3
2080 psrld xmm3,19-17
2081 pxor xmm0,xmm1
2082 pslld xmm1,15-13
2083 pxor xmm0,xmm3
2084 pxor xmm0,xmm1
2085 paddd xmm5,xmm0
2086 movdqa xmm7,xmm10
2087
2088 movdqa xmm2,xmm10
2089
2090 psrld xmm7,6
2091 movdqa xmm1,xmm10
2092 pslld xmm2,7
2093 movdqa XMMWORD[(160-128)+rax],xmm5
2094 paddd xmm5,xmm13
2095
2096 psrld xmm1,11
2097 pxor xmm7,xmm2
2098 pslld xmm2,21-7
2099 paddd xmm5,XMMWORD[((-64))+rbp]
2100 pxor xmm7,xmm1
2101
2102 psrld xmm1,25-11
2103 movdqa xmm0,xmm10
2104
2105 pxor xmm7,xmm2
2106 movdqa xmm3,xmm10
2107 pslld xmm2,26-21
2108 pandn xmm0,xmm12
2109 pand xmm3,xmm11
2110 pxor xmm7,xmm1
2111
2112
2113 movdqa xmm1,xmm14
2114 pxor xmm7,xmm2
2115 movdqa xmm2,xmm14
2116 psrld xmm1,2
2117 paddd xmm5,xmm7
2118 pxor xmm0,xmm3
2119 movdqa xmm3,xmm15
2120 movdqa xmm7,xmm14
2121 pslld xmm2,10
2122 pxor xmm3,xmm14
2123
2124
2125 psrld xmm7,13
2126 pxor xmm1,xmm2
2127 paddd xmm5,xmm0
2128 pslld xmm2,19-10
2129 pand xmm4,xmm3
2130 pxor xmm1,xmm7
2131
2132
2133 psrld xmm7,22-13
2134 pxor xmm1,xmm2
2135 movdqa xmm13,xmm15
2136 pslld xmm2,30-19
2137 pxor xmm7,xmm1
2138 pxor xmm13,xmm4
2139 paddd xmm9,xmm5
2140 pxor xmm7,xmm2
2141
2142 paddd xmm13,xmm5
2143 paddd xmm13,xmm7
2144 movdqa xmm5,XMMWORD[((192-128))+rax]
2145 paddd xmm6,XMMWORD[((64-128))+rax]
2146
2147 movdqa xmm7,xmm5
2148 movdqa xmm1,xmm5
2149 psrld xmm7,3
2150 movdqa xmm2,xmm5
2151
2152 psrld xmm1,7
2153 movdqa xmm0,XMMWORD[((144-128))+rax]
2154 pslld xmm2,14
2155 pxor xmm7,xmm1
2156 psrld xmm1,18-7
2157 movdqa xmm4,xmm0
2158 pxor xmm7,xmm2
2159 pslld xmm2,25-14
2160 pxor xmm7,xmm1
2161 psrld xmm0,10
2162 movdqa xmm1,xmm4
2163
2164 psrld xmm4,17
2165 pxor xmm7,xmm2
2166 pslld xmm1,13
2167 paddd xmm6,xmm7
2168 pxor xmm0,xmm4
2169 psrld xmm4,19-17
2170 pxor xmm0,xmm1
2171 pslld xmm1,15-13
2172 pxor xmm0,xmm4
2173 pxor xmm0,xmm1
2174 paddd xmm6,xmm0
2175 movdqa xmm7,xmm9
2176
2177 movdqa xmm2,xmm9
2178
2179 psrld xmm7,6
2180 movdqa xmm1,xmm9
2181 pslld xmm2,7
2182 movdqa XMMWORD[(176-128)+rax],xmm6
2183 paddd xmm6,xmm12
2184
2185 psrld xmm1,11
2186 pxor xmm7,xmm2
2187 pslld xmm2,21-7
2188 paddd xmm6,XMMWORD[((-32))+rbp]
2189 pxor xmm7,xmm1
2190
2191 psrld xmm1,25-11
2192 movdqa xmm0,xmm9
2193
2194 pxor xmm7,xmm2
2195 movdqa xmm4,xmm9
2196 pslld xmm2,26-21
2197 pandn xmm0,xmm11
2198 pand xmm4,xmm10
2199 pxor xmm7,xmm1
2200
2201
2202 movdqa xmm1,xmm13
2203 pxor xmm7,xmm2
2204 movdqa xmm2,xmm13
2205 psrld xmm1,2
2206 paddd xmm6,xmm7
2207 pxor xmm0,xmm4
2208 movdqa xmm4,xmm14
2209 movdqa xmm7,xmm13
2210 pslld xmm2,10
2211 pxor xmm4,xmm13
2212
2213
2214 psrld xmm7,13
2215 pxor xmm1,xmm2
2216 paddd xmm6,xmm0
2217 pslld xmm2,19-10
2218 pand xmm3,xmm4
2219 pxor xmm1,xmm7
2220
2221
2222 psrld xmm7,22-13
2223 pxor xmm1,xmm2
2224 movdqa xmm12,xmm14
2225 pslld xmm2,30-19
2226 pxor xmm7,xmm1
2227 pxor xmm12,xmm3
2228 paddd xmm8,xmm6
2229 pxor xmm7,xmm2
2230
2231 paddd xmm12,xmm6
2232 paddd xmm12,xmm7
2233 movdqa xmm6,XMMWORD[((208-128))+rax]
2234 paddd xmm5,XMMWORD[((80-128))+rax]
2235
2236 movdqa xmm7,xmm6
2237 movdqa xmm1,xmm6
2238 psrld xmm7,3
2239 movdqa xmm2,xmm6
2240
2241 psrld xmm1,7
2242 movdqa xmm0,XMMWORD[((160-128))+rax]
2243 pslld xmm2,14
2244 pxor xmm7,xmm1
2245 psrld xmm1,18-7
2246 movdqa xmm3,xmm0
2247 pxor xmm7,xmm2
2248 pslld xmm2,25-14
2249 pxor xmm7,xmm1
2250 psrld xmm0,10
2251 movdqa xmm1,xmm3
2252
2253 psrld xmm3,17
2254 pxor xmm7,xmm2
2255 pslld xmm1,13
2256 paddd xmm5,xmm7
2257 pxor xmm0,xmm3
2258 psrld xmm3,19-17
2259 pxor xmm0,xmm1
2260 pslld xmm1,15-13
2261 pxor xmm0,xmm3
2262 pxor xmm0,xmm1
2263 paddd xmm5,xmm0
2264 movdqa xmm7,xmm8
2265
2266 movdqa xmm2,xmm8
2267
2268 psrld xmm7,6
2269 movdqa xmm1,xmm8
2270 pslld xmm2,7
2271 movdqa XMMWORD[(192-128)+rax],xmm5
2272 paddd xmm5,xmm11
2273
2274 psrld xmm1,11
2275 pxor xmm7,xmm2
2276 pslld xmm2,21-7
2277 paddd xmm5,XMMWORD[rbp]
2278 pxor xmm7,xmm1
2279
2280 psrld xmm1,25-11
2281 movdqa xmm0,xmm8
2282
2283 pxor xmm7,xmm2
2284 movdqa xmm3,xmm8
2285 pslld xmm2,26-21
2286 pandn xmm0,xmm10
2287 pand xmm3,xmm9
2288 pxor xmm7,xmm1
2289
2290
2291 movdqa xmm1,xmm12
2292 pxor xmm7,xmm2
2293 movdqa xmm2,xmm12
2294 psrld xmm1,2
2295 paddd xmm5,xmm7
2296 pxor xmm0,xmm3
2297 movdqa xmm3,xmm13
2298 movdqa xmm7,xmm12
2299 pslld xmm2,10
2300 pxor xmm3,xmm12
2301
2302
2303 psrld xmm7,13
2304 pxor xmm1,xmm2
2305 paddd xmm5,xmm0
2306 pslld xmm2,19-10
2307 pand xmm4,xmm3
2308 pxor xmm1,xmm7
2309
2310
2311 psrld xmm7,22-13
2312 pxor xmm1,xmm2
2313 movdqa xmm11,xmm13
2314 pslld xmm2,30-19
2315 pxor xmm7,xmm1
2316 pxor xmm11,xmm4
2317 paddd xmm15,xmm5
2318 pxor xmm7,xmm2
2319
2320 paddd xmm11,xmm5
2321 paddd xmm11,xmm7
2322 movdqa xmm5,XMMWORD[((224-128))+rax]
2323 paddd xmm6,XMMWORD[((96-128))+rax]
2324
2325 movdqa xmm7,xmm5
2326 movdqa xmm1,xmm5
2327 psrld xmm7,3
2328 movdqa xmm2,xmm5
2329
2330 psrld xmm1,7
2331 movdqa xmm0,XMMWORD[((176-128))+rax]
2332 pslld xmm2,14
2333 pxor xmm7,xmm1
2334 psrld xmm1,18-7
2335 movdqa xmm4,xmm0
2336 pxor xmm7,xmm2
2337 pslld xmm2,25-14
2338 pxor xmm7,xmm1
2339 psrld xmm0,10
2340 movdqa xmm1,xmm4
2341
2342 psrld xmm4,17
2343 pxor xmm7,xmm2
2344 pslld xmm1,13
2345 paddd xmm6,xmm7
2346 pxor xmm0,xmm4
2347 psrld xmm4,19-17
2348 pxor xmm0,xmm1
2349 pslld xmm1,15-13
2350 pxor xmm0,xmm4
2351 pxor xmm0,xmm1
2352 paddd xmm6,xmm0
2353 movdqa xmm7,xmm15
2354
2355 movdqa xmm2,xmm15
2356
2357 psrld xmm7,6
2358 movdqa xmm1,xmm15
2359 pslld xmm2,7
2360 movdqa XMMWORD[(208-128)+rax],xmm6
2361 paddd xmm6,xmm10
2362
2363 psrld xmm1,11
2364 pxor xmm7,xmm2
2365 pslld xmm2,21-7
2366 paddd xmm6,XMMWORD[32+rbp]
2367 pxor xmm7,xmm1
2368
2369 psrld xmm1,25-11
2370 movdqa xmm0,xmm15
2371
2372 pxor xmm7,xmm2
2373 movdqa xmm4,xmm15
2374 pslld xmm2,26-21
2375 pandn xmm0,xmm9
2376 pand xmm4,xmm8
2377 pxor xmm7,xmm1
2378
2379
2380 movdqa xmm1,xmm11
2381 pxor xmm7,xmm2
2382 movdqa xmm2,xmm11
2383 psrld xmm1,2
2384 paddd xmm6,xmm7
2385 pxor xmm0,xmm4
2386 movdqa xmm4,xmm12
2387 movdqa xmm7,xmm11
2388 pslld xmm2,10
2389 pxor xmm4,xmm11
2390
2391
2392 psrld xmm7,13
2393 pxor xmm1,xmm2
2394 paddd xmm6,xmm0
2395 pslld xmm2,19-10
2396 pand xmm3,xmm4
2397 pxor xmm1,xmm7
2398
2399
2400 psrld xmm7,22-13
2401 pxor xmm1,xmm2
2402 movdqa xmm10,xmm12
2403 pslld xmm2,30-19
2404 pxor xmm7,xmm1
2405 pxor xmm10,xmm3
2406 paddd xmm14,xmm6
2407 pxor xmm7,xmm2
2408
2409 paddd xmm10,xmm6
2410 paddd xmm10,xmm7
2411 movdqa xmm6,XMMWORD[((240-128))+rax]
2412 paddd xmm5,XMMWORD[((112-128))+rax]
2413
2414 movdqa xmm7,xmm6
2415 movdqa xmm1,xmm6
2416 psrld xmm7,3
2417 movdqa xmm2,xmm6
2418
2419 psrld xmm1,7
2420 movdqa xmm0,XMMWORD[((192-128))+rax]
2421 pslld xmm2,14
2422 pxor xmm7,xmm1
2423 psrld xmm1,18-7
2424 movdqa xmm3,xmm0
2425 pxor xmm7,xmm2
2426 pslld xmm2,25-14
2427 pxor xmm7,xmm1
2428 psrld xmm0,10
2429 movdqa xmm1,xmm3
2430
2431 psrld xmm3,17
2432 pxor xmm7,xmm2
2433 pslld xmm1,13
2434 paddd xmm5,xmm7
2435 pxor xmm0,xmm3
2436 psrld xmm3,19-17
2437 pxor xmm0,xmm1
2438 pslld xmm1,15-13
2439 pxor xmm0,xmm3
2440 pxor xmm0,xmm1
2441 paddd xmm5,xmm0
2442 movdqa xmm7,xmm14
2443
2444 movdqa xmm2,xmm14
2445
2446 psrld xmm7,6
2447 movdqa xmm1,xmm14
2448 pslld xmm2,7
2449 movdqa XMMWORD[(224-128)+rax],xmm5
2450 paddd xmm5,xmm9
2451
2452 psrld xmm1,11
2453 pxor xmm7,xmm2
2454 pslld xmm2,21-7
2455 paddd xmm5,XMMWORD[64+rbp]
2456 pxor xmm7,xmm1
2457
2458 psrld xmm1,25-11
2459 movdqa xmm0,xmm14
2460
2461 pxor xmm7,xmm2
2462 movdqa xmm3,xmm14
2463 pslld xmm2,26-21
2464 pandn xmm0,xmm8
2465 pand xmm3,xmm15
2466 pxor xmm7,xmm1
2467
2468
2469 movdqa xmm1,xmm10
2470 pxor xmm7,xmm2
2471 movdqa xmm2,xmm10
2472 psrld xmm1,2
2473 paddd xmm5,xmm7
2474 pxor xmm0,xmm3
2475 movdqa xmm3,xmm11
2476 movdqa xmm7,xmm10
2477 pslld xmm2,10
2478 pxor xmm3,xmm10
2479
2480
2481 psrld xmm7,13
2482 pxor xmm1,xmm2
2483 paddd xmm5,xmm0
2484 pslld xmm2,19-10
2485 pand xmm4,xmm3
2486 pxor xmm1,xmm7
2487
2488
2489 psrld xmm7,22-13
2490 pxor xmm1,xmm2
2491 movdqa xmm9,xmm11
2492 pslld xmm2,30-19
2493 pxor xmm7,xmm1
2494 pxor xmm9,xmm4
2495 paddd xmm13,xmm5
2496 pxor xmm7,xmm2
2497
2498 paddd xmm9,xmm5
2499 paddd xmm9,xmm7
2500 movdqa xmm5,XMMWORD[((0-128))+rax]
2501 paddd xmm6,XMMWORD[((128-128))+rax]
2502
2503 movdqa xmm7,xmm5
2504 movdqa xmm1,xmm5
2505 psrld xmm7,3
2506 movdqa xmm2,xmm5
2507
2508 psrld xmm1,7
2509 movdqa xmm0,XMMWORD[((208-128))+rax]
2510 pslld xmm2,14
2511 pxor xmm7,xmm1
2512 psrld xmm1,18-7
2513 movdqa xmm4,xmm0
2514 pxor xmm7,xmm2
2515 pslld xmm2,25-14
2516 pxor xmm7,xmm1
2517 psrld xmm0,10
2518 movdqa xmm1,xmm4
2519
2520 psrld xmm4,17
2521 pxor xmm7,xmm2
2522 pslld xmm1,13
2523 paddd xmm6,xmm7
2524 pxor xmm0,xmm4
2525 psrld xmm4,19-17
2526 pxor xmm0,xmm1
2527 pslld xmm1,15-13
2528 pxor xmm0,xmm4
2529 pxor xmm0,xmm1
2530 paddd xmm6,xmm0
2531 movdqa xmm7,xmm13
2532
2533 movdqa xmm2,xmm13
2534
2535 psrld xmm7,6
2536 movdqa xmm1,xmm13
2537 pslld xmm2,7
2538 movdqa XMMWORD[(240-128)+rax],xmm6
2539 paddd xmm6,xmm8
2540
2541 psrld xmm1,11
2542 pxor xmm7,xmm2
2543 pslld xmm2,21-7
2544 paddd xmm6,XMMWORD[96+rbp]
2545 pxor xmm7,xmm1
2546
2547 psrld xmm1,25-11
2548 movdqa xmm0,xmm13
2549
2550 pxor xmm7,xmm2
2551 movdqa xmm4,xmm13
2552 pslld xmm2,26-21
2553 pandn xmm0,xmm15
2554 pand xmm4,xmm14
2555 pxor xmm7,xmm1
2556
2557
2558 movdqa xmm1,xmm9
2559 pxor xmm7,xmm2
2560 movdqa xmm2,xmm9
2561 psrld xmm1,2
2562 paddd xmm6,xmm7
2563 pxor xmm0,xmm4
2564 movdqa xmm4,xmm10
2565 movdqa xmm7,xmm9
2566 pslld xmm2,10
2567 pxor xmm4,xmm9
2568
2569
2570 psrld xmm7,13
2571 pxor xmm1,xmm2
2572 paddd xmm6,xmm0
2573 pslld xmm2,19-10
2574 pand xmm3,xmm4
2575 pxor xmm1,xmm7
2576
2577
2578 psrld xmm7,22-13
2579 pxor xmm1,xmm2
2580 movdqa xmm8,xmm10
2581 pslld xmm2,30-19
2582 pxor xmm7,xmm1
2583 pxor xmm8,xmm3
2584 paddd xmm12,xmm6
2585 pxor xmm7,xmm2
2586
2587 paddd xmm8,xmm6
2588 paddd xmm8,xmm7
2589 lea rbp,[256+rbp]
2590 dec ecx
2591 jnz NEAR $L$oop_16_xx
2592
2593 mov ecx,1
2594 lea rbp,[((K256+128))]
2595
2596 movdqa xmm7,XMMWORD[rbx]
2597 cmp ecx,DWORD[rbx]
2598 pxor xmm0,xmm0
2599 cmovge r8,rbp
2600 cmp ecx,DWORD[4+rbx]
2601 movdqa xmm6,xmm7
2602 cmovge r9,rbp
2603 cmp ecx,DWORD[8+rbx]
2604 pcmpgtd xmm6,xmm0
2605 cmovge r10,rbp
2606 cmp ecx,DWORD[12+rbx]
2607 paddd xmm7,xmm6
2608 cmovge r11,rbp
2609
2610 movdqu xmm0,XMMWORD[((0-128))+rdi]
2611 pand xmm8,xmm6
2612 movdqu xmm1,XMMWORD[((32-128))+rdi]
2613 pand xmm9,xmm6
2614 movdqu xmm2,XMMWORD[((64-128))+rdi]
2615 pand xmm10,xmm6
2616 movdqu xmm5,XMMWORD[((96-128))+rdi]
2617 pand xmm11,xmm6
2618 paddd xmm8,xmm0
2619 movdqu xmm0,XMMWORD[((128-128))+rdi]
2620 pand xmm12,xmm6
2621 paddd xmm9,xmm1
2622 movdqu xmm1,XMMWORD[((160-128))+rdi]
2623 pand xmm13,xmm6
2624 paddd xmm10,xmm2
2625 movdqu xmm2,XMMWORD[((192-128))+rdi]
2626 pand xmm14,xmm6
2627 paddd xmm11,xmm5
2628 movdqu xmm5,XMMWORD[((224-128))+rdi]
2629 pand xmm15,xmm6
2630 paddd xmm12,xmm0
2631 paddd xmm13,xmm1
2632 movdqu XMMWORD[(0-128)+rdi],xmm8
2633 paddd xmm14,xmm2
2634 movdqu XMMWORD[(32-128)+rdi],xmm9
2635 paddd xmm15,xmm5
2636 movdqu XMMWORD[(64-128)+rdi],xmm10
2637 movdqu XMMWORD[(96-128)+rdi],xmm11
2638 movdqu XMMWORD[(128-128)+rdi],xmm12
2639 movdqu XMMWORD[(160-128)+rdi],xmm13
2640 movdqu XMMWORD[(192-128)+rdi],xmm14
2641 movdqu XMMWORD[(224-128)+rdi],xmm15
2642
2643 movdqa XMMWORD[rbx],xmm7
2644 movdqa xmm6,XMMWORD[$L$pbswap]
2645 dec edx
2646 jnz NEAR $L$oop
2647
2648 mov edx,DWORD[280+rsp]
2649 lea rdi,[16+rdi]
2650 lea rsi,[64+rsi]
2651 dec edx
2652 jnz NEAR $L$oop_grande
2653
2654$L$done:
2655 mov rax,QWORD[272+rsp]
2656
2657 movaps xmm6,XMMWORD[((-184))+rax]
2658 movaps xmm7,XMMWORD[((-168))+rax]
2659 movaps xmm8,XMMWORD[((-152))+rax]
2660 movaps xmm9,XMMWORD[((-136))+rax]
2661 movaps xmm10,XMMWORD[((-120))+rax]
2662 movaps xmm11,XMMWORD[((-104))+rax]
2663 movaps xmm12,XMMWORD[((-88))+rax]
2664 movaps xmm13,XMMWORD[((-72))+rax]
2665 movaps xmm14,XMMWORD[((-56))+rax]
2666 movaps xmm15,XMMWORD[((-40))+rax]
2667 mov rbp,QWORD[((-16))+rax]
2668
2669 mov rbx,QWORD[((-8))+rax]
2670
2671 lea rsp,[rax]
2672
2673$L$epilogue:
2674 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2675 mov rsi,QWORD[16+rsp]
2676 DB 0F3h,0C3h ;repret
2677
2678$L$SEH_end_sha256_multi_block:
2679
2680ALIGN 32
2681sha256_multi_block_shaext:
2682 mov QWORD[8+rsp],rdi ;WIN64 prologue
2683 mov QWORD[16+rsp],rsi
2684 mov rax,rsp
2685$L$SEH_begin_sha256_multi_block_shaext:
2686 mov rdi,rcx
2687 mov rsi,rdx
2688 mov rdx,r8
2689
2690
2691
2692_shaext_shortcut:
2693 mov rax,rsp
2694
2695 push rbx
2696
2697 push rbp
2698
2699 lea rsp,[((-168))+rsp]
2700 movaps XMMWORD[rsp],xmm6
2701 movaps XMMWORD[16+rsp],xmm7
2702 movaps XMMWORD[32+rsp],xmm8
2703 movaps XMMWORD[48+rsp],xmm9
2704 movaps XMMWORD[(-120)+rax],xmm10
2705 movaps XMMWORD[(-104)+rax],xmm11
2706 movaps XMMWORD[(-88)+rax],xmm12
2707 movaps XMMWORD[(-72)+rax],xmm13
2708 movaps XMMWORD[(-56)+rax],xmm14
2709 movaps XMMWORD[(-40)+rax],xmm15
2710 sub rsp,288
2711 shl edx,1
2712 and rsp,-256
2713 lea rdi,[128+rdi]
2714 mov QWORD[272+rsp],rax
2715$L$body_shaext:
2716 lea rbx,[256+rsp]
2717 lea rbp,[((K256_shaext+128))]
2718
2719$L$oop_grande_shaext:
2720 mov DWORD[280+rsp],edx
2721 xor edx,edx
2722
2723 mov r8,QWORD[rsi]
2724
2725 mov ecx,DWORD[8+rsi]
2726 cmp ecx,edx
2727 cmovg edx,ecx
2728 test ecx,ecx
2729 mov DWORD[rbx],ecx
2730 cmovle r8,rsp
2731
2732 mov r9,QWORD[16+rsi]
2733
2734 mov ecx,DWORD[24+rsi]
2735 cmp ecx,edx
2736 cmovg edx,ecx
2737 test ecx,ecx
2738 mov DWORD[4+rbx],ecx
2739 cmovle r9,rsp
2740 test edx,edx
2741 jz NEAR $L$done_shaext
2742
2743 movq xmm12,QWORD[((0-128))+rdi]
2744 movq xmm4,QWORD[((32-128))+rdi]
2745 movq xmm13,QWORD[((64-128))+rdi]
2746 movq xmm5,QWORD[((96-128))+rdi]
2747 movq xmm8,QWORD[((128-128))+rdi]
2748 movq xmm9,QWORD[((160-128))+rdi]
2749 movq xmm10,QWORD[((192-128))+rdi]
2750 movq xmm11,QWORD[((224-128))+rdi]
2751
2752 punpckldq xmm12,xmm4
2753 punpckldq xmm13,xmm5
2754 punpckldq xmm8,xmm9
2755 punpckldq xmm10,xmm11
2756 movdqa xmm3,XMMWORD[((K256_shaext-16))]
2757
2758 movdqa xmm14,xmm12
2759 movdqa xmm15,xmm13
2760 punpcklqdq xmm12,xmm8
2761 punpcklqdq xmm13,xmm10
2762 punpckhqdq xmm14,xmm8
2763 punpckhqdq xmm15,xmm10
2764
2765 pshufd xmm12,xmm12,27
2766 pshufd xmm13,xmm13,27
2767 pshufd xmm14,xmm14,27
2768 pshufd xmm15,xmm15,27
2769 jmp NEAR $L$oop_shaext
2770
2771ALIGN 32
2772$L$oop_shaext:
2773 movdqu xmm4,XMMWORD[r8]
2774 movdqu xmm8,XMMWORD[r9]
2775 movdqu xmm5,XMMWORD[16+r8]
2776 movdqu xmm9,XMMWORD[16+r9]
2777 movdqu xmm6,XMMWORD[32+r8]
2778DB 102,15,56,0,227
2779 movdqu xmm10,XMMWORD[32+r9]
2780DB 102,68,15,56,0,195
2781 movdqu xmm7,XMMWORD[48+r8]
2782 lea r8,[64+r8]
2783 movdqu xmm11,XMMWORD[48+r9]
2784 lea r9,[64+r9]
2785
2786 movdqa xmm0,XMMWORD[((0-128))+rbp]
2787DB 102,15,56,0,235
2788 paddd xmm0,xmm4
2789 pxor xmm4,xmm12
2790 movdqa xmm1,xmm0
2791 movdqa xmm2,XMMWORD[((0-128))+rbp]
2792DB 102,68,15,56,0,203
2793 paddd xmm2,xmm8
2794 movdqa XMMWORD[80+rsp],xmm13
2795DB 69,15,56,203,236
2796 pxor xmm8,xmm14
2797 movdqa xmm0,xmm2
2798 movdqa XMMWORD[112+rsp],xmm15
2799DB 69,15,56,203,254
2800 pshufd xmm0,xmm1,0x0e
2801 pxor xmm4,xmm12
2802 movdqa XMMWORD[64+rsp],xmm12
2803DB 69,15,56,203,229
2804 pshufd xmm0,xmm2,0x0e
2805 pxor xmm8,xmm14
2806 movdqa XMMWORD[96+rsp],xmm14
2807 movdqa xmm1,XMMWORD[((16-128))+rbp]
2808 paddd xmm1,xmm5
2809DB 102,15,56,0,243
2810DB 69,15,56,203,247
2811
2812 movdqa xmm0,xmm1
2813 movdqa xmm2,XMMWORD[((16-128))+rbp]
2814 paddd xmm2,xmm9
2815DB 69,15,56,203,236
2816 movdqa xmm0,xmm2
2817 prefetcht0 [127+r8]
2818DB 102,15,56,0,251
2819DB 102,68,15,56,0,211
2820 prefetcht0 [127+r9]
2821DB 69,15,56,203,254
2822 pshufd xmm0,xmm1,0x0e
2823DB 102,68,15,56,0,219
2824DB 15,56,204,229
2825DB 69,15,56,203,229
2826 pshufd xmm0,xmm2,0x0e
2827 movdqa xmm1,XMMWORD[((32-128))+rbp]
2828 paddd xmm1,xmm6
2829DB 69,15,56,203,247
2830
2831 movdqa xmm0,xmm1
2832 movdqa xmm2,XMMWORD[((32-128))+rbp]
2833 paddd xmm2,xmm10
2834DB 69,15,56,203,236
2835DB 69,15,56,204,193
2836 movdqa xmm0,xmm2
2837 movdqa xmm3,xmm7
2838DB 69,15,56,203,254
2839 pshufd xmm0,xmm1,0x0e
2840DB 102,15,58,15,222,4
2841 paddd xmm4,xmm3
2842 movdqa xmm3,xmm11
2843DB 102,65,15,58,15,218,4
2844DB 15,56,204,238
2845DB 69,15,56,203,229
2846 pshufd xmm0,xmm2,0x0e
2847 movdqa xmm1,XMMWORD[((48-128))+rbp]
2848 paddd xmm1,xmm7
2849DB 69,15,56,203,247
2850DB 69,15,56,204,202
2851
2852 movdqa xmm0,xmm1
2853 movdqa xmm2,XMMWORD[((48-128))+rbp]
2854 paddd xmm8,xmm3
2855 paddd xmm2,xmm11
2856DB 15,56,205,231
2857DB 69,15,56,203,236
2858 movdqa xmm0,xmm2
2859 movdqa xmm3,xmm4
2860DB 102,15,58,15,223,4
2861DB 69,15,56,203,254
2862DB 69,15,56,205,195
2863 pshufd xmm0,xmm1,0x0e
2864 paddd xmm5,xmm3
2865 movdqa xmm3,xmm8
2866DB 102,65,15,58,15,219,4
2867DB 15,56,204,247
2868DB 69,15,56,203,229
2869 pshufd xmm0,xmm2,0x0e
2870 movdqa xmm1,XMMWORD[((64-128))+rbp]
2871 paddd xmm1,xmm4
2872DB 69,15,56,203,247
2873DB 69,15,56,204,211
2874 movdqa xmm0,xmm1
2875 movdqa xmm2,XMMWORD[((64-128))+rbp]
2876 paddd xmm9,xmm3
2877 paddd xmm2,xmm8
2878DB 15,56,205,236
2879DB 69,15,56,203,236
2880 movdqa xmm0,xmm2
2881 movdqa xmm3,xmm5
2882DB 102,15,58,15,220,4
2883DB 69,15,56,203,254
2884DB 69,15,56,205,200
2885 pshufd xmm0,xmm1,0x0e
2886 paddd xmm6,xmm3
2887 movdqa xmm3,xmm9
2888DB 102,65,15,58,15,216,4
2889DB 15,56,204,252
2890DB 69,15,56,203,229
2891 pshufd xmm0,xmm2,0x0e
2892 movdqa xmm1,XMMWORD[((80-128))+rbp]
2893 paddd xmm1,xmm5
2894DB 69,15,56,203,247
2895DB 69,15,56,204,216
2896 movdqa xmm0,xmm1
2897 movdqa xmm2,XMMWORD[((80-128))+rbp]
2898 paddd xmm10,xmm3
2899 paddd xmm2,xmm9
2900DB 15,56,205,245
2901DB 69,15,56,203,236
2902 movdqa xmm0,xmm2
2903 movdqa xmm3,xmm6
2904DB 102,15,58,15,221,4
2905DB 69,15,56,203,254
2906DB 69,15,56,205,209
2907 pshufd xmm0,xmm1,0x0e
2908 paddd xmm7,xmm3
2909 movdqa xmm3,xmm10
2910DB 102,65,15,58,15,217,4
2911DB 15,56,204,229
2912DB 69,15,56,203,229
2913 pshufd xmm0,xmm2,0x0e
2914 movdqa xmm1,XMMWORD[((96-128))+rbp]
2915 paddd xmm1,xmm6
2916DB 69,15,56,203,247
2917DB 69,15,56,204,193
2918 movdqa xmm0,xmm1
2919 movdqa xmm2,XMMWORD[((96-128))+rbp]
2920 paddd xmm11,xmm3
2921 paddd xmm2,xmm10
2922DB 15,56,205,254
2923DB 69,15,56,203,236
2924 movdqa xmm0,xmm2
2925 movdqa xmm3,xmm7
2926DB 102,15,58,15,222,4
2927DB 69,15,56,203,254
2928DB 69,15,56,205,218
2929 pshufd xmm0,xmm1,0x0e
2930 paddd xmm4,xmm3
2931 movdqa xmm3,xmm11
2932DB 102,65,15,58,15,218,4
2933DB 15,56,204,238
2934DB 69,15,56,203,229
2935 pshufd xmm0,xmm2,0x0e
2936 movdqa xmm1,XMMWORD[((112-128))+rbp]
2937 paddd xmm1,xmm7
2938DB 69,15,56,203,247
2939DB 69,15,56,204,202
2940 movdqa xmm0,xmm1
2941 movdqa xmm2,XMMWORD[((112-128))+rbp]
2942 paddd xmm8,xmm3
2943 paddd xmm2,xmm11
2944DB 15,56,205,231
2945DB 69,15,56,203,236
2946 movdqa xmm0,xmm2
2947 movdqa xmm3,xmm4
2948DB 102,15,58,15,223,4
2949DB 69,15,56,203,254
2950DB 69,15,56,205,195
2951 pshufd xmm0,xmm1,0x0e
2952 paddd xmm5,xmm3
2953 movdqa xmm3,xmm8
2954DB 102,65,15,58,15,219,4
2955DB 15,56,204,247
2956DB 69,15,56,203,229
2957 pshufd xmm0,xmm2,0x0e
2958 movdqa xmm1,XMMWORD[((128-128))+rbp]
2959 paddd xmm1,xmm4
2960DB 69,15,56,203,247
2961DB 69,15,56,204,211
2962 movdqa xmm0,xmm1
2963 movdqa xmm2,XMMWORD[((128-128))+rbp]
2964 paddd xmm9,xmm3
2965 paddd xmm2,xmm8
2966DB 15,56,205,236
2967DB 69,15,56,203,236
2968 movdqa xmm0,xmm2
2969 movdqa xmm3,xmm5
2970DB 102,15,58,15,220,4
2971DB 69,15,56,203,254
2972DB 69,15,56,205,200
2973 pshufd xmm0,xmm1,0x0e
2974 paddd xmm6,xmm3
2975 movdqa xmm3,xmm9
2976DB 102,65,15,58,15,216,4
2977DB 15,56,204,252
2978DB 69,15,56,203,229
2979 pshufd xmm0,xmm2,0x0e
2980 movdqa xmm1,XMMWORD[((144-128))+rbp]
2981 paddd xmm1,xmm5
2982DB 69,15,56,203,247
2983DB 69,15,56,204,216
2984 movdqa xmm0,xmm1
2985 movdqa xmm2,XMMWORD[((144-128))+rbp]
2986 paddd xmm10,xmm3
2987 paddd xmm2,xmm9
2988DB 15,56,205,245
2989DB 69,15,56,203,236
2990 movdqa xmm0,xmm2
2991 movdqa xmm3,xmm6
2992DB 102,15,58,15,221,4
2993DB 69,15,56,203,254
2994DB 69,15,56,205,209
2995 pshufd xmm0,xmm1,0x0e
2996 paddd xmm7,xmm3
2997 movdqa xmm3,xmm10
2998DB 102,65,15,58,15,217,4
2999DB 15,56,204,229
3000DB 69,15,56,203,229
3001 pshufd xmm0,xmm2,0x0e
3002 movdqa xmm1,XMMWORD[((160-128))+rbp]
3003 paddd xmm1,xmm6
3004DB 69,15,56,203,247
3005DB 69,15,56,204,193
3006 movdqa xmm0,xmm1
3007 movdqa xmm2,XMMWORD[((160-128))+rbp]
3008 paddd xmm11,xmm3
3009 paddd xmm2,xmm10
3010DB 15,56,205,254
3011DB 69,15,56,203,236
3012 movdqa xmm0,xmm2
3013 movdqa xmm3,xmm7
3014DB 102,15,58,15,222,4
3015DB 69,15,56,203,254
3016DB 69,15,56,205,218
3017 pshufd xmm0,xmm1,0x0e
3018 paddd xmm4,xmm3
3019 movdqa xmm3,xmm11
3020DB 102,65,15,58,15,218,4
3021DB 15,56,204,238
3022DB 69,15,56,203,229
3023 pshufd xmm0,xmm2,0x0e
3024 movdqa xmm1,XMMWORD[((176-128))+rbp]
3025 paddd xmm1,xmm7
3026DB 69,15,56,203,247
3027DB 69,15,56,204,202
3028 movdqa xmm0,xmm1
3029 movdqa xmm2,XMMWORD[((176-128))+rbp]
3030 paddd xmm8,xmm3
3031 paddd xmm2,xmm11
3032DB 15,56,205,231
3033DB 69,15,56,203,236
3034 movdqa xmm0,xmm2
3035 movdqa xmm3,xmm4
3036DB 102,15,58,15,223,4
3037DB 69,15,56,203,254
3038DB 69,15,56,205,195
3039 pshufd xmm0,xmm1,0x0e
3040 paddd xmm5,xmm3
3041 movdqa xmm3,xmm8
3042DB 102,65,15,58,15,219,4
3043DB 15,56,204,247
3044DB 69,15,56,203,229
3045 pshufd xmm0,xmm2,0x0e
3046 movdqa xmm1,XMMWORD[((192-128))+rbp]
3047 paddd xmm1,xmm4
3048DB 69,15,56,203,247
3049DB 69,15,56,204,211
3050 movdqa xmm0,xmm1
3051 movdqa xmm2,XMMWORD[((192-128))+rbp]
3052 paddd xmm9,xmm3
3053 paddd xmm2,xmm8
3054DB 15,56,205,236
3055DB 69,15,56,203,236
3056 movdqa xmm0,xmm2
3057 movdqa xmm3,xmm5
3058DB 102,15,58,15,220,4
3059DB 69,15,56,203,254
3060DB 69,15,56,205,200
3061 pshufd xmm0,xmm1,0x0e
3062 paddd xmm6,xmm3
3063 movdqa xmm3,xmm9
3064DB 102,65,15,58,15,216,4
3065DB 15,56,204,252
3066DB 69,15,56,203,229
3067 pshufd xmm0,xmm2,0x0e
3068 movdqa xmm1,XMMWORD[((208-128))+rbp]
3069 paddd xmm1,xmm5
3070DB 69,15,56,203,247
3071DB 69,15,56,204,216
3072 movdqa xmm0,xmm1
3073 movdqa xmm2,XMMWORD[((208-128))+rbp]
3074 paddd xmm10,xmm3
3075 paddd xmm2,xmm9
3076DB 15,56,205,245
3077DB 69,15,56,203,236
3078 movdqa xmm0,xmm2
3079 movdqa xmm3,xmm6
3080DB 102,15,58,15,221,4
3081DB 69,15,56,203,254
3082DB 69,15,56,205,209
3083 pshufd xmm0,xmm1,0x0e
3084 paddd xmm7,xmm3
3085 movdqa xmm3,xmm10
3086DB 102,65,15,58,15,217,4
3087 nop
3088DB 69,15,56,203,229
3089 pshufd xmm0,xmm2,0x0e
3090 movdqa xmm1,XMMWORD[((224-128))+rbp]
3091 paddd xmm1,xmm6
3092DB 69,15,56,203,247
3093
3094 movdqa xmm0,xmm1
3095 movdqa xmm2,XMMWORD[((224-128))+rbp]
3096 paddd xmm11,xmm3
3097 paddd xmm2,xmm10
3098DB 15,56,205,254
3099 nop
3100DB 69,15,56,203,236
3101 movdqa xmm0,xmm2
3102 mov ecx,1
3103 pxor xmm6,xmm6
3104DB 69,15,56,203,254
3105DB 69,15,56,205,218
3106 pshufd xmm0,xmm1,0x0e
3107 movdqa xmm1,XMMWORD[((240-128))+rbp]
3108 paddd xmm1,xmm7
3109 movq xmm7,QWORD[rbx]
3110 nop
3111DB 69,15,56,203,229
3112 pshufd xmm0,xmm2,0x0e
3113 movdqa xmm2,XMMWORD[((240-128))+rbp]
3114 paddd xmm2,xmm11
3115DB 69,15,56,203,247
3116
3117 movdqa xmm0,xmm1
3118 cmp ecx,DWORD[rbx]
3119 cmovge r8,rsp
3120 cmp ecx,DWORD[4+rbx]
3121 cmovge r9,rsp
3122 pshufd xmm9,xmm7,0x00
3123DB 69,15,56,203,236
3124 movdqa xmm0,xmm2
3125 pshufd xmm10,xmm7,0x55
3126 movdqa xmm11,xmm7
3127DB 69,15,56,203,254
3128 pshufd xmm0,xmm1,0x0e
3129 pcmpgtd xmm9,xmm6
3130 pcmpgtd xmm10,xmm6
3131DB 69,15,56,203,229
3132 pshufd xmm0,xmm2,0x0e
3133 pcmpgtd xmm11,xmm6
3134 movdqa xmm3,XMMWORD[((K256_shaext-16))]
3135DB 69,15,56,203,247
3136
3137 pand xmm13,xmm9
3138 pand xmm15,xmm10
3139 pand xmm12,xmm9
3140 pand xmm14,xmm10
3141 paddd xmm11,xmm7
3142
3143 paddd xmm13,XMMWORD[80+rsp]
3144 paddd xmm15,XMMWORD[112+rsp]
3145 paddd xmm12,XMMWORD[64+rsp]
3146 paddd xmm14,XMMWORD[96+rsp]
3147
3148 movq QWORD[rbx],xmm11
3149 dec edx
3150 jnz NEAR $L$oop_shaext
3151
3152 mov edx,DWORD[280+rsp]
3153
3154 pshufd xmm12,xmm12,27
3155 pshufd xmm13,xmm13,27
3156 pshufd xmm14,xmm14,27
3157 pshufd xmm15,xmm15,27
3158
3159 movdqa xmm5,xmm12
3160 movdqa xmm6,xmm13
3161 punpckldq xmm12,xmm14
3162 punpckhdq xmm5,xmm14
3163 punpckldq xmm13,xmm15
3164 punpckhdq xmm6,xmm15
3165
3166 movq QWORD[(0-128)+rdi],xmm12
3167 psrldq xmm12,8
3168 movq QWORD[(128-128)+rdi],xmm5
3169 psrldq xmm5,8
3170 movq QWORD[(32-128)+rdi],xmm12
3171 movq QWORD[(160-128)+rdi],xmm5
3172
3173 movq QWORD[(64-128)+rdi],xmm13
3174 psrldq xmm13,8
3175 movq QWORD[(192-128)+rdi],xmm6
3176 psrldq xmm6,8
3177 movq QWORD[(96-128)+rdi],xmm13
3178 movq QWORD[(224-128)+rdi],xmm6
3179
3180 lea rdi,[8+rdi]
3181 lea rsi,[32+rsi]
3182 dec edx
3183 jnz NEAR $L$oop_grande_shaext
3184
; ---------------------------------------------------------------------
; Epilogue of the SHA-extension code path.
; Every restore below is addressed relative to rax, which is not
; reloaded in this region.
; NOTE(review): rax presumably still holds the stack pointer captured
; at function entry -- confirm against the prologue, which lies outside
; this excerpt.
; Layout read back here (offsets from rax):
;   -184 .. -40 : xmm6 .. xmm15, sixteen bytes each
;   -16         : rbp
;   -8          : rbx
; ---------------------------------------------------------------------
3185$L$done_shaext:
3186
3187 movaps xmm6,XMMWORD[((-184))+rax]
3188 movaps xmm7,XMMWORD[((-168))+rax]
3189 movaps xmm8,XMMWORD[((-152))+rax]
3190 movaps xmm9,XMMWORD[((-136))+rax]
3191 movaps xmm10,XMMWORD[((-120))+rax]
3192 movaps xmm11,XMMWORD[((-104))+rax]
3193 movaps xmm12,XMMWORD[((-88))+rax]
3194 movaps xmm13,XMMWORD[((-72))+rax]
3195 movaps xmm14,XMMWORD[((-56))+rax]
3196 movaps xmm15,XMMWORD[((-40))+rax]
3197 mov rbp,QWORD[((-16))+rax]
3198
3199 mov rbx,QWORD[((-8))+rax]
3200
; Drop the whole frame in one step: rsp takes the value held in rax.
3201 lea rsp,[rax]
3202
3203$L$epilogue_shaext:
; rdi and rsi are callee-saved under the Microsoft x64 convention; they
; are reloaded from the two slots directly above the return address.
3204 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
3205 mov rsi,QWORD[16+rsp]
; Bytes F3 C3 encode "rep ret"; emitted as raw data rather than a mnemonic.
3206 DB 0F3h,0C3h ;repret
3207
3208$L$SEH_end_sha256_multi_block_shaext:
3209
3210ALIGN 32
; ---------------------------------------------------------------------
; sha256_multi_block_avx -- entry stub and AVX/AVX2 dispatch.
; ABI: Microsoft x64 on entry (arguments in rcx, rdx, r8).
; The stub parks rdi/rsi (callee-saved on Win64) in the first two slots
; above the return address, then moves the three arguments into
; rdi, rsi, rdx, which is what the body below works with.
; ---------------------------------------------------------------------
3211sha256_multi_block_avx:
3212 mov QWORD[8+rsp],rdi ;WIN64 prologue
3213 mov QWORD[16+rsp],rsi
3214 mov rax,rsp
3215$L$SEH_begin_sha256_multi_block_avx:
3216 mov rdi,rcx
3217 mov rsi,rdx
3218 mov rdx,r8
3219
3220
3221
; _avx_shortcut is also the target of the "jnz NEAR _avx_shortcut" in
; sha256_multi_block; on that path rcx was loaded from
; QWORD[OPENSSL_ia32cap_P+4], so after the shift ecx holds the dword at
; OPENSSL_ia32cap_P+8.
; NOTE(review): when this point is reached by falling through from the
; sha256_multi_block_avx label above, rcx still contains the first
; argument rather than a capability word -- confirm that label is never
; called directly.
3222_avx_shortcut:
3223 shr rcx,32
; Fewer than 2 in edx (unsigned compare on the third argument): always
; take the 128-bit AVX path.
3224 cmp edx,2
3225 jb NEAR $L$avx
; Bit 5 of the shifted capability word selects the _avx2_shortcut path.
; NOTE(review): presumably the AVX2 feature bit -- confirm against the
; OPENSSL_ia32cap documentation.
3226 test ecx,32
3227 jnz NEAR _avx2_shortcut
3228 jmp NEAR $L$avx
3229ALIGN 32
; ---------------------------------------------------------------------
; Frame setup for the 128-bit AVX path.
; rax keeps the value rsp had on arrival at $L$avx; all save slots can
; be expressed relative to it:
;   rax-8   : rbx            rax-16  : rbp
;   rax-184 .. rax-136 : xmm6 .. xmm9   (written via rsp after the lea)
;   rax-120 .. rax-40  : xmm10 .. xmm15 (written via rax)
; rbx, rbp and xmm6-xmm15 are callee-saved under the Microsoft x64
; convention, hence the saves.
; ---------------------------------------------------------------------
3230$L$avx:
3231 mov rax,rsp
3232
3233 push rbx
3234
3235 push rbp
3236
; Two pushes (16 bytes) plus 168 bytes puts rsp at rax-184.
3237 lea rsp,[((-168))+rsp]
3238 movaps XMMWORD[rsp],xmm6
3239 movaps XMMWORD[16+rsp],xmm7
3240 movaps XMMWORD[32+rsp],xmm8
3241 movaps XMMWORD[48+rsp],xmm9
3242 movaps XMMWORD[(-120)+rax],xmm10
3243 movaps XMMWORD[(-104)+rax],xmm11
3244 movaps XMMWORD[(-88)+rax],xmm12
3245 movaps XMMWORD[(-72)+rax],xmm13
3246 movaps XMMWORD[(-56)+rax],xmm14
3247 movaps XMMWORD[(-40)+rax],xmm15
; Carve out the working area below the save slots and round rsp down to
; a 256-byte boundary. The pre-alignment stack pointer cannot be
; recomputed afterwards, so the value in rax is stored at [272+rsp].
3248 sub rsp,288
3249 and rsp,-256
3250 mov QWORD[272+rsp],rax
3251
3252$L$body_avx:
; rbp -> 128 bytes into the K256 table; rbx -> rsp+256; rdi advanced by
; 128. Later accesses compensate with displacements written as
; (n-128), e.g. [((0-128))+rdi] and [((-128))+rbp].
; NOTE(review): the bias presumably keeps displacements within the
; signed 8-bit range -- confirm.
3253 lea rbp,[((K256+128))]
3254 lea rbx,[256+rsp]
3255 lea rdi,[128+rdi]
3256
; ---------------------------------------------------------------------
; Per-pass setup for four lanes.
; rsi is read as four 16-byte records: a qword pointer at offset 0 and
; a dword count at offset 8 of each record.
; For every lane:
;   - the pointer goes to r8 / r9 / r10 / r11,
;   - the count is stored at [rbx + 4*lane],
;   - edx accumulates the signed maximum of the counts (starting at 0),
;   - if the count is <= 0 (signed) the lane pointer is replaced by rbp.
; "mov" does not modify flags, so each cmovle still sees the result of
; the preceding "test ecx,ecx".
; NOTE(review): the counts are presumably numbers of 64-byte blocks --
; the round loop advances each pointer by 64 -- confirm with the caller.
; ---------------------------------------------------------------------
3257$L$oop_grande_avx:
; Park the incoming edx at [280+rsp]; edx is reused as the running max.
3258 mov DWORD[280+rsp],edx
3259 xor edx,edx
3260
; lane 0
3261 mov r8,QWORD[rsi]
3262
3263 mov ecx,DWORD[8+rsi]
3264 cmp ecx,edx
3265 cmovg edx,ecx
3266 test ecx,ecx
3267 mov DWORD[rbx],ecx
3268 cmovle r8,rbp
3269
; lane 1
3270 mov r9,QWORD[16+rsi]
3271
3272 mov ecx,DWORD[24+rsi]
3273 cmp ecx,edx
3274 cmovg edx,ecx
3275 test ecx,ecx
3276 mov DWORD[4+rbx],ecx
3277 cmovle r9,rbp
3278
; lane 2
3279 mov r10,QWORD[32+rsi]
3280
3281 mov ecx,DWORD[40+rsi]
3282 cmp ecx,edx
3283 cmovg edx,ecx
3284 test ecx,ecx
3285 mov DWORD[8+rbx],ecx
3286 cmovle r10,rbp
3287
; lane 3
3288 mov r11,QWORD[48+rsi]
3289
3290 mov ecx,DWORD[56+rsi]
3291 cmp ecx,edx
3292 cmovg edx,ecx
3293 test ecx,ecx
3294 mov DWORD[12+rbx],ecx
3295 cmovle r11,rbp
; No lane has a positive count: nothing to process.
3296 test edx,edx
3297 jz NEAR $L$done_avx
3298
; Load eight 16-byte vectors from the rdi-based area at a 32-byte stride
; into xmm8..xmm15 (rdi carries a +128 bias, hence the (n-128) form).
; rax is repointed to rsp+128; the round loop addresses its scratch
; slots as [(n-128)+rax].
3299 vmovdqu xmm8,XMMWORD[((0-128))+rdi]
3300 lea rax,[128+rsp]
3301 vmovdqu xmm9,XMMWORD[((32-128))+rdi]
3302 vmovdqu xmm10,XMMWORD[((64-128))+rdi]
3303 vmovdqu xmm11,XMMWORD[((96-128))+rdi]
3304 vmovdqu xmm12,XMMWORD[((128-128))+rdi]
3305 vmovdqu xmm13,XMMWORD[((160-128))+rdi]
3306 vmovdqu xmm14,XMMWORD[((192-128))+rdi]
3307 vmovdqu xmm15,XMMWORD[((224-128))+rdi]
; xmm6 = 16 bytes at $L$pbswap, used as the vpshufb control in $L$oop_avx.
3308 vmovdqu xmm6,XMMWORD[$L$pbswap]
3309 jmp NEAR $L$oop_avx
3310
3311ALIGN 32
3312$L$oop_avx:
3313 vpxor xmm4,xmm10,xmm9
3314 vmovd xmm5,DWORD[r8]
3315 vmovd xmm0,DWORD[r9]
3316 vpinsrd xmm5,xmm5,DWORD[r10],1
3317 vpinsrd xmm0,xmm0,DWORD[r11],1
3318 vpunpckldq xmm5,xmm5,xmm0
3319 vpshufb xmm5,xmm5,xmm6
3320 vpsrld xmm7,xmm12,6
3321 vpslld xmm2,xmm12,26
3322 vmovdqu XMMWORD[(0-128)+rax],xmm5
3323 vpaddd xmm5,xmm5,xmm15
3324
3325 vpsrld xmm1,xmm12,11
3326 vpxor xmm7,xmm7,xmm2
3327 vpslld xmm2,xmm12,21
3328 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp]
3329 vpxor xmm7,xmm7,xmm1
3330
3331 vpsrld xmm1,xmm12,25
3332 vpxor xmm7,xmm7,xmm2
3333
3334 vpslld xmm2,xmm12,7
3335 vpandn xmm0,xmm12,xmm14
3336 vpand xmm3,xmm12,xmm13
3337
3338 vpxor xmm7,xmm7,xmm1
3339
3340 vpsrld xmm15,xmm8,2
3341 vpxor xmm7,xmm7,xmm2
3342
3343 vpslld xmm1,xmm8,30
3344 vpxor xmm0,xmm0,xmm3
3345 vpxor xmm3,xmm9,xmm8
3346
3347 vpxor xmm15,xmm15,xmm1
3348 vpaddd xmm5,xmm5,xmm7
3349
3350 vpsrld xmm1,xmm8,13
3351
3352 vpslld xmm2,xmm8,19
3353 vpaddd xmm5,xmm5,xmm0
3354 vpand xmm4,xmm4,xmm3
3355
3356 vpxor xmm7,xmm15,xmm1
3357
3358 vpsrld xmm1,xmm8,22
3359 vpxor xmm7,xmm7,xmm2
3360
3361 vpslld xmm2,xmm8,10
3362 vpxor xmm15,xmm9,xmm4
3363 vpaddd xmm11,xmm11,xmm5
3364
3365 vpxor xmm7,xmm7,xmm1
3366 vpxor xmm7,xmm7,xmm2
3367
3368 vpaddd xmm15,xmm15,xmm5
3369 vpaddd xmm15,xmm15,xmm7
3370 vmovd xmm5,DWORD[4+r8]
3371 vmovd xmm0,DWORD[4+r9]
3372 vpinsrd xmm5,xmm5,DWORD[4+r10],1
3373 vpinsrd xmm0,xmm0,DWORD[4+r11],1
3374 vpunpckldq xmm5,xmm5,xmm0
3375 vpshufb xmm5,xmm5,xmm6
3376 vpsrld xmm7,xmm11,6
3377 vpslld xmm2,xmm11,26
3378 vmovdqu XMMWORD[(16-128)+rax],xmm5
3379 vpaddd xmm5,xmm5,xmm14
3380
3381 vpsrld xmm1,xmm11,11
3382 vpxor xmm7,xmm7,xmm2
3383 vpslld xmm2,xmm11,21
3384 vpaddd xmm5,xmm5,XMMWORD[((-96))+rbp]
3385 vpxor xmm7,xmm7,xmm1
3386
3387 vpsrld xmm1,xmm11,25
3388 vpxor xmm7,xmm7,xmm2
3389
3390 vpslld xmm2,xmm11,7
3391 vpandn xmm0,xmm11,xmm13
3392 vpand xmm4,xmm11,xmm12
3393
3394 vpxor xmm7,xmm7,xmm1
3395
3396 vpsrld xmm14,xmm15,2
3397 vpxor xmm7,xmm7,xmm2
3398
3399 vpslld xmm1,xmm15,30
3400 vpxor xmm0,xmm0,xmm4
3401 vpxor xmm4,xmm8,xmm15
3402
3403 vpxor xmm14,xmm14,xmm1
3404 vpaddd xmm5,xmm5,xmm7
3405
3406 vpsrld xmm1,xmm15,13
3407
3408 vpslld xmm2,xmm15,19
3409 vpaddd xmm5,xmm5,xmm0
3410 vpand xmm3,xmm3,xmm4
3411
3412 vpxor xmm7,xmm14,xmm1
3413
3414 vpsrld xmm1,xmm15,22
3415 vpxor xmm7,xmm7,xmm2
3416
3417 vpslld xmm2,xmm15,10
3418 vpxor xmm14,xmm8,xmm3
3419 vpaddd xmm10,xmm10,xmm5
3420
3421 vpxor xmm7,xmm7,xmm1
3422 vpxor xmm7,xmm7,xmm2
3423
3424 vpaddd xmm14,xmm14,xmm5
3425 vpaddd xmm14,xmm14,xmm7
3426 vmovd xmm5,DWORD[8+r8]
3427 vmovd xmm0,DWORD[8+r9]
3428 vpinsrd xmm5,xmm5,DWORD[8+r10],1
3429 vpinsrd xmm0,xmm0,DWORD[8+r11],1
3430 vpunpckldq xmm5,xmm5,xmm0
3431 vpshufb xmm5,xmm5,xmm6
3432 vpsrld xmm7,xmm10,6
3433 vpslld xmm2,xmm10,26
3434 vmovdqu XMMWORD[(32-128)+rax],xmm5
3435 vpaddd xmm5,xmm5,xmm13
3436
3437 vpsrld xmm1,xmm10,11
3438 vpxor xmm7,xmm7,xmm2
3439 vpslld xmm2,xmm10,21
3440 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp]
3441 vpxor xmm7,xmm7,xmm1
3442
3443 vpsrld xmm1,xmm10,25
3444 vpxor xmm7,xmm7,xmm2
3445
3446 vpslld xmm2,xmm10,7
3447 vpandn xmm0,xmm10,xmm12
3448 vpand xmm3,xmm10,xmm11
3449
3450 vpxor xmm7,xmm7,xmm1
3451
3452 vpsrld xmm13,xmm14,2
3453 vpxor xmm7,xmm7,xmm2
3454
3455 vpslld xmm1,xmm14,30
3456 vpxor xmm0,xmm0,xmm3
3457 vpxor xmm3,xmm15,xmm14
3458
3459 vpxor xmm13,xmm13,xmm1
3460 vpaddd xmm5,xmm5,xmm7
3461
3462 vpsrld xmm1,xmm14,13
3463
3464 vpslld xmm2,xmm14,19
3465 vpaddd xmm5,xmm5,xmm0
3466 vpand xmm4,xmm4,xmm3
3467
3468 vpxor xmm7,xmm13,xmm1
3469
3470 vpsrld xmm1,xmm14,22
3471 vpxor xmm7,xmm7,xmm2
3472
3473 vpslld xmm2,xmm14,10
3474 vpxor xmm13,xmm15,xmm4
3475 vpaddd xmm9,xmm9,xmm5
3476
3477 vpxor xmm7,xmm7,xmm1
3478 vpxor xmm7,xmm7,xmm2
3479
3480 vpaddd xmm13,xmm13,xmm5
3481 vpaddd xmm13,xmm13,xmm7
3482 vmovd xmm5,DWORD[12+r8]
3483 vmovd xmm0,DWORD[12+r9]
3484 vpinsrd xmm5,xmm5,DWORD[12+r10],1
3485 vpinsrd xmm0,xmm0,DWORD[12+r11],1
3486 vpunpckldq xmm5,xmm5,xmm0
3487 vpshufb xmm5,xmm5,xmm6
3488 vpsrld xmm7,xmm9,6
3489 vpslld xmm2,xmm9,26
3490 vmovdqu XMMWORD[(48-128)+rax],xmm5
3491 vpaddd xmm5,xmm5,xmm12
3492
3493 vpsrld xmm1,xmm9,11
3494 vpxor xmm7,xmm7,xmm2
3495 vpslld xmm2,xmm9,21
3496 vpaddd xmm5,xmm5,XMMWORD[((-32))+rbp]
3497 vpxor xmm7,xmm7,xmm1
3498
3499 vpsrld xmm1,xmm9,25
3500 vpxor xmm7,xmm7,xmm2
3501
3502 vpslld xmm2,xmm9,7
3503 vpandn xmm0,xmm9,xmm11
3504 vpand xmm4,xmm9,xmm10
3505
3506 vpxor xmm7,xmm7,xmm1
3507
3508 vpsrld xmm12,xmm13,2
3509 vpxor xmm7,xmm7,xmm2
3510
3511 vpslld xmm1,xmm13,30
3512 vpxor xmm0,xmm0,xmm4
3513 vpxor xmm4,xmm14,xmm13
3514
3515 vpxor xmm12,xmm12,xmm1
3516 vpaddd xmm5,xmm5,xmm7
3517
3518 vpsrld xmm1,xmm13,13
3519
3520 vpslld xmm2,xmm13,19
3521 vpaddd xmm5,xmm5,xmm0
3522 vpand xmm3,xmm3,xmm4
3523
3524 vpxor xmm7,xmm12,xmm1
3525
3526 vpsrld xmm1,xmm13,22
3527 vpxor xmm7,xmm7,xmm2
3528
3529 vpslld xmm2,xmm13,10
3530 vpxor xmm12,xmm14,xmm3
3531 vpaddd xmm8,xmm8,xmm5
3532
3533 vpxor xmm7,xmm7,xmm1
3534 vpxor xmm7,xmm7,xmm2
3535
3536 vpaddd xmm12,xmm12,xmm5
3537 vpaddd xmm12,xmm12,xmm7
3538 vmovd xmm5,DWORD[16+r8]
3539 vmovd xmm0,DWORD[16+r9]
3540 vpinsrd xmm5,xmm5,DWORD[16+r10],1
3541 vpinsrd xmm0,xmm0,DWORD[16+r11],1
3542 vpunpckldq xmm5,xmm5,xmm0
3543 vpshufb xmm5,xmm5,xmm6
3544 vpsrld xmm7,xmm8,6
3545 vpslld xmm2,xmm8,26
3546 vmovdqu XMMWORD[(64-128)+rax],xmm5
3547 vpaddd xmm5,xmm5,xmm11
3548
3549 vpsrld xmm1,xmm8,11
3550 vpxor xmm7,xmm7,xmm2
3551 vpslld xmm2,xmm8,21
3552 vpaddd xmm5,xmm5,XMMWORD[rbp]
3553 vpxor xmm7,xmm7,xmm1
3554
3555 vpsrld xmm1,xmm8,25
3556 vpxor xmm7,xmm7,xmm2
3557
3558 vpslld xmm2,xmm8,7
3559 vpandn xmm0,xmm8,xmm10
3560 vpand xmm3,xmm8,xmm9
3561
3562 vpxor xmm7,xmm7,xmm1
3563
3564 vpsrld xmm11,xmm12,2
3565 vpxor xmm7,xmm7,xmm2
3566
3567 vpslld xmm1,xmm12,30
3568 vpxor xmm0,xmm0,xmm3
3569 vpxor xmm3,xmm13,xmm12
3570
3571 vpxor xmm11,xmm11,xmm1
3572 vpaddd xmm5,xmm5,xmm7
3573
3574 vpsrld xmm1,xmm12,13
3575
3576 vpslld xmm2,xmm12,19
3577 vpaddd xmm5,xmm5,xmm0
3578 vpand xmm4,xmm4,xmm3
3579
3580 vpxor xmm7,xmm11,xmm1
3581
3582 vpsrld xmm1,xmm12,22
3583 vpxor xmm7,xmm7,xmm2
3584
3585 vpslld xmm2,xmm12,10
3586 vpxor xmm11,xmm13,xmm4
3587 vpaddd xmm15,xmm15,xmm5
3588
3589 vpxor xmm7,xmm7,xmm1
3590 vpxor xmm7,xmm7,xmm2
3591
3592 vpaddd xmm11,xmm11,xmm5
3593 vpaddd xmm11,xmm11,xmm7
3594 vmovd xmm5,DWORD[20+r8]
3595 vmovd xmm0,DWORD[20+r9]
3596 vpinsrd xmm5,xmm5,DWORD[20+r10],1
3597 vpinsrd xmm0,xmm0,DWORD[20+r11],1
3598 vpunpckldq xmm5,xmm5,xmm0
3599 vpshufb xmm5,xmm5,xmm6
3600 vpsrld xmm7,xmm15,6
3601 vpslld xmm2,xmm15,26
3602 vmovdqu XMMWORD[(80-128)+rax],xmm5
3603 vpaddd xmm5,xmm5,xmm10
3604
3605 vpsrld xmm1,xmm15,11
3606 vpxor xmm7,xmm7,xmm2
3607 vpslld xmm2,xmm15,21
3608 vpaddd xmm5,xmm5,XMMWORD[32+rbp]
3609 vpxor xmm7,xmm7,xmm1
3610
3611 vpsrld xmm1,xmm15,25
3612 vpxor xmm7,xmm7,xmm2
3613
3614 vpslld xmm2,xmm15,7
3615 vpandn xmm0,xmm15,xmm9
3616 vpand xmm4,xmm15,xmm8
3617
3618 vpxor xmm7,xmm7,xmm1
3619
3620 vpsrld xmm10,xmm11,2
3621 vpxor xmm7,xmm7,xmm2
3622
3623 vpslld xmm1,xmm11,30
3624 vpxor xmm0,xmm0,xmm4
3625 vpxor xmm4,xmm12,xmm11
3626
3627 vpxor xmm10,xmm10,xmm1
3628 vpaddd xmm5,xmm5,xmm7
3629
3630 vpsrld xmm1,xmm11,13
3631
3632 vpslld xmm2,xmm11,19
3633 vpaddd xmm5,xmm5,xmm0
3634 vpand xmm3,xmm3,xmm4
3635
3636 vpxor xmm7,xmm10,xmm1
3637
3638 vpsrld xmm1,xmm11,22
3639 vpxor xmm7,xmm7,xmm2
3640
3641 vpslld xmm2,xmm11,10
3642 vpxor xmm10,xmm12,xmm3
3643 vpaddd xmm14,xmm14,xmm5
3644
3645 vpxor xmm7,xmm7,xmm1
3646 vpxor xmm7,xmm7,xmm2
3647
3648 vpaddd xmm10,xmm10,xmm5
3649 vpaddd xmm10,xmm10,xmm7
3650 vmovd xmm5,DWORD[24+r8]
3651 vmovd xmm0,DWORD[24+r9]
3652 vpinsrd xmm5,xmm5,DWORD[24+r10],1
3653 vpinsrd xmm0,xmm0,DWORD[24+r11],1
3654 vpunpckldq xmm5,xmm5,xmm0
3655 vpshufb xmm5,xmm5,xmm6
3656 vpsrld xmm7,xmm14,6
3657 vpslld xmm2,xmm14,26
3658 vmovdqu XMMWORD[(96-128)+rax],xmm5
3659 vpaddd xmm5,xmm5,xmm9
3660
3661 vpsrld xmm1,xmm14,11
3662 vpxor xmm7,xmm7,xmm2
3663 vpslld xmm2,xmm14,21
3664 vpaddd xmm5,xmm5,XMMWORD[64+rbp]
3665 vpxor xmm7,xmm7,xmm1
3666
3667 vpsrld xmm1,xmm14,25
3668 vpxor xmm7,xmm7,xmm2
3669
3670 vpslld xmm2,xmm14,7
3671 vpandn xmm0,xmm14,xmm8
3672 vpand xmm3,xmm14,xmm15
3673
3674 vpxor xmm7,xmm7,xmm1
3675
3676 vpsrld xmm9,xmm10,2
3677 vpxor xmm7,xmm7,xmm2
3678
3679 vpslld xmm1,xmm10,30
3680 vpxor xmm0,xmm0,xmm3
3681 vpxor xmm3,xmm11,xmm10
3682
3683 vpxor xmm9,xmm9,xmm1
3684 vpaddd xmm5,xmm5,xmm7
3685
3686 vpsrld xmm1,xmm10,13
3687
3688 vpslld xmm2,xmm10,19
3689 vpaddd xmm5,xmm5,xmm0
3690 vpand xmm4,xmm4,xmm3
3691
3692 vpxor xmm7,xmm9,xmm1
3693
3694 vpsrld xmm1,xmm10,22
3695 vpxor xmm7,xmm7,xmm2
3696
3697 vpslld xmm2,xmm10,10
3698 vpxor xmm9,xmm11,xmm4
3699 vpaddd xmm13,xmm13,xmm5
3700
3701 vpxor xmm7,xmm7,xmm1
3702 vpxor xmm7,xmm7,xmm2
3703
3704 vpaddd xmm9,xmm9,xmm5
3705 vpaddd xmm9,xmm9,xmm7
3706 vmovd xmm5,DWORD[28+r8]
3707 vmovd xmm0,DWORD[28+r9]
3708 vpinsrd xmm5,xmm5,DWORD[28+r10],1
3709 vpinsrd xmm0,xmm0,DWORD[28+r11],1
3710 vpunpckldq xmm5,xmm5,xmm0
3711 vpshufb xmm5,xmm5,xmm6
3712 vpsrld xmm7,xmm13,6
3713 vpslld xmm2,xmm13,26
3714 vmovdqu XMMWORD[(112-128)+rax],xmm5
3715 vpaddd xmm5,xmm5,xmm8
3716
3717 vpsrld xmm1,xmm13,11
3718 vpxor xmm7,xmm7,xmm2
3719 vpslld xmm2,xmm13,21
3720 vpaddd xmm5,xmm5,XMMWORD[96+rbp]
3721 vpxor xmm7,xmm7,xmm1
3722
3723 vpsrld xmm1,xmm13,25
3724 vpxor xmm7,xmm7,xmm2
3725
3726 vpslld xmm2,xmm13,7
3727 vpandn xmm0,xmm13,xmm15
3728 vpand xmm4,xmm13,xmm14
3729
3730 vpxor xmm7,xmm7,xmm1
3731
3732 vpsrld xmm8,xmm9,2
3733 vpxor xmm7,xmm7,xmm2
3734
3735 vpslld xmm1,xmm9,30
3736 vpxor xmm0,xmm0,xmm4
3737 vpxor xmm4,xmm10,xmm9
3738
3739 vpxor xmm8,xmm8,xmm1
3740 vpaddd xmm5,xmm5,xmm7
3741
3742 vpsrld xmm1,xmm9,13
3743
3744 vpslld xmm2,xmm9,19
3745 vpaddd xmm5,xmm5,xmm0
3746 vpand xmm3,xmm3,xmm4
3747
3748 vpxor xmm7,xmm8,xmm1
3749
3750 vpsrld xmm1,xmm9,22
3751 vpxor xmm7,xmm7,xmm2
3752
3753 vpslld xmm2,xmm9,10
3754 vpxor xmm8,xmm10,xmm3
3755 vpaddd xmm12,xmm12,xmm5
3756
3757 vpxor xmm7,xmm7,xmm1
3758 vpxor xmm7,xmm7,xmm2
3759
3760 vpaddd xmm8,xmm8,xmm5
3761 vpaddd xmm8,xmm8,xmm7
3762 add rbp,256
3763 vmovd xmm5,DWORD[32+r8]
3764 vmovd xmm0,DWORD[32+r9]
3765 vpinsrd xmm5,xmm5,DWORD[32+r10],1
3766 vpinsrd xmm0,xmm0,DWORD[32+r11],1
3767 vpunpckldq xmm5,xmm5,xmm0
3768 vpshufb xmm5,xmm5,xmm6
3769 vpsrld xmm7,xmm12,6
3770 vpslld xmm2,xmm12,26
3771 vmovdqu XMMWORD[(128-128)+rax],xmm5
3772 vpaddd xmm5,xmm5,xmm15
3773
3774 vpsrld xmm1,xmm12,11
3775 vpxor xmm7,xmm7,xmm2
3776 vpslld xmm2,xmm12,21
3777 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp]
3778 vpxor xmm7,xmm7,xmm1
3779
3780 vpsrld xmm1,xmm12,25
3781 vpxor xmm7,xmm7,xmm2
3782
3783 vpslld xmm2,xmm12,7
3784 vpandn xmm0,xmm12,xmm14
3785 vpand xmm3,xmm12,xmm13
3786
3787 vpxor xmm7,xmm7,xmm1
3788
3789 vpsrld xmm15,xmm8,2
3790 vpxor xmm7,xmm7,xmm2
3791
3792 vpslld xmm1,xmm8,30
3793 vpxor xmm0,xmm0,xmm3
3794 vpxor xmm3,xmm9,xmm8
3795
3796 vpxor xmm15,xmm15,xmm1
3797 vpaddd xmm5,xmm5,xmm7
3798
3799 vpsrld xmm1,xmm8,13
3800
3801 vpslld xmm2,xmm8,19
3802 vpaddd xmm5,xmm5,xmm0
3803 vpand xmm4,xmm4,xmm3
3804
3805 vpxor xmm7,xmm15,xmm1
3806
3807 vpsrld xmm1,xmm8,22
3808 vpxor xmm7,xmm7,xmm2
3809
3810 vpslld xmm2,xmm8,10
3811 vpxor xmm15,xmm9,xmm4
3812 vpaddd xmm11,xmm11,xmm5
3813
3814 vpxor xmm7,xmm7,xmm1
3815 vpxor xmm7,xmm7,xmm2
3816
3817 vpaddd xmm15,xmm15,xmm5
3818 vpaddd xmm15,xmm15,xmm7
3819 vmovd xmm5,DWORD[36+r8]
3820 vmovd xmm0,DWORD[36+r9]
3821 vpinsrd xmm5,xmm5,DWORD[36+r10],1
3822 vpinsrd xmm0,xmm0,DWORD[36+r11],1
3823 vpunpckldq xmm5,xmm5,xmm0
3824 vpshufb xmm5,xmm5,xmm6
3825 vpsrld xmm7,xmm11,6
3826 vpslld xmm2,xmm11,26
3827 vmovdqu XMMWORD[(144-128)+rax],xmm5
3828 vpaddd xmm5,xmm5,xmm14
3829
3830 vpsrld xmm1,xmm11,11
3831 vpxor xmm7,xmm7,xmm2
3832 vpslld xmm2,xmm11,21
3833 vpaddd xmm5,xmm5,XMMWORD[((-96))+rbp]
3834 vpxor xmm7,xmm7,xmm1
3835
3836 vpsrld xmm1,xmm11,25
3837 vpxor xmm7,xmm7,xmm2
3838
3839 vpslld xmm2,xmm11,7
3840 vpandn xmm0,xmm11,xmm13
3841 vpand xmm4,xmm11,xmm12
3842
3843 vpxor xmm7,xmm7,xmm1
3844
3845 vpsrld xmm14,xmm15,2
3846 vpxor xmm7,xmm7,xmm2
3847
3848 vpslld xmm1,xmm15,30
3849 vpxor xmm0,xmm0,xmm4
3850 vpxor xmm4,xmm8,xmm15
3851
3852 vpxor xmm14,xmm14,xmm1
3853 vpaddd xmm5,xmm5,xmm7
3854
3855 vpsrld xmm1,xmm15,13
3856
3857 vpslld xmm2,xmm15,19
3858 vpaddd xmm5,xmm5,xmm0
3859 vpand xmm3,xmm3,xmm4
3860
3861 vpxor xmm7,xmm14,xmm1
3862
3863 vpsrld xmm1,xmm15,22
3864 vpxor xmm7,xmm7,xmm2
3865
3866 vpslld xmm2,xmm15,10
3867 vpxor xmm14,xmm8,xmm3
3868 vpaddd xmm10,xmm10,xmm5
3869
3870 vpxor xmm7,xmm7,xmm1
3871 vpxor xmm7,xmm7,xmm2
3872
3873 vpaddd xmm14,xmm14,xmm5
3874 vpaddd xmm14,xmm14,xmm7
3875 vmovd xmm5,DWORD[40+r8]
3876 vmovd xmm0,DWORD[40+r9]
3877 vpinsrd xmm5,xmm5,DWORD[40+r10],1
3878 vpinsrd xmm0,xmm0,DWORD[40+r11],1
3879 vpunpckldq xmm5,xmm5,xmm0
3880 vpshufb xmm5,xmm5,xmm6
3881 vpsrld xmm7,xmm10,6
3882 vpslld xmm2,xmm10,26
3883 vmovdqu XMMWORD[(160-128)+rax],xmm5
3884 vpaddd xmm5,xmm5,xmm13
3885
3886 vpsrld xmm1,xmm10,11
3887 vpxor xmm7,xmm7,xmm2
3888 vpslld xmm2,xmm10,21
3889 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp]
3890 vpxor xmm7,xmm7,xmm1
3891
3892 vpsrld xmm1,xmm10,25
3893 vpxor xmm7,xmm7,xmm2
3894
3895 vpslld xmm2,xmm10,7
3896 vpandn xmm0,xmm10,xmm12
3897 vpand xmm3,xmm10,xmm11
3898
3899 vpxor xmm7,xmm7,xmm1
3900
3901 vpsrld xmm13,xmm14,2
3902 vpxor xmm7,xmm7,xmm2
3903
3904 vpslld xmm1,xmm14,30
3905 vpxor xmm0,xmm0,xmm3
3906 vpxor xmm3,xmm15,xmm14
3907
3908 vpxor xmm13,xmm13,xmm1
3909 vpaddd xmm5,xmm5,xmm7
3910
3911 vpsrld xmm1,xmm14,13
3912
3913 vpslld xmm2,xmm14,19
3914 vpaddd xmm5,xmm5,xmm0
3915 vpand xmm4,xmm4,xmm3
3916
3917 vpxor xmm7,xmm13,xmm1
3918
3919 vpsrld xmm1,xmm14,22
3920 vpxor xmm7,xmm7,xmm2
3921
3922 vpslld xmm2,xmm14,10
3923 vpxor xmm13,xmm15,xmm4
3924 vpaddd xmm9,xmm9,xmm5
3925
3926 vpxor xmm7,xmm7,xmm1
3927 vpxor xmm7,xmm7,xmm2
3928
3929 vpaddd xmm13,xmm13,xmm5
3930 vpaddd xmm13,xmm13,xmm7
3931 vmovd xmm5,DWORD[44+r8]
3932 vmovd xmm0,DWORD[44+r9]
3933 vpinsrd xmm5,xmm5,DWORD[44+r10],1
3934 vpinsrd xmm0,xmm0,DWORD[44+r11],1
3935 vpunpckldq xmm5,xmm5,xmm0
3936 vpshufb xmm5,xmm5,xmm6
3937 vpsrld xmm7,xmm9,6
3938 vpslld xmm2,xmm9,26
3939 vmovdqu XMMWORD[(176-128)+rax],xmm5
3940 vpaddd xmm5,xmm5,xmm12
3941
3942 vpsrld xmm1,xmm9,11
3943 vpxor xmm7,xmm7,xmm2
3944 vpslld xmm2,xmm9,21
3945 vpaddd xmm5,xmm5,XMMWORD[((-32))+rbp]
3946 vpxor xmm7,xmm7,xmm1
3947
3948 vpsrld xmm1,xmm9,25
3949 vpxor xmm7,xmm7,xmm2
3950
3951 vpslld xmm2,xmm9,7
3952 vpandn xmm0,xmm9,xmm11
3953 vpand xmm4,xmm9,xmm10
3954
3955 vpxor xmm7,xmm7,xmm1
3956
3957 vpsrld xmm12,xmm13,2
3958 vpxor xmm7,xmm7,xmm2
3959
3960 vpslld xmm1,xmm13,30
3961 vpxor xmm0,xmm0,xmm4
3962 vpxor xmm4,xmm14,xmm13
3963
3964 vpxor xmm12,xmm12,xmm1
3965 vpaddd xmm5,xmm5,xmm7
3966
3967 vpsrld xmm1,xmm13,13
3968
3969 vpslld xmm2,xmm13,19
3970 vpaddd xmm5,xmm5,xmm0
3971 vpand xmm3,xmm3,xmm4
3972
3973 vpxor xmm7,xmm12,xmm1
3974
3975 vpsrld xmm1,xmm13,22
3976 vpxor xmm7,xmm7,xmm2
3977
3978 vpslld xmm2,xmm13,10
3979 vpxor xmm12,xmm14,xmm3
3980 vpaddd xmm8,xmm8,xmm5
3981
3982 vpxor xmm7,xmm7,xmm1
3983 vpxor xmm7,xmm7,xmm2
3984
3985 vpaddd xmm12,xmm12,xmm5
3986 vpaddd xmm12,xmm12,xmm7
3987 vmovd xmm5,DWORD[48+r8]
3988 vmovd xmm0,DWORD[48+r9]
3989 vpinsrd xmm5,xmm5,DWORD[48+r10],1
3990 vpinsrd xmm0,xmm0,DWORD[48+r11],1
3991 vpunpckldq xmm5,xmm5,xmm0
3992 vpshufb xmm5,xmm5,xmm6
3993 vpsrld xmm7,xmm8,6
3994 vpslld xmm2,xmm8,26
3995 vmovdqu XMMWORD[(192-128)+rax],xmm5
3996 vpaddd xmm5,xmm5,xmm11
3997
3998 vpsrld xmm1,xmm8,11
3999 vpxor xmm7,xmm7,xmm2
4000 vpslld xmm2,xmm8,21
4001 vpaddd xmm5,xmm5,XMMWORD[rbp]
4002 vpxor xmm7,xmm7,xmm1
4003
4004 vpsrld xmm1,xmm8,25
4005 vpxor xmm7,xmm7,xmm2
4006
4007 vpslld xmm2,xmm8,7
4008 vpandn xmm0,xmm8,xmm10
4009 vpand xmm3,xmm8,xmm9
4010
4011 vpxor xmm7,xmm7,xmm1
4012
4013 vpsrld xmm11,xmm12,2
4014 vpxor xmm7,xmm7,xmm2
4015
4016 vpslld xmm1,xmm12,30
4017 vpxor xmm0,xmm0,xmm3
4018 vpxor xmm3,xmm13,xmm12
4019
4020 vpxor xmm11,xmm11,xmm1
4021 vpaddd xmm5,xmm5,xmm7
4022
4023 vpsrld xmm1,xmm12,13
4024
4025 vpslld xmm2,xmm12,19
4026 vpaddd xmm5,xmm5,xmm0
4027 vpand xmm4,xmm4,xmm3
4028
4029 vpxor xmm7,xmm11,xmm1
4030
4031 vpsrld xmm1,xmm12,22
4032 vpxor xmm7,xmm7,xmm2
4033
4034 vpslld xmm2,xmm12,10
4035 vpxor xmm11,xmm13,xmm4
4036 vpaddd xmm15,xmm15,xmm5
4037
4038 vpxor xmm7,xmm7,xmm1
4039 vpxor xmm7,xmm7,xmm2
4040
4041 vpaddd xmm11,xmm11,xmm5
4042 vpaddd xmm11,xmm11,xmm7
4043 vmovd xmm5,DWORD[52+r8]
4044 vmovd xmm0,DWORD[52+r9]
4045 vpinsrd xmm5,xmm5,DWORD[52+r10],1
4046 vpinsrd xmm0,xmm0,DWORD[52+r11],1
4047 vpunpckldq xmm5,xmm5,xmm0
4048 vpshufb xmm5,xmm5,xmm6
4049 vpsrld xmm7,xmm15,6
4050 vpslld xmm2,xmm15,26
4051 vmovdqu XMMWORD[(208-128)+rax],xmm5
4052 vpaddd xmm5,xmm5,xmm10
4053
4054 vpsrld xmm1,xmm15,11
4055 vpxor xmm7,xmm7,xmm2
4056 vpslld xmm2,xmm15,21
4057 vpaddd xmm5,xmm5,XMMWORD[32+rbp]
4058 vpxor xmm7,xmm7,xmm1
4059
4060 vpsrld xmm1,xmm15,25
4061 vpxor xmm7,xmm7,xmm2
4062
4063 vpslld xmm2,xmm15,7
4064 vpandn xmm0,xmm15,xmm9
4065 vpand xmm4,xmm15,xmm8
4066
4067 vpxor xmm7,xmm7,xmm1
4068
4069 vpsrld xmm10,xmm11,2
4070 vpxor xmm7,xmm7,xmm2
4071
4072 vpslld xmm1,xmm11,30
4073 vpxor xmm0,xmm0,xmm4
4074 vpxor xmm4,xmm12,xmm11
4075
4076 vpxor xmm10,xmm10,xmm1
4077 vpaddd xmm5,xmm5,xmm7
4078
4079 vpsrld xmm1,xmm11,13
4080
4081 vpslld xmm2,xmm11,19
4082 vpaddd xmm5,xmm5,xmm0
4083 vpand xmm3,xmm3,xmm4
4084
4085 vpxor xmm7,xmm10,xmm1
4086
4087 vpsrld xmm1,xmm11,22
4088 vpxor xmm7,xmm7,xmm2
4089
4090 vpslld xmm2,xmm11,10
4091 vpxor xmm10,xmm12,xmm3
4092 vpaddd xmm14,xmm14,xmm5
4093
4094 vpxor xmm7,xmm7,xmm1
4095 vpxor xmm7,xmm7,xmm2
4096
4097 vpaddd xmm10,xmm10,xmm5
4098 vpaddd xmm10,xmm10,xmm7
4099 vmovd xmm5,DWORD[56+r8]
4100 vmovd xmm0,DWORD[56+r9]
4101 vpinsrd xmm5,xmm5,DWORD[56+r10],1
4102 vpinsrd xmm0,xmm0,DWORD[56+r11],1
4103 vpunpckldq xmm5,xmm5,xmm0
4104 vpshufb xmm5,xmm5,xmm6
4105 vpsrld xmm7,xmm14,6
4106 vpslld xmm2,xmm14,26
4107 vmovdqu XMMWORD[(224-128)+rax],xmm5
4108 vpaddd xmm5,xmm5,xmm9
4109
4110 vpsrld xmm1,xmm14,11
4111 vpxor xmm7,xmm7,xmm2
4112 vpslld xmm2,xmm14,21
4113 vpaddd xmm5,xmm5,XMMWORD[64+rbp]
4114 vpxor xmm7,xmm7,xmm1
4115
4116 vpsrld xmm1,xmm14,25
4117 vpxor xmm7,xmm7,xmm2
4118
4119 vpslld xmm2,xmm14,7
4120 vpandn xmm0,xmm14,xmm8
4121 vpand xmm3,xmm14,xmm15
4122
4123 vpxor xmm7,xmm7,xmm1
4124
4125 vpsrld xmm9,xmm10,2
4126 vpxor xmm7,xmm7,xmm2
4127
4128 vpslld xmm1,xmm10,30
4129 vpxor xmm0,xmm0,xmm3
4130 vpxor xmm3,xmm11,xmm10
4131
4132 vpxor xmm9,xmm9,xmm1
4133 vpaddd xmm5,xmm5,xmm7
4134
4135 vpsrld xmm1,xmm10,13
4136
4137 vpslld xmm2,xmm10,19
4138 vpaddd xmm5,xmm5,xmm0
4139 vpand xmm4,xmm4,xmm3
4140
4141 vpxor xmm7,xmm9,xmm1
4142
4143 vpsrld xmm1,xmm10,22
4144 vpxor xmm7,xmm7,xmm2
4145
4146 vpslld xmm2,xmm10,10
4147 vpxor xmm9,xmm11,xmm4
4148 vpaddd xmm13,xmm13,xmm5
4149
4150 vpxor xmm7,xmm7,xmm1
4151 vpxor xmm7,xmm7,xmm2
4152
4153 vpaddd xmm9,xmm9,xmm5
4154 vpaddd xmm9,xmm9,xmm7
4155 vmovd xmm5,DWORD[60+r8]
4156 lea r8,[64+r8]
4157 vmovd xmm0,DWORD[60+r9]
4158 lea r9,[64+r9]
4159 vpinsrd xmm5,xmm5,DWORD[60+r10],1
4160 lea r10,[64+r10]
4161 vpinsrd xmm0,xmm0,DWORD[60+r11],1
4162 lea r11,[64+r11]
4163 vpunpckldq xmm5,xmm5,xmm0
4164 vpshufb xmm5,xmm5,xmm6
4165 vpsrld xmm7,xmm13,6
4166 vpslld xmm2,xmm13,26
4167 vmovdqu XMMWORD[(240-128)+rax],xmm5
4168 vpaddd xmm5,xmm5,xmm8
4169
4170 vpsrld xmm1,xmm13,11
4171 vpxor xmm7,xmm7,xmm2
4172 vpslld xmm2,xmm13,21
4173 vpaddd xmm5,xmm5,XMMWORD[96+rbp]
4174 vpxor xmm7,xmm7,xmm1
4175
4176 vpsrld xmm1,xmm13,25
4177 vpxor xmm7,xmm7,xmm2
4178 prefetcht0 [63+r8]
4179 vpslld xmm2,xmm13,7
4180 vpandn xmm0,xmm13,xmm15
4181 vpand xmm4,xmm13,xmm14
4182 prefetcht0 [63+r9]
4183 vpxor xmm7,xmm7,xmm1
4184
4185 vpsrld xmm8,xmm9,2
4186 vpxor xmm7,xmm7,xmm2
4187 prefetcht0 [63+r10]
4188 vpslld xmm1,xmm9,30
4189 vpxor xmm0,xmm0,xmm4
4190 vpxor xmm4,xmm10,xmm9
4191 prefetcht0 [63+r11]
4192 vpxor xmm8,xmm8,xmm1
4193 vpaddd xmm5,xmm5,xmm7
4194
4195 vpsrld xmm1,xmm9,13
4196
4197 vpslld xmm2,xmm9,19
4198 vpaddd xmm5,xmm5,xmm0
4199 vpand xmm3,xmm3,xmm4
4200
4201 vpxor xmm7,xmm8,xmm1
4202
4203 vpsrld xmm1,xmm9,22
4204 vpxor xmm7,xmm7,xmm2
4205
4206 vpslld xmm2,xmm9,10
4207 vpxor xmm8,xmm10,xmm3
4208 vpaddd xmm12,xmm12,xmm5
4209
4210 vpxor xmm7,xmm7,xmm1
4211 vpxor xmm7,xmm7,xmm2
4212
4213 vpaddd xmm8,xmm8,xmm5
4214 vpaddd xmm8,xmm8,xmm7
4215 add rbp,256
4216 vmovdqu xmm5,XMMWORD[((0-128))+rax]
4217 mov ecx,3
4218 jmp NEAR $L$oop_16_xx_avx
4219ALIGN 32
4220$L$oop_16_xx_avx:
4221 vmovdqu xmm6,XMMWORD[((16-128))+rax]
4222 vpaddd xmm5,xmm5,XMMWORD[((144-128))+rax]
4223
4224 vpsrld xmm7,xmm6,3
4225 vpsrld xmm1,xmm6,7
4226 vpslld xmm2,xmm6,25
4227 vpxor xmm7,xmm7,xmm1
4228 vpsrld xmm1,xmm6,18
4229 vpxor xmm7,xmm7,xmm2
4230 vpslld xmm2,xmm6,14
4231 vmovdqu xmm0,XMMWORD[((224-128))+rax]
4232 vpsrld xmm3,xmm0,10
4233
4234 vpxor xmm7,xmm7,xmm1
4235 vpsrld xmm1,xmm0,17
4236 vpxor xmm7,xmm7,xmm2
4237 vpslld xmm2,xmm0,15
4238 vpaddd xmm5,xmm5,xmm7
4239 vpxor xmm7,xmm3,xmm1
4240 vpsrld xmm1,xmm0,19
4241 vpxor xmm7,xmm7,xmm2
4242 vpslld xmm2,xmm0,13
4243 vpxor xmm7,xmm7,xmm1
4244 vpxor xmm7,xmm7,xmm2
4245 vpaddd xmm5,xmm5,xmm7
4246 vpsrld xmm7,xmm12,6
4247 vpslld xmm2,xmm12,26
4248 vmovdqu XMMWORD[(0-128)+rax],xmm5
4249 vpaddd xmm5,xmm5,xmm15
4250
4251 vpsrld xmm1,xmm12,11
4252 vpxor xmm7,xmm7,xmm2
4253 vpslld xmm2,xmm12,21
4254 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp]
4255 vpxor xmm7,xmm7,xmm1
4256
4257 vpsrld xmm1,xmm12,25
4258 vpxor xmm7,xmm7,xmm2
4259
4260 vpslld xmm2,xmm12,7
4261 vpandn xmm0,xmm12,xmm14
4262 vpand xmm3,xmm12,xmm13
4263
4264 vpxor xmm7,xmm7,xmm1
4265
4266 vpsrld xmm15,xmm8,2
4267 vpxor xmm7,xmm7,xmm2
4268
4269 vpslld xmm1,xmm8,30
4270 vpxor xmm0,xmm0,xmm3
4271 vpxor xmm3,xmm9,xmm8
4272
4273 vpxor xmm15,xmm15,xmm1
4274 vpaddd xmm5,xmm5,xmm7
4275
4276 vpsrld xmm1,xmm8,13
4277
4278 vpslld xmm2,xmm8,19
4279 vpaddd xmm5,xmm5,xmm0
4280 vpand xmm4,xmm4,xmm3
4281
4282 vpxor xmm7,xmm15,xmm1
4283
4284 vpsrld xmm1,xmm8,22
4285 vpxor xmm7,xmm7,xmm2
4286
4287 vpslld xmm2,xmm8,10
4288 vpxor xmm15,xmm9,xmm4
4289 vpaddd xmm11,xmm11,xmm5
4290
4291 vpxor xmm7,xmm7,xmm1
4292 vpxor xmm7,xmm7,xmm2
4293
4294 vpaddd xmm15,xmm15,xmm5
4295 vpaddd xmm15,xmm15,xmm7
4296 vmovdqu xmm5,XMMWORD[((32-128))+rax]
4297 vpaddd xmm6,xmm6,XMMWORD[((160-128))+rax]
4298
4299 vpsrld xmm7,xmm5,3
4300 vpsrld xmm1,xmm5,7
4301 vpslld xmm2,xmm5,25
4302 vpxor xmm7,xmm7,xmm1
4303 vpsrld xmm1,xmm5,18
4304 vpxor xmm7,xmm7,xmm2
4305 vpslld xmm2,xmm5,14
4306 vmovdqu xmm0,XMMWORD[((240-128))+rax]
4307 vpsrld xmm4,xmm0,10
4308
4309 vpxor xmm7,xmm7,xmm1
4310 vpsrld xmm1,xmm0,17
4311 vpxor xmm7,xmm7,xmm2
4312 vpslld xmm2,xmm0,15
4313 vpaddd xmm6,xmm6,xmm7
4314 vpxor xmm7,xmm4,xmm1
4315 vpsrld xmm1,xmm0,19
4316 vpxor xmm7,xmm7,xmm2
4317 vpslld xmm2,xmm0,13
4318 vpxor xmm7,xmm7,xmm1
4319 vpxor xmm7,xmm7,xmm2
4320 vpaddd xmm6,xmm6,xmm7
4321 vpsrld xmm7,xmm11,6
4322 vpslld xmm2,xmm11,26
4323 vmovdqu XMMWORD[(16-128)+rax],xmm6
4324 vpaddd xmm6,xmm6,xmm14
4325
4326 vpsrld xmm1,xmm11,11
4327 vpxor xmm7,xmm7,xmm2
4328 vpslld xmm2,xmm11,21
4329 vpaddd xmm6,xmm6,XMMWORD[((-96))+rbp]
4330 vpxor xmm7,xmm7,xmm1
4331
4332 vpsrld xmm1,xmm11,25
4333 vpxor xmm7,xmm7,xmm2
4334
4335 vpslld xmm2,xmm11,7
4336 vpandn xmm0,xmm11,xmm13
4337 vpand xmm4,xmm11,xmm12
4338
4339 vpxor xmm7,xmm7,xmm1
4340
4341 vpsrld xmm14,xmm15,2
4342 vpxor xmm7,xmm7,xmm2
4343
4344 vpslld xmm1,xmm15,30
4345 vpxor xmm0,xmm0,xmm4
4346 vpxor xmm4,xmm8,xmm15
4347
4348 vpxor xmm14,xmm14,xmm1
4349 vpaddd xmm6,xmm6,xmm7
4350
4351 vpsrld xmm1,xmm15,13
4352
4353 vpslld xmm2,xmm15,19
4354 vpaddd xmm6,xmm6,xmm0
4355 vpand xmm3,xmm3,xmm4
4356
4357 vpxor xmm7,xmm14,xmm1
4358
4359 vpsrld xmm1,xmm15,22
4360 vpxor xmm7,xmm7,xmm2
4361
4362 vpslld xmm2,xmm15,10
4363 vpxor xmm14,xmm8,xmm3
4364 vpaddd xmm10,xmm10,xmm6
4365
4366 vpxor xmm7,xmm7,xmm1
4367 vpxor xmm7,xmm7,xmm2
4368
4369 vpaddd xmm14,xmm14,xmm6
4370 vpaddd xmm14,xmm14,xmm7
4371 vmovdqu xmm6,XMMWORD[((48-128))+rax]
4372 vpaddd xmm5,xmm5,XMMWORD[((176-128))+rax]
4373
4374 vpsrld xmm7,xmm6,3
4375 vpsrld xmm1,xmm6,7
4376 vpslld xmm2,xmm6,25
4377 vpxor xmm7,xmm7,xmm1
4378 vpsrld xmm1,xmm6,18
4379 vpxor xmm7,xmm7,xmm2
4380 vpslld xmm2,xmm6,14
4381 vmovdqu xmm0,XMMWORD[((0-128))+rax]
4382 vpsrld xmm3,xmm0,10
4383
4384 vpxor xmm7,xmm7,xmm1
4385 vpsrld xmm1,xmm0,17
4386 vpxor xmm7,xmm7,xmm2
4387 vpslld xmm2,xmm0,15
4388 vpaddd xmm5,xmm5,xmm7
4389 vpxor xmm7,xmm3,xmm1
4390 vpsrld xmm1,xmm0,19
4391 vpxor xmm7,xmm7,xmm2
4392 vpslld xmm2,xmm0,13
4393 vpxor xmm7,xmm7,xmm1
4394 vpxor xmm7,xmm7,xmm2
4395 vpaddd xmm5,xmm5,xmm7
4396 vpsrld xmm7,xmm10,6
4397 vpslld xmm2,xmm10,26
4398 vmovdqu XMMWORD[(32-128)+rax],xmm5
4399 vpaddd xmm5,xmm5,xmm13
4400
4401 vpsrld xmm1,xmm10,11
4402 vpxor xmm7,xmm7,xmm2
4403 vpslld xmm2,xmm10,21
4404 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp]
4405 vpxor xmm7,xmm7,xmm1
4406
4407 vpsrld xmm1,xmm10,25
4408 vpxor xmm7,xmm7,xmm2
4409
4410 vpslld xmm2,xmm10,7
4411 vpandn xmm0,xmm10,xmm12
4412 vpand xmm3,xmm10,xmm11
4413
4414 vpxor xmm7,xmm7,xmm1
4415
4416 vpsrld xmm13,xmm14,2
4417 vpxor xmm7,xmm7,xmm2
4418
4419 vpslld xmm1,xmm14,30
4420 vpxor xmm0,xmm0,xmm3
4421 vpxor xmm3,xmm15,xmm14
4422
4423 vpxor xmm13,xmm13,xmm1
4424 vpaddd xmm5,xmm5,xmm7
4425
4426 vpsrld xmm1,xmm14,13
4427
4428 vpslld xmm2,xmm14,19
4429 vpaddd xmm5,xmm5,xmm0
4430 vpand xmm4,xmm4,xmm3
4431
4432 vpxor xmm7,xmm13,xmm1
4433
4434 vpsrld xmm1,xmm14,22
4435 vpxor xmm7,xmm7,xmm2
4436
4437 vpslld xmm2,xmm14,10
4438 vpxor xmm13,xmm15,xmm4
4439 vpaddd xmm9,xmm9,xmm5
4440
4441 vpxor xmm7,xmm7,xmm1
4442 vpxor xmm7,xmm7,xmm2
4443
4444 vpaddd xmm13,xmm13,xmm5
4445 vpaddd xmm13,xmm13,xmm7
4446 vmovdqu xmm5,XMMWORD[((64-128))+rax]
4447 vpaddd xmm6,xmm6,XMMWORD[((192-128))+rax]
4448
4449 vpsrld xmm7,xmm5,3
4450 vpsrld xmm1,xmm5,7
4451 vpslld xmm2,xmm5,25
4452 vpxor xmm7,xmm7,xmm1
4453 vpsrld xmm1,xmm5,18
4454 vpxor xmm7,xmm7,xmm2
4455 vpslld xmm2,xmm5,14
4456 vmovdqu xmm0,XMMWORD[((16-128))+rax]
4457 vpsrld xmm4,xmm0,10
4458
4459 vpxor xmm7,xmm7,xmm1
4460 vpsrld xmm1,xmm0,17
4461 vpxor xmm7,xmm7,xmm2
4462 vpslld xmm2,xmm0,15
4463 vpaddd xmm6,xmm6,xmm7
4464 vpxor xmm7,xmm4,xmm1
4465 vpsrld xmm1,xmm0,19
4466 vpxor xmm7,xmm7,xmm2
4467 vpslld xmm2,xmm0,13
4468 vpxor xmm7,xmm7,xmm1
4469 vpxor xmm7,xmm7,xmm2
4470 vpaddd xmm6,xmm6,xmm7
4471 vpsrld xmm7,xmm9,6
4472 vpslld xmm2,xmm9,26
4473 vmovdqu XMMWORD[(48-128)+rax],xmm6
4474 vpaddd xmm6,xmm6,xmm12
4475
4476 vpsrld xmm1,xmm9,11
4477 vpxor xmm7,xmm7,xmm2
4478 vpslld xmm2,xmm9,21
4479 vpaddd xmm6,xmm6,XMMWORD[((-32))+rbp]
4480 vpxor xmm7,xmm7,xmm1
4481
4482 vpsrld xmm1,xmm9,25
4483 vpxor xmm7,xmm7,xmm2
4484
4485 vpslld xmm2,xmm9,7
4486 vpandn xmm0,xmm9,xmm11
4487 vpand xmm4,xmm9,xmm10
4488
4489 vpxor xmm7,xmm7,xmm1
4490
4491 vpsrld xmm12,xmm13,2
4492 vpxor xmm7,xmm7,xmm2
4493
4494 vpslld xmm1,xmm13,30
4495 vpxor xmm0,xmm0,xmm4
4496 vpxor xmm4,xmm14,xmm13
4497
4498 vpxor xmm12,xmm12,xmm1
4499 vpaddd xmm6,xmm6,xmm7
4500
4501 vpsrld xmm1,xmm13,13
4502
4503 vpslld xmm2,xmm13,19
4504 vpaddd xmm6,xmm6,xmm0
4505 vpand xmm3,xmm3,xmm4
4506
4507 vpxor xmm7,xmm12,xmm1
4508
4509 vpsrld xmm1,xmm13,22
4510 vpxor xmm7,xmm7,xmm2
4511
4512 vpslld xmm2,xmm13,10
4513 vpxor xmm12,xmm14,xmm3
4514 vpaddd xmm8,xmm8,xmm6
4515
4516 vpxor xmm7,xmm7,xmm1
4517 vpxor xmm7,xmm7,xmm2
4518
4519 vpaddd xmm12,xmm12,xmm6
4520 vpaddd xmm12,xmm12,xmm7
4521 vmovdqu xmm6,XMMWORD[((80-128))+rax]
4522 vpaddd xmm5,xmm5,XMMWORD[((208-128))+rax]
4523
4524 vpsrld xmm7,xmm6,3
4525 vpsrld xmm1,xmm6,7
4526 vpslld xmm2,xmm6,25
4527 vpxor xmm7,xmm7,xmm1
4528 vpsrld xmm1,xmm6,18
4529 vpxor xmm7,xmm7,xmm2
4530 vpslld xmm2,xmm6,14
4531 vmovdqu xmm0,XMMWORD[((32-128))+rax]
4532 vpsrld xmm3,xmm0,10
4533
4534 vpxor xmm7,xmm7,xmm1
4535 vpsrld xmm1,xmm0,17
4536 vpxor xmm7,xmm7,xmm2
4537 vpslld xmm2,xmm0,15
4538 vpaddd xmm5,xmm5,xmm7
4539 vpxor xmm7,xmm3,xmm1
4540 vpsrld xmm1,xmm0,19
4541 vpxor xmm7,xmm7,xmm2
4542 vpslld xmm2,xmm0,13
4543 vpxor xmm7,xmm7,xmm1
4544 vpxor xmm7,xmm7,xmm2
4545 vpaddd xmm5,xmm5,xmm7
4546 vpsrld xmm7,xmm8,6
4547 vpslld xmm2,xmm8,26
4548 vmovdqu XMMWORD[(64-128)+rax],xmm5
4549 vpaddd xmm5,xmm5,xmm11
4550
4551 vpsrld xmm1,xmm8,11
4552 vpxor xmm7,xmm7,xmm2
4553 vpslld xmm2,xmm8,21
4554 vpaddd xmm5,xmm5,XMMWORD[rbp]
4555 vpxor xmm7,xmm7,xmm1
4556
4557 vpsrld xmm1,xmm8,25
4558 vpxor xmm7,xmm7,xmm2
4559
4560 vpslld xmm2,xmm8,7
4561 vpandn xmm0,xmm8,xmm10
4562 vpand xmm3,xmm8,xmm9
4563
4564 vpxor xmm7,xmm7,xmm1
4565
4566 vpsrld xmm11,xmm12,2
4567 vpxor xmm7,xmm7,xmm2
4568
4569 vpslld xmm1,xmm12,30
4570 vpxor xmm0,xmm0,xmm3
4571 vpxor xmm3,xmm13,xmm12
4572
4573 vpxor xmm11,xmm11,xmm1
4574 vpaddd xmm5,xmm5,xmm7
4575
4576 vpsrld xmm1,xmm12,13
4577
4578 vpslld xmm2,xmm12,19
4579 vpaddd xmm5,xmm5,xmm0
4580 vpand xmm4,xmm4,xmm3
4581
4582 vpxor xmm7,xmm11,xmm1
4583
4584 vpsrld xmm1,xmm12,22
4585 vpxor xmm7,xmm7,xmm2
4586
4587 vpslld xmm2,xmm12,10
4588 vpxor xmm11,xmm13,xmm4
4589 vpaddd xmm15,xmm15,xmm5
4590
4591 vpxor xmm7,xmm7,xmm1
4592 vpxor xmm7,xmm7,xmm2
4593
4594 vpaddd xmm11,xmm11,xmm5
4595 vpaddd xmm11,xmm11,xmm7
4596 vmovdqu xmm5,XMMWORD[((96-128))+rax]
4597 vpaddd xmm6,xmm6,XMMWORD[((224-128))+rax]
4598
4599 vpsrld xmm7,xmm5,3
4600 vpsrld xmm1,xmm5,7
4601 vpslld xmm2,xmm5,25
4602 vpxor xmm7,xmm7,xmm1
4603 vpsrld xmm1,xmm5,18
4604 vpxor xmm7,xmm7,xmm2
4605 vpslld xmm2,xmm5,14
4606 vmovdqu xmm0,XMMWORD[((48-128))+rax]
4607 vpsrld xmm4,xmm0,10
4608
4609 vpxor xmm7,xmm7,xmm1
4610 vpsrld xmm1,xmm0,17
4611 vpxor xmm7,xmm7,xmm2
4612 vpslld xmm2,xmm0,15
4613 vpaddd xmm6,xmm6,xmm7
4614 vpxor xmm7,xmm4,xmm1
4615 vpsrld xmm1,xmm0,19
4616 vpxor xmm7,xmm7,xmm2
4617 vpslld xmm2,xmm0,13
4618 vpxor xmm7,xmm7,xmm1
4619 vpxor xmm7,xmm7,xmm2
4620 vpaddd xmm6,xmm6,xmm7
4621 vpsrld xmm7,xmm15,6
4622 vpslld xmm2,xmm15,26
4623 vmovdqu XMMWORD[(80-128)+rax],xmm6
4624 vpaddd xmm6,xmm6,xmm10
4625
4626 vpsrld xmm1,xmm15,11
4627 vpxor xmm7,xmm7,xmm2
4628 vpslld xmm2,xmm15,21
4629 vpaddd xmm6,xmm6,XMMWORD[32+rbp]
4630 vpxor xmm7,xmm7,xmm1
4631
4632 vpsrld xmm1,xmm15,25
4633 vpxor xmm7,xmm7,xmm2
4634
4635 vpslld xmm2,xmm15,7
4636 vpandn xmm0,xmm15,xmm9
4637 vpand xmm4,xmm15,xmm8
4638
4639 vpxor xmm7,xmm7,xmm1
4640
4641 vpsrld xmm10,xmm11,2
4642 vpxor xmm7,xmm7,xmm2
4643
4644 vpslld xmm1,xmm11,30
4645 vpxor xmm0,xmm0,xmm4
4646 vpxor xmm4,xmm12,xmm11
4647
4648 vpxor xmm10,xmm10,xmm1
4649 vpaddd xmm6,xmm6,xmm7
4650
4651 vpsrld xmm1,xmm11,13
4652
4653 vpslld xmm2,xmm11,19
4654 vpaddd xmm6,xmm6,xmm0
4655 vpand xmm3,xmm3,xmm4
4656
4657 vpxor xmm7,xmm10,xmm1
4658
4659 vpsrld xmm1,xmm11,22
4660 vpxor xmm7,xmm7,xmm2
4661
4662 vpslld xmm2,xmm11,10
4663 vpxor xmm10,xmm12,xmm3
4664 vpaddd xmm14,xmm14,xmm6
4665
4666 vpxor xmm7,xmm7,xmm1
4667 vpxor xmm7,xmm7,xmm2
4668
4669 vpaddd xmm10,xmm10,xmm6
4670 vpaddd xmm10,xmm10,xmm7
4671 vmovdqu xmm6,XMMWORD[((112-128))+rax]
4672 vpaddd xmm5,xmm5,XMMWORD[((240-128))+rax]
4673
4674 vpsrld xmm7,xmm6,3
4675 vpsrld xmm1,xmm6,7
4676 vpslld xmm2,xmm6,25
4677 vpxor xmm7,xmm7,xmm1
4678 vpsrld xmm1,xmm6,18
4679 vpxor xmm7,xmm7,xmm2
4680 vpslld xmm2,xmm6,14
4681 vmovdqu xmm0,XMMWORD[((64-128))+rax]
4682 vpsrld xmm3,xmm0,10
4683
4684 vpxor xmm7,xmm7,xmm1
4685 vpsrld xmm1,xmm0,17
4686 vpxor xmm7,xmm7,xmm2
4687 vpslld xmm2,xmm0,15
4688 vpaddd xmm5,xmm5,xmm7
4689 vpxor xmm7,xmm3,xmm1
4690 vpsrld xmm1,xmm0,19
4691 vpxor xmm7,xmm7,xmm2
4692 vpslld xmm2,xmm0,13
4693 vpxor xmm7,xmm7,xmm1
4694 vpxor xmm7,xmm7,xmm2
4695 vpaddd xmm5,xmm5,xmm7
4696 vpsrld xmm7,xmm14,6
4697 vpslld xmm2,xmm14,26
4698 vmovdqu XMMWORD[(96-128)+rax],xmm5
4699 vpaddd xmm5,xmm5,xmm9
4700
4701 vpsrld xmm1,xmm14,11
4702 vpxor xmm7,xmm7,xmm2
4703 vpslld xmm2,xmm14,21
4704 vpaddd xmm5,xmm5,XMMWORD[64+rbp]
4705 vpxor xmm7,xmm7,xmm1
4706
4707 vpsrld xmm1,xmm14,25
4708 vpxor xmm7,xmm7,xmm2
4709
4710 vpslld xmm2,xmm14,7
4711 vpandn xmm0,xmm14,xmm8
4712 vpand xmm3,xmm14,xmm15
4713
4714 vpxor xmm7,xmm7,xmm1
4715
4716 vpsrld xmm9,xmm10,2
4717 vpxor xmm7,xmm7,xmm2
4718
4719 vpslld xmm1,xmm10,30
4720 vpxor xmm0,xmm0,xmm3
4721 vpxor xmm3,xmm11,xmm10
4722
4723 vpxor xmm9,xmm9,xmm1
4724 vpaddd xmm5,xmm5,xmm7
4725
4726 vpsrld xmm1,xmm10,13
4727
4728 vpslld xmm2,xmm10,19
4729 vpaddd xmm5,xmm5,xmm0
4730 vpand xmm4,xmm4,xmm3
4731
4732 vpxor xmm7,xmm9,xmm1
4733
4734 vpsrld xmm1,xmm10,22
4735 vpxor xmm7,xmm7,xmm2
4736
4737 vpslld xmm2,xmm10,10
4738 vpxor xmm9,xmm11,xmm4
4739 vpaddd xmm13,xmm13,xmm5
4740
4741 vpxor xmm7,xmm7,xmm1
4742 vpxor xmm7,xmm7,xmm2
4743
4744 vpaddd xmm9,xmm9,xmm5
4745 vpaddd xmm9,xmm9,xmm7
4746 vmovdqu xmm5,XMMWORD[((128-128))+rax]
4747 vpaddd xmm6,xmm6,XMMWORD[((0-128))+rax]
4748
4749 vpsrld xmm7,xmm5,3
4750 vpsrld xmm1,xmm5,7
4751 vpslld xmm2,xmm5,25
4752 vpxor xmm7,xmm7,xmm1
4753 vpsrld xmm1,xmm5,18
4754 vpxor xmm7,xmm7,xmm2
4755 vpslld xmm2,xmm5,14
4756 vmovdqu xmm0,XMMWORD[((80-128))+rax]
4757 vpsrld xmm4,xmm0,10
4758
4759 vpxor xmm7,xmm7,xmm1
4760 vpsrld xmm1,xmm0,17
4761 vpxor xmm7,xmm7,xmm2
4762 vpslld xmm2,xmm0,15
4763 vpaddd xmm6,xmm6,xmm7
4764 vpxor xmm7,xmm4,xmm1
4765 vpsrld xmm1,xmm0,19
4766 vpxor xmm7,xmm7,xmm2
4767 vpslld xmm2,xmm0,13
4768 vpxor xmm7,xmm7,xmm1
4769 vpxor xmm7,xmm7,xmm2
4770 vpaddd xmm6,xmm6,xmm7
4771 vpsrld xmm7,xmm13,6
4772 vpslld xmm2,xmm13,26
4773 vmovdqu XMMWORD[(112-128)+rax],xmm6
4774 vpaddd xmm6,xmm6,xmm8
4775
4776 vpsrld xmm1,xmm13,11
4777 vpxor xmm7,xmm7,xmm2
4778 vpslld xmm2,xmm13,21
4779 vpaddd xmm6,xmm6,XMMWORD[96+rbp]
4780 vpxor xmm7,xmm7,xmm1
4781
4782 vpsrld xmm1,xmm13,25
4783 vpxor xmm7,xmm7,xmm2
4784
4785 vpslld xmm2,xmm13,7
4786 vpandn xmm0,xmm13,xmm15
4787 vpand xmm4,xmm13,xmm14
4788
4789 vpxor xmm7,xmm7,xmm1
4790
4791 vpsrld xmm8,xmm9,2
4792 vpxor xmm7,xmm7,xmm2
4793
4794 vpslld xmm1,xmm9,30
4795 vpxor xmm0,xmm0,xmm4
4796 vpxor xmm4,xmm10,xmm9
4797
4798 vpxor xmm8,xmm8,xmm1
4799 vpaddd xmm6,xmm6,xmm7
4800
4801 vpsrld xmm1,xmm9,13
4802
4803 vpslld xmm2,xmm9,19
4804 vpaddd xmm6,xmm6,xmm0
4805 vpand xmm3,xmm3,xmm4
4806
4807 vpxor xmm7,xmm8,xmm1
4808
4809 vpsrld xmm1,xmm9,22
4810 vpxor xmm7,xmm7,xmm2
4811
4812 vpslld xmm2,xmm9,10
4813 vpxor xmm8,xmm10,xmm3
4814 vpaddd xmm12,xmm12,xmm6
4815
4816 vpxor xmm7,xmm7,xmm1
4817 vpxor xmm7,xmm7,xmm2
4818
4819 vpaddd xmm8,xmm8,xmm6
4820 vpaddd xmm8,xmm8,xmm7
4821 add rbp,256
4822 vmovdqu xmm6,XMMWORD[((144-128))+rax]
4823 vpaddd xmm5,xmm5,XMMWORD[((16-128))+rax]
4824
4825 vpsrld xmm7,xmm6,3
4826 vpsrld xmm1,xmm6,7
4827 vpslld xmm2,xmm6,25
4828 vpxor xmm7,xmm7,xmm1
4829 vpsrld xmm1,xmm6,18
4830 vpxor xmm7,xmm7,xmm2
4831 vpslld xmm2,xmm6,14
4832 vmovdqu xmm0,XMMWORD[((96-128))+rax]
4833 vpsrld xmm3,xmm0,10
4834
4835 vpxor xmm7,xmm7,xmm1
4836 vpsrld xmm1,xmm0,17
4837 vpxor xmm7,xmm7,xmm2
4838 vpslld xmm2,xmm0,15
4839 vpaddd xmm5,xmm5,xmm7
4840 vpxor xmm7,xmm3,xmm1
4841 vpsrld xmm1,xmm0,19
4842 vpxor xmm7,xmm7,xmm2
4843 vpslld xmm2,xmm0,13
4844 vpxor xmm7,xmm7,xmm1
4845 vpxor xmm7,xmm7,xmm2
4846 vpaddd xmm5,xmm5,xmm7
4847 vpsrld xmm7,xmm12,6
4848 vpslld xmm2,xmm12,26
4849 vmovdqu XMMWORD[(128-128)+rax],xmm5
4850 vpaddd xmm5,xmm5,xmm15
4851
4852 vpsrld xmm1,xmm12,11
4853 vpxor xmm7,xmm7,xmm2
4854 vpslld xmm2,xmm12,21
4855 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp]
4856 vpxor xmm7,xmm7,xmm1
4857
4858 vpsrld xmm1,xmm12,25
4859 vpxor xmm7,xmm7,xmm2
4860
4861 vpslld xmm2,xmm12,7
4862 vpandn xmm0,xmm12,xmm14
4863 vpand xmm3,xmm12,xmm13
4864
4865 vpxor xmm7,xmm7,xmm1
4866
4867 vpsrld xmm15,xmm8,2
4868 vpxor xmm7,xmm7,xmm2
4869
4870 vpslld xmm1,xmm8,30
4871 vpxor xmm0,xmm0,xmm3
4872 vpxor xmm3,xmm9,xmm8
4873
4874 vpxor xmm15,xmm15,xmm1
4875 vpaddd xmm5,xmm5,xmm7
4876
4877 vpsrld xmm1,xmm8,13
4878
4879 vpslld xmm2,xmm8,19
4880 vpaddd xmm5,xmm5,xmm0
4881 vpand xmm4,xmm4,xmm3
4882
4883 vpxor xmm7,xmm15,xmm1
4884
4885 vpsrld xmm1,xmm8,22
4886 vpxor xmm7,xmm7,xmm2
4887
4888 vpslld xmm2,xmm8,10
4889 vpxor xmm15,xmm9,xmm4
4890 vpaddd xmm11,xmm11,xmm5
4891
4892 vpxor xmm7,xmm7,xmm1
4893 vpxor xmm7,xmm7,xmm2
4894
4895 vpaddd xmm15,xmm15,xmm5
4896 vpaddd xmm15,xmm15,xmm7
4897 vmovdqu xmm5,XMMWORD[((160-128))+rax]
4898 vpaddd xmm6,xmm6,XMMWORD[((32-128))+rax]
4899
4900 vpsrld xmm7,xmm5,3
4901 vpsrld xmm1,xmm5,7
4902 vpslld xmm2,xmm5,25
4903 vpxor xmm7,xmm7,xmm1
4904 vpsrld xmm1,xmm5,18
4905 vpxor xmm7,xmm7,xmm2
4906 vpslld xmm2,xmm5,14
4907 vmovdqu xmm0,XMMWORD[((112-128))+rax]
4908 vpsrld xmm4,xmm0,10
4909
4910 vpxor xmm7,xmm7,xmm1
4911 vpsrld xmm1,xmm0,17
4912 vpxor xmm7,xmm7,xmm2
4913 vpslld xmm2,xmm0,15
4914 vpaddd xmm6,xmm6,xmm7
4915 vpxor xmm7,xmm4,xmm1
4916 vpsrld xmm1,xmm0,19
4917 vpxor xmm7,xmm7,xmm2
4918 vpslld xmm2,xmm0,13
4919 vpxor xmm7,xmm7,xmm1
4920 vpxor xmm7,xmm7,xmm2
4921 vpaddd xmm6,xmm6,xmm7
4922 vpsrld xmm7,xmm11,6
4923 vpslld xmm2,xmm11,26
4924 vmovdqu XMMWORD[(144-128)+rax],xmm6
4925 vpaddd xmm6,xmm6,xmm14
4926
4927 vpsrld xmm1,xmm11,11
4928 vpxor xmm7,xmm7,xmm2
4929 vpslld xmm2,xmm11,21
4930 vpaddd xmm6,xmm6,XMMWORD[((-96))+rbp]
4931 vpxor xmm7,xmm7,xmm1
4932
4933 vpsrld xmm1,xmm11,25
4934 vpxor xmm7,xmm7,xmm2
4935
4936 vpslld xmm2,xmm11,7
4937 vpandn xmm0,xmm11,xmm13
4938 vpand xmm4,xmm11,xmm12
4939
4940 vpxor xmm7,xmm7,xmm1
4941
4942 vpsrld xmm14,xmm15,2
4943 vpxor xmm7,xmm7,xmm2
4944
4945 vpslld xmm1,xmm15,30
4946 vpxor xmm0,xmm0,xmm4
4947 vpxor xmm4,xmm8,xmm15
4948
4949 vpxor xmm14,xmm14,xmm1
4950 vpaddd xmm6,xmm6,xmm7
4951
4952 vpsrld xmm1,xmm15,13
4953
4954 vpslld xmm2,xmm15,19
4955 vpaddd xmm6,xmm6,xmm0
4956 vpand xmm3,xmm3,xmm4
4957
4958 vpxor xmm7,xmm14,xmm1
4959
4960 vpsrld xmm1,xmm15,22
4961 vpxor xmm7,xmm7,xmm2
4962
4963 vpslld xmm2,xmm15,10
4964 vpxor xmm14,xmm8,xmm3
4965 vpaddd xmm10,xmm10,xmm6
4966
4967 vpxor xmm7,xmm7,xmm1
4968 vpxor xmm7,xmm7,xmm2
4969
4970 vpaddd xmm14,xmm14,xmm6
4971 vpaddd xmm14,xmm14,xmm7
4972 vmovdqu xmm6,XMMWORD[((176-128))+rax]
4973 vpaddd xmm5,xmm5,XMMWORD[((48-128))+rax]
4974
4975 vpsrld xmm7,xmm6,3
4976 vpsrld xmm1,xmm6,7
4977 vpslld xmm2,xmm6,25
4978 vpxor xmm7,xmm7,xmm1
4979 vpsrld xmm1,xmm6,18
4980 vpxor xmm7,xmm7,xmm2
4981 vpslld xmm2,xmm6,14
4982 vmovdqu xmm0,XMMWORD[((128-128))+rax]
4983 vpsrld xmm3,xmm0,10
4984
4985 vpxor xmm7,xmm7,xmm1
4986 vpsrld xmm1,xmm0,17
4987 vpxor xmm7,xmm7,xmm2
4988 vpslld xmm2,xmm0,15
4989 vpaddd xmm5,xmm5,xmm7
4990 vpxor xmm7,xmm3,xmm1
4991 vpsrld xmm1,xmm0,19
4992 vpxor xmm7,xmm7,xmm2
4993 vpslld xmm2,xmm0,13
4994 vpxor xmm7,xmm7,xmm1
4995 vpxor xmm7,xmm7,xmm2
4996 vpaddd xmm5,xmm5,xmm7
4997 vpsrld xmm7,xmm10,6
4998 vpslld xmm2,xmm10,26
4999 vmovdqu XMMWORD[(160-128)+rax],xmm5
5000 vpaddd xmm5,xmm5,xmm13
5001
5002 vpsrld xmm1,xmm10,11
5003 vpxor xmm7,xmm7,xmm2
5004 vpslld xmm2,xmm10,21
5005 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp]
5006 vpxor xmm7,xmm7,xmm1
5007
5008 vpsrld xmm1,xmm10,25
5009 vpxor xmm7,xmm7,xmm2
5010
5011 vpslld xmm2,xmm10,7
5012 vpandn xmm0,xmm10,xmm12
5013 vpand xmm3,xmm10,xmm11
5014
5015 vpxor xmm7,xmm7,xmm1
5016
5017 vpsrld xmm13,xmm14,2
5018 vpxor xmm7,xmm7,xmm2
5019
5020 vpslld xmm1,xmm14,30
5021 vpxor xmm0,xmm0,xmm3
5022 vpxor xmm3,xmm15,xmm14
5023
5024 vpxor xmm13,xmm13,xmm1
5025 vpaddd xmm5,xmm5,xmm7
5026
5027 vpsrld xmm1,xmm14,13
5028
5029 vpslld xmm2,xmm14,19
5030 vpaddd xmm5,xmm5,xmm0
5031 vpand xmm4,xmm4,xmm3
5032
5033 vpxor xmm7,xmm13,xmm1
5034
5035 vpsrld xmm1,xmm14,22
5036 vpxor xmm7,xmm7,xmm2
5037
5038 vpslld xmm2,xmm14,10
5039 vpxor xmm13,xmm15,xmm4
5040 vpaddd xmm9,xmm9,xmm5
5041
5042 vpxor xmm7,xmm7,xmm1
5043 vpxor xmm7,xmm7,xmm2
5044
5045 vpaddd xmm13,xmm13,xmm5
5046 vpaddd xmm13,xmm13,xmm7
5047 vmovdqu xmm5,XMMWORD[((192-128))+rax]
5048 vpaddd xmm6,xmm6,XMMWORD[((64-128))+rax]
5049
5050 vpsrld xmm7,xmm5,3
5051 vpsrld xmm1,xmm5,7
5052 vpslld xmm2,xmm5,25
5053 vpxor xmm7,xmm7,xmm1
5054 vpsrld xmm1,xmm5,18
5055 vpxor xmm7,xmm7,xmm2
5056 vpslld xmm2,xmm5,14
5057 vmovdqu xmm0,XMMWORD[((144-128))+rax]
5058 vpsrld xmm4,xmm0,10
5059
5060 vpxor xmm7,xmm7,xmm1
5061 vpsrld xmm1,xmm0,17
5062 vpxor xmm7,xmm7,xmm2
5063 vpslld xmm2,xmm0,15
5064 vpaddd xmm6,xmm6,xmm7
5065 vpxor xmm7,xmm4,xmm1
5066 vpsrld xmm1,xmm0,19
5067 vpxor xmm7,xmm7,xmm2
5068 vpslld xmm2,xmm0,13
5069 vpxor xmm7,xmm7,xmm1
5070 vpxor xmm7,xmm7,xmm2
5071 vpaddd xmm6,xmm6,xmm7
5072 vpsrld xmm7,xmm9,6
5073 vpslld xmm2,xmm9,26
5074 vmovdqu XMMWORD[(176-128)+rax],xmm6
5075 vpaddd xmm6,xmm6,xmm12
5076
5077 vpsrld xmm1,xmm9,11
5078 vpxor xmm7,xmm7,xmm2
5079 vpslld xmm2,xmm9,21
5080 vpaddd xmm6,xmm6,XMMWORD[((-32))+rbp]
5081 vpxor xmm7,xmm7,xmm1
5082
5083 vpsrld xmm1,xmm9,25
5084 vpxor xmm7,xmm7,xmm2
5085
5086 vpslld xmm2,xmm9,7
5087 vpandn xmm0,xmm9,xmm11
5088 vpand xmm4,xmm9,xmm10
5089
5090 vpxor xmm7,xmm7,xmm1
5091
5092 vpsrld xmm12,xmm13,2
5093 vpxor xmm7,xmm7,xmm2
5094
5095 vpslld xmm1,xmm13,30
5096 vpxor xmm0,xmm0,xmm4
5097 vpxor xmm4,xmm14,xmm13
5098
5099 vpxor xmm12,xmm12,xmm1
5100 vpaddd xmm6,xmm6,xmm7
5101
5102 vpsrld xmm1,xmm13,13
5103
5104 vpslld xmm2,xmm13,19
5105 vpaddd xmm6,xmm6,xmm0
5106 vpand xmm3,xmm3,xmm4
5107
5108 vpxor xmm7,xmm12,xmm1
5109
5110 vpsrld xmm1,xmm13,22
5111 vpxor xmm7,xmm7,xmm2
5112
5113 vpslld xmm2,xmm13,10
5114 vpxor xmm12,xmm14,xmm3
5115 vpaddd xmm8,xmm8,xmm6
5116
5117 vpxor xmm7,xmm7,xmm1
5118 vpxor xmm7,xmm7,xmm2
5119
5120 vpaddd xmm12,xmm12,xmm6
5121 vpaddd xmm12,xmm12,xmm7
5122 vmovdqu xmm6,XMMWORD[((208-128))+rax]
5123 vpaddd xmm5,xmm5,XMMWORD[((80-128))+rax]
5124
5125 vpsrld xmm7,xmm6,3
5126 vpsrld xmm1,xmm6,7
5127 vpslld xmm2,xmm6,25
5128 vpxor xmm7,xmm7,xmm1
5129 vpsrld xmm1,xmm6,18
5130 vpxor xmm7,xmm7,xmm2
5131 vpslld xmm2,xmm6,14
5132 vmovdqu xmm0,XMMWORD[((160-128))+rax]
5133 vpsrld xmm3,xmm0,10
5134
5135 vpxor xmm7,xmm7,xmm1
5136 vpsrld xmm1,xmm0,17
5137 vpxor xmm7,xmm7,xmm2
5138 vpslld xmm2,xmm0,15
5139 vpaddd xmm5,xmm5,xmm7
5140 vpxor xmm7,xmm3,xmm1
5141 vpsrld xmm1,xmm0,19
5142 vpxor xmm7,xmm7,xmm2
5143 vpslld xmm2,xmm0,13
5144 vpxor xmm7,xmm7,xmm1
5145 vpxor xmm7,xmm7,xmm2
5146 vpaddd xmm5,xmm5,xmm7
5147 vpsrld xmm7,xmm8,6
5148 vpslld xmm2,xmm8,26
5149 vmovdqu XMMWORD[(192-128)+rax],xmm5
5150 vpaddd xmm5,xmm5,xmm11
5151
5152 vpsrld xmm1,xmm8,11
5153 vpxor xmm7,xmm7,xmm2
5154 vpslld xmm2,xmm8,21
5155 vpaddd xmm5,xmm5,XMMWORD[rbp]
5156 vpxor xmm7,xmm7,xmm1
5157
5158 vpsrld xmm1,xmm8,25
5159 vpxor xmm7,xmm7,xmm2
5160
5161 vpslld xmm2,xmm8,7
5162 vpandn xmm0,xmm8,xmm10
5163 vpand xmm3,xmm8,xmm9
5164
5165 vpxor xmm7,xmm7,xmm1
5166
5167 vpsrld xmm11,xmm12,2
5168 vpxor xmm7,xmm7,xmm2
5169
5170 vpslld xmm1,xmm12,30
5171 vpxor xmm0,xmm0,xmm3
5172 vpxor xmm3,xmm13,xmm12
5173
5174 vpxor xmm11,xmm11,xmm1
5175 vpaddd xmm5,xmm5,xmm7
5176
5177 vpsrld xmm1,xmm12,13
5178
5179 vpslld xmm2,xmm12,19
5180 vpaddd xmm5,xmm5,xmm0
5181 vpand xmm4,xmm4,xmm3
5182
5183 vpxor xmm7,xmm11,xmm1
5184
5185 vpsrld xmm1,xmm12,22
5186 vpxor xmm7,xmm7,xmm2
5187
5188 vpslld xmm2,xmm12,10
5189 vpxor xmm11,xmm13,xmm4
5190 vpaddd xmm15,xmm15,xmm5
5191
5192 vpxor xmm7,xmm7,xmm1
5193 vpxor xmm7,xmm7,xmm2
5194
5195 vpaddd xmm11,xmm11,xmm5
5196 vpaddd xmm11,xmm11,xmm7
5197 vmovdqu xmm5,XMMWORD[((224-128))+rax]
5198 vpaddd xmm6,xmm6,XMMWORD[((96-128))+rax]
5199
5200 vpsrld xmm7,xmm5,3
5201 vpsrld xmm1,xmm5,7
5202 vpslld xmm2,xmm5,25
5203 vpxor xmm7,xmm7,xmm1
5204 vpsrld xmm1,xmm5,18
5205 vpxor xmm7,xmm7,xmm2
5206 vpslld xmm2,xmm5,14
5207 vmovdqu xmm0,XMMWORD[((176-128))+rax]
5208 vpsrld xmm4,xmm0,10
5209
5210 vpxor xmm7,xmm7,xmm1
5211 vpsrld xmm1,xmm0,17
5212 vpxor xmm7,xmm7,xmm2
5213 vpslld xmm2,xmm0,15
5214 vpaddd xmm6,xmm6,xmm7
5215 vpxor xmm7,xmm4,xmm1
5216 vpsrld xmm1,xmm0,19
5217 vpxor xmm7,xmm7,xmm2
5218 vpslld xmm2,xmm0,13
5219 vpxor xmm7,xmm7,xmm1
5220 vpxor xmm7,xmm7,xmm2
5221 vpaddd xmm6,xmm6,xmm7
5222 vpsrld xmm7,xmm15,6
5223 vpslld xmm2,xmm15,26
5224 vmovdqu XMMWORD[(208-128)+rax],xmm6
5225 vpaddd xmm6,xmm6,xmm10
5226
5227 vpsrld xmm1,xmm15,11
5228 vpxor xmm7,xmm7,xmm2
5229 vpslld xmm2,xmm15,21
5230 vpaddd xmm6,xmm6,XMMWORD[32+rbp]
5231 vpxor xmm7,xmm7,xmm1
5232
5233 vpsrld xmm1,xmm15,25
5234 vpxor xmm7,xmm7,xmm2
5235
5236 vpslld xmm2,xmm15,7
5237 vpandn xmm0,xmm15,xmm9
5238 vpand xmm4,xmm15,xmm8
5239
5240 vpxor xmm7,xmm7,xmm1
5241
5242 vpsrld xmm10,xmm11,2
5243 vpxor xmm7,xmm7,xmm2
5244
5245 vpslld xmm1,xmm11,30
5246 vpxor xmm0,xmm0,xmm4
5247 vpxor xmm4,xmm12,xmm11
5248
5249 vpxor xmm10,xmm10,xmm1
5250 vpaddd xmm6,xmm6,xmm7
5251
5252 vpsrld xmm1,xmm11,13
5253
5254 vpslld xmm2,xmm11,19
5255 vpaddd xmm6,xmm6,xmm0
5256 vpand xmm3,xmm3,xmm4
5257
5258 vpxor xmm7,xmm10,xmm1
5259
5260 vpsrld xmm1,xmm11,22
5261 vpxor xmm7,xmm7,xmm2
5262
5263 vpslld xmm2,xmm11,10
5264 vpxor xmm10,xmm12,xmm3
5265 vpaddd xmm14,xmm14,xmm6
5266
5267 vpxor xmm7,xmm7,xmm1
5268 vpxor xmm7,xmm7,xmm2
5269
5270 vpaddd xmm10,xmm10,xmm6
5271 vpaddd xmm10,xmm10,xmm7
5272 vmovdqu xmm6,XMMWORD[((240-128))+rax]
5273 vpaddd xmm5,xmm5,XMMWORD[((112-128))+rax]
5274
5275 vpsrld xmm7,xmm6,3
5276 vpsrld xmm1,xmm6,7
5277 vpslld xmm2,xmm6,25
5278 vpxor xmm7,xmm7,xmm1
5279 vpsrld xmm1,xmm6,18
5280 vpxor xmm7,xmm7,xmm2
5281 vpslld xmm2,xmm6,14
5282 vmovdqu xmm0,XMMWORD[((192-128))+rax]
5283 vpsrld xmm3,xmm0,10
5284
5285 vpxor xmm7,xmm7,xmm1
5286 vpsrld xmm1,xmm0,17
5287 vpxor xmm7,xmm7,xmm2
5288 vpslld xmm2,xmm0,15
5289 vpaddd xmm5,xmm5,xmm7
5290 vpxor xmm7,xmm3,xmm1
5291 vpsrld xmm1,xmm0,19
5292 vpxor xmm7,xmm7,xmm2
5293 vpslld xmm2,xmm0,13
5294 vpxor xmm7,xmm7,xmm1
5295 vpxor xmm7,xmm7,xmm2
5296 vpaddd xmm5,xmm5,xmm7
5297 vpsrld xmm7,xmm14,6
5298 vpslld xmm2,xmm14,26
5299 vmovdqu XMMWORD[(224-128)+rax],xmm5
5300 vpaddd xmm5,xmm5,xmm9
5301
5302 vpsrld xmm1,xmm14,11
5303 vpxor xmm7,xmm7,xmm2
5304 vpslld xmm2,xmm14,21
5305 vpaddd xmm5,xmm5,XMMWORD[64+rbp]
5306 vpxor xmm7,xmm7,xmm1
5307
5308 vpsrld xmm1,xmm14,25
5309 vpxor xmm7,xmm7,xmm2
5310
5311 vpslld xmm2,xmm14,7
5312 vpandn xmm0,xmm14,xmm8
5313 vpand xmm3,xmm14,xmm15
5314
5315 vpxor xmm7,xmm7,xmm1
5316
5317 vpsrld xmm9,xmm10,2
5318 vpxor xmm7,xmm7,xmm2
5319
5320 vpslld xmm1,xmm10,30
5321 vpxor xmm0,xmm0,xmm3
5322 vpxor xmm3,xmm11,xmm10
5323
5324 vpxor xmm9,xmm9,xmm1
5325 vpaddd xmm5,xmm5,xmm7
5326
5327 vpsrld xmm1,xmm10,13
5328
5329 vpslld xmm2,xmm10,19
5330 vpaddd xmm5,xmm5,xmm0
5331 vpand xmm4,xmm4,xmm3
5332
5333 vpxor xmm7,xmm9,xmm1
5334
5335 vpsrld xmm1,xmm10,22
5336 vpxor xmm7,xmm7,xmm2
5337
5338 vpslld xmm2,xmm10,10
5339 vpxor xmm9,xmm11,xmm4
5340 vpaddd xmm13,xmm13,xmm5
5341
5342 vpxor xmm7,xmm7,xmm1
5343 vpxor xmm7,xmm7,xmm2
5344
5345 vpaddd xmm9,xmm9,xmm5
5346 vpaddd xmm9,xmm9,xmm7
5347 vmovdqu xmm5,XMMWORD[((0-128))+rax]
5348 vpaddd xmm6,xmm6,XMMWORD[((128-128))+rax]
5349
5350 vpsrld xmm7,xmm5,3
5351 vpsrld xmm1,xmm5,7
5352 vpslld xmm2,xmm5,25
5353 vpxor xmm7,xmm7,xmm1
5354 vpsrld xmm1,xmm5,18
5355 vpxor xmm7,xmm7,xmm2
5356 vpslld xmm2,xmm5,14
5357 vmovdqu xmm0,XMMWORD[((208-128))+rax]
5358 vpsrld xmm4,xmm0,10
5359
5360 vpxor xmm7,xmm7,xmm1
5361 vpsrld xmm1,xmm0,17
5362 vpxor xmm7,xmm7,xmm2
5363 vpslld xmm2,xmm0,15
5364 vpaddd xmm6,xmm6,xmm7
5365 vpxor xmm7,xmm4,xmm1
5366 vpsrld xmm1,xmm0,19
5367 vpxor xmm7,xmm7,xmm2
5368 vpslld xmm2,xmm0,13
5369 vpxor xmm7,xmm7,xmm1
5370 vpxor xmm7,xmm7,xmm2
5371 vpaddd xmm6,xmm6,xmm7
5372 vpsrld xmm7,xmm13,6
5373 vpslld xmm2,xmm13,26
5374 vmovdqu XMMWORD[(240-128)+rax],xmm6
5375 vpaddd xmm6,xmm6,xmm8
5376
5377 vpsrld xmm1,xmm13,11
5378 vpxor xmm7,xmm7,xmm2
5379 vpslld xmm2,xmm13,21
5380 vpaddd xmm6,xmm6,XMMWORD[96+rbp]
5381 vpxor xmm7,xmm7,xmm1
5382
5383 vpsrld xmm1,xmm13,25
5384 vpxor xmm7,xmm7,xmm2
5385
5386 vpslld xmm2,xmm13,7
5387 vpandn xmm0,xmm13,xmm15
5388 vpand xmm4,xmm13,xmm14
5389
5390 vpxor xmm7,xmm7,xmm1
5391
5392 vpsrld xmm8,xmm9,2
5393 vpxor xmm7,xmm7,xmm2
5394
5395 vpslld xmm1,xmm9,30
5396 vpxor xmm0,xmm0,xmm4
5397 vpxor xmm4,xmm10,xmm9
5398
5399 vpxor xmm8,xmm8,xmm1
5400 vpaddd xmm6,xmm6,xmm7
5401
5402 vpsrld xmm1,xmm9,13
5403
5404 vpslld xmm2,xmm9,19
5405 vpaddd xmm6,xmm6,xmm0
5406 vpand xmm3,xmm3,xmm4
5407
5408 vpxor xmm7,xmm8,xmm1
5409
5410 vpsrld xmm1,xmm9,22
5411 vpxor xmm7,xmm7,xmm2
5412
5413 vpslld xmm2,xmm9,10
5414 vpxor xmm8,xmm10,xmm3
5415 vpaddd xmm12,xmm12,xmm6
5416
5417 vpxor xmm7,xmm7,xmm1
5418 vpxor xmm7,xmm7,xmm2
5419
5420 vpaddd xmm8,xmm8,xmm6
5421 vpaddd xmm8,xmm8,xmm7
5422 add rbp,256
5423 dec ecx
5424 jnz NEAR $L$oop_16_xx_avx
5425
; Per-block tail of the 4-lane AVX loop: redirect the pointers of lanes that are
; out of input, decrement the four per-lane counters at [rbx], and add the
; working variables xmm8..xmm15 into the eight rows stored at rdi-128 .. rdi+96.
; NOTE(review): r8..r11 are presumably the four per-lane input pointers and
; [rbx] the per-lane remaining-block counters, as set up by the SSE variant at
; the top of the file; this function's own setup is not visible here - confirm.
5426 mov ecx,1
5427 lea rbp,[((K256+128))] ; rewind rbp, which the round code advanced with add rbp,256
5428 cmp ecx,DWORD[rbx] ; signed test 1 >= counter[0]
5429 cmovge r8,rbp ; counter <= 1: point r8 at K256+128 instead
5430 cmp ecx,DWORD[4+rbx]
5431 cmovge r9,rbp
5432 cmp ecx,DWORD[8+rbx]
5433 cmovge r10,rbp
5434 cmp ecx,DWORD[12+rbx]
5435 cmovge r11,rbp
5436 vmovdqa xmm7,XMMWORD[rbx] ; xmm7 = the four dword counters
5437 vpxor xmm0,xmm0,xmm0 ; xmm0 = 0
5438 vmovdqa xmm6,xmm7
5439 vpcmpgtd xmm6,xmm6,xmm0 ; xmm6 = all-ones (-1) in lanes whose counter > 0, else 0
5440 vpaddd xmm7,xmm7,xmm6 ; adding the mask decrements only the counters that were > 0
5441
; vpand with the mask zeroes the working variable in lanes whose counter was
; <= 0, so the vpaddd below leaves the stored value of such a lane unchanged.
5442 vmovdqu xmm0,XMMWORD[((0-128))+rdi]
5443 vpand xmm8,xmm8,xmm6
5444 vmovdqu xmm1,XMMWORD[((32-128))+rdi]
5445 vpand xmm9,xmm9,xmm6
5446 vmovdqu xmm2,XMMWORD[((64-128))+rdi]
5447 vpand xmm10,xmm10,xmm6
5448 vmovdqu xmm5,XMMWORD[((96-128))+rdi]
5449 vpand xmm11,xmm11,xmm6
5450 vpaddd xmm8,xmm8,xmm0
5451 vmovdqu xmm0,XMMWORD[((128-128))+rdi]
5452 vpand xmm12,xmm12,xmm6
5453 vpaddd xmm9,xmm9,xmm1
5454 vmovdqu xmm1,XMMWORD[((160-128))+rdi]
5455 vpand xmm13,xmm13,xmm6
5456 vpaddd xmm10,xmm10,xmm2
5457 vmovdqu xmm2,XMMWORD[((192-128))+rdi]
5458 vpand xmm14,xmm14,xmm6
5459 vpaddd xmm11,xmm11,xmm5
5460 vmovdqu xmm5,XMMWORD[((224-128))+rdi]
5461 vpand xmm15,xmm15,xmm6
5462 vpaddd xmm12,xmm12,xmm0
5463 vpaddd xmm13,xmm13,xmm1
5464 vmovdqu XMMWORD[(0-128)+rdi],xmm8 ; write the eight updated rows back (16 bytes each, 32-byte stride)
5465 vpaddd xmm14,xmm14,xmm2
5466 vmovdqu XMMWORD[(32-128)+rdi],xmm9
5467 vpaddd xmm15,xmm15,xmm5
5468 vmovdqu XMMWORD[(64-128)+rdi],xmm10
5469 vmovdqu XMMWORD[(96-128)+rdi],xmm11
5470 vmovdqu XMMWORD[(128-128)+rdi],xmm12
5471 vmovdqu XMMWORD[(160-128)+rdi],xmm13
5472 vmovdqu XMMWORD[(192-128)+rdi],xmm14
5473 vmovdqu XMMWORD[(224-128)+rdi],xmm15
5474
5475 vmovdqu XMMWORD[rbx],xmm7 ; store the decremented counters
5476 vmovdqu xmm6,XMMWORD[$L$pbswap] ; reload xmm6, which was overwritten by the lane mask above
5477 dec edx ; edx counts iterations of $L$oop_avx
5478 jnz NEAR $L$oop_avx
5479
5480 mov edx,DWORD[280+rsp] ; reload the outer-loop count kept in the frame at rsp+280
5481 lea rdi,[16+rdi] ; advance rdi by 16 bytes
5482 lea rsi,[64+rsi] ; advance rsi by 64 bytes
5483 dec edx
5484 jnz NEAR $L$oop_grande_avx
5485
; Exit path of sha256_multi_block_avx: restore the registers that are
; callee-saved under the Microsoft x64 ABI (xmm6-xmm15, rbp, rbx, rdi, rsi),
; drop the stack frame and return.
5486$L$done_avx:
5487 mov rax,QWORD[272+rsp] ; rax = stack pointer value saved in the frame; all restores below are relative to it
; NOTE(review): the store to [272+rsp] is in this function's prologue, which is
; outside this view; the SSE variant at the top of the file saves its
; entry-time rsp in the same slot - confirm.
5488
5489 vzeroupper ; clear the upper YMM halves before the legacy-SSE movaps below and the return
5490 movaps xmm6,XMMWORD[((-184))+rax] ; xmm6..xmm15 come back from rax-184 .. rax-40, 16 bytes apart
5491 movaps xmm7,XMMWORD[((-168))+rax]
5492 movaps xmm8,XMMWORD[((-152))+rax]
5493 movaps xmm9,XMMWORD[((-136))+rax]
5494 movaps xmm10,XMMWORD[((-120))+rax]
5495 movaps xmm11,XMMWORD[((-104))+rax]
5496 movaps xmm12,XMMWORD[((-88))+rax]
5497 movaps xmm13,XMMWORD[((-72))+rax]
5498 movaps xmm14,XMMWORD[((-56))+rax]
5499 movaps xmm15,XMMWORD[((-40))+rax]
5500 mov rbp,QWORD[((-16))+rax] ; rbp is reloaded from rax-16
5501
5502 mov rbx,QWORD[((-8))+rax] ; rbx is reloaded from rax-8
5503
5504 lea rsp,[rax] ; rsp = rax: discard the whole frame
5505
5506$L$epilogue_avx:
5507 mov rdi,QWORD[8+rsp] ;WIN64 epilogue - reload rdi/rsi from [8+rsp]/[16+rsp]
5508 mov rsi,QWORD[16+rsp]
5509 DB 0F3h,0C3h ;repret - the bytes F3 C3 encode "rep ret"
5510
5511$L$SEH_end_sha256_multi_block_avx:
5512
5513ALIGN 32
; sha256_multi_block_avx2 - 8-lane (YMM) variant; this part is the Win64 entry
; thunk and the stack-frame prologue.
; In:  rcx, rdx, r8 = the three arguments in Microsoft x64 order.
; NOTE(review): presumably the same (ctx, inp, num) arguments as
; sha256_multi_block; the C prototype is not part of this file - confirm.
5514sha256_multi_block_avx2:
5515 mov QWORD[8+rsp],rdi ;WIN64 prologue - rdi/rsi are callee-saved on Win64; park them just above the return address
5516 mov QWORD[16+rsp],rsi
5517 mov rax,rsp
5518$L$SEH_begin_sha256_multi_block_avx2:
5519 mov rdi,rcx ; move the Win64 argument registers into rdi, rsi, rdx,
5520 mov rsi,rdx ; which the body below uses
5521 mov rdx,r8
5522
5523
5524
; NOTE(review): secondary entry label; presumably the jump target of the
; capability dispatch in the other entry points (not visible in this chunk).
5525_avx2_shortcut:
5526 mov rax,rsp ; rax = rsp before any push; base for the offsets below and for the epilogue
5527
5528 push rbx ; six pushes = 48 bytes, so rsp = rax-48 afterwards
5529
5530 push rbp
5531
5532 push r12
5533
5534 push r13
5535
5536 push r14
5537
5538 push r15
5539
5540 lea rsp,[((-168))+rsp] ; rsp = rax-216: room for the ten XMM saves (160 bytes) plus 8
5541 movaps XMMWORD[rsp],xmm6 ; xmm6..xmm11 go to rsp+0 .. rsp+80, i.e. rax-216 .. rax-136
5542 movaps XMMWORD[16+rsp],xmm7
5543 movaps XMMWORD[32+rsp],xmm8
5544 movaps XMMWORD[48+rsp],xmm9
5545 movaps XMMWORD[64+rsp],xmm10
5546 movaps XMMWORD[80+rsp],xmm11
5547 movaps XMMWORD[(-120)+rax],xmm12 ; xmm12..xmm15 continue the same area at rax-120 .. rax-72
5548 movaps XMMWORD[(-104)+rax],xmm13
5549 movaps XMMWORD[(-88)+rax],xmm14
5550 movaps XMMWORD[(-72)+rax],xmm15
5551 sub rsp,576 ; reserve 576 more bytes for the working frame ...
5552 and rsp,-256 ; ... and round rsp down to a 256-byte boundary
5553 mov QWORD[544+rsp],rax ; keep the entry-time rsp at frame offset 544
5554
; One-time setup, then the head of the outer loop. Each outer iteration reads
; eight 16-byte entries at rsi (a qword at +0, a dword at +8), loads eight YMM
; rows from the area addressed through rdi and jumps into $L$oop_avx2.
; NOTE(review): presumably each entry is {input pointer, block count} and the
; eight rows are the SHA-256 state words for 8 lanes; K256 and $L$pbswap are
; defined outside this chunk - confirm.
5555$L$body_avx2:
5556 lea rbp,[((K256+128))] ; rbp = K256+128; the round code adds memory operands at -128+rbp .. 96+rbp
5557 lea rdi,[128+rdi] ; bias rdi by 128; the rows are then addressed as (0-128)+rdi .. (224-128)+rdi
5558
5559$L$oop_grande_avx2:
5560 mov DWORD[552+rsp],edx ; park the incoming edx (low half of the third argument) at frame offset 552
5561 xor edx,edx ; edx = 0; becomes the running signed maximum of the eight dwords read below
5562 lea rbx,[512+rsp] ; rbx = eight-dword array at rsp+512 that receives a copy of each dword
5563
; The same seven-instruction pattern repeats for entries 0..7:
;   reg = qword [16*i+rsi]; ecx = dword [16*i+8+rsi]
;   edx = max(edx, ecx) (signed, via cmovg); [4*i+rbx] = ecx
;   if ecx <= 0 then reg = rbx-independent fallback rbp (K256+128)
; mov does not modify flags, so each cmovle still tests the result of test ecx,ecx.
; Pointer registers in entry order: r12, r13, r14, r15, r8, r9, r10, r11.
5564 mov r12,QWORD[rsi]
5565
5566 mov ecx,DWORD[8+rsi]
5567 cmp ecx,edx
5568 cmovg edx,ecx
5569 test ecx,ecx
5570 mov DWORD[rbx],ecx
5571 cmovle r12,rbp
5572
5573 mov r13,QWORD[16+rsi]
5574
5575 mov ecx,DWORD[24+rsi]
5576 cmp ecx,edx
5577 cmovg edx,ecx
5578 test ecx,ecx
5579 mov DWORD[4+rbx],ecx
5580 cmovle r13,rbp
5581
5582 mov r14,QWORD[32+rsi]
5583
5584 mov ecx,DWORD[40+rsi]
5585 cmp ecx,edx
5586 cmovg edx,ecx
5587 test ecx,ecx
5588 mov DWORD[8+rbx],ecx
5589 cmovle r14,rbp
5590
5591 mov r15,QWORD[48+rsi]
5592
5593 mov ecx,DWORD[56+rsi]
5594 cmp ecx,edx
5595 cmovg edx,ecx
5596 test ecx,ecx
5597 mov DWORD[12+rbx],ecx
5598 cmovle r15,rbp
5599
5600 mov r8,QWORD[64+rsi]
5601
5602 mov ecx,DWORD[72+rsi]
5603 cmp ecx,edx
5604 cmovg edx,ecx
5605 test ecx,ecx
5606 mov DWORD[16+rbx],ecx
5607 cmovle r8,rbp
5608
5609 mov r9,QWORD[80+rsi]
5610
5611 mov ecx,DWORD[88+rsi]
5612 cmp ecx,edx
5613 cmovg edx,ecx
5614 test ecx,ecx
5615 mov DWORD[20+rbx],ecx
5616 cmovle r9,rbp
5617
5618 mov r10,QWORD[96+rsi]
5619
5620 mov ecx,DWORD[104+rsi]
5621 cmp ecx,edx
5622 cmovg edx,ecx
5623 test ecx,ecx
5624 mov DWORD[24+rbx],ecx
5625 cmovle r10,rbp
5626
5627 mov r11,QWORD[112+rsi]
5628
5629 mov ecx,DWORD[120+rsi]
5630 cmp ecx,edx
5631 cmovg edx,ecx
5632 test ecx,ecx
5633 mov DWORD[28+rbx],ecx
5634 cmovle r11,rbp
5635 vmovdqu ymm8,YMMWORD[((0-128))+rdi] ; load the eight 32-byte rows into ymm8..ymm15
5636 lea rax,[128+rsp] ; rax = rsp+128; the round code stores through (N-128)+rax, i.e. starting at rsp+0
5637 vmovdqu ymm9,YMMWORD[((32-128))+rdi]
5638 lea rbx,[((256+128))+rsp] ; rbx = rsp+384, 128 bytes below the dword array written above at rsp+512
5639 vmovdqu ymm10,YMMWORD[((64-128))+rdi]
5640 vmovdqu ymm11,YMMWORD[((96-128))+rdi]
5641 vmovdqu ymm12,YMMWORD[((128-128))+rdi]
5642 vmovdqu ymm13,YMMWORD[((160-128))+rdi]
5643 vmovdqu ymm14,YMMWORD[((192-128))+rdi]
5644 vmovdqu ymm15,YMMWORD[((224-128))+rdi]
5645 vmovdqu ymm6,YMMWORD[$L$pbswap] ; ymm6 = the vpshufb control operand used in $L$oop_avx2
5646 jmp NEAR $L$oop_avx2
5647
5648ALIGN 32
5649$L$oop_avx2:
5650 vpxor ymm4,ymm10,ymm9
5651 vmovd xmm5,DWORD[r12]
5652 vmovd xmm0,DWORD[r8]
5653 vmovd xmm1,DWORD[r13]
5654 vmovd xmm2,DWORD[r9]
5655 vpinsrd xmm5,xmm5,DWORD[r14],1
5656 vpinsrd xmm0,xmm0,DWORD[r10],1
5657 vpinsrd xmm1,xmm1,DWORD[r15],1
5658 vpunpckldq ymm5,ymm5,ymm1
5659 vpinsrd xmm2,xmm2,DWORD[r11],1
5660 vpunpckldq ymm0,ymm0,ymm2
5661 vinserti128 ymm5,ymm5,xmm0,1
5662 vpshufb ymm5,ymm5,ymm6
5663 vpsrld ymm7,ymm12,6
5664 vpslld ymm2,ymm12,26
5665 vmovdqu YMMWORD[(0-128)+rax],ymm5
5666 vpaddd ymm5,ymm5,ymm15
5667
5668 vpsrld ymm1,ymm12,11
5669 vpxor ymm7,ymm7,ymm2
5670 vpslld ymm2,ymm12,21
5671 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp]
5672 vpxor ymm7,ymm7,ymm1
5673
5674 vpsrld ymm1,ymm12,25
5675 vpxor ymm7,ymm7,ymm2
5676
5677 vpslld ymm2,ymm12,7
5678 vpandn ymm0,ymm12,ymm14
5679 vpand ymm3,ymm12,ymm13
5680
5681 vpxor ymm7,ymm7,ymm1
5682
5683 vpsrld ymm15,ymm8,2
5684 vpxor ymm7,ymm7,ymm2
5685
5686 vpslld ymm1,ymm8,30
5687 vpxor ymm0,ymm0,ymm3
5688 vpxor ymm3,ymm9,ymm8
5689
5690 vpxor ymm15,ymm15,ymm1
5691 vpaddd ymm5,ymm5,ymm7
5692
5693 vpsrld ymm1,ymm8,13
5694
5695 vpslld ymm2,ymm8,19
5696 vpaddd ymm5,ymm5,ymm0
5697 vpand ymm4,ymm4,ymm3
5698
5699 vpxor ymm7,ymm15,ymm1
5700
5701 vpsrld ymm1,ymm8,22
5702 vpxor ymm7,ymm7,ymm2
5703
5704 vpslld ymm2,ymm8,10
5705 vpxor ymm15,ymm9,ymm4
5706 vpaddd ymm11,ymm11,ymm5
5707
5708 vpxor ymm7,ymm7,ymm1
5709 vpxor ymm7,ymm7,ymm2
5710
5711 vpaddd ymm15,ymm15,ymm5
5712 vpaddd ymm15,ymm15,ymm7
5713 vmovd xmm5,DWORD[4+r12]
5714 vmovd xmm0,DWORD[4+r8]
5715 vmovd xmm1,DWORD[4+r13]
5716 vmovd xmm2,DWORD[4+r9]
5717 vpinsrd xmm5,xmm5,DWORD[4+r14],1
5718 vpinsrd xmm0,xmm0,DWORD[4+r10],1
5719 vpinsrd xmm1,xmm1,DWORD[4+r15],1
5720 vpunpckldq ymm5,ymm5,ymm1
5721 vpinsrd xmm2,xmm2,DWORD[4+r11],1
5722 vpunpckldq ymm0,ymm0,ymm2
5723 vinserti128 ymm5,ymm5,xmm0,1
5724 vpshufb ymm5,ymm5,ymm6
5725 vpsrld ymm7,ymm11,6
5726 vpslld ymm2,ymm11,26
5727 vmovdqu YMMWORD[(32-128)+rax],ymm5
5728 vpaddd ymm5,ymm5,ymm14
5729
5730 vpsrld ymm1,ymm11,11
5731 vpxor ymm7,ymm7,ymm2
5732 vpslld ymm2,ymm11,21
5733 vpaddd ymm5,ymm5,YMMWORD[((-96))+rbp]
5734 vpxor ymm7,ymm7,ymm1
5735
5736 vpsrld ymm1,ymm11,25
5737 vpxor ymm7,ymm7,ymm2
5738
5739 vpslld ymm2,ymm11,7
5740 vpandn ymm0,ymm11,ymm13
5741 vpand ymm4,ymm11,ymm12
5742
5743 vpxor ymm7,ymm7,ymm1
5744
5745 vpsrld ymm14,ymm15,2
5746 vpxor ymm7,ymm7,ymm2
5747
5748 vpslld ymm1,ymm15,30
5749 vpxor ymm0,ymm0,ymm4
5750 vpxor ymm4,ymm8,ymm15
5751
5752 vpxor ymm14,ymm14,ymm1
5753 vpaddd ymm5,ymm5,ymm7
5754
5755 vpsrld ymm1,ymm15,13
5756
5757 vpslld ymm2,ymm15,19
5758 vpaddd ymm5,ymm5,ymm0
5759 vpand ymm3,ymm3,ymm4
5760
5761 vpxor ymm7,ymm14,ymm1
5762
5763 vpsrld ymm1,ymm15,22
5764 vpxor ymm7,ymm7,ymm2
5765
5766 vpslld ymm2,ymm15,10
5767 vpxor ymm14,ymm8,ymm3
5768 vpaddd ymm10,ymm10,ymm5
5769
5770 vpxor ymm7,ymm7,ymm1
5771 vpxor ymm7,ymm7,ymm2
5772
5773 vpaddd ymm14,ymm14,ymm5
5774 vpaddd ymm14,ymm14,ymm7
5775 vmovd xmm5,DWORD[8+r12]
5776 vmovd xmm0,DWORD[8+r8]
5777 vmovd xmm1,DWORD[8+r13]
5778 vmovd xmm2,DWORD[8+r9]
5779 vpinsrd xmm5,xmm5,DWORD[8+r14],1
5780 vpinsrd xmm0,xmm0,DWORD[8+r10],1
5781 vpinsrd xmm1,xmm1,DWORD[8+r15],1
5782 vpunpckldq ymm5,ymm5,ymm1
5783 vpinsrd xmm2,xmm2,DWORD[8+r11],1
5784 vpunpckldq ymm0,ymm0,ymm2
5785 vinserti128 ymm5,ymm5,xmm0,1
5786 vpshufb ymm5,ymm5,ymm6
5787 vpsrld ymm7,ymm10,6
5788 vpslld ymm2,ymm10,26
5789 vmovdqu YMMWORD[(64-128)+rax],ymm5
5790 vpaddd ymm5,ymm5,ymm13
5791
5792 vpsrld ymm1,ymm10,11
5793 vpxor ymm7,ymm7,ymm2
5794 vpslld ymm2,ymm10,21
5795 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp]
5796 vpxor ymm7,ymm7,ymm1
5797
5798 vpsrld ymm1,ymm10,25
5799 vpxor ymm7,ymm7,ymm2
5800
5801 vpslld ymm2,ymm10,7
5802 vpandn ymm0,ymm10,ymm12
5803 vpand ymm3,ymm10,ymm11
5804
5805 vpxor ymm7,ymm7,ymm1
5806
5807 vpsrld ymm13,ymm14,2
5808 vpxor ymm7,ymm7,ymm2
5809
5810 vpslld ymm1,ymm14,30
5811 vpxor ymm0,ymm0,ymm3
5812 vpxor ymm3,ymm15,ymm14
5813
5814 vpxor ymm13,ymm13,ymm1
5815 vpaddd ymm5,ymm5,ymm7
5816
5817 vpsrld ymm1,ymm14,13
5818
5819 vpslld ymm2,ymm14,19
5820 vpaddd ymm5,ymm5,ymm0
5821 vpand ymm4,ymm4,ymm3
5822
5823 vpxor ymm7,ymm13,ymm1
5824
5825 vpsrld ymm1,ymm14,22
5826 vpxor ymm7,ymm7,ymm2
5827
5828 vpslld ymm2,ymm14,10
5829 vpxor ymm13,ymm15,ymm4
5830 vpaddd ymm9,ymm9,ymm5
5831
5832 vpxor ymm7,ymm7,ymm1
5833 vpxor ymm7,ymm7,ymm2
5834
5835 vpaddd ymm13,ymm13,ymm5
5836 vpaddd ymm13,ymm13,ymm7
5837 vmovd xmm5,DWORD[12+r12]
5838 vmovd xmm0,DWORD[12+r8]
5839 vmovd xmm1,DWORD[12+r13]
5840 vmovd xmm2,DWORD[12+r9]
5841 vpinsrd xmm5,xmm5,DWORD[12+r14],1
5842 vpinsrd xmm0,xmm0,DWORD[12+r10],1
5843 vpinsrd xmm1,xmm1,DWORD[12+r15],1
5844 vpunpckldq ymm5,ymm5,ymm1
5845 vpinsrd xmm2,xmm2,DWORD[12+r11],1
5846 vpunpckldq ymm0,ymm0,ymm2
5847 vinserti128 ymm5,ymm5,xmm0,1
5848 vpshufb ymm5,ymm5,ymm6
5849 vpsrld ymm7,ymm9,6
5850 vpslld ymm2,ymm9,26
5851 vmovdqu YMMWORD[(96-128)+rax],ymm5
5852 vpaddd ymm5,ymm5,ymm12
5853
5854 vpsrld ymm1,ymm9,11
5855 vpxor ymm7,ymm7,ymm2
5856 vpslld ymm2,ymm9,21
5857 vpaddd ymm5,ymm5,YMMWORD[((-32))+rbp]
5858 vpxor ymm7,ymm7,ymm1
5859
5860 vpsrld ymm1,ymm9,25
5861 vpxor ymm7,ymm7,ymm2
5862
5863 vpslld ymm2,ymm9,7
5864 vpandn ymm0,ymm9,ymm11
5865 vpand ymm4,ymm9,ymm10
5866
5867 vpxor ymm7,ymm7,ymm1
5868
5869 vpsrld ymm12,ymm13,2
5870 vpxor ymm7,ymm7,ymm2
5871
5872 vpslld ymm1,ymm13,30
5873 vpxor ymm0,ymm0,ymm4
5874 vpxor ymm4,ymm14,ymm13
5875
5876 vpxor ymm12,ymm12,ymm1
5877 vpaddd ymm5,ymm5,ymm7
5878
5879 vpsrld ymm1,ymm13,13
5880
5881 vpslld ymm2,ymm13,19
5882 vpaddd ymm5,ymm5,ymm0
5883 vpand ymm3,ymm3,ymm4
5884
5885 vpxor ymm7,ymm12,ymm1
5886
5887 vpsrld ymm1,ymm13,22
5888 vpxor ymm7,ymm7,ymm2
5889
5890 vpslld ymm2,ymm13,10
5891 vpxor ymm12,ymm14,ymm3
5892 vpaddd ymm8,ymm8,ymm5
5893
5894 vpxor ymm7,ymm7,ymm1
5895 vpxor ymm7,ymm7,ymm2
5896
5897 vpaddd ymm12,ymm12,ymm5
5898 vpaddd ymm12,ymm12,ymm7
5899 vmovd xmm5,DWORD[16+r12]
5900 vmovd xmm0,DWORD[16+r8]
5901 vmovd xmm1,DWORD[16+r13]
5902 vmovd xmm2,DWORD[16+r9]
5903 vpinsrd xmm5,xmm5,DWORD[16+r14],1
5904 vpinsrd xmm0,xmm0,DWORD[16+r10],1
5905 vpinsrd xmm1,xmm1,DWORD[16+r15],1
5906 vpunpckldq ymm5,ymm5,ymm1
5907 vpinsrd xmm2,xmm2,DWORD[16+r11],1
5908 vpunpckldq ymm0,ymm0,ymm2
5909 vinserti128 ymm5,ymm5,xmm0,1
5910 vpshufb ymm5,ymm5,ymm6
5911 vpsrld ymm7,ymm8,6
5912 vpslld ymm2,ymm8,26
5913 vmovdqu YMMWORD[(128-128)+rax],ymm5
5914 vpaddd ymm5,ymm5,ymm11
5915
5916 vpsrld ymm1,ymm8,11
5917 vpxor ymm7,ymm7,ymm2
5918 vpslld ymm2,ymm8,21
5919 vpaddd ymm5,ymm5,YMMWORD[rbp]
5920 vpxor ymm7,ymm7,ymm1
5921
5922 vpsrld ymm1,ymm8,25
5923 vpxor ymm7,ymm7,ymm2
5924
5925 vpslld ymm2,ymm8,7
5926 vpandn ymm0,ymm8,ymm10
5927 vpand ymm3,ymm8,ymm9
5928
5929 vpxor ymm7,ymm7,ymm1
5930
5931 vpsrld ymm11,ymm12,2
5932 vpxor ymm7,ymm7,ymm2
5933
5934 vpslld ymm1,ymm12,30
5935 vpxor ymm0,ymm0,ymm3
5936 vpxor ymm3,ymm13,ymm12
5937
5938 vpxor ymm11,ymm11,ymm1
5939 vpaddd ymm5,ymm5,ymm7
5940
5941 vpsrld ymm1,ymm12,13
5942
5943 vpslld ymm2,ymm12,19
5944 vpaddd ymm5,ymm5,ymm0
5945 vpand ymm4,ymm4,ymm3
5946
5947 vpxor ymm7,ymm11,ymm1
5948
5949 vpsrld ymm1,ymm12,22
5950 vpxor ymm7,ymm7,ymm2
5951
5952 vpslld ymm2,ymm12,10
5953 vpxor ymm11,ymm13,ymm4
5954 vpaddd ymm15,ymm15,ymm5
5955
5956 vpxor ymm7,ymm7,ymm1
5957 vpxor ymm7,ymm7,ymm2
5958
5959 vpaddd ymm11,ymm11,ymm5
5960 vpaddd ymm11,ymm11,ymm7
; ---- SHA-256 round (8 lanes) on the message dword at byte offset 20 of each input stream ----
; Roles this round: a=ymm11 b=ymm12 c=ymm13 d=ymm14 e=ymm15 f=ymm8 g=ymm9 h=ymm10
; ymm5 = message word, then T1 accumulator; ymm7 = Sigma accumulator; ymm0-ymm2 = scratch.
5961 vmovd xmm5,DWORD[20+r12]  ; gather one dword from each of the stream pointers r8..r15
5962 vmovd xmm0,DWORD[20+r8]
5963 vmovd xmm1,DWORD[20+r13]
5964 vmovd xmm2,DWORD[20+r9]
5965 vpinsrd xmm5,xmm5,DWORD[20+r14],1
5966 vpinsrd xmm0,xmm0,DWORD[20+r10],1
5967 vpinsrd xmm1,xmm1,DWORD[20+r15],1
5968 vpunpckldq ymm5,ymm5,ymm1  ; low lane = dwords from r12,r13,r14,r15
5969 vpinsrd xmm2,xmm2,DWORD[20+r11],1
5970 vpunpckldq ymm0,ymm0,ymm2  ; xmm0 = dwords from r8,r9,r10,r11
5971 vinserti128 ymm5,ymm5,xmm0,1  ; high lane = xmm0
5972 vpshufb ymm5,ymm5,ymm6  ; byte shuffle by the mask in ymm6 (loaded outside this view; presumably big-endian swap)
5973 vpsrld ymm7,ymm15,6
5974 vpslld ymm2,ymm15,26
5975 vmovdqu YMMWORD[(160-128)+rax],ymm5  ; keep the word in the schedule buffer
5976 vpaddd ymm5,ymm5,ymm10  ; X += h
5977
5978 vpsrld ymm1,ymm15,11
5979 vpxor ymm7,ymm7,ymm2
5980 vpslld ymm2,ymm15,21
5981 vpaddd ymm5,ymm5,YMMWORD[32+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
5982 vpxor ymm7,ymm7,ymm1
5983
5984 vpsrld ymm1,ymm15,25
5985 vpxor ymm7,ymm7,ymm2
5986
5987 vpslld ymm2,ymm15,7
5988 vpandn ymm0,ymm15,ymm9  ; ~e & g
5989 vpand ymm4,ymm15,ymm8  ; e & f (ymm4 borrowed as scratch)
5990
5991 vpxor ymm7,ymm7,ymm1
5992
5993 vpsrld ymm10,ymm11,2  ; h is consumed above; its register is reused for Sigma0/Maj
5994 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
5995
5996 vpslld ymm1,ymm11,30
5997 vpxor ymm0,ymm0,ymm4  ; ymm0 = Ch(e,f,g)
5998 vpxor ymm4,ymm12,ymm11  ; ymm4 = a^b
5999
6000 vpxor ymm10,ymm10,ymm1
6001 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
6002
6003 vpsrld ymm1,ymm11,13
6004
6005 vpslld ymm2,ymm11,19
6006 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
6007 vpand ymm3,ymm3,ymm4  ; (b^c)&(a^b): ymm3 carries the preceding round's a^b, i.e. this round's b^c
6008
6009 vpxor ymm7,ymm10,ymm1
6010
6011 vpsrld ymm1,ymm11,22
6012 vpxor ymm7,ymm7,ymm2
6013
6014 vpslld ymm2,ymm11,10
6015 vpxor ymm10,ymm12,ymm3  ; ymm10 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6016 vpaddd ymm14,ymm14,ymm5  ; d += T1
6017
6018 vpxor ymm7,ymm7,ymm1
6019 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6020
6021 vpaddd ymm10,ymm10,ymm5  ; Maj + T1
6022 vpaddd ymm10,ymm10,ymm7  ; + Sigma0(a): ymm10 now holds the value that replaces h
; ---- SHA-256 round (8 lanes) on the message dword at byte offset 24 of each input stream ----
; Roles this round: a=ymm10 b=ymm11 c=ymm12 d=ymm13 e=ymm14 f=ymm15 g=ymm8 h=ymm9
; ymm5 = message word, then T1 accumulator; ymm7 = Sigma accumulator; ymm0-ymm2 = scratch.
6023 vmovd xmm5,DWORD[24+r12]  ; gather one dword from each of the stream pointers r8..r15
6024 vmovd xmm0,DWORD[24+r8]
6025 vmovd xmm1,DWORD[24+r13]
6026 vmovd xmm2,DWORD[24+r9]
6027 vpinsrd xmm5,xmm5,DWORD[24+r14],1
6028 vpinsrd xmm0,xmm0,DWORD[24+r10],1
6029 vpinsrd xmm1,xmm1,DWORD[24+r15],1
6030 vpunpckldq ymm5,ymm5,ymm1  ; low lane = dwords from r12,r13,r14,r15
6031 vpinsrd xmm2,xmm2,DWORD[24+r11],1
6032 vpunpckldq ymm0,ymm0,ymm2  ; xmm0 = dwords from r8,r9,r10,r11
6033 vinserti128 ymm5,ymm5,xmm0,1  ; high lane = xmm0
6034 vpshufb ymm5,ymm5,ymm6  ; byte shuffle by the mask in ymm6 (loaded outside this view; presumably big-endian swap)
6035 vpsrld ymm7,ymm14,6
6036 vpslld ymm2,ymm14,26
6037 vmovdqu YMMWORD[(192-128)+rax],ymm5  ; keep the word in the schedule buffer
6038 vpaddd ymm5,ymm5,ymm9  ; X += h
6039
6040 vpsrld ymm1,ymm14,11
6041 vpxor ymm7,ymm7,ymm2
6042 vpslld ymm2,ymm14,21
6043 vpaddd ymm5,ymm5,YMMWORD[64+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6044 vpxor ymm7,ymm7,ymm1
6045
6046 vpsrld ymm1,ymm14,25
6047 vpxor ymm7,ymm7,ymm2
6048
6049 vpslld ymm2,ymm14,7
6050 vpandn ymm0,ymm14,ymm8  ; ~e & g
6051 vpand ymm3,ymm14,ymm15  ; e & f (ymm3 borrowed as scratch)
6052
6053 vpxor ymm7,ymm7,ymm1
6054
6055 vpsrld ymm9,ymm10,2  ; h is consumed above; its register is reused for Sigma0/Maj
6056 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6057
6058 vpslld ymm1,ymm10,30
6059 vpxor ymm0,ymm0,ymm3  ; ymm0 = Ch(e,f,g)
6060 vpxor ymm3,ymm11,ymm10  ; ymm3 = a^b
6061
6062 vpxor ymm9,ymm9,ymm1
6063 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
6064
6065 vpsrld ymm1,ymm10,13
6066
6067 vpslld ymm2,ymm10,19
6068 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
6069 vpand ymm4,ymm4,ymm3  ; (b^c)&(a^b): ymm4 carries the preceding round's a^b, i.e. this round's b^c
6070
6071 vpxor ymm7,ymm9,ymm1
6072
6073 vpsrld ymm1,ymm10,22
6074 vpxor ymm7,ymm7,ymm2
6075
6076 vpslld ymm2,ymm10,10
6077 vpxor ymm9,ymm11,ymm4  ; ymm9 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6078 vpaddd ymm13,ymm13,ymm5  ; d += T1
6079
6080 vpxor ymm7,ymm7,ymm1
6081 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6082
6083 vpaddd ymm9,ymm9,ymm5  ; Maj + T1
6084 vpaddd ymm9,ymm9,ymm7  ; + Sigma0(a): ymm9 now holds the value that replaces h
; ---- SHA-256 round (8 lanes) on the message dword at byte offset 28 of each input stream ----
; Roles this round: a=ymm9 b=ymm10 c=ymm11 d=ymm12 e=ymm13 f=ymm14 g=ymm15 h=ymm8
; ymm5 = message word, then T1 accumulator; ymm7 = Sigma accumulator; ymm0-ymm2 = scratch.
6085 vmovd xmm5,DWORD[28+r12]  ; gather one dword from each of the stream pointers r8..r15
6086 vmovd xmm0,DWORD[28+r8]
6087 vmovd xmm1,DWORD[28+r13]
6088 vmovd xmm2,DWORD[28+r9]
6089 vpinsrd xmm5,xmm5,DWORD[28+r14],1
6090 vpinsrd xmm0,xmm0,DWORD[28+r10],1
6091 vpinsrd xmm1,xmm1,DWORD[28+r15],1
6092 vpunpckldq ymm5,ymm5,ymm1  ; low lane = dwords from r12,r13,r14,r15
6093 vpinsrd xmm2,xmm2,DWORD[28+r11],1
6094 vpunpckldq ymm0,ymm0,ymm2  ; xmm0 = dwords from r8,r9,r10,r11
6095 vinserti128 ymm5,ymm5,xmm0,1  ; high lane = xmm0
6096 vpshufb ymm5,ymm5,ymm6  ; byte shuffle by the mask in ymm6 (loaded outside this view; presumably big-endian swap)
6097 vpsrld ymm7,ymm13,6
6098 vpslld ymm2,ymm13,26
6099 vmovdqu YMMWORD[(224-128)+rax],ymm5  ; keep the word in the schedule buffer
6100 vpaddd ymm5,ymm5,ymm8  ; X += h
6101
6102 vpsrld ymm1,ymm13,11
6103 vpxor ymm7,ymm7,ymm2
6104 vpslld ymm2,ymm13,21
6105 vpaddd ymm5,ymm5,YMMWORD[96+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6106 vpxor ymm7,ymm7,ymm1
6107
6108 vpsrld ymm1,ymm13,25
6109 vpxor ymm7,ymm7,ymm2
6110
6111 vpslld ymm2,ymm13,7
6112 vpandn ymm0,ymm13,ymm15  ; ~e & g
6113 vpand ymm4,ymm13,ymm14  ; e & f (ymm4 borrowed as scratch)
6114
6115 vpxor ymm7,ymm7,ymm1
6116
6117 vpsrld ymm8,ymm9,2  ; h is consumed above; its register is reused for Sigma0/Maj
6118 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6119
6120 vpslld ymm1,ymm9,30
6121 vpxor ymm0,ymm0,ymm4  ; ymm0 = Ch(e,f,g)
6122 vpxor ymm4,ymm10,ymm9  ; ymm4 = a^b
6123
6124 vpxor ymm8,ymm8,ymm1
6125 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
6126
6127 vpsrld ymm1,ymm9,13
6128
6129 vpslld ymm2,ymm9,19
6130 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
6131 vpand ymm3,ymm3,ymm4  ; (b^c)&(a^b): ymm3 carries the preceding round's a^b, i.e. this round's b^c
6132
6133 vpxor ymm7,ymm8,ymm1
6134
6135 vpsrld ymm1,ymm9,22
6136 vpxor ymm7,ymm7,ymm2
6137
6138 vpslld ymm2,ymm9,10
6139 vpxor ymm8,ymm10,ymm3  ; ymm8 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6140 vpaddd ymm12,ymm12,ymm5  ; d += T1
6141
6142 vpxor ymm7,ymm7,ymm1
6143 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6144
6145 vpaddd ymm8,ymm8,ymm5  ; Maj + T1
6146 vpaddd ymm8,ymm8,ymm7  ; + Sigma0(a): ymm8 now holds the value that replaces h
6147 add rbp,256  ; step the constant pointer by 8 vectors of 32 bytes
; ---- SHA-256 round (8 lanes) on the message dword at byte offset 32 of each input stream ----
; Roles this round: a=ymm8 b=ymm9 c=ymm10 d=ymm11 e=ymm12 f=ymm13 g=ymm14 h=ymm15
; ymm5 = message word, then T1 accumulator; ymm7 = Sigma accumulator; ymm0-ymm2 = scratch.
6148 vmovd xmm5,DWORD[32+r12]  ; gather one dword from each of the stream pointers r8..r15
6149 vmovd xmm0,DWORD[32+r8]
6150 vmovd xmm1,DWORD[32+r13]
6151 vmovd xmm2,DWORD[32+r9]
6152 vpinsrd xmm5,xmm5,DWORD[32+r14],1
6153 vpinsrd xmm0,xmm0,DWORD[32+r10],1
6154 vpinsrd xmm1,xmm1,DWORD[32+r15],1
6155 vpunpckldq ymm5,ymm5,ymm1  ; low lane = dwords from r12,r13,r14,r15
6156 vpinsrd xmm2,xmm2,DWORD[32+r11],1
6157 vpunpckldq ymm0,ymm0,ymm2  ; xmm0 = dwords from r8,r9,r10,r11
6158 vinserti128 ymm5,ymm5,xmm0,1  ; high lane = xmm0
6159 vpshufb ymm5,ymm5,ymm6  ; byte shuffle by the mask in ymm6 (loaded outside this view; presumably big-endian swap)
6160 vpsrld ymm7,ymm12,6
6161 vpslld ymm2,ymm12,26
6162 vmovdqu YMMWORD[(256-256-128)+rbx],ymm5  ; keep the word in the schedule buffer (rbx-based half)
6163 vpaddd ymm5,ymm5,ymm15  ; X += h
6164
6165 vpsrld ymm1,ymm12,11
6166 vpxor ymm7,ymm7,ymm2
6167 vpslld ymm2,ymm12,21
6168 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6169 vpxor ymm7,ymm7,ymm1
6170
6171 vpsrld ymm1,ymm12,25
6172 vpxor ymm7,ymm7,ymm2
6173
6174 vpslld ymm2,ymm12,7
6175 vpandn ymm0,ymm12,ymm14  ; ~e & g
6176 vpand ymm3,ymm12,ymm13  ; e & f (ymm3 borrowed as scratch)
6177
6178 vpxor ymm7,ymm7,ymm1
6179
6180 vpsrld ymm15,ymm8,2  ; h is consumed above; its register is reused for Sigma0/Maj
6181 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6182
6183 vpslld ymm1,ymm8,30
6184 vpxor ymm0,ymm0,ymm3  ; ymm0 = Ch(e,f,g)
6185 vpxor ymm3,ymm9,ymm8  ; ymm3 = a^b
6186
6187 vpxor ymm15,ymm15,ymm1
6188 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
6189
6190 vpsrld ymm1,ymm8,13
6191
6192 vpslld ymm2,ymm8,19
6193 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
6194 vpand ymm4,ymm4,ymm3  ; (b^c)&(a^b): ymm4 carries the preceding round's a^b, i.e. this round's b^c
6195
6196 vpxor ymm7,ymm15,ymm1
6197
6198 vpsrld ymm1,ymm8,22
6199 vpxor ymm7,ymm7,ymm2
6200
6201 vpslld ymm2,ymm8,10
6202 vpxor ymm15,ymm9,ymm4  ; ymm15 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6203 vpaddd ymm11,ymm11,ymm5  ; d += T1
6204
6205 vpxor ymm7,ymm7,ymm1
6206 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6207
6208 vpaddd ymm15,ymm15,ymm5  ; Maj + T1
6209 vpaddd ymm15,ymm15,ymm7  ; + Sigma0(a): ymm15 now holds the value that replaces h
; ---- SHA-256 round (8 lanes) on the message dword at byte offset 36 of each input stream ----
; Roles this round: a=ymm15 b=ymm8 c=ymm9 d=ymm10 e=ymm11 f=ymm12 g=ymm13 h=ymm14
; ymm5 = message word, then T1 accumulator; ymm7 = Sigma accumulator; ymm0-ymm2 = scratch.
6210 vmovd xmm5,DWORD[36+r12]  ; gather one dword from each of the stream pointers r8..r15
6211 vmovd xmm0,DWORD[36+r8]
6212 vmovd xmm1,DWORD[36+r13]
6213 vmovd xmm2,DWORD[36+r9]
6214 vpinsrd xmm5,xmm5,DWORD[36+r14],1
6215 vpinsrd xmm0,xmm0,DWORD[36+r10],1
6216 vpinsrd xmm1,xmm1,DWORD[36+r15],1
6217 vpunpckldq ymm5,ymm5,ymm1  ; low lane = dwords from r12,r13,r14,r15
6218 vpinsrd xmm2,xmm2,DWORD[36+r11],1
6219 vpunpckldq ymm0,ymm0,ymm2  ; xmm0 = dwords from r8,r9,r10,r11
6220 vinserti128 ymm5,ymm5,xmm0,1  ; high lane = xmm0
6221 vpshufb ymm5,ymm5,ymm6  ; byte shuffle by the mask in ymm6 (loaded outside this view; presumably big-endian swap)
6222 vpsrld ymm7,ymm11,6
6223 vpslld ymm2,ymm11,26
6224 vmovdqu YMMWORD[(288-256-128)+rbx],ymm5  ; keep the word in the schedule buffer (rbx-based half)
6225 vpaddd ymm5,ymm5,ymm14  ; X += h
6226
6227 vpsrld ymm1,ymm11,11
6228 vpxor ymm7,ymm7,ymm2
6229 vpslld ymm2,ymm11,21
6230 vpaddd ymm5,ymm5,YMMWORD[((-96))+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6231 vpxor ymm7,ymm7,ymm1
6232
6233 vpsrld ymm1,ymm11,25
6234 vpxor ymm7,ymm7,ymm2
6235
6236 vpslld ymm2,ymm11,7
6237 vpandn ymm0,ymm11,ymm13  ; ~e & g
6238 vpand ymm4,ymm11,ymm12  ; e & f (ymm4 borrowed as scratch)
6239
6240 vpxor ymm7,ymm7,ymm1
6241
6242 vpsrld ymm14,ymm15,2  ; h is consumed above; its register is reused for Sigma0/Maj
6243 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6244
6245 vpslld ymm1,ymm15,30
6246 vpxor ymm0,ymm0,ymm4  ; ymm0 = Ch(e,f,g)
6247 vpxor ymm4,ymm8,ymm15  ; ymm4 = a^b
6248
6249 vpxor ymm14,ymm14,ymm1
6250 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
6251
6252 vpsrld ymm1,ymm15,13
6253
6254 vpslld ymm2,ymm15,19
6255 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
6256 vpand ymm3,ymm3,ymm4  ; (b^c)&(a^b): ymm3 carries the preceding round's a^b, i.e. this round's b^c
6257
6258 vpxor ymm7,ymm14,ymm1
6259
6260 vpsrld ymm1,ymm15,22
6261 vpxor ymm7,ymm7,ymm2
6262
6263 vpslld ymm2,ymm15,10
6264 vpxor ymm14,ymm8,ymm3  ; ymm14 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6265 vpaddd ymm10,ymm10,ymm5  ; d += T1
6266
6267 vpxor ymm7,ymm7,ymm1
6268 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6269
6270 vpaddd ymm14,ymm14,ymm5  ; Maj + T1
6271 vpaddd ymm14,ymm14,ymm7  ; + Sigma0(a): ymm14 now holds the value that replaces h
; ---- SHA-256 round (8 lanes) on the message dword at byte offset 40 of each input stream ----
; Roles this round: a=ymm14 b=ymm15 c=ymm8 d=ymm9 e=ymm10 f=ymm11 g=ymm12 h=ymm13
; ymm5 = message word, then T1 accumulator; ymm7 = Sigma accumulator; ymm0-ymm2 = scratch.
6272 vmovd xmm5,DWORD[40+r12]  ; gather one dword from each of the stream pointers r8..r15
6273 vmovd xmm0,DWORD[40+r8]
6274 vmovd xmm1,DWORD[40+r13]
6275 vmovd xmm2,DWORD[40+r9]
6276 vpinsrd xmm5,xmm5,DWORD[40+r14],1
6277 vpinsrd xmm0,xmm0,DWORD[40+r10],1
6278 vpinsrd xmm1,xmm1,DWORD[40+r15],1
6279 vpunpckldq ymm5,ymm5,ymm1  ; low lane = dwords from r12,r13,r14,r15
6280 vpinsrd xmm2,xmm2,DWORD[40+r11],1
6281 vpunpckldq ymm0,ymm0,ymm2  ; xmm0 = dwords from r8,r9,r10,r11
6282 vinserti128 ymm5,ymm5,xmm0,1  ; high lane = xmm0
6283 vpshufb ymm5,ymm5,ymm6  ; byte shuffle by the mask in ymm6 (loaded outside this view; presumably big-endian swap)
6284 vpsrld ymm7,ymm10,6
6285 vpslld ymm2,ymm10,26
6286 vmovdqu YMMWORD[(320-256-128)+rbx],ymm5  ; keep the word in the schedule buffer (rbx-based half)
6287 vpaddd ymm5,ymm5,ymm13  ; X += h
6288
6289 vpsrld ymm1,ymm10,11
6290 vpxor ymm7,ymm7,ymm2
6291 vpslld ymm2,ymm10,21
6292 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6293 vpxor ymm7,ymm7,ymm1
6294
6295 vpsrld ymm1,ymm10,25
6296 vpxor ymm7,ymm7,ymm2
6297
6298 vpslld ymm2,ymm10,7
6299 vpandn ymm0,ymm10,ymm12  ; ~e & g
6300 vpand ymm3,ymm10,ymm11  ; e & f (ymm3 borrowed as scratch)
6301
6302 vpxor ymm7,ymm7,ymm1
6303
6304 vpsrld ymm13,ymm14,2  ; h is consumed above; its register is reused for Sigma0/Maj
6305 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6306
6307 vpslld ymm1,ymm14,30
6308 vpxor ymm0,ymm0,ymm3  ; ymm0 = Ch(e,f,g)
6309 vpxor ymm3,ymm15,ymm14  ; ymm3 = a^b
6310
6311 vpxor ymm13,ymm13,ymm1
6312 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
6313
6314 vpsrld ymm1,ymm14,13
6315
6316 vpslld ymm2,ymm14,19
6317 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
6318 vpand ymm4,ymm4,ymm3  ; (b^c)&(a^b): ymm4 carries the preceding round's a^b, i.e. this round's b^c
6319
6320 vpxor ymm7,ymm13,ymm1
6321
6322 vpsrld ymm1,ymm14,22
6323 vpxor ymm7,ymm7,ymm2
6324
6325 vpslld ymm2,ymm14,10
6326 vpxor ymm13,ymm15,ymm4  ; ymm13 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6327 vpaddd ymm9,ymm9,ymm5  ; d += T1
6328
6329 vpxor ymm7,ymm7,ymm1
6330 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6331
6332 vpaddd ymm13,ymm13,ymm5  ; Maj + T1
6333 vpaddd ymm13,ymm13,ymm7  ; + Sigma0(a): ymm13 now holds the value that replaces h
; ---- SHA-256 round (8 lanes) on the message dword at byte offset 44 of each input stream ----
; Roles this round: a=ymm13 b=ymm14 c=ymm15 d=ymm8 e=ymm9 f=ymm10 g=ymm11 h=ymm12
; ymm5 = message word, then T1 accumulator; ymm7 = Sigma accumulator; ymm0-ymm2 = scratch.
6334 vmovd xmm5,DWORD[44+r12]  ; gather one dword from each of the stream pointers r8..r15
6335 vmovd xmm0,DWORD[44+r8]
6336 vmovd xmm1,DWORD[44+r13]
6337 vmovd xmm2,DWORD[44+r9]
6338 vpinsrd xmm5,xmm5,DWORD[44+r14],1
6339 vpinsrd xmm0,xmm0,DWORD[44+r10],1
6340 vpinsrd xmm1,xmm1,DWORD[44+r15],1
6341 vpunpckldq ymm5,ymm5,ymm1  ; low lane = dwords from r12,r13,r14,r15
6342 vpinsrd xmm2,xmm2,DWORD[44+r11],1
6343 vpunpckldq ymm0,ymm0,ymm2  ; xmm0 = dwords from r8,r9,r10,r11
6344 vinserti128 ymm5,ymm5,xmm0,1  ; high lane = xmm0
6345 vpshufb ymm5,ymm5,ymm6  ; byte shuffle by the mask in ymm6 (loaded outside this view; presumably big-endian swap)
6346 vpsrld ymm7,ymm9,6
6347 vpslld ymm2,ymm9,26
6348 vmovdqu YMMWORD[(352-256-128)+rbx],ymm5  ; keep the word in the schedule buffer (rbx-based half)
6349 vpaddd ymm5,ymm5,ymm12  ; X += h
6350
6351 vpsrld ymm1,ymm9,11
6352 vpxor ymm7,ymm7,ymm2
6353 vpslld ymm2,ymm9,21
6354 vpaddd ymm5,ymm5,YMMWORD[((-32))+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6355 vpxor ymm7,ymm7,ymm1
6356
6357 vpsrld ymm1,ymm9,25
6358 vpxor ymm7,ymm7,ymm2
6359
6360 vpslld ymm2,ymm9,7
6361 vpandn ymm0,ymm9,ymm11  ; ~e & g
6362 vpand ymm4,ymm9,ymm10  ; e & f (ymm4 borrowed as scratch)
6363
6364 vpxor ymm7,ymm7,ymm1
6365
6366 vpsrld ymm12,ymm13,2  ; h is consumed above; its register is reused for Sigma0/Maj
6367 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6368
6369 vpslld ymm1,ymm13,30
6370 vpxor ymm0,ymm0,ymm4  ; ymm0 = Ch(e,f,g)
6371 vpxor ymm4,ymm14,ymm13  ; ymm4 = a^b
6372
6373 vpxor ymm12,ymm12,ymm1
6374 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
6375
6376 vpsrld ymm1,ymm13,13
6377
6378 vpslld ymm2,ymm13,19
6379 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
6380 vpand ymm3,ymm3,ymm4  ; (b^c)&(a^b): ymm3 carries the preceding round's a^b, i.e. this round's b^c
6381
6382 vpxor ymm7,ymm12,ymm1
6383
6384 vpsrld ymm1,ymm13,22
6385 vpxor ymm7,ymm7,ymm2
6386
6387 vpslld ymm2,ymm13,10
6388 vpxor ymm12,ymm14,ymm3  ; ymm12 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6389 vpaddd ymm8,ymm8,ymm5  ; d += T1
6390
6391 vpxor ymm7,ymm7,ymm1
6392 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6393
6394 vpaddd ymm12,ymm12,ymm5  ; Maj + T1
6395 vpaddd ymm12,ymm12,ymm7  ; + Sigma0(a): ymm12 now holds the value that replaces h
; ---- SHA-256 round (8 lanes) on the message dword at byte offset 48 of each input stream ----
; Roles this round: a=ymm12 b=ymm13 c=ymm14 d=ymm15 e=ymm8 f=ymm9 g=ymm10 h=ymm11
; ymm5 = message word, then T1 accumulator; ymm7 = Sigma accumulator; ymm0-ymm2 = scratch.
6396 vmovd xmm5,DWORD[48+r12]  ; gather one dword from each of the stream pointers r8..r15
6397 vmovd xmm0,DWORD[48+r8]
6398 vmovd xmm1,DWORD[48+r13]
6399 vmovd xmm2,DWORD[48+r9]
6400 vpinsrd xmm5,xmm5,DWORD[48+r14],1
6401 vpinsrd xmm0,xmm0,DWORD[48+r10],1
6402 vpinsrd xmm1,xmm1,DWORD[48+r15],1
6403 vpunpckldq ymm5,ymm5,ymm1  ; low lane = dwords from r12,r13,r14,r15
6404 vpinsrd xmm2,xmm2,DWORD[48+r11],1
6405 vpunpckldq ymm0,ymm0,ymm2  ; xmm0 = dwords from r8,r9,r10,r11
6406 vinserti128 ymm5,ymm5,xmm0,1  ; high lane = xmm0
6407 vpshufb ymm5,ymm5,ymm6  ; byte shuffle by the mask in ymm6 (loaded outside this view; presumably big-endian swap)
6408 vpsrld ymm7,ymm8,6
6409 vpslld ymm2,ymm8,26
6410 vmovdqu YMMWORD[(384-256-128)+rbx],ymm5  ; keep the word in the schedule buffer (rbx-based half)
6411 vpaddd ymm5,ymm5,ymm11  ; X += h
6412
6413 vpsrld ymm1,ymm8,11
6414 vpxor ymm7,ymm7,ymm2
6415 vpslld ymm2,ymm8,21
6416 vpaddd ymm5,ymm5,YMMWORD[rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6417 vpxor ymm7,ymm7,ymm1
6418
6419 vpsrld ymm1,ymm8,25
6420 vpxor ymm7,ymm7,ymm2
6421
6422 vpslld ymm2,ymm8,7
6423 vpandn ymm0,ymm8,ymm10  ; ~e & g
6424 vpand ymm3,ymm8,ymm9  ; e & f (ymm3 borrowed as scratch)
6425
6426 vpxor ymm7,ymm7,ymm1
6427
6428 vpsrld ymm11,ymm12,2  ; h is consumed above; its register is reused for Sigma0/Maj
6429 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6430
6431 vpslld ymm1,ymm12,30
6432 vpxor ymm0,ymm0,ymm3  ; ymm0 = Ch(e,f,g)
6433 vpxor ymm3,ymm13,ymm12  ; ymm3 = a^b
6434
6435 vpxor ymm11,ymm11,ymm1
6436 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
6437
6438 vpsrld ymm1,ymm12,13
6439
6440 vpslld ymm2,ymm12,19
6441 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
6442 vpand ymm4,ymm4,ymm3  ; (b^c)&(a^b): ymm4 carries the preceding round's a^b, i.e. this round's b^c
6443
6444 vpxor ymm7,ymm11,ymm1
6445
6446 vpsrld ymm1,ymm12,22
6447 vpxor ymm7,ymm7,ymm2
6448
6449 vpslld ymm2,ymm12,10
6450 vpxor ymm11,ymm13,ymm4  ; ymm11 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6451 vpaddd ymm15,ymm15,ymm5  ; d += T1
6452
6453 vpxor ymm7,ymm7,ymm1
6454 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6455
6456 vpaddd ymm11,ymm11,ymm5  ; Maj + T1
6457 vpaddd ymm11,ymm11,ymm7  ; + Sigma0(a): ymm11 now holds the value that replaces h
; ---- SHA-256 round (8 lanes) on the message dword at byte offset 52 of each input stream ----
; Roles this round: a=ymm11 b=ymm12 c=ymm13 d=ymm14 e=ymm15 f=ymm8 g=ymm9 h=ymm10
; ymm5 = message word, then T1 accumulator; ymm7 = Sigma accumulator; ymm0-ymm2 = scratch.
6458 vmovd xmm5,DWORD[52+r12]  ; gather one dword from each of the stream pointers r8..r15
6459 vmovd xmm0,DWORD[52+r8]
6460 vmovd xmm1,DWORD[52+r13]
6461 vmovd xmm2,DWORD[52+r9]
6462 vpinsrd xmm5,xmm5,DWORD[52+r14],1
6463 vpinsrd xmm0,xmm0,DWORD[52+r10],1
6464 vpinsrd xmm1,xmm1,DWORD[52+r15],1
6465 vpunpckldq ymm5,ymm5,ymm1  ; low lane = dwords from r12,r13,r14,r15
6466 vpinsrd xmm2,xmm2,DWORD[52+r11],1
6467 vpunpckldq ymm0,ymm0,ymm2  ; xmm0 = dwords from r8,r9,r10,r11
6468 vinserti128 ymm5,ymm5,xmm0,1  ; high lane = xmm0
6469 vpshufb ymm5,ymm5,ymm6  ; byte shuffle by the mask in ymm6 (loaded outside this view; presumably big-endian swap)
6470 vpsrld ymm7,ymm15,6
6471 vpslld ymm2,ymm15,26
6472 vmovdqu YMMWORD[(416-256-128)+rbx],ymm5  ; keep the word in the schedule buffer (rbx-based half)
6473 vpaddd ymm5,ymm5,ymm10  ; X += h
6474
6475 vpsrld ymm1,ymm15,11
6476 vpxor ymm7,ymm7,ymm2
6477 vpslld ymm2,ymm15,21
6478 vpaddd ymm5,ymm5,YMMWORD[32+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6479 vpxor ymm7,ymm7,ymm1
6480
6481 vpsrld ymm1,ymm15,25
6482 vpxor ymm7,ymm7,ymm2
6483
6484 vpslld ymm2,ymm15,7
6485 vpandn ymm0,ymm15,ymm9  ; ~e & g
6486 vpand ymm4,ymm15,ymm8  ; e & f (ymm4 borrowed as scratch)
6487
6488 vpxor ymm7,ymm7,ymm1
6489
6490 vpsrld ymm10,ymm11,2  ; h is consumed above; its register is reused for Sigma0/Maj
6491 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6492
6493 vpslld ymm1,ymm11,30
6494 vpxor ymm0,ymm0,ymm4  ; ymm0 = Ch(e,f,g)
6495 vpxor ymm4,ymm12,ymm11  ; ymm4 = a^b
6496
6497 vpxor ymm10,ymm10,ymm1
6498 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
6499
6500 vpsrld ymm1,ymm11,13
6501
6502 vpslld ymm2,ymm11,19
6503 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
6504 vpand ymm3,ymm3,ymm4  ; (b^c)&(a^b): ymm3 carries the preceding round's a^b, i.e. this round's b^c
6505
6506 vpxor ymm7,ymm10,ymm1
6507
6508 vpsrld ymm1,ymm11,22
6509 vpxor ymm7,ymm7,ymm2
6510
6511 vpslld ymm2,ymm11,10
6512 vpxor ymm10,ymm12,ymm3  ; ymm10 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6513 vpaddd ymm14,ymm14,ymm5  ; d += T1
6514
6515 vpxor ymm7,ymm7,ymm1
6516 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6517
6518 vpaddd ymm10,ymm10,ymm5  ; Maj + T1
6519 vpaddd ymm10,ymm10,ymm7  ; + Sigma0(a): ymm10 now holds the value that replaces h
; ---- SHA-256 round (8 lanes) on the message dword at byte offset 56 of each input stream ----
; Roles this round: a=ymm10 b=ymm11 c=ymm12 d=ymm13 e=ymm14 f=ymm15 g=ymm8 h=ymm9
; ymm5 = message word, then T1 accumulator; ymm7 = Sigma accumulator; ymm0-ymm2 = scratch.
6520 vmovd xmm5,DWORD[56+r12]  ; gather one dword from each of the stream pointers r8..r15
6521 vmovd xmm0,DWORD[56+r8]
6522 vmovd xmm1,DWORD[56+r13]
6523 vmovd xmm2,DWORD[56+r9]
6524 vpinsrd xmm5,xmm5,DWORD[56+r14],1
6525 vpinsrd xmm0,xmm0,DWORD[56+r10],1
6526 vpinsrd xmm1,xmm1,DWORD[56+r15],1
6527 vpunpckldq ymm5,ymm5,ymm1  ; low lane = dwords from r12,r13,r14,r15
6528 vpinsrd xmm2,xmm2,DWORD[56+r11],1
6529 vpunpckldq ymm0,ymm0,ymm2  ; xmm0 = dwords from r8,r9,r10,r11
6530 vinserti128 ymm5,ymm5,xmm0,1  ; high lane = xmm0
6531 vpshufb ymm5,ymm5,ymm6  ; byte shuffle by the mask in ymm6 (loaded outside this view; presumably big-endian swap)
6532 vpsrld ymm7,ymm14,6
6533 vpslld ymm2,ymm14,26
6534 vmovdqu YMMWORD[(448-256-128)+rbx],ymm5  ; keep the word in the schedule buffer (rbx-based half)
6535 vpaddd ymm5,ymm5,ymm9  ; X += h
6536
6537 vpsrld ymm1,ymm14,11
6538 vpxor ymm7,ymm7,ymm2
6539 vpslld ymm2,ymm14,21
6540 vpaddd ymm5,ymm5,YMMWORD[64+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6541 vpxor ymm7,ymm7,ymm1
6542
6543 vpsrld ymm1,ymm14,25
6544 vpxor ymm7,ymm7,ymm2
6545
6546 vpslld ymm2,ymm14,7
6547 vpandn ymm0,ymm14,ymm8  ; ~e & g
6548 vpand ymm3,ymm14,ymm15  ; e & f (ymm3 borrowed as scratch)
6549
6550 vpxor ymm7,ymm7,ymm1
6551
6552 vpsrld ymm9,ymm10,2  ; h is consumed above; its register is reused for Sigma0/Maj
6553 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6554
6555 vpslld ymm1,ymm10,30
6556 vpxor ymm0,ymm0,ymm3  ; ymm0 = Ch(e,f,g)
6557 vpxor ymm3,ymm11,ymm10  ; ymm3 = a^b
6558
6559 vpxor ymm9,ymm9,ymm1
6560 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
6561
6562 vpsrld ymm1,ymm10,13
6563
6564 vpslld ymm2,ymm10,19
6565 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
6566 vpand ymm4,ymm4,ymm3  ; (b^c)&(a^b): ymm4 carries the preceding round's a^b, i.e. this round's b^c
6567
6568 vpxor ymm7,ymm9,ymm1
6569
6570 vpsrld ymm1,ymm10,22
6571 vpxor ymm7,ymm7,ymm2
6572
6573 vpslld ymm2,ymm10,10
6574 vpxor ymm9,ymm11,ymm4  ; ymm9 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6575 vpaddd ymm13,ymm13,ymm5  ; d += T1
6576
6577 vpxor ymm7,ymm7,ymm1
6578 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6579
6580 vpaddd ymm9,ymm9,ymm5  ; Maj + T1
6581 vpaddd ymm9,ymm9,ymm7  ; + Sigma0(a): ymm9 now holds the value that replaces h
; ---- SHA-256 round (8 lanes) on the message dword at byte offset 60 of each input stream ----
; This is the last dword of the 64 bytes read in this stretch: each stream pointer is
; advanced by 64 right after its load, and the data at the new positions is prefetched.
; Roles this round: a=ymm9 b=ymm10 c=ymm11 d=ymm12 e=ymm13 f=ymm14 g=ymm15 h=ymm8
; ymm5 = message word, then T1 accumulator; ymm7 = Sigma accumulator; ymm0-ymm2 = scratch.
6582 vmovd xmm5,DWORD[60+r12]  ; gather one dword from each of the stream pointers r8..r15
6583 lea r12,[64+r12]  ; advance this stream pointer by 64 bytes
6584 vmovd xmm0,DWORD[60+r8]
6585 lea r8,[64+r8]
6586 vmovd xmm1,DWORD[60+r13]
6587 lea r13,[64+r13]
6588 vmovd xmm2,DWORD[60+r9]
6589 lea r9,[64+r9]
6590 vpinsrd xmm5,xmm5,DWORD[60+r14],1
6591 lea r14,[64+r14]
6592 vpinsrd xmm0,xmm0,DWORD[60+r10],1
6593 lea r10,[64+r10]
6594 vpinsrd xmm1,xmm1,DWORD[60+r15],1
6595 lea r15,[64+r15]
6596 vpunpckldq ymm5,ymm5,ymm1  ; low lane = dwords from r12,r13,r14,r15
6597 vpinsrd xmm2,xmm2,DWORD[60+r11],1
6598 lea r11,[64+r11]
6599 vpunpckldq ymm0,ymm0,ymm2  ; xmm0 = dwords from r8,r9,r10,r11
6600 vinserti128 ymm5,ymm5,xmm0,1  ; high lane = xmm0
6601 vpshufb ymm5,ymm5,ymm6  ; byte shuffle by the mask in ymm6 (loaded outside this view; presumably big-endian swap)
6602 vpsrld ymm7,ymm13,6
6603 vpslld ymm2,ymm13,26
6604 vmovdqu YMMWORD[(480-256-128)+rbx],ymm5  ; keep the word in the schedule buffer (rbx-based half)
6605 vpaddd ymm5,ymm5,ymm8  ; X += h
6606
6607 vpsrld ymm1,ymm13,11
6608 vpxor ymm7,ymm7,ymm2
6609 vpslld ymm2,ymm13,21
6610 vpaddd ymm5,ymm5,YMMWORD[96+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6611 vpxor ymm7,ymm7,ymm1
6612
6613 vpsrld ymm1,ymm13,25
6614 vpxor ymm7,ymm7,ymm2
6615 prefetcht0 [63+r12]  ; prefetch at the already-advanced stream pointer (one per stream, spread through the round)
6616 vpslld ymm2,ymm13,7
6617 vpandn ymm0,ymm13,ymm15  ; ~e & g
6618 vpand ymm4,ymm13,ymm14  ; e & f (ymm4 borrowed as scratch)
6619 prefetcht0 [63+r13]
6620 vpxor ymm7,ymm7,ymm1
6621
6622 vpsrld ymm8,ymm9,2  ; h is consumed above; its register is reused for Sigma0/Maj
6623 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6624 prefetcht0 [63+r14]
6625 vpslld ymm1,ymm9,30
6626 vpxor ymm0,ymm0,ymm4  ; ymm0 = Ch(e,f,g)
6627 vpxor ymm4,ymm10,ymm9  ; ymm4 = a^b
6628 prefetcht0 [63+r15]
6629 vpxor ymm8,ymm8,ymm1
6630 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
6631
6632 vpsrld ymm1,ymm9,13
6633 prefetcht0 [63+r8]
6634 vpslld ymm2,ymm9,19
6635 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
6636 vpand ymm3,ymm3,ymm4  ; (b^c)&(a^b): ymm3 carries the preceding round's a^b, i.e. this round's b^c
6637 prefetcht0 [63+r9]
6638 vpxor ymm7,ymm8,ymm1
6639
6640 vpsrld ymm1,ymm9,22
6641 vpxor ymm7,ymm7,ymm2
6642 prefetcht0 [63+r10]
6643 vpslld ymm2,ymm9,10
6644 vpxor ymm8,ymm10,ymm3  ; ymm8 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6645 vpaddd ymm12,ymm12,ymm5  ; d += T1
6646 prefetcht0 [63+r11]
6647 vpxor ymm7,ymm7,ymm1
6648 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6649
6650 vpaddd ymm8,ymm8,ymm5  ; Maj + T1
6651 vpaddd ymm8,ymm8,ymm7  ; + Sigma0(a): ymm8 now holds the value that replaces h
6652 add rbp,256  ; step the constant pointer by 8 vectors of 32 bytes
; ---- Entry into the schedule-expansion loop ----
6653 vmovdqu ymm5,YMMWORD[((0-128))+rax]  ; preload the schedule vector at -128+rax; the first loop round updates it in ymm5
6654 mov ecx,3  ; NOTE(review): presumably the pass counter for the loop below; its decrement/branch is outside this view
6655 jmp NEAR $L$oop_16_xx_avx2
6656ALIGN 32
6657$L$oop_16_xx_avx2:
; ---- Schedule expansion + SHA-256 round for schedule slot 0 (word held in ymm5) ----
; ymm5 += [slot 9] + sigma0([slot 1]) + sigma1([slot 14]); the result is stored back to slot 0.
; (slot k is the operand written as 32*k-128 off rax or 32*k-256-128 off rbx;
;  presumably rbx = rax+256 so both halves form one 16-entry ring - set up outside this view)
; Roles this round: a=ymm8 b=ymm9 c=ymm10 d=ymm11 e=ymm12 f=ymm13 g=ymm14 h=ymm15
6658 vmovdqu ymm6,YMMWORD[((32-128))+rax]  ; ymm6 = slot 1 (also the word the next round updates)
6659 vpaddd ymm5,ymm5,YMMWORD[((288-256-128))+rbx]  ; X += slot 9
6660
6661 vpsrld ymm7,ymm6,3
6662 vpsrld ymm1,ymm6,7
6663 vpslld ymm2,ymm6,25
6664 vpxor ymm7,ymm7,ymm1
6665 vpsrld ymm1,ymm6,18
6666 vpxor ymm7,ymm7,ymm2
6667 vpslld ymm2,ymm6,14
6668 vmovdqu ymm0,YMMWORD[((448-256-128))+rbx]  ; ymm0 = slot 14
6669 vpsrld ymm3,ymm0,10  ; ymm3 borrowed as scratch for sigma1
6670
6671 vpxor ymm7,ymm7,ymm1
6672 vpsrld ymm1,ymm0,17
6673 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma0(slot 1) = ror7 ^ ror18 ^ shr3
6674 vpslld ymm2,ymm0,15
6675 vpaddd ymm5,ymm5,ymm7  ; X += sigma0
6676 vpxor ymm7,ymm3,ymm1
6677 vpsrld ymm1,ymm0,19
6678 vpxor ymm7,ymm7,ymm2
6679 vpslld ymm2,ymm0,13
6680 vpxor ymm7,ymm7,ymm1
6681 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma1(slot 14) = ror17 ^ ror19 ^ shr10
6682 vpaddd ymm5,ymm5,ymm7  ; X += sigma1 -> new schedule word
6683 vpsrld ymm7,ymm12,6
6684 vpslld ymm2,ymm12,26
6685 vmovdqu YMMWORD[(0-128)+rax],ymm5  ; store the new word back to slot 0
6686 vpaddd ymm5,ymm5,ymm15  ; X += h
6687
6688 vpsrld ymm1,ymm12,11
6689 vpxor ymm7,ymm7,ymm2
6690 vpslld ymm2,ymm12,21
6691 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6692 vpxor ymm7,ymm7,ymm1
6693
6694 vpsrld ymm1,ymm12,25
6695 vpxor ymm7,ymm7,ymm2
6696
6697 vpslld ymm2,ymm12,7
6698 vpandn ymm0,ymm12,ymm14  ; ~e & g
6699 vpand ymm3,ymm12,ymm13  ; e & f
6700
6701 vpxor ymm7,ymm7,ymm1
6702
6703 vpsrld ymm15,ymm8,2  ; h is consumed above; its register is reused for Sigma0/Maj
6704 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6705
6706 vpslld ymm1,ymm8,30
6707 vpxor ymm0,ymm0,ymm3  ; ymm0 = Ch(e,f,g)
6708 vpxor ymm3,ymm9,ymm8  ; ymm3 = a^b
6709
6710 vpxor ymm15,ymm15,ymm1
6711 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
6712
6713 vpsrld ymm1,ymm8,13
6714
6715 vpslld ymm2,ymm8,19
6716 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
6717 vpand ymm4,ymm4,ymm3  ; (b^c)&(a^b): ymm4 holds a^b of the round before the loop on fall-through entry (NOTE(review): the back-edge is outside this view)
6718
6719 vpxor ymm7,ymm15,ymm1
6720
6721 vpsrld ymm1,ymm8,22
6722 vpxor ymm7,ymm7,ymm2
6723
6724 vpslld ymm2,ymm8,10
6725 vpxor ymm15,ymm9,ymm4  ; ymm15 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6726 vpaddd ymm11,ymm11,ymm5  ; d += T1
6727
6728 vpxor ymm7,ymm7,ymm1
6729 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6730
6731 vpaddd ymm15,ymm15,ymm5  ; Maj + T1
6732 vpaddd ymm15,ymm15,ymm7  ; + Sigma0(a): ymm15 now holds the value that replaces h
; ---- Schedule expansion + SHA-256 round for schedule slot 1 (word held in ymm6) ----
; ymm6 += [slot 10] + sigma0([slot 2]) + sigma1([slot 15]); the result is stored back to slot 1.
; (slot k is the operand written as 32*k-128 off rax or 32*k-256-128 off rbx;
;  presumably rbx = rax+256 so both halves form one 16-entry ring - set up outside this view)
; Roles this round: a=ymm15 b=ymm8 c=ymm9 d=ymm10 e=ymm11 f=ymm12 g=ymm13 h=ymm14
6733 vmovdqu ymm5,YMMWORD[((64-128))+rax]  ; ymm5 = slot 2 (also the word the next round updates)
6734 vpaddd ymm6,ymm6,YMMWORD[((320-256-128))+rbx]  ; X += slot 10
6735
6736 vpsrld ymm7,ymm5,3
6737 vpsrld ymm1,ymm5,7
6738 vpslld ymm2,ymm5,25
6739 vpxor ymm7,ymm7,ymm1
6740 vpsrld ymm1,ymm5,18
6741 vpxor ymm7,ymm7,ymm2
6742 vpslld ymm2,ymm5,14
6743 vmovdqu ymm0,YMMWORD[((480-256-128))+rbx]  ; ymm0 = slot 15
6744 vpsrld ymm4,ymm0,10  ; ymm4 borrowed as scratch for sigma1
6745
6746 vpxor ymm7,ymm7,ymm1
6747 vpsrld ymm1,ymm0,17
6748 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma0(slot 2) = ror7 ^ ror18 ^ shr3
6749 vpslld ymm2,ymm0,15
6750 vpaddd ymm6,ymm6,ymm7  ; X += sigma0
6751 vpxor ymm7,ymm4,ymm1
6752 vpsrld ymm1,ymm0,19
6753 vpxor ymm7,ymm7,ymm2
6754 vpslld ymm2,ymm0,13
6755 vpxor ymm7,ymm7,ymm1
6756 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma1(slot 15) = ror17 ^ ror19 ^ shr10
6757 vpaddd ymm6,ymm6,ymm7  ; X += sigma1 -> new schedule word
6758 vpsrld ymm7,ymm11,6
6759 vpslld ymm2,ymm11,26
6760 vmovdqu YMMWORD[(32-128)+rax],ymm6  ; store the new word back to slot 1
6761 vpaddd ymm6,ymm6,ymm14  ; X += h
6762
6763 vpsrld ymm1,ymm11,11
6764 vpxor ymm7,ymm7,ymm2
6765 vpslld ymm2,ymm11,21
6766 vpaddd ymm6,ymm6,YMMWORD[((-96))+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6767 vpxor ymm7,ymm7,ymm1
6768
6769 vpsrld ymm1,ymm11,25
6770 vpxor ymm7,ymm7,ymm2
6771
6772 vpslld ymm2,ymm11,7
6773 vpandn ymm0,ymm11,ymm13  ; ~e & g
6774 vpand ymm4,ymm11,ymm12  ; e & f
6775
6776 vpxor ymm7,ymm7,ymm1
6777
6778 vpsrld ymm14,ymm15,2  ; h is consumed above; its register is reused for Sigma0/Maj
6779 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6780
6781 vpslld ymm1,ymm15,30
6782 vpxor ymm0,ymm0,ymm4  ; ymm0 = Ch(e,f,g)
6783 vpxor ymm4,ymm8,ymm15  ; ymm4 = a^b
6784
6785 vpxor ymm14,ymm14,ymm1
6786 vpaddd ymm6,ymm6,ymm7  ; X += Sigma1(e)
6787
6788 vpsrld ymm1,ymm15,13
6789
6790 vpslld ymm2,ymm15,19
6791 vpaddd ymm6,ymm6,ymm0  ; X += Ch(e,f,g)  -> ymm6 = T1
6792 vpand ymm3,ymm3,ymm4  ; (b^c)&(a^b): ymm3 carries the preceding round's a^b, i.e. this round's b^c
6793
6794 vpxor ymm7,ymm14,ymm1
6795
6796 vpsrld ymm1,ymm15,22
6797 vpxor ymm7,ymm7,ymm2
6798
6799 vpslld ymm2,ymm15,10
6800 vpxor ymm14,ymm8,ymm3  ; ymm14 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6801 vpaddd ymm10,ymm10,ymm6  ; d += T1
6802
6803 vpxor ymm7,ymm7,ymm1
6804 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6805
6806 vpaddd ymm14,ymm14,ymm6  ; Maj + T1
6807 vpaddd ymm14,ymm14,ymm7  ; + Sigma0(a): ymm14 now holds the value that replaces h
; ---- Schedule expansion + SHA-256 round for schedule slot 2 (word held in ymm5) ----
; ymm5 += [slot 11] + sigma0([slot 3]) + sigma1([slot 0]); the result is stored back to slot 2.
; (slot k is the operand written as 32*k-128 off rax or 32*k-256-128 off rbx;
;  presumably rbx = rax+256 so both halves form one 16-entry ring - set up outside this view)
; Roles this round: a=ymm14 b=ymm15 c=ymm8 d=ymm9 e=ymm10 f=ymm11 g=ymm12 h=ymm13
6808 vmovdqu ymm6,YMMWORD[((96-128))+rax]  ; ymm6 = slot 3 (also the word the next round updates)
6809 vpaddd ymm5,ymm5,YMMWORD[((352-256-128))+rbx]  ; X += slot 11
6810
6811 vpsrld ymm7,ymm6,3
6812 vpsrld ymm1,ymm6,7
6813 vpslld ymm2,ymm6,25
6814 vpxor ymm7,ymm7,ymm1
6815 vpsrld ymm1,ymm6,18
6816 vpxor ymm7,ymm7,ymm2
6817 vpslld ymm2,ymm6,14
6818 vmovdqu ymm0,YMMWORD[((0-128))+rax]  ; ymm0 = slot 0
6819 vpsrld ymm3,ymm0,10  ; ymm3 borrowed as scratch for sigma1
6820
6821 vpxor ymm7,ymm7,ymm1
6822 vpsrld ymm1,ymm0,17
6823 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma0(slot 3) = ror7 ^ ror18 ^ shr3
6824 vpslld ymm2,ymm0,15
6825 vpaddd ymm5,ymm5,ymm7  ; X += sigma0
6826 vpxor ymm7,ymm3,ymm1
6827 vpsrld ymm1,ymm0,19
6828 vpxor ymm7,ymm7,ymm2
6829 vpslld ymm2,ymm0,13
6830 vpxor ymm7,ymm7,ymm1
6831 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma1(slot 0) = ror17 ^ ror19 ^ shr10
6832 vpaddd ymm5,ymm5,ymm7  ; X += sigma1 -> new schedule word
6833 vpsrld ymm7,ymm10,6
6834 vpslld ymm2,ymm10,26
6835 vmovdqu YMMWORD[(64-128)+rax],ymm5  ; store the new word back to slot 2
6836 vpaddd ymm5,ymm5,ymm13  ; X += h
6837
6838 vpsrld ymm1,ymm10,11
6839 vpxor ymm7,ymm7,ymm2
6840 vpslld ymm2,ymm10,21
6841 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6842 vpxor ymm7,ymm7,ymm1
6843
6844 vpsrld ymm1,ymm10,25
6845 vpxor ymm7,ymm7,ymm2
6846
6847 vpslld ymm2,ymm10,7
6848 vpandn ymm0,ymm10,ymm12  ; ~e & g
6849 vpand ymm3,ymm10,ymm11  ; e & f
6850
6851 vpxor ymm7,ymm7,ymm1
6852
6853 vpsrld ymm13,ymm14,2  ; h is consumed above; its register is reused for Sigma0/Maj
6854 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6855
6856 vpslld ymm1,ymm14,30
6857 vpxor ymm0,ymm0,ymm3  ; ymm0 = Ch(e,f,g)
6858 vpxor ymm3,ymm15,ymm14  ; ymm3 = a^b
6859
6860 vpxor ymm13,ymm13,ymm1
6861 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
6862
6863 vpsrld ymm1,ymm14,13
6864
6865 vpslld ymm2,ymm14,19
6866 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
6867 vpand ymm4,ymm4,ymm3  ; (b^c)&(a^b): ymm4 carries the preceding round's a^b, i.e. this round's b^c
6868
6869 vpxor ymm7,ymm13,ymm1
6870
6871 vpsrld ymm1,ymm14,22
6872 vpxor ymm7,ymm7,ymm2
6873
6874 vpslld ymm2,ymm14,10
6875 vpxor ymm13,ymm15,ymm4  ; ymm13 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6876 vpaddd ymm9,ymm9,ymm5  ; d += T1
6877
6878 vpxor ymm7,ymm7,ymm1
6879 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6880
6881 vpaddd ymm13,ymm13,ymm5  ; Maj + T1
6882 vpaddd ymm13,ymm13,ymm7  ; + Sigma0(a): ymm13 now holds the value that replaces h
; ---- Schedule expansion + SHA-256 round for schedule slot 3 (word held in ymm6) ----
; ymm6 += [slot 12] + sigma0([slot 4]) + sigma1([slot 1]); the result is stored back to slot 3.
; (slot k is the operand written as 32*k-128 off rax or 32*k-256-128 off rbx;
;  presumably rbx = rax+256 so both halves form one 16-entry ring - set up outside this view)
; Roles this round: a=ymm13 b=ymm14 c=ymm15 d=ymm8 e=ymm9 f=ymm10 g=ymm11 h=ymm12
6883 vmovdqu ymm5,YMMWORD[((128-128))+rax]  ; ymm5 = slot 4 (also the word the next round updates)
6884 vpaddd ymm6,ymm6,YMMWORD[((384-256-128))+rbx]  ; X += slot 12
6885
6886 vpsrld ymm7,ymm5,3
6887 vpsrld ymm1,ymm5,7
6888 vpslld ymm2,ymm5,25
6889 vpxor ymm7,ymm7,ymm1
6890 vpsrld ymm1,ymm5,18
6891 vpxor ymm7,ymm7,ymm2
6892 vpslld ymm2,ymm5,14
6893 vmovdqu ymm0,YMMWORD[((32-128))+rax]  ; ymm0 = slot 1
6894 vpsrld ymm4,ymm0,10  ; ymm4 borrowed as scratch for sigma1
6895
6896 vpxor ymm7,ymm7,ymm1
6897 vpsrld ymm1,ymm0,17
6898 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma0(slot 4) = ror7 ^ ror18 ^ shr3
6899 vpslld ymm2,ymm0,15
6900 vpaddd ymm6,ymm6,ymm7  ; X += sigma0
6901 vpxor ymm7,ymm4,ymm1
6902 vpsrld ymm1,ymm0,19
6903 vpxor ymm7,ymm7,ymm2
6904 vpslld ymm2,ymm0,13
6905 vpxor ymm7,ymm7,ymm1
6906 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma1(slot 1) = ror17 ^ ror19 ^ shr10
6907 vpaddd ymm6,ymm6,ymm7  ; X += sigma1 -> new schedule word
6908 vpsrld ymm7,ymm9,6
6909 vpslld ymm2,ymm9,26
6910 vmovdqu YMMWORD[(96-128)+rax],ymm6  ; store the new word back to slot 3
6911 vpaddd ymm6,ymm6,ymm12  ; X += h
6912
6913 vpsrld ymm1,ymm9,11
6914 vpxor ymm7,ymm7,ymm2
6915 vpslld ymm2,ymm9,21
6916 vpaddd ymm6,ymm6,YMMWORD[((-32))+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6917 vpxor ymm7,ymm7,ymm1
6918
6919 vpsrld ymm1,ymm9,25
6920 vpxor ymm7,ymm7,ymm2
6921
6922 vpslld ymm2,ymm9,7
6923 vpandn ymm0,ymm9,ymm11  ; ~e & g
6924 vpand ymm4,ymm9,ymm10  ; e & f
6925
6926 vpxor ymm7,ymm7,ymm1
6927
6928 vpsrld ymm12,ymm13,2  ; h is consumed above; its register is reused for Sigma0/Maj
6929 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
6930
6931 vpslld ymm1,ymm13,30
6932 vpxor ymm0,ymm0,ymm4  ; ymm0 = Ch(e,f,g)
6933 vpxor ymm4,ymm14,ymm13  ; ymm4 = a^b
6934
6935 vpxor ymm12,ymm12,ymm1
6936 vpaddd ymm6,ymm6,ymm7  ; X += Sigma1(e)
6937
6938 vpsrld ymm1,ymm13,13
6939
6940 vpslld ymm2,ymm13,19
6941 vpaddd ymm6,ymm6,ymm0  ; X += Ch(e,f,g)  -> ymm6 = T1
6942 vpand ymm3,ymm3,ymm4  ; (b^c)&(a^b): ymm3 carries the preceding round's a^b, i.e. this round's b^c
6943
6944 vpxor ymm7,ymm12,ymm1
6945
6946 vpsrld ymm1,ymm13,22
6947 vpxor ymm7,ymm7,ymm2
6948
6949 vpslld ymm2,ymm13,10
6950 vpxor ymm12,ymm14,ymm3  ; ymm12 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
6951 vpaddd ymm8,ymm8,ymm6  ; d += T1
6952
6953 vpxor ymm7,ymm7,ymm1
6954 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
6955
6956 vpaddd ymm12,ymm12,ymm6  ; Maj + T1
6957 vpaddd ymm12,ymm12,ymm7  ; + Sigma0(a): ymm12 now holds the value that replaces h
; ---- Schedule expansion + SHA-256 round for schedule slot 4 (word held in ymm5) ----
; ymm5 += [slot 13] + sigma0([slot 5]) + sigma1([slot 2]); the result is stored back to slot 4.
; (slot k is the operand written as 32*k-128 off rax or 32*k-256-128 off rbx;
;  presumably rbx = rax+256 so both halves form one 16-entry ring - set up outside this view)
; Roles this round: a=ymm12 b=ymm13 c=ymm14 d=ymm15 e=ymm8 f=ymm9 g=ymm10 h=ymm11
6958 vmovdqu ymm6,YMMWORD[((160-128))+rax]  ; ymm6 = slot 5 (also the word the next round updates)
6959 vpaddd ymm5,ymm5,YMMWORD[((416-256-128))+rbx]  ; X += slot 13
6960
6961 vpsrld ymm7,ymm6,3
6962 vpsrld ymm1,ymm6,7
6963 vpslld ymm2,ymm6,25
6964 vpxor ymm7,ymm7,ymm1
6965 vpsrld ymm1,ymm6,18
6966 vpxor ymm7,ymm7,ymm2
6967 vpslld ymm2,ymm6,14
6968 vmovdqu ymm0,YMMWORD[((64-128))+rax]  ; ymm0 = slot 2
6969 vpsrld ymm3,ymm0,10  ; ymm3 borrowed as scratch for sigma1
6970
6971 vpxor ymm7,ymm7,ymm1
6972 vpsrld ymm1,ymm0,17
6973 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma0(slot 5) = ror7 ^ ror18 ^ shr3
6974 vpslld ymm2,ymm0,15
6975 vpaddd ymm5,ymm5,ymm7  ; X += sigma0
6976 vpxor ymm7,ymm3,ymm1
6977 vpsrld ymm1,ymm0,19
6978 vpxor ymm7,ymm7,ymm2
6979 vpslld ymm2,ymm0,13
6980 vpxor ymm7,ymm7,ymm1
6981 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma1(slot 2) = ror17 ^ ror19 ^ shr10
6982 vpaddd ymm5,ymm5,ymm7  ; X += sigma1 -> new schedule word
6983 vpsrld ymm7,ymm8,6
6984 vpslld ymm2,ymm8,26
6985 vmovdqu YMMWORD[(128-128)+rax],ymm5  ; store the new word back to slot 4
6986 vpaddd ymm5,ymm5,ymm11  ; X += h
6987
6988 vpsrld ymm1,ymm8,11
6989 vpxor ymm7,ymm7,ymm2
6990 vpslld ymm2,ymm8,21
6991 vpaddd ymm5,ymm5,YMMWORD[rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
6992 vpxor ymm7,ymm7,ymm1
6993
6994 vpsrld ymm1,ymm8,25
6995 vpxor ymm7,ymm7,ymm2
6996
6997 vpslld ymm2,ymm8,7
6998 vpandn ymm0,ymm8,ymm10  ; ~e & g
6999 vpand ymm3,ymm8,ymm9  ; e & f
7000
7001 vpxor ymm7,ymm7,ymm1
7002
7003 vpsrld ymm11,ymm12,2  ; h is consumed above; its register is reused for Sigma0/Maj
7004 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
7005
7006 vpslld ymm1,ymm12,30
7007 vpxor ymm0,ymm0,ymm3  ; ymm0 = Ch(e,f,g)
7008 vpxor ymm3,ymm13,ymm12  ; ymm3 = a^b
7009
7010 vpxor ymm11,ymm11,ymm1
7011 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
7012
7013 vpsrld ymm1,ymm12,13
7014
7015 vpslld ymm2,ymm12,19
7016 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
7017 vpand ymm4,ymm4,ymm3  ; (b^c)&(a^b): ymm4 carries the preceding round's a^b, i.e. this round's b^c
7018
7019 vpxor ymm7,ymm11,ymm1
7020
7021 vpsrld ymm1,ymm12,22
7022 vpxor ymm7,ymm7,ymm2
7023
7024 vpslld ymm2,ymm12,10
7025 vpxor ymm11,ymm13,ymm4  ; ymm11 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
7026 vpaddd ymm15,ymm15,ymm5  ; d += T1
7027
7028 vpxor ymm7,ymm7,ymm1
7029 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
7030
7031 vpaddd ymm11,ymm11,ymm5  ; Maj + T1
7032 vpaddd ymm11,ymm11,ymm7  ; + Sigma0(a): ymm11 now holds the value that replaces h
; ---- Schedule expansion + SHA-256 round for schedule slot 5 (word held in ymm6) ----
; ymm6 += [slot 14] + sigma0([slot 6]) + sigma1([slot 3]); the result is stored back to slot 5.
; (slot k is the operand written as 32*k-128 off rax or 32*k-256-128 off rbx;
;  presumably rbx = rax+256 so both halves form one 16-entry ring - set up outside this view)
; Roles this round: a=ymm11 b=ymm12 c=ymm13 d=ymm14 e=ymm15 f=ymm8 g=ymm9 h=ymm10
7033 vmovdqu ymm5,YMMWORD[((192-128))+rax]  ; ymm5 = slot 6 (also the word the next round updates)
7034 vpaddd ymm6,ymm6,YMMWORD[((448-256-128))+rbx]  ; X += slot 14
7035
7036 vpsrld ymm7,ymm5,3
7037 vpsrld ymm1,ymm5,7
7038 vpslld ymm2,ymm5,25
7039 vpxor ymm7,ymm7,ymm1
7040 vpsrld ymm1,ymm5,18
7041 vpxor ymm7,ymm7,ymm2
7042 vpslld ymm2,ymm5,14
7043 vmovdqu ymm0,YMMWORD[((96-128))+rax]  ; ymm0 = slot 3
7044 vpsrld ymm4,ymm0,10  ; ymm4 borrowed as scratch for sigma1
7045
7046 vpxor ymm7,ymm7,ymm1
7047 vpsrld ymm1,ymm0,17
7048 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma0(slot 6) = ror7 ^ ror18 ^ shr3
7049 vpslld ymm2,ymm0,15
7050 vpaddd ymm6,ymm6,ymm7  ; X += sigma0
7051 vpxor ymm7,ymm4,ymm1
7052 vpsrld ymm1,ymm0,19
7053 vpxor ymm7,ymm7,ymm2
7054 vpslld ymm2,ymm0,13
7055 vpxor ymm7,ymm7,ymm1
7056 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma1(slot 3) = ror17 ^ ror19 ^ shr10
7057 vpaddd ymm6,ymm6,ymm7  ; X += sigma1 -> new schedule word
7058 vpsrld ymm7,ymm15,6
7059 vpslld ymm2,ymm15,26
7060 vmovdqu YMMWORD[(160-128)+rax],ymm6  ; store the new word back to slot 5
7061 vpaddd ymm6,ymm6,ymm10  ; X += h
7062
7063 vpsrld ymm1,ymm15,11
7064 vpxor ymm7,ymm7,ymm2
7065 vpslld ymm2,ymm15,21
7066 vpaddd ymm6,ymm6,YMMWORD[32+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
7067 vpxor ymm7,ymm7,ymm1
7068
7069 vpsrld ymm1,ymm15,25
7070 vpxor ymm7,ymm7,ymm2
7071
7072 vpslld ymm2,ymm15,7
7073 vpandn ymm0,ymm15,ymm9  ; ~e & g
7074 vpand ymm4,ymm15,ymm8  ; e & f
7075
7076 vpxor ymm7,ymm7,ymm1
7077
7078 vpsrld ymm10,ymm11,2  ; h is consumed above; its register is reused for Sigma0/Maj
7079 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
7080
7081 vpslld ymm1,ymm11,30
7082 vpxor ymm0,ymm0,ymm4  ; ymm0 = Ch(e,f,g)
7083 vpxor ymm4,ymm12,ymm11  ; ymm4 = a^b
7084
7085 vpxor ymm10,ymm10,ymm1
7086 vpaddd ymm6,ymm6,ymm7  ; X += Sigma1(e)
7087
7088 vpsrld ymm1,ymm11,13
7089
7090 vpslld ymm2,ymm11,19
7091 vpaddd ymm6,ymm6,ymm0  ; X += Ch(e,f,g)  -> ymm6 = T1
7092 vpand ymm3,ymm3,ymm4  ; (b^c)&(a^b): ymm3 carries the preceding round's a^b, i.e. this round's b^c
7093
7094 vpxor ymm7,ymm10,ymm1
7095
7096 vpsrld ymm1,ymm11,22
7097 vpxor ymm7,ymm7,ymm2
7098
7099 vpslld ymm2,ymm11,10
7100 vpxor ymm10,ymm12,ymm3  ; ymm10 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
7101 vpaddd ymm14,ymm14,ymm6  ; d += T1
7102
7103 vpxor ymm7,ymm7,ymm1
7104 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
7105
7106 vpaddd ymm10,ymm10,ymm6  ; Maj + T1
7107 vpaddd ymm10,ymm10,ymm7  ; + Sigma0(a): ymm10 now holds the value that replaces h
; ---- Schedule expansion + SHA-256 round for schedule slot 6 (word held in ymm5) ----
; ymm5 += [slot 15] + sigma0([slot 7]) + sigma1([slot 4]); the result is stored back to slot 6.
; (slot k is the operand written as 32*k-128 off rax or 32*k-256-128 off rbx;
;  presumably rbx = rax+256 so both halves form one 16-entry ring - set up outside this view)
; Roles this round: a=ymm10 b=ymm11 c=ymm12 d=ymm13 e=ymm14 f=ymm15 g=ymm8 h=ymm9
7108 vmovdqu ymm6,YMMWORD[((224-128))+rax]  ; ymm6 = slot 7 (also the word the next round updates)
7109 vpaddd ymm5,ymm5,YMMWORD[((480-256-128))+rbx]  ; X += slot 15
7110
7111 vpsrld ymm7,ymm6,3
7112 vpsrld ymm1,ymm6,7
7113 vpslld ymm2,ymm6,25
7114 vpxor ymm7,ymm7,ymm1
7115 vpsrld ymm1,ymm6,18
7116 vpxor ymm7,ymm7,ymm2
7117 vpslld ymm2,ymm6,14
7118 vmovdqu ymm0,YMMWORD[((128-128))+rax]  ; ymm0 = slot 4
7119 vpsrld ymm3,ymm0,10  ; ymm3 borrowed as scratch for sigma1
7120
7121 vpxor ymm7,ymm7,ymm1
7122 vpsrld ymm1,ymm0,17
7123 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma0(slot 7) = ror7 ^ ror18 ^ shr3
7124 vpslld ymm2,ymm0,15
7125 vpaddd ymm5,ymm5,ymm7  ; X += sigma0
7126 vpxor ymm7,ymm3,ymm1
7127 vpsrld ymm1,ymm0,19
7128 vpxor ymm7,ymm7,ymm2
7129 vpslld ymm2,ymm0,13
7130 vpxor ymm7,ymm7,ymm1
7131 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma1(slot 4) = ror17 ^ ror19 ^ shr10
7132 vpaddd ymm5,ymm5,ymm7  ; X += sigma1 -> new schedule word
7133 vpsrld ymm7,ymm14,6
7134 vpslld ymm2,ymm14,26
7135 vmovdqu YMMWORD[(192-128)+rax],ymm5  ; store the new word back to slot 6
7136 vpaddd ymm5,ymm5,ymm9  ; X += h
7137
7138 vpsrld ymm1,ymm14,11
7139 vpxor ymm7,ymm7,ymm2
7140 vpslld ymm2,ymm14,21
7141 vpaddd ymm5,ymm5,YMMWORD[64+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
7142 vpxor ymm7,ymm7,ymm1
7143
7144 vpsrld ymm1,ymm14,25
7145 vpxor ymm7,ymm7,ymm2
7146
7147 vpslld ymm2,ymm14,7
7148 vpandn ymm0,ymm14,ymm8  ; ~e & g
7149 vpand ymm3,ymm14,ymm15  ; e & f
7150
7151 vpxor ymm7,ymm7,ymm1
7152
7153 vpsrld ymm9,ymm10,2  ; h is consumed above; its register is reused for Sigma0/Maj
7154 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
7155
7156 vpslld ymm1,ymm10,30
7157 vpxor ymm0,ymm0,ymm3  ; ymm0 = Ch(e,f,g)
7158 vpxor ymm3,ymm11,ymm10  ; ymm3 = a^b
7159
7160 vpxor ymm9,ymm9,ymm1
7161 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
7162
7163 vpsrld ymm1,ymm10,13
7164
7165 vpslld ymm2,ymm10,19
7166 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
7167 vpand ymm4,ymm4,ymm3  ; (b^c)&(a^b): ymm4 carries the preceding round's a^b, i.e. this round's b^c
7168
7169 vpxor ymm7,ymm9,ymm1
7170
7171 vpsrld ymm1,ymm10,22
7172 vpxor ymm7,ymm7,ymm2
7173
7174 vpslld ymm2,ymm10,10
7175 vpxor ymm9,ymm11,ymm4  ; ymm9 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
7176 vpaddd ymm13,ymm13,ymm5  ; d += T1
7177
7178 vpxor ymm7,ymm7,ymm1
7179 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
7180
7181 vpaddd ymm9,ymm9,ymm5  ; Maj + T1
7182 vpaddd ymm9,ymm9,ymm7  ; + Sigma0(a): ymm9 now holds the value that replaces h
; ---- Schedule expansion + SHA-256 round for schedule slot 7 (word held in ymm6) ----
; ymm6 += [slot 0] + sigma0([slot 8]) + sigma1([slot 5]); the result is stored back to slot 7.
; (slot k is the operand written as 32*k-128 off rax or 32*k-256-128 off rbx;
;  presumably rbx = rax+256 so both halves form one 16-entry ring - set up outside this view)
; Roles this round: a=ymm9 b=ymm10 c=ymm11 d=ymm12 e=ymm13 f=ymm14 g=ymm15 h=ymm8
7183 vmovdqu ymm5,YMMWORD[((256-256-128))+rbx]  ; ymm5 = slot 8 (also the word the next round updates)
7184 vpaddd ymm6,ymm6,YMMWORD[((0-128))+rax]  ; X += slot 0
7185
7186 vpsrld ymm7,ymm5,3
7187 vpsrld ymm1,ymm5,7
7188 vpslld ymm2,ymm5,25
7189 vpxor ymm7,ymm7,ymm1
7190 vpsrld ymm1,ymm5,18
7191 vpxor ymm7,ymm7,ymm2
7192 vpslld ymm2,ymm5,14
7193 vmovdqu ymm0,YMMWORD[((160-128))+rax]  ; ymm0 = slot 5
7194 vpsrld ymm4,ymm0,10  ; ymm4 borrowed as scratch for sigma1
7195
7196 vpxor ymm7,ymm7,ymm1
7197 vpsrld ymm1,ymm0,17
7198 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma0(slot 8) = ror7 ^ ror18 ^ shr3
7199 vpslld ymm2,ymm0,15
7200 vpaddd ymm6,ymm6,ymm7  ; X += sigma0
7201 vpxor ymm7,ymm4,ymm1
7202 vpsrld ymm1,ymm0,19
7203 vpxor ymm7,ymm7,ymm2
7204 vpslld ymm2,ymm0,13
7205 vpxor ymm7,ymm7,ymm1
7206 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma1(slot 5) = ror17 ^ ror19 ^ shr10
7207 vpaddd ymm6,ymm6,ymm7  ; X += sigma1 -> new schedule word
7208 vpsrld ymm7,ymm13,6
7209 vpslld ymm2,ymm13,26
7210 vmovdqu YMMWORD[(224-128)+rax],ymm6  ; store the new word back to slot 7
7211 vpaddd ymm6,ymm6,ymm8  ; X += h
7212
7213 vpsrld ymm1,ymm13,11
7214 vpxor ymm7,ymm7,ymm2
7215 vpslld ymm2,ymm13,21
7216 vpaddd ymm6,ymm6,YMMWORD[96+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
7217 vpxor ymm7,ymm7,ymm1
7218
7219 vpsrld ymm1,ymm13,25
7220 vpxor ymm7,ymm7,ymm2
7221
7222 vpslld ymm2,ymm13,7
7223 vpandn ymm0,ymm13,ymm15  ; ~e & g
7224 vpand ymm4,ymm13,ymm14  ; e & f
7225
7226 vpxor ymm7,ymm7,ymm1
7227
7228 vpsrld ymm8,ymm9,2  ; h is consumed above; its register is reused for Sigma0/Maj
7229 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
7230
7231 vpslld ymm1,ymm9,30
7232 vpxor ymm0,ymm0,ymm4  ; ymm0 = Ch(e,f,g)
7233 vpxor ymm4,ymm10,ymm9  ; ymm4 = a^b
7234
7235 vpxor ymm8,ymm8,ymm1
7236 vpaddd ymm6,ymm6,ymm7  ; X += Sigma1(e)
7237
7238 vpsrld ymm1,ymm9,13
7239
7240 vpslld ymm2,ymm9,19
7241 vpaddd ymm6,ymm6,ymm0  ; X += Ch(e,f,g)  -> ymm6 = T1
7242 vpand ymm3,ymm3,ymm4  ; (b^c)&(a^b): ymm3 carries the preceding round's a^b, i.e. this round's b^c
7243
7244 vpxor ymm7,ymm8,ymm1
7245
7246 vpsrld ymm1,ymm9,22
7247 vpxor ymm7,ymm7,ymm2
7248
7249 vpslld ymm2,ymm9,10
7250 vpxor ymm8,ymm10,ymm3  ; ymm8 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
7251 vpaddd ymm12,ymm12,ymm6  ; d += T1
7252
7253 vpxor ymm7,ymm7,ymm1
7254 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
7255
7256 vpaddd ymm8,ymm8,ymm6  ; Maj + T1
7257 vpaddd ymm8,ymm8,ymm7  ; + Sigma0(a): ymm8 now holds the value that replaces h
7258 add rbp,256  ; step the constant pointer by 8 vectors of 32 bytes
; ---- Schedule expansion + SHA-256 round for schedule slot 8 (word held in ymm5) ----
; ymm5 += [slot 1] + sigma0([slot 9]) + sigma1([slot 6]); the result is stored back to slot 8.
; (slot k is the operand written as 32*k-128 off rax or 32*k-256-128 off rbx;
;  presumably rbx = rax+256 so both halves form one 16-entry ring - set up outside this view)
; Roles this round: a=ymm8 b=ymm9 c=ymm10 d=ymm11 e=ymm12 f=ymm13 g=ymm14 h=ymm15
7259 vmovdqu ymm6,YMMWORD[((288-256-128))+rbx]  ; ymm6 = slot 9 (also the word the next round updates)
7260 vpaddd ymm5,ymm5,YMMWORD[((32-128))+rax]  ; X += slot 1
7261
7262 vpsrld ymm7,ymm6,3
7263 vpsrld ymm1,ymm6,7
7264 vpslld ymm2,ymm6,25
7265 vpxor ymm7,ymm7,ymm1
7266 vpsrld ymm1,ymm6,18
7267 vpxor ymm7,ymm7,ymm2
7268 vpslld ymm2,ymm6,14
7269 vmovdqu ymm0,YMMWORD[((192-128))+rax]  ; ymm0 = slot 6
7270 vpsrld ymm3,ymm0,10  ; ymm3 borrowed as scratch for sigma1
7271
7272 vpxor ymm7,ymm7,ymm1
7273 vpsrld ymm1,ymm0,17
7274 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma0(slot 9) = ror7 ^ ror18 ^ shr3
7275 vpslld ymm2,ymm0,15
7276 vpaddd ymm5,ymm5,ymm7  ; X += sigma0
7277 vpxor ymm7,ymm3,ymm1
7278 vpsrld ymm1,ymm0,19
7279 vpxor ymm7,ymm7,ymm2
7280 vpslld ymm2,ymm0,13
7281 vpxor ymm7,ymm7,ymm1
7282 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma1(slot 6) = ror17 ^ ror19 ^ shr10
7283 vpaddd ymm5,ymm5,ymm7  ; X += sigma1 -> new schedule word
7284 vpsrld ymm7,ymm12,6
7285 vpslld ymm2,ymm12,26
7286 vmovdqu YMMWORD[(256-256-128)+rbx],ymm5  ; store the new word back to slot 8
7287 vpaddd ymm5,ymm5,ymm15  ; X += h
7288
7289 vpsrld ymm1,ymm12,11
7290 vpxor ymm7,ymm7,ymm2
7291 vpslld ymm2,ymm12,21
7292 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
7293 vpxor ymm7,ymm7,ymm1
7294
7295 vpsrld ymm1,ymm12,25
7296 vpxor ymm7,ymm7,ymm2
7297
7298 vpslld ymm2,ymm12,7
7299 vpandn ymm0,ymm12,ymm14  ; ~e & g
7300 vpand ymm3,ymm12,ymm13  ; e & f
7301
7302 vpxor ymm7,ymm7,ymm1
7303
7304 vpsrld ymm15,ymm8,2  ; h is consumed above; its register is reused for Sigma0/Maj
7305 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
7306
7307 vpslld ymm1,ymm8,30
7308 vpxor ymm0,ymm0,ymm3  ; ymm0 = Ch(e,f,g)
7309 vpxor ymm3,ymm9,ymm8  ; ymm3 = a^b
7310
7311 vpxor ymm15,ymm15,ymm1
7312 vpaddd ymm5,ymm5,ymm7  ; X += Sigma1(e)
7313
7314 vpsrld ymm1,ymm8,13
7315
7316 vpslld ymm2,ymm8,19
7317 vpaddd ymm5,ymm5,ymm0  ; X += Ch(e,f,g)  -> ymm5 = T1
7318 vpand ymm4,ymm4,ymm3  ; (b^c)&(a^b): ymm4 carries the preceding round's a^b, i.e. this round's b^c
7319
7320 vpxor ymm7,ymm15,ymm1
7321
7322 vpsrld ymm1,ymm8,22
7323 vpxor ymm7,ymm7,ymm2
7324
7325 vpslld ymm2,ymm8,10
7326 vpxor ymm15,ymm9,ymm4  ; ymm15 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
7327 vpaddd ymm11,ymm11,ymm5  ; d += T1
7328
7329 vpxor ymm7,ymm7,ymm1
7330 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
7331
7332 vpaddd ymm15,ymm15,ymm5  ; Maj + T1
7333 vpaddd ymm15,ymm15,ymm7  ; + Sigma0(a): ymm15 now holds the value that replaces h
; ---- Schedule expansion + SHA-256 round for schedule slot 9 (word held in ymm6) ----
; ymm6 += [slot 2] + sigma0([slot 10]) + sigma1([slot 7]); the result is stored back to slot 9.
; (slot k is the operand written as 32*k-128 off rax or 32*k-256-128 off rbx;
;  presumably rbx = rax+256 so both halves form one 16-entry ring - set up outside this view)
; Roles this round: a=ymm15 b=ymm8 c=ymm9 d=ymm10 e=ymm11 f=ymm12 g=ymm13 h=ymm14
7334 vmovdqu ymm5,YMMWORD[((320-256-128))+rbx]  ; ymm5 = slot 10 (also the word the next round updates)
7335 vpaddd ymm6,ymm6,YMMWORD[((64-128))+rax]  ; X += slot 2
7336
7337 vpsrld ymm7,ymm5,3
7338 vpsrld ymm1,ymm5,7
7339 vpslld ymm2,ymm5,25
7340 vpxor ymm7,ymm7,ymm1
7341 vpsrld ymm1,ymm5,18
7342 vpxor ymm7,ymm7,ymm2
7343 vpslld ymm2,ymm5,14
7344 vmovdqu ymm0,YMMWORD[((224-128))+rax]  ; ymm0 = slot 7
7345 vpsrld ymm4,ymm0,10  ; ymm4 borrowed as scratch for sigma1
7346
7347 vpxor ymm7,ymm7,ymm1
7348 vpsrld ymm1,ymm0,17
7349 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma0(slot 10) = ror7 ^ ror18 ^ shr3
7350 vpslld ymm2,ymm0,15
7351 vpaddd ymm6,ymm6,ymm7  ; X += sigma0
7352 vpxor ymm7,ymm4,ymm1
7353 vpsrld ymm1,ymm0,19
7354 vpxor ymm7,ymm7,ymm2
7355 vpslld ymm2,ymm0,13
7356 vpxor ymm7,ymm7,ymm1
7357 vpxor ymm7,ymm7,ymm2  ; ymm7 = sigma1(slot 7) = ror17 ^ ror19 ^ shr10
7358 vpaddd ymm6,ymm6,ymm7  ; X += sigma1 -> new schedule word
7359 vpsrld ymm7,ymm11,6
7360 vpslld ymm2,ymm11,26
7361 vmovdqu YMMWORD[(288-256-128)+rbx],ymm6  ; store the new word back to slot 9
7362 vpaddd ymm6,ymm6,ymm14  ; X += h
7363
7364 vpsrld ymm1,ymm11,11
7365 vpxor ymm7,ymm7,ymm2
7366 vpslld ymm2,ymm11,21
7367 vpaddd ymm6,ymm6,YMMWORD[((-96))+rbp]  ; X += constant vector at rbp (presumably the K256 table; set up outside this view)
7368 vpxor ymm7,ymm7,ymm1
7369
7370 vpsrld ymm1,ymm11,25
7371 vpxor ymm7,ymm7,ymm2
7372
7373 vpslld ymm2,ymm11,7
7374 vpandn ymm0,ymm11,ymm13  ; ~e & g
7375 vpand ymm4,ymm11,ymm12  ; e & f
7376
7377 vpxor ymm7,ymm7,ymm1
7378
7379 vpsrld ymm14,ymm15,2  ; h is consumed above; its register is reused for Sigma0/Maj
7380 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma1(e) = ror6 ^ ror11 ^ ror25
7381
7382 vpslld ymm1,ymm15,30
7383 vpxor ymm0,ymm0,ymm4  ; ymm0 = Ch(e,f,g)
7384 vpxor ymm4,ymm8,ymm15  ; ymm4 = a^b
7385
7386 vpxor ymm14,ymm14,ymm1
7387 vpaddd ymm6,ymm6,ymm7  ; X += Sigma1(e)
7388
7389 vpsrld ymm1,ymm15,13
7390
7391 vpslld ymm2,ymm15,19
7392 vpaddd ymm6,ymm6,ymm0  ; X += Ch(e,f,g)  -> ymm6 = T1
7393 vpand ymm3,ymm3,ymm4  ; (b^c)&(a^b): ymm3 carries the preceding round's a^b, i.e. this round's b^c
7394
7395 vpxor ymm7,ymm14,ymm1
7396
7397 vpsrld ymm1,ymm15,22
7398 vpxor ymm7,ymm7,ymm2
7399
7400 vpslld ymm2,ymm15,10
7401 vpxor ymm14,ymm8,ymm3  ; ymm14 = Maj(a,b,c) = b ^ ((a^b)&(b^c))
7402 vpaddd ymm10,ymm10,ymm6  ; d += T1
7403
7404 vpxor ymm7,ymm7,ymm1
7405 vpxor ymm7,ymm7,ymm2  ; ymm7 = Sigma0(a) = ror2 ^ ror13 ^ ror22
7406
7407 vpaddd ymm14,ymm14,ymm6  ; Maj + T1
7408 vpaddd ymm14,ymm14,ymm7  ; + Sigma0(a): ymm14 now holds the value that replaces h
7409 vmovdqu ymm6,YMMWORD[((352-256-128))+rbx]
7410 vpaddd ymm5,ymm5,YMMWORD[((96-128))+rax]
7411
7412 vpsrld ymm7,ymm6,3
7413 vpsrld ymm1,ymm6,7
7414 vpslld ymm2,ymm6,25
7415 vpxor ymm7,ymm7,ymm1
7416 vpsrld ymm1,ymm6,18
7417 vpxor ymm7,ymm7,ymm2
7418 vpslld ymm2,ymm6,14
7419 vmovdqu ymm0,YMMWORD[((256-256-128))+rbx]
7420 vpsrld ymm3,ymm0,10
7421
7422 vpxor ymm7,ymm7,ymm1
7423 vpsrld ymm1,ymm0,17
7424 vpxor ymm7,ymm7,ymm2
7425 vpslld ymm2,ymm0,15
7426 vpaddd ymm5,ymm5,ymm7
7427 vpxor ymm7,ymm3,ymm1
7428 vpsrld ymm1,ymm0,19
7429 vpxor ymm7,ymm7,ymm2
7430 vpslld ymm2,ymm0,13
7431 vpxor ymm7,ymm7,ymm1
7432 vpxor ymm7,ymm7,ymm2
7433 vpaddd ymm5,ymm5,ymm7
7434 vpsrld ymm7,ymm10,6
7435 vpslld ymm2,ymm10,26
7436 vmovdqu YMMWORD[(320-256-128)+rbx],ymm5
7437 vpaddd ymm5,ymm5,ymm13
7438
7439 vpsrld ymm1,ymm10,11
7440 vpxor ymm7,ymm7,ymm2
7441 vpslld ymm2,ymm10,21
7442 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp]
7443 vpxor ymm7,ymm7,ymm1
7444
7445 vpsrld ymm1,ymm10,25
7446 vpxor ymm7,ymm7,ymm2
7447
7448 vpslld ymm2,ymm10,7
7449 vpandn ymm0,ymm10,ymm12
7450 vpand ymm3,ymm10,ymm11
7451
7452 vpxor ymm7,ymm7,ymm1
7453
7454 vpsrld ymm13,ymm14,2
7455 vpxor ymm7,ymm7,ymm2
7456
7457 vpslld ymm1,ymm14,30
7458 vpxor ymm0,ymm0,ymm3
7459 vpxor ymm3,ymm15,ymm14
7460
7461 vpxor ymm13,ymm13,ymm1
7462 vpaddd ymm5,ymm5,ymm7
7463
7464 vpsrld ymm1,ymm14,13
7465
7466 vpslld ymm2,ymm14,19
7467 vpaddd ymm5,ymm5,ymm0
7468 vpand ymm4,ymm4,ymm3
7469
7470 vpxor ymm7,ymm13,ymm1
7471
7472 vpsrld ymm1,ymm14,22
7473 vpxor ymm7,ymm7,ymm2
7474
7475 vpslld ymm2,ymm14,10
7476 vpxor ymm13,ymm15,ymm4
7477 vpaddd ymm9,ymm9,ymm5
7478
7479 vpxor ymm7,ymm7,ymm1
7480 vpxor ymm7,ymm7,ymm2
7481
7482 vpaddd ymm13,ymm13,ymm5
7483 vpaddd ymm13,ymm13,ymm7
7484 vmovdqu ymm5,YMMWORD[((384-256-128))+rbx]
7485 vpaddd ymm6,ymm6,YMMWORD[((128-128))+rax]
7486
7487 vpsrld ymm7,ymm5,3
7488 vpsrld ymm1,ymm5,7
7489 vpslld ymm2,ymm5,25
7490 vpxor ymm7,ymm7,ymm1
7491 vpsrld ymm1,ymm5,18
7492 vpxor ymm7,ymm7,ymm2
7493 vpslld ymm2,ymm5,14
7494 vmovdqu ymm0,YMMWORD[((288-256-128))+rbx]
7495 vpsrld ymm4,ymm0,10
7496
7497 vpxor ymm7,ymm7,ymm1
7498 vpsrld ymm1,ymm0,17
7499 vpxor ymm7,ymm7,ymm2
7500 vpslld ymm2,ymm0,15
7501 vpaddd ymm6,ymm6,ymm7
7502 vpxor ymm7,ymm4,ymm1
7503 vpsrld ymm1,ymm0,19
7504 vpxor ymm7,ymm7,ymm2
7505 vpslld ymm2,ymm0,13
7506 vpxor ymm7,ymm7,ymm1
7507 vpxor ymm7,ymm7,ymm2
7508 vpaddd ymm6,ymm6,ymm7
7509 vpsrld ymm7,ymm9,6
7510 vpslld ymm2,ymm9,26
7511 vmovdqu YMMWORD[(352-256-128)+rbx],ymm6
7512 vpaddd ymm6,ymm6,ymm12
7513
7514 vpsrld ymm1,ymm9,11
7515 vpxor ymm7,ymm7,ymm2
7516 vpslld ymm2,ymm9,21
7517 vpaddd ymm6,ymm6,YMMWORD[((-32))+rbp]
7518 vpxor ymm7,ymm7,ymm1
7519
7520 vpsrld ymm1,ymm9,25
7521 vpxor ymm7,ymm7,ymm2
7522
7523 vpslld ymm2,ymm9,7
7524 vpandn ymm0,ymm9,ymm11
7525 vpand ymm4,ymm9,ymm10
7526
7527 vpxor ymm7,ymm7,ymm1
7528
7529 vpsrld ymm12,ymm13,2
7530 vpxor ymm7,ymm7,ymm2
7531
7532 vpslld ymm1,ymm13,30
7533 vpxor ymm0,ymm0,ymm4
7534 vpxor ymm4,ymm14,ymm13
7535
7536 vpxor ymm12,ymm12,ymm1
7537 vpaddd ymm6,ymm6,ymm7
7538
7539 vpsrld ymm1,ymm13,13
7540
7541 vpslld ymm2,ymm13,19
7542 vpaddd ymm6,ymm6,ymm0
7543 vpand ymm3,ymm3,ymm4
7544
7545 vpxor ymm7,ymm12,ymm1
7546
7547 vpsrld ymm1,ymm13,22
7548 vpxor ymm7,ymm7,ymm2
7549
7550 vpslld ymm2,ymm13,10
7551 vpxor ymm12,ymm14,ymm3
7552 vpaddd ymm8,ymm8,ymm6
7553
7554 vpxor ymm7,ymm7,ymm1
7555 vpxor ymm7,ymm7,ymm2
7556
7557 vpaddd ymm12,ymm12,ymm6
7558 vpaddd ymm12,ymm12,ymm7
7559 vmovdqu ymm6,YMMWORD[((416-256-128))+rbx]
7560 vpaddd ymm5,ymm5,YMMWORD[((160-128))+rax]
7561
7562 vpsrld ymm7,ymm6,3
7563 vpsrld ymm1,ymm6,7
7564 vpslld ymm2,ymm6,25
7565 vpxor ymm7,ymm7,ymm1
7566 vpsrld ymm1,ymm6,18
7567 vpxor ymm7,ymm7,ymm2
7568 vpslld ymm2,ymm6,14
7569 vmovdqu ymm0,YMMWORD[((320-256-128))+rbx]
7570 vpsrld ymm3,ymm0,10
7571
7572 vpxor ymm7,ymm7,ymm1
7573 vpsrld ymm1,ymm0,17
7574 vpxor ymm7,ymm7,ymm2
7575 vpslld ymm2,ymm0,15
7576 vpaddd ymm5,ymm5,ymm7
7577 vpxor ymm7,ymm3,ymm1
7578 vpsrld ymm1,ymm0,19
7579 vpxor ymm7,ymm7,ymm2
7580 vpslld ymm2,ymm0,13
7581 vpxor ymm7,ymm7,ymm1
7582 vpxor ymm7,ymm7,ymm2
7583 vpaddd ymm5,ymm5,ymm7
7584 vpsrld ymm7,ymm8,6
7585 vpslld ymm2,ymm8,26
7586 vmovdqu YMMWORD[(384-256-128)+rbx],ymm5
7587 vpaddd ymm5,ymm5,ymm11
7588
7589 vpsrld ymm1,ymm8,11
7590 vpxor ymm7,ymm7,ymm2
7591 vpslld ymm2,ymm8,21
7592 vpaddd ymm5,ymm5,YMMWORD[rbp]
7593 vpxor ymm7,ymm7,ymm1
7594
7595 vpsrld ymm1,ymm8,25
7596 vpxor ymm7,ymm7,ymm2
7597
7598 vpslld ymm2,ymm8,7
7599 vpandn ymm0,ymm8,ymm10
7600 vpand ymm3,ymm8,ymm9
7601
7602 vpxor ymm7,ymm7,ymm1
7603
7604 vpsrld ymm11,ymm12,2
7605 vpxor ymm7,ymm7,ymm2
7606
7607 vpslld ymm1,ymm12,30
7608 vpxor ymm0,ymm0,ymm3
7609 vpxor ymm3,ymm13,ymm12
7610
7611 vpxor ymm11,ymm11,ymm1
7612 vpaddd ymm5,ymm5,ymm7
7613
7614 vpsrld ymm1,ymm12,13
7615
7616 vpslld ymm2,ymm12,19
7617 vpaddd ymm5,ymm5,ymm0
7618 vpand ymm4,ymm4,ymm3
7619
7620 vpxor ymm7,ymm11,ymm1
7621
7622 vpsrld ymm1,ymm12,22
7623 vpxor ymm7,ymm7,ymm2
7624
7625 vpslld ymm2,ymm12,10
7626 vpxor ymm11,ymm13,ymm4
7627 vpaddd ymm15,ymm15,ymm5
7628
7629 vpxor ymm7,ymm7,ymm1
7630 vpxor ymm7,ymm7,ymm2
7631
7632 vpaddd ymm11,ymm11,ymm5
7633 vpaddd ymm11,ymm11,ymm7
7634 vmovdqu ymm5,YMMWORD[((448-256-128))+rbx]
7635 vpaddd ymm6,ymm6,YMMWORD[((192-128))+rax]
7636
7637 vpsrld ymm7,ymm5,3
7638 vpsrld ymm1,ymm5,7
7639 vpslld ymm2,ymm5,25
7640 vpxor ymm7,ymm7,ymm1
7641 vpsrld ymm1,ymm5,18
7642 vpxor ymm7,ymm7,ymm2
7643 vpslld ymm2,ymm5,14
7644 vmovdqu ymm0,YMMWORD[((352-256-128))+rbx]
7645 vpsrld ymm4,ymm0,10
7646
7647 vpxor ymm7,ymm7,ymm1
7648 vpsrld ymm1,ymm0,17
7649 vpxor ymm7,ymm7,ymm2
7650 vpslld ymm2,ymm0,15
7651 vpaddd ymm6,ymm6,ymm7
7652 vpxor ymm7,ymm4,ymm1
7653 vpsrld ymm1,ymm0,19
7654 vpxor ymm7,ymm7,ymm2
7655 vpslld ymm2,ymm0,13
7656 vpxor ymm7,ymm7,ymm1
7657 vpxor ymm7,ymm7,ymm2
7658 vpaddd ymm6,ymm6,ymm7
7659 vpsrld ymm7,ymm15,6
7660 vpslld ymm2,ymm15,26
7661 vmovdqu YMMWORD[(416-256-128)+rbx],ymm6
7662 vpaddd ymm6,ymm6,ymm10
7663
7664 vpsrld ymm1,ymm15,11
7665 vpxor ymm7,ymm7,ymm2
7666 vpslld ymm2,ymm15,21
7667 vpaddd ymm6,ymm6,YMMWORD[32+rbp]
7668 vpxor ymm7,ymm7,ymm1
7669
7670 vpsrld ymm1,ymm15,25
7671 vpxor ymm7,ymm7,ymm2
7672
7673 vpslld ymm2,ymm15,7
7674 vpandn ymm0,ymm15,ymm9
7675 vpand ymm4,ymm15,ymm8
7676
7677 vpxor ymm7,ymm7,ymm1
7678
7679 vpsrld ymm10,ymm11,2
7680 vpxor ymm7,ymm7,ymm2
7681
7682 vpslld ymm1,ymm11,30
7683 vpxor ymm0,ymm0,ymm4
7684 vpxor ymm4,ymm12,ymm11
7685
7686 vpxor ymm10,ymm10,ymm1
7687 vpaddd ymm6,ymm6,ymm7
7688
7689 vpsrld ymm1,ymm11,13
7690
7691 vpslld ymm2,ymm11,19
7692 vpaddd ymm6,ymm6,ymm0
7693 vpand ymm3,ymm3,ymm4
7694
7695 vpxor ymm7,ymm10,ymm1
7696
7697 vpsrld ymm1,ymm11,22
7698 vpxor ymm7,ymm7,ymm2
7699
7700 vpslld ymm2,ymm11,10
7701 vpxor ymm10,ymm12,ymm3
7702 vpaddd ymm14,ymm14,ymm6
7703
7704 vpxor ymm7,ymm7,ymm1
7705 vpxor ymm7,ymm7,ymm2
7706
7707 vpaddd ymm10,ymm10,ymm6
7708 vpaddd ymm10,ymm10,ymm7
7709 vmovdqu ymm6,YMMWORD[((480-256-128))+rbx]
7710 vpaddd ymm5,ymm5,YMMWORD[((224-128))+rax]
7711
7712 vpsrld ymm7,ymm6,3
7713 vpsrld ymm1,ymm6,7
7714 vpslld ymm2,ymm6,25
7715 vpxor ymm7,ymm7,ymm1
7716 vpsrld ymm1,ymm6,18
7717 vpxor ymm7,ymm7,ymm2
7718 vpslld ymm2,ymm6,14
7719 vmovdqu ymm0,YMMWORD[((384-256-128))+rbx]
7720 vpsrld ymm3,ymm0,10
7721
7722 vpxor ymm7,ymm7,ymm1
7723 vpsrld ymm1,ymm0,17
7724 vpxor ymm7,ymm7,ymm2
7725 vpslld ymm2,ymm0,15
7726 vpaddd ymm5,ymm5,ymm7
7727 vpxor ymm7,ymm3,ymm1
7728 vpsrld ymm1,ymm0,19
7729 vpxor ymm7,ymm7,ymm2
7730 vpslld ymm2,ymm0,13
7731 vpxor ymm7,ymm7,ymm1
7732 vpxor ymm7,ymm7,ymm2
7733 vpaddd ymm5,ymm5,ymm7
7734 vpsrld ymm7,ymm14,6
7735 vpslld ymm2,ymm14,26
7736 vmovdqu YMMWORD[(448-256-128)+rbx],ymm5
7737 vpaddd ymm5,ymm5,ymm9
7738
7739 vpsrld ymm1,ymm14,11
7740 vpxor ymm7,ymm7,ymm2
7741 vpslld ymm2,ymm14,21
7742 vpaddd ymm5,ymm5,YMMWORD[64+rbp]
7743 vpxor ymm7,ymm7,ymm1
7744
7745 vpsrld ymm1,ymm14,25
7746 vpxor ymm7,ymm7,ymm2
7747
7748 vpslld ymm2,ymm14,7
7749 vpandn ymm0,ymm14,ymm8
7750 vpand ymm3,ymm14,ymm15
7751
7752 vpxor ymm7,ymm7,ymm1
7753
7754 vpsrld ymm9,ymm10,2
7755 vpxor ymm7,ymm7,ymm2
7756
7757 vpslld ymm1,ymm10,30
7758 vpxor ymm0,ymm0,ymm3
7759 vpxor ymm3,ymm11,ymm10
7760
7761 vpxor ymm9,ymm9,ymm1
7762 vpaddd ymm5,ymm5,ymm7
7763
7764 vpsrld ymm1,ymm10,13
7765
7766 vpslld ymm2,ymm10,19
7767 vpaddd ymm5,ymm5,ymm0
7768 vpand ymm4,ymm4,ymm3
7769
7770 vpxor ymm7,ymm9,ymm1
7771
7772 vpsrld ymm1,ymm10,22
7773 vpxor ymm7,ymm7,ymm2
7774
7775 vpslld ymm2,ymm10,10
7776 vpxor ymm9,ymm11,ymm4
7777 vpaddd ymm13,ymm13,ymm5
7778
7779 vpxor ymm7,ymm7,ymm1
7780 vpxor ymm7,ymm7,ymm2
7781
7782 vpaddd ymm9,ymm9,ymm5
7783 vpaddd ymm9,ymm9,ymm7
7784 vmovdqu ymm5,YMMWORD[((0-128))+rax]
7785 vpaddd ymm6,ymm6,YMMWORD[((256-256-128))+rbx]
7786
7787 vpsrld ymm7,ymm5,3
7788 vpsrld ymm1,ymm5,7
7789 vpslld ymm2,ymm5,25
7790 vpxor ymm7,ymm7,ymm1
7791 vpsrld ymm1,ymm5,18
7792 vpxor ymm7,ymm7,ymm2
7793 vpslld ymm2,ymm5,14
7794 vmovdqu ymm0,YMMWORD[((416-256-128))+rbx]
7795 vpsrld ymm4,ymm0,10
7796
7797 vpxor ymm7,ymm7,ymm1
7798 vpsrld ymm1,ymm0,17
7799 vpxor ymm7,ymm7,ymm2
7800 vpslld ymm2,ymm0,15
7801 vpaddd ymm6,ymm6,ymm7
7802 vpxor ymm7,ymm4,ymm1
7803 vpsrld ymm1,ymm0,19
7804 vpxor ymm7,ymm7,ymm2
7805 vpslld ymm2,ymm0,13
7806 vpxor ymm7,ymm7,ymm1
7807 vpxor ymm7,ymm7,ymm2
7808 vpaddd ymm6,ymm6,ymm7
7809 vpsrld ymm7,ymm13,6
7810 vpslld ymm2,ymm13,26
7811 vmovdqu YMMWORD[(480-256-128)+rbx],ymm6
7812 vpaddd ymm6,ymm6,ymm8
7813
7814 vpsrld ymm1,ymm13,11
7815 vpxor ymm7,ymm7,ymm2
7816 vpslld ymm2,ymm13,21
7817 vpaddd ymm6,ymm6,YMMWORD[96+rbp]
7818 vpxor ymm7,ymm7,ymm1
7819
7820 vpsrld ymm1,ymm13,25
7821 vpxor ymm7,ymm7,ymm2
7822
7823 vpslld ymm2,ymm13,7
7824 vpandn ymm0,ymm13,ymm15
7825 vpand ymm4,ymm13,ymm14
7826
7827 vpxor ymm7,ymm7,ymm1
7828
7829 vpsrld ymm8,ymm9,2
7830 vpxor ymm7,ymm7,ymm2
7831
7832 vpslld ymm1,ymm9,30
7833 vpxor ymm0,ymm0,ymm4
7834 vpxor ymm4,ymm10,ymm9
7835
7836 vpxor ymm8,ymm8,ymm1
7837 vpaddd ymm6,ymm6,ymm7
7838
7839 vpsrld ymm1,ymm9,13
7840
7841 vpslld ymm2,ymm9,19
7842 vpaddd ymm6,ymm6,ymm0
7843 vpand ymm3,ymm3,ymm4
7844
7845 vpxor ymm7,ymm8,ymm1
7846
7847 vpsrld ymm1,ymm9,22
7848 vpxor ymm7,ymm7,ymm2
7849
7850 vpslld ymm2,ymm9,10
7851 vpxor ymm8,ymm10,ymm3
7852 vpaddd ymm12,ymm12,ymm6
7853
7854 vpxor ymm7,ymm7,ymm1
7855 vpxor ymm7,ymm7,ymm2
7856
7857 vpaddd ymm8,ymm8,ymm6
7858 vpaddd ymm8,ymm8,ymm7
7859 add rbp,256
7860 dec ecx
7861 jnz NEAR $L$oop_16_xx_avx2
7862
7863 mov ecx,1
7864 lea rbx,[512+rsp]
7865 lea rbp,[((K256+128))]
7866 cmp ecx,DWORD[rbx]
7867 cmovge r12,rbp
7868 cmp ecx,DWORD[4+rbx]
7869 cmovge r13,rbp
7870 cmp ecx,DWORD[8+rbx]
7871 cmovge r14,rbp
7872 cmp ecx,DWORD[12+rbx]
7873 cmovge r15,rbp
7874 cmp ecx,DWORD[16+rbx]
7875 cmovge r8,rbp
7876 cmp ecx,DWORD[20+rbx]
7877 cmovge r9,rbp
7878 cmp ecx,DWORD[24+rbx]
7879 cmovge r10,rbp
7880 cmp ecx,DWORD[28+rbx]
7881 cmovge r11,rbp
7882 vmovdqa ymm7,YMMWORD[rbx]
7883 vpxor ymm0,ymm0,ymm0
7884 vmovdqa ymm6,ymm7
7885 vpcmpgtd ymm6,ymm6,ymm0
7886 vpaddd ymm7,ymm7,ymm6
7887
7888 vmovdqu ymm0,YMMWORD[((0-128))+rdi]
7889 vpand ymm8,ymm8,ymm6
7890 vmovdqu ymm1,YMMWORD[((32-128))+rdi]
7891 vpand ymm9,ymm9,ymm6
7892 vmovdqu ymm2,YMMWORD[((64-128))+rdi]
7893 vpand ymm10,ymm10,ymm6
7894 vmovdqu ymm5,YMMWORD[((96-128))+rdi]
7895 vpand ymm11,ymm11,ymm6
7896 vpaddd ymm8,ymm8,ymm0
7897 vmovdqu ymm0,YMMWORD[((128-128))+rdi]
7898 vpand ymm12,ymm12,ymm6
7899 vpaddd ymm9,ymm9,ymm1
7900 vmovdqu ymm1,YMMWORD[((160-128))+rdi]
7901 vpand ymm13,ymm13,ymm6
7902 vpaddd ymm10,ymm10,ymm2
7903 vmovdqu ymm2,YMMWORD[((192-128))+rdi]
7904 vpand ymm14,ymm14,ymm6
7905 vpaddd ymm11,ymm11,ymm5
7906 vmovdqu ymm5,YMMWORD[((224-128))+rdi]
7907 vpand ymm15,ymm15,ymm6
7908 vpaddd ymm12,ymm12,ymm0
7909 vpaddd ymm13,ymm13,ymm1
7910 vmovdqu YMMWORD[(0-128)+rdi],ymm8
7911 vpaddd ymm14,ymm14,ymm2
7912 vmovdqu YMMWORD[(32-128)+rdi],ymm9
7913 vpaddd ymm15,ymm15,ymm5
7914 vmovdqu YMMWORD[(64-128)+rdi],ymm10
7915 vmovdqu YMMWORD[(96-128)+rdi],ymm11
7916 vmovdqu YMMWORD[(128-128)+rdi],ymm12
7917 vmovdqu YMMWORD[(160-128)+rdi],ymm13
7918 vmovdqu YMMWORD[(192-128)+rdi],ymm14
7919 vmovdqu YMMWORD[(224-128)+rdi],ymm15
7920
7921 vmovdqu YMMWORD[rbx],ymm7
7922 lea rbx,[((256+128))+rsp]
7923 vmovdqu ymm6,YMMWORD[$L$pbswap]
7924 dec edx
7925 jnz NEAR $L$oop_avx2
7926
7927
7928
7929
7930
7931
7932
7933$L$done_avx2:
7934 mov rax,QWORD[544+rsp]
7935
7936 vzeroupper
7937 movaps xmm6,XMMWORD[((-216))+rax]
7938 movaps xmm7,XMMWORD[((-200))+rax]
7939 movaps xmm8,XMMWORD[((-184))+rax]
7940 movaps xmm9,XMMWORD[((-168))+rax]
7941 movaps xmm10,XMMWORD[((-152))+rax]
7942 movaps xmm11,XMMWORD[((-136))+rax]
7943 movaps xmm12,XMMWORD[((-120))+rax]
7944 movaps xmm13,XMMWORD[((-104))+rax]
7945 movaps xmm14,XMMWORD[((-88))+rax]
7946 movaps xmm15,XMMWORD[((-72))+rax]
7947 mov r15,QWORD[((-48))+rax]
7948
7949 mov r14,QWORD[((-40))+rax]
7950
7951 mov r13,QWORD[((-32))+rax]
7952
7953 mov r12,QWORD[((-24))+rax]
7954
7955 mov rbp,QWORD[((-16))+rax]
7956
7957 mov rbx,QWORD[((-8))+rax]
7958
7959 lea rsp,[rax]
7960
7961$L$epilogue_avx2:
7962 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
7963 mov rsi,QWORD[16+rsp]
7964 DB 0F3h,0C3h ;repret
7965
7966$L$SEH_end_sha256_multi_block_avx2:
; K256: table of 64 dword constants, each one repeated eight times
; (two DD lines of four), so every constant fills one 32-byte row.
; The first and last values (1116352408 = 0x428a2f98,
; 3329325298 = 0xc67178f2) match the first and last entries of
; K256_shaext below, i.e. these are the SHA-256 round constants in
; lane-broadcast form.  The AVX2 code addresses the table through
; rbp = K256+128 and adds one row per round with
; vpaddd ymm,ymm,YMMWORD[disp+rbp], advancing rbp by 256 per loop pass.
; The same K256+128 address is also loaded into the per-lane input
; pointers by the cmovle/cmovge ...,rbp sequences, so the table doubles
; as readable filler for lanes whose block count is exhausted.
7967ALIGN 256
7968K256:
7969 DD 1116352408,1116352408,1116352408,1116352408
7970 DD 1116352408,1116352408,1116352408,1116352408
7971 DD 1899447441,1899447441,1899447441,1899447441
7972 DD 1899447441,1899447441,1899447441,1899447441
7973 DD 3049323471,3049323471,3049323471,3049323471
7974 DD 3049323471,3049323471,3049323471,3049323471
7975 DD 3921009573,3921009573,3921009573,3921009573
7976 DD 3921009573,3921009573,3921009573,3921009573
7977 DD 961987163,961987163,961987163,961987163
7978 DD 961987163,961987163,961987163,961987163
7979 DD 1508970993,1508970993,1508970993,1508970993
7980 DD 1508970993,1508970993,1508970993,1508970993
7981 DD 2453635748,2453635748,2453635748,2453635748
7982 DD 2453635748,2453635748,2453635748,2453635748
7983 DD 2870763221,2870763221,2870763221,2870763221
7984 DD 2870763221,2870763221,2870763221,2870763221
7985 DD 3624381080,3624381080,3624381080,3624381080
7986 DD 3624381080,3624381080,3624381080,3624381080
7987 DD 310598401,310598401,310598401,310598401
7988 DD 310598401,310598401,310598401,310598401
7989 DD 607225278,607225278,607225278,607225278
7990 DD 607225278,607225278,607225278,607225278
7991 DD 1426881987,1426881987,1426881987,1426881987
7992 DD 1426881987,1426881987,1426881987,1426881987
7993 DD 1925078388,1925078388,1925078388,1925078388
7994 DD 1925078388,1925078388,1925078388,1925078388
7995 DD 2162078206,2162078206,2162078206,2162078206
7996 DD 2162078206,2162078206,2162078206,2162078206
7997 DD 2614888103,2614888103,2614888103,2614888103
7998 DD 2614888103,2614888103,2614888103,2614888103
7999 DD 3248222580,3248222580,3248222580,3248222580
8000 DD 3248222580,3248222580,3248222580,3248222580
8001 DD 3835390401,3835390401,3835390401,3835390401
8002 DD 3835390401,3835390401,3835390401,3835390401
8003 DD 4022224774,4022224774,4022224774,4022224774
8004 DD 4022224774,4022224774,4022224774,4022224774
8005 DD 264347078,264347078,264347078,264347078
8006 DD 264347078,264347078,264347078,264347078
8007 DD 604807628,604807628,604807628,604807628
8008 DD 604807628,604807628,604807628,604807628
8009 DD 770255983,770255983,770255983,770255983
8010 DD 770255983,770255983,770255983,770255983
8011 DD 1249150122,1249150122,1249150122,1249150122
8012 DD 1249150122,1249150122,1249150122,1249150122
8013 DD 1555081692,1555081692,1555081692,1555081692
8014 DD 1555081692,1555081692,1555081692,1555081692
8015 DD 1996064986,1996064986,1996064986,1996064986
8016 DD 1996064986,1996064986,1996064986,1996064986
8017 DD 2554220882,2554220882,2554220882,2554220882
8018 DD 2554220882,2554220882,2554220882,2554220882
8019 DD 2821834349,2821834349,2821834349,2821834349
8020 DD 2821834349,2821834349,2821834349,2821834349
8021 DD 2952996808,2952996808,2952996808,2952996808
8022 DD 2952996808,2952996808,2952996808,2952996808
8023 DD 3210313671,3210313671,3210313671,3210313671
8024 DD 3210313671,3210313671,3210313671,3210313671
8025 DD 3336571891,3336571891,3336571891,3336571891
8026 DD 3336571891,3336571891,3336571891,3336571891
8027 DD 3584528711,3584528711,3584528711,3584528711
8028 DD 3584528711,3584528711,3584528711,3584528711
8029 DD 113926993,113926993,113926993,113926993
8030 DD 113926993,113926993,113926993,113926993
8031 DD 338241895,338241895,338241895,338241895
8032 DD 338241895,338241895,338241895,338241895
8033 DD 666307205,666307205,666307205,666307205
8034 DD 666307205,666307205,666307205,666307205
8035 DD 773529912,773529912,773529912,773529912
8036 DD 773529912,773529912,773529912,773529912
8037 DD 1294757372,1294757372,1294757372,1294757372
8038 DD 1294757372,1294757372,1294757372,1294757372
8039 DD 1396182291,1396182291,1396182291,1396182291
8040 DD 1396182291,1396182291,1396182291,1396182291
8041 DD 1695183700,1695183700,1695183700,1695183700
8042 DD 1695183700,1695183700,1695183700,1695183700
8043 DD 1986661051,1986661051,1986661051,1986661051
8044 DD 1986661051,1986661051,1986661051,1986661051
8045 DD 2177026350,2177026350,2177026350,2177026350
8046 DD 2177026350,2177026350,2177026350,2177026350
8047 DD 2456956037,2456956037,2456956037,2456956037
8048 DD 2456956037,2456956037,2456956037,2456956037
8049 DD 2730485921,2730485921,2730485921,2730485921
8050 DD 2730485921,2730485921,2730485921,2730485921
8051 DD 2820302411,2820302411,2820302411,2820302411
8052 DD 2820302411,2820302411,2820302411,2820302411
8053 DD 3259730800,3259730800,3259730800,3259730800
8054 DD 3259730800,3259730800,3259730800,3259730800
8055 DD 3345764771,3345764771,3345764771,3345764771
8056 DD 3345764771,3345764771,3345764771,3345764771
8057 DD 3516065817,3516065817,3516065817,3516065817
8058 DD 3516065817,3516065817,3516065817,3516065817
8059 DD 3600352804,3600352804,3600352804,3600352804
8060 DD 3600352804,3600352804,3600352804,3600352804
8061 DD 4094571909,4094571909,4094571909,4094571909
8062 DD 4094571909,4094571909,4094571909,4094571909
8063 DD 275423344,275423344,275423344,275423344
8064 DD 275423344,275423344,275423344,275423344
8065 DD 430227734,430227734,430227734,430227734
8066 DD 430227734,430227734,430227734,430227734
8067 DD 506948616,506948616,506948616,506948616
8068 DD 506948616,506948616,506948616,506948616
8069 DD 659060556,659060556,659060556,659060556
8070 DD 659060556,659060556,659060556,659060556
8071 DD 883997877,883997877,883997877,883997877
8072 DD 883997877,883997877,883997877,883997877
8073 DD 958139571,958139571,958139571,958139571
8074 DD 958139571,958139571,958139571,958139571
8075 DD 1322822218,1322822218,1322822218,1322822218
8076 DD 1322822218,1322822218,1322822218,1322822218
8077 DD 1537002063,1537002063,1537002063,1537002063
8078 DD 1537002063,1537002063,1537002063,1537002063
8079 DD 1747873779,1747873779,1747873779,1747873779
8080 DD 1747873779,1747873779,1747873779,1747873779
8081 DD 1955562222,1955562222,1955562222,1955562222
8082 DD 1955562222,1955562222,1955562222,1955562222
8083 DD 2024104815,2024104815,2024104815,2024104815
8084 DD 2024104815,2024104815,2024104815,2024104815
8085 DD 2227730452,2227730452,2227730452,2227730452
8086 DD 2227730452,2227730452,2227730452,2227730452
8087 DD 2361852424,2361852424,2361852424,2361852424
8088 DD 2361852424,2361852424,2361852424,2361852424
8089 DD 2428436474,2428436474,2428436474,2428436474
8090 DD 2428436474,2428436474,2428436474,2428436474
8091 DD 2756734187,2756734187,2756734187,2756734187
8092 DD 2756734187,2756734187,2756734187,2756734187
8093 DD 3204031479,3204031479,3204031479,3204031479
8094 DD 3204031479,3204031479,3204031479,3204031479
8095 DD 3329325298,3329325298,3329325298,3329325298
8096 DD 3329325298,3329325298,3329325298,3329325298
; $L$pbswap: 32 bytes (the same 16-byte pattern twice), loaded as a whole
; YMM register by the AVX2 loop (vmovdqu ymm6,YMMWORD[$L$pbswap]).
; Stored little-endian, each dword yields the byte indices 3,2,1,0 /
; 7,6,5,4 / ..., so as a byte-shuffle control it would reverse the bytes
; inside every dword.
; NOTE(review): presumably the vpshufb mask for big-endian message loads;
; the consuming instruction is outside this part of the file - confirm.
8097$L$pbswap:
8098 DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
8099 DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
; K256_shaext: 64 dword constants stored once each (16 rows of four).
; The first and last entries (0x428a2f98, 0xc67178f2) equal the first
; and last rows of K256, so this is the same constant sequence without
; the per-lane replication.
; NOTE(review): by its name this is the table for the SHA-extension code
; path (_shaext_shortcut); that code is not in this part of the file.
8100K256_shaext:
8101 DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
8102 DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
8103 DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
8104 DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
8105 DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
8106 DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
8107 DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
8108 DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
8109 DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
8110 DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
8111 DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
8112 DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
8113 DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
8114 DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
8115 DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
8116 DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
; NUL-terminated ASCII identification string embedded in the object:
; "SHA256 multi-block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
; It carries no label and is not referenced by any code shown here.
8117DB 83,72,65,50,53,54,32,109,117,108,116,105,45,98,108,111
8118DB 99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114
8119DB 32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71
8120DB 65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112
8121DB 101,110,115,115,108,46,111,114,103,62,0
8122EXTERN __imp_RtlVirtualUnwind
8123
;-----------------------------------------------------------------------
; se_handler - Win64 language-specific exception handler named by the
; .xdata records of sha256_multi_block and its _shaext and _avx variants.
;
; In:    r8 = pointer to the CONTEXT record being unwound
;        r9 = pointer to the DISPATCHER_CONTEXT
;        (rcx/rdx are not read)
; Out:   eax = 1
; Effect: if the faulting RIP lies between the two labels stored as
;        HandlerData (body label, epilogue label), the handler fetches
;        the stack pointer the function saved in its frame and writes
;        the saved rbx, rbp and xmm6-xmm15 back into the CONTEXT record.
;        In every case it then patches Rsp/Rsi/Rdi in the CONTEXT, copies
;        the CONTEXT to DISPATCHER_CONTEXT.ContextRecord and calls
;        RtlVirtualUnwind.
; Field names in the comments follow the Windows x64 CONTEXT and
; DISPATCHER_CONTEXT layouts - verify the offsets against winnt.h.
; Stack: 8 pushes + pushfq + 64 bytes keep rsp 16-byte aligned at the
;        call and provide the 32-byte shadow area plus four stack
;        argument slots.
;-----------------------------------------------------------------------
8124ALIGN 16
8125se_handler:
8126 push rsi
8127 push rdi
8128 push rbx
8129 push rbp
8130 push r12
8131 push r13
8132 push r14
8133 push r15
8134 pushfq
8135 sub rsp,64
8136
8137 mov rax,QWORD[120+r8] ; context->Rax: the function copies the entry rsp into rax before its body
8138 mov rbx,QWORD[248+r8] ; context->Rip
8139
8140 mov rsi,QWORD[8+r9] ; disp->ImageBase
8141 mov r11,QWORD[56+r9] ; disp->HandlerData (the two label RVAs from .xdata)
8142
8143 mov r10d,DWORD[r11] ; HandlerData[0] = RVA of the body label
8144 lea r10,[r10*1+rsi] ; RVA -> absolute address
8145 cmp rbx,r10
8146 jb NEAR $L$in_prologue ; Rip before the body label: frame not set up yet, rax is the entry rsp
8147
8148 mov rax,QWORD[152+r8] ; context->Rsp
8149
8150 mov r10d,DWORD[4+r11] ; HandlerData[1] = RVA of the epilogue label
8151 lea r10,[r10*1+rsi]
8152 cmp rbx,r10
8153 jae NEAR $L$in_prologue ; Rip at/after the epilogue label: rsp is already the entry rsp
8154
8155 mov rax,QWORD[272+rax] ; entry rsp saved in the frame (sha256_multi_block stores it with mov QWORD[272+rsp],rax)
8156
8157 mov rbx,QWORD[((-8))+rax] ; saved rbx (first push)
8158 mov rbp,QWORD[((-16))+rax] ; saved rbp (second push)
8159 mov QWORD[144+r8],rbx ; context->Rbx
8160 mov QWORD[160+r8],rbp ; context->Rbp
8161
8162 lea rsi,[((-24-160))+rax] ; xmm6..xmm15 save area: 160 bytes below the two pushed registers
8163 lea rdi,[512+r8] ; &context->Xmm6
8164 mov ecx,20 ; 20 qwords = 10 XMM registers
8165 DD 0xa548f3fc ; bytes FC F3 48 A5 = cld; rep movsq
8166
8167$L$in_prologue:
8168 mov rdi,QWORD[8+rax] ; rdi/rsi stored above the return address by the WIN64 prologue
8169 mov rsi,QWORD[16+rax]
8170 mov QWORD[152+r8],rax ; context->Rsp = entry rsp
8171 mov QWORD[168+r8],rsi ; context->Rsi
8172 mov QWORD[176+r8],rdi ; context->Rdi
8173
8174 mov rdi,QWORD[40+r9] ; disp->ContextRecord (copy destination)
8175 mov rsi,r8 ; copy source: the patched CONTEXT
8176 mov ecx,154 ; 154 qwords = 1232 bytes copied
8177 DD 0xa548f3fc ; cld; rep movsq
8178
8179 mov rsi,r9 ; rsi = disp; r8/r9 are reused as call arguments below
8180 xor rcx,rcx ; arg1 = 0
8181 mov rdx,QWORD[8+rsi] ; arg2 = disp->ImageBase
8182 mov r8,QWORD[rsi] ; arg3 = disp->ControlPc
8183 mov r9,QWORD[16+rsi] ; arg4 = disp->FunctionEntry
8184 mov r10,QWORD[40+rsi] ; disp->ContextRecord
8185 lea r11,[56+rsi] ; &disp->HandlerData
8186 lea r12,[24+rsi] ; &disp->EstablisherFrame
8187 mov QWORD[32+rsp],r10 ; arg5
8188 mov QWORD[40+rsp],r11 ; arg6
8189 mov QWORD[48+rsp],r12 ; arg7
8190 mov QWORD[56+rsp],rcx ; arg8 = 0
8191 call QWORD[__imp_RtlVirtualUnwind]
8192
8193 mov eax,1 ; handler result; presumably ExceptionContinueSearch - confirm
8194 add rsp,64
8195 popfq
8196 pop r15
8197 pop r14
8198 pop r13
8199 pop r12
8200 pop rbp
8201 pop rbx
8202 pop rdi
8203 pop rsi
8204 DB 0F3h,0C3h ;repret
8205
8206
;-----------------------------------------------------------------------
; avx2_handler - Win64 language-specific exception handler named by the
; .xdata record of sha256_multi_block_avx2.
;
; In:    r8 = pointer to the CONTEXT record being unwound
;        r9 = pointer to the DISPATCHER_CONTEXT
; Out:   does not return on its own; it always continues at
;        $L$in_prologue inside se_handler, which finishes the unwind and
;        returns eax = 1.
; Effect: when the faulting RIP lies between the body and epilogue labels
;        stored as HandlerData, reloads the stack pointer the function
;        saved in its frame and writes the saved rbx, rbp, r12-r15 and
;        xmm6-xmm15 back into the CONTEXT record.
; Field names in the comments follow the Windows x64 CONTEXT and
; DISPATCHER_CONTEXT layouts - verify the offsets against winnt.h.
;
; Fix: the saved stack pointer lives in the faulting function's frame at
; [544+rsp] (the AVX2 epilogue reloads it with mov rax,QWORD[544+rsp]),
; so it has to be read relative to rax = context->Rsp.  The previous
; QWORD[544+r8] read offset 544 of the CONTEXT record instead, which lies
; inside the XMM register area beginning at offset 512, and then
; dereferenced that value as a pointer.
;-----------------------------------------------------------------------
8207ALIGN 16
8208avx2_handler:
8209 push rsi
8210 push rdi
8211 push rbx
8212 push rbp
8213 push r12
8214 push r13
8215 push r14
8216 push r15
8217 pushfq
8218 sub rsp,64
8219
8220 mov rax,QWORD[120+r8] ; context->Rax (entry rsp while still in the prologue)
8221 mov rbx,QWORD[248+r8] ; context->Rip
8222
8223 mov rsi,QWORD[8+r9] ; disp->ImageBase
8224 mov r11,QWORD[56+r9] ; disp->HandlerData (two label RVAs)
8225
8226 mov r10d,DWORD[r11] ; HandlerData[0] = RVA of $L$body_avx2
8227 lea r10,[r10*1+rsi] ; RVA -> absolute address
8228 cmp rbx,r10
8229 jb NEAR $L$in_prologue ; Rip before the body label: nothing to restore
8230
8231 mov rax,QWORD[152+r8] ; context->Rsp
8232
8233 mov r10d,DWORD[4+r11] ; HandlerData[1] = RVA of $L$epilogue_avx2
8234 lea r10,[r10*1+rsi]
8235 cmp rbx,r10
8236 jae NEAR $L$in_prologue ; Rip at/after the epilogue label: registers already restored
8237
8238 mov rax,QWORD[544+rax] ; entry rsp saved in the frame, addressed via context->Rsp
8239
8240 mov rbx,QWORD[((-8))+rax] ; callee-saved GPRs, same slots the epilogue reads
8241 mov rbp,QWORD[((-16))+rax]
8242 mov r12,QWORD[((-24))+rax]
8243 mov r13,QWORD[((-32))+rax]
8244 mov r14,QWORD[((-40))+rax]
8245 mov r15,QWORD[((-48))+rax]
8246 mov QWORD[144+r8],rbx ; context->Rbx
8247 mov QWORD[160+r8],rbp ; context->Rbp
8248 mov QWORD[216+r8],r12 ; context->R12
8249 mov QWORD[224+r8],r13 ; context->R13
8250 mov QWORD[232+r8],r14 ; context->R14
8251 mov QWORD[240+r8],r15 ; context->R15
8252
8253 lea rsi,[((-56-160))+rax] ; xmm6..xmm15 save area (= -216+rax, where the epilogue reads xmm6)
8254 lea rdi,[512+r8] ; &context->Xmm6
8255 mov ecx,20 ; 20 qwords = 10 XMM registers
8256 DD 0xa548f3fc ; bytes FC F3 48 A5 = cld; rep movsq
8257
8258 jmp NEAR $L$in_prologue ; shared tail in se_handler: patch Rsp/Rsi/Rdi, unwind, return
8259
; .pdata: one three-dword record per entry point - image-relative
; address of the function's SEH begin label, of its SEH end label, and
; of its unwind-info record in .xdata.  Four records: the base routine,
; then the _shaext, _avx and _avx2 variants.
8260section .pdata rdata align=4
8261ALIGN 4
8262 DD $L$SEH_begin_sha256_multi_block wrt ..imagebase
8263 DD $L$SEH_end_sha256_multi_block wrt ..imagebase
8264 DD $L$SEH_info_sha256_multi_block wrt ..imagebase
8265 DD $L$SEH_begin_sha256_multi_block_shaext wrt ..imagebase
8266 DD $L$SEH_end_sha256_multi_block_shaext wrt ..imagebase
8267 DD $L$SEH_info_sha256_multi_block_shaext wrt ..imagebase
8268 DD $L$SEH_begin_sha256_multi_block_avx wrt ..imagebase
8269 DD $L$SEH_end_sha256_multi_block_avx wrt ..imagebase
8270 DD $L$SEH_info_sha256_multi_block_avx wrt ..imagebase
8271 DD $L$SEH_begin_sha256_multi_block_avx2 wrt ..imagebase
8272 DD $L$SEH_end_sha256_multi_block_avx2 wrt ..imagebase
8273 DD $L$SEH_info_sha256_multi_block_avx2 wrt ..imagebase
; .xdata: unwind-info records referenced from .pdata.  Each record is a
; four-byte header (9,0,0,0), the image-relative address of the handler
; routine, and two image-relative label addresses.  The handlers read
; those two labels as HandlerData[0] (body label) and HandlerData[1]
; (epilogue label) to decide whether the frame is fully set up.
; NOTE(review): under the Windows x64 UNWIND_INFO layout the header byte
; 9 decodes as version 1 with the exception-handler flag set, followed by
; zero prologue size, zero unwind codes and no frame register - confirm
; against the Microsoft documentation.
; The first three records share se_handler; only the AVX2 variant uses
; avx2_handler.
8274section .xdata rdata align=8
8275ALIGN 8
8276$L$SEH_info_sha256_multi_block:
8277DB 9,0,0,0
8278 DD se_handler wrt ..imagebase
8279 DD $L$body wrt ..imagebase,$L$epilogue wrt ..imagebase
8280$L$SEH_info_sha256_multi_block_shaext:
8281DB 9,0,0,0
8282 DD se_handler wrt ..imagebase
8283 DD $L$body_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase
8284$L$SEH_info_sha256_multi_block_avx:
8285DB 9,0,0,0
8286 DD se_handler wrt ..imagebase
8287 DD $L$body_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
8288$L$SEH_info_sha256_multi_block_avx2:
8289DB 9,0,0,0
8290 DD avx2_handler wrt ..imagebase
8291 DD $L$body_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette