VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.0g/crypto/genasm-nasm/sha256-x86_64.S@ 69881

Last change on this file since 69881 was 69881, checked in by vboxsync, 7 years ago

Update OpenSSL to 1.1.0g.
bugref:8070: src/libs maintenance

  • Property svn:eol-style set to native
File size: 88.0 KB
Line 
1default rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
6
7
8EXTERN OPENSSL_ia32cap_P
9global sha256_block_data_order
10
11ALIGN 16
;-----------------------------------------------------------------------
; sha256_block_data_order -- entry point, capability dispatch, frame setup
; ABI:  Microsoft x64 (arguments arrive in rcx, rdx, r8).
; In:   rcx -> eight 32-bit state words (loaded below, rewritten per block)
;       rdx -> input data
;       r8  =  number of 64-byte blocks (scaled by 64 to form the end pointer)
; The arguments are copied to rdi/rsi/rdx, the registers the body uses.
; rdi and rsi are callee-saved on Win64, so they are first parked in the
; caller's slots at [8+rsp] and [16+rsp] and reloaded at $L$epilogue.
;-----------------------------------------------------------------------
12sha256_block_data_order:
13 mov QWORD[8+rsp],rdi ;WIN64 prologue
14 mov QWORD[16+rsp],rsi
15 mov rax,rsp
16$L$SEH_begin_sha256_block_data_order:
17 mov rdi,rcx
18 mov rsi,rdx
19 mov rdx,r8
20
21
; Capability dispatch on the first three dwords of OPENSSL_ia32cap_P.
;   dword[2] & 0x20000000 (bit 29) set        -> _shaext_shortcut
;   dword[2] & 0x128 (bits 3,5,8) all set     -> $L$avx2_shortcut
;   (dword[0] & 0x40000000) | (dword[1] & 0x10000200) == 0x50000200
;                                             -> $L$avx_shortcut
;   dword[1] & 0x200 (bit 9) set              -> $L$ssse3_shortcut
; NOTE(review): presumably SHA-NI, BMI1/AVX2/BMI2, "Intel"+AVX+SSSE3 and
; SSSE3 flags respectively -- confirm against the OPENSSL_ia32cap layout.
; The $L$avx2/$L$avx/$L$ssse3 targets are not defined in this part of the file.
22 lea r11,[OPENSSL_ia32cap_P]
23 mov r9d,DWORD[r11]
24 mov r10d,DWORD[4+r11]
25 mov r11d,DWORD[8+r11]
26 test r11d,536870912
27 jnz NEAR _shaext_shortcut
28 and r11d,296
29 cmp r11d,296
30 je NEAR $L$avx2_shortcut
31 and r9d,1073741824
32 and r10d,268435968
33 or r10d,r9d
34 cmp r10d,1342177792
35 je NEAR $L$avx_shortcut
36 test r10d,512
37 jnz NEAR $L$ssse3_shortcut
; Scalar path: save the callee-saved GPRs this code overwrites.
38 push rbx
39 push rbp
40 push r12
41 push r13
42 push r14
43 push r15
; r11 = rsp after the six pushes; used later to restore registers and rsp.
; rdx = rsi + (rdx << 4) * 4 = input + 64 * blocks, i.e. the end pointer.
; Frame: 16*4 bytes of message-schedule ring at [0..63+rsp] plus 4*8 bytes
; of saved values at [64..95+rsp]; rsp is rounded down to a 64-byte boundary.
44 mov r11,rsp
45 shl rdx,4
46 sub rsp,16*4+4*8
47 lea rdx,[rdx*4+rsi]
48 and rsp,-64
; [64+rsp]=state pointer, [72+rsp]=input pointer, [80+rsp]=end pointer,
; [88+rsp]=saved rsp (value held in r11).
49 mov QWORD[((64+0))+rsp],rdi
50 mov QWORD[((64+8))+rsp],rsi
51 mov QWORD[((64+16))+rsp],rdx
52 mov QWORD[((64+24))+rsp],r11
53$L$prologue:
54
; Load the eight state words a..h into eax, ebx, ecx, edx, r8d..r11d.
55 mov eax,DWORD[rdi]
56 mov ebx,DWORD[4+rdi]
57 mov ecx,DWORD[8+rdi]
58 mov edx,DWORD[12+rdi]
59 mov r8d,DWORD[16+rdi]
60 mov r9d,DWORD[20+rdi]
61 mov r10d,DWORD[24+rdi]
62 mov r11d,DWORD[28+rdi]
63 jmp NEAR $L$loop
64
;-----------------------------------------------------------------------
; $L$loop -- start of one 64-byte block: rounds 0..15, fully unrolled.
; On entry a..h are in eax, ebx, ecx, edx, r8d, r9d, r10d, r11d and rsi
; points at the current input block. The register holding each working
; variable rotates by one position every round instead of moving data.
; Per round i:
;   r12d = bswap(dword [4*i+rsi]); also stored to the ring at [4*i+rsp]
;   r13d = ror/xor chain 14,5,6 on e  = ror(e,6)^ror(e,11)^ror(e,25)
;   r14d = ror/xor chain 9,11,2 on a  = ror(a,2)^ror(a,13)^ror(a,22)
;   r15d or edi (alternating per round) = ((f^g)&e)^g, then reused for a^b;
;   the other of the two still holds the previous round's a^b, so the
;   majority term is formed as b ^ ((a^b)&(b^c)).
;   r12d += h + Ch + DWORD[rbp] + r13d;  d += r12d;  h = Maj + r12d
;   The final "+= r14d" for the new a is issued early in the next round.
; rbp walks K256: +4 per round, +20 after every fourth round because each
; four-dword row of K256 is stored twice.
;-----------------------------------------------------------------------
65ALIGN 16
66$L$loop:
; -- round 0 --
67 mov edi,ebx
68 lea rbp,[K256]
69 xor edi,ecx
70 mov r12d,DWORD[rsi]
71 mov r13d,r8d
72 mov r14d,eax
73 bswap r12d
74 ror r13d,14
75 mov r15d,r9d
76
77 xor r13d,r8d
78 ror r14d,9
79 xor r15d,r10d
80
81 mov DWORD[rsp],r12d
82 xor r14d,eax
83 and r15d,r8d
84
85 ror r13d,5
86 add r12d,r11d
87 xor r15d,r10d
88
89 ror r14d,11
90 xor r13d,r8d
91 add r12d,r15d
92
93 mov r15d,eax
94 add r12d,DWORD[rbp]
95 xor r14d,eax
96
97 xor r15d,ebx
98 ror r13d,6
99 mov r11d,ebx
100
101 and edi,r15d
102 ror r14d,2
103 add r12d,r13d
104
105 xor r11d,edi
106 add edx,r12d
107 add r11d,r12d
108
109 lea rbp,[4+rbp]
110 add r11d,r14d
; -- round 1 --
111 mov r12d,DWORD[4+rsi]
112 mov r13d,edx
113 mov r14d,r11d
114 bswap r12d
115 ror r13d,14
116 mov edi,r8d
117
118 xor r13d,edx
119 ror r14d,9
120 xor edi,r9d
121
122 mov DWORD[4+rsp],r12d
123 xor r14d,r11d
124 and edi,edx
125
126 ror r13d,5
127 add r12d,r10d
128 xor edi,r9d
129
130 ror r14d,11
131 xor r13d,edx
132 add r12d,edi
133
134 mov edi,r11d
135 add r12d,DWORD[rbp]
136 xor r14d,r11d
137
138 xor edi,eax
139 ror r13d,6
140 mov r10d,eax
141
142 and r15d,edi
143 ror r14d,2
144 add r12d,r13d
145
146 xor r10d,r15d
147 add ecx,r12d
148 add r10d,r12d
149
150 lea rbp,[4+rbp]
151 add r10d,r14d
; -- round 2 --
152 mov r12d,DWORD[8+rsi]
153 mov r13d,ecx
154 mov r14d,r10d
155 bswap r12d
156 ror r13d,14
157 mov r15d,edx
158
159 xor r13d,ecx
160 ror r14d,9
161 xor r15d,r8d
162
163 mov DWORD[8+rsp],r12d
164 xor r14d,r10d
165 and r15d,ecx
166
167 ror r13d,5
168 add r12d,r9d
169 xor r15d,r8d
170
171 ror r14d,11
172 xor r13d,ecx
173 add r12d,r15d
174
175 mov r15d,r10d
176 add r12d,DWORD[rbp]
177 xor r14d,r10d
178
179 xor r15d,r11d
180 ror r13d,6
181 mov r9d,r11d
182
183 and edi,r15d
184 ror r14d,2
185 add r12d,r13d
186
187 xor r9d,edi
188 add ebx,r12d
189 add r9d,r12d
190
191 lea rbp,[4+rbp]
192 add r9d,r14d
; -- round 3 --
193 mov r12d,DWORD[12+rsi]
194 mov r13d,ebx
195 mov r14d,r9d
196 bswap r12d
197 ror r13d,14
198 mov edi,ecx
199
200 xor r13d,ebx
201 ror r14d,9
202 xor edi,edx
203
204 mov DWORD[12+rsp],r12d
205 xor r14d,r9d
206 and edi,ebx
207
208 ror r13d,5
209 add r12d,r8d
210 xor edi,edx
211
212 ror r14d,11
213 xor r13d,ebx
214 add r12d,edi
215
216 mov edi,r9d
217 add r12d,DWORD[rbp]
218 xor r14d,r9d
219
220 xor edi,r10d
221 ror r13d,6
222 mov r8d,r10d
223
224 and r15d,edi
225 ror r14d,2
226 add r12d,r13d
227
228 xor r8d,r15d
229 add eax,r12d
230 add r8d,r12d
231
; +20 rather than +4: step over the duplicate copy of the K256 row just used.
232 lea rbp,[20+rbp]
233 add r8d,r14d
; -- round 4 --
234 mov r12d,DWORD[16+rsi]
235 mov r13d,eax
236 mov r14d,r8d
237 bswap r12d
238 ror r13d,14
239 mov r15d,ebx
240
241 xor r13d,eax
242 ror r14d,9
243 xor r15d,ecx
244
245 mov DWORD[16+rsp],r12d
246 xor r14d,r8d
247 and r15d,eax
248
249 ror r13d,5
250 add r12d,edx
251 xor r15d,ecx
252
253 ror r14d,11
254 xor r13d,eax
255 add r12d,r15d
256
257 mov r15d,r8d
258 add r12d,DWORD[rbp]
259 xor r14d,r8d
260
261 xor r15d,r9d
262 ror r13d,6
263 mov edx,r9d
264
265 and edi,r15d
266 ror r14d,2
267 add r12d,r13d
268
269 xor edx,edi
270 add r11d,r12d
271 add edx,r12d
272
273 lea rbp,[4+rbp]
274 add edx,r14d
; -- round 5 --
275 mov r12d,DWORD[20+rsi]
276 mov r13d,r11d
277 mov r14d,edx
278 bswap r12d
279 ror r13d,14
280 mov edi,eax
281
282 xor r13d,r11d
283 ror r14d,9
284 xor edi,ebx
285
286 mov DWORD[20+rsp],r12d
287 xor r14d,edx
288 and edi,r11d
289
290 ror r13d,5
291 add r12d,ecx
292 xor edi,ebx
293
294 ror r14d,11
295 xor r13d,r11d
296 add r12d,edi
297
298 mov edi,edx
299 add r12d,DWORD[rbp]
300 xor r14d,edx
301
302 xor edi,r8d
303 ror r13d,6
304 mov ecx,r8d
305
306 and r15d,edi
307 ror r14d,2
308 add r12d,r13d
309
310 xor ecx,r15d
311 add r10d,r12d
312 add ecx,r12d
313
314 lea rbp,[4+rbp]
315 add ecx,r14d
; -- round 6 --
316 mov r12d,DWORD[24+rsi]
317 mov r13d,r10d
318 mov r14d,ecx
319 bswap r12d
320 ror r13d,14
321 mov r15d,r11d
322
323 xor r13d,r10d
324 ror r14d,9
325 xor r15d,eax
326
327 mov DWORD[24+rsp],r12d
328 xor r14d,ecx
329 and r15d,r10d
330
331 ror r13d,5
332 add r12d,ebx
333 xor r15d,eax
334
335 ror r14d,11
336 xor r13d,r10d
337 add r12d,r15d
338
339 mov r15d,ecx
340 add r12d,DWORD[rbp]
341 xor r14d,ecx
342
343 xor r15d,edx
344 ror r13d,6
345 mov ebx,edx
346
347 and edi,r15d
348 ror r14d,2
349 add r12d,r13d
350
351 xor ebx,edi
352 add r9d,r12d
353 add ebx,r12d
354
355 lea rbp,[4+rbp]
356 add ebx,r14d
; -- round 7 --
357 mov r12d,DWORD[28+rsi]
358 mov r13d,r9d
359 mov r14d,ebx
360 bswap r12d
361 ror r13d,14
362 mov edi,r10d
363
364 xor r13d,r9d
365 ror r14d,9
366 xor edi,r11d
367
368 mov DWORD[28+rsp],r12d
369 xor r14d,ebx
370 and edi,r9d
371
372 ror r13d,5
373 add r12d,eax
374 xor edi,r11d
375
376 ror r14d,11
377 xor r13d,r9d
378 add r12d,edi
379
380 mov edi,ebx
381 add r12d,DWORD[rbp]
382 xor r14d,ebx
383
384 xor edi,ecx
385 ror r13d,6
386 mov eax,ecx
387
388 and r15d,edi
389 ror r14d,2
390 add r12d,r13d
391
392 xor eax,r15d
393 add r8d,r12d
394 add eax,r12d
395
396 lea rbp,[20+rbp]
397 add eax,r14d
; -- round 8 --
398 mov r12d,DWORD[32+rsi]
399 mov r13d,r8d
400 mov r14d,eax
401 bswap r12d
402 ror r13d,14
403 mov r15d,r9d
404
405 xor r13d,r8d
406 ror r14d,9
407 xor r15d,r10d
408
409 mov DWORD[32+rsp],r12d
410 xor r14d,eax
411 and r15d,r8d
412
413 ror r13d,5
414 add r12d,r11d
415 xor r15d,r10d
416
417 ror r14d,11
418 xor r13d,r8d
419 add r12d,r15d
420
421 mov r15d,eax
422 add r12d,DWORD[rbp]
423 xor r14d,eax
424
425 xor r15d,ebx
426 ror r13d,6
427 mov r11d,ebx
428
429 and edi,r15d
430 ror r14d,2
431 add r12d,r13d
432
433 xor r11d,edi
434 add edx,r12d
435 add r11d,r12d
436
437 lea rbp,[4+rbp]
438 add r11d,r14d
; -- round 9 --
439 mov r12d,DWORD[36+rsi]
440 mov r13d,edx
441 mov r14d,r11d
442 bswap r12d
443 ror r13d,14
444 mov edi,r8d
445
446 xor r13d,edx
447 ror r14d,9
448 xor edi,r9d
449
450 mov DWORD[36+rsp],r12d
451 xor r14d,r11d
452 and edi,edx
453
454 ror r13d,5
455 add r12d,r10d
456 xor edi,r9d
457
458 ror r14d,11
459 xor r13d,edx
460 add r12d,edi
461
462 mov edi,r11d
463 add r12d,DWORD[rbp]
464 xor r14d,r11d
465
466 xor edi,eax
467 ror r13d,6
468 mov r10d,eax
469
470 and r15d,edi
471 ror r14d,2
472 add r12d,r13d
473
474 xor r10d,r15d
475 add ecx,r12d
476 add r10d,r12d
477
478 lea rbp,[4+rbp]
479 add r10d,r14d
; -- round 10 --
480 mov r12d,DWORD[40+rsi]
481 mov r13d,ecx
482 mov r14d,r10d
483 bswap r12d
484 ror r13d,14
485 mov r15d,edx
486
487 xor r13d,ecx
488 ror r14d,9
489 xor r15d,r8d
490
491 mov DWORD[40+rsp],r12d
492 xor r14d,r10d
493 and r15d,ecx
494
495 ror r13d,5
496 add r12d,r9d
497 xor r15d,r8d
498
499 ror r14d,11
500 xor r13d,ecx
501 add r12d,r15d
502
503 mov r15d,r10d
504 add r12d,DWORD[rbp]
505 xor r14d,r10d
506
507 xor r15d,r11d
508 ror r13d,6
509 mov r9d,r11d
510
511 and edi,r15d
512 ror r14d,2
513 add r12d,r13d
514
515 xor r9d,edi
516 add ebx,r12d
517 add r9d,r12d
518
519 lea rbp,[4+rbp]
520 add r9d,r14d
; -- round 11 --
521 mov r12d,DWORD[44+rsi]
522 mov r13d,ebx
523 mov r14d,r9d
524 bswap r12d
525 ror r13d,14
526 mov edi,ecx
527
528 xor r13d,ebx
529 ror r14d,9
530 xor edi,edx
531
532 mov DWORD[44+rsp],r12d
533 xor r14d,r9d
534 and edi,ebx
535
536 ror r13d,5
537 add r12d,r8d
538 xor edi,edx
539
540 ror r14d,11
541 xor r13d,ebx
542 add r12d,edi
543
544 mov edi,r9d
545 add r12d,DWORD[rbp]
546 xor r14d,r9d
547
548 xor edi,r10d
549 ror r13d,6
550 mov r8d,r10d
551
552 and r15d,edi
553 ror r14d,2
554 add r12d,r13d
555
556 xor r8d,r15d
557 add eax,r12d
558 add r8d,r12d
559
560 lea rbp,[20+rbp]
561 add r8d,r14d
; -- round 12 --
562 mov r12d,DWORD[48+rsi]
563 mov r13d,eax
564 mov r14d,r8d
565 bswap r12d
566 ror r13d,14
567 mov r15d,ebx
568
569 xor r13d,eax
570 ror r14d,9
571 xor r15d,ecx
572
573 mov DWORD[48+rsp],r12d
574 xor r14d,r8d
575 and r15d,eax
576
577 ror r13d,5
578 add r12d,edx
579 xor r15d,ecx
580
581 ror r14d,11
582 xor r13d,eax
583 add r12d,r15d
584
585 mov r15d,r8d
586 add r12d,DWORD[rbp]
587 xor r14d,r8d
588
589 xor r15d,r9d
590 ror r13d,6
591 mov edx,r9d
592
593 and edi,r15d
594 ror r14d,2
595 add r12d,r13d
596
597 xor edx,edi
598 add r11d,r12d
599 add edx,r12d
600
601 lea rbp,[4+rbp]
602 add edx,r14d
; -- round 13 --
603 mov r12d,DWORD[52+rsi]
604 mov r13d,r11d
605 mov r14d,edx
606 bswap r12d
607 ror r13d,14
608 mov edi,eax
609
610 xor r13d,r11d
611 ror r14d,9
612 xor edi,ebx
613
614 mov DWORD[52+rsp],r12d
615 xor r14d,edx
616 and edi,r11d
617
618 ror r13d,5
619 add r12d,ecx
620 xor edi,ebx
621
622 ror r14d,11
623 xor r13d,r11d
624 add r12d,edi
625
626 mov edi,edx
627 add r12d,DWORD[rbp]
628 xor r14d,edx
629
630 xor edi,r8d
631 ror r13d,6
632 mov ecx,r8d
633
634 and r15d,edi
635 ror r14d,2
636 add r12d,r13d
637
638 xor ecx,r15d
639 add r10d,r12d
640 add ecx,r12d
641
642 lea rbp,[4+rbp]
643 add ecx,r14d
; -- round 14 --
644 mov r12d,DWORD[56+rsi]
645 mov r13d,r10d
646 mov r14d,ecx
647 bswap r12d
648 ror r13d,14
649 mov r15d,r11d
650
651 xor r13d,r10d
652 ror r14d,9
653 xor r15d,eax
654
655 mov DWORD[56+rsp],r12d
656 xor r14d,ecx
657 and r15d,r10d
658
659 ror r13d,5
660 add r12d,ebx
661 xor r15d,eax
662
663 ror r14d,11
664 xor r13d,r10d
665 add r12d,r15d
666
667 mov r15d,ecx
668 add r12d,DWORD[rbp]
669 xor r14d,ecx
670
671 xor r15d,edx
672 ror r13d,6
673 mov ebx,edx
674
675 and edi,r15d
676 ror r14d,2
677 add r12d,r13d
678
679 xor ebx,edi
680 add r9d,r12d
681 add ebx,r12d
682
683 lea rbp,[4+rbp]
684 add ebx,r14d
; -- round 15 --
685 mov r12d,DWORD[60+rsi]
686 mov r13d,r9d
687 mov r14d,ebx
688 bswap r12d
689 ror r13d,14
690 mov edi,r10d
691
692 xor r13d,r9d
693 ror r14d,9
694 xor edi,r11d
695
696 mov DWORD[60+rsp],r12d
697 xor r14d,ebx
698 and edi,r9d
699
700 ror r13d,5
701 add r12d,eax
702 xor edi,r11d
703
704 ror r14d,11
705 xor r13d,r9d
706 add r12d,edi
707
708 mov edi,ebx
709 add r12d,DWORD[rbp]
710 xor r14d,ebx
711
712 xor edi,ecx
713 ror r13d,6
714 mov eax,ecx
715
716 and r15d,edi
717 ror r14d,2
718 add r12d,r13d
719
720 xor eax,r15d
721 add r8d,r12d
722 add eax,r12d
723
; Round 15's pending "add eax,r14d" is carried into $L$rounds_16_xx.
724 lea rbp,[20+rbp]
725 jmp NEAR $L$rounds_16_xx
;-----------------------------------------------------------------------
; $L$rounds_16_xx -- rounds 16..63, sixteen rounds per pass of this loop.
; The 16-dword ring at [0..63+rsp] holds the message schedule; W[k] below
; means the dword at [4*k+rsp], indices taken modulo 16. For slot j:
;   r13d/r12d: s0 = ror(x,7)^ror(x,18)^(x>>3)   with x = W[j+1]
;              (ror 11, xor, ror 7 gives the two rotations; shr 3 is xored in)
;   r15d or edi with r14d: s1 = ror(x,17)^ror(x,19)^(x>>10) with x = W[j+14]
;   W[j] = s0 + W[j+9] + W[j] + s1, stored back, then used as this round's
;   message word. The rest of each round is the same compression step as
;   in rounds 0..15 (same ror/xor chains on e and a, same rbp stepping).
; The previous round's pending "+= r14d" is the third instruction of each slot.
;-----------------------------------------------------------------------
726ALIGN 16
727$L$rounds_16_xx:
; -- schedule slot 0 --
728 mov r13d,DWORD[4+rsp]
729 mov r15d,DWORD[56+rsp]
730
731 mov r12d,r13d
732 ror r13d,11
733 add eax,r14d
734 mov r14d,r15d
735 ror r15d,2
736
737 xor r13d,r12d
738 shr r12d,3
739 ror r13d,7
740 xor r15d,r14d
741 shr r14d,10
742
743 ror r15d,17
744 xor r12d,r13d
745 xor r15d,r14d
746 add r12d,DWORD[36+rsp]
747
748 add r12d,DWORD[rsp]
749 mov r13d,r8d
750 add r12d,r15d
751 mov r14d,eax
752 ror r13d,14
753 mov r15d,r9d
754
755 xor r13d,r8d
756 ror r14d,9
757 xor r15d,r10d
758
759 mov DWORD[rsp],r12d
760 xor r14d,eax
761 and r15d,r8d
762
763 ror r13d,5
764 add r12d,r11d
765 xor r15d,r10d
766
767 ror r14d,11
768 xor r13d,r8d
769 add r12d,r15d
770
771 mov r15d,eax
772 add r12d,DWORD[rbp]
773 xor r14d,eax
774
775 xor r15d,ebx
776 ror r13d,6
777 mov r11d,ebx
778
779 and edi,r15d
780 ror r14d,2
781 add r12d,r13d
782
783 xor r11d,edi
784 add edx,r12d
785 add r11d,r12d
786
787 lea rbp,[4+rbp]
; -- schedule slot 1 --
788 mov r13d,DWORD[8+rsp]
789 mov edi,DWORD[60+rsp]
790
791 mov r12d,r13d
792 ror r13d,11
793 add r11d,r14d
794 mov r14d,edi
795 ror edi,2
796
797 xor r13d,r12d
798 shr r12d,3
799 ror r13d,7
800 xor edi,r14d
801 shr r14d,10
802
803 ror edi,17
804 xor r12d,r13d
805 xor edi,r14d
806 add r12d,DWORD[40+rsp]
807
808 add r12d,DWORD[4+rsp]
809 mov r13d,edx
810 add r12d,edi
811 mov r14d,r11d
812 ror r13d,14
813 mov edi,r8d
814
815 xor r13d,edx
816 ror r14d,9
817 xor edi,r9d
818
819 mov DWORD[4+rsp],r12d
820 xor r14d,r11d
821 and edi,edx
822
823 ror r13d,5
824 add r12d,r10d
825 xor edi,r9d
826
827 ror r14d,11
828 xor r13d,edx
829 add r12d,edi
830
831 mov edi,r11d
832 add r12d,DWORD[rbp]
833 xor r14d,r11d
834
835 xor edi,eax
836 ror r13d,6
837 mov r10d,eax
838
839 and r15d,edi
840 ror r14d,2
841 add r12d,r13d
842
843 xor r10d,r15d
844 add ecx,r12d
845 add r10d,r12d
846
847 lea rbp,[4+rbp]
; -- schedule slot 2 --
848 mov r13d,DWORD[12+rsp]
849 mov r15d,DWORD[rsp]
850
851 mov r12d,r13d
852 ror r13d,11
853 add r10d,r14d
854 mov r14d,r15d
855 ror r15d,2
856
857 xor r13d,r12d
858 shr r12d,3
859 ror r13d,7
860 xor r15d,r14d
861 shr r14d,10
862
863 ror r15d,17
864 xor r12d,r13d
865 xor r15d,r14d
866 add r12d,DWORD[44+rsp]
867
868 add r12d,DWORD[8+rsp]
869 mov r13d,ecx
870 add r12d,r15d
871 mov r14d,r10d
872 ror r13d,14
873 mov r15d,edx
874
875 xor r13d,ecx
876 ror r14d,9
877 xor r15d,r8d
878
879 mov DWORD[8+rsp],r12d
880 xor r14d,r10d
881 and r15d,ecx
882
883 ror r13d,5
884 add r12d,r9d
885 xor r15d,r8d
886
887 ror r14d,11
888 xor r13d,ecx
889 add r12d,r15d
890
891 mov r15d,r10d
892 add r12d,DWORD[rbp]
893 xor r14d,r10d
894
895 xor r15d,r11d
896 ror r13d,6
897 mov r9d,r11d
898
899 and edi,r15d
900 ror r14d,2
901 add r12d,r13d
902
903 xor r9d,edi
904 add ebx,r12d
905 add r9d,r12d
906
907 lea rbp,[4+rbp]
; -- schedule slot 3 --
908 mov r13d,DWORD[16+rsp]
909 mov edi,DWORD[4+rsp]
910
911 mov r12d,r13d
912 ror r13d,11
913 add r9d,r14d
914 mov r14d,edi
915 ror edi,2
916
917 xor r13d,r12d
918 shr r12d,3
919 ror r13d,7
920 xor edi,r14d
921 shr r14d,10
922
923 ror edi,17
924 xor r12d,r13d
925 xor edi,r14d
926 add r12d,DWORD[48+rsp]
927
928 add r12d,DWORD[12+rsp]
929 mov r13d,ebx
930 add r12d,edi
931 mov r14d,r9d
932 ror r13d,14
933 mov edi,ecx
934
935 xor r13d,ebx
936 ror r14d,9
937 xor edi,edx
938
939 mov DWORD[12+rsp],r12d
940 xor r14d,r9d
941 and edi,ebx
942
943 ror r13d,5
944 add r12d,r8d
945 xor edi,edx
946
947 ror r14d,11
948 xor r13d,ebx
949 add r12d,edi
950
951 mov edi,r9d
952 add r12d,DWORD[rbp]
953 xor r14d,r9d
954
955 xor edi,r10d
956 ror r13d,6
957 mov r8d,r10d
958
959 and r15d,edi
960 ror r14d,2
961 add r12d,r13d
962
963 xor r8d,r15d
964 add eax,r12d
965 add r8d,r12d
966
967 lea rbp,[20+rbp]
; -- schedule slot 4 --
968 mov r13d,DWORD[20+rsp]
969 mov r15d,DWORD[8+rsp]
970
971 mov r12d,r13d
972 ror r13d,11
973 add r8d,r14d
974 mov r14d,r15d
975 ror r15d,2
976
977 xor r13d,r12d
978 shr r12d,3
979 ror r13d,7
980 xor r15d,r14d
981 shr r14d,10
982
983 ror r15d,17
984 xor r12d,r13d
985 xor r15d,r14d
986 add r12d,DWORD[52+rsp]
987
988 add r12d,DWORD[16+rsp]
989 mov r13d,eax
990 add r12d,r15d
991 mov r14d,r8d
992 ror r13d,14
993 mov r15d,ebx
994
995 xor r13d,eax
996 ror r14d,9
997 xor r15d,ecx
998
999 mov DWORD[16+rsp],r12d
1000 xor r14d,r8d
1001 and r15d,eax
1002
1003 ror r13d,5
1004 add r12d,edx
1005 xor r15d,ecx
1006
1007 ror r14d,11
1008 xor r13d,eax
1009 add r12d,r15d
1010
1011 mov r15d,r8d
1012 add r12d,DWORD[rbp]
1013 xor r14d,r8d
1014
1015 xor r15d,r9d
1016 ror r13d,6
1017 mov edx,r9d
1018
1019 and edi,r15d
1020 ror r14d,2
1021 add r12d,r13d
1022
1023 xor edx,edi
1024 add r11d,r12d
1025 add edx,r12d
1026
1027 lea rbp,[4+rbp]
; -- schedule slot 5 --
1028 mov r13d,DWORD[24+rsp]
1029 mov edi,DWORD[12+rsp]
1030
1031 mov r12d,r13d
1032 ror r13d,11
1033 add edx,r14d
1034 mov r14d,edi
1035 ror edi,2
1036
1037 xor r13d,r12d
1038 shr r12d,3
1039 ror r13d,7
1040 xor edi,r14d
1041 shr r14d,10
1042
1043 ror edi,17
1044 xor r12d,r13d
1045 xor edi,r14d
1046 add r12d,DWORD[56+rsp]
1047
1048 add r12d,DWORD[20+rsp]
1049 mov r13d,r11d
1050 add r12d,edi
1051 mov r14d,edx
1052 ror r13d,14
1053 mov edi,eax
1054
1055 xor r13d,r11d
1056 ror r14d,9
1057 xor edi,ebx
1058
1059 mov DWORD[20+rsp],r12d
1060 xor r14d,edx
1061 and edi,r11d
1062
1063 ror r13d,5
1064 add r12d,ecx
1065 xor edi,ebx
1066
1067 ror r14d,11
1068 xor r13d,r11d
1069 add r12d,edi
1070
1071 mov edi,edx
1072 add r12d,DWORD[rbp]
1073 xor r14d,edx
1074
1075 xor edi,r8d
1076 ror r13d,6
1077 mov ecx,r8d
1078
1079 and r15d,edi
1080 ror r14d,2
1081 add r12d,r13d
1082
1083 xor ecx,r15d
1084 add r10d,r12d
1085 add ecx,r12d
1086
1087 lea rbp,[4+rbp]
; -- schedule slot 6 --
1088 mov r13d,DWORD[28+rsp]
1089 mov r15d,DWORD[16+rsp]
1090
1091 mov r12d,r13d
1092 ror r13d,11
1093 add ecx,r14d
1094 mov r14d,r15d
1095 ror r15d,2
1096
1097 xor r13d,r12d
1098 shr r12d,3
1099 ror r13d,7
1100 xor r15d,r14d
1101 shr r14d,10
1102
1103 ror r15d,17
1104 xor r12d,r13d
1105 xor r15d,r14d
1106 add r12d,DWORD[60+rsp]
1107
1108 add r12d,DWORD[24+rsp]
1109 mov r13d,r10d
1110 add r12d,r15d
1111 mov r14d,ecx
1112 ror r13d,14
1113 mov r15d,r11d
1114
1115 xor r13d,r10d
1116 ror r14d,9
1117 xor r15d,eax
1118
1119 mov DWORD[24+rsp],r12d
1120 xor r14d,ecx
1121 and r15d,r10d
1122
1123 ror r13d,5
1124 add r12d,ebx
1125 xor r15d,eax
1126
1127 ror r14d,11
1128 xor r13d,r10d
1129 add r12d,r15d
1130
1131 mov r15d,ecx
1132 add r12d,DWORD[rbp]
1133 xor r14d,ecx
1134
1135 xor r15d,edx
1136 ror r13d,6
1137 mov ebx,edx
1138
1139 and edi,r15d
1140 ror r14d,2
1141 add r12d,r13d
1142
1143 xor ebx,edi
1144 add r9d,r12d
1145 add ebx,r12d
1146
1147 lea rbp,[4+rbp]
; -- schedule slot 7 --
1148 mov r13d,DWORD[32+rsp]
1149 mov edi,DWORD[20+rsp]
1150
1151 mov r12d,r13d
1152 ror r13d,11
1153 add ebx,r14d
1154 mov r14d,edi
1155 ror edi,2
1156
1157 xor r13d,r12d
1158 shr r12d,3
1159 ror r13d,7
1160 xor edi,r14d
1161 shr r14d,10
1162
1163 ror edi,17
1164 xor r12d,r13d
1165 xor edi,r14d
1166 add r12d,DWORD[rsp]
1167
1168 add r12d,DWORD[28+rsp]
1169 mov r13d,r9d
1170 add r12d,edi
1171 mov r14d,ebx
1172 ror r13d,14
1173 mov edi,r10d
1174
1175 xor r13d,r9d
1176 ror r14d,9
1177 xor edi,r11d
1178
1179 mov DWORD[28+rsp],r12d
1180 xor r14d,ebx
1181 and edi,r9d
1182
1183 ror r13d,5
1184 add r12d,eax
1185 xor edi,r11d
1186
1187 ror r14d,11
1188 xor r13d,r9d
1189 add r12d,edi
1190
1191 mov edi,ebx
1192 add r12d,DWORD[rbp]
1193 xor r14d,ebx
1194
1195 xor edi,ecx
1196 ror r13d,6
1197 mov eax,ecx
1198
1199 and r15d,edi
1200 ror r14d,2
1201 add r12d,r13d
1202
1203 xor eax,r15d
1204 add r8d,r12d
1205 add eax,r12d
1206
1207 lea rbp,[20+rbp]
; -- schedule slot 8 --
1208 mov r13d,DWORD[36+rsp]
1209 mov r15d,DWORD[24+rsp]
1210
1211 mov r12d,r13d
1212 ror r13d,11
1213 add eax,r14d
1214 mov r14d,r15d
1215 ror r15d,2
1216
1217 xor r13d,r12d
1218 shr r12d,3
1219 ror r13d,7
1220 xor r15d,r14d
1221 shr r14d,10
1222
1223 ror r15d,17
1224 xor r12d,r13d
1225 xor r15d,r14d
1226 add r12d,DWORD[4+rsp]
1227
1228 add r12d,DWORD[32+rsp]
1229 mov r13d,r8d
1230 add r12d,r15d
1231 mov r14d,eax
1232 ror r13d,14
1233 mov r15d,r9d
1234
1235 xor r13d,r8d
1236 ror r14d,9
1237 xor r15d,r10d
1238
1239 mov DWORD[32+rsp],r12d
1240 xor r14d,eax
1241 and r15d,r8d
1242
1243 ror r13d,5
1244 add r12d,r11d
1245 xor r15d,r10d
1246
1247 ror r14d,11
1248 xor r13d,r8d
1249 add r12d,r15d
1250
1251 mov r15d,eax
1252 add r12d,DWORD[rbp]
1253 xor r14d,eax
1254
1255 xor r15d,ebx
1256 ror r13d,6
1257 mov r11d,ebx
1258
1259 and edi,r15d
1260 ror r14d,2
1261 add r12d,r13d
1262
1263 xor r11d,edi
1264 add edx,r12d
1265 add r11d,r12d
1266
1267 lea rbp,[4+rbp]
; -- schedule slot 9 --
1268 mov r13d,DWORD[40+rsp]
1269 mov edi,DWORD[28+rsp]
1270
1271 mov r12d,r13d
1272 ror r13d,11
1273 add r11d,r14d
1274 mov r14d,edi
1275 ror edi,2
1276
1277 xor r13d,r12d
1278 shr r12d,3
1279 ror r13d,7
1280 xor edi,r14d
1281 shr r14d,10
1282
1283 ror edi,17
1284 xor r12d,r13d
1285 xor edi,r14d
1286 add r12d,DWORD[8+rsp]
1287
1288 add r12d,DWORD[36+rsp]
1289 mov r13d,edx
1290 add r12d,edi
1291 mov r14d,r11d
1292 ror r13d,14
1293 mov edi,r8d
1294
1295 xor r13d,edx
1296 ror r14d,9
1297 xor edi,r9d
1298
1299 mov DWORD[36+rsp],r12d
1300 xor r14d,r11d
1301 and edi,edx
1302
1303 ror r13d,5
1304 add r12d,r10d
1305 xor edi,r9d
1306
1307 ror r14d,11
1308 xor r13d,edx
1309 add r12d,edi
1310
1311 mov edi,r11d
1312 add r12d,DWORD[rbp]
1313 xor r14d,r11d
1314
1315 xor edi,eax
1316 ror r13d,6
1317 mov r10d,eax
1318
1319 and r15d,edi
1320 ror r14d,2
1321 add r12d,r13d
1322
1323 xor r10d,r15d
1324 add ecx,r12d
1325 add r10d,r12d
1326
1327 lea rbp,[4+rbp]
; -- schedule slot 10 --
1328 mov r13d,DWORD[44+rsp]
1329 mov r15d,DWORD[32+rsp]
1330
1331 mov r12d,r13d
1332 ror r13d,11
1333 add r10d,r14d
1334 mov r14d,r15d
1335 ror r15d,2
1336
1337 xor r13d,r12d
1338 shr r12d,3
1339 ror r13d,7
1340 xor r15d,r14d
1341 shr r14d,10
1342
1343 ror r15d,17
1344 xor r12d,r13d
1345 xor r15d,r14d
1346 add r12d,DWORD[12+rsp]
1347
1348 add r12d,DWORD[40+rsp]
1349 mov r13d,ecx
1350 add r12d,r15d
1351 mov r14d,r10d
1352 ror r13d,14
1353 mov r15d,edx
1354
1355 xor r13d,ecx
1356 ror r14d,9
1357 xor r15d,r8d
1358
1359 mov DWORD[40+rsp],r12d
1360 xor r14d,r10d
1361 and r15d,ecx
1362
1363 ror r13d,5
1364 add r12d,r9d
1365 xor r15d,r8d
1366
1367 ror r14d,11
1368 xor r13d,ecx
1369 add r12d,r15d
1370
1371 mov r15d,r10d
1372 add r12d,DWORD[rbp]
1373 xor r14d,r10d
1374
1375 xor r15d,r11d
1376 ror r13d,6
1377 mov r9d,r11d
1378
1379 and edi,r15d
1380 ror r14d,2
1381 add r12d,r13d
1382
1383 xor r9d,edi
1384 add ebx,r12d
1385 add r9d,r12d
1386
1387 lea rbp,[4+rbp]
; -- schedule slot 11 --
1388 mov r13d,DWORD[48+rsp]
1389 mov edi,DWORD[36+rsp]
1390
1391 mov r12d,r13d
1392 ror r13d,11
1393 add r9d,r14d
1394 mov r14d,edi
1395 ror edi,2
1396
1397 xor r13d,r12d
1398 shr r12d,3
1399 ror r13d,7
1400 xor edi,r14d
1401 shr r14d,10
1402
1403 ror edi,17
1404 xor r12d,r13d
1405 xor edi,r14d
1406 add r12d,DWORD[16+rsp]
1407
1408 add r12d,DWORD[44+rsp]
1409 mov r13d,ebx
1410 add r12d,edi
1411 mov r14d,r9d
1412 ror r13d,14
1413 mov edi,ecx
1414
1415 xor r13d,ebx
1416 ror r14d,9
1417 xor edi,edx
1418
1419 mov DWORD[44+rsp],r12d
1420 xor r14d,r9d
1421 and edi,ebx
1422
1423 ror r13d,5
1424 add r12d,r8d
1425 xor edi,edx
1426
1427 ror r14d,11
1428 xor r13d,ebx
1429 add r12d,edi
1430
1431 mov edi,r9d
1432 add r12d,DWORD[rbp]
1433 xor r14d,r9d
1434
1435 xor edi,r10d
1436 ror r13d,6
1437 mov r8d,r10d
1438
1439 and r15d,edi
1440 ror r14d,2
1441 add r12d,r13d
1442
1443 xor r8d,r15d
1444 add eax,r12d
1445 add r8d,r12d
1446
1447 lea rbp,[20+rbp]
; -- schedule slot 12 --
1448 mov r13d,DWORD[52+rsp]
1449 mov r15d,DWORD[40+rsp]
1450
1451 mov r12d,r13d
1452 ror r13d,11
1453 add r8d,r14d
1454 mov r14d,r15d
1455 ror r15d,2
1456
1457 xor r13d,r12d
1458 shr r12d,3
1459 ror r13d,7
1460 xor r15d,r14d
1461 shr r14d,10
1462
1463 ror r15d,17
1464 xor r12d,r13d
1465 xor r15d,r14d
1466 add r12d,DWORD[20+rsp]
1467
1468 add r12d,DWORD[48+rsp]
1469 mov r13d,eax
1470 add r12d,r15d
1471 mov r14d,r8d
1472 ror r13d,14
1473 mov r15d,ebx
1474
1475 xor r13d,eax
1476 ror r14d,9
1477 xor r15d,ecx
1478
1479 mov DWORD[48+rsp],r12d
1480 xor r14d,r8d
1481 and r15d,eax
1482
1483 ror r13d,5
1484 add r12d,edx
1485 xor r15d,ecx
1486
1487 ror r14d,11
1488 xor r13d,eax
1489 add r12d,r15d
1490
1491 mov r15d,r8d
1492 add r12d,DWORD[rbp]
1493 xor r14d,r8d
1494
1495 xor r15d,r9d
1496 ror r13d,6
1497 mov edx,r9d
1498
1499 and edi,r15d
1500 ror r14d,2
1501 add r12d,r13d
1502
1503 xor edx,edi
1504 add r11d,r12d
1505 add edx,r12d
1506
1507 lea rbp,[4+rbp]
; -- schedule slot 13 --
1508 mov r13d,DWORD[56+rsp]
1509 mov edi,DWORD[44+rsp]
1510
1511 mov r12d,r13d
1512 ror r13d,11
1513 add edx,r14d
1514 mov r14d,edi
1515 ror edi,2
1516
1517 xor r13d,r12d
1518 shr r12d,3
1519 ror r13d,7
1520 xor edi,r14d
1521 shr r14d,10
1522
1523 ror edi,17
1524 xor r12d,r13d
1525 xor edi,r14d
1526 add r12d,DWORD[24+rsp]
1527
1528 add r12d,DWORD[52+rsp]
1529 mov r13d,r11d
1530 add r12d,edi
1531 mov r14d,edx
1532 ror r13d,14
1533 mov edi,eax
1534
1535 xor r13d,r11d
1536 ror r14d,9
1537 xor edi,ebx
1538
1539 mov DWORD[52+rsp],r12d
1540 xor r14d,edx
1541 and edi,r11d
1542
1543 ror r13d,5
1544 add r12d,ecx
1545 xor edi,ebx
1546
1547 ror r14d,11
1548 xor r13d,r11d
1549 add r12d,edi
1550
1551 mov edi,edx
1552 add r12d,DWORD[rbp]
1553 xor r14d,edx
1554
1555 xor edi,r8d
1556 ror r13d,6
1557 mov ecx,r8d
1558
1559 and r15d,edi
1560 ror r14d,2
1561 add r12d,r13d
1562
1563 xor ecx,r15d
1564 add r10d,r12d
1565 add ecx,r12d
1566
1567 lea rbp,[4+rbp]
; -- schedule slot 14 --
1568 mov r13d,DWORD[60+rsp]
1569 mov r15d,DWORD[48+rsp]
1570
1571 mov r12d,r13d
1572 ror r13d,11
1573 add ecx,r14d
1574 mov r14d,r15d
1575 ror r15d,2
1576
1577 xor r13d,r12d
1578 shr r12d,3
1579 ror r13d,7
1580 xor r15d,r14d
1581 shr r14d,10
1582
1583 ror r15d,17
1584 xor r12d,r13d
1585 xor r15d,r14d
1586 add r12d,DWORD[28+rsp]
1587
1588 add r12d,DWORD[56+rsp]
1589 mov r13d,r10d
1590 add r12d,r15d
1591 mov r14d,ecx
1592 ror r13d,14
1593 mov r15d,r11d
1594
1595 xor r13d,r10d
1596 ror r14d,9
1597 xor r15d,eax
1598
1599 mov DWORD[56+rsp],r12d
1600 xor r14d,ecx
1601 and r15d,r10d
1602
1603 ror r13d,5
1604 add r12d,ebx
1605 xor r15d,eax
1606
1607 ror r14d,11
1608 xor r13d,r10d
1609 add r12d,r15d
1610
1611 mov r15d,ecx
1612 add r12d,DWORD[rbp]
1613 xor r14d,ecx
1614
1615 xor r15d,edx
1616 ror r13d,6
1617 mov ebx,edx
1618
1619 and edi,r15d
1620 ror r14d,2
1621 add r12d,r13d
1622
1623 xor ebx,edi
1624 add r9d,r12d
1625 add ebx,r12d
1626
1627 lea rbp,[4+rbp]
; -- schedule slot 15 --
1628 mov r13d,DWORD[rsp]
1629 mov edi,DWORD[52+rsp]
1630
1631 mov r12d,r13d
1632 ror r13d,11
1633 add ebx,r14d
1634 mov r14d,edi
1635 ror edi,2
1636
1637 xor r13d,r12d
1638 shr r12d,3
1639 ror r13d,7
1640 xor edi,r14d
1641 shr r14d,10
1642
1643 ror edi,17
1644 xor r12d,r13d
1645 xor edi,r14d
1646 add r12d,DWORD[32+rsp]
1647
1648 add r12d,DWORD[60+rsp]
1649 mov r13d,r9d
1650 add r12d,edi
1651 mov r14d,ebx
1652 ror r13d,14
1653 mov edi,r10d
1654
1655 xor r13d,r9d
1656 ror r14d,9
1657 xor edi,r11d
1658
1659 mov DWORD[60+rsp],r12d
1660 xor r14d,ebx
1661 and edi,r9d
1662
1663 ror r13d,5
1664 add r12d,eax
1665 xor edi,r11d
1666
1667 ror r14d,11
1668 xor r13d,r9d
1669 add r12d,edi
1670
1671 mov edi,ebx
1672 add r12d,DWORD[rbp]
1673 xor r14d,ebx
1674
1675 xor edi,ecx
1676 ror r13d,6
1677 mov eax,ecx
1678
1679 and r15d,edi
1680 ror r14d,2
1681 add r12d,r13d
1682
1683 xor eax,r15d
1684 add r8d,r12d
1685 add eax,r12d
1686
1687 lea rbp,[20+rbp]
; Loop control: rbp now points at the next K256 row. Repeat while the top
; byte of its first dword is non-zero. After the last round rbp = K256+512,
; where the table continues with 0x00010203, whose byte at offset 3 is zero.
1688 cmp BYTE[3+rbp],0
1689 jnz NEAR $L$rounds_16_xx
1690
; End of one 64-byte block: fold the working variables back into the state.
; rdi is reloaded with the state pointer saved at [64+rsp] (edi was used as
; scratch in the rounds). "add eax,r14d" completes the pending add from the
; last round. rsi advances to the next 64-byte input block.
1691 mov rdi,QWORD[((64+0))+rsp]
1692 add eax,r14d
1693 lea rsi,[64+rsi]
1694
1695 add eax,DWORD[rdi]
1696 add ebx,DWORD[4+rdi]
1697 add ecx,DWORD[8+rdi]
1698 add edx,DWORD[12+rdi]
1699 add r8d,DWORD[16+rdi]
1700 add r9d,DWORD[20+rdi]
1701 add r10d,DWORD[24+rdi]
1702 add r11d,DWORD[28+rdi]
1703
; Compare the input pointer with the end pointer saved at [80+rsp]. The
; stores below are plain mov instructions, which leave the flags intact,
; so the jb after them still acts on this comparison.
1704 cmp rsi,QWORD[((64+16))+rsp]
1705
1706 mov DWORD[rdi],eax
1707 mov DWORD[4+rdi],ebx
1708 mov DWORD[8+rdi],ecx
1709 mov DWORD[12+rdi],edx
1710 mov DWORD[16+rdi],r8d
1711 mov DWORD[20+rdi],r9d
1712 mov DWORD[24+rdi],r10d
1713 mov DWORD[28+rdi],r11d
1714 jb NEAR $L$loop
1715
; All blocks done. rsi = the rsp value recorded right after the six pushes
; (saved at [88+rsp]); reload r15..rbx from there in reverse push order and
; set rsp to that value + 48, i.e. where it was before the pushes.
1716 mov rsi,QWORD[((64+24))+rsp]
1717 mov r15,QWORD[rsi]
1718 mov r14,QWORD[8+rsi]
1719 mov r13,QWORD[16+rsi]
1720 mov r12,QWORD[24+rsi]
1721 mov rbp,QWORD[32+rsi]
1722 mov rbx,QWORD[40+rsi]
1723 lea rsp,[48+rsi]
1724$L$epilogue:
; Restore the caller's rdi/rsi from the slots written at function entry,
; then return. The two DB bytes F3 C3 encode "rep ret".
1725 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1726 mov rsi,QWORD[16+rsp]
1727 DB 0F3h,0C3h ;repret
1728$L$SEH_end_sha256_block_data_order:
;-----------------------------------------------------------------------
; K256 -- round-constant table, 64-byte aligned.
; Layout: 16 groups of four constants, each four-dword row stored twice
; (32 rows, 512 bytes). The scalar code reads one copy and skips the other
; with its "lea rbp,[20+rbp]" steps.
; NOTE(review): the duplicate rows presumably serve the wider SIMD code
; paths, which are not visible in this part of the file -- confirm.
;-----------------------------------------------------------------------
1729ALIGN 64
1730
1731K256:
1732 DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
1733 DD 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
1734 DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
1735 DD 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
1736 DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
1737 DD 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
1738 DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
1739 DD 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
1740 DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
1741 DD 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
1742 DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
1743 DD 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
1744 DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
1745 DD 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
1746 DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
1747 DD 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
1748 DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
1749 DD 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
1750 DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
1751 DD 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
1752 DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
1753 DD 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
1754 DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
1755 DD 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
1756 DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
1757 DD 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
1758 DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
1759 DD 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
1760 DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
1761 DD 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
1762 DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
1763 DD 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
1764
; K256+512: byte-index patterns, each row again stored twice.
; The first dword, 0x00010203, has a zero byte at offset 3; the scalar loop's
; "cmp BYTE[3+rbp],0" uses that as its end-of-table marker.
; The shaext entry point loads the row at K256+512 into xmm7.
; NOTE(review): these rows look like byte-shuffle masks (the first a per-dword
; byte swap); their other users are outside this view -- confirm.
1765 DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
1766 DD 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
1767 DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
1768 DD 0x03020100,0x0b0a0908,0xffffffff,0xffffffff
1769 DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
1770 DD 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
; NUL-terminated ASCII identification string:
; "SHA256 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1771DB 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97
1772DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
1773DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
1774DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
1775DB 111,114,103,62,0
1776
1777ALIGN 64
1778sha256_block_data_order_shaext:
1779 mov QWORD[8+rsp],rdi ;WIN64 prologue
1780 mov QWORD[16+rsp],rsi
1781 mov rax,rsp
1782$L$SEH_begin_sha256_block_data_order_shaext:
1783 mov rdi,rcx
1784 mov rsi,rdx
1785 mov rdx,r8
1786
1787
1788_shaext_shortcut:
1789 lea rsp,[((-88))+rsp]
1790 movaps XMMWORD[(-8-80)+rax],xmm6
1791 movaps XMMWORD[(-8-64)+rax],xmm7
1792 movaps XMMWORD[(-8-48)+rax],xmm8
1793 movaps XMMWORD[(-8-32)+rax],xmm9
1794 movaps XMMWORD[(-8-16)+rax],xmm10
1795$L$prologue_shaext:
1796 lea rcx,[((K256+128))]
1797 movdqu xmm1,XMMWORD[rdi]
1798 movdqu xmm2,XMMWORD[16+rdi]
1799 movdqa xmm7,XMMWORD[((512-128))+rcx]
1800
1801 pshufd xmm0,xmm1,0x1b
1802 pshufd xmm1,xmm1,0xb1
1803 pshufd xmm2,xmm2,0x1b
1804 movdqa xmm8,xmm7
1805DB 102,15,58,15,202,8
1806 punpcklqdq xmm2,xmm0
1807 jmp NEAR $L$oop_shaext
1808
1809ALIGN 16
1810$L$oop_shaext:
1811 movdqu xmm3,XMMWORD[rsi]
1812 movdqu xmm4,XMMWORD[16+rsi]
1813 movdqu xmm5,XMMWORD[32+rsi]
1814DB 102,15,56,0,223
1815 movdqu xmm6,XMMWORD[48+rsi]
1816
1817 movdqa xmm0,XMMWORD[((0-128))+rcx]
1818 paddd xmm0,xmm3
1819DB 102,15,56,0,231
1820 movdqa xmm10,xmm2
1821DB 15,56,203,209
1822 pshufd xmm0,xmm0,0x0e
1823 nop
1824 movdqa xmm9,xmm1
1825DB 15,56,203,202
1826
1827 movdqa xmm0,XMMWORD[((32-128))+rcx]
1828 paddd xmm0,xmm4
1829DB 102,15,56,0,239
1830DB 15,56,203,209
1831 pshufd xmm0,xmm0,0x0e
1832 lea rsi,[64+rsi]
1833DB 15,56,204,220
1834DB 15,56,203,202
1835
1836 movdqa xmm0,XMMWORD[((64-128))+rcx]
1837 paddd xmm0,xmm5
1838DB 102,15,56,0,247
1839DB 15,56,203,209
1840 pshufd xmm0,xmm0,0x0e
1841 movdqa xmm7,xmm6
1842DB 102,15,58,15,253,4
1843 nop
1844 paddd xmm3,xmm7
1845DB 15,56,204,229
1846DB 15,56,203,202
1847
1848 movdqa xmm0,XMMWORD[((96-128))+rcx]
1849 paddd xmm0,xmm6
1850DB 15,56,205,222
1851DB 15,56,203,209
1852 pshufd xmm0,xmm0,0x0e
1853 movdqa xmm7,xmm3
1854DB 102,15,58,15,254,4
1855 nop
1856 paddd xmm4,xmm7
1857DB 15,56,204,238
1858DB 15,56,203,202
1859 movdqa xmm0,XMMWORD[((128-128))+rcx]
1860 paddd xmm0,xmm3
1861DB 15,56,205,227
1862DB 15,56,203,209
1863 pshufd xmm0,xmm0,0x0e
1864 movdqa xmm7,xmm4
1865DB 102,15,58,15,251,4
1866 nop
1867 paddd xmm5,xmm7
1868DB 15,56,204,243
1869DB 15,56,203,202
1870 movdqa xmm0,XMMWORD[((160-128))+rcx]
1871 paddd xmm0,xmm4
1872DB 15,56,205,236
1873DB 15,56,203,209
1874 pshufd xmm0,xmm0,0x0e
1875 movdqa xmm7,xmm5
1876DB 102,15,58,15,252,4
1877 nop
1878 paddd xmm6,xmm7
1879DB 15,56,204,220
1880DB 15,56,203,202
1881 movdqa xmm0,XMMWORD[((192-128))+rcx]
1882 paddd xmm0,xmm5
1883DB 15,56,205,245
1884DB 15,56,203,209
1885 pshufd xmm0,xmm0,0x0e
1886 movdqa xmm7,xmm6
1887DB 102,15,58,15,253,4
1888 nop
1889 paddd xmm3,xmm7
1890DB 15,56,204,229
1891DB 15,56,203,202
1892 movdqa xmm0,XMMWORD[((224-128))+rcx]
1893 paddd xmm0,xmm6
1894DB 15,56,205,222
1895DB 15,56,203,209
1896 pshufd xmm0,xmm0,0x0e
1897 movdqa xmm7,xmm3
1898DB 102,15,58,15,254,4
1899 nop
1900 paddd xmm4,xmm7
1901DB 15,56,204,238
1902DB 15,56,203,202
1903 movdqa xmm0,XMMWORD[((256-128))+rcx]
1904 paddd xmm0,xmm3
1905DB 15,56,205,227
1906DB 15,56,203,209
1907 pshufd xmm0,xmm0,0x0e
1908 movdqa xmm7,xmm4
1909DB 102,15,58,15,251,4
1910 nop
1911 paddd xmm5,xmm7
1912DB 15,56,204,243
1913DB 15,56,203,202
1914 movdqa xmm0,XMMWORD[((288-128))+rcx]
1915 paddd xmm0,xmm4
1916DB 15,56,205,236
1917DB 15,56,203,209
1918 pshufd xmm0,xmm0,0x0e
1919 movdqa xmm7,xmm5
1920DB 102,15,58,15,252,4
1921 nop
1922 paddd xmm6,xmm7
1923DB 15,56,204,220
1924DB 15,56,203,202
1925 movdqa xmm0,XMMWORD[((320-128))+rcx]
1926 paddd xmm0,xmm5
1927DB 15,56,205,245
1928DB 15,56,203,209
1929 pshufd xmm0,xmm0,0x0e
1930 movdqa xmm7,xmm6
1931DB 102,15,58,15,253,4
1932 nop
1933 paddd xmm3,xmm7
1934DB 15,56,204,229
1935DB 15,56,203,202
1936 movdqa xmm0,XMMWORD[((352-128))+rcx]
1937 paddd xmm0,xmm6
1938DB 15,56,205,222
1939DB 15,56,203,209
1940 pshufd xmm0,xmm0,0x0e
1941 movdqa xmm7,xmm3
1942DB 102,15,58,15,254,4
1943 nop
1944 paddd xmm4,xmm7
1945DB 15,56,204,238
1946DB 15,56,203,202
1947 movdqa xmm0,XMMWORD[((384-128))+rcx]
1948 paddd xmm0,xmm3
1949DB 15,56,205,227
1950DB 15,56,203,209
1951 pshufd xmm0,xmm0,0x0e
1952 movdqa xmm7,xmm4
1953DB 102,15,58,15,251,4
1954 nop
1955 paddd xmm5,xmm7
1956DB 15,56,204,243
1957DB 15,56,203,202
1958 movdqa xmm0,XMMWORD[((416-128))+rcx]
1959 paddd xmm0,xmm4
1960DB 15,56,205,236
1961DB 15,56,203,209
1962 pshufd xmm0,xmm0,0x0e
1963 movdqa xmm7,xmm5
1964DB 102,15,58,15,252,4
1965DB 15,56,203,202
1966 paddd xmm6,xmm7
1967
1968 movdqa xmm0,XMMWORD[((448-128))+rcx]
1969 paddd xmm0,xmm5
1970DB 15,56,203,209
1971 pshufd xmm0,xmm0,0x0e
1972DB 15,56,205,245
1973 movdqa xmm7,xmm8
1974DB 15,56,203,202
1975
1976 movdqa xmm0,XMMWORD[((480-128))+rcx]
1977 paddd xmm0,xmm6
1978 nop
1979DB 15,56,203,209
1980 pshufd xmm0,xmm0,0x0e
1981 dec rdx
1982 nop
1983DB 15,56,203,202
1984
1985 paddd xmm2,xmm10
1986 paddd xmm1,xmm9
1987 jnz NEAR $L$oop_shaext
1988
1989 pshufd xmm2,xmm2,0xb1
1990 pshufd xmm7,xmm1,0x1b
1991 pshufd xmm1,xmm1,0xb1
1992 punpckhqdq xmm1,xmm2
1993DB 102,15,58,15,215,8
1994
1995 movdqu XMMWORD[rdi],xmm1
1996 movdqu XMMWORD[16+rdi],xmm2
1997 movaps xmm6,XMMWORD[((-8-80))+rax]
1998 movaps xmm7,XMMWORD[((-8-64))+rax]
1999 movaps xmm8,XMMWORD[((-8-48))+rax]
2000 movaps xmm9,XMMWORD[((-8-32))+rax]
2001 movaps xmm10,XMMWORD[((-8-16))+rax]
2002 mov rsp,rax
2003$L$epilogue_shaext:
2004 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2005 mov rsi,QWORD[16+rsp]
2006 DB 0F3h,0C3h ;repret
2007$L$SEH_end_sha256_block_data_order_shaext:
2008
2009ALIGN 64
; sha256_block_data_order_ssse3 -- SSSE3 code path of the SHA-256 block function.
; Arguments arrive in the Win64 registers rcx, rdx, r8 and are copied to
; rdi, rsi, rdx, which is what the rest of the routine works with:
;   rdi = pointer to eight 32-bit state words (loaded below, written back
;         after every block)
;   rsi = input pointer, advanced by 64 bytes per iteration of $L$loop_ssse3
;   rdx = count of 64-byte blocks (turned into an end-of-input pointer below)
; The incoming rdi/rsi are parked at [8+rsp]/[16+rsp] first and reloaded from
; there at $L$epilogue_ssse3.
2010sha256_block_data_order_ssse3:
2011 mov QWORD[8+rsp],rdi ;WIN64 prologue
2012 mov QWORD[16+rsp],rsi
2013 mov rax,rsp
2014$L$SEH_begin_sha256_block_data_order_ssse3:
2015 mov rdi,rcx
2016 mov rsi,rdx
2017 mov rdx,r8
2018
2019
; Secondary entry point: sha256_block_data_order performs the same argument
; shuffle itself and jumps here when its capability test picks this path.
2020$L$ssse3_shortcut:
2021 push rbx
2022 push rbp
2023 push r12
2024 push r13
2025 push r14
2026 push r15
2027 mov r11,rsp ; r11 = rsp after the six pushes, stored in the frame for the epilogue
2028 shl rdx,4
2029 sub rsp,160
2030 lea rdx,[rdx*4+rsi] ; together with the shl: rdx = rsi + 64*blocks (end of input)
2031 and rsp,-64 ; round the frame down to a 64-byte boundary
; Frame layout relative to the aligned rsp:
;   [0..63]         sixteen dwords staged for the rounds (filled in $L$loop_ssse3)
;   [64+0]          rdi (state pointer)    [64+8]   rsi (input pointer)
;   [64+16]         rdx (end of input)     [64+24]  r11 (rsp to restore)
;   [64+32..64+95]  xmm6-xmm9, saved here and restored before returning
2032 mov QWORD[((64+0))+rsp],rdi
2033 mov QWORD[((64+8))+rsp],rsi
2034 mov QWORD[((64+16))+rsp],rdx
2035 mov QWORD[((64+24))+rsp],r11
2036 movaps XMMWORD[(64+32)+rsp],xmm6
2037 movaps XMMWORD[(64+48)+rsp],xmm7
2038 movaps XMMWORD[(64+64)+rsp],xmm8
2039 movaps XMMWORD[(64+80)+rsp],xmm9
2040$L$prologue_ssse3:
2041
; Load the eight state words into the working registers
; (eax, ebx, ecx, edx, r8d, r9d, r10d, r11d in memory order).
2042 mov eax,DWORD[rdi]
2043 mov ebx,DWORD[4+rdi]
2044 mov ecx,DWORD[8+rdi]
2045 mov edx,DWORD[12+rdi]
2046 mov r8d,DWORD[16+rdi]
2047 mov r9d,DWORD[20+rdi]
2048 mov r10d,DWORD[24+rdi]
2049 mov r11d,DWORD[28+rdi]
2050
2051
2052 jmp NEAR $L$loop_ssse3
2053ALIGN 16
; Per-block setup. Loads the 64-byte block at rsi into xmm0-xmm3, runs each
; vector through pshufb with the mask at K256+512, adds the first four rows of
; the constant table (16 bytes each, read at a 32-byte stride from rbp), and
; stages the sixteen sums at [rsp..63+rsp] for the scalar rounds.
; NOTE(review): the mask is presumably a per-dword byte swap (the scalar path
; uses bswap on each input dword); K256 is defined outside this chunk -- confirm.
2054$L$loop_ssse3:
2055 movdqa xmm7,XMMWORD[((K256+512))]
2056 movdqu xmm0,XMMWORD[rsi]
2057 movdqu xmm1,XMMWORD[16+rsi]
2058 movdqu xmm2,XMMWORD[32+rsi]
2059DB 102,15,56,0,199 ; pshufb xmm0,xmm7 (hand-encoded)
2060 movdqu xmm3,XMMWORD[48+rsi]
2061 lea rbp,[K256] ; rbp = start of the constant table
2062DB 102,15,56,0,207 ; pshufb xmm1,xmm7
2063 movdqa xmm4,XMMWORD[rbp]
2064 movdqa xmm5,XMMWORD[32+rbp]
2065DB 102,15,56,0,215 ; pshufb xmm2,xmm7
2066 paddd xmm4,xmm0
2067 movdqa xmm6,XMMWORD[64+rbp]
2068DB 102,15,56,0,223 ; pshufb xmm3,xmm7
2069 movdqa xmm7,XMMWORD[96+rbp] ; the mask is no longer needed; xmm7 is reused
2070 paddd xmm5,xmm1
2071 paddd xmm6,xmm2
2072 paddd xmm7,xmm3
2073 movdqa XMMWORD[rsp],xmm4
2074 mov r14d,eax ; r14d, edi and r13d are seeded for the first round:
2075 movdqa XMMWORD[16+rsp],xmm5
2076 mov edi,ebx
2077 movdqa XMMWORD[32+rsp],xmm6
2078 xor edi,ecx ; edi = ebx ^ ecx
2079 movdqa XMMWORD[48+rsp],xmm7
2080 mov r13d,r8d
2081 jmp NEAR $L$ssse3_00_47
2082
2083ALIGN 16
; Rounds with message-schedule update. One pass through this body runs sixteen
; scalar rounds (four groups of four) on the values staged at [rsp..60+rsp].
; Interleaved with them, the SSE instructions derive the next sixteen schedule
; words in xmm0-xmm3, add the constants at [rbp], [32+rbp], [64+rbp], [96+rbp]
; and overwrite the staging area for the following sixteen rounds.
;
; Scalar register use:
;   eax, ebx, ecx, edx, r8d, r9d, r10d, r11d hold the eight working variables;
;     their roles rotate by one position each round instead of moving data.
;   r13d: "ror 14, xor, ror 5, xor, ror 6" on the fifth variable yields
;     x>>>6 ^ x>>>11 ^ x>>>25 (SHA-256 Sigma1).
;   r14d: "ror 9, xor, ror 11, xor, ror 2" on the first variable yields
;     x>>>2 ^ x>>>13 ^ x>>>22 (SHA-256 Sigma0); the round result is added into
;     r14d and copied to the new first variable at the start of the next round.
;   r12d: ((f ^ g) & e) ^ g for the fifth..seventh variables.
;   edi / r15d: alternate between holding (a ^ b) for the current round and the
;     previous round's value, combined as ((a ^ b) & (b ^ c)) ^ b.
2084$L$ssse3_00_47:
2085 sub rbp,-128 ; rbp += 128: advance to the next four constant rows
; Group 1: rounds on [rsp..12+rsp]; xmm0 receives the next four schedule words.
2086 ror r13d,14
2087 movdqa xmm4,xmm1
2088 mov eax,r14d
2089 mov r12d,r9d
2090 movdqa xmm7,xmm3
2091 ror r14d,9
2092 xor r13d,r8d
2093 xor r12d,r10d
2094 ror r13d,5
2095 xor r14d,eax
2096DB 102,15,58,15,224,4 ; palignr xmm4,xmm0,4 (hand-encoded)
2097 and r12d,r8d
2098 xor r13d,r8d
2099DB 102,15,58,15,250,4 ; palignr xmm7,xmm2,4
2100 add r11d,DWORD[rsp]
2101 mov r15d,eax
2102 xor r12d,r10d
2103 ror r14d,11
2104 movdqa xmm5,xmm4
2105 xor r15d,ebx
2106 add r11d,r12d
2107 movdqa xmm6,xmm4
2108 ror r13d,6
2109 and edi,r15d
2110 psrld xmm4,3
2111 xor r14d,eax
2112 add r11d,r13d
2113 xor edi,ebx
2114 paddd xmm0,xmm7
2115 ror r14d,2
2116 add edx,r11d
2117 psrld xmm6,7
2118 add r11d,edi
2119 mov r13d,edx
2120 pshufd xmm7,xmm3,250
2121 add r14d,r11d
2122 ror r13d,14
2123 pslld xmm5,14
2124 mov r11d,r14d
2125 mov r12d,r8d
2126 pxor xmm4,xmm6
2127 ror r14d,9
2128 xor r13d,edx
2129 xor r12d,r9d
2130 ror r13d,5
2131 psrld xmm6,11
2132 xor r14d,r11d
2133 pxor xmm4,xmm5
2134 and r12d,edx
2135 xor r13d,edx
2136 pslld xmm5,11
2137 add r10d,DWORD[4+rsp]
2138 mov edi,r11d
2139 pxor xmm4,xmm6
2140 xor r12d,r9d
2141 ror r14d,11
2142 movdqa xmm6,xmm7
2143 xor edi,eax
2144 add r10d,r12d
2145 pxor xmm4,xmm5
2146 ror r13d,6
2147 and r15d,edi
2148 xor r14d,r11d
2149 psrld xmm7,10
2150 add r10d,r13d
2151 xor r15d,eax
2152 paddd xmm0,xmm4
2153 ror r14d,2
2154 add ecx,r10d
2155 psrlq xmm6,17
2156 add r10d,r15d
2157 mov r13d,ecx
2158 add r14d,r10d
2159 pxor xmm7,xmm6
2160 ror r13d,14
2161 mov r10d,r14d
2162 mov r12d,edx
2163 ror r14d,9
2164 psrlq xmm6,2
2165 xor r13d,ecx
2166 xor r12d,r8d
2167 pxor xmm7,xmm6
2168 ror r13d,5
2169 xor r14d,r10d
2170 and r12d,ecx
2171 pshufd xmm7,xmm7,128
2172 xor r13d,ecx
2173 add r9d,DWORD[8+rsp]
2174 mov r15d,r10d
2175 psrldq xmm7,8
2176 xor r12d,r8d
2177 ror r14d,11
2178 xor r15d,r11d
2179 add r9d,r12d
2180 ror r13d,6
2181 paddd xmm0,xmm7
2182 and edi,r15d
2183 xor r14d,r10d
2184 add r9d,r13d
2185 pshufd xmm7,xmm0,80
2186 xor edi,r11d
2187 ror r14d,2
2188 add ebx,r9d
2189 movdqa xmm6,xmm7
2190 add r9d,edi
2191 mov r13d,ebx
2192 psrld xmm7,10
2193 add r14d,r9d
2194 ror r13d,14
2195 psrlq xmm6,17
2196 mov r9d,r14d
2197 mov r12d,ecx
2198 pxor xmm7,xmm6
2199 ror r14d,9
2200 xor r13d,ebx
2201 xor r12d,edx
2202 ror r13d,5
2203 xor r14d,r9d
2204 psrlq xmm6,2
2205 and r12d,ebx
2206 xor r13d,ebx
2207 add r8d,DWORD[12+rsp]
2208 pxor xmm7,xmm6
2209 mov edi,r9d
2210 xor r12d,edx
2211 ror r14d,11
2212 pshufd xmm7,xmm7,8
2213 xor edi,r10d
2214 add r8d,r12d
2215 movdqa xmm6,XMMWORD[rbp]
2216 ror r13d,6
2217 and r15d,edi
2218 pslldq xmm7,8
2219 xor r14d,r9d
2220 add r8d,r13d
2221 xor r15d,r10d
2222 paddd xmm0,xmm7
2223 ror r14d,2
2224 add eax,r8d
2225 add r8d,r15d
2226 paddd xmm6,xmm0
2227 mov r13d,eax
2228 add r14d,r8d
2229 movdqa XMMWORD[rsp],xmm6
; Group 2: rounds on [16+rsp..28+rsp]; xmm1 receives the next four schedule words.
2230 ror r13d,14
2231 movdqa xmm4,xmm2
2232 mov r8d,r14d
2233 mov r12d,ebx
2234 movdqa xmm7,xmm0
2235 ror r14d,9
2236 xor r13d,eax
2237 xor r12d,ecx
2238 ror r13d,5
2239 xor r14d,r8d
2240DB 102,15,58,15,225,4 ; palignr xmm4,xmm1,4
2241 and r12d,eax
2242 xor r13d,eax
2243DB 102,15,58,15,251,4 ; palignr xmm7,xmm3,4
2244 add edx,DWORD[16+rsp]
2245 mov r15d,r8d
2246 xor r12d,ecx
2247 ror r14d,11
2248 movdqa xmm5,xmm4
2249 xor r15d,r9d
2250 add edx,r12d
2251 movdqa xmm6,xmm4
2252 ror r13d,6
2253 and edi,r15d
2254 psrld xmm4,3
2255 xor r14d,r8d
2256 add edx,r13d
2257 xor edi,r9d
2258 paddd xmm1,xmm7
2259 ror r14d,2
2260 add r11d,edx
2261 psrld xmm6,7
2262 add edx,edi
2263 mov r13d,r11d
2264 pshufd xmm7,xmm0,250
2265 add r14d,edx
2266 ror r13d,14
2267 pslld xmm5,14
2268 mov edx,r14d
2269 mov r12d,eax
2270 pxor xmm4,xmm6
2271 ror r14d,9
2272 xor r13d,r11d
2273 xor r12d,ebx
2274 ror r13d,5
2275 psrld xmm6,11
2276 xor r14d,edx
2277 pxor xmm4,xmm5
2278 and r12d,r11d
2279 xor r13d,r11d
2280 pslld xmm5,11
2281 add ecx,DWORD[20+rsp]
2282 mov edi,edx
2283 pxor xmm4,xmm6
2284 xor r12d,ebx
2285 ror r14d,11
2286 movdqa xmm6,xmm7
2287 xor edi,r8d
2288 add ecx,r12d
2289 pxor xmm4,xmm5
2290 ror r13d,6
2291 and r15d,edi
2292 xor r14d,edx
2293 psrld xmm7,10
2294 add ecx,r13d
2295 xor r15d,r8d
2296 paddd xmm1,xmm4
2297 ror r14d,2
2298 add r10d,ecx
2299 psrlq xmm6,17
2300 add ecx,r15d
2301 mov r13d,r10d
2302 add r14d,ecx
2303 pxor xmm7,xmm6
2304 ror r13d,14
2305 mov ecx,r14d
2306 mov r12d,r11d
2307 ror r14d,9
2308 psrlq xmm6,2
2309 xor r13d,r10d
2310 xor r12d,eax
2311 pxor xmm7,xmm6
2312 ror r13d,5
2313 xor r14d,ecx
2314 and r12d,r10d
2315 pshufd xmm7,xmm7,128
2316 xor r13d,r10d
2317 add ebx,DWORD[24+rsp]
2318 mov r15d,ecx
2319 psrldq xmm7,8
2320 xor r12d,eax
2321 ror r14d,11
2322 xor r15d,edx
2323 add ebx,r12d
2324 ror r13d,6
2325 paddd xmm1,xmm7
2326 and edi,r15d
2327 xor r14d,ecx
2328 add ebx,r13d
2329 pshufd xmm7,xmm1,80
2330 xor edi,edx
2331 ror r14d,2
2332 add r9d,ebx
2333 movdqa xmm6,xmm7
2334 add ebx,edi
2335 mov r13d,r9d
2336 psrld xmm7,10
2337 add r14d,ebx
2338 ror r13d,14
2339 psrlq xmm6,17
2340 mov ebx,r14d
2341 mov r12d,r10d
2342 pxor xmm7,xmm6
2343 ror r14d,9
2344 xor r13d,r9d
2345 xor r12d,r11d
2346 ror r13d,5
2347 xor r14d,ebx
2348 psrlq xmm6,2
2349 and r12d,r9d
2350 xor r13d,r9d
2351 add eax,DWORD[28+rsp]
2352 pxor xmm7,xmm6
2353 mov edi,ebx
2354 xor r12d,r11d
2355 ror r14d,11
2356 pshufd xmm7,xmm7,8
2357 xor edi,ecx
2358 add eax,r12d
2359 movdqa xmm6,XMMWORD[32+rbp]
2360 ror r13d,6
2361 and r15d,edi
2362 pslldq xmm7,8
2363 xor r14d,ebx
2364 add eax,r13d
2365 xor r15d,ecx
2366 paddd xmm1,xmm7
2367 ror r14d,2
2368 add r8d,eax
2369 add eax,r15d
2370 paddd xmm6,xmm1
2371 mov r13d,r8d
2372 add r14d,eax
2373 movdqa XMMWORD[16+rsp],xmm6
; Group 3: rounds on [32+rsp..44+rsp]; xmm2 receives the next four schedule words.
2374 ror r13d,14
2375 movdqa xmm4,xmm3
2376 mov eax,r14d
2377 mov r12d,r9d
2378 movdqa xmm7,xmm1
2379 ror r14d,9
2380 xor r13d,r8d
2381 xor r12d,r10d
2382 ror r13d,5
2383 xor r14d,eax
2384DB 102,15,58,15,226,4 ; palignr xmm4,xmm2,4
2385 and r12d,r8d
2386 xor r13d,r8d
2387DB 102,15,58,15,248,4 ; palignr xmm7,xmm0,4
2388 add r11d,DWORD[32+rsp]
2389 mov r15d,eax
2390 xor r12d,r10d
2391 ror r14d,11
2392 movdqa xmm5,xmm4
2393 xor r15d,ebx
2394 add r11d,r12d
2395 movdqa xmm6,xmm4
2396 ror r13d,6
2397 and edi,r15d
2398 psrld xmm4,3
2399 xor r14d,eax
2400 add r11d,r13d
2401 xor edi,ebx
2402 paddd xmm2,xmm7
2403 ror r14d,2
2404 add edx,r11d
2405 psrld xmm6,7
2406 add r11d,edi
2407 mov r13d,edx
2408 pshufd xmm7,xmm1,250
2409 add r14d,r11d
2410 ror r13d,14
2411 pslld xmm5,14
2412 mov r11d,r14d
2413 mov r12d,r8d
2414 pxor xmm4,xmm6
2415 ror r14d,9
2416 xor r13d,edx
2417 xor r12d,r9d
2418 ror r13d,5
2419 psrld xmm6,11
2420 xor r14d,r11d
2421 pxor xmm4,xmm5
2422 and r12d,edx
2423 xor r13d,edx
2424 pslld xmm5,11
2425 add r10d,DWORD[36+rsp]
2426 mov edi,r11d
2427 pxor xmm4,xmm6
2428 xor r12d,r9d
2429 ror r14d,11
2430 movdqa xmm6,xmm7
2431 xor edi,eax
2432 add r10d,r12d
2433 pxor xmm4,xmm5
2434 ror r13d,6
2435 and r15d,edi
2436 xor r14d,r11d
2437 psrld xmm7,10
2438 add r10d,r13d
2439 xor r15d,eax
2440 paddd xmm2,xmm4
2441 ror r14d,2
2442 add ecx,r10d
2443 psrlq xmm6,17
2444 add r10d,r15d
2445 mov r13d,ecx
2446 add r14d,r10d
2447 pxor xmm7,xmm6
2448 ror r13d,14
2449 mov r10d,r14d
2450 mov r12d,edx
2451 ror r14d,9
2452 psrlq xmm6,2
2453 xor r13d,ecx
2454 xor r12d,r8d
2455 pxor xmm7,xmm6
2456 ror r13d,5
2457 xor r14d,r10d
2458 and r12d,ecx
2459 pshufd xmm7,xmm7,128
2460 xor r13d,ecx
2461 add r9d,DWORD[40+rsp]
2462 mov r15d,r10d
2463 psrldq xmm7,8
2464 xor r12d,r8d
2465 ror r14d,11
2466 xor r15d,r11d
2467 add r9d,r12d
2468 ror r13d,6
2469 paddd xmm2,xmm7
2470 and edi,r15d
2471 xor r14d,r10d
2472 add r9d,r13d
2473 pshufd xmm7,xmm2,80
2474 xor edi,r11d
2475 ror r14d,2
2476 add ebx,r9d
2477 movdqa xmm6,xmm7
2478 add r9d,edi
2479 mov r13d,ebx
2480 psrld xmm7,10
2481 add r14d,r9d
2482 ror r13d,14
2483 psrlq xmm6,17
2484 mov r9d,r14d
2485 mov r12d,ecx
2486 pxor xmm7,xmm6
2487 ror r14d,9
2488 xor r13d,ebx
2489 xor r12d,edx
2490 ror r13d,5
2491 xor r14d,r9d
2492 psrlq xmm6,2
2493 and r12d,ebx
2494 xor r13d,ebx
2495 add r8d,DWORD[44+rsp]
2496 pxor xmm7,xmm6
2497 mov edi,r9d
2498 xor r12d,edx
2499 ror r14d,11
2500 pshufd xmm7,xmm7,8
2501 xor edi,r10d
2502 add r8d,r12d
2503 movdqa xmm6,XMMWORD[64+rbp]
2504 ror r13d,6
2505 and r15d,edi
2506 pslldq xmm7,8
2507 xor r14d,r9d
2508 add r8d,r13d
2509 xor r15d,r10d
2510 paddd xmm2,xmm7
2511 ror r14d,2
2512 add eax,r8d
2513 add r8d,r15d
2514 paddd xmm6,xmm2
2515 mov r13d,eax
2516 add r14d,r8d
2517 movdqa XMMWORD[32+rsp],xmm6
; Group 4: rounds on [48+rsp..60+rsp]; xmm3 receives the next four schedule words.
2518 ror r13d,14
2519 movdqa xmm4,xmm0
2520 mov r8d,r14d
2521 mov r12d,ebx
2522 movdqa xmm7,xmm2
2523 ror r14d,9
2524 xor r13d,eax
2525 xor r12d,ecx
2526 ror r13d,5
2527 xor r14d,r8d
2528DB 102,15,58,15,227,4 ; palignr xmm4,xmm3,4
2529 and r12d,eax
2530 xor r13d,eax
2531DB 102,15,58,15,249,4 ; palignr xmm7,xmm1,4
2532 add edx,DWORD[48+rsp]
2533 mov r15d,r8d
2534 xor r12d,ecx
2535 ror r14d,11
2536 movdqa xmm5,xmm4
2537 xor r15d,r9d
2538 add edx,r12d
2539 movdqa xmm6,xmm4
2540 ror r13d,6
2541 and edi,r15d
2542 psrld xmm4,3
2543 xor r14d,r8d
2544 add edx,r13d
2545 xor edi,r9d
2546 paddd xmm3,xmm7
2547 ror r14d,2
2548 add r11d,edx
2549 psrld xmm6,7
2550 add edx,edi
2551 mov r13d,r11d
2552 pshufd xmm7,xmm2,250
2553 add r14d,edx
2554 ror r13d,14
2555 pslld xmm5,14
2556 mov edx,r14d
2557 mov r12d,eax
2558 pxor xmm4,xmm6
2559 ror r14d,9
2560 xor r13d,r11d
2561 xor r12d,ebx
2562 ror r13d,5
2563 psrld xmm6,11
2564 xor r14d,edx
2565 pxor xmm4,xmm5
2566 and r12d,r11d
2567 xor r13d,r11d
2568 pslld xmm5,11
2569 add ecx,DWORD[52+rsp]
2570 mov edi,edx
2571 pxor xmm4,xmm6
2572 xor r12d,ebx
2573 ror r14d,11
2574 movdqa xmm6,xmm7
2575 xor edi,r8d
2576 add ecx,r12d
2577 pxor xmm4,xmm5
2578 ror r13d,6
2579 and r15d,edi
2580 xor r14d,edx
2581 psrld xmm7,10
2582 add ecx,r13d
2583 xor r15d,r8d
2584 paddd xmm3,xmm4
2585 ror r14d,2
2586 add r10d,ecx
2587 psrlq xmm6,17
2588 add ecx,r15d
2589 mov r13d,r10d
2590 add r14d,ecx
2591 pxor xmm7,xmm6
2592 ror r13d,14
2593 mov ecx,r14d
2594 mov r12d,r11d
2595 ror r14d,9
2596 psrlq xmm6,2
2597 xor r13d,r10d
2598 xor r12d,eax
2599 pxor xmm7,xmm6
2600 ror r13d,5
2601 xor r14d,ecx
2602 and r12d,r10d
2603 pshufd xmm7,xmm7,128
2604 xor r13d,r10d
2605 add ebx,DWORD[56+rsp]
2606 mov r15d,ecx
2607 psrldq xmm7,8
2608 xor r12d,eax
2609 ror r14d,11
2610 xor r15d,edx
2611 add ebx,r12d
2612 ror r13d,6
2613 paddd xmm3,xmm7
2614 and edi,r15d
2615 xor r14d,ecx
2616 add ebx,r13d
2617 pshufd xmm7,xmm3,80
2618 xor edi,edx
2619 ror r14d,2
2620 add r9d,ebx
2621 movdqa xmm6,xmm7
2622 add ebx,edi
2623 mov r13d,r9d
2624 psrld xmm7,10
2625 add r14d,ebx
2626 ror r13d,14
2627 psrlq xmm6,17
2628 mov ebx,r14d
2629 mov r12d,r10d
2630 pxor xmm7,xmm6
2631 ror r14d,9
2632 xor r13d,r9d
2633 xor r12d,r11d
2634 ror r13d,5
2635 xor r14d,ebx
2636 psrlq xmm6,2
2637 and r12d,r9d
2638 xor r13d,r9d
2639 add eax,DWORD[60+rsp]
2640 pxor xmm7,xmm6
2641 mov edi,ebx
2642 xor r12d,r11d
2643 ror r14d,11
2644 pshufd xmm7,xmm7,8
2645 xor edi,ecx
2646 add eax,r12d
2647 movdqa xmm6,XMMWORD[96+rbp]
2648 ror r13d,6
2649 and r15d,edi
2650 pslldq xmm7,8
2651 xor r14d,ebx
2652 add eax,r13d
2653 xor r15d,ecx
2654 paddd xmm3,xmm7
2655 ror r14d,2
2656 add r8d,eax
2657 add eax,r15d
2658 paddd xmm6,xmm3
2659 mov r13d,r8d
2660 add r14d,eax
2661 movdqa XMMWORD[48+rsp],xmm6
; Repeat while the byte at [131+rbp] is non-zero.
; NOTE(review): presumably this byte reads zero only once rbp has reached the
; last group of constants (the label name suggests rounds 0-47 run here);
; the K256 table is outside this chunk -- confirm against its layout.
2662 cmp BYTE[131+rbp],0
2663 jne NEAR $L$ssse3_00_47
; Final sixteen rounds of the current block (fall-through from the
; $L$ssse3_00_47 loop). Scalar only: the values staged at [rsp..60+rsp] are
; consumed and no further schedule words are computed. Register roles are the
; same as in the loop above (eight working variables rotating through
; eax, ebx, ecx, edx, r8d-r11d; r12d-r15d and edi are scratch).
; Rounds on [rsp..12+rsp]:
2664 ror r13d,14
2665 mov eax,r14d
2666 mov r12d,r9d
2667 ror r14d,9
2668 xor r13d,r8d
2669 xor r12d,r10d
2670 ror r13d,5
2671 xor r14d,eax
2672 and r12d,r8d
2673 xor r13d,r8d
2674 add r11d,DWORD[rsp]
2675 mov r15d,eax
2676 xor r12d,r10d
2677 ror r14d,11
2678 xor r15d,ebx
2679 add r11d,r12d
2680 ror r13d,6
2681 and edi,r15d
2682 xor r14d,eax
2683 add r11d,r13d
2684 xor edi,ebx
2685 ror r14d,2
2686 add edx,r11d
2687 add r11d,edi
2688 mov r13d,edx
2689 add r14d,r11d
2690 ror r13d,14
2691 mov r11d,r14d
2692 mov r12d,r8d
2693 ror r14d,9
2694 xor r13d,edx
2695 xor r12d,r9d
2696 ror r13d,5
2697 xor r14d,r11d
2698 and r12d,edx
2699 xor r13d,edx
2700 add r10d,DWORD[4+rsp]
2701 mov edi,r11d
2702 xor r12d,r9d
2703 ror r14d,11
2704 xor edi,eax
2705 add r10d,r12d
2706 ror r13d,6
2707 and r15d,edi
2708 xor r14d,r11d
2709 add r10d,r13d
2710 xor r15d,eax
2711 ror r14d,2
2712 add ecx,r10d
2713 add r10d,r15d
2714 mov r13d,ecx
2715 add r14d,r10d
2716 ror r13d,14
2717 mov r10d,r14d
2718 mov r12d,edx
2719 ror r14d,9
2720 xor r13d,ecx
2721 xor r12d,r8d
2722 ror r13d,5
2723 xor r14d,r10d
2724 and r12d,ecx
2725 xor r13d,ecx
2726 add r9d,DWORD[8+rsp]
2727 mov r15d,r10d
2728 xor r12d,r8d
2729 ror r14d,11
2730 xor r15d,r11d
2731 add r9d,r12d
2732 ror r13d,6
2733 and edi,r15d
2734 xor r14d,r10d
2735 add r9d,r13d
2736 xor edi,r11d
2737 ror r14d,2
2738 add ebx,r9d
2739 add r9d,edi
2740 mov r13d,ebx
2741 add r14d,r9d
2742 ror r13d,14
2743 mov r9d,r14d
2744 mov r12d,ecx
2745 ror r14d,9
2746 xor r13d,ebx
2747 xor r12d,edx
2748 ror r13d,5
2749 xor r14d,r9d
2750 and r12d,ebx
2751 xor r13d,ebx
2752 add r8d,DWORD[12+rsp]
2753 mov edi,r9d
2754 xor r12d,edx
2755 ror r14d,11
2756 xor edi,r10d
2757 add r8d,r12d
2758 ror r13d,6
2759 and r15d,edi
2760 xor r14d,r9d
2761 add r8d,r13d
2762 xor r15d,r10d
2763 ror r14d,2
2764 add eax,r8d
2765 add r8d,r15d
2766 mov r13d,eax
2767 add r14d,r8d
; Rounds on [16+rsp..28+rsp]:
2768 ror r13d,14
2769 mov r8d,r14d
2770 mov r12d,ebx
2771 ror r14d,9
2772 xor r13d,eax
2773 xor r12d,ecx
2774 ror r13d,5
2775 xor r14d,r8d
2776 and r12d,eax
2777 xor r13d,eax
2778 add edx,DWORD[16+rsp]
2779 mov r15d,r8d
2780 xor r12d,ecx
2781 ror r14d,11
2782 xor r15d,r9d
2783 add edx,r12d
2784 ror r13d,6
2785 and edi,r15d
2786 xor r14d,r8d
2787 add edx,r13d
2788 xor edi,r9d
2789 ror r14d,2
2790 add r11d,edx
2791 add edx,edi
2792 mov r13d,r11d
2793 add r14d,edx
2794 ror r13d,14
2795 mov edx,r14d
2796 mov r12d,eax
2797 ror r14d,9
2798 xor r13d,r11d
2799 xor r12d,ebx
2800 ror r13d,5
2801 xor r14d,edx
2802 and r12d,r11d
2803 xor r13d,r11d
2804 add ecx,DWORD[20+rsp]
2805 mov edi,edx
2806 xor r12d,ebx
2807 ror r14d,11
2808 xor edi,r8d
2809 add ecx,r12d
2810 ror r13d,6
2811 and r15d,edi
2812 xor r14d,edx
2813 add ecx,r13d
2814 xor r15d,r8d
2815 ror r14d,2
2816 add r10d,ecx
2817 add ecx,r15d
2818 mov r13d,r10d
2819 add r14d,ecx
2820 ror r13d,14
2821 mov ecx,r14d
2822 mov r12d,r11d
2823 ror r14d,9
2824 xor r13d,r10d
2825 xor r12d,eax
2826 ror r13d,5
2827 xor r14d,ecx
2828 and r12d,r10d
2829 xor r13d,r10d
2830 add ebx,DWORD[24+rsp]
2831 mov r15d,ecx
2832 xor r12d,eax
2833 ror r14d,11
2834 xor r15d,edx
2835 add ebx,r12d
2836 ror r13d,6
2837 and edi,r15d
2838 xor r14d,ecx
2839 add ebx,r13d
2840 xor edi,edx
2841 ror r14d,2
2842 add r9d,ebx
2843 add ebx,edi
2844 mov r13d,r9d
2845 add r14d,ebx
2846 ror r13d,14
2847 mov ebx,r14d
2848 mov r12d,r10d
2849 ror r14d,9
2850 xor r13d,r9d
2851 xor r12d,r11d
2852 ror r13d,5
2853 xor r14d,ebx
2854 and r12d,r9d
2855 xor r13d,r9d
2856 add eax,DWORD[28+rsp]
2857 mov edi,ebx
2858 xor r12d,r11d
2859 ror r14d,11
2860 xor edi,ecx
2861 add eax,r12d
2862 ror r13d,6
2863 and r15d,edi
2864 xor r14d,ebx
2865 add eax,r13d
2866 xor r15d,ecx
2867 ror r14d,2
2868 add r8d,eax
2869 add eax,r15d
2870 mov r13d,r8d
2871 add r14d,eax
; Rounds on [32+rsp..44+rsp]:
2872 ror r13d,14
2873 mov eax,r14d
2874 mov r12d,r9d
2875 ror r14d,9
2876 xor r13d,r8d
2877 xor r12d,r10d
2878 ror r13d,5
2879 xor r14d,eax
2880 and r12d,r8d
2881 xor r13d,r8d
2882 add r11d,DWORD[32+rsp]
2883 mov r15d,eax
2884 xor r12d,r10d
2885 ror r14d,11
2886 xor r15d,ebx
2887 add r11d,r12d
2888 ror r13d,6
2889 and edi,r15d
2890 xor r14d,eax
2891 add r11d,r13d
2892 xor edi,ebx
2893 ror r14d,2
2894 add edx,r11d
2895 add r11d,edi
2896 mov r13d,edx
2897 add r14d,r11d
2898 ror r13d,14
2899 mov r11d,r14d
2900 mov r12d,r8d
2901 ror r14d,9
2902 xor r13d,edx
2903 xor r12d,r9d
2904 ror r13d,5
2905 xor r14d,r11d
2906 and r12d,edx
2907 xor r13d,edx
2908 add r10d,DWORD[36+rsp]
2909 mov edi,r11d
2910 xor r12d,r9d
2911 ror r14d,11
2912 xor edi,eax
2913 add r10d,r12d
2914 ror r13d,6
2915 and r15d,edi
2916 xor r14d,r11d
2917 add r10d,r13d
2918 xor r15d,eax
2919 ror r14d,2
2920 add ecx,r10d
2921 add r10d,r15d
2922 mov r13d,ecx
2923 add r14d,r10d
2924 ror r13d,14
2925 mov r10d,r14d
2926 mov r12d,edx
2927 ror r14d,9
2928 xor r13d,ecx
2929 xor r12d,r8d
2930 ror r13d,5
2931 xor r14d,r10d
2932 and r12d,ecx
2933 xor r13d,ecx
2934 add r9d,DWORD[40+rsp]
2935 mov r15d,r10d
2936 xor r12d,r8d
2937 ror r14d,11
2938 xor r15d,r11d
2939 add r9d,r12d
2940 ror r13d,6
2941 and edi,r15d
2942 xor r14d,r10d
2943 add r9d,r13d
2944 xor edi,r11d
2945 ror r14d,2
2946 add ebx,r9d
2947 add r9d,edi
2948 mov r13d,ebx
2949 add r14d,r9d
2950 ror r13d,14
2951 mov r9d,r14d
2952 mov r12d,ecx
2953 ror r14d,9
2954 xor r13d,ebx
2955 xor r12d,edx
2956 ror r13d,5
2957 xor r14d,r9d
2958 and r12d,ebx
2959 xor r13d,ebx
2960 add r8d,DWORD[44+rsp]
2961 mov edi,r9d
2962 xor r12d,edx
2963 ror r14d,11
2964 xor edi,r10d
2965 add r8d,r12d
2966 ror r13d,6
2967 and r15d,edi
2968 xor r14d,r9d
2969 add r8d,r13d
2970 xor r15d,r10d
2971 ror r14d,2
2972 add eax,r8d
2973 add r8d,r15d
2974 mov r13d,eax
2975 add r14d,r8d
; Rounds on [48+rsp..60+rsp]:
2976 ror r13d,14
2977 mov r8d,r14d
2978 mov r12d,ebx
2979 ror r14d,9
2980 xor r13d,eax
2981 xor r12d,ecx
2982 ror r13d,5
2983 xor r14d,r8d
2984 and r12d,eax
2985 xor r13d,eax
2986 add edx,DWORD[48+rsp]
2987 mov r15d,r8d
2988 xor r12d,ecx
2989 ror r14d,11
2990 xor r15d,r9d
2991 add edx,r12d
2992 ror r13d,6
2993 and edi,r15d
2994 xor r14d,r8d
2995 add edx,r13d
2996 xor edi,r9d
2997 ror r14d,2
2998 add r11d,edx
2999 add edx,edi
3000 mov r13d,r11d
3001 add r14d,edx
3002 ror r13d,14
3003 mov edx,r14d
3004 mov r12d,eax
3005 ror r14d,9
3006 xor r13d,r11d
3007 xor r12d,ebx
3008 ror r13d,5
3009 xor r14d,edx
3010 and r12d,r11d
3011 xor r13d,r11d
3012 add ecx,DWORD[52+rsp]
3013 mov edi,edx
3014 xor r12d,ebx
3015 ror r14d,11
3016 xor edi,r8d
3017 add ecx,r12d
3018 ror r13d,6
3019 and r15d,edi
3020 xor r14d,edx
3021 add ecx,r13d
3022 xor r15d,r8d
3023 ror r14d,2
3024 add r10d,ecx
3025 add ecx,r15d
3026 mov r13d,r10d
3027 add r14d,ecx
3028 ror r13d,14
3029 mov ecx,r14d
3030 mov r12d,r11d
3031 ror r14d,9
3032 xor r13d,r10d
3033 xor r12d,eax
3034 ror r13d,5
3035 xor r14d,ecx
3036 and r12d,r10d
3037 xor r13d,r10d
3038 add ebx,DWORD[56+rsp]
3039 mov r15d,ecx
3040 xor r12d,eax
3041 ror r14d,11
3042 xor r15d,edx
3043 add ebx,r12d
3044 ror r13d,6
3045 and edi,r15d
3046 xor r14d,ecx
3047 add ebx,r13d
3048 xor edi,edx
3049 ror r14d,2
3050 add r9d,ebx
3051 add ebx,edi
3052 mov r13d,r9d
3053 add r14d,ebx
3054 ror r13d,14
3055 mov ebx,r14d
3056 mov r12d,r10d
3057 ror r14d,9
3058 xor r13d,r9d
3059 xor r12d,r11d
3060 ror r13d,5
3061 xor r14d,ebx
3062 and r12d,r9d
3063 xor r13d,r9d
3064 add eax,DWORD[60+rsp]
3065 mov edi,ebx
3066 xor r12d,r11d
3067 ror r14d,11
3068 xor edi,ecx
3069 add eax,r12d
3070 ror r13d,6
3071 and r15d,edi
3072 xor r14d,ebx
3073 add eax,r13d
3074 xor r15d,ecx
3075 ror r14d,2
3076 add r8d,eax
3077 add eax,r15d
3078 mov r13d,r8d
3079 add r14d,eax
; End of the block: fold the working variables back into the state.
; edi was used as scratch during the rounds, so the state pointer is reloaded
; from its frame slot first; r14d holds the finished first working variable.
3080 mov rdi,QWORD[((64+0))+rsp]
3081 mov eax,r14d
3082
3083 add eax,DWORD[rdi]
3084 lea rsi,[64+rsi] ; advance the input pointer to the next 64-byte block
3085 add ebx,DWORD[4+rdi]
3086 add ecx,DWORD[8+rdi]
3087 add edx,DWORD[12+rdi]
3088 add r8d,DWORD[16+rdi]
3089 add r9d,DWORD[20+rdi]
3090 add r10d,DWORD[24+rdi]
3091 add r11d,DWORD[28+rdi]
3092
; Compare against the end-of-input pointer saved in the frame. The stores that
; follow are plain mov instructions and leave the flags untouched, so the jb
; below still acts on this comparison.
3093 cmp rsi,QWORD[((64+16))+rsp]
3094
3095 mov DWORD[rdi],eax
3096 mov DWORD[4+rdi],ebx
3097 mov DWORD[8+rdi],ecx
3098 mov DWORD[12+rdi],edx
3099 mov DWORD[16+rdi],r8d
3100 mov DWORD[20+rdi],r9d
3101 mov DWORD[24+rdi],r10d
3102 mov DWORD[28+rdi],r11d
3103 jb NEAR $L$loop_ssse3 ; more input remains (unsigned rsi < end)
3104
; Tear-down. rsi is loaded with the stack pointer recorded right after the six
; pushes in the prologue; the saved xmm6-xmm9 are restored from the aligned
; frame, then the general-purpose registers are read back from the push area
; (r15 was pushed last and so sits at the lowest address).
3105 mov rsi,QWORD[((64+24))+rsp]
3106 movaps xmm6,XMMWORD[((64+32))+rsp]
3107 movaps xmm7,XMMWORD[((64+48))+rsp]
3108 movaps xmm8,XMMWORD[((64+64))+rsp]
3109 movaps xmm9,XMMWORD[((64+80))+rsp]
3110 mov r15,QWORD[rsi]
3111 mov r14,QWORD[8+rsi]
3112 mov r13,QWORD[16+rsi]
3113 mov r12,QWORD[24+rsi]
3114 mov rbp,QWORD[32+rsi]
3115 mov rbx,QWORD[40+rsi]
3116 lea rsp,[48+rsi] ; rsp = value at function entry (6 pushes * 8 bytes undone)
3117$L$epilogue_ssse3:
; Reload the rdi/rsi values that the entry code parked at [8+rsp]/[16+rsp].
3118 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
3119 mov rsi,QWORD[16+rsp]
3120 DB 0F3h,0C3h ;repret
3121$L$SEH_end_sha256_block_data_order_ssse3:
3122
3123ALIGN 64
3124sha256_block_data_order_avx:
3125 mov QWORD[8+rsp],rdi ;WIN64 prologue
3126 mov QWORD[16+rsp],rsi
3127 mov rax,rsp
3128$L$SEH_begin_sha256_block_data_order_avx:
3129 mov rdi,rcx
3130 mov rsi,rdx
3131 mov rdx,r8
3132
3133
3134$L$avx_shortcut:
3135 push rbx
3136 push rbp
3137 push r12
3138 push r13
3139 push r14
3140 push r15
3141 mov r11,rsp
3142 shl rdx,4
3143 sub rsp,160
3144 lea rdx,[rdx*4+rsi]
3145 and rsp,-64
3146 mov QWORD[((64+0))+rsp],rdi
3147 mov QWORD[((64+8))+rsp],rsi
3148 mov QWORD[((64+16))+rsp],rdx
3149 mov QWORD[((64+24))+rsp],r11
3150 movaps XMMWORD[(64+32)+rsp],xmm6
3151 movaps XMMWORD[(64+48)+rsp],xmm7
3152 movaps XMMWORD[(64+64)+rsp],xmm8
3153 movaps XMMWORD[(64+80)+rsp],xmm9
3154$L$prologue_avx:
3155
3156 vzeroupper
3157 mov eax,DWORD[rdi]
3158 mov ebx,DWORD[4+rdi]
3159 mov ecx,DWORD[8+rdi]
3160 mov edx,DWORD[12+rdi]
3161 mov r8d,DWORD[16+rdi]
3162 mov r9d,DWORD[20+rdi]
3163 mov r10d,DWORD[24+rdi]
3164 mov r11d,DWORD[28+rdi]
3165 vmovdqa xmm8,XMMWORD[((K256+512+32))]
3166 vmovdqa xmm9,XMMWORD[((K256+512+64))]
3167 jmp NEAR $L$loop_avx
3168ALIGN 16
3169$L$loop_avx:
3170 vmovdqa xmm7,XMMWORD[((K256+512))]
3171 vmovdqu xmm0,XMMWORD[rsi]
3172 vmovdqu xmm1,XMMWORD[16+rsi]
3173 vmovdqu xmm2,XMMWORD[32+rsi]
3174 vmovdqu xmm3,XMMWORD[48+rsi]
3175 vpshufb xmm0,xmm0,xmm7
3176 lea rbp,[K256]
3177 vpshufb xmm1,xmm1,xmm7
3178 vpshufb xmm2,xmm2,xmm7
3179 vpaddd xmm4,xmm0,XMMWORD[rbp]
3180 vpshufb xmm3,xmm3,xmm7
3181 vpaddd xmm5,xmm1,XMMWORD[32+rbp]
3182 vpaddd xmm6,xmm2,XMMWORD[64+rbp]
3183 vpaddd xmm7,xmm3,XMMWORD[96+rbp]
3184 vmovdqa XMMWORD[rsp],xmm4
3185 mov r14d,eax
3186 vmovdqa XMMWORD[16+rsp],xmm5
3187 mov edi,ebx
3188 vmovdqa XMMWORD[32+rsp],xmm6
3189 xor edi,ecx
3190 vmovdqa XMMWORD[48+rsp],xmm7
3191 mov r13d,r8d
3192 jmp NEAR $L$avx_00_47
3193
3194ALIGN 16
3195$L$avx_00_47:
3196 sub rbp,-128
3197 vpalignr xmm4,xmm1,xmm0,4
3198 shrd r13d,r13d,14
3199 mov eax,r14d
3200 mov r12d,r9d
3201 vpalignr xmm7,xmm3,xmm2,4
3202 shrd r14d,r14d,9
3203 xor r13d,r8d
3204 xor r12d,r10d
3205 vpsrld xmm6,xmm4,7
3206 shrd r13d,r13d,5
3207 xor r14d,eax
3208 and r12d,r8d
3209 vpaddd xmm0,xmm0,xmm7
3210 xor r13d,r8d
3211 add r11d,DWORD[rsp]
3212 mov r15d,eax
3213 vpsrld xmm7,xmm4,3
3214 xor r12d,r10d
3215 shrd r14d,r14d,11
3216 xor r15d,ebx
3217 vpslld xmm5,xmm4,14
3218 add r11d,r12d
3219 shrd r13d,r13d,6
3220 and edi,r15d
3221 vpxor xmm4,xmm7,xmm6
3222 xor r14d,eax
3223 add r11d,r13d
3224 xor edi,ebx
3225 vpshufd xmm7,xmm3,250
3226 shrd r14d,r14d,2
3227 add edx,r11d
3228 add r11d,edi
3229 vpsrld xmm6,xmm6,11
3230 mov r13d,edx
3231 add r14d,r11d
3232 shrd r13d,r13d,14
3233 vpxor xmm4,xmm4,xmm5
3234 mov r11d,r14d
3235 mov r12d,r8d
3236 shrd r14d,r14d,9
3237 vpslld xmm5,xmm5,11
3238 xor r13d,edx
3239 xor r12d,r9d
3240 shrd r13d,r13d,5
3241 vpxor xmm4,xmm4,xmm6
3242 xor r14d,r11d
3243 and r12d,edx
3244 xor r13d,edx
3245 vpsrld xmm6,xmm7,10
3246 add r10d,DWORD[4+rsp]
3247 mov edi,r11d
3248 xor r12d,r9d
3249 vpxor xmm4,xmm4,xmm5
3250 shrd r14d,r14d,11
3251 xor edi,eax
3252 add r10d,r12d
3253 vpsrlq xmm7,xmm7,17
3254 shrd r13d,r13d,6
3255 and r15d,edi
3256 xor r14d,r11d
3257 vpaddd xmm0,xmm0,xmm4
3258 add r10d,r13d
3259 xor r15d,eax
3260 shrd r14d,r14d,2
3261 vpxor xmm6,xmm6,xmm7
3262 add ecx,r10d
3263 add r10d,r15d
3264 mov r13d,ecx
3265 vpsrlq xmm7,xmm7,2
3266 add r14d,r10d
3267 shrd r13d,r13d,14
3268 mov r10d,r14d
3269 vpxor xmm6,xmm6,xmm7
3270 mov r12d,edx
3271 shrd r14d,r14d,9
3272 xor r13d,ecx
3273 vpshufb xmm6,xmm6,xmm8
3274 xor r12d,r8d
3275 shrd r13d,r13d,5
3276 xor r14d,r10d
3277 vpaddd xmm0,xmm0,xmm6
3278 and r12d,ecx
3279 xor r13d,ecx
3280 add r9d,DWORD[8+rsp]
3281 vpshufd xmm7,xmm0,80
3282 mov r15d,r10d
3283 xor r12d,r8d
3284 shrd r14d,r14d,11
3285 vpsrld xmm6,xmm7,10
3286 xor r15d,r11d
3287 add r9d,r12d
3288 shrd r13d,r13d,6
3289 vpsrlq xmm7,xmm7,17
3290 and edi,r15d
3291 xor r14d,r10d
3292 add r9d,r13d
3293 vpxor xmm6,xmm6,xmm7
3294 xor edi,r11d
3295 shrd r14d,r14d,2
3296 add ebx,r9d
3297 vpsrlq xmm7,xmm7,2
3298 add r9d,edi
3299 mov r13d,ebx
3300 add r14d,r9d
3301 vpxor xmm6,xmm6,xmm7
3302 shrd r13d,r13d,14
3303 mov r9d,r14d
3304 mov r12d,ecx
3305 vpshufb xmm6,xmm6,xmm9
3306 shrd r14d,r14d,9
3307 xor r13d,ebx
3308 xor r12d,edx
3309 vpaddd xmm0,xmm0,xmm6
3310 shrd r13d,r13d,5
3311 xor r14d,r9d
3312 and r12d,ebx
3313 vpaddd xmm6,xmm0,XMMWORD[rbp]
3314 xor r13d,ebx
3315 add r8d,DWORD[12+rsp]
3316 mov edi,r9d
3317 xor r12d,edx
3318 shrd r14d,r14d,11
3319 xor edi,r10d
3320 add r8d,r12d
3321 shrd r13d,r13d,6
3322 and r15d,edi
3323 xor r14d,r9d
3324 add r8d,r13d
3325 xor r15d,r10d
3326 shrd r14d,r14d,2
3327 add eax,r8d
3328 add r8d,r15d
3329 mov r13d,eax
3330 add r14d,r8d
3331 vmovdqa XMMWORD[rsp],xmm6
3332 vpalignr xmm4,xmm2,xmm1,4
3333 shrd r13d,r13d,14
3334 mov r8d,r14d
3335 mov r12d,ebx
3336 vpalignr xmm7,xmm0,xmm3,4
3337 shrd r14d,r14d,9
3338 xor r13d,eax
3339 xor r12d,ecx
3340 vpsrld xmm6,xmm4,7
3341 shrd r13d,r13d,5
3342 xor r14d,r8d
3343 and r12d,eax
3344 vpaddd xmm1,xmm1,xmm7
3345 xor r13d,eax
3346 add edx,DWORD[16+rsp]
3347 mov r15d,r8d
3348 vpsrld xmm7,xmm4,3
3349 xor r12d,ecx
3350 shrd r14d,r14d,11
3351 xor r15d,r9d
3352 vpslld xmm5,xmm4,14
3353 add edx,r12d
3354 shrd r13d,r13d,6
3355 and edi,r15d
3356 vpxor xmm4,xmm7,xmm6
3357 xor r14d,r8d
3358 add edx,r13d
3359 xor edi,r9d
3360 vpshufd xmm7,xmm0,250
3361 shrd r14d,r14d,2
3362 add r11d,edx
3363 add edx,edi
3364 vpsrld xmm6,xmm6,11
3365 mov r13d,r11d
3366 add r14d,edx
3367 shrd r13d,r13d,14
3368 vpxor xmm4,xmm4,xmm5
3369 mov edx,r14d
3370 mov r12d,eax
3371 shrd r14d,r14d,9
3372 vpslld xmm5,xmm5,11
3373 xor r13d,r11d
3374 xor r12d,ebx
3375 shrd r13d,r13d,5
3376 vpxor xmm4,xmm4,xmm6
3377 xor r14d,edx
3378 and r12d,r11d
3379 xor r13d,r11d
3380 vpsrld xmm6,xmm7,10
3381 add ecx,DWORD[20+rsp]
3382 mov edi,edx
3383 xor r12d,ebx
3384 vpxor xmm4,xmm4,xmm5
3385 shrd r14d,r14d,11
3386 xor edi,r8d
3387 add ecx,r12d
3388 vpsrlq xmm7,xmm7,17
3389 shrd r13d,r13d,6
3390 and r15d,edi
3391 xor r14d,edx
3392 vpaddd xmm1,xmm1,xmm4
3393 add ecx,r13d
3394 xor r15d,r8d
3395 shrd r14d,r14d,2
3396 vpxor xmm6,xmm6,xmm7
3397 add r10d,ecx
3398 add ecx,r15d
3399 mov r13d,r10d
3400 vpsrlq xmm7,xmm7,2
3401 add r14d,ecx
3402 shrd r13d,r13d,14
3403 mov ecx,r14d
3404 vpxor xmm6,xmm6,xmm7
3405 mov r12d,r11d
3406 shrd r14d,r14d,9
3407 xor r13d,r10d
3408 vpshufb xmm6,xmm6,xmm8
3409 xor r12d,eax
3410 shrd r13d,r13d,5
3411 xor r14d,ecx
3412 vpaddd xmm1,xmm1,xmm6
3413 and r12d,r10d
3414 xor r13d,r10d
3415 add ebx,DWORD[24+rsp]
3416 vpshufd xmm7,xmm1,80
3417 mov r15d,ecx
3418 xor r12d,eax
3419 shrd r14d,r14d,11
3420 vpsrld xmm6,xmm7,10
3421 xor r15d,edx
3422 add ebx,r12d
3423 shrd r13d,r13d,6
3424 vpsrlq xmm7,xmm7,17
3425 and edi,r15d
3426 xor r14d,ecx
3427 add ebx,r13d
3428 vpxor xmm6,xmm6,xmm7
3429 xor edi,edx
3430 shrd r14d,r14d,2
3431 add r9d,ebx
3432 vpsrlq xmm7,xmm7,2
3433 add ebx,edi
3434 mov r13d,r9d
3435 add r14d,ebx
3436 vpxor xmm6,xmm6,xmm7
3437 shrd r13d,r13d,14
3438 mov ebx,r14d
3439 mov r12d,r10d
3440 vpshufb xmm6,xmm6,xmm9
3441 shrd r14d,r14d,9
3442 xor r13d,r9d
3443 xor r12d,r11d
3444 vpaddd xmm1,xmm1,xmm6
3445 shrd r13d,r13d,5
3446 xor r14d,ebx
3447 and r12d,r9d
3448 vpaddd xmm6,xmm1,XMMWORD[32+rbp]
3449 xor r13d,r9d
3450 add eax,DWORD[28+rsp]
3451 mov edi,ebx
3452 xor r12d,r11d
3453 shrd r14d,r14d,11
3454 xor edi,ecx
3455 add eax,r12d
3456 shrd r13d,r13d,6
3457 and r15d,edi
3458 xor r14d,ebx
3459 add eax,r13d
3460 xor r15d,ecx
3461 shrd r14d,r14d,2
3462 add r8d,eax
3463 add eax,r15d
3464 mov r13d,r8d
3465 add r14d,eax
3466 vmovdqa XMMWORD[16+rsp],xmm6
3467 vpalignr xmm4,xmm3,xmm2,4
3468 shrd r13d,r13d,14
3469 mov eax,r14d
3470 mov r12d,r9d
3471 vpalignr xmm7,xmm1,xmm0,4
3472 shrd r14d,r14d,9
3473 xor r13d,r8d
3474 xor r12d,r10d
3475 vpsrld xmm6,xmm4,7
3476 shrd r13d,r13d,5
3477 xor r14d,eax
3478 and r12d,r8d
3479 vpaddd xmm2,xmm2,xmm7
3480 xor r13d,r8d
3481 add r11d,DWORD[32+rsp]
3482 mov r15d,eax
3483 vpsrld xmm7,xmm4,3
3484 xor r12d,r10d
3485 shrd r14d,r14d,11
3486 xor r15d,ebx
3487 vpslld xmm5,xmm4,14
3488 add r11d,r12d
3489 shrd r13d,r13d,6
3490 and edi,r15d
3491 vpxor xmm4,xmm7,xmm6
3492 xor r14d,eax
3493 add r11d,r13d
3494 xor edi,ebx
3495 vpshufd xmm7,xmm1,250
3496 shrd r14d,r14d,2
3497 add edx,r11d
3498 add r11d,edi
3499 vpsrld xmm6,xmm6,11
3500 mov r13d,edx
3501 add r14d,r11d
3502 shrd r13d,r13d,14
3503 vpxor xmm4,xmm4,xmm5
3504 mov r11d,r14d
3505 mov r12d,r8d
3506 shrd r14d,r14d,9
3507 vpslld xmm5,xmm5,11
3508 xor r13d,edx
3509 xor r12d,r9d
3510 shrd r13d,r13d,5
3511 vpxor xmm4,xmm4,xmm6
3512 xor r14d,r11d
3513 and r12d,edx
3514 xor r13d,edx
3515 vpsrld xmm6,xmm7,10
3516 add r10d,DWORD[36+rsp]
3517 mov edi,r11d
3518 xor r12d,r9d
3519 vpxor xmm4,xmm4,xmm5
3520 shrd r14d,r14d,11
3521 xor edi,eax
3522 add r10d,r12d
3523 vpsrlq xmm7,xmm7,17
3524 shrd r13d,r13d,6
3525 and r15d,edi
3526 xor r14d,r11d
3527 vpaddd xmm2,xmm2,xmm4
3528 add r10d,r13d
3529 xor r15d,eax
3530 shrd r14d,r14d,2
3531 vpxor xmm6,xmm6,xmm7
3532 add ecx,r10d
3533 add r10d,r15d
3534 mov r13d,ecx
3535 vpsrlq xmm7,xmm7,2
3536 add r14d,r10d
3537 shrd r13d,r13d,14
3538 mov r10d,r14d
3539 vpxor xmm6,xmm6,xmm7
3540 mov r12d,edx
3541 shrd r14d,r14d,9
3542 xor r13d,ecx
3543 vpshufb xmm6,xmm6,xmm8
3544 xor r12d,r8d
3545 shrd r13d,r13d,5
3546 xor r14d,r10d
3547 vpaddd xmm2,xmm2,xmm6
3548 and r12d,ecx
3549 xor r13d,ecx
3550 add r9d,DWORD[40+rsp]
3551 vpshufd xmm7,xmm2,80
3552 mov r15d,r10d
3553 xor r12d,r8d
3554 shrd r14d,r14d,11
3555 vpsrld xmm6,xmm7,10
3556 xor r15d,r11d
3557 add r9d,r12d
3558 shrd r13d,r13d,6
3559 vpsrlq xmm7,xmm7,17
3560 and edi,r15d
3561 xor r14d,r10d
3562 add r9d,r13d
3563 vpxor xmm6,xmm6,xmm7
3564 xor edi,r11d
3565 shrd r14d,r14d,2
3566 add ebx,r9d
3567 vpsrlq xmm7,xmm7,2
3568 add r9d,edi
3569 mov r13d,ebx
3570 add r14d,r9d
3571 vpxor xmm6,xmm6,xmm7
3572 shrd r13d,r13d,14
3573 mov r9d,r14d
3574 mov r12d,ecx
3575 vpshufb xmm6,xmm6,xmm9
3576 shrd r14d,r14d,9
3577 xor r13d,ebx
3578 xor r12d,edx
3579 vpaddd xmm2,xmm2,xmm6
3580 shrd r13d,r13d,5
3581 xor r14d,r9d
3582 and r12d,ebx
3583 vpaddd xmm6,xmm2,XMMWORD[64+rbp]
3584 xor r13d,ebx
3585 add r8d,DWORD[44+rsp]
3586 mov edi,r9d
3587 xor r12d,edx
3588 shrd r14d,r14d,11
3589 xor edi,r10d
3590 add r8d,r12d
3591 shrd r13d,r13d,6
3592 and r15d,edi
3593 xor r14d,r9d
3594 add r8d,r13d
3595 xor r15d,r10d
3596 shrd r14d,r14d,2
3597 add eax,r8d
3598 add r8d,r15d
3599 mov r13d,eax
3600 add r14d,r8d
3601 vmovdqa XMMWORD[32+rsp],xmm6
3602 vpalignr xmm4,xmm0,xmm3,4
3603 shrd r13d,r13d,14
3604 mov r8d,r14d
3605 mov r12d,ebx
3606 vpalignr xmm7,xmm2,xmm1,4
3607 shrd r14d,r14d,9
3608 xor r13d,eax
3609 xor r12d,ecx
3610 vpsrld xmm6,xmm4,7
3611 shrd r13d,r13d,5
3612 xor r14d,r8d
3613 and r12d,eax
3614 vpaddd xmm3,xmm3,xmm7
3615 xor r13d,eax
3616 add edx,DWORD[48+rsp]
3617 mov r15d,r8d
3618 vpsrld xmm7,xmm4,3
3619 xor r12d,ecx
3620 shrd r14d,r14d,11
3621 xor r15d,r9d
3622 vpslld xmm5,xmm4,14
3623 add edx,r12d
3624 shrd r13d,r13d,6
3625 and edi,r15d
3626 vpxor xmm4,xmm7,xmm6
3627 xor r14d,r8d
3628 add edx,r13d
3629 xor edi,r9d
3630 vpshufd xmm7,xmm2,250
3631 shrd r14d,r14d,2
3632 add r11d,edx
3633 add edx,edi
3634 vpsrld xmm6,xmm6,11
3635 mov r13d,r11d
3636 add r14d,edx
3637 shrd r13d,r13d,14
3638 vpxor xmm4,xmm4,xmm5
3639 mov edx,r14d
3640 mov r12d,eax
3641 shrd r14d,r14d,9
3642 vpslld xmm5,xmm5,11
3643 xor r13d,r11d
3644 xor r12d,ebx
3645 shrd r13d,r13d,5
3646 vpxor xmm4,xmm4,xmm6
3647 xor r14d,edx
3648 and r12d,r11d
3649 xor r13d,r11d
3650 vpsrld xmm6,xmm7,10
3651 add ecx,DWORD[52+rsp]
3652 mov edi,edx
3653 xor r12d,ebx
3654 vpxor xmm4,xmm4,xmm5
3655 shrd r14d,r14d,11
3656 xor edi,r8d
3657 add ecx,r12d
3658 vpsrlq xmm7,xmm7,17
3659 shrd r13d,r13d,6
3660 and r15d,edi
3661 xor r14d,edx
3662 vpaddd xmm3,xmm3,xmm4
3663 add ecx,r13d
3664 xor r15d,r8d
3665 shrd r14d,r14d,2
3666 vpxor xmm6,xmm6,xmm7
3667 add r10d,ecx
3668 add ecx,r15d
3669 mov r13d,r10d
3670 vpsrlq xmm7,xmm7,2
3671 add r14d,ecx
3672 shrd r13d,r13d,14
3673 mov ecx,r14d
3674 vpxor xmm6,xmm6,xmm7
3675 mov r12d,r11d
3676 shrd r14d,r14d,9
3677 xor r13d,r10d
3678 vpshufb xmm6,xmm6,xmm8
3679 xor r12d,eax
3680 shrd r13d,r13d,5
3681 xor r14d,ecx
3682 vpaddd xmm3,xmm3,xmm6
3683 and r12d,r10d
3684 xor r13d,r10d
3685 add ebx,DWORD[56+rsp]
3686 vpshufd xmm7,xmm3,80
3687 mov r15d,ecx
3688 xor r12d,eax
3689 shrd r14d,r14d,11
3690 vpsrld xmm6,xmm7,10
3691 xor r15d,edx
3692 add ebx,r12d
3693 shrd r13d,r13d,6
3694 vpsrlq xmm7,xmm7,17
3695 and edi,r15d
3696 xor r14d,ecx
3697 add ebx,r13d
3698 vpxor xmm6,xmm6,xmm7
3699 xor edi,edx
3700 shrd r14d,r14d,2
3701 add r9d,ebx
3702 vpsrlq xmm7,xmm7,2
3703 add ebx,edi
3704 mov r13d,r9d
3705 add r14d,ebx
3706 vpxor xmm6,xmm6,xmm7
3707 shrd r13d,r13d,14
3708 mov ebx,r14d
3709 mov r12d,r10d
3710 vpshufb xmm6,xmm6,xmm9
3711 shrd r14d,r14d,9
3712 xor r13d,r9d
3713 xor r12d,r11d
3714 vpaddd xmm3,xmm3,xmm6
3715 shrd r13d,r13d,5
3716 xor r14d,ebx
3717 and r12d,r9d
3718 vpaddd xmm6,xmm3,XMMWORD[96+rbp]
3719 xor r13d,r9d
3720 add eax,DWORD[60+rsp]
3721 mov edi,ebx
3722 xor r12d,r11d
3723 shrd r14d,r14d,11
3724 xor edi,ecx
3725 add eax,r12d
3726 shrd r13d,r13d,6
3727 and r15d,edi
3728 xor r14d,ebx
3729 add eax,r13d
3730 xor r15d,ecx
3731 shrd r14d,r14d,2
3732 add r8d,eax
3733 add eax,r15d
3734 mov r13d,r8d
3735 add r14d,eax
3736 vmovdqa XMMWORD[48+rsp],xmm6
3737 cmp BYTE[131+rbp],0
3738 jne NEAR $L$avx_00_47
3739 shrd r13d,r13d,14
3740 mov eax,r14d
3741 mov r12d,r9d
3742 shrd r14d,r14d,9
3743 xor r13d,r8d
3744 xor r12d,r10d
3745 shrd r13d,r13d,5
3746 xor r14d,eax
3747 and r12d,r8d
3748 xor r13d,r8d
3749 add r11d,DWORD[rsp]
3750 mov r15d,eax
3751 xor r12d,r10d
3752 shrd r14d,r14d,11
3753 xor r15d,ebx
3754 add r11d,r12d
3755 shrd r13d,r13d,6
3756 and edi,r15d
3757 xor r14d,eax
3758 add r11d,r13d
3759 xor edi,ebx
3760 shrd r14d,r14d,2
3761 add edx,r11d
3762 add r11d,edi
3763 mov r13d,edx
3764 add r14d,r11d
3765 shrd r13d,r13d,14
3766 mov r11d,r14d
3767 mov r12d,r8d
3768 shrd r14d,r14d,9
3769 xor r13d,edx
3770 xor r12d,r9d
3771 shrd r13d,r13d,5
3772 xor r14d,r11d
3773 and r12d,edx
3774 xor r13d,edx
3775 add r10d,DWORD[4+rsp]
3776 mov edi,r11d
3777 xor r12d,r9d
3778 shrd r14d,r14d,11
3779 xor edi,eax
3780 add r10d,r12d
3781 shrd r13d,r13d,6
3782 and r15d,edi
3783 xor r14d,r11d
3784 add r10d,r13d
3785 xor r15d,eax
3786 shrd r14d,r14d,2
3787 add ecx,r10d
3788 add r10d,r15d
3789 mov r13d,ecx
3790 add r14d,r10d
3791 shrd r13d,r13d,14
3792 mov r10d,r14d
3793 mov r12d,edx
3794 shrd r14d,r14d,9
3795 xor r13d,ecx
3796 xor r12d,r8d
3797 shrd r13d,r13d,5
3798 xor r14d,r10d
3799 and r12d,ecx
3800 xor r13d,ecx
3801 add r9d,DWORD[8+rsp]
3802 mov r15d,r10d
3803 xor r12d,r8d
3804 shrd r14d,r14d,11
3805 xor r15d,r11d
3806 add r9d,r12d
3807 shrd r13d,r13d,6
3808 and edi,r15d
3809 xor r14d,r10d
3810 add r9d,r13d
3811 xor edi,r11d
3812 shrd r14d,r14d,2
3813 add ebx,r9d
3814 add r9d,edi
3815 mov r13d,ebx
3816 add r14d,r9d
3817 shrd r13d,r13d,14
3818 mov r9d,r14d
3819 mov r12d,ecx
3820 shrd r14d,r14d,9
3821 xor r13d,ebx
3822 xor r12d,edx
3823 shrd r13d,r13d,5
3824 xor r14d,r9d
3825 and r12d,ebx
3826 xor r13d,ebx
3827 add r8d,DWORD[12+rsp]
3828 mov edi,r9d
3829 xor r12d,edx
3830 shrd r14d,r14d,11
3831 xor edi,r10d
3832 add r8d,r12d
3833 shrd r13d,r13d,6
3834 and r15d,edi
3835 xor r14d,r9d
3836 add r8d,r13d
3837 xor r15d,r10d
3838 shrd r14d,r14d,2
3839 add eax,r8d
3840 add r8d,r15d
3841 mov r13d,eax
3842 add r14d,r8d
3843 shrd r13d,r13d,14
3844 mov r8d,r14d
3845 mov r12d,ebx
3846 shrd r14d,r14d,9
3847 xor r13d,eax
3848 xor r12d,ecx
3849 shrd r13d,r13d,5
3850 xor r14d,r8d
3851 and r12d,eax
3852 xor r13d,eax
3853 add edx,DWORD[16+rsp]
3854 mov r15d,r8d
3855 xor r12d,ecx
3856 shrd r14d,r14d,11
3857 xor r15d,r9d
3858 add edx,r12d
3859 shrd r13d,r13d,6
3860 and edi,r15d
3861 xor r14d,r8d
3862 add edx,r13d
3863 xor edi,r9d
3864 shrd r14d,r14d,2
3865 add r11d,edx
3866 add edx,edi
3867 mov r13d,r11d
3868 add r14d,edx
3869 shrd r13d,r13d,14
3870 mov edx,r14d
3871 mov r12d,eax
3872 shrd r14d,r14d,9
3873 xor r13d,r11d
3874 xor r12d,ebx
3875 shrd r13d,r13d,5
3876 xor r14d,edx
3877 and r12d,r11d
3878 xor r13d,r11d
3879 add ecx,DWORD[20+rsp]
3880 mov edi,edx
3881 xor r12d,ebx
3882 shrd r14d,r14d,11
3883 xor edi,r8d
3884 add ecx,r12d
3885 shrd r13d,r13d,6
3886 and r15d,edi
3887 xor r14d,edx
3888 add ecx,r13d
3889 xor r15d,r8d
3890 shrd r14d,r14d,2
3891 add r10d,ecx
3892 add ecx,r15d
3893 mov r13d,r10d
3894 add r14d,ecx
3895 shrd r13d,r13d,14
3896 mov ecx,r14d
3897 mov r12d,r11d
3898 shrd r14d,r14d,9
3899 xor r13d,r10d
3900 xor r12d,eax
3901 shrd r13d,r13d,5
3902 xor r14d,ecx
3903 and r12d,r10d
3904 xor r13d,r10d
3905 add ebx,DWORD[24+rsp]
3906 mov r15d,ecx
3907 xor r12d,eax
3908 shrd r14d,r14d,11
3909 xor r15d,edx
3910 add ebx,r12d
3911 shrd r13d,r13d,6
3912 and edi,r15d
3913 xor r14d,ecx
3914 add ebx,r13d
3915 xor edi,edx
3916 shrd r14d,r14d,2
3917 add r9d,ebx
3918 add ebx,edi
3919 mov r13d,r9d
3920 add r14d,ebx
3921 shrd r13d,r13d,14
3922 mov ebx,r14d
3923 mov r12d,r10d
3924 shrd r14d,r14d,9
3925 xor r13d,r9d
3926 xor r12d,r11d
3927 shrd r13d,r13d,5
3928 xor r14d,ebx
3929 and r12d,r9d
3930 xor r13d,r9d
3931 add eax,DWORD[28+rsp]
3932 mov edi,ebx
3933 xor r12d,r11d
3934 shrd r14d,r14d,11
3935 xor edi,ecx
3936 add eax,r12d
3937 shrd r13d,r13d,6
3938 and r15d,edi
3939 xor r14d,ebx
3940 add eax,r13d
3941 xor r15d,ecx
3942 shrd r14d,r14d,2
3943 add r8d,eax
3944 add eax,r15d
3945 mov r13d,r8d
3946 add r14d,eax
3947 shrd r13d,r13d,14
3948 mov eax,r14d
3949 mov r12d,r9d
3950 shrd r14d,r14d,9
3951 xor r13d,r8d
3952 xor r12d,r10d
3953 shrd r13d,r13d,5
3954 xor r14d,eax
3955 and r12d,r8d
3956 xor r13d,r8d
3957 add r11d,DWORD[32+rsp]
3958 mov r15d,eax
3959 xor r12d,r10d
3960 shrd r14d,r14d,11
3961 xor r15d,ebx
3962 add r11d,r12d
3963 shrd r13d,r13d,6
3964 and edi,r15d
3965 xor r14d,eax
3966 add r11d,r13d
3967 xor edi,ebx
3968 shrd r14d,r14d,2
3969 add edx,r11d
3970 add r11d,edi
3971 mov r13d,edx
3972 add r14d,r11d
3973 shrd r13d,r13d,14
3974 mov r11d,r14d
3975 mov r12d,r8d
3976 shrd r14d,r14d,9
3977 xor r13d,edx
3978 xor r12d,r9d
3979 shrd r13d,r13d,5
3980 xor r14d,r11d
3981 and r12d,edx
3982 xor r13d,edx
3983 add r10d,DWORD[36+rsp]
3984 mov edi,r11d
3985 xor r12d,r9d
3986 shrd r14d,r14d,11
3987 xor edi,eax
3988 add r10d,r12d
3989 shrd r13d,r13d,6
3990 and r15d,edi
3991 xor r14d,r11d
3992 add r10d,r13d
3993 xor r15d,eax
3994 shrd r14d,r14d,2
3995 add ecx,r10d
3996 add r10d,r15d
3997 mov r13d,ecx
3998 add r14d,r10d
3999 shrd r13d,r13d,14
4000 mov r10d,r14d
4001 mov r12d,edx
4002 shrd r14d,r14d,9
4003 xor r13d,ecx
4004 xor r12d,r8d
4005 shrd r13d,r13d,5
4006 xor r14d,r10d
4007 and r12d,ecx
4008 xor r13d,ecx
4009 add r9d,DWORD[40+rsp]
4010 mov r15d,r10d
4011 xor r12d,r8d
4012 shrd r14d,r14d,11
4013 xor r15d,r11d
4014 add r9d,r12d
4015 shrd r13d,r13d,6
4016 and edi,r15d
4017 xor r14d,r10d
4018 add r9d,r13d
4019 xor edi,r11d
4020 shrd r14d,r14d,2
4021 add ebx,r9d
4022 add r9d,edi
4023 mov r13d,ebx
4024 add r14d,r9d
4025 shrd r13d,r13d,14
4026 mov r9d,r14d
4027 mov r12d,ecx
4028 shrd r14d,r14d,9
4029 xor r13d,ebx
4030 xor r12d,edx
4031 shrd r13d,r13d,5
4032 xor r14d,r9d
4033 and r12d,ebx
4034 xor r13d,ebx
4035 add r8d,DWORD[44+rsp]
4036 mov edi,r9d
4037 xor r12d,edx
4038 shrd r14d,r14d,11
4039 xor edi,r10d
4040 add r8d,r12d
4041 shrd r13d,r13d,6
4042 and r15d,edi
4043 xor r14d,r9d
4044 add r8d,r13d
4045 xor r15d,r10d
4046 shrd r14d,r14d,2
4047 add eax,r8d
4048 add r8d,r15d
4049 mov r13d,eax
4050 add r14d,r8d
4051 shrd r13d,r13d,14
4052 mov r8d,r14d
4053 mov r12d,ebx
4054 shrd r14d,r14d,9
4055 xor r13d,eax
4056 xor r12d,ecx
4057 shrd r13d,r13d,5
4058 xor r14d,r8d
4059 and r12d,eax
4060 xor r13d,eax
4061 add edx,DWORD[48+rsp]
4062 mov r15d,r8d
4063 xor r12d,ecx
4064 shrd r14d,r14d,11
4065 xor r15d,r9d
4066 add edx,r12d
4067 shrd r13d,r13d,6
4068 and edi,r15d
4069 xor r14d,r8d
4070 add edx,r13d
4071 xor edi,r9d
4072 shrd r14d,r14d,2
4073 add r11d,edx
4074 add edx,edi
4075 mov r13d,r11d
4076 add r14d,edx
4077 shrd r13d,r13d,14
4078 mov edx,r14d
4079 mov r12d,eax
4080 shrd r14d,r14d,9
4081 xor r13d,r11d
4082 xor r12d,ebx
4083 shrd r13d,r13d,5
4084 xor r14d,edx
4085 and r12d,r11d
4086 xor r13d,r11d
4087 add ecx,DWORD[52+rsp]
4088 mov edi,edx
4089 xor r12d,ebx
4090 shrd r14d,r14d,11
4091 xor edi,r8d
4092 add ecx,r12d
4093 shrd r13d,r13d,6
4094 and r15d,edi
4095 xor r14d,edx
4096 add ecx,r13d
4097 xor r15d,r8d
4098 shrd r14d,r14d,2
4099 add r10d,ecx
4100 add ecx,r15d
4101 mov r13d,r10d
4102 add r14d,ecx
4103 shrd r13d,r13d,14
4104 mov ecx,r14d
4105 mov r12d,r11d
4106 shrd r14d,r14d,9
4107 xor r13d,r10d
4108 xor r12d,eax
4109 shrd r13d,r13d,5
4110 xor r14d,ecx
4111 and r12d,r10d
4112 xor r13d,r10d
4113 add ebx,DWORD[56+rsp]
4114 mov r15d,ecx
4115 xor r12d,eax
4116 shrd r14d,r14d,11
4117 xor r15d,edx
4118 add ebx,r12d
4119 shrd r13d,r13d,6
4120 and edi,r15d
4121 xor r14d,ecx
4122 add ebx,r13d
4123 xor edi,edx
4124 shrd r14d,r14d,2
4125 add r9d,ebx
4126 add ebx,edi
4127 mov r13d,r9d
4128 add r14d,ebx
4129 shrd r13d,r13d,14
4130 mov ebx,r14d
4131 mov r12d,r10d
4132 shrd r14d,r14d,9
4133 xor r13d,r9d
4134 xor r12d,r11d
4135 shrd r13d,r13d,5
4136 xor r14d,ebx
4137 and r12d,r9d
4138 xor r13d,r9d
4139 add eax,DWORD[60+rsp]
4140 mov edi,ebx
4141 xor r12d,r11d
4142 shrd r14d,r14d,11
4143 xor edi,ecx
4144 add eax,r12d
4145 shrd r13d,r13d,6
4146 and r15d,edi
4147 xor r14d,ebx
4148 add eax,r13d
4149 xor r15d,ecx
4150 shrd r14d,r14d,2
4151 add r8d,eax
4152 add eax,r15d
4153 mov r13d,r8d
4154 add r14d,eax
4155 mov rdi,QWORD[((64+0))+rsp]
4156 mov eax,r14d
4157
4158 add eax,DWORD[rdi]
4159 lea rsi,[64+rsi]
4160 add ebx,DWORD[4+rdi]
4161 add ecx,DWORD[8+rdi]
4162 add edx,DWORD[12+rdi]
4163 add r8d,DWORD[16+rdi]
4164 add r9d,DWORD[20+rdi]
4165 add r10d,DWORD[24+rdi]
4166 add r11d,DWORD[28+rdi]
4167
4168 cmp rsi,QWORD[((64+16))+rsp]
4169
4170 mov DWORD[rdi],eax
4171 mov DWORD[4+rdi],ebx
4172 mov DWORD[8+rdi],ecx
4173 mov DWORD[12+rdi],edx
4174 mov DWORD[16+rdi],r8d
4175 mov DWORD[20+rdi],r9d
4176 mov DWORD[24+rdi],r10d
4177 mov DWORD[28+rdi],r11d
4178 jb NEAR $L$loop_avx
4179
; ---------------------------------------------------------------------
; Exit path of the AVX code: reached when the loop branch above falls
; through.  Restores the non-volatile registers and the stack pointer,
; then returns.
; NOTE(review): the prologue that filled these frame slots is outside
; this view; the slot layout matches the one written by the AVX2
; prologue later in this file -- confirm against the AVX prologue.
; ---------------------------------------------------------------------
4180 mov rsi,QWORD[((64+24))+rsp] ; rsi = pointer kept in frame slot 64+24 (base of the saved GPRs)
4181 vzeroupper ; clear upper YMM halves before the legacy-SSE movaps below
4182 movaps xmm6,XMMWORD[((64+32))+rsp] ; reload xmm6..xmm9 from the frame
4183 movaps xmm7,XMMWORD[((64+48))+rsp]
4184 movaps xmm8,XMMWORD[((64+64))+rsp]
4185 movaps xmm9,XMMWORD[((64+80))+rsp]
4186 mov r15,QWORD[rsi] ; reload r15,r14,r13,r12,rbp,rbx from [rsi+0..40]
4187 mov r14,QWORD[8+rsi]
4188 mov r13,QWORD[16+rsi]
4189 mov r12,QWORD[24+rsi]
4190 mov rbp,QWORD[32+rsi]
4191 mov rbx,QWORD[40+rsi]
4192 lea rsp,[48+rsi] ; rsp = rsi + 6*8, i.e. just above the six reloaded qwords
4193$L$epilogue_avx:
4194 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
4195 mov rsi,QWORD[16+rsp] ; rdi/rsi come back from the slots at [8+rsp]/[16+rsp]
4196 DB 0F3h,0C3h ;repret
4197$L$SEH_end_sha256_block_data_order_avx:
4198
; ---------------------------------------------------------------------
; sha256_block_data_order_avx2 -- AVX2 code path, Win64 entry point.
; In:  rcx, rdx, r8 (copied to rdi, rsi, rdx right after entry)
;        rdi -> eight 32-bit state words (loaded into eax..r11d below)
;        rsi -> input data
;        rdx =  count of 64-byte blocks (converted to an end pointer)
; The dispatcher in sha256_block_data_order jumps directly to
; $L$avx2_shortcut with rdi/rsi/rdx already set up the same way.
; Frame written here (offsets from the final rsp):
;        64+0  state pointer      64+8  input pointer
;        64+16 end-of-input       64+24 rsp as it was after the pushes
;        64+32 .. 64+95           xmm6, xmm7, xmm8, xmm9
; ---------------------------------------------------------------------
4199ALIGN 64
4200sha256_block_data_order_avx2:
4201 mov QWORD[8+rsp],rdi ;WIN64 prologue
4202 mov QWORD[16+rsp],rsi ; rdi/rsi are spilled to [8+rsp]/[16+rsp] before being reused
4203 mov rax,rsp ; NOTE(review): rax is not read again in the visible code before eax is reloaded; possibly for the SEH handler -- confirm
4204$L$SEH_begin_sha256_block_data_order_avx2:
4205 mov rdi,rcx ; move the three arguments into rdi, rsi, rdx
4206 mov rsi,rdx
4207 mov rdx,r8
4208
4209
4210$L$avx2_shortcut:
4211 push rbx
4212 push rbp
4213 push r12
4214 push r13
4215 push r14
4216 push r15
4217 mov r11,rsp ; r11 = rsp after the six pushes (stored in slot 64+24 below)
4218 sub rsp,608
4219 shl rdx,4
4220 and rsp,-256*4 ; round rsp down to a multiple of 1024 ...
4221 lea rdx,[rdx*4+rsi] ; rdx = rsi + 64*count = end of input (with the shl above)
4222 add rsp,448 ; ... then step up 448, so rsp stays a multiple of 64
4223 mov QWORD[((64+0))+rsp],rdi
4224 mov QWORD[((64+8))+rsp],rsi
4225 mov QWORD[((64+16))+rsp],rdx
4226 mov QWORD[((64+24))+rsp],r11
4227 movaps XMMWORD[(64+32)+rsp],xmm6 ; xmm6..xmm9 are callee-saved under the Win64 ABI
4228 movaps XMMWORD[(64+48)+rsp],xmm7
4229 movaps XMMWORD[(64+64)+rsp],xmm8
4230 movaps XMMWORD[(64+80)+rsp],xmm9
4231$L$prologue_avx2:
4232
4233 vzeroupper
4234 sub rsi,-16*4 ; rsi += 64: the loop addresses the current block as [-64+rsi]
4235 mov eax,DWORD[rdi] ; eax,ebx,ecx,edx,r8d..r11d = the eight state words
4236 mov r12,rsi ; r12 = address of the following block
4237 mov ebx,DWORD[4+rdi]
4238 cmp rsi,rdx
4239 mov ecx,DWORD[8+rdi]
4240 cmove r12,rsp ; no following block: point r12 at the stack instead (presumably to avoid reading past the input)
4241 mov edx,DWORD[12+rdi]
4242 mov r8d,DWORD[16+rdi]
4243 mov r9d,DWORD[20+rdi]
4244 mov r10d,DWORD[24+rdi]
4245 mov r11d,DWORD[28+rdi]
4246 vmovdqa ymm8,YMMWORD[((K256+512+32))] ; ymm8/ymm9: vpshufb masks used by the schedule code in $L$avx2_00_47
4247 vmovdqa ymm9,YMMWORD[((K256+512+64))]
4248 jmp NEAR $L$oop_avx2
; ---------------------------------------------------------------------
; $L$oop_avx2 -- start of one outer iteration.
; Loads 64 bytes from [rsi-64] into the low 128-bit lanes of ymm0..ymm3
; and 64 bytes from [r12] into the high lanes, shuffles the bytes with
; the mask at K256+512, adds the first 128 bytes of the K256 table and
; parks the four sums on the stack for the scalar rounds.
; On exit: rbp = K256+128, r14d = 0, edi = ebx^ecx, r12d = r9d,
;          rsp lowered by 64.
; ---------------------------------------------------------------------
4249ALIGN 16
4250$L$oop_avx2:
4251 vmovdqa ymm7,YMMWORD[((K256+512))] ; shuffle mask (contents not visible here; presumably a per-dword byte swap, like the bswap in the scalar path)
4252 vmovdqu xmm0,XMMWORD[((-64+0))+rsi]
4253 vmovdqu xmm1,XMMWORD[((-64+16))+rsi]
4254 vmovdqu xmm2,XMMWORD[((-64+32))+rsi]
4255 vmovdqu xmm3,XMMWORD[((-64+48))+rsi]
4256
4257 vinserti128 ymm0,ymm0,XMMWORD[r12],1 ; high lanes come from the block at r12
4258 vinserti128 ymm1,ymm1,XMMWORD[16+r12],1
4259 vpshufb ymm0,ymm0,ymm7
4260 vinserti128 ymm2,ymm2,XMMWORD[32+r12],1
4261 vpshufb ymm1,ymm1,ymm7
4262 vinserti128 ymm3,ymm3,XMMWORD[48+r12],1
4263
4264 lea rbp,[K256] ; rbp = base of the constant table
4265 vpshufb ymm2,ymm2,ymm7
4266 vpaddd ymm4,ymm0,YMMWORD[rbp] ; ymm4..ymm7 = message words + table entries
4267 vpshufb ymm3,ymm3,ymm7
4268 vpaddd ymm5,ymm1,YMMWORD[32+rbp]
4269 vpaddd ymm6,ymm2,YMMWORD[64+rbp]
4270 vpaddd ymm7,ymm3,YMMWORD[96+rbp] ; the mask in ymm7 is no longer needed and is overwritten
4271 vmovdqa YMMWORD[rsp],ymm4
4272 xor r14d,r14d ; r14d = 0: the first round adds r14d into eax
4273 vmovdqa YMMWORD[32+rsp],ymm5
4274 lea rsp,[((-64))+rsp] ; next two sums go 64 bytes below the first two
4275 mov edi,ebx
4276 vmovdqa YMMWORD[rsp],ymm6
4277 xor edi,ecx ; edi = ebx ^ ecx, consumed by the first round
4278 vmovdqa YMMWORD[32+rsp],ymm7
4279 mov r12d,r9d ; r12 is done as a pointer; it now carries r9d into the first round
4280 sub rbp,-16*2*4 ; rbp += 128 (-128 fits an 8-bit immediate, +128 would not)
4281 jmp NEAR $L$avx2_00_47
4282
; ---------------------------------------------------------------------
; $L$avx2_00_47 -- main round loop of the AVX2 path.
; One pass runs 16 scalar rounds interleaved with four vector steps
; that recompute ymm0, ymm1, ymm2, ymm3 in turn (message schedule).
;   * The working variables live in eax, ebx, ecx, edx, r8d..r11d and
;     change roles every round; r12d..r15d and edi are temporaries.
;   * Each round takes its input from DWORD[off+128+rsp] with
;     off = 0,4,8,12,32,36,40,44, i.e. the low 16 bytes of a 32-byte
;     slot stored 128 bytes earlier.
;   * rsp is lowered by 64 twice per pass; each vector step stores
;     (new words + table entry at rbp) at [rsp] or [32+rsp].
;   * rorx by 25/11/6 and by 22/13/2 build the two rotate-xor sums of
;     the round; and/andn form the select term.
;   * rbp advances 128 bytes per pass; the loop repeats while the byte
;     at [3+rbp] is non-zero (table contents are outside this view --
;     presumably a sentinel after the last round constants; confirm).
; ---------------------------------------------------------------------
4283ALIGN 16
4284$L$avx2_00_47:
4285 lea rsp,[((-64))+rsp]
; -- vector step for ymm0, interleaved with the rounds reading [0..12+128+rsp] --
4286 vpalignr ymm4,ymm1,ymm0,4
4287 add r11d,DWORD[((0+128))+rsp]
4288 and r12d,r8d
4289 rorx r13d,r8d,25
4290 vpalignr ymm7,ymm3,ymm2,4
4291 rorx r15d,r8d,11
4292 lea eax,[r14*1+rax]
4293 lea r11d,[r12*1+r11]
4294 vpsrld ymm6,ymm4,7
4295 andn r12d,r8d,r10d
4296 xor r13d,r15d
4297 rorx r14d,r8d,6
4298 vpaddd ymm0,ymm0,ymm7
4299 lea r11d,[r12*1+r11]
4300 xor r13d,r14d
4301 mov r15d,eax
4302 vpsrld ymm7,ymm4,3
4303 rorx r12d,eax,22
4304 lea r11d,[r13*1+r11]
4305 xor r15d,ebx
4306 vpslld ymm5,ymm4,14
4307 rorx r14d,eax,13
4308 rorx r13d,eax,2
4309 lea edx,[r11*1+rdx]
4310 vpxor ymm4,ymm7,ymm6
4311 and edi,r15d
4312 xor r14d,r12d
4313 xor edi,ebx
4314 vpshufd ymm7,ymm3,250
4315 xor r14d,r13d
4316 lea r11d,[rdi*1+r11]
4317 mov r12d,r8d
4318 vpsrld ymm6,ymm6,11
4319 add r10d,DWORD[((4+128))+rsp]
4320 and r12d,edx
4321 rorx r13d,edx,25
4322 vpxor ymm4,ymm4,ymm5
4323 rorx edi,edx,11
4324 lea r11d,[r14*1+r11]
4325 lea r10d,[r12*1+r10]
4326 vpslld ymm5,ymm5,11
4327 andn r12d,edx,r9d
4328 xor r13d,edi
4329 rorx r14d,edx,6
4330 vpxor ymm4,ymm4,ymm6
4331 lea r10d,[r12*1+r10]
4332 xor r13d,r14d
4333 mov edi,r11d
4334 vpsrld ymm6,ymm7,10
4335 rorx r12d,r11d,22
4336 lea r10d,[r13*1+r10]
4337 xor edi,eax
4338 vpxor ymm4,ymm4,ymm5
4339 rorx r14d,r11d,13
4340 rorx r13d,r11d,2
4341 lea ecx,[r10*1+rcx]
4342 vpsrlq ymm7,ymm7,17
4343 and r15d,edi
4344 xor r14d,r12d
4345 xor r15d,eax
4346 vpaddd ymm0,ymm0,ymm4
4347 xor r14d,r13d
4348 lea r10d,[r15*1+r10]
4349 mov r12d,edx
4350 vpxor ymm6,ymm6,ymm7
4351 add r9d,DWORD[((8+128))+rsp]
4352 and r12d,ecx
4353 rorx r13d,ecx,25
4354 vpsrlq ymm7,ymm7,2
4355 rorx r15d,ecx,11
4356 lea r10d,[r14*1+r10]
4357 lea r9d,[r12*1+r9]
4358 vpxor ymm6,ymm6,ymm7
4359 andn r12d,ecx,r8d
4360 xor r13d,r15d
4361 rorx r14d,ecx,6
4362 vpshufb ymm6,ymm6,ymm8
4363 lea r9d,[r12*1+r9]
4364 xor r13d,r14d
4365 mov r15d,r10d
4366 vpaddd ymm0,ymm0,ymm6
4367 rorx r12d,r10d,22
4368 lea r9d,[r13*1+r9]
4369 xor r15d,r11d
4370 vpshufd ymm7,ymm0,80
4371 rorx r14d,r10d,13
4372 rorx r13d,r10d,2
4373 lea ebx,[r9*1+rbx]
4374 vpsrld ymm6,ymm7,10
4375 and edi,r15d
4376 xor r14d,r12d
4377 xor edi,r11d
4378 vpsrlq ymm7,ymm7,17
4379 xor r14d,r13d
4380 lea r9d,[rdi*1+r9]
4381 mov r12d,ecx
4382 vpxor ymm6,ymm6,ymm7
4383 add r8d,DWORD[((12+128))+rsp]
4384 and r12d,ebx
4385 rorx r13d,ebx,25
4386 vpsrlq ymm7,ymm7,2
4387 rorx edi,ebx,11
4388 lea r9d,[r14*1+r9]
4389 lea r8d,[r12*1+r8]
4390 vpxor ymm6,ymm6,ymm7
4391 andn r12d,ebx,edx
4392 xor r13d,edi
4393 rorx r14d,ebx,6
4394 vpshufb ymm6,ymm6,ymm9
4395 lea r8d,[r12*1+r8]
4396 xor r13d,r14d
4397 mov edi,r9d
4398 vpaddd ymm0,ymm0,ymm6
4399 rorx r12d,r9d,22
4400 lea r8d,[r13*1+r8]
4401 xor edi,r10d
4402 vpaddd ymm6,ymm0,YMMWORD[rbp]
4403 rorx r14d,r9d,13
4404 rorx r13d,r9d,2
4405 lea eax,[r8*1+rax]
4406 and r15d,edi
4407 xor r14d,r12d
4408 xor r15d,r10d
4409 xor r14d,r13d
4410 lea r8d,[r15*1+r8]
4411 mov r12d,ebx
4412 vmovdqa YMMWORD[rsp],ymm6
; -- vector step for ymm1, interleaved with the rounds reading [32..44+128+rsp] --
4413 vpalignr ymm4,ymm2,ymm1,4
4414 add edx,DWORD[((32+128))+rsp]
4415 and r12d,eax
4416 rorx r13d,eax,25
4417 vpalignr ymm7,ymm0,ymm3,4
4418 rorx r15d,eax,11
4419 lea r8d,[r14*1+r8]
4420 lea edx,[r12*1+rdx]
4421 vpsrld ymm6,ymm4,7
4422 andn r12d,eax,ecx
4423 xor r13d,r15d
4424 rorx r14d,eax,6
4425 vpaddd ymm1,ymm1,ymm7
4426 lea edx,[r12*1+rdx]
4427 xor r13d,r14d
4428 mov r15d,r8d
4429 vpsrld ymm7,ymm4,3
4430 rorx r12d,r8d,22
4431 lea edx,[r13*1+rdx]
4432 xor r15d,r9d
4433 vpslld ymm5,ymm4,14
4434 rorx r14d,r8d,13
4435 rorx r13d,r8d,2
4436 lea r11d,[rdx*1+r11]
4437 vpxor ymm4,ymm7,ymm6
4438 and edi,r15d
4439 xor r14d,r12d
4440 xor edi,r9d
4441 vpshufd ymm7,ymm0,250
4442 xor r14d,r13d
4443 lea edx,[rdi*1+rdx]
4444 mov r12d,eax
4445 vpsrld ymm6,ymm6,11
4446 add ecx,DWORD[((36+128))+rsp]
4447 and r12d,r11d
4448 rorx r13d,r11d,25
4449 vpxor ymm4,ymm4,ymm5
4450 rorx edi,r11d,11
4451 lea edx,[r14*1+rdx]
4452 lea ecx,[r12*1+rcx]
4453 vpslld ymm5,ymm5,11
4454 andn r12d,r11d,ebx
4455 xor r13d,edi
4456 rorx r14d,r11d,6
4457 vpxor ymm4,ymm4,ymm6
4458 lea ecx,[r12*1+rcx]
4459 xor r13d,r14d
4460 mov edi,edx
4461 vpsrld ymm6,ymm7,10
4462 rorx r12d,edx,22
4463 lea ecx,[r13*1+rcx]
4464 xor edi,r8d
4465 vpxor ymm4,ymm4,ymm5
4466 rorx r14d,edx,13
4467 rorx r13d,edx,2
4468 lea r10d,[rcx*1+r10]
4469 vpsrlq ymm7,ymm7,17
4470 and r15d,edi
4471 xor r14d,r12d
4472 xor r15d,r8d
4473 vpaddd ymm1,ymm1,ymm4
4474 xor r14d,r13d
4475 lea ecx,[r15*1+rcx]
4476 mov r12d,r11d
4477 vpxor ymm6,ymm6,ymm7
4478 add ebx,DWORD[((40+128))+rsp]
4479 and r12d,r10d
4480 rorx r13d,r10d,25
4481 vpsrlq ymm7,ymm7,2
4482 rorx r15d,r10d,11
4483 lea ecx,[r14*1+rcx]
4484 lea ebx,[r12*1+rbx]
4485 vpxor ymm6,ymm6,ymm7
4486 andn r12d,r10d,eax
4487 xor r13d,r15d
4488 rorx r14d,r10d,6
4489 vpshufb ymm6,ymm6,ymm8
4490 lea ebx,[r12*1+rbx]
4491 xor r13d,r14d
4492 mov r15d,ecx
4493 vpaddd ymm1,ymm1,ymm6
4494 rorx r12d,ecx,22
4495 lea ebx,[r13*1+rbx]
4496 xor r15d,edx
4497 vpshufd ymm7,ymm1,80
4498 rorx r14d,ecx,13
4499 rorx r13d,ecx,2
4500 lea r9d,[rbx*1+r9]
4501 vpsrld ymm6,ymm7,10
4502 and edi,r15d
4503 xor r14d,r12d
4504 xor edi,edx
4505 vpsrlq ymm7,ymm7,17
4506 xor r14d,r13d
4507 lea ebx,[rdi*1+rbx]
4508 mov r12d,r10d
4509 vpxor ymm6,ymm6,ymm7
4510 add eax,DWORD[((44+128))+rsp]
4511 and r12d,r9d
4512 rorx r13d,r9d,25
4513 vpsrlq ymm7,ymm7,2
4514 rorx edi,r9d,11
4515 lea ebx,[r14*1+rbx]
4516 lea eax,[r12*1+rax]
4517 vpxor ymm6,ymm6,ymm7
4518 andn r12d,r9d,r11d
4519 xor r13d,edi
4520 rorx r14d,r9d,6
4521 vpshufb ymm6,ymm6,ymm9
4522 lea eax,[r12*1+rax]
4523 xor r13d,r14d
4524 mov edi,ebx
4525 vpaddd ymm1,ymm1,ymm6
4526 rorx r12d,ebx,22
4527 lea eax,[r13*1+rax]
4528 xor edi,ecx
4529 vpaddd ymm6,ymm1,YMMWORD[32+rbp]
4530 rorx r14d,ebx,13
4531 rorx r13d,ebx,2
4532 lea r8d,[rax*1+r8]
4533 and r15d,edi
4534 xor r14d,r12d
4535 xor r15d,ecx
4536 xor r14d,r13d
4537 lea eax,[r15*1+rax]
4538 mov r12d,r9d
4539 vmovdqa YMMWORD[32+rsp],ymm6
; -- second half of the pass: rsp drops another 64 bytes; vector step for ymm2 --
4540 lea rsp,[((-64))+rsp]
4541 vpalignr ymm4,ymm3,ymm2,4
4542 add r11d,DWORD[((0+128))+rsp]
4543 and r12d,r8d
4544 rorx r13d,r8d,25
4545 vpalignr ymm7,ymm1,ymm0,4
4546 rorx r15d,r8d,11
4547 lea eax,[r14*1+rax]
4548 lea r11d,[r12*1+r11]
4549 vpsrld ymm6,ymm4,7
4550 andn r12d,r8d,r10d
4551 xor r13d,r15d
4552 rorx r14d,r8d,6
4553 vpaddd ymm2,ymm2,ymm7
4554 lea r11d,[r12*1+r11]
4555 xor r13d,r14d
4556 mov r15d,eax
4557 vpsrld ymm7,ymm4,3
4558 rorx r12d,eax,22
4559 lea r11d,[r13*1+r11]
4560 xor r15d,ebx
4561 vpslld ymm5,ymm4,14
4562 rorx r14d,eax,13
4563 rorx r13d,eax,2
4564 lea edx,[r11*1+rdx]
4565 vpxor ymm4,ymm7,ymm6
4566 and edi,r15d
4567 xor r14d,r12d
4568 xor edi,ebx
4569 vpshufd ymm7,ymm1,250
4570 xor r14d,r13d
4571 lea r11d,[rdi*1+r11]
4572 mov r12d,r8d
4573 vpsrld ymm6,ymm6,11
4574 add r10d,DWORD[((4+128))+rsp]
4575 and r12d,edx
4576 rorx r13d,edx,25
4577 vpxor ymm4,ymm4,ymm5
4578 rorx edi,edx,11
4579 lea r11d,[r14*1+r11]
4580 lea r10d,[r12*1+r10]
4581 vpslld ymm5,ymm5,11
4582 andn r12d,edx,r9d
4583 xor r13d,edi
4584 rorx r14d,edx,6
4585 vpxor ymm4,ymm4,ymm6
4586 lea r10d,[r12*1+r10]
4587 xor r13d,r14d
4588 mov edi,r11d
4589 vpsrld ymm6,ymm7,10
4590 rorx r12d,r11d,22
4591 lea r10d,[r13*1+r10]
4592 xor edi,eax
4593 vpxor ymm4,ymm4,ymm5
4594 rorx r14d,r11d,13
4595 rorx r13d,r11d,2
4596 lea ecx,[r10*1+rcx]
4597 vpsrlq ymm7,ymm7,17
4598 and r15d,edi
4599 xor r14d,r12d
4600 xor r15d,eax
4601 vpaddd ymm2,ymm2,ymm4
4602 xor r14d,r13d
4603 lea r10d,[r15*1+r10]
4604 mov r12d,edx
4605 vpxor ymm6,ymm6,ymm7
4606 add r9d,DWORD[((8+128))+rsp]
4607 and r12d,ecx
4608 rorx r13d,ecx,25
4609 vpsrlq ymm7,ymm7,2
4610 rorx r15d,ecx,11
4611 lea r10d,[r14*1+r10]
4612 lea r9d,[r12*1+r9]
4613 vpxor ymm6,ymm6,ymm7
4614 andn r12d,ecx,r8d
4615 xor r13d,r15d
4616 rorx r14d,ecx,6
4617 vpshufb ymm6,ymm6,ymm8
4618 lea r9d,[r12*1+r9]
4619 xor r13d,r14d
4620 mov r15d,r10d
4621 vpaddd ymm2,ymm2,ymm6
4622 rorx r12d,r10d,22
4623 lea r9d,[r13*1+r9]
4624 xor r15d,r11d
4625 vpshufd ymm7,ymm2,80
4626 rorx r14d,r10d,13
4627 rorx r13d,r10d,2
4628 lea ebx,[r9*1+rbx]
4629 vpsrld ymm6,ymm7,10
4630 and edi,r15d
4631 xor r14d,r12d
4632 xor edi,r11d
4633 vpsrlq ymm7,ymm7,17
4634 xor r14d,r13d
4635 lea r9d,[rdi*1+r9]
4636 mov r12d,ecx
4637 vpxor ymm6,ymm6,ymm7
4638 add r8d,DWORD[((12+128))+rsp]
4639 and r12d,ebx
4640 rorx r13d,ebx,25
4641 vpsrlq ymm7,ymm7,2
4642 rorx edi,ebx,11
4643 lea r9d,[r14*1+r9]
4644 lea r8d,[r12*1+r8]
4645 vpxor ymm6,ymm6,ymm7
4646 andn r12d,ebx,edx
4647 xor r13d,edi
4648 rorx r14d,ebx,6
4649 vpshufb ymm6,ymm6,ymm9
4650 lea r8d,[r12*1+r8]
4651 xor r13d,r14d
4652 mov edi,r9d
4653 vpaddd ymm2,ymm2,ymm6
4654 rorx r12d,r9d,22
4655 lea r8d,[r13*1+r8]
4656 xor edi,r10d
4657 vpaddd ymm6,ymm2,YMMWORD[64+rbp]
4658 rorx r14d,r9d,13
4659 rorx r13d,r9d,2
4660 lea eax,[r8*1+rax]
4661 and r15d,edi
4662 xor r14d,r12d
4663 xor r15d,r10d
4664 xor r14d,r13d
4665 lea r8d,[r15*1+r8]
4666 mov r12d,ebx
4667 vmovdqa YMMWORD[rsp],ymm6
; -- vector step for ymm3, interleaved with the rounds reading [32..44+128+rsp] --
4668 vpalignr ymm4,ymm0,ymm3,4
4669 add edx,DWORD[((32+128))+rsp]
4670 and r12d,eax
4671 rorx r13d,eax,25
4672 vpalignr ymm7,ymm2,ymm1,4
4673 rorx r15d,eax,11
4674 lea r8d,[r14*1+r8]
4675 lea edx,[r12*1+rdx]
4676 vpsrld ymm6,ymm4,7
4677 andn r12d,eax,ecx
4678 xor r13d,r15d
4679 rorx r14d,eax,6
4680 vpaddd ymm3,ymm3,ymm7
4681 lea edx,[r12*1+rdx]
4682 xor r13d,r14d
4683 mov r15d,r8d
4684 vpsrld ymm7,ymm4,3
4685 rorx r12d,r8d,22
4686 lea edx,[r13*1+rdx]
4687 xor r15d,r9d
4688 vpslld ymm5,ymm4,14
4689 rorx r14d,r8d,13
4690 rorx r13d,r8d,2
4691 lea r11d,[rdx*1+r11]
4692 vpxor ymm4,ymm7,ymm6
4693 and edi,r15d
4694 xor r14d,r12d
4695 xor edi,r9d
4696 vpshufd ymm7,ymm2,250
4697 xor r14d,r13d
4698 lea edx,[rdi*1+rdx]
4699 mov r12d,eax
4700 vpsrld ymm6,ymm6,11
4701 add ecx,DWORD[((36+128))+rsp]
4702 and r12d,r11d
4703 rorx r13d,r11d,25
4704 vpxor ymm4,ymm4,ymm5
4705 rorx edi,r11d,11
4706 lea edx,[r14*1+rdx]
4707 lea ecx,[r12*1+rcx]
4708 vpslld ymm5,ymm5,11
4709 andn r12d,r11d,ebx
4710 xor r13d,edi
4711 rorx r14d,r11d,6
4712 vpxor ymm4,ymm4,ymm6
4713 lea ecx,[r12*1+rcx]
4714 xor r13d,r14d
4715 mov edi,edx
4716 vpsrld ymm6,ymm7,10
4717 rorx r12d,edx,22
4718 lea ecx,[r13*1+rcx]
4719 xor edi,r8d
4720 vpxor ymm4,ymm4,ymm5
4721 rorx r14d,edx,13
4722 rorx r13d,edx,2
4723 lea r10d,[rcx*1+r10]
4724 vpsrlq ymm7,ymm7,17
4725 and r15d,edi
4726 xor r14d,r12d
4727 xor r15d,r8d
4728 vpaddd ymm3,ymm3,ymm4
4729 xor r14d,r13d
4730 lea ecx,[r15*1+rcx]
4731 mov r12d,r11d
4732 vpxor ymm6,ymm6,ymm7
4733 add ebx,DWORD[((40+128))+rsp]
4734 and r12d,r10d
4735 rorx r13d,r10d,25
4736 vpsrlq ymm7,ymm7,2
4737 rorx r15d,r10d,11
4738 lea ecx,[r14*1+rcx]
4739 lea ebx,[r12*1+rbx]
4740 vpxor ymm6,ymm6,ymm7
4741 andn r12d,r10d,eax
4742 xor r13d,r15d
4743 rorx r14d,r10d,6
4744 vpshufb ymm6,ymm6,ymm8
4745 lea ebx,[r12*1+rbx]
4746 xor r13d,r14d
4747 mov r15d,ecx
4748 vpaddd ymm3,ymm3,ymm6
4749 rorx r12d,ecx,22
4750 lea ebx,[r13*1+rbx]
4751 xor r15d,edx
4752 vpshufd ymm7,ymm3,80
4753 rorx r14d,ecx,13
4754 rorx r13d,ecx,2
4755 lea r9d,[rbx*1+r9]
4756 vpsrld ymm6,ymm7,10
4757 and edi,r15d
4758 xor r14d,r12d
4759 xor edi,edx
4760 vpsrlq ymm7,ymm7,17
4761 xor r14d,r13d
4762 lea ebx,[rdi*1+rbx]
4763 mov r12d,r10d
4764 vpxor ymm6,ymm6,ymm7
4765 add eax,DWORD[((44+128))+rsp]
4766 and r12d,r9d
4767 rorx r13d,r9d,25
4768 vpsrlq ymm7,ymm7,2
4769 rorx edi,r9d,11
4770 lea ebx,[r14*1+rbx]
4771 lea eax,[r12*1+rax]
4772 vpxor ymm6,ymm6,ymm7
4773 andn r12d,r9d,r11d
4774 xor r13d,edi
4775 rorx r14d,r9d,6
4776 vpshufb ymm6,ymm6,ymm9
4777 lea eax,[r12*1+rax]
4778 xor r13d,r14d
4779 mov edi,ebx
4780 vpaddd ymm3,ymm3,ymm6
4781 rorx r12d,ebx,22
4782 lea eax,[r13*1+rax]
4783 xor edi,ecx
4784 vpaddd ymm6,ymm3,YMMWORD[96+rbp]
4785 rorx r14d,ebx,13
4786 rorx r13d,ebx,2
4787 lea r8d,[rax*1+r8]
4788 and r15d,edi
4789 xor r14d,r12d
4790 xor r15d,ecx
4791 xor r14d,r13d
4792 lea eax,[r15*1+rax]
4793 mov r12d,r9d
4794 vmovdqa YMMWORD[32+rsp],ymm6
; -- loop control: step the table pointer and test the byte at [3+rbp] --
4795 lea rbp,[128+rbp]
4796 cmp BYTE[3+rbp],0
4797 jne NEAR $L$avx2_00_47
4798 add r11d,DWORD[((0+64))+rsp]
4799 and r12d,r8d
4800 rorx r13d,r8d,25
4801 rorx r15d,r8d,11
4802 lea eax,[r14*1+rax]
4803 lea r11d,[r12*1+r11]
4804 andn r12d,r8d,r10d
4805 xor r13d,r15d
4806 rorx r14d,r8d,6
4807 lea r11d,[r12*1+r11]
4808 xor r13d,r14d
4809 mov r15d,eax
4810 rorx r12d,eax,22
4811 lea r11d,[r13*1+r11]
4812 xor r15d,ebx
4813 rorx r14d,eax,13
4814 rorx r13d,eax,2
4815 lea edx,[r11*1+rdx]
4816 and edi,r15d
4817 xor r14d,r12d
4818 xor edi,ebx
4819 xor r14d,r13d
4820 lea r11d,[rdi*1+r11]
4821 mov r12d,r8d
4822 add r10d,DWORD[((4+64))+rsp]
4823 and r12d,edx
4824 rorx r13d,edx,25
4825 rorx edi,edx,11
4826 lea r11d,[r14*1+r11]
4827 lea r10d,[r12*1+r10]
4828 andn r12d,edx,r9d
4829 xor r13d,edi
4830 rorx r14d,edx,6
4831 lea r10d,[r12*1+r10]
4832 xor r13d,r14d
4833 mov edi,r11d
4834 rorx r12d,r11d,22
4835 lea r10d,[r13*1+r10]
4836 xor edi,eax
4837 rorx r14d,r11d,13
4838 rorx r13d,r11d,2
4839 lea ecx,[r10*1+rcx]
4840 and r15d,edi
4841 xor r14d,r12d
4842 xor r15d,eax
4843 xor r14d,r13d
4844 lea r10d,[r15*1+r10]
4845 mov r12d,edx
4846 add r9d,DWORD[((8+64))+rsp]
4847 and r12d,ecx
4848 rorx r13d,ecx,25
4849 rorx r15d,ecx,11
4850 lea r10d,[r14*1+r10]
4851 lea r9d,[r12*1+r9]
4852 andn r12d,ecx,r8d
4853 xor r13d,r15d
4854 rorx r14d,ecx,6
4855 lea r9d,[r12*1+r9]
4856 xor r13d,r14d
4857 mov r15d,r10d
4858 rorx r12d,r10d,22
4859 lea r9d,[r13*1+r9]
4860 xor r15d,r11d
4861 rorx r14d,r10d,13
4862 rorx r13d,r10d,2
4863 lea ebx,[r9*1+rbx]
4864 and edi,r15d
4865 xor r14d,r12d
4866 xor edi,r11d
4867 xor r14d,r13d
4868 lea r9d,[rdi*1+r9]
4869 mov r12d,ecx
4870 add r8d,DWORD[((12+64))+rsp]
4871 and r12d,ebx
4872 rorx r13d,ebx,25
4873 rorx edi,ebx,11
4874 lea r9d,[r14*1+r9]
4875 lea r8d,[r12*1+r8]
4876 andn r12d,ebx,edx
4877 xor r13d,edi
4878 rorx r14d,ebx,6
4879 lea r8d,[r12*1+r8]
4880 xor r13d,r14d
4881 mov edi,r9d
4882 rorx r12d,r9d,22
4883 lea r8d,[r13*1+r8]
4884 xor edi,r10d
4885 rorx r14d,r9d,13
4886 rorx r13d,r9d,2
4887 lea eax,[r8*1+rax]
4888 and r15d,edi
4889 xor r14d,r12d
4890 xor r15d,r10d
4891 xor r14d,r13d
4892 lea r8d,[r15*1+r8]
4893 mov r12d,ebx
4894 add edx,DWORD[((32+64))+rsp]
4895 and r12d,eax
4896 rorx r13d,eax,25
4897 rorx r15d,eax,11
4898 lea r8d,[r14*1+r8]
4899 lea edx,[r12*1+rdx]
4900 andn r12d,eax,ecx
4901 xor r13d,r15d
4902 rorx r14d,eax,6
4903 lea edx,[r12*1+rdx]
4904 xor r13d,r14d
4905 mov r15d,r8d
4906 rorx r12d,r8d,22
4907 lea edx,[r13*1+rdx]
4908 xor r15d,r9d
4909 rorx r14d,r8d,13
4910 rorx r13d,r8d,2
4911 lea r11d,[rdx*1+r11]
4912 and edi,r15d
4913 xor r14d,r12d
4914 xor edi,r9d
4915 xor r14d,r13d
4916 lea edx,[rdi*1+rdx]
4917 mov r12d,eax
4918 add ecx,DWORD[((36+64))+rsp]
4919 and r12d,r11d
4920 rorx r13d,r11d,25
4921 rorx edi,r11d,11
4922 lea edx,[r14*1+rdx]
4923 lea ecx,[r12*1+rcx]
4924 andn r12d,r11d,ebx
4925 xor r13d,edi
4926 rorx r14d,r11d,6
4927 lea ecx,[r12*1+rcx]
4928 xor r13d,r14d
4929 mov edi,edx
4930 rorx r12d,edx,22
4931 lea ecx,[r13*1+rcx]
4932 xor edi,r8d
4933 rorx r14d,edx,13
4934 rorx r13d,edx,2
4935 lea r10d,[rcx*1+r10]
4936 and r15d,edi
4937 xor r14d,r12d
4938 xor r15d,r8d
4939 xor r14d,r13d
4940 lea ecx,[r15*1+rcx]
4941 mov r12d,r11d
4942 add ebx,DWORD[((40+64))+rsp]
4943 and r12d,r10d
4944 rorx r13d,r10d,25
4945 rorx r15d,r10d,11
4946 lea ecx,[r14*1+rcx]
4947 lea ebx,[r12*1+rbx]
4948 andn r12d,r10d,eax
4949 xor r13d,r15d
4950 rorx r14d,r10d,6
4951 lea ebx,[r12*1+rbx]
4952 xor r13d,r14d
4953 mov r15d,ecx
4954 rorx r12d,ecx,22
4955 lea ebx,[r13*1+rbx]
4956 xor r15d,edx
4957 rorx r14d,ecx,13
4958 rorx r13d,ecx,2
4959 lea r9d,[rbx*1+r9]
4960 and edi,r15d
4961 xor r14d,r12d
4962 xor edi,edx
4963 xor r14d,r13d
4964 lea ebx,[rdi*1+rbx]
4965 mov r12d,r10d
4966 add eax,DWORD[((44+64))+rsp]
4967 and r12d,r9d
4968 rorx r13d,r9d,25
4969 rorx edi,r9d,11
4970 lea ebx,[r14*1+rbx]
4971 lea eax,[r12*1+rax]
4972 andn r12d,r9d,r11d
4973 xor r13d,edi
4974 rorx r14d,r9d,6
4975 lea eax,[r12*1+rax]
4976 xor r13d,r14d
4977 mov edi,ebx
4978 rorx r12d,ebx,22
4979 lea eax,[r13*1+rax]
4980 xor edi,ecx
4981 rorx r14d,ebx,13
4982 rorx r13d,ebx,2
4983 lea r8d,[rax*1+r8]
4984 and r15d,edi
4985 xor r14d,r12d
4986 xor r15d,ecx
4987 xor r14d,r13d
4988 lea eax,[r15*1+rax]
4989 mov r12d,r9d
4990 add r11d,DWORD[rsp]
4991 and r12d,r8d
4992 rorx r13d,r8d,25
4993 rorx r15d,r8d,11
4994 lea eax,[r14*1+rax]
4995 lea r11d,[r12*1+r11]
4996 andn r12d,r8d,r10d
4997 xor r13d,r15d
4998 rorx r14d,r8d,6
4999 lea r11d,[r12*1+r11]
5000 xor r13d,r14d
5001 mov r15d,eax
5002 rorx r12d,eax,22
5003 lea r11d,[r13*1+r11]
5004 xor r15d,ebx
5005 rorx r14d,eax,13
5006 rorx r13d,eax,2
5007 lea edx,[r11*1+rdx]
5008 and edi,r15d
5009 xor r14d,r12d
5010 xor edi,ebx
5011 xor r14d,r13d
5012 lea r11d,[rdi*1+r11]
5013 mov r12d,r8d
5014 add r10d,DWORD[4+rsp]
5015 and r12d,edx
5016 rorx r13d,edx,25
5017 rorx edi,edx,11
5018 lea r11d,[r14*1+r11]
5019 lea r10d,[r12*1+r10]
5020 andn r12d,edx,r9d
5021 xor r13d,edi
5022 rorx r14d,edx,6
5023 lea r10d,[r12*1+r10]
5024 xor r13d,r14d
5025 mov edi,r11d
5026 rorx r12d,r11d,22
5027 lea r10d,[r13*1+r10]
5028 xor edi,eax
5029 rorx r14d,r11d,13
5030 rorx r13d,r11d,2
5031 lea ecx,[r10*1+rcx]
5032 and r15d,edi
5033 xor r14d,r12d
5034 xor r15d,eax
5035 xor r14d,r13d
5036 lea r10d,[r15*1+r10]
5037 mov r12d,edx
5038 add r9d,DWORD[8+rsp]
5039 and r12d,ecx
5040 rorx r13d,ecx,25
5041 rorx r15d,ecx,11
5042 lea r10d,[r14*1+r10]
5043 lea r9d,[r12*1+r9]
5044 andn r12d,ecx,r8d
5045 xor r13d,r15d
5046 rorx r14d,ecx,6
5047 lea r9d,[r12*1+r9]
5048 xor r13d,r14d
5049 mov r15d,r10d
5050 rorx r12d,r10d,22
5051 lea r9d,[r13*1+r9]
5052 xor r15d,r11d
5053 rorx r14d,r10d,13
5054 rorx r13d,r10d,2
5055 lea ebx,[r9*1+rbx]
5056 and edi,r15d
5057 xor r14d,r12d
5058 xor edi,r11d
5059 xor r14d,r13d
5060 lea r9d,[rdi*1+r9]
5061 mov r12d,ecx
5062 add r8d,DWORD[12+rsp]
5063 and r12d,ebx
5064 rorx r13d,ebx,25
5065 rorx edi,ebx,11
5066 lea r9d,[r14*1+r9]
5067 lea r8d,[r12*1+r8]
5068 andn r12d,ebx,edx
5069 xor r13d,edi
5070 rorx r14d,ebx,6
5071 lea r8d,[r12*1+r8]
5072 xor r13d,r14d
5073 mov edi,r9d
5074 rorx r12d,r9d,22
5075 lea r8d,[r13*1+r8]
5076 xor edi,r10d
5077 rorx r14d,r9d,13
5078 rorx r13d,r9d,2
5079 lea eax,[r8*1+rax]
5080 and r15d,edi
5081 xor r14d,r12d
5082 xor r15d,r10d
5083 xor r14d,r13d
5084 lea r8d,[r15*1+r8]
5085 mov r12d,ebx
5086 add edx,DWORD[32+rsp]
5087 and r12d,eax
5088 rorx r13d,eax,25
5089 rorx r15d,eax,11
5090 lea r8d,[r14*1+r8]
5091 lea edx,[r12*1+rdx]
5092 andn r12d,eax,ecx
5093 xor r13d,r15d
5094 rorx r14d,eax,6
5095 lea edx,[r12*1+rdx]
5096 xor r13d,r14d
5097 mov r15d,r8d
5098 rorx r12d,r8d,22
5099 lea edx,[r13*1+rdx]
5100 xor r15d,r9d
5101 rorx r14d,r8d,13
5102 rorx r13d,r8d,2
5103 lea r11d,[rdx*1+r11]
5104 and edi,r15d
5105 xor r14d,r12d
5106 xor edi,r9d
5107 xor r14d,r13d
5108 lea edx,[rdi*1+rdx]
5109 mov r12d,eax
5110 add ecx,DWORD[36+rsp]
5111 and r12d,r11d
5112 rorx r13d,r11d,25
5113 rorx edi,r11d,11
5114 lea edx,[r14*1+rdx]
5115 lea ecx,[r12*1+rcx]
5116 andn r12d,r11d,ebx
5117 xor r13d,edi
5118 rorx r14d,r11d,6
5119 lea ecx,[r12*1+rcx]
5120 xor r13d,r14d
5121 mov edi,edx
5122 rorx r12d,edx,22
5123 lea ecx,[r13*1+rcx]
5124 xor edi,r8d
5125 rorx r14d,edx,13
5126 rorx r13d,edx,2
5127 lea r10d,[rcx*1+r10]
5128 and r15d,edi
5129 xor r14d,r12d
5130 xor r15d,r8d
5131 xor r14d,r13d
5132 lea ecx,[r15*1+rcx]
5133 mov r12d,r11d
5134 add ebx,DWORD[40+rsp]
5135 and r12d,r10d
5136 rorx r13d,r10d,25
5137 rorx r15d,r10d,11
5138 lea ecx,[r14*1+rcx]
5139 lea ebx,[r12*1+rbx]
5140 andn r12d,r10d,eax
5141 xor r13d,r15d
5142 rorx r14d,r10d,6
5143 lea ebx,[r12*1+rbx]
5144 xor r13d,r14d
5145 mov r15d,ecx
5146 rorx r12d,ecx,22
5147 lea ebx,[r13*1+rbx]
5148 xor r15d,edx
5149 rorx r14d,ecx,13
5150 rorx r13d,ecx,2
5151 lea r9d,[rbx*1+r9]
5152 and edi,r15d
5153 xor r14d,r12d
5154 xor edi,edx
5155 xor r14d,r13d
5156 lea ebx,[rdi*1+rbx]
5157 mov r12d,r10d
5158 add eax,DWORD[44+rsp]
5159 and r12d,r9d
5160 rorx r13d,r9d,25
5161 rorx edi,r9d,11
5162 lea ebx,[r14*1+rbx]
5163 lea eax,[r12*1+rax]
5164 andn r12d,r9d,r11d
5165 xor r13d,edi
5166 rorx r14d,r9d,6
5167 lea eax,[r12*1+rax]
5168 xor r13d,r14d
5169 mov edi,ebx
5170 rorx r12d,ebx,22
5171 lea eax,[r13*1+rax]
5172 xor edi,ecx
5173 rorx r14d,ebx,13
5174 rorx r13d,ebx,2
5175 lea r8d,[rax*1+r8]
5176 and r15d,edi
5177 xor r14d,r12d
5178 xor r15d,ecx
5179 xor r14d,r13d
5180 lea eax,[r15*1+rax]
5181 mov r12d,r9d
5182 mov rdi,QWORD[512+rsp]
5183 add eax,r14d
5184
5185 lea rbp,[448+rsp]
5186
5187 add eax,DWORD[rdi]
5188 add ebx,DWORD[4+rdi]
5189 add ecx,DWORD[8+rdi]
5190 add edx,DWORD[12+rdi]
5191 add r8d,DWORD[16+rdi]
5192 add r9d,DWORD[20+rdi]
5193 add r10d,DWORD[24+rdi]
5194 add r11d,DWORD[28+rdi]
5195
5196 mov DWORD[rdi],eax
5197 mov DWORD[4+rdi],ebx
5198 mov DWORD[8+rdi],ecx
5199 mov DWORD[12+rdi],edx
5200 mov DWORD[16+rdi],r8d
5201 mov DWORD[20+rdi],r9d
5202 mov DWORD[24+rdi],r10d
5203 mov DWORD[28+rdi],r11d
5204
5205 cmp rsi,QWORD[80+rbp]
5206 je NEAR $L$done_avx2
5207
5208 xor r14d,r14d
5209 mov edi,ebx
5210 xor edi,ecx
5211 mov r12d,r9d
5212 jmp NEAR $L$ower_avx2
5213ALIGN 16
5214$L$ower_avx2:
5215 add r11d,DWORD[((0+16))+rbp]
5216 and r12d,r8d
5217 rorx r13d,r8d,25
5218 rorx r15d,r8d,11
5219 lea eax,[r14*1+rax]
5220 lea r11d,[r12*1+r11]
5221 andn r12d,r8d,r10d
5222 xor r13d,r15d
5223 rorx r14d,r8d,6
5224 lea r11d,[r12*1+r11]
5225 xor r13d,r14d
5226 mov r15d,eax
5227 rorx r12d,eax,22
5228 lea r11d,[r13*1+r11]
5229 xor r15d,ebx
5230 rorx r14d,eax,13
5231 rorx r13d,eax,2
5232 lea edx,[r11*1+rdx]
5233 and edi,r15d
5234 xor r14d,r12d
5235 xor edi,ebx
5236 xor r14d,r13d
5237 lea r11d,[rdi*1+r11]
5238 mov r12d,r8d
5239 add r10d,DWORD[((4+16))+rbp]
5240 and r12d,edx
5241 rorx r13d,edx,25
5242 rorx edi,edx,11
5243 lea r11d,[r14*1+r11]
5244 lea r10d,[r12*1+r10]
5245 andn r12d,edx,r9d
5246 xor r13d,edi
5247 rorx r14d,edx,6
5248 lea r10d,[r12*1+r10]
5249 xor r13d,r14d
5250 mov edi,r11d
5251 rorx r12d,r11d,22
5252 lea r10d,[r13*1+r10]
5253 xor edi,eax
5254 rorx r14d,r11d,13
5255 rorx r13d,r11d,2
5256 lea ecx,[r10*1+rcx]
5257 and r15d,edi
5258 xor r14d,r12d
5259 xor r15d,eax
5260 xor r14d,r13d
5261 lea r10d,[r15*1+r10]
5262 mov r12d,edx
5263 add r9d,DWORD[((8+16))+rbp]
5264 and r12d,ecx
5265 rorx r13d,ecx,25
5266 rorx r15d,ecx,11
5267 lea r10d,[r14*1+r10]
5268 lea r9d,[r12*1+r9]
5269 andn r12d,ecx,r8d
5270 xor r13d,r15d
5271 rorx r14d,ecx,6
5272 lea r9d,[r12*1+r9]
5273 xor r13d,r14d
5274 mov r15d,r10d
5275 rorx r12d,r10d,22
5276 lea r9d,[r13*1+r9]
5277 xor r15d,r11d
5278 rorx r14d,r10d,13
5279 rorx r13d,r10d,2
5280 lea ebx,[r9*1+rbx]
5281 and edi,r15d
5282 xor r14d,r12d
5283 xor edi,r11d
5284 xor r14d,r13d
5285 lea r9d,[rdi*1+r9]
5286 mov r12d,ecx
5287 add r8d,DWORD[((12+16))+rbp]
5288 and r12d,ebx
5289 rorx r13d,ebx,25
5290 rorx edi,ebx,11
5291 lea r9d,[r14*1+r9]
5292 lea r8d,[r12*1+r8]
5293 andn r12d,ebx,edx
5294 xor r13d,edi
5295 rorx r14d,ebx,6
5296 lea r8d,[r12*1+r8]
5297 xor r13d,r14d
5298 mov edi,r9d
5299 rorx r12d,r9d,22
5300 lea r8d,[r13*1+r8]
5301 xor edi,r10d
5302 rorx r14d,r9d,13
5303 rorx r13d,r9d,2
5304 lea eax,[r8*1+rax]
5305 and r15d,edi
5306 xor r14d,r12d
5307 xor r15d,r10d
5308 xor r14d,r13d
5309 lea r8d,[r15*1+r8]
5310 mov r12d,ebx
5311 add edx,DWORD[((32+16))+rbp]
5312 and r12d,eax
5313 rorx r13d,eax,25
5314 rorx r15d,eax,11
5315 lea r8d,[r14*1+r8]
5316 lea edx,[r12*1+rdx]
5317 andn r12d,eax,ecx
5318 xor r13d,r15d
5319 rorx r14d,eax,6
5320 lea edx,[r12*1+rdx]
5321 xor r13d,r14d
5322 mov r15d,r8d
5323 rorx r12d,r8d,22
5324 lea edx,[r13*1+rdx]
5325 xor r15d,r9d
5326 rorx r14d,r8d,13
5327 rorx r13d,r8d,2
5328 lea r11d,[rdx*1+r11]
5329 and edi,r15d
5330 xor r14d,r12d
5331 xor edi,r9d
5332 xor r14d,r13d
5333 lea edx,[rdi*1+rdx]
5334 mov r12d,eax
5335 add ecx,DWORD[((36+16))+rbp]
5336 and r12d,r11d
5337 rorx r13d,r11d,25
5338 rorx edi,r11d,11
5339 lea edx,[r14*1+rdx]
5340 lea ecx,[r12*1+rcx]
5341 andn r12d,r11d,ebx
5342 xor r13d,edi
5343 rorx r14d,r11d,6
5344 lea ecx,[r12*1+rcx]
5345 xor r13d,r14d
5346 mov edi,edx
5347 rorx r12d,edx,22
5348 lea ecx,[r13*1+rcx]
5349 xor edi,r8d
5350 rorx r14d,edx,13
5351 rorx r13d,edx,2
5352 lea r10d,[rcx*1+r10]
5353 and r15d,edi
5354 xor r14d,r12d
5355 xor r15d,r8d
5356 xor r14d,r13d
5357 lea ecx,[r15*1+rcx]
5358 mov r12d,r11d
5359 add ebx,DWORD[((40+16))+rbp]
5360 and r12d,r10d
5361 rorx r13d,r10d,25
5362 rorx r15d,r10d,11
5363 lea ecx,[r14*1+rcx]
5364 lea ebx,[r12*1+rbx]
5365 andn r12d,r10d,eax
5366 xor r13d,r15d
5367 rorx r14d,r10d,6
5368 lea ebx,[r12*1+rbx]
5369 xor r13d,r14d
5370 mov r15d,ecx
5371 rorx r12d,ecx,22
5372 lea ebx,[r13*1+rbx]
5373 xor r15d,edx
5374 rorx r14d,ecx,13
5375 rorx r13d,ecx,2
5376 lea r9d,[rbx*1+r9]
5377 and edi,r15d
5378 xor r14d,r12d
5379 xor edi,edx
5380 xor r14d,r13d
5381 lea ebx,[rdi*1+rbx]
5382 mov r12d,r10d
5383 add eax,DWORD[((44+16))+rbp]
5384 and r12d,r9d
5385 rorx r13d,r9d,25
5386 rorx edi,r9d,11
5387 lea ebx,[r14*1+rbx]
5388 lea eax,[r12*1+rax]
5389 andn r12d,r9d,r11d
5390 xor r13d,edi
5391 rorx r14d,r9d,6
5392 lea eax,[r12*1+rax]
5393 xor r13d,r14d
5394 mov edi,ebx
5395 rorx r12d,ebx,22
5396 lea eax,[r13*1+rax]
5397 xor edi,ecx
5398 rorx r14d,ebx,13
5399 rorx r13d,ebx,2
5400 lea r8d,[rax*1+r8]
5401 and r15d,edi
5402 xor r14d,r12d
5403 xor r15d,ecx
5404 xor r14d,r13d
5405 lea eax,[r15*1+rax]
5406 mov r12d,r9d
5407 lea rbp,[((-64))+rbp]
5408 cmp rbp,rsp
5409 jae NEAR $L$ower_avx2
5410
5411 mov rdi,QWORD[512+rsp]
5412 add eax,r14d
5413
5414 lea rsp,[448+rsp]
5415
5416 add eax,DWORD[rdi]
5417 add ebx,DWORD[4+rdi]
5418 add ecx,DWORD[8+rdi]
5419 add edx,DWORD[12+rdi]
5420 add r8d,DWORD[16+rdi]
5421 add r9d,DWORD[20+rdi]
5422 lea rsi,[128+rsi]
5423 add r10d,DWORD[24+rdi]
5424 mov r12,rsi
5425 add r11d,DWORD[28+rdi]
5426 cmp rsi,QWORD[((64+16))+rsp]
5427
5428 mov DWORD[rdi],eax
5429 cmove r12,rsp
5430 mov DWORD[4+rdi],ebx
5431 mov DWORD[8+rdi],ecx
5432 mov DWORD[12+rdi],edx
5433 mov DWORD[16+rdi],r8d
5434 mov DWORD[20+rdi],r9d
5435 mov DWORD[24+rdi],r10d
5436 mov DWORD[28+rdi],r11d
5437
5438 jbe NEAR $L$oop_avx2
5439 lea rbp,[rsp]
5440
5441$L$done_avx2:
5442 lea rsp,[rbp]
5443 mov rsi,QWORD[((64+24))+rsp]
5444 vzeroupper
5445 movaps xmm6,XMMWORD[((64+32))+rsp]
5446 movaps xmm7,XMMWORD[((64+48))+rsp]
5447 movaps xmm8,XMMWORD[((64+64))+rsp]
5448 movaps xmm9,XMMWORD[((64+80))+rsp]
5449 mov r15,QWORD[rsi]
5450 mov r14,QWORD[8+rsi]
5451 mov r13,QWORD[16+rsi]
5452 mov r12,QWORD[24+rsi]
5453 mov rbp,QWORD[32+rsi]
5454 mov rbx,QWORD[40+rsi]
5455 lea rsp,[48+rsi]
5456$L$epilogue_avx2:
5457 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
5458 mov rsi,QWORD[16+rsp]
5459 DB 0F3h,0C3h ;repret
5460$L$SEH_end_sha256_block_data_order_avx2:
5461EXTERN __imp_RtlVirtualUnwind
5462
5463ALIGN 16
;-----------------------------------------------------------------------
; se_handler
;
; Win64 language-specific exception handler named by the .xdata records
; of the scalar, SSSE3, AVX and AVX2 variants of sha256_block_data_order.
; It rebuilds the caller's non-volatile registers in the CONTEXT record
; so that the OS can continue unwinding past the SHA-256 frame.
;
; ABI:   Microsoft x64 exception-handler convention
; In:    r8  = CONTEXT*            (state of the interrupted code)
;        r9  = DISPATCHER_CONTEXT* (HandlerData[0] / HandlerData[1] are
;              the image-relative prologue / epilogue label offsets
;              stored after the handler RVA in .xdata)
;        rcx, rdx are not read.
; Out:   eax = 1 (ExceptionContinueSearch)
; Stack: 9 pushes + 64 bytes (32-byte shadow space plus four stack
;        arguments for RtlVirtualUnwind); rsp is 16-byte aligned at the
;        call.
;
; Frame layout relied upon (frame base = rsp while the function body
; runs):
;        [base+64+24]       rsp value captured right after the six
;                           register pushes
;        [base+64+32 ...]   XMM6..XMM9 save area (SIMD variants only)
;
; $L$in_prologue is also entered from shaext_handler, which pushes the
; same registers in the same order.
;-----------------------------------------------------------------------
se_handler:
	push rsi
	push rdi
	push rbx
	push rbp
	push r12
	push r13
	push r14
	push r15
	pushfq
	sub rsp,64

	mov rax,QWORD[120+r8]		; context->Rax
	mov rbx,QWORD[248+r8]		; context->Rip

	mov rsi,QWORD[8+r9]		; disp->ImageBase
	mov r11,QWORD[56+r9]		; disp->HandlerData

	mov r10d,DWORD[r11]		; HandlerData[0]
	lea r10,[r10*1+rsi]		; absolute address of the prologue label
	cmp rbx,r10			; Rip < prologue label: frame not built yet,
	jb NEAR $L$in_prologue		; rax is used as the entry rsp

	mov rax,QWORD[152+r8]		; context->Rsp

	mov r10d,DWORD[4+r11]		; HandlerData[1]
	lea r10,[r10*1+rsi]		; absolute address of the epilogue label
	cmp rbx,r10			; Rip >= epilogue label: frame already torn
	jae NEAR $L$in_prologue		; down, Rsp is the entry rsp
	lea r10,[$L$avx2_shortcut]
	cmp rbx,r10			; Rip < avx2_shortcut: not the AVX2 variant
	jb NEAR $L$not_in_avx2

	; The AVX2 body moves rsp while it runs; recompute the frame base
	; from the 1024-byte aligned block it lives in.
	and rax,-256*4
	add rax,448
$L$not_in_avx2:
	mov rsi,rax			; keep the frame base for the XMM copy below
	mov rax,QWORD[((64+24))+rax]	; rsp as it was after the six pushes
	lea rax,[48+rax]		; rsp at function entry

	mov rbx,QWORD[((-8))+rax]	; NOTE: rbx no longer holds context->Rip
	mov rbp,QWORD[((-16))+rax]
	mov r12,QWORD[((-24))+rax]
	mov r13,QWORD[((-32))+rax]
	mov r14,QWORD[((-40))+rax]
	mov r15,QWORD[((-48))+rax]
	mov QWORD[144+r8],rbx		; context->Rbx
	mov QWORD[160+r8],rbp		; context->Rbp
	mov QWORD[216+r8],r12		; context->R12
	mov QWORD[224+r8],r13		; context->R13
	mov QWORD[232+r8],r14		; context->R14
	mov QWORD[240+r8],r15		; context->R15

	; Code located before $L$epilogue is the scalar variant, which has no
	; XMM save area.  The test must look at context->Rip; rbx was just
	; overwritten with the caller's saved rbx, so re-read Rip from the
	; CONTEXT record instead of comparing the stale register.
	lea r10,[$L$epilogue]
	cmp QWORD[248+r8],r10		; context->Rip < $L$epilogue ?
	jb NEAR $L$in_prologue		; yes: nothing more to restore

	lea rsi,[((64+32))+rsi]		; XMM6..XMM9 save area in the frame
	lea rdi,[512+r8]		; &context->Xmm6
	mov ecx,8			; 8 qwords = 4 XMM registers
	DD 0xa548f3fc			; cld; rep movsq

$L$in_prologue:
	mov rdi,QWORD[8+rax]		; rdi/rsi stored by the WIN64 prologue
	mov rsi,QWORD[16+rax]
	mov QWORD[152+r8],rax		; context->Rsp
	mov QWORD[168+r8],rsi		; context->Rsi
	mov QWORD[176+r8],rdi		; context->Rdi

	mov rdi,QWORD[40+r9]		; disp->ContextRecord
	mov rsi,r8			; context
	mov ecx,154			; 154 qwords = sizeof(CONTEXT)
	DD 0xa548f3fc			; cld; rep movsq

	mov rsi,r9
	xor rcx,rcx			; arg1: UNW_FLAG_NHANDLER
	mov rdx,QWORD[8+rsi]		; arg2: disp->ImageBase
	mov r8,QWORD[rsi]		; arg3: disp->ControlPc
	mov r9,QWORD[16+rsi]		; arg4: disp->FunctionEntry
	mov r10,QWORD[40+rsi]		; disp->ContextRecord
	lea r11,[56+rsi]		; &disp->HandlerData
	lea r12,[24+rsi]		; &disp->EstablisherFrame
	mov QWORD[32+rsp],r10		; arg5
	mov QWORD[40+rsp],r11		; arg6
	mov QWORD[48+rsp],r12		; arg7
	mov QWORD[56+rsp],rcx		; arg8: NULL
	call QWORD[__imp_RtlVirtualUnwind]

	mov eax,1			; ExceptionContinueSearch
	add rsp,64
	popfq
	pop r15
	pop r14
	pop r13
	pop r12
	pop rbp
	pop rbx
	pop rdi
	pop rsi
	DB 0F3h,0C3h ;repret
5564
5565
5566ALIGN 16
;-----------------------------------------------------------------------
; shaext_handler
;
; Win64 language-specific exception handler for the SHA-extension
; variant.  When the faulting Rip lies inside the function body it
; copies 10 qwords (five XMM registers' worth) from just below the
; address held in context->Rax into the CONTEXT record starting at
; Xmm6, then shares the tail of se_handler ($L$in_prologue) for the
; rest of the unwinding.
; NOTE(review): presumably the shaext body keeps the entry rsp in rax
; and spills XMM6..XMM10 at [rax-88] -- confirm against that function.
;
; In:    r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (consumed by the tail)
; Out:   produced by the shared tail at $L$in_prologue
; Stack: the push sequence must mirror se_handler's, because the pops
;        are executed by the shared tail.
;-----------------------------------------------------------------------
shaext_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64			; room reserved for the tail's outgoing call

	mov	rbx,QWORD[r8+248]	; context->Rip
	mov	rax,QWORD[r8+120]	; context->Rax

	; Outside [prologue_shaext, epilogue_shaext) there is nothing to copy.
	lea	r10,[$L$prologue_shaext]
	cmp	rbx,r10
	jb	NEAR $L$in_prologue

	lea	r10,[$L$epilogue_shaext]
	cmp	rbx,r10
	jae	NEAR $L$in_prologue

	; Copy the saved XMM block into the CONTEXT record.
	mov	ecx,10			; qword count
	lea	rdi,[r8+512]		; destination: &context->Xmm6
	lea	rsi,[rax-88]		; source: -8-80 bytes from context->Rax
	cld
	rep movsq

	jmp	NEAR $L$in_prologue
5596
; Function table: one RUNTIME_FUNCTION record (begin RVA, end RVA,
; unwind-info RVA) per SHA-256 code path.
section .pdata rdata align=4
ALIGN 4
	; scalar
	DD $L$SEH_begin_sha256_block_data_order wrt ..imagebase,$L$SEH_end_sha256_block_data_order wrt ..imagebase,$L$SEH_info_sha256_block_data_order wrt ..imagebase
	; SHA extensions
	DD $L$SEH_begin_sha256_block_data_order_shaext wrt ..imagebase,$L$SEH_end_sha256_block_data_order_shaext wrt ..imagebase,$L$SEH_info_sha256_block_data_order_shaext wrt ..imagebase
	; SSSE3
	DD $L$SEH_begin_sha256_block_data_order_ssse3 wrt ..imagebase,$L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase,$L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase
	; AVX
	DD $L$SEH_begin_sha256_block_data_order_avx wrt ..imagebase,$L$SEH_end_sha256_block_data_order_avx wrt ..imagebase,$L$SEH_info_sha256_block_data_order_avx wrt ..imagebase
	; AVX2
	DD $L$SEH_begin_sha256_block_data_order_avx2 wrt ..imagebase,$L$SEH_end_sha256_block_data_order_avx2 wrt ..imagebase,$L$SEH_info_sha256_block_data_order_avx2 wrt ..imagebase
; Unwind information.  Every record starts with the header bytes
; 9,0,0,0 followed by the RVA of its handler.  The records that use
; se_handler append two more RVAs (prologue label, epilogue label),
; which se_handler reads back as HandlerData[0] and HandlerData[1].
section .xdata rdata align=8
ALIGN 8
$L$SEH_info_sha256_block_data_order:
DB 9,0,0,0
	DD se_handler wrt ..imagebase
	DD $L$prologue wrt ..imagebase		; HandlerData[0]
	DD $L$epilogue wrt ..imagebase		; HandlerData[1]
$L$SEH_info_sha256_block_data_order_shaext:
DB 9,0,0,0
	DD shaext_handler wrt ..imagebase	; no handler data follows
$L$SEH_info_sha256_block_data_order_ssse3:
DB 9,0,0,0
	DD se_handler wrt ..imagebase
	DD $L$prologue_ssse3 wrt ..imagebase	; HandlerData[0]
	DD $L$epilogue_ssse3 wrt ..imagebase	; HandlerData[1]
$L$SEH_info_sha256_block_data_order_avx:
DB 9,0,0,0
	DD se_handler wrt ..imagebase
	DD $L$prologue_avx wrt ..imagebase	; HandlerData[0]
	DD $L$epilogue_avx wrt ..imagebase	; HandlerData[1]
$L$SEH_info_sha256_block_data_order_avx2:
DB 9,0,0,0
	DD se_handler wrt ..imagebase
	DD $L$prologue_avx2 wrt ..imagebase	; HandlerData[0]
	DD $L$epilogue_avx2 wrt ..imagebase	; HandlerData[1]
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette