VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/sha512-x86_64.S@ 95218

Last change on this file since 95218 was 94083, checked in by vboxsync, 3 years ago

libs/openssl-3.0.1: Recreate asm files, bugref:10128

File size: 86.4 KB
Line 
1default rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
6
7
8EXTERN OPENSSL_ia32cap_P
9global sha512_block_data_order
10
11ALIGN 16
;-----------------------------------------------------------------------
; sha512_block_data_order -- SHA-512 block function, Win64 entry point.
;
; ABI:  Microsoft x64. The three incoming arguments are moved from
;       rcx/rdx/r8 into rdi/rsi/rdx; the caller's rdi/rsi are first
;       parked at [8+rsp] and [16+rsp] and reloaded at $L$epilogue.
; In:   rcx -> eight 64-bit hash state words (updated in place per block)
;       rdx -> input data
;       r8   = number of 128-byte blocks (scaled by 128 below)
; Uses: rbx, rbp, r12-r15 are pushed here and restored before returning.
;
; Stack frame after "and rsp,-64" (offsets from the aligned rsp):
;       [0..127]  16-entry window of 64-bit message-schedule words
;       [128]     state pointer (rdi)
;       [136]     input pointer (rsi)
;       [144]     end-of-input pointer (rsi + blocks*128)
;       [152]     rsp as it was before the pushes (used to unwind the frame)
;
; NOTE(review): the end-pointer compare is only performed after a block
; has been processed, so a block count of 0 would still hash one block;
; presumably callers never pass 0 -- confirm.
;-----------------------------------------------------------------------
12sha512_block_data_order:
13 mov QWORD[8+rsp],rdi ;WIN64 prologue
14 mov QWORD[16+rsp],rsi
15 mov rax,rsp
16$L$SEH_begin_sha512_block_data_order:
17 mov rdi,rcx
18 mov rsi,rdx
19 mov rdx,r8
20
21
22
; Runtime dispatch on three dwords of OPENSSL_ia32cap_P. The branch targets
; are the alternative implementations; $L$avx2_shortcut and $L$avx_shortcut
; are not defined in this part of the file.
; NOTE(review): bit meanings below are inferred from the label names --
; confirm against the OPENSSL_ia32cap documentation.
23 lea r11,[OPENSSL_ia32cap_P]
24 mov r9d,DWORD[r11]
25 mov r10d,DWORD[4+r11]
26 mov r11d,DWORD[8+r11]
; dword 1, bit 11 (2048) set -> XOP code path
27 test r10d,2048
28 jnz NEAR $L$xop_shortcut
; dword 2, bits 3, 5 and 8 (296 = 8+32+256) all set -> AVX2 code path
29 and r11d,296
30 cmp r11d,296
31 je NEAR $L$avx2_shortcut
; dword 0 bit 30 together with dword 1 bits 28 and 9 all set -> AVX code path
32 and r9d,1073741824
33 and r10d,268435968
34 or r10d,r9d
35 cmp r10d,1342177792
36 je NEAR $L$avx_shortcut
; Generic integer-only path: rax keeps the pre-push rsp for the epilogue.
37 mov rax,rsp
38
39 push rbx
40
41 push rbp
42
43 push r12
44
45 push r13
46
47 push r14
48
49 push r15
50
; rdx = rsi + (blocks << 4) * 8 = rsi + blocks * 128 = end of input.
; 16*8+4*8 = 160 bytes: 128 for the schedule window plus four saved qwords.
51 shl rdx,4
52 sub rsp,16*8+4*8
53 lea rdx,[rdx*8+rsi]
54 and rsp,-64
55 mov QWORD[((128+0))+rsp],rdi
56 mov QWORD[((128+8))+rsp],rsi
57 mov QWORD[((128+16))+rsp],rdx
58 mov QWORD[152+rsp],rax
59
60$L$prologue:
61
; Load the eight state words into rax, rbx, rcx, rdx, r8, r9, r10, r11.
62 mov rax,QWORD[rdi]
63 mov rbx,QWORD[8+rdi]
64 mov rcx,QWORD[16+rdi]
65 mov rdx,QWORD[24+rdi]
66 mov r8,QWORD[32+rdi]
67 mov r9,QWORD[40+rdi]
68 mov r10,QWORD[48+rdi]
69 mov r11,QWORD[56+rdi]
70 jmp NEAR $L$loop
71
72ALIGN 16
;-----------------------------------------------------------------------
; $L$loop -- per-block entry: rounds 0..15, fully unrolled.
;
; On entry the eight working variables are in rax, rbx, rcx, rdx, r8, r9,
; r10, r11 and rsi points at the current 128-byte input block. Instead of
; moving values between registers, each round rotates the register roles
; by one position (the register that held the last variable receives the
; new first variable).
;
; Each round:
;   r12 = bswap(qword at [8*i+rsi])  -> also stored at [8*i+rsp]
;   r13: ror 23, xor, ror 4, xor, ror 14  = ROTR14 ^ ROTR18 ^ ROTR41
;   r14: ror 5,  xor, ror 6, xor, ror 28  = ROTR28 ^ ROTR34 ^ ROTR39
;   r15/rdi alternate as ((f ^ g) & e) ^ g and as the running (a ^ b)
;   term used to form the majority function with a single "and".
;   r12 accumulates the word, the old last variable, the round constant
;   at [rbp], and the two terms above.
; The final "add <new>,r14" of a round is issued at the start of the
; next round; for round 15 it is issued after the jump target below.
;
; rbp walks the K512 table; it advances by 8 then 24 alternately because
; every pair of constants is stored twice in that table.
;-----------------------------------------------------------------------
73$L$loop:
; Round 0 (rdi is seeded with rbx ^ rcx for the majority term)
74 mov rdi,rbx
75 lea rbp,[K512]
76 xor rdi,rcx
77 mov r12,QWORD[rsi]
78 mov r13,r8
79 mov r14,rax
80 bswap r12
81 ror r13,23
82 mov r15,r9
83
84 xor r13,r8
85 ror r14,5
86 xor r15,r10
87
88 mov QWORD[rsp],r12
89 xor r14,rax
90 and r15,r8
91
92 ror r13,4
93 add r12,r11
94 xor r15,r10
95
96 ror r14,6
97 xor r13,r8
98 add r12,r15
99
100 mov r15,rax
101 add r12,QWORD[rbp]
102 xor r14,rax
103
104 xor r15,rbx
105 ror r13,14
106 mov r11,rbx
107
108 and rdi,r15
109 ror r14,28
110 add r12,r13
111
112 xor r11,rdi
113 add rdx,r12
114 add r11,r12
115
116 lea rbp,[8+rbp]
117 add r11,r14
; Round 1
118 mov r12,QWORD[8+rsi]
119 mov r13,rdx
120 mov r14,r11
121 bswap r12
122 ror r13,23
123 mov rdi,r8
124
125 xor r13,rdx
126 ror r14,5
127 xor rdi,r9
128
129 mov QWORD[8+rsp],r12
130 xor r14,r11
131 and rdi,rdx
132
133 ror r13,4
134 add r12,r10
135 xor rdi,r9
136
137 ror r14,6
138 xor r13,rdx
139 add r12,rdi
140
141 mov rdi,r11
142 add r12,QWORD[rbp]
143 xor r14,r11
144
145 xor rdi,rax
146 ror r13,14
147 mov r10,rax
148
149 and r15,rdi
150 ror r14,28
151 add r12,r13
152
153 xor r10,r15
154 add rcx,r12
155 add r10,r12
156
157 lea rbp,[24+rbp]
158 add r10,r14
; Round 2
159 mov r12,QWORD[16+rsi]
160 mov r13,rcx
161 mov r14,r10
162 bswap r12
163 ror r13,23
164 mov r15,rdx
165
166 xor r13,rcx
167 ror r14,5
168 xor r15,r8
169
170 mov QWORD[16+rsp],r12
171 xor r14,r10
172 and r15,rcx
173
174 ror r13,4
175 add r12,r9
176 xor r15,r8
177
178 ror r14,6
179 xor r13,rcx
180 add r12,r15
181
182 mov r15,r10
183 add r12,QWORD[rbp]
184 xor r14,r10
185
186 xor r15,r11
187 ror r13,14
188 mov r9,r11
189
190 and rdi,r15
191 ror r14,28
192 add r12,r13
193
194 xor r9,rdi
195 add rbx,r12
196 add r9,r12
197
198 lea rbp,[8+rbp]
199 add r9,r14
; Round 3
200 mov r12,QWORD[24+rsi]
201 mov r13,rbx
202 mov r14,r9
203 bswap r12
204 ror r13,23
205 mov rdi,rcx
206
207 xor r13,rbx
208 ror r14,5
209 xor rdi,rdx
210
211 mov QWORD[24+rsp],r12
212 xor r14,r9
213 and rdi,rbx
214
215 ror r13,4
216 add r12,r8
217 xor rdi,rdx
218
219 ror r14,6
220 xor r13,rbx
221 add r12,rdi
222
223 mov rdi,r9
224 add r12,QWORD[rbp]
225 xor r14,r9
226
227 xor rdi,r10
228 ror r13,14
229 mov r8,r10
230
231 and r15,rdi
232 ror r14,28
233 add r12,r13
234
235 xor r8,r15
236 add rax,r12
237 add r8,r12
238
239 lea rbp,[24+rbp]
240 add r8,r14
; Round 4
241 mov r12,QWORD[32+rsi]
242 mov r13,rax
243 mov r14,r8
244 bswap r12
245 ror r13,23
246 mov r15,rbx
247
248 xor r13,rax
249 ror r14,5
250 xor r15,rcx
251
252 mov QWORD[32+rsp],r12
253 xor r14,r8
254 and r15,rax
255
256 ror r13,4
257 add r12,rdx
258 xor r15,rcx
259
260 ror r14,6
261 xor r13,rax
262 add r12,r15
263
264 mov r15,r8
265 add r12,QWORD[rbp]
266 xor r14,r8
267
268 xor r15,r9
269 ror r13,14
270 mov rdx,r9
271
272 and rdi,r15
273 ror r14,28
274 add r12,r13
275
276 xor rdx,rdi
277 add r11,r12
278 add rdx,r12
279
280 lea rbp,[8+rbp]
281 add rdx,r14
; Round 5
282 mov r12,QWORD[40+rsi]
283 mov r13,r11
284 mov r14,rdx
285 bswap r12
286 ror r13,23
287 mov rdi,rax
288
289 xor r13,r11
290 ror r14,5
291 xor rdi,rbx
292
293 mov QWORD[40+rsp],r12
294 xor r14,rdx
295 and rdi,r11
296
297 ror r13,4
298 add r12,rcx
299 xor rdi,rbx
300
301 ror r14,6
302 xor r13,r11
303 add r12,rdi
304
305 mov rdi,rdx
306 add r12,QWORD[rbp]
307 xor r14,rdx
308
309 xor rdi,r8
310 ror r13,14
311 mov rcx,r8
312
313 and r15,rdi
314 ror r14,28
315 add r12,r13
316
317 xor rcx,r15
318 add r10,r12
319 add rcx,r12
320
321 lea rbp,[24+rbp]
322 add rcx,r14
; Round 6
323 mov r12,QWORD[48+rsi]
324 mov r13,r10
325 mov r14,rcx
326 bswap r12
327 ror r13,23
328 mov r15,r11
329
330 xor r13,r10
331 ror r14,5
332 xor r15,rax
333
334 mov QWORD[48+rsp],r12
335 xor r14,rcx
336 and r15,r10
337
338 ror r13,4
339 add r12,rbx
340 xor r15,rax
341
342 ror r14,6
343 xor r13,r10
344 add r12,r15
345
346 mov r15,rcx
347 add r12,QWORD[rbp]
348 xor r14,rcx
349
350 xor r15,rdx
351 ror r13,14
352 mov rbx,rdx
353
354 and rdi,r15
355 ror r14,28
356 add r12,r13
357
358 xor rbx,rdi
359 add r9,r12
360 add rbx,r12
361
362 lea rbp,[8+rbp]
363 add rbx,r14
; Round 7
364 mov r12,QWORD[56+rsi]
365 mov r13,r9
366 mov r14,rbx
367 bswap r12
368 ror r13,23
369 mov rdi,r10
370
371 xor r13,r9
372 ror r14,5
373 xor rdi,r11
374
375 mov QWORD[56+rsp],r12
376 xor r14,rbx
377 and rdi,r9
378
379 ror r13,4
380 add r12,rax
381 xor rdi,r11
382
383 ror r14,6
384 xor r13,r9
385 add r12,rdi
386
387 mov rdi,rbx
388 add r12,QWORD[rbp]
389 xor r14,rbx
390
391 xor rdi,rcx
392 ror r13,14
393 mov rax,rcx
394
395 and r15,rdi
396 ror r14,28
397 add r12,r13
398
399 xor rax,r15
400 add r8,r12
401 add rax,r12
402
403 lea rbp,[24+rbp]
404 add rax,r14
; Round 8 (register roles are back to their round-0 assignment)
405 mov r12,QWORD[64+rsi]
406 mov r13,r8
407 mov r14,rax
408 bswap r12
409 ror r13,23
410 mov r15,r9
411
412 xor r13,r8
413 ror r14,5
414 xor r15,r10
415
416 mov QWORD[64+rsp],r12
417 xor r14,rax
418 and r15,r8
419
420 ror r13,4
421 add r12,r11
422 xor r15,r10
423
424 ror r14,6
425 xor r13,r8
426 add r12,r15
427
428 mov r15,rax
429 add r12,QWORD[rbp]
430 xor r14,rax
431
432 xor r15,rbx
433 ror r13,14
434 mov r11,rbx
435
436 and rdi,r15
437 ror r14,28
438 add r12,r13
439
440 xor r11,rdi
441 add rdx,r12
442 add r11,r12
443
444 lea rbp,[8+rbp]
445 add r11,r14
; Round 9
446 mov r12,QWORD[72+rsi]
447 mov r13,rdx
448 mov r14,r11
449 bswap r12
450 ror r13,23
451 mov rdi,r8
452
453 xor r13,rdx
454 ror r14,5
455 xor rdi,r9
456
457 mov QWORD[72+rsp],r12
458 xor r14,r11
459 and rdi,rdx
460
461 ror r13,4
462 add r12,r10
463 xor rdi,r9
464
465 ror r14,6
466 xor r13,rdx
467 add r12,rdi
468
469 mov rdi,r11
470 add r12,QWORD[rbp]
471 xor r14,r11
472
473 xor rdi,rax
474 ror r13,14
475 mov r10,rax
476
477 and r15,rdi
478 ror r14,28
479 add r12,r13
480
481 xor r10,r15
482 add rcx,r12
483 add r10,r12
484
485 lea rbp,[24+rbp]
486 add r10,r14
; Round 10
487 mov r12,QWORD[80+rsi]
488 mov r13,rcx
489 mov r14,r10
490 bswap r12
491 ror r13,23
492 mov r15,rdx
493
494 xor r13,rcx
495 ror r14,5
496 xor r15,r8
497
498 mov QWORD[80+rsp],r12
499 xor r14,r10
500 and r15,rcx
501
502 ror r13,4
503 add r12,r9
504 xor r15,r8
505
506 ror r14,6
507 xor r13,rcx
508 add r12,r15
509
510 mov r15,r10
511 add r12,QWORD[rbp]
512 xor r14,r10
513
514 xor r15,r11
515 ror r13,14
516 mov r9,r11
517
518 and rdi,r15
519 ror r14,28
520 add r12,r13
521
522 xor r9,rdi
523 add rbx,r12
524 add r9,r12
525
526 lea rbp,[8+rbp]
527 add r9,r14
; Round 11
528 mov r12,QWORD[88+rsi]
529 mov r13,rbx
530 mov r14,r9
531 bswap r12
532 ror r13,23
533 mov rdi,rcx
534
535 xor r13,rbx
536 ror r14,5
537 xor rdi,rdx
538
539 mov QWORD[88+rsp],r12
540 xor r14,r9
541 and rdi,rbx
542
543 ror r13,4
544 add r12,r8
545 xor rdi,rdx
546
547 ror r14,6
548 xor r13,rbx
549 add r12,rdi
550
551 mov rdi,r9
552 add r12,QWORD[rbp]
553 xor r14,r9
554
555 xor rdi,r10
556 ror r13,14
557 mov r8,r10
558
559 and r15,rdi
560 ror r14,28
561 add r12,r13
562
563 xor r8,r15
564 add rax,r12
565 add r8,r12
566
567 lea rbp,[24+rbp]
568 add r8,r14
; Round 12
569 mov r12,QWORD[96+rsi]
570 mov r13,rax
571 mov r14,r8
572 bswap r12
573 ror r13,23
574 mov r15,rbx
575
576 xor r13,rax
577 ror r14,5
578 xor r15,rcx
579
580 mov QWORD[96+rsp],r12
581 xor r14,r8
582 and r15,rax
583
584 ror r13,4
585 add r12,rdx
586 xor r15,rcx
587
588 ror r14,6
589 xor r13,rax
590 add r12,r15
591
592 mov r15,r8
593 add r12,QWORD[rbp]
594 xor r14,r8
595
596 xor r15,r9
597 ror r13,14
598 mov rdx,r9
599
600 and rdi,r15
601 ror r14,28
602 add r12,r13
603
604 xor rdx,rdi
605 add r11,r12
606 add rdx,r12
607
608 lea rbp,[8+rbp]
609 add rdx,r14
; Round 13
610 mov r12,QWORD[104+rsi]
611 mov r13,r11
612 mov r14,rdx
613 bswap r12
614 ror r13,23
615 mov rdi,rax
616
617 xor r13,r11
618 ror r14,5
619 xor rdi,rbx
620
621 mov QWORD[104+rsp],r12
622 xor r14,rdx
623 and rdi,r11
624
625 ror r13,4
626 add r12,rcx
627 xor rdi,rbx
628
629 ror r14,6
630 xor r13,r11
631 add r12,rdi
632
633 mov rdi,rdx
634 add r12,QWORD[rbp]
635 xor r14,rdx
636
637 xor rdi,r8
638 ror r13,14
639 mov rcx,r8
640
641 and r15,rdi
642 ror r14,28
643 add r12,r13
644
645 xor rcx,r15
646 add r10,r12
647 add rcx,r12
648
649 lea rbp,[24+rbp]
650 add rcx,r14
; Round 14
651 mov r12,QWORD[112+rsi]
652 mov r13,r10
653 mov r14,rcx
654 bswap r12
655 ror r13,23
656 mov r15,r11
657
658 xor r13,r10
659 ror r14,5
660 xor r15,rax
661
662 mov QWORD[112+rsp],r12
663 xor r14,rcx
664 and r15,r10
665
666 ror r13,4
667 add r12,rbx
668 xor r15,rax
669
670 ror r14,6
671 xor r13,r10
672 add r12,r15
673
674 mov r15,rcx
675 add r12,QWORD[rbp]
676 xor r14,rcx
677
678 xor r15,rdx
679 ror r13,14
680 mov rbx,rdx
681
682 and rdi,r15
683 ror r14,28
684 add r12,r13
685
686 xor rbx,rdi
687 add r9,r12
688 add rbx,r12
689
690 lea rbp,[8+rbp]
691 add rbx,r14
; Round 15
692 mov r12,QWORD[120+rsi]
693 mov r13,r9
694 mov r14,rbx
695 bswap r12
696 ror r13,23
697 mov rdi,r10
698
699 xor r13,r9
700 ror r14,5
701 xor rdi,r11
702
703 mov QWORD[120+rsp],r12
704 xor r14,rbx
705 and rdi,r9
706
707 ror r13,4
708 add r12,rax
709 xor rdi,r11
710
711 ror r14,6
712 xor r13,r9
713 add r12,rdi
714
715 mov rdi,rbx
716 add r12,QWORD[rbp]
717 xor r14,rbx
718
719 xor rdi,rcx
720 ror r13,14
721 mov rax,rcx
722
723 and r15,rdi
724 ror r14,28
725 add r12,r13
726
727 xor rax,r15
728 add r8,r12
729 add rax,r12
730
; r14 still holds the rotation term for rax; it is added after the jump.
731 lea rbp,[24+rbp]
732 jmp NEAR $L$rounds_16_xx
733ALIGN 16
;-----------------------------------------------------------------------
; $L$rounds_16_xx -- rounds 16 and up, sixteen rounds per loop iteration.
;
; The 16 qwords at [rsp..rsp+127] are used as a circular message-schedule
; window. For slot k (all indices mod 16) each step computes
;   W[k] = W[k] + s0(W[k+1]) + W[k+9] + s1(W[k+14])
; where, as implemented below,
;   s0: ror 7, xor, ror 1 combined with shr 7   = ROTR1  ^ ROTR8  ^ SHR7
;   s1: ror 42, xor, ror 19 combined with shr 6 = ROTR19 ^ ROTR61 ^ SHR6
; stores it back into the window, and then performs the same round
; computation as in $L$loop with the new word in r12.
;
; The "add <reg>,r14" near the top of each step completes the previous
; round (its rotation term was left pending in r14).
;
; Loop exit: after every 16 rounds the top byte of the qword at [rbp] is
; tested; the loop ends when it is zero, which is the case for the
; 0x0001020304050607 entry that follows the round constants in K512.
;-----------------------------------------------------------------------
734$L$rounds_16_xx:
; Slot 0
735 mov r13,QWORD[8+rsp]
736 mov r15,QWORD[112+rsp]
737
738 mov r12,r13
739 ror r13,7
740 add rax,r14
741 mov r14,r15
742 ror r15,42
743
744 xor r13,r12
745 shr r12,7
746 ror r13,1
747 xor r15,r14
748 shr r14,6
749
750 ror r15,19
751 xor r12,r13
752 xor r15,r14
753 add r12,QWORD[72+rsp]
754
755 add r12,QWORD[rsp]
756 mov r13,r8
757 add r12,r15
758 mov r14,rax
759 ror r13,23
760 mov r15,r9
761
762 xor r13,r8
763 ror r14,5
764 xor r15,r10
765
766 mov QWORD[rsp],r12
767 xor r14,rax
768 and r15,r8
769
770 ror r13,4
771 add r12,r11
772 xor r15,r10
773
774 ror r14,6
775 xor r13,r8
776 add r12,r15
777
778 mov r15,rax
779 add r12,QWORD[rbp]
780 xor r14,rax
781
782 xor r15,rbx
783 ror r13,14
784 mov r11,rbx
785
786 and rdi,r15
787 ror r14,28
788 add r12,r13
789
790 xor r11,rdi
791 add rdx,r12
792 add r11,r12
793
794 lea rbp,[8+rbp]
; Slot 1
795 mov r13,QWORD[16+rsp]
796 mov rdi,QWORD[120+rsp]
797
798 mov r12,r13
799 ror r13,7
800 add r11,r14
801 mov r14,rdi
802 ror rdi,42
803
804 xor r13,r12
805 shr r12,7
806 ror r13,1
807 xor rdi,r14
808 shr r14,6
809
810 ror rdi,19
811 xor r12,r13
812 xor rdi,r14
813 add r12,QWORD[80+rsp]
814
815 add r12,QWORD[8+rsp]
816 mov r13,rdx
817 add r12,rdi
818 mov r14,r11
819 ror r13,23
820 mov rdi,r8
821
822 xor r13,rdx
823 ror r14,5
824 xor rdi,r9
825
826 mov QWORD[8+rsp],r12
827 xor r14,r11
828 and rdi,rdx
829
830 ror r13,4
831 add r12,r10
832 xor rdi,r9
833
834 ror r14,6
835 xor r13,rdx
836 add r12,rdi
837
838 mov rdi,r11
839 add r12,QWORD[rbp]
840 xor r14,r11
841
842 xor rdi,rax
843 ror r13,14
844 mov r10,rax
845
846 and r15,rdi
847 ror r14,28
848 add r12,r13
849
850 xor r10,r15
851 add rcx,r12
852 add r10,r12
853
854 lea rbp,[24+rbp]
; Slot 2
855 mov r13,QWORD[24+rsp]
856 mov r15,QWORD[rsp]
857
858 mov r12,r13
859 ror r13,7
860 add r10,r14
861 mov r14,r15
862 ror r15,42
863
864 xor r13,r12
865 shr r12,7
866 ror r13,1
867 xor r15,r14
868 shr r14,6
869
870 ror r15,19
871 xor r12,r13
872 xor r15,r14
873 add r12,QWORD[88+rsp]
874
875 add r12,QWORD[16+rsp]
876 mov r13,rcx
877 add r12,r15
878 mov r14,r10
879 ror r13,23
880 mov r15,rdx
881
882 xor r13,rcx
883 ror r14,5
884 xor r15,r8
885
886 mov QWORD[16+rsp],r12
887 xor r14,r10
888 and r15,rcx
889
890 ror r13,4
891 add r12,r9
892 xor r15,r8
893
894 ror r14,6
895 xor r13,rcx
896 add r12,r15
897
898 mov r15,r10
899 add r12,QWORD[rbp]
900 xor r14,r10
901
902 xor r15,r11
903 ror r13,14
904 mov r9,r11
905
906 and rdi,r15
907 ror r14,28
908 add r12,r13
909
910 xor r9,rdi
911 add rbx,r12
912 add r9,r12
913
914 lea rbp,[8+rbp]
; Slot 3
915 mov r13,QWORD[32+rsp]
916 mov rdi,QWORD[8+rsp]
917
918 mov r12,r13
919 ror r13,7
920 add r9,r14
921 mov r14,rdi
922 ror rdi,42
923
924 xor r13,r12
925 shr r12,7
926 ror r13,1
927 xor rdi,r14
928 shr r14,6
929
930 ror rdi,19
931 xor r12,r13
932 xor rdi,r14
933 add r12,QWORD[96+rsp]
934
935 add r12,QWORD[24+rsp]
936 mov r13,rbx
937 add r12,rdi
938 mov r14,r9
939 ror r13,23
940 mov rdi,rcx
941
942 xor r13,rbx
943 ror r14,5
944 xor rdi,rdx
945
946 mov QWORD[24+rsp],r12
947 xor r14,r9
948 and rdi,rbx
949
950 ror r13,4
951 add r12,r8
952 xor rdi,rdx
953
954 ror r14,6
955 xor r13,rbx
956 add r12,rdi
957
958 mov rdi,r9
959 add r12,QWORD[rbp]
960 xor r14,r9
961
962 xor rdi,r10
963 ror r13,14
964 mov r8,r10
965
966 and r15,rdi
967 ror r14,28
968 add r12,r13
969
970 xor r8,r15
971 add rax,r12
972 add r8,r12
973
974 lea rbp,[24+rbp]
; Slot 4
975 mov r13,QWORD[40+rsp]
976 mov r15,QWORD[16+rsp]
977
978 mov r12,r13
979 ror r13,7
980 add r8,r14
981 mov r14,r15
982 ror r15,42
983
984 xor r13,r12
985 shr r12,7
986 ror r13,1
987 xor r15,r14
988 shr r14,6
989
990 ror r15,19
991 xor r12,r13
992 xor r15,r14
993 add r12,QWORD[104+rsp]
994
995 add r12,QWORD[32+rsp]
996 mov r13,rax
997 add r12,r15
998 mov r14,r8
999 ror r13,23
1000 mov r15,rbx
1001
1002 xor r13,rax
1003 ror r14,5
1004 xor r15,rcx
1005
1006 mov QWORD[32+rsp],r12
1007 xor r14,r8
1008 and r15,rax
1009
1010 ror r13,4
1011 add r12,rdx
1012 xor r15,rcx
1013
1014 ror r14,6
1015 xor r13,rax
1016 add r12,r15
1017
1018 mov r15,r8
1019 add r12,QWORD[rbp]
1020 xor r14,r8
1021
1022 xor r15,r9
1023 ror r13,14
1024 mov rdx,r9
1025
1026 and rdi,r15
1027 ror r14,28
1028 add r12,r13
1029
1030 xor rdx,rdi
1031 add r11,r12
1032 add rdx,r12
1033
1034 lea rbp,[8+rbp]
; Slot 5
1035 mov r13,QWORD[48+rsp]
1036 mov rdi,QWORD[24+rsp]
1037
1038 mov r12,r13
1039 ror r13,7
1040 add rdx,r14
1041 mov r14,rdi
1042 ror rdi,42
1043
1044 xor r13,r12
1045 shr r12,7
1046 ror r13,1
1047 xor rdi,r14
1048 shr r14,6
1049
1050 ror rdi,19
1051 xor r12,r13
1052 xor rdi,r14
1053 add r12,QWORD[112+rsp]
1054
1055 add r12,QWORD[40+rsp]
1056 mov r13,r11
1057 add r12,rdi
1058 mov r14,rdx
1059 ror r13,23
1060 mov rdi,rax
1061
1062 xor r13,r11
1063 ror r14,5
1064 xor rdi,rbx
1065
1066 mov QWORD[40+rsp],r12
1067 xor r14,rdx
1068 and rdi,r11
1069
1070 ror r13,4
1071 add r12,rcx
1072 xor rdi,rbx
1073
1074 ror r14,6
1075 xor r13,r11
1076 add r12,rdi
1077
1078 mov rdi,rdx
1079 add r12,QWORD[rbp]
1080 xor r14,rdx
1081
1082 xor rdi,r8
1083 ror r13,14
1084 mov rcx,r8
1085
1086 and r15,rdi
1087 ror r14,28
1088 add r12,r13
1089
1090 xor rcx,r15
1091 add r10,r12
1092 add rcx,r12
1093
1094 lea rbp,[24+rbp]
; Slot 6
1095 mov r13,QWORD[56+rsp]
1096 mov r15,QWORD[32+rsp]
1097
1098 mov r12,r13
1099 ror r13,7
1100 add rcx,r14
1101 mov r14,r15
1102 ror r15,42
1103
1104 xor r13,r12
1105 shr r12,7
1106 ror r13,1
1107 xor r15,r14
1108 shr r14,6
1109
1110 ror r15,19
1111 xor r12,r13
1112 xor r15,r14
1113 add r12,QWORD[120+rsp]
1114
1115 add r12,QWORD[48+rsp]
1116 mov r13,r10
1117 add r12,r15
1118 mov r14,rcx
1119 ror r13,23
1120 mov r15,r11
1121
1122 xor r13,r10
1123 ror r14,5
1124 xor r15,rax
1125
1126 mov QWORD[48+rsp],r12
1127 xor r14,rcx
1128 and r15,r10
1129
1130 ror r13,4
1131 add r12,rbx
1132 xor r15,rax
1133
1134 ror r14,6
1135 xor r13,r10
1136 add r12,r15
1137
1138 mov r15,rcx
1139 add r12,QWORD[rbp]
1140 xor r14,rcx
1141
1142 xor r15,rdx
1143 ror r13,14
1144 mov rbx,rdx
1145
1146 and rdi,r15
1147 ror r14,28
1148 add r12,r13
1149
1150 xor rbx,rdi
1151 add r9,r12
1152 add rbx,r12
1153
1154 lea rbp,[8+rbp]
; Slot 7
1155 mov r13,QWORD[64+rsp]
1156 mov rdi,QWORD[40+rsp]
1157
1158 mov r12,r13
1159 ror r13,7
1160 add rbx,r14
1161 mov r14,rdi
1162 ror rdi,42
1163
1164 xor r13,r12
1165 shr r12,7
1166 ror r13,1
1167 xor rdi,r14
1168 shr r14,6
1169
1170 ror rdi,19
1171 xor r12,r13
1172 xor rdi,r14
1173 add r12,QWORD[rsp]
1174
1175 add r12,QWORD[56+rsp]
1176 mov r13,r9
1177 add r12,rdi
1178 mov r14,rbx
1179 ror r13,23
1180 mov rdi,r10
1181
1182 xor r13,r9
1183 ror r14,5
1184 xor rdi,r11
1185
1186 mov QWORD[56+rsp],r12
1187 xor r14,rbx
1188 and rdi,r9
1189
1190 ror r13,4
1191 add r12,rax
1192 xor rdi,r11
1193
1194 ror r14,6
1195 xor r13,r9
1196 add r12,rdi
1197
1198 mov rdi,rbx
1199 add r12,QWORD[rbp]
1200 xor r14,rbx
1201
1202 xor rdi,rcx
1203 ror r13,14
1204 mov rax,rcx
1205
1206 and r15,rdi
1207 ror r14,28
1208 add r12,r13
1209
1210 xor rax,r15
1211 add r8,r12
1212 add rax,r12
1213
1214 lea rbp,[24+rbp]
; Slot 8
1215 mov r13,QWORD[72+rsp]
1216 mov r15,QWORD[48+rsp]
1217
1218 mov r12,r13
1219 ror r13,7
1220 add rax,r14
1221 mov r14,r15
1222 ror r15,42
1223
1224 xor r13,r12
1225 shr r12,7
1226 ror r13,1
1227 xor r15,r14
1228 shr r14,6
1229
1230 ror r15,19
1231 xor r12,r13
1232 xor r15,r14
1233 add r12,QWORD[8+rsp]
1234
1235 add r12,QWORD[64+rsp]
1236 mov r13,r8
1237 add r12,r15
1238 mov r14,rax
1239 ror r13,23
1240 mov r15,r9
1241
1242 xor r13,r8
1243 ror r14,5
1244 xor r15,r10
1245
1246 mov QWORD[64+rsp],r12
1247 xor r14,rax
1248 and r15,r8
1249
1250 ror r13,4
1251 add r12,r11
1252 xor r15,r10
1253
1254 ror r14,6
1255 xor r13,r8
1256 add r12,r15
1257
1258 mov r15,rax
1259 add r12,QWORD[rbp]
1260 xor r14,rax
1261
1262 xor r15,rbx
1263 ror r13,14
1264 mov r11,rbx
1265
1266 and rdi,r15
1267 ror r14,28
1268 add r12,r13
1269
1270 xor r11,rdi
1271 add rdx,r12
1272 add r11,r12
1273
1274 lea rbp,[8+rbp]
; Slot 9
1275 mov r13,QWORD[80+rsp]
1276 mov rdi,QWORD[56+rsp]
1277
1278 mov r12,r13
1279 ror r13,7
1280 add r11,r14
1281 mov r14,rdi
1282 ror rdi,42
1283
1284 xor r13,r12
1285 shr r12,7
1286 ror r13,1
1287 xor rdi,r14
1288 shr r14,6
1289
1290 ror rdi,19
1291 xor r12,r13
1292 xor rdi,r14
1293 add r12,QWORD[16+rsp]
1294
1295 add r12,QWORD[72+rsp]
1296 mov r13,rdx
1297 add r12,rdi
1298 mov r14,r11
1299 ror r13,23
1300 mov rdi,r8
1301
1302 xor r13,rdx
1303 ror r14,5
1304 xor rdi,r9
1305
1306 mov QWORD[72+rsp],r12
1307 xor r14,r11
1308 and rdi,rdx
1309
1310 ror r13,4
1311 add r12,r10
1312 xor rdi,r9
1313
1314 ror r14,6
1315 xor r13,rdx
1316 add r12,rdi
1317
1318 mov rdi,r11
1319 add r12,QWORD[rbp]
1320 xor r14,r11
1321
1322 xor rdi,rax
1323 ror r13,14
1324 mov r10,rax
1325
1326 and r15,rdi
1327 ror r14,28
1328 add r12,r13
1329
1330 xor r10,r15
1331 add rcx,r12
1332 add r10,r12
1333
1334 lea rbp,[24+rbp]
; Slot 10
1335 mov r13,QWORD[88+rsp]
1336 mov r15,QWORD[64+rsp]
1337
1338 mov r12,r13
1339 ror r13,7
1340 add r10,r14
1341 mov r14,r15
1342 ror r15,42
1343
1344 xor r13,r12
1345 shr r12,7
1346 ror r13,1
1347 xor r15,r14
1348 shr r14,6
1349
1350 ror r15,19
1351 xor r12,r13
1352 xor r15,r14
1353 add r12,QWORD[24+rsp]
1354
1355 add r12,QWORD[80+rsp]
1356 mov r13,rcx
1357 add r12,r15
1358 mov r14,r10
1359 ror r13,23
1360 mov r15,rdx
1361
1362 xor r13,rcx
1363 ror r14,5
1364 xor r15,r8
1365
1366 mov QWORD[80+rsp],r12
1367 xor r14,r10
1368 and r15,rcx
1369
1370 ror r13,4
1371 add r12,r9
1372 xor r15,r8
1373
1374 ror r14,6
1375 xor r13,rcx
1376 add r12,r15
1377
1378 mov r15,r10
1379 add r12,QWORD[rbp]
1380 xor r14,r10
1381
1382 xor r15,r11
1383 ror r13,14
1384 mov r9,r11
1385
1386 and rdi,r15
1387 ror r14,28
1388 add r12,r13
1389
1390 xor r9,rdi
1391 add rbx,r12
1392 add r9,r12
1393
1394 lea rbp,[8+rbp]
; Slot 11
1395 mov r13,QWORD[96+rsp]
1396 mov rdi,QWORD[72+rsp]
1397
1398 mov r12,r13
1399 ror r13,7
1400 add r9,r14
1401 mov r14,rdi
1402 ror rdi,42
1403
1404 xor r13,r12
1405 shr r12,7
1406 ror r13,1
1407 xor rdi,r14
1408 shr r14,6
1409
1410 ror rdi,19
1411 xor r12,r13
1412 xor rdi,r14
1413 add r12,QWORD[32+rsp]
1414
1415 add r12,QWORD[88+rsp]
1416 mov r13,rbx
1417 add r12,rdi
1418 mov r14,r9
1419 ror r13,23
1420 mov rdi,rcx
1421
1422 xor r13,rbx
1423 ror r14,5
1424 xor rdi,rdx
1425
1426 mov QWORD[88+rsp],r12
1427 xor r14,r9
1428 and rdi,rbx
1429
1430 ror r13,4
1431 add r12,r8
1432 xor rdi,rdx
1433
1434 ror r14,6
1435 xor r13,rbx
1436 add r12,rdi
1437
1438 mov rdi,r9
1439 add r12,QWORD[rbp]
1440 xor r14,r9
1441
1442 xor rdi,r10
1443 ror r13,14
1444 mov r8,r10
1445
1446 and r15,rdi
1447 ror r14,28
1448 add r12,r13
1449
1450 xor r8,r15
1451 add rax,r12
1452 add r8,r12
1453
1454 lea rbp,[24+rbp]
; Slot 12
1455 mov r13,QWORD[104+rsp]
1456 mov r15,QWORD[80+rsp]
1457
1458 mov r12,r13
1459 ror r13,7
1460 add r8,r14
1461 mov r14,r15
1462 ror r15,42
1463
1464 xor r13,r12
1465 shr r12,7
1466 ror r13,1
1467 xor r15,r14
1468 shr r14,6
1469
1470 ror r15,19
1471 xor r12,r13
1472 xor r15,r14
1473 add r12,QWORD[40+rsp]
1474
1475 add r12,QWORD[96+rsp]
1476 mov r13,rax
1477 add r12,r15
1478 mov r14,r8
1479 ror r13,23
1480 mov r15,rbx
1481
1482 xor r13,rax
1483 ror r14,5
1484 xor r15,rcx
1485
1486 mov QWORD[96+rsp],r12
1487 xor r14,r8
1488 and r15,rax
1489
1490 ror r13,4
1491 add r12,rdx
1492 xor r15,rcx
1493
1494 ror r14,6
1495 xor r13,rax
1496 add r12,r15
1497
1498 mov r15,r8
1499 add r12,QWORD[rbp]
1500 xor r14,r8
1501
1502 xor r15,r9
1503 ror r13,14
1504 mov rdx,r9
1505
1506 and rdi,r15
1507 ror r14,28
1508 add r12,r13
1509
1510 xor rdx,rdi
1511 add r11,r12
1512 add rdx,r12
1513
1514 lea rbp,[8+rbp]
; Slot 13
1515 mov r13,QWORD[112+rsp]
1516 mov rdi,QWORD[88+rsp]
1517
1518 mov r12,r13
1519 ror r13,7
1520 add rdx,r14
1521 mov r14,rdi
1522 ror rdi,42
1523
1524 xor r13,r12
1525 shr r12,7
1526 ror r13,1
1527 xor rdi,r14
1528 shr r14,6
1529
1530 ror rdi,19
1531 xor r12,r13
1532 xor rdi,r14
1533 add r12,QWORD[48+rsp]
1534
1535 add r12,QWORD[104+rsp]
1536 mov r13,r11
1537 add r12,rdi
1538 mov r14,rdx
1539 ror r13,23
1540 mov rdi,rax
1541
1542 xor r13,r11
1543 ror r14,5
1544 xor rdi,rbx
1545
1546 mov QWORD[104+rsp],r12
1547 xor r14,rdx
1548 and rdi,r11
1549
1550 ror r13,4
1551 add r12,rcx
1552 xor rdi,rbx
1553
1554 ror r14,6
1555 xor r13,r11
1556 add r12,rdi
1557
1558 mov rdi,rdx
1559 add r12,QWORD[rbp]
1560 xor r14,rdx
1561
1562 xor rdi,r8
1563 ror r13,14
1564 mov rcx,r8
1565
1566 and r15,rdi
1567 ror r14,28
1568 add r12,r13
1569
1570 xor rcx,r15
1571 add r10,r12
1572 add rcx,r12
1573
1574 lea rbp,[24+rbp]
; Slot 14
1575 mov r13,QWORD[120+rsp]
1576 mov r15,QWORD[96+rsp]
1577
1578 mov r12,r13
1579 ror r13,7
1580 add rcx,r14
1581 mov r14,r15
1582 ror r15,42
1583
1584 xor r13,r12
1585 shr r12,7
1586 ror r13,1
1587 xor r15,r14
1588 shr r14,6
1589
1590 ror r15,19
1591 xor r12,r13
1592 xor r15,r14
1593 add r12,QWORD[56+rsp]
1594
1595 add r12,QWORD[112+rsp]
1596 mov r13,r10
1597 add r12,r15
1598 mov r14,rcx
1599 ror r13,23
1600 mov r15,r11
1601
1602 xor r13,r10
1603 ror r14,5
1604 xor r15,rax
1605
1606 mov QWORD[112+rsp],r12
1607 xor r14,rcx
1608 and r15,r10
1609
1610 ror r13,4
1611 add r12,rbx
1612 xor r15,rax
1613
1614 ror r14,6
1615 xor r13,r10
1616 add r12,r15
1617
1618 mov r15,rcx
1619 add r12,QWORD[rbp]
1620 xor r14,rcx
1621
1622 xor r15,rdx
1623 ror r13,14
1624 mov rbx,rdx
1625
1626 and rdi,r15
1627 ror r14,28
1628 add r12,r13
1629
1630 xor rbx,rdi
1631 add r9,r12
1632 add rbx,r12
1633
1634 lea rbp,[8+rbp]
; Slot 15
1635 mov r13,QWORD[rsp]
1636 mov rdi,QWORD[104+rsp]
1637
1638 mov r12,r13
1639 ror r13,7
1640 add rbx,r14
1641 mov r14,rdi
1642 ror rdi,42
1643
1644 xor r13,r12
1645 shr r12,7
1646 ror r13,1
1647 xor rdi,r14
1648 shr r14,6
1649
1650 ror rdi,19
1651 xor r12,r13
1652 xor rdi,r14
1653 add r12,QWORD[64+rsp]
1654
1655 add r12,QWORD[120+rsp]
1656 mov r13,r9
1657 add r12,rdi
1658 mov r14,rbx
1659 ror r13,23
1660 mov rdi,r10
1661
1662 xor r13,r9
1663 ror r14,5
1664 xor rdi,r11
1665
1666 mov QWORD[120+rsp],r12
1667 xor r14,rbx
1668 and rdi,r9
1669
1670 ror r13,4
1671 add r12,rax
1672 xor rdi,r11
1673
1674 ror r14,6
1675 xor r13,r9
1676 add r12,rdi
1677
1678 mov rdi,rbx
1679 add r12,QWORD[rbp]
1680 xor r14,rbx
1681
1682 xor rdi,rcx
1683 ror r13,14
1684 mov rax,rcx
1685
1686 and r15,rdi
1687 ror r14,28
1688 add r12,r13
1689
1690 xor rax,r15
1691 add r8,r12
1692 add rax,r12
1693
; Keep looping while the top byte of the next table entry is non-zero.
1694 lea rbp,[24+rbp]
1695 cmp BYTE[7+rbp],0
1696 jnz NEAR $L$rounds_16_xx
1697
; End of one block: finish the last round, fold the working variables into
; the stored state, and either loop for the next block or unwind the frame.
1698 mov rdi,QWORD[((128+0))+rsp]
; Completes the final round (rotation term left pending in r14).
1699 add rax,r14
; Advance the input pointer by one 128-byte block.
1700 lea rsi,[128+rsi]
1701
1702 add rax,QWORD[rdi]
1703 add rbx,QWORD[8+rdi]
1704 add rcx,QWORD[16+rdi]
1705 add rdx,QWORD[24+rdi]
1706 add r8,QWORD[32+rdi]
1707 add r9,QWORD[40+rdi]
1708 add r10,QWORD[48+rdi]
1709 add r11,QWORD[56+rdi]
1710
; Compare against the end-of-input pointer saved in the frame. The stores
; below are plain movs, which leave the flags intact for the jb that follows.
1711 cmp rsi,QWORD[((128+16))+rsp]
1712
1713 mov QWORD[rdi],rax
1714 mov QWORD[8+rdi],rbx
1715 mov QWORD[16+rdi],rcx
1716 mov QWORD[24+rdi],rdx
1717 mov QWORD[32+rdi],r8
1718 mov QWORD[40+rdi],r9
1719 mov QWORD[48+rdi],r10
1720 mov QWORD[56+rdi],r11
1721 jb NEAR $L$loop
1722
; rsi = rsp value recorded before the six pushes; the pushed registers
; therefore sit at rsi-8 (rbx) down to rsi-48 (r15).
1723 mov rsi,QWORD[152+rsp]
1724
1725 mov r15,QWORD[((-48))+rsi]
1726
1727 mov r14,QWORD[((-40))+rsi]
1728
1729 mov r13,QWORD[((-32))+rsi]
1730
1731 mov r12,QWORD[((-24))+rsi]
1732
1733 mov rbp,QWORD[((-16))+rsi]
1734
1735 mov rbx,QWORD[((-8))+rsi]
1736
1737 lea rsp,[rsi]
1738
1739$L$epilogue:
; Reload the caller's rdi/rsi that the entry code parked at [8+rsp]/[16+rsp].
1740 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1741 mov rsi,QWORD[16+rsp]
; Bytes F3 C3 encode "rep ret".
1742 DB 0F3h,0C3h ;repret
1743
1744$L$SEH_end_sha512_block_data_order:
1745ALIGN 64
1746
;-----------------------------------------------------------------------
; K512 -- round-constant table.
;
; Layout: 80 DQ rows of two constants each, with every row written twice
; in succession, i.e. 40 distinct pairs = 80 distinct 64-bit constants
; occupying 1280 bytes. The integer code path steps through it with
; alternating +8 / +24 increments so that it reads each constant once.
; NOTE(review): the duplicated rows are presumably there for the wider
; vector code paths -- confirm against those paths.
;
; Directly after the constants (offset 1280) comes a two-row byte-order
; mask, followed by a NUL-terminated ASCII identification string.
;-----------------------------------------------------------------------
1747K512:
1748 DQ 0x428a2f98d728ae22,0x7137449123ef65cd
1749 DQ 0x428a2f98d728ae22,0x7137449123ef65cd
1750 DQ 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1751 DQ 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1752 DQ 0x3956c25bf348b538,0x59f111f1b605d019
1753 DQ 0x3956c25bf348b538,0x59f111f1b605d019
1754 DQ 0x923f82a4af194f9b,0xab1c5ed5da6d8118
1755 DQ 0x923f82a4af194f9b,0xab1c5ed5da6d8118
1756 DQ 0xd807aa98a3030242,0x12835b0145706fbe
1757 DQ 0xd807aa98a3030242,0x12835b0145706fbe
1758 DQ 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1759 DQ 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1760 DQ 0x72be5d74f27b896f,0x80deb1fe3b1696b1
1761 DQ 0x72be5d74f27b896f,0x80deb1fe3b1696b1
1762 DQ 0x9bdc06a725c71235,0xc19bf174cf692694
1763 DQ 0x9bdc06a725c71235,0xc19bf174cf692694
1764 DQ 0xe49b69c19ef14ad2,0xefbe4786384f25e3
1765 DQ 0xe49b69c19ef14ad2,0xefbe4786384f25e3
1766 DQ 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1767 DQ 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1768 DQ 0x2de92c6f592b0275,0x4a7484aa6ea6e483
1769 DQ 0x2de92c6f592b0275,0x4a7484aa6ea6e483
1770 DQ 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1771 DQ 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1772 DQ 0x983e5152ee66dfab,0xa831c66d2db43210
1773 DQ 0x983e5152ee66dfab,0xa831c66d2db43210
1774 DQ 0xb00327c898fb213f,0xbf597fc7beef0ee4
1775 DQ 0xb00327c898fb213f,0xbf597fc7beef0ee4
1776 DQ 0xc6e00bf33da88fc2,0xd5a79147930aa725
1777 DQ 0xc6e00bf33da88fc2,0xd5a79147930aa725
1778 DQ 0x06ca6351e003826f,0x142929670a0e6e70
1779 DQ 0x06ca6351e003826f,0x142929670a0e6e70
1780 DQ 0x27b70a8546d22ffc,0x2e1b21385c26c926
1781 DQ 0x27b70a8546d22ffc,0x2e1b21385c26c926
1782 DQ 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1783 DQ 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1784 DQ 0x650a73548baf63de,0x766a0abb3c77b2a8
1785 DQ 0x650a73548baf63de,0x766a0abb3c77b2a8
1786 DQ 0x81c2c92e47edaee6,0x92722c851482353b
1787 DQ 0x81c2c92e47edaee6,0x92722c851482353b
1788 DQ 0xa2bfe8a14cf10364,0xa81a664bbc423001
1789 DQ 0xa2bfe8a14cf10364,0xa81a664bbc423001
1790 DQ 0xc24b8b70d0f89791,0xc76c51a30654be30
1791 DQ 0xc24b8b70d0f89791,0xc76c51a30654be30
1792 DQ 0xd192e819d6ef5218,0xd69906245565a910
1793 DQ 0xd192e819d6ef5218,0xd69906245565a910
1794 DQ 0xf40e35855771202a,0x106aa07032bbd1b8
1795 DQ 0xf40e35855771202a,0x106aa07032bbd1b8
1796 DQ 0x19a4c116b8d2d0c8,0x1e376c085141ab53
1797 DQ 0x19a4c116b8d2d0c8,0x1e376c085141ab53
1798 DQ 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1799 DQ 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1800 DQ 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1801 DQ 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1802 DQ 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1803 DQ 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1804 DQ 0x748f82ee5defb2fc,0x78a5636f43172f60
1805 DQ 0x748f82ee5defb2fc,0x78a5636f43172f60
1806 DQ 0x84c87814a1f0ab72,0x8cc702081a6439ec
1807 DQ 0x84c87814a1f0ab72,0x8cc702081a6439ec
1808 DQ 0x90befffa23631e28,0xa4506cebde82bde9
1809 DQ 0x90befffa23631e28,0xa4506cebde82bde9
1810 DQ 0xbef9a3f7b2c67915,0xc67178f2e372532b
1811 DQ 0xbef9a3f7b2c67915,0xc67178f2e372532b
1812 DQ 0xca273eceea26619c,0xd186b8c721c0c207
1813 DQ 0xca273eceea26619c,0xd186b8c721c0c207
1814 DQ 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1815 DQ 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1816 DQ 0x06f067aa72176fba,0x0a637dc5a2c898a6
1817 DQ 0x06f067aa72176fba,0x0a637dc5a2c898a6
1818 DQ 0x113f9804bef90dae,0x1b710b35131c471b
1819 DQ 0x113f9804bef90dae,0x1b710b35131c471b
1820 DQ 0x28db77f523047d84,0x32caab7b40c72493
1821 DQ 0x28db77f523047d84,0x32caab7b40c72493
1822 DQ 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1823 DQ 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1824 DQ 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1825 DQ 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1826 DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817
1827 DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817
1828
; Offset 1280: byte-order mask, loaded by the XOP path as K512+1280 and used
; as the vpshufb control. Its first qword has a zero top byte, which is what
; the "cmp BYTE[7+rbp],0" loop test in the integer path detects as the end
; of the constants.
1829 DQ 0x0001020304050607,0x08090a0b0c0d0e0f
1830 DQ 0x0001020304050607,0x08090a0b0c0d0e0f
; ASCII: "SHA512 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL
1831DB 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
1832DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
1833DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
1834DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
1835DB 111,114,103,62,0
1836
1837ALIGN 64
1838sha512_block_data_order_xop:
1839 mov QWORD[8+rsp],rdi ;WIN64 prologue
1840 mov QWORD[16+rsp],rsi
1841 mov rax,rsp
1842$L$SEH_begin_sha512_block_data_order_xop:
1843 mov rdi,rcx
1844 mov rsi,rdx
1845 mov rdx,r8
1846
1847
1848
1849$L$xop_shortcut:
1850 mov rax,rsp
1851
1852 push rbx
1853
1854 push rbp
1855
1856 push r12
1857
1858 push r13
1859
1860 push r14
1861
1862 push r15
1863
1864 shl rdx,4
1865 sub rsp,256
1866 lea rdx,[rdx*8+rsi]
1867 and rsp,-64
1868 mov QWORD[((128+0))+rsp],rdi
1869 mov QWORD[((128+8))+rsp],rsi
1870 mov QWORD[((128+16))+rsp],rdx
1871 mov QWORD[152+rsp],rax
1872
1873 movaps XMMWORD[(128+32)+rsp],xmm6
1874 movaps XMMWORD[(128+48)+rsp],xmm7
1875 movaps XMMWORD[(128+64)+rsp],xmm8
1876 movaps XMMWORD[(128+80)+rsp],xmm9
1877 movaps XMMWORD[(128+96)+rsp],xmm10
1878 movaps XMMWORD[(128+112)+rsp],xmm11
1879$L$prologue_xop:
1880
1881 vzeroupper
1882 mov rax,QWORD[rdi]
1883 mov rbx,QWORD[8+rdi]
1884 mov rcx,QWORD[16+rdi]
1885 mov rdx,QWORD[24+rdi]
1886 mov r8,QWORD[32+rdi]
1887 mov r9,QWORD[40+rdi]
1888 mov r10,QWORD[48+rdi]
1889 mov r11,QWORD[56+rdi]
1890 jmp NEAR $L$loop_xop
1891ALIGN 16
; ---------------------------------------------------------------------
; Per-block setup (XOP path). Entered once from the prologue and again
; from the "jb" at the bottom of the block loop.
; Loads the 128 input bytes at [rsi] into xmm0..xmm7, shuffles each
; vector with the mask read from K512+1280, adds eight 16-byte rows of
; constants addressed through rbp (= K512+128) and stages the sums at
; [rsp]..[127+rsp], where the round code picks them up with
; "add reg,QWORD[n+rsp]".
; NOTE(review): the mask is presumably a byte-swap mask, the vector
; counterpart of the bswap in the scalar path; K512 is not visible in
; this excerpt, so confirm against the table.
; NOTE(review): the constants are read at a 32-byte stride with 16-byte
; loads, which suggests each row of K512 is stored twice; confirm.
; ---------------------------------------------------------------------
1892$L$loop_xop:
1893 vmovdqa xmm11,XMMWORD[((K512+1280))] ; shuffle mask for the vpshufb's below
1894 vmovdqu xmm0,XMMWORD[rsi] ; unaligned loads: no alignment is assumed for rsi
1895 lea rbp,[((K512+128))] ; so -128+rbp addresses the start of K512
1896 vmovdqu xmm1,XMMWORD[16+rsi]
1897 vmovdqu xmm2,XMMWORD[32+rsi]
1898 vpshufb xmm0,xmm0,xmm11
1899 vmovdqu xmm3,XMMWORD[48+rsi]
1900 vpshufb xmm1,xmm1,xmm11
1901 vmovdqu xmm4,XMMWORD[64+rsi]
1902 vpshufb xmm2,xmm2,xmm11
1903 vmovdqu xmm5,XMMWORD[80+rsi]
1904 vpshufb xmm3,xmm3,xmm11
1905 vmovdqu xmm6,XMMWORD[96+rsi]
1906 vpshufb xmm4,xmm4,xmm11
1907 vmovdqu xmm7,XMMWORD[112+rsi]
1908 vpshufb xmm5,xmm5,xmm11
1909 vpaddq xmm8,xmm0,XMMWORD[((-128))+rbp]
1910 vpshufb xmm6,xmm6,xmm11
1911 vpaddq xmm9,xmm1,XMMWORD[((-96))+rbp]
1912 vpshufb xmm7,xmm7,xmm11 ; last use of the mask
1913 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]
1914 vpaddq xmm11,xmm3,XMMWORD[((-32))+rbp] ; xmm11 is reused as a temporary from here on
1915 vmovdqa XMMWORD[rsp],xmm8
1916 vpaddq xmm8,xmm4,XMMWORD[rbp]
1917 vmovdqa XMMWORD[16+rsp],xmm9
1918 vpaddq xmm9,xmm5,XMMWORD[32+rbp]
1919 vmovdqa XMMWORD[32+rsp],xmm10
1920 vpaddq xmm10,xmm6,XMMWORD[64+rbp]
1921 vmovdqa XMMWORD[48+rsp],xmm11
1922 vpaddq xmm11,xmm7,XMMWORD[96+rbp]
1923 vmovdqa XMMWORD[64+rsp],xmm8
1924 mov r14,rax ; seed for the first round: r14 = state word 0
1925 vmovdqa XMMWORD[80+rsp],xmm9
1926 mov rdi,rbx ; rdi stops being the state pointer here; it is reloaded from [128+rsp] after the rounds
1927 vmovdqa XMMWORD[96+rsp],xmm10
1928 xor rdi,rcx ; rdi = state word 1 ^ state word 2
1929 vmovdqa XMMWORD[112+rsp],xmm11
1930 mov r13,r8 ; seed for the first round: r13 = state word 4
1931 jmp NEAR $L$xop_00_47
1932
1933ALIGN 16
1934$L$xop_00_47:
1935 add rbp,256
1936 vpalignr xmm8,xmm1,xmm0,8
1937 ror r13,23
1938 mov rax,r14
1939 vpalignr xmm11,xmm5,xmm4,8
1940 mov r12,r9
1941 ror r14,5
1942DB 143,72,120,195,200,56
1943 xor r13,r8
1944 xor r12,r10
1945 vpsrlq xmm8,xmm8,7
1946 ror r13,4
1947 xor r14,rax
1948 vpaddq xmm0,xmm0,xmm11
1949 and r12,r8
1950 xor r13,r8
1951 add r11,QWORD[rsp]
1952 mov r15,rax
1953DB 143,72,120,195,209,7
1954 xor r12,r10
1955 ror r14,6
1956 vpxor xmm8,xmm8,xmm9
1957 xor r15,rbx
1958 add r11,r12
1959 ror r13,14
1960 and rdi,r15
1961DB 143,104,120,195,223,3
1962 xor r14,rax
1963 add r11,r13
1964 vpxor xmm8,xmm8,xmm10
1965 xor rdi,rbx
1966 ror r14,28
1967 vpsrlq xmm10,xmm7,6
1968 add rdx,r11
1969 add r11,rdi
1970 vpaddq xmm0,xmm0,xmm8
1971 mov r13,rdx
1972 add r14,r11
1973DB 143,72,120,195,203,42
1974 ror r13,23
1975 mov r11,r14
1976 vpxor xmm11,xmm11,xmm10
1977 mov r12,r8
1978 ror r14,5
1979 xor r13,rdx
1980 xor r12,r9
1981 vpxor xmm11,xmm11,xmm9
1982 ror r13,4
1983 xor r14,r11
1984 and r12,rdx
1985 xor r13,rdx
1986 vpaddq xmm0,xmm0,xmm11
1987 add r10,QWORD[8+rsp]
1988 mov rdi,r11
1989 xor r12,r9
1990 ror r14,6
1991 vpaddq xmm10,xmm0,XMMWORD[((-128))+rbp]
1992 xor rdi,rax
1993 add r10,r12
1994 ror r13,14
1995 and r15,rdi
1996 xor r14,r11
1997 add r10,r13
1998 xor r15,rax
1999 ror r14,28
2000 add rcx,r10
2001 add r10,r15
2002 mov r13,rcx
2003 add r14,r10
2004 vmovdqa XMMWORD[rsp],xmm10
2005 vpalignr xmm8,xmm2,xmm1,8
2006 ror r13,23
2007 mov r10,r14
2008 vpalignr xmm11,xmm6,xmm5,8
2009 mov r12,rdx
2010 ror r14,5
2011DB 143,72,120,195,200,56
2012 xor r13,rcx
2013 xor r12,r8
2014 vpsrlq xmm8,xmm8,7
2015 ror r13,4
2016 xor r14,r10
2017 vpaddq xmm1,xmm1,xmm11
2018 and r12,rcx
2019 xor r13,rcx
2020 add r9,QWORD[16+rsp]
2021 mov r15,r10
2022DB 143,72,120,195,209,7
2023 xor r12,r8
2024 ror r14,6
2025 vpxor xmm8,xmm8,xmm9
2026 xor r15,r11
2027 add r9,r12
2028 ror r13,14
2029 and rdi,r15
2030DB 143,104,120,195,216,3
2031 xor r14,r10
2032 add r9,r13
2033 vpxor xmm8,xmm8,xmm10
2034 xor rdi,r11
2035 ror r14,28
2036 vpsrlq xmm10,xmm0,6
2037 add rbx,r9
2038 add r9,rdi
2039 vpaddq xmm1,xmm1,xmm8
2040 mov r13,rbx
2041 add r14,r9
2042DB 143,72,120,195,203,42
2043 ror r13,23
2044 mov r9,r14
2045 vpxor xmm11,xmm11,xmm10
2046 mov r12,rcx
2047 ror r14,5
2048 xor r13,rbx
2049 xor r12,rdx
2050 vpxor xmm11,xmm11,xmm9
2051 ror r13,4
2052 xor r14,r9
2053 and r12,rbx
2054 xor r13,rbx
2055 vpaddq xmm1,xmm1,xmm11
2056 add r8,QWORD[24+rsp]
2057 mov rdi,r9
2058 xor r12,rdx
2059 ror r14,6
2060 vpaddq xmm10,xmm1,XMMWORD[((-96))+rbp]
2061 xor rdi,r10
2062 add r8,r12
2063 ror r13,14
2064 and r15,rdi
2065 xor r14,r9
2066 add r8,r13
2067 xor r15,r10
2068 ror r14,28
2069 add rax,r8
2070 add r8,r15
2071 mov r13,rax
2072 add r14,r8
2073 vmovdqa XMMWORD[16+rsp],xmm10
2074 vpalignr xmm8,xmm3,xmm2,8
2075 ror r13,23
2076 mov r8,r14
2077 vpalignr xmm11,xmm7,xmm6,8
2078 mov r12,rbx
2079 ror r14,5
2080DB 143,72,120,195,200,56
2081 xor r13,rax
2082 xor r12,rcx
2083 vpsrlq xmm8,xmm8,7
2084 ror r13,4
2085 xor r14,r8
2086 vpaddq xmm2,xmm2,xmm11
2087 and r12,rax
2088 xor r13,rax
2089 add rdx,QWORD[32+rsp]
2090 mov r15,r8
2091DB 143,72,120,195,209,7
2092 xor r12,rcx
2093 ror r14,6
2094 vpxor xmm8,xmm8,xmm9
2095 xor r15,r9
2096 add rdx,r12
2097 ror r13,14
2098 and rdi,r15
2099DB 143,104,120,195,217,3
2100 xor r14,r8
2101 add rdx,r13
2102 vpxor xmm8,xmm8,xmm10
2103 xor rdi,r9
2104 ror r14,28
2105 vpsrlq xmm10,xmm1,6
2106 add r11,rdx
2107 add rdx,rdi
2108 vpaddq xmm2,xmm2,xmm8
2109 mov r13,r11
2110 add r14,rdx
2111DB 143,72,120,195,203,42
2112 ror r13,23
2113 mov rdx,r14
2114 vpxor xmm11,xmm11,xmm10
2115 mov r12,rax
2116 ror r14,5
2117 xor r13,r11
2118 xor r12,rbx
2119 vpxor xmm11,xmm11,xmm9
2120 ror r13,4
2121 xor r14,rdx
2122 and r12,r11
2123 xor r13,r11
2124 vpaddq xmm2,xmm2,xmm11
2125 add rcx,QWORD[40+rsp]
2126 mov rdi,rdx
2127 xor r12,rbx
2128 ror r14,6
2129 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]
2130 xor rdi,r8
2131 add rcx,r12
2132 ror r13,14
2133 and r15,rdi
2134 xor r14,rdx
2135 add rcx,r13
2136 xor r15,r8
2137 ror r14,28
2138 add r10,rcx
2139 add rcx,r15
2140 mov r13,r10
2141 add r14,rcx
2142 vmovdqa XMMWORD[32+rsp],xmm10
2143 vpalignr xmm8,xmm4,xmm3,8
2144 ror r13,23
2145 mov rcx,r14
2146 vpalignr xmm11,xmm0,xmm7,8
2147 mov r12,r11
2148 ror r14,5
2149DB 143,72,120,195,200,56
2150 xor r13,r10
2151 xor r12,rax
2152 vpsrlq xmm8,xmm8,7
2153 ror r13,4
2154 xor r14,rcx
2155 vpaddq xmm3,xmm3,xmm11
2156 and r12,r10
2157 xor r13,r10
2158 add rbx,QWORD[48+rsp]
2159 mov r15,rcx
2160DB 143,72,120,195,209,7
2161 xor r12,rax
2162 ror r14,6
2163 vpxor xmm8,xmm8,xmm9
2164 xor r15,rdx
2165 add rbx,r12
2166 ror r13,14
2167 and rdi,r15
2168DB 143,104,120,195,218,3
2169 xor r14,rcx
2170 add rbx,r13
2171 vpxor xmm8,xmm8,xmm10
2172 xor rdi,rdx
2173 ror r14,28
2174 vpsrlq xmm10,xmm2,6
2175 add r9,rbx
2176 add rbx,rdi
2177 vpaddq xmm3,xmm3,xmm8
2178 mov r13,r9
2179 add r14,rbx
2180DB 143,72,120,195,203,42
2181 ror r13,23
2182 mov rbx,r14
2183 vpxor xmm11,xmm11,xmm10
2184 mov r12,r10
2185 ror r14,5
2186 xor r13,r9
2187 xor r12,r11
2188 vpxor xmm11,xmm11,xmm9
2189 ror r13,4
2190 xor r14,rbx
2191 and r12,r9
2192 xor r13,r9
2193 vpaddq xmm3,xmm3,xmm11
2194 add rax,QWORD[56+rsp]
2195 mov rdi,rbx
2196 xor r12,r11
2197 ror r14,6
2198 vpaddq xmm10,xmm3,XMMWORD[((-32))+rbp]
2199 xor rdi,rcx
2200 add rax,r12
2201 ror r13,14
2202 and r15,rdi
2203 xor r14,rbx
2204 add rax,r13
2205 xor r15,rcx
2206 ror r14,28
2207 add r8,rax
2208 add rax,r15
2209 mov r13,r8
2210 add r14,rax
2211 vmovdqa XMMWORD[48+rsp],xmm10
2212 vpalignr xmm8,xmm5,xmm4,8
2213 ror r13,23
2214 mov rax,r14
2215 vpalignr xmm11,xmm1,xmm0,8
2216 mov r12,r9
2217 ror r14,5
2218DB 143,72,120,195,200,56
2219 xor r13,r8
2220 xor r12,r10
2221 vpsrlq xmm8,xmm8,7
2222 ror r13,4
2223 xor r14,rax
2224 vpaddq xmm4,xmm4,xmm11
2225 and r12,r8
2226 xor r13,r8
2227 add r11,QWORD[64+rsp]
2228 mov r15,rax
2229DB 143,72,120,195,209,7
2230 xor r12,r10
2231 ror r14,6
2232 vpxor xmm8,xmm8,xmm9
2233 xor r15,rbx
2234 add r11,r12
2235 ror r13,14
2236 and rdi,r15
2237DB 143,104,120,195,219,3
2238 xor r14,rax
2239 add r11,r13
2240 vpxor xmm8,xmm8,xmm10
2241 xor rdi,rbx
2242 ror r14,28
2243 vpsrlq xmm10,xmm3,6
2244 add rdx,r11
2245 add r11,rdi
2246 vpaddq xmm4,xmm4,xmm8
2247 mov r13,rdx
2248 add r14,r11
2249DB 143,72,120,195,203,42
2250 ror r13,23
2251 mov r11,r14
2252 vpxor xmm11,xmm11,xmm10
2253 mov r12,r8
2254 ror r14,5
2255 xor r13,rdx
2256 xor r12,r9
2257 vpxor xmm11,xmm11,xmm9
2258 ror r13,4
2259 xor r14,r11
2260 and r12,rdx
2261 xor r13,rdx
2262 vpaddq xmm4,xmm4,xmm11
2263 add r10,QWORD[72+rsp]
2264 mov rdi,r11
2265 xor r12,r9
2266 ror r14,6
2267 vpaddq xmm10,xmm4,XMMWORD[rbp]
2268 xor rdi,rax
2269 add r10,r12
2270 ror r13,14
2271 and r15,rdi
2272 xor r14,r11
2273 add r10,r13
2274 xor r15,rax
2275 ror r14,28
2276 add rcx,r10
2277 add r10,r15
2278 mov r13,rcx
2279 add r14,r10
2280 vmovdqa XMMWORD[64+rsp],xmm10
2281 vpalignr xmm8,xmm6,xmm5,8
2282 ror r13,23
2283 mov r10,r14
2284 vpalignr xmm11,xmm2,xmm1,8
2285 mov r12,rdx
2286 ror r14,5
2287DB 143,72,120,195,200,56
2288 xor r13,rcx
2289 xor r12,r8
2290 vpsrlq xmm8,xmm8,7
2291 ror r13,4
2292 xor r14,r10
2293 vpaddq xmm5,xmm5,xmm11
2294 and r12,rcx
2295 xor r13,rcx
2296 add r9,QWORD[80+rsp]
2297 mov r15,r10
2298DB 143,72,120,195,209,7
2299 xor r12,r8
2300 ror r14,6
2301 vpxor xmm8,xmm8,xmm9
2302 xor r15,r11
2303 add r9,r12
2304 ror r13,14
2305 and rdi,r15
2306DB 143,104,120,195,220,3
2307 xor r14,r10
2308 add r9,r13
2309 vpxor xmm8,xmm8,xmm10
2310 xor rdi,r11
2311 ror r14,28
2312 vpsrlq xmm10,xmm4,6
2313 add rbx,r9
2314 add r9,rdi
2315 vpaddq xmm5,xmm5,xmm8
2316 mov r13,rbx
2317 add r14,r9
2318DB 143,72,120,195,203,42
2319 ror r13,23
2320 mov r9,r14
2321 vpxor xmm11,xmm11,xmm10
2322 mov r12,rcx
2323 ror r14,5
2324 xor r13,rbx
2325 xor r12,rdx
2326 vpxor xmm11,xmm11,xmm9
2327 ror r13,4
2328 xor r14,r9
2329 and r12,rbx
2330 xor r13,rbx
2331 vpaddq xmm5,xmm5,xmm11
2332 add r8,QWORD[88+rsp]
2333 mov rdi,r9
2334 xor r12,rdx
2335 ror r14,6
2336 vpaddq xmm10,xmm5,XMMWORD[32+rbp]
2337 xor rdi,r10
2338 add r8,r12
2339 ror r13,14
2340 and r15,rdi
2341 xor r14,r9
2342 add r8,r13
2343 xor r15,r10
2344 ror r14,28
2345 add rax,r8
2346 add r8,r15
2347 mov r13,rax
2348 add r14,r8
2349 vmovdqa XMMWORD[80+rsp],xmm10
2350 vpalignr xmm8,xmm7,xmm6,8
2351 ror r13,23
2352 mov r8,r14
2353 vpalignr xmm11,xmm3,xmm2,8
2354 mov r12,rbx
2355 ror r14,5
2356DB 143,72,120,195,200,56
2357 xor r13,rax
2358 xor r12,rcx
2359 vpsrlq xmm8,xmm8,7
2360 ror r13,4
2361 xor r14,r8
2362 vpaddq xmm6,xmm6,xmm11
2363 and r12,rax
2364 xor r13,rax
2365 add rdx,QWORD[96+rsp]
2366 mov r15,r8
2367DB 143,72,120,195,209,7
2368 xor r12,rcx
2369 ror r14,6
2370 vpxor xmm8,xmm8,xmm9
2371 xor r15,r9
2372 add rdx,r12
2373 ror r13,14
2374 and rdi,r15
2375DB 143,104,120,195,221,3
2376 xor r14,r8
2377 add rdx,r13
2378 vpxor xmm8,xmm8,xmm10
2379 xor rdi,r9
2380 ror r14,28
2381 vpsrlq xmm10,xmm5,6
2382 add r11,rdx
2383 add rdx,rdi
2384 vpaddq xmm6,xmm6,xmm8
2385 mov r13,r11
2386 add r14,rdx
2387DB 143,72,120,195,203,42
2388 ror r13,23
2389 mov rdx,r14
2390 vpxor xmm11,xmm11,xmm10
2391 mov r12,rax
2392 ror r14,5
2393 xor r13,r11
2394 xor r12,rbx
2395 vpxor xmm11,xmm11,xmm9
2396 ror r13,4
2397 xor r14,rdx
2398 and r12,r11
2399 xor r13,r11
2400 vpaddq xmm6,xmm6,xmm11
2401 add rcx,QWORD[104+rsp]
2402 mov rdi,rdx
2403 xor r12,rbx
2404 ror r14,6
2405 vpaddq xmm10,xmm6,XMMWORD[64+rbp]
2406 xor rdi,r8
2407 add rcx,r12
2408 ror r13,14
2409 and r15,rdi
2410 xor r14,rdx
2411 add rcx,r13
2412 xor r15,r8
2413 ror r14,28
2414 add r10,rcx
2415 add rcx,r15
2416 mov r13,r10
2417 add r14,rcx
2418 vmovdqa XMMWORD[96+rsp],xmm10
2419 vpalignr xmm8,xmm0,xmm7,8
2420 ror r13,23
2421 mov rcx,r14
2422 vpalignr xmm11,xmm4,xmm3,8
2423 mov r12,r11
2424 ror r14,5
2425DB 143,72,120,195,200,56
2426 xor r13,r10
2427 xor r12,rax
2428 vpsrlq xmm8,xmm8,7
2429 ror r13,4
2430 xor r14,rcx
2431 vpaddq xmm7,xmm7,xmm11
2432 and r12,r10
2433 xor r13,r10
2434 add rbx,QWORD[112+rsp]
2435 mov r15,rcx
2436DB 143,72,120,195,209,7
2437 xor r12,rax
2438 ror r14,6
2439 vpxor xmm8,xmm8,xmm9
2440 xor r15,rdx
2441 add rbx,r12
2442 ror r13,14
2443 and rdi,r15
2444DB 143,104,120,195,222,3
2445 xor r14,rcx
2446 add rbx,r13
2447 vpxor xmm8,xmm8,xmm10
2448 xor rdi,rdx
2449 ror r14,28
2450 vpsrlq xmm10,xmm6,6
2451 add r9,rbx
2452 add rbx,rdi
2453 vpaddq xmm7,xmm7,xmm8
2454 mov r13,r9
2455 add r14,rbx
2456DB 143,72,120,195,203,42
2457 ror r13,23
2458 mov rbx,r14
2459 vpxor xmm11,xmm11,xmm10
2460 mov r12,r10
2461 ror r14,5
2462 xor r13,r9
2463 xor r12,r11
2464 vpxor xmm11,xmm11,xmm9
2465 ror r13,4
2466 xor r14,rbx
2467 and r12,r9
2468 xor r13,r9
2469 vpaddq xmm7,xmm7,xmm11
2470 add rax,QWORD[120+rsp]
2471 mov rdi,rbx
2472 xor r12,r11
2473 ror r14,6
2474 vpaddq xmm10,xmm7,XMMWORD[96+rbp]
2475 xor rdi,rcx
2476 add rax,r12
2477 ror r13,14
2478 and r15,rdi
2479 xor r14,rbx
2480 add rax,r13
2481 xor r15,rcx
2482 ror r14,28
2483 add r8,rax
2484 add rax,r15
2485 mov r13,r8
2486 add r14,rax
2487 vmovdqa XMMWORD[112+rsp],xmm10
2488 cmp BYTE[135+rbp],0
2489 jne NEAR $L$xop_00_47
2490 ror r13,23
2491 mov rax,r14
2492 mov r12,r9
2493 ror r14,5
2494 xor r13,r8
2495 xor r12,r10
2496 ror r13,4
2497 xor r14,rax
2498 and r12,r8
2499 xor r13,r8
2500 add r11,QWORD[rsp]
2501 mov r15,rax
2502 xor r12,r10
2503 ror r14,6
2504 xor r15,rbx
2505 add r11,r12
2506 ror r13,14
2507 and rdi,r15
2508 xor r14,rax
2509 add r11,r13
2510 xor rdi,rbx
2511 ror r14,28
2512 add rdx,r11
2513 add r11,rdi
2514 mov r13,rdx
2515 add r14,r11
2516 ror r13,23
2517 mov r11,r14
2518 mov r12,r8
2519 ror r14,5
2520 xor r13,rdx
2521 xor r12,r9
2522 ror r13,4
2523 xor r14,r11
2524 and r12,rdx
2525 xor r13,rdx
2526 add r10,QWORD[8+rsp]
2527 mov rdi,r11
2528 xor r12,r9
2529 ror r14,6
2530 xor rdi,rax
2531 add r10,r12
2532 ror r13,14
2533 and r15,rdi
2534 xor r14,r11
2535 add r10,r13
2536 xor r15,rax
2537 ror r14,28
2538 add rcx,r10
2539 add r10,r15
2540 mov r13,rcx
2541 add r14,r10
2542 ror r13,23
2543 mov r10,r14
2544 mov r12,rdx
2545 ror r14,5
2546 xor r13,rcx
2547 xor r12,r8
2548 ror r13,4
2549 xor r14,r10
2550 and r12,rcx
2551 xor r13,rcx
2552 add r9,QWORD[16+rsp]
2553 mov r15,r10
2554 xor r12,r8
2555 ror r14,6
2556 xor r15,r11
2557 add r9,r12
2558 ror r13,14
2559 and rdi,r15
2560 xor r14,r10
2561 add r9,r13
2562 xor rdi,r11
2563 ror r14,28
2564 add rbx,r9
2565 add r9,rdi
2566 mov r13,rbx
2567 add r14,r9
2568 ror r13,23
2569 mov r9,r14
2570 mov r12,rcx
2571 ror r14,5
2572 xor r13,rbx
2573 xor r12,rdx
2574 ror r13,4
2575 xor r14,r9
2576 and r12,rbx
2577 xor r13,rbx
2578 add r8,QWORD[24+rsp]
2579 mov rdi,r9
2580 xor r12,rdx
2581 ror r14,6
2582 xor rdi,r10
2583 add r8,r12
2584 ror r13,14
2585 and r15,rdi
2586 xor r14,r9
2587 add r8,r13
2588 xor r15,r10
2589 ror r14,28
2590 add rax,r8
2591 add r8,r15
2592 mov r13,rax
2593 add r14,r8
2594 ror r13,23
2595 mov r8,r14
2596 mov r12,rbx
2597 ror r14,5
2598 xor r13,rax
2599 xor r12,rcx
2600 ror r13,4
2601 xor r14,r8
2602 and r12,rax
2603 xor r13,rax
2604 add rdx,QWORD[32+rsp]
2605 mov r15,r8
2606 xor r12,rcx
2607 ror r14,6
2608 xor r15,r9
2609 add rdx,r12
2610 ror r13,14
2611 and rdi,r15
2612 xor r14,r8
2613 add rdx,r13
2614 xor rdi,r9
2615 ror r14,28
2616 add r11,rdx
2617 add rdx,rdi
2618 mov r13,r11
2619 add r14,rdx
2620 ror r13,23
2621 mov rdx,r14
2622 mov r12,rax
2623 ror r14,5
2624 xor r13,r11
2625 xor r12,rbx
2626 ror r13,4
2627 xor r14,rdx
2628 and r12,r11
2629 xor r13,r11
2630 add rcx,QWORD[40+rsp]
2631 mov rdi,rdx
2632 xor r12,rbx
2633 ror r14,6
2634 xor rdi,r8
2635 add rcx,r12
2636 ror r13,14
2637 and r15,rdi
2638 xor r14,rdx
2639 add rcx,r13
2640 xor r15,r8
2641 ror r14,28
2642 add r10,rcx
2643 add rcx,r15
2644 mov r13,r10
2645 add r14,rcx
2646 ror r13,23
2647 mov rcx,r14
2648 mov r12,r11
2649 ror r14,5
2650 xor r13,r10
2651 xor r12,rax
2652 ror r13,4
2653 xor r14,rcx
2654 and r12,r10
2655 xor r13,r10
2656 add rbx,QWORD[48+rsp]
2657 mov r15,rcx
2658 xor r12,rax
2659 ror r14,6
2660 xor r15,rdx
2661 add rbx,r12
2662 ror r13,14
2663 and rdi,r15
2664 xor r14,rcx
2665 add rbx,r13
2666 xor rdi,rdx
2667 ror r14,28
2668 add r9,rbx
2669 add rbx,rdi
2670 mov r13,r9
2671 add r14,rbx
2672 ror r13,23
2673 mov rbx,r14
2674 mov r12,r10
2675 ror r14,5
2676 xor r13,r9
2677 xor r12,r11
2678 ror r13,4
2679 xor r14,rbx
2680 and r12,r9
2681 xor r13,r9
2682 add rax,QWORD[56+rsp]
2683 mov rdi,rbx
2684 xor r12,r11
2685 ror r14,6
2686 xor rdi,rcx
2687 add rax,r12
2688 ror r13,14
2689 and r15,rdi
2690 xor r14,rbx
2691 add rax,r13
2692 xor r15,rcx
2693 ror r14,28
2694 add r8,rax
2695 add rax,r15
2696 mov r13,r8
2697 add r14,rax
2698 ror r13,23
2699 mov rax,r14
2700 mov r12,r9
2701 ror r14,5
2702 xor r13,r8
2703 xor r12,r10
2704 ror r13,4
2705 xor r14,rax
2706 and r12,r8
2707 xor r13,r8
2708 add r11,QWORD[64+rsp]
2709 mov r15,rax
2710 xor r12,r10
2711 ror r14,6
2712 xor r15,rbx
2713 add r11,r12
2714 ror r13,14
2715 and rdi,r15
2716 xor r14,rax
2717 add r11,r13
2718 xor rdi,rbx
2719 ror r14,28
2720 add rdx,r11
2721 add r11,rdi
2722 mov r13,rdx
2723 add r14,r11
2724 ror r13,23
2725 mov r11,r14
2726 mov r12,r8
2727 ror r14,5
2728 xor r13,rdx
2729 xor r12,r9
2730 ror r13,4
2731 xor r14,r11
2732 and r12,rdx
2733 xor r13,rdx
2734 add r10,QWORD[72+rsp]
2735 mov rdi,r11
2736 xor r12,r9
2737 ror r14,6
2738 xor rdi,rax
2739 add r10,r12
2740 ror r13,14
2741 and r15,rdi
2742 xor r14,r11
2743 add r10,r13
2744 xor r15,rax
2745 ror r14,28
2746 add rcx,r10
2747 add r10,r15
2748 mov r13,rcx
2749 add r14,r10
2750 ror r13,23
2751 mov r10,r14
2752 mov r12,rdx
2753 ror r14,5
2754 xor r13,rcx
2755 xor r12,r8
2756 ror r13,4
2757 xor r14,r10
2758 and r12,rcx
2759 xor r13,rcx
2760 add r9,QWORD[80+rsp]
2761 mov r15,r10
2762 xor r12,r8
2763 ror r14,6
2764 xor r15,r11
2765 add r9,r12
2766 ror r13,14
2767 and rdi,r15
2768 xor r14,r10
2769 add r9,r13
2770 xor rdi,r11
2771 ror r14,28
2772 add rbx,r9
2773 add r9,rdi
2774 mov r13,rbx
2775 add r14,r9
2776 ror r13,23
2777 mov r9,r14
2778 mov r12,rcx
2779 ror r14,5
2780 xor r13,rbx
2781 xor r12,rdx
2782 ror r13,4
2783 xor r14,r9
2784 and r12,rbx
2785 xor r13,rbx
2786 add r8,QWORD[88+rsp]
2787 mov rdi,r9
2788 xor r12,rdx
2789 ror r14,6
2790 xor rdi,r10
2791 add r8,r12
2792 ror r13,14
2793 and r15,rdi
2794 xor r14,r9
2795 add r8,r13
2796 xor r15,r10
2797 ror r14,28
2798 add rax,r8
2799 add r8,r15
2800 mov r13,rax
2801 add r14,r8
2802 ror r13,23
2803 mov r8,r14
2804 mov r12,rbx
2805 ror r14,5
2806 xor r13,rax
2807 xor r12,rcx
2808 ror r13,4
2809 xor r14,r8
2810 and r12,rax
2811 xor r13,rax
2812 add rdx,QWORD[96+rsp]
2813 mov r15,r8
2814 xor r12,rcx
2815 ror r14,6
2816 xor r15,r9
2817 add rdx,r12
2818 ror r13,14
2819 and rdi,r15
2820 xor r14,r8
2821 add rdx,r13
2822 xor rdi,r9
2823 ror r14,28
2824 add r11,rdx
2825 add rdx,rdi
2826 mov r13,r11
2827 add r14,rdx
2828 ror r13,23
2829 mov rdx,r14
2830 mov r12,rax
2831 ror r14,5
2832 xor r13,r11
2833 xor r12,rbx
2834 ror r13,4
2835 xor r14,rdx
2836 and r12,r11
2837 xor r13,r11
2838 add rcx,QWORD[104+rsp]
2839 mov rdi,rdx
2840 xor r12,rbx
2841 ror r14,6
2842 xor rdi,r8
2843 add rcx,r12
2844 ror r13,14
2845 and r15,rdi
2846 xor r14,rdx
2847 add rcx,r13
2848 xor r15,r8
2849 ror r14,28
2850 add r10,rcx
2851 add rcx,r15
2852 mov r13,r10
2853 add r14,rcx
2854 ror r13,23
2855 mov rcx,r14
2856 mov r12,r11
2857 ror r14,5
2858 xor r13,r10
2859 xor r12,rax
2860 ror r13,4
2861 xor r14,rcx
2862 and r12,r10
2863 xor r13,r10
2864 add rbx,QWORD[112+rsp]
2865 mov r15,rcx
2866 xor r12,rax
2867 ror r14,6
2868 xor r15,rdx
2869 add rbx,r12
2870 ror r13,14
2871 and rdi,r15
2872 xor r14,rcx
2873 add rbx,r13
2874 xor rdi,rdx
2875 ror r14,28
2876 add r9,rbx
2877 add rbx,rdi
2878 mov r13,r9
2879 add r14,rbx
2880 ror r13,23
2881 mov rbx,r14
2882 mov r12,r10
2883 ror r14,5
2884 xor r13,r9
2885 xor r12,r11
2886 ror r13,4
2887 xor r14,rbx
2888 and r12,r9
2889 xor r13,r9
2890 add rax,QWORD[120+rsp]
2891 mov rdi,rbx
2892 xor r12,r11
2893 ror r14,6
2894 xor rdi,rcx
2895 add rax,r12
2896 ror r13,14
2897 and r15,rdi
2898 xor r14,rbx
2899 add rax,r13
2900 xor r15,rcx
2901 ror r14,28
2902 add r8,rax
2903 add rax,r15
2904 mov r13,r8
2905 add r14,rax
; ---------------------------------------------------------------------
; End of one 128-byte block (XOP path): fold the working registers back
; into the state, advance the input pointer and loop while input remains.
; The test sits at the bottom, so the block body has already run once
; before the first comparison against the end pointer.
; ---------------------------------------------------------------------
2906 mov rdi,QWORD[((128+0))+rsp] ; reload the state pointer saved by the prologue
2907 mov rax,r14 ; the last round left its sum in r14
2908
2909 add rax,QWORD[rdi]
2910 lea rsi,[128+rsi] ; next input block
2911 add rbx,QWORD[8+rdi]
2912 add rcx,QWORD[16+rdi]
2913 add rdx,QWORD[24+rdi]
2914 add r8,QWORD[32+rdi]
2915 add r9,QWORD[40+rdi]
2916 add r10,QWORD[48+rdi]
2917 add r11,QWORD[56+rdi]
2918
2919 cmp rsi,QWORD[((128+16))+rsp] ; input pointer vs. saved end pointer; the mov's below leave the flags intact
2920
2921 mov QWORD[rdi],rax
2922 mov QWORD[8+rdi],rbx
2923 mov QWORD[16+rdi],rcx
2924 mov QWORD[24+rdi],rdx
2925 mov QWORD[32+rdi],r8
2926 mov QWORD[40+rdi],r9
2927 mov QWORD[48+rdi],r10
2928 mov QWORD[56+rdi],r11
2929 jb NEAR $L$loop_xop ; unsigned: continue while rsi is below the end pointer
2930
; Epilogue: undo the prologue. rsi receives the rsp value recorded before
; the six pushes, so the pushed registers sit at -8 (rbx) down to -48 (r15)
; relative to it.
2931 mov rsi,QWORD[152+rsp]
2932
2933 vzeroupper
; xmm6..xmm11 are restored through rsp, which still points at the aligned frame.
2934 movaps xmm6,XMMWORD[((128+32))+rsp]
2935 movaps xmm7,XMMWORD[((128+48))+rsp]
2936 movaps xmm8,XMMWORD[((128+64))+rsp]
2937 movaps xmm9,XMMWORD[((128+80))+rsp]
2938 movaps xmm10,XMMWORD[((128+96))+rsp]
2939 movaps xmm11,XMMWORD[((128+112))+rsp]
2940 mov r15,QWORD[((-48))+rsi]
2941
2942 mov r14,QWORD[((-40))+rsi]
2943
2944 mov r13,QWORD[((-32))+rsi]
2945
2946 mov r12,QWORD[((-24))+rsi]
2947
2948 mov rbp,QWORD[((-16))+rsi]
2949
2950 mov rbx,QWORD[((-8))+rsi]
2951
2952 lea rsp,[rsi] ; drop the frame and the pushes in one step
2953
; Reload rdi/rsi from the slots just above the return address, where the
; Win64 entry sequence stored them.
2954$L$epilogue_xop:
2955 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2956 mov rsi,QWORD[16+rsp]
2957 DB 0F3h,0C3h ;repret
2958
2959$L$SEH_end_sha512_block_data_order_xop:
2960
2961ALIGN 64
; ---------------------------------------------------------------------
; sha512_block_data_order_avx: entry sequence, prologue and initial state
; load of the AVX code path (the rest of the routine follows below).
; Win64 entry: the three arguments arrive in rcx, rdx, r8 and are moved
; to rdi, rsi, rdx, the registers the body works with:
;      rdi = hash state (eight qwords, loaded below)
;      rsi = input pointer
;      rdx = count of 128-byte input blocks
; rdi and rsi are callee-saved in the Microsoft x64 convention, so their
; incoming values are first parked in the caller-provided slots at
; [8+rsp] and [16+rsp], just above the return address.
; ---------------------------------------------------------------------
2962sha512_block_data_order_avx:
2963 mov QWORD[8+rsp],rdi ;WIN64 prologue
2964 mov QWORD[16+rsp],rsi
2965 mov rax,rsp
2966$L$SEH_begin_sha512_block_data_order_avx:
2967 mov rdi,rcx
2968 mov rsi,rdx
2969 mov rdx,r8
2970
2971
2972
; Shared entry: the dispatch code in sha512_block_data_order jumps here
; with rdi/rsi/rdx already set up.
; Frame after "and rsp,-64" (rsp is then 64-byte aligned):
;      [rsp+0   .. rsp+127]  sixteen qwords staged for the round code
;      [rsp+128]             rdi on entry (state pointer)
;      [rsp+136]             rsi on entry (input pointer)
;      [rsp+144]             end-of-input pointer
;      [rsp+152]             rsp as it was at $L$avx_shortcut
;      [rsp+160 .. rsp+255]  saved xmm6..xmm11
2973$L$avx_shortcut:
2974 mov rax,rsp ; same value as set above on the fall-through path; needed for the jump-in path
2975
2976 push rbx
2977
2978 push rbp
2979
2980 push r12
2981
2982 push r13
2983
2984 push r14
2985
2986 push r15
2987
2988 shl rdx,4 ; with the *8 in the lea below: rdx = count * 128
2989 sub rsp,256
2990 lea rdx,[rdx*8+rsi] ; rdx = rsi + count*128 = end of input
2991 and rsp,-64 ; round rsp down to a 64-byte boundary
2992 mov QWORD[((128+0))+rsp],rdi
2993 mov QWORD[((128+8))+rsp],rsi
2994 mov QWORD[((128+16))+rsp],rdx
2995 mov QWORD[152+rsp],rax
2996
; xmm6..xmm11 are callee-saved in the Microsoft x64 convention; the slots
; are 16-byte aligned because rsp is 64-byte aligned.
2997 movaps XMMWORD[(128+32)+rsp],xmm6
2998 movaps XMMWORD[(128+48)+rsp],xmm7
2999 movaps XMMWORD[(128+64)+rsp],xmm8
3000 movaps XMMWORD[(128+80)+rsp],xmm9
3001 movaps XMMWORD[(128+96)+rsp],xmm10
3002 movaps XMMWORD[(128+112)+rsp],xmm11
3003$L$prologue_avx:
3004
3005 vzeroupper
; Load the eight state qwords into rax, rbx, rcx, rdx, r8..r11.
; This overwrites rdx; the end pointer it held is already at [144+rsp].
3006 mov rax,QWORD[rdi]
3007 mov rbx,QWORD[8+rdi]
3008 mov rcx,QWORD[16+rdi]
3009 mov rdx,QWORD[24+rdi]
3010 mov r8,QWORD[32+rdi]
3011 mov r9,QWORD[40+rdi]
3012 mov r10,QWORD[48+rdi]
3013 mov r11,QWORD[56+rdi]
3014 jmp NEAR $L$loop_avx
3015ALIGN 16
; ---------------------------------------------------------------------
; Per-block setup (AVX path).
; Loads the 128 input bytes at [rsi] into xmm0..xmm7, shuffles each
; vector with the mask read from K512+1280, adds eight 16-byte rows of
; constants addressed through rbp (= K512+128) and stages the sums at
; [rsp]..[127+rsp], where the round code picks them up with
; "add reg,QWORD[n+rsp]".
; NOTE(review): the mask is presumably a byte-swap mask, the vector
; counterpart of the bswap in the scalar path; K512 is not visible in
; this excerpt, so confirm against the table.
; NOTE(review): the constants are read at a 32-byte stride with 16-byte
; loads, which suggests each row of K512 is stored twice; confirm.
; ---------------------------------------------------------------------
3016$L$loop_avx:
3017 vmovdqa xmm11,XMMWORD[((K512+1280))] ; shuffle mask for the vpshufb's below
3018 vmovdqu xmm0,XMMWORD[rsi] ; unaligned loads: no alignment is assumed for rsi
3019 lea rbp,[((K512+128))] ; so -128+rbp addresses the start of K512
3020 vmovdqu xmm1,XMMWORD[16+rsi]
3021 vmovdqu xmm2,XMMWORD[32+rsi]
3022 vpshufb xmm0,xmm0,xmm11
3023 vmovdqu xmm3,XMMWORD[48+rsi]
3024 vpshufb xmm1,xmm1,xmm11
3025 vmovdqu xmm4,XMMWORD[64+rsi]
3026 vpshufb xmm2,xmm2,xmm11
3027 vmovdqu xmm5,XMMWORD[80+rsi]
3028 vpshufb xmm3,xmm3,xmm11
3029 vmovdqu xmm6,XMMWORD[96+rsi]
3030 vpshufb xmm4,xmm4,xmm11
3031 vmovdqu xmm7,XMMWORD[112+rsi]
3032 vpshufb xmm5,xmm5,xmm11
3033 vpaddq xmm8,xmm0,XMMWORD[((-128))+rbp]
3034 vpshufb xmm6,xmm6,xmm11
3035 vpaddq xmm9,xmm1,XMMWORD[((-96))+rbp]
3036 vpshufb xmm7,xmm7,xmm11 ; last use of the mask
3037 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]
3038 vpaddq xmm11,xmm3,XMMWORD[((-32))+rbp] ; xmm11 is reused as a temporary from here on
3039 vmovdqa XMMWORD[rsp],xmm8
3040 vpaddq xmm8,xmm4,XMMWORD[rbp]
3041 vmovdqa XMMWORD[16+rsp],xmm9
3042 vpaddq xmm9,xmm5,XMMWORD[32+rbp]
3043 vmovdqa XMMWORD[32+rsp],xmm10
3044 vpaddq xmm10,xmm6,XMMWORD[64+rbp]
3045 vmovdqa XMMWORD[48+rsp],xmm11
3046 vpaddq xmm11,xmm7,XMMWORD[96+rbp]
3047 vmovdqa XMMWORD[64+rsp],xmm8
3048 mov r14,rax ; seed for the first round: r14 = state word 0
3049 vmovdqa XMMWORD[80+rsp],xmm9
3050 mov rdi,rbx ; rdi stops being the state pointer here; a copy was saved at [128+rsp]
3051 vmovdqa XMMWORD[96+rsp],xmm10
3052 xor rdi,rcx ; rdi = state word 1 ^ state word 2
3053 vmovdqa XMMWORD[112+rsp],xmm11
3054 mov r13,r8 ; seed for the first round: r13 = state word 4
3055 jmp NEAR $L$avx_00_47
3056
3057ALIGN 16
3058$L$avx_00_47:
3059 add rbp,256
3060 vpalignr xmm8,xmm1,xmm0,8
3061 shrd r13,r13,23
3062 mov rax,r14
3063 vpalignr xmm11,xmm5,xmm4,8
3064 mov r12,r9
3065 shrd r14,r14,5
3066 vpsrlq xmm10,xmm8,1
3067 xor r13,r8
3068 xor r12,r10
3069 vpaddq xmm0,xmm0,xmm11
3070 shrd r13,r13,4
3071 xor r14,rax
3072 vpsrlq xmm11,xmm8,7
3073 and r12,r8
3074 xor r13,r8
3075 vpsllq xmm9,xmm8,56
3076 add r11,QWORD[rsp]
3077 mov r15,rax
3078 vpxor xmm8,xmm11,xmm10
3079 xor r12,r10
3080 shrd r14,r14,6
3081 vpsrlq xmm10,xmm10,7
3082 xor r15,rbx
3083 add r11,r12
3084 vpxor xmm8,xmm8,xmm9
3085 shrd r13,r13,14
3086 and rdi,r15
3087 vpsllq xmm9,xmm9,7
3088 xor r14,rax
3089 add r11,r13
3090 vpxor xmm8,xmm8,xmm10
3091 xor rdi,rbx
3092 shrd r14,r14,28
3093 vpsrlq xmm11,xmm7,6
3094 add rdx,r11
3095 add r11,rdi
3096 vpxor xmm8,xmm8,xmm9
3097 mov r13,rdx
3098 add r14,r11
3099 vpsllq xmm10,xmm7,3
3100 shrd r13,r13,23
3101 mov r11,r14
3102 vpaddq xmm0,xmm0,xmm8
3103 mov r12,r8
3104 shrd r14,r14,5
3105 vpsrlq xmm9,xmm7,19
3106 xor r13,rdx
3107 xor r12,r9
3108 vpxor xmm11,xmm11,xmm10
3109 shrd r13,r13,4
3110 xor r14,r11
3111 vpsllq xmm10,xmm10,42
3112 and r12,rdx
3113 xor r13,rdx
3114 vpxor xmm11,xmm11,xmm9
3115 add r10,QWORD[8+rsp]
3116 mov rdi,r11
3117 vpsrlq xmm9,xmm9,42
3118 xor r12,r9
3119 shrd r14,r14,6
3120 vpxor xmm11,xmm11,xmm10
3121 xor rdi,rax
3122 add r10,r12
3123 vpxor xmm11,xmm11,xmm9
3124 shrd r13,r13,14
3125 and r15,rdi
3126 vpaddq xmm0,xmm0,xmm11
3127 xor r14,r11
3128 add r10,r13
3129 vpaddq xmm10,xmm0,XMMWORD[((-128))+rbp]
3130 xor r15,rax
3131 shrd r14,r14,28
3132 add rcx,r10
3133 add r10,r15
3134 mov r13,rcx
3135 add r14,r10
3136 vmovdqa XMMWORD[rsp],xmm10
3137 vpalignr xmm8,xmm2,xmm1,8
3138 shrd r13,r13,23
3139 mov r10,r14
3140 vpalignr xmm11,xmm6,xmm5,8
3141 mov r12,rdx
3142 shrd r14,r14,5
3143 vpsrlq xmm10,xmm8,1
3144 xor r13,rcx
3145 xor r12,r8
3146 vpaddq xmm1,xmm1,xmm11
3147 shrd r13,r13,4
3148 xor r14,r10
3149 vpsrlq xmm11,xmm8,7
3150 and r12,rcx
3151 xor r13,rcx
3152 vpsllq xmm9,xmm8,56
3153 add r9,QWORD[16+rsp]
3154 mov r15,r10
3155 vpxor xmm8,xmm11,xmm10
3156 xor r12,r8
3157 shrd r14,r14,6
3158 vpsrlq xmm10,xmm10,7
3159 xor r15,r11
3160 add r9,r12
3161 vpxor xmm8,xmm8,xmm9
3162 shrd r13,r13,14
3163 and rdi,r15
3164 vpsllq xmm9,xmm9,7
3165 xor r14,r10
3166 add r9,r13
3167 vpxor xmm8,xmm8,xmm10
3168 xor rdi,r11
3169 shrd r14,r14,28
3170 vpsrlq xmm11,xmm0,6
3171 add rbx,r9
3172 add r9,rdi
3173 vpxor xmm8,xmm8,xmm9
3174 mov r13,rbx
3175 add r14,r9
3176 vpsllq xmm10,xmm0,3
3177 shrd r13,r13,23
3178 mov r9,r14
3179 vpaddq xmm1,xmm1,xmm8
3180 mov r12,rcx
3181 shrd r14,r14,5
3182 vpsrlq xmm9,xmm0,19
3183 xor r13,rbx
3184 xor r12,rdx
3185 vpxor xmm11,xmm11,xmm10
3186 shrd r13,r13,4
3187 xor r14,r9
3188 vpsllq xmm10,xmm10,42
3189 and r12,rbx
3190 xor r13,rbx
3191 vpxor xmm11,xmm11,xmm9
3192 add r8,QWORD[24+rsp]
3193 mov rdi,r9
3194 vpsrlq xmm9,xmm9,42
3195 xor r12,rdx
3196 shrd r14,r14,6
3197 vpxor xmm11,xmm11,xmm10
3198 xor rdi,r10
3199 add r8,r12
3200 vpxor xmm11,xmm11,xmm9
3201 shrd r13,r13,14
3202 and r15,rdi
3203 vpaddq xmm1,xmm1,xmm11
3204 xor r14,r9
3205 add r8,r13
3206 vpaddq xmm10,xmm1,XMMWORD[((-96))+rbp]
3207 xor r15,r10
3208 shrd r14,r14,28
3209 add rax,r8
3210 add r8,r15
3211 mov r13,rax
3212 add r14,r8
3213 vmovdqa XMMWORD[16+rsp],xmm10
3214 vpalignr xmm8,xmm3,xmm2,8
3215 shrd r13,r13,23
3216 mov r8,r14
3217 vpalignr xmm11,xmm7,xmm6,8
3218 mov r12,rbx
3219 shrd r14,r14,5
3220 vpsrlq xmm10,xmm8,1
3221 xor r13,rax
3222 xor r12,rcx
3223 vpaddq xmm2,xmm2,xmm11
3224 shrd r13,r13,4
3225 xor r14,r8
3226 vpsrlq xmm11,xmm8,7
3227 and r12,rax
3228 xor r13,rax
3229 vpsllq xmm9,xmm8,56
3230 add rdx,QWORD[32+rsp]
3231 mov r15,r8
3232 vpxor xmm8,xmm11,xmm10
3233 xor r12,rcx
3234 shrd r14,r14,6
3235 vpsrlq xmm10,xmm10,7
3236 xor r15,r9
3237 add rdx,r12
3238 vpxor xmm8,xmm8,xmm9
3239 shrd r13,r13,14
3240 and rdi,r15
3241 vpsllq xmm9,xmm9,7
3242 xor r14,r8
3243 add rdx,r13
3244 vpxor xmm8,xmm8,xmm10
3245 xor rdi,r9
3246 shrd r14,r14,28
3247 vpsrlq xmm11,xmm1,6
3248 add r11,rdx
3249 add rdx,rdi
3250 vpxor xmm8,xmm8,xmm9
3251 mov r13,r11
3252 add r14,rdx
3253 vpsllq xmm10,xmm1,3
3254 shrd r13,r13,23
3255 mov rdx,r14
3256 vpaddq xmm2,xmm2,xmm8
3257 mov r12,rax
3258 shrd r14,r14,5
3259 vpsrlq xmm9,xmm1,19
3260 xor r13,r11
3261 xor r12,rbx
3262 vpxor xmm11,xmm11,xmm10
3263 shrd r13,r13,4
3264 xor r14,rdx
3265 vpsllq xmm10,xmm10,42
3266 and r12,r11
3267 xor r13,r11
3268 vpxor xmm11,xmm11,xmm9
3269 add rcx,QWORD[40+rsp]
3270 mov rdi,rdx
3271 vpsrlq xmm9,xmm9,42
3272 xor r12,rbx
3273 shrd r14,r14,6
3274 vpxor xmm11,xmm11,xmm10
3275 xor rdi,r8
3276 add rcx,r12
3277 vpxor xmm11,xmm11,xmm9
3278 shrd r13,r13,14
3279 and r15,rdi
3280 vpaddq xmm2,xmm2,xmm11
3281 xor r14,rdx
3282 add rcx,r13
3283 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]
3284 xor r15,r8
3285 shrd r14,r14,28
3286 add r10,rcx
3287 add rcx,r15
3288 mov r13,r10
3289 add r14,rcx
3290 vmovdqa XMMWORD[32+rsp],xmm10
3291 vpalignr xmm8,xmm4,xmm3,8
3292 shrd r13,r13,23
3293 mov rcx,r14
3294 vpalignr xmm11,xmm0,xmm7,8
3295 mov r12,r11
3296 shrd r14,r14,5
3297 vpsrlq xmm10,xmm8,1
3298 xor r13,r10
3299 xor r12,rax
3300 vpaddq xmm3,xmm3,xmm11
3301 shrd r13,r13,4
3302 xor r14,rcx
3303 vpsrlq xmm11,xmm8,7
3304 and r12,r10
3305 xor r13,r10
3306 vpsllq xmm9,xmm8,56
3307 add rbx,QWORD[48+rsp]
3308 mov r15,rcx
3309 vpxor xmm8,xmm11,xmm10
3310 xor r12,rax
3311 shrd r14,r14,6
3312 vpsrlq xmm10,xmm10,7
3313 xor r15,rdx
3314 add rbx,r12
3315 vpxor xmm8,xmm8,xmm9
3316 shrd r13,r13,14
3317 and rdi,r15
3318 vpsllq xmm9,xmm9,7
3319 xor r14,rcx
3320 add rbx,r13
3321 vpxor xmm8,xmm8,xmm10
3322 xor rdi,rdx
3323 shrd r14,r14,28
3324 vpsrlq xmm11,xmm2,6
3325 add r9,rbx
3326 add rbx,rdi
3327 vpxor xmm8,xmm8,xmm9
3328 mov r13,r9
3329 add r14,rbx
3330 vpsllq xmm10,xmm2,3
3331 shrd r13,r13,23
3332 mov rbx,r14
3333 vpaddq xmm3,xmm3,xmm8
3334 mov r12,r10
3335 shrd r14,r14,5
3336 vpsrlq xmm9,xmm2,19
3337 xor r13,r9
3338 xor r12,r11
3339 vpxor xmm11,xmm11,xmm10
3340 shrd r13,r13,4
3341 xor r14,rbx
3342 vpsllq xmm10,xmm10,42
3343 and r12,r9
3344 xor r13,r9
3345 vpxor xmm11,xmm11,xmm9
3346 add rax,QWORD[56+rsp]
3347 mov rdi,rbx
3348 vpsrlq xmm9,xmm9,42
3349 xor r12,r11
3350 shrd r14,r14,6
3351 vpxor xmm11,xmm11,xmm10
3352 xor rdi,rcx
3353 add rax,r12
3354 vpxor xmm11,xmm11,xmm9
3355 shrd r13,r13,14
3356 and r15,rdi
3357 vpaddq xmm3,xmm3,xmm11
3358 xor r14,rbx
3359 add rax,r13
3360 vpaddq xmm10,xmm3,XMMWORD[((-32))+rbp]
3361 xor r15,rcx
3362 shrd r14,r14,28
3363 add r8,rax
3364 add rax,r15
3365 mov r13,r8
3366 add r14,rax
3367 vmovdqa XMMWORD[48+rsp],xmm10
3368 vpalignr xmm8,xmm5,xmm4,8
3369 shrd r13,r13,23
3370 mov rax,r14
3371 vpalignr xmm11,xmm1,xmm0,8
3372 mov r12,r9
3373 shrd r14,r14,5
3374 vpsrlq xmm10,xmm8,1
3375 xor r13,r8
3376 xor r12,r10
3377 vpaddq xmm4,xmm4,xmm11
3378 shrd r13,r13,4
3379 xor r14,rax
3380 vpsrlq xmm11,xmm8,7
3381 and r12,r8
3382 xor r13,r8
3383 vpsllq xmm9,xmm8,56
3384 add r11,QWORD[64+rsp]
3385 mov r15,rax
3386 vpxor xmm8,xmm11,xmm10
3387 xor r12,r10
3388 shrd r14,r14,6
3389 vpsrlq xmm10,xmm10,7
3390 xor r15,rbx
3391 add r11,r12
3392 vpxor xmm8,xmm8,xmm9
3393 shrd r13,r13,14
3394 and rdi,r15
3395 vpsllq xmm9,xmm9,7
3396 xor r14,rax
3397 add r11,r13
3398 vpxor xmm8,xmm8,xmm10
3399 xor rdi,rbx
3400 shrd r14,r14,28
3401 vpsrlq xmm11,xmm3,6
3402 add rdx,r11
3403 add r11,rdi
3404 vpxor xmm8,xmm8,xmm9
3405 mov r13,rdx
3406 add r14,r11
3407 vpsllq xmm10,xmm3,3
3408 shrd r13,r13,23
3409 mov r11,r14
3410 vpaddq xmm4,xmm4,xmm8
3411 mov r12,r8
3412 shrd r14,r14,5
3413 vpsrlq xmm9,xmm3,19
3414 xor r13,rdx
3415 xor r12,r9
3416 vpxor xmm11,xmm11,xmm10
3417 shrd r13,r13,4
3418 xor r14,r11
3419 vpsllq xmm10,xmm10,42
3420 and r12,rdx
3421 xor r13,rdx
3422 vpxor xmm11,xmm11,xmm9
3423 add r10,QWORD[72+rsp]
3424 mov rdi,r11
3425 vpsrlq xmm9,xmm9,42
3426 xor r12,r9
3427 shrd r14,r14,6
3428 vpxor xmm11,xmm11,xmm10
3429 xor rdi,rax
3430 add r10,r12
3431 vpxor xmm11,xmm11,xmm9
3432 shrd r13,r13,14
3433 and r15,rdi
3434 vpaddq xmm4,xmm4,xmm11
3435 xor r14,r11
3436 add r10,r13
3437 vpaddq xmm10,xmm4,XMMWORD[rbp]
3438 xor r15,rax
3439 shrd r14,r14,28
3440 add rcx,r10
3441 add r10,r15
3442 mov r13,rcx
3443 add r14,r10
3444 vmovdqa XMMWORD[64+rsp],xmm10
3445 vpalignr xmm8,xmm6,xmm5,8
3446 shrd r13,r13,23
3447 mov r10,r14
3448 vpalignr xmm11,xmm2,xmm1,8
3449 mov r12,rdx
3450 shrd r14,r14,5
3451 vpsrlq xmm10,xmm8,1
3452 xor r13,rcx
3453 xor r12,r8
3454 vpaddq xmm5,xmm5,xmm11
3455 shrd r13,r13,4
3456 xor r14,r10
3457 vpsrlq xmm11,xmm8,7
3458 and r12,rcx
3459 xor r13,rcx
3460 vpsllq xmm9,xmm8,56
3461 add r9,QWORD[80+rsp]
3462 mov r15,r10
3463 vpxor xmm8,xmm11,xmm10
3464 xor r12,r8
3465 shrd r14,r14,6
3466 vpsrlq xmm10,xmm10,7
3467 xor r15,r11
3468 add r9,r12
3469 vpxor xmm8,xmm8,xmm9
3470 shrd r13,r13,14
3471 and rdi,r15
3472 vpsllq xmm9,xmm9,7
3473 xor r14,r10
3474 add r9,r13
3475 vpxor xmm8,xmm8,xmm10
3476 xor rdi,r11
3477 shrd r14,r14,28
3478 vpsrlq xmm11,xmm4,6
3479 add rbx,r9
3480 add r9,rdi
3481 vpxor xmm8,xmm8,xmm9
3482 mov r13,rbx
3483 add r14,r9
3484 vpsllq xmm10,xmm4,3
3485 shrd r13,r13,23
3486 mov r9,r14
3487 vpaddq xmm5,xmm5,xmm8
3488 mov r12,rcx
3489 shrd r14,r14,5
3490 vpsrlq xmm9,xmm4,19
3491 xor r13,rbx
3492 xor r12,rdx
3493 vpxor xmm11,xmm11,xmm10
3494 shrd r13,r13,4
3495 xor r14,r9
3496 vpsllq xmm10,xmm10,42
3497 and r12,rbx
3498 xor r13,rbx
3499 vpxor xmm11,xmm11,xmm9
3500 add r8,QWORD[88+rsp]
3501 mov rdi,r9
3502 vpsrlq xmm9,xmm9,42
3503 xor r12,rdx
3504 shrd r14,r14,6
3505 vpxor xmm11,xmm11,xmm10
3506 xor rdi,r10
3507 add r8,r12
3508 vpxor xmm11,xmm11,xmm9
3509 shrd r13,r13,14
3510 and r15,rdi
3511 vpaddq xmm5,xmm5,xmm11
3512 xor r14,r9
3513 add r8,r13
3514 vpaddq xmm10,xmm5,XMMWORD[32+rbp]
3515 xor r15,r10
3516 shrd r14,r14,28
3517 add rax,r8
3518 add r8,r15
3519 mov r13,rax
3520 add r14,r8
3521 vmovdqa XMMWORD[80+rsp],xmm10
3522 vpalignr xmm8,xmm7,xmm6,8
3523 shrd r13,r13,23
3524 mov r8,r14
3525 vpalignr xmm11,xmm3,xmm2,8
3526 mov r12,rbx
3527 shrd r14,r14,5
3528 vpsrlq xmm10,xmm8,1
3529 xor r13,rax
3530 xor r12,rcx
3531 vpaddq xmm6,xmm6,xmm11
3532 shrd r13,r13,4
3533 xor r14,r8
3534 vpsrlq xmm11,xmm8,7
3535 and r12,rax
3536 xor r13,rax
3537 vpsllq xmm9,xmm8,56
3538 add rdx,QWORD[96+rsp]
3539 mov r15,r8
3540 vpxor xmm8,xmm11,xmm10
3541 xor r12,rcx
3542 shrd r14,r14,6
3543 vpsrlq xmm10,xmm10,7
3544 xor r15,r9
3545 add rdx,r12
3546 vpxor xmm8,xmm8,xmm9
3547 shrd r13,r13,14
3548 and rdi,r15
3549 vpsllq xmm9,xmm9,7
3550 xor r14,r8
3551 add rdx,r13
3552 vpxor xmm8,xmm8,xmm10
3553 xor rdi,r9
3554 shrd r14,r14,28
3555 vpsrlq xmm11,xmm5,6
3556 add r11,rdx
3557 add rdx,rdi
3558 vpxor xmm8,xmm8,xmm9
3559 mov r13,r11
3560 add r14,rdx
3561 vpsllq xmm10,xmm5,3
3562 shrd r13,r13,23
3563 mov rdx,r14
3564 vpaddq xmm6,xmm6,xmm8
3565 mov r12,rax
3566 shrd r14,r14,5
3567 vpsrlq xmm9,xmm5,19
3568 xor r13,r11
3569 xor r12,rbx
3570 vpxor xmm11,xmm11,xmm10
3571 shrd r13,r13,4
3572 xor r14,rdx
3573 vpsllq xmm10,xmm10,42
3574 and r12,r11
3575 xor r13,r11
3576 vpxor xmm11,xmm11,xmm9
3577 add rcx,QWORD[104+rsp]
3578 mov rdi,rdx
3579 vpsrlq xmm9,xmm9,42
3580 xor r12,rbx
3581 shrd r14,r14,6
3582 vpxor xmm11,xmm11,xmm10
3583 xor rdi,r8
3584 add rcx,r12
3585 vpxor xmm11,xmm11,xmm9
3586 shrd r13,r13,14
3587 and r15,rdi
3588 vpaddq xmm6,xmm6,xmm11
3589 xor r14,rdx
3590 add rcx,r13
3591 vpaddq xmm10,xmm6,XMMWORD[64+rbp]
3592 xor r15,r8
3593 shrd r14,r14,28
3594 add r10,rcx
3595 add rcx,r15
3596 mov r13,r10
3597 add r14,rcx
3598 vmovdqa XMMWORD[96+rsp],xmm10
3599 vpalignr xmm8,xmm0,xmm7,8
3600 shrd r13,r13,23
3601 mov rcx,r14
3602 vpalignr xmm11,xmm4,xmm3,8
3603 mov r12,r11
3604 shrd r14,r14,5
3605 vpsrlq xmm10,xmm8,1
3606 xor r13,r10
3607 xor r12,rax
3608 vpaddq xmm7,xmm7,xmm11
3609 shrd r13,r13,4
3610 xor r14,rcx
3611 vpsrlq xmm11,xmm8,7
3612 and r12,r10
3613 xor r13,r10
3614 vpsllq xmm9,xmm8,56
3615 add rbx,QWORD[112+rsp]
3616 mov r15,rcx
3617 vpxor xmm8,xmm11,xmm10
3618 xor r12,rax
3619 shrd r14,r14,6
3620 vpsrlq xmm10,xmm10,7
3621 xor r15,rdx
3622 add rbx,r12
3623 vpxor xmm8,xmm8,xmm9
3624 shrd r13,r13,14
3625 and rdi,r15
3626 vpsllq xmm9,xmm9,7
3627 xor r14,rcx
3628 add rbx,r13
3629 vpxor xmm8,xmm8,xmm10
3630 xor rdi,rdx
3631 shrd r14,r14,28
3632 vpsrlq xmm11,xmm6,6
3633 add r9,rbx
3634 add rbx,rdi
3635 vpxor xmm8,xmm8,xmm9
3636 mov r13,r9
3637 add r14,rbx
3638 vpsllq xmm10,xmm6,3
3639 shrd r13,r13,23
3640 mov rbx,r14
3641 vpaddq xmm7,xmm7,xmm8
3642 mov r12,r10
3643 shrd r14,r14,5
3644 vpsrlq xmm9,xmm6,19
3645 xor r13,r9
3646 xor r12,r11
3647 vpxor xmm11,xmm11,xmm10
3648 shrd r13,r13,4
3649 xor r14,rbx
3650 vpsllq xmm10,xmm10,42
3651 and r12,r9
3652 xor r13,r9
3653 vpxor xmm11,xmm11,xmm9
3654 add rax,QWORD[120+rsp]
3655 mov rdi,rbx
3656 vpsrlq xmm9,xmm9,42
3657 xor r12,r11
3658 shrd r14,r14,6
3659 vpxor xmm11,xmm11,xmm10
3660 xor rdi,rcx
3661 add rax,r12
3662 vpxor xmm11,xmm11,xmm9
3663 shrd r13,r13,14
3664 and r15,rdi
3665 vpaddq xmm7,xmm7,xmm11
3666 xor r14,rbx
3667 add rax,r13
3668 vpaddq xmm10,xmm7,XMMWORD[96+rbp]
3669 xor r15,rcx
3670 shrd r14,r14,28
3671 add r8,rax
3672 add rax,r15
3673 mov r13,r8
3674 add r14,rax
3675 vmovdqa XMMWORD[112+rsp],xmm10
3676 cmp BYTE[135+rbp],0
3677 jne NEAR $L$avx_00_47
3678 shrd r13,r13,23
3679 mov rax,r14
3680 mov r12,r9
3681 shrd r14,r14,5
3682 xor r13,r8
3683 xor r12,r10
3684 shrd r13,r13,4
3685 xor r14,rax
3686 and r12,r8
3687 xor r13,r8
3688 add r11,QWORD[rsp]
3689 mov r15,rax
3690 xor r12,r10
3691 shrd r14,r14,6
3692 xor r15,rbx
3693 add r11,r12
3694 shrd r13,r13,14
3695 and rdi,r15
3696 xor r14,rax
3697 add r11,r13
3698 xor rdi,rbx
3699 shrd r14,r14,28
3700 add rdx,r11
3701 add r11,rdi
3702 mov r13,rdx
3703 add r14,r11
3704 shrd r13,r13,23
3705 mov r11,r14
3706 mov r12,r8
3707 shrd r14,r14,5
3708 xor r13,rdx
3709 xor r12,r9
3710 shrd r13,r13,4
3711 xor r14,r11
3712 and r12,rdx
3713 xor r13,rdx
3714 add r10,QWORD[8+rsp]
3715 mov rdi,r11
3716 xor r12,r9
3717 shrd r14,r14,6
3718 xor rdi,rax
3719 add r10,r12
3720 shrd r13,r13,14
3721 and r15,rdi
3722 xor r14,r11
3723 add r10,r13
3724 xor r15,rax
3725 shrd r14,r14,28
3726 add rcx,r10
3727 add r10,r15
3728 mov r13,rcx
3729 add r14,r10
3730 shrd r13,r13,23
3731 mov r10,r14
3732 mov r12,rdx
3733 shrd r14,r14,5
3734 xor r13,rcx
3735 xor r12,r8
3736 shrd r13,r13,4
3737 xor r14,r10
3738 and r12,rcx
3739 xor r13,rcx
3740 add r9,QWORD[16+rsp]
3741 mov r15,r10
3742 xor r12,r8
3743 shrd r14,r14,6
3744 xor r15,r11
3745 add r9,r12
3746 shrd r13,r13,14
3747 and rdi,r15
3748 xor r14,r10
3749 add r9,r13
3750 xor rdi,r11
3751 shrd r14,r14,28
3752 add rbx,r9
3753 add r9,rdi
3754 mov r13,rbx
3755 add r14,r9
3756 shrd r13,r13,23
3757 mov r9,r14
3758 mov r12,rcx
3759 shrd r14,r14,5
3760 xor r13,rbx
3761 xor r12,rdx
3762 shrd r13,r13,4
3763 xor r14,r9
3764 and r12,rbx
3765 xor r13,rbx
3766 add r8,QWORD[24+rsp]
3767 mov rdi,r9
3768 xor r12,rdx
3769 shrd r14,r14,6
3770 xor rdi,r10
3771 add r8,r12
3772 shrd r13,r13,14
3773 and r15,rdi
3774 xor r14,r9
3775 add r8,r13
3776 xor r15,r10
3777 shrd r14,r14,28
3778 add rax,r8
3779 add r8,r15
3780 mov r13,rax
3781 add r14,r8
3782 shrd r13,r13,23
3783 mov r8,r14
3784 mov r12,rbx
3785 shrd r14,r14,5
3786 xor r13,rax
3787 xor r12,rcx
3788 shrd r13,r13,4
3789 xor r14,r8
3790 and r12,rax
3791 xor r13,rax
3792 add rdx,QWORD[32+rsp]
3793 mov r15,r8
3794 xor r12,rcx
3795 shrd r14,r14,6
3796 xor r15,r9
3797 add rdx,r12
3798 shrd r13,r13,14
3799 and rdi,r15
3800 xor r14,r8
3801 add rdx,r13
3802 xor rdi,r9
3803 shrd r14,r14,28
3804 add r11,rdx
3805 add rdx,rdi
3806 mov r13,r11
3807 add r14,rdx
3808 shrd r13,r13,23
3809 mov rdx,r14
3810 mov r12,rax
3811 shrd r14,r14,5
3812 xor r13,r11
3813 xor r12,rbx
3814 shrd r13,r13,4
3815 xor r14,rdx
3816 and r12,r11
3817 xor r13,r11
3818 add rcx,QWORD[40+rsp]
3819 mov rdi,rdx
3820 xor r12,rbx
3821 shrd r14,r14,6
3822 xor rdi,r8
3823 add rcx,r12
3824 shrd r13,r13,14
3825 and r15,rdi
3826 xor r14,rdx
3827 add rcx,r13
3828 xor r15,r8
3829 shrd r14,r14,28
3830 add r10,rcx
3831 add rcx,r15
3832 mov r13,r10
3833 add r14,rcx
3834 shrd r13,r13,23
3835 mov rcx,r14
3836 mov r12,r11
3837 shrd r14,r14,5
3838 xor r13,r10
3839 xor r12,rax
3840 shrd r13,r13,4
3841 xor r14,rcx
3842 and r12,r10
3843 xor r13,r10
3844 add rbx,QWORD[48+rsp]
3845 mov r15,rcx
3846 xor r12,rax
3847 shrd r14,r14,6
3848 xor r15,rdx
3849 add rbx,r12
3850 shrd r13,r13,14
3851 and rdi,r15
3852 xor r14,rcx
3853 add rbx,r13
3854 xor rdi,rdx
3855 shrd r14,r14,28
3856 add r9,rbx
3857 add rbx,rdi
3858 mov r13,r9
3859 add r14,rbx
3860 shrd r13,r13,23
3861 mov rbx,r14
3862 mov r12,r10
3863 shrd r14,r14,5
3864 xor r13,r9
3865 xor r12,r11
3866 shrd r13,r13,4
3867 xor r14,rbx
3868 and r12,r9
3869 xor r13,r9
3870 add rax,QWORD[56+rsp]
3871 mov rdi,rbx
3872 xor r12,r11
3873 shrd r14,r14,6
3874 xor rdi,rcx
3875 add rax,r12
3876 shrd r13,r13,14
3877 and r15,rdi
3878 xor r14,rbx
3879 add rax,r13
3880 xor r15,rcx
3881 shrd r14,r14,28
3882 add r8,rax
3883 add rax,r15
3884 mov r13,r8
3885 add r14,rax
3886 shrd r13,r13,23
3887 mov rax,r14
3888 mov r12,r9
3889 shrd r14,r14,5
3890 xor r13,r8
3891 xor r12,r10
3892 shrd r13,r13,4
3893 xor r14,rax
3894 and r12,r8
3895 xor r13,r8
3896 add r11,QWORD[64+rsp]
3897 mov r15,rax
3898 xor r12,r10
3899 shrd r14,r14,6
3900 xor r15,rbx
3901 add r11,r12
3902 shrd r13,r13,14
3903 and rdi,r15
3904 xor r14,rax
3905 add r11,r13
3906 xor rdi,rbx
3907 shrd r14,r14,28
3908 add rdx,r11
3909 add r11,rdi
3910 mov r13,rdx
3911 add r14,r11
3912 shrd r13,r13,23
3913 mov r11,r14
3914 mov r12,r8
3915 shrd r14,r14,5
3916 xor r13,rdx
3917 xor r12,r9
3918 shrd r13,r13,4
3919 xor r14,r11
3920 and r12,rdx
3921 xor r13,rdx
3922 add r10,QWORD[72+rsp]
3923 mov rdi,r11
3924 xor r12,r9
3925 shrd r14,r14,6
3926 xor rdi,rax
3927 add r10,r12
3928 shrd r13,r13,14
3929 and r15,rdi
3930 xor r14,r11
3931 add r10,r13
3932 xor r15,rax
3933 shrd r14,r14,28
3934 add rcx,r10
3935 add r10,r15
3936 mov r13,rcx
3937 add r14,r10
3938 shrd r13,r13,23
3939 mov r10,r14
3940 mov r12,rdx
3941 shrd r14,r14,5
3942 xor r13,rcx
3943 xor r12,r8
3944 shrd r13,r13,4
3945 xor r14,r10
3946 and r12,rcx
3947 xor r13,rcx
3948 add r9,QWORD[80+rsp]
3949 mov r15,r10
3950 xor r12,r8
3951 shrd r14,r14,6
3952 xor r15,r11
3953 add r9,r12
3954 shrd r13,r13,14
3955 and rdi,r15
3956 xor r14,r10
3957 add r9,r13
3958 xor rdi,r11
3959 shrd r14,r14,28
3960 add rbx,r9
3961 add r9,rdi
3962 mov r13,rbx
3963 add r14,r9
3964 shrd r13,r13,23
3965 mov r9,r14
3966 mov r12,rcx
3967 shrd r14,r14,5
3968 xor r13,rbx
3969 xor r12,rdx
3970 shrd r13,r13,4
3971 xor r14,r9
3972 and r12,rbx
3973 xor r13,rbx
3974 add r8,QWORD[88+rsp]
3975 mov rdi,r9
3976 xor r12,rdx
3977 shrd r14,r14,6
3978 xor rdi,r10
3979 add r8,r12
3980 shrd r13,r13,14
3981 and r15,rdi
3982 xor r14,r9
3983 add r8,r13
3984 xor r15,r10
3985 shrd r14,r14,28
3986 add rax,r8
3987 add r8,r15
3988 mov r13,rax
3989 add r14,r8
3990 shrd r13,r13,23
3991 mov r8,r14
3992 mov r12,rbx
3993 shrd r14,r14,5
3994 xor r13,rax
3995 xor r12,rcx
3996 shrd r13,r13,4
3997 xor r14,r8
3998 and r12,rax
3999 xor r13,rax
4000 add rdx,QWORD[96+rsp]
4001 mov r15,r8
4002 xor r12,rcx
4003 shrd r14,r14,6
4004 xor r15,r9
4005 add rdx,r12
4006 shrd r13,r13,14
4007 and rdi,r15
4008 xor r14,r8
4009 add rdx,r13
4010 xor rdi,r9
4011 shrd r14,r14,28
4012 add r11,rdx
4013 add rdx,rdi
4014 mov r13,r11
4015 add r14,rdx
4016 shrd r13,r13,23
4017 mov rdx,r14
4018 mov r12,rax
4019 shrd r14,r14,5
4020 xor r13,r11
4021 xor r12,rbx
4022 shrd r13,r13,4
4023 xor r14,rdx
4024 and r12,r11
4025 xor r13,r11
4026 add rcx,QWORD[104+rsp]
4027 mov rdi,rdx
4028 xor r12,rbx
4029 shrd r14,r14,6
4030 xor rdi,r8
4031 add rcx,r12
4032 shrd r13,r13,14
4033 and r15,rdi
4034 xor r14,rdx
4035 add rcx,r13
4036 xor r15,r8
4037 shrd r14,r14,28
4038 add r10,rcx
4039 add rcx,r15
4040 mov r13,r10
4041 add r14,rcx
4042 shrd r13,r13,23
4043 mov rcx,r14
4044 mov r12,r11
4045 shrd r14,r14,5
4046 xor r13,r10
4047 xor r12,rax
4048 shrd r13,r13,4
4049 xor r14,rcx
4050 and r12,r10
4051 xor r13,r10
4052 add rbx,QWORD[112+rsp]
4053 mov r15,rcx
4054 xor r12,rax
4055 shrd r14,r14,6
4056 xor r15,rdx
4057 add rbx,r12
4058 shrd r13,r13,14
4059 and rdi,r15
4060 xor r14,rcx
4061 add rbx,r13
4062 xor rdi,rdx
4063 shrd r14,r14,28
4064 add r9,rbx
4065 add rbx,rdi
4066 mov r13,r9
4067 add r14,rbx
4068 shrd r13,r13,23
4069 mov rbx,r14
4070 mov r12,r10
4071 shrd r14,r14,5
4072 xor r13,r9
4073 xor r12,r11
4074 shrd r13,r13,4
4075 xor r14,rbx
4076 and r12,r9
4077 xor r13,r9
4078 add rax,QWORD[120+rsp]
4079 mov rdi,rbx
4080 xor r12,r11
4081 shrd r14,r14,6
4082 xor rdi,rcx
4083 add rax,r12
4084 shrd r13,r13,14
4085 and r15,rdi
4086 xor r14,rbx
4087 add rax,r13
4088 xor r15,rcx
4089 shrd r14,r14,28
4090 add r8,rax
4091 add rax,r15
4092 mov r13,r8
4093 add r14,rax
4094 mov rdi,QWORD[((128+0))+rsp]
4095 mov rax,r14
4096
4097 add rax,QWORD[rdi]
4098 lea rsi,[128+rsi]
4099 add rbx,QWORD[8+rdi]
4100 add rcx,QWORD[16+rdi]
4101 add rdx,QWORD[24+rdi]
4102 add r8,QWORD[32+rdi]
4103 add r9,QWORD[40+rdi]
4104 add r10,QWORD[48+rdi]
4105 add r11,QWORD[56+rdi]
4106
4107 cmp rsi,QWORD[((128+16))+rsp]
4108
4109 mov QWORD[rdi],rax
4110 mov QWORD[8+rdi],rbx
4111 mov QWORD[16+rdi],rcx
4112 mov QWORD[24+rdi],rdx
4113 mov QWORD[32+rdi],r8
4114 mov QWORD[40+rdi],r9
4115 mov QWORD[48+rdi],r10
4116 mov QWORD[56+rdi],r11
4117 jb NEAR $L$loop_avx
4118
4119 mov rsi,QWORD[152+rsp]
4120
4121 vzeroupper
4122 movaps xmm6,XMMWORD[((128+32))+rsp]
4123 movaps xmm7,XMMWORD[((128+48))+rsp]
4124 movaps xmm8,XMMWORD[((128+64))+rsp]
4125 movaps xmm9,XMMWORD[((128+80))+rsp]
4126 movaps xmm10,XMMWORD[((128+96))+rsp]
4127 movaps xmm11,XMMWORD[((128+112))+rsp]
4128 mov r15,QWORD[((-48))+rsi]
4129
4130 mov r14,QWORD[((-40))+rsi]
4131
4132 mov r13,QWORD[((-32))+rsi]
4133
4134 mov r12,QWORD[((-24))+rsi]
4135
4136 mov rbp,QWORD[((-16))+rsi]
4137
4138 mov rbx,QWORD[((-8))+rsi]
4139
4140 lea rsp,[rsi]
4141
; Return path of the AVX code: reload rdi and rsi from the stack slots at
; rsp+8 and rsp+16, then return to the caller.
; NOTE(review): the stores that fill these two slots are outside this part of
; the file; presumably the function entry parks rdi/rsi there - confirm.
4142$L$epilogue_avx:
4143 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
4144 mov rsi,QWORD[16+rsp]
4145 DB 0F3h,0C3h ;repret (bytes F3 C3 = rep ret)
4146
4147$L$SEH_end_sha512_block_data_order_avx:
4148
4149ALIGN 64
; Win64 entry stub for the AVX2 code path.
; rdi and rsi are callee-saved in the Microsoft x64 convention, so they are
; parked in the stack slots at rsp+8 / rsp+16 before being reused. The three
; Win64 argument registers (rcx, rdx, r8) are then copied into rdi, rsi, rdx,
; which is where the code that follows reads its arguments.
; Execution falls through into $L$avx2_shortcut.
4150sha512_block_data_order_avx2:
4151 mov QWORD[8+rsp],rdi ;WIN64 prologue
4152 mov QWORD[16+rsp],rsi
4153 mov rax,rsp
4154$L$SEH_begin_sha512_block_data_order_avx2:
4155 mov rdi,rcx ;1st argument
4156 mov rsi,rdx ;2nd argument
4157 mov rdx,r8 ;3rd argument
4158
4159
4160
; Frame setup for the AVX2 path. Reached by fall-through from the entry stub
; above and by the `je NEAR $L$avx2_shortcut` in sha512_block_data_order.
; On entry: rdi, rsi, rdx hold the three arguments.
; Frame layout relative to the final rsp:
;   [128+0]   rdi (1st argument)
;   [128+8]   rsi (2nd argument)
;   [128+16]  rsi + rdx*128 (computed below)
;   [152]     rsp as it was on arrival at this label
;   [128+32 .. 128+127]  xmm6-xmm11 (callee-saved on Win64)
4161$L$avx2_shortcut:
4162 mov rax,rsp ;remember the incoming rsp; stored at [152+rsp] below
4163
4164 push rbx
4165
4166 push rbp
4167
4168 push r12
4169
4170 push r13
4171
4172 push r14
4173
4174 push r15
4175
4176 sub rsp,1408
4177 shl rdx,4 ;rdx *= 16 ...
4178 and rsp,-256*8 ;round rsp down to a 2048-byte boundary
4179 lea rdx,[rdx*8+rsi] ;... so rdx = rsi + (original rdx)*128
4180 add rsp,1152 ;rsp = 2048-byte boundary + 1152 (a multiple of 128)
4181 mov QWORD[((128+0))+rsp],rdi
4182 mov QWORD[((128+8))+rsp],rsi
4183 mov QWORD[((128+16))+rsp],rdx
4184 mov QWORD[152+rsp],rax
4185
4186 movaps XMMWORD[(128+32)+rsp],xmm6 ;aligned stores: rsp is a multiple of 128 here
4187 movaps XMMWORD[(128+48)+rsp],xmm7
4188 movaps XMMWORD[(128+64)+rsp],xmm8
4189 movaps XMMWORD[(128+80)+rsp],xmm9
4190 movaps XMMWORD[(128+96)+rsp],xmm10
4191 movaps XMMWORD[(128+112)+rsp],xmm11
; Load the eight 64-bit state words from [rdi] into
; rax, rbx, rcx, rdx, r8, r9, r10, r11 and set up the two input pointers:
;   rsi = input + 128  (the loop reads the current block at [rsi-128 .. rsi-1])
;   r12 = rsi, or rsp when rsi equals the end pointer held in rdx, so that the
;         loads through r12 in the loop read from the stack frame instead.
; rdx is overwritten with a state word here; the end pointer was already
; saved at [128+16+rsp].
4192$L$prologue_avx2:
4193
4194 vzeroupper
4195 sub rsi,-16*8 ;rsi += 128
4196 mov rax,QWORD[rdi]
4197 mov r12,rsi
4198 mov rbx,QWORD[8+rdi]
4199 cmp rsi,rdx ;flags are consumed by the cmove below (mov leaves them intact)
4200 mov rcx,QWORD[16+rdi]
4201 cmove r12,rsp
4202 mov rdx,QWORD[24+rdi]
4203 mov r8,QWORD[32+rdi]
4204 mov r9,QWORD[40+rdi]
4205 mov r10,QWORD[48+rdi]
4206 mov r11,QWORD[56+rdi]
4207 jmp NEAR $L$oop_avx2
4208ALIGN 16
; Per-iteration setup: load 128 bytes of input, byte-shuffle them, add the
; first table entries and spill the sums to the stack.
;   low  128-bit lane of ymm0-ymm7: the 128 bytes at [rsi-128 .. rsi-1]
;   high 128-bit lane of ymm0-ymm7: the 128 bytes at [r12 .. r12+127]
;   rbp = K512+128; the 32 bytes at [rbp+1152] are the vpshufb control
;   (NOTE(review): presumably a byte-swap mask - the table is not visible here)
; The sums ymm0..ymm3 + table go to [rsp .. rsp+127]; rsp is then lowered by
; 128 and the sums ymm4..ymm7 + table go to the new [rsp .. rsp+127].
; Scalar state prepared for the round code: r14 = 0, rdi = rbx ^ rcx, r12 = r9.
4209$L$oop_avx2:
4210 vmovdqu xmm0,XMMWORD[((-128))+rsi]
4211 vmovdqu xmm1,XMMWORD[((-128+16))+rsi]
4212 vmovdqu xmm2,XMMWORD[((-128+32))+rsi]
4213 lea rbp,[((K512+128))]
4214 vmovdqu xmm3,XMMWORD[((-128+48))+rsi]
4215 vmovdqu xmm4,XMMWORD[((-128+64))+rsi]
4216 vmovdqu xmm5,XMMWORD[((-128+80))+rsi]
4217 vmovdqu xmm6,XMMWORD[((-128+96))+rsi]
4218 vmovdqu xmm7,XMMWORD[((-128+112))+rsi]
4219
4220 vmovdqa ymm10,YMMWORD[1152+rbp] ;shuffle control for the vpshufb's below
4221 vinserti128 ymm0,ymm0,XMMWORD[r12],1
4222 vinserti128 ymm1,ymm1,XMMWORD[16+r12],1
4223 vpshufb ymm0,ymm0,ymm10
4224 vinserti128 ymm2,ymm2,XMMWORD[32+r12],1
4225 vpshufb ymm1,ymm1,ymm10
4226 vinserti128 ymm3,ymm3,XMMWORD[48+r12],1
4227 vpshufb ymm2,ymm2,ymm10
4228 vinserti128 ymm4,ymm4,XMMWORD[64+r12],1
4229 vpshufb ymm3,ymm3,ymm10
4230 vinserti128 ymm5,ymm5,XMMWORD[80+r12],1
4231 vpshufb ymm4,ymm4,ymm10
4232 vinserti128 ymm6,ymm6,XMMWORD[96+r12],1
4233 vpshufb ymm5,ymm5,ymm10
4234 vinserti128 ymm7,ymm7,XMMWORD[112+r12],1
4235
4236 vpaddq ymm8,ymm0,YMMWORD[((-128))+rbp]
4237 vpshufb ymm6,ymm6,ymm10
4238 vpaddq ymm9,ymm1,YMMWORD[((-96))+rbp]
4239 vpshufb ymm7,ymm7,ymm10 ;last use of ymm10 as the shuffle control
4240 vpaddq ymm10,ymm2,YMMWORD[((-64))+rbp]
4241 vpaddq ymm11,ymm3,YMMWORD[((-32))+rbp]
4242 vmovdqa YMMWORD[rsp],ymm8
4243 vpaddq ymm8,ymm4,YMMWORD[rbp]
4244 vmovdqa YMMWORD[32+rsp],ymm9
4245 vpaddq ymm9,ymm5,YMMWORD[32+rbp]
4246 vmovdqa YMMWORD[64+rsp],ymm10
4247 vpaddq ymm10,ymm6,YMMWORD[64+rbp]
4248 vmovdqa YMMWORD[96+rsp],ymm11
4249 lea rsp,[((-128))+rsp] ;next 128-byte slot for the second group of sums
4250 vpaddq ymm11,ymm7,YMMWORD[96+rbp]
4251 vmovdqa YMMWORD[rsp],ymm8
4252 xor r14,r14
4253 vmovdqa YMMWORD[32+rsp],ymm9
4254 mov rdi,rbx
4255 vmovdqa YMMWORD[64+rsp],ymm10
4256 xor rdi,rcx ;rdi = rbx ^ rcx
4257 vmovdqa YMMWORD[96+rsp],ymm11
4258 mov r12,r9
4259 add rbp,16*2*8 ;advance the table pointer by 256 bytes
4260 jmp NEAR $L$avx2_00_47
4261
4262ALIGN 16
; Main AVX2 loop: one pass performs 16 scalar rounds interleaved with the
; vector update of ymm0..ymm7.
;  * rsp is lowered by 128 twice per pass (start and middle). After each
;    lowering, eight rounds read their addend from [256+rsp + 0,8,32,40,64,
;    72,96,104], i.e. the low 16 bytes of each 32-byte entry stored two
;    128-byte slots earlier.
;  * Each round rotates one working register with rorx by 41/18/14 and another
;    by 39/34/28, and combines and/andn/xor terms; the eight working registers
;    rax,rbx,rcx,rdx,r8,r9,r10,r11 rotate roles from round to round.
;    r12-r15 and rdi are scratch; r14 carries a term that is added into the
;    destination register at the start of the following round.
;  * Every two rounds one of ymm0..ymm7 is recomputed from the others using
;    the shift/xor sequences below, the table entry at rbp-128..rbp+96 is
;    added, and the sum is stored at [rsp + 0/32/64/96].
;  * After each pass rbp advances by 256; the loop repeats while the byte at
;    [rbp-121] is non-zero.
;    NOTE(review): this presumably detects the end of the constant table,
;    which is not visible in this part of the file - confirm.
4263$L$avx2_00_47:
4264 lea rsp,[((-128))+rsp] ;open a new 128-byte slot for the sums stored below
4265 vpalignr ymm8,ymm1,ymm0,8
4266 add r11,QWORD[((0+256))+rsp]
4267 and r12,r8
4268 rorx r13,r8,41
4269 vpalignr ymm11,ymm5,ymm4,8
4270 rorx r15,r8,18
4271 lea rax,[r14*1+rax]
4272 lea r11,[r12*1+r11]
4273 vpsrlq ymm10,ymm8,1
4274 andn r12,r8,r10
4275 xor r13,r15
4276 rorx r14,r8,14
4277 vpaddq ymm0,ymm0,ymm11
4278 vpsrlq ymm11,ymm8,7
4279 lea r11,[r12*1+r11]
4280 xor r13,r14
4281 mov r15,rax
4282 vpsllq ymm9,ymm8,56
4283 vpxor ymm8,ymm11,ymm10
4284 rorx r12,rax,39
4285 lea r11,[r13*1+r11]
4286 xor r15,rbx
4287 vpsrlq ymm10,ymm10,7
4288 vpxor ymm8,ymm8,ymm9
4289 rorx r14,rax,34
4290 rorx r13,rax,28
4291 lea rdx,[r11*1+rdx]
4292 vpsllq ymm9,ymm9,7
4293 vpxor ymm8,ymm8,ymm10
4294 and rdi,r15
4295 xor r14,r12
4296 xor rdi,rbx
4297 vpsrlq ymm11,ymm7,6
4298 vpxor ymm8,ymm8,ymm9
4299 xor r14,r13
4300 lea r11,[rdi*1+r11]
4301 mov r12,r8
4302 vpsllq ymm10,ymm7,3
4303 vpaddq ymm0,ymm0,ymm8
4304 add r10,QWORD[((8+256))+rsp]
4305 and r12,rdx
4306 rorx r13,rdx,41
4307 vpsrlq ymm9,ymm7,19
4308 vpxor ymm11,ymm11,ymm10
4309 rorx rdi,rdx,18
4310 lea r11,[r14*1+r11]
4311 lea r10,[r12*1+r10]
4312 vpsllq ymm10,ymm10,42
4313 vpxor ymm11,ymm11,ymm9
4314 andn r12,rdx,r9
4315 xor r13,rdi
4316 rorx r14,rdx,14
4317 vpsrlq ymm9,ymm9,42
4318 vpxor ymm11,ymm11,ymm10
4319 lea r10,[r12*1+r10]
4320 xor r13,r14
4321 mov rdi,r11
4322 vpxor ymm11,ymm11,ymm9
4323 rorx r12,r11,39
4324 lea r10,[r13*1+r10]
4325 xor rdi,rax
4326 vpaddq ymm0,ymm0,ymm11
4327 rorx r14,r11,34
4328 rorx r13,r11,28
4329 lea rcx,[r10*1+rcx]
4330 vpaddq ymm10,ymm0,YMMWORD[((-128))+rbp]
4331 and r15,rdi
4332 xor r14,r12
4333 xor r15,rax
4334 xor r14,r13
4335 lea r10,[r15*1+r10]
4336 mov r12,rdx
4337 vmovdqa YMMWORD[rsp],ymm10
4338 vpalignr ymm8,ymm2,ymm1,8
4339 add r9,QWORD[((32+256))+rsp]
4340 and r12,rcx
4341 rorx r13,rcx,41
4342 vpalignr ymm11,ymm6,ymm5,8
4343 rorx r15,rcx,18
4344 lea r10,[r14*1+r10]
4345 lea r9,[r12*1+r9]
4346 vpsrlq ymm10,ymm8,1
4347 andn r12,rcx,r8
4348 xor r13,r15
4349 rorx r14,rcx,14
4350 vpaddq ymm1,ymm1,ymm11
4351 vpsrlq ymm11,ymm8,7
4352 lea r9,[r12*1+r9]
4353 xor r13,r14
4354 mov r15,r10
4355 vpsllq ymm9,ymm8,56
4356 vpxor ymm8,ymm11,ymm10
4357 rorx r12,r10,39
4358 lea r9,[r13*1+r9]
4359 xor r15,r11
4360 vpsrlq ymm10,ymm10,7
4361 vpxor ymm8,ymm8,ymm9
4362 rorx r14,r10,34
4363 rorx r13,r10,28
4364 lea rbx,[r9*1+rbx]
4365 vpsllq ymm9,ymm9,7
4366 vpxor ymm8,ymm8,ymm10
4367 and rdi,r15
4368 xor r14,r12
4369 xor rdi,r11
4370 vpsrlq ymm11,ymm0,6
4371 vpxor ymm8,ymm8,ymm9
4372 xor r14,r13
4373 lea r9,[rdi*1+r9]
4374 mov r12,rcx
4375 vpsllq ymm10,ymm0,3
4376 vpaddq ymm1,ymm1,ymm8
4377 add r8,QWORD[((40+256))+rsp]
4378 and r12,rbx
4379 rorx r13,rbx,41
4380 vpsrlq ymm9,ymm0,19
4381 vpxor ymm11,ymm11,ymm10
4382 rorx rdi,rbx,18
4383 lea r9,[r14*1+r9]
4384 lea r8,[r12*1+r8]
4385 vpsllq ymm10,ymm10,42
4386 vpxor ymm11,ymm11,ymm9
4387 andn r12,rbx,rdx
4388 xor r13,rdi
4389 rorx r14,rbx,14
4390 vpsrlq ymm9,ymm9,42
4391 vpxor ymm11,ymm11,ymm10
4392 lea r8,[r12*1+r8]
4393 xor r13,r14
4394 mov rdi,r9
4395 vpxor ymm11,ymm11,ymm9
4396 rorx r12,r9,39
4397 lea r8,[r13*1+r8]
4398 xor rdi,r10
4399 vpaddq ymm1,ymm1,ymm11
4400 rorx r14,r9,34
4401 rorx r13,r9,28
4402 lea rax,[r8*1+rax]
4403 vpaddq ymm10,ymm1,YMMWORD[((-96))+rbp]
4404 and r15,rdi
4405 xor r14,r12
4406 xor r15,r10
4407 xor r14,r13
4408 lea r8,[r15*1+r8]
4409 mov r12,rbx
4410 vmovdqa YMMWORD[32+rsp],ymm10
4411 vpalignr ymm8,ymm3,ymm2,8
4412 add rdx,QWORD[((64+256))+rsp]
4413 and r12,rax
4414 rorx r13,rax,41
4415 vpalignr ymm11,ymm7,ymm6,8
4416 rorx r15,rax,18
4417 lea r8,[r14*1+r8]
4418 lea rdx,[r12*1+rdx]
4419 vpsrlq ymm10,ymm8,1
4420 andn r12,rax,rcx
4421 xor r13,r15
4422 rorx r14,rax,14
4423 vpaddq ymm2,ymm2,ymm11
4424 vpsrlq ymm11,ymm8,7
4425 lea rdx,[r12*1+rdx]
4426 xor r13,r14
4427 mov r15,r8
4428 vpsllq ymm9,ymm8,56
4429 vpxor ymm8,ymm11,ymm10
4430 rorx r12,r8,39
4431 lea rdx,[r13*1+rdx]
4432 xor r15,r9
4433 vpsrlq ymm10,ymm10,7
4434 vpxor ymm8,ymm8,ymm9
4435 rorx r14,r8,34
4436 rorx r13,r8,28
4437 lea r11,[rdx*1+r11]
4438 vpsllq ymm9,ymm9,7
4439 vpxor ymm8,ymm8,ymm10
4440 and rdi,r15
4441 xor r14,r12
4442 xor rdi,r9
4443 vpsrlq ymm11,ymm1,6
4444 vpxor ymm8,ymm8,ymm9
4445 xor r14,r13
4446 lea rdx,[rdi*1+rdx]
4447 mov r12,rax
4448 vpsllq ymm10,ymm1,3
4449 vpaddq ymm2,ymm2,ymm8
4450 add rcx,QWORD[((72+256))+rsp]
4451 and r12,r11
4452 rorx r13,r11,41
4453 vpsrlq ymm9,ymm1,19
4454 vpxor ymm11,ymm11,ymm10
4455 rorx rdi,r11,18
4456 lea rdx,[r14*1+rdx]
4457 lea rcx,[r12*1+rcx]
4458 vpsllq ymm10,ymm10,42
4459 vpxor ymm11,ymm11,ymm9
4460 andn r12,r11,rbx
4461 xor r13,rdi
4462 rorx r14,r11,14
4463 vpsrlq ymm9,ymm9,42
4464 vpxor ymm11,ymm11,ymm10
4465 lea rcx,[r12*1+rcx]
4466 xor r13,r14
4467 mov rdi,rdx
4468 vpxor ymm11,ymm11,ymm9
4469 rorx r12,rdx,39
4470 lea rcx,[r13*1+rcx]
4471 xor rdi,r8
4472 vpaddq ymm2,ymm2,ymm11
4473 rorx r14,rdx,34
4474 rorx r13,rdx,28
4475 lea r10,[rcx*1+r10]
4476 vpaddq ymm10,ymm2,YMMWORD[((-64))+rbp]
4477 and r15,rdi
4478 xor r14,r12
4479 xor r15,r8
4480 xor r14,r13
4481 lea rcx,[r15*1+rcx]
4482 mov r12,r11
4483 vmovdqa YMMWORD[64+rsp],ymm10
4484 vpalignr ymm8,ymm4,ymm3,8
4485 add rbx,QWORD[((96+256))+rsp]
4486 and r12,r10
4487 rorx r13,r10,41
4488 vpalignr ymm11,ymm0,ymm7,8
4489 rorx r15,r10,18
4490 lea rcx,[r14*1+rcx]
4491 lea rbx,[r12*1+rbx]
4492 vpsrlq ymm10,ymm8,1
4493 andn r12,r10,rax
4494 xor r13,r15
4495 rorx r14,r10,14
4496 vpaddq ymm3,ymm3,ymm11
4497 vpsrlq ymm11,ymm8,7
4498 lea rbx,[r12*1+rbx]
4499 xor r13,r14
4500 mov r15,rcx
4501 vpsllq ymm9,ymm8,56
4502 vpxor ymm8,ymm11,ymm10
4503 rorx r12,rcx,39
4504 lea rbx,[r13*1+rbx]
4505 xor r15,rdx
4506 vpsrlq ymm10,ymm10,7
4507 vpxor ymm8,ymm8,ymm9
4508 rorx r14,rcx,34
4509 rorx r13,rcx,28
4510 lea r9,[rbx*1+r9]
4511 vpsllq ymm9,ymm9,7
4512 vpxor ymm8,ymm8,ymm10
4513 and rdi,r15
4514 xor r14,r12
4515 xor rdi,rdx
4516 vpsrlq ymm11,ymm2,6
4517 vpxor ymm8,ymm8,ymm9
4518 xor r14,r13
4519 lea rbx,[rdi*1+rbx]
4520 mov r12,r10
4521 vpsllq ymm10,ymm2,3
4522 vpaddq ymm3,ymm3,ymm8
4523 add rax,QWORD[((104+256))+rsp]
4524 and r12,r9
4525 rorx r13,r9,41
4526 vpsrlq ymm9,ymm2,19
4527 vpxor ymm11,ymm11,ymm10
4528 rorx rdi,r9,18
4529 lea rbx,[r14*1+rbx]
4530 lea rax,[r12*1+rax]
4531 vpsllq ymm10,ymm10,42
4532 vpxor ymm11,ymm11,ymm9
4533 andn r12,r9,r11
4534 xor r13,rdi
4535 rorx r14,r9,14
4536 vpsrlq ymm9,ymm9,42
4537 vpxor ymm11,ymm11,ymm10
4538 lea rax,[r12*1+rax]
4539 xor r13,r14
4540 mov rdi,rbx
4541 vpxor ymm11,ymm11,ymm9
4542 rorx r12,rbx,39
4543 lea rax,[r13*1+rax]
4544 xor rdi,rcx
4545 vpaddq ymm3,ymm3,ymm11
4546 rorx r14,rbx,34
4547 rorx r13,rbx,28
4548 lea r8,[rax*1+r8]
4549 vpaddq ymm10,ymm3,YMMWORD[((-32))+rbp]
4550 and r15,rdi
4551 xor r14,r12
4552 xor r15,rcx
4553 xor r14,r13
4554 lea rax,[r15*1+rax]
4555 mov r12,r9
4556 vmovdqa YMMWORD[96+rsp],ymm10
4557 lea rsp,[((-128))+rsp] ;second half of the pass: another 128-byte slot
4558 vpalignr ymm8,ymm5,ymm4,8
4559 add r11,QWORD[((0+256))+rsp]
4560 and r12,r8
4561 rorx r13,r8,41
4562 vpalignr ymm11,ymm1,ymm0,8
4563 rorx r15,r8,18
4564 lea rax,[r14*1+rax]
4565 lea r11,[r12*1+r11]
4566 vpsrlq ymm10,ymm8,1
4567 andn r12,r8,r10
4568 xor r13,r15
4569 rorx r14,r8,14
4570 vpaddq ymm4,ymm4,ymm11
4571 vpsrlq ymm11,ymm8,7
4572 lea r11,[r12*1+r11]
4573 xor r13,r14
4574 mov r15,rax
4575 vpsllq ymm9,ymm8,56
4576 vpxor ymm8,ymm11,ymm10
4577 rorx r12,rax,39
4578 lea r11,[r13*1+r11]
4579 xor r15,rbx
4580 vpsrlq ymm10,ymm10,7
4581 vpxor ymm8,ymm8,ymm9
4582 rorx r14,rax,34
4583 rorx r13,rax,28
4584 lea rdx,[r11*1+rdx]
4585 vpsllq ymm9,ymm9,7
4586 vpxor ymm8,ymm8,ymm10
4587 and rdi,r15
4588 xor r14,r12
4589 xor rdi,rbx
4590 vpsrlq ymm11,ymm3,6
4591 vpxor ymm8,ymm8,ymm9
4592 xor r14,r13
4593 lea r11,[rdi*1+r11]
4594 mov r12,r8
4595 vpsllq ymm10,ymm3,3
4596 vpaddq ymm4,ymm4,ymm8
4597 add r10,QWORD[((8+256))+rsp]
4598 and r12,rdx
4599 rorx r13,rdx,41
4600 vpsrlq ymm9,ymm3,19
4601 vpxor ymm11,ymm11,ymm10
4602 rorx rdi,rdx,18
4603 lea r11,[r14*1+r11]
4604 lea r10,[r12*1+r10]
4605 vpsllq ymm10,ymm10,42
4606 vpxor ymm11,ymm11,ymm9
4607 andn r12,rdx,r9
4608 xor r13,rdi
4609 rorx r14,rdx,14
4610 vpsrlq ymm9,ymm9,42
4611 vpxor ymm11,ymm11,ymm10
4612 lea r10,[r12*1+r10]
4613 xor r13,r14
4614 mov rdi,r11
4615 vpxor ymm11,ymm11,ymm9
4616 rorx r12,r11,39
4617 lea r10,[r13*1+r10]
4618 xor rdi,rax
4619 vpaddq ymm4,ymm4,ymm11
4620 rorx r14,r11,34
4621 rorx r13,r11,28
4622 lea rcx,[r10*1+rcx]
4623 vpaddq ymm10,ymm4,YMMWORD[rbp]
4624 and r15,rdi
4625 xor r14,r12
4626 xor r15,rax
4627 xor r14,r13
4628 lea r10,[r15*1+r10]
4629 mov r12,rdx
4630 vmovdqa YMMWORD[rsp],ymm10
4631 vpalignr ymm8,ymm6,ymm5,8
4632 add r9,QWORD[((32+256))+rsp]
4633 and r12,rcx
4634 rorx r13,rcx,41
4635 vpalignr ymm11,ymm2,ymm1,8
4636 rorx r15,rcx,18
4637 lea r10,[r14*1+r10]
4638 lea r9,[r12*1+r9]
4639 vpsrlq ymm10,ymm8,1
4640 andn r12,rcx,r8
4641 xor r13,r15
4642 rorx r14,rcx,14
4643 vpaddq ymm5,ymm5,ymm11
4644 vpsrlq ymm11,ymm8,7
4645 lea r9,[r12*1+r9]
4646 xor r13,r14
4647 mov r15,r10
4648 vpsllq ymm9,ymm8,56
4649 vpxor ymm8,ymm11,ymm10
4650 rorx r12,r10,39
4651 lea r9,[r13*1+r9]
4652 xor r15,r11
4653 vpsrlq ymm10,ymm10,7
4654 vpxor ymm8,ymm8,ymm9
4655 rorx r14,r10,34
4656 rorx r13,r10,28
4657 lea rbx,[r9*1+rbx]
4658 vpsllq ymm9,ymm9,7
4659 vpxor ymm8,ymm8,ymm10
4660 and rdi,r15
4661 xor r14,r12
4662 xor rdi,r11
4663 vpsrlq ymm11,ymm4,6
4664 vpxor ymm8,ymm8,ymm9
4665 xor r14,r13
4666 lea r9,[rdi*1+r9]
4667 mov r12,rcx
4668 vpsllq ymm10,ymm4,3
4669 vpaddq ymm5,ymm5,ymm8
4670 add r8,QWORD[((40+256))+rsp]
4671 and r12,rbx
4672 rorx r13,rbx,41
4673 vpsrlq ymm9,ymm4,19
4674 vpxor ymm11,ymm11,ymm10
4675 rorx rdi,rbx,18
4676 lea r9,[r14*1+r9]
4677 lea r8,[r12*1+r8]
4678 vpsllq ymm10,ymm10,42
4679 vpxor ymm11,ymm11,ymm9
4680 andn r12,rbx,rdx
4681 xor r13,rdi
4682 rorx r14,rbx,14
4683 vpsrlq ymm9,ymm9,42
4684 vpxor ymm11,ymm11,ymm10
4685 lea r8,[r12*1+r8]
4686 xor r13,r14
4687 mov rdi,r9
4688 vpxor ymm11,ymm11,ymm9
4689 rorx r12,r9,39
4690 lea r8,[r13*1+r8]
4691 xor rdi,r10
4692 vpaddq ymm5,ymm5,ymm11
4693 rorx r14,r9,34
4694 rorx r13,r9,28
4695 lea rax,[r8*1+rax]
4696 vpaddq ymm10,ymm5,YMMWORD[32+rbp]
4697 and r15,rdi
4698 xor r14,r12
4699 xor r15,r10
4700 xor r14,r13
4701 lea r8,[r15*1+r8]
4702 mov r12,rbx
4703 vmovdqa YMMWORD[32+rsp],ymm10
4704 vpalignr ymm8,ymm7,ymm6,8
4705 add rdx,QWORD[((64+256))+rsp]
4706 and r12,rax
4707 rorx r13,rax,41
4708 vpalignr ymm11,ymm3,ymm2,8
4709 rorx r15,rax,18
4710 lea r8,[r14*1+r8]
4711 lea rdx,[r12*1+rdx]
4712 vpsrlq ymm10,ymm8,1
4713 andn r12,rax,rcx
4714 xor r13,r15
4715 rorx r14,rax,14
4716 vpaddq ymm6,ymm6,ymm11
4717 vpsrlq ymm11,ymm8,7
4718 lea rdx,[r12*1+rdx]
4719 xor r13,r14
4720 mov r15,r8
4721 vpsllq ymm9,ymm8,56
4722 vpxor ymm8,ymm11,ymm10
4723 rorx r12,r8,39
4724 lea rdx,[r13*1+rdx]
4725 xor r15,r9
4726 vpsrlq ymm10,ymm10,7
4727 vpxor ymm8,ymm8,ymm9
4728 rorx r14,r8,34
4729 rorx r13,r8,28
4730 lea r11,[rdx*1+r11]
4731 vpsllq ymm9,ymm9,7
4732 vpxor ymm8,ymm8,ymm10
4733 and rdi,r15
4734 xor r14,r12
4735 xor rdi,r9
4736 vpsrlq ymm11,ymm5,6
4737 vpxor ymm8,ymm8,ymm9
4738 xor r14,r13
4739 lea rdx,[rdi*1+rdx]
4740 mov r12,rax
4741 vpsllq ymm10,ymm5,3
4742 vpaddq ymm6,ymm6,ymm8
4743 add rcx,QWORD[((72+256))+rsp]
4744 and r12,r11
4745 rorx r13,r11,41
4746 vpsrlq ymm9,ymm5,19
4747 vpxor ymm11,ymm11,ymm10
4748 rorx rdi,r11,18
4749 lea rdx,[r14*1+rdx]
4750 lea rcx,[r12*1+rcx]
4751 vpsllq ymm10,ymm10,42
4752 vpxor ymm11,ymm11,ymm9
4753 andn r12,r11,rbx
4754 xor r13,rdi
4755 rorx r14,r11,14
4756 vpsrlq ymm9,ymm9,42
4757 vpxor ymm11,ymm11,ymm10
4758 lea rcx,[r12*1+rcx]
4759 xor r13,r14
4760 mov rdi,rdx
4761 vpxor ymm11,ymm11,ymm9
4762 rorx r12,rdx,39
4763 lea rcx,[r13*1+rcx]
4764 xor rdi,r8
4765 vpaddq ymm6,ymm6,ymm11
4766 rorx r14,rdx,34
4767 rorx r13,rdx,28
4768 lea r10,[rcx*1+r10]
4769 vpaddq ymm10,ymm6,YMMWORD[64+rbp]
4770 and r15,rdi
4771 xor r14,r12
4772 xor r15,r8
4773 xor r14,r13
4774 lea rcx,[r15*1+rcx]
4775 mov r12,r11
4776 vmovdqa YMMWORD[64+rsp],ymm10
4777 vpalignr ymm8,ymm0,ymm7,8
4778 add rbx,QWORD[((96+256))+rsp]
4779 and r12,r10
4780 rorx r13,r10,41
4781 vpalignr ymm11,ymm4,ymm3,8
4782 rorx r15,r10,18
4783 lea rcx,[r14*1+rcx]
4784 lea rbx,[r12*1+rbx]
4785 vpsrlq ymm10,ymm8,1
4786 andn r12,r10,rax
4787 xor r13,r15
4788 rorx r14,r10,14
4789 vpaddq ymm7,ymm7,ymm11
4790 vpsrlq ymm11,ymm8,7
4791 lea rbx,[r12*1+rbx]
4792 xor r13,r14
4793 mov r15,rcx
4794 vpsllq ymm9,ymm8,56
4795 vpxor ymm8,ymm11,ymm10
4796 rorx r12,rcx,39
4797 lea rbx,[r13*1+rbx]
4798 xor r15,rdx
4799 vpsrlq ymm10,ymm10,7
4800 vpxor ymm8,ymm8,ymm9
4801 rorx r14,rcx,34
4802 rorx r13,rcx,28
4803 lea r9,[rbx*1+r9]
4804 vpsllq ymm9,ymm9,7
4805 vpxor ymm8,ymm8,ymm10
4806 and rdi,r15
4807 xor r14,r12
4808 xor rdi,rdx
4809 vpsrlq ymm11,ymm6,6
4810 vpxor ymm8,ymm8,ymm9
4811 xor r14,r13
4812 lea rbx,[rdi*1+rbx]
4813 mov r12,r10
4814 vpsllq ymm10,ymm6,3
4815 vpaddq ymm7,ymm7,ymm8
4816 add rax,QWORD[((104+256))+rsp]
4817 and r12,r9
4818 rorx r13,r9,41
4819 vpsrlq ymm9,ymm6,19
4820 vpxor ymm11,ymm11,ymm10
4821 rorx rdi,r9,18
4822 lea rbx,[r14*1+rbx]
4823 lea rax,[r12*1+rax]
4824 vpsllq ymm10,ymm10,42
4825 vpxor ymm11,ymm11,ymm9
4826 andn r12,r9,r11
4827 xor r13,rdi
4828 rorx r14,r9,14
4829 vpsrlq ymm9,ymm9,42
4830 vpxor ymm11,ymm11,ymm10
4831 lea rax,[r12*1+rax]
4832 xor r13,r14
4833 mov rdi,rbx
4834 vpxor ymm11,ymm11,ymm9
4835 rorx r12,rbx,39
4836 lea rax,[r13*1+rax]
4837 xor rdi,rcx
4838 vpaddq ymm7,ymm7,ymm11
4839 rorx r14,rbx,34
4840 rorx r13,rbx,28
4841 lea r8,[rax*1+r8]
4842 vpaddq ymm10,ymm7,YMMWORD[96+rbp]
4843 and r15,rdi
4844 xor r14,r12
4845 xor r15,rcx
4846 xor r14,r13
4847 lea rax,[r15*1+rax]
4848 mov r12,r9
4849 vmovdqa YMMWORD[96+rsp],ymm10
4850 lea rbp,[256+rbp] ;advance the table pointer (lea leaves flags untouched)
4851 cmp BYTE[((-121))+rbp],0 ;loop control: test one byte of the table
4852 jne NEAR $L$avx2_00_47
4853 add r11,QWORD[((0+128))+rsp]
4854 and r12,r8
4855 rorx r13,r8,41
4856 rorx r15,r8,18
4857 lea rax,[r14*1+rax]
4858 lea r11,[r12*1+r11]
4859 andn r12,r8,r10
4860 xor r13,r15
4861 rorx r14,r8,14
4862 lea r11,[r12*1+r11]
4863 xor r13,r14
4864 mov r15,rax
4865 rorx r12,rax,39
4866 lea r11,[r13*1+r11]
4867 xor r15,rbx
4868 rorx r14,rax,34
4869 rorx r13,rax,28
4870 lea rdx,[r11*1+rdx]
4871 and rdi,r15
4872 xor r14,r12
4873 xor rdi,rbx
4874 xor r14,r13
4875 lea r11,[rdi*1+r11]
4876 mov r12,r8
4877 add r10,QWORD[((8+128))+rsp]
4878 and r12,rdx
4879 rorx r13,rdx,41
4880 rorx rdi,rdx,18
4881 lea r11,[r14*1+r11]
4882 lea r10,[r12*1+r10]
4883 andn r12,rdx,r9
4884 xor r13,rdi
4885 rorx r14,rdx,14
4886 lea r10,[r12*1+r10]
4887 xor r13,r14
4888 mov rdi,r11
4889 rorx r12,r11,39
4890 lea r10,[r13*1+r10]
4891 xor rdi,rax
4892 rorx r14,r11,34
4893 rorx r13,r11,28
4894 lea rcx,[r10*1+rcx]
4895 and r15,rdi
4896 xor r14,r12
4897 xor r15,rax
4898 xor r14,r13
4899 lea r10,[r15*1+r10]
4900 mov r12,rdx
4901 add r9,QWORD[((32+128))+rsp]
4902 and r12,rcx
4903 rorx r13,rcx,41
4904 rorx r15,rcx,18
4905 lea r10,[r14*1+r10]
4906 lea r9,[r12*1+r9]
4907 andn r12,rcx,r8
4908 xor r13,r15
4909 rorx r14,rcx,14
4910 lea r9,[r12*1+r9]
4911 xor r13,r14
4912 mov r15,r10
4913 rorx r12,r10,39
4914 lea r9,[r13*1+r9]
4915 xor r15,r11
4916 rorx r14,r10,34
4917 rorx r13,r10,28
4918 lea rbx,[r9*1+rbx]
4919 and rdi,r15
4920 xor r14,r12
4921 xor rdi,r11
4922 xor r14,r13
4923 lea r9,[rdi*1+r9]
4924 mov r12,rcx
4925 add r8,QWORD[((40+128))+rsp]
4926 and r12,rbx
4927 rorx r13,rbx,41
4928 rorx rdi,rbx,18
4929 lea r9,[r14*1+r9]
4930 lea r8,[r12*1+r8]
4931 andn r12,rbx,rdx
4932 xor r13,rdi
4933 rorx r14,rbx,14
4934 lea r8,[r12*1+r8]
4935 xor r13,r14
4936 mov rdi,r9
4937 rorx r12,r9,39
4938 lea r8,[r13*1+r8]
4939 xor rdi,r10
4940 rorx r14,r9,34
4941 rorx r13,r9,28
4942 lea rax,[r8*1+rax]
4943 and r15,rdi
4944 xor r14,r12
4945 xor r15,r10
4946 xor r14,r13
4947 lea r8,[r15*1+r8]
4948 mov r12,rbx
4949 add rdx,QWORD[((64+128))+rsp]
4950 and r12,rax
4951 rorx r13,rax,41
4952 rorx r15,rax,18
4953 lea r8,[r14*1+r8]
4954 lea rdx,[r12*1+rdx]
4955 andn r12,rax,rcx
4956 xor r13,r15
4957 rorx r14,rax,14
4958 lea rdx,[r12*1+rdx]
4959 xor r13,r14
4960 mov r15,r8
4961 rorx r12,r8,39
4962 lea rdx,[r13*1+rdx]
4963 xor r15,r9
4964 rorx r14,r8,34
4965 rorx r13,r8,28
4966 lea r11,[rdx*1+r11]
4967 and rdi,r15
4968 xor r14,r12
4969 xor rdi,r9
4970 xor r14,r13
4971 lea rdx,[rdi*1+rdx]
4972 mov r12,rax
4973 add rcx,QWORD[((72+128))+rsp]
4974 and r12,r11
4975 rorx r13,r11,41
4976 rorx rdi,r11,18
4977 lea rdx,[r14*1+rdx]
4978 lea rcx,[r12*1+rcx]
4979 andn r12,r11,rbx
4980 xor r13,rdi
4981 rorx r14,r11,14
4982 lea rcx,[r12*1+rcx]
4983 xor r13,r14
4984 mov rdi,rdx
4985 rorx r12,rdx,39
4986 lea rcx,[r13*1+rcx]
4987 xor rdi,r8
4988 rorx r14,rdx,34
4989 rorx r13,rdx,28
4990 lea r10,[rcx*1+r10]
4991 and r15,rdi
4992 xor r14,r12
4993 xor r15,r8
4994 xor r14,r13
4995 lea rcx,[r15*1+rcx]
4996 mov r12,r11
4997 add rbx,QWORD[((96+128))+rsp]
4998 and r12,r10
4999 rorx r13,r10,41
5000 rorx r15,r10,18
5001 lea rcx,[r14*1+rcx]
5002 lea rbx,[r12*1+rbx]
5003 andn r12,r10,rax
5004 xor r13,r15
5005 rorx r14,r10,14
5006 lea rbx,[r12*1+rbx]
5007 xor r13,r14
5008 mov r15,rcx
5009 rorx r12,rcx,39
5010 lea rbx,[r13*1+rbx]
5011 xor r15,rdx
5012 rorx r14,rcx,34
5013 rorx r13,rcx,28
5014 lea r9,[rbx*1+r9]
5015 and rdi,r15
5016 xor r14,r12
5017 xor rdi,rdx
5018 xor r14,r13
5019 lea rbx,[rdi*1+rbx]
5020 mov r12,r10
5021 add rax,QWORD[((104+128))+rsp]
5022 and r12,r9
5023 rorx r13,r9,41
5024 rorx rdi,r9,18
5025 lea rbx,[r14*1+rbx]
5026 lea rax,[r12*1+rax]
5027 andn r12,r9,r11
5028 xor r13,rdi
5029 rorx r14,r9,14
5030 lea rax,[r12*1+rax]
5031 xor r13,r14
5032 mov rdi,rbx
5033 rorx r12,rbx,39
5034 lea rax,[r13*1+rax]
5035 xor rdi,rcx
5036 rorx r14,rbx,34
5037 rorx r13,rbx,28
5038 lea r8,[rax*1+r8]
5039 and r15,rdi
5040 xor r14,r12
5041 xor r15,rcx
5042 xor r14,r13
5043 lea rax,[r15*1+rax]
5044 mov r12,r9
5045 add r11,QWORD[rsp]
5046 and r12,r8
5047 rorx r13,r8,41
5048 rorx r15,r8,18
5049 lea rax,[r14*1+rax]
5050 lea r11,[r12*1+r11]
5051 andn r12,r8,r10
5052 xor r13,r15
5053 rorx r14,r8,14
5054 lea r11,[r12*1+r11]
5055 xor r13,r14
5056 mov r15,rax
5057 rorx r12,rax,39
5058 lea r11,[r13*1+r11]
5059 xor r15,rbx
5060 rorx r14,rax,34
5061 rorx r13,rax,28
5062 lea rdx,[r11*1+rdx]
5063 and rdi,r15
5064 xor r14,r12
5065 xor rdi,rbx
5066 xor r14,r13
5067 lea r11,[rdi*1+r11]
5068 mov r12,r8
5069 add r10,QWORD[8+rsp]
5070 and r12,rdx
5071 rorx r13,rdx,41
5072 rorx rdi,rdx,18
5073 lea r11,[r14*1+r11]
5074 lea r10,[r12*1+r10]
5075 andn r12,rdx,r9
5076 xor r13,rdi
5077 rorx r14,rdx,14
5078 lea r10,[r12*1+r10]
5079 xor r13,r14
5080 mov rdi,r11
5081 rorx r12,r11,39
5082 lea r10,[r13*1+r10]
5083 xor rdi,rax
5084 rorx r14,r11,34
5085 rorx r13,r11,28
5086 lea rcx,[r10*1+rcx]
5087 and r15,rdi
5088 xor r14,r12
5089 xor r15,rax
5090 xor r14,r13
5091 lea r10,[r15*1+r10]
5092 mov r12,rdx
5093 add r9,QWORD[32+rsp]
5094 and r12,rcx
5095 rorx r13,rcx,41
5096 rorx r15,rcx,18
5097 lea r10,[r14*1+r10]
5098 lea r9,[r12*1+r9]
5099 andn r12,rcx,r8
5100 xor r13,r15
5101 rorx r14,rcx,14
5102 lea r9,[r12*1+r9]
5103 xor r13,r14
5104 mov r15,r10
5105 rorx r12,r10,39
5106 lea r9,[r13*1+r9]
5107 xor r15,r11
5108 rorx r14,r10,34
5109 rorx r13,r10,28
5110 lea rbx,[r9*1+rbx]
5111 and rdi,r15
5112 xor r14,r12
5113 xor rdi,r11
5114 xor r14,r13
5115 lea r9,[rdi*1+r9]
5116 mov r12,rcx
5117 add r8,QWORD[40+rsp]
5118 and r12,rbx
5119 rorx r13,rbx,41
5120 rorx rdi,rbx,18
5121 lea r9,[r14*1+r9]
5122 lea r8,[r12*1+r8]
5123 andn r12,rbx,rdx
5124 xor r13,rdi
5125 rorx r14,rbx,14
5126 lea r8,[r12*1+r8]
5127 xor r13,r14
5128 mov rdi,r9
5129 rorx r12,r9,39
5130 lea r8,[r13*1+r8]
5131 xor rdi,r10
5132 rorx r14,r9,34
5133 rorx r13,r9,28
5134 lea rax,[r8*1+rax]
5135 and r15,rdi
5136 xor r14,r12
5137 xor r15,r10
5138 xor r14,r13
5139 lea r8,[r15*1+r8]
5140 mov r12,rbx
5141 add rdx,QWORD[64+rsp]
5142 and r12,rax
5143 rorx r13,rax,41
5144 rorx r15,rax,18
5145 lea r8,[r14*1+r8]
5146 lea rdx,[r12*1+rdx]
5147 andn r12,rax,rcx
5148 xor r13,r15
5149 rorx r14,rax,14
5150 lea rdx,[r12*1+rdx]
5151 xor r13,r14
5152 mov r15,r8
5153 rorx r12,r8,39
5154 lea rdx,[r13*1+rdx]
5155 xor r15,r9
5156 rorx r14,r8,34
5157 rorx r13,r8,28
5158 lea r11,[rdx*1+r11]
5159 and rdi,r15
5160 xor r14,r12
5161 xor rdi,r9
5162 xor r14,r13
5163 lea rdx,[rdi*1+rdx]
5164 mov r12,rax
5165 add rcx,QWORD[72+rsp]
5166 and r12,r11
5167 rorx r13,r11,41
5168 rorx rdi,r11,18
5169 lea rdx,[r14*1+rdx]
5170 lea rcx,[r12*1+rcx]
5171 andn r12,r11,rbx
5172 xor r13,rdi
5173 rorx r14,r11,14
5174 lea rcx,[r12*1+rcx]
5175 xor r13,r14
5176 mov rdi,rdx
5177 rorx r12,rdx,39
5178 lea rcx,[r13*1+rcx]
5179 xor rdi,r8
5180 rorx r14,rdx,34
5181 rorx r13,rdx,28
5182 lea r10,[rcx*1+r10]
5183 and r15,rdi
5184 xor r14,r12
5185 xor r15,r8
5186 xor r14,r13
5187 lea rcx,[r15*1+rcx]
5188 mov r12,r11
5189 add rbx,QWORD[96+rsp]
5190 and r12,r10
5191 rorx r13,r10,41
5192 rorx r15,r10,18
5193 lea rcx,[r14*1+rcx]
5194 lea rbx,[r12*1+rbx]
5195 andn r12,r10,rax
5196 xor r13,r15
5197 rorx r14,r10,14
5198 lea rbx,[r12*1+rbx]
5199 xor r13,r14
5200 mov r15,rcx
5201 rorx r12,rcx,39
5202 lea rbx,[r13*1+rbx]
5203 xor r15,rdx
5204 rorx r14,rcx,34
5205 rorx r13,rcx,28
5206 lea r9,[rbx*1+r9]
5207 and rdi,r15
5208 xor r14,r12
5209 xor rdi,rdx
5210 xor r14,r13
5211 lea rbx,[rdi*1+rbx]
5212 mov r12,r10
5213 add rax,QWORD[104+rsp]
5214 and r12,r9
5215 rorx r13,r9,41
5216 rorx rdi,r9,18
5217 lea rbx,[r14*1+rbx]
5218 lea rax,[r12*1+rax]
5219 andn r12,r9,r11
5220 xor r13,rdi
5221 rorx r14,r9,14
5222 lea rax,[r12*1+rax]
5223 xor r13,r14
5224 mov rdi,rbx
5225 rorx r12,rbx,39
5226 lea rax,[r13*1+rax]
5227 xor rdi,rcx
5228 rorx r14,rbx,34
5229 rorx r13,rbx,28
5230 lea r8,[rax*1+r8]
5231 and r15,rdi
5232 xor r14,r12
5233 xor r15,rcx
5234 xor r14,r13
5235 lea rax,[r15*1+rax]
5236 mov r12,r9
5237 mov rdi,QWORD[1280+rsp]
5238 add rax,r14
5239
5240 lea rbp,[1152+rsp]
5241
5242 add rax,QWORD[rdi]
5243 add rbx,QWORD[8+rdi]
5244 add rcx,QWORD[16+rdi]
5245 add rdx,QWORD[24+rdi]
5246 add r8,QWORD[32+rdi]
5247 add r9,QWORD[40+rdi]
5248 add r10,QWORD[48+rdi]
5249 add r11,QWORD[56+rdi]
5250
5251 mov QWORD[rdi],rax
5252 mov QWORD[8+rdi],rbx
5253 mov QWORD[16+rdi],rcx
5254 mov QWORD[24+rdi],rdx
5255 mov QWORD[32+rdi],r8
5256 mov QWORD[40+rdi],r9
5257 mov QWORD[48+rdi],r10
5258 mov QWORD[56+rdi],r11
5259
5260 cmp rsi,QWORD[144+rbp]
5261 je NEAR $L$done_avx2
5262
5263 xor r14,r14
5264 mov rdi,rbx
5265 xor rdi,rcx
5266 mov r12,r9
5267 jmp NEAR $L$ower_avx2
5268ALIGN 16
5269$L$ower_avx2:
5270 add r11,QWORD[((0+16))+rbp]
5271 and r12,r8
5272 rorx r13,r8,41
5273 rorx r15,r8,18
5274 lea rax,[r14*1+rax]
5275 lea r11,[r12*1+r11]
5276 andn r12,r8,r10
5277 xor r13,r15
5278 rorx r14,r8,14
5279 lea r11,[r12*1+r11]
5280 xor r13,r14
5281 mov r15,rax
5282 rorx r12,rax,39
5283 lea r11,[r13*1+r11]
5284 xor r15,rbx
5285 rorx r14,rax,34
5286 rorx r13,rax,28
5287 lea rdx,[r11*1+rdx]
5288 and rdi,r15
5289 xor r14,r12
5290 xor rdi,rbx
5291 xor r14,r13
5292 lea r11,[rdi*1+r11]
5293 mov r12,r8
5294 add r10,QWORD[((8+16))+rbp]
5295 and r12,rdx
5296 rorx r13,rdx,41
5297 rorx rdi,rdx,18
5298 lea r11,[r14*1+r11]
5299 lea r10,[r12*1+r10]
5300 andn r12,rdx,r9
5301 xor r13,rdi
5302 rorx r14,rdx,14
5303 lea r10,[r12*1+r10]
5304 xor r13,r14
5305 mov rdi,r11
5306 rorx r12,r11,39
5307 lea r10,[r13*1+r10]
5308 xor rdi,rax
5309 rorx r14,r11,34
5310 rorx r13,r11,28
5311 lea rcx,[r10*1+rcx]
5312 and r15,rdi
5313 xor r14,r12
5314 xor r15,rax
5315 xor r14,r13
5316 lea r10,[r15*1+r10]
5317 mov r12,rdx
5318 add r9,QWORD[((32+16))+rbp]
5319 and r12,rcx
5320 rorx r13,rcx,41
5321 rorx r15,rcx,18
5322 lea r10,[r14*1+r10]
5323 lea r9,[r12*1+r9]
5324 andn r12,rcx,r8
5325 xor r13,r15
5326 rorx r14,rcx,14
5327 lea r9,[r12*1+r9]
5328 xor r13,r14
5329 mov r15,r10
5330 rorx r12,r10,39
5331 lea r9,[r13*1+r9]
5332 xor r15,r11
5333 rorx r14,r10,34
5334 rorx r13,r10,28
5335 lea rbx,[r9*1+rbx]
5336 and rdi,r15
5337 xor r14,r12
5338 xor rdi,r11
5339 xor r14,r13
5340 lea r9,[rdi*1+r9]
5341 mov r12,rcx
5342 add r8,QWORD[((40+16))+rbp]
5343 and r12,rbx
5344 rorx r13,rbx,41
5345 rorx rdi,rbx,18
5346 lea r9,[r14*1+r9]
5347 lea r8,[r12*1+r8]
5348 andn r12,rbx,rdx
5349 xor r13,rdi
5350 rorx r14,rbx,14
5351 lea r8,[r12*1+r8]
5352 xor r13,r14
5353 mov rdi,r9
5354 rorx r12,r9,39
5355 lea r8,[r13*1+r8]
5356 xor rdi,r10
5357 rorx r14,r9,34
5358 rorx r13,r9,28
5359 lea rax,[r8*1+rax]
5360 and r15,rdi
5361 xor r14,r12
5362 xor r15,r10
5363 xor r14,r13
5364 lea r8,[r15*1+r8]
5365 mov r12,rbx
5366 add rdx,QWORD[((64+16))+rbp]
5367 and r12,rax
5368 rorx r13,rax,41
5369 rorx r15,rax,18
5370 lea r8,[r14*1+r8]
5371 lea rdx,[r12*1+rdx]
5372 andn r12,rax,rcx
5373 xor r13,r15
5374 rorx r14,rax,14
5375 lea rdx,[r12*1+rdx]
5376 xor r13,r14
5377 mov r15,r8
5378 rorx r12,r8,39
5379 lea rdx,[r13*1+rdx]
5380 xor r15,r9
5381 rorx r14,r8,34
5382 rorx r13,r8,28
5383 lea r11,[rdx*1+r11]
5384 and rdi,r15
5385 xor r14,r12
5386 xor rdi,r9
5387 xor r14,r13
5388 lea rdx,[rdi*1+rdx]
5389 mov r12,rax
5390 add rcx,QWORD[((72+16))+rbp]
5391 and r12,r11
5392 rorx r13,r11,41
5393 rorx rdi,r11,18
5394 lea rdx,[r14*1+rdx]
5395 lea rcx,[r12*1+rcx]
5396 andn r12,r11,rbx
5397 xor r13,rdi
5398 rorx r14,r11,14
5399 lea rcx,[r12*1+rcx]
5400 xor r13,r14
5401 mov rdi,rdx
5402 rorx r12,rdx,39
5403 lea rcx,[r13*1+rcx]
5404 xor rdi,r8
5405 rorx r14,rdx,34
5406 rorx r13,rdx,28
5407 lea r10,[rcx*1+r10]
5408 and r15,rdi
5409 xor r14,r12
5410 xor r15,r8
5411 xor r14,r13
5412 lea rcx,[r15*1+rcx]
5413 mov r12,r11
5414 add rbx,QWORD[((96+16))+rbp]
5415 and r12,r10
5416 rorx r13,r10,41
5417 rorx r15,r10,18
5418 lea rcx,[r14*1+rcx]
5419 lea rbx,[r12*1+rbx]
5420 andn r12,r10,rax
5421 xor r13,r15
5422 rorx r14,r10,14
5423 lea rbx,[r12*1+rbx]
5424 xor r13,r14
5425 mov r15,rcx
5426 rorx r12,rcx,39
5427 lea rbx,[r13*1+rbx]
5428 xor r15,rdx
5429 rorx r14,rcx,34
5430 rorx r13,rcx,28
5431 lea r9,[rbx*1+r9]
5432 and rdi,r15
5433 xor r14,r12
5434 xor rdi,rdx
5435 xor r14,r13
5436 lea rbx,[rdi*1+rbx]
5437 mov r12,r10
5438 add rax,QWORD[((104+16))+rbp]
5439 and r12,r9
5440 rorx r13,r9,41
5441 rorx rdi,r9,18
5442 lea rbx,[r14*1+rbx]
5443 lea rax,[r12*1+rax]
5444 andn r12,r9,r11
5445 xor r13,rdi
5446 rorx r14,r9,14
5447 lea rax,[r12*1+rax]
5448 xor r13,r14
5449 mov rdi,rbx
5450 rorx r12,rbx,39
5451 lea rax,[r13*1+rax]
5452 xor rdi,rcx
5453 rorx r14,rbx,34
5454 rorx r13,rbx,28
5455 lea r8,[rax*1+r8]
5456 and r15,rdi
5457 xor r14,r12
5458 xor r15,rcx
5459 xor r14,r13
5460 lea rax,[r15*1+rax]
5461 mov r12,r9
5462 lea rbp,[((-128))+rbp]
5463 cmp rbp,rsp
5464 jae NEAR $L$ower_avx2
5465
5466 mov rdi,QWORD[1280+rsp]
5467 add rax,r14
5468
5469 lea rsp,[1152+rsp]
5470
5471
5472
5473 add rax,QWORD[rdi]
5474 add rbx,QWORD[8+rdi]
5475 add rcx,QWORD[16+rdi]
5476 add rdx,QWORD[24+rdi]
5477 add r8,QWORD[32+rdi]
5478 add r9,QWORD[40+rdi]
5479 lea rsi,[256+rsi]
5480 add r10,QWORD[48+rdi]
5481 mov r12,rsi
5482 add r11,QWORD[56+rdi]
5483 cmp rsi,QWORD[((128+16))+rsp]
5484
5485 mov QWORD[rdi],rax
5486 cmove r12,rsp
5487 mov QWORD[8+rdi],rbx
5488 mov QWORD[16+rdi],rcx
5489 mov QWORD[24+rdi],rdx
5490 mov QWORD[32+rdi],r8
5491 mov QWORD[40+rdi],r9
5492 mov QWORD[48+rdi],r10
5493 mov QWORD[56+rdi],r11
5494
5495 jbe NEAR $L$oop_avx2
5496 lea rbp,[rsp]
5497
5498
5499
5500
5501$L$done_avx2:
5502 mov rsi,QWORD[152+rbp]
5503
5504 vzeroupper
5505 movaps xmm6,XMMWORD[((128+32))+rbp]
5506 movaps xmm7,XMMWORD[((128+48))+rbp]
5507 movaps xmm8,XMMWORD[((128+64))+rbp]
5508 movaps xmm9,XMMWORD[((128+80))+rbp]
5509 movaps xmm10,XMMWORD[((128+96))+rbp]
5510 movaps xmm11,XMMWORD[((128+112))+rbp]
5511 mov r15,QWORD[((-48))+rsi]
5512
5513 mov r14,QWORD[((-40))+rsi]
5514
5515 mov r13,QWORD[((-32))+rsi]
5516
5517 mov r12,QWORD[((-24))+rsi]
5518
5519 mov rbp,QWORD[((-16))+rsi]
5520
5521 mov rbx,QWORD[((-8))+rsi]
5522
5523 lea rsp,[rsi]
5524
5525$L$epilogue_avx2:
5526 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
5527 mov rsi,QWORD[16+rsp]
5528 DB 0F3h,0C3h ;repret
5529
5530$L$SEH_end_sha512_block_data_order_avx2:
5531EXTERN __imp_RtlVirtualUnwind
5532
5533ALIGN 16
;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler shared by all
; sha512_block_data_order variants (see the .xdata records that name it).
;
; In:   r8 = pointer to the CONTEXT record being unwound
;       r9 = pointer to the DISPATCHER_CONTEXT
;       (rcx/rdx are not read by this routine)
; Out:  eax = 1 (ExceptionContinueSearch)
;
; The handler works out where the faulting RIP lies relative to the
; prologue/epilogue labels stored as HandlerData, recovers the caller's
; non-volatile registers from the hashing routine's frame, patches them
; into the CONTEXT, and then lets RtlVirtualUnwind continue the unwind.
; Structure offsets used below (CONTEXT: Rax=120, Rbx=144, Rsp=152,
; Rbp=160, Rsi=168, Rdi=176, R12..R15=216..240, Rip=248, Xmm6=512;
; DISPATCHER_CONTEXT: ControlPc=0, ImageBase=8, FunctionEntry=16,
; EstablisherFrame=24, ContextRecord=40, HandlerData=56) follow the
; Windows x64 SDK layouts.
;-----------------------------------------------------------------------
5534se_handler:
; Preserve every register this routine touches, plus flags, and reserve
; 64 bytes: 32 of shadow space and 4 stack-argument slots for the call.
5535 push rsi
5536 push rdi
5537 push rbx
5538 push rbp
5539 push r12
5540 push r13
5541 push r14
5542 push r15
5543 pushfq
5544 sub rsp,64
5545
; rax = context->Rax (the hashing routine copies its entry rsp into rax
; in its prologue), rbx = context->Rip.
5546 mov rax,QWORD[120+r8]
5547 mov rbx,QWORD[248+r8]
5548
; rsi = disp->ImageBase, r11 = disp->HandlerData (two image-relative
; label offsets: [0] = prologue label, [1] = epilogue label).
5549 mov rsi,QWORD[8+r9]
5550 mov r11,QWORD[56+r9]
5551
; Rip below the prologue label: the frame is not set up yet, so rax
; (entry rsp) is already the value needed at $L$in_prologue.
5552 mov r10d,DWORD[r11]
5553 lea r10,[r10*1+rsi]
5554 cmp rbx,r10
5555 jb NEAR $L$in_prologue
5556
; Otherwise start from context->Rsp.
5557 mov rax,QWORD[152+r8]
5558
; Rip at or beyond the epilogue label: registers and rsp have already
; been restored by the function, so nothing is read from the frame.
5559 mov r10d,DWORD[4+r11]
5560 lea r10,[r10*1+rsi]
5561 cmp rbx,r10
5562 jae NEAR $L$in_prologue
; Code at or after $L$avx2_shortcut moves rsp while it runs, so the
; frame base is recomputed from rsp instead of being rsp itself.
5563 lea r10,[$L$avx2_shortcut]
5564 cmp rbx,r10
5565 jb NEAR $L$not_in_avx2
5566
; Round rsp down to a 2048-byte boundary and add 1152 to reach the
; frame base -- presumably matches the AVX2 prologue's alignment and
; its `lea rsp,[1152+rsp]` adjustment; TODO confirm against prologue.
5567 and rax,-256*8
5568 add rax,1152
5569$L$not_in_avx2:
; rsi = frame base (kept for the XMM save area below);
; rax = original rsp saved by the prologue at offset 152 of the frame.
5570 mov rsi,rax
5571 mov rax,QWORD[((128+24))+rax]
5572
; Reload the six pushed non-volatile GPRs from just below the saved rsp
; and write them into the CONTEXT record.
5573 mov rbx,QWORD[((-8))+rax]
5574 mov rbp,QWORD[((-16))+rax]
5575 mov r12,QWORD[((-24))+rax]
5576 mov r13,QWORD[((-32))+rax]
5577 mov r14,QWORD[((-40))+rax]
5578 mov r15,QWORD[((-48))+rax]
5579 mov QWORD[144+r8],rbx
5580 mov QWORD[160+r8],rbp
5581 mov QWORD[216+r8],r12
5582 mov QWORD[224+r8],r13
5583 mov QWORD[232+r8],r14
5584 mov QWORD[240+r8],r15
5585
; Skip the XMM restore when the comparison against $L$epilogue says the
; fault was in the code before that label (presumably the non-SIMD
; variant, which saves no XMM registers).
; NOTE(review): rbx was overwritten a few instructions above with the
; caller's saved rbx, so this compare no longer tests context->Rip --
; looks like a latent upstream quirk; confirm before relying on it.
5586 lea r10,[$L$epilogue]
5587 cmp rbx,r10
5588 jb NEAR $L$in_prologue
5589
; Copy 12 qwords (xmm6..xmm11 save area at frame+160) into the CONTEXT
; at offset 512. The DD encodes bytes FC F3 48 A5 = cld; rep movsq.
5590 lea rsi,[((128+32))+rsi]
5591 lea rdi,[512+r8]
5592 mov ecx,12
5593 DD 0xa548f3fc
5594
5595$L$in_prologue:
; rax is the function's entry rsp here; the WIN64 prologue stored the
; caller's rdi/rsi at [8+rsp]/[16+rsp]. Publish Rsp, Rsi and Rdi.
5596 mov rdi,QWORD[8+rax]
5597 mov rsi,QWORD[16+rax]
5598 mov QWORD[152+r8],rax
5599 mov QWORD[168+r8],rsi
5600 mov QWORD[176+r8],rdi
5601
; Copy the patched CONTEXT (154 qwords = 1232 bytes) over
; disp->ContextRecord with cld; rep movsq.
5602 mov rdi,QWORD[40+r9]
5603 mov rsi,r8
5604 mov ecx,154
5605 DD 0xa548f3fc
5606
; RtlVirtualUnwind(0 /*UNW_FLAG_NHANDLER*/, disp->ImageBase,
;                  disp->ControlPc, disp->FunctionEntry,
;                  disp->ContextRecord, &disp->HandlerData,
;                  &disp->EstablisherFrame, NULL)
; First four arguments go in rcx/rdx/r8/r9, the rest on the stack above
; the 32-byte shadow space.
5607 mov rsi,r9
5608 xor rcx,rcx
5609 mov rdx,QWORD[8+rsi]
5610 mov r8,QWORD[rsi]
5611 mov r9,QWORD[16+rsi]
5612 mov r10,QWORD[40+rsi]
5613 lea r11,[56+rsi]
5614 lea r12,[24+rsi]
5615 mov QWORD[32+rsp],r10
5616 mov QWORD[40+rsp],r11
5617 mov QWORD[48+rsp],r12
5618 mov QWORD[56+rsp],rcx
5619 call QWORD[__imp_RtlVirtualUnwind]
5620
; Return ExceptionContinueSearch and undo the entry pushes in reverse.
5621 mov eax,1
5622 add rsp,64
5623 popfq
5624 pop r15
5625 pop r14
5626 pop r13
5627 pop r12
5628 pop rbp
5629 pop rbx
5630 pop rdi
5631 pop rsi
5632 DB 0F3h,0C3h ;repret
5633
; Function table for Win64 structured exception handling. Each group of
; three DWORDs is one entry: image-relative start address, image-relative
; end address, and image-relative address of the matching $L$SEH_info_*
; record in .xdata. `wrt ..imagebase` makes NASM emit image-relative
; (RVA) values instead of absolute addresses.
; There is one entry per implementation: baseline, XOP, AVX and AVX2.
5634section .pdata rdata align=4
5635ALIGN 4
; baseline implementation
5636 DD $L$SEH_begin_sha512_block_data_order wrt ..imagebase
5637 DD $L$SEH_end_sha512_block_data_order wrt ..imagebase
5638 DD $L$SEH_info_sha512_block_data_order wrt ..imagebase
; XOP implementation
5639 DD $L$SEH_begin_sha512_block_data_order_xop wrt ..imagebase
5640 DD $L$SEH_end_sha512_block_data_order_xop wrt ..imagebase
5641 DD $L$SEH_info_sha512_block_data_order_xop wrt ..imagebase
; AVX implementation
5642 DD $L$SEH_begin_sha512_block_data_order_avx wrt ..imagebase
5643 DD $L$SEH_end_sha512_block_data_order_avx wrt ..imagebase
5644 DD $L$SEH_info_sha512_block_data_order_avx wrt ..imagebase
; AVX2 implementation
5645 DD $L$SEH_begin_sha512_block_data_order_avx2 wrt ..imagebase
5646 DD $L$SEH_end_sha512_block_data_order_avx2 wrt ..imagebase
5647 DD $L$SEH_info_sha512_block_data_order_avx2 wrt ..imagebase
; Unwind-info records referenced from .pdata, one per implementation.
; Layout of each record:
;   DB 9,0,0,0  -- header bytes; per the Windows x64 UNWIND_INFO layout
;                  9 = version 1 with the "has exception handler" flag,
;                  followed by zero prolog size / unwind-code count /
;                  frame register (no unwind codes are described here).
;   DD handler  -- image-relative address of se_handler.
;   DD a,b      -- handler data: image-relative addresses of that
;                  implementation's prologue and epilogue labels, which
;                  se_handler reads as HandlerData[0] and HandlerData[1]
;                  and rebases with the image base.
5648section .xdata rdata align=8
5649ALIGN 8
; baseline implementation
5650$L$SEH_info_sha512_block_data_order:
5651DB 9,0,0,0
5652 DD se_handler wrt ..imagebase
5653 DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase
; XOP implementation
5654$L$SEH_info_sha512_block_data_order_xop:
5655DB 9,0,0,0
5656 DD se_handler wrt ..imagebase
5657 DD $L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase
; AVX implementation
5658$L$SEH_info_sha512_block_data_order_avx:
5659DB 9,0,0,0
5660 DD se_handler wrt ..imagebase
5661 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
; AVX2 implementation
5662$L$SEH_info_sha512_block_data_order_avx2:
5663DB 9,0,0,0
5664 DD se_handler wrt ..imagebase
5665 DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette