VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.0g/crypto/genasm-nasm/sha512-x86_64.S@ 69881

Last change on this file since 69881 was 69881, checked in by vboxsync, 7 years ago

Update OpenSSL to 1.1.0g.
bugref:8070: src/libs maintenance

  • Property svn:eol-style set to native
File size: 86.3 KB
Line 
; NASM source. "default rel" makes symbol references RIP-relative.
1default rel
; XMMWORD/YMMWORD/ZMMWORD are defined as empty, so the size keywords
; written in front of vector memory operands below expand to nothing.
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
6
7
; Capability words defined in another module; sha512_block_data_order
; reads three dwords from this symbol to choose a code path.
8EXTERN OPENSSL_ia32cap_P
9global sha512_block_data_order
10
11ALIGN 16
;-----------------------------------------------------------------------
; sha512_block_data_order -- SHA-512 block transform, scalar x86_64 path.
; ABI:   Microsoft x64 on entry (arguments in rcx, rdx, r8); they are
;        copied into rdi/rsi/rdx, which the body uses throughout.
; In:    rcx = pointer to eight 64-bit state words (read, then updated)
;        rdx = pointer to input data, consumed 128 bytes per iteration
;        r8  = number of 128-byte blocks (scaled by 128 to form the
;              end-of-input pointer)
; Stack: 160 bytes below the six pushed registers, aligned down to 64:
;        [rsp+0..127]  16-entry message-schedule ring buffer
;        [rsp+128]     state pointer     [rsp+136] input pointer as passed
;        [rsp+144]     end of input      [rsp+152] rsp after the pushes
; NOTE(review): the block loop is bottom-tested (jb after the first
; block), so a count of 0 would still process one block -- presumably
; callers never pass 0; confirm.
;-----------------------------------------------------------------------
12sha512_block_data_order:
13 mov QWORD[8+rsp],rdi ;WIN64 prologue: rdi/rsi are callee-saved on Win64,
14 mov QWORD[16+rsp],rsi ; so park them in the slots just above the return address
15 mov rax,rsp ; entry rsp; overwritten by the state load below on this path
16$L$SEH_begin_sha512_block_data_order:
17 mov rdi,rcx ; rdi = state pointer
18 mov rsi,rdx ; rsi = input pointer
19 mov rdx,r8 ; rdx = block count
20
21
; Code-path dispatch on three capability dwords.
22 lea r11,[OPENSSL_ia32cap_P]
23 mov r9d,DWORD[r11]
24 mov r10d,DWORD[4+r11]
25 mov r11d,DWORD[8+r11]
26 test r10d,2048 ; bit 11 of dword 1 set -> XOP path
27 jnz NEAR $L$xop_shortcut
28 and r11d,296 ; 296 = bits 3, 5 and 8 of dword 2;
29 cmp r11d,296 ; all three must be set for the AVX2 path
30 je NEAR $L$avx2_shortcut ; (label lies outside this excerpt)
31 and r9d,1073741824 ; bit 30 of dword 0
32 and r10d,268435968 ; bits 28 and 9 of dword 1
33 or r10d,r9d
34 cmp r10d,1342177792 ; all three bits set -> AVX path
35 je NEAR $L$avx_shortcut ; (label lies outside this excerpt)
; Scalar path: save callee-saved registers and build the frame.
36 push rbx
37 push rbp
38 push r12
39 push r13
40 push r14
41 push r15
42 mov r11,rsp ; remember rsp after the pushes for the epilogue
43 shl rdx,4
44 sub rsp,16*8+4*8 ; 128-byte schedule buffer + 4 saved qwords
45 lea rdx,[rdx*8+rsi] ; rdx = input + count*128 = end of input
46 and rsp,-64 ; align the frame down to 64 bytes
47 mov QWORD[((128+0))+rsp],rdi
48 mov QWORD[((128+8))+rsp],rsi
49 mov QWORD[((128+16))+rsp],rdx
50 mov QWORD[((128+24))+rsp],r11
51$L$prologue:
52
; Load the eight state words into the working registers
; (a..h = rax, rbx, rcx, rdx, r8, r9, r10, r11).
53 mov rax,QWORD[rdi]
54 mov rbx,QWORD[8+rdi]
55 mov rcx,QWORD[16+rdi]
56 mov rdx,QWORD[24+rdi]
57 mov r8,QWORD[32+rdi]
58 mov r9,QWORD[40+rdi]
59 mov r10,QWORD[48+rdi]
60 mov r11,QWORD[56+rdi]
61 jmp NEAR $L$loop
62
63ALIGN 16
; Per-block loop head, followed by fully unrolled rounds 0..15.
; Each round loads one big-endian input qword W[t], stores it in the
; stack ring buffer and performs one SHA-512 round.  Register roles
; rotate by one position per round instead of moving the data.
; Scratch: r12 = T1 accumulator, r13 = Sigma1(e), r14 = Sigma0(a),
; r15/rdi alternate between Ch and the (a^b)/(b^c) terms used for Maj.
;
; Round 0: a=rax b=rbx c=rcx d=rdx e=r8 f=r9 g=r10 h=r11
64$L$loop:
65 mov rdi,rbx
66 lea rbp,[K512] ; rbp walks the round-constant table
67 xor rdi,rcx ; rdi = b^c, consumed by Maj below
68 mov r12,QWORD[rsi]
69 mov r13,r8
70 mov r14,rax
71 bswap r12 ; r12 = W[0], converted from big-endian
72 ror r13,23
73 mov r15,r9
74
75 xor r13,r8
76 ror r14,5
77 xor r15,r10 ; f^g
78
79 mov QWORD[rsp],r12 ; schedule slot 0 = W[0]
80 xor r14,rax
81 and r15,r8 ; (f^g)&e
82
83 ror r13,4
84 add r12,r11 ; T1 = W + h
85 xor r15,r10 ; r15 = Ch(e,f,g) = ((f^g)&e)^g
86
87 ror r14,6
88 xor r13,r8
89 add r12,r15 ; T1 += Ch
90
91 mov r15,rax
92 add r12,QWORD[rbp] ; T1 += K[t]
93 xor r14,rax
94
95 xor r15,rbx ; r15 = a^b (reused as next round's b^c)
96 ror r13,14 ; r13 = e ror 14 ^ e ror 18 ^ e ror 41
97 mov r11,rbx
98
99 and rdi,r15 ; (b^c)&(a^b)
100 ror r14,28 ; r14 = a ror 28 ^ a ror 34 ^ a ror 39
101 add r12,r13 ; T1 += Sigma1(e)
102
103 xor r11,rdi ; r11 = Maj(a,b,c) = b^((a^b)&(b^c))
104 add rdx,r12 ; d += T1 (becomes the next e)
105 add r11,r12 ; h = Maj + T1
106
107 lea rbp,[8+rbp] ; even round: step to the second constant of the pair
108 add r11,r14 ; h += Sigma0(a) (becomes the next a)
; Round 1: a=r11 b=rax c=rbx d=rcx e=rdx f=r8 g=r9 h=r10
109 mov r12,QWORD[8+rsi]
110 mov r13,rdx
111 mov r14,r11
112 bswap r12 ; r12 = W[1]
113 ror r13,23
114 mov rdi,r8
115
116 xor r13,rdx
117 ror r14,5
118 xor rdi,r9 ; f^g (rdi is the Ch scratch in odd rounds)
119
120 mov QWORD[8+rsp],r12 ; schedule slot 1 = W[1]
121 xor r14,r11
122 and rdi,rdx
123
124 ror r13,4
125 add r12,r10 ; T1 = W + h
126 xor rdi,r9 ; rdi = Ch(e,f,g)
127
128 ror r14,6
129 xor r13,rdx
130 add r12,rdi
131
132 mov rdi,r11
133 add r12,QWORD[rbp]
134 xor r14,r11
135
136 xor rdi,rax ; rdi = a^b
137 ror r13,14
138 mov r10,rax
139
140 and r15,rdi ; r15 still holds the previous a^b, i.e. this round's b^c
141 ror r14,28
142 add r12,r13
143
144 xor r10,r15 ; r10 = Maj(a,b,c)
145 add rcx,r12 ; d += T1
146 add r10,r12
147
148 lea rbp,[24+rbp] ; odd round: K512 stores each constant pair twice, skip the copy
149 add r10,r14
150 mov r12,QWORD[16+rsi]
151 mov r13,rcx
152 mov r14,r10
153 bswap r12
154 ror r13,23
155 mov r15,rdx
156
157 xor r13,rcx
158 ror r14,5
159 xor r15,r8
160
161 mov QWORD[16+rsp],r12
162 xor r14,r10
163 and r15,rcx
164
165 ror r13,4
166 add r12,r9
167 xor r15,r8
168
169 ror r14,6
170 xor r13,rcx
171 add r12,r15
172
173 mov r15,r10
174 add r12,QWORD[rbp]
175 xor r14,r10
176
177 xor r15,r11
178 ror r13,14
179 mov r9,r11
180
181 and rdi,r15
182 ror r14,28
183 add r12,r13
184
185 xor r9,rdi
186 add rbx,r12
187 add r9,r12
188
189 lea rbp,[8+rbp]
190 add r9,r14
191 mov r12,QWORD[24+rsi]
192 mov r13,rbx
193 mov r14,r9
194 bswap r12
195 ror r13,23
196 mov rdi,rcx
197
198 xor r13,rbx
199 ror r14,5
200 xor rdi,rdx
201
202 mov QWORD[24+rsp],r12
203 xor r14,r9
204 and rdi,rbx
205
206 ror r13,4
207 add r12,r8
208 xor rdi,rdx
209
210 ror r14,6
211 xor r13,rbx
212 add r12,rdi
213
214 mov rdi,r9
215 add r12,QWORD[rbp]
216 xor r14,r9
217
218 xor rdi,r10
219 ror r13,14
220 mov r8,r10
221
222 and r15,rdi
223 ror r14,28
224 add r12,r13
225
226 xor r8,r15
227 add rax,r12
228 add r8,r12
229
230 lea rbp,[24+rbp]
231 add r8,r14
232 mov r12,QWORD[32+rsi]
233 mov r13,rax
234 mov r14,r8
235 bswap r12
236 ror r13,23
237 mov r15,rbx
238
239 xor r13,rax
240 ror r14,5
241 xor r15,rcx
242
243 mov QWORD[32+rsp],r12
244 xor r14,r8
245 and r15,rax
246
247 ror r13,4
248 add r12,rdx
249 xor r15,rcx
250
251 ror r14,6
252 xor r13,rax
253 add r12,r15
254
255 mov r15,r8
256 add r12,QWORD[rbp]
257 xor r14,r8
258
259 xor r15,r9
260 ror r13,14
261 mov rdx,r9
262
263 and rdi,r15
264 ror r14,28
265 add r12,r13
266
267 xor rdx,rdi
268 add r11,r12
269 add rdx,r12
270
271 lea rbp,[8+rbp]
272 add rdx,r14
273 mov r12,QWORD[40+rsi]
274 mov r13,r11
275 mov r14,rdx
276 bswap r12
277 ror r13,23
278 mov rdi,rax
279
280 xor r13,r11
281 ror r14,5
282 xor rdi,rbx
283
284 mov QWORD[40+rsp],r12
285 xor r14,rdx
286 and rdi,r11
287
288 ror r13,4
289 add r12,rcx
290 xor rdi,rbx
291
292 ror r14,6
293 xor r13,r11
294 add r12,rdi
295
296 mov rdi,rdx
297 add r12,QWORD[rbp]
298 xor r14,rdx
299
300 xor rdi,r8
301 ror r13,14
302 mov rcx,r8
303
304 and r15,rdi
305 ror r14,28
306 add r12,r13
307
308 xor rcx,r15
309 add r10,r12
310 add rcx,r12
311
312 lea rbp,[24+rbp]
313 add rcx,r14
314 mov r12,QWORD[48+rsi]
315 mov r13,r10
316 mov r14,rcx
317 bswap r12
318 ror r13,23
319 mov r15,r11
320
321 xor r13,r10
322 ror r14,5
323 xor r15,rax
324
325 mov QWORD[48+rsp],r12
326 xor r14,rcx
327 and r15,r10
328
329 ror r13,4
330 add r12,rbx
331 xor r15,rax
332
333 ror r14,6
334 xor r13,r10
335 add r12,r15
336
337 mov r15,rcx
338 add r12,QWORD[rbp]
339 xor r14,rcx
340
341 xor r15,rdx
342 ror r13,14
343 mov rbx,rdx
344
345 and rdi,r15
346 ror r14,28
347 add r12,r13
348
349 xor rbx,rdi
350 add r9,r12
351 add rbx,r12
352
353 lea rbp,[8+rbp]
354 add rbx,r14
355 mov r12,QWORD[56+rsi]
356 mov r13,r9
357 mov r14,rbx
358 bswap r12
359 ror r13,23
360 mov rdi,r10
361
362 xor r13,r9
363 ror r14,5
364 xor rdi,r11
365
366 mov QWORD[56+rsp],r12
367 xor r14,rbx
368 and rdi,r9
369
370 ror r13,4
371 add r12,rax
372 xor rdi,r11
373
374 ror r14,6
375 xor r13,r9
376 add r12,rdi
377
378 mov rdi,rbx
379 add r12,QWORD[rbp]
380 xor r14,rbx
381
382 xor rdi,rcx
383 ror r13,14
384 mov rax,rcx
385
386 and r15,rdi
387 ror r14,28
388 add r12,r13
389
390 xor rax,r15
391 add r8,r12
392 add rax,r12
393
394 lea rbp,[24+rbp]
395 add rax,r14
396 mov r12,QWORD[64+rsi]
397 mov r13,r8
398 mov r14,rax
399 bswap r12
400 ror r13,23
401 mov r15,r9
402
403 xor r13,r8
404 ror r14,5
405 xor r15,r10
406
407 mov QWORD[64+rsp],r12
408 xor r14,rax
409 and r15,r8
410
411 ror r13,4
412 add r12,r11
413 xor r15,r10
414
415 ror r14,6
416 xor r13,r8
417 add r12,r15
418
419 mov r15,rax
420 add r12,QWORD[rbp]
421 xor r14,rax
422
423 xor r15,rbx
424 ror r13,14
425 mov r11,rbx
426
427 and rdi,r15
428 ror r14,28
429 add r12,r13
430
431 xor r11,rdi
432 add rdx,r12
433 add r11,r12
434
435 lea rbp,[8+rbp]
436 add r11,r14
437 mov r12,QWORD[72+rsi]
438 mov r13,rdx
439 mov r14,r11
440 bswap r12
441 ror r13,23
442 mov rdi,r8
443
444 xor r13,rdx
445 ror r14,5
446 xor rdi,r9
447
448 mov QWORD[72+rsp],r12
449 xor r14,r11
450 and rdi,rdx
451
452 ror r13,4
453 add r12,r10
454 xor rdi,r9
455
456 ror r14,6
457 xor r13,rdx
458 add r12,rdi
459
460 mov rdi,r11
461 add r12,QWORD[rbp]
462 xor r14,r11
463
464 xor rdi,rax
465 ror r13,14
466 mov r10,rax
467
468 and r15,rdi
469 ror r14,28
470 add r12,r13
471
472 xor r10,r15
473 add rcx,r12
474 add r10,r12
475
476 lea rbp,[24+rbp]
477 add r10,r14
478 mov r12,QWORD[80+rsi]
479 mov r13,rcx
480 mov r14,r10
481 bswap r12
482 ror r13,23
483 mov r15,rdx
484
485 xor r13,rcx
486 ror r14,5
487 xor r15,r8
488
489 mov QWORD[80+rsp],r12
490 xor r14,r10
491 and r15,rcx
492
493 ror r13,4
494 add r12,r9
495 xor r15,r8
496
497 ror r14,6
498 xor r13,rcx
499 add r12,r15
500
501 mov r15,r10
502 add r12,QWORD[rbp]
503 xor r14,r10
504
505 xor r15,r11
506 ror r13,14
507 mov r9,r11
508
509 and rdi,r15
510 ror r14,28
511 add r12,r13
512
513 xor r9,rdi
514 add rbx,r12
515 add r9,r12
516
517 lea rbp,[8+rbp]
518 add r9,r14
519 mov r12,QWORD[88+rsi]
520 mov r13,rbx
521 mov r14,r9
522 bswap r12
523 ror r13,23
524 mov rdi,rcx
525
526 xor r13,rbx
527 ror r14,5
528 xor rdi,rdx
529
530 mov QWORD[88+rsp],r12
531 xor r14,r9
532 and rdi,rbx
533
534 ror r13,4
535 add r12,r8
536 xor rdi,rdx
537
538 ror r14,6
539 xor r13,rbx
540 add r12,rdi
541
542 mov rdi,r9
543 add r12,QWORD[rbp]
544 xor r14,r9
545
546 xor rdi,r10
547 ror r13,14
548 mov r8,r10
549
550 and r15,rdi
551 ror r14,28
552 add r12,r13
553
554 xor r8,r15
555 add rax,r12
556 add r8,r12
557
558 lea rbp,[24+rbp]
559 add r8,r14
560 mov r12,QWORD[96+rsi]
561 mov r13,rax
562 mov r14,r8
563 bswap r12
564 ror r13,23
565 mov r15,rbx
566
567 xor r13,rax
568 ror r14,5
569 xor r15,rcx
570
571 mov QWORD[96+rsp],r12
572 xor r14,r8
573 and r15,rax
574
575 ror r13,4
576 add r12,rdx
577 xor r15,rcx
578
579 ror r14,6
580 xor r13,rax
581 add r12,r15
582
583 mov r15,r8
584 add r12,QWORD[rbp]
585 xor r14,r8
586
587 xor r15,r9
588 ror r13,14
589 mov rdx,r9
590
591 and rdi,r15
592 ror r14,28
593 add r12,r13
594
595 xor rdx,rdi
596 add r11,r12
597 add rdx,r12
598
599 lea rbp,[8+rbp]
600 add rdx,r14
601 mov r12,QWORD[104+rsi]
602 mov r13,r11
603 mov r14,rdx
604 bswap r12
605 ror r13,23
606 mov rdi,rax
607
608 xor r13,r11
609 ror r14,5
610 xor rdi,rbx
611
612 mov QWORD[104+rsp],r12
613 xor r14,rdx
614 and rdi,r11
615
616 ror r13,4
617 add r12,rcx
618 xor rdi,rbx
619
620 ror r14,6
621 xor r13,r11
622 add r12,rdi
623
624 mov rdi,rdx
625 add r12,QWORD[rbp]
626 xor r14,rdx
627
628 xor rdi,r8
629 ror r13,14
630 mov rcx,r8
631
632 and r15,rdi
633 ror r14,28
634 add r12,r13
635
636 xor rcx,r15
637 add r10,r12
638 add rcx,r12
639
640 lea rbp,[24+rbp]
641 add rcx,r14
642 mov r12,QWORD[112+rsi]
643 mov r13,r10
644 mov r14,rcx
645 bswap r12
646 ror r13,23
647 mov r15,r11
648
649 xor r13,r10
650 ror r14,5
651 xor r15,rax
652
653 mov QWORD[112+rsp],r12
654 xor r14,rcx
655 and r15,r10
656
657 ror r13,4
658 add r12,rbx
659 xor r15,rax
660
661 ror r14,6
662 xor r13,r10
663 add r12,r15
664
665 mov r15,rcx
666 add r12,QWORD[rbp]
667 xor r14,rcx
668
669 xor r15,rdx
670 ror r13,14
671 mov rbx,rdx
672
673 and rdi,r15
674 ror r14,28
675 add r12,r13
676
677 xor rbx,rdi
678 add r9,r12
679 add rbx,r12
680
681 lea rbp,[8+rbp]
682 add rbx,r14
683 mov r12,QWORD[120+rsi]
684 mov r13,r9
685 mov r14,rbx
686 bswap r12
687 ror r13,23
688 mov rdi,r10
689
690 xor r13,r9
691 ror r14,5
692 xor rdi,r11
693
694 mov QWORD[120+rsp],r12
695 xor r14,rbx
696 and rdi,r9
697
698 ror r13,4
699 add r12,rax
700 xor rdi,r11
701
702 ror r14,6
703 xor r13,r9
704 add r12,rdi
705
706 mov rdi,rbx
707 add r12,QWORD[rbp]
708 xor r14,rbx
709
710 xor rdi,rcx
711 ror r13,14
712 mov rax,rcx
713
714 and r15,rdi
715 ror r14,28
716 add r12,r13
717
718 xor rax,r15
719 add r8,r12
720 add rax,r12
721
722 lea rbp,[24+rbp]
723 jmp NEAR $L$rounds_16_xx
724ALIGN 16
; Rounds 16 and up, 16 unrolled rounds per pass.  Each round first
; extends the message schedule in the 16-qword ring buffer at [rsp]:
;   W[t] = sigma0(W[t-15]) + W[t-7] + W[t-16] + sigma1(W[t-2])
; and then runs the same round function as rounds 0..15.
; The Sigma0 term of each round is left in r14 and added at the start
; of the following round (or after the loop for the last one).
;
; First round of the pass: a=rax b=rbx c=rcx d=rdx e=r8 f=r9 g=r10 h=r11
725$L$rounds_16_xx:
726 mov r13,QWORD[8+rsp] ; W[t-15] (slot 1)
727 mov r15,QWORD[112+rsp] ; W[t-2] (slot 14)
728
729 mov r12,r13
730 ror r13,7
731 add rax,r14 ; finish the previous round: a += Sigma0
732 mov r14,r15
733 ror r15,42
734
735 xor r13,r12
736 shr r12,7
737 ror r13,1 ; r13 = x ror 1 ^ x ror 8
738 xor r15,r14
739 shr r14,6
740
741 ror r15,19 ; r15 = y ror 19 ^ y ror 61
742 xor r12,r13 ; r12 = sigma0(W[t-15]) (includes x shr 7)
743 xor r15,r14 ; r15 = sigma1(W[t-2]) (includes y shr 6)
744 add r12,QWORD[72+rsp] ; + W[t-7] (slot 9)
745
746 add r12,QWORD[rsp] ; + W[t-16] (slot 0)
747 mov r13,r8
748 add r12,r15 ; r12 = W[t]
749 mov r14,rax
750 ror r13,23
751 mov r15,r9
752
753 xor r13,r8
754 ror r14,5
755 xor r15,r10
756
757 mov QWORD[rsp],r12 ; W[t] replaces W[t-16] in slot 0
758 xor r14,rax
759 and r15,r8
760
761 ror r13,4
762 add r12,r11 ; T1 = W + h
763 xor r15,r10 ; r15 = Ch(e,f,g)
764
765 ror r14,6
766 xor r13,r8
767 add r12,r15
768
769 mov r15,rax
770 add r12,QWORD[rbp] ; T1 += K[t]
771 xor r14,rax
772
773 xor r15,rbx ; a^b
774 ror r13,14 ; r13 = Sigma1(e)
775 mov r11,rbx
776
777 and rdi,r15 ; rdi held b^c from the previous round
778 ror r14,28 ; r14 = Sigma0(a), added by the next round
779 add r12,r13
780
781 xor r11,rdi ; r11 = Maj(a,b,c)
782 add rdx,r12 ; d += T1
783 add r11,r12 ; h = Maj + T1 (Sigma0 still pending in r14)
784
785 lea rbp,[8+rbp]
786 mov r13,QWORD[16+rsp]
787 mov rdi,QWORD[120+rsp]
788
789 mov r12,r13
790 ror r13,7
791 add r11,r14
792 mov r14,rdi
793 ror rdi,42
794
795 xor r13,r12
796 shr r12,7
797 ror r13,1
798 xor rdi,r14
799 shr r14,6
800
801 ror rdi,19
802 xor r12,r13
803 xor rdi,r14
804 add r12,QWORD[80+rsp]
805
806 add r12,QWORD[8+rsp]
807 mov r13,rdx
808 add r12,rdi
809 mov r14,r11
810 ror r13,23
811 mov rdi,r8
812
813 xor r13,rdx
814 ror r14,5
815 xor rdi,r9
816
817 mov QWORD[8+rsp],r12
818 xor r14,r11
819 and rdi,rdx
820
821 ror r13,4
822 add r12,r10
823 xor rdi,r9
824
825 ror r14,6
826 xor r13,rdx
827 add r12,rdi
828
829 mov rdi,r11
830 add r12,QWORD[rbp]
831 xor r14,r11
832
833 xor rdi,rax
834 ror r13,14
835 mov r10,rax
836
837 and r15,rdi
838 ror r14,28
839 add r12,r13
840
841 xor r10,r15
842 add rcx,r12
843 add r10,r12
844
845 lea rbp,[24+rbp]
846 mov r13,QWORD[24+rsp]
847 mov r15,QWORD[rsp]
848
849 mov r12,r13
850 ror r13,7
851 add r10,r14
852 mov r14,r15
853 ror r15,42
854
855 xor r13,r12
856 shr r12,7
857 ror r13,1
858 xor r15,r14
859 shr r14,6
860
861 ror r15,19
862 xor r12,r13
863 xor r15,r14
864 add r12,QWORD[88+rsp]
865
866 add r12,QWORD[16+rsp]
867 mov r13,rcx
868 add r12,r15
869 mov r14,r10
870 ror r13,23
871 mov r15,rdx
872
873 xor r13,rcx
874 ror r14,5
875 xor r15,r8
876
877 mov QWORD[16+rsp],r12
878 xor r14,r10
879 and r15,rcx
880
881 ror r13,4
882 add r12,r9
883 xor r15,r8
884
885 ror r14,6
886 xor r13,rcx
887 add r12,r15
888
889 mov r15,r10
890 add r12,QWORD[rbp]
891 xor r14,r10
892
893 xor r15,r11
894 ror r13,14
895 mov r9,r11
896
897 and rdi,r15
898 ror r14,28
899 add r12,r13
900
901 xor r9,rdi
902 add rbx,r12
903 add r9,r12
904
905 lea rbp,[8+rbp]
906 mov r13,QWORD[32+rsp]
907 mov rdi,QWORD[8+rsp]
908
909 mov r12,r13
910 ror r13,7
911 add r9,r14
912 mov r14,rdi
913 ror rdi,42
914
915 xor r13,r12
916 shr r12,7
917 ror r13,1
918 xor rdi,r14
919 shr r14,6
920
921 ror rdi,19
922 xor r12,r13
923 xor rdi,r14
924 add r12,QWORD[96+rsp]
925
926 add r12,QWORD[24+rsp]
927 mov r13,rbx
928 add r12,rdi
929 mov r14,r9
930 ror r13,23
931 mov rdi,rcx
932
933 xor r13,rbx
934 ror r14,5
935 xor rdi,rdx
936
937 mov QWORD[24+rsp],r12
938 xor r14,r9
939 and rdi,rbx
940
941 ror r13,4
942 add r12,r8
943 xor rdi,rdx
944
945 ror r14,6
946 xor r13,rbx
947 add r12,rdi
948
949 mov rdi,r9
950 add r12,QWORD[rbp]
951 xor r14,r9
952
953 xor rdi,r10
954 ror r13,14
955 mov r8,r10
956
957 and r15,rdi
958 ror r14,28
959 add r12,r13
960
961 xor r8,r15
962 add rax,r12
963 add r8,r12
964
965 lea rbp,[24+rbp]
966 mov r13,QWORD[40+rsp]
967 mov r15,QWORD[16+rsp]
968
969 mov r12,r13
970 ror r13,7
971 add r8,r14
972 mov r14,r15
973 ror r15,42
974
975 xor r13,r12
976 shr r12,7
977 ror r13,1
978 xor r15,r14
979 shr r14,6
980
981 ror r15,19
982 xor r12,r13
983 xor r15,r14
984 add r12,QWORD[104+rsp]
985
986 add r12,QWORD[32+rsp]
987 mov r13,rax
988 add r12,r15
989 mov r14,r8
990 ror r13,23
991 mov r15,rbx
992
993 xor r13,rax
994 ror r14,5
995 xor r15,rcx
996
997 mov QWORD[32+rsp],r12
998 xor r14,r8
999 and r15,rax
1000
1001 ror r13,4
1002 add r12,rdx
1003 xor r15,rcx
1004
1005 ror r14,6
1006 xor r13,rax
1007 add r12,r15
1008
1009 mov r15,r8
1010 add r12,QWORD[rbp]
1011 xor r14,r8
1012
1013 xor r15,r9
1014 ror r13,14
1015 mov rdx,r9
1016
1017 and rdi,r15
1018 ror r14,28
1019 add r12,r13
1020
1021 xor rdx,rdi
1022 add r11,r12
1023 add rdx,r12
1024
1025 lea rbp,[8+rbp]
1026 mov r13,QWORD[48+rsp]
1027 mov rdi,QWORD[24+rsp]
1028
1029 mov r12,r13
1030 ror r13,7
1031 add rdx,r14
1032 mov r14,rdi
1033 ror rdi,42
1034
1035 xor r13,r12
1036 shr r12,7
1037 ror r13,1
1038 xor rdi,r14
1039 shr r14,6
1040
1041 ror rdi,19
1042 xor r12,r13
1043 xor rdi,r14
1044 add r12,QWORD[112+rsp]
1045
1046 add r12,QWORD[40+rsp]
1047 mov r13,r11
1048 add r12,rdi
1049 mov r14,rdx
1050 ror r13,23
1051 mov rdi,rax
1052
1053 xor r13,r11
1054 ror r14,5
1055 xor rdi,rbx
1056
1057 mov QWORD[40+rsp],r12
1058 xor r14,rdx
1059 and rdi,r11
1060
1061 ror r13,4
1062 add r12,rcx
1063 xor rdi,rbx
1064
1065 ror r14,6
1066 xor r13,r11
1067 add r12,rdi
1068
1069 mov rdi,rdx
1070 add r12,QWORD[rbp]
1071 xor r14,rdx
1072
1073 xor rdi,r8
1074 ror r13,14
1075 mov rcx,r8
1076
1077 and r15,rdi
1078 ror r14,28
1079 add r12,r13
1080
1081 xor rcx,r15
1082 add r10,r12
1083 add rcx,r12
1084
1085 lea rbp,[24+rbp]
1086 mov r13,QWORD[56+rsp]
1087 mov r15,QWORD[32+rsp]
1088
1089 mov r12,r13
1090 ror r13,7
1091 add rcx,r14
1092 mov r14,r15
1093 ror r15,42
1094
1095 xor r13,r12
1096 shr r12,7
1097 ror r13,1
1098 xor r15,r14
1099 shr r14,6
1100
1101 ror r15,19
1102 xor r12,r13
1103 xor r15,r14
1104 add r12,QWORD[120+rsp]
1105
1106 add r12,QWORD[48+rsp]
1107 mov r13,r10
1108 add r12,r15
1109 mov r14,rcx
1110 ror r13,23
1111 mov r15,r11
1112
1113 xor r13,r10
1114 ror r14,5
1115 xor r15,rax
1116
1117 mov QWORD[48+rsp],r12
1118 xor r14,rcx
1119 and r15,r10
1120
1121 ror r13,4
1122 add r12,rbx
1123 xor r15,rax
1124
1125 ror r14,6
1126 xor r13,r10
1127 add r12,r15
1128
1129 mov r15,rcx
1130 add r12,QWORD[rbp]
1131 xor r14,rcx
1132
1133 xor r15,rdx
1134 ror r13,14
1135 mov rbx,rdx
1136
1137 and rdi,r15
1138 ror r14,28
1139 add r12,r13
1140
1141 xor rbx,rdi
1142 add r9,r12
1143 add rbx,r12
1144
1145 lea rbp,[8+rbp]
1146 mov r13,QWORD[64+rsp]
1147 mov rdi,QWORD[40+rsp]
1148
1149 mov r12,r13
1150 ror r13,7
1151 add rbx,r14
1152 mov r14,rdi
1153 ror rdi,42
1154
1155 xor r13,r12
1156 shr r12,7
1157 ror r13,1
1158 xor rdi,r14
1159 shr r14,6
1160
1161 ror rdi,19
1162 xor r12,r13
1163 xor rdi,r14
1164 add r12,QWORD[rsp]
1165
1166 add r12,QWORD[56+rsp]
1167 mov r13,r9
1168 add r12,rdi
1169 mov r14,rbx
1170 ror r13,23
1171 mov rdi,r10
1172
1173 xor r13,r9
1174 ror r14,5
1175 xor rdi,r11
1176
1177 mov QWORD[56+rsp],r12
1178 xor r14,rbx
1179 and rdi,r9
1180
1181 ror r13,4
1182 add r12,rax
1183 xor rdi,r11
1184
1185 ror r14,6
1186 xor r13,r9
1187 add r12,rdi
1188
1189 mov rdi,rbx
1190 add r12,QWORD[rbp]
1191 xor r14,rbx
1192
1193 xor rdi,rcx
1194 ror r13,14
1195 mov rax,rcx
1196
1197 and r15,rdi
1198 ror r14,28
1199 add r12,r13
1200
1201 xor rax,r15
1202 add r8,r12
1203 add rax,r12
1204
1205 lea rbp,[24+rbp]
1206 mov r13,QWORD[72+rsp]
1207 mov r15,QWORD[48+rsp]
1208
1209 mov r12,r13
1210 ror r13,7
1211 add rax,r14
1212 mov r14,r15
1213 ror r15,42
1214
1215 xor r13,r12
1216 shr r12,7
1217 ror r13,1
1218 xor r15,r14
1219 shr r14,6
1220
1221 ror r15,19
1222 xor r12,r13
1223 xor r15,r14
1224 add r12,QWORD[8+rsp]
1225
1226 add r12,QWORD[64+rsp]
1227 mov r13,r8
1228 add r12,r15
1229 mov r14,rax
1230 ror r13,23
1231 mov r15,r9
1232
1233 xor r13,r8
1234 ror r14,5
1235 xor r15,r10
1236
1237 mov QWORD[64+rsp],r12
1238 xor r14,rax
1239 and r15,r8
1240
1241 ror r13,4
1242 add r12,r11
1243 xor r15,r10
1244
1245 ror r14,6
1246 xor r13,r8
1247 add r12,r15
1248
1249 mov r15,rax
1250 add r12,QWORD[rbp]
1251 xor r14,rax
1252
1253 xor r15,rbx
1254 ror r13,14
1255 mov r11,rbx
1256
1257 and rdi,r15
1258 ror r14,28
1259 add r12,r13
1260
1261 xor r11,rdi
1262 add rdx,r12
1263 add r11,r12
1264
1265 lea rbp,[8+rbp]
1266 mov r13,QWORD[80+rsp]
1267 mov rdi,QWORD[56+rsp]
1268
1269 mov r12,r13
1270 ror r13,7
1271 add r11,r14
1272 mov r14,rdi
1273 ror rdi,42
1274
1275 xor r13,r12
1276 shr r12,7
1277 ror r13,1
1278 xor rdi,r14
1279 shr r14,6
1280
1281 ror rdi,19
1282 xor r12,r13
1283 xor rdi,r14
1284 add r12,QWORD[16+rsp]
1285
1286 add r12,QWORD[72+rsp]
1287 mov r13,rdx
1288 add r12,rdi
1289 mov r14,r11
1290 ror r13,23
1291 mov rdi,r8
1292
1293 xor r13,rdx
1294 ror r14,5
1295 xor rdi,r9
1296
1297 mov QWORD[72+rsp],r12
1298 xor r14,r11
1299 and rdi,rdx
1300
1301 ror r13,4
1302 add r12,r10
1303 xor rdi,r9
1304
1305 ror r14,6
1306 xor r13,rdx
1307 add r12,rdi
1308
1309 mov rdi,r11
1310 add r12,QWORD[rbp]
1311 xor r14,r11
1312
1313 xor rdi,rax
1314 ror r13,14
1315 mov r10,rax
1316
1317 and r15,rdi
1318 ror r14,28
1319 add r12,r13
1320
1321 xor r10,r15
1322 add rcx,r12
1323 add r10,r12
1324
1325 lea rbp,[24+rbp]
1326 mov r13,QWORD[88+rsp]
1327 mov r15,QWORD[64+rsp]
1328
1329 mov r12,r13
1330 ror r13,7
1331 add r10,r14
1332 mov r14,r15
1333 ror r15,42
1334
1335 xor r13,r12
1336 shr r12,7
1337 ror r13,1
1338 xor r15,r14
1339 shr r14,6
1340
1341 ror r15,19
1342 xor r12,r13
1343 xor r15,r14
1344 add r12,QWORD[24+rsp]
1345
1346 add r12,QWORD[80+rsp]
1347 mov r13,rcx
1348 add r12,r15
1349 mov r14,r10
1350 ror r13,23
1351 mov r15,rdx
1352
1353 xor r13,rcx
1354 ror r14,5
1355 xor r15,r8
1356
1357 mov QWORD[80+rsp],r12
1358 xor r14,r10
1359 and r15,rcx
1360
1361 ror r13,4
1362 add r12,r9
1363 xor r15,r8
1364
1365 ror r14,6
1366 xor r13,rcx
1367 add r12,r15
1368
1369 mov r15,r10
1370 add r12,QWORD[rbp]
1371 xor r14,r10
1372
1373 xor r15,r11
1374 ror r13,14
1375 mov r9,r11
1376
1377 and rdi,r15
1378 ror r14,28
1379 add r12,r13
1380
1381 xor r9,rdi
1382 add rbx,r12
1383 add r9,r12
1384
1385 lea rbp,[8+rbp]
1386 mov r13,QWORD[96+rsp]
1387 mov rdi,QWORD[72+rsp]
1388
1389 mov r12,r13
1390 ror r13,7
1391 add r9,r14
1392 mov r14,rdi
1393 ror rdi,42
1394
1395 xor r13,r12
1396 shr r12,7
1397 ror r13,1
1398 xor rdi,r14
1399 shr r14,6
1400
1401 ror rdi,19
1402 xor r12,r13
1403 xor rdi,r14
1404 add r12,QWORD[32+rsp]
1405
1406 add r12,QWORD[88+rsp]
1407 mov r13,rbx
1408 add r12,rdi
1409 mov r14,r9
1410 ror r13,23
1411 mov rdi,rcx
1412
1413 xor r13,rbx
1414 ror r14,5
1415 xor rdi,rdx
1416
1417 mov QWORD[88+rsp],r12
1418 xor r14,r9
1419 and rdi,rbx
1420
1421 ror r13,4
1422 add r12,r8
1423 xor rdi,rdx
1424
1425 ror r14,6
1426 xor r13,rbx
1427 add r12,rdi
1428
1429 mov rdi,r9
1430 add r12,QWORD[rbp]
1431 xor r14,r9
1432
1433 xor rdi,r10
1434 ror r13,14
1435 mov r8,r10
1436
1437 and r15,rdi
1438 ror r14,28
1439 add r12,r13
1440
1441 xor r8,r15
1442 add rax,r12
1443 add r8,r12
1444
1445 lea rbp,[24+rbp]
1446 mov r13,QWORD[104+rsp]
1447 mov r15,QWORD[80+rsp]
1448
1449 mov r12,r13
1450 ror r13,7
1451 add r8,r14
1452 mov r14,r15
1453 ror r15,42
1454
1455 xor r13,r12
1456 shr r12,7
1457 ror r13,1
1458 xor r15,r14
1459 shr r14,6
1460
1461 ror r15,19
1462 xor r12,r13
1463 xor r15,r14
1464 add r12,QWORD[40+rsp]
1465
1466 add r12,QWORD[96+rsp]
1467 mov r13,rax
1468 add r12,r15
1469 mov r14,r8
1470 ror r13,23
1471 mov r15,rbx
1472
1473 xor r13,rax
1474 ror r14,5
1475 xor r15,rcx
1476
1477 mov QWORD[96+rsp],r12
1478 xor r14,r8
1479 and r15,rax
1480
1481 ror r13,4
1482 add r12,rdx
1483 xor r15,rcx
1484
1485 ror r14,6
1486 xor r13,rax
1487 add r12,r15
1488
1489 mov r15,r8
1490 add r12,QWORD[rbp]
1491 xor r14,r8
1492
1493 xor r15,r9
1494 ror r13,14
1495 mov rdx,r9
1496
1497 and rdi,r15
1498 ror r14,28
1499 add r12,r13
1500
1501 xor rdx,rdi
1502 add r11,r12
1503 add rdx,r12
1504
1505 lea rbp,[8+rbp]
1506 mov r13,QWORD[112+rsp]
1507 mov rdi,QWORD[88+rsp]
1508
1509 mov r12,r13
1510 ror r13,7
1511 add rdx,r14
1512 mov r14,rdi
1513 ror rdi,42
1514
1515 xor r13,r12
1516 shr r12,7
1517 ror r13,1
1518 xor rdi,r14
1519 shr r14,6
1520
1521 ror rdi,19
1522 xor r12,r13
1523 xor rdi,r14
1524 add r12,QWORD[48+rsp]
1525
1526 add r12,QWORD[104+rsp]
1527 mov r13,r11
1528 add r12,rdi
1529 mov r14,rdx
1530 ror r13,23
1531 mov rdi,rax
1532
1533 xor r13,r11
1534 ror r14,5
1535 xor rdi,rbx
1536
1537 mov QWORD[104+rsp],r12
1538 xor r14,rdx
1539 and rdi,r11
1540
1541 ror r13,4
1542 add r12,rcx
1543 xor rdi,rbx
1544
1545 ror r14,6
1546 xor r13,r11
1547 add r12,rdi
1548
1549 mov rdi,rdx
1550 add r12,QWORD[rbp]
1551 xor r14,rdx
1552
1553 xor rdi,r8
1554 ror r13,14
1555 mov rcx,r8
1556
1557 and r15,rdi
1558 ror r14,28
1559 add r12,r13
1560
1561 xor rcx,r15
1562 add r10,r12
1563 add rcx,r12
1564
1565 lea rbp,[24+rbp]
1566 mov r13,QWORD[120+rsp]
1567 mov r15,QWORD[96+rsp]
1568
1569 mov r12,r13
1570 ror r13,7
1571 add rcx,r14
1572 mov r14,r15
1573 ror r15,42
1574
1575 xor r13,r12
1576 shr r12,7
1577 ror r13,1
1578 xor r15,r14
1579 shr r14,6
1580
1581 ror r15,19
1582 xor r12,r13
1583 xor r15,r14
1584 add r12,QWORD[56+rsp]
1585
1586 add r12,QWORD[112+rsp]
1587 mov r13,r10
1588 add r12,r15
1589 mov r14,rcx
1590 ror r13,23
1591 mov r15,r11
1592
1593 xor r13,r10
1594 ror r14,5
1595 xor r15,rax
1596
1597 mov QWORD[112+rsp],r12
1598 xor r14,rcx
1599 and r15,r10
1600
1601 ror r13,4
1602 add r12,rbx
1603 xor r15,rax
1604
1605 ror r14,6
1606 xor r13,r10
1607 add r12,r15
1608
1609 mov r15,rcx
1610 add r12,QWORD[rbp]
1611 xor r14,rcx
1612
1613 xor r15,rdx
1614 ror r13,14
1615 mov rbx,rdx
1616
1617 and rdi,r15
1618 ror r14,28
1619 add r12,r13
1620
1621 xor rbx,rdi
1622 add r9,r12
1623 add rbx,r12
1624
1625 lea rbp,[8+rbp]
1626 mov r13,QWORD[rsp]
1627 mov rdi,QWORD[104+rsp]
1628
1629 mov r12,r13
1630 ror r13,7
1631 add rbx,r14
1632 mov r14,rdi
1633 ror rdi,42
1634
1635 xor r13,r12
1636 shr r12,7
1637 ror r13,1
1638 xor rdi,r14
1639 shr r14,6
1640
1641 ror rdi,19
1642 xor r12,r13
1643 xor rdi,r14
1644 add r12,QWORD[64+rsp]
1645
1646 add r12,QWORD[120+rsp]
1647 mov r13,r9
1648 add r12,rdi
1649 mov r14,rbx
1650 ror r13,23
1651 mov rdi,r10
1652
1653 xor r13,r9
1654 ror r14,5
1655 xor rdi,r11
1656
1657 mov QWORD[120+rsp],r12
1658 xor r14,rbx
1659 and rdi,r9
1660
1661 ror r13,4
1662 add r12,rax
1663 xor rdi,r11
1664
1665 ror r14,6
1666 xor r13,r9
1667 add r12,rdi
1668
1669 mov rdi,rbx
1670 add r12,QWORD[rbp]
1671 xor r14,rbx
1672
1673 xor rdi,rcx
1674 ror r13,14
1675 mov rax,rcx
1676
1677 and r15,rdi
1678 ror r14,28
1679 add r12,r13
1680
1681 xor rax,r15
1682 add r8,r12
1683 add rax,r12
1684
1685 lea rbp,[24+rbp]
; End of a 16-round pass.  rbp now points at the next table entry; the
; most significant byte of every round constant reached here is
; non-zero, while the qword that follows the last constant in K512
; (0x0001020304050607) has a zero top byte, which ends the round loop.
1686 cmp BYTE[7+rbp],0
1687 jnz NEAR $L$rounds_16_xx
1688
1689 mov rdi,QWORD[((128+0))+rsp] ; reload the state pointer (rdi was round scratch)
1690 add rax,r14 ; finish the last round: a += Sigma0
1691 lea rsi,[128+rsi] ; advance to the next 128-byte input block
1692
; Add the previous state to the working variables.
1693 add rax,QWORD[rdi]
1694 add rbx,QWORD[8+rdi]
1695 add rcx,QWORD[16+rdi]
1696 add rdx,QWORD[24+rdi]
1697 add r8,QWORD[32+rdi]
1698 add r9,QWORD[40+rdi]
1699 add r10,QWORD[48+rdi]
1700 add r11,QWORD[56+rdi]
1701
1702 cmp rsi,QWORD[((128+16))+rsp] ; flags survive the mov stores below
1703
; Write the updated state back.
1704 mov QWORD[rdi],rax
1705 mov QWORD[8+rdi],rbx
1706 mov QWORD[16+rdi],rcx
1707 mov QWORD[24+rdi],rdx
1708 mov QWORD[32+rdi],r8
1709 mov QWORD[40+rdi],r9
1710 mov QWORD[48+rdi],r10
1711 mov QWORD[56+rdi],r11
1712 jb NEAR $L$loop ; unsigned: more input remains before the end pointer
1713
; Tear down the frame: rsi = rsp as it was right after the six pushes,
; so the saved registers are read back in reverse push order.
1714 mov rsi,QWORD[((128+24))+rsp]
1715 mov r15,QWORD[rsi]
1716 mov r14,QWORD[8+rsi]
1717 mov r13,QWORD[16+rsi]
1718 mov r12,QWORD[24+rsi]
1719 mov rbp,QWORD[32+rsi]
1720 mov rbx,QWORD[40+rsi]
1721 lea rsp,[48+rsi] ; rsp back to its value at function entry
1722$L$epilogue:
1723 mov rdi,QWORD[8+rsp] ;WIN64 epilogue: restore rdi/rsi parked at entry
1724 mov rsi,QWORD[16+rsp]
1725 DB 0F3h,0C3h ;repret (bytes F3 C3 = "rep ret")
1726$L$SEH_end_sha512_block_data_order:
1727ALIGN 64
1728
; K512: the 80 SHA-512 round constants.  Every 16-byte pair of
; constants is emitted twice in a row, so the table occupies
; 80*16 = 1280 bytes.  The scalar code steps through it with
; alternating +8 / +24 strides to skip the duplicate copies.
; NOTE(review): the duplication is presumably for wider vector code
; paths that are outside this excerpt -- confirm.
1729K512:
1730 DQ 0x428a2f98d728ae22,0x7137449123ef65cd
1731 DQ 0x428a2f98d728ae22,0x7137449123ef65cd
1732 DQ 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1733 DQ 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
1734 DQ 0x3956c25bf348b538,0x59f111f1b605d019
1735 DQ 0x3956c25bf348b538,0x59f111f1b605d019
1736 DQ 0x923f82a4af194f9b,0xab1c5ed5da6d8118
1737 DQ 0x923f82a4af194f9b,0xab1c5ed5da6d8118
1738 DQ 0xd807aa98a3030242,0x12835b0145706fbe
1739 DQ 0xd807aa98a3030242,0x12835b0145706fbe
1740 DQ 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1741 DQ 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
1742 DQ 0x72be5d74f27b896f,0x80deb1fe3b1696b1
1743 DQ 0x72be5d74f27b896f,0x80deb1fe3b1696b1
1744 DQ 0x9bdc06a725c71235,0xc19bf174cf692694
1745 DQ 0x9bdc06a725c71235,0xc19bf174cf692694
1746 DQ 0xe49b69c19ef14ad2,0xefbe4786384f25e3
1747 DQ 0xe49b69c19ef14ad2,0xefbe4786384f25e3
1748 DQ 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1749 DQ 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
1750 DQ 0x2de92c6f592b0275,0x4a7484aa6ea6e483
1751 DQ 0x2de92c6f592b0275,0x4a7484aa6ea6e483
1752 DQ 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1753 DQ 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
1754 DQ 0x983e5152ee66dfab,0xa831c66d2db43210
1755 DQ 0x983e5152ee66dfab,0xa831c66d2db43210
1756 DQ 0xb00327c898fb213f,0xbf597fc7beef0ee4
1757 DQ 0xb00327c898fb213f,0xbf597fc7beef0ee4
1758 DQ 0xc6e00bf33da88fc2,0xd5a79147930aa725
1759 DQ 0xc6e00bf33da88fc2,0xd5a79147930aa725
1760 DQ 0x06ca6351e003826f,0x142929670a0e6e70
1761 DQ 0x06ca6351e003826f,0x142929670a0e6e70
1762 DQ 0x27b70a8546d22ffc,0x2e1b21385c26c926
1763 DQ 0x27b70a8546d22ffc,0x2e1b21385c26c926
1764 DQ 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1765 DQ 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
1766 DQ 0x650a73548baf63de,0x766a0abb3c77b2a8
1767 DQ 0x650a73548baf63de,0x766a0abb3c77b2a8
1768 DQ 0x81c2c92e47edaee6,0x92722c851482353b
1769 DQ 0x81c2c92e47edaee6,0x92722c851482353b
1770 DQ 0xa2bfe8a14cf10364,0xa81a664bbc423001
1771 DQ 0xa2bfe8a14cf10364,0xa81a664bbc423001
1772 DQ 0xc24b8b70d0f89791,0xc76c51a30654be30
1773 DQ 0xc24b8b70d0f89791,0xc76c51a30654be30
1774 DQ 0xd192e819d6ef5218,0xd69906245565a910
1775 DQ 0xd192e819d6ef5218,0xd69906245565a910
1776 DQ 0xf40e35855771202a,0x106aa07032bbd1b8
1777 DQ 0xf40e35855771202a,0x106aa07032bbd1b8
1778 DQ 0x19a4c116b8d2d0c8,0x1e376c085141ab53
1779 DQ 0x19a4c116b8d2d0c8,0x1e376c085141ab53
1780 DQ 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1781 DQ 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
1782 DQ 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1783 DQ 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
1784 DQ 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1785 DQ 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
1786 DQ 0x748f82ee5defb2fc,0x78a5636f43172f60
1787 DQ 0x748f82ee5defb2fc,0x78a5636f43172f60
1788 DQ 0x84c87814a1f0ab72,0x8cc702081a6439ec
1789 DQ 0x84c87814a1f0ab72,0x8cc702081a6439ec
1790 DQ 0x90befffa23631e28,0xa4506cebde82bde9
1791 DQ 0x90befffa23631e28,0xa4506cebde82bde9
1792 DQ 0xbef9a3f7b2c67915,0xc67178f2e372532b
1793 DQ 0xbef9a3f7b2c67915,0xc67178f2e372532b
1794 DQ 0xca273eceea26619c,0xd186b8c721c0c207
1795 DQ 0xca273eceea26619c,0xd186b8c721c0c207
1796 DQ 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1797 DQ 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
1798 DQ 0x06f067aa72176fba,0x0a637dc5a2c898a6
1799 DQ 0x06f067aa72176fba,0x0a637dc5a2c898a6
1800 DQ 0x113f9804bef90dae,0x1b710b35131c471b
1801 DQ 0x113f9804bef90dae,0x1b710b35131c471b
1802 DQ 0x28db77f523047d84,0x32caab7b40c72493
1803 DQ 0x28db77f523047d84,0x32caab7b40c72493
1804 DQ 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1805 DQ 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
1806 DQ 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1807 DQ 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
1808 DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817
1809 DQ 0x5fcb6fab3ad6faec,0x6c44198c4a475817
1810
; Offset 1280: byte-reversal shuffle mask, loaded from K512+1280 by the
; XOP path and applied with vpshufb to the input words.  The zero top
; byte of its first qword also serves as the end marker tested by the
; scalar round loop (cmp BYTE[7+rbp],0).
1811 DQ 0x0001020304050607,0x08090a0b0c0d0e0f
1812 DQ 0x0001020304050607,0x08090a0b0c0d0e0f
; NUL-terminated ASCII identification string:
; "SHA512 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
1813DB 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97
1814DB 110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
1815DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
1816DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
1817DB 111,114,103,62,0
1818
1819ALIGN 64
1820sha512_block_data_order_xop:
1821 mov QWORD[8+rsp],rdi ;WIN64 prologue
1822 mov QWORD[16+rsp],rsi
1823 mov rax,rsp
1824$L$SEH_begin_sha512_block_data_order_xop:
1825 mov rdi,rcx
1826 mov rsi,rdx
1827 mov rdx,r8
1828
1829
1830$L$xop_shortcut:
1831 push rbx
1832 push rbp
1833 push r12
1834 push r13
1835 push r14
1836 push r15
1837 mov r11,rsp
1838 shl rdx,4
1839 sub rsp,256
1840 lea rdx,[rdx*8+rsi]
1841 and rsp,-64
1842 mov QWORD[((128+0))+rsp],rdi
1843 mov QWORD[((128+8))+rsp],rsi
1844 mov QWORD[((128+16))+rsp],rdx
1845 mov QWORD[((128+24))+rsp],r11
1846 movaps XMMWORD[(128+32)+rsp],xmm6
1847 movaps XMMWORD[(128+48)+rsp],xmm7
1848 movaps XMMWORD[(128+64)+rsp],xmm8
1849 movaps XMMWORD[(128+80)+rsp],xmm9
1850 movaps XMMWORD[(128+96)+rsp],xmm10
1851 movaps XMMWORD[(128+112)+rsp],xmm11
1852$L$prologue_xop:
1853
1854 vzeroupper
1855 mov rax,QWORD[rdi]
1856 mov rbx,QWORD[8+rdi]
1857 mov rcx,QWORD[16+rdi]
1858 mov rdx,QWORD[24+rdi]
1859 mov r8,QWORD[32+rdi]
1860 mov r9,QWORD[40+rdi]
1861 mov r10,QWORD[48+rdi]
1862 mov r11,QWORD[56+rdi]
1863 jmp NEAR $L$loop_xop
1864ALIGN 16
; Per-block setup (one iteration per 128-byte input block).
; Loads the 16 input qwords from [rsi] into xmm0..xmm7 (two per register),
; permutes the bytes of each vector with the vpshufb control loaded from
; K512+1280, adds the first table entries and parks the sums in the 16 qword
; slots at [rsp+0..127] for the scalar rounds to consume.
; NOTE(review): the vpshufb mask is presumably a per-qword byte swap (the scalar
; path uses `bswap` on each input word) -- K512 is not visible here, confirm.
; NOTE(review): constants are fetched 16 bytes at a time with a 32-byte stride
; (rbp-128, -96, ... +96), so the table apparently stores each 16-byte pair
; twice -- confirm against the K512 definition.
1865$L$loop_xop:
1866 vmovdqa xmm11,XMMWORD[((K512+1280))]
1867 vmovdqu xmm0,XMMWORD[rsi]
1868 lea rbp,[((K512+128))] ;bias by 128 so offsets -128..+96 reach K512+0..K512+224
1869 vmovdqu xmm1,XMMWORD[16+rsi]
1870 vmovdqu xmm2,XMMWORD[32+rsi]
1871 vpshufb xmm0,xmm0,xmm11
1872 vmovdqu xmm3,XMMWORD[48+rsi]
1873 vpshufb xmm1,xmm1,xmm11
1874 vmovdqu xmm4,XMMWORD[64+rsi]
1875 vpshufb xmm2,xmm2,xmm11
1876 vmovdqu xmm5,XMMWORD[80+rsi]
1877 vpshufb xmm3,xmm3,xmm11
1878 vmovdqu xmm6,XMMWORD[96+rsi]
1879 vpshufb xmm4,xmm4,xmm11
1880 vmovdqu xmm7,XMMWORD[112+rsi]
1881 vpshufb xmm5,xmm5,xmm11
1882 vpaddq xmm8,xmm0,XMMWORD[((-128))+rbp]
1883 vpshufb xmm6,xmm6,xmm11
1884 vpaddq xmm9,xmm1,XMMWORD[((-96))+rbp]
1885 vpshufb xmm7,xmm7,xmm11 ;last use of the mask; xmm11 becomes a temporary below
1886 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]
1887 vpaddq xmm11,xmm3,XMMWORD[((-32))+rbp]
1888 vmovdqa XMMWORD[rsp],xmm8
1889 vpaddq xmm8,xmm4,XMMWORD[rbp]
1890 vmovdqa XMMWORD[16+rsp],xmm9
1891 vpaddq xmm9,xmm5,XMMWORD[32+rbp]
1892 vmovdqa XMMWORD[32+rsp],xmm10
1893 vpaddq xmm10,xmm6,XMMWORD[64+rbp]
1894 vmovdqa XMMWORD[48+rsp],xmm11
1895 vpaddq xmm11,xmm7,XMMWORD[96+rbp]
1896 vmovdqa XMMWORD[64+rsp],xmm8
1897 mov r14,rax ;seed for the round code: r14 = state word 0
1898 vmovdqa XMMWORD[80+rsp],xmm9
1899 mov rdi,rbx ;rdi = rbx ^ rcx (completed two lines below); rdi is free
1900 vmovdqa XMMWORD[96+rsp],xmm10
1901 xor rdi,rcx ;because the state pointer is kept at [rsp+128]
1902 vmovdqa XMMWORD[112+rsp],xmm11
1903 mov r13,r8 ;seed for the round code: r13 = state word 4
1904 jmp NEAR $L$xop_00_47
1905
1906ALIGN 16
; Main loop: each pass runs 16 scalar rounds while the vector unit prepares
; the 16 slots at [rsp+0..127] for the following 16 rounds.
;
; The pass is eight copies of the same 69-line group.  Group i
;   * runs two scalar rounds that consume QWORD[16*i+rsp] and QWORD[16*i+8+rsp];
;   * updates xmm<i> in place:
;       xmm<i> += (words i+9..i+10 of the window, built with vpalignr)
;               + s0(words i+1..i+2) + s1(xmm<i-1>, i.e. the two newest words)
;     where s0(x) = ror(x,1) ^ ror(x,8) ^ (x >> 7)
;           s1(x) = ror(x,19) ^ ror(x,61) ^ (x >> 6)
;   * stores xmm<i> + table constants back to [16*i+rsp].
;
; The DB lines are hand-encoded XOP instructions (0x8F escape byte); decoded
; they are vprotq (rotate-left of each qword by imm8), as annotated per line.
; A left rotate by 56/63/3/45 equals a right rotate by 8/1/61/19.
;
; Scalar round pattern (register roles rotate from round to round):
;   r13: ((e ror 23 ^ e) ror 4 ^ e) ror 14  = e ror 41 ^ e ror 18 ^ e ror 14
;   r14: ((a ror 5 ^ a) ror 6 ^ a) ror 28   = a ror 39 ^ a ror 34 ^ a ror 28
;   r12: ((f ^ g) & e) ^ g
;   rdi / r15 (alternating): ((a ^ b) & (b ^ c)) ^ b, reusing the previous
;        round's a^b as this round's b^c.
;   The r14 sum is folded into the new first working variable by the
;   `mov <reg>,r14` at the start of the next round.
;
; NOTE(review): the exit test reads one byte 135 bytes past rbp, i.e. inside
; the constant table; the loop presumably ends when rbp has advanced past the
; last round constants and that byte is zero -- K512 is not visible here.
1907$L$xop_00_47:
1908 add rbp,256
; ---- group 0: rounds using [rsp], [8+rsp]; updates xmm0 ----
1909 vpalignr xmm8,xmm1,xmm0,8
1910 ror r13,23
1911 mov rax,r14
1912 vpalignr xmm11,xmm5,xmm4,8
1913 mov r12,r9
1914 ror r14,5
1915DB 143,72,120,195,200,56 ;vprotq xmm9,xmm8,56
1916 xor r13,r8
1917 xor r12,r10
1918 vpsrlq xmm8,xmm8,7
1919 ror r13,4
1920 xor r14,rax
1921 vpaddq xmm0,xmm0,xmm11
1922 and r12,r8
1923 xor r13,r8
1924 add r11,QWORD[rsp]
1925 mov r15,rax
1926DB 143,72,120,195,209,7 ;vprotq xmm10,xmm9,7
1927 xor r12,r10
1928 ror r14,6
1929 vpxor xmm8,xmm8,xmm9
1930 xor r15,rbx
1931 add r11,r12
1932 ror r13,14
1933 and rdi,r15
1934DB 143,104,120,195,223,3 ;vprotq xmm11,xmm7,3
1935 xor r14,rax
1936 add r11,r13
1937 vpxor xmm8,xmm8,xmm10
1938 xor rdi,rbx
1939 ror r14,28
1940 vpsrlq xmm10,xmm7,6
1941 add rdx,r11
1942 add r11,rdi
1943 vpaddq xmm0,xmm0,xmm8
1944 mov r13,rdx
1945 add r14,r11
1946DB 143,72,120,195,203,42 ;vprotq xmm9,xmm11,42
1947 ror r13,23
1948 mov r11,r14
1949 vpxor xmm11,xmm11,xmm10
1950 mov r12,r8
1951 ror r14,5
1952 xor r13,rdx
1953 xor r12,r9
1954 vpxor xmm11,xmm11,xmm9
1955 ror r13,4
1956 xor r14,r11
1957 and r12,rdx
1958 xor r13,rdx
1959 vpaddq xmm0,xmm0,xmm11
1960 add r10,QWORD[8+rsp]
1961 mov rdi,r11
1962 xor r12,r9
1963 ror r14,6
1964 vpaddq xmm10,xmm0,XMMWORD[((-128))+rbp]
1965 xor rdi,rax
1966 add r10,r12
1967 ror r13,14
1968 and r15,rdi
1969 xor r14,r11
1970 add r10,r13
1971 xor r15,rax
1972 ror r14,28
1973 add rcx,r10
1974 add r10,r15
1975 mov r13,rcx
1976 add r14,r10
1977 vmovdqa XMMWORD[rsp],xmm10
; ---- group 1: rounds using [16+rsp], [24+rsp]; updates xmm1 ----
1978 vpalignr xmm8,xmm2,xmm1,8
1979 ror r13,23
1980 mov r10,r14
1981 vpalignr xmm11,xmm6,xmm5,8
1982 mov r12,rdx
1983 ror r14,5
1984DB 143,72,120,195,200,56 ;vprotq xmm9,xmm8,56
1985 xor r13,rcx
1986 xor r12,r8
1987 vpsrlq xmm8,xmm8,7
1988 ror r13,4
1989 xor r14,r10
1990 vpaddq xmm1,xmm1,xmm11
1991 and r12,rcx
1992 xor r13,rcx
1993 add r9,QWORD[16+rsp]
1994 mov r15,r10
1995DB 143,72,120,195,209,7 ;vprotq xmm10,xmm9,7
1996 xor r12,r8
1997 ror r14,6
1998 vpxor xmm8,xmm8,xmm9
1999 xor r15,r11
2000 add r9,r12
2001 ror r13,14
2002 and rdi,r15
2003DB 143,104,120,195,216,3 ;vprotq xmm11,xmm0,3
2004 xor r14,r10
2005 add r9,r13
2006 vpxor xmm8,xmm8,xmm10
2007 xor rdi,r11
2008 ror r14,28
2009 vpsrlq xmm10,xmm0,6
2010 add rbx,r9
2011 add r9,rdi
2012 vpaddq xmm1,xmm1,xmm8
2013 mov r13,rbx
2014 add r14,r9
2015DB 143,72,120,195,203,42 ;vprotq xmm9,xmm11,42
2016 ror r13,23
2017 mov r9,r14
2018 vpxor xmm11,xmm11,xmm10
2019 mov r12,rcx
2020 ror r14,5
2021 xor r13,rbx
2022 xor r12,rdx
2023 vpxor xmm11,xmm11,xmm9
2024 ror r13,4
2025 xor r14,r9
2026 and r12,rbx
2027 xor r13,rbx
2028 vpaddq xmm1,xmm1,xmm11
2029 add r8,QWORD[24+rsp]
2030 mov rdi,r9
2031 xor r12,rdx
2032 ror r14,6
2033 vpaddq xmm10,xmm1,XMMWORD[((-96))+rbp]
2034 xor rdi,r10
2035 add r8,r12
2036 ror r13,14
2037 and r15,rdi
2038 xor r14,r9
2039 add r8,r13
2040 xor r15,r10
2041 ror r14,28
2042 add rax,r8
2043 add r8,r15
2044 mov r13,rax
2045 add r14,r8
2046 vmovdqa XMMWORD[16+rsp],xmm10
; ---- group 2: rounds using [32+rsp], [40+rsp]; updates xmm2 ----
2047 vpalignr xmm8,xmm3,xmm2,8
2048 ror r13,23
2049 mov r8,r14
2050 vpalignr xmm11,xmm7,xmm6,8
2051 mov r12,rbx
2052 ror r14,5
2053DB 143,72,120,195,200,56 ;vprotq xmm9,xmm8,56
2054 xor r13,rax
2055 xor r12,rcx
2056 vpsrlq xmm8,xmm8,7
2057 ror r13,4
2058 xor r14,r8
2059 vpaddq xmm2,xmm2,xmm11
2060 and r12,rax
2061 xor r13,rax
2062 add rdx,QWORD[32+rsp]
2063 mov r15,r8
2064DB 143,72,120,195,209,7 ;vprotq xmm10,xmm9,7
2065 xor r12,rcx
2066 ror r14,6
2067 vpxor xmm8,xmm8,xmm9
2068 xor r15,r9
2069 add rdx,r12
2070 ror r13,14
2071 and rdi,r15
2072DB 143,104,120,195,217,3 ;vprotq xmm11,xmm1,3
2073 xor r14,r8
2074 add rdx,r13
2075 vpxor xmm8,xmm8,xmm10
2076 xor rdi,r9
2077 ror r14,28
2078 vpsrlq xmm10,xmm1,6
2079 add r11,rdx
2080 add rdx,rdi
2081 vpaddq xmm2,xmm2,xmm8
2082 mov r13,r11
2083 add r14,rdx
2084DB 143,72,120,195,203,42 ;vprotq xmm9,xmm11,42
2085 ror r13,23
2086 mov rdx,r14
2087 vpxor xmm11,xmm11,xmm10
2088 mov r12,rax
2089 ror r14,5
2090 xor r13,r11
2091 xor r12,rbx
2092 vpxor xmm11,xmm11,xmm9
2093 ror r13,4
2094 xor r14,rdx
2095 and r12,r11
2096 xor r13,r11
2097 vpaddq xmm2,xmm2,xmm11
2098 add rcx,QWORD[40+rsp]
2099 mov rdi,rdx
2100 xor r12,rbx
2101 ror r14,6
2102 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]
2103 xor rdi,r8
2104 add rcx,r12
2105 ror r13,14
2106 and r15,rdi
2107 xor r14,rdx
2108 add rcx,r13
2109 xor r15,r8
2110 ror r14,28
2111 add r10,rcx
2112 add rcx,r15
2113 mov r13,r10
2114 add r14,rcx
2115 vmovdqa XMMWORD[32+rsp],xmm10
; ---- group 3: rounds using [48+rsp], [56+rsp]; updates xmm3 ----
2116 vpalignr xmm8,xmm4,xmm3,8
2117 ror r13,23
2118 mov rcx,r14
2119 vpalignr xmm11,xmm0,xmm7,8
2120 mov r12,r11
2121 ror r14,5
2122DB 143,72,120,195,200,56 ;vprotq xmm9,xmm8,56
2123 xor r13,r10
2124 xor r12,rax
2125 vpsrlq xmm8,xmm8,7
2126 ror r13,4
2127 xor r14,rcx
2128 vpaddq xmm3,xmm3,xmm11
2129 and r12,r10
2130 xor r13,r10
2131 add rbx,QWORD[48+rsp]
2132 mov r15,rcx
2133DB 143,72,120,195,209,7 ;vprotq xmm10,xmm9,7
2134 xor r12,rax
2135 ror r14,6
2136 vpxor xmm8,xmm8,xmm9
2137 xor r15,rdx
2138 add rbx,r12
2139 ror r13,14
2140 and rdi,r15
2141DB 143,104,120,195,218,3 ;vprotq xmm11,xmm2,3
2142 xor r14,rcx
2143 add rbx,r13
2144 vpxor xmm8,xmm8,xmm10
2145 xor rdi,rdx
2146 ror r14,28
2147 vpsrlq xmm10,xmm2,6
2148 add r9,rbx
2149 add rbx,rdi
2150 vpaddq xmm3,xmm3,xmm8
2151 mov r13,r9
2152 add r14,rbx
2153DB 143,72,120,195,203,42 ;vprotq xmm9,xmm11,42
2154 ror r13,23
2155 mov rbx,r14
2156 vpxor xmm11,xmm11,xmm10
2157 mov r12,r10
2158 ror r14,5
2159 xor r13,r9
2160 xor r12,r11
2161 vpxor xmm11,xmm11,xmm9
2162 ror r13,4
2163 xor r14,rbx
2164 and r12,r9
2165 xor r13,r9
2166 vpaddq xmm3,xmm3,xmm11
2167 add rax,QWORD[56+rsp]
2168 mov rdi,rbx
2169 xor r12,r11
2170 ror r14,6
2171 vpaddq xmm10,xmm3,XMMWORD[((-32))+rbp]
2172 xor rdi,rcx
2173 add rax,r12
2174 ror r13,14
2175 and r15,rdi
2176 xor r14,rbx
2177 add rax,r13
2178 xor r15,rcx
2179 ror r14,28
2180 add r8,rax
2181 add rax,r15
2182 mov r13,r8
2183 add r14,rax
2184 vmovdqa XMMWORD[48+rsp],xmm10
; ---- group 4: rounds using [64+rsp], [72+rsp]; updates xmm4 ----
2185 vpalignr xmm8,xmm5,xmm4,8
2186 ror r13,23
2187 mov rax,r14
2188 vpalignr xmm11,xmm1,xmm0,8
2189 mov r12,r9
2190 ror r14,5
2191DB 143,72,120,195,200,56 ;vprotq xmm9,xmm8,56
2192 xor r13,r8
2193 xor r12,r10
2194 vpsrlq xmm8,xmm8,7
2195 ror r13,4
2196 xor r14,rax
2197 vpaddq xmm4,xmm4,xmm11
2198 and r12,r8
2199 xor r13,r8
2200 add r11,QWORD[64+rsp]
2201 mov r15,rax
2202DB 143,72,120,195,209,7 ;vprotq xmm10,xmm9,7
2203 xor r12,r10
2204 ror r14,6
2205 vpxor xmm8,xmm8,xmm9
2206 xor r15,rbx
2207 add r11,r12
2208 ror r13,14
2209 and rdi,r15
2210DB 143,104,120,195,219,3 ;vprotq xmm11,xmm3,3
2211 xor r14,rax
2212 add r11,r13
2213 vpxor xmm8,xmm8,xmm10
2214 xor rdi,rbx
2215 ror r14,28
2216 vpsrlq xmm10,xmm3,6
2217 add rdx,r11
2218 add r11,rdi
2219 vpaddq xmm4,xmm4,xmm8
2220 mov r13,rdx
2221 add r14,r11
2222DB 143,72,120,195,203,42 ;vprotq xmm9,xmm11,42
2223 ror r13,23
2224 mov r11,r14
2225 vpxor xmm11,xmm11,xmm10
2226 mov r12,r8
2227 ror r14,5
2228 xor r13,rdx
2229 xor r12,r9
2230 vpxor xmm11,xmm11,xmm9
2231 ror r13,4
2232 xor r14,r11
2233 and r12,rdx
2234 xor r13,rdx
2235 vpaddq xmm4,xmm4,xmm11
2236 add r10,QWORD[72+rsp]
2237 mov rdi,r11
2238 xor r12,r9
2239 ror r14,6
2240 vpaddq xmm10,xmm4,XMMWORD[rbp]
2241 xor rdi,rax
2242 add r10,r12
2243 ror r13,14
2244 and r15,rdi
2245 xor r14,r11
2246 add r10,r13
2247 xor r15,rax
2248 ror r14,28
2249 add rcx,r10
2250 add r10,r15
2251 mov r13,rcx
2252 add r14,r10
2253 vmovdqa XMMWORD[64+rsp],xmm10
; ---- group 5: rounds using [80+rsp], [88+rsp]; updates xmm5 ----
2254 vpalignr xmm8,xmm6,xmm5,8
2255 ror r13,23
2256 mov r10,r14
2257 vpalignr xmm11,xmm2,xmm1,8
2258 mov r12,rdx
2259 ror r14,5
2260DB 143,72,120,195,200,56 ;vprotq xmm9,xmm8,56
2261 xor r13,rcx
2262 xor r12,r8
2263 vpsrlq xmm8,xmm8,7
2264 ror r13,4
2265 xor r14,r10
2266 vpaddq xmm5,xmm5,xmm11
2267 and r12,rcx
2268 xor r13,rcx
2269 add r9,QWORD[80+rsp]
2270 mov r15,r10
2271DB 143,72,120,195,209,7 ;vprotq xmm10,xmm9,7
2272 xor r12,r8
2273 ror r14,6
2274 vpxor xmm8,xmm8,xmm9
2275 xor r15,r11
2276 add r9,r12
2277 ror r13,14
2278 and rdi,r15
2279DB 143,104,120,195,220,3 ;vprotq xmm11,xmm4,3
2280 xor r14,r10
2281 add r9,r13
2282 vpxor xmm8,xmm8,xmm10
2283 xor rdi,r11
2284 ror r14,28
2285 vpsrlq xmm10,xmm4,6
2286 add rbx,r9
2287 add r9,rdi
2288 vpaddq xmm5,xmm5,xmm8
2289 mov r13,rbx
2290 add r14,r9
2291DB 143,72,120,195,203,42 ;vprotq xmm9,xmm11,42
2292 ror r13,23
2293 mov r9,r14
2294 vpxor xmm11,xmm11,xmm10
2295 mov r12,rcx
2296 ror r14,5
2297 xor r13,rbx
2298 xor r12,rdx
2299 vpxor xmm11,xmm11,xmm9
2300 ror r13,4
2301 xor r14,r9
2302 and r12,rbx
2303 xor r13,rbx
2304 vpaddq xmm5,xmm5,xmm11
2305 add r8,QWORD[88+rsp]
2306 mov rdi,r9
2307 xor r12,rdx
2308 ror r14,6
2309 vpaddq xmm10,xmm5,XMMWORD[32+rbp]
2310 xor rdi,r10
2311 add r8,r12
2312 ror r13,14
2313 and r15,rdi
2314 xor r14,r9
2315 add r8,r13
2316 xor r15,r10
2317 ror r14,28
2318 add rax,r8
2319 add r8,r15
2320 mov r13,rax
2321 add r14,r8
2322 vmovdqa XMMWORD[80+rsp],xmm10
; ---- group 6: rounds using [96+rsp], [104+rsp]; updates xmm6 ----
2323 vpalignr xmm8,xmm7,xmm6,8
2324 ror r13,23
2325 mov r8,r14
2326 vpalignr xmm11,xmm3,xmm2,8
2327 mov r12,rbx
2328 ror r14,5
2329DB 143,72,120,195,200,56 ;vprotq xmm9,xmm8,56
2330 xor r13,rax
2331 xor r12,rcx
2332 vpsrlq xmm8,xmm8,7
2333 ror r13,4
2334 xor r14,r8
2335 vpaddq xmm6,xmm6,xmm11
2336 and r12,rax
2337 xor r13,rax
2338 add rdx,QWORD[96+rsp]
2339 mov r15,r8
2340DB 143,72,120,195,209,7 ;vprotq xmm10,xmm9,7
2341 xor r12,rcx
2342 ror r14,6
2343 vpxor xmm8,xmm8,xmm9
2344 xor r15,r9
2345 add rdx,r12
2346 ror r13,14
2347 and rdi,r15
2348DB 143,104,120,195,221,3 ;vprotq xmm11,xmm5,3
2349 xor r14,r8
2350 add rdx,r13
2351 vpxor xmm8,xmm8,xmm10
2352 xor rdi,r9
2353 ror r14,28
2354 vpsrlq xmm10,xmm5,6
2355 add r11,rdx
2356 add rdx,rdi
2357 vpaddq xmm6,xmm6,xmm8
2358 mov r13,r11
2359 add r14,rdx
2360DB 143,72,120,195,203,42 ;vprotq xmm9,xmm11,42
2361 ror r13,23
2362 mov rdx,r14
2363 vpxor xmm11,xmm11,xmm10
2364 mov r12,rax
2365 ror r14,5
2366 xor r13,r11
2367 xor r12,rbx
2368 vpxor xmm11,xmm11,xmm9
2369 ror r13,4
2370 xor r14,rdx
2371 and r12,r11
2372 xor r13,r11
2373 vpaddq xmm6,xmm6,xmm11
2374 add rcx,QWORD[104+rsp]
2375 mov rdi,rdx
2376 xor r12,rbx
2377 ror r14,6
2378 vpaddq xmm10,xmm6,XMMWORD[64+rbp]
2379 xor rdi,r8
2380 add rcx,r12
2381 ror r13,14
2382 and r15,rdi
2383 xor r14,rdx
2384 add rcx,r13
2385 xor r15,r8
2386 ror r14,28
2387 add r10,rcx
2388 add rcx,r15
2389 mov r13,r10
2390 add r14,rcx
2391 vmovdqa XMMWORD[96+rsp],xmm10
; ---- group 7: rounds using [112+rsp], [120+rsp]; updates xmm7 ----
2392 vpalignr xmm8,xmm0,xmm7,8
2393 ror r13,23
2394 mov rcx,r14
2395 vpalignr xmm11,xmm4,xmm3,8
2396 mov r12,r11
2397 ror r14,5
2398DB 143,72,120,195,200,56 ;vprotq xmm9,xmm8,56
2399 xor r13,r10
2400 xor r12,rax
2401 vpsrlq xmm8,xmm8,7
2402 ror r13,4
2403 xor r14,rcx
2404 vpaddq xmm7,xmm7,xmm11
2405 and r12,r10
2406 xor r13,r10
2407 add rbx,QWORD[112+rsp]
2408 mov r15,rcx
2409DB 143,72,120,195,209,7 ;vprotq xmm10,xmm9,7
2410 xor r12,rax
2411 ror r14,6
2412 vpxor xmm8,xmm8,xmm9
2413 xor r15,rdx
2414 add rbx,r12
2415 ror r13,14
2416 and rdi,r15
2417DB 143,104,120,195,222,3 ;vprotq xmm11,xmm6,3
2418 xor r14,rcx
2419 add rbx,r13
2420 vpxor xmm8,xmm8,xmm10
2421 xor rdi,rdx
2422 ror r14,28
2423 vpsrlq xmm10,xmm6,6
2424 add r9,rbx
2425 add rbx,rdi
2426 vpaddq xmm7,xmm7,xmm8
2427 mov r13,r9
2428 add r14,rbx
2429DB 143,72,120,195,203,42 ;vprotq xmm9,xmm11,42
2430 ror r13,23
2431 mov rbx,r14
2432 vpxor xmm11,xmm11,xmm10
2433 mov r12,r10
2434 ror r14,5
2435 xor r13,r9
2436 xor r12,r11
2437 vpxor xmm11,xmm11,xmm9
2438 ror r13,4
2439 xor r14,rbx
2440 and r12,r9
2441 xor r13,r9
2442 vpaddq xmm7,xmm7,xmm11
2443 add rax,QWORD[120+rsp]
2444 mov rdi,rbx
2445 xor r12,r11
2446 ror r14,6
2447 vpaddq xmm10,xmm7,XMMWORD[96+rbp]
2448 xor rdi,rcx
2449 add rax,r12
2450 ror r13,14
2451 and r15,rdi
2452 xor r14,rbx
2453 add rax,r13
2454 xor r15,rcx
2455 ror r14,28
2456 add r8,rax
2457 add rax,r15
2458 mov r13,r8
2459 add r14,rax
2460 vmovdqa XMMWORD[112+rsp],xmm10
2461 cmp BYTE[135+rbp],0 ;another pass while this table byte is non-zero
2462 jne NEAR $L$xop_00_47
; Final 16 rounds for the current block (fall-through from the loop above).
; Same scalar round pattern as in $L$xop_00_47, but with no vector work: the
; rounds only consume the 16 qwords that the last loop pass left at
; [rsp+0..127].  Each round is 26 instructions; the slot it reads is noted.
; On entry r13 already holds a copy of r8 and r14 the pending sum for rax
; (both set by the last round of the loop).
; -- round using QWORD[rsp] --
2463 ror r13,23
2464 mov rax,r14
2465 mov r12,r9
2466 ror r14,5
2467 xor r13,r8
2468 xor r12,r10
2469 ror r13,4
2470 xor r14,rax
2471 and r12,r8
2472 xor r13,r8
2473 add r11,QWORD[rsp]
2474 mov r15,rax
2475 xor r12,r10
2476 ror r14,6
2477 xor r15,rbx
2478 add r11,r12
2479 ror r13,14
2480 and rdi,r15
2481 xor r14,rax
2482 add r11,r13
2483 xor rdi,rbx
2484 ror r14,28
2485 add rdx,r11
2486 add r11,rdi
2487 mov r13,rdx
2488 add r14,r11
; -- round using QWORD[8+rsp] --
2489 ror r13,23
2490 mov r11,r14
2491 mov r12,r8
2492 ror r14,5
2493 xor r13,rdx
2494 xor r12,r9
2495 ror r13,4
2496 xor r14,r11
2497 and r12,rdx
2498 xor r13,rdx
2499 add r10,QWORD[8+rsp]
2500 mov rdi,r11
2501 xor r12,r9
2502 ror r14,6
2503 xor rdi,rax
2504 add r10,r12
2505 ror r13,14
2506 and r15,rdi
2507 xor r14,r11
2508 add r10,r13
2509 xor r15,rax
2510 ror r14,28
2511 add rcx,r10
2512 add r10,r15
2513 mov r13,rcx
2514 add r14,r10
; -- round using QWORD[16+rsp] --
2515 ror r13,23
2516 mov r10,r14
2517 mov r12,rdx
2518 ror r14,5
2519 xor r13,rcx
2520 xor r12,r8
2521 ror r13,4
2522 xor r14,r10
2523 and r12,rcx
2524 xor r13,rcx
2525 add r9,QWORD[16+rsp]
2526 mov r15,r10
2527 xor r12,r8
2528 ror r14,6
2529 xor r15,r11
2530 add r9,r12
2531 ror r13,14
2532 and rdi,r15
2533 xor r14,r10
2534 add r9,r13
2535 xor rdi,r11
2536 ror r14,28
2537 add rbx,r9
2538 add r9,rdi
2539 mov r13,rbx
2540 add r14,r9
; -- round using QWORD[24+rsp] --
2541 ror r13,23
2542 mov r9,r14
2543 mov r12,rcx
2544 ror r14,5
2545 xor r13,rbx
2546 xor r12,rdx
2547 ror r13,4
2548 xor r14,r9
2549 and r12,rbx
2550 xor r13,rbx
2551 add r8,QWORD[24+rsp]
2552 mov rdi,r9
2553 xor r12,rdx
2554 ror r14,6
2555 xor rdi,r10
2556 add r8,r12
2557 ror r13,14
2558 and r15,rdi
2559 xor r14,r9
2560 add r8,r13
2561 xor r15,r10
2562 ror r14,28
2563 add rax,r8
2564 add r8,r15
2565 mov r13,rax
2566 add r14,r8
; -- round using QWORD[32+rsp] --
2567 ror r13,23
2568 mov r8,r14
2569 mov r12,rbx
2570 ror r14,5
2571 xor r13,rax
2572 xor r12,rcx
2573 ror r13,4
2574 xor r14,r8
2575 and r12,rax
2576 xor r13,rax
2577 add rdx,QWORD[32+rsp]
2578 mov r15,r8
2579 xor r12,rcx
2580 ror r14,6
2581 xor r15,r9
2582 add rdx,r12
2583 ror r13,14
2584 and rdi,r15
2585 xor r14,r8
2586 add rdx,r13
2587 xor rdi,r9
2588 ror r14,28
2589 add r11,rdx
2590 add rdx,rdi
2591 mov r13,r11
2592 add r14,rdx
; -- round using QWORD[40+rsp] --
2593 ror r13,23
2594 mov rdx,r14
2595 mov r12,rax
2596 ror r14,5
2597 xor r13,r11
2598 xor r12,rbx
2599 ror r13,4
2600 xor r14,rdx
2601 and r12,r11
2602 xor r13,r11
2603 add rcx,QWORD[40+rsp]
2604 mov rdi,rdx
2605 xor r12,rbx
2606 ror r14,6
2607 xor rdi,r8
2608 add rcx,r12
2609 ror r13,14
2610 and r15,rdi
2611 xor r14,rdx
2612 add rcx,r13
2613 xor r15,r8
2614 ror r14,28
2615 add r10,rcx
2616 add rcx,r15
2617 mov r13,r10
2618 add r14,rcx
; -- round using QWORD[48+rsp] --
2619 ror r13,23
2620 mov rcx,r14
2621 mov r12,r11
2622 ror r14,5
2623 xor r13,r10
2624 xor r12,rax
2625 ror r13,4
2626 xor r14,rcx
2627 and r12,r10
2628 xor r13,r10
2629 add rbx,QWORD[48+rsp]
2630 mov r15,rcx
2631 xor r12,rax
2632 ror r14,6
2633 xor r15,rdx
2634 add rbx,r12
2635 ror r13,14
2636 and rdi,r15
2637 xor r14,rcx
2638 add rbx,r13
2639 xor rdi,rdx
2640 ror r14,28
2641 add r9,rbx
2642 add rbx,rdi
2643 mov r13,r9
2644 add r14,rbx
; -- round using QWORD[56+rsp] --
2645 ror r13,23
2646 mov rbx,r14
2647 mov r12,r10
2648 ror r14,5
2649 xor r13,r9
2650 xor r12,r11
2651 ror r13,4
2652 xor r14,rbx
2653 and r12,r9
2654 xor r13,r9
2655 add rax,QWORD[56+rsp]
2656 mov rdi,rbx
2657 xor r12,r11
2658 ror r14,6
2659 xor rdi,rcx
2660 add rax,r12
2661 ror r13,14
2662 and r15,rdi
2663 xor r14,rbx
2664 add rax,r13
2665 xor r15,rcx
2666 ror r14,28
2667 add r8,rax
2668 add rax,r15
2669 mov r13,r8
2670 add r14,rax
; -- round using QWORD[64+rsp] --
2671 ror r13,23
2672 mov rax,r14
2673 mov r12,r9
2674 ror r14,5
2675 xor r13,r8
2676 xor r12,r10
2677 ror r13,4
2678 xor r14,rax
2679 and r12,r8
2680 xor r13,r8
2681 add r11,QWORD[64+rsp]
2682 mov r15,rax
2683 xor r12,r10
2684 ror r14,6
2685 xor r15,rbx
2686 add r11,r12
2687 ror r13,14
2688 and rdi,r15
2689 xor r14,rax
2690 add r11,r13
2691 xor rdi,rbx
2692 ror r14,28
2693 add rdx,r11
2694 add r11,rdi
2695 mov r13,rdx
2696 add r14,r11
; -- round using QWORD[72+rsp] --
2697 ror r13,23
2698 mov r11,r14
2699 mov r12,r8
2700 ror r14,5
2701 xor r13,rdx
2702 xor r12,r9
2703 ror r13,4
2704 xor r14,r11
2705 and r12,rdx
2706 xor r13,rdx
2707 add r10,QWORD[72+rsp]
2708 mov rdi,r11
2709 xor r12,r9
2710 ror r14,6
2711 xor rdi,rax
2712 add r10,r12
2713 ror r13,14
2714 and r15,rdi
2715 xor r14,r11
2716 add r10,r13
2717 xor r15,rax
2718 ror r14,28
2719 add rcx,r10
2720 add r10,r15
2721 mov r13,rcx
2722 add r14,r10
; -- round using QWORD[80+rsp] --
2723 ror r13,23
2724 mov r10,r14
2725 mov r12,rdx
2726 ror r14,5
2727 xor r13,rcx
2728 xor r12,r8
2729 ror r13,4
2730 xor r14,r10
2731 and r12,rcx
2732 xor r13,rcx
2733 add r9,QWORD[80+rsp]
2734 mov r15,r10
2735 xor r12,r8
2736 ror r14,6
2737 xor r15,r11
2738 add r9,r12
2739 ror r13,14
2740 and rdi,r15
2741 xor r14,r10
2742 add r9,r13
2743 xor rdi,r11
2744 ror r14,28
2745 add rbx,r9
2746 add r9,rdi
2747 mov r13,rbx
2748 add r14,r9
; -- round using QWORD[88+rsp] --
2749 ror r13,23
2750 mov r9,r14
2751 mov r12,rcx
2752 ror r14,5
2753 xor r13,rbx
2754 xor r12,rdx
2755 ror r13,4
2756 xor r14,r9
2757 and r12,rbx
2758 xor r13,rbx
2759 add r8,QWORD[88+rsp]
2760 mov rdi,r9
2761 xor r12,rdx
2762 ror r14,6
2763 xor rdi,r10
2764 add r8,r12
2765 ror r13,14
2766 and r15,rdi
2767 xor r14,r9
2768 add r8,r13
2769 xor r15,r10
2770 ror r14,28
2771 add rax,r8
2772 add r8,r15
2773 mov r13,rax
2774 add r14,r8
; -- round using QWORD[96+rsp] --
2775 ror r13,23
2776 mov r8,r14
2777 mov r12,rbx
2778 ror r14,5
2779 xor r13,rax
2780 xor r12,rcx
2781 ror r13,4
2782 xor r14,r8
2783 and r12,rax
2784 xor r13,rax
2785 add rdx,QWORD[96+rsp]
2786 mov r15,r8
2787 xor r12,rcx
2788 ror r14,6
2789 xor r15,r9
2790 add rdx,r12
2791 ror r13,14
2792 and rdi,r15
2793 xor r14,r8
2794 add rdx,r13
2795 xor rdi,r9
2796 ror r14,28
2797 add r11,rdx
2798 add rdx,rdi
2799 mov r13,r11
2800 add r14,rdx
; -- round using QWORD[104+rsp] --
2801 ror r13,23
2802 mov rdx,r14
2803 mov r12,rax
2804 ror r14,5
2805 xor r13,r11
2806 xor r12,rbx
2807 ror r13,4
2808 xor r14,rdx
2809 and r12,r11
2810 xor r13,r11
2811 add rcx,QWORD[104+rsp]
2812 mov rdi,rdx
2813 xor r12,rbx
2814 ror r14,6
2815 xor rdi,r8
2816 add rcx,r12
2817 ror r13,14
2818 and r15,rdi
2819 xor r14,rdx
2820 add rcx,r13
2821 xor r15,r8
2822 ror r14,28
2823 add r10,rcx
2824 add rcx,r15
2825 mov r13,r10
2826 add r14,rcx
; -- round using QWORD[112+rsp] --
2827 ror r13,23
2828 mov rcx,r14
2829 mov r12,r11
2830 ror r14,5
2831 xor r13,r10
2832 xor r12,rax
2833 ror r13,4
2834 xor r14,rcx
2835 and r12,r10
2836 xor r13,r10
2837 add rbx,QWORD[112+rsp]
2838 mov r15,rcx
2839 xor r12,rax
2840 ror r14,6
2841 xor r15,rdx
2842 add rbx,r12
2843 ror r13,14
2844 and rdi,r15
2845 xor r14,rcx
2846 add rbx,r13
2847 xor rdi,rdx
2848 ror r14,28
2849 add r9,rbx
2850 add rbx,rdi
2851 mov r13,r9
2852 add r14,rbx
; -- round using QWORD[120+rsp] --
2853 ror r13,23
2854 mov rbx,r14
2855 mov r12,r10
2856 ror r14,5
2857 xor r13,r9
2858 xor r12,r11
2859 ror r13,4
2860 xor r14,rbx
2861 and r12,r9
2862 xor r13,r9
2863 add rax,QWORD[120+rsp]
2864 mov rdi,rbx
2865 xor r12,r11
2866 ror r14,6
2867 xor rdi,rcx
2868 add rax,r12
2869 ror r13,14
2870 and r15,rdi
2871 xor r14,rbx
2872 add rax,r13
2873 xor r15,rcx
2874 ror r14,28
2875 add r8,rax
2876 add rax,r15
2877 mov r13,r8
2878 add r14,rax
; End of one 128-byte block: add the previous state into the working
; registers, write the result back, and loop while input remains.
2879 mov rdi,QWORD[((128+0))+rsp] ;reload the state pointer (rdi was scratch in the rounds)
2880 mov rax,r14 ;take the pending sum left in r14 by the last round
2881
2882 add rax,QWORD[rdi]
2883 lea rsi,[128+rsi] ;advance the input pointer by one block (lea keeps flags untouched)
2884 add rbx,QWORD[8+rdi]
2885 add rcx,QWORD[16+rdi]
2886 add rdx,QWORD[24+rdi]
2887 add r8,QWORD[32+rdi]
2888 add r9,QWORD[40+rdi]
2889 add r10,QWORD[48+rdi]
2890 add r11,QWORD[56+rdi]
2891
2892 cmp rsi,QWORD[((128+16))+rsp] ;compare with the end pointer saved in the prologue
2893
; The stores below are plain movs, so the flags from the cmp are still valid
; at the jb.
2894 mov QWORD[rdi],rax
2895 mov QWORD[8+rdi],rbx
2896 mov QWORD[16+rdi],rcx
2897 mov QWORD[24+rdi],rdx
2898 mov QWORD[32+rdi],r8
2899 mov QWORD[40+rdi],r9
2900 mov QWORD[48+rdi],r10
2901 mov QWORD[56+rdi],r11
2902 jb NEAR $L$loop_xop ;unsigned: more blocks while rsi < end
2903
; Tear-down for the XOP path: restore the non-volatile XMM registers saved in
; the frame, then the six pushed GPRs, and return.
2904 mov rsi,QWORD[((128+24))+rsp] ;rsi = rsp value captured right after the six pushes
2905 vzeroupper
2906 movaps xmm6,XMMWORD[((128+32))+rsp]
2907 movaps xmm7,XMMWORD[((128+48))+rsp]
2908 movaps xmm8,XMMWORD[((128+64))+rsp]
2909 movaps xmm9,XMMWORD[((128+80))+rsp]
2910 movaps xmm10,XMMWORD[((128+96))+rsp]
2911 movaps xmm11,XMMWORD[((128+112))+rsp]
; The pushes were rbx,rbp,r12..r15 in that order, so r15 sits at the lowest
; address; read them back by offset instead of popping.
2912 mov r15,QWORD[rsi]
2913 mov r14,QWORD[8+rsi]
2914 mov r13,QWORD[16+rsi]
2915 mov r12,QWORD[24+rsi]
2916 mov rbp,QWORD[32+rsi]
2917 mov rbx,QWORD[40+rsi]
2918 lea rsp,[48+rsi] ;drop the frame and the six saved registers in one step
2919$L$epilogue_xop:
; NOTE(review): rdi/rsi are reloaded from [8+rsp]/[16+rsp]; the matching stores
; are in the entry stub above this view (the sibling entry points store them
; there) -- confirm for the _xop entry.
2920 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2921 mov rsi,QWORD[16+rsp]
2922 DB 0F3h,0C3h ;repret
2923$L$SEH_end_sha512_block_data_order_xop:
2924
2925ALIGN 64
; AVX entry point (Win64 calling convention).
; Saves rdi/rsi, which are callee-saved on Win64, into the first two slots
; above the return address, then moves the three incoming arguments from
; rcx, rdx, r8 into rdi, rsi, rdx, the registers the body below uses.
; Execution falls through into $L$avx_shortcut.
2926sha512_block_data_order_avx:
2927 mov QWORD[8+rsp],rdi ;WIN64 prologue
2928 mov QWORD[16+rsp],rsi
2929 mov rax,rsp
2930$L$SEH_begin_sha512_block_data_order_avx:
2931 mov rdi,rcx
2932 mov rsi,rdx
2933 mov rdx,r8
2934
2935
; AVX code path: frame setup and hash-state load.
; Reached by fall-through from sha512_block_data_order_avx above, or by the
; `je NEAR $L$avx_shortcut` in sha512_block_data_order.
; On arrival: rdi = pointer to the eight 64-bit state words,
;             rsi = input pointer, rdx = number of 128-byte blocks.
; Frame after `and rsp,-64` (64-byte aligned):
;   [rsp+0   .. rsp+127]  16 qword slots written by the vmovdqa stores below
;   [rsp+128]             state pointer (rdi)
;   [rsp+136]             input pointer (rsi)
;   [rsp+144]             end-of-input pointer (rsi + rdx*128)
;   [rsp+152]             rsp as it was right after the six pushes
;   [rsp+160 .. rsp+255]  save area for xmm6..xmm11
2936$L$avx_shortcut:
2937 push rbx
2938 push rbp
2939 push r12
2940 push r13
2941 push r14
2942 push r15
2943 mov r11,rsp ;remember rsp so the pushed registers can be located later
2944 shl rdx,4 ;rdx = blocks*16 ...
2945 sub rsp,256
2946 lea rdx,[rdx*8+rsi] ;... *8 + rsi = input + blocks*128 = end pointer
2947 and rsp,-64 ;align frame; also makes the movaps stores below legal
2948 mov QWORD[((128+0))+rsp],rdi
2949 mov QWORD[((128+8))+rsp],rsi
2950 mov QWORD[((128+16))+rsp],rdx
2951 mov QWORD[((128+24))+rsp],r11
2952 movaps XMMWORD[(128+32)+rsp],xmm6
2953 movaps XMMWORD[(128+48)+rsp],xmm7
2954 movaps XMMWORD[(128+64)+rsp],xmm8
2955 movaps XMMWORD[(128+80)+rsp],xmm9
2956 movaps XMMWORD[(128+96)+rsp],xmm10
2957 movaps XMMWORD[(128+112)+rsp],xmm11
2958$L$prologue_avx:
2959
2960 vzeroupper
; Load the eight state words into the working registers:
; rax,rbx,rcx,rdx,r8,r9,r10,r11 = state[0..7].
2961 mov rax,QWORD[rdi]
2962 mov rbx,QWORD[8+rdi]
2963 mov rcx,QWORD[16+rdi]
2964 mov rdx,QWORD[24+rdi]
2965 mov r8,QWORD[32+rdi]
2966 mov r9,QWORD[40+rdi]
2967 mov r10,QWORD[48+rdi]
2968 mov r11,QWORD[56+rdi]
2969 jmp NEAR $L$loop_avx
2970ALIGN 16
; Per-block setup for the AVX path (one iteration per 128-byte input block).
; Loads the 16 input qwords from [rsi] into xmm0..xmm7 (two per register),
; permutes the bytes of each vector with the vpshufb control loaded from
; K512+1280, adds the first table entries and parks the sums in the 16 qword
; slots at [rsp+0..127] for the scalar rounds to consume.
; NOTE(review): the vpshufb mask is presumably a per-qword byte swap (the scalar
; path uses `bswap` on each input word) -- K512 is not visible here, confirm.
; NOTE(review): constants are fetched 16 bytes at a time with a 32-byte stride
; (rbp-128, -96, ... +96), so the table apparently stores each 16-byte pair
; twice -- confirm against the K512 definition.
2971$L$loop_avx:
2972 vmovdqa xmm11,XMMWORD[((K512+1280))]
2973 vmovdqu xmm0,XMMWORD[rsi]
2974 lea rbp,[((K512+128))] ;bias by 128 so offsets -128..+96 reach K512+0..K512+224
2975 vmovdqu xmm1,XMMWORD[16+rsi]
2976 vmovdqu xmm2,XMMWORD[32+rsi]
2977 vpshufb xmm0,xmm0,xmm11
2978 vmovdqu xmm3,XMMWORD[48+rsi]
2979 vpshufb xmm1,xmm1,xmm11
2980 vmovdqu xmm4,XMMWORD[64+rsi]
2981 vpshufb xmm2,xmm2,xmm11
2982 vmovdqu xmm5,XMMWORD[80+rsi]
2983 vpshufb xmm3,xmm3,xmm11
2984 vmovdqu xmm6,XMMWORD[96+rsi]
2985 vpshufb xmm4,xmm4,xmm11
2986 vmovdqu xmm7,XMMWORD[112+rsi]
2987 vpshufb xmm5,xmm5,xmm11
2988 vpaddq xmm8,xmm0,XMMWORD[((-128))+rbp]
2989 vpshufb xmm6,xmm6,xmm11
2990 vpaddq xmm9,xmm1,XMMWORD[((-96))+rbp]
2991 vpshufb xmm7,xmm7,xmm11 ;last use of the mask; xmm11 becomes a temporary below
2992 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]
2993 vpaddq xmm11,xmm3,XMMWORD[((-32))+rbp]
2994 vmovdqa XMMWORD[rsp],xmm8
2995 vpaddq xmm8,xmm4,XMMWORD[rbp]
2996 vmovdqa XMMWORD[16+rsp],xmm9
2997 vpaddq xmm9,xmm5,XMMWORD[32+rbp]
2998 vmovdqa XMMWORD[32+rsp],xmm10
2999 vpaddq xmm10,xmm6,XMMWORD[64+rbp]
3000 vmovdqa XMMWORD[48+rsp],xmm11
3001 vpaddq xmm11,xmm7,XMMWORD[96+rbp]
3002 vmovdqa XMMWORD[64+rsp],xmm8
3003 mov r14,rax ;seed for the round code: r14 = state word 0
3004 vmovdqa XMMWORD[80+rsp],xmm9
3005 mov rdi,rbx ;rdi = rbx ^ rcx (completed two lines below); rdi is free
3006 vmovdqa XMMWORD[96+rsp],xmm10
3007 xor rdi,rcx ;because the state pointer is kept at [rsp+128]
3008 vmovdqa XMMWORD[112+rsp],xmm11
3009 mov r13,r8 ;seed for the round code: r13 = state word 4
3010 jmp NEAR $L$avx_00_47
3011
3012ALIGN 16
3013$L$avx_00_47:
3014 add rbp,256
3015 vpalignr xmm8,xmm1,xmm0,8
3016 shrd r13,r13,23
3017 mov rax,r14
3018 vpalignr xmm11,xmm5,xmm4,8
3019 mov r12,r9
3020 shrd r14,r14,5
3021 vpsrlq xmm10,xmm8,1
3022 xor r13,r8
3023 xor r12,r10
3024 vpaddq xmm0,xmm0,xmm11
3025 shrd r13,r13,4
3026 xor r14,rax
3027 vpsrlq xmm11,xmm8,7
3028 and r12,r8
3029 xor r13,r8
3030 vpsllq xmm9,xmm8,56
3031 add r11,QWORD[rsp]
3032 mov r15,rax
3033 vpxor xmm8,xmm11,xmm10
3034 xor r12,r10
3035 shrd r14,r14,6
3036 vpsrlq xmm10,xmm10,7
3037 xor r15,rbx
3038 add r11,r12
3039 vpxor xmm8,xmm8,xmm9
3040 shrd r13,r13,14
3041 and rdi,r15
3042 vpsllq xmm9,xmm9,7
3043 xor r14,rax
3044 add r11,r13
3045 vpxor xmm8,xmm8,xmm10
3046 xor rdi,rbx
3047 shrd r14,r14,28
3048 vpsrlq xmm11,xmm7,6
3049 add rdx,r11
3050 add r11,rdi
3051 vpxor xmm8,xmm8,xmm9
3052 mov r13,rdx
3053 add r14,r11
3054 vpsllq xmm10,xmm7,3
3055 shrd r13,r13,23
3056 mov r11,r14
3057 vpaddq xmm0,xmm0,xmm8
3058 mov r12,r8
3059 shrd r14,r14,5
3060 vpsrlq xmm9,xmm7,19
3061 xor r13,rdx
3062 xor r12,r9
3063 vpxor xmm11,xmm11,xmm10
3064 shrd r13,r13,4
3065 xor r14,r11
3066 vpsllq xmm10,xmm10,42
3067 and r12,rdx
3068 xor r13,rdx
3069 vpxor xmm11,xmm11,xmm9
3070 add r10,QWORD[8+rsp]
3071 mov rdi,r11
3072 vpsrlq xmm9,xmm9,42
3073 xor r12,r9
3074 shrd r14,r14,6
3075 vpxor xmm11,xmm11,xmm10
3076 xor rdi,rax
3077 add r10,r12
3078 vpxor xmm11,xmm11,xmm9
3079 shrd r13,r13,14
3080 and r15,rdi
3081 vpaddq xmm0,xmm0,xmm11
3082 xor r14,r11
3083 add r10,r13
3084 vpaddq xmm10,xmm0,XMMWORD[((-128))+rbp]
3085 xor r15,rax
3086 shrd r14,r14,28
3087 add rcx,r10
3088 add r10,r15
3089 mov r13,rcx
3090 add r14,r10
3091 vmovdqa XMMWORD[rsp],xmm10
3092 vpalignr xmm8,xmm2,xmm1,8
3093 shrd r13,r13,23
3094 mov r10,r14
3095 vpalignr xmm11,xmm6,xmm5,8
3096 mov r12,rdx
3097 shrd r14,r14,5
3098 vpsrlq xmm10,xmm8,1
3099 xor r13,rcx
3100 xor r12,r8
3101 vpaddq xmm1,xmm1,xmm11
3102 shrd r13,r13,4
3103 xor r14,r10
3104 vpsrlq xmm11,xmm8,7
3105 and r12,rcx
3106 xor r13,rcx
3107 vpsllq xmm9,xmm8,56
3108 add r9,QWORD[16+rsp]
3109 mov r15,r10
3110 vpxor xmm8,xmm11,xmm10
3111 xor r12,r8
3112 shrd r14,r14,6
3113 vpsrlq xmm10,xmm10,7
3114 xor r15,r11
3115 add r9,r12
3116 vpxor xmm8,xmm8,xmm9
3117 shrd r13,r13,14
3118 and rdi,r15
3119 vpsllq xmm9,xmm9,7
3120 xor r14,r10
3121 add r9,r13
3122 vpxor xmm8,xmm8,xmm10
3123 xor rdi,r11
3124 shrd r14,r14,28
3125 vpsrlq xmm11,xmm0,6
3126 add rbx,r9
3127 add r9,rdi
3128 vpxor xmm8,xmm8,xmm9
3129 mov r13,rbx
3130 add r14,r9
3131 vpsllq xmm10,xmm0,3
3132 shrd r13,r13,23
3133 mov r9,r14
3134 vpaddq xmm1,xmm1,xmm8
3135 mov r12,rcx
3136 shrd r14,r14,5
3137 vpsrlq xmm9,xmm0,19
3138 xor r13,rbx
3139 xor r12,rdx
3140 vpxor xmm11,xmm11,xmm10
3141 shrd r13,r13,4
3142 xor r14,r9
3143 vpsllq xmm10,xmm10,42
3144 and r12,rbx
3145 xor r13,rbx
3146 vpxor xmm11,xmm11,xmm9
3147 add r8,QWORD[24+rsp]
3148 mov rdi,r9
3149 vpsrlq xmm9,xmm9,42
3150 xor r12,rdx
3151 shrd r14,r14,6
3152 vpxor xmm11,xmm11,xmm10
3153 xor rdi,r10
3154 add r8,r12
3155 vpxor xmm11,xmm11,xmm9
3156 shrd r13,r13,14
3157 and r15,rdi
3158 vpaddq xmm1,xmm1,xmm11
3159 xor r14,r9
3160 add r8,r13
3161 vpaddq xmm10,xmm1,XMMWORD[((-96))+rbp]
3162 xor r15,r10
3163 shrd r14,r14,28
3164 add rax,r8
3165 add r8,r15
3166 mov r13,rax
3167 add r14,r8
3168 vmovdqa XMMWORD[16+rsp],xmm10
3169 vpalignr xmm8,xmm3,xmm2,8
3170 shrd r13,r13,23
3171 mov r8,r14
3172 vpalignr xmm11,xmm7,xmm6,8
3173 mov r12,rbx
3174 shrd r14,r14,5
3175 vpsrlq xmm10,xmm8,1
3176 xor r13,rax
3177 xor r12,rcx
3178 vpaddq xmm2,xmm2,xmm11
3179 shrd r13,r13,4
3180 xor r14,r8
3181 vpsrlq xmm11,xmm8,7
3182 and r12,rax
3183 xor r13,rax
3184 vpsllq xmm9,xmm8,56
3185 add rdx,QWORD[32+rsp]
3186 mov r15,r8
3187 vpxor xmm8,xmm11,xmm10
3188 xor r12,rcx
3189 shrd r14,r14,6
3190 vpsrlq xmm10,xmm10,7
3191 xor r15,r9
3192 add rdx,r12
3193 vpxor xmm8,xmm8,xmm9
3194 shrd r13,r13,14
3195 and rdi,r15
3196 vpsllq xmm9,xmm9,7
3197 xor r14,r8
3198 add rdx,r13
3199 vpxor xmm8,xmm8,xmm10
3200 xor rdi,r9
3201 shrd r14,r14,28
3202 vpsrlq xmm11,xmm1,6
3203 add r11,rdx
3204 add rdx,rdi
3205 vpxor xmm8,xmm8,xmm9
3206 mov r13,r11
3207 add r14,rdx
3208 vpsllq xmm10,xmm1,3
3209 shrd r13,r13,23
3210 mov rdx,r14
3211 vpaddq xmm2,xmm2,xmm8
3212 mov r12,rax
3213 shrd r14,r14,5
3214 vpsrlq xmm9,xmm1,19
3215 xor r13,r11
3216 xor r12,rbx
3217 vpxor xmm11,xmm11,xmm10
3218 shrd r13,r13,4
3219 xor r14,rdx
3220 vpsllq xmm10,xmm10,42
3221 and r12,r11
3222 xor r13,r11
3223 vpxor xmm11,xmm11,xmm9
3224 add rcx,QWORD[40+rsp]
3225 mov rdi,rdx
3226 vpsrlq xmm9,xmm9,42
3227 xor r12,rbx
3228 shrd r14,r14,6
3229 vpxor xmm11,xmm11,xmm10
3230 xor rdi,r8
3231 add rcx,r12
3232 vpxor xmm11,xmm11,xmm9
3233 shrd r13,r13,14
3234 and r15,rdi
3235 vpaddq xmm2,xmm2,xmm11
3236 xor r14,rdx
3237 add rcx,r13
3238 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]
3239 xor r15,r8
3240 shrd r14,r14,28
3241 add r10,rcx
3242 add rcx,r15
3243 mov r13,r10
3244 add r14,rcx
3245 vmovdqa XMMWORD[32+rsp],xmm10
3246 vpalignr xmm8,xmm4,xmm3,8
3247 shrd r13,r13,23
3248 mov rcx,r14
3249 vpalignr xmm11,xmm0,xmm7,8
3250 mov r12,r11
3251 shrd r14,r14,5
3252 vpsrlq xmm10,xmm8,1
3253 xor r13,r10
3254 xor r12,rax
3255 vpaddq xmm3,xmm3,xmm11
3256 shrd r13,r13,4
3257 xor r14,rcx
3258 vpsrlq xmm11,xmm8,7
3259 and r12,r10
3260 xor r13,r10
3261 vpsllq xmm9,xmm8,56
3262 add rbx,QWORD[48+rsp]
3263 mov r15,rcx
3264 vpxor xmm8,xmm11,xmm10
3265 xor r12,rax
3266 shrd r14,r14,6
3267 vpsrlq xmm10,xmm10,7
3268 xor r15,rdx
3269 add rbx,r12
3270 vpxor xmm8,xmm8,xmm9
3271 shrd r13,r13,14
3272 and rdi,r15
3273 vpsllq xmm9,xmm9,7
3274 xor r14,rcx
3275 add rbx,r13
3276 vpxor xmm8,xmm8,xmm10
3277 xor rdi,rdx
3278 shrd r14,r14,28
3279 vpsrlq xmm11,xmm2,6
3280 add r9,rbx
3281 add rbx,rdi
3282 vpxor xmm8,xmm8,xmm9
3283 mov r13,r9
3284 add r14,rbx
3285 vpsllq xmm10,xmm2,3
3286 shrd r13,r13,23
3287 mov rbx,r14
3288 vpaddq xmm3,xmm3,xmm8
3289 mov r12,r10
3290 shrd r14,r14,5
3291 vpsrlq xmm9,xmm2,19
3292 xor r13,r9
3293 xor r12,r11
3294 vpxor xmm11,xmm11,xmm10
3295 shrd r13,r13,4
3296 xor r14,rbx
3297 vpsllq xmm10,xmm10,42
3298 and r12,r9
3299 xor r13,r9
3300 vpxor xmm11,xmm11,xmm9
3301 add rax,QWORD[56+rsp]
3302 mov rdi,rbx
3303 vpsrlq xmm9,xmm9,42
3304 xor r12,r11
3305 shrd r14,r14,6
3306 vpxor xmm11,xmm11,xmm10
3307 xor rdi,rcx
3308 add rax,r12
3309 vpxor xmm11,xmm11,xmm9
3310 shrd r13,r13,14
3311 and r15,rdi
3312 vpaddq xmm3,xmm3,xmm11
3313 xor r14,rbx
3314 add rax,r13
3315 vpaddq xmm10,xmm3,XMMWORD[((-32))+rbp]
3316 xor r15,rcx
3317 shrd r14,r14,28
3318 add r8,rax
3319 add rax,r15
3320 mov r13,r8
3321 add r14,rax
3322 vmovdqa XMMWORD[48+rsp],xmm10
3323 vpalignr xmm8,xmm5,xmm4,8
3324 shrd r13,r13,23
3325 mov rax,r14
3326 vpalignr xmm11,xmm1,xmm0,8
3327 mov r12,r9
3328 shrd r14,r14,5
3329 vpsrlq xmm10,xmm8,1
3330 xor r13,r8
3331 xor r12,r10
3332 vpaddq xmm4,xmm4,xmm11
3333 shrd r13,r13,4
3334 xor r14,rax
3335 vpsrlq xmm11,xmm8,7
3336 and r12,r8
3337 xor r13,r8
3338 vpsllq xmm9,xmm8,56
3339 add r11,QWORD[64+rsp]
3340 mov r15,rax
3341 vpxor xmm8,xmm11,xmm10
3342 xor r12,r10
3343 shrd r14,r14,6
3344 vpsrlq xmm10,xmm10,7
3345 xor r15,rbx
3346 add r11,r12
3347 vpxor xmm8,xmm8,xmm9
3348 shrd r13,r13,14
3349 and rdi,r15
3350 vpsllq xmm9,xmm9,7
3351 xor r14,rax
3352 add r11,r13
3353 vpxor xmm8,xmm8,xmm10
3354 xor rdi,rbx
3355 shrd r14,r14,28
3356 vpsrlq xmm11,xmm3,6
3357 add rdx,r11
3358 add r11,rdi
3359 vpxor xmm8,xmm8,xmm9
3360 mov r13,rdx
3361 add r14,r11
3362 vpsllq xmm10,xmm3,3
3363 shrd r13,r13,23
3364 mov r11,r14
3365 vpaddq xmm4,xmm4,xmm8
3366 mov r12,r8
3367 shrd r14,r14,5
3368 vpsrlq xmm9,xmm3,19
3369 xor r13,rdx
3370 xor r12,r9
3371 vpxor xmm11,xmm11,xmm10
3372 shrd r13,r13,4
3373 xor r14,r11
3374 vpsllq xmm10,xmm10,42
3375 and r12,rdx
3376 xor r13,rdx
3377 vpxor xmm11,xmm11,xmm9
3378 add r10,QWORD[72+rsp]
3379 mov rdi,r11
3380 vpsrlq xmm9,xmm9,42
3381 xor r12,r9
3382 shrd r14,r14,6
3383 vpxor xmm11,xmm11,xmm10
3384 xor rdi,rax
3385 add r10,r12
3386 vpxor xmm11,xmm11,xmm9
3387 shrd r13,r13,14
3388 and r15,rdi
3389 vpaddq xmm4,xmm4,xmm11
3390 xor r14,r11
3391 add r10,r13
3392 vpaddq xmm10,xmm4,XMMWORD[rbp]
3393 xor r15,rax
3394 shrd r14,r14,28
3395 add rcx,r10
3396 add r10,r15
3397 mov r13,rcx
3398 add r14,r10
3399 vmovdqa XMMWORD[64+rsp],xmm10
3400 vpalignr xmm8,xmm6,xmm5,8
3401 shrd r13,r13,23
3402 mov r10,r14
3403 vpalignr xmm11,xmm2,xmm1,8
3404 mov r12,rdx
3405 shrd r14,r14,5
3406 vpsrlq xmm10,xmm8,1
3407 xor r13,rcx
3408 xor r12,r8
3409 vpaddq xmm5,xmm5,xmm11
3410 shrd r13,r13,4
3411 xor r14,r10
3412 vpsrlq xmm11,xmm8,7
3413 and r12,rcx
3414 xor r13,rcx
3415 vpsllq xmm9,xmm8,56
3416 add r9,QWORD[80+rsp]
3417 mov r15,r10
3418 vpxor xmm8,xmm11,xmm10
3419 xor r12,r8
3420 shrd r14,r14,6
3421 vpsrlq xmm10,xmm10,7
3422 xor r15,r11
3423 add r9,r12
3424 vpxor xmm8,xmm8,xmm9
3425 shrd r13,r13,14
3426 and rdi,r15
3427 vpsllq xmm9,xmm9,7
3428 xor r14,r10
3429 add r9,r13
3430 vpxor xmm8,xmm8,xmm10
3431 xor rdi,r11
3432 shrd r14,r14,28
3433 vpsrlq xmm11,xmm4,6
3434 add rbx,r9
3435 add r9,rdi
3436 vpxor xmm8,xmm8,xmm9
3437 mov r13,rbx
3438 add r14,r9
3439 vpsllq xmm10,xmm4,3
3440 shrd r13,r13,23
3441 mov r9,r14
3442 vpaddq xmm5,xmm5,xmm8
3443 mov r12,rcx
3444 shrd r14,r14,5
3445 vpsrlq xmm9,xmm4,19
3446 xor r13,rbx
3447 xor r12,rdx
3448 vpxor xmm11,xmm11,xmm10
3449 shrd r13,r13,4
3450 xor r14,r9
3451 vpsllq xmm10,xmm10,42
3452 and r12,rbx
3453 xor r13,rbx
3454 vpxor xmm11,xmm11,xmm9
3455 add r8,QWORD[88+rsp]
3456 mov rdi,r9
3457 vpsrlq xmm9,xmm9,42
3458 xor r12,rdx
3459 shrd r14,r14,6
3460 vpxor xmm11,xmm11,xmm10
3461 xor rdi,r10
3462 add r8,r12
3463 vpxor xmm11,xmm11,xmm9
3464 shrd r13,r13,14
3465 and r15,rdi
3466 vpaddq xmm5,xmm5,xmm11
3467 xor r14,r9
3468 add r8,r13
3469 vpaddq xmm10,xmm5,XMMWORD[32+rbp]
3470 xor r15,r10
3471 shrd r14,r14,28
3472 add rax,r8
3473 add r8,r15
3474 mov r13,rax
3475 add r14,r8
3476 vmovdqa XMMWORD[80+rsp],xmm10
3477 vpalignr xmm8,xmm7,xmm6,8
3478 shrd r13,r13,23
3479 mov r8,r14
3480 vpalignr xmm11,xmm3,xmm2,8
3481 mov r12,rbx
3482 shrd r14,r14,5
3483 vpsrlq xmm10,xmm8,1
3484 xor r13,rax
3485 xor r12,rcx
3486 vpaddq xmm6,xmm6,xmm11
3487 shrd r13,r13,4
3488 xor r14,r8
3489 vpsrlq xmm11,xmm8,7
3490 and r12,rax
3491 xor r13,rax
3492 vpsllq xmm9,xmm8,56
3493 add rdx,QWORD[96+rsp]
3494 mov r15,r8
3495 vpxor xmm8,xmm11,xmm10
3496 xor r12,rcx
3497 shrd r14,r14,6
3498 vpsrlq xmm10,xmm10,7
3499 xor r15,r9
3500 add rdx,r12
3501 vpxor xmm8,xmm8,xmm9
3502 shrd r13,r13,14
3503 and rdi,r15
3504 vpsllq xmm9,xmm9,7
3505 xor r14,r8
3506 add rdx,r13
3507 vpxor xmm8,xmm8,xmm10
3508 xor rdi,r9
3509 shrd r14,r14,28
3510 vpsrlq xmm11,xmm5,6
3511 add r11,rdx
3512 add rdx,rdi
3513 vpxor xmm8,xmm8,xmm9
3514 mov r13,r11
3515 add r14,rdx
3516 vpsllq xmm10,xmm5,3
3517 shrd r13,r13,23
3518 mov rdx,r14
3519 vpaddq xmm6,xmm6,xmm8
3520 mov r12,rax
3521 shrd r14,r14,5
3522 vpsrlq xmm9,xmm5,19
3523 xor r13,r11
3524 xor r12,rbx
3525 vpxor xmm11,xmm11,xmm10
3526 shrd r13,r13,4
3527 xor r14,rdx
3528 vpsllq xmm10,xmm10,42
3529 and r12,r11
3530 xor r13,r11
3531 vpxor xmm11,xmm11,xmm9
3532 add rcx,QWORD[104+rsp]
3533 mov rdi,rdx
3534 vpsrlq xmm9,xmm9,42
3535 xor r12,rbx
3536 shrd r14,r14,6
3537 vpxor xmm11,xmm11,xmm10
3538 xor rdi,r8
3539 add rcx,r12
3540 vpxor xmm11,xmm11,xmm9
3541 shrd r13,r13,14
3542 and r15,rdi
3543 vpaddq xmm6,xmm6,xmm11
3544 xor r14,rdx
3545 add rcx,r13
3546 vpaddq xmm10,xmm6,XMMWORD[64+rbp]
3547 xor r15,r8
3548 shrd r14,r14,28
3549 add r10,rcx
3550 add rcx,r15
3551 mov r13,r10
3552 add r14,rcx
3553 vmovdqa XMMWORD[96+rsp],xmm10
3554 vpalignr xmm8,xmm0,xmm7,8
3555 shrd r13,r13,23
3556 mov rcx,r14
3557 vpalignr xmm11,xmm4,xmm3,8
3558 mov r12,r11
3559 shrd r14,r14,5
3560 vpsrlq xmm10,xmm8,1
3561 xor r13,r10
3562 xor r12,rax
3563 vpaddq xmm7,xmm7,xmm11
3564 shrd r13,r13,4
3565 xor r14,rcx
3566 vpsrlq xmm11,xmm8,7
3567 and r12,r10
3568 xor r13,r10
3569 vpsllq xmm9,xmm8,56
3570 add rbx,QWORD[112+rsp]
3571 mov r15,rcx
3572 vpxor xmm8,xmm11,xmm10
3573 xor r12,rax
3574 shrd r14,r14,6
3575 vpsrlq xmm10,xmm10,7
3576 xor r15,rdx
3577 add rbx,r12
3578 vpxor xmm8,xmm8,xmm9
3579 shrd r13,r13,14
3580 and rdi,r15
3581 vpsllq xmm9,xmm9,7
3582 xor r14,rcx
3583 add rbx,r13
3584 vpxor xmm8,xmm8,xmm10
3585 xor rdi,rdx
3586 shrd r14,r14,28
3587 vpsrlq xmm11,xmm6,6
3588 add r9,rbx
3589 add rbx,rdi
3590 vpxor xmm8,xmm8,xmm9
3591 mov r13,r9
3592 add r14,rbx
3593 vpsllq xmm10,xmm6,3
3594 shrd r13,r13,23
3595 mov rbx,r14
3596 vpaddq xmm7,xmm7,xmm8
3597 mov r12,r10
3598 shrd r14,r14,5
3599 vpsrlq xmm9,xmm6,19
3600 xor r13,r9
3601 xor r12,r11
3602 vpxor xmm11,xmm11,xmm10
3603 shrd r13,r13,4
3604 xor r14,rbx
3605 vpsllq xmm10,xmm10,42
3606 and r12,r9
3607 xor r13,r9
3608 vpxor xmm11,xmm11,xmm9
3609 add rax,QWORD[120+rsp]
3610 mov rdi,rbx
3611 vpsrlq xmm9,xmm9,42
3612 xor r12,r11
3613 shrd r14,r14,6
3614 vpxor xmm11,xmm11,xmm10
3615 xor rdi,rcx
3616 add rax,r12
3617 vpxor xmm11,xmm11,xmm9
3618 shrd r13,r13,14
3619 and r15,rdi
3620 vpaddq xmm7,xmm7,xmm11
3621 xor r14,rbx
3622 add rax,r13
3623 vpaddq xmm10,xmm7,XMMWORD[96+rbp]
3624 xor r15,rcx
3625 shrd r14,r14,28
3626 add r8,rax
3627 add rax,r15
3628 mov r13,r8
3629 add r14,rax
3630 vmovdqa XMMWORD[112+rsp],xmm10
3631 cmp BYTE[135+rbp],0
3632 jne NEAR $L$avx_00_47
3633 shrd r13,r13,23
3634 mov rax,r14
3635 mov r12,r9
3636 shrd r14,r14,5
3637 xor r13,r8
3638 xor r12,r10
3639 shrd r13,r13,4
3640 xor r14,rax
3641 and r12,r8
3642 xor r13,r8
3643 add r11,QWORD[rsp]
3644 mov r15,rax
3645 xor r12,r10
3646 shrd r14,r14,6
3647 xor r15,rbx
3648 add r11,r12
3649 shrd r13,r13,14
3650 and rdi,r15
3651 xor r14,rax
3652 add r11,r13
3653 xor rdi,rbx
3654 shrd r14,r14,28
3655 add rdx,r11
3656 add r11,rdi
3657 mov r13,rdx
3658 add r14,r11
3659 shrd r13,r13,23
3660 mov r11,r14
3661 mov r12,r8
3662 shrd r14,r14,5
3663 xor r13,rdx
3664 xor r12,r9
3665 shrd r13,r13,4
3666 xor r14,r11
3667 and r12,rdx
3668 xor r13,rdx
3669 add r10,QWORD[8+rsp]
3670 mov rdi,r11
3671 xor r12,r9
3672 shrd r14,r14,6
3673 xor rdi,rax
3674 add r10,r12
3675 shrd r13,r13,14
3676 and r15,rdi
3677 xor r14,r11
3678 add r10,r13
3679 xor r15,rax
3680 shrd r14,r14,28
3681 add rcx,r10
3682 add r10,r15
3683 mov r13,rcx
3684 add r14,r10
3685 shrd r13,r13,23
3686 mov r10,r14
3687 mov r12,rdx
3688 shrd r14,r14,5
3689 xor r13,rcx
3690 xor r12,r8
3691 shrd r13,r13,4
3692 xor r14,r10
3693 and r12,rcx
3694 xor r13,rcx
3695 add r9,QWORD[16+rsp]
3696 mov r15,r10
3697 xor r12,r8
3698 shrd r14,r14,6
3699 xor r15,r11
3700 add r9,r12
3701 shrd r13,r13,14
3702 and rdi,r15
3703 xor r14,r10
3704 add r9,r13
3705 xor rdi,r11
3706 shrd r14,r14,28
3707 add rbx,r9
3708 add r9,rdi
3709 mov r13,rbx
3710 add r14,r9
3711 shrd r13,r13,23
3712 mov r9,r14
3713 mov r12,rcx
3714 shrd r14,r14,5
3715 xor r13,rbx
3716 xor r12,rdx
3717 shrd r13,r13,4
3718 xor r14,r9
3719 and r12,rbx
3720 xor r13,rbx
3721 add r8,QWORD[24+rsp]
3722 mov rdi,r9
3723 xor r12,rdx
3724 shrd r14,r14,6
3725 xor rdi,r10
3726 add r8,r12
3727 shrd r13,r13,14
3728 and r15,rdi
3729 xor r14,r9
3730 add r8,r13
3731 xor r15,r10
3732 shrd r14,r14,28
3733 add rax,r8
3734 add r8,r15
3735 mov r13,rax
3736 add r14,r8
3737 shrd r13,r13,23
3738 mov r8,r14
3739 mov r12,rbx
3740 shrd r14,r14,5
3741 xor r13,rax
3742 xor r12,rcx
3743 shrd r13,r13,4
3744 xor r14,r8
3745 and r12,rax
3746 xor r13,rax
3747 add rdx,QWORD[32+rsp]
3748 mov r15,r8
3749 xor r12,rcx
3750 shrd r14,r14,6
3751 xor r15,r9
3752 add rdx,r12
3753 shrd r13,r13,14
3754 and rdi,r15
3755 xor r14,r8
3756 add rdx,r13
3757 xor rdi,r9
3758 shrd r14,r14,28
3759 add r11,rdx
3760 add rdx,rdi
3761 mov r13,r11
3762 add r14,rdx
3763 shrd r13,r13,23
3764 mov rdx,r14
3765 mov r12,rax
3766 shrd r14,r14,5
3767 xor r13,r11
3768 xor r12,rbx
3769 shrd r13,r13,4
3770 xor r14,rdx
3771 and r12,r11
3772 xor r13,r11
3773 add rcx,QWORD[40+rsp]
3774 mov rdi,rdx
3775 xor r12,rbx
3776 shrd r14,r14,6
3777 xor rdi,r8
3778 add rcx,r12
3779 shrd r13,r13,14
3780 and r15,rdi
3781 xor r14,rdx
3782 add rcx,r13
3783 xor r15,r8
3784 shrd r14,r14,28
3785 add r10,rcx
3786 add rcx,r15
3787 mov r13,r10
3788 add r14,rcx
3789 shrd r13,r13,23
3790 mov rcx,r14
3791 mov r12,r11
3792 shrd r14,r14,5
3793 xor r13,r10
3794 xor r12,rax
3795 shrd r13,r13,4
3796 xor r14,rcx
3797 and r12,r10
3798 xor r13,r10
3799 add rbx,QWORD[48+rsp]
3800 mov r15,rcx
3801 xor r12,rax
3802 shrd r14,r14,6
3803 xor r15,rdx
3804 add rbx,r12
3805 shrd r13,r13,14
3806 and rdi,r15
3807 xor r14,rcx
3808 add rbx,r13
3809 xor rdi,rdx
3810 shrd r14,r14,28
3811 add r9,rbx
3812 add rbx,rdi
3813 mov r13,r9
3814 add r14,rbx
3815 shrd r13,r13,23
3816 mov rbx,r14
3817 mov r12,r10
3818 shrd r14,r14,5
3819 xor r13,r9
3820 xor r12,r11
3821 shrd r13,r13,4
3822 xor r14,rbx
3823 and r12,r9
3824 xor r13,r9
3825 add rax,QWORD[56+rsp]
3826 mov rdi,rbx
3827 xor r12,r11
3828 shrd r14,r14,6
3829 xor rdi,rcx
3830 add rax,r12
3831 shrd r13,r13,14
3832 and r15,rdi
3833 xor r14,rbx
3834 add rax,r13
3835 xor r15,rcx
3836 shrd r14,r14,28
3837 add r8,rax
3838 add rax,r15
3839 mov r13,r8
3840 add r14,rax
3841 shrd r13,r13,23
3842 mov rax,r14
3843 mov r12,r9
3844 shrd r14,r14,5
3845 xor r13,r8
3846 xor r12,r10
3847 shrd r13,r13,4
3848 xor r14,rax
3849 and r12,r8
3850 xor r13,r8
3851 add r11,QWORD[64+rsp]
3852 mov r15,rax
3853 xor r12,r10
3854 shrd r14,r14,6
3855 xor r15,rbx
3856 add r11,r12
3857 shrd r13,r13,14
3858 and rdi,r15
3859 xor r14,rax
3860 add r11,r13
3861 xor rdi,rbx
3862 shrd r14,r14,28
3863 add rdx,r11
3864 add r11,rdi
3865 mov r13,rdx
3866 add r14,r11
3867 shrd r13,r13,23
3868 mov r11,r14
3869 mov r12,r8
3870 shrd r14,r14,5
3871 xor r13,rdx
3872 xor r12,r9
3873 shrd r13,r13,4
3874 xor r14,r11
3875 and r12,rdx
3876 xor r13,rdx
3877 add r10,QWORD[72+rsp]
3878 mov rdi,r11
3879 xor r12,r9
3880 shrd r14,r14,6
3881 xor rdi,rax
3882 add r10,r12
3883 shrd r13,r13,14
3884 and r15,rdi
3885 xor r14,r11
3886 add r10,r13
3887 xor r15,rax
3888 shrd r14,r14,28
3889 add rcx,r10
3890 add r10,r15
3891 mov r13,rcx
3892 add r14,r10
3893 shrd r13,r13,23
3894 mov r10,r14
3895 mov r12,rdx
3896 shrd r14,r14,5
3897 xor r13,rcx
3898 xor r12,r8
3899 shrd r13,r13,4
3900 xor r14,r10
3901 and r12,rcx
3902 xor r13,rcx
3903 add r9,QWORD[80+rsp]
3904 mov r15,r10
3905 xor r12,r8
3906 shrd r14,r14,6
3907 xor r15,r11
3908 add r9,r12
3909 shrd r13,r13,14
3910 and rdi,r15
3911 xor r14,r10
3912 add r9,r13
3913 xor rdi,r11
3914 shrd r14,r14,28
3915 add rbx,r9
3916 add r9,rdi
3917 mov r13,rbx
3918 add r14,r9
3919 shrd r13,r13,23
3920 mov r9,r14
3921 mov r12,rcx
3922 shrd r14,r14,5
3923 xor r13,rbx
3924 xor r12,rdx
3925 shrd r13,r13,4
3926 xor r14,r9
3927 and r12,rbx
3928 xor r13,rbx
3929 add r8,QWORD[88+rsp]
3930 mov rdi,r9
3931 xor r12,rdx
3932 shrd r14,r14,6
3933 xor rdi,r10
3934 add r8,r12
3935 shrd r13,r13,14
3936 and r15,rdi
3937 xor r14,r9
3938 add r8,r13
3939 xor r15,r10
3940 shrd r14,r14,28
3941 add rax,r8
3942 add r8,r15
3943 mov r13,rax
3944 add r14,r8
3945 shrd r13,r13,23
3946 mov r8,r14
3947 mov r12,rbx
3948 shrd r14,r14,5
3949 xor r13,rax
3950 xor r12,rcx
3951 shrd r13,r13,4
3952 xor r14,r8
3953 and r12,rax
3954 xor r13,rax
3955 add rdx,QWORD[96+rsp]
3956 mov r15,r8
3957 xor r12,rcx
3958 shrd r14,r14,6
3959 xor r15,r9
3960 add rdx,r12
3961 shrd r13,r13,14
3962 and rdi,r15
3963 xor r14,r8
3964 add rdx,r13
3965 xor rdi,r9
3966 shrd r14,r14,28
3967 add r11,rdx
3968 add rdx,rdi
3969 mov r13,r11
3970 add r14,rdx
3971 shrd r13,r13,23
3972 mov rdx,r14
3973 mov r12,rax
3974 shrd r14,r14,5
3975 xor r13,r11
3976 xor r12,rbx
3977 shrd r13,r13,4
3978 xor r14,rdx
3979 and r12,r11
3980 xor r13,r11
3981 add rcx,QWORD[104+rsp]
3982 mov rdi,rdx
3983 xor r12,rbx
3984 shrd r14,r14,6
3985 xor rdi,r8
3986 add rcx,r12
3987 shrd r13,r13,14
3988 and r15,rdi
3989 xor r14,rdx
3990 add rcx,r13
3991 xor r15,r8
3992 shrd r14,r14,28
3993 add r10,rcx
3994 add rcx,r15
3995 mov r13,r10
3996 add r14,rcx
3997 shrd r13,r13,23
3998 mov rcx,r14
3999 mov r12,r11
4000 shrd r14,r14,5
4001 xor r13,r10
4002 xor r12,rax
4003 shrd r13,r13,4
4004 xor r14,rcx
4005 and r12,r10
4006 xor r13,r10
4007 add rbx,QWORD[112+rsp]
4008 mov r15,rcx
4009 xor r12,rax
4010 shrd r14,r14,6
4011 xor r15,rdx
4012 add rbx,r12
4013 shrd r13,r13,14
4014 and rdi,r15
4015 xor r14,rcx
4016 add rbx,r13
4017 xor rdi,rdx
4018 shrd r14,r14,28
4019 add r9,rbx
4020 add rbx,rdi
4021 mov r13,r9
4022 add r14,rbx
4023 shrd r13,r13,23
4024 mov rbx,r14
4025 mov r12,r10
4026 shrd r14,r14,5
4027 xor r13,r9
4028 xor r12,r11
4029 shrd r13,r13,4
4030 xor r14,rbx
4031 and r12,r9
4032 xor r13,r9
4033 add rax,QWORD[120+rsp]
4034 mov rdi,rbx
4035 xor r12,r11
4036 shrd r14,r14,6
4037 xor rdi,rcx
4038 add rax,r12
4039 shrd r13,r13,14
4040 and r15,rdi
4041 xor r14,rbx
4042 add rax,r13
4043 xor r15,rcx
4044 shrd r14,r14,28
4045 add r8,rax
4046 add rax,r15
4047 mov r13,r8
4048 add r14,rax
; --- AVX path: finish one 128-byte step, then loop or return ---------------
; rdi is reloaded from the frame slot at 128+rsp (presumably the pointer to
; the eight 64-bit state words saved by this routine's prologue, which is
; outside this view). rax takes the value accumulated in r14 by the
; preceding "add r14,rax".
4049 mov rdi,QWORD[((128+0))+rsp]
4050 mov rax,r14
4051
; Add the eight 64-bit words currently stored at [rdi]..[56+rdi] into the
; working registers rax,rbx,rcx,rdx,r8,r9,r10,r11. In between, rsi is
; advanced by 128 bytes.
4052 add rax,QWORD[rdi]
4053 lea rsi,[128+rsi]
4054 add rbx,QWORD[8+rdi]
4055 add rcx,QWORD[16+rdi]
4056 add rdx,QWORD[24+rdi]
4057 add r8,QWORD[32+rdi]
4058 add r9,QWORD[40+rdi]
4059 add r10,QWORD[48+rdi]
4060 add r11,QWORD[56+rdi]
4061
; Compare the advanced rsi with the value kept at 128+16+rsp (presumably the
; end-of-input pointer; confirm against the prologue). The mov stores that
; follow do not modify flags, so the jb below still tests this comparison.
4062 cmp rsi,QWORD[((128+16))+rsp]
4063
; Write the eight updated words back to [rdi]..[56+rdi].
4064 mov QWORD[rdi],rax
4065 mov QWORD[8+rdi],rbx
4066 mov QWORD[16+rdi],rcx
4067 mov QWORD[24+rdi],rdx
4068 mov QWORD[32+rdi],r8
4069 mov QWORD[40+rdi],r9
4070 mov QWORD[48+rdi],r10
4071 mov QWORD[56+rdi],r11
; Unsigned "below": go round again while rsi is still below the stored limit.
4072 jb NEAR $L$loop_avx
4073
; --- Register restore ---
; rsi <- value kept at 128+24+rsp. It is used below as the base address from
; which six general-purpose registers and rsp are reloaded (presumably the
; rsp captured right after the prologue's pushes; prologue not visible here).
4074 mov rsi,QWORD[((128+24))+rsp]
; vzeroupper zeroes bits 128 and above of the ymm registers before the
; legacy-SSE movaps reloads.
4075 vzeroupper
; Reload xmm6..xmm11 from the frame slots at 128+32 .. 128+112 (these are
; callee-saved under the Microsoft x64 convention).
4076 movaps xmm6,XMMWORD[((128+32))+rsp]
4077 movaps xmm7,XMMWORD[((128+48))+rsp]
4078 movaps xmm8,XMMWORD[((128+64))+rsp]
4079 movaps xmm9,XMMWORD[((128+80))+rsp]
4080 movaps xmm10,XMMWORD[((128+96))+rsp]
4081 movaps xmm11,XMMWORD[((128+112))+rsp]
; Reload r15,r14,r13,r12,rbp,rbx from [rsi]..[40+rsi], then point rsp just
; past those six 8-byte slots.
4082 mov r15,QWORD[rsi]
4083 mov r14,QWORD[8+rsi]
4084 mov r13,QWORD[16+rsi]
4085 mov r12,QWORD[24+rsi]
4086 mov rbp,QWORD[32+rsi]
4087 mov rbx,QWORD[40+rsi]
4088 lea rsp,[48+rsi]
4089$L$epilogue_avx:
; Reload rdi and rsi from the two slots just above the return address,
; then return. The two DB bytes F3 C3 encode "rep ret".
4090 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
4091 mov rsi,QWORD[16+rsp]
4092 DB 0F3h,0C3h ;repret
4093$L$SEH_end_sha512_block_data_order_avx:
4094
4095ALIGN 64
; sha512_block_data_order_avx2 -- AVX2 code path.
; Reached either by entering at this label or by the "je $L$avx2_shortcut"
; taken from sha512_block_data_order's capability dispatch.
; On entry at this label: rdi and rsi are stored in the two slots just above
; the return address ([8+rsp], [16+rsp]) and the incoming rcx, rdx, r8 are
; copied into rdi, rsi, rdx.
; From $L$avx2_shortcut onward the visible code uses:
;   rdi = address of eight 64-bit words, loaded into rax,rbx,rcx,rdx,r8..r11
;   rsi = input pointer
;   rdx = count that is scaled by 128 and added to rsi to form an end pointer
4096sha512_block_data_order_avx2:
4097 mov QWORD[8+rsp],rdi ;WIN64 prologue
4098 mov QWORD[16+rsp],rsi
; rax <- rsp at entry. It is not read again in the visible code before being
; overwritten below; presumably it exists for unwind handling -- TODO confirm.
4099 mov rax,rsp
4100$L$SEH_begin_sha512_block_data_order_avx2:
4101 mov rdi,rcx
4102 mov rsi,rdx
4103 mov rdx,r8
4104
4105
4106$L$avx2_shortcut:
; Save the six callee-saved general-purpose registers used by this routine.
4107 push rbx
4108 push rbp
4109 push r12
4110 push r13
4111 push r14
4112 push r15
; r11 <- rsp after the pushes; it is stored in the frame at 128+24 below.
4113 mov r11,rsp
; Frame setup, interleaved with the end-pointer computation:
;   rsp = ((rsp - 1408) rounded down to a multiple of 2048) + 1152
;   rdx = rsi + (rdx << 4) * 8 = rsi + rdx * 128
; The resulting rsp is congruent to 1152 modulo 2048, hence 128-byte aligned
; (which the aligned movaps/vmovdqa frame accesses need), and it is at most
; r11 - 256, so the 256 bytes at rsp..rsp+255 lie below the pushed registers.
4114 sub rsp,1408
4115 shl rdx,4
4116 and rsp,-256*8
4117 lea rdx,[rdx*8+rsi]
4118 add rsp,1152
; Frame slots: 128+0 = rdi, 128+8 = rsi, 128+16 = end pointer (rdx),
; 128+24 = rsp value captured in r11.
4119 mov QWORD[((128+0))+rsp],rdi
4120 mov QWORD[((128+8))+rsp],rsi
4121 mov QWORD[((128+16))+rsp],rdx
4122 mov QWORD[((128+24))+rsp],r11
; Save xmm6..xmm11 at 128+32 .. 128+112 (callee-saved under the Microsoft
; x64 convention).
4123 movaps XMMWORD[(128+32)+rsp],xmm6
4124 movaps XMMWORD[(128+48)+rsp],xmm7
4125 movaps XMMWORD[(128+64)+rsp],xmm8
4126 movaps XMMWORD[(128+80)+rsp],xmm9
4127 movaps XMMWORD[(128+96)+rsp],xmm10
4128 movaps XMMWORD[(128+112)+rsp],xmm11
4129$L$prologue_avx2:
4130
4131 vzeroupper
; rsi += 128, written as a subtraction of -128 (presumably because -128 fits
; a signed 8-bit immediate while +128 does not). From here on the loop
; addresses the current 128 bytes of input at negative offsets from rsi.
4132 sub rsi,-16*8
; Load the eight 64-bit words at [rdi]..[56+rdi] into rax,rbx,rcx,rdx,
; r8,r9,r10,r11. Interleaved with that: r12 = rsi, unless the advanced rsi
; equals the end pointer in rdx, in which case r12 = rsp. The mov loads
; between the cmp and the cmove leave the flags intact. Note that rdx is
; only overwritten after the cmp has consumed the end pointer.
4133 mov rax,QWORD[rdi]
4134 mov r12,rsi
4135 mov rbx,QWORD[8+rdi]
4136 cmp rsi,rdx
4137 mov rcx,QWORD[16+rdi]
4138 cmove r12,rsp
4139 mov rdx,QWORD[24+rdi]
4140 mov r8,QWORD[32+rdi]
4141 mov r9,QWORD[40+rdi]
4142 mov r10,QWORD[48+rdi]
4143 mov r11,QWORD[56+rdi]
4144 jmp NEAR $L$oop_avx2
; $L$oop_avx2 -- head of the AVX2 outer loop.
; Loads 128 bytes from rsi-128..rsi-1 into the low 128-bit lanes of
; ymm0..ymm7 and 128 bytes from r12..r12+127 into their high lanes, shuffles
; every register with the mask loaded into ymm10, adds 32-byte table entries
; addressed via rbp, and stores the sums on the stack for the round code.
4145ALIGN 16
4146$L$oop_avx2:
; Low lanes: eight unaligned 16-byte loads from the 128 bytes preceding rsi.
; rbp is pointed at K512+128 so the first table entries are reached with
; offsets -128..+96.
4147 vmovdqu xmm0,XMMWORD[((-128))+rsi]
4148 vmovdqu xmm1,XMMWORD[((-128+16))+rsi]
4149 vmovdqu xmm2,XMMWORD[((-128+32))+rsi]
4150 lea rbp,[((K512+128))]
4151 vmovdqu xmm3,XMMWORD[((-128+48))+rsi]
4152 vmovdqu xmm4,XMMWORD[((-128+64))+rsi]
4153 vmovdqu xmm5,XMMWORD[((-128+80))+rsi]
4154 vmovdqu xmm6,XMMWORD[((-128+96))+rsi]
4155 vmovdqu xmm7,XMMWORD[((-128+112))+rsi]
4156
; ymm10 <- 32-byte vpshufb control mask located at K512+128+1152 (presumably
; a byte-order swap mask for the 64-bit input words -- confirm against the
; K512 data). High lanes are filled from [r12 + 16*i] with vinserti128 ...,1
; and each register is shuffled as soon as both of its lanes are loaded.
4157 vmovdqa ymm10,YMMWORD[1152+rbp]
4158 vinserti128 ymm0,ymm0,XMMWORD[r12],1
4159 vinserti128 ymm1,ymm1,XMMWORD[16+r12],1
4160 vpshufb ymm0,ymm0,ymm10
4161 vinserti128 ymm2,ymm2,XMMWORD[32+r12],1
4162 vpshufb ymm1,ymm1,ymm10
4163 vinserti128 ymm3,ymm3,XMMWORD[48+r12],1
4164 vpshufb ymm2,ymm2,ymm10
4165 vinserti128 ymm4,ymm4,XMMWORD[64+r12],1
4166 vpshufb ymm3,ymm3,ymm10
4167 vinserti128 ymm5,ymm5,XMMWORD[80+r12],1
4168 vpshufb ymm4,ymm4,ymm10
4169 vinserti128 ymm6,ymm6,XMMWORD[96+r12],1
4170 vpshufb ymm5,ymm5,ymm10
4171 vinserti128 ymm7,ymm7,XMMWORD[112+r12],1
4172
; Add the table entries at rbp-128 .. rbp+96 to ymm0..ymm7, producing the
; sums in ymm8..ymm11. ymm10 remains the shuffle mask until the last vpshufb
; (on ymm7) has issued; only then is it reused as a sum register.
; Sums for ymm0..ymm3 are stored at rsp..rsp+127; rsp is then lowered by 128
; and the sums for ymm4..ymm7 are stored at the new rsp..rsp+127.
4173 vpaddq ymm8,ymm0,YMMWORD[((-128))+rbp]
4174 vpshufb ymm6,ymm6,ymm10
4175 vpaddq ymm9,ymm1,YMMWORD[((-96))+rbp]
4176 vpshufb ymm7,ymm7,ymm10
4177 vpaddq ymm10,ymm2,YMMWORD[((-64))+rbp]
4178 vpaddq ymm11,ymm3,YMMWORD[((-32))+rbp]
4179 vmovdqa YMMWORD[rsp],ymm8
4180 vpaddq ymm8,ymm4,YMMWORD[rbp]
4181 vmovdqa YMMWORD[32+rsp],ymm9
4182 vpaddq ymm9,ymm5,YMMWORD[32+rbp]
4183 vmovdqa YMMWORD[64+rsp],ymm10
4184 vpaddq ymm10,ymm6,YMMWORD[64+rbp]
4185 vmovdqa YMMWORD[96+rsp],ymm11
4186 lea rsp,[((-128))+rsp]
4187 vpaddq ymm11,ymm7,YMMWORD[96+rbp]
4188 vmovdqa YMMWORD[rsp],ymm8
; Scalar scratch setup interleaved with the remaining stores:
;   r14 = 0, rdi = rbx XOR rcx, r12 = r9.
; rbp is advanced by 256 bytes (16*2*8) past the table entries just used.
4189 xor r14,r14
4190 vmovdqa YMMWORD[32+rsp],ymm9
4191 mov rdi,rbx
4192 vmovdqa YMMWORD[64+rsp],ymm10
4193 xor rdi,rcx
4194 vmovdqa YMMWORD[96+rsp],ymm11
4195 mov r12,r9
4196 add rbp,16*2*8
4197 jmp NEAR $L$avx2_00_47
4198
; $L$avx2_00_47 -- AVX2 inner loop: scalar round arithmetic (rorx/andn/lea)
; interleaved with ymm updates of ymm0..ymm7.
; Each pass lowers rsp by 128 here and once more halfway through the body.
; The scalar code adds in the low 16 bytes of each 32-byte slot at 256+rsp..
; (values stored earlier), while new vpaddq sums are written to
; rsp..rsp+127 for later use.
4199ALIGN 16
4200$L$avx2_00_47:
4201 lea rsp,[((-128))+rsp]
4202 vpalignr ymm8,ymm1,ymm0,8
4203 add r11,QWORD[((0+256))+rsp]
4204 and r12,r8
4205 rorx r13,r8,41
4206 vpalignr ymm11,ymm5,ymm4,8
4207 rorx r15,r8,18
4208 lea rax,[r14*1+rax]
4209 lea r11,[r12*1+r11]
4210 vpsrlq ymm10,ymm8,1
4211 andn r12,r8,r10
4212 xor r13,r15
4213 rorx r14,r8,14
4214 vpaddq ymm0,ymm0,ymm11
4215 vpsrlq ymm11,ymm8,7
4216 lea r11,[r12*1+r11]
4217 xor r13,r14
4218 mov r15,rax
4219 vpsllq ymm9,ymm8,56
4220 vpxor ymm8,ymm11,ymm10
4221 rorx r12,rax,39
4222 lea r11,[r13*1+r11]
4223 xor r15,rbx
4224 vpsrlq ymm10,ymm10,7
4225 vpxor ymm8,ymm8,ymm9
4226 rorx r14,rax,34
4227 rorx r13,rax,28
4228 lea rdx,[r11*1+rdx]
4229 vpsllq ymm9,ymm9,7
4230 vpxor ymm8,ymm8,ymm10
4231 and rdi,r15
4232 xor r14,r12
4233 xor rdi,rbx
4234 vpsrlq ymm11,ymm7,6
4235 vpxor ymm8,ymm8,ymm9
4236 xor r14,r13
4237 lea r11,[rdi*1+r11]
4238 mov r12,r8
4239 vpsllq ymm10,ymm7,3
4240 vpaddq ymm0,ymm0,ymm8
4241 add r10,QWORD[((8+256))+rsp]
4242 and r12,rdx
4243 rorx r13,rdx,41
4244 vpsrlq ymm9,ymm7,19
4245 vpxor ymm11,ymm11,ymm10
4246 rorx rdi,rdx,18
4247 lea r11,[r14*1+r11]
4248 lea r10,[r12*1+r10]
4249 vpsllq ymm10,ymm10,42
4250 vpxor ymm11,ymm11,ymm9
4251 andn r12,rdx,r9
4252 xor r13,rdi
4253 rorx r14,rdx,14
4254 vpsrlq ymm9,ymm9,42
4255 vpxor ymm11,ymm11,ymm10
4256 lea r10,[r12*1+r10]
4257 xor r13,r14
4258 mov rdi,r11
4259 vpxor ymm11,ymm11,ymm9
4260 rorx r12,r11,39
4261 lea r10,[r13*1+r10]
4262 xor rdi,rax
4263 vpaddq ymm0,ymm0,ymm11
4264 rorx r14,r11,34
4265 rorx r13,r11,28
4266 lea rcx,[r10*1+rcx]
4267 vpaddq ymm10,ymm0,YMMWORD[((-128))+rbp]
4268 and r15,rdi
4269 xor r14,r12
4270 xor r15,rax
4271 xor r14,r13
4272 lea r10,[r15*1+r10]
4273 mov r12,rdx
4274 vmovdqa YMMWORD[rsp],ymm10
4275 vpalignr ymm8,ymm2,ymm1,8
4276 add r9,QWORD[((32+256))+rsp]
4277 and r12,rcx
4278 rorx r13,rcx,41
4279 vpalignr ymm11,ymm6,ymm5,8
4280 rorx r15,rcx,18
4281 lea r10,[r14*1+r10]
4282 lea r9,[r12*1+r9]
4283 vpsrlq ymm10,ymm8,1
4284 andn r12,rcx,r8
4285 xor r13,r15
4286 rorx r14,rcx,14
4287 vpaddq ymm1,ymm1,ymm11
4288 vpsrlq ymm11,ymm8,7
4289 lea r9,[r12*1+r9]
4290 xor r13,r14
4291 mov r15,r10
4292 vpsllq ymm9,ymm8,56
4293 vpxor ymm8,ymm11,ymm10
4294 rorx r12,r10,39
4295 lea r9,[r13*1+r9]
4296 xor r15,r11
4297 vpsrlq ymm10,ymm10,7
4298 vpxor ymm8,ymm8,ymm9
4299 rorx r14,r10,34
4300 rorx r13,r10,28
4301 lea rbx,[r9*1+rbx]
4302 vpsllq ymm9,ymm9,7
4303 vpxor ymm8,ymm8,ymm10
4304 and rdi,r15
4305 xor r14,r12
4306 xor rdi,r11
4307 vpsrlq ymm11,ymm0,6
4308 vpxor ymm8,ymm8,ymm9
4309 xor r14,r13
4310 lea r9,[rdi*1+r9]
4311 mov r12,rcx
4312 vpsllq ymm10,ymm0,3
4313 vpaddq ymm1,ymm1,ymm8
4314 add r8,QWORD[((40+256))+rsp]
4315 and r12,rbx
4316 rorx r13,rbx,41
4317 vpsrlq ymm9,ymm0,19
4318 vpxor ymm11,ymm11,ymm10
4319 rorx rdi,rbx,18
4320 lea r9,[r14*1+r9]
4321 lea r8,[r12*1+r8]
4322 vpsllq ymm10,ymm10,42
4323 vpxor ymm11,ymm11,ymm9
4324 andn r12,rbx,rdx
4325 xor r13,rdi
4326 rorx r14,rbx,14
4327 vpsrlq ymm9,ymm9,42
4328 vpxor ymm11,ymm11,ymm10
4329 lea r8,[r12*1+r8]
4330 xor r13,r14
4331 mov rdi,r9
4332 vpxor ymm11,ymm11,ymm9
4333 rorx r12,r9,39
4334 lea r8,[r13*1+r8]
4335 xor rdi,r10
4336 vpaddq ymm1,ymm1,ymm11
4337 rorx r14,r9,34
4338 rorx r13,r9,28
4339 lea rax,[r8*1+rax]
4340 vpaddq ymm10,ymm1,YMMWORD[((-96))+rbp]
4341 and r15,rdi
4342 xor r14,r12
4343 xor r15,r10
4344 xor r14,r13
4345 lea r8,[r15*1+r8]
4346 mov r12,rbx
4347 vmovdqa YMMWORD[32+rsp],ymm10
4348 vpalignr ymm8,ymm3,ymm2,8
4349 add rdx,QWORD[((64+256))+rsp]
4350 and r12,rax
4351 rorx r13,rax,41
4352 vpalignr ymm11,ymm7,ymm6,8
4353 rorx r15,rax,18
4354 lea r8,[r14*1+r8]
4355 lea rdx,[r12*1+rdx]
4356 vpsrlq ymm10,ymm8,1
4357 andn r12,rax,rcx
4358 xor r13,r15
4359 rorx r14,rax,14
4360 vpaddq ymm2,ymm2,ymm11
4361 vpsrlq ymm11,ymm8,7
4362 lea rdx,[r12*1+rdx]
4363 xor r13,r14
4364 mov r15,r8
4365 vpsllq ymm9,ymm8,56
4366 vpxor ymm8,ymm11,ymm10
4367 rorx r12,r8,39
4368 lea rdx,[r13*1+rdx]
4369 xor r15,r9
4370 vpsrlq ymm10,ymm10,7
4371 vpxor ymm8,ymm8,ymm9
4372 rorx r14,r8,34
4373 rorx r13,r8,28
4374 lea r11,[rdx*1+r11]
4375 vpsllq ymm9,ymm9,7
4376 vpxor ymm8,ymm8,ymm10
4377 and rdi,r15
4378 xor r14,r12
4379 xor rdi,r9
4380 vpsrlq ymm11,ymm1,6
4381 vpxor ymm8,ymm8,ymm9
4382 xor r14,r13
4383 lea rdx,[rdi*1+rdx]
4384 mov r12,rax
4385 vpsllq ymm10,ymm1,3
4386 vpaddq ymm2,ymm2,ymm8
4387 add rcx,QWORD[((72+256))+rsp]
4388 and r12,r11
4389 rorx r13,r11,41
4390 vpsrlq ymm9,ymm1,19
4391 vpxor ymm11,ymm11,ymm10
4392 rorx rdi,r11,18
4393 lea rdx,[r14*1+rdx]
4394 lea rcx,[r12*1+rcx]
4395 vpsllq ymm10,ymm10,42
4396 vpxor ymm11,ymm11,ymm9
4397 andn r12,r11,rbx
4398 xor r13,rdi
4399 rorx r14,r11,14
4400 vpsrlq ymm9,ymm9,42
4401 vpxor ymm11,ymm11,ymm10
4402 lea rcx,[r12*1+rcx]
4403 xor r13,r14
4404 mov rdi,rdx
4405 vpxor ymm11,ymm11,ymm9
4406 rorx r12,rdx,39
4407 lea rcx,[r13*1+rcx]
4408 xor rdi,r8
4409 vpaddq ymm2,ymm2,ymm11
4410 rorx r14,rdx,34
4411 rorx r13,rdx,28
4412 lea r10,[rcx*1+r10]
4413 vpaddq ymm10,ymm2,YMMWORD[((-64))+rbp]
4414 and r15,rdi
4415 xor r14,r12
4416 xor r15,r8
4417 xor r14,r13
4418 lea rcx,[r15*1+rcx]
4419 mov r12,r11
4420 vmovdqa YMMWORD[64+rsp],ymm10
4421 vpalignr ymm8,ymm4,ymm3,8
4422 add rbx,QWORD[((96+256))+rsp]
4423 and r12,r10
4424 rorx r13,r10,41
4425 vpalignr ymm11,ymm0,ymm7,8
4426 rorx r15,r10,18
4427 lea rcx,[r14*1+rcx]
4428 lea rbx,[r12*1+rbx]
4429 vpsrlq ymm10,ymm8,1
4430 andn r12,r10,rax
4431 xor r13,r15
4432 rorx r14,r10,14
4433 vpaddq ymm3,ymm3,ymm11
4434 vpsrlq ymm11,ymm8,7
4435 lea rbx,[r12*1+rbx]
4436 xor r13,r14
4437 mov r15,rcx
4438 vpsllq ymm9,ymm8,56
4439 vpxor ymm8,ymm11,ymm10
4440 rorx r12,rcx,39
4441 lea rbx,[r13*1+rbx]
4442 xor r15,rdx
4443 vpsrlq ymm10,ymm10,7
4444 vpxor ymm8,ymm8,ymm9
4445 rorx r14,rcx,34
4446 rorx r13,rcx,28
4447 lea r9,[rbx*1+r9]
4448 vpsllq ymm9,ymm9,7
4449 vpxor ymm8,ymm8,ymm10
4450 and rdi,r15
4451 xor r14,r12
4452 xor rdi,rdx
4453 vpsrlq ymm11,ymm2,6
4454 vpxor ymm8,ymm8,ymm9
4455 xor r14,r13
4456 lea rbx,[rdi*1+rbx]
4457 mov r12,r10
4458 vpsllq ymm10,ymm2,3
4459 vpaddq ymm3,ymm3,ymm8
4460 add rax,QWORD[((104+256))+rsp]
4461 and r12,r9
4462 rorx r13,r9,41
4463 vpsrlq ymm9,ymm2,19
4464 vpxor ymm11,ymm11,ymm10
4465 rorx rdi,r9,18
4466 lea rbx,[r14*1+rbx]
4467 lea rax,[r12*1+rax]
4468 vpsllq ymm10,ymm10,42
4469 vpxor ymm11,ymm11,ymm9
4470 andn r12,r9,r11
4471 xor r13,rdi
4472 rorx r14,r9,14
4473 vpsrlq ymm9,ymm9,42
4474 vpxor ymm11,ymm11,ymm10
4475 lea rax,[r12*1+rax]
4476 xor r13,r14
4477 mov rdi,rbx
4478 vpxor ymm11,ymm11,ymm9
4479 rorx r12,rbx,39
4480 lea rax,[r13*1+rax]
4481 xor rdi,rcx
4482 vpaddq ymm3,ymm3,ymm11
4483 rorx r14,rbx,34
4484 rorx r13,rbx,28
4485 lea r8,[rax*1+r8]
4486 vpaddq ymm10,ymm3,YMMWORD[((-32))+rbp]
4487 and r15,rdi
4488 xor r14,r12
4489 xor r15,rcx
4490 xor r14,r13
4491 lea rax,[r15*1+rax]
4492 mov r12,r9
4493 vmovdqa YMMWORD[96+rsp],ymm10
4494 lea rsp,[((-128))+rsp]
4495 vpalignr ymm8,ymm5,ymm4,8
4496 add r11,QWORD[((0+256))+rsp]
4497 and r12,r8
4498 rorx r13,r8,41
4499 vpalignr ymm11,ymm1,ymm0,8
4500 rorx r15,r8,18
4501 lea rax,[r14*1+rax]
4502 lea r11,[r12*1+r11]
4503 vpsrlq ymm10,ymm8,1
4504 andn r12,r8,r10
4505 xor r13,r15
4506 rorx r14,r8,14
4507 vpaddq ymm4,ymm4,ymm11
4508 vpsrlq ymm11,ymm8,7
4509 lea r11,[r12*1+r11]
4510 xor r13,r14
4511 mov r15,rax
4512 vpsllq ymm9,ymm8,56
4513 vpxor ymm8,ymm11,ymm10
4514 rorx r12,rax,39
4515 lea r11,[r13*1+r11]
4516 xor r15,rbx
4517 vpsrlq ymm10,ymm10,7
4518 vpxor ymm8,ymm8,ymm9
4519 rorx r14,rax,34
4520 rorx r13,rax,28
4521 lea rdx,[r11*1+rdx]
4522 vpsllq ymm9,ymm9,7
4523 vpxor ymm8,ymm8,ymm10
4524 and rdi,r15
4525 xor r14,r12
4526 xor rdi,rbx
4527 vpsrlq ymm11,ymm3,6
4528 vpxor ymm8,ymm8,ymm9
4529 xor r14,r13
4530 lea r11,[rdi*1+r11]
4531 mov r12,r8
4532 vpsllq ymm10,ymm3,3
4533 vpaddq ymm4,ymm4,ymm8
4534 add r10,QWORD[((8+256))+rsp]
4535 and r12,rdx
4536 rorx r13,rdx,41
4537 vpsrlq ymm9,ymm3,19
4538 vpxor ymm11,ymm11,ymm10
4539 rorx rdi,rdx,18
4540 lea r11,[r14*1+r11]
4541 lea r10,[r12*1+r10]
4542 vpsllq ymm10,ymm10,42
4543 vpxor ymm11,ymm11,ymm9
4544 andn r12,rdx,r9
4545 xor r13,rdi
4546 rorx r14,rdx,14
4547 vpsrlq ymm9,ymm9,42
4548 vpxor ymm11,ymm11,ymm10
4549 lea r10,[r12*1+r10]
4550 xor r13,r14
4551 mov rdi,r11
4552 vpxor ymm11,ymm11,ymm9
4553 rorx r12,r11,39
4554 lea r10,[r13*1+r10]
4555 xor rdi,rax
4556 vpaddq ymm4,ymm4,ymm11
4557 rorx r14,r11,34
4558 rorx r13,r11,28
4559 lea rcx,[r10*1+rcx]
4560 vpaddq ymm10,ymm4,YMMWORD[rbp]
4561 and r15,rdi
4562 xor r14,r12
4563 xor r15,rax
4564 xor r14,r13
4565 lea r10,[r15*1+r10]
4566 mov r12,rdx
4567 vmovdqa YMMWORD[rsp],ymm10
4568 vpalignr ymm8,ymm6,ymm5,8
4569 add r9,QWORD[((32+256))+rsp]
4570 and r12,rcx
4571 rorx r13,rcx,41
4572 vpalignr ymm11,ymm2,ymm1,8
4573 rorx r15,rcx,18
4574 lea r10,[r14*1+r10]
4575 lea r9,[r12*1+r9]
4576 vpsrlq ymm10,ymm8,1
4577 andn r12,rcx,r8
4578 xor r13,r15
4579 rorx r14,rcx,14
4580 vpaddq ymm5,ymm5,ymm11
4581 vpsrlq ymm11,ymm8,7
4582 lea r9,[r12*1+r9]
4583 xor r13,r14
4584 mov r15,r10
4585 vpsllq ymm9,ymm8,56
4586 vpxor ymm8,ymm11,ymm10
4587 rorx r12,r10,39
4588 lea r9,[r13*1+r9]
4589 xor r15,r11
4590 vpsrlq ymm10,ymm10,7
4591 vpxor ymm8,ymm8,ymm9
4592 rorx r14,r10,34
4593 rorx r13,r10,28
4594 lea rbx,[r9*1+rbx]
4595 vpsllq ymm9,ymm9,7
4596 vpxor ymm8,ymm8,ymm10
4597 and rdi,r15
4598 xor r14,r12
4599 xor rdi,r11
4600 vpsrlq ymm11,ymm4,6
4601 vpxor ymm8,ymm8,ymm9
4602 xor r14,r13
4603 lea r9,[rdi*1+r9]
4604 mov r12,rcx
4605 vpsllq ymm10,ymm4,3
4606 vpaddq ymm5,ymm5,ymm8
4607 add r8,QWORD[((40+256))+rsp]
4608 and r12,rbx
4609 rorx r13,rbx,41
4610 vpsrlq ymm9,ymm4,19
4611 vpxor ymm11,ymm11,ymm10
4612 rorx rdi,rbx,18
4613 lea r9,[r14*1+r9]
4614 lea r8,[r12*1+r8]
4615 vpsllq ymm10,ymm10,42
4616 vpxor ymm11,ymm11,ymm9
4617 andn r12,rbx,rdx
4618 xor r13,rdi
4619 rorx r14,rbx,14
4620 vpsrlq ymm9,ymm9,42
4621 vpxor ymm11,ymm11,ymm10
4622 lea r8,[r12*1+r8]
4623 xor r13,r14
4624 mov rdi,r9
4625 vpxor ymm11,ymm11,ymm9
4626 rorx r12,r9,39
4627 lea r8,[r13*1+r8]
4628 xor rdi,r10
4629 vpaddq ymm5,ymm5,ymm11
4630 rorx r14,r9,34
4631 rorx r13,r9,28
4632 lea rax,[r8*1+rax]
4633 vpaddq ymm10,ymm5,YMMWORD[32+rbp]
4634 and r15,rdi
4635 xor r14,r12
4636 xor r15,r10
4637 xor r14,r13
4638 lea r8,[r15*1+r8]
4639 mov r12,rbx
4640 vmovdqa YMMWORD[32+rsp],ymm10
4641 vpalignr ymm8,ymm7,ymm6,8
4642 add rdx,QWORD[((64+256))+rsp]
4643 and r12,rax
4644 rorx r13,rax,41
4645 vpalignr ymm11,ymm3,ymm2,8
4646 rorx r15,rax,18
4647 lea r8,[r14*1+r8]
4648 lea rdx,[r12*1+rdx]
4649 vpsrlq ymm10,ymm8,1
4650 andn r12,rax,rcx
4651 xor r13,r15
4652 rorx r14,rax,14
4653 vpaddq ymm6,ymm6,ymm11
4654 vpsrlq ymm11,ymm8,7
4655 lea rdx,[r12*1+rdx]
4656 xor r13,r14
4657 mov r15,r8
4658 vpsllq ymm9,ymm8,56
4659 vpxor ymm8,ymm11,ymm10
4660 rorx r12,r8,39
4661 lea rdx,[r13*1+rdx]
4662 xor r15,r9
4663 vpsrlq ymm10,ymm10,7
4664 vpxor ymm8,ymm8,ymm9
4665 rorx r14,r8,34
4666 rorx r13,r8,28
4667 lea r11,[rdx*1+r11]
4668 vpsllq ymm9,ymm9,7
4669 vpxor ymm8,ymm8,ymm10
4670 and rdi,r15
4671 xor r14,r12
4672 xor rdi,r9
4673 vpsrlq ymm11,ymm5,6
4674 vpxor ymm8,ymm8,ymm9
4675 xor r14,r13
4676 lea rdx,[rdi*1+rdx]
4677 mov r12,rax
4678 vpsllq ymm10,ymm5,3
4679 vpaddq ymm6,ymm6,ymm8
4680 add rcx,QWORD[((72+256))+rsp]
4681 and r12,r11
4682 rorx r13,r11,41
4683 vpsrlq ymm9,ymm5,19
4684 vpxor ymm11,ymm11,ymm10
4685 rorx rdi,r11,18
4686 lea rdx,[r14*1+rdx]
4687 lea rcx,[r12*1+rcx]
4688 vpsllq ymm10,ymm10,42
4689 vpxor ymm11,ymm11,ymm9
4690 andn r12,r11,rbx
4691 xor r13,rdi
4692 rorx r14,r11,14
4693 vpsrlq ymm9,ymm9,42
4694 vpxor ymm11,ymm11,ymm10
4695 lea rcx,[r12*1+rcx]
4696 xor r13,r14
4697 mov rdi,rdx
4698 vpxor ymm11,ymm11,ymm9
4699 rorx r12,rdx,39
4700 lea rcx,[r13*1+rcx]
4701 xor rdi,r8
4702 vpaddq ymm6,ymm6,ymm11
4703 rorx r14,rdx,34
4704 rorx r13,rdx,28
4705 lea r10,[rcx*1+r10]
4706 vpaddq ymm10,ymm6,YMMWORD[64+rbp]
4707 and r15,rdi
4708 xor r14,r12
4709 xor r15,r8
4710 xor r14,r13
4711 lea rcx,[r15*1+rcx]
4712 mov r12,r11
4713 vmovdqa YMMWORD[64+rsp],ymm10
4714 vpalignr ymm8,ymm0,ymm7,8
4715 add rbx,QWORD[((96+256))+rsp]
4716 and r12,r10
4717 rorx r13,r10,41
4718 vpalignr ymm11,ymm4,ymm3,8
4719 rorx r15,r10,18
4720 lea rcx,[r14*1+rcx]
4721 lea rbx,[r12*1+rbx]
4722 vpsrlq ymm10,ymm8,1
4723 andn r12,r10,rax
4724 xor r13,r15
4725 rorx r14,r10,14
4726 vpaddq ymm7,ymm7,ymm11
4727 vpsrlq ymm11,ymm8,7
4728 lea rbx,[r12*1+rbx]
4729 xor r13,r14
4730 mov r15,rcx
4731 vpsllq ymm9,ymm8,56
4732 vpxor ymm8,ymm11,ymm10
4733 rorx r12,rcx,39
4734 lea rbx,[r13*1+rbx]
4735 xor r15,rdx
4736 vpsrlq ymm10,ymm10,7
4737 vpxor ymm8,ymm8,ymm9
4738 rorx r14,rcx,34
4739 rorx r13,rcx,28
4740 lea r9,[rbx*1+r9]
4741 vpsllq ymm9,ymm9,7
4742 vpxor ymm8,ymm8,ymm10
4743 and rdi,r15
4744 xor r14,r12
4745 xor rdi,rdx
4746 vpsrlq ymm11,ymm6,6
4747 vpxor ymm8,ymm8,ymm9
4748 xor r14,r13
4749 lea rbx,[rdi*1+rbx]
4750 mov r12,r10
4751 vpsllq ymm10,ymm6,3
4752 vpaddq ymm7,ymm7,ymm8
4753 add rax,QWORD[((104+256))+rsp]
4754 and r12,r9
4755 rorx r13,r9,41
4756 vpsrlq ymm9,ymm6,19
4757 vpxor ymm11,ymm11,ymm10
4758 rorx rdi,r9,18
4759 lea rbx,[r14*1+rbx]
4760 lea rax,[r12*1+rax]
4761 vpsllq ymm10,ymm10,42
4762 vpxor ymm11,ymm11,ymm9
4763 andn r12,r9,r11
4764 xor r13,rdi
4765 rorx r14,r9,14
4766 vpsrlq ymm9,ymm9,42
4767 vpxor ymm11,ymm11,ymm10
4768 lea rax,[r12*1+rax]
4769 xor r13,r14
4770 mov rdi,rbx
4771 vpxor ymm11,ymm11,ymm9
4772 rorx r12,rbx,39
4773 lea rax,[r13*1+rax]
4774 xor rdi,rcx
4775 vpaddq ymm7,ymm7,ymm11
4776 rorx r14,rbx,34
4777 rorx r13,rbx,28
4778 lea r8,[rax*1+r8]
4779 vpaddq ymm10,ymm7,YMMWORD[96+rbp]
4780 and r15,rdi
4781 xor r14,r12
4782 xor r15,rcx
4783 xor r14,r13
4784 lea rax,[r15*1+rax]
4785 mov r12,r9
; End of one $L$avx2_00_47 pass: store the last sum of this pass, advance the
; table pointer rbp by 256 bytes, and repeat while the byte at rbp-121
; (measured after the advance) is non-zero. Presumably that byte becomes zero
; only once rbp has moved past the round-constant table -- confirm against
; the K512 data layout.
4786 vmovdqa YMMWORD[96+rsp],ymm10
4787 lea rbp,[256+rbp]
4788 cmp BYTE[((-121))+rbp],0
4789 jne NEAR $L$avx2_00_47
4790 add r11,QWORD[((0+128))+rsp]
4791 and r12,r8
4792 rorx r13,r8,41
4793 rorx r15,r8,18
4794 lea rax,[r14*1+rax]
4795 lea r11,[r12*1+r11]
4796 andn r12,r8,r10
4797 xor r13,r15
4798 rorx r14,r8,14
4799 lea r11,[r12*1+r11]
4800 xor r13,r14
4801 mov r15,rax
4802 rorx r12,rax,39
4803 lea r11,[r13*1+r11]
4804 xor r15,rbx
4805 rorx r14,rax,34
4806 rorx r13,rax,28
4807 lea rdx,[r11*1+rdx]
4808 and rdi,r15
4809 xor r14,r12
4810 xor rdi,rbx
4811 xor r14,r13
4812 lea r11,[rdi*1+r11]
4813 mov r12,r8
4814 add r10,QWORD[((8+128))+rsp]
4815 and r12,rdx
4816 rorx r13,rdx,41
4817 rorx rdi,rdx,18
4818 lea r11,[r14*1+r11]
4819 lea r10,[r12*1+r10]
4820 andn r12,rdx,r9
4821 xor r13,rdi
4822 rorx r14,rdx,14
4823 lea r10,[r12*1+r10]
4824 xor r13,r14
4825 mov rdi,r11
4826 rorx r12,r11,39
4827 lea r10,[r13*1+r10]
4828 xor rdi,rax
4829 rorx r14,r11,34
4830 rorx r13,r11,28
4831 lea rcx,[r10*1+rcx]
4832 and r15,rdi
4833 xor r14,r12
4834 xor r15,rax
4835 xor r14,r13
4836 lea r10,[r15*1+r10]
4837 mov r12,rdx
4838 add r9,QWORD[((32+128))+rsp]
4839 and r12,rcx
4840 rorx r13,rcx,41
4841 rorx r15,rcx,18
4842 lea r10,[r14*1+r10]
4843 lea r9,[r12*1+r9]
4844 andn r12,rcx,r8
4845 xor r13,r15
4846 rorx r14,rcx,14
4847 lea r9,[r12*1+r9]
4848 xor r13,r14
4849 mov r15,r10
4850 rorx r12,r10,39
4851 lea r9,[r13*1+r9]
4852 xor r15,r11
4853 rorx r14,r10,34
4854 rorx r13,r10,28
4855 lea rbx,[r9*1+rbx]
4856 and rdi,r15
4857 xor r14,r12
4858 xor rdi,r11
4859 xor r14,r13
4860 lea r9,[rdi*1+r9]
4861 mov r12,rcx
4862 add r8,QWORD[((40+128))+rsp]
4863 and r12,rbx
4864 rorx r13,rbx,41
4865 rorx rdi,rbx,18
4866 lea r9,[r14*1+r9]
4867 lea r8,[r12*1+r8]
4868 andn r12,rbx,rdx
4869 xor r13,rdi
4870 rorx r14,rbx,14
4871 lea r8,[r12*1+r8]
4872 xor r13,r14
4873 mov rdi,r9
4874 rorx r12,r9,39
4875 lea r8,[r13*1+r8]
4876 xor rdi,r10
4877 rorx r14,r9,34
4878 rorx r13,r9,28
4879 lea rax,[r8*1+rax]
4880 and r15,rdi
4881 xor r14,r12
4882 xor r15,r10
4883 xor r14,r13
4884 lea r8,[r15*1+r8]
4885 mov r12,rbx
4886 add rdx,QWORD[((64+128))+rsp]
4887 and r12,rax
4888 rorx r13,rax,41
4889 rorx r15,rax,18
4890 lea r8,[r14*1+r8]
4891 lea rdx,[r12*1+rdx]
4892 andn r12,rax,rcx
4893 xor r13,r15
4894 rorx r14,rax,14
4895 lea rdx,[r12*1+rdx]
4896 xor r13,r14
4897 mov r15,r8
4898 rorx r12,r8,39
4899 lea rdx,[r13*1+rdx]
4900 xor r15,r9
4901 rorx r14,r8,34
4902 rorx r13,r8,28
4903 lea r11,[rdx*1+r11]
4904 and rdi,r15
4905 xor r14,r12
4906 xor rdi,r9
4907 xor r14,r13
4908 lea rdx,[rdi*1+rdx]
4909 mov r12,rax
4910 add rcx,QWORD[((72+128))+rsp]
4911 and r12,r11
4912 rorx r13,r11,41
4913 rorx rdi,r11,18
4914 lea rdx,[r14*1+rdx]
4915 lea rcx,[r12*1+rcx]
4916 andn r12,r11,rbx
4917 xor r13,rdi
4918 rorx r14,r11,14
4919 lea rcx,[r12*1+rcx]
4920 xor r13,r14
4921 mov rdi,rdx
4922 rorx r12,rdx,39
4923 lea rcx,[r13*1+rcx]
4924 xor rdi,r8
4925 rorx r14,rdx,34
4926 rorx r13,rdx,28
4927 lea r10,[rcx*1+r10]
4928 and r15,rdi
4929 xor r14,r12
4930 xor r15,r8
4931 xor r14,r13
4932 lea rcx,[r15*1+rcx]
4933 mov r12,r11
4934 add rbx,QWORD[((96+128))+rsp]
4935 and r12,r10
4936 rorx r13,r10,41
4937 rorx r15,r10,18
4938 lea rcx,[r14*1+rcx]
4939 lea rbx,[r12*1+rbx]
4940 andn r12,r10,rax
4941 xor r13,r15
4942 rorx r14,r10,14
4943 lea rbx,[r12*1+rbx]
4944 xor r13,r14
4945 mov r15,rcx
4946 rorx r12,rcx,39
4947 lea rbx,[r13*1+rbx]
4948 xor r15,rdx
4949 rorx r14,rcx,34
4950 rorx r13,rcx,28
4951 lea r9,[rbx*1+r9]
4952 and rdi,r15
4953 xor r14,r12
4954 xor rdi,rdx
4955 xor r14,r13
4956 lea rbx,[rdi*1+rbx]
4957 mov r12,r10
4958 add rax,QWORD[((104+128))+rsp]
4959 and r12,r9
4960 rorx r13,r9,41
4961 rorx rdi,r9,18
4962 lea rbx,[r14*1+rbx]
4963 lea rax,[r12*1+rax]
4964 andn r12,r9,r11
4965 xor r13,rdi
4966 rorx r14,r9,14
4967 lea rax,[r12*1+rax]
4968 xor r13,r14
4969 mov rdi,rbx
4970 rorx r12,rbx,39
4971 lea rax,[r13*1+rax]
4972 xor rdi,rcx
4973 rorx r14,rbx,34
4974 rorx r13,rbx,28
4975 lea r8,[rax*1+r8]
4976 and r15,rdi
4977 xor r14,r12
4978 xor r15,rcx
4979 xor r14,r13
4980 lea rax,[r15*1+rax]
4981 mov r12,r9
4982 add r11,QWORD[rsp]
4983 and r12,r8
4984 rorx r13,r8,41
4985 rorx r15,r8,18
4986 lea rax,[r14*1+rax]
4987 lea r11,[r12*1+r11]
4988 andn r12,r8,r10
4989 xor r13,r15
4990 rorx r14,r8,14
4991 lea r11,[r12*1+r11]
4992 xor r13,r14
4993 mov r15,rax
4994 rorx r12,rax,39
4995 lea r11,[r13*1+r11]
4996 xor r15,rbx
4997 rorx r14,rax,34
4998 rorx r13,rax,28
4999 lea rdx,[r11*1+rdx]
5000 and rdi,r15
5001 xor r14,r12
5002 xor rdi,rbx
5003 xor r14,r13
5004 lea r11,[rdi*1+r11]
5005 mov r12,r8
5006 add r10,QWORD[8+rsp]
5007 and r12,rdx
5008 rorx r13,rdx,41
5009 rorx rdi,rdx,18
5010 lea r11,[r14*1+r11]
5011 lea r10,[r12*1+r10]
5012 andn r12,rdx,r9
5013 xor r13,rdi
5014 rorx r14,rdx,14
5015 lea r10,[r12*1+r10]
5016 xor r13,r14
5017 mov rdi,r11
5018 rorx r12,r11,39
5019 lea r10,[r13*1+r10]
5020 xor rdi,rax
5021 rorx r14,r11,34
5022 rorx r13,r11,28
5023 lea rcx,[r10*1+rcx]
5024 and r15,rdi
5025 xor r14,r12
5026 xor r15,rax
5027 xor r14,r13
5028 lea r10,[r15*1+r10]
5029 mov r12,rdx
5030 add r9,QWORD[32+rsp]
5031 and r12,rcx
5032 rorx r13,rcx,41
5033 rorx r15,rcx,18
5034 lea r10,[r14*1+r10]
5035 lea r9,[r12*1+r9]
5036 andn r12,rcx,r8
5037 xor r13,r15
5038 rorx r14,rcx,14
5039 lea r9,[r12*1+r9]
5040 xor r13,r14
5041 mov r15,r10
5042 rorx r12,r10,39
5043 lea r9,[r13*1+r9]
5044 xor r15,r11
5045 rorx r14,r10,34
5046 rorx r13,r10,28
5047 lea rbx,[r9*1+rbx]
5048 and rdi,r15
5049 xor r14,r12
5050 xor rdi,r11
5051 xor r14,r13
5052 lea r9,[rdi*1+r9]
5053 mov r12,rcx
5054 add r8,QWORD[40+rsp]
5055 and r12,rbx
5056 rorx r13,rbx,41
5057 rorx rdi,rbx,18
5058 lea r9,[r14*1+r9]
5059 lea r8,[r12*1+r8]
5060 andn r12,rbx,rdx
5061 xor r13,rdi
5062 rorx r14,rbx,14
5063 lea r8,[r12*1+r8]
5064 xor r13,r14
5065 mov rdi,r9
5066 rorx r12,r9,39
5067 lea r8,[r13*1+r8]
5068 xor rdi,r10
5069 rorx r14,r9,34
5070 rorx r13,r9,28
5071 lea rax,[r8*1+rax]
5072 and r15,rdi
5073 xor r14,r12
5074 xor r15,r10
5075 xor r14,r13
5076 lea r8,[r15*1+r8]
5077 mov r12,rbx
5078 add rdx,QWORD[64+rsp]
5079 and r12,rax
5080 rorx r13,rax,41
5081 rorx r15,rax,18
5082 lea r8,[r14*1+r8]
5083 lea rdx,[r12*1+rdx]
5084 andn r12,rax,rcx
5085 xor r13,r15
5086 rorx r14,rax,14
5087 lea rdx,[r12*1+rdx]
5088 xor r13,r14
5089 mov r15,r8
5090 rorx r12,r8,39
5091 lea rdx,[r13*1+rdx]
5092 xor r15,r9
5093 rorx r14,r8,34
5094 rorx r13,r8,28
5095 lea r11,[rdx*1+r11]
5096 and rdi,r15
5097 xor r14,r12
5098 xor rdi,r9
5099 xor r14,r13
5100 lea rdx,[rdi*1+rdx]
5101 mov r12,rax
5102 add rcx,QWORD[72+rsp]
5103 and r12,r11
5104 rorx r13,r11,41
5105 rorx rdi,r11,18
5106 lea rdx,[r14*1+rdx]
5107 lea rcx,[r12*1+rcx]
5108 andn r12,r11,rbx
5109 xor r13,rdi
5110 rorx r14,r11,14
5111 lea rcx,[r12*1+rcx]
5112 xor r13,r14
5113 mov rdi,rdx
5114 rorx r12,rdx,39
5115 lea rcx,[r13*1+rcx]
5116 xor rdi,r8
5117 rorx r14,rdx,34
5118 rorx r13,rdx,28
5119 lea r10,[rcx*1+r10]
5120 and r15,rdi
5121 xor r14,r12
5122 xor r15,r8
5123 xor r14,r13
5124 lea rcx,[r15*1+rcx]
5125 mov r12,r11
5126 add rbx,QWORD[96+rsp]
5127 and r12,r10
5128 rorx r13,r10,41
5129 rorx r15,r10,18
5130 lea rcx,[r14*1+rcx]
5131 lea rbx,[r12*1+rbx]
5132 andn r12,r10,rax
5133 xor r13,r15
5134 rorx r14,r10,14
5135 lea rbx,[r12*1+rbx]
5136 xor r13,r14
5137 mov r15,rcx
5138 rorx r12,rcx,39
5139 lea rbx,[r13*1+rbx]
5140 xor r15,rdx
5141 rorx r14,rcx,34
5142 rorx r13,rcx,28
5143 lea r9,[rbx*1+r9]
5144 and rdi,r15
5145 xor r14,r12
5146 xor rdi,rdx
5147 xor r14,r13
5148 lea rbx,[rdi*1+rbx]
5149 mov r12,r10
5150 add rax,QWORD[104+rsp]
5151 and r12,r9
5152 rorx r13,r9,41
5153 rorx rdi,r9,18
5154 lea rbx,[r14*1+rbx]
5155 lea rax,[r12*1+rax]
5156 andn r12,r9,r11
5157 xor r13,rdi
5158 rorx r14,r9,14
5159 lea rax,[r12*1+rax]
5160 xor r13,r14
5161 mov rdi,rbx
5162 rorx r12,rbx,39
5163 lea rax,[r13*1+rax]
5164 xor rdi,rcx
5165 rorx r14,rbx,34
5166 rorx r13,rbx,28
5167 lea r8,[rax*1+r8]
5168 and r15,rdi
5169 xor r14,r12
5170 xor r15,rcx
5171 xor r14,r13
5172 lea rax,[r15*1+rax]
5173 mov r12,r9
5174 mov rdi,QWORD[1280+rsp]
5175 add rax,r14
5176
5177 lea rbp,[1152+rsp]
5178
5179 add rax,QWORD[rdi]
5180 add rbx,QWORD[8+rdi]
5181 add rcx,QWORD[16+rdi]
5182 add rdx,QWORD[24+rdi]
5183 add r8,QWORD[32+rdi]
5184 add r9,QWORD[40+rdi]
5185 add r10,QWORD[48+rdi]
5186 add r11,QWORD[56+rdi]
5187
5188 mov QWORD[rdi],rax
5189 mov QWORD[8+rdi],rbx
5190 mov QWORD[16+rdi],rcx
5191 mov QWORD[24+rdi],rdx
5192 mov QWORD[32+rdi],r8
5193 mov QWORD[40+rdi],r9
5194 mov QWORD[48+rdi],r10
5195 mov QWORD[56+rdi],r11
5196
5197 cmp rsi,QWORD[144+rbp]
5198 je NEAR $L$done_avx2
5199
5200 xor r14,r14
5201 mov rdi,rbx
5202 xor rdi,rcx
5203 mov r12,r9
5204 jmp NEAR $L$ower_avx2
5205ALIGN 16
5206$L$ower_avx2:
5207 add r11,QWORD[((0+16))+rbp]
5208 and r12,r8
5209 rorx r13,r8,41
5210 rorx r15,r8,18
5211 lea rax,[r14*1+rax]
5212 lea r11,[r12*1+r11]
5213 andn r12,r8,r10
5214 xor r13,r15
5215 rorx r14,r8,14
5216 lea r11,[r12*1+r11]
5217 xor r13,r14
5218 mov r15,rax
5219 rorx r12,rax,39
5220 lea r11,[r13*1+r11]
5221 xor r15,rbx
5222 rorx r14,rax,34
5223 rorx r13,rax,28
5224 lea rdx,[r11*1+rdx]
5225 and rdi,r15
5226 xor r14,r12
5227 xor rdi,rbx
5228 xor r14,r13
5229 lea r11,[rdi*1+r11]
5230 mov r12,r8
5231 add r10,QWORD[((8+16))+rbp]
5232 and r12,rdx
5233 rorx r13,rdx,41
5234 rorx rdi,rdx,18
5235 lea r11,[r14*1+r11]
5236 lea r10,[r12*1+r10]
5237 andn r12,rdx,r9
5238 xor r13,rdi
5239 rorx r14,rdx,14
5240 lea r10,[r12*1+r10]
5241 xor r13,r14
5242 mov rdi,r11
5243 rorx r12,r11,39
5244 lea r10,[r13*1+r10]
5245 xor rdi,rax
5246 rorx r14,r11,34
5247 rorx r13,r11,28
5248 lea rcx,[r10*1+rcx]
5249 and r15,rdi
5250 xor r14,r12
5251 xor r15,rax
5252 xor r14,r13
5253 lea r10,[r15*1+r10]
5254 mov r12,rdx
5255 add r9,QWORD[((32+16))+rbp]
5256 and r12,rcx
5257 rorx r13,rcx,41
5258 rorx r15,rcx,18
5259 lea r10,[r14*1+r10]
5260 lea r9,[r12*1+r9]
5261 andn r12,rcx,r8
5262 xor r13,r15
5263 rorx r14,rcx,14
5264 lea r9,[r12*1+r9]
5265 xor r13,r14
5266 mov r15,r10
5267 rorx r12,r10,39
5268 lea r9,[r13*1+r9]
5269 xor r15,r11
5270 rorx r14,r10,34
5271 rorx r13,r10,28
5272 lea rbx,[r9*1+rbx]
5273 and rdi,r15
5274 xor r14,r12
5275 xor rdi,r11
5276 xor r14,r13
5277 lea r9,[rdi*1+r9]
5278 mov r12,rcx
5279 add r8,QWORD[((40+16))+rbp]
5280 and r12,rbx
5281 rorx r13,rbx,41
5282 rorx rdi,rbx,18
5283 lea r9,[r14*1+r9]
5284 lea r8,[r12*1+r8]
5285 andn r12,rbx,rdx
5286 xor r13,rdi
5287 rorx r14,rbx,14
5288 lea r8,[r12*1+r8]
5289 xor r13,r14
5290 mov rdi,r9
5291 rorx r12,r9,39
5292 lea r8,[r13*1+r8]
5293 xor rdi,r10
5294 rorx r14,r9,34
5295 rorx r13,r9,28
5296 lea rax,[r8*1+rax]
5297 and r15,rdi
5298 xor r14,r12
5299 xor r15,r10
5300 xor r14,r13
5301 lea r8,[r15*1+r8]
5302 mov r12,rbx
5303 add rdx,QWORD[((64+16))+rbp]
5304 and r12,rax
5305 rorx r13,rax,41
5306 rorx r15,rax,18
5307 lea r8,[r14*1+r8]
5308 lea rdx,[r12*1+rdx]
5309 andn r12,rax,rcx
5310 xor r13,r15
5311 rorx r14,rax,14
5312 lea rdx,[r12*1+rdx]
5313 xor r13,r14
5314 mov r15,r8
5315 rorx r12,r8,39
5316 lea rdx,[r13*1+rdx]
5317 xor r15,r9
5318 rorx r14,r8,34
5319 rorx r13,r8,28
5320 lea r11,[rdx*1+r11]
5321 and rdi,r15
5322 xor r14,r12
5323 xor rdi,r9
5324 xor r14,r13
5325 lea rdx,[rdi*1+rdx]
5326 mov r12,rax
5327 add rcx,QWORD[((72+16))+rbp]
5328 and r12,r11
5329 rorx r13,r11,41
5330 rorx rdi,r11,18
5331 lea rdx,[r14*1+rdx]
5332 lea rcx,[r12*1+rcx]
5333 andn r12,r11,rbx
5334 xor r13,rdi
5335 rorx r14,r11,14
5336 lea rcx,[r12*1+rcx]
5337 xor r13,r14
5338 mov rdi,rdx
5339 rorx r12,rdx,39
5340 lea rcx,[r13*1+rcx]
5341 xor rdi,r8
5342 rorx r14,rdx,34
5343 rorx r13,rdx,28
5344 lea r10,[rcx*1+r10]
5345 and r15,rdi
5346 xor r14,r12
5347 xor r15,r8
5348 xor r14,r13
5349 lea rcx,[r15*1+rcx]
5350 mov r12,r11
5351 add rbx,QWORD[((96+16))+rbp]
5352 and r12,r10
5353 rorx r13,r10,41
5354 rorx r15,r10,18
5355 lea rcx,[r14*1+rcx]
5356 lea rbx,[r12*1+rbx]
5357 andn r12,r10,rax
5358 xor r13,r15
5359 rorx r14,r10,14
5360 lea rbx,[r12*1+rbx]
5361 xor r13,r14
5362 mov r15,rcx
5363 rorx r12,rcx,39
5364 lea rbx,[r13*1+rbx]
5365 xor r15,rdx
5366 rorx r14,rcx,34
5367 rorx r13,rcx,28
5368 lea r9,[rbx*1+r9]
5369 and rdi,r15
5370 xor r14,r12
5371 xor rdi,rdx
5372 xor r14,r13
5373 lea rbx,[rdi*1+rbx]
5374 mov r12,r10
5375 add rax,QWORD[((104+16))+rbp]
5376 and r12,r9
5377 rorx r13,r9,41
5378 rorx rdi,r9,18
5379 lea rbx,[r14*1+rbx]
5380 lea rax,[r12*1+rax]
5381 andn r12,r9,r11
5382 xor r13,rdi
5383 rorx r14,r9,14
5384 lea rax,[r12*1+rax]
5385 xor r13,r14
5386 mov rdi,rbx
5387 rorx r12,rbx,39
5388 lea rax,[r13*1+rax]
5389 xor rdi,rcx
5390 rorx r14,rbx,34
5391 rorx r13,rbx,28
5392 lea r8,[rax*1+r8]
5393 and r15,rdi
5394 xor r14,r12
5395 xor r15,rcx
5396 xor r14,r13
5397 lea rax,[r15*1+rax]
5398 mov r12,r9
5399 lea rbp,[((-128))+rbp]
5400 cmp rbp,rsp
5401 jae NEAR $L$ower_avx2
5402
5403 mov rdi,QWORD[1280+rsp]
5404 add rax,r14
5405
5406 lea rsp,[1152+rsp]
5407
5408 add rax,QWORD[rdi]
5409 add rbx,QWORD[8+rdi]
5410 add rcx,QWORD[16+rdi]
5411 add rdx,QWORD[24+rdi]
5412 add r8,QWORD[32+rdi]
5413 add r9,QWORD[40+rdi]
5414 lea rsi,[256+rsi]
5415 add r10,QWORD[48+rdi]
5416 mov r12,rsi
5417 add r11,QWORD[56+rdi]
5418 cmp rsi,QWORD[((128+16))+rsp]
5419
5420 mov QWORD[rdi],rax
5421 cmove r12,rsp
5422 mov QWORD[8+rdi],rbx
5423 mov QWORD[16+rdi],rcx
5424 mov QWORD[24+rdi],rdx
5425 mov QWORD[32+rdi],r8
5426 mov QWORD[40+rdi],r9
5427 mov QWORD[48+rdi],r10
5428 mov QWORD[56+rdi],r11
5429
5430 jbe NEAR $L$oop_avx2
5431 lea rbp,[rsp]
5432
5433$L$done_avx2:
5434 lea rsp,[rbp]
5435 mov rsi,QWORD[((128+24))+rsp]
5436 vzeroupper
5437 movaps xmm6,XMMWORD[((128+32))+rsp]
5438 movaps xmm7,XMMWORD[((128+48))+rsp]
5439 movaps xmm8,XMMWORD[((128+64))+rsp]
5440 movaps xmm9,XMMWORD[((128+80))+rsp]
5441 movaps xmm10,XMMWORD[((128+96))+rsp]
5442 movaps xmm11,XMMWORD[((128+112))+rsp]
5443 mov r15,QWORD[rsi]
5444 mov r14,QWORD[8+rsi]
5445 mov r13,QWORD[16+rsi]
5446 mov r12,QWORD[24+rsi]
5447 mov rbp,QWORD[32+rsi]
5448 mov rbx,QWORD[40+rsi]
5449 lea rsp,[48+rsi]
5450$L$epilogue_avx2:
5451 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
5452 mov rsi,QWORD[16+rsp]
5453 DB 0F3h,0C3h ;repret
5454$L$SEH_end_sha512_block_data_order_avx2:
5455EXTERN __imp_RtlVirtualUnwind
5456
5457ALIGN 16
;-----------------------------------------------------------------------
; se_handler
;
; Win64 language-specific exception handler referenced by every
; UNWIND_INFO record in .xdata, i.e. shared by the plain, XOP, AVX and
; AVX2 variants of sha512_block_data_order.
;
; ABI:   Microsoft x64 (EXCEPTION_ROUTINE)
; In:    rcx = ExceptionRecord      (overwritten without being read)
;        rdx = EstablisherFrame     (overwritten without being read)
;        r8  = CONTEXT *context
;        r9  = DISPATCHER_CONTEXT *disp
; Out:   eax = 1 (ExceptionContinueSearch)
; Saves: rsi, rdi, rbx, rbp, r12-r15 and rflags are pushed on entry and
;        popped on exit.
;
; disp->HandlerData points at two image-relative DWORDs taken from the
; .xdata record: [0] = end-of-prologue label, [1] = epilogue label.
;
;   Rip <  HandlerData[0]  -> frame not built yet, original rsp is taken
;                             from context->Rax
;   Rip >= HandlerData[1]  -> rsp already unwound, use context->Rsp
;   otherwise              -> function body: reload rbx, rbp, r12-r15
;                             from the frame and, for the SIMD variants,
;                             xmm6-xmm11 from their save area
;
; The CONTEXT / DISPATCHER_CONTEXT offsets used below are the x64 layout
; from winnt.h.
;-----------------------------------------------------------------------
se_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64			; shadow space + 4 stack args for RtlVirtualUnwind

	mov	rax,QWORD[120+r8]	; context->Rax
	mov	rbx,QWORD[248+r8]	; context->Rip

	mov	rsi,QWORD[8+r9]		; disp->ImageBase
	mov	r11,QWORD[56+r9]	; disp->HandlerData

	mov	r10d,DWORD[r11]		; HandlerData[0]
	lea	r10,[r10*1+rsi]		; absolute address of the prologue label
	cmp	rbx,r10
	jb	NEAR $L$in_prologue	; Rip < prologue label

	mov	rax,QWORD[152+r8]	; context->Rsp

	mov	r10d,DWORD[4+r11]	; HandlerData[1]
	lea	r10,[r10*1+rsi]		; absolute address of the epilogue label
	cmp	rbx,r10
	jae	NEAR $L$in_prologue	; Rip >= epilogue label

	lea	r10,[$L$avx2_shortcut]
	cmp	rbx,r10
	jb	NEAR $L$not_in_avx2

	; The AVX2 path moves rsp while it runs, so rebuild the frame base
	; the same way that code does: round down to 256*8, then add 1152.
	and	rax,-256*8
	add	rax,1152

$L$not_in_avx2:
	; rbx is reloaded with the saved callee value below, but the faulting
	; Rip is still needed for the $L$epilogue test further down.  r11 is
	; free here (HandlerData has been consumed), so park Rip in it.
	mov	r11,rbx

	mov	rsi,rax			; rsi = frame base
	mov	rax,QWORD[((128+24))+rax]	; rsp value stored in the frame by the prologue
	lea	rax,[48+rax]		; step over the six pushed registers

	mov	rbx,QWORD[((-8))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	QWORD[144+r8],rbx	; context->Rbx
	mov	QWORD[160+r8],rbp	; context->Rbp
	mov	QWORD[216+r8],r12	; context->R12
	mov	QWORD[224+r8],r13	; context->R13
	mov	QWORD[232+r8],r14	; context->R14
	mov	QWORD[240+r8],r15	; context->R15

	; Code below $L$epilogue is the integer-only variant, which keeps no
	; XMM save area; everything above it saved xmm6-xmm11 in its frame.
	lea	r10,[$L$epilogue]
	cmp	r11,r10			; compare the faulting Rip, not the reloaded rbx
	jb	NEAR $L$in_prologue

	lea	rsi,[((128+32))+rsi]	; xmm6..xmm11 save area in the frame
	lea	rdi,[512+r8]		; &context->Xmm6
	mov	ecx,12			; 6 registers * 2 qwords
	cld
	rep movsq			; same bytes as the former DD 0xa548f3fc

$L$in_prologue:
	; rax = rsp as it was on function entry; the WIN64 prologue stored
	; rdi/rsi at 8 and 16 bytes above it.
	mov	rdi,QWORD[8+rax]
	mov	rsi,QWORD[16+rax]
	mov	QWORD[152+r8],rax	; context->Rsp
	mov	QWORD[168+r8],rsi	; context->Rsi
	mov	QWORD[176+r8],rdi	; context->Rdi

	mov	rdi,QWORD[40+r9]	; disp->ContextRecord
	mov	rsi,r8			; context
	mov	ecx,154			; 154 qwords = sizeof(CONTEXT)
	cld
	rep movsq			; *disp->ContextRecord = *context

	; RtlVirtualUnwind(UNW_FLAG_NHANDLER, ImageBase, ControlPc,
	;                  FunctionEntry, ContextRecord, &HandlerData,
	;                  &EstablisherFrame, NULL)
	mov	rsi,r9
	xor	rcx,rcx			; arg1: UNW_FLAG_NHANDLER
	mov	rdx,QWORD[8+rsi]	; arg2: disp->ImageBase
	mov	r8,QWORD[rsi]		; arg3: disp->ControlPc
	mov	r9,QWORD[16+rsi]	; arg4: disp->FunctionEntry
	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
	lea	r11,[56+rsi]		; &disp->HandlerData
	lea	r12,[24+rsi]		; &disp->EstablisherFrame
	mov	QWORD[32+rsp],r10	; arg5
	mov	QWORD[40+rsp],r11	; arg6
	mov	QWORD[48+rsp],r12	; arg7
	mov	QWORD[56+rsp],rcx	; arg8: ContextPointers = NULL
	call	QWORD[__imp_RtlVirtualUnwind]

	mov	eax,1			; ExceptionContinueSearch
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h		;repret
5558
section	.pdata rdata align=4
ALIGN	4
	; Function table: one RUNTIME_FUNCTION record per code path, each made
	; of three image-relative DWORDs { begin, end, unwind info }.

	; integer-only sha512_block_data_order
	DD	$L$SEH_begin_sha512_block_data_order wrt ..imagebase, \
		$L$SEH_end_sha512_block_data_order wrt ..imagebase, \
		$L$SEH_info_sha512_block_data_order wrt ..imagebase

	; XOP variant
	DD	$L$SEH_begin_sha512_block_data_order_xop wrt ..imagebase, \
		$L$SEH_end_sha512_block_data_order_xop wrt ..imagebase, \
		$L$SEH_info_sha512_block_data_order_xop wrt ..imagebase

	; AVX variant
	DD	$L$SEH_begin_sha512_block_data_order_avx wrt ..imagebase, \
		$L$SEH_end_sha512_block_data_order_avx wrt ..imagebase, \
		$L$SEH_info_sha512_block_data_order_avx wrt ..imagebase

	; AVX2 variant
	DD	$L$SEH_begin_sha512_block_data_order_avx2 wrt ..imagebase, \
		$L$SEH_end_sha512_block_data_order_avx2 wrt ..imagebase, \
		$L$SEH_info_sha512_block_data_order_avx2 wrt ..imagebase
section	.xdata rdata align=8
ALIGN	8
	; Unwind info records, one per code path.  Each record is a 4-byte
	; header followed by the image-relative address of se_handler and the
	; two handler-data DWORDs that se_handler reads: the end-of-prologue
	; label and the epilogue label of that code path.

$L$SEH_info_sha512_block_data_order:
	DB	9,0,0,0
	DD	se_handler wrt ..imagebase
	DD	$L$prologue wrt ..imagebase
	DD	$L$epilogue wrt ..imagebase

$L$SEH_info_sha512_block_data_order_xop:
	DB	9,0,0,0
	DD	se_handler wrt ..imagebase
	DD	$L$prologue_xop wrt ..imagebase
	DD	$L$epilogue_xop wrt ..imagebase

$L$SEH_info_sha512_block_data_order_avx:
	DB	9,0,0,0
	DD	se_handler wrt ..imagebase
	DD	$L$prologue_avx wrt ..imagebase
	DD	$L$epilogue_avx wrt ..imagebase

$L$SEH_info_sha512_block_data_order_avx2:
	DB	9,0,0,0
	DD	se_handler wrt ..imagebase
	DD	$L$prologue_avx2 wrt ..imagebase
	DD	$L$epilogue_avx2 wrt ..imagebase
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette