Changeset 95221 in vbox for trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/poly1305-x86_64.S
- Timestamp:
- Jun 8, 2022 8:35:57 AM (3 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/poly1305-x86_64.S
r95219 r95221 38 38 lea r10,[poly1305_blocks] 39 39 lea r11,[poly1305_emit] 40 mov r9,QWORD[((OPENSSL_ia32cap_P+4))]41 lea rax,[poly1305_blocks_avx]42 lea rcx,[poly1305_emit_avx]43 bt r9,2844 cmovc r10,rax45 cmovc r11,rcx46 lea rax,[poly1305_blocks_avx2]47 bt r9,3748 cmovc r10,rax49 mov rax,214964633650 shr r9,3251 and r9,rax52 cmp r9,rax53 je NEAR $L$init_base2_4454 40 mov rax,0x0ffffffc0fffffff 55 41 mov rcx,0x0ffffffc0ffffffc … … 226 212 227 213 $L$SEH_end_poly1305_emit: 228 229 ALIGN 32230 __poly1305_block:231 232 mul r14233 mov r9,rax234 mov rax,r11235 mov r10,rdx236 237 mul r14238 mov r14,rax239 mov rax,r11240 mov r8,rdx241 242 mul rbx243 add r9,rax244 mov rax,r13245 adc r10,rdx246 247 mul rbx248 mov rbx,rbp249 add r14,rax250 adc r8,rdx251 252 imul rbx,r13253 add r9,rbx254 mov rbx,r8255 adc r10,0256 257 imul rbp,r11258 add rbx,r9259 mov rax,-4260 adc r10,rbp261 262 and rax,r10263 mov rbp,r10264 shr r10,2265 and rbp,3266 add rax,r10267 add r14,rax268 adc rbx,0269 adc rbp,0270 DB 0F3h,0C3h ;repret271 272 273 274 275 ALIGN 32276 __poly1305_init_avx:277 278 mov r14,r11279 mov rbx,r12280 xor rbp,rbp281 282 lea rdi,[((48+64))+rdi]283 284 mov rax,r12285 call __poly1305_block286 287 mov eax,0x3ffffff288 mov edx,0x3ffffff289 mov r8,r14290 and eax,r14d291 mov r9,r11292 and edx,r11d293 mov DWORD[((-64))+rdi],eax294 shr r8,26295 mov DWORD[((-60))+rdi],edx296 shr r9,26297 298 mov eax,0x3ffffff299 mov edx,0x3ffffff300 and eax,r8d301 and edx,r9d302 mov DWORD[((-48))+rdi],eax303 lea eax,[rax*4+rax]304 mov DWORD[((-44))+rdi],edx305 lea edx,[rdx*4+rdx]306 mov DWORD[((-32))+rdi],eax307 shr r8,26308 mov DWORD[((-28))+rdi],edx309 shr r9,26310 311 mov rax,rbx312 mov rdx,r12313 shl rax,12314 shl rdx,12315 or rax,r8316 or rdx,r9317 and eax,0x3ffffff318 and edx,0x3ffffff319 mov DWORD[((-16))+rdi],eax320 lea eax,[rax*4+rax]321 mov DWORD[((-12))+rdi],edx322 lea edx,[rdx*4+rdx]323 mov DWORD[rdi],eax324 mov r8,rbx325 mov DWORD[4+rdi],edx326 mov r9,r12327 328 mov eax,0x3ffffff329 mov edx,0x3ffffff330 shr r8,14331 shr r9,14332 and eax,r8d333 and edx,r9d334 mov DWORD[16+rdi],eax335 lea eax,[rax*4+rax]336 mov DWORD[20+rdi],edx337 lea edx,[rdx*4+rdx]338 mov DWORD[32+rdi],eax339 shr r8,26340 mov DWORD[36+rdi],edx341 shr r9,26342 343 mov rax,rbp344 shl rax,24345 or r8,rax346 mov DWORD[48+rdi],r8d347 lea r8,[r8*4+r8]348 mov DWORD[52+rdi],r9d349 lea r9,[r9*4+r9]350 mov DWORD[64+rdi],r8d351 mov DWORD[68+rdi],r9d352 353 mov rax,r12354 call __poly1305_block355 356 mov eax,0x3ffffff357 mov r8,r14358 and eax,r14d359 shr r8,26360 mov DWORD[((-52))+rdi],eax361 362 mov edx,0x3ffffff363 and edx,r8d364 mov DWORD[((-36))+rdi],edx365 lea edx,[rdx*4+rdx]366 shr r8,26367 mov DWORD[((-20))+rdi],edx368 369 mov rax,rbx370 shl rax,12371 or rax,r8372 and eax,0x3ffffff373 mov DWORD[((-4))+rdi],eax374 lea eax,[rax*4+rax]375 mov r8,rbx376 mov DWORD[12+rdi],eax377 378 mov edx,0x3ffffff379 shr r8,14380 and edx,r8d381 mov DWORD[28+rdi],edx382 lea edx,[rdx*4+rdx]383 shr r8,26384 mov DWORD[44+rdi],edx385 386 mov rax,rbp387 shl rax,24388 or r8,rax389 mov DWORD[60+rdi],r8d390 lea r8,[r8*4+r8]391 mov DWORD[76+rdi],r8d392 393 mov rax,r12394 call __poly1305_block395 396 mov eax,0x3ffffff397 mov r8,r14398 and eax,r14d399 shr r8,26400 mov DWORD[((-56))+rdi],eax401 402 mov edx,0x3ffffff403 and edx,r8d404 mov DWORD[((-40))+rdi],edx405 lea edx,[rdx*4+rdx]406 shr r8,26407 mov DWORD[((-24))+rdi],edx408 409 mov rax,rbx410 shl rax,12411 or rax,r8412 and eax,0x3ffffff413 mov DWORD[((-8))+rdi],eax414 lea eax,[rax*4+rax]415 mov r8,rbx416 mov DWORD[8+rdi],eax417 418 mov edx,0x3ffffff419 shr r8,14420 and edx,r8d421 mov DWORD[24+rdi],edx422 lea edx,[rdx*4+rdx]423 shr r8,26424 mov DWORD[40+rdi],edx425 426 mov rax,rbp427 shl rax,24428 or r8,rax429 mov DWORD[56+rdi],r8d430 lea r8,[r8*4+r8]431 mov DWORD[72+rdi],r8d432 433 lea rdi,[((-48-64))+rdi]434 DB 0F3h,0C3h ;repret435 436 437 438 439 ALIGN 32440 poly1305_blocks_avx:441 mov QWORD[8+rsp],rdi ;WIN64 prologue442 mov QWORD[16+rsp],rsi443 mov rax,rsp444 $L$SEH_begin_poly1305_blocks_avx:445 mov rdi,rcx446 mov rsi,rdx447 mov rdx,r8448 mov rcx,r9449 450 451 452 mov r8d,DWORD[20+rdi]453 cmp rdx,128454 jae NEAR $L$blocks_avx455 test r8d,r8d456 jz NEAR $L$blocks457 458 $L$blocks_avx:459 and rdx,-16460 jz NEAR $L$no_data_avx461 462 vzeroupper463 464 test r8d,r8d465 jz NEAR $L$base2_64_avx466 467 test rdx,31468 jz NEAR $L$even_avx469 470 push rbx471 472 push rbp473 474 push r12475 476 push r13477 478 push r14479 480 push r15481 482 $L$blocks_avx_body:483 484 mov r15,rdx485 486 mov r8,QWORD[rdi]487 mov r9,QWORD[8+rdi]488 mov ebp,DWORD[16+rdi]489 490 mov r11,QWORD[24+rdi]491 mov r13,QWORD[32+rdi]492 493 494 mov r14d,r8d495 and r8,-2147483648496 mov r12,r9497 mov ebx,r9d498 and r9,-2147483648499 500 shr r8,6501 shl r12,52502 add r14,r8503 shr rbx,12504 shr r9,18505 add r14,r12506 adc rbx,r9507 508 mov r8,rbp509 shl r8,40510 shr rbp,24511 add rbx,r8512 adc rbp,0513 514 mov r9,-4515 mov r8,rbp516 and r9,rbp517 shr r8,2518 and rbp,3519 add r8,r9520 add r14,r8521 adc rbx,0522 adc rbp,0523 524 mov r12,r13525 mov rax,r13526 shr r13,2527 add r13,r12528 529 add r14,QWORD[rsi]530 adc rbx,QWORD[8+rsi]531 lea rsi,[16+rsi]532 adc rbp,rcx533 534 call __poly1305_block535 536 test rcx,rcx537 jz NEAR $L$store_base2_64_avx538 539 540 mov rax,r14541 mov rdx,r14542 shr r14,52543 mov r11,rbx544 mov r12,rbx545 shr rdx,26546 and rax,0x3ffffff547 shl r11,12548 and rdx,0x3ffffff549 shr rbx,14550 or r14,r11551 shl rbp,24552 and r14,0x3ffffff553 shr r12,40554 and rbx,0x3ffffff555 or rbp,r12556 557 sub r15,16558 jz NEAR $L$store_base2_26_avx559 560 vmovd xmm0,eax561 vmovd xmm1,edx562 vmovd xmm2,r14d563 vmovd xmm3,ebx564 vmovd xmm4,ebp565 jmp NEAR $L$proceed_avx566 567 ALIGN 32568 $L$store_base2_64_avx:569 mov QWORD[rdi],r14570 mov QWORD[8+rdi],rbx571 mov QWORD[16+rdi],rbp572 jmp NEAR $L$done_avx573 574 ALIGN 16575 $L$store_base2_26_avx:576 mov DWORD[rdi],eax577 mov DWORD[4+rdi],edx578 mov DWORD[8+rdi],r14d579 mov DWORD[12+rdi],ebx580 mov DWORD[16+rdi],ebp581 ALIGN 16582 $L$done_avx:583 mov r15,QWORD[rsp]584 585 mov r14,QWORD[8+rsp]586 587 mov r13,QWORD[16+rsp]588 589 mov r12,QWORD[24+rsp]590 591 mov rbp,QWORD[32+rsp]592 593 mov rbx,QWORD[40+rsp]594 595 lea rsp,[48+rsp]596 597 $L$no_data_avx:598 $L$blocks_avx_epilogue:599 mov rdi,QWORD[8+rsp] ;WIN64 epilogue600 mov rsi,QWORD[16+rsp]601 DB 0F3h,0C3h ;repret602 603 604 ALIGN 32605 $L$base2_64_avx:606 607 push rbx608 609 push rbp610 611 push r12612 613 push r13614 615 push r14616 617 push r15618 619 $L$base2_64_avx_body:620 621 mov r15,rdx622 623 mov r11,QWORD[24+rdi]624 mov r13,QWORD[32+rdi]625 626 mov r14,QWORD[rdi]627 mov rbx,QWORD[8+rdi]628 mov ebp,DWORD[16+rdi]629 630 mov r12,r13631 mov rax,r13632 shr r13,2633 add r13,r12634 635 test rdx,31636 jz NEAR $L$init_avx637 638 add r14,QWORD[rsi]639 adc rbx,QWORD[8+rsi]640 lea rsi,[16+rsi]641 adc rbp,rcx642 sub r15,16643 644 call __poly1305_block645 646 $L$init_avx:647 648 mov rax,r14649 mov rdx,r14650 shr r14,52651 mov r8,rbx652 mov r9,rbx653 shr rdx,26654 and rax,0x3ffffff655 shl r8,12656 and rdx,0x3ffffff657 shr rbx,14658 or r14,r8659 shl rbp,24660 and r14,0x3ffffff661 shr r9,40662 and rbx,0x3ffffff663 or rbp,r9664 665 vmovd xmm0,eax666 vmovd xmm1,edx667 vmovd xmm2,r14d668 vmovd xmm3,ebx669 vmovd xmm4,ebp670 mov DWORD[20+rdi],1671 672 call __poly1305_init_avx673 674 $L$proceed_avx:675 mov rdx,r15676 677 mov r15,QWORD[rsp]678 679 mov r14,QWORD[8+rsp]680 681 mov r13,QWORD[16+rsp]682 683 mov r12,QWORD[24+rsp]684 685 mov rbp,QWORD[32+rsp]686 687 mov rbx,QWORD[40+rsp]688 689 lea rax,[48+rsp]690 lea rsp,[48+rsp]691 692 $L$base2_64_avx_epilogue:693 jmp NEAR $L$do_avx694 695 696 ALIGN 32697 $L$even_avx:698 699 vmovd xmm0,DWORD[rdi]700 vmovd xmm1,DWORD[4+rdi]701 vmovd xmm2,DWORD[8+rdi]702 vmovd xmm3,DWORD[12+rdi]703 vmovd xmm4,DWORD[16+rdi]704 705 $L$do_avx:706 lea r11,[((-248))+rsp]707 sub rsp,0x218708 vmovdqa XMMWORD[80+r11],xmm6709 vmovdqa XMMWORD[96+r11],xmm7710 vmovdqa XMMWORD[112+r11],xmm8711 vmovdqa XMMWORD[128+r11],xmm9712 vmovdqa XMMWORD[144+r11],xmm10713 vmovdqa XMMWORD[160+r11],xmm11714 vmovdqa XMMWORD[176+r11],xmm12715 vmovdqa XMMWORD[192+r11],xmm13716 vmovdqa XMMWORD[208+r11],xmm14717 vmovdqa XMMWORD[224+r11],xmm15718 $L$do_avx_body:719 sub rdx,64720 lea rax,[((-32))+rsi]721 cmovc rsi,rax722 723 vmovdqu xmm14,XMMWORD[48+rdi]724 lea rdi,[112+rdi]725 lea rcx,[$L$const]726 727 728 729 vmovdqu xmm5,XMMWORD[32+rsi]730 vmovdqu xmm6,XMMWORD[48+rsi]731 vmovdqa xmm15,XMMWORD[64+rcx]732 733 vpsrldq xmm7,xmm5,6734 vpsrldq xmm8,xmm6,6735 vpunpckhqdq xmm9,xmm5,xmm6736 vpunpcklqdq xmm5,xmm5,xmm6737 vpunpcklqdq xmm8,xmm7,xmm8738 739 vpsrlq xmm9,xmm9,40740 vpsrlq xmm6,xmm5,26741 vpand xmm5,xmm5,xmm15742 vpsrlq xmm7,xmm8,4743 vpand xmm6,xmm6,xmm15744 vpsrlq xmm8,xmm8,30745 vpand xmm7,xmm7,xmm15746 vpand xmm8,xmm8,xmm15747 vpor xmm9,xmm9,XMMWORD[32+rcx]748 749 jbe NEAR $L$skip_loop_avx750 751 752 vmovdqu xmm11,XMMWORD[((-48))+rdi]753 vmovdqu xmm12,XMMWORD[((-32))+rdi]754 vpshufd xmm13,xmm14,0xEE755 vpshufd xmm10,xmm14,0x44756 vmovdqa XMMWORD[(-144)+r11],xmm13757 vmovdqa XMMWORD[rsp],xmm10758 vpshufd xmm14,xmm11,0xEE759 vmovdqu xmm10,XMMWORD[((-16))+rdi]760 vpshufd xmm11,xmm11,0x44761 vmovdqa XMMWORD[(-128)+r11],xmm14762 vmovdqa XMMWORD[16+rsp],xmm11763 vpshufd xmm13,xmm12,0xEE764 vmovdqu xmm11,XMMWORD[rdi]765 vpshufd xmm12,xmm12,0x44766 vmovdqa XMMWORD[(-112)+r11],xmm13767 vmovdqa XMMWORD[32+rsp],xmm12768 vpshufd xmm14,xmm10,0xEE769 vmovdqu xmm12,XMMWORD[16+rdi]770 vpshufd xmm10,xmm10,0x44771 vmovdqa XMMWORD[(-96)+r11],xmm14772 vmovdqa XMMWORD[48+rsp],xmm10773 vpshufd xmm13,xmm11,0xEE774 vmovdqu xmm10,XMMWORD[32+rdi]775 vpshufd xmm11,xmm11,0x44776 vmovdqa XMMWORD[(-80)+r11],xmm13777 vmovdqa XMMWORD[64+rsp],xmm11778 vpshufd xmm14,xmm12,0xEE779 vmovdqu xmm11,XMMWORD[48+rdi]780 vpshufd xmm12,xmm12,0x44781 vmovdqa XMMWORD[(-64)+r11],xmm14782 vmovdqa XMMWORD[80+rsp],xmm12783 vpshufd xmm13,xmm10,0xEE784 vmovdqu xmm12,XMMWORD[64+rdi]785 vpshufd xmm10,xmm10,0x44786 vmovdqa XMMWORD[(-48)+r11],xmm13787 vmovdqa XMMWORD[96+rsp],xmm10788 vpshufd xmm14,xmm11,0xEE789 vpshufd xmm11,xmm11,0x44790 vmovdqa XMMWORD[(-32)+r11],xmm14791 vmovdqa XMMWORD[112+rsp],xmm11792 vpshufd xmm13,xmm12,0xEE793 vmovdqa xmm14,XMMWORD[rsp]794 vpshufd xmm12,xmm12,0x44795 vmovdqa XMMWORD[(-16)+r11],xmm13796 vmovdqa XMMWORD[128+rsp],xmm12797 798 jmp NEAR $L$oop_avx799 800 ALIGN 32801 $L$oop_avx:802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 vpmuludq xmm10,xmm14,xmm5823 vpmuludq xmm11,xmm14,xmm6824 vmovdqa XMMWORD[32+r11],xmm2825 vpmuludq xmm12,xmm14,xmm7826 vmovdqa xmm2,XMMWORD[16+rsp]827 vpmuludq xmm13,xmm14,xmm8828 vpmuludq xmm14,xmm14,xmm9829 830 vmovdqa XMMWORD[r11],xmm0831 vpmuludq xmm0,xmm9,XMMWORD[32+rsp]832 vmovdqa XMMWORD[16+r11],xmm1833 vpmuludq xmm1,xmm2,xmm8834 vpaddq xmm10,xmm10,xmm0835 vpaddq xmm14,xmm14,xmm1836 vmovdqa XMMWORD[48+r11],xmm3837 vpmuludq xmm0,xmm2,xmm7838 vpmuludq xmm1,xmm2,xmm6839 vpaddq xmm13,xmm13,xmm0840 vmovdqa xmm3,XMMWORD[48+rsp]841 vpaddq xmm12,xmm12,xmm1842 vmovdqa XMMWORD[64+r11],xmm4843 vpmuludq xmm2,xmm2,xmm5844 vpmuludq xmm0,xmm3,xmm7845 vpaddq xmm11,xmm11,xmm2846 847 vmovdqa xmm4,XMMWORD[64+rsp]848 vpaddq xmm14,xmm14,xmm0849 vpmuludq xmm1,xmm3,xmm6850 vpmuludq xmm3,xmm3,xmm5851 vpaddq xmm13,xmm13,xmm1852 vmovdqa xmm2,XMMWORD[80+rsp]853 vpaddq xmm12,xmm12,xmm3854 vpmuludq xmm0,xmm4,xmm9855 vpmuludq xmm4,xmm4,xmm8856 vpaddq xmm11,xmm11,xmm0857 vmovdqa xmm3,XMMWORD[96+rsp]858 vpaddq xmm10,xmm10,xmm4859 860 vmovdqa xmm4,XMMWORD[128+rsp]861 vpmuludq xmm1,xmm2,xmm6862 vpmuludq xmm2,xmm2,xmm5863 vpaddq xmm14,xmm14,xmm1864 vpaddq xmm13,xmm13,xmm2865 vpmuludq xmm0,xmm3,xmm9866 vpmuludq xmm1,xmm3,xmm8867 vpaddq xmm12,xmm12,xmm0868 vmovdqu xmm0,XMMWORD[rsi]869 vpaddq xmm11,xmm11,xmm1870 vpmuludq xmm3,xmm3,xmm7871 vpmuludq xmm7,xmm4,xmm7872 vpaddq xmm10,xmm10,xmm3873 874 vmovdqu xmm1,XMMWORD[16+rsi]875 vpaddq xmm11,xmm11,xmm7876 vpmuludq xmm8,xmm4,xmm8877 vpmuludq xmm9,xmm4,xmm9878 vpsrldq xmm2,xmm0,6879 vpaddq xmm12,xmm12,xmm8880 vpaddq xmm13,xmm13,xmm9881 vpsrldq xmm3,xmm1,6882 vpmuludq xmm9,xmm5,XMMWORD[112+rsp]883 vpmuludq xmm5,xmm4,xmm6884 vpunpckhqdq xmm4,xmm0,xmm1885 vpaddq xmm14,xmm14,xmm9886 vmovdqa xmm9,XMMWORD[((-144))+r11]887 vpaddq xmm10,xmm10,xmm5888 889 vpunpcklqdq xmm0,xmm0,xmm1890 vpunpcklqdq xmm3,xmm2,xmm3891 892 893 vpsrldq xmm4,xmm4,5894 vpsrlq xmm1,xmm0,26895 vpand xmm0,xmm0,xmm15896 vpsrlq xmm2,xmm3,4897 vpand xmm1,xmm1,xmm15898 vpand xmm4,xmm4,XMMWORD[rcx]899 vpsrlq xmm3,xmm3,30900 vpand xmm2,xmm2,xmm15901 vpand xmm3,xmm3,xmm15902 vpor xmm4,xmm4,XMMWORD[32+rcx]903 904 vpaddq xmm0,xmm0,XMMWORD[r11]905 vpaddq xmm1,xmm1,XMMWORD[16+r11]906 vpaddq xmm2,xmm2,XMMWORD[32+r11]907 vpaddq xmm3,xmm3,XMMWORD[48+r11]908 vpaddq xmm4,xmm4,XMMWORD[64+r11]909 910 lea rax,[32+rsi]911 lea rsi,[64+rsi]912 sub rdx,64913 cmovc rsi,rax914 915 916 917 918 919 920 921 922 923 924 vpmuludq xmm5,xmm9,xmm0925 vpmuludq xmm6,xmm9,xmm1926 vpaddq xmm10,xmm10,xmm5927 vpaddq xmm11,xmm11,xmm6928 vmovdqa xmm7,XMMWORD[((-128))+r11]929 vpmuludq xmm5,xmm9,xmm2930 vpmuludq xmm6,xmm9,xmm3931 vpaddq xmm12,xmm12,xmm5932 vpaddq xmm13,xmm13,xmm6933 vpmuludq xmm9,xmm9,xmm4934 vpmuludq xmm5,xmm4,XMMWORD[((-112))+r11]935 vpaddq xmm14,xmm14,xmm9936 937 vpaddq xmm10,xmm10,xmm5938 vpmuludq xmm6,xmm7,xmm2939 vpmuludq xmm5,xmm7,xmm3940 vpaddq xmm13,xmm13,xmm6941 vmovdqa xmm8,XMMWORD[((-96))+r11]942 vpaddq xmm14,xmm14,xmm5943 vpmuludq xmm6,xmm7,xmm1944 vpmuludq xmm7,xmm7,xmm0945 vpaddq xmm12,xmm12,xmm6946 vpaddq xmm11,xmm11,xmm7947 948 vmovdqa xmm9,XMMWORD[((-80))+r11]949 vpmuludq xmm5,xmm8,xmm2950 vpmuludq xmm6,xmm8,xmm1951 vpaddq xmm14,xmm14,xmm5952 vpaddq xmm13,xmm13,xmm6953 vmovdqa xmm7,XMMWORD[((-64))+r11]954 vpmuludq xmm8,xmm8,xmm0955 vpmuludq xmm5,xmm9,xmm4956 vpaddq xmm12,xmm12,xmm8957 vpaddq xmm11,xmm11,xmm5958 vmovdqa xmm8,XMMWORD[((-48))+r11]959 vpmuludq xmm9,xmm9,xmm3960 vpmuludq xmm6,xmm7,xmm1961 vpaddq xmm10,xmm10,xmm9962 963 vmovdqa xmm9,XMMWORD[((-16))+r11]964 vpaddq xmm14,xmm14,xmm6965 vpmuludq xmm7,xmm7,xmm0966 vpmuludq xmm5,xmm8,xmm4967 vpaddq xmm13,xmm13,xmm7968 vpaddq xmm12,xmm12,xmm5969 vmovdqu xmm5,XMMWORD[32+rsi]970 vpmuludq xmm7,xmm8,xmm3971 vpmuludq xmm8,xmm8,xmm2972 vpaddq xmm11,xmm11,xmm7973 vmovdqu xmm6,XMMWORD[48+rsi]974 vpaddq xmm10,xmm10,xmm8975 976 vpmuludq xmm2,xmm9,xmm2977 vpmuludq xmm3,xmm9,xmm3978 vpsrldq xmm7,xmm5,6979 vpaddq xmm11,xmm11,xmm2980 vpmuludq xmm4,xmm9,xmm4981 vpsrldq xmm8,xmm6,6982 vpaddq xmm2,xmm12,xmm3983 vpaddq xmm3,xmm13,xmm4984 vpmuludq xmm4,xmm0,XMMWORD[((-32))+r11]985 vpmuludq xmm0,xmm9,xmm1986 vpunpckhqdq xmm9,xmm5,xmm6987 vpaddq xmm4,xmm14,xmm4988 vpaddq xmm0,xmm10,xmm0989 990 vpunpcklqdq xmm5,xmm5,xmm6991 vpunpcklqdq xmm8,xmm7,xmm8992 993 994 vpsrldq xmm9,xmm9,5995 vpsrlq xmm6,xmm5,26996 vmovdqa xmm14,XMMWORD[rsp]997 vpand xmm5,xmm5,xmm15998 vpsrlq xmm7,xmm8,4999 vpand xmm6,xmm6,xmm151000 vpand xmm9,xmm9,XMMWORD[rcx]1001 vpsrlq xmm8,xmm8,301002 vpand xmm7,xmm7,xmm151003 vpand xmm8,xmm8,xmm151004 vpor xmm9,xmm9,XMMWORD[32+rcx]1005 1006 1007 1008 1009 1010 vpsrlq xmm13,xmm3,261011 vpand xmm3,xmm3,xmm151012 vpaddq xmm4,xmm4,xmm131013 1014 vpsrlq xmm10,xmm0,261015 vpand xmm0,xmm0,xmm151016 vpaddq xmm1,xmm11,xmm101017 1018 vpsrlq xmm10,xmm4,261019 vpand xmm4,xmm4,xmm151020 1021 vpsrlq xmm11,xmm1,261022 vpand xmm1,xmm1,xmm151023 vpaddq xmm2,xmm2,xmm111024 1025 vpaddq xmm0,xmm0,xmm101026 vpsllq xmm10,xmm10,21027 vpaddq xmm0,xmm0,xmm101028 1029 vpsrlq xmm12,xmm2,261030 vpand xmm2,xmm2,xmm151031 vpaddq xmm3,xmm3,xmm121032 1033 vpsrlq xmm10,xmm0,261034 vpand xmm0,xmm0,xmm151035 vpaddq xmm1,xmm1,xmm101036 1037 vpsrlq xmm13,xmm3,261038 vpand xmm3,xmm3,xmm151039 vpaddq xmm4,xmm4,xmm131040 1041 ja NEAR $L$oop_avx1042 1043 $L$skip_loop_avx:1044 1045 1046 1047 vpshufd xmm14,xmm14,0x101048 add rdx,321049 jnz NEAR $L$ong_tail_avx1050 1051 vpaddq xmm7,xmm7,xmm21052 vpaddq xmm5,xmm5,xmm01053 vpaddq xmm6,xmm6,xmm11054 vpaddq xmm8,xmm8,xmm31055 vpaddq xmm9,xmm9,xmm41056 1057 $L$ong_tail_avx:1058 vmovdqa XMMWORD[32+r11],xmm21059 vmovdqa XMMWORD[r11],xmm01060 vmovdqa XMMWORD[16+r11],xmm11061 vmovdqa XMMWORD[48+r11],xmm31062 vmovdqa XMMWORD[64+r11],xmm41063 1064 1065 1066 1067 1068 1069 1070 vpmuludq xmm12,xmm14,xmm71071 vpmuludq xmm10,xmm14,xmm51072 vpshufd xmm2,XMMWORD[((-48))+rdi],0x101073 vpmuludq xmm11,xmm14,xmm61074 vpmuludq xmm13,xmm14,xmm81075 vpmuludq xmm14,xmm14,xmm91076 1077 vpmuludq xmm0,xmm2,xmm81078 vpaddq xmm14,xmm14,xmm01079 vpshufd xmm3,XMMWORD[((-32))+rdi],0x101080 vpmuludq xmm1,xmm2,xmm71081 vpaddq xmm13,xmm13,xmm11082 vpshufd xmm4,XMMWORD[((-16))+rdi],0x101083 vpmuludq xmm0,xmm2,xmm61084 vpaddq xmm12,xmm12,xmm01085 vpmuludq xmm2,xmm2,xmm51086 vpaddq xmm11,xmm11,xmm21087 vpmuludq xmm3,xmm3,xmm91088 vpaddq xmm10,xmm10,xmm31089 1090 vpshufd xmm2,XMMWORD[rdi],0x101091 vpmuludq xmm1,xmm4,xmm71092 vpaddq xmm14,xmm14,xmm11093 vpmuludq xmm0,xmm4,xmm61094 vpaddq xmm13,xmm13,xmm01095 vpshufd xmm3,XMMWORD[16+rdi],0x101096 vpmuludq xmm4,xmm4,xmm51097 vpaddq xmm12,xmm12,xmm41098 vpmuludq xmm1,xmm2,xmm91099 vpaddq xmm11,xmm11,xmm11100 vpshufd xmm4,XMMWORD[32+rdi],0x101101 vpmuludq xmm2,xmm2,xmm81102 vpaddq xmm10,xmm10,xmm21103 1104 vpmuludq xmm0,xmm3,xmm61105 vpaddq xmm14,xmm14,xmm01106 vpmuludq xmm3,xmm3,xmm51107 vpaddq xmm13,xmm13,xmm31108 vpshufd xmm2,XMMWORD[48+rdi],0x101109 vpmuludq xmm1,xmm4,xmm91110 vpaddq xmm12,xmm12,xmm11111 vpshufd xmm3,XMMWORD[64+rdi],0x101112 vpmuludq xmm0,xmm4,xmm81113 vpaddq xmm11,xmm11,xmm01114 vpmuludq xmm4,xmm4,xmm71115 vpaddq xmm10,xmm10,xmm41116 1117 vpmuludq xmm2,xmm2,xmm51118 vpaddq xmm14,xmm14,xmm21119 vpmuludq xmm1,xmm3,xmm91120 vpaddq xmm13,xmm13,xmm11121 vpmuludq xmm0,xmm3,xmm81122 vpaddq xmm12,xmm12,xmm01123 vpmuludq xmm1,xmm3,xmm71124 vpaddq xmm11,xmm11,xmm11125 vpmuludq xmm3,xmm3,xmm61126 vpaddq xmm10,xmm10,xmm31127 1128 jz NEAR $L$short_tail_avx1129 1130 vmovdqu xmm0,XMMWORD[rsi]1131 vmovdqu xmm1,XMMWORD[16+rsi]1132 1133 vpsrldq xmm2,xmm0,61134 vpsrldq xmm3,xmm1,61135 vpunpckhqdq xmm4,xmm0,xmm11136 vpunpcklqdq xmm0,xmm0,xmm11137 vpunpcklqdq xmm3,xmm2,xmm31138 1139 vpsrlq xmm4,xmm4,401140 vpsrlq xmm1,xmm0,261141 vpand xmm0,xmm0,xmm151142 vpsrlq xmm2,xmm3,41143 vpand xmm1,xmm1,xmm151144 vpsrlq xmm3,xmm3,301145 vpand xmm2,xmm2,xmm151146 vpand xmm3,xmm3,xmm151147 vpor xmm4,xmm4,XMMWORD[32+rcx]1148 1149 vpshufd xmm9,XMMWORD[((-64))+rdi],0x321150 vpaddq xmm0,xmm0,XMMWORD[r11]1151 vpaddq xmm1,xmm1,XMMWORD[16+r11]1152 vpaddq xmm2,xmm2,XMMWORD[32+r11]1153 vpaddq xmm3,xmm3,XMMWORD[48+r11]1154 vpaddq xmm4,xmm4,XMMWORD[64+r11]1155 1156 1157 1158 1159 vpmuludq xmm5,xmm9,xmm01160 vpaddq xmm10,xmm10,xmm51161 vpmuludq xmm6,xmm9,xmm11162 vpaddq xmm11,xmm11,xmm61163 vpmuludq xmm5,xmm9,xmm21164 vpaddq xmm12,xmm12,xmm51165 vpshufd xmm7,XMMWORD[((-48))+rdi],0x321166 vpmuludq xmm6,xmm9,xmm31167 vpaddq xmm13,xmm13,xmm61168 vpmuludq xmm9,xmm9,xmm41169 vpaddq xmm14,xmm14,xmm91170 1171 vpmuludq xmm5,xmm7,xmm31172 vpaddq xmm14,xmm14,xmm51173 vpshufd xmm8,XMMWORD[((-32))+rdi],0x321174 vpmuludq xmm6,xmm7,xmm21175 vpaddq xmm13,xmm13,xmm61176 vpshufd xmm9,XMMWORD[((-16))+rdi],0x321177 vpmuludq xmm5,xmm7,xmm11178 vpaddq xmm12,xmm12,xmm51179 vpmuludq xmm7,xmm7,xmm01180 vpaddq xmm11,xmm11,xmm71181 vpmuludq xmm8,xmm8,xmm41182 vpaddq xmm10,xmm10,xmm81183 1184 vpshufd xmm7,XMMWORD[rdi],0x321185 vpmuludq xmm6,xmm9,xmm21186 vpaddq xmm14,xmm14,xmm61187 vpmuludq xmm5,xmm9,xmm11188 vpaddq xmm13,xmm13,xmm51189 vpshufd xmm8,XMMWORD[16+rdi],0x321190 vpmuludq xmm9,xmm9,xmm01191 vpaddq xmm12,xmm12,xmm91192 vpmuludq xmm6,xmm7,xmm41193 vpaddq xmm11,xmm11,xmm61194 vpshufd xmm9,XMMWORD[32+rdi],0x321195 vpmuludq xmm7,xmm7,xmm31196 vpaddq xmm10,xmm10,xmm71197 1198 vpmuludq xmm5,xmm8,xmm11199 vpaddq xmm14,xmm14,xmm51200 vpmuludq xmm8,xmm8,xmm01201 vpaddq xmm13,xmm13,xmm81202 vpshufd xmm7,XMMWORD[48+rdi],0x321203 vpmuludq xmm6,xmm9,xmm41204 vpaddq xmm12,xmm12,xmm61205 vpshufd xmm8,XMMWORD[64+rdi],0x321206 vpmuludq xmm5,xmm9,xmm31207 vpaddq xmm11,xmm11,xmm51208 vpmuludq xmm9,xmm9,xmm21209 vpaddq xmm10,xmm10,xmm91210 1211 vpmuludq xmm7,xmm7,xmm01212 vpaddq xmm14,xmm14,xmm71213 vpmuludq xmm6,xmm8,xmm41214 vpaddq xmm13,xmm13,xmm61215 vpmuludq xmm5,xmm8,xmm31216 vpaddq xmm12,xmm12,xmm51217 vpmuludq xmm6,xmm8,xmm21218 vpaddq xmm11,xmm11,xmm61219 vpmuludq xmm8,xmm8,xmm11220 vpaddq xmm10,xmm10,xmm81221 1222 $L$short_tail_avx:1223 1224 1225 1226 vpsrldq xmm9,xmm14,81227 vpsrldq xmm8,xmm13,81228 vpsrldq xmm6,xmm11,81229 vpsrldq xmm5,xmm10,81230 vpsrldq xmm7,xmm12,81231 vpaddq xmm13,xmm13,xmm81232 vpaddq xmm14,xmm14,xmm91233 vpaddq xmm10,xmm10,xmm51234 vpaddq xmm11,xmm11,xmm61235 vpaddq xmm12,xmm12,xmm71236 1237 1238 1239 1240 vpsrlq xmm3,xmm13,261241 vpand xmm13,xmm13,xmm151242 vpaddq xmm14,xmm14,xmm31243 1244 vpsrlq xmm0,xmm10,261245 vpand xmm10,xmm10,xmm151246 vpaddq xmm11,xmm11,xmm01247 1248 vpsrlq xmm4,xmm14,261249 vpand xmm14,xmm14,xmm151250 1251 vpsrlq xmm1,xmm11,261252 vpand xmm11,xmm11,xmm151253 vpaddq xmm12,xmm12,xmm11254 1255 vpaddq xmm10,xmm10,xmm41256 vpsllq xmm4,xmm4,21257 vpaddq xmm10,xmm10,xmm41258 1259 vpsrlq xmm2,xmm12,261260 vpand xmm12,xmm12,xmm151261 vpaddq xmm13,xmm13,xmm21262 1263 vpsrlq xmm0,xmm10,261264 vpand xmm10,xmm10,xmm151265 vpaddq xmm11,xmm11,xmm01266 1267 vpsrlq xmm3,xmm13,261268 vpand xmm13,xmm13,xmm151269 vpaddq xmm14,xmm14,xmm31270 1271 vmovd DWORD[(-112)+rdi],xmm101272 vmovd DWORD[(-108)+rdi],xmm111273 vmovd DWORD[(-104)+rdi],xmm121274 vmovd DWORD[(-100)+rdi],xmm131275 vmovd DWORD[(-96)+rdi],xmm141276 vmovdqa xmm6,XMMWORD[80+r11]1277 vmovdqa xmm7,XMMWORD[96+r11]1278 vmovdqa xmm8,XMMWORD[112+r11]1279 vmovdqa xmm9,XMMWORD[128+r11]1280 vmovdqa xmm10,XMMWORD[144+r11]1281 vmovdqa xmm11,XMMWORD[160+r11]1282 vmovdqa xmm12,XMMWORD[176+r11]1283 vmovdqa xmm13,XMMWORD[192+r11]1284 vmovdqa xmm14,XMMWORD[208+r11]1285 vmovdqa xmm15,XMMWORD[224+r11]1286 lea rsp,[248+r11]1287 $L$do_avx_epilogue:1288 vzeroupper1289 mov rdi,QWORD[8+rsp] ;WIN64 epilogue1290 mov rsi,QWORD[16+rsp]1291 DB 0F3h,0C3h ;repret1292 1293 $L$SEH_end_poly1305_blocks_avx:1294 1295 1296 ALIGN 321297 poly1305_emit_avx:1298 mov QWORD[8+rsp],rdi ;WIN64 prologue1299 mov QWORD[16+rsp],rsi1300 mov rax,rsp1301 $L$SEH_begin_poly1305_emit_avx:1302 mov rdi,rcx1303 mov rsi,rdx1304 mov rdx,r81305 1306 1307 1308 cmp DWORD[20+rdi],01309 je NEAR $L$emit1310 1311 mov eax,DWORD[rdi]1312 mov ecx,DWORD[4+rdi]1313 mov r8d,DWORD[8+rdi]1314 mov r11d,DWORD[12+rdi]1315 mov r10d,DWORD[16+rdi]1316 1317 shl rcx,261318 mov r9,r81319 shl r8,521320 add rax,rcx1321 shr r9,121322 add r8,rax1323 adc r9,01324 1325 shl r11,141326 mov rax,r101327 shr r10,241328 add r9,r111329 shl rax,401330 add r9,rax1331 adc r10,01332 1333 mov rax,r101334 mov rcx,r101335 and r10,31336 shr rax,21337 and rcx,-41338 add rax,rcx1339 add r8,rax1340 adc r9,01341 adc r10,01342 1343 mov rax,r81344 add r8,51345 mov rcx,r91346 adc r9,01347 adc r10,01348 shr r10,21349 cmovnz rax,r81350 cmovnz rcx,r91351 1352 add rax,QWORD[rdx]1353 adc rcx,QWORD[8+rdx]1354 mov QWORD[rsi],rax1355 mov QWORD[8+rsi],rcx1356 1357 mov rdi,QWORD[8+rsp] ;WIN64 epilogue1358 mov rsi,QWORD[16+rsp]1359 DB 0F3h,0C3h ;repret1360 1361 $L$SEH_end_poly1305_emit_avx:1362 1363 ALIGN 321364 poly1305_blocks_avx2:1365 mov QWORD[8+rsp],rdi ;WIN64 prologue1366 mov QWORD[16+rsp],rsi1367 mov rax,rsp1368 $L$SEH_begin_poly1305_blocks_avx2:1369 mov rdi,rcx1370 mov rsi,rdx1371 mov rdx,r81372 mov rcx,r91373 1374 1375 1376 mov r8d,DWORD[20+rdi]1377 cmp rdx,1281378 jae NEAR $L$blocks_avx21379 test r8d,r8d1380 jz NEAR $L$blocks1381 1382 $L$blocks_avx2:1383 and rdx,-161384 jz NEAR $L$no_data_avx21385 1386 vzeroupper1387 1388 test r8d,r8d1389 jz NEAR $L$base2_64_avx21390 1391 test rdx,631392 jz NEAR $L$even_avx21393 1394 push rbx1395 1396 push rbp1397 1398 push r121399 1400 push r131401 1402 push r141403 1404 push r151405 1406 $L$blocks_avx2_body:1407 1408 mov r15,rdx1409 1410 mov r8,QWORD[rdi]1411 mov r9,QWORD[8+rdi]1412 mov ebp,DWORD[16+rdi]1413 1414 mov r11,QWORD[24+rdi]1415 mov r13,QWORD[32+rdi]1416 1417 1418 mov r14d,r8d1419 and r8,-21474836481420 mov r12,r91421 mov ebx,r9d1422 and r9,-21474836481423 1424 shr r8,61425 shl r12,521426 add r14,r81427 shr rbx,121428 shr r9,181429 add r14,r121430 adc rbx,r91431 1432 mov r8,rbp1433 shl r8,401434 shr rbp,241435 add rbx,r81436 adc rbp,01437 1438 mov r9,-41439 mov r8,rbp1440 and r9,rbp1441 shr r8,21442 and rbp,31443 add r8,r91444 add r14,r81445 adc rbx,01446 adc rbp,01447 1448 mov r12,r131449 mov rax,r131450 shr r13,21451 add r13,r121452 1453 $L$base2_26_pre_avx2:1454 add r14,QWORD[rsi]1455 adc rbx,QWORD[8+rsi]1456 lea rsi,[16+rsi]1457 adc rbp,rcx1458 sub r15,161459 1460 call __poly1305_block1461 mov rax,r121462 1463 test r15,631464 jnz NEAR $L$base2_26_pre_avx21465 1466 test rcx,rcx1467 jz NEAR $L$store_base2_64_avx21468 1469 1470 mov rax,r141471 mov rdx,r141472 shr r14,521473 mov r11,rbx1474 mov r12,rbx1475 shr rdx,261476 and rax,0x3ffffff1477 shl r11,121478 and rdx,0x3ffffff1479 shr rbx,141480 or r14,r111481 shl rbp,241482 and r14,0x3ffffff1483 shr r12,401484 and rbx,0x3ffffff1485 or rbp,r121486 1487 test r15,r151488 jz NEAR $L$store_base2_26_avx21489 1490 vmovd xmm0,eax1491 vmovd xmm1,edx1492 vmovd xmm2,r14d1493 vmovd xmm3,ebx1494 vmovd xmm4,ebp1495 jmp NEAR $L$proceed_avx21496 1497 ALIGN 321498 $L$store_base2_64_avx2:1499 mov QWORD[rdi],r141500 mov QWORD[8+rdi],rbx1501 mov QWORD[16+rdi],rbp1502 jmp NEAR $L$done_avx21503 1504 ALIGN 161505 $L$store_base2_26_avx2:1506 mov DWORD[rdi],eax1507 mov DWORD[4+rdi],edx1508 mov DWORD[8+rdi],r14d1509 mov DWORD[12+rdi],ebx1510 mov DWORD[16+rdi],ebp1511 ALIGN 161512 $L$done_avx2:1513 mov r15,QWORD[rsp]1514 1515 mov r14,QWORD[8+rsp]1516 1517 mov r13,QWORD[16+rsp]1518 1519 mov r12,QWORD[24+rsp]1520 1521 mov rbp,QWORD[32+rsp]1522 1523 mov rbx,QWORD[40+rsp]1524 1525 lea rsp,[48+rsp]1526 1527 $L$no_data_avx2:1528 $L$blocks_avx2_epilogue:1529 mov rdi,QWORD[8+rsp] ;WIN64 epilogue1530 mov rsi,QWORD[16+rsp]1531 DB 0F3h,0C3h ;repret1532 1533 1534 ALIGN 321535 $L$base2_64_avx2:1536 1537 push rbx1538 1539 push rbp1540 1541 push r121542 1543 push r131544 1545 push r141546 1547 push r151548 1549 $L$base2_64_avx2_body:1550 1551 mov r15,rdx1552 1553 mov r11,QWORD[24+rdi]1554 mov r13,QWORD[32+rdi]1555 1556 mov r14,QWORD[rdi]1557 mov rbx,QWORD[8+rdi]1558 mov ebp,DWORD[16+rdi]1559 1560 mov r12,r131561 mov rax,r131562 shr r13,21563 add r13,r121564 1565 test rdx,631566 jz NEAR $L$init_avx21567 1568 $L$base2_64_pre_avx2:1569 add r14,QWORD[rsi]1570 adc rbx,QWORD[8+rsi]1571 lea rsi,[16+rsi]1572 adc rbp,rcx1573 sub r15,161574 1575 call __poly1305_block1576 mov rax,r121577 1578 test r15,631579 jnz NEAR $L$base2_64_pre_avx21580 1581 $L$init_avx2:1582 1583 mov rax,r141584 mov rdx,r141585 shr r14,521586 mov r8,rbx1587 mov r9,rbx1588 shr rdx,261589 and rax,0x3ffffff1590 shl r8,121591 and rdx,0x3ffffff1592 shr rbx,141593 or r14,r81594 shl rbp,241595 and r14,0x3ffffff1596 shr r9,401597 and rbx,0x3ffffff1598 or rbp,r91599 1600 vmovd xmm0,eax1601 vmovd xmm1,edx1602 vmovd xmm2,r14d1603 vmovd xmm3,ebx1604 vmovd xmm4,ebp1605 mov DWORD[20+rdi],11606 1607 call __poly1305_init_avx1608 1609 $L$proceed_avx2:1610 mov rdx,r151611 mov r10d,DWORD[((OPENSSL_ia32cap_P+8))]1612 mov r11d,32212910081613 1614 mov r15,QWORD[rsp]1615 1616 mov r14,QWORD[8+rsp]1617 1618 mov r13,QWORD[16+rsp]1619 1620 mov r12,QWORD[24+rsp]1621 1622 mov rbp,QWORD[32+rsp]1623 1624 mov rbx,QWORD[40+rsp]1625 1626 lea rax,[48+rsp]1627 lea rsp,[48+rsp]1628 1629 $L$base2_64_avx2_epilogue:1630 jmp NEAR $L$do_avx21631 1632 1633 ALIGN 321634 $L$even_avx2:1635 1636 mov r10d,DWORD[((OPENSSL_ia32cap_P+8))]1637 vmovd xmm0,DWORD[rdi]1638 vmovd xmm1,DWORD[4+rdi]1639 vmovd xmm2,DWORD[8+rdi]1640 vmovd xmm3,DWORD[12+rdi]1641 vmovd xmm4,DWORD[16+rdi]1642 1643 $L$do_avx2:1644 cmp rdx,5121645 jb NEAR $L$skip_avx5121646 and r10d,r11d1647 test r10d,655361648 jnz NEAR $L$blocks_avx5121649 $L$skip_avx512:1650 lea r11,[((-248))+rsp]1651 sub rsp,0x1c81652 vmovdqa XMMWORD[80+r11],xmm61653 vmovdqa XMMWORD[96+r11],xmm71654 vmovdqa XMMWORD[112+r11],xmm81655 vmovdqa XMMWORD[128+r11],xmm91656 vmovdqa XMMWORD[144+r11],xmm101657 vmovdqa XMMWORD[160+r11],xmm111658 vmovdqa XMMWORD[176+r11],xmm121659 vmovdqa XMMWORD[192+r11],xmm131660 vmovdqa XMMWORD[208+r11],xmm141661 vmovdqa XMMWORD[224+r11],xmm151662 $L$do_avx2_body:1663 lea rcx,[$L$const]1664 lea rdi,[((48+64))+rdi]1665 vmovdqa ymm7,YMMWORD[96+rcx]1666 1667 1668 vmovdqu xmm9,XMMWORD[((-64))+rdi]1669 and rsp,-5121670 vmovdqu xmm10,XMMWORD[((-48))+rdi]1671 vmovdqu xmm6,XMMWORD[((-32))+rdi]1672 vmovdqu xmm11,XMMWORD[((-16))+rdi]1673 vmovdqu xmm12,XMMWORD[rdi]1674 vmovdqu xmm13,XMMWORD[16+rdi]1675 lea rax,[144+rsp]1676 vmovdqu xmm14,XMMWORD[32+rdi]1677 vpermd ymm9,ymm7,ymm91678 vmovdqu xmm15,XMMWORD[48+rdi]1679 vpermd ymm10,ymm7,ymm101680 vmovdqu xmm5,XMMWORD[64+rdi]1681 vpermd ymm6,ymm7,ymm61682 vmovdqa YMMWORD[rsp],ymm91683 vpermd ymm11,ymm7,ymm111684 vmovdqa YMMWORD[(32-144)+rax],ymm101685 vpermd ymm12,ymm7,ymm121686 vmovdqa YMMWORD[(64-144)+rax],ymm61687 vpermd ymm13,ymm7,ymm131688 vmovdqa YMMWORD[(96-144)+rax],ymm111689 vpermd ymm14,ymm7,ymm141690 vmovdqa YMMWORD[(128-144)+rax],ymm121691 vpermd ymm15,ymm7,ymm151692 vmovdqa YMMWORD[(160-144)+rax],ymm131693 vpermd ymm5,ymm7,ymm51694 vmovdqa YMMWORD[(192-144)+rax],ymm141695 vmovdqa YMMWORD[(224-144)+rax],ymm151696 vmovdqa YMMWORD[(256-144)+rax],ymm51697 vmovdqa ymm5,YMMWORD[64+rcx]1698 1699 1700 1701 vmovdqu xmm7,XMMWORD[rsi]1702 vmovdqu xmm8,XMMWORD[16+rsi]1703 vinserti128 ymm7,ymm7,XMMWORD[32+rsi],11704 vinserti128 ymm8,ymm8,XMMWORD[48+rsi],11705 lea rsi,[64+rsi]1706 1707 vpsrldq ymm9,ymm7,61708 vpsrldq ymm10,ymm8,61709 vpunpckhqdq ymm6,ymm7,ymm81710 vpunpcklqdq ymm9,ymm9,ymm101711 vpunpcklqdq ymm7,ymm7,ymm81712 1713 vpsrlq ymm10,ymm9,301714 vpsrlq ymm9,ymm9,41715 vpsrlq ymm8,ymm7,261716 vpsrlq ymm6,ymm6,401717 vpand ymm9,ymm9,ymm51718 vpand ymm7,ymm7,ymm51719 vpand ymm8,ymm8,ymm51720 vpand ymm10,ymm10,ymm51721 vpor ymm6,ymm6,YMMWORD[32+rcx]1722 1723 vpaddq ymm2,ymm9,ymm21724 sub rdx,641725 jz NEAR $L$tail_avx21726 jmp NEAR $L$oop_avx21727 1728 ALIGN 321729 $L$oop_avx2:1730 1731 1732 1733 1734 1735 1736 1737 1738 vpaddq ymm0,ymm7,ymm01739 vmovdqa ymm7,YMMWORD[rsp]1740 vpaddq ymm1,ymm8,ymm11741 vmovdqa ymm8,YMMWORD[32+rsp]1742 vpaddq ymm3,ymm10,ymm31743 vmovdqa ymm9,YMMWORD[96+rsp]1744 vpaddq ymm4,ymm6,ymm41745 vmovdqa ymm10,YMMWORD[48+rax]1746 vmovdqa ymm5,YMMWORD[112+rax]1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 vpmuludq ymm13,ymm7,ymm21764 vpmuludq ymm14,ymm8,ymm21765 vpmuludq ymm15,ymm9,ymm21766 vpmuludq ymm11,ymm10,ymm21767 vpmuludq ymm12,ymm5,ymm21768 1769 vpmuludq ymm6,ymm8,ymm01770 vpmuludq ymm2,ymm8,ymm11771 vpaddq ymm12,ymm12,ymm61772 vpaddq ymm13,ymm13,ymm21773 vpmuludq ymm6,ymm8,ymm31774 vpmuludq ymm2,ymm4,YMMWORD[64+rsp]1775 vpaddq ymm15,ymm15,ymm61776 vpaddq ymm11,ymm11,ymm21777 vmovdqa ymm8,YMMWORD[((-16))+rax]1778 1779 vpmuludq ymm6,ymm7,ymm01780 vpmuludq ymm2,ymm7,ymm11781 vpaddq ymm11,ymm11,ymm61782 vpaddq ymm12,ymm12,ymm21783 vpmuludq ymm6,ymm7,ymm31784 vpmuludq ymm2,ymm7,ymm41785 vmovdqu xmm7,XMMWORD[rsi]1786 vpaddq ymm14,ymm14,ymm61787 vpaddq ymm15,ymm15,ymm21788 vinserti128 ymm7,ymm7,XMMWORD[32+rsi],11789 1790 vpmuludq ymm6,ymm8,ymm31791 vpmuludq ymm2,ymm8,ymm41792 vmovdqu xmm8,XMMWORD[16+rsi]1793 vpaddq ymm11,ymm11,ymm61794 vpaddq ymm12,ymm12,ymm21795 vmovdqa ymm2,YMMWORD[16+rax]1796 vpmuludq ymm6,ymm9,ymm11797 vpmuludq ymm9,ymm9,ymm01798 vpaddq ymm14,ymm14,ymm61799 vpaddq ymm13,ymm13,ymm91800 vinserti128 ymm8,ymm8,XMMWORD[48+rsi],11801 lea rsi,[64+rsi]1802 1803 vpmuludq ymm6,ymm2,ymm11804 vpmuludq ymm2,ymm2,ymm01805 vpsrldq ymm9,ymm7,61806 vpaddq ymm15,ymm15,ymm61807 vpaddq ymm14,ymm14,ymm21808 vpmuludq ymm6,ymm10,ymm31809 vpmuludq ymm2,ymm10,ymm41810 vpsrldq ymm10,ymm8,61811 vpaddq ymm12,ymm12,ymm61812 vpaddq ymm13,ymm13,ymm21813 vpunpckhqdq ymm6,ymm7,ymm81814 1815 vpmuludq ymm3,ymm5,ymm31816 vpmuludq ymm4,ymm5,ymm41817 vpunpcklqdq ymm7,ymm7,ymm81818 vpaddq ymm2,ymm13,ymm31819 vpaddq ymm3,ymm14,ymm41820 vpunpcklqdq ymm10,ymm9,ymm101821 vpmuludq ymm4,ymm0,YMMWORD[80+rax]1822 vpmuludq ymm0,ymm5,ymm11823 vmovdqa ymm5,YMMWORD[64+rcx]1824 vpaddq ymm4,ymm15,ymm41825 vpaddq ymm0,ymm11,ymm01826 1827 1828 1829 1830 vpsrlq ymm14,ymm3,261831 vpand ymm3,ymm3,ymm51832 vpaddq ymm4,ymm4,ymm141833 1834 vpsrlq ymm11,ymm0,261835 vpand ymm0,ymm0,ymm51836 vpaddq ymm1,ymm12,ymm111837 1838 vpsrlq ymm15,ymm4,261839 vpand ymm4,ymm4,ymm51840 1841 vpsrlq ymm9,ymm10,41842 1843 vpsrlq ymm12,ymm1,261844 vpand ymm1,ymm1,ymm51845 vpaddq ymm2,ymm2,ymm121846 1847 vpaddq ymm0,ymm0,ymm151848 vpsllq ymm15,ymm15,21849 vpaddq ymm0,ymm0,ymm151850 1851 vpand ymm9,ymm9,ymm51852 vpsrlq ymm8,ymm7,261853 1854 vpsrlq ymm13,ymm2,261855 vpand ymm2,ymm2,ymm51856 vpaddq ymm3,ymm3,ymm131857 1858 vpaddq ymm2,ymm2,ymm91859 vpsrlq ymm10,ymm10,301860 1861 vpsrlq ymm11,ymm0,261862 vpand ymm0,ymm0,ymm51863 vpaddq ymm1,ymm1,ymm111864 1865 vpsrlq ymm6,ymm6,401866 1867 vpsrlq ymm14,ymm3,261868 vpand ymm3,ymm3,ymm51869 vpaddq ymm4,ymm4,ymm141870 1871 vpand ymm7,ymm7,ymm51872 vpand ymm8,ymm8,ymm51873 vpand ymm10,ymm10,ymm51874 vpor ymm6,ymm6,YMMWORD[32+rcx]1875 1876 sub rdx,641877 jnz NEAR $L$oop_avx21878 1879 DB 0x66,0x901880 $L$tail_avx2:1881 1882 1883 1884 1885 1886 1887 1888 vpaddq ymm0,ymm7,ymm01889 vmovdqu ymm7,YMMWORD[4+rsp]1890 vpaddq ymm1,ymm8,ymm11891 vmovdqu ymm8,YMMWORD[36+rsp]1892 vpaddq ymm3,ymm10,ymm31893 vmovdqu ymm9,YMMWORD[100+rsp]1894 vpaddq ymm4,ymm6,ymm41895 vmovdqu ymm10,YMMWORD[52+rax]1896 vmovdqu ymm5,YMMWORD[116+rax]1897 1898 vpmuludq ymm13,ymm7,ymm21899 vpmuludq ymm14,ymm8,ymm21900 vpmuludq ymm15,ymm9,ymm21901 vpmuludq ymm11,ymm10,ymm21902 vpmuludq ymm12,ymm5,ymm21903 1904 vpmuludq ymm6,ymm8,ymm01905 vpmuludq ymm2,ymm8,ymm11906 vpaddq ymm12,ymm12,ymm61907 vpaddq ymm13,ymm13,ymm21908 vpmuludq ymm6,ymm8,ymm31909 vpmuludq ymm2,ymm4,YMMWORD[68+rsp]1910 vpaddq ymm15,ymm15,ymm61911 vpaddq ymm11,ymm11,ymm21912 1913 vpmuludq ymm6,ymm7,ymm01914 vpmuludq ymm2,ymm7,ymm11915 vpaddq ymm11,ymm11,ymm61916 vmovdqu ymm8,YMMWORD[((-12))+rax]1917 vpaddq ymm12,ymm12,ymm21918 vpmuludq ymm6,ymm7,ymm31919 vpmuludq ymm2,ymm7,ymm41920 vpaddq ymm14,ymm14,ymm61921 vpaddq ymm15,ymm15,ymm21922 1923 vpmuludq ymm6,ymm8,ymm31924 vpmuludq ymm2,ymm8,ymm41925 vpaddq ymm11,ymm11,ymm61926 vpaddq ymm12,ymm12,ymm21927 vmovdqu ymm2,YMMWORD[20+rax]1928 vpmuludq ymm6,ymm9,ymm11929 vpmuludq ymm9,ymm9,ymm01930 vpaddq ymm14,ymm14,ymm61931 vpaddq ymm13,ymm13,ymm91932 1933 vpmuludq ymm6,ymm2,ymm11934 vpmuludq ymm2,ymm2,ymm01935 vpaddq ymm15,ymm15,ymm61936 vpaddq ymm14,ymm14,ymm21937 vpmuludq ymm6,ymm10,ymm31938 vpmuludq ymm2,ymm10,ymm41939 vpaddq ymm12,ymm12,ymm61940 vpaddq ymm13,ymm13,ymm21941 1942 vpmuludq ymm3,ymm5,ymm31943 vpmuludq ymm4,ymm5,ymm41944 vpaddq ymm2,ymm13,ymm31945 vpaddq ymm3,ymm14,ymm41946 vpmuludq ymm4,ymm0,YMMWORD[84+rax]1947 vpmuludq ymm0,ymm5,ymm11948 vmovdqa ymm5,YMMWORD[64+rcx]1949 vpaddq ymm4,ymm15,ymm41950 vpaddq ymm0,ymm11,ymm01951 1952 1953 1954 1955 vpsrldq ymm8,ymm12,81956 vpsrldq ymm9,ymm2,81957 vpsrldq ymm10,ymm3,81958 vpsrldq ymm6,ymm4,81959 vpsrldq ymm7,ymm0,81960 vpaddq ymm12,ymm12,ymm81961 vpaddq ymm2,ymm2,ymm91962 vpaddq ymm3,ymm3,ymm101963 vpaddq ymm4,ymm4,ymm61964 vpaddq ymm0,ymm0,ymm71965 1966 vpermq ymm10,ymm3,0x21967 vpermq ymm6,ymm4,0x21968 vpermq ymm7,ymm0,0x21969 vpermq ymm8,ymm12,0x21970 vpermq ymm9,ymm2,0x21971 vpaddq ymm3,ymm3,ymm101972 vpaddq ymm4,ymm4,ymm61973 vpaddq ymm0,ymm0,ymm71974 vpaddq ymm12,ymm12,ymm81975 vpaddq ymm2,ymm2,ymm91976 1977 1978 1979 1980 vpsrlq ymm14,ymm3,261981 vpand ymm3,ymm3,ymm51982 vpaddq ymm4,ymm4,ymm141983 1984 vpsrlq ymm11,ymm0,261985 vpand ymm0,ymm0,ymm51986 vpaddq ymm1,ymm12,ymm111987 1988 vpsrlq ymm15,ymm4,261989 vpand ymm4,ymm4,ymm51990 1991 vpsrlq ymm12,ymm1,261992 vpand ymm1,ymm1,ymm51993 vpaddq ymm2,ymm2,ymm121994 1995 vpaddq ymm0,ymm0,ymm151996 vpsllq ymm15,ymm15,21997 vpaddq ymm0,ymm0,ymm151998 1999 vpsrlq ymm13,ymm2,262000 vpand ymm2,ymm2,ymm52001 vpaddq ymm3,ymm3,ymm132002 2003 vpsrlq ymm11,ymm0,262004 vpand ymm0,ymm0,ymm52005 vpaddq ymm1,ymm1,ymm112006 2007 vpsrlq ymm14,ymm3,262008 vpand ymm3,ymm3,ymm52009 vpaddq ymm4,ymm4,ymm142010 2011 vmovd DWORD[(-112)+rdi],xmm02012 vmovd DWORD[(-108)+rdi],xmm12013 vmovd DWORD[(-104)+rdi],xmm22014 vmovd DWORD[(-100)+rdi],xmm32015 vmovd DWORD[(-96)+rdi],xmm42016 vmovdqa xmm6,XMMWORD[80+r11]2017 vmovdqa xmm7,XMMWORD[96+r11]2018 vmovdqa xmm8,XMMWORD[112+r11]2019 vmovdqa xmm9,XMMWORD[128+r11]2020 vmovdqa xmm10,XMMWORD[144+r11]2021 vmovdqa xmm11,XMMWORD[160+r11]2022 vmovdqa xmm12,XMMWORD[176+r11]2023 vmovdqa xmm13,XMMWORD[192+r11]2024 vmovdqa xmm14,XMMWORD[208+r11]2025 vmovdqa xmm15,XMMWORD[224+r11]2026 lea rsp,[248+r11]2027 $L$do_avx2_epilogue:2028 vzeroupper2029 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2030 mov rsi,QWORD[16+rsp]2031 DB 0F3h,0C3h ;repret2032 2033 $L$SEH_end_poly1305_blocks_avx2:2034 2035 ALIGN 322036 poly1305_blocks_avx512:2037 mov QWORD[8+rsp],rdi ;WIN64 prologue2038 mov QWORD[16+rsp],rsi2039 mov rax,rsp2040 $L$SEH_begin_poly1305_blocks_avx512:2041 mov rdi,rcx2042 mov rsi,rdx2043 mov rdx,r82044 mov rcx,r92045 2046 2047 2048 $L$blocks_avx512:2049 mov eax,152050 kmovw k2,eax2051 lea r11,[((-248))+rsp]2052 sub rsp,0x1c82053 vmovdqa XMMWORD[80+r11],xmm62054 vmovdqa XMMWORD[96+r11],xmm72055 vmovdqa XMMWORD[112+r11],xmm82056 vmovdqa XMMWORD[128+r11],xmm92057 vmovdqa XMMWORD[144+r11],xmm102058 vmovdqa XMMWORD[160+r11],xmm112059 vmovdqa XMMWORD[176+r11],xmm122060 vmovdqa XMMWORD[192+r11],xmm132061 vmovdqa XMMWORD[208+r11],xmm142062 vmovdqa XMMWORD[224+r11],xmm152063 $L$do_avx512_body:2064 lea rcx,[$L$const]2065 lea rdi,[((48+64))+rdi]2066 vmovdqa ymm9,YMMWORD[96+rcx]2067 2068 2069 vmovdqu xmm11,XMMWORD[((-64))+rdi]2070 and rsp,-5122071 vmovdqu xmm12,XMMWORD[((-48))+rdi]2072 mov rax,0x202073 vmovdqu xmm7,XMMWORD[((-32))+rdi]2074 vmovdqu xmm13,XMMWORD[((-16))+rdi]2075 vmovdqu xmm8,XMMWORD[rdi]2076 vmovdqu xmm14,XMMWORD[16+rdi]2077 vmovdqu xmm10,XMMWORD[32+rdi]2078 vmovdqu xmm15,XMMWORD[48+rdi]2079 vmovdqu xmm6,XMMWORD[64+rdi]2080 vpermd zmm16,zmm9,zmm112081 vpbroadcastq zmm5,QWORD[64+rcx]2082 vpermd zmm17,zmm9,zmm122083 vpermd zmm21,zmm9,zmm72084 vpermd zmm18,zmm9,zmm132085 vmovdqa64 ZMMWORD[rsp]{k2},zmm162086 vpsrlq zmm7,zmm16,322087 vpermd zmm22,zmm9,zmm82088 vmovdqu64 ZMMWORD[rax*1+rsp]{k2},zmm172089 vpsrlq zmm8,zmm17,322090 vpermd zmm19,zmm9,zmm142091 vmovdqa64 ZMMWORD[64+rsp]{k2},zmm212092 vpermd zmm23,zmm9,zmm102093 vpermd zmm20,zmm9,zmm152094 vmovdqu64 ZMMWORD[64+rax*1+rsp]{k2},zmm182095 vpermd zmm24,zmm9,zmm62096 vmovdqa64 ZMMWORD[128+rsp]{k2},zmm222097 vmovdqu64 ZMMWORD[128+rax*1+rsp]{k2},zmm192098 vmovdqa64 ZMMWORD[192+rsp]{k2},zmm232099 vmovdqu64 ZMMWORD[192+rax*1+rsp]{k2},zmm202100 vmovdqa64 ZMMWORD[256+rsp]{k2},zmm242101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 vpmuludq zmm11,zmm16,zmm72112 vpmuludq zmm12,zmm17,zmm72113 vpmuludq zmm13,zmm18,zmm72114 vpmuludq zmm14,zmm19,zmm72115 vpmuludq zmm15,zmm20,zmm72116 vpsrlq zmm9,zmm18,322117 2118 vpmuludq zmm25,zmm24,zmm82119 vpmuludq zmm26,zmm16,zmm82120 vpmuludq zmm27,zmm17,zmm82121 vpmuludq zmm28,zmm18,zmm82122 vpmuludq zmm29,zmm19,zmm82123 vpsrlq zmm10,zmm19,322124 vpaddq zmm11,zmm11,zmm252125 vpaddq zmm12,zmm12,zmm262126 vpaddq zmm13,zmm13,zmm272127 vpaddq zmm14,zmm14,zmm282128 vpaddq zmm15,zmm15,zmm292129 2130 vpmuludq zmm25,zmm23,zmm92131 vpmuludq zmm26,zmm24,zmm92132 vpmuludq zmm28,zmm17,zmm92133 vpmuludq zmm29,zmm18,zmm92134 vpmuludq zmm27,zmm16,zmm92135 vpsrlq zmm6,zmm20,322136 vpaddq zmm11,zmm11,zmm252137 vpaddq zmm12,zmm12,zmm262138 vpaddq zmm14,zmm14,zmm282139 vpaddq zmm15,zmm15,zmm292140 vpaddq zmm13,zmm13,zmm272141 2142 vpmuludq zmm25,zmm22,zmm102143 vpmuludq zmm28,zmm16,zmm102144 vpmuludq zmm29,zmm17,zmm102145 vpmuludq zmm26,zmm23,zmm102146 vpmuludq zmm27,zmm24,zmm102147 vpaddq zmm11,zmm11,zmm252148 vpaddq zmm14,zmm14,zmm282149 vpaddq zmm15,zmm15,zmm292150 vpaddq zmm12,zmm12,zmm262151 vpaddq zmm13,zmm13,zmm272152 2153 vpmuludq zmm28,zmm24,zmm62154 vpmuludq zmm29,zmm16,zmm62155 vpmuludq zmm25,zmm21,zmm62156 vpmuludq zmm26,zmm22,zmm62157 vpmuludq zmm27,zmm23,zmm62158 vpaddq zmm14,zmm14,zmm282159 vpaddq zmm15,zmm15,zmm292160 vpaddq zmm11,zmm11,zmm252161 vpaddq zmm12,zmm12,zmm262162 vpaddq zmm13,zmm13,zmm272163 2164 2165 2166 vmovdqu64 zmm10,ZMMWORD[rsi]2167 vmovdqu64 zmm6,ZMMWORD[64+rsi]2168 lea rsi,[128+rsi]2169 2170 2171 2172 2173 vpsrlq zmm28,zmm14,262174 vpandq zmm14,zmm14,zmm52175 vpaddq zmm15,zmm15,zmm282176 2177 vpsrlq zmm25,zmm11,262178 vpandq zmm11,zmm11,zmm52179 vpaddq zmm12,zmm12,zmm252180 2181 vpsrlq zmm29,zmm15,262182 vpandq zmm15,zmm15,zmm52183 2184 vpsrlq zmm26,zmm12,262185 vpandq zmm12,zmm12,zmm52186 vpaddq zmm13,zmm13,zmm262187 2188 vpaddq zmm11,zmm11,zmm292189 vpsllq zmm29,zmm29,22190 vpaddq zmm11,zmm11,zmm292191 2192 vpsrlq zmm27,zmm13,262193 vpandq zmm13,zmm13,zmm52194 vpaddq zmm14,zmm14,zmm272195 2196 vpsrlq zmm25,zmm11,262197 vpandq zmm11,zmm11,zmm52198 vpaddq zmm12,zmm12,zmm252199 2200 vpsrlq zmm28,zmm14,262201 vpandq zmm14,zmm14,zmm52202 vpaddq zmm15,zmm15,zmm282203 2204 2205 2206 2207 2208 vpunpcklqdq zmm7,zmm10,zmm62209 vpunpckhqdq zmm6,zmm10,zmm62210 2211 2212 2213 2214 2215 2216 vmovdqa32 zmm25,ZMMWORD[128+rcx]2217 mov eax,0x77772218 kmovw k1,eax2219 2220 vpermd zmm16,zmm25,zmm162221 vpermd zmm17,zmm25,zmm172222 vpermd zmm18,zmm25,zmm182223 vpermd zmm19,zmm25,zmm192224 vpermd zmm20,zmm25,zmm202225 2226 vpermd zmm16{k1},zmm25,zmm112227 vpermd zmm17{k1},zmm25,zmm122228 vpermd zmm18{k1},zmm25,zmm132229 vpermd zmm19{k1},zmm25,zmm142230 vpermd zmm20{k1},zmm25,zmm152231 2232 vpslld zmm21,zmm17,22233 vpslld zmm22,zmm18,22234 vpslld zmm23,zmm19,22235 vpslld zmm24,zmm20,22236 vpaddd zmm21,zmm21,zmm172237 vpaddd zmm22,zmm22,zmm182238 vpaddd zmm23,zmm23,zmm192239 vpaddd zmm24,zmm24,zmm202240 2241 vpbroadcastq zmm30,QWORD[32+rcx]2242 2243 vpsrlq zmm9,zmm7,522244 vpsllq zmm10,zmm6,122245 vporq zmm9,zmm9,zmm102246 vpsrlq zmm8,zmm7,262247 vpsrlq zmm10,zmm6,142248 vpsrlq zmm6,zmm6,402249 vpandq zmm9,zmm9,zmm52250 vpandq zmm7,zmm7,zmm52251 2252 2253 2254 2255 vpaddq zmm2,zmm9,zmm22256 sub rdx,1922257 jbe NEAR $L$tail_avx5122258 jmp NEAR $L$oop_avx5122259 2260 ALIGN 322261 $L$oop_avx512:2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 vpmuludq zmm14,zmm17,zmm22291 vpaddq zmm0,zmm7,zmm02292 vpmuludq zmm15,zmm18,zmm22293 vpandq zmm8,zmm8,zmm52294 vpmuludq zmm11,zmm23,zmm22295 vpandq zmm10,zmm10,zmm52296 vpmuludq zmm12,zmm24,zmm22297 vporq zmm6,zmm6,zmm302298 vpmuludq zmm13,zmm16,zmm22299 vpaddq zmm1,zmm8,zmm12300 vpaddq zmm3,zmm10,zmm32301 vpaddq zmm4,zmm6,zmm42302 2303 vmovdqu64 zmm10,ZMMWORD[rsi]2304 vmovdqu64 zmm6,ZMMWORD[64+rsi]2305 lea rsi,[128+rsi]2306 vpmuludq zmm28,zmm19,zmm02307 vpmuludq zmm29,zmm20,zmm02308 vpmuludq zmm25,zmm16,zmm02309 vpmuludq zmm26,zmm17,zmm02310 vpaddq zmm14,zmm14,zmm282311 vpaddq zmm15,zmm15,zmm292312 vpaddq zmm11,zmm11,zmm252313 vpaddq zmm12,zmm12,zmm262314 2315 vpmuludq zmm28,zmm18,zmm12316 vpmuludq zmm29,zmm19,zmm12317 vpmuludq zmm25,zmm24,zmm12318 vpmuludq zmm27,zmm18,zmm02319 vpaddq zmm14,zmm14,zmm282320 vpaddq zmm15,zmm15,zmm292321 vpaddq zmm11,zmm11,zmm252322 vpaddq zmm13,zmm13,zmm272323 2324 vpunpcklqdq zmm7,zmm10,zmm62325 vpunpckhqdq zmm6,zmm10,zmm62326 2327 vpmuludq zmm28,zmm16,zmm32328 vpmuludq zmm29,zmm17,zmm32329 vpmuludq zmm26,zmm16,zmm12330 vpmuludq zmm27,zmm17,zmm12331 vpaddq zmm14,zmm14,zmm282332 vpaddq zmm15,zmm15,zmm292333 vpaddq zmm12,zmm12,zmm262334 vpaddq zmm13,zmm13,zmm272335 2336 vpmuludq zmm28,zmm24,zmm42337 vpmuludq zmm29,zmm16,zmm42338 vpmuludq zmm25,zmm22,zmm32339 vpmuludq zmm26,zmm23,zmm32340 vpaddq zmm14,zmm14,zmm282341 vpmuludq zmm27,zmm24,zmm32342 vpaddq zmm15,zmm15,zmm292343 vpaddq zmm11,zmm11,zmm252344 vpaddq zmm12,zmm12,zmm262345 vpaddq zmm13,zmm13,zmm272346 2347 vpmuludq zmm25,zmm21,zmm42348 vpmuludq zmm26,zmm22,zmm42349 vpmuludq zmm27,zmm23,zmm42350 vpaddq zmm0,zmm11,zmm252351 vpaddq zmm1,zmm12,zmm262352 vpaddq zmm2,zmm13,zmm272353 2354 2355 2356 2357 vpsrlq zmm9,zmm7,522358 vpsllq zmm10,zmm6,122359 2360 vpsrlq zmm3,zmm14,262361 vpandq zmm14,zmm14,zmm52362 vpaddq zmm4,zmm15,zmm32363 2364 vporq zmm9,zmm9,zmm102365 2366 vpsrlq zmm11,zmm0,262367 vpandq zmm0,zmm0,zmm52368 vpaddq zmm1,zmm1,zmm112369 2370 vpandq zmm9,zmm9,zmm52371 2372 vpsrlq zmm15,zmm4,262373 vpandq zmm4,zmm4,zmm52374 2375 vpsrlq zmm12,zmm1,262376 vpandq zmm1,zmm1,zmm52377 vpaddq zmm2,zmm2,zmm122378 2379 vpaddq zmm0,zmm0,zmm152380 vpsllq zmm15,zmm15,22381 vpaddq zmm0,zmm0,zmm152382 2383 vpaddq zmm2,zmm2,zmm92384 vpsrlq zmm8,zmm7,262385 2386 vpsrlq zmm13,zmm2,262387 vpandq zmm2,zmm2,zmm52388 vpaddq zmm3,zmm14,zmm132389 2390 vpsrlq zmm10,zmm6,142391 2392 vpsrlq zmm11,zmm0,262393 vpandq zmm0,zmm0,zmm52394 vpaddq zmm1,zmm1,zmm112395 2396 vpsrlq zmm6,zmm6,402397 2398 vpsrlq zmm14,zmm3,262399 vpandq zmm3,zmm3,zmm52400 vpaddq zmm4,zmm4,zmm142401 2402 vpandq zmm7,zmm7,zmm52403 2404 2405 2406 2407 sub rdx,1282408 ja NEAR $L$oop_avx5122409 2410 $L$tail_avx512:2411 2412 2413 2414 2415 2416 vpsrlq zmm16,zmm16,322417 vpsrlq zmm17,zmm17,322418 vpsrlq zmm18,zmm18,322419 vpsrlq zmm23,zmm23,322420 vpsrlq zmm24,zmm24,322421 vpsrlq zmm19,zmm19,322422 vpsrlq zmm20,zmm20,322423 vpsrlq zmm21,zmm21,322424 vpsrlq zmm22,zmm22,322425 2426 2427 2428 lea rsi,[rdx*1+rsi]2429 2430 2431 vpaddq zmm0,zmm7,zmm02432 2433 vpmuludq zmm14,zmm17,zmm22434 vpmuludq zmm15,zmm18,zmm22435 vpmuludq zmm11,zmm23,zmm22436 vpandq zmm8,zmm8,zmm52437 vpmuludq zmm12,zmm24,zmm22438 vpandq zmm10,zmm10,zmm52439 vpmuludq zmm13,zmm16,zmm22440 vporq zmm6,zmm6,zmm302441 vpaddq zmm1,zmm8,zmm12442 vpaddq zmm3,zmm10,zmm32443 vpaddq zmm4,zmm6,zmm42444 2445 vmovdqu xmm7,XMMWORD[rsi]2446 vpmuludq zmm28,zmm19,zmm02447 vpmuludq zmm29,zmm20,zmm02448 vpmuludq zmm25,zmm16,zmm02449 vpmuludq zmm26,zmm17,zmm02450 vpaddq zmm14,zmm14,zmm282451 vpaddq zmm15,zmm15,zmm292452 vpaddq zmm11,zmm11,zmm252453 vpaddq zmm12,zmm12,zmm262454 2455 vmovdqu xmm8,XMMWORD[16+rsi]2456 vpmuludq zmm28,zmm18,zmm12457 vpmuludq zmm29,zmm19,zmm12458 vpmuludq zmm25,zmm24,zmm12459 vpmuludq zmm27,zmm18,zmm02460 vpaddq zmm14,zmm14,zmm282461 vpaddq zmm15,zmm15,zmm292462 vpaddq zmm11,zmm11,zmm252463 vpaddq zmm13,zmm13,zmm272464 2465 vinserti128 ymm7,ymm7,XMMWORD[32+rsi],12466 vpmuludq zmm28,zmm16,zmm32467 vpmuludq zmm29,zmm17,zmm32468 vpmuludq zmm26,zmm16,zmm12469 vpmuludq zmm27,zmm17,zmm12470 vpaddq zmm14,zmm14,zmm282471 vpaddq zmm15,zmm15,zmm292472 vpaddq zmm12,zmm12,zmm262473 vpaddq zmm13,zmm13,zmm272474 2475 vinserti128 ymm8,ymm8,XMMWORD[48+rsi],12476 vpmuludq zmm28,zmm24,zmm42477 vpmuludq zmm29,zmm16,zmm42478 vpmuludq zmm25,zmm22,zmm32479 vpmuludq zmm26,zmm23,zmm32480 vpmuludq zmm27,zmm24,zmm32481 vpaddq zmm3,zmm14,zmm282482 vpaddq zmm15,zmm15,zmm292483 vpaddq zmm11,zmm11,zmm252484 vpaddq zmm12,zmm12,zmm262485 vpaddq zmm13,zmm13,zmm272486 2487 vpmuludq zmm25,zmm21,zmm42488 vpmuludq zmm26,zmm22,zmm42489 vpmuludq zmm27,zmm23,zmm42490 vpaddq zmm0,zmm11,zmm252491 vpaddq zmm1,zmm12,zmm262492 vpaddq zmm2,zmm13,zmm272493 2494 2495 2496 2497 mov eax,12498 vpermq zmm14,zmm3,0xb12499 vpermq zmm4,zmm15,0xb12500 vpermq zmm11,zmm0,0xb12501 vpermq zmm12,zmm1,0xb12502 vpermq zmm13,zmm2,0xb12503 vpaddq zmm3,zmm3,zmm142504 vpaddq zmm4,zmm4,zmm152505 vpaddq zmm0,zmm0,zmm112506 vpaddq zmm1,zmm1,zmm122507 vpaddq zmm2,zmm2,zmm132508 2509 kmovw k3,eax2510 vpermq zmm14,zmm3,0x22511 vpermq zmm15,zmm4,0x22512 vpermq zmm11,zmm0,0x22513 vpermq zmm12,zmm1,0x22514 vpermq zmm13,zmm2,0x22515 vpaddq zmm3,zmm3,zmm142516 vpaddq zmm4,zmm4,zmm152517 vpaddq zmm0,zmm0,zmm112518 vpaddq zmm1,zmm1,zmm122519 vpaddq zmm2,zmm2,zmm132520 2521 vextracti64x4 ymm14,zmm3,0x12522 vextracti64x4 ymm15,zmm4,0x12523 vextracti64x4 ymm11,zmm0,0x12524 vextracti64x4 ymm12,zmm1,0x12525 vextracti64x4 ymm13,zmm2,0x12526 vpaddq zmm3{k3}{z},zmm3,zmm142527 vpaddq zmm4{k3}{z},zmm4,zmm152528 vpaddq zmm0{k3}{z},zmm0,zmm112529 vpaddq zmm1{k3}{z},zmm1,zmm122530 vpaddq zmm2{k3}{z},zmm2,zmm132531 2532 2533 2534 vpsrlq ymm14,ymm3,262535 vpand ymm3,ymm3,ymm52536 vpsrldq ymm9,ymm7,62537 vpsrldq ymm10,ymm8,62538 vpunpckhqdq ymm6,ymm7,ymm82539 vpaddq ymm4,ymm4,ymm142540 2541 vpsrlq ymm11,ymm0,262542 vpand ymm0,ymm0,ymm52543 vpunpcklqdq ymm9,ymm9,ymm102544 vpunpcklqdq ymm7,ymm7,ymm82545 vpaddq ymm1,ymm1,ymm112546 2547 vpsrlq ymm15,ymm4,262548 vpand ymm4,ymm4,ymm52549 2550 vpsrlq ymm12,ymm1,262551 vpand ymm1,ymm1,ymm52552 vpsrlq ymm10,ymm9,302553 vpsrlq ymm9,ymm9,42554 vpaddq ymm2,ymm2,ymm122555 2556 vpaddq ymm0,ymm0,ymm152557 vpsllq ymm15,ymm15,22558 vpsrlq ymm8,ymm7,262559 vpsrlq ymm6,ymm6,402560 vpaddq ymm0,ymm0,ymm152561 2562 vpsrlq ymm13,ymm2,262563 vpand ymm2,ymm2,ymm52564 vpand ymm9,ymm9,ymm52565 vpand ymm7,ymm7,ymm52566 vpaddq ymm3,ymm3,ymm132567 2568 vpsrlq ymm11,ymm0,262569 vpand ymm0,ymm0,ymm52570 vpaddq ymm2,ymm9,ymm22571 vpand ymm8,ymm8,ymm52572 vpaddq ymm1,ymm1,ymm112573 2574 vpsrlq ymm14,ymm3,262575 vpand ymm3,ymm3,ymm52576 vpand ymm10,ymm10,ymm52577 vpor ymm6,ymm6,YMMWORD[32+rcx]2578 vpaddq ymm4,ymm4,ymm142579 2580 lea rax,[144+rsp]2581 add rdx,642582 jnz NEAR $L$tail_avx22583 2584 vpsubq ymm2,ymm2,ymm92585 vmovd DWORD[(-112)+rdi],xmm02586 vmovd DWORD[(-108)+rdi],xmm12587 vmovd DWORD[(-104)+rdi],xmm22588 vmovd DWORD[(-100)+rdi],xmm32589 vmovd DWORD[(-96)+rdi],xmm42590 vzeroall2591 movdqa xmm6,XMMWORD[80+r11]2592 movdqa xmm7,XMMWORD[96+r11]2593 movdqa xmm8,XMMWORD[112+r11]2594 movdqa xmm9,XMMWORD[128+r11]2595 movdqa xmm10,XMMWORD[144+r11]2596 movdqa xmm11,XMMWORD[160+r11]2597 movdqa xmm12,XMMWORD[176+r11]2598 movdqa xmm13,XMMWORD[192+r11]2599 movdqa xmm14,XMMWORD[208+r11]2600 movdqa xmm15,XMMWORD[224+r11]2601 lea rsp,[248+r11]2602 $L$do_avx512_epilogue:2603 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2604 mov rsi,QWORD[16+rsp]2605 DB 0F3h,0C3h ;repret2606 2607 $L$SEH_end_poly1305_blocks_avx512:2608 2609 ALIGN 322610 poly1305_init_base2_44:2611 mov QWORD[8+rsp],rdi ;WIN64 prologue2612 mov QWORD[16+rsp],rsi2613 mov rax,rsp2614 $L$SEH_begin_poly1305_init_base2_44:2615 mov rdi,rcx2616 mov rsi,rdx2617 mov rdx,r82618 2619 2620 2621 xor rax,rax2622 mov QWORD[rdi],rax2623 mov QWORD[8+rdi],rax2624 mov QWORD[16+rdi],rax2625 2626 $L$init_base2_44:2627 lea r10,[poly1305_blocks_vpmadd52]2628 lea r11,[poly1305_emit_base2_44]2629 2630 mov rax,0x0ffffffc0fffffff2631 mov rcx,0x0ffffffc0ffffffc2632 and rax,QWORD[rsi]2633 mov r8,0x00000fffffffffff2634 and rcx,QWORD[8+rsi]2635 mov r9,0x00000fffffffffff2636 and r8,rax2637 shrd rax,rcx,442638 mov QWORD[40+rdi],r82639 and rax,r92640 shr rcx,242641 mov QWORD[48+rdi],rax2642 lea rax,[rax*4+rax]2643 mov QWORD[56+rdi],rcx2644 shl rax,22645 lea rcx,[rcx*4+rcx]2646 shl rcx,22647 mov QWORD[24+rdi],rax2648 mov QWORD[32+rdi],rcx2649 mov QWORD[64+rdi],-12650 mov QWORD[rdx],r102651 mov QWORD[8+rdx],r112652 mov eax,12653 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2654 mov rsi,QWORD[16+rsp]2655 DB 0F3h,0C3h ;repret2656 2657 $L$SEH_end_poly1305_init_base2_44:2658 2659 ALIGN 322660 poly1305_blocks_vpmadd52:2661 mov QWORD[8+rsp],rdi ;WIN64 prologue2662 mov QWORD[16+rsp],rsi2663 mov rax,rsp2664 $L$SEH_begin_poly1305_blocks_vpmadd52:2665 mov rdi,rcx2666 mov rsi,rdx2667 mov rdx,r82668 mov rcx,r92669 2670 2671 2672 DB 243,15,30,2502673 shr rdx,42674 jz NEAR $L$no_data_vpmadd522675 2676 shl rcx,402677 mov r8,QWORD[64+rdi]2678 2679 2680 2681 2682 2683 2684 mov rax,32685 mov r10,12686 cmp rdx,42687 cmovae rax,r102688 test r8,r82689 cmovns rax,r102690 2691 and rax,rdx2692 jz NEAR $L$blocks_vpmadd52_4x2693 2694 sub rdx,rax2695 mov r10d,72696 mov r11d,12697 kmovw k7,r10d2698 lea r10,[$L$2_44_inp_permd]2699 kmovw k1,r11d2700 2701 vmovq xmm21,rcx2702 vmovdqa64 ymm19,YMMWORD[r10]2703 vmovdqa64 ymm20,YMMWORD[32+r10]2704 vpermq ymm21,ymm21,0xcf2705 vmovdqa64 ymm22,YMMWORD[64+r10]2706 2707 vmovdqu64 ymm16{k7}{z},[rdi]2708 vmovdqu64 ymm3{k7}{z},[40+rdi]2709 vmovdqu64 ymm4{k7}{z},[32+rdi]2710 vmovdqu64 ymm5{k7}{z},[24+rdi]2711 2712 vmovdqa64 ymm23,YMMWORD[96+r10]2713 vmovdqa64 ymm24,YMMWORD[128+r10]2714 2715 jmp NEAR $L$oop_vpmadd522716 2717 ALIGN 322718 $L$oop_vpmadd52:2719 vmovdqu32 xmm18,XMMWORD[rsi]2720 lea rsi,[16+rsi]2721 2722 vpermd ymm18,ymm19,ymm182723 vpsrlvq ymm18,ymm18,ymm202724 vpandq ymm18,ymm18,ymm222725 vporq ymm18,ymm18,ymm212726 2727 vpaddq ymm16,ymm16,ymm182728 2729 vpermq ymm0{k7}{z},ymm16,02730 vpermq ymm1{k7}{z},ymm16,852731 vpermq ymm2{k7}{z},ymm16,1702732 2733 vpxord ymm16,ymm16,ymm162734 vpxord ymm17,ymm17,ymm172735 2736 vpmadd52luq ymm16,ymm0,ymm32737 vpmadd52huq ymm17,ymm0,ymm32738 2739 vpmadd52luq ymm16,ymm1,ymm42740 vpmadd52huq ymm17,ymm1,ymm42741 2742 vpmadd52luq ymm16,ymm2,ymm52743 vpmadd52huq ymm17,ymm2,ymm52744 2745 vpsrlvq ymm18,ymm16,ymm232746 vpsllvq ymm17,ymm17,ymm242747 vpandq ymm16,ymm16,ymm222748 2749 vpaddq ymm17,ymm17,ymm182750 2751 vpermq ymm17,ymm17,1472752 2753 vpaddq ymm16,ymm16,ymm172754 2755 vpsrlvq ymm18,ymm16,ymm232756 vpandq ymm16,ymm16,ymm222757 2758 vpermq ymm18,ymm18,1472759 2760 vpaddq ymm16,ymm16,ymm182761 2762 vpermq ymm18{k1}{z},ymm16,1472763 2764 vpaddq ymm16,ymm16,ymm182765 vpsllq ymm18,ymm18,22766 2767 vpaddq ymm16,ymm16,ymm182768 2769 dec rax2770 jnz NEAR $L$oop_vpmadd522771 2772 vmovdqu64 YMMWORD[rdi]{k7},ymm162773 2774 test rdx,rdx2775 jnz NEAR $L$blocks_vpmadd52_4x2776 2777 $L$no_data_vpmadd52:2778 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2779 mov rsi,QWORD[16+rsp]2780 DB 0F3h,0C3h ;repret2781 2782 $L$SEH_end_poly1305_blocks_vpmadd52:2783 2784 ALIGN 322785 poly1305_blocks_vpmadd52_4x:2786 mov QWORD[8+rsp],rdi ;WIN64 prologue2787 mov QWORD[16+rsp],rsi2788 mov rax,rsp2789 $L$SEH_begin_poly1305_blocks_vpmadd52_4x:2790 mov rdi,rcx2791 mov rsi,rdx2792 mov rdx,r82793 mov rcx,r92794 2795 2796 2797 shr rdx,42798 jz NEAR $L$no_data_vpmadd52_4x2799 2800 shl rcx,402801 mov r8,QWORD[64+rdi]2802 2803 $L$blocks_vpmadd52_4x:2804 vpbroadcastq ymm31,rcx2805 2806 vmovdqa64 ymm28,YMMWORD[$L$x_mask44]2807 mov eax,52808 vmovdqa64 ymm29,YMMWORD[$L$x_mask42]2809 kmovw k1,eax2810 2811 test r8,r82812 js NEAR $L$init_vpmadd522813 2814 vmovq xmm0,QWORD[rdi]2815 vmovq xmm1,QWORD[8+rdi]2816 vmovq xmm2,QWORD[16+rdi]2817 2818 test rdx,32819 jnz NEAR $L$blocks_vpmadd52_2x_do2820 2821 $L$blocks_vpmadd52_4x_do:2822 vpbroadcastq ymm3,QWORD[64+rdi]2823 vpbroadcastq ymm4,QWORD[96+rdi]2824 vpbroadcastq ymm5,QWORD[128+rdi]2825 vpbroadcastq ymm16,QWORD[160+rdi]2826 2827 $L$blocks_vpmadd52_4x_key_loaded:2828 vpsllq ymm17,ymm5,22829 vpaddq ymm17,ymm17,ymm52830 vpsllq ymm17,ymm17,22831 2832 test rdx,72833 jz NEAR $L$blocks_vpmadd52_8x2834 2835 vmovdqu64 ymm26,YMMWORD[rsi]2836 vmovdqu64 ymm27,YMMWORD[32+rsi]2837 lea rsi,[64+rsi]2838 2839 vpunpcklqdq ymm25,ymm26,ymm272840 vpunpckhqdq ymm27,ymm26,ymm272841 2842 2843 2844 vpsrlq ymm26,ymm27,242845 vporq ymm26,ymm26,ymm312846 vpaddq ymm2,ymm2,ymm262847 vpandq ymm24,ymm25,ymm282848 vpsrlq ymm25,ymm25,442849 vpsllq ymm27,ymm27,202850 vporq ymm25,ymm25,ymm272851 vpandq ymm25,ymm25,ymm282852 2853 sub rdx,42854 jz NEAR $L$tail_vpmadd52_4x2855 jmp NEAR $L$oop_vpmadd52_4x2856 ud22857 2858 ALIGN 322859 $L$init_vpmadd52:2860 vmovq xmm16,QWORD[24+rdi]2861 vmovq xmm2,QWORD[56+rdi]2862 vmovq xmm17,QWORD[32+rdi]2863 vmovq xmm3,QWORD[40+rdi]2864 vmovq xmm4,QWORD[48+rdi]2865 2866 vmovdqa ymm0,ymm32867 vmovdqa ymm1,ymm42868 vmovdqa ymm5,ymm22869 2870 mov eax,22871 2872 $L$mul_init_vpmadd52:2873 vpxorq ymm18,ymm18,ymm182874 vpmadd52luq ymm18,ymm16,ymm22875 vpxorq ymm19,ymm19,ymm192876 vpmadd52huq ymm19,ymm16,ymm22877 vpxorq ymm20,ymm20,ymm202878 vpmadd52luq ymm20,ymm17,ymm22879 vpxorq ymm21,ymm21,ymm212880 vpmadd52huq ymm21,ymm17,ymm22881 vpxorq ymm22,ymm22,ymm222882 vpmadd52luq ymm22,ymm3,ymm22883 vpxorq ymm23,ymm23,ymm232884 vpmadd52huq ymm23,ymm3,ymm22885 2886 vpmadd52luq ymm18,ymm3,ymm02887 vpmadd52huq ymm19,ymm3,ymm02888 vpmadd52luq ymm20,ymm4,ymm02889 vpmadd52huq ymm21,ymm4,ymm02890 vpmadd52luq ymm22,ymm5,ymm02891 vpmadd52huq ymm23,ymm5,ymm02892 2893 vpmadd52luq ymm18,ymm17,ymm12894 vpmadd52huq ymm19,ymm17,ymm12895 vpmadd52luq ymm20,ymm3,ymm12896 vpmadd52huq ymm21,ymm3,ymm12897 vpmadd52luq ymm22,ymm4,ymm12898 vpmadd52huq ymm23,ymm4,ymm12899 2900 2901 2902 vpsrlq ymm30,ymm18,442903 vpsllq ymm19,ymm19,82904 vpandq ymm0,ymm18,ymm282905 vpaddq ymm19,ymm19,ymm302906 2907 vpaddq ymm20,ymm20,ymm192908 2909 vpsrlq ymm30,ymm20,442910 vpsllq ymm21,ymm21,82911 vpandq ymm1,ymm20,ymm282912 vpaddq ymm21,ymm21,ymm302913 2914 vpaddq ymm22,ymm22,ymm212915 2916 vpsrlq ymm30,ymm22,422917 vpsllq ymm23,ymm23,102918 vpandq ymm2,ymm22,ymm292919 vpaddq ymm23,ymm23,ymm302920 2921 vpaddq ymm0,ymm0,ymm232922 vpsllq ymm23,ymm23,22923 2924 vpaddq ymm0,ymm0,ymm232925 2926 vpsrlq ymm30,ymm0,442927 vpandq ymm0,ymm0,ymm282928 2929 vpaddq ymm1,ymm1,ymm302930 2931 dec eax2932 jz NEAR $L$done_init_vpmadd522933 2934 vpunpcklqdq ymm4,ymm1,ymm42935 vpbroadcastq xmm1,xmm12936 vpunpcklqdq ymm5,ymm2,ymm52937 vpbroadcastq xmm2,xmm22938 vpunpcklqdq ymm3,ymm0,ymm32939 vpbroadcastq xmm0,xmm02940 2941 vpsllq ymm16,ymm4,22942 vpsllq ymm17,ymm5,22943 vpaddq ymm16,ymm16,ymm42944 vpaddq ymm17,ymm17,ymm52945 vpsllq ymm16,ymm16,22946 vpsllq ymm17,ymm17,22947 2948 jmp NEAR $L$mul_init_vpmadd522949 ud22950 2951 ALIGN 322952 $L$done_init_vpmadd52:2953 vinserti128 ymm4,ymm1,xmm4,12954 vinserti128 ymm5,ymm2,xmm5,12955 vinserti128 ymm3,ymm0,xmm3,12956 2957 vpermq ymm4,ymm4,2162958 vpermq ymm5,ymm5,2162959 vpermq ymm3,ymm3,2162960 2961 vpsllq ymm16,ymm4,22962 vpaddq ymm16,ymm16,ymm42963 vpsllq ymm16,ymm16,22964 2965 vmovq xmm0,QWORD[rdi]2966 vmovq xmm1,QWORD[8+rdi]2967 vmovq xmm2,QWORD[16+rdi]2968 2969 test rdx,32970 jnz NEAR $L$done_init_vpmadd52_2x2971 2972 vmovdqu64 YMMWORD[64+rdi],ymm32973 vpbroadcastq ymm3,xmm32974 vmovdqu64 YMMWORD[96+rdi],ymm42975 vpbroadcastq ymm4,xmm42976 vmovdqu64 YMMWORD[128+rdi],ymm52977 vpbroadcastq ymm5,xmm52978 vmovdqu64 YMMWORD[160+rdi],ymm162979 vpbroadcastq ymm16,xmm162980 2981 jmp NEAR $L$blocks_vpmadd52_4x_key_loaded2982 ud22983 2984 ALIGN 322985 $L$done_init_vpmadd52_2x:2986 vmovdqu64 YMMWORD[64+rdi],ymm32987 vpsrldq ymm3,ymm3,82988 vmovdqu64 YMMWORD[96+rdi],ymm42989 vpsrldq ymm4,ymm4,82990 vmovdqu64 YMMWORD[128+rdi],ymm52991 vpsrldq ymm5,ymm5,82992 vmovdqu64 YMMWORD[160+rdi],ymm162993 vpsrldq ymm16,ymm16,82994 jmp NEAR $L$blocks_vpmadd52_2x_key_loaded2995 ud22996 2997 ALIGN 322998 $L$blocks_vpmadd52_2x_do:2999 vmovdqu64 ymm5{k1}{z},[((128+8))+rdi]3000 vmovdqu64 ymm16{k1}{z},[((160+8))+rdi]3001 vmovdqu64 ymm3{k1}{z},[((64+8))+rdi]3002 vmovdqu64 ymm4{k1}{z},[((96+8))+rdi]3003 3004 $L$blocks_vpmadd52_2x_key_loaded:3005 vmovdqu64 ymm26,YMMWORD[rsi]3006 vpxorq ymm27,ymm27,ymm273007 lea rsi,[32+rsi]3008 3009 vpunpcklqdq ymm25,ymm26,ymm273010 vpunpckhqdq ymm27,ymm26,ymm273011 3012 3013 3014 vpsrlq ymm26,ymm27,243015 vporq ymm26,ymm26,ymm313016 vpaddq ymm2,ymm2,ymm263017 vpandq ymm24,ymm25,ymm283018 vpsrlq ymm25,ymm25,443019 vpsllq ymm27,ymm27,203020 vporq ymm25,ymm25,ymm273021 vpandq ymm25,ymm25,ymm283022 3023 jmp NEAR $L$tail_vpmadd52_2x3024 ud23025 3026 ALIGN 323027 $L$oop_vpmadd52_4x:3028 3029 vpaddq ymm0,ymm0,ymm243030 vpaddq ymm1,ymm1,ymm253031 3032 vpxorq ymm18,ymm18,ymm183033 vpmadd52luq ymm18,ymm16,ymm23034 vpxorq ymm19,ymm19,ymm193035 vpmadd52huq ymm19,ymm16,ymm23036 vpxorq ymm20,ymm20,ymm203037 vpmadd52luq ymm20,ymm17,ymm23038 vpxorq ymm21,ymm21,ymm213039 vpmadd52huq ymm21,ymm17,ymm23040 vpxorq ymm22,ymm22,ymm223041 vpmadd52luq ymm22,ymm3,ymm23042 vpxorq ymm23,ymm23,ymm233043 vpmadd52huq ymm23,ymm3,ymm23044 3045 vmovdqu64 ymm26,YMMWORD[rsi]3046 vmovdqu64 ymm27,YMMWORD[32+rsi]3047 lea rsi,[64+rsi]3048 vpmadd52luq ymm18,ymm3,ymm03049 vpmadd52huq ymm19,ymm3,ymm03050 vpmadd52luq ymm20,ymm4,ymm03051 vpmadd52huq ymm21,ymm4,ymm03052 vpmadd52luq ymm22,ymm5,ymm03053 vpmadd52huq ymm23,ymm5,ymm03054 3055 vpunpcklqdq ymm25,ymm26,ymm273056 vpunpckhqdq ymm27,ymm26,ymm273057 vpmadd52luq ymm18,ymm17,ymm13058 vpmadd52huq ymm19,ymm17,ymm13059 vpmadd52luq ymm20,ymm3,ymm13060 vpmadd52huq ymm21,ymm3,ymm13061 vpmadd52luq ymm22,ymm4,ymm13062 vpmadd52huq ymm23,ymm4,ymm13063 3064 3065 3066 vpsrlq ymm30,ymm18,443067 vpsllq ymm19,ymm19,83068 vpandq ymm0,ymm18,ymm283069 vpaddq ymm19,ymm19,ymm303070 3071 vpsrlq ymm26,ymm27,243072 vporq ymm26,ymm26,ymm313073 vpaddq ymm20,ymm20,ymm193074 3075 vpsrlq ymm30,ymm20,443076 vpsllq ymm21,ymm21,83077 vpandq ymm1,ymm20,ymm283078 vpaddq ymm21,ymm21,ymm303079 3080 vpandq ymm24,ymm25,ymm283081 vpsrlq ymm25,ymm25,443082 vpsllq ymm27,ymm27,203083 vpaddq ymm22,ymm22,ymm213084 3085 vpsrlq ymm30,ymm22,423086 vpsllq ymm23,ymm23,103087 vpandq ymm2,ymm22,ymm293088 vpaddq ymm23,ymm23,ymm303089 3090 vpaddq ymm2,ymm2,ymm263091 vpaddq ymm0,ymm0,ymm233092 vpsllq ymm23,ymm23,23093 3094 vpaddq ymm0,ymm0,ymm233095 vporq ymm25,ymm25,ymm273096 vpandq ymm25,ymm25,ymm283097 3098 vpsrlq ymm30,ymm0,443099 vpandq ymm0,ymm0,ymm283100 3101 vpaddq ymm1,ymm1,ymm303102 3103 sub rdx,43104 jnz NEAR $L$oop_vpmadd52_4x3105 3106 $L$tail_vpmadd52_4x:3107 vmovdqu64 ymm5,YMMWORD[128+rdi]3108 vmovdqu64 ymm16,YMMWORD[160+rdi]3109 vmovdqu64 ymm3,YMMWORD[64+rdi]3110 vmovdqu64 ymm4,YMMWORD[96+rdi]3111 3112 $L$tail_vpmadd52_2x:3113 vpsllq ymm17,ymm5,23114 vpaddq ymm17,ymm17,ymm53115 vpsllq ymm17,ymm17,23116 3117 3118 vpaddq ymm0,ymm0,ymm243119 vpaddq ymm1,ymm1,ymm253120 3121 vpxorq ymm18,ymm18,ymm183122 vpmadd52luq ymm18,ymm16,ymm23123 vpxorq ymm19,ymm19,ymm193124 vpmadd52huq ymm19,ymm16,ymm23125 vpxorq ymm20,ymm20,ymm203126 vpmadd52luq ymm20,ymm17,ymm23127 vpxorq ymm21,ymm21,ymm213128 vpmadd52huq ymm21,ymm17,ymm23129 vpxorq ymm22,ymm22,ymm223130 vpmadd52luq ymm22,ymm3,ymm23131 vpxorq ymm23,ymm23,ymm233132 vpmadd52huq ymm23,ymm3,ymm23133 3134 vpmadd52luq ymm18,ymm3,ymm03135 vpmadd52huq ymm19,ymm3,ymm03136 vpmadd52luq ymm20,ymm4,ymm03137 vpmadd52huq ymm21,ymm4,ymm03138 vpmadd52luq ymm22,ymm5,ymm03139 vpmadd52huq ymm23,ymm5,ymm03140 3141 vpmadd52luq ymm18,ymm17,ymm13142 vpmadd52huq ymm19,ymm17,ymm13143 vpmadd52luq ymm20,ymm3,ymm13144 vpmadd52huq ymm21,ymm3,ymm13145 vpmadd52luq ymm22,ymm4,ymm13146 vpmadd52huq ymm23,ymm4,ymm13147 3148 3149 3150 3151 mov eax,13152 kmovw k1,eax3153 vpsrldq ymm24,ymm18,83154 vpsrldq ymm0,ymm19,83155 vpsrldq ymm25,ymm20,83156 vpsrldq ymm1,ymm21,83157 vpaddq ymm18,ymm18,ymm243158 vpaddq ymm19,ymm19,ymm03159 vpsrldq ymm26,ymm22,83160 vpsrldq ymm2,ymm23,83161 vpaddq ymm20,ymm20,ymm253162 vpaddq ymm21,ymm21,ymm13163 vpermq ymm24,ymm18,0x23164 vpermq ymm0,ymm19,0x23165 vpaddq ymm22,ymm22,ymm263166 vpaddq ymm23,ymm23,ymm23167 3168 vpermq ymm25,ymm20,0x23169 vpermq ymm1,ymm21,0x23170 vpaddq ymm18{k1}{z},ymm18,ymm243171 vpaddq ymm19{k1}{z},ymm19,ymm03172 vpermq ymm26,ymm22,0x23173 vpermq ymm2,ymm23,0x23174 vpaddq ymm20{k1}{z},ymm20,ymm253175 vpaddq ymm21{k1}{z},ymm21,ymm13176 vpaddq ymm22{k1}{z},ymm22,ymm263177 vpaddq ymm23{k1}{z},ymm23,ymm23178 3179 3180 3181 vpsrlq ymm30,ymm18,443182 vpsllq ymm19,ymm19,83183 vpandq ymm0,ymm18,ymm283184 vpaddq ymm19,ymm19,ymm303185 3186 vpaddq ymm20,ymm20,ymm193187 3188 vpsrlq ymm30,ymm20,443189 vpsllq ymm21,ymm21,83190 vpandq ymm1,ymm20,ymm283191 vpaddq ymm21,ymm21,ymm303192 3193 vpaddq ymm22,ymm22,ymm213194 3195 vpsrlq ymm30,ymm22,423196 vpsllq ymm23,ymm23,103197 vpandq ymm2,ymm22,ymm293198 vpaddq ymm23,ymm23,ymm303199 3200 vpaddq ymm0,ymm0,ymm233201 vpsllq ymm23,ymm23,23202 3203 vpaddq ymm0,ymm0,ymm233204 3205 vpsrlq ymm30,ymm0,443206 vpandq ymm0,ymm0,ymm283207 3208 vpaddq ymm1,ymm1,ymm303209 3210 3211 sub rdx,23212 ja NEAR $L$blocks_vpmadd52_4x_do3213 3214 vmovq QWORD[rdi],xmm03215 vmovq QWORD[8+rdi],xmm13216 vmovq QWORD[16+rdi],xmm23217 vzeroall3218 3219 $L$no_data_vpmadd52_4x:3220 mov rdi,QWORD[8+rsp] ;WIN64 epilogue3221 mov rsi,QWORD[16+rsp]3222 DB 0F3h,0C3h ;repret3223 3224 $L$SEH_end_poly1305_blocks_vpmadd52_4x:3225 3226 ALIGN 323227 poly1305_blocks_vpmadd52_8x:3228 mov QWORD[8+rsp],rdi ;WIN64 prologue3229 mov QWORD[16+rsp],rsi3230 mov rax,rsp3231 $L$SEH_begin_poly1305_blocks_vpmadd52_8x:3232 mov rdi,rcx3233 mov rsi,rdx3234 mov rdx,r83235 mov rcx,r93236 3237 3238 3239 shr rdx,43240 jz NEAR $L$no_data_vpmadd52_8x3241 3242 shl rcx,403243 mov r8,QWORD[64+rdi]3244 3245 vmovdqa64 ymm28,YMMWORD[$L$x_mask44]3246 vmovdqa64 ymm29,YMMWORD[$L$x_mask42]3247 3248 test r8,r83249 js NEAR $L$init_vpmadd523250 3251 vmovq xmm0,QWORD[rdi]3252 vmovq xmm1,QWORD[8+rdi]3253 vmovq xmm2,QWORD[16+rdi]3254 3255 $L$blocks_vpmadd52_8x:3256 3257 3258 3259 vmovdqu64 ymm5,YMMWORD[128+rdi]3260 vmovdqu64 ymm16,YMMWORD[160+rdi]3261 vmovdqu64 ymm3,YMMWORD[64+rdi]3262 vmovdqu64 ymm4,YMMWORD[96+rdi]3263 3264 vpsllq ymm17,ymm5,23265 vpaddq ymm17,ymm17,ymm53266 vpsllq ymm17,ymm17,23267 3268 vpbroadcastq ymm8,xmm53269 vpbroadcastq ymm6,xmm33270 vpbroadcastq ymm7,xmm43271 3272 vpxorq ymm18,ymm18,ymm183273 vpmadd52luq ymm18,ymm16,ymm83274 vpxorq ymm19,ymm19,ymm193275 vpmadd52huq ymm19,ymm16,ymm83276 vpxorq ymm20,ymm20,ymm203277 vpmadd52luq ymm20,ymm17,ymm83278 vpxorq ymm21,ymm21,ymm213279 vpmadd52huq ymm21,ymm17,ymm83280 vpxorq ymm22,ymm22,ymm223281 vpmadd52luq ymm22,ymm3,ymm83282 vpxorq ymm23,ymm23,ymm233283 vpmadd52huq ymm23,ymm3,ymm83284 3285 vpmadd52luq ymm18,ymm3,ymm63286 vpmadd52huq ymm19,ymm3,ymm63287 vpmadd52luq ymm20,ymm4,ymm63288 vpmadd52huq ymm21,ymm4,ymm63289 vpmadd52luq ymm22,ymm5,ymm63290 vpmadd52huq ymm23,ymm5,ymm63291 3292 vpmadd52luq ymm18,ymm17,ymm73293 vpmadd52huq ymm19,ymm17,ymm73294 vpmadd52luq ymm20,ymm3,ymm73295 vpmadd52huq ymm21,ymm3,ymm73296 vpmadd52luq ymm22,ymm4,ymm73297 vpmadd52huq ymm23,ymm4,ymm73298 3299 3300 3301 vpsrlq ymm30,ymm18,443302 vpsllq ymm19,ymm19,83303 vpandq ymm6,ymm18,ymm283304 vpaddq ymm19,ymm19,ymm303305 3306 vpaddq ymm20,ymm20,ymm193307 3308 vpsrlq ymm30,ymm20,443309 vpsllq ymm21,ymm21,83310 vpandq ymm7,ymm20,ymm283311 vpaddq ymm21,ymm21,ymm303312 3313 vpaddq ymm22,ymm22,ymm213314 3315 vpsrlq ymm30,ymm22,423316 vpsllq ymm23,ymm23,103317 vpandq ymm8,ymm22,ymm293318 vpaddq ymm23,ymm23,ymm303319 3320 vpaddq ymm6,ymm6,ymm233321 vpsllq ymm23,ymm23,23322 3323 vpaddq ymm6,ymm6,ymm233324 3325 vpsrlq ymm30,ymm6,443326 vpandq ymm6,ymm6,ymm283327 3328 vpaddq ymm7,ymm7,ymm303329 3330 3331 3332 3333 3334 vpunpcklqdq ymm26,ymm8,ymm53335 vpunpckhqdq ymm5,ymm8,ymm53336 vpunpcklqdq ymm24,ymm6,ymm33337 vpunpckhqdq ymm3,ymm6,ymm33338 vpunpcklqdq ymm25,ymm7,ymm43339 vpunpckhqdq ymm4,ymm7,ymm43340 vshufi64x2 zmm8,zmm26,zmm5,0x443341 vshufi64x2 zmm6,zmm24,zmm3,0x443342 vshufi64x2 zmm7,zmm25,zmm4,0x443343 3344 vmovdqu64 zmm26,ZMMWORD[rsi]3345 vmovdqu64 zmm27,ZMMWORD[64+rsi]3346 lea rsi,[128+rsi]3347 3348 vpsllq zmm10,zmm8,23349 vpsllq zmm9,zmm7,23350 vpaddq zmm10,zmm10,zmm83351 vpaddq zmm9,zmm9,zmm73352 vpsllq zmm10,zmm10,23353 vpsllq zmm9,zmm9,23354 3355 vpbroadcastq zmm31,rcx3356 vpbroadcastq zmm28,xmm283357 vpbroadcastq zmm29,xmm293358 3359 vpbroadcastq zmm16,xmm93360 vpbroadcastq zmm17,xmm103361 vpbroadcastq zmm3,xmm63362 vpbroadcastq zmm4,xmm73363 vpbroadcastq zmm5,xmm83364 3365 vpunpcklqdq zmm25,zmm26,zmm273366 vpunpckhqdq zmm27,zmm26,zmm273367 3368 3369 3370 vpsrlq zmm26,zmm27,243371 vporq zmm26,zmm26,zmm313372 vpaddq zmm2,zmm2,zmm263373 vpandq zmm24,zmm25,zmm283374 vpsrlq zmm25,zmm25,443375 vpsllq zmm27,zmm27,203376 vporq zmm25,zmm25,zmm273377 vpandq zmm25,zmm25,zmm283378 3379 sub rdx,83380 jz NEAR $L$tail_vpmadd52_8x3381 jmp NEAR $L$oop_vpmadd52_8x3382 3383 ALIGN 323384 $L$oop_vpmadd52_8x:3385 3386 vpaddq zmm0,zmm0,zmm243387 vpaddq zmm1,zmm1,zmm253388 3389 vpxorq zmm18,zmm18,zmm183390 vpmadd52luq zmm18,zmm16,zmm23391 vpxorq zmm19,zmm19,zmm193392 vpmadd52huq zmm19,zmm16,zmm23393 vpxorq zmm20,zmm20,zmm203394 vpmadd52luq zmm20,zmm17,zmm23395 vpxorq zmm21,zmm21,zmm213396 vpmadd52huq zmm21,zmm17,zmm23397 vpxorq zmm22,zmm22,zmm223398 vpmadd52luq zmm22,zmm3,zmm23399 vpxorq zmm23,zmm23,zmm233400 vpmadd52huq zmm23,zmm3,zmm23401 3402 vmovdqu64 zmm26,ZMMWORD[rsi]3403 vmovdqu64 zmm27,ZMMWORD[64+rsi]3404 lea rsi,[128+rsi]3405 vpmadd52luq zmm18,zmm3,zmm03406 vpmadd52huq zmm19,zmm3,zmm03407 vpmadd52luq zmm20,zmm4,zmm03408 vpmadd52huq zmm21,zmm4,zmm03409 vpmadd52luq zmm22,zmm5,zmm03410 vpmadd52huq zmm23,zmm5,zmm03411 3412 vpunpcklqdq zmm25,zmm26,zmm273413 vpunpckhqdq zmm27,zmm26,zmm273414 vpmadd52luq zmm18,zmm17,zmm13415 vpmadd52huq zmm19,zmm17,zmm13416 vpmadd52luq zmm20,zmm3,zmm13417 vpmadd52huq zmm21,zmm3,zmm13418 vpmadd52luq zmm22,zmm4,zmm13419 vpmadd52huq zmm23,zmm4,zmm13420 3421 3422 3423 vpsrlq zmm30,zmm18,443424 vpsllq zmm19,zmm19,83425 vpandq zmm0,zmm18,zmm283426 vpaddq zmm19,zmm19,zmm303427 3428 vpsrlq zmm26,zmm27,243429 vporq zmm26,zmm26,zmm313430 vpaddq zmm20,zmm20,zmm193431 3432 vpsrlq zmm30,zmm20,443433 vpsllq zmm21,zmm21,83434 vpandq zmm1,zmm20,zmm283435 vpaddq zmm21,zmm21,zmm303436 3437 vpandq zmm24,zmm25,zmm283438 vpsrlq zmm25,zmm25,443439 vpsllq zmm27,zmm27,203440 vpaddq zmm22,zmm22,zmm213441 3442 vpsrlq zmm30,zmm22,423443 vpsllq zmm23,zmm23,103444 vpandq zmm2,zmm22,zmm293445 vpaddq zmm23,zmm23,zmm303446 3447 vpaddq zmm2,zmm2,zmm263448 vpaddq zmm0,zmm0,zmm233449 vpsllq zmm23,zmm23,23450 3451 vpaddq zmm0,zmm0,zmm233452 vporq zmm25,zmm25,zmm273453 vpandq zmm25,zmm25,zmm283454 3455 vpsrlq zmm30,zmm0,443456 vpandq zmm0,zmm0,zmm283457 3458 vpaddq zmm1,zmm1,zmm303459 3460 sub rdx,83461 jnz NEAR $L$oop_vpmadd52_8x3462 3463 $L$tail_vpmadd52_8x:3464 3465 vpaddq zmm0,zmm0,zmm243466 vpaddq zmm1,zmm1,zmm253467 3468 vpxorq zmm18,zmm18,zmm183469 vpmadd52luq zmm18,zmm9,zmm23470 vpxorq zmm19,zmm19,zmm193471 vpmadd52huq zmm19,zmm9,zmm23472 vpxorq zmm20,zmm20,zmm203473 vpmadd52luq zmm20,zmm10,zmm23474 vpxorq zmm21,zmm21,zmm213475 vpmadd52huq zmm21,zmm10,zmm23476 vpxorq zmm22,zmm22,zmm223477 vpmadd52luq zmm22,zmm6,zmm23478 vpxorq zmm23,zmm23,zmm233479 vpmadd52huq zmm23,zmm6,zmm23480 3481 vpmadd52luq zmm18,zmm6,zmm03482 vpmadd52huq zmm19,zmm6,zmm03483 vpmadd52luq zmm20,zmm7,zmm03484 vpmadd52huq zmm21,zmm7,zmm03485 vpmadd52luq zmm22,zmm8,zmm03486 vpmadd52huq zmm23,zmm8,zmm03487 3488 vpmadd52luq zmm18,zmm10,zmm13489 vpmadd52huq zmm19,zmm10,zmm13490 vpmadd52luq zmm20,zmm6,zmm13491 vpmadd52huq zmm21,zmm6,zmm13492 vpmadd52luq zmm22,zmm7,zmm13493 vpmadd52huq zmm23,zmm7,zmm13494 3495 3496 3497 3498 mov eax,13499 kmovw k1,eax3500 vpsrldq zmm24,zmm18,83501 vpsrldq zmm0,zmm19,83502 vpsrldq zmm25,zmm20,83503 vpsrldq zmm1,zmm21,83504 vpaddq zmm18,zmm18,zmm243505 vpaddq zmm19,zmm19,zmm03506 vpsrldq zmm26,zmm22,83507 vpsrldq zmm2,zmm23,83508 vpaddq zmm20,zmm20,zmm253509 vpaddq zmm21,zmm21,zmm13510 vpermq zmm24,zmm18,0x23511 vpermq zmm0,zmm19,0x23512 vpaddq zmm22,zmm22,zmm263513 vpaddq zmm23,zmm23,zmm23514 3515 vpermq zmm25,zmm20,0x23516 vpermq zmm1,zmm21,0x23517 vpaddq zmm18,zmm18,zmm243518 vpaddq zmm19,zmm19,zmm03519 vpermq zmm26,zmm22,0x23520 vpermq zmm2,zmm23,0x23521 vpaddq zmm20,zmm20,zmm253522 vpaddq zmm21,zmm21,zmm13523 vextracti64x4 ymm24,zmm18,13524 vextracti64x4 ymm0,zmm19,13525 vpaddq zmm22,zmm22,zmm263526 vpaddq zmm23,zmm23,zmm23527 3528 vextracti64x4 ymm25,zmm20,13529 vextracti64x4 ymm1,zmm21,13530 vextracti64x4 ymm26,zmm22,13531 vextracti64x4 ymm2,zmm23,13532 vpaddq ymm18{k1}{z},ymm18,ymm243533 vpaddq ymm19{k1}{z},ymm19,ymm03534 vpaddq ymm20{k1}{z},ymm20,ymm253535 vpaddq ymm21{k1}{z},ymm21,ymm13536 vpaddq ymm22{k1}{z},ymm22,ymm263537 vpaddq ymm23{k1}{z},ymm23,ymm23538 3539 3540 3541 vpsrlq ymm30,ymm18,443542 vpsllq ymm19,ymm19,83543 vpandq ymm0,ymm18,ymm283544 vpaddq ymm19,ymm19,ymm303545 3546 vpaddq ymm20,ymm20,ymm193547 3548 vpsrlq ymm30,ymm20,443549 vpsllq ymm21,ymm21,83550 vpandq ymm1,ymm20,ymm283551 vpaddq ymm21,ymm21,ymm303552 3553 vpaddq ymm22,ymm22,ymm213554 3555 vpsrlq ymm30,ymm22,423556 vpsllq ymm23,ymm23,103557 vpandq ymm2,ymm22,ymm293558 vpaddq ymm23,ymm23,ymm303559 3560 vpaddq ymm0,ymm0,ymm233561 vpsllq ymm23,ymm23,23562 3563 vpaddq ymm0,ymm0,ymm233564 3565 vpsrlq ymm30,ymm0,443566 vpandq ymm0,ymm0,ymm283567 3568 vpaddq ymm1,ymm1,ymm303569 3570 3571 3572 vmovq QWORD[rdi],xmm03573 vmovq QWORD[8+rdi],xmm13574 vmovq QWORD[16+rdi],xmm23575 vzeroall3576 3577 $L$no_data_vpmadd52_8x:3578 mov rdi,QWORD[8+rsp] ;WIN64 epilogue3579 mov rsi,QWORD[16+rsp]3580 DB 0F3h,0C3h ;repret3581 3582 $L$SEH_end_poly1305_blocks_vpmadd52_8x:3583 3584 ALIGN 323585 poly1305_emit_base2_44:3586 mov QWORD[8+rsp],rdi ;WIN64 prologue3587 mov QWORD[16+rsp],rsi3588 mov rax,rsp3589 $L$SEH_begin_poly1305_emit_base2_44:3590 mov rdi,rcx3591 mov rsi,rdx3592 mov rdx,r83593 3594 3595 3596 DB 243,15,30,2503597 mov r8,QWORD[rdi]3598 mov r9,QWORD[8+rdi]3599 mov r10,QWORD[16+rdi]3600 3601 mov rax,r93602 shr r9,203603 shl rax,443604 mov rcx,r103605 shr r10,403606 shl rcx,243607 3608 add r8,rax3609 adc r9,rcx3610 adc r10,03611 3612 mov rax,r83613 add r8,53614 mov rcx,r93615 adc r9,03616 adc r10,03617 shr r10,23618 cmovnz rax,r83619 cmovnz rcx,r93620 3621 add rax,QWORD[rdx]3622 adc rcx,QWORD[8+rdx]3623 mov QWORD[rsi],rax3624 mov QWORD[8+rsi],rcx3625 3626 mov rdi,QWORD[8+rsp] ;WIN64 epilogue3627 mov rsi,QWORD[16+rsp]3628 DB 0F3h,0C3h ;repret3629 3630 $L$SEH_end_poly1305_emit_base2_44:3631 ALIGN 643632 $L$const:3633 $L$mask24:3634 DD 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,03635 $L$129:3636 DD 16777216,0,16777216,0,16777216,0,16777216,03637 $L$mask26:3638 DD 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,03639 $L$permd_avx2:3640 DD 2,2,2,3,2,0,2,13641 $L$permd_avx512:3642 DD 0,0,0,1,0,2,0,3,0,4,0,5,0,6,0,73643 3644 $L$2_44_inp_permd:3645 DD 0,1,1,2,2,3,7,73646 $L$2_44_inp_shift:3647 DQ 0,12,24,643648 $L$2_44_mask:3649 DQ 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff3650 $L$2_44_shift_rgt:3651 DQ 44,44,42,643652 $L$2_44_shift_lft:3653 DQ 8,8,10,643654 3655 ALIGN 643656 $L$x_mask44:3657 DQ 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff3658 DQ 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff3659 $L$x_mask42:3660 DQ 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff3661 DQ 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff3662 214 DB 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54 3663 215 DB 95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32 … … 3910 462 DD $L$SEH_end_poly1305_emit wrt ..imagebase 3911 463 DD $L$SEH_info_poly1305_emit wrt ..imagebase 3912 DD $L$SEH_begin_poly1305_blocks_avx wrt ..imagebase3913 DD $L$base2_64_avx wrt ..imagebase3914 DD $L$SEH_info_poly1305_blocks_avx_1 wrt ..imagebase3915 3916 DD $L$base2_64_avx wrt ..imagebase3917 DD $L$even_avx wrt ..imagebase3918 DD $L$SEH_info_poly1305_blocks_avx_2 wrt ..imagebase3919 3920 DD $L$even_avx wrt ..imagebase3921 DD $L$SEH_end_poly1305_blocks_avx wrt ..imagebase3922 DD $L$SEH_info_poly1305_blocks_avx_3 wrt ..imagebase3923 3924 DD $L$SEH_begin_poly1305_emit_avx wrt ..imagebase3925 DD $L$SEH_end_poly1305_emit_avx wrt ..imagebase3926 DD $L$SEH_info_poly1305_emit_avx wrt ..imagebase3927 DD $L$SEH_begin_poly1305_blocks_avx2 wrt ..imagebase3928 DD $L$base2_64_avx2 wrt ..imagebase3929 DD $L$SEH_info_poly1305_blocks_avx2_1 wrt ..imagebase3930 3931 DD $L$base2_64_avx2 wrt ..imagebase3932 DD $L$even_avx2 wrt ..imagebase3933 DD $L$SEH_info_poly1305_blocks_avx2_2 wrt ..imagebase3934 3935 DD $L$even_avx2 wrt ..imagebase3936 DD $L$SEH_end_poly1305_blocks_avx2 wrt ..imagebase3937 DD $L$SEH_info_poly1305_blocks_avx2_3 wrt ..imagebase3938 DD $L$SEH_begin_poly1305_blocks_avx512 wrt ..imagebase3939 DD $L$SEH_end_poly1305_blocks_avx512 wrt ..imagebase3940 DD $L$SEH_info_poly1305_blocks_avx512 wrt ..imagebase3941 464 section .xdata rdata align=8 3942 465 ALIGN 8 … … 3955 478 DD se_handler wrt ..imagebase 3956 479 DD $L$SEH_begin_poly1305_emit wrt ..imagebase,$L$SEH_begin_poly1305_emit wrt ..imagebase 3957 $L$SEH_info_poly1305_blocks_avx_1:3958 DB 9,0,0,03959 DD se_handler wrt ..imagebase3960 DD $L$blocks_avx_body wrt ..imagebase,$L$blocks_avx_epilogue wrt ..imagebase3961 3962 $L$SEH_info_poly1305_blocks_avx_2:3963 DB 9,0,0,03964 DD se_handler wrt ..imagebase3965 DD $L$base2_64_avx_body wrt ..imagebase,$L$base2_64_avx_epilogue wrt ..imagebase3966 3967 $L$SEH_info_poly1305_blocks_avx_3:3968 DB 9,0,0,03969 DD avx_handler wrt ..imagebase3970 DD $L$do_avx_body wrt ..imagebase,$L$do_avx_epilogue wrt ..imagebase3971 3972 $L$SEH_info_poly1305_emit_avx:3973 DB 9,0,0,03974 DD se_handler wrt ..imagebase3975 DD $L$SEH_begin_poly1305_emit_avx wrt ..imagebase,$L$SEH_begin_poly1305_emit_avx wrt ..imagebase3976 $L$SEH_info_poly1305_blocks_avx2_1:3977 DB 9,0,0,03978 DD se_handler wrt ..imagebase3979 DD $L$blocks_avx2_body wrt ..imagebase,$L$blocks_avx2_epilogue wrt ..imagebase3980 3981 $L$SEH_info_poly1305_blocks_avx2_2:3982 DB 9,0,0,03983 DD se_handler wrt ..imagebase3984 DD $L$base2_64_avx2_body wrt ..imagebase,$L$base2_64_avx2_epilogue wrt ..imagebase3985 3986 $L$SEH_info_poly1305_blocks_avx2_3:3987 DB 9,0,0,03988 DD avx_handler wrt ..imagebase3989 DD $L$do_avx2_body wrt ..imagebase,$L$do_avx2_epilogue wrt ..imagebase3990 $L$SEH_info_poly1305_blocks_avx512:3991 DB 9,0,0,03992 DD avx_handler wrt ..imagebase3993 DD $L$do_avx512_body wrt ..imagebase,$L$do_avx512_epilogue wrt ..imagebase
Note:
See TracChangeset
for help on using the changeset viewer.