Changeset 95221 in vbox for trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/x86_64-mont5.S
- Timestamp:
- Jun 8, 2022 8:35:57 AM (3 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/x86_64-mont5.S
r95219 r95221 30 30 test r9d,7 31 31 jnz NEAR $L$mul_enter 32 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]33 32 jmp NEAR $L$mul4x_enter 34 33 … … 481 480 482 481 $L$mul4x_enter: 483 and r11d,0x80108484 cmp r11d,0x80108485 je NEAR $L$mulx4x_enter486 482 push rbx 487 483 … … 1127 1123 mov rax,rsp 1128 1124 1129 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]1130 and r11d,0x801081131 cmp r11d,0x801081132 je NEAR $L$powerx5_enter1133 1125 push rbx 1134 1126 … … 2234 2226 mov rbp,rcx 2235 2227 DB 102,73,15,110,218 2236 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]2237 and r11d,0x801082238 cmp r11d,0x801082239 jne NEAR $L$from_mont_nox2240 2241 lea rdi,[r9*1+rax]2242 call __bn_sqrx8x_reduction2243 call __bn_postx4x_internal2244 2245 pxor xmm0,xmm02246 lea rax,[48+rsp]2247 jmp NEAR $L$from_mont_zero2248 2249 ALIGN 322250 $L$from_mont_nox:2251 2228 call __bn_sqr8x_reduction 2252 2229 call __bn_post4x_internal … … 2289 2266 2290 2267 $L$SEH_end_bn_from_mont8x: 2291 2292 ALIGN 322293 bn_mulx4x_mont_gather5:2294 mov QWORD[8+rsp],rdi ;WIN64 prologue2295 mov QWORD[16+rsp],rsi2296 mov rax,rsp2297 $L$SEH_begin_bn_mulx4x_mont_gather5:2298 mov rdi,rcx2299 mov rsi,rdx2300 mov rdx,r82301 mov rcx,r92302 mov r8,QWORD[40+rsp]2303 mov r9,QWORD[48+rsp]2304 2305 2306 2307 mov rax,rsp2308 2309 $L$mulx4x_enter:2310 push rbx2311 2312 push rbp2313 2314 push r122315 2316 push r132317 2318 push r142319 2320 push r152321 2322 $L$mulx4x_prologue:2323 2324 shl r9d,32325 lea r10,[r9*2+r9]2326 neg r92327 mov r8,QWORD[r8]2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 lea r11,[((-320))+r9*2+rsp]2339 mov rbp,rsp2340 sub r11,rdi2341 and r11,40952342 cmp r10,r112343 jb NEAR $L$mulx4xsp_alt2344 sub rbp,r112345 lea rbp,[((-320))+r9*2+rbp]2346 jmp NEAR $L$mulx4xsp_done2347 2348 $L$mulx4xsp_alt:2349 lea r10,[((4096-320))+r9*2]2350 lea rbp,[((-320))+r9*2+rbp]2351 sub r11,r102352 mov r10,02353 cmovc r11,r102354 sub rbp,r112355 $L$mulx4xsp_done:2356 and rbp,-642357 mov r11,rsp2358 sub r11,rbp2359 and r11,-40962360 lea rsp,[rbp*1+r11]2361 mov r10,QWORD[rsp]2362 cmp rsp,rbp2363 ja NEAR $L$mulx4x_page_walk2364 jmp NEAR $L$mulx4x_page_walk_done2365 2366 $L$mulx4x_page_walk:2367 lea rsp,[((-4096))+rsp]2368 mov r10,QWORD[rsp]2369 cmp rsp,rbp2370 ja NEAR $L$mulx4x_page_walk2371 $L$mulx4x_page_walk_done:2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 mov QWORD[32+rsp],r82386 mov QWORD[40+rsp],rax2387 2388 $L$mulx4x_body:2389 call mulx4x_internal2390 2391 mov rsi,QWORD[40+rsp]2392 2393 mov rax,12394 2395 mov r15,QWORD[((-48))+rsi]2396 2397 mov r14,QWORD[((-40))+rsi]2398 2399 mov r13,QWORD[((-32))+rsi]2400 2401 mov r12,QWORD[((-24))+rsi]2402 2403 mov rbp,QWORD[((-16))+rsi]2404 2405 mov rbx,QWORD[((-8))+rsi]2406 2407 lea rsp,[rsi]2408 2409 $L$mulx4x_epilogue:2410 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2411 mov rsi,QWORD[16+rsp]2412 DB 0F3h,0C3h ;repret2413 2414 $L$SEH_end_bn_mulx4x_mont_gather5:2415 2416 2417 ALIGN 322418 mulx4x_internal:2419 2420 mov QWORD[8+rsp],r92421 mov r10,r92422 neg r92423 shl r9,52424 neg r102425 lea r13,[128+r9*1+rdx]2426 shr r9,5+52427 movd xmm5,DWORD[56+rax]2428 sub r9,12429 lea rax,[$L$inc]2430 mov QWORD[((16+8))+rsp],r132431 mov QWORD[((24+8))+rsp],r92432 mov QWORD[((56+8))+rsp],rdi2433 movdqa xmm0,XMMWORD[rax]2434 movdqa xmm1,XMMWORD[16+rax]2435 lea r10,[((88-112))+r10*1+rsp]2436 lea rdi,[128+rdx]2437 2438 pshufd xmm5,xmm5,02439 movdqa xmm4,xmm12440 DB 0x672441 movdqa xmm2,xmm12442 DB 0x672443 paddd xmm1,xmm02444 pcmpeqd xmm0,xmm52445 movdqa xmm3,xmm42446 paddd xmm2,xmm12447 pcmpeqd xmm1,xmm52448 movdqa XMMWORD[112+r10],xmm02449 movdqa xmm0,xmm42450 2451 paddd xmm3,xmm22452 pcmpeqd xmm2,xmm52453 movdqa XMMWORD[128+r10],xmm12454 movdqa xmm1,xmm42455 2456 paddd xmm0,xmm32457 pcmpeqd xmm3,xmm52458 movdqa XMMWORD[144+r10],xmm22459 movdqa xmm2,xmm42460 2461 paddd xmm1,xmm02462 pcmpeqd xmm0,xmm52463 movdqa XMMWORD[160+r10],xmm32464 movdqa xmm3,xmm42465 paddd xmm2,xmm12466 pcmpeqd xmm1,xmm52467 movdqa XMMWORD[176+r10],xmm02468 movdqa xmm0,xmm42469 2470 paddd xmm3,xmm22471 pcmpeqd xmm2,xmm52472 movdqa XMMWORD[192+r10],xmm12473 movdqa xmm1,xmm42474 2475 paddd xmm0,xmm32476 pcmpeqd xmm3,xmm52477 movdqa XMMWORD[208+r10],xmm22478 movdqa xmm2,xmm42479 2480 paddd xmm1,xmm02481 pcmpeqd xmm0,xmm52482 movdqa XMMWORD[224+r10],xmm32483 movdqa xmm3,xmm42484 paddd xmm2,xmm12485 pcmpeqd xmm1,xmm52486 movdqa XMMWORD[240+r10],xmm02487 movdqa xmm0,xmm42488 2489 paddd xmm3,xmm22490 pcmpeqd xmm2,xmm52491 movdqa XMMWORD[256+r10],xmm12492 movdqa xmm1,xmm42493 2494 paddd xmm0,xmm32495 pcmpeqd xmm3,xmm52496 movdqa XMMWORD[272+r10],xmm22497 movdqa xmm2,xmm42498 2499 paddd xmm1,xmm02500 pcmpeqd xmm0,xmm52501 movdqa XMMWORD[288+r10],xmm32502 movdqa xmm3,xmm42503 DB 0x672504 paddd xmm2,xmm12505 pcmpeqd xmm1,xmm52506 movdqa XMMWORD[304+r10],xmm02507 2508 paddd xmm3,xmm22509 pcmpeqd xmm2,xmm52510 movdqa XMMWORD[320+r10],xmm12511 2512 pcmpeqd xmm3,xmm52513 movdqa XMMWORD[336+r10],xmm22514 2515 pand xmm0,XMMWORD[64+rdi]2516 pand xmm1,XMMWORD[80+rdi]2517 pand xmm2,XMMWORD[96+rdi]2518 movdqa XMMWORD[352+r10],xmm32519 pand xmm3,XMMWORD[112+rdi]2520 por xmm0,xmm22521 por xmm1,xmm32522 movdqa xmm4,XMMWORD[((-128))+rdi]2523 movdqa xmm5,XMMWORD[((-112))+rdi]2524 movdqa xmm2,XMMWORD[((-96))+rdi]2525 pand xmm4,XMMWORD[112+r10]2526 movdqa xmm3,XMMWORD[((-80))+rdi]2527 pand xmm5,XMMWORD[128+r10]2528 por xmm0,xmm42529 pand xmm2,XMMWORD[144+r10]2530 por xmm1,xmm52531 pand xmm3,XMMWORD[160+r10]2532 por xmm0,xmm22533 por xmm1,xmm32534 movdqa xmm4,XMMWORD[((-64))+rdi]2535 movdqa xmm5,XMMWORD[((-48))+rdi]2536 movdqa xmm2,XMMWORD[((-32))+rdi]2537 pand xmm4,XMMWORD[176+r10]2538 movdqa xmm3,XMMWORD[((-16))+rdi]2539 pand xmm5,XMMWORD[192+r10]2540 por xmm0,xmm42541 pand xmm2,XMMWORD[208+r10]2542 por xmm1,xmm52543 pand xmm3,XMMWORD[224+r10]2544 por xmm0,xmm22545 por xmm1,xmm32546 movdqa xmm4,XMMWORD[rdi]2547 movdqa xmm5,XMMWORD[16+rdi]2548 movdqa xmm2,XMMWORD[32+rdi]2549 pand xmm4,XMMWORD[240+r10]2550 movdqa xmm3,XMMWORD[48+rdi]2551 pand xmm5,XMMWORD[256+r10]2552 por xmm0,xmm42553 pand xmm2,XMMWORD[272+r10]2554 por xmm1,xmm52555 pand xmm3,XMMWORD[288+r10]2556 por xmm0,xmm22557 por xmm1,xmm32558 pxor xmm0,xmm12559 pshufd xmm1,xmm0,0x4e2560 por xmm0,xmm12561 lea rdi,[256+rdi]2562 DB 102,72,15,126,1942563 lea rbx,[((64+32+8))+rsp]2564 2565 mov r9,rdx2566 mulx rax,r8,QWORD[rsi]2567 mulx r12,r11,QWORD[8+rsi]2568 add r11,rax2569 mulx r13,rax,QWORD[16+rsi]2570 adc r12,rax2571 adc r13,02572 mulx r14,rax,QWORD[24+rsi]2573 2574 mov r15,r82575 imul r8,QWORD[((32+8))+rsp]2576 xor rbp,rbp2577 mov rdx,r82578 2579 mov QWORD[((8+8))+rsp],rdi2580 2581 lea rsi,[32+rsi]2582 adcx r13,rax2583 adcx r14,rbp2584 2585 mulx r10,rax,QWORD[rcx]2586 adcx r15,rax2587 adox r10,r112588 mulx r11,rax,QWORD[8+rcx]2589 adcx r10,rax2590 adox r11,r122591 mulx r12,rax,QWORD[16+rcx]2592 mov rdi,QWORD[((24+8))+rsp]2593 mov QWORD[((-32))+rbx],r102594 adcx r11,rax2595 adox r12,r132596 mulx r15,rax,QWORD[24+rcx]2597 mov rdx,r92598 mov QWORD[((-24))+rbx],r112599 adcx r12,rax2600 adox r15,rbp2601 lea rcx,[32+rcx]2602 mov QWORD[((-16))+rbx],r122603 jmp NEAR $L$mulx4x_1st2604 2605 ALIGN 322606 $L$mulx4x_1st:2607 adcx r15,rbp2608 mulx rax,r10,QWORD[rsi]2609 adcx r10,r142610 mulx r14,r11,QWORD[8+rsi]2611 adcx r11,rax2612 mulx rax,r12,QWORD[16+rsi]2613 adcx r12,r142614 mulx r14,r13,QWORD[24+rsi]2615 DB 0x67,0x672616 mov rdx,r82617 adcx r13,rax2618 adcx r14,rbp2619 lea rsi,[32+rsi]2620 lea rbx,[32+rbx]2621 2622 adox r10,r152623 mulx r15,rax,QWORD[rcx]2624 adcx r10,rax2625 adox r11,r152626 mulx r15,rax,QWORD[8+rcx]2627 adcx r11,rax2628 adox r12,r152629 mulx r15,rax,QWORD[16+rcx]2630 mov QWORD[((-40))+rbx],r102631 adcx r12,rax2632 mov QWORD[((-32))+rbx],r112633 adox r13,r152634 mulx r15,rax,QWORD[24+rcx]2635 mov rdx,r92636 mov QWORD[((-24))+rbx],r122637 adcx r13,rax2638 adox r15,rbp2639 lea rcx,[32+rcx]2640 mov QWORD[((-16))+rbx],r132641 2642 dec rdi2643 jnz NEAR $L$mulx4x_1st2644 2645 mov rax,QWORD[8+rsp]2646 adc r15,rbp2647 lea rsi,[rax*1+rsi]2648 add r14,r152649 mov rdi,QWORD[((8+8))+rsp]2650 adc rbp,rbp2651 mov QWORD[((-8))+rbx],r142652 jmp NEAR $L$mulx4x_outer2653 2654 ALIGN 322655 $L$mulx4x_outer:2656 lea r10,[((16-256))+rbx]2657 pxor xmm4,xmm42658 DB 0x67,0x672659 pxor xmm5,xmm52660 movdqa xmm0,XMMWORD[((-128))+rdi]2661 movdqa xmm1,XMMWORD[((-112))+rdi]2662 movdqa xmm2,XMMWORD[((-96))+rdi]2663 pand xmm0,XMMWORD[256+r10]2664 movdqa xmm3,XMMWORD[((-80))+rdi]2665 pand xmm1,XMMWORD[272+r10]2666 por xmm4,xmm02667 pand xmm2,XMMWORD[288+r10]2668 por xmm5,xmm12669 pand xmm3,XMMWORD[304+r10]2670 por xmm4,xmm22671 por xmm5,xmm32672 movdqa xmm0,XMMWORD[((-64))+rdi]2673 movdqa xmm1,XMMWORD[((-48))+rdi]2674 movdqa xmm2,XMMWORD[((-32))+rdi]2675 pand xmm0,XMMWORD[320+r10]2676 movdqa xmm3,XMMWORD[((-16))+rdi]2677 pand xmm1,XMMWORD[336+r10]2678 por xmm4,xmm02679 pand xmm2,XMMWORD[352+r10]2680 por xmm5,xmm12681 pand xmm3,XMMWORD[368+r10]2682 por xmm4,xmm22683 por xmm5,xmm32684 movdqa xmm0,XMMWORD[rdi]2685 movdqa xmm1,XMMWORD[16+rdi]2686 movdqa xmm2,XMMWORD[32+rdi]2687 pand xmm0,XMMWORD[384+r10]2688 movdqa xmm3,XMMWORD[48+rdi]2689 pand xmm1,XMMWORD[400+r10]2690 por xmm4,xmm02691 pand xmm2,XMMWORD[416+r10]2692 por xmm5,xmm12693 pand xmm3,XMMWORD[432+r10]2694 por xmm4,xmm22695 por xmm5,xmm32696 movdqa xmm0,XMMWORD[64+rdi]2697 movdqa xmm1,XMMWORD[80+rdi]2698 movdqa xmm2,XMMWORD[96+rdi]2699 pand xmm0,XMMWORD[448+r10]2700 movdqa xmm3,XMMWORD[112+rdi]2701 pand xmm1,XMMWORD[464+r10]2702 por xmm4,xmm02703 pand xmm2,XMMWORD[480+r10]2704 por xmm5,xmm12705 pand xmm3,XMMWORD[496+r10]2706 por xmm4,xmm22707 por xmm5,xmm32708 por xmm4,xmm52709 pshufd xmm0,xmm4,0x4e2710 por xmm0,xmm42711 lea rdi,[256+rdi]2712 DB 102,72,15,126,1942713 2714 mov QWORD[rbx],rbp2715 lea rbx,[32+rax*1+rbx]2716 mulx r11,r8,QWORD[rsi]2717 xor rbp,rbp2718 mov r9,rdx2719 mulx r12,r14,QWORD[8+rsi]2720 adox r8,QWORD[((-32))+rbx]2721 adcx r11,r142722 mulx r13,r15,QWORD[16+rsi]2723 adox r11,QWORD[((-24))+rbx]2724 adcx r12,r152725 mulx r14,rdx,QWORD[24+rsi]2726 adox r12,QWORD[((-16))+rbx]2727 adcx r13,rdx2728 lea rcx,[rax*1+rcx]2729 lea rsi,[32+rsi]2730 adox r13,QWORD[((-8))+rbx]2731 adcx r14,rbp2732 adox r14,rbp2733 2734 mov r15,r82735 imul r8,QWORD[((32+8))+rsp]2736 2737 mov rdx,r82738 xor rbp,rbp2739 mov QWORD[((8+8))+rsp],rdi2740 2741 mulx r10,rax,QWORD[rcx]2742 adcx r15,rax2743 adox r10,r112744 mulx r11,rax,QWORD[8+rcx]2745 adcx r10,rax2746 adox r11,r122747 mulx r12,rax,QWORD[16+rcx]2748 adcx r11,rax2749 adox r12,r132750 mulx r15,rax,QWORD[24+rcx]2751 mov rdx,r92752 mov rdi,QWORD[((24+8))+rsp]2753 mov QWORD[((-32))+rbx],r102754 adcx r12,rax2755 mov QWORD[((-24))+rbx],r112756 adox r15,rbp2757 mov QWORD[((-16))+rbx],r122758 lea rcx,[32+rcx]2759 jmp NEAR $L$mulx4x_inner2760 2761 ALIGN 322762 $L$mulx4x_inner:2763 mulx rax,r10,QWORD[rsi]2764 adcx r15,rbp2765 adox r10,r142766 mulx r14,r11,QWORD[8+rsi]2767 adcx r10,QWORD[rbx]2768 adox r11,rax2769 mulx rax,r12,QWORD[16+rsi]2770 adcx r11,QWORD[8+rbx]2771 adox r12,r142772 mulx r14,r13,QWORD[24+rsi]2773 mov rdx,r82774 adcx r12,QWORD[16+rbx]2775 adox r13,rax2776 adcx r13,QWORD[24+rbx]2777 adox r14,rbp2778 lea rsi,[32+rsi]2779 lea rbx,[32+rbx]2780 adcx r14,rbp2781 2782 adox r10,r152783 mulx r15,rax,QWORD[rcx]2784 adcx r10,rax2785 adox r11,r152786 mulx r15,rax,QWORD[8+rcx]2787 adcx r11,rax2788 adox r12,r152789 mulx r15,rax,QWORD[16+rcx]2790 mov QWORD[((-40))+rbx],r102791 adcx r12,rax2792 adox r13,r152793 mov QWORD[((-32))+rbx],r112794 mulx r15,rax,QWORD[24+rcx]2795 mov rdx,r92796 lea rcx,[32+rcx]2797 mov QWORD[((-24))+rbx],r122798 adcx r13,rax2799 adox r15,rbp2800 mov QWORD[((-16))+rbx],r132801 2802 dec rdi2803 jnz NEAR $L$mulx4x_inner2804 2805 mov rax,QWORD[((0+8))+rsp]2806 adc r15,rbp2807 sub rdi,QWORD[rbx]2808 mov rdi,QWORD[((8+8))+rsp]2809 mov r10,QWORD[((16+8))+rsp]2810 adc r14,r152811 lea rsi,[rax*1+rsi]2812 adc rbp,rbp2813 mov QWORD[((-8))+rbx],r142814 2815 cmp rdi,r102816 jb NEAR $L$mulx4x_outer2817 2818 mov r10,QWORD[((-8))+rcx]2819 mov r8,rbp2820 mov r12,QWORD[rax*1+rcx]2821 lea rbp,[rax*1+rcx]2822 mov rcx,rax2823 lea rdi,[rax*1+rbx]2824 xor eax,eax2825 xor r15,r152826 sub r10,r142827 adc r15,r152828 or r8,r152829 sar rcx,3+22830 sub rax,r82831 mov rdx,QWORD[((56+8))+rsp]2832 dec r122833 mov r13,QWORD[8+rbp]2834 xor r8,r82835 mov r14,QWORD[16+rbp]2836 mov r15,QWORD[24+rbp]2837 jmp NEAR $L$sqrx4x_sub_entry2838 2839 2840 2841 ALIGN 322842 bn_powerx5:2843 mov QWORD[8+rsp],rdi ;WIN64 prologue2844 mov QWORD[16+rsp],rsi2845 mov rax,rsp2846 $L$SEH_begin_bn_powerx5:2847 mov rdi,rcx2848 mov rsi,rdx2849 mov rdx,r82850 mov rcx,r92851 mov r8,QWORD[40+rsp]2852 mov r9,QWORD[48+rsp]2853 2854 2855 2856 mov rax,rsp2857 2858 $L$powerx5_enter:2859 push rbx2860 2861 push rbp2862 2863 push r122864 2865 push r132866 2867 push r142868 2869 push r152870 2871 $L$powerx5_prologue:2872 2873 shl r9d,32874 lea r10,[r9*2+r9]2875 neg r92876 mov r8,QWORD[r8]2877 2878 2879 2880 2881 2882 2883 2884 2885 lea r11,[((-320))+r9*2+rsp]2886 mov rbp,rsp2887 sub r11,rdi2888 and r11,40952889 cmp r10,r112890 jb NEAR $L$pwrx_sp_alt2891 sub rbp,r112892 lea rbp,[((-320))+r9*2+rbp]2893 jmp NEAR $L$pwrx_sp_done2894 2895 ALIGN 322896 $L$pwrx_sp_alt:2897 lea r10,[((4096-320))+r9*2]2898 lea rbp,[((-320))+r9*2+rbp]2899 sub r11,r102900 mov r10,02901 cmovc r11,r102902 sub rbp,r112903 $L$pwrx_sp_done:2904 and rbp,-642905 mov r11,rsp2906 sub r11,rbp2907 and r11,-40962908 lea rsp,[rbp*1+r11]2909 mov r10,QWORD[rsp]2910 cmp rsp,rbp2911 ja NEAR $L$pwrx_page_walk2912 jmp NEAR $L$pwrx_page_walk_done2913 2914 $L$pwrx_page_walk:2915 lea rsp,[((-4096))+rsp]2916 mov r10,QWORD[rsp]2917 cmp rsp,rbp2918 ja NEAR $L$pwrx_page_walk2919 $L$pwrx_page_walk_done:2920 2921 mov r10,r92922 neg r92923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 pxor xmm0,xmm02936 DB 102,72,15,110,2072937 DB 102,72,15,110,2092938 DB 102,73,15,110,2182939 DB 102,72,15,110,2262940 mov QWORD[32+rsp],r82941 mov QWORD[40+rsp],rax2942 2943 $L$powerx5_body:2944 2945 call __bn_sqrx8x_internal2946 call __bn_postx4x_internal2947 call __bn_sqrx8x_internal2948 call __bn_postx4x_internal2949 call __bn_sqrx8x_internal2950 call __bn_postx4x_internal2951 call __bn_sqrx8x_internal2952 call __bn_postx4x_internal2953 call __bn_sqrx8x_internal2954 call __bn_postx4x_internal2955 2956 mov r9,r102957 mov rdi,rsi2958 DB 102,72,15,126,2092959 DB 102,72,15,126,2262960 mov rax,QWORD[40+rsp]2961 2962 call mulx4x_internal2963 2964 mov rsi,QWORD[40+rsp]2965 2966 mov rax,12967 2968 mov r15,QWORD[((-48))+rsi]2969 2970 mov r14,QWORD[((-40))+rsi]2971 2972 mov r13,QWORD[((-32))+rsi]2973 2974 mov r12,QWORD[((-24))+rsi]2975 2976 mov rbp,QWORD[((-16))+rsi]2977 2978 mov rbx,QWORD[((-8))+rsi]2979 2980 lea rsp,[rsi]2981 2982 $L$powerx5_epilogue:2983 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2984 mov rsi,QWORD[16+rsp]2985 DB 0F3h,0C3h ;repret2986 2987 $L$SEH_end_bn_powerx5:2988 2989 global bn_sqrx8x_internal2990 2991 2992 ALIGN 322993 bn_sqrx8x_internal:2994 __bn_sqrx8x_internal:2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 lea rdi,[((48+8))+rsp]3037 lea rbp,[r9*1+rsi]3038 mov QWORD[((0+8))+rsp],r93039 mov QWORD[((8+8))+rsp],rbp3040 jmp NEAR $L$sqr8x_zero_start3041 3042 ALIGN 323043 DB 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x003044 $L$sqrx8x_zero:3045 DB 0x3e3046 movdqa XMMWORD[rdi],xmm03047 movdqa XMMWORD[16+rdi],xmm03048 movdqa XMMWORD[32+rdi],xmm03049 movdqa XMMWORD[48+rdi],xmm03050 $L$sqr8x_zero_start:3051 movdqa XMMWORD[64+rdi],xmm03052 movdqa XMMWORD[80+rdi],xmm03053 movdqa XMMWORD[96+rdi],xmm03054 movdqa XMMWORD[112+rdi],xmm03055 lea rdi,[128+rdi]3056 sub r9,643057 jnz NEAR $L$sqrx8x_zero3058 3059 mov rdx,QWORD[rsi]3060 3061 xor r10,r103062 xor r11,r113063 xor r12,r123064 xor r13,r133065 xor r14,r143066 xor r15,r153067 lea rdi,[((48+8))+rsp]3068 xor rbp,rbp3069 jmp NEAR $L$sqrx8x_outer_loop3070 3071 ALIGN 323072 $L$sqrx8x_outer_loop:3073 mulx rax,r8,QWORD[8+rsi]3074 adcx r8,r93075 adox r10,rax3076 mulx rax,r9,QWORD[16+rsi]3077 adcx r9,r103078 adox r11,rax3079 DB 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x003080 adcx r10,r113081 adox r12,rax3082 DB 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x003083 adcx r11,r123084 adox r13,rax3085 mulx rax,r12,QWORD[40+rsi]3086 adcx r12,r133087 adox r14,rax3088 mulx rax,r13,QWORD[48+rsi]3089 adcx r13,r143090 adox rax,r153091 mulx r15,r14,QWORD[56+rsi]3092 mov rdx,QWORD[8+rsi]3093 adcx r14,rax3094 adox r15,rbp3095 adc r15,QWORD[64+rdi]3096 mov QWORD[8+rdi],r83097 mov QWORD[16+rdi],r93098 sbb rcx,rcx3099 xor rbp,rbp3100 3101 3102 mulx rbx,r8,QWORD[16+rsi]3103 mulx rax,r9,QWORD[24+rsi]3104 adcx r8,r103105 adox r9,rbx3106 mulx rbx,r10,QWORD[32+rsi]3107 adcx r9,r113108 adox r10,rax3109 DB 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x003110 adcx r10,r123111 adox r11,rbx3112 DB 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x003113 adcx r11,r133114 adox r12,r143115 DB 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x003116 mov rdx,QWORD[16+rsi]3117 adcx r12,rax3118 adox r13,rbx3119 adcx r13,r153120 adox r14,rbp3121 adcx r14,rbp3122 3123 mov QWORD[24+rdi],r83124 mov QWORD[32+rdi],r93125 3126 mulx rbx,r8,QWORD[24+rsi]3127 mulx rax,r9,QWORD[32+rsi]3128 adcx r8,r103129 adox r9,rbx3130 mulx rbx,r10,QWORD[40+rsi]3131 adcx r9,r113132 adox r10,rax3133 DB 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x003134 adcx r10,r123135 adox r11,r133136 DB 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x003137 DB 0x3e3138 mov rdx,QWORD[24+rsi]3139 adcx r11,rbx3140 adox r12,rax3141 adcx r12,r143142 mov QWORD[40+rdi],r83143 mov QWORD[48+rdi],r93144 mulx rax,r8,QWORD[32+rsi]3145 adox r13,rbp3146 adcx r13,rbp3147 3148 mulx rbx,r9,QWORD[40+rsi]3149 adcx r8,r103150 adox r9,rax3151 mulx rax,r10,QWORD[48+rsi]3152 adcx r9,r113153 adox r10,r123154 mulx r12,r11,QWORD[56+rsi]3155 mov rdx,QWORD[32+rsi]3156 mov r14,QWORD[40+rsi]3157 adcx r10,rbx3158 adox r11,rax3159 mov r15,QWORD[48+rsi]3160 adcx r11,r133161 adox r12,rbp3162 adcx r12,rbp3163 3164 mov QWORD[56+rdi],r83165 mov QWORD[64+rdi],r93166 3167 mulx rax,r9,r143168 mov r8,QWORD[56+rsi]3169 adcx r9,r103170 mulx rbx,r10,r153171 adox r10,rax3172 adcx r10,r113173 mulx rax,r11,r83174 mov rdx,r143175 adox r11,rbx3176 adcx r11,r123177 3178 adcx rax,rbp3179 3180 mulx rbx,r14,r153181 mulx r13,r12,r83182 mov rdx,r153183 lea rsi,[64+rsi]3184 adcx r11,r143185 adox r12,rbx3186 adcx r12,rax3187 adox r13,rbp3188 3189 DB 0x67,0x673190 mulx r14,r8,r83191 adcx r13,r83192 adcx r14,rbp3193 3194 cmp rsi,QWORD[((8+8))+rsp]3195 je NEAR $L$sqrx8x_outer_break3196 3197 neg rcx3198 mov rcx,-83199 mov r15,rbp3200 mov r8,QWORD[64+rdi]3201 adcx r9,QWORD[72+rdi]3202 adcx r10,QWORD[80+rdi]3203 adcx r11,QWORD[88+rdi]3204 adc r12,QWORD[96+rdi]3205 adc r13,QWORD[104+rdi]3206 adc r14,QWORD[112+rdi]3207 adc r15,QWORD[120+rdi]3208 lea rbp,[rsi]3209 lea rdi,[128+rdi]3210 sbb rax,rax3211 3212 mov rdx,QWORD[((-64))+rsi]3213 mov QWORD[((16+8))+rsp],rax3214 mov QWORD[((24+8))+rsp],rdi3215 3216 3217 xor eax,eax3218 jmp NEAR $L$sqrx8x_loop3219 3220 ALIGN 323221 $L$sqrx8x_loop:3222 mov rbx,r83223 mulx r8,rax,QWORD[rbp]3224 adcx rbx,rax3225 adox r8,r93226 3227 mulx r9,rax,QWORD[8+rbp]3228 adcx r8,rax3229 adox r9,r103230 3231 mulx r10,rax,QWORD[16+rbp]3232 adcx r9,rax3233 adox r10,r113234 3235 mulx r11,rax,QWORD[24+rbp]3236 adcx r10,rax3237 adox r11,r123238 3239 DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x003240 adcx r11,rax3241 adox r12,r133242 3243 mulx r13,rax,QWORD[40+rbp]3244 adcx r12,rax3245 adox r13,r143246 3247 mulx r14,rax,QWORD[48+rbp]3248 mov QWORD[rcx*8+rdi],rbx3249 mov ebx,03250 adcx r13,rax3251 adox r14,r153252 3253 DB 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x003254 mov rdx,QWORD[8+rcx*8+rsi]3255 adcx r14,rax3256 adox r15,rbx3257 adcx r15,rbx3258 3259 DB 0x673260 inc rcx3261 jnz NEAR $L$sqrx8x_loop3262 3263 lea rbp,[64+rbp]3264 mov rcx,-83265 cmp rbp,QWORD[((8+8))+rsp]3266 je NEAR $L$sqrx8x_break3267 3268 sub rbx,QWORD[((16+8))+rsp]3269 DB 0x663270 mov rdx,QWORD[((-64))+rsi]3271 adcx r8,QWORD[rdi]3272 adcx r9,QWORD[8+rdi]3273 adc r10,QWORD[16+rdi]3274 adc r11,QWORD[24+rdi]3275 adc r12,QWORD[32+rdi]3276 adc r13,QWORD[40+rdi]3277 adc r14,QWORD[48+rdi]3278 adc r15,QWORD[56+rdi]3279 lea rdi,[64+rdi]3280 DB 0x673281 sbb rax,rax3282 xor ebx,ebx3283 mov QWORD[((16+8))+rsp],rax3284 jmp NEAR $L$sqrx8x_loop3285 3286 ALIGN 323287 $L$sqrx8x_break:3288 xor rbp,rbp3289 sub rbx,QWORD[((16+8))+rsp]3290 adcx r8,rbp3291 mov rcx,QWORD[((24+8))+rsp]3292 adcx r9,rbp3293 mov rdx,QWORD[rsi]3294 adc r10,03295 mov QWORD[rdi],r83296 adc r11,03297 adc r12,03298 adc r13,03299 adc r14,03300 adc r15,03301 cmp rdi,rcx3302 je NEAR $L$sqrx8x_outer_loop3303 3304 mov QWORD[8+rdi],r93305 mov r9,QWORD[8+rcx]3306 mov QWORD[16+rdi],r103307 mov r10,QWORD[16+rcx]3308 mov QWORD[24+rdi],r113309 mov r11,QWORD[24+rcx]3310 mov QWORD[32+rdi],r123311 mov r12,QWORD[32+rcx]3312 mov QWORD[40+rdi],r133313 mov r13,QWORD[40+rcx]3314 mov QWORD[48+rdi],r143315 mov r14,QWORD[48+rcx]3316 mov QWORD[56+rdi],r153317 mov r15,QWORD[56+rcx]3318 mov rdi,rcx3319 jmp NEAR $L$sqrx8x_outer_loop3320 3321 ALIGN 323322 $L$sqrx8x_outer_break:3323 mov QWORD[72+rdi],r93324 DB 102,72,15,126,2173325 mov QWORD[80+rdi],r103326 mov QWORD[88+rdi],r113327 mov QWORD[96+rdi],r123328 mov QWORD[104+rdi],r133329 mov QWORD[112+rdi],r143330 lea rdi,[((48+8))+rsp]3331 mov rdx,QWORD[rcx*1+rsi]3332 3333 mov r11,QWORD[8+rdi]3334 xor r10,r103335 mov r9,QWORD[((0+8))+rsp]3336 adox r11,r113337 mov r12,QWORD[16+rdi]3338 mov r13,QWORD[24+rdi]3339 3340 3341 ALIGN 323342 $L$sqrx4x_shift_n_add:3343 mulx rbx,rax,rdx3344 adox r12,r123345 adcx rax,r103346 DB 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x003347 DB 0x4c,0x8b,0x97,0x20,0x00,0x00,0x003348 adox r13,r133349 adcx rbx,r113350 mov r11,QWORD[40+rdi]3351 mov QWORD[rdi],rax3352 mov QWORD[8+rdi],rbx3353 3354 mulx rbx,rax,rdx3355 adox r10,r103356 adcx rax,r123357 mov rdx,QWORD[16+rcx*1+rsi]3358 mov r12,QWORD[48+rdi]3359 adox r11,r113360 adcx rbx,r133361 mov r13,QWORD[56+rdi]3362 mov QWORD[16+rdi],rax3363 mov QWORD[24+rdi],rbx3364 3365 mulx rbx,rax,rdx3366 adox r12,r123367 adcx rax,r103368 mov rdx,QWORD[24+rcx*1+rsi]3369 lea rcx,[32+rcx]3370 mov r10,QWORD[64+rdi]3371 adox r13,r133372 adcx rbx,r113373 mov r11,QWORD[72+rdi]3374 mov QWORD[32+rdi],rax3375 mov QWORD[40+rdi],rbx3376 3377 mulx rbx,rax,rdx3378 adox r10,r103379 adcx rax,r123380 jrcxz $L$sqrx4x_shift_n_add_break3381 DB 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x003382 adox r11,r113383 adcx rbx,r133384 mov r12,QWORD[80+rdi]3385 mov r13,QWORD[88+rdi]3386 mov QWORD[48+rdi],rax3387 mov QWORD[56+rdi],rbx3388 lea rdi,[64+rdi]3389 nop3390 jmp NEAR $L$sqrx4x_shift_n_add3391 3392 ALIGN 323393 $L$sqrx4x_shift_n_add_break:3394 adcx rbx,r133395 mov QWORD[48+rdi],rax3396 mov QWORD[56+rdi],rbx3397 lea rdi,[64+rdi]3398 DB 102,72,15,126,2133399 __bn_sqrx8x_reduction:3400 xor eax,eax3401 mov rbx,QWORD[((32+8))+rsp]3402 mov rdx,QWORD[((48+8))+rsp]3403 lea rcx,[((-64))+r9*1+rbp]3404 3405 mov QWORD[((0+8))+rsp],rcx3406 mov QWORD[((8+8))+rsp],rdi3407 3408 lea rdi,[((48+8))+rsp]3409 jmp NEAR $L$sqrx8x_reduction_loop3410 3411 ALIGN 323412 $L$sqrx8x_reduction_loop:3413 mov r9,QWORD[8+rdi]3414 mov r10,QWORD[16+rdi]3415 mov r11,QWORD[24+rdi]3416 mov r12,QWORD[32+rdi]3417 mov r8,rdx3418 imul rdx,rbx3419 mov r13,QWORD[40+rdi]3420 mov r14,QWORD[48+rdi]3421 mov r15,QWORD[56+rdi]3422 mov QWORD[((24+8))+rsp],rax3423 3424 lea rdi,[64+rdi]3425 xor rsi,rsi3426 mov rcx,-83427 jmp NEAR $L$sqrx8x_reduce3428 3429 ALIGN 323430 $L$sqrx8x_reduce:3431 mov rbx,r83432 mulx r8,rax,QWORD[rbp]3433 adcx rax,rbx3434 adox r8,r93435 3436 mulx r9,rbx,QWORD[8+rbp]3437 adcx r8,rbx3438 adox r9,r103439 3440 mulx r10,rbx,QWORD[16+rbp]3441 adcx r9,rbx3442 adox r10,r113443 3444 mulx r11,rbx,QWORD[24+rbp]3445 adcx r10,rbx3446 adox r11,r123447 3448 DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x003449 mov rax,rdx3450 mov rdx,r83451 adcx r11,rbx3452 adox r12,r133453 3454 mulx rdx,rbx,QWORD[((32+8))+rsp]3455 mov rdx,rax3456 mov QWORD[((64+48+8))+rcx*8+rsp],rax3457 3458 mulx r13,rax,QWORD[40+rbp]3459 adcx r12,rax3460 adox r13,r143461 3462 mulx r14,rax,QWORD[48+rbp]3463 adcx r13,rax3464 adox r14,r153465 3466 mulx r15,rax,QWORD[56+rbp]3467 mov rdx,rbx3468 adcx r14,rax3469 adox r15,rsi3470 adcx r15,rsi3471 3472 DB 0x67,0x67,0x673473 inc rcx3474 jnz NEAR $L$sqrx8x_reduce3475 3476 mov rax,rsi3477 cmp rbp,QWORD[((0+8))+rsp]3478 jae NEAR $L$sqrx8x_no_tail3479 3480 mov rdx,QWORD[((48+8))+rsp]3481 add r8,QWORD[rdi]3482 lea rbp,[64+rbp]3483 mov rcx,-83484 adcx r9,QWORD[8+rdi]3485 adcx r10,QWORD[16+rdi]3486 adc r11,QWORD[24+rdi]3487 adc r12,QWORD[32+rdi]3488 adc r13,QWORD[40+rdi]3489 adc r14,QWORD[48+rdi]3490 adc r15,QWORD[56+rdi]3491 lea rdi,[64+rdi]3492 sbb rax,rax3493 3494 xor rsi,rsi3495 mov QWORD[((16+8))+rsp],rax3496 jmp NEAR $L$sqrx8x_tail3497 3498 ALIGN 323499 $L$sqrx8x_tail:3500 mov rbx,r83501 mulx r8,rax,QWORD[rbp]3502 adcx rbx,rax3503 adox r8,r93504 3505 mulx r9,rax,QWORD[8+rbp]3506 adcx r8,rax3507 adox r9,r103508 3509 mulx r10,rax,QWORD[16+rbp]3510 adcx r9,rax3511 adox r10,r113512 3513 mulx r11,rax,QWORD[24+rbp]3514 adcx r10,rax3515 adox r11,r123516 3517 DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x003518 adcx r11,rax3519 adox r12,r133520 3521 mulx r13,rax,QWORD[40+rbp]3522 adcx r12,rax3523 adox r13,r143524 3525 mulx r14,rax,QWORD[48+rbp]3526 adcx r13,rax3527 adox r14,r153528 3529 mulx r15,rax,QWORD[56+rbp]3530 mov rdx,QWORD[((72+48+8))+rcx*8+rsp]3531 adcx r14,rax3532 adox r15,rsi3533 mov QWORD[rcx*8+rdi],rbx3534 mov rbx,r83535 adcx r15,rsi3536 3537 inc rcx3538 jnz NEAR $L$sqrx8x_tail3539 3540 cmp rbp,QWORD[((0+8))+rsp]3541 jae NEAR $L$sqrx8x_tail_done3542 3543 sub rsi,QWORD[((16+8))+rsp]3544 mov rdx,QWORD[((48+8))+rsp]3545 lea rbp,[64+rbp]3546 adc r8,QWORD[rdi]3547 adc r9,QWORD[8+rdi]3548 adc r10,QWORD[16+rdi]3549 adc r11,QWORD[24+rdi]3550 adc r12,QWORD[32+rdi]3551 adc r13,QWORD[40+rdi]3552 adc r14,QWORD[48+rdi]3553 adc r15,QWORD[56+rdi]3554 lea rdi,[64+rdi]3555 sbb rax,rax3556 sub rcx,83557 3558 xor rsi,rsi3559 mov QWORD[((16+8))+rsp],rax3560 jmp NEAR $L$sqrx8x_tail3561 3562 ALIGN 323563 $L$sqrx8x_tail_done:3564 xor rax,rax3565 add r8,QWORD[((24+8))+rsp]3566 adc r9,03567 adc r10,03568 adc r11,03569 adc r12,03570 adc r13,03571 adc r14,03572 adc r15,03573 adc rax,03574 3575 sub rsi,QWORD[((16+8))+rsp]3576 $L$sqrx8x_no_tail:3577 adc r8,QWORD[rdi]3578 DB 102,72,15,126,2173579 adc r9,QWORD[8+rdi]3580 mov rsi,QWORD[56+rbp]3581 DB 102,72,15,126,2133582 adc r10,QWORD[16+rdi]3583 adc r11,QWORD[24+rdi]3584 adc r12,QWORD[32+rdi]3585 adc r13,QWORD[40+rdi]3586 adc r14,QWORD[48+rdi]3587 adc r15,QWORD[56+rdi]3588 adc rax,03589 3590 mov rbx,QWORD[((32+8))+rsp]3591 mov rdx,QWORD[64+rcx*1+rdi]3592 3593 mov QWORD[rdi],r83594 lea r8,[64+rdi]3595 mov QWORD[8+rdi],r93596 mov QWORD[16+rdi],r103597 mov QWORD[24+rdi],r113598 mov QWORD[32+rdi],r123599 mov QWORD[40+rdi],r133600 mov QWORD[48+rdi],r143601 mov QWORD[56+rdi],r153602 3603 lea rdi,[64+rcx*1+rdi]3604 cmp r8,QWORD[((8+8))+rsp]3605 jb NEAR $L$sqrx8x_reduction_loop3606 DB 0F3h,0C3h ;repret3607 3608 3609 ALIGN 323610 __bn_postx4x_internal:3611 3612 mov r12,QWORD[rbp]3613 mov r10,rcx3614 mov r9,rcx3615 neg rax3616 sar rcx,3+23617 3618 DB 102,72,15,126,2023619 DB 102,72,15,126,2063620 dec r123621 mov r13,QWORD[8+rbp]3622 xor r8,r83623 mov r14,QWORD[16+rbp]3624 mov r15,QWORD[24+rbp]3625 jmp NEAR $L$sqrx4x_sub_entry3626 3627 ALIGN 163628 $L$sqrx4x_sub:3629 mov r12,QWORD[rbp]3630 mov r13,QWORD[8+rbp]3631 mov r14,QWORD[16+rbp]3632 mov r15,QWORD[24+rbp]3633 $L$sqrx4x_sub_entry:3634 andn r12,r12,rax3635 lea rbp,[32+rbp]3636 andn r13,r13,rax3637 andn r14,r14,rax3638 andn r15,r15,rax3639 3640 neg r83641 adc r12,QWORD[rdi]3642 adc r13,QWORD[8+rdi]3643 adc r14,QWORD[16+rdi]3644 adc r15,QWORD[24+rdi]3645 mov QWORD[rdx],r123646 lea rdi,[32+rdi]3647 mov QWORD[8+rdx],r133648 sbb r8,r83649 mov QWORD[16+rdx],r143650 mov QWORD[24+rdx],r153651 lea rdx,[32+rdx]3652 3653 inc rcx3654 jnz NEAR $L$sqrx4x_sub3655 3656 neg r93657 3658 DB 0F3h,0C3h ;repret3659 3660 3661 2268 global bn_get_bits5 3662 2269 … … 3995 2602 DD $L$SEH_end_bn_from_mont8x wrt ..imagebase 3996 2603 DD $L$SEH_info_bn_from_mont8x wrt ..imagebase 3997 DD $L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase3998 DD $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase3999 DD $L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase4000 4001 DD $L$SEH_begin_bn_powerx5 wrt ..imagebase4002 DD $L$SEH_end_bn_powerx5 wrt ..imagebase4003 DD $L$SEH_info_bn_powerx5 wrt ..imagebase4004 2604 DD $L$SEH_begin_bn_gather5 wrt ..imagebase 4005 2605 DD $L$SEH_end_bn_gather5 wrt ..imagebase … … 4028 2628 DD $L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase 4029 2629 ALIGN 8 4030 $L$SEH_info_bn_mulx4x_mont_gather5:4031 DB 9,0,0,04032 DD mul_handler wrt ..imagebase4033 DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase4034 ALIGN 84035 $L$SEH_info_bn_powerx5:4036 DB 9,0,0,04037 DD mul_handler wrt ..imagebase4038 DD $L$powerx5_prologue wrt ..imagebase,$L$powerx5_body wrt ..imagebase,$L$powerx5_epilogue wrt ..imagebase4039 ALIGN 84040 2630 $L$SEH_info_bn_gather5: 4041 2631 DB 0x01,0x0b,0x03,0x0a
Note:
See TracChangeset
for help on using the changeset viewer.