Changeset 97373 in vbox for trunk/src/libs/openssl-3.0.7/crypto/genasm-nasm/sha256-x86_64.S
- Timestamp:
- Nov 2, 2022 7:49:19 AM (2 years ago)
- svn:sync-xref-src-repo-rev:
- 154373
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/libs/openssl-3.0.7/crypto/genasm-nasm/sha256-x86_64.S
r97372 r97373 27 27 test r11d,536870912 28 28 jnz NEAR _shaext_shortcut 29 and r11d,29630 cmp r11d,29631 je NEAR $L$avx2_shortcut32 and r9d,107374182433 and r10d,26843596834 or r10d,r9d35 cmp r10d,134217779236 je NEAR $L$avx_shortcut37 29 test r10d,512 38 30 jnz NEAR $L$ssse3_shortcut … … 3158 3150 3159 3151 $L$SEH_end_sha256_block_data_order_ssse3: 3160 3161 ALIGN 643162 sha256_block_data_order_avx:3163 mov QWORD[8+rsp],rdi ;WIN64 prologue3164 mov QWORD[16+rsp],rsi3165 mov rax,rsp3166 $L$SEH_begin_sha256_block_data_order_avx:3167 mov rdi,rcx3168 mov rsi,rdx3169 mov rdx,r83170 3171 3172 3173 $L$avx_shortcut:3174 mov rax,rsp3175 3176 push rbx3177 3178 push rbp3179 3180 push r123181 3182 push r133183 3184 push r143185 3186 push r153187 3188 shl rdx,43189 sub rsp,1603190 lea rdx,[rdx*4+rsi]3191 and rsp,-643192 mov QWORD[((64+0))+rsp],rdi3193 mov QWORD[((64+8))+rsp],rsi3194 mov QWORD[((64+16))+rsp],rdx3195 mov QWORD[88+rsp],rax3196 3197 movaps XMMWORD[(64+32)+rsp],xmm63198 movaps XMMWORD[(64+48)+rsp],xmm73199 movaps XMMWORD[(64+64)+rsp],xmm83200 movaps XMMWORD[(64+80)+rsp],xmm93201 $L$prologue_avx:3202 3203 vzeroupper3204 mov eax,DWORD[rdi]3205 mov ebx,DWORD[4+rdi]3206 mov ecx,DWORD[8+rdi]3207 mov edx,DWORD[12+rdi]3208 mov r8d,DWORD[16+rdi]3209 mov r9d,DWORD[20+rdi]3210 mov r10d,DWORD[24+rdi]3211 mov r11d,DWORD[28+rdi]3212 vmovdqa xmm8,XMMWORD[((K256+512+32))]3213 vmovdqa xmm9,XMMWORD[((K256+512+64))]3214 jmp NEAR $L$loop_avx3215 ALIGN 163216 $L$loop_avx:3217 vmovdqa xmm7,XMMWORD[((K256+512))]3218 vmovdqu xmm0,XMMWORD[rsi]3219 vmovdqu xmm1,XMMWORD[16+rsi]3220 vmovdqu xmm2,XMMWORD[32+rsi]3221 vmovdqu xmm3,XMMWORD[48+rsi]3222 vpshufb xmm0,xmm0,xmm73223 lea rbp,[K256]3224 vpshufb xmm1,xmm1,xmm73225 vpshufb xmm2,xmm2,xmm73226 vpaddd xmm4,xmm0,XMMWORD[rbp]3227 vpshufb xmm3,xmm3,xmm73228 vpaddd xmm5,xmm1,XMMWORD[32+rbp]3229 vpaddd xmm6,xmm2,XMMWORD[64+rbp]3230 vpaddd xmm7,xmm3,XMMWORD[96+rbp]3231 vmovdqa XMMWORD[rsp],xmm43232 mov r14d,eax3233 vmovdqa XMMWORD[16+rsp],xmm53234 mov edi,ebx3235 vmovdqa XMMWORD[32+rsp],xmm63236 xor edi,ecx3237 vmovdqa XMMWORD[48+rsp],xmm73238 mov r13d,r8d3239 jmp NEAR $L$avx_00_473240 3241 ALIGN 163242 $L$avx_00_47:3243 sub rbp,-1283244 vpalignr xmm4,xmm1,xmm0,43245 shrd r13d,r13d,143246 mov eax,r14d3247 mov r12d,r9d3248 vpalignr xmm7,xmm3,xmm2,43249 shrd r14d,r14d,93250 xor r13d,r8d3251 xor r12d,r10d3252 vpsrld xmm6,xmm4,73253 shrd r13d,r13d,53254 xor r14d,eax3255 and r12d,r8d3256 vpaddd xmm0,xmm0,xmm73257 xor r13d,r8d3258 add r11d,DWORD[rsp]3259 mov r15d,eax3260 vpsrld xmm7,xmm4,33261 xor r12d,r10d3262 shrd r14d,r14d,113263 xor r15d,ebx3264 vpslld xmm5,xmm4,143265 add r11d,r12d3266 shrd r13d,r13d,63267 and edi,r15d3268 vpxor xmm4,xmm7,xmm63269 xor r14d,eax3270 add r11d,r13d3271 xor edi,ebx3272 vpshufd xmm7,xmm3,2503273 shrd r14d,r14d,23274 add edx,r11d3275 add r11d,edi3276 vpsrld xmm6,xmm6,113277 mov r13d,edx3278 add r14d,r11d3279 shrd r13d,r13d,143280 vpxor xmm4,xmm4,xmm53281 mov r11d,r14d3282 mov r12d,r8d3283 shrd r14d,r14d,93284 vpslld xmm5,xmm5,113285 xor r13d,edx3286 xor r12d,r9d3287 shrd r13d,r13d,53288 vpxor xmm4,xmm4,xmm63289 xor r14d,r11d3290 and r12d,edx3291 xor r13d,edx3292 vpsrld xmm6,xmm7,103293 add r10d,DWORD[4+rsp]3294 mov edi,r11d3295 xor r12d,r9d3296 vpxor xmm4,xmm4,xmm53297 shrd r14d,r14d,113298 xor edi,eax3299 add r10d,r12d3300 vpsrlq xmm7,xmm7,173301 shrd r13d,r13d,63302 and r15d,edi3303 xor r14d,r11d3304 vpaddd xmm0,xmm0,xmm43305 add r10d,r13d3306 xor r15d,eax3307 shrd r14d,r14d,23308 vpxor xmm6,xmm6,xmm73309 add ecx,r10d3310 add r10d,r15d3311 mov r13d,ecx3312 vpsrlq xmm7,xmm7,23313 add r14d,r10d3314 shrd r13d,r13d,143315 mov r10d,r14d3316 vpxor xmm6,xmm6,xmm73317 mov r12d,edx3318 shrd r14d,r14d,93319 xor r13d,ecx3320 vpshufb xmm6,xmm6,xmm83321 xor r12d,r8d3322 shrd r13d,r13d,53323 xor r14d,r10d3324 vpaddd xmm0,xmm0,xmm63325 and r12d,ecx3326 xor r13d,ecx3327 add r9d,DWORD[8+rsp]3328 vpshufd xmm7,xmm0,803329 mov r15d,r10d3330 xor r12d,r8d3331 shrd r14d,r14d,113332 vpsrld xmm6,xmm7,103333 xor r15d,r11d3334 add r9d,r12d3335 shrd r13d,r13d,63336 vpsrlq xmm7,xmm7,173337 and edi,r15d3338 xor r14d,r10d3339 add r9d,r13d3340 vpxor xmm6,xmm6,xmm73341 xor edi,r11d3342 shrd r14d,r14d,23343 add ebx,r9d3344 vpsrlq xmm7,xmm7,23345 add r9d,edi3346 mov r13d,ebx3347 add r14d,r9d3348 vpxor xmm6,xmm6,xmm73349 shrd r13d,r13d,143350 mov r9d,r14d3351 mov r12d,ecx3352 vpshufb xmm6,xmm6,xmm93353 shrd r14d,r14d,93354 xor r13d,ebx3355 xor r12d,edx3356 vpaddd xmm0,xmm0,xmm63357 shrd r13d,r13d,53358 xor r14d,r9d3359 and r12d,ebx3360 vpaddd xmm6,xmm0,XMMWORD[rbp]3361 xor r13d,ebx3362 add r8d,DWORD[12+rsp]3363 mov edi,r9d3364 xor r12d,edx3365 shrd r14d,r14d,113366 xor edi,r10d3367 add r8d,r12d3368 shrd r13d,r13d,63369 and r15d,edi3370 xor r14d,r9d3371 add r8d,r13d3372 xor r15d,r10d3373 shrd r14d,r14d,23374 add eax,r8d3375 add r8d,r15d3376 mov r13d,eax3377 add r14d,r8d3378 vmovdqa XMMWORD[rsp],xmm63379 vpalignr xmm4,xmm2,xmm1,43380 shrd r13d,r13d,143381 mov r8d,r14d3382 mov r12d,ebx3383 vpalignr xmm7,xmm0,xmm3,43384 shrd r14d,r14d,93385 xor r13d,eax3386 xor r12d,ecx3387 vpsrld xmm6,xmm4,73388 shrd r13d,r13d,53389 xor r14d,r8d3390 and r12d,eax3391 vpaddd xmm1,xmm1,xmm73392 xor r13d,eax3393 add edx,DWORD[16+rsp]3394 mov r15d,r8d3395 vpsrld xmm7,xmm4,33396 xor r12d,ecx3397 shrd r14d,r14d,113398 xor r15d,r9d3399 vpslld xmm5,xmm4,143400 add edx,r12d3401 shrd r13d,r13d,63402 and edi,r15d3403 vpxor xmm4,xmm7,xmm63404 xor r14d,r8d3405 add edx,r13d3406 xor edi,r9d3407 vpshufd xmm7,xmm0,2503408 shrd r14d,r14d,23409 add r11d,edx3410 add edx,edi3411 vpsrld xmm6,xmm6,113412 mov r13d,r11d3413 add r14d,edx3414 shrd r13d,r13d,143415 vpxor xmm4,xmm4,xmm53416 mov edx,r14d3417 mov r12d,eax3418 shrd r14d,r14d,93419 vpslld xmm5,xmm5,113420 xor r13d,r11d3421 xor r12d,ebx3422 shrd r13d,r13d,53423 vpxor xmm4,xmm4,xmm63424 xor r14d,edx3425 and r12d,r11d3426 xor r13d,r11d3427 vpsrld xmm6,xmm7,103428 add ecx,DWORD[20+rsp]3429 mov edi,edx3430 xor r12d,ebx3431 vpxor xmm4,xmm4,xmm53432 shrd r14d,r14d,113433 xor edi,r8d3434 add ecx,r12d3435 vpsrlq xmm7,xmm7,173436 shrd r13d,r13d,63437 and r15d,edi3438 xor r14d,edx3439 vpaddd xmm1,xmm1,xmm43440 add ecx,r13d3441 xor r15d,r8d3442 shrd r14d,r14d,23443 vpxor xmm6,xmm6,xmm73444 add r10d,ecx3445 add ecx,r15d3446 mov r13d,r10d3447 vpsrlq xmm7,xmm7,23448 add r14d,ecx3449 shrd r13d,r13d,143450 mov ecx,r14d3451 vpxor xmm6,xmm6,xmm73452 mov r12d,r11d3453 shrd r14d,r14d,93454 xor r13d,r10d3455 vpshufb xmm6,xmm6,xmm83456 xor r12d,eax3457 shrd r13d,r13d,53458 xor r14d,ecx3459 vpaddd xmm1,xmm1,xmm63460 and r12d,r10d3461 xor r13d,r10d3462 add ebx,DWORD[24+rsp]3463 vpshufd xmm7,xmm1,803464 mov r15d,ecx3465 xor r12d,eax3466 shrd r14d,r14d,113467 vpsrld xmm6,xmm7,103468 xor r15d,edx3469 add ebx,r12d3470 shrd r13d,r13d,63471 vpsrlq xmm7,xmm7,173472 and edi,r15d3473 xor r14d,ecx3474 add ebx,r13d3475 vpxor xmm6,xmm6,xmm73476 xor edi,edx3477 shrd r14d,r14d,23478 add r9d,ebx3479 vpsrlq xmm7,xmm7,23480 add ebx,edi3481 mov r13d,r9d3482 add r14d,ebx3483 vpxor xmm6,xmm6,xmm73484 shrd r13d,r13d,143485 mov ebx,r14d3486 mov r12d,r10d3487 vpshufb xmm6,xmm6,xmm93488 shrd r14d,r14d,93489 xor r13d,r9d3490 xor r12d,r11d3491 vpaddd xmm1,xmm1,xmm63492 shrd r13d,r13d,53493 xor r14d,ebx3494 and r12d,r9d3495 vpaddd xmm6,xmm1,XMMWORD[32+rbp]3496 xor r13d,r9d3497 add eax,DWORD[28+rsp]3498 mov edi,ebx3499 xor r12d,r11d3500 shrd r14d,r14d,113501 xor edi,ecx3502 add eax,r12d3503 shrd r13d,r13d,63504 and r15d,edi3505 xor r14d,ebx3506 add eax,r13d3507 xor r15d,ecx3508 shrd r14d,r14d,23509 add r8d,eax3510 add eax,r15d3511 mov r13d,r8d3512 add r14d,eax3513 vmovdqa XMMWORD[16+rsp],xmm63514 vpalignr xmm4,xmm3,xmm2,43515 shrd r13d,r13d,143516 mov eax,r14d3517 mov r12d,r9d3518 vpalignr xmm7,xmm1,xmm0,43519 shrd r14d,r14d,93520 xor r13d,r8d3521 xor r12d,r10d3522 vpsrld xmm6,xmm4,73523 shrd r13d,r13d,53524 xor r14d,eax3525 and r12d,r8d3526 vpaddd xmm2,xmm2,xmm73527 xor r13d,r8d3528 add r11d,DWORD[32+rsp]3529 mov r15d,eax3530 vpsrld xmm7,xmm4,33531 xor r12d,r10d3532 shrd r14d,r14d,113533 xor r15d,ebx3534 vpslld xmm5,xmm4,143535 add r11d,r12d3536 shrd r13d,r13d,63537 and edi,r15d3538 vpxor xmm4,xmm7,xmm63539 xor r14d,eax3540 add r11d,r13d3541 xor edi,ebx3542 vpshufd xmm7,xmm1,2503543 shrd r14d,r14d,23544 add edx,r11d3545 add r11d,edi3546 vpsrld xmm6,xmm6,113547 mov r13d,edx3548 add r14d,r11d3549 shrd r13d,r13d,143550 vpxor xmm4,xmm4,xmm53551 mov r11d,r14d3552 mov r12d,r8d3553 shrd r14d,r14d,93554 vpslld xmm5,xmm5,113555 xor r13d,edx3556 xor r12d,r9d3557 shrd r13d,r13d,53558 vpxor xmm4,xmm4,xmm63559 xor r14d,r11d3560 and r12d,edx3561 xor r13d,edx3562 vpsrld xmm6,xmm7,103563 add r10d,DWORD[36+rsp]3564 mov edi,r11d3565 xor r12d,r9d3566 vpxor xmm4,xmm4,xmm53567 shrd r14d,r14d,113568 xor edi,eax3569 add r10d,r12d3570 vpsrlq xmm7,xmm7,173571 shrd r13d,r13d,63572 and r15d,edi3573 xor r14d,r11d3574 vpaddd xmm2,xmm2,xmm43575 add r10d,r13d3576 xor r15d,eax3577 shrd r14d,r14d,23578 vpxor xmm6,xmm6,xmm73579 add ecx,r10d3580 add r10d,r15d3581 mov r13d,ecx3582 vpsrlq xmm7,xmm7,23583 add r14d,r10d3584 shrd r13d,r13d,143585 mov r10d,r14d3586 vpxor xmm6,xmm6,xmm73587 mov r12d,edx3588 shrd r14d,r14d,93589 xor r13d,ecx3590 vpshufb xmm6,xmm6,xmm83591 xor r12d,r8d3592 shrd r13d,r13d,53593 xor r14d,r10d3594 vpaddd xmm2,xmm2,xmm63595 and r12d,ecx3596 xor r13d,ecx3597 add r9d,DWORD[40+rsp]3598 vpshufd xmm7,xmm2,803599 mov r15d,r10d3600 xor r12d,r8d3601 shrd r14d,r14d,113602 vpsrld xmm6,xmm7,103603 xor r15d,r11d3604 add r9d,r12d3605 shrd r13d,r13d,63606 vpsrlq xmm7,xmm7,173607 and edi,r15d3608 xor r14d,r10d3609 add r9d,r13d3610 vpxor xmm6,xmm6,xmm73611 xor edi,r11d3612 shrd r14d,r14d,23613 add ebx,r9d3614 vpsrlq xmm7,xmm7,23615 add r9d,edi3616 mov r13d,ebx3617 add r14d,r9d3618 vpxor xmm6,xmm6,xmm73619 shrd r13d,r13d,143620 mov r9d,r14d3621 mov r12d,ecx3622 vpshufb xmm6,xmm6,xmm93623 shrd r14d,r14d,93624 xor r13d,ebx3625 xor r12d,edx3626 vpaddd xmm2,xmm2,xmm63627 shrd r13d,r13d,53628 xor r14d,r9d3629 and r12d,ebx3630 vpaddd xmm6,xmm2,XMMWORD[64+rbp]3631 xor r13d,ebx3632 add r8d,DWORD[44+rsp]3633 mov edi,r9d3634 xor r12d,edx3635 shrd r14d,r14d,113636 xor edi,r10d3637 add r8d,r12d3638 shrd r13d,r13d,63639 and r15d,edi3640 xor r14d,r9d3641 add r8d,r13d3642 xor r15d,r10d3643 shrd r14d,r14d,23644 add eax,r8d3645 add r8d,r15d3646 mov r13d,eax3647 add r14d,r8d3648 vmovdqa XMMWORD[32+rsp],xmm63649 vpalignr xmm4,xmm0,xmm3,43650 shrd r13d,r13d,143651 mov r8d,r14d3652 mov r12d,ebx3653 vpalignr xmm7,xmm2,xmm1,43654 shrd r14d,r14d,93655 xor r13d,eax3656 xor r12d,ecx3657 vpsrld xmm6,xmm4,73658 shrd r13d,r13d,53659 xor r14d,r8d3660 and r12d,eax3661 vpaddd xmm3,xmm3,xmm73662 xor r13d,eax3663 add edx,DWORD[48+rsp]3664 mov r15d,r8d3665 vpsrld xmm7,xmm4,33666 xor r12d,ecx3667 shrd r14d,r14d,113668 xor r15d,r9d3669 vpslld xmm5,xmm4,143670 add edx,r12d3671 shrd r13d,r13d,63672 and edi,r15d3673 vpxor xmm4,xmm7,xmm63674 xor r14d,r8d3675 add edx,r13d3676 xor edi,r9d3677 vpshufd xmm7,xmm2,2503678 shrd r14d,r14d,23679 add r11d,edx3680 add edx,edi3681 vpsrld xmm6,xmm6,113682 mov r13d,r11d3683 add r14d,edx3684 shrd r13d,r13d,143685 vpxor xmm4,xmm4,xmm53686 mov edx,r14d3687 mov r12d,eax3688 shrd r14d,r14d,93689 vpslld xmm5,xmm5,113690 xor r13d,r11d3691 xor r12d,ebx3692 shrd r13d,r13d,53693 vpxor xmm4,xmm4,xmm63694 xor r14d,edx3695 and r12d,r11d3696 xor r13d,r11d3697 vpsrld xmm6,xmm7,103698 add ecx,DWORD[52+rsp]3699 mov edi,edx3700 xor r12d,ebx3701 vpxor xmm4,xmm4,xmm53702 shrd r14d,r14d,113703 xor edi,r8d3704 add ecx,r12d3705 vpsrlq xmm7,xmm7,173706 shrd r13d,r13d,63707 and r15d,edi3708 xor r14d,edx3709 vpaddd xmm3,xmm3,xmm43710 add ecx,r13d3711 xor r15d,r8d3712 shrd r14d,r14d,23713 vpxor xmm6,xmm6,xmm73714 add r10d,ecx3715 add ecx,r15d3716 mov r13d,r10d3717 vpsrlq xmm7,xmm7,23718 add r14d,ecx3719 shrd r13d,r13d,143720 mov ecx,r14d3721 vpxor xmm6,xmm6,xmm73722 mov r12d,r11d3723 shrd r14d,r14d,93724 xor r13d,r10d3725 vpshufb xmm6,xmm6,xmm83726 xor r12d,eax3727 shrd r13d,r13d,53728 xor r14d,ecx3729 vpaddd xmm3,xmm3,xmm63730 and r12d,r10d3731 xor r13d,r10d3732 add ebx,DWORD[56+rsp]3733 vpshufd xmm7,xmm3,803734 mov r15d,ecx3735 xor r12d,eax3736 shrd r14d,r14d,113737 vpsrld xmm6,xmm7,103738 xor r15d,edx3739 add ebx,r12d3740 shrd r13d,r13d,63741 vpsrlq xmm7,xmm7,173742 and edi,r15d3743 xor r14d,ecx3744 add ebx,r13d3745 vpxor xmm6,xmm6,xmm73746 xor edi,edx3747 shrd r14d,r14d,23748 add r9d,ebx3749 vpsrlq xmm7,xmm7,23750 add ebx,edi3751 mov r13d,r9d3752 add r14d,ebx3753 vpxor xmm6,xmm6,xmm73754 shrd r13d,r13d,143755 mov ebx,r14d3756 mov r12d,r10d3757 vpshufb xmm6,xmm6,xmm93758 shrd r14d,r14d,93759 xor r13d,r9d3760 xor r12d,r11d3761 vpaddd xmm3,xmm3,xmm63762 shrd r13d,r13d,53763 xor r14d,ebx3764 and r12d,r9d3765 vpaddd xmm6,xmm3,XMMWORD[96+rbp]3766 xor r13d,r9d3767 add eax,DWORD[60+rsp]3768 mov edi,ebx3769 xor r12d,r11d3770 shrd r14d,r14d,113771 xor edi,ecx3772 add eax,r12d3773 shrd r13d,r13d,63774 and r15d,edi3775 xor r14d,ebx3776 add eax,r13d3777 xor r15d,ecx3778 shrd r14d,r14d,23779 add r8d,eax3780 add eax,r15d3781 mov r13d,r8d3782 add r14d,eax3783 vmovdqa XMMWORD[48+rsp],xmm63784 cmp BYTE[131+rbp],03785 jne NEAR $L$avx_00_473786 shrd r13d,r13d,143787 mov eax,r14d3788 mov r12d,r9d3789 shrd r14d,r14d,93790 xor r13d,r8d3791 xor r12d,r10d3792 shrd r13d,r13d,53793 xor r14d,eax3794 and r12d,r8d3795 xor r13d,r8d3796 add r11d,DWORD[rsp]3797 mov r15d,eax3798 xor r12d,r10d3799 shrd r14d,r14d,113800 xor r15d,ebx3801 add r11d,r12d3802 shrd r13d,r13d,63803 and edi,r15d3804 xor r14d,eax3805 add r11d,r13d3806 xor edi,ebx3807 shrd r14d,r14d,23808 add edx,r11d3809 add r11d,edi3810 mov r13d,edx3811 add r14d,r11d3812 shrd r13d,r13d,143813 mov r11d,r14d3814 mov r12d,r8d3815 shrd r14d,r14d,93816 xor r13d,edx3817 xor r12d,r9d3818 shrd r13d,r13d,53819 xor r14d,r11d3820 and r12d,edx3821 xor r13d,edx3822 add r10d,DWORD[4+rsp]3823 mov edi,r11d3824 xor r12d,r9d3825 shrd r14d,r14d,113826 xor edi,eax3827 add r10d,r12d3828 shrd r13d,r13d,63829 and r15d,edi3830 xor r14d,r11d3831 add r10d,r13d3832 xor r15d,eax3833 shrd r14d,r14d,23834 add ecx,r10d3835 add r10d,r15d3836 mov r13d,ecx3837 add r14d,r10d3838 shrd r13d,r13d,143839 mov r10d,r14d3840 mov r12d,edx3841 shrd r14d,r14d,93842 xor r13d,ecx3843 xor r12d,r8d3844 shrd r13d,r13d,53845 xor r14d,r10d3846 and r12d,ecx3847 xor r13d,ecx3848 add r9d,DWORD[8+rsp]3849 mov r15d,r10d3850 xor r12d,r8d3851 shrd r14d,r14d,113852 xor r15d,r11d3853 add r9d,r12d3854 shrd r13d,r13d,63855 and edi,r15d3856 xor r14d,r10d3857 add r9d,r13d3858 xor edi,r11d3859 shrd r14d,r14d,23860 add ebx,r9d3861 add r9d,edi3862 mov r13d,ebx3863 add r14d,r9d3864 shrd r13d,r13d,143865 mov r9d,r14d3866 mov r12d,ecx3867 shrd r14d,r14d,93868 xor r13d,ebx3869 xor r12d,edx3870 shrd r13d,r13d,53871 xor r14d,r9d3872 and r12d,ebx3873 xor r13d,ebx3874 add r8d,DWORD[12+rsp]3875 mov edi,r9d3876 xor r12d,edx3877 shrd r14d,r14d,113878 xor edi,r10d3879 add r8d,r12d3880 shrd r13d,r13d,63881 and r15d,edi3882 xor r14d,r9d3883 add r8d,r13d3884 xor r15d,r10d3885 shrd r14d,r14d,23886 add eax,r8d3887 add r8d,r15d3888 mov r13d,eax3889 add r14d,r8d3890 shrd r13d,r13d,143891 mov r8d,r14d3892 mov r12d,ebx3893 shrd r14d,r14d,93894 xor r13d,eax3895 xor r12d,ecx3896 shrd r13d,r13d,53897 xor r14d,r8d3898 and r12d,eax3899 xor r13d,eax3900 add edx,DWORD[16+rsp]3901 mov r15d,r8d3902 xor r12d,ecx3903 shrd r14d,r14d,113904 xor r15d,r9d3905 add edx,r12d3906 shrd r13d,r13d,63907 and edi,r15d3908 xor r14d,r8d3909 add edx,r13d3910 xor edi,r9d3911 shrd r14d,r14d,23912 add r11d,edx3913 add edx,edi3914 mov r13d,r11d3915 add r14d,edx3916 shrd r13d,r13d,143917 mov edx,r14d3918 mov r12d,eax3919 shrd r14d,r14d,93920 xor r13d,r11d3921 xor r12d,ebx3922 shrd r13d,r13d,53923 xor r14d,edx3924 and r12d,r11d3925 xor r13d,r11d3926 add ecx,DWORD[20+rsp]3927 mov edi,edx3928 xor r12d,ebx3929 shrd r14d,r14d,113930 xor edi,r8d3931 add ecx,r12d3932 shrd r13d,r13d,63933 and r15d,edi3934 xor r14d,edx3935 add ecx,r13d3936 xor r15d,r8d3937 shrd r14d,r14d,23938 add r10d,ecx3939 add ecx,r15d3940 mov r13d,r10d3941 add r14d,ecx3942 shrd r13d,r13d,143943 mov ecx,r14d3944 mov r12d,r11d3945 shrd r14d,r14d,93946 xor r13d,r10d3947 xor r12d,eax3948 shrd r13d,r13d,53949 xor r14d,ecx3950 and r12d,r10d3951 xor r13d,r10d3952 add ebx,DWORD[24+rsp]3953 mov r15d,ecx3954 xor r12d,eax3955 shrd r14d,r14d,113956 xor r15d,edx3957 add ebx,r12d3958 shrd r13d,r13d,63959 and edi,r15d3960 xor r14d,ecx3961 add ebx,r13d3962 xor edi,edx3963 shrd r14d,r14d,23964 add r9d,ebx3965 add ebx,edi3966 mov r13d,r9d3967 add r14d,ebx3968 shrd r13d,r13d,143969 mov ebx,r14d3970 mov r12d,r10d3971 shrd r14d,r14d,93972 xor r13d,r9d3973 xor r12d,r11d3974 shrd r13d,r13d,53975 xor r14d,ebx3976 and r12d,r9d3977 xor r13d,r9d3978 add eax,DWORD[28+rsp]3979 mov edi,ebx3980 xor r12d,r11d3981 shrd r14d,r14d,113982 xor edi,ecx3983 add eax,r12d3984 shrd r13d,r13d,63985 and r15d,edi3986 xor r14d,ebx3987 add eax,r13d3988 xor r15d,ecx3989 shrd r14d,r14d,23990 add r8d,eax3991 add eax,r15d3992 mov r13d,r8d3993 add r14d,eax3994 shrd r13d,r13d,143995 mov eax,r14d3996 mov r12d,r9d3997 shrd r14d,r14d,93998 xor r13d,r8d3999 xor r12d,r10d4000 shrd r13d,r13d,54001 xor r14d,eax4002 and r12d,r8d4003 xor r13d,r8d4004 add r11d,DWORD[32+rsp]4005 mov r15d,eax4006 xor r12d,r10d4007 shrd r14d,r14d,114008 xor r15d,ebx4009 add r11d,r12d4010 shrd r13d,r13d,64011 and edi,r15d4012 xor r14d,eax4013 add r11d,r13d4014 xor edi,ebx4015 shrd r14d,r14d,24016 add edx,r11d4017 add r11d,edi4018 mov r13d,edx4019 add r14d,r11d4020 shrd r13d,r13d,144021 mov r11d,r14d4022 mov r12d,r8d4023 shrd r14d,r14d,94024 xor r13d,edx4025 xor r12d,r9d4026 shrd r13d,r13d,54027 xor r14d,r11d4028 and r12d,edx4029 xor r13d,edx4030 add r10d,DWORD[36+rsp]4031 mov edi,r11d4032 xor r12d,r9d4033 shrd r14d,r14d,114034 xor edi,eax4035 add r10d,r12d4036 shrd r13d,r13d,64037 and r15d,edi4038 xor r14d,r11d4039 add r10d,r13d4040 xor r15d,eax4041 shrd r14d,r14d,24042 add ecx,r10d4043 add r10d,r15d4044 mov r13d,ecx4045 add r14d,r10d4046 shrd r13d,r13d,144047 mov r10d,r14d4048 mov r12d,edx4049 shrd r14d,r14d,94050 xor r13d,ecx4051 xor r12d,r8d4052 shrd r13d,r13d,54053 xor r14d,r10d4054 and r12d,ecx4055 xor r13d,ecx4056 add r9d,DWORD[40+rsp]4057 mov r15d,r10d4058 xor r12d,r8d4059 shrd r14d,r14d,114060 xor r15d,r11d4061 add r9d,r12d4062 shrd r13d,r13d,64063 and edi,r15d4064 xor r14d,r10d4065 add r9d,r13d4066 xor edi,r11d4067 shrd r14d,r14d,24068 add ebx,r9d4069 add r9d,edi4070 mov r13d,ebx4071 add r14d,r9d4072 shrd r13d,r13d,144073 mov r9d,r14d4074 mov r12d,ecx4075 shrd r14d,r14d,94076 xor r13d,ebx4077 xor r12d,edx4078 shrd r13d,r13d,54079 xor r14d,r9d4080 and r12d,ebx4081 xor r13d,ebx4082 add r8d,DWORD[44+rsp]4083 mov edi,r9d4084 xor r12d,edx4085 shrd r14d,r14d,114086 xor edi,r10d4087 add r8d,r12d4088 shrd r13d,r13d,64089 and r15d,edi4090 xor r14d,r9d4091 add r8d,r13d4092 xor r15d,r10d4093 shrd r14d,r14d,24094 add eax,r8d4095 add r8d,r15d4096 mov r13d,eax4097 add r14d,r8d4098 shrd r13d,r13d,144099 mov r8d,r14d4100 mov r12d,ebx4101 shrd r14d,r14d,94102 xor r13d,eax4103 xor r12d,ecx4104 shrd r13d,r13d,54105 xor r14d,r8d4106 and r12d,eax4107 xor r13d,eax4108 add edx,DWORD[48+rsp]4109 mov r15d,r8d4110 xor r12d,ecx4111 shrd r14d,r14d,114112 xor r15d,r9d4113 add edx,r12d4114 shrd r13d,r13d,64115 and edi,r15d4116 xor r14d,r8d4117 add edx,r13d4118 xor edi,r9d4119 shrd r14d,r14d,24120 add r11d,edx4121 add edx,edi4122 mov r13d,r11d4123 add r14d,edx4124 shrd r13d,r13d,144125 mov edx,r14d4126 mov r12d,eax4127 shrd r14d,r14d,94128 xor r13d,r11d4129 xor r12d,ebx4130 shrd r13d,r13d,54131 xor r14d,edx4132 and r12d,r11d4133 xor r13d,r11d4134 add ecx,DWORD[52+rsp]4135 mov edi,edx4136 xor r12d,ebx4137 shrd r14d,r14d,114138 xor edi,r8d4139 add ecx,r12d4140 shrd r13d,r13d,64141 and r15d,edi4142 xor r14d,edx4143 add ecx,r13d4144 xor r15d,r8d4145 shrd r14d,r14d,24146 add r10d,ecx4147 add ecx,r15d4148 mov r13d,r10d4149 add r14d,ecx4150 shrd r13d,r13d,144151 mov ecx,r14d4152 mov r12d,r11d4153 shrd r14d,r14d,94154 xor r13d,r10d4155 xor r12d,eax4156 shrd r13d,r13d,54157 xor r14d,ecx4158 and r12d,r10d4159 xor r13d,r10d4160 add ebx,DWORD[56+rsp]4161 mov r15d,ecx4162 xor r12d,eax4163 shrd r14d,r14d,114164 xor r15d,edx4165 add ebx,r12d4166 shrd r13d,r13d,64167 and edi,r15d4168 xor r14d,ecx4169 add ebx,r13d4170 xor edi,edx4171 shrd r14d,r14d,24172 add r9d,ebx4173 add ebx,edi4174 mov r13d,r9d4175 add r14d,ebx4176 shrd r13d,r13d,144177 mov ebx,r14d4178 mov r12d,r10d4179 shrd r14d,r14d,94180 xor r13d,r9d4181 xor r12d,r11d4182 shrd r13d,r13d,54183 xor r14d,ebx4184 and r12d,r9d4185 xor r13d,r9d4186 add eax,DWORD[60+rsp]4187 mov edi,ebx4188 xor r12d,r11d4189 shrd r14d,r14d,114190 xor edi,ecx4191 add eax,r12d4192 shrd r13d,r13d,64193 and r15d,edi4194 xor r14d,ebx4195 add eax,r13d4196 xor r15d,ecx4197 shrd r14d,r14d,24198 add r8d,eax4199 add eax,r15d4200 mov r13d,r8d4201 add r14d,eax4202 mov rdi,QWORD[((64+0))+rsp]4203 mov eax,r14d4204 4205 add eax,DWORD[rdi]4206 lea rsi,[64+rsi]4207 add ebx,DWORD[4+rdi]4208 add ecx,DWORD[8+rdi]4209 add edx,DWORD[12+rdi]4210 add r8d,DWORD[16+rdi]4211 add r9d,DWORD[20+rdi]4212 add r10d,DWORD[24+rdi]4213 add r11d,DWORD[28+rdi]4214 4215 cmp rsi,QWORD[((64+16))+rsp]4216 4217 mov DWORD[rdi],eax4218 mov DWORD[4+rdi],ebx4219 mov DWORD[8+rdi],ecx4220 mov DWORD[12+rdi],edx4221 mov DWORD[16+rdi],r8d4222 mov DWORD[20+rdi],r9d4223 mov DWORD[24+rdi],r10d4224 mov DWORD[28+rdi],r11d4225 jb NEAR $L$loop_avx4226 4227 mov rsi,QWORD[88+rsp]4228 4229 vzeroupper4230 movaps xmm6,XMMWORD[((64+32))+rsp]4231 movaps xmm7,XMMWORD[((64+48))+rsp]4232 movaps xmm8,XMMWORD[((64+64))+rsp]4233 movaps xmm9,XMMWORD[((64+80))+rsp]4234 mov r15,QWORD[((-48))+rsi]4235 4236 mov r14,QWORD[((-40))+rsi]4237 4238 mov r13,QWORD[((-32))+rsi]4239 4240 mov r12,QWORD[((-24))+rsi]4241 4242 mov rbp,QWORD[((-16))+rsi]4243 4244 mov rbx,QWORD[((-8))+rsi]4245 4246 lea rsp,[rsi]4247 4248 $L$epilogue_avx:4249 mov rdi,QWORD[8+rsp] ;WIN64 epilogue4250 mov rsi,QWORD[16+rsp]4251 DB 0F3h,0C3h ;repret4252 4253 $L$SEH_end_sha256_block_data_order_avx:4254 4255 ALIGN 644256 sha256_block_data_order_avx2:4257 mov QWORD[8+rsp],rdi ;WIN64 prologue4258 mov QWORD[16+rsp],rsi4259 mov rax,rsp4260 $L$SEH_begin_sha256_block_data_order_avx2:4261 mov rdi,rcx4262 mov rsi,rdx4263 mov rdx,r84264 4265 4266 4267 $L$avx2_shortcut:4268 mov rax,rsp4269 4270 push rbx4271 4272 push rbp4273 4274 push r124275 4276 push r134277 4278 push r144279 4280 push r154281 4282 sub rsp,6084283 shl rdx,44284 and rsp,-256*44285 lea rdx,[rdx*4+rsi]4286 add rsp,4484287 mov QWORD[((64+0))+rsp],rdi4288 mov QWORD[((64+8))+rsp],rsi4289 mov QWORD[((64+16))+rsp],rdx4290 mov QWORD[88+rsp],rax4291 4292 movaps XMMWORD[(64+32)+rsp],xmm64293 movaps XMMWORD[(64+48)+rsp],xmm74294 movaps XMMWORD[(64+64)+rsp],xmm84295 movaps XMMWORD[(64+80)+rsp],xmm94296 $L$prologue_avx2:4297 4298 vzeroupper4299 sub rsi,-16*44300 mov eax,DWORD[rdi]4301 mov r12,rsi4302 mov ebx,DWORD[4+rdi]4303 cmp rsi,rdx4304 mov ecx,DWORD[8+rdi]4305 cmove r12,rsp4306 mov edx,DWORD[12+rdi]4307 mov r8d,DWORD[16+rdi]4308 mov r9d,DWORD[20+rdi]4309 mov r10d,DWORD[24+rdi]4310 mov r11d,DWORD[28+rdi]4311 vmovdqa ymm8,YMMWORD[((K256+512+32))]4312 vmovdqa ymm9,YMMWORD[((K256+512+64))]4313 jmp NEAR $L$oop_avx24314 ALIGN 164315 $L$oop_avx2:4316 vmovdqa ymm7,YMMWORD[((K256+512))]4317 vmovdqu xmm0,XMMWORD[((-64+0))+rsi]4318 vmovdqu xmm1,XMMWORD[((-64+16))+rsi]4319 vmovdqu xmm2,XMMWORD[((-64+32))+rsi]4320 vmovdqu xmm3,XMMWORD[((-64+48))+rsi]4321 4322 vinserti128 ymm0,ymm0,XMMWORD[r12],14323 vinserti128 ymm1,ymm1,XMMWORD[16+r12],14324 vpshufb ymm0,ymm0,ymm74325 vinserti128 ymm2,ymm2,XMMWORD[32+r12],14326 vpshufb ymm1,ymm1,ymm74327 vinserti128 ymm3,ymm3,XMMWORD[48+r12],14328 4329 lea rbp,[K256]4330 vpshufb ymm2,ymm2,ymm74331 vpaddd ymm4,ymm0,YMMWORD[rbp]4332 vpshufb ymm3,ymm3,ymm74333 vpaddd ymm5,ymm1,YMMWORD[32+rbp]4334 vpaddd ymm6,ymm2,YMMWORD[64+rbp]4335 vpaddd ymm7,ymm3,YMMWORD[96+rbp]4336 vmovdqa YMMWORD[rsp],ymm44337 xor r14d,r14d4338 vmovdqa YMMWORD[32+rsp],ymm54339 lea rsp,[((-64))+rsp]4340 mov edi,ebx4341 vmovdqa YMMWORD[rsp],ymm64342 xor edi,ecx4343 vmovdqa YMMWORD[32+rsp],ymm74344 mov r12d,r9d4345 sub rbp,-16*2*44346 jmp NEAR $L$avx2_00_474347 4348 ALIGN 164349 $L$avx2_00_47:4350 lea rsp,[((-64))+rsp]4351 vpalignr ymm4,ymm1,ymm0,44352 add r11d,DWORD[((0+128))+rsp]4353 and r12d,r8d4354 rorx r13d,r8d,254355 vpalignr ymm7,ymm3,ymm2,44356 rorx r15d,r8d,114357 lea eax,[r14*1+rax]4358 lea r11d,[r12*1+r11]4359 vpsrld ymm6,ymm4,74360 andn r12d,r8d,r10d4361 xor r13d,r15d4362 rorx r14d,r8d,64363 vpaddd ymm0,ymm0,ymm74364 lea r11d,[r12*1+r11]4365 xor r13d,r14d4366 mov r15d,eax4367 vpsrld ymm7,ymm4,34368 rorx r12d,eax,224369 lea r11d,[r13*1+r11]4370 xor r15d,ebx4371 vpslld ymm5,ymm4,144372 rorx r14d,eax,134373 rorx r13d,eax,24374 lea edx,[r11*1+rdx]4375 vpxor ymm4,ymm7,ymm64376 and edi,r15d4377 xor r14d,r12d4378 xor edi,ebx4379 vpshufd ymm7,ymm3,2504380 xor r14d,r13d4381 lea r11d,[rdi*1+r11]4382 mov r12d,r8d4383 vpsrld ymm6,ymm6,114384 add r10d,DWORD[((4+128))+rsp]4385 and r12d,edx4386 rorx r13d,edx,254387 vpxor ymm4,ymm4,ymm54388 rorx edi,edx,114389 lea r11d,[r14*1+r11]4390 lea r10d,[r12*1+r10]4391 vpslld ymm5,ymm5,114392 andn r12d,edx,r9d4393 xor r13d,edi4394 rorx r14d,edx,64395 vpxor ymm4,ymm4,ymm64396 lea r10d,[r12*1+r10]4397 xor r13d,r14d4398 mov edi,r11d4399 vpsrld ymm6,ymm7,104400 rorx r12d,r11d,224401 lea r10d,[r13*1+r10]4402 xor edi,eax4403 vpxor ymm4,ymm4,ymm54404 rorx r14d,r11d,134405 rorx r13d,r11d,24406 lea ecx,[r10*1+rcx]4407 vpsrlq ymm7,ymm7,174408 and r15d,edi4409 xor r14d,r12d4410 xor r15d,eax4411 vpaddd ymm0,ymm0,ymm44412 xor r14d,r13d4413 lea r10d,[r15*1+r10]4414 mov r12d,edx4415 vpxor ymm6,ymm6,ymm74416 add r9d,DWORD[((8+128))+rsp]4417 and r12d,ecx4418 rorx r13d,ecx,254419 vpsrlq ymm7,ymm7,24420 rorx r15d,ecx,114421 lea r10d,[r14*1+r10]4422 lea r9d,[r12*1+r9]4423 vpxor ymm6,ymm6,ymm74424 andn r12d,ecx,r8d4425 xor r13d,r15d4426 rorx r14d,ecx,64427 vpshufb ymm6,ymm6,ymm84428 lea r9d,[r12*1+r9]4429 xor r13d,r14d4430 mov r15d,r10d4431 vpaddd ymm0,ymm0,ymm64432 rorx r12d,r10d,224433 lea r9d,[r13*1+r9]4434 xor r15d,r11d4435 vpshufd ymm7,ymm0,804436 rorx r14d,r10d,134437 rorx r13d,r10d,24438 lea ebx,[r9*1+rbx]4439 vpsrld ymm6,ymm7,104440 and edi,r15d4441 xor r14d,r12d4442 xor edi,r11d4443 vpsrlq ymm7,ymm7,174444 xor r14d,r13d4445 lea r9d,[rdi*1+r9]4446 mov r12d,ecx4447 vpxor ymm6,ymm6,ymm74448 add r8d,DWORD[((12+128))+rsp]4449 and r12d,ebx4450 rorx r13d,ebx,254451 vpsrlq ymm7,ymm7,24452 rorx edi,ebx,114453 lea r9d,[r14*1+r9]4454 lea r8d,[r12*1+r8]4455 vpxor ymm6,ymm6,ymm74456 andn r12d,ebx,edx4457 xor r13d,edi4458 rorx r14d,ebx,64459 vpshufb ymm6,ymm6,ymm94460 lea r8d,[r12*1+r8]4461 xor r13d,r14d4462 mov edi,r9d4463 vpaddd ymm0,ymm0,ymm64464 rorx r12d,r9d,224465 lea r8d,[r13*1+r8]4466 xor edi,r10d4467 vpaddd ymm6,ymm0,YMMWORD[rbp]4468 rorx r14d,r9d,134469 rorx r13d,r9d,24470 lea eax,[r8*1+rax]4471 and r15d,edi4472 xor r14d,r12d4473 xor r15d,r10d4474 xor r14d,r13d4475 lea r8d,[r15*1+r8]4476 mov r12d,ebx4477 vmovdqa YMMWORD[rsp],ymm64478 vpalignr ymm4,ymm2,ymm1,44479 add edx,DWORD[((32+128))+rsp]4480 and r12d,eax4481 rorx r13d,eax,254482 vpalignr ymm7,ymm0,ymm3,44483 rorx r15d,eax,114484 lea r8d,[r14*1+r8]4485 lea edx,[r12*1+rdx]4486 vpsrld ymm6,ymm4,74487 andn r12d,eax,ecx4488 xor r13d,r15d4489 rorx r14d,eax,64490 vpaddd ymm1,ymm1,ymm74491 lea edx,[r12*1+rdx]4492 xor r13d,r14d4493 mov r15d,r8d4494 vpsrld ymm7,ymm4,34495 rorx r12d,r8d,224496 lea edx,[r13*1+rdx]4497 xor r15d,r9d4498 vpslld ymm5,ymm4,144499 rorx r14d,r8d,134500 rorx r13d,r8d,24501 lea r11d,[rdx*1+r11]4502 vpxor ymm4,ymm7,ymm64503 and edi,r15d4504 xor r14d,r12d4505 xor edi,r9d4506 vpshufd ymm7,ymm0,2504507 xor r14d,r13d4508 lea edx,[rdi*1+rdx]4509 mov r12d,eax4510 vpsrld ymm6,ymm6,114511 add ecx,DWORD[((36+128))+rsp]4512 and r12d,r11d4513 rorx r13d,r11d,254514 vpxor ymm4,ymm4,ymm54515 rorx edi,r11d,114516 lea edx,[r14*1+rdx]4517 lea ecx,[r12*1+rcx]4518 vpslld ymm5,ymm5,114519 andn r12d,r11d,ebx4520 xor r13d,edi4521 rorx r14d,r11d,64522 vpxor ymm4,ymm4,ymm64523 lea ecx,[r12*1+rcx]4524 xor r13d,r14d4525 mov edi,edx4526 vpsrld ymm6,ymm7,104527 rorx r12d,edx,224528 lea ecx,[r13*1+rcx]4529 xor edi,r8d4530 vpxor ymm4,ymm4,ymm54531 rorx r14d,edx,134532 rorx r13d,edx,24533 lea r10d,[rcx*1+r10]4534 vpsrlq ymm7,ymm7,174535 and r15d,edi4536 xor r14d,r12d4537 xor r15d,r8d4538 vpaddd ymm1,ymm1,ymm44539 xor r14d,r13d4540 lea ecx,[r15*1+rcx]4541 mov r12d,r11d4542 vpxor ymm6,ymm6,ymm74543 add ebx,DWORD[((40+128))+rsp]4544 and r12d,r10d4545 rorx r13d,r10d,254546 vpsrlq ymm7,ymm7,24547 rorx r15d,r10d,114548 lea ecx,[r14*1+rcx]4549 lea ebx,[r12*1+rbx]4550 vpxor ymm6,ymm6,ymm74551 andn r12d,r10d,eax4552 xor r13d,r15d4553 rorx r14d,r10d,64554 vpshufb ymm6,ymm6,ymm84555 lea ebx,[r12*1+rbx]4556 xor r13d,r14d4557 mov r15d,ecx4558 vpaddd ymm1,ymm1,ymm64559 rorx r12d,ecx,224560 lea ebx,[r13*1+rbx]4561 xor r15d,edx4562 vpshufd ymm7,ymm1,804563 rorx r14d,ecx,134564 rorx r13d,ecx,24565 lea r9d,[rbx*1+r9]4566 vpsrld ymm6,ymm7,104567 and edi,r15d4568 xor r14d,r12d4569 xor edi,edx4570 vpsrlq ymm7,ymm7,174571 xor r14d,r13d4572 lea ebx,[rdi*1+rbx]4573 mov r12d,r10d4574 vpxor ymm6,ymm6,ymm74575 add eax,DWORD[((44+128))+rsp]4576 and r12d,r9d4577 rorx r13d,r9d,254578 vpsrlq ymm7,ymm7,24579 rorx edi,r9d,114580 lea ebx,[r14*1+rbx]4581 lea eax,[r12*1+rax]4582 vpxor ymm6,ymm6,ymm74583 andn r12d,r9d,r11d4584 xor r13d,edi4585 rorx r14d,r9d,64586 vpshufb ymm6,ymm6,ymm94587 lea eax,[r12*1+rax]4588 xor r13d,r14d4589 mov edi,ebx4590 vpaddd ymm1,ymm1,ymm64591 rorx r12d,ebx,224592 lea eax,[r13*1+rax]4593 xor edi,ecx4594 vpaddd ymm6,ymm1,YMMWORD[32+rbp]4595 rorx r14d,ebx,134596 rorx r13d,ebx,24597 lea r8d,[rax*1+r8]4598 and r15d,edi4599 xor r14d,r12d4600 xor r15d,ecx4601 xor r14d,r13d4602 lea eax,[r15*1+rax]4603 mov r12d,r9d4604 vmovdqa YMMWORD[32+rsp],ymm64605 lea rsp,[((-64))+rsp]4606 vpalignr ymm4,ymm3,ymm2,44607 add r11d,DWORD[((0+128))+rsp]4608 and r12d,r8d4609 rorx r13d,r8d,254610 vpalignr ymm7,ymm1,ymm0,44611 rorx r15d,r8d,114612 lea eax,[r14*1+rax]4613 lea r11d,[r12*1+r11]4614 vpsrld ymm6,ymm4,74615 andn r12d,r8d,r10d4616 xor r13d,r15d4617 rorx r14d,r8d,64618 vpaddd ymm2,ymm2,ymm74619 lea r11d,[r12*1+r11]4620 xor r13d,r14d4621 mov r15d,eax4622 vpsrld ymm7,ymm4,34623 rorx r12d,eax,224624 lea r11d,[r13*1+r11]4625 xor r15d,ebx4626 vpslld ymm5,ymm4,144627 rorx r14d,eax,134628 rorx r13d,eax,24629 lea edx,[r11*1+rdx]4630 vpxor ymm4,ymm7,ymm64631 and edi,r15d4632 xor r14d,r12d4633 xor edi,ebx4634 vpshufd ymm7,ymm1,2504635 xor r14d,r13d4636 lea r11d,[rdi*1+r11]4637 mov r12d,r8d4638 vpsrld ymm6,ymm6,114639 add r10d,DWORD[((4+128))+rsp]4640 and r12d,edx4641 rorx r13d,edx,254642 vpxor ymm4,ymm4,ymm54643 rorx edi,edx,114644 lea r11d,[r14*1+r11]4645 lea r10d,[r12*1+r10]4646 vpslld ymm5,ymm5,114647 andn r12d,edx,r9d4648 xor r13d,edi4649 rorx r14d,edx,64650 vpxor ymm4,ymm4,ymm64651 lea r10d,[r12*1+r10]4652 xor r13d,r14d4653 mov edi,r11d4654 vpsrld ymm6,ymm7,104655 rorx r12d,r11d,224656 lea r10d,[r13*1+r10]4657 xor edi,eax4658 vpxor ymm4,ymm4,ymm54659 rorx r14d,r11d,134660 rorx r13d,r11d,24661 lea ecx,[r10*1+rcx]4662 vpsrlq ymm7,ymm7,174663 and r15d,edi4664 xor r14d,r12d4665 xor r15d,eax4666 vpaddd ymm2,ymm2,ymm44667 xor r14d,r13d4668 lea r10d,[r15*1+r10]4669 mov r12d,edx4670 vpxor ymm6,ymm6,ymm74671 add r9d,DWORD[((8+128))+rsp]4672 and r12d,ecx4673 rorx r13d,ecx,254674 vpsrlq ymm7,ymm7,24675 rorx r15d,ecx,114676 lea r10d,[r14*1+r10]4677 lea r9d,[r12*1+r9]4678 vpxor ymm6,ymm6,ymm74679 andn r12d,ecx,r8d4680 xor r13d,r15d4681 rorx r14d,ecx,64682 vpshufb ymm6,ymm6,ymm84683 lea r9d,[r12*1+r9]4684 xor r13d,r14d4685 mov r15d,r10d4686 vpaddd ymm2,ymm2,ymm64687 rorx r12d,r10d,224688 lea r9d,[r13*1+r9]4689 xor r15d,r11d4690 vpshufd ymm7,ymm2,804691 rorx r14d,r10d,134692 rorx r13d,r10d,24693 lea ebx,[r9*1+rbx]4694 vpsrld ymm6,ymm7,104695 and edi,r15d4696 xor r14d,r12d4697 xor edi,r11d4698 vpsrlq ymm7,ymm7,174699 xor r14d,r13d4700 lea r9d,[rdi*1+r9]4701 mov r12d,ecx4702 vpxor ymm6,ymm6,ymm74703 add r8d,DWORD[((12+128))+rsp]4704 and r12d,ebx4705 rorx r13d,ebx,254706 vpsrlq ymm7,ymm7,24707 rorx edi,ebx,114708 lea r9d,[r14*1+r9]4709 lea r8d,[r12*1+r8]4710 vpxor ymm6,ymm6,ymm74711 andn r12d,ebx,edx4712 xor r13d,edi4713 rorx r14d,ebx,64714 vpshufb ymm6,ymm6,ymm94715 lea r8d,[r12*1+r8]4716 xor r13d,r14d4717 mov edi,r9d4718 vpaddd ymm2,ymm2,ymm64719 rorx r12d,r9d,224720 lea r8d,[r13*1+r8]4721 xor edi,r10d4722 vpaddd ymm6,ymm2,YMMWORD[64+rbp]4723 rorx r14d,r9d,134724 rorx r13d,r9d,24725 lea eax,[r8*1+rax]4726 and r15d,edi4727 xor r14d,r12d4728 xor r15d,r10d4729 xor r14d,r13d4730 lea r8d,[r15*1+r8]4731 mov r12d,ebx4732 vmovdqa YMMWORD[rsp],ymm64733 vpalignr ymm4,ymm0,ymm3,44734 add edx,DWORD[((32+128))+rsp]4735 and r12d,eax4736 rorx r13d,eax,254737 vpalignr ymm7,ymm2,ymm1,44738 rorx r15d,eax,114739 lea r8d,[r14*1+r8]4740 lea edx,[r12*1+rdx]4741 vpsrld ymm6,ymm4,74742 andn r12d,eax,ecx4743 xor r13d,r15d4744 rorx r14d,eax,64745 vpaddd ymm3,ymm3,ymm74746 lea edx,[r12*1+rdx]4747 xor r13d,r14d4748 mov r15d,r8d4749 vpsrld ymm7,ymm4,34750 rorx r12d,r8d,224751 lea edx,[r13*1+rdx]4752 xor r15d,r9d4753 vpslld ymm5,ymm4,144754 rorx r14d,r8d,134755 rorx r13d,r8d,24756 lea r11d,[rdx*1+r11]4757 vpxor ymm4,ymm7,ymm64758 and edi,r15d4759 xor r14d,r12d4760 xor edi,r9d4761 vpshufd ymm7,ymm2,2504762 xor r14d,r13d4763 lea edx,[rdi*1+rdx]4764 mov r12d,eax4765 vpsrld ymm6,ymm6,114766 add ecx,DWORD[((36+128))+rsp]4767 and r12d,r11d4768 rorx r13d,r11d,254769 vpxor ymm4,ymm4,ymm54770 rorx edi,r11d,114771 lea edx,[r14*1+rdx]4772 lea ecx,[r12*1+rcx]4773 vpslld ymm5,ymm5,114774 andn r12d,r11d,ebx4775 xor r13d,edi4776 rorx r14d,r11d,64777 vpxor ymm4,ymm4,ymm64778 lea ecx,[r12*1+rcx]4779 xor r13d,r14d4780 mov edi,edx4781 vpsrld ymm6,ymm7,104782 rorx r12d,edx,224783 lea ecx,[r13*1+rcx]4784 xor edi,r8d4785 vpxor ymm4,ymm4,ymm54786 rorx r14d,edx,134787 rorx r13d,edx,24788 lea r10d,[rcx*1+r10]4789 vpsrlq ymm7,ymm7,174790 and r15d,edi4791 xor r14d,r12d4792 xor r15d,r8d4793 vpaddd ymm3,ymm3,ymm44794 xor r14d,r13d4795 lea ecx,[r15*1+rcx]4796 mov r12d,r11d4797 vpxor ymm6,ymm6,ymm74798 add ebx,DWORD[((40+128))+rsp]4799 and r12d,r10d4800 rorx r13d,r10d,254801 vpsrlq ymm7,ymm7,24802 rorx r15d,r10d,114803 lea ecx,[r14*1+rcx]4804 lea ebx,[r12*1+rbx]4805 vpxor ymm6,ymm6,ymm74806 andn r12d,r10d,eax4807 xor r13d,r15d4808 rorx r14d,r10d,64809 vpshufb ymm6,ymm6,ymm84810 lea ebx,[r12*1+rbx]4811 xor r13d,r14d4812 mov r15d,ecx4813 vpaddd ymm3,ymm3,ymm64814 rorx r12d,ecx,224815 lea ebx,[r13*1+rbx]4816 xor r15d,edx4817 vpshufd ymm7,ymm3,804818 rorx r14d,ecx,134819 rorx r13d,ecx,24820 lea r9d,[rbx*1+r9]4821 vpsrld ymm6,ymm7,104822 and edi,r15d4823 xor r14d,r12d4824 xor edi,edx4825 vpsrlq ymm7,ymm7,174826 xor r14d,r13d4827 lea ebx,[rdi*1+rbx]4828 mov r12d,r10d4829 vpxor ymm6,ymm6,ymm74830 add eax,DWORD[((44+128))+rsp]4831 and r12d,r9d4832 rorx r13d,r9d,254833 vpsrlq ymm7,ymm7,24834 rorx edi,r9d,114835 lea ebx,[r14*1+rbx]4836 lea eax,[r12*1+rax]4837 vpxor ymm6,ymm6,ymm74838 andn r12d,r9d,r11d4839 xor r13d,edi4840 rorx r14d,r9d,64841 vpshufb ymm6,ymm6,ymm94842 lea eax,[r12*1+rax]4843 xor r13d,r14d4844 mov edi,ebx4845 vpaddd ymm3,ymm3,ymm64846 rorx r12d,ebx,224847 lea eax,[r13*1+rax]4848 xor edi,ecx4849 vpaddd ymm6,ymm3,YMMWORD[96+rbp]4850 rorx r14d,ebx,134851 rorx r13d,ebx,24852 lea r8d,[rax*1+r8]4853 and r15d,edi4854 xor r14d,r12d4855 xor r15d,ecx4856 xor r14d,r13d4857 lea eax,[r15*1+rax]4858 mov r12d,r9d4859 vmovdqa YMMWORD[32+rsp],ymm64860 lea rbp,[128+rbp]4861 cmp BYTE[3+rbp],04862 jne NEAR $L$avx2_00_474863 add r11d,DWORD[((0+64))+rsp]4864 and r12d,r8d4865 rorx r13d,r8d,254866 rorx r15d,r8d,114867 lea eax,[r14*1+rax]4868 lea r11d,[r12*1+r11]4869 andn r12d,r8d,r10d4870 xor r13d,r15d4871 rorx r14d,r8d,64872 lea r11d,[r12*1+r11]4873 xor r13d,r14d4874 mov r15d,eax4875 rorx r12d,eax,224876 lea r11d,[r13*1+r11]4877 xor r15d,ebx4878 rorx r14d,eax,134879 rorx r13d,eax,24880 lea edx,[r11*1+rdx]4881 and edi,r15d4882 xor r14d,r12d4883 xor edi,ebx4884 xor r14d,r13d4885 lea r11d,[rdi*1+r11]4886 mov r12d,r8d4887 add r10d,DWORD[((4+64))+rsp]4888 and r12d,edx4889 rorx r13d,edx,254890 rorx edi,edx,114891 lea r11d,[r14*1+r11]4892 lea r10d,[r12*1+r10]4893 andn r12d,edx,r9d4894 xor r13d,edi4895 rorx r14d,edx,64896 lea r10d,[r12*1+r10]4897 xor r13d,r14d4898 mov edi,r11d4899 rorx r12d,r11d,224900 lea r10d,[r13*1+r10]4901 xor edi,eax4902 rorx r14d,r11d,134903 rorx r13d,r11d,24904 lea ecx,[r10*1+rcx]4905 and r15d,edi4906 xor r14d,r12d4907 xor r15d,eax4908 xor r14d,r13d4909 lea r10d,[r15*1+r10]4910 mov r12d,edx4911 add r9d,DWORD[((8+64))+rsp]4912 and r12d,ecx4913 rorx r13d,ecx,254914 rorx r15d,ecx,114915 lea r10d,[r14*1+r10]4916 lea r9d,[r12*1+r9]4917 andn r12d,ecx,r8d4918 xor r13d,r15d4919 rorx r14d,ecx,64920 lea r9d,[r12*1+r9]4921 xor r13d,r14d4922 mov r15d,r10d4923 rorx r12d,r10d,224924 lea r9d,[r13*1+r9]4925 xor r15d,r11d4926 rorx r14d,r10d,134927 rorx r13d,r10d,24928 lea ebx,[r9*1+rbx]4929 and edi,r15d4930 xor r14d,r12d4931 xor edi,r11d4932 xor r14d,r13d4933 lea r9d,[rdi*1+r9]4934 mov r12d,ecx4935 add r8d,DWORD[((12+64))+rsp]4936 and r12d,ebx4937 rorx r13d,ebx,254938 rorx edi,ebx,114939 lea r9d,[r14*1+r9]4940 lea r8d,[r12*1+r8]4941 andn r12d,ebx,edx4942 xor r13d,edi4943 rorx r14d,ebx,64944 lea r8d,[r12*1+r8]4945 xor r13d,r14d4946 mov edi,r9d4947 rorx r12d,r9d,224948 lea r8d,[r13*1+r8]4949 xor edi,r10d4950 rorx r14d,r9d,134951 rorx r13d,r9d,24952 lea eax,[r8*1+rax]4953 and r15d,edi4954 xor r14d,r12d4955 xor r15d,r10d4956 xor r14d,r13d4957 lea r8d,[r15*1+r8]4958 mov r12d,ebx4959 add edx,DWORD[((32+64))+rsp]4960 and r12d,eax4961 rorx r13d,eax,254962 rorx r15d,eax,114963 lea r8d,[r14*1+r8]4964 lea edx,[r12*1+rdx]4965 andn r12d,eax,ecx4966 xor r13d,r15d4967 rorx r14d,eax,64968 lea edx,[r12*1+rdx]4969 xor r13d,r14d4970 mov r15d,r8d4971 rorx r12d,r8d,224972 lea edx,[r13*1+rdx]4973 xor r15d,r9d4974 rorx r14d,r8d,134975 rorx r13d,r8d,24976 lea r11d,[rdx*1+r11]4977 and edi,r15d4978 xor r14d,r12d4979 xor edi,r9d4980 xor r14d,r13d4981 lea edx,[rdi*1+rdx]4982 mov r12d,eax4983 add ecx,DWORD[((36+64))+rsp]4984 and r12d,r11d4985 rorx r13d,r11d,254986 rorx edi,r11d,114987 lea edx,[r14*1+rdx]4988 lea ecx,[r12*1+rcx]4989 andn r12d,r11d,ebx4990 xor r13d,edi4991 rorx r14d,r11d,64992 lea ecx,[r12*1+rcx]4993 xor r13d,r14d4994 mov edi,edx4995 rorx r12d,edx,224996 lea ecx,[r13*1+rcx]4997 xor edi,r8d4998 rorx r14d,edx,134999 rorx r13d,edx,25000 lea r10d,[rcx*1+r10]5001 and r15d,edi5002 xor r14d,r12d5003 xor r15d,r8d5004 xor r14d,r13d5005 lea ecx,[r15*1+rcx]5006 mov r12d,r11d5007 add ebx,DWORD[((40+64))+rsp]5008 and r12d,r10d5009 rorx r13d,r10d,255010 rorx r15d,r10d,115011 lea ecx,[r14*1+rcx]5012 lea ebx,[r12*1+rbx]5013 andn r12d,r10d,eax5014 xor r13d,r15d5015 rorx r14d,r10d,65016 lea ebx,[r12*1+rbx]5017 xor r13d,r14d5018 mov r15d,ecx5019 rorx r12d,ecx,225020 lea ebx,[r13*1+rbx]5021 xor r15d,edx5022 rorx r14d,ecx,135023 rorx r13d,ecx,25024 lea r9d,[rbx*1+r9]5025 and edi,r15d5026 xor r14d,r12d5027 xor edi,edx5028 xor r14d,r13d5029 lea ebx,[rdi*1+rbx]5030 mov r12d,r10d5031 add eax,DWORD[((44+64))+rsp]5032 and r12d,r9d5033 rorx r13d,r9d,255034 rorx edi,r9d,115035 lea ebx,[r14*1+rbx]5036 lea eax,[r12*1+rax]5037 andn r12d,r9d,r11d5038 xor r13d,edi5039 rorx r14d,r9d,65040 lea eax,[r12*1+rax]5041 xor r13d,r14d5042 mov edi,ebx5043 rorx r12d,ebx,225044 lea eax,[r13*1+rax]5045 xor edi,ecx5046 rorx r14d,ebx,135047 rorx r13d,ebx,25048 lea r8d,[rax*1+r8]5049 and r15d,edi5050 xor r14d,r12d5051 xor r15d,ecx5052 xor r14d,r13d5053 lea eax,[r15*1+rax]5054 mov r12d,r9d5055 add r11d,DWORD[rsp]5056 and r12d,r8d5057 rorx r13d,r8d,255058 rorx r15d,r8d,115059 lea eax,[r14*1+rax]5060 lea r11d,[r12*1+r11]5061 andn r12d,r8d,r10d5062 xor r13d,r15d5063 rorx r14d,r8d,65064 lea r11d,[r12*1+r11]5065 xor r13d,r14d5066 mov r15d,eax5067 rorx r12d,eax,225068 lea r11d,[r13*1+r11]5069 xor r15d,ebx5070 rorx r14d,eax,135071 rorx r13d,eax,25072 lea edx,[r11*1+rdx]5073 and edi,r15d5074 xor r14d,r12d5075 xor edi,ebx5076 xor r14d,r13d5077 lea r11d,[rdi*1+r11]5078 mov r12d,r8d5079 add r10d,DWORD[4+rsp]5080 and r12d,edx5081 rorx r13d,edx,255082 rorx edi,edx,115083 lea r11d,[r14*1+r11]5084 lea r10d,[r12*1+r10]5085 andn r12d,edx,r9d5086 xor r13d,edi5087 rorx r14d,edx,65088 lea r10d,[r12*1+r10]5089 xor r13d,r14d5090 mov edi,r11d5091 rorx r12d,r11d,225092 lea r10d,[r13*1+r10]5093 xor edi,eax5094 rorx r14d,r11d,135095 rorx r13d,r11d,25096 lea ecx,[r10*1+rcx]5097 and r15d,edi5098 xor r14d,r12d5099 xor r15d,eax5100 xor r14d,r13d5101 lea r10d,[r15*1+r10]5102 mov r12d,edx5103 add r9d,DWORD[8+rsp]5104 and r12d,ecx5105 rorx r13d,ecx,255106 rorx r15d,ecx,115107 lea r10d,[r14*1+r10]5108 lea r9d,[r12*1+r9]5109 andn r12d,ecx,r8d5110 xor r13d,r15d5111 rorx r14d,ecx,65112 lea r9d,[r12*1+r9]5113 xor r13d,r14d5114 mov r15d,r10d5115 rorx r12d,r10d,225116 lea r9d,[r13*1+r9]5117 xor r15d,r11d5118 rorx r14d,r10d,135119 rorx r13d,r10d,25120 lea ebx,[r9*1+rbx]5121 and edi,r15d5122 xor r14d,r12d5123 xor edi,r11d5124 xor r14d,r13d5125 lea r9d,[rdi*1+r9]5126 mov r12d,ecx5127 add r8d,DWORD[12+rsp]5128 and r12d,ebx5129 rorx r13d,ebx,255130 rorx edi,ebx,115131 lea r9d,[r14*1+r9]5132 lea r8d,[r12*1+r8]5133 andn r12d,ebx,edx5134 xor r13d,edi5135 rorx r14d,ebx,65136 lea r8d,[r12*1+r8]5137 xor r13d,r14d5138 mov edi,r9d5139 rorx r12d,r9d,225140 lea r8d,[r13*1+r8]5141 xor edi,r10d5142 rorx r14d,r9d,135143 rorx r13d,r9d,25144 lea eax,[r8*1+rax]5145 and r15d,edi5146 xor r14d,r12d5147 xor r15d,r10d5148 xor r14d,r13d5149 lea r8d,[r15*1+r8]5150 mov r12d,ebx5151 add edx,DWORD[32+rsp]5152 and r12d,eax5153 rorx r13d,eax,255154 rorx r15d,eax,115155 lea r8d,[r14*1+r8]5156 lea edx,[r12*1+rdx]5157 andn r12d,eax,ecx5158 xor r13d,r15d5159 rorx r14d,eax,65160 lea edx,[r12*1+rdx]5161 xor r13d,r14d5162 mov r15d,r8d5163 rorx r12d,r8d,225164 lea edx,[r13*1+rdx]5165 xor r15d,r9d5166 rorx r14d,r8d,135167 rorx r13d,r8d,25168 lea r11d,[rdx*1+r11]5169 and edi,r15d5170 xor r14d,r12d5171 xor edi,r9d5172 xor r14d,r13d5173 lea edx,[rdi*1+rdx]5174 mov r12d,eax5175 add ecx,DWORD[36+rsp]5176 and r12d,r11d5177 rorx r13d,r11d,255178 rorx edi,r11d,115179 lea edx,[r14*1+rdx]5180 lea ecx,[r12*1+rcx]5181 andn r12d,r11d,ebx5182 xor r13d,edi5183 rorx r14d,r11d,65184 lea ecx,[r12*1+rcx]5185 xor r13d,r14d5186 mov edi,edx5187 rorx r12d,edx,225188 lea ecx,[r13*1+rcx]5189 xor edi,r8d5190 rorx r14d,edx,135191 rorx r13d,edx,25192 lea r10d,[rcx*1+r10]5193 and r15d,edi5194 xor r14d,r12d5195 xor r15d,r8d5196 xor r14d,r13d5197 lea ecx,[r15*1+rcx]5198 mov r12d,r11d5199 add ebx,DWORD[40+rsp]5200 and r12d,r10d5201 rorx r13d,r10d,255202 rorx r15d,r10d,115203 lea ecx,[r14*1+rcx]5204 lea ebx,[r12*1+rbx]5205 andn r12d,r10d,eax5206 xor r13d,r15d5207 rorx r14d,r10d,65208 lea ebx,[r12*1+rbx]5209 xor r13d,r14d5210 mov r15d,ecx5211 rorx r12d,ecx,225212 lea ebx,[r13*1+rbx]5213 xor r15d,edx5214 rorx r14d,ecx,135215 rorx r13d,ecx,25216 lea r9d,[rbx*1+r9]5217 and edi,r15d5218 xor r14d,r12d5219 xor edi,edx5220 xor r14d,r13d5221 lea ebx,[rdi*1+rbx]5222 mov r12d,r10d5223 add eax,DWORD[44+rsp]5224 and r12d,r9d5225 rorx r13d,r9d,255226 rorx edi,r9d,115227 lea ebx,[r14*1+rbx]5228 lea eax,[r12*1+rax]5229 andn r12d,r9d,r11d5230 xor r13d,edi5231 rorx r14d,r9d,65232 lea eax,[r12*1+rax]5233 xor r13d,r14d5234 mov edi,ebx5235 rorx r12d,ebx,225236 lea eax,[r13*1+rax]5237 xor edi,ecx5238 rorx r14d,ebx,135239 rorx r13d,ebx,25240 lea r8d,[rax*1+r8]5241 and r15d,edi5242 xor r14d,r12d5243 xor r15d,ecx5244 xor r14d,r13d5245 lea eax,[r15*1+rax]5246 mov r12d,r9d5247 mov rdi,QWORD[512+rsp]5248 add eax,r14d5249 5250 lea rbp,[448+rsp]5251 5252 add eax,DWORD[rdi]5253 add ebx,DWORD[4+rdi]5254 add ecx,DWORD[8+rdi]5255 add edx,DWORD[12+rdi]5256 add r8d,DWORD[16+rdi]5257 add r9d,DWORD[20+rdi]5258 add r10d,DWORD[24+rdi]5259 add r11d,DWORD[28+rdi]5260 5261 mov DWORD[rdi],eax5262 mov DWORD[4+rdi],ebx5263 mov DWORD[8+rdi],ecx5264 mov DWORD[12+rdi],edx5265 mov DWORD[16+rdi],r8d5266 mov DWORD[20+rdi],r9d5267 mov DWORD[24+rdi],r10d5268 mov DWORD[28+rdi],r11d5269 5270 cmp rsi,QWORD[80+rbp]5271 je NEAR $L$done_avx25272 5273 xor r14d,r14d5274 mov edi,ebx5275 xor edi,ecx5276 mov r12d,r9d5277 jmp NEAR $L$ower_avx25278 ALIGN 165279 $L$ower_avx2:5280 add r11d,DWORD[((0+16))+rbp]5281 and r12d,r8d5282 rorx r13d,r8d,255283 rorx r15d,r8d,115284 lea eax,[r14*1+rax]5285 lea r11d,[r12*1+r11]5286 andn r12d,r8d,r10d5287 xor r13d,r15d5288 rorx r14d,r8d,65289 lea r11d,[r12*1+r11]5290 xor r13d,r14d5291 mov r15d,eax5292 rorx r12d,eax,225293 lea r11d,[r13*1+r11]5294 xor r15d,ebx5295 rorx r14d,eax,135296 rorx r13d,eax,25297 lea edx,[r11*1+rdx]5298 and edi,r15d5299 xor r14d,r12d5300 xor edi,ebx5301 xor r14d,r13d5302 lea r11d,[rdi*1+r11]5303 mov r12d,r8d5304 add r10d,DWORD[((4+16))+rbp]5305 and r12d,edx5306 rorx r13d,edx,255307 rorx edi,edx,115308 lea r11d,[r14*1+r11]5309 lea r10d,[r12*1+r10]5310 andn r12d,edx,r9d5311 xor r13d,edi5312 rorx r14d,edx,65313 lea r10d,[r12*1+r10]5314 xor r13d,r14d5315 mov edi,r11d5316 rorx r12d,r11d,225317 lea r10d,[r13*1+r10]5318 xor edi,eax5319 rorx r14d,r11d,135320 rorx r13d,r11d,25321 lea ecx,[r10*1+rcx]5322 and r15d,edi5323 xor r14d,r12d5324 xor r15d,eax5325 xor r14d,r13d5326 lea r10d,[r15*1+r10]5327 mov r12d,edx5328 add r9d,DWORD[((8+16))+rbp]5329 and r12d,ecx5330 rorx r13d,ecx,255331 rorx r15d,ecx,115332 lea r10d,[r14*1+r10]5333 lea r9d,[r12*1+r9]5334 andn r12d,ecx,r8d5335 xor r13d,r15d5336 rorx r14d,ecx,65337 lea r9d,[r12*1+r9]5338 xor r13d,r14d5339 mov r15d,r10d5340 rorx r12d,r10d,225341 lea r9d,[r13*1+r9]5342 xor r15d,r11d5343 rorx r14d,r10d,135344 rorx r13d,r10d,25345 lea ebx,[r9*1+rbx]5346 and edi,r15d5347 xor r14d,r12d5348 xor edi,r11d5349 xor r14d,r13d5350 lea r9d,[rdi*1+r9]5351 mov r12d,ecx5352 add r8d,DWORD[((12+16))+rbp]5353 and r12d,ebx5354 rorx r13d,ebx,255355 rorx edi,ebx,115356 lea r9d,[r14*1+r9]5357 lea r8d,[r12*1+r8]5358 andn r12d,ebx,edx5359 xor r13d,edi5360 rorx r14d,ebx,65361 lea r8d,[r12*1+r8]5362 xor r13d,r14d5363 mov edi,r9d5364 rorx r12d,r9d,225365 lea r8d,[r13*1+r8]5366 xor edi,r10d5367 rorx r14d,r9d,135368 rorx r13d,r9d,25369 lea eax,[r8*1+rax]5370 and r15d,edi5371 xor r14d,r12d5372 xor r15d,r10d5373 xor r14d,r13d5374 lea r8d,[r15*1+r8]5375 mov r12d,ebx5376 add edx,DWORD[((32+16))+rbp]5377 and r12d,eax5378 rorx r13d,eax,255379 rorx r15d,eax,115380 lea r8d,[r14*1+r8]5381 lea edx,[r12*1+rdx]5382 andn r12d,eax,ecx5383 xor r13d,r15d5384 rorx r14d,eax,65385 lea edx,[r12*1+rdx]5386 xor r13d,r14d5387 mov r15d,r8d5388 rorx r12d,r8d,225389 lea edx,[r13*1+rdx]5390 xor r15d,r9d5391 rorx r14d,r8d,135392 rorx r13d,r8d,25393 lea r11d,[rdx*1+r11]5394 and edi,r15d5395 xor r14d,r12d5396 xor edi,r9d5397 xor r14d,r13d5398 lea edx,[rdi*1+rdx]5399 mov r12d,eax5400 add ecx,DWORD[((36+16))+rbp]5401 and r12d,r11d5402 rorx r13d,r11d,255403 rorx edi,r11d,115404 lea edx,[r14*1+rdx]5405 lea ecx,[r12*1+rcx]5406 andn r12d,r11d,ebx5407 xor r13d,edi5408 rorx r14d,r11d,65409 lea ecx,[r12*1+rcx]5410 xor r13d,r14d5411 mov edi,edx5412 rorx r12d,edx,225413 lea ecx,[r13*1+rcx]5414 xor edi,r8d5415 rorx r14d,edx,135416 rorx r13d,edx,25417 lea r10d,[rcx*1+r10]5418 and r15d,edi5419 xor r14d,r12d5420 xor r15d,r8d5421 xor r14d,r13d5422 lea ecx,[r15*1+rcx]5423 mov r12d,r11d5424 add ebx,DWORD[((40+16))+rbp]5425 and r12d,r10d5426 rorx r13d,r10d,255427 rorx r15d,r10d,115428 lea ecx,[r14*1+rcx]5429 lea ebx,[r12*1+rbx]5430 andn r12d,r10d,eax5431 xor r13d,r15d5432 rorx r14d,r10d,65433 lea ebx,[r12*1+rbx]5434 xor r13d,r14d5435 mov r15d,ecx5436 rorx r12d,ecx,225437 lea ebx,[r13*1+rbx]5438 xor r15d,edx5439 rorx r14d,ecx,135440 rorx r13d,ecx,25441 lea r9d,[rbx*1+r9]5442 and edi,r15d5443 xor r14d,r12d5444 xor edi,edx5445 xor r14d,r13d5446 lea ebx,[rdi*1+rbx]5447 mov r12d,r10d5448 add eax,DWORD[((44+16))+rbp]5449 and r12d,r9d5450 rorx r13d,r9d,255451 rorx edi,r9d,115452 lea ebx,[r14*1+rbx]5453 lea eax,[r12*1+rax]5454 andn r12d,r9d,r11d5455 xor r13d,edi5456 rorx r14d,r9d,65457 lea eax,[r12*1+rax]5458 xor r13d,r14d5459 mov edi,ebx5460 rorx r12d,ebx,225461 lea eax,[r13*1+rax]5462 xor edi,ecx5463 rorx r14d,ebx,135464 rorx r13d,ebx,25465 lea r8d,[rax*1+r8]5466 and r15d,edi5467 xor r14d,r12d5468 xor r15d,ecx5469 xor r14d,r13d5470 lea eax,[r15*1+rax]5471 mov r12d,r9d5472 lea rbp,[((-64))+rbp]5473 cmp rbp,rsp5474 jae NEAR $L$ower_avx25475 5476 mov rdi,QWORD[512+rsp]5477 add eax,r14d5478 5479 lea rsp,[448+rsp]5480 5481 5482 5483 add eax,DWORD[rdi]5484 add ebx,DWORD[4+rdi]5485 add ecx,DWORD[8+rdi]5486 add edx,DWORD[12+rdi]5487 add r8d,DWORD[16+rdi]5488 add r9d,DWORD[20+rdi]5489 lea rsi,[128+rsi]5490 add r10d,DWORD[24+rdi]5491 mov r12,rsi5492 add r11d,DWORD[28+rdi]5493 cmp rsi,QWORD[((64+16))+rsp]5494 5495 mov DWORD[rdi],eax5496 cmove r12,rsp5497 mov DWORD[4+rdi],ebx5498 mov DWORD[8+rdi],ecx5499 mov DWORD[12+rdi],edx5500 mov DWORD[16+rdi],r8d5501 mov DWORD[20+rdi],r9d5502 mov DWORD[24+rdi],r10d5503 mov DWORD[28+rdi],r11d5504 5505 jbe NEAR $L$oop_avx25506 lea rbp,[rsp]5507 5508 5509 5510 5511 $L$done_avx2:5512 mov rsi,QWORD[88+rbp]5513 5514 vzeroupper5515 movaps xmm6,XMMWORD[((64+32))+rbp]5516 movaps xmm7,XMMWORD[((64+48))+rbp]5517 movaps xmm8,XMMWORD[((64+64))+rbp]5518 movaps xmm9,XMMWORD[((64+80))+rbp]5519 mov r15,QWORD[((-48))+rsi]5520 5521 mov r14,QWORD[((-40))+rsi]5522 5523 mov r13,QWORD[((-32))+rsi]5524 5525 mov r12,QWORD[((-24))+rsi]5526 5527 mov rbp,QWORD[((-16))+rsi]5528 5529 mov rbx,QWORD[((-8))+rsi]5530 5531 lea rsp,[rsi]5532 5533 $L$epilogue_avx2:5534 mov rdi,QWORD[8+rsp] ;WIN64 epilogue5535 mov rsi,QWORD[16+rsp]5536 DB 0F3h,0C3h ;repret5537 5538 $L$SEH_end_sha256_block_data_order_avx2:5539 3152 EXTERN __imp_RtlVirtualUnwind 5540 3153 … … 5569 3182 cmp rbx,r10 5570 3183 jae NEAR $L$in_prologue 5571 lea r10,[$L$avx2_shortcut]5572 cmp rbx,r105573 jb NEAR $L$not_in_avx25574 5575 and rax,-256*45576 add rax,4485577 $L$not_in_avx2:5578 3184 mov rsi,rax 5579 3185 mov rax,QWORD[((64+24))+rax] … … 5683 3289 DD $L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase 5684 3290 DD $L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase 5685 DD $L$SEH_begin_sha256_block_data_order_avx wrt ..imagebase5686 DD $L$SEH_end_sha256_block_data_order_avx wrt ..imagebase5687 DD $L$SEH_info_sha256_block_data_order_avx wrt ..imagebase5688 DD $L$SEH_begin_sha256_block_data_order_avx2 wrt ..imagebase5689 DD $L$SEH_end_sha256_block_data_order_avx2 wrt ..imagebase5690 DD $L$SEH_info_sha256_block_data_order_avx2 wrt ..imagebase5691 3291 section .xdata rdata align=8 5692 3292 ALIGN 8 … … 5702 3302 DD se_handler wrt ..imagebase 5703 3303 DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase 5704 $L$SEH_info_sha256_block_data_order_avx:5705 DB 9,0,0,05706 DD se_handler wrt ..imagebase5707 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase5708 $L$SEH_info_sha256_block_data_order_avx2:5709 DB 9,0,0,05710 DD se_handler wrt ..imagebase5711 DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase
Note:
See TracChangeset
for help on using the changeset viewer.