Changeset 95221 in vbox
- Timestamp:
- Jun 8, 2022 8:35:57 AM (3 years ago)
- Location:
- trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm
- Files:
-
- 19 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/aesni-gcm-x86_64.S
r94083 r95221 6 6 7 7 8 global aesni_gcm_encrypt 8 9 9 ALIGN 32 10 _aesni_ctr32_ghash_6x: 10 aesni_gcm_encrypt: 11 11 12 vmovdqu xmm2,XMMWORD[32+r11] 13 sub rdx,6 14 vpxor xmm4,xmm4,xmm4 15 vmovdqu xmm15,XMMWORD[((0-128))+rcx] 16 vpaddb xmm10,xmm1,xmm2 17 vpaddb xmm11,xmm10,xmm2 18 vpaddb xmm12,xmm11,xmm2 19 vpaddb xmm13,xmm12,xmm2 20 vpaddb xmm14,xmm13,xmm2 21 vpxor xmm9,xmm1,xmm15 22 vmovdqu XMMWORD[(16+8)+rsp],xmm4 23 jmp NEAR $L$oop6x 12 xor eax,eax 13 DB 0F3h,0C3h ;repret 24 14 25 ALIGN 3226 $L$oop6x:27 add ebx,10066329628 jc NEAR $L$handle_ctr3229 vmovdqu xmm3,XMMWORD[((0-32))+r9]30 vpaddb xmm1,xmm14,xmm231 vpxor xmm10,xmm10,xmm1532 vpxor xmm11,xmm11,xmm1533 34 $L$resume_ctr32:35 vmovdqu XMMWORD[r8],xmm136 vpclmulqdq xmm5,xmm7,xmm3,0x1037 vpxor xmm12,xmm12,xmm1538 vmovups xmm2,XMMWORD[((16-128))+rcx]39 vpclmulqdq xmm6,xmm7,xmm3,0x0140 xor r12,r1241 cmp r15,r1442 43 vaesenc xmm9,xmm9,xmm244 vmovdqu xmm0,XMMWORD[((48+8))+rsp]45 vpxor xmm13,xmm13,xmm1546 vpclmulqdq xmm1,xmm7,xmm3,0x0047 vaesenc xmm10,xmm10,xmm248 vpxor xmm14,xmm14,xmm1549 setnc r12b50 vpclmulqdq xmm7,xmm7,xmm3,0x1151 vaesenc xmm11,xmm11,xmm252 vmovdqu xmm3,XMMWORD[((16-32))+r9]53 neg r1254 vaesenc xmm12,xmm12,xmm255 vpxor xmm6,xmm6,xmm556 vpclmulqdq xmm5,xmm0,xmm3,0x0057 vpxor xmm8,xmm8,xmm458 vaesenc xmm13,xmm13,xmm259 vpxor xmm4,xmm1,xmm560 and r12,0x6061 vmovups xmm15,XMMWORD[((32-128))+rcx]62 vpclmulqdq xmm1,xmm0,xmm3,0x1063 vaesenc xmm14,xmm14,xmm264 65 vpclmulqdq xmm2,xmm0,xmm3,0x0166 lea r14,[r12*1+r14]67 vaesenc xmm9,xmm9,xmm1568 vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]69 vpclmulqdq xmm3,xmm0,xmm3,0x1170 vmovdqu xmm0,XMMWORD[((64+8))+rsp]71 vaesenc xmm10,xmm10,xmm1572 movbe r13,QWORD[88+r14]73 vaesenc xmm11,xmm11,xmm1574 movbe r12,QWORD[80+r14]75 vaesenc xmm12,xmm12,xmm1576 mov QWORD[((32+8))+rsp],r1377 vaesenc xmm13,xmm13,xmm1578 mov QWORD[((40+8))+rsp],r1279 vmovdqu xmm5,XMMWORD[((48-32))+r9]80 vaesenc xmm14,xmm14,xmm1581 82 vmovups xmm15,XMMWORD[((48-128))+rcx]83 vpxor xmm6,xmm6,xmm184 vpclmulqdq xmm1,xmm0,xmm5,0x0085 vaesenc xmm9,xmm9,xmm1586 vpxor xmm6,xmm6,xmm287 vpclmulqdq xmm2,xmm0,xmm5,0x1088 vaesenc xmm10,xmm10,xmm1589 vpxor xmm7,xmm7,xmm390 vpclmulqdq xmm3,xmm0,xmm5,0x0191 vaesenc xmm11,xmm11,xmm1592 vpclmulqdq xmm5,xmm0,xmm5,0x1193 vmovdqu xmm0,XMMWORD[((80+8))+rsp]94 vaesenc xmm12,xmm12,xmm1595 vaesenc xmm13,xmm13,xmm1596 vpxor xmm4,xmm4,xmm197 vmovdqu xmm1,XMMWORD[((64-32))+r9]98 vaesenc xmm14,xmm14,xmm1599 100 vmovups xmm15,XMMWORD[((64-128))+rcx]101 vpxor xmm6,xmm6,xmm2102 vpclmulqdq xmm2,xmm0,xmm1,0x00103 vaesenc xmm9,xmm9,xmm15104 vpxor xmm6,xmm6,xmm3105 vpclmulqdq xmm3,xmm0,xmm1,0x10106 vaesenc xmm10,xmm10,xmm15107 movbe r13,QWORD[72+r14]108 vpxor xmm7,xmm7,xmm5109 vpclmulqdq xmm5,xmm0,xmm1,0x01110 vaesenc xmm11,xmm11,xmm15111 movbe r12,QWORD[64+r14]112 vpclmulqdq xmm1,xmm0,xmm1,0x11113 vmovdqu xmm0,XMMWORD[((96+8))+rsp]114 vaesenc xmm12,xmm12,xmm15115 mov QWORD[((48+8))+rsp],r13116 vaesenc xmm13,xmm13,xmm15117 mov QWORD[((56+8))+rsp],r12118 vpxor xmm4,xmm4,xmm2119 vmovdqu xmm2,XMMWORD[((96-32))+r9]120 vaesenc xmm14,xmm14,xmm15121 122 vmovups xmm15,XMMWORD[((80-128))+rcx]123 vpxor xmm6,xmm6,xmm3124 vpclmulqdq xmm3,xmm0,xmm2,0x00125 vaesenc xmm9,xmm9,xmm15126 vpxor xmm6,xmm6,xmm5127 vpclmulqdq xmm5,xmm0,xmm2,0x10128 vaesenc xmm10,xmm10,xmm15129 movbe r13,QWORD[56+r14]130 vpxor xmm7,xmm7,xmm1131 vpclmulqdq xmm1,xmm0,xmm2,0x01132 vpxor xmm8,xmm8,XMMWORD[((112+8))+rsp]133 vaesenc xmm11,xmm11,xmm15134 movbe r12,QWORD[48+r14]135 vpclmulqdq xmm2,xmm0,xmm2,0x11136 vaesenc xmm12,xmm12,xmm15137 mov QWORD[((64+8))+rsp],r13138 vaesenc xmm13,xmm13,xmm15139 mov QWORD[((72+8))+rsp],r12140 vpxor xmm4,xmm4,xmm3141 vmovdqu xmm3,XMMWORD[((112-32))+r9]142 vaesenc xmm14,xmm14,xmm15143 144 vmovups xmm15,XMMWORD[((96-128))+rcx]145 vpxor xmm6,xmm6,xmm5146 vpclmulqdq xmm5,xmm8,xmm3,0x10147 vaesenc xmm9,xmm9,xmm15148 vpxor xmm6,xmm6,xmm1149 vpclmulqdq xmm1,xmm8,xmm3,0x01150 vaesenc xmm10,xmm10,xmm15151 movbe r13,QWORD[40+r14]152 vpxor xmm7,xmm7,xmm2153 vpclmulqdq xmm2,xmm8,xmm3,0x00154 vaesenc xmm11,xmm11,xmm15155 movbe r12,QWORD[32+r14]156 vpclmulqdq xmm8,xmm8,xmm3,0x11157 vaesenc xmm12,xmm12,xmm15158 mov QWORD[((80+8))+rsp],r13159 vaesenc xmm13,xmm13,xmm15160 mov QWORD[((88+8))+rsp],r12161 vpxor xmm6,xmm6,xmm5162 vaesenc xmm14,xmm14,xmm15163 vpxor xmm6,xmm6,xmm1164 165 vmovups xmm15,XMMWORD[((112-128))+rcx]166 vpslldq xmm5,xmm6,8167 vpxor xmm4,xmm4,xmm2168 vmovdqu xmm3,XMMWORD[16+r11]169 170 vaesenc xmm9,xmm9,xmm15171 vpxor xmm7,xmm7,xmm8172 vaesenc xmm10,xmm10,xmm15173 vpxor xmm4,xmm4,xmm5174 movbe r13,QWORD[24+r14]175 vaesenc xmm11,xmm11,xmm15176 movbe r12,QWORD[16+r14]177 vpalignr xmm0,xmm4,xmm4,8178 vpclmulqdq xmm4,xmm4,xmm3,0x10179 mov QWORD[((96+8))+rsp],r13180 vaesenc xmm12,xmm12,xmm15181 mov QWORD[((104+8))+rsp],r12182 vaesenc xmm13,xmm13,xmm15183 vmovups xmm1,XMMWORD[((128-128))+rcx]184 vaesenc xmm14,xmm14,xmm15185 186 vaesenc xmm9,xmm9,xmm1187 vmovups xmm15,XMMWORD[((144-128))+rcx]188 vaesenc xmm10,xmm10,xmm1189 vpsrldq xmm6,xmm6,8190 vaesenc xmm11,xmm11,xmm1191 vpxor xmm7,xmm7,xmm6192 vaesenc xmm12,xmm12,xmm1193 vpxor xmm4,xmm4,xmm0194 movbe r13,QWORD[8+r14]195 vaesenc xmm13,xmm13,xmm1196 movbe r12,QWORD[r14]197 vaesenc xmm14,xmm14,xmm1198 vmovups xmm1,XMMWORD[((160-128))+rcx]199 cmp ebp,11200 jb NEAR $L$enc_tail201 202 vaesenc xmm9,xmm9,xmm15203 vaesenc xmm10,xmm10,xmm15204 vaesenc xmm11,xmm11,xmm15205 vaesenc xmm12,xmm12,xmm15206 vaesenc xmm13,xmm13,xmm15207 vaesenc xmm14,xmm14,xmm15208 209 vaesenc xmm9,xmm9,xmm1210 vaesenc xmm10,xmm10,xmm1211 vaesenc xmm11,xmm11,xmm1212 vaesenc xmm12,xmm12,xmm1213 vaesenc xmm13,xmm13,xmm1214 vmovups xmm15,XMMWORD[((176-128))+rcx]215 vaesenc xmm14,xmm14,xmm1216 vmovups xmm1,XMMWORD[((192-128))+rcx]217 je NEAR $L$enc_tail218 219 vaesenc xmm9,xmm9,xmm15220 vaesenc xmm10,xmm10,xmm15221 vaesenc xmm11,xmm11,xmm15222 vaesenc xmm12,xmm12,xmm15223 vaesenc xmm13,xmm13,xmm15224 vaesenc xmm14,xmm14,xmm15225 226 vaesenc xmm9,xmm9,xmm1227 vaesenc xmm10,xmm10,xmm1228 vaesenc xmm11,xmm11,xmm1229 vaesenc xmm12,xmm12,xmm1230 vaesenc xmm13,xmm13,xmm1231 vmovups xmm15,XMMWORD[((208-128))+rcx]232 vaesenc xmm14,xmm14,xmm1233 vmovups xmm1,XMMWORD[((224-128))+rcx]234 jmp NEAR $L$enc_tail235 236 ALIGN 32237 $L$handle_ctr32:238 vmovdqu xmm0,XMMWORD[r11]239 vpshufb xmm6,xmm1,xmm0240 vmovdqu xmm5,XMMWORD[48+r11]241 vpaddd xmm10,xmm6,XMMWORD[64+r11]242 vpaddd xmm11,xmm6,xmm5243 vmovdqu xmm3,XMMWORD[((0-32))+r9]244 vpaddd xmm12,xmm10,xmm5245 vpshufb xmm10,xmm10,xmm0246 vpaddd xmm13,xmm11,xmm5247 vpshufb xmm11,xmm11,xmm0248 vpxor xmm10,xmm10,xmm15249 vpaddd xmm14,xmm12,xmm5250 vpshufb xmm12,xmm12,xmm0251 vpxor xmm11,xmm11,xmm15252 vpaddd xmm1,xmm13,xmm5253 vpshufb xmm13,xmm13,xmm0254 vpshufb xmm14,xmm14,xmm0255 vpshufb xmm1,xmm1,xmm0256 jmp NEAR $L$resume_ctr32257 258 ALIGN 32259 $L$enc_tail:260 vaesenc xmm9,xmm9,xmm15261 vmovdqu XMMWORD[(16+8)+rsp],xmm7262 vpalignr xmm8,xmm4,xmm4,8263 vaesenc xmm10,xmm10,xmm15264 vpclmulqdq xmm4,xmm4,xmm3,0x10265 vpxor xmm2,xmm1,XMMWORD[rdi]266 vaesenc xmm11,xmm11,xmm15267 vpxor xmm0,xmm1,XMMWORD[16+rdi]268 vaesenc xmm12,xmm12,xmm15269 vpxor xmm5,xmm1,XMMWORD[32+rdi]270 vaesenc xmm13,xmm13,xmm15271 vpxor xmm6,xmm1,XMMWORD[48+rdi]272 vaesenc xmm14,xmm14,xmm15273 vpxor xmm7,xmm1,XMMWORD[64+rdi]274 vpxor xmm3,xmm1,XMMWORD[80+rdi]275 vmovdqu xmm1,XMMWORD[r8]276 277 vaesenclast xmm9,xmm9,xmm2278 vmovdqu xmm2,XMMWORD[32+r11]279 vaesenclast xmm10,xmm10,xmm0280 vpaddb xmm0,xmm1,xmm2281 mov QWORD[((112+8))+rsp],r13282 lea rdi,[96+rdi]283 vaesenclast xmm11,xmm11,xmm5284 vpaddb xmm5,xmm0,xmm2285 mov QWORD[((120+8))+rsp],r12286 lea rsi,[96+rsi]287 vmovdqu xmm15,XMMWORD[((0-128))+rcx]288 vaesenclast xmm12,xmm12,xmm6289 vpaddb xmm6,xmm5,xmm2290 vaesenclast xmm13,xmm13,xmm7291 vpaddb xmm7,xmm6,xmm2292 vaesenclast xmm14,xmm14,xmm3293 vpaddb xmm3,xmm7,xmm2294 295 add r10,0x60296 sub rdx,0x6297 jc NEAR $L$6x_done298 299 vmovups XMMWORD[(-96)+rsi],xmm9300 vpxor xmm9,xmm1,xmm15301 vmovups XMMWORD[(-80)+rsi],xmm10302 vmovdqa xmm10,xmm0303 vmovups XMMWORD[(-64)+rsi],xmm11304 vmovdqa xmm11,xmm5305 vmovups XMMWORD[(-48)+rsi],xmm12306 vmovdqa xmm12,xmm6307 vmovups XMMWORD[(-32)+rsi],xmm13308 vmovdqa xmm13,xmm7309 vmovups XMMWORD[(-16)+rsi],xmm14310 vmovdqa xmm14,xmm3311 vmovdqu xmm7,XMMWORD[((32+8))+rsp]312 jmp NEAR $L$oop6x313 314 $L$6x_done:315 vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp]316 vpxor xmm8,xmm8,xmm4317 318 DB 0F3h,0C3h ;repret319 15 320 16 321 17 global aesni_gcm_decrypt 322 18 323 ALIGN 32324 19 aesni_gcm_decrypt: 325 mov QWORD[8+rsp],rdi ;WIN64 prologue326 mov QWORD[16+rsp],rsi327 mov rax,rsp328 $L$SEH_begin_aesni_gcm_decrypt:329 mov rdi,rcx330 mov rsi,rdx331 mov rdx,r8332 mov rcx,r9333 mov r8,QWORD[40+rsp]334 mov r9,QWORD[48+rsp]335 20 336 337 338 xor r10,r10 339 cmp rdx,0x60 340 jb NEAR $L$gcm_dec_abort 341 342 lea rax,[rsp] 343 344 push rbx 345 346 push rbp 347 348 push r12 349 350 push r13 351 352 push r14 353 354 push r15 355 356 lea rsp,[((-168))+rsp] 357 movaps XMMWORD[(-216)+rax],xmm6 358 movaps XMMWORD[(-200)+rax],xmm7 359 movaps XMMWORD[(-184)+rax],xmm8 360 movaps XMMWORD[(-168)+rax],xmm9 361 movaps XMMWORD[(-152)+rax],xmm10 362 movaps XMMWORD[(-136)+rax],xmm11 363 movaps XMMWORD[(-120)+rax],xmm12 364 movaps XMMWORD[(-104)+rax],xmm13 365 movaps XMMWORD[(-88)+rax],xmm14 366 movaps XMMWORD[(-72)+rax],xmm15 367 $L$gcm_dec_body: 368 vzeroupper 369 370 vmovdqu xmm1,XMMWORD[r8] 371 add rsp,-128 372 mov ebx,DWORD[12+r8] 373 lea r11,[$L$bswap_mask] 374 lea r14,[((-128))+rcx] 375 mov r15,0xf80 376 vmovdqu xmm8,XMMWORD[r9] 377 and rsp,-128 378 vmovdqu xmm0,XMMWORD[r11] 379 lea rcx,[128+rcx] 380 lea r9,[((32+32))+r9] 381 mov ebp,DWORD[((240-128))+rcx] 382 vpshufb xmm8,xmm8,xmm0 383 384 and r14,r15 385 and r15,rsp 386 sub r15,r14 387 jc NEAR $L$dec_no_key_aliasing 388 cmp r15,768 389 jnc NEAR $L$dec_no_key_aliasing 390 sub rsp,r15 391 $L$dec_no_key_aliasing: 392 393 vmovdqu xmm7,XMMWORD[80+rdi] 394 lea r14,[rdi] 395 vmovdqu xmm4,XMMWORD[64+rdi] 396 lea r15,[((-192))+rdx*1+rdi] 397 vmovdqu xmm5,XMMWORD[48+rdi] 398 shr rdx,4 399 xor r10,r10 400 vmovdqu xmm6,XMMWORD[32+rdi] 401 vpshufb xmm7,xmm7,xmm0 402 vmovdqu xmm2,XMMWORD[16+rdi] 403 vpshufb xmm4,xmm4,xmm0 404 vmovdqu xmm3,XMMWORD[rdi] 405 vpshufb xmm5,xmm5,xmm0 406 vmovdqu XMMWORD[48+rsp],xmm4 407 vpshufb xmm6,xmm6,xmm0 408 vmovdqu XMMWORD[64+rsp],xmm5 409 vpshufb xmm2,xmm2,xmm0 410 vmovdqu XMMWORD[80+rsp],xmm6 411 vpshufb xmm3,xmm3,xmm0 412 vmovdqu XMMWORD[96+rsp],xmm2 413 vmovdqu XMMWORD[112+rsp],xmm3 414 415 call _aesni_ctr32_ghash_6x 416 417 vmovups XMMWORD[(-96)+rsi],xmm9 418 vmovups XMMWORD[(-80)+rsi],xmm10 419 vmovups XMMWORD[(-64)+rsi],xmm11 420 vmovups XMMWORD[(-48)+rsi],xmm12 421 vmovups XMMWORD[(-32)+rsi],xmm13 422 vmovups XMMWORD[(-16)+rsi],xmm14 423 424 vpshufb xmm8,xmm8,XMMWORD[r11] 425 vmovdqu XMMWORD[(-64)+r9],xmm8 426 427 vzeroupper 428 movaps xmm6,XMMWORD[((-216))+rax] 429 movaps xmm7,XMMWORD[((-200))+rax] 430 movaps xmm8,XMMWORD[((-184))+rax] 431 movaps xmm9,XMMWORD[((-168))+rax] 432 movaps xmm10,XMMWORD[((-152))+rax] 433 movaps xmm11,XMMWORD[((-136))+rax] 434 movaps xmm12,XMMWORD[((-120))+rax] 435 movaps xmm13,XMMWORD[((-104))+rax] 436 movaps xmm14,XMMWORD[((-88))+rax] 437 movaps xmm15,XMMWORD[((-72))+rax] 438 mov r15,QWORD[((-48))+rax] 439 440 mov r14,QWORD[((-40))+rax] 441 442 mov r13,QWORD[((-32))+rax] 443 444 mov r12,QWORD[((-24))+rax] 445 446 mov rbp,QWORD[((-16))+rax] 447 448 mov rbx,QWORD[((-8))+rax] 449 450 lea rsp,[rax] 451 452 $L$gcm_dec_abort: 453 mov rax,r10 454 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 455 mov rsi,QWORD[16+rsp] 456 DB 0F3h,0C3h ;repret 457 458 $L$SEH_end_aesni_gcm_decrypt: 459 460 ALIGN 32 461 _aesni_ctr32_6x: 462 463 vmovdqu xmm4,XMMWORD[((0-128))+rcx] 464 vmovdqu xmm2,XMMWORD[32+r11] 465 lea r13,[((-1))+rbp] 466 vmovups xmm15,XMMWORD[((16-128))+rcx] 467 lea r12,[((32-128))+rcx] 468 vpxor xmm9,xmm1,xmm4 469 add ebx,100663296 470 jc NEAR $L$handle_ctr32_2 471 vpaddb xmm10,xmm1,xmm2 472 vpaddb xmm11,xmm10,xmm2 473 vpxor xmm10,xmm10,xmm4 474 vpaddb xmm12,xmm11,xmm2 475 vpxor xmm11,xmm11,xmm4 476 vpaddb xmm13,xmm12,xmm2 477 vpxor xmm12,xmm12,xmm4 478 vpaddb xmm14,xmm13,xmm2 479 vpxor xmm13,xmm13,xmm4 480 vpaddb xmm1,xmm14,xmm2 481 vpxor xmm14,xmm14,xmm4 482 jmp NEAR $L$oop_ctr32 483 484 ALIGN 16 485 $L$oop_ctr32: 486 vaesenc xmm9,xmm9,xmm15 487 vaesenc xmm10,xmm10,xmm15 488 vaesenc xmm11,xmm11,xmm15 489 vaesenc xmm12,xmm12,xmm15 490 vaesenc xmm13,xmm13,xmm15 491 vaesenc xmm14,xmm14,xmm15 492 vmovups xmm15,XMMWORD[r12] 493 lea r12,[16+r12] 494 dec r13d 495 jnz NEAR $L$oop_ctr32 496 497 vmovdqu xmm3,XMMWORD[r12] 498 vaesenc xmm9,xmm9,xmm15 499 vpxor xmm4,xmm3,XMMWORD[rdi] 500 vaesenc xmm10,xmm10,xmm15 501 vpxor xmm5,xmm3,XMMWORD[16+rdi] 502 vaesenc xmm11,xmm11,xmm15 503 vpxor xmm6,xmm3,XMMWORD[32+rdi] 504 vaesenc xmm12,xmm12,xmm15 505 vpxor xmm8,xmm3,XMMWORD[48+rdi] 506 vaesenc xmm13,xmm13,xmm15 507 vpxor xmm2,xmm3,XMMWORD[64+rdi] 508 vaesenc xmm14,xmm14,xmm15 509 vpxor xmm3,xmm3,XMMWORD[80+rdi] 510 lea rdi,[96+rdi] 511 512 vaesenclast xmm9,xmm9,xmm4 513 vaesenclast xmm10,xmm10,xmm5 514 vaesenclast xmm11,xmm11,xmm6 515 vaesenclast xmm12,xmm12,xmm8 516 vaesenclast xmm13,xmm13,xmm2 517 vaesenclast xmm14,xmm14,xmm3 518 vmovups XMMWORD[rsi],xmm9 519 vmovups XMMWORD[16+rsi],xmm10 520 vmovups XMMWORD[32+rsi],xmm11 521 vmovups XMMWORD[48+rsi],xmm12 522 vmovups XMMWORD[64+rsi],xmm13 523 vmovups XMMWORD[80+rsi],xmm14 524 lea rsi,[96+rsi] 525 526 DB 0F3h,0C3h ;repret 527 ALIGN 32 528 $L$handle_ctr32_2: 529 vpshufb xmm6,xmm1,xmm0 530 vmovdqu xmm5,XMMWORD[48+r11] 531 vpaddd xmm10,xmm6,XMMWORD[64+r11] 532 vpaddd xmm11,xmm6,xmm5 533 vpaddd xmm12,xmm10,xmm5 534 vpshufb xmm10,xmm10,xmm0 535 vpaddd xmm13,xmm11,xmm5 536 vpshufb xmm11,xmm11,xmm0 537 vpxor xmm10,xmm10,xmm4 538 vpaddd xmm14,xmm12,xmm5 539 vpshufb xmm12,xmm12,xmm0 540 vpxor xmm11,xmm11,xmm4 541 vpaddd xmm1,xmm13,xmm5 542 vpshufb xmm13,xmm13,xmm0 543 vpxor xmm12,xmm12,xmm4 544 vpshufb xmm14,xmm14,xmm0 545 vpxor xmm13,xmm13,xmm4 546 vpshufb xmm1,xmm1,xmm0 547 vpxor xmm14,xmm14,xmm4 548 jmp NEAR $L$oop_ctr32 549 550 551 552 global aesni_gcm_encrypt 553 554 ALIGN 32 555 aesni_gcm_encrypt: 556 mov QWORD[8+rsp],rdi ;WIN64 prologue 557 mov QWORD[16+rsp],rsi 558 mov rax,rsp 559 $L$SEH_begin_aesni_gcm_encrypt: 560 mov rdi,rcx 561 mov rsi,rdx 562 mov rdx,r8 563 mov rcx,r9 564 mov r8,QWORD[40+rsp] 565 mov r9,QWORD[48+rsp] 566 567 568 569 xor r10,r10 570 cmp rdx,0x60*3 571 jb NEAR $L$gcm_enc_abort 572 573 lea rax,[rsp] 574 575 push rbx 576 577 push rbp 578 579 push r12 580 581 push r13 582 583 push r14 584 585 push r15 586 587 lea rsp,[((-168))+rsp] 588 movaps XMMWORD[(-216)+rax],xmm6 589 movaps XMMWORD[(-200)+rax],xmm7 590 movaps XMMWORD[(-184)+rax],xmm8 591 movaps XMMWORD[(-168)+rax],xmm9 592 movaps XMMWORD[(-152)+rax],xmm10 593 movaps XMMWORD[(-136)+rax],xmm11 594 movaps XMMWORD[(-120)+rax],xmm12 595 movaps XMMWORD[(-104)+rax],xmm13 596 movaps XMMWORD[(-88)+rax],xmm14 597 movaps XMMWORD[(-72)+rax],xmm15 598 $L$gcm_enc_body: 599 vzeroupper 600 601 vmovdqu xmm1,XMMWORD[r8] 602 add rsp,-128 603 mov ebx,DWORD[12+r8] 604 lea r11,[$L$bswap_mask] 605 lea r14,[((-128))+rcx] 606 mov r15,0xf80 607 lea rcx,[128+rcx] 608 vmovdqu xmm0,XMMWORD[r11] 609 and rsp,-128 610 mov ebp,DWORD[((240-128))+rcx] 611 612 and r14,r15 613 and r15,rsp 614 sub r15,r14 615 jc NEAR $L$enc_no_key_aliasing 616 cmp r15,768 617 jnc NEAR $L$enc_no_key_aliasing 618 sub rsp,r15 619 $L$enc_no_key_aliasing: 620 621 lea r14,[rsi] 622 lea r15,[((-192))+rdx*1+rsi] 623 shr rdx,4 624 625 call _aesni_ctr32_6x 626 vpshufb xmm8,xmm9,xmm0 627 vpshufb xmm2,xmm10,xmm0 628 vmovdqu XMMWORD[112+rsp],xmm8 629 vpshufb xmm4,xmm11,xmm0 630 vmovdqu XMMWORD[96+rsp],xmm2 631 vpshufb xmm5,xmm12,xmm0 632 vmovdqu XMMWORD[80+rsp],xmm4 633 vpshufb xmm6,xmm13,xmm0 634 vmovdqu XMMWORD[64+rsp],xmm5 635 vpshufb xmm7,xmm14,xmm0 636 vmovdqu XMMWORD[48+rsp],xmm6 637 638 call _aesni_ctr32_6x 639 640 vmovdqu xmm8,XMMWORD[r9] 641 lea r9,[((32+32))+r9] 642 sub rdx,12 643 mov r10,0x60*2 644 vpshufb xmm8,xmm8,xmm0 645 646 call _aesni_ctr32_ghash_6x 647 vmovdqu xmm7,XMMWORD[32+rsp] 648 vmovdqu xmm0,XMMWORD[r11] 649 vmovdqu xmm3,XMMWORD[((0-32))+r9] 650 vpunpckhqdq xmm1,xmm7,xmm7 651 vmovdqu xmm15,XMMWORD[((32-32))+r9] 652 vmovups XMMWORD[(-96)+rsi],xmm9 653 vpshufb xmm9,xmm9,xmm0 654 vpxor xmm1,xmm1,xmm7 655 vmovups XMMWORD[(-80)+rsi],xmm10 656 vpshufb xmm10,xmm10,xmm0 657 vmovups XMMWORD[(-64)+rsi],xmm11 658 vpshufb xmm11,xmm11,xmm0 659 vmovups XMMWORD[(-48)+rsi],xmm12 660 vpshufb xmm12,xmm12,xmm0 661 vmovups XMMWORD[(-32)+rsi],xmm13 662 vpshufb xmm13,xmm13,xmm0 663 vmovups XMMWORD[(-16)+rsi],xmm14 664 vpshufb xmm14,xmm14,xmm0 665 vmovdqu XMMWORD[16+rsp],xmm9 666 vmovdqu xmm6,XMMWORD[48+rsp] 667 vmovdqu xmm0,XMMWORD[((16-32))+r9] 668 vpunpckhqdq xmm2,xmm6,xmm6 669 vpclmulqdq xmm5,xmm7,xmm3,0x00 670 vpxor xmm2,xmm2,xmm6 671 vpclmulqdq xmm7,xmm7,xmm3,0x11 672 vpclmulqdq xmm1,xmm1,xmm15,0x00 673 674 vmovdqu xmm9,XMMWORD[64+rsp] 675 vpclmulqdq xmm4,xmm6,xmm0,0x00 676 vmovdqu xmm3,XMMWORD[((48-32))+r9] 677 vpxor xmm4,xmm4,xmm5 678 vpunpckhqdq xmm5,xmm9,xmm9 679 vpclmulqdq xmm6,xmm6,xmm0,0x11 680 vpxor xmm5,xmm5,xmm9 681 vpxor xmm6,xmm6,xmm7 682 vpclmulqdq xmm2,xmm2,xmm15,0x10 683 vmovdqu xmm15,XMMWORD[((80-32))+r9] 684 vpxor xmm2,xmm2,xmm1 685 686 vmovdqu xmm1,XMMWORD[80+rsp] 687 vpclmulqdq xmm7,xmm9,xmm3,0x00 688 vmovdqu xmm0,XMMWORD[((64-32))+r9] 689 vpxor xmm7,xmm7,xmm4 690 vpunpckhqdq xmm4,xmm1,xmm1 691 vpclmulqdq xmm9,xmm9,xmm3,0x11 692 vpxor xmm4,xmm4,xmm1 693 vpxor xmm9,xmm9,xmm6 694 vpclmulqdq xmm5,xmm5,xmm15,0x00 695 vpxor xmm5,xmm5,xmm2 696 697 vmovdqu xmm2,XMMWORD[96+rsp] 698 vpclmulqdq xmm6,xmm1,xmm0,0x00 699 vmovdqu xmm3,XMMWORD[((96-32))+r9] 700 vpxor xmm6,xmm6,xmm7 701 vpunpckhqdq xmm7,xmm2,xmm2 702 vpclmulqdq xmm1,xmm1,xmm0,0x11 703 vpxor xmm7,xmm7,xmm2 704 vpxor xmm1,xmm1,xmm9 705 vpclmulqdq xmm4,xmm4,xmm15,0x10 706 vmovdqu xmm15,XMMWORD[((128-32))+r9] 707 vpxor xmm4,xmm4,xmm5 708 709 vpxor xmm8,xmm8,XMMWORD[112+rsp] 710 vpclmulqdq xmm5,xmm2,xmm3,0x00 711 vmovdqu xmm0,XMMWORD[((112-32))+r9] 712 vpunpckhqdq xmm9,xmm8,xmm8 713 vpxor xmm5,xmm5,xmm6 714 vpclmulqdq xmm2,xmm2,xmm3,0x11 715 vpxor xmm9,xmm9,xmm8 716 vpxor xmm2,xmm2,xmm1 717 vpclmulqdq xmm7,xmm7,xmm15,0x00 718 vpxor xmm4,xmm7,xmm4 719 720 vpclmulqdq xmm6,xmm8,xmm0,0x00 721 vmovdqu xmm3,XMMWORD[((0-32))+r9] 722 vpunpckhqdq xmm1,xmm14,xmm14 723 vpclmulqdq xmm8,xmm8,xmm0,0x11 724 vpxor xmm1,xmm1,xmm14 725 vpxor xmm5,xmm6,xmm5 726 vpclmulqdq xmm9,xmm9,xmm15,0x10 727 vmovdqu xmm15,XMMWORD[((32-32))+r9] 728 vpxor xmm7,xmm8,xmm2 729 vpxor xmm6,xmm9,xmm4 730 731 vmovdqu xmm0,XMMWORD[((16-32))+r9] 732 vpxor xmm9,xmm7,xmm5 733 vpclmulqdq xmm4,xmm14,xmm3,0x00 734 vpxor xmm6,xmm6,xmm9 735 vpunpckhqdq xmm2,xmm13,xmm13 736 vpclmulqdq xmm14,xmm14,xmm3,0x11 737 vpxor xmm2,xmm2,xmm13 738 vpslldq xmm9,xmm6,8 739 vpclmulqdq xmm1,xmm1,xmm15,0x00 740 vpxor xmm8,xmm5,xmm9 741 vpsrldq xmm6,xmm6,8 742 vpxor xmm7,xmm7,xmm6 743 744 vpclmulqdq xmm5,xmm13,xmm0,0x00 745 vmovdqu xmm3,XMMWORD[((48-32))+r9] 746 vpxor xmm5,xmm5,xmm4 747 vpunpckhqdq xmm9,xmm12,xmm12 748 vpclmulqdq xmm13,xmm13,xmm0,0x11 749 vpxor xmm9,xmm9,xmm12 750 vpxor xmm13,xmm13,xmm14 751 vpalignr xmm14,xmm8,xmm8,8 752 vpclmulqdq xmm2,xmm2,xmm15,0x10 753 vmovdqu xmm15,XMMWORD[((80-32))+r9] 754 vpxor xmm2,xmm2,xmm1 755 756 vpclmulqdq xmm4,xmm12,xmm3,0x00 757 vmovdqu xmm0,XMMWORD[((64-32))+r9] 758 vpxor xmm4,xmm4,xmm5 759 vpunpckhqdq xmm1,xmm11,xmm11 760 vpclmulqdq xmm12,xmm12,xmm3,0x11 761 vpxor xmm1,xmm1,xmm11 762 vpxor xmm12,xmm12,xmm13 763 vxorps xmm7,xmm7,XMMWORD[16+rsp] 764 vpclmulqdq xmm9,xmm9,xmm15,0x00 765 vpxor xmm9,xmm9,xmm2 766 767 vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10 768 vxorps xmm8,xmm8,xmm14 769 770 vpclmulqdq xmm5,xmm11,xmm0,0x00 771 vmovdqu xmm3,XMMWORD[((96-32))+r9] 772 vpxor xmm5,xmm5,xmm4 773 vpunpckhqdq xmm2,xmm10,xmm10 774 vpclmulqdq xmm11,xmm11,xmm0,0x11 775 vpxor xmm2,xmm2,xmm10 776 vpalignr xmm14,xmm8,xmm8,8 777 vpxor xmm11,xmm11,xmm12 778 vpclmulqdq xmm1,xmm1,xmm15,0x10 779 vmovdqu xmm15,XMMWORD[((128-32))+r9] 780 vpxor xmm1,xmm1,xmm9 781 782 vxorps xmm14,xmm14,xmm7 783 vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10 784 vxorps xmm8,xmm8,xmm14 785 786 vpclmulqdq xmm4,xmm10,xmm3,0x00 787 vmovdqu xmm0,XMMWORD[((112-32))+r9] 788 vpxor xmm4,xmm4,xmm5 789 vpunpckhqdq xmm9,xmm8,xmm8 790 vpclmulqdq xmm10,xmm10,xmm3,0x11 791 vpxor xmm9,xmm9,xmm8 792 vpxor xmm10,xmm10,xmm11 793 vpclmulqdq xmm2,xmm2,xmm15,0x00 794 vpxor xmm2,xmm2,xmm1 795 796 vpclmulqdq xmm5,xmm8,xmm0,0x00 797 vpclmulqdq xmm7,xmm8,xmm0,0x11 798 vpxor xmm5,xmm5,xmm4 799 vpclmulqdq xmm6,xmm9,xmm15,0x10 800 vpxor xmm7,xmm7,xmm10 801 vpxor xmm6,xmm6,xmm2 802 803 vpxor xmm4,xmm7,xmm5 804 vpxor xmm6,xmm6,xmm4 805 vpslldq xmm1,xmm6,8 806 vmovdqu xmm3,XMMWORD[16+r11] 807 vpsrldq xmm6,xmm6,8 808 vpxor xmm8,xmm5,xmm1 809 vpxor xmm7,xmm7,xmm6 810 811 vpalignr xmm2,xmm8,xmm8,8 812 vpclmulqdq xmm8,xmm8,xmm3,0x10 813 vpxor xmm8,xmm8,xmm2 814 815 vpalignr xmm2,xmm8,xmm8,8 816 vpclmulqdq xmm8,xmm8,xmm3,0x10 817 vpxor xmm2,xmm2,xmm7 818 vpxor xmm8,xmm8,xmm2 819 vpshufb xmm8,xmm8,XMMWORD[r11] 820 vmovdqu XMMWORD[(-64)+r9],xmm8 821 822 vzeroupper 823 movaps xmm6,XMMWORD[((-216))+rax] 824 movaps xmm7,XMMWORD[((-200))+rax] 825 movaps xmm8,XMMWORD[((-184))+rax] 826 movaps xmm9,XMMWORD[((-168))+rax] 827 movaps xmm10,XMMWORD[((-152))+rax] 828 movaps xmm11,XMMWORD[((-136))+rax] 829 movaps xmm12,XMMWORD[((-120))+rax] 830 movaps xmm13,XMMWORD[((-104))+rax] 831 movaps xmm14,XMMWORD[((-88))+rax] 832 movaps xmm15,XMMWORD[((-72))+rax] 833 mov r15,QWORD[((-48))+rax] 834 835 mov r14,QWORD[((-40))+rax] 836 837 mov r13,QWORD[((-32))+rax] 838 839 mov r12,QWORD[((-24))+rax] 840 841 mov rbp,QWORD[((-16))+rax] 842 843 mov rbx,QWORD[((-8))+rax] 844 845 lea rsp,[rax] 846 847 $L$gcm_enc_abort: 848 mov rax,r10 849 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 850 mov rsi,QWORD[16+rsp] 851 DB 0F3h,0C3h ;repret 852 853 $L$SEH_end_aesni_gcm_encrypt: 854 ALIGN 64 855 $L$bswap_mask: 856 DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 857 $L$poly: 858 DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 859 $L$one_msb: 860 DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 861 $L$two_lsb: 862 DB 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 863 $L$one_lsb: 864 DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 865 DB 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108 866 DB 101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82 867 DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 868 DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 869 ALIGN 64 870 EXTERN __imp_RtlVirtualUnwind 871 872 ALIGN 16 873 gcm_se_handler: 874 push rsi 875 push rdi 876 push rbx 877 push rbp 878 push r12 879 push r13 880 push r14 881 push r15 882 pushfq 883 sub rsp,64 884 885 mov rax,QWORD[120+r8] 886 mov rbx,QWORD[248+r8] 887 888 mov rsi,QWORD[8+r9] 889 mov r11,QWORD[56+r9] 890 891 mov r10d,DWORD[r11] 892 lea r10,[r10*1+rsi] 893 cmp rbx,r10 894 jb NEAR $L$common_seh_tail 895 896 mov rax,QWORD[152+r8] 897 898 mov r10d,DWORD[4+r11] 899 lea r10,[r10*1+rsi] 900 cmp rbx,r10 901 jae NEAR $L$common_seh_tail 902 903 mov rax,QWORD[120+r8] 904 905 mov r15,QWORD[((-48))+rax] 906 mov r14,QWORD[((-40))+rax] 907 mov r13,QWORD[((-32))+rax] 908 mov r12,QWORD[((-24))+rax] 909 mov rbp,QWORD[((-16))+rax] 910 mov rbx,QWORD[((-8))+rax] 911 mov QWORD[240+r8],r15 912 mov QWORD[232+r8],r14 913 mov QWORD[224+r8],r13 914 mov QWORD[216+r8],r12 915 mov QWORD[160+r8],rbp 916 mov QWORD[144+r8],rbx 917 918 lea rsi,[((-216))+rax] 919 lea rdi,[512+r8] 920 mov ecx,20 921 DD 0xa548f3fc 922 923 $L$common_seh_tail: 924 mov rdi,QWORD[8+rax] 925 mov rsi,QWORD[16+rax] 926 mov QWORD[152+r8],rax 927 mov QWORD[168+r8],rsi 928 mov QWORD[176+r8],rdi 929 930 mov rdi,QWORD[40+r9] 931 mov rsi,r8 932 mov ecx,154 933 DD 0xa548f3fc 934 935 mov rsi,r9 936 xor rcx,rcx 937 mov rdx,QWORD[8+rsi] 938 mov r8,QWORD[rsi] 939 mov r9,QWORD[16+rsi] 940 mov r10,QWORD[40+rsi] 941 lea r11,[56+rsi] 942 lea r12,[24+rsi] 943 mov QWORD[32+rsp],r10 944 mov QWORD[40+rsp],r11 945 mov QWORD[48+rsp],r12 946 mov QWORD[56+rsp],rcx 947 call QWORD[__imp_RtlVirtualUnwind] 948 949 mov eax,1 950 add rsp,64 951 popfq 952 pop r15 953 pop r14 954 pop r13 955 pop r12 956 pop rbp 957 pop rbx 958 pop rdi 959 pop rsi 21 xor eax,eax 960 22 DB 0F3h,0C3h ;repret 961 23 962 24 963 section .pdata rdata align=4964 ALIGN 4965 DD $L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase966 DD $L$SEH_end_aesni_gcm_decrypt wrt ..imagebase967 DD $L$SEH_gcm_dec_info wrt ..imagebase968 969 DD $L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase970 DD $L$SEH_end_aesni_gcm_encrypt wrt ..imagebase971 DD $L$SEH_gcm_enc_info wrt ..imagebase972 section .xdata rdata align=8973 ALIGN 8974 $L$SEH_gcm_dec_info:975 DB 9,0,0,0976 DD gcm_se_handler wrt ..imagebase977 DD $L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase978 $L$SEH_gcm_enc_info:979 DB 9,0,0,0980 DD gcm_se_handler wrt ..imagebase981 DD $L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/aesni-mb-x86_64.S
r94083 r95221 22 22 23 23 24 cmp edx,225 jb NEAR $L$enc_non_avx26 mov ecx,DWORD[((OPENSSL_ia32cap_P+4))]27 test ecx,26843545628 jnz NEAR _avx_cbc_enc_shortcut29 jmp NEAR $L$enc_non_avx30 ALIGN 1631 $L$enc_non_avx:32 24 mov rax,rsp 33 25 … … 345 337 346 338 347 cmp edx,2348 jb NEAR $L$dec_non_avx349 mov ecx,DWORD[((OPENSSL_ia32cap_P+4))]350 test ecx,268435456351 jnz NEAR _avx_cbc_dec_shortcut352 jmp NEAR $L$dec_non_avx353 ALIGN 16354 $L$dec_non_avx:355 339 mov rax,rsp 356 340 … … 643 627 644 628 $L$SEH_end_aesni_multi_cbc_decrypt: 645 646 ALIGN 32647 aesni_multi_cbc_encrypt_avx:648 mov QWORD[8+rsp],rdi ;WIN64 prologue649 mov QWORD[16+rsp],rsi650 mov rax,rsp651 $L$SEH_begin_aesni_multi_cbc_encrypt_avx:652 mov rdi,rcx653 mov rsi,rdx654 mov rdx,r8655 656 657 658 _avx_cbc_enc_shortcut:659 mov rax,rsp660 661 push rbx662 663 push rbp664 665 push r12666 667 push r13668 669 push r14670 671 push r15672 673 lea rsp,[((-168))+rsp]674 movaps XMMWORD[rsp],xmm6675 movaps XMMWORD[16+rsp],xmm7676 movaps XMMWORD[32+rsp],xmm8677 movaps XMMWORD[48+rsp],xmm9678 movaps XMMWORD[64+rsp],xmm10679 movaps XMMWORD[80+rsp],xmm11680 movaps XMMWORD[(-120)+rax],xmm12681 movaps XMMWORD[(-104)+rax],xmm13682 movaps XMMWORD[(-88)+rax],xmm14683 movaps XMMWORD[(-72)+rax],xmm15684 685 686 687 688 689 690 691 692 sub rsp,192693 and rsp,-128694 mov QWORD[16+rsp],rax695 696 697 $L$enc8x_body:698 vzeroupper699 vmovdqu xmm15,XMMWORD[rsi]700 lea rsi,[120+rsi]701 lea rdi,[160+rdi]702 shr edx,1703 704 $L$enc8x_loop_grande:705 706 xor edx,edx707 708 mov ecx,DWORD[((-144))+rdi]709 710 mov r8,QWORD[((-160))+rdi]711 cmp ecx,edx712 713 mov rbx,QWORD[((-152))+rdi]714 cmovg edx,ecx715 test ecx,ecx716 717 vmovdqu xmm2,XMMWORD[((-136))+rdi]718 mov DWORD[32+rsp],ecx719 cmovle r8,rsp720 sub rbx,r8721 mov QWORD[64+rsp],rbx722 723 mov ecx,DWORD[((-104))+rdi]724 725 mov r9,QWORD[((-120))+rdi]726 cmp ecx,edx727 728 mov rbp,QWORD[((-112))+rdi]729 cmovg edx,ecx730 test ecx,ecx731 732 vmovdqu xmm3,XMMWORD[((-96))+rdi]733 mov DWORD[36+rsp],ecx734 cmovle r9,rsp735 sub rbp,r9736 mov QWORD[72+rsp],rbp737 738 mov ecx,DWORD[((-64))+rdi]739 740 mov r10,QWORD[((-80))+rdi]741 cmp ecx,edx742 743 mov rbp,QWORD[((-72))+rdi]744 cmovg edx,ecx745 test ecx,ecx746 747 vmovdqu xmm4,XMMWORD[((-56))+rdi]748 mov DWORD[40+rsp],ecx749 cmovle r10,rsp750 sub rbp,r10751 mov QWORD[80+rsp],rbp752 753 mov ecx,DWORD[((-24))+rdi]754 755 mov r11,QWORD[((-40))+rdi]756 cmp ecx,edx757 758 mov rbp,QWORD[((-32))+rdi]759 cmovg edx,ecx760 test ecx,ecx761 762 vmovdqu xmm5,XMMWORD[((-16))+rdi]763 mov DWORD[44+rsp],ecx764 cmovle r11,rsp765 sub rbp,r11766 mov QWORD[88+rsp],rbp767 768 mov ecx,DWORD[16+rdi]769 770 mov r12,QWORD[rdi]771 cmp ecx,edx772 773 mov rbp,QWORD[8+rdi]774 cmovg edx,ecx775 test ecx,ecx776 777 vmovdqu xmm6,XMMWORD[24+rdi]778 mov DWORD[48+rsp],ecx779 cmovle r12,rsp780 sub rbp,r12781 mov QWORD[96+rsp],rbp782 783 mov ecx,DWORD[56+rdi]784 785 mov r13,QWORD[40+rdi]786 cmp ecx,edx787 788 mov rbp,QWORD[48+rdi]789 cmovg edx,ecx790 test ecx,ecx791 792 vmovdqu xmm7,XMMWORD[64+rdi]793 mov DWORD[52+rsp],ecx794 cmovle r13,rsp795 sub rbp,r13796 mov QWORD[104+rsp],rbp797 798 mov ecx,DWORD[96+rdi]799 800 mov r14,QWORD[80+rdi]801 cmp ecx,edx802 803 mov rbp,QWORD[88+rdi]804 cmovg edx,ecx805 test ecx,ecx806 807 vmovdqu xmm8,XMMWORD[104+rdi]808 mov DWORD[56+rsp],ecx809 cmovle r14,rsp810 sub rbp,r14811 mov QWORD[112+rsp],rbp812 813 mov ecx,DWORD[136+rdi]814 815 mov r15,QWORD[120+rdi]816 cmp ecx,edx817 818 mov rbp,QWORD[128+rdi]819 cmovg edx,ecx820 test ecx,ecx821 822 vmovdqu xmm9,XMMWORD[144+rdi]823 mov DWORD[60+rsp],ecx824 cmovle r15,rsp825 sub rbp,r15826 mov QWORD[120+rsp],rbp827 test edx,edx828 jz NEAR $L$enc8x_done829 830 vmovups xmm1,XMMWORD[((16-120))+rsi]831 vmovups xmm0,XMMWORD[((32-120))+rsi]832 mov eax,DWORD[((240-120))+rsi]833 834 vpxor xmm10,xmm15,XMMWORD[r8]835 lea rbp,[128+rsp]836 vpxor xmm11,xmm15,XMMWORD[r9]837 vpxor xmm12,xmm15,XMMWORD[r10]838 vpxor xmm13,xmm15,XMMWORD[r11]839 vpxor xmm2,xmm2,xmm10840 vpxor xmm10,xmm15,XMMWORD[r12]841 vpxor xmm3,xmm3,xmm11842 vpxor xmm11,xmm15,XMMWORD[r13]843 vpxor xmm4,xmm4,xmm12844 vpxor xmm12,xmm15,XMMWORD[r14]845 vpxor xmm5,xmm5,xmm13846 vpxor xmm13,xmm15,XMMWORD[r15]847 vpxor xmm6,xmm6,xmm10848 mov ecx,1849 vpxor xmm7,xmm7,xmm11850 vpxor xmm8,xmm8,xmm12851 vpxor xmm9,xmm9,xmm13852 jmp NEAR $L$oop_enc8x853 854 ALIGN 32855 $L$oop_enc8x:856 vaesenc xmm2,xmm2,xmm1857 cmp ecx,DWORD[((32+0))+rsp]858 vaesenc xmm3,xmm3,xmm1859 prefetcht0 [31+r8]860 vaesenc xmm4,xmm4,xmm1861 vaesenc xmm5,xmm5,xmm1862 lea rbx,[rbx*1+r8]863 cmovge r8,rsp864 vaesenc xmm6,xmm6,xmm1865 cmovg rbx,rsp866 vaesenc xmm7,xmm7,xmm1867 sub rbx,r8868 vaesenc xmm8,xmm8,xmm1869 vpxor xmm10,xmm15,XMMWORD[16+r8]870 mov QWORD[((64+0))+rsp],rbx871 vaesenc xmm9,xmm9,xmm1872 vmovups xmm1,XMMWORD[((-72))+rsi]873 lea r8,[16+rbx*1+r8]874 vmovdqu XMMWORD[rbp],xmm10875 vaesenc xmm2,xmm2,xmm0876 cmp ecx,DWORD[((32+4))+rsp]877 mov rbx,QWORD[((64+8))+rsp]878 vaesenc xmm3,xmm3,xmm0879 prefetcht0 [31+r9]880 vaesenc xmm4,xmm4,xmm0881 vaesenc xmm5,xmm5,xmm0882 lea rbx,[rbx*1+r9]883 cmovge r9,rsp884 vaesenc xmm6,xmm6,xmm0885 cmovg rbx,rsp886 vaesenc xmm7,xmm7,xmm0887 sub rbx,r9888 vaesenc xmm8,xmm8,xmm0889 vpxor xmm11,xmm15,XMMWORD[16+r9]890 mov QWORD[((64+8))+rsp],rbx891 vaesenc xmm9,xmm9,xmm0892 vmovups xmm0,XMMWORD[((-56))+rsi]893 lea r9,[16+rbx*1+r9]894 vmovdqu XMMWORD[16+rbp],xmm11895 vaesenc xmm2,xmm2,xmm1896 cmp ecx,DWORD[((32+8))+rsp]897 mov rbx,QWORD[((64+16))+rsp]898 vaesenc xmm3,xmm3,xmm1899 prefetcht0 [31+r10]900 vaesenc xmm4,xmm4,xmm1901 prefetcht0 [15+r8]902 vaesenc xmm5,xmm5,xmm1903 lea rbx,[rbx*1+r10]904 cmovge r10,rsp905 vaesenc xmm6,xmm6,xmm1906 cmovg rbx,rsp907 vaesenc xmm7,xmm7,xmm1908 sub rbx,r10909 vaesenc xmm8,xmm8,xmm1910 vpxor xmm12,xmm15,XMMWORD[16+r10]911 mov QWORD[((64+16))+rsp],rbx912 vaesenc xmm9,xmm9,xmm1913 vmovups xmm1,XMMWORD[((-40))+rsi]914 lea r10,[16+rbx*1+r10]915 vmovdqu XMMWORD[32+rbp],xmm12916 vaesenc xmm2,xmm2,xmm0917 cmp ecx,DWORD[((32+12))+rsp]918 mov rbx,QWORD[((64+24))+rsp]919 vaesenc xmm3,xmm3,xmm0920 prefetcht0 [31+r11]921 vaesenc xmm4,xmm4,xmm0922 prefetcht0 [15+r9]923 vaesenc xmm5,xmm5,xmm0924 lea rbx,[rbx*1+r11]925 cmovge r11,rsp926 vaesenc xmm6,xmm6,xmm0927 cmovg rbx,rsp928 vaesenc xmm7,xmm7,xmm0929 sub rbx,r11930 vaesenc xmm8,xmm8,xmm0931 vpxor xmm13,xmm15,XMMWORD[16+r11]932 mov QWORD[((64+24))+rsp],rbx933 vaesenc xmm9,xmm9,xmm0934 vmovups xmm0,XMMWORD[((-24))+rsi]935 lea r11,[16+rbx*1+r11]936 vmovdqu XMMWORD[48+rbp],xmm13937 vaesenc xmm2,xmm2,xmm1938 cmp ecx,DWORD[((32+16))+rsp]939 mov rbx,QWORD[((64+32))+rsp]940 vaesenc xmm3,xmm3,xmm1941 prefetcht0 [31+r12]942 vaesenc xmm4,xmm4,xmm1943 prefetcht0 [15+r10]944 vaesenc xmm5,xmm5,xmm1945 lea rbx,[rbx*1+r12]946 cmovge r12,rsp947 vaesenc xmm6,xmm6,xmm1948 cmovg rbx,rsp949 vaesenc xmm7,xmm7,xmm1950 sub rbx,r12951 vaesenc xmm8,xmm8,xmm1952 vpxor xmm10,xmm15,XMMWORD[16+r12]953 mov QWORD[((64+32))+rsp],rbx954 vaesenc xmm9,xmm9,xmm1955 vmovups xmm1,XMMWORD[((-8))+rsi]956 lea r12,[16+rbx*1+r12]957 vaesenc xmm2,xmm2,xmm0958 cmp ecx,DWORD[((32+20))+rsp]959 mov rbx,QWORD[((64+40))+rsp]960 vaesenc xmm3,xmm3,xmm0961 prefetcht0 [31+r13]962 vaesenc xmm4,xmm4,xmm0963 prefetcht0 [15+r11]964 vaesenc xmm5,xmm5,xmm0965 lea rbx,[r13*1+rbx]966 cmovge r13,rsp967 vaesenc xmm6,xmm6,xmm0968 cmovg rbx,rsp969 vaesenc xmm7,xmm7,xmm0970 sub rbx,r13971 vaesenc xmm8,xmm8,xmm0972 vpxor xmm11,xmm15,XMMWORD[16+r13]973 mov QWORD[((64+40))+rsp],rbx974 vaesenc xmm9,xmm9,xmm0975 vmovups xmm0,XMMWORD[8+rsi]976 lea r13,[16+rbx*1+r13]977 vaesenc xmm2,xmm2,xmm1978 cmp ecx,DWORD[((32+24))+rsp]979 mov rbx,QWORD[((64+48))+rsp]980 vaesenc xmm3,xmm3,xmm1981 prefetcht0 [31+r14]982 vaesenc xmm4,xmm4,xmm1983 prefetcht0 [15+r12]984 vaesenc xmm5,xmm5,xmm1985 lea rbx,[rbx*1+r14]986 cmovge r14,rsp987 vaesenc xmm6,xmm6,xmm1988 cmovg rbx,rsp989 vaesenc xmm7,xmm7,xmm1990 sub rbx,r14991 vaesenc xmm8,xmm8,xmm1992 vpxor xmm12,xmm15,XMMWORD[16+r14]993 mov QWORD[((64+48))+rsp],rbx994 vaesenc xmm9,xmm9,xmm1995 vmovups xmm1,XMMWORD[24+rsi]996 lea r14,[16+rbx*1+r14]997 vaesenc xmm2,xmm2,xmm0998 cmp ecx,DWORD[((32+28))+rsp]999 mov rbx,QWORD[((64+56))+rsp]1000 vaesenc xmm3,xmm3,xmm01001 prefetcht0 [31+r15]1002 vaesenc xmm4,xmm4,xmm01003 prefetcht0 [15+r13]1004 vaesenc xmm5,xmm5,xmm01005 lea rbx,[rbx*1+r15]1006 cmovge r15,rsp1007 vaesenc xmm6,xmm6,xmm01008 cmovg rbx,rsp1009 vaesenc xmm7,xmm7,xmm01010 sub rbx,r151011 vaesenc xmm8,xmm8,xmm01012 vpxor xmm13,xmm15,XMMWORD[16+r15]1013 mov QWORD[((64+56))+rsp],rbx1014 vaesenc xmm9,xmm9,xmm01015 vmovups xmm0,XMMWORD[40+rsi]1016 lea r15,[16+rbx*1+r15]1017 vmovdqu xmm14,XMMWORD[32+rsp]1018 prefetcht0 [15+r14]1019 prefetcht0 [15+r15]1020 cmp eax,111021 jb NEAR $L$enc8x_tail1022 1023 vaesenc xmm2,xmm2,xmm11024 vaesenc xmm3,xmm3,xmm11025 vaesenc xmm4,xmm4,xmm11026 vaesenc xmm5,xmm5,xmm11027 vaesenc xmm6,xmm6,xmm11028 vaesenc xmm7,xmm7,xmm11029 vaesenc xmm8,xmm8,xmm11030 vaesenc xmm9,xmm9,xmm11031 vmovups xmm1,XMMWORD[((176-120))+rsi]1032 1033 vaesenc xmm2,xmm2,xmm01034 vaesenc xmm3,xmm3,xmm01035 vaesenc xmm4,xmm4,xmm01036 vaesenc xmm5,xmm5,xmm01037 vaesenc xmm6,xmm6,xmm01038 vaesenc xmm7,xmm7,xmm01039 vaesenc xmm8,xmm8,xmm01040 vaesenc xmm9,xmm9,xmm01041 vmovups xmm0,XMMWORD[((192-120))+rsi]1042 je NEAR $L$enc8x_tail1043 1044 vaesenc xmm2,xmm2,xmm11045 vaesenc xmm3,xmm3,xmm11046 vaesenc xmm4,xmm4,xmm11047 vaesenc xmm5,xmm5,xmm11048 vaesenc xmm6,xmm6,xmm11049 vaesenc xmm7,xmm7,xmm11050 vaesenc xmm8,xmm8,xmm11051 vaesenc xmm9,xmm9,xmm11052 vmovups xmm1,XMMWORD[((208-120))+rsi]1053 1054 vaesenc xmm2,xmm2,xmm01055 vaesenc xmm3,xmm3,xmm01056 vaesenc xmm4,xmm4,xmm01057 vaesenc xmm5,xmm5,xmm01058 vaesenc xmm6,xmm6,xmm01059 vaesenc xmm7,xmm7,xmm01060 vaesenc xmm8,xmm8,xmm01061 vaesenc xmm9,xmm9,xmm01062 vmovups xmm0,XMMWORD[((224-120))+rsi]1063 1064 $L$enc8x_tail:1065 vaesenc xmm2,xmm2,xmm11066 vpxor xmm15,xmm15,xmm151067 vaesenc xmm3,xmm3,xmm11068 vaesenc xmm4,xmm4,xmm11069 vpcmpgtd xmm15,xmm14,xmm151070 vaesenc xmm5,xmm5,xmm11071 vaesenc xmm6,xmm6,xmm11072 vpaddd xmm15,xmm15,xmm141073 vmovdqu xmm14,XMMWORD[48+rsp]1074 vaesenc xmm7,xmm7,xmm11075 mov rbx,QWORD[64+rsp]1076 vaesenc xmm8,xmm8,xmm11077 vaesenc xmm9,xmm9,xmm11078 vmovups xmm1,XMMWORD[((16-120))+rsi]1079 1080 vaesenclast xmm2,xmm2,xmm01081 vmovdqa XMMWORD[32+rsp],xmm151082 vpxor xmm15,xmm15,xmm151083 vaesenclast xmm3,xmm3,xmm01084 vaesenclast xmm4,xmm4,xmm01085 vpcmpgtd xmm15,xmm14,xmm151086 vaesenclast xmm5,xmm5,xmm01087 vaesenclast xmm6,xmm6,xmm01088 vpaddd xmm14,xmm14,xmm151089 vmovdqu xmm15,XMMWORD[((-120))+rsi]1090 vaesenclast xmm7,xmm7,xmm01091 vaesenclast xmm8,xmm8,xmm01092 vmovdqa XMMWORD[48+rsp],xmm141093 vaesenclast xmm9,xmm9,xmm01094 vmovups xmm0,XMMWORD[((32-120))+rsi]1095 1096 vmovups XMMWORD[(-16)+r8],xmm21097 sub r8,rbx1098 vpxor xmm2,xmm2,XMMWORD[rbp]1099 vmovups XMMWORD[(-16)+r9],xmm31100 sub r9,QWORD[72+rsp]1101 vpxor xmm3,xmm3,XMMWORD[16+rbp]1102 vmovups XMMWORD[(-16)+r10],xmm41103 sub r10,QWORD[80+rsp]1104 vpxor xmm4,xmm4,XMMWORD[32+rbp]1105 vmovups XMMWORD[(-16)+r11],xmm51106 sub r11,QWORD[88+rsp]1107 vpxor xmm5,xmm5,XMMWORD[48+rbp]1108 vmovups XMMWORD[(-16)+r12],xmm61109 sub r12,QWORD[96+rsp]1110 vpxor xmm6,xmm6,xmm101111 vmovups XMMWORD[(-16)+r13],xmm71112 sub r13,QWORD[104+rsp]1113 vpxor xmm7,xmm7,xmm111114 vmovups XMMWORD[(-16)+r14],xmm81115 sub r14,QWORD[112+rsp]1116 vpxor xmm8,xmm8,xmm121117 vmovups XMMWORD[(-16)+r15],xmm91118 sub r15,QWORD[120+rsp]1119 vpxor xmm9,xmm9,xmm131120 1121 dec edx1122 jnz NEAR $L$oop_enc8x1123 1124 mov rax,QWORD[16+rsp]1125 1126 1127 1128 1129 1130 1131 $L$enc8x_done:1132 vzeroupper1133 movaps xmm6,XMMWORD[((-216))+rax]1134 movaps xmm7,XMMWORD[((-200))+rax]1135 movaps xmm8,XMMWORD[((-184))+rax]1136 movaps xmm9,XMMWORD[((-168))+rax]1137 movaps xmm10,XMMWORD[((-152))+rax]1138 movaps xmm11,XMMWORD[((-136))+rax]1139 movaps xmm12,XMMWORD[((-120))+rax]1140 movaps xmm13,XMMWORD[((-104))+rax]1141 movaps xmm14,XMMWORD[((-88))+rax]1142 movaps xmm15,XMMWORD[((-72))+rax]1143 mov r15,QWORD[((-48))+rax]1144 1145 mov r14,QWORD[((-40))+rax]1146 1147 mov r13,QWORD[((-32))+rax]1148 1149 mov r12,QWORD[((-24))+rax]1150 1151 mov rbp,QWORD[((-16))+rax]1152 1153 mov rbx,QWORD[((-8))+rax]1154 1155 lea rsp,[rax]1156 1157 $L$enc8x_epilogue:1158 mov rdi,QWORD[8+rsp] ;WIN64 epilogue1159 mov rsi,QWORD[16+rsp]1160 DB 0F3h,0C3h ;repret1161 1162 $L$SEH_end_aesni_multi_cbc_encrypt_avx:1163 1164 1165 ALIGN 321166 aesni_multi_cbc_decrypt_avx:1167 mov QWORD[8+rsp],rdi ;WIN64 prologue1168 mov QWORD[16+rsp],rsi1169 mov rax,rsp1170 $L$SEH_begin_aesni_multi_cbc_decrypt_avx:1171 mov rdi,rcx1172 mov rsi,rdx1173 mov rdx,r81174 1175 1176 1177 _avx_cbc_dec_shortcut:1178 mov rax,rsp1179 1180 push rbx1181 1182 push rbp1183 1184 push r121185 1186 push r131187 1188 push r141189 1190 push r151191 1192 lea rsp,[((-168))+rsp]1193 movaps XMMWORD[rsp],xmm61194 movaps XMMWORD[16+rsp],xmm71195 movaps XMMWORD[32+rsp],xmm81196 movaps XMMWORD[48+rsp],xmm91197 movaps XMMWORD[64+rsp],xmm101198 movaps XMMWORD[80+rsp],xmm111199 movaps XMMWORD[(-120)+rax],xmm121200 movaps XMMWORD[(-104)+rax],xmm131201 movaps XMMWORD[(-88)+rax],xmm141202 movaps XMMWORD[(-72)+rax],xmm151203 1204 1205 1206 1207 1208 1209 1210 1211 1212 sub rsp,2561213 and rsp,-2561214 sub rsp,1921215 mov QWORD[16+rsp],rax1216 1217 1218 $L$dec8x_body:1219 vzeroupper1220 vmovdqu xmm15,XMMWORD[rsi]1221 lea rsi,[120+rsi]1222 lea rdi,[160+rdi]1223 shr edx,11224 1225 $L$dec8x_loop_grande:1226 1227 xor edx,edx1228 1229 mov ecx,DWORD[((-144))+rdi]1230 1231 mov r8,QWORD[((-160))+rdi]1232 cmp ecx,edx1233 1234 mov rbx,QWORD[((-152))+rdi]1235 cmovg edx,ecx1236 test ecx,ecx1237 1238 vmovdqu xmm2,XMMWORD[((-136))+rdi]1239 mov DWORD[32+rsp],ecx1240 cmovle r8,rsp1241 sub rbx,r81242 mov QWORD[64+rsp],rbx1243 vmovdqu XMMWORD[192+rsp],xmm21244 1245 mov ecx,DWORD[((-104))+rdi]1246 1247 mov r9,QWORD[((-120))+rdi]1248 cmp ecx,edx1249 1250 mov rbp,QWORD[((-112))+rdi]1251 cmovg edx,ecx1252 test ecx,ecx1253 1254 vmovdqu xmm3,XMMWORD[((-96))+rdi]1255 mov DWORD[36+rsp],ecx1256 cmovle r9,rsp1257 sub rbp,r91258 mov QWORD[72+rsp],rbp1259 vmovdqu XMMWORD[208+rsp],xmm31260 1261 mov ecx,DWORD[((-64))+rdi]1262 1263 mov r10,QWORD[((-80))+rdi]1264 cmp ecx,edx1265 1266 mov rbp,QWORD[((-72))+rdi]1267 cmovg edx,ecx1268 test ecx,ecx1269 1270 vmovdqu xmm4,XMMWORD[((-56))+rdi]1271 mov DWORD[40+rsp],ecx1272 cmovle r10,rsp1273 sub rbp,r101274 mov QWORD[80+rsp],rbp1275 vmovdqu XMMWORD[224+rsp],xmm41276 1277 mov ecx,DWORD[((-24))+rdi]1278 1279 mov r11,QWORD[((-40))+rdi]1280 cmp ecx,edx1281 1282 mov rbp,QWORD[((-32))+rdi]1283 cmovg edx,ecx1284 test ecx,ecx1285 1286 vmovdqu xmm5,XMMWORD[((-16))+rdi]1287 mov DWORD[44+rsp],ecx1288 cmovle r11,rsp1289 sub rbp,r111290 mov QWORD[88+rsp],rbp1291 vmovdqu XMMWORD[240+rsp],xmm51292 1293 mov ecx,DWORD[16+rdi]1294 1295 mov r12,QWORD[rdi]1296 cmp ecx,edx1297 1298 mov rbp,QWORD[8+rdi]1299 cmovg edx,ecx1300 test ecx,ecx1301 1302 vmovdqu xmm6,XMMWORD[24+rdi]1303 mov DWORD[48+rsp],ecx1304 cmovle r12,rsp1305 sub rbp,r121306 mov QWORD[96+rsp],rbp1307 vmovdqu XMMWORD[256+rsp],xmm61308 1309 mov ecx,DWORD[56+rdi]1310 1311 mov r13,QWORD[40+rdi]1312 cmp ecx,edx1313 1314 mov rbp,QWORD[48+rdi]1315 cmovg edx,ecx1316 test ecx,ecx1317 1318 vmovdqu xmm7,XMMWORD[64+rdi]1319 mov DWORD[52+rsp],ecx1320 cmovle r13,rsp1321 sub rbp,r131322 mov QWORD[104+rsp],rbp1323 vmovdqu XMMWORD[272+rsp],xmm71324 1325 mov ecx,DWORD[96+rdi]1326 1327 mov r14,QWORD[80+rdi]1328 cmp ecx,edx1329 1330 mov rbp,QWORD[88+rdi]1331 cmovg edx,ecx1332 test ecx,ecx1333 1334 vmovdqu xmm8,XMMWORD[104+rdi]1335 mov DWORD[56+rsp],ecx1336 cmovle r14,rsp1337 sub rbp,r141338 mov QWORD[112+rsp],rbp1339 vmovdqu XMMWORD[288+rsp],xmm81340 1341 mov ecx,DWORD[136+rdi]1342 1343 mov r15,QWORD[120+rdi]1344 cmp ecx,edx1345 1346 mov rbp,QWORD[128+rdi]1347 cmovg edx,ecx1348 test ecx,ecx1349 1350 vmovdqu xmm9,XMMWORD[144+rdi]1351 mov DWORD[60+rsp],ecx1352 cmovle r15,rsp1353 sub rbp,r151354 mov QWORD[120+rsp],rbp1355 vmovdqu XMMWORD[304+rsp],xmm91356 test edx,edx1357 jz NEAR $L$dec8x_done1358 1359 vmovups xmm1,XMMWORD[((16-120))+rsi]1360 vmovups xmm0,XMMWORD[((32-120))+rsi]1361 mov eax,DWORD[((240-120))+rsi]1362 lea rbp,[((192+128))+rsp]1363 1364 vmovdqu xmm2,XMMWORD[r8]1365 vmovdqu xmm3,XMMWORD[r9]1366 vmovdqu xmm4,XMMWORD[r10]1367 vmovdqu xmm5,XMMWORD[r11]1368 vmovdqu xmm6,XMMWORD[r12]1369 vmovdqu xmm7,XMMWORD[r13]1370 vmovdqu xmm8,XMMWORD[r14]1371 vmovdqu xmm9,XMMWORD[r15]1372 vmovdqu XMMWORD[rbp],xmm21373 vpxor xmm2,xmm2,xmm151374 vmovdqu XMMWORD[16+rbp],xmm31375 vpxor xmm3,xmm3,xmm151376 vmovdqu XMMWORD[32+rbp],xmm41377 vpxor xmm4,xmm4,xmm151378 vmovdqu XMMWORD[48+rbp],xmm51379 vpxor xmm5,xmm5,xmm151380 vmovdqu XMMWORD[64+rbp],xmm61381 vpxor xmm6,xmm6,xmm151382 vmovdqu XMMWORD[80+rbp],xmm71383 vpxor xmm7,xmm7,xmm151384 vmovdqu XMMWORD[96+rbp],xmm81385 vpxor xmm8,xmm8,xmm151386 vmovdqu XMMWORD[112+rbp],xmm91387 vpxor xmm9,xmm9,xmm151388 xor rbp,0x801389 mov ecx,11390 jmp NEAR $L$oop_dec8x1391 1392 ALIGN 321393 $L$oop_dec8x:1394 vaesdec xmm2,xmm2,xmm11395 cmp ecx,DWORD[((32+0))+rsp]1396 vaesdec xmm3,xmm3,xmm11397 prefetcht0 [31+r8]1398 vaesdec xmm4,xmm4,xmm11399 vaesdec xmm5,xmm5,xmm11400 lea rbx,[rbx*1+r8]1401 cmovge r8,rsp1402 vaesdec xmm6,xmm6,xmm11403 cmovg rbx,rsp1404 vaesdec xmm7,xmm7,xmm11405 sub rbx,r81406 vaesdec xmm8,xmm8,xmm11407 vmovdqu xmm10,XMMWORD[16+r8]1408 mov QWORD[((64+0))+rsp],rbx1409 vaesdec xmm9,xmm9,xmm11410 vmovups xmm1,XMMWORD[((-72))+rsi]1411 lea r8,[16+rbx*1+r8]1412 vmovdqu XMMWORD[128+rsp],xmm101413 vaesdec xmm2,xmm2,xmm01414 cmp ecx,DWORD[((32+4))+rsp]1415 mov rbx,QWORD[((64+8))+rsp]1416 vaesdec xmm3,xmm3,xmm01417 prefetcht0 [31+r9]1418 vaesdec xmm4,xmm4,xmm01419 vaesdec xmm5,xmm5,xmm01420 lea rbx,[rbx*1+r9]1421 cmovge r9,rsp1422 vaesdec xmm6,xmm6,xmm01423 cmovg rbx,rsp1424 vaesdec xmm7,xmm7,xmm01425 sub rbx,r91426 vaesdec xmm8,xmm8,xmm01427 vmovdqu xmm11,XMMWORD[16+r9]1428 mov QWORD[((64+8))+rsp],rbx1429 vaesdec xmm9,xmm9,xmm01430 vmovups xmm0,XMMWORD[((-56))+rsi]1431 lea r9,[16+rbx*1+r9]1432 vmovdqu XMMWORD[144+rsp],xmm111433 vaesdec xmm2,xmm2,xmm11434 cmp ecx,DWORD[((32+8))+rsp]1435 mov rbx,QWORD[((64+16))+rsp]1436 vaesdec xmm3,xmm3,xmm11437 prefetcht0 [31+r10]1438 vaesdec xmm4,xmm4,xmm11439 prefetcht0 [15+r8]1440 vaesdec xmm5,xmm5,xmm11441 lea rbx,[rbx*1+r10]1442 cmovge r10,rsp1443 vaesdec xmm6,xmm6,xmm11444 cmovg rbx,rsp1445 vaesdec xmm7,xmm7,xmm11446 sub rbx,r101447 vaesdec xmm8,xmm8,xmm11448 vmovdqu xmm12,XMMWORD[16+r10]1449 mov QWORD[((64+16))+rsp],rbx1450 vaesdec xmm9,xmm9,xmm11451 vmovups xmm1,XMMWORD[((-40))+rsi]1452 lea r10,[16+rbx*1+r10]1453 vmovdqu XMMWORD[160+rsp],xmm121454 vaesdec xmm2,xmm2,xmm01455 cmp ecx,DWORD[((32+12))+rsp]1456 mov rbx,QWORD[((64+24))+rsp]1457 vaesdec xmm3,xmm3,xmm01458 prefetcht0 [31+r11]1459 vaesdec xmm4,xmm4,xmm01460 prefetcht0 [15+r9]1461 vaesdec xmm5,xmm5,xmm01462 lea rbx,[rbx*1+r11]1463 cmovge r11,rsp1464 vaesdec xmm6,xmm6,xmm01465 cmovg rbx,rsp1466 vaesdec xmm7,xmm7,xmm01467 sub rbx,r111468 vaesdec xmm8,xmm8,xmm01469 vmovdqu xmm13,XMMWORD[16+r11]1470 mov QWORD[((64+24))+rsp],rbx1471 vaesdec xmm9,xmm9,xmm01472 vmovups xmm0,XMMWORD[((-24))+rsi]1473 lea r11,[16+rbx*1+r11]1474 vmovdqu XMMWORD[176+rsp],xmm131475 vaesdec xmm2,xmm2,xmm11476 cmp ecx,DWORD[((32+16))+rsp]1477 mov rbx,QWORD[((64+32))+rsp]1478 vaesdec xmm3,xmm3,xmm11479 prefetcht0 [31+r12]1480 vaesdec xmm4,xmm4,xmm11481 prefetcht0 [15+r10]1482 vaesdec xmm5,xmm5,xmm11483 lea rbx,[rbx*1+r12]1484 cmovge r12,rsp1485 vaesdec xmm6,xmm6,xmm11486 cmovg rbx,rsp1487 vaesdec xmm7,xmm7,xmm11488 sub rbx,r121489 vaesdec xmm8,xmm8,xmm11490 vmovdqu xmm10,XMMWORD[16+r12]1491 mov QWORD[((64+32))+rsp],rbx1492 vaesdec xmm9,xmm9,xmm11493 vmovups xmm1,XMMWORD[((-8))+rsi]1494 lea r12,[16+rbx*1+r12]1495 vaesdec xmm2,xmm2,xmm01496 cmp ecx,DWORD[((32+20))+rsp]1497 mov rbx,QWORD[((64+40))+rsp]1498 vaesdec xmm3,xmm3,xmm01499 prefetcht0 [31+r13]1500 vaesdec xmm4,xmm4,xmm01501 prefetcht0 [15+r11]1502 vaesdec xmm5,xmm5,xmm01503 lea rbx,[r13*1+rbx]1504 cmovge r13,rsp1505 vaesdec xmm6,xmm6,xmm01506 cmovg rbx,rsp1507 vaesdec xmm7,xmm7,xmm01508 sub rbx,r131509 vaesdec xmm8,xmm8,xmm01510 vmovdqu xmm11,XMMWORD[16+r13]1511 mov QWORD[((64+40))+rsp],rbx1512 vaesdec xmm9,xmm9,xmm01513 vmovups xmm0,XMMWORD[8+rsi]1514 lea r13,[16+rbx*1+r13]1515 vaesdec xmm2,xmm2,xmm11516 cmp ecx,DWORD[((32+24))+rsp]1517 mov rbx,QWORD[((64+48))+rsp]1518 vaesdec xmm3,xmm3,xmm11519 prefetcht0 [31+r14]1520 vaesdec xmm4,xmm4,xmm11521 prefetcht0 [15+r12]1522 vaesdec xmm5,xmm5,xmm11523 lea rbx,[rbx*1+r14]1524 cmovge r14,rsp1525 vaesdec xmm6,xmm6,xmm11526 cmovg rbx,rsp1527 vaesdec xmm7,xmm7,xmm11528 sub rbx,r141529 vaesdec xmm8,xmm8,xmm11530 vmovdqu xmm12,XMMWORD[16+r14]1531 mov QWORD[((64+48))+rsp],rbx1532 vaesdec xmm9,xmm9,xmm11533 vmovups xmm1,XMMWORD[24+rsi]1534 lea r14,[16+rbx*1+r14]1535 vaesdec xmm2,xmm2,xmm01536 cmp ecx,DWORD[((32+28))+rsp]1537 mov rbx,QWORD[((64+56))+rsp]1538 vaesdec xmm3,xmm3,xmm01539 prefetcht0 [31+r15]1540 vaesdec xmm4,xmm4,xmm01541 prefetcht0 [15+r13]1542 vaesdec xmm5,xmm5,xmm01543 lea rbx,[rbx*1+r15]1544 cmovge r15,rsp1545 vaesdec xmm6,xmm6,xmm01546 cmovg rbx,rsp1547 vaesdec xmm7,xmm7,xmm01548 sub rbx,r151549 vaesdec xmm8,xmm8,xmm01550 vmovdqu xmm13,XMMWORD[16+r15]1551 mov QWORD[((64+56))+rsp],rbx1552 vaesdec xmm9,xmm9,xmm01553 vmovups xmm0,XMMWORD[40+rsi]1554 lea r15,[16+rbx*1+r15]1555 vmovdqu xmm14,XMMWORD[32+rsp]1556 prefetcht0 [15+r14]1557 prefetcht0 [15+r15]1558 cmp eax,111559 jb NEAR $L$dec8x_tail1560 1561 vaesdec xmm2,xmm2,xmm11562 vaesdec xmm3,xmm3,xmm11563 vaesdec xmm4,xmm4,xmm11564 vaesdec xmm5,xmm5,xmm11565 vaesdec xmm6,xmm6,xmm11566 vaesdec xmm7,xmm7,xmm11567 vaesdec xmm8,xmm8,xmm11568 vaesdec xmm9,xmm9,xmm11569 vmovups xmm1,XMMWORD[((176-120))+rsi]1570 1571 vaesdec xmm2,xmm2,xmm01572 vaesdec xmm3,xmm3,xmm01573 vaesdec xmm4,xmm4,xmm01574 vaesdec xmm5,xmm5,xmm01575 vaesdec xmm6,xmm6,xmm01576 vaesdec xmm7,xmm7,xmm01577 vaesdec xmm8,xmm8,xmm01578 vaesdec xmm9,xmm9,xmm01579 vmovups xmm0,XMMWORD[((192-120))+rsi]1580 je NEAR $L$dec8x_tail1581 1582 vaesdec xmm2,xmm2,xmm11583 vaesdec xmm3,xmm3,xmm11584 vaesdec xmm4,xmm4,xmm11585 vaesdec xmm5,xmm5,xmm11586 vaesdec xmm6,xmm6,xmm11587 vaesdec xmm7,xmm7,xmm11588 vaesdec xmm8,xmm8,xmm11589 vaesdec xmm9,xmm9,xmm11590 vmovups xmm1,XMMWORD[((208-120))+rsi]1591 1592 vaesdec xmm2,xmm2,xmm01593 vaesdec xmm3,xmm3,xmm01594 vaesdec xmm4,xmm4,xmm01595 vaesdec xmm5,xmm5,xmm01596 vaesdec xmm6,xmm6,xmm01597 vaesdec xmm7,xmm7,xmm01598 vaesdec xmm8,xmm8,xmm01599 vaesdec xmm9,xmm9,xmm01600 vmovups xmm0,XMMWORD[((224-120))+rsi]1601 1602 $L$dec8x_tail:1603 vaesdec xmm2,xmm2,xmm11604 vpxor xmm15,xmm15,xmm151605 vaesdec xmm3,xmm3,xmm11606 vaesdec xmm4,xmm4,xmm11607 vpcmpgtd xmm15,xmm14,xmm151608 vaesdec xmm5,xmm5,xmm11609 vaesdec xmm6,xmm6,xmm11610 vpaddd xmm15,xmm15,xmm141611 vmovdqu xmm14,XMMWORD[48+rsp]1612 vaesdec xmm7,xmm7,xmm11613 mov rbx,QWORD[64+rsp]1614 vaesdec xmm8,xmm8,xmm11615 vaesdec xmm9,xmm9,xmm11616 vmovups xmm1,XMMWORD[((16-120))+rsi]1617 1618 vaesdeclast xmm2,xmm2,xmm01619 vmovdqa XMMWORD[32+rsp],xmm151620 vpxor xmm15,xmm15,xmm151621 vaesdeclast xmm3,xmm3,xmm01622 vpxor xmm2,xmm2,XMMWORD[rbp]1623 vaesdeclast xmm4,xmm4,xmm01624 vpxor xmm3,xmm3,XMMWORD[16+rbp]1625 vpcmpgtd xmm15,xmm14,xmm151626 vaesdeclast xmm5,xmm5,xmm01627 vpxor xmm4,xmm4,XMMWORD[32+rbp]1628 vaesdeclast xmm6,xmm6,xmm01629 vpxor xmm5,xmm5,XMMWORD[48+rbp]1630 vpaddd xmm14,xmm14,xmm151631 vmovdqu xmm15,XMMWORD[((-120))+rsi]1632 vaesdeclast xmm7,xmm7,xmm01633 vpxor xmm6,xmm6,XMMWORD[64+rbp]1634 vaesdeclast xmm8,xmm8,xmm01635 vpxor xmm7,xmm7,XMMWORD[80+rbp]1636 vmovdqa XMMWORD[48+rsp],xmm141637 vaesdeclast xmm9,xmm9,xmm01638 vpxor xmm8,xmm8,XMMWORD[96+rbp]1639 vmovups xmm0,XMMWORD[((32-120))+rsi]1640 1641 vmovups XMMWORD[(-16)+r8],xmm21642 sub r8,rbx1643 vmovdqu xmm2,XMMWORD[((128+0))+rsp]1644 vpxor xmm9,xmm9,XMMWORD[112+rbp]1645 vmovups XMMWORD[(-16)+r9],xmm31646 sub r9,QWORD[72+rsp]1647 vmovdqu XMMWORD[rbp],xmm21648 vpxor xmm2,xmm2,xmm151649 vmovdqu xmm3,XMMWORD[((128+16))+rsp]1650 vmovups XMMWORD[(-16)+r10],xmm41651 sub r10,QWORD[80+rsp]1652 vmovdqu XMMWORD[16+rbp],xmm31653 vpxor xmm3,xmm3,xmm151654 vmovdqu xmm4,XMMWORD[((128+32))+rsp]1655 vmovups XMMWORD[(-16)+r11],xmm51656 sub r11,QWORD[88+rsp]1657 vmovdqu XMMWORD[32+rbp],xmm41658 vpxor xmm4,xmm4,xmm151659 vmovdqu xmm5,XMMWORD[((128+48))+rsp]1660 vmovups XMMWORD[(-16)+r12],xmm61661 sub r12,QWORD[96+rsp]1662 vmovdqu XMMWORD[48+rbp],xmm51663 vpxor xmm5,xmm5,xmm151664 vmovdqu XMMWORD[64+rbp],xmm101665 vpxor xmm6,xmm15,xmm101666 vmovups XMMWORD[(-16)+r13],xmm71667 sub r13,QWORD[104+rsp]1668 vmovdqu XMMWORD[80+rbp],xmm111669 vpxor xmm7,xmm15,xmm111670 vmovups XMMWORD[(-16)+r14],xmm81671 sub r14,QWORD[112+rsp]1672 vmovdqu XMMWORD[96+rbp],xmm121673 vpxor xmm8,xmm15,xmm121674 vmovups XMMWORD[(-16)+r15],xmm91675 sub r15,QWORD[120+rsp]1676 vmovdqu XMMWORD[112+rbp],xmm131677 vpxor xmm9,xmm15,xmm131678 1679 xor rbp,1281680 dec edx1681 jnz NEAR $L$oop_dec8x1682 1683 mov rax,QWORD[16+rsp]1684 1685 1686 1687 1688 1689 1690 $L$dec8x_done:1691 vzeroupper1692 movaps xmm6,XMMWORD[((-216))+rax]1693 movaps xmm7,XMMWORD[((-200))+rax]1694 movaps xmm8,XMMWORD[((-184))+rax]1695 movaps xmm9,XMMWORD[((-168))+rax]1696 movaps xmm10,XMMWORD[((-152))+rax]1697 movaps xmm11,XMMWORD[((-136))+rax]1698 movaps xmm12,XMMWORD[((-120))+rax]1699 movaps xmm13,XMMWORD[((-104))+rax]1700 movaps xmm14,XMMWORD[((-88))+rax]1701 movaps xmm15,XMMWORD[((-72))+rax]1702 mov r15,QWORD[((-48))+rax]1703 1704 mov r14,QWORD[((-40))+rax]1705 1706 mov r13,QWORD[((-32))+rax]1707 1708 mov r12,QWORD[((-24))+rax]1709 1710 mov rbp,QWORD[((-16))+rax]1711 1712 mov rbx,QWORD[((-8))+rax]1713 1714 lea rsp,[rax]1715 1716 $L$dec8x_epilogue:1717 mov rdi,QWORD[8+rsp] ;WIN64 epilogue1718 mov rsi,QWORD[16+rsp]1719 DB 0F3h,0C3h ;repret1720 1721 $L$SEH_end_aesni_multi_cbc_decrypt_avx:1722 629 EXTERN __imp_RtlVirtualUnwind 1723 630 … … 1821 728 DD $L$SEH_end_aesni_multi_cbc_decrypt wrt ..imagebase 1822 729 DD $L$SEH_info_aesni_multi_cbc_decrypt wrt ..imagebase 1823 DD $L$SEH_begin_aesni_multi_cbc_encrypt_avx wrt ..imagebase1824 DD $L$SEH_end_aesni_multi_cbc_encrypt_avx wrt ..imagebase1825 DD $L$SEH_info_aesni_multi_cbc_encrypt_avx wrt ..imagebase1826 DD $L$SEH_begin_aesni_multi_cbc_decrypt_avx wrt ..imagebase1827 DD $L$SEH_end_aesni_multi_cbc_decrypt_avx wrt ..imagebase1828 DD $L$SEH_info_aesni_multi_cbc_decrypt_avx wrt ..imagebase1829 730 section .xdata rdata align=8 1830 731 ALIGN 8 … … 1837 738 DD se_handler wrt ..imagebase 1838 739 DD $L$dec4x_body wrt ..imagebase,$L$dec4x_epilogue wrt ..imagebase 1839 $L$SEH_info_aesni_multi_cbc_encrypt_avx:1840 DB 9,0,0,01841 DD se_handler wrt ..imagebase1842 DD $L$enc8x_body wrt ..imagebase,$L$enc8x_epilogue wrt ..imagebase1843 $L$SEH_info_aesni_multi_cbc_decrypt_avx:1844 DB 9,0,0,01845 DD se_handler wrt ..imagebase1846 DD $L$dec8x_body wrt ..imagebase,$L$dec8x_epilogue wrt ..imagebase -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/aesni-sha1-x86_64.S
r94083 r95221 17 17 bt r11,61 18 18 jc NEAR aesni_cbc_sha1_enc_shaext 19 and r11d,26843545620 and r10d,107374182421 or r10d,r11d22 cmp r10d,134217728023 je NEAR aesni_cbc_sha1_enc_avx24 19 jmp NEAR aesni_cbc_sha1_enc_ssse3 25 20 DB 0F3h,0C3h ;repret … … 1432 1427 1433 1428 $L$SEH_end_aesni_cbc_sha1_enc_ssse3: 1434 1435 ALIGN 321436 aesni_cbc_sha1_enc_avx:1437 mov QWORD[8+rsp],rdi ;WIN64 prologue1438 mov QWORD[16+rsp],rsi1439 mov rax,rsp1440 $L$SEH_begin_aesni_cbc_sha1_enc_avx:1441 mov rdi,rcx1442 mov rsi,rdx1443 mov rdx,r81444 mov rcx,r91445 mov r8,QWORD[40+rsp]1446 mov r9,QWORD[48+rsp]1447 1448 1449 1450 mov r10,QWORD[56+rsp]1451 1452 1453 push rbx1454 1455 push rbp1456 1457 push r121458 1459 push r131460 1461 push r141462 1463 push r151464 1465 lea rsp,[((-264))+rsp]1466 1467 1468 1469 movaps XMMWORD[(96+0)+rsp],xmm61470 movaps XMMWORD[(96+16)+rsp],xmm71471 movaps XMMWORD[(96+32)+rsp],xmm81472 movaps XMMWORD[(96+48)+rsp],xmm91473 movaps XMMWORD[(96+64)+rsp],xmm101474 movaps XMMWORD[(96+80)+rsp],xmm111475 movaps XMMWORD[(96+96)+rsp],xmm121476 movaps XMMWORD[(96+112)+rsp],xmm131477 movaps XMMWORD[(96+128)+rsp],xmm141478 movaps XMMWORD[(96+144)+rsp],xmm151479 $L$prologue_avx:1480 vzeroall1481 mov r12,rdi1482 mov r13,rsi1483 mov r14,rdx1484 lea r15,[112+rcx]1485 vmovdqu xmm12,XMMWORD[r8]1486 mov QWORD[88+rsp],r81487 shl r14,61488 sub r13,r121489 mov r8d,DWORD[((240-112))+r15]1490 add r14,r101491 1492 lea r11,[K_XX_XX]1493 mov eax,DWORD[r9]1494 mov ebx,DWORD[4+r9]1495 mov ecx,DWORD[8+r9]1496 mov edx,DWORD[12+r9]1497 mov esi,ebx1498 mov ebp,DWORD[16+r9]1499 mov edi,ecx1500 xor edi,edx1501 and esi,edi1502 1503 vmovdqa xmm6,XMMWORD[64+r11]1504 vmovdqa xmm10,XMMWORD[r11]1505 vmovdqu xmm0,XMMWORD[r10]1506 vmovdqu xmm1,XMMWORD[16+r10]1507 vmovdqu xmm2,XMMWORD[32+r10]1508 vmovdqu xmm3,XMMWORD[48+r10]1509 vpshufb xmm0,xmm0,xmm61510 add r10,641511 vpshufb xmm1,xmm1,xmm61512 vpshufb xmm2,xmm2,xmm61513 vpshufb xmm3,xmm3,xmm61514 vpaddd xmm4,xmm0,xmm101515 vpaddd xmm5,xmm1,xmm101516 vpaddd xmm6,xmm2,xmm101517 vmovdqa XMMWORD[rsp],xmm41518 vmovdqa XMMWORD[16+rsp],xmm51519 vmovdqa XMMWORD[32+rsp],xmm61520 vmovups xmm15,XMMWORD[((-112))+r15]1521 vmovups xmm14,XMMWORD[((16-112))+r15]1522 jmp NEAR $L$oop_avx1523 ALIGN 321524 $L$oop_avx:1525 shrd ebx,ebx,21526 vmovdqu xmm13,XMMWORD[r12]1527 vpxor xmm13,xmm13,xmm151528 vpxor xmm12,xmm12,xmm131529 vaesenc xmm12,xmm12,xmm141530 vmovups xmm15,XMMWORD[((-80))+r15]1531 xor esi,edx1532 vpalignr xmm4,xmm1,xmm0,81533 mov edi,eax1534 add ebp,DWORD[rsp]1535 vpaddd xmm9,xmm10,xmm31536 xor ebx,ecx1537 shld eax,eax,51538 vpsrldq xmm8,xmm3,41539 add ebp,esi1540 and edi,ebx1541 vpxor xmm4,xmm4,xmm01542 xor ebx,ecx1543 add ebp,eax1544 vpxor xmm8,xmm8,xmm21545 shrd eax,eax,71546 xor edi,ecx1547 mov esi,ebp1548 add edx,DWORD[4+rsp]1549 vpxor xmm4,xmm4,xmm81550 xor eax,ebx1551 shld ebp,ebp,51552 vmovdqa XMMWORD[48+rsp],xmm91553 add edx,edi1554 vaesenc xmm12,xmm12,xmm151555 vmovups xmm14,XMMWORD[((-64))+r15]1556 and esi,eax1557 vpsrld xmm8,xmm4,311558 xor eax,ebx1559 add edx,ebp1560 shrd ebp,ebp,71561 xor esi,ebx1562 vpslldq xmm9,xmm4,121563 vpaddd xmm4,xmm4,xmm41564 mov edi,edx1565 add ecx,DWORD[8+rsp]1566 xor ebp,eax1567 shld edx,edx,51568 vpor xmm4,xmm4,xmm81569 vpsrld xmm8,xmm9,301570 add ecx,esi1571 and edi,ebp1572 xor ebp,eax1573 add ecx,edx1574 vpslld xmm9,xmm9,21575 vpxor xmm4,xmm4,xmm81576 shrd edx,edx,71577 xor edi,eax1578 mov esi,ecx1579 add ebx,DWORD[12+rsp]1580 vaesenc xmm12,xmm12,xmm141581 vmovups xmm15,XMMWORD[((-48))+r15]1582 vpxor xmm4,xmm4,xmm91583 xor edx,ebp1584 shld ecx,ecx,51585 add ebx,edi1586 and esi,edx1587 xor edx,ebp1588 add ebx,ecx1589 shrd ecx,ecx,71590 xor esi,ebp1591 vpalignr xmm5,xmm2,xmm1,81592 mov edi,ebx1593 add eax,DWORD[16+rsp]1594 vpaddd xmm9,xmm10,xmm41595 xor ecx,edx1596 shld ebx,ebx,51597 vpsrldq xmm8,xmm4,41598 add eax,esi1599 and edi,ecx1600 vpxor xmm5,xmm5,xmm11601 xor ecx,edx1602 add eax,ebx1603 vpxor xmm8,xmm8,xmm31604 shrd ebx,ebx,71605 vaesenc xmm12,xmm12,xmm151606 vmovups xmm14,XMMWORD[((-32))+r15]1607 xor edi,edx1608 mov esi,eax1609 add ebp,DWORD[20+rsp]1610 vpxor xmm5,xmm5,xmm81611 xor ebx,ecx1612 shld eax,eax,51613 vmovdqa XMMWORD[rsp],xmm91614 add ebp,edi1615 and esi,ebx1616 vpsrld xmm8,xmm5,311617 xor ebx,ecx1618 add ebp,eax1619 shrd eax,eax,71620 xor esi,ecx1621 vpslldq xmm9,xmm5,121622 vpaddd xmm5,xmm5,xmm51623 mov edi,ebp1624 add edx,DWORD[24+rsp]1625 xor eax,ebx1626 shld ebp,ebp,51627 vpor xmm5,xmm5,xmm81628 vpsrld xmm8,xmm9,301629 add edx,esi1630 vaesenc xmm12,xmm12,xmm141631 vmovups xmm15,XMMWORD[((-16))+r15]1632 and edi,eax1633 xor eax,ebx1634 add edx,ebp1635 vpslld xmm9,xmm9,21636 vpxor xmm5,xmm5,xmm81637 shrd ebp,ebp,71638 xor edi,ebx1639 mov esi,edx1640 add ecx,DWORD[28+rsp]1641 vpxor xmm5,xmm5,xmm91642 xor ebp,eax1643 shld edx,edx,51644 vmovdqa xmm10,XMMWORD[16+r11]1645 add ecx,edi1646 and esi,ebp1647 xor ebp,eax1648 add ecx,edx1649 shrd edx,edx,71650 xor esi,eax1651 vpalignr xmm6,xmm3,xmm2,81652 mov edi,ecx1653 add ebx,DWORD[32+rsp]1654 vaesenc xmm12,xmm12,xmm151655 vmovups xmm14,XMMWORD[r15]1656 vpaddd xmm9,xmm10,xmm51657 xor edx,ebp1658 shld ecx,ecx,51659 vpsrldq xmm8,xmm5,41660 add ebx,esi1661 and edi,edx1662 vpxor xmm6,xmm6,xmm21663 xor edx,ebp1664 add ebx,ecx1665 vpxor xmm8,xmm8,xmm41666 shrd ecx,ecx,71667 xor edi,ebp1668 mov esi,ebx1669 add eax,DWORD[36+rsp]1670 vpxor xmm6,xmm6,xmm81671 xor ecx,edx1672 shld ebx,ebx,51673 vmovdqa XMMWORD[16+rsp],xmm91674 add eax,edi1675 and esi,ecx1676 vpsrld xmm8,xmm6,311677 xor ecx,edx1678 add eax,ebx1679 shrd ebx,ebx,71680 vaesenc xmm12,xmm12,xmm141681 vmovups xmm15,XMMWORD[16+r15]1682 xor esi,edx1683 vpslldq xmm9,xmm6,121684 vpaddd xmm6,xmm6,xmm61685 mov edi,eax1686 add ebp,DWORD[40+rsp]1687 xor ebx,ecx1688 shld eax,eax,51689 vpor xmm6,xmm6,xmm81690 vpsrld xmm8,xmm9,301691 add ebp,esi1692 and edi,ebx1693 xor ebx,ecx1694 add ebp,eax1695 vpslld xmm9,xmm9,21696 vpxor xmm6,xmm6,xmm81697 shrd eax,eax,71698 xor edi,ecx1699 mov esi,ebp1700 add edx,DWORD[44+rsp]1701 vpxor xmm6,xmm6,xmm91702 xor eax,ebx1703 shld ebp,ebp,51704 add edx,edi1705 vaesenc xmm12,xmm12,xmm151706 vmovups xmm14,XMMWORD[32+r15]1707 and esi,eax1708 xor eax,ebx1709 add edx,ebp1710 shrd ebp,ebp,71711 xor esi,ebx1712 vpalignr xmm7,xmm4,xmm3,81713 mov edi,edx1714 add ecx,DWORD[48+rsp]1715 vpaddd xmm9,xmm10,xmm61716 xor ebp,eax1717 shld edx,edx,51718 vpsrldq xmm8,xmm6,41719 add ecx,esi1720 and edi,ebp1721 vpxor xmm7,xmm7,xmm31722 xor ebp,eax1723 add ecx,edx1724 vpxor xmm8,xmm8,xmm51725 shrd edx,edx,71726 xor edi,eax1727 mov esi,ecx1728 add ebx,DWORD[52+rsp]1729 vaesenc xmm12,xmm12,xmm141730 vmovups xmm15,XMMWORD[48+r15]1731 vpxor xmm7,xmm7,xmm81732 xor edx,ebp1733 shld ecx,ecx,51734 vmovdqa XMMWORD[32+rsp],xmm91735 add ebx,edi1736 and esi,edx1737 vpsrld xmm8,xmm7,311738 xor edx,ebp1739 add ebx,ecx1740 shrd ecx,ecx,71741 xor esi,ebp1742 vpslldq xmm9,xmm7,121743 vpaddd xmm7,xmm7,xmm71744 mov edi,ebx1745 add eax,DWORD[56+rsp]1746 xor ecx,edx1747 shld ebx,ebx,51748 vpor xmm7,xmm7,xmm81749 vpsrld xmm8,xmm9,301750 add eax,esi1751 and edi,ecx1752 xor ecx,edx1753 add eax,ebx1754 vpslld xmm9,xmm9,21755 vpxor xmm7,xmm7,xmm81756 shrd ebx,ebx,71757 cmp r8d,111758 jb NEAR $L$vaesenclast61759 vaesenc xmm12,xmm12,xmm151760 vmovups xmm14,XMMWORD[64+r15]1761 vaesenc xmm12,xmm12,xmm141762 vmovups xmm15,XMMWORD[80+r15]1763 je NEAR $L$vaesenclast61764 vaesenc xmm12,xmm12,xmm151765 vmovups xmm14,XMMWORD[96+r15]1766 vaesenc xmm12,xmm12,xmm141767 vmovups xmm15,XMMWORD[112+r15]1768 $L$vaesenclast6:1769 vaesenclast xmm12,xmm12,xmm151770 vmovups xmm15,XMMWORD[((-112))+r15]1771 vmovups xmm14,XMMWORD[((16-112))+r15]1772 xor edi,edx1773 mov esi,eax1774 add ebp,DWORD[60+rsp]1775 vpxor xmm7,xmm7,xmm91776 xor ebx,ecx1777 shld eax,eax,51778 add ebp,edi1779 and esi,ebx1780 xor ebx,ecx1781 add ebp,eax1782 vpalignr xmm8,xmm7,xmm6,81783 vpxor xmm0,xmm0,xmm41784 shrd eax,eax,71785 xor esi,ecx1786 mov edi,ebp1787 add edx,DWORD[rsp]1788 vpxor xmm0,xmm0,xmm11789 xor eax,ebx1790 shld ebp,ebp,51791 vpaddd xmm9,xmm10,xmm71792 add edx,esi1793 vmovdqu xmm13,XMMWORD[16+r12]1794 vpxor xmm13,xmm13,xmm151795 vmovups XMMWORD[r13*1+r12],xmm121796 vpxor xmm12,xmm12,xmm131797 vaesenc xmm12,xmm12,xmm141798 vmovups xmm15,XMMWORD[((-80))+r15]1799 and edi,eax1800 vpxor xmm0,xmm0,xmm81801 xor eax,ebx1802 add edx,ebp1803 shrd ebp,ebp,71804 xor edi,ebx1805 vpsrld xmm8,xmm0,301806 vmovdqa XMMWORD[48+rsp],xmm91807 mov esi,edx1808 add ecx,DWORD[4+rsp]1809 xor ebp,eax1810 shld edx,edx,51811 vpslld xmm0,xmm0,21812 add ecx,edi1813 and esi,ebp1814 xor ebp,eax1815 add ecx,edx1816 shrd edx,edx,71817 xor esi,eax1818 mov edi,ecx1819 add ebx,DWORD[8+rsp]1820 vaesenc xmm12,xmm12,xmm151821 vmovups xmm14,XMMWORD[((-64))+r15]1822 vpor xmm0,xmm0,xmm81823 xor edx,ebp1824 shld ecx,ecx,51825 add ebx,esi1826 and edi,edx1827 xor edx,ebp1828 add ebx,ecx1829 add eax,DWORD[12+rsp]1830 xor edi,ebp1831 mov esi,ebx1832 shld ebx,ebx,51833 add eax,edi1834 xor esi,edx1835 shrd ecx,ecx,71836 add eax,ebx1837 vpalignr xmm8,xmm0,xmm7,81838 vpxor xmm1,xmm1,xmm51839 add ebp,DWORD[16+rsp]1840 vaesenc xmm12,xmm12,xmm141841 vmovups xmm15,XMMWORD[((-48))+r15]1842 xor esi,ecx1843 mov edi,eax1844 shld eax,eax,51845 vpxor xmm1,xmm1,xmm21846 add ebp,esi1847 xor edi,ecx1848 vpaddd xmm9,xmm10,xmm01849 shrd ebx,ebx,71850 add ebp,eax1851 vpxor xmm1,xmm1,xmm81852 add edx,DWORD[20+rsp]1853 xor edi,ebx1854 mov esi,ebp1855 shld ebp,ebp,51856 vpsrld xmm8,xmm1,301857 vmovdqa XMMWORD[rsp],xmm91858 add edx,edi1859 xor esi,ebx1860 shrd eax,eax,71861 add edx,ebp1862 vpslld xmm1,xmm1,21863 add ecx,DWORD[24+rsp]1864 xor esi,eax1865 mov edi,edx1866 shld edx,edx,51867 add ecx,esi1868 vaesenc xmm12,xmm12,xmm151869 vmovups xmm14,XMMWORD[((-32))+r15]1870 xor edi,eax1871 shrd ebp,ebp,71872 add ecx,edx1873 vpor xmm1,xmm1,xmm81874 add ebx,DWORD[28+rsp]1875 xor edi,ebp1876 mov esi,ecx1877 shld ecx,ecx,51878 add ebx,edi1879 xor esi,ebp1880 shrd edx,edx,71881 add ebx,ecx1882 vpalignr xmm8,xmm1,xmm0,81883 vpxor xmm2,xmm2,xmm61884 add eax,DWORD[32+rsp]1885 xor esi,edx1886 mov edi,ebx1887 shld ebx,ebx,51888 vpxor xmm2,xmm2,xmm31889 add eax,esi1890 xor edi,edx1891 vpaddd xmm9,xmm10,xmm11892 vmovdqa xmm10,XMMWORD[32+r11]1893 shrd ecx,ecx,71894 add eax,ebx1895 vpxor xmm2,xmm2,xmm81896 add ebp,DWORD[36+rsp]1897 vaesenc xmm12,xmm12,xmm141898 vmovups xmm15,XMMWORD[((-16))+r15]1899 xor edi,ecx1900 mov esi,eax1901 shld eax,eax,51902 vpsrld xmm8,xmm2,301903 vmovdqa XMMWORD[16+rsp],xmm91904 add ebp,edi1905 xor esi,ecx1906 shrd ebx,ebx,71907 add ebp,eax1908 vpslld xmm2,xmm2,21909 add edx,DWORD[40+rsp]1910 xor esi,ebx1911 mov edi,ebp1912 shld ebp,ebp,51913 add edx,esi1914 xor edi,ebx1915 shrd eax,eax,71916 add edx,ebp1917 vpor xmm2,xmm2,xmm81918 add ecx,DWORD[44+rsp]1919 xor edi,eax1920 mov esi,edx1921 shld edx,edx,51922 add ecx,edi1923 vaesenc xmm12,xmm12,xmm151924 vmovups xmm14,XMMWORD[r15]1925 xor esi,eax1926 shrd ebp,ebp,71927 add ecx,edx1928 vpalignr xmm8,xmm2,xmm1,81929 vpxor xmm3,xmm3,xmm71930 add ebx,DWORD[48+rsp]1931 xor esi,ebp1932 mov edi,ecx1933 shld ecx,ecx,51934 vpxor xmm3,xmm3,xmm41935 add ebx,esi1936 xor edi,ebp1937 vpaddd xmm9,xmm10,xmm21938 shrd edx,edx,71939 add ebx,ecx1940 vpxor xmm3,xmm3,xmm81941 add eax,DWORD[52+rsp]1942 xor edi,edx1943 mov esi,ebx1944 shld ebx,ebx,51945 vpsrld xmm8,xmm3,301946 vmovdqa XMMWORD[32+rsp],xmm91947 add eax,edi1948 xor esi,edx1949 shrd ecx,ecx,71950 add eax,ebx1951 vpslld xmm3,xmm3,21952 add ebp,DWORD[56+rsp]1953 vaesenc xmm12,xmm12,xmm141954 vmovups xmm15,XMMWORD[16+r15]1955 xor esi,ecx1956 mov edi,eax1957 shld eax,eax,51958 add ebp,esi1959 xor edi,ecx1960 shrd ebx,ebx,71961 add ebp,eax1962 vpor xmm3,xmm3,xmm81963 add edx,DWORD[60+rsp]1964 xor edi,ebx1965 mov esi,ebp1966 shld ebp,ebp,51967 add edx,edi1968 xor esi,ebx1969 shrd eax,eax,71970 add edx,ebp1971 vpalignr xmm8,xmm3,xmm2,81972 vpxor xmm4,xmm4,xmm01973 add ecx,DWORD[rsp]1974 xor esi,eax1975 mov edi,edx1976 shld edx,edx,51977 vpxor xmm4,xmm4,xmm51978 add ecx,esi1979 vaesenc xmm12,xmm12,xmm151980 vmovups xmm14,XMMWORD[32+r15]1981 xor edi,eax1982 vpaddd xmm9,xmm10,xmm31983 shrd ebp,ebp,71984 add ecx,edx1985 vpxor xmm4,xmm4,xmm81986 add ebx,DWORD[4+rsp]1987 xor edi,ebp1988 mov esi,ecx1989 shld ecx,ecx,51990 vpsrld xmm8,xmm4,301991 vmovdqa XMMWORD[48+rsp],xmm91992 add ebx,edi1993 xor esi,ebp1994 shrd edx,edx,71995 add ebx,ecx1996 vpslld xmm4,xmm4,21997 add eax,DWORD[8+rsp]1998 xor esi,edx1999 mov edi,ebx2000 shld ebx,ebx,52001 add eax,esi2002 xor edi,edx2003 shrd ecx,ecx,72004 add eax,ebx2005 vpor xmm4,xmm4,xmm82006 add ebp,DWORD[12+rsp]2007 vaesenc xmm12,xmm12,xmm142008 vmovups xmm15,XMMWORD[48+r15]2009 xor edi,ecx2010 mov esi,eax2011 shld eax,eax,52012 add ebp,edi2013 xor esi,ecx2014 shrd ebx,ebx,72015 add ebp,eax2016 vpalignr xmm8,xmm4,xmm3,82017 vpxor xmm5,xmm5,xmm12018 add edx,DWORD[16+rsp]2019 xor esi,ebx2020 mov edi,ebp2021 shld ebp,ebp,52022 vpxor xmm5,xmm5,xmm62023 add edx,esi2024 xor edi,ebx2025 vpaddd xmm9,xmm10,xmm42026 shrd eax,eax,72027 add edx,ebp2028 vpxor xmm5,xmm5,xmm82029 add ecx,DWORD[20+rsp]2030 xor edi,eax2031 mov esi,edx2032 shld edx,edx,52033 vpsrld xmm8,xmm5,302034 vmovdqa XMMWORD[rsp],xmm92035 add ecx,edi2036 cmp r8d,112037 jb NEAR $L$vaesenclast72038 vaesenc xmm12,xmm12,xmm152039 vmovups xmm14,XMMWORD[64+r15]2040 vaesenc xmm12,xmm12,xmm142041 vmovups xmm15,XMMWORD[80+r15]2042 je NEAR $L$vaesenclast72043 vaesenc xmm12,xmm12,xmm152044 vmovups xmm14,XMMWORD[96+r15]2045 vaesenc xmm12,xmm12,xmm142046 vmovups xmm15,XMMWORD[112+r15]2047 $L$vaesenclast7:2048 vaesenclast xmm12,xmm12,xmm152049 vmovups xmm15,XMMWORD[((-112))+r15]2050 vmovups xmm14,XMMWORD[((16-112))+r15]2051 xor esi,eax2052 shrd ebp,ebp,72053 add ecx,edx2054 vpslld xmm5,xmm5,22055 add ebx,DWORD[24+rsp]2056 xor esi,ebp2057 mov edi,ecx2058 shld ecx,ecx,52059 add ebx,esi2060 xor edi,ebp2061 shrd edx,edx,72062 add ebx,ecx2063 vpor xmm5,xmm5,xmm82064 add eax,DWORD[28+rsp]2065 shrd ecx,ecx,72066 mov esi,ebx2067 xor edi,edx2068 shld ebx,ebx,52069 add eax,edi2070 xor esi,ecx2071 xor ecx,edx2072 add eax,ebx2073 vpalignr xmm8,xmm5,xmm4,82074 vpxor xmm6,xmm6,xmm22075 add ebp,DWORD[32+rsp]2076 vmovdqu xmm13,XMMWORD[32+r12]2077 vpxor xmm13,xmm13,xmm152078 vmovups XMMWORD[16+r12*1+r13],xmm122079 vpxor xmm12,xmm12,xmm132080 vaesenc xmm12,xmm12,xmm142081 vmovups xmm15,XMMWORD[((-80))+r15]2082 and esi,ecx2083 xor ecx,edx2084 shrd ebx,ebx,72085 vpxor xmm6,xmm6,xmm72086 mov edi,eax2087 xor esi,ecx2088 vpaddd xmm9,xmm10,xmm52089 shld eax,eax,52090 add ebp,esi2091 vpxor xmm6,xmm6,xmm82092 xor edi,ebx2093 xor ebx,ecx2094 add ebp,eax2095 add edx,DWORD[36+rsp]2096 vpsrld xmm8,xmm6,302097 vmovdqa XMMWORD[16+rsp],xmm92098 and edi,ebx2099 xor ebx,ecx2100 shrd eax,eax,72101 mov esi,ebp2102 vpslld xmm6,xmm6,22103 xor edi,ebx2104 shld ebp,ebp,52105 add edx,edi2106 vaesenc xmm12,xmm12,xmm152107 vmovups xmm14,XMMWORD[((-64))+r15]2108 xor esi,eax2109 xor eax,ebx2110 add edx,ebp2111 add ecx,DWORD[40+rsp]2112 and esi,eax2113 vpor xmm6,xmm6,xmm82114 xor eax,ebx2115 shrd ebp,ebp,72116 mov edi,edx2117 xor esi,eax2118 shld edx,edx,52119 add ecx,esi2120 xor edi,ebp2121 xor ebp,eax2122 add ecx,edx2123 add ebx,DWORD[44+rsp]2124 and edi,ebp2125 xor ebp,eax2126 shrd edx,edx,72127 vaesenc xmm12,xmm12,xmm142128 vmovups xmm15,XMMWORD[((-48))+r15]2129 mov esi,ecx2130 xor edi,ebp2131 shld ecx,ecx,52132 add ebx,edi2133 xor esi,edx2134 xor edx,ebp2135 add ebx,ecx2136 vpalignr xmm8,xmm6,xmm5,82137 vpxor xmm7,xmm7,xmm32138 add eax,DWORD[48+rsp]2139 and esi,edx2140 xor edx,ebp2141 shrd ecx,ecx,72142 vpxor xmm7,xmm7,xmm02143 mov edi,ebx2144 xor esi,edx2145 vpaddd xmm9,xmm10,xmm62146 vmovdqa xmm10,XMMWORD[48+r11]2147 shld ebx,ebx,52148 add eax,esi2149 vpxor xmm7,xmm7,xmm82150 xor edi,ecx2151 xor ecx,edx2152 add eax,ebx2153 add ebp,DWORD[52+rsp]2154 vaesenc xmm12,xmm12,xmm152155 vmovups xmm14,XMMWORD[((-32))+r15]2156 vpsrld xmm8,xmm7,302157 vmovdqa XMMWORD[32+rsp],xmm92158 and edi,ecx2159 xor ecx,edx2160 shrd ebx,ebx,72161 mov esi,eax2162 vpslld xmm7,xmm7,22163 xor edi,ecx2164 shld eax,eax,52165 add ebp,edi2166 xor esi,ebx2167 xor ebx,ecx2168 add ebp,eax2169 add edx,DWORD[56+rsp]2170 and esi,ebx2171 vpor xmm7,xmm7,xmm82172 xor ebx,ecx2173 shrd eax,eax,72174 mov edi,ebp2175 xor esi,ebx2176 shld ebp,ebp,52177 add edx,esi2178 vaesenc xmm12,xmm12,xmm142179 vmovups xmm15,XMMWORD[((-16))+r15]2180 xor edi,eax2181 xor eax,ebx2182 add edx,ebp2183 add ecx,DWORD[60+rsp]2184 and edi,eax2185 xor eax,ebx2186 shrd ebp,ebp,72187 mov esi,edx2188 xor edi,eax2189 shld edx,edx,52190 add ecx,edi2191 xor esi,ebp2192 xor ebp,eax2193 add ecx,edx2194 vpalignr xmm8,xmm7,xmm6,82195 vpxor xmm0,xmm0,xmm42196 add ebx,DWORD[rsp]2197 and esi,ebp2198 xor ebp,eax2199 shrd edx,edx,72200 vaesenc xmm12,xmm12,xmm152201 vmovups xmm14,XMMWORD[r15]2202 vpxor xmm0,xmm0,xmm12203 mov edi,ecx2204 xor esi,ebp2205 vpaddd xmm9,xmm10,xmm72206 shld ecx,ecx,52207 add ebx,esi2208 vpxor xmm0,xmm0,xmm82209 xor edi,edx2210 xor edx,ebp2211 add ebx,ecx2212 add eax,DWORD[4+rsp]2213 vpsrld xmm8,xmm0,302214 vmovdqa XMMWORD[48+rsp],xmm92215 and edi,edx2216 xor edx,ebp2217 shrd ecx,ecx,72218 mov esi,ebx2219 vpslld xmm0,xmm0,22220 xor edi,edx2221 shld ebx,ebx,52222 add eax,edi2223 xor esi,ecx2224 xor ecx,edx2225 add eax,ebx2226 add ebp,DWORD[8+rsp]2227 vaesenc xmm12,xmm12,xmm142228 vmovups xmm15,XMMWORD[16+r15]2229 and esi,ecx2230 vpor xmm0,xmm0,xmm82231 xor ecx,edx2232 shrd ebx,ebx,72233 mov edi,eax2234 xor esi,ecx2235 shld eax,eax,52236 add ebp,esi2237 xor edi,ebx2238 xor ebx,ecx2239 add ebp,eax2240 add edx,DWORD[12+rsp]2241 and edi,ebx2242 xor ebx,ecx2243 shrd eax,eax,72244 mov esi,ebp2245 xor edi,ebx2246 shld ebp,ebp,52247 add edx,edi2248 vaesenc xmm12,xmm12,xmm152249 vmovups xmm14,XMMWORD[32+r15]2250 xor esi,eax2251 xor eax,ebx2252 add edx,ebp2253 vpalignr xmm8,xmm0,xmm7,82254 vpxor xmm1,xmm1,xmm52255 add ecx,DWORD[16+rsp]2256 and esi,eax2257 xor eax,ebx2258 shrd ebp,ebp,72259 vpxor xmm1,xmm1,xmm22260 mov edi,edx2261 xor esi,eax2262 vpaddd xmm9,xmm10,xmm02263 shld edx,edx,52264 add ecx,esi2265 vpxor xmm1,xmm1,xmm82266 xor edi,ebp2267 xor ebp,eax2268 add ecx,edx2269 add ebx,DWORD[20+rsp]2270 vpsrld xmm8,xmm1,302271 vmovdqa XMMWORD[rsp],xmm92272 and edi,ebp2273 xor ebp,eax2274 shrd edx,edx,72275 vaesenc xmm12,xmm12,xmm142276 vmovups xmm15,XMMWORD[48+r15]2277 mov esi,ecx2278 vpslld xmm1,xmm1,22279 xor edi,ebp2280 shld ecx,ecx,52281 add ebx,edi2282 xor esi,edx2283 xor edx,ebp2284 add ebx,ecx2285 add eax,DWORD[24+rsp]2286 and esi,edx2287 vpor xmm1,xmm1,xmm82288 xor edx,ebp2289 shrd ecx,ecx,72290 mov edi,ebx2291 xor esi,edx2292 shld ebx,ebx,52293 add eax,esi2294 xor edi,ecx2295 xor ecx,edx2296 add eax,ebx2297 add ebp,DWORD[28+rsp]2298 cmp r8d,112299 jb NEAR $L$vaesenclast82300 vaesenc xmm12,xmm12,xmm152301 vmovups xmm14,XMMWORD[64+r15]2302 vaesenc xmm12,xmm12,xmm142303 vmovups xmm15,XMMWORD[80+r15]2304 je NEAR $L$vaesenclast82305 vaesenc xmm12,xmm12,xmm152306 vmovups xmm14,XMMWORD[96+r15]2307 vaesenc xmm12,xmm12,xmm142308 vmovups xmm15,XMMWORD[112+r15]2309 $L$vaesenclast8:2310 vaesenclast xmm12,xmm12,xmm152311 vmovups xmm15,XMMWORD[((-112))+r15]2312 vmovups xmm14,XMMWORD[((16-112))+r15]2313 and edi,ecx2314 xor ecx,edx2315 shrd ebx,ebx,72316 mov esi,eax2317 xor edi,ecx2318 shld eax,eax,52319 add ebp,edi2320 xor esi,ebx2321 xor ebx,ecx2322 add ebp,eax2323 vpalignr xmm8,xmm1,xmm0,82324 vpxor xmm2,xmm2,xmm62325 add edx,DWORD[32+rsp]2326 and esi,ebx2327 xor ebx,ecx2328 shrd eax,eax,72329 vpxor xmm2,xmm2,xmm32330 mov edi,ebp2331 xor esi,ebx2332 vpaddd xmm9,xmm10,xmm12333 shld ebp,ebp,52334 add edx,esi2335 vmovdqu xmm13,XMMWORD[48+r12]2336 vpxor xmm13,xmm13,xmm152337 vmovups XMMWORD[32+r12*1+r13],xmm122338 vpxor xmm12,xmm12,xmm132339 vaesenc xmm12,xmm12,xmm142340 vmovups xmm15,XMMWORD[((-80))+r15]2341 vpxor xmm2,xmm2,xmm82342 xor edi,eax2343 xor eax,ebx2344 add edx,ebp2345 add ecx,DWORD[36+rsp]2346 vpsrld xmm8,xmm2,302347 vmovdqa XMMWORD[16+rsp],xmm92348 and edi,eax2349 xor eax,ebx2350 shrd ebp,ebp,72351 mov esi,edx2352 vpslld xmm2,xmm2,22353 xor edi,eax2354 shld edx,edx,52355 add ecx,edi2356 xor esi,ebp2357 xor ebp,eax2358 add ecx,edx2359 add ebx,DWORD[40+rsp]2360 and esi,ebp2361 vpor xmm2,xmm2,xmm82362 xor ebp,eax2363 shrd edx,edx,72364 vaesenc xmm12,xmm12,xmm152365 vmovups xmm14,XMMWORD[((-64))+r15]2366 mov edi,ecx2367 xor esi,ebp2368 shld ecx,ecx,52369 add ebx,esi2370 xor edi,edx2371 xor edx,ebp2372 add ebx,ecx2373 add eax,DWORD[44+rsp]2374 and edi,edx2375 xor edx,ebp2376 shrd ecx,ecx,72377 mov esi,ebx2378 xor edi,edx2379 shld ebx,ebx,52380 add eax,edi2381 xor esi,edx2382 add eax,ebx2383 vpalignr xmm8,xmm2,xmm1,82384 vpxor xmm3,xmm3,xmm72385 add ebp,DWORD[48+rsp]2386 vaesenc xmm12,xmm12,xmm142387 vmovups xmm15,XMMWORD[((-48))+r15]2388 xor esi,ecx2389 mov edi,eax2390 shld eax,eax,52391 vpxor xmm3,xmm3,xmm42392 add ebp,esi2393 xor edi,ecx2394 vpaddd xmm9,xmm10,xmm22395 shrd ebx,ebx,72396 add ebp,eax2397 vpxor xmm3,xmm3,xmm82398 add edx,DWORD[52+rsp]2399 xor edi,ebx2400 mov esi,ebp2401 shld ebp,ebp,52402 vpsrld xmm8,xmm3,302403 vmovdqa XMMWORD[32+rsp],xmm92404 add edx,edi2405 xor esi,ebx2406 shrd eax,eax,72407 add edx,ebp2408 vpslld xmm3,xmm3,22409 add ecx,DWORD[56+rsp]2410 xor esi,eax2411 mov edi,edx2412 shld edx,edx,52413 add ecx,esi2414 vaesenc xmm12,xmm12,xmm152415 vmovups xmm14,XMMWORD[((-32))+r15]2416 xor edi,eax2417 shrd ebp,ebp,72418 add ecx,edx2419 vpor xmm3,xmm3,xmm82420 add ebx,DWORD[60+rsp]2421 xor edi,ebp2422 mov esi,ecx2423 shld ecx,ecx,52424 add ebx,edi2425 xor esi,ebp2426 shrd edx,edx,72427 add ebx,ecx2428 add eax,DWORD[rsp]2429 vpaddd xmm9,xmm10,xmm32430 xor esi,edx2431 mov edi,ebx2432 shld ebx,ebx,52433 add eax,esi2434 vmovdqa XMMWORD[48+rsp],xmm92435 xor edi,edx2436 shrd ecx,ecx,72437 add eax,ebx2438 add ebp,DWORD[4+rsp]2439 vaesenc xmm12,xmm12,xmm142440 vmovups xmm15,XMMWORD[((-16))+r15]2441 xor edi,ecx2442 mov esi,eax2443 shld eax,eax,52444 add ebp,edi2445 xor esi,ecx2446 shrd ebx,ebx,72447 add ebp,eax2448 add edx,DWORD[8+rsp]2449 xor esi,ebx2450 mov edi,ebp2451 shld ebp,ebp,52452 add edx,esi2453 xor edi,ebx2454 shrd eax,eax,72455 add edx,ebp2456 add ecx,DWORD[12+rsp]2457 xor edi,eax2458 mov esi,edx2459 shld edx,edx,52460 add ecx,edi2461 vaesenc xmm12,xmm12,xmm152462 vmovups xmm14,XMMWORD[r15]2463 xor esi,eax2464 shrd ebp,ebp,72465 add ecx,edx2466 cmp r10,r142467 je NEAR $L$done_avx2468 vmovdqa xmm9,XMMWORD[64+r11]2469 vmovdqa xmm10,XMMWORD[r11]2470 vmovdqu xmm0,XMMWORD[r10]2471 vmovdqu xmm1,XMMWORD[16+r10]2472 vmovdqu xmm2,XMMWORD[32+r10]2473 vmovdqu xmm3,XMMWORD[48+r10]2474 vpshufb xmm0,xmm0,xmm92475 add r10,642476 add ebx,DWORD[16+rsp]2477 xor esi,ebp2478 vpshufb xmm1,xmm1,xmm92479 mov edi,ecx2480 shld ecx,ecx,52481 vpaddd xmm8,xmm0,xmm102482 add ebx,esi2483 xor edi,ebp2484 shrd edx,edx,72485 add ebx,ecx2486 vmovdqa XMMWORD[rsp],xmm82487 add eax,DWORD[20+rsp]2488 xor edi,edx2489 mov esi,ebx2490 shld ebx,ebx,52491 add eax,edi2492 xor esi,edx2493 shrd ecx,ecx,72494 add eax,ebx2495 add ebp,DWORD[24+rsp]2496 vaesenc xmm12,xmm12,xmm142497 vmovups xmm15,XMMWORD[16+r15]2498 xor esi,ecx2499 mov edi,eax2500 shld eax,eax,52501 add ebp,esi2502 xor edi,ecx2503 shrd ebx,ebx,72504 add ebp,eax2505 add edx,DWORD[28+rsp]2506 xor edi,ebx2507 mov esi,ebp2508 shld ebp,ebp,52509 add edx,edi2510 xor esi,ebx2511 shrd eax,eax,72512 add edx,ebp2513 add ecx,DWORD[32+rsp]2514 xor esi,eax2515 vpshufb xmm2,xmm2,xmm92516 mov edi,edx2517 shld edx,edx,52518 vpaddd xmm8,xmm1,xmm102519 add ecx,esi2520 vaesenc xmm12,xmm12,xmm152521 vmovups xmm14,XMMWORD[32+r15]2522 xor edi,eax2523 shrd ebp,ebp,72524 add ecx,edx2525 vmovdqa XMMWORD[16+rsp],xmm82526 add ebx,DWORD[36+rsp]2527 xor edi,ebp2528 mov esi,ecx2529 shld ecx,ecx,52530 add ebx,edi2531 xor esi,ebp2532 shrd edx,edx,72533 add ebx,ecx2534 add eax,DWORD[40+rsp]2535 xor esi,edx2536 mov edi,ebx2537 shld ebx,ebx,52538 add eax,esi2539 xor edi,edx2540 shrd ecx,ecx,72541 add eax,ebx2542 add ebp,DWORD[44+rsp]2543 vaesenc xmm12,xmm12,xmm142544 vmovups xmm15,XMMWORD[48+r15]2545 xor edi,ecx2546 mov esi,eax2547 shld eax,eax,52548 add ebp,edi2549 xor esi,ecx2550 shrd ebx,ebx,72551 add ebp,eax2552 add edx,DWORD[48+rsp]2553 xor esi,ebx2554 vpshufb xmm3,xmm3,xmm92555 mov edi,ebp2556 shld ebp,ebp,52557 vpaddd xmm8,xmm2,xmm102558 add edx,esi2559 xor edi,ebx2560 shrd eax,eax,72561 add edx,ebp2562 vmovdqa XMMWORD[32+rsp],xmm82563 add ecx,DWORD[52+rsp]2564 xor edi,eax2565 mov esi,edx2566 shld edx,edx,52567 add ecx,edi2568 cmp r8d,112569 jb NEAR $L$vaesenclast92570 vaesenc xmm12,xmm12,xmm152571 vmovups xmm14,XMMWORD[64+r15]2572 vaesenc xmm12,xmm12,xmm142573 vmovups xmm15,XMMWORD[80+r15]2574 je NEAR $L$vaesenclast92575 vaesenc xmm12,xmm12,xmm152576 vmovups xmm14,XMMWORD[96+r15]2577 vaesenc xmm12,xmm12,xmm142578 vmovups xmm15,XMMWORD[112+r15]2579 $L$vaesenclast9:2580 vaesenclast xmm12,xmm12,xmm152581 vmovups xmm15,XMMWORD[((-112))+r15]2582 vmovups xmm14,XMMWORD[((16-112))+r15]2583 xor esi,eax2584 shrd ebp,ebp,72585 add ecx,edx2586 add ebx,DWORD[56+rsp]2587 xor esi,ebp2588 mov edi,ecx2589 shld ecx,ecx,52590 add ebx,esi2591 xor edi,ebp2592 shrd edx,edx,72593 add ebx,ecx2594 add eax,DWORD[60+rsp]2595 xor edi,edx2596 mov esi,ebx2597 shld ebx,ebx,52598 add eax,edi2599 shrd ecx,ecx,72600 add eax,ebx2601 vmovups XMMWORD[48+r12*1+r13],xmm122602 lea r12,[64+r12]2603 2604 add eax,DWORD[r9]2605 add esi,DWORD[4+r9]2606 add ecx,DWORD[8+r9]2607 add edx,DWORD[12+r9]2608 mov DWORD[r9],eax2609 add ebp,DWORD[16+r9]2610 mov DWORD[4+r9],esi2611 mov ebx,esi2612 mov DWORD[8+r9],ecx2613 mov edi,ecx2614 mov DWORD[12+r9],edx2615 xor edi,edx2616 mov DWORD[16+r9],ebp2617 and esi,edi2618 jmp NEAR $L$oop_avx2619 2620 $L$done_avx:2621 add ebx,DWORD[16+rsp]2622 xor esi,ebp2623 mov edi,ecx2624 shld ecx,ecx,52625 add ebx,esi2626 xor edi,ebp2627 shrd edx,edx,72628 add ebx,ecx2629 add eax,DWORD[20+rsp]2630 xor edi,edx2631 mov esi,ebx2632 shld ebx,ebx,52633 add eax,edi2634 xor esi,edx2635 shrd ecx,ecx,72636 add eax,ebx2637 add ebp,DWORD[24+rsp]2638 vaesenc xmm12,xmm12,xmm142639 vmovups xmm15,XMMWORD[16+r15]2640 xor esi,ecx2641 mov edi,eax2642 shld eax,eax,52643 add ebp,esi2644 xor edi,ecx2645 shrd ebx,ebx,72646 add ebp,eax2647 add edx,DWORD[28+rsp]2648 xor edi,ebx2649 mov esi,ebp2650 shld ebp,ebp,52651 add edx,edi2652 xor esi,ebx2653 shrd eax,eax,72654 add edx,ebp2655 add ecx,DWORD[32+rsp]2656 xor esi,eax2657 mov edi,edx2658 shld edx,edx,52659 add ecx,esi2660 vaesenc xmm12,xmm12,xmm152661 vmovups xmm14,XMMWORD[32+r15]2662 xor edi,eax2663 shrd ebp,ebp,72664 add ecx,edx2665 add ebx,DWORD[36+rsp]2666 xor edi,ebp2667 mov esi,ecx2668 shld ecx,ecx,52669 add ebx,edi2670 xor esi,ebp2671 shrd edx,edx,72672 add ebx,ecx2673 add eax,DWORD[40+rsp]2674 xor esi,edx2675 mov edi,ebx2676 shld ebx,ebx,52677 add eax,esi2678 xor edi,edx2679 shrd ecx,ecx,72680 add eax,ebx2681 add ebp,DWORD[44+rsp]2682 vaesenc xmm12,xmm12,xmm142683 vmovups xmm15,XMMWORD[48+r15]2684 xor edi,ecx2685 mov esi,eax2686 shld eax,eax,52687 add ebp,edi2688 xor esi,ecx2689 shrd ebx,ebx,72690 add ebp,eax2691 add edx,DWORD[48+rsp]2692 xor esi,ebx2693 mov edi,ebp2694 shld ebp,ebp,52695 add edx,esi2696 xor edi,ebx2697 shrd eax,eax,72698 add edx,ebp2699 add ecx,DWORD[52+rsp]2700 xor edi,eax2701 mov esi,edx2702 shld edx,edx,52703 add ecx,edi2704 cmp r8d,112705 jb NEAR $L$vaesenclast102706 vaesenc xmm12,xmm12,xmm152707 vmovups xmm14,XMMWORD[64+r15]2708 vaesenc xmm12,xmm12,xmm142709 vmovups xmm15,XMMWORD[80+r15]2710 je NEAR $L$vaesenclast102711 vaesenc xmm12,xmm12,xmm152712 vmovups xmm14,XMMWORD[96+r15]2713 vaesenc xmm12,xmm12,xmm142714 vmovups xmm15,XMMWORD[112+r15]2715 $L$vaesenclast10:2716 vaesenclast xmm12,xmm12,xmm152717 vmovups xmm15,XMMWORD[((-112))+r15]2718 vmovups xmm14,XMMWORD[((16-112))+r15]2719 xor esi,eax2720 shrd ebp,ebp,72721 add ecx,edx2722 add ebx,DWORD[56+rsp]2723 xor esi,ebp2724 mov edi,ecx2725 shld ecx,ecx,52726 add ebx,esi2727 xor edi,ebp2728 shrd edx,edx,72729 add ebx,ecx2730 add eax,DWORD[60+rsp]2731 xor edi,edx2732 mov esi,ebx2733 shld ebx,ebx,52734 add eax,edi2735 shrd ecx,ecx,72736 add eax,ebx2737 vmovups XMMWORD[48+r12*1+r13],xmm122738 mov r8,QWORD[88+rsp]2739 2740 add eax,DWORD[r9]2741 add esi,DWORD[4+r9]2742 add ecx,DWORD[8+r9]2743 mov DWORD[r9],eax2744 add edx,DWORD[12+r9]2745 mov DWORD[4+r9],esi2746 add ebp,DWORD[16+r9]2747 mov DWORD[8+r9],ecx2748 mov DWORD[12+r9],edx2749 mov DWORD[16+r9],ebp2750 vmovups XMMWORD[r8],xmm122751 vzeroall2752 movaps xmm6,XMMWORD[((96+0))+rsp]2753 movaps xmm7,XMMWORD[((96+16))+rsp]2754 movaps xmm8,XMMWORD[((96+32))+rsp]2755 movaps xmm9,XMMWORD[((96+48))+rsp]2756 movaps xmm10,XMMWORD[((96+64))+rsp]2757 movaps xmm11,XMMWORD[((96+80))+rsp]2758 movaps xmm12,XMMWORD[((96+96))+rsp]2759 movaps xmm13,XMMWORD[((96+112))+rsp]2760 movaps xmm14,XMMWORD[((96+128))+rsp]2761 movaps xmm15,XMMWORD[((96+144))+rsp]2762 lea rsi,[264+rsp]2763 2764 mov r15,QWORD[rsi]2765 2766 mov r14,QWORD[8+rsi]2767 2768 mov r13,QWORD[16+rsi]2769 2770 mov r12,QWORD[24+rsi]2771 2772 mov rbp,QWORD[32+rsi]2773 2774 mov rbx,QWORD[40+rsi]2775 2776 lea rsp,[48+rsi]2777 2778 $L$epilogue_avx:2779 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2780 mov rsi,QWORD[16+rsp]2781 DB 0F3h,0C3h ;repret2782 2783 $L$SEH_end_aesni_cbc_sha1_enc_avx:2784 1429 ALIGN 64 2785 1430 K_XX_XX: … … 2901 1546 DB 15,56,201,243 2902 1547 cmp r11d,11 2903 jb NEAR $L$aesenclast 111548 jb NEAR $L$aesenclast6 2904 1549 movups xmm0,XMMWORD[64+rcx] 2905 1550 DB 102,15,56,220,209 2906 1551 movups xmm1,XMMWORD[80+rcx] 2907 1552 DB 102,15,56,220,208 2908 je NEAR $L$aesenclast 111553 je NEAR $L$aesenclast6 2909 1554 movups xmm0,XMMWORD[96+rcx] 2910 1555 DB 102,15,56,220,209 2911 1556 movups xmm1,XMMWORD[112+rcx] 2912 1557 DB 102,15,56,220,208 2913 $L$aesenclast 11:1558 $L$aesenclast6: 2914 1559 DB 102,15,56,221,209 2915 1560 movups xmm0,XMMWORD[((16-112))+rcx] … … 2967 1612 DB 15,56,201,220 2968 1613 cmp r11d,11 2969 jb NEAR $L$aesenclast 121614 jb NEAR $L$aesenclast7 2970 1615 movups xmm0,XMMWORD[64+rcx] 2971 1616 DB 102,15,56,220,209 2972 1617 movups xmm1,XMMWORD[80+rcx] 2973 1618 DB 102,15,56,220,208 2974 je NEAR $L$aesenclast 121619 je NEAR $L$aesenclast7 2975 1620 movups xmm0,XMMWORD[96+rcx] 2976 1621 DB 102,15,56,220,209 2977 1622 movups xmm1,XMMWORD[112+rcx] 2978 1623 DB 102,15,56,220,208 2979 $L$aesenclast 12:1624 $L$aesenclast7: 2980 1625 DB 102,15,56,221,209 2981 1626 movups xmm0,XMMWORD[((16-112))+rcx] … … 3033 1678 DB 15,56,201,229 3034 1679 cmp r11d,11 3035 jb NEAR $L$aesenclast 131680 jb NEAR $L$aesenclast8 3036 1681 movups xmm0,XMMWORD[64+rcx] 3037 1682 DB 102,15,56,220,209 3038 1683 movups xmm1,XMMWORD[80+rcx] 3039 1684 DB 102,15,56,220,208 3040 je NEAR $L$aesenclast 131685 je NEAR $L$aesenclast8 3041 1686 movups xmm0,XMMWORD[96+rcx] 3042 1687 DB 102,15,56,220,209 3043 1688 movups xmm1,XMMWORD[112+rcx] 3044 1689 DB 102,15,56,220,208 3045 $L$aesenclast 13:1690 $L$aesenclast8: 3046 1691 DB 102,15,56,221,209 3047 1692 movups xmm0,XMMWORD[((16-112))+rcx] … … 3097 1742 DB 102,15,56,220,208 3098 1743 cmp r11d,11 3099 jb NEAR $L$aesenclast 141744 jb NEAR $L$aesenclast9 3100 1745 movups xmm0,XMMWORD[64+rcx] 3101 1746 DB 102,15,56,220,209 3102 1747 movups xmm1,XMMWORD[80+rcx] 3103 1748 DB 102,15,56,220,208 3104 je NEAR $L$aesenclast 141749 je NEAR $L$aesenclast9 3105 1750 movups xmm0,XMMWORD[96+rcx] 3106 1751 DB 102,15,56,220,209 3107 1752 movups xmm1,XMMWORD[112+rcx] 3108 1753 DB 102,15,56,220,208 3109 $L$aesenclast 14:1754 $L$aesenclast9: 3110 1755 DB 102,15,56,221,209 3111 1756 movups xmm0,XMMWORD[((16-112))+rcx] … … 3247 1892 DD $L$SEH_end_aesni_cbc_sha1_enc_ssse3 wrt ..imagebase 3248 1893 DD $L$SEH_info_aesni_cbc_sha1_enc_ssse3 wrt ..imagebase 3249 DD $L$SEH_begin_aesni_cbc_sha1_enc_avx wrt ..imagebase3250 DD $L$SEH_end_aesni_cbc_sha1_enc_avx wrt ..imagebase3251 DD $L$SEH_info_aesni_cbc_sha1_enc_avx wrt ..imagebase3252 1894 DD $L$SEH_begin_aesni_cbc_sha1_enc_shaext wrt ..imagebase 3253 1895 DD $L$SEH_end_aesni_cbc_sha1_enc_shaext wrt ..imagebase … … 3259 1901 DD ssse3_handler wrt ..imagebase 3260 1902 DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase 3261 $L$SEH_info_aesni_cbc_sha1_enc_avx:3262 DB 9,0,0,03263 DD ssse3_handler wrt ..imagebase3264 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase3265 1903 $L$SEH_info_aesni_cbc_sha1_enc_shaext: 3266 1904 DB 9,0,0,0 -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/aesni-sha256-x86_64.S
r94083 r95221 12 12 aesni_cbc_sha256_enc: 13 13 14 lea r11,[OPENSSL_ia32cap_P]15 mov eax,116 cmp rcx,017 je NEAR $L$probe18 mov eax,DWORD[r11]19 mov r10,QWORD[4+r11]20 bt r10,6121 jc NEAR aesni_cbc_sha256_enc_shaext22 mov r11,r1023 shr r11,3224 25 test r10d,204826 jnz NEAR aesni_cbc_sha256_enc_xop27 and r11d,29628 cmp r11d,29629 je NEAR aesni_cbc_sha256_enc_avx230 and r10d,26843545631 jnz NEAR aesni_cbc_sha256_enc_avx32 ud233 14 xor eax,eax 34 15 cmp rcx,0 … … 86 67 DB 46,111,114,103,62,0 87 68 ALIGN 64 88 89 ALIGN 6490 aesni_cbc_sha256_enc_xop:91 mov QWORD[8+rsp],rdi ;WIN64 prologue92 mov QWORD[16+rsp],rsi93 mov rax,rsp94 $L$SEH_begin_aesni_cbc_sha256_enc_xop:95 mov rdi,rcx96 mov rsi,rdx97 mov rdx,r898 mov rcx,r999 mov r8,QWORD[40+rsp]100 mov r9,QWORD[48+rsp]101 102 103 104 $L$xop_shortcut:105 mov r10,QWORD[56+rsp]106 mov rax,rsp107 108 push rbx109 110 push rbp111 112 push r12113 114 push r13115 116 push r14117 118 push r15119 120 sub rsp,288121 and rsp,-64122 123 shl rdx,6124 sub rsi,rdi125 sub r10,rdi126 add rdx,rdi127 128 129 mov QWORD[((64+8))+rsp],rsi130 mov QWORD[((64+16))+rsp],rdx131 132 mov QWORD[((64+32))+rsp],r8133 mov QWORD[((64+40))+rsp],r9134 mov QWORD[((64+48))+rsp],r10135 mov QWORD[120+rsp],rax136 137 movaps XMMWORD[128+rsp],xmm6138 movaps XMMWORD[144+rsp],xmm7139 movaps XMMWORD[160+rsp],xmm8140 movaps XMMWORD[176+rsp],xmm9141 movaps XMMWORD[192+rsp],xmm10142 movaps XMMWORD[208+rsp],xmm11143 movaps XMMWORD[224+rsp],xmm12144 movaps XMMWORD[240+rsp],xmm13145 movaps XMMWORD[256+rsp],xmm14146 movaps XMMWORD[272+rsp],xmm15147 $L$prologue_xop:148 vzeroall149 150 mov r12,rdi151 lea rdi,[128+rcx]152 lea r13,[((K256+544))]153 mov r14d,DWORD[((240-128))+rdi]154 mov r15,r9155 mov rsi,r10156 vmovdqu xmm8,XMMWORD[r8]157 sub r14,9158 159 mov eax,DWORD[r15]160 mov ebx,DWORD[4+r15]161 mov ecx,DWORD[8+r15]162 mov edx,DWORD[12+r15]163 mov r8d,DWORD[16+r15]164 mov r9d,DWORD[20+r15]165 mov r10d,DWORD[24+r15]166 mov r11d,DWORD[28+r15]167 168 vmovdqa xmm14,XMMWORD[r14*8+r13]169 vmovdqa xmm13,XMMWORD[16+r14*8+r13]170 vmovdqa xmm12,XMMWORD[32+r14*8+r13]171 vmovdqu xmm10,XMMWORD[((0-128))+rdi]172 jmp NEAR $L$loop_xop173 ALIGN 16174 $L$loop_xop:175 vmovdqa xmm7,XMMWORD[((K256+512))]176 vmovdqu xmm0,XMMWORD[r12*1+rsi]177 vmovdqu xmm1,XMMWORD[16+r12*1+rsi]178 vmovdqu xmm2,XMMWORD[32+r12*1+rsi]179 vmovdqu xmm3,XMMWORD[48+r12*1+rsi]180 vpshufb xmm0,xmm0,xmm7181 lea rbp,[K256]182 vpshufb xmm1,xmm1,xmm7183 vpshufb xmm2,xmm2,xmm7184 vpaddd xmm4,xmm0,XMMWORD[rbp]185 vpshufb xmm3,xmm3,xmm7186 vpaddd xmm5,xmm1,XMMWORD[32+rbp]187 vpaddd xmm6,xmm2,XMMWORD[64+rbp]188 vpaddd xmm7,xmm3,XMMWORD[96+rbp]189 vmovdqa XMMWORD[rsp],xmm4190 mov r14d,eax191 vmovdqa XMMWORD[16+rsp],xmm5192 mov esi,ebx193 vmovdqa XMMWORD[32+rsp],xmm6194 xor esi,ecx195 vmovdqa XMMWORD[48+rsp],xmm7196 mov r13d,r8d197 jmp NEAR $L$xop_00_47198 199 ALIGN 16200 $L$xop_00_47:201 sub rbp,-16*2*4202 vmovdqu xmm9,XMMWORD[r12]203 mov QWORD[((64+0))+rsp],r12204 vpalignr xmm4,xmm1,xmm0,4205 ror r13d,14206 mov eax,r14d207 vpalignr xmm7,xmm3,xmm2,4208 mov r12d,r9d209 xor r13d,r8d210 DB 143,232,120,194,236,14211 ror r14d,9212 xor r12d,r10d213 vpsrld xmm4,xmm4,3214 ror r13d,5215 xor r14d,eax216 vpaddd xmm0,xmm0,xmm7217 and r12d,r8d218 vpxor xmm9,xmm9,xmm10219 vmovdqu xmm10,XMMWORD[((16-128))+rdi]220 xor r13d,r8d221 add r11d,DWORD[rsp]222 mov r15d,eax223 DB 143,232,120,194,245,11224 ror r14d,11225 xor r12d,r10d226 vpxor xmm4,xmm4,xmm5227 xor r15d,ebx228 ror r13d,6229 add r11d,r12d230 and esi,r15d231 DB 143,232,120,194,251,13232 xor r14d,eax233 add r11d,r13d234 vpxor xmm4,xmm4,xmm6235 xor esi,ebx236 add edx,r11d237 vpsrld xmm6,xmm3,10238 ror r14d,2239 add r11d,esi240 vpaddd xmm0,xmm0,xmm4241 mov r13d,edx242 add r14d,r11d243 DB 143,232,120,194,239,2244 ror r13d,14245 mov r11d,r14d246 vpxor xmm7,xmm7,xmm6247 mov r12d,r8d248 xor r13d,edx249 ror r14d,9250 xor r12d,r9d251 vpxor xmm7,xmm7,xmm5252 ror r13d,5253 xor r14d,r11d254 and r12d,edx255 vpxor xmm9,xmm9,xmm8256 xor r13d,edx257 vpsrldq xmm7,xmm7,8258 add r10d,DWORD[4+rsp]259 mov esi,r11d260 ror r14d,11261 xor r12d,r9d262 vpaddd xmm0,xmm0,xmm7263 xor esi,eax264 ror r13d,6265 add r10d,r12d266 and r15d,esi267 DB 143,232,120,194,248,13268 xor r14d,r11d269 add r10d,r13d270 vpsrld xmm6,xmm0,10271 xor r15d,eax272 add ecx,r10d273 DB 143,232,120,194,239,2274 ror r14d,2275 add r10d,r15d276 vpxor xmm7,xmm7,xmm6277 mov r13d,ecx278 add r14d,r10d279 ror r13d,14280 mov r10d,r14d281 vpxor xmm7,xmm7,xmm5282 mov r12d,edx283 xor r13d,ecx284 ror r14d,9285 xor r12d,r8d286 vpslldq xmm7,xmm7,8287 ror r13d,5288 xor r14d,r10d289 and r12d,ecx290 vaesenc xmm9,xmm9,xmm10291 vmovdqu xmm10,XMMWORD[((32-128))+rdi]292 xor r13d,ecx293 vpaddd xmm0,xmm0,xmm7294 add r9d,DWORD[8+rsp]295 mov r15d,r10d296 ror r14d,11297 xor r12d,r8d298 vpaddd xmm6,xmm0,XMMWORD[rbp]299 xor r15d,r11d300 ror r13d,6301 add r9d,r12d302 and esi,r15d303 xor r14d,r10d304 add r9d,r13d305 xor esi,r11d306 add ebx,r9d307 ror r14d,2308 add r9d,esi309 mov r13d,ebx310 add r14d,r9d311 ror r13d,14312 mov r9d,r14d313 mov r12d,ecx314 xor r13d,ebx315 ror r14d,9316 xor r12d,edx317 ror r13d,5318 xor r14d,r9d319 and r12d,ebx320 vaesenc xmm9,xmm9,xmm10321 vmovdqu xmm10,XMMWORD[((48-128))+rdi]322 xor r13d,ebx323 add r8d,DWORD[12+rsp]324 mov esi,r9d325 ror r14d,11326 xor r12d,edx327 xor esi,r10d328 ror r13d,6329 add r8d,r12d330 and r15d,esi331 xor r14d,r9d332 add r8d,r13d333 xor r15d,r10d334 add eax,r8d335 ror r14d,2336 add r8d,r15d337 mov r13d,eax338 add r14d,r8d339 vmovdqa XMMWORD[rsp],xmm6340 vpalignr xmm4,xmm2,xmm1,4341 ror r13d,14342 mov r8d,r14d343 vpalignr xmm7,xmm0,xmm3,4344 mov r12d,ebx345 xor r13d,eax346 DB 143,232,120,194,236,14347 ror r14d,9348 xor r12d,ecx349 vpsrld xmm4,xmm4,3350 ror r13d,5351 xor r14d,r8d352 vpaddd xmm1,xmm1,xmm7353 and r12d,eax354 vaesenc xmm9,xmm9,xmm10355 vmovdqu xmm10,XMMWORD[((64-128))+rdi]356 xor r13d,eax357 add edx,DWORD[16+rsp]358 mov r15d,r8d359 DB 143,232,120,194,245,11360 ror r14d,11361 xor r12d,ecx362 vpxor xmm4,xmm4,xmm5363 xor r15d,r9d364 ror r13d,6365 add edx,r12d366 and esi,r15d367 DB 143,232,120,194,248,13368 xor r14d,r8d369 add edx,r13d370 vpxor xmm4,xmm4,xmm6371 xor esi,r9d372 add r11d,edx373 vpsrld xmm6,xmm0,10374 ror r14d,2375 add edx,esi376 vpaddd xmm1,xmm1,xmm4377 mov r13d,r11d378 add r14d,edx379 DB 143,232,120,194,239,2380 ror r13d,14381 mov edx,r14d382 vpxor xmm7,xmm7,xmm6383 mov r12d,eax384 xor r13d,r11d385 ror r14d,9386 xor r12d,ebx387 vpxor xmm7,xmm7,xmm5388 ror r13d,5389 xor r14d,edx390 and r12d,r11d391 vaesenc xmm9,xmm9,xmm10392 vmovdqu xmm10,XMMWORD[((80-128))+rdi]393 xor r13d,r11d394 vpsrldq xmm7,xmm7,8395 add ecx,DWORD[20+rsp]396 mov esi,edx397 ror r14d,11398 xor r12d,ebx399 vpaddd xmm1,xmm1,xmm7400 xor esi,r8d401 ror r13d,6402 add ecx,r12d403 and r15d,esi404 DB 143,232,120,194,249,13405 xor r14d,edx406 add ecx,r13d407 vpsrld xmm6,xmm1,10408 xor r15d,r8d409 add r10d,ecx410 DB 143,232,120,194,239,2411 ror r14d,2412 add ecx,r15d413 vpxor xmm7,xmm7,xmm6414 mov r13d,r10d415 add r14d,ecx416 ror r13d,14417 mov ecx,r14d418 vpxor xmm7,xmm7,xmm5419 mov r12d,r11d420 xor r13d,r10d421 ror r14d,9422 xor r12d,eax423 vpslldq xmm7,xmm7,8424 ror r13d,5425 xor r14d,ecx426 and r12d,r10d427 vaesenc xmm9,xmm9,xmm10428 vmovdqu xmm10,XMMWORD[((96-128))+rdi]429 xor r13d,r10d430 vpaddd xmm1,xmm1,xmm7431 add ebx,DWORD[24+rsp]432 mov r15d,ecx433 ror r14d,11434 xor r12d,eax435 vpaddd xmm6,xmm1,XMMWORD[32+rbp]436 xor r15d,edx437 ror r13d,6438 add ebx,r12d439 and esi,r15d440 xor r14d,ecx441 add ebx,r13d442 xor esi,edx443 add r9d,ebx444 ror r14d,2445 add ebx,esi446 mov r13d,r9d447 add r14d,ebx448 ror r13d,14449 mov ebx,r14d450 mov r12d,r10d451 xor r13d,r9d452 ror r14d,9453 xor r12d,r11d454 ror r13d,5455 xor r14d,ebx456 and r12d,r9d457 vaesenc xmm9,xmm9,xmm10458 vmovdqu xmm10,XMMWORD[((112-128))+rdi]459 xor r13d,r9d460 add eax,DWORD[28+rsp]461 mov esi,ebx462 ror r14d,11463 xor r12d,r11d464 xor esi,ecx465 ror r13d,6466 add eax,r12d467 and r15d,esi468 xor r14d,ebx469 add eax,r13d470 xor r15d,ecx471 add r8d,eax472 ror r14d,2473 add eax,r15d474 mov r13d,r8d475 add r14d,eax476 vmovdqa XMMWORD[16+rsp],xmm6477 vpalignr xmm4,xmm3,xmm2,4478 ror r13d,14479 mov eax,r14d480 vpalignr xmm7,xmm1,xmm0,4481 mov r12d,r9d482 xor r13d,r8d483 DB 143,232,120,194,236,14484 ror r14d,9485 xor r12d,r10d486 vpsrld xmm4,xmm4,3487 ror r13d,5488 xor r14d,eax489 vpaddd xmm2,xmm2,xmm7490 and r12d,r8d491 vaesenc xmm9,xmm9,xmm10492 vmovdqu xmm10,XMMWORD[((128-128))+rdi]493 xor r13d,r8d494 add r11d,DWORD[32+rsp]495 mov r15d,eax496 DB 143,232,120,194,245,11497 ror r14d,11498 xor r12d,r10d499 vpxor xmm4,xmm4,xmm5500 xor r15d,ebx501 ror r13d,6502 add r11d,r12d503 and esi,r15d504 DB 143,232,120,194,249,13505 xor r14d,eax506 add r11d,r13d507 vpxor xmm4,xmm4,xmm6508 xor esi,ebx509 add edx,r11d510 vpsrld xmm6,xmm1,10511 ror r14d,2512 add r11d,esi513 vpaddd xmm2,xmm2,xmm4514 mov r13d,edx515 add r14d,r11d516 DB 143,232,120,194,239,2517 ror r13d,14518 mov r11d,r14d519 vpxor xmm7,xmm7,xmm6520 mov r12d,r8d521 xor r13d,edx522 ror r14d,9523 xor r12d,r9d524 vpxor xmm7,xmm7,xmm5525 ror r13d,5526 xor r14d,r11d527 and r12d,edx528 vaesenc xmm9,xmm9,xmm10529 vmovdqu xmm10,XMMWORD[((144-128))+rdi]530 xor r13d,edx531 vpsrldq xmm7,xmm7,8532 add r10d,DWORD[36+rsp]533 mov esi,r11d534 ror r14d,11535 xor r12d,r9d536 vpaddd xmm2,xmm2,xmm7537 xor esi,eax538 ror r13d,6539 add r10d,r12d540 and r15d,esi541 DB 143,232,120,194,250,13542 xor r14d,r11d543 add r10d,r13d544 vpsrld xmm6,xmm2,10545 xor r15d,eax546 add ecx,r10d547 DB 143,232,120,194,239,2548 ror r14d,2549 add r10d,r15d550 vpxor xmm7,xmm7,xmm6551 mov r13d,ecx552 add r14d,r10d553 ror r13d,14554 mov r10d,r14d555 vpxor xmm7,xmm7,xmm5556 mov r12d,edx557 xor r13d,ecx558 ror r14d,9559 xor r12d,r8d560 vpslldq xmm7,xmm7,8561 ror r13d,5562 xor r14d,r10d563 and r12d,ecx564 vaesenc xmm9,xmm9,xmm10565 vmovdqu xmm10,XMMWORD[((160-128))+rdi]566 xor r13d,ecx567 vpaddd xmm2,xmm2,xmm7568 add r9d,DWORD[40+rsp]569 mov r15d,r10d570 ror r14d,11571 xor r12d,r8d572 vpaddd xmm6,xmm2,XMMWORD[64+rbp]573 xor r15d,r11d574 ror r13d,6575 add r9d,r12d576 and esi,r15d577 xor r14d,r10d578 add r9d,r13d579 xor esi,r11d580 add ebx,r9d581 ror r14d,2582 add r9d,esi583 mov r13d,ebx584 add r14d,r9d585 ror r13d,14586 mov r9d,r14d587 mov r12d,ecx588 xor r13d,ebx589 ror r14d,9590 xor r12d,edx591 ror r13d,5592 xor r14d,r9d593 and r12d,ebx594 vaesenclast xmm11,xmm9,xmm10595 vaesenc xmm9,xmm9,xmm10596 vmovdqu xmm10,XMMWORD[((176-128))+rdi]597 xor r13d,ebx598 add r8d,DWORD[44+rsp]599 mov esi,r9d600 ror r14d,11601 xor r12d,edx602 xor esi,r10d603 ror r13d,6604 add r8d,r12d605 and r15d,esi606 xor r14d,r9d607 add r8d,r13d608 xor r15d,r10d609 add eax,r8d610 ror r14d,2611 add r8d,r15d612 mov r13d,eax613 add r14d,r8d614 vmovdqa XMMWORD[32+rsp],xmm6615 vpalignr xmm4,xmm0,xmm3,4616 ror r13d,14617 mov r8d,r14d618 vpalignr xmm7,xmm2,xmm1,4619 mov r12d,ebx620 xor r13d,eax621 DB 143,232,120,194,236,14622 ror r14d,9623 xor r12d,ecx624 vpsrld xmm4,xmm4,3625 ror r13d,5626 xor r14d,r8d627 vpaddd xmm3,xmm3,xmm7628 and r12d,eax629 vpand xmm8,xmm11,xmm12630 vaesenc xmm9,xmm9,xmm10631 vmovdqu xmm10,XMMWORD[((192-128))+rdi]632 xor r13d,eax633 add edx,DWORD[48+rsp]634 mov r15d,r8d635 DB 143,232,120,194,245,11636 ror r14d,11637 xor r12d,ecx638 vpxor xmm4,xmm4,xmm5639 xor r15d,r9d640 ror r13d,6641 add edx,r12d642 and esi,r15d643 DB 143,232,120,194,250,13644 xor r14d,r8d645 add edx,r13d646 vpxor xmm4,xmm4,xmm6647 xor esi,r9d648 add r11d,edx649 vpsrld xmm6,xmm2,10650 ror r14d,2651 add edx,esi652 vpaddd xmm3,xmm3,xmm4653 mov r13d,r11d654 add r14d,edx655 DB 143,232,120,194,239,2656 ror r13d,14657 mov edx,r14d658 vpxor xmm7,xmm7,xmm6659 mov r12d,eax660 xor r13d,r11d661 ror r14d,9662 xor r12d,ebx663 vpxor xmm7,xmm7,xmm5664 ror r13d,5665 xor r14d,edx666 and r12d,r11d667 vaesenclast xmm11,xmm9,xmm10668 vaesenc xmm9,xmm9,xmm10669 vmovdqu xmm10,XMMWORD[((208-128))+rdi]670 xor r13d,r11d671 vpsrldq xmm7,xmm7,8672 add ecx,DWORD[52+rsp]673 mov esi,edx674 ror r14d,11675 xor r12d,ebx676 vpaddd xmm3,xmm3,xmm7677 xor esi,r8d678 ror r13d,6679 add ecx,r12d680 and r15d,esi681 DB 143,232,120,194,251,13682 xor r14d,edx683 add ecx,r13d684 vpsrld xmm6,xmm3,10685 xor r15d,r8d686 add r10d,ecx687 DB 143,232,120,194,239,2688 ror r14d,2689 add ecx,r15d690 vpxor xmm7,xmm7,xmm6691 mov r13d,r10d692 add r14d,ecx693 ror r13d,14694 mov ecx,r14d695 vpxor xmm7,xmm7,xmm5696 mov r12d,r11d697 xor r13d,r10d698 ror r14d,9699 xor r12d,eax700 vpslldq xmm7,xmm7,8701 ror r13d,5702 xor r14d,ecx703 and r12d,r10d704 vpand xmm11,xmm11,xmm13705 vaesenc xmm9,xmm9,xmm10706 vmovdqu xmm10,XMMWORD[((224-128))+rdi]707 xor r13d,r10d708 vpaddd xmm3,xmm3,xmm7709 add ebx,DWORD[56+rsp]710 mov r15d,ecx711 ror r14d,11712 xor r12d,eax713 vpaddd xmm6,xmm3,XMMWORD[96+rbp]714 xor r15d,edx715 ror r13d,6716 add ebx,r12d717 and esi,r15d718 xor r14d,ecx719 add ebx,r13d720 xor esi,edx721 add r9d,ebx722 ror r14d,2723 add ebx,esi724 mov r13d,r9d725 add r14d,ebx726 ror r13d,14727 mov ebx,r14d728 mov r12d,r10d729 xor r13d,r9d730 ror r14d,9731 xor r12d,r11d732 ror r13d,5733 xor r14d,ebx734 and r12d,r9d735 vpor xmm8,xmm8,xmm11736 vaesenclast xmm11,xmm9,xmm10737 vmovdqu xmm10,XMMWORD[((0-128))+rdi]738 xor r13d,r9d739 add eax,DWORD[60+rsp]740 mov esi,ebx741 ror r14d,11742 xor r12d,r11d743 xor esi,ecx744 ror r13d,6745 add eax,r12d746 and r15d,esi747 xor r14d,ebx748 add eax,r13d749 xor r15d,ecx750 add r8d,eax751 ror r14d,2752 add eax,r15d753 mov r13d,r8d754 add r14d,eax755 vmovdqa XMMWORD[48+rsp],xmm6756 mov r12,QWORD[((64+0))+rsp]757 vpand xmm11,xmm11,xmm14758 mov r15,QWORD[((64+8))+rsp]759 vpor xmm8,xmm8,xmm11760 vmovdqu XMMWORD[r12*1+r15],xmm8761 lea r12,[16+r12]762 cmp BYTE[131+rbp],0763 jne NEAR $L$xop_00_47764 vmovdqu xmm9,XMMWORD[r12]765 mov QWORD[((64+0))+rsp],r12766 ror r13d,14767 mov eax,r14d768 mov r12d,r9d769 xor r13d,r8d770 ror r14d,9771 xor r12d,r10d772 ror r13d,5773 xor r14d,eax774 and r12d,r8d775 vpxor xmm9,xmm9,xmm10776 vmovdqu xmm10,XMMWORD[((16-128))+rdi]777 xor r13d,r8d778 add r11d,DWORD[rsp]779 mov r15d,eax780 ror r14d,11781 xor r12d,r10d782 xor r15d,ebx783 ror r13d,6784 add r11d,r12d785 and esi,r15d786 xor r14d,eax787 add r11d,r13d788 xor esi,ebx789 add edx,r11d790 ror r14d,2791 add r11d,esi792 mov r13d,edx793 add r14d,r11d794 ror r13d,14795 mov r11d,r14d796 mov r12d,r8d797 xor r13d,edx798 ror r14d,9799 xor r12d,r9d800 ror r13d,5801 xor r14d,r11d802 and r12d,edx803 vpxor xmm9,xmm9,xmm8804 xor r13d,edx805 add r10d,DWORD[4+rsp]806 mov esi,r11d807 ror r14d,11808 xor r12d,r9d809 xor esi,eax810 ror r13d,6811 add r10d,r12d812 and r15d,esi813 xor r14d,r11d814 add r10d,r13d815 xor r15d,eax816 add ecx,r10d817 ror r14d,2818 add r10d,r15d819 mov r13d,ecx820 add r14d,r10d821 ror r13d,14822 mov r10d,r14d823 mov r12d,edx824 xor r13d,ecx825 ror r14d,9826 xor r12d,r8d827 ror r13d,5828 xor r14d,r10d829 and r12d,ecx830 vaesenc xmm9,xmm9,xmm10831 vmovdqu xmm10,XMMWORD[((32-128))+rdi]832 xor r13d,ecx833 add r9d,DWORD[8+rsp]834 mov r15d,r10d835 ror r14d,11836 xor r12d,r8d837 xor r15d,r11d838 ror r13d,6839 add r9d,r12d840 and esi,r15d841 xor r14d,r10d842 add r9d,r13d843 xor esi,r11d844 add ebx,r9d845 ror r14d,2846 add r9d,esi847 mov r13d,ebx848 add r14d,r9d849 ror r13d,14850 mov r9d,r14d851 mov r12d,ecx852 xor r13d,ebx853 ror r14d,9854 xor r12d,edx855 ror r13d,5856 xor r14d,r9d857 and r12d,ebx858 vaesenc xmm9,xmm9,xmm10859 vmovdqu xmm10,XMMWORD[((48-128))+rdi]860 xor r13d,ebx861 add r8d,DWORD[12+rsp]862 mov esi,r9d863 ror r14d,11864 xor r12d,edx865 xor esi,r10d866 ror r13d,6867 add r8d,r12d868 and r15d,esi869 xor r14d,r9d870 add r8d,r13d871 xor r15d,r10d872 add eax,r8d873 ror r14d,2874 add r8d,r15d875 mov r13d,eax876 add r14d,r8d877 ror r13d,14878 mov r8d,r14d879 mov r12d,ebx880 xor r13d,eax881 ror r14d,9882 xor r12d,ecx883 ror r13d,5884 xor r14d,r8d885 and r12d,eax886 vaesenc xmm9,xmm9,xmm10887 vmovdqu xmm10,XMMWORD[((64-128))+rdi]888 xor r13d,eax889 add edx,DWORD[16+rsp]890 mov r15d,r8d891 ror r14d,11892 xor r12d,ecx893 xor r15d,r9d894 ror r13d,6895 add edx,r12d896 and esi,r15d897 xor r14d,r8d898 add edx,r13d899 xor esi,r9d900 add r11d,edx901 ror r14d,2902 add edx,esi903 mov r13d,r11d904 add r14d,edx905 ror r13d,14906 mov edx,r14d907 mov r12d,eax908 xor r13d,r11d909 ror r14d,9910 xor r12d,ebx911 ror r13d,5912 xor r14d,edx913 and r12d,r11d914 vaesenc xmm9,xmm9,xmm10915 vmovdqu xmm10,XMMWORD[((80-128))+rdi]916 xor r13d,r11d917 add ecx,DWORD[20+rsp]918 mov esi,edx919 ror r14d,11920 xor r12d,ebx921 xor esi,r8d922 ror r13d,6923 add ecx,r12d924 and r15d,esi925 xor r14d,edx926 add ecx,r13d927 xor r15d,r8d928 add r10d,ecx929 ror r14d,2930 add ecx,r15d931 mov r13d,r10d932 add r14d,ecx933 ror r13d,14934 mov ecx,r14d935 mov r12d,r11d936 xor r13d,r10d937 ror r14d,9938 xor r12d,eax939 ror r13d,5940 xor r14d,ecx941 and r12d,r10d942 vaesenc xmm9,xmm9,xmm10943 vmovdqu xmm10,XMMWORD[((96-128))+rdi]944 xor r13d,r10d945 add ebx,DWORD[24+rsp]946 mov r15d,ecx947 ror r14d,11948 xor r12d,eax949 xor r15d,edx950 ror r13d,6951 add ebx,r12d952 and esi,r15d953 xor r14d,ecx954 add ebx,r13d955 xor esi,edx956 add r9d,ebx957 ror r14d,2958 add ebx,esi959 mov r13d,r9d960 add r14d,ebx961 ror r13d,14962 mov ebx,r14d963 mov r12d,r10d964 xor r13d,r9d965 ror r14d,9966 xor r12d,r11d967 ror r13d,5968 xor r14d,ebx969 and r12d,r9d970 vaesenc xmm9,xmm9,xmm10971 vmovdqu xmm10,XMMWORD[((112-128))+rdi]972 xor r13d,r9d973 add eax,DWORD[28+rsp]974 mov esi,ebx975 ror r14d,11976 xor r12d,r11d977 xor esi,ecx978 ror r13d,6979 add eax,r12d980 and r15d,esi981 xor r14d,ebx982 add eax,r13d983 xor r15d,ecx984 add r8d,eax985 ror r14d,2986 add eax,r15d987 mov r13d,r8d988 add r14d,eax989 ror r13d,14990 mov eax,r14d991 mov r12d,r9d992 xor r13d,r8d993 ror r14d,9994 xor r12d,r10d995 ror r13d,5996 xor r14d,eax997 and r12d,r8d998 vaesenc xmm9,xmm9,xmm10999 vmovdqu xmm10,XMMWORD[((128-128))+rdi]1000 xor r13d,r8d1001 add r11d,DWORD[32+rsp]1002 mov r15d,eax1003 ror r14d,111004 xor r12d,r10d1005 xor r15d,ebx1006 ror r13d,61007 add r11d,r12d1008 and esi,r15d1009 xor r14d,eax1010 add r11d,r13d1011 xor esi,ebx1012 add edx,r11d1013 ror r14d,21014 add r11d,esi1015 mov r13d,edx1016 add r14d,r11d1017 ror r13d,141018 mov r11d,r14d1019 mov r12d,r8d1020 xor r13d,edx1021 ror r14d,91022 xor r12d,r9d1023 ror r13d,51024 xor r14d,r11d1025 and r12d,edx1026 vaesenc xmm9,xmm9,xmm101027 vmovdqu xmm10,XMMWORD[((144-128))+rdi]1028 xor r13d,edx1029 add r10d,DWORD[36+rsp]1030 mov esi,r11d1031 ror r14d,111032 xor r12d,r9d1033 xor esi,eax1034 ror r13d,61035 add r10d,r12d1036 and r15d,esi1037 xor r14d,r11d1038 add r10d,r13d1039 xor r15d,eax1040 add ecx,r10d1041 ror r14d,21042 add r10d,r15d1043 mov r13d,ecx1044 add r14d,r10d1045 ror r13d,141046 mov r10d,r14d1047 mov r12d,edx1048 xor r13d,ecx1049 ror r14d,91050 xor r12d,r8d1051 ror r13d,51052 xor r14d,r10d1053 and r12d,ecx1054 vaesenc xmm9,xmm9,xmm101055 vmovdqu xmm10,XMMWORD[((160-128))+rdi]1056 xor r13d,ecx1057 add r9d,DWORD[40+rsp]1058 mov r15d,r10d1059 ror r14d,111060 xor r12d,r8d1061 xor r15d,r11d1062 ror r13d,61063 add r9d,r12d1064 and esi,r15d1065 xor r14d,r10d1066 add r9d,r13d1067 xor esi,r11d1068 add ebx,r9d1069 ror r14d,21070 add r9d,esi1071 mov r13d,ebx1072 add r14d,r9d1073 ror r13d,141074 mov r9d,r14d1075 mov r12d,ecx1076 xor r13d,ebx1077 ror r14d,91078 xor r12d,edx1079 ror r13d,51080 xor r14d,r9d1081 and r12d,ebx1082 vaesenclast xmm11,xmm9,xmm101083 vaesenc xmm9,xmm9,xmm101084 vmovdqu xmm10,XMMWORD[((176-128))+rdi]1085 xor r13d,ebx1086 add r8d,DWORD[44+rsp]1087 mov esi,r9d1088 ror r14d,111089 xor r12d,edx1090 xor esi,r10d1091 ror r13d,61092 add r8d,r12d1093 and r15d,esi1094 xor r14d,r9d1095 add r8d,r13d1096 xor r15d,r10d1097 add eax,r8d1098 ror r14d,21099 add r8d,r15d1100 mov r13d,eax1101 add r14d,r8d1102 ror r13d,141103 mov r8d,r14d1104 mov r12d,ebx1105 xor r13d,eax1106 ror r14d,91107 xor r12d,ecx1108 ror r13d,51109 xor r14d,r8d1110 and r12d,eax1111 vpand xmm8,xmm11,xmm121112 vaesenc xmm9,xmm9,xmm101113 vmovdqu xmm10,XMMWORD[((192-128))+rdi]1114 xor r13d,eax1115 add edx,DWORD[48+rsp]1116 mov r15d,r8d1117 ror r14d,111118 xor r12d,ecx1119 xor r15d,r9d1120 ror r13d,61121 add edx,r12d1122 and esi,r15d1123 xor r14d,r8d1124 add edx,r13d1125 xor esi,r9d1126 add r11d,edx1127 ror r14d,21128 add edx,esi1129 mov r13d,r11d1130 add r14d,edx1131 ror r13d,141132 mov edx,r14d1133 mov r12d,eax1134 xor r13d,r11d1135 ror r14d,91136 xor r12d,ebx1137 ror r13d,51138 xor r14d,edx1139 and r12d,r11d1140 vaesenclast xmm11,xmm9,xmm101141 vaesenc xmm9,xmm9,xmm101142 vmovdqu xmm10,XMMWORD[((208-128))+rdi]1143 xor r13d,r11d1144 add ecx,DWORD[52+rsp]1145 mov esi,edx1146 ror r14d,111147 xor r12d,ebx1148 xor esi,r8d1149 ror r13d,61150 add ecx,r12d1151 and r15d,esi1152 xor r14d,edx1153 add ecx,r13d1154 xor r15d,r8d1155 add r10d,ecx1156 ror r14d,21157 add ecx,r15d1158 mov r13d,r10d1159 add r14d,ecx1160 ror r13d,141161 mov ecx,r14d1162 mov r12d,r11d1163 xor r13d,r10d1164 ror r14d,91165 xor r12d,eax1166 ror r13d,51167 xor r14d,ecx1168 and r12d,r10d1169 vpand xmm11,xmm11,xmm131170 vaesenc xmm9,xmm9,xmm101171 vmovdqu xmm10,XMMWORD[((224-128))+rdi]1172 xor r13d,r10d1173 add ebx,DWORD[56+rsp]1174 mov r15d,ecx1175 ror r14d,111176 xor r12d,eax1177 xor r15d,edx1178 ror r13d,61179 add ebx,r12d1180 and esi,r15d1181 xor r14d,ecx1182 add ebx,r13d1183 xor esi,edx1184 add r9d,ebx1185 ror r14d,21186 add ebx,esi1187 mov r13d,r9d1188 add r14d,ebx1189 ror r13d,141190 mov ebx,r14d1191 mov r12d,r10d1192 xor r13d,r9d1193 ror r14d,91194 xor r12d,r11d1195 ror r13d,51196 xor r14d,ebx1197 and r12d,r9d1198 vpor xmm8,xmm8,xmm111199 vaesenclast xmm11,xmm9,xmm101200 vmovdqu xmm10,XMMWORD[((0-128))+rdi]1201 xor r13d,r9d1202 add eax,DWORD[60+rsp]1203 mov esi,ebx1204 ror r14d,111205 xor r12d,r11d1206 xor esi,ecx1207 ror r13d,61208 add eax,r12d1209 and r15d,esi1210 xor r14d,ebx1211 add eax,r13d1212 xor r15d,ecx1213 add r8d,eax1214 ror r14d,21215 add eax,r15d1216 mov r13d,r8d1217 add r14d,eax1218 mov r12,QWORD[((64+0))+rsp]1219 mov r13,QWORD[((64+8))+rsp]1220 mov r15,QWORD[((64+40))+rsp]1221 mov rsi,QWORD[((64+48))+rsp]1222 1223 vpand xmm11,xmm11,xmm141224 mov eax,r14d1225 vpor xmm8,xmm8,xmm111226 vmovdqu XMMWORD[r13*1+r12],xmm81227 lea r12,[16+r12]1228 1229 add eax,DWORD[r15]1230 add ebx,DWORD[4+r15]1231 add ecx,DWORD[8+r15]1232 add edx,DWORD[12+r15]1233 add r8d,DWORD[16+r15]1234 add r9d,DWORD[20+r15]1235 add r10d,DWORD[24+r15]1236 add r11d,DWORD[28+r15]1237 1238 cmp r12,QWORD[((64+16))+rsp]1239 1240 mov DWORD[r15],eax1241 mov DWORD[4+r15],ebx1242 mov DWORD[8+r15],ecx1243 mov DWORD[12+r15],edx1244 mov DWORD[16+r15],r8d1245 mov DWORD[20+r15],r9d1246 mov DWORD[24+r15],r10d1247 mov DWORD[28+r15],r11d1248 1249 jb NEAR $L$loop_xop1250 1251 mov r8,QWORD[((64+32))+rsp]1252 mov rsi,QWORD[120+rsp]1253 1254 vmovdqu XMMWORD[r8],xmm81255 vzeroall1256 movaps xmm6,XMMWORD[128+rsp]1257 movaps xmm7,XMMWORD[144+rsp]1258 movaps xmm8,XMMWORD[160+rsp]1259 movaps xmm9,XMMWORD[176+rsp]1260 movaps xmm10,XMMWORD[192+rsp]1261 movaps xmm11,XMMWORD[208+rsp]1262 movaps xmm12,XMMWORD[224+rsp]1263 movaps xmm13,XMMWORD[240+rsp]1264 movaps xmm14,XMMWORD[256+rsp]1265 movaps xmm15,XMMWORD[272+rsp]1266 mov r15,QWORD[((-48))+rsi]1267 1268 mov r14,QWORD[((-40))+rsi]1269 1270 mov r13,QWORD[((-32))+rsi]1271 1272 mov r12,QWORD[((-24))+rsi]1273 1274 mov rbp,QWORD[((-16))+rsi]1275 1276 mov rbx,QWORD[((-8))+rsi]1277 1278 lea rsp,[rsi]1279 1280 $L$epilogue_xop:1281 mov rdi,QWORD[8+rsp] ;WIN64 epilogue1282 mov rsi,QWORD[16+rsp]1283 DB 0F3h,0C3h ;repret1284 1285 $L$SEH_end_aesni_cbc_sha256_enc_xop:1286 1287 ALIGN 641288 aesni_cbc_sha256_enc_avx:1289 mov QWORD[8+rsp],rdi ;WIN64 prologue1290 mov QWORD[16+rsp],rsi1291 mov rax,rsp1292 $L$SEH_begin_aesni_cbc_sha256_enc_avx:1293 mov rdi,rcx1294 mov rsi,rdx1295 mov rdx,r81296 mov rcx,r91297 mov r8,QWORD[40+rsp]1298 mov r9,QWORD[48+rsp]1299 1300 1301 1302 $L$avx_shortcut:1303 mov r10,QWORD[56+rsp]1304 mov rax,rsp1305 1306 push rbx1307 1308 push rbp1309 1310 push r121311 1312 push r131313 1314 push r141315 1316 push r151317 1318 sub rsp,2881319 and rsp,-641320 1321 shl rdx,61322 sub rsi,rdi1323 sub r10,rdi1324 add rdx,rdi1325 1326 1327 mov QWORD[((64+8))+rsp],rsi1328 mov QWORD[((64+16))+rsp],rdx1329 1330 mov QWORD[((64+32))+rsp],r81331 mov QWORD[((64+40))+rsp],r91332 mov QWORD[((64+48))+rsp],r101333 mov QWORD[120+rsp],rax1334 1335 movaps XMMWORD[128+rsp],xmm61336 movaps XMMWORD[144+rsp],xmm71337 movaps XMMWORD[160+rsp],xmm81338 movaps XMMWORD[176+rsp],xmm91339 movaps XMMWORD[192+rsp],xmm101340 movaps XMMWORD[208+rsp],xmm111341 movaps XMMWORD[224+rsp],xmm121342 movaps XMMWORD[240+rsp],xmm131343 movaps XMMWORD[256+rsp],xmm141344 movaps XMMWORD[272+rsp],xmm151345 $L$prologue_avx:1346 vzeroall1347 1348 mov r12,rdi1349 lea rdi,[128+rcx]1350 lea r13,[((K256+544))]1351 mov r14d,DWORD[((240-128))+rdi]1352 mov r15,r91353 mov rsi,r101354 vmovdqu xmm8,XMMWORD[r8]1355 sub r14,91356 1357 mov eax,DWORD[r15]1358 mov ebx,DWORD[4+r15]1359 mov ecx,DWORD[8+r15]1360 mov edx,DWORD[12+r15]1361 mov r8d,DWORD[16+r15]1362 mov r9d,DWORD[20+r15]1363 mov r10d,DWORD[24+r15]1364 mov r11d,DWORD[28+r15]1365 1366 vmovdqa xmm14,XMMWORD[r14*8+r13]1367 vmovdqa xmm13,XMMWORD[16+r14*8+r13]1368 vmovdqa xmm12,XMMWORD[32+r14*8+r13]1369 vmovdqu xmm10,XMMWORD[((0-128))+rdi]1370 jmp NEAR $L$loop_avx1371 ALIGN 161372 $L$loop_avx:1373 vmovdqa xmm7,XMMWORD[((K256+512))]1374 vmovdqu xmm0,XMMWORD[r12*1+rsi]1375 vmovdqu xmm1,XMMWORD[16+r12*1+rsi]1376 vmovdqu xmm2,XMMWORD[32+r12*1+rsi]1377 vmovdqu xmm3,XMMWORD[48+r12*1+rsi]1378 vpshufb xmm0,xmm0,xmm71379 lea rbp,[K256]1380 vpshufb xmm1,xmm1,xmm71381 vpshufb xmm2,xmm2,xmm71382 vpaddd xmm4,xmm0,XMMWORD[rbp]1383 vpshufb xmm3,xmm3,xmm71384 vpaddd xmm5,xmm1,XMMWORD[32+rbp]1385 vpaddd xmm6,xmm2,XMMWORD[64+rbp]1386 vpaddd xmm7,xmm3,XMMWORD[96+rbp]1387 vmovdqa XMMWORD[rsp],xmm41388 mov r14d,eax1389 vmovdqa XMMWORD[16+rsp],xmm51390 mov esi,ebx1391 vmovdqa XMMWORD[32+rsp],xmm61392 xor esi,ecx1393 vmovdqa XMMWORD[48+rsp],xmm71394 mov r13d,r8d1395 jmp NEAR $L$avx_00_471396 1397 ALIGN 161398 $L$avx_00_47:1399 sub rbp,-16*2*41400 vmovdqu xmm9,XMMWORD[r12]1401 mov QWORD[((64+0))+rsp],r121402 vpalignr xmm4,xmm1,xmm0,41403 shrd r13d,r13d,141404 mov eax,r14d1405 mov r12d,r9d1406 vpalignr xmm7,xmm3,xmm2,41407 xor r13d,r8d1408 shrd r14d,r14d,91409 xor r12d,r10d1410 vpsrld xmm6,xmm4,71411 shrd r13d,r13d,51412 xor r14d,eax1413 and r12d,r8d1414 vpaddd xmm0,xmm0,xmm71415 vpxor xmm9,xmm9,xmm101416 vmovdqu xmm10,XMMWORD[((16-128))+rdi]1417 xor r13d,r8d1418 add r11d,DWORD[rsp]1419 mov r15d,eax1420 vpsrld xmm7,xmm4,31421 shrd r14d,r14d,111422 xor r12d,r10d1423 xor r15d,ebx1424 vpslld xmm5,xmm4,141425 shrd r13d,r13d,61426 add r11d,r12d1427 and esi,r15d1428 vpxor xmm4,xmm7,xmm61429 xor r14d,eax1430 add r11d,r13d1431 xor esi,ebx1432 vpshufd xmm7,xmm3,2501433 add edx,r11d1434 shrd r14d,r14d,21435 add r11d,esi1436 vpsrld xmm6,xmm6,111437 mov r13d,edx1438 add r14d,r11d1439 shrd r13d,r13d,141440 vpxor xmm4,xmm4,xmm51441 mov r11d,r14d1442 mov r12d,r8d1443 xor r13d,edx1444 vpslld xmm5,xmm5,111445 shrd r14d,r14d,91446 xor r12d,r9d1447 shrd r13d,r13d,51448 vpxor xmm4,xmm4,xmm61449 xor r14d,r11d1450 and r12d,edx1451 vpxor xmm9,xmm9,xmm81452 xor r13d,edx1453 vpsrld xmm6,xmm7,101454 add r10d,DWORD[4+rsp]1455 mov esi,r11d1456 shrd r14d,r14d,111457 vpxor xmm4,xmm4,xmm51458 xor r12d,r9d1459 xor esi,eax1460 shrd r13d,r13d,61461 vpsrlq xmm7,xmm7,171462 add r10d,r12d1463 and r15d,esi1464 xor r14d,r11d1465 vpaddd xmm0,xmm0,xmm41466 add r10d,r13d1467 xor r15d,eax1468 add ecx,r10d1469 vpxor xmm6,xmm6,xmm71470 shrd r14d,r14d,21471 add r10d,r15d1472 mov r13d,ecx1473 vpsrlq xmm7,xmm7,21474 add r14d,r10d1475 shrd r13d,r13d,141476 mov r10d,r14d1477 vpxor xmm6,xmm6,xmm71478 mov r12d,edx1479 xor r13d,ecx1480 shrd r14d,r14d,91481 vpshufd xmm6,xmm6,1321482 xor r12d,r8d1483 shrd r13d,r13d,51484 xor r14d,r10d1485 vpsrldq xmm6,xmm6,81486 and r12d,ecx1487 vaesenc xmm9,xmm9,xmm101488 vmovdqu xmm10,XMMWORD[((32-128))+rdi]1489 xor r13d,ecx1490 add r9d,DWORD[8+rsp]1491 vpaddd xmm0,xmm0,xmm61492 mov r15d,r10d1493 shrd r14d,r14d,111494 xor r12d,r8d1495 vpshufd xmm7,xmm0,801496 xor r15d,r11d1497 shrd r13d,r13d,61498 add r9d,r12d1499 vpsrld xmm6,xmm7,101500 and esi,r15d1501 xor r14d,r10d1502 add r9d,r13d1503 vpsrlq xmm7,xmm7,171504 xor esi,r11d1505 add ebx,r9d1506 shrd r14d,r14d,21507 vpxor xmm6,xmm6,xmm71508 add r9d,esi1509 mov r13d,ebx1510 add r14d,r9d1511 vpsrlq xmm7,xmm7,21512 shrd r13d,r13d,141513 mov r9d,r14d1514 mov r12d,ecx1515 vpxor xmm6,xmm6,xmm71516 xor r13d,ebx1517 shrd r14d,r14d,91518 xor r12d,edx1519 vpshufd xmm6,xmm6,2321520 shrd r13d,r13d,51521 xor r14d,r9d1522 and r12d,ebx1523 vpslldq xmm6,xmm6,81524 vaesenc xmm9,xmm9,xmm101525 vmovdqu xmm10,XMMWORD[((48-128))+rdi]1526 xor r13d,ebx1527 add r8d,DWORD[12+rsp]1528 mov esi,r9d1529 vpaddd xmm0,xmm0,xmm61530 shrd r14d,r14d,111531 xor r12d,edx1532 xor esi,r10d1533 vpaddd xmm6,xmm0,XMMWORD[rbp]1534 shrd r13d,r13d,61535 add r8d,r12d1536 and r15d,esi1537 xor r14d,r9d1538 add r8d,r13d1539 xor r15d,r10d1540 add eax,r8d1541 shrd r14d,r14d,21542 add r8d,r15d1543 mov r13d,eax1544 add r14d,r8d1545 vmovdqa XMMWORD[rsp],xmm61546 vpalignr xmm4,xmm2,xmm1,41547 shrd r13d,r13d,141548 mov r8d,r14d1549 mov r12d,ebx1550 vpalignr xmm7,xmm0,xmm3,41551 xor r13d,eax1552 shrd r14d,r14d,91553 xor r12d,ecx1554 vpsrld xmm6,xmm4,71555 shrd r13d,r13d,51556 xor r14d,r8d1557 and r12d,eax1558 vpaddd xmm1,xmm1,xmm71559 vaesenc xmm9,xmm9,xmm101560 vmovdqu xmm10,XMMWORD[((64-128))+rdi]1561 xor r13d,eax1562 add edx,DWORD[16+rsp]1563 mov r15d,r8d1564 vpsrld xmm7,xmm4,31565 shrd r14d,r14d,111566 xor r12d,ecx1567 xor r15d,r9d1568 vpslld xmm5,xmm4,141569 shrd r13d,r13d,61570 add edx,r12d1571 and esi,r15d1572 vpxor xmm4,xmm7,xmm61573 xor r14d,r8d1574 add edx,r13d1575 xor esi,r9d1576 vpshufd xmm7,xmm0,2501577 add r11d,edx1578 shrd r14d,r14d,21579 add edx,esi1580 vpsrld xmm6,xmm6,111581 mov r13d,r11d1582 add r14d,edx1583 shrd r13d,r13d,141584 vpxor xmm4,xmm4,xmm51585 mov edx,r14d1586 mov r12d,eax1587 xor r13d,r11d1588 vpslld xmm5,xmm5,111589 shrd r14d,r14d,91590 xor r12d,ebx1591 shrd r13d,r13d,51592 vpxor xmm4,xmm4,xmm61593 xor r14d,edx1594 and r12d,r11d1595 vaesenc xmm9,xmm9,xmm101596 vmovdqu xmm10,XMMWORD[((80-128))+rdi]1597 xor r13d,r11d1598 vpsrld xmm6,xmm7,101599 add ecx,DWORD[20+rsp]1600 mov esi,edx1601 shrd r14d,r14d,111602 vpxor xmm4,xmm4,xmm51603 xor r12d,ebx1604 xor esi,r8d1605 shrd r13d,r13d,61606 vpsrlq xmm7,xmm7,171607 add ecx,r12d1608 and r15d,esi1609 xor r14d,edx1610 vpaddd xmm1,xmm1,xmm41611 add ecx,r13d1612 xor r15d,r8d1613 add r10d,ecx1614 vpxor xmm6,xmm6,xmm71615 shrd r14d,r14d,21616 add ecx,r15d1617 mov r13d,r10d1618 vpsrlq xmm7,xmm7,21619 add r14d,ecx1620 shrd r13d,r13d,141621 mov ecx,r14d1622 vpxor xmm6,xmm6,xmm71623 mov r12d,r11d1624 xor r13d,r10d1625 shrd r14d,r14d,91626 vpshufd xmm6,xmm6,1321627 xor r12d,eax1628 shrd r13d,r13d,51629 xor r14d,ecx1630 vpsrldq xmm6,xmm6,81631 and r12d,r10d1632 vaesenc xmm9,xmm9,xmm101633 vmovdqu xmm10,XMMWORD[((96-128))+rdi]1634 xor r13d,r10d1635 add ebx,DWORD[24+rsp]1636 vpaddd xmm1,xmm1,xmm61637 mov r15d,ecx1638 shrd r14d,r14d,111639 xor r12d,eax1640 vpshufd xmm7,xmm1,801641 xor r15d,edx1642 shrd r13d,r13d,61643 add ebx,r12d1644 vpsrld xmm6,xmm7,101645 and esi,r15d1646 xor r14d,ecx1647 add ebx,r13d1648 vpsrlq xmm7,xmm7,171649 xor esi,edx1650 add r9d,ebx1651 shrd r14d,r14d,21652 vpxor xmm6,xmm6,xmm71653 add ebx,esi1654 mov r13d,r9d1655 add r14d,ebx1656 vpsrlq xmm7,xmm7,21657 shrd r13d,r13d,141658 mov ebx,r14d1659 mov r12d,r10d1660 vpxor xmm6,xmm6,xmm71661 xor r13d,r9d1662 shrd r14d,r14d,91663 xor r12d,r11d1664 vpshufd xmm6,xmm6,2321665 shrd r13d,r13d,51666 xor r14d,ebx1667 and r12d,r9d1668 vpslldq xmm6,xmm6,81669 vaesenc xmm9,xmm9,xmm101670 vmovdqu xmm10,XMMWORD[((112-128))+rdi]1671 xor r13d,r9d1672 add eax,DWORD[28+rsp]1673 mov esi,ebx1674 vpaddd xmm1,xmm1,xmm61675 shrd r14d,r14d,111676 xor r12d,r11d1677 xor esi,ecx1678 vpaddd xmm6,xmm1,XMMWORD[32+rbp]1679 shrd r13d,r13d,61680 add eax,r12d1681 and r15d,esi1682 xor r14d,ebx1683 add eax,r13d1684 xor r15d,ecx1685 add r8d,eax1686 shrd r14d,r14d,21687 add eax,r15d1688 mov r13d,r8d1689 add r14d,eax1690 vmovdqa XMMWORD[16+rsp],xmm61691 vpalignr xmm4,xmm3,xmm2,41692 shrd r13d,r13d,141693 mov eax,r14d1694 mov r12d,r9d1695 vpalignr xmm7,xmm1,xmm0,41696 xor r13d,r8d1697 shrd r14d,r14d,91698 xor r12d,r10d1699 vpsrld xmm6,xmm4,71700 shrd r13d,r13d,51701 xor r14d,eax1702 and r12d,r8d1703 vpaddd xmm2,xmm2,xmm71704 vaesenc xmm9,xmm9,xmm101705 vmovdqu xmm10,XMMWORD[((128-128))+rdi]1706 xor r13d,r8d1707 add r11d,DWORD[32+rsp]1708 mov r15d,eax1709 vpsrld xmm7,xmm4,31710 shrd r14d,r14d,111711 xor r12d,r10d1712 xor r15d,ebx1713 vpslld xmm5,xmm4,141714 shrd r13d,r13d,61715 add r11d,r12d1716 and esi,r15d1717 vpxor xmm4,xmm7,xmm61718 xor r14d,eax1719 add r11d,r13d1720 xor esi,ebx1721 vpshufd xmm7,xmm1,2501722 add edx,r11d1723 shrd r14d,r14d,21724 add r11d,esi1725 vpsrld xmm6,xmm6,111726 mov r13d,edx1727 add r14d,r11d1728 shrd r13d,r13d,141729 vpxor xmm4,xmm4,xmm51730 mov r11d,r14d1731 mov r12d,r8d1732 xor r13d,edx1733 vpslld xmm5,xmm5,111734 shrd r14d,r14d,91735 xor r12d,r9d1736 shrd r13d,r13d,51737 vpxor xmm4,xmm4,xmm61738 xor r14d,r11d1739 and r12d,edx1740 vaesenc xmm9,xmm9,xmm101741 vmovdqu xmm10,XMMWORD[((144-128))+rdi]1742 xor r13d,edx1743 vpsrld xmm6,xmm7,101744 add r10d,DWORD[36+rsp]1745 mov esi,r11d1746 shrd r14d,r14d,111747 vpxor xmm4,xmm4,xmm51748 xor r12d,r9d1749 xor esi,eax1750 shrd r13d,r13d,61751 vpsrlq xmm7,xmm7,171752 add r10d,r12d1753 and r15d,esi1754 xor r14d,r11d1755 vpaddd xmm2,xmm2,xmm41756 add r10d,r13d1757 xor r15d,eax1758 add ecx,r10d1759 vpxor xmm6,xmm6,xmm71760 shrd r14d,r14d,21761 add r10d,r15d1762 mov r13d,ecx1763 vpsrlq xmm7,xmm7,21764 add r14d,r10d1765 shrd r13d,r13d,141766 mov r10d,r14d1767 vpxor xmm6,xmm6,xmm71768 mov r12d,edx1769 xor r13d,ecx1770 shrd r14d,r14d,91771 vpshufd xmm6,xmm6,1321772 xor r12d,r8d1773 shrd r13d,r13d,51774 xor r14d,r10d1775 vpsrldq xmm6,xmm6,81776 and r12d,ecx1777 vaesenc xmm9,xmm9,xmm101778 vmovdqu xmm10,XMMWORD[((160-128))+rdi]1779 xor r13d,ecx1780 add r9d,DWORD[40+rsp]1781 vpaddd xmm2,xmm2,xmm61782 mov r15d,r10d1783 shrd r14d,r14d,111784 xor r12d,r8d1785 vpshufd xmm7,xmm2,801786 xor r15d,r11d1787 shrd r13d,r13d,61788 add r9d,r12d1789 vpsrld xmm6,xmm7,101790 and esi,r15d1791 xor r14d,r10d1792 add r9d,r13d1793 vpsrlq xmm7,xmm7,171794 xor esi,r11d1795 add ebx,r9d1796 shrd r14d,r14d,21797 vpxor xmm6,xmm6,xmm71798 add r9d,esi1799 mov r13d,ebx1800 add r14d,r9d1801 vpsrlq xmm7,xmm7,21802 shrd r13d,r13d,141803 mov r9d,r14d1804 mov r12d,ecx1805 vpxor xmm6,xmm6,xmm71806 xor r13d,ebx1807 shrd r14d,r14d,91808 xor r12d,edx1809 vpshufd xmm6,xmm6,2321810 shrd r13d,r13d,51811 xor r14d,r9d1812 and r12d,ebx1813 vpslldq xmm6,xmm6,81814 vaesenclast xmm11,xmm9,xmm101815 vaesenc xmm9,xmm9,xmm101816 vmovdqu xmm10,XMMWORD[((176-128))+rdi]1817 xor r13d,ebx1818 add r8d,DWORD[44+rsp]1819 mov esi,r9d1820 vpaddd xmm2,xmm2,xmm61821 shrd r14d,r14d,111822 xor r12d,edx1823 xor esi,r10d1824 vpaddd xmm6,xmm2,XMMWORD[64+rbp]1825 shrd r13d,r13d,61826 add r8d,r12d1827 and r15d,esi1828 xor r14d,r9d1829 add r8d,r13d1830 xor r15d,r10d1831 add eax,r8d1832 shrd r14d,r14d,21833 add r8d,r15d1834 mov r13d,eax1835 add r14d,r8d1836 vmovdqa XMMWORD[32+rsp],xmm61837 vpalignr xmm4,xmm0,xmm3,41838 shrd r13d,r13d,141839 mov r8d,r14d1840 mov r12d,ebx1841 vpalignr xmm7,xmm2,xmm1,41842 xor r13d,eax1843 shrd r14d,r14d,91844 xor r12d,ecx1845 vpsrld xmm6,xmm4,71846 shrd r13d,r13d,51847 xor r14d,r8d1848 and r12d,eax1849 vpaddd xmm3,xmm3,xmm71850 vpand xmm8,xmm11,xmm121851 vaesenc xmm9,xmm9,xmm101852 vmovdqu xmm10,XMMWORD[((192-128))+rdi]1853 xor r13d,eax1854 add edx,DWORD[48+rsp]1855 mov r15d,r8d1856 vpsrld xmm7,xmm4,31857 shrd r14d,r14d,111858 xor r12d,ecx1859 xor r15d,r9d1860 vpslld xmm5,xmm4,141861 shrd r13d,r13d,61862 add edx,r12d1863 and esi,r15d1864 vpxor xmm4,xmm7,xmm61865 xor r14d,r8d1866 add edx,r13d1867 xor esi,r9d1868 vpshufd xmm7,xmm2,2501869 add r11d,edx1870 shrd r14d,r14d,21871 add edx,esi1872 vpsrld xmm6,xmm6,111873 mov r13d,r11d1874 add r14d,edx1875 shrd r13d,r13d,141876 vpxor xmm4,xmm4,xmm51877 mov edx,r14d1878 mov r12d,eax1879 xor r13d,r11d1880 vpslld xmm5,xmm5,111881 shrd r14d,r14d,91882 xor r12d,ebx1883 shrd r13d,r13d,51884 vpxor xmm4,xmm4,xmm61885 xor r14d,edx1886 and r12d,r11d1887 vaesenclast xmm11,xmm9,xmm101888 vaesenc xmm9,xmm9,xmm101889 vmovdqu xmm10,XMMWORD[((208-128))+rdi]1890 xor r13d,r11d1891 vpsrld xmm6,xmm7,101892 add ecx,DWORD[52+rsp]1893 mov esi,edx1894 shrd r14d,r14d,111895 vpxor xmm4,xmm4,xmm51896 xor r12d,ebx1897 xor esi,r8d1898 shrd r13d,r13d,61899 vpsrlq xmm7,xmm7,171900 add ecx,r12d1901 and r15d,esi1902 xor r14d,edx1903 vpaddd xmm3,xmm3,xmm41904 add ecx,r13d1905 xor r15d,r8d1906 add r10d,ecx1907 vpxor xmm6,xmm6,xmm71908 shrd r14d,r14d,21909 add ecx,r15d1910 mov r13d,r10d1911 vpsrlq xmm7,xmm7,21912 add r14d,ecx1913 shrd r13d,r13d,141914 mov ecx,r14d1915 vpxor xmm6,xmm6,xmm71916 mov r12d,r11d1917 xor r13d,r10d1918 shrd r14d,r14d,91919 vpshufd xmm6,xmm6,1321920 xor r12d,eax1921 shrd r13d,r13d,51922 xor r14d,ecx1923 vpsrldq xmm6,xmm6,81924 and r12d,r10d1925 vpand xmm11,xmm11,xmm131926 vaesenc xmm9,xmm9,xmm101927 vmovdqu xmm10,XMMWORD[((224-128))+rdi]1928 xor r13d,r10d1929 add ebx,DWORD[56+rsp]1930 vpaddd xmm3,xmm3,xmm61931 mov r15d,ecx1932 shrd r14d,r14d,111933 xor r12d,eax1934 vpshufd xmm7,xmm3,801935 xor r15d,edx1936 shrd r13d,r13d,61937 add ebx,r12d1938 vpsrld xmm6,xmm7,101939 and esi,r15d1940 xor r14d,ecx1941 add ebx,r13d1942 vpsrlq xmm7,xmm7,171943 xor esi,edx1944 add r9d,ebx1945 shrd r14d,r14d,21946 vpxor xmm6,xmm6,xmm71947 add ebx,esi1948 mov r13d,r9d1949 add r14d,ebx1950 vpsrlq xmm7,xmm7,21951 shrd r13d,r13d,141952 mov ebx,r14d1953 mov r12d,r10d1954 vpxor xmm6,xmm6,xmm71955 xor r13d,r9d1956 shrd r14d,r14d,91957 xor r12d,r11d1958 vpshufd xmm6,xmm6,2321959 shrd r13d,r13d,51960 xor r14d,ebx1961 and r12d,r9d1962 vpslldq xmm6,xmm6,81963 vpor xmm8,xmm8,xmm111964 vaesenclast xmm11,xmm9,xmm101965 vmovdqu xmm10,XMMWORD[((0-128))+rdi]1966 xor r13d,r9d1967 add eax,DWORD[60+rsp]1968 mov esi,ebx1969 vpaddd xmm3,xmm3,xmm61970 shrd r14d,r14d,111971 xor r12d,r11d1972 xor esi,ecx1973 vpaddd xmm6,xmm3,XMMWORD[96+rbp]1974 shrd r13d,r13d,61975 add eax,r12d1976 and r15d,esi1977 xor r14d,ebx1978 add eax,r13d1979 xor r15d,ecx1980 add r8d,eax1981 shrd r14d,r14d,21982 add eax,r15d1983 mov r13d,r8d1984 add r14d,eax1985 vmovdqa XMMWORD[48+rsp],xmm61986 mov r12,QWORD[((64+0))+rsp]1987 vpand xmm11,xmm11,xmm141988 mov r15,QWORD[((64+8))+rsp]1989 vpor xmm8,xmm8,xmm111990 vmovdqu XMMWORD[r12*1+r15],xmm81991 lea r12,[16+r12]1992 cmp BYTE[131+rbp],01993 jne NEAR $L$avx_00_471994 vmovdqu xmm9,XMMWORD[r12]1995 mov QWORD[((64+0))+rsp],r121996 shrd r13d,r13d,141997 mov eax,r14d1998 mov r12d,r9d1999 xor r13d,r8d2000 shrd r14d,r14d,92001 xor r12d,r10d2002 shrd r13d,r13d,52003 xor r14d,eax2004 and r12d,r8d2005 vpxor xmm9,xmm9,xmm102006 vmovdqu xmm10,XMMWORD[((16-128))+rdi]2007 xor r13d,r8d2008 add r11d,DWORD[rsp]2009 mov r15d,eax2010 shrd r14d,r14d,112011 xor r12d,r10d2012 xor r15d,ebx2013 shrd r13d,r13d,62014 add r11d,r12d2015 and esi,r15d2016 xor r14d,eax2017 add r11d,r13d2018 xor esi,ebx2019 add edx,r11d2020 shrd r14d,r14d,22021 add r11d,esi2022 mov r13d,edx2023 add r14d,r11d2024 shrd r13d,r13d,142025 mov r11d,r14d2026 mov r12d,r8d2027 xor r13d,edx2028 shrd r14d,r14d,92029 xor r12d,r9d2030 shrd r13d,r13d,52031 xor r14d,r11d2032 and r12d,edx2033 vpxor xmm9,xmm9,xmm82034 xor r13d,edx2035 add r10d,DWORD[4+rsp]2036 mov esi,r11d2037 shrd r14d,r14d,112038 xor r12d,r9d2039 xor esi,eax2040 shrd r13d,r13d,62041 add r10d,r12d2042 and r15d,esi2043 xor r14d,r11d2044 add r10d,r13d2045 xor r15d,eax2046 add ecx,r10d2047 shrd r14d,r14d,22048 add r10d,r15d2049 mov r13d,ecx2050 add r14d,r10d2051 shrd r13d,r13d,142052 mov r10d,r14d2053 mov r12d,edx2054 xor r13d,ecx2055 shrd r14d,r14d,92056 xor r12d,r8d2057 shrd r13d,r13d,52058 xor r14d,r10d2059 and r12d,ecx2060 vaesenc xmm9,xmm9,xmm102061 vmovdqu xmm10,XMMWORD[((32-128))+rdi]2062 xor r13d,ecx2063 add r9d,DWORD[8+rsp]2064 mov r15d,r10d2065 shrd r14d,r14d,112066 xor r12d,r8d2067 xor r15d,r11d2068 shrd r13d,r13d,62069 add r9d,r12d2070 and esi,r15d2071 xor r14d,r10d2072 add r9d,r13d2073 xor esi,r11d2074 add ebx,r9d2075 shrd r14d,r14d,22076 add r9d,esi2077 mov r13d,ebx2078 add r14d,r9d2079 shrd r13d,r13d,142080 mov r9d,r14d2081 mov r12d,ecx2082 xor r13d,ebx2083 shrd r14d,r14d,92084 xor r12d,edx2085 shrd r13d,r13d,52086 xor r14d,r9d2087 and r12d,ebx2088 vaesenc xmm9,xmm9,xmm102089 vmovdqu xmm10,XMMWORD[((48-128))+rdi]2090 xor r13d,ebx2091 add r8d,DWORD[12+rsp]2092 mov esi,r9d2093 shrd r14d,r14d,112094 xor r12d,edx2095 xor esi,r10d2096 shrd r13d,r13d,62097 add r8d,r12d2098 and r15d,esi2099 xor r14d,r9d2100 add r8d,r13d2101 xor r15d,r10d2102 add eax,r8d2103 shrd r14d,r14d,22104 add r8d,r15d2105 mov r13d,eax2106 add r14d,r8d2107 shrd r13d,r13d,142108 mov r8d,r14d2109 mov r12d,ebx2110 xor r13d,eax2111 shrd r14d,r14d,92112 xor r12d,ecx2113 shrd r13d,r13d,52114 xor r14d,r8d2115 and r12d,eax2116 vaesenc xmm9,xmm9,xmm102117 vmovdqu xmm10,XMMWORD[((64-128))+rdi]2118 xor r13d,eax2119 add edx,DWORD[16+rsp]2120 mov r15d,r8d2121 shrd r14d,r14d,112122 xor r12d,ecx2123 xor r15d,r9d2124 shrd r13d,r13d,62125 add edx,r12d2126 and esi,r15d2127 xor r14d,r8d2128 add edx,r13d2129 xor esi,r9d2130 add r11d,edx2131 shrd r14d,r14d,22132 add edx,esi2133 mov r13d,r11d2134 add r14d,edx2135 shrd r13d,r13d,142136 mov edx,r14d2137 mov r12d,eax2138 xor r13d,r11d2139 shrd r14d,r14d,92140 xor r12d,ebx2141 shrd r13d,r13d,52142 xor r14d,edx2143 and r12d,r11d2144 vaesenc xmm9,xmm9,xmm102145 vmovdqu xmm10,XMMWORD[((80-128))+rdi]2146 xor r13d,r11d2147 add ecx,DWORD[20+rsp]2148 mov esi,edx2149 shrd r14d,r14d,112150 xor r12d,ebx2151 xor esi,r8d2152 shrd r13d,r13d,62153 add ecx,r12d2154 and r15d,esi2155 xor r14d,edx2156 add ecx,r13d2157 xor r15d,r8d2158 add r10d,ecx2159 shrd r14d,r14d,22160 add ecx,r15d2161 mov r13d,r10d2162 add r14d,ecx2163 shrd r13d,r13d,142164 mov ecx,r14d2165 mov r12d,r11d2166 xor r13d,r10d2167 shrd r14d,r14d,92168 xor r12d,eax2169 shrd r13d,r13d,52170 xor r14d,ecx2171 and r12d,r10d2172 vaesenc xmm9,xmm9,xmm102173 vmovdqu xmm10,XMMWORD[((96-128))+rdi]2174 xor r13d,r10d2175 add ebx,DWORD[24+rsp]2176 mov r15d,ecx2177 shrd r14d,r14d,112178 xor r12d,eax2179 xor r15d,edx2180 shrd r13d,r13d,62181 add ebx,r12d2182 and esi,r15d2183 xor r14d,ecx2184 add ebx,r13d2185 xor esi,edx2186 add r9d,ebx2187 shrd r14d,r14d,22188 add ebx,esi2189 mov r13d,r9d2190 add r14d,ebx2191 shrd r13d,r13d,142192 mov ebx,r14d2193 mov r12d,r10d2194 xor r13d,r9d2195 shrd r14d,r14d,92196 xor r12d,r11d2197 shrd r13d,r13d,52198 xor r14d,ebx2199 and r12d,r9d2200 vaesenc xmm9,xmm9,xmm102201 vmovdqu xmm10,XMMWORD[((112-128))+rdi]2202 xor r13d,r9d2203 add eax,DWORD[28+rsp]2204 mov esi,ebx2205 shrd r14d,r14d,112206 xor r12d,r11d2207 xor esi,ecx2208 shrd r13d,r13d,62209 add eax,r12d2210 and r15d,esi2211 xor r14d,ebx2212 add eax,r13d2213 xor r15d,ecx2214 add r8d,eax2215 shrd r14d,r14d,22216 add eax,r15d2217 mov r13d,r8d2218 add r14d,eax2219 shrd r13d,r13d,142220 mov eax,r14d2221 mov r12d,r9d2222 xor r13d,r8d2223 shrd r14d,r14d,92224 xor r12d,r10d2225 shrd r13d,r13d,52226 xor r14d,eax2227 and r12d,r8d2228 vaesenc xmm9,xmm9,xmm102229 vmovdqu xmm10,XMMWORD[((128-128))+rdi]2230 xor r13d,r8d2231 add r11d,DWORD[32+rsp]2232 mov r15d,eax2233 shrd r14d,r14d,112234 xor r12d,r10d2235 xor r15d,ebx2236 shrd r13d,r13d,62237 add r11d,r12d2238 and esi,r15d2239 xor r14d,eax2240 add r11d,r13d2241 xor esi,ebx2242 add edx,r11d2243 shrd r14d,r14d,22244 add r11d,esi2245 mov r13d,edx2246 add r14d,r11d2247 shrd r13d,r13d,142248 mov r11d,r14d2249 mov r12d,r8d2250 xor r13d,edx2251 shrd r14d,r14d,92252 xor r12d,r9d2253 shrd r13d,r13d,52254 xor r14d,r11d2255 and r12d,edx2256 vaesenc xmm9,xmm9,xmm102257 vmovdqu xmm10,XMMWORD[((144-128))+rdi]2258 xor r13d,edx2259 add r10d,DWORD[36+rsp]2260 mov esi,r11d2261 shrd r14d,r14d,112262 xor r12d,r9d2263 xor esi,eax2264 shrd r13d,r13d,62265 add r10d,r12d2266 and r15d,esi2267 xor r14d,r11d2268 add r10d,r13d2269 xor r15d,eax2270 add ecx,r10d2271 shrd r14d,r14d,22272 add r10d,r15d2273 mov r13d,ecx2274 add r14d,r10d2275 shrd r13d,r13d,142276 mov r10d,r14d2277 mov r12d,edx2278 xor r13d,ecx2279 shrd r14d,r14d,92280 xor r12d,r8d2281 shrd r13d,r13d,52282 xor r14d,r10d2283 and r12d,ecx2284 vaesenc xmm9,xmm9,xmm102285 vmovdqu xmm10,XMMWORD[((160-128))+rdi]2286 xor r13d,ecx2287 add r9d,DWORD[40+rsp]2288 mov r15d,r10d2289 shrd r14d,r14d,112290 xor r12d,r8d2291 xor r15d,r11d2292 shrd r13d,r13d,62293 add r9d,r12d2294 and esi,r15d2295 xor r14d,r10d2296 add r9d,r13d2297 xor esi,r11d2298 add ebx,r9d2299 shrd r14d,r14d,22300 add r9d,esi2301 mov r13d,ebx2302 add r14d,r9d2303 shrd r13d,r13d,142304 mov r9d,r14d2305 mov r12d,ecx2306 xor r13d,ebx2307 shrd r14d,r14d,92308 xor r12d,edx2309 shrd r13d,r13d,52310 xor r14d,r9d2311 and r12d,ebx2312 vaesenclast xmm11,xmm9,xmm102313 vaesenc xmm9,xmm9,xmm102314 vmovdqu xmm10,XMMWORD[((176-128))+rdi]2315 xor r13d,ebx2316 add r8d,DWORD[44+rsp]2317 mov esi,r9d2318 shrd r14d,r14d,112319 xor r12d,edx2320 xor esi,r10d2321 shrd r13d,r13d,62322 add r8d,r12d2323 and r15d,esi2324 xor r14d,r9d2325 add r8d,r13d2326 xor r15d,r10d2327 add eax,r8d2328 shrd r14d,r14d,22329 add r8d,r15d2330 mov r13d,eax2331 add r14d,r8d2332 shrd r13d,r13d,142333 mov r8d,r14d2334 mov r12d,ebx2335 xor r13d,eax2336 shrd r14d,r14d,92337 xor r12d,ecx2338 shrd r13d,r13d,52339 xor r14d,r8d2340 and r12d,eax2341 vpand xmm8,xmm11,xmm122342 vaesenc xmm9,xmm9,xmm102343 vmovdqu xmm10,XMMWORD[((192-128))+rdi]2344 xor r13d,eax2345 add edx,DWORD[48+rsp]2346 mov r15d,r8d2347 shrd r14d,r14d,112348 xor r12d,ecx2349 xor r15d,r9d2350 shrd r13d,r13d,62351 add edx,r12d2352 and esi,r15d2353 xor r14d,r8d2354 add edx,r13d2355 xor esi,r9d2356 add r11d,edx2357 shrd r14d,r14d,22358 add edx,esi2359 mov r13d,r11d2360 add r14d,edx2361 shrd r13d,r13d,142362 mov edx,r14d2363 mov r12d,eax2364 xor r13d,r11d2365 shrd r14d,r14d,92366 xor r12d,ebx2367 shrd r13d,r13d,52368 xor r14d,edx2369 and r12d,r11d2370 vaesenclast xmm11,xmm9,xmm102371 vaesenc xmm9,xmm9,xmm102372 vmovdqu xmm10,XMMWORD[((208-128))+rdi]2373 xor r13d,r11d2374 add ecx,DWORD[52+rsp]2375 mov esi,edx2376 shrd r14d,r14d,112377 xor r12d,ebx2378 xor esi,r8d2379 shrd r13d,r13d,62380 add ecx,r12d2381 and r15d,esi2382 xor r14d,edx2383 add ecx,r13d2384 xor r15d,r8d2385 add r10d,ecx2386 shrd r14d,r14d,22387 add ecx,r15d2388 mov r13d,r10d2389 add r14d,ecx2390 shrd r13d,r13d,142391 mov ecx,r14d2392 mov r12d,r11d2393 xor r13d,r10d2394 shrd r14d,r14d,92395 xor r12d,eax2396 shrd r13d,r13d,52397 xor r14d,ecx2398 and r12d,r10d2399 vpand xmm11,xmm11,xmm132400 vaesenc xmm9,xmm9,xmm102401 vmovdqu xmm10,XMMWORD[((224-128))+rdi]2402 xor r13d,r10d2403 add ebx,DWORD[56+rsp]2404 mov r15d,ecx2405 shrd r14d,r14d,112406 xor r12d,eax2407 xor r15d,edx2408 shrd r13d,r13d,62409 add ebx,r12d2410 and esi,r15d2411 xor r14d,ecx2412 add ebx,r13d2413 xor esi,edx2414 add r9d,ebx2415 shrd r14d,r14d,22416 add ebx,esi2417 mov r13d,r9d2418 add r14d,ebx2419 shrd r13d,r13d,142420 mov ebx,r14d2421 mov r12d,r10d2422 xor r13d,r9d2423 shrd r14d,r14d,92424 xor r12d,r11d2425 shrd r13d,r13d,52426 xor r14d,ebx2427 and r12d,r9d2428 vpor xmm8,xmm8,xmm112429 vaesenclast xmm11,xmm9,xmm102430 vmovdqu xmm10,XMMWORD[((0-128))+rdi]2431 xor r13d,r9d2432 add eax,DWORD[60+rsp]2433 mov esi,ebx2434 shrd r14d,r14d,112435 xor r12d,r11d2436 xor esi,ecx2437 shrd r13d,r13d,62438 add eax,r12d2439 and r15d,esi2440 xor r14d,ebx2441 add eax,r13d2442 xor r15d,ecx2443 add r8d,eax2444 shrd r14d,r14d,22445 add eax,r15d2446 mov r13d,r8d2447 add r14d,eax2448 mov r12,QWORD[((64+0))+rsp]2449 mov r13,QWORD[((64+8))+rsp]2450 mov r15,QWORD[((64+40))+rsp]2451 mov rsi,QWORD[((64+48))+rsp]2452 2453 vpand xmm11,xmm11,xmm142454 mov eax,r14d2455 vpor xmm8,xmm8,xmm112456 vmovdqu XMMWORD[r13*1+r12],xmm82457 lea r12,[16+r12]2458 2459 add eax,DWORD[r15]2460 add ebx,DWORD[4+r15]2461 add ecx,DWORD[8+r15]2462 add edx,DWORD[12+r15]2463 add r8d,DWORD[16+r15]2464 add r9d,DWORD[20+r15]2465 add r10d,DWORD[24+r15]2466 add r11d,DWORD[28+r15]2467 2468 cmp r12,QWORD[((64+16))+rsp]2469 2470 mov DWORD[r15],eax2471 mov DWORD[4+r15],ebx2472 mov DWORD[8+r15],ecx2473 mov DWORD[12+r15],edx2474 mov DWORD[16+r15],r8d2475 mov DWORD[20+r15],r9d2476 mov DWORD[24+r15],r10d2477 mov DWORD[28+r15],r11d2478 jb NEAR $L$loop_avx2479 2480 mov r8,QWORD[((64+32))+rsp]2481 mov rsi,QWORD[120+rsp]2482 2483 vmovdqu XMMWORD[r8],xmm82484 vzeroall2485 movaps xmm6,XMMWORD[128+rsp]2486 movaps xmm7,XMMWORD[144+rsp]2487 movaps xmm8,XMMWORD[160+rsp]2488 movaps xmm9,XMMWORD[176+rsp]2489 movaps xmm10,XMMWORD[192+rsp]2490 movaps xmm11,XMMWORD[208+rsp]2491 movaps xmm12,XMMWORD[224+rsp]2492 movaps xmm13,XMMWORD[240+rsp]2493 movaps xmm14,XMMWORD[256+rsp]2494 movaps xmm15,XMMWORD[272+rsp]2495 mov r15,QWORD[((-48))+rsi]2496 2497 mov r14,QWORD[((-40))+rsi]2498 2499 mov r13,QWORD[((-32))+rsi]2500 2501 mov r12,QWORD[((-24))+rsi]2502 2503 mov rbp,QWORD[((-16))+rsi]2504 2505 mov rbx,QWORD[((-8))+rsi]2506 2507 lea rsp,[rsi]2508 2509 $L$epilogue_avx:2510 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2511 mov rsi,QWORD[16+rsp]2512 DB 0F3h,0C3h ;repret2513 2514 $L$SEH_end_aesni_cbc_sha256_enc_avx:2515 2516 ALIGN 642517 aesni_cbc_sha256_enc_avx2:2518 mov QWORD[8+rsp],rdi ;WIN64 prologue2519 mov QWORD[16+rsp],rsi2520 mov rax,rsp2521 $L$SEH_begin_aesni_cbc_sha256_enc_avx2:2522 mov rdi,rcx2523 mov rsi,rdx2524 mov rdx,r82525 mov rcx,r92526 mov r8,QWORD[40+rsp]2527 mov r9,QWORD[48+rsp]2528 2529 2530 2531 $L$avx2_shortcut:2532 mov r10,QWORD[56+rsp]2533 mov rax,rsp2534 2535 push rbx2536 2537 push rbp2538 2539 push r122540 2541 push r132542 2543 push r142544 2545 push r152546 2547 sub rsp,7362548 and rsp,-256*42549 add rsp,4482550 2551 shl rdx,62552 sub rsi,rdi2553 sub r10,rdi2554 add rdx,rdi2555 2556 2557 2558 mov QWORD[((64+16))+rsp],rdx2559 2560 mov QWORD[((64+32))+rsp],r82561 mov QWORD[((64+40))+rsp],r92562 mov QWORD[((64+48))+rsp],r102563 mov QWORD[120+rsp],rax2564 2565 movaps XMMWORD[128+rsp],xmm62566 movaps XMMWORD[144+rsp],xmm72567 movaps XMMWORD[160+rsp],xmm82568 movaps XMMWORD[176+rsp],xmm92569 movaps XMMWORD[192+rsp],xmm102570 movaps XMMWORD[208+rsp],xmm112571 movaps XMMWORD[224+rsp],xmm122572 movaps XMMWORD[240+rsp],xmm132573 movaps XMMWORD[256+rsp],xmm142574 movaps XMMWORD[272+rsp],xmm152575 $L$prologue_avx2:2576 vzeroall2577 2578 mov r13,rdi2579 vpinsrq xmm15,xmm15,rsi,12580 lea rdi,[128+rcx]2581 lea r12,[((K256+544))]2582 mov r14d,DWORD[((240-128))+rdi]2583 mov r15,r92584 mov rsi,r102585 vmovdqu xmm8,XMMWORD[r8]2586 lea r14,[((-9))+r14]2587 2588 vmovdqa xmm14,XMMWORD[r14*8+r12]2589 vmovdqa xmm13,XMMWORD[16+r14*8+r12]2590 vmovdqa xmm12,XMMWORD[32+r14*8+r12]2591 2592 sub r13,-16*42593 mov eax,DWORD[r15]2594 lea r12,[r13*1+rsi]2595 mov ebx,DWORD[4+r15]2596 cmp r13,rdx2597 mov ecx,DWORD[8+r15]2598 cmove r12,rsp2599 mov edx,DWORD[12+r15]2600 mov r8d,DWORD[16+r15]2601 mov r9d,DWORD[20+r15]2602 mov r10d,DWORD[24+r15]2603 mov r11d,DWORD[28+r15]2604 vmovdqu xmm10,XMMWORD[((0-128))+rdi]2605 jmp NEAR $L$oop_avx22606 ALIGN 162607 $L$oop_avx2:2608 vmovdqa ymm7,YMMWORD[((K256+512))]2609 vmovdqu xmm0,XMMWORD[((-64+0))+r13*1+rsi]2610 vmovdqu xmm1,XMMWORD[((-64+16))+r13*1+rsi]2611 vmovdqu xmm2,XMMWORD[((-64+32))+r13*1+rsi]2612 vmovdqu xmm3,XMMWORD[((-64+48))+r13*1+rsi]2613 2614 vinserti128 ymm0,ymm0,XMMWORD[r12],12615 vinserti128 ymm1,ymm1,XMMWORD[16+r12],12616 vpshufb ymm0,ymm0,ymm72617 vinserti128 ymm2,ymm2,XMMWORD[32+r12],12618 vpshufb ymm1,ymm1,ymm72619 vinserti128 ymm3,ymm3,XMMWORD[48+r12],12620 2621 lea rbp,[K256]2622 vpshufb ymm2,ymm2,ymm72623 lea r13,[((-64))+r13]2624 vpaddd ymm4,ymm0,YMMWORD[rbp]2625 vpshufb ymm3,ymm3,ymm72626 vpaddd ymm5,ymm1,YMMWORD[32+rbp]2627 vpaddd ymm6,ymm2,YMMWORD[64+rbp]2628 vpaddd ymm7,ymm3,YMMWORD[96+rbp]2629 vmovdqa YMMWORD[rsp],ymm42630 xor r14d,r14d2631 vmovdqa YMMWORD[32+rsp],ymm52632 lea rsp,[((-64))+rsp]2633 mov esi,ebx2634 vmovdqa YMMWORD[rsp],ymm62635 xor esi,ecx2636 vmovdqa YMMWORD[32+rsp],ymm72637 mov r12d,r9d2638 sub rbp,-16*2*42639 jmp NEAR $L$avx2_00_472640 2641 ALIGN 162642 $L$avx2_00_47:2643 vmovdqu xmm9,XMMWORD[r13]2644 vpinsrq xmm15,xmm15,r13,02645 lea rsp,[((-64))+rsp]2646 vpalignr ymm4,ymm1,ymm0,42647 add r11d,DWORD[((0+128))+rsp]2648 and r12d,r8d2649 rorx r13d,r8d,252650 vpalignr ymm7,ymm3,ymm2,42651 rorx r15d,r8d,112652 lea eax,[r14*1+rax]2653 lea r11d,[r12*1+r11]2654 vpsrld ymm6,ymm4,72655 andn r12d,r8d,r10d2656 xor r13d,r15d2657 rorx r14d,r8d,62658 vpaddd ymm0,ymm0,ymm72659 lea r11d,[r12*1+r11]2660 xor r13d,r14d2661 mov r15d,eax2662 vpsrld ymm7,ymm4,32663 rorx r12d,eax,222664 lea r11d,[r13*1+r11]2665 xor r15d,ebx2666 vpslld ymm5,ymm4,142667 rorx r14d,eax,132668 rorx r13d,eax,22669 lea edx,[r11*1+rdx]2670 vpxor ymm4,ymm7,ymm62671 and esi,r15d2672 vpxor xmm9,xmm9,xmm102673 vmovdqu xmm10,XMMWORD[((16-128))+rdi]2674 xor r14d,r12d2675 xor esi,ebx2676 vpshufd ymm7,ymm3,2502677 xor r14d,r13d2678 lea r11d,[rsi*1+r11]2679 mov r12d,r8d2680 vpsrld ymm6,ymm6,112681 add r10d,DWORD[((4+128))+rsp]2682 and r12d,edx2683 rorx r13d,edx,252684 vpxor ymm4,ymm4,ymm52685 rorx esi,edx,112686 lea r11d,[r14*1+r11]2687 lea r10d,[r12*1+r10]2688 vpslld ymm5,ymm5,112689 andn r12d,edx,r9d2690 xor r13d,esi2691 rorx r14d,edx,62692 vpxor ymm4,ymm4,ymm62693 lea r10d,[r12*1+r10]2694 xor r13d,r14d2695 mov esi,r11d2696 vpsrld ymm6,ymm7,102697 rorx r12d,r11d,222698 lea r10d,[r13*1+r10]2699 xor esi,eax2700 vpxor ymm4,ymm4,ymm52701 rorx r14d,r11d,132702 rorx r13d,r11d,22703 lea ecx,[r10*1+rcx]2704 vpsrlq ymm7,ymm7,172705 and r15d,esi2706 vpxor xmm9,xmm9,xmm82707 xor r14d,r12d2708 xor r15d,eax2709 vpaddd ymm0,ymm0,ymm42710 xor r14d,r13d2711 lea r10d,[r15*1+r10]2712 mov r12d,edx2713 vpxor ymm6,ymm6,ymm72714 add r9d,DWORD[((8+128))+rsp]2715 and r12d,ecx2716 rorx r13d,ecx,252717 vpsrlq ymm7,ymm7,22718 rorx r15d,ecx,112719 lea r10d,[r14*1+r10]2720 lea r9d,[r12*1+r9]2721 vpxor ymm6,ymm6,ymm72722 andn r12d,ecx,r8d2723 xor r13d,r15d2724 rorx r14d,ecx,62725 vpshufd ymm6,ymm6,1322726 lea r9d,[r12*1+r9]2727 xor r13d,r14d2728 mov r15d,r10d2729 vpsrldq ymm6,ymm6,82730 rorx r12d,r10d,222731 lea r9d,[r13*1+r9]2732 xor r15d,r11d2733 vpaddd ymm0,ymm0,ymm62734 rorx r14d,r10d,132735 rorx r13d,r10d,22736 lea ebx,[r9*1+rbx]2737 vpshufd ymm7,ymm0,802738 and esi,r15d2739 vaesenc xmm9,xmm9,xmm102740 vmovdqu xmm10,XMMWORD[((32-128))+rdi]2741 xor r14d,r12d2742 xor esi,r11d2743 vpsrld ymm6,ymm7,102744 xor r14d,r13d2745 lea r9d,[rsi*1+r9]2746 mov r12d,ecx2747 vpsrlq ymm7,ymm7,172748 add r8d,DWORD[((12+128))+rsp]2749 and r12d,ebx2750 rorx r13d,ebx,252751 vpxor ymm6,ymm6,ymm72752 rorx esi,ebx,112753 lea r9d,[r14*1+r9]2754 lea r8d,[r12*1+r8]2755 vpsrlq ymm7,ymm7,22756 andn r12d,ebx,edx2757 xor r13d,esi2758 rorx r14d,ebx,62759 vpxor ymm6,ymm6,ymm72760 lea r8d,[r12*1+r8]2761 xor r13d,r14d2762 mov esi,r9d2763 vpshufd ymm6,ymm6,2322764 rorx r12d,r9d,222765 lea r8d,[r13*1+r8]2766 xor esi,r10d2767 vpslldq ymm6,ymm6,82768 rorx r14d,r9d,132769 rorx r13d,r9d,22770 lea eax,[r8*1+rax]2771 vpaddd ymm0,ymm0,ymm62772 and r15d,esi2773 vaesenc xmm9,xmm9,xmm102774 vmovdqu xmm10,XMMWORD[((48-128))+rdi]2775 xor r14d,r12d2776 xor r15d,r10d2777 vpaddd ymm6,ymm0,YMMWORD[rbp]2778 xor r14d,r13d2779 lea r8d,[r15*1+r8]2780 mov r12d,ebx2781 vmovdqa YMMWORD[rsp],ymm62782 vpalignr ymm4,ymm2,ymm1,42783 add edx,DWORD[((32+128))+rsp]2784 and r12d,eax2785 rorx r13d,eax,252786 vpalignr ymm7,ymm0,ymm3,42787 rorx r15d,eax,112788 lea r8d,[r14*1+r8]2789 lea edx,[r12*1+rdx]2790 vpsrld ymm6,ymm4,72791 andn r12d,eax,ecx2792 xor r13d,r15d2793 rorx r14d,eax,62794 vpaddd ymm1,ymm1,ymm72795 lea edx,[r12*1+rdx]2796 xor r13d,r14d2797 mov r15d,r8d2798 vpsrld ymm7,ymm4,32799 rorx r12d,r8d,222800 lea edx,[r13*1+rdx]2801 xor r15d,r9d2802 vpslld ymm5,ymm4,142803 rorx r14d,r8d,132804 rorx r13d,r8d,22805 lea r11d,[rdx*1+r11]2806 vpxor ymm4,ymm7,ymm62807 and esi,r15d2808 vaesenc xmm9,xmm9,xmm102809 vmovdqu xmm10,XMMWORD[((64-128))+rdi]2810 xor r14d,r12d2811 xor esi,r9d2812 vpshufd ymm7,ymm0,2502813 xor r14d,r13d2814 lea edx,[rsi*1+rdx]2815 mov r12d,eax2816 vpsrld ymm6,ymm6,112817 add ecx,DWORD[((36+128))+rsp]2818 and r12d,r11d2819 rorx r13d,r11d,252820 vpxor ymm4,ymm4,ymm52821 rorx esi,r11d,112822 lea edx,[r14*1+rdx]2823 lea ecx,[r12*1+rcx]2824 vpslld ymm5,ymm5,112825 andn r12d,r11d,ebx2826 xor r13d,esi2827 rorx r14d,r11d,62828 vpxor ymm4,ymm4,ymm62829 lea ecx,[r12*1+rcx]2830 xor r13d,r14d2831 mov esi,edx2832 vpsrld ymm6,ymm7,102833 rorx r12d,edx,222834 lea ecx,[r13*1+rcx]2835 xor esi,r8d2836 vpxor ymm4,ymm4,ymm52837 rorx r14d,edx,132838 rorx r13d,edx,22839 lea r10d,[rcx*1+r10]2840 vpsrlq ymm7,ymm7,172841 and r15d,esi2842 vaesenc xmm9,xmm9,xmm102843 vmovdqu xmm10,XMMWORD[((80-128))+rdi]2844 xor r14d,r12d2845 xor r15d,r8d2846 vpaddd ymm1,ymm1,ymm42847 xor r14d,r13d2848 lea ecx,[r15*1+rcx]2849 mov r12d,r11d2850 vpxor ymm6,ymm6,ymm72851 add ebx,DWORD[((40+128))+rsp]2852 and r12d,r10d2853 rorx r13d,r10d,252854 vpsrlq ymm7,ymm7,22855 rorx r15d,r10d,112856 lea ecx,[r14*1+rcx]2857 lea ebx,[r12*1+rbx]2858 vpxor ymm6,ymm6,ymm72859 andn r12d,r10d,eax2860 xor r13d,r15d2861 rorx r14d,r10d,62862 vpshufd ymm6,ymm6,1322863 lea ebx,[r12*1+rbx]2864 xor r13d,r14d2865 mov r15d,ecx2866 vpsrldq ymm6,ymm6,82867 rorx r12d,ecx,222868 lea ebx,[r13*1+rbx]2869 xor r15d,edx2870 vpaddd ymm1,ymm1,ymm62871 rorx r14d,ecx,132872 rorx r13d,ecx,22873 lea r9d,[rbx*1+r9]2874 vpshufd ymm7,ymm1,802875 and esi,r15d2876 vaesenc xmm9,xmm9,xmm102877 vmovdqu xmm10,XMMWORD[((96-128))+rdi]2878 xor r14d,r12d2879 xor esi,edx2880 vpsrld ymm6,ymm7,102881 xor r14d,r13d2882 lea ebx,[rsi*1+rbx]2883 mov r12d,r10d2884 vpsrlq ymm7,ymm7,172885 add eax,DWORD[((44+128))+rsp]2886 and r12d,r9d2887 rorx r13d,r9d,252888 vpxor ymm6,ymm6,ymm72889 rorx esi,r9d,112890 lea ebx,[r14*1+rbx]2891 lea eax,[r12*1+rax]2892 vpsrlq ymm7,ymm7,22893 andn r12d,r9d,r11d2894 xor r13d,esi2895 rorx r14d,r9d,62896 vpxor ymm6,ymm6,ymm72897 lea eax,[r12*1+rax]2898 xor r13d,r14d2899 mov esi,ebx2900 vpshufd ymm6,ymm6,2322901 rorx r12d,ebx,222902 lea eax,[r13*1+rax]2903 xor esi,ecx2904 vpslldq ymm6,ymm6,82905 rorx r14d,ebx,132906 rorx r13d,ebx,22907 lea r8d,[rax*1+r8]2908 vpaddd ymm1,ymm1,ymm62909 and r15d,esi2910 vaesenc xmm9,xmm9,xmm102911 vmovdqu xmm10,XMMWORD[((112-128))+rdi]2912 xor r14d,r12d2913 xor r15d,ecx2914 vpaddd ymm6,ymm1,YMMWORD[32+rbp]2915 xor r14d,r13d2916 lea eax,[r15*1+rax]2917 mov r12d,r9d2918 vmovdqa YMMWORD[32+rsp],ymm62919 lea rsp,[((-64))+rsp]2920 vpalignr ymm4,ymm3,ymm2,42921 add r11d,DWORD[((0+128))+rsp]2922 and r12d,r8d2923 rorx r13d,r8d,252924 vpalignr ymm7,ymm1,ymm0,42925 rorx r15d,r8d,112926 lea eax,[r14*1+rax]2927 lea r11d,[r12*1+r11]2928 vpsrld ymm6,ymm4,72929 andn r12d,r8d,r10d2930 xor r13d,r15d2931 rorx r14d,r8d,62932 vpaddd ymm2,ymm2,ymm72933 lea r11d,[r12*1+r11]2934 xor r13d,r14d2935 mov r15d,eax2936 vpsrld ymm7,ymm4,32937 rorx r12d,eax,222938 lea r11d,[r13*1+r11]2939 xor r15d,ebx2940 vpslld ymm5,ymm4,142941 rorx r14d,eax,132942 rorx r13d,eax,22943 lea edx,[r11*1+rdx]2944 vpxor ymm4,ymm7,ymm62945 and esi,r15d2946 vaesenc xmm9,xmm9,xmm102947 vmovdqu xmm10,XMMWORD[((128-128))+rdi]2948 xor r14d,r12d2949 xor esi,ebx2950 vpshufd ymm7,ymm1,2502951 xor r14d,r13d2952 lea r11d,[rsi*1+r11]2953 mov r12d,r8d2954 vpsrld ymm6,ymm6,112955 add r10d,DWORD[((4+128))+rsp]2956 and r12d,edx2957 rorx r13d,edx,252958 vpxor ymm4,ymm4,ymm52959 rorx esi,edx,112960 lea r11d,[r14*1+r11]2961 lea r10d,[r12*1+r10]2962 vpslld ymm5,ymm5,112963 andn r12d,edx,r9d2964 xor r13d,esi2965 rorx r14d,edx,62966 vpxor ymm4,ymm4,ymm62967 lea r10d,[r12*1+r10]2968 xor r13d,r14d2969 mov esi,r11d2970 vpsrld ymm6,ymm7,102971 rorx r12d,r11d,222972 lea r10d,[r13*1+r10]2973 xor esi,eax2974 vpxor ymm4,ymm4,ymm52975 rorx r14d,r11d,132976 rorx r13d,r11d,22977 lea ecx,[r10*1+rcx]2978 vpsrlq ymm7,ymm7,172979 and r15d,esi2980 vaesenc xmm9,xmm9,xmm102981 vmovdqu xmm10,XMMWORD[((144-128))+rdi]2982 xor r14d,r12d2983 xor r15d,eax2984 vpaddd ymm2,ymm2,ymm42985 xor r14d,r13d2986 lea r10d,[r15*1+r10]2987 mov r12d,edx2988 vpxor ymm6,ymm6,ymm72989 add r9d,DWORD[((8+128))+rsp]2990 and r12d,ecx2991 rorx r13d,ecx,252992 vpsrlq ymm7,ymm7,22993 rorx r15d,ecx,112994 lea r10d,[r14*1+r10]2995 lea r9d,[r12*1+r9]2996 vpxor ymm6,ymm6,ymm72997 andn r12d,ecx,r8d2998 xor r13d,r15d2999 rorx r14d,ecx,63000 vpshufd ymm6,ymm6,1323001 lea r9d,[r12*1+r9]3002 xor r13d,r14d3003 mov r15d,r10d3004 vpsrldq ymm6,ymm6,83005 rorx r12d,r10d,223006 lea r9d,[r13*1+r9]3007 xor r15d,r11d3008 vpaddd ymm2,ymm2,ymm63009 rorx r14d,r10d,133010 rorx r13d,r10d,23011 lea ebx,[r9*1+rbx]3012 vpshufd ymm7,ymm2,803013 and esi,r15d3014 vaesenc xmm9,xmm9,xmm103015 vmovdqu xmm10,XMMWORD[((160-128))+rdi]3016 xor r14d,r12d3017 xor esi,r11d3018 vpsrld ymm6,ymm7,103019 xor r14d,r13d3020 lea r9d,[rsi*1+r9]3021 mov r12d,ecx3022 vpsrlq ymm7,ymm7,173023 add r8d,DWORD[((12+128))+rsp]3024 and r12d,ebx3025 rorx r13d,ebx,253026 vpxor ymm6,ymm6,ymm73027 rorx esi,ebx,113028 lea r9d,[r14*1+r9]3029 lea r8d,[r12*1+r8]3030 vpsrlq ymm7,ymm7,23031 andn r12d,ebx,edx3032 xor r13d,esi3033 rorx r14d,ebx,63034 vpxor ymm6,ymm6,ymm73035 lea r8d,[r12*1+r8]3036 xor r13d,r14d3037 mov esi,r9d3038 vpshufd ymm6,ymm6,2323039 rorx r12d,r9d,223040 lea r8d,[r13*1+r8]3041 xor esi,r10d3042 vpslldq ymm6,ymm6,83043 rorx r14d,r9d,133044 rorx r13d,r9d,23045 lea eax,[r8*1+rax]3046 vpaddd ymm2,ymm2,ymm63047 and r15d,esi3048 vaesenclast xmm11,xmm9,xmm103049 vaesenc xmm9,xmm9,xmm103050 vmovdqu xmm10,XMMWORD[((176-128))+rdi]3051 xor r14d,r12d3052 xor r15d,r10d3053 vpaddd ymm6,ymm2,YMMWORD[64+rbp]3054 xor r14d,r13d3055 lea r8d,[r15*1+r8]3056 mov r12d,ebx3057 vmovdqa YMMWORD[rsp],ymm63058 vpalignr ymm4,ymm0,ymm3,43059 add edx,DWORD[((32+128))+rsp]3060 and r12d,eax3061 rorx r13d,eax,253062 vpalignr ymm7,ymm2,ymm1,43063 rorx r15d,eax,113064 lea r8d,[r14*1+r8]3065 lea edx,[r12*1+rdx]3066 vpsrld ymm6,ymm4,73067 andn r12d,eax,ecx3068 xor r13d,r15d3069 rorx r14d,eax,63070 vpaddd ymm3,ymm3,ymm73071 lea edx,[r12*1+rdx]3072 xor r13d,r14d3073 mov r15d,r8d3074 vpsrld ymm7,ymm4,33075 rorx r12d,r8d,223076 lea edx,[r13*1+rdx]3077 xor r15d,r9d3078 vpslld ymm5,ymm4,143079 rorx r14d,r8d,133080 rorx r13d,r8d,23081 lea r11d,[rdx*1+r11]3082 vpxor ymm4,ymm7,ymm63083 and esi,r15d3084 vpand xmm8,xmm11,xmm123085 vaesenc xmm9,xmm9,xmm103086 vmovdqu xmm10,XMMWORD[((192-128))+rdi]3087 xor r14d,r12d3088 xor esi,r9d3089 vpshufd ymm7,ymm2,2503090 xor r14d,r13d3091 lea edx,[rsi*1+rdx]3092 mov r12d,eax3093 vpsrld ymm6,ymm6,113094 add ecx,DWORD[((36+128))+rsp]3095 and r12d,r11d3096 rorx r13d,r11d,253097 vpxor ymm4,ymm4,ymm53098 rorx esi,r11d,113099 lea edx,[r14*1+rdx]3100 lea ecx,[r12*1+rcx]3101 vpslld ymm5,ymm5,113102 andn r12d,r11d,ebx3103 xor r13d,esi3104 rorx r14d,r11d,63105 vpxor ymm4,ymm4,ymm63106 lea ecx,[r12*1+rcx]3107 xor r13d,r14d3108 mov esi,edx3109 vpsrld ymm6,ymm7,103110 rorx r12d,edx,223111 lea ecx,[r13*1+rcx]3112 xor esi,r8d3113 vpxor ymm4,ymm4,ymm53114 rorx r14d,edx,133115 rorx r13d,edx,23116 lea r10d,[rcx*1+r10]3117 vpsrlq ymm7,ymm7,173118 and r15d,esi3119 vaesenclast xmm11,xmm9,xmm103120 vaesenc xmm9,xmm9,xmm103121 vmovdqu xmm10,XMMWORD[((208-128))+rdi]3122 xor r14d,r12d3123 xor r15d,r8d3124 vpaddd ymm3,ymm3,ymm43125 xor r14d,r13d3126 lea ecx,[r15*1+rcx]3127 mov r12d,r11d3128 vpxor ymm6,ymm6,ymm73129 add ebx,DWORD[((40+128))+rsp]3130 and r12d,r10d3131 rorx r13d,r10d,253132 vpsrlq ymm7,ymm7,23133 rorx r15d,r10d,113134 lea ecx,[r14*1+rcx]3135 lea ebx,[r12*1+rbx]3136 vpxor ymm6,ymm6,ymm73137 andn r12d,r10d,eax3138 xor r13d,r15d3139 rorx r14d,r10d,63140 vpshufd ymm6,ymm6,1323141 lea ebx,[r12*1+rbx]3142 xor r13d,r14d3143 mov r15d,ecx3144 vpsrldq ymm6,ymm6,83145 rorx r12d,ecx,223146 lea ebx,[r13*1+rbx]3147 xor r15d,edx3148 vpaddd ymm3,ymm3,ymm63149 rorx r14d,ecx,133150 rorx r13d,ecx,23151 lea r9d,[rbx*1+r9]3152 vpshufd ymm7,ymm3,803153 and esi,r15d3154 vpand xmm11,xmm11,xmm133155 vaesenc xmm9,xmm9,xmm103156 vmovdqu xmm10,XMMWORD[((224-128))+rdi]3157 xor r14d,r12d3158 xor esi,edx3159 vpsrld ymm6,ymm7,103160 xor r14d,r13d3161 lea ebx,[rsi*1+rbx]3162 mov r12d,r10d3163 vpsrlq ymm7,ymm7,173164 add eax,DWORD[((44+128))+rsp]3165 and r12d,r9d3166 rorx r13d,r9d,253167 vpxor ymm6,ymm6,ymm73168 rorx esi,r9d,113169 lea ebx,[r14*1+rbx]3170 lea eax,[r12*1+rax]3171 vpsrlq ymm7,ymm7,23172 andn r12d,r9d,r11d3173 xor r13d,esi3174 rorx r14d,r9d,63175 vpxor ymm6,ymm6,ymm73176 lea eax,[r12*1+rax]3177 xor r13d,r14d3178 mov esi,ebx3179 vpshufd ymm6,ymm6,2323180 rorx r12d,ebx,223181 lea eax,[r13*1+rax]3182 xor esi,ecx3183 vpslldq ymm6,ymm6,83184 rorx r14d,ebx,133185 rorx r13d,ebx,23186 lea r8d,[rax*1+r8]3187 vpaddd ymm3,ymm3,ymm63188 and r15d,esi3189 vpor xmm8,xmm8,xmm113190 vaesenclast xmm11,xmm9,xmm103191 vmovdqu xmm10,XMMWORD[((0-128))+rdi]3192 xor r14d,r12d3193 xor r15d,ecx3194 vpaddd ymm6,ymm3,YMMWORD[96+rbp]3195 xor r14d,r13d3196 lea eax,[r15*1+rax]3197 mov r12d,r9d3198 vmovdqa YMMWORD[32+rsp],ymm63199 vmovq r13,xmm153200 vpextrq r15,xmm15,13201 vpand xmm11,xmm11,xmm143202 vpor xmm8,xmm8,xmm113203 vmovdqu XMMWORD[r13*1+r15],xmm83204 lea r13,[16+r13]3205 lea rbp,[128+rbp]3206 cmp BYTE[3+rbp],03207 jne NEAR $L$avx2_00_473208 vmovdqu xmm9,XMMWORD[r13]3209 vpinsrq xmm15,xmm15,r13,03210 add r11d,DWORD[((0+64))+rsp]3211 and r12d,r8d3212 rorx r13d,r8d,253213 rorx r15d,r8d,113214 lea eax,[r14*1+rax]3215 lea r11d,[r12*1+r11]3216 andn r12d,r8d,r10d3217 xor r13d,r15d3218 rorx r14d,r8d,63219 lea r11d,[r12*1+r11]3220 xor r13d,r14d3221 mov r15d,eax3222 rorx r12d,eax,223223 lea r11d,[r13*1+r11]3224 xor r15d,ebx3225 rorx r14d,eax,133226 rorx r13d,eax,23227 lea edx,[r11*1+rdx]3228 and esi,r15d3229 vpxor xmm9,xmm9,xmm103230 vmovdqu xmm10,XMMWORD[((16-128))+rdi]3231 xor r14d,r12d3232 xor esi,ebx3233 xor r14d,r13d3234 lea r11d,[rsi*1+r11]3235 mov r12d,r8d3236 add r10d,DWORD[((4+64))+rsp]3237 and r12d,edx3238 rorx r13d,edx,253239 rorx esi,edx,113240 lea r11d,[r14*1+r11]3241 lea r10d,[r12*1+r10]3242 andn r12d,edx,r9d3243 xor r13d,esi3244 rorx r14d,edx,63245 lea r10d,[r12*1+r10]3246 xor r13d,r14d3247 mov esi,r11d3248 rorx r12d,r11d,223249 lea r10d,[r13*1+r10]3250 xor esi,eax3251 rorx r14d,r11d,133252 rorx r13d,r11d,23253 lea ecx,[r10*1+rcx]3254 and r15d,esi3255 vpxor xmm9,xmm9,xmm83256 xor r14d,r12d3257 xor r15d,eax3258 xor r14d,r13d3259 lea r10d,[r15*1+r10]3260 mov r12d,edx3261 add r9d,DWORD[((8+64))+rsp]3262 and r12d,ecx3263 rorx r13d,ecx,253264 rorx r15d,ecx,113265 lea r10d,[r14*1+r10]3266 lea r9d,[r12*1+r9]3267 andn r12d,ecx,r8d3268 xor r13d,r15d3269 rorx r14d,ecx,63270 lea r9d,[r12*1+r9]3271 xor r13d,r14d3272 mov r15d,r10d3273 rorx r12d,r10d,223274 lea r9d,[r13*1+r9]3275 xor r15d,r11d3276 rorx r14d,r10d,133277 rorx r13d,r10d,23278 lea ebx,[r9*1+rbx]3279 and esi,r15d3280 vaesenc xmm9,xmm9,xmm103281 vmovdqu xmm10,XMMWORD[((32-128))+rdi]3282 xor r14d,r12d3283 xor esi,r11d3284 xor r14d,r13d3285 lea r9d,[rsi*1+r9]3286 mov r12d,ecx3287 add r8d,DWORD[((12+64))+rsp]3288 and r12d,ebx3289 rorx r13d,ebx,253290 rorx esi,ebx,113291 lea r9d,[r14*1+r9]3292 lea r8d,[r12*1+r8]3293 andn r12d,ebx,edx3294 xor r13d,esi3295 rorx r14d,ebx,63296 lea r8d,[r12*1+r8]3297 xor r13d,r14d3298 mov esi,r9d3299 rorx r12d,r9d,223300 lea r8d,[r13*1+r8]3301 xor esi,r10d3302 rorx r14d,r9d,133303 rorx r13d,r9d,23304 lea eax,[r8*1+rax]3305 and r15d,esi3306 vaesenc xmm9,xmm9,xmm103307 vmovdqu xmm10,XMMWORD[((48-128))+rdi]3308 xor r14d,r12d3309 xor r15d,r10d3310 xor r14d,r13d3311 lea r8d,[r15*1+r8]3312 mov r12d,ebx3313 add edx,DWORD[((32+64))+rsp]3314 and r12d,eax3315 rorx r13d,eax,253316 rorx r15d,eax,113317 lea r8d,[r14*1+r8]3318 lea edx,[r12*1+rdx]3319 andn r12d,eax,ecx3320 xor r13d,r15d3321 rorx r14d,eax,63322 lea edx,[r12*1+rdx]3323 xor r13d,r14d3324 mov r15d,r8d3325 rorx r12d,r8d,223326 lea edx,[r13*1+rdx]3327 xor r15d,r9d3328 rorx r14d,r8d,133329 rorx r13d,r8d,23330 lea r11d,[rdx*1+r11]3331 and esi,r15d3332 vaesenc xmm9,xmm9,xmm103333 vmovdqu xmm10,XMMWORD[((64-128))+rdi]3334 xor r14d,r12d3335 xor esi,r9d3336 xor r14d,r13d3337 lea edx,[rsi*1+rdx]3338 mov r12d,eax3339 add ecx,DWORD[((36+64))+rsp]3340 and r12d,r11d3341 rorx r13d,r11d,253342 rorx esi,r11d,113343 lea edx,[r14*1+rdx]3344 lea ecx,[r12*1+rcx]3345 andn r12d,r11d,ebx3346 xor r13d,esi3347 rorx r14d,r11d,63348 lea ecx,[r12*1+rcx]3349 xor r13d,r14d3350 mov esi,edx3351 rorx r12d,edx,223352 lea ecx,[r13*1+rcx]3353 xor esi,r8d3354 rorx r14d,edx,133355 rorx r13d,edx,23356 lea r10d,[rcx*1+r10]3357 and r15d,esi3358 vaesenc xmm9,xmm9,xmm103359 vmovdqu xmm10,XMMWORD[((80-128))+rdi]3360 xor r14d,r12d3361 xor r15d,r8d3362 xor r14d,r13d3363 lea ecx,[r15*1+rcx]3364 mov r12d,r11d3365 add ebx,DWORD[((40+64))+rsp]3366 and r12d,r10d3367 rorx r13d,r10d,253368 rorx r15d,r10d,113369 lea ecx,[r14*1+rcx]3370 lea ebx,[r12*1+rbx]3371 andn r12d,r10d,eax3372 xor r13d,r15d3373 rorx r14d,r10d,63374 lea ebx,[r12*1+rbx]3375 xor r13d,r14d3376 mov r15d,ecx3377 rorx r12d,ecx,223378 lea ebx,[r13*1+rbx]3379 xor r15d,edx3380 rorx r14d,ecx,133381 rorx r13d,ecx,23382 lea r9d,[rbx*1+r9]3383 and esi,r15d3384 vaesenc xmm9,xmm9,xmm103385 vmovdqu xmm10,XMMWORD[((96-128))+rdi]3386 xor r14d,r12d3387 xor esi,edx3388 xor r14d,r13d3389 lea ebx,[rsi*1+rbx]3390 mov r12d,r10d3391 add eax,DWORD[((44+64))+rsp]3392 and r12d,r9d3393 rorx r13d,r9d,253394 rorx esi,r9d,113395 lea ebx,[r14*1+rbx]3396 lea eax,[r12*1+rax]3397 andn r12d,r9d,r11d3398 xor r13d,esi3399 rorx r14d,r9d,63400 lea eax,[r12*1+rax]3401 xor r13d,r14d3402 mov esi,ebx3403 rorx r12d,ebx,223404 lea eax,[r13*1+rax]3405 xor esi,ecx3406 rorx r14d,ebx,133407 rorx r13d,ebx,23408 lea r8d,[rax*1+r8]3409 and r15d,esi3410 vaesenc xmm9,xmm9,xmm103411 vmovdqu xmm10,XMMWORD[((112-128))+rdi]3412 xor r14d,r12d3413 xor r15d,ecx3414 xor r14d,r13d3415 lea eax,[r15*1+rax]3416 mov r12d,r9d3417 add r11d,DWORD[rsp]3418 and r12d,r8d3419 rorx r13d,r8d,253420 rorx r15d,r8d,113421 lea eax,[r14*1+rax]3422 lea r11d,[r12*1+r11]3423 andn r12d,r8d,r10d3424 xor r13d,r15d3425 rorx r14d,r8d,63426 lea r11d,[r12*1+r11]3427 xor r13d,r14d3428 mov r15d,eax3429 rorx r12d,eax,223430 lea r11d,[r13*1+r11]3431 xor r15d,ebx3432 rorx r14d,eax,133433 rorx r13d,eax,23434 lea edx,[r11*1+rdx]3435 and esi,r15d3436 vaesenc xmm9,xmm9,xmm103437 vmovdqu xmm10,XMMWORD[((128-128))+rdi]3438 xor r14d,r12d3439 xor esi,ebx3440 xor r14d,r13d3441 lea r11d,[rsi*1+r11]3442 mov r12d,r8d3443 add r10d,DWORD[4+rsp]3444 and r12d,edx3445 rorx r13d,edx,253446 rorx esi,edx,113447 lea r11d,[r14*1+r11]3448 lea r10d,[r12*1+r10]3449 andn r12d,edx,r9d3450 xor r13d,esi3451 rorx r14d,edx,63452 lea r10d,[r12*1+r10]3453 xor r13d,r14d3454 mov esi,r11d3455 rorx r12d,r11d,223456 lea r10d,[r13*1+r10]3457 xor esi,eax3458 rorx r14d,r11d,133459 rorx r13d,r11d,23460 lea ecx,[r10*1+rcx]3461 and r15d,esi3462 vaesenc xmm9,xmm9,xmm103463 vmovdqu xmm10,XMMWORD[((144-128))+rdi]3464 xor r14d,r12d3465 xor r15d,eax3466 xor r14d,r13d3467 lea r10d,[r15*1+r10]3468 mov r12d,edx3469 add r9d,DWORD[8+rsp]3470 and r12d,ecx3471 rorx r13d,ecx,253472 rorx r15d,ecx,113473 lea r10d,[r14*1+r10]3474 lea r9d,[r12*1+r9]3475 andn r12d,ecx,r8d3476 xor r13d,r15d3477 rorx r14d,ecx,63478 lea r9d,[r12*1+r9]3479 xor r13d,r14d3480 mov r15d,r10d3481 rorx r12d,r10d,223482 lea r9d,[r13*1+r9]3483 xor r15d,r11d3484 rorx r14d,r10d,133485 rorx r13d,r10d,23486 lea ebx,[r9*1+rbx]3487 and esi,r15d3488 vaesenc xmm9,xmm9,xmm103489 vmovdqu xmm10,XMMWORD[((160-128))+rdi]3490 xor r14d,r12d3491 xor esi,r11d3492 xor r14d,r13d3493 lea r9d,[rsi*1+r9]3494 mov r12d,ecx3495 add r8d,DWORD[12+rsp]3496 and r12d,ebx3497 rorx r13d,ebx,253498 rorx esi,ebx,113499 lea r9d,[r14*1+r9]3500 lea r8d,[r12*1+r8]3501 andn r12d,ebx,edx3502 xor r13d,esi3503 rorx r14d,ebx,63504 lea r8d,[r12*1+r8]3505 xor r13d,r14d3506 mov esi,r9d3507 rorx r12d,r9d,223508 lea r8d,[r13*1+r8]3509 xor esi,r10d3510 rorx r14d,r9d,133511 rorx r13d,r9d,23512 lea eax,[r8*1+rax]3513 and r15d,esi3514 vaesenclast xmm11,xmm9,xmm103515 vaesenc xmm9,xmm9,xmm103516 vmovdqu xmm10,XMMWORD[((176-128))+rdi]3517 xor r14d,r12d3518 xor r15d,r10d3519 xor r14d,r13d3520 lea r8d,[r15*1+r8]3521 mov r12d,ebx3522 add edx,DWORD[32+rsp]3523 and r12d,eax3524 rorx r13d,eax,253525 rorx r15d,eax,113526 lea r8d,[r14*1+r8]3527 lea edx,[r12*1+rdx]3528 andn r12d,eax,ecx3529 xor r13d,r15d3530 rorx r14d,eax,63531 lea edx,[r12*1+rdx]3532 xor r13d,r14d3533 mov r15d,r8d3534 rorx r12d,r8d,223535 lea edx,[r13*1+rdx]3536 xor r15d,r9d3537 rorx r14d,r8d,133538 rorx r13d,r8d,23539 lea r11d,[rdx*1+r11]3540 and esi,r15d3541 vpand xmm8,xmm11,xmm123542 vaesenc xmm9,xmm9,xmm103543 vmovdqu xmm10,XMMWORD[((192-128))+rdi]3544 xor r14d,r12d3545 xor esi,r9d3546 xor r14d,r13d3547 lea edx,[rsi*1+rdx]3548 mov r12d,eax3549 add ecx,DWORD[36+rsp]3550 and r12d,r11d3551 rorx r13d,r11d,253552 rorx esi,r11d,113553 lea edx,[r14*1+rdx]3554 lea ecx,[r12*1+rcx]3555 andn r12d,r11d,ebx3556 xor r13d,esi3557 rorx r14d,r11d,63558 lea ecx,[r12*1+rcx]3559 xor r13d,r14d3560 mov esi,edx3561 rorx r12d,edx,223562 lea ecx,[r13*1+rcx]3563 xor esi,r8d3564 rorx r14d,edx,133565 rorx r13d,edx,23566 lea r10d,[rcx*1+r10]3567 and r15d,esi3568 vaesenclast xmm11,xmm9,xmm103569 vaesenc xmm9,xmm9,xmm103570 vmovdqu xmm10,XMMWORD[((208-128))+rdi]3571 xor r14d,r12d3572 xor r15d,r8d3573 xor r14d,r13d3574 lea ecx,[r15*1+rcx]3575 mov r12d,r11d3576 add ebx,DWORD[40+rsp]3577 and r12d,r10d3578 rorx r13d,r10d,253579 rorx r15d,r10d,113580 lea ecx,[r14*1+rcx]3581 lea ebx,[r12*1+rbx]3582 andn r12d,r10d,eax3583 xor r13d,r15d3584 rorx r14d,r10d,63585 lea ebx,[r12*1+rbx]3586 xor r13d,r14d3587 mov r15d,ecx3588 rorx r12d,ecx,223589 lea ebx,[r13*1+rbx]3590 xor r15d,edx3591 rorx r14d,ecx,133592 rorx r13d,ecx,23593 lea r9d,[rbx*1+r9]3594 and esi,r15d3595 vpand xmm11,xmm11,xmm133596 vaesenc xmm9,xmm9,xmm103597 vmovdqu xmm10,XMMWORD[((224-128))+rdi]3598 xor r14d,r12d3599 xor esi,edx3600 xor r14d,r13d3601 lea ebx,[rsi*1+rbx]3602 mov r12d,r10d3603 add eax,DWORD[44+rsp]3604 and r12d,r9d3605 rorx r13d,r9d,253606 rorx esi,r9d,113607 lea ebx,[r14*1+rbx]3608 lea eax,[r12*1+rax]3609 andn r12d,r9d,r11d3610 xor r13d,esi3611 rorx r14d,r9d,63612 lea eax,[r12*1+rax]3613 xor r13d,r14d3614 mov esi,ebx3615 rorx r12d,ebx,223616 lea eax,[r13*1+rax]3617 xor esi,ecx3618 rorx r14d,ebx,133619 rorx r13d,ebx,23620 lea r8d,[rax*1+r8]3621 and r15d,esi3622 vpor xmm8,xmm8,xmm113623 vaesenclast xmm11,xmm9,xmm103624 vmovdqu xmm10,XMMWORD[((0-128))+rdi]3625 xor r14d,r12d3626 xor r15d,ecx3627 xor r14d,r13d3628 lea eax,[r15*1+rax]3629 mov r12d,r9d3630 vpextrq r12,xmm15,13631 vmovq r13,xmm153632 mov r15,QWORD[552+rsp]3633 add eax,r14d3634 lea rbp,[448+rsp]3635 3636 vpand xmm11,xmm11,xmm143637 vpor xmm8,xmm8,xmm113638 vmovdqu XMMWORD[r13*1+r12],xmm83639 lea r13,[16+r13]3640 3641 add eax,DWORD[r15]3642 add ebx,DWORD[4+r15]3643 add ecx,DWORD[8+r15]3644 add edx,DWORD[12+r15]3645 add r8d,DWORD[16+r15]3646 add r9d,DWORD[20+r15]3647 add r10d,DWORD[24+r15]3648 add r11d,DWORD[28+r15]3649 3650 mov DWORD[r15],eax3651 mov DWORD[4+r15],ebx3652 mov DWORD[8+r15],ecx3653 mov DWORD[12+r15],edx3654 mov DWORD[16+r15],r8d3655 mov DWORD[20+r15],r9d3656 mov DWORD[24+r15],r10d3657 mov DWORD[28+r15],r11d3658 3659 cmp r13,QWORD[80+rbp]3660 je NEAR $L$done_avx23661 3662 xor r14d,r14d3663 mov esi,ebx3664 mov r12d,r9d3665 xor esi,ecx3666 jmp NEAR $L$ower_avx23667 ALIGN 163668 $L$ower_avx2:3669 vmovdqu xmm9,XMMWORD[r13]3670 vpinsrq xmm15,xmm15,r13,03671 add r11d,DWORD[((0+16))+rbp]3672 and r12d,r8d3673 rorx r13d,r8d,253674 rorx r15d,r8d,113675 lea eax,[r14*1+rax]3676 lea r11d,[r12*1+r11]3677 andn r12d,r8d,r10d3678 xor r13d,r15d3679 rorx r14d,r8d,63680 lea r11d,[r12*1+r11]3681 xor r13d,r14d3682 mov r15d,eax3683 rorx r12d,eax,223684 lea r11d,[r13*1+r11]3685 xor r15d,ebx3686 rorx r14d,eax,133687 rorx r13d,eax,23688 lea edx,[r11*1+rdx]3689 and esi,r15d3690 vpxor xmm9,xmm9,xmm103691 vmovdqu xmm10,XMMWORD[((16-128))+rdi]3692 xor r14d,r12d3693 xor esi,ebx3694 xor r14d,r13d3695 lea r11d,[rsi*1+r11]3696 mov r12d,r8d3697 add r10d,DWORD[((4+16))+rbp]3698 and r12d,edx3699 rorx r13d,edx,253700 rorx esi,edx,113701 lea r11d,[r14*1+r11]3702 lea r10d,[r12*1+r10]3703 andn r12d,edx,r9d3704 xor r13d,esi3705 rorx r14d,edx,63706 lea r10d,[r12*1+r10]3707 xor r13d,r14d3708 mov esi,r11d3709 rorx r12d,r11d,223710 lea r10d,[r13*1+r10]3711 xor esi,eax3712 rorx r14d,r11d,133713 rorx r13d,r11d,23714 lea ecx,[r10*1+rcx]3715 and r15d,esi3716 vpxor xmm9,xmm9,xmm83717 xor r14d,r12d3718 xor r15d,eax3719 xor r14d,r13d3720 lea r10d,[r15*1+r10]3721 mov r12d,edx3722 add r9d,DWORD[((8+16))+rbp]3723 and r12d,ecx3724 rorx r13d,ecx,253725 rorx r15d,ecx,113726 lea r10d,[r14*1+r10]3727 lea r9d,[r12*1+r9]3728 andn r12d,ecx,r8d3729 xor r13d,r15d3730 rorx r14d,ecx,63731 lea r9d,[r12*1+r9]3732 xor r13d,r14d3733 mov r15d,r10d3734 rorx r12d,r10d,223735 lea r9d,[r13*1+r9]3736 xor r15d,r11d3737 rorx r14d,r10d,133738 rorx r13d,r10d,23739 lea ebx,[r9*1+rbx]3740 and esi,r15d3741 vaesenc xmm9,xmm9,xmm103742 vmovdqu xmm10,XMMWORD[((32-128))+rdi]3743 xor r14d,r12d3744 xor esi,r11d3745 xor r14d,r13d3746 lea r9d,[rsi*1+r9]3747 mov r12d,ecx3748 add r8d,DWORD[((12+16))+rbp]3749 and r12d,ebx3750 rorx r13d,ebx,253751 rorx esi,ebx,113752 lea r9d,[r14*1+r9]3753 lea r8d,[r12*1+r8]3754 andn r12d,ebx,edx3755 xor r13d,esi3756 rorx r14d,ebx,63757 lea r8d,[r12*1+r8]3758 xor r13d,r14d3759 mov esi,r9d3760 rorx r12d,r9d,223761 lea r8d,[r13*1+r8]3762 xor esi,r10d3763 rorx r14d,r9d,133764 rorx r13d,r9d,23765 lea eax,[r8*1+rax]3766 and r15d,esi3767 vaesenc xmm9,xmm9,xmm103768 vmovdqu xmm10,XMMWORD[((48-128))+rdi]3769 xor r14d,r12d3770 xor r15d,r10d3771 xor r14d,r13d3772 lea r8d,[r15*1+r8]3773 mov r12d,ebx3774 add edx,DWORD[((32+16))+rbp]3775 and r12d,eax3776 rorx r13d,eax,253777 rorx r15d,eax,113778 lea r8d,[r14*1+r8]3779 lea edx,[r12*1+rdx]3780 andn r12d,eax,ecx3781 xor r13d,r15d3782 rorx r14d,eax,63783 lea edx,[r12*1+rdx]3784 xor r13d,r14d3785 mov r15d,r8d3786 rorx r12d,r8d,223787 lea edx,[r13*1+rdx]3788 xor r15d,r9d3789 rorx r14d,r8d,133790 rorx r13d,r8d,23791 lea r11d,[rdx*1+r11]3792 and esi,r15d3793 vaesenc xmm9,xmm9,xmm103794 vmovdqu xmm10,XMMWORD[((64-128))+rdi]3795 xor r14d,r12d3796 xor esi,r9d3797 xor r14d,r13d3798 lea edx,[rsi*1+rdx]3799 mov r12d,eax3800 add ecx,DWORD[((36+16))+rbp]3801 and r12d,r11d3802 rorx r13d,r11d,253803 rorx esi,r11d,113804 lea edx,[r14*1+rdx]3805 lea ecx,[r12*1+rcx]3806 andn r12d,r11d,ebx3807 xor r13d,esi3808 rorx r14d,r11d,63809 lea ecx,[r12*1+rcx]3810 xor r13d,r14d3811 mov esi,edx3812 rorx r12d,edx,223813 lea ecx,[r13*1+rcx]3814 xor esi,r8d3815 rorx r14d,edx,133816 rorx r13d,edx,23817 lea r10d,[rcx*1+r10]3818 and r15d,esi3819 vaesenc xmm9,xmm9,xmm103820 vmovdqu xmm10,XMMWORD[((80-128))+rdi]3821 xor r14d,r12d3822 xor r15d,r8d3823 xor r14d,r13d3824 lea ecx,[r15*1+rcx]3825 mov r12d,r11d3826 add ebx,DWORD[((40+16))+rbp]3827 and r12d,r10d3828 rorx r13d,r10d,253829 rorx r15d,r10d,113830 lea ecx,[r14*1+rcx]3831 lea ebx,[r12*1+rbx]3832 andn r12d,r10d,eax3833 xor r13d,r15d3834 rorx r14d,r10d,63835 lea ebx,[r12*1+rbx]3836 xor r13d,r14d3837 mov r15d,ecx3838 rorx r12d,ecx,223839 lea ebx,[r13*1+rbx]3840 xor r15d,edx3841 rorx r14d,ecx,133842 rorx r13d,ecx,23843 lea r9d,[rbx*1+r9]3844 and esi,r15d3845 vaesenc xmm9,xmm9,xmm103846 vmovdqu xmm10,XMMWORD[((96-128))+rdi]3847 xor r14d,r12d3848 xor esi,edx3849 xor r14d,r13d3850 lea ebx,[rsi*1+rbx]3851 mov r12d,r10d3852 add eax,DWORD[((44+16))+rbp]3853 and r12d,r9d3854 rorx r13d,r9d,253855 rorx esi,r9d,113856 lea ebx,[r14*1+rbx]3857 lea eax,[r12*1+rax]3858 andn r12d,r9d,r11d3859 xor r13d,esi3860 rorx r14d,r9d,63861 lea eax,[r12*1+rax]3862 xor r13d,r14d3863 mov esi,ebx3864 rorx r12d,ebx,223865 lea eax,[r13*1+rax]3866 xor esi,ecx3867 rorx r14d,ebx,133868 rorx r13d,ebx,23869 lea r8d,[rax*1+r8]3870 and r15d,esi3871 vaesenc xmm9,xmm9,xmm103872 vmovdqu xmm10,XMMWORD[((112-128))+rdi]3873 xor r14d,r12d3874 xor r15d,ecx3875 xor r14d,r13d3876 lea eax,[r15*1+rax]3877 mov r12d,r9d3878 lea rbp,[((-64))+rbp]3879 add r11d,DWORD[((0+16))+rbp]3880 and r12d,r8d3881 rorx r13d,r8d,253882 rorx r15d,r8d,113883 lea eax,[r14*1+rax]3884 lea r11d,[r12*1+r11]3885 andn r12d,r8d,r10d3886 xor r13d,r15d3887 rorx r14d,r8d,63888 lea r11d,[r12*1+r11]3889 xor r13d,r14d3890 mov r15d,eax3891 rorx r12d,eax,223892 lea r11d,[r13*1+r11]3893 xor r15d,ebx3894 rorx r14d,eax,133895 rorx r13d,eax,23896 lea edx,[r11*1+rdx]3897 and esi,r15d3898 vaesenc xmm9,xmm9,xmm103899 vmovdqu xmm10,XMMWORD[((128-128))+rdi]3900 xor r14d,r12d3901 xor esi,ebx3902 xor r14d,r13d3903 lea r11d,[rsi*1+r11]3904 mov r12d,r8d3905 add r10d,DWORD[((4+16))+rbp]3906 and r12d,edx3907 rorx r13d,edx,253908 rorx esi,edx,113909 lea r11d,[r14*1+r11]3910 lea r10d,[r12*1+r10]3911 andn r12d,edx,r9d3912 xor r13d,esi3913 rorx r14d,edx,63914 lea r10d,[r12*1+r10]3915 xor r13d,r14d3916 mov esi,r11d3917 rorx r12d,r11d,223918 lea r10d,[r13*1+r10]3919 xor esi,eax3920 rorx r14d,r11d,133921 rorx r13d,r11d,23922 lea ecx,[r10*1+rcx]3923 and r15d,esi3924 vaesenc xmm9,xmm9,xmm103925 vmovdqu xmm10,XMMWORD[((144-128))+rdi]3926 xor r14d,r12d3927 xor r15d,eax3928 xor r14d,r13d3929 lea r10d,[r15*1+r10]3930 mov r12d,edx3931 add r9d,DWORD[((8+16))+rbp]3932 and r12d,ecx3933 rorx r13d,ecx,253934 rorx r15d,ecx,113935 lea r10d,[r14*1+r10]3936 lea r9d,[r12*1+r9]3937 andn r12d,ecx,r8d3938 xor r13d,r15d3939 rorx r14d,ecx,63940 lea r9d,[r12*1+r9]3941 xor r13d,r14d3942 mov r15d,r10d3943 rorx r12d,r10d,223944 lea r9d,[r13*1+r9]3945 xor r15d,r11d3946 rorx r14d,r10d,133947 rorx r13d,r10d,23948 lea ebx,[r9*1+rbx]3949 and esi,r15d3950 vaesenc xmm9,xmm9,xmm103951 vmovdqu xmm10,XMMWORD[((160-128))+rdi]3952 xor r14d,r12d3953 xor esi,r11d3954 xor r14d,r13d3955 lea r9d,[rsi*1+r9]3956 mov r12d,ecx3957 add r8d,DWORD[((12+16))+rbp]3958 and r12d,ebx3959 rorx r13d,ebx,253960 rorx esi,ebx,113961 lea r9d,[r14*1+r9]3962 lea r8d,[r12*1+r8]3963 andn r12d,ebx,edx3964 xor r13d,esi3965 rorx r14d,ebx,63966 lea r8d,[r12*1+r8]3967 xor r13d,r14d3968 mov esi,r9d3969 rorx r12d,r9d,223970 lea r8d,[r13*1+r8]3971 xor esi,r10d3972 rorx r14d,r9d,133973 rorx r13d,r9d,23974 lea eax,[r8*1+rax]3975 and r15d,esi3976 vaesenclast xmm11,xmm9,xmm103977 vaesenc xmm9,xmm9,xmm103978 vmovdqu xmm10,XMMWORD[((176-128))+rdi]3979 xor r14d,r12d3980 xor r15d,r10d3981 xor r14d,r13d3982 lea r8d,[r15*1+r8]3983 mov r12d,ebx3984 add edx,DWORD[((32+16))+rbp]3985 and r12d,eax3986 rorx r13d,eax,253987 rorx r15d,eax,113988 lea r8d,[r14*1+r8]3989 lea edx,[r12*1+rdx]3990 andn r12d,eax,ecx3991 xor r13d,r15d3992 rorx r14d,eax,63993 lea edx,[r12*1+rdx]3994 xor r13d,r14d3995 mov r15d,r8d3996 rorx r12d,r8d,223997 lea edx,[r13*1+rdx]3998 xor r15d,r9d3999 rorx r14d,r8d,134000 rorx r13d,r8d,24001 lea r11d,[rdx*1+r11]4002 and esi,r15d4003 vpand xmm8,xmm11,xmm124004 vaesenc xmm9,xmm9,xmm104005 vmovdqu xmm10,XMMWORD[((192-128))+rdi]4006 xor r14d,r12d4007 xor esi,r9d4008 xor r14d,r13d4009 lea edx,[rsi*1+rdx]4010 mov r12d,eax4011 add ecx,DWORD[((36+16))+rbp]4012 and r12d,r11d4013 rorx r13d,r11d,254014 rorx esi,r11d,114015 lea edx,[r14*1+rdx]4016 lea ecx,[r12*1+rcx]4017 andn r12d,r11d,ebx4018 xor r13d,esi4019 rorx r14d,r11d,64020 lea ecx,[r12*1+rcx]4021 xor r13d,r14d4022 mov esi,edx4023 rorx r12d,edx,224024 lea ecx,[r13*1+rcx]4025 xor esi,r8d4026 rorx r14d,edx,134027 rorx r13d,edx,24028 lea r10d,[rcx*1+r10]4029 and r15d,esi4030 vaesenclast xmm11,xmm9,xmm104031 vaesenc xmm9,xmm9,xmm104032 vmovdqu xmm10,XMMWORD[((208-128))+rdi]4033 xor r14d,r12d4034 xor r15d,r8d4035 xor r14d,r13d4036 lea ecx,[r15*1+rcx]4037 mov r12d,r11d4038 add ebx,DWORD[((40+16))+rbp]4039 and r12d,r10d4040 rorx r13d,r10d,254041 rorx r15d,r10d,114042 lea ecx,[r14*1+rcx]4043 lea ebx,[r12*1+rbx]4044 andn r12d,r10d,eax4045 xor r13d,r15d4046 rorx r14d,r10d,64047 lea ebx,[r12*1+rbx]4048 xor r13d,r14d4049 mov r15d,ecx4050 rorx r12d,ecx,224051 lea ebx,[r13*1+rbx]4052 xor r15d,edx4053 rorx r14d,ecx,134054 rorx r13d,ecx,24055 lea r9d,[rbx*1+r9]4056 and esi,r15d4057 vpand xmm11,xmm11,xmm134058 vaesenc xmm9,xmm9,xmm104059 vmovdqu xmm10,XMMWORD[((224-128))+rdi]4060 xor r14d,r12d4061 xor esi,edx4062 xor r14d,r13d4063 lea ebx,[rsi*1+rbx]4064 mov r12d,r10d4065 add eax,DWORD[((44+16))+rbp]4066 and r12d,r9d4067 rorx r13d,r9d,254068 rorx esi,r9d,114069 lea ebx,[r14*1+rbx]4070 lea eax,[r12*1+rax]4071 andn r12d,r9d,r11d4072 xor r13d,esi4073 rorx r14d,r9d,64074 lea eax,[r12*1+rax]4075 xor r13d,r14d4076 mov esi,ebx4077 rorx r12d,ebx,224078 lea eax,[r13*1+rax]4079 xor esi,ecx4080 rorx r14d,ebx,134081 rorx r13d,ebx,24082 lea r8d,[rax*1+r8]4083 and r15d,esi4084 vpor xmm8,xmm8,xmm114085 vaesenclast xmm11,xmm9,xmm104086 vmovdqu xmm10,XMMWORD[((0-128))+rdi]4087 xor r14d,r12d4088 xor r15d,ecx4089 xor r14d,r13d4090 lea eax,[r15*1+rax]4091 mov r12d,r9d4092 vmovq r13,xmm154093 vpextrq r15,xmm15,14094 vpand xmm11,xmm11,xmm144095 vpor xmm8,xmm8,xmm114096 lea rbp,[((-64))+rbp]4097 vmovdqu XMMWORD[r13*1+r15],xmm84098 lea r13,[16+r13]4099 cmp rbp,rsp4100 jae NEAR $L$ower_avx24101 4102 mov r15,QWORD[552+rsp]4103 lea r13,[64+r13]4104 mov rsi,QWORD[560+rsp]4105 add eax,r14d4106 lea rsp,[448+rsp]4107 4108 add eax,DWORD[r15]4109 add ebx,DWORD[4+r15]4110 add ecx,DWORD[8+r15]4111 add edx,DWORD[12+r15]4112 add r8d,DWORD[16+r15]4113 add r9d,DWORD[20+r15]4114 add r10d,DWORD[24+r15]4115 lea r12,[r13*1+rsi]4116 add r11d,DWORD[28+r15]4117 4118 cmp r13,QWORD[((64+16))+rsp]4119 4120 mov DWORD[r15],eax4121 cmove r12,rsp4122 mov DWORD[4+r15],ebx4123 mov DWORD[8+r15],ecx4124 mov DWORD[12+r15],edx4125 mov DWORD[16+r15],r8d4126 mov DWORD[20+r15],r9d4127 mov DWORD[24+r15],r10d4128 mov DWORD[28+r15],r11d4129 4130 jbe NEAR $L$oop_avx24131 lea rbp,[rsp]4132 4133 4134 4135 4136 $L$done_avx2:4137 mov r8,QWORD[((64+32))+rbp]4138 mov rsi,QWORD[((64+56))+rbp]4139 4140 vmovdqu XMMWORD[r8],xmm84141 vzeroall4142 movaps xmm6,XMMWORD[128+rbp]4143 movaps xmm7,XMMWORD[144+rbp]4144 movaps xmm8,XMMWORD[160+rbp]4145 movaps xmm9,XMMWORD[176+rbp]4146 movaps xmm10,XMMWORD[192+rbp]4147 movaps xmm11,XMMWORD[208+rbp]4148 movaps xmm12,XMMWORD[224+rbp]4149 movaps xmm13,XMMWORD[240+rbp]4150 movaps xmm14,XMMWORD[256+rbp]4151 movaps xmm15,XMMWORD[272+rbp]4152 mov r15,QWORD[((-48))+rsi]4153 4154 mov r14,QWORD[((-40))+rsi]4155 4156 mov r13,QWORD[((-32))+rsi]4157 4158 mov r12,QWORD[((-24))+rsi]4159 4160 mov rbp,QWORD[((-16))+rsi]4161 4162 mov rbx,QWORD[((-8))+rsi]4163 4164 lea rsp,[rsi]4165 4166 $L$epilogue_avx2:4167 mov rdi,QWORD[8+rsp] ;WIN64 epilogue4168 mov rsi,QWORD[16+rsp]4169 DB 0F3h,0C3h ;repret4170 4171 $L$SEH_end_aesni_cbc_sha256_enc_avx2:4172 4173 ALIGN 324174 aesni_cbc_sha256_enc_shaext:4175 mov QWORD[8+rsp],rdi ;WIN64 prologue4176 mov QWORD[16+rsp],rsi4177 mov rax,rsp4178 $L$SEH_begin_aesni_cbc_sha256_enc_shaext:4179 mov rdi,rcx4180 mov rsi,rdx4181 mov rdx,r84182 mov rcx,r94183 mov r8,QWORD[40+rsp]4184 mov r9,QWORD[48+rsp]4185 4186 4187 4188 mov r10,QWORD[56+rsp]4189 lea rsp,[((-168))+rsp]4190 movaps XMMWORD[(-8-160)+rax],xmm64191 movaps XMMWORD[(-8-144)+rax],xmm74192 movaps XMMWORD[(-8-128)+rax],xmm84193 movaps XMMWORD[(-8-112)+rax],xmm94194 movaps XMMWORD[(-8-96)+rax],xmm104195 movaps XMMWORD[(-8-80)+rax],xmm114196 movaps XMMWORD[(-8-64)+rax],xmm124197 movaps XMMWORD[(-8-48)+rax],xmm134198 movaps XMMWORD[(-8-32)+rax],xmm144199 movaps XMMWORD[(-8-16)+rax],xmm154200 $L$prologue_shaext:4201 lea rax,[((K256+128))]4202 movdqu xmm1,XMMWORD[r9]4203 movdqu xmm2,XMMWORD[16+r9]4204 movdqa xmm3,XMMWORD[((512-128))+rax]4205 4206 mov r11d,DWORD[240+rcx]4207 sub rsi,rdi4208 movups xmm15,XMMWORD[rcx]4209 movups xmm6,XMMWORD[r8]4210 movups xmm4,XMMWORD[16+rcx]4211 lea rcx,[112+rcx]4212 4213 pshufd xmm0,xmm1,0x1b4214 pshufd xmm1,xmm1,0xb14215 pshufd xmm2,xmm2,0x1b4216 movdqa xmm7,xmm34217 DB 102,15,58,15,202,84218 punpcklqdq xmm2,xmm04219 4220 jmp NEAR $L$oop_shaext4221 4222 ALIGN 164223 $L$oop_shaext:4224 movdqu xmm10,XMMWORD[r10]4225 movdqu xmm11,XMMWORD[16+r10]4226 movdqu xmm12,XMMWORD[32+r10]4227 DB 102,68,15,56,0,2114228 movdqu xmm13,XMMWORD[48+r10]4229 4230 movdqa xmm0,XMMWORD[((0-128))+rax]4231 paddd xmm0,xmm104232 DB 102,68,15,56,0,2194233 movdqa xmm9,xmm24234 movdqa xmm8,xmm14235 movups xmm14,XMMWORD[rdi]4236 xorps xmm14,xmm154237 xorps xmm6,xmm144238 movups xmm5,XMMWORD[((-80))+rcx]4239 aesenc xmm6,xmm44240 DB 15,56,203,2094241 pshufd xmm0,xmm0,0x0e4242 movups xmm4,XMMWORD[((-64))+rcx]4243 aesenc xmm6,xmm54244 DB 15,56,203,2024245 4246 movdqa xmm0,XMMWORD[((32-128))+rax]4247 paddd xmm0,xmm114248 DB 102,68,15,56,0,2274249 lea r10,[64+r10]4250 movups xmm5,XMMWORD[((-48))+rcx]4251 aesenc xmm6,xmm44252 DB 15,56,203,2094253 pshufd xmm0,xmm0,0x0e4254 movups xmm4,XMMWORD[((-32))+rcx]4255 aesenc xmm6,xmm54256 DB 15,56,203,2024257 4258 movdqa xmm0,XMMWORD[((64-128))+rax]4259 paddd xmm0,xmm124260 DB 102,68,15,56,0,2354261 DB 69,15,56,204,2114262 movups xmm5,XMMWORD[((-16))+rcx]4263 aesenc xmm6,xmm44264 DB 15,56,203,2094265 pshufd xmm0,xmm0,0x0e4266 movdqa xmm3,xmm134267 DB 102,65,15,58,15,220,44268 paddd xmm10,xmm34269 movups xmm4,XMMWORD[rcx]4270 aesenc xmm6,xmm54271 DB 15,56,203,2024272 4273 movdqa xmm0,XMMWORD[((96-128))+rax]4274 paddd xmm0,xmm134275 DB 69,15,56,205,2134276 DB 69,15,56,204,2204277 movups xmm5,XMMWORD[16+rcx]4278 aesenc xmm6,xmm44279 DB 15,56,203,2094280 pshufd xmm0,xmm0,0x0e4281 movups xmm4,XMMWORD[32+rcx]4282 aesenc xmm6,xmm54283 movdqa xmm3,xmm104284 DB 102,65,15,58,15,221,44285 paddd xmm11,xmm34286 DB 15,56,203,2024287 movdqa xmm0,XMMWORD[((128-128))+rax]4288 paddd xmm0,xmm104289 DB 69,15,56,205,2184290 DB 69,15,56,204,2294291 movups xmm5,XMMWORD[48+rcx]4292 aesenc xmm6,xmm44293 DB 15,56,203,2094294 pshufd xmm0,xmm0,0x0e4295 movdqa xmm3,xmm114296 DB 102,65,15,58,15,218,44297 paddd xmm12,xmm34298 cmp r11d,114299 jb NEAR $L$aesenclast14300 movups xmm4,XMMWORD[64+rcx]4301 aesenc xmm6,xmm54302 movups xmm5,XMMWORD[80+rcx]4303 aesenc xmm6,xmm44304 je NEAR $L$aesenclast14305 movups xmm4,XMMWORD[96+rcx]4306 aesenc xmm6,xmm54307 movups xmm5,XMMWORD[112+rcx]4308 aesenc xmm6,xmm44309 $L$aesenclast1:4310 aesenclast xmm6,xmm54311 movups xmm4,XMMWORD[((16-112))+rcx]4312 nop4313 DB 15,56,203,2024314 movups xmm14,XMMWORD[16+rdi]4315 xorps xmm14,xmm154316 movups XMMWORD[rdi*1+rsi],xmm64317 xorps xmm6,xmm144318 movups xmm5,XMMWORD[((-80))+rcx]4319 aesenc xmm6,xmm44320 movdqa xmm0,XMMWORD[((160-128))+rax]4321 paddd xmm0,xmm114322 DB 69,15,56,205,2274323 DB 69,15,56,204,2344324 movups xmm4,XMMWORD[((-64))+rcx]4325 aesenc xmm6,xmm54326 DB 15,56,203,2094327 pshufd xmm0,xmm0,0x0e4328 movdqa xmm3,xmm124329 DB 102,65,15,58,15,219,44330 paddd xmm13,xmm34331 movups xmm5,XMMWORD[((-48))+rcx]4332 aesenc xmm6,xmm44333 DB 15,56,203,2024334 movdqa xmm0,XMMWORD[((192-128))+rax]4335 paddd xmm0,xmm124336 DB 69,15,56,205,2364337 DB 69,15,56,204,2114338 movups xmm4,XMMWORD[((-32))+rcx]4339 aesenc xmm6,xmm54340 DB 15,56,203,2094341 pshufd xmm0,xmm0,0x0e4342 movdqa xmm3,xmm134343 DB 102,65,15,58,15,220,44344 paddd xmm10,xmm34345 movups xmm5,XMMWORD[((-16))+rcx]4346 aesenc xmm6,xmm44347 DB 15,56,203,2024348 movdqa xmm0,XMMWORD[((224-128))+rax]4349 paddd xmm0,xmm134350 DB 69,15,56,205,2134351 DB 69,15,56,204,2204352 movups xmm4,XMMWORD[rcx]4353 aesenc xmm6,xmm54354 DB 15,56,203,2094355 pshufd xmm0,xmm0,0x0e4356 movdqa xmm3,xmm104357 DB 102,65,15,58,15,221,44358 paddd xmm11,xmm34359 movups xmm5,XMMWORD[16+rcx]4360 aesenc xmm6,xmm44361 DB 15,56,203,2024362 movdqa xmm0,XMMWORD[((256-128))+rax]4363 paddd xmm0,xmm104364 DB 69,15,56,205,2184365 DB 69,15,56,204,2294366 movups xmm4,XMMWORD[32+rcx]4367 aesenc xmm6,xmm54368 DB 15,56,203,2094369 pshufd xmm0,xmm0,0x0e4370 movdqa xmm3,xmm114371 DB 102,65,15,58,15,218,44372 paddd xmm12,xmm34373 movups xmm5,XMMWORD[48+rcx]4374 aesenc xmm6,xmm44375 cmp r11d,114376 jb NEAR $L$aesenclast24377 movups xmm4,XMMWORD[64+rcx]4378 aesenc xmm6,xmm54379 movups xmm5,XMMWORD[80+rcx]4380 aesenc xmm6,xmm44381 je NEAR $L$aesenclast24382 movups xmm4,XMMWORD[96+rcx]4383 aesenc xmm6,xmm54384 movups xmm5,XMMWORD[112+rcx]4385 aesenc xmm6,xmm44386 $L$aesenclast2:4387 aesenclast xmm6,xmm54388 movups xmm4,XMMWORD[((16-112))+rcx]4389 nop4390 DB 15,56,203,2024391 movups xmm14,XMMWORD[32+rdi]4392 xorps xmm14,xmm154393 movups XMMWORD[16+rdi*1+rsi],xmm64394 xorps xmm6,xmm144395 movups xmm5,XMMWORD[((-80))+rcx]4396 aesenc xmm6,xmm44397 movdqa xmm0,XMMWORD[((288-128))+rax]4398 paddd xmm0,xmm114399 DB 69,15,56,205,2274400 DB 69,15,56,204,2344401 movups xmm4,XMMWORD[((-64))+rcx]4402 aesenc xmm6,xmm54403 DB 15,56,203,2094404 pshufd xmm0,xmm0,0x0e4405 movdqa xmm3,xmm124406 DB 102,65,15,58,15,219,44407 paddd xmm13,xmm34408 movups xmm5,XMMWORD[((-48))+rcx]4409 aesenc xmm6,xmm44410 DB 15,56,203,2024411 movdqa xmm0,XMMWORD[((320-128))+rax]4412 paddd xmm0,xmm124413 DB 69,15,56,205,2364414 DB 69,15,56,204,2114415 movups xmm4,XMMWORD[((-32))+rcx]4416 aesenc xmm6,xmm54417 DB 15,56,203,2094418 pshufd xmm0,xmm0,0x0e4419 movdqa xmm3,xmm134420 DB 102,65,15,58,15,220,44421 paddd xmm10,xmm34422 movups xmm5,XMMWORD[((-16))+rcx]4423 aesenc xmm6,xmm44424 DB 15,56,203,2024425 movdqa xmm0,XMMWORD[((352-128))+rax]4426 paddd xmm0,xmm134427 DB 69,15,56,205,2134428 DB 69,15,56,204,2204429 movups xmm4,XMMWORD[rcx]4430 aesenc xmm6,xmm54431 DB 15,56,203,2094432 pshufd xmm0,xmm0,0x0e4433 movdqa xmm3,xmm104434 DB 102,65,15,58,15,221,44435 paddd xmm11,xmm34436 movups xmm5,XMMWORD[16+rcx]4437 aesenc xmm6,xmm44438 DB 15,56,203,2024439 movdqa xmm0,XMMWORD[((384-128))+rax]4440 paddd xmm0,xmm104441 DB 69,15,56,205,2184442 DB 69,15,56,204,2294443 movups xmm4,XMMWORD[32+rcx]4444 aesenc xmm6,xmm54445 DB 15,56,203,2094446 pshufd xmm0,xmm0,0x0e4447 movdqa xmm3,xmm114448 DB 102,65,15,58,15,218,44449 paddd xmm12,xmm34450 movups xmm5,XMMWORD[48+rcx]4451 aesenc xmm6,xmm44452 DB 15,56,203,2024453 movdqa xmm0,XMMWORD[((416-128))+rax]4454 paddd xmm0,xmm114455 DB 69,15,56,205,2274456 DB 69,15,56,204,2344457 cmp r11d,114458 jb NEAR $L$aesenclast34459 movups xmm4,XMMWORD[64+rcx]4460 aesenc xmm6,xmm54461 movups xmm5,XMMWORD[80+rcx]4462 aesenc xmm6,xmm44463 je NEAR $L$aesenclast34464 movups xmm4,XMMWORD[96+rcx]4465 aesenc xmm6,xmm54466 movups xmm5,XMMWORD[112+rcx]4467 aesenc xmm6,xmm44468 $L$aesenclast3:4469 aesenclast xmm6,xmm54470 movups xmm4,XMMWORD[((16-112))+rcx]4471 nop4472 DB 15,56,203,2094473 pshufd xmm0,xmm0,0x0e4474 movdqa xmm3,xmm124475 DB 102,65,15,58,15,219,44476 paddd xmm13,xmm34477 movups xmm14,XMMWORD[48+rdi]4478 xorps xmm14,xmm154479 movups XMMWORD[32+rdi*1+rsi],xmm64480 xorps xmm6,xmm144481 movups xmm5,XMMWORD[((-80))+rcx]4482 aesenc xmm6,xmm44483 movups xmm4,XMMWORD[((-64))+rcx]4484 aesenc xmm6,xmm54485 DB 15,56,203,2024486 4487 movdqa xmm0,XMMWORD[((448-128))+rax]4488 paddd xmm0,xmm124489 DB 69,15,56,205,2364490 movdqa xmm3,xmm74491 movups xmm5,XMMWORD[((-48))+rcx]4492 aesenc xmm6,xmm44493 DB 15,56,203,2094494 pshufd xmm0,xmm0,0x0e4495 movups xmm4,XMMWORD[((-32))+rcx]4496 aesenc xmm6,xmm54497 DB 15,56,203,2024498 4499 movdqa xmm0,XMMWORD[((480-128))+rax]4500 paddd xmm0,xmm134501 movups xmm5,XMMWORD[((-16))+rcx]4502 aesenc xmm6,xmm44503 movups xmm4,XMMWORD[rcx]4504 aesenc xmm6,xmm54505 DB 15,56,203,2094506 pshufd xmm0,xmm0,0x0e4507 movups xmm5,XMMWORD[16+rcx]4508 aesenc xmm6,xmm44509 DB 15,56,203,2024510 4511 movups xmm4,XMMWORD[32+rcx]4512 aesenc xmm6,xmm54513 movups xmm5,XMMWORD[48+rcx]4514 aesenc xmm6,xmm44515 cmp r11d,114516 jb NEAR $L$aesenclast44517 movups xmm4,XMMWORD[64+rcx]4518 aesenc xmm6,xmm54519 movups xmm5,XMMWORD[80+rcx]4520 aesenc xmm6,xmm44521 je NEAR $L$aesenclast44522 movups xmm4,XMMWORD[96+rcx]4523 aesenc xmm6,xmm54524 movups xmm5,XMMWORD[112+rcx]4525 aesenc xmm6,xmm44526 $L$aesenclast4:4527 aesenclast xmm6,xmm54528 movups xmm4,XMMWORD[((16-112))+rcx]4529 nop4530 4531 paddd xmm2,xmm94532 paddd xmm1,xmm84533 4534 dec rdx4535 movups XMMWORD[48+rdi*1+rsi],xmm64536 lea rdi,[64+rdi]4537 jnz NEAR $L$oop_shaext4538 4539 pshufd xmm2,xmm2,0xb14540 pshufd xmm3,xmm1,0x1b4541 pshufd xmm1,xmm1,0xb14542 punpckhqdq xmm1,xmm24543 DB 102,15,58,15,211,84544 4545 movups XMMWORD[r8],xmm64546 movdqu XMMWORD[r9],xmm14547 movdqu XMMWORD[16+r9],xmm24548 movaps xmm6,XMMWORD[rsp]4549 movaps xmm7,XMMWORD[16+rsp]4550 movaps xmm8,XMMWORD[32+rsp]4551 movaps xmm9,XMMWORD[48+rsp]4552 movaps xmm10,XMMWORD[64+rsp]4553 movaps xmm11,XMMWORD[80+rsp]4554 movaps xmm12,XMMWORD[96+rsp]4555 movaps xmm13,XMMWORD[112+rsp]4556 movaps xmm14,XMMWORD[128+rsp]4557 movaps xmm15,XMMWORD[144+rsp]4558 lea rsp,[((8+160))+rsp]4559 $L$epilogue_shaext:4560 mov rdi,QWORD[8+rsp] ;WIN64 epilogue4561 mov rsi,QWORD[16+rsp]4562 DB 0F3h,0C3h ;repret4563 4564 $L$SEH_end_aesni_cbc_sha256_enc_shaext:4565 EXTERN __imp_RtlVirtualUnwind4566 4567 ALIGN 164568 se_handler:4569 push rsi4570 push rdi4571 push rbx4572 push rbp4573 push r124574 push r134575 push r144576 push r154577 pushfq4578 sub rsp,644579 4580 mov rax,QWORD[120+r8]4581 mov rbx,QWORD[248+r8]4582 4583 mov rsi,QWORD[8+r9]4584 mov r11,QWORD[56+r9]4585 4586 mov r10d,DWORD[r11]4587 lea r10,[r10*1+rsi]4588 cmp rbx,r104589 jb NEAR $L$in_prologue4590 4591 mov rax,QWORD[152+r8]4592 4593 mov r10d,DWORD[4+r11]4594 lea r10,[r10*1+rsi]4595 cmp rbx,r104596 jae NEAR $L$in_prologue4597 lea r10,[aesni_cbc_sha256_enc_shaext]4598 cmp rbx,r104599 jb NEAR $L$not_in_shaext4600 4601 lea rsi,[rax]4602 lea rdi,[512+r8]4603 mov ecx,204604 DD 0xa548f3fc4605 lea rax,[168+rax]4606 jmp NEAR $L$in_prologue4607 $L$not_in_shaext:4608 lea r10,[$L$avx2_shortcut]4609 cmp rbx,r104610 jb NEAR $L$not_in_avx24611 4612 and rax,-256*44613 add rax,4484614 $L$not_in_avx2:4615 mov rsi,rax4616 mov rax,QWORD[((64+56))+rax]4617 4618 mov rbx,QWORD[((-8))+rax]4619 mov rbp,QWORD[((-16))+rax]4620 mov r12,QWORD[((-24))+rax]4621 mov r13,QWORD[((-32))+rax]4622 mov r14,QWORD[((-40))+rax]4623 mov r15,QWORD[((-48))+rax]4624 mov QWORD[144+r8],rbx4625 mov QWORD[160+r8],rbp4626 mov QWORD[216+r8],r124627 mov QWORD[224+r8],r134628 mov QWORD[232+r8],r144629 mov QWORD[240+r8],r154630 4631 lea rsi,[((64+64))+rsi]4632 lea rdi,[512+r8]4633 mov ecx,204634 DD 0xa548f3fc4635 4636 $L$in_prologue:4637 mov rdi,QWORD[8+rax]4638 mov rsi,QWORD[16+rax]4639 mov QWORD[152+r8],rax4640 mov QWORD[168+r8],rsi4641 mov QWORD[176+r8],rdi4642 4643 mov rdi,QWORD[40+r9]4644 mov rsi,r84645 mov ecx,1544646 DD 0xa548f3fc4647 4648 mov rsi,r94649 xor rcx,rcx4650 mov rdx,QWORD[8+rsi]4651 mov r8,QWORD[rsi]4652 mov r9,QWORD[16+rsi]4653 mov r10,QWORD[40+rsi]4654 lea r11,[56+rsi]4655 lea r12,[24+rsi]4656 mov QWORD[32+rsp],r104657 mov QWORD[40+rsp],r114658 mov QWORD[48+rsp],r124659 mov QWORD[56+rsp],rcx4660 call QWORD[__imp_RtlVirtualUnwind]4661 4662 mov eax,14663 add rsp,644664 popfq4665 pop r154666 pop r144667 pop r134668 pop r124669 pop rbp4670 pop rbx4671 pop rdi4672 pop rsi4673 DB 0F3h,0C3h ;repret4674 4675 4676 section .pdata rdata align=44677 DD $L$SEH_begin_aesni_cbc_sha256_enc_xop wrt ..imagebase4678 DD $L$SEH_end_aesni_cbc_sha256_enc_xop wrt ..imagebase4679 DD $L$SEH_info_aesni_cbc_sha256_enc_xop wrt ..imagebase4680 4681 DD $L$SEH_begin_aesni_cbc_sha256_enc_avx wrt ..imagebase4682 DD $L$SEH_end_aesni_cbc_sha256_enc_avx wrt ..imagebase4683 DD $L$SEH_info_aesni_cbc_sha256_enc_avx wrt ..imagebase4684 DD $L$SEH_begin_aesni_cbc_sha256_enc_avx2 wrt ..imagebase4685 DD $L$SEH_end_aesni_cbc_sha256_enc_avx2 wrt ..imagebase4686 DD $L$SEH_info_aesni_cbc_sha256_enc_avx2 wrt ..imagebase4687 DD $L$SEH_begin_aesni_cbc_sha256_enc_shaext wrt ..imagebase4688 DD $L$SEH_end_aesni_cbc_sha256_enc_shaext wrt ..imagebase4689 DD $L$SEH_info_aesni_cbc_sha256_enc_shaext wrt ..imagebase4690 section .xdata rdata align=84691 ALIGN 84692 $L$SEH_info_aesni_cbc_sha256_enc_xop:4693 DB 9,0,0,04694 DD se_handler wrt ..imagebase4695 DD $L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase4696 4697 $L$SEH_info_aesni_cbc_sha256_enc_avx:4698 DB 9,0,0,04699 DD se_handler wrt ..imagebase4700 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase4701 $L$SEH_info_aesni_cbc_sha256_enc_avx2:4702 DB 9,0,0,04703 DD se_handler wrt ..imagebase4704 DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase4705 $L$SEH_info_aesni_cbc_sha256_enc_shaext:4706 DB 9,0,0,04707 DD se_handler wrt ..imagebase4708 DD $L$prologue_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/chacha-x86_64.S
r94083 r95221 62 62 je NEAR $L$no_data 63 63 mov r10,QWORD[((OPENSSL_ia32cap_P+4))] 64 bt r10,4865 jc NEAR $L$ChaCha20_avx51266 test r10,r1067 js NEAR $L$ChaCha20_avx512vl68 64 test r10d,512 69 65 jnz NEAR $L$ChaCha20_ssse3 … … 361 357 mov r9,rsp 362 358 363 test r10d,2048364 jnz NEAR $L$ChaCha20_4xop365 359 cmp rdx,128 366 360 je NEAR $L$ChaCha20_128 … … 702 696 703 697 mov r11,r10 704 shr r10,32705 test r10,32706 jnz NEAR $L$ChaCha20_8x707 698 cmp rdx,192 708 699 ja NEAR $L$proceed4x … … 1269 1260 1270 1261 $L$SEH_end_ChaCha20_4x: 1271 1272 ALIGN 321273 ChaCha20_4xop:1274 mov QWORD[8+rsp],rdi ;WIN64 prologue1275 mov QWORD[16+rsp],rsi1276 mov rax,rsp1277 $L$SEH_begin_ChaCha20_4xop:1278 mov rdi,rcx1279 mov rsi,rdx1280 mov rdx,r81281 mov rcx,r91282 mov r8,QWORD[40+rsp]1283 1284 1285 1286 $L$ChaCha20_4xop:1287 mov r9,rsp1288 1289 sub rsp,0x140+1681290 movaps XMMWORD[(-168)+r9],xmm61291 movaps XMMWORD[(-152)+r9],xmm71292 movaps XMMWORD[(-136)+r9],xmm81293 movaps XMMWORD[(-120)+r9],xmm91294 movaps XMMWORD[(-104)+r9],xmm101295 movaps XMMWORD[(-88)+r9],xmm111296 movaps XMMWORD[(-72)+r9],xmm121297 movaps XMMWORD[(-56)+r9],xmm131298 movaps XMMWORD[(-40)+r9],xmm141299 movaps XMMWORD[(-24)+r9],xmm151300 $L$4xop_body:1301 vzeroupper1302 1303 vmovdqa xmm11,XMMWORD[$L$sigma]1304 vmovdqu xmm3,XMMWORD[rcx]1305 vmovdqu xmm15,XMMWORD[16+rcx]1306 vmovdqu xmm7,XMMWORD[r8]1307 lea rcx,[256+rsp]1308 1309 vpshufd xmm8,xmm11,0x001310 vpshufd xmm9,xmm11,0x551311 vmovdqa XMMWORD[64+rsp],xmm81312 vpshufd xmm10,xmm11,0xaa1313 vmovdqa XMMWORD[80+rsp],xmm91314 vpshufd xmm11,xmm11,0xff1315 vmovdqa XMMWORD[96+rsp],xmm101316 vmovdqa XMMWORD[112+rsp],xmm111317 1318 vpshufd xmm0,xmm3,0x001319 vpshufd xmm1,xmm3,0x551320 vmovdqa XMMWORD[(128-256)+rcx],xmm01321 vpshufd xmm2,xmm3,0xaa1322 vmovdqa XMMWORD[(144-256)+rcx],xmm11323 vpshufd xmm3,xmm3,0xff1324 vmovdqa XMMWORD[(160-256)+rcx],xmm21325 vmovdqa XMMWORD[(176-256)+rcx],xmm31326 1327 vpshufd xmm12,xmm15,0x001328 vpshufd xmm13,xmm15,0x551329 vmovdqa XMMWORD[(192-256)+rcx],xmm121330 vpshufd xmm14,xmm15,0xaa1331 vmovdqa XMMWORD[(208-256)+rcx],xmm131332 vpshufd xmm15,xmm15,0xff1333 vmovdqa XMMWORD[(224-256)+rcx],xmm141334 vmovdqa XMMWORD[(240-256)+rcx],xmm151335 1336 vpshufd xmm4,xmm7,0x001337 vpshufd xmm5,xmm7,0x551338 vpaddd xmm4,xmm4,XMMWORD[$L$inc]1339 vpshufd xmm6,xmm7,0xaa1340 vmovdqa XMMWORD[(272-256)+rcx],xmm51341 vpshufd xmm7,xmm7,0xff1342 vmovdqa XMMWORD[(288-256)+rcx],xmm61343 vmovdqa XMMWORD[(304-256)+rcx],xmm71344 1345 jmp NEAR $L$oop_enter4xop1346 1347 ALIGN 321348 $L$oop_outer4xop:1349 vmovdqa xmm8,XMMWORD[64+rsp]1350 vmovdqa xmm9,XMMWORD[80+rsp]1351 vmovdqa xmm10,XMMWORD[96+rsp]1352 vmovdqa xmm11,XMMWORD[112+rsp]1353 vmovdqa xmm0,XMMWORD[((128-256))+rcx]1354 vmovdqa xmm1,XMMWORD[((144-256))+rcx]1355 vmovdqa xmm2,XMMWORD[((160-256))+rcx]1356 vmovdqa xmm3,XMMWORD[((176-256))+rcx]1357 vmovdqa xmm12,XMMWORD[((192-256))+rcx]1358 vmovdqa xmm13,XMMWORD[((208-256))+rcx]1359 vmovdqa xmm14,XMMWORD[((224-256))+rcx]1360 vmovdqa xmm15,XMMWORD[((240-256))+rcx]1361 vmovdqa xmm4,XMMWORD[((256-256))+rcx]1362 vmovdqa xmm5,XMMWORD[((272-256))+rcx]1363 vmovdqa xmm6,XMMWORD[((288-256))+rcx]1364 vmovdqa xmm7,XMMWORD[((304-256))+rcx]1365 vpaddd xmm4,xmm4,XMMWORD[$L$four]1366 1367 $L$oop_enter4xop:1368 mov eax,101369 vmovdqa XMMWORD[(256-256)+rcx],xmm41370 jmp NEAR $L$oop4xop1371 1372 ALIGN 321373 $L$oop4xop:1374 vpaddd xmm8,xmm8,xmm01375 vpaddd xmm9,xmm9,xmm11376 vpaddd xmm10,xmm10,xmm21377 vpaddd xmm11,xmm11,xmm31378 vpxor xmm4,xmm8,xmm41379 vpxor xmm5,xmm9,xmm51380 vpxor xmm6,xmm10,xmm61381 vpxor xmm7,xmm11,xmm71382 DB 143,232,120,194,228,161383 DB 143,232,120,194,237,161384 DB 143,232,120,194,246,161385 DB 143,232,120,194,255,161386 vpaddd xmm12,xmm12,xmm41387 vpaddd xmm13,xmm13,xmm51388 vpaddd xmm14,xmm14,xmm61389 vpaddd xmm15,xmm15,xmm71390 vpxor xmm0,xmm12,xmm01391 vpxor xmm1,xmm13,xmm11392 vpxor xmm2,xmm2,xmm141393 vpxor xmm3,xmm3,xmm151394 DB 143,232,120,194,192,121395 DB 143,232,120,194,201,121396 DB 143,232,120,194,210,121397 DB 143,232,120,194,219,121398 vpaddd xmm8,xmm0,xmm81399 vpaddd xmm9,xmm1,xmm91400 vpaddd xmm10,xmm10,xmm21401 vpaddd xmm11,xmm11,xmm31402 vpxor xmm4,xmm8,xmm41403 vpxor xmm5,xmm9,xmm51404 vpxor xmm6,xmm10,xmm61405 vpxor xmm7,xmm11,xmm71406 DB 143,232,120,194,228,81407 DB 143,232,120,194,237,81408 DB 143,232,120,194,246,81409 DB 143,232,120,194,255,81410 vpaddd xmm12,xmm12,xmm41411 vpaddd xmm13,xmm13,xmm51412 vpaddd xmm14,xmm14,xmm61413 vpaddd xmm15,xmm15,xmm71414 vpxor xmm0,xmm12,xmm01415 vpxor xmm1,xmm13,xmm11416 vpxor xmm2,xmm2,xmm141417 vpxor xmm3,xmm3,xmm151418 DB 143,232,120,194,192,71419 DB 143,232,120,194,201,71420 DB 143,232,120,194,210,71421 DB 143,232,120,194,219,71422 vpaddd xmm8,xmm8,xmm11423 vpaddd xmm9,xmm9,xmm21424 vpaddd xmm10,xmm10,xmm31425 vpaddd xmm11,xmm11,xmm01426 vpxor xmm7,xmm8,xmm71427 vpxor xmm4,xmm9,xmm41428 vpxor xmm5,xmm10,xmm51429 vpxor xmm6,xmm11,xmm61430 DB 143,232,120,194,255,161431 DB 143,232,120,194,228,161432 DB 143,232,120,194,237,161433 DB 143,232,120,194,246,161434 vpaddd xmm14,xmm14,xmm71435 vpaddd xmm15,xmm15,xmm41436 vpaddd xmm12,xmm12,xmm51437 vpaddd xmm13,xmm13,xmm61438 vpxor xmm1,xmm14,xmm11439 vpxor xmm2,xmm15,xmm21440 vpxor xmm3,xmm3,xmm121441 vpxor xmm0,xmm0,xmm131442 DB 143,232,120,194,201,121443 DB 143,232,120,194,210,121444 DB 143,232,120,194,219,121445 DB 143,232,120,194,192,121446 vpaddd xmm8,xmm1,xmm81447 vpaddd xmm9,xmm2,xmm91448 vpaddd xmm10,xmm10,xmm31449 vpaddd xmm11,xmm11,xmm01450 vpxor xmm7,xmm8,xmm71451 vpxor xmm4,xmm9,xmm41452 vpxor xmm5,xmm10,xmm51453 vpxor xmm6,xmm11,xmm61454 DB 143,232,120,194,255,81455 DB 143,232,120,194,228,81456 DB 143,232,120,194,237,81457 DB 143,232,120,194,246,81458 vpaddd xmm14,xmm14,xmm71459 vpaddd xmm15,xmm15,xmm41460 vpaddd xmm12,xmm12,xmm51461 vpaddd xmm13,xmm13,xmm61462 vpxor xmm1,xmm14,xmm11463 vpxor xmm2,xmm15,xmm21464 vpxor xmm3,xmm3,xmm121465 vpxor xmm0,xmm0,xmm131466 DB 143,232,120,194,201,71467 DB 143,232,120,194,210,71468 DB 143,232,120,194,219,71469 DB 143,232,120,194,192,71470 dec eax1471 jnz NEAR $L$oop4xop1472 1473 vpaddd xmm8,xmm8,XMMWORD[64+rsp]1474 vpaddd xmm9,xmm9,XMMWORD[80+rsp]1475 vpaddd xmm10,xmm10,XMMWORD[96+rsp]1476 vpaddd xmm11,xmm11,XMMWORD[112+rsp]1477 1478 vmovdqa XMMWORD[32+rsp],xmm141479 vmovdqa XMMWORD[48+rsp],xmm151480 1481 vpunpckldq xmm14,xmm8,xmm91482 vpunpckldq xmm15,xmm10,xmm111483 vpunpckhdq xmm8,xmm8,xmm91484 vpunpckhdq xmm10,xmm10,xmm111485 vpunpcklqdq xmm9,xmm14,xmm151486 vpunpckhqdq xmm14,xmm14,xmm151487 vpunpcklqdq xmm11,xmm8,xmm101488 vpunpckhqdq xmm8,xmm8,xmm101489 vpaddd xmm0,xmm0,XMMWORD[((128-256))+rcx]1490 vpaddd xmm1,xmm1,XMMWORD[((144-256))+rcx]1491 vpaddd xmm2,xmm2,XMMWORD[((160-256))+rcx]1492 vpaddd xmm3,xmm3,XMMWORD[((176-256))+rcx]1493 1494 vmovdqa XMMWORD[rsp],xmm91495 vmovdqa XMMWORD[16+rsp],xmm141496 vmovdqa xmm9,XMMWORD[32+rsp]1497 vmovdqa xmm14,XMMWORD[48+rsp]1498 1499 vpunpckldq xmm10,xmm0,xmm11500 vpunpckldq xmm15,xmm2,xmm31501 vpunpckhdq xmm0,xmm0,xmm11502 vpunpckhdq xmm2,xmm2,xmm31503 vpunpcklqdq xmm1,xmm10,xmm151504 vpunpckhqdq xmm10,xmm10,xmm151505 vpunpcklqdq xmm3,xmm0,xmm21506 vpunpckhqdq xmm0,xmm0,xmm21507 vpaddd xmm12,xmm12,XMMWORD[((192-256))+rcx]1508 vpaddd xmm13,xmm13,XMMWORD[((208-256))+rcx]1509 vpaddd xmm9,xmm9,XMMWORD[((224-256))+rcx]1510 vpaddd xmm14,xmm14,XMMWORD[((240-256))+rcx]1511 1512 vpunpckldq xmm2,xmm12,xmm131513 vpunpckldq xmm15,xmm9,xmm141514 vpunpckhdq xmm12,xmm12,xmm131515 vpunpckhdq xmm9,xmm9,xmm141516 vpunpcklqdq xmm13,xmm2,xmm151517 vpunpckhqdq xmm2,xmm2,xmm151518 vpunpcklqdq xmm14,xmm12,xmm91519 vpunpckhqdq xmm12,xmm12,xmm91520 vpaddd xmm4,xmm4,XMMWORD[((256-256))+rcx]1521 vpaddd xmm5,xmm5,XMMWORD[((272-256))+rcx]1522 vpaddd xmm6,xmm6,XMMWORD[((288-256))+rcx]1523 vpaddd xmm7,xmm7,XMMWORD[((304-256))+rcx]1524 1525 vpunpckldq xmm9,xmm4,xmm51526 vpunpckldq xmm15,xmm6,xmm71527 vpunpckhdq xmm4,xmm4,xmm51528 vpunpckhdq xmm6,xmm6,xmm71529 vpunpcklqdq xmm5,xmm9,xmm151530 vpunpckhqdq xmm9,xmm9,xmm151531 vpunpcklqdq xmm7,xmm4,xmm61532 vpunpckhqdq xmm4,xmm4,xmm61533 vmovdqa xmm6,XMMWORD[rsp]1534 vmovdqa xmm15,XMMWORD[16+rsp]1535 1536 cmp rdx,64*41537 jb NEAR $L$tail4xop1538 1539 vpxor xmm6,xmm6,XMMWORD[rsi]1540 vpxor xmm1,xmm1,XMMWORD[16+rsi]1541 vpxor xmm13,xmm13,XMMWORD[32+rsi]1542 vpxor xmm5,xmm5,XMMWORD[48+rsi]1543 vpxor xmm15,xmm15,XMMWORD[64+rsi]1544 vpxor xmm10,xmm10,XMMWORD[80+rsi]1545 vpxor xmm2,xmm2,XMMWORD[96+rsi]1546 vpxor xmm9,xmm9,XMMWORD[112+rsi]1547 lea rsi,[128+rsi]1548 vpxor xmm11,xmm11,XMMWORD[rsi]1549 vpxor xmm3,xmm3,XMMWORD[16+rsi]1550 vpxor xmm14,xmm14,XMMWORD[32+rsi]1551 vpxor xmm7,xmm7,XMMWORD[48+rsi]1552 vpxor xmm8,xmm8,XMMWORD[64+rsi]1553 vpxor xmm0,xmm0,XMMWORD[80+rsi]1554 vpxor xmm12,xmm12,XMMWORD[96+rsi]1555 vpxor xmm4,xmm4,XMMWORD[112+rsi]1556 lea rsi,[128+rsi]1557 1558 vmovdqu XMMWORD[rdi],xmm61559 vmovdqu XMMWORD[16+rdi],xmm11560 vmovdqu XMMWORD[32+rdi],xmm131561 vmovdqu XMMWORD[48+rdi],xmm51562 vmovdqu XMMWORD[64+rdi],xmm151563 vmovdqu XMMWORD[80+rdi],xmm101564 vmovdqu XMMWORD[96+rdi],xmm21565 vmovdqu XMMWORD[112+rdi],xmm91566 lea rdi,[128+rdi]1567 vmovdqu XMMWORD[rdi],xmm111568 vmovdqu XMMWORD[16+rdi],xmm31569 vmovdqu XMMWORD[32+rdi],xmm141570 vmovdqu XMMWORD[48+rdi],xmm71571 vmovdqu XMMWORD[64+rdi],xmm81572 vmovdqu XMMWORD[80+rdi],xmm01573 vmovdqu XMMWORD[96+rdi],xmm121574 vmovdqu XMMWORD[112+rdi],xmm41575 lea rdi,[128+rdi]1576 1577 sub rdx,64*41578 jnz NEAR $L$oop_outer4xop1579 1580 jmp NEAR $L$done4xop1581 1582 ALIGN 321583 $L$tail4xop:1584 cmp rdx,1921585 jae NEAR $L$192_or_more4xop1586 cmp rdx,1281587 jae NEAR $L$128_or_more4xop1588 cmp rdx,641589 jae NEAR $L$64_or_more4xop1590 1591 xor r10,r101592 vmovdqa XMMWORD[rsp],xmm61593 vmovdqa XMMWORD[16+rsp],xmm11594 vmovdqa XMMWORD[32+rsp],xmm131595 vmovdqa XMMWORD[48+rsp],xmm51596 jmp NEAR $L$oop_tail4xop1597 1598 ALIGN 321599 $L$64_or_more4xop:1600 vpxor xmm6,xmm6,XMMWORD[rsi]1601 vpxor xmm1,xmm1,XMMWORD[16+rsi]1602 vpxor xmm13,xmm13,XMMWORD[32+rsi]1603 vpxor xmm5,xmm5,XMMWORD[48+rsi]1604 vmovdqu XMMWORD[rdi],xmm61605 vmovdqu XMMWORD[16+rdi],xmm11606 vmovdqu XMMWORD[32+rdi],xmm131607 vmovdqu XMMWORD[48+rdi],xmm51608 je NEAR $L$done4xop1609 1610 lea rsi,[64+rsi]1611 vmovdqa XMMWORD[rsp],xmm151612 xor r10,r101613 vmovdqa XMMWORD[16+rsp],xmm101614 lea rdi,[64+rdi]1615 vmovdqa XMMWORD[32+rsp],xmm21616 sub rdx,641617 vmovdqa XMMWORD[48+rsp],xmm91618 jmp NEAR $L$oop_tail4xop1619 1620 ALIGN 321621 $L$128_or_more4xop:1622 vpxor xmm6,xmm6,XMMWORD[rsi]1623 vpxor xmm1,xmm1,XMMWORD[16+rsi]1624 vpxor xmm13,xmm13,XMMWORD[32+rsi]1625 vpxor xmm5,xmm5,XMMWORD[48+rsi]1626 vpxor xmm15,xmm15,XMMWORD[64+rsi]1627 vpxor xmm10,xmm10,XMMWORD[80+rsi]1628 vpxor xmm2,xmm2,XMMWORD[96+rsi]1629 vpxor xmm9,xmm9,XMMWORD[112+rsi]1630 1631 vmovdqu XMMWORD[rdi],xmm61632 vmovdqu XMMWORD[16+rdi],xmm11633 vmovdqu XMMWORD[32+rdi],xmm131634 vmovdqu XMMWORD[48+rdi],xmm51635 vmovdqu XMMWORD[64+rdi],xmm151636 vmovdqu XMMWORD[80+rdi],xmm101637 vmovdqu XMMWORD[96+rdi],xmm21638 vmovdqu XMMWORD[112+rdi],xmm91639 je NEAR $L$done4xop1640 1641 lea rsi,[128+rsi]1642 vmovdqa XMMWORD[rsp],xmm111643 xor r10,r101644 vmovdqa XMMWORD[16+rsp],xmm31645 lea rdi,[128+rdi]1646 vmovdqa XMMWORD[32+rsp],xmm141647 sub rdx,1281648 vmovdqa XMMWORD[48+rsp],xmm71649 jmp NEAR $L$oop_tail4xop1650 1651 ALIGN 321652 $L$192_or_more4xop:1653 vpxor xmm6,xmm6,XMMWORD[rsi]1654 vpxor xmm1,xmm1,XMMWORD[16+rsi]1655 vpxor xmm13,xmm13,XMMWORD[32+rsi]1656 vpxor xmm5,xmm5,XMMWORD[48+rsi]1657 vpxor xmm15,xmm15,XMMWORD[64+rsi]1658 vpxor xmm10,xmm10,XMMWORD[80+rsi]1659 vpxor xmm2,xmm2,XMMWORD[96+rsi]1660 vpxor xmm9,xmm9,XMMWORD[112+rsi]1661 lea rsi,[128+rsi]1662 vpxor xmm11,xmm11,XMMWORD[rsi]1663 vpxor xmm3,xmm3,XMMWORD[16+rsi]1664 vpxor xmm14,xmm14,XMMWORD[32+rsi]1665 vpxor xmm7,xmm7,XMMWORD[48+rsi]1666 1667 vmovdqu XMMWORD[rdi],xmm61668 vmovdqu XMMWORD[16+rdi],xmm11669 vmovdqu XMMWORD[32+rdi],xmm131670 vmovdqu XMMWORD[48+rdi],xmm51671 vmovdqu XMMWORD[64+rdi],xmm151672 vmovdqu XMMWORD[80+rdi],xmm101673 vmovdqu XMMWORD[96+rdi],xmm21674 vmovdqu XMMWORD[112+rdi],xmm91675 lea rdi,[128+rdi]1676 vmovdqu XMMWORD[rdi],xmm111677 vmovdqu XMMWORD[16+rdi],xmm31678 vmovdqu XMMWORD[32+rdi],xmm141679 vmovdqu XMMWORD[48+rdi],xmm71680 je NEAR $L$done4xop1681 1682 lea rsi,[64+rsi]1683 vmovdqa XMMWORD[rsp],xmm81684 xor r10,r101685 vmovdqa XMMWORD[16+rsp],xmm01686 lea rdi,[64+rdi]1687 vmovdqa XMMWORD[32+rsp],xmm121688 sub rdx,1921689 vmovdqa XMMWORD[48+rsp],xmm41690 1691 $L$oop_tail4xop:1692 movzx eax,BYTE[r10*1+rsi]1693 movzx ecx,BYTE[r10*1+rsp]1694 lea r10,[1+r10]1695 xor eax,ecx1696 mov BYTE[((-1))+r10*1+rdi],al1697 dec rdx1698 jnz NEAR $L$oop_tail4xop1699 1700 $L$done4xop:1701 vzeroupper1702 movaps xmm6,XMMWORD[((-168))+r9]1703 movaps xmm7,XMMWORD[((-152))+r9]1704 movaps xmm8,XMMWORD[((-136))+r9]1705 movaps xmm9,XMMWORD[((-120))+r9]1706 movaps xmm10,XMMWORD[((-104))+r9]1707 movaps xmm11,XMMWORD[((-88))+r9]1708 movaps xmm12,XMMWORD[((-72))+r9]1709 movaps xmm13,XMMWORD[((-56))+r9]1710 movaps xmm14,XMMWORD[((-40))+r9]1711 movaps xmm15,XMMWORD[((-24))+r9]1712 lea rsp,[r9]1713 1714 $L$4xop_epilogue:1715 mov rdi,QWORD[8+rsp] ;WIN64 epilogue1716 mov rsi,QWORD[16+rsp]1717 DB 0F3h,0C3h ;repret1718 1719 $L$SEH_end_ChaCha20_4xop:1720 1721 ALIGN 321722 ChaCha20_8x:1723 mov QWORD[8+rsp],rdi ;WIN64 prologue1724 mov QWORD[16+rsp],rsi1725 mov rax,rsp1726 $L$SEH_begin_ChaCha20_8x:1727 mov rdi,rcx1728 mov rsi,rdx1729 mov rdx,r81730 mov rcx,r91731 mov r8,QWORD[40+rsp]1732 1733 1734 1735 $L$ChaCha20_8x:1736 mov r9,rsp1737 1738 sub rsp,0x280+1681739 and rsp,-321740 movaps XMMWORD[(-168)+r9],xmm61741 movaps XMMWORD[(-152)+r9],xmm71742 movaps XMMWORD[(-136)+r9],xmm81743 movaps XMMWORD[(-120)+r9],xmm91744 movaps XMMWORD[(-104)+r9],xmm101745 movaps XMMWORD[(-88)+r9],xmm111746 movaps XMMWORD[(-72)+r9],xmm121747 movaps XMMWORD[(-56)+r9],xmm131748 movaps XMMWORD[(-40)+r9],xmm141749 movaps XMMWORD[(-24)+r9],xmm151750 $L$8x_body:1751 vzeroupper1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 vbroadcasti128 ymm11,XMMWORD[$L$sigma]1763 vbroadcasti128 ymm3,XMMWORD[rcx]1764 vbroadcasti128 ymm15,XMMWORD[16+rcx]1765 vbroadcasti128 ymm7,XMMWORD[r8]1766 lea rcx,[256+rsp]1767 lea rax,[512+rsp]1768 lea r10,[$L$rot16]1769 lea r11,[$L$rot24]1770 1771 vpshufd ymm8,ymm11,0x001772 vpshufd ymm9,ymm11,0x551773 vmovdqa YMMWORD[(128-256)+rcx],ymm81774 vpshufd ymm10,ymm11,0xaa1775 vmovdqa YMMWORD[(160-256)+rcx],ymm91776 vpshufd ymm11,ymm11,0xff1777 vmovdqa YMMWORD[(192-256)+rcx],ymm101778 vmovdqa YMMWORD[(224-256)+rcx],ymm111779 1780 vpshufd ymm0,ymm3,0x001781 vpshufd ymm1,ymm3,0x551782 vmovdqa YMMWORD[(256-256)+rcx],ymm01783 vpshufd ymm2,ymm3,0xaa1784 vmovdqa YMMWORD[(288-256)+rcx],ymm11785 vpshufd ymm3,ymm3,0xff1786 vmovdqa YMMWORD[(320-256)+rcx],ymm21787 vmovdqa YMMWORD[(352-256)+rcx],ymm31788 1789 vpshufd ymm12,ymm15,0x001790 vpshufd ymm13,ymm15,0x551791 vmovdqa YMMWORD[(384-512)+rax],ymm121792 vpshufd ymm14,ymm15,0xaa1793 vmovdqa YMMWORD[(416-512)+rax],ymm131794 vpshufd ymm15,ymm15,0xff1795 vmovdqa YMMWORD[(448-512)+rax],ymm141796 vmovdqa YMMWORD[(480-512)+rax],ymm151797 1798 vpshufd ymm4,ymm7,0x001799 vpshufd ymm5,ymm7,0x551800 vpaddd ymm4,ymm4,YMMWORD[$L$incy]1801 vpshufd ymm6,ymm7,0xaa1802 vmovdqa YMMWORD[(544-512)+rax],ymm51803 vpshufd ymm7,ymm7,0xff1804 vmovdqa YMMWORD[(576-512)+rax],ymm61805 vmovdqa YMMWORD[(608-512)+rax],ymm71806 1807 jmp NEAR $L$oop_enter8x1808 1809 ALIGN 321810 $L$oop_outer8x:1811 vmovdqa ymm8,YMMWORD[((128-256))+rcx]1812 vmovdqa ymm9,YMMWORD[((160-256))+rcx]1813 vmovdqa ymm10,YMMWORD[((192-256))+rcx]1814 vmovdqa ymm11,YMMWORD[((224-256))+rcx]1815 vmovdqa ymm0,YMMWORD[((256-256))+rcx]1816 vmovdqa ymm1,YMMWORD[((288-256))+rcx]1817 vmovdqa ymm2,YMMWORD[((320-256))+rcx]1818 vmovdqa ymm3,YMMWORD[((352-256))+rcx]1819 vmovdqa ymm12,YMMWORD[((384-512))+rax]1820 vmovdqa ymm13,YMMWORD[((416-512))+rax]1821 vmovdqa ymm14,YMMWORD[((448-512))+rax]1822 vmovdqa ymm15,YMMWORD[((480-512))+rax]1823 vmovdqa ymm4,YMMWORD[((512-512))+rax]1824 vmovdqa ymm5,YMMWORD[((544-512))+rax]1825 vmovdqa ymm6,YMMWORD[((576-512))+rax]1826 vmovdqa ymm7,YMMWORD[((608-512))+rax]1827 vpaddd ymm4,ymm4,YMMWORD[$L$eight]1828 1829 $L$oop_enter8x:1830 vmovdqa YMMWORD[64+rsp],ymm141831 vmovdqa YMMWORD[96+rsp],ymm151832 vbroadcasti128 ymm15,XMMWORD[r10]1833 vmovdqa YMMWORD[(512-512)+rax],ymm41834 mov eax,101835 jmp NEAR $L$oop8x1836 1837 ALIGN 321838 $L$oop8x:1839 vpaddd ymm8,ymm8,ymm01840 vpxor ymm4,ymm8,ymm41841 vpshufb ymm4,ymm4,ymm151842 vpaddd ymm9,ymm9,ymm11843 vpxor ymm5,ymm9,ymm51844 vpshufb ymm5,ymm5,ymm151845 vpaddd ymm12,ymm12,ymm41846 vpxor ymm0,ymm12,ymm01847 vpslld ymm14,ymm0,121848 vpsrld ymm0,ymm0,201849 vpor ymm0,ymm14,ymm01850 vbroadcasti128 ymm14,XMMWORD[r11]1851 vpaddd ymm13,ymm13,ymm51852 vpxor ymm1,ymm13,ymm11853 vpslld ymm15,ymm1,121854 vpsrld ymm1,ymm1,201855 vpor ymm1,ymm15,ymm11856 vpaddd ymm8,ymm8,ymm01857 vpxor ymm4,ymm8,ymm41858 vpshufb ymm4,ymm4,ymm141859 vpaddd ymm9,ymm9,ymm11860 vpxor ymm5,ymm9,ymm51861 vpshufb ymm5,ymm5,ymm141862 vpaddd ymm12,ymm12,ymm41863 vpxor ymm0,ymm12,ymm01864 vpslld ymm15,ymm0,71865 vpsrld ymm0,ymm0,251866 vpor ymm0,ymm15,ymm01867 vbroadcasti128 ymm15,XMMWORD[r10]1868 vpaddd ymm13,ymm13,ymm51869 vpxor ymm1,ymm13,ymm11870 vpslld ymm14,ymm1,71871 vpsrld ymm1,ymm1,251872 vpor ymm1,ymm14,ymm11873 vmovdqa YMMWORD[rsp],ymm121874 vmovdqa YMMWORD[32+rsp],ymm131875 vmovdqa ymm12,YMMWORD[64+rsp]1876 vmovdqa ymm13,YMMWORD[96+rsp]1877 vpaddd ymm10,ymm10,ymm21878 vpxor ymm6,ymm10,ymm61879 vpshufb ymm6,ymm6,ymm151880 vpaddd ymm11,ymm11,ymm31881 vpxor ymm7,ymm11,ymm71882 vpshufb ymm7,ymm7,ymm151883 vpaddd ymm12,ymm12,ymm61884 vpxor ymm2,ymm12,ymm21885 vpslld ymm14,ymm2,121886 vpsrld ymm2,ymm2,201887 vpor ymm2,ymm14,ymm21888 vbroadcasti128 ymm14,XMMWORD[r11]1889 vpaddd ymm13,ymm13,ymm71890 vpxor ymm3,ymm13,ymm31891 vpslld ymm15,ymm3,121892 vpsrld ymm3,ymm3,201893 vpor ymm3,ymm15,ymm31894 vpaddd ymm10,ymm10,ymm21895 vpxor ymm6,ymm10,ymm61896 vpshufb ymm6,ymm6,ymm141897 vpaddd ymm11,ymm11,ymm31898 vpxor ymm7,ymm11,ymm71899 vpshufb ymm7,ymm7,ymm141900 vpaddd ymm12,ymm12,ymm61901 vpxor ymm2,ymm12,ymm21902 vpslld ymm15,ymm2,71903 vpsrld ymm2,ymm2,251904 vpor ymm2,ymm15,ymm21905 vbroadcasti128 ymm15,XMMWORD[r10]1906 vpaddd ymm13,ymm13,ymm71907 vpxor ymm3,ymm13,ymm31908 vpslld ymm14,ymm3,71909 vpsrld ymm3,ymm3,251910 vpor ymm3,ymm14,ymm31911 vpaddd ymm8,ymm8,ymm11912 vpxor ymm7,ymm8,ymm71913 vpshufb ymm7,ymm7,ymm151914 vpaddd ymm9,ymm9,ymm21915 vpxor ymm4,ymm9,ymm41916 vpshufb ymm4,ymm4,ymm151917 vpaddd ymm12,ymm12,ymm71918 vpxor ymm1,ymm12,ymm11919 vpslld ymm14,ymm1,121920 vpsrld ymm1,ymm1,201921 vpor ymm1,ymm14,ymm11922 vbroadcasti128 ymm14,XMMWORD[r11]1923 vpaddd ymm13,ymm13,ymm41924 vpxor ymm2,ymm13,ymm21925 vpslld ymm15,ymm2,121926 vpsrld ymm2,ymm2,201927 vpor ymm2,ymm15,ymm21928 vpaddd ymm8,ymm8,ymm11929 vpxor ymm7,ymm8,ymm71930 vpshufb ymm7,ymm7,ymm141931 vpaddd ymm9,ymm9,ymm21932 vpxor ymm4,ymm9,ymm41933 vpshufb ymm4,ymm4,ymm141934 vpaddd ymm12,ymm12,ymm71935 vpxor ymm1,ymm12,ymm11936 vpslld ymm15,ymm1,71937 vpsrld ymm1,ymm1,251938 vpor ymm1,ymm15,ymm11939 vbroadcasti128 ymm15,XMMWORD[r10]1940 vpaddd ymm13,ymm13,ymm41941 vpxor ymm2,ymm13,ymm21942 vpslld ymm14,ymm2,71943 vpsrld ymm2,ymm2,251944 vpor ymm2,ymm14,ymm21945 vmovdqa YMMWORD[64+rsp],ymm121946 vmovdqa YMMWORD[96+rsp],ymm131947 vmovdqa ymm12,YMMWORD[rsp]1948 vmovdqa ymm13,YMMWORD[32+rsp]1949 vpaddd ymm10,ymm10,ymm31950 vpxor ymm5,ymm10,ymm51951 vpshufb ymm5,ymm5,ymm151952 vpaddd ymm11,ymm11,ymm01953 vpxor ymm6,ymm11,ymm61954 vpshufb ymm6,ymm6,ymm151955 vpaddd ymm12,ymm12,ymm51956 vpxor ymm3,ymm12,ymm31957 vpslld ymm14,ymm3,121958 vpsrld ymm3,ymm3,201959 vpor ymm3,ymm14,ymm31960 vbroadcasti128 ymm14,XMMWORD[r11]1961 vpaddd ymm13,ymm13,ymm61962 vpxor ymm0,ymm13,ymm01963 vpslld ymm15,ymm0,121964 vpsrld ymm0,ymm0,201965 vpor ymm0,ymm15,ymm01966 vpaddd ymm10,ymm10,ymm31967 vpxor ymm5,ymm10,ymm51968 vpshufb ymm5,ymm5,ymm141969 vpaddd ymm11,ymm11,ymm01970 vpxor ymm6,ymm11,ymm61971 vpshufb ymm6,ymm6,ymm141972 vpaddd ymm12,ymm12,ymm51973 vpxor ymm3,ymm12,ymm31974 vpslld ymm15,ymm3,71975 vpsrld ymm3,ymm3,251976 vpor ymm3,ymm15,ymm31977 vbroadcasti128 ymm15,XMMWORD[r10]1978 vpaddd ymm13,ymm13,ymm61979 vpxor ymm0,ymm13,ymm01980 vpslld ymm14,ymm0,71981 vpsrld ymm0,ymm0,251982 vpor ymm0,ymm14,ymm01983 dec eax1984 jnz NEAR $L$oop8x1985 1986 lea rax,[512+rsp]1987 vpaddd ymm8,ymm8,YMMWORD[((128-256))+rcx]1988 vpaddd ymm9,ymm9,YMMWORD[((160-256))+rcx]1989 vpaddd ymm10,ymm10,YMMWORD[((192-256))+rcx]1990 vpaddd ymm11,ymm11,YMMWORD[((224-256))+rcx]1991 1992 vpunpckldq ymm14,ymm8,ymm91993 vpunpckldq ymm15,ymm10,ymm111994 vpunpckhdq ymm8,ymm8,ymm91995 vpunpckhdq ymm10,ymm10,ymm111996 vpunpcklqdq ymm9,ymm14,ymm151997 vpunpckhqdq ymm14,ymm14,ymm151998 vpunpcklqdq ymm11,ymm8,ymm101999 vpunpckhqdq ymm8,ymm8,ymm102000 vpaddd ymm0,ymm0,YMMWORD[((256-256))+rcx]2001 vpaddd ymm1,ymm1,YMMWORD[((288-256))+rcx]2002 vpaddd ymm2,ymm2,YMMWORD[((320-256))+rcx]2003 vpaddd ymm3,ymm3,YMMWORD[((352-256))+rcx]2004 2005 vpunpckldq ymm10,ymm0,ymm12006 vpunpckldq ymm15,ymm2,ymm32007 vpunpckhdq ymm0,ymm0,ymm12008 vpunpckhdq ymm2,ymm2,ymm32009 vpunpcklqdq ymm1,ymm10,ymm152010 vpunpckhqdq ymm10,ymm10,ymm152011 vpunpcklqdq ymm3,ymm0,ymm22012 vpunpckhqdq ymm0,ymm0,ymm22013 vperm2i128 ymm15,ymm9,ymm1,0x202014 vperm2i128 ymm1,ymm9,ymm1,0x312015 vperm2i128 ymm9,ymm14,ymm10,0x202016 vperm2i128 ymm10,ymm14,ymm10,0x312017 vperm2i128 ymm14,ymm11,ymm3,0x202018 vperm2i128 ymm3,ymm11,ymm3,0x312019 vperm2i128 ymm11,ymm8,ymm0,0x202020 vperm2i128 ymm0,ymm8,ymm0,0x312021 vmovdqa YMMWORD[rsp],ymm152022 vmovdqa YMMWORD[32+rsp],ymm92023 vmovdqa ymm15,YMMWORD[64+rsp]2024 vmovdqa ymm9,YMMWORD[96+rsp]2025 2026 vpaddd ymm12,ymm12,YMMWORD[((384-512))+rax]2027 vpaddd ymm13,ymm13,YMMWORD[((416-512))+rax]2028 vpaddd ymm15,ymm15,YMMWORD[((448-512))+rax]2029 vpaddd ymm9,ymm9,YMMWORD[((480-512))+rax]2030 2031 vpunpckldq ymm2,ymm12,ymm132032 vpunpckldq ymm8,ymm15,ymm92033 vpunpckhdq ymm12,ymm12,ymm132034 vpunpckhdq ymm15,ymm15,ymm92035 vpunpcklqdq ymm13,ymm2,ymm82036 vpunpckhqdq ymm2,ymm2,ymm82037 vpunpcklqdq ymm9,ymm12,ymm152038 vpunpckhqdq ymm12,ymm12,ymm152039 vpaddd ymm4,ymm4,YMMWORD[((512-512))+rax]2040 vpaddd ymm5,ymm5,YMMWORD[((544-512))+rax]2041 vpaddd ymm6,ymm6,YMMWORD[((576-512))+rax]2042 vpaddd ymm7,ymm7,YMMWORD[((608-512))+rax]2043 2044 vpunpckldq ymm15,ymm4,ymm52045 vpunpckldq ymm8,ymm6,ymm72046 vpunpckhdq ymm4,ymm4,ymm52047 vpunpckhdq ymm6,ymm6,ymm72048 vpunpcklqdq ymm5,ymm15,ymm82049 vpunpckhqdq ymm15,ymm15,ymm82050 vpunpcklqdq ymm7,ymm4,ymm62051 vpunpckhqdq ymm4,ymm4,ymm62052 vperm2i128 ymm8,ymm13,ymm5,0x202053 vperm2i128 ymm5,ymm13,ymm5,0x312054 vperm2i128 ymm13,ymm2,ymm15,0x202055 vperm2i128 ymm15,ymm2,ymm15,0x312056 vperm2i128 ymm2,ymm9,ymm7,0x202057 vperm2i128 ymm7,ymm9,ymm7,0x312058 vperm2i128 ymm9,ymm12,ymm4,0x202059 vperm2i128 ymm4,ymm12,ymm4,0x312060 vmovdqa ymm6,YMMWORD[rsp]2061 vmovdqa ymm12,YMMWORD[32+rsp]2062 2063 cmp rdx,64*82064 jb NEAR $L$tail8x2065 2066 vpxor ymm6,ymm6,YMMWORD[rsi]2067 vpxor ymm8,ymm8,YMMWORD[32+rsi]2068 vpxor ymm1,ymm1,YMMWORD[64+rsi]2069 vpxor ymm5,ymm5,YMMWORD[96+rsi]2070 lea rsi,[128+rsi]2071 vmovdqu YMMWORD[rdi],ymm62072 vmovdqu YMMWORD[32+rdi],ymm82073 vmovdqu YMMWORD[64+rdi],ymm12074 vmovdqu YMMWORD[96+rdi],ymm52075 lea rdi,[128+rdi]2076 2077 vpxor ymm12,ymm12,YMMWORD[rsi]2078 vpxor ymm13,ymm13,YMMWORD[32+rsi]2079 vpxor ymm10,ymm10,YMMWORD[64+rsi]2080 vpxor ymm15,ymm15,YMMWORD[96+rsi]2081 lea rsi,[128+rsi]2082 vmovdqu YMMWORD[rdi],ymm122083 vmovdqu YMMWORD[32+rdi],ymm132084 vmovdqu YMMWORD[64+rdi],ymm102085 vmovdqu YMMWORD[96+rdi],ymm152086 lea rdi,[128+rdi]2087 2088 vpxor ymm14,ymm14,YMMWORD[rsi]2089 vpxor ymm2,ymm2,YMMWORD[32+rsi]2090 vpxor ymm3,ymm3,YMMWORD[64+rsi]2091 vpxor ymm7,ymm7,YMMWORD[96+rsi]2092 lea rsi,[128+rsi]2093 vmovdqu YMMWORD[rdi],ymm142094 vmovdqu YMMWORD[32+rdi],ymm22095 vmovdqu YMMWORD[64+rdi],ymm32096 vmovdqu YMMWORD[96+rdi],ymm72097 lea rdi,[128+rdi]2098 2099 vpxor ymm11,ymm11,YMMWORD[rsi]2100 vpxor ymm9,ymm9,YMMWORD[32+rsi]2101 vpxor ymm0,ymm0,YMMWORD[64+rsi]2102 vpxor ymm4,ymm4,YMMWORD[96+rsi]2103 lea rsi,[128+rsi]2104 vmovdqu YMMWORD[rdi],ymm112105 vmovdqu YMMWORD[32+rdi],ymm92106 vmovdqu YMMWORD[64+rdi],ymm02107 vmovdqu YMMWORD[96+rdi],ymm42108 lea rdi,[128+rdi]2109 2110 sub rdx,64*82111 jnz NEAR $L$oop_outer8x2112 2113 jmp NEAR $L$done8x2114 2115 $L$tail8x:2116 cmp rdx,4482117 jae NEAR $L$448_or_more8x2118 cmp rdx,3842119 jae NEAR $L$384_or_more8x2120 cmp rdx,3202121 jae NEAR $L$320_or_more8x2122 cmp rdx,2562123 jae NEAR $L$256_or_more8x2124 cmp rdx,1922125 jae NEAR $L$192_or_more8x2126 cmp rdx,1282127 jae NEAR $L$128_or_more8x2128 cmp rdx,642129 jae NEAR $L$64_or_more8x2130 2131 xor r10,r102132 vmovdqa YMMWORD[rsp],ymm62133 vmovdqa YMMWORD[32+rsp],ymm82134 jmp NEAR $L$oop_tail8x2135 2136 ALIGN 322137 $L$64_or_more8x:2138 vpxor ymm6,ymm6,YMMWORD[rsi]2139 vpxor ymm8,ymm8,YMMWORD[32+rsi]2140 vmovdqu YMMWORD[rdi],ymm62141 vmovdqu YMMWORD[32+rdi],ymm82142 je NEAR $L$done8x2143 2144 lea rsi,[64+rsi]2145 xor r10,r102146 vmovdqa YMMWORD[rsp],ymm12147 lea rdi,[64+rdi]2148 sub rdx,642149 vmovdqa YMMWORD[32+rsp],ymm52150 jmp NEAR $L$oop_tail8x2151 2152 ALIGN 322153 $L$128_or_more8x:2154 vpxor ymm6,ymm6,YMMWORD[rsi]2155 vpxor ymm8,ymm8,YMMWORD[32+rsi]2156 vpxor ymm1,ymm1,YMMWORD[64+rsi]2157 vpxor ymm5,ymm5,YMMWORD[96+rsi]2158 vmovdqu YMMWORD[rdi],ymm62159 vmovdqu YMMWORD[32+rdi],ymm82160 vmovdqu YMMWORD[64+rdi],ymm12161 vmovdqu YMMWORD[96+rdi],ymm52162 je NEAR $L$done8x2163 2164 lea rsi,[128+rsi]2165 xor r10,r102166 vmovdqa YMMWORD[rsp],ymm122167 lea rdi,[128+rdi]2168 sub rdx,1282169 vmovdqa YMMWORD[32+rsp],ymm132170 jmp NEAR $L$oop_tail8x2171 2172 ALIGN 322173 $L$192_or_more8x:2174 vpxor ymm6,ymm6,YMMWORD[rsi]2175 vpxor ymm8,ymm8,YMMWORD[32+rsi]2176 vpxor ymm1,ymm1,YMMWORD[64+rsi]2177 vpxor ymm5,ymm5,YMMWORD[96+rsi]2178 vpxor ymm12,ymm12,YMMWORD[128+rsi]2179 vpxor ymm13,ymm13,YMMWORD[160+rsi]2180 vmovdqu YMMWORD[rdi],ymm62181 vmovdqu YMMWORD[32+rdi],ymm82182 vmovdqu YMMWORD[64+rdi],ymm12183 vmovdqu YMMWORD[96+rdi],ymm52184 vmovdqu YMMWORD[128+rdi],ymm122185 vmovdqu YMMWORD[160+rdi],ymm132186 je NEAR $L$done8x2187 2188 lea rsi,[192+rsi]2189 xor r10,r102190 vmovdqa YMMWORD[rsp],ymm102191 lea rdi,[192+rdi]2192 sub rdx,1922193 vmovdqa YMMWORD[32+rsp],ymm152194 jmp NEAR $L$oop_tail8x2195 2196 ALIGN 322197 $L$256_or_more8x:2198 vpxor ymm6,ymm6,YMMWORD[rsi]2199 vpxor ymm8,ymm8,YMMWORD[32+rsi]2200 vpxor ymm1,ymm1,YMMWORD[64+rsi]2201 vpxor ymm5,ymm5,YMMWORD[96+rsi]2202 vpxor ymm12,ymm12,YMMWORD[128+rsi]2203 vpxor ymm13,ymm13,YMMWORD[160+rsi]2204 vpxor ymm10,ymm10,YMMWORD[192+rsi]2205 vpxor ymm15,ymm15,YMMWORD[224+rsi]2206 vmovdqu YMMWORD[rdi],ymm62207 vmovdqu YMMWORD[32+rdi],ymm82208 vmovdqu YMMWORD[64+rdi],ymm12209 vmovdqu YMMWORD[96+rdi],ymm52210 vmovdqu YMMWORD[128+rdi],ymm122211 vmovdqu YMMWORD[160+rdi],ymm132212 vmovdqu YMMWORD[192+rdi],ymm102213 vmovdqu YMMWORD[224+rdi],ymm152214 je NEAR $L$done8x2215 2216 lea rsi,[256+rsi]2217 xor r10,r102218 vmovdqa YMMWORD[rsp],ymm142219 lea rdi,[256+rdi]2220 sub rdx,2562221 vmovdqa YMMWORD[32+rsp],ymm22222 jmp NEAR $L$oop_tail8x2223 2224 ALIGN 322225 $L$320_or_more8x:2226 vpxor ymm6,ymm6,YMMWORD[rsi]2227 vpxor ymm8,ymm8,YMMWORD[32+rsi]2228 vpxor ymm1,ymm1,YMMWORD[64+rsi]2229 vpxor ymm5,ymm5,YMMWORD[96+rsi]2230 vpxor ymm12,ymm12,YMMWORD[128+rsi]2231 vpxor ymm13,ymm13,YMMWORD[160+rsi]2232 vpxor ymm10,ymm10,YMMWORD[192+rsi]2233 vpxor ymm15,ymm15,YMMWORD[224+rsi]2234 vpxor ymm14,ymm14,YMMWORD[256+rsi]2235 vpxor ymm2,ymm2,YMMWORD[288+rsi]2236 vmovdqu YMMWORD[rdi],ymm62237 vmovdqu YMMWORD[32+rdi],ymm82238 vmovdqu YMMWORD[64+rdi],ymm12239 vmovdqu YMMWORD[96+rdi],ymm52240 vmovdqu YMMWORD[128+rdi],ymm122241 vmovdqu YMMWORD[160+rdi],ymm132242 vmovdqu YMMWORD[192+rdi],ymm102243 vmovdqu YMMWORD[224+rdi],ymm152244 vmovdqu YMMWORD[256+rdi],ymm142245 vmovdqu YMMWORD[288+rdi],ymm22246 je NEAR $L$done8x2247 2248 lea rsi,[320+rsi]2249 xor r10,r102250 vmovdqa YMMWORD[rsp],ymm32251 lea rdi,[320+rdi]2252 sub rdx,3202253 vmovdqa YMMWORD[32+rsp],ymm72254 jmp NEAR $L$oop_tail8x2255 2256 ALIGN 322257 $L$384_or_more8x:2258 vpxor ymm6,ymm6,YMMWORD[rsi]2259 vpxor ymm8,ymm8,YMMWORD[32+rsi]2260 vpxor ymm1,ymm1,YMMWORD[64+rsi]2261 vpxor ymm5,ymm5,YMMWORD[96+rsi]2262 vpxor ymm12,ymm12,YMMWORD[128+rsi]2263 vpxor ymm13,ymm13,YMMWORD[160+rsi]2264 vpxor ymm10,ymm10,YMMWORD[192+rsi]2265 vpxor ymm15,ymm15,YMMWORD[224+rsi]2266 vpxor ymm14,ymm14,YMMWORD[256+rsi]2267 vpxor ymm2,ymm2,YMMWORD[288+rsi]2268 vpxor ymm3,ymm3,YMMWORD[320+rsi]2269 vpxor ymm7,ymm7,YMMWORD[352+rsi]2270 vmovdqu YMMWORD[rdi],ymm62271 vmovdqu YMMWORD[32+rdi],ymm82272 vmovdqu YMMWORD[64+rdi],ymm12273 vmovdqu YMMWORD[96+rdi],ymm52274 vmovdqu YMMWORD[128+rdi],ymm122275 vmovdqu YMMWORD[160+rdi],ymm132276 vmovdqu YMMWORD[192+rdi],ymm102277 vmovdqu YMMWORD[224+rdi],ymm152278 vmovdqu YMMWORD[256+rdi],ymm142279 vmovdqu YMMWORD[288+rdi],ymm22280 vmovdqu YMMWORD[320+rdi],ymm32281 vmovdqu YMMWORD[352+rdi],ymm72282 je NEAR $L$done8x2283 2284 lea rsi,[384+rsi]2285 xor r10,r102286 vmovdqa YMMWORD[rsp],ymm112287 lea rdi,[384+rdi]2288 sub rdx,3842289 vmovdqa YMMWORD[32+rsp],ymm92290 jmp NEAR $L$oop_tail8x2291 2292 ALIGN 322293 $L$448_or_more8x:2294 vpxor ymm6,ymm6,YMMWORD[rsi]2295 vpxor ymm8,ymm8,YMMWORD[32+rsi]2296 vpxor ymm1,ymm1,YMMWORD[64+rsi]2297 vpxor ymm5,ymm5,YMMWORD[96+rsi]2298 vpxor ymm12,ymm12,YMMWORD[128+rsi]2299 vpxor ymm13,ymm13,YMMWORD[160+rsi]2300 vpxor ymm10,ymm10,YMMWORD[192+rsi]2301 vpxor ymm15,ymm15,YMMWORD[224+rsi]2302 vpxor ymm14,ymm14,YMMWORD[256+rsi]2303 vpxor ymm2,ymm2,YMMWORD[288+rsi]2304 vpxor ymm3,ymm3,YMMWORD[320+rsi]2305 vpxor ymm7,ymm7,YMMWORD[352+rsi]2306 vpxor ymm11,ymm11,YMMWORD[384+rsi]2307 vpxor ymm9,ymm9,YMMWORD[416+rsi]2308 vmovdqu YMMWORD[rdi],ymm62309 vmovdqu YMMWORD[32+rdi],ymm82310 vmovdqu YMMWORD[64+rdi],ymm12311 vmovdqu YMMWORD[96+rdi],ymm52312 vmovdqu YMMWORD[128+rdi],ymm122313 vmovdqu YMMWORD[160+rdi],ymm132314 vmovdqu YMMWORD[192+rdi],ymm102315 vmovdqu YMMWORD[224+rdi],ymm152316 vmovdqu YMMWORD[256+rdi],ymm142317 vmovdqu YMMWORD[288+rdi],ymm22318 vmovdqu YMMWORD[320+rdi],ymm32319 vmovdqu YMMWORD[352+rdi],ymm72320 vmovdqu YMMWORD[384+rdi],ymm112321 vmovdqu YMMWORD[416+rdi],ymm92322 je NEAR $L$done8x2323 2324 lea rsi,[448+rsi]2325 xor r10,r102326 vmovdqa YMMWORD[rsp],ymm02327 lea rdi,[448+rdi]2328 sub rdx,4482329 vmovdqa YMMWORD[32+rsp],ymm42330 2331 $L$oop_tail8x:2332 movzx eax,BYTE[r10*1+rsi]2333 movzx ecx,BYTE[r10*1+rsp]2334 lea r10,[1+r10]2335 xor eax,ecx2336 mov BYTE[((-1))+r10*1+rdi],al2337 dec rdx2338 jnz NEAR $L$oop_tail8x2339 2340 $L$done8x:2341 vzeroall2342 movaps xmm6,XMMWORD[((-168))+r9]2343 movaps xmm7,XMMWORD[((-152))+r9]2344 movaps xmm8,XMMWORD[((-136))+r9]2345 movaps xmm9,XMMWORD[((-120))+r9]2346 movaps xmm10,XMMWORD[((-104))+r9]2347 movaps xmm11,XMMWORD[((-88))+r9]2348 movaps xmm12,XMMWORD[((-72))+r9]2349 movaps xmm13,XMMWORD[((-56))+r9]2350 movaps xmm14,XMMWORD[((-40))+r9]2351 movaps xmm15,XMMWORD[((-24))+r9]2352 lea rsp,[r9]2353 2354 $L$8x_epilogue:2355 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2356 mov rsi,QWORD[16+rsp]2357 DB 0F3h,0C3h ;repret2358 2359 $L$SEH_end_ChaCha20_8x:2360 2361 ALIGN 322362 ChaCha20_avx512:2363 mov QWORD[8+rsp],rdi ;WIN64 prologue2364 mov QWORD[16+rsp],rsi2365 mov rax,rsp2366 $L$SEH_begin_ChaCha20_avx512:2367 mov rdi,rcx2368 mov rsi,rdx2369 mov rdx,r82370 mov rcx,r92371 mov r8,QWORD[40+rsp]2372 2373 2374 2375 $L$ChaCha20_avx512:2376 mov r9,rsp2377 2378 cmp rdx,5122379 ja NEAR $L$ChaCha20_16x2380 2381 sub rsp,64+1682382 movaps XMMWORD[(-168)+r9],xmm62383 movaps XMMWORD[(-152)+r9],xmm72384 movaps XMMWORD[(-136)+r9],xmm82385 movaps XMMWORD[(-120)+r9],xmm92386 movaps XMMWORD[(-104)+r9],xmm102387 movaps XMMWORD[(-88)+r9],xmm112388 movaps XMMWORD[(-72)+r9],xmm122389 movaps XMMWORD[(-56)+r9],xmm132390 movaps XMMWORD[(-40)+r9],xmm142391 movaps XMMWORD[(-24)+r9],xmm152392 $L$avx512_body:2393 vbroadcasti32x4 zmm0,ZMMWORD[$L$sigma]2394 vbroadcasti32x4 zmm1,ZMMWORD[rcx]2395 vbroadcasti32x4 zmm2,ZMMWORD[16+rcx]2396 vbroadcasti32x4 zmm3,ZMMWORD[r8]2397 2398 vmovdqa32 zmm16,zmm02399 vmovdqa32 zmm17,zmm12400 vmovdqa32 zmm18,zmm22401 vpaddd zmm3,zmm3,ZMMWORD[$L$zeroz]2402 vmovdqa32 zmm20,ZMMWORD[$L$fourz]2403 mov r8,102404 vmovdqa32 zmm19,zmm32405 jmp NEAR $L$oop_avx5122406 2407 ALIGN 162408 $L$oop_outer_avx512:2409 vmovdqa32 zmm0,zmm162410 vmovdqa32 zmm1,zmm172411 vmovdqa32 zmm2,zmm182412 vpaddd zmm3,zmm19,zmm202413 mov r8,102414 vmovdqa32 zmm19,zmm32415 jmp NEAR $L$oop_avx5122416 2417 ALIGN 322418 $L$oop_avx512:2419 vpaddd zmm0,zmm0,zmm12420 vpxord zmm3,zmm3,zmm02421 vprold zmm3,zmm3,162422 vpaddd zmm2,zmm2,zmm32423 vpxord zmm1,zmm1,zmm22424 vprold zmm1,zmm1,122425 vpaddd zmm0,zmm0,zmm12426 vpxord zmm3,zmm3,zmm02427 vprold zmm3,zmm3,82428 vpaddd zmm2,zmm2,zmm32429 vpxord zmm1,zmm1,zmm22430 vprold zmm1,zmm1,72431 vpshufd zmm2,zmm2,782432 vpshufd zmm1,zmm1,572433 vpshufd zmm3,zmm3,1472434 vpaddd zmm0,zmm0,zmm12435 vpxord zmm3,zmm3,zmm02436 vprold zmm3,zmm3,162437 vpaddd zmm2,zmm2,zmm32438 vpxord zmm1,zmm1,zmm22439 vprold zmm1,zmm1,122440 vpaddd zmm0,zmm0,zmm12441 vpxord zmm3,zmm3,zmm02442 vprold zmm3,zmm3,82443 vpaddd zmm2,zmm2,zmm32444 vpxord zmm1,zmm1,zmm22445 vprold zmm1,zmm1,72446 vpshufd zmm2,zmm2,782447 vpshufd zmm1,zmm1,1472448 vpshufd zmm3,zmm3,572449 dec r82450 jnz NEAR $L$oop_avx5122451 vpaddd zmm0,zmm0,zmm162452 vpaddd zmm1,zmm1,zmm172453 vpaddd zmm2,zmm2,zmm182454 vpaddd zmm3,zmm3,zmm192455 2456 sub rdx,642457 jb NEAR $L$tail64_avx5122458 2459 vpxor xmm4,xmm0,XMMWORD[rsi]2460 vpxor xmm5,xmm1,XMMWORD[16+rsi]2461 vpxor xmm6,xmm2,XMMWORD[32+rsi]2462 vpxor xmm7,xmm3,XMMWORD[48+rsi]2463 lea rsi,[64+rsi]2464 2465 vmovdqu XMMWORD[rdi],xmm42466 vmovdqu XMMWORD[16+rdi],xmm52467 vmovdqu XMMWORD[32+rdi],xmm62468 vmovdqu XMMWORD[48+rdi],xmm72469 lea rdi,[64+rdi]2470 2471 jz NEAR $L$done_avx5122472 2473 vextracti32x4 xmm4,zmm0,12474 vextracti32x4 xmm5,zmm1,12475 vextracti32x4 xmm6,zmm2,12476 vextracti32x4 xmm7,zmm3,12477 2478 sub rdx,642479 jb NEAR $L$tail_avx5122480 2481 vpxor xmm4,xmm4,XMMWORD[rsi]2482 vpxor xmm5,xmm5,XMMWORD[16+rsi]2483 vpxor xmm6,xmm6,XMMWORD[32+rsi]2484 vpxor xmm7,xmm7,XMMWORD[48+rsi]2485 lea rsi,[64+rsi]2486 2487 vmovdqu XMMWORD[rdi],xmm42488 vmovdqu XMMWORD[16+rdi],xmm52489 vmovdqu XMMWORD[32+rdi],xmm62490 vmovdqu XMMWORD[48+rdi],xmm72491 lea rdi,[64+rdi]2492 2493 jz NEAR $L$done_avx5122494 2495 vextracti32x4 xmm4,zmm0,22496 vextracti32x4 xmm5,zmm1,22497 vextracti32x4 xmm6,zmm2,22498 vextracti32x4 xmm7,zmm3,22499 2500 sub rdx,642501 jb NEAR $L$tail_avx5122502 2503 vpxor xmm4,xmm4,XMMWORD[rsi]2504 vpxor xmm5,xmm5,XMMWORD[16+rsi]2505 vpxor xmm6,xmm6,XMMWORD[32+rsi]2506 vpxor xmm7,xmm7,XMMWORD[48+rsi]2507 lea rsi,[64+rsi]2508 2509 vmovdqu XMMWORD[rdi],xmm42510 vmovdqu XMMWORD[16+rdi],xmm52511 vmovdqu XMMWORD[32+rdi],xmm62512 vmovdqu XMMWORD[48+rdi],xmm72513 lea rdi,[64+rdi]2514 2515 jz NEAR $L$done_avx5122516 2517 vextracti32x4 xmm4,zmm0,32518 vextracti32x4 xmm5,zmm1,32519 vextracti32x4 xmm6,zmm2,32520 vextracti32x4 xmm7,zmm3,32521 2522 sub rdx,642523 jb NEAR $L$tail_avx5122524 2525 vpxor xmm4,xmm4,XMMWORD[rsi]2526 vpxor xmm5,xmm5,XMMWORD[16+rsi]2527 vpxor xmm6,xmm6,XMMWORD[32+rsi]2528 vpxor xmm7,xmm7,XMMWORD[48+rsi]2529 lea rsi,[64+rsi]2530 2531 vmovdqu XMMWORD[rdi],xmm42532 vmovdqu XMMWORD[16+rdi],xmm52533 vmovdqu XMMWORD[32+rdi],xmm62534 vmovdqu XMMWORD[48+rdi],xmm72535 lea rdi,[64+rdi]2536 2537 jnz NEAR $L$oop_outer_avx5122538 2539 jmp NEAR $L$done_avx5122540 2541 ALIGN 162542 $L$tail64_avx512:2543 vmovdqa XMMWORD[rsp],xmm02544 vmovdqa XMMWORD[16+rsp],xmm12545 vmovdqa XMMWORD[32+rsp],xmm22546 vmovdqa XMMWORD[48+rsp],xmm32547 add rdx,642548 jmp NEAR $L$oop_tail_avx5122549 2550 ALIGN 162551 $L$tail_avx512:2552 vmovdqa XMMWORD[rsp],xmm42553 vmovdqa XMMWORD[16+rsp],xmm52554 vmovdqa XMMWORD[32+rsp],xmm62555 vmovdqa XMMWORD[48+rsp],xmm72556 add rdx,642557 2558 $L$oop_tail_avx512:2559 movzx eax,BYTE[r8*1+rsi]2560 movzx ecx,BYTE[r8*1+rsp]2561 lea r8,[1+r8]2562 xor eax,ecx2563 mov BYTE[((-1))+r8*1+rdi],al2564 dec rdx2565 jnz NEAR $L$oop_tail_avx5122566 2567 vmovdqu32 ZMMWORD[rsp],zmm162568 2569 $L$done_avx512:2570 vzeroall2571 movaps xmm6,XMMWORD[((-168))+r9]2572 movaps xmm7,XMMWORD[((-152))+r9]2573 movaps xmm8,XMMWORD[((-136))+r9]2574 movaps xmm9,XMMWORD[((-120))+r9]2575 movaps xmm10,XMMWORD[((-104))+r9]2576 movaps xmm11,XMMWORD[((-88))+r9]2577 movaps xmm12,XMMWORD[((-72))+r9]2578 movaps xmm13,XMMWORD[((-56))+r9]2579 movaps xmm14,XMMWORD[((-40))+r9]2580 movaps xmm15,XMMWORD[((-24))+r9]2581 lea rsp,[r9]2582 2583 $L$avx512_epilogue:2584 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2585 mov rsi,QWORD[16+rsp]2586 DB 0F3h,0C3h ;repret2587 2588 $L$SEH_end_ChaCha20_avx512:2589 2590 ALIGN 322591 ChaCha20_avx512vl:2592 mov QWORD[8+rsp],rdi ;WIN64 prologue2593 mov QWORD[16+rsp],rsi2594 mov rax,rsp2595 $L$SEH_begin_ChaCha20_avx512vl:2596 mov rdi,rcx2597 mov rsi,rdx2598 mov rdx,r82599 mov rcx,r92600 mov r8,QWORD[40+rsp]2601 2602 2603 2604 $L$ChaCha20_avx512vl:2605 mov r9,rsp2606 2607 cmp rdx,1282608 ja NEAR $L$ChaCha20_8xvl2609 2610 sub rsp,64+1682611 movaps XMMWORD[(-168)+r9],xmm62612 movaps XMMWORD[(-152)+r9],xmm72613 movaps XMMWORD[(-136)+r9],xmm82614 movaps XMMWORD[(-120)+r9],xmm92615 movaps XMMWORD[(-104)+r9],xmm102616 movaps XMMWORD[(-88)+r9],xmm112617 movaps XMMWORD[(-72)+r9],xmm122618 movaps XMMWORD[(-56)+r9],xmm132619 movaps XMMWORD[(-40)+r9],xmm142620 movaps XMMWORD[(-24)+r9],xmm152621 $L$avx512vl_body:2622 vbroadcasti128 ymm0,XMMWORD[$L$sigma]2623 vbroadcasti128 ymm1,XMMWORD[rcx]2624 vbroadcasti128 ymm2,XMMWORD[16+rcx]2625 vbroadcasti128 ymm3,XMMWORD[r8]2626 2627 vmovdqa32 ymm16,ymm02628 vmovdqa32 ymm17,ymm12629 vmovdqa32 ymm18,ymm22630 vpaddd ymm3,ymm3,YMMWORD[$L$zeroz]2631 vmovdqa32 ymm20,YMMWORD[$L$twoy]2632 mov r8,102633 vmovdqa32 ymm19,ymm32634 jmp NEAR $L$oop_avx512vl2635 2636 ALIGN 162637 $L$oop_outer_avx512vl:2638 vmovdqa32 ymm2,ymm182639 vpaddd ymm3,ymm19,ymm202640 mov r8,102641 vmovdqa32 ymm19,ymm32642 jmp NEAR $L$oop_avx512vl2643 2644 ALIGN 322645 $L$oop_avx512vl:2646 vpaddd ymm0,ymm0,ymm12647 vpxor ymm3,ymm3,ymm02648 vprold ymm3,ymm3,162649 vpaddd ymm2,ymm2,ymm32650 vpxor ymm1,ymm1,ymm22651 vprold ymm1,ymm1,122652 vpaddd ymm0,ymm0,ymm12653 vpxor ymm3,ymm3,ymm02654 vprold ymm3,ymm3,82655 vpaddd ymm2,ymm2,ymm32656 vpxor ymm1,ymm1,ymm22657 vprold ymm1,ymm1,72658 vpshufd ymm2,ymm2,782659 vpshufd ymm1,ymm1,572660 vpshufd ymm3,ymm3,1472661 vpaddd ymm0,ymm0,ymm12662 vpxor ymm3,ymm3,ymm02663 vprold ymm3,ymm3,162664 vpaddd ymm2,ymm2,ymm32665 vpxor ymm1,ymm1,ymm22666 vprold ymm1,ymm1,122667 vpaddd ymm0,ymm0,ymm12668 vpxor ymm3,ymm3,ymm02669 vprold ymm3,ymm3,82670 vpaddd ymm2,ymm2,ymm32671 vpxor ymm1,ymm1,ymm22672 vprold ymm1,ymm1,72673 vpshufd ymm2,ymm2,782674 vpshufd ymm1,ymm1,1472675 vpshufd ymm3,ymm3,572676 dec r82677 jnz NEAR $L$oop_avx512vl2678 vpaddd ymm0,ymm0,ymm162679 vpaddd ymm1,ymm1,ymm172680 vpaddd ymm2,ymm2,ymm182681 vpaddd ymm3,ymm3,ymm192682 2683 sub rdx,642684 jb NEAR $L$tail64_avx512vl2685 2686 vpxor xmm4,xmm0,XMMWORD[rsi]2687 vpxor xmm5,xmm1,XMMWORD[16+rsi]2688 vpxor xmm6,xmm2,XMMWORD[32+rsi]2689 vpxor xmm7,xmm3,XMMWORD[48+rsi]2690 lea rsi,[64+rsi]2691 2692 vmovdqu XMMWORD[rdi],xmm42693 vmovdqu XMMWORD[16+rdi],xmm52694 vmovdqu XMMWORD[32+rdi],xmm62695 vmovdqu XMMWORD[48+rdi],xmm72696 lea rdi,[64+rdi]2697 2698 jz NEAR $L$done_avx512vl2699 2700 vextracti128 xmm4,ymm0,12701 vextracti128 xmm5,ymm1,12702 vextracti128 xmm6,ymm2,12703 vextracti128 xmm7,ymm3,12704 2705 sub rdx,642706 jb NEAR $L$tail_avx512vl2707 2708 vpxor xmm4,xmm4,XMMWORD[rsi]2709 vpxor xmm5,xmm5,XMMWORD[16+rsi]2710 vpxor xmm6,xmm6,XMMWORD[32+rsi]2711 vpxor xmm7,xmm7,XMMWORD[48+rsi]2712 lea rsi,[64+rsi]2713 2714 vmovdqu XMMWORD[rdi],xmm42715 vmovdqu XMMWORD[16+rdi],xmm52716 vmovdqu XMMWORD[32+rdi],xmm62717 vmovdqu XMMWORD[48+rdi],xmm72718 lea rdi,[64+rdi]2719 2720 vmovdqa32 ymm0,ymm162721 vmovdqa32 ymm1,ymm172722 jnz NEAR $L$oop_outer_avx512vl2723 2724 jmp NEAR $L$done_avx512vl2725 2726 ALIGN 162727 $L$tail64_avx512vl:2728 vmovdqa XMMWORD[rsp],xmm02729 vmovdqa XMMWORD[16+rsp],xmm12730 vmovdqa XMMWORD[32+rsp],xmm22731 vmovdqa XMMWORD[48+rsp],xmm32732 add rdx,642733 jmp NEAR $L$oop_tail_avx512vl2734 2735 ALIGN 162736 $L$tail_avx512vl:2737 vmovdqa XMMWORD[rsp],xmm42738 vmovdqa XMMWORD[16+rsp],xmm52739 vmovdqa XMMWORD[32+rsp],xmm62740 vmovdqa XMMWORD[48+rsp],xmm72741 add rdx,642742 2743 $L$oop_tail_avx512vl:2744 movzx eax,BYTE[r8*1+rsi]2745 movzx ecx,BYTE[r8*1+rsp]2746 lea r8,[1+r8]2747 xor eax,ecx2748 mov BYTE[((-1))+r8*1+rdi],al2749 dec rdx2750 jnz NEAR $L$oop_tail_avx512vl2751 2752 vmovdqu32 YMMWORD[rsp],ymm162753 vmovdqu32 YMMWORD[32+rsp],ymm162754 2755 $L$done_avx512vl:2756 vzeroall2757 movaps xmm6,XMMWORD[((-168))+r9]2758 movaps xmm7,XMMWORD[((-152))+r9]2759 movaps xmm8,XMMWORD[((-136))+r9]2760 movaps xmm9,XMMWORD[((-120))+r9]2761 movaps xmm10,XMMWORD[((-104))+r9]2762 movaps xmm11,XMMWORD[((-88))+r9]2763 movaps xmm12,XMMWORD[((-72))+r9]2764 movaps xmm13,XMMWORD[((-56))+r9]2765 movaps xmm14,XMMWORD[((-40))+r9]2766 movaps xmm15,XMMWORD[((-24))+r9]2767 lea rsp,[r9]2768 2769 $L$avx512vl_epilogue:2770 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2771 mov rsi,QWORD[16+rsp]2772 DB 0F3h,0C3h ;repret2773 2774 $L$SEH_end_ChaCha20_avx512vl:2775 2776 ALIGN 322777 ChaCha20_16x:2778 mov QWORD[8+rsp],rdi ;WIN64 prologue2779 mov QWORD[16+rsp],rsi2780 mov rax,rsp2781 $L$SEH_begin_ChaCha20_16x:2782 mov rdi,rcx2783 mov rsi,rdx2784 mov rdx,r82785 mov rcx,r92786 mov r8,QWORD[40+rsp]2787 2788 2789 2790 $L$ChaCha20_16x:2791 mov r9,rsp2792 2793 sub rsp,64+1682794 and rsp,-642795 movaps XMMWORD[(-168)+r9],xmm62796 movaps XMMWORD[(-152)+r9],xmm72797 movaps XMMWORD[(-136)+r9],xmm82798 movaps XMMWORD[(-120)+r9],xmm92799 movaps XMMWORD[(-104)+r9],xmm102800 movaps XMMWORD[(-88)+r9],xmm112801 movaps XMMWORD[(-72)+r9],xmm122802 movaps XMMWORD[(-56)+r9],xmm132803 movaps XMMWORD[(-40)+r9],xmm142804 movaps XMMWORD[(-24)+r9],xmm152805 $L$16x_body:2806 vzeroupper2807 2808 lea r10,[$L$sigma]2809 vbroadcasti32x4 zmm3,ZMMWORD[r10]2810 vbroadcasti32x4 zmm7,ZMMWORD[rcx]2811 vbroadcasti32x4 zmm11,ZMMWORD[16+rcx]2812 vbroadcasti32x4 zmm15,ZMMWORD[r8]2813 2814 vpshufd zmm0,zmm3,0x002815 vpshufd zmm1,zmm3,0x552816 vpshufd zmm2,zmm3,0xaa2817 vpshufd zmm3,zmm3,0xff2818 vmovdqa64 zmm16,zmm02819 vmovdqa64 zmm17,zmm12820 vmovdqa64 zmm18,zmm22821 vmovdqa64 zmm19,zmm32822 2823 vpshufd zmm4,zmm7,0x002824 vpshufd zmm5,zmm7,0x552825 vpshufd zmm6,zmm7,0xaa2826 vpshufd zmm7,zmm7,0xff2827 vmovdqa64 zmm20,zmm42828 vmovdqa64 zmm21,zmm52829 vmovdqa64 zmm22,zmm62830 vmovdqa64 zmm23,zmm72831 2832 vpshufd zmm8,zmm11,0x002833 vpshufd zmm9,zmm11,0x552834 vpshufd zmm10,zmm11,0xaa2835 vpshufd zmm11,zmm11,0xff2836 vmovdqa64 zmm24,zmm82837 vmovdqa64 zmm25,zmm92838 vmovdqa64 zmm26,zmm102839 vmovdqa64 zmm27,zmm112840 2841 vpshufd zmm12,zmm15,0x002842 vpshufd zmm13,zmm15,0x552843 vpshufd zmm14,zmm15,0xaa2844 vpshufd zmm15,zmm15,0xff2845 vpaddd zmm12,zmm12,ZMMWORD[$L$incz]2846 vmovdqa64 zmm28,zmm122847 vmovdqa64 zmm29,zmm132848 vmovdqa64 zmm30,zmm142849 vmovdqa64 zmm31,zmm152850 2851 mov eax,102852 jmp NEAR $L$oop16x2853 2854 ALIGN 322855 $L$oop_outer16x:2856 vpbroadcastd zmm0,DWORD[r10]2857 vpbroadcastd zmm1,DWORD[4+r10]2858 vpbroadcastd zmm2,DWORD[8+r10]2859 vpbroadcastd zmm3,DWORD[12+r10]2860 vpaddd zmm28,zmm28,ZMMWORD[$L$sixteen]2861 vmovdqa64 zmm4,zmm202862 vmovdqa64 zmm5,zmm212863 vmovdqa64 zmm6,zmm222864 vmovdqa64 zmm7,zmm232865 vmovdqa64 zmm8,zmm242866 vmovdqa64 zmm9,zmm252867 vmovdqa64 zmm10,zmm262868 vmovdqa64 zmm11,zmm272869 vmovdqa64 zmm12,zmm282870 vmovdqa64 zmm13,zmm292871 vmovdqa64 zmm14,zmm302872 vmovdqa64 zmm15,zmm312873 2874 vmovdqa64 zmm16,zmm02875 vmovdqa64 zmm17,zmm12876 vmovdqa64 zmm18,zmm22877 vmovdqa64 zmm19,zmm32878 2879 mov eax,102880 jmp NEAR $L$oop16x2881 2882 ALIGN 322883 $L$oop16x:2884 vpaddd zmm0,zmm0,zmm42885 vpaddd zmm1,zmm1,zmm52886 vpaddd zmm2,zmm2,zmm62887 vpaddd zmm3,zmm3,zmm72888 vpxord zmm12,zmm12,zmm02889 vpxord zmm13,zmm13,zmm12890 vpxord zmm14,zmm14,zmm22891 vpxord zmm15,zmm15,zmm32892 vprold zmm12,zmm12,162893 vprold zmm13,zmm13,162894 vprold zmm14,zmm14,162895 vprold zmm15,zmm15,162896 vpaddd zmm8,zmm8,zmm122897 vpaddd zmm9,zmm9,zmm132898 vpaddd zmm10,zmm10,zmm142899 vpaddd zmm11,zmm11,zmm152900 vpxord zmm4,zmm4,zmm82901 vpxord zmm5,zmm5,zmm92902 vpxord zmm6,zmm6,zmm102903 vpxord zmm7,zmm7,zmm112904 vprold zmm4,zmm4,122905 vprold zmm5,zmm5,122906 vprold zmm6,zmm6,122907 vprold zmm7,zmm7,122908 vpaddd zmm0,zmm0,zmm42909 vpaddd zmm1,zmm1,zmm52910 vpaddd zmm2,zmm2,zmm62911 vpaddd zmm3,zmm3,zmm72912 vpxord zmm12,zmm12,zmm02913 vpxord zmm13,zmm13,zmm12914 vpxord zmm14,zmm14,zmm22915 vpxord zmm15,zmm15,zmm32916 vprold zmm12,zmm12,82917 vprold zmm13,zmm13,82918 vprold zmm14,zmm14,82919 vprold zmm15,zmm15,82920 vpaddd zmm8,zmm8,zmm122921 vpaddd zmm9,zmm9,zmm132922 vpaddd zmm10,zmm10,zmm142923 vpaddd zmm11,zmm11,zmm152924 vpxord zmm4,zmm4,zmm82925 vpxord zmm5,zmm5,zmm92926 vpxord zmm6,zmm6,zmm102927 vpxord zmm7,zmm7,zmm112928 vprold zmm4,zmm4,72929 vprold zmm5,zmm5,72930 vprold zmm6,zmm6,72931 vprold zmm7,zmm7,72932 vpaddd zmm0,zmm0,zmm52933 vpaddd zmm1,zmm1,zmm62934 vpaddd zmm2,zmm2,zmm72935 vpaddd zmm3,zmm3,zmm42936 vpxord zmm15,zmm15,zmm02937 vpxord zmm12,zmm12,zmm12938 vpxord zmm13,zmm13,zmm22939 vpxord zmm14,zmm14,zmm32940 vprold zmm15,zmm15,162941 vprold zmm12,zmm12,162942 vprold zmm13,zmm13,162943 vprold zmm14,zmm14,162944 vpaddd zmm10,zmm10,zmm152945 vpaddd zmm11,zmm11,zmm122946 vpaddd zmm8,zmm8,zmm132947 vpaddd zmm9,zmm9,zmm142948 vpxord zmm5,zmm5,zmm102949 vpxord zmm6,zmm6,zmm112950 vpxord zmm7,zmm7,zmm82951 vpxord zmm4,zmm4,zmm92952 vprold zmm5,zmm5,122953 vprold zmm6,zmm6,122954 vprold zmm7,zmm7,122955 vprold zmm4,zmm4,122956 vpaddd zmm0,zmm0,zmm52957 vpaddd zmm1,zmm1,zmm62958 vpaddd zmm2,zmm2,zmm72959 vpaddd zmm3,zmm3,zmm42960 vpxord zmm15,zmm15,zmm02961 vpxord zmm12,zmm12,zmm12962 vpxord zmm13,zmm13,zmm22963 vpxord zmm14,zmm14,zmm32964 vprold zmm15,zmm15,82965 vprold zmm12,zmm12,82966 vprold zmm13,zmm13,82967 vprold zmm14,zmm14,82968 vpaddd zmm10,zmm10,zmm152969 vpaddd zmm11,zmm11,zmm122970 vpaddd zmm8,zmm8,zmm132971 vpaddd zmm9,zmm9,zmm142972 vpxord zmm5,zmm5,zmm102973 vpxord zmm6,zmm6,zmm112974 vpxord zmm7,zmm7,zmm82975 vpxord zmm4,zmm4,zmm92976 vprold zmm5,zmm5,72977 vprold zmm6,zmm6,72978 vprold zmm7,zmm7,72979 vprold zmm4,zmm4,72980 dec eax2981 jnz NEAR $L$oop16x2982 2983 vpaddd zmm0,zmm0,zmm162984 vpaddd zmm1,zmm1,zmm172985 vpaddd zmm2,zmm2,zmm182986 vpaddd zmm3,zmm3,zmm192987 2988 vpunpckldq zmm18,zmm0,zmm12989 vpunpckldq zmm19,zmm2,zmm32990 vpunpckhdq zmm0,zmm0,zmm12991 vpunpckhdq zmm2,zmm2,zmm32992 vpunpcklqdq zmm1,zmm18,zmm192993 vpunpckhqdq zmm18,zmm18,zmm192994 vpunpcklqdq zmm3,zmm0,zmm22995 vpunpckhqdq zmm0,zmm0,zmm22996 vpaddd zmm4,zmm4,zmm202997 vpaddd zmm5,zmm5,zmm212998 vpaddd zmm6,zmm6,zmm222999 vpaddd zmm7,zmm7,zmm233000 3001 vpunpckldq zmm2,zmm4,zmm53002 vpunpckldq zmm19,zmm6,zmm73003 vpunpckhdq zmm4,zmm4,zmm53004 vpunpckhdq zmm6,zmm6,zmm73005 vpunpcklqdq zmm5,zmm2,zmm193006 vpunpckhqdq zmm2,zmm2,zmm193007 vpunpcklqdq zmm7,zmm4,zmm63008 vpunpckhqdq zmm4,zmm4,zmm63009 vshufi32x4 zmm19,zmm1,zmm5,0x443010 vshufi32x4 zmm5,zmm1,zmm5,0xee3011 vshufi32x4 zmm1,zmm18,zmm2,0x443012 vshufi32x4 zmm2,zmm18,zmm2,0xee3013 vshufi32x4 zmm18,zmm3,zmm7,0x443014 vshufi32x4 zmm7,zmm3,zmm7,0xee3015 vshufi32x4 zmm3,zmm0,zmm4,0x443016 vshufi32x4 zmm4,zmm0,zmm4,0xee3017 vpaddd zmm8,zmm8,zmm243018 vpaddd zmm9,zmm9,zmm253019 vpaddd zmm10,zmm10,zmm263020 vpaddd zmm11,zmm11,zmm273021 3022 vpunpckldq zmm6,zmm8,zmm93023 vpunpckldq zmm0,zmm10,zmm113024 vpunpckhdq zmm8,zmm8,zmm93025 vpunpckhdq zmm10,zmm10,zmm113026 vpunpcklqdq zmm9,zmm6,zmm03027 vpunpckhqdq zmm6,zmm6,zmm03028 vpunpcklqdq zmm11,zmm8,zmm103029 vpunpckhqdq zmm8,zmm8,zmm103030 vpaddd zmm12,zmm12,zmm283031 vpaddd zmm13,zmm13,zmm293032 vpaddd zmm14,zmm14,zmm303033 vpaddd zmm15,zmm15,zmm313034 3035 vpunpckldq zmm10,zmm12,zmm133036 vpunpckldq zmm0,zmm14,zmm153037 vpunpckhdq zmm12,zmm12,zmm133038 vpunpckhdq zmm14,zmm14,zmm153039 vpunpcklqdq zmm13,zmm10,zmm03040 vpunpckhqdq zmm10,zmm10,zmm03041 vpunpcklqdq zmm15,zmm12,zmm143042 vpunpckhqdq zmm12,zmm12,zmm143043 vshufi32x4 zmm0,zmm9,zmm13,0x443044 vshufi32x4 zmm13,zmm9,zmm13,0xee3045 vshufi32x4 zmm9,zmm6,zmm10,0x443046 vshufi32x4 zmm10,zmm6,zmm10,0xee3047 vshufi32x4 zmm6,zmm11,zmm15,0x443048 vshufi32x4 zmm15,zmm11,zmm15,0xee3049 vshufi32x4 zmm11,zmm8,zmm12,0x443050 vshufi32x4 zmm12,zmm8,zmm12,0xee3051 vshufi32x4 zmm16,zmm19,zmm0,0x883052 vshufi32x4 zmm19,zmm19,zmm0,0xdd3053 vshufi32x4 zmm0,zmm5,zmm13,0x883054 vshufi32x4 zmm13,zmm5,zmm13,0xdd3055 vshufi32x4 zmm17,zmm1,zmm9,0x883056 vshufi32x4 zmm1,zmm1,zmm9,0xdd3057 vshufi32x4 zmm9,zmm2,zmm10,0x883058 vshufi32x4 zmm10,zmm2,zmm10,0xdd3059 vshufi32x4 zmm14,zmm18,zmm6,0x883060 vshufi32x4 zmm18,zmm18,zmm6,0xdd3061 vshufi32x4 zmm6,zmm7,zmm15,0x883062 vshufi32x4 zmm15,zmm7,zmm15,0xdd3063 vshufi32x4 zmm8,zmm3,zmm11,0x883064 vshufi32x4 zmm3,zmm3,zmm11,0xdd3065 vshufi32x4 zmm11,zmm4,zmm12,0x883066 vshufi32x4 zmm12,zmm4,zmm12,0xdd3067 cmp rdx,64*163068 jb NEAR $L$tail16x3069 3070 vpxord zmm16,zmm16,ZMMWORD[rsi]3071 vpxord zmm17,zmm17,ZMMWORD[64+rsi]3072 vpxord zmm14,zmm14,ZMMWORD[128+rsi]3073 vpxord zmm8,zmm8,ZMMWORD[192+rsi]3074 vmovdqu32 ZMMWORD[rdi],zmm163075 vmovdqu32 ZMMWORD[64+rdi],zmm173076 vmovdqu32 ZMMWORD[128+rdi],zmm143077 vmovdqu32 ZMMWORD[192+rdi],zmm83078 3079 vpxord zmm19,zmm19,ZMMWORD[256+rsi]3080 vpxord zmm1,zmm1,ZMMWORD[320+rsi]3081 vpxord zmm18,zmm18,ZMMWORD[384+rsi]3082 vpxord zmm3,zmm3,ZMMWORD[448+rsi]3083 vmovdqu32 ZMMWORD[256+rdi],zmm193084 vmovdqu32 ZMMWORD[320+rdi],zmm13085 vmovdqu32 ZMMWORD[384+rdi],zmm183086 vmovdqu32 ZMMWORD[448+rdi],zmm33087 3088 vpxord zmm0,zmm0,ZMMWORD[512+rsi]3089 vpxord zmm9,zmm9,ZMMWORD[576+rsi]3090 vpxord zmm6,zmm6,ZMMWORD[640+rsi]3091 vpxord zmm11,zmm11,ZMMWORD[704+rsi]3092 vmovdqu32 ZMMWORD[512+rdi],zmm03093 vmovdqu32 ZMMWORD[576+rdi],zmm93094 vmovdqu32 ZMMWORD[640+rdi],zmm63095 vmovdqu32 ZMMWORD[704+rdi],zmm113096 3097 vpxord zmm13,zmm13,ZMMWORD[768+rsi]3098 vpxord zmm10,zmm10,ZMMWORD[832+rsi]3099 vpxord zmm15,zmm15,ZMMWORD[896+rsi]3100 vpxord zmm12,zmm12,ZMMWORD[960+rsi]3101 lea rsi,[1024+rsi]3102 vmovdqu32 ZMMWORD[768+rdi],zmm133103 vmovdqu32 ZMMWORD[832+rdi],zmm103104 vmovdqu32 ZMMWORD[896+rdi],zmm153105 vmovdqu32 ZMMWORD[960+rdi],zmm123106 lea rdi,[1024+rdi]3107 3108 sub rdx,64*163109 jnz NEAR $L$oop_outer16x3110 3111 jmp NEAR $L$done16x3112 3113 ALIGN 323114 $L$tail16x:3115 xor r10,r103116 sub rdi,rsi3117 cmp rdx,64*13118 jb NEAR $L$ess_than_64_16x3119 vpxord zmm16,zmm16,ZMMWORD[rsi]3120 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm163121 je NEAR $L$done16x3122 vmovdqa32 zmm16,zmm173123 lea rsi,[64+rsi]3124 3125 cmp rdx,64*23126 jb NEAR $L$ess_than_64_16x3127 vpxord zmm17,zmm17,ZMMWORD[rsi]3128 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm173129 je NEAR $L$done16x3130 vmovdqa32 zmm16,zmm143131 lea rsi,[64+rsi]3132 3133 cmp rdx,64*33134 jb NEAR $L$ess_than_64_16x3135 vpxord zmm14,zmm14,ZMMWORD[rsi]3136 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm143137 je NEAR $L$done16x3138 vmovdqa32 zmm16,zmm83139 lea rsi,[64+rsi]3140 3141 cmp rdx,64*43142 jb NEAR $L$ess_than_64_16x3143 vpxord zmm8,zmm8,ZMMWORD[rsi]3144 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm83145 je NEAR $L$done16x3146 vmovdqa32 zmm16,zmm193147 lea rsi,[64+rsi]3148 3149 cmp rdx,64*53150 jb NEAR $L$ess_than_64_16x3151 vpxord zmm19,zmm19,ZMMWORD[rsi]3152 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm193153 je NEAR $L$done16x3154 vmovdqa32 zmm16,zmm13155 lea rsi,[64+rsi]3156 3157 cmp rdx,64*63158 jb NEAR $L$ess_than_64_16x3159 vpxord zmm1,zmm1,ZMMWORD[rsi]3160 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm13161 je NEAR $L$done16x3162 vmovdqa32 zmm16,zmm183163 lea rsi,[64+rsi]3164 3165 cmp rdx,64*73166 jb NEAR $L$ess_than_64_16x3167 vpxord zmm18,zmm18,ZMMWORD[rsi]3168 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm183169 je NEAR $L$done16x3170 vmovdqa32 zmm16,zmm33171 lea rsi,[64+rsi]3172 3173 cmp rdx,64*83174 jb NEAR $L$ess_than_64_16x3175 vpxord zmm3,zmm3,ZMMWORD[rsi]3176 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm33177 je NEAR $L$done16x3178 vmovdqa32 zmm16,zmm03179 lea rsi,[64+rsi]3180 3181 cmp rdx,64*93182 jb NEAR $L$ess_than_64_16x3183 vpxord zmm0,zmm0,ZMMWORD[rsi]3184 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm03185 je NEAR $L$done16x3186 vmovdqa32 zmm16,zmm93187 lea rsi,[64+rsi]3188 3189 cmp rdx,64*103190 jb NEAR $L$ess_than_64_16x3191 vpxord zmm9,zmm9,ZMMWORD[rsi]3192 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm93193 je NEAR $L$done16x3194 vmovdqa32 zmm16,zmm63195 lea rsi,[64+rsi]3196 3197 cmp rdx,64*113198 jb NEAR $L$ess_than_64_16x3199 vpxord zmm6,zmm6,ZMMWORD[rsi]3200 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm63201 je NEAR $L$done16x3202 vmovdqa32 zmm16,zmm113203 lea rsi,[64+rsi]3204 3205 cmp rdx,64*123206 jb NEAR $L$ess_than_64_16x3207 vpxord zmm11,zmm11,ZMMWORD[rsi]3208 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm113209 je NEAR $L$done16x3210 vmovdqa32 zmm16,zmm133211 lea rsi,[64+rsi]3212 3213 cmp rdx,64*133214 jb NEAR $L$ess_than_64_16x3215 vpxord zmm13,zmm13,ZMMWORD[rsi]3216 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm133217 je NEAR $L$done16x3218 vmovdqa32 zmm16,zmm103219 lea rsi,[64+rsi]3220 3221 cmp rdx,64*143222 jb NEAR $L$ess_than_64_16x3223 vpxord zmm10,zmm10,ZMMWORD[rsi]3224 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm103225 je NEAR $L$done16x3226 vmovdqa32 zmm16,zmm153227 lea rsi,[64+rsi]3228 3229 cmp rdx,64*153230 jb NEAR $L$ess_than_64_16x3231 vpxord zmm15,zmm15,ZMMWORD[rsi]3232 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm153233 je NEAR $L$done16x3234 vmovdqa32 zmm16,zmm123235 lea rsi,[64+rsi]3236 3237 $L$ess_than_64_16x:3238 vmovdqa32 ZMMWORD[rsp],zmm163239 lea rdi,[rsi*1+rdi]3240 and rdx,633241 3242 $L$oop_tail16x:3243 movzx eax,BYTE[r10*1+rsi]3244 movzx ecx,BYTE[r10*1+rsp]3245 lea r10,[1+r10]3246 xor eax,ecx3247 mov BYTE[((-1))+r10*1+rdi],al3248 dec rdx3249 jnz NEAR $L$oop_tail16x3250 3251 vpxord zmm16,zmm16,zmm163252 vmovdqa32 ZMMWORD[rsp],zmm163253 3254 $L$done16x:3255 vzeroall3256 movaps xmm6,XMMWORD[((-168))+r9]3257 movaps xmm7,XMMWORD[((-152))+r9]3258 movaps xmm8,XMMWORD[((-136))+r9]3259 movaps xmm9,XMMWORD[((-120))+r9]3260 movaps xmm10,XMMWORD[((-104))+r9]3261 movaps xmm11,XMMWORD[((-88))+r9]3262 movaps xmm12,XMMWORD[((-72))+r9]3263 movaps xmm13,XMMWORD[((-56))+r9]3264 movaps xmm14,XMMWORD[((-40))+r9]3265 movaps xmm15,XMMWORD[((-24))+r9]3266 lea rsp,[r9]3267 3268 $L$16x_epilogue:3269 mov rdi,QWORD[8+rsp] ;WIN64 epilogue3270 mov rsi,QWORD[16+rsp]3271 DB 0F3h,0C3h ;repret3272 3273 $L$SEH_end_ChaCha20_16x:3274 3275 ALIGN 323276 ChaCha20_8xvl:3277 mov QWORD[8+rsp],rdi ;WIN64 prologue3278 mov QWORD[16+rsp],rsi3279 mov rax,rsp3280 $L$SEH_begin_ChaCha20_8xvl:3281 mov rdi,rcx3282 mov rsi,rdx3283 mov rdx,r83284 mov rcx,r93285 mov r8,QWORD[40+rsp]3286 3287 3288 3289 $L$ChaCha20_8xvl:3290 mov r9,rsp3291 3292 sub rsp,64+1683293 and rsp,-643294 movaps XMMWORD[(-168)+r9],xmm63295 movaps XMMWORD[(-152)+r9],xmm73296 movaps XMMWORD[(-136)+r9],xmm83297 movaps XMMWORD[(-120)+r9],xmm93298 movaps XMMWORD[(-104)+r9],xmm103299 movaps XMMWORD[(-88)+r9],xmm113300 movaps XMMWORD[(-72)+r9],xmm123301 movaps XMMWORD[(-56)+r9],xmm133302 movaps XMMWORD[(-40)+r9],xmm143303 movaps XMMWORD[(-24)+r9],xmm153304 $L$8xvl_body:3305 vzeroupper3306 3307 lea r10,[$L$sigma]3308 vbroadcasti128 ymm3,XMMWORD[r10]3309 vbroadcasti128 ymm7,XMMWORD[rcx]3310 vbroadcasti128 ymm11,XMMWORD[16+rcx]3311 vbroadcasti128 ymm15,XMMWORD[r8]3312 3313 vpshufd ymm0,ymm3,0x003314 vpshufd ymm1,ymm3,0x553315 vpshufd ymm2,ymm3,0xaa3316 vpshufd ymm3,ymm3,0xff3317 vmovdqa64 ymm16,ymm03318 vmovdqa64 ymm17,ymm13319 vmovdqa64 ymm18,ymm23320 vmovdqa64 ymm19,ymm33321 3322 vpshufd ymm4,ymm7,0x003323 vpshufd ymm5,ymm7,0x553324 vpshufd ymm6,ymm7,0xaa3325 vpshufd ymm7,ymm7,0xff3326 vmovdqa64 ymm20,ymm43327 vmovdqa64 ymm21,ymm53328 vmovdqa64 ymm22,ymm63329 vmovdqa64 ymm23,ymm73330 3331 vpshufd ymm8,ymm11,0x003332 vpshufd ymm9,ymm11,0x553333 vpshufd ymm10,ymm11,0xaa3334 vpshufd ymm11,ymm11,0xff3335 vmovdqa64 ymm24,ymm83336 vmovdqa64 ymm25,ymm93337 vmovdqa64 ymm26,ymm103338 vmovdqa64 ymm27,ymm113339 3340 vpshufd ymm12,ymm15,0x003341 vpshufd ymm13,ymm15,0x553342 vpshufd ymm14,ymm15,0xaa3343 vpshufd ymm15,ymm15,0xff3344 vpaddd ymm12,ymm12,YMMWORD[$L$incy]3345 vmovdqa64 ymm28,ymm123346 vmovdqa64 ymm29,ymm133347 vmovdqa64 ymm30,ymm143348 vmovdqa64 ymm31,ymm153349 3350 mov eax,103351 jmp NEAR $L$oop8xvl3352 3353 ALIGN 323354 $L$oop_outer8xvl:3355 3356 3357 vpbroadcastd ymm2,DWORD[8+r10]3358 vpbroadcastd ymm3,DWORD[12+r10]3359 vpaddd ymm28,ymm28,YMMWORD[$L$eight]3360 vmovdqa64 ymm4,ymm203361 vmovdqa64 ymm5,ymm213362 vmovdqa64 ymm6,ymm223363 vmovdqa64 ymm7,ymm233364 vmovdqa64 ymm8,ymm243365 vmovdqa64 ymm9,ymm253366 vmovdqa64 ymm10,ymm263367 vmovdqa64 ymm11,ymm273368 vmovdqa64 ymm12,ymm283369 vmovdqa64 ymm13,ymm293370 vmovdqa64 ymm14,ymm303371 vmovdqa64 ymm15,ymm313372 3373 vmovdqa64 ymm16,ymm03374 vmovdqa64 ymm17,ymm13375 vmovdqa64 ymm18,ymm23376 vmovdqa64 ymm19,ymm33377 3378 mov eax,103379 jmp NEAR $L$oop8xvl3380 3381 ALIGN 323382 $L$oop8xvl:3383 vpaddd ymm0,ymm0,ymm43384 vpaddd ymm1,ymm1,ymm53385 vpaddd ymm2,ymm2,ymm63386 vpaddd ymm3,ymm3,ymm73387 vpxor ymm12,ymm12,ymm03388 vpxor ymm13,ymm13,ymm13389 vpxor ymm14,ymm14,ymm23390 vpxor ymm15,ymm15,ymm33391 vprold ymm12,ymm12,163392 vprold ymm13,ymm13,163393 vprold ymm14,ymm14,163394 vprold ymm15,ymm15,163395 vpaddd ymm8,ymm8,ymm123396 vpaddd ymm9,ymm9,ymm133397 vpaddd ymm10,ymm10,ymm143398 vpaddd ymm11,ymm11,ymm153399 vpxor ymm4,ymm4,ymm83400 vpxor ymm5,ymm5,ymm93401 vpxor ymm6,ymm6,ymm103402 vpxor ymm7,ymm7,ymm113403 vprold ymm4,ymm4,123404 vprold ymm5,ymm5,123405 vprold ymm6,ymm6,123406 vprold ymm7,ymm7,123407 vpaddd ymm0,ymm0,ymm43408 vpaddd ymm1,ymm1,ymm53409 vpaddd ymm2,ymm2,ymm63410 vpaddd ymm3,ymm3,ymm73411 vpxor ymm12,ymm12,ymm03412 vpxor ymm13,ymm13,ymm13413 vpxor ymm14,ymm14,ymm23414 vpxor ymm15,ymm15,ymm33415 vprold ymm12,ymm12,83416 vprold ymm13,ymm13,83417 vprold ymm14,ymm14,83418 vprold ymm15,ymm15,83419 vpaddd ymm8,ymm8,ymm123420 vpaddd ymm9,ymm9,ymm133421 vpaddd ymm10,ymm10,ymm143422 vpaddd ymm11,ymm11,ymm153423 vpxor ymm4,ymm4,ymm83424 vpxor ymm5,ymm5,ymm93425 vpxor ymm6,ymm6,ymm103426 vpxor ymm7,ymm7,ymm113427 vprold ymm4,ymm4,73428 vprold ymm5,ymm5,73429 vprold ymm6,ymm6,73430 vprold ymm7,ymm7,73431 vpaddd ymm0,ymm0,ymm53432 vpaddd ymm1,ymm1,ymm63433 vpaddd ymm2,ymm2,ymm73434 vpaddd ymm3,ymm3,ymm43435 vpxor ymm15,ymm15,ymm03436 vpxor ymm12,ymm12,ymm13437 vpxor ymm13,ymm13,ymm23438 vpxor ymm14,ymm14,ymm33439 vprold ymm15,ymm15,163440 vprold ymm12,ymm12,163441 vprold ymm13,ymm13,163442 vprold ymm14,ymm14,163443 vpaddd ymm10,ymm10,ymm153444 vpaddd ymm11,ymm11,ymm123445 vpaddd ymm8,ymm8,ymm133446 vpaddd ymm9,ymm9,ymm143447 vpxor ymm5,ymm5,ymm103448 vpxor ymm6,ymm6,ymm113449 vpxor ymm7,ymm7,ymm83450 vpxor ymm4,ymm4,ymm93451 vprold ymm5,ymm5,123452 vprold ymm6,ymm6,123453 vprold ymm7,ymm7,123454 vprold ymm4,ymm4,123455 vpaddd ymm0,ymm0,ymm53456 vpaddd ymm1,ymm1,ymm63457 vpaddd ymm2,ymm2,ymm73458 vpaddd ymm3,ymm3,ymm43459 vpxor ymm15,ymm15,ymm03460 vpxor ymm12,ymm12,ymm13461 vpxor ymm13,ymm13,ymm23462 vpxor ymm14,ymm14,ymm33463 vprold ymm15,ymm15,83464 vprold ymm12,ymm12,83465 vprold ymm13,ymm13,83466 vprold ymm14,ymm14,83467 vpaddd ymm10,ymm10,ymm153468 vpaddd ymm11,ymm11,ymm123469 vpaddd ymm8,ymm8,ymm133470 vpaddd ymm9,ymm9,ymm143471 vpxor ymm5,ymm5,ymm103472 vpxor ymm6,ymm6,ymm113473 vpxor ymm7,ymm7,ymm83474 vpxor ymm4,ymm4,ymm93475 vprold ymm5,ymm5,73476 vprold ymm6,ymm6,73477 vprold ymm7,ymm7,73478 vprold ymm4,ymm4,73479 dec eax3480 jnz NEAR $L$oop8xvl3481 3482 vpaddd ymm0,ymm0,ymm163483 vpaddd ymm1,ymm1,ymm173484 vpaddd ymm2,ymm2,ymm183485 vpaddd ymm3,ymm3,ymm193486 3487 vpunpckldq ymm18,ymm0,ymm13488 vpunpckldq ymm19,ymm2,ymm33489 vpunpckhdq ymm0,ymm0,ymm13490 vpunpckhdq ymm2,ymm2,ymm33491 vpunpcklqdq ymm1,ymm18,ymm193492 vpunpckhqdq ymm18,ymm18,ymm193493 vpunpcklqdq ymm3,ymm0,ymm23494 vpunpckhqdq ymm0,ymm0,ymm23495 vpaddd ymm4,ymm4,ymm203496 vpaddd ymm5,ymm5,ymm213497 vpaddd ymm6,ymm6,ymm223498 vpaddd ymm7,ymm7,ymm233499 3500 vpunpckldq ymm2,ymm4,ymm53501 vpunpckldq ymm19,ymm6,ymm73502 vpunpckhdq ymm4,ymm4,ymm53503 vpunpckhdq ymm6,ymm6,ymm73504 vpunpcklqdq ymm5,ymm2,ymm193505 vpunpckhqdq ymm2,ymm2,ymm193506 vpunpcklqdq ymm7,ymm4,ymm63507 vpunpckhqdq ymm4,ymm4,ymm63508 vshufi32x4 ymm19,ymm1,ymm5,03509 vshufi32x4 ymm5,ymm1,ymm5,33510 vshufi32x4 ymm1,ymm18,ymm2,03511 vshufi32x4 ymm2,ymm18,ymm2,33512 vshufi32x4 ymm18,ymm3,ymm7,03513 vshufi32x4 ymm7,ymm3,ymm7,33514 vshufi32x4 ymm3,ymm0,ymm4,03515 vshufi32x4 ymm4,ymm0,ymm4,33516 vpaddd ymm8,ymm8,ymm243517 vpaddd ymm9,ymm9,ymm253518 vpaddd ymm10,ymm10,ymm263519 vpaddd ymm11,ymm11,ymm273520 3521 vpunpckldq ymm6,ymm8,ymm93522 vpunpckldq ymm0,ymm10,ymm113523 vpunpckhdq ymm8,ymm8,ymm93524 vpunpckhdq ymm10,ymm10,ymm113525 vpunpcklqdq ymm9,ymm6,ymm03526 vpunpckhqdq ymm6,ymm6,ymm03527 vpunpcklqdq ymm11,ymm8,ymm103528 vpunpckhqdq ymm8,ymm8,ymm103529 vpaddd ymm12,ymm12,ymm283530 vpaddd ymm13,ymm13,ymm293531 vpaddd ymm14,ymm14,ymm303532 vpaddd ymm15,ymm15,ymm313533 3534 vpunpckldq ymm10,ymm12,ymm133535 vpunpckldq ymm0,ymm14,ymm153536 vpunpckhdq ymm12,ymm12,ymm133537 vpunpckhdq ymm14,ymm14,ymm153538 vpunpcklqdq ymm13,ymm10,ymm03539 vpunpckhqdq ymm10,ymm10,ymm03540 vpunpcklqdq ymm15,ymm12,ymm143541 vpunpckhqdq ymm12,ymm12,ymm143542 vperm2i128 ymm0,ymm9,ymm13,0x203543 vperm2i128 ymm13,ymm9,ymm13,0x313544 vperm2i128 ymm9,ymm6,ymm10,0x203545 vperm2i128 ymm10,ymm6,ymm10,0x313546 vperm2i128 ymm6,ymm11,ymm15,0x203547 vperm2i128 ymm15,ymm11,ymm15,0x313548 vperm2i128 ymm11,ymm8,ymm12,0x203549 vperm2i128 ymm12,ymm8,ymm12,0x313550 cmp rdx,64*83551 jb NEAR $L$tail8xvl3552 3553 mov eax,0x803554 vpxord ymm19,ymm19,YMMWORD[rsi]3555 vpxor ymm0,ymm0,YMMWORD[32+rsi]3556 vpxor ymm5,ymm5,YMMWORD[64+rsi]3557 vpxor ymm13,ymm13,YMMWORD[96+rsi]3558 lea rsi,[rax*1+rsi]3559 vmovdqu32 YMMWORD[rdi],ymm193560 vmovdqu YMMWORD[32+rdi],ymm03561 vmovdqu YMMWORD[64+rdi],ymm53562 vmovdqu YMMWORD[96+rdi],ymm133563 lea rdi,[rax*1+rdi]3564 3565 vpxor ymm1,ymm1,YMMWORD[rsi]3566 vpxor ymm9,ymm9,YMMWORD[32+rsi]3567 vpxor ymm2,ymm2,YMMWORD[64+rsi]3568 vpxor ymm10,ymm10,YMMWORD[96+rsi]3569 lea rsi,[rax*1+rsi]3570 vmovdqu YMMWORD[rdi],ymm13571 vmovdqu YMMWORD[32+rdi],ymm93572 vmovdqu YMMWORD[64+rdi],ymm23573 vmovdqu YMMWORD[96+rdi],ymm103574 lea rdi,[rax*1+rdi]3575 3576 vpxord ymm18,ymm18,YMMWORD[rsi]3577 vpxor ymm6,ymm6,YMMWORD[32+rsi]3578 vpxor ymm7,ymm7,YMMWORD[64+rsi]3579 vpxor ymm15,ymm15,YMMWORD[96+rsi]3580 lea rsi,[rax*1+rsi]3581 vmovdqu32 YMMWORD[rdi],ymm183582 vmovdqu YMMWORD[32+rdi],ymm63583 vmovdqu YMMWORD[64+rdi],ymm73584 vmovdqu YMMWORD[96+rdi],ymm153585 lea rdi,[rax*1+rdi]3586 3587 vpxor ymm3,ymm3,YMMWORD[rsi]3588 vpxor ymm11,ymm11,YMMWORD[32+rsi]3589 vpxor ymm4,ymm4,YMMWORD[64+rsi]3590 vpxor ymm12,ymm12,YMMWORD[96+rsi]3591 lea rsi,[rax*1+rsi]3592 vmovdqu YMMWORD[rdi],ymm33593 vmovdqu YMMWORD[32+rdi],ymm113594 vmovdqu YMMWORD[64+rdi],ymm43595 vmovdqu YMMWORD[96+rdi],ymm123596 lea rdi,[rax*1+rdi]3597 3598 vpbroadcastd ymm0,DWORD[r10]3599 vpbroadcastd ymm1,DWORD[4+r10]3600 3601 sub rdx,64*83602 jnz NEAR $L$oop_outer8xvl3603 3604 jmp NEAR $L$done8xvl3605 3606 ALIGN 323607 $L$tail8xvl:3608 vmovdqa64 ymm8,ymm193609 xor r10,r103610 sub rdi,rsi3611 cmp rdx,64*13612 jb NEAR $L$ess_than_64_8xvl3613 vpxor ymm8,ymm8,YMMWORD[rsi]3614 vpxor ymm0,ymm0,YMMWORD[32+rsi]3615 vmovdqu YMMWORD[rsi*1+rdi],ymm83616 vmovdqu YMMWORD[32+rsi*1+rdi],ymm03617 je NEAR $L$done8xvl3618 vmovdqa ymm8,ymm53619 vmovdqa ymm0,ymm133620 lea rsi,[64+rsi]3621 3622 cmp rdx,64*23623 jb NEAR $L$ess_than_64_8xvl3624 vpxor ymm5,ymm5,YMMWORD[rsi]3625 vpxor ymm13,ymm13,YMMWORD[32+rsi]3626 vmovdqu YMMWORD[rsi*1+rdi],ymm53627 vmovdqu YMMWORD[32+rsi*1+rdi],ymm133628 je NEAR $L$done8xvl3629 vmovdqa ymm8,ymm13630 vmovdqa ymm0,ymm93631 lea rsi,[64+rsi]3632 3633 cmp rdx,64*33634 jb NEAR $L$ess_than_64_8xvl3635 vpxor ymm1,ymm1,YMMWORD[rsi]3636 vpxor ymm9,ymm9,YMMWORD[32+rsi]3637 vmovdqu YMMWORD[rsi*1+rdi],ymm13638 vmovdqu YMMWORD[32+rsi*1+rdi],ymm93639 je NEAR $L$done8xvl3640 vmovdqa ymm8,ymm23641 vmovdqa ymm0,ymm103642 lea rsi,[64+rsi]3643 3644 cmp rdx,64*43645 jb NEAR $L$ess_than_64_8xvl3646 vpxor ymm2,ymm2,YMMWORD[rsi]3647 vpxor ymm10,ymm10,YMMWORD[32+rsi]3648 vmovdqu YMMWORD[rsi*1+rdi],ymm23649 vmovdqu YMMWORD[32+rsi*1+rdi],ymm103650 je NEAR $L$done8xvl3651 vmovdqa32 ymm8,ymm183652 vmovdqa ymm0,ymm63653 lea rsi,[64+rsi]3654 3655 cmp rdx,64*53656 jb NEAR $L$ess_than_64_8xvl3657 vpxord ymm18,ymm18,YMMWORD[rsi]3658 vpxor ymm6,ymm6,YMMWORD[32+rsi]3659 vmovdqu32 YMMWORD[rsi*1+rdi],ymm183660 vmovdqu YMMWORD[32+rsi*1+rdi],ymm63661 je NEAR $L$done8xvl3662 vmovdqa ymm8,ymm73663 vmovdqa ymm0,ymm153664 lea rsi,[64+rsi]3665 3666 cmp rdx,64*63667 jb NEAR $L$ess_than_64_8xvl3668 vpxor ymm7,ymm7,YMMWORD[rsi]3669 vpxor ymm15,ymm15,YMMWORD[32+rsi]3670 vmovdqu YMMWORD[rsi*1+rdi],ymm73671 vmovdqu YMMWORD[32+rsi*1+rdi],ymm153672 je NEAR $L$done8xvl3673 vmovdqa ymm8,ymm33674 vmovdqa ymm0,ymm113675 lea rsi,[64+rsi]3676 3677 cmp rdx,64*73678 jb NEAR $L$ess_than_64_8xvl3679 vpxor ymm3,ymm3,YMMWORD[rsi]3680 vpxor ymm11,ymm11,YMMWORD[32+rsi]3681 vmovdqu YMMWORD[rsi*1+rdi],ymm33682 vmovdqu YMMWORD[32+rsi*1+rdi],ymm113683 je NEAR $L$done8xvl3684 vmovdqa ymm8,ymm43685 vmovdqa ymm0,ymm123686 lea rsi,[64+rsi]3687 3688 $L$ess_than_64_8xvl:3689 vmovdqa YMMWORD[rsp],ymm83690 vmovdqa YMMWORD[32+rsp],ymm03691 lea rdi,[rsi*1+rdi]3692 and rdx,633693 3694 $L$oop_tail8xvl:3695 movzx eax,BYTE[r10*1+rsi]3696 movzx ecx,BYTE[r10*1+rsp]3697 lea r10,[1+r10]3698 xor eax,ecx3699 mov BYTE[((-1))+r10*1+rdi],al3700 dec rdx3701 jnz NEAR $L$oop_tail8xvl3702 3703 vpxor ymm8,ymm8,ymm83704 vmovdqa YMMWORD[rsp],ymm83705 vmovdqa YMMWORD[32+rsp],ymm83706 3707 $L$done8xvl:3708 vzeroall3709 movaps xmm6,XMMWORD[((-168))+r9]3710 movaps xmm7,XMMWORD[((-152))+r9]3711 movaps xmm8,XMMWORD[((-136))+r9]3712 movaps xmm9,XMMWORD[((-120))+r9]3713 movaps xmm10,XMMWORD[((-104))+r9]3714 movaps xmm11,XMMWORD[((-88))+r9]3715 movaps xmm12,XMMWORD[((-72))+r9]3716 movaps xmm13,XMMWORD[((-56))+r9]3717 movaps xmm14,XMMWORD[((-40))+r9]3718 movaps xmm15,XMMWORD[((-24))+r9]3719 lea rsp,[r9]3720 3721 $L$8xvl_epilogue:3722 mov rdi,QWORD[8+rsp] ;WIN64 epilogue3723 mov rsi,QWORD[16+rsp]3724 DB 0F3h,0C3h ;repret3725 3726 $L$SEH_end_ChaCha20_8xvl:3727 1262 EXTERN __imp_RtlVirtualUnwind 3728 1263 … … 3871 1406 DD $L$SEH_end_ChaCha20_4x wrt ..imagebase 3872 1407 DD $L$SEH_info_ChaCha20_4x wrt ..imagebase 3873 DD $L$SEH_begin_ChaCha20_4xop wrt ..imagebase3874 DD $L$SEH_end_ChaCha20_4xop wrt ..imagebase3875 DD $L$SEH_info_ChaCha20_4xop wrt ..imagebase3876 DD $L$SEH_begin_ChaCha20_8x wrt ..imagebase3877 DD $L$SEH_end_ChaCha20_8x wrt ..imagebase3878 DD $L$SEH_info_ChaCha20_8x wrt ..imagebase3879 DD $L$SEH_begin_ChaCha20_avx512 wrt ..imagebase3880 DD $L$SEH_end_ChaCha20_avx512 wrt ..imagebase3881 DD $L$SEH_info_ChaCha20_avx512 wrt ..imagebase3882 3883 DD $L$SEH_begin_ChaCha20_avx512vl wrt ..imagebase3884 DD $L$SEH_end_ChaCha20_avx512vl wrt ..imagebase3885 DD $L$SEH_info_ChaCha20_avx512vl wrt ..imagebase3886 3887 DD $L$SEH_begin_ChaCha20_16x wrt ..imagebase3888 DD $L$SEH_end_ChaCha20_16x wrt ..imagebase3889 DD $L$SEH_info_ChaCha20_16x wrt ..imagebase3890 3891 DD $L$SEH_begin_ChaCha20_8xvl wrt ..imagebase3892 DD $L$SEH_end_ChaCha20_8xvl wrt ..imagebase3893 DD $L$SEH_info_ChaCha20_8xvl wrt ..imagebase3894 1408 section .xdata rdata align=8 3895 1409 ALIGN 8 … … 3915 1429 DD $L$4x_body wrt ..imagebase,$L$4x_epilogue wrt ..imagebase 3916 1430 DD 0xa0,0 3917 $L$SEH_info_ChaCha20_4xop:3918 DB 9,0,0,03919 DD simd_handler wrt ..imagebase3920 DD $L$4xop_body wrt ..imagebase,$L$4xop_epilogue wrt ..imagebase3921 DD 0xa0,03922 $L$SEH_info_ChaCha20_8x:3923 DB 9,0,0,03924 DD simd_handler wrt ..imagebase3925 DD $L$8x_body wrt ..imagebase,$L$8x_epilogue wrt ..imagebase3926 DD 0xa0,03927 $L$SEH_info_ChaCha20_avx512:3928 DB 9,0,0,03929 DD simd_handler wrt ..imagebase3930 DD $L$avx512_body wrt ..imagebase,$L$avx512_epilogue wrt ..imagebase3931 DD 0x20,03932 3933 $L$SEH_info_ChaCha20_avx512vl:3934 DB 9,0,0,03935 DD simd_handler wrt ..imagebase3936 DD $L$avx512vl_body wrt ..imagebase,$L$avx512vl_epilogue wrt ..imagebase3937 DD 0x20,03938 3939 $L$SEH_info_ChaCha20_16x:3940 DB 9,0,0,03941 DD simd_handler wrt ..imagebase3942 DD $L$16x_body wrt ..imagebase,$L$16x_epilogue wrt ..imagebase3943 DD 0xa0,03944 3945 $L$SEH_info_ChaCha20_8xvl:3946 DB 9,0,0,03947 DD simd_handler wrt ..imagebase3948 DD $L$8xvl_body wrt ..imagebase,$L$8xvl_epilogue wrt ..imagebase3949 DD 0xa0,0 -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/ecp_nistz256-x86_64.S
r94083 r95221 2854 2854 2855 2855 2856 mov ecx,0x801002857 and ecx,DWORD[((OPENSSL_ia32cap_P+8))]2858 cmp ecx,0x801002859 je NEAR $L$ecp_nistz256_ord_mul_montx2860 2856 push rbp 2861 2857 … … 3191 3187 3192 3188 3193 mov ecx,0x801003194 and ecx,DWORD[((OPENSSL_ia32cap_P+8))]3195 cmp ecx,0x801003196 je NEAR $L$ecp_nistz256_ord_sqr_montx3197 3189 push rbp 3198 3190 … … 3479 3471 3480 3472 3481 ALIGN 323482 ecp_nistz256_ord_mul_montx:3483 mov QWORD[8+rsp],rdi ;WIN64 prologue3484 mov QWORD[16+rsp],rsi3485 mov rax,rsp3486 $L$SEH_begin_ecp_nistz256_ord_mul_montx:3487 mov rdi,rcx3488 mov rsi,rdx3489 mov rdx,r83490 3491 3492 3493 $L$ecp_nistz256_ord_mul_montx:3494 push rbp3495 3496 push rbx3497 3498 push r123499 3500 push r133501 3502 push r143503 3504 push r153505 3506 $L$ord_mulx_body:3507 3508 mov rbx,rdx3509 mov rdx,QWORD[rdx]3510 mov r9,QWORD[rsi]3511 mov r10,QWORD[8+rsi]3512 mov r11,QWORD[16+rsi]3513 mov r12,QWORD[24+rsi]3514 lea rsi,[((-128))+rsi]3515 lea r14,[(($L$ord-128))]3516 mov r15,QWORD[$L$ordK]3517 3518 3519 mulx r9,r8,r93520 mulx r10,rcx,r103521 mulx r11,rbp,r113522 add r9,rcx3523 mulx r12,rcx,r123524 mov rdx,r83525 mulx rax,rdx,r153526 adc r10,rbp3527 adc r11,rcx3528 adc r12,03529 3530 3531 xor r13,r133532 mulx rbp,rcx,QWORD[((0+128))+r14]3533 adcx r8,rcx3534 adox r9,rbp3535 3536 mulx rbp,rcx,QWORD[((8+128))+r14]3537 adcx r9,rcx3538 adox r10,rbp3539 3540 mulx rbp,rcx,QWORD[((16+128))+r14]3541 adcx r10,rcx3542 adox r11,rbp3543 3544 mulx rbp,rcx,QWORD[((24+128))+r14]3545 mov rdx,QWORD[8+rbx]3546 adcx r11,rcx3547 adox r12,rbp3548 adcx r12,r83549 adox r13,r83550 adc r13,03551 3552 3553 mulx rbp,rcx,QWORD[((0+128))+rsi]3554 adcx r9,rcx3555 adox r10,rbp3556 3557 mulx rbp,rcx,QWORD[((8+128))+rsi]3558 adcx r10,rcx3559 adox r11,rbp3560 3561 mulx rbp,rcx,QWORD[((16+128))+rsi]3562 adcx r11,rcx3563 adox r12,rbp3564 3565 mulx rbp,rcx,QWORD[((24+128))+rsi]3566 mov rdx,r93567 mulx rax,rdx,r153568 adcx r12,rcx3569 adox r13,rbp3570 3571 adcx r13,r83572 adox r8,r83573 adc r8,03574 3575 3576 mulx rbp,rcx,QWORD[((0+128))+r14]3577 adcx r9,rcx3578 adox r10,rbp3579 3580 mulx rbp,rcx,QWORD[((8+128))+r14]3581 adcx r10,rcx3582 adox r11,rbp3583 3584 mulx rbp,rcx,QWORD[((16+128))+r14]3585 adcx r11,rcx3586 adox r12,rbp3587 3588 mulx rbp,rcx,QWORD[((24+128))+r14]3589 mov rdx,QWORD[16+rbx]3590 adcx r12,rcx3591 adox r13,rbp3592 adcx r13,r93593 adox r8,r93594 adc r8,03595 3596 3597 mulx rbp,rcx,QWORD[((0+128))+rsi]3598 adcx r10,rcx3599 adox r11,rbp3600 3601 mulx rbp,rcx,QWORD[((8+128))+rsi]3602 adcx r11,rcx3603 adox r12,rbp3604 3605 mulx rbp,rcx,QWORD[((16+128))+rsi]3606 adcx r12,rcx3607 adox r13,rbp3608 3609 mulx rbp,rcx,QWORD[((24+128))+rsi]3610 mov rdx,r103611 mulx rax,rdx,r153612 adcx r13,rcx3613 adox r8,rbp3614 3615 adcx r8,r93616 adox r9,r93617 adc r9,03618 3619 3620 mulx rbp,rcx,QWORD[((0+128))+r14]3621 adcx r10,rcx3622 adox r11,rbp3623 3624 mulx rbp,rcx,QWORD[((8+128))+r14]3625 adcx r11,rcx3626 adox r12,rbp3627 3628 mulx rbp,rcx,QWORD[((16+128))+r14]3629 adcx r12,rcx3630 adox r13,rbp3631 3632 mulx rbp,rcx,QWORD[((24+128))+r14]3633 mov rdx,QWORD[24+rbx]3634 adcx r13,rcx3635 adox r8,rbp3636 adcx r8,r103637 adox r9,r103638 adc r9,03639 3640 3641 mulx rbp,rcx,QWORD[((0+128))+rsi]3642 adcx r11,rcx3643 adox r12,rbp3644 3645 mulx rbp,rcx,QWORD[((8+128))+rsi]3646 adcx r12,rcx3647 adox r13,rbp3648 3649 mulx rbp,rcx,QWORD[((16+128))+rsi]3650 adcx r13,rcx3651 adox r8,rbp3652 3653 mulx rbp,rcx,QWORD[((24+128))+rsi]3654 mov rdx,r113655 mulx rax,rdx,r153656 adcx r8,rcx3657 adox r9,rbp3658 3659 adcx r9,r103660 adox r10,r103661 adc r10,03662 3663 3664 mulx rbp,rcx,QWORD[((0+128))+r14]3665 adcx r11,rcx3666 adox r12,rbp3667 3668 mulx rbp,rcx,QWORD[((8+128))+r14]3669 adcx r12,rcx3670 adox r13,rbp3671 3672 mulx rbp,rcx,QWORD[((16+128))+r14]3673 adcx r13,rcx3674 adox r8,rbp3675 3676 mulx rbp,rcx,QWORD[((24+128))+r14]3677 lea r14,[128+r14]3678 mov rbx,r123679 adcx r8,rcx3680 adox r9,rbp3681 mov rdx,r133682 adcx r9,r113683 adox r10,r113684 adc r10,03685 3686 3687 3688 mov rcx,r83689 sub r12,QWORD[r14]3690 sbb r13,QWORD[8+r14]3691 sbb r8,QWORD[16+r14]3692 mov rbp,r93693 sbb r9,QWORD[24+r14]3694 sbb r10,03695 3696 cmovc r12,rbx3697 cmovc r13,rdx3698 cmovc r8,rcx3699 cmovc r9,rbp3700 3701 mov QWORD[rdi],r123702 mov QWORD[8+rdi],r133703 mov QWORD[16+rdi],r83704 mov QWORD[24+rdi],r93705 3706 mov r15,QWORD[rsp]3707 3708 mov r14,QWORD[8+rsp]3709 3710 mov r13,QWORD[16+rsp]3711 3712 mov r12,QWORD[24+rsp]3713 3714 mov rbx,QWORD[32+rsp]3715 3716 mov rbp,QWORD[40+rsp]3717 3718 lea rsp,[48+rsp]3719 3720 $L$ord_mulx_epilogue:3721 mov rdi,QWORD[8+rsp] ;WIN64 epilogue3722 mov rsi,QWORD[16+rsp]3723 DB 0F3h,0C3h ;repret3724 3725 $L$SEH_end_ecp_nistz256_ord_mul_montx:3726 3727 3728 ALIGN 323729 ecp_nistz256_ord_sqr_montx:3730 mov QWORD[8+rsp],rdi ;WIN64 prologue3731 mov QWORD[16+rsp],rsi3732 mov rax,rsp3733 $L$SEH_begin_ecp_nistz256_ord_sqr_montx:3734 mov rdi,rcx3735 mov rsi,rdx3736 mov rdx,r83737 3738 3739 3740 $L$ecp_nistz256_ord_sqr_montx:3741 push rbp3742 3743 push rbx3744 3745 push r123746 3747 push r133748 3749 push r143750 3751 push r153752 3753 $L$ord_sqrx_body:3754 3755 mov rbx,rdx3756 mov rdx,QWORD[rsi]3757 mov r14,QWORD[8+rsi]3758 mov r15,QWORD[16+rsi]3759 mov r8,QWORD[24+rsi]3760 lea rsi,[$L$ord]3761 jmp NEAR $L$oop_ord_sqrx3762 3763 ALIGN 323764 $L$oop_ord_sqrx:3765 mulx r10,r9,r143766 mulx r11,rcx,r153767 mov rax,rdx3768 DB 102,73,15,110,2063769 mulx r12,rbp,r83770 mov rdx,r143771 add r10,rcx3772 DB 102,73,15,110,2153773 adc r11,rbp3774 adc r12,03775 xor r13,r133776 3777 mulx rbp,rcx,r153778 adcx r11,rcx3779 adox r12,rbp3780 3781 mulx rbp,rcx,r83782 mov rdx,r153783 adcx r12,rcx3784 adox r13,rbp3785 adc r13,03786 3787 mulx r14,rcx,r83788 mov rdx,rax3789 DB 102,73,15,110,2163790 xor r15,r153791 adcx r9,r93792 adox r13,rcx3793 adcx r10,r103794 adox r14,r153795 3796 3797 mulx rbp,r8,rdx3798 DB 102,72,15,126,2023799 adcx r11,r113800 adox r9,rbp3801 adcx r12,r123802 mulx rax,rcx,rdx3803 DB 102,72,15,126,2103804 adcx r13,r133805 adox r10,rcx3806 adcx r14,r143807 mulx rbp,rcx,rdx3808 DB 0x673809 DB 102,72,15,126,2183810 adox r11,rax3811 adcx r15,r153812 adox r12,rcx3813 adox r13,rbp3814 mulx rax,rcx,rdx3815 adox r14,rcx3816 adox r15,rax3817 3818 3819 mov rdx,r83820 mulx rcx,rdx,QWORD[32+rsi]3821 3822 xor rax,rax3823 mulx rbp,rcx,QWORD[rsi]3824 adcx r8,rcx3825 adox r9,rbp3826 mulx rbp,rcx,QWORD[8+rsi]3827 adcx r9,rcx3828 adox r10,rbp3829 mulx rbp,rcx,QWORD[16+rsi]3830 adcx r10,rcx3831 adox r11,rbp3832 mulx rbp,rcx,QWORD[24+rsi]3833 adcx r11,rcx3834 adox r8,rbp3835 adcx r8,rax3836 3837 3838 mov rdx,r93839 mulx rcx,rdx,QWORD[32+rsi]3840 3841 mulx rbp,rcx,QWORD[rsi]3842 adox r9,rcx3843 adcx r10,rbp3844 mulx rbp,rcx,QWORD[8+rsi]3845 adox r10,rcx3846 adcx r11,rbp3847 mulx rbp,rcx,QWORD[16+rsi]3848 adox r11,rcx3849 adcx r8,rbp3850 mulx rbp,rcx,QWORD[24+rsi]3851 adox r8,rcx3852 adcx r9,rbp3853 adox r9,rax3854 3855 3856 mov rdx,r103857 mulx rcx,rdx,QWORD[32+rsi]3858 3859 mulx rbp,rcx,QWORD[rsi]3860 adcx r10,rcx3861 adox r11,rbp3862 mulx rbp,rcx,QWORD[8+rsi]3863 adcx r11,rcx3864 adox r8,rbp3865 mulx rbp,rcx,QWORD[16+rsi]3866 adcx r8,rcx3867 adox r9,rbp3868 mulx rbp,rcx,QWORD[24+rsi]3869 adcx r9,rcx3870 adox r10,rbp3871 adcx r10,rax3872 3873 3874 mov rdx,r113875 mulx rcx,rdx,QWORD[32+rsi]3876 3877 mulx rbp,rcx,QWORD[rsi]3878 adox r11,rcx3879 adcx r8,rbp3880 mulx rbp,rcx,QWORD[8+rsi]3881 adox r8,rcx3882 adcx r9,rbp3883 mulx rbp,rcx,QWORD[16+rsi]3884 adox r9,rcx3885 adcx r10,rbp3886 mulx rbp,rcx,QWORD[24+rsi]3887 adox r10,rcx3888 adcx r11,rbp3889 adox r11,rax3890 3891 3892 add r12,r83893 adc r9,r133894 mov rdx,r123895 adc r10,r143896 adc r11,r153897 mov r14,r93898 adc rax,03899 3900 3901 sub r12,QWORD[rsi]3902 mov r15,r103903 sbb r9,QWORD[8+rsi]3904 sbb r10,QWORD[16+rsi]3905 mov r8,r113906 sbb r11,QWORD[24+rsi]3907 sbb rax,03908 3909 cmovnc rdx,r123910 cmovnc r14,r93911 cmovnc r15,r103912 cmovnc r8,r113913 3914 dec rbx3915 jnz NEAR $L$oop_ord_sqrx3916 3917 mov QWORD[rdi],rdx3918 mov QWORD[8+rdi],r143919 pxor xmm1,xmm13920 mov QWORD[16+rdi],r153921 pxor xmm2,xmm23922 mov QWORD[24+rdi],r83923 pxor xmm3,xmm33924 3925 mov r15,QWORD[rsp]3926 3927 mov r14,QWORD[8+rsp]3928 3929 mov r13,QWORD[16+rsp]3930 3931 mov r12,QWORD[24+rsp]3932 3933 mov rbx,QWORD[32+rsp]3934 3935 mov rbp,QWORD[40+rsp]3936 3937 lea rsp,[48+rsp]3938 3939 $L$ord_sqrx_epilogue:3940 mov rdi,QWORD[8+rsp] ;WIN64 epilogue3941 mov rsi,QWORD[16+rsp]3942 DB 0F3h,0C3h ;repret3943 3944 $L$SEH_end_ecp_nistz256_ord_sqr_montx:3945 3946 3947 3473 3948 3474 … … 3960 3486 3961 3487 3962 mov ecx,0x801003963 and ecx,DWORD[((OPENSSL_ia32cap_P+8))]3964 3488 lea rdx,[$L$RR] 3965 3489 jmp NEAR $L$mul_mont … … 3987 3511 3988 3512 3989 mov ecx,0x801003990 and ecx,DWORD[((OPENSSL_ia32cap_P+8))]3991 3513 $L$mul_mont: 3992 3514 push rbp … … 4003 3525 4004 3526 $L$mul_body: 4005 cmp ecx,0x801004006 je NEAR $L$mul_montx4007 3527 mov rbx,rdx 4008 3528 mov rax,QWORD[rdx] … … 4013 3533 4014 3534 call __ecp_nistz256_mul_montq 4015 jmp NEAR $L$mul_mont_done4016 4017 ALIGN 324018 $L$mul_montx:4019 mov rbx,rdx4020 mov rdx,QWORD[rdx]4021 mov r9,QWORD[rsi]4022 mov r10,QWORD[8+rsi]4023 mov r11,QWORD[16+rsi]4024 mov r12,QWORD[24+rsi]4025 lea rsi,[((-128))+rsi]4026 4027 call __ecp_nistz256_mul_montx4028 3535 $L$mul_mont_done: 4029 3536 mov r15,QWORD[rsp] … … 4286 3793 4287 3794 4288 mov ecx,0x801004289 and ecx,DWORD[((OPENSSL_ia32cap_P+8))]4290 3795 push rbp 4291 3796 … … 4301 3806 4302 3807 $L$sqr_body: 4303 cmp ecx,0x801004304 je NEAR $L$sqr_montx4305 3808 mov rax,QWORD[rsi] 4306 3809 mov r14,QWORD[8+rsi] … … 4309 3812 4310 3813 call __ecp_nistz256_sqr_montq 4311 jmp NEAR $L$sqr_mont_done4312 4313 ALIGN 324314 $L$sqr_montx:4315 mov rdx,QWORD[rsi]4316 mov r14,QWORD[8+rsi]4317 mov r15,QWORD[16+rsi]4318 mov r8,QWORD[24+rsi]4319 lea rsi,[((-128))+rsi]4320 4321 call __ecp_nistz256_sqr_montx4322 3814 $L$sqr_mont_done: 4323 3815 mov r15,QWORD[rsp] … … 4499 3991 mov QWORD[8+rdi],r13 4500 3992 cmovc r15,rcx 4501 mov QWORD[16+rdi],r144502 mov QWORD[24+rdi],r154503 4504 DB 0F3h,0C3h ;repret4505 4506 4507 4508 ALIGN 324509 __ecp_nistz256_mul_montx:4510 4511 4512 4513 mulx r9,r8,r94514 mulx r10,rcx,r104515 mov r14,324516 xor r13,r134517 mulx r11,rbp,r114518 mov r15,QWORD[(($L$poly+24))]4519 adc r9,rcx4520 mulx r12,rcx,r124521 mov rdx,r84522 adc r10,rbp4523 shlx rbp,r8,r144524 adc r11,rcx4525 shrx rcx,r8,r144526 adc r12,04527 4528 4529 4530 add r9,rbp4531 adc r10,rcx4532 4533 mulx rbp,rcx,r154534 mov rdx,QWORD[8+rbx]4535 adc r11,rcx4536 adc r12,rbp4537 adc r13,04538 xor r8,r84539 4540 4541 4542 mulx rbp,rcx,QWORD[((0+128))+rsi]4543 adcx r9,rcx4544 adox r10,rbp4545 4546 mulx rbp,rcx,QWORD[((8+128))+rsi]4547 adcx r10,rcx4548 adox r11,rbp4549 4550 mulx rbp,rcx,QWORD[((16+128))+rsi]4551 adcx r11,rcx4552 adox r12,rbp4553 4554 mulx rbp,rcx,QWORD[((24+128))+rsi]4555 mov rdx,r94556 adcx r12,rcx4557 shlx rcx,r9,r144558 adox r13,rbp4559 shrx rbp,r9,r144560 4561 adcx r13,r84562 adox r8,r84563 adc r8,04564 4565 4566 4567 add r10,rcx4568 adc r11,rbp4569 4570 mulx rbp,rcx,r154571 mov rdx,QWORD[16+rbx]4572 adc r12,rcx4573 adc r13,rbp4574 adc r8,04575 xor r9,r94576 4577 4578 4579 mulx rbp,rcx,QWORD[((0+128))+rsi]4580 adcx r10,rcx4581 adox r11,rbp4582 4583 mulx rbp,rcx,QWORD[((8+128))+rsi]4584 adcx r11,rcx4585 adox r12,rbp4586 4587 mulx rbp,rcx,QWORD[((16+128))+rsi]4588 adcx r12,rcx4589 adox r13,rbp4590 4591 mulx rbp,rcx,QWORD[((24+128))+rsi]4592 mov rdx,r104593 adcx r13,rcx4594 shlx rcx,r10,r144595 adox r8,rbp4596 shrx rbp,r10,r144597 4598 adcx r8,r94599 adox r9,r94600 adc r9,04601 4602 4603 4604 add r11,rcx4605 adc r12,rbp4606 4607 mulx rbp,rcx,r154608 mov rdx,QWORD[24+rbx]4609 adc r13,rcx4610 adc r8,rbp4611 adc r9,04612 xor r10,r104613 4614 4615 4616 mulx rbp,rcx,QWORD[((0+128))+rsi]4617 adcx r11,rcx4618 adox r12,rbp4619 4620 mulx rbp,rcx,QWORD[((8+128))+rsi]4621 adcx r12,rcx4622 adox r13,rbp4623 4624 mulx rbp,rcx,QWORD[((16+128))+rsi]4625 adcx r13,rcx4626 adox r8,rbp4627 4628 mulx rbp,rcx,QWORD[((24+128))+rsi]4629 mov rdx,r114630 adcx r8,rcx4631 shlx rcx,r11,r144632 adox r9,rbp4633 shrx rbp,r11,r144634 4635 adcx r9,r104636 adox r10,r104637 adc r10,04638 4639 4640 4641 add r12,rcx4642 adc r13,rbp4643 4644 mulx rbp,rcx,r154645 mov rbx,r124646 mov r14,QWORD[(($L$poly+8))]4647 adc r8,rcx4648 mov rdx,r134649 adc r9,rbp4650 adc r10,04651 4652 4653 4654 xor eax,eax4655 mov rcx,r84656 sbb r12,-14657 sbb r13,r144658 sbb r8,04659 mov rbp,r94660 sbb r9,r154661 sbb r10,04662 4663 cmovc r12,rbx4664 cmovc r13,rdx4665 mov QWORD[rdi],r124666 cmovc r8,rcx4667 mov QWORD[8+rdi],r134668 cmovc r9,rbp4669 mov QWORD[16+rdi],r84670 mov QWORD[24+rdi],r94671 4672 DB 0F3h,0C3h ;repret4673 4674 4675 4676 4677 ALIGN 324678 __ecp_nistz256_sqr_montx:4679 4680 mulx r10,r9,r144681 mulx r11,rcx,r154682 xor eax,eax4683 adc r10,rcx4684 mulx r12,rbp,r84685 mov rdx,r144686 adc r11,rbp4687 adc r12,04688 xor r13,r134689 4690 4691 mulx rbp,rcx,r154692 adcx r11,rcx4693 adox r12,rbp4694 4695 mulx rbp,rcx,r84696 mov rdx,r154697 adcx r12,rcx4698 adox r13,rbp4699 adc r13,04700 4701 4702 mulx r14,rcx,r84703 mov rdx,QWORD[((0+128))+rsi]4704 xor r15,r154705 adcx r9,r94706 adox r13,rcx4707 adcx r10,r104708 adox r14,r154709 4710 mulx rbp,r8,rdx4711 mov rdx,QWORD[((8+128))+rsi]4712 adcx r11,r114713 adox r9,rbp4714 adcx r12,r124715 mulx rax,rcx,rdx4716 mov rdx,QWORD[((16+128))+rsi]4717 adcx r13,r134718 adox r10,rcx4719 adcx r14,r144720 DB 0x674721 mulx rbp,rcx,rdx4722 mov rdx,QWORD[((24+128))+rsi]4723 adox r11,rax4724 adcx r15,r154725 adox r12,rcx4726 mov rsi,324727 adox r13,rbp4728 DB 0x67,0x674729 mulx rax,rcx,rdx4730 mov rdx,QWORD[(($L$poly+24))]4731 adox r14,rcx4732 shlx rcx,r8,rsi4733 adox r15,rax4734 shrx rax,r8,rsi4735 mov rbp,rdx4736 4737 4738 add r9,rcx4739 adc r10,rax4740 4741 mulx r8,rcx,r84742 adc r11,rcx4743 shlx rcx,r9,rsi4744 adc r8,04745 shrx rax,r9,rsi4746 4747 4748 add r10,rcx4749 adc r11,rax4750 4751 mulx r9,rcx,r94752 adc r8,rcx4753 shlx rcx,r10,rsi4754 adc r9,04755 shrx rax,r10,rsi4756 4757 4758 add r11,rcx4759 adc r8,rax4760 4761 mulx r10,rcx,r104762 adc r9,rcx4763 shlx rcx,r11,rsi4764 adc r10,04765 shrx rax,r11,rsi4766 4767 4768 add r8,rcx4769 adc r9,rax4770 4771 mulx r11,rcx,r114772 adc r10,rcx4773 adc r11,04774 4775 xor rdx,rdx4776 add r12,r84777 mov rsi,QWORD[(($L$poly+8))]4778 adc r13,r94779 mov r8,r124780 adc r14,r104781 adc r15,r114782 mov r9,r134783 adc rdx,04784 4785 sub r12,-14786 mov r10,r144787 sbb r13,rsi4788 sbb r14,04789 mov r11,r154790 sbb r15,rbp4791 sbb rdx,04792 4793 cmovc r12,r84794 cmovc r13,r94795 mov QWORD[rdi],r124796 cmovc r14,r104797 mov QWORD[8+rdi],r134798 cmovc r15,r114799 3993 mov QWORD[16+rdi],r14 4800 3994 mov QWORD[24+rdi],r15 … … 4952 4146 ecp_nistz256_gather_w5: 4953 4147 4954 mov eax,DWORD[((OPENSSL_ia32cap_P+8))]4955 test eax,324956 jnz NEAR $L$avx2_gather_w54957 4148 lea rax,[((-136))+rsp] 4958 4149 $L$SEH_begin_ecp_nistz256_gather_w5: … … 5062 4253 ecp_nistz256_gather_w7: 5063 4254 5064 mov eax,DWORD[((OPENSSL_ia32cap_P+8))]5065 test eax,325066 jnz NEAR $L$avx2_gather_w75067 4255 lea rax,[((-136))+rsp] 5068 4256 $L$SEH_begin_ecp_nistz256_gather_w7: … … 5132 4320 $L$SEH_end_ecp_nistz256_gather_w7: 5133 4321 5134 5135 5136 5137 ALIGN 325138 ecp_nistz256_avx2_gather_w5:5139 5140 $L$avx2_gather_w5:5141 vzeroupper5142 lea rax,[((-136))+rsp]5143 mov r11,rsp5144 $L$SEH_begin_ecp_nistz256_avx2_gather_w5:5145 DB 0x48,0x8d,0x60,0xe05146 DB 0xc5,0xf8,0x29,0x70,0xe05147 DB 0xc5,0xf8,0x29,0x78,0xf05148 DB 0xc5,0x78,0x29,0x40,0x005149 DB 0xc5,0x78,0x29,0x48,0x105150 DB 0xc5,0x78,0x29,0x50,0x205151 DB 0xc5,0x78,0x29,0x58,0x305152 DB 0xc5,0x78,0x29,0x60,0x405153 DB 0xc5,0x78,0x29,0x68,0x505154 DB 0xc5,0x78,0x29,0x70,0x605155 DB 0xc5,0x78,0x29,0x78,0x705156 vmovdqa ymm0,YMMWORD[$L$Two]5157 5158 vpxor ymm2,ymm2,ymm25159 vpxor ymm3,ymm3,ymm35160 vpxor ymm4,ymm4,ymm45161 5162 vmovdqa ymm5,YMMWORD[$L$One]5163 vmovdqa ymm10,YMMWORD[$L$Two]5164 5165 vmovd xmm1,r8d5166 vpermd ymm1,ymm2,ymm15167 5168 mov rax,85169 $L$select_loop_avx2_w5:5170 5171 vmovdqa ymm6,YMMWORD[rdx]5172 vmovdqa ymm7,YMMWORD[32+rdx]5173 vmovdqa ymm8,YMMWORD[64+rdx]5174 5175 vmovdqa ymm11,YMMWORD[96+rdx]5176 vmovdqa ymm12,YMMWORD[128+rdx]5177 vmovdqa ymm13,YMMWORD[160+rdx]5178 5179 vpcmpeqd ymm9,ymm5,ymm15180 vpcmpeqd ymm14,ymm10,ymm15181 5182 vpaddd ymm5,ymm5,ymm05183 vpaddd ymm10,ymm10,ymm05184 lea rdx,[192+rdx]5185 5186 vpand ymm6,ymm6,ymm95187 vpand ymm7,ymm7,ymm95188 vpand ymm8,ymm8,ymm95189 vpand ymm11,ymm11,ymm145190 vpand ymm12,ymm12,ymm145191 vpand ymm13,ymm13,ymm145192 5193 vpxor ymm2,ymm2,ymm65194 vpxor ymm3,ymm3,ymm75195 vpxor ymm4,ymm4,ymm85196 vpxor ymm2,ymm2,ymm115197 vpxor ymm3,ymm3,ymm125198 vpxor ymm4,ymm4,ymm135199 5200 dec rax5201 jnz NEAR $L$select_loop_avx2_w55202 5203 vmovdqu YMMWORD[rcx],ymm25204 vmovdqu YMMWORD[32+rcx],ymm35205 vmovdqu YMMWORD[64+rcx],ymm45206 vzeroupper5207 movaps xmm6,XMMWORD[rsp]5208 movaps xmm7,XMMWORD[16+rsp]5209 movaps xmm8,XMMWORD[32+rsp]5210 movaps xmm9,XMMWORD[48+rsp]5211 movaps xmm10,XMMWORD[64+rsp]5212 movaps xmm11,XMMWORD[80+rsp]5213 movaps xmm12,XMMWORD[96+rsp]5214 movaps xmm13,XMMWORD[112+rsp]5215 movaps xmm14,XMMWORD[128+rsp]5216 movaps xmm15,XMMWORD[144+rsp]5217 lea rsp,[r11]5218 DB 0F3h,0C3h ;repret5219 5220 $L$SEH_end_ecp_nistz256_avx2_gather_w5:5221 5222 5223 5224 5225 4322 global ecp_nistz256_avx2_gather_w7 5226 4323 5227 4324 ALIGN 32 5228 4325 ecp_nistz256_avx2_gather_w7: 5229 5230 $L$avx2_gather_w7: 5231 vzeroupper 5232 mov r11,rsp 5233 lea rax,[((-136))+rsp] 4326 mov QWORD[8+rsp],rdi ;WIN64 prologue 4327 mov QWORD[16+rsp],rsi 4328 mov rax,rsp 5234 4329 $L$SEH_begin_ecp_nistz256_avx2_gather_w7: 5235 DB 0x48,0x8d,0x60,0xe0 5236 DB 0xc5,0xf8,0x29,0x70,0xe0 5237 DB 0xc5,0xf8,0x29,0x78,0xf0 5238 DB 0xc5,0x78,0x29,0x40,0x00 5239 DB 0xc5,0x78,0x29,0x48,0x10 5240 DB 0xc5,0x78,0x29,0x50,0x20 5241 DB 0xc5,0x78,0x29,0x58,0x30 5242 DB 0xc5,0x78,0x29,0x60,0x40 5243 DB 0xc5,0x78,0x29,0x68,0x50 5244 DB 0xc5,0x78,0x29,0x70,0x60 5245 DB 0xc5,0x78,0x29,0x78,0x70 5246 vmovdqa ymm0,YMMWORD[$L$Three] 5247 5248 vpxor ymm2,ymm2,ymm2 5249 vpxor ymm3,ymm3,ymm3 5250 5251 vmovdqa ymm4,YMMWORD[$L$One] 5252 vmovdqa ymm8,YMMWORD[$L$Two] 5253 vmovdqa ymm12,YMMWORD[$L$Three] 5254 5255 vmovd xmm1,r8d 5256 vpermd ymm1,ymm2,ymm1 5257 5258 5259 mov rax,21 5260 $L$select_loop_avx2_w7: 5261 5262 vmovdqa ymm5,YMMWORD[rdx] 5263 vmovdqa ymm6,YMMWORD[32+rdx] 5264 5265 vmovdqa ymm9,YMMWORD[64+rdx] 5266 vmovdqa ymm10,YMMWORD[96+rdx] 5267 5268 vmovdqa ymm13,YMMWORD[128+rdx] 5269 vmovdqa ymm14,YMMWORD[160+rdx] 5270 5271 vpcmpeqd ymm7,ymm4,ymm1 5272 vpcmpeqd ymm11,ymm8,ymm1 5273 vpcmpeqd ymm15,ymm12,ymm1 5274 5275 vpaddd ymm4,ymm4,ymm0 5276 vpaddd ymm8,ymm8,ymm0 5277 vpaddd ymm12,ymm12,ymm0 5278 lea rdx,[192+rdx] 5279 5280 vpand ymm5,ymm5,ymm7 5281 vpand ymm6,ymm6,ymm7 5282 vpand ymm9,ymm9,ymm11 5283 vpand ymm10,ymm10,ymm11 5284 vpand ymm13,ymm13,ymm15 5285 vpand ymm14,ymm14,ymm15 5286 5287 vpxor ymm2,ymm2,ymm5 5288 vpxor ymm3,ymm3,ymm6 5289 vpxor ymm2,ymm2,ymm9 5290 vpxor ymm3,ymm3,ymm10 5291 vpxor ymm2,ymm2,ymm13 5292 vpxor ymm3,ymm3,ymm14 5293 5294 dec rax 5295 jnz NEAR $L$select_loop_avx2_w7 5296 5297 5298 vmovdqa ymm5,YMMWORD[rdx] 5299 vmovdqa ymm6,YMMWORD[32+rdx] 5300 5301 vpcmpeqd ymm7,ymm4,ymm1 5302 5303 vpand ymm5,ymm5,ymm7 5304 vpand ymm6,ymm6,ymm7 5305 5306 vpxor ymm2,ymm2,ymm5 5307 vpxor ymm3,ymm3,ymm6 5308 5309 vmovdqu YMMWORD[rcx],ymm2 5310 vmovdqu YMMWORD[32+rcx],ymm3 5311 vzeroupper 5312 movaps xmm6,XMMWORD[rsp] 5313 movaps xmm7,XMMWORD[16+rsp] 5314 movaps xmm8,XMMWORD[32+rsp] 5315 movaps xmm9,XMMWORD[48+rsp] 5316 movaps xmm10,XMMWORD[64+rsp] 5317 movaps xmm11,XMMWORD[80+rsp] 5318 movaps xmm12,XMMWORD[96+rsp] 5319 movaps xmm13,XMMWORD[112+rsp] 5320 movaps xmm14,XMMWORD[128+rsp] 5321 movaps xmm15,XMMWORD[144+rsp] 5322 lea rsp,[r11] 4330 mov rdi,rcx 4331 mov rsi,rdx 4332 mov rdx,r8 4333 4334 4335 4336 DB 0x0f,0x0b 4337 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 4338 mov rsi,QWORD[16+rsp] 5323 4339 DB 0F3h,0C3h ;repret 5324 4340 5325 4341 $L$SEH_end_ecp_nistz256_avx2_gather_w7: 5326 5327 4342 5328 4343 ALIGN 32 … … 5467 4482 5468 4483 5469 mov ecx,0x801005470 and ecx,DWORD[((OPENSSL_ia32cap_P+8))]5471 cmp ecx,0x801005472 je NEAR $L$point_doublex5473 4484 push rbp 5474 4485 … … 5704 4715 5705 4716 5706 mov ecx,0x801005707 and ecx,DWORD[((OPENSSL_ia32cap_P+8))]5708 cmp ecx,0x801005709 je NEAR $L$point_addx5710 4717 push rbp 5711 4718 … … 6127 5134 6128 5135 6129 mov ecx,0x801006130 and ecx,DWORD[((OPENSSL_ia32cap_P+8))]6131 cmp ecx,0x801006132 je NEAR $L$point_add_affinex6133 5136 push rbp 6134 5137 … … 6450 5453 6451 5454 $L$SEH_end_ecp_nistz256_point_add_affine: 6452 6453 ALIGN 326454 __ecp_nistz256_add_tox:6455 6456 xor r11,r116457 adc r12,QWORD[rbx]6458 adc r13,QWORD[8+rbx]6459 mov rax,r126460 adc r8,QWORD[16+rbx]6461 adc r9,QWORD[24+rbx]6462 mov rbp,r136463 adc r11,06464 6465 xor r10,r106466 sbb r12,-16467 mov rcx,r86468 sbb r13,r146469 sbb r8,06470 mov r10,r96471 sbb r9,r156472 sbb r11,06473 6474 cmovc r12,rax6475 cmovc r13,rbp6476 mov QWORD[rdi],r126477 cmovc r8,rcx6478 mov QWORD[8+rdi],r136479 cmovc r9,r106480 mov QWORD[16+rdi],r86481 mov QWORD[24+rdi],r96482 6483 DB 0F3h,0C3h ;repret6484 6485 6486 6487 6488 ALIGN 326489 __ecp_nistz256_sub_fromx:6490 6491 xor r11,r116492 sbb r12,QWORD[rbx]6493 sbb r13,QWORD[8+rbx]6494 mov rax,r126495 sbb r8,QWORD[16+rbx]6496 sbb r9,QWORD[24+rbx]6497 mov rbp,r136498 sbb r11,06499 6500 xor r10,r106501 adc r12,-16502 mov rcx,r86503 adc r13,r146504 adc r8,06505 mov r10,r96506 adc r9,r156507 6508 bt r11,06509 cmovnc r12,rax6510 cmovnc r13,rbp6511 mov QWORD[rdi],r126512 cmovnc r8,rcx6513 mov QWORD[8+rdi],r136514 cmovnc r9,r106515 mov QWORD[16+rdi],r86516 mov QWORD[24+rdi],r96517 6518 DB 0F3h,0C3h ;repret6519 6520 6521 6522 6523 ALIGN 326524 __ecp_nistz256_subx:6525 6526 xor r11,r116527 sbb rax,r126528 sbb rbp,r136529 mov r12,rax6530 sbb rcx,r86531 sbb r10,r96532 mov r13,rbp6533 sbb r11,06534 6535 xor r9,r96536 adc rax,-16537 mov r8,rcx6538 adc rbp,r146539 adc rcx,06540 mov r9,r106541 adc r10,r156542 6543 bt r11,06544 cmovc r12,rax6545 cmovc r13,rbp6546 cmovc r8,rcx6547 cmovc r9,r106548 6549 DB 0F3h,0C3h ;repret6550 6551 6552 6553 6554 ALIGN 326555 __ecp_nistz256_mul_by_2x:6556 6557 xor r11,r116558 adc r12,r126559 adc r13,r136560 mov rax,r126561 adc r8,r86562 adc r9,r96563 mov rbp,r136564 adc r11,06565 6566 xor r10,r106567 sbb r12,-16568 mov rcx,r86569 sbb r13,r146570 sbb r8,06571 mov r10,r96572 sbb r9,r156573 sbb r11,06574 6575 cmovc r12,rax6576 cmovc r13,rbp6577 mov QWORD[rdi],r126578 cmovc r8,rcx6579 mov QWORD[8+rdi],r136580 cmovc r9,r106581 mov QWORD[16+rdi],r86582 mov QWORD[24+rdi],r96583 6584 DB 0F3h,0C3h ;repret6585 6586 6587 6588 ALIGN 326589 ecp_nistz256_point_doublex:6590 mov QWORD[8+rsp],rdi ;WIN64 prologue6591 mov QWORD[16+rsp],rsi6592 mov rax,rsp6593 $L$SEH_begin_ecp_nistz256_point_doublex:6594 mov rdi,rcx6595 mov rsi,rdx6596 6597 6598 6599 $L$point_doublex:6600 push rbp6601 6602 push rbx6603 6604 push r126605 6606 push r136607 6608 push r146609 6610 push r156611 6612 sub rsp,32*5+86613 6614 $L$point_doublex_body:6615 6616 $L$point_double_shortcutx:6617 movdqu xmm0,XMMWORD[rsi]6618 mov rbx,rsi6619 movdqu xmm1,XMMWORD[16+rsi]6620 mov r12,QWORD[((32+0))+rsi]6621 mov r13,QWORD[((32+8))+rsi]6622 mov r8,QWORD[((32+16))+rsi]6623 mov r9,QWORD[((32+24))+rsi]6624 mov r14,QWORD[(($L$poly+8))]6625 mov r15,QWORD[(($L$poly+24))]6626 movdqa XMMWORD[96+rsp],xmm06627 movdqa XMMWORD[(96+16)+rsp],xmm16628 lea r10,[32+rdi]6629 lea r11,[64+rdi]6630 DB 102,72,15,110,1996631 DB 102,73,15,110,2026632 DB 102,73,15,110,2116633 6634 lea rdi,[rsp]6635 call __ecp_nistz256_mul_by_2x6636 6637 mov rdx,QWORD[((64+0))+rsi]6638 mov r14,QWORD[((64+8))+rsi]6639 mov r15,QWORD[((64+16))+rsi]6640 mov r8,QWORD[((64+24))+rsi]6641 lea rsi,[((64-128))+rsi]6642 lea rdi,[64+rsp]6643 call __ecp_nistz256_sqr_montx6644 6645 mov rdx,QWORD[((0+0))+rsp]6646 mov r14,QWORD[((8+0))+rsp]6647 lea rsi,[((-128+0))+rsp]6648 mov r15,QWORD[((16+0))+rsp]6649 mov r8,QWORD[((24+0))+rsp]6650 lea rdi,[rsp]6651 call __ecp_nistz256_sqr_montx6652 6653 mov rdx,QWORD[32+rbx]6654 mov r9,QWORD[((64+0))+rbx]6655 mov r10,QWORD[((64+8))+rbx]6656 mov r11,QWORD[((64+16))+rbx]6657 mov r12,QWORD[((64+24))+rbx]6658 lea rsi,[((64-128))+rbx]6659 lea rbx,[32+rbx]6660 DB 102,72,15,126,2156661 call __ecp_nistz256_mul_montx6662 call __ecp_nistz256_mul_by_2x6663 6664 mov r12,QWORD[((96+0))+rsp]6665 mov r13,QWORD[((96+8))+rsp]6666 lea rbx,[64+rsp]6667 mov r8,QWORD[((96+16))+rsp]6668 mov r9,QWORD[((96+24))+rsp]6669 lea rdi,[32+rsp]6670 call __ecp_nistz256_add_tox6671 6672 mov r12,QWORD[((96+0))+rsp]6673 mov r13,QWORD[((96+8))+rsp]6674 lea rbx,[64+rsp]6675 mov r8,QWORD[((96+16))+rsp]6676 mov r9,QWORD[((96+24))+rsp]6677 lea rdi,[64+rsp]6678 call __ecp_nistz256_sub_fromx6679 6680 mov rdx,QWORD[((0+0))+rsp]6681 mov r14,QWORD[((8+0))+rsp]6682 lea rsi,[((-128+0))+rsp]6683 mov r15,QWORD[((16+0))+rsp]6684 mov r8,QWORD[((24+0))+rsp]6685 DB 102,72,15,126,2076686 call __ecp_nistz256_sqr_montx6687 xor r9,r96688 mov rax,r126689 add r12,-16690 mov r10,r136691 adc r13,rsi6692 mov rcx,r146693 adc r14,06694 mov r8,r156695 adc r15,rbp6696 adc r9,06697 xor rsi,rsi6698 test rax,16699 6700 cmovz r12,rax6701 cmovz r13,r106702 cmovz r14,rcx6703 cmovz r15,r86704 cmovz r9,rsi6705 6706 mov rax,r136707 shr r12,16708 shl rax,636709 mov r10,r146710 shr r13,16711 or r12,rax6712 shl r10,636713 mov rcx,r156714 shr r14,16715 or r13,r106716 shl rcx,636717 mov QWORD[rdi],r126718 shr r15,16719 mov QWORD[8+rdi],r136720 shl r9,636721 or r14,rcx6722 or r15,r96723 mov QWORD[16+rdi],r146724 mov QWORD[24+rdi],r156725 mov rdx,QWORD[64+rsp]6726 lea rbx,[64+rsp]6727 mov r9,QWORD[((0+32))+rsp]6728 mov r10,QWORD[((8+32))+rsp]6729 lea rsi,[((-128+32))+rsp]6730 mov r11,QWORD[((16+32))+rsp]6731 mov r12,QWORD[((24+32))+rsp]6732 lea rdi,[32+rsp]6733 call __ecp_nistz256_mul_montx6734 6735 lea rdi,[128+rsp]6736 call __ecp_nistz256_mul_by_2x6737 6738 lea rbx,[32+rsp]6739 lea rdi,[32+rsp]6740 call __ecp_nistz256_add_tox6741 6742 mov rdx,QWORD[96+rsp]6743 lea rbx,[96+rsp]6744 mov r9,QWORD[((0+0))+rsp]6745 mov r10,QWORD[((8+0))+rsp]6746 lea rsi,[((-128+0))+rsp]6747 mov r11,QWORD[((16+0))+rsp]6748 mov r12,QWORD[((24+0))+rsp]6749 lea rdi,[rsp]6750 call __ecp_nistz256_mul_montx6751 6752 lea rdi,[128+rsp]6753 call __ecp_nistz256_mul_by_2x6754 6755 mov rdx,QWORD[((0+32))+rsp]6756 mov r14,QWORD[((8+32))+rsp]6757 lea rsi,[((-128+32))+rsp]6758 mov r15,QWORD[((16+32))+rsp]6759 mov r8,QWORD[((24+32))+rsp]6760 DB 102,72,15,126,1996761 call __ecp_nistz256_sqr_montx6762 6763 lea rbx,[128+rsp]6764 mov r8,r146765 mov r9,r156766 mov r14,rsi6767 mov r15,rbp6768 call __ecp_nistz256_sub_fromx6769 6770 mov rax,QWORD[((0+0))+rsp]6771 mov rbp,QWORD[((0+8))+rsp]6772 mov rcx,QWORD[((0+16))+rsp]6773 mov r10,QWORD[((0+24))+rsp]6774 lea rdi,[rsp]6775 call __ecp_nistz256_subx6776 6777 mov rdx,QWORD[32+rsp]6778 lea rbx,[32+rsp]6779 mov r14,r126780 xor ecx,ecx6781 mov QWORD[((0+0))+rsp],r126782 mov r10,r136783 mov QWORD[((0+8))+rsp],r136784 cmovz r11,r86785 mov QWORD[((0+16))+rsp],r86786 lea rsi,[((0-128))+rsp]6787 cmovz r12,r96788 mov QWORD[((0+24))+rsp],r96789 mov r9,r146790 lea rdi,[rsp]6791 call __ecp_nistz256_mul_montx6792 6793 DB 102,72,15,126,2036794 DB 102,72,15,126,2076795 call __ecp_nistz256_sub_fromx6796 6797 lea rsi,[((160+56))+rsp]6798 6799 mov r15,QWORD[((-48))+rsi]6800 6801 mov r14,QWORD[((-40))+rsi]6802 6803 mov r13,QWORD[((-32))+rsi]6804 6805 mov r12,QWORD[((-24))+rsi]6806 6807 mov rbx,QWORD[((-16))+rsi]6808 6809 mov rbp,QWORD[((-8))+rsi]6810 6811 lea rsp,[rsi]6812 6813 $L$point_doublex_epilogue:6814 mov rdi,QWORD[8+rsp] ;WIN64 epilogue6815 mov rsi,QWORD[16+rsp]6816 DB 0F3h,0C3h ;repret6817 6818 $L$SEH_end_ecp_nistz256_point_doublex:6819 6820 ALIGN 326821 ecp_nistz256_point_addx:6822 mov QWORD[8+rsp],rdi ;WIN64 prologue6823 mov QWORD[16+rsp],rsi6824 mov rax,rsp6825 $L$SEH_begin_ecp_nistz256_point_addx:6826 mov rdi,rcx6827 mov rsi,rdx6828 mov rdx,r86829 6830 6831 6832 $L$point_addx:6833 push rbp6834 6835 push rbx6836 6837 push r126838 6839 push r136840 6841 push r146842 6843 push r156844 6845 sub rsp,32*18+86846 6847 $L$point_addx_body:6848 6849 movdqu xmm0,XMMWORD[rsi]6850 movdqu xmm1,XMMWORD[16+rsi]6851 movdqu xmm2,XMMWORD[32+rsi]6852 movdqu xmm3,XMMWORD[48+rsi]6853 movdqu xmm4,XMMWORD[64+rsi]6854 movdqu xmm5,XMMWORD[80+rsi]6855 mov rbx,rsi6856 mov rsi,rdx6857 movdqa XMMWORD[384+rsp],xmm06858 movdqa XMMWORD[(384+16)+rsp],xmm16859 movdqa XMMWORD[416+rsp],xmm26860 movdqa XMMWORD[(416+16)+rsp],xmm36861 movdqa XMMWORD[448+rsp],xmm46862 movdqa XMMWORD[(448+16)+rsp],xmm56863 por xmm5,xmm46864 6865 movdqu xmm0,XMMWORD[rsi]6866 pshufd xmm3,xmm5,0xb16867 movdqu xmm1,XMMWORD[16+rsi]6868 movdqu xmm2,XMMWORD[32+rsi]6869 por xmm5,xmm36870 movdqu xmm3,XMMWORD[48+rsi]6871 mov rdx,QWORD[((64+0))+rsi]6872 mov r14,QWORD[((64+8))+rsi]6873 mov r15,QWORD[((64+16))+rsi]6874 mov r8,QWORD[((64+24))+rsi]6875 movdqa XMMWORD[480+rsp],xmm06876 pshufd xmm4,xmm5,0x1e6877 movdqa XMMWORD[(480+16)+rsp],xmm16878 movdqu xmm0,XMMWORD[64+rsi]6879 movdqu xmm1,XMMWORD[80+rsi]6880 movdqa XMMWORD[512+rsp],xmm26881 movdqa XMMWORD[(512+16)+rsp],xmm36882 por xmm5,xmm46883 pxor xmm4,xmm46884 por xmm1,xmm06885 DB 102,72,15,110,1996886 6887 lea rsi,[((64-128))+rsi]6888 mov QWORD[((544+0))+rsp],rdx6889 mov QWORD[((544+8))+rsp],r146890 mov QWORD[((544+16))+rsp],r156891 mov QWORD[((544+24))+rsp],r86892 lea rdi,[96+rsp]6893 call __ecp_nistz256_sqr_montx6894 6895 pcmpeqd xmm5,xmm46896 pshufd xmm4,xmm1,0xb16897 por xmm4,xmm16898 pshufd xmm5,xmm5,06899 pshufd xmm3,xmm4,0x1e6900 por xmm4,xmm36901 pxor xmm3,xmm36902 pcmpeqd xmm4,xmm36903 pshufd xmm4,xmm4,06904 mov rdx,QWORD[((64+0))+rbx]6905 mov r14,QWORD[((64+8))+rbx]6906 mov r15,QWORD[((64+16))+rbx]6907 mov r8,QWORD[((64+24))+rbx]6908 DB 102,72,15,110,2036909 6910 lea rsi,[((64-128))+rbx]6911 lea rdi,[32+rsp]6912 call __ecp_nistz256_sqr_montx6913 6914 mov rdx,QWORD[544+rsp]6915 lea rbx,[544+rsp]6916 mov r9,QWORD[((0+96))+rsp]6917 mov r10,QWORD[((8+96))+rsp]6918 lea rsi,[((-128+96))+rsp]6919 mov r11,QWORD[((16+96))+rsp]6920 mov r12,QWORD[((24+96))+rsp]6921 lea rdi,[224+rsp]6922 call __ecp_nistz256_mul_montx6923 6924 mov rdx,QWORD[448+rsp]6925 lea rbx,[448+rsp]6926 mov r9,QWORD[((0+32))+rsp]6927 mov r10,QWORD[((8+32))+rsp]6928 lea rsi,[((-128+32))+rsp]6929 mov r11,QWORD[((16+32))+rsp]6930 mov r12,QWORD[((24+32))+rsp]6931 lea rdi,[256+rsp]6932 call __ecp_nistz256_mul_montx6933 6934 mov rdx,QWORD[416+rsp]6935 lea rbx,[416+rsp]6936 mov r9,QWORD[((0+224))+rsp]6937 mov r10,QWORD[((8+224))+rsp]6938 lea rsi,[((-128+224))+rsp]6939 mov r11,QWORD[((16+224))+rsp]6940 mov r12,QWORD[((24+224))+rsp]6941 lea rdi,[224+rsp]6942 call __ecp_nistz256_mul_montx6943 6944 mov rdx,QWORD[512+rsp]6945 lea rbx,[512+rsp]6946 mov r9,QWORD[((0+256))+rsp]6947 mov r10,QWORD[((8+256))+rsp]6948 lea rsi,[((-128+256))+rsp]6949 mov r11,QWORD[((16+256))+rsp]6950 mov r12,QWORD[((24+256))+rsp]6951 lea rdi,[256+rsp]6952 call __ecp_nistz256_mul_montx6953 6954 lea rbx,[224+rsp]6955 lea rdi,[64+rsp]6956 call __ecp_nistz256_sub_fromx6957 6958 or r12,r136959 movdqa xmm2,xmm46960 or r12,r86961 or r12,r96962 por xmm2,xmm56963 DB 102,73,15,110,2206964 6965 mov rdx,QWORD[384+rsp]6966 lea rbx,[384+rsp]6967 mov r9,QWORD[((0+96))+rsp]6968 mov r10,QWORD[((8+96))+rsp]6969 lea rsi,[((-128+96))+rsp]6970 mov r11,QWORD[((16+96))+rsp]6971 mov r12,QWORD[((24+96))+rsp]6972 lea rdi,[160+rsp]6973 call __ecp_nistz256_mul_montx6974 6975 mov rdx,QWORD[480+rsp]6976 lea rbx,[480+rsp]6977 mov r9,QWORD[((0+32))+rsp]6978 mov r10,QWORD[((8+32))+rsp]6979 lea rsi,[((-128+32))+rsp]6980 mov r11,QWORD[((16+32))+rsp]6981 mov r12,QWORD[((24+32))+rsp]6982 lea rdi,[192+rsp]6983 call __ecp_nistz256_mul_montx6984 6985 lea rbx,[160+rsp]6986 lea rdi,[rsp]6987 call __ecp_nistz256_sub_fromx6988 6989 or r12,r136990 or r12,r86991 or r12,r96992 6993 DB 102,73,15,126,2086994 DB 102,73,15,126,2176995 6996 or r12,r86997 or r12,r96998 6999 7000 DB 0x3e7001 jnz NEAR $L$add_proceedx7002 7003 $L$add_doublex:7004 DB 102,72,15,126,2067005 DB 102,72,15,126,1997006 add rsp,4167007 7008 jmp NEAR $L$point_double_shortcutx7009 7010 7011 ALIGN 327012 $L$add_proceedx:7013 mov rdx,QWORD[((0+64))+rsp]7014 mov r14,QWORD[((8+64))+rsp]7015 lea rsi,[((-128+64))+rsp]7016 mov r15,QWORD[((16+64))+rsp]7017 mov r8,QWORD[((24+64))+rsp]7018 lea rdi,[96+rsp]7019 call __ecp_nistz256_sqr_montx7020 7021 mov rdx,QWORD[448+rsp]7022 lea rbx,[448+rsp]7023 mov r9,QWORD[((0+0))+rsp]7024 mov r10,QWORD[((8+0))+rsp]7025 lea rsi,[((-128+0))+rsp]7026 mov r11,QWORD[((16+0))+rsp]7027 mov r12,QWORD[((24+0))+rsp]7028 lea rdi,[352+rsp]7029 call __ecp_nistz256_mul_montx7030 7031 mov rdx,QWORD[((0+0))+rsp]7032 mov r14,QWORD[((8+0))+rsp]7033 lea rsi,[((-128+0))+rsp]7034 mov r15,QWORD[((16+0))+rsp]7035 mov r8,QWORD[((24+0))+rsp]7036 lea rdi,[32+rsp]7037 call __ecp_nistz256_sqr_montx7038 7039 mov rdx,QWORD[544+rsp]7040 lea rbx,[544+rsp]7041 mov r9,QWORD[((0+352))+rsp]7042 mov r10,QWORD[((8+352))+rsp]7043 lea rsi,[((-128+352))+rsp]7044 mov r11,QWORD[((16+352))+rsp]7045 mov r12,QWORD[((24+352))+rsp]7046 lea rdi,[352+rsp]7047 call __ecp_nistz256_mul_montx7048 7049 mov rdx,QWORD[rsp]7050 lea rbx,[rsp]7051 mov r9,QWORD[((0+32))+rsp]7052 mov r10,QWORD[((8+32))+rsp]7053 lea rsi,[((-128+32))+rsp]7054 mov r11,QWORD[((16+32))+rsp]7055 mov r12,QWORD[((24+32))+rsp]7056 lea rdi,[128+rsp]7057 call __ecp_nistz256_mul_montx7058 7059 mov rdx,QWORD[160+rsp]7060 lea rbx,[160+rsp]7061 mov r9,QWORD[((0+32))+rsp]7062 mov r10,QWORD[((8+32))+rsp]7063 lea rsi,[((-128+32))+rsp]7064 mov r11,QWORD[((16+32))+rsp]7065 mov r12,QWORD[((24+32))+rsp]7066 lea rdi,[192+rsp]7067 call __ecp_nistz256_mul_montx7068 7069 7070 7071 7072 xor r11,r117073 add r12,r127074 lea rsi,[96+rsp]7075 adc r13,r137076 mov rax,r127077 adc r8,r87078 adc r9,r97079 mov rbp,r137080 adc r11,07081 7082 sub r12,-17083 mov rcx,r87084 sbb r13,r147085 sbb r8,07086 mov r10,r97087 sbb r9,r157088 sbb r11,07089 7090 cmovc r12,rax7091 mov rax,QWORD[rsi]7092 cmovc r13,rbp7093 mov rbp,QWORD[8+rsi]7094 cmovc r8,rcx7095 mov rcx,QWORD[16+rsi]7096 cmovc r9,r107097 mov r10,QWORD[24+rsi]7098 7099 call __ecp_nistz256_subx7100 7101 lea rbx,[128+rsp]7102 lea rdi,[288+rsp]7103 call __ecp_nistz256_sub_fromx7104 7105 mov rax,QWORD[((192+0))+rsp]7106 mov rbp,QWORD[((192+8))+rsp]7107 mov rcx,QWORD[((192+16))+rsp]7108 mov r10,QWORD[((192+24))+rsp]7109 lea rdi,[320+rsp]7110 7111 call __ecp_nistz256_subx7112 7113 mov QWORD[rdi],r127114 mov QWORD[8+rdi],r137115 mov QWORD[16+rdi],r87116 mov QWORD[24+rdi],r97117 mov rdx,QWORD[128+rsp]7118 lea rbx,[128+rsp]7119 mov r9,QWORD[((0+224))+rsp]7120 mov r10,QWORD[((8+224))+rsp]7121 lea rsi,[((-128+224))+rsp]7122 mov r11,QWORD[((16+224))+rsp]7123 mov r12,QWORD[((24+224))+rsp]7124 lea rdi,[256+rsp]7125 call __ecp_nistz256_mul_montx7126 7127 mov rdx,QWORD[320+rsp]7128 lea rbx,[320+rsp]7129 mov r9,QWORD[((0+64))+rsp]7130 mov r10,QWORD[((8+64))+rsp]7131 lea rsi,[((-128+64))+rsp]7132 mov r11,QWORD[((16+64))+rsp]7133 mov r12,QWORD[((24+64))+rsp]7134 lea rdi,[320+rsp]7135 call __ecp_nistz256_mul_montx7136 7137 lea rbx,[256+rsp]7138 lea rdi,[320+rsp]7139 call __ecp_nistz256_sub_fromx7140 7141 DB 102,72,15,126,1997142 7143 movdqa xmm0,xmm57144 movdqa xmm1,xmm57145 pandn xmm0,XMMWORD[352+rsp]7146 movdqa xmm2,xmm57147 pandn xmm1,XMMWORD[((352+16))+rsp]7148 movdqa xmm3,xmm57149 pand xmm2,XMMWORD[544+rsp]7150 pand xmm3,XMMWORD[((544+16))+rsp]7151 por xmm2,xmm07152 por xmm3,xmm17153 7154 movdqa xmm0,xmm47155 movdqa xmm1,xmm47156 pandn xmm0,xmm27157 movdqa xmm2,xmm47158 pandn xmm1,xmm37159 movdqa xmm3,xmm47160 pand xmm2,XMMWORD[448+rsp]7161 pand xmm3,XMMWORD[((448+16))+rsp]7162 por xmm2,xmm07163 por xmm3,xmm17164 movdqu XMMWORD[64+rdi],xmm27165 movdqu XMMWORD[80+rdi],xmm37166 7167 movdqa xmm0,xmm57168 movdqa xmm1,xmm57169 pandn xmm0,XMMWORD[288+rsp]7170 movdqa xmm2,xmm57171 pandn xmm1,XMMWORD[((288+16))+rsp]7172 movdqa xmm3,xmm57173 pand xmm2,XMMWORD[480+rsp]7174 pand xmm3,XMMWORD[((480+16))+rsp]7175 por xmm2,xmm07176 por xmm3,xmm17177 7178 movdqa xmm0,xmm47179 movdqa xmm1,xmm47180 pandn xmm0,xmm27181 movdqa xmm2,xmm47182 pandn xmm1,xmm37183 movdqa xmm3,xmm47184 pand xmm2,XMMWORD[384+rsp]7185 pand xmm3,XMMWORD[((384+16))+rsp]7186 por xmm2,xmm07187 por xmm3,xmm17188 movdqu XMMWORD[rdi],xmm27189 movdqu XMMWORD[16+rdi],xmm37190 7191 movdqa xmm0,xmm57192 movdqa xmm1,xmm57193 pandn xmm0,XMMWORD[320+rsp]7194 movdqa xmm2,xmm57195 pandn xmm1,XMMWORD[((320+16))+rsp]7196 movdqa xmm3,xmm57197 pand xmm2,XMMWORD[512+rsp]7198 pand xmm3,XMMWORD[((512+16))+rsp]7199 por xmm2,xmm07200 por xmm3,xmm17201 7202 movdqa xmm0,xmm47203 movdqa xmm1,xmm47204 pandn xmm0,xmm27205 movdqa xmm2,xmm47206 pandn xmm1,xmm37207 movdqa xmm3,xmm47208 pand xmm2,XMMWORD[416+rsp]7209 pand xmm3,XMMWORD[((416+16))+rsp]7210 por xmm2,xmm07211 por xmm3,xmm17212 movdqu XMMWORD[32+rdi],xmm27213 movdqu XMMWORD[48+rdi],xmm37214 7215 $L$add_donex:7216 lea rsi,[((576+56))+rsp]7217 7218 mov r15,QWORD[((-48))+rsi]7219 7220 mov r14,QWORD[((-40))+rsi]7221 7222 mov r13,QWORD[((-32))+rsi]7223 7224 mov r12,QWORD[((-24))+rsi]7225 7226 mov rbx,QWORD[((-16))+rsi]7227 7228 mov rbp,QWORD[((-8))+rsi]7229 7230 lea rsp,[rsi]7231 7232 $L$point_addx_epilogue:7233 mov rdi,QWORD[8+rsp] ;WIN64 epilogue7234 mov rsi,QWORD[16+rsp]7235 DB 0F3h,0C3h ;repret7236 7237 $L$SEH_end_ecp_nistz256_point_addx:7238 7239 ALIGN 327240 ecp_nistz256_point_add_affinex:7241 mov QWORD[8+rsp],rdi ;WIN64 prologue7242 mov QWORD[16+rsp],rsi7243 mov rax,rsp7244 $L$SEH_begin_ecp_nistz256_point_add_affinex:7245 mov rdi,rcx7246 mov rsi,rdx7247 mov rdx,r87248 7249 7250 7251 $L$point_add_affinex:7252 push rbp7253 7254 push rbx7255 7256 push r127257 7258 push r137259 7260 push r147261 7262 push r157263 7264 sub rsp,32*15+87265 7266 $L$add_affinex_body:7267 7268 movdqu xmm0,XMMWORD[rsi]7269 mov rbx,rdx7270 movdqu xmm1,XMMWORD[16+rsi]7271 movdqu xmm2,XMMWORD[32+rsi]7272 movdqu xmm3,XMMWORD[48+rsi]7273 movdqu xmm4,XMMWORD[64+rsi]7274 movdqu xmm5,XMMWORD[80+rsi]7275 mov rdx,QWORD[((64+0))+rsi]7276 mov r14,QWORD[((64+8))+rsi]7277 mov r15,QWORD[((64+16))+rsi]7278 mov r8,QWORD[((64+24))+rsi]7279 movdqa XMMWORD[320+rsp],xmm07280 movdqa XMMWORD[(320+16)+rsp],xmm17281 movdqa XMMWORD[352+rsp],xmm27282 movdqa XMMWORD[(352+16)+rsp],xmm37283 movdqa XMMWORD[384+rsp],xmm47284 movdqa XMMWORD[(384+16)+rsp],xmm57285 por xmm5,xmm47286 7287 movdqu xmm0,XMMWORD[rbx]7288 pshufd xmm3,xmm5,0xb17289 movdqu xmm1,XMMWORD[16+rbx]7290 movdqu xmm2,XMMWORD[32+rbx]7291 por xmm5,xmm37292 movdqu xmm3,XMMWORD[48+rbx]7293 movdqa XMMWORD[416+rsp],xmm07294 pshufd xmm4,xmm5,0x1e7295 movdqa XMMWORD[(416+16)+rsp],xmm17296 por xmm1,xmm07297 DB 102,72,15,110,1997298 movdqa XMMWORD[448+rsp],xmm27299 movdqa XMMWORD[(448+16)+rsp],xmm37300 por xmm3,xmm27301 por xmm5,xmm47302 pxor xmm4,xmm47303 por xmm3,xmm17304 7305 lea rsi,[((64-128))+rsi]7306 lea rdi,[32+rsp]7307 call __ecp_nistz256_sqr_montx7308 7309 pcmpeqd xmm5,xmm47310 pshufd xmm4,xmm3,0xb17311 mov rdx,QWORD[rbx]7312 7313 mov r9,r127314 por xmm4,xmm37315 pshufd xmm5,xmm5,07316 pshufd xmm3,xmm4,0x1e7317 mov r10,r137318 por xmm4,xmm37319 pxor xmm3,xmm37320 mov r11,r147321 pcmpeqd xmm4,xmm37322 pshufd xmm4,xmm4,07323 7324 lea rsi,[((32-128))+rsp]7325 mov r12,r157326 lea rdi,[rsp]7327 call __ecp_nistz256_mul_montx7328 7329 lea rbx,[320+rsp]7330 lea rdi,[64+rsp]7331 call __ecp_nistz256_sub_fromx7332 7333 mov rdx,QWORD[384+rsp]7334 lea rbx,[384+rsp]7335 mov r9,QWORD[((0+32))+rsp]7336 mov r10,QWORD[((8+32))+rsp]7337 lea rsi,[((-128+32))+rsp]7338 mov r11,QWORD[((16+32))+rsp]7339 mov r12,QWORD[((24+32))+rsp]7340 lea rdi,[32+rsp]7341 call __ecp_nistz256_mul_montx7342 7343 mov rdx,QWORD[384+rsp]7344 lea rbx,[384+rsp]7345 mov r9,QWORD[((0+64))+rsp]7346 mov r10,QWORD[((8+64))+rsp]7347 lea rsi,[((-128+64))+rsp]7348 mov r11,QWORD[((16+64))+rsp]7349 mov r12,QWORD[((24+64))+rsp]7350 lea rdi,[288+rsp]7351 call __ecp_nistz256_mul_montx7352 7353 mov rdx,QWORD[448+rsp]7354 lea rbx,[448+rsp]7355 mov r9,QWORD[((0+32))+rsp]7356 mov r10,QWORD[((8+32))+rsp]7357 lea rsi,[((-128+32))+rsp]7358 mov r11,QWORD[((16+32))+rsp]7359 mov r12,QWORD[((24+32))+rsp]7360 lea rdi,[32+rsp]7361 call __ecp_nistz256_mul_montx7362 7363 lea rbx,[352+rsp]7364 lea rdi,[96+rsp]7365 call __ecp_nistz256_sub_fromx7366 7367 mov rdx,QWORD[((0+64))+rsp]7368 mov r14,QWORD[((8+64))+rsp]7369 lea rsi,[((-128+64))+rsp]7370 mov r15,QWORD[((16+64))+rsp]7371 mov r8,QWORD[((24+64))+rsp]7372 lea rdi,[128+rsp]7373 call __ecp_nistz256_sqr_montx7374 7375 mov rdx,QWORD[((0+96))+rsp]7376 mov r14,QWORD[((8+96))+rsp]7377 lea rsi,[((-128+96))+rsp]7378 mov r15,QWORD[((16+96))+rsp]7379 mov r8,QWORD[((24+96))+rsp]7380 lea rdi,[192+rsp]7381 call __ecp_nistz256_sqr_montx7382 7383 mov rdx,QWORD[128+rsp]7384 lea rbx,[128+rsp]7385 mov r9,QWORD[((0+64))+rsp]7386 mov r10,QWORD[((8+64))+rsp]7387 lea rsi,[((-128+64))+rsp]7388 mov r11,QWORD[((16+64))+rsp]7389 mov r12,QWORD[((24+64))+rsp]7390 lea rdi,[160+rsp]7391 call __ecp_nistz256_mul_montx7392 7393 mov rdx,QWORD[320+rsp]7394 lea rbx,[320+rsp]7395 mov r9,QWORD[((0+128))+rsp]7396 mov r10,QWORD[((8+128))+rsp]7397 lea rsi,[((-128+128))+rsp]7398 mov r11,QWORD[((16+128))+rsp]7399 mov r12,QWORD[((24+128))+rsp]7400 lea rdi,[rsp]7401 call __ecp_nistz256_mul_montx7402 7403 7404 7405 7406 xor r11,r117407 add r12,r127408 lea rsi,[192+rsp]7409 adc r13,r137410 mov rax,r127411 adc r8,r87412 adc r9,r97413 mov rbp,r137414 adc r11,07415 7416 sub r12,-17417 mov rcx,r87418 sbb r13,r147419 sbb r8,07420 mov r10,r97421 sbb r9,r157422 sbb r11,07423 7424 cmovc r12,rax7425 mov rax,QWORD[rsi]7426 cmovc r13,rbp7427 mov rbp,QWORD[8+rsi]7428 cmovc r8,rcx7429 mov rcx,QWORD[16+rsi]7430 cmovc r9,r107431 mov r10,QWORD[24+rsi]7432 7433 call __ecp_nistz256_subx7434 7435 lea rbx,[160+rsp]7436 lea rdi,[224+rsp]7437 call __ecp_nistz256_sub_fromx7438 7439 mov rax,QWORD[((0+0))+rsp]7440 mov rbp,QWORD[((0+8))+rsp]7441 mov rcx,QWORD[((0+16))+rsp]7442 mov r10,QWORD[((0+24))+rsp]7443 lea rdi,[64+rsp]7444 7445 call __ecp_nistz256_subx7446 7447 mov QWORD[rdi],r127448 mov QWORD[8+rdi],r137449 mov QWORD[16+rdi],r87450 mov QWORD[24+rdi],r97451 mov rdx,QWORD[352+rsp]7452 lea rbx,[352+rsp]7453 mov r9,QWORD[((0+160))+rsp]7454 mov r10,QWORD[((8+160))+rsp]7455 lea rsi,[((-128+160))+rsp]7456 mov r11,QWORD[((16+160))+rsp]7457 mov r12,QWORD[((24+160))+rsp]7458 lea rdi,[32+rsp]7459 call __ecp_nistz256_mul_montx7460 7461 mov rdx,QWORD[96+rsp]7462 lea rbx,[96+rsp]7463 mov r9,QWORD[((0+64))+rsp]7464 mov r10,QWORD[((8+64))+rsp]7465 lea rsi,[((-128+64))+rsp]7466 mov r11,QWORD[((16+64))+rsp]7467 mov r12,QWORD[((24+64))+rsp]7468 lea rdi,[64+rsp]7469 call __ecp_nistz256_mul_montx7470 7471 lea rbx,[32+rsp]7472 lea rdi,[256+rsp]7473 call __ecp_nistz256_sub_fromx7474 7475 DB 102,72,15,126,1997476 7477 movdqa xmm0,xmm57478 movdqa xmm1,xmm57479 pandn xmm0,XMMWORD[288+rsp]7480 movdqa xmm2,xmm57481 pandn xmm1,XMMWORD[((288+16))+rsp]7482 movdqa xmm3,xmm57483 pand xmm2,XMMWORD[$L$ONE_mont]7484 pand xmm3,XMMWORD[(($L$ONE_mont+16))]7485 por xmm2,xmm07486 por xmm3,xmm17487 7488 movdqa xmm0,xmm47489 movdqa xmm1,xmm47490 pandn xmm0,xmm27491 movdqa xmm2,xmm47492 pandn xmm1,xmm37493 movdqa xmm3,xmm47494 pand xmm2,XMMWORD[384+rsp]7495 pand xmm3,XMMWORD[((384+16))+rsp]7496 por xmm2,xmm07497 por xmm3,xmm17498 movdqu XMMWORD[64+rdi],xmm27499 movdqu XMMWORD[80+rdi],xmm37500 7501 movdqa xmm0,xmm57502 movdqa xmm1,xmm57503 pandn xmm0,XMMWORD[224+rsp]7504 movdqa xmm2,xmm57505 pandn xmm1,XMMWORD[((224+16))+rsp]7506 movdqa xmm3,xmm57507 pand xmm2,XMMWORD[416+rsp]7508 pand xmm3,XMMWORD[((416+16))+rsp]7509 por xmm2,xmm07510 por xmm3,xmm17511 7512 movdqa xmm0,xmm47513 movdqa xmm1,xmm47514 pandn xmm0,xmm27515 movdqa xmm2,xmm47516 pandn xmm1,xmm37517 movdqa xmm3,xmm47518 pand xmm2,XMMWORD[320+rsp]7519 pand xmm3,XMMWORD[((320+16))+rsp]7520 por xmm2,xmm07521 por xmm3,xmm17522 movdqu XMMWORD[rdi],xmm27523 movdqu XMMWORD[16+rdi],xmm37524 7525 movdqa xmm0,xmm57526 movdqa xmm1,xmm57527 pandn xmm0,XMMWORD[256+rsp]7528 movdqa xmm2,xmm57529 pandn xmm1,XMMWORD[((256+16))+rsp]7530 movdqa xmm3,xmm57531 pand xmm2,XMMWORD[448+rsp]7532 pand xmm3,XMMWORD[((448+16))+rsp]7533 por xmm2,xmm07534 por xmm3,xmm17535 7536 movdqa xmm0,xmm47537 movdqa xmm1,xmm47538 pandn xmm0,xmm27539 movdqa xmm2,xmm47540 pandn xmm1,xmm37541 movdqa xmm3,xmm47542 pand xmm2,XMMWORD[352+rsp]7543 pand xmm3,XMMWORD[((352+16))+rsp]7544 por xmm2,xmm07545 por xmm3,xmm17546 movdqu XMMWORD[32+rdi],xmm27547 movdqu XMMWORD[48+rdi],xmm37548 7549 lea rsi,[((480+56))+rsp]7550 7551 mov r15,QWORD[((-48))+rsi]7552 7553 mov r14,QWORD[((-40))+rsi]7554 7555 mov r13,QWORD[((-32))+rsi]7556 7557 mov r12,QWORD[((-24))+rsi]7558 7559 mov rbx,QWORD[((-16))+rsi]7560 7561 mov rbp,QWORD[((-8))+rsi]7562 7563 lea rsp,[rsi]7564 7565 $L$add_affinex_epilogue:7566 mov rdi,QWORD[8+rsp] ;WIN64 epilogue7567 mov rsi,QWORD[16+rsp]7568 DB 0F3h,0C3h ;repret7569 7570 $L$SEH_end_ecp_nistz256_point_add_affinex:7571 5455 EXTERN __imp_RtlVirtualUnwind 7572 5456 … … 7734 5618 DD $L$SEH_end_ecp_nistz256_ord_sqr_mont wrt ..imagebase 7735 5619 DD $L$SEH_info_ecp_nistz256_ord_sqr_mont wrt ..imagebase 7736 DD $L$SEH_begin_ecp_nistz256_ord_mul_montx wrt ..imagebase7737 DD $L$SEH_end_ecp_nistz256_ord_mul_montx wrt ..imagebase7738 DD $L$SEH_info_ecp_nistz256_ord_mul_montx wrt ..imagebase7739 7740 DD $L$SEH_begin_ecp_nistz256_ord_sqr_montx wrt ..imagebase7741 DD $L$SEH_end_ecp_nistz256_ord_sqr_montx wrt ..imagebase7742 DD $L$SEH_info_ecp_nistz256_ord_sqr_montx wrt ..imagebase7743 5620 DD $L$SEH_begin_ecp_nistz256_to_mont wrt ..imagebase 7744 5621 DD $L$SEH_end_ecp_nistz256_to_mont wrt ..imagebase … … 7764 5641 DD $L$SEH_end_ecp_nistz256_gather_w7 wrt ..imagebase 7765 5642 DD $L$SEH_info_ecp_nistz256_gather_wX wrt ..imagebase 7766 DD $L$SEH_begin_ecp_nistz256_avx2_gather_w5 wrt ..imagebase7767 DD $L$SEH_end_ecp_nistz256_avx2_gather_w5 wrt ..imagebase7768 DD $L$SEH_info_ecp_nistz256_avx2_gather_wX wrt ..imagebase7769 7770 DD $L$SEH_begin_ecp_nistz256_avx2_gather_w7 wrt ..imagebase7771 DD $L$SEH_end_ecp_nistz256_avx2_gather_w7 wrt ..imagebase7772 DD $L$SEH_info_ecp_nistz256_avx2_gather_wX wrt ..imagebase7773 5643 DD $L$SEH_begin_ecp_nistz256_point_double wrt ..imagebase 7774 5644 DD $L$SEH_end_ecp_nistz256_point_double wrt ..imagebase … … 7782 5652 DD $L$SEH_end_ecp_nistz256_point_add_affine wrt ..imagebase 7783 5653 DD $L$SEH_info_ecp_nistz256_point_add_affine wrt ..imagebase 7784 DD $L$SEH_begin_ecp_nistz256_point_doublex wrt ..imagebase7785 DD $L$SEH_end_ecp_nistz256_point_doublex wrt ..imagebase7786 DD $L$SEH_info_ecp_nistz256_point_doublex wrt ..imagebase7787 7788 DD $L$SEH_begin_ecp_nistz256_point_addx wrt ..imagebase7789 DD $L$SEH_end_ecp_nistz256_point_addx wrt ..imagebase7790 DD $L$SEH_info_ecp_nistz256_point_addx wrt ..imagebase7791 7792 DD $L$SEH_begin_ecp_nistz256_point_add_affinex wrt ..imagebase7793 DD $L$SEH_end_ecp_nistz256_point_add_affinex wrt ..imagebase7794 DD $L$SEH_info_ecp_nistz256_point_add_affinex wrt ..imagebase7795 5654 7796 5655 section .xdata rdata align=8 … … 7830 5689 DD $L$ord_sqr_body wrt ..imagebase,$L$ord_sqr_epilogue wrt ..imagebase 7831 5690 DD 48,0 7832 $L$SEH_info_ecp_nistz256_ord_mul_montx:7833 DB 9,0,0,07834 DD full_handler wrt ..imagebase7835 DD $L$ord_mulx_body wrt ..imagebase,$L$ord_mulx_epilogue wrt ..imagebase7836 DD 48,07837 $L$SEH_info_ecp_nistz256_ord_sqr_montx:7838 DB 9,0,0,07839 DD full_handler wrt ..imagebase7840 DD $L$ord_sqrx_body wrt ..imagebase,$L$ord_sqrx_epilogue wrt ..imagebase7841 DD 48,07842 5691 $L$SEH_info_ecp_nistz256_to_mont: 7843 5692 DB 9,0,0,0 … … 7873 5722 DB 0x04,0x01,0x15,0x00 7874 5723 ALIGN 8 7875 $L$SEH_info_ecp_nistz256_avx2_gather_wX:7876 DB 0x01,0x36,0x17,0x0b7877 DB 0x36,0xf8,0x09,0x007878 DB 0x31,0xe8,0x08,0x007879 DB 0x2c,0xd8,0x07,0x007880 DB 0x27,0xc8,0x06,0x007881 DB 0x22,0xb8,0x05,0x007882 DB 0x1d,0xa8,0x04,0x007883 DB 0x18,0x98,0x03,0x007884 DB 0x13,0x88,0x02,0x007885 DB 0x0e,0x78,0x01,0x007886 DB 0x09,0x68,0x00,0x007887 DB 0x04,0x01,0x15,0x007888 DB 0x00,0xb3,0x00,0x007889 ALIGN 87890 5724 $L$SEH_info_ecp_nistz256_point_double: 7891 5725 DB 9,0,0,0 … … 7903 5737 DD $L$add_affineq_body wrt ..imagebase,$L$add_affineq_epilogue wrt ..imagebase 7904 5738 DD 32*15+56,0 7905 ALIGN 87906 $L$SEH_info_ecp_nistz256_point_doublex:7907 DB 9,0,0,07908 DD full_handler wrt ..imagebase7909 DD $L$point_doublex_body wrt ..imagebase,$L$point_doublex_epilogue wrt ..imagebase7910 DD 32*5+56,07911 $L$SEH_info_ecp_nistz256_point_addx:7912 DB 9,0,0,07913 DD full_handler wrt ..imagebase7914 DD $L$point_addx_body wrt ..imagebase,$L$point_addx_epilogue wrt ..imagebase7915 DD 32*18+56,07916 $L$SEH_info_ecp_nistz256_point_add_affinex:7917 DB 9,0,0,07918 DD full_handler wrt ..imagebase7919 DD $L$add_affinex_body wrt ..imagebase,$L$add_affinex_epilogue wrt ..imagebase7920 DD 32*15+56,0 -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/ghash-x86_64.S
r94083 r95221 1355 1355 gcm_init_avx: 1356 1356 1357 $L$SEH_begin_gcm_init_avx: 1358 1359 DB 0x48,0x83,0xec,0x18 1360 DB 0x0f,0x29,0x34,0x24 1361 vzeroupper 1362 1363 vmovdqu xmm2,XMMWORD[rdx] 1364 vpshufd xmm2,xmm2,78 1365 1366 1367 vpshufd xmm4,xmm2,255 1368 vpsrlq xmm3,xmm2,63 1369 vpsllq xmm2,xmm2,1 1370 vpxor xmm5,xmm5,xmm5 1371 vpcmpgtd xmm5,xmm5,xmm4 1372 vpslldq xmm3,xmm3,8 1373 vpor xmm2,xmm2,xmm3 1374 1375 1376 vpand xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial] 1377 vpxor xmm2,xmm2,xmm5 1378 1379 vpunpckhqdq xmm6,xmm2,xmm2 1380 vmovdqa xmm0,xmm2 1381 vpxor xmm6,xmm6,xmm2 1382 mov r10,4 1383 jmp NEAR $L$init_start_avx 1384 ALIGN 32 1385 $L$init_loop_avx: 1386 vpalignr xmm5,xmm4,xmm3,8 1387 vmovdqu XMMWORD[(-16)+rcx],xmm5 1388 vpunpckhqdq xmm3,xmm0,xmm0 1389 vpxor xmm3,xmm3,xmm0 1390 vpclmulqdq xmm1,xmm0,xmm2,0x11 1391 vpclmulqdq xmm0,xmm0,xmm2,0x00 1392 vpclmulqdq xmm3,xmm3,xmm6,0x00 1393 vpxor xmm4,xmm1,xmm0 1394 vpxor xmm3,xmm3,xmm4 1395 1396 vpslldq xmm4,xmm3,8 1397 vpsrldq xmm3,xmm3,8 1398 vpxor xmm0,xmm0,xmm4 1399 vpxor xmm1,xmm1,xmm3 1400 vpsllq xmm3,xmm0,57 1401 vpsllq xmm4,xmm0,62 1402 vpxor xmm4,xmm4,xmm3 1403 vpsllq xmm3,xmm0,63 1404 vpxor xmm4,xmm4,xmm3 1405 vpslldq xmm3,xmm4,8 1406 vpsrldq xmm4,xmm4,8 1407 vpxor xmm0,xmm0,xmm3 1408 vpxor xmm1,xmm1,xmm4 1409 1410 vpsrlq xmm4,xmm0,1 1411 vpxor xmm1,xmm1,xmm0 1412 vpxor xmm0,xmm0,xmm4 1413 vpsrlq xmm4,xmm4,5 1414 vpxor xmm0,xmm0,xmm4 1415 vpsrlq xmm0,xmm0,1 1416 vpxor xmm0,xmm0,xmm1 1417 $L$init_start_avx: 1418 vmovdqa xmm5,xmm0 1419 vpunpckhqdq xmm3,xmm0,xmm0 1420 vpxor xmm3,xmm3,xmm0 1421 vpclmulqdq xmm1,xmm0,xmm2,0x11 1422 vpclmulqdq xmm0,xmm0,xmm2,0x00 1423 vpclmulqdq xmm3,xmm3,xmm6,0x00 1424 vpxor xmm4,xmm1,xmm0 1425 vpxor xmm3,xmm3,xmm4 1426 1427 vpslldq xmm4,xmm3,8 1428 vpsrldq xmm3,xmm3,8 1429 vpxor xmm0,xmm0,xmm4 1430 vpxor xmm1,xmm1,xmm3 1431 vpsllq xmm3,xmm0,57 1432 vpsllq xmm4,xmm0,62 1433 vpxor xmm4,xmm4,xmm3 1434 vpsllq xmm3,xmm0,63 1435 vpxor xmm4,xmm4,xmm3 1436 vpslldq xmm3,xmm4,8 1437 vpsrldq xmm4,xmm4,8 1438 vpxor xmm0,xmm0,xmm3 1439 vpxor xmm1,xmm1,xmm4 1440 1441 vpsrlq xmm4,xmm0,1 1442 vpxor xmm1,xmm1,xmm0 1443 vpxor xmm0,xmm0,xmm4 1444 vpsrlq xmm4,xmm4,5 1445 vpxor xmm0,xmm0,xmm4 1446 vpsrlq xmm0,xmm0,1 1447 vpxor xmm0,xmm0,xmm1 1448 vpshufd xmm3,xmm5,78 1449 vpshufd xmm4,xmm0,78 1450 vpxor xmm3,xmm3,xmm5 1451 vmovdqu XMMWORD[rcx],xmm5 1452 vpxor xmm4,xmm4,xmm0 1453 vmovdqu XMMWORD[16+rcx],xmm0 1454 lea rcx,[48+rcx] 1455 sub r10,1 1456 jnz NEAR $L$init_loop_avx 1457 1458 vpalignr xmm5,xmm3,xmm4,8 1459 vmovdqu XMMWORD[(-16)+rcx],xmm5 1460 1461 vzeroupper 1462 movaps xmm6,XMMWORD[rsp] 1463 lea rsp,[24+rsp] 1464 $L$SEH_end_gcm_init_avx: 1465 DB 0F3h,0C3h ;repret 1357 jmp NEAR $L$_init_clmul 1466 1358 1467 1359 … … 1481 1373 1482 1374 DB 243,15,30,250 1483 lea rax,[((-136))+rsp] 1484 $L$SEH_begin_gcm_ghash_avx: 1485 1486 DB 0x48,0x8d,0x60,0xe0 1487 DB 0x0f,0x29,0x70,0xe0 1488 DB 0x0f,0x29,0x78,0xf0 1489 DB 0x44,0x0f,0x29,0x00 1490 DB 0x44,0x0f,0x29,0x48,0x10 1491 DB 0x44,0x0f,0x29,0x50,0x20 1492 DB 0x44,0x0f,0x29,0x58,0x30 1493 DB 0x44,0x0f,0x29,0x60,0x40 1494 DB 0x44,0x0f,0x29,0x68,0x50 1495 DB 0x44,0x0f,0x29,0x70,0x60 1496 DB 0x44,0x0f,0x29,0x78,0x70 1497 vzeroupper 1498 1499 vmovdqu xmm10,XMMWORD[rcx] 1500 lea r10,[$L$0x1c2_polynomial] 1501 lea rdx,[64+rdx] 1502 vmovdqu xmm13,XMMWORD[$L$bswap_mask] 1503 vpshufb xmm10,xmm10,xmm13 1504 cmp r9,0x80 1505 jb NEAR $L$short_avx 1506 sub r9,0x80 1507 1508 vmovdqu xmm14,XMMWORD[112+r8] 1509 vmovdqu xmm6,XMMWORD[((0-64))+rdx] 1510 vpshufb xmm14,xmm14,xmm13 1511 vmovdqu xmm7,XMMWORD[((32-64))+rdx] 1512 1513 vpunpckhqdq xmm9,xmm14,xmm14 1514 vmovdqu xmm15,XMMWORD[96+r8] 1515 vpclmulqdq xmm0,xmm14,xmm6,0x00 1516 vpxor xmm9,xmm9,xmm14 1517 vpshufb xmm15,xmm15,xmm13 1518 vpclmulqdq xmm1,xmm14,xmm6,0x11 1519 vmovdqu xmm6,XMMWORD[((16-64))+rdx] 1520 vpunpckhqdq xmm8,xmm15,xmm15 1521 vmovdqu xmm14,XMMWORD[80+r8] 1522 vpclmulqdq xmm2,xmm9,xmm7,0x00 1523 vpxor xmm8,xmm8,xmm15 1524 1525 vpshufb xmm14,xmm14,xmm13 1526 vpclmulqdq xmm3,xmm15,xmm6,0x00 1527 vpunpckhqdq xmm9,xmm14,xmm14 1528 vpclmulqdq xmm4,xmm15,xmm6,0x11 1529 vmovdqu xmm6,XMMWORD[((48-64))+rdx] 1530 vpxor xmm9,xmm9,xmm14 1531 vmovdqu xmm15,XMMWORD[64+r8] 1532 vpclmulqdq xmm5,xmm8,xmm7,0x10 1533 vmovdqu xmm7,XMMWORD[((80-64))+rdx] 1534 1535 vpshufb xmm15,xmm15,xmm13 1536 vpxor xmm3,xmm3,xmm0 1537 vpclmulqdq xmm0,xmm14,xmm6,0x00 1538 vpxor xmm4,xmm4,xmm1 1539 vpunpckhqdq xmm8,xmm15,xmm15 1540 vpclmulqdq xmm1,xmm14,xmm6,0x11 1541 vmovdqu xmm6,XMMWORD[((64-64))+rdx] 1542 vpxor xmm5,xmm5,xmm2 1543 vpclmulqdq xmm2,xmm9,xmm7,0x00 1544 vpxor xmm8,xmm8,xmm15 1545 1546 vmovdqu xmm14,XMMWORD[48+r8] 1547 vpxor xmm0,xmm0,xmm3 1548 vpclmulqdq xmm3,xmm15,xmm6,0x00 1549 vpxor xmm1,xmm1,xmm4 1550 vpshufb xmm14,xmm14,xmm13 1551 vpclmulqdq xmm4,xmm15,xmm6,0x11 1552 vmovdqu xmm6,XMMWORD[((96-64))+rdx] 1553 vpxor xmm2,xmm2,xmm5 1554 vpunpckhqdq xmm9,xmm14,xmm14 1555 vpclmulqdq xmm5,xmm8,xmm7,0x10 1556 vmovdqu xmm7,XMMWORD[((128-64))+rdx] 1557 vpxor xmm9,xmm9,xmm14 1558 1559 vmovdqu xmm15,XMMWORD[32+r8] 1560 vpxor xmm3,xmm3,xmm0 1561 vpclmulqdq xmm0,xmm14,xmm6,0x00 1562 vpxor xmm4,xmm4,xmm1 1563 vpshufb xmm15,xmm15,xmm13 1564 vpclmulqdq xmm1,xmm14,xmm6,0x11 1565 vmovdqu xmm6,XMMWORD[((112-64))+rdx] 1566 vpxor xmm5,xmm5,xmm2 1567 vpunpckhqdq xmm8,xmm15,xmm15 1568 vpclmulqdq xmm2,xmm9,xmm7,0x00 1569 vpxor xmm8,xmm8,xmm15 1570 1571 vmovdqu xmm14,XMMWORD[16+r8] 1572 vpxor xmm0,xmm0,xmm3 1573 vpclmulqdq xmm3,xmm15,xmm6,0x00 1574 vpxor xmm1,xmm1,xmm4 1575 vpshufb xmm14,xmm14,xmm13 1576 vpclmulqdq xmm4,xmm15,xmm6,0x11 1577 vmovdqu xmm6,XMMWORD[((144-64))+rdx] 1578 vpxor xmm2,xmm2,xmm5 1579 vpunpckhqdq xmm9,xmm14,xmm14 1580 vpclmulqdq xmm5,xmm8,xmm7,0x10 1581 vmovdqu xmm7,XMMWORD[((176-64))+rdx] 1582 vpxor xmm9,xmm9,xmm14 1583 1584 vmovdqu xmm15,XMMWORD[r8] 1585 vpxor xmm3,xmm3,xmm0 1586 vpclmulqdq xmm0,xmm14,xmm6,0x00 1587 vpxor xmm4,xmm4,xmm1 1588 vpshufb xmm15,xmm15,xmm13 1589 vpclmulqdq xmm1,xmm14,xmm6,0x11 1590 vmovdqu xmm6,XMMWORD[((160-64))+rdx] 1591 vpxor xmm5,xmm5,xmm2 1592 vpclmulqdq xmm2,xmm9,xmm7,0x10 1593 1594 lea r8,[128+r8] 1595 cmp r9,0x80 1596 jb NEAR $L$tail_avx 1597 1598 vpxor xmm15,xmm15,xmm10 1599 sub r9,0x80 1600 jmp NEAR $L$oop8x_avx 1601 1602 ALIGN 32 1603 $L$oop8x_avx: 1604 vpunpckhqdq xmm8,xmm15,xmm15 1605 vmovdqu xmm14,XMMWORD[112+r8] 1606 vpxor xmm3,xmm3,xmm0 1607 vpxor xmm8,xmm8,xmm15 1608 vpclmulqdq xmm10,xmm15,xmm6,0x00 1609 vpshufb xmm14,xmm14,xmm13 1610 vpxor xmm4,xmm4,xmm1 1611 vpclmulqdq xmm11,xmm15,xmm6,0x11 1612 vmovdqu xmm6,XMMWORD[((0-64))+rdx] 1613 vpunpckhqdq xmm9,xmm14,xmm14 1614 vpxor xmm5,xmm5,xmm2 1615 vpclmulqdq xmm12,xmm8,xmm7,0x00 1616 vmovdqu xmm7,XMMWORD[((32-64))+rdx] 1617 vpxor xmm9,xmm9,xmm14 1618 1619 vmovdqu xmm15,XMMWORD[96+r8] 1620 vpclmulqdq xmm0,xmm14,xmm6,0x00 1621 vpxor xmm10,xmm10,xmm3 1622 vpshufb xmm15,xmm15,xmm13 1623 vpclmulqdq xmm1,xmm14,xmm6,0x11 1624 vxorps xmm11,xmm11,xmm4 1625 vmovdqu xmm6,XMMWORD[((16-64))+rdx] 1626 vpunpckhqdq xmm8,xmm15,xmm15 1627 vpclmulqdq xmm2,xmm9,xmm7,0x00 1628 vpxor xmm12,xmm12,xmm5 1629 vxorps xmm8,xmm8,xmm15 1630 1631 vmovdqu xmm14,XMMWORD[80+r8] 1632 vpxor xmm12,xmm12,xmm10 1633 vpclmulqdq xmm3,xmm15,xmm6,0x00 1634 vpxor xmm12,xmm12,xmm11 1635 vpslldq xmm9,xmm12,8 1636 vpxor xmm3,xmm3,xmm0 1637 vpclmulqdq xmm4,xmm15,xmm6,0x11 1638 vpsrldq xmm12,xmm12,8 1639 vpxor xmm10,xmm10,xmm9 1640 vmovdqu xmm6,XMMWORD[((48-64))+rdx] 1641 vpshufb xmm14,xmm14,xmm13 1642 vxorps xmm11,xmm11,xmm12 1643 vpxor xmm4,xmm4,xmm1 1644 vpunpckhqdq xmm9,xmm14,xmm14 1645 vpclmulqdq xmm5,xmm8,xmm7,0x10 1646 vmovdqu xmm7,XMMWORD[((80-64))+rdx] 1647 vpxor xmm9,xmm9,xmm14 1648 vpxor xmm5,xmm5,xmm2 1649 1650 vmovdqu xmm15,XMMWORD[64+r8] 1651 vpalignr xmm12,xmm10,xmm10,8 1652 vpclmulqdq xmm0,xmm14,xmm6,0x00 1653 vpshufb xmm15,xmm15,xmm13 1654 vpxor xmm0,xmm0,xmm3 1655 vpclmulqdq xmm1,xmm14,xmm6,0x11 1656 vmovdqu xmm6,XMMWORD[((64-64))+rdx] 1657 vpunpckhqdq xmm8,xmm15,xmm15 1658 vpxor xmm1,xmm1,xmm4 1659 vpclmulqdq xmm2,xmm9,xmm7,0x00 1660 vxorps xmm8,xmm8,xmm15 1661 vpxor xmm2,xmm2,xmm5 1662 1663 vmovdqu xmm14,XMMWORD[48+r8] 1664 vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10 1665 vpclmulqdq xmm3,xmm15,xmm6,0x00 1666 vpshufb xmm14,xmm14,xmm13 1667 vpxor xmm3,xmm3,xmm0 1668 vpclmulqdq xmm4,xmm15,xmm6,0x11 1669 vmovdqu xmm6,XMMWORD[((96-64))+rdx] 1670 vpunpckhqdq xmm9,xmm14,xmm14 1671 vpxor xmm4,xmm4,xmm1 1672 vpclmulqdq xmm5,xmm8,xmm7,0x10 1673 vmovdqu xmm7,XMMWORD[((128-64))+rdx] 1674 vpxor xmm9,xmm9,xmm14 1675 vpxor xmm5,xmm5,xmm2 1676 1677 vmovdqu xmm15,XMMWORD[32+r8] 1678 vpclmulqdq xmm0,xmm14,xmm6,0x00 1679 vpshufb xmm15,xmm15,xmm13 1680 vpxor xmm0,xmm0,xmm3 1681 vpclmulqdq xmm1,xmm14,xmm6,0x11 1682 vmovdqu xmm6,XMMWORD[((112-64))+rdx] 1683 vpunpckhqdq xmm8,xmm15,xmm15 1684 vpxor xmm1,xmm1,xmm4 1685 vpclmulqdq xmm2,xmm9,xmm7,0x00 1686 vpxor xmm8,xmm8,xmm15 1687 vpxor xmm2,xmm2,xmm5 1688 vxorps xmm10,xmm10,xmm12 1689 1690 vmovdqu xmm14,XMMWORD[16+r8] 1691 vpalignr xmm12,xmm10,xmm10,8 1692 vpclmulqdq xmm3,xmm15,xmm6,0x00 1693 vpshufb xmm14,xmm14,xmm13 1694 vpxor xmm3,xmm3,xmm0 1695 vpclmulqdq xmm4,xmm15,xmm6,0x11 1696 vmovdqu xmm6,XMMWORD[((144-64))+rdx] 1697 vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10 1698 vxorps xmm12,xmm12,xmm11 1699 vpunpckhqdq xmm9,xmm14,xmm14 1700 vpxor xmm4,xmm4,xmm1 1701 vpclmulqdq xmm5,xmm8,xmm7,0x10 1702 vmovdqu xmm7,XMMWORD[((176-64))+rdx] 1703 vpxor xmm9,xmm9,xmm14 1704 vpxor xmm5,xmm5,xmm2 1705 1706 vmovdqu xmm15,XMMWORD[r8] 1707 vpclmulqdq xmm0,xmm14,xmm6,0x00 1708 vpshufb xmm15,xmm15,xmm13 1709 vpclmulqdq xmm1,xmm14,xmm6,0x11 1710 vmovdqu xmm6,XMMWORD[((160-64))+rdx] 1711 vpxor xmm15,xmm15,xmm12 1712 vpclmulqdq xmm2,xmm9,xmm7,0x10 1713 vpxor xmm15,xmm15,xmm10 1714 1715 lea r8,[128+r8] 1716 sub r9,0x80 1717 jnc NEAR $L$oop8x_avx 1718 1719 add r9,0x80 1720 jmp NEAR $L$tail_no_xor_avx 1721 1722 ALIGN 32 1723 $L$short_avx: 1724 vmovdqu xmm14,XMMWORD[((-16))+r9*1+r8] 1725 lea r8,[r9*1+r8] 1726 vmovdqu xmm6,XMMWORD[((0-64))+rdx] 1727 vmovdqu xmm7,XMMWORD[((32-64))+rdx] 1728 vpshufb xmm15,xmm14,xmm13 1729 1730 vmovdqa xmm3,xmm0 1731 vmovdqa xmm4,xmm1 1732 vmovdqa xmm5,xmm2 1733 sub r9,0x10 1734 jz NEAR $L$tail_avx 1735 1736 vpunpckhqdq xmm8,xmm15,xmm15 1737 vpxor xmm3,xmm3,xmm0 1738 vpclmulqdq xmm0,xmm15,xmm6,0x00 1739 vpxor xmm8,xmm8,xmm15 1740 vmovdqu xmm14,XMMWORD[((-32))+r8] 1741 vpxor xmm4,xmm4,xmm1 1742 vpclmulqdq xmm1,xmm15,xmm6,0x11 1743 vmovdqu xmm6,XMMWORD[((16-64))+rdx] 1744 vpshufb xmm15,xmm14,xmm13 1745 vpxor xmm5,xmm5,xmm2 1746 vpclmulqdq xmm2,xmm8,xmm7,0x00 1747 vpsrldq xmm7,xmm7,8 1748 sub r9,0x10 1749 jz NEAR $L$tail_avx 1750 1751 vpunpckhqdq xmm8,xmm15,xmm15 1752 vpxor xmm3,xmm3,xmm0 1753 vpclmulqdq xmm0,xmm15,xmm6,0x00 1754 vpxor xmm8,xmm8,xmm15 1755 vmovdqu xmm14,XMMWORD[((-48))+r8] 1756 vpxor xmm4,xmm4,xmm1 1757 vpclmulqdq xmm1,xmm15,xmm6,0x11 1758 vmovdqu xmm6,XMMWORD[((48-64))+rdx] 1759 vpshufb xmm15,xmm14,xmm13 1760 vpxor xmm5,xmm5,xmm2 1761 vpclmulqdq xmm2,xmm8,xmm7,0x00 1762 vmovdqu xmm7,XMMWORD[((80-64))+rdx] 1763 sub r9,0x10 1764 jz NEAR $L$tail_avx 1765 1766 vpunpckhqdq xmm8,xmm15,xmm15 1767 vpxor xmm3,xmm3,xmm0 1768 vpclmulqdq xmm0,xmm15,xmm6,0x00 1769 vpxor xmm8,xmm8,xmm15 1770 vmovdqu xmm14,XMMWORD[((-64))+r8] 1771 vpxor xmm4,xmm4,xmm1 1772 vpclmulqdq xmm1,xmm15,xmm6,0x11 1773 vmovdqu xmm6,XMMWORD[((64-64))+rdx] 1774 vpshufb xmm15,xmm14,xmm13 1775 vpxor xmm5,xmm5,xmm2 1776 vpclmulqdq xmm2,xmm8,xmm7,0x00 1777 vpsrldq xmm7,xmm7,8 1778 sub r9,0x10 1779 jz NEAR $L$tail_avx 1780 1781 vpunpckhqdq xmm8,xmm15,xmm15 1782 vpxor xmm3,xmm3,xmm0 1783 vpclmulqdq xmm0,xmm15,xmm6,0x00 1784 vpxor xmm8,xmm8,xmm15 1785 vmovdqu xmm14,XMMWORD[((-80))+r8] 1786 vpxor xmm4,xmm4,xmm1 1787 vpclmulqdq xmm1,xmm15,xmm6,0x11 1788 vmovdqu xmm6,XMMWORD[((96-64))+rdx] 1789 vpshufb xmm15,xmm14,xmm13 1790 vpxor xmm5,xmm5,xmm2 1791 vpclmulqdq xmm2,xmm8,xmm7,0x00 1792 vmovdqu xmm7,XMMWORD[((128-64))+rdx] 1793 sub r9,0x10 1794 jz NEAR $L$tail_avx 1795 1796 vpunpckhqdq xmm8,xmm15,xmm15 1797 vpxor xmm3,xmm3,xmm0 1798 vpclmulqdq xmm0,xmm15,xmm6,0x00 1799 vpxor xmm8,xmm8,xmm15 1800 vmovdqu xmm14,XMMWORD[((-96))+r8] 1801 vpxor xmm4,xmm4,xmm1 1802 vpclmulqdq xmm1,xmm15,xmm6,0x11 1803 vmovdqu xmm6,XMMWORD[((112-64))+rdx] 1804 vpshufb xmm15,xmm14,xmm13 1805 vpxor xmm5,xmm5,xmm2 1806 vpclmulqdq xmm2,xmm8,xmm7,0x00 1807 vpsrldq xmm7,xmm7,8 1808 sub r9,0x10 1809 jz NEAR $L$tail_avx 1810 1811 vpunpckhqdq xmm8,xmm15,xmm15 1812 vpxor xmm3,xmm3,xmm0 1813 vpclmulqdq xmm0,xmm15,xmm6,0x00 1814 vpxor xmm8,xmm8,xmm15 1815 vmovdqu xmm14,XMMWORD[((-112))+r8] 1816 vpxor xmm4,xmm4,xmm1 1817 vpclmulqdq xmm1,xmm15,xmm6,0x11 1818 vmovdqu xmm6,XMMWORD[((144-64))+rdx] 1819 vpshufb xmm15,xmm14,xmm13 1820 vpxor xmm5,xmm5,xmm2 1821 vpclmulqdq xmm2,xmm8,xmm7,0x00 1822 vmovq xmm7,QWORD[((184-64))+rdx] 1823 sub r9,0x10 1824 jmp NEAR $L$tail_avx 1825 1826 ALIGN 32 1827 $L$tail_avx: 1828 vpxor xmm15,xmm15,xmm10 1829 $L$tail_no_xor_avx: 1830 vpunpckhqdq xmm8,xmm15,xmm15 1831 vpxor xmm3,xmm3,xmm0 1832 vpclmulqdq xmm0,xmm15,xmm6,0x00 1833 vpxor xmm8,xmm8,xmm15 1834 vpxor xmm4,xmm4,xmm1 1835 vpclmulqdq xmm1,xmm15,xmm6,0x11 1836 vpxor xmm5,xmm5,xmm2 1837 vpclmulqdq xmm2,xmm8,xmm7,0x00 1838 1839 vmovdqu xmm12,XMMWORD[r10] 1840 1841 vpxor xmm10,xmm3,xmm0 1842 vpxor xmm11,xmm4,xmm1 1843 vpxor xmm5,xmm5,xmm2 1844 1845 vpxor xmm5,xmm5,xmm10 1846 vpxor xmm5,xmm5,xmm11 1847 vpslldq xmm9,xmm5,8 1848 vpsrldq xmm5,xmm5,8 1849 vpxor xmm10,xmm10,xmm9 1850 vpxor xmm11,xmm11,xmm5 1851 1852 vpclmulqdq xmm9,xmm10,xmm12,0x10 1853 vpalignr xmm10,xmm10,xmm10,8 1854 vpxor xmm10,xmm10,xmm9 1855 1856 vpclmulqdq xmm9,xmm10,xmm12,0x10 1857 vpalignr xmm10,xmm10,xmm10,8 1858 vpxor xmm10,xmm10,xmm11 1859 vpxor xmm10,xmm10,xmm9 1860 1861 cmp r9,0 1862 jne NEAR $L$short_avx 1863 1864 vpshufb xmm10,xmm10,xmm13 1865 vmovdqu XMMWORD[rcx],xmm10 1866 vzeroupper 1867 movaps xmm6,XMMWORD[rsp] 1868 movaps xmm7,XMMWORD[16+rsp] 1869 movaps xmm8,XMMWORD[32+rsp] 1870 movaps xmm9,XMMWORD[48+rsp] 1871 movaps xmm10,XMMWORD[64+rsp] 1872 movaps xmm11,XMMWORD[80+rsp] 1873 movaps xmm12,XMMWORD[96+rsp] 1874 movaps xmm13,XMMWORD[112+rsp] 1875 movaps xmm14,XMMWORD[128+rsp] 1876 movaps xmm15,XMMWORD[144+rsp] 1877 lea rsp,[168+rsp] 1878 $L$SEH_end_gcm_ghash_avx: 1879 DB 0F3h,0C3h ;repret 1375 jmp NEAR $L$_ghash_clmul 1880 1376 1881 1377 … … 2041 1537 DD $L$SEH_end_gcm_ghash_clmul wrt ..imagebase 2042 1538 DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase 2043 DD $L$SEH_begin_gcm_init_avx wrt ..imagebase2044 DD $L$SEH_end_gcm_init_avx wrt ..imagebase2045 DD $L$SEH_info_gcm_init_clmul wrt ..imagebase2046 2047 DD $L$SEH_begin_gcm_ghash_avx wrt ..imagebase2048 DD $L$SEH_end_gcm_ghash_avx wrt ..imagebase2049 DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase2050 1539 section .xdata rdata align=8 2051 1540 ALIGN 8 -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/poly1305-x86_64.S
r95219 r95221 38 38 lea r10,[poly1305_blocks] 39 39 lea r11,[poly1305_emit] 40 mov r9,QWORD[((OPENSSL_ia32cap_P+4))]41 lea rax,[poly1305_blocks_avx]42 lea rcx,[poly1305_emit_avx]43 bt r9,2844 cmovc r10,rax45 cmovc r11,rcx46 lea rax,[poly1305_blocks_avx2]47 bt r9,3748 cmovc r10,rax49 mov rax,214964633650 shr r9,3251 and r9,rax52 cmp r9,rax53 je NEAR $L$init_base2_4454 40 mov rax,0x0ffffffc0fffffff 55 41 mov rcx,0x0ffffffc0ffffffc … … 226 212 227 213 $L$SEH_end_poly1305_emit: 228 229 ALIGN 32230 __poly1305_block:231 232 mul r14233 mov r9,rax234 mov rax,r11235 mov r10,rdx236 237 mul r14238 mov r14,rax239 mov rax,r11240 mov r8,rdx241 242 mul rbx243 add r9,rax244 mov rax,r13245 adc r10,rdx246 247 mul rbx248 mov rbx,rbp249 add r14,rax250 adc r8,rdx251 252 imul rbx,r13253 add r9,rbx254 mov rbx,r8255 adc r10,0256 257 imul rbp,r11258 add rbx,r9259 mov rax,-4260 adc r10,rbp261 262 and rax,r10263 mov rbp,r10264 shr r10,2265 and rbp,3266 add rax,r10267 add r14,rax268 adc rbx,0269 adc rbp,0270 DB 0F3h,0C3h ;repret271 272 273 274 275 ALIGN 32276 __poly1305_init_avx:277 278 mov r14,r11279 mov rbx,r12280 xor rbp,rbp281 282 lea rdi,[((48+64))+rdi]283 284 mov rax,r12285 call __poly1305_block286 287 mov eax,0x3ffffff288 mov edx,0x3ffffff289 mov r8,r14290 and eax,r14d291 mov r9,r11292 and edx,r11d293 mov DWORD[((-64))+rdi],eax294 shr r8,26295 mov DWORD[((-60))+rdi],edx296 shr r9,26297 298 mov eax,0x3ffffff299 mov edx,0x3ffffff300 and eax,r8d301 and edx,r9d302 mov DWORD[((-48))+rdi],eax303 lea eax,[rax*4+rax]304 mov DWORD[((-44))+rdi],edx305 lea edx,[rdx*4+rdx]306 mov DWORD[((-32))+rdi],eax307 shr r8,26308 mov DWORD[((-28))+rdi],edx309 shr r9,26310 311 mov rax,rbx312 mov rdx,r12313 shl rax,12314 shl rdx,12315 or rax,r8316 or rdx,r9317 and eax,0x3ffffff318 and edx,0x3ffffff319 mov DWORD[((-16))+rdi],eax320 lea eax,[rax*4+rax]321 mov DWORD[((-12))+rdi],edx322 lea edx,[rdx*4+rdx]323 mov DWORD[rdi],eax324 mov r8,rbx325 mov DWORD[4+rdi],edx326 mov r9,r12327 328 mov eax,0x3ffffff329 mov edx,0x3ffffff330 shr r8,14331 shr r9,14332 and eax,r8d333 and edx,r9d334 mov DWORD[16+rdi],eax335 lea eax,[rax*4+rax]336 mov DWORD[20+rdi],edx337 lea edx,[rdx*4+rdx]338 mov DWORD[32+rdi],eax339 shr r8,26340 mov DWORD[36+rdi],edx341 shr r9,26342 343 mov rax,rbp344 shl rax,24345 or r8,rax346 mov DWORD[48+rdi],r8d347 lea r8,[r8*4+r8]348 mov DWORD[52+rdi],r9d349 lea r9,[r9*4+r9]350 mov DWORD[64+rdi],r8d351 mov DWORD[68+rdi],r9d352 353 mov rax,r12354 call __poly1305_block355 356 mov eax,0x3ffffff357 mov r8,r14358 and eax,r14d359 shr r8,26360 mov DWORD[((-52))+rdi],eax361 362 mov edx,0x3ffffff363 and edx,r8d364 mov DWORD[((-36))+rdi],edx365 lea edx,[rdx*4+rdx]366 shr r8,26367 mov DWORD[((-20))+rdi],edx368 369 mov rax,rbx370 shl rax,12371 or rax,r8372 and eax,0x3ffffff373 mov DWORD[((-4))+rdi],eax374 lea eax,[rax*4+rax]375 mov r8,rbx376 mov DWORD[12+rdi],eax377 378 mov edx,0x3ffffff379 shr r8,14380 and edx,r8d381 mov DWORD[28+rdi],edx382 lea edx,[rdx*4+rdx]383 shr r8,26384 mov DWORD[44+rdi],edx385 386 mov rax,rbp387 shl rax,24388 or r8,rax389 mov DWORD[60+rdi],r8d390 lea r8,[r8*4+r8]391 mov DWORD[76+rdi],r8d392 393 mov rax,r12394 call __poly1305_block395 396 mov eax,0x3ffffff397 mov r8,r14398 and eax,r14d399 shr r8,26400 mov DWORD[((-56))+rdi],eax401 402 mov edx,0x3ffffff403 and edx,r8d404 mov DWORD[((-40))+rdi],edx405 lea edx,[rdx*4+rdx]406 shr r8,26407 mov DWORD[((-24))+rdi],edx408 409 mov rax,rbx410 shl rax,12411 or rax,r8412 and eax,0x3ffffff413 mov DWORD[((-8))+rdi],eax414 lea eax,[rax*4+rax]415 mov r8,rbx416 mov DWORD[8+rdi],eax417 418 mov edx,0x3ffffff419 shr r8,14420 and edx,r8d421 mov DWORD[24+rdi],edx422 lea edx,[rdx*4+rdx]423 shr r8,26424 mov DWORD[40+rdi],edx425 426 mov rax,rbp427 shl rax,24428 or r8,rax429 mov DWORD[56+rdi],r8d430 lea r8,[r8*4+r8]431 mov DWORD[72+rdi],r8d432 433 lea rdi,[((-48-64))+rdi]434 DB 0F3h,0C3h ;repret435 436 437 438 439 ALIGN 32440 poly1305_blocks_avx:441 mov QWORD[8+rsp],rdi ;WIN64 prologue442 mov QWORD[16+rsp],rsi443 mov rax,rsp444 $L$SEH_begin_poly1305_blocks_avx:445 mov rdi,rcx446 mov rsi,rdx447 mov rdx,r8448 mov rcx,r9449 450 451 452 mov r8d,DWORD[20+rdi]453 cmp rdx,128454 jae NEAR $L$blocks_avx455 test r8d,r8d456 jz NEAR $L$blocks457 458 $L$blocks_avx:459 and rdx,-16460 jz NEAR $L$no_data_avx461 462 vzeroupper463 464 test r8d,r8d465 jz NEAR $L$base2_64_avx466 467 test rdx,31468 jz NEAR $L$even_avx469 470 push rbx471 472 push rbp473 474 push r12475 476 push r13477 478 push r14479 480 push r15481 482 $L$blocks_avx_body:483 484 mov r15,rdx485 486 mov r8,QWORD[rdi]487 mov r9,QWORD[8+rdi]488 mov ebp,DWORD[16+rdi]489 490 mov r11,QWORD[24+rdi]491 mov r13,QWORD[32+rdi]492 493 494 mov r14d,r8d495 and r8,-2147483648496 mov r12,r9497 mov ebx,r9d498 and r9,-2147483648499 500 shr r8,6501 shl r12,52502 add r14,r8503 shr rbx,12504 shr r9,18505 add r14,r12506 adc rbx,r9507 508 mov r8,rbp509 shl r8,40510 shr rbp,24511 add rbx,r8512 adc rbp,0513 514 mov r9,-4515 mov r8,rbp516 and r9,rbp517 shr r8,2518 and rbp,3519 add r8,r9520 add r14,r8521 adc rbx,0522 adc rbp,0523 524 mov r12,r13525 mov rax,r13526 shr r13,2527 add r13,r12528 529 add r14,QWORD[rsi]530 adc rbx,QWORD[8+rsi]531 lea rsi,[16+rsi]532 adc rbp,rcx533 534 call __poly1305_block535 536 test rcx,rcx537 jz NEAR $L$store_base2_64_avx538 539 540 mov rax,r14541 mov rdx,r14542 shr r14,52543 mov r11,rbx544 mov r12,rbx545 shr rdx,26546 and rax,0x3ffffff547 shl r11,12548 and rdx,0x3ffffff549 shr rbx,14550 or r14,r11551 shl rbp,24552 and r14,0x3ffffff553 shr r12,40554 and rbx,0x3ffffff555 or rbp,r12556 557 sub r15,16558 jz NEAR $L$store_base2_26_avx559 560 vmovd xmm0,eax561 vmovd xmm1,edx562 vmovd xmm2,r14d563 vmovd xmm3,ebx564 vmovd xmm4,ebp565 jmp NEAR $L$proceed_avx566 567 ALIGN 32568 $L$store_base2_64_avx:569 mov QWORD[rdi],r14570 mov QWORD[8+rdi],rbx571 mov QWORD[16+rdi],rbp572 jmp NEAR $L$done_avx573 574 ALIGN 16575 $L$store_base2_26_avx:576 mov DWORD[rdi],eax577 mov DWORD[4+rdi],edx578 mov DWORD[8+rdi],r14d579 mov DWORD[12+rdi],ebx580 mov DWORD[16+rdi],ebp581 ALIGN 16582 $L$done_avx:583 mov r15,QWORD[rsp]584 585 mov r14,QWORD[8+rsp]586 587 mov r13,QWORD[16+rsp]588 589 mov r12,QWORD[24+rsp]590 591 mov rbp,QWORD[32+rsp]592 593 mov rbx,QWORD[40+rsp]594 595 lea rsp,[48+rsp]596 597 $L$no_data_avx:598 $L$blocks_avx_epilogue:599 mov rdi,QWORD[8+rsp] ;WIN64 epilogue600 mov rsi,QWORD[16+rsp]601 DB 0F3h,0C3h ;repret602 603 604 ALIGN 32605 $L$base2_64_avx:606 607 push rbx608 609 push rbp610 611 push r12612 613 push r13614 615 push r14616 617 push r15618 619 $L$base2_64_avx_body:620 621 mov r15,rdx622 623 mov r11,QWORD[24+rdi]624 mov r13,QWORD[32+rdi]625 626 mov r14,QWORD[rdi]627 mov rbx,QWORD[8+rdi]628 mov ebp,DWORD[16+rdi]629 630 mov r12,r13631 mov rax,r13632 shr r13,2633 add r13,r12634 635 test rdx,31636 jz NEAR $L$init_avx637 638 add r14,QWORD[rsi]639 adc rbx,QWORD[8+rsi]640 lea rsi,[16+rsi]641 adc rbp,rcx642 sub r15,16643 644 call __poly1305_block645 646 $L$init_avx:647 648 mov rax,r14649 mov rdx,r14650 shr r14,52651 mov r8,rbx652 mov r9,rbx653 shr rdx,26654 and rax,0x3ffffff655 shl r8,12656 and rdx,0x3ffffff657 shr rbx,14658 or r14,r8659 shl rbp,24660 and r14,0x3ffffff661 shr r9,40662 and rbx,0x3ffffff663 or rbp,r9664 665 vmovd xmm0,eax666 vmovd xmm1,edx667 vmovd xmm2,r14d668 vmovd xmm3,ebx669 vmovd xmm4,ebp670 mov DWORD[20+rdi],1671 672 call __poly1305_init_avx673 674 $L$proceed_avx:675 mov rdx,r15676 677 mov r15,QWORD[rsp]678 679 mov r14,QWORD[8+rsp]680 681 mov r13,QWORD[16+rsp]682 683 mov r12,QWORD[24+rsp]684 685 mov rbp,QWORD[32+rsp]686 687 mov rbx,QWORD[40+rsp]688 689 lea rax,[48+rsp]690 lea rsp,[48+rsp]691 692 $L$base2_64_avx_epilogue:693 jmp NEAR $L$do_avx694 695 696 ALIGN 32697 $L$even_avx:698 699 vmovd xmm0,DWORD[rdi]700 vmovd xmm1,DWORD[4+rdi]701 vmovd xmm2,DWORD[8+rdi]702 vmovd xmm3,DWORD[12+rdi]703 vmovd xmm4,DWORD[16+rdi]704 705 $L$do_avx:706 lea r11,[((-248))+rsp]707 sub rsp,0x218708 vmovdqa XMMWORD[80+r11],xmm6709 vmovdqa XMMWORD[96+r11],xmm7710 vmovdqa XMMWORD[112+r11],xmm8711 vmovdqa XMMWORD[128+r11],xmm9712 vmovdqa XMMWORD[144+r11],xmm10713 vmovdqa XMMWORD[160+r11],xmm11714 vmovdqa XMMWORD[176+r11],xmm12715 vmovdqa XMMWORD[192+r11],xmm13716 vmovdqa XMMWORD[208+r11],xmm14717 vmovdqa XMMWORD[224+r11],xmm15718 $L$do_avx_body:719 sub rdx,64720 lea rax,[((-32))+rsi]721 cmovc rsi,rax722 723 vmovdqu xmm14,XMMWORD[48+rdi]724 lea rdi,[112+rdi]725 lea rcx,[$L$const]726 727 728 729 vmovdqu xmm5,XMMWORD[32+rsi]730 vmovdqu xmm6,XMMWORD[48+rsi]731 vmovdqa xmm15,XMMWORD[64+rcx]732 733 vpsrldq xmm7,xmm5,6734 vpsrldq xmm8,xmm6,6735 vpunpckhqdq xmm9,xmm5,xmm6736 vpunpcklqdq xmm5,xmm5,xmm6737 vpunpcklqdq xmm8,xmm7,xmm8738 739 vpsrlq xmm9,xmm9,40740 vpsrlq xmm6,xmm5,26741 vpand xmm5,xmm5,xmm15742 vpsrlq xmm7,xmm8,4743 vpand xmm6,xmm6,xmm15744 vpsrlq xmm8,xmm8,30745 vpand xmm7,xmm7,xmm15746 vpand xmm8,xmm8,xmm15747 vpor xmm9,xmm9,XMMWORD[32+rcx]748 749 jbe NEAR $L$skip_loop_avx750 751 752 vmovdqu xmm11,XMMWORD[((-48))+rdi]753 vmovdqu xmm12,XMMWORD[((-32))+rdi]754 vpshufd xmm13,xmm14,0xEE755 vpshufd xmm10,xmm14,0x44756 vmovdqa XMMWORD[(-144)+r11],xmm13757 vmovdqa XMMWORD[rsp],xmm10758 vpshufd xmm14,xmm11,0xEE759 vmovdqu xmm10,XMMWORD[((-16))+rdi]760 vpshufd xmm11,xmm11,0x44761 vmovdqa XMMWORD[(-128)+r11],xmm14762 vmovdqa XMMWORD[16+rsp],xmm11763 vpshufd xmm13,xmm12,0xEE764 vmovdqu xmm11,XMMWORD[rdi]765 vpshufd xmm12,xmm12,0x44766 vmovdqa XMMWORD[(-112)+r11],xmm13767 vmovdqa XMMWORD[32+rsp],xmm12768 vpshufd xmm14,xmm10,0xEE769 vmovdqu xmm12,XMMWORD[16+rdi]770 vpshufd xmm10,xmm10,0x44771 vmovdqa XMMWORD[(-96)+r11],xmm14772 vmovdqa XMMWORD[48+rsp],xmm10773 vpshufd xmm13,xmm11,0xEE774 vmovdqu xmm10,XMMWORD[32+rdi]775 vpshufd xmm11,xmm11,0x44776 vmovdqa XMMWORD[(-80)+r11],xmm13777 vmovdqa XMMWORD[64+rsp],xmm11778 vpshufd xmm14,xmm12,0xEE779 vmovdqu xmm11,XMMWORD[48+rdi]780 vpshufd xmm12,xmm12,0x44781 vmovdqa XMMWORD[(-64)+r11],xmm14782 vmovdqa XMMWORD[80+rsp],xmm12783 vpshufd xmm13,xmm10,0xEE784 vmovdqu xmm12,XMMWORD[64+rdi]785 vpshufd xmm10,xmm10,0x44786 vmovdqa XMMWORD[(-48)+r11],xmm13787 vmovdqa XMMWORD[96+rsp],xmm10788 vpshufd xmm14,xmm11,0xEE789 vpshufd xmm11,xmm11,0x44790 vmovdqa XMMWORD[(-32)+r11],xmm14791 vmovdqa XMMWORD[112+rsp],xmm11792 vpshufd xmm13,xmm12,0xEE793 vmovdqa xmm14,XMMWORD[rsp]794 vpshufd xmm12,xmm12,0x44795 vmovdqa XMMWORD[(-16)+r11],xmm13796 vmovdqa XMMWORD[128+rsp],xmm12797 798 jmp NEAR $L$oop_avx799 800 ALIGN 32801 $L$oop_avx:802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 vpmuludq xmm10,xmm14,xmm5823 vpmuludq xmm11,xmm14,xmm6824 vmovdqa XMMWORD[32+r11],xmm2825 vpmuludq xmm12,xmm14,xmm7826 vmovdqa xmm2,XMMWORD[16+rsp]827 vpmuludq xmm13,xmm14,xmm8828 vpmuludq xmm14,xmm14,xmm9829 830 vmovdqa XMMWORD[r11],xmm0831 vpmuludq xmm0,xmm9,XMMWORD[32+rsp]832 vmovdqa XMMWORD[16+r11],xmm1833 vpmuludq xmm1,xmm2,xmm8834 vpaddq xmm10,xmm10,xmm0835 vpaddq xmm14,xmm14,xmm1836 vmovdqa XMMWORD[48+r11],xmm3837 vpmuludq xmm0,xmm2,xmm7838 vpmuludq xmm1,xmm2,xmm6839 vpaddq xmm13,xmm13,xmm0840 vmovdqa xmm3,XMMWORD[48+rsp]841 vpaddq xmm12,xmm12,xmm1842 vmovdqa XMMWORD[64+r11],xmm4843 vpmuludq xmm2,xmm2,xmm5844 vpmuludq xmm0,xmm3,xmm7845 vpaddq xmm11,xmm11,xmm2846 847 vmovdqa xmm4,XMMWORD[64+rsp]848 vpaddq xmm14,xmm14,xmm0849 vpmuludq xmm1,xmm3,xmm6850 vpmuludq xmm3,xmm3,xmm5851 vpaddq xmm13,xmm13,xmm1852 vmovdqa xmm2,XMMWORD[80+rsp]853 vpaddq xmm12,xmm12,xmm3854 vpmuludq xmm0,xmm4,xmm9855 vpmuludq xmm4,xmm4,xmm8856 vpaddq xmm11,xmm11,xmm0857 vmovdqa xmm3,XMMWORD[96+rsp]858 vpaddq xmm10,xmm10,xmm4859 860 vmovdqa xmm4,XMMWORD[128+rsp]861 vpmuludq xmm1,xmm2,xmm6862 vpmuludq xmm2,xmm2,xmm5863 vpaddq xmm14,xmm14,xmm1864 vpaddq xmm13,xmm13,xmm2865 vpmuludq xmm0,xmm3,xmm9866 vpmuludq xmm1,xmm3,xmm8867 vpaddq xmm12,xmm12,xmm0868 vmovdqu xmm0,XMMWORD[rsi]869 vpaddq xmm11,xmm11,xmm1870 vpmuludq xmm3,xmm3,xmm7871 vpmuludq xmm7,xmm4,xmm7872 vpaddq xmm10,xmm10,xmm3873 874 vmovdqu xmm1,XMMWORD[16+rsi]875 vpaddq xmm11,xmm11,xmm7876 vpmuludq xmm8,xmm4,xmm8877 vpmuludq xmm9,xmm4,xmm9878 vpsrldq xmm2,xmm0,6879 vpaddq xmm12,xmm12,xmm8880 vpaddq xmm13,xmm13,xmm9881 vpsrldq xmm3,xmm1,6882 vpmuludq xmm9,xmm5,XMMWORD[112+rsp]883 vpmuludq xmm5,xmm4,xmm6884 vpunpckhqdq xmm4,xmm0,xmm1885 vpaddq xmm14,xmm14,xmm9886 vmovdqa xmm9,XMMWORD[((-144))+r11]887 vpaddq xmm10,xmm10,xmm5888 889 vpunpcklqdq xmm0,xmm0,xmm1890 vpunpcklqdq xmm3,xmm2,xmm3891 892 893 vpsrldq xmm4,xmm4,5894 vpsrlq xmm1,xmm0,26895 vpand xmm0,xmm0,xmm15896 vpsrlq xmm2,xmm3,4897 vpand xmm1,xmm1,xmm15898 vpand xmm4,xmm4,XMMWORD[rcx]899 vpsrlq xmm3,xmm3,30900 vpand xmm2,xmm2,xmm15901 vpand xmm3,xmm3,xmm15902 vpor xmm4,xmm4,XMMWORD[32+rcx]903 904 vpaddq xmm0,xmm0,XMMWORD[r11]905 vpaddq xmm1,xmm1,XMMWORD[16+r11]906 vpaddq xmm2,xmm2,XMMWORD[32+r11]907 vpaddq xmm3,xmm3,XMMWORD[48+r11]908 vpaddq xmm4,xmm4,XMMWORD[64+r11]909 910 lea rax,[32+rsi]911 lea rsi,[64+rsi]912 sub rdx,64913 cmovc rsi,rax914 915 916 917 918 919 920 921 922 923 924 vpmuludq xmm5,xmm9,xmm0925 vpmuludq xmm6,xmm9,xmm1926 vpaddq xmm10,xmm10,xmm5927 vpaddq xmm11,xmm11,xmm6928 vmovdqa xmm7,XMMWORD[((-128))+r11]929 vpmuludq xmm5,xmm9,xmm2930 vpmuludq xmm6,xmm9,xmm3931 vpaddq xmm12,xmm12,xmm5932 vpaddq xmm13,xmm13,xmm6933 vpmuludq xmm9,xmm9,xmm4934 vpmuludq xmm5,xmm4,XMMWORD[((-112))+r11]935 vpaddq xmm14,xmm14,xmm9936 937 vpaddq xmm10,xmm10,xmm5938 vpmuludq xmm6,xmm7,xmm2939 vpmuludq xmm5,xmm7,xmm3940 vpaddq xmm13,xmm13,xmm6941 vmovdqa xmm8,XMMWORD[((-96))+r11]942 vpaddq xmm14,xmm14,xmm5943 vpmuludq xmm6,xmm7,xmm1944 vpmuludq xmm7,xmm7,xmm0945 vpaddq xmm12,xmm12,xmm6946 vpaddq xmm11,xmm11,xmm7947 948 vmovdqa xmm9,XMMWORD[((-80))+r11]949 vpmuludq xmm5,xmm8,xmm2950 vpmuludq xmm6,xmm8,xmm1951 vpaddq xmm14,xmm14,xmm5952 vpaddq xmm13,xmm13,xmm6953 vmovdqa xmm7,XMMWORD[((-64))+r11]954 vpmuludq xmm8,xmm8,xmm0955 vpmuludq xmm5,xmm9,xmm4956 vpaddq xmm12,xmm12,xmm8957 vpaddq xmm11,xmm11,xmm5958 vmovdqa xmm8,XMMWORD[((-48))+r11]959 vpmuludq xmm9,xmm9,xmm3960 vpmuludq xmm6,xmm7,xmm1961 vpaddq xmm10,xmm10,xmm9962 963 vmovdqa xmm9,XMMWORD[((-16))+r11]964 vpaddq xmm14,xmm14,xmm6965 vpmuludq xmm7,xmm7,xmm0966 vpmuludq xmm5,xmm8,xmm4967 vpaddq xmm13,xmm13,xmm7968 vpaddq xmm12,xmm12,xmm5969 vmovdqu xmm5,XMMWORD[32+rsi]970 vpmuludq xmm7,xmm8,xmm3971 vpmuludq xmm8,xmm8,xmm2972 vpaddq xmm11,xmm11,xmm7973 vmovdqu xmm6,XMMWORD[48+rsi]974 vpaddq xmm10,xmm10,xmm8975 976 vpmuludq xmm2,xmm9,xmm2977 vpmuludq xmm3,xmm9,xmm3978 vpsrldq xmm7,xmm5,6979 vpaddq xmm11,xmm11,xmm2980 vpmuludq xmm4,xmm9,xmm4981 vpsrldq xmm8,xmm6,6982 vpaddq xmm2,xmm12,xmm3983 vpaddq xmm3,xmm13,xmm4984 vpmuludq xmm4,xmm0,XMMWORD[((-32))+r11]985 vpmuludq xmm0,xmm9,xmm1986 vpunpckhqdq xmm9,xmm5,xmm6987 vpaddq xmm4,xmm14,xmm4988 vpaddq xmm0,xmm10,xmm0989 990 vpunpcklqdq xmm5,xmm5,xmm6991 vpunpcklqdq xmm8,xmm7,xmm8992 993 994 vpsrldq xmm9,xmm9,5995 vpsrlq xmm6,xmm5,26996 vmovdqa xmm14,XMMWORD[rsp]997 vpand xmm5,xmm5,xmm15998 vpsrlq xmm7,xmm8,4999 vpand xmm6,xmm6,xmm151000 vpand xmm9,xmm9,XMMWORD[rcx]1001 vpsrlq xmm8,xmm8,301002 vpand xmm7,xmm7,xmm151003 vpand xmm8,xmm8,xmm151004 vpor xmm9,xmm9,XMMWORD[32+rcx]1005 1006 1007 1008 1009 1010 vpsrlq xmm13,xmm3,261011 vpand xmm3,xmm3,xmm151012 vpaddq xmm4,xmm4,xmm131013 1014 vpsrlq xmm10,xmm0,261015 vpand xmm0,xmm0,xmm151016 vpaddq xmm1,xmm11,xmm101017 1018 vpsrlq xmm10,xmm4,261019 vpand xmm4,xmm4,xmm151020 1021 vpsrlq xmm11,xmm1,261022 vpand xmm1,xmm1,xmm151023 vpaddq xmm2,xmm2,xmm111024 1025 vpaddq xmm0,xmm0,xmm101026 vpsllq xmm10,xmm10,21027 vpaddq xmm0,xmm0,xmm101028 1029 vpsrlq xmm12,xmm2,261030 vpand xmm2,xmm2,xmm151031 vpaddq xmm3,xmm3,xmm121032 1033 vpsrlq xmm10,xmm0,261034 vpand xmm0,xmm0,xmm151035 vpaddq xmm1,xmm1,xmm101036 1037 vpsrlq xmm13,xmm3,261038 vpand xmm3,xmm3,xmm151039 vpaddq xmm4,xmm4,xmm131040 1041 ja NEAR $L$oop_avx1042 1043 $L$skip_loop_avx:1044 1045 1046 1047 vpshufd xmm14,xmm14,0x101048 add rdx,321049 jnz NEAR $L$ong_tail_avx1050 1051 vpaddq xmm7,xmm7,xmm21052 vpaddq xmm5,xmm5,xmm01053 vpaddq xmm6,xmm6,xmm11054 vpaddq xmm8,xmm8,xmm31055 vpaddq xmm9,xmm9,xmm41056 1057 $L$ong_tail_avx:1058 vmovdqa XMMWORD[32+r11],xmm21059 vmovdqa XMMWORD[r11],xmm01060 vmovdqa XMMWORD[16+r11],xmm11061 vmovdqa XMMWORD[48+r11],xmm31062 vmovdqa XMMWORD[64+r11],xmm41063 1064 1065 1066 1067 1068 1069 1070 vpmuludq xmm12,xmm14,xmm71071 vpmuludq xmm10,xmm14,xmm51072 vpshufd xmm2,XMMWORD[((-48))+rdi],0x101073 vpmuludq xmm11,xmm14,xmm61074 vpmuludq xmm13,xmm14,xmm81075 vpmuludq xmm14,xmm14,xmm91076 1077 vpmuludq xmm0,xmm2,xmm81078 vpaddq xmm14,xmm14,xmm01079 vpshufd xmm3,XMMWORD[((-32))+rdi],0x101080 vpmuludq xmm1,xmm2,xmm71081 vpaddq xmm13,xmm13,xmm11082 vpshufd xmm4,XMMWORD[((-16))+rdi],0x101083 vpmuludq xmm0,xmm2,xmm61084 vpaddq xmm12,xmm12,xmm01085 vpmuludq xmm2,xmm2,xmm51086 vpaddq xmm11,xmm11,xmm21087 vpmuludq xmm3,xmm3,xmm91088 vpaddq xmm10,xmm10,xmm31089 1090 vpshufd xmm2,XMMWORD[rdi],0x101091 vpmuludq xmm1,xmm4,xmm71092 vpaddq xmm14,xmm14,xmm11093 vpmuludq xmm0,xmm4,xmm61094 vpaddq xmm13,xmm13,xmm01095 vpshufd xmm3,XMMWORD[16+rdi],0x101096 vpmuludq xmm4,xmm4,xmm51097 vpaddq xmm12,xmm12,xmm41098 vpmuludq xmm1,xmm2,xmm91099 vpaddq xmm11,xmm11,xmm11100 vpshufd xmm4,XMMWORD[32+rdi],0x101101 vpmuludq xmm2,xmm2,xmm81102 vpaddq xmm10,xmm10,xmm21103 1104 vpmuludq xmm0,xmm3,xmm61105 vpaddq xmm14,xmm14,xmm01106 vpmuludq xmm3,xmm3,xmm51107 vpaddq xmm13,xmm13,xmm31108 vpshufd xmm2,XMMWORD[48+rdi],0x101109 vpmuludq xmm1,xmm4,xmm91110 vpaddq xmm12,xmm12,xmm11111 vpshufd xmm3,XMMWORD[64+rdi],0x101112 vpmuludq xmm0,xmm4,xmm81113 vpaddq xmm11,xmm11,xmm01114 vpmuludq xmm4,xmm4,xmm71115 vpaddq xmm10,xmm10,xmm41116 1117 vpmuludq xmm2,xmm2,xmm51118 vpaddq xmm14,xmm14,xmm21119 vpmuludq xmm1,xmm3,xmm91120 vpaddq xmm13,xmm13,xmm11121 vpmuludq xmm0,xmm3,xmm81122 vpaddq xmm12,xmm12,xmm01123 vpmuludq xmm1,xmm3,xmm71124 vpaddq xmm11,xmm11,xmm11125 vpmuludq xmm3,xmm3,xmm61126 vpaddq xmm10,xmm10,xmm31127 1128 jz NEAR $L$short_tail_avx1129 1130 vmovdqu xmm0,XMMWORD[rsi]1131 vmovdqu xmm1,XMMWORD[16+rsi]1132 1133 vpsrldq xmm2,xmm0,61134 vpsrldq xmm3,xmm1,61135 vpunpckhqdq xmm4,xmm0,xmm11136 vpunpcklqdq xmm0,xmm0,xmm11137 vpunpcklqdq xmm3,xmm2,xmm31138 1139 vpsrlq xmm4,xmm4,401140 vpsrlq xmm1,xmm0,261141 vpand xmm0,xmm0,xmm151142 vpsrlq xmm2,xmm3,41143 vpand xmm1,xmm1,xmm151144 vpsrlq xmm3,xmm3,301145 vpand xmm2,xmm2,xmm151146 vpand xmm3,xmm3,xmm151147 vpor xmm4,xmm4,XMMWORD[32+rcx]1148 1149 vpshufd xmm9,XMMWORD[((-64))+rdi],0x321150 vpaddq xmm0,xmm0,XMMWORD[r11]1151 vpaddq xmm1,xmm1,XMMWORD[16+r11]1152 vpaddq xmm2,xmm2,XMMWORD[32+r11]1153 vpaddq xmm3,xmm3,XMMWORD[48+r11]1154 vpaddq xmm4,xmm4,XMMWORD[64+r11]1155 1156 1157 1158 1159 vpmuludq xmm5,xmm9,xmm01160 vpaddq xmm10,xmm10,xmm51161 vpmuludq xmm6,xmm9,xmm11162 vpaddq xmm11,xmm11,xmm61163 vpmuludq xmm5,xmm9,xmm21164 vpaddq xmm12,xmm12,xmm51165 vpshufd xmm7,XMMWORD[((-48))+rdi],0x321166 vpmuludq xmm6,xmm9,xmm31167 vpaddq xmm13,xmm13,xmm61168 vpmuludq xmm9,xmm9,xmm41169 vpaddq xmm14,xmm14,xmm91170 1171 vpmuludq xmm5,xmm7,xmm31172 vpaddq xmm14,xmm14,xmm51173 vpshufd xmm8,XMMWORD[((-32))+rdi],0x321174 vpmuludq xmm6,xmm7,xmm21175 vpaddq xmm13,xmm13,xmm61176 vpshufd xmm9,XMMWORD[((-16))+rdi],0x321177 vpmuludq xmm5,xmm7,xmm11178 vpaddq xmm12,xmm12,xmm51179 vpmuludq xmm7,xmm7,xmm01180 vpaddq xmm11,xmm11,xmm71181 vpmuludq xmm8,xmm8,xmm41182 vpaddq xmm10,xmm10,xmm81183 1184 vpshufd xmm7,XMMWORD[rdi],0x321185 vpmuludq xmm6,xmm9,xmm21186 vpaddq xmm14,xmm14,xmm61187 vpmuludq xmm5,xmm9,xmm11188 vpaddq xmm13,xmm13,xmm51189 vpshufd xmm8,XMMWORD[16+rdi],0x321190 vpmuludq xmm9,xmm9,xmm01191 vpaddq xmm12,xmm12,xmm91192 vpmuludq xmm6,xmm7,xmm41193 vpaddq xmm11,xmm11,xmm61194 vpshufd xmm9,XMMWORD[32+rdi],0x321195 vpmuludq xmm7,xmm7,xmm31196 vpaddq xmm10,xmm10,xmm71197 1198 vpmuludq xmm5,xmm8,xmm11199 vpaddq xmm14,xmm14,xmm51200 vpmuludq xmm8,xmm8,xmm01201 vpaddq xmm13,xmm13,xmm81202 vpshufd xmm7,XMMWORD[48+rdi],0x321203 vpmuludq xmm6,xmm9,xmm41204 vpaddq xmm12,xmm12,xmm61205 vpshufd xmm8,XMMWORD[64+rdi],0x321206 vpmuludq xmm5,xmm9,xmm31207 vpaddq xmm11,xmm11,xmm51208 vpmuludq xmm9,xmm9,xmm21209 vpaddq xmm10,xmm10,xmm91210 1211 vpmuludq xmm7,xmm7,xmm01212 vpaddq xmm14,xmm14,xmm71213 vpmuludq xmm6,xmm8,xmm41214 vpaddq xmm13,xmm13,xmm61215 vpmuludq xmm5,xmm8,xmm31216 vpaddq xmm12,xmm12,xmm51217 vpmuludq xmm6,xmm8,xmm21218 vpaddq xmm11,xmm11,xmm61219 vpmuludq xmm8,xmm8,xmm11220 vpaddq xmm10,xmm10,xmm81221 1222 $L$short_tail_avx:1223 1224 1225 1226 vpsrldq xmm9,xmm14,81227 vpsrldq xmm8,xmm13,81228 vpsrldq xmm6,xmm11,81229 vpsrldq xmm5,xmm10,81230 vpsrldq xmm7,xmm12,81231 vpaddq xmm13,xmm13,xmm81232 vpaddq xmm14,xmm14,xmm91233 vpaddq xmm10,xmm10,xmm51234 vpaddq xmm11,xmm11,xmm61235 vpaddq xmm12,xmm12,xmm71236 1237 1238 1239 1240 vpsrlq xmm3,xmm13,261241 vpand xmm13,xmm13,xmm151242 vpaddq xmm14,xmm14,xmm31243 1244 vpsrlq xmm0,xmm10,261245 vpand xmm10,xmm10,xmm151246 vpaddq xmm11,xmm11,xmm01247 1248 vpsrlq xmm4,xmm14,261249 vpand xmm14,xmm14,xmm151250 1251 vpsrlq xmm1,xmm11,261252 vpand xmm11,xmm11,xmm151253 vpaddq xmm12,xmm12,xmm11254 1255 vpaddq xmm10,xmm10,xmm41256 vpsllq xmm4,xmm4,21257 vpaddq xmm10,xmm10,xmm41258 1259 vpsrlq xmm2,xmm12,261260 vpand xmm12,xmm12,xmm151261 vpaddq xmm13,xmm13,xmm21262 1263 vpsrlq xmm0,xmm10,261264 vpand xmm10,xmm10,xmm151265 vpaddq xmm11,xmm11,xmm01266 1267 vpsrlq xmm3,xmm13,261268 vpand xmm13,xmm13,xmm151269 vpaddq xmm14,xmm14,xmm31270 1271 vmovd DWORD[(-112)+rdi],xmm101272 vmovd DWORD[(-108)+rdi],xmm111273 vmovd DWORD[(-104)+rdi],xmm121274 vmovd DWORD[(-100)+rdi],xmm131275 vmovd DWORD[(-96)+rdi],xmm141276 vmovdqa xmm6,XMMWORD[80+r11]1277 vmovdqa xmm7,XMMWORD[96+r11]1278 vmovdqa xmm8,XMMWORD[112+r11]1279 vmovdqa xmm9,XMMWORD[128+r11]1280 vmovdqa xmm10,XMMWORD[144+r11]1281 vmovdqa xmm11,XMMWORD[160+r11]1282 vmovdqa xmm12,XMMWORD[176+r11]1283 vmovdqa xmm13,XMMWORD[192+r11]1284 vmovdqa xmm14,XMMWORD[208+r11]1285 vmovdqa xmm15,XMMWORD[224+r11]1286 lea rsp,[248+r11]1287 $L$do_avx_epilogue:1288 vzeroupper1289 mov rdi,QWORD[8+rsp] ;WIN64 epilogue1290 mov rsi,QWORD[16+rsp]1291 DB 0F3h,0C3h ;repret1292 1293 $L$SEH_end_poly1305_blocks_avx:1294 1295 1296 ALIGN 321297 poly1305_emit_avx:1298 mov QWORD[8+rsp],rdi ;WIN64 prologue1299 mov QWORD[16+rsp],rsi1300 mov rax,rsp1301 $L$SEH_begin_poly1305_emit_avx:1302 mov rdi,rcx1303 mov rsi,rdx1304 mov rdx,r81305 1306 1307 1308 cmp DWORD[20+rdi],01309 je NEAR $L$emit1310 1311 mov eax,DWORD[rdi]1312 mov ecx,DWORD[4+rdi]1313 mov r8d,DWORD[8+rdi]1314 mov r11d,DWORD[12+rdi]1315 mov r10d,DWORD[16+rdi]1316 1317 shl rcx,261318 mov r9,r81319 shl r8,521320 add rax,rcx1321 shr r9,121322 add r8,rax1323 adc r9,01324 1325 shl r11,141326 mov rax,r101327 shr r10,241328 add r9,r111329 shl rax,401330 add r9,rax1331 adc r10,01332 1333 mov rax,r101334 mov rcx,r101335 and r10,31336 shr rax,21337 and rcx,-41338 add rax,rcx1339 add r8,rax1340 adc r9,01341 adc r10,01342 1343 mov rax,r81344 add r8,51345 mov rcx,r91346 adc r9,01347 adc r10,01348 shr r10,21349 cmovnz rax,r81350 cmovnz rcx,r91351 1352 add rax,QWORD[rdx]1353 adc rcx,QWORD[8+rdx]1354 mov QWORD[rsi],rax1355 mov QWORD[8+rsi],rcx1356 1357 mov rdi,QWORD[8+rsp] ;WIN64 epilogue1358 mov rsi,QWORD[16+rsp]1359 DB 0F3h,0C3h ;repret1360 1361 $L$SEH_end_poly1305_emit_avx:1362 1363 ALIGN 321364 poly1305_blocks_avx2:1365 mov QWORD[8+rsp],rdi ;WIN64 prologue1366 mov QWORD[16+rsp],rsi1367 mov rax,rsp1368 $L$SEH_begin_poly1305_blocks_avx2:1369 mov rdi,rcx1370 mov rsi,rdx1371 mov rdx,r81372 mov rcx,r91373 1374 1375 1376 mov r8d,DWORD[20+rdi]1377 cmp rdx,1281378 jae NEAR $L$blocks_avx21379 test r8d,r8d1380 jz NEAR $L$blocks1381 1382 $L$blocks_avx2:1383 and rdx,-161384 jz NEAR $L$no_data_avx21385 1386 vzeroupper1387 1388 test r8d,r8d1389 jz NEAR $L$base2_64_avx21390 1391 test rdx,631392 jz NEAR $L$even_avx21393 1394 push rbx1395 1396 push rbp1397 1398 push r121399 1400 push r131401 1402 push r141403 1404 push r151405 1406 $L$blocks_avx2_body:1407 1408 mov r15,rdx1409 1410 mov r8,QWORD[rdi]1411 mov r9,QWORD[8+rdi]1412 mov ebp,DWORD[16+rdi]1413 1414 mov r11,QWORD[24+rdi]1415 mov r13,QWORD[32+rdi]1416 1417 1418 mov r14d,r8d1419 and r8,-21474836481420 mov r12,r91421 mov ebx,r9d1422 and r9,-21474836481423 1424 shr r8,61425 shl r12,521426 add r14,r81427 shr rbx,121428 shr r9,181429 add r14,r121430 adc rbx,r91431 1432 mov r8,rbp1433 shl r8,401434 shr rbp,241435 add rbx,r81436 adc rbp,01437 1438 mov r9,-41439 mov r8,rbp1440 and r9,rbp1441 shr r8,21442 and rbp,31443 add r8,r91444 add r14,r81445 adc rbx,01446 adc rbp,01447 1448 mov r12,r131449 mov rax,r131450 shr r13,21451 add r13,r121452 1453 $L$base2_26_pre_avx2:1454 add r14,QWORD[rsi]1455 adc rbx,QWORD[8+rsi]1456 lea rsi,[16+rsi]1457 adc rbp,rcx1458 sub r15,161459 1460 call __poly1305_block1461 mov rax,r121462 1463 test r15,631464 jnz NEAR $L$base2_26_pre_avx21465 1466 test rcx,rcx1467 jz NEAR $L$store_base2_64_avx21468 1469 1470 mov rax,r141471 mov rdx,r141472 shr r14,521473 mov r11,rbx1474 mov r12,rbx1475 shr rdx,261476 and rax,0x3ffffff1477 shl r11,121478 and rdx,0x3ffffff1479 shr rbx,141480 or r14,r111481 shl rbp,241482 and r14,0x3ffffff1483 shr r12,401484 and rbx,0x3ffffff1485 or rbp,r121486 1487 test r15,r151488 jz NEAR $L$store_base2_26_avx21489 1490 vmovd xmm0,eax1491 vmovd xmm1,edx1492 vmovd xmm2,r14d1493 vmovd xmm3,ebx1494 vmovd xmm4,ebp1495 jmp NEAR $L$proceed_avx21496 1497 ALIGN 321498 $L$store_base2_64_avx2:1499 mov QWORD[rdi],r141500 mov QWORD[8+rdi],rbx1501 mov QWORD[16+rdi],rbp1502 jmp NEAR $L$done_avx21503 1504 ALIGN 161505 $L$store_base2_26_avx2:1506 mov DWORD[rdi],eax1507 mov DWORD[4+rdi],edx1508 mov DWORD[8+rdi],r14d1509 mov DWORD[12+rdi],ebx1510 mov DWORD[16+rdi],ebp1511 ALIGN 161512 $L$done_avx2:1513 mov r15,QWORD[rsp]1514 1515 mov r14,QWORD[8+rsp]1516 1517 mov r13,QWORD[16+rsp]1518 1519 mov r12,QWORD[24+rsp]1520 1521 mov rbp,QWORD[32+rsp]1522 1523 mov rbx,QWORD[40+rsp]1524 1525 lea rsp,[48+rsp]1526 1527 $L$no_data_avx2:1528 $L$blocks_avx2_epilogue:1529 mov rdi,QWORD[8+rsp] ;WIN64 epilogue1530 mov rsi,QWORD[16+rsp]1531 DB 0F3h,0C3h ;repret1532 1533 1534 ALIGN 321535 $L$base2_64_avx2:1536 1537 push rbx1538 1539 push rbp1540 1541 push r121542 1543 push r131544 1545 push r141546 1547 push r151548 1549 $L$base2_64_avx2_body:1550 1551 mov r15,rdx1552 1553 mov r11,QWORD[24+rdi]1554 mov r13,QWORD[32+rdi]1555 1556 mov r14,QWORD[rdi]1557 mov rbx,QWORD[8+rdi]1558 mov ebp,DWORD[16+rdi]1559 1560 mov r12,r131561 mov rax,r131562 shr r13,21563 add r13,r121564 1565 test rdx,631566 jz NEAR $L$init_avx21567 1568 $L$base2_64_pre_avx2:1569 add r14,QWORD[rsi]1570 adc rbx,QWORD[8+rsi]1571 lea rsi,[16+rsi]1572 adc rbp,rcx1573 sub r15,161574 1575 call __poly1305_block1576 mov rax,r121577 1578 test r15,631579 jnz NEAR $L$base2_64_pre_avx21580 1581 $L$init_avx2:1582 1583 mov rax,r141584 mov rdx,r141585 shr r14,521586 mov r8,rbx1587 mov r9,rbx1588 shr rdx,261589 and rax,0x3ffffff1590 shl r8,121591 and rdx,0x3ffffff1592 shr rbx,141593 or r14,r81594 shl rbp,241595 and r14,0x3ffffff1596 shr r9,401597 and rbx,0x3ffffff1598 or rbp,r91599 1600 vmovd xmm0,eax1601 vmovd xmm1,edx1602 vmovd xmm2,r14d1603 vmovd xmm3,ebx1604 vmovd xmm4,ebp1605 mov DWORD[20+rdi],11606 1607 call __poly1305_init_avx1608 1609 $L$proceed_avx2:1610 mov rdx,r151611 mov r10d,DWORD[((OPENSSL_ia32cap_P+8))]1612 mov r11d,32212910081613 1614 mov r15,QWORD[rsp]1615 1616 mov r14,QWORD[8+rsp]1617 1618 mov r13,QWORD[16+rsp]1619 1620 mov r12,QWORD[24+rsp]1621 1622 mov rbp,QWORD[32+rsp]1623 1624 mov rbx,QWORD[40+rsp]1625 1626 lea rax,[48+rsp]1627 lea rsp,[48+rsp]1628 1629 $L$base2_64_avx2_epilogue:1630 jmp NEAR $L$do_avx21631 1632 1633 ALIGN 321634 $L$even_avx2:1635 1636 mov r10d,DWORD[((OPENSSL_ia32cap_P+8))]1637 vmovd xmm0,DWORD[rdi]1638 vmovd xmm1,DWORD[4+rdi]1639 vmovd xmm2,DWORD[8+rdi]1640 vmovd xmm3,DWORD[12+rdi]1641 vmovd xmm4,DWORD[16+rdi]1642 1643 $L$do_avx2:1644 cmp rdx,5121645 jb NEAR $L$skip_avx5121646 and r10d,r11d1647 test r10d,655361648 jnz NEAR $L$blocks_avx5121649 $L$skip_avx512:1650 lea r11,[((-248))+rsp]1651 sub rsp,0x1c81652 vmovdqa XMMWORD[80+r11],xmm61653 vmovdqa XMMWORD[96+r11],xmm71654 vmovdqa XMMWORD[112+r11],xmm81655 vmovdqa XMMWORD[128+r11],xmm91656 vmovdqa XMMWORD[144+r11],xmm101657 vmovdqa XMMWORD[160+r11],xmm111658 vmovdqa XMMWORD[176+r11],xmm121659 vmovdqa XMMWORD[192+r11],xmm131660 vmovdqa XMMWORD[208+r11],xmm141661 vmovdqa XMMWORD[224+r11],xmm151662 $L$do_avx2_body:1663 lea rcx,[$L$const]1664 lea rdi,[((48+64))+rdi]1665 vmovdqa ymm7,YMMWORD[96+rcx]1666 1667 1668 vmovdqu xmm9,XMMWORD[((-64))+rdi]1669 and rsp,-5121670 vmovdqu xmm10,XMMWORD[((-48))+rdi]1671 vmovdqu xmm6,XMMWORD[((-32))+rdi]1672 vmovdqu xmm11,XMMWORD[((-16))+rdi]1673 vmovdqu xmm12,XMMWORD[rdi]1674 vmovdqu xmm13,XMMWORD[16+rdi]1675 lea rax,[144+rsp]1676 vmovdqu xmm14,XMMWORD[32+rdi]1677 vpermd ymm9,ymm7,ymm91678 vmovdqu xmm15,XMMWORD[48+rdi]1679 vpermd ymm10,ymm7,ymm101680 vmovdqu xmm5,XMMWORD[64+rdi]1681 vpermd ymm6,ymm7,ymm61682 vmovdqa YMMWORD[rsp],ymm91683 vpermd ymm11,ymm7,ymm111684 vmovdqa YMMWORD[(32-144)+rax],ymm101685 vpermd ymm12,ymm7,ymm121686 vmovdqa YMMWORD[(64-144)+rax],ymm61687 vpermd ymm13,ymm7,ymm131688 vmovdqa YMMWORD[(96-144)+rax],ymm111689 vpermd ymm14,ymm7,ymm141690 vmovdqa YMMWORD[(128-144)+rax],ymm121691 vpermd ymm15,ymm7,ymm151692 vmovdqa YMMWORD[(160-144)+rax],ymm131693 vpermd ymm5,ymm7,ymm51694 vmovdqa YMMWORD[(192-144)+rax],ymm141695 vmovdqa YMMWORD[(224-144)+rax],ymm151696 vmovdqa YMMWORD[(256-144)+rax],ymm51697 vmovdqa ymm5,YMMWORD[64+rcx]1698 1699 1700 1701 vmovdqu xmm7,XMMWORD[rsi]1702 vmovdqu xmm8,XMMWORD[16+rsi]1703 vinserti128 ymm7,ymm7,XMMWORD[32+rsi],11704 vinserti128 ymm8,ymm8,XMMWORD[48+rsi],11705 lea rsi,[64+rsi]1706 1707 vpsrldq ymm9,ymm7,61708 vpsrldq ymm10,ymm8,61709 vpunpckhqdq ymm6,ymm7,ymm81710 vpunpcklqdq ymm9,ymm9,ymm101711 vpunpcklqdq ymm7,ymm7,ymm81712 1713 vpsrlq ymm10,ymm9,301714 vpsrlq ymm9,ymm9,41715 vpsrlq ymm8,ymm7,261716 vpsrlq ymm6,ymm6,401717 vpand ymm9,ymm9,ymm51718 vpand ymm7,ymm7,ymm51719 vpand ymm8,ymm8,ymm51720 vpand ymm10,ymm10,ymm51721 vpor ymm6,ymm6,YMMWORD[32+rcx]1722 1723 vpaddq ymm2,ymm9,ymm21724 sub rdx,641725 jz NEAR $L$tail_avx21726 jmp NEAR $L$oop_avx21727 1728 ALIGN 321729 $L$oop_avx2:1730 1731 1732 1733 1734 1735 1736 1737 1738 vpaddq ymm0,ymm7,ymm01739 vmovdqa ymm7,YMMWORD[rsp]1740 vpaddq ymm1,ymm8,ymm11741 vmovdqa ymm8,YMMWORD[32+rsp]1742 vpaddq ymm3,ymm10,ymm31743 vmovdqa ymm9,YMMWORD[96+rsp]1744 vpaddq ymm4,ymm6,ymm41745 vmovdqa ymm10,YMMWORD[48+rax]1746 vmovdqa ymm5,YMMWORD[112+rax]1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 vpmuludq ymm13,ymm7,ymm21764 vpmuludq ymm14,ymm8,ymm21765 vpmuludq ymm15,ymm9,ymm21766 vpmuludq ymm11,ymm10,ymm21767 vpmuludq ymm12,ymm5,ymm21768 1769 vpmuludq ymm6,ymm8,ymm01770 vpmuludq ymm2,ymm8,ymm11771 vpaddq ymm12,ymm12,ymm61772 vpaddq ymm13,ymm13,ymm21773 vpmuludq ymm6,ymm8,ymm31774 vpmuludq ymm2,ymm4,YMMWORD[64+rsp]1775 vpaddq ymm15,ymm15,ymm61776 vpaddq ymm11,ymm11,ymm21777 vmovdqa ymm8,YMMWORD[((-16))+rax]1778 1779 vpmuludq ymm6,ymm7,ymm01780 vpmuludq ymm2,ymm7,ymm11781 vpaddq ymm11,ymm11,ymm61782 vpaddq ymm12,ymm12,ymm21783 vpmuludq ymm6,ymm7,ymm31784 vpmuludq ymm2,ymm7,ymm41785 vmovdqu xmm7,XMMWORD[rsi]1786 vpaddq ymm14,ymm14,ymm61787 vpaddq ymm15,ymm15,ymm21788 vinserti128 ymm7,ymm7,XMMWORD[32+rsi],11789 1790 vpmuludq ymm6,ymm8,ymm31791 vpmuludq ymm2,ymm8,ymm41792 vmovdqu xmm8,XMMWORD[16+rsi]1793 vpaddq ymm11,ymm11,ymm61794 vpaddq ymm12,ymm12,ymm21795 vmovdqa ymm2,YMMWORD[16+rax]1796 vpmuludq ymm6,ymm9,ymm11797 vpmuludq ymm9,ymm9,ymm01798 vpaddq ymm14,ymm14,ymm61799 vpaddq ymm13,ymm13,ymm91800 vinserti128 ymm8,ymm8,XMMWORD[48+rsi],11801 lea rsi,[64+rsi]1802 1803 vpmuludq ymm6,ymm2,ymm11804 vpmuludq ymm2,ymm2,ymm01805 vpsrldq ymm9,ymm7,61806 vpaddq ymm15,ymm15,ymm61807 vpaddq ymm14,ymm14,ymm21808 vpmuludq ymm6,ymm10,ymm31809 vpmuludq ymm2,ymm10,ymm41810 vpsrldq ymm10,ymm8,61811 vpaddq ymm12,ymm12,ymm61812 vpaddq ymm13,ymm13,ymm21813 vpunpckhqdq ymm6,ymm7,ymm81814 1815 vpmuludq ymm3,ymm5,ymm31816 vpmuludq ymm4,ymm5,ymm41817 vpunpcklqdq ymm7,ymm7,ymm81818 vpaddq ymm2,ymm13,ymm31819 vpaddq ymm3,ymm14,ymm41820 vpunpcklqdq ymm10,ymm9,ymm101821 vpmuludq ymm4,ymm0,YMMWORD[80+rax]1822 vpmuludq ymm0,ymm5,ymm11823 vmovdqa ymm5,YMMWORD[64+rcx]1824 vpaddq ymm4,ymm15,ymm41825 vpaddq ymm0,ymm11,ymm01826 1827 1828 1829 1830 vpsrlq ymm14,ymm3,261831 vpand ymm3,ymm3,ymm51832 vpaddq ymm4,ymm4,ymm141833 1834 vpsrlq ymm11,ymm0,261835 vpand ymm0,ymm0,ymm51836 vpaddq ymm1,ymm12,ymm111837 1838 vpsrlq ymm15,ymm4,261839 vpand ymm4,ymm4,ymm51840 1841 vpsrlq ymm9,ymm10,41842 1843 vpsrlq ymm12,ymm1,261844 vpand ymm1,ymm1,ymm51845 vpaddq ymm2,ymm2,ymm121846 1847 vpaddq ymm0,ymm0,ymm151848 vpsllq ymm15,ymm15,21849 vpaddq ymm0,ymm0,ymm151850 1851 vpand ymm9,ymm9,ymm51852 vpsrlq ymm8,ymm7,261853 1854 vpsrlq ymm13,ymm2,261855 vpand ymm2,ymm2,ymm51856 vpaddq ymm3,ymm3,ymm131857 1858 vpaddq ymm2,ymm2,ymm91859 vpsrlq ymm10,ymm10,301860 1861 vpsrlq ymm11,ymm0,261862 vpand ymm0,ymm0,ymm51863 vpaddq ymm1,ymm1,ymm111864 1865 vpsrlq ymm6,ymm6,401866 1867 vpsrlq ymm14,ymm3,261868 vpand ymm3,ymm3,ymm51869 vpaddq ymm4,ymm4,ymm141870 1871 vpand ymm7,ymm7,ymm51872 vpand ymm8,ymm8,ymm51873 vpand ymm10,ymm10,ymm51874 vpor ymm6,ymm6,YMMWORD[32+rcx]1875 1876 sub rdx,641877 jnz NEAR $L$oop_avx21878 1879 DB 0x66,0x901880 $L$tail_avx2:1881 1882 1883 1884 1885 1886 1887 1888 vpaddq ymm0,ymm7,ymm01889 vmovdqu ymm7,YMMWORD[4+rsp]1890 vpaddq ymm1,ymm8,ymm11891 vmovdqu ymm8,YMMWORD[36+rsp]1892 vpaddq ymm3,ymm10,ymm31893 vmovdqu ymm9,YMMWORD[100+rsp]1894 vpaddq ymm4,ymm6,ymm41895 vmovdqu ymm10,YMMWORD[52+rax]1896 vmovdqu ymm5,YMMWORD[116+rax]1897 1898 vpmuludq ymm13,ymm7,ymm21899 vpmuludq ymm14,ymm8,ymm21900 vpmuludq ymm15,ymm9,ymm21901 vpmuludq ymm11,ymm10,ymm21902 vpmuludq ymm12,ymm5,ymm21903 1904 vpmuludq ymm6,ymm8,ymm01905 vpmuludq ymm2,ymm8,ymm11906 vpaddq ymm12,ymm12,ymm61907 vpaddq ymm13,ymm13,ymm21908 vpmuludq ymm6,ymm8,ymm31909 vpmuludq ymm2,ymm4,YMMWORD[68+rsp]1910 vpaddq ymm15,ymm15,ymm61911 vpaddq ymm11,ymm11,ymm21912 1913 vpmuludq ymm6,ymm7,ymm01914 vpmuludq ymm2,ymm7,ymm11915 vpaddq ymm11,ymm11,ymm61916 vmovdqu ymm8,YMMWORD[((-12))+rax]1917 vpaddq ymm12,ymm12,ymm21918 vpmuludq ymm6,ymm7,ymm31919 vpmuludq ymm2,ymm7,ymm41920 vpaddq ymm14,ymm14,ymm61921 vpaddq ymm15,ymm15,ymm21922 1923 vpmuludq ymm6,ymm8,ymm31924 vpmuludq ymm2,ymm8,ymm41925 vpaddq ymm11,ymm11,ymm61926 vpaddq ymm12,ymm12,ymm21927 vmovdqu ymm2,YMMWORD[20+rax]1928 vpmuludq ymm6,ymm9,ymm11929 vpmuludq ymm9,ymm9,ymm01930 vpaddq ymm14,ymm14,ymm61931 vpaddq ymm13,ymm13,ymm91932 1933 vpmuludq ymm6,ymm2,ymm11934 vpmuludq ymm2,ymm2,ymm01935 vpaddq ymm15,ymm15,ymm61936 vpaddq ymm14,ymm14,ymm21937 vpmuludq ymm6,ymm10,ymm31938 vpmuludq ymm2,ymm10,ymm41939 vpaddq ymm12,ymm12,ymm61940 vpaddq ymm13,ymm13,ymm21941 1942 vpmuludq ymm3,ymm5,ymm31943 vpmuludq ymm4,ymm5,ymm41944 vpaddq ymm2,ymm13,ymm31945 vpaddq ymm3,ymm14,ymm41946 vpmuludq ymm4,ymm0,YMMWORD[84+rax]1947 vpmuludq ymm0,ymm5,ymm11948 vmovdqa ymm5,YMMWORD[64+rcx]1949 vpaddq ymm4,ymm15,ymm41950 vpaddq ymm0,ymm11,ymm01951 1952 1953 1954 1955 vpsrldq ymm8,ymm12,81956 vpsrldq ymm9,ymm2,81957 vpsrldq ymm10,ymm3,81958 vpsrldq ymm6,ymm4,81959 vpsrldq ymm7,ymm0,81960 vpaddq ymm12,ymm12,ymm81961 vpaddq ymm2,ymm2,ymm91962 vpaddq ymm3,ymm3,ymm101963 vpaddq ymm4,ymm4,ymm61964 vpaddq ymm0,ymm0,ymm71965 1966 vpermq ymm10,ymm3,0x21967 vpermq ymm6,ymm4,0x21968 vpermq ymm7,ymm0,0x21969 vpermq ymm8,ymm12,0x21970 vpermq ymm9,ymm2,0x21971 vpaddq ymm3,ymm3,ymm101972 vpaddq ymm4,ymm4,ymm61973 vpaddq ymm0,ymm0,ymm71974 vpaddq ymm12,ymm12,ymm81975 vpaddq ymm2,ymm2,ymm91976 1977 1978 1979 1980 vpsrlq ymm14,ymm3,261981 vpand ymm3,ymm3,ymm51982 vpaddq ymm4,ymm4,ymm141983 1984 vpsrlq ymm11,ymm0,261985 vpand ymm0,ymm0,ymm51986 vpaddq ymm1,ymm12,ymm111987 1988 vpsrlq ymm15,ymm4,261989 vpand ymm4,ymm4,ymm51990 1991 vpsrlq ymm12,ymm1,261992 vpand ymm1,ymm1,ymm51993 vpaddq ymm2,ymm2,ymm121994 1995 vpaddq ymm0,ymm0,ymm151996 vpsllq ymm15,ymm15,21997 vpaddq ymm0,ymm0,ymm151998 1999 vpsrlq ymm13,ymm2,262000 vpand ymm2,ymm2,ymm52001 vpaddq ymm3,ymm3,ymm132002 2003 vpsrlq ymm11,ymm0,262004 vpand ymm0,ymm0,ymm52005 vpaddq ymm1,ymm1,ymm112006 2007 vpsrlq ymm14,ymm3,262008 vpand ymm3,ymm3,ymm52009 vpaddq ymm4,ymm4,ymm142010 2011 vmovd DWORD[(-112)+rdi],xmm02012 vmovd DWORD[(-108)+rdi],xmm12013 vmovd DWORD[(-104)+rdi],xmm22014 vmovd DWORD[(-100)+rdi],xmm32015 vmovd DWORD[(-96)+rdi],xmm42016 vmovdqa xmm6,XMMWORD[80+r11]2017 vmovdqa xmm7,XMMWORD[96+r11]2018 vmovdqa xmm8,XMMWORD[112+r11]2019 vmovdqa xmm9,XMMWORD[128+r11]2020 vmovdqa xmm10,XMMWORD[144+r11]2021 vmovdqa xmm11,XMMWORD[160+r11]2022 vmovdqa xmm12,XMMWORD[176+r11]2023 vmovdqa xmm13,XMMWORD[192+r11]2024 vmovdqa xmm14,XMMWORD[208+r11]2025 vmovdqa xmm15,XMMWORD[224+r11]2026 lea rsp,[248+r11]2027 $L$do_avx2_epilogue:2028 vzeroupper2029 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2030 mov rsi,QWORD[16+rsp]2031 DB 0F3h,0C3h ;repret2032 2033 $L$SEH_end_poly1305_blocks_avx2:2034 2035 ALIGN 322036 poly1305_blocks_avx512:2037 mov QWORD[8+rsp],rdi ;WIN64 prologue2038 mov QWORD[16+rsp],rsi2039 mov rax,rsp2040 $L$SEH_begin_poly1305_blocks_avx512:2041 mov rdi,rcx2042 mov rsi,rdx2043 mov rdx,r82044 mov rcx,r92045 2046 2047 2048 $L$blocks_avx512:2049 mov eax,152050 kmovw k2,eax2051 lea r11,[((-248))+rsp]2052 sub rsp,0x1c82053 vmovdqa XMMWORD[80+r11],xmm62054 vmovdqa XMMWORD[96+r11],xmm72055 vmovdqa XMMWORD[112+r11],xmm82056 vmovdqa XMMWORD[128+r11],xmm92057 vmovdqa XMMWORD[144+r11],xmm102058 vmovdqa XMMWORD[160+r11],xmm112059 vmovdqa XMMWORD[176+r11],xmm122060 vmovdqa XMMWORD[192+r11],xmm132061 vmovdqa XMMWORD[208+r11],xmm142062 vmovdqa XMMWORD[224+r11],xmm152063 $L$do_avx512_body:2064 lea rcx,[$L$const]2065 lea rdi,[((48+64))+rdi]2066 vmovdqa ymm9,YMMWORD[96+rcx]2067 2068 2069 vmovdqu xmm11,XMMWORD[((-64))+rdi]2070 and rsp,-5122071 vmovdqu xmm12,XMMWORD[((-48))+rdi]2072 mov rax,0x202073 vmovdqu xmm7,XMMWORD[((-32))+rdi]2074 vmovdqu xmm13,XMMWORD[((-16))+rdi]2075 vmovdqu xmm8,XMMWORD[rdi]2076 vmovdqu xmm14,XMMWORD[16+rdi]2077 vmovdqu xmm10,XMMWORD[32+rdi]2078 vmovdqu xmm15,XMMWORD[48+rdi]2079 vmovdqu xmm6,XMMWORD[64+rdi]2080 vpermd zmm16,zmm9,zmm112081 vpbroadcastq zmm5,QWORD[64+rcx]2082 vpermd zmm17,zmm9,zmm122083 vpermd zmm21,zmm9,zmm72084 vpermd zmm18,zmm9,zmm132085 vmovdqa64 ZMMWORD[rsp]{k2},zmm162086 vpsrlq zmm7,zmm16,322087 vpermd zmm22,zmm9,zmm82088 vmovdqu64 ZMMWORD[rax*1+rsp]{k2},zmm172089 vpsrlq zmm8,zmm17,322090 vpermd zmm19,zmm9,zmm142091 vmovdqa64 ZMMWORD[64+rsp]{k2},zmm212092 vpermd zmm23,zmm9,zmm102093 vpermd zmm20,zmm9,zmm152094 vmovdqu64 ZMMWORD[64+rax*1+rsp]{k2},zmm182095 vpermd zmm24,zmm9,zmm62096 vmovdqa64 ZMMWORD[128+rsp]{k2},zmm222097 vmovdqu64 ZMMWORD[128+rax*1+rsp]{k2},zmm192098 vmovdqa64 ZMMWORD[192+rsp]{k2},zmm232099 vmovdqu64 ZMMWORD[192+rax*1+rsp]{k2},zmm202100 vmovdqa64 ZMMWORD[256+rsp]{k2},zmm242101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 vpmuludq zmm11,zmm16,zmm72112 vpmuludq zmm12,zmm17,zmm72113 vpmuludq zmm13,zmm18,zmm72114 vpmuludq zmm14,zmm19,zmm72115 vpmuludq zmm15,zmm20,zmm72116 vpsrlq zmm9,zmm18,322117 2118 vpmuludq zmm25,zmm24,zmm82119 vpmuludq zmm26,zmm16,zmm82120 vpmuludq zmm27,zmm17,zmm82121 vpmuludq zmm28,zmm18,zmm82122 vpmuludq zmm29,zmm19,zmm82123 vpsrlq zmm10,zmm19,322124 vpaddq zmm11,zmm11,zmm252125 vpaddq zmm12,zmm12,zmm262126 vpaddq zmm13,zmm13,zmm272127 vpaddq zmm14,zmm14,zmm282128 vpaddq zmm15,zmm15,zmm292129 2130 vpmuludq zmm25,zmm23,zmm92131 vpmuludq zmm26,zmm24,zmm92132 vpmuludq zmm28,zmm17,zmm92133 vpmuludq zmm29,zmm18,zmm92134 vpmuludq zmm27,zmm16,zmm92135 vpsrlq zmm6,zmm20,322136 vpaddq zmm11,zmm11,zmm252137 vpaddq zmm12,zmm12,zmm262138 vpaddq zmm14,zmm14,zmm282139 vpaddq zmm15,zmm15,zmm292140 vpaddq zmm13,zmm13,zmm272141 2142 vpmuludq zmm25,zmm22,zmm102143 vpmuludq zmm28,zmm16,zmm102144 vpmuludq zmm29,zmm17,zmm102145 vpmuludq zmm26,zmm23,zmm102146 vpmuludq zmm27,zmm24,zmm102147 vpaddq zmm11,zmm11,zmm252148 vpaddq zmm14,zmm14,zmm282149 vpaddq zmm15,zmm15,zmm292150 vpaddq zmm12,zmm12,zmm262151 vpaddq zmm13,zmm13,zmm272152 2153 vpmuludq zmm28,zmm24,zmm62154 vpmuludq zmm29,zmm16,zmm62155 vpmuludq zmm25,zmm21,zmm62156 vpmuludq zmm26,zmm22,zmm62157 vpmuludq zmm27,zmm23,zmm62158 vpaddq zmm14,zmm14,zmm282159 vpaddq zmm15,zmm15,zmm292160 vpaddq zmm11,zmm11,zmm252161 vpaddq zmm12,zmm12,zmm262162 vpaddq zmm13,zmm13,zmm272163 2164 2165 2166 vmovdqu64 zmm10,ZMMWORD[rsi]2167 vmovdqu64 zmm6,ZMMWORD[64+rsi]2168 lea rsi,[128+rsi]2169 2170 2171 2172 2173 vpsrlq zmm28,zmm14,262174 vpandq zmm14,zmm14,zmm52175 vpaddq zmm15,zmm15,zmm282176 2177 vpsrlq zmm25,zmm11,262178 vpandq zmm11,zmm11,zmm52179 vpaddq zmm12,zmm12,zmm252180 2181 vpsrlq zmm29,zmm15,262182 vpandq zmm15,zmm15,zmm52183 2184 vpsrlq zmm26,zmm12,262185 vpandq zmm12,zmm12,zmm52186 vpaddq zmm13,zmm13,zmm262187 2188 vpaddq zmm11,zmm11,zmm292189 vpsllq zmm29,zmm29,22190 vpaddq zmm11,zmm11,zmm292191 2192 vpsrlq zmm27,zmm13,262193 vpandq zmm13,zmm13,zmm52194 vpaddq zmm14,zmm14,zmm272195 2196 vpsrlq zmm25,zmm11,262197 vpandq zmm11,zmm11,zmm52198 vpaddq zmm12,zmm12,zmm252199 2200 vpsrlq zmm28,zmm14,262201 vpandq zmm14,zmm14,zmm52202 vpaddq zmm15,zmm15,zmm282203 2204 2205 2206 2207 2208 vpunpcklqdq zmm7,zmm10,zmm62209 vpunpckhqdq zmm6,zmm10,zmm62210 2211 2212 2213 2214 2215 2216 vmovdqa32 zmm25,ZMMWORD[128+rcx]2217 mov eax,0x77772218 kmovw k1,eax2219 2220 vpermd zmm16,zmm25,zmm162221 vpermd zmm17,zmm25,zmm172222 vpermd zmm18,zmm25,zmm182223 vpermd zmm19,zmm25,zmm192224 vpermd zmm20,zmm25,zmm202225 2226 vpermd zmm16{k1},zmm25,zmm112227 vpermd zmm17{k1},zmm25,zmm122228 vpermd zmm18{k1},zmm25,zmm132229 vpermd zmm19{k1},zmm25,zmm142230 vpermd zmm20{k1},zmm25,zmm152231 2232 vpslld zmm21,zmm17,22233 vpslld zmm22,zmm18,22234 vpslld zmm23,zmm19,22235 vpslld zmm24,zmm20,22236 vpaddd zmm21,zmm21,zmm172237 vpaddd zmm22,zmm22,zmm182238 vpaddd zmm23,zmm23,zmm192239 vpaddd zmm24,zmm24,zmm202240 2241 vpbroadcastq zmm30,QWORD[32+rcx]2242 2243 vpsrlq zmm9,zmm7,522244 vpsllq zmm10,zmm6,122245 vporq zmm9,zmm9,zmm102246 vpsrlq zmm8,zmm7,262247 vpsrlq zmm10,zmm6,142248 vpsrlq zmm6,zmm6,402249 vpandq zmm9,zmm9,zmm52250 vpandq zmm7,zmm7,zmm52251 2252 2253 2254 2255 vpaddq zmm2,zmm9,zmm22256 sub rdx,1922257 jbe NEAR $L$tail_avx5122258 jmp NEAR $L$oop_avx5122259 2260 ALIGN 322261 $L$oop_avx512:2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 vpmuludq zmm14,zmm17,zmm22291 vpaddq zmm0,zmm7,zmm02292 vpmuludq zmm15,zmm18,zmm22293 vpandq zmm8,zmm8,zmm52294 vpmuludq zmm11,zmm23,zmm22295 vpandq zmm10,zmm10,zmm52296 vpmuludq zmm12,zmm24,zmm22297 vporq zmm6,zmm6,zmm302298 vpmuludq zmm13,zmm16,zmm22299 vpaddq zmm1,zmm8,zmm12300 vpaddq zmm3,zmm10,zmm32301 vpaddq zmm4,zmm6,zmm42302 2303 vmovdqu64 zmm10,ZMMWORD[rsi]2304 vmovdqu64 zmm6,ZMMWORD[64+rsi]2305 lea rsi,[128+rsi]2306 vpmuludq zmm28,zmm19,zmm02307 vpmuludq zmm29,zmm20,zmm02308 vpmuludq zmm25,zmm16,zmm02309 vpmuludq zmm26,zmm17,zmm02310 vpaddq zmm14,zmm14,zmm282311 vpaddq zmm15,zmm15,zmm292312 vpaddq zmm11,zmm11,zmm252313 vpaddq zmm12,zmm12,zmm262314 2315 vpmuludq zmm28,zmm18,zmm12316 vpmuludq zmm29,zmm19,zmm12317 vpmuludq zmm25,zmm24,zmm12318 vpmuludq zmm27,zmm18,zmm02319 vpaddq zmm14,zmm14,zmm282320 vpaddq zmm15,zmm15,zmm292321 vpaddq zmm11,zmm11,zmm252322 vpaddq zmm13,zmm13,zmm272323 2324 vpunpcklqdq zmm7,zmm10,zmm62325 vpunpckhqdq zmm6,zmm10,zmm62326 2327 vpmuludq zmm28,zmm16,zmm32328 vpmuludq zmm29,zmm17,zmm32329 vpmuludq zmm26,zmm16,zmm12330 vpmuludq zmm27,zmm17,zmm12331 vpaddq zmm14,zmm14,zmm282332 vpaddq zmm15,zmm15,zmm292333 vpaddq zmm12,zmm12,zmm262334 vpaddq zmm13,zmm13,zmm272335 2336 vpmuludq zmm28,zmm24,zmm42337 vpmuludq zmm29,zmm16,zmm42338 vpmuludq zmm25,zmm22,zmm32339 vpmuludq zmm26,zmm23,zmm32340 vpaddq zmm14,zmm14,zmm282341 vpmuludq zmm27,zmm24,zmm32342 vpaddq zmm15,zmm15,zmm292343 vpaddq zmm11,zmm11,zmm252344 vpaddq zmm12,zmm12,zmm262345 vpaddq zmm13,zmm13,zmm272346 2347 vpmuludq zmm25,zmm21,zmm42348 vpmuludq zmm26,zmm22,zmm42349 vpmuludq zmm27,zmm23,zmm42350 vpaddq zmm0,zmm11,zmm252351 vpaddq zmm1,zmm12,zmm262352 vpaddq zmm2,zmm13,zmm272353 2354 2355 2356 2357 vpsrlq zmm9,zmm7,522358 vpsllq zmm10,zmm6,122359 2360 vpsrlq zmm3,zmm14,262361 vpandq zmm14,zmm14,zmm52362 vpaddq zmm4,zmm15,zmm32363 2364 vporq zmm9,zmm9,zmm102365 2366 vpsrlq zmm11,zmm0,262367 vpandq zmm0,zmm0,zmm52368 vpaddq zmm1,zmm1,zmm112369 2370 vpandq zmm9,zmm9,zmm52371 2372 vpsrlq zmm15,zmm4,262373 vpandq zmm4,zmm4,zmm52374 2375 vpsrlq zmm12,zmm1,262376 vpandq zmm1,zmm1,zmm52377 vpaddq zmm2,zmm2,zmm122378 2379 vpaddq zmm0,zmm0,zmm152380 vpsllq zmm15,zmm15,22381 vpaddq zmm0,zmm0,zmm152382 2383 vpaddq zmm2,zmm2,zmm92384 vpsrlq zmm8,zmm7,262385 2386 vpsrlq zmm13,zmm2,262387 vpandq zmm2,zmm2,zmm52388 vpaddq zmm3,zmm14,zmm132389 2390 vpsrlq zmm10,zmm6,142391 2392 vpsrlq zmm11,zmm0,262393 vpandq zmm0,zmm0,zmm52394 vpaddq zmm1,zmm1,zmm112395 2396 vpsrlq zmm6,zmm6,402397 2398 vpsrlq zmm14,zmm3,262399 vpandq zmm3,zmm3,zmm52400 vpaddq zmm4,zmm4,zmm142401 2402 vpandq zmm7,zmm7,zmm52403 2404 2405 2406 2407 sub rdx,1282408 ja NEAR $L$oop_avx5122409 2410 $L$tail_avx512:2411 2412 2413 2414 2415 2416 vpsrlq zmm16,zmm16,322417 vpsrlq zmm17,zmm17,322418 vpsrlq zmm18,zmm18,322419 vpsrlq zmm23,zmm23,322420 vpsrlq zmm24,zmm24,322421 vpsrlq zmm19,zmm19,322422 vpsrlq zmm20,zmm20,322423 vpsrlq zmm21,zmm21,322424 vpsrlq zmm22,zmm22,322425 2426 2427 2428 lea rsi,[rdx*1+rsi]2429 2430 2431 vpaddq zmm0,zmm7,zmm02432 2433 vpmuludq zmm14,zmm17,zmm22434 vpmuludq zmm15,zmm18,zmm22435 vpmuludq zmm11,zmm23,zmm22436 vpandq zmm8,zmm8,zmm52437 vpmuludq zmm12,zmm24,zmm22438 vpandq zmm10,zmm10,zmm52439 vpmuludq zmm13,zmm16,zmm22440 vporq zmm6,zmm6,zmm302441 vpaddq zmm1,zmm8,zmm12442 vpaddq zmm3,zmm10,zmm32443 vpaddq zmm4,zmm6,zmm42444 2445 vmovdqu xmm7,XMMWORD[rsi]2446 vpmuludq zmm28,zmm19,zmm02447 vpmuludq zmm29,zmm20,zmm02448 vpmuludq zmm25,zmm16,zmm02449 vpmuludq zmm26,zmm17,zmm02450 vpaddq zmm14,zmm14,zmm282451 vpaddq zmm15,zmm15,zmm292452 vpaddq zmm11,zmm11,zmm252453 vpaddq zmm12,zmm12,zmm262454 2455 vmovdqu xmm8,XMMWORD[16+rsi]2456 vpmuludq zmm28,zmm18,zmm12457 vpmuludq zmm29,zmm19,zmm12458 vpmuludq zmm25,zmm24,zmm12459 vpmuludq zmm27,zmm18,zmm02460 vpaddq zmm14,zmm14,zmm282461 vpaddq zmm15,zmm15,zmm292462 vpaddq zmm11,zmm11,zmm252463 vpaddq zmm13,zmm13,zmm272464 2465 vinserti128 ymm7,ymm7,XMMWORD[32+rsi],12466 vpmuludq zmm28,zmm16,zmm32467 vpmuludq zmm29,zmm17,zmm32468 vpmuludq zmm26,zmm16,zmm12469 vpmuludq zmm27,zmm17,zmm12470 vpaddq zmm14,zmm14,zmm282471 vpaddq zmm15,zmm15,zmm292472 vpaddq zmm12,zmm12,zmm262473 vpaddq zmm13,zmm13,zmm272474 2475 vinserti128 ymm8,ymm8,XMMWORD[48+rsi],12476 vpmuludq zmm28,zmm24,zmm42477 vpmuludq zmm29,zmm16,zmm42478 vpmuludq zmm25,zmm22,zmm32479 vpmuludq zmm26,zmm23,zmm32480 vpmuludq zmm27,zmm24,zmm32481 vpaddq zmm3,zmm14,zmm282482 vpaddq zmm15,zmm15,zmm292483 vpaddq zmm11,zmm11,zmm252484 vpaddq zmm12,zmm12,zmm262485 vpaddq zmm13,zmm13,zmm272486 2487 vpmuludq zmm25,zmm21,zmm42488 vpmuludq zmm26,zmm22,zmm42489 vpmuludq zmm27,zmm23,zmm42490 vpaddq zmm0,zmm11,zmm252491 vpaddq zmm1,zmm12,zmm262492 vpaddq zmm2,zmm13,zmm272493 2494 2495 2496 2497 mov eax,12498 vpermq zmm14,zmm3,0xb12499 vpermq zmm4,zmm15,0xb12500 vpermq zmm11,zmm0,0xb12501 vpermq zmm12,zmm1,0xb12502 vpermq zmm13,zmm2,0xb12503 vpaddq zmm3,zmm3,zmm142504 vpaddq zmm4,zmm4,zmm152505 vpaddq zmm0,zmm0,zmm112506 vpaddq zmm1,zmm1,zmm122507 vpaddq zmm2,zmm2,zmm132508 2509 kmovw k3,eax2510 vpermq zmm14,zmm3,0x22511 vpermq zmm15,zmm4,0x22512 vpermq zmm11,zmm0,0x22513 vpermq zmm12,zmm1,0x22514 vpermq zmm13,zmm2,0x22515 vpaddq zmm3,zmm3,zmm142516 vpaddq zmm4,zmm4,zmm152517 vpaddq zmm0,zmm0,zmm112518 vpaddq zmm1,zmm1,zmm122519 vpaddq zmm2,zmm2,zmm132520 2521 vextracti64x4 ymm14,zmm3,0x12522 vextracti64x4 ymm15,zmm4,0x12523 vextracti64x4 ymm11,zmm0,0x12524 vextracti64x4 ymm12,zmm1,0x12525 vextracti64x4 ymm13,zmm2,0x12526 vpaddq zmm3{k3}{z},zmm3,zmm142527 vpaddq zmm4{k3}{z},zmm4,zmm152528 vpaddq zmm0{k3}{z},zmm0,zmm112529 vpaddq zmm1{k3}{z},zmm1,zmm122530 vpaddq zmm2{k3}{z},zmm2,zmm132531 2532 2533 2534 vpsrlq ymm14,ymm3,262535 vpand ymm3,ymm3,ymm52536 vpsrldq ymm9,ymm7,62537 vpsrldq ymm10,ymm8,62538 vpunpckhqdq ymm6,ymm7,ymm82539 vpaddq ymm4,ymm4,ymm142540 2541 vpsrlq ymm11,ymm0,262542 vpand ymm0,ymm0,ymm52543 vpunpcklqdq ymm9,ymm9,ymm102544 vpunpcklqdq ymm7,ymm7,ymm82545 vpaddq ymm1,ymm1,ymm112546 2547 vpsrlq ymm15,ymm4,262548 vpand ymm4,ymm4,ymm52549 2550 vpsrlq ymm12,ymm1,262551 vpand ymm1,ymm1,ymm52552 vpsrlq ymm10,ymm9,302553 vpsrlq ymm9,ymm9,42554 vpaddq ymm2,ymm2,ymm122555 2556 vpaddq ymm0,ymm0,ymm152557 vpsllq ymm15,ymm15,22558 vpsrlq ymm8,ymm7,262559 vpsrlq ymm6,ymm6,402560 vpaddq ymm0,ymm0,ymm152561 2562 vpsrlq ymm13,ymm2,262563 vpand ymm2,ymm2,ymm52564 vpand ymm9,ymm9,ymm52565 vpand ymm7,ymm7,ymm52566 vpaddq ymm3,ymm3,ymm132567 2568 vpsrlq ymm11,ymm0,262569 vpand ymm0,ymm0,ymm52570 vpaddq ymm2,ymm9,ymm22571 vpand ymm8,ymm8,ymm52572 vpaddq ymm1,ymm1,ymm112573 2574 vpsrlq ymm14,ymm3,262575 vpand ymm3,ymm3,ymm52576 vpand ymm10,ymm10,ymm52577 vpor ymm6,ymm6,YMMWORD[32+rcx]2578 vpaddq ymm4,ymm4,ymm142579 2580 lea rax,[144+rsp]2581 add rdx,642582 jnz NEAR $L$tail_avx22583 2584 vpsubq ymm2,ymm2,ymm92585 vmovd DWORD[(-112)+rdi],xmm02586 vmovd DWORD[(-108)+rdi],xmm12587 vmovd DWORD[(-104)+rdi],xmm22588 vmovd DWORD[(-100)+rdi],xmm32589 vmovd DWORD[(-96)+rdi],xmm42590 vzeroall2591 movdqa xmm6,XMMWORD[80+r11]2592 movdqa xmm7,XMMWORD[96+r11]2593 movdqa xmm8,XMMWORD[112+r11]2594 movdqa xmm9,XMMWORD[128+r11]2595 movdqa xmm10,XMMWORD[144+r11]2596 movdqa xmm11,XMMWORD[160+r11]2597 movdqa xmm12,XMMWORD[176+r11]2598 movdqa xmm13,XMMWORD[192+r11]2599 movdqa xmm14,XMMWORD[208+r11]2600 movdqa xmm15,XMMWORD[224+r11]2601 lea rsp,[248+r11]2602 $L$do_avx512_epilogue:2603 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2604 mov rsi,QWORD[16+rsp]2605 DB 0F3h,0C3h ;repret2606 2607 $L$SEH_end_poly1305_blocks_avx512:2608 2609 ALIGN 322610 poly1305_init_base2_44:2611 mov QWORD[8+rsp],rdi ;WIN64 prologue2612 mov QWORD[16+rsp],rsi2613 mov rax,rsp2614 $L$SEH_begin_poly1305_init_base2_44:2615 mov rdi,rcx2616 mov rsi,rdx2617 mov rdx,r82618 2619 2620 2621 xor rax,rax2622 mov QWORD[rdi],rax2623 mov QWORD[8+rdi],rax2624 mov QWORD[16+rdi],rax2625 2626 $L$init_base2_44:2627 lea r10,[poly1305_blocks_vpmadd52]2628 lea r11,[poly1305_emit_base2_44]2629 2630 mov rax,0x0ffffffc0fffffff2631 mov rcx,0x0ffffffc0ffffffc2632 and rax,QWORD[rsi]2633 mov r8,0x00000fffffffffff2634 and rcx,QWORD[8+rsi]2635 mov r9,0x00000fffffffffff2636 and r8,rax2637 shrd rax,rcx,442638 mov QWORD[40+rdi],r82639 and rax,r92640 shr rcx,242641 mov QWORD[48+rdi],rax2642 lea rax,[rax*4+rax]2643 mov QWORD[56+rdi],rcx2644 shl rax,22645 lea rcx,[rcx*4+rcx]2646 shl rcx,22647 mov QWORD[24+rdi],rax2648 mov QWORD[32+rdi],rcx2649 mov QWORD[64+rdi],-12650 mov QWORD[rdx],r102651 mov QWORD[8+rdx],r112652 mov eax,12653 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2654 mov rsi,QWORD[16+rsp]2655 DB 0F3h,0C3h ;repret2656 2657 $L$SEH_end_poly1305_init_base2_44:2658 2659 ALIGN 322660 poly1305_blocks_vpmadd52:2661 mov QWORD[8+rsp],rdi ;WIN64 prologue2662 mov QWORD[16+rsp],rsi2663 mov rax,rsp2664 $L$SEH_begin_poly1305_blocks_vpmadd52:2665 mov rdi,rcx2666 mov rsi,rdx2667 mov rdx,r82668 mov rcx,r92669 2670 2671 2672 DB 243,15,30,2502673 shr rdx,42674 jz NEAR $L$no_data_vpmadd522675 2676 shl rcx,402677 mov r8,QWORD[64+rdi]2678 2679 2680 2681 2682 2683 2684 mov rax,32685 mov r10,12686 cmp rdx,42687 cmovae rax,r102688 test r8,r82689 cmovns rax,r102690 2691 and rax,rdx2692 jz NEAR $L$blocks_vpmadd52_4x2693 2694 sub rdx,rax2695 mov r10d,72696 mov r11d,12697 kmovw k7,r10d2698 lea r10,[$L$2_44_inp_permd]2699 kmovw k1,r11d2700 2701 vmovq xmm21,rcx2702 vmovdqa64 ymm19,YMMWORD[r10]2703 vmovdqa64 ymm20,YMMWORD[32+r10]2704 vpermq ymm21,ymm21,0xcf2705 vmovdqa64 ymm22,YMMWORD[64+r10]2706 2707 vmovdqu64 ymm16{k7}{z},[rdi]2708 vmovdqu64 ymm3{k7}{z},[40+rdi]2709 vmovdqu64 ymm4{k7}{z},[32+rdi]2710 vmovdqu64 ymm5{k7}{z},[24+rdi]2711 2712 vmovdqa64 ymm23,YMMWORD[96+r10]2713 vmovdqa64 ymm24,YMMWORD[128+r10]2714 2715 jmp NEAR $L$oop_vpmadd522716 2717 ALIGN 322718 $L$oop_vpmadd52:2719 vmovdqu32 xmm18,XMMWORD[rsi]2720 lea rsi,[16+rsi]2721 2722 vpermd ymm18,ymm19,ymm182723 vpsrlvq ymm18,ymm18,ymm202724 vpandq ymm18,ymm18,ymm222725 vporq ymm18,ymm18,ymm212726 2727 vpaddq ymm16,ymm16,ymm182728 2729 vpermq ymm0{k7}{z},ymm16,02730 vpermq ymm1{k7}{z},ymm16,852731 vpermq ymm2{k7}{z},ymm16,1702732 2733 vpxord ymm16,ymm16,ymm162734 vpxord ymm17,ymm17,ymm172735 2736 vpmadd52luq ymm16,ymm0,ymm32737 vpmadd52huq ymm17,ymm0,ymm32738 2739 vpmadd52luq ymm16,ymm1,ymm42740 vpmadd52huq ymm17,ymm1,ymm42741 2742 vpmadd52luq ymm16,ymm2,ymm52743 vpmadd52huq ymm17,ymm2,ymm52744 2745 vpsrlvq ymm18,ymm16,ymm232746 vpsllvq ymm17,ymm17,ymm242747 vpandq ymm16,ymm16,ymm222748 2749 vpaddq ymm17,ymm17,ymm182750 2751 vpermq ymm17,ymm17,1472752 2753 vpaddq ymm16,ymm16,ymm172754 2755 vpsrlvq ymm18,ymm16,ymm232756 vpandq ymm16,ymm16,ymm222757 2758 vpermq ymm18,ymm18,1472759 2760 vpaddq ymm16,ymm16,ymm182761 2762 vpermq ymm18{k1}{z},ymm16,1472763 2764 vpaddq ymm16,ymm16,ymm182765 vpsllq ymm18,ymm18,22766 2767 vpaddq ymm16,ymm16,ymm182768 2769 dec rax2770 jnz NEAR $L$oop_vpmadd522771 2772 vmovdqu64 YMMWORD[rdi]{k7},ymm162773 2774 test rdx,rdx2775 jnz NEAR $L$blocks_vpmadd52_4x2776 2777 $L$no_data_vpmadd52:2778 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2779 mov rsi,QWORD[16+rsp]2780 DB 0F3h,0C3h ;repret2781 2782 $L$SEH_end_poly1305_blocks_vpmadd52:2783 2784 ALIGN 322785 poly1305_blocks_vpmadd52_4x:2786 mov QWORD[8+rsp],rdi ;WIN64 prologue2787 mov QWORD[16+rsp],rsi2788 mov rax,rsp2789 $L$SEH_begin_poly1305_blocks_vpmadd52_4x:2790 mov rdi,rcx2791 mov rsi,rdx2792 mov rdx,r82793 mov rcx,r92794 2795 2796 2797 shr rdx,42798 jz NEAR $L$no_data_vpmadd52_4x2799 2800 shl rcx,402801 mov r8,QWORD[64+rdi]2802 2803 $L$blocks_vpmadd52_4x:2804 vpbroadcastq ymm31,rcx2805 2806 vmovdqa64 ymm28,YMMWORD[$L$x_mask44]2807 mov eax,52808 vmovdqa64 ymm29,YMMWORD[$L$x_mask42]2809 kmovw k1,eax2810 2811 test r8,r82812 js NEAR $L$init_vpmadd522813 2814 vmovq xmm0,QWORD[rdi]2815 vmovq xmm1,QWORD[8+rdi]2816 vmovq xmm2,QWORD[16+rdi]2817 2818 test rdx,32819 jnz NEAR $L$blocks_vpmadd52_2x_do2820 2821 $L$blocks_vpmadd52_4x_do:2822 vpbroadcastq ymm3,QWORD[64+rdi]2823 vpbroadcastq ymm4,QWORD[96+rdi]2824 vpbroadcastq ymm5,QWORD[128+rdi]2825 vpbroadcastq ymm16,QWORD[160+rdi]2826 2827 $L$blocks_vpmadd52_4x_key_loaded:2828 vpsllq ymm17,ymm5,22829 vpaddq ymm17,ymm17,ymm52830 vpsllq ymm17,ymm17,22831 2832 test rdx,72833 jz NEAR $L$blocks_vpmadd52_8x2834 2835 vmovdqu64 ymm26,YMMWORD[rsi]2836 vmovdqu64 ymm27,YMMWORD[32+rsi]2837 lea rsi,[64+rsi]2838 2839 vpunpcklqdq ymm25,ymm26,ymm272840 vpunpckhqdq ymm27,ymm26,ymm272841 2842 2843 2844 vpsrlq ymm26,ymm27,242845 vporq ymm26,ymm26,ymm312846 vpaddq ymm2,ymm2,ymm262847 vpandq ymm24,ymm25,ymm282848 vpsrlq ymm25,ymm25,442849 vpsllq ymm27,ymm27,202850 vporq ymm25,ymm25,ymm272851 vpandq ymm25,ymm25,ymm282852 2853 sub rdx,42854 jz NEAR $L$tail_vpmadd52_4x2855 jmp NEAR $L$oop_vpmadd52_4x2856 ud22857 2858 ALIGN 322859 $L$init_vpmadd52:2860 vmovq xmm16,QWORD[24+rdi]2861 vmovq xmm2,QWORD[56+rdi]2862 vmovq xmm17,QWORD[32+rdi]2863 vmovq xmm3,QWORD[40+rdi]2864 vmovq xmm4,QWORD[48+rdi]2865 2866 vmovdqa ymm0,ymm32867 vmovdqa ymm1,ymm42868 vmovdqa ymm5,ymm22869 2870 mov eax,22871 2872 $L$mul_init_vpmadd52:2873 vpxorq ymm18,ymm18,ymm182874 vpmadd52luq ymm18,ymm16,ymm22875 vpxorq ymm19,ymm19,ymm192876 vpmadd52huq ymm19,ymm16,ymm22877 vpxorq ymm20,ymm20,ymm202878 vpmadd52luq ymm20,ymm17,ymm22879 vpxorq ymm21,ymm21,ymm212880 vpmadd52huq ymm21,ymm17,ymm22881 vpxorq ymm22,ymm22,ymm222882 vpmadd52luq ymm22,ymm3,ymm22883 vpxorq ymm23,ymm23,ymm232884 vpmadd52huq ymm23,ymm3,ymm22885 2886 vpmadd52luq ymm18,ymm3,ymm02887 vpmadd52huq ymm19,ymm3,ymm02888 vpmadd52luq ymm20,ymm4,ymm02889 vpmadd52huq ymm21,ymm4,ymm02890 vpmadd52luq ymm22,ymm5,ymm02891 vpmadd52huq ymm23,ymm5,ymm02892 2893 vpmadd52luq ymm18,ymm17,ymm12894 vpmadd52huq ymm19,ymm17,ymm12895 vpmadd52luq ymm20,ymm3,ymm12896 vpmadd52huq ymm21,ymm3,ymm12897 vpmadd52luq ymm22,ymm4,ymm12898 vpmadd52huq ymm23,ymm4,ymm12899 2900 2901 2902 vpsrlq ymm30,ymm18,442903 vpsllq ymm19,ymm19,82904 vpandq ymm0,ymm18,ymm282905 vpaddq ymm19,ymm19,ymm302906 2907 vpaddq ymm20,ymm20,ymm192908 2909 vpsrlq ymm30,ymm20,442910 vpsllq ymm21,ymm21,82911 vpandq ymm1,ymm20,ymm282912 vpaddq ymm21,ymm21,ymm302913 2914 vpaddq ymm22,ymm22,ymm212915 2916 vpsrlq ymm30,ymm22,422917 vpsllq ymm23,ymm23,102918 vpandq ymm2,ymm22,ymm292919 vpaddq ymm23,ymm23,ymm302920 2921 vpaddq ymm0,ymm0,ymm232922 vpsllq ymm23,ymm23,22923 2924 vpaddq ymm0,ymm0,ymm232925 2926 vpsrlq ymm30,ymm0,442927 vpandq ymm0,ymm0,ymm282928 2929 vpaddq ymm1,ymm1,ymm302930 2931 dec eax2932 jz NEAR $L$done_init_vpmadd522933 2934 vpunpcklqdq ymm4,ymm1,ymm42935 vpbroadcastq xmm1,xmm12936 vpunpcklqdq ymm5,ymm2,ymm52937 vpbroadcastq xmm2,xmm22938 vpunpcklqdq ymm3,ymm0,ymm32939 vpbroadcastq xmm0,xmm02940 2941 vpsllq ymm16,ymm4,22942 vpsllq ymm17,ymm5,22943 vpaddq ymm16,ymm16,ymm42944 vpaddq ymm17,ymm17,ymm52945 vpsllq ymm16,ymm16,22946 vpsllq ymm17,ymm17,22947 2948 jmp NEAR $L$mul_init_vpmadd522949 ud22950 2951 ALIGN 322952 $L$done_init_vpmadd52:2953 vinserti128 ymm4,ymm1,xmm4,12954 vinserti128 ymm5,ymm2,xmm5,12955 vinserti128 ymm3,ymm0,xmm3,12956 2957 vpermq ymm4,ymm4,2162958 vpermq ymm5,ymm5,2162959 vpermq ymm3,ymm3,2162960 2961 vpsllq ymm16,ymm4,22962 vpaddq ymm16,ymm16,ymm42963 vpsllq ymm16,ymm16,22964 2965 vmovq xmm0,QWORD[rdi]2966 vmovq xmm1,QWORD[8+rdi]2967 vmovq xmm2,QWORD[16+rdi]2968 2969 test rdx,32970 jnz NEAR $L$done_init_vpmadd52_2x2971 2972 vmovdqu64 YMMWORD[64+rdi],ymm32973 vpbroadcastq ymm3,xmm32974 vmovdqu64 YMMWORD[96+rdi],ymm42975 vpbroadcastq ymm4,xmm42976 vmovdqu64 YMMWORD[128+rdi],ymm52977 vpbroadcastq ymm5,xmm52978 vmovdqu64 YMMWORD[160+rdi],ymm162979 vpbroadcastq ymm16,xmm162980 2981 jmp NEAR $L$blocks_vpmadd52_4x_key_loaded2982 ud22983 2984 ALIGN 322985 $L$done_init_vpmadd52_2x:2986 vmovdqu64 YMMWORD[64+rdi],ymm32987 vpsrldq ymm3,ymm3,82988 vmovdqu64 YMMWORD[96+rdi],ymm42989 vpsrldq ymm4,ymm4,82990 vmovdqu64 YMMWORD[128+rdi],ymm52991 vpsrldq ymm5,ymm5,82992 vmovdqu64 YMMWORD[160+rdi],ymm162993 vpsrldq ymm16,ymm16,82994 jmp NEAR $L$blocks_vpmadd52_2x_key_loaded2995 ud22996 2997 ALIGN 322998 $L$blocks_vpmadd52_2x_do:2999 vmovdqu64 ymm5{k1}{z},[((128+8))+rdi]3000 vmovdqu64 ymm16{k1}{z},[((160+8))+rdi]3001 vmovdqu64 ymm3{k1}{z},[((64+8))+rdi]3002 vmovdqu64 ymm4{k1}{z},[((96+8))+rdi]3003 3004 $L$blocks_vpmadd52_2x_key_loaded:3005 vmovdqu64 ymm26,YMMWORD[rsi]3006 vpxorq ymm27,ymm27,ymm273007 lea rsi,[32+rsi]3008 3009 vpunpcklqdq ymm25,ymm26,ymm273010 vpunpckhqdq ymm27,ymm26,ymm273011 3012 3013 3014 vpsrlq ymm26,ymm27,243015 vporq ymm26,ymm26,ymm313016 vpaddq ymm2,ymm2,ymm263017 vpandq ymm24,ymm25,ymm283018 vpsrlq ymm25,ymm25,443019 vpsllq ymm27,ymm27,203020 vporq ymm25,ymm25,ymm273021 vpandq ymm25,ymm25,ymm283022 3023 jmp NEAR $L$tail_vpmadd52_2x3024 ud23025 3026 ALIGN 323027 $L$oop_vpmadd52_4x:3028 3029 vpaddq ymm0,ymm0,ymm243030 vpaddq ymm1,ymm1,ymm253031 3032 vpxorq ymm18,ymm18,ymm183033 vpmadd52luq ymm18,ymm16,ymm23034 vpxorq ymm19,ymm19,ymm193035 vpmadd52huq ymm19,ymm16,ymm23036 vpxorq ymm20,ymm20,ymm203037 vpmadd52luq ymm20,ymm17,ymm23038 vpxorq ymm21,ymm21,ymm213039 vpmadd52huq ymm21,ymm17,ymm23040 vpxorq ymm22,ymm22,ymm223041 vpmadd52luq ymm22,ymm3,ymm23042 vpxorq ymm23,ymm23,ymm233043 vpmadd52huq ymm23,ymm3,ymm23044 3045 vmovdqu64 ymm26,YMMWORD[rsi]3046 vmovdqu64 ymm27,YMMWORD[32+rsi]3047 lea rsi,[64+rsi]3048 vpmadd52luq ymm18,ymm3,ymm03049 vpmadd52huq ymm19,ymm3,ymm03050 vpmadd52luq ymm20,ymm4,ymm03051 vpmadd52huq ymm21,ymm4,ymm03052 vpmadd52luq ymm22,ymm5,ymm03053 vpmadd52huq ymm23,ymm5,ymm03054 3055 vpunpcklqdq ymm25,ymm26,ymm273056 vpunpckhqdq ymm27,ymm26,ymm273057 vpmadd52luq ymm18,ymm17,ymm13058 vpmadd52huq ymm19,ymm17,ymm13059 vpmadd52luq ymm20,ymm3,ymm13060 vpmadd52huq ymm21,ymm3,ymm13061 vpmadd52luq ymm22,ymm4,ymm13062 vpmadd52huq ymm23,ymm4,ymm13063 3064 3065 3066 vpsrlq ymm30,ymm18,443067 vpsllq ymm19,ymm19,83068 vpandq ymm0,ymm18,ymm283069 vpaddq ymm19,ymm19,ymm303070 3071 vpsrlq ymm26,ymm27,243072 vporq ymm26,ymm26,ymm313073 vpaddq ymm20,ymm20,ymm193074 3075 vpsrlq ymm30,ymm20,443076 vpsllq ymm21,ymm21,83077 vpandq ymm1,ymm20,ymm283078 vpaddq ymm21,ymm21,ymm303079 3080 vpandq ymm24,ymm25,ymm283081 vpsrlq ymm25,ymm25,443082 vpsllq ymm27,ymm27,203083 vpaddq ymm22,ymm22,ymm213084 3085 vpsrlq ymm30,ymm22,423086 vpsllq ymm23,ymm23,103087 vpandq ymm2,ymm22,ymm293088 vpaddq ymm23,ymm23,ymm303089 3090 vpaddq ymm2,ymm2,ymm263091 vpaddq ymm0,ymm0,ymm233092 vpsllq ymm23,ymm23,23093 3094 vpaddq ymm0,ymm0,ymm233095 vporq ymm25,ymm25,ymm273096 vpandq ymm25,ymm25,ymm283097 3098 vpsrlq ymm30,ymm0,443099 vpandq ymm0,ymm0,ymm283100 3101 vpaddq ymm1,ymm1,ymm303102 3103 sub rdx,43104 jnz NEAR $L$oop_vpmadd52_4x3105 3106 $L$tail_vpmadd52_4x:3107 vmovdqu64 ymm5,YMMWORD[128+rdi]3108 vmovdqu64 ymm16,YMMWORD[160+rdi]3109 vmovdqu64 ymm3,YMMWORD[64+rdi]3110 vmovdqu64 ymm4,YMMWORD[96+rdi]3111 3112 $L$tail_vpmadd52_2x:3113 vpsllq ymm17,ymm5,23114 vpaddq ymm17,ymm17,ymm53115 vpsllq ymm17,ymm17,23116 3117 3118 vpaddq ymm0,ymm0,ymm243119 vpaddq ymm1,ymm1,ymm253120 3121 vpxorq ymm18,ymm18,ymm183122 vpmadd52luq ymm18,ymm16,ymm23123 vpxorq ymm19,ymm19,ymm193124 vpmadd52huq ymm19,ymm16,ymm23125 vpxorq ymm20,ymm20,ymm203126 vpmadd52luq ymm20,ymm17,ymm23127 vpxorq ymm21,ymm21,ymm213128 vpmadd52huq ymm21,ymm17,ymm23129 vpxorq ymm22,ymm22,ymm223130 vpmadd52luq ymm22,ymm3,ymm23131 vpxorq ymm23,ymm23,ymm233132 vpmadd52huq ymm23,ymm3,ymm23133 3134 vpmadd52luq ymm18,ymm3,ymm03135 vpmadd52huq ymm19,ymm3,ymm03136 vpmadd52luq ymm20,ymm4,ymm03137 vpmadd52huq ymm21,ymm4,ymm03138 vpmadd52luq ymm22,ymm5,ymm03139 vpmadd52huq ymm23,ymm5,ymm03140 3141 vpmadd52luq ymm18,ymm17,ymm13142 vpmadd52huq ymm19,ymm17,ymm13143 vpmadd52luq ymm20,ymm3,ymm13144 vpmadd52huq ymm21,ymm3,ymm13145 vpmadd52luq ymm22,ymm4,ymm13146 vpmadd52huq ymm23,ymm4,ymm13147 3148 3149 3150 3151 mov eax,13152 kmovw k1,eax3153 vpsrldq ymm24,ymm18,83154 vpsrldq ymm0,ymm19,83155 vpsrldq ymm25,ymm20,83156 vpsrldq ymm1,ymm21,83157 vpaddq ymm18,ymm18,ymm243158 vpaddq ymm19,ymm19,ymm03159 vpsrldq ymm26,ymm22,83160 vpsrldq ymm2,ymm23,83161 vpaddq ymm20,ymm20,ymm253162 vpaddq ymm21,ymm21,ymm13163 vpermq ymm24,ymm18,0x23164 vpermq ymm0,ymm19,0x23165 vpaddq ymm22,ymm22,ymm263166 vpaddq ymm23,ymm23,ymm23167 3168 vpermq ymm25,ymm20,0x23169 vpermq ymm1,ymm21,0x23170 vpaddq ymm18{k1}{z},ymm18,ymm243171 vpaddq ymm19{k1}{z},ymm19,ymm03172 vpermq ymm26,ymm22,0x23173 vpermq ymm2,ymm23,0x23174 vpaddq ymm20{k1}{z},ymm20,ymm253175 vpaddq ymm21{k1}{z},ymm21,ymm13176 vpaddq ymm22{k1}{z},ymm22,ymm263177 vpaddq ymm23{k1}{z},ymm23,ymm23178 3179 3180 3181 vpsrlq ymm30,ymm18,443182 vpsllq ymm19,ymm19,83183 vpandq ymm0,ymm18,ymm283184 vpaddq ymm19,ymm19,ymm303185 3186 vpaddq ymm20,ymm20,ymm193187 3188 vpsrlq ymm30,ymm20,443189 vpsllq ymm21,ymm21,83190 vpandq ymm1,ymm20,ymm283191 vpaddq ymm21,ymm21,ymm303192 3193 vpaddq ymm22,ymm22,ymm213194 3195 vpsrlq ymm30,ymm22,423196 vpsllq ymm23,ymm23,103197 vpandq ymm2,ymm22,ymm293198 vpaddq ymm23,ymm23,ymm303199 3200 vpaddq ymm0,ymm0,ymm233201 vpsllq ymm23,ymm23,23202 3203 vpaddq ymm0,ymm0,ymm233204 3205 vpsrlq ymm30,ymm0,443206 vpandq ymm0,ymm0,ymm283207 3208 vpaddq ymm1,ymm1,ymm303209 3210 3211 sub rdx,23212 ja NEAR $L$blocks_vpmadd52_4x_do3213 3214 vmovq QWORD[rdi],xmm03215 vmovq QWORD[8+rdi],xmm13216 vmovq QWORD[16+rdi],xmm23217 vzeroall3218 3219 $L$no_data_vpmadd52_4x:3220 mov rdi,QWORD[8+rsp] ;WIN64 epilogue3221 mov rsi,QWORD[16+rsp]3222 DB 0F3h,0C3h ;repret3223 3224 $L$SEH_end_poly1305_blocks_vpmadd52_4x:3225 3226 ALIGN 323227 poly1305_blocks_vpmadd52_8x:3228 mov QWORD[8+rsp],rdi ;WIN64 prologue3229 mov QWORD[16+rsp],rsi3230 mov rax,rsp3231 $L$SEH_begin_poly1305_blocks_vpmadd52_8x:3232 mov rdi,rcx3233 mov rsi,rdx3234 mov rdx,r83235 mov rcx,r93236 3237 3238 3239 shr rdx,43240 jz NEAR $L$no_data_vpmadd52_8x3241 3242 shl rcx,403243 mov r8,QWORD[64+rdi]3244 3245 vmovdqa64 ymm28,YMMWORD[$L$x_mask44]3246 vmovdqa64 ymm29,YMMWORD[$L$x_mask42]3247 3248 test r8,r83249 js NEAR $L$init_vpmadd523250 3251 vmovq xmm0,QWORD[rdi]3252 vmovq xmm1,QWORD[8+rdi]3253 vmovq xmm2,QWORD[16+rdi]3254 3255 $L$blocks_vpmadd52_8x:3256 3257 3258 3259 vmovdqu64 ymm5,YMMWORD[128+rdi]3260 vmovdqu64 ymm16,YMMWORD[160+rdi]3261 vmovdqu64 ymm3,YMMWORD[64+rdi]3262 vmovdqu64 ymm4,YMMWORD[96+rdi]3263 3264 vpsllq ymm17,ymm5,23265 vpaddq ymm17,ymm17,ymm53266 vpsllq ymm17,ymm17,23267 3268 vpbroadcastq ymm8,xmm53269 vpbroadcastq ymm6,xmm33270 vpbroadcastq ymm7,xmm43271 3272 vpxorq ymm18,ymm18,ymm183273 vpmadd52luq ymm18,ymm16,ymm83274 vpxorq ymm19,ymm19,ymm193275 vpmadd52huq ymm19,ymm16,ymm83276 vpxorq ymm20,ymm20,ymm203277 vpmadd52luq ymm20,ymm17,ymm83278 vpxorq ymm21,ymm21,ymm213279 vpmadd52huq ymm21,ymm17,ymm83280 vpxorq ymm22,ymm22,ymm223281 vpmadd52luq ymm22,ymm3,ymm83282 vpxorq ymm23,ymm23,ymm233283 vpmadd52huq ymm23,ymm3,ymm83284 3285 vpmadd52luq ymm18,ymm3,ymm63286 vpmadd52huq ymm19,ymm3,ymm63287 vpmadd52luq ymm20,ymm4,ymm63288 vpmadd52huq ymm21,ymm4,ymm63289 vpmadd52luq ymm22,ymm5,ymm63290 vpmadd52huq ymm23,ymm5,ymm63291 3292 vpmadd52luq ymm18,ymm17,ymm73293 vpmadd52huq ymm19,ymm17,ymm73294 vpmadd52luq ymm20,ymm3,ymm73295 vpmadd52huq ymm21,ymm3,ymm73296 vpmadd52luq ymm22,ymm4,ymm73297 vpmadd52huq ymm23,ymm4,ymm73298 3299 3300 3301 vpsrlq ymm30,ymm18,443302 vpsllq ymm19,ymm19,83303 vpandq ymm6,ymm18,ymm283304 vpaddq ymm19,ymm19,ymm303305 3306 vpaddq ymm20,ymm20,ymm193307 3308 vpsrlq ymm30,ymm20,443309 vpsllq ymm21,ymm21,83310 vpandq ymm7,ymm20,ymm283311 vpaddq ymm21,ymm21,ymm303312 3313 vpaddq ymm22,ymm22,ymm213314 3315 vpsrlq ymm30,ymm22,423316 vpsllq ymm23,ymm23,103317 vpandq ymm8,ymm22,ymm293318 vpaddq ymm23,ymm23,ymm303319 3320 vpaddq ymm6,ymm6,ymm233321 vpsllq ymm23,ymm23,23322 3323 vpaddq ymm6,ymm6,ymm233324 3325 vpsrlq ymm30,ymm6,443326 vpandq ymm6,ymm6,ymm283327 3328 vpaddq ymm7,ymm7,ymm303329 3330 3331 3332 3333 3334 vpunpcklqdq ymm26,ymm8,ymm53335 vpunpckhqdq ymm5,ymm8,ymm53336 vpunpcklqdq ymm24,ymm6,ymm33337 vpunpckhqdq ymm3,ymm6,ymm33338 vpunpcklqdq ymm25,ymm7,ymm43339 vpunpckhqdq ymm4,ymm7,ymm43340 vshufi64x2 zmm8,zmm26,zmm5,0x443341 vshufi64x2 zmm6,zmm24,zmm3,0x443342 vshufi64x2 zmm7,zmm25,zmm4,0x443343 3344 vmovdqu64 zmm26,ZMMWORD[rsi]3345 vmovdqu64 zmm27,ZMMWORD[64+rsi]3346 lea rsi,[128+rsi]3347 3348 vpsllq zmm10,zmm8,23349 vpsllq zmm9,zmm7,23350 vpaddq zmm10,zmm10,zmm83351 vpaddq zmm9,zmm9,zmm73352 vpsllq zmm10,zmm10,23353 vpsllq zmm9,zmm9,23354 3355 vpbroadcastq zmm31,rcx3356 vpbroadcastq zmm28,xmm283357 vpbroadcastq zmm29,xmm293358 3359 vpbroadcastq zmm16,xmm93360 vpbroadcastq zmm17,xmm103361 vpbroadcastq zmm3,xmm63362 vpbroadcastq zmm4,xmm73363 vpbroadcastq zmm5,xmm83364 3365 vpunpcklqdq zmm25,zmm26,zmm273366 vpunpckhqdq zmm27,zmm26,zmm273367 3368 3369 3370 vpsrlq zmm26,zmm27,243371 vporq zmm26,zmm26,zmm313372 vpaddq zmm2,zmm2,zmm263373 vpandq zmm24,zmm25,zmm283374 vpsrlq zmm25,zmm25,443375 vpsllq zmm27,zmm27,203376 vporq zmm25,zmm25,zmm273377 vpandq zmm25,zmm25,zmm283378 3379 sub rdx,83380 jz NEAR $L$tail_vpmadd52_8x3381 jmp NEAR $L$oop_vpmadd52_8x3382 3383 ALIGN 323384 $L$oop_vpmadd52_8x:3385 3386 vpaddq zmm0,zmm0,zmm243387 vpaddq zmm1,zmm1,zmm253388 3389 vpxorq zmm18,zmm18,zmm183390 vpmadd52luq zmm18,zmm16,zmm23391 vpxorq zmm19,zmm19,zmm193392 vpmadd52huq zmm19,zmm16,zmm23393 vpxorq zmm20,zmm20,zmm203394 vpmadd52luq zmm20,zmm17,zmm23395 vpxorq zmm21,zmm21,zmm213396 vpmadd52huq zmm21,zmm17,zmm23397 vpxorq zmm22,zmm22,zmm223398 vpmadd52luq zmm22,zmm3,zmm23399 vpxorq zmm23,zmm23,zmm233400 vpmadd52huq zmm23,zmm3,zmm23401 3402 vmovdqu64 zmm26,ZMMWORD[rsi]3403 vmovdqu64 zmm27,ZMMWORD[64+rsi]3404 lea rsi,[128+rsi]3405 vpmadd52luq zmm18,zmm3,zmm03406 vpmadd52huq zmm19,zmm3,zmm03407 vpmadd52luq zmm20,zmm4,zmm03408 vpmadd52huq zmm21,zmm4,zmm03409 vpmadd52luq zmm22,zmm5,zmm03410 vpmadd52huq zmm23,zmm5,zmm03411 3412 vpunpcklqdq zmm25,zmm26,zmm273413 vpunpckhqdq zmm27,zmm26,zmm273414 vpmadd52luq zmm18,zmm17,zmm13415 vpmadd52huq zmm19,zmm17,zmm13416 vpmadd52luq zmm20,zmm3,zmm13417 vpmadd52huq zmm21,zmm3,zmm13418 vpmadd52luq zmm22,zmm4,zmm13419 vpmadd52huq zmm23,zmm4,zmm13420 3421 3422 3423 vpsrlq zmm30,zmm18,443424 vpsllq zmm19,zmm19,83425 vpandq zmm0,zmm18,zmm283426 vpaddq zmm19,zmm19,zmm303427 3428 vpsrlq zmm26,zmm27,243429 vporq zmm26,zmm26,zmm313430 vpaddq zmm20,zmm20,zmm193431 3432 vpsrlq zmm30,zmm20,443433 vpsllq zmm21,zmm21,83434 vpandq zmm1,zmm20,zmm283435 vpaddq zmm21,zmm21,zmm303436 3437 vpandq zmm24,zmm25,zmm283438 vpsrlq zmm25,zmm25,443439 vpsllq zmm27,zmm27,203440 vpaddq zmm22,zmm22,zmm213441 3442 vpsrlq zmm30,zmm22,423443 vpsllq zmm23,zmm23,103444 vpandq zmm2,zmm22,zmm293445 vpaddq zmm23,zmm23,zmm303446 3447 vpaddq zmm2,zmm2,zmm263448 vpaddq zmm0,zmm0,zmm233449 vpsllq zmm23,zmm23,23450 3451 vpaddq zmm0,zmm0,zmm233452 vporq zmm25,zmm25,zmm273453 vpandq zmm25,zmm25,zmm283454 3455 vpsrlq zmm30,zmm0,443456 vpandq zmm0,zmm0,zmm283457 3458 vpaddq zmm1,zmm1,zmm303459 3460 sub rdx,83461 jnz NEAR $L$oop_vpmadd52_8x3462 3463 $L$tail_vpmadd52_8x:3464 3465 vpaddq zmm0,zmm0,zmm243466 vpaddq zmm1,zmm1,zmm253467 3468 vpxorq zmm18,zmm18,zmm183469 vpmadd52luq zmm18,zmm9,zmm23470 vpxorq zmm19,zmm19,zmm193471 vpmadd52huq zmm19,zmm9,zmm23472 vpxorq zmm20,zmm20,zmm203473 vpmadd52luq zmm20,zmm10,zmm23474 vpxorq zmm21,zmm21,zmm213475 vpmadd52huq zmm21,zmm10,zmm23476 vpxorq zmm22,zmm22,zmm223477 vpmadd52luq zmm22,zmm6,zmm23478 vpxorq zmm23,zmm23,zmm233479 vpmadd52huq zmm23,zmm6,zmm23480 3481 vpmadd52luq zmm18,zmm6,zmm03482 vpmadd52huq zmm19,zmm6,zmm03483 vpmadd52luq zmm20,zmm7,zmm03484 vpmadd52huq zmm21,zmm7,zmm03485 vpmadd52luq zmm22,zmm8,zmm03486 vpmadd52huq zmm23,zmm8,zmm03487 3488 vpmadd52luq zmm18,zmm10,zmm13489 vpmadd52huq zmm19,zmm10,zmm13490 vpmadd52luq zmm20,zmm6,zmm13491 vpmadd52huq zmm21,zmm6,zmm13492 vpmadd52luq zmm22,zmm7,zmm13493 vpmadd52huq zmm23,zmm7,zmm13494 3495 3496 3497 3498 mov eax,13499 kmovw k1,eax3500 vpsrldq zmm24,zmm18,83501 vpsrldq zmm0,zmm19,83502 vpsrldq zmm25,zmm20,83503 vpsrldq zmm1,zmm21,83504 vpaddq zmm18,zmm18,zmm243505 vpaddq zmm19,zmm19,zmm03506 vpsrldq zmm26,zmm22,83507 vpsrldq zmm2,zmm23,83508 vpaddq zmm20,zmm20,zmm253509 vpaddq zmm21,zmm21,zmm13510 vpermq zmm24,zmm18,0x23511 vpermq zmm0,zmm19,0x23512 vpaddq zmm22,zmm22,zmm263513 vpaddq zmm23,zmm23,zmm23514 3515 vpermq zmm25,zmm20,0x23516 vpermq zmm1,zmm21,0x23517 vpaddq zmm18,zmm18,zmm243518 vpaddq zmm19,zmm19,zmm03519 vpermq zmm26,zmm22,0x23520 vpermq zmm2,zmm23,0x23521 vpaddq zmm20,zmm20,zmm253522 vpaddq zmm21,zmm21,zmm13523 vextracti64x4 ymm24,zmm18,13524 vextracti64x4 ymm0,zmm19,13525 vpaddq zmm22,zmm22,zmm263526 vpaddq zmm23,zmm23,zmm23527 3528 vextracti64x4 ymm25,zmm20,13529 vextracti64x4 ymm1,zmm21,13530 vextracti64x4 ymm26,zmm22,13531 vextracti64x4 ymm2,zmm23,13532 vpaddq ymm18{k1}{z},ymm18,ymm243533 vpaddq ymm19{k1}{z},ymm19,ymm03534 vpaddq ymm20{k1}{z},ymm20,ymm253535 vpaddq ymm21{k1}{z},ymm21,ymm13536 vpaddq ymm22{k1}{z},ymm22,ymm263537 vpaddq ymm23{k1}{z},ymm23,ymm23538 3539 3540 3541 vpsrlq ymm30,ymm18,443542 vpsllq ymm19,ymm19,83543 vpandq ymm0,ymm18,ymm283544 vpaddq ymm19,ymm19,ymm303545 3546 vpaddq ymm20,ymm20,ymm193547 3548 vpsrlq ymm30,ymm20,443549 vpsllq ymm21,ymm21,83550 vpandq ymm1,ymm20,ymm283551 vpaddq ymm21,ymm21,ymm303552 3553 vpaddq ymm22,ymm22,ymm213554 3555 vpsrlq ymm30,ymm22,423556 vpsllq ymm23,ymm23,103557 vpandq ymm2,ymm22,ymm293558 vpaddq ymm23,ymm23,ymm303559 3560 vpaddq ymm0,ymm0,ymm233561 vpsllq ymm23,ymm23,23562 3563 vpaddq ymm0,ymm0,ymm233564 3565 vpsrlq ymm30,ymm0,443566 vpandq ymm0,ymm0,ymm283567 3568 vpaddq ymm1,ymm1,ymm303569 3570 3571 3572 vmovq QWORD[rdi],xmm03573 vmovq QWORD[8+rdi],xmm13574 vmovq QWORD[16+rdi],xmm23575 vzeroall3576 3577 $L$no_data_vpmadd52_8x:3578 mov rdi,QWORD[8+rsp] ;WIN64 epilogue3579 mov rsi,QWORD[16+rsp]3580 DB 0F3h,0C3h ;repret3581 3582 $L$SEH_end_poly1305_blocks_vpmadd52_8x:3583 3584 ALIGN 323585 poly1305_emit_base2_44:3586 mov QWORD[8+rsp],rdi ;WIN64 prologue3587 mov QWORD[16+rsp],rsi3588 mov rax,rsp3589 $L$SEH_begin_poly1305_emit_base2_44:3590 mov rdi,rcx3591 mov rsi,rdx3592 mov rdx,r83593 3594 3595 3596 DB 243,15,30,2503597 mov r8,QWORD[rdi]3598 mov r9,QWORD[8+rdi]3599 mov r10,QWORD[16+rdi]3600 3601 mov rax,r93602 shr r9,203603 shl rax,443604 mov rcx,r103605 shr r10,403606 shl rcx,243607 3608 add r8,rax3609 adc r9,rcx3610 adc r10,03611 3612 mov rax,r83613 add r8,53614 mov rcx,r93615 adc r9,03616 adc r10,03617 shr r10,23618 cmovnz rax,r83619 cmovnz rcx,r93620 3621 add rax,QWORD[rdx]3622 adc rcx,QWORD[8+rdx]3623 mov QWORD[rsi],rax3624 mov QWORD[8+rsi],rcx3625 3626 mov rdi,QWORD[8+rsp] ;WIN64 epilogue3627 mov rsi,QWORD[16+rsp]3628 DB 0F3h,0C3h ;repret3629 3630 $L$SEH_end_poly1305_emit_base2_44:3631 ALIGN 643632 $L$const:3633 $L$mask24:3634 DD 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,03635 $L$129:3636 DD 16777216,0,16777216,0,16777216,0,16777216,03637 $L$mask26:3638 DD 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,03639 $L$permd_avx2:3640 DD 2,2,2,3,2,0,2,13641 $L$permd_avx512:3642 DD 0,0,0,1,0,2,0,3,0,4,0,5,0,6,0,73643 3644 $L$2_44_inp_permd:3645 DD 0,1,1,2,2,3,7,73646 $L$2_44_inp_shift:3647 DQ 0,12,24,643648 $L$2_44_mask:3649 DQ 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff3650 $L$2_44_shift_rgt:3651 DQ 44,44,42,643652 $L$2_44_shift_lft:3653 DQ 8,8,10,643654 3655 ALIGN 643656 $L$x_mask44:3657 DQ 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff3658 DQ 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff3659 $L$x_mask42:3660 DQ 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff3661 DQ 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff3662 214 DB 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54 3663 215 DB 95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32 … … 3910 462 DD $L$SEH_end_poly1305_emit wrt ..imagebase 3911 463 DD $L$SEH_info_poly1305_emit wrt ..imagebase 3912 DD $L$SEH_begin_poly1305_blocks_avx wrt ..imagebase3913 DD $L$base2_64_avx wrt ..imagebase3914 DD $L$SEH_info_poly1305_blocks_avx_1 wrt ..imagebase3915 3916 DD $L$base2_64_avx wrt ..imagebase3917 DD $L$even_avx wrt ..imagebase3918 DD $L$SEH_info_poly1305_blocks_avx_2 wrt ..imagebase3919 3920 DD $L$even_avx wrt ..imagebase3921 DD $L$SEH_end_poly1305_blocks_avx wrt ..imagebase3922 DD $L$SEH_info_poly1305_blocks_avx_3 wrt ..imagebase3923 3924 DD $L$SEH_begin_poly1305_emit_avx wrt ..imagebase3925 DD $L$SEH_end_poly1305_emit_avx wrt ..imagebase3926 DD $L$SEH_info_poly1305_emit_avx wrt ..imagebase3927 DD $L$SEH_begin_poly1305_blocks_avx2 wrt ..imagebase3928 DD $L$base2_64_avx2 wrt ..imagebase3929 DD $L$SEH_info_poly1305_blocks_avx2_1 wrt ..imagebase3930 3931 DD $L$base2_64_avx2 wrt ..imagebase3932 DD $L$even_avx2 wrt ..imagebase3933 DD $L$SEH_info_poly1305_blocks_avx2_2 wrt ..imagebase3934 3935 DD $L$even_avx2 wrt ..imagebase3936 DD $L$SEH_end_poly1305_blocks_avx2 wrt ..imagebase3937 DD $L$SEH_info_poly1305_blocks_avx2_3 wrt ..imagebase3938 DD $L$SEH_begin_poly1305_blocks_avx512 wrt ..imagebase3939 DD $L$SEH_end_poly1305_blocks_avx512 wrt ..imagebase3940 DD $L$SEH_info_poly1305_blocks_avx512 wrt ..imagebase3941 464 section .xdata rdata align=8 3942 465 ALIGN 8 … … 3955 478 DD se_handler wrt ..imagebase 3956 479 DD $L$SEH_begin_poly1305_emit wrt ..imagebase,$L$SEH_begin_poly1305_emit wrt ..imagebase 3957 $L$SEH_info_poly1305_blocks_avx_1:3958 DB 9,0,0,03959 DD se_handler wrt ..imagebase3960 DD $L$blocks_avx_body wrt ..imagebase,$L$blocks_avx_epilogue wrt ..imagebase3961 3962 $L$SEH_info_poly1305_blocks_avx_2:3963 DB 9,0,0,03964 DD se_handler wrt ..imagebase3965 DD $L$base2_64_avx_body wrt ..imagebase,$L$base2_64_avx_epilogue wrt ..imagebase3966 3967 $L$SEH_info_poly1305_blocks_avx_3:3968 DB 9,0,0,03969 DD avx_handler wrt ..imagebase3970 DD $L$do_avx_body wrt ..imagebase,$L$do_avx_epilogue wrt ..imagebase3971 3972 $L$SEH_info_poly1305_emit_avx:3973 DB 9,0,0,03974 DD se_handler wrt ..imagebase3975 DD $L$SEH_begin_poly1305_emit_avx wrt ..imagebase,$L$SEH_begin_poly1305_emit_avx wrt ..imagebase3976 $L$SEH_info_poly1305_blocks_avx2_1:3977 DB 9,0,0,03978 DD se_handler wrt ..imagebase3979 DD $L$blocks_avx2_body wrt ..imagebase,$L$blocks_avx2_epilogue wrt ..imagebase3980 3981 $L$SEH_info_poly1305_blocks_avx2_2:3982 DB 9,0,0,03983 DD se_handler wrt ..imagebase3984 DD $L$base2_64_avx2_body wrt ..imagebase,$L$base2_64_avx2_epilogue wrt ..imagebase3985 3986 $L$SEH_info_poly1305_blocks_avx2_3:3987 DB 9,0,0,03988 DD avx_handler wrt ..imagebase3989 DD $L$do_avx2_body wrt ..imagebase,$L$do_avx2_epilogue wrt ..imagebase3990 $L$SEH_info_poly1305_blocks_avx512:3991 DB 9,0,0,03992 DD avx_handler wrt ..imagebase3993 DD $L$do_avx512_body wrt ..imagebase,$L$do_avx512_epilogue wrt ..imagebase -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/rsaz-avx2.S
r95219 r95221 6 6 7 7 8 global rsaz_ 1024_sqr_avx28 global rsaz_avx2_eligible 9 9 10 ALIGN 64 11 rsaz_1024_sqr_avx2: 12 mov QWORD[8+rsp],rdi ;WIN64 prologue 13 mov QWORD[16+rsp],rsi 14 mov rax,rsp 15 $L$SEH_begin_rsaz_1024_sqr_avx2: 16 mov rdi,rcx 17 mov rsi,rdx 18 mov rdx,r8 19 mov rcx,r9 20 mov r8,QWORD[40+rsp] 21 22 23 24 lea rax,[rsp] 25 26 push rbx 27 28 push rbp 29 30 push r12 31 32 push r13 33 34 push r14 35 36 push r15 37 38 vzeroupper 39 lea rsp,[((-168))+rsp] 40 vmovaps XMMWORD[(-216)+rax],xmm6 41 vmovaps XMMWORD[(-200)+rax],xmm7 42 vmovaps XMMWORD[(-184)+rax],xmm8 43 vmovaps XMMWORD[(-168)+rax],xmm9 44 vmovaps XMMWORD[(-152)+rax],xmm10 45 vmovaps XMMWORD[(-136)+rax],xmm11 46 vmovaps XMMWORD[(-120)+rax],xmm12 47 vmovaps XMMWORD[(-104)+rax],xmm13 48 vmovaps XMMWORD[(-88)+rax],xmm14 49 vmovaps XMMWORD[(-72)+rax],xmm15 50 $L$sqr_1024_body: 51 mov rbp,rax 52 53 mov r13,rdx 54 sub rsp,832 55 mov r15,r13 56 sub rdi,-128 57 sub rsi,-128 58 sub r13,-128 59 60 and r15,4095 61 add r15,32*10 62 shr r15,12 63 vpxor ymm9,ymm9,ymm9 64 jz NEAR $L$sqr_1024_no_n_copy 65 66 67 68 69 70 sub rsp,32*10 71 vmovdqu ymm0,YMMWORD[((0-128))+r13] 72 and rsp,-2048 73 vmovdqu ymm1,YMMWORD[((32-128))+r13] 74 vmovdqu ymm2,YMMWORD[((64-128))+r13] 75 vmovdqu ymm3,YMMWORD[((96-128))+r13] 76 vmovdqu ymm4,YMMWORD[((128-128))+r13] 77 vmovdqu ymm5,YMMWORD[((160-128))+r13] 78 vmovdqu ymm6,YMMWORD[((192-128))+r13] 79 vmovdqu ymm7,YMMWORD[((224-128))+r13] 80 vmovdqu ymm8,YMMWORD[((256-128))+r13] 81 lea r13,[((832+128))+rsp] 82 vmovdqu YMMWORD[(0-128)+r13],ymm0 83 vmovdqu YMMWORD[(32-128)+r13],ymm1 84 vmovdqu YMMWORD[(64-128)+r13],ymm2 85 vmovdqu YMMWORD[(96-128)+r13],ymm3 86 vmovdqu YMMWORD[(128-128)+r13],ymm4 87 vmovdqu YMMWORD[(160-128)+r13],ymm5 88 vmovdqu YMMWORD[(192-128)+r13],ymm6 89 vmovdqu YMMWORD[(224-128)+r13],ymm7 90 vmovdqu YMMWORD[(256-128)+r13],ymm8 91 vmovdqu YMMWORD[(288-128)+r13],ymm9 92 93 $L$sqr_1024_no_n_copy: 94 and rsp,-1024 95 96 vmovdqu ymm1,YMMWORD[((32-128))+rsi] 97 vmovdqu ymm2,YMMWORD[((64-128))+rsi] 98 vmovdqu ymm3,YMMWORD[((96-128))+rsi] 99 vmovdqu ymm4,YMMWORD[((128-128))+rsi] 100 vmovdqu ymm5,YMMWORD[((160-128))+rsi] 101 vmovdqu ymm6,YMMWORD[((192-128))+rsi] 102 vmovdqu ymm7,YMMWORD[((224-128))+rsi] 103 vmovdqu ymm8,YMMWORD[((256-128))+rsi] 104 105 lea rbx,[192+rsp] 106 vmovdqu ymm15,YMMWORD[$L$and_mask] 107 jmp NEAR $L$OOP_GRANDE_SQR_1024 108 109 ALIGN 32 110 $L$OOP_GRANDE_SQR_1024: 111 lea r9,[((576+128))+rsp] 112 lea r12,[448+rsp] 113 114 115 116 117 vpaddq ymm1,ymm1,ymm1 118 vpbroadcastq ymm10,QWORD[((0-128))+rsi] 119 vpaddq ymm2,ymm2,ymm2 120 vmovdqa YMMWORD[(0-128)+r9],ymm1 121 vpaddq ymm3,ymm3,ymm3 122 vmovdqa YMMWORD[(32-128)+r9],ymm2 123 vpaddq ymm4,ymm4,ymm4 124 vmovdqa YMMWORD[(64-128)+r9],ymm3 125 vpaddq ymm5,ymm5,ymm5 126 vmovdqa YMMWORD[(96-128)+r9],ymm4 127 vpaddq ymm6,ymm6,ymm6 128 vmovdqa YMMWORD[(128-128)+r9],ymm5 129 vpaddq ymm7,ymm7,ymm7 130 vmovdqa YMMWORD[(160-128)+r9],ymm6 131 vpaddq ymm8,ymm8,ymm8 132 vmovdqa YMMWORD[(192-128)+r9],ymm7 133 vpxor ymm9,ymm9,ymm9 134 vmovdqa YMMWORD[(224-128)+r9],ymm8 135 136 vpmuludq ymm0,ymm10,YMMWORD[((0-128))+rsi] 137 vpbroadcastq ymm11,QWORD[((32-128))+rsi] 138 vmovdqu YMMWORD[(288-192)+rbx],ymm9 139 vpmuludq ymm1,ymm1,ymm10 140 vmovdqu YMMWORD[(320-448)+r12],ymm9 141 vpmuludq ymm2,ymm2,ymm10 142 vmovdqu YMMWORD[(352-448)+r12],ymm9 143 vpmuludq ymm3,ymm3,ymm10 144 vmovdqu YMMWORD[(384-448)+r12],ymm9 145 vpmuludq ymm4,ymm4,ymm10 146 vmovdqu YMMWORD[(416-448)+r12],ymm9 147 vpmuludq ymm5,ymm5,ymm10 148 vmovdqu YMMWORD[(448-448)+r12],ymm9 149 vpmuludq ymm6,ymm6,ymm10 150 vmovdqu YMMWORD[(480-448)+r12],ymm9 151 vpmuludq ymm7,ymm7,ymm10 152 vmovdqu YMMWORD[(512-448)+r12],ymm9 153 vpmuludq ymm8,ymm8,ymm10 154 vpbroadcastq ymm10,QWORD[((64-128))+rsi] 155 vmovdqu YMMWORD[(544-448)+r12],ymm9 156 157 mov r15,rsi 158 mov r14d,4 159 jmp NEAR $L$sqr_entry_1024 160 ALIGN 32 161 $L$OOP_SQR_1024: 162 vpbroadcastq ymm11,QWORD[((32-128))+r15] 163 vpmuludq ymm0,ymm10,YMMWORD[((0-128))+rsi] 164 vpaddq ymm0,ymm0,YMMWORD[((0-192))+rbx] 165 vpmuludq ymm1,ymm10,YMMWORD[((0-128))+r9] 166 vpaddq ymm1,ymm1,YMMWORD[((32-192))+rbx] 167 vpmuludq ymm2,ymm10,YMMWORD[((32-128))+r9] 168 vpaddq ymm2,ymm2,YMMWORD[((64-192))+rbx] 169 vpmuludq ymm3,ymm10,YMMWORD[((64-128))+r9] 170 vpaddq ymm3,ymm3,YMMWORD[((96-192))+rbx] 171 vpmuludq ymm4,ymm10,YMMWORD[((96-128))+r9] 172 vpaddq ymm4,ymm4,YMMWORD[((128-192))+rbx] 173 vpmuludq ymm5,ymm10,YMMWORD[((128-128))+r9] 174 vpaddq ymm5,ymm5,YMMWORD[((160-192))+rbx] 175 vpmuludq ymm6,ymm10,YMMWORD[((160-128))+r9] 176 vpaddq ymm6,ymm6,YMMWORD[((192-192))+rbx] 177 vpmuludq ymm7,ymm10,YMMWORD[((192-128))+r9] 178 vpaddq ymm7,ymm7,YMMWORD[((224-192))+rbx] 179 vpmuludq ymm8,ymm10,YMMWORD[((224-128))+r9] 180 vpbroadcastq ymm10,QWORD[((64-128))+r15] 181 vpaddq ymm8,ymm8,YMMWORD[((256-192))+rbx] 182 $L$sqr_entry_1024: 183 vmovdqu YMMWORD[(0-192)+rbx],ymm0 184 vmovdqu YMMWORD[(32-192)+rbx],ymm1 185 186 vpmuludq ymm12,ymm11,YMMWORD[((32-128))+rsi] 187 vpaddq ymm2,ymm2,ymm12 188 vpmuludq ymm14,ymm11,YMMWORD[((32-128))+r9] 189 vpaddq ymm3,ymm3,ymm14 190 vpmuludq ymm13,ymm11,YMMWORD[((64-128))+r9] 191 vpaddq ymm4,ymm4,ymm13 192 vpmuludq ymm12,ymm11,YMMWORD[((96-128))+r9] 193 vpaddq ymm5,ymm5,ymm12 194 vpmuludq ymm14,ymm11,YMMWORD[((128-128))+r9] 195 vpaddq ymm6,ymm6,ymm14 196 vpmuludq ymm13,ymm11,YMMWORD[((160-128))+r9] 197 vpaddq ymm7,ymm7,ymm13 198 vpmuludq ymm12,ymm11,YMMWORD[((192-128))+r9] 199 vpaddq ymm8,ymm8,ymm12 200 vpmuludq ymm0,ymm11,YMMWORD[((224-128))+r9] 201 vpbroadcastq ymm11,QWORD[((96-128))+r15] 202 vpaddq ymm0,ymm0,YMMWORD[((288-192))+rbx] 203 204 vmovdqu YMMWORD[(64-192)+rbx],ymm2 205 vmovdqu YMMWORD[(96-192)+rbx],ymm3 206 207 vpmuludq ymm13,ymm10,YMMWORD[((64-128))+rsi] 208 vpaddq ymm4,ymm4,ymm13 209 vpmuludq ymm12,ymm10,YMMWORD[((64-128))+r9] 210 vpaddq ymm5,ymm5,ymm12 211 vpmuludq ymm14,ymm10,YMMWORD[((96-128))+r9] 212 vpaddq ymm6,ymm6,ymm14 213 vpmuludq ymm13,ymm10,YMMWORD[((128-128))+r9] 214 vpaddq ymm7,ymm7,ymm13 215 vpmuludq ymm12,ymm10,YMMWORD[((160-128))+r9] 216 vpaddq ymm8,ymm8,ymm12 217 vpmuludq ymm14,ymm10,YMMWORD[((192-128))+r9] 218 vpaddq ymm0,ymm0,ymm14 219 vpmuludq ymm1,ymm10,YMMWORD[((224-128))+r9] 220 vpbroadcastq ymm10,QWORD[((128-128))+r15] 221 vpaddq ymm1,ymm1,YMMWORD[((320-448))+r12] 222 223 vmovdqu YMMWORD[(128-192)+rbx],ymm4 224 vmovdqu YMMWORD[(160-192)+rbx],ymm5 225 226 vpmuludq ymm12,ymm11,YMMWORD[((96-128))+rsi] 227 vpaddq ymm6,ymm6,ymm12 228 vpmuludq ymm14,ymm11,YMMWORD[((96-128))+r9] 229 vpaddq ymm7,ymm7,ymm14 230 vpmuludq ymm13,ymm11,YMMWORD[((128-128))+r9] 231 vpaddq ymm8,ymm8,ymm13 232 vpmuludq ymm12,ymm11,YMMWORD[((160-128))+r9] 233 vpaddq ymm0,ymm0,ymm12 234 vpmuludq ymm14,ymm11,YMMWORD[((192-128))+r9] 235 vpaddq ymm1,ymm1,ymm14 236 vpmuludq ymm2,ymm11,YMMWORD[((224-128))+r9] 237 vpbroadcastq ymm11,QWORD[((160-128))+r15] 238 vpaddq ymm2,ymm2,YMMWORD[((352-448))+r12] 239 240 vmovdqu YMMWORD[(192-192)+rbx],ymm6 241 vmovdqu YMMWORD[(224-192)+rbx],ymm7 242 243 vpmuludq ymm12,ymm10,YMMWORD[((128-128))+rsi] 244 vpaddq ymm8,ymm8,ymm12 245 vpmuludq ymm14,ymm10,YMMWORD[((128-128))+r9] 246 vpaddq ymm0,ymm0,ymm14 247 vpmuludq ymm13,ymm10,YMMWORD[((160-128))+r9] 248 vpaddq ymm1,ymm1,ymm13 249 vpmuludq ymm12,ymm10,YMMWORD[((192-128))+r9] 250 vpaddq ymm2,ymm2,ymm12 251 vpmuludq ymm3,ymm10,YMMWORD[((224-128))+r9] 252 vpbroadcastq ymm10,QWORD[((192-128))+r15] 253 vpaddq ymm3,ymm3,YMMWORD[((384-448))+r12] 254 255 vmovdqu YMMWORD[(256-192)+rbx],ymm8 256 vmovdqu YMMWORD[(288-192)+rbx],ymm0 257 lea rbx,[8+rbx] 258 259 vpmuludq ymm13,ymm11,YMMWORD[((160-128))+rsi] 260 vpaddq ymm1,ymm1,ymm13 261 vpmuludq ymm12,ymm11,YMMWORD[((160-128))+r9] 262 vpaddq ymm2,ymm2,ymm12 263 vpmuludq ymm14,ymm11,YMMWORD[((192-128))+r9] 264 vpaddq ymm3,ymm3,ymm14 265 vpmuludq ymm4,ymm11,YMMWORD[((224-128))+r9] 266 vpbroadcastq ymm11,QWORD[((224-128))+r15] 267 vpaddq ymm4,ymm4,YMMWORD[((416-448))+r12] 268 269 vmovdqu YMMWORD[(320-448)+r12],ymm1 270 vmovdqu YMMWORD[(352-448)+r12],ymm2 271 272 vpmuludq ymm12,ymm10,YMMWORD[((192-128))+rsi] 273 vpaddq ymm3,ymm3,ymm12 274 vpmuludq ymm14,ymm10,YMMWORD[((192-128))+r9] 275 vpbroadcastq ymm0,QWORD[((256-128))+r15] 276 vpaddq ymm4,ymm4,ymm14 277 vpmuludq ymm5,ymm10,YMMWORD[((224-128))+r9] 278 vpbroadcastq ymm10,QWORD[((0+8-128))+r15] 279 vpaddq ymm5,ymm5,YMMWORD[((448-448))+r12] 280 281 vmovdqu YMMWORD[(384-448)+r12],ymm3 282 vmovdqu YMMWORD[(416-448)+r12],ymm4 283 lea r15,[8+r15] 284 285 vpmuludq ymm12,ymm11,YMMWORD[((224-128))+rsi] 286 vpaddq ymm5,ymm5,ymm12 287 vpmuludq ymm6,ymm11,YMMWORD[((224-128))+r9] 288 vpaddq ymm6,ymm6,YMMWORD[((480-448))+r12] 289 290 vpmuludq ymm7,ymm0,YMMWORD[((256-128))+rsi] 291 vmovdqu YMMWORD[(448-448)+r12],ymm5 292 vpaddq ymm7,ymm7,YMMWORD[((512-448))+r12] 293 vmovdqu YMMWORD[(480-448)+r12],ymm6 294 vmovdqu YMMWORD[(512-448)+r12],ymm7 295 lea r12,[8+r12] 296 297 dec r14d 298 jnz NEAR $L$OOP_SQR_1024 299 300 vmovdqu ymm8,YMMWORD[256+rsp] 301 vmovdqu ymm1,YMMWORD[288+rsp] 302 vmovdqu ymm2,YMMWORD[320+rsp] 303 lea rbx,[192+rsp] 304 305 vpsrlq ymm14,ymm8,29 306 vpand ymm8,ymm8,ymm15 307 vpsrlq ymm11,ymm1,29 308 vpand ymm1,ymm1,ymm15 309 310 vpermq ymm14,ymm14,0x93 311 vpxor ymm9,ymm9,ymm9 312 vpermq ymm11,ymm11,0x93 313 314 vpblendd ymm10,ymm14,ymm9,3 315 vpblendd ymm14,ymm11,ymm14,3 316 vpaddq ymm8,ymm8,ymm10 317 vpblendd ymm11,ymm9,ymm11,3 318 vpaddq ymm1,ymm1,ymm14 319 vpaddq ymm2,ymm2,ymm11 320 vmovdqu YMMWORD[(288-192)+rbx],ymm1 321 vmovdqu YMMWORD[(320-192)+rbx],ymm2 322 323 mov rax,QWORD[rsp] 324 mov r10,QWORD[8+rsp] 325 mov r11,QWORD[16+rsp] 326 mov r12,QWORD[24+rsp] 327 vmovdqu ymm1,YMMWORD[32+rsp] 328 vmovdqu ymm2,YMMWORD[((64-192))+rbx] 329 vmovdqu ymm3,YMMWORD[((96-192))+rbx] 330 vmovdqu ymm4,YMMWORD[((128-192))+rbx] 331 vmovdqu ymm5,YMMWORD[((160-192))+rbx] 332 vmovdqu ymm6,YMMWORD[((192-192))+rbx] 333 vmovdqu ymm7,YMMWORD[((224-192))+rbx] 334 335 mov r9,rax 336 imul eax,ecx 337 and eax,0x1fffffff 338 vmovd xmm12,eax 339 340 mov rdx,rax 341 imul rax,QWORD[((-128))+r13] 342 vpbroadcastq ymm12,xmm12 343 add r9,rax 344 mov rax,rdx 345 imul rax,QWORD[((8-128))+r13] 346 shr r9,29 347 add r10,rax 348 mov rax,rdx 349 imul rax,QWORD[((16-128))+r13] 350 add r10,r9 351 add r11,rax 352 imul rdx,QWORD[((24-128))+r13] 353 add r12,rdx 354 355 mov rax,r10 356 imul eax,ecx 357 and eax,0x1fffffff 358 359 mov r14d,9 360 jmp NEAR $L$OOP_REDUCE_1024 361 362 ALIGN 32 363 $L$OOP_REDUCE_1024: 364 vmovd xmm13,eax 365 vpbroadcastq ymm13,xmm13 366 367 vpmuludq ymm10,ymm12,YMMWORD[((32-128))+r13] 368 mov rdx,rax 369 imul rax,QWORD[((-128))+r13] 370 vpaddq ymm1,ymm1,ymm10 371 add r10,rax 372 vpmuludq ymm14,ymm12,YMMWORD[((64-128))+r13] 373 mov rax,rdx 374 imul rax,QWORD[((8-128))+r13] 375 vpaddq ymm2,ymm2,ymm14 376 vpmuludq ymm11,ymm12,YMMWORD[((96-128))+r13] 377 DB 0x67 378 add r11,rax 379 DB 0x67 380 mov rax,rdx 381 imul rax,QWORD[((16-128))+r13] 382 shr r10,29 383 vpaddq ymm3,ymm3,ymm11 384 vpmuludq ymm10,ymm12,YMMWORD[((128-128))+r13] 385 add r12,rax 386 add r11,r10 387 vpaddq ymm4,ymm4,ymm10 388 vpmuludq ymm14,ymm12,YMMWORD[((160-128))+r13] 389 mov rax,r11 390 imul eax,ecx 391 vpaddq ymm5,ymm5,ymm14 392 vpmuludq ymm11,ymm12,YMMWORD[((192-128))+r13] 393 and eax,0x1fffffff 394 vpaddq ymm6,ymm6,ymm11 395 vpmuludq ymm10,ymm12,YMMWORD[((224-128))+r13] 396 vpaddq ymm7,ymm7,ymm10 397 vpmuludq ymm14,ymm12,YMMWORD[((256-128))+r13] 398 vmovd xmm12,eax 399 400 vpaddq ymm8,ymm8,ymm14 401 402 vpbroadcastq ymm12,xmm12 403 404 vpmuludq ymm11,ymm13,YMMWORD[((32-8-128))+r13] 405 vmovdqu ymm14,YMMWORD[((96-8-128))+r13] 406 mov rdx,rax 407 imul rax,QWORD[((-128))+r13] 408 vpaddq ymm1,ymm1,ymm11 409 vpmuludq ymm10,ymm13,YMMWORD[((64-8-128))+r13] 410 vmovdqu ymm11,YMMWORD[((128-8-128))+r13] 411 add r11,rax 412 mov rax,rdx 413 imul rax,QWORD[((8-128))+r13] 414 vpaddq ymm2,ymm2,ymm10 415 add rax,r12 416 shr r11,29 417 vpmuludq ymm14,ymm14,ymm13 418 vmovdqu ymm10,YMMWORD[((160-8-128))+r13] 419 add rax,r11 420 vpaddq ymm3,ymm3,ymm14 421 vpmuludq ymm11,ymm11,ymm13 422 vmovdqu ymm14,YMMWORD[((192-8-128))+r13] 423 DB 0x67 424 mov r12,rax 425 imul eax,ecx 426 vpaddq ymm4,ymm4,ymm11 427 vpmuludq ymm10,ymm10,ymm13 428 DB 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 429 and eax,0x1fffffff 430 vpaddq ymm5,ymm5,ymm10 431 vpmuludq ymm14,ymm14,ymm13 432 vmovdqu ymm10,YMMWORD[((256-8-128))+r13] 433 vpaddq ymm6,ymm6,ymm14 434 vpmuludq ymm11,ymm11,ymm13 435 vmovdqu ymm9,YMMWORD[((288-8-128))+r13] 436 vmovd xmm0,eax 437 imul rax,QWORD[((-128))+r13] 438 vpaddq ymm7,ymm7,ymm11 439 vpmuludq ymm10,ymm10,ymm13 440 vmovdqu ymm14,YMMWORD[((32-16-128))+r13] 441 vpbroadcastq ymm0,xmm0 442 vpaddq ymm8,ymm8,ymm10 443 vpmuludq ymm9,ymm9,ymm13 444 vmovdqu ymm11,YMMWORD[((64-16-128))+r13] 445 add r12,rax 446 447 vmovdqu ymm13,YMMWORD[((32-24-128))+r13] 448 vpmuludq ymm14,ymm14,ymm12 449 vmovdqu ymm10,YMMWORD[((96-16-128))+r13] 450 vpaddq ymm1,ymm1,ymm14 451 vpmuludq ymm13,ymm13,ymm0 452 vpmuludq ymm11,ymm11,ymm12 453 DB 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff 454 vpaddq ymm13,ymm13,ymm1 455 vpaddq ymm2,ymm2,ymm11 456 vpmuludq ymm10,ymm10,ymm12 457 vmovdqu ymm11,YMMWORD[((160-16-128))+r13] 458 DB 0x67 459 vmovq rax,xmm13 460 vmovdqu YMMWORD[rsp],ymm13 461 vpaddq ymm3,ymm3,ymm10 462 vpmuludq ymm14,ymm14,ymm12 463 vmovdqu ymm10,YMMWORD[((192-16-128))+r13] 464 vpaddq ymm4,ymm4,ymm14 465 vpmuludq ymm11,ymm11,ymm12 466 vmovdqu ymm14,YMMWORD[((224-16-128))+r13] 467 vpaddq ymm5,ymm5,ymm11 468 vpmuludq ymm10,ymm10,ymm12 469 vmovdqu ymm11,YMMWORD[((256-16-128))+r13] 470 vpaddq ymm6,ymm6,ymm10 471 vpmuludq ymm14,ymm14,ymm12 472 shr r12,29 473 vmovdqu ymm10,YMMWORD[((288-16-128))+r13] 474 add rax,r12 475 vpaddq ymm7,ymm7,ymm14 476 vpmuludq ymm11,ymm11,ymm12 477 478 mov r9,rax 479 imul eax,ecx 480 vpaddq ymm8,ymm8,ymm11 481 vpmuludq ymm10,ymm10,ymm12 482 and eax,0x1fffffff 483 vmovd xmm12,eax 484 vmovdqu ymm11,YMMWORD[((96-24-128))+r13] 485 DB 0x67 486 vpaddq ymm9,ymm9,ymm10 487 vpbroadcastq ymm12,xmm12 488 489 vpmuludq ymm14,ymm0,YMMWORD[((64-24-128))+r13] 490 vmovdqu ymm10,YMMWORD[((128-24-128))+r13] 491 mov rdx,rax 492 imul rax,QWORD[((-128))+r13] 493 mov r10,QWORD[8+rsp] 494 vpaddq ymm1,ymm2,ymm14 495 vpmuludq ymm11,ymm11,ymm0 496 vmovdqu ymm14,YMMWORD[((160-24-128))+r13] 497 add r9,rax 498 mov rax,rdx 499 imul rax,QWORD[((8-128))+r13] 500 DB 0x67 501 shr r9,29 502 mov r11,QWORD[16+rsp] 503 vpaddq ymm2,ymm3,ymm11 504 vpmuludq ymm10,ymm10,ymm0 505 vmovdqu ymm11,YMMWORD[((192-24-128))+r13] 506 add r10,rax 507 mov rax,rdx 508 imul rax,QWORD[((16-128))+r13] 509 vpaddq ymm3,ymm4,ymm10 510 vpmuludq ymm14,ymm14,ymm0 511 vmovdqu ymm10,YMMWORD[((224-24-128))+r13] 512 imul rdx,QWORD[((24-128))+r13] 513 add r11,rax 514 lea rax,[r10*1+r9] 515 vpaddq ymm4,ymm5,ymm14 516 vpmuludq ymm11,ymm11,ymm0 517 vmovdqu ymm14,YMMWORD[((256-24-128))+r13] 518 mov r10,rax 519 imul eax,ecx 520 vpmuludq ymm10,ymm10,ymm0 521 vpaddq ymm5,ymm6,ymm11 522 vmovdqu ymm11,YMMWORD[((288-24-128))+r13] 523 and eax,0x1fffffff 524 vpaddq ymm6,ymm7,ymm10 525 vpmuludq ymm14,ymm14,ymm0 526 add rdx,QWORD[24+rsp] 527 vpaddq ymm7,ymm8,ymm14 528 vpmuludq ymm11,ymm11,ymm0 529 vpaddq ymm8,ymm9,ymm11 530 vmovq xmm9,r12 531 mov r12,rdx 532 533 dec r14d 534 jnz NEAR $L$OOP_REDUCE_1024 535 lea r12,[448+rsp] 536 vpaddq ymm0,ymm13,ymm9 537 vpxor ymm9,ymm9,ymm9 538 539 vpaddq ymm0,ymm0,YMMWORD[((288-192))+rbx] 540 vpaddq ymm1,ymm1,YMMWORD[((320-448))+r12] 541 vpaddq ymm2,ymm2,YMMWORD[((352-448))+r12] 542 vpaddq ymm3,ymm3,YMMWORD[((384-448))+r12] 543 vpaddq ymm4,ymm4,YMMWORD[((416-448))+r12] 544 vpaddq ymm5,ymm5,YMMWORD[((448-448))+r12] 545 vpaddq ymm6,ymm6,YMMWORD[((480-448))+r12] 546 vpaddq ymm7,ymm7,YMMWORD[((512-448))+r12] 547 vpaddq ymm8,ymm8,YMMWORD[((544-448))+r12] 548 549 vpsrlq ymm14,ymm0,29 550 vpand ymm0,ymm0,ymm15 551 vpsrlq ymm11,ymm1,29 552 vpand ymm1,ymm1,ymm15 553 vpsrlq ymm12,ymm2,29 554 vpermq ymm14,ymm14,0x93 555 vpand ymm2,ymm2,ymm15 556 vpsrlq ymm13,ymm3,29 557 vpermq ymm11,ymm11,0x93 558 vpand ymm3,ymm3,ymm15 559 vpermq ymm12,ymm12,0x93 560 561 vpblendd ymm10,ymm14,ymm9,3 562 vpermq ymm13,ymm13,0x93 563 vpblendd ymm14,ymm11,ymm14,3 564 vpaddq ymm0,ymm0,ymm10 565 vpblendd ymm11,ymm12,ymm11,3 566 vpaddq ymm1,ymm1,ymm14 567 vpblendd ymm12,ymm13,ymm12,3 568 vpaddq ymm2,ymm2,ymm11 569 vpblendd ymm13,ymm9,ymm13,3 570 vpaddq ymm3,ymm3,ymm12 571 vpaddq ymm4,ymm4,ymm13 572 573 vpsrlq ymm14,ymm0,29 574 vpand ymm0,ymm0,ymm15 575 vpsrlq ymm11,ymm1,29 576 vpand ymm1,ymm1,ymm15 577 vpsrlq ymm12,ymm2,29 578 vpermq ymm14,ymm14,0x93 579 vpand ymm2,ymm2,ymm15 580 vpsrlq ymm13,ymm3,29 581 vpermq ymm11,ymm11,0x93 582 vpand ymm3,ymm3,ymm15 583 vpermq ymm12,ymm12,0x93 584 585 vpblendd ymm10,ymm14,ymm9,3 586 vpermq ymm13,ymm13,0x93 587 vpblendd ymm14,ymm11,ymm14,3 588 vpaddq ymm0,ymm0,ymm10 589 vpblendd ymm11,ymm12,ymm11,3 590 vpaddq ymm1,ymm1,ymm14 591 vmovdqu YMMWORD[(0-128)+rdi],ymm0 592 vpblendd ymm12,ymm13,ymm12,3 593 vpaddq ymm2,ymm2,ymm11 594 vmovdqu YMMWORD[(32-128)+rdi],ymm1 595 vpblendd ymm13,ymm9,ymm13,3 596 vpaddq ymm3,ymm3,ymm12 597 vmovdqu YMMWORD[(64-128)+rdi],ymm2 598 vpaddq ymm4,ymm4,ymm13 599 vmovdqu YMMWORD[(96-128)+rdi],ymm3 600 vpsrlq ymm14,ymm4,29 601 vpand ymm4,ymm4,ymm15 602 vpsrlq ymm11,ymm5,29 603 vpand ymm5,ymm5,ymm15 604 vpsrlq ymm12,ymm6,29 605 vpermq ymm14,ymm14,0x93 606 vpand ymm6,ymm6,ymm15 607 vpsrlq ymm13,ymm7,29 608 vpermq ymm11,ymm11,0x93 609 vpand ymm7,ymm7,ymm15 610 vpsrlq ymm0,ymm8,29 611 vpermq ymm12,ymm12,0x93 612 vpand ymm8,ymm8,ymm15 613 vpermq ymm13,ymm13,0x93 614 615 vpblendd ymm10,ymm14,ymm9,3 616 vpermq ymm0,ymm0,0x93 617 vpblendd ymm14,ymm11,ymm14,3 618 vpaddq ymm4,ymm4,ymm10 619 vpblendd ymm11,ymm12,ymm11,3 620 vpaddq ymm5,ymm5,ymm14 621 vpblendd ymm12,ymm13,ymm12,3 622 vpaddq ymm6,ymm6,ymm11 623 vpblendd ymm13,ymm0,ymm13,3 624 vpaddq ymm7,ymm7,ymm12 625 vpaddq ymm8,ymm8,ymm13 626 627 vpsrlq ymm14,ymm4,29 628 vpand ymm4,ymm4,ymm15 629 vpsrlq ymm11,ymm5,29 630 vpand ymm5,ymm5,ymm15 631 vpsrlq ymm12,ymm6,29 632 vpermq ymm14,ymm14,0x93 633 vpand ymm6,ymm6,ymm15 634 vpsrlq ymm13,ymm7,29 635 vpermq ymm11,ymm11,0x93 636 vpand ymm7,ymm7,ymm15 637 vpsrlq ymm0,ymm8,29 638 vpermq ymm12,ymm12,0x93 639 vpand ymm8,ymm8,ymm15 640 vpermq ymm13,ymm13,0x93 641 642 vpblendd ymm10,ymm14,ymm9,3 643 vpermq ymm0,ymm0,0x93 644 vpblendd ymm14,ymm11,ymm14,3 645 vpaddq ymm4,ymm4,ymm10 646 vpblendd ymm11,ymm12,ymm11,3 647 vpaddq ymm5,ymm5,ymm14 648 vmovdqu YMMWORD[(128-128)+rdi],ymm4 649 vpblendd ymm12,ymm13,ymm12,3 650 vpaddq ymm6,ymm6,ymm11 651 vmovdqu YMMWORD[(160-128)+rdi],ymm5 652 vpblendd ymm13,ymm0,ymm13,3 653 vpaddq ymm7,ymm7,ymm12 654 vmovdqu YMMWORD[(192-128)+rdi],ymm6 655 vpaddq ymm8,ymm8,ymm13 656 vmovdqu YMMWORD[(224-128)+rdi],ymm7 657 vmovdqu YMMWORD[(256-128)+rdi],ymm8 658 659 mov rsi,rdi 660 dec r8d 661 jne NEAR $L$OOP_GRANDE_SQR_1024 662 663 vzeroall 664 mov rax,rbp 665 666 $L$sqr_1024_in_tail: 667 movaps xmm6,XMMWORD[((-216))+rax] 668 movaps xmm7,XMMWORD[((-200))+rax] 669 movaps xmm8,XMMWORD[((-184))+rax] 670 movaps xmm9,XMMWORD[((-168))+rax] 671 movaps xmm10,XMMWORD[((-152))+rax] 672 movaps xmm11,XMMWORD[((-136))+rax] 673 movaps xmm12,XMMWORD[((-120))+rax] 674 movaps xmm13,XMMWORD[((-104))+rax] 675 movaps xmm14,XMMWORD[((-88))+rax] 676 movaps xmm15,XMMWORD[((-72))+rax] 677 mov r15,QWORD[((-48))+rax] 678 679 mov r14,QWORD[((-40))+rax] 680 681 mov r13,QWORD[((-32))+rax] 682 683 mov r12,QWORD[((-24))+rax] 684 685 mov rbp,QWORD[((-16))+rax] 686 687 mov rbx,QWORD[((-8))+rax] 688 689 lea rsp,[rax] 690 691 $L$sqr_1024_epilogue: 692 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 693 mov rsi,QWORD[16+rsp] 694 DB 0F3h,0C3h ;repret 695 696 $L$SEH_end_rsaz_1024_sqr_avx2: 697 global rsaz_1024_mul_avx2 698 699 ALIGN 64 700 rsaz_1024_mul_avx2: 701 mov QWORD[8+rsp],rdi ;WIN64 prologue 702 mov QWORD[16+rsp],rsi 703 mov rax,rsp 704 $L$SEH_begin_rsaz_1024_mul_avx2: 705 mov rdi,rcx 706 mov rsi,rdx 707 mov rdx,r8 708 mov rcx,r9 709 mov r8,QWORD[40+rsp] 710 711 712 713 lea rax,[rsp] 714 715 push rbx 716 717 push rbp 718 719 push r12 720 721 push r13 722 723 push r14 724 725 push r15 726 727 vzeroupper 728 lea rsp,[((-168))+rsp] 729 vmovaps XMMWORD[(-216)+rax],xmm6 730 vmovaps XMMWORD[(-200)+rax],xmm7 731 vmovaps XMMWORD[(-184)+rax],xmm8 732 vmovaps XMMWORD[(-168)+rax],xmm9 733 vmovaps XMMWORD[(-152)+rax],xmm10 734 vmovaps XMMWORD[(-136)+rax],xmm11 735 vmovaps XMMWORD[(-120)+rax],xmm12 736 vmovaps XMMWORD[(-104)+rax],xmm13 737 vmovaps XMMWORD[(-88)+rax],xmm14 738 vmovaps XMMWORD[(-72)+rax],xmm15 739 $L$mul_1024_body: 740 mov rbp,rax 741 742 vzeroall 743 mov r13,rdx 744 sub rsp,64 745 746 747 748 749 750 751 DB 0x67,0x67 752 mov r15,rsi 753 and r15,4095 754 add r15,32*10 755 shr r15,12 756 mov r15,rsi 757 cmovnz rsi,r13 758 cmovnz r13,r15 759 760 mov r15,rcx 761 sub rsi,-128 762 sub rcx,-128 763 sub rdi,-128 764 765 and r15,4095 766 add r15,32*10 767 DB 0x67,0x67 768 shr r15,12 769 jz NEAR $L$mul_1024_no_n_copy 770 771 772 773 774 775 sub rsp,32*10 776 vmovdqu ymm0,YMMWORD[((0-128))+rcx] 777 and rsp,-512 778 vmovdqu ymm1,YMMWORD[((32-128))+rcx] 779 vmovdqu ymm2,YMMWORD[((64-128))+rcx] 780 vmovdqu ymm3,YMMWORD[((96-128))+rcx] 781 vmovdqu ymm4,YMMWORD[((128-128))+rcx] 782 vmovdqu ymm5,YMMWORD[((160-128))+rcx] 783 vmovdqu ymm6,YMMWORD[((192-128))+rcx] 784 vmovdqu ymm7,YMMWORD[((224-128))+rcx] 785 vmovdqu ymm8,YMMWORD[((256-128))+rcx] 786 lea rcx,[((64+128))+rsp] 787 vmovdqu YMMWORD[(0-128)+rcx],ymm0 788 vpxor ymm0,ymm0,ymm0 789 vmovdqu YMMWORD[(32-128)+rcx],ymm1 790 vpxor ymm1,ymm1,ymm1 791 vmovdqu YMMWORD[(64-128)+rcx],ymm2 792 vpxor ymm2,ymm2,ymm2 793 vmovdqu YMMWORD[(96-128)+rcx],ymm3 794 vpxor ymm3,ymm3,ymm3 795 vmovdqu YMMWORD[(128-128)+rcx],ymm4 796 vpxor ymm4,ymm4,ymm4 797 vmovdqu YMMWORD[(160-128)+rcx],ymm5 798 vpxor ymm5,ymm5,ymm5 799 vmovdqu YMMWORD[(192-128)+rcx],ymm6 800 vpxor ymm6,ymm6,ymm6 801 vmovdqu YMMWORD[(224-128)+rcx],ymm7 802 vpxor ymm7,ymm7,ymm7 803 vmovdqu YMMWORD[(256-128)+rcx],ymm8 804 vmovdqa ymm8,ymm0 805 vmovdqu YMMWORD[(288-128)+rcx],ymm9 806 $L$mul_1024_no_n_copy: 807 and rsp,-64 808 809 mov rbx,QWORD[r13] 810 vpbroadcastq ymm10,QWORD[r13] 811 vmovdqu YMMWORD[rsp],ymm0 812 xor r9,r9 813 DB 0x67 814 xor r10,r10 815 xor r11,r11 816 xor r12,r12 817 818 vmovdqu ymm15,YMMWORD[$L$and_mask] 819 mov r14d,9 820 vmovdqu YMMWORD[(288-128)+rdi],ymm9 821 jmp NEAR $L$oop_mul_1024 822 823 ALIGN 32 824 $L$oop_mul_1024: 825 vpsrlq ymm9,ymm3,29 826 mov rax,rbx 827 imul rax,QWORD[((-128))+rsi] 828 add rax,r9 829 mov r10,rbx 830 imul r10,QWORD[((8-128))+rsi] 831 add r10,QWORD[8+rsp] 832 833 mov r9,rax 834 imul eax,r8d 835 and eax,0x1fffffff 836 837 mov r11,rbx 838 imul r11,QWORD[((16-128))+rsi] 839 add r11,QWORD[16+rsp] 840 841 mov r12,rbx 842 imul r12,QWORD[((24-128))+rsi] 843 add r12,QWORD[24+rsp] 844 vpmuludq ymm0,ymm10,YMMWORD[((32-128))+rsi] 845 vmovd xmm11,eax 846 vpaddq ymm1,ymm1,ymm0 847 vpmuludq ymm12,ymm10,YMMWORD[((64-128))+rsi] 848 vpbroadcastq ymm11,xmm11 849 vpaddq ymm2,ymm2,ymm12 850 vpmuludq ymm13,ymm10,YMMWORD[((96-128))+rsi] 851 vpand ymm3,ymm3,ymm15 852 vpaddq ymm3,ymm3,ymm13 853 vpmuludq ymm0,ymm10,YMMWORD[((128-128))+rsi] 854 vpaddq ymm4,ymm4,ymm0 855 vpmuludq ymm12,ymm10,YMMWORD[((160-128))+rsi] 856 vpaddq ymm5,ymm5,ymm12 857 vpmuludq ymm13,ymm10,YMMWORD[((192-128))+rsi] 858 vpaddq ymm6,ymm6,ymm13 859 vpmuludq ymm0,ymm10,YMMWORD[((224-128))+rsi] 860 vpermq ymm9,ymm9,0x93 861 vpaddq ymm7,ymm7,ymm0 862 vpmuludq ymm12,ymm10,YMMWORD[((256-128))+rsi] 863 vpbroadcastq ymm10,QWORD[8+r13] 864 vpaddq ymm8,ymm8,ymm12 865 866 mov rdx,rax 867 imul rax,QWORD[((-128))+rcx] 868 add r9,rax 869 mov rax,rdx 870 imul rax,QWORD[((8-128))+rcx] 871 add r10,rax 872 mov rax,rdx 873 imul rax,QWORD[((16-128))+rcx] 874 add r11,rax 875 shr r9,29 876 imul rdx,QWORD[((24-128))+rcx] 877 add r12,rdx 878 add r10,r9 879 880 vpmuludq ymm13,ymm11,YMMWORD[((32-128))+rcx] 881 vmovq rbx,xmm10 882 vpaddq ymm1,ymm1,ymm13 883 vpmuludq ymm0,ymm11,YMMWORD[((64-128))+rcx] 884 vpaddq ymm2,ymm2,ymm0 885 vpmuludq ymm12,ymm11,YMMWORD[((96-128))+rcx] 886 vpaddq ymm3,ymm3,ymm12 887 vpmuludq ymm13,ymm11,YMMWORD[((128-128))+rcx] 888 vpaddq ymm4,ymm4,ymm13 889 vpmuludq ymm0,ymm11,YMMWORD[((160-128))+rcx] 890 vpaddq ymm5,ymm5,ymm0 891 vpmuludq ymm12,ymm11,YMMWORD[((192-128))+rcx] 892 vpaddq ymm6,ymm6,ymm12 893 vpmuludq ymm13,ymm11,YMMWORD[((224-128))+rcx] 894 vpblendd ymm12,ymm9,ymm14,3 895 vpaddq ymm7,ymm7,ymm13 896 vpmuludq ymm0,ymm11,YMMWORD[((256-128))+rcx] 897 vpaddq ymm3,ymm3,ymm12 898 vpaddq ymm8,ymm8,ymm0 899 900 mov rax,rbx 901 imul rax,QWORD[((-128))+rsi] 902 add r10,rax 903 vmovdqu ymm12,YMMWORD[((-8+32-128))+rsi] 904 mov rax,rbx 905 imul rax,QWORD[((8-128))+rsi] 906 add r11,rax 907 vmovdqu ymm13,YMMWORD[((-8+64-128))+rsi] 908 909 mov rax,r10 910 vpblendd ymm9,ymm9,ymm14,0xfc 911 imul eax,r8d 912 vpaddq ymm4,ymm4,ymm9 913 and eax,0x1fffffff 914 915 imul rbx,QWORD[((16-128))+rsi] 916 add r12,rbx 917 vpmuludq ymm12,ymm12,ymm10 918 vmovd xmm11,eax 919 vmovdqu ymm0,YMMWORD[((-8+96-128))+rsi] 920 vpaddq ymm1,ymm1,ymm12 921 vpmuludq ymm13,ymm13,ymm10 922 vpbroadcastq ymm11,xmm11 923 vmovdqu ymm12,YMMWORD[((-8+128-128))+rsi] 924 vpaddq ymm2,ymm2,ymm13 925 vpmuludq ymm0,ymm0,ymm10 926 vmovdqu ymm13,YMMWORD[((-8+160-128))+rsi] 927 vpaddq ymm3,ymm3,ymm0 928 vpmuludq ymm12,ymm12,ymm10 929 vmovdqu ymm0,YMMWORD[((-8+192-128))+rsi] 930 vpaddq ymm4,ymm4,ymm12 931 vpmuludq ymm13,ymm13,ymm10 932 vmovdqu ymm12,YMMWORD[((-8+224-128))+rsi] 933 vpaddq ymm5,ymm5,ymm13 934 vpmuludq ymm0,ymm0,ymm10 935 vmovdqu ymm13,YMMWORD[((-8+256-128))+rsi] 936 vpaddq ymm6,ymm6,ymm0 937 vpmuludq ymm12,ymm12,ymm10 938 vmovdqu ymm9,YMMWORD[((-8+288-128))+rsi] 939 vpaddq ymm7,ymm7,ymm12 940 vpmuludq ymm13,ymm13,ymm10 941 vpaddq ymm8,ymm8,ymm13 942 vpmuludq ymm9,ymm9,ymm10 943 vpbroadcastq ymm10,QWORD[16+r13] 944 945 mov rdx,rax 946 imul rax,QWORD[((-128))+rcx] 947 add r10,rax 948 vmovdqu ymm0,YMMWORD[((-8+32-128))+rcx] 949 mov rax,rdx 950 imul rax,QWORD[((8-128))+rcx] 951 add r11,rax 952 vmovdqu ymm12,YMMWORD[((-8+64-128))+rcx] 953 shr r10,29 954 imul rdx,QWORD[((16-128))+rcx] 955 add r12,rdx 956 add r11,r10 957 958 vpmuludq ymm0,ymm0,ymm11 959 vmovq rbx,xmm10 960 vmovdqu ymm13,YMMWORD[((-8+96-128))+rcx] 961 vpaddq ymm1,ymm1,ymm0 962 vpmuludq ymm12,ymm12,ymm11 963 vmovdqu ymm0,YMMWORD[((-8+128-128))+rcx] 964 vpaddq ymm2,ymm2,ymm12 965 vpmuludq ymm13,ymm13,ymm11 966 vmovdqu ymm12,YMMWORD[((-8+160-128))+rcx] 967 vpaddq ymm3,ymm3,ymm13 968 vpmuludq ymm0,ymm0,ymm11 969 vmovdqu ymm13,YMMWORD[((-8+192-128))+rcx] 970 vpaddq ymm4,ymm4,ymm0 971 vpmuludq ymm12,ymm12,ymm11 972 vmovdqu ymm0,YMMWORD[((-8+224-128))+rcx] 973 vpaddq ymm5,ymm5,ymm12 974 vpmuludq ymm13,ymm13,ymm11 975 vmovdqu ymm12,YMMWORD[((-8+256-128))+rcx] 976 vpaddq ymm6,ymm6,ymm13 977 vpmuludq ymm0,ymm0,ymm11 978 vmovdqu ymm13,YMMWORD[((-8+288-128))+rcx] 979 vpaddq ymm7,ymm7,ymm0 980 vpmuludq ymm12,ymm12,ymm11 981 vpaddq ymm8,ymm8,ymm12 982 vpmuludq ymm13,ymm13,ymm11 983 vpaddq ymm9,ymm9,ymm13 984 985 vmovdqu ymm0,YMMWORD[((-16+32-128))+rsi] 986 mov rax,rbx 987 imul rax,QWORD[((-128))+rsi] 988 add rax,r11 989 990 vmovdqu ymm12,YMMWORD[((-16+64-128))+rsi] 991 mov r11,rax 992 imul eax,r8d 993 and eax,0x1fffffff 994 995 imul rbx,QWORD[((8-128))+rsi] 996 add r12,rbx 997 vpmuludq ymm0,ymm0,ymm10 998 vmovd xmm11,eax 999 vmovdqu ymm13,YMMWORD[((-16+96-128))+rsi] 1000 vpaddq ymm1,ymm1,ymm0 1001 vpmuludq ymm12,ymm12,ymm10 1002 vpbroadcastq ymm11,xmm11 1003 vmovdqu ymm0,YMMWORD[((-16+128-128))+rsi] 1004 vpaddq ymm2,ymm2,ymm12 1005 vpmuludq ymm13,ymm13,ymm10 1006 vmovdqu ymm12,YMMWORD[((-16+160-128))+rsi] 1007 vpaddq ymm3,ymm3,ymm13 1008 vpmuludq ymm0,ymm0,ymm10 1009 vmovdqu ymm13,YMMWORD[((-16+192-128))+rsi] 1010 vpaddq ymm4,ymm4,ymm0 1011 vpmuludq ymm12,ymm12,ymm10 1012 vmovdqu ymm0,YMMWORD[((-16+224-128))+rsi] 1013 vpaddq ymm5,ymm5,ymm12 1014 vpmuludq ymm13,ymm13,ymm10 1015 vmovdqu ymm12,YMMWORD[((-16+256-128))+rsi] 1016 vpaddq ymm6,ymm6,ymm13 1017 vpmuludq ymm0,ymm0,ymm10 1018 vmovdqu ymm13,YMMWORD[((-16+288-128))+rsi] 1019 vpaddq ymm7,ymm7,ymm0 1020 vpmuludq ymm12,ymm12,ymm10 1021 vpaddq ymm8,ymm8,ymm12 1022 vpmuludq ymm13,ymm13,ymm10 1023 vpbroadcastq ymm10,QWORD[24+r13] 1024 vpaddq ymm9,ymm9,ymm13 1025 1026 vmovdqu ymm0,YMMWORD[((-16+32-128))+rcx] 1027 mov rdx,rax 1028 imul rax,QWORD[((-128))+rcx] 1029 add r11,rax 1030 vmovdqu ymm12,YMMWORD[((-16+64-128))+rcx] 1031 imul rdx,QWORD[((8-128))+rcx] 1032 add r12,rdx 1033 shr r11,29 1034 1035 vpmuludq ymm0,ymm0,ymm11 1036 vmovq rbx,xmm10 1037 vmovdqu ymm13,YMMWORD[((-16+96-128))+rcx] 1038 vpaddq ymm1,ymm1,ymm0 1039 vpmuludq ymm12,ymm12,ymm11 1040 vmovdqu ymm0,YMMWORD[((-16+128-128))+rcx] 1041 vpaddq ymm2,ymm2,ymm12 1042 vpmuludq ymm13,ymm13,ymm11 1043 vmovdqu ymm12,YMMWORD[((-16+160-128))+rcx] 1044 vpaddq ymm3,ymm3,ymm13 1045 vpmuludq ymm0,ymm0,ymm11 1046 vmovdqu ymm13,YMMWORD[((-16+192-128))+rcx] 1047 vpaddq ymm4,ymm4,ymm0 1048 vpmuludq ymm12,ymm12,ymm11 1049 vmovdqu ymm0,YMMWORD[((-16+224-128))+rcx] 1050 vpaddq ymm5,ymm5,ymm12 1051 vpmuludq ymm13,ymm13,ymm11 1052 vmovdqu ymm12,YMMWORD[((-16+256-128))+rcx] 1053 vpaddq ymm6,ymm6,ymm13 1054 vpmuludq ymm0,ymm0,ymm11 1055 vmovdqu ymm13,YMMWORD[((-16+288-128))+rcx] 1056 vpaddq ymm7,ymm7,ymm0 1057 vpmuludq ymm12,ymm12,ymm11 1058 vmovdqu ymm0,YMMWORD[((-24+32-128))+rsi] 1059 vpaddq ymm8,ymm8,ymm12 1060 vpmuludq ymm13,ymm13,ymm11 1061 vmovdqu ymm12,YMMWORD[((-24+64-128))+rsi] 1062 vpaddq ymm9,ymm9,ymm13 1063 1064 add r12,r11 1065 imul rbx,QWORD[((-128))+rsi] 1066 add r12,rbx 1067 1068 mov rax,r12 1069 imul eax,r8d 1070 and eax,0x1fffffff 1071 1072 vpmuludq ymm0,ymm0,ymm10 1073 vmovd xmm11,eax 1074 vmovdqu ymm13,YMMWORD[((-24+96-128))+rsi] 1075 vpaddq ymm1,ymm1,ymm0 1076 vpmuludq ymm12,ymm12,ymm10 1077 vpbroadcastq ymm11,xmm11 1078 vmovdqu ymm0,YMMWORD[((-24+128-128))+rsi] 1079 vpaddq ymm2,ymm2,ymm12 1080 vpmuludq ymm13,ymm13,ymm10 1081 vmovdqu ymm12,YMMWORD[((-24+160-128))+rsi] 1082 vpaddq ymm3,ymm3,ymm13 1083 vpmuludq ymm0,ymm0,ymm10 1084 vmovdqu ymm13,YMMWORD[((-24+192-128))+rsi] 1085 vpaddq ymm4,ymm4,ymm0 1086 vpmuludq ymm12,ymm12,ymm10 1087 vmovdqu ymm0,YMMWORD[((-24+224-128))+rsi] 1088 vpaddq ymm5,ymm5,ymm12 1089 vpmuludq ymm13,ymm13,ymm10 1090 vmovdqu ymm12,YMMWORD[((-24+256-128))+rsi] 1091 vpaddq ymm6,ymm6,ymm13 1092 vpmuludq ymm0,ymm0,ymm10 1093 vmovdqu ymm13,YMMWORD[((-24+288-128))+rsi] 1094 vpaddq ymm7,ymm7,ymm0 1095 vpmuludq ymm12,ymm12,ymm10 1096 vpaddq ymm8,ymm8,ymm12 1097 vpmuludq ymm13,ymm13,ymm10 1098 vpbroadcastq ymm10,QWORD[32+r13] 1099 vpaddq ymm9,ymm9,ymm13 1100 add r13,32 1101 1102 vmovdqu ymm0,YMMWORD[((-24+32-128))+rcx] 1103 imul rax,QWORD[((-128))+rcx] 1104 add r12,rax 1105 shr r12,29 1106 1107 vmovdqu ymm12,YMMWORD[((-24+64-128))+rcx] 1108 vpmuludq ymm0,ymm0,ymm11 1109 vmovq rbx,xmm10 1110 vmovdqu ymm13,YMMWORD[((-24+96-128))+rcx] 1111 vpaddq ymm0,ymm1,ymm0 1112 vpmuludq ymm12,ymm12,ymm11 1113 vmovdqu YMMWORD[rsp],ymm0 1114 vpaddq ymm1,ymm2,ymm12 1115 vmovdqu ymm0,YMMWORD[((-24+128-128))+rcx] 1116 vpmuludq ymm13,ymm13,ymm11 1117 vmovdqu ymm12,YMMWORD[((-24+160-128))+rcx] 1118 vpaddq ymm2,ymm3,ymm13 1119 vpmuludq ymm0,ymm0,ymm11 1120 vmovdqu ymm13,YMMWORD[((-24+192-128))+rcx] 1121 vpaddq ymm3,ymm4,ymm0 1122 vpmuludq ymm12,ymm12,ymm11 1123 vmovdqu ymm0,YMMWORD[((-24+224-128))+rcx] 1124 vpaddq ymm4,ymm5,ymm12 1125 vpmuludq ymm13,ymm13,ymm11 1126 vmovdqu ymm12,YMMWORD[((-24+256-128))+rcx] 1127 vpaddq ymm5,ymm6,ymm13 1128 vpmuludq ymm0,ymm0,ymm11 1129 vmovdqu ymm13,YMMWORD[((-24+288-128))+rcx] 1130 mov r9,r12 1131 vpaddq ymm6,ymm7,ymm0 1132 vpmuludq ymm12,ymm12,ymm11 1133 add r9,QWORD[rsp] 1134 vpaddq ymm7,ymm8,ymm12 1135 vpmuludq ymm13,ymm13,ymm11 1136 vmovq xmm12,r12 1137 vpaddq ymm8,ymm9,ymm13 1138 1139 dec r14d 1140 jnz NEAR $L$oop_mul_1024 1141 vpaddq ymm0,ymm12,YMMWORD[rsp] 1142 1143 vpsrlq ymm12,ymm0,29 1144 vpand ymm0,ymm0,ymm15 1145 vpsrlq ymm13,ymm1,29 1146 vpand ymm1,ymm1,ymm15 1147 vpsrlq ymm10,ymm2,29 1148 vpermq ymm12,ymm12,0x93 1149 vpand ymm2,ymm2,ymm15 1150 vpsrlq ymm11,ymm3,29 1151 vpermq ymm13,ymm13,0x93 1152 vpand ymm3,ymm3,ymm15 1153 1154 vpblendd ymm9,ymm12,ymm14,3 1155 vpermq ymm10,ymm10,0x93 1156 vpblendd ymm12,ymm13,ymm12,3 1157 vpermq ymm11,ymm11,0x93 1158 vpaddq ymm0,ymm0,ymm9 1159 vpblendd ymm13,ymm10,ymm13,3 1160 vpaddq ymm1,ymm1,ymm12 1161 vpblendd ymm10,ymm11,ymm10,3 1162 vpaddq ymm2,ymm2,ymm13 1163 vpblendd ymm11,ymm14,ymm11,3 1164 vpaddq ymm3,ymm3,ymm10 1165 vpaddq ymm4,ymm4,ymm11 1166 1167 vpsrlq ymm12,ymm0,29 1168 vpand ymm0,ymm0,ymm15 1169 vpsrlq ymm13,ymm1,29 1170 vpand ymm1,ymm1,ymm15 1171 vpsrlq ymm10,ymm2,29 1172 vpermq ymm12,ymm12,0x93 1173 vpand ymm2,ymm2,ymm15 1174 vpsrlq ymm11,ymm3,29 1175 vpermq ymm13,ymm13,0x93 1176 vpand ymm3,ymm3,ymm15 1177 vpermq ymm10,ymm10,0x93 1178 1179 vpblendd ymm9,ymm12,ymm14,3 1180 vpermq ymm11,ymm11,0x93 1181 vpblendd ymm12,ymm13,ymm12,3 1182 vpaddq ymm0,ymm0,ymm9 1183 vpblendd ymm13,ymm10,ymm13,3 1184 vpaddq ymm1,ymm1,ymm12 1185 vpblendd ymm10,ymm11,ymm10,3 1186 vpaddq ymm2,ymm2,ymm13 1187 vpblendd ymm11,ymm14,ymm11,3 1188 vpaddq ymm3,ymm3,ymm10 1189 vpaddq ymm4,ymm4,ymm11 1190 1191 vmovdqu YMMWORD[(0-128)+rdi],ymm0 1192 vmovdqu YMMWORD[(32-128)+rdi],ymm1 1193 vmovdqu YMMWORD[(64-128)+rdi],ymm2 1194 vmovdqu YMMWORD[(96-128)+rdi],ymm3 1195 vpsrlq ymm12,ymm4,29 1196 vpand ymm4,ymm4,ymm15 1197 vpsrlq ymm13,ymm5,29 1198 vpand ymm5,ymm5,ymm15 1199 vpsrlq ymm10,ymm6,29 1200 vpermq ymm12,ymm12,0x93 1201 vpand ymm6,ymm6,ymm15 1202 vpsrlq ymm11,ymm7,29 1203 vpermq ymm13,ymm13,0x93 1204 vpand ymm7,ymm7,ymm15 1205 vpsrlq ymm0,ymm8,29 1206 vpermq ymm10,ymm10,0x93 1207 vpand ymm8,ymm8,ymm15 1208 vpermq ymm11,ymm11,0x93 1209 1210 vpblendd ymm9,ymm12,ymm14,3 1211 vpermq ymm0,ymm0,0x93 1212 vpblendd ymm12,ymm13,ymm12,3 1213 vpaddq ymm4,ymm4,ymm9 1214 vpblendd ymm13,ymm10,ymm13,3 1215 vpaddq ymm5,ymm5,ymm12 1216 vpblendd ymm10,ymm11,ymm10,3 1217 vpaddq ymm6,ymm6,ymm13 1218 vpblendd ymm11,ymm0,ymm11,3 1219 vpaddq ymm7,ymm7,ymm10 1220 vpaddq ymm8,ymm8,ymm11 1221 1222 vpsrlq ymm12,ymm4,29 1223 vpand ymm4,ymm4,ymm15 1224 vpsrlq ymm13,ymm5,29 1225 vpand ymm5,ymm5,ymm15 1226 vpsrlq ymm10,ymm6,29 1227 vpermq ymm12,ymm12,0x93 1228 vpand ymm6,ymm6,ymm15 1229 vpsrlq ymm11,ymm7,29 1230 vpermq ymm13,ymm13,0x93 1231 vpand ymm7,ymm7,ymm15 1232 vpsrlq ymm0,ymm8,29 1233 vpermq ymm10,ymm10,0x93 1234 vpand ymm8,ymm8,ymm15 1235 vpermq ymm11,ymm11,0x93 1236 1237 vpblendd ymm9,ymm12,ymm14,3 1238 vpermq ymm0,ymm0,0x93 1239 vpblendd ymm12,ymm13,ymm12,3 1240 vpaddq ymm4,ymm4,ymm9 1241 vpblendd ymm13,ymm10,ymm13,3 1242 vpaddq ymm5,ymm5,ymm12 1243 vpblendd ymm10,ymm11,ymm10,3 1244 vpaddq ymm6,ymm6,ymm13 1245 vpblendd ymm11,ymm0,ymm11,3 1246 vpaddq ymm7,ymm7,ymm10 1247 vpaddq ymm8,ymm8,ymm11 1248 1249 vmovdqu YMMWORD[(128-128)+rdi],ymm4 1250 vmovdqu YMMWORD[(160-128)+rdi],ymm5 1251 vmovdqu YMMWORD[(192-128)+rdi],ymm6 1252 vmovdqu YMMWORD[(224-128)+rdi],ymm7 1253 vmovdqu YMMWORD[(256-128)+rdi],ymm8 1254 vzeroupper 1255 1256 mov rax,rbp 1257 1258 $L$mul_1024_in_tail: 1259 movaps xmm6,XMMWORD[((-216))+rax] 1260 movaps xmm7,XMMWORD[((-200))+rax] 1261 movaps xmm8,XMMWORD[((-184))+rax] 1262 movaps xmm9,XMMWORD[((-168))+rax] 1263 movaps xmm10,XMMWORD[((-152))+rax] 1264 movaps xmm11,XMMWORD[((-136))+rax] 1265 movaps xmm12,XMMWORD[((-120))+rax] 1266 movaps xmm13,XMMWORD[((-104))+rax] 1267 movaps xmm14,XMMWORD[((-88))+rax] 1268 movaps xmm15,XMMWORD[((-72))+rax] 1269 mov r15,QWORD[((-48))+rax] 1270 1271 mov r14,QWORD[((-40))+rax] 1272 1273 mov r13,QWORD[((-32))+rax] 1274 1275 mov r12,QWORD[((-24))+rax] 1276 1277 mov rbp,QWORD[((-16))+rax] 1278 1279 mov rbx,QWORD[((-8))+rax] 1280 1281 lea rsp,[rax] 1282 1283 $L$mul_1024_epilogue: 1284 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1285 mov rsi,QWORD[16+rsp] 1286 DB 0F3h,0C3h ;repret 1287 1288 $L$SEH_end_rsaz_1024_mul_avx2: 1289 global rsaz_1024_red2norm_avx2 1290 1291 ALIGN 32 1292 rsaz_1024_red2norm_avx2: 1293 1294 sub rdx,-128 1295 xor rax,rax 1296 mov r8,QWORD[((-128))+rdx] 1297 mov r9,QWORD[((-120))+rdx] 1298 mov r10,QWORD[((-112))+rdx] 1299 shl r8,0 1300 shl r9,29 1301 mov r11,r10 1302 shl r10,58 1303 shr r11,6 1304 add rax,r8 1305 add rax,r9 1306 add rax,r10 1307 adc r11,0 1308 mov QWORD[rcx],rax 1309 mov rax,r11 1310 mov r8,QWORD[((-104))+rdx] 1311 mov r9,QWORD[((-96))+rdx] 1312 shl r8,23 1313 mov r10,r9 1314 shl r9,52 1315 shr r10,12 1316 add rax,r8 1317 add rax,r9 1318 adc r10,0 1319 mov QWORD[8+rcx],rax 1320 mov rax,r10 1321 mov r11,QWORD[((-88))+rdx] 1322 mov r8,QWORD[((-80))+rdx] 1323 shl r11,17 1324 mov r9,r8 1325 shl r8,46 1326 shr r9,18 1327 add rax,r11 1328 add rax,r8 1329 adc r9,0 1330 mov QWORD[16+rcx],rax 1331 mov rax,r9 1332 mov r10,QWORD[((-72))+rdx] 1333 mov r11,QWORD[((-64))+rdx] 1334 shl r10,11 1335 mov r8,r11 1336 shl r11,40 1337 shr r8,24 1338 add rax,r10 1339 add rax,r11 1340 adc r8,0 1341 mov QWORD[24+rcx],rax 1342 mov rax,r8 1343 mov r9,QWORD[((-56))+rdx] 1344 mov r10,QWORD[((-48))+rdx] 1345 mov r11,QWORD[((-40))+rdx] 1346 shl r9,5 1347 shl r10,34 1348 mov r8,r11 1349 shl r11,63 1350 shr r8,1 1351 add rax,r9 1352 add rax,r10 1353 add rax,r11 1354 adc r8,0 1355 mov QWORD[32+rcx],rax 1356 mov rax,r8 1357 mov r9,QWORD[((-32))+rdx] 1358 mov r10,QWORD[((-24))+rdx] 1359 shl r9,28 1360 mov r11,r10 1361 shl r10,57 1362 shr r11,7 1363 add rax,r9 1364 add rax,r10 1365 adc r11,0 1366 mov QWORD[40+rcx],rax 1367 mov rax,r11 1368 mov r8,QWORD[((-16))+rdx] 1369 mov r9,QWORD[((-8))+rdx] 1370 shl r8,22 1371 mov r10,r9 1372 shl r9,51 1373 shr r10,13 1374 add rax,r8 1375 add rax,r9 1376 adc r10,0 1377 mov QWORD[48+rcx],rax 1378 mov rax,r10 1379 mov r11,QWORD[rdx] 1380 mov r8,QWORD[8+rdx] 1381 shl r11,16 1382 mov r9,r8 1383 shl r8,45 1384 shr r9,19 1385 add rax,r11 1386 add rax,r8 1387 adc r9,0 1388 mov QWORD[56+rcx],rax 1389 mov rax,r9 1390 mov r10,QWORD[16+rdx] 1391 mov r11,QWORD[24+rdx] 1392 shl r10,10 1393 mov r8,r11 1394 shl r11,39 1395 shr r8,25 1396 add rax,r10 1397 add rax,r11 1398 adc r8,0 1399 mov QWORD[64+rcx],rax 1400 mov rax,r8 1401 mov r9,QWORD[32+rdx] 1402 mov r10,QWORD[40+rdx] 1403 mov r11,QWORD[48+rdx] 1404 shl r9,4 1405 shl r10,33 1406 mov r8,r11 1407 shl r11,62 1408 shr r8,2 1409 add rax,r9 1410 add rax,r10 1411 add rax,r11 1412 adc r8,0 1413 mov QWORD[72+rcx],rax 1414 mov rax,r8 1415 mov r9,QWORD[56+rdx] 1416 mov r10,QWORD[64+rdx] 1417 shl r9,27 1418 mov r11,r10 1419 shl r10,56 1420 shr r11,8 1421 add rax,r9 1422 add rax,r10 1423 adc r11,0 1424 mov QWORD[80+rcx],rax 1425 mov rax,r11 1426 mov r8,QWORD[72+rdx] 1427 mov r9,QWORD[80+rdx] 1428 shl r8,21 1429 mov r10,r9 1430 shl r9,50 1431 shr r10,14 1432 add rax,r8 1433 add rax,r9 1434 adc r10,0 1435 mov QWORD[88+rcx],rax 1436 mov rax,r10 1437 mov r11,QWORD[88+rdx] 1438 mov r8,QWORD[96+rdx] 1439 shl r11,15 1440 mov r9,r8 1441 shl r8,44 1442 shr r9,20 1443 add rax,r11 1444 add rax,r8 1445 adc r9,0 1446 mov QWORD[96+rcx],rax 1447 mov rax,r9 1448 mov r10,QWORD[104+rdx] 1449 mov r11,QWORD[112+rdx] 1450 shl r10,9 1451 mov r8,r11 1452 shl r11,38 1453 shr r8,26 1454 add rax,r10 1455 add rax,r11 1456 adc r8,0 1457 mov QWORD[104+rcx],rax 1458 mov rax,r8 1459 mov r9,QWORD[120+rdx] 1460 mov r10,QWORD[128+rdx] 1461 mov r11,QWORD[136+rdx] 1462 shl r9,3 1463 shl r10,32 1464 mov r8,r11 1465 shl r11,61 1466 shr r8,3 1467 add rax,r9 1468 add rax,r10 1469 add rax,r11 1470 adc r8,0 1471 mov QWORD[112+rcx],rax 1472 mov rax,r8 1473 mov r9,QWORD[144+rdx] 1474 mov r10,QWORD[152+rdx] 1475 shl r9,26 1476 mov r11,r10 1477 shl r10,55 1478 shr r11,9 1479 add rax,r9 1480 add rax,r10 1481 adc r11,0 1482 mov QWORD[120+rcx],rax 1483 mov rax,r11 10 rsaz_avx2_eligible: 11 xor eax,eax 1484 12 DB 0F3h,0C3h ;repret 1485 13 1486 14 15 global rsaz_1024_sqr_avx2 16 global rsaz_1024_mul_avx2 17 global rsaz_1024_norm2red_avx2 18 global rsaz_1024_red2norm_avx2 19 global rsaz_1024_scatter5_avx2 20 global rsaz_1024_gather5_avx2 1487 21 1488 global rsaz_1024_norm2red_avx2 1489 1490 ALIGN 32 22 rsaz_1024_sqr_avx2: 23 rsaz_1024_mul_avx2: 1491 24 rsaz_1024_norm2red_avx2: 1492 1493 sub rcx,-128 1494 mov r8,QWORD[rdx] 1495 mov eax,0x1fffffff 1496 mov r9,QWORD[8+rdx] 1497 mov r11,r8 1498 shr r11,0 1499 and r11,rax 1500 mov QWORD[((-128))+rcx],r11 1501 mov r10,r8 1502 shr r10,29 1503 and r10,rax 1504 mov QWORD[((-120))+rcx],r10 1505 shrd r8,r9,58 1506 and r8,rax 1507 mov QWORD[((-112))+rcx],r8 1508 mov r10,QWORD[16+rdx] 1509 mov r8,r9 1510 shr r8,23 1511 and r8,rax 1512 mov QWORD[((-104))+rcx],r8 1513 shrd r9,r10,52 1514 and r9,rax 1515 mov QWORD[((-96))+rcx],r9 1516 mov r11,QWORD[24+rdx] 1517 mov r9,r10 1518 shr r9,17 1519 and r9,rax 1520 mov QWORD[((-88))+rcx],r9 1521 shrd r10,r11,46 1522 and r10,rax 1523 mov QWORD[((-80))+rcx],r10 1524 mov r8,QWORD[32+rdx] 1525 mov r10,r11 1526 shr r10,11 1527 and r10,rax 1528 mov QWORD[((-72))+rcx],r10 1529 shrd r11,r8,40 1530 and r11,rax 1531 mov QWORD[((-64))+rcx],r11 1532 mov r9,QWORD[40+rdx] 1533 mov r11,r8 1534 shr r11,5 1535 and r11,rax 1536 mov QWORD[((-56))+rcx],r11 1537 mov r10,r8 1538 shr r10,34 1539 and r10,rax 1540 mov QWORD[((-48))+rcx],r10 1541 shrd r8,r9,63 1542 and r8,rax 1543 mov QWORD[((-40))+rcx],r8 1544 mov r10,QWORD[48+rdx] 1545 mov r8,r9 1546 shr r8,28 1547 and r8,rax 1548 mov QWORD[((-32))+rcx],r8 1549 shrd r9,r10,57 1550 and r9,rax 1551 mov QWORD[((-24))+rcx],r9 1552 mov r11,QWORD[56+rdx] 1553 mov r9,r10 1554 shr r9,22 1555 and r9,rax 1556 mov QWORD[((-16))+rcx],r9 1557 shrd r10,r11,51 1558 and r10,rax 1559 mov QWORD[((-8))+rcx],r10 1560 mov r8,QWORD[64+rdx] 1561 mov r10,r11 1562 shr r10,16 1563 and r10,rax 1564 mov QWORD[rcx],r10 1565 shrd r11,r8,45 1566 and r11,rax 1567 mov QWORD[8+rcx],r11 1568 mov r9,QWORD[72+rdx] 1569 mov r11,r8 1570 shr r11,10 1571 and r11,rax 1572 mov QWORD[16+rcx],r11 1573 shrd r8,r9,39 1574 and r8,rax 1575 mov QWORD[24+rcx],r8 1576 mov r10,QWORD[80+rdx] 1577 mov r8,r9 1578 shr r8,4 1579 and r8,rax 1580 mov QWORD[32+rcx],r8 1581 mov r11,r9 1582 shr r11,33 1583 and r11,rax 1584 mov QWORD[40+rcx],r11 1585 shrd r9,r10,62 1586 and r9,rax 1587 mov QWORD[48+rcx],r9 1588 mov r11,QWORD[88+rdx] 1589 mov r9,r10 1590 shr r9,27 1591 and r9,rax 1592 mov QWORD[56+rcx],r9 1593 shrd r10,r11,56 1594 and r10,rax 1595 mov QWORD[64+rcx],r10 1596 mov r8,QWORD[96+rdx] 1597 mov r10,r11 1598 shr r10,21 1599 and r10,rax 1600 mov QWORD[72+rcx],r10 1601 shrd r11,r8,50 1602 and r11,rax 1603 mov QWORD[80+rcx],r11 1604 mov r9,QWORD[104+rdx] 1605 mov r11,r8 1606 shr r11,15 1607 and r11,rax 1608 mov QWORD[88+rcx],r11 1609 shrd r8,r9,44 1610 and r8,rax 1611 mov QWORD[96+rcx],r8 1612 mov r10,QWORD[112+rdx] 1613 mov r8,r9 1614 shr r8,9 1615 and r8,rax 1616 mov QWORD[104+rcx],r8 1617 shrd r9,r10,38 1618 and r9,rax 1619 mov QWORD[112+rcx],r9 1620 mov r11,QWORD[120+rdx] 1621 mov r9,r10 1622 shr r9,3 1623 and r9,rax 1624 mov QWORD[120+rcx],r9 1625 mov r8,r10 1626 shr r8,32 1627 and r8,rax 1628 mov QWORD[128+rcx],r8 1629 shrd r10,r11,61 1630 and r10,rax 1631 mov QWORD[136+rcx],r10 1632 xor r8,r8 1633 mov r10,r11 1634 shr r10,26 1635 and r10,rax 1636 mov QWORD[144+rcx],r10 1637 shrd r11,r8,55 1638 and r11,rax 1639 mov QWORD[152+rcx],r11 1640 mov QWORD[160+rcx],r8 1641 mov QWORD[168+rcx],r8 1642 mov QWORD[176+rcx],r8 1643 mov QWORD[184+rcx],r8 25 rsaz_1024_red2norm_avx2: 26 rsaz_1024_scatter5_avx2: 27 rsaz_1024_gather5_avx2: 28 DB 0x0f,0x0b 1644 29 DB 0F3h,0C3h ;repret 1645 30 1646 1647 global rsaz_1024_scatter5_avx21648 1649 ALIGN 321650 rsaz_1024_scatter5_avx2:1651 1652 vzeroupper1653 vmovdqu ymm5,YMMWORD[$L$scatter_permd]1654 shl r8d,41655 lea rcx,[r8*1+rcx]1656 mov eax,91657 jmp NEAR $L$oop_scatter_10241658 1659 ALIGN 321660 $L$oop_scatter_1024:1661 vmovdqu ymm0,YMMWORD[rdx]1662 lea rdx,[32+rdx]1663 vpermd ymm0,ymm5,ymm01664 vmovdqu XMMWORD[rcx],xmm01665 lea rcx,[512+rcx]1666 dec eax1667 jnz NEAR $L$oop_scatter_10241668 1669 vzeroupper1670 DB 0F3h,0C3h ;repret1671 1672 1673 1674 global rsaz_1024_gather5_avx21675 1676 ALIGN 321677 rsaz_1024_gather5_avx2:1678 1679 vzeroupper1680 mov r11,rsp1681 1682 lea rax,[((-136))+rsp]1683 $L$SEH_begin_rsaz_1024_gather5:1684 1685 DB 0x48,0x8d,0x60,0xe01686 DB 0xc5,0xf8,0x29,0x70,0xe01687 DB 0xc5,0xf8,0x29,0x78,0xf01688 DB 0xc5,0x78,0x29,0x40,0x001689 DB 0xc5,0x78,0x29,0x48,0x101690 DB 0xc5,0x78,0x29,0x50,0x201691 DB 0xc5,0x78,0x29,0x58,0x301692 DB 0xc5,0x78,0x29,0x60,0x401693 DB 0xc5,0x78,0x29,0x68,0x501694 DB 0xc5,0x78,0x29,0x70,0x601695 DB 0xc5,0x78,0x29,0x78,0x701696 lea rsp,[((-256))+rsp]1697 and rsp,-321698 lea r10,[$L$inc]1699 lea rax,[((-128))+rsp]1700 1701 vmovd xmm4,r8d1702 vmovdqa ymm0,YMMWORD[r10]1703 vmovdqa ymm1,YMMWORD[32+r10]1704 vmovdqa ymm5,YMMWORD[64+r10]1705 vpbroadcastd ymm4,xmm41706 1707 vpaddd ymm2,ymm0,ymm51708 vpcmpeqd ymm0,ymm0,ymm41709 vpaddd ymm3,ymm1,ymm51710 vpcmpeqd ymm1,ymm1,ymm41711 vmovdqa YMMWORD[(0+128)+rax],ymm01712 vpaddd ymm0,ymm2,ymm51713 vpcmpeqd ymm2,ymm2,ymm41714 vmovdqa YMMWORD[(32+128)+rax],ymm11715 vpaddd ymm1,ymm3,ymm51716 vpcmpeqd ymm3,ymm3,ymm41717 vmovdqa YMMWORD[(64+128)+rax],ymm21718 vpaddd ymm2,ymm0,ymm51719 vpcmpeqd ymm0,ymm0,ymm41720 vmovdqa YMMWORD[(96+128)+rax],ymm31721 vpaddd ymm3,ymm1,ymm51722 vpcmpeqd ymm1,ymm1,ymm41723 vmovdqa YMMWORD[(128+128)+rax],ymm01724 vpaddd ymm8,ymm2,ymm51725 vpcmpeqd ymm2,ymm2,ymm41726 vmovdqa YMMWORD[(160+128)+rax],ymm11727 vpaddd ymm9,ymm3,ymm51728 vpcmpeqd ymm3,ymm3,ymm41729 vmovdqa YMMWORD[(192+128)+rax],ymm21730 vpaddd ymm10,ymm8,ymm51731 vpcmpeqd ymm8,ymm8,ymm41732 vmovdqa YMMWORD[(224+128)+rax],ymm31733 vpaddd ymm11,ymm9,ymm51734 vpcmpeqd ymm9,ymm9,ymm41735 vpaddd ymm12,ymm10,ymm51736 vpcmpeqd ymm10,ymm10,ymm41737 vpaddd ymm13,ymm11,ymm51738 vpcmpeqd ymm11,ymm11,ymm41739 vpaddd ymm14,ymm12,ymm51740 vpcmpeqd ymm12,ymm12,ymm41741 vpaddd ymm15,ymm13,ymm51742 vpcmpeqd ymm13,ymm13,ymm41743 vpcmpeqd ymm14,ymm14,ymm41744 vpcmpeqd ymm15,ymm15,ymm41745 1746 vmovdqa ymm7,YMMWORD[((-32))+r10]1747 lea rdx,[128+rdx]1748 mov r8d,91749 1750 $L$oop_gather_1024:1751 vmovdqa ymm0,YMMWORD[((0-128))+rdx]1752 vmovdqa ymm1,YMMWORD[((32-128))+rdx]1753 vmovdqa ymm2,YMMWORD[((64-128))+rdx]1754 vmovdqa ymm3,YMMWORD[((96-128))+rdx]1755 vpand ymm0,ymm0,YMMWORD[((0+128))+rax]1756 vpand ymm1,ymm1,YMMWORD[((32+128))+rax]1757 vpand ymm2,ymm2,YMMWORD[((64+128))+rax]1758 vpor ymm4,ymm1,ymm01759 vpand ymm3,ymm3,YMMWORD[((96+128))+rax]1760 vmovdqa ymm0,YMMWORD[((128-128))+rdx]1761 vmovdqa ymm1,YMMWORD[((160-128))+rdx]1762 vpor ymm5,ymm3,ymm21763 vmovdqa ymm2,YMMWORD[((192-128))+rdx]1764 vmovdqa ymm3,YMMWORD[((224-128))+rdx]1765 vpand ymm0,ymm0,YMMWORD[((128+128))+rax]1766 vpand ymm1,ymm1,YMMWORD[((160+128))+rax]1767 vpand ymm2,ymm2,YMMWORD[((192+128))+rax]1768 vpor ymm4,ymm4,ymm01769 vpand ymm3,ymm3,YMMWORD[((224+128))+rax]1770 vpand ymm0,ymm8,YMMWORD[((256-128))+rdx]1771 vpor ymm5,ymm5,ymm11772 vpand ymm1,ymm9,YMMWORD[((288-128))+rdx]1773 vpor ymm4,ymm4,ymm21774 vpand ymm2,ymm10,YMMWORD[((320-128))+rdx]1775 vpor ymm5,ymm5,ymm31776 vpand ymm3,ymm11,YMMWORD[((352-128))+rdx]1777 vpor ymm4,ymm4,ymm01778 vpand ymm0,ymm12,YMMWORD[((384-128))+rdx]1779 vpor ymm5,ymm5,ymm11780 vpand ymm1,ymm13,YMMWORD[((416-128))+rdx]1781 vpor ymm4,ymm4,ymm21782 vpand ymm2,ymm14,YMMWORD[((448-128))+rdx]1783 vpor ymm5,ymm5,ymm31784 vpand ymm3,ymm15,YMMWORD[((480-128))+rdx]1785 lea rdx,[512+rdx]1786 vpor ymm4,ymm4,ymm01787 vpor ymm5,ymm5,ymm11788 vpor ymm4,ymm4,ymm21789 vpor ymm5,ymm5,ymm31790 1791 vpor ymm4,ymm4,ymm51792 vextracti128 xmm5,ymm4,11793 vpor xmm5,xmm5,xmm41794 vpermd ymm5,ymm7,ymm51795 vmovdqu YMMWORD[rcx],ymm51796 lea rcx,[32+rcx]1797 dec r8d1798 jnz NEAR $L$oop_gather_10241799 1800 vpxor ymm0,ymm0,ymm01801 vmovdqu YMMWORD[rcx],ymm01802 vzeroupper1803 movaps xmm6,XMMWORD[((-168))+r11]1804 movaps xmm7,XMMWORD[((-152))+r11]1805 movaps xmm8,XMMWORD[((-136))+r11]1806 movaps xmm9,XMMWORD[((-120))+r11]1807 movaps xmm10,XMMWORD[((-104))+r11]1808 movaps xmm11,XMMWORD[((-88))+r11]1809 movaps xmm12,XMMWORD[((-72))+r11]1810 movaps xmm13,XMMWORD[((-56))+r11]1811 movaps xmm14,XMMWORD[((-40))+r11]1812 movaps xmm15,XMMWORD[((-24))+r11]1813 lea rsp,[r11]1814 1815 DB 0F3h,0C3h ;repret1816 1817 $L$SEH_end_rsaz_1024_gather5:1818 1819 EXTERN OPENSSL_ia32cap_P1820 global rsaz_avx2_eligible1821 1822 ALIGN 321823 rsaz_avx2_eligible:1824 mov eax,DWORD[((OPENSSL_ia32cap_P+8))]1825 mov ecx,5245441826 mov edx,01827 and ecx,eax1828 cmp ecx,5245441829 cmove eax,edx1830 and eax,321831 shr eax,51832 DB 0F3h,0C3h ;repret1833 1834 1835 ALIGN 641836 $L$and_mask:1837 DQ 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff1838 $L$scatter_permd:1839 DD 0,2,4,6,7,7,7,71840 $L$gather_permd:1841 DD 0,7,1,7,2,7,3,71842 $L$inc:1843 DD 0,0,0,0,1,1,1,11844 DD 2,2,2,2,3,3,3,31845 DD 4,4,4,4,4,4,4,41846 ALIGN 641847 EXTERN __imp_RtlVirtualUnwind1848 1849 ALIGN 161850 rsaz_se_handler:1851 push rsi1852 push rdi1853 push rbx1854 push rbp1855 push r121856 push r131857 push r141858 push r151859 pushfq1860 sub rsp,641861 1862 mov rax,QWORD[120+r8]1863 mov rbx,QWORD[248+r8]1864 1865 mov rsi,QWORD[8+r9]1866 mov r11,QWORD[56+r9]1867 1868 mov r10d,DWORD[r11]1869 lea r10,[r10*1+rsi]1870 cmp rbx,r101871 jb NEAR $L$common_seh_tail1872 1873 mov r10d,DWORD[4+r11]1874 lea r10,[r10*1+rsi]1875 cmp rbx,r101876 jae NEAR $L$common_seh_tail1877 1878 mov rbp,QWORD[160+r8]1879 1880 mov r10d,DWORD[8+r11]1881 lea r10,[r10*1+rsi]1882 cmp rbx,r101883 cmovc rax,rbp1884 1885 mov r15,QWORD[((-48))+rax]1886 mov r14,QWORD[((-40))+rax]1887 mov r13,QWORD[((-32))+rax]1888 mov r12,QWORD[((-24))+rax]1889 mov rbp,QWORD[((-16))+rax]1890 mov rbx,QWORD[((-8))+rax]1891 mov QWORD[240+r8],r151892 mov QWORD[232+r8],r141893 mov QWORD[224+r8],r131894 mov QWORD[216+r8],r121895 mov QWORD[160+r8],rbp1896 mov QWORD[144+r8],rbx1897 1898 lea rsi,[((-216))+rax]1899 lea rdi,[512+r8]1900 mov ecx,201901 DD 0xa548f3fc1902 1903 $L$common_seh_tail:1904 mov rdi,QWORD[8+rax]1905 mov rsi,QWORD[16+rax]1906 mov QWORD[152+r8],rax1907 mov QWORD[168+r8],rsi1908 mov QWORD[176+r8],rdi1909 1910 mov rdi,QWORD[40+r9]1911 mov rsi,r81912 mov ecx,1541913 DD 0xa548f3fc1914 1915 mov rsi,r91916 xor rcx,rcx1917 mov rdx,QWORD[8+rsi]1918 mov r8,QWORD[rsi]1919 mov r9,QWORD[16+rsi]1920 mov r10,QWORD[40+rsi]1921 lea r11,[56+rsi]1922 lea r12,[24+rsi]1923 mov QWORD[32+rsp],r101924 mov QWORD[40+rsp],r111925 mov QWORD[48+rsp],r121926 mov QWORD[56+rsp],rcx1927 call QWORD[__imp_RtlVirtualUnwind]1928 1929 mov eax,11930 add rsp,641931 popfq1932 pop r151933 pop r141934 pop r131935 pop r121936 pop rbp1937 pop rbx1938 pop rdi1939 pop rsi1940 DB 0F3h,0C3h ;repret1941 1942 1943 section .pdata rdata align=41944 ALIGN 41945 DD $L$SEH_begin_rsaz_1024_sqr_avx2 wrt ..imagebase1946 DD $L$SEH_end_rsaz_1024_sqr_avx2 wrt ..imagebase1947 DD $L$SEH_info_rsaz_1024_sqr_avx2 wrt ..imagebase1948 1949 DD $L$SEH_begin_rsaz_1024_mul_avx2 wrt ..imagebase1950 DD $L$SEH_end_rsaz_1024_mul_avx2 wrt ..imagebase1951 DD $L$SEH_info_rsaz_1024_mul_avx2 wrt ..imagebase1952 1953 DD $L$SEH_begin_rsaz_1024_gather5 wrt ..imagebase1954 DD $L$SEH_end_rsaz_1024_gather5 wrt ..imagebase1955 DD $L$SEH_info_rsaz_1024_gather5 wrt ..imagebase1956 section .xdata rdata align=81957 ALIGN 81958 $L$SEH_info_rsaz_1024_sqr_avx2:1959 DB 9,0,0,01960 DD rsaz_se_handler wrt ..imagebase1961 DD $L$sqr_1024_body wrt ..imagebase,$L$sqr_1024_epilogue wrt ..imagebase,$L$sqr_1024_in_tail wrt ..imagebase1962 DD 01963 $L$SEH_info_rsaz_1024_mul_avx2:1964 DB 9,0,0,01965 DD rsaz_se_handler wrt ..imagebase1966 DD $L$mul_1024_body wrt ..imagebase,$L$mul_1024_epilogue wrt ..imagebase,$L$mul_1024_in_tail wrt ..imagebase1967 DD 01968 $L$SEH_info_rsaz_1024_gather5:1969 DB 0x01,0x36,0x17,0x0b1970 DB 0x36,0xf8,0x09,0x001971 DB 0x31,0xe8,0x08,0x001972 DB 0x2c,0xd8,0x07,0x001973 DB 0x27,0xc8,0x06,0x001974 DB 0x22,0xb8,0x05,0x001975 DB 0x1d,0xa8,0x04,0x001976 DB 0x18,0x98,0x03,0x001977 DB 0x13,0x88,0x02,0x001978 DB 0x0e,0x78,0x01,0x001979 DB 0x09,0x68,0x00,0x001980 DB 0x04,0x01,0x15,0x001981 DB 0x00,0xb3,0x00,0x00 -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/rsaz-avx512.S
r95219 r95221 3 3 %define YMMWORD 4 4 %define ZMMWORD 5 EXTERN OPENSSL_ia32cap_P6 global ossl_rsaz_avx512ifma_eligible7 8 ALIGN 329 ossl_rsaz_avx512ifma_eligible:10 mov ecx,DWORD[((OPENSSL_ia32cap_P+8))]11 xor eax,eax12 and ecx,214977740813 cmp ecx,214977740814 cmove eax,ecx15 DB 0F3h,0C3h ;repret16 17 5 section .text code align=64 18 6 19 7 20 global ossl_rsaz_a mm52x20_x1_2568 global ossl_rsaz_avx512ifma_eligible 21 9 22 ALIGN 32 23 ossl_rsaz_amm52x20_x1_256: 24 mov QWORD[8+rsp],rdi ;WIN64 prologue 25 mov QWORD[16+rsp],rsi 26 mov rax,rsp 27 $L$SEH_begin_ossl_rsaz_amm52x20_x1_256: 28 mov rdi,rcx 29 mov rsi,rdx 30 mov rdx,r8 31 mov rcx,r9 32 mov r8,QWORD[40+rsp] 33 34 35 36 DB 243,15,30,250 37 push rbx 38 39 push rbp 40 41 push r12 42 43 push r13 44 45 push r14 46 47 push r15 48 49 $L$rsaz_amm52x20_x1_256_body: 50 51 52 vpxord ymm0,ymm0,ymm0 53 vmovdqa64 ymm1,ymm0 54 vmovdqa64 ymm16,ymm0 55 vmovdqa64 ymm17,ymm0 56 vmovdqa64 ymm18,ymm0 57 vmovdqa64 ymm19,ymm0 58 59 xor r9d,r9d 60 61 mov r11,rdx 62 mov rax,0xfffffffffffff 63 64 65 mov ebx,5 66 67 ALIGN 32 68 $L$loop5: 69 mov r13,QWORD[r11] 70 71 vpbroadcastq ymm3,r13 72 mov rdx,QWORD[rsi] 73 mulx r12,r13,r13 74 add r9,r13 75 mov r10,r12 76 adc r10,0 77 78 mov r13,r8 79 imul r13,r9 80 and r13,rax 81 82 vpbroadcastq ymm4,r13 83 mov rdx,QWORD[rcx] 84 mulx r12,r13,r13 85 add r9,r13 86 adc r10,r12 87 88 shr r9,52 89 sal r10,12 90 or r9,r10 91 92 vpmadd52luq ymm1,ymm3,YMMWORD[rsi] 93 vpmadd52luq ymm16,ymm3,YMMWORD[32+rsi] 94 vpmadd52luq ymm17,ymm3,YMMWORD[64+rsi] 95 vpmadd52luq ymm18,ymm3,YMMWORD[96+rsi] 96 vpmadd52luq ymm19,ymm3,YMMWORD[128+rsi] 97 98 vpmadd52luq ymm1,ymm4,YMMWORD[rcx] 99 vpmadd52luq ymm16,ymm4,YMMWORD[32+rcx] 100 vpmadd52luq ymm17,ymm4,YMMWORD[64+rcx] 101 vpmadd52luq ymm18,ymm4,YMMWORD[96+rcx] 102 vpmadd52luq ymm19,ymm4,YMMWORD[128+rcx] 103 104 105 valignq ymm1,ymm16,ymm1,1 106 valignq ymm16,ymm17,ymm16,1 107 valignq ymm17,ymm18,ymm17,1 108 valignq ymm18,ymm19,ymm18,1 109 valignq ymm19,ymm0,ymm19,1 110 111 vmovq r13,xmm1 112 add r9,r13 113 114 vpmadd52huq ymm1,ymm3,YMMWORD[rsi] 115 vpmadd52huq ymm16,ymm3,YMMWORD[32+rsi] 116 vpmadd52huq ymm17,ymm3,YMMWORD[64+rsi] 117 vpmadd52huq ymm18,ymm3,YMMWORD[96+rsi] 118 vpmadd52huq ymm19,ymm3,YMMWORD[128+rsi] 119 120 vpmadd52huq ymm1,ymm4,YMMWORD[rcx] 121 vpmadd52huq ymm16,ymm4,YMMWORD[32+rcx] 122 vpmadd52huq ymm17,ymm4,YMMWORD[64+rcx] 123 vpmadd52huq ymm18,ymm4,YMMWORD[96+rcx] 124 vpmadd52huq ymm19,ymm4,YMMWORD[128+rcx] 125 mov r13,QWORD[8+r11] 126 127 vpbroadcastq ymm3,r13 128 mov rdx,QWORD[rsi] 129 mulx r12,r13,r13 130 add r9,r13 131 mov r10,r12 132 adc r10,0 133 134 mov r13,r8 135 imul r13,r9 136 and r13,rax 137 138 vpbroadcastq ymm4,r13 139 mov rdx,QWORD[rcx] 140 mulx r12,r13,r13 141 add r9,r13 142 adc r10,r12 143 144 shr r9,52 145 sal r10,12 146 or r9,r10 147 148 vpmadd52luq ymm1,ymm3,YMMWORD[rsi] 149 vpmadd52luq ymm16,ymm3,YMMWORD[32+rsi] 150 vpmadd52luq ymm17,ymm3,YMMWORD[64+rsi] 151 vpmadd52luq ymm18,ymm3,YMMWORD[96+rsi] 152 vpmadd52luq ymm19,ymm3,YMMWORD[128+rsi] 153 154 vpmadd52luq ymm1,ymm4,YMMWORD[rcx] 155 vpmadd52luq ymm16,ymm4,YMMWORD[32+rcx] 156 vpmadd52luq ymm17,ymm4,YMMWORD[64+rcx] 157 vpmadd52luq ymm18,ymm4,YMMWORD[96+rcx] 158 vpmadd52luq ymm19,ymm4,YMMWORD[128+rcx] 159 160 161 valignq ymm1,ymm16,ymm1,1 162 valignq ymm16,ymm17,ymm16,1 163 valignq ymm17,ymm18,ymm17,1 164 valignq ymm18,ymm19,ymm18,1 165 valignq ymm19,ymm0,ymm19,1 166 167 vmovq r13,xmm1 168 add r9,r13 169 170 vpmadd52huq ymm1,ymm3,YMMWORD[rsi] 171 vpmadd52huq ymm16,ymm3,YMMWORD[32+rsi] 172 vpmadd52huq ymm17,ymm3,YMMWORD[64+rsi] 173 vpmadd52huq ymm18,ymm3,YMMWORD[96+rsi] 174 vpmadd52huq ymm19,ymm3,YMMWORD[128+rsi] 175 176 vpmadd52huq ymm1,ymm4,YMMWORD[rcx] 177 vpmadd52huq ymm16,ymm4,YMMWORD[32+rcx] 178 vpmadd52huq ymm17,ymm4,YMMWORD[64+rcx] 179 vpmadd52huq ymm18,ymm4,YMMWORD[96+rcx] 180 vpmadd52huq ymm19,ymm4,YMMWORD[128+rcx] 181 mov r13,QWORD[16+r11] 182 183 vpbroadcastq ymm3,r13 184 mov rdx,QWORD[rsi] 185 mulx r12,r13,r13 186 add r9,r13 187 mov r10,r12 188 adc r10,0 189 190 mov r13,r8 191 imul r13,r9 192 and r13,rax 193 194 vpbroadcastq ymm4,r13 195 mov rdx,QWORD[rcx] 196 mulx r12,r13,r13 197 add r9,r13 198 adc r10,r12 199 200 shr r9,52 201 sal r10,12 202 or r9,r10 203 204 vpmadd52luq ymm1,ymm3,YMMWORD[rsi] 205 vpmadd52luq ymm16,ymm3,YMMWORD[32+rsi] 206 vpmadd52luq ymm17,ymm3,YMMWORD[64+rsi] 207 vpmadd52luq ymm18,ymm3,YMMWORD[96+rsi] 208 vpmadd52luq ymm19,ymm3,YMMWORD[128+rsi] 209 210 vpmadd52luq ymm1,ymm4,YMMWORD[rcx] 211 vpmadd52luq ymm16,ymm4,YMMWORD[32+rcx] 212 vpmadd52luq ymm17,ymm4,YMMWORD[64+rcx] 213 vpmadd52luq ymm18,ymm4,YMMWORD[96+rcx] 214 vpmadd52luq ymm19,ymm4,YMMWORD[128+rcx] 215 216 217 valignq ymm1,ymm16,ymm1,1 218 valignq ymm16,ymm17,ymm16,1 219 valignq ymm17,ymm18,ymm17,1 220 valignq ymm18,ymm19,ymm18,1 221 valignq ymm19,ymm0,ymm19,1 222 223 vmovq r13,xmm1 224 add r9,r13 225 226 vpmadd52huq ymm1,ymm3,YMMWORD[rsi] 227 vpmadd52huq ymm16,ymm3,YMMWORD[32+rsi] 228 vpmadd52huq ymm17,ymm3,YMMWORD[64+rsi] 229 vpmadd52huq ymm18,ymm3,YMMWORD[96+rsi] 230 vpmadd52huq ymm19,ymm3,YMMWORD[128+rsi] 231 232 vpmadd52huq ymm1,ymm4,YMMWORD[rcx] 233 vpmadd52huq ymm16,ymm4,YMMWORD[32+rcx] 234 vpmadd52huq ymm17,ymm4,YMMWORD[64+rcx] 235 vpmadd52huq ymm18,ymm4,YMMWORD[96+rcx] 236 vpmadd52huq ymm19,ymm4,YMMWORD[128+rcx] 237 mov r13,QWORD[24+r11] 238 239 vpbroadcastq ymm3,r13 240 mov rdx,QWORD[rsi] 241 mulx r12,r13,r13 242 add r9,r13 243 mov r10,r12 244 adc r10,0 245 246 mov r13,r8 247 imul r13,r9 248 and r13,rax 249 250 vpbroadcastq ymm4,r13 251 mov rdx,QWORD[rcx] 252 mulx r12,r13,r13 253 add r9,r13 254 adc r10,r12 255 256 shr r9,52 257 sal r10,12 258 or r9,r10 259 260 vpmadd52luq ymm1,ymm3,YMMWORD[rsi] 261 vpmadd52luq ymm16,ymm3,YMMWORD[32+rsi] 262 vpmadd52luq ymm17,ymm3,YMMWORD[64+rsi] 263 vpmadd52luq ymm18,ymm3,YMMWORD[96+rsi] 264 vpmadd52luq ymm19,ymm3,YMMWORD[128+rsi] 265 266 vpmadd52luq ymm1,ymm4,YMMWORD[rcx] 267 vpmadd52luq ymm16,ymm4,YMMWORD[32+rcx] 268 vpmadd52luq ymm17,ymm4,YMMWORD[64+rcx] 269 vpmadd52luq ymm18,ymm4,YMMWORD[96+rcx] 270 vpmadd52luq ymm19,ymm4,YMMWORD[128+rcx] 271 272 273 valignq ymm1,ymm16,ymm1,1 274 valignq ymm16,ymm17,ymm16,1 275 valignq ymm17,ymm18,ymm17,1 276 valignq ymm18,ymm19,ymm18,1 277 valignq ymm19,ymm0,ymm19,1 278 279 vmovq r13,xmm1 280 add r9,r13 281 282 vpmadd52huq ymm1,ymm3,YMMWORD[rsi] 283 vpmadd52huq ymm16,ymm3,YMMWORD[32+rsi] 284 vpmadd52huq ymm17,ymm3,YMMWORD[64+rsi] 285 vpmadd52huq ymm18,ymm3,YMMWORD[96+rsi] 286 vpmadd52huq ymm19,ymm3,YMMWORD[128+rsi] 287 288 vpmadd52huq ymm1,ymm4,YMMWORD[rcx] 289 vpmadd52huq ymm16,ymm4,YMMWORD[32+rcx] 290 vpmadd52huq ymm17,ymm4,YMMWORD[64+rcx] 291 vpmadd52huq ymm18,ymm4,YMMWORD[96+rcx] 292 vpmadd52huq ymm19,ymm4,YMMWORD[128+rcx] 293 lea r11,[32+r11] 294 dec ebx 295 jne NEAR $L$loop5 296 297 vmovdqa64 ymm4,YMMWORD[$L$mask52x4] 298 299 vpbroadcastq ymm3,r9 300 vpblendd ymm1,ymm1,ymm3,3 301 302 303 304 vpsrlq ymm24,ymm1,52 305 vpsrlq ymm25,ymm16,52 306 vpsrlq ymm26,ymm17,52 307 vpsrlq ymm27,ymm18,52 308 vpsrlq ymm28,ymm19,52 309 310 311 valignq ymm28,ymm28,ymm27,3 312 valignq ymm27,ymm27,ymm26,3 313 valignq ymm26,ymm26,ymm25,3 314 valignq ymm25,ymm25,ymm24,3 315 valignq ymm24,ymm24,ymm0,3 316 317 318 vpandq ymm1,ymm1,ymm4 319 vpandq ymm16,ymm16,ymm4 320 vpandq ymm17,ymm17,ymm4 321 vpandq ymm18,ymm18,ymm4 322 vpandq ymm19,ymm19,ymm4 323 324 325 vpaddq ymm1,ymm1,ymm24 326 vpaddq ymm16,ymm16,ymm25 327 vpaddq ymm17,ymm17,ymm26 328 vpaddq ymm18,ymm18,ymm27 329 vpaddq ymm19,ymm19,ymm28 330 331 332 333 vpcmpuq k1,ymm4,ymm1,1 334 vpcmpuq k2,ymm4,ymm16,1 335 vpcmpuq k3,ymm4,ymm17,1 336 vpcmpuq k4,ymm4,ymm18,1 337 vpcmpuq k5,ymm4,ymm19,1 338 kmovb r14d,k1 339 kmovb r13d,k2 340 kmovb r12d,k3 341 kmovb r11d,k4 342 kmovb r10d,k5 343 344 345 vpcmpuq k1,ymm4,ymm1,0 346 vpcmpuq k2,ymm4,ymm16,0 347 vpcmpuq k3,ymm4,ymm17,0 348 vpcmpuq k4,ymm4,ymm18,0 349 vpcmpuq k5,ymm4,ymm19,0 350 kmovb r9d,k1 351 kmovb r8d,k2 352 kmovb ebx,k3 353 kmovb ecx,k4 354 kmovb edx,k5 355 356 357 358 shl r13b,4 359 or r14b,r13b 360 shl r11b,4 361 or r12b,r11b 362 363 add r14b,r14b 364 adc r12b,r12b 365 adc r10b,r10b 366 367 shl r8b,4 368 or r9b,r8b 369 shl cl,4 370 or bl,cl 371 372 add r14b,r9b 373 adc r12b,bl 374 adc r10b,dl 375 376 xor r14b,r9b 377 xor r12b,bl 378 xor r10b,dl 379 380 kmovb k1,r14d 381 shr r14b,4 382 kmovb k2,r14d 383 kmovb k3,r12d 384 shr r12b,4 385 kmovb k4,r12d 386 kmovb k5,r10d 387 388 389 vpsubq ymm1{k1},ymm1,ymm4 390 vpsubq ymm16{k2},ymm16,ymm4 391 vpsubq ymm17{k3},ymm17,ymm4 392 vpsubq ymm18{k4},ymm18,ymm4 393 vpsubq ymm19{k5},ymm19,ymm4 394 395 vpandq ymm1,ymm1,ymm4 396 vpandq ymm16,ymm16,ymm4 397 vpandq ymm17,ymm17,ymm4 398 vpandq ymm18,ymm18,ymm4 399 vpandq ymm19,ymm19,ymm4 400 401 vmovdqu64 YMMWORD[rdi],ymm1 402 vmovdqu64 YMMWORD[32+rdi],ymm16 403 vmovdqu64 YMMWORD[64+rdi],ymm17 404 vmovdqu64 YMMWORD[96+rdi],ymm18 405 vmovdqu64 YMMWORD[128+rdi],ymm19 406 407 vzeroupper 408 mov r15,QWORD[rsp] 409 410 mov r14,QWORD[8+rsp] 411 412 mov r13,QWORD[16+rsp] 413 414 mov r12,QWORD[24+rsp] 415 416 mov rbp,QWORD[32+rsp] 417 418 mov rbx,QWORD[40+rsp] 419 420 lea rsp,[48+rsp] 421 422 $L$rsaz_amm52x20_x1_256_epilogue: 423 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 424 mov rsi,QWORD[16+rsp] 425 DB 0F3h,0C3h ;repret 426 427 $L$SEH_end_ossl_rsaz_amm52x20_x1_256: 428 section .data data align=8 429 430 ALIGN 32 431 $L$mask52x4: 432 DQ 0xfffffffffffff 433 DQ 0xfffffffffffff 434 DQ 0xfffffffffffff 435 DQ 0xfffffffffffff 436 section .text code align=64 437 438 439 global ossl_rsaz_amm52x20_x2_256 440 441 ALIGN 32 442 ossl_rsaz_amm52x20_x2_256: 443 mov QWORD[8+rsp],rdi ;WIN64 prologue 444 mov QWORD[16+rsp],rsi 445 mov rax,rsp 446 $L$SEH_begin_ossl_rsaz_amm52x20_x2_256: 447 mov rdi,rcx 448 mov rsi,rdx 449 mov rdx,r8 450 mov rcx,r9 451 mov r8,QWORD[40+rsp] 452 453 454 455 DB 243,15,30,250 456 push rbx 457 458 push rbp 459 460 push r12 461 462 push r13 463 464 push r14 465 466 push r15 467 468 $L$rsaz_amm52x20_x2_256_body: 469 470 471 vpxord ymm0,ymm0,ymm0 472 vmovdqa64 ymm1,ymm0 473 vmovdqa64 ymm16,ymm0 474 vmovdqa64 ymm17,ymm0 475 vmovdqa64 ymm18,ymm0 476 vmovdqa64 ymm19,ymm0 477 vmovdqa64 ymm2,ymm0 478 vmovdqa64 ymm20,ymm0 479 vmovdqa64 ymm21,ymm0 480 vmovdqa64 ymm22,ymm0 481 vmovdqa64 ymm23,ymm0 482 483 xor r9d,r9d 484 xor r15d,r15d 485 486 mov r11,rdx 487 mov rax,0xfffffffffffff 488 489 mov ebx,20 490 491 ALIGN 32 492 $L$loop20: 493 mov r13,QWORD[r11] 494 495 vpbroadcastq ymm3,r13 496 mov rdx,QWORD[rsi] 497 mulx r12,r13,r13 498 add r9,r13 499 mov r10,r12 500 adc r10,0 501 502 mov r13,QWORD[r8] 503 imul r13,r9 504 and r13,rax 505 506 vpbroadcastq ymm4,r13 507 mov rdx,QWORD[rcx] 508 mulx r12,r13,r13 509 add r9,r13 510 adc r10,r12 511 512 shr r9,52 513 sal r10,12 514 or r9,r10 515 516 vpmadd52luq ymm1,ymm3,YMMWORD[rsi] 517 vpmadd52luq ymm16,ymm3,YMMWORD[32+rsi] 518 vpmadd52luq ymm17,ymm3,YMMWORD[64+rsi] 519 vpmadd52luq ymm18,ymm3,YMMWORD[96+rsi] 520 vpmadd52luq ymm19,ymm3,YMMWORD[128+rsi] 521 522 vpmadd52luq ymm1,ymm4,YMMWORD[rcx] 523 vpmadd52luq ymm16,ymm4,YMMWORD[32+rcx] 524 vpmadd52luq ymm17,ymm4,YMMWORD[64+rcx] 525 vpmadd52luq ymm18,ymm4,YMMWORD[96+rcx] 526 vpmadd52luq ymm19,ymm4,YMMWORD[128+rcx] 527 528 529 valignq ymm1,ymm16,ymm1,1 530 valignq ymm16,ymm17,ymm16,1 531 valignq ymm17,ymm18,ymm17,1 532 valignq ymm18,ymm19,ymm18,1 533 valignq ymm19,ymm0,ymm19,1 534 535 vmovq r13,xmm1 536 add r9,r13 537 538 vpmadd52huq ymm1,ymm3,YMMWORD[rsi] 539 vpmadd52huq ymm16,ymm3,YMMWORD[32+rsi] 540 vpmadd52huq ymm17,ymm3,YMMWORD[64+rsi] 541 vpmadd52huq ymm18,ymm3,YMMWORD[96+rsi] 542 vpmadd52huq ymm19,ymm3,YMMWORD[128+rsi] 543 544 vpmadd52huq ymm1,ymm4,YMMWORD[rcx] 545 vpmadd52huq ymm16,ymm4,YMMWORD[32+rcx] 546 vpmadd52huq ymm17,ymm4,YMMWORD[64+rcx] 547 vpmadd52huq ymm18,ymm4,YMMWORD[96+rcx] 548 vpmadd52huq ymm19,ymm4,YMMWORD[128+rcx] 549 mov r13,QWORD[160+r11] 550 551 vpbroadcastq ymm3,r13 552 mov rdx,QWORD[160+rsi] 553 mulx r12,r13,r13 554 add r15,r13 555 mov r10,r12 556 adc r10,0 557 558 mov r13,QWORD[8+r8] 559 imul r13,r15 560 and r13,rax 561 562 vpbroadcastq ymm4,r13 563 mov rdx,QWORD[160+rcx] 564 mulx r12,r13,r13 565 add r15,r13 566 adc r10,r12 567 568 shr r15,52 569 sal r10,12 570 or r15,r10 571 572 vpmadd52luq ymm2,ymm3,YMMWORD[160+rsi] 573 vpmadd52luq ymm20,ymm3,YMMWORD[192+rsi] 574 vpmadd52luq ymm21,ymm3,YMMWORD[224+rsi] 575 vpmadd52luq ymm22,ymm3,YMMWORD[256+rsi] 576 vpmadd52luq ymm23,ymm3,YMMWORD[288+rsi] 577 578 vpmadd52luq ymm2,ymm4,YMMWORD[160+rcx] 579 vpmadd52luq ymm20,ymm4,YMMWORD[192+rcx] 580 vpmadd52luq ymm21,ymm4,YMMWORD[224+rcx] 581 vpmadd52luq ymm22,ymm4,YMMWORD[256+rcx] 582 vpmadd52luq ymm23,ymm4,YMMWORD[288+rcx] 583 584 585 valignq ymm2,ymm20,ymm2,1 586 valignq ymm20,ymm21,ymm20,1 587 valignq ymm21,ymm22,ymm21,1 588 valignq ymm22,ymm23,ymm22,1 589 valignq ymm23,ymm0,ymm23,1 590 591 vmovq r13,xmm2 592 add r15,r13 593 594 vpmadd52huq ymm2,ymm3,YMMWORD[160+rsi] 595 vpmadd52huq ymm20,ymm3,YMMWORD[192+rsi] 596 vpmadd52huq ymm21,ymm3,YMMWORD[224+rsi] 597 vpmadd52huq ymm22,ymm3,YMMWORD[256+rsi] 598 vpmadd52huq ymm23,ymm3,YMMWORD[288+rsi] 599 600 vpmadd52huq ymm2,ymm4,YMMWORD[160+rcx] 601 vpmadd52huq ymm20,ymm4,YMMWORD[192+rcx] 602 vpmadd52huq ymm21,ymm4,YMMWORD[224+rcx] 603 vpmadd52huq ymm22,ymm4,YMMWORD[256+rcx] 604 vpmadd52huq ymm23,ymm4,YMMWORD[288+rcx] 605 lea r11,[8+r11] 606 dec ebx 607 jne NEAR $L$loop20 608 609 vmovdqa64 ymm4,YMMWORD[$L$mask52x4] 610 611 vpbroadcastq ymm3,r9 612 vpblendd ymm1,ymm1,ymm3,3 613 614 615 616 vpsrlq ymm24,ymm1,52 617 vpsrlq ymm25,ymm16,52 618 vpsrlq ymm26,ymm17,52 619 vpsrlq ymm27,ymm18,52 620 vpsrlq ymm28,ymm19,52 621 622 623 valignq ymm28,ymm28,ymm27,3 624 valignq ymm27,ymm27,ymm26,3 625 valignq ymm26,ymm26,ymm25,3 626 valignq ymm25,ymm25,ymm24,3 627 valignq ymm24,ymm24,ymm0,3 628 629 630 vpandq ymm1,ymm1,ymm4 631 vpandq ymm16,ymm16,ymm4 632 vpandq ymm17,ymm17,ymm4 633 vpandq ymm18,ymm18,ymm4 634 vpandq ymm19,ymm19,ymm4 635 636 637 vpaddq ymm1,ymm1,ymm24 638 vpaddq ymm16,ymm16,ymm25 639 vpaddq ymm17,ymm17,ymm26 640 vpaddq ymm18,ymm18,ymm27 641 vpaddq ymm19,ymm19,ymm28 642 643 644 645 vpcmpuq k1,ymm4,ymm1,1 646 vpcmpuq k2,ymm4,ymm16,1 647 vpcmpuq k3,ymm4,ymm17,1 648 vpcmpuq k4,ymm4,ymm18,1 649 vpcmpuq k5,ymm4,ymm19,1 650 kmovb r14d,k1 651 kmovb r13d,k2 652 kmovb r12d,k3 653 kmovb r11d,k4 654 kmovb r10d,k5 655 656 657 vpcmpuq k1,ymm4,ymm1,0 658 vpcmpuq k2,ymm4,ymm16,0 659 vpcmpuq k3,ymm4,ymm17,0 660 vpcmpuq k4,ymm4,ymm18,0 661 vpcmpuq k5,ymm4,ymm19,0 662 kmovb r9d,k1 663 kmovb r8d,k2 664 kmovb ebx,k3 665 kmovb ecx,k4 666 kmovb edx,k5 667 668 669 670 shl r13b,4 671 or r14b,r13b 672 shl r11b,4 673 or r12b,r11b 674 675 add r14b,r14b 676 adc r12b,r12b 677 adc r10b,r10b 678 679 shl r8b,4 680 or r9b,r8b 681 shl cl,4 682 or bl,cl 683 684 add r14b,r9b 685 adc r12b,bl 686 adc r10b,dl 687 688 xor r14b,r9b 689 xor r12b,bl 690 xor r10b,dl 691 692 kmovb k1,r14d 693 shr r14b,4 694 kmovb k2,r14d 695 kmovb k3,r12d 696 shr r12b,4 697 kmovb k4,r12d 698 kmovb k5,r10d 699 700 701 vpsubq ymm1{k1},ymm1,ymm4 702 vpsubq ymm16{k2},ymm16,ymm4 703 vpsubq ymm17{k3},ymm17,ymm4 704 vpsubq ymm18{k4},ymm18,ymm4 705 vpsubq ymm19{k5},ymm19,ymm4 706 707 vpandq ymm1,ymm1,ymm4 708 vpandq ymm16,ymm16,ymm4 709 vpandq ymm17,ymm17,ymm4 710 vpandq ymm18,ymm18,ymm4 711 vpandq ymm19,ymm19,ymm4 712 713 vpbroadcastq ymm3,r15 714 vpblendd ymm2,ymm2,ymm3,3 715 716 717 718 vpsrlq ymm24,ymm2,52 719 vpsrlq ymm25,ymm20,52 720 vpsrlq ymm26,ymm21,52 721 vpsrlq ymm27,ymm22,52 722 vpsrlq ymm28,ymm23,52 723 724 725 valignq ymm28,ymm28,ymm27,3 726 valignq ymm27,ymm27,ymm26,3 727 valignq ymm26,ymm26,ymm25,3 728 valignq ymm25,ymm25,ymm24,3 729 valignq ymm24,ymm24,ymm0,3 730 731 732 vpandq ymm2,ymm2,ymm4 733 vpandq ymm20,ymm20,ymm4 734 vpandq ymm21,ymm21,ymm4 735 vpandq ymm22,ymm22,ymm4 736 vpandq ymm23,ymm23,ymm4 737 738 739 vpaddq ymm2,ymm2,ymm24 740 vpaddq ymm20,ymm20,ymm25 741 vpaddq ymm21,ymm21,ymm26 742 vpaddq ymm22,ymm22,ymm27 743 vpaddq ymm23,ymm23,ymm28 744 745 746 747 vpcmpuq k1,ymm4,ymm2,1 748 vpcmpuq k2,ymm4,ymm20,1 749 vpcmpuq k3,ymm4,ymm21,1 750 vpcmpuq k4,ymm4,ymm22,1 751 vpcmpuq k5,ymm4,ymm23,1 752 kmovb r14d,k1 753 kmovb r13d,k2 754 kmovb r12d,k3 755 kmovb r11d,k4 756 kmovb r10d,k5 757 758 759 vpcmpuq k1,ymm4,ymm2,0 760 vpcmpuq k2,ymm4,ymm20,0 761 vpcmpuq k3,ymm4,ymm21,0 762 vpcmpuq k4,ymm4,ymm22,0 763 vpcmpuq k5,ymm4,ymm23,0 764 kmovb r9d,k1 765 kmovb r8d,k2 766 kmovb ebx,k3 767 kmovb ecx,k4 768 kmovb edx,k5 769 770 771 772 shl r13b,4 773 or r14b,r13b 774 shl r11b,4 775 or r12b,r11b 776 777 add r14b,r14b 778 adc r12b,r12b 779 adc r10b,r10b 780 781 shl r8b,4 782 or r9b,r8b 783 shl cl,4 784 or bl,cl 785 786 add r14b,r9b 787 adc r12b,bl 788 adc r10b,dl 789 790 xor r14b,r9b 791 xor r12b,bl 792 xor r10b,dl 793 794 kmovb k1,r14d 795 shr r14b,4 796 kmovb k2,r14d 797 kmovb k3,r12d 798 shr r12b,4 799 kmovb k4,r12d 800 kmovb k5,r10d 801 802 803 vpsubq ymm2{k1},ymm2,ymm4 804 vpsubq ymm20{k2},ymm20,ymm4 805 vpsubq ymm21{k3},ymm21,ymm4 806 vpsubq ymm22{k4},ymm22,ymm4 807 vpsubq ymm23{k5},ymm23,ymm4 808 809 vpandq ymm2,ymm2,ymm4 810 vpandq ymm20,ymm20,ymm4 811 vpandq ymm21,ymm21,ymm4 812 vpandq ymm22,ymm22,ymm4 813 vpandq ymm23,ymm23,ymm4 814 815 vmovdqu64 YMMWORD[rdi],ymm1 816 vmovdqu64 YMMWORD[32+rdi],ymm16 817 vmovdqu64 YMMWORD[64+rdi],ymm17 818 vmovdqu64 YMMWORD[96+rdi],ymm18 819 vmovdqu64 YMMWORD[128+rdi],ymm19 820 821 vmovdqu64 YMMWORD[160+rdi],ymm2 822 vmovdqu64 YMMWORD[192+rdi],ymm20 823 vmovdqu64 YMMWORD[224+rdi],ymm21 824 vmovdqu64 YMMWORD[256+rdi],ymm22 825 vmovdqu64 YMMWORD[288+rdi],ymm23 826 827 vzeroupper 828 mov r15,QWORD[rsp] 829 830 mov r14,QWORD[8+rsp] 831 832 mov r13,QWORD[16+rsp] 833 834 mov r12,QWORD[24+rsp] 835 836 mov rbp,QWORD[32+rsp] 837 838 mov rbx,QWORD[40+rsp] 839 840 lea rsp,[48+rsp] 841 842 $L$rsaz_amm52x20_x2_256_epilogue: 843 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 844 mov rsi,QWORD[16+rsp] 845 DB 0F3h,0C3h ;repret 846 847 $L$SEH_end_ossl_rsaz_amm52x20_x2_256: 848 section .text code align=64 849 850 851 ALIGN 32 852 global ossl_extract_multiplier_2x20_win5 853 854 ossl_extract_multiplier_2x20_win5: 855 mov QWORD[8+rsp],rdi ;WIN64 prologue 856 mov QWORD[16+rsp],rsi 857 mov rax,rsp 858 $L$SEH_begin_ossl_extract_multiplier_2x20_win5: 859 mov rdi,rcx 860 mov rsi,rdx 861 mov rdx,r8 862 mov rcx,r9 863 864 865 866 DB 243,15,30,250 867 lea rax,[rcx*4+rcx] 868 sal rax,5 869 add rsi,rax 870 871 vmovdqa64 ymm23,YMMWORD[$L$ones] 872 vpbroadcastq ymm22,rdx 873 lea rax,[10240+rsi] 874 875 vpxor xmm4,xmm4,xmm4 876 vmovdqa64 ymm3,ymm4 877 vmovdqa64 ymm2,ymm4 878 vmovdqa64 ymm1,ymm4 879 vmovdqa64 ymm0,ymm4 880 vmovdqa64 ymm21,ymm4 881 882 ALIGN 32 883 $L$loop: 884 vpcmpq k1,ymm22,ymm21,0 885 add rsi,320 886 vpaddq ymm21,ymm21,ymm23 887 vmovdqu64 ymm16,YMMWORD[((-320))+rsi] 888 vmovdqu64 ymm17,YMMWORD[((-288))+rsi] 889 vmovdqu64 ymm18,YMMWORD[((-256))+rsi] 890 vmovdqu64 ymm19,YMMWORD[((-224))+rsi] 891 vmovdqu64 ymm20,YMMWORD[((-192))+rsi] 892 vpblendmq ymm0{k1},ymm0,ymm16 893 vpblendmq ymm1{k1},ymm1,ymm17 894 vpblendmq ymm2{k1},ymm2,ymm18 895 vpblendmq ymm3{k1},ymm3,ymm19 896 vpblendmq ymm4{k1},ymm4,ymm20 897 cmp rax,rsi 898 jne NEAR $L$loop 899 900 vmovdqu64 YMMWORD[rdi],ymm0 901 vmovdqu64 YMMWORD[32+rdi],ymm1 902 vmovdqu64 YMMWORD[64+rdi],ymm2 903 vmovdqu64 YMMWORD[96+rdi],ymm3 904 vmovdqu64 YMMWORD[128+rdi],ymm4 905 906 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 907 mov rsi,QWORD[16+rsp] 908 DB 0F3h,0C3h ;repret 909 910 $L$SEH_end_ossl_extract_multiplier_2x20_win5: 911 section .data data align=8 912 913 ALIGN 32 914 $L$ones: 915 DQ 1,1,1,1 916 EXTERN __imp_RtlVirtualUnwind 917 918 ALIGN 16 919 rsaz_def_handler: 920 push rsi 921 push rdi 922 push rbx 923 push rbp 924 push r12 925 push r13 926 push r14 927 push r15 928 pushfq 929 sub rsp,64 930 931 mov rax,QWORD[120+r8] 932 mov rbx,QWORD[248+r8] 933 934 mov rsi,QWORD[8+r9] 935 mov r11,QWORD[56+r9] 936 937 mov r10d,DWORD[r11] 938 lea r10,[r10*1+rsi] 939 cmp rbx,r10 940 jb NEAR $L$common_seh_tail 941 942 mov rax,QWORD[152+r8] 943 944 mov r10d,DWORD[4+r11] 945 lea r10,[r10*1+rsi] 946 cmp rbx,r10 947 jae NEAR $L$common_seh_tail 948 949 lea rax,[48+rax] 950 951 mov rbx,QWORD[((-8))+rax] 952 mov rbp,QWORD[((-16))+rax] 953 mov r12,QWORD[((-24))+rax] 954 mov r13,QWORD[((-32))+rax] 955 mov r14,QWORD[((-40))+rax] 956 mov r15,QWORD[((-48))+rax] 957 mov QWORD[144+r8],rbx 958 mov QWORD[160+r8],rbp 959 mov QWORD[216+r8],r12 960 mov QWORD[224+r8],r13 961 mov QWORD[232+r8],r14 962 mov QWORD[240+r8],r15 963 964 $L$common_seh_tail: 965 mov rdi,QWORD[8+rax] 966 mov rsi,QWORD[16+rax] 967 mov QWORD[152+r8],rax 968 mov QWORD[168+r8],rsi 969 mov QWORD[176+r8],rdi 970 971 mov rdi,QWORD[40+r9] 972 mov rsi,r8 973 mov ecx,154 974 DD 0xa548f3fc 975 976 mov rsi,r9 977 xor rcx,rcx 978 mov rdx,QWORD[8+rsi] 979 mov r8,QWORD[rsi] 980 mov r9,QWORD[16+rsi] 981 mov r10,QWORD[40+rsi] 982 lea r11,[56+rsi] 983 lea r12,[24+rsi] 984 mov QWORD[32+rsp],r10 985 mov QWORD[40+rsp],r11 986 mov QWORD[48+rsp],r12 987 mov QWORD[56+rsp],rcx 988 call QWORD[__imp_RtlVirtualUnwind] 989 990 mov eax,1 991 add rsp,64 992 popfq 993 pop r15 994 pop r14 995 pop r13 996 pop r12 997 pop rbp 998 pop rbx 999 pop rdi 1000 pop rsi 10 ossl_rsaz_avx512ifma_eligible: 11 xor eax,eax 1001 12 DB 0F3h,0C3h ;repret 1002 13 1003 14 1004 section .pdata rdata align=4 1005 ALIGN 4 1006 DD $L$SEH_begin_ossl_rsaz_amm52x20_x1_256 wrt ..imagebase 1007 DD $L$SEH_end_ossl_rsaz_amm52x20_x1_256 wrt ..imagebase 1008 DD $L$SEH_info_ossl_rsaz_amm52x20_x1_256 wrt ..imagebase 15 global ossl_rsaz_amm52x20_x1_256 16 global ossl_rsaz_amm52x20_x2_256 17 global ossl_extract_multiplier_2x20_win5 1009 18 1010 DD $L$SEH_begin_ossl_rsaz_amm52x20_x2_256 wrt ..imagebase 1011 DD $L$SEH_end_ossl_rsaz_amm52x20_x2_256 wrt ..imagebase 1012 DD $L$SEH_info_ossl_rsaz_amm52x20_x2_256 wrt ..imagebase 19 ossl_rsaz_amm52x20_x1_256: 20 ossl_rsaz_amm52x20_x2_256: 21 ossl_extract_multiplier_2x20_win5: 22 DB 0x0f,0x0b 23 DB 0F3h,0C3h ;repret 1013 24 1014 DD $L$SEH_begin_ossl_extract_multiplier_2x20_win5 wrt ..imagebase1015 DD $L$SEH_end_ossl_extract_multiplier_2x20_win5 wrt ..imagebase1016 DD $L$SEH_info_ossl_extract_multiplier_2x20_win5 wrt ..imagebase1017 1018 section .xdata rdata align=81019 ALIGN 81020 $L$SEH_info_ossl_rsaz_amm52x20_x1_256:1021 DB 9,0,0,01022 DD rsaz_def_handler wrt ..imagebase1023 DD $L$rsaz_amm52x20_x1_256_body wrt ..imagebase,$L$rsaz_amm52x20_x1_256_epilogue wrt ..imagebase1024 $L$SEH_info_ossl_rsaz_amm52x20_x2_256:1025 DB 9,0,0,01026 DD rsaz_def_handler wrt ..imagebase1027 DD $L$rsaz_amm52x20_x2_256_body wrt ..imagebase,$L$rsaz_amm52x20_x2_256_epilogue wrt ..imagebase1028 $L$SEH_info_ossl_extract_multiplier_2x20_win5:1029 DB 9,0,0,01030 DD rsaz_def_handler wrt ..imagebase1031 DD $L$SEH_begin_ossl_extract_multiplier_2x20_win5 wrt ..imagebase,$L$SEH_begin_ossl_extract_multiplier_2x20_win5 wrt ..imagebase -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/rsaz-x86_64.S
r95219 r95221 44 44 mov rax,QWORD[8+rsi] 45 45 mov QWORD[128+rsp],rcx 46 mov r11d,0x8010047 and r11d,DWORD[((OPENSSL_ia32cap_P+8))]48 cmp r11d,0x8010049 je NEAR $L$oop_sqrx50 46 jmp NEAR $L$oop_sqr 51 47 … … 418 414 dec r8d 419 415 jnz NEAR $L$oop_sqr 420 jmp NEAR $L$sqr_tail421 422 ALIGN 32423 $L$oop_sqrx:424 mov DWORD[((128+8))+rsp],r8d425 DB 102,72,15,110,199426 427 mulx r9,r8,rax428 mov rbx,rax429 430 mulx r10,rcx,QWORD[16+rsi]431 xor rbp,rbp432 433 mulx r11,rax,QWORD[24+rsi]434 adcx r9,rcx435 436 DB 0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00437 adcx r10,rax438 439 DB 0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00440 adcx r11,rcx441 442 mulx r14,rcx,QWORD[48+rsi]443 adcx r12,rax444 adcx r13,rcx445 446 mulx r15,rax,QWORD[56+rsi]447 adcx r14,rax448 adcx r15,rbp449 450 mulx rdi,rax,rdx451 mov rdx,rbx452 xor rcx,rcx453 adox r8,r8454 adcx r8,rdi455 adox rcx,rbp456 adcx rcx,rbp457 458 mov QWORD[rsp],rax459 mov QWORD[8+rsp],r8460 461 462 DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00463 adox r10,rax464 adcx r11,rbx465 466 mulx r8,rdi,QWORD[24+rsi]467 adox r11,rdi468 DB 0x66469 adcx r12,r8470 471 mulx rbx,rax,QWORD[32+rsi]472 adox r12,rax473 adcx r13,rbx474 475 mulx r8,rdi,QWORD[40+rsi]476 adox r13,rdi477 adcx r14,r8478 479 DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00480 adox r14,rax481 adcx r15,rbx482 483 DB 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00484 adox r15,rdi485 adcx r8,rbp486 mulx rdi,rax,rdx487 adox r8,rbp488 DB 0x48,0x8b,0x96,0x10,0x00,0x00,0x00489 490 xor rbx,rbx491 adox r9,r9492 493 adcx rax,rcx494 adox r10,r10495 adcx r9,rax496 adox rbx,rbp497 adcx r10,rdi498 adcx rbx,rbp499 500 mov QWORD[16+rsp],r9501 DB 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00502 503 504 mulx r9,rdi,QWORD[24+rsi]505 adox r12,rdi506 adcx r13,r9507 508 mulx rcx,rax,QWORD[32+rsi]509 adox r13,rax510 adcx r14,rcx511 512 DB 0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00513 adox r14,rdi514 adcx r15,r9515 516 DB 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00517 adox r15,rax518 adcx r8,rcx519 520 mulx r9,rdi,QWORD[56+rsi]521 adox r8,rdi522 adcx r9,rbp523 mulx rdi,rax,rdx524 adox r9,rbp525 mov rdx,QWORD[24+rsi]526 527 xor rcx,rcx528 adox r11,r11529 530 adcx rax,rbx531 adox r12,r12532 adcx r11,rax533 adox rcx,rbp534 adcx r12,rdi535 adcx rcx,rbp536 537 mov QWORD[32+rsp],r11538 mov QWORD[40+rsp],r12539 540 541 mulx rbx,rax,QWORD[32+rsi]542 adox r14,rax543 adcx r15,rbx544 545 mulx r10,rdi,QWORD[40+rsi]546 adox r15,rdi547 adcx r8,r10548 549 mulx rbx,rax,QWORD[48+rsi]550 adox r8,rax551 adcx r9,rbx552 553 mulx r10,rdi,QWORD[56+rsi]554 adox r9,rdi555 adcx r10,rbp556 mulx rdi,rax,rdx557 adox r10,rbp558 mov rdx,QWORD[32+rsi]559 560 xor rbx,rbx561 adox r13,r13562 563 adcx rax,rcx564 adox r14,r14565 adcx r13,rax566 adox rbx,rbp567 adcx r14,rdi568 adcx rbx,rbp569 570 mov QWORD[48+rsp],r13571 mov QWORD[56+rsp],r14572 573 574 mulx r11,rdi,QWORD[40+rsi]575 adox r8,rdi576 adcx r9,r11577 578 mulx rcx,rax,QWORD[48+rsi]579 adox r9,rax580 adcx r10,rcx581 582 mulx r11,rdi,QWORD[56+rsi]583 adox r10,rdi584 adcx r11,rbp585 mulx rdi,rax,rdx586 mov rdx,QWORD[40+rsi]587 adox r11,rbp588 589 xor rcx,rcx590 adox r15,r15591 592 adcx rax,rbx593 adox r8,r8594 adcx r15,rax595 adox rcx,rbp596 adcx r8,rdi597 adcx rcx,rbp598 599 mov QWORD[64+rsp],r15600 mov QWORD[72+rsp],r8601 602 603 DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00604 adox r10,rax605 adcx r11,rbx606 607 DB 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00608 adox r11,rdi609 adcx r12,rbp610 mulx rdi,rax,rdx611 adox r12,rbp612 mov rdx,QWORD[48+rsi]613 614 xor rbx,rbx615 adox r9,r9616 617 adcx rax,rcx618 adox r10,r10619 adcx r9,rax620 adcx r10,rdi621 adox rbx,rbp622 adcx rbx,rbp623 624 mov QWORD[80+rsp],r9625 mov QWORD[88+rsp],r10626 627 628 DB 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00629 adox r12,rax630 adox r13,rbp631 632 mulx rdi,rax,rdx633 xor rcx,rcx634 mov rdx,QWORD[56+rsi]635 adox r11,r11636 637 adcx rax,rbx638 adox r12,r12639 adcx r11,rax640 adox rcx,rbp641 adcx r12,rdi642 adcx rcx,rbp643 644 DB 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00645 DB 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00646 647 648 mulx rdx,rax,rdx649 xor rbx,rbx650 adox r13,r13651 652 adcx rax,rcx653 adox rbx,rbp654 adcx rax,r13655 adcx rbx,rdx656 657 DB 102,72,15,126,199658 DB 102,72,15,126,205659 660 mov rdx,QWORD[128+rsp]661 mov r8,QWORD[rsp]662 mov r9,QWORD[8+rsp]663 mov r10,QWORD[16+rsp]664 mov r11,QWORD[24+rsp]665 mov r12,QWORD[32+rsp]666 mov r13,QWORD[40+rsp]667 mov r14,QWORD[48+rsp]668 mov r15,QWORD[56+rsp]669 670 mov QWORD[112+rsp],rax671 mov QWORD[120+rsp],rbx672 673 call __rsaz_512_reducex674 675 add r8,QWORD[64+rsp]676 adc r9,QWORD[72+rsp]677 adc r10,QWORD[80+rsp]678 adc r11,QWORD[88+rsp]679 adc r12,QWORD[96+rsp]680 adc r13,QWORD[104+rsp]681 adc r14,QWORD[112+rsp]682 adc r15,QWORD[120+rsp]683 sbb rcx,rcx684 685 call __rsaz_512_subtract686 687 mov rdx,r8688 mov rax,r9689 mov r8d,DWORD[((128+8))+rsp]690 mov rsi,rdi691 692 dec r8d693 jnz NEAR $L$oop_sqrx694 695 $L$sqr_tail:696 416 697 417 lea rax,[((128+24+48))+rsp] … … 752 472 DB 102,72,15,110,201 753 473 mov QWORD[128+rsp],r8 754 mov r11d,0x80100755 and r11d,DWORD[((OPENSSL_ia32cap_P+8))]756 cmp r11d,0x80100757 je NEAR $L$mulx758 474 mov rbx,QWORD[rdx] 759 475 mov rbp,rdx … … 773 489 774 490 call __rsaz_512_reduce 775 jmp NEAR $L$mul_tail776 777 ALIGN 32778 $L$mulx:779 mov rbp,rdx780 mov rdx,QWORD[rdx]781 call __rsaz_512_mulx782 783 DB 102,72,15,126,199784 DB 102,72,15,126,205785 786 mov rdx,QWORD[128+rsp]787 mov r8,QWORD[rsp]788 mov r9,QWORD[8+rsp]789 mov r10,QWORD[16+rsp]790 mov r11,QWORD[24+rsp]791 mov r12,QWORD[32+rsp]792 mov r13,QWORD[40+rsp]793 mov r14,QWORD[48+rsp]794 mov r15,QWORD[56+rsp]795 796 call __rsaz_512_reducex797 $L$mul_tail:798 491 add r8,QWORD[64+rsp] 799 492 adc r9,QWORD[72+rsp] … … 927 620 pshufd xmm9,xmm8,0x4e 928 621 por xmm8,xmm9 929 mov r11d,0x80100930 and r11d,DWORD[((OPENSSL_ia32cap_P+8))]931 cmp r11d,0x80100932 je NEAR $L$mulx_gather933 622 DB 102,76,15,126,195 934 623 … … 1111 800 1112 801 call __rsaz_512_reduce 1113 jmp NEAR $L$mul_gather_tail1114 1115 ALIGN 321116 $L$mulx_gather:1117 DB 102,76,15,126,1941118 1119 mov QWORD[128+rsp],r81120 mov QWORD[((128+8))+rsp],rdi1121 mov QWORD[((128+16))+rsp],rcx1122 1123 mulx r8,rbx,QWORD[rsi]1124 mov QWORD[rsp],rbx1125 xor edi,edi1126 1127 mulx r9,rax,QWORD[8+rsi]1128 1129 mulx r10,rbx,QWORD[16+rsi]1130 adcx r8,rax1131 1132 mulx r11,rax,QWORD[24+rsi]1133 adcx r9,rbx1134 1135 mulx r12,rbx,QWORD[32+rsi]1136 adcx r10,rax1137 1138 mulx r13,rax,QWORD[40+rsi]1139 adcx r11,rbx1140 1141 mulx r14,rbx,QWORD[48+rsi]1142 adcx r12,rax1143 1144 mulx r15,rax,QWORD[56+rsi]1145 adcx r13,rbx1146 adcx r14,rax1147 DB 0x671148 mov rbx,r81149 adcx r15,rdi1150 1151 mov rcx,-71152 jmp NEAR $L$oop_mulx_gather1153 1154 ALIGN 321155 $L$oop_mulx_gather:1156 movdqa xmm8,XMMWORD[rbp]1157 movdqa xmm9,XMMWORD[16+rbp]1158 movdqa xmm10,XMMWORD[32+rbp]1159 movdqa xmm11,XMMWORD[48+rbp]1160 pand xmm8,xmm01161 movdqa xmm12,XMMWORD[64+rbp]1162 pand xmm9,xmm11163 movdqa xmm13,XMMWORD[80+rbp]1164 pand xmm10,xmm21165 movdqa xmm14,XMMWORD[96+rbp]1166 pand xmm11,xmm31167 movdqa xmm15,XMMWORD[112+rbp]1168 lea rbp,[128+rbp]1169 pand xmm12,xmm41170 pand xmm13,xmm51171 pand xmm14,xmm61172 pand xmm15,xmm71173 por xmm8,xmm101174 por xmm9,xmm111175 por xmm8,xmm121176 por xmm9,xmm131177 por xmm8,xmm141178 por xmm9,xmm151179 1180 por xmm8,xmm91181 pshufd xmm9,xmm8,0x4e1182 por xmm8,xmm91183 DB 102,76,15,126,1941184 1185 DB 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x001186 adcx rbx,rax1187 adox r8,r91188 1189 mulx r9,rax,QWORD[8+rsi]1190 adcx r8,rax1191 adox r9,r101192 1193 mulx r10,rax,QWORD[16+rsi]1194 adcx r9,rax1195 adox r10,r111196 1197 DB 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x001198 adcx r10,rax1199 adox r11,r121200 1201 mulx r12,rax,QWORD[32+rsi]1202 adcx r11,rax1203 adox r12,r131204 1205 mulx r13,rax,QWORD[40+rsi]1206 adcx r12,rax1207 adox r13,r141208 1209 DB 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x001210 adcx r13,rax1211 DB 0x671212 adox r14,r151213 1214 mulx r15,rax,QWORD[56+rsi]1215 mov QWORD[64+rcx*8+rsp],rbx1216 adcx r14,rax1217 adox r15,rdi1218 mov rbx,r81219 adcx r15,rdi1220 1221 inc rcx1222 jnz NEAR $L$oop_mulx_gather1223 1224 mov QWORD[64+rsp],r81225 mov QWORD[((64+8))+rsp],r91226 mov QWORD[((64+16))+rsp],r101227 mov QWORD[((64+24))+rsp],r111228 mov QWORD[((64+32))+rsp],r121229 mov QWORD[((64+40))+rsp],r131230 mov QWORD[((64+48))+rsp],r141231 mov QWORD[((64+56))+rsp],r151232 1233 mov rdx,QWORD[128+rsp]1234 mov rdi,QWORD[((128+8))+rsp]1235 mov rbp,QWORD[((128+16))+rsp]1236 1237 mov r8,QWORD[rsp]1238 mov r9,QWORD[8+rsp]1239 mov r10,QWORD[16+rsp]1240 mov r11,QWORD[24+rsp]1241 mov r12,QWORD[32+rsp]1242 mov r13,QWORD[40+rsp]1243 mov r14,QWORD[48+rsp]1244 mov r15,QWORD[56+rsp]1245 1246 call __rsaz_512_reducex1247 1248 $L$mul_gather_tail:1249 802 add r8,QWORD[64+rsp] 1250 803 adc r9,QWORD[72+rsp] … … 1333 886 1334 887 mov rbp,rdi 1335 mov r11d,0x801001336 and r11d,DWORD[((OPENSSL_ia32cap_P+8))]1337 cmp r11d,0x801001338 je NEAR $L$mulx_scatter1339 888 mov rbx,QWORD[rdi] 1340 889 call __rsaz_512_mul … … 1353 902 1354 903 call __rsaz_512_reduce 1355 jmp NEAR $L$mul_scatter_tail1356 1357 ALIGN 321358 $L$mulx_scatter:1359 mov rdx,QWORD[rdi]1360 call __rsaz_512_mulx1361 1362 DB 102,72,15,126,1991363 DB 102,72,15,126,2051364 1365 mov rdx,QWORD[128+rsp]1366 mov r8,QWORD[rsp]1367 mov r9,QWORD[8+rsp]1368 mov r10,QWORD[16+rsp]1369 mov r11,QWORD[24+rsp]1370 mov r12,QWORD[32+rsp]1371 mov r13,QWORD[40+rsp]1372 mov r14,QWORD[48+rsp]1373 mov r15,QWORD[56+rsp]1374 1375 call __rsaz_512_reducex1376 1377 $L$mul_scatter_tail:1378 904 add r8,QWORD[64+rsp] 1379 905 adc r9,QWORD[72+rsp] … … 1451 977 1452 978 $L$mul_by_one_body: 1453 mov eax,DWORD[((OPENSSL_ia32cap_P+8))]1454 979 mov rbp,rdx 1455 980 mov QWORD[128+rsp],rcx … … 1472 997 movdqa XMMWORD[80+rsp],xmm0 1473 998 movdqa XMMWORD[96+rsp],xmm0 1474 and eax,0x801001475 cmp eax,0x801001476 je NEAR $L$by_one_callx1477 999 call __rsaz_512_reduce 1478 jmp NEAR $L$by_one_tail1479 ALIGN 321480 $L$by_one_callx:1481 mov rdx,QWORD[128+rsp]1482 call __rsaz_512_reducex1483 $L$by_one_tail:1484 1000 mov QWORD[rdi],r8 1485 1001 mov QWORD[8+rdi],r9 … … 1594 1110 dec ecx 1595 1111 jne NEAR $L$reduction_loop 1596 1597 DB 0F3h,0C3h ;repret1598 1599 1600 1601 ALIGN 321602 __rsaz_512_reducex:1603 1604 1605 imul rdx,r81606 xor rsi,rsi1607 mov ecx,81608 jmp NEAR $L$reduction_loopx1609 1610 ALIGN 321611 $L$reduction_loopx:1612 mov rbx,r81613 mulx r8,rax,QWORD[rbp]1614 adcx rax,rbx1615 adox r8,r91616 1617 mulx r9,rax,QWORD[8+rbp]1618 adcx r8,rax1619 adox r9,r101620 1621 mulx r10,rbx,QWORD[16+rbp]1622 adcx r9,rbx1623 adox r10,r111624 1625 mulx r11,rbx,QWORD[24+rbp]1626 adcx r10,rbx1627 adox r11,r121628 1629 DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x001630 mov rax,rdx1631 mov rdx,r81632 adcx r11,rbx1633 adox r12,r131634 1635 mulx rdx,rbx,QWORD[((128+8))+rsp]1636 mov rdx,rax1637 1638 mulx r13,rax,QWORD[40+rbp]1639 adcx r12,rax1640 adox r13,r141641 1642 DB 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x001643 adcx r13,rax1644 adox r14,r151645 1646 mulx r15,rax,QWORD[56+rbp]1647 mov rdx,rbx1648 adcx r14,rax1649 adox r15,rsi1650 adcx r15,rsi1651 1652 dec ecx1653 jne NEAR $L$reduction_loopx1654 1112 1655 1113 DB 0F3h,0C3h ;repret … … 1855 1313 mov QWORD[48+rdi],r14 1856 1314 mov QWORD[56+rdi],r15 1857 1858 DB 0F3h,0C3h ;repret1859 1860 1861 1862 ALIGN 321863 __rsaz_512_mulx:1864 1865 mulx r8,rbx,QWORD[rsi]1866 mov rcx,-61867 1868 mulx r9,rax,QWORD[8+rsi]1869 mov QWORD[8+rsp],rbx1870 1871 mulx r10,rbx,QWORD[16+rsi]1872 adc r8,rax1873 1874 mulx r11,rax,QWORD[24+rsi]1875 adc r9,rbx1876 1877 mulx r12,rbx,QWORD[32+rsi]1878 adc r10,rax1879 1880 mulx r13,rax,QWORD[40+rsi]1881 adc r11,rbx1882 1883 mulx r14,rbx,QWORD[48+rsi]1884 adc r12,rax1885 1886 mulx r15,rax,QWORD[56+rsi]1887 mov rdx,QWORD[8+rbp]1888 adc r13,rbx1889 adc r14,rax1890 adc r15,01891 1892 xor rdi,rdi1893 jmp NEAR $L$oop_mulx1894 1895 ALIGN 321896 $L$oop_mulx:1897 mov rbx,r81898 mulx r8,rax,QWORD[rsi]1899 adcx rbx,rax1900 adox r8,r91901 1902 mulx r9,rax,QWORD[8+rsi]1903 adcx r8,rax1904 adox r9,r101905 1906 mulx r10,rax,QWORD[16+rsi]1907 adcx r9,rax1908 adox r10,r111909 1910 mulx r11,rax,QWORD[24+rsi]1911 adcx r10,rax1912 adox r11,r121913 1914 DB 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x001915 adcx r11,rax1916 adox r12,r131917 1918 mulx r13,rax,QWORD[40+rsi]1919 adcx r12,rax1920 adox r13,r141921 1922 mulx r14,rax,QWORD[48+rsi]1923 adcx r13,rax1924 adox r14,r151925 1926 mulx r15,rax,QWORD[56+rsi]1927 mov rdx,QWORD[64+rcx*8+rbp]1928 mov QWORD[((8+64-8))+rcx*8+rsp],rbx1929 adcx r14,rax1930 adox r15,rdi1931 adcx r15,rdi1932 1933 inc rcx1934 jnz NEAR $L$oop_mulx1935 1936 mov rbx,r81937 mulx r8,rax,QWORD[rsi]1938 adcx rbx,rax1939 adox r8,r91940 1941 DB 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x001942 adcx r8,rax1943 adox r9,r101944 1945 DB 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x001946 adcx r9,rax1947 adox r10,r111948 1949 mulx r11,rax,QWORD[24+rsi]1950 adcx r10,rax1951 adox r11,r121952 1953 mulx r12,rax,QWORD[32+rsi]1954 adcx r11,rax1955 adox r12,r131956 1957 mulx r13,rax,QWORD[40+rsi]1958 adcx r12,rax1959 adox r13,r141960 1961 DB 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x001962 adcx r13,rax1963 adox r14,r151964 1965 DB 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x001966 adcx r14,rax1967 adox r15,rdi1968 adcx r15,rdi1969 1970 mov QWORD[((8+64-8))+rsp],rbx1971 mov QWORD[((8+64))+rsp],r81972 mov QWORD[((8+64+8))+rsp],r91973 mov QWORD[((8+64+16))+rsp],r101974 mov QWORD[((8+64+24))+rsp],r111975 mov QWORD[((8+64+32))+rsp],r121976 mov QWORD[((8+64+40))+rsp],r131977 mov QWORD[((8+64+48))+rsp],r141978 mov QWORD[((8+64+56))+rsp],r151979 1315 1980 1316 DB 0F3h,0C3h ;repret -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/sha1-mb-x86_64.S
r94083 r95221 25 25 bt rcx,61 26 26 jc NEAR _shaext_shortcut 27 test ecx,26843545628 jnz NEAR _avx_shortcut29 27 mov rax,rsp 30 28 … … 3020 3018 $L$SEH_end_sha1_multi_block_shaext: 3021 3019 3022 ALIGN 323023 sha1_multi_block_avx:3024 mov QWORD[8+rsp],rdi ;WIN64 prologue3025 mov QWORD[16+rsp],rsi3026 mov rax,rsp3027 $L$SEH_begin_sha1_multi_block_avx:3028 mov rdi,rcx3029 mov rsi,rdx3030 mov rdx,r83031 3032 3033 3034 _avx_shortcut:3035 shr rcx,323036 cmp edx,23037 jb NEAR $L$avx3038 test ecx,323039 jnz NEAR _avx2_shortcut3040 jmp NEAR $L$avx3041 ALIGN 323042 $L$avx:3043 mov rax,rsp3044 3045 push rbx3046 3047 push rbp3048 3049 lea rsp,[((-168))+rsp]3050 movaps XMMWORD[rsp],xmm63051 movaps XMMWORD[16+rsp],xmm73052 movaps XMMWORD[32+rsp],xmm83053 movaps XMMWORD[48+rsp],xmm93054 movaps XMMWORD[(-120)+rax],xmm103055 movaps XMMWORD[(-104)+rax],xmm113056 movaps XMMWORD[(-88)+rax],xmm123057 movaps XMMWORD[(-72)+rax],xmm133058 movaps XMMWORD[(-56)+rax],xmm143059 movaps XMMWORD[(-40)+rax],xmm153060 sub rsp,2883061 and rsp,-2563062 mov QWORD[272+rsp],rax3063 3064 $L$body_avx:3065 lea rbp,[K_XX_XX]3066 lea rbx,[256+rsp]3067 3068 vzeroupper3069 $L$oop_grande_avx:3070 mov DWORD[280+rsp],edx3071 xor edx,edx3072 3073 mov r8,QWORD[rsi]3074 3075 mov ecx,DWORD[8+rsi]3076 cmp ecx,edx3077 cmovg edx,ecx3078 test ecx,ecx3079 mov DWORD[rbx],ecx3080 cmovle r8,rbp3081 3082 mov r9,QWORD[16+rsi]3083 3084 mov ecx,DWORD[24+rsi]3085 cmp ecx,edx3086 cmovg edx,ecx3087 test ecx,ecx3088 mov DWORD[4+rbx],ecx3089 cmovle r9,rbp3090 3091 mov r10,QWORD[32+rsi]3092 3093 mov ecx,DWORD[40+rsi]3094 cmp ecx,edx3095 cmovg edx,ecx3096 test ecx,ecx3097 mov DWORD[8+rbx],ecx3098 cmovle r10,rbp3099 3100 mov r11,QWORD[48+rsi]3101 3102 mov ecx,DWORD[56+rsi]3103 cmp ecx,edx3104 cmovg edx,ecx3105 test ecx,ecx3106 mov DWORD[12+rbx],ecx3107 cmovle r11,rbp3108 test edx,edx3109 jz NEAR $L$done_avx3110 3111 vmovdqu xmm10,XMMWORD[rdi]3112 lea rax,[128+rsp]3113 vmovdqu xmm11,XMMWORD[32+rdi]3114 vmovdqu xmm12,XMMWORD[64+rdi]3115 vmovdqu xmm13,XMMWORD[96+rdi]3116 vmovdqu xmm14,XMMWORD[128+rdi]3117 vmovdqu xmm5,XMMWORD[96+rbp]3118 jmp NEAR $L$oop_avx3119 3120 ALIGN 323121 $L$oop_avx:3122 vmovdqa xmm15,XMMWORD[((-32))+rbp]3123 vmovd xmm0,DWORD[r8]3124 lea r8,[64+r8]3125 vmovd xmm2,DWORD[r9]3126 lea r9,[64+r9]3127 vpinsrd xmm0,xmm0,DWORD[r10],13128 lea r10,[64+r10]3129 vpinsrd xmm2,xmm2,DWORD[r11],13130 lea r11,[64+r11]3131 vmovd xmm1,DWORD[((-60))+r8]3132 vpunpckldq xmm0,xmm0,xmm23133 vmovd xmm9,DWORD[((-60))+r9]3134 vpshufb xmm0,xmm0,xmm53135 vpinsrd xmm1,xmm1,DWORD[((-60))+r10],13136 vpinsrd xmm9,xmm9,DWORD[((-60))+r11],13137 vpaddd xmm14,xmm14,xmm153138 vpslld xmm8,xmm10,53139 vpandn xmm7,xmm11,xmm133140 vpand xmm6,xmm11,xmm123141 3142 vmovdqa XMMWORD[(0-128)+rax],xmm03143 vpaddd xmm14,xmm14,xmm03144 vpunpckldq xmm1,xmm1,xmm93145 vpsrld xmm9,xmm10,273146 vpxor xmm6,xmm6,xmm73147 vmovd xmm2,DWORD[((-56))+r8]3148 3149 vpslld xmm7,xmm11,303150 vpor xmm8,xmm8,xmm93151 vmovd xmm9,DWORD[((-56))+r9]3152 vpaddd xmm14,xmm14,xmm63153 3154 vpsrld xmm11,xmm11,23155 vpaddd xmm14,xmm14,xmm83156 vpshufb xmm1,xmm1,xmm53157 vpor xmm11,xmm11,xmm73158 vpinsrd xmm2,xmm2,DWORD[((-56))+r10],13159 vpinsrd xmm9,xmm9,DWORD[((-56))+r11],13160 vpaddd xmm13,xmm13,xmm153161 vpslld xmm8,xmm14,53162 vpandn xmm7,xmm10,xmm123163 vpand xmm6,xmm10,xmm113164 3165 vmovdqa XMMWORD[(16-128)+rax],xmm13166 vpaddd xmm13,xmm13,xmm13167 vpunpckldq xmm2,xmm2,xmm93168 vpsrld xmm9,xmm14,273169 vpxor xmm6,xmm6,xmm73170 vmovd xmm3,DWORD[((-52))+r8]3171 3172 vpslld xmm7,xmm10,303173 vpor xmm8,xmm8,xmm93174 vmovd xmm9,DWORD[((-52))+r9]3175 vpaddd xmm13,xmm13,xmm63176 3177 vpsrld xmm10,xmm10,23178 vpaddd xmm13,xmm13,xmm83179 vpshufb xmm2,xmm2,xmm53180 vpor xmm10,xmm10,xmm73181 vpinsrd xmm3,xmm3,DWORD[((-52))+r10],13182 vpinsrd xmm9,xmm9,DWORD[((-52))+r11],13183 vpaddd xmm12,xmm12,xmm153184 vpslld xmm8,xmm13,53185 vpandn xmm7,xmm14,xmm113186 vpand xmm6,xmm14,xmm103187 3188 vmovdqa XMMWORD[(32-128)+rax],xmm23189 vpaddd xmm12,xmm12,xmm23190 vpunpckldq xmm3,xmm3,xmm93191 vpsrld xmm9,xmm13,273192 vpxor xmm6,xmm6,xmm73193 vmovd xmm4,DWORD[((-48))+r8]3194 3195 vpslld xmm7,xmm14,303196 vpor xmm8,xmm8,xmm93197 vmovd xmm9,DWORD[((-48))+r9]3198 vpaddd xmm12,xmm12,xmm63199 3200 vpsrld xmm14,xmm14,23201 vpaddd xmm12,xmm12,xmm83202 vpshufb xmm3,xmm3,xmm53203 vpor xmm14,xmm14,xmm73204 vpinsrd xmm4,xmm4,DWORD[((-48))+r10],13205 vpinsrd xmm9,xmm9,DWORD[((-48))+r11],13206 vpaddd xmm11,xmm11,xmm153207 vpslld xmm8,xmm12,53208 vpandn xmm7,xmm13,xmm103209 vpand xmm6,xmm13,xmm143210 3211 vmovdqa XMMWORD[(48-128)+rax],xmm33212 vpaddd xmm11,xmm11,xmm33213 vpunpckldq xmm4,xmm4,xmm93214 vpsrld xmm9,xmm12,273215 vpxor xmm6,xmm6,xmm73216 vmovd xmm0,DWORD[((-44))+r8]3217 3218 vpslld xmm7,xmm13,303219 vpor xmm8,xmm8,xmm93220 vmovd xmm9,DWORD[((-44))+r9]3221 vpaddd xmm11,xmm11,xmm63222 3223 vpsrld xmm13,xmm13,23224 vpaddd xmm11,xmm11,xmm83225 vpshufb xmm4,xmm4,xmm53226 vpor xmm13,xmm13,xmm73227 vpinsrd xmm0,xmm0,DWORD[((-44))+r10],13228 vpinsrd xmm9,xmm9,DWORD[((-44))+r11],13229 vpaddd xmm10,xmm10,xmm153230 vpslld xmm8,xmm11,53231 vpandn xmm7,xmm12,xmm143232 vpand xmm6,xmm12,xmm133233 3234 vmovdqa XMMWORD[(64-128)+rax],xmm43235 vpaddd xmm10,xmm10,xmm43236 vpunpckldq xmm0,xmm0,xmm93237 vpsrld xmm9,xmm11,273238 vpxor xmm6,xmm6,xmm73239 vmovd xmm1,DWORD[((-40))+r8]3240 3241 vpslld xmm7,xmm12,303242 vpor xmm8,xmm8,xmm93243 vmovd xmm9,DWORD[((-40))+r9]3244 vpaddd xmm10,xmm10,xmm63245 3246 vpsrld xmm12,xmm12,23247 vpaddd xmm10,xmm10,xmm83248 vpshufb xmm0,xmm0,xmm53249 vpor xmm12,xmm12,xmm73250 vpinsrd xmm1,xmm1,DWORD[((-40))+r10],13251 vpinsrd xmm9,xmm9,DWORD[((-40))+r11],13252 vpaddd xmm14,xmm14,xmm153253 vpslld xmm8,xmm10,53254 vpandn xmm7,xmm11,xmm133255 vpand xmm6,xmm11,xmm123256 3257 vmovdqa XMMWORD[(80-128)+rax],xmm03258 vpaddd xmm14,xmm14,xmm03259 vpunpckldq xmm1,xmm1,xmm93260 vpsrld xmm9,xmm10,273261 vpxor xmm6,xmm6,xmm73262 vmovd xmm2,DWORD[((-36))+r8]3263 3264 vpslld xmm7,xmm11,303265 vpor xmm8,xmm8,xmm93266 vmovd xmm9,DWORD[((-36))+r9]3267 vpaddd xmm14,xmm14,xmm63268 3269 vpsrld xmm11,xmm11,23270 vpaddd xmm14,xmm14,xmm83271 vpshufb xmm1,xmm1,xmm53272 vpor xmm11,xmm11,xmm73273 vpinsrd xmm2,xmm2,DWORD[((-36))+r10],13274 vpinsrd xmm9,xmm9,DWORD[((-36))+r11],13275 vpaddd xmm13,xmm13,xmm153276 vpslld xmm8,xmm14,53277 vpandn xmm7,xmm10,xmm123278 vpand xmm6,xmm10,xmm113279 3280 vmovdqa XMMWORD[(96-128)+rax],xmm13281 vpaddd xmm13,xmm13,xmm13282 vpunpckldq xmm2,xmm2,xmm93283 vpsrld xmm9,xmm14,273284 vpxor xmm6,xmm6,xmm73285 vmovd xmm3,DWORD[((-32))+r8]3286 3287 vpslld xmm7,xmm10,303288 vpor xmm8,xmm8,xmm93289 vmovd xmm9,DWORD[((-32))+r9]3290 vpaddd xmm13,xmm13,xmm63291 3292 vpsrld xmm10,xmm10,23293 vpaddd xmm13,xmm13,xmm83294 vpshufb xmm2,xmm2,xmm53295 vpor xmm10,xmm10,xmm73296 vpinsrd xmm3,xmm3,DWORD[((-32))+r10],13297 vpinsrd xmm9,xmm9,DWORD[((-32))+r11],13298 vpaddd xmm12,xmm12,xmm153299 vpslld xmm8,xmm13,53300 vpandn xmm7,xmm14,xmm113301 vpand xmm6,xmm14,xmm103302 3303 vmovdqa XMMWORD[(112-128)+rax],xmm23304 vpaddd xmm12,xmm12,xmm23305 vpunpckldq xmm3,xmm3,xmm93306 vpsrld xmm9,xmm13,273307 vpxor xmm6,xmm6,xmm73308 vmovd xmm4,DWORD[((-28))+r8]3309 3310 vpslld xmm7,xmm14,303311 vpor xmm8,xmm8,xmm93312 vmovd xmm9,DWORD[((-28))+r9]3313 vpaddd xmm12,xmm12,xmm63314 3315 vpsrld xmm14,xmm14,23316 vpaddd xmm12,xmm12,xmm83317 vpshufb xmm3,xmm3,xmm53318 vpor xmm14,xmm14,xmm73319 vpinsrd xmm4,xmm4,DWORD[((-28))+r10],13320 vpinsrd xmm9,xmm9,DWORD[((-28))+r11],13321 vpaddd xmm11,xmm11,xmm153322 vpslld xmm8,xmm12,53323 vpandn xmm7,xmm13,xmm103324 vpand xmm6,xmm13,xmm143325 3326 vmovdqa XMMWORD[(128-128)+rax],xmm33327 vpaddd xmm11,xmm11,xmm33328 vpunpckldq xmm4,xmm4,xmm93329 vpsrld xmm9,xmm12,273330 vpxor xmm6,xmm6,xmm73331 vmovd xmm0,DWORD[((-24))+r8]3332 3333 vpslld xmm7,xmm13,303334 vpor xmm8,xmm8,xmm93335 vmovd xmm9,DWORD[((-24))+r9]3336 vpaddd xmm11,xmm11,xmm63337 3338 vpsrld xmm13,xmm13,23339 vpaddd xmm11,xmm11,xmm83340 vpshufb xmm4,xmm4,xmm53341 vpor xmm13,xmm13,xmm73342 vpinsrd xmm0,xmm0,DWORD[((-24))+r10],13343 vpinsrd xmm9,xmm9,DWORD[((-24))+r11],13344 vpaddd xmm10,xmm10,xmm153345 vpslld xmm8,xmm11,53346 vpandn xmm7,xmm12,xmm143347 vpand xmm6,xmm12,xmm133348 3349 vmovdqa XMMWORD[(144-128)+rax],xmm43350 vpaddd xmm10,xmm10,xmm43351 vpunpckldq xmm0,xmm0,xmm93352 vpsrld xmm9,xmm11,273353 vpxor xmm6,xmm6,xmm73354 vmovd xmm1,DWORD[((-20))+r8]3355 3356 vpslld xmm7,xmm12,303357 vpor xmm8,xmm8,xmm93358 vmovd xmm9,DWORD[((-20))+r9]3359 vpaddd xmm10,xmm10,xmm63360 3361 vpsrld xmm12,xmm12,23362 vpaddd xmm10,xmm10,xmm83363 vpshufb xmm0,xmm0,xmm53364 vpor xmm12,xmm12,xmm73365 vpinsrd xmm1,xmm1,DWORD[((-20))+r10],13366 vpinsrd xmm9,xmm9,DWORD[((-20))+r11],13367 vpaddd xmm14,xmm14,xmm153368 vpslld xmm8,xmm10,53369 vpandn xmm7,xmm11,xmm133370 vpand xmm6,xmm11,xmm123371 3372 vmovdqa XMMWORD[(160-128)+rax],xmm03373 vpaddd xmm14,xmm14,xmm03374 vpunpckldq xmm1,xmm1,xmm93375 vpsrld xmm9,xmm10,273376 vpxor xmm6,xmm6,xmm73377 vmovd xmm2,DWORD[((-16))+r8]3378 3379 vpslld xmm7,xmm11,303380 vpor xmm8,xmm8,xmm93381 vmovd xmm9,DWORD[((-16))+r9]3382 vpaddd xmm14,xmm14,xmm63383 3384 vpsrld xmm11,xmm11,23385 vpaddd xmm14,xmm14,xmm83386 vpshufb xmm1,xmm1,xmm53387 vpor xmm11,xmm11,xmm73388 vpinsrd xmm2,xmm2,DWORD[((-16))+r10],13389 vpinsrd xmm9,xmm9,DWORD[((-16))+r11],13390 vpaddd xmm13,xmm13,xmm153391 vpslld xmm8,xmm14,53392 vpandn xmm7,xmm10,xmm123393 vpand xmm6,xmm10,xmm113394 3395 vmovdqa XMMWORD[(176-128)+rax],xmm13396 vpaddd xmm13,xmm13,xmm13397 vpunpckldq xmm2,xmm2,xmm93398 vpsrld xmm9,xmm14,273399 vpxor xmm6,xmm6,xmm73400 vmovd xmm3,DWORD[((-12))+r8]3401 3402 vpslld xmm7,xmm10,303403 vpor xmm8,xmm8,xmm93404 vmovd xmm9,DWORD[((-12))+r9]3405 vpaddd xmm13,xmm13,xmm63406 3407 vpsrld xmm10,xmm10,23408 vpaddd xmm13,xmm13,xmm83409 vpshufb xmm2,xmm2,xmm53410 vpor xmm10,xmm10,xmm73411 vpinsrd xmm3,xmm3,DWORD[((-12))+r10],13412 vpinsrd xmm9,xmm9,DWORD[((-12))+r11],13413 vpaddd xmm12,xmm12,xmm153414 vpslld xmm8,xmm13,53415 vpandn xmm7,xmm14,xmm113416 vpand xmm6,xmm14,xmm103417 3418 vmovdqa XMMWORD[(192-128)+rax],xmm23419 vpaddd xmm12,xmm12,xmm23420 vpunpckldq xmm3,xmm3,xmm93421 vpsrld xmm9,xmm13,273422 vpxor xmm6,xmm6,xmm73423 vmovd xmm4,DWORD[((-8))+r8]3424 3425 vpslld xmm7,xmm14,303426 vpor xmm8,xmm8,xmm93427 vmovd xmm9,DWORD[((-8))+r9]3428 vpaddd xmm12,xmm12,xmm63429 3430 vpsrld xmm14,xmm14,23431 vpaddd xmm12,xmm12,xmm83432 vpshufb xmm3,xmm3,xmm53433 vpor xmm14,xmm14,xmm73434 vpinsrd xmm4,xmm4,DWORD[((-8))+r10],13435 vpinsrd xmm9,xmm9,DWORD[((-8))+r11],13436 vpaddd xmm11,xmm11,xmm153437 vpslld xmm8,xmm12,53438 vpandn xmm7,xmm13,xmm103439 vpand xmm6,xmm13,xmm143440 3441 vmovdqa XMMWORD[(208-128)+rax],xmm33442 vpaddd xmm11,xmm11,xmm33443 vpunpckldq xmm4,xmm4,xmm93444 vpsrld xmm9,xmm12,273445 vpxor xmm6,xmm6,xmm73446 vmovd xmm0,DWORD[((-4))+r8]3447 3448 vpslld xmm7,xmm13,303449 vpor xmm8,xmm8,xmm93450 vmovd xmm9,DWORD[((-4))+r9]3451 vpaddd xmm11,xmm11,xmm63452 3453 vpsrld xmm13,xmm13,23454 vpaddd xmm11,xmm11,xmm83455 vpshufb xmm4,xmm4,xmm53456 vpor xmm13,xmm13,xmm73457 vmovdqa xmm1,XMMWORD[((0-128))+rax]3458 vpinsrd xmm0,xmm0,DWORD[((-4))+r10],13459 vpinsrd xmm9,xmm9,DWORD[((-4))+r11],13460 vpaddd xmm10,xmm10,xmm153461 prefetcht0 [63+r8]3462 vpslld xmm8,xmm11,53463 vpandn xmm7,xmm12,xmm143464 vpand xmm6,xmm12,xmm133465 3466 vmovdqa XMMWORD[(224-128)+rax],xmm43467 vpaddd xmm10,xmm10,xmm43468 vpunpckldq xmm0,xmm0,xmm93469 vpsrld xmm9,xmm11,273470 prefetcht0 [63+r9]3471 vpxor xmm6,xmm6,xmm73472 3473 vpslld xmm7,xmm12,303474 vpor xmm8,xmm8,xmm93475 prefetcht0 [63+r10]3476 vpaddd xmm10,xmm10,xmm63477 3478 vpsrld xmm12,xmm12,23479 vpaddd xmm10,xmm10,xmm83480 prefetcht0 [63+r11]3481 vpshufb xmm0,xmm0,xmm53482 vpor xmm12,xmm12,xmm73483 vmovdqa xmm2,XMMWORD[((16-128))+rax]3484 vpxor xmm1,xmm1,xmm33485 vmovdqa xmm3,XMMWORD[((32-128))+rax]3486 3487 vpaddd xmm14,xmm14,xmm153488 vpslld xmm8,xmm10,53489 vpandn xmm7,xmm11,xmm133490 3491 vpand xmm6,xmm11,xmm123492 3493 vmovdqa XMMWORD[(240-128)+rax],xmm03494 vpaddd xmm14,xmm14,xmm03495 vpxor xmm1,xmm1,XMMWORD[((128-128))+rax]3496 vpsrld xmm9,xmm10,273497 vpxor xmm6,xmm6,xmm73498 vpxor xmm1,xmm1,xmm33499 3500 3501 vpslld xmm7,xmm11,303502 vpor xmm8,xmm8,xmm93503 vpaddd xmm14,xmm14,xmm63504 3505 vpsrld xmm5,xmm1,313506 vpaddd xmm1,xmm1,xmm13507 3508 vpsrld xmm11,xmm11,23509 3510 vpaddd xmm14,xmm14,xmm83511 vpor xmm1,xmm1,xmm53512 vpor xmm11,xmm11,xmm73513 vpxor xmm2,xmm2,xmm43514 vmovdqa xmm4,XMMWORD[((48-128))+rax]3515 3516 vpaddd xmm13,xmm13,xmm153517 vpslld xmm8,xmm14,53518 vpandn xmm7,xmm10,xmm123519 3520 vpand xmm6,xmm10,xmm113521 3522 vmovdqa XMMWORD[(0-128)+rax],xmm13523 vpaddd xmm13,xmm13,xmm13524 vpxor xmm2,xmm2,XMMWORD[((144-128))+rax]3525 vpsrld xmm9,xmm14,273526 vpxor xmm6,xmm6,xmm73527 vpxor xmm2,xmm2,xmm43528 3529 3530 vpslld xmm7,xmm10,303531 vpor xmm8,xmm8,xmm93532 vpaddd xmm13,xmm13,xmm63533 3534 vpsrld xmm5,xmm2,313535 vpaddd xmm2,xmm2,xmm23536 3537 vpsrld xmm10,xmm10,23538 3539 vpaddd xmm13,xmm13,xmm83540 vpor xmm2,xmm2,xmm53541 vpor xmm10,xmm10,xmm73542 vpxor xmm3,xmm3,xmm03543 vmovdqa xmm0,XMMWORD[((64-128))+rax]3544 3545 vpaddd xmm12,xmm12,xmm153546 vpslld xmm8,xmm13,53547 vpandn xmm7,xmm14,xmm113548 3549 vpand xmm6,xmm14,xmm103550 3551 vmovdqa XMMWORD[(16-128)+rax],xmm23552 vpaddd xmm12,xmm12,xmm23553 vpxor xmm3,xmm3,XMMWORD[((160-128))+rax]3554 vpsrld xmm9,xmm13,273555 vpxor xmm6,xmm6,xmm73556 vpxor xmm3,xmm3,xmm03557 3558 3559 vpslld xmm7,xmm14,303560 vpor xmm8,xmm8,xmm93561 vpaddd xmm12,xmm12,xmm63562 3563 vpsrld xmm5,xmm3,313564 vpaddd xmm3,xmm3,xmm33565 3566 vpsrld xmm14,xmm14,23567 3568 vpaddd xmm12,xmm12,xmm83569 vpor xmm3,xmm3,xmm53570 vpor xmm14,xmm14,xmm73571 vpxor xmm4,xmm4,xmm13572 vmovdqa xmm1,XMMWORD[((80-128))+rax]3573 3574 vpaddd xmm11,xmm11,xmm153575 vpslld xmm8,xmm12,53576 vpandn xmm7,xmm13,xmm103577 3578 vpand xmm6,xmm13,xmm143579 3580 vmovdqa XMMWORD[(32-128)+rax],xmm33581 vpaddd xmm11,xmm11,xmm33582 vpxor xmm4,xmm4,XMMWORD[((176-128))+rax]3583 vpsrld xmm9,xmm12,273584 vpxor xmm6,xmm6,xmm73585 vpxor xmm4,xmm4,xmm13586 3587 3588 vpslld xmm7,xmm13,303589 vpor xmm8,xmm8,xmm93590 vpaddd xmm11,xmm11,xmm63591 3592 vpsrld xmm5,xmm4,313593 vpaddd xmm4,xmm4,xmm43594 3595 vpsrld xmm13,xmm13,23596 3597 vpaddd xmm11,xmm11,xmm83598 vpor xmm4,xmm4,xmm53599 vpor xmm13,xmm13,xmm73600 vpxor xmm0,xmm0,xmm23601 vmovdqa xmm2,XMMWORD[((96-128))+rax]3602 3603 vpaddd xmm10,xmm10,xmm153604 vpslld xmm8,xmm11,53605 vpandn xmm7,xmm12,xmm143606 3607 vpand xmm6,xmm12,xmm133608 3609 vmovdqa XMMWORD[(48-128)+rax],xmm43610 vpaddd xmm10,xmm10,xmm43611 vpxor xmm0,xmm0,XMMWORD[((192-128))+rax]3612 vpsrld xmm9,xmm11,273613 vpxor xmm6,xmm6,xmm73614 vpxor xmm0,xmm0,xmm23615 3616 3617 vpslld xmm7,xmm12,303618 vpor xmm8,xmm8,xmm93619 vpaddd xmm10,xmm10,xmm63620 3621 vpsrld xmm5,xmm0,313622 vpaddd xmm0,xmm0,xmm03623 3624 vpsrld xmm12,xmm12,23625 3626 vpaddd xmm10,xmm10,xmm83627 vpor xmm0,xmm0,xmm53628 vpor xmm12,xmm12,xmm73629 vmovdqa xmm15,XMMWORD[rbp]3630 vpxor xmm1,xmm1,xmm33631 vmovdqa xmm3,XMMWORD[((112-128))+rax]3632 3633 vpslld xmm8,xmm10,53634 vpaddd xmm14,xmm14,xmm153635 vpxor xmm6,xmm13,xmm113636 vmovdqa XMMWORD[(64-128)+rax],xmm03637 vpaddd xmm14,xmm14,xmm03638 vpxor xmm1,xmm1,XMMWORD[((208-128))+rax]3639 vpsrld xmm9,xmm10,273640 vpxor xmm6,xmm6,xmm123641 vpxor xmm1,xmm1,xmm33642 3643 vpslld xmm7,xmm11,303644 vpor xmm8,xmm8,xmm93645 vpaddd xmm14,xmm14,xmm63646 vpsrld xmm5,xmm1,313647 vpaddd xmm1,xmm1,xmm13648 3649 vpsrld xmm11,xmm11,23650 vpaddd xmm14,xmm14,xmm83651 vpor xmm1,xmm1,xmm53652 vpor xmm11,xmm11,xmm73653 vpxor xmm2,xmm2,xmm43654 vmovdqa xmm4,XMMWORD[((128-128))+rax]3655 3656 vpslld xmm8,xmm14,53657 vpaddd xmm13,xmm13,xmm153658 vpxor xmm6,xmm12,xmm103659 vmovdqa XMMWORD[(80-128)+rax],xmm13660 vpaddd xmm13,xmm13,xmm13661 vpxor xmm2,xmm2,XMMWORD[((224-128))+rax]3662 vpsrld xmm9,xmm14,273663 vpxor xmm6,xmm6,xmm113664 vpxor xmm2,xmm2,xmm43665 3666 vpslld xmm7,xmm10,303667 vpor xmm8,xmm8,xmm93668 vpaddd xmm13,xmm13,xmm63669 vpsrld xmm5,xmm2,313670 vpaddd xmm2,xmm2,xmm23671 3672 vpsrld xmm10,xmm10,23673 vpaddd xmm13,xmm13,xmm83674 vpor xmm2,xmm2,xmm53675 vpor xmm10,xmm10,xmm73676 vpxor xmm3,xmm3,xmm03677 vmovdqa xmm0,XMMWORD[((144-128))+rax]3678 3679 vpslld xmm8,xmm13,53680 vpaddd xmm12,xmm12,xmm153681 vpxor xmm6,xmm11,xmm143682 vmovdqa XMMWORD[(96-128)+rax],xmm23683 vpaddd xmm12,xmm12,xmm23684 vpxor xmm3,xmm3,XMMWORD[((240-128))+rax]3685 vpsrld xmm9,xmm13,273686 vpxor xmm6,xmm6,xmm103687 vpxor xmm3,xmm3,xmm03688 3689 vpslld xmm7,xmm14,303690 vpor xmm8,xmm8,xmm93691 vpaddd xmm12,xmm12,xmm63692 vpsrld xmm5,xmm3,313693 vpaddd xmm3,xmm3,xmm33694 3695 vpsrld xmm14,xmm14,23696 vpaddd xmm12,xmm12,xmm83697 vpor xmm3,xmm3,xmm53698 vpor xmm14,xmm14,xmm73699 vpxor xmm4,xmm4,xmm13700 vmovdqa xmm1,XMMWORD[((160-128))+rax]3701 3702 vpslld xmm8,xmm12,53703 vpaddd xmm11,xmm11,xmm153704 vpxor xmm6,xmm10,xmm133705 vmovdqa XMMWORD[(112-128)+rax],xmm33706 vpaddd xmm11,xmm11,xmm33707 vpxor xmm4,xmm4,XMMWORD[((0-128))+rax]3708 vpsrld xmm9,xmm12,273709 vpxor xmm6,xmm6,xmm143710 vpxor xmm4,xmm4,xmm13711 3712 vpslld xmm7,xmm13,303713 vpor xmm8,xmm8,xmm93714 vpaddd xmm11,xmm11,xmm63715 vpsrld xmm5,xmm4,313716 vpaddd xmm4,xmm4,xmm43717 3718 vpsrld xmm13,xmm13,23719 vpaddd xmm11,xmm11,xmm83720 vpor xmm4,xmm4,xmm53721 vpor xmm13,xmm13,xmm73722 vpxor xmm0,xmm0,xmm23723 vmovdqa xmm2,XMMWORD[((176-128))+rax]3724 3725 vpslld xmm8,xmm11,53726 vpaddd xmm10,xmm10,xmm153727 vpxor xmm6,xmm14,xmm123728 vmovdqa XMMWORD[(128-128)+rax],xmm43729 vpaddd xmm10,xmm10,xmm43730 vpxor xmm0,xmm0,XMMWORD[((16-128))+rax]3731 vpsrld xmm9,xmm11,273732 vpxor xmm6,xmm6,xmm133733 vpxor xmm0,xmm0,xmm23734 3735 vpslld xmm7,xmm12,303736 vpor xmm8,xmm8,xmm93737 vpaddd xmm10,xmm10,xmm63738 vpsrld xmm5,xmm0,313739 vpaddd xmm0,xmm0,xmm03740 3741 vpsrld xmm12,xmm12,23742 vpaddd xmm10,xmm10,xmm83743 vpor xmm0,xmm0,xmm53744 vpor xmm12,xmm12,xmm73745 vpxor xmm1,xmm1,xmm33746 vmovdqa xmm3,XMMWORD[((192-128))+rax]3747 3748 vpslld xmm8,xmm10,53749 vpaddd xmm14,xmm14,xmm153750 vpxor xmm6,xmm13,xmm113751 vmovdqa XMMWORD[(144-128)+rax],xmm03752 vpaddd xmm14,xmm14,xmm03753 vpxor xmm1,xmm1,XMMWORD[((32-128))+rax]3754 vpsrld xmm9,xmm10,273755 vpxor xmm6,xmm6,xmm123756 vpxor xmm1,xmm1,xmm33757 3758 vpslld xmm7,xmm11,303759 vpor xmm8,xmm8,xmm93760 vpaddd xmm14,xmm14,xmm63761 vpsrld xmm5,xmm1,313762 vpaddd xmm1,xmm1,xmm13763 3764 vpsrld xmm11,xmm11,23765 vpaddd xmm14,xmm14,xmm83766 vpor xmm1,xmm1,xmm53767 vpor xmm11,xmm11,xmm73768 vpxor xmm2,xmm2,xmm43769 vmovdqa xmm4,XMMWORD[((208-128))+rax]3770 3771 vpslld xmm8,xmm14,53772 vpaddd xmm13,xmm13,xmm153773 vpxor xmm6,xmm12,xmm103774 vmovdqa XMMWORD[(160-128)+rax],xmm13775 vpaddd xmm13,xmm13,xmm13776 vpxor xmm2,xmm2,XMMWORD[((48-128))+rax]3777 vpsrld xmm9,xmm14,273778 vpxor xmm6,xmm6,xmm113779 vpxor xmm2,xmm2,xmm43780 3781 vpslld xmm7,xmm10,303782 vpor xmm8,xmm8,xmm93783 vpaddd xmm13,xmm13,xmm63784 vpsrld xmm5,xmm2,313785 vpaddd xmm2,xmm2,xmm23786 3787 vpsrld xmm10,xmm10,23788 vpaddd xmm13,xmm13,xmm83789 vpor xmm2,xmm2,xmm53790 vpor xmm10,xmm10,xmm73791 vpxor xmm3,xmm3,xmm03792 vmovdqa xmm0,XMMWORD[((224-128))+rax]3793 3794 vpslld xmm8,xmm13,53795 vpaddd xmm12,xmm12,xmm153796 vpxor xmm6,xmm11,xmm143797 vmovdqa XMMWORD[(176-128)+rax],xmm23798 vpaddd xmm12,xmm12,xmm23799 vpxor xmm3,xmm3,XMMWORD[((64-128))+rax]3800 vpsrld xmm9,xmm13,273801 vpxor xmm6,xmm6,xmm103802 vpxor xmm3,xmm3,xmm03803 3804 vpslld xmm7,xmm14,303805 vpor xmm8,xmm8,xmm93806 vpaddd xmm12,xmm12,xmm63807 vpsrld xmm5,xmm3,313808 vpaddd xmm3,xmm3,xmm33809 3810 vpsrld xmm14,xmm14,23811 vpaddd xmm12,xmm12,xmm83812 vpor xmm3,xmm3,xmm53813 vpor xmm14,xmm14,xmm73814 vpxor xmm4,xmm4,xmm13815 vmovdqa xmm1,XMMWORD[((240-128))+rax]3816 3817 vpslld xmm8,xmm12,53818 vpaddd xmm11,xmm11,xmm153819 vpxor xmm6,xmm10,xmm133820 vmovdqa XMMWORD[(192-128)+rax],xmm33821 vpaddd xmm11,xmm11,xmm33822 vpxor xmm4,xmm4,XMMWORD[((80-128))+rax]3823 vpsrld xmm9,xmm12,273824 vpxor xmm6,xmm6,xmm143825 vpxor xmm4,xmm4,xmm13826 3827 vpslld xmm7,xmm13,303828 vpor xmm8,xmm8,xmm93829 vpaddd xmm11,xmm11,xmm63830 vpsrld xmm5,xmm4,313831 vpaddd xmm4,xmm4,xmm43832 3833 vpsrld xmm13,xmm13,23834 vpaddd xmm11,xmm11,xmm83835 vpor xmm4,xmm4,xmm53836 vpor xmm13,xmm13,xmm73837 vpxor xmm0,xmm0,xmm23838 vmovdqa xmm2,XMMWORD[((0-128))+rax]3839 3840 vpslld xmm8,xmm11,53841 vpaddd xmm10,xmm10,xmm153842 vpxor xmm6,xmm14,xmm123843 vmovdqa XMMWORD[(208-128)+rax],xmm43844 vpaddd xmm10,xmm10,xmm43845 vpxor xmm0,xmm0,XMMWORD[((96-128))+rax]3846 vpsrld xmm9,xmm11,273847 vpxor xmm6,xmm6,xmm133848 vpxor xmm0,xmm0,xmm23849 3850 vpslld xmm7,xmm12,303851 vpor xmm8,xmm8,xmm93852 vpaddd xmm10,xmm10,xmm63853 vpsrld xmm5,xmm0,313854 vpaddd xmm0,xmm0,xmm03855 3856 vpsrld xmm12,xmm12,23857 vpaddd xmm10,xmm10,xmm83858 vpor xmm0,xmm0,xmm53859 vpor xmm12,xmm12,xmm73860 vpxor xmm1,xmm1,xmm33861 vmovdqa xmm3,XMMWORD[((16-128))+rax]3862 3863 vpslld xmm8,xmm10,53864 vpaddd xmm14,xmm14,xmm153865 vpxor xmm6,xmm13,xmm113866 vmovdqa XMMWORD[(224-128)+rax],xmm03867 vpaddd xmm14,xmm14,xmm03868 vpxor xmm1,xmm1,XMMWORD[((112-128))+rax]3869 vpsrld xmm9,xmm10,273870 vpxor xmm6,xmm6,xmm123871 vpxor xmm1,xmm1,xmm33872 3873 vpslld xmm7,xmm11,303874 vpor xmm8,xmm8,xmm93875 vpaddd xmm14,xmm14,xmm63876 vpsrld xmm5,xmm1,313877 vpaddd xmm1,xmm1,xmm13878 3879 vpsrld xmm11,xmm11,23880 vpaddd xmm14,xmm14,xmm83881 vpor xmm1,xmm1,xmm53882 vpor xmm11,xmm11,xmm73883 vpxor xmm2,xmm2,xmm43884 vmovdqa xmm4,XMMWORD[((32-128))+rax]3885 3886 vpslld xmm8,xmm14,53887 vpaddd xmm13,xmm13,xmm153888 vpxor xmm6,xmm12,xmm103889 vmovdqa XMMWORD[(240-128)+rax],xmm13890 vpaddd xmm13,xmm13,xmm13891 vpxor xmm2,xmm2,XMMWORD[((128-128))+rax]3892 vpsrld xmm9,xmm14,273893 vpxor xmm6,xmm6,xmm113894 vpxor xmm2,xmm2,xmm43895 3896 vpslld xmm7,xmm10,303897 vpor xmm8,xmm8,xmm93898 vpaddd xmm13,xmm13,xmm63899 vpsrld xmm5,xmm2,313900 vpaddd xmm2,xmm2,xmm23901 3902 vpsrld xmm10,xmm10,23903 vpaddd xmm13,xmm13,xmm83904 vpor xmm2,xmm2,xmm53905 vpor xmm10,xmm10,xmm73906 vpxor xmm3,xmm3,xmm03907 vmovdqa xmm0,XMMWORD[((48-128))+rax]3908 3909 vpslld xmm8,xmm13,53910 vpaddd xmm12,xmm12,xmm153911 vpxor xmm6,xmm11,xmm143912 vmovdqa XMMWORD[(0-128)+rax],xmm23913 vpaddd xmm12,xmm12,xmm23914 vpxor xmm3,xmm3,XMMWORD[((144-128))+rax]3915 vpsrld xmm9,xmm13,273916 vpxor xmm6,xmm6,xmm103917 vpxor xmm3,xmm3,xmm03918 3919 vpslld xmm7,xmm14,303920 vpor xmm8,xmm8,xmm93921 vpaddd xmm12,xmm12,xmm63922 vpsrld xmm5,xmm3,313923 vpaddd xmm3,xmm3,xmm33924 3925 vpsrld xmm14,xmm14,23926 vpaddd xmm12,xmm12,xmm83927 vpor xmm3,xmm3,xmm53928 vpor xmm14,xmm14,xmm73929 vpxor xmm4,xmm4,xmm13930 vmovdqa xmm1,XMMWORD[((64-128))+rax]3931 3932 vpslld xmm8,xmm12,53933 vpaddd xmm11,xmm11,xmm153934 vpxor xmm6,xmm10,xmm133935 vmovdqa XMMWORD[(16-128)+rax],xmm33936 vpaddd xmm11,xmm11,xmm33937 vpxor xmm4,xmm4,XMMWORD[((160-128))+rax]3938 vpsrld xmm9,xmm12,273939 vpxor xmm6,xmm6,xmm143940 vpxor xmm4,xmm4,xmm13941 3942 vpslld xmm7,xmm13,303943 vpor xmm8,xmm8,xmm93944 vpaddd xmm11,xmm11,xmm63945 vpsrld xmm5,xmm4,313946 vpaddd xmm4,xmm4,xmm43947 3948 vpsrld xmm13,xmm13,23949 vpaddd xmm11,xmm11,xmm83950 vpor xmm4,xmm4,xmm53951 vpor xmm13,xmm13,xmm73952 vpxor xmm0,xmm0,xmm23953 vmovdqa xmm2,XMMWORD[((80-128))+rax]3954 3955 vpslld xmm8,xmm11,53956 vpaddd xmm10,xmm10,xmm153957 vpxor xmm6,xmm14,xmm123958 vmovdqa XMMWORD[(32-128)+rax],xmm43959 vpaddd xmm10,xmm10,xmm43960 vpxor xmm0,xmm0,XMMWORD[((176-128))+rax]3961 vpsrld xmm9,xmm11,273962 vpxor xmm6,xmm6,xmm133963 vpxor xmm0,xmm0,xmm23964 3965 vpslld xmm7,xmm12,303966 vpor xmm8,xmm8,xmm93967 vpaddd xmm10,xmm10,xmm63968 vpsrld xmm5,xmm0,313969 vpaddd xmm0,xmm0,xmm03970 3971 vpsrld xmm12,xmm12,23972 vpaddd xmm10,xmm10,xmm83973 vpor xmm0,xmm0,xmm53974 vpor xmm12,xmm12,xmm73975 vpxor xmm1,xmm1,xmm33976 vmovdqa xmm3,XMMWORD[((96-128))+rax]3977 3978 vpslld xmm8,xmm10,53979 vpaddd xmm14,xmm14,xmm153980 vpxor xmm6,xmm13,xmm113981 vmovdqa XMMWORD[(48-128)+rax],xmm03982 vpaddd xmm14,xmm14,xmm03983 vpxor xmm1,xmm1,XMMWORD[((192-128))+rax]3984 vpsrld xmm9,xmm10,273985 vpxor xmm6,xmm6,xmm123986 vpxor xmm1,xmm1,xmm33987 3988 vpslld xmm7,xmm11,303989 vpor xmm8,xmm8,xmm93990 vpaddd xmm14,xmm14,xmm63991 vpsrld xmm5,xmm1,313992 vpaddd xmm1,xmm1,xmm13993 3994 vpsrld xmm11,xmm11,23995 vpaddd xmm14,xmm14,xmm83996 vpor xmm1,xmm1,xmm53997 vpor xmm11,xmm11,xmm73998 vpxor xmm2,xmm2,xmm43999 vmovdqa xmm4,XMMWORD[((112-128))+rax]4000 4001 vpslld xmm8,xmm14,54002 vpaddd xmm13,xmm13,xmm154003 vpxor xmm6,xmm12,xmm104004 vmovdqa XMMWORD[(64-128)+rax],xmm14005 vpaddd xmm13,xmm13,xmm14006 vpxor xmm2,xmm2,XMMWORD[((208-128))+rax]4007 vpsrld xmm9,xmm14,274008 vpxor xmm6,xmm6,xmm114009 vpxor xmm2,xmm2,xmm44010 4011 vpslld xmm7,xmm10,304012 vpor xmm8,xmm8,xmm94013 vpaddd xmm13,xmm13,xmm64014 vpsrld xmm5,xmm2,314015 vpaddd xmm2,xmm2,xmm24016 4017 vpsrld xmm10,xmm10,24018 vpaddd xmm13,xmm13,xmm84019 vpor xmm2,xmm2,xmm54020 vpor xmm10,xmm10,xmm74021 vpxor xmm3,xmm3,xmm04022 vmovdqa xmm0,XMMWORD[((128-128))+rax]4023 4024 vpslld xmm8,xmm13,54025 vpaddd xmm12,xmm12,xmm154026 vpxor xmm6,xmm11,xmm144027 vmovdqa XMMWORD[(80-128)+rax],xmm24028 vpaddd xmm12,xmm12,xmm24029 vpxor xmm3,xmm3,XMMWORD[((224-128))+rax]4030 vpsrld xmm9,xmm13,274031 vpxor xmm6,xmm6,xmm104032 vpxor xmm3,xmm3,xmm04033 4034 vpslld xmm7,xmm14,304035 vpor xmm8,xmm8,xmm94036 vpaddd xmm12,xmm12,xmm64037 vpsrld xmm5,xmm3,314038 vpaddd xmm3,xmm3,xmm34039 4040 vpsrld xmm14,xmm14,24041 vpaddd xmm12,xmm12,xmm84042 vpor xmm3,xmm3,xmm54043 vpor xmm14,xmm14,xmm74044 vpxor xmm4,xmm4,xmm14045 vmovdqa xmm1,XMMWORD[((144-128))+rax]4046 4047 vpslld xmm8,xmm12,54048 vpaddd xmm11,xmm11,xmm154049 vpxor xmm6,xmm10,xmm134050 vmovdqa XMMWORD[(96-128)+rax],xmm34051 vpaddd xmm11,xmm11,xmm34052 vpxor xmm4,xmm4,XMMWORD[((240-128))+rax]4053 vpsrld xmm9,xmm12,274054 vpxor xmm6,xmm6,xmm144055 vpxor xmm4,xmm4,xmm14056 4057 vpslld xmm7,xmm13,304058 vpor xmm8,xmm8,xmm94059 vpaddd xmm11,xmm11,xmm64060 vpsrld xmm5,xmm4,314061 vpaddd xmm4,xmm4,xmm44062 4063 vpsrld xmm13,xmm13,24064 vpaddd xmm11,xmm11,xmm84065 vpor xmm4,xmm4,xmm54066 vpor xmm13,xmm13,xmm74067 vpxor xmm0,xmm0,xmm24068 vmovdqa xmm2,XMMWORD[((160-128))+rax]4069 4070 vpslld xmm8,xmm11,54071 vpaddd xmm10,xmm10,xmm154072 vpxor xmm6,xmm14,xmm124073 vmovdqa XMMWORD[(112-128)+rax],xmm44074 vpaddd xmm10,xmm10,xmm44075 vpxor xmm0,xmm0,XMMWORD[((0-128))+rax]4076 vpsrld xmm9,xmm11,274077 vpxor xmm6,xmm6,xmm134078 vpxor xmm0,xmm0,xmm24079 4080 vpslld xmm7,xmm12,304081 vpor xmm8,xmm8,xmm94082 vpaddd xmm10,xmm10,xmm64083 vpsrld xmm5,xmm0,314084 vpaddd xmm0,xmm0,xmm04085 4086 vpsrld xmm12,xmm12,24087 vpaddd xmm10,xmm10,xmm84088 vpor xmm0,xmm0,xmm54089 vpor xmm12,xmm12,xmm74090 vmovdqa xmm15,XMMWORD[32+rbp]4091 vpxor xmm1,xmm1,xmm34092 vmovdqa xmm3,XMMWORD[((176-128))+rax]4093 4094 vpaddd xmm14,xmm14,xmm154095 vpslld xmm8,xmm10,54096 vpand xmm7,xmm13,xmm124097 vpxor xmm1,xmm1,XMMWORD[((16-128))+rax]4098 4099 vpaddd xmm14,xmm14,xmm74100 vpsrld xmm9,xmm10,274101 vpxor xmm6,xmm13,xmm124102 vpxor xmm1,xmm1,xmm34103 4104 vmovdqu XMMWORD[(128-128)+rax],xmm04105 vpaddd xmm14,xmm14,xmm04106 vpor xmm8,xmm8,xmm94107 vpsrld xmm5,xmm1,314108 vpand xmm6,xmm6,xmm114109 vpaddd xmm1,xmm1,xmm14110 4111 vpslld xmm7,xmm11,304112 vpaddd xmm14,xmm14,xmm64113 4114 vpsrld xmm11,xmm11,24115 vpaddd xmm14,xmm14,xmm84116 vpor xmm1,xmm1,xmm54117 vpor xmm11,xmm11,xmm74118 vpxor xmm2,xmm2,xmm44119 vmovdqa xmm4,XMMWORD[((192-128))+rax]4120 4121 vpaddd xmm13,xmm13,xmm154122 vpslld xmm8,xmm14,54123 vpand xmm7,xmm12,xmm114124 vpxor xmm2,xmm2,XMMWORD[((32-128))+rax]4125 4126 vpaddd xmm13,xmm13,xmm74127 vpsrld xmm9,xmm14,274128 vpxor xmm6,xmm12,xmm114129 vpxor xmm2,xmm2,xmm44130 4131 vmovdqu XMMWORD[(144-128)+rax],xmm14132 vpaddd xmm13,xmm13,xmm14133 vpor xmm8,xmm8,xmm94134 vpsrld xmm5,xmm2,314135 vpand xmm6,xmm6,xmm104136 vpaddd xmm2,xmm2,xmm24137 4138 vpslld xmm7,xmm10,304139 vpaddd xmm13,xmm13,xmm64140 4141 vpsrld xmm10,xmm10,24142 vpaddd xmm13,xmm13,xmm84143 vpor xmm2,xmm2,xmm54144 vpor xmm10,xmm10,xmm74145 vpxor xmm3,xmm3,xmm04146 vmovdqa xmm0,XMMWORD[((208-128))+rax]4147 4148 vpaddd xmm12,xmm12,xmm154149 vpslld xmm8,xmm13,54150 vpand xmm7,xmm11,xmm104151 vpxor xmm3,xmm3,XMMWORD[((48-128))+rax]4152 4153 vpaddd xmm12,xmm12,xmm74154 vpsrld xmm9,xmm13,274155 vpxor xmm6,xmm11,xmm104156 vpxor xmm3,xmm3,xmm04157 4158 vmovdqu XMMWORD[(160-128)+rax],xmm24159 vpaddd xmm12,xmm12,xmm24160 vpor xmm8,xmm8,xmm94161 vpsrld xmm5,xmm3,314162 vpand xmm6,xmm6,xmm144163 vpaddd xmm3,xmm3,xmm34164 4165 vpslld xmm7,xmm14,304166 vpaddd xmm12,xmm12,xmm64167 4168 vpsrld xmm14,xmm14,24169 vpaddd xmm12,xmm12,xmm84170 vpor xmm3,xmm3,xmm54171 vpor xmm14,xmm14,xmm74172 vpxor xmm4,xmm4,xmm14173 vmovdqa xmm1,XMMWORD[((224-128))+rax]4174 4175 vpaddd xmm11,xmm11,xmm154176 vpslld xmm8,xmm12,54177 vpand xmm7,xmm10,xmm144178 vpxor xmm4,xmm4,XMMWORD[((64-128))+rax]4179 4180 vpaddd xmm11,xmm11,xmm74181 vpsrld xmm9,xmm12,274182 vpxor xmm6,xmm10,xmm144183 vpxor xmm4,xmm4,xmm14184 4185 vmovdqu XMMWORD[(176-128)+rax],xmm34186 vpaddd xmm11,xmm11,xmm34187 vpor xmm8,xmm8,xmm94188 vpsrld xmm5,xmm4,314189 vpand xmm6,xmm6,xmm134190 vpaddd xmm4,xmm4,xmm44191 4192 vpslld xmm7,xmm13,304193 vpaddd xmm11,xmm11,xmm64194 4195 vpsrld xmm13,xmm13,24196 vpaddd xmm11,xmm11,xmm84197 vpor xmm4,xmm4,xmm54198 vpor xmm13,xmm13,xmm74199 vpxor xmm0,xmm0,xmm24200 vmovdqa xmm2,XMMWORD[((240-128))+rax]4201 4202 vpaddd xmm10,xmm10,xmm154203 vpslld xmm8,xmm11,54204 vpand xmm7,xmm14,xmm134205 vpxor xmm0,xmm0,XMMWORD[((80-128))+rax]4206 4207 vpaddd xmm10,xmm10,xmm74208 vpsrld xmm9,xmm11,274209 vpxor xmm6,xmm14,xmm134210 vpxor xmm0,xmm0,xmm24211 4212 vmovdqu XMMWORD[(192-128)+rax],xmm44213 vpaddd xmm10,xmm10,xmm44214 vpor xmm8,xmm8,xmm94215 vpsrld xmm5,xmm0,314216 vpand xmm6,xmm6,xmm124217 vpaddd xmm0,xmm0,xmm04218 4219 vpslld xmm7,xmm12,304220 vpaddd xmm10,xmm10,xmm64221 4222 vpsrld xmm12,xmm12,24223 vpaddd xmm10,xmm10,xmm84224 vpor xmm0,xmm0,xmm54225 vpor xmm12,xmm12,xmm74226 vpxor xmm1,xmm1,xmm34227 vmovdqa xmm3,XMMWORD[((0-128))+rax]4228 4229 vpaddd xmm14,xmm14,xmm154230 vpslld xmm8,xmm10,54231 vpand xmm7,xmm13,xmm124232 vpxor xmm1,xmm1,XMMWORD[((96-128))+rax]4233 4234 vpaddd xmm14,xmm14,xmm74235 vpsrld xmm9,xmm10,274236 vpxor xmm6,xmm13,xmm124237 vpxor xmm1,xmm1,xmm34238 4239 vmovdqu XMMWORD[(208-128)+rax],xmm04240 vpaddd xmm14,xmm14,xmm04241 vpor xmm8,xmm8,xmm94242 vpsrld xmm5,xmm1,314243 vpand xmm6,xmm6,xmm114244 vpaddd xmm1,xmm1,xmm14245 4246 vpslld xmm7,xmm11,304247 vpaddd xmm14,xmm14,xmm64248 4249 vpsrld xmm11,xmm11,24250 vpaddd xmm14,xmm14,xmm84251 vpor xmm1,xmm1,xmm54252 vpor xmm11,xmm11,xmm74253 vpxor xmm2,xmm2,xmm44254 vmovdqa xmm4,XMMWORD[((16-128))+rax]4255 4256 vpaddd xmm13,xmm13,xmm154257 vpslld xmm8,xmm14,54258 vpand xmm7,xmm12,xmm114259 vpxor xmm2,xmm2,XMMWORD[((112-128))+rax]4260 4261 vpaddd xmm13,xmm13,xmm74262 vpsrld xmm9,xmm14,274263 vpxor xmm6,xmm12,xmm114264 vpxor xmm2,xmm2,xmm44265 4266 vmovdqu XMMWORD[(224-128)+rax],xmm14267 vpaddd xmm13,xmm13,xmm14268 vpor xmm8,xmm8,xmm94269 vpsrld xmm5,xmm2,314270 vpand xmm6,xmm6,xmm104271 vpaddd xmm2,xmm2,xmm24272 4273 vpslld xmm7,xmm10,304274 vpaddd xmm13,xmm13,xmm64275 4276 vpsrld xmm10,xmm10,24277 vpaddd xmm13,xmm13,xmm84278 vpor xmm2,xmm2,xmm54279 vpor xmm10,xmm10,xmm74280 vpxor xmm3,xmm3,xmm04281 vmovdqa xmm0,XMMWORD[((32-128))+rax]4282 4283 vpaddd xmm12,xmm12,xmm154284 vpslld xmm8,xmm13,54285 vpand xmm7,xmm11,xmm104286 vpxor xmm3,xmm3,XMMWORD[((128-128))+rax]4287 4288 vpaddd xmm12,xmm12,xmm74289 vpsrld xmm9,xmm13,274290 vpxor xmm6,xmm11,xmm104291 vpxor xmm3,xmm3,xmm04292 4293 vmovdqu XMMWORD[(240-128)+rax],xmm24294 vpaddd xmm12,xmm12,xmm24295 vpor xmm8,xmm8,xmm94296 vpsrld xmm5,xmm3,314297 vpand xmm6,xmm6,xmm144298 vpaddd xmm3,xmm3,xmm34299 4300 vpslld xmm7,xmm14,304301 vpaddd xmm12,xmm12,xmm64302 4303 vpsrld xmm14,xmm14,24304 vpaddd xmm12,xmm12,xmm84305 vpor xmm3,xmm3,xmm54306 vpor xmm14,xmm14,xmm74307 vpxor xmm4,xmm4,xmm14308 vmovdqa xmm1,XMMWORD[((48-128))+rax]4309 4310 vpaddd xmm11,xmm11,xmm154311 vpslld xmm8,xmm12,54312 vpand xmm7,xmm10,xmm144313 vpxor xmm4,xmm4,XMMWORD[((144-128))+rax]4314 4315 vpaddd xmm11,xmm11,xmm74316 vpsrld xmm9,xmm12,274317 vpxor xmm6,xmm10,xmm144318 vpxor xmm4,xmm4,xmm14319 4320 vmovdqu XMMWORD[(0-128)+rax],xmm34321 vpaddd xmm11,xmm11,xmm34322 vpor xmm8,xmm8,xmm94323 vpsrld xmm5,xmm4,314324 vpand xmm6,xmm6,xmm134325 vpaddd xmm4,xmm4,xmm44326 4327 vpslld xmm7,xmm13,304328 vpaddd xmm11,xmm11,xmm64329 4330 vpsrld xmm13,xmm13,24331 vpaddd xmm11,xmm11,xmm84332 vpor xmm4,xmm4,xmm54333 vpor xmm13,xmm13,xmm74334 vpxor xmm0,xmm0,xmm24335 vmovdqa xmm2,XMMWORD[((64-128))+rax]4336 4337 vpaddd xmm10,xmm10,xmm154338 vpslld xmm8,xmm11,54339 vpand xmm7,xmm14,xmm134340 vpxor xmm0,xmm0,XMMWORD[((160-128))+rax]4341 4342 vpaddd xmm10,xmm10,xmm74343 vpsrld xmm9,xmm11,274344 vpxor xmm6,xmm14,xmm134345 vpxor xmm0,xmm0,xmm24346 4347 vmovdqu XMMWORD[(16-128)+rax],xmm44348 vpaddd xmm10,xmm10,xmm44349 vpor xmm8,xmm8,xmm94350 vpsrld xmm5,xmm0,314351 vpand xmm6,xmm6,xmm124352 vpaddd xmm0,xmm0,xmm04353 4354 vpslld xmm7,xmm12,304355 vpaddd xmm10,xmm10,xmm64356 4357 vpsrld xmm12,xmm12,24358 vpaddd xmm10,xmm10,xmm84359 vpor xmm0,xmm0,xmm54360 vpor xmm12,xmm12,xmm74361 vpxor xmm1,xmm1,xmm34362 vmovdqa xmm3,XMMWORD[((80-128))+rax]4363 4364 vpaddd xmm14,xmm14,xmm154365 vpslld xmm8,xmm10,54366 vpand xmm7,xmm13,xmm124367 vpxor xmm1,xmm1,XMMWORD[((176-128))+rax]4368 4369 vpaddd xmm14,xmm14,xmm74370 vpsrld xmm9,xmm10,274371 vpxor xmm6,xmm13,xmm124372 vpxor xmm1,xmm1,xmm34373 4374 vmovdqu XMMWORD[(32-128)+rax],xmm04375 vpaddd xmm14,xmm14,xmm04376 vpor xmm8,xmm8,xmm94377 vpsrld xmm5,xmm1,314378 vpand xmm6,xmm6,xmm114379 vpaddd xmm1,xmm1,xmm14380 4381 vpslld xmm7,xmm11,304382 vpaddd xmm14,xmm14,xmm64383 4384 vpsrld xmm11,xmm11,24385 vpaddd xmm14,xmm14,xmm84386 vpor xmm1,xmm1,xmm54387 vpor xmm11,xmm11,xmm74388 vpxor xmm2,xmm2,xmm44389 vmovdqa xmm4,XMMWORD[((96-128))+rax]4390 4391 vpaddd xmm13,xmm13,xmm154392 vpslld xmm8,xmm14,54393 vpand xmm7,xmm12,xmm114394 vpxor xmm2,xmm2,XMMWORD[((192-128))+rax]4395 4396 vpaddd xmm13,xmm13,xmm74397 vpsrld xmm9,xmm14,274398 vpxor xmm6,xmm12,xmm114399 vpxor xmm2,xmm2,xmm44400 4401 vmovdqu XMMWORD[(48-128)+rax],xmm14402 vpaddd xmm13,xmm13,xmm14403 vpor xmm8,xmm8,xmm94404 vpsrld xmm5,xmm2,314405 vpand xmm6,xmm6,xmm104406 vpaddd xmm2,xmm2,xmm24407 4408 vpslld xmm7,xmm10,304409 vpaddd xmm13,xmm13,xmm64410 4411 vpsrld xmm10,xmm10,24412 vpaddd xmm13,xmm13,xmm84413 vpor xmm2,xmm2,xmm54414 vpor xmm10,xmm10,xmm74415 vpxor xmm3,xmm3,xmm04416 vmovdqa xmm0,XMMWORD[((112-128))+rax]4417 4418 vpaddd xmm12,xmm12,xmm154419 vpslld xmm8,xmm13,54420 vpand xmm7,xmm11,xmm104421 vpxor xmm3,xmm3,XMMWORD[((208-128))+rax]4422 4423 vpaddd xmm12,xmm12,xmm74424 vpsrld xmm9,xmm13,274425 vpxor xmm6,xmm11,xmm104426 vpxor xmm3,xmm3,xmm04427 4428 vmovdqu XMMWORD[(64-128)+rax],xmm24429 vpaddd xmm12,xmm12,xmm24430 vpor xmm8,xmm8,xmm94431 vpsrld xmm5,xmm3,314432 vpand xmm6,xmm6,xmm144433 vpaddd xmm3,xmm3,xmm34434 4435 vpslld xmm7,xmm14,304436 vpaddd xmm12,xmm12,xmm64437 4438 vpsrld xmm14,xmm14,24439 vpaddd xmm12,xmm12,xmm84440 vpor xmm3,xmm3,xmm54441 vpor xmm14,xmm14,xmm74442 vpxor xmm4,xmm4,xmm14443 vmovdqa xmm1,XMMWORD[((128-128))+rax]4444 4445 vpaddd xmm11,xmm11,xmm154446 vpslld xmm8,xmm12,54447 vpand xmm7,xmm10,xmm144448 vpxor xmm4,xmm4,XMMWORD[((224-128))+rax]4449 4450 vpaddd xmm11,xmm11,xmm74451 vpsrld xmm9,xmm12,274452 vpxor xmm6,xmm10,xmm144453 vpxor xmm4,xmm4,xmm14454 4455 vmovdqu XMMWORD[(80-128)+rax],xmm34456 vpaddd xmm11,xmm11,xmm34457 vpor xmm8,xmm8,xmm94458 vpsrld xmm5,xmm4,314459 vpand xmm6,xmm6,xmm134460 vpaddd xmm4,xmm4,xmm44461 4462 vpslld xmm7,xmm13,304463 vpaddd xmm11,xmm11,xmm64464 4465 vpsrld xmm13,xmm13,24466 vpaddd xmm11,xmm11,xmm84467 vpor xmm4,xmm4,xmm54468 vpor xmm13,xmm13,xmm74469 vpxor xmm0,xmm0,xmm24470 vmovdqa xmm2,XMMWORD[((144-128))+rax]4471 4472 vpaddd xmm10,xmm10,xmm154473 vpslld xmm8,xmm11,54474 vpand xmm7,xmm14,xmm134475 vpxor xmm0,xmm0,XMMWORD[((240-128))+rax]4476 4477 vpaddd xmm10,xmm10,xmm74478 vpsrld xmm9,xmm11,274479 vpxor xmm6,xmm14,xmm134480 vpxor xmm0,xmm0,xmm24481 4482 vmovdqu XMMWORD[(96-128)+rax],xmm44483 vpaddd xmm10,xmm10,xmm44484 vpor xmm8,xmm8,xmm94485 vpsrld xmm5,xmm0,314486 vpand xmm6,xmm6,xmm124487 vpaddd xmm0,xmm0,xmm04488 4489 vpslld xmm7,xmm12,304490 vpaddd xmm10,xmm10,xmm64491 4492 vpsrld xmm12,xmm12,24493 vpaddd xmm10,xmm10,xmm84494 vpor xmm0,xmm0,xmm54495 vpor xmm12,xmm12,xmm74496 vpxor xmm1,xmm1,xmm34497 vmovdqa xmm3,XMMWORD[((160-128))+rax]4498 4499 vpaddd xmm14,xmm14,xmm154500 vpslld xmm8,xmm10,54501 vpand xmm7,xmm13,xmm124502 vpxor xmm1,xmm1,XMMWORD[((0-128))+rax]4503 4504 vpaddd xmm14,xmm14,xmm74505 vpsrld xmm9,xmm10,274506 vpxor xmm6,xmm13,xmm124507 vpxor xmm1,xmm1,xmm34508 4509 vmovdqu XMMWORD[(112-128)+rax],xmm04510 vpaddd xmm14,xmm14,xmm04511 vpor xmm8,xmm8,xmm94512 vpsrld xmm5,xmm1,314513 vpand xmm6,xmm6,xmm114514 vpaddd xmm1,xmm1,xmm14515 4516 vpslld xmm7,xmm11,304517 vpaddd xmm14,xmm14,xmm64518 4519 vpsrld xmm11,xmm11,24520 vpaddd xmm14,xmm14,xmm84521 vpor xmm1,xmm1,xmm54522 vpor xmm11,xmm11,xmm74523 vpxor xmm2,xmm2,xmm44524 vmovdqa xmm4,XMMWORD[((176-128))+rax]4525 4526 vpaddd xmm13,xmm13,xmm154527 vpslld xmm8,xmm14,54528 vpand xmm7,xmm12,xmm114529 vpxor xmm2,xmm2,XMMWORD[((16-128))+rax]4530 4531 vpaddd xmm13,xmm13,xmm74532 vpsrld xmm9,xmm14,274533 vpxor xmm6,xmm12,xmm114534 vpxor xmm2,xmm2,xmm44535 4536 vmovdqu XMMWORD[(128-128)+rax],xmm14537 vpaddd xmm13,xmm13,xmm14538 vpor xmm8,xmm8,xmm94539 vpsrld xmm5,xmm2,314540 vpand xmm6,xmm6,xmm104541 vpaddd xmm2,xmm2,xmm24542 4543 vpslld xmm7,xmm10,304544 vpaddd xmm13,xmm13,xmm64545 4546 vpsrld xmm10,xmm10,24547 vpaddd xmm13,xmm13,xmm84548 vpor xmm2,xmm2,xmm54549 vpor xmm10,xmm10,xmm74550 vpxor xmm3,xmm3,xmm04551 vmovdqa xmm0,XMMWORD[((192-128))+rax]4552 4553 vpaddd xmm12,xmm12,xmm154554 vpslld xmm8,xmm13,54555 vpand xmm7,xmm11,xmm104556 vpxor xmm3,xmm3,XMMWORD[((32-128))+rax]4557 4558 vpaddd xmm12,xmm12,xmm74559 vpsrld xmm9,xmm13,274560 vpxor xmm6,xmm11,xmm104561 vpxor xmm3,xmm3,xmm04562 4563 vmovdqu XMMWORD[(144-128)+rax],xmm24564 vpaddd xmm12,xmm12,xmm24565 vpor xmm8,xmm8,xmm94566 vpsrld xmm5,xmm3,314567 vpand xmm6,xmm6,xmm144568 vpaddd xmm3,xmm3,xmm34569 4570 vpslld xmm7,xmm14,304571 vpaddd xmm12,xmm12,xmm64572 4573 vpsrld xmm14,xmm14,24574 vpaddd xmm12,xmm12,xmm84575 vpor xmm3,xmm3,xmm54576 vpor xmm14,xmm14,xmm74577 vpxor xmm4,xmm4,xmm14578 vmovdqa xmm1,XMMWORD[((208-128))+rax]4579 4580 vpaddd xmm11,xmm11,xmm154581 vpslld xmm8,xmm12,54582 vpand xmm7,xmm10,xmm144583 vpxor xmm4,xmm4,XMMWORD[((48-128))+rax]4584 4585 vpaddd xmm11,xmm11,xmm74586 vpsrld xmm9,xmm12,274587 vpxor xmm6,xmm10,xmm144588 vpxor xmm4,xmm4,xmm14589 4590 vmovdqu XMMWORD[(160-128)+rax],xmm34591 vpaddd xmm11,xmm11,xmm34592 vpor xmm8,xmm8,xmm94593 vpsrld xmm5,xmm4,314594 vpand xmm6,xmm6,xmm134595 vpaddd xmm4,xmm4,xmm44596 4597 vpslld xmm7,xmm13,304598 vpaddd xmm11,xmm11,xmm64599 4600 vpsrld xmm13,xmm13,24601 vpaddd xmm11,xmm11,xmm84602 vpor xmm4,xmm4,xmm54603 vpor xmm13,xmm13,xmm74604 vpxor xmm0,xmm0,xmm24605 vmovdqa xmm2,XMMWORD[((224-128))+rax]4606 4607 vpaddd xmm10,xmm10,xmm154608 vpslld xmm8,xmm11,54609 vpand xmm7,xmm14,xmm134610 vpxor xmm0,xmm0,XMMWORD[((64-128))+rax]4611 4612 vpaddd xmm10,xmm10,xmm74613 vpsrld xmm9,xmm11,274614 vpxor xmm6,xmm14,xmm134615 vpxor xmm0,xmm0,xmm24616 4617 vmovdqu XMMWORD[(176-128)+rax],xmm44618 vpaddd xmm10,xmm10,xmm44619 vpor xmm8,xmm8,xmm94620 vpsrld xmm5,xmm0,314621 vpand xmm6,xmm6,xmm124622 vpaddd xmm0,xmm0,xmm04623 4624 vpslld xmm7,xmm12,304625 vpaddd xmm10,xmm10,xmm64626 4627 vpsrld xmm12,xmm12,24628 vpaddd xmm10,xmm10,xmm84629 vpor xmm0,xmm0,xmm54630 vpor xmm12,xmm12,xmm74631 vmovdqa xmm15,XMMWORD[64+rbp]4632 vpxor xmm1,xmm1,xmm34633 vmovdqa xmm3,XMMWORD[((240-128))+rax]4634 4635 vpslld xmm8,xmm10,54636 vpaddd xmm14,xmm14,xmm154637 vpxor xmm6,xmm13,xmm114638 vmovdqa XMMWORD[(192-128)+rax],xmm04639 vpaddd xmm14,xmm14,xmm04640 vpxor xmm1,xmm1,XMMWORD[((80-128))+rax]4641 vpsrld xmm9,xmm10,274642 vpxor xmm6,xmm6,xmm124643 vpxor xmm1,xmm1,xmm34644 4645 vpslld xmm7,xmm11,304646 vpor xmm8,xmm8,xmm94647 vpaddd xmm14,xmm14,xmm64648 vpsrld xmm5,xmm1,314649 vpaddd xmm1,xmm1,xmm14650 4651 vpsrld xmm11,xmm11,24652 vpaddd xmm14,xmm14,xmm84653 vpor xmm1,xmm1,xmm54654 vpor xmm11,xmm11,xmm74655 vpxor xmm2,xmm2,xmm44656 vmovdqa xmm4,XMMWORD[((0-128))+rax]4657 4658 vpslld xmm8,xmm14,54659 vpaddd xmm13,xmm13,xmm154660 vpxor xmm6,xmm12,xmm104661 vmovdqa XMMWORD[(208-128)+rax],xmm14662 vpaddd xmm13,xmm13,xmm14663 vpxor xmm2,xmm2,XMMWORD[((96-128))+rax]4664 vpsrld xmm9,xmm14,274665 vpxor xmm6,xmm6,xmm114666 vpxor xmm2,xmm2,xmm44667 4668 vpslld xmm7,xmm10,304669 vpor xmm8,xmm8,xmm94670 vpaddd xmm13,xmm13,xmm64671 vpsrld xmm5,xmm2,314672 vpaddd xmm2,xmm2,xmm24673 4674 vpsrld xmm10,xmm10,24675 vpaddd xmm13,xmm13,xmm84676 vpor xmm2,xmm2,xmm54677 vpor xmm10,xmm10,xmm74678 vpxor xmm3,xmm3,xmm04679 vmovdqa xmm0,XMMWORD[((16-128))+rax]4680 4681 vpslld xmm8,xmm13,54682 vpaddd xmm12,xmm12,xmm154683 vpxor xmm6,xmm11,xmm144684 vmovdqa XMMWORD[(224-128)+rax],xmm24685 vpaddd xmm12,xmm12,xmm24686 vpxor xmm3,xmm3,XMMWORD[((112-128))+rax]4687 vpsrld xmm9,xmm13,274688 vpxor xmm6,xmm6,xmm104689 vpxor xmm3,xmm3,xmm04690 4691 vpslld xmm7,xmm14,304692 vpor xmm8,xmm8,xmm94693 vpaddd xmm12,xmm12,xmm64694 vpsrld xmm5,xmm3,314695 vpaddd xmm3,xmm3,xmm34696 4697 vpsrld xmm14,xmm14,24698 vpaddd xmm12,xmm12,xmm84699 vpor xmm3,xmm3,xmm54700 vpor xmm14,xmm14,xmm74701 vpxor xmm4,xmm4,xmm14702 vmovdqa xmm1,XMMWORD[((32-128))+rax]4703 4704 vpslld xmm8,xmm12,54705 vpaddd xmm11,xmm11,xmm154706 vpxor xmm6,xmm10,xmm134707 vmovdqa XMMWORD[(240-128)+rax],xmm34708 vpaddd xmm11,xmm11,xmm34709 vpxor xmm4,xmm4,XMMWORD[((128-128))+rax]4710 vpsrld xmm9,xmm12,274711 vpxor xmm6,xmm6,xmm144712 vpxor xmm4,xmm4,xmm14713 4714 vpslld xmm7,xmm13,304715 vpor xmm8,xmm8,xmm94716 vpaddd xmm11,xmm11,xmm64717 vpsrld xmm5,xmm4,314718 vpaddd xmm4,xmm4,xmm44719 4720 vpsrld xmm13,xmm13,24721 vpaddd xmm11,xmm11,xmm84722 vpor xmm4,xmm4,xmm54723 vpor xmm13,xmm13,xmm74724 vpxor xmm0,xmm0,xmm24725 vmovdqa xmm2,XMMWORD[((48-128))+rax]4726 4727 vpslld xmm8,xmm11,54728 vpaddd xmm10,xmm10,xmm154729 vpxor xmm6,xmm14,xmm124730 vmovdqa XMMWORD[(0-128)+rax],xmm44731 vpaddd xmm10,xmm10,xmm44732 vpxor xmm0,xmm0,XMMWORD[((144-128))+rax]4733 vpsrld xmm9,xmm11,274734 vpxor xmm6,xmm6,xmm134735 vpxor xmm0,xmm0,xmm24736 4737 vpslld xmm7,xmm12,304738 vpor xmm8,xmm8,xmm94739 vpaddd xmm10,xmm10,xmm64740 vpsrld xmm5,xmm0,314741 vpaddd xmm0,xmm0,xmm04742 4743 vpsrld xmm12,xmm12,24744 vpaddd xmm10,xmm10,xmm84745 vpor xmm0,xmm0,xmm54746 vpor xmm12,xmm12,xmm74747 vpxor xmm1,xmm1,xmm34748 vmovdqa xmm3,XMMWORD[((64-128))+rax]4749 4750 vpslld xmm8,xmm10,54751 vpaddd xmm14,xmm14,xmm154752 vpxor xmm6,xmm13,xmm114753 vmovdqa XMMWORD[(16-128)+rax],xmm04754 vpaddd xmm14,xmm14,xmm04755 vpxor xmm1,xmm1,XMMWORD[((160-128))+rax]4756 vpsrld xmm9,xmm10,274757 vpxor xmm6,xmm6,xmm124758 vpxor xmm1,xmm1,xmm34759 4760 vpslld xmm7,xmm11,304761 vpor xmm8,xmm8,xmm94762 vpaddd xmm14,xmm14,xmm64763 vpsrld xmm5,xmm1,314764 vpaddd xmm1,xmm1,xmm14765 4766 vpsrld xmm11,xmm11,24767 vpaddd xmm14,xmm14,xmm84768 vpor xmm1,xmm1,xmm54769 vpor xmm11,xmm11,xmm74770 vpxor xmm2,xmm2,xmm44771 vmovdqa xmm4,XMMWORD[((80-128))+rax]4772 4773 vpslld xmm8,xmm14,54774 vpaddd xmm13,xmm13,xmm154775 vpxor xmm6,xmm12,xmm104776 vmovdqa XMMWORD[(32-128)+rax],xmm14777 vpaddd xmm13,xmm13,xmm14778 vpxor xmm2,xmm2,XMMWORD[((176-128))+rax]4779 vpsrld xmm9,xmm14,274780 vpxor xmm6,xmm6,xmm114781 vpxor xmm2,xmm2,xmm44782 4783 vpslld xmm7,xmm10,304784 vpor xmm8,xmm8,xmm94785 vpaddd xmm13,xmm13,xmm64786 vpsrld xmm5,xmm2,314787 vpaddd xmm2,xmm2,xmm24788 4789 vpsrld xmm10,xmm10,24790 vpaddd xmm13,xmm13,xmm84791 vpor xmm2,xmm2,xmm54792 vpor xmm10,xmm10,xmm74793 vpxor xmm3,xmm3,xmm04794 vmovdqa xmm0,XMMWORD[((96-128))+rax]4795 4796 vpslld xmm8,xmm13,54797 vpaddd xmm12,xmm12,xmm154798 vpxor xmm6,xmm11,xmm144799 vmovdqa XMMWORD[(48-128)+rax],xmm24800 vpaddd xmm12,xmm12,xmm24801 vpxor xmm3,xmm3,XMMWORD[((192-128))+rax]4802 vpsrld xmm9,xmm13,274803 vpxor xmm6,xmm6,xmm104804 vpxor xmm3,xmm3,xmm04805 4806 vpslld xmm7,xmm14,304807 vpor xmm8,xmm8,xmm94808 vpaddd xmm12,xmm12,xmm64809 vpsrld xmm5,xmm3,314810 vpaddd xmm3,xmm3,xmm34811 4812 vpsrld xmm14,xmm14,24813 vpaddd xmm12,xmm12,xmm84814 vpor xmm3,xmm3,xmm54815 vpor xmm14,xmm14,xmm74816 vpxor xmm4,xmm4,xmm14817 vmovdqa xmm1,XMMWORD[((112-128))+rax]4818 4819 vpslld xmm8,xmm12,54820 vpaddd xmm11,xmm11,xmm154821 vpxor xmm6,xmm10,xmm134822 vmovdqa XMMWORD[(64-128)+rax],xmm34823 vpaddd xmm11,xmm11,xmm34824 vpxor xmm4,xmm4,XMMWORD[((208-128))+rax]4825 vpsrld xmm9,xmm12,274826 vpxor xmm6,xmm6,xmm144827 vpxor xmm4,xmm4,xmm14828 4829 vpslld xmm7,xmm13,304830 vpor xmm8,xmm8,xmm94831 vpaddd xmm11,xmm11,xmm64832 vpsrld xmm5,xmm4,314833 vpaddd xmm4,xmm4,xmm44834 4835 vpsrld xmm13,xmm13,24836 vpaddd xmm11,xmm11,xmm84837 vpor xmm4,xmm4,xmm54838 vpor xmm13,xmm13,xmm74839 vpxor xmm0,xmm0,xmm24840 vmovdqa xmm2,XMMWORD[((128-128))+rax]4841 4842 vpslld xmm8,xmm11,54843 vpaddd xmm10,xmm10,xmm154844 vpxor xmm6,xmm14,xmm124845 vmovdqa XMMWORD[(80-128)+rax],xmm44846 vpaddd xmm10,xmm10,xmm44847 vpxor xmm0,xmm0,XMMWORD[((224-128))+rax]4848 vpsrld xmm9,xmm11,274849 vpxor xmm6,xmm6,xmm134850 vpxor xmm0,xmm0,xmm24851 4852 vpslld xmm7,xmm12,304853 vpor xmm8,xmm8,xmm94854 vpaddd xmm10,xmm10,xmm64855 vpsrld xmm5,xmm0,314856 vpaddd xmm0,xmm0,xmm04857 4858 vpsrld xmm12,xmm12,24859 vpaddd xmm10,xmm10,xmm84860 vpor xmm0,xmm0,xmm54861 vpor xmm12,xmm12,xmm74862 vpxor xmm1,xmm1,xmm34863 vmovdqa xmm3,XMMWORD[((144-128))+rax]4864 4865 vpslld xmm8,xmm10,54866 vpaddd xmm14,xmm14,xmm154867 vpxor xmm6,xmm13,xmm114868 vmovdqa XMMWORD[(96-128)+rax],xmm04869 vpaddd xmm14,xmm14,xmm04870 vpxor xmm1,xmm1,XMMWORD[((240-128))+rax]4871 vpsrld xmm9,xmm10,274872 vpxor xmm6,xmm6,xmm124873 vpxor xmm1,xmm1,xmm34874 4875 vpslld xmm7,xmm11,304876 vpor xmm8,xmm8,xmm94877 vpaddd xmm14,xmm14,xmm64878 vpsrld xmm5,xmm1,314879 vpaddd xmm1,xmm1,xmm14880 4881 vpsrld xmm11,xmm11,24882 vpaddd xmm14,xmm14,xmm84883 vpor xmm1,xmm1,xmm54884 vpor xmm11,xmm11,xmm74885 vpxor xmm2,xmm2,xmm44886 vmovdqa xmm4,XMMWORD[((160-128))+rax]4887 4888 vpslld xmm8,xmm14,54889 vpaddd xmm13,xmm13,xmm154890 vpxor xmm6,xmm12,xmm104891 vmovdqa XMMWORD[(112-128)+rax],xmm14892 vpaddd xmm13,xmm13,xmm14893 vpxor xmm2,xmm2,XMMWORD[((0-128))+rax]4894 vpsrld xmm9,xmm14,274895 vpxor xmm6,xmm6,xmm114896 vpxor xmm2,xmm2,xmm44897 4898 vpslld xmm7,xmm10,304899 vpor xmm8,xmm8,xmm94900 vpaddd xmm13,xmm13,xmm64901 vpsrld xmm5,xmm2,314902 vpaddd xmm2,xmm2,xmm24903 4904 vpsrld xmm10,xmm10,24905 vpaddd xmm13,xmm13,xmm84906 vpor xmm2,xmm2,xmm54907 vpor xmm10,xmm10,xmm74908 vpxor xmm3,xmm3,xmm04909 vmovdqa xmm0,XMMWORD[((176-128))+rax]4910 4911 vpslld xmm8,xmm13,54912 vpaddd xmm12,xmm12,xmm154913 vpxor xmm6,xmm11,xmm144914 vpaddd xmm12,xmm12,xmm24915 vpxor xmm3,xmm3,XMMWORD[((16-128))+rax]4916 vpsrld xmm9,xmm13,274917 vpxor xmm6,xmm6,xmm104918 vpxor xmm3,xmm3,xmm04919 4920 vpslld xmm7,xmm14,304921 vpor xmm8,xmm8,xmm94922 vpaddd xmm12,xmm12,xmm64923 vpsrld xmm5,xmm3,314924 vpaddd xmm3,xmm3,xmm34925 4926 vpsrld xmm14,xmm14,24927 vpaddd xmm12,xmm12,xmm84928 vpor xmm3,xmm3,xmm54929 vpor xmm14,xmm14,xmm74930 vpxor xmm4,xmm4,xmm14931 vmovdqa xmm1,XMMWORD[((192-128))+rax]4932 4933 vpslld xmm8,xmm12,54934 vpaddd xmm11,xmm11,xmm154935 vpxor xmm6,xmm10,xmm134936 vpaddd xmm11,xmm11,xmm34937 vpxor xmm4,xmm4,XMMWORD[((32-128))+rax]4938 vpsrld xmm9,xmm12,274939 vpxor xmm6,xmm6,xmm144940 vpxor xmm4,xmm4,xmm14941 4942 vpslld xmm7,xmm13,304943 vpor xmm8,xmm8,xmm94944 vpaddd xmm11,xmm11,xmm64945 vpsrld xmm5,xmm4,314946 vpaddd xmm4,xmm4,xmm44947 4948 vpsrld xmm13,xmm13,24949 vpaddd xmm11,xmm11,xmm84950 vpor xmm4,xmm4,xmm54951 vpor xmm13,xmm13,xmm74952 vpxor xmm0,xmm0,xmm24953 vmovdqa xmm2,XMMWORD[((208-128))+rax]4954 4955 vpslld xmm8,xmm11,54956 vpaddd xmm10,xmm10,xmm154957 vpxor xmm6,xmm14,xmm124958 vpaddd xmm10,xmm10,xmm44959 vpxor xmm0,xmm0,XMMWORD[((48-128))+rax]4960 vpsrld xmm9,xmm11,274961 vpxor xmm6,xmm6,xmm134962 vpxor xmm0,xmm0,xmm24963 4964 vpslld xmm7,xmm12,304965 vpor xmm8,xmm8,xmm94966 vpaddd xmm10,xmm10,xmm64967 vpsrld xmm5,xmm0,314968 vpaddd xmm0,xmm0,xmm04969 4970 vpsrld xmm12,xmm12,24971 vpaddd xmm10,xmm10,xmm84972 vpor xmm0,xmm0,xmm54973 vpor xmm12,xmm12,xmm74974 vpxor xmm1,xmm1,xmm34975 vmovdqa xmm3,XMMWORD[((224-128))+rax]4976 4977 vpslld xmm8,xmm10,54978 vpaddd xmm14,xmm14,xmm154979 vpxor xmm6,xmm13,xmm114980 vpaddd xmm14,xmm14,xmm04981 vpxor xmm1,xmm1,XMMWORD[((64-128))+rax]4982 vpsrld xmm9,xmm10,274983 vpxor xmm6,xmm6,xmm124984 vpxor xmm1,xmm1,xmm34985 4986 vpslld xmm7,xmm11,304987 vpor xmm8,xmm8,xmm94988 vpaddd xmm14,xmm14,xmm64989 vpsrld xmm5,xmm1,314990 vpaddd xmm1,xmm1,xmm14991 4992 vpsrld xmm11,xmm11,24993 vpaddd xmm14,xmm14,xmm84994 vpor xmm1,xmm1,xmm54995 vpor xmm11,xmm11,xmm74996 vpxor xmm2,xmm2,xmm44997 vmovdqa xmm4,XMMWORD[((240-128))+rax]4998 4999 vpslld xmm8,xmm14,55000 vpaddd xmm13,xmm13,xmm155001 vpxor xmm6,xmm12,xmm105002 vpaddd xmm13,xmm13,xmm15003 vpxor xmm2,xmm2,XMMWORD[((80-128))+rax]5004 vpsrld xmm9,xmm14,275005 vpxor xmm6,xmm6,xmm115006 vpxor xmm2,xmm2,xmm45007 5008 vpslld xmm7,xmm10,305009 vpor xmm8,xmm8,xmm95010 vpaddd xmm13,xmm13,xmm65011 vpsrld xmm5,xmm2,315012 vpaddd xmm2,xmm2,xmm25013 5014 vpsrld xmm10,xmm10,25015 vpaddd xmm13,xmm13,xmm85016 vpor xmm2,xmm2,xmm55017 vpor xmm10,xmm10,xmm75018 vpxor xmm3,xmm3,xmm05019 vmovdqa xmm0,XMMWORD[((0-128))+rax]5020 5021 vpslld xmm8,xmm13,55022 vpaddd xmm12,xmm12,xmm155023 vpxor xmm6,xmm11,xmm145024 vpaddd xmm12,xmm12,xmm25025 vpxor xmm3,xmm3,XMMWORD[((96-128))+rax]5026 vpsrld xmm9,xmm13,275027 vpxor xmm6,xmm6,xmm105028 vpxor xmm3,xmm3,xmm05029 5030 vpslld xmm7,xmm14,305031 vpor xmm8,xmm8,xmm95032 vpaddd xmm12,xmm12,xmm65033 vpsrld xmm5,xmm3,315034 vpaddd xmm3,xmm3,xmm35035 5036 vpsrld xmm14,xmm14,25037 vpaddd xmm12,xmm12,xmm85038 vpor xmm3,xmm3,xmm55039 vpor xmm14,xmm14,xmm75040 vpxor xmm4,xmm4,xmm15041 vmovdqa xmm1,XMMWORD[((16-128))+rax]5042 5043 vpslld xmm8,xmm12,55044 vpaddd xmm11,xmm11,xmm155045 vpxor xmm6,xmm10,xmm135046 vpaddd xmm11,xmm11,xmm35047 vpxor xmm4,xmm4,XMMWORD[((112-128))+rax]5048 vpsrld xmm9,xmm12,275049 vpxor xmm6,xmm6,xmm145050 vpxor xmm4,xmm4,xmm15051 5052 vpslld xmm7,xmm13,305053 vpor xmm8,xmm8,xmm95054 vpaddd xmm11,xmm11,xmm65055 vpsrld xmm5,xmm4,315056 vpaddd xmm4,xmm4,xmm45057 5058 vpsrld xmm13,xmm13,25059 vpaddd xmm11,xmm11,xmm85060 vpor xmm4,xmm4,xmm55061 vpor xmm13,xmm13,xmm75062 vpslld xmm8,xmm11,55063 vpaddd xmm10,xmm10,xmm155064 vpxor xmm6,xmm14,xmm125065 5066 vpsrld xmm9,xmm11,275067 vpaddd xmm10,xmm10,xmm45068 vpxor xmm6,xmm6,xmm135069 5070 vpslld xmm7,xmm12,305071 vpor xmm8,xmm8,xmm95072 vpaddd xmm10,xmm10,xmm65073 5074 vpsrld xmm12,xmm12,25075 vpaddd xmm10,xmm10,xmm85076 vpor xmm12,xmm12,xmm75077 mov ecx,15078 cmp ecx,DWORD[rbx]5079 cmovge r8,rbp5080 cmp ecx,DWORD[4+rbx]5081 cmovge r9,rbp5082 cmp ecx,DWORD[8+rbx]5083 cmovge r10,rbp5084 cmp ecx,DWORD[12+rbx]5085 cmovge r11,rbp5086 vmovdqu xmm6,XMMWORD[rbx]5087 vpxor xmm8,xmm8,xmm85088 vmovdqa xmm7,xmm65089 vpcmpgtd xmm7,xmm7,xmm85090 vpaddd xmm6,xmm6,xmm75091 5092 vpand xmm10,xmm10,xmm75093 vpand xmm11,xmm11,xmm75094 vpaddd xmm10,xmm10,XMMWORD[rdi]5095 vpand xmm12,xmm12,xmm75096 vpaddd xmm11,xmm11,XMMWORD[32+rdi]5097 vpand xmm13,xmm13,xmm75098 vpaddd xmm12,xmm12,XMMWORD[64+rdi]5099 vpand xmm14,xmm14,xmm75100 vpaddd xmm13,xmm13,XMMWORD[96+rdi]5101 vpaddd xmm14,xmm14,XMMWORD[128+rdi]5102 vmovdqu XMMWORD[rdi],xmm105103 vmovdqu XMMWORD[32+rdi],xmm115104 vmovdqu XMMWORD[64+rdi],xmm125105 vmovdqu XMMWORD[96+rdi],xmm135106 vmovdqu XMMWORD[128+rdi],xmm145107 5108 vmovdqu XMMWORD[rbx],xmm65109 vmovdqu xmm5,XMMWORD[96+rbp]5110 dec edx5111 jnz NEAR $L$oop_avx5112 5113 mov edx,DWORD[280+rsp]5114 lea rdi,[16+rdi]5115 lea rsi,[64+rsi]5116 dec edx5117 jnz NEAR $L$oop_grande_avx5118 5119 $L$done_avx:5120 mov rax,QWORD[272+rsp]5121 5122 vzeroupper5123 movaps xmm6,XMMWORD[((-184))+rax]5124 movaps xmm7,XMMWORD[((-168))+rax]5125 movaps xmm8,XMMWORD[((-152))+rax]5126 movaps xmm9,XMMWORD[((-136))+rax]5127 movaps xmm10,XMMWORD[((-120))+rax]5128 movaps xmm11,XMMWORD[((-104))+rax]5129 movaps xmm12,XMMWORD[((-88))+rax]5130 movaps xmm13,XMMWORD[((-72))+rax]5131 movaps xmm14,XMMWORD[((-56))+rax]5132 movaps xmm15,XMMWORD[((-40))+rax]5133 mov rbp,QWORD[((-16))+rax]5134 5135 mov rbx,QWORD[((-8))+rax]5136 5137 lea rsp,[rax]5138 5139 $L$epilogue_avx:5140 mov rdi,QWORD[8+rsp] ;WIN64 epilogue5141 mov rsi,QWORD[16+rsp]5142 DB 0F3h,0C3h ;repret5143 5144 $L$SEH_end_sha1_multi_block_avx:5145 5146 ALIGN 325147 sha1_multi_block_avx2:5148 mov QWORD[8+rsp],rdi ;WIN64 prologue5149 mov QWORD[16+rsp],rsi5150 mov rax,rsp5151 $L$SEH_begin_sha1_multi_block_avx2:5152 mov rdi,rcx5153 mov rsi,rdx5154 mov rdx,r85155 5156 5157 5158 _avx2_shortcut:5159 mov rax,rsp5160 5161 push rbx5162 5163 push rbp5164 5165 push r125166 5167 push r135168 5169 push r145170 5171 push r155172 5173 lea rsp,[((-168))+rsp]5174 movaps XMMWORD[rsp],xmm65175 movaps XMMWORD[16+rsp],xmm75176 movaps XMMWORD[32+rsp],xmm85177 movaps XMMWORD[48+rsp],xmm95178 movaps XMMWORD[64+rsp],xmm105179 movaps XMMWORD[80+rsp],xmm115180 movaps XMMWORD[(-120)+rax],xmm125181 movaps XMMWORD[(-104)+rax],xmm135182 movaps XMMWORD[(-88)+rax],xmm145183 movaps XMMWORD[(-72)+rax],xmm155184 sub rsp,5765185 and rsp,-2565186 mov QWORD[544+rsp],rax5187 5188 $L$body_avx2:5189 lea rbp,[K_XX_XX]5190 shr edx,15191 5192 vzeroupper5193 $L$oop_grande_avx2:5194 mov DWORD[552+rsp],edx5195 xor edx,edx5196 lea rbx,[512+rsp]5197 5198 mov r12,QWORD[rsi]5199 5200 mov ecx,DWORD[8+rsi]5201 cmp ecx,edx5202 cmovg edx,ecx5203 test ecx,ecx5204 mov DWORD[rbx],ecx5205 cmovle r12,rbp5206 5207 mov r13,QWORD[16+rsi]5208 5209 mov ecx,DWORD[24+rsi]5210 cmp ecx,edx5211 cmovg edx,ecx5212 test ecx,ecx5213 mov DWORD[4+rbx],ecx5214 cmovle r13,rbp5215 5216 mov r14,QWORD[32+rsi]5217 5218 mov ecx,DWORD[40+rsi]5219 cmp ecx,edx5220 cmovg edx,ecx5221 test ecx,ecx5222 mov DWORD[8+rbx],ecx5223 cmovle r14,rbp5224 5225 mov r15,QWORD[48+rsi]5226 5227 mov ecx,DWORD[56+rsi]5228 cmp ecx,edx5229 cmovg edx,ecx5230 test ecx,ecx5231 mov DWORD[12+rbx],ecx5232 cmovle r15,rbp5233 5234 mov r8,QWORD[64+rsi]5235 5236 mov ecx,DWORD[72+rsi]5237 cmp ecx,edx5238 cmovg edx,ecx5239 test ecx,ecx5240 mov DWORD[16+rbx],ecx5241 cmovle r8,rbp5242 5243 mov r9,QWORD[80+rsi]5244 5245 mov ecx,DWORD[88+rsi]5246 cmp ecx,edx5247 cmovg edx,ecx5248 test ecx,ecx5249 mov DWORD[20+rbx],ecx5250 cmovle r9,rbp5251 5252 mov r10,QWORD[96+rsi]5253 5254 mov ecx,DWORD[104+rsi]5255 cmp ecx,edx5256 cmovg edx,ecx5257 test ecx,ecx5258 mov DWORD[24+rbx],ecx5259 cmovle r10,rbp5260 5261 mov r11,QWORD[112+rsi]5262 5263 mov ecx,DWORD[120+rsi]5264 cmp ecx,edx5265 cmovg edx,ecx5266 test ecx,ecx5267 mov DWORD[28+rbx],ecx5268 cmovle r11,rbp5269 vmovdqu ymm0,YMMWORD[rdi]5270 lea rax,[128+rsp]5271 vmovdqu ymm1,YMMWORD[32+rdi]5272 lea rbx,[((256+128))+rsp]5273 vmovdqu ymm2,YMMWORD[64+rdi]5274 vmovdqu ymm3,YMMWORD[96+rdi]5275 vmovdqu ymm4,YMMWORD[128+rdi]5276 vmovdqu ymm9,YMMWORD[96+rbp]5277 jmp NEAR $L$oop_avx25278 5279 ALIGN 325280 $L$oop_avx2:5281 vmovdqa ymm15,YMMWORD[((-32))+rbp]5282 vmovd xmm10,DWORD[r12]5283 lea r12,[64+r12]5284 vmovd xmm12,DWORD[r8]5285 lea r8,[64+r8]5286 vmovd xmm7,DWORD[r13]5287 lea r13,[64+r13]5288 vmovd xmm6,DWORD[r9]5289 lea r9,[64+r9]5290 vpinsrd xmm10,xmm10,DWORD[r14],15291 lea r14,[64+r14]5292 vpinsrd xmm12,xmm12,DWORD[r10],15293 lea r10,[64+r10]5294 vpinsrd xmm7,xmm7,DWORD[r15],15295 lea r15,[64+r15]5296 vpunpckldq ymm10,ymm10,ymm75297 vpinsrd xmm6,xmm6,DWORD[r11],15298 lea r11,[64+r11]5299 vpunpckldq ymm12,ymm12,ymm65300 vmovd xmm11,DWORD[((-60))+r12]5301 vinserti128 ymm10,ymm10,xmm12,15302 vmovd xmm8,DWORD[((-60))+r8]5303 vpshufb ymm10,ymm10,ymm95304 vmovd xmm7,DWORD[((-60))+r13]5305 vmovd xmm6,DWORD[((-60))+r9]5306 vpinsrd xmm11,xmm11,DWORD[((-60))+r14],15307 vpinsrd xmm8,xmm8,DWORD[((-60))+r10],15308 vpinsrd xmm7,xmm7,DWORD[((-60))+r15],15309 vpunpckldq ymm11,ymm11,ymm75310 vpinsrd xmm6,xmm6,DWORD[((-60))+r11],15311 vpunpckldq ymm8,ymm8,ymm65312 vpaddd ymm4,ymm4,ymm155313 vpslld ymm7,ymm0,55314 vpandn ymm6,ymm1,ymm35315 vpand ymm5,ymm1,ymm25316 5317 vmovdqa YMMWORD[(0-128)+rax],ymm105318 vpaddd ymm4,ymm4,ymm105319 vinserti128 ymm11,ymm11,xmm8,15320 vpsrld ymm8,ymm0,275321 vpxor ymm5,ymm5,ymm65322 vmovd xmm12,DWORD[((-56))+r12]5323 5324 vpslld ymm6,ymm1,305325 vpor ymm7,ymm7,ymm85326 vmovd xmm8,DWORD[((-56))+r8]5327 vpaddd ymm4,ymm4,ymm55328 5329 vpsrld ymm1,ymm1,25330 vpaddd ymm4,ymm4,ymm75331 vpshufb ymm11,ymm11,ymm95332 vpor ymm1,ymm1,ymm65333 vmovd xmm7,DWORD[((-56))+r13]5334 vmovd xmm6,DWORD[((-56))+r9]5335 vpinsrd xmm12,xmm12,DWORD[((-56))+r14],15336 vpinsrd xmm8,xmm8,DWORD[((-56))+r10],15337 vpinsrd xmm7,xmm7,DWORD[((-56))+r15],15338 vpunpckldq ymm12,ymm12,ymm75339 vpinsrd xmm6,xmm6,DWORD[((-56))+r11],15340 vpunpckldq ymm8,ymm8,ymm65341 vpaddd ymm3,ymm3,ymm155342 vpslld ymm7,ymm4,55343 vpandn ymm6,ymm0,ymm25344 vpand ymm5,ymm0,ymm15345 5346 vmovdqa YMMWORD[(32-128)+rax],ymm115347 vpaddd ymm3,ymm3,ymm115348 vinserti128 ymm12,ymm12,xmm8,15349 vpsrld ymm8,ymm4,275350 vpxor ymm5,ymm5,ymm65351 vmovd xmm13,DWORD[((-52))+r12]5352 5353 vpslld ymm6,ymm0,305354 vpor ymm7,ymm7,ymm85355 vmovd xmm8,DWORD[((-52))+r8]5356 vpaddd ymm3,ymm3,ymm55357 5358 vpsrld ymm0,ymm0,25359 vpaddd ymm3,ymm3,ymm75360 vpshufb ymm12,ymm12,ymm95361 vpor ymm0,ymm0,ymm65362 vmovd xmm7,DWORD[((-52))+r13]5363 vmovd xmm6,DWORD[((-52))+r9]5364 vpinsrd xmm13,xmm13,DWORD[((-52))+r14],15365 vpinsrd xmm8,xmm8,DWORD[((-52))+r10],15366 vpinsrd xmm7,xmm7,DWORD[((-52))+r15],15367 vpunpckldq ymm13,ymm13,ymm75368 vpinsrd xmm6,xmm6,DWORD[((-52))+r11],15369 vpunpckldq ymm8,ymm8,ymm65370 vpaddd ymm2,ymm2,ymm155371 vpslld ymm7,ymm3,55372 vpandn ymm6,ymm4,ymm15373 vpand ymm5,ymm4,ymm05374 5375 vmovdqa YMMWORD[(64-128)+rax],ymm125376 vpaddd ymm2,ymm2,ymm125377 vinserti128 ymm13,ymm13,xmm8,15378 vpsrld ymm8,ymm3,275379 vpxor ymm5,ymm5,ymm65380 vmovd xmm14,DWORD[((-48))+r12]5381 5382 vpslld ymm6,ymm4,305383 vpor ymm7,ymm7,ymm85384 vmovd xmm8,DWORD[((-48))+r8]5385 vpaddd ymm2,ymm2,ymm55386 5387 vpsrld ymm4,ymm4,25388 vpaddd ymm2,ymm2,ymm75389 vpshufb ymm13,ymm13,ymm95390 vpor ymm4,ymm4,ymm65391 vmovd xmm7,DWORD[((-48))+r13]5392 vmovd xmm6,DWORD[((-48))+r9]5393 vpinsrd xmm14,xmm14,DWORD[((-48))+r14],15394 vpinsrd xmm8,xmm8,DWORD[((-48))+r10],15395 vpinsrd xmm7,xmm7,DWORD[((-48))+r15],15396 vpunpckldq ymm14,ymm14,ymm75397 vpinsrd xmm6,xmm6,DWORD[((-48))+r11],15398 vpunpckldq ymm8,ymm8,ymm65399 vpaddd ymm1,ymm1,ymm155400 vpslld ymm7,ymm2,55401 vpandn ymm6,ymm3,ymm05402 vpand ymm5,ymm3,ymm45403 5404 vmovdqa YMMWORD[(96-128)+rax],ymm135405 vpaddd ymm1,ymm1,ymm135406 vinserti128 ymm14,ymm14,xmm8,15407 vpsrld ymm8,ymm2,275408 vpxor ymm5,ymm5,ymm65409 vmovd xmm10,DWORD[((-44))+r12]5410 5411 vpslld ymm6,ymm3,305412 vpor ymm7,ymm7,ymm85413 vmovd xmm8,DWORD[((-44))+r8]5414 vpaddd ymm1,ymm1,ymm55415 5416 vpsrld ymm3,ymm3,25417 vpaddd ymm1,ymm1,ymm75418 vpshufb ymm14,ymm14,ymm95419 vpor ymm3,ymm3,ymm65420 vmovd xmm7,DWORD[((-44))+r13]5421 vmovd xmm6,DWORD[((-44))+r9]5422 vpinsrd xmm10,xmm10,DWORD[((-44))+r14],15423 vpinsrd xmm8,xmm8,DWORD[((-44))+r10],15424 vpinsrd xmm7,xmm7,DWORD[((-44))+r15],15425 vpunpckldq ymm10,ymm10,ymm75426 vpinsrd xmm6,xmm6,DWORD[((-44))+r11],15427 vpunpckldq ymm8,ymm8,ymm65428 vpaddd ymm0,ymm0,ymm155429 vpslld ymm7,ymm1,55430 vpandn ymm6,ymm2,ymm45431 vpand ymm5,ymm2,ymm35432 5433 vmovdqa YMMWORD[(128-128)+rax],ymm145434 vpaddd ymm0,ymm0,ymm145435 vinserti128 ymm10,ymm10,xmm8,15436 vpsrld ymm8,ymm1,275437 vpxor ymm5,ymm5,ymm65438 vmovd xmm11,DWORD[((-40))+r12]5439 5440 vpslld ymm6,ymm2,305441 vpor ymm7,ymm7,ymm85442 vmovd xmm8,DWORD[((-40))+r8]5443 vpaddd ymm0,ymm0,ymm55444 5445 vpsrld ymm2,ymm2,25446 vpaddd ymm0,ymm0,ymm75447 vpshufb ymm10,ymm10,ymm95448 vpor ymm2,ymm2,ymm65449 vmovd xmm7,DWORD[((-40))+r13]5450 vmovd xmm6,DWORD[((-40))+r9]5451 vpinsrd xmm11,xmm11,DWORD[((-40))+r14],15452 vpinsrd xmm8,xmm8,DWORD[((-40))+r10],15453 vpinsrd xmm7,xmm7,DWORD[((-40))+r15],15454 vpunpckldq ymm11,ymm11,ymm75455 vpinsrd xmm6,xmm6,DWORD[((-40))+r11],15456 vpunpckldq ymm8,ymm8,ymm65457 vpaddd ymm4,ymm4,ymm155458 vpslld ymm7,ymm0,55459 vpandn ymm6,ymm1,ymm35460 vpand ymm5,ymm1,ymm25461 5462 vmovdqa YMMWORD[(160-128)+rax],ymm105463 vpaddd ymm4,ymm4,ymm105464 vinserti128 ymm11,ymm11,xmm8,15465 vpsrld ymm8,ymm0,275466 vpxor ymm5,ymm5,ymm65467 vmovd xmm12,DWORD[((-36))+r12]5468 5469 vpslld ymm6,ymm1,305470 vpor ymm7,ymm7,ymm85471 vmovd xmm8,DWORD[((-36))+r8]5472 vpaddd ymm4,ymm4,ymm55473 5474 vpsrld ymm1,ymm1,25475 vpaddd ymm4,ymm4,ymm75476 vpshufb ymm11,ymm11,ymm95477 vpor ymm1,ymm1,ymm65478 vmovd xmm7,DWORD[((-36))+r13]5479 vmovd xmm6,DWORD[((-36))+r9]5480 vpinsrd xmm12,xmm12,DWORD[((-36))+r14],15481 vpinsrd xmm8,xmm8,DWORD[((-36))+r10],15482 vpinsrd xmm7,xmm7,DWORD[((-36))+r15],15483 vpunpckldq ymm12,ymm12,ymm75484 vpinsrd xmm6,xmm6,DWORD[((-36))+r11],15485 vpunpckldq ymm8,ymm8,ymm65486 vpaddd ymm3,ymm3,ymm155487 vpslld ymm7,ymm4,55488 vpandn ymm6,ymm0,ymm25489 vpand ymm5,ymm0,ymm15490 5491 vmovdqa YMMWORD[(192-128)+rax],ymm115492 vpaddd ymm3,ymm3,ymm115493 vinserti128 ymm12,ymm12,xmm8,15494 vpsrld ymm8,ymm4,275495 vpxor ymm5,ymm5,ymm65496 vmovd xmm13,DWORD[((-32))+r12]5497 5498 vpslld ymm6,ymm0,305499 vpor ymm7,ymm7,ymm85500 vmovd xmm8,DWORD[((-32))+r8]5501 vpaddd ymm3,ymm3,ymm55502 5503 vpsrld ymm0,ymm0,25504 vpaddd ymm3,ymm3,ymm75505 vpshufb ymm12,ymm12,ymm95506 vpor ymm0,ymm0,ymm65507 vmovd xmm7,DWORD[((-32))+r13]5508 vmovd xmm6,DWORD[((-32))+r9]5509 vpinsrd xmm13,xmm13,DWORD[((-32))+r14],15510 vpinsrd xmm8,xmm8,DWORD[((-32))+r10],15511 vpinsrd xmm7,xmm7,DWORD[((-32))+r15],15512 vpunpckldq ymm13,ymm13,ymm75513 vpinsrd xmm6,xmm6,DWORD[((-32))+r11],15514 vpunpckldq ymm8,ymm8,ymm65515 vpaddd ymm2,ymm2,ymm155516 vpslld ymm7,ymm3,55517 vpandn ymm6,ymm4,ymm15518 vpand ymm5,ymm4,ymm05519 5520 vmovdqa YMMWORD[(224-128)+rax],ymm125521 vpaddd ymm2,ymm2,ymm125522 vinserti128 ymm13,ymm13,xmm8,15523 vpsrld ymm8,ymm3,275524 vpxor ymm5,ymm5,ymm65525 vmovd xmm14,DWORD[((-28))+r12]5526 5527 vpslld ymm6,ymm4,305528 vpor ymm7,ymm7,ymm85529 vmovd xmm8,DWORD[((-28))+r8]5530 vpaddd ymm2,ymm2,ymm55531 5532 vpsrld ymm4,ymm4,25533 vpaddd ymm2,ymm2,ymm75534 vpshufb ymm13,ymm13,ymm95535 vpor ymm4,ymm4,ymm65536 vmovd xmm7,DWORD[((-28))+r13]5537 vmovd xmm6,DWORD[((-28))+r9]5538 vpinsrd xmm14,xmm14,DWORD[((-28))+r14],15539 vpinsrd xmm8,xmm8,DWORD[((-28))+r10],15540 vpinsrd xmm7,xmm7,DWORD[((-28))+r15],15541 vpunpckldq ymm14,ymm14,ymm75542 vpinsrd xmm6,xmm6,DWORD[((-28))+r11],15543 vpunpckldq ymm8,ymm8,ymm65544 vpaddd ymm1,ymm1,ymm155545 vpslld ymm7,ymm2,55546 vpandn ymm6,ymm3,ymm05547 vpand ymm5,ymm3,ymm45548 5549 vmovdqa YMMWORD[(256-256-128)+rbx],ymm135550 vpaddd ymm1,ymm1,ymm135551 vinserti128 ymm14,ymm14,xmm8,15552 vpsrld ymm8,ymm2,275553 vpxor ymm5,ymm5,ymm65554 vmovd xmm10,DWORD[((-24))+r12]5555 5556 vpslld ymm6,ymm3,305557 vpor ymm7,ymm7,ymm85558 vmovd xmm8,DWORD[((-24))+r8]5559 vpaddd ymm1,ymm1,ymm55560 5561 vpsrld ymm3,ymm3,25562 vpaddd ymm1,ymm1,ymm75563 vpshufb ymm14,ymm14,ymm95564 vpor ymm3,ymm3,ymm65565 vmovd xmm7,DWORD[((-24))+r13]5566 vmovd xmm6,DWORD[((-24))+r9]5567 vpinsrd xmm10,xmm10,DWORD[((-24))+r14],15568 vpinsrd xmm8,xmm8,DWORD[((-24))+r10],15569 vpinsrd xmm7,xmm7,DWORD[((-24))+r15],15570 vpunpckldq ymm10,ymm10,ymm75571 vpinsrd xmm6,xmm6,DWORD[((-24))+r11],15572 vpunpckldq ymm8,ymm8,ymm65573 vpaddd ymm0,ymm0,ymm155574 vpslld ymm7,ymm1,55575 vpandn ymm6,ymm2,ymm45576 vpand ymm5,ymm2,ymm35577 5578 vmovdqa YMMWORD[(288-256-128)+rbx],ymm145579 vpaddd ymm0,ymm0,ymm145580 vinserti128 ymm10,ymm10,xmm8,15581 vpsrld ymm8,ymm1,275582 vpxor ymm5,ymm5,ymm65583 vmovd xmm11,DWORD[((-20))+r12]5584 5585 vpslld ymm6,ymm2,305586 vpor ymm7,ymm7,ymm85587 vmovd xmm8,DWORD[((-20))+r8]5588 vpaddd ymm0,ymm0,ymm55589 5590 vpsrld ymm2,ymm2,25591 vpaddd ymm0,ymm0,ymm75592 vpshufb ymm10,ymm10,ymm95593 vpor ymm2,ymm2,ymm65594 vmovd xmm7,DWORD[((-20))+r13]5595 vmovd xmm6,DWORD[((-20))+r9]5596 vpinsrd xmm11,xmm11,DWORD[((-20))+r14],15597 vpinsrd xmm8,xmm8,DWORD[((-20))+r10],15598 vpinsrd xmm7,xmm7,DWORD[((-20))+r15],15599 vpunpckldq ymm11,ymm11,ymm75600 vpinsrd xmm6,xmm6,DWORD[((-20))+r11],15601 vpunpckldq ymm8,ymm8,ymm65602 vpaddd ymm4,ymm4,ymm155603 vpslld ymm7,ymm0,55604 vpandn ymm6,ymm1,ymm35605 vpand ymm5,ymm1,ymm25606 5607 vmovdqa YMMWORD[(320-256-128)+rbx],ymm105608 vpaddd ymm4,ymm4,ymm105609 vinserti128 ymm11,ymm11,xmm8,15610 vpsrld ymm8,ymm0,275611 vpxor ymm5,ymm5,ymm65612 vmovd xmm12,DWORD[((-16))+r12]5613 5614 vpslld ymm6,ymm1,305615 vpor ymm7,ymm7,ymm85616 vmovd xmm8,DWORD[((-16))+r8]5617 vpaddd ymm4,ymm4,ymm55618 5619 vpsrld ymm1,ymm1,25620 vpaddd ymm4,ymm4,ymm75621 vpshufb ymm11,ymm11,ymm95622 vpor ymm1,ymm1,ymm65623 vmovd xmm7,DWORD[((-16))+r13]5624 vmovd xmm6,DWORD[((-16))+r9]5625 vpinsrd xmm12,xmm12,DWORD[((-16))+r14],15626 vpinsrd xmm8,xmm8,DWORD[((-16))+r10],15627 vpinsrd xmm7,xmm7,DWORD[((-16))+r15],15628 vpunpckldq ymm12,ymm12,ymm75629 vpinsrd xmm6,xmm6,DWORD[((-16))+r11],15630 vpunpckldq ymm8,ymm8,ymm65631 vpaddd ymm3,ymm3,ymm155632 vpslld ymm7,ymm4,55633 vpandn ymm6,ymm0,ymm25634 vpand ymm5,ymm0,ymm15635 5636 vmovdqa YMMWORD[(352-256-128)+rbx],ymm115637 vpaddd ymm3,ymm3,ymm115638 vinserti128 ymm12,ymm12,xmm8,15639 vpsrld ymm8,ymm4,275640 vpxor ymm5,ymm5,ymm65641 vmovd xmm13,DWORD[((-12))+r12]5642 5643 vpslld ymm6,ymm0,305644 vpor ymm7,ymm7,ymm85645 vmovd xmm8,DWORD[((-12))+r8]5646 vpaddd ymm3,ymm3,ymm55647 5648 vpsrld ymm0,ymm0,25649 vpaddd ymm3,ymm3,ymm75650 vpshufb ymm12,ymm12,ymm95651 vpor ymm0,ymm0,ymm65652 vmovd xmm7,DWORD[((-12))+r13]5653 vmovd xmm6,DWORD[((-12))+r9]5654 vpinsrd xmm13,xmm13,DWORD[((-12))+r14],15655 vpinsrd xmm8,xmm8,DWORD[((-12))+r10],15656 vpinsrd xmm7,xmm7,DWORD[((-12))+r15],15657 vpunpckldq ymm13,ymm13,ymm75658 vpinsrd xmm6,xmm6,DWORD[((-12))+r11],15659 vpunpckldq ymm8,ymm8,ymm65660 vpaddd ymm2,ymm2,ymm155661 vpslld ymm7,ymm3,55662 vpandn ymm6,ymm4,ymm15663 vpand ymm5,ymm4,ymm05664 5665 vmovdqa YMMWORD[(384-256-128)+rbx],ymm125666 vpaddd ymm2,ymm2,ymm125667 vinserti128 ymm13,ymm13,xmm8,15668 vpsrld ymm8,ymm3,275669 vpxor ymm5,ymm5,ymm65670 vmovd xmm14,DWORD[((-8))+r12]5671 5672 vpslld ymm6,ymm4,305673 vpor ymm7,ymm7,ymm85674 vmovd xmm8,DWORD[((-8))+r8]5675 vpaddd ymm2,ymm2,ymm55676 5677 vpsrld ymm4,ymm4,25678 vpaddd ymm2,ymm2,ymm75679 vpshufb ymm13,ymm13,ymm95680 vpor ymm4,ymm4,ymm65681 vmovd xmm7,DWORD[((-8))+r13]5682 vmovd xmm6,DWORD[((-8))+r9]5683 vpinsrd xmm14,xmm14,DWORD[((-8))+r14],15684 vpinsrd xmm8,xmm8,DWORD[((-8))+r10],15685 vpinsrd xmm7,xmm7,DWORD[((-8))+r15],15686 vpunpckldq ymm14,ymm14,ymm75687 vpinsrd xmm6,xmm6,DWORD[((-8))+r11],15688 vpunpckldq ymm8,ymm8,ymm65689 vpaddd ymm1,ymm1,ymm155690 vpslld ymm7,ymm2,55691 vpandn ymm6,ymm3,ymm05692 vpand ymm5,ymm3,ymm45693 5694 vmovdqa YMMWORD[(416-256-128)+rbx],ymm135695 vpaddd ymm1,ymm1,ymm135696 vinserti128 ymm14,ymm14,xmm8,15697 vpsrld ymm8,ymm2,275698 vpxor ymm5,ymm5,ymm65699 vmovd xmm10,DWORD[((-4))+r12]5700 5701 vpslld ymm6,ymm3,305702 vpor ymm7,ymm7,ymm85703 vmovd xmm8,DWORD[((-4))+r8]5704 vpaddd ymm1,ymm1,ymm55705 5706 vpsrld ymm3,ymm3,25707 vpaddd ymm1,ymm1,ymm75708 vpshufb ymm14,ymm14,ymm95709 vpor ymm3,ymm3,ymm65710 vmovdqa ymm11,YMMWORD[((0-128))+rax]5711 vmovd xmm7,DWORD[((-4))+r13]5712 vmovd xmm6,DWORD[((-4))+r9]5713 vpinsrd xmm10,xmm10,DWORD[((-4))+r14],15714 vpinsrd xmm8,xmm8,DWORD[((-4))+r10],15715 vpinsrd xmm7,xmm7,DWORD[((-4))+r15],15716 vpunpckldq ymm10,ymm10,ymm75717 vpinsrd xmm6,xmm6,DWORD[((-4))+r11],15718 vpunpckldq ymm8,ymm8,ymm65719 vpaddd ymm0,ymm0,ymm155720 prefetcht0 [63+r12]5721 vpslld ymm7,ymm1,55722 vpandn ymm6,ymm2,ymm45723 vpand ymm5,ymm2,ymm35724 5725 vmovdqa YMMWORD[(448-256-128)+rbx],ymm145726 vpaddd ymm0,ymm0,ymm145727 vinserti128 ymm10,ymm10,xmm8,15728 vpsrld ymm8,ymm1,275729 prefetcht0 [63+r13]5730 vpxor ymm5,ymm5,ymm65731 5732 vpslld ymm6,ymm2,305733 vpor ymm7,ymm7,ymm85734 prefetcht0 [63+r14]5735 vpaddd ymm0,ymm0,ymm55736 5737 vpsrld ymm2,ymm2,25738 vpaddd ymm0,ymm0,ymm75739 prefetcht0 [63+r15]5740 vpshufb ymm10,ymm10,ymm95741 vpor ymm2,ymm2,ymm65742 vmovdqa ymm12,YMMWORD[((32-128))+rax]5743 vpxor ymm11,ymm11,ymm135744 vmovdqa ymm13,YMMWORD[((64-128))+rax]5745 5746 vpaddd ymm4,ymm4,ymm155747 vpslld ymm7,ymm0,55748 vpandn ymm6,ymm1,ymm35749 prefetcht0 [63+r8]5750 vpand ymm5,ymm1,ymm25751 5752 vmovdqa YMMWORD[(480-256-128)+rbx],ymm105753 vpaddd ymm4,ymm4,ymm105754 vpxor ymm11,ymm11,YMMWORD[((256-256-128))+rbx]5755 vpsrld ymm8,ymm0,275756 vpxor ymm5,ymm5,ymm65757 vpxor ymm11,ymm11,ymm135758 prefetcht0 [63+r9]5759 5760 vpslld ymm6,ymm1,305761 vpor ymm7,ymm7,ymm85762 vpaddd ymm4,ymm4,ymm55763 prefetcht0 [63+r10]5764 vpsrld ymm9,ymm11,315765 vpaddd ymm11,ymm11,ymm115766 5767 vpsrld ymm1,ymm1,25768 prefetcht0 [63+r11]5769 vpaddd ymm4,ymm4,ymm75770 vpor ymm11,ymm11,ymm95771 vpor ymm1,ymm1,ymm65772 vpxor ymm12,ymm12,ymm145773 vmovdqa ymm14,YMMWORD[((96-128))+rax]5774 5775 vpaddd ymm3,ymm3,ymm155776 vpslld ymm7,ymm4,55777 vpandn ymm6,ymm0,ymm25778 5779 vpand ymm5,ymm0,ymm15780 5781 vmovdqa YMMWORD[(0-128)+rax],ymm115782 vpaddd ymm3,ymm3,ymm115783 vpxor ymm12,ymm12,YMMWORD[((288-256-128))+rbx]5784 vpsrld ymm8,ymm4,275785 vpxor ymm5,ymm5,ymm65786 vpxor ymm12,ymm12,ymm145787 5788 5789 vpslld ymm6,ymm0,305790 vpor ymm7,ymm7,ymm85791 vpaddd ymm3,ymm3,ymm55792 5793 vpsrld ymm9,ymm12,315794 vpaddd ymm12,ymm12,ymm125795 5796 vpsrld ymm0,ymm0,25797 5798 vpaddd ymm3,ymm3,ymm75799 vpor ymm12,ymm12,ymm95800 vpor ymm0,ymm0,ymm65801 vpxor ymm13,ymm13,ymm105802 vmovdqa ymm10,YMMWORD[((128-128))+rax]5803 5804 vpaddd ymm2,ymm2,ymm155805 vpslld ymm7,ymm3,55806 vpandn ymm6,ymm4,ymm15807 5808 vpand ymm5,ymm4,ymm05809 5810 vmovdqa YMMWORD[(32-128)+rax],ymm125811 vpaddd ymm2,ymm2,ymm125812 vpxor ymm13,ymm13,YMMWORD[((320-256-128))+rbx]5813 vpsrld ymm8,ymm3,275814 vpxor ymm5,ymm5,ymm65815 vpxor ymm13,ymm13,ymm105816 5817 5818 vpslld ymm6,ymm4,305819 vpor ymm7,ymm7,ymm85820 vpaddd ymm2,ymm2,ymm55821 5822 vpsrld ymm9,ymm13,315823 vpaddd ymm13,ymm13,ymm135824 5825 vpsrld ymm4,ymm4,25826 5827 vpaddd ymm2,ymm2,ymm75828 vpor ymm13,ymm13,ymm95829 vpor ymm4,ymm4,ymm65830 vpxor ymm14,ymm14,ymm115831 vmovdqa ymm11,YMMWORD[((160-128))+rax]5832 5833 vpaddd ymm1,ymm1,ymm155834 vpslld ymm7,ymm2,55835 vpandn ymm6,ymm3,ymm05836 5837 vpand ymm5,ymm3,ymm45838 5839 vmovdqa YMMWORD[(64-128)+rax],ymm135840 vpaddd ymm1,ymm1,ymm135841 vpxor ymm14,ymm14,YMMWORD[((352-256-128))+rbx]5842 vpsrld ymm8,ymm2,275843 vpxor ymm5,ymm5,ymm65844 vpxor ymm14,ymm14,ymm115845 5846 5847 vpslld ymm6,ymm3,305848 vpor ymm7,ymm7,ymm85849 vpaddd ymm1,ymm1,ymm55850 5851 vpsrld ymm9,ymm14,315852 vpaddd ymm14,ymm14,ymm145853 5854 vpsrld ymm3,ymm3,25855 5856 vpaddd ymm1,ymm1,ymm75857 vpor ymm14,ymm14,ymm95858 vpor ymm3,ymm3,ymm65859 vpxor ymm10,ymm10,ymm125860 vmovdqa ymm12,YMMWORD[((192-128))+rax]5861 5862 vpaddd ymm0,ymm0,ymm155863 vpslld ymm7,ymm1,55864 vpandn ymm6,ymm2,ymm45865 5866 vpand ymm5,ymm2,ymm35867 5868 vmovdqa YMMWORD[(96-128)+rax],ymm145869 vpaddd ymm0,ymm0,ymm145870 vpxor ymm10,ymm10,YMMWORD[((384-256-128))+rbx]5871 vpsrld ymm8,ymm1,275872 vpxor ymm5,ymm5,ymm65873 vpxor ymm10,ymm10,ymm125874 5875 5876 vpslld ymm6,ymm2,305877 vpor ymm7,ymm7,ymm85878 vpaddd ymm0,ymm0,ymm55879 5880 vpsrld ymm9,ymm10,315881 vpaddd ymm10,ymm10,ymm105882 5883 vpsrld ymm2,ymm2,25884 5885 vpaddd ymm0,ymm0,ymm75886 vpor ymm10,ymm10,ymm95887 vpor ymm2,ymm2,ymm65888 vmovdqa ymm15,YMMWORD[rbp]5889 vpxor ymm11,ymm11,ymm135890 vmovdqa ymm13,YMMWORD[((224-128))+rax]5891 5892 vpslld ymm7,ymm0,55893 vpaddd ymm4,ymm4,ymm155894 vpxor ymm5,ymm3,ymm15895 vmovdqa YMMWORD[(128-128)+rax],ymm105896 vpaddd ymm4,ymm4,ymm105897 vpxor ymm11,ymm11,YMMWORD[((416-256-128))+rbx]5898 vpsrld ymm8,ymm0,275899 vpxor ymm5,ymm5,ymm25900 vpxor ymm11,ymm11,ymm135901 5902 vpslld ymm6,ymm1,305903 vpor ymm7,ymm7,ymm85904 vpaddd ymm4,ymm4,ymm55905 vpsrld ymm9,ymm11,315906 vpaddd ymm11,ymm11,ymm115907 5908 vpsrld ymm1,ymm1,25909 vpaddd ymm4,ymm4,ymm75910 vpor ymm11,ymm11,ymm95911 vpor ymm1,ymm1,ymm65912 vpxor ymm12,ymm12,ymm145913 vmovdqa ymm14,YMMWORD[((256-256-128))+rbx]5914 5915 vpslld ymm7,ymm4,55916 vpaddd ymm3,ymm3,ymm155917 vpxor ymm5,ymm2,ymm05918 vmovdqa YMMWORD[(160-128)+rax],ymm115919 vpaddd ymm3,ymm3,ymm115920 vpxor ymm12,ymm12,YMMWORD[((448-256-128))+rbx]5921 vpsrld ymm8,ymm4,275922 vpxor ymm5,ymm5,ymm15923 vpxor ymm12,ymm12,ymm145924 5925 vpslld ymm6,ymm0,305926 vpor ymm7,ymm7,ymm85927 vpaddd ymm3,ymm3,ymm55928 vpsrld ymm9,ymm12,315929 vpaddd ymm12,ymm12,ymm125930 5931 vpsrld ymm0,ymm0,25932 vpaddd ymm3,ymm3,ymm75933 vpor ymm12,ymm12,ymm95934 vpor ymm0,ymm0,ymm65935 vpxor ymm13,ymm13,ymm105936 vmovdqa ymm10,YMMWORD[((288-256-128))+rbx]5937 5938 vpslld ymm7,ymm3,55939 vpaddd ymm2,ymm2,ymm155940 vpxor ymm5,ymm1,ymm45941 vmovdqa YMMWORD[(192-128)+rax],ymm125942 vpaddd ymm2,ymm2,ymm125943 vpxor ymm13,ymm13,YMMWORD[((480-256-128))+rbx]5944 vpsrld ymm8,ymm3,275945 vpxor ymm5,ymm5,ymm05946 vpxor ymm13,ymm13,ymm105947 5948 vpslld ymm6,ymm4,305949 vpor ymm7,ymm7,ymm85950 vpaddd ymm2,ymm2,ymm55951 vpsrld ymm9,ymm13,315952 vpaddd ymm13,ymm13,ymm135953 5954 vpsrld ymm4,ymm4,25955 vpaddd ymm2,ymm2,ymm75956 vpor ymm13,ymm13,ymm95957 vpor ymm4,ymm4,ymm65958 vpxor ymm14,ymm14,ymm115959 vmovdqa ymm11,YMMWORD[((320-256-128))+rbx]5960 5961 vpslld ymm7,ymm2,55962 vpaddd ymm1,ymm1,ymm155963 vpxor ymm5,ymm0,ymm35964 vmovdqa YMMWORD[(224-128)+rax],ymm135965 vpaddd ymm1,ymm1,ymm135966 vpxor ymm14,ymm14,YMMWORD[((0-128))+rax]5967 vpsrld ymm8,ymm2,275968 vpxor ymm5,ymm5,ymm45969 vpxor ymm14,ymm14,ymm115970 5971 vpslld ymm6,ymm3,305972 vpor ymm7,ymm7,ymm85973 vpaddd ymm1,ymm1,ymm55974 vpsrld ymm9,ymm14,315975 vpaddd ymm14,ymm14,ymm145976 5977 vpsrld ymm3,ymm3,25978 vpaddd ymm1,ymm1,ymm75979 vpor ymm14,ymm14,ymm95980 vpor ymm3,ymm3,ymm65981 vpxor ymm10,ymm10,ymm125982 vmovdqa ymm12,YMMWORD[((352-256-128))+rbx]5983 5984 vpslld ymm7,ymm1,55985 vpaddd ymm0,ymm0,ymm155986 vpxor ymm5,ymm4,ymm25987 vmovdqa YMMWORD[(256-256-128)+rbx],ymm145988 vpaddd ymm0,ymm0,ymm145989 vpxor ymm10,ymm10,YMMWORD[((32-128))+rax]5990 vpsrld ymm8,ymm1,275991 vpxor ymm5,ymm5,ymm35992 vpxor ymm10,ymm10,ymm125993 5994 vpslld ymm6,ymm2,305995 vpor ymm7,ymm7,ymm85996 vpaddd ymm0,ymm0,ymm55997 vpsrld ymm9,ymm10,315998 vpaddd ymm10,ymm10,ymm105999 6000 vpsrld ymm2,ymm2,26001 vpaddd ymm0,ymm0,ymm76002 vpor ymm10,ymm10,ymm96003 vpor ymm2,ymm2,ymm66004 vpxor ymm11,ymm11,ymm136005 vmovdqa ymm13,YMMWORD[((384-256-128))+rbx]6006 6007 vpslld ymm7,ymm0,56008 vpaddd ymm4,ymm4,ymm156009 vpxor ymm5,ymm3,ymm16010 vmovdqa YMMWORD[(288-256-128)+rbx],ymm106011 vpaddd ymm4,ymm4,ymm106012 vpxor ymm11,ymm11,YMMWORD[((64-128))+rax]6013 vpsrld ymm8,ymm0,276014 vpxor ymm5,ymm5,ymm26015 vpxor ymm11,ymm11,ymm136016 6017 vpslld ymm6,ymm1,306018 vpor ymm7,ymm7,ymm86019 vpaddd ymm4,ymm4,ymm56020 vpsrld ymm9,ymm11,316021 vpaddd ymm11,ymm11,ymm116022 6023 vpsrld ymm1,ymm1,26024 vpaddd ymm4,ymm4,ymm76025 vpor ymm11,ymm11,ymm96026 vpor ymm1,ymm1,ymm66027 vpxor ymm12,ymm12,ymm146028 vmovdqa ymm14,YMMWORD[((416-256-128))+rbx]6029 6030 vpslld ymm7,ymm4,56031 vpaddd ymm3,ymm3,ymm156032 vpxor ymm5,ymm2,ymm06033 vmovdqa YMMWORD[(320-256-128)+rbx],ymm116034 vpaddd ymm3,ymm3,ymm116035 vpxor ymm12,ymm12,YMMWORD[((96-128))+rax]6036 vpsrld ymm8,ymm4,276037 vpxor ymm5,ymm5,ymm16038 vpxor ymm12,ymm12,ymm146039 6040 vpslld ymm6,ymm0,306041 vpor ymm7,ymm7,ymm86042 vpaddd ymm3,ymm3,ymm56043 vpsrld ymm9,ymm12,316044 vpaddd ymm12,ymm12,ymm126045 6046 vpsrld ymm0,ymm0,26047 vpaddd ymm3,ymm3,ymm76048 vpor ymm12,ymm12,ymm96049 vpor ymm0,ymm0,ymm66050 vpxor ymm13,ymm13,ymm106051 vmovdqa ymm10,YMMWORD[((448-256-128))+rbx]6052 6053 vpslld ymm7,ymm3,56054 vpaddd ymm2,ymm2,ymm156055 vpxor ymm5,ymm1,ymm46056 vmovdqa YMMWORD[(352-256-128)+rbx],ymm126057 vpaddd ymm2,ymm2,ymm126058 vpxor ymm13,ymm13,YMMWORD[((128-128))+rax]6059 vpsrld ymm8,ymm3,276060 vpxor ymm5,ymm5,ymm06061 vpxor ymm13,ymm13,ymm106062 6063 vpslld ymm6,ymm4,306064 vpor ymm7,ymm7,ymm86065 vpaddd ymm2,ymm2,ymm56066 vpsrld ymm9,ymm13,316067 vpaddd ymm13,ymm13,ymm136068 6069 vpsrld ymm4,ymm4,26070 vpaddd ymm2,ymm2,ymm76071 vpor ymm13,ymm13,ymm96072 vpor ymm4,ymm4,ymm66073 vpxor ymm14,ymm14,ymm116074 vmovdqa ymm11,YMMWORD[((480-256-128))+rbx]6075 6076 vpslld ymm7,ymm2,56077 vpaddd ymm1,ymm1,ymm156078 vpxor ymm5,ymm0,ymm36079 vmovdqa YMMWORD[(384-256-128)+rbx],ymm136080 vpaddd ymm1,ymm1,ymm136081 vpxor ymm14,ymm14,YMMWORD[((160-128))+rax]6082 vpsrld ymm8,ymm2,276083 vpxor ymm5,ymm5,ymm46084 vpxor ymm14,ymm14,ymm116085 6086 vpslld ymm6,ymm3,306087 vpor ymm7,ymm7,ymm86088 vpaddd ymm1,ymm1,ymm56089 vpsrld ymm9,ymm14,316090 vpaddd ymm14,ymm14,ymm146091 6092 vpsrld ymm3,ymm3,26093 vpaddd ymm1,ymm1,ymm76094 vpor ymm14,ymm14,ymm96095 vpor ymm3,ymm3,ymm66096 vpxor ymm10,ymm10,ymm126097 vmovdqa ymm12,YMMWORD[((0-128))+rax]6098 6099 vpslld ymm7,ymm1,56100 vpaddd ymm0,ymm0,ymm156101 vpxor ymm5,ymm4,ymm26102 vmovdqa YMMWORD[(416-256-128)+rbx],ymm146103 vpaddd ymm0,ymm0,ymm146104 vpxor ymm10,ymm10,YMMWORD[((192-128))+rax]6105 vpsrld ymm8,ymm1,276106 vpxor ymm5,ymm5,ymm36107 vpxor ymm10,ymm10,ymm126108 6109 vpslld ymm6,ymm2,306110 vpor ymm7,ymm7,ymm86111 vpaddd ymm0,ymm0,ymm56112 vpsrld ymm9,ymm10,316113 vpaddd ymm10,ymm10,ymm106114 6115 vpsrld ymm2,ymm2,26116 vpaddd ymm0,ymm0,ymm76117 vpor ymm10,ymm10,ymm96118 vpor ymm2,ymm2,ymm66119 vpxor ymm11,ymm11,ymm136120 vmovdqa ymm13,YMMWORD[((32-128))+rax]6121 6122 vpslld ymm7,ymm0,56123 vpaddd ymm4,ymm4,ymm156124 vpxor ymm5,ymm3,ymm16125 vmovdqa YMMWORD[(448-256-128)+rbx],ymm106126 vpaddd ymm4,ymm4,ymm106127 vpxor ymm11,ymm11,YMMWORD[((224-128))+rax]6128 vpsrld ymm8,ymm0,276129 vpxor ymm5,ymm5,ymm26130 vpxor ymm11,ymm11,ymm136131 6132 vpslld ymm6,ymm1,306133 vpor ymm7,ymm7,ymm86134 vpaddd ymm4,ymm4,ymm56135 vpsrld ymm9,ymm11,316136 vpaddd ymm11,ymm11,ymm116137 6138 vpsrld ymm1,ymm1,26139 vpaddd ymm4,ymm4,ymm76140 vpor ymm11,ymm11,ymm96141 vpor ymm1,ymm1,ymm66142 vpxor ymm12,ymm12,ymm146143 vmovdqa ymm14,YMMWORD[((64-128))+rax]6144 6145 vpslld ymm7,ymm4,56146 vpaddd ymm3,ymm3,ymm156147 vpxor ymm5,ymm2,ymm06148 vmovdqa YMMWORD[(480-256-128)+rbx],ymm116149 vpaddd ymm3,ymm3,ymm116150 vpxor ymm12,ymm12,YMMWORD[((256-256-128))+rbx]6151 vpsrld ymm8,ymm4,276152 vpxor ymm5,ymm5,ymm16153 vpxor ymm12,ymm12,ymm146154 6155 vpslld ymm6,ymm0,306156 vpor ymm7,ymm7,ymm86157 vpaddd ymm3,ymm3,ymm56158 vpsrld ymm9,ymm12,316159 vpaddd ymm12,ymm12,ymm126160 6161 vpsrld ymm0,ymm0,26162 vpaddd ymm3,ymm3,ymm76163 vpor ymm12,ymm12,ymm96164 vpor ymm0,ymm0,ymm66165 vpxor ymm13,ymm13,ymm106166 vmovdqa ymm10,YMMWORD[((96-128))+rax]6167 6168 vpslld ymm7,ymm3,56169 vpaddd ymm2,ymm2,ymm156170 vpxor ymm5,ymm1,ymm46171 vmovdqa YMMWORD[(0-128)+rax],ymm126172 vpaddd ymm2,ymm2,ymm126173 vpxor ymm13,ymm13,YMMWORD[((288-256-128))+rbx]6174 vpsrld ymm8,ymm3,276175 vpxor ymm5,ymm5,ymm06176 vpxor ymm13,ymm13,ymm106177 6178 vpslld ymm6,ymm4,306179 vpor ymm7,ymm7,ymm86180 vpaddd ymm2,ymm2,ymm56181 vpsrld ymm9,ymm13,316182 vpaddd ymm13,ymm13,ymm136183 6184 vpsrld ymm4,ymm4,26185 vpaddd ymm2,ymm2,ymm76186 vpor ymm13,ymm13,ymm96187 vpor ymm4,ymm4,ymm66188 vpxor ymm14,ymm14,ymm116189 vmovdqa ymm11,YMMWORD[((128-128))+rax]6190 6191 vpslld ymm7,ymm2,56192 vpaddd ymm1,ymm1,ymm156193 vpxor ymm5,ymm0,ymm36194 vmovdqa YMMWORD[(32-128)+rax],ymm136195 vpaddd ymm1,ymm1,ymm136196 vpxor ymm14,ymm14,YMMWORD[((320-256-128))+rbx]6197 vpsrld ymm8,ymm2,276198 vpxor ymm5,ymm5,ymm46199 vpxor ymm14,ymm14,ymm116200 6201 vpslld ymm6,ymm3,306202 vpor ymm7,ymm7,ymm86203 vpaddd ymm1,ymm1,ymm56204 vpsrld ymm9,ymm14,316205 vpaddd ymm14,ymm14,ymm146206 6207 vpsrld ymm3,ymm3,26208 vpaddd ymm1,ymm1,ymm76209 vpor ymm14,ymm14,ymm96210 vpor ymm3,ymm3,ymm66211 vpxor ymm10,ymm10,ymm126212 vmovdqa ymm12,YMMWORD[((160-128))+rax]6213 6214 vpslld ymm7,ymm1,56215 vpaddd ymm0,ymm0,ymm156216 vpxor ymm5,ymm4,ymm26217 vmovdqa YMMWORD[(64-128)+rax],ymm146218 vpaddd ymm0,ymm0,ymm146219 vpxor ymm10,ymm10,YMMWORD[((352-256-128))+rbx]6220 vpsrld ymm8,ymm1,276221 vpxor ymm5,ymm5,ymm36222 vpxor ymm10,ymm10,ymm126223 6224 vpslld ymm6,ymm2,306225 vpor ymm7,ymm7,ymm86226 vpaddd ymm0,ymm0,ymm56227 vpsrld ymm9,ymm10,316228 vpaddd ymm10,ymm10,ymm106229 6230 vpsrld ymm2,ymm2,26231 vpaddd ymm0,ymm0,ymm76232 vpor ymm10,ymm10,ymm96233 vpor ymm2,ymm2,ymm66234 vpxor ymm11,ymm11,ymm136235 vmovdqa ymm13,YMMWORD[((192-128))+rax]6236 6237 vpslld ymm7,ymm0,56238 vpaddd ymm4,ymm4,ymm156239 vpxor ymm5,ymm3,ymm16240 vmovdqa YMMWORD[(96-128)+rax],ymm106241 vpaddd ymm4,ymm4,ymm106242 vpxor ymm11,ymm11,YMMWORD[((384-256-128))+rbx]6243 vpsrld ymm8,ymm0,276244 vpxor ymm5,ymm5,ymm26245 vpxor ymm11,ymm11,ymm136246 6247 vpslld ymm6,ymm1,306248 vpor ymm7,ymm7,ymm86249 vpaddd ymm4,ymm4,ymm56250 vpsrld ymm9,ymm11,316251 vpaddd ymm11,ymm11,ymm116252 6253 vpsrld ymm1,ymm1,26254 vpaddd ymm4,ymm4,ymm76255 vpor ymm11,ymm11,ymm96256 vpor ymm1,ymm1,ymm66257 vpxor ymm12,ymm12,ymm146258 vmovdqa ymm14,YMMWORD[((224-128))+rax]6259 6260 vpslld ymm7,ymm4,56261 vpaddd ymm3,ymm3,ymm156262 vpxor ymm5,ymm2,ymm06263 vmovdqa YMMWORD[(128-128)+rax],ymm116264 vpaddd ymm3,ymm3,ymm116265 vpxor ymm12,ymm12,YMMWORD[((416-256-128))+rbx]6266 vpsrld ymm8,ymm4,276267 vpxor ymm5,ymm5,ymm16268 vpxor ymm12,ymm12,ymm146269 6270 vpslld ymm6,ymm0,306271 vpor ymm7,ymm7,ymm86272 vpaddd ymm3,ymm3,ymm56273 vpsrld ymm9,ymm12,316274 vpaddd ymm12,ymm12,ymm126275 6276 vpsrld ymm0,ymm0,26277 vpaddd ymm3,ymm3,ymm76278 vpor ymm12,ymm12,ymm96279 vpor ymm0,ymm0,ymm66280 vpxor ymm13,ymm13,ymm106281 vmovdqa ymm10,YMMWORD[((256-256-128))+rbx]6282 6283 vpslld ymm7,ymm3,56284 vpaddd ymm2,ymm2,ymm156285 vpxor ymm5,ymm1,ymm46286 vmovdqa YMMWORD[(160-128)+rax],ymm126287 vpaddd ymm2,ymm2,ymm126288 vpxor ymm13,ymm13,YMMWORD[((448-256-128))+rbx]6289 vpsrld ymm8,ymm3,276290 vpxor ymm5,ymm5,ymm06291 vpxor ymm13,ymm13,ymm106292 6293 vpslld ymm6,ymm4,306294 vpor ymm7,ymm7,ymm86295 vpaddd ymm2,ymm2,ymm56296 vpsrld ymm9,ymm13,316297 vpaddd ymm13,ymm13,ymm136298 6299 vpsrld ymm4,ymm4,26300 vpaddd ymm2,ymm2,ymm76301 vpor ymm13,ymm13,ymm96302 vpor ymm4,ymm4,ymm66303 vpxor ymm14,ymm14,ymm116304 vmovdqa ymm11,YMMWORD[((288-256-128))+rbx]6305 6306 vpslld ymm7,ymm2,56307 vpaddd ymm1,ymm1,ymm156308 vpxor ymm5,ymm0,ymm36309 vmovdqa YMMWORD[(192-128)+rax],ymm136310 vpaddd ymm1,ymm1,ymm136311 vpxor ymm14,ymm14,YMMWORD[((480-256-128))+rbx]6312 vpsrld ymm8,ymm2,276313 vpxor ymm5,ymm5,ymm46314 vpxor ymm14,ymm14,ymm116315 6316 vpslld ymm6,ymm3,306317 vpor ymm7,ymm7,ymm86318 vpaddd ymm1,ymm1,ymm56319 vpsrld ymm9,ymm14,316320 vpaddd ymm14,ymm14,ymm146321 6322 vpsrld ymm3,ymm3,26323 vpaddd ymm1,ymm1,ymm76324 vpor ymm14,ymm14,ymm96325 vpor ymm3,ymm3,ymm66326 vpxor ymm10,ymm10,ymm126327 vmovdqa ymm12,YMMWORD[((320-256-128))+rbx]6328 6329 vpslld ymm7,ymm1,56330 vpaddd ymm0,ymm0,ymm156331 vpxor ymm5,ymm4,ymm26332 vmovdqa YMMWORD[(224-128)+rax],ymm146333 vpaddd ymm0,ymm0,ymm146334 vpxor ymm10,ymm10,YMMWORD[((0-128))+rax]6335 vpsrld ymm8,ymm1,276336 vpxor ymm5,ymm5,ymm36337 vpxor ymm10,ymm10,ymm126338 6339 vpslld ymm6,ymm2,306340 vpor ymm7,ymm7,ymm86341 vpaddd ymm0,ymm0,ymm56342 vpsrld ymm9,ymm10,316343 vpaddd ymm10,ymm10,ymm106344 6345 vpsrld ymm2,ymm2,26346 vpaddd ymm0,ymm0,ymm76347 vpor ymm10,ymm10,ymm96348 vpor ymm2,ymm2,ymm66349 vmovdqa ymm15,YMMWORD[32+rbp]6350 vpxor ymm11,ymm11,ymm136351 vmovdqa ymm13,YMMWORD[((352-256-128))+rbx]6352 6353 vpaddd ymm4,ymm4,ymm156354 vpslld ymm7,ymm0,56355 vpand ymm6,ymm3,ymm26356 vpxor ymm11,ymm11,YMMWORD[((32-128))+rax]6357 6358 vpaddd ymm4,ymm4,ymm66359 vpsrld ymm8,ymm0,276360 vpxor ymm5,ymm3,ymm26361 vpxor ymm11,ymm11,ymm136362 6363 vmovdqu YMMWORD[(256-256-128)+rbx],ymm106364 vpaddd ymm4,ymm4,ymm106365 vpor ymm7,ymm7,ymm86366 vpsrld ymm9,ymm11,316367 vpand ymm5,ymm5,ymm16368 vpaddd ymm11,ymm11,ymm116369 6370 vpslld ymm6,ymm1,306371 vpaddd ymm4,ymm4,ymm56372 6373 vpsrld ymm1,ymm1,26374 vpaddd ymm4,ymm4,ymm76375 vpor ymm11,ymm11,ymm96376 vpor ymm1,ymm1,ymm66377 vpxor ymm12,ymm12,ymm146378 vmovdqa ymm14,YMMWORD[((384-256-128))+rbx]6379 6380 vpaddd ymm3,ymm3,ymm156381 vpslld ymm7,ymm4,56382 vpand ymm6,ymm2,ymm16383 vpxor ymm12,ymm12,YMMWORD[((64-128))+rax]6384 6385 vpaddd ymm3,ymm3,ymm66386 vpsrld ymm8,ymm4,276387 vpxor ymm5,ymm2,ymm16388 vpxor ymm12,ymm12,ymm146389 6390 vmovdqu YMMWORD[(288-256-128)+rbx],ymm116391 vpaddd ymm3,ymm3,ymm116392 vpor ymm7,ymm7,ymm86393 vpsrld ymm9,ymm12,316394 vpand ymm5,ymm5,ymm06395 vpaddd ymm12,ymm12,ymm126396 6397 vpslld ymm6,ymm0,306398 vpaddd ymm3,ymm3,ymm56399 6400 vpsrld ymm0,ymm0,26401 vpaddd ymm3,ymm3,ymm76402 vpor ymm12,ymm12,ymm96403 vpor ymm0,ymm0,ymm66404 vpxor ymm13,ymm13,ymm106405 vmovdqa ymm10,YMMWORD[((416-256-128))+rbx]6406 6407 vpaddd ymm2,ymm2,ymm156408 vpslld ymm7,ymm3,56409 vpand ymm6,ymm1,ymm06410 vpxor ymm13,ymm13,YMMWORD[((96-128))+rax]6411 6412 vpaddd ymm2,ymm2,ymm66413 vpsrld ymm8,ymm3,276414 vpxor ymm5,ymm1,ymm06415 vpxor ymm13,ymm13,ymm106416 6417 vmovdqu YMMWORD[(320-256-128)+rbx],ymm126418 vpaddd ymm2,ymm2,ymm126419 vpor ymm7,ymm7,ymm86420 vpsrld ymm9,ymm13,316421 vpand ymm5,ymm5,ymm46422 vpaddd ymm13,ymm13,ymm136423 6424 vpslld ymm6,ymm4,306425 vpaddd ymm2,ymm2,ymm56426 6427 vpsrld ymm4,ymm4,26428 vpaddd ymm2,ymm2,ymm76429 vpor ymm13,ymm13,ymm96430 vpor ymm4,ymm4,ymm66431 vpxor ymm14,ymm14,ymm116432 vmovdqa ymm11,YMMWORD[((448-256-128))+rbx]6433 6434 vpaddd ymm1,ymm1,ymm156435 vpslld ymm7,ymm2,56436 vpand ymm6,ymm0,ymm46437 vpxor ymm14,ymm14,YMMWORD[((128-128))+rax]6438 6439 vpaddd ymm1,ymm1,ymm66440 vpsrld ymm8,ymm2,276441 vpxor ymm5,ymm0,ymm46442 vpxor ymm14,ymm14,ymm116443 6444 vmovdqu YMMWORD[(352-256-128)+rbx],ymm136445 vpaddd ymm1,ymm1,ymm136446 vpor ymm7,ymm7,ymm86447 vpsrld ymm9,ymm14,316448 vpand ymm5,ymm5,ymm36449 vpaddd ymm14,ymm14,ymm146450 6451 vpslld ymm6,ymm3,306452 vpaddd ymm1,ymm1,ymm56453 6454 vpsrld ymm3,ymm3,26455 vpaddd ymm1,ymm1,ymm76456 vpor ymm14,ymm14,ymm96457 vpor ymm3,ymm3,ymm66458 vpxor ymm10,ymm10,ymm126459 vmovdqa ymm12,YMMWORD[((480-256-128))+rbx]6460 6461 vpaddd ymm0,ymm0,ymm156462 vpslld ymm7,ymm1,56463 vpand ymm6,ymm4,ymm36464 vpxor ymm10,ymm10,YMMWORD[((160-128))+rax]6465 6466 vpaddd ymm0,ymm0,ymm66467 vpsrld ymm8,ymm1,276468 vpxor ymm5,ymm4,ymm36469 vpxor ymm10,ymm10,ymm126470 6471 vmovdqu YMMWORD[(384-256-128)+rbx],ymm146472 vpaddd ymm0,ymm0,ymm146473 vpor ymm7,ymm7,ymm86474 vpsrld ymm9,ymm10,316475 vpand ymm5,ymm5,ymm26476 vpaddd ymm10,ymm10,ymm106477 6478 vpslld ymm6,ymm2,306479 vpaddd ymm0,ymm0,ymm56480 6481 vpsrld ymm2,ymm2,26482 vpaddd ymm0,ymm0,ymm76483 vpor ymm10,ymm10,ymm96484 vpor ymm2,ymm2,ymm66485 vpxor ymm11,ymm11,ymm136486 vmovdqa ymm13,YMMWORD[((0-128))+rax]6487 6488 vpaddd ymm4,ymm4,ymm156489 vpslld ymm7,ymm0,56490 vpand ymm6,ymm3,ymm26491 vpxor ymm11,ymm11,YMMWORD[((192-128))+rax]6492 6493 vpaddd ymm4,ymm4,ymm66494 vpsrld ymm8,ymm0,276495 vpxor ymm5,ymm3,ymm26496 vpxor ymm11,ymm11,ymm136497 6498 vmovdqu YMMWORD[(416-256-128)+rbx],ymm106499 vpaddd ymm4,ymm4,ymm106500 vpor ymm7,ymm7,ymm86501 vpsrld ymm9,ymm11,316502 vpand ymm5,ymm5,ymm16503 vpaddd ymm11,ymm11,ymm116504 6505 vpslld ymm6,ymm1,306506 vpaddd ymm4,ymm4,ymm56507 6508 vpsrld ymm1,ymm1,26509 vpaddd ymm4,ymm4,ymm76510 vpor ymm11,ymm11,ymm96511 vpor ymm1,ymm1,ymm66512 vpxor ymm12,ymm12,ymm146513 vmovdqa ymm14,YMMWORD[((32-128))+rax]6514 6515 vpaddd ymm3,ymm3,ymm156516 vpslld ymm7,ymm4,56517 vpand ymm6,ymm2,ymm16518 vpxor ymm12,ymm12,YMMWORD[((224-128))+rax]6519 6520 vpaddd ymm3,ymm3,ymm66521 vpsrld ymm8,ymm4,276522 vpxor ymm5,ymm2,ymm16523 vpxor ymm12,ymm12,ymm146524 6525 vmovdqu YMMWORD[(448-256-128)+rbx],ymm116526 vpaddd ymm3,ymm3,ymm116527 vpor ymm7,ymm7,ymm86528 vpsrld ymm9,ymm12,316529 vpand ymm5,ymm5,ymm06530 vpaddd ymm12,ymm12,ymm126531 6532 vpslld ymm6,ymm0,306533 vpaddd ymm3,ymm3,ymm56534 6535 vpsrld ymm0,ymm0,26536 vpaddd ymm3,ymm3,ymm76537 vpor ymm12,ymm12,ymm96538 vpor ymm0,ymm0,ymm66539 vpxor ymm13,ymm13,ymm106540 vmovdqa ymm10,YMMWORD[((64-128))+rax]6541 6542 vpaddd ymm2,ymm2,ymm156543 vpslld ymm7,ymm3,56544 vpand ymm6,ymm1,ymm06545 vpxor ymm13,ymm13,YMMWORD[((256-256-128))+rbx]6546 6547 vpaddd ymm2,ymm2,ymm66548 vpsrld ymm8,ymm3,276549 vpxor ymm5,ymm1,ymm06550 vpxor ymm13,ymm13,ymm106551 6552 vmovdqu YMMWORD[(480-256-128)+rbx],ymm126553 vpaddd ymm2,ymm2,ymm126554 vpor ymm7,ymm7,ymm86555 vpsrld ymm9,ymm13,316556 vpand ymm5,ymm5,ymm46557 vpaddd ymm13,ymm13,ymm136558 6559 vpslld ymm6,ymm4,306560 vpaddd ymm2,ymm2,ymm56561 6562 vpsrld ymm4,ymm4,26563 vpaddd ymm2,ymm2,ymm76564 vpor ymm13,ymm13,ymm96565 vpor ymm4,ymm4,ymm66566 vpxor ymm14,ymm14,ymm116567 vmovdqa ymm11,YMMWORD[((96-128))+rax]6568 6569 vpaddd ymm1,ymm1,ymm156570 vpslld ymm7,ymm2,56571 vpand ymm6,ymm0,ymm46572 vpxor ymm14,ymm14,YMMWORD[((288-256-128))+rbx]6573 6574 vpaddd ymm1,ymm1,ymm66575 vpsrld ymm8,ymm2,276576 vpxor ymm5,ymm0,ymm46577 vpxor ymm14,ymm14,ymm116578 6579 vmovdqu YMMWORD[(0-128)+rax],ymm136580 vpaddd ymm1,ymm1,ymm136581 vpor ymm7,ymm7,ymm86582 vpsrld ymm9,ymm14,316583 vpand ymm5,ymm5,ymm36584 vpaddd ymm14,ymm14,ymm146585 6586 vpslld ymm6,ymm3,306587 vpaddd ymm1,ymm1,ymm56588 6589 vpsrld ymm3,ymm3,26590 vpaddd ymm1,ymm1,ymm76591 vpor ymm14,ymm14,ymm96592 vpor ymm3,ymm3,ymm66593 vpxor ymm10,ymm10,ymm126594 vmovdqa ymm12,YMMWORD[((128-128))+rax]6595 6596 vpaddd ymm0,ymm0,ymm156597 vpslld ymm7,ymm1,56598 vpand ymm6,ymm4,ymm36599 vpxor ymm10,ymm10,YMMWORD[((320-256-128))+rbx]6600 6601 vpaddd ymm0,ymm0,ymm66602 vpsrld ymm8,ymm1,276603 vpxor ymm5,ymm4,ymm36604 vpxor ymm10,ymm10,ymm126605 6606 vmovdqu YMMWORD[(32-128)+rax],ymm146607 vpaddd ymm0,ymm0,ymm146608 vpor ymm7,ymm7,ymm86609 vpsrld ymm9,ymm10,316610 vpand ymm5,ymm5,ymm26611 vpaddd ymm10,ymm10,ymm106612 6613 vpslld ymm6,ymm2,306614 vpaddd ymm0,ymm0,ymm56615 6616 vpsrld ymm2,ymm2,26617 vpaddd ymm0,ymm0,ymm76618 vpor ymm10,ymm10,ymm96619 vpor ymm2,ymm2,ymm66620 vpxor ymm11,ymm11,ymm136621 vmovdqa ymm13,YMMWORD[((160-128))+rax]6622 6623 vpaddd ymm4,ymm4,ymm156624 vpslld ymm7,ymm0,56625 vpand ymm6,ymm3,ymm26626 vpxor ymm11,ymm11,YMMWORD[((352-256-128))+rbx]6627 6628 vpaddd ymm4,ymm4,ymm66629 vpsrld ymm8,ymm0,276630 vpxor ymm5,ymm3,ymm26631 vpxor ymm11,ymm11,ymm136632 6633 vmovdqu YMMWORD[(64-128)+rax],ymm106634 vpaddd ymm4,ymm4,ymm106635 vpor ymm7,ymm7,ymm86636 vpsrld ymm9,ymm11,316637 vpand ymm5,ymm5,ymm16638 vpaddd ymm11,ymm11,ymm116639 6640 vpslld ymm6,ymm1,306641 vpaddd ymm4,ymm4,ymm56642 6643 vpsrld ymm1,ymm1,26644 vpaddd ymm4,ymm4,ymm76645 vpor ymm11,ymm11,ymm96646 vpor ymm1,ymm1,ymm66647 vpxor ymm12,ymm12,ymm146648 vmovdqa ymm14,YMMWORD[((192-128))+rax]6649 6650 vpaddd ymm3,ymm3,ymm156651 vpslld ymm7,ymm4,56652 vpand ymm6,ymm2,ymm16653 vpxor ymm12,ymm12,YMMWORD[((384-256-128))+rbx]6654 6655 vpaddd ymm3,ymm3,ymm66656 vpsrld ymm8,ymm4,276657 vpxor ymm5,ymm2,ymm16658 vpxor ymm12,ymm12,ymm146659 6660 vmovdqu YMMWORD[(96-128)+rax],ymm116661 vpaddd ymm3,ymm3,ymm116662 vpor ymm7,ymm7,ymm86663 vpsrld ymm9,ymm12,316664 vpand ymm5,ymm5,ymm06665 vpaddd ymm12,ymm12,ymm126666 6667 vpslld ymm6,ymm0,306668 vpaddd ymm3,ymm3,ymm56669 6670 vpsrld ymm0,ymm0,26671 vpaddd ymm3,ymm3,ymm76672 vpor ymm12,ymm12,ymm96673 vpor ymm0,ymm0,ymm66674 vpxor ymm13,ymm13,ymm106675 vmovdqa ymm10,YMMWORD[((224-128))+rax]6676 6677 vpaddd ymm2,ymm2,ymm156678 vpslld ymm7,ymm3,56679 vpand ymm6,ymm1,ymm06680 vpxor ymm13,ymm13,YMMWORD[((416-256-128))+rbx]6681 6682 vpaddd ymm2,ymm2,ymm66683 vpsrld ymm8,ymm3,276684 vpxor ymm5,ymm1,ymm06685 vpxor ymm13,ymm13,ymm106686 6687 vmovdqu YMMWORD[(128-128)+rax],ymm126688 vpaddd ymm2,ymm2,ymm126689 vpor ymm7,ymm7,ymm86690 vpsrld ymm9,ymm13,316691 vpand ymm5,ymm5,ymm46692 vpaddd ymm13,ymm13,ymm136693 6694 vpslld ymm6,ymm4,306695 vpaddd ymm2,ymm2,ymm56696 6697 vpsrld ymm4,ymm4,26698 vpaddd ymm2,ymm2,ymm76699 vpor ymm13,ymm13,ymm96700 vpor ymm4,ymm4,ymm66701 vpxor ymm14,ymm14,ymm116702 vmovdqa ymm11,YMMWORD[((256-256-128))+rbx]6703 6704 vpaddd ymm1,ymm1,ymm156705 vpslld ymm7,ymm2,56706 vpand ymm6,ymm0,ymm46707 vpxor ymm14,ymm14,YMMWORD[((448-256-128))+rbx]6708 6709 vpaddd ymm1,ymm1,ymm66710 vpsrld ymm8,ymm2,276711 vpxor ymm5,ymm0,ymm46712 vpxor ymm14,ymm14,ymm116713 6714 vmovdqu YMMWORD[(160-128)+rax],ymm136715 vpaddd ymm1,ymm1,ymm136716 vpor ymm7,ymm7,ymm86717 vpsrld ymm9,ymm14,316718 vpand ymm5,ymm5,ymm36719 vpaddd ymm14,ymm14,ymm146720 6721 vpslld ymm6,ymm3,306722 vpaddd ymm1,ymm1,ymm56723 6724 vpsrld ymm3,ymm3,26725 vpaddd ymm1,ymm1,ymm76726 vpor ymm14,ymm14,ymm96727 vpor ymm3,ymm3,ymm66728 vpxor ymm10,ymm10,ymm126729 vmovdqa ymm12,YMMWORD[((288-256-128))+rbx]6730 6731 vpaddd ymm0,ymm0,ymm156732 vpslld ymm7,ymm1,56733 vpand ymm6,ymm4,ymm36734 vpxor ymm10,ymm10,YMMWORD[((480-256-128))+rbx]6735 6736 vpaddd ymm0,ymm0,ymm66737 vpsrld ymm8,ymm1,276738 vpxor ymm5,ymm4,ymm36739 vpxor ymm10,ymm10,ymm126740 6741 vmovdqu YMMWORD[(192-128)+rax],ymm146742 vpaddd ymm0,ymm0,ymm146743 vpor ymm7,ymm7,ymm86744 vpsrld ymm9,ymm10,316745 vpand ymm5,ymm5,ymm26746 vpaddd ymm10,ymm10,ymm106747 6748 vpslld ymm6,ymm2,306749 vpaddd ymm0,ymm0,ymm56750 6751 vpsrld ymm2,ymm2,26752 vpaddd ymm0,ymm0,ymm76753 vpor ymm10,ymm10,ymm96754 vpor ymm2,ymm2,ymm66755 vpxor ymm11,ymm11,ymm136756 vmovdqa ymm13,YMMWORD[((320-256-128))+rbx]6757 6758 vpaddd ymm4,ymm4,ymm156759 vpslld ymm7,ymm0,56760 vpand ymm6,ymm3,ymm26761 vpxor ymm11,ymm11,YMMWORD[((0-128))+rax]6762 6763 vpaddd ymm4,ymm4,ymm66764 vpsrld ymm8,ymm0,276765 vpxor ymm5,ymm3,ymm26766 vpxor ymm11,ymm11,ymm136767 6768 vmovdqu YMMWORD[(224-128)+rax],ymm106769 vpaddd ymm4,ymm4,ymm106770 vpor ymm7,ymm7,ymm86771 vpsrld ymm9,ymm11,316772 vpand ymm5,ymm5,ymm16773 vpaddd ymm11,ymm11,ymm116774 6775 vpslld ymm6,ymm1,306776 vpaddd ymm4,ymm4,ymm56777 6778 vpsrld ymm1,ymm1,26779 vpaddd ymm4,ymm4,ymm76780 vpor ymm11,ymm11,ymm96781 vpor ymm1,ymm1,ymm66782 vpxor ymm12,ymm12,ymm146783 vmovdqa ymm14,YMMWORD[((352-256-128))+rbx]6784 6785 vpaddd ymm3,ymm3,ymm156786 vpslld ymm7,ymm4,56787 vpand ymm6,ymm2,ymm16788 vpxor ymm12,ymm12,YMMWORD[((32-128))+rax]6789 6790 vpaddd ymm3,ymm3,ymm66791 vpsrld ymm8,ymm4,276792 vpxor ymm5,ymm2,ymm16793 vpxor ymm12,ymm12,ymm146794 6795 vmovdqu YMMWORD[(256-256-128)+rbx],ymm116796 vpaddd ymm3,ymm3,ymm116797 vpor ymm7,ymm7,ymm86798 vpsrld ymm9,ymm12,316799 vpand ymm5,ymm5,ymm06800 vpaddd ymm12,ymm12,ymm126801 6802 vpslld ymm6,ymm0,306803 vpaddd ymm3,ymm3,ymm56804 6805 vpsrld ymm0,ymm0,26806 vpaddd ymm3,ymm3,ymm76807 vpor ymm12,ymm12,ymm96808 vpor ymm0,ymm0,ymm66809 vpxor ymm13,ymm13,ymm106810 vmovdqa ymm10,YMMWORD[((384-256-128))+rbx]6811 6812 vpaddd ymm2,ymm2,ymm156813 vpslld ymm7,ymm3,56814 vpand ymm6,ymm1,ymm06815 vpxor ymm13,ymm13,YMMWORD[((64-128))+rax]6816 6817 vpaddd ymm2,ymm2,ymm66818 vpsrld ymm8,ymm3,276819 vpxor ymm5,ymm1,ymm06820 vpxor ymm13,ymm13,ymm106821 6822 vmovdqu YMMWORD[(288-256-128)+rbx],ymm126823 vpaddd ymm2,ymm2,ymm126824 vpor ymm7,ymm7,ymm86825 vpsrld ymm9,ymm13,316826 vpand ymm5,ymm5,ymm46827 vpaddd ymm13,ymm13,ymm136828 6829 vpslld ymm6,ymm4,306830 vpaddd ymm2,ymm2,ymm56831 6832 vpsrld ymm4,ymm4,26833 vpaddd ymm2,ymm2,ymm76834 vpor ymm13,ymm13,ymm96835 vpor ymm4,ymm4,ymm66836 vpxor ymm14,ymm14,ymm116837 vmovdqa ymm11,YMMWORD[((416-256-128))+rbx]6838 6839 vpaddd ymm1,ymm1,ymm156840 vpslld ymm7,ymm2,56841 vpand ymm6,ymm0,ymm46842 vpxor ymm14,ymm14,YMMWORD[((96-128))+rax]6843 6844 vpaddd ymm1,ymm1,ymm66845 vpsrld ymm8,ymm2,276846 vpxor ymm5,ymm0,ymm46847 vpxor ymm14,ymm14,ymm116848 6849 vmovdqu YMMWORD[(320-256-128)+rbx],ymm136850 vpaddd ymm1,ymm1,ymm136851 vpor ymm7,ymm7,ymm86852 vpsrld ymm9,ymm14,316853 vpand ymm5,ymm5,ymm36854 vpaddd ymm14,ymm14,ymm146855 6856 vpslld ymm6,ymm3,306857 vpaddd ymm1,ymm1,ymm56858 6859 vpsrld ymm3,ymm3,26860 vpaddd ymm1,ymm1,ymm76861 vpor ymm14,ymm14,ymm96862 vpor ymm3,ymm3,ymm66863 vpxor ymm10,ymm10,ymm126864 vmovdqa ymm12,YMMWORD[((448-256-128))+rbx]6865 6866 vpaddd ymm0,ymm0,ymm156867 vpslld ymm7,ymm1,56868 vpand ymm6,ymm4,ymm36869 vpxor ymm10,ymm10,YMMWORD[((128-128))+rax]6870 6871 vpaddd ymm0,ymm0,ymm66872 vpsrld ymm8,ymm1,276873 vpxor ymm5,ymm4,ymm36874 vpxor ymm10,ymm10,ymm126875 6876 vmovdqu YMMWORD[(352-256-128)+rbx],ymm146877 vpaddd ymm0,ymm0,ymm146878 vpor ymm7,ymm7,ymm86879 vpsrld ymm9,ymm10,316880 vpand ymm5,ymm5,ymm26881 vpaddd ymm10,ymm10,ymm106882 6883 vpslld ymm6,ymm2,306884 vpaddd ymm0,ymm0,ymm56885 6886 vpsrld ymm2,ymm2,26887 vpaddd ymm0,ymm0,ymm76888 vpor ymm10,ymm10,ymm96889 vpor ymm2,ymm2,ymm66890 vmovdqa ymm15,YMMWORD[64+rbp]6891 vpxor ymm11,ymm11,ymm136892 vmovdqa ymm13,YMMWORD[((480-256-128))+rbx]6893 6894 vpslld ymm7,ymm0,56895 vpaddd ymm4,ymm4,ymm156896 vpxor ymm5,ymm3,ymm16897 vmovdqa YMMWORD[(384-256-128)+rbx],ymm106898 vpaddd ymm4,ymm4,ymm106899 vpxor ymm11,ymm11,YMMWORD[((160-128))+rax]6900 vpsrld ymm8,ymm0,276901 vpxor ymm5,ymm5,ymm26902 vpxor ymm11,ymm11,ymm136903 6904 vpslld ymm6,ymm1,306905 vpor ymm7,ymm7,ymm86906 vpaddd ymm4,ymm4,ymm56907 vpsrld ymm9,ymm11,316908 vpaddd ymm11,ymm11,ymm116909 6910 vpsrld ymm1,ymm1,26911 vpaddd ymm4,ymm4,ymm76912 vpor ymm11,ymm11,ymm96913 vpor ymm1,ymm1,ymm66914 vpxor ymm12,ymm12,ymm146915 vmovdqa ymm14,YMMWORD[((0-128))+rax]6916 6917 vpslld ymm7,ymm4,56918 vpaddd ymm3,ymm3,ymm156919 vpxor ymm5,ymm2,ymm06920 vmovdqa YMMWORD[(416-256-128)+rbx],ymm116921 vpaddd ymm3,ymm3,ymm116922 vpxor ymm12,ymm12,YMMWORD[((192-128))+rax]6923 vpsrld ymm8,ymm4,276924 vpxor ymm5,ymm5,ymm16925 vpxor ymm12,ymm12,ymm146926 6927 vpslld ymm6,ymm0,306928 vpor ymm7,ymm7,ymm86929 vpaddd ymm3,ymm3,ymm56930 vpsrld ymm9,ymm12,316931 vpaddd ymm12,ymm12,ymm126932 6933 vpsrld ymm0,ymm0,26934 vpaddd ymm3,ymm3,ymm76935 vpor ymm12,ymm12,ymm96936 vpor ymm0,ymm0,ymm66937 vpxor ymm13,ymm13,ymm106938 vmovdqa ymm10,YMMWORD[((32-128))+rax]6939 6940 vpslld ymm7,ymm3,56941 vpaddd ymm2,ymm2,ymm156942 vpxor ymm5,ymm1,ymm46943 vmovdqa YMMWORD[(448-256-128)+rbx],ymm126944 vpaddd ymm2,ymm2,ymm126945 vpxor ymm13,ymm13,YMMWORD[((224-128))+rax]6946 vpsrld ymm8,ymm3,276947 vpxor ymm5,ymm5,ymm06948 vpxor ymm13,ymm13,ymm106949 6950 vpslld ymm6,ymm4,306951 vpor ymm7,ymm7,ymm86952 vpaddd ymm2,ymm2,ymm56953 vpsrld ymm9,ymm13,316954 vpaddd ymm13,ymm13,ymm136955 6956 vpsrld ymm4,ymm4,26957 vpaddd ymm2,ymm2,ymm76958 vpor ymm13,ymm13,ymm96959 vpor ymm4,ymm4,ymm66960 vpxor ymm14,ymm14,ymm116961 vmovdqa ymm11,YMMWORD[((64-128))+rax]6962 6963 vpslld ymm7,ymm2,56964 vpaddd ymm1,ymm1,ymm156965 vpxor ymm5,ymm0,ymm36966 vmovdqa YMMWORD[(480-256-128)+rbx],ymm136967 vpaddd ymm1,ymm1,ymm136968 vpxor ymm14,ymm14,YMMWORD[((256-256-128))+rbx]6969 vpsrld ymm8,ymm2,276970 vpxor ymm5,ymm5,ymm46971 vpxor ymm14,ymm14,ymm116972 6973 vpslld ymm6,ymm3,306974 vpor ymm7,ymm7,ymm86975 vpaddd ymm1,ymm1,ymm56976 vpsrld ymm9,ymm14,316977 vpaddd ymm14,ymm14,ymm146978 6979 vpsrld ymm3,ymm3,26980 vpaddd ymm1,ymm1,ymm76981 vpor ymm14,ymm14,ymm96982 vpor ymm3,ymm3,ymm66983 vpxor ymm10,ymm10,ymm126984 vmovdqa ymm12,YMMWORD[((96-128))+rax]6985 6986 vpslld ymm7,ymm1,56987 vpaddd ymm0,ymm0,ymm156988 vpxor ymm5,ymm4,ymm26989 vmovdqa YMMWORD[(0-128)+rax],ymm146990 vpaddd ymm0,ymm0,ymm146991 vpxor ymm10,ymm10,YMMWORD[((288-256-128))+rbx]6992 vpsrld ymm8,ymm1,276993 vpxor ymm5,ymm5,ymm36994 vpxor ymm10,ymm10,ymm126995 6996 vpslld ymm6,ymm2,306997 vpor ymm7,ymm7,ymm86998 vpaddd ymm0,ymm0,ymm56999 vpsrld ymm9,ymm10,317000 vpaddd ymm10,ymm10,ymm107001 7002 vpsrld ymm2,ymm2,27003 vpaddd ymm0,ymm0,ymm77004 vpor ymm10,ymm10,ymm97005 vpor ymm2,ymm2,ymm67006 vpxor ymm11,ymm11,ymm137007 vmovdqa ymm13,YMMWORD[((128-128))+rax]7008 7009 vpslld ymm7,ymm0,57010 vpaddd ymm4,ymm4,ymm157011 vpxor ymm5,ymm3,ymm17012 vmovdqa YMMWORD[(32-128)+rax],ymm107013 vpaddd ymm4,ymm4,ymm107014 vpxor ymm11,ymm11,YMMWORD[((320-256-128))+rbx]7015 vpsrld ymm8,ymm0,277016 vpxor ymm5,ymm5,ymm27017 vpxor ymm11,ymm11,ymm137018 7019 vpslld ymm6,ymm1,307020 vpor ymm7,ymm7,ymm87021 vpaddd ymm4,ymm4,ymm57022 vpsrld ymm9,ymm11,317023 vpaddd ymm11,ymm11,ymm117024 7025 vpsrld ymm1,ymm1,27026 vpaddd ymm4,ymm4,ymm77027 vpor ymm11,ymm11,ymm97028 vpor ymm1,ymm1,ymm67029 vpxor ymm12,ymm12,ymm147030 vmovdqa ymm14,YMMWORD[((160-128))+rax]7031 7032 vpslld ymm7,ymm4,57033 vpaddd ymm3,ymm3,ymm157034 vpxor ymm5,ymm2,ymm07035 vmovdqa YMMWORD[(64-128)+rax],ymm117036 vpaddd ymm3,ymm3,ymm117037 vpxor ymm12,ymm12,YMMWORD[((352-256-128))+rbx]7038 vpsrld ymm8,ymm4,277039 vpxor ymm5,ymm5,ymm17040 vpxor ymm12,ymm12,ymm147041 7042 vpslld ymm6,ymm0,307043 vpor ymm7,ymm7,ymm87044 vpaddd ymm3,ymm3,ymm57045 vpsrld ymm9,ymm12,317046 vpaddd ymm12,ymm12,ymm127047 7048 vpsrld ymm0,ymm0,27049 vpaddd ymm3,ymm3,ymm77050 vpor ymm12,ymm12,ymm97051 vpor ymm0,ymm0,ymm67052 vpxor ymm13,ymm13,ymm107053 vmovdqa ymm10,YMMWORD[((192-128))+rax]7054 7055 vpslld ymm7,ymm3,57056 vpaddd ymm2,ymm2,ymm157057 vpxor ymm5,ymm1,ymm47058 vmovdqa YMMWORD[(96-128)+rax],ymm127059 vpaddd ymm2,ymm2,ymm127060 vpxor ymm13,ymm13,YMMWORD[((384-256-128))+rbx]7061 vpsrld ymm8,ymm3,277062 vpxor ymm5,ymm5,ymm07063 vpxor ymm13,ymm13,ymm107064 7065 vpslld ymm6,ymm4,307066 vpor ymm7,ymm7,ymm87067 vpaddd ymm2,ymm2,ymm57068 vpsrld ymm9,ymm13,317069 vpaddd ymm13,ymm13,ymm137070 7071 vpsrld ymm4,ymm4,27072 vpaddd ymm2,ymm2,ymm77073 vpor ymm13,ymm13,ymm97074 vpor ymm4,ymm4,ymm67075 vpxor ymm14,ymm14,ymm117076 vmovdqa ymm11,YMMWORD[((224-128))+rax]7077 7078 vpslld ymm7,ymm2,57079 vpaddd ymm1,ymm1,ymm157080 vpxor ymm5,ymm0,ymm37081 vmovdqa YMMWORD[(128-128)+rax],ymm137082 vpaddd ymm1,ymm1,ymm137083 vpxor ymm14,ymm14,YMMWORD[((416-256-128))+rbx]7084 vpsrld ymm8,ymm2,277085 vpxor ymm5,ymm5,ymm47086 vpxor ymm14,ymm14,ymm117087 7088 vpslld ymm6,ymm3,307089 vpor ymm7,ymm7,ymm87090 vpaddd ymm1,ymm1,ymm57091 vpsrld ymm9,ymm14,317092 vpaddd ymm14,ymm14,ymm147093 7094 vpsrld ymm3,ymm3,27095 vpaddd ymm1,ymm1,ymm77096 vpor ymm14,ymm14,ymm97097 vpor ymm3,ymm3,ymm67098 vpxor ymm10,ymm10,ymm127099 vmovdqa ymm12,YMMWORD[((256-256-128))+rbx]7100 7101 vpslld ymm7,ymm1,57102 vpaddd ymm0,ymm0,ymm157103 vpxor ymm5,ymm4,ymm27104 vmovdqa YMMWORD[(160-128)+rax],ymm147105 vpaddd ymm0,ymm0,ymm147106 vpxor ymm10,ymm10,YMMWORD[((448-256-128))+rbx]7107 vpsrld ymm8,ymm1,277108 vpxor ymm5,ymm5,ymm37109 vpxor ymm10,ymm10,ymm127110 7111 vpslld ymm6,ymm2,307112 vpor ymm7,ymm7,ymm87113 vpaddd ymm0,ymm0,ymm57114 vpsrld ymm9,ymm10,317115 vpaddd ymm10,ymm10,ymm107116 7117 vpsrld ymm2,ymm2,27118 vpaddd ymm0,ymm0,ymm77119 vpor ymm10,ymm10,ymm97120 vpor ymm2,ymm2,ymm67121 vpxor ymm11,ymm11,ymm137122 vmovdqa ymm13,YMMWORD[((288-256-128))+rbx]7123 7124 vpslld ymm7,ymm0,57125 vpaddd ymm4,ymm4,ymm157126 vpxor ymm5,ymm3,ymm17127 vmovdqa YMMWORD[(192-128)+rax],ymm107128 vpaddd ymm4,ymm4,ymm107129 vpxor ymm11,ymm11,YMMWORD[((480-256-128))+rbx]7130 vpsrld ymm8,ymm0,277131 vpxor ymm5,ymm5,ymm27132 vpxor ymm11,ymm11,ymm137133 7134 vpslld ymm6,ymm1,307135 vpor ymm7,ymm7,ymm87136 vpaddd ymm4,ymm4,ymm57137 vpsrld ymm9,ymm11,317138 vpaddd ymm11,ymm11,ymm117139 7140 vpsrld ymm1,ymm1,27141 vpaddd ymm4,ymm4,ymm77142 vpor ymm11,ymm11,ymm97143 vpor ymm1,ymm1,ymm67144 vpxor ymm12,ymm12,ymm147145 vmovdqa ymm14,YMMWORD[((320-256-128))+rbx]7146 7147 vpslld ymm7,ymm4,57148 vpaddd ymm3,ymm3,ymm157149 vpxor ymm5,ymm2,ymm07150 vmovdqa YMMWORD[(224-128)+rax],ymm117151 vpaddd ymm3,ymm3,ymm117152 vpxor ymm12,ymm12,YMMWORD[((0-128))+rax]7153 vpsrld ymm8,ymm4,277154 vpxor ymm5,ymm5,ymm17155 vpxor ymm12,ymm12,ymm147156 7157 vpslld ymm6,ymm0,307158 vpor ymm7,ymm7,ymm87159 vpaddd ymm3,ymm3,ymm57160 vpsrld ymm9,ymm12,317161 vpaddd ymm12,ymm12,ymm127162 7163 vpsrld ymm0,ymm0,27164 vpaddd ymm3,ymm3,ymm77165 vpor ymm12,ymm12,ymm97166 vpor ymm0,ymm0,ymm67167 vpxor ymm13,ymm13,ymm107168 vmovdqa ymm10,YMMWORD[((352-256-128))+rbx]7169 7170 vpslld ymm7,ymm3,57171 vpaddd ymm2,ymm2,ymm157172 vpxor ymm5,ymm1,ymm47173 vpaddd ymm2,ymm2,ymm127174 vpxor ymm13,ymm13,YMMWORD[((32-128))+rax]7175 vpsrld ymm8,ymm3,277176 vpxor ymm5,ymm5,ymm07177 vpxor ymm13,ymm13,ymm107178 7179 vpslld ymm6,ymm4,307180 vpor ymm7,ymm7,ymm87181 vpaddd ymm2,ymm2,ymm57182 vpsrld ymm9,ymm13,317183 vpaddd ymm13,ymm13,ymm137184 7185 vpsrld ymm4,ymm4,27186 vpaddd ymm2,ymm2,ymm77187 vpor ymm13,ymm13,ymm97188 vpor ymm4,ymm4,ymm67189 vpxor ymm14,ymm14,ymm117190 vmovdqa ymm11,YMMWORD[((384-256-128))+rbx]7191 7192 vpslld ymm7,ymm2,57193 vpaddd ymm1,ymm1,ymm157194 vpxor ymm5,ymm0,ymm37195 vpaddd ymm1,ymm1,ymm137196 vpxor ymm14,ymm14,YMMWORD[((64-128))+rax]7197 vpsrld ymm8,ymm2,277198 vpxor ymm5,ymm5,ymm47199 vpxor ymm14,ymm14,ymm117200 7201 vpslld ymm6,ymm3,307202 vpor ymm7,ymm7,ymm87203 vpaddd ymm1,ymm1,ymm57204 vpsrld ymm9,ymm14,317205 vpaddd ymm14,ymm14,ymm147206 7207 vpsrld ymm3,ymm3,27208 vpaddd ymm1,ymm1,ymm77209 vpor ymm14,ymm14,ymm97210 vpor ymm3,ymm3,ymm67211 vpxor ymm10,ymm10,ymm127212 vmovdqa ymm12,YMMWORD[((416-256-128))+rbx]7213 7214 vpslld ymm7,ymm1,57215 vpaddd ymm0,ymm0,ymm157216 vpxor ymm5,ymm4,ymm27217 vpaddd ymm0,ymm0,ymm147218 vpxor ymm10,ymm10,YMMWORD[((96-128))+rax]7219 vpsrld ymm8,ymm1,277220 vpxor ymm5,ymm5,ymm37221 vpxor ymm10,ymm10,ymm127222 7223 vpslld ymm6,ymm2,307224 vpor ymm7,ymm7,ymm87225 vpaddd ymm0,ymm0,ymm57226 vpsrld ymm9,ymm10,317227 vpaddd ymm10,ymm10,ymm107228 7229 vpsrld ymm2,ymm2,27230 vpaddd ymm0,ymm0,ymm77231 vpor ymm10,ymm10,ymm97232 vpor ymm2,ymm2,ymm67233 vpxor ymm11,ymm11,ymm137234 vmovdqa ymm13,YMMWORD[((448-256-128))+rbx]7235 7236 vpslld ymm7,ymm0,57237 vpaddd ymm4,ymm4,ymm157238 vpxor ymm5,ymm3,ymm17239 vpaddd ymm4,ymm4,ymm107240 vpxor ymm11,ymm11,YMMWORD[((128-128))+rax]7241 vpsrld ymm8,ymm0,277242 vpxor ymm5,ymm5,ymm27243 vpxor ymm11,ymm11,ymm137244 7245 vpslld ymm6,ymm1,307246 vpor ymm7,ymm7,ymm87247 vpaddd ymm4,ymm4,ymm57248 vpsrld ymm9,ymm11,317249 vpaddd ymm11,ymm11,ymm117250 7251 vpsrld ymm1,ymm1,27252 vpaddd ymm4,ymm4,ymm77253 vpor ymm11,ymm11,ymm97254 vpor ymm1,ymm1,ymm67255 vpxor ymm12,ymm12,ymm147256 vmovdqa ymm14,YMMWORD[((480-256-128))+rbx]7257 7258 vpslld ymm7,ymm4,57259 vpaddd ymm3,ymm3,ymm157260 vpxor ymm5,ymm2,ymm07261 vpaddd ymm3,ymm3,ymm117262 vpxor ymm12,ymm12,YMMWORD[((160-128))+rax]7263 vpsrld ymm8,ymm4,277264 vpxor ymm5,ymm5,ymm17265 vpxor ymm12,ymm12,ymm147266 7267 vpslld ymm6,ymm0,307268 vpor ymm7,ymm7,ymm87269 vpaddd ymm3,ymm3,ymm57270 vpsrld ymm9,ymm12,317271 vpaddd ymm12,ymm12,ymm127272 7273 vpsrld ymm0,ymm0,27274 vpaddd ymm3,ymm3,ymm77275 vpor ymm12,ymm12,ymm97276 vpor ymm0,ymm0,ymm67277 vpxor ymm13,ymm13,ymm107278 vmovdqa ymm10,YMMWORD[((0-128))+rax]7279 7280 vpslld ymm7,ymm3,57281 vpaddd ymm2,ymm2,ymm157282 vpxor ymm5,ymm1,ymm47283 vpaddd ymm2,ymm2,ymm127284 vpxor ymm13,ymm13,YMMWORD[((192-128))+rax]7285 vpsrld ymm8,ymm3,277286 vpxor ymm5,ymm5,ymm07287 vpxor ymm13,ymm13,ymm107288 7289 vpslld ymm6,ymm4,307290 vpor ymm7,ymm7,ymm87291 vpaddd ymm2,ymm2,ymm57292 vpsrld ymm9,ymm13,317293 vpaddd ymm13,ymm13,ymm137294 7295 vpsrld ymm4,ymm4,27296 vpaddd ymm2,ymm2,ymm77297 vpor ymm13,ymm13,ymm97298 vpor ymm4,ymm4,ymm67299 vpxor ymm14,ymm14,ymm117300 vmovdqa ymm11,YMMWORD[((32-128))+rax]7301 7302 vpslld ymm7,ymm2,57303 vpaddd ymm1,ymm1,ymm157304 vpxor ymm5,ymm0,ymm37305 vpaddd ymm1,ymm1,ymm137306 vpxor ymm14,ymm14,YMMWORD[((224-128))+rax]7307 vpsrld ymm8,ymm2,277308 vpxor ymm5,ymm5,ymm47309 vpxor ymm14,ymm14,ymm117310 7311 vpslld ymm6,ymm3,307312 vpor ymm7,ymm7,ymm87313 vpaddd ymm1,ymm1,ymm57314 vpsrld ymm9,ymm14,317315 vpaddd ymm14,ymm14,ymm147316 7317 vpsrld ymm3,ymm3,27318 vpaddd ymm1,ymm1,ymm77319 vpor ymm14,ymm14,ymm97320 vpor ymm3,ymm3,ymm67321 vpslld ymm7,ymm1,57322 vpaddd ymm0,ymm0,ymm157323 vpxor ymm5,ymm4,ymm27324 7325 vpsrld ymm8,ymm1,277326 vpaddd ymm0,ymm0,ymm147327 vpxor ymm5,ymm5,ymm37328 7329 vpslld ymm6,ymm2,307330 vpor ymm7,ymm7,ymm87331 vpaddd ymm0,ymm0,ymm57332 7333 vpsrld ymm2,ymm2,27334 vpaddd ymm0,ymm0,ymm77335 vpor ymm2,ymm2,ymm67336 mov ecx,17337 lea rbx,[512+rsp]7338 cmp ecx,DWORD[rbx]7339 cmovge r12,rbp7340 cmp ecx,DWORD[4+rbx]7341 cmovge r13,rbp7342 cmp ecx,DWORD[8+rbx]7343 cmovge r14,rbp7344 cmp ecx,DWORD[12+rbx]7345 cmovge r15,rbp7346 cmp ecx,DWORD[16+rbx]7347 cmovge r8,rbp7348 cmp ecx,DWORD[20+rbx]7349 cmovge r9,rbp7350 cmp ecx,DWORD[24+rbx]7351 cmovge r10,rbp7352 cmp ecx,DWORD[28+rbx]7353 cmovge r11,rbp7354 vmovdqu ymm5,YMMWORD[rbx]7355 vpxor ymm7,ymm7,ymm77356 vmovdqa ymm6,ymm57357 vpcmpgtd ymm6,ymm6,ymm77358 vpaddd ymm5,ymm5,ymm67359 7360 vpand ymm0,ymm0,ymm67361 vpand ymm1,ymm1,ymm67362 vpaddd ymm0,ymm0,YMMWORD[rdi]7363 vpand ymm2,ymm2,ymm67364 vpaddd ymm1,ymm1,YMMWORD[32+rdi]7365 vpand ymm3,ymm3,ymm67366 vpaddd ymm2,ymm2,YMMWORD[64+rdi]7367 vpand ymm4,ymm4,ymm67368 vpaddd ymm3,ymm3,YMMWORD[96+rdi]7369 vpaddd ymm4,ymm4,YMMWORD[128+rdi]7370 vmovdqu YMMWORD[rdi],ymm07371 vmovdqu YMMWORD[32+rdi],ymm17372 vmovdqu YMMWORD[64+rdi],ymm27373 vmovdqu YMMWORD[96+rdi],ymm37374 vmovdqu YMMWORD[128+rdi],ymm47375 7376 vmovdqu YMMWORD[rbx],ymm57377 lea rbx,[((256+128))+rsp]7378 vmovdqu ymm9,YMMWORD[96+rbp]7379 dec edx7380 jnz NEAR $L$oop_avx27381 7382 7383 7384 7385 7386 7387 7388 $L$done_avx2:7389 mov rax,QWORD[544+rsp]7390 7391 vzeroupper7392 movaps xmm6,XMMWORD[((-216))+rax]7393 movaps xmm7,XMMWORD[((-200))+rax]7394 movaps xmm8,XMMWORD[((-184))+rax]7395 movaps xmm9,XMMWORD[((-168))+rax]7396 movaps xmm10,XMMWORD[((-152))+rax]7397 movaps xmm11,XMMWORD[((-136))+rax]7398 movaps xmm12,XMMWORD[((-120))+rax]7399 movaps xmm13,XMMWORD[((-104))+rax]7400 movaps xmm14,XMMWORD[((-88))+rax]7401 movaps xmm15,XMMWORD[((-72))+rax]7402 mov r15,QWORD[((-48))+rax]7403 7404 mov r14,QWORD[((-40))+rax]7405 7406 mov r13,QWORD[((-32))+rax]7407 7408 mov r12,QWORD[((-24))+rax]7409 7410 mov rbp,QWORD[((-16))+rax]7411 7412 mov rbx,QWORD[((-8))+rax]7413 7414 lea rsp,[rax]7415 7416 $L$epilogue_avx2:7417 mov rdi,QWORD[8+rsp] ;WIN64 epilogue7418 mov rsi,QWORD[16+rsp]7419 DB 0F3h,0C3h ;repret7420 7421 $L$SEH_end_sha1_multi_block_avx2:7422 7423 3020 ALIGN 256 7424 3021 DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999 … … 7523 3120 DB 0F3h,0C3h ;repret 7524 3121 7525 7526 ALIGN 167527 avx2_handler:7528 push rsi7529 push rdi7530 push rbx7531 push rbp7532 push r127533 push r137534 push r147535 push r157536 pushfq7537 sub rsp,647538 7539 mov rax,QWORD[120+r8]7540 mov rbx,QWORD[248+r8]7541 7542 mov rsi,QWORD[8+r9]7543 mov r11,QWORD[56+r9]7544 7545 mov r10d,DWORD[r11]7546 lea r10,[r10*1+rsi]7547 cmp rbx,r107548 jb NEAR $L$in_prologue7549 7550 mov rax,QWORD[152+r8]7551 7552 mov r10d,DWORD[4+r11]7553 lea r10,[r10*1+rsi]7554 cmp rbx,r107555 jae NEAR $L$in_prologue7556 7557 mov rax,QWORD[544+r8]7558 7559 mov rbx,QWORD[((-8))+rax]7560 mov rbp,QWORD[((-16))+rax]7561 mov r12,QWORD[((-24))+rax]7562 mov r13,QWORD[((-32))+rax]7563 mov r14,QWORD[((-40))+rax]7564 mov r15,QWORD[((-48))+rax]7565 mov QWORD[144+r8],rbx7566 mov QWORD[160+r8],rbp7567 mov QWORD[216+r8],r127568 mov QWORD[224+r8],r137569 mov QWORD[232+r8],r147570 mov QWORD[240+r8],r157571 7572 lea rsi,[((-56-160))+rax]7573 lea rdi,[512+r8]7574 mov ecx,207575 DD 0xa548f3fc7576 7577 jmp NEAR $L$in_prologue7578 7579 3122 section .pdata rdata align=4 7580 3123 ALIGN 4 … … 7585 3128 DD $L$SEH_end_sha1_multi_block_shaext wrt ..imagebase 7586 3129 DD $L$SEH_info_sha1_multi_block_shaext wrt ..imagebase 7587 DD $L$SEH_begin_sha1_multi_block_avx wrt ..imagebase7588 DD $L$SEH_end_sha1_multi_block_avx wrt ..imagebase7589 DD $L$SEH_info_sha1_multi_block_avx wrt ..imagebase7590 DD $L$SEH_begin_sha1_multi_block_avx2 wrt ..imagebase7591 DD $L$SEH_end_sha1_multi_block_avx2 wrt ..imagebase7592 DD $L$SEH_info_sha1_multi_block_avx2 wrt ..imagebase7593 3130 section .xdata rdata align=8 7594 3131 ALIGN 8 … … 7601 3138 DD se_handler wrt ..imagebase 7602 3139 DD $L$body_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase 7603 $L$SEH_info_sha1_multi_block_avx:7604 DB 9,0,0,07605 DD se_handler wrt ..imagebase7606 DD $L$body_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase7607 $L$SEH_info_sha1_multi_block_avx2:7608 DB 9,0,0,07609 DD avx2_handler wrt ..imagebase7610 DD $L$body_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/sha1-x86_64.S
r94083 r95221 28 28 test r10d,536870912 29 29 jnz NEAR _shaext_shortcut 30 and r10d,29631 cmp r10d,29632 je NEAR _avx2_shortcut33 and r8d,26843545634 and r9d,107374182435 or r8d,r9d36 cmp r8d,134217728037 je NEAR _avx_shortcut38 30 jmp NEAR _ssse3_shortcut 39 31 … … 2676 2668 2677 2669 $L$SEH_end_sha1_block_data_order_ssse3: 2678 2679 ALIGN 162680 sha1_block_data_order_avx:2681 mov QWORD[8+rsp],rdi ;WIN64 prologue2682 mov QWORD[16+rsp],rsi2683 mov rax,rsp2684 $L$SEH_begin_sha1_block_data_order_avx:2685 mov rdi,rcx2686 mov rsi,rdx2687 mov rdx,r82688 2689 2690 _avx_shortcut:2691 2692 mov r11,rsp2693 2694 push rbx2695 2696 push rbp2697 2698 push r122699 2700 push r132701 2702 push r142703 2704 lea rsp,[((-160))+rsp]2705 vzeroupper2706 vmovaps XMMWORD[(-40-96)+r11],xmm62707 vmovaps XMMWORD[(-40-80)+r11],xmm72708 vmovaps XMMWORD[(-40-64)+r11],xmm82709 vmovaps XMMWORD[(-40-48)+r11],xmm92710 vmovaps XMMWORD[(-40-32)+r11],xmm102711 vmovaps XMMWORD[(-40-16)+r11],xmm112712 $L$prologue_avx:2713 and rsp,-642714 mov r8,rdi2715 mov r9,rsi2716 mov r10,rdx2717 2718 shl r10,62719 add r10,r92720 lea r14,[((K_XX_XX+64))]2721 2722 mov eax,DWORD[r8]2723 mov ebx,DWORD[4+r8]2724 mov ecx,DWORD[8+r8]2725 mov edx,DWORD[12+r8]2726 mov esi,ebx2727 mov ebp,DWORD[16+r8]2728 mov edi,ecx2729 xor edi,edx2730 and esi,edi2731 2732 vmovdqa xmm6,XMMWORD[64+r14]2733 vmovdqa xmm11,XMMWORD[((-64))+r14]2734 vmovdqu xmm0,XMMWORD[r9]2735 vmovdqu xmm1,XMMWORD[16+r9]2736 vmovdqu xmm2,XMMWORD[32+r9]2737 vmovdqu xmm3,XMMWORD[48+r9]2738 vpshufb xmm0,xmm0,xmm62739 add r9,642740 vpshufb xmm1,xmm1,xmm62741 vpshufb xmm2,xmm2,xmm62742 vpshufb xmm3,xmm3,xmm62743 vpaddd xmm4,xmm0,xmm112744 vpaddd xmm5,xmm1,xmm112745 vpaddd xmm6,xmm2,xmm112746 vmovdqa XMMWORD[rsp],xmm42747 vmovdqa XMMWORD[16+rsp],xmm52748 vmovdqa XMMWORD[32+rsp],xmm62749 jmp NEAR $L$oop_avx2750 ALIGN 162751 $L$oop_avx:2752 shrd ebx,ebx,22753 xor esi,edx2754 vpalignr xmm4,xmm1,xmm0,82755 mov edi,eax2756 add ebp,DWORD[rsp]2757 vpaddd xmm9,xmm11,xmm32758 xor ebx,ecx2759 shld eax,eax,52760 vpsrldq xmm8,xmm3,42761 add ebp,esi2762 and edi,ebx2763 vpxor xmm4,xmm4,xmm02764 xor ebx,ecx2765 add ebp,eax2766 vpxor xmm8,xmm8,xmm22767 shrd eax,eax,72768 xor edi,ecx2769 mov esi,ebp2770 add edx,DWORD[4+rsp]2771 vpxor xmm4,xmm4,xmm82772 xor eax,ebx2773 shld ebp,ebp,52774 vmovdqa XMMWORD[48+rsp],xmm92775 add edx,edi2776 and esi,eax2777 vpsrld xmm8,xmm4,312778 xor eax,ebx2779 add edx,ebp2780 shrd ebp,ebp,72781 xor esi,ebx2782 vpslldq xmm10,xmm4,122783 vpaddd xmm4,xmm4,xmm42784 mov edi,edx2785 add ecx,DWORD[8+rsp]2786 xor ebp,eax2787 shld edx,edx,52788 vpsrld xmm9,xmm10,302789 vpor xmm4,xmm4,xmm82790 add ecx,esi2791 and edi,ebp2792 xor ebp,eax2793 add ecx,edx2794 vpslld xmm10,xmm10,22795 vpxor xmm4,xmm4,xmm92796 shrd edx,edx,72797 xor edi,eax2798 mov esi,ecx2799 add ebx,DWORD[12+rsp]2800 vpxor xmm4,xmm4,xmm102801 xor edx,ebp2802 shld ecx,ecx,52803 add ebx,edi2804 and esi,edx2805 xor edx,ebp2806 add ebx,ecx2807 shrd ecx,ecx,72808 xor esi,ebp2809 vpalignr xmm5,xmm2,xmm1,82810 mov edi,ebx2811 add eax,DWORD[16+rsp]2812 vpaddd xmm9,xmm11,xmm42813 xor ecx,edx2814 shld ebx,ebx,52815 vpsrldq xmm8,xmm4,42816 add eax,esi2817 and edi,ecx2818 vpxor xmm5,xmm5,xmm12819 xor ecx,edx2820 add eax,ebx2821 vpxor xmm8,xmm8,xmm32822 shrd ebx,ebx,72823 xor edi,edx2824 mov esi,eax2825 add ebp,DWORD[20+rsp]2826 vpxor xmm5,xmm5,xmm82827 xor ebx,ecx2828 shld eax,eax,52829 vmovdqa XMMWORD[rsp],xmm92830 add ebp,edi2831 and esi,ebx2832 vpsrld xmm8,xmm5,312833 xor ebx,ecx2834 add ebp,eax2835 shrd eax,eax,72836 xor esi,ecx2837 vpslldq xmm10,xmm5,122838 vpaddd xmm5,xmm5,xmm52839 mov edi,ebp2840 add edx,DWORD[24+rsp]2841 xor eax,ebx2842 shld ebp,ebp,52843 vpsrld xmm9,xmm10,302844 vpor xmm5,xmm5,xmm82845 add edx,esi2846 and edi,eax2847 xor eax,ebx2848 add edx,ebp2849 vpslld xmm10,xmm10,22850 vpxor xmm5,xmm5,xmm92851 shrd ebp,ebp,72852 xor edi,ebx2853 mov esi,edx2854 add ecx,DWORD[28+rsp]2855 vpxor xmm5,xmm5,xmm102856 xor ebp,eax2857 shld edx,edx,52858 vmovdqa xmm11,XMMWORD[((-32))+r14]2859 add ecx,edi2860 and esi,ebp2861 xor ebp,eax2862 add ecx,edx2863 shrd edx,edx,72864 xor esi,eax2865 vpalignr xmm6,xmm3,xmm2,82866 mov edi,ecx2867 add ebx,DWORD[32+rsp]2868 vpaddd xmm9,xmm11,xmm52869 xor edx,ebp2870 shld ecx,ecx,52871 vpsrldq xmm8,xmm5,42872 add ebx,esi2873 and edi,edx2874 vpxor xmm6,xmm6,xmm22875 xor edx,ebp2876 add ebx,ecx2877 vpxor xmm8,xmm8,xmm42878 shrd ecx,ecx,72879 xor edi,ebp2880 mov esi,ebx2881 add eax,DWORD[36+rsp]2882 vpxor xmm6,xmm6,xmm82883 xor ecx,edx2884 shld ebx,ebx,52885 vmovdqa XMMWORD[16+rsp],xmm92886 add eax,edi2887 and esi,ecx2888 vpsrld xmm8,xmm6,312889 xor ecx,edx2890 add eax,ebx2891 shrd ebx,ebx,72892 xor esi,edx2893 vpslldq xmm10,xmm6,122894 vpaddd xmm6,xmm6,xmm62895 mov edi,eax2896 add ebp,DWORD[40+rsp]2897 xor ebx,ecx2898 shld eax,eax,52899 vpsrld xmm9,xmm10,302900 vpor xmm6,xmm6,xmm82901 add ebp,esi2902 and edi,ebx2903 xor ebx,ecx2904 add ebp,eax2905 vpslld xmm10,xmm10,22906 vpxor xmm6,xmm6,xmm92907 shrd eax,eax,72908 xor edi,ecx2909 mov esi,ebp2910 add edx,DWORD[44+rsp]2911 vpxor xmm6,xmm6,xmm102912 xor eax,ebx2913 shld ebp,ebp,52914 add edx,edi2915 and esi,eax2916 xor eax,ebx2917 add edx,ebp2918 shrd ebp,ebp,72919 xor esi,ebx2920 vpalignr xmm7,xmm4,xmm3,82921 mov edi,edx2922 add ecx,DWORD[48+rsp]2923 vpaddd xmm9,xmm11,xmm62924 xor ebp,eax2925 shld edx,edx,52926 vpsrldq xmm8,xmm6,42927 add ecx,esi2928 and edi,ebp2929 vpxor xmm7,xmm7,xmm32930 xor ebp,eax2931 add ecx,edx2932 vpxor xmm8,xmm8,xmm52933 shrd edx,edx,72934 xor edi,eax2935 mov esi,ecx2936 add ebx,DWORD[52+rsp]2937 vpxor xmm7,xmm7,xmm82938 xor edx,ebp2939 shld ecx,ecx,52940 vmovdqa XMMWORD[32+rsp],xmm92941 add ebx,edi2942 and esi,edx2943 vpsrld xmm8,xmm7,312944 xor edx,ebp2945 add ebx,ecx2946 shrd ecx,ecx,72947 xor esi,ebp2948 vpslldq xmm10,xmm7,122949 vpaddd xmm7,xmm7,xmm72950 mov edi,ebx2951 add eax,DWORD[56+rsp]2952 xor ecx,edx2953 shld ebx,ebx,52954 vpsrld xmm9,xmm10,302955 vpor xmm7,xmm7,xmm82956 add eax,esi2957 and edi,ecx2958 xor ecx,edx2959 add eax,ebx2960 vpslld xmm10,xmm10,22961 vpxor xmm7,xmm7,xmm92962 shrd ebx,ebx,72963 xor edi,edx2964 mov esi,eax2965 add ebp,DWORD[60+rsp]2966 vpxor xmm7,xmm7,xmm102967 xor ebx,ecx2968 shld eax,eax,52969 add ebp,edi2970 and esi,ebx2971 xor ebx,ecx2972 add ebp,eax2973 vpalignr xmm8,xmm7,xmm6,82974 vpxor xmm0,xmm0,xmm42975 shrd eax,eax,72976 xor esi,ecx2977 mov edi,ebp2978 add edx,DWORD[rsp]2979 vpxor xmm0,xmm0,xmm12980 xor eax,ebx2981 shld ebp,ebp,52982 vpaddd xmm9,xmm11,xmm72983 add edx,esi2984 and edi,eax2985 vpxor xmm0,xmm0,xmm82986 xor eax,ebx2987 add edx,ebp2988 shrd ebp,ebp,72989 xor edi,ebx2990 vpsrld xmm8,xmm0,302991 vmovdqa XMMWORD[48+rsp],xmm92992 mov esi,edx2993 add ecx,DWORD[4+rsp]2994 xor ebp,eax2995 shld edx,edx,52996 vpslld xmm0,xmm0,22997 add ecx,edi2998 and esi,ebp2999 xor ebp,eax3000 add ecx,edx3001 shrd edx,edx,73002 xor esi,eax3003 mov edi,ecx3004 add ebx,DWORD[8+rsp]3005 vpor xmm0,xmm0,xmm83006 xor edx,ebp3007 shld ecx,ecx,53008 add ebx,esi3009 and edi,edx3010 xor edx,ebp3011 add ebx,ecx3012 add eax,DWORD[12+rsp]3013 xor edi,ebp3014 mov esi,ebx3015 shld ebx,ebx,53016 add eax,edi3017 xor esi,edx3018 shrd ecx,ecx,73019 add eax,ebx3020 vpalignr xmm8,xmm0,xmm7,83021 vpxor xmm1,xmm1,xmm53022 add ebp,DWORD[16+rsp]3023 xor esi,ecx3024 mov edi,eax3025 shld eax,eax,53026 vpxor xmm1,xmm1,xmm23027 add ebp,esi3028 xor edi,ecx3029 vpaddd xmm9,xmm11,xmm03030 shrd ebx,ebx,73031 add ebp,eax3032 vpxor xmm1,xmm1,xmm83033 add edx,DWORD[20+rsp]3034 xor edi,ebx3035 mov esi,ebp3036 shld ebp,ebp,53037 vpsrld xmm8,xmm1,303038 vmovdqa XMMWORD[rsp],xmm93039 add edx,edi3040 xor esi,ebx3041 shrd eax,eax,73042 add edx,ebp3043 vpslld xmm1,xmm1,23044 add ecx,DWORD[24+rsp]3045 xor esi,eax3046 mov edi,edx3047 shld edx,edx,53048 add ecx,esi3049 xor edi,eax3050 shrd ebp,ebp,73051 add ecx,edx3052 vpor xmm1,xmm1,xmm83053 add ebx,DWORD[28+rsp]3054 xor edi,ebp3055 mov esi,ecx3056 shld ecx,ecx,53057 add ebx,edi3058 xor esi,ebp3059 shrd edx,edx,73060 add ebx,ecx3061 vpalignr xmm8,xmm1,xmm0,83062 vpxor xmm2,xmm2,xmm63063 add eax,DWORD[32+rsp]3064 xor esi,edx3065 mov edi,ebx3066 shld ebx,ebx,53067 vpxor xmm2,xmm2,xmm33068 add eax,esi3069 xor edi,edx3070 vpaddd xmm9,xmm11,xmm13071 vmovdqa xmm11,XMMWORD[r14]3072 shrd ecx,ecx,73073 add eax,ebx3074 vpxor xmm2,xmm2,xmm83075 add ebp,DWORD[36+rsp]3076 xor edi,ecx3077 mov esi,eax3078 shld eax,eax,53079 vpsrld xmm8,xmm2,303080 vmovdqa XMMWORD[16+rsp],xmm93081 add ebp,edi3082 xor esi,ecx3083 shrd ebx,ebx,73084 add ebp,eax3085 vpslld xmm2,xmm2,23086 add edx,DWORD[40+rsp]3087 xor esi,ebx3088 mov edi,ebp3089 shld ebp,ebp,53090 add edx,esi3091 xor edi,ebx3092 shrd eax,eax,73093 add edx,ebp3094 vpor xmm2,xmm2,xmm83095 add ecx,DWORD[44+rsp]3096 xor edi,eax3097 mov esi,edx3098 shld edx,edx,53099 add ecx,edi3100 xor esi,eax3101 shrd ebp,ebp,73102 add ecx,edx3103 vpalignr xmm8,xmm2,xmm1,83104 vpxor xmm3,xmm3,xmm73105 add ebx,DWORD[48+rsp]3106 xor esi,ebp3107 mov edi,ecx3108 shld ecx,ecx,53109 vpxor xmm3,xmm3,xmm43110 add ebx,esi3111 xor edi,ebp3112 vpaddd xmm9,xmm11,xmm23113 shrd edx,edx,73114 add ebx,ecx3115 vpxor xmm3,xmm3,xmm83116 add eax,DWORD[52+rsp]3117 xor edi,edx3118 mov esi,ebx3119 shld ebx,ebx,53120 vpsrld xmm8,xmm3,303121 vmovdqa XMMWORD[32+rsp],xmm93122 add eax,edi3123 xor esi,edx3124 shrd ecx,ecx,73125 add eax,ebx3126 vpslld xmm3,xmm3,23127 add ebp,DWORD[56+rsp]3128 xor esi,ecx3129 mov edi,eax3130 shld eax,eax,53131 add ebp,esi3132 xor edi,ecx3133 shrd ebx,ebx,73134 add ebp,eax3135 vpor xmm3,xmm3,xmm83136 add edx,DWORD[60+rsp]3137 xor edi,ebx3138 mov esi,ebp3139 shld ebp,ebp,53140 add edx,edi3141 xor esi,ebx3142 shrd eax,eax,73143 add edx,ebp3144 vpalignr xmm8,xmm3,xmm2,83145 vpxor xmm4,xmm4,xmm03146 add ecx,DWORD[rsp]3147 xor esi,eax3148 mov edi,edx3149 shld edx,edx,53150 vpxor xmm4,xmm4,xmm53151 add ecx,esi3152 xor edi,eax3153 vpaddd xmm9,xmm11,xmm33154 shrd ebp,ebp,73155 add ecx,edx3156 vpxor xmm4,xmm4,xmm83157 add ebx,DWORD[4+rsp]3158 xor edi,ebp3159 mov esi,ecx3160 shld ecx,ecx,53161 vpsrld xmm8,xmm4,303162 vmovdqa XMMWORD[48+rsp],xmm93163 add ebx,edi3164 xor esi,ebp3165 shrd edx,edx,73166 add ebx,ecx3167 vpslld xmm4,xmm4,23168 add eax,DWORD[8+rsp]3169 xor esi,edx3170 mov edi,ebx3171 shld ebx,ebx,53172 add eax,esi3173 xor edi,edx3174 shrd ecx,ecx,73175 add eax,ebx3176 vpor xmm4,xmm4,xmm83177 add ebp,DWORD[12+rsp]3178 xor edi,ecx3179 mov esi,eax3180 shld eax,eax,53181 add ebp,edi3182 xor esi,ecx3183 shrd ebx,ebx,73184 add ebp,eax3185 vpalignr xmm8,xmm4,xmm3,83186 vpxor xmm5,xmm5,xmm13187 add edx,DWORD[16+rsp]3188 xor esi,ebx3189 mov edi,ebp3190 shld ebp,ebp,53191 vpxor xmm5,xmm5,xmm63192 add edx,esi3193 xor edi,ebx3194 vpaddd xmm9,xmm11,xmm43195 shrd eax,eax,73196 add edx,ebp3197 vpxor xmm5,xmm5,xmm83198 add ecx,DWORD[20+rsp]3199 xor edi,eax3200 mov esi,edx3201 shld edx,edx,53202 vpsrld xmm8,xmm5,303203 vmovdqa XMMWORD[rsp],xmm93204 add ecx,edi3205 xor esi,eax3206 shrd ebp,ebp,73207 add ecx,edx3208 vpslld xmm5,xmm5,23209 add ebx,DWORD[24+rsp]3210 xor esi,ebp3211 mov edi,ecx3212 shld ecx,ecx,53213 add ebx,esi3214 xor edi,ebp3215 shrd edx,edx,73216 add ebx,ecx3217 vpor xmm5,xmm5,xmm83218 add eax,DWORD[28+rsp]3219 shrd ecx,ecx,73220 mov esi,ebx3221 xor edi,edx3222 shld ebx,ebx,53223 add eax,edi3224 xor esi,ecx3225 xor ecx,edx3226 add eax,ebx3227 vpalignr xmm8,xmm5,xmm4,83228 vpxor xmm6,xmm6,xmm23229 add ebp,DWORD[32+rsp]3230 and esi,ecx3231 xor ecx,edx3232 shrd ebx,ebx,73233 vpxor xmm6,xmm6,xmm73234 mov edi,eax3235 xor esi,ecx3236 vpaddd xmm9,xmm11,xmm53237 shld eax,eax,53238 add ebp,esi3239 vpxor xmm6,xmm6,xmm83240 xor edi,ebx3241 xor ebx,ecx3242 add ebp,eax3243 add edx,DWORD[36+rsp]3244 vpsrld xmm8,xmm6,303245 vmovdqa XMMWORD[16+rsp],xmm93246 and edi,ebx3247 xor ebx,ecx3248 shrd eax,eax,73249 mov esi,ebp3250 vpslld xmm6,xmm6,23251 xor edi,ebx3252 shld ebp,ebp,53253 add edx,edi3254 xor esi,eax3255 xor eax,ebx3256 add edx,ebp3257 add ecx,DWORD[40+rsp]3258 and esi,eax3259 vpor xmm6,xmm6,xmm83260 xor eax,ebx3261 shrd ebp,ebp,73262 mov edi,edx3263 xor esi,eax3264 shld edx,edx,53265 add ecx,esi3266 xor edi,ebp3267 xor ebp,eax3268 add ecx,edx3269 add ebx,DWORD[44+rsp]3270 and edi,ebp3271 xor ebp,eax3272 shrd edx,edx,73273 mov esi,ecx3274 xor edi,ebp3275 shld ecx,ecx,53276 add ebx,edi3277 xor esi,edx3278 xor edx,ebp3279 add ebx,ecx3280 vpalignr xmm8,xmm6,xmm5,83281 vpxor xmm7,xmm7,xmm33282 add eax,DWORD[48+rsp]3283 and esi,edx3284 xor edx,ebp3285 shrd ecx,ecx,73286 vpxor xmm7,xmm7,xmm03287 mov edi,ebx3288 xor esi,edx3289 vpaddd xmm9,xmm11,xmm63290 vmovdqa xmm11,XMMWORD[32+r14]3291 shld ebx,ebx,53292 add eax,esi3293 vpxor xmm7,xmm7,xmm83294 xor edi,ecx3295 xor ecx,edx3296 add eax,ebx3297 add ebp,DWORD[52+rsp]3298 vpsrld xmm8,xmm7,303299 vmovdqa XMMWORD[32+rsp],xmm93300 and edi,ecx3301 xor ecx,edx3302 shrd ebx,ebx,73303 mov esi,eax3304 vpslld xmm7,xmm7,23305 xor edi,ecx3306 shld eax,eax,53307 add ebp,edi3308 xor esi,ebx3309 xor ebx,ecx3310 add ebp,eax3311 add edx,DWORD[56+rsp]3312 and esi,ebx3313 vpor xmm7,xmm7,xmm83314 xor ebx,ecx3315 shrd eax,eax,73316 mov edi,ebp3317 xor esi,ebx3318 shld ebp,ebp,53319 add edx,esi3320 xor edi,eax3321 xor eax,ebx3322 add edx,ebp3323 add ecx,DWORD[60+rsp]3324 and edi,eax3325 xor eax,ebx3326 shrd ebp,ebp,73327 mov esi,edx3328 xor edi,eax3329 shld edx,edx,53330 add ecx,edi3331 xor esi,ebp3332 xor ebp,eax3333 add ecx,edx3334 vpalignr xmm8,xmm7,xmm6,83335 vpxor xmm0,xmm0,xmm43336 add ebx,DWORD[rsp]3337 and esi,ebp3338 xor ebp,eax3339 shrd edx,edx,73340 vpxor xmm0,xmm0,xmm13341 mov edi,ecx3342 xor esi,ebp3343 vpaddd xmm9,xmm11,xmm73344 shld ecx,ecx,53345 add ebx,esi3346 vpxor xmm0,xmm0,xmm83347 xor edi,edx3348 xor edx,ebp3349 add ebx,ecx3350 add eax,DWORD[4+rsp]3351 vpsrld xmm8,xmm0,303352 vmovdqa XMMWORD[48+rsp],xmm93353 and edi,edx3354 xor edx,ebp3355 shrd ecx,ecx,73356 mov esi,ebx3357 vpslld xmm0,xmm0,23358 xor edi,edx3359 shld ebx,ebx,53360 add eax,edi3361 xor esi,ecx3362 xor ecx,edx3363 add eax,ebx3364 add ebp,DWORD[8+rsp]3365 and esi,ecx3366 vpor xmm0,xmm0,xmm83367 xor ecx,edx3368 shrd ebx,ebx,73369 mov edi,eax3370 xor esi,ecx3371 shld eax,eax,53372 add ebp,esi3373 xor edi,ebx3374 xor ebx,ecx3375 add ebp,eax3376 add edx,DWORD[12+rsp]3377 and edi,ebx3378 xor ebx,ecx3379 shrd eax,eax,73380 mov esi,ebp3381 xor edi,ebx3382 shld ebp,ebp,53383 add edx,edi3384 xor esi,eax3385 xor eax,ebx3386 add edx,ebp3387 vpalignr xmm8,xmm0,xmm7,83388 vpxor xmm1,xmm1,xmm53389 add ecx,DWORD[16+rsp]3390 and esi,eax3391 xor eax,ebx3392 shrd ebp,ebp,73393 vpxor xmm1,xmm1,xmm23394 mov edi,edx3395 xor esi,eax3396 vpaddd xmm9,xmm11,xmm03397 shld edx,edx,53398 add ecx,esi3399 vpxor xmm1,xmm1,xmm83400 xor edi,ebp3401 xor ebp,eax3402 add ecx,edx3403 add ebx,DWORD[20+rsp]3404 vpsrld xmm8,xmm1,303405 vmovdqa XMMWORD[rsp],xmm93406 and edi,ebp3407 xor ebp,eax3408 shrd edx,edx,73409 mov esi,ecx3410 vpslld xmm1,xmm1,23411 xor edi,ebp3412 shld ecx,ecx,53413 add ebx,edi3414 xor esi,edx3415 xor edx,ebp3416 add ebx,ecx3417 add eax,DWORD[24+rsp]3418 and esi,edx3419 vpor xmm1,xmm1,xmm83420 xor edx,ebp3421 shrd ecx,ecx,73422 mov edi,ebx3423 xor esi,edx3424 shld ebx,ebx,53425 add eax,esi3426 xor edi,ecx3427 xor ecx,edx3428 add eax,ebx3429 add ebp,DWORD[28+rsp]3430 and edi,ecx3431 xor ecx,edx3432 shrd ebx,ebx,73433 mov esi,eax3434 xor edi,ecx3435 shld eax,eax,53436 add ebp,edi3437 xor esi,ebx3438 xor ebx,ecx3439 add ebp,eax3440 vpalignr xmm8,xmm1,xmm0,83441 vpxor xmm2,xmm2,xmm63442 add edx,DWORD[32+rsp]3443 and esi,ebx3444 xor ebx,ecx3445 shrd eax,eax,73446 vpxor xmm2,xmm2,xmm33447 mov edi,ebp3448 xor esi,ebx3449 vpaddd xmm9,xmm11,xmm13450 shld ebp,ebp,53451 add edx,esi3452 vpxor xmm2,xmm2,xmm83453 xor edi,eax3454 xor eax,ebx3455 add edx,ebp3456 add ecx,DWORD[36+rsp]3457 vpsrld xmm8,xmm2,303458 vmovdqa XMMWORD[16+rsp],xmm93459 and edi,eax3460 xor eax,ebx3461 shrd ebp,ebp,73462 mov esi,edx3463 vpslld xmm2,xmm2,23464 xor edi,eax3465 shld edx,edx,53466 add ecx,edi3467 xor esi,ebp3468 xor ebp,eax3469 add ecx,edx3470 add ebx,DWORD[40+rsp]3471 and esi,ebp3472 vpor xmm2,xmm2,xmm83473 xor ebp,eax3474 shrd edx,edx,73475 mov edi,ecx3476 xor esi,ebp3477 shld ecx,ecx,53478 add ebx,esi3479 xor edi,edx3480 xor edx,ebp3481 add ebx,ecx3482 add eax,DWORD[44+rsp]3483 and edi,edx3484 xor edx,ebp3485 shrd ecx,ecx,73486 mov esi,ebx3487 xor edi,edx3488 shld ebx,ebx,53489 add eax,edi3490 xor esi,edx3491 add eax,ebx3492 vpalignr xmm8,xmm2,xmm1,83493 vpxor xmm3,xmm3,xmm73494 add ebp,DWORD[48+rsp]3495 xor esi,ecx3496 mov edi,eax3497 shld eax,eax,53498 vpxor xmm3,xmm3,xmm43499 add ebp,esi3500 xor edi,ecx3501 vpaddd xmm9,xmm11,xmm23502 shrd ebx,ebx,73503 add ebp,eax3504 vpxor xmm3,xmm3,xmm83505 add edx,DWORD[52+rsp]3506 xor edi,ebx3507 mov esi,ebp3508 shld ebp,ebp,53509 vpsrld xmm8,xmm3,303510 vmovdqa XMMWORD[32+rsp],xmm93511 add edx,edi3512 xor esi,ebx3513 shrd eax,eax,73514 add edx,ebp3515 vpslld xmm3,xmm3,23516 add ecx,DWORD[56+rsp]3517 xor esi,eax3518 mov edi,edx3519 shld edx,edx,53520 add ecx,esi3521 xor edi,eax3522 shrd ebp,ebp,73523 add ecx,edx3524 vpor xmm3,xmm3,xmm83525 add ebx,DWORD[60+rsp]3526 xor edi,ebp3527 mov esi,ecx3528 shld ecx,ecx,53529 add ebx,edi3530 xor esi,ebp3531 shrd edx,edx,73532 add ebx,ecx3533 add eax,DWORD[rsp]3534 vpaddd xmm9,xmm11,xmm33535 xor esi,edx3536 mov edi,ebx3537 shld ebx,ebx,53538 add eax,esi3539 vmovdqa XMMWORD[48+rsp],xmm93540 xor edi,edx3541 shrd ecx,ecx,73542 add eax,ebx3543 add ebp,DWORD[4+rsp]3544 xor edi,ecx3545 mov esi,eax3546 shld eax,eax,53547 add ebp,edi3548 xor esi,ecx3549 shrd ebx,ebx,73550 add ebp,eax3551 add edx,DWORD[8+rsp]3552 xor esi,ebx3553 mov edi,ebp3554 shld ebp,ebp,53555 add edx,esi3556 xor edi,ebx3557 shrd eax,eax,73558 add edx,ebp3559 add ecx,DWORD[12+rsp]3560 xor edi,eax3561 mov esi,edx3562 shld edx,edx,53563 add ecx,edi3564 xor esi,eax3565 shrd ebp,ebp,73566 add ecx,edx3567 cmp r9,r103568 je NEAR $L$done_avx3569 vmovdqa xmm6,XMMWORD[64+r14]3570 vmovdqa xmm11,XMMWORD[((-64))+r14]3571 vmovdqu xmm0,XMMWORD[r9]3572 vmovdqu xmm1,XMMWORD[16+r9]3573 vmovdqu xmm2,XMMWORD[32+r9]3574 vmovdqu xmm3,XMMWORD[48+r9]3575 vpshufb xmm0,xmm0,xmm63576 add r9,643577 add ebx,DWORD[16+rsp]3578 xor esi,ebp3579 vpshufb xmm1,xmm1,xmm63580 mov edi,ecx3581 shld ecx,ecx,53582 vpaddd xmm4,xmm0,xmm113583 add ebx,esi3584 xor edi,ebp3585 shrd edx,edx,73586 add ebx,ecx3587 vmovdqa XMMWORD[rsp],xmm43588 add eax,DWORD[20+rsp]3589 xor edi,edx3590 mov esi,ebx3591 shld ebx,ebx,53592 add eax,edi3593 xor esi,edx3594 shrd ecx,ecx,73595 add eax,ebx3596 add ebp,DWORD[24+rsp]3597 xor esi,ecx3598 mov edi,eax3599 shld eax,eax,53600 add ebp,esi3601 xor edi,ecx3602 shrd ebx,ebx,73603 add ebp,eax3604 add edx,DWORD[28+rsp]3605 xor edi,ebx3606 mov esi,ebp3607 shld ebp,ebp,53608 add edx,edi3609 xor esi,ebx3610 shrd eax,eax,73611 add edx,ebp3612 add ecx,DWORD[32+rsp]3613 xor esi,eax3614 vpshufb xmm2,xmm2,xmm63615 mov edi,edx3616 shld edx,edx,53617 vpaddd xmm5,xmm1,xmm113618 add ecx,esi3619 xor edi,eax3620 shrd ebp,ebp,73621 add ecx,edx3622 vmovdqa XMMWORD[16+rsp],xmm53623 add ebx,DWORD[36+rsp]3624 xor edi,ebp3625 mov esi,ecx3626 shld ecx,ecx,53627 add ebx,edi3628 xor esi,ebp3629 shrd edx,edx,73630 add ebx,ecx3631 add eax,DWORD[40+rsp]3632 xor esi,edx3633 mov edi,ebx3634 shld ebx,ebx,53635 add eax,esi3636 xor edi,edx3637 shrd ecx,ecx,73638 add eax,ebx3639 add ebp,DWORD[44+rsp]3640 xor edi,ecx3641 mov esi,eax3642 shld eax,eax,53643 add ebp,edi3644 xor esi,ecx3645 shrd ebx,ebx,73646 add ebp,eax3647 add edx,DWORD[48+rsp]3648 xor esi,ebx3649 vpshufb xmm3,xmm3,xmm63650 mov edi,ebp3651 shld ebp,ebp,53652 vpaddd xmm6,xmm2,xmm113653 add edx,esi3654 xor edi,ebx3655 shrd eax,eax,73656 add edx,ebp3657 vmovdqa XMMWORD[32+rsp],xmm63658 add ecx,DWORD[52+rsp]3659 xor edi,eax3660 mov esi,edx3661 shld edx,edx,53662 add ecx,edi3663 xor esi,eax3664 shrd ebp,ebp,73665 add ecx,edx3666 add ebx,DWORD[56+rsp]3667 xor esi,ebp3668 mov edi,ecx3669 shld ecx,ecx,53670 add ebx,esi3671 xor edi,ebp3672 shrd edx,edx,73673 add ebx,ecx3674 add eax,DWORD[60+rsp]3675 xor edi,edx3676 mov esi,ebx3677 shld ebx,ebx,53678 add eax,edi3679 shrd ecx,ecx,73680 add eax,ebx3681 add eax,DWORD[r8]3682 add esi,DWORD[4+r8]3683 add ecx,DWORD[8+r8]3684 add edx,DWORD[12+r8]3685 mov DWORD[r8],eax3686 add ebp,DWORD[16+r8]3687 mov DWORD[4+r8],esi3688 mov ebx,esi3689 mov DWORD[8+r8],ecx3690 mov edi,ecx3691 mov DWORD[12+r8],edx3692 xor edi,edx3693 mov DWORD[16+r8],ebp3694 and esi,edi3695 jmp NEAR $L$oop_avx3696 3697 ALIGN 163698 $L$done_avx:3699 add ebx,DWORD[16+rsp]3700 xor esi,ebp3701 mov edi,ecx3702 shld ecx,ecx,53703 add ebx,esi3704 xor edi,ebp3705 shrd edx,edx,73706 add ebx,ecx3707 add eax,DWORD[20+rsp]3708 xor edi,edx3709 mov esi,ebx3710 shld ebx,ebx,53711 add eax,edi3712 xor esi,edx3713 shrd ecx,ecx,73714 add eax,ebx3715 add ebp,DWORD[24+rsp]3716 xor esi,ecx3717 mov edi,eax3718 shld eax,eax,53719 add ebp,esi3720 xor edi,ecx3721 shrd ebx,ebx,73722 add ebp,eax3723 add edx,DWORD[28+rsp]3724 xor edi,ebx3725 mov esi,ebp3726 shld ebp,ebp,53727 add edx,edi3728 xor esi,ebx3729 shrd eax,eax,73730 add edx,ebp3731 add ecx,DWORD[32+rsp]3732 xor esi,eax3733 mov edi,edx3734 shld edx,edx,53735 add ecx,esi3736 xor edi,eax3737 shrd ebp,ebp,73738 add ecx,edx3739 add ebx,DWORD[36+rsp]3740 xor edi,ebp3741 mov esi,ecx3742 shld ecx,ecx,53743 add ebx,edi3744 xor esi,ebp3745 shrd edx,edx,73746 add ebx,ecx3747 add eax,DWORD[40+rsp]3748 xor esi,edx3749 mov edi,ebx3750 shld ebx,ebx,53751 add eax,esi3752 xor edi,edx3753 shrd ecx,ecx,73754 add eax,ebx3755 add ebp,DWORD[44+rsp]3756 xor edi,ecx3757 mov esi,eax3758 shld eax,eax,53759 add ebp,edi3760 xor esi,ecx3761 shrd ebx,ebx,73762 add ebp,eax3763 add edx,DWORD[48+rsp]3764 xor esi,ebx3765 mov edi,ebp3766 shld ebp,ebp,53767 add edx,esi3768 xor edi,ebx3769 shrd eax,eax,73770 add edx,ebp3771 add ecx,DWORD[52+rsp]3772 xor edi,eax3773 mov esi,edx3774 shld edx,edx,53775 add ecx,edi3776 xor esi,eax3777 shrd ebp,ebp,73778 add ecx,edx3779 add ebx,DWORD[56+rsp]3780 xor esi,ebp3781 mov edi,ecx3782 shld ecx,ecx,53783 add ebx,esi3784 xor edi,ebp3785 shrd edx,edx,73786 add ebx,ecx3787 add eax,DWORD[60+rsp]3788 xor edi,edx3789 mov esi,ebx3790 shld ebx,ebx,53791 add eax,edi3792 shrd ecx,ecx,73793 add eax,ebx3794 vzeroupper3795 3796 add eax,DWORD[r8]3797 add esi,DWORD[4+r8]3798 add ecx,DWORD[8+r8]3799 mov DWORD[r8],eax3800 add edx,DWORD[12+r8]3801 mov DWORD[4+r8],esi3802 add ebp,DWORD[16+r8]3803 mov DWORD[8+r8],ecx3804 mov DWORD[12+r8],edx3805 mov DWORD[16+r8],ebp3806 movaps xmm6,XMMWORD[((-40-96))+r11]3807 movaps xmm7,XMMWORD[((-40-80))+r11]3808 movaps xmm8,XMMWORD[((-40-64))+r11]3809 movaps xmm9,XMMWORD[((-40-48))+r11]3810 movaps xmm10,XMMWORD[((-40-32))+r11]3811 movaps xmm11,XMMWORD[((-40-16))+r11]3812 mov r14,QWORD[((-40))+r11]3813 3814 mov r13,QWORD[((-32))+r11]3815 3816 mov r12,QWORD[((-24))+r11]3817 3818 mov rbp,QWORD[((-16))+r11]3819 3820 mov rbx,QWORD[((-8))+r11]3821 3822 lea rsp,[r11]3823 3824 $L$epilogue_avx:3825 mov rdi,QWORD[8+rsp] ;WIN64 epilogue3826 mov rsi,QWORD[16+rsp]3827 DB 0F3h,0C3h ;repret3828 3829 $L$SEH_end_sha1_block_data_order_avx:3830 3831 ALIGN 163832 sha1_block_data_order_avx2:3833 mov QWORD[8+rsp],rdi ;WIN64 prologue3834 mov QWORD[16+rsp],rsi3835 mov rax,rsp3836 $L$SEH_begin_sha1_block_data_order_avx2:3837 mov rdi,rcx3838 mov rsi,rdx3839 mov rdx,r83840 3841 3842 _avx2_shortcut:3843 3844 mov r11,rsp3845 3846 push rbx3847 3848 push rbp3849 3850 push r123851 3852 push r133853 3854 push r143855 3856 vzeroupper3857 lea rsp,[((-96))+rsp]3858 vmovaps XMMWORD[(-40-96)+r11],xmm63859 vmovaps XMMWORD[(-40-80)+r11],xmm73860 vmovaps XMMWORD[(-40-64)+r11],xmm83861 vmovaps XMMWORD[(-40-48)+r11],xmm93862 vmovaps XMMWORD[(-40-32)+r11],xmm103863 vmovaps XMMWORD[(-40-16)+r11],xmm113864 $L$prologue_avx2:3865 mov r8,rdi3866 mov r9,rsi3867 mov r10,rdx3868 3869 lea rsp,[((-640))+rsp]3870 shl r10,63871 lea r13,[64+r9]3872 and rsp,-1283873 add r10,r93874 lea r14,[((K_XX_XX+64))]3875 3876 mov eax,DWORD[r8]3877 cmp r13,r103878 cmovae r13,r93879 mov ebp,DWORD[4+r8]3880 mov ecx,DWORD[8+r8]3881 mov edx,DWORD[12+r8]3882 mov esi,DWORD[16+r8]3883 vmovdqu ymm6,YMMWORD[64+r14]3884 3885 vmovdqu xmm0,XMMWORD[r9]3886 vmovdqu xmm1,XMMWORD[16+r9]3887 vmovdqu xmm2,XMMWORD[32+r9]3888 vmovdqu xmm3,XMMWORD[48+r9]3889 lea r9,[64+r9]3890 vinserti128 ymm0,ymm0,XMMWORD[r13],13891 vinserti128 ymm1,ymm1,XMMWORD[16+r13],13892 vpshufb ymm0,ymm0,ymm63893 vinserti128 ymm2,ymm2,XMMWORD[32+r13],13894 vpshufb ymm1,ymm1,ymm63895 vinserti128 ymm3,ymm3,XMMWORD[48+r13],13896 vpshufb ymm2,ymm2,ymm63897 vmovdqu ymm11,YMMWORD[((-64))+r14]3898 vpshufb ymm3,ymm3,ymm63899 3900 vpaddd ymm4,ymm0,ymm113901 vpaddd ymm5,ymm1,ymm113902 vmovdqu YMMWORD[rsp],ymm43903 vpaddd ymm6,ymm2,ymm113904 vmovdqu YMMWORD[32+rsp],ymm53905 vpaddd ymm7,ymm3,ymm113906 vmovdqu YMMWORD[64+rsp],ymm63907 vmovdqu YMMWORD[96+rsp],ymm73908 vpalignr ymm4,ymm1,ymm0,83909 vpsrldq ymm8,ymm3,43910 vpxor ymm4,ymm4,ymm03911 vpxor ymm8,ymm8,ymm23912 vpxor ymm4,ymm4,ymm83913 vpsrld ymm8,ymm4,313914 vpslldq ymm10,ymm4,123915 vpaddd ymm4,ymm4,ymm43916 vpsrld ymm9,ymm10,303917 vpor ymm4,ymm4,ymm83918 vpslld ymm10,ymm10,23919 vpxor ymm4,ymm4,ymm93920 vpxor ymm4,ymm4,ymm103921 vpaddd ymm9,ymm4,ymm113922 vmovdqu YMMWORD[128+rsp],ymm93923 vpalignr ymm5,ymm2,ymm1,83924 vpsrldq ymm8,ymm4,43925 vpxor ymm5,ymm5,ymm13926 vpxor ymm8,ymm8,ymm33927 vpxor ymm5,ymm5,ymm83928 vpsrld ymm8,ymm5,313929 vmovdqu ymm11,YMMWORD[((-32))+r14]3930 vpslldq ymm10,ymm5,123931 vpaddd ymm5,ymm5,ymm53932 vpsrld ymm9,ymm10,303933 vpor ymm5,ymm5,ymm83934 vpslld ymm10,ymm10,23935 vpxor ymm5,ymm5,ymm93936 vpxor ymm5,ymm5,ymm103937 vpaddd ymm9,ymm5,ymm113938 vmovdqu YMMWORD[160+rsp],ymm93939 vpalignr ymm6,ymm3,ymm2,83940 vpsrldq ymm8,ymm5,43941 vpxor ymm6,ymm6,ymm23942 vpxor ymm8,ymm8,ymm43943 vpxor ymm6,ymm6,ymm83944 vpsrld ymm8,ymm6,313945 vpslldq ymm10,ymm6,123946 vpaddd ymm6,ymm6,ymm63947 vpsrld ymm9,ymm10,303948 vpor ymm6,ymm6,ymm83949 vpslld ymm10,ymm10,23950 vpxor ymm6,ymm6,ymm93951 vpxor ymm6,ymm6,ymm103952 vpaddd ymm9,ymm6,ymm113953 vmovdqu YMMWORD[192+rsp],ymm93954 vpalignr ymm7,ymm4,ymm3,83955 vpsrldq ymm8,ymm6,43956 vpxor ymm7,ymm7,ymm33957 vpxor ymm8,ymm8,ymm53958 vpxor ymm7,ymm7,ymm83959 vpsrld ymm8,ymm7,313960 vpslldq ymm10,ymm7,123961 vpaddd ymm7,ymm7,ymm73962 vpsrld ymm9,ymm10,303963 vpor ymm7,ymm7,ymm83964 vpslld ymm10,ymm10,23965 vpxor ymm7,ymm7,ymm93966 vpxor ymm7,ymm7,ymm103967 vpaddd ymm9,ymm7,ymm113968 vmovdqu YMMWORD[224+rsp],ymm93969 lea r13,[128+rsp]3970 jmp NEAR $L$oop_avx23971 ALIGN 323972 $L$oop_avx2:3973 rorx ebx,ebp,23974 andn edi,ebp,edx3975 and ebp,ecx3976 xor ebp,edi3977 jmp NEAR $L$align32_13978 ALIGN 323979 $L$align32_1:3980 vpalignr ymm8,ymm7,ymm6,83981 vpxor ymm0,ymm0,ymm43982 add esi,DWORD[((-128))+r13]3983 andn edi,eax,ecx3984 vpxor ymm0,ymm0,ymm13985 add esi,ebp3986 rorx r12d,eax,273987 rorx ebp,eax,23988 vpxor ymm0,ymm0,ymm83989 and eax,ebx3990 add esi,r12d3991 xor eax,edi3992 vpsrld ymm8,ymm0,303993 vpslld ymm0,ymm0,23994 add edx,DWORD[((-124))+r13]3995 andn edi,esi,ebx3996 add edx,eax3997 rorx r12d,esi,273998 rorx eax,esi,23999 and esi,ebp4000 vpor ymm0,ymm0,ymm84001 add edx,r12d4002 xor esi,edi4003 add ecx,DWORD[((-120))+r13]4004 andn edi,edx,ebp4005 vpaddd ymm9,ymm0,ymm114006 add ecx,esi4007 rorx r12d,edx,274008 rorx esi,edx,24009 and edx,eax4010 vmovdqu YMMWORD[256+rsp],ymm94011 add ecx,r12d4012 xor edx,edi4013 add ebx,DWORD[((-116))+r13]4014 andn edi,ecx,eax4015 add ebx,edx4016 rorx r12d,ecx,274017 rorx edx,ecx,24018 and ecx,esi4019 add ebx,r12d4020 xor ecx,edi4021 add ebp,DWORD[((-96))+r13]4022 andn edi,ebx,esi4023 add ebp,ecx4024 rorx r12d,ebx,274025 rorx ecx,ebx,24026 and ebx,edx4027 add ebp,r12d4028 xor ebx,edi4029 vpalignr ymm8,ymm0,ymm7,84030 vpxor ymm1,ymm1,ymm54031 add eax,DWORD[((-92))+r13]4032 andn edi,ebp,edx4033 vpxor ymm1,ymm1,ymm24034 add eax,ebx4035 rorx r12d,ebp,274036 rorx ebx,ebp,24037 vpxor ymm1,ymm1,ymm84038 and ebp,ecx4039 add eax,r12d4040 xor ebp,edi4041 vpsrld ymm8,ymm1,304042 vpslld ymm1,ymm1,24043 add esi,DWORD[((-88))+r13]4044 andn edi,eax,ecx4045 add esi,ebp4046 rorx r12d,eax,274047 rorx ebp,eax,24048 and eax,ebx4049 vpor ymm1,ymm1,ymm84050 add esi,r12d4051 xor eax,edi4052 add edx,DWORD[((-84))+r13]4053 andn edi,esi,ebx4054 vpaddd ymm9,ymm1,ymm114055 add edx,eax4056 rorx r12d,esi,274057 rorx eax,esi,24058 and esi,ebp4059 vmovdqu YMMWORD[288+rsp],ymm94060 add edx,r12d4061 xor esi,edi4062 add ecx,DWORD[((-64))+r13]4063 andn edi,edx,ebp4064 add ecx,esi4065 rorx r12d,edx,274066 rorx esi,edx,24067 and edx,eax4068 add ecx,r12d4069 xor edx,edi4070 add ebx,DWORD[((-60))+r13]4071 andn edi,ecx,eax4072 add ebx,edx4073 rorx r12d,ecx,274074 rorx edx,ecx,24075 and ecx,esi4076 add ebx,r12d4077 xor ecx,edi4078 vpalignr ymm8,ymm1,ymm0,84079 vpxor ymm2,ymm2,ymm64080 add ebp,DWORD[((-56))+r13]4081 andn edi,ebx,esi4082 vpxor ymm2,ymm2,ymm34083 vmovdqu ymm11,YMMWORD[r14]4084 add ebp,ecx4085 rorx r12d,ebx,274086 rorx ecx,ebx,24087 vpxor ymm2,ymm2,ymm84088 and ebx,edx4089 add ebp,r12d4090 xor ebx,edi4091 vpsrld ymm8,ymm2,304092 vpslld ymm2,ymm2,24093 add eax,DWORD[((-52))+r13]4094 andn edi,ebp,edx4095 add eax,ebx4096 rorx r12d,ebp,274097 rorx ebx,ebp,24098 and ebp,ecx4099 vpor ymm2,ymm2,ymm84100 add eax,r12d4101 xor ebp,edi4102 add esi,DWORD[((-32))+r13]4103 andn edi,eax,ecx4104 vpaddd ymm9,ymm2,ymm114105 add esi,ebp4106 rorx r12d,eax,274107 rorx ebp,eax,24108 and eax,ebx4109 vmovdqu YMMWORD[320+rsp],ymm94110 add esi,r12d4111 xor eax,edi4112 add edx,DWORD[((-28))+r13]4113 andn edi,esi,ebx4114 add edx,eax4115 rorx r12d,esi,274116 rorx eax,esi,24117 and esi,ebp4118 add edx,r12d4119 xor esi,edi4120 add ecx,DWORD[((-24))+r13]4121 andn edi,edx,ebp4122 add ecx,esi4123 rorx r12d,edx,274124 rorx esi,edx,24125 and edx,eax4126 add ecx,r12d4127 xor edx,edi4128 vpalignr ymm8,ymm2,ymm1,84129 vpxor ymm3,ymm3,ymm74130 add ebx,DWORD[((-20))+r13]4131 andn edi,ecx,eax4132 vpxor ymm3,ymm3,ymm44133 add ebx,edx4134 rorx r12d,ecx,274135 rorx edx,ecx,24136 vpxor ymm3,ymm3,ymm84137 and ecx,esi4138 add ebx,r12d4139 xor ecx,edi4140 vpsrld ymm8,ymm3,304141 vpslld ymm3,ymm3,24142 add ebp,DWORD[r13]4143 andn edi,ebx,esi4144 add ebp,ecx4145 rorx r12d,ebx,274146 rorx ecx,ebx,24147 and ebx,edx4148 vpor ymm3,ymm3,ymm84149 add ebp,r12d4150 xor ebx,edi4151 add eax,DWORD[4+r13]4152 andn edi,ebp,edx4153 vpaddd ymm9,ymm3,ymm114154 add eax,ebx4155 rorx r12d,ebp,274156 rorx ebx,ebp,24157 and ebp,ecx4158 vmovdqu YMMWORD[352+rsp],ymm94159 add eax,r12d4160 xor ebp,edi4161 add esi,DWORD[8+r13]4162 andn edi,eax,ecx4163 add esi,ebp4164 rorx r12d,eax,274165 rorx ebp,eax,24166 and eax,ebx4167 add esi,r12d4168 xor eax,edi4169 add edx,DWORD[12+r13]4170 lea edx,[rax*1+rdx]4171 rorx r12d,esi,274172 rorx eax,esi,24173 xor esi,ebp4174 add edx,r12d4175 xor esi,ebx4176 vpalignr ymm8,ymm3,ymm2,84177 vpxor ymm4,ymm4,ymm04178 add ecx,DWORD[32+r13]4179 lea ecx,[rsi*1+rcx]4180 vpxor ymm4,ymm4,ymm54181 rorx r12d,edx,274182 rorx esi,edx,24183 xor edx,eax4184 vpxor ymm4,ymm4,ymm84185 add ecx,r12d4186 xor edx,ebp4187 add ebx,DWORD[36+r13]4188 vpsrld ymm8,ymm4,304189 vpslld ymm4,ymm4,24190 lea ebx,[rdx*1+rbx]4191 rorx r12d,ecx,274192 rorx edx,ecx,24193 xor ecx,esi4194 add ebx,r12d4195 xor ecx,eax4196 vpor ymm4,ymm4,ymm84197 add ebp,DWORD[40+r13]4198 lea ebp,[rbp*1+rcx]4199 rorx r12d,ebx,274200 rorx ecx,ebx,24201 vpaddd ymm9,ymm4,ymm114202 xor ebx,edx4203 add ebp,r12d4204 xor ebx,esi4205 add eax,DWORD[44+r13]4206 vmovdqu YMMWORD[384+rsp],ymm94207 lea eax,[rbx*1+rax]4208 rorx r12d,ebp,274209 rorx ebx,ebp,24210 xor ebp,ecx4211 add eax,r12d4212 xor ebp,edx4213 add esi,DWORD[64+r13]4214 lea esi,[rbp*1+rsi]4215 rorx r12d,eax,274216 rorx ebp,eax,24217 xor eax,ebx4218 add esi,r12d4219 xor eax,ecx4220 vpalignr ymm8,ymm4,ymm3,84221 vpxor ymm5,ymm5,ymm14222 add edx,DWORD[68+r13]4223 lea edx,[rax*1+rdx]4224 vpxor ymm5,ymm5,ymm64225 rorx r12d,esi,274226 rorx eax,esi,24227 xor esi,ebp4228 vpxor ymm5,ymm5,ymm84229 add edx,r12d4230 xor esi,ebx4231 add ecx,DWORD[72+r13]4232 vpsrld ymm8,ymm5,304233 vpslld ymm5,ymm5,24234 lea ecx,[rsi*1+rcx]4235 rorx r12d,edx,274236 rorx esi,edx,24237 xor edx,eax4238 add ecx,r12d4239 xor edx,ebp4240 vpor ymm5,ymm5,ymm84241 add ebx,DWORD[76+r13]4242 lea ebx,[rdx*1+rbx]4243 rorx r12d,ecx,274244 rorx edx,ecx,24245 vpaddd ymm9,ymm5,ymm114246 xor ecx,esi4247 add ebx,r12d4248 xor ecx,eax4249 add ebp,DWORD[96+r13]4250 vmovdqu YMMWORD[416+rsp],ymm94251 lea ebp,[rbp*1+rcx]4252 rorx r12d,ebx,274253 rorx ecx,ebx,24254 xor ebx,edx4255 add ebp,r12d4256 xor ebx,esi4257 add eax,DWORD[100+r13]4258 lea eax,[rbx*1+rax]4259 rorx r12d,ebp,274260 rorx ebx,ebp,24261 xor ebp,ecx4262 add eax,r12d4263 xor ebp,edx4264 vpalignr ymm8,ymm5,ymm4,84265 vpxor ymm6,ymm6,ymm24266 add esi,DWORD[104+r13]4267 lea esi,[rbp*1+rsi]4268 vpxor ymm6,ymm6,ymm74269 rorx r12d,eax,274270 rorx ebp,eax,24271 xor eax,ebx4272 vpxor ymm6,ymm6,ymm84273 add esi,r12d4274 xor eax,ecx4275 add edx,DWORD[108+r13]4276 lea r13,[256+r13]4277 vpsrld ymm8,ymm6,304278 vpslld ymm6,ymm6,24279 lea edx,[rax*1+rdx]4280 rorx r12d,esi,274281 rorx eax,esi,24282 xor esi,ebp4283 add edx,r12d4284 xor esi,ebx4285 vpor ymm6,ymm6,ymm84286 add ecx,DWORD[((-128))+r13]4287 lea ecx,[rsi*1+rcx]4288 rorx r12d,edx,274289 rorx esi,edx,24290 vpaddd ymm9,ymm6,ymm114291 xor edx,eax4292 add ecx,r12d4293 xor edx,ebp4294 add ebx,DWORD[((-124))+r13]4295 vmovdqu YMMWORD[448+rsp],ymm94296 lea ebx,[rdx*1+rbx]4297 rorx r12d,ecx,274298 rorx edx,ecx,24299 xor ecx,esi4300 add ebx,r12d4301 xor ecx,eax4302 add ebp,DWORD[((-120))+r13]4303 lea ebp,[rbp*1+rcx]4304 rorx r12d,ebx,274305 rorx ecx,ebx,24306 xor ebx,edx4307 add ebp,r12d4308 xor ebx,esi4309 vpalignr ymm8,ymm6,ymm5,84310 vpxor ymm7,ymm7,ymm34311 add eax,DWORD[((-116))+r13]4312 lea eax,[rbx*1+rax]4313 vpxor ymm7,ymm7,ymm04314 vmovdqu ymm11,YMMWORD[32+r14]4315 rorx r12d,ebp,274316 rorx ebx,ebp,24317 xor ebp,ecx4318 vpxor ymm7,ymm7,ymm84319 add eax,r12d4320 xor ebp,edx4321 add esi,DWORD[((-96))+r13]4322 vpsrld ymm8,ymm7,304323 vpslld ymm7,ymm7,24324 lea esi,[rbp*1+rsi]4325 rorx r12d,eax,274326 rorx ebp,eax,24327 xor eax,ebx4328 add esi,r12d4329 xor eax,ecx4330 vpor ymm7,ymm7,ymm84331 add edx,DWORD[((-92))+r13]4332 lea edx,[rax*1+rdx]4333 rorx r12d,esi,274334 rorx eax,esi,24335 vpaddd ymm9,ymm7,ymm114336 xor esi,ebp4337 add edx,r12d4338 xor esi,ebx4339 add ecx,DWORD[((-88))+r13]4340 vmovdqu YMMWORD[480+rsp],ymm94341 lea ecx,[rsi*1+rcx]4342 rorx r12d,edx,274343 rorx esi,edx,24344 xor edx,eax4345 add ecx,r12d4346 xor edx,ebp4347 add ebx,DWORD[((-84))+r13]4348 mov edi,esi4349 xor edi,eax4350 lea ebx,[rdx*1+rbx]4351 rorx r12d,ecx,274352 rorx edx,ecx,24353 xor ecx,esi4354 add ebx,r12d4355 and ecx,edi4356 jmp NEAR $L$align32_24357 ALIGN 324358 $L$align32_2:4359 vpalignr ymm8,ymm7,ymm6,84360 vpxor ymm0,ymm0,ymm44361 add ebp,DWORD[((-64))+r13]4362 xor ecx,esi4363 vpxor ymm0,ymm0,ymm14364 mov edi,edx4365 xor edi,esi4366 lea ebp,[rbp*1+rcx]4367 vpxor ymm0,ymm0,ymm84368 rorx r12d,ebx,274369 rorx ecx,ebx,24370 xor ebx,edx4371 vpsrld ymm8,ymm0,304372 vpslld ymm0,ymm0,24373 add ebp,r12d4374 and ebx,edi4375 add eax,DWORD[((-60))+r13]4376 xor ebx,edx4377 mov edi,ecx4378 xor edi,edx4379 vpor ymm0,ymm0,ymm84380 lea eax,[rbx*1+rax]4381 rorx r12d,ebp,274382 rorx ebx,ebp,24383 xor ebp,ecx4384 vpaddd ymm9,ymm0,ymm114385 add eax,r12d4386 and ebp,edi4387 add esi,DWORD[((-56))+r13]4388 xor ebp,ecx4389 vmovdqu YMMWORD[512+rsp],ymm94390 mov edi,ebx4391 xor edi,ecx4392 lea esi,[rbp*1+rsi]4393 rorx r12d,eax,274394 rorx ebp,eax,24395 xor eax,ebx4396 add esi,r12d4397 and eax,edi4398 add edx,DWORD[((-52))+r13]4399 xor eax,ebx4400 mov edi,ebp4401 xor edi,ebx4402 lea edx,[rax*1+rdx]4403 rorx r12d,esi,274404 rorx eax,esi,24405 xor esi,ebp4406 add edx,r12d4407 and esi,edi4408 add ecx,DWORD[((-32))+r13]4409 xor esi,ebp4410 mov edi,eax4411 xor edi,ebp4412 lea ecx,[rsi*1+rcx]4413 rorx r12d,edx,274414 rorx esi,edx,24415 xor edx,eax4416 add ecx,r12d4417 and edx,edi4418 vpalignr ymm8,ymm0,ymm7,84419 vpxor ymm1,ymm1,ymm54420 add ebx,DWORD[((-28))+r13]4421 xor edx,eax4422 vpxor ymm1,ymm1,ymm24423 mov edi,esi4424 xor edi,eax4425 lea ebx,[rdx*1+rbx]4426 vpxor ymm1,ymm1,ymm84427 rorx r12d,ecx,274428 rorx edx,ecx,24429 xor ecx,esi4430 vpsrld ymm8,ymm1,304431 vpslld ymm1,ymm1,24432 add ebx,r12d4433 and ecx,edi4434 add ebp,DWORD[((-24))+r13]4435 xor ecx,esi4436 mov edi,edx4437 xor edi,esi4438 vpor ymm1,ymm1,ymm84439 lea ebp,[rbp*1+rcx]4440 rorx r12d,ebx,274441 rorx ecx,ebx,24442 xor ebx,edx4443 vpaddd ymm9,ymm1,ymm114444 add ebp,r12d4445 and ebx,edi4446 add eax,DWORD[((-20))+r13]4447 xor ebx,edx4448 vmovdqu YMMWORD[544+rsp],ymm94449 mov edi,ecx4450 xor edi,edx4451 lea eax,[rbx*1+rax]4452 rorx r12d,ebp,274453 rorx ebx,ebp,24454 xor ebp,ecx4455 add eax,r12d4456 and ebp,edi4457 add esi,DWORD[r13]4458 xor ebp,ecx4459 mov edi,ebx4460 xor edi,ecx4461 lea esi,[rbp*1+rsi]4462 rorx r12d,eax,274463 rorx ebp,eax,24464 xor eax,ebx4465 add esi,r12d4466 and eax,edi4467 add edx,DWORD[4+r13]4468 xor eax,ebx4469 mov edi,ebp4470 xor edi,ebx4471 lea edx,[rax*1+rdx]4472 rorx r12d,esi,274473 rorx eax,esi,24474 xor esi,ebp4475 add edx,r12d4476 and esi,edi4477 vpalignr ymm8,ymm1,ymm0,84478 vpxor ymm2,ymm2,ymm64479 add ecx,DWORD[8+r13]4480 xor esi,ebp4481 vpxor ymm2,ymm2,ymm34482 mov edi,eax4483 xor edi,ebp4484 lea ecx,[rsi*1+rcx]4485 vpxor ymm2,ymm2,ymm84486 rorx r12d,edx,274487 rorx esi,edx,24488 xor edx,eax4489 vpsrld ymm8,ymm2,304490 vpslld ymm2,ymm2,24491 add ecx,r12d4492 and edx,edi4493 add ebx,DWORD[12+r13]4494 xor edx,eax4495 mov edi,esi4496 xor edi,eax4497 vpor ymm2,ymm2,ymm84498 lea ebx,[rdx*1+rbx]4499 rorx r12d,ecx,274500 rorx edx,ecx,24501 xor ecx,esi4502 vpaddd ymm9,ymm2,ymm114503 add ebx,r12d4504 and ecx,edi4505 add ebp,DWORD[32+r13]4506 xor ecx,esi4507 vmovdqu YMMWORD[576+rsp],ymm94508 mov edi,edx4509 xor edi,esi4510 lea ebp,[rbp*1+rcx]4511 rorx r12d,ebx,274512 rorx ecx,ebx,24513 xor ebx,edx4514 add ebp,r12d4515 and ebx,edi4516 add eax,DWORD[36+r13]4517 xor ebx,edx4518 mov edi,ecx4519 xor edi,edx4520 lea eax,[rbx*1+rax]4521 rorx r12d,ebp,274522 rorx ebx,ebp,24523 xor ebp,ecx4524 add eax,r12d4525 and ebp,edi4526 add esi,DWORD[40+r13]4527 xor ebp,ecx4528 mov edi,ebx4529 xor edi,ecx4530 lea esi,[rbp*1+rsi]4531 rorx r12d,eax,274532 rorx ebp,eax,24533 xor eax,ebx4534 add esi,r12d4535 and eax,edi4536 vpalignr ymm8,ymm2,ymm1,84537 vpxor ymm3,ymm3,ymm74538 add edx,DWORD[44+r13]4539 xor eax,ebx4540 vpxor ymm3,ymm3,ymm44541 mov edi,ebp4542 xor edi,ebx4543 lea edx,[rax*1+rdx]4544 vpxor ymm3,ymm3,ymm84545 rorx r12d,esi,274546 rorx eax,esi,24547 xor esi,ebp4548 vpsrld ymm8,ymm3,304549 vpslld ymm3,ymm3,24550 add edx,r12d4551 and esi,edi4552 add ecx,DWORD[64+r13]4553 xor esi,ebp4554 mov edi,eax4555 xor edi,ebp4556 vpor ymm3,ymm3,ymm84557 lea ecx,[rsi*1+rcx]4558 rorx r12d,edx,274559 rorx esi,edx,24560 xor edx,eax4561 vpaddd ymm9,ymm3,ymm114562 add ecx,r12d4563 and edx,edi4564 add ebx,DWORD[68+r13]4565 xor edx,eax4566 vmovdqu YMMWORD[608+rsp],ymm94567 mov edi,esi4568 xor edi,eax4569 lea ebx,[rdx*1+rbx]4570 rorx r12d,ecx,274571 rorx edx,ecx,24572 xor ecx,esi4573 add ebx,r12d4574 and ecx,edi4575 add ebp,DWORD[72+r13]4576 xor ecx,esi4577 mov edi,edx4578 xor edi,esi4579 lea ebp,[rbp*1+rcx]4580 rorx r12d,ebx,274581 rorx ecx,ebx,24582 xor ebx,edx4583 add ebp,r12d4584 and ebx,edi4585 add eax,DWORD[76+r13]4586 xor ebx,edx4587 lea eax,[rbx*1+rax]4588 rorx r12d,ebp,274589 rorx ebx,ebp,24590 xor ebp,ecx4591 add eax,r12d4592 xor ebp,edx4593 add esi,DWORD[96+r13]4594 lea esi,[rbp*1+rsi]4595 rorx r12d,eax,274596 rorx ebp,eax,24597 xor eax,ebx4598 add esi,r12d4599 xor eax,ecx4600 add edx,DWORD[100+r13]4601 lea edx,[rax*1+rdx]4602 rorx r12d,esi,274603 rorx eax,esi,24604 xor esi,ebp4605 add edx,r12d4606 xor esi,ebx4607 add ecx,DWORD[104+r13]4608 lea ecx,[rsi*1+rcx]4609 rorx r12d,edx,274610 rorx esi,edx,24611 xor edx,eax4612 add ecx,r12d4613 xor edx,ebp4614 add ebx,DWORD[108+r13]4615 lea r13,[256+r13]4616 lea ebx,[rdx*1+rbx]4617 rorx r12d,ecx,274618 rorx edx,ecx,24619 xor ecx,esi4620 add ebx,r12d4621 xor ecx,eax4622 add ebp,DWORD[((-128))+r13]4623 lea ebp,[rbp*1+rcx]4624 rorx r12d,ebx,274625 rorx ecx,ebx,24626 xor ebx,edx4627 add ebp,r12d4628 xor ebx,esi4629 add eax,DWORD[((-124))+r13]4630 lea eax,[rbx*1+rax]4631 rorx r12d,ebp,274632 rorx ebx,ebp,24633 xor ebp,ecx4634 add eax,r12d4635 xor ebp,edx4636 add esi,DWORD[((-120))+r13]4637 lea esi,[rbp*1+rsi]4638 rorx r12d,eax,274639 rorx ebp,eax,24640 xor eax,ebx4641 add esi,r12d4642 xor eax,ecx4643 add edx,DWORD[((-116))+r13]4644 lea edx,[rax*1+rdx]4645 rorx r12d,esi,274646 rorx eax,esi,24647 xor esi,ebp4648 add edx,r12d4649 xor esi,ebx4650 add ecx,DWORD[((-96))+r13]4651 lea ecx,[rsi*1+rcx]4652 rorx r12d,edx,274653 rorx esi,edx,24654 xor edx,eax4655 add ecx,r12d4656 xor edx,ebp4657 add ebx,DWORD[((-92))+r13]4658 lea ebx,[rdx*1+rbx]4659 rorx r12d,ecx,274660 rorx edx,ecx,24661 xor ecx,esi4662 add ebx,r12d4663 xor ecx,eax4664 add ebp,DWORD[((-88))+r13]4665 lea ebp,[rbp*1+rcx]4666 rorx r12d,ebx,274667 rorx ecx,ebx,24668 xor ebx,edx4669 add ebp,r12d4670 xor ebx,esi4671 add eax,DWORD[((-84))+r13]4672 lea eax,[rbx*1+rax]4673 rorx r12d,ebp,274674 rorx ebx,ebp,24675 xor ebp,ecx4676 add eax,r12d4677 xor ebp,edx4678 add esi,DWORD[((-64))+r13]4679 lea esi,[rbp*1+rsi]4680 rorx r12d,eax,274681 rorx ebp,eax,24682 xor eax,ebx4683 add esi,r12d4684 xor eax,ecx4685 add edx,DWORD[((-60))+r13]4686 lea edx,[rax*1+rdx]4687 rorx r12d,esi,274688 rorx eax,esi,24689 xor esi,ebp4690 add edx,r12d4691 xor esi,ebx4692 add ecx,DWORD[((-56))+r13]4693 lea ecx,[rsi*1+rcx]4694 rorx r12d,edx,274695 rorx esi,edx,24696 xor edx,eax4697 add ecx,r12d4698 xor edx,ebp4699 add ebx,DWORD[((-52))+r13]4700 lea ebx,[rdx*1+rbx]4701 rorx r12d,ecx,274702 rorx edx,ecx,24703 xor ecx,esi4704 add ebx,r12d4705 xor ecx,eax4706 add ebp,DWORD[((-32))+r13]4707 lea ebp,[rbp*1+rcx]4708 rorx r12d,ebx,274709 rorx ecx,ebx,24710 xor ebx,edx4711 add ebp,r12d4712 xor ebx,esi4713 add eax,DWORD[((-28))+r13]4714 lea eax,[rbx*1+rax]4715 rorx r12d,ebp,274716 rorx ebx,ebp,24717 xor ebp,ecx4718 add eax,r12d4719 xor ebp,edx4720 add esi,DWORD[((-24))+r13]4721 lea esi,[rbp*1+rsi]4722 rorx r12d,eax,274723 rorx ebp,eax,24724 xor eax,ebx4725 add esi,r12d4726 xor eax,ecx4727 add edx,DWORD[((-20))+r13]4728 lea edx,[rax*1+rdx]4729 rorx r12d,esi,274730 add edx,r12d4731 lea r13,[128+r9]4732 lea rdi,[128+r9]4733 cmp r13,r104734 cmovae r13,r94735 4736 4737 add edx,DWORD[r8]4738 add esi,DWORD[4+r8]4739 add ebp,DWORD[8+r8]4740 mov DWORD[r8],edx4741 add ebx,DWORD[12+r8]4742 mov DWORD[4+r8],esi4743 mov eax,edx4744 add ecx,DWORD[16+r8]4745 mov r12d,ebp4746 mov DWORD[8+r8],ebp4747 mov edx,ebx4748 4749 mov DWORD[12+r8],ebx4750 mov ebp,esi4751 mov DWORD[16+r8],ecx4752 4753 mov esi,ecx4754 mov ecx,r12d4755 4756 4757 cmp r9,r104758 je NEAR $L$done_avx24759 vmovdqu ymm6,YMMWORD[64+r14]4760 cmp rdi,r104761 ja NEAR $L$ast_avx24762 4763 vmovdqu xmm0,XMMWORD[((-64))+rdi]4764 vmovdqu xmm1,XMMWORD[((-48))+rdi]4765 vmovdqu xmm2,XMMWORD[((-32))+rdi]4766 vmovdqu xmm3,XMMWORD[((-16))+rdi]4767 vinserti128 ymm0,ymm0,XMMWORD[r13],14768 vinserti128 ymm1,ymm1,XMMWORD[16+r13],14769 vinserti128 ymm2,ymm2,XMMWORD[32+r13],14770 vinserti128 ymm3,ymm3,XMMWORD[48+r13],14771 jmp NEAR $L$ast_avx24772 4773 ALIGN 324774 $L$ast_avx2:4775 lea r13,[((128+16))+rsp]4776 rorx ebx,ebp,24777 andn edi,ebp,edx4778 and ebp,ecx4779 xor ebp,edi4780 sub r9,-1284781 add esi,DWORD[((-128))+r13]4782 andn edi,eax,ecx4783 add esi,ebp4784 rorx r12d,eax,274785 rorx ebp,eax,24786 and eax,ebx4787 add esi,r12d4788 xor eax,edi4789 add edx,DWORD[((-124))+r13]4790 andn edi,esi,ebx4791 add edx,eax4792 rorx r12d,esi,274793 rorx eax,esi,24794 and esi,ebp4795 add edx,r12d4796 xor esi,edi4797 add ecx,DWORD[((-120))+r13]4798 andn edi,edx,ebp4799 add ecx,esi4800 rorx r12d,edx,274801 rorx esi,edx,24802 and edx,eax4803 add ecx,r12d4804 xor edx,edi4805 add ebx,DWORD[((-116))+r13]4806 andn edi,ecx,eax4807 add ebx,edx4808 rorx r12d,ecx,274809 rorx edx,ecx,24810 and ecx,esi4811 add ebx,r12d4812 xor ecx,edi4813 add ebp,DWORD[((-96))+r13]4814 andn edi,ebx,esi4815 add ebp,ecx4816 rorx r12d,ebx,274817 rorx ecx,ebx,24818 and ebx,edx4819 add ebp,r12d4820 xor ebx,edi4821 add eax,DWORD[((-92))+r13]4822 andn edi,ebp,edx4823 add eax,ebx4824 rorx r12d,ebp,274825 rorx ebx,ebp,24826 and ebp,ecx4827 add eax,r12d4828 xor ebp,edi4829 add esi,DWORD[((-88))+r13]4830 andn edi,eax,ecx4831 add esi,ebp4832 rorx r12d,eax,274833 rorx ebp,eax,24834 and eax,ebx4835 add esi,r12d4836 xor eax,edi4837 add edx,DWORD[((-84))+r13]4838 andn edi,esi,ebx4839 add edx,eax4840 rorx r12d,esi,274841 rorx eax,esi,24842 and esi,ebp4843 add edx,r12d4844 xor esi,edi4845 add ecx,DWORD[((-64))+r13]4846 andn edi,edx,ebp4847 add ecx,esi4848 rorx r12d,edx,274849 rorx esi,edx,24850 and edx,eax4851 add ecx,r12d4852 xor edx,edi4853 add ebx,DWORD[((-60))+r13]4854 andn edi,ecx,eax4855 add ebx,edx4856 rorx r12d,ecx,274857 rorx edx,ecx,24858 and ecx,esi4859 add ebx,r12d4860 xor ecx,edi4861 add ebp,DWORD[((-56))+r13]4862 andn edi,ebx,esi4863 add ebp,ecx4864 rorx r12d,ebx,274865 rorx ecx,ebx,24866 and ebx,edx4867 add ebp,r12d4868 xor ebx,edi4869 add eax,DWORD[((-52))+r13]4870 andn edi,ebp,edx4871 add eax,ebx4872 rorx r12d,ebp,274873 rorx ebx,ebp,24874 and ebp,ecx4875 add eax,r12d4876 xor ebp,edi4877 add esi,DWORD[((-32))+r13]4878 andn edi,eax,ecx4879 add esi,ebp4880 rorx r12d,eax,274881 rorx ebp,eax,24882 and eax,ebx4883 add esi,r12d4884 xor eax,edi4885 add edx,DWORD[((-28))+r13]4886 andn edi,esi,ebx4887 add edx,eax4888 rorx r12d,esi,274889 rorx eax,esi,24890 and esi,ebp4891 add edx,r12d4892 xor esi,edi4893 add ecx,DWORD[((-24))+r13]4894 andn edi,edx,ebp4895 add ecx,esi4896 rorx r12d,edx,274897 rorx esi,edx,24898 and edx,eax4899 add ecx,r12d4900 xor edx,edi4901 add ebx,DWORD[((-20))+r13]4902 andn edi,ecx,eax4903 add ebx,edx4904 rorx r12d,ecx,274905 rorx edx,ecx,24906 and ecx,esi4907 add ebx,r12d4908 xor ecx,edi4909 add ebp,DWORD[r13]4910 andn edi,ebx,esi4911 add ebp,ecx4912 rorx r12d,ebx,274913 rorx ecx,ebx,24914 and ebx,edx4915 add ebp,r12d4916 xor ebx,edi4917 add eax,DWORD[4+r13]4918 andn edi,ebp,edx4919 add eax,ebx4920 rorx r12d,ebp,274921 rorx ebx,ebp,24922 and ebp,ecx4923 add eax,r12d4924 xor ebp,edi4925 add esi,DWORD[8+r13]4926 andn edi,eax,ecx4927 add esi,ebp4928 rorx r12d,eax,274929 rorx ebp,eax,24930 and eax,ebx4931 add esi,r12d4932 xor eax,edi4933 add edx,DWORD[12+r13]4934 lea edx,[rax*1+rdx]4935 rorx r12d,esi,274936 rorx eax,esi,24937 xor esi,ebp4938 add edx,r12d4939 xor esi,ebx4940 add ecx,DWORD[32+r13]4941 lea ecx,[rsi*1+rcx]4942 rorx r12d,edx,274943 rorx esi,edx,24944 xor edx,eax4945 add ecx,r12d4946 xor edx,ebp4947 add ebx,DWORD[36+r13]4948 lea ebx,[rdx*1+rbx]4949 rorx r12d,ecx,274950 rorx edx,ecx,24951 xor ecx,esi4952 add ebx,r12d4953 xor ecx,eax4954 add ebp,DWORD[40+r13]4955 lea ebp,[rbp*1+rcx]4956 rorx r12d,ebx,274957 rorx ecx,ebx,24958 xor ebx,edx4959 add ebp,r12d4960 xor ebx,esi4961 add eax,DWORD[44+r13]4962 lea eax,[rbx*1+rax]4963 rorx r12d,ebp,274964 rorx ebx,ebp,24965 xor ebp,ecx4966 add eax,r12d4967 xor ebp,edx4968 add esi,DWORD[64+r13]4969 lea esi,[rbp*1+rsi]4970 rorx r12d,eax,274971 rorx ebp,eax,24972 xor eax,ebx4973 add esi,r12d4974 xor eax,ecx4975 vmovdqu ymm11,YMMWORD[((-64))+r14]4976 vpshufb ymm0,ymm0,ymm64977 add edx,DWORD[68+r13]4978 lea edx,[rax*1+rdx]4979 rorx r12d,esi,274980 rorx eax,esi,24981 xor esi,ebp4982 add edx,r12d4983 xor esi,ebx4984 add ecx,DWORD[72+r13]4985 lea ecx,[rsi*1+rcx]4986 rorx r12d,edx,274987 rorx esi,edx,24988 xor edx,eax4989 add ecx,r12d4990 xor edx,ebp4991 add ebx,DWORD[76+r13]4992 lea ebx,[rdx*1+rbx]4993 rorx r12d,ecx,274994 rorx edx,ecx,24995 xor ecx,esi4996 add ebx,r12d4997 xor ecx,eax4998 add ebp,DWORD[96+r13]4999 lea ebp,[rbp*1+rcx]5000 rorx r12d,ebx,275001 rorx ecx,ebx,25002 xor ebx,edx5003 add ebp,r12d5004 xor ebx,esi5005 add eax,DWORD[100+r13]5006 lea eax,[rbx*1+rax]5007 rorx r12d,ebp,275008 rorx ebx,ebp,25009 xor ebp,ecx5010 add eax,r12d5011 xor ebp,edx5012 vpshufb ymm1,ymm1,ymm65013 vpaddd ymm8,ymm0,ymm115014 add esi,DWORD[104+r13]5015 lea esi,[rbp*1+rsi]5016 rorx r12d,eax,275017 rorx ebp,eax,25018 xor eax,ebx5019 add esi,r12d5020 xor eax,ecx5021 add edx,DWORD[108+r13]5022 lea r13,[256+r13]5023 lea edx,[rax*1+rdx]5024 rorx r12d,esi,275025 rorx eax,esi,25026 xor esi,ebp5027 add edx,r12d5028 xor esi,ebx5029 add ecx,DWORD[((-128))+r13]5030 lea ecx,[rsi*1+rcx]5031 rorx r12d,edx,275032 rorx esi,edx,25033 xor edx,eax5034 add ecx,r12d5035 xor edx,ebp5036 add ebx,DWORD[((-124))+r13]5037 lea ebx,[rdx*1+rbx]5038 rorx r12d,ecx,275039 rorx edx,ecx,25040 xor ecx,esi5041 add ebx,r12d5042 xor ecx,eax5043 add ebp,DWORD[((-120))+r13]5044 lea ebp,[rbp*1+rcx]5045 rorx r12d,ebx,275046 rorx ecx,ebx,25047 xor ebx,edx5048 add ebp,r12d5049 xor ebx,esi5050 vmovdqu YMMWORD[rsp],ymm85051 vpshufb ymm2,ymm2,ymm65052 vpaddd ymm9,ymm1,ymm115053 add eax,DWORD[((-116))+r13]5054 lea eax,[rbx*1+rax]5055 rorx r12d,ebp,275056 rorx ebx,ebp,25057 xor ebp,ecx5058 add eax,r12d5059 xor ebp,edx5060 add esi,DWORD[((-96))+r13]5061 lea esi,[rbp*1+rsi]5062 rorx r12d,eax,275063 rorx ebp,eax,25064 xor eax,ebx5065 add esi,r12d5066 xor eax,ecx5067 add edx,DWORD[((-92))+r13]5068 lea edx,[rax*1+rdx]5069 rorx r12d,esi,275070 rorx eax,esi,25071 xor esi,ebp5072 add edx,r12d5073 xor esi,ebx5074 add ecx,DWORD[((-88))+r13]5075 lea ecx,[rsi*1+rcx]5076 rorx r12d,edx,275077 rorx esi,edx,25078 xor edx,eax5079 add ecx,r12d5080 xor edx,ebp5081 add ebx,DWORD[((-84))+r13]5082 mov edi,esi5083 xor edi,eax5084 lea ebx,[rdx*1+rbx]5085 rorx r12d,ecx,275086 rorx edx,ecx,25087 xor ecx,esi5088 add ebx,r12d5089 and ecx,edi5090 vmovdqu YMMWORD[32+rsp],ymm95091 vpshufb ymm3,ymm3,ymm65092 vpaddd ymm6,ymm2,ymm115093 add ebp,DWORD[((-64))+r13]5094 xor ecx,esi5095 mov edi,edx5096 xor edi,esi5097 lea ebp,[rbp*1+rcx]5098 rorx r12d,ebx,275099 rorx ecx,ebx,25100 xor ebx,edx5101 add ebp,r12d5102 and ebx,edi5103 add eax,DWORD[((-60))+r13]5104 xor ebx,edx5105 mov edi,ecx5106 xor edi,edx5107 lea eax,[rbx*1+rax]5108 rorx r12d,ebp,275109 rorx ebx,ebp,25110 xor ebp,ecx5111 add eax,r12d5112 and ebp,edi5113 add esi,DWORD[((-56))+r13]5114 xor ebp,ecx5115 mov edi,ebx5116 xor edi,ecx5117 lea esi,[rbp*1+rsi]5118 rorx r12d,eax,275119 rorx ebp,eax,25120 xor eax,ebx5121 add esi,r12d5122 and eax,edi5123 add edx,DWORD[((-52))+r13]5124 xor eax,ebx5125 mov edi,ebp5126 xor edi,ebx5127 lea edx,[rax*1+rdx]5128 rorx r12d,esi,275129 rorx eax,esi,25130 xor esi,ebp5131 add edx,r12d5132 and esi,edi5133 add ecx,DWORD[((-32))+r13]5134 xor esi,ebp5135 mov edi,eax5136 xor edi,ebp5137 lea ecx,[rsi*1+rcx]5138 rorx r12d,edx,275139 rorx esi,edx,25140 xor edx,eax5141 add ecx,r12d5142 and edx,edi5143 jmp NEAR $L$align32_35144 ALIGN 325145 $L$align32_3:5146 vmovdqu YMMWORD[64+rsp],ymm65147 vpaddd ymm7,ymm3,ymm115148 add ebx,DWORD[((-28))+r13]5149 xor edx,eax5150 mov edi,esi5151 xor edi,eax5152 lea ebx,[rdx*1+rbx]5153 rorx r12d,ecx,275154 rorx edx,ecx,25155 xor ecx,esi5156 add ebx,r12d5157 and ecx,edi5158 add ebp,DWORD[((-24))+r13]5159 xor ecx,esi5160 mov edi,edx5161 xor edi,esi5162 lea ebp,[rbp*1+rcx]5163 rorx r12d,ebx,275164 rorx ecx,ebx,25165 xor ebx,edx5166 add ebp,r12d5167 and ebx,edi5168 add eax,DWORD[((-20))+r13]5169 xor ebx,edx5170 mov edi,ecx5171 xor edi,edx5172 lea eax,[rbx*1+rax]5173 rorx r12d,ebp,275174 rorx ebx,ebp,25175 xor ebp,ecx5176 add eax,r12d5177 and ebp,edi5178 add esi,DWORD[r13]5179 xor ebp,ecx5180 mov edi,ebx5181 xor edi,ecx5182 lea esi,[rbp*1+rsi]5183 rorx r12d,eax,275184 rorx ebp,eax,25185 xor eax,ebx5186 add esi,r12d5187 and eax,edi5188 add edx,DWORD[4+r13]5189 xor eax,ebx5190 mov edi,ebp5191 xor edi,ebx5192 lea edx,[rax*1+rdx]5193 rorx r12d,esi,275194 rorx eax,esi,25195 xor esi,ebp5196 add edx,r12d5197 and esi,edi5198 vmovdqu YMMWORD[96+rsp],ymm75199 add ecx,DWORD[8+r13]5200 xor esi,ebp5201 mov edi,eax5202 xor edi,ebp5203 lea ecx,[rsi*1+rcx]5204 rorx r12d,edx,275205 rorx esi,edx,25206 xor edx,eax5207 add ecx,r12d5208 and edx,edi5209 add ebx,DWORD[12+r13]5210 xor edx,eax5211 mov edi,esi5212 xor edi,eax5213 lea ebx,[rdx*1+rbx]5214 rorx r12d,ecx,275215 rorx edx,ecx,25216 xor ecx,esi5217 add ebx,r12d5218 and ecx,edi5219 add ebp,DWORD[32+r13]5220 xor ecx,esi5221 mov edi,edx5222 xor edi,esi5223 lea ebp,[rbp*1+rcx]5224 rorx r12d,ebx,275225 rorx ecx,ebx,25226 xor ebx,edx5227 add ebp,r12d5228 and ebx,edi5229 add eax,DWORD[36+r13]5230 xor ebx,edx5231 mov edi,ecx5232 xor edi,edx5233 lea eax,[rbx*1+rax]5234 rorx r12d,ebp,275235 rorx ebx,ebp,25236 xor ebp,ecx5237 add eax,r12d5238 and ebp,edi5239 add esi,DWORD[40+r13]5240 xor ebp,ecx5241 mov edi,ebx5242 xor edi,ecx5243 lea esi,[rbp*1+rsi]5244 rorx r12d,eax,275245 rorx ebp,eax,25246 xor eax,ebx5247 add esi,r12d5248 and eax,edi5249 vpalignr ymm4,ymm1,ymm0,85250 add edx,DWORD[44+r13]5251 xor eax,ebx5252 mov edi,ebp5253 xor edi,ebx5254 vpsrldq ymm8,ymm3,45255 lea edx,[rax*1+rdx]5256 rorx r12d,esi,275257 rorx eax,esi,25258 vpxor ymm4,ymm4,ymm05259 vpxor ymm8,ymm8,ymm25260 xor esi,ebp5261 add edx,r12d5262 vpxor ymm4,ymm4,ymm85263 and esi,edi5264 add ecx,DWORD[64+r13]5265 xor esi,ebp5266 mov edi,eax5267 vpsrld ymm8,ymm4,315268 xor edi,ebp5269 lea ecx,[rsi*1+rcx]5270 rorx r12d,edx,275271 vpslldq ymm10,ymm4,125272 vpaddd ymm4,ymm4,ymm45273 rorx esi,edx,25274 xor edx,eax5275 vpsrld ymm9,ymm10,305276 vpor ymm4,ymm4,ymm85277 add ecx,r12d5278 and edx,edi5279 vpslld ymm10,ymm10,25280 vpxor ymm4,ymm4,ymm95281 add ebx,DWORD[68+r13]5282 xor edx,eax5283 vpxor ymm4,ymm4,ymm105284 mov edi,esi5285 xor edi,eax5286 lea ebx,[rdx*1+rbx]5287 vpaddd ymm9,ymm4,ymm115288 rorx r12d,ecx,275289 rorx edx,ecx,25290 xor ecx,esi5291 vmovdqu YMMWORD[128+rsp],ymm95292 add ebx,r12d5293 and ecx,edi5294 add ebp,DWORD[72+r13]5295 xor ecx,esi5296 mov edi,edx5297 xor edi,esi5298 lea ebp,[rbp*1+rcx]5299 rorx r12d,ebx,275300 rorx ecx,ebx,25301 xor ebx,edx5302 add ebp,r12d5303 and ebx,edi5304 add eax,DWORD[76+r13]5305 xor ebx,edx5306 lea eax,[rbx*1+rax]5307 rorx r12d,ebp,275308 rorx ebx,ebp,25309 xor ebp,ecx5310 add eax,r12d5311 xor ebp,edx5312 vpalignr ymm5,ymm2,ymm1,85313 add esi,DWORD[96+r13]5314 lea esi,[rbp*1+rsi]5315 rorx r12d,eax,275316 rorx ebp,eax,25317 vpsrldq ymm8,ymm4,45318 xor eax,ebx5319 add esi,r12d5320 xor eax,ecx5321 vpxor ymm5,ymm5,ymm15322 vpxor ymm8,ymm8,ymm35323 add edx,DWORD[100+r13]5324 lea edx,[rax*1+rdx]5325 vpxor ymm5,ymm5,ymm85326 rorx r12d,esi,275327 rorx eax,esi,25328 xor esi,ebp5329 add edx,r12d5330 vpsrld ymm8,ymm5,315331 vmovdqu ymm11,YMMWORD[((-32))+r14]5332 xor esi,ebx5333 add ecx,DWORD[104+r13]5334 lea ecx,[rsi*1+rcx]5335 vpslldq ymm10,ymm5,125336 vpaddd ymm5,ymm5,ymm55337 rorx r12d,edx,275338 rorx esi,edx,25339 vpsrld ymm9,ymm10,305340 vpor ymm5,ymm5,ymm85341 xor edx,eax5342 add ecx,r12d5343 vpslld ymm10,ymm10,25344 vpxor ymm5,ymm5,ymm95345 xor edx,ebp5346 add ebx,DWORD[108+r13]5347 lea r13,[256+r13]5348 vpxor ymm5,ymm5,ymm105349 lea ebx,[rdx*1+rbx]5350 rorx r12d,ecx,275351 rorx edx,ecx,25352 vpaddd ymm9,ymm5,ymm115353 xor ecx,esi5354 add ebx,r12d5355 xor ecx,eax5356 vmovdqu YMMWORD[160+rsp],ymm95357 add ebp,DWORD[((-128))+r13]5358 lea ebp,[rbp*1+rcx]5359 rorx r12d,ebx,275360 rorx ecx,ebx,25361 xor ebx,edx5362 add ebp,r12d5363 xor ebx,esi5364 vpalignr ymm6,ymm3,ymm2,85365 add eax,DWORD[((-124))+r13]5366 lea eax,[rbx*1+rax]5367 rorx r12d,ebp,275368 rorx ebx,ebp,25369 vpsrldq ymm8,ymm5,45370 xor ebp,ecx5371 add eax,r12d5372 xor ebp,edx5373 vpxor ymm6,ymm6,ymm25374 vpxor ymm8,ymm8,ymm45375 add esi,DWORD[((-120))+r13]5376 lea esi,[rbp*1+rsi]5377 vpxor ymm6,ymm6,ymm85378 rorx r12d,eax,275379 rorx ebp,eax,25380 xor eax,ebx5381 add esi,r12d5382 vpsrld ymm8,ymm6,315383 xor eax,ecx5384 add edx,DWORD[((-116))+r13]5385 lea edx,[rax*1+rdx]5386 vpslldq ymm10,ymm6,125387 vpaddd ymm6,ymm6,ymm65388 rorx r12d,esi,275389 rorx eax,esi,25390 vpsrld ymm9,ymm10,305391 vpor ymm6,ymm6,ymm85392 xor esi,ebp5393 add edx,r12d5394 vpslld ymm10,ymm10,25395 vpxor ymm6,ymm6,ymm95396 xor esi,ebx5397 add ecx,DWORD[((-96))+r13]5398 vpxor ymm6,ymm6,ymm105399 lea ecx,[rsi*1+rcx]5400 rorx r12d,edx,275401 rorx esi,edx,25402 vpaddd ymm9,ymm6,ymm115403 xor edx,eax5404 add ecx,r12d5405 xor edx,ebp5406 vmovdqu YMMWORD[192+rsp],ymm95407 add ebx,DWORD[((-92))+r13]5408 lea ebx,[rdx*1+rbx]5409 rorx r12d,ecx,275410 rorx edx,ecx,25411 xor ecx,esi5412 add ebx,r12d5413 xor ecx,eax5414 vpalignr ymm7,ymm4,ymm3,85415 add ebp,DWORD[((-88))+r13]5416 lea ebp,[rbp*1+rcx]5417 rorx r12d,ebx,275418 rorx ecx,ebx,25419 vpsrldq ymm8,ymm6,45420 xor ebx,edx5421 add ebp,r12d5422 xor ebx,esi5423 vpxor ymm7,ymm7,ymm35424 vpxor ymm8,ymm8,ymm55425 add eax,DWORD[((-84))+r13]5426 lea eax,[rbx*1+rax]5427 vpxor ymm7,ymm7,ymm85428 rorx r12d,ebp,275429 rorx ebx,ebp,25430 xor ebp,ecx5431 add eax,r12d5432 vpsrld ymm8,ymm7,315433 xor ebp,edx5434 add esi,DWORD[((-64))+r13]5435 lea esi,[rbp*1+rsi]5436 vpslldq ymm10,ymm7,125437 vpaddd ymm7,ymm7,ymm75438 rorx r12d,eax,275439 rorx ebp,eax,25440 vpsrld ymm9,ymm10,305441 vpor ymm7,ymm7,ymm85442 xor eax,ebx5443 add esi,r12d5444 vpslld ymm10,ymm10,25445 vpxor ymm7,ymm7,ymm95446 xor eax,ecx5447 add edx,DWORD[((-60))+r13]5448 vpxor ymm7,ymm7,ymm105449 lea edx,[rax*1+rdx]5450 rorx r12d,esi,275451 rorx eax,esi,25452 vpaddd ymm9,ymm7,ymm115453 xor esi,ebp5454 add edx,r12d5455 xor esi,ebx5456 vmovdqu YMMWORD[224+rsp],ymm95457 add ecx,DWORD[((-56))+r13]5458 lea ecx,[rsi*1+rcx]5459 rorx r12d,edx,275460 rorx esi,edx,25461 xor edx,eax5462 add ecx,r12d5463 xor edx,ebp5464 add ebx,DWORD[((-52))+r13]5465 lea ebx,[rdx*1+rbx]5466 rorx r12d,ecx,275467 rorx edx,ecx,25468 xor ecx,esi5469 add ebx,r12d5470 xor ecx,eax5471 add ebp,DWORD[((-32))+r13]5472 lea ebp,[rbp*1+rcx]5473 rorx r12d,ebx,275474 rorx ecx,ebx,25475 xor ebx,edx5476 add ebp,r12d5477 xor ebx,esi5478 add eax,DWORD[((-28))+r13]5479 lea eax,[rbx*1+rax]5480 rorx r12d,ebp,275481 rorx ebx,ebp,25482 xor ebp,ecx5483 add eax,r12d5484 xor ebp,edx5485 add esi,DWORD[((-24))+r13]5486 lea esi,[rbp*1+rsi]5487 rorx r12d,eax,275488 rorx ebp,eax,25489 xor eax,ebx5490 add esi,r12d5491 xor eax,ecx5492 add edx,DWORD[((-20))+r13]5493 lea edx,[rax*1+rdx]5494 rorx r12d,esi,275495 add edx,r12d5496 lea r13,[128+rsp]5497 5498 5499 add edx,DWORD[r8]5500 add esi,DWORD[4+r8]5501 add ebp,DWORD[8+r8]5502 mov DWORD[r8],edx5503 add ebx,DWORD[12+r8]5504 mov DWORD[4+r8],esi5505 mov eax,edx5506 add ecx,DWORD[16+r8]5507 mov r12d,ebp5508 mov DWORD[8+r8],ebp5509 mov edx,ebx5510 5511 mov DWORD[12+r8],ebx5512 mov ebp,esi5513 mov DWORD[16+r8],ecx5514 5515 mov esi,ecx5516 mov ecx,r12d5517 5518 5519 cmp r9,r105520 jbe NEAR $L$oop_avx25521 5522 $L$done_avx2:5523 vzeroupper5524 movaps xmm6,XMMWORD[((-40-96))+r11]5525 movaps xmm7,XMMWORD[((-40-80))+r11]5526 movaps xmm8,XMMWORD[((-40-64))+r11]5527 movaps xmm9,XMMWORD[((-40-48))+r11]5528 movaps xmm10,XMMWORD[((-40-32))+r11]5529 movaps xmm11,XMMWORD[((-40-16))+r11]5530 mov r14,QWORD[((-40))+r11]5531 5532 mov r13,QWORD[((-32))+r11]5533 5534 mov r12,QWORD[((-24))+r11]5535 5536 mov rbp,QWORD[((-16))+r11]5537 5538 mov rbx,QWORD[((-8))+r11]5539 5540 lea rsp,[r11]5541 5542 $L$epilogue_avx2:5543 mov rdi,QWORD[8+rsp] ;WIN64 epilogue5544 mov rsi,QWORD[16+rsp]5545 DB 0F3h,0C3h ;repret5546 5547 $L$SEH_end_sha1_block_data_order_avx2:5548 2670 ALIGN 64 5549 2671 K_XX_XX: … … 5739 2861 DD $L$SEH_end_sha1_block_data_order_ssse3 wrt ..imagebase 5740 2862 DD $L$SEH_info_sha1_block_data_order_ssse3 wrt ..imagebase 5741 DD $L$SEH_begin_sha1_block_data_order_avx wrt ..imagebase5742 DD $L$SEH_end_sha1_block_data_order_avx wrt ..imagebase5743 DD $L$SEH_info_sha1_block_data_order_avx wrt ..imagebase5744 DD $L$SEH_begin_sha1_block_data_order_avx2 wrt ..imagebase5745 DD $L$SEH_end_sha1_block_data_order_avx2 wrt ..imagebase5746 DD $L$SEH_info_sha1_block_data_order_avx2 wrt ..imagebase5747 2863 section .xdata rdata align=8 5748 2864 ALIGN 8 … … 5757 2873 DD ssse3_handler wrt ..imagebase 5758 2874 DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase 5759 $L$SEH_info_sha1_block_data_order_avx:5760 DB 9,0,0,05761 DD ssse3_handler wrt ..imagebase5762 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase5763 $L$SEH_info_sha1_block_data_order_avx2:5764 DB 9,0,0,05765 DD ssse3_handler wrt ..imagebase5766 DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/sha256-mb-x86_64.S
r94083 r95221 25 25 bt rcx,61 26 26 jc NEAR _shaext_shortcut 27 test ecx,26843545628 jnz NEAR _avx_shortcut29 27 mov rax,rsp 30 28 … … 3207 3205 3208 3206 $L$SEH_end_sha256_multi_block_shaext: 3209 3210 ALIGN 323211 sha256_multi_block_avx:3212 mov QWORD[8+rsp],rdi ;WIN64 prologue3213 mov QWORD[16+rsp],rsi3214 mov rax,rsp3215 $L$SEH_begin_sha256_multi_block_avx:3216 mov rdi,rcx3217 mov rsi,rdx3218 mov rdx,r83219 3220 3221 3222 _avx_shortcut:3223 shr rcx,323224 cmp edx,23225 jb NEAR $L$avx3226 test ecx,323227 jnz NEAR _avx2_shortcut3228 jmp NEAR $L$avx3229 ALIGN 323230 $L$avx:3231 mov rax,rsp3232 3233 push rbx3234 3235 push rbp3236 3237 lea rsp,[((-168))+rsp]3238 movaps XMMWORD[rsp],xmm63239 movaps XMMWORD[16+rsp],xmm73240 movaps XMMWORD[32+rsp],xmm83241 movaps XMMWORD[48+rsp],xmm93242 movaps XMMWORD[(-120)+rax],xmm103243 movaps XMMWORD[(-104)+rax],xmm113244 movaps XMMWORD[(-88)+rax],xmm123245 movaps XMMWORD[(-72)+rax],xmm133246 movaps XMMWORD[(-56)+rax],xmm143247 movaps XMMWORD[(-40)+rax],xmm153248 sub rsp,2883249 and rsp,-2563250 mov QWORD[272+rsp],rax3251 3252 $L$body_avx:3253 lea rbp,[((K256+128))]3254 lea rbx,[256+rsp]3255 lea rdi,[128+rdi]3256 3257 $L$oop_grande_avx:3258 mov DWORD[280+rsp],edx3259 xor edx,edx3260 3261 mov r8,QWORD[rsi]3262 3263 mov ecx,DWORD[8+rsi]3264 cmp ecx,edx3265 cmovg edx,ecx3266 test ecx,ecx3267 mov DWORD[rbx],ecx3268 cmovle r8,rbp3269 3270 mov r9,QWORD[16+rsi]3271 3272 mov ecx,DWORD[24+rsi]3273 cmp ecx,edx3274 cmovg edx,ecx3275 test ecx,ecx3276 mov DWORD[4+rbx],ecx3277 cmovle r9,rbp3278 3279 mov r10,QWORD[32+rsi]3280 3281 mov ecx,DWORD[40+rsi]3282 cmp ecx,edx3283 cmovg edx,ecx3284 test ecx,ecx3285 mov DWORD[8+rbx],ecx3286 cmovle r10,rbp3287 3288 mov r11,QWORD[48+rsi]3289 3290 mov ecx,DWORD[56+rsi]3291 cmp ecx,edx3292 cmovg edx,ecx3293 test ecx,ecx3294 mov DWORD[12+rbx],ecx3295 cmovle r11,rbp3296 test edx,edx3297 jz NEAR $L$done_avx3298 3299 vmovdqu xmm8,XMMWORD[((0-128))+rdi]3300 lea rax,[128+rsp]3301 vmovdqu xmm9,XMMWORD[((32-128))+rdi]3302 vmovdqu xmm10,XMMWORD[((64-128))+rdi]3303 vmovdqu xmm11,XMMWORD[((96-128))+rdi]3304 vmovdqu xmm12,XMMWORD[((128-128))+rdi]3305 vmovdqu xmm13,XMMWORD[((160-128))+rdi]3306 vmovdqu xmm14,XMMWORD[((192-128))+rdi]3307 vmovdqu xmm15,XMMWORD[((224-128))+rdi]3308 vmovdqu xmm6,XMMWORD[$L$pbswap]3309 jmp NEAR $L$oop_avx3310 3311 ALIGN 323312 $L$oop_avx:3313 vpxor xmm4,xmm10,xmm93314 vmovd xmm5,DWORD[r8]3315 vmovd xmm0,DWORD[r9]3316 vpinsrd xmm5,xmm5,DWORD[r10],13317 vpinsrd xmm0,xmm0,DWORD[r11],13318 vpunpckldq xmm5,xmm5,xmm03319 vpshufb xmm5,xmm5,xmm63320 vpsrld xmm7,xmm12,63321 vpslld xmm2,xmm12,263322 vmovdqu XMMWORD[(0-128)+rax],xmm53323 vpaddd xmm5,xmm5,xmm153324 3325 vpsrld xmm1,xmm12,113326 vpxor xmm7,xmm7,xmm23327 vpslld xmm2,xmm12,213328 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp]3329 vpxor xmm7,xmm7,xmm13330 3331 vpsrld xmm1,xmm12,253332 vpxor xmm7,xmm7,xmm23333 3334 vpslld xmm2,xmm12,73335 vpandn xmm0,xmm12,xmm143336 vpand xmm3,xmm12,xmm133337 3338 vpxor xmm7,xmm7,xmm13339 3340 vpsrld xmm15,xmm8,23341 vpxor xmm7,xmm7,xmm23342 3343 vpslld xmm1,xmm8,303344 vpxor xmm0,xmm0,xmm33345 vpxor xmm3,xmm9,xmm83346 3347 vpxor xmm15,xmm15,xmm13348 vpaddd xmm5,xmm5,xmm73349 3350 vpsrld xmm1,xmm8,133351 3352 vpslld xmm2,xmm8,193353 vpaddd xmm5,xmm5,xmm03354 vpand xmm4,xmm4,xmm33355 3356 vpxor xmm7,xmm15,xmm13357 3358 vpsrld xmm1,xmm8,223359 vpxor xmm7,xmm7,xmm23360 3361 vpslld xmm2,xmm8,103362 vpxor xmm15,xmm9,xmm43363 vpaddd xmm11,xmm11,xmm53364 3365 vpxor xmm7,xmm7,xmm13366 vpxor xmm7,xmm7,xmm23367 3368 vpaddd xmm15,xmm15,xmm53369 vpaddd xmm15,xmm15,xmm73370 vmovd xmm5,DWORD[4+r8]3371 vmovd xmm0,DWORD[4+r9]3372 vpinsrd xmm5,xmm5,DWORD[4+r10],13373 vpinsrd xmm0,xmm0,DWORD[4+r11],13374 vpunpckldq xmm5,xmm5,xmm03375 vpshufb xmm5,xmm5,xmm63376 vpsrld xmm7,xmm11,63377 vpslld xmm2,xmm11,263378 vmovdqu XMMWORD[(16-128)+rax],xmm53379 vpaddd xmm5,xmm5,xmm143380 3381 vpsrld xmm1,xmm11,113382 vpxor xmm7,xmm7,xmm23383 vpslld xmm2,xmm11,213384 vpaddd xmm5,xmm5,XMMWORD[((-96))+rbp]3385 vpxor xmm7,xmm7,xmm13386 3387 vpsrld xmm1,xmm11,253388 vpxor xmm7,xmm7,xmm23389 3390 vpslld xmm2,xmm11,73391 vpandn xmm0,xmm11,xmm133392 vpand xmm4,xmm11,xmm123393 3394 vpxor xmm7,xmm7,xmm13395 3396 vpsrld xmm14,xmm15,23397 vpxor xmm7,xmm7,xmm23398 3399 vpslld xmm1,xmm15,303400 vpxor xmm0,xmm0,xmm43401 vpxor xmm4,xmm8,xmm153402 3403 vpxor xmm14,xmm14,xmm13404 vpaddd xmm5,xmm5,xmm73405 3406 vpsrld xmm1,xmm15,133407 3408 vpslld xmm2,xmm15,193409 vpaddd xmm5,xmm5,xmm03410 vpand xmm3,xmm3,xmm43411 3412 vpxor xmm7,xmm14,xmm13413 3414 vpsrld xmm1,xmm15,223415 vpxor xmm7,xmm7,xmm23416 3417 vpslld xmm2,xmm15,103418 vpxor xmm14,xmm8,xmm33419 vpaddd xmm10,xmm10,xmm53420 3421 vpxor xmm7,xmm7,xmm13422 vpxor xmm7,xmm7,xmm23423 3424 vpaddd xmm14,xmm14,xmm53425 vpaddd xmm14,xmm14,xmm73426 vmovd xmm5,DWORD[8+r8]3427 vmovd xmm0,DWORD[8+r9]3428 vpinsrd xmm5,xmm5,DWORD[8+r10],13429 vpinsrd xmm0,xmm0,DWORD[8+r11],13430 vpunpckldq xmm5,xmm5,xmm03431 vpshufb xmm5,xmm5,xmm63432 vpsrld xmm7,xmm10,63433 vpslld xmm2,xmm10,263434 vmovdqu XMMWORD[(32-128)+rax],xmm53435 vpaddd xmm5,xmm5,xmm133436 3437 vpsrld xmm1,xmm10,113438 vpxor xmm7,xmm7,xmm23439 vpslld xmm2,xmm10,213440 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp]3441 vpxor xmm7,xmm7,xmm13442 3443 vpsrld xmm1,xmm10,253444 vpxor xmm7,xmm7,xmm23445 3446 vpslld xmm2,xmm10,73447 vpandn xmm0,xmm10,xmm123448 vpand xmm3,xmm10,xmm113449 3450 vpxor xmm7,xmm7,xmm13451 3452 vpsrld xmm13,xmm14,23453 vpxor xmm7,xmm7,xmm23454 3455 vpslld xmm1,xmm14,303456 vpxor xmm0,xmm0,xmm33457 vpxor xmm3,xmm15,xmm143458 3459 vpxor xmm13,xmm13,xmm13460 vpaddd xmm5,xmm5,xmm73461 3462 vpsrld xmm1,xmm14,133463 3464 vpslld xmm2,xmm14,193465 vpaddd xmm5,xmm5,xmm03466 vpand xmm4,xmm4,xmm33467 3468 vpxor xmm7,xmm13,xmm13469 3470 vpsrld xmm1,xmm14,223471 vpxor xmm7,xmm7,xmm23472 3473 vpslld xmm2,xmm14,103474 vpxor xmm13,xmm15,xmm43475 vpaddd xmm9,xmm9,xmm53476 3477 vpxor xmm7,xmm7,xmm13478 vpxor xmm7,xmm7,xmm23479 3480 vpaddd xmm13,xmm13,xmm53481 vpaddd xmm13,xmm13,xmm73482 vmovd xmm5,DWORD[12+r8]3483 vmovd xmm0,DWORD[12+r9]3484 vpinsrd xmm5,xmm5,DWORD[12+r10],13485 vpinsrd xmm0,xmm0,DWORD[12+r11],13486 vpunpckldq xmm5,xmm5,xmm03487 vpshufb xmm5,xmm5,xmm63488 vpsrld xmm7,xmm9,63489 vpslld xmm2,xmm9,263490 vmovdqu XMMWORD[(48-128)+rax],xmm53491 vpaddd xmm5,xmm5,xmm123492 3493 vpsrld xmm1,xmm9,113494 vpxor xmm7,xmm7,xmm23495 vpslld xmm2,xmm9,213496 vpaddd xmm5,xmm5,XMMWORD[((-32))+rbp]3497 vpxor xmm7,xmm7,xmm13498 3499 vpsrld xmm1,xmm9,253500 vpxor xmm7,xmm7,xmm23501 3502 vpslld xmm2,xmm9,73503 vpandn xmm0,xmm9,xmm113504 vpand xmm4,xmm9,xmm103505 3506 vpxor xmm7,xmm7,xmm13507 3508 vpsrld xmm12,xmm13,23509 vpxor xmm7,xmm7,xmm23510 3511 vpslld xmm1,xmm13,303512 vpxor xmm0,xmm0,xmm43513 vpxor xmm4,xmm14,xmm133514 3515 vpxor xmm12,xmm12,xmm13516 vpaddd xmm5,xmm5,xmm73517 3518 vpsrld xmm1,xmm13,133519 3520 vpslld xmm2,xmm13,193521 vpaddd xmm5,xmm5,xmm03522 vpand xmm3,xmm3,xmm43523 3524 vpxor xmm7,xmm12,xmm13525 3526 vpsrld xmm1,xmm13,223527 vpxor xmm7,xmm7,xmm23528 3529 vpslld xmm2,xmm13,103530 vpxor xmm12,xmm14,xmm33531 vpaddd xmm8,xmm8,xmm53532 3533 vpxor xmm7,xmm7,xmm13534 vpxor xmm7,xmm7,xmm23535 3536 vpaddd xmm12,xmm12,xmm53537 vpaddd xmm12,xmm12,xmm73538 vmovd xmm5,DWORD[16+r8]3539 vmovd xmm0,DWORD[16+r9]3540 vpinsrd xmm5,xmm5,DWORD[16+r10],13541 vpinsrd xmm0,xmm0,DWORD[16+r11],13542 vpunpckldq xmm5,xmm5,xmm03543 vpshufb xmm5,xmm5,xmm63544 vpsrld xmm7,xmm8,63545 vpslld xmm2,xmm8,263546 vmovdqu XMMWORD[(64-128)+rax],xmm53547 vpaddd xmm5,xmm5,xmm113548 3549 vpsrld xmm1,xmm8,113550 vpxor xmm7,xmm7,xmm23551 vpslld xmm2,xmm8,213552 vpaddd xmm5,xmm5,XMMWORD[rbp]3553 vpxor xmm7,xmm7,xmm13554 3555 vpsrld xmm1,xmm8,253556 vpxor xmm7,xmm7,xmm23557 3558 vpslld xmm2,xmm8,73559 vpandn xmm0,xmm8,xmm103560 vpand xmm3,xmm8,xmm93561 3562 vpxor xmm7,xmm7,xmm13563 3564 vpsrld xmm11,xmm12,23565 vpxor xmm7,xmm7,xmm23566 3567 vpslld xmm1,xmm12,303568 vpxor xmm0,xmm0,xmm33569 vpxor xmm3,xmm13,xmm123570 3571 vpxor xmm11,xmm11,xmm13572 vpaddd xmm5,xmm5,xmm73573 3574 vpsrld xmm1,xmm12,133575 3576 vpslld xmm2,xmm12,193577 vpaddd xmm5,xmm5,xmm03578 vpand xmm4,xmm4,xmm33579 3580 vpxor xmm7,xmm11,xmm13581 3582 vpsrld xmm1,xmm12,223583 vpxor xmm7,xmm7,xmm23584 3585 vpslld xmm2,xmm12,103586 vpxor xmm11,xmm13,xmm43587 vpaddd xmm15,xmm15,xmm53588 3589 vpxor xmm7,xmm7,xmm13590 vpxor xmm7,xmm7,xmm23591 3592 vpaddd xmm11,xmm11,xmm53593 vpaddd xmm11,xmm11,xmm73594 vmovd xmm5,DWORD[20+r8]3595 vmovd xmm0,DWORD[20+r9]3596 vpinsrd xmm5,xmm5,DWORD[20+r10],13597 vpinsrd xmm0,xmm0,DWORD[20+r11],13598 vpunpckldq xmm5,xmm5,xmm03599 vpshufb xmm5,xmm5,xmm63600 vpsrld xmm7,xmm15,63601 vpslld xmm2,xmm15,263602 vmovdqu XMMWORD[(80-128)+rax],xmm53603 vpaddd xmm5,xmm5,xmm103604 3605 vpsrld xmm1,xmm15,113606 vpxor xmm7,xmm7,xmm23607 vpslld xmm2,xmm15,213608 vpaddd xmm5,xmm5,XMMWORD[32+rbp]3609 vpxor xmm7,xmm7,xmm13610 3611 vpsrld xmm1,xmm15,253612 vpxor xmm7,xmm7,xmm23613 3614 vpslld xmm2,xmm15,73615 vpandn xmm0,xmm15,xmm93616 vpand xmm4,xmm15,xmm83617 3618 vpxor xmm7,xmm7,xmm13619 3620 vpsrld xmm10,xmm11,23621 vpxor xmm7,xmm7,xmm23622 3623 vpslld xmm1,xmm11,303624 vpxor xmm0,xmm0,xmm43625 vpxor xmm4,xmm12,xmm113626 3627 vpxor xmm10,xmm10,xmm13628 vpaddd xmm5,xmm5,xmm73629 3630 vpsrld xmm1,xmm11,133631 3632 vpslld xmm2,xmm11,193633 vpaddd xmm5,xmm5,xmm03634 vpand xmm3,xmm3,xmm43635 3636 vpxor xmm7,xmm10,xmm13637 3638 vpsrld xmm1,xmm11,223639 vpxor xmm7,xmm7,xmm23640 3641 vpslld xmm2,xmm11,103642 vpxor xmm10,xmm12,xmm33643 vpaddd xmm14,xmm14,xmm53644 3645 vpxor xmm7,xmm7,xmm13646 vpxor xmm7,xmm7,xmm23647 3648 vpaddd xmm10,xmm10,xmm53649 vpaddd xmm10,xmm10,xmm73650 vmovd xmm5,DWORD[24+r8]3651 vmovd xmm0,DWORD[24+r9]3652 vpinsrd xmm5,xmm5,DWORD[24+r10],13653 vpinsrd xmm0,xmm0,DWORD[24+r11],13654 vpunpckldq xmm5,xmm5,xmm03655 vpshufb xmm5,xmm5,xmm63656 vpsrld xmm7,xmm14,63657 vpslld xmm2,xmm14,263658 vmovdqu XMMWORD[(96-128)+rax],xmm53659 vpaddd xmm5,xmm5,xmm93660 3661 vpsrld xmm1,xmm14,113662 vpxor xmm7,xmm7,xmm23663 vpslld xmm2,xmm14,213664 vpaddd xmm5,xmm5,XMMWORD[64+rbp]3665 vpxor xmm7,xmm7,xmm13666 3667 vpsrld xmm1,xmm14,253668 vpxor xmm7,xmm7,xmm23669 3670 vpslld xmm2,xmm14,73671 vpandn xmm0,xmm14,xmm83672 vpand xmm3,xmm14,xmm153673 3674 vpxor xmm7,xmm7,xmm13675 3676 vpsrld xmm9,xmm10,23677 vpxor xmm7,xmm7,xmm23678 3679 vpslld xmm1,xmm10,303680 vpxor xmm0,xmm0,xmm33681 vpxor xmm3,xmm11,xmm103682 3683 vpxor xmm9,xmm9,xmm13684 vpaddd xmm5,xmm5,xmm73685 3686 vpsrld xmm1,xmm10,133687 3688 vpslld xmm2,xmm10,193689 vpaddd xmm5,xmm5,xmm03690 vpand xmm4,xmm4,xmm33691 3692 vpxor xmm7,xmm9,xmm13693 3694 vpsrld xmm1,xmm10,223695 vpxor xmm7,xmm7,xmm23696 3697 vpslld xmm2,xmm10,103698 vpxor xmm9,xmm11,xmm43699 vpaddd xmm13,xmm13,xmm53700 3701 vpxor xmm7,xmm7,xmm13702 vpxor xmm7,xmm7,xmm23703 3704 vpaddd xmm9,xmm9,xmm53705 vpaddd xmm9,xmm9,xmm73706 vmovd xmm5,DWORD[28+r8]3707 vmovd xmm0,DWORD[28+r9]3708 vpinsrd xmm5,xmm5,DWORD[28+r10],13709 vpinsrd xmm0,xmm0,DWORD[28+r11],13710 vpunpckldq xmm5,xmm5,xmm03711 vpshufb xmm5,xmm5,xmm63712 vpsrld xmm7,xmm13,63713 vpslld xmm2,xmm13,263714 vmovdqu XMMWORD[(112-128)+rax],xmm53715 vpaddd xmm5,xmm5,xmm83716 3717 vpsrld xmm1,xmm13,113718 vpxor xmm7,xmm7,xmm23719 vpslld xmm2,xmm13,213720 vpaddd xmm5,xmm5,XMMWORD[96+rbp]3721 vpxor xmm7,xmm7,xmm13722 3723 vpsrld xmm1,xmm13,253724 vpxor xmm7,xmm7,xmm23725 3726 vpslld xmm2,xmm13,73727 vpandn xmm0,xmm13,xmm153728 vpand xmm4,xmm13,xmm143729 3730 vpxor xmm7,xmm7,xmm13731 3732 vpsrld xmm8,xmm9,23733 vpxor xmm7,xmm7,xmm23734 3735 vpslld xmm1,xmm9,303736 vpxor xmm0,xmm0,xmm43737 vpxor xmm4,xmm10,xmm93738 3739 vpxor xmm8,xmm8,xmm13740 vpaddd xmm5,xmm5,xmm73741 3742 vpsrld xmm1,xmm9,133743 3744 vpslld xmm2,xmm9,193745 vpaddd xmm5,xmm5,xmm03746 vpand xmm3,xmm3,xmm43747 3748 vpxor xmm7,xmm8,xmm13749 3750 vpsrld xmm1,xmm9,223751 vpxor xmm7,xmm7,xmm23752 3753 vpslld xmm2,xmm9,103754 vpxor xmm8,xmm10,xmm33755 vpaddd xmm12,xmm12,xmm53756 3757 vpxor xmm7,xmm7,xmm13758 vpxor xmm7,xmm7,xmm23759 3760 vpaddd xmm8,xmm8,xmm53761 vpaddd xmm8,xmm8,xmm73762 add rbp,2563763 vmovd xmm5,DWORD[32+r8]3764 vmovd xmm0,DWORD[32+r9]3765 vpinsrd xmm5,xmm5,DWORD[32+r10],13766 vpinsrd xmm0,xmm0,DWORD[32+r11],13767 vpunpckldq xmm5,xmm5,xmm03768 vpshufb xmm5,xmm5,xmm63769 vpsrld xmm7,xmm12,63770 vpslld xmm2,xmm12,263771 vmovdqu XMMWORD[(128-128)+rax],xmm53772 vpaddd xmm5,xmm5,xmm153773 3774 vpsrld xmm1,xmm12,113775 vpxor xmm7,xmm7,xmm23776 vpslld xmm2,xmm12,213777 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp]3778 vpxor xmm7,xmm7,xmm13779 3780 vpsrld xmm1,xmm12,253781 vpxor xmm7,xmm7,xmm23782 3783 vpslld xmm2,xmm12,73784 vpandn xmm0,xmm12,xmm143785 vpand xmm3,xmm12,xmm133786 3787 vpxor xmm7,xmm7,xmm13788 3789 vpsrld xmm15,xmm8,23790 vpxor xmm7,xmm7,xmm23791 3792 vpslld xmm1,xmm8,303793 vpxor xmm0,xmm0,xmm33794 vpxor xmm3,xmm9,xmm83795 3796 vpxor xmm15,xmm15,xmm13797 vpaddd xmm5,xmm5,xmm73798 3799 vpsrld xmm1,xmm8,133800 3801 vpslld xmm2,xmm8,193802 vpaddd xmm5,xmm5,xmm03803 vpand xmm4,xmm4,xmm33804 3805 vpxor xmm7,xmm15,xmm13806 3807 vpsrld xmm1,xmm8,223808 vpxor xmm7,xmm7,xmm23809 3810 vpslld xmm2,xmm8,103811 vpxor xmm15,xmm9,xmm43812 vpaddd xmm11,xmm11,xmm53813 3814 vpxor xmm7,xmm7,xmm13815 vpxor xmm7,xmm7,xmm23816 3817 vpaddd xmm15,xmm15,xmm53818 vpaddd xmm15,xmm15,xmm73819 vmovd xmm5,DWORD[36+r8]3820 vmovd xmm0,DWORD[36+r9]3821 vpinsrd xmm5,xmm5,DWORD[36+r10],13822 vpinsrd xmm0,xmm0,DWORD[36+r11],13823 vpunpckldq xmm5,xmm5,xmm03824 vpshufb xmm5,xmm5,xmm63825 vpsrld xmm7,xmm11,63826 vpslld xmm2,xmm11,263827 vmovdqu XMMWORD[(144-128)+rax],xmm53828 vpaddd xmm5,xmm5,xmm143829 3830 vpsrld xmm1,xmm11,113831 vpxor xmm7,xmm7,xmm23832 vpslld xmm2,xmm11,213833 vpaddd xmm5,xmm5,XMMWORD[((-96))+rbp]3834 vpxor xmm7,xmm7,xmm13835 3836 vpsrld xmm1,xmm11,253837 vpxor xmm7,xmm7,xmm23838 3839 vpslld xmm2,xmm11,73840 vpandn xmm0,xmm11,xmm133841 vpand xmm4,xmm11,xmm123842 3843 vpxor xmm7,xmm7,xmm13844 3845 vpsrld xmm14,xmm15,23846 vpxor xmm7,xmm7,xmm23847 3848 vpslld xmm1,xmm15,303849 vpxor xmm0,xmm0,xmm43850 vpxor xmm4,xmm8,xmm153851 3852 vpxor xmm14,xmm14,xmm13853 vpaddd xmm5,xmm5,xmm73854 3855 vpsrld xmm1,xmm15,133856 3857 vpslld xmm2,xmm15,193858 vpaddd xmm5,xmm5,xmm03859 vpand xmm3,xmm3,xmm43860 3861 vpxor xmm7,xmm14,xmm13862 3863 vpsrld xmm1,xmm15,223864 vpxor xmm7,xmm7,xmm23865 3866 vpslld xmm2,xmm15,103867 vpxor xmm14,xmm8,xmm33868 vpaddd xmm10,xmm10,xmm53869 3870 vpxor xmm7,xmm7,xmm13871 vpxor xmm7,xmm7,xmm23872 3873 vpaddd xmm14,xmm14,xmm53874 vpaddd xmm14,xmm14,xmm73875 vmovd xmm5,DWORD[40+r8]3876 vmovd xmm0,DWORD[40+r9]3877 vpinsrd xmm5,xmm5,DWORD[40+r10],13878 vpinsrd xmm0,xmm0,DWORD[40+r11],13879 vpunpckldq xmm5,xmm5,xmm03880 vpshufb xmm5,xmm5,xmm63881 vpsrld xmm7,xmm10,63882 vpslld xmm2,xmm10,263883 vmovdqu XMMWORD[(160-128)+rax],xmm53884 vpaddd xmm5,xmm5,xmm133885 3886 vpsrld xmm1,xmm10,113887 vpxor xmm7,xmm7,xmm23888 vpslld xmm2,xmm10,213889 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp]3890 vpxor xmm7,xmm7,xmm13891 3892 vpsrld xmm1,xmm10,253893 vpxor xmm7,xmm7,xmm23894 3895 vpslld xmm2,xmm10,73896 vpandn xmm0,xmm10,xmm123897 vpand xmm3,xmm10,xmm113898 3899 vpxor xmm7,xmm7,xmm13900 3901 vpsrld xmm13,xmm14,23902 vpxor xmm7,xmm7,xmm23903 3904 vpslld xmm1,xmm14,303905 vpxor xmm0,xmm0,xmm33906 vpxor xmm3,xmm15,xmm143907 3908 vpxor xmm13,xmm13,xmm13909 vpaddd xmm5,xmm5,xmm73910 3911 vpsrld xmm1,xmm14,133912 3913 vpslld xmm2,xmm14,193914 vpaddd xmm5,xmm5,xmm03915 vpand xmm4,xmm4,xmm33916 3917 vpxor xmm7,xmm13,xmm13918 3919 vpsrld xmm1,xmm14,223920 vpxor xmm7,xmm7,xmm23921 3922 vpslld xmm2,xmm14,103923 vpxor xmm13,xmm15,xmm43924 vpaddd xmm9,xmm9,xmm53925 3926 vpxor xmm7,xmm7,xmm13927 vpxor xmm7,xmm7,xmm23928 3929 vpaddd xmm13,xmm13,xmm53930 vpaddd xmm13,xmm13,xmm73931 vmovd xmm5,DWORD[44+r8]3932 vmovd xmm0,DWORD[44+r9]3933 vpinsrd xmm5,xmm5,DWORD[44+r10],13934 vpinsrd xmm0,xmm0,DWORD[44+r11],13935 vpunpckldq xmm5,xmm5,xmm03936 vpshufb xmm5,xmm5,xmm63937 vpsrld xmm7,xmm9,63938 vpslld xmm2,xmm9,263939 vmovdqu XMMWORD[(176-128)+rax],xmm53940 vpaddd xmm5,xmm5,xmm123941 3942 vpsrld xmm1,xmm9,113943 vpxor xmm7,xmm7,xmm23944 vpslld xmm2,xmm9,213945 vpaddd xmm5,xmm5,XMMWORD[((-32))+rbp]3946 vpxor xmm7,xmm7,xmm13947 3948 vpsrld xmm1,xmm9,253949 vpxor xmm7,xmm7,xmm23950 3951 vpslld xmm2,xmm9,73952 vpandn xmm0,xmm9,xmm113953 vpand xmm4,xmm9,xmm103954 3955 vpxor xmm7,xmm7,xmm13956 3957 vpsrld xmm12,xmm13,23958 vpxor xmm7,xmm7,xmm23959 3960 vpslld xmm1,xmm13,303961 vpxor xmm0,xmm0,xmm43962 vpxor xmm4,xmm14,xmm133963 3964 vpxor xmm12,xmm12,xmm13965 vpaddd xmm5,xmm5,xmm73966 3967 vpsrld xmm1,xmm13,133968 3969 vpslld xmm2,xmm13,193970 vpaddd xmm5,xmm5,xmm03971 vpand xmm3,xmm3,xmm43972 3973 vpxor xmm7,xmm12,xmm13974 3975 vpsrld xmm1,xmm13,223976 vpxor xmm7,xmm7,xmm23977 3978 vpslld xmm2,xmm13,103979 vpxor xmm12,xmm14,xmm33980 vpaddd xmm8,xmm8,xmm53981 3982 vpxor xmm7,xmm7,xmm13983 vpxor xmm7,xmm7,xmm23984 3985 vpaddd xmm12,xmm12,xmm53986 vpaddd xmm12,xmm12,xmm73987 vmovd xmm5,DWORD[48+r8]3988 vmovd xmm0,DWORD[48+r9]3989 vpinsrd xmm5,xmm5,DWORD[48+r10],13990 vpinsrd xmm0,xmm0,DWORD[48+r11],13991 vpunpckldq xmm5,xmm5,xmm03992 vpshufb xmm5,xmm5,xmm63993 vpsrld xmm7,xmm8,63994 vpslld xmm2,xmm8,263995 vmovdqu XMMWORD[(192-128)+rax],xmm53996 vpaddd xmm5,xmm5,xmm113997 3998 vpsrld xmm1,xmm8,113999 vpxor xmm7,xmm7,xmm24000 vpslld xmm2,xmm8,214001 vpaddd xmm5,xmm5,XMMWORD[rbp]4002 vpxor xmm7,xmm7,xmm14003 4004 vpsrld xmm1,xmm8,254005 vpxor xmm7,xmm7,xmm24006 4007 vpslld xmm2,xmm8,74008 vpandn xmm0,xmm8,xmm104009 vpand xmm3,xmm8,xmm94010 4011 vpxor xmm7,xmm7,xmm14012 4013 vpsrld xmm11,xmm12,24014 vpxor xmm7,xmm7,xmm24015 4016 vpslld xmm1,xmm12,304017 vpxor xmm0,xmm0,xmm34018 vpxor xmm3,xmm13,xmm124019 4020 vpxor xmm11,xmm11,xmm14021 vpaddd xmm5,xmm5,xmm74022 4023 vpsrld xmm1,xmm12,134024 4025 vpslld xmm2,xmm12,194026 vpaddd xmm5,xmm5,xmm04027 vpand xmm4,xmm4,xmm34028 4029 vpxor xmm7,xmm11,xmm14030 4031 vpsrld xmm1,xmm12,224032 vpxor xmm7,xmm7,xmm24033 4034 vpslld xmm2,xmm12,104035 vpxor xmm11,xmm13,xmm44036 vpaddd xmm15,xmm15,xmm54037 4038 vpxor xmm7,xmm7,xmm14039 vpxor xmm7,xmm7,xmm24040 4041 vpaddd xmm11,xmm11,xmm54042 vpaddd xmm11,xmm11,xmm74043 vmovd xmm5,DWORD[52+r8]4044 vmovd xmm0,DWORD[52+r9]4045 vpinsrd xmm5,xmm5,DWORD[52+r10],14046 vpinsrd xmm0,xmm0,DWORD[52+r11],14047 vpunpckldq xmm5,xmm5,xmm04048 vpshufb xmm5,xmm5,xmm64049 vpsrld xmm7,xmm15,64050 vpslld xmm2,xmm15,264051 vmovdqu XMMWORD[(208-128)+rax],xmm54052 vpaddd xmm5,xmm5,xmm104053 4054 vpsrld xmm1,xmm15,114055 vpxor xmm7,xmm7,xmm24056 vpslld xmm2,xmm15,214057 vpaddd xmm5,xmm5,XMMWORD[32+rbp]4058 vpxor xmm7,xmm7,xmm14059 4060 vpsrld xmm1,xmm15,254061 vpxor xmm7,xmm7,xmm24062 4063 vpslld xmm2,xmm15,74064 vpandn xmm0,xmm15,xmm94065 vpand xmm4,xmm15,xmm84066 4067 vpxor xmm7,xmm7,xmm14068 4069 vpsrld xmm10,xmm11,24070 vpxor xmm7,xmm7,xmm24071 4072 vpslld xmm1,xmm11,304073 vpxor xmm0,xmm0,xmm44074 vpxor xmm4,xmm12,xmm114075 4076 vpxor xmm10,xmm10,xmm14077 vpaddd xmm5,xmm5,xmm74078 4079 vpsrld xmm1,xmm11,134080 4081 vpslld xmm2,xmm11,194082 vpaddd xmm5,xmm5,xmm04083 vpand xmm3,xmm3,xmm44084 4085 vpxor xmm7,xmm10,xmm14086 4087 vpsrld xmm1,xmm11,224088 vpxor xmm7,xmm7,xmm24089 4090 vpslld xmm2,xmm11,104091 vpxor xmm10,xmm12,xmm34092 vpaddd xmm14,xmm14,xmm54093 4094 vpxor xmm7,xmm7,xmm14095 vpxor xmm7,xmm7,xmm24096 4097 vpaddd xmm10,xmm10,xmm54098 vpaddd xmm10,xmm10,xmm74099 vmovd xmm5,DWORD[56+r8]4100 vmovd xmm0,DWORD[56+r9]4101 vpinsrd xmm5,xmm5,DWORD[56+r10],14102 vpinsrd xmm0,xmm0,DWORD[56+r11],14103 vpunpckldq xmm5,xmm5,xmm04104 vpshufb xmm5,xmm5,xmm64105 vpsrld xmm7,xmm14,64106 vpslld xmm2,xmm14,264107 vmovdqu XMMWORD[(224-128)+rax],xmm54108 vpaddd xmm5,xmm5,xmm94109 4110 vpsrld xmm1,xmm14,114111 vpxor xmm7,xmm7,xmm24112 vpslld xmm2,xmm14,214113 vpaddd xmm5,xmm5,XMMWORD[64+rbp]4114 vpxor xmm7,xmm7,xmm14115 4116 vpsrld xmm1,xmm14,254117 vpxor xmm7,xmm7,xmm24118 4119 vpslld xmm2,xmm14,74120 vpandn xmm0,xmm14,xmm84121 vpand xmm3,xmm14,xmm154122 4123 vpxor xmm7,xmm7,xmm14124 4125 vpsrld xmm9,xmm10,24126 vpxor xmm7,xmm7,xmm24127 4128 vpslld xmm1,xmm10,304129 vpxor xmm0,xmm0,xmm34130 vpxor xmm3,xmm11,xmm104131 4132 vpxor xmm9,xmm9,xmm14133 vpaddd xmm5,xmm5,xmm74134 4135 vpsrld xmm1,xmm10,134136 4137 vpslld xmm2,xmm10,194138 vpaddd xmm5,xmm5,xmm04139 vpand xmm4,xmm4,xmm34140 4141 vpxor xmm7,xmm9,xmm14142 4143 vpsrld xmm1,xmm10,224144 vpxor xmm7,xmm7,xmm24145 4146 vpslld xmm2,xmm10,104147 vpxor xmm9,xmm11,xmm44148 vpaddd xmm13,xmm13,xmm54149 4150 vpxor xmm7,xmm7,xmm14151 vpxor xmm7,xmm7,xmm24152 4153 vpaddd xmm9,xmm9,xmm54154 vpaddd xmm9,xmm9,xmm74155 vmovd xmm5,DWORD[60+r8]4156 lea r8,[64+r8]4157 vmovd xmm0,DWORD[60+r9]4158 lea r9,[64+r9]4159 vpinsrd xmm5,xmm5,DWORD[60+r10],14160 lea r10,[64+r10]4161 vpinsrd xmm0,xmm0,DWORD[60+r11],14162 lea r11,[64+r11]4163 vpunpckldq xmm5,xmm5,xmm04164 vpshufb xmm5,xmm5,xmm64165 vpsrld xmm7,xmm13,64166 vpslld xmm2,xmm13,264167 vmovdqu XMMWORD[(240-128)+rax],xmm54168 vpaddd xmm5,xmm5,xmm84169 4170 vpsrld xmm1,xmm13,114171 vpxor xmm7,xmm7,xmm24172 vpslld xmm2,xmm13,214173 vpaddd xmm5,xmm5,XMMWORD[96+rbp]4174 vpxor xmm7,xmm7,xmm14175 4176 vpsrld xmm1,xmm13,254177 vpxor xmm7,xmm7,xmm24178 prefetcht0 [63+r8]4179 vpslld xmm2,xmm13,74180 vpandn xmm0,xmm13,xmm154181 vpand xmm4,xmm13,xmm144182 prefetcht0 [63+r9]4183 vpxor xmm7,xmm7,xmm14184 4185 vpsrld xmm8,xmm9,24186 vpxor xmm7,xmm7,xmm24187 prefetcht0 [63+r10]4188 vpslld xmm1,xmm9,304189 vpxor xmm0,xmm0,xmm44190 vpxor xmm4,xmm10,xmm94191 prefetcht0 [63+r11]4192 vpxor xmm8,xmm8,xmm14193 vpaddd xmm5,xmm5,xmm74194 4195 vpsrld xmm1,xmm9,134196 4197 vpslld xmm2,xmm9,194198 vpaddd xmm5,xmm5,xmm04199 vpand xmm3,xmm3,xmm44200 4201 vpxor xmm7,xmm8,xmm14202 4203 vpsrld xmm1,xmm9,224204 vpxor xmm7,xmm7,xmm24205 4206 vpslld xmm2,xmm9,104207 vpxor xmm8,xmm10,xmm34208 vpaddd xmm12,xmm12,xmm54209 4210 vpxor xmm7,xmm7,xmm14211 vpxor xmm7,xmm7,xmm24212 4213 vpaddd xmm8,xmm8,xmm54214 vpaddd xmm8,xmm8,xmm74215 add rbp,2564216 vmovdqu xmm5,XMMWORD[((0-128))+rax]4217 mov ecx,34218 jmp NEAR $L$oop_16_xx_avx4219 ALIGN 324220 $L$oop_16_xx_avx:4221 vmovdqu xmm6,XMMWORD[((16-128))+rax]4222 vpaddd xmm5,xmm5,XMMWORD[((144-128))+rax]4223 4224 vpsrld xmm7,xmm6,34225 vpsrld xmm1,xmm6,74226 vpslld xmm2,xmm6,254227 vpxor xmm7,xmm7,xmm14228 vpsrld xmm1,xmm6,184229 vpxor xmm7,xmm7,xmm24230 vpslld xmm2,xmm6,144231 vmovdqu xmm0,XMMWORD[((224-128))+rax]4232 vpsrld xmm3,xmm0,104233 4234 vpxor xmm7,xmm7,xmm14235 vpsrld xmm1,xmm0,174236 vpxor xmm7,xmm7,xmm24237 vpslld xmm2,xmm0,154238 vpaddd xmm5,xmm5,xmm74239 vpxor xmm7,xmm3,xmm14240 vpsrld xmm1,xmm0,194241 vpxor xmm7,xmm7,xmm24242 vpslld xmm2,xmm0,134243 vpxor xmm7,xmm7,xmm14244 vpxor xmm7,xmm7,xmm24245 vpaddd xmm5,xmm5,xmm74246 vpsrld xmm7,xmm12,64247 vpslld xmm2,xmm12,264248 vmovdqu XMMWORD[(0-128)+rax],xmm54249 vpaddd xmm5,xmm5,xmm154250 4251 vpsrld xmm1,xmm12,114252 vpxor xmm7,xmm7,xmm24253 vpslld xmm2,xmm12,214254 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp]4255 vpxor xmm7,xmm7,xmm14256 4257 vpsrld xmm1,xmm12,254258 vpxor xmm7,xmm7,xmm24259 4260 vpslld xmm2,xmm12,74261 vpandn xmm0,xmm12,xmm144262 vpand xmm3,xmm12,xmm134263 4264 vpxor xmm7,xmm7,xmm14265 4266 vpsrld xmm15,xmm8,24267 vpxor xmm7,xmm7,xmm24268 4269 vpslld xmm1,xmm8,304270 vpxor xmm0,xmm0,xmm34271 vpxor xmm3,xmm9,xmm84272 4273 vpxor xmm15,xmm15,xmm14274 vpaddd xmm5,xmm5,xmm74275 4276 vpsrld xmm1,xmm8,134277 4278 vpslld xmm2,xmm8,194279 vpaddd xmm5,xmm5,xmm04280 vpand xmm4,xmm4,xmm34281 4282 vpxor xmm7,xmm15,xmm14283 4284 vpsrld xmm1,xmm8,224285 vpxor xmm7,xmm7,xmm24286 4287 vpslld xmm2,xmm8,104288 vpxor xmm15,xmm9,xmm44289 vpaddd xmm11,xmm11,xmm54290 4291 vpxor xmm7,xmm7,xmm14292 vpxor xmm7,xmm7,xmm24293 4294 vpaddd xmm15,xmm15,xmm54295 vpaddd xmm15,xmm15,xmm74296 vmovdqu xmm5,XMMWORD[((32-128))+rax]4297 vpaddd xmm6,xmm6,XMMWORD[((160-128))+rax]4298 4299 vpsrld xmm7,xmm5,34300 vpsrld xmm1,xmm5,74301 vpslld xmm2,xmm5,254302 vpxor xmm7,xmm7,xmm14303 vpsrld xmm1,xmm5,184304 vpxor xmm7,xmm7,xmm24305 vpslld xmm2,xmm5,144306 vmovdqu xmm0,XMMWORD[((240-128))+rax]4307 vpsrld xmm4,xmm0,104308 4309 vpxor xmm7,xmm7,xmm14310 vpsrld xmm1,xmm0,174311 vpxor xmm7,xmm7,xmm24312 vpslld xmm2,xmm0,154313 vpaddd xmm6,xmm6,xmm74314 vpxor xmm7,xmm4,xmm14315 vpsrld xmm1,xmm0,194316 vpxor xmm7,xmm7,xmm24317 vpslld xmm2,xmm0,134318 vpxor xmm7,xmm7,xmm14319 vpxor xmm7,xmm7,xmm24320 vpaddd xmm6,xmm6,xmm74321 vpsrld xmm7,xmm11,64322 vpslld xmm2,xmm11,264323 vmovdqu XMMWORD[(16-128)+rax],xmm64324 vpaddd xmm6,xmm6,xmm144325 4326 vpsrld xmm1,xmm11,114327 vpxor xmm7,xmm7,xmm24328 vpslld xmm2,xmm11,214329 vpaddd xmm6,xmm6,XMMWORD[((-96))+rbp]4330 vpxor xmm7,xmm7,xmm14331 4332 vpsrld xmm1,xmm11,254333 vpxor xmm7,xmm7,xmm24334 4335 vpslld xmm2,xmm11,74336 vpandn xmm0,xmm11,xmm134337 vpand xmm4,xmm11,xmm124338 4339 vpxor xmm7,xmm7,xmm14340 4341 vpsrld xmm14,xmm15,24342 vpxor xmm7,xmm7,xmm24343 4344 vpslld xmm1,xmm15,304345 vpxor xmm0,xmm0,xmm44346 vpxor xmm4,xmm8,xmm154347 4348 vpxor xmm14,xmm14,xmm14349 vpaddd xmm6,xmm6,xmm74350 4351 vpsrld xmm1,xmm15,134352 4353 vpslld xmm2,xmm15,194354 vpaddd xmm6,xmm6,xmm04355 vpand xmm3,xmm3,xmm44356 4357 vpxor xmm7,xmm14,xmm14358 4359 vpsrld xmm1,xmm15,224360 vpxor xmm7,xmm7,xmm24361 4362 vpslld xmm2,xmm15,104363 vpxor xmm14,xmm8,xmm34364 vpaddd xmm10,xmm10,xmm64365 4366 vpxor xmm7,xmm7,xmm14367 vpxor xmm7,xmm7,xmm24368 4369 vpaddd xmm14,xmm14,xmm64370 vpaddd xmm14,xmm14,xmm74371 vmovdqu xmm6,XMMWORD[((48-128))+rax]4372 vpaddd xmm5,xmm5,XMMWORD[((176-128))+rax]4373 4374 vpsrld xmm7,xmm6,34375 vpsrld xmm1,xmm6,74376 vpslld xmm2,xmm6,254377 vpxor xmm7,xmm7,xmm14378 vpsrld xmm1,xmm6,184379 vpxor xmm7,xmm7,xmm24380 vpslld xmm2,xmm6,144381 vmovdqu xmm0,XMMWORD[((0-128))+rax]4382 vpsrld xmm3,xmm0,104383 4384 vpxor xmm7,xmm7,xmm14385 vpsrld xmm1,xmm0,174386 vpxor xmm7,xmm7,xmm24387 vpslld xmm2,xmm0,154388 vpaddd xmm5,xmm5,xmm74389 vpxor xmm7,xmm3,xmm14390 vpsrld xmm1,xmm0,194391 vpxor xmm7,xmm7,xmm24392 vpslld xmm2,xmm0,134393 vpxor xmm7,xmm7,xmm14394 vpxor xmm7,xmm7,xmm24395 vpaddd xmm5,xmm5,xmm74396 vpsrld xmm7,xmm10,64397 vpslld xmm2,xmm10,264398 vmovdqu XMMWORD[(32-128)+rax],xmm54399 vpaddd xmm5,xmm5,xmm134400 4401 vpsrld xmm1,xmm10,114402 vpxor xmm7,xmm7,xmm24403 vpslld xmm2,xmm10,214404 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp]4405 vpxor xmm7,xmm7,xmm14406 4407 vpsrld xmm1,xmm10,254408 vpxor xmm7,xmm7,xmm24409 4410 vpslld xmm2,xmm10,74411 vpandn xmm0,xmm10,xmm124412 vpand xmm3,xmm10,xmm114413 4414 vpxor xmm7,xmm7,xmm14415 4416 vpsrld xmm13,xmm14,24417 vpxor xmm7,xmm7,xmm24418 4419 vpslld xmm1,xmm14,304420 vpxor xmm0,xmm0,xmm34421 vpxor xmm3,xmm15,xmm144422 4423 vpxor xmm13,xmm13,xmm14424 vpaddd xmm5,xmm5,xmm74425 4426 vpsrld xmm1,xmm14,134427 4428 vpslld xmm2,xmm14,194429 vpaddd xmm5,xmm5,xmm04430 vpand xmm4,xmm4,xmm34431 4432 vpxor xmm7,xmm13,xmm14433 4434 vpsrld xmm1,xmm14,224435 vpxor xmm7,xmm7,xmm24436 4437 vpslld xmm2,xmm14,104438 vpxor xmm13,xmm15,xmm44439 vpaddd xmm9,xmm9,xmm54440 4441 vpxor xmm7,xmm7,xmm14442 vpxor xmm7,xmm7,xmm24443 4444 vpaddd xmm13,xmm13,xmm54445 vpaddd xmm13,xmm13,xmm74446 vmovdqu xmm5,XMMWORD[((64-128))+rax]4447 vpaddd xmm6,xmm6,XMMWORD[((192-128))+rax]4448 4449 vpsrld xmm7,xmm5,34450 vpsrld xmm1,xmm5,74451 vpslld xmm2,xmm5,254452 vpxor xmm7,xmm7,xmm14453 vpsrld xmm1,xmm5,184454 vpxor xmm7,xmm7,xmm24455 vpslld xmm2,xmm5,144456 vmovdqu xmm0,XMMWORD[((16-128))+rax]4457 vpsrld xmm4,xmm0,104458 4459 vpxor xmm7,xmm7,xmm14460 vpsrld xmm1,xmm0,174461 vpxor xmm7,xmm7,xmm24462 vpslld xmm2,xmm0,154463 vpaddd xmm6,xmm6,xmm74464 vpxor xmm7,xmm4,xmm14465 vpsrld xmm1,xmm0,194466 vpxor xmm7,xmm7,xmm24467 vpslld xmm2,xmm0,134468 vpxor xmm7,xmm7,xmm14469 vpxor xmm7,xmm7,xmm24470 vpaddd xmm6,xmm6,xmm74471 vpsrld xmm7,xmm9,64472 vpslld xmm2,xmm9,264473 vmovdqu XMMWORD[(48-128)+rax],xmm64474 vpaddd xmm6,xmm6,xmm124475 4476 vpsrld xmm1,xmm9,114477 vpxor xmm7,xmm7,xmm24478 vpslld xmm2,xmm9,214479 vpaddd xmm6,xmm6,XMMWORD[((-32))+rbp]4480 vpxor xmm7,xmm7,xmm14481 4482 vpsrld xmm1,xmm9,254483 vpxor xmm7,xmm7,xmm24484 4485 vpslld xmm2,xmm9,74486 vpandn xmm0,xmm9,xmm114487 vpand xmm4,xmm9,xmm104488 4489 vpxor xmm7,xmm7,xmm14490 4491 vpsrld xmm12,xmm13,24492 vpxor xmm7,xmm7,xmm24493 4494 vpslld xmm1,xmm13,304495 vpxor xmm0,xmm0,xmm44496 vpxor xmm4,xmm14,xmm134497 4498 vpxor xmm12,xmm12,xmm14499 vpaddd xmm6,xmm6,xmm74500 4501 vpsrld xmm1,xmm13,134502 4503 vpslld xmm2,xmm13,194504 vpaddd xmm6,xmm6,xmm04505 vpand xmm3,xmm3,xmm44506 4507 vpxor xmm7,xmm12,xmm14508 4509 vpsrld xmm1,xmm13,224510 vpxor xmm7,xmm7,xmm24511 4512 vpslld xmm2,xmm13,104513 vpxor xmm12,xmm14,xmm34514 vpaddd xmm8,xmm8,xmm64515 4516 vpxor xmm7,xmm7,xmm14517 vpxor xmm7,xmm7,xmm24518 4519 vpaddd xmm12,xmm12,xmm64520 vpaddd xmm12,xmm12,xmm74521 vmovdqu xmm6,XMMWORD[((80-128))+rax]4522 vpaddd xmm5,xmm5,XMMWORD[((208-128))+rax]4523 4524 vpsrld xmm7,xmm6,34525 vpsrld xmm1,xmm6,74526 vpslld xmm2,xmm6,254527 vpxor xmm7,xmm7,xmm14528 vpsrld xmm1,xmm6,184529 vpxor xmm7,xmm7,xmm24530 vpslld xmm2,xmm6,144531 vmovdqu xmm0,XMMWORD[((32-128))+rax]4532 vpsrld xmm3,xmm0,104533 4534 vpxor xmm7,xmm7,xmm14535 vpsrld xmm1,xmm0,174536 vpxor xmm7,xmm7,xmm24537 vpslld xmm2,xmm0,154538 vpaddd xmm5,xmm5,xmm74539 vpxor xmm7,xmm3,xmm14540 vpsrld xmm1,xmm0,194541 vpxor xmm7,xmm7,xmm24542 vpslld xmm2,xmm0,134543 vpxor xmm7,xmm7,xmm14544 vpxor xmm7,xmm7,xmm24545 vpaddd xmm5,xmm5,xmm74546 vpsrld xmm7,xmm8,64547 vpslld xmm2,xmm8,264548 vmovdqu XMMWORD[(64-128)+rax],xmm54549 vpaddd xmm5,xmm5,xmm114550 4551 vpsrld xmm1,xmm8,114552 vpxor xmm7,xmm7,xmm24553 vpslld xmm2,xmm8,214554 vpaddd xmm5,xmm5,XMMWORD[rbp]4555 vpxor xmm7,xmm7,xmm14556 4557 vpsrld xmm1,xmm8,254558 vpxor xmm7,xmm7,xmm24559 4560 vpslld xmm2,xmm8,74561 vpandn xmm0,xmm8,xmm104562 vpand xmm3,xmm8,xmm94563 4564 vpxor xmm7,xmm7,xmm14565 4566 vpsrld xmm11,xmm12,24567 vpxor xmm7,xmm7,xmm24568 4569 vpslld xmm1,xmm12,304570 vpxor xmm0,xmm0,xmm34571 vpxor xmm3,xmm13,xmm124572 4573 vpxor xmm11,xmm11,xmm14574 vpaddd xmm5,xmm5,xmm74575 4576 vpsrld xmm1,xmm12,134577 4578 vpslld xmm2,xmm12,194579 vpaddd xmm5,xmm5,xmm04580 vpand xmm4,xmm4,xmm34581 4582 vpxor xmm7,xmm11,xmm14583 4584 vpsrld xmm1,xmm12,224585 vpxor xmm7,xmm7,xmm24586 4587 vpslld xmm2,xmm12,104588 vpxor xmm11,xmm13,xmm44589 vpaddd xmm15,xmm15,xmm54590 4591 vpxor xmm7,xmm7,xmm14592 vpxor xmm7,xmm7,xmm24593 4594 vpaddd xmm11,xmm11,xmm54595 vpaddd xmm11,xmm11,xmm74596 vmovdqu xmm5,XMMWORD[((96-128))+rax]4597 vpaddd xmm6,xmm6,XMMWORD[((224-128))+rax]4598 4599 vpsrld xmm7,xmm5,34600 vpsrld xmm1,xmm5,74601 vpslld xmm2,xmm5,254602 vpxor xmm7,xmm7,xmm14603 vpsrld xmm1,xmm5,184604 vpxor xmm7,xmm7,xmm24605 vpslld xmm2,xmm5,144606 vmovdqu xmm0,XMMWORD[((48-128))+rax]4607 vpsrld xmm4,xmm0,104608 4609 vpxor xmm7,xmm7,xmm14610 vpsrld xmm1,xmm0,174611 vpxor xmm7,xmm7,xmm24612 vpslld xmm2,xmm0,154613 vpaddd xmm6,xmm6,xmm74614 vpxor xmm7,xmm4,xmm14615 vpsrld xmm1,xmm0,194616 vpxor xmm7,xmm7,xmm24617 vpslld xmm2,xmm0,134618 vpxor xmm7,xmm7,xmm14619 vpxor xmm7,xmm7,xmm24620 vpaddd xmm6,xmm6,xmm74621 vpsrld xmm7,xmm15,64622 vpslld xmm2,xmm15,264623 vmovdqu XMMWORD[(80-128)+rax],xmm64624 vpaddd xmm6,xmm6,xmm104625 4626 vpsrld xmm1,xmm15,114627 vpxor xmm7,xmm7,xmm24628 vpslld xmm2,xmm15,214629 vpaddd xmm6,xmm6,XMMWORD[32+rbp]4630 vpxor xmm7,xmm7,xmm14631 4632 vpsrld xmm1,xmm15,254633 vpxor xmm7,xmm7,xmm24634 4635 vpslld xmm2,xmm15,74636 vpandn xmm0,xmm15,xmm94637 vpand xmm4,xmm15,xmm84638 4639 vpxor xmm7,xmm7,xmm14640 4641 vpsrld xmm10,xmm11,24642 vpxor xmm7,xmm7,xmm24643 4644 vpslld xmm1,xmm11,304645 vpxor xmm0,xmm0,xmm44646 vpxor xmm4,xmm12,xmm114647 4648 vpxor xmm10,xmm10,xmm14649 vpaddd xmm6,xmm6,xmm74650 4651 vpsrld xmm1,xmm11,134652 4653 vpslld xmm2,xmm11,194654 vpaddd xmm6,xmm6,xmm04655 vpand xmm3,xmm3,xmm44656 4657 vpxor xmm7,xmm10,xmm14658 4659 vpsrld xmm1,xmm11,224660 vpxor xmm7,xmm7,xmm24661 4662 vpslld xmm2,xmm11,104663 vpxor xmm10,xmm12,xmm34664 vpaddd xmm14,xmm14,xmm64665 4666 vpxor xmm7,xmm7,xmm14667 vpxor xmm7,xmm7,xmm24668 4669 vpaddd xmm10,xmm10,xmm64670 vpaddd xmm10,xmm10,xmm74671 vmovdqu xmm6,XMMWORD[((112-128))+rax]4672 vpaddd xmm5,xmm5,XMMWORD[((240-128))+rax]4673 4674 vpsrld xmm7,xmm6,34675 vpsrld xmm1,xmm6,74676 vpslld xmm2,xmm6,254677 vpxor xmm7,xmm7,xmm14678 vpsrld xmm1,xmm6,184679 vpxor xmm7,xmm7,xmm24680 vpslld xmm2,xmm6,144681 vmovdqu xmm0,XMMWORD[((64-128))+rax]4682 vpsrld xmm3,xmm0,104683 4684 vpxor xmm7,xmm7,xmm14685 vpsrld xmm1,xmm0,174686 vpxor xmm7,xmm7,xmm24687 vpslld xmm2,xmm0,154688 vpaddd xmm5,xmm5,xmm74689 vpxor xmm7,xmm3,xmm14690 vpsrld xmm1,xmm0,194691 vpxor xmm7,xmm7,xmm24692 vpslld xmm2,xmm0,134693 vpxor xmm7,xmm7,xmm14694 vpxor xmm7,xmm7,xmm24695 vpaddd xmm5,xmm5,xmm74696 vpsrld xmm7,xmm14,64697 vpslld xmm2,xmm14,264698 vmovdqu XMMWORD[(96-128)+rax],xmm54699 vpaddd xmm5,xmm5,xmm94700 4701 vpsrld xmm1,xmm14,114702 vpxor xmm7,xmm7,xmm24703 vpslld xmm2,xmm14,214704 vpaddd xmm5,xmm5,XMMWORD[64+rbp]4705 vpxor xmm7,xmm7,xmm14706 4707 vpsrld xmm1,xmm14,254708 vpxor xmm7,xmm7,xmm24709 4710 vpslld xmm2,xmm14,74711 vpandn xmm0,xmm14,xmm84712 vpand xmm3,xmm14,xmm154713 4714 vpxor xmm7,xmm7,xmm14715 4716 vpsrld xmm9,xmm10,24717 vpxor xmm7,xmm7,xmm24718 4719 vpslld xmm1,xmm10,304720 vpxor xmm0,xmm0,xmm34721 vpxor xmm3,xmm11,xmm104722 4723 vpxor xmm9,xmm9,xmm14724 vpaddd xmm5,xmm5,xmm74725 4726 vpsrld xmm1,xmm10,134727 4728 vpslld xmm2,xmm10,194729 vpaddd xmm5,xmm5,xmm04730 vpand xmm4,xmm4,xmm34731 4732 vpxor xmm7,xmm9,xmm14733 4734 vpsrld xmm1,xmm10,224735 vpxor xmm7,xmm7,xmm24736 4737 vpslld xmm2,xmm10,104738 vpxor xmm9,xmm11,xmm44739 vpaddd xmm13,xmm13,xmm54740 4741 vpxor xmm7,xmm7,xmm14742 vpxor xmm7,xmm7,xmm24743 4744 vpaddd xmm9,xmm9,xmm54745 vpaddd xmm9,xmm9,xmm74746 vmovdqu xmm5,XMMWORD[((128-128))+rax]4747 vpaddd xmm6,xmm6,XMMWORD[((0-128))+rax]4748 4749 vpsrld xmm7,xmm5,34750 vpsrld xmm1,xmm5,74751 vpslld xmm2,xmm5,254752 vpxor xmm7,xmm7,xmm14753 vpsrld xmm1,xmm5,184754 vpxor xmm7,xmm7,xmm24755 vpslld xmm2,xmm5,144756 vmovdqu xmm0,XMMWORD[((80-128))+rax]4757 vpsrld xmm4,xmm0,104758 4759 vpxor xmm7,xmm7,xmm14760 vpsrld xmm1,xmm0,174761 vpxor xmm7,xmm7,xmm24762 vpslld xmm2,xmm0,154763 vpaddd xmm6,xmm6,xmm74764 vpxor xmm7,xmm4,xmm14765 vpsrld xmm1,xmm0,194766 vpxor xmm7,xmm7,xmm24767 vpslld xmm2,xmm0,134768 vpxor xmm7,xmm7,xmm14769 vpxor xmm7,xmm7,xmm24770 vpaddd xmm6,xmm6,xmm74771 vpsrld xmm7,xmm13,64772 vpslld xmm2,xmm13,264773 vmovdqu XMMWORD[(112-128)+rax],xmm64774 vpaddd xmm6,xmm6,xmm84775 4776 vpsrld xmm1,xmm13,114777 vpxor xmm7,xmm7,xmm24778 vpslld xmm2,xmm13,214779 vpaddd xmm6,xmm6,XMMWORD[96+rbp]4780 vpxor xmm7,xmm7,xmm14781 4782 vpsrld xmm1,xmm13,254783 vpxor xmm7,xmm7,xmm24784 4785 vpslld xmm2,xmm13,74786 vpandn xmm0,xmm13,xmm154787 vpand xmm4,xmm13,xmm144788 4789 vpxor xmm7,xmm7,xmm14790 4791 vpsrld xmm8,xmm9,24792 vpxor xmm7,xmm7,xmm24793 4794 vpslld xmm1,xmm9,304795 vpxor xmm0,xmm0,xmm44796 vpxor xmm4,xmm10,xmm94797 4798 vpxor xmm8,xmm8,xmm14799 vpaddd xmm6,xmm6,xmm74800 4801 vpsrld xmm1,xmm9,134802 4803 vpslld xmm2,xmm9,194804 vpaddd xmm6,xmm6,xmm04805 vpand xmm3,xmm3,xmm44806 4807 vpxor xmm7,xmm8,xmm14808 4809 vpsrld xmm1,xmm9,224810 vpxor xmm7,xmm7,xmm24811 4812 vpslld xmm2,xmm9,104813 vpxor xmm8,xmm10,xmm34814 vpaddd xmm12,xmm12,xmm64815 4816 vpxor xmm7,xmm7,xmm14817 vpxor xmm7,xmm7,xmm24818 4819 vpaddd xmm8,xmm8,xmm64820 vpaddd xmm8,xmm8,xmm74821 add rbp,2564822 vmovdqu xmm6,XMMWORD[((144-128))+rax]4823 vpaddd xmm5,xmm5,XMMWORD[((16-128))+rax]4824 4825 vpsrld xmm7,xmm6,34826 vpsrld xmm1,xmm6,74827 vpslld xmm2,xmm6,254828 vpxor xmm7,xmm7,xmm14829 vpsrld xmm1,xmm6,184830 vpxor xmm7,xmm7,xmm24831 vpslld xmm2,xmm6,144832 vmovdqu xmm0,XMMWORD[((96-128))+rax]4833 vpsrld xmm3,xmm0,104834 4835 vpxor xmm7,xmm7,xmm14836 vpsrld xmm1,xmm0,174837 vpxor xmm7,xmm7,xmm24838 vpslld xmm2,xmm0,154839 vpaddd xmm5,xmm5,xmm74840 vpxor xmm7,xmm3,xmm14841 vpsrld xmm1,xmm0,194842 vpxor xmm7,xmm7,xmm24843 vpslld xmm2,xmm0,134844 vpxor xmm7,xmm7,xmm14845 vpxor xmm7,xmm7,xmm24846 vpaddd xmm5,xmm5,xmm74847 vpsrld xmm7,xmm12,64848 vpslld xmm2,xmm12,264849 vmovdqu XMMWORD[(128-128)+rax],xmm54850 vpaddd xmm5,xmm5,xmm154851 4852 vpsrld xmm1,xmm12,114853 vpxor xmm7,xmm7,xmm24854 vpslld xmm2,xmm12,214855 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp]4856 vpxor xmm7,xmm7,xmm14857 4858 vpsrld xmm1,xmm12,254859 vpxor xmm7,xmm7,xmm24860 4861 vpslld xmm2,xmm12,74862 vpandn xmm0,xmm12,xmm144863 vpand xmm3,xmm12,xmm134864 4865 vpxor xmm7,xmm7,xmm14866 4867 vpsrld xmm15,xmm8,24868 vpxor xmm7,xmm7,xmm24869 4870 vpslld xmm1,xmm8,304871 vpxor xmm0,xmm0,xmm34872 vpxor xmm3,xmm9,xmm84873 4874 vpxor xmm15,xmm15,xmm14875 vpaddd xmm5,xmm5,xmm74876 4877 vpsrld xmm1,xmm8,134878 4879 vpslld xmm2,xmm8,194880 vpaddd xmm5,xmm5,xmm04881 vpand xmm4,xmm4,xmm34882 4883 vpxor xmm7,xmm15,xmm14884 4885 vpsrld xmm1,xmm8,224886 vpxor xmm7,xmm7,xmm24887 4888 vpslld xmm2,xmm8,104889 vpxor xmm15,xmm9,xmm44890 vpaddd xmm11,xmm11,xmm54891 4892 vpxor xmm7,xmm7,xmm14893 vpxor xmm7,xmm7,xmm24894 4895 vpaddd xmm15,xmm15,xmm54896 vpaddd xmm15,xmm15,xmm74897 vmovdqu xmm5,XMMWORD[((160-128))+rax]4898 vpaddd xmm6,xmm6,XMMWORD[((32-128))+rax]4899 4900 vpsrld xmm7,xmm5,34901 vpsrld xmm1,xmm5,74902 vpslld xmm2,xmm5,254903 vpxor xmm7,xmm7,xmm14904 vpsrld xmm1,xmm5,184905 vpxor xmm7,xmm7,xmm24906 vpslld xmm2,xmm5,144907 vmovdqu xmm0,XMMWORD[((112-128))+rax]4908 vpsrld xmm4,xmm0,104909 4910 vpxor xmm7,xmm7,xmm14911 vpsrld xmm1,xmm0,174912 vpxor xmm7,xmm7,xmm24913 vpslld xmm2,xmm0,154914 vpaddd xmm6,xmm6,xmm74915 vpxor xmm7,xmm4,xmm14916 vpsrld xmm1,xmm0,194917 vpxor xmm7,xmm7,xmm24918 vpslld xmm2,xmm0,134919 vpxor xmm7,xmm7,xmm14920 vpxor xmm7,xmm7,xmm24921 vpaddd xmm6,xmm6,xmm74922 vpsrld xmm7,xmm11,64923 vpslld xmm2,xmm11,264924 vmovdqu XMMWORD[(144-128)+rax],xmm64925 vpaddd xmm6,xmm6,xmm144926 4927 vpsrld xmm1,xmm11,114928 vpxor xmm7,xmm7,xmm24929 vpslld xmm2,xmm11,214930 vpaddd xmm6,xmm6,XMMWORD[((-96))+rbp]4931 vpxor xmm7,xmm7,xmm14932 4933 vpsrld xmm1,xmm11,254934 vpxor xmm7,xmm7,xmm24935 4936 vpslld xmm2,xmm11,74937 vpandn xmm0,xmm11,xmm134938 vpand xmm4,xmm11,xmm124939 4940 vpxor xmm7,xmm7,xmm14941 4942 vpsrld xmm14,xmm15,24943 vpxor xmm7,xmm7,xmm24944 4945 vpslld xmm1,xmm15,304946 vpxor xmm0,xmm0,xmm44947 vpxor xmm4,xmm8,xmm154948 4949 vpxor xmm14,xmm14,xmm14950 vpaddd xmm6,xmm6,xmm74951 4952 vpsrld xmm1,xmm15,134953 4954 vpslld xmm2,xmm15,194955 vpaddd xmm6,xmm6,xmm04956 vpand xmm3,xmm3,xmm44957 4958 vpxor xmm7,xmm14,xmm14959 4960 vpsrld xmm1,xmm15,224961 vpxor xmm7,xmm7,xmm24962 4963 vpslld xmm2,xmm15,104964 vpxor xmm14,xmm8,xmm34965 vpaddd xmm10,xmm10,xmm64966 4967 vpxor xmm7,xmm7,xmm14968 vpxor xmm7,xmm7,xmm24969 4970 vpaddd xmm14,xmm14,xmm64971 vpaddd xmm14,xmm14,xmm74972 vmovdqu xmm6,XMMWORD[((176-128))+rax]4973 vpaddd xmm5,xmm5,XMMWORD[((48-128))+rax]4974 4975 vpsrld xmm7,xmm6,34976 vpsrld xmm1,xmm6,74977 vpslld xmm2,xmm6,254978 vpxor xmm7,xmm7,xmm14979 vpsrld xmm1,xmm6,184980 vpxor xmm7,xmm7,xmm24981 vpslld xmm2,xmm6,144982 vmovdqu xmm0,XMMWORD[((128-128))+rax]4983 vpsrld xmm3,xmm0,104984 4985 vpxor xmm7,xmm7,xmm14986 vpsrld xmm1,xmm0,174987 vpxor xmm7,xmm7,xmm24988 vpslld xmm2,xmm0,154989 vpaddd xmm5,xmm5,xmm74990 vpxor xmm7,xmm3,xmm14991 vpsrld xmm1,xmm0,194992 vpxor xmm7,xmm7,xmm24993 vpslld xmm2,xmm0,134994 vpxor xmm7,xmm7,xmm14995 vpxor xmm7,xmm7,xmm24996 vpaddd xmm5,xmm5,xmm74997 vpsrld xmm7,xmm10,64998 vpslld xmm2,xmm10,264999 vmovdqu XMMWORD[(160-128)+rax],xmm55000 vpaddd xmm5,xmm5,xmm135001 5002 vpsrld xmm1,xmm10,115003 vpxor xmm7,xmm7,xmm25004 vpslld xmm2,xmm10,215005 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp]5006 vpxor xmm7,xmm7,xmm15007 5008 vpsrld xmm1,xmm10,255009 vpxor xmm7,xmm7,xmm25010 5011 vpslld xmm2,xmm10,75012 vpandn xmm0,xmm10,xmm125013 vpand xmm3,xmm10,xmm115014 5015 vpxor xmm7,xmm7,xmm15016 5017 vpsrld xmm13,xmm14,25018 vpxor xmm7,xmm7,xmm25019 5020 vpslld xmm1,xmm14,305021 vpxor xmm0,xmm0,xmm35022 vpxor xmm3,xmm15,xmm145023 5024 vpxor xmm13,xmm13,xmm15025 vpaddd xmm5,xmm5,xmm75026 5027 vpsrld xmm1,xmm14,135028 5029 vpslld xmm2,xmm14,195030 vpaddd xmm5,xmm5,xmm05031 vpand xmm4,xmm4,xmm35032 5033 vpxor xmm7,xmm13,xmm15034 5035 vpsrld xmm1,xmm14,225036 vpxor xmm7,xmm7,xmm25037 5038 vpslld xmm2,xmm14,105039 vpxor xmm13,xmm15,xmm45040 vpaddd xmm9,xmm9,xmm55041 5042 vpxor xmm7,xmm7,xmm15043 vpxor xmm7,xmm7,xmm25044 5045 vpaddd xmm13,xmm13,xmm55046 vpaddd xmm13,xmm13,xmm75047 vmovdqu xmm5,XMMWORD[((192-128))+rax]5048 vpaddd xmm6,xmm6,XMMWORD[((64-128))+rax]5049 5050 vpsrld xmm7,xmm5,35051 vpsrld xmm1,xmm5,75052 vpslld xmm2,xmm5,255053 vpxor xmm7,xmm7,xmm15054 vpsrld xmm1,xmm5,185055 vpxor xmm7,xmm7,xmm25056 vpslld xmm2,xmm5,145057 vmovdqu xmm0,XMMWORD[((144-128))+rax]5058 vpsrld xmm4,xmm0,105059 5060 vpxor xmm7,xmm7,xmm15061 vpsrld xmm1,xmm0,175062 vpxor xmm7,xmm7,xmm25063 vpslld xmm2,xmm0,155064 vpaddd xmm6,xmm6,xmm75065 vpxor xmm7,xmm4,xmm15066 vpsrld xmm1,xmm0,195067 vpxor xmm7,xmm7,xmm25068 vpslld xmm2,xmm0,135069 vpxor xmm7,xmm7,xmm15070 vpxor xmm7,xmm7,xmm25071 vpaddd xmm6,xmm6,xmm75072 vpsrld xmm7,xmm9,65073 vpslld xmm2,xmm9,265074 vmovdqu XMMWORD[(176-128)+rax],xmm65075 vpaddd xmm6,xmm6,xmm125076 5077 vpsrld xmm1,xmm9,115078 vpxor xmm7,xmm7,xmm25079 vpslld xmm2,xmm9,215080 vpaddd xmm6,xmm6,XMMWORD[((-32))+rbp]5081 vpxor xmm7,xmm7,xmm15082 5083 vpsrld xmm1,xmm9,255084 vpxor xmm7,xmm7,xmm25085 5086 vpslld xmm2,xmm9,75087 vpandn xmm0,xmm9,xmm115088 vpand xmm4,xmm9,xmm105089 5090 vpxor xmm7,xmm7,xmm15091 5092 vpsrld xmm12,xmm13,25093 vpxor xmm7,xmm7,xmm25094 5095 vpslld xmm1,xmm13,305096 vpxor xmm0,xmm0,xmm45097 vpxor xmm4,xmm14,xmm135098 5099 vpxor xmm12,xmm12,xmm15100 vpaddd xmm6,xmm6,xmm75101 5102 vpsrld xmm1,xmm13,135103 5104 vpslld xmm2,xmm13,195105 vpaddd xmm6,xmm6,xmm05106 vpand xmm3,xmm3,xmm45107 5108 vpxor xmm7,xmm12,xmm15109 5110 vpsrld xmm1,xmm13,225111 vpxor xmm7,xmm7,xmm25112 5113 vpslld xmm2,xmm13,105114 vpxor xmm12,xmm14,xmm35115 vpaddd xmm8,xmm8,xmm65116 5117 vpxor xmm7,xmm7,xmm15118 vpxor xmm7,xmm7,xmm25119 5120 vpaddd xmm12,xmm12,xmm65121 vpaddd xmm12,xmm12,xmm75122 vmovdqu xmm6,XMMWORD[((208-128))+rax]5123 vpaddd xmm5,xmm5,XMMWORD[((80-128))+rax]5124 5125 vpsrld xmm7,xmm6,35126 vpsrld xmm1,xmm6,75127 vpslld xmm2,xmm6,255128 vpxor xmm7,xmm7,xmm15129 vpsrld xmm1,xmm6,185130 vpxor xmm7,xmm7,xmm25131 vpslld xmm2,xmm6,145132 vmovdqu xmm0,XMMWORD[((160-128))+rax]5133 vpsrld xmm3,xmm0,105134 5135 vpxor xmm7,xmm7,xmm15136 vpsrld xmm1,xmm0,175137 vpxor xmm7,xmm7,xmm25138 vpslld xmm2,xmm0,155139 vpaddd xmm5,xmm5,xmm75140 vpxor xmm7,xmm3,xmm15141 vpsrld xmm1,xmm0,195142 vpxor xmm7,xmm7,xmm25143 vpslld xmm2,xmm0,135144 vpxor xmm7,xmm7,xmm15145 vpxor xmm7,xmm7,xmm25146 vpaddd xmm5,xmm5,xmm75147 vpsrld xmm7,xmm8,65148 vpslld xmm2,xmm8,265149 vmovdqu XMMWORD[(192-128)+rax],xmm55150 vpaddd xmm5,xmm5,xmm115151 5152 vpsrld xmm1,xmm8,115153 vpxor xmm7,xmm7,xmm25154 vpslld xmm2,xmm8,215155 vpaddd xmm5,xmm5,XMMWORD[rbp]5156 vpxor xmm7,xmm7,xmm15157 5158 vpsrld xmm1,xmm8,255159 vpxor xmm7,xmm7,xmm25160 5161 vpslld xmm2,xmm8,75162 vpandn xmm0,xmm8,xmm105163 vpand xmm3,xmm8,xmm95164 5165 vpxor xmm7,xmm7,xmm15166 5167 vpsrld xmm11,xmm12,25168 vpxor xmm7,xmm7,xmm25169 5170 vpslld xmm1,xmm12,305171 vpxor xmm0,xmm0,xmm35172 vpxor xmm3,xmm13,xmm125173 5174 vpxor xmm11,xmm11,xmm15175 vpaddd xmm5,xmm5,xmm75176 5177 vpsrld xmm1,xmm12,135178 5179 vpslld xmm2,xmm12,195180 vpaddd xmm5,xmm5,xmm05181 vpand xmm4,xmm4,xmm35182 5183 vpxor xmm7,xmm11,xmm15184 5185 vpsrld xmm1,xmm12,225186 vpxor xmm7,xmm7,xmm25187 5188 vpslld xmm2,xmm12,105189 vpxor xmm11,xmm13,xmm45190 vpaddd xmm15,xmm15,xmm55191 5192 vpxor xmm7,xmm7,xmm15193 vpxor xmm7,xmm7,xmm25194 5195 vpaddd xmm11,xmm11,xmm55196 vpaddd xmm11,xmm11,xmm75197 vmovdqu xmm5,XMMWORD[((224-128))+rax]5198 vpaddd xmm6,xmm6,XMMWORD[((96-128))+rax]5199 5200 vpsrld xmm7,xmm5,35201 vpsrld xmm1,xmm5,75202 vpslld xmm2,xmm5,255203 vpxor xmm7,xmm7,xmm15204 vpsrld xmm1,xmm5,185205 vpxor xmm7,xmm7,xmm25206 vpslld xmm2,xmm5,145207 vmovdqu xmm0,XMMWORD[((176-128))+rax]5208 vpsrld xmm4,xmm0,105209 5210 vpxor xmm7,xmm7,xmm15211 vpsrld xmm1,xmm0,175212 vpxor xmm7,xmm7,xmm25213 vpslld xmm2,xmm0,155214 vpaddd xmm6,xmm6,xmm75215 vpxor xmm7,xmm4,xmm15216 vpsrld xmm1,xmm0,195217 vpxor xmm7,xmm7,xmm25218 vpslld xmm2,xmm0,135219 vpxor xmm7,xmm7,xmm15220 vpxor xmm7,xmm7,xmm25221 vpaddd xmm6,xmm6,xmm75222 vpsrld xmm7,xmm15,65223 vpslld xmm2,xmm15,265224 vmovdqu XMMWORD[(208-128)+rax],xmm65225 vpaddd xmm6,xmm6,xmm105226 5227 vpsrld xmm1,xmm15,115228 vpxor xmm7,xmm7,xmm25229 vpslld xmm2,xmm15,215230 vpaddd xmm6,xmm6,XMMWORD[32+rbp]5231 vpxor xmm7,xmm7,xmm15232 5233 vpsrld xmm1,xmm15,255234 vpxor xmm7,xmm7,xmm25235 5236 vpslld xmm2,xmm15,75237 vpandn xmm0,xmm15,xmm95238 vpand xmm4,xmm15,xmm85239 5240 vpxor xmm7,xmm7,xmm15241 5242 vpsrld xmm10,xmm11,25243 vpxor xmm7,xmm7,xmm25244 5245 vpslld xmm1,xmm11,305246 vpxor xmm0,xmm0,xmm45247 vpxor xmm4,xmm12,xmm115248 5249 vpxor xmm10,xmm10,xmm15250 vpaddd xmm6,xmm6,xmm75251 5252 vpsrld xmm1,xmm11,135253 5254 vpslld xmm2,xmm11,195255 vpaddd xmm6,xmm6,xmm05256 vpand xmm3,xmm3,xmm45257 5258 vpxor xmm7,xmm10,xmm15259 5260 vpsrld xmm1,xmm11,225261 vpxor xmm7,xmm7,xmm25262 5263 vpslld xmm2,xmm11,105264 vpxor xmm10,xmm12,xmm35265 vpaddd xmm14,xmm14,xmm65266 5267 vpxor xmm7,xmm7,xmm15268 vpxor xmm7,xmm7,xmm25269 5270 vpaddd xmm10,xmm10,xmm65271 vpaddd xmm10,xmm10,xmm75272 vmovdqu xmm6,XMMWORD[((240-128))+rax]5273 vpaddd xmm5,xmm5,XMMWORD[((112-128))+rax]5274 5275 vpsrld xmm7,xmm6,35276 vpsrld xmm1,xmm6,75277 vpslld xmm2,xmm6,255278 vpxor xmm7,xmm7,xmm15279 vpsrld xmm1,xmm6,185280 vpxor xmm7,xmm7,xmm25281 vpslld xmm2,xmm6,145282 vmovdqu xmm0,XMMWORD[((192-128))+rax]5283 vpsrld xmm3,xmm0,105284 5285 vpxor xmm7,xmm7,xmm15286 vpsrld xmm1,xmm0,175287 vpxor xmm7,xmm7,xmm25288 vpslld xmm2,xmm0,155289 vpaddd xmm5,xmm5,xmm75290 vpxor xmm7,xmm3,xmm15291 vpsrld xmm1,xmm0,195292 vpxor xmm7,xmm7,xmm25293 vpslld xmm2,xmm0,135294 vpxor xmm7,xmm7,xmm15295 vpxor xmm7,xmm7,xmm25296 vpaddd xmm5,xmm5,xmm75297 vpsrld xmm7,xmm14,65298 vpslld xmm2,xmm14,265299 vmovdqu XMMWORD[(224-128)+rax],xmm55300 vpaddd xmm5,xmm5,xmm95301 5302 vpsrld xmm1,xmm14,115303 vpxor xmm7,xmm7,xmm25304 vpslld xmm2,xmm14,215305 vpaddd xmm5,xmm5,XMMWORD[64+rbp]5306 vpxor xmm7,xmm7,xmm15307 5308 vpsrld xmm1,xmm14,255309 vpxor xmm7,xmm7,xmm25310 5311 vpslld xmm2,xmm14,75312 vpandn xmm0,xmm14,xmm85313 vpand xmm3,xmm14,xmm155314 5315 vpxor xmm7,xmm7,xmm15316 5317 vpsrld xmm9,xmm10,25318 vpxor xmm7,xmm7,xmm25319 5320 vpslld xmm1,xmm10,305321 vpxor xmm0,xmm0,xmm35322 vpxor xmm3,xmm11,xmm105323 5324 vpxor xmm9,xmm9,xmm15325 vpaddd xmm5,xmm5,xmm75326 5327 vpsrld xmm1,xmm10,135328 5329 vpslld xmm2,xmm10,195330 vpaddd xmm5,xmm5,xmm05331 vpand xmm4,xmm4,xmm35332 5333 vpxor xmm7,xmm9,xmm15334 5335 vpsrld xmm1,xmm10,225336 vpxor xmm7,xmm7,xmm25337 5338 vpslld xmm2,xmm10,105339 vpxor xmm9,xmm11,xmm45340 vpaddd xmm13,xmm13,xmm55341 5342 vpxor xmm7,xmm7,xmm15343 vpxor xmm7,xmm7,xmm25344 5345 vpaddd xmm9,xmm9,xmm55346 vpaddd xmm9,xmm9,xmm75347 vmovdqu xmm5,XMMWORD[((0-128))+rax]5348 vpaddd xmm6,xmm6,XMMWORD[((128-128))+rax]5349 5350 vpsrld xmm7,xmm5,35351 vpsrld xmm1,xmm5,75352 vpslld xmm2,xmm5,255353 vpxor xmm7,xmm7,xmm15354 vpsrld xmm1,xmm5,185355 vpxor xmm7,xmm7,xmm25356 vpslld xmm2,xmm5,145357 vmovdqu xmm0,XMMWORD[((208-128))+rax]5358 vpsrld xmm4,xmm0,105359 5360 vpxor xmm7,xmm7,xmm15361 vpsrld xmm1,xmm0,175362 vpxor xmm7,xmm7,xmm25363 vpslld xmm2,xmm0,155364 vpaddd xmm6,xmm6,xmm75365 vpxor xmm7,xmm4,xmm15366 vpsrld xmm1,xmm0,195367 vpxor xmm7,xmm7,xmm25368 vpslld xmm2,xmm0,135369 vpxor xmm7,xmm7,xmm15370 vpxor xmm7,xmm7,xmm25371 vpaddd xmm6,xmm6,xmm75372 vpsrld xmm7,xmm13,65373 vpslld xmm2,xmm13,265374 vmovdqu XMMWORD[(240-128)+rax],xmm65375 vpaddd xmm6,xmm6,xmm85376 5377 vpsrld xmm1,xmm13,115378 vpxor xmm7,xmm7,xmm25379 vpslld xmm2,xmm13,215380 vpaddd xmm6,xmm6,XMMWORD[96+rbp]5381 vpxor xmm7,xmm7,xmm15382 5383 vpsrld xmm1,xmm13,255384 vpxor xmm7,xmm7,xmm25385 5386 vpslld xmm2,xmm13,75387 vpandn xmm0,xmm13,xmm155388 vpand xmm4,xmm13,xmm145389 5390 vpxor xmm7,xmm7,xmm15391 5392 vpsrld xmm8,xmm9,25393 vpxor xmm7,xmm7,xmm25394 5395 vpslld xmm1,xmm9,305396 vpxor xmm0,xmm0,xmm45397 vpxor xmm4,xmm10,xmm95398 5399 vpxor xmm8,xmm8,xmm15400 vpaddd xmm6,xmm6,xmm75401 5402 vpsrld xmm1,xmm9,135403 5404 vpslld xmm2,xmm9,195405 vpaddd xmm6,xmm6,xmm05406 vpand xmm3,xmm3,xmm45407 5408 vpxor xmm7,xmm8,xmm15409 5410 vpsrld xmm1,xmm9,225411 vpxor xmm7,xmm7,xmm25412 5413 vpslld xmm2,xmm9,105414 vpxor xmm8,xmm10,xmm35415 vpaddd xmm12,xmm12,xmm65416 5417 vpxor xmm7,xmm7,xmm15418 vpxor xmm7,xmm7,xmm25419 5420 vpaddd xmm8,xmm8,xmm65421 vpaddd xmm8,xmm8,xmm75422 add rbp,2565423 dec ecx5424 jnz NEAR $L$oop_16_xx_avx5425 5426 mov ecx,15427 lea rbp,[((K256+128))]5428 cmp ecx,DWORD[rbx]5429 cmovge r8,rbp5430 cmp ecx,DWORD[4+rbx]5431 cmovge r9,rbp5432 cmp ecx,DWORD[8+rbx]5433 cmovge r10,rbp5434 cmp ecx,DWORD[12+rbx]5435 cmovge r11,rbp5436 vmovdqa xmm7,XMMWORD[rbx]5437 vpxor xmm0,xmm0,xmm05438 vmovdqa xmm6,xmm75439 vpcmpgtd xmm6,xmm6,xmm05440 vpaddd xmm7,xmm7,xmm65441 5442 vmovdqu xmm0,XMMWORD[((0-128))+rdi]5443 vpand xmm8,xmm8,xmm65444 vmovdqu xmm1,XMMWORD[((32-128))+rdi]5445 vpand xmm9,xmm9,xmm65446 vmovdqu xmm2,XMMWORD[((64-128))+rdi]5447 vpand xmm10,xmm10,xmm65448 vmovdqu xmm5,XMMWORD[((96-128))+rdi]5449 vpand xmm11,xmm11,xmm65450 vpaddd xmm8,xmm8,xmm05451 vmovdqu xmm0,XMMWORD[((128-128))+rdi]5452 vpand xmm12,xmm12,xmm65453 vpaddd xmm9,xmm9,xmm15454 vmovdqu xmm1,XMMWORD[((160-128))+rdi]5455 vpand xmm13,xmm13,xmm65456 vpaddd xmm10,xmm10,xmm25457 vmovdqu xmm2,XMMWORD[((192-128))+rdi]5458 vpand xmm14,xmm14,xmm65459 vpaddd xmm11,xmm11,xmm55460 vmovdqu xmm5,XMMWORD[((224-128))+rdi]5461 vpand xmm15,xmm15,xmm65462 vpaddd xmm12,xmm12,xmm05463 vpaddd xmm13,xmm13,xmm15464 vmovdqu XMMWORD[(0-128)+rdi],xmm85465 vpaddd xmm14,xmm14,xmm25466 vmovdqu XMMWORD[(32-128)+rdi],xmm95467 vpaddd xmm15,xmm15,xmm55468 vmovdqu XMMWORD[(64-128)+rdi],xmm105469 vmovdqu XMMWORD[(96-128)+rdi],xmm115470 vmovdqu XMMWORD[(128-128)+rdi],xmm125471 vmovdqu XMMWORD[(160-128)+rdi],xmm135472 vmovdqu XMMWORD[(192-128)+rdi],xmm145473 vmovdqu XMMWORD[(224-128)+rdi],xmm155474 5475 vmovdqu XMMWORD[rbx],xmm75476 vmovdqu xmm6,XMMWORD[$L$pbswap]5477 dec edx5478 jnz NEAR $L$oop_avx5479 5480 mov edx,DWORD[280+rsp]5481 lea rdi,[16+rdi]5482 lea rsi,[64+rsi]5483 dec edx5484 jnz NEAR $L$oop_grande_avx5485 5486 $L$done_avx:5487 mov rax,QWORD[272+rsp]5488 5489 vzeroupper5490 movaps xmm6,XMMWORD[((-184))+rax]5491 movaps xmm7,XMMWORD[((-168))+rax]5492 movaps xmm8,XMMWORD[((-152))+rax]5493 movaps xmm9,XMMWORD[((-136))+rax]5494 movaps xmm10,XMMWORD[((-120))+rax]5495 movaps xmm11,XMMWORD[((-104))+rax]5496 movaps xmm12,XMMWORD[((-88))+rax]5497 movaps xmm13,XMMWORD[((-72))+rax]5498 movaps xmm14,XMMWORD[((-56))+rax]5499 movaps xmm15,XMMWORD[((-40))+rax]5500 mov rbp,QWORD[((-16))+rax]5501 5502 mov rbx,QWORD[((-8))+rax]5503 5504 lea rsp,[rax]5505 5506 $L$epilogue_avx:5507 mov rdi,QWORD[8+rsp] ;WIN64 epilogue5508 mov rsi,QWORD[16+rsp]5509 DB 0F3h,0C3h ;repret5510 5511 $L$SEH_end_sha256_multi_block_avx:5512 5513 ALIGN 325514 sha256_multi_block_avx2:5515 mov QWORD[8+rsp],rdi ;WIN64 prologue5516 mov QWORD[16+rsp],rsi5517 mov rax,rsp5518 $L$SEH_begin_sha256_multi_block_avx2:5519 mov rdi,rcx5520 mov rsi,rdx5521 mov rdx,r85522 5523 5524 5525 _avx2_shortcut:5526 mov rax,rsp5527 5528 push rbx5529 5530 push rbp5531 5532 push r125533 5534 push r135535 5536 push r145537 5538 push r155539 5540 lea rsp,[((-168))+rsp]5541 movaps XMMWORD[rsp],xmm65542 movaps XMMWORD[16+rsp],xmm75543 movaps XMMWORD[32+rsp],xmm85544 movaps XMMWORD[48+rsp],xmm95545 movaps XMMWORD[64+rsp],xmm105546 movaps XMMWORD[80+rsp],xmm115547 movaps XMMWORD[(-120)+rax],xmm125548 movaps XMMWORD[(-104)+rax],xmm135549 movaps XMMWORD[(-88)+rax],xmm145550 movaps XMMWORD[(-72)+rax],xmm155551 sub rsp,5765552 and rsp,-2565553 mov QWORD[544+rsp],rax5554 5555 $L$body_avx2:5556 lea rbp,[((K256+128))]5557 lea rdi,[128+rdi]5558 5559 $L$oop_grande_avx2:5560 mov DWORD[552+rsp],edx5561 xor edx,edx5562 lea rbx,[512+rsp]5563 5564 mov r12,QWORD[rsi]5565 5566 mov ecx,DWORD[8+rsi]5567 cmp ecx,edx5568 cmovg edx,ecx5569 test ecx,ecx5570 mov DWORD[rbx],ecx5571 cmovle r12,rbp5572 5573 mov r13,QWORD[16+rsi]5574 5575 mov ecx,DWORD[24+rsi]5576 cmp ecx,edx5577 cmovg edx,ecx5578 test ecx,ecx5579 mov DWORD[4+rbx],ecx5580 cmovle r13,rbp5581 5582 mov r14,QWORD[32+rsi]5583 5584 mov ecx,DWORD[40+rsi]5585 cmp ecx,edx5586 cmovg edx,ecx5587 test ecx,ecx5588 mov DWORD[8+rbx],ecx5589 cmovle r14,rbp5590 5591 mov r15,QWORD[48+rsi]5592 5593 mov ecx,DWORD[56+rsi]5594 cmp ecx,edx5595 cmovg edx,ecx5596 test ecx,ecx5597 mov DWORD[12+rbx],ecx5598 cmovle r15,rbp5599 5600 mov r8,QWORD[64+rsi]5601 5602 mov ecx,DWORD[72+rsi]5603 cmp ecx,edx5604 cmovg edx,ecx5605 test ecx,ecx5606 mov DWORD[16+rbx],ecx5607 cmovle r8,rbp5608 5609 mov r9,QWORD[80+rsi]5610 5611 mov ecx,DWORD[88+rsi]5612 cmp ecx,edx5613 cmovg edx,ecx5614 test ecx,ecx5615 mov DWORD[20+rbx],ecx5616 cmovle r9,rbp5617 5618 mov r10,QWORD[96+rsi]5619 5620 mov ecx,DWORD[104+rsi]5621 cmp ecx,edx5622 cmovg edx,ecx5623 test ecx,ecx5624 mov DWORD[24+rbx],ecx5625 cmovle r10,rbp5626 5627 mov r11,QWORD[112+rsi]5628 5629 mov ecx,DWORD[120+rsi]5630 cmp ecx,edx5631 cmovg edx,ecx5632 test ecx,ecx5633 mov DWORD[28+rbx],ecx5634 cmovle r11,rbp5635 vmovdqu ymm8,YMMWORD[((0-128))+rdi]5636 lea rax,[128+rsp]5637 vmovdqu ymm9,YMMWORD[((32-128))+rdi]5638 lea rbx,[((256+128))+rsp]5639 vmovdqu ymm10,YMMWORD[((64-128))+rdi]5640 vmovdqu ymm11,YMMWORD[((96-128))+rdi]5641 vmovdqu ymm12,YMMWORD[((128-128))+rdi]5642 vmovdqu ymm13,YMMWORD[((160-128))+rdi]5643 vmovdqu ymm14,YMMWORD[((192-128))+rdi]5644 vmovdqu ymm15,YMMWORD[((224-128))+rdi]5645 vmovdqu ymm6,YMMWORD[$L$pbswap]5646 jmp NEAR $L$oop_avx25647 5648 ALIGN 325649 $L$oop_avx2:5650 vpxor ymm4,ymm10,ymm95651 vmovd xmm5,DWORD[r12]5652 vmovd xmm0,DWORD[r8]5653 vmovd xmm1,DWORD[r13]5654 vmovd xmm2,DWORD[r9]5655 vpinsrd xmm5,xmm5,DWORD[r14],15656 vpinsrd xmm0,xmm0,DWORD[r10],15657 vpinsrd xmm1,xmm1,DWORD[r15],15658 vpunpckldq ymm5,ymm5,ymm15659 vpinsrd xmm2,xmm2,DWORD[r11],15660 vpunpckldq ymm0,ymm0,ymm25661 vinserti128 ymm5,ymm5,xmm0,15662 vpshufb ymm5,ymm5,ymm65663 vpsrld ymm7,ymm12,65664 vpslld ymm2,ymm12,265665 vmovdqu YMMWORD[(0-128)+rax],ymm55666 vpaddd ymm5,ymm5,ymm155667 5668 vpsrld ymm1,ymm12,115669 vpxor ymm7,ymm7,ymm25670 vpslld ymm2,ymm12,215671 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp]5672 vpxor ymm7,ymm7,ymm15673 5674 vpsrld ymm1,ymm12,255675 vpxor ymm7,ymm7,ymm25676 5677 vpslld ymm2,ymm12,75678 vpandn ymm0,ymm12,ymm145679 vpand ymm3,ymm12,ymm135680 5681 vpxor ymm7,ymm7,ymm15682 5683 vpsrld ymm15,ymm8,25684 vpxor ymm7,ymm7,ymm25685 5686 vpslld ymm1,ymm8,305687 vpxor ymm0,ymm0,ymm35688 vpxor ymm3,ymm9,ymm85689 5690 vpxor ymm15,ymm15,ymm15691 vpaddd ymm5,ymm5,ymm75692 5693 vpsrld ymm1,ymm8,135694 5695 vpslld ymm2,ymm8,195696 vpaddd ymm5,ymm5,ymm05697 vpand ymm4,ymm4,ymm35698 5699 vpxor ymm7,ymm15,ymm15700 5701 vpsrld ymm1,ymm8,225702 vpxor ymm7,ymm7,ymm25703 5704 vpslld ymm2,ymm8,105705 vpxor ymm15,ymm9,ymm45706 vpaddd ymm11,ymm11,ymm55707 5708 vpxor ymm7,ymm7,ymm15709 vpxor ymm7,ymm7,ymm25710 5711 vpaddd ymm15,ymm15,ymm55712 vpaddd ymm15,ymm15,ymm75713 vmovd xmm5,DWORD[4+r12]5714 vmovd xmm0,DWORD[4+r8]5715 vmovd xmm1,DWORD[4+r13]5716 vmovd xmm2,DWORD[4+r9]5717 vpinsrd xmm5,xmm5,DWORD[4+r14],15718 vpinsrd xmm0,xmm0,DWORD[4+r10],15719 vpinsrd xmm1,xmm1,DWORD[4+r15],15720 vpunpckldq ymm5,ymm5,ymm15721 vpinsrd xmm2,xmm2,DWORD[4+r11],15722 vpunpckldq ymm0,ymm0,ymm25723 vinserti128 ymm5,ymm5,xmm0,15724 vpshufb ymm5,ymm5,ymm65725 vpsrld ymm7,ymm11,65726 vpslld ymm2,ymm11,265727 vmovdqu YMMWORD[(32-128)+rax],ymm55728 vpaddd ymm5,ymm5,ymm145729 5730 vpsrld ymm1,ymm11,115731 vpxor ymm7,ymm7,ymm25732 vpslld ymm2,ymm11,215733 vpaddd ymm5,ymm5,YMMWORD[((-96))+rbp]5734 vpxor ymm7,ymm7,ymm15735 5736 vpsrld ymm1,ymm11,255737 vpxor ymm7,ymm7,ymm25738 5739 vpslld ymm2,ymm11,75740 vpandn ymm0,ymm11,ymm135741 vpand ymm4,ymm11,ymm125742 5743 vpxor ymm7,ymm7,ymm15744 5745 vpsrld ymm14,ymm15,25746 vpxor ymm7,ymm7,ymm25747 5748 vpslld ymm1,ymm15,305749 vpxor ymm0,ymm0,ymm45750 vpxor ymm4,ymm8,ymm155751 5752 vpxor ymm14,ymm14,ymm15753 vpaddd ymm5,ymm5,ymm75754 5755 vpsrld ymm1,ymm15,135756 5757 vpslld ymm2,ymm15,195758 vpaddd ymm5,ymm5,ymm05759 vpand ymm3,ymm3,ymm45760 5761 vpxor ymm7,ymm14,ymm15762 5763 vpsrld ymm1,ymm15,225764 vpxor ymm7,ymm7,ymm25765 5766 vpslld ymm2,ymm15,105767 vpxor ymm14,ymm8,ymm35768 vpaddd ymm10,ymm10,ymm55769 5770 vpxor ymm7,ymm7,ymm15771 vpxor ymm7,ymm7,ymm25772 5773 vpaddd ymm14,ymm14,ymm55774 vpaddd ymm14,ymm14,ymm75775 vmovd xmm5,DWORD[8+r12]5776 vmovd xmm0,DWORD[8+r8]5777 vmovd xmm1,DWORD[8+r13]5778 vmovd xmm2,DWORD[8+r9]5779 vpinsrd xmm5,xmm5,DWORD[8+r14],15780 vpinsrd xmm0,xmm0,DWORD[8+r10],15781 vpinsrd xmm1,xmm1,DWORD[8+r15],15782 vpunpckldq ymm5,ymm5,ymm15783 vpinsrd xmm2,xmm2,DWORD[8+r11],15784 vpunpckldq ymm0,ymm0,ymm25785 vinserti128 ymm5,ymm5,xmm0,15786 vpshufb ymm5,ymm5,ymm65787 vpsrld ymm7,ymm10,65788 vpslld ymm2,ymm10,265789 vmovdqu YMMWORD[(64-128)+rax],ymm55790 vpaddd ymm5,ymm5,ymm135791 5792 vpsrld ymm1,ymm10,115793 vpxor ymm7,ymm7,ymm25794 vpslld ymm2,ymm10,215795 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp]5796 vpxor ymm7,ymm7,ymm15797 5798 vpsrld ymm1,ymm10,255799 vpxor ymm7,ymm7,ymm25800 5801 vpslld ymm2,ymm10,75802 vpandn ymm0,ymm10,ymm125803 vpand ymm3,ymm10,ymm115804 5805 vpxor ymm7,ymm7,ymm15806 5807 vpsrld ymm13,ymm14,25808 vpxor ymm7,ymm7,ymm25809 5810 vpslld ymm1,ymm14,305811 vpxor ymm0,ymm0,ymm35812 vpxor ymm3,ymm15,ymm145813 5814 vpxor ymm13,ymm13,ymm15815 vpaddd ymm5,ymm5,ymm75816 5817 vpsrld ymm1,ymm14,135818 5819 vpslld ymm2,ymm14,195820 vpaddd ymm5,ymm5,ymm05821 vpand ymm4,ymm4,ymm35822 5823 vpxor ymm7,ymm13,ymm15824 5825 vpsrld ymm1,ymm14,225826 vpxor ymm7,ymm7,ymm25827 5828 vpslld ymm2,ymm14,105829 vpxor ymm13,ymm15,ymm45830 vpaddd ymm9,ymm9,ymm55831 5832 vpxor ymm7,ymm7,ymm15833 vpxor ymm7,ymm7,ymm25834 5835 vpaddd ymm13,ymm13,ymm55836 vpaddd ymm13,ymm13,ymm75837 vmovd xmm5,DWORD[12+r12]5838 vmovd xmm0,DWORD[12+r8]5839 vmovd xmm1,DWORD[12+r13]5840 vmovd xmm2,DWORD[12+r9]5841 vpinsrd xmm5,xmm5,DWORD[12+r14],15842 vpinsrd xmm0,xmm0,DWORD[12+r10],15843 vpinsrd xmm1,xmm1,DWORD[12+r15],15844 vpunpckldq ymm5,ymm5,ymm15845 vpinsrd xmm2,xmm2,DWORD[12+r11],15846 vpunpckldq ymm0,ymm0,ymm25847 vinserti128 ymm5,ymm5,xmm0,15848 vpshufb ymm5,ymm5,ymm65849 vpsrld ymm7,ymm9,65850 vpslld ymm2,ymm9,265851 vmovdqu YMMWORD[(96-128)+rax],ymm55852 vpaddd ymm5,ymm5,ymm125853 5854 vpsrld ymm1,ymm9,115855 vpxor ymm7,ymm7,ymm25856 vpslld ymm2,ymm9,215857 vpaddd ymm5,ymm5,YMMWORD[((-32))+rbp]5858 vpxor ymm7,ymm7,ymm15859 5860 vpsrld ymm1,ymm9,255861 vpxor ymm7,ymm7,ymm25862 5863 vpslld ymm2,ymm9,75864 vpandn ymm0,ymm9,ymm115865 vpand ymm4,ymm9,ymm105866 5867 vpxor ymm7,ymm7,ymm15868 5869 vpsrld ymm12,ymm13,25870 vpxor ymm7,ymm7,ymm25871 5872 vpslld ymm1,ymm13,305873 vpxor ymm0,ymm0,ymm45874 vpxor ymm4,ymm14,ymm135875 5876 vpxor ymm12,ymm12,ymm15877 vpaddd ymm5,ymm5,ymm75878 5879 vpsrld ymm1,ymm13,135880 5881 vpslld ymm2,ymm13,195882 vpaddd ymm5,ymm5,ymm05883 vpand ymm3,ymm3,ymm45884 5885 vpxor ymm7,ymm12,ymm15886 5887 vpsrld ymm1,ymm13,225888 vpxor ymm7,ymm7,ymm25889 5890 vpslld ymm2,ymm13,105891 vpxor ymm12,ymm14,ymm35892 vpaddd ymm8,ymm8,ymm55893 5894 vpxor ymm7,ymm7,ymm15895 vpxor ymm7,ymm7,ymm25896 5897 vpaddd ymm12,ymm12,ymm55898 vpaddd ymm12,ymm12,ymm75899 vmovd xmm5,DWORD[16+r12]5900 vmovd xmm0,DWORD[16+r8]5901 vmovd xmm1,DWORD[16+r13]5902 vmovd xmm2,DWORD[16+r9]5903 vpinsrd xmm5,xmm5,DWORD[16+r14],15904 vpinsrd xmm0,xmm0,DWORD[16+r10],15905 vpinsrd xmm1,xmm1,DWORD[16+r15],15906 vpunpckldq ymm5,ymm5,ymm15907 vpinsrd xmm2,xmm2,DWORD[16+r11],15908 vpunpckldq ymm0,ymm0,ymm25909 vinserti128 ymm5,ymm5,xmm0,15910 vpshufb ymm5,ymm5,ymm65911 vpsrld ymm7,ymm8,65912 vpslld ymm2,ymm8,265913 vmovdqu YMMWORD[(128-128)+rax],ymm55914 vpaddd ymm5,ymm5,ymm115915 5916 vpsrld ymm1,ymm8,115917 vpxor ymm7,ymm7,ymm25918 vpslld ymm2,ymm8,215919 vpaddd ymm5,ymm5,YMMWORD[rbp]5920 vpxor ymm7,ymm7,ymm15921 5922 vpsrld ymm1,ymm8,255923 vpxor ymm7,ymm7,ymm25924 5925 vpslld ymm2,ymm8,75926 vpandn ymm0,ymm8,ymm105927 vpand ymm3,ymm8,ymm95928 5929 vpxor ymm7,ymm7,ymm15930 5931 vpsrld ymm11,ymm12,25932 vpxor ymm7,ymm7,ymm25933 5934 vpslld ymm1,ymm12,305935 vpxor ymm0,ymm0,ymm35936 vpxor ymm3,ymm13,ymm125937 5938 vpxor ymm11,ymm11,ymm15939 vpaddd ymm5,ymm5,ymm75940 5941 vpsrld ymm1,ymm12,135942 5943 vpslld ymm2,ymm12,195944 vpaddd ymm5,ymm5,ymm05945 vpand ymm4,ymm4,ymm35946 5947 vpxor ymm7,ymm11,ymm15948 5949 vpsrld ymm1,ymm12,225950 vpxor ymm7,ymm7,ymm25951 5952 vpslld ymm2,ymm12,105953 vpxor ymm11,ymm13,ymm45954 vpaddd ymm15,ymm15,ymm55955 5956 vpxor ymm7,ymm7,ymm15957 vpxor ymm7,ymm7,ymm25958 5959 vpaddd ymm11,ymm11,ymm55960 vpaddd ymm11,ymm11,ymm75961 vmovd xmm5,DWORD[20+r12]5962 vmovd xmm0,DWORD[20+r8]5963 vmovd xmm1,DWORD[20+r13]5964 vmovd xmm2,DWORD[20+r9]5965 vpinsrd xmm5,xmm5,DWORD[20+r14],15966 vpinsrd xmm0,xmm0,DWORD[20+r10],15967 vpinsrd xmm1,xmm1,DWORD[20+r15],15968 vpunpckldq ymm5,ymm5,ymm15969 vpinsrd xmm2,xmm2,DWORD[20+r11],15970 vpunpckldq ymm0,ymm0,ymm25971 vinserti128 ymm5,ymm5,xmm0,15972 vpshufb ymm5,ymm5,ymm65973 vpsrld ymm7,ymm15,65974 vpslld ymm2,ymm15,265975 vmovdqu YMMWORD[(160-128)+rax],ymm55976 vpaddd ymm5,ymm5,ymm105977 5978 vpsrld ymm1,ymm15,115979 vpxor ymm7,ymm7,ymm25980 vpslld ymm2,ymm15,215981 vpaddd ymm5,ymm5,YMMWORD[32+rbp]5982 vpxor ymm7,ymm7,ymm15983 5984 vpsrld ymm1,ymm15,255985 vpxor ymm7,ymm7,ymm25986 5987 vpslld ymm2,ymm15,75988 vpandn ymm0,ymm15,ymm95989 vpand ymm4,ymm15,ymm85990 5991 vpxor ymm7,ymm7,ymm15992 5993 vpsrld ymm10,ymm11,25994 vpxor ymm7,ymm7,ymm25995 5996 vpslld ymm1,ymm11,305997 vpxor ymm0,ymm0,ymm45998 vpxor ymm4,ymm12,ymm115999 6000 vpxor ymm10,ymm10,ymm16001 vpaddd ymm5,ymm5,ymm76002 6003 vpsrld ymm1,ymm11,136004 6005 vpslld ymm2,ymm11,196006 vpaddd ymm5,ymm5,ymm06007 vpand ymm3,ymm3,ymm46008 6009 vpxor ymm7,ymm10,ymm16010 6011 vpsrld ymm1,ymm11,226012 vpxor ymm7,ymm7,ymm26013 6014 vpslld ymm2,ymm11,106015 vpxor ymm10,ymm12,ymm36016 vpaddd ymm14,ymm14,ymm56017 6018 vpxor ymm7,ymm7,ymm16019 vpxor ymm7,ymm7,ymm26020 6021 vpaddd ymm10,ymm10,ymm56022 vpaddd ymm10,ymm10,ymm76023 vmovd xmm5,DWORD[24+r12]6024 vmovd xmm0,DWORD[24+r8]6025 vmovd xmm1,DWORD[24+r13]6026 vmovd xmm2,DWORD[24+r9]6027 vpinsrd xmm5,xmm5,DWORD[24+r14],16028 vpinsrd xmm0,xmm0,DWORD[24+r10],16029 vpinsrd xmm1,xmm1,DWORD[24+r15],16030 vpunpckldq ymm5,ymm5,ymm16031 vpinsrd xmm2,xmm2,DWORD[24+r11],16032 vpunpckldq ymm0,ymm0,ymm26033 vinserti128 ymm5,ymm5,xmm0,16034 vpshufb ymm5,ymm5,ymm66035 vpsrld ymm7,ymm14,66036 vpslld ymm2,ymm14,266037 vmovdqu YMMWORD[(192-128)+rax],ymm56038 vpaddd ymm5,ymm5,ymm96039 6040 vpsrld ymm1,ymm14,116041 vpxor ymm7,ymm7,ymm26042 vpslld ymm2,ymm14,216043 vpaddd ymm5,ymm5,YMMWORD[64+rbp]6044 vpxor ymm7,ymm7,ymm16045 6046 vpsrld ymm1,ymm14,256047 vpxor ymm7,ymm7,ymm26048 6049 vpslld ymm2,ymm14,76050 vpandn ymm0,ymm14,ymm86051 vpand ymm3,ymm14,ymm156052 6053 vpxor ymm7,ymm7,ymm16054 6055 vpsrld ymm9,ymm10,26056 vpxor ymm7,ymm7,ymm26057 6058 vpslld ymm1,ymm10,306059 vpxor ymm0,ymm0,ymm36060 vpxor ymm3,ymm11,ymm106061 6062 vpxor ymm9,ymm9,ymm16063 vpaddd ymm5,ymm5,ymm76064 6065 vpsrld ymm1,ymm10,136066 6067 vpslld ymm2,ymm10,196068 vpaddd ymm5,ymm5,ymm06069 vpand ymm4,ymm4,ymm36070 6071 vpxor ymm7,ymm9,ymm16072 6073 vpsrld ymm1,ymm10,226074 vpxor ymm7,ymm7,ymm26075 6076 vpslld ymm2,ymm10,106077 vpxor ymm9,ymm11,ymm46078 vpaddd ymm13,ymm13,ymm56079 6080 vpxor ymm7,ymm7,ymm16081 vpxor ymm7,ymm7,ymm26082 6083 vpaddd ymm9,ymm9,ymm56084 vpaddd ymm9,ymm9,ymm76085 vmovd xmm5,DWORD[28+r12]6086 vmovd xmm0,DWORD[28+r8]6087 vmovd xmm1,DWORD[28+r13]6088 vmovd xmm2,DWORD[28+r9]6089 vpinsrd xmm5,xmm5,DWORD[28+r14],16090 vpinsrd xmm0,xmm0,DWORD[28+r10],16091 vpinsrd xmm1,xmm1,DWORD[28+r15],16092 vpunpckldq ymm5,ymm5,ymm16093 vpinsrd xmm2,xmm2,DWORD[28+r11],16094 vpunpckldq ymm0,ymm0,ymm26095 vinserti128 ymm5,ymm5,xmm0,16096 vpshufb ymm5,ymm5,ymm66097 vpsrld ymm7,ymm13,66098 vpslld ymm2,ymm13,266099 vmovdqu YMMWORD[(224-128)+rax],ymm56100 vpaddd ymm5,ymm5,ymm86101 6102 vpsrld ymm1,ymm13,116103 vpxor ymm7,ymm7,ymm26104 vpslld ymm2,ymm13,216105 vpaddd ymm5,ymm5,YMMWORD[96+rbp]6106 vpxor ymm7,ymm7,ymm16107 6108 vpsrld ymm1,ymm13,256109 vpxor ymm7,ymm7,ymm26110 6111 vpslld ymm2,ymm13,76112 vpandn ymm0,ymm13,ymm156113 vpand ymm4,ymm13,ymm146114 6115 vpxor ymm7,ymm7,ymm16116 6117 vpsrld ymm8,ymm9,26118 vpxor ymm7,ymm7,ymm26119 6120 vpslld ymm1,ymm9,306121 vpxor ymm0,ymm0,ymm46122 vpxor ymm4,ymm10,ymm96123 6124 vpxor ymm8,ymm8,ymm16125 vpaddd ymm5,ymm5,ymm76126 6127 vpsrld ymm1,ymm9,136128 6129 vpslld ymm2,ymm9,196130 vpaddd ymm5,ymm5,ymm06131 vpand ymm3,ymm3,ymm46132 6133 vpxor ymm7,ymm8,ymm16134 6135 vpsrld ymm1,ymm9,226136 vpxor ymm7,ymm7,ymm26137 6138 vpslld ymm2,ymm9,106139 vpxor ymm8,ymm10,ymm36140 vpaddd ymm12,ymm12,ymm56141 6142 vpxor ymm7,ymm7,ymm16143 vpxor ymm7,ymm7,ymm26144 6145 vpaddd ymm8,ymm8,ymm56146 vpaddd ymm8,ymm8,ymm76147 add rbp,2566148 vmovd xmm5,DWORD[32+r12]6149 vmovd xmm0,DWORD[32+r8]6150 vmovd xmm1,DWORD[32+r13]6151 vmovd xmm2,DWORD[32+r9]6152 vpinsrd xmm5,xmm5,DWORD[32+r14],16153 vpinsrd xmm0,xmm0,DWORD[32+r10],16154 vpinsrd xmm1,xmm1,DWORD[32+r15],16155 vpunpckldq ymm5,ymm5,ymm16156 vpinsrd xmm2,xmm2,DWORD[32+r11],16157 vpunpckldq ymm0,ymm0,ymm26158 vinserti128 ymm5,ymm5,xmm0,16159 vpshufb ymm5,ymm5,ymm66160 vpsrld ymm7,ymm12,66161 vpslld ymm2,ymm12,266162 vmovdqu YMMWORD[(256-256-128)+rbx],ymm56163 vpaddd ymm5,ymm5,ymm156164 6165 vpsrld ymm1,ymm12,116166 vpxor ymm7,ymm7,ymm26167 vpslld ymm2,ymm12,216168 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp]6169 vpxor ymm7,ymm7,ymm16170 6171 vpsrld ymm1,ymm12,256172 vpxor ymm7,ymm7,ymm26173 6174 vpslld ymm2,ymm12,76175 vpandn ymm0,ymm12,ymm146176 vpand ymm3,ymm12,ymm136177 6178 vpxor ymm7,ymm7,ymm16179 6180 vpsrld ymm15,ymm8,26181 vpxor ymm7,ymm7,ymm26182 6183 vpslld ymm1,ymm8,306184 vpxor ymm0,ymm0,ymm36185 vpxor ymm3,ymm9,ymm86186 6187 vpxor ymm15,ymm15,ymm16188 vpaddd ymm5,ymm5,ymm76189 6190 vpsrld ymm1,ymm8,136191 6192 vpslld ymm2,ymm8,196193 vpaddd ymm5,ymm5,ymm06194 vpand ymm4,ymm4,ymm36195 6196 vpxor ymm7,ymm15,ymm16197 6198 vpsrld ymm1,ymm8,226199 vpxor ymm7,ymm7,ymm26200 6201 vpslld ymm2,ymm8,106202 vpxor ymm15,ymm9,ymm46203 vpaddd ymm11,ymm11,ymm56204 6205 vpxor ymm7,ymm7,ymm16206 vpxor ymm7,ymm7,ymm26207 6208 vpaddd ymm15,ymm15,ymm56209 vpaddd ymm15,ymm15,ymm76210 vmovd xmm5,DWORD[36+r12]6211 vmovd xmm0,DWORD[36+r8]6212 vmovd xmm1,DWORD[36+r13]6213 vmovd xmm2,DWORD[36+r9]6214 vpinsrd xmm5,xmm5,DWORD[36+r14],16215 vpinsrd xmm0,xmm0,DWORD[36+r10],16216 vpinsrd xmm1,xmm1,DWORD[36+r15],16217 vpunpckldq ymm5,ymm5,ymm16218 vpinsrd xmm2,xmm2,DWORD[36+r11],16219 vpunpckldq ymm0,ymm0,ymm26220 vinserti128 ymm5,ymm5,xmm0,16221 vpshufb ymm5,ymm5,ymm66222 vpsrld ymm7,ymm11,66223 vpslld ymm2,ymm11,266224 vmovdqu YMMWORD[(288-256-128)+rbx],ymm56225 vpaddd ymm5,ymm5,ymm146226 6227 vpsrld ymm1,ymm11,116228 vpxor ymm7,ymm7,ymm26229 vpslld ymm2,ymm11,216230 vpaddd ymm5,ymm5,YMMWORD[((-96))+rbp]6231 vpxor ymm7,ymm7,ymm16232 6233 vpsrld ymm1,ymm11,256234 vpxor ymm7,ymm7,ymm26235 6236 vpslld ymm2,ymm11,76237 vpandn ymm0,ymm11,ymm136238 vpand ymm4,ymm11,ymm126239 6240 vpxor ymm7,ymm7,ymm16241 6242 vpsrld ymm14,ymm15,26243 vpxor ymm7,ymm7,ymm26244 6245 vpslld ymm1,ymm15,306246 vpxor ymm0,ymm0,ymm46247 vpxor ymm4,ymm8,ymm156248 6249 vpxor ymm14,ymm14,ymm16250 vpaddd ymm5,ymm5,ymm76251 6252 vpsrld ymm1,ymm15,136253 6254 vpslld ymm2,ymm15,196255 vpaddd ymm5,ymm5,ymm06256 vpand ymm3,ymm3,ymm46257 6258 vpxor ymm7,ymm14,ymm16259 6260 vpsrld ymm1,ymm15,226261 vpxor ymm7,ymm7,ymm26262 6263 vpslld ymm2,ymm15,106264 vpxor ymm14,ymm8,ymm36265 vpaddd ymm10,ymm10,ymm56266 6267 vpxor ymm7,ymm7,ymm16268 vpxor ymm7,ymm7,ymm26269 6270 vpaddd ymm14,ymm14,ymm56271 vpaddd ymm14,ymm14,ymm76272 vmovd xmm5,DWORD[40+r12]6273 vmovd xmm0,DWORD[40+r8]6274 vmovd xmm1,DWORD[40+r13]6275 vmovd xmm2,DWORD[40+r9]6276 vpinsrd xmm5,xmm5,DWORD[40+r14],16277 vpinsrd xmm0,xmm0,DWORD[40+r10],16278 vpinsrd xmm1,xmm1,DWORD[40+r15],16279 vpunpckldq ymm5,ymm5,ymm16280 vpinsrd xmm2,xmm2,DWORD[40+r11],16281 vpunpckldq ymm0,ymm0,ymm26282 vinserti128 ymm5,ymm5,xmm0,16283 vpshufb ymm5,ymm5,ymm66284 vpsrld ymm7,ymm10,66285 vpslld ymm2,ymm10,266286 vmovdqu YMMWORD[(320-256-128)+rbx],ymm56287 vpaddd ymm5,ymm5,ymm136288 6289 vpsrld ymm1,ymm10,116290 vpxor ymm7,ymm7,ymm26291 vpslld ymm2,ymm10,216292 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp]6293 vpxor ymm7,ymm7,ymm16294 6295 vpsrld ymm1,ymm10,256296 vpxor ymm7,ymm7,ymm26297 6298 vpslld ymm2,ymm10,76299 vpandn ymm0,ymm10,ymm126300 vpand ymm3,ymm10,ymm116301 6302 vpxor ymm7,ymm7,ymm16303 6304 vpsrld ymm13,ymm14,26305 vpxor ymm7,ymm7,ymm26306 6307 vpslld ymm1,ymm14,306308 vpxor ymm0,ymm0,ymm36309 vpxor ymm3,ymm15,ymm146310 6311 vpxor ymm13,ymm13,ymm16312 vpaddd ymm5,ymm5,ymm76313 6314 vpsrld ymm1,ymm14,136315 6316 vpslld ymm2,ymm14,196317 vpaddd ymm5,ymm5,ymm06318 vpand ymm4,ymm4,ymm36319 6320 vpxor ymm7,ymm13,ymm16321 6322 vpsrld ymm1,ymm14,226323 vpxor ymm7,ymm7,ymm26324 6325 vpslld ymm2,ymm14,106326 vpxor ymm13,ymm15,ymm46327 vpaddd ymm9,ymm9,ymm56328 6329 vpxor ymm7,ymm7,ymm16330 vpxor ymm7,ymm7,ymm26331 6332 vpaddd ymm13,ymm13,ymm56333 vpaddd ymm13,ymm13,ymm76334 vmovd xmm5,DWORD[44+r12]6335 vmovd xmm0,DWORD[44+r8]6336 vmovd xmm1,DWORD[44+r13]6337 vmovd xmm2,DWORD[44+r9]6338 vpinsrd xmm5,xmm5,DWORD[44+r14],16339 vpinsrd xmm0,xmm0,DWORD[44+r10],16340 vpinsrd xmm1,xmm1,DWORD[44+r15],16341 vpunpckldq ymm5,ymm5,ymm16342 vpinsrd xmm2,xmm2,DWORD[44+r11],16343 vpunpckldq ymm0,ymm0,ymm26344 vinserti128 ymm5,ymm5,xmm0,16345 vpshufb ymm5,ymm5,ymm66346 vpsrld ymm7,ymm9,66347 vpslld ymm2,ymm9,266348 vmovdqu YMMWORD[(352-256-128)+rbx],ymm56349 vpaddd ymm5,ymm5,ymm126350 6351 vpsrld ymm1,ymm9,116352 vpxor ymm7,ymm7,ymm26353 vpslld ymm2,ymm9,216354 vpaddd ymm5,ymm5,YMMWORD[((-32))+rbp]6355 vpxor ymm7,ymm7,ymm16356 6357 vpsrld ymm1,ymm9,256358 vpxor ymm7,ymm7,ymm26359 6360 vpslld ymm2,ymm9,76361 vpandn ymm0,ymm9,ymm116362 vpand ymm4,ymm9,ymm106363 6364 vpxor ymm7,ymm7,ymm16365 6366 vpsrld ymm12,ymm13,26367 vpxor ymm7,ymm7,ymm26368 6369 vpslld ymm1,ymm13,306370 vpxor ymm0,ymm0,ymm46371 vpxor ymm4,ymm14,ymm136372 6373 vpxor ymm12,ymm12,ymm16374 vpaddd ymm5,ymm5,ymm76375 6376 vpsrld ymm1,ymm13,136377 6378 vpslld ymm2,ymm13,196379 vpaddd ymm5,ymm5,ymm06380 vpand ymm3,ymm3,ymm46381 6382 vpxor ymm7,ymm12,ymm16383 6384 vpsrld ymm1,ymm13,226385 vpxor ymm7,ymm7,ymm26386 6387 vpslld ymm2,ymm13,106388 vpxor ymm12,ymm14,ymm36389 vpaddd ymm8,ymm8,ymm56390 6391 vpxor ymm7,ymm7,ymm16392 vpxor ymm7,ymm7,ymm26393 6394 vpaddd ymm12,ymm12,ymm56395 vpaddd ymm12,ymm12,ymm76396 vmovd xmm5,DWORD[48+r12]6397 vmovd xmm0,DWORD[48+r8]6398 vmovd xmm1,DWORD[48+r13]6399 vmovd xmm2,DWORD[48+r9]6400 vpinsrd xmm5,xmm5,DWORD[48+r14],16401 vpinsrd xmm0,xmm0,DWORD[48+r10],16402 vpinsrd xmm1,xmm1,DWORD[48+r15],16403 vpunpckldq ymm5,ymm5,ymm16404 vpinsrd xmm2,xmm2,DWORD[48+r11],16405 vpunpckldq ymm0,ymm0,ymm26406 vinserti128 ymm5,ymm5,xmm0,16407 vpshufb ymm5,ymm5,ymm66408 vpsrld ymm7,ymm8,66409 vpslld ymm2,ymm8,266410 vmovdqu YMMWORD[(384-256-128)+rbx],ymm56411 vpaddd ymm5,ymm5,ymm116412 6413 vpsrld ymm1,ymm8,116414 vpxor ymm7,ymm7,ymm26415 vpslld ymm2,ymm8,216416 vpaddd ymm5,ymm5,YMMWORD[rbp]6417 vpxor ymm7,ymm7,ymm16418 6419 vpsrld ymm1,ymm8,256420 vpxor ymm7,ymm7,ymm26421 6422 vpslld ymm2,ymm8,76423 vpandn ymm0,ymm8,ymm106424 vpand ymm3,ymm8,ymm96425 6426 vpxor ymm7,ymm7,ymm16427 6428 vpsrld ymm11,ymm12,26429 vpxor ymm7,ymm7,ymm26430 6431 vpslld ymm1,ymm12,306432 vpxor ymm0,ymm0,ymm36433 vpxor ymm3,ymm13,ymm126434 6435 vpxor ymm11,ymm11,ymm16436 vpaddd ymm5,ymm5,ymm76437 6438 vpsrld ymm1,ymm12,136439 6440 vpslld ymm2,ymm12,196441 vpaddd ymm5,ymm5,ymm06442 vpand ymm4,ymm4,ymm36443 6444 vpxor ymm7,ymm11,ymm16445 6446 vpsrld ymm1,ymm12,226447 vpxor ymm7,ymm7,ymm26448 6449 vpslld ymm2,ymm12,106450 vpxor ymm11,ymm13,ymm46451 vpaddd ymm15,ymm15,ymm56452 6453 vpxor ymm7,ymm7,ymm16454 vpxor ymm7,ymm7,ymm26455 6456 vpaddd ymm11,ymm11,ymm56457 vpaddd ymm11,ymm11,ymm76458 vmovd xmm5,DWORD[52+r12]6459 vmovd xmm0,DWORD[52+r8]6460 vmovd xmm1,DWORD[52+r13]6461 vmovd xmm2,DWORD[52+r9]6462 vpinsrd xmm5,xmm5,DWORD[52+r14],16463 vpinsrd xmm0,xmm0,DWORD[52+r10],16464 vpinsrd xmm1,xmm1,DWORD[52+r15],16465 vpunpckldq ymm5,ymm5,ymm16466 vpinsrd xmm2,xmm2,DWORD[52+r11],16467 vpunpckldq ymm0,ymm0,ymm26468 vinserti128 ymm5,ymm5,xmm0,16469 vpshufb ymm5,ymm5,ymm66470 vpsrld ymm7,ymm15,66471 vpslld ymm2,ymm15,266472 vmovdqu YMMWORD[(416-256-128)+rbx],ymm56473 vpaddd ymm5,ymm5,ymm106474 6475 vpsrld ymm1,ymm15,116476 vpxor ymm7,ymm7,ymm26477 vpslld ymm2,ymm15,216478 vpaddd ymm5,ymm5,YMMWORD[32+rbp]6479 vpxor ymm7,ymm7,ymm16480 6481 vpsrld ymm1,ymm15,256482 vpxor ymm7,ymm7,ymm26483 6484 vpslld ymm2,ymm15,76485 vpandn ymm0,ymm15,ymm96486 vpand ymm4,ymm15,ymm86487 6488 vpxor ymm7,ymm7,ymm16489 6490 vpsrld ymm10,ymm11,26491 vpxor ymm7,ymm7,ymm26492 6493 vpslld ymm1,ymm11,306494 vpxor ymm0,ymm0,ymm46495 vpxor ymm4,ymm12,ymm116496 6497 vpxor ymm10,ymm10,ymm16498 vpaddd ymm5,ymm5,ymm76499 6500 vpsrld ymm1,ymm11,136501 6502 vpslld ymm2,ymm11,196503 vpaddd ymm5,ymm5,ymm06504 vpand ymm3,ymm3,ymm46505 6506 vpxor ymm7,ymm10,ymm16507 6508 vpsrld ymm1,ymm11,226509 vpxor ymm7,ymm7,ymm26510 6511 vpslld ymm2,ymm11,106512 vpxor ymm10,ymm12,ymm36513 vpaddd ymm14,ymm14,ymm56514 6515 vpxor ymm7,ymm7,ymm16516 vpxor ymm7,ymm7,ymm26517 6518 vpaddd ymm10,ymm10,ymm56519 vpaddd ymm10,ymm10,ymm76520 vmovd xmm5,DWORD[56+r12]6521 vmovd xmm0,DWORD[56+r8]6522 vmovd xmm1,DWORD[56+r13]6523 vmovd xmm2,DWORD[56+r9]6524 vpinsrd xmm5,xmm5,DWORD[56+r14],16525 vpinsrd xmm0,xmm0,DWORD[56+r10],16526 vpinsrd xmm1,xmm1,DWORD[56+r15],16527 vpunpckldq ymm5,ymm5,ymm16528 vpinsrd xmm2,xmm2,DWORD[56+r11],16529 vpunpckldq ymm0,ymm0,ymm26530 vinserti128 ymm5,ymm5,xmm0,16531 vpshufb ymm5,ymm5,ymm66532 vpsrld ymm7,ymm14,66533 vpslld ymm2,ymm14,266534 vmovdqu YMMWORD[(448-256-128)+rbx],ymm56535 vpaddd ymm5,ymm5,ymm96536 6537 vpsrld ymm1,ymm14,116538 vpxor ymm7,ymm7,ymm26539 vpslld ymm2,ymm14,216540 vpaddd ymm5,ymm5,YMMWORD[64+rbp]6541 vpxor ymm7,ymm7,ymm16542 6543 vpsrld ymm1,ymm14,256544 vpxor ymm7,ymm7,ymm26545 6546 vpslld ymm2,ymm14,76547 vpandn ymm0,ymm14,ymm86548 vpand ymm3,ymm14,ymm156549 6550 vpxor ymm7,ymm7,ymm16551 6552 vpsrld ymm9,ymm10,26553 vpxor ymm7,ymm7,ymm26554 6555 vpslld ymm1,ymm10,306556 vpxor ymm0,ymm0,ymm36557 vpxor ymm3,ymm11,ymm106558 6559 vpxor ymm9,ymm9,ymm16560 vpaddd ymm5,ymm5,ymm76561 6562 vpsrld ymm1,ymm10,136563 6564 vpslld ymm2,ymm10,196565 vpaddd ymm5,ymm5,ymm06566 vpand ymm4,ymm4,ymm36567 6568 vpxor ymm7,ymm9,ymm16569 6570 vpsrld ymm1,ymm10,226571 vpxor ymm7,ymm7,ymm26572 6573 vpslld ymm2,ymm10,106574 vpxor ymm9,ymm11,ymm46575 vpaddd ymm13,ymm13,ymm56576 6577 vpxor ymm7,ymm7,ymm16578 vpxor ymm7,ymm7,ymm26579 6580 vpaddd ymm9,ymm9,ymm56581 vpaddd ymm9,ymm9,ymm76582 vmovd xmm5,DWORD[60+r12]6583 lea r12,[64+r12]6584 vmovd xmm0,DWORD[60+r8]6585 lea r8,[64+r8]6586 vmovd xmm1,DWORD[60+r13]6587 lea r13,[64+r13]6588 vmovd xmm2,DWORD[60+r9]6589 lea r9,[64+r9]6590 vpinsrd xmm5,xmm5,DWORD[60+r14],16591 lea r14,[64+r14]6592 vpinsrd xmm0,xmm0,DWORD[60+r10],16593 lea r10,[64+r10]6594 vpinsrd xmm1,xmm1,DWORD[60+r15],16595 lea r15,[64+r15]6596 vpunpckldq ymm5,ymm5,ymm16597 vpinsrd xmm2,xmm2,DWORD[60+r11],16598 lea r11,[64+r11]6599 vpunpckldq ymm0,ymm0,ymm26600 vinserti128 ymm5,ymm5,xmm0,16601 vpshufb ymm5,ymm5,ymm66602 vpsrld ymm7,ymm13,66603 vpslld ymm2,ymm13,266604 vmovdqu YMMWORD[(480-256-128)+rbx],ymm56605 vpaddd ymm5,ymm5,ymm86606 6607 vpsrld ymm1,ymm13,116608 vpxor ymm7,ymm7,ymm26609 vpslld ymm2,ymm13,216610 vpaddd ymm5,ymm5,YMMWORD[96+rbp]6611 vpxor ymm7,ymm7,ymm16612 6613 vpsrld ymm1,ymm13,256614 vpxor ymm7,ymm7,ymm26615 prefetcht0 [63+r12]6616 vpslld ymm2,ymm13,76617 vpandn ymm0,ymm13,ymm156618 vpand ymm4,ymm13,ymm146619 prefetcht0 [63+r13]6620 vpxor ymm7,ymm7,ymm16621 6622 vpsrld ymm8,ymm9,26623 vpxor ymm7,ymm7,ymm26624 prefetcht0 [63+r14]6625 vpslld ymm1,ymm9,306626 vpxor ymm0,ymm0,ymm46627 vpxor ymm4,ymm10,ymm96628 prefetcht0 [63+r15]6629 vpxor ymm8,ymm8,ymm16630 vpaddd ymm5,ymm5,ymm76631 6632 vpsrld ymm1,ymm9,136633 prefetcht0 [63+r8]6634 vpslld ymm2,ymm9,196635 vpaddd ymm5,ymm5,ymm06636 vpand ymm3,ymm3,ymm46637 prefetcht0 [63+r9]6638 vpxor ymm7,ymm8,ymm16639 6640 vpsrld ymm1,ymm9,226641 vpxor ymm7,ymm7,ymm26642 prefetcht0 [63+r10]6643 vpslld ymm2,ymm9,106644 vpxor ymm8,ymm10,ymm36645 vpaddd ymm12,ymm12,ymm56646 prefetcht0 [63+r11]6647 vpxor ymm7,ymm7,ymm16648 vpxor ymm7,ymm7,ymm26649 6650 vpaddd ymm8,ymm8,ymm56651 vpaddd ymm8,ymm8,ymm76652 add rbp,2566653 vmovdqu ymm5,YMMWORD[((0-128))+rax]6654 mov ecx,36655 jmp NEAR $L$oop_16_xx_avx26656 ALIGN 326657 $L$oop_16_xx_avx2:6658 vmovdqu ymm6,YMMWORD[((32-128))+rax]6659 vpaddd ymm5,ymm5,YMMWORD[((288-256-128))+rbx]6660 6661 vpsrld ymm7,ymm6,36662 vpsrld ymm1,ymm6,76663 vpslld ymm2,ymm6,256664 vpxor ymm7,ymm7,ymm16665 vpsrld ymm1,ymm6,186666 vpxor ymm7,ymm7,ymm26667 vpslld ymm2,ymm6,146668 vmovdqu ymm0,YMMWORD[((448-256-128))+rbx]6669 vpsrld ymm3,ymm0,106670 6671 vpxor ymm7,ymm7,ymm16672 vpsrld ymm1,ymm0,176673 vpxor ymm7,ymm7,ymm26674 vpslld ymm2,ymm0,156675 vpaddd ymm5,ymm5,ymm76676 vpxor ymm7,ymm3,ymm16677 vpsrld ymm1,ymm0,196678 vpxor ymm7,ymm7,ymm26679 vpslld ymm2,ymm0,136680 vpxor ymm7,ymm7,ymm16681 vpxor ymm7,ymm7,ymm26682 vpaddd ymm5,ymm5,ymm76683 vpsrld ymm7,ymm12,66684 vpslld ymm2,ymm12,266685 vmovdqu YMMWORD[(0-128)+rax],ymm56686 vpaddd ymm5,ymm5,ymm156687 6688 vpsrld ymm1,ymm12,116689 vpxor ymm7,ymm7,ymm26690 vpslld ymm2,ymm12,216691 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp]6692 vpxor ymm7,ymm7,ymm16693 6694 vpsrld ymm1,ymm12,256695 vpxor ymm7,ymm7,ymm26696 6697 vpslld ymm2,ymm12,76698 vpandn ymm0,ymm12,ymm146699 vpand ymm3,ymm12,ymm136700 6701 vpxor ymm7,ymm7,ymm16702 6703 vpsrld ymm15,ymm8,26704 vpxor ymm7,ymm7,ymm26705 6706 vpslld ymm1,ymm8,306707 vpxor ymm0,ymm0,ymm36708 vpxor ymm3,ymm9,ymm86709 6710 vpxor ymm15,ymm15,ymm16711 vpaddd ymm5,ymm5,ymm76712 6713 vpsrld ymm1,ymm8,136714 6715 vpslld ymm2,ymm8,196716 vpaddd ymm5,ymm5,ymm06717 vpand ymm4,ymm4,ymm36718 6719 vpxor ymm7,ymm15,ymm16720 6721 vpsrld ymm1,ymm8,226722 vpxor ymm7,ymm7,ymm26723 6724 vpslld ymm2,ymm8,106725 vpxor ymm15,ymm9,ymm46726 vpaddd ymm11,ymm11,ymm56727 6728 vpxor ymm7,ymm7,ymm16729 vpxor ymm7,ymm7,ymm26730 6731 vpaddd ymm15,ymm15,ymm56732 vpaddd ymm15,ymm15,ymm76733 vmovdqu ymm5,YMMWORD[((64-128))+rax]6734 vpaddd ymm6,ymm6,YMMWORD[((320-256-128))+rbx]6735 6736 vpsrld ymm7,ymm5,36737 vpsrld ymm1,ymm5,76738 vpslld ymm2,ymm5,256739 vpxor ymm7,ymm7,ymm16740 vpsrld ymm1,ymm5,186741 vpxor ymm7,ymm7,ymm26742 vpslld ymm2,ymm5,146743 vmovdqu ymm0,YMMWORD[((480-256-128))+rbx]6744 vpsrld ymm4,ymm0,106745 6746 vpxor ymm7,ymm7,ymm16747 vpsrld ymm1,ymm0,176748 vpxor ymm7,ymm7,ymm26749 vpslld ymm2,ymm0,156750 vpaddd ymm6,ymm6,ymm76751 vpxor ymm7,ymm4,ymm16752 vpsrld ymm1,ymm0,196753 vpxor ymm7,ymm7,ymm26754 vpslld ymm2,ymm0,136755 vpxor ymm7,ymm7,ymm16756 vpxor ymm7,ymm7,ymm26757 vpaddd ymm6,ymm6,ymm76758 vpsrld ymm7,ymm11,66759 vpslld ymm2,ymm11,266760 vmovdqu YMMWORD[(32-128)+rax],ymm66761 vpaddd ymm6,ymm6,ymm146762 6763 vpsrld ymm1,ymm11,116764 vpxor ymm7,ymm7,ymm26765 vpslld ymm2,ymm11,216766 vpaddd ymm6,ymm6,YMMWORD[((-96))+rbp]6767 vpxor ymm7,ymm7,ymm16768 6769 vpsrld ymm1,ymm11,256770 vpxor ymm7,ymm7,ymm26771 6772 vpslld ymm2,ymm11,76773 vpandn ymm0,ymm11,ymm136774 vpand ymm4,ymm11,ymm126775 6776 vpxor ymm7,ymm7,ymm16777 6778 vpsrld ymm14,ymm15,26779 vpxor ymm7,ymm7,ymm26780 6781 vpslld ymm1,ymm15,306782 vpxor ymm0,ymm0,ymm46783 vpxor ymm4,ymm8,ymm156784 6785 vpxor ymm14,ymm14,ymm16786 vpaddd ymm6,ymm6,ymm76787 6788 vpsrld ymm1,ymm15,136789 6790 vpslld ymm2,ymm15,196791 vpaddd ymm6,ymm6,ymm06792 vpand ymm3,ymm3,ymm46793 6794 vpxor ymm7,ymm14,ymm16795 6796 vpsrld ymm1,ymm15,226797 vpxor ymm7,ymm7,ymm26798 6799 vpslld ymm2,ymm15,106800 vpxor ymm14,ymm8,ymm36801 vpaddd ymm10,ymm10,ymm66802 6803 vpxor ymm7,ymm7,ymm16804 vpxor ymm7,ymm7,ymm26805 6806 vpaddd ymm14,ymm14,ymm66807 vpaddd ymm14,ymm14,ymm76808 vmovdqu ymm6,YMMWORD[((96-128))+rax]6809 vpaddd ymm5,ymm5,YMMWORD[((352-256-128))+rbx]6810 6811 vpsrld ymm7,ymm6,36812 vpsrld ymm1,ymm6,76813 vpslld ymm2,ymm6,256814 vpxor ymm7,ymm7,ymm16815 vpsrld ymm1,ymm6,186816 vpxor ymm7,ymm7,ymm26817 vpslld ymm2,ymm6,146818 vmovdqu ymm0,YMMWORD[((0-128))+rax]6819 vpsrld ymm3,ymm0,106820 6821 vpxor ymm7,ymm7,ymm16822 vpsrld ymm1,ymm0,176823 vpxor ymm7,ymm7,ymm26824 vpslld ymm2,ymm0,156825 vpaddd ymm5,ymm5,ymm76826 vpxor ymm7,ymm3,ymm16827 vpsrld ymm1,ymm0,196828 vpxor ymm7,ymm7,ymm26829 vpslld ymm2,ymm0,136830 vpxor ymm7,ymm7,ymm16831 vpxor ymm7,ymm7,ymm26832 vpaddd ymm5,ymm5,ymm76833 vpsrld ymm7,ymm10,66834 vpslld ymm2,ymm10,266835 vmovdqu YMMWORD[(64-128)+rax],ymm56836 vpaddd ymm5,ymm5,ymm136837 6838 vpsrld ymm1,ymm10,116839 vpxor ymm7,ymm7,ymm26840 vpslld ymm2,ymm10,216841 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp]6842 vpxor ymm7,ymm7,ymm16843 6844 vpsrld ymm1,ymm10,256845 vpxor ymm7,ymm7,ymm26846 6847 vpslld ymm2,ymm10,76848 vpandn ymm0,ymm10,ymm126849 vpand ymm3,ymm10,ymm116850 6851 vpxor ymm7,ymm7,ymm16852 6853 vpsrld ymm13,ymm14,26854 vpxor ymm7,ymm7,ymm26855 6856 vpslld ymm1,ymm14,306857 vpxor ymm0,ymm0,ymm36858 vpxor ymm3,ymm15,ymm146859 6860 vpxor ymm13,ymm13,ymm16861 vpaddd ymm5,ymm5,ymm76862 6863 vpsrld ymm1,ymm14,136864 6865 vpslld ymm2,ymm14,196866 vpaddd ymm5,ymm5,ymm06867 vpand ymm4,ymm4,ymm36868 6869 vpxor ymm7,ymm13,ymm16870 6871 vpsrld ymm1,ymm14,226872 vpxor ymm7,ymm7,ymm26873 6874 vpslld ymm2,ymm14,106875 vpxor ymm13,ymm15,ymm46876 vpaddd ymm9,ymm9,ymm56877 6878 vpxor ymm7,ymm7,ymm16879 vpxor ymm7,ymm7,ymm26880 6881 vpaddd ymm13,ymm13,ymm56882 vpaddd ymm13,ymm13,ymm76883 vmovdqu ymm5,YMMWORD[((128-128))+rax]6884 vpaddd ymm6,ymm6,YMMWORD[((384-256-128))+rbx]6885 6886 vpsrld ymm7,ymm5,36887 vpsrld ymm1,ymm5,76888 vpslld ymm2,ymm5,256889 vpxor ymm7,ymm7,ymm16890 vpsrld ymm1,ymm5,186891 vpxor ymm7,ymm7,ymm26892 vpslld ymm2,ymm5,146893 vmovdqu ymm0,YMMWORD[((32-128))+rax]6894 vpsrld ymm4,ymm0,106895 6896 vpxor ymm7,ymm7,ymm16897 vpsrld ymm1,ymm0,176898 vpxor ymm7,ymm7,ymm26899 vpslld ymm2,ymm0,156900 vpaddd ymm6,ymm6,ymm76901 vpxor ymm7,ymm4,ymm16902 vpsrld ymm1,ymm0,196903 vpxor ymm7,ymm7,ymm26904 vpslld ymm2,ymm0,136905 vpxor ymm7,ymm7,ymm16906 vpxor ymm7,ymm7,ymm26907 vpaddd ymm6,ymm6,ymm76908 vpsrld ymm7,ymm9,66909 vpslld ymm2,ymm9,266910 vmovdqu YMMWORD[(96-128)+rax],ymm66911 vpaddd ymm6,ymm6,ymm126912 6913 vpsrld ymm1,ymm9,116914 vpxor ymm7,ymm7,ymm26915 vpslld ymm2,ymm9,216916 vpaddd ymm6,ymm6,YMMWORD[((-32))+rbp]6917 vpxor ymm7,ymm7,ymm16918 6919 vpsrld ymm1,ymm9,256920 vpxor ymm7,ymm7,ymm26921 6922 vpslld ymm2,ymm9,76923 vpandn ymm0,ymm9,ymm116924 vpand ymm4,ymm9,ymm106925 6926 vpxor ymm7,ymm7,ymm16927 6928 vpsrld ymm12,ymm13,26929 vpxor ymm7,ymm7,ymm26930 6931 vpslld ymm1,ymm13,306932 vpxor ymm0,ymm0,ymm46933 vpxor ymm4,ymm14,ymm136934 6935 vpxor ymm12,ymm12,ymm16936 vpaddd ymm6,ymm6,ymm76937 6938 vpsrld ymm1,ymm13,136939 6940 vpslld ymm2,ymm13,196941 vpaddd ymm6,ymm6,ymm06942 vpand ymm3,ymm3,ymm46943 6944 vpxor ymm7,ymm12,ymm16945 6946 vpsrld ymm1,ymm13,226947 vpxor ymm7,ymm7,ymm26948 6949 vpslld ymm2,ymm13,106950 vpxor ymm12,ymm14,ymm36951 vpaddd ymm8,ymm8,ymm66952 6953 vpxor ymm7,ymm7,ymm16954 vpxor ymm7,ymm7,ymm26955 6956 vpaddd ymm12,ymm12,ymm66957 vpaddd ymm12,ymm12,ymm76958 vmovdqu ymm6,YMMWORD[((160-128))+rax]6959 vpaddd ymm5,ymm5,YMMWORD[((416-256-128))+rbx]6960 6961 vpsrld ymm7,ymm6,36962 vpsrld ymm1,ymm6,76963 vpslld ymm2,ymm6,256964 vpxor ymm7,ymm7,ymm16965 vpsrld ymm1,ymm6,186966 vpxor ymm7,ymm7,ymm26967 vpslld ymm2,ymm6,146968 vmovdqu ymm0,YMMWORD[((64-128))+rax]6969 vpsrld ymm3,ymm0,106970 6971 vpxor ymm7,ymm7,ymm16972 vpsrld ymm1,ymm0,176973 vpxor ymm7,ymm7,ymm26974 vpslld ymm2,ymm0,156975 vpaddd ymm5,ymm5,ymm76976 vpxor ymm7,ymm3,ymm16977 vpsrld ymm1,ymm0,196978 vpxor ymm7,ymm7,ymm26979 vpslld ymm2,ymm0,136980 vpxor ymm7,ymm7,ymm16981 vpxor ymm7,ymm7,ymm26982 vpaddd ymm5,ymm5,ymm76983 vpsrld ymm7,ymm8,66984 vpslld ymm2,ymm8,266985 vmovdqu YMMWORD[(128-128)+rax],ymm56986 vpaddd ymm5,ymm5,ymm116987 6988 vpsrld ymm1,ymm8,116989 vpxor ymm7,ymm7,ymm26990 vpslld ymm2,ymm8,216991 vpaddd ymm5,ymm5,YMMWORD[rbp]6992 vpxor ymm7,ymm7,ymm16993 6994 vpsrld ymm1,ymm8,256995 vpxor ymm7,ymm7,ymm26996 6997 vpslld ymm2,ymm8,76998 vpandn ymm0,ymm8,ymm106999 vpand ymm3,ymm8,ymm97000 7001 vpxor ymm7,ymm7,ymm17002 7003 vpsrld ymm11,ymm12,27004 vpxor ymm7,ymm7,ymm27005 7006 vpslld ymm1,ymm12,307007 vpxor ymm0,ymm0,ymm37008 vpxor ymm3,ymm13,ymm127009 7010 vpxor ymm11,ymm11,ymm17011 vpaddd ymm5,ymm5,ymm77012 7013 vpsrld ymm1,ymm12,137014 7015 vpslld ymm2,ymm12,197016 vpaddd ymm5,ymm5,ymm07017 vpand ymm4,ymm4,ymm37018 7019 vpxor ymm7,ymm11,ymm17020 7021 vpsrld ymm1,ymm12,227022 vpxor ymm7,ymm7,ymm27023 7024 vpslld ymm2,ymm12,107025 vpxor ymm11,ymm13,ymm47026 vpaddd ymm15,ymm15,ymm57027 7028 vpxor ymm7,ymm7,ymm17029 vpxor ymm7,ymm7,ymm27030 7031 vpaddd ymm11,ymm11,ymm57032 vpaddd ymm11,ymm11,ymm77033 vmovdqu ymm5,YMMWORD[((192-128))+rax]7034 vpaddd ymm6,ymm6,YMMWORD[((448-256-128))+rbx]7035 7036 vpsrld ymm7,ymm5,37037 vpsrld ymm1,ymm5,77038 vpslld ymm2,ymm5,257039 vpxor ymm7,ymm7,ymm17040 vpsrld ymm1,ymm5,187041 vpxor ymm7,ymm7,ymm27042 vpslld ymm2,ymm5,147043 vmovdqu ymm0,YMMWORD[((96-128))+rax]7044 vpsrld ymm4,ymm0,107045 7046 vpxor ymm7,ymm7,ymm17047 vpsrld ymm1,ymm0,177048 vpxor ymm7,ymm7,ymm27049 vpslld ymm2,ymm0,157050 vpaddd ymm6,ymm6,ymm77051 vpxor ymm7,ymm4,ymm17052 vpsrld ymm1,ymm0,197053 vpxor ymm7,ymm7,ymm27054 vpslld ymm2,ymm0,137055 vpxor ymm7,ymm7,ymm17056 vpxor ymm7,ymm7,ymm27057 vpaddd ymm6,ymm6,ymm77058 vpsrld ymm7,ymm15,67059 vpslld ymm2,ymm15,267060 vmovdqu YMMWORD[(160-128)+rax],ymm67061 vpaddd ymm6,ymm6,ymm107062 7063 vpsrld ymm1,ymm15,117064 vpxor ymm7,ymm7,ymm27065 vpslld ymm2,ymm15,217066 vpaddd ymm6,ymm6,YMMWORD[32+rbp]7067 vpxor ymm7,ymm7,ymm17068 7069 vpsrld ymm1,ymm15,257070 vpxor ymm7,ymm7,ymm27071 7072 vpslld ymm2,ymm15,77073 vpandn ymm0,ymm15,ymm97074 vpand ymm4,ymm15,ymm87075 7076 vpxor ymm7,ymm7,ymm17077 7078 vpsrld ymm10,ymm11,27079 vpxor ymm7,ymm7,ymm27080 7081 vpslld ymm1,ymm11,307082 vpxor ymm0,ymm0,ymm47083 vpxor ymm4,ymm12,ymm117084 7085 vpxor ymm10,ymm10,ymm17086 vpaddd ymm6,ymm6,ymm77087 7088 vpsrld ymm1,ymm11,137089 7090 vpslld ymm2,ymm11,197091 vpaddd ymm6,ymm6,ymm07092 vpand ymm3,ymm3,ymm47093 7094 vpxor ymm7,ymm10,ymm17095 7096 vpsrld ymm1,ymm11,227097 vpxor ymm7,ymm7,ymm27098 7099 vpslld ymm2,ymm11,107100 vpxor ymm10,ymm12,ymm37101 vpaddd ymm14,ymm14,ymm67102 7103 vpxor ymm7,ymm7,ymm17104 vpxor ymm7,ymm7,ymm27105 7106 vpaddd ymm10,ymm10,ymm67107 vpaddd ymm10,ymm10,ymm77108 vmovdqu ymm6,YMMWORD[((224-128))+rax]7109 vpaddd ymm5,ymm5,YMMWORD[((480-256-128))+rbx]7110 7111 vpsrld ymm7,ymm6,37112 vpsrld ymm1,ymm6,77113 vpslld ymm2,ymm6,257114 vpxor ymm7,ymm7,ymm17115 vpsrld ymm1,ymm6,187116 vpxor ymm7,ymm7,ymm27117 vpslld ymm2,ymm6,147118 vmovdqu ymm0,YMMWORD[((128-128))+rax]7119 vpsrld ymm3,ymm0,107120 7121 vpxor ymm7,ymm7,ymm17122 vpsrld ymm1,ymm0,177123 vpxor ymm7,ymm7,ymm27124 vpslld ymm2,ymm0,157125 vpaddd ymm5,ymm5,ymm77126 vpxor ymm7,ymm3,ymm17127 vpsrld ymm1,ymm0,197128 vpxor ymm7,ymm7,ymm27129 vpslld ymm2,ymm0,137130 vpxor ymm7,ymm7,ymm17131 vpxor ymm7,ymm7,ymm27132 vpaddd ymm5,ymm5,ymm77133 vpsrld ymm7,ymm14,67134 vpslld ymm2,ymm14,267135 vmovdqu YMMWORD[(192-128)+rax],ymm57136 vpaddd ymm5,ymm5,ymm97137 7138 vpsrld ymm1,ymm14,117139 vpxor ymm7,ymm7,ymm27140 vpslld ymm2,ymm14,217141 vpaddd ymm5,ymm5,YMMWORD[64+rbp]7142 vpxor ymm7,ymm7,ymm17143 7144 vpsrld ymm1,ymm14,257145 vpxor ymm7,ymm7,ymm27146 7147 vpslld ymm2,ymm14,77148 vpandn ymm0,ymm14,ymm87149 vpand ymm3,ymm14,ymm157150 7151 vpxor ymm7,ymm7,ymm17152 7153 vpsrld ymm9,ymm10,27154 vpxor ymm7,ymm7,ymm27155 7156 vpslld ymm1,ymm10,307157 vpxor ymm0,ymm0,ymm37158 vpxor ymm3,ymm11,ymm107159 7160 vpxor ymm9,ymm9,ymm17161 vpaddd ymm5,ymm5,ymm77162 7163 vpsrld ymm1,ymm10,137164 7165 vpslld ymm2,ymm10,197166 vpaddd ymm5,ymm5,ymm07167 vpand ymm4,ymm4,ymm37168 7169 vpxor ymm7,ymm9,ymm17170 7171 vpsrld ymm1,ymm10,227172 vpxor ymm7,ymm7,ymm27173 7174 vpslld ymm2,ymm10,107175 vpxor ymm9,ymm11,ymm47176 vpaddd ymm13,ymm13,ymm57177 7178 vpxor ymm7,ymm7,ymm17179 vpxor ymm7,ymm7,ymm27180 7181 vpaddd ymm9,ymm9,ymm57182 vpaddd ymm9,ymm9,ymm77183 vmovdqu ymm5,YMMWORD[((256-256-128))+rbx]7184 vpaddd ymm6,ymm6,YMMWORD[((0-128))+rax]7185 7186 vpsrld ymm7,ymm5,37187 vpsrld ymm1,ymm5,77188 vpslld ymm2,ymm5,257189 vpxor ymm7,ymm7,ymm17190 vpsrld ymm1,ymm5,187191 vpxor ymm7,ymm7,ymm27192 vpslld ymm2,ymm5,147193 vmovdqu ymm0,YMMWORD[((160-128))+rax]7194 vpsrld ymm4,ymm0,107195 7196 vpxor ymm7,ymm7,ymm17197 vpsrld ymm1,ymm0,177198 vpxor ymm7,ymm7,ymm27199 vpslld ymm2,ymm0,157200 vpaddd ymm6,ymm6,ymm77201 vpxor ymm7,ymm4,ymm17202 vpsrld ymm1,ymm0,197203 vpxor ymm7,ymm7,ymm27204 vpslld ymm2,ymm0,137205 vpxor ymm7,ymm7,ymm17206 vpxor ymm7,ymm7,ymm27207 vpaddd ymm6,ymm6,ymm77208 vpsrld ymm7,ymm13,67209 vpslld ymm2,ymm13,267210 vmovdqu YMMWORD[(224-128)+rax],ymm67211 vpaddd ymm6,ymm6,ymm87212 7213 vpsrld ymm1,ymm13,117214 vpxor ymm7,ymm7,ymm27215 vpslld ymm2,ymm13,217216 vpaddd ymm6,ymm6,YMMWORD[96+rbp]7217 vpxor ymm7,ymm7,ymm17218 7219 vpsrld ymm1,ymm13,257220 vpxor ymm7,ymm7,ymm27221 7222 vpslld ymm2,ymm13,77223 vpandn ymm0,ymm13,ymm157224 vpand ymm4,ymm13,ymm147225 7226 vpxor ymm7,ymm7,ymm17227 7228 vpsrld ymm8,ymm9,27229 vpxor ymm7,ymm7,ymm27230 7231 vpslld ymm1,ymm9,307232 vpxor ymm0,ymm0,ymm47233 vpxor ymm4,ymm10,ymm97234 7235 vpxor ymm8,ymm8,ymm17236 vpaddd ymm6,ymm6,ymm77237 7238 vpsrld ymm1,ymm9,137239 7240 vpslld ymm2,ymm9,197241 vpaddd ymm6,ymm6,ymm07242 vpand ymm3,ymm3,ymm47243 7244 vpxor ymm7,ymm8,ymm17245 7246 vpsrld ymm1,ymm9,227247 vpxor ymm7,ymm7,ymm27248 7249 vpslld ymm2,ymm9,107250 vpxor ymm8,ymm10,ymm37251 vpaddd ymm12,ymm12,ymm67252 7253 vpxor ymm7,ymm7,ymm17254 vpxor ymm7,ymm7,ymm27255 7256 vpaddd ymm8,ymm8,ymm67257 vpaddd ymm8,ymm8,ymm77258 add rbp,2567259 vmovdqu ymm6,YMMWORD[((288-256-128))+rbx]7260 vpaddd ymm5,ymm5,YMMWORD[((32-128))+rax]7261 7262 vpsrld ymm7,ymm6,37263 vpsrld ymm1,ymm6,77264 vpslld ymm2,ymm6,257265 vpxor ymm7,ymm7,ymm17266 vpsrld ymm1,ymm6,187267 vpxor ymm7,ymm7,ymm27268 vpslld ymm2,ymm6,147269 vmovdqu ymm0,YMMWORD[((192-128))+rax]7270 vpsrld ymm3,ymm0,107271 7272 vpxor ymm7,ymm7,ymm17273 vpsrld ymm1,ymm0,177274 vpxor ymm7,ymm7,ymm27275 vpslld ymm2,ymm0,157276 vpaddd ymm5,ymm5,ymm77277 vpxor ymm7,ymm3,ymm17278 vpsrld ymm1,ymm0,197279 vpxor ymm7,ymm7,ymm27280 vpslld ymm2,ymm0,137281 vpxor ymm7,ymm7,ymm17282 vpxor ymm7,ymm7,ymm27283 vpaddd ymm5,ymm5,ymm77284 vpsrld ymm7,ymm12,67285 vpslld ymm2,ymm12,267286 vmovdqu YMMWORD[(256-256-128)+rbx],ymm57287 vpaddd ymm5,ymm5,ymm157288 7289 vpsrld ymm1,ymm12,117290 vpxor ymm7,ymm7,ymm27291 vpslld ymm2,ymm12,217292 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp]7293 vpxor ymm7,ymm7,ymm17294 7295 vpsrld ymm1,ymm12,257296 vpxor ymm7,ymm7,ymm27297 7298 vpslld ymm2,ymm12,77299 vpandn ymm0,ymm12,ymm147300 vpand ymm3,ymm12,ymm137301 7302 vpxor ymm7,ymm7,ymm17303 7304 vpsrld ymm15,ymm8,27305 vpxor ymm7,ymm7,ymm27306 7307 vpslld ymm1,ymm8,307308 vpxor ymm0,ymm0,ymm37309 vpxor ymm3,ymm9,ymm87310 7311 vpxor ymm15,ymm15,ymm17312 vpaddd ymm5,ymm5,ymm77313 7314 vpsrld ymm1,ymm8,137315 7316 vpslld ymm2,ymm8,197317 vpaddd ymm5,ymm5,ymm07318 vpand ymm4,ymm4,ymm37319 7320 vpxor ymm7,ymm15,ymm17321 7322 vpsrld ymm1,ymm8,227323 vpxor ymm7,ymm7,ymm27324 7325 vpslld ymm2,ymm8,107326 vpxor ymm15,ymm9,ymm47327 vpaddd ymm11,ymm11,ymm57328 7329 vpxor ymm7,ymm7,ymm17330 vpxor ymm7,ymm7,ymm27331 7332 vpaddd ymm15,ymm15,ymm57333 vpaddd ymm15,ymm15,ymm77334 vmovdqu ymm5,YMMWORD[((320-256-128))+rbx]7335 vpaddd ymm6,ymm6,YMMWORD[((64-128))+rax]7336 7337 vpsrld ymm7,ymm5,37338 vpsrld ymm1,ymm5,77339 vpslld ymm2,ymm5,257340 vpxor ymm7,ymm7,ymm17341 vpsrld ymm1,ymm5,187342 vpxor ymm7,ymm7,ymm27343 vpslld ymm2,ymm5,147344 vmovdqu ymm0,YMMWORD[((224-128))+rax]7345 vpsrld ymm4,ymm0,107346 7347 vpxor ymm7,ymm7,ymm17348 vpsrld ymm1,ymm0,177349 vpxor ymm7,ymm7,ymm27350 vpslld ymm2,ymm0,157351 vpaddd ymm6,ymm6,ymm77352 vpxor ymm7,ymm4,ymm17353 vpsrld ymm1,ymm0,197354 vpxor ymm7,ymm7,ymm27355 vpslld ymm2,ymm0,137356 vpxor ymm7,ymm7,ymm17357 vpxor ymm7,ymm7,ymm27358 vpaddd ymm6,ymm6,ymm77359 vpsrld ymm7,ymm11,67360 vpslld ymm2,ymm11,267361 vmovdqu YMMWORD[(288-256-128)+rbx],ymm67362 vpaddd ymm6,ymm6,ymm147363 7364 vpsrld ymm1,ymm11,117365 vpxor ymm7,ymm7,ymm27366 vpslld ymm2,ymm11,217367 vpaddd ymm6,ymm6,YMMWORD[((-96))+rbp]7368 vpxor ymm7,ymm7,ymm17369 7370 vpsrld ymm1,ymm11,257371 vpxor ymm7,ymm7,ymm27372 7373 vpslld ymm2,ymm11,77374 vpandn ymm0,ymm11,ymm137375 vpand ymm4,ymm11,ymm127376 7377 vpxor ymm7,ymm7,ymm17378 7379 vpsrld ymm14,ymm15,27380 vpxor ymm7,ymm7,ymm27381 7382 vpslld ymm1,ymm15,307383 vpxor ymm0,ymm0,ymm47384 vpxor ymm4,ymm8,ymm157385 7386 vpxor ymm14,ymm14,ymm17387 vpaddd ymm6,ymm6,ymm77388 7389 vpsrld ymm1,ymm15,137390 7391 vpslld ymm2,ymm15,197392 vpaddd ymm6,ymm6,ymm07393 vpand ymm3,ymm3,ymm47394 7395 vpxor ymm7,ymm14,ymm17396 7397 vpsrld ymm1,ymm15,227398 vpxor ymm7,ymm7,ymm27399 7400 vpslld ymm2,ymm15,107401 vpxor ymm14,ymm8,ymm37402 vpaddd ymm10,ymm10,ymm67403 7404 vpxor ymm7,ymm7,ymm17405 vpxor ymm7,ymm7,ymm27406 7407 vpaddd ymm14,ymm14,ymm67408 vpaddd ymm14,ymm14,ymm77409 vmovdqu ymm6,YMMWORD[((352-256-128))+rbx]7410 vpaddd ymm5,ymm5,YMMWORD[((96-128))+rax]7411 7412 vpsrld ymm7,ymm6,37413 vpsrld ymm1,ymm6,77414 vpslld ymm2,ymm6,257415 vpxor ymm7,ymm7,ymm17416 vpsrld ymm1,ymm6,187417 vpxor ymm7,ymm7,ymm27418 vpslld ymm2,ymm6,147419 vmovdqu ymm0,YMMWORD[((256-256-128))+rbx]7420 vpsrld ymm3,ymm0,107421 7422 vpxor ymm7,ymm7,ymm17423 vpsrld ymm1,ymm0,177424 vpxor ymm7,ymm7,ymm27425 vpslld ymm2,ymm0,157426 vpaddd ymm5,ymm5,ymm77427 vpxor ymm7,ymm3,ymm17428 vpsrld ymm1,ymm0,197429 vpxor ymm7,ymm7,ymm27430 vpslld ymm2,ymm0,137431 vpxor ymm7,ymm7,ymm17432 vpxor ymm7,ymm7,ymm27433 vpaddd ymm5,ymm5,ymm77434 vpsrld ymm7,ymm10,67435 vpslld ymm2,ymm10,267436 vmovdqu YMMWORD[(320-256-128)+rbx],ymm57437 vpaddd ymm5,ymm5,ymm137438 7439 vpsrld ymm1,ymm10,117440 vpxor ymm7,ymm7,ymm27441 vpslld ymm2,ymm10,217442 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp]7443 vpxor ymm7,ymm7,ymm17444 7445 vpsrld ymm1,ymm10,257446 vpxor ymm7,ymm7,ymm27447 7448 vpslld ymm2,ymm10,77449 vpandn ymm0,ymm10,ymm127450 vpand ymm3,ymm10,ymm117451 7452 vpxor ymm7,ymm7,ymm17453 7454 vpsrld ymm13,ymm14,27455 vpxor ymm7,ymm7,ymm27456 7457 vpslld ymm1,ymm14,307458 vpxor ymm0,ymm0,ymm37459 vpxor ymm3,ymm15,ymm147460 7461 vpxor ymm13,ymm13,ymm17462 vpaddd ymm5,ymm5,ymm77463 7464 vpsrld ymm1,ymm14,137465 7466 vpslld ymm2,ymm14,197467 vpaddd ymm5,ymm5,ymm07468 vpand ymm4,ymm4,ymm37469 7470 vpxor ymm7,ymm13,ymm17471 7472 vpsrld ymm1,ymm14,227473 vpxor ymm7,ymm7,ymm27474 7475 vpslld ymm2,ymm14,107476 vpxor ymm13,ymm15,ymm47477 vpaddd ymm9,ymm9,ymm57478 7479 vpxor ymm7,ymm7,ymm17480 vpxor ymm7,ymm7,ymm27481 7482 vpaddd ymm13,ymm13,ymm57483 vpaddd ymm13,ymm13,ymm77484 vmovdqu ymm5,YMMWORD[((384-256-128))+rbx]7485 vpaddd ymm6,ymm6,YMMWORD[((128-128))+rax]7486 7487 vpsrld ymm7,ymm5,37488 vpsrld ymm1,ymm5,77489 vpslld ymm2,ymm5,257490 vpxor ymm7,ymm7,ymm17491 vpsrld ymm1,ymm5,187492 vpxor ymm7,ymm7,ymm27493 vpslld ymm2,ymm5,147494 vmovdqu ymm0,YMMWORD[((288-256-128))+rbx]7495 vpsrld ymm4,ymm0,107496 7497 vpxor ymm7,ymm7,ymm17498 vpsrld ymm1,ymm0,177499 vpxor ymm7,ymm7,ymm27500 vpslld ymm2,ymm0,157501 vpaddd ymm6,ymm6,ymm77502 vpxor ymm7,ymm4,ymm17503 vpsrld ymm1,ymm0,197504 vpxor ymm7,ymm7,ymm27505 vpslld ymm2,ymm0,137506 vpxor ymm7,ymm7,ymm17507 vpxor ymm7,ymm7,ymm27508 vpaddd ymm6,ymm6,ymm77509 vpsrld ymm7,ymm9,67510 vpslld ymm2,ymm9,267511 vmovdqu YMMWORD[(352-256-128)+rbx],ymm67512 vpaddd ymm6,ymm6,ymm127513 7514 vpsrld ymm1,ymm9,117515 vpxor ymm7,ymm7,ymm27516 vpslld ymm2,ymm9,217517 vpaddd ymm6,ymm6,YMMWORD[((-32))+rbp]7518 vpxor ymm7,ymm7,ymm17519 7520 vpsrld ymm1,ymm9,257521 vpxor ymm7,ymm7,ymm27522 7523 vpslld ymm2,ymm9,77524 vpandn ymm0,ymm9,ymm117525 vpand ymm4,ymm9,ymm107526 7527 vpxor ymm7,ymm7,ymm17528 7529 vpsrld ymm12,ymm13,27530 vpxor ymm7,ymm7,ymm27531 7532 vpslld ymm1,ymm13,307533 vpxor ymm0,ymm0,ymm47534 vpxor ymm4,ymm14,ymm137535 7536 vpxor ymm12,ymm12,ymm17537 vpaddd ymm6,ymm6,ymm77538 7539 vpsrld ymm1,ymm13,137540 7541 vpslld ymm2,ymm13,197542 vpaddd ymm6,ymm6,ymm07543 vpand ymm3,ymm3,ymm47544 7545 vpxor ymm7,ymm12,ymm17546 7547 vpsrld ymm1,ymm13,227548 vpxor ymm7,ymm7,ymm27549 7550 vpslld ymm2,ymm13,107551 vpxor ymm12,ymm14,ymm37552 vpaddd ymm8,ymm8,ymm67553 7554 vpxor ymm7,ymm7,ymm17555 vpxor ymm7,ymm7,ymm27556 7557 vpaddd ymm12,ymm12,ymm67558 vpaddd ymm12,ymm12,ymm77559 vmovdqu ymm6,YMMWORD[((416-256-128))+rbx]7560 vpaddd ymm5,ymm5,YMMWORD[((160-128))+rax]7561 7562 vpsrld ymm7,ymm6,37563 vpsrld ymm1,ymm6,77564 vpslld ymm2,ymm6,257565 vpxor ymm7,ymm7,ymm17566 vpsrld ymm1,ymm6,187567 vpxor ymm7,ymm7,ymm27568 vpslld ymm2,ymm6,147569 vmovdqu ymm0,YMMWORD[((320-256-128))+rbx]7570 vpsrld ymm3,ymm0,107571 7572 vpxor ymm7,ymm7,ymm17573 vpsrld ymm1,ymm0,177574 vpxor ymm7,ymm7,ymm27575 vpslld ymm2,ymm0,157576 vpaddd ymm5,ymm5,ymm77577 vpxor ymm7,ymm3,ymm17578 vpsrld ymm1,ymm0,197579 vpxor ymm7,ymm7,ymm27580 vpslld ymm2,ymm0,137581 vpxor ymm7,ymm7,ymm17582 vpxor ymm7,ymm7,ymm27583 vpaddd ymm5,ymm5,ymm77584 vpsrld ymm7,ymm8,67585 vpslld ymm2,ymm8,267586 vmovdqu YMMWORD[(384-256-128)+rbx],ymm57587 vpaddd ymm5,ymm5,ymm117588 7589 vpsrld ymm1,ymm8,117590 vpxor ymm7,ymm7,ymm27591 vpslld ymm2,ymm8,217592 vpaddd ymm5,ymm5,YMMWORD[rbp]7593 vpxor ymm7,ymm7,ymm17594 7595 vpsrld ymm1,ymm8,257596 vpxor ymm7,ymm7,ymm27597 7598 vpslld ymm2,ymm8,77599 vpandn ymm0,ymm8,ymm107600 vpand ymm3,ymm8,ymm97601 7602 vpxor ymm7,ymm7,ymm17603 7604 vpsrld ymm11,ymm12,27605 vpxor ymm7,ymm7,ymm27606 7607 vpslld ymm1,ymm12,307608 vpxor ymm0,ymm0,ymm37609 vpxor ymm3,ymm13,ymm127610 7611 vpxor ymm11,ymm11,ymm17612 vpaddd ymm5,ymm5,ymm77613 7614 vpsrld ymm1,ymm12,137615 7616 vpslld ymm2,ymm12,197617 vpaddd ymm5,ymm5,ymm07618 vpand ymm4,ymm4,ymm37619 7620 vpxor ymm7,ymm11,ymm17621 7622 vpsrld ymm1,ymm12,227623 vpxor ymm7,ymm7,ymm27624 7625 vpslld ymm2,ymm12,107626 vpxor ymm11,ymm13,ymm47627 vpaddd ymm15,ymm15,ymm57628 7629 vpxor ymm7,ymm7,ymm17630 vpxor ymm7,ymm7,ymm27631 7632 vpaddd ymm11,ymm11,ymm57633 vpaddd ymm11,ymm11,ymm77634 vmovdqu ymm5,YMMWORD[((448-256-128))+rbx]7635 vpaddd ymm6,ymm6,YMMWORD[((192-128))+rax]7636 7637 vpsrld ymm7,ymm5,37638 vpsrld ymm1,ymm5,77639 vpslld ymm2,ymm5,257640 vpxor ymm7,ymm7,ymm17641 vpsrld ymm1,ymm5,187642 vpxor ymm7,ymm7,ymm27643 vpslld ymm2,ymm5,147644 vmovdqu ymm0,YMMWORD[((352-256-128))+rbx]7645 vpsrld ymm4,ymm0,107646 7647 vpxor ymm7,ymm7,ymm17648 vpsrld ymm1,ymm0,177649 vpxor ymm7,ymm7,ymm27650 vpslld ymm2,ymm0,157651 vpaddd ymm6,ymm6,ymm77652 vpxor ymm7,ymm4,ymm17653 vpsrld ymm1,ymm0,197654 vpxor ymm7,ymm7,ymm27655 vpslld ymm2,ymm0,137656 vpxor ymm7,ymm7,ymm17657 vpxor ymm7,ymm7,ymm27658 vpaddd ymm6,ymm6,ymm77659 vpsrld ymm7,ymm15,67660 vpslld ymm2,ymm15,267661 vmovdqu YMMWORD[(416-256-128)+rbx],ymm67662 vpaddd ymm6,ymm6,ymm107663 7664 vpsrld ymm1,ymm15,117665 vpxor ymm7,ymm7,ymm27666 vpslld ymm2,ymm15,217667 vpaddd ymm6,ymm6,YMMWORD[32+rbp]7668 vpxor ymm7,ymm7,ymm17669 7670 vpsrld ymm1,ymm15,257671 vpxor ymm7,ymm7,ymm27672 7673 vpslld ymm2,ymm15,77674 vpandn ymm0,ymm15,ymm97675 vpand ymm4,ymm15,ymm87676 7677 vpxor ymm7,ymm7,ymm17678 7679 vpsrld ymm10,ymm11,27680 vpxor ymm7,ymm7,ymm27681 7682 vpslld ymm1,ymm11,307683 vpxor ymm0,ymm0,ymm47684 vpxor ymm4,ymm12,ymm117685 7686 vpxor ymm10,ymm10,ymm17687 vpaddd ymm6,ymm6,ymm77688 7689 vpsrld ymm1,ymm11,137690 7691 vpslld ymm2,ymm11,197692 vpaddd ymm6,ymm6,ymm07693 vpand ymm3,ymm3,ymm47694 7695 vpxor ymm7,ymm10,ymm17696 7697 vpsrld ymm1,ymm11,227698 vpxor ymm7,ymm7,ymm27699 7700 vpslld ymm2,ymm11,107701 vpxor ymm10,ymm12,ymm37702 vpaddd ymm14,ymm14,ymm67703 7704 vpxor ymm7,ymm7,ymm17705 vpxor ymm7,ymm7,ymm27706 7707 vpaddd ymm10,ymm10,ymm67708 vpaddd ymm10,ymm10,ymm77709 vmovdqu ymm6,YMMWORD[((480-256-128))+rbx]7710 vpaddd ymm5,ymm5,YMMWORD[((224-128))+rax]7711 7712 vpsrld ymm7,ymm6,37713 vpsrld ymm1,ymm6,77714 vpslld ymm2,ymm6,257715 vpxor ymm7,ymm7,ymm17716 vpsrld ymm1,ymm6,187717 vpxor ymm7,ymm7,ymm27718 vpslld ymm2,ymm6,147719 vmovdqu ymm0,YMMWORD[((384-256-128))+rbx]7720 vpsrld ymm3,ymm0,107721 7722 vpxor ymm7,ymm7,ymm17723 vpsrld ymm1,ymm0,177724 vpxor ymm7,ymm7,ymm27725 vpslld ymm2,ymm0,157726 vpaddd ymm5,ymm5,ymm77727 vpxor ymm7,ymm3,ymm17728 vpsrld ymm1,ymm0,197729 vpxor ymm7,ymm7,ymm27730 vpslld ymm2,ymm0,137731 vpxor ymm7,ymm7,ymm17732 vpxor ymm7,ymm7,ymm27733 vpaddd ymm5,ymm5,ymm77734 vpsrld ymm7,ymm14,67735 vpslld ymm2,ymm14,267736 vmovdqu YMMWORD[(448-256-128)+rbx],ymm57737 vpaddd ymm5,ymm5,ymm97738 7739 vpsrld ymm1,ymm14,117740 vpxor ymm7,ymm7,ymm27741 vpslld ymm2,ymm14,217742 vpaddd ymm5,ymm5,YMMWORD[64+rbp]7743 vpxor ymm7,ymm7,ymm17744 7745 vpsrld ymm1,ymm14,257746 vpxor ymm7,ymm7,ymm27747 7748 vpslld ymm2,ymm14,77749 vpandn ymm0,ymm14,ymm87750 vpand ymm3,ymm14,ymm157751 7752 vpxor ymm7,ymm7,ymm17753 7754 vpsrld ymm9,ymm10,27755 vpxor ymm7,ymm7,ymm27756 7757 vpslld ymm1,ymm10,307758 vpxor ymm0,ymm0,ymm37759 vpxor ymm3,ymm11,ymm107760 7761 vpxor ymm9,ymm9,ymm17762 vpaddd ymm5,ymm5,ymm77763 7764 vpsrld ymm1,ymm10,137765 7766 vpslld ymm2,ymm10,197767 vpaddd ymm5,ymm5,ymm07768 vpand ymm4,ymm4,ymm37769 7770 vpxor ymm7,ymm9,ymm17771 7772 vpsrld ymm1,ymm10,227773 vpxor ymm7,ymm7,ymm27774 7775 vpslld ymm2,ymm10,107776 vpxor ymm9,ymm11,ymm47777 vpaddd ymm13,ymm13,ymm57778 7779 vpxor ymm7,ymm7,ymm17780 vpxor ymm7,ymm7,ymm27781 7782 vpaddd ymm9,ymm9,ymm57783 vpaddd ymm9,ymm9,ymm77784 vmovdqu ymm5,YMMWORD[((0-128))+rax]7785 vpaddd ymm6,ymm6,YMMWORD[((256-256-128))+rbx]7786 7787 vpsrld ymm7,ymm5,37788 vpsrld ymm1,ymm5,77789 vpslld ymm2,ymm5,257790 vpxor ymm7,ymm7,ymm17791 vpsrld ymm1,ymm5,187792 vpxor ymm7,ymm7,ymm27793 vpslld ymm2,ymm5,147794 vmovdqu ymm0,YMMWORD[((416-256-128))+rbx]7795 vpsrld ymm4,ymm0,107796 7797 vpxor ymm7,ymm7,ymm17798 vpsrld ymm1,ymm0,177799 vpxor ymm7,ymm7,ymm27800 vpslld ymm2,ymm0,157801 vpaddd ymm6,ymm6,ymm77802 vpxor ymm7,ymm4,ymm17803 vpsrld ymm1,ymm0,197804 vpxor ymm7,ymm7,ymm27805 vpslld ymm2,ymm0,137806 vpxor ymm7,ymm7,ymm17807 vpxor ymm7,ymm7,ymm27808 vpaddd ymm6,ymm6,ymm77809 vpsrld ymm7,ymm13,67810 vpslld ymm2,ymm13,267811 vmovdqu YMMWORD[(480-256-128)+rbx],ymm67812 vpaddd ymm6,ymm6,ymm87813 7814 vpsrld ymm1,ymm13,117815 vpxor ymm7,ymm7,ymm27816 vpslld ymm2,ymm13,217817 vpaddd ymm6,ymm6,YMMWORD[96+rbp]7818 vpxor ymm7,ymm7,ymm17819 7820 vpsrld ymm1,ymm13,257821 vpxor ymm7,ymm7,ymm27822 7823 vpslld ymm2,ymm13,77824 vpandn ymm0,ymm13,ymm157825 vpand ymm4,ymm13,ymm147826 7827 vpxor ymm7,ymm7,ymm17828 7829 vpsrld ymm8,ymm9,27830 vpxor ymm7,ymm7,ymm27831 7832 vpslld ymm1,ymm9,307833 vpxor ymm0,ymm0,ymm47834 vpxor ymm4,ymm10,ymm97835 7836 vpxor ymm8,ymm8,ymm17837 vpaddd ymm6,ymm6,ymm77838 7839 vpsrld ymm1,ymm9,137840 7841 vpslld ymm2,ymm9,197842 vpaddd ymm6,ymm6,ymm07843 vpand ymm3,ymm3,ymm47844 7845 vpxor ymm7,ymm8,ymm17846 7847 vpsrld ymm1,ymm9,227848 vpxor ymm7,ymm7,ymm27849 7850 vpslld ymm2,ymm9,107851 vpxor ymm8,ymm10,ymm37852 vpaddd ymm12,ymm12,ymm67853 7854 vpxor ymm7,ymm7,ymm17855 vpxor ymm7,ymm7,ymm27856 7857 vpaddd ymm8,ymm8,ymm67858 vpaddd ymm8,ymm8,ymm77859 add rbp,2567860 dec ecx7861 jnz NEAR $L$oop_16_xx_avx27862 7863 mov ecx,17864 lea rbx,[512+rsp]7865 lea rbp,[((K256+128))]7866 cmp ecx,DWORD[rbx]7867 cmovge r12,rbp7868 cmp ecx,DWORD[4+rbx]7869 cmovge r13,rbp7870 cmp ecx,DWORD[8+rbx]7871 cmovge r14,rbp7872 cmp ecx,DWORD[12+rbx]7873 cmovge r15,rbp7874 cmp ecx,DWORD[16+rbx]7875 cmovge r8,rbp7876 cmp ecx,DWORD[20+rbx]7877 cmovge r9,rbp7878 cmp ecx,DWORD[24+rbx]7879 cmovge r10,rbp7880 cmp ecx,DWORD[28+rbx]7881 cmovge r11,rbp7882 vmovdqa ymm7,YMMWORD[rbx]7883 vpxor ymm0,ymm0,ymm07884 vmovdqa ymm6,ymm77885 vpcmpgtd ymm6,ymm6,ymm07886 vpaddd ymm7,ymm7,ymm67887 7888 vmovdqu ymm0,YMMWORD[((0-128))+rdi]7889 vpand ymm8,ymm8,ymm67890 vmovdqu ymm1,YMMWORD[((32-128))+rdi]7891 vpand ymm9,ymm9,ymm67892 vmovdqu ymm2,YMMWORD[((64-128))+rdi]7893 vpand ymm10,ymm10,ymm67894 vmovdqu ymm5,YMMWORD[((96-128))+rdi]7895 vpand ymm11,ymm11,ymm67896 vpaddd ymm8,ymm8,ymm07897 vmovdqu ymm0,YMMWORD[((128-128))+rdi]7898 vpand ymm12,ymm12,ymm67899 vpaddd ymm9,ymm9,ymm17900 vmovdqu ymm1,YMMWORD[((160-128))+rdi]7901 vpand ymm13,ymm13,ymm67902 vpaddd ymm10,ymm10,ymm27903 vmovdqu ymm2,YMMWORD[((192-128))+rdi]7904 vpand ymm14,ymm14,ymm67905 vpaddd ymm11,ymm11,ymm57906 vmovdqu ymm5,YMMWORD[((224-128))+rdi]7907 vpand ymm15,ymm15,ymm67908 vpaddd ymm12,ymm12,ymm07909 vpaddd ymm13,ymm13,ymm17910 vmovdqu YMMWORD[(0-128)+rdi],ymm87911 vpaddd ymm14,ymm14,ymm27912 vmovdqu YMMWORD[(32-128)+rdi],ymm97913 vpaddd ymm15,ymm15,ymm57914 vmovdqu YMMWORD[(64-128)+rdi],ymm107915 vmovdqu YMMWORD[(96-128)+rdi],ymm117916 vmovdqu YMMWORD[(128-128)+rdi],ymm127917 vmovdqu YMMWORD[(160-128)+rdi],ymm137918 vmovdqu YMMWORD[(192-128)+rdi],ymm147919 vmovdqu YMMWORD[(224-128)+rdi],ymm157920 7921 vmovdqu YMMWORD[rbx],ymm77922 lea rbx,[((256+128))+rsp]7923 vmovdqu ymm6,YMMWORD[$L$pbswap]7924 dec edx7925 jnz NEAR $L$oop_avx27926 7927 7928 7929 7930 7931 7932 7933 $L$done_avx2:7934 mov rax,QWORD[544+rsp]7935 7936 vzeroupper7937 movaps xmm6,XMMWORD[((-216))+rax]7938 movaps xmm7,XMMWORD[((-200))+rax]7939 movaps xmm8,XMMWORD[((-184))+rax]7940 movaps xmm9,XMMWORD[((-168))+rax]7941 movaps xmm10,XMMWORD[((-152))+rax]7942 movaps xmm11,XMMWORD[((-136))+rax]7943 movaps xmm12,XMMWORD[((-120))+rax]7944 movaps xmm13,XMMWORD[((-104))+rax]7945 movaps xmm14,XMMWORD[((-88))+rax]7946 movaps xmm15,XMMWORD[((-72))+rax]7947 mov r15,QWORD[((-48))+rax]7948 7949 mov r14,QWORD[((-40))+rax]7950 7951 mov r13,QWORD[((-32))+rax]7952 7953 mov r12,QWORD[((-24))+rax]7954 7955 mov rbp,QWORD[((-16))+rax]7956 7957 mov rbx,QWORD[((-8))+rax]7958 7959 lea rsp,[rax]7960 7961 $L$epilogue_avx2:7962 mov rdi,QWORD[8+rsp] ;WIN64 epilogue7963 mov rsi,QWORD[16+rsp]7964 DB 0F3h,0C3h ;repret7965 7966 $L$SEH_end_sha256_multi_block_avx2:7967 3207 ALIGN 256 7968 3208 K256: … … 8204 3444 DB 0F3h,0C3h ;repret 8205 3445 8206 8207 ALIGN 168208 avx2_handler:8209 push rsi8210 push rdi8211 push rbx8212 push rbp8213 push r128214 push r138215 push r148216 push r158217 pushfq8218 sub rsp,648219 8220 mov rax,QWORD[120+r8]8221 mov rbx,QWORD[248+r8]8222 8223 mov rsi,QWORD[8+r9]8224 mov r11,QWORD[56+r9]8225 8226 mov r10d,DWORD[r11]8227 lea r10,[r10*1+rsi]8228 cmp rbx,r108229 jb NEAR $L$in_prologue8230 8231 mov rax,QWORD[152+r8]8232 8233 mov r10d,DWORD[4+r11]8234 lea r10,[r10*1+rsi]8235 cmp rbx,r108236 jae NEAR $L$in_prologue8237 8238 mov rax,QWORD[544+r8]8239 8240 mov rbx,QWORD[((-8))+rax]8241 mov rbp,QWORD[((-16))+rax]8242 mov r12,QWORD[((-24))+rax]8243 mov r13,QWORD[((-32))+rax]8244 mov r14,QWORD[((-40))+rax]8245 mov r15,QWORD[((-48))+rax]8246 mov QWORD[144+r8],rbx8247 mov QWORD[160+r8],rbp8248 mov QWORD[216+r8],r128249 mov QWORD[224+r8],r138250 mov QWORD[232+r8],r148251 mov QWORD[240+r8],r158252 8253 lea rsi,[((-56-160))+rax]8254 lea rdi,[512+r8]8255 mov ecx,208256 DD 0xa548f3fc8257 8258 jmp NEAR $L$in_prologue8259 8260 3446 section .pdata rdata align=4 8261 3447 ALIGN 4 … … 8266 3452 DD $L$SEH_end_sha256_multi_block_shaext wrt ..imagebase 8267 3453 DD $L$SEH_info_sha256_multi_block_shaext wrt ..imagebase 8268 DD $L$SEH_begin_sha256_multi_block_avx wrt ..imagebase8269 DD $L$SEH_end_sha256_multi_block_avx wrt ..imagebase8270 DD $L$SEH_info_sha256_multi_block_avx wrt ..imagebase8271 DD $L$SEH_begin_sha256_multi_block_avx2 wrt ..imagebase8272 DD $L$SEH_end_sha256_multi_block_avx2 wrt ..imagebase8273 DD $L$SEH_info_sha256_multi_block_avx2 wrt ..imagebase8274 3454 section .xdata rdata align=8 8275 3455 ALIGN 8 … … 8282 3462 DD se_handler wrt ..imagebase 8283 3463 DD $L$body_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase 8284 $L$SEH_info_sha256_multi_block_avx:8285 DB 9,0,0,08286 DD se_handler wrt ..imagebase8287 DD $L$body_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase8288 $L$SEH_info_sha256_multi_block_avx2:8289 DB 9,0,0,08290 DD avx2_handler wrt ..imagebase8291 DD $L$body_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/sha256-x86_64.S
r94083 r95221 27 27 test r11d,536870912 28 28 jnz NEAR _shaext_shortcut 29 and r11d,29630 cmp r11d,29631 je NEAR $L$avx2_shortcut32 and r9d,107374182433 and r10d,26843596834 or r10d,r9d35 cmp r10d,134217779236 je NEAR $L$avx_shortcut37 29 test r10d,512 38 30 jnz NEAR $L$ssse3_shortcut … … 3158 3150 3159 3151 $L$SEH_end_sha256_block_data_order_ssse3: 3160 3161 ALIGN 643162 sha256_block_data_order_avx:3163 mov QWORD[8+rsp],rdi ;WIN64 prologue3164 mov QWORD[16+rsp],rsi3165 mov rax,rsp3166 $L$SEH_begin_sha256_block_data_order_avx:3167 mov rdi,rcx3168 mov rsi,rdx3169 mov rdx,r83170 3171 3172 3173 $L$avx_shortcut:3174 mov rax,rsp3175 3176 push rbx3177 3178 push rbp3179 3180 push r123181 3182 push r133183 3184 push r143185 3186 push r153187 3188 shl rdx,43189 sub rsp,1603190 lea rdx,[rdx*4+rsi]3191 and rsp,-643192 mov QWORD[((64+0))+rsp],rdi3193 mov QWORD[((64+8))+rsp],rsi3194 mov QWORD[((64+16))+rsp],rdx3195 mov QWORD[88+rsp],rax3196 3197 movaps XMMWORD[(64+32)+rsp],xmm63198 movaps XMMWORD[(64+48)+rsp],xmm73199 movaps XMMWORD[(64+64)+rsp],xmm83200 movaps XMMWORD[(64+80)+rsp],xmm93201 $L$prologue_avx:3202 3203 vzeroupper3204 mov eax,DWORD[rdi]3205 mov ebx,DWORD[4+rdi]3206 mov ecx,DWORD[8+rdi]3207 mov edx,DWORD[12+rdi]3208 mov r8d,DWORD[16+rdi]3209 mov r9d,DWORD[20+rdi]3210 mov r10d,DWORD[24+rdi]3211 mov r11d,DWORD[28+rdi]3212 vmovdqa xmm8,XMMWORD[((K256+512+32))]3213 vmovdqa xmm9,XMMWORD[((K256+512+64))]3214 jmp NEAR $L$loop_avx3215 ALIGN 163216 $L$loop_avx:3217 vmovdqa xmm7,XMMWORD[((K256+512))]3218 vmovdqu xmm0,XMMWORD[rsi]3219 vmovdqu xmm1,XMMWORD[16+rsi]3220 vmovdqu xmm2,XMMWORD[32+rsi]3221 vmovdqu xmm3,XMMWORD[48+rsi]3222 vpshufb xmm0,xmm0,xmm73223 lea rbp,[K256]3224 vpshufb xmm1,xmm1,xmm73225 vpshufb xmm2,xmm2,xmm73226 vpaddd xmm4,xmm0,XMMWORD[rbp]3227 vpshufb xmm3,xmm3,xmm73228 vpaddd xmm5,xmm1,XMMWORD[32+rbp]3229 vpaddd xmm6,xmm2,XMMWORD[64+rbp]3230 vpaddd xmm7,xmm3,XMMWORD[96+rbp]3231 vmovdqa XMMWORD[rsp],xmm43232 mov r14d,eax3233 vmovdqa XMMWORD[16+rsp],xmm53234 mov edi,ebx3235 vmovdqa XMMWORD[32+rsp],xmm63236 xor edi,ecx3237 vmovdqa XMMWORD[48+rsp],xmm73238 mov r13d,r8d3239 jmp NEAR $L$avx_00_473240 3241 ALIGN 163242 $L$avx_00_47:3243 sub rbp,-1283244 vpalignr xmm4,xmm1,xmm0,43245 shrd r13d,r13d,143246 mov eax,r14d3247 mov r12d,r9d3248 vpalignr xmm7,xmm3,xmm2,43249 shrd r14d,r14d,93250 xor r13d,r8d3251 xor r12d,r10d3252 vpsrld xmm6,xmm4,73253 shrd r13d,r13d,53254 xor r14d,eax3255 and r12d,r8d3256 vpaddd xmm0,xmm0,xmm73257 xor r13d,r8d3258 add r11d,DWORD[rsp]3259 mov r15d,eax3260 vpsrld xmm7,xmm4,33261 xor r12d,r10d3262 shrd r14d,r14d,113263 xor r15d,ebx3264 vpslld xmm5,xmm4,143265 add r11d,r12d3266 shrd r13d,r13d,63267 and edi,r15d3268 vpxor xmm4,xmm7,xmm63269 xor r14d,eax3270 add r11d,r13d3271 xor edi,ebx3272 vpshufd xmm7,xmm3,2503273 shrd r14d,r14d,23274 add edx,r11d3275 add r11d,edi3276 vpsrld xmm6,xmm6,113277 mov r13d,edx3278 add r14d,r11d3279 shrd r13d,r13d,143280 vpxor xmm4,xmm4,xmm53281 mov r11d,r14d3282 mov r12d,r8d3283 shrd r14d,r14d,93284 vpslld xmm5,xmm5,113285 xor r13d,edx3286 xor r12d,r9d3287 shrd r13d,r13d,53288 vpxor xmm4,xmm4,xmm63289 xor r14d,r11d3290 and r12d,edx3291 xor r13d,edx3292 vpsrld xmm6,xmm7,103293 add r10d,DWORD[4+rsp]3294 mov edi,r11d3295 xor r12d,r9d3296 vpxor xmm4,xmm4,xmm53297 shrd r14d,r14d,113298 xor edi,eax3299 add r10d,r12d3300 vpsrlq xmm7,xmm7,173301 shrd r13d,r13d,63302 and r15d,edi3303 xor r14d,r11d3304 vpaddd xmm0,xmm0,xmm43305 add r10d,r13d3306 xor r15d,eax3307 shrd r14d,r14d,23308 vpxor xmm6,xmm6,xmm73309 add ecx,r10d3310 add r10d,r15d3311 mov r13d,ecx3312 vpsrlq xmm7,xmm7,23313 add r14d,r10d3314 shrd r13d,r13d,143315 mov r10d,r14d3316 vpxor xmm6,xmm6,xmm73317 mov r12d,edx3318 shrd r14d,r14d,93319 xor r13d,ecx3320 vpshufb xmm6,xmm6,xmm83321 xor r12d,r8d3322 shrd r13d,r13d,53323 xor r14d,r10d3324 vpaddd xmm0,xmm0,xmm63325 and r12d,ecx3326 xor r13d,ecx3327 add r9d,DWORD[8+rsp]3328 vpshufd xmm7,xmm0,803329 mov r15d,r10d3330 xor r12d,r8d3331 shrd r14d,r14d,113332 vpsrld xmm6,xmm7,103333 xor r15d,r11d3334 add r9d,r12d3335 shrd r13d,r13d,63336 vpsrlq xmm7,xmm7,173337 and edi,r15d3338 xor r14d,r10d3339 add r9d,r13d3340 vpxor xmm6,xmm6,xmm73341 xor edi,r11d3342 shrd r14d,r14d,23343 add ebx,r9d3344 vpsrlq xmm7,xmm7,23345 add r9d,edi3346 mov r13d,ebx3347 add r14d,r9d3348 vpxor xmm6,xmm6,xmm73349 shrd r13d,r13d,143350 mov r9d,r14d3351 mov r12d,ecx3352 vpshufb xmm6,xmm6,xmm93353 shrd r14d,r14d,93354 xor r13d,ebx3355 xor r12d,edx3356 vpaddd xmm0,xmm0,xmm63357 shrd r13d,r13d,53358 xor r14d,r9d3359 and r12d,ebx3360 vpaddd xmm6,xmm0,XMMWORD[rbp]3361 xor r13d,ebx3362 add r8d,DWORD[12+rsp]3363 mov edi,r9d3364 xor r12d,edx3365 shrd r14d,r14d,113366 xor edi,r10d3367 add r8d,r12d3368 shrd r13d,r13d,63369 and r15d,edi3370 xor r14d,r9d3371 add r8d,r13d3372 xor r15d,r10d3373 shrd r14d,r14d,23374 add eax,r8d3375 add r8d,r15d3376 mov r13d,eax3377 add r14d,r8d3378 vmovdqa XMMWORD[rsp],xmm63379 vpalignr xmm4,xmm2,xmm1,43380 shrd r13d,r13d,143381 mov r8d,r14d3382 mov r12d,ebx3383 vpalignr xmm7,xmm0,xmm3,43384 shrd r14d,r14d,93385 xor r13d,eax3386 xor r12d,ecx3387 vpsrld xmm6,xmm4,73388 shrd r13d,r13d,53389 xor r14d,r8d3390 and r12d,eax3391 vpaddd xmm1,xmm1,xmm73392 xor r13d,eax3393 add edx,DWORD[16+rsp]3394 mov r15d,r8d3395 vpsrld xmm7,xmm4,33396 xor r12d,ecx3397 shrd r14d,r14d,113398 xor r15d,r9d3399 vpslld xmm5,xmm4,143400 add edx,r12d3401 shrd r13d,r13d,63402 and edi,r15d3403 vpxor xmm4,xmm7,xmm63404 xor r14d,r8d3405 add edx,r13d3406 xor edi,r9d3407 vpshufd xmm7,xmm0,2503408 shrd r14d,r14d,23409 add r11d,edx3410 add edx,edi3411 vpsrld xmm6,xmm6,113412 mov r13d,r11d3413 add r14d,edx3414 shrd r13d,r13d,143415 vpxor xmm4,xmm4,xmm53416 mov edx,r14d3417 mov r12d,eax3418 shrd r14d,r14d,93419 vpslld xmm5,xmm5,113420 xor r13d,r11d3421 xor r12d,ebx3422 shrd r13d,r13d,53423 vpxor xmm4,xmm4,xmm63424 xor r14d,edx3425 and r12d,r11d3426 xor r13d,r11d3427 vpsrld xmm6,xmm7,103428 add ecx,DWORD[20+rsp]3429 mov edi,edx3430 xor r12d,ebx3431 vpxor xmm4,xmm4,xmm53432 shrd r14d,r14d,113433 xor edi,r8d3434 add ecx,r12d3435 vpsrlq xmm7,xmm7,173436 shrd r13d,r13d,63437 and r15d,edi3438 xor r14d,edx3439 vpaddd xmm1,xmm1,xmm43440 add ecx,r13d3441 xor r15d,r8d3442 shrd r14d,r14d,23443 vpxor xmm6,xmm6,xmm73444 add r10d,ecx3445 add ecx,r15d3446 mov r13d,r10d3447 vpsrlq xmm7,xmm7,23448 add r14d,ecx3449 shrd r13d,r13d,143450 mov ecx,r14d3451 vpxor xmm6,xmm6,xmm73452 mov r12d,r11d3453 shrd r14d,r14d,93454 xor r13d,r10d3455 vpshufb xmm6,xmm6,xmm83456 xor r12d,eax3457 shrd r13d,r13d,53458 xor r14d,ecx3459 vpaddd xmm1,xmm1,xmm63460 and r12d,r10d3461 xor r13d,r10d3462 add ebx,DWORD[24+rsp]3463 vpshufd xmm7,xmm1,803464 mov r15d,ecx3465 xor r12d,eax3466 shrd r14d,r14d,113467 vpsrld xmm6,xmm7,103468 xor r15d,edx3469 add ebx,r12d3470 shrd r13d,r13d,63471 vpsrlq xmm7,xmm7,173472 and edi,r15d3473 xor r14d,ecx3474 add ebx,r13d3475 vpxor xmm6,xmm6,xmm73476 xor edi,edx3477 shrd r14d,r14d,23478 add r9d,ebx3479 vpsrlq xmm7,xmm7,23480 add ebx,edi3481 mov r13d,r9d3482 add r14d,ebx3483 vpxor xmm6,xmm6,xmm73484 shrd r13d,r13d,143485 mov ebx,r14d3486 mov r12d,r10d3487 vpshufb xmm6,xmm6,xmm93488 shrd r14d,r14d,93489 xor r13d,r9d3490 xor r12d,r11d3491 vpaddd xmm1,xmm1,xmm63492 shrd r13d,r13d,53493 xor r14d,ebx3494 and r12d,r9d3495 vpaddd xmm6,xmm1,XMMWORD[32+rbp]3496 xor r13d,r9d3497 add eax,DWORD[28+rsp]3498 mov edi,ebx3499 xor r12d,r11d3500 shrd r14d,r14d,113501 xor edi,ecx3502 add eax,r12d3503 shrd r13d,r13d,63504 and r15d,edi3505 xor r14d,ebx3506 add eax,r13d3507 xor r15d,ecx3508 shrd r14d,r14d,23509 add r8d,eax3510 add eax,r15d3511 mov r13d,r8d3512 add r14d,eax3513 vmovdqa XMMWORD[16+rsp],xmm63514 vpalignr xmm4,xmm3,xmm2,43515 shrd r13d,r13d,143516 mov eax,r14d3517 mov r12d,r9d3518 vpalignr xmm7,xmm1,xmm0,43519 shrd r14d,r14d,93520 xor r13d,r8d3521 xor r12d,r10d3522 vpsrld xmm6,xmm4,73523 shrd r13d,r13d,53524 xor r14d,eax3525 and r12d,r8d3526 vpaddd xmm2,xmm2,xmm73527 xor r13d,r8d3528 add r11d,DWORD[32+rsp]3529 mov r15d,eax3530 vpsrld xmm7,xmm4,33531 xor r12d,r10d3532 shrd r14d,r14d,113533 xor r15d,ebx3534 vpslld xmm5,xmm4,143535 add r11d,r12d3536 shrd r13d,r13d,63537 and edi,r15d3538 vpxor xmm4,xmm7,xmm63539 xor r14d,eax3540 add r11d,r13d3541 xor edi,ebx3542 vpshufd xmm7,xmm1,2503543 shrd r14d,r14d,23544 add edx,r11d3545 add r11d,edi3546 vpsrld xmm6,xmm6,113547 mov r13d,edx3548 add r14d,r11d3549 shrd r13d,r13d,143550 vpxor xmm4,xmm4,xmm53551 mov r11d,r14d3552 mov r12d,r8d3553 shrd r14d,r14d,93554 vpslld xmm5,xmm5,113555 xor r13d,edx3556 xor r12d,r9d3557 shrd r13d,r13d,53558 vpxor xmm4,xmm4,xmm63559 xor r14d,r11d3560 and r12d,edx3561 xor r13d,edx3562 vpsrld xmm6,xmm7,103563 add r10d,DWORD[36+rsp]3564 mov edi,r11d3565 xor r12d,r9d3566 vpxor xmm4,xmm4,xmm53567 shrd r14d,r14d,113568 xor edi,eax3569 add r10d,r12d3570 vpsrlq xmm7,xmm7,173571 shrd r13d,r13d,63572 and r15d,edi3573 xor r14d,r11d3574 vpaddd xmm2,xmm2,xmm43575 add r10d,r13d3576 xor r15d,eax3577 shrd r14d,r14d,23578 vpxor xmm6,xmm6,xmm73579 add ecx,r10d3580 add r10d,r15d3581 mov r13d,ecx3582 vpsrlq xmm7,xmm7,23583 add r14d,r10d3584 shrd r13d,r13d,143585 mov r10d,r14d3586 vpxor xmm6,xmm6,xmm73587 mov r12d,edx3588 shrd r14d,r14d,93589 xor r13d,ecx3590 vpshufb xmm6,xmm6,xmm83591 xor r12d,r8d3592 shrd r13d,r13d,53593 xor r14d,r10d3594 vpaddd xmm2,xmm2,xmm63595 and r12d,ecx3596 xor r13d,ecx3597 add r9d,DWORD[40+rsp]3598 vpshufd xmm7,xmm2,803599 mov r15d,r10d3600 xor r12d,r8d3601 shrd r14d,r14d,113602 vpsrld xmm6,xmm7,103603 xor r15d,r11d3604 add r9d,r12d3605 shrd r13d,r13d,63606 vpsrlq xmm7,xmm7,173607 and edi,r15d3608 xor r14d,r10d3609 add r9d,r13d3610 vpxor xmm6,xmm6,xmm73611 xor edi,r11d3612 shrd r14d,r14d,23613 add ebx,r9d3614 vpsrlq xmm7,xmm7,23615 add r9d,edi3616 mov r13d,ebx3617 add r14d,r9d3618 vpxor xmm6,xmm6,xmm73619 shrd r13d,r13d,143620 mov r9d,r14d3621 mov r12d,ecx3622 vpshufb xmm6,xmm6,xmm93623 shrd r14d,r14d,93624 xor r13d,ebx3625 xor r12d,edx3626 vpaddd xmm2,xmm2,xmm63627 shrd r13d,r13d,53628 xor r14d,r9d3629 and r12d,ebx3630 vpaddd xmm6,xmm2,XMMWORD[64+rbp]3631 xor r13d,ebx3632 add r8d,DWORD[44+rsp]3633 mov edi,r9d3634 xor r12d,edx3635 shrd r14d,r14d,113636 xor edi,r10d3637 add r8d,r12d3638 shrd r13d,r13d,63639 and r15d,edi3640 xor r14d,r9d3641 add r8d,r13d3642 xor r15d,r10d3643 shrd r14d,r14d,23644 add eax,r8d3645 add r8d,r15d3646 mov r13d,eax3647 add r14d,r8d3648 vmovdqa XMMWORD[32+rsp],xmm63649 vpalignr xmm4,xmm0,xmm3,43650 shrd r13d,r13d,143651 mov r8d,r14d3652 mov r12d,ebx3653 vpalignr xmm7,xmm2,xmm1,43654 shrd r14d,r14d,93655 xor r13d,eax3656 xor r12d,ecx3657 vpsrld xmm6,xmm4,73658 shrd r13d,r13d,53659 xor r14d,r8d3660 and r12d,eax3661 vpaddd xmm3,xmm3,xmm73662 xor r13d,eax3663 add edx,DWORD[48+rsp]3664 mov r15d,r8d3665 vpsrld xmm7,xmm4,33666 xor r12d,ecx3667 shrd r14d,r14d,113668 xor r15d,r9d3669 vpslld xmm5,xmm4,143670 add edx,r12d3671 shrd r13d,r13d,63672 and edi,r15d3673 vpxor xmm4,xmm7,xmm63674 xor r14d,r8d3675 add edx,r13d3676 xor edi,r9d3677 vpshufd xmm7,xmm2,2503678 shrd r14d,r14d,23679 add r11d,edx3680 add edx,edi3681 vpsrld xmm6,xmm6,113682 mov r13d,r11d3683 add r14d,edx3684 shrd r13d,r13d,143685 vpxor xmm4,xmm4,xmm53686 mov edx,r14d3687 mov r12d,eax3688 shrd r14d,r14d,93689 vpslld xmm5,xmm5,113690 xor r13d,r11d3691 xor r12d,ebx3692 shrd r13d,r13d,53693 vpxor xmm4,xmm4,xmm63694 xor r14d,edx3695 and r12d,r11d3696 xor r13d,r11d3697 vpsrld xmm6,xmm7,103698 add ecx,DWORD[52+rsp]3699 mov edi,edx3700 xor r12d,ebx3701 vpxor xmm4,xmm4,xmm53702 shrd r14d,r14d,113703 xor edi,r8d3704 add ecx,r12d3705 vpsrlq xmm7,xmm7,173706 shrd r13d,r13d,63707 and r15d,edi3708 xor r14d,edx3709 vpaddd xmm3,xmm3,xmm43710 add ecx,r13d3711 xor r15d,r8d3712 shrd r14d,r14d,23713 vpxor xmm6,xmm6,xmm73714 add r10d,ecx3715 add ecx,r15d3716 mov r13d,r10d3717 vpsrlq xmm7,xmm7,23718 add r14d,ecx3719 shrd r13d,r13d,143720 mov ecx,r14d3721 vpxor xmm6,xmm6,xmm73722 mov r12d,r11d3723 shrd r14d,r14d,93724 xor r13d,r10d3725 vpshufb xmm6,xmm6,xmm83726 xor r12d,eax3727 shrd r13d,r13d,53728 xor r14d,ecx3729 vpaddd xmm3,xmm3,xmm63730 and r12d,r10d3731 xor r13d,r10d3732 add ebx,DWORD[56+rsp]3733 vpshufd xmm7,xmm3,803734 mov r15d,ecx3735 xor r12d,eax3736 shrd r14d,r14d,113737 vpsrld xmm6,xmm7,103738 xor r15d,edx3739 add ebx,r12d3740 shrd r13d,r13d,63741 vpsrlq xmm7,xmm7,173742 and edi,r15d3743 xor r14d,ecx3744 add ebx,r13d3745 vpxor xmm6,xmm6,xmm73746 xor edi,edx3747 shrd r14d,r14d,23748 add r9d,ebx3749 vpsrlq xmm7,xmm7,23750 add ebx,edi3751 mov r13d,r9d3752 add r14d,ebx3753 vpxor xmm6,xmm6,xmm73754 shrd r13d,r13d,143755 mov ebx,r14d3756 mov r12d,r10d3757 vpshufb xmm6,xmm6,xmm93758 shrd r14d,r14d,93759 xor r13d,r9d3760 xor r12d,r11d3761 vpaddd xmm3,xmm3,xmm63762 shrd r13d,r13d,53763 xor r14d,ebx3764 and r12d,r9d3765 vpaddd xmm6,xmm3,XMMWORD[96+rbp]3766 xor r13d,r9d3767 add eax,DWORD[60+rsp]3768 mov edi,ebx3769 xor r12d,r11d3770 shrd r14d,r14d,113771 xor edi,ecx3772 add eax,r12d3773 shrd r13d,r13d,63774 and r15d,edi3775 xor r14d,ebx3776 add eax,r13d3777 xor r15d,ecx3778 shrd r14d,r14d,23779 add r8d,eax3780 add eax,r15d3781 mov r13d,r8d3782 add r14d,eax3783 vmovdqa XMMWORD[48+rsp],xmm63784 cmp BYTE[131+rbp],03785 jne NEAR $L$avx_00_473786 shrd r13d,r13d,143787 mov eax,r14d3788 mov r12d,r9d3789 shrd r14d,r14d,93790 xor r13d,r8d3791 xor r12d,r10d3792 shrd r13d,r13d,53793 xor r14d,eax3794 and r12d,r8d3795 xor r13d,r8d3796 add r11d,DWORD[rsp]3797 mov r15d,eax3798 xor r12d,r10d3799 shrd r14d,r14d,113800 xor r15d,ebx3801 add r11d,r12d3802 shrd r13d,r13d,63803 and edi,r15d3804 xor r14d,eax3805 add r11d,r13d3806 xor edi,ebx3807 shrd r14d,r14d,23808 add edx,r11d3809 add r11d,edi3810 mov r13d,edx3811 add r14d,r11d3812 shrd r13d,r13d,143813 mov r11d,r14d3814 mov r12d,r8d3815 shrd r14d,r14d,93816 xor r13d,edx3817 xor r12d,r9d3818 shrd r13d,r13d,53819 xor r14d,r11d3820 and r12d,edx3821 xor r13d,edx3822 add r10d,DWORD[4+rsp]3823 mov edi,r11d3824 xor r12d,r9d3825 shrd r14d,r14d,113826 xor edi,eax3827 add r10d,r12d3828 shrd r13d,r13d,63829 and r15d,edi3830 xor r14d,r11d3831 add r10d,r13d3832 xor r15d,eax3833 shrd r14d,r14d,23834 add ecx,r10d3835 add r10d,r15d3836 mov r13d,ecx3837 add r14d,r10d3838 shrd r13d,r13d,143839 mov r10d,r14d3840 mov r12d,edx3841 shrd r14d,r14d,93842 xor r13d,ecx3843 xor r12d,r8d3844 shrd r13d,r13d,53845 xor r14d,r10d3846 and r12d,ecx3847 xor r13d,ecx3848 add r9d,DWORD[8+rsp]3849 mov r15d,r10d3850 xor r12d,r8d3851 shrd r14d,r14d,113852 xor r15d,r11d3853 add r9d,r12d3854 shrd r13d,r13d,63855 and edi,r15d3856 xor r14d,r10d3857 add r9d,r13d3858 xor edi,r11d3859 shrd r14d,r14d,23860 add ebx,r9d3861 add r9d,edi3862 mov r13d,ebx3863 add r14d,r9d3864 shrd r13d,r13d,143865 mov r9d,r14d3866 mov r12d,ecx3867 shrd r14d,r14d,93868 xor r13d,ebx3869 xor r12d,edx3870 shrd r13d,r13d,53871 xor r14d,r9d3872 and r12d,ebx3873 xor r13d,ebx3874 add r8d,DWORD[12+rsp]3875 mov edi,r9d3876 xor r12d,edx3877 shrd r14d,r14d,113878 xor edi,r10d3879 add r8d,r12d3880 shrd r13d,r13d,63881 and r15d,edi3882 xor r14d,r9d3883 add r8d,r13d3884 xor r15d,r10d3885 shrd r14d,r14d,23886 add eax,r8d3887 add r8d,r15d3888 mov r13d,eax3889 add r14d,r8d3890 shrd r13d,r13d,143891 mov r8d,r14d3892 mov r12d,ebx3893 shrd r14d,r14d,93894 xor r13d,eax3895 xor r12d,ecx3896 shrd r13d,r13d,53897 xor r14d,r8d3898 and r12d,eax3899 xor r13d,eax3900 add edx,DWORD[16+rsp]3901 mov r15d,r8d3902 xor r12d,ecx3903 shrd r14d,r14d,113904 xor r15d,r9d3905 add edx,r12d3906 shrd r13d,r13d,63907 and edi,r15d3908 xor r14d,r8d3909 add edx,r13d3910 xor edi,r9d3911 shrd r14d,r14d,23912 add r11d,edx3913 add edx,edi3914 mov r13d,r11d3915 add r14d,edx3916 shrd r13d,r13d,143917 mov edx,r14d3918 mov r12d,eax3919 shrd r14d,r14d,93920 xor r13d,r11d3921 xor r12d,ebx3922 shrd r13d,r13d,53923 xor r14d,edx3924 and r12d,r11d3925 xor r13d,r11d3926 add ecx,DWORD[20+rsp]3927 mov edi,edx3928 xor r12d,ebx3929 shrd r14d,r14d,113930 xor edi,r8d3931 add ecx,r12d3932 shrd r13d,r13d,63933 and r15d,edi3934 xor r14d,edx3935 add ecx,r13d3936 xor r15d,r8d3937 shrd r14d,r14d,23938 add r10d,ecx3939 add ecx,r15d3940 mov r13d,r10d3941 add r14d,ecx3942 shrd r13d,r13d,143943 mov ecx,r14d3944 mov r12d,r11d3945 shrd r14d,r14d,93946 xor r13d,r10d3947 xor r12d,eax3948 shrd r13d,r13d,53949 xor r14d,ecx3950 and r12d,r10d3951 xor r13d,r10d3952 add ebx,DWORD[24+rsp]3953 mov r15d,ecx3954 xor r12d,eax3955 shrd r14d,r14d,113956 xor r15d,edx3957 add ebx,r12d3958 shrd r13d,r13d,63959 and edi,r15d3960 xor r14d,ecx3961 add ebx,r13d3962 xor edi,edx3963 shrd r14d,r14d,23964 add r9d,ebx3965 add ebx,edi3966 mov r13d,r9d3967 add r14d,ebx3968 shrd r13d,r13d,143969 mov ebx,r14d3970 mov r12d,r10d3971 shrd r14d,r14d,93972 xor r13d,r9d3973 xor r12d,r11d3974 shrd r13d,r13d,53975 xor r14d,ebx3976 and r12d,r9d3977 xor r13d,r9d3978 add eax,DWORD[28+rsp]3979 mov edi,ebx3980 xor r12d,r11d3981 shrd r14d,r14d,113982 xor edi,ecx3983 add eax,r12d3984 shrd r13d,r13d,63985 and r15d,edi3986 xor r14d,ebx3987 add eax,r13d3988 xor r15d,ecx3989 shrd r14d,r14d,23990 add r8d,eax3991 add eax,r15d3992 mov r13d,r8d3993 add r14d,eax3994 shrd r13d,r13d,143995 mov eax,r14d3996 mov r12d,r9d3997 shrd r14d,r14d,93998 xor r13d,r8d3999 xor r12d,r10d4000 shrd r13d,r13d,54001 xor r14d,eax4002 and r12d,r8d4003 xor r13d,r8d4004 add r11d,DWORD[32+rsp]4005 mov r15d,eax4006 xor r12d,r10d4007 shrd r14d,r14d,114008 xor r15d,ebx4009 add r11d,r12d4010 shrd r13d,r13d,64011 and edi,r15d4012 xor r14d,eax4013 add r11d,r13d4014 xor edi,ebx4015 shrd r14d,r14d,24016 add edx,r11d4017 add r11d,edi4018 mov r13d,edx4019 add r14d,r11d4020 shrd r13d,r13d,144021 mov r11d,r14d4022 mov r12d,r8d4023 shrd r14d,r14d,94024 xor r13d,edx4025 xor r12d,r9d4026 shrd r13d,r13d,54027 xor r14d,r11d4028 and r12d,edx4029 xor r13d,edx4030 add r10d,DWORD[36+rsp]4031 mov edi,r11d4032 xor r12d,r9d4033 shrd r14d,r14d,114034 xor edi,eax4035 add r10d,r12d4036 shrd r13d,r13d,64037 and r15d,edi4038 xor r14d,r11d4039 add r10d,r13d4040 xor r15d,eax4041 shrd r14d,r14d,24042 add ecx,r10d4043 add r10d,r15d4044 mov r13d,ecx4045 add r14d,r10d4046 shrd r13d,r13d,144047 mov r10d,r14d4048 mov r12d,edx4049 shrd r14d,r14d,94050 xor r13d,ecx4051 xor r12d,r8d4052 shrd r13d,r13d,54053 xor r14d,r10d4054 and r12d,ecx4055 xor r13d,ecx4056 add r9d,DWORD[40+rsp]4057 mov r15d,r10d4058 xor r12d,r8d4059 shrd r14d,r14d,114060 xor r15d,r11d4061 add r9d,r12d4062 shrd r13d,r13d,64063 and edi,r15d4064 xor r14d,r10d4065 add r9d,r13d4066 xor edi,r11d4067 shrd r14d,r14d,24068 add ebx,r9d4069 add r9d,edi4070 mov r13d,ebx4071 add r14d,r9d4072 shrd r13d,r13d,144073 mov r9d,r14d4074 mov r12d,ecx4075 shrd r14d,r14d,94076 xor r13d,ebx4077 xor r12d,edx4078 shrd r13d,r13d,54079 xor r14d,r9d4080 and r12d,ebx4081 xor r13d,ebx4082 add r8d,DWORD[44+rsp]4083 mov edi,r9d4084 xor r12d,edx4085 shrd r14d,r14d,114086 xor edi,r10d4087 add r8d,r12d4088 shrd r13d,r13d,64089 and r15d,edi4090 xor r14d,r9d4091 add r8d,r13d4092 xor r15d,r10d4093 shrd r14d,r14d,24094 add eax,r8d4095 add r8d,r15d4096 mov r13d,eax4097 add r14d,r8d4098 shrd r13d,r13d,144099 mov r8d,r14d4100 mov r12d,ebx4101 shrd r14d,r14d,94102 xor r13d,eax4103 xor r12d,ecx4104 shrd r13d,r13d,54105 xor r14d,r8d4106 and r12d,eax4107 xor r13d,eax4108 add edx,DWORD[48+rsp]4109 mov r15d,r8d4110 xor r12d,ecx4111 shrd r14d,r14d,114112 xor r15d,r9d4113 add edx,r12d4114 shrd r13d,r13d,64115 and edi,r15d4116 xor r14d,r8d4117 add edx,r13d4118 xor edi,r9d4119 shrd r14d,r14d,24120 add r11d,edx4121 add edx,edi4122 mov r13d,r11d4123 add r14d,edx4124 shrd r13d,r13d,144125 mov edx,r14d4126 mov r12d,eax4127 shrd r14d,r14d,94128 xor r13d,r11d4129 xor r12d,ebx4130 shrd r13d,r13d,54131 xor r14d,edx4132 and r12d,r11d4133 xor r13d,r11d4134 add ecx,DWORD[52+rsp]4135 mov edi,edx4136 xor r12d,ebx4137 shrd r14d,r14d,114138 xor edi,r8d4139 add ecx,r12d4140 shrd r13d,r13d,64141 and r15d,edi4142 xor r14d,edx4143 add ecx,r13d4144 xor r15d,r8d4145 shrd r14d,r14d,24146 add r10d,ecx4147 add ecx,r15d4148 mov r13d,r10d4149 add r14d,ecx4150 shrd r13d,r13d,144151 mov ecx,r14d4152 mov r12d,r11d4153 shrd r14d,r14d,94154 xor r13d,r10d4155 xor r12d,eax4156 shrd r13d,r13d,54157 xor r14d,ecx4158 and r12d,r10d4159 xor r13d,r10d4160 add ebx,DWORD[56+rsp]4161 mov r15d,ecx4162 xor r12d,eax4163 shrd r14d,r14d,114164 xor r15d,edx4165 add ebx,r12d4166 shrd r13d,r13d,64167 and edi,r15d4168 xor r14d,ecx4169 add ebx,r13d4170 xor edi,edx4171 shrd r14d,r14d,24172 add r9d,ebx4173 add ebx,edi4174 mov r13d,r9d4175 add r14d,ebx4176 shrd r13d,r13d,144177 mov ebx,r14d4178 mov r12d,r10d4179 shrd r14d,r14d,94180 xor r13d,r9d4181 xor r12d,r11d4182 shrd r13d,r13d,54183 xor r14d,ebx4184 and r12d,r9d4185 xor r13d,r9d4186 add eax,DWORD[60+rsp]4187 mov edi,ebx4188 xor r12d,r11d4189 shrd r14d,r14d,114190 xor edi,ecx4191 add eax,r12d4192 shrd r13d,r13d,64193 and r15d,edi4194 xor r14d,ebx4195 add eax,r13d4196 xor r15d,ecx4197 shrd r14d,r14d,24198 add r8d,eax4199 add eax,r15d4200 mov r13d,r8d4201 add r14d,eax4202 mov rdi,QWORD[((64+0))+rsp]4203 mov eax,r14d4204 4205 add eax,DWORD[rdi]4206 lea rsi,[64+rsi]4207 add ebx,DWORD[4+rdi]4208 add ecx,DWORD[8+rdi]4209 add edx,DWORD[12+rdi]4210 add r8d,DWORD[16+rdi]4211 add r9d,DWORD[20+rdi]4212 add r10d,DWORD[24+rdi]4213 add r11d,DWORD[28+rdi]4214 4215 cmp rsi,QWORD[((64+16))+rsp]4216 4217 mov DWORD[rdi],eax4218 mov DWORD[4+rdi],ebx4219 mov DWORD[8+rdi],ecx4220 mov DWORD[12+rdi],edx4221 mov DWORD[16+rdi],r8d4222 mov DWORD[20+rdi],r9d4223 mov DWORD[24+rdi],r10d4224 mov DWORD[28+rdi],r11d4225 jb NEAR $L$loop_avx4226 4227 mov rsi,QWORD[88+rsp]4228 4229 vzeroupper4230 movaps xmm6,XMMWORD[((64+32))+rsp]4231 movaps xmm7,XMMWORD[((64+48))+rsp]4232 movaps xmm8,XMMWORD[((64+64))+rsp]4233 movaps xmm9,XMMWORD[((64+80))+rsp]4234 mov r15,QWORD[((-48))+rsi]4235 4236 mov r14,QWORD[((-40))+rsi]4237 4238 mov r13,QWORD[((-32))+rsi]4239 4240 mov r12,QWORD[((-24))+rsi]4241 4242 mov rbp,QWORD[((-16))+rsi]4243 4244 mov rbx,QWORD[((-8))+rsi]4245 4246 lea rsp,[rsi]4247 4248 $L$epilogue_avx:4249 mov rdi,QWORD[8+rsp] ;WIN64 epilogue4250 mov rsi,QWORD[16+rsp]4251 DB 0F3h,0C3h ;repret4252 4253 $L$SEH_end_sha256_block_data_order_avx:4254 4255 ALIGN 644256 sha256_block_data_order_avx2:4257 mov QWORD[8+rsp],rdi ;WIN64 prologue4258 mov QWORD[16+rsp],rsi4259 mov rax,rsp4260 $L$SEH_begin_sha256_block_data_order_avx2:4261 mov rdi,rcx4262 mov rsi,rdx4263 mov rdx,r84264 4265 4266 4267 $L$avx2_shortcut:4268 mov rax,rsp4269 4270 push rbx4271 4272 push rbp4273 4274 push r124275 4276 push r134277 4278 push r144279 4280 push r154281 4282 sub rsp,6084283 shl rdx,44284 and rsp,-256*44285 lea rdx,[rdx*4+rsi]4286 add rsp,4484287 mov QWORD[((64+0))+rsp],rdi4288 mov QWORD[((64+8))+rsp],rsi4289 mov QWORD[((64+16))+rsp],rdx4290 mov QWORD[88+rsp],rax4291 4292 movaps XMMWORD[(64+32)+rsp],xmm64293 movaps XMMWORD[(64+48)+rsp],xmm74294 movaps XMMWORD[(64+64)+rsp],xmm84295 movaps XMMWORD[(64+80)+rsp],xmm94296 $L$prologue_avx2:4297 4298 vzeroupper4299 sub rsi,-16*44300 mov eax,DWORD[rdi]4301 mov r12,rsi4302 mov ebx,DWORD[4+rdi]4303 cmp rsi,rdx4304 mov ecx,DWORD[8+rdi]4305 cmove r12,rsp4306 mov edx,DWORD[12+rdi]4307 mov r8d,DWORD[16+rdi]4308 mov r9d,DWORD[20+rdi]4309 mov r10d,DWORD[24+rdi]4310 mov r11d,DWORD[28+rdi]4311 vmovdqa ymm8,YMMWORD[((K256+512+32))]4312 vmovdqa ymm9,YMMWORD[((K256+512+64))]4313 jmp NEAR $L$oop_avx24314 ALIGN 164315 $L$oop_avx2:4316 vmovdqa ymm7,YMMWORD[((K256+512))]4317 vmovdqu xmm0,XMMWORD[((-64+0))+rsi]4318 vmovdqu xmm1,XMMWORD[((-64+16))+rsi]4319 vmovdqu xmm2,XMMWORD[((-64+32))+rsi]4320 vmovdqu xmm3,XMMWORD[((-64+48))+rsi]4321 4322 vinserti128 ymm0,ymm0,XMMWORD[r12],14323 vinserti128 ymm1,ymm1,XMMWORD[16+r12],14324 vpshufb ymm0,ymm0,ymm74325 vinserti128 ymm2,ymm2,XMMWORD[32+r12],14326 vpshufb ymm1,ymm1,ymm74327 vinserti128 ymm3,ymm3,XMMWORD[48+r12],14328 4329 lea rbp,[K256]4330 vpshufb ymm2,ymm2,ymm74331 vpaddd ymm4,ymm0,YMMWORD[rbp]4332 vpshufb ymm3,ymm3,ymm74333 vpaddd ymm5,ymm1,YMMWORD[32+rbp]4334 vpaddd ymm6,ymm2,YMMWORD[64+rbp]4335 vpaddd ymm7,ymm3,YMMWORD[96+rbp]4336 vmovdqa YMMWORD[rsp],ymm44337 xor r14d,r14d4338 vmovdqa YMMWORD[32+rsp],ymm54339 lea rsp,[((-64))+rsp]4340 mov edi,ebx4341 vmovdqa YMMWORD[rsp],ymm64342 xor edi,ecx4343 vmovdqa YMMWORD[32+rsp],ymm74344 mov r12d,r9d4345 sub rbp,-16*2*44346 jmp NEAR $L$avx2_00_474347 4348 ALIGN 164349 $L$avx2_00_47:4350 lea rsp,[((-64))+rsp]4351 vpalignr ymm4,ymm1,ymm0,44352 add r11d,DWORD[((0+128))+rsp]4353 and r12d,r8d4354 rorx r13d,r8d,254355 vpalignr ymm7,ymm3,ymm2,44356 rorx r15d,r8d,114357 lea eax,[r14*1+rax]4358 lea r11d,[r12*1+r11]4359 vpsrld ymm6,ymm4,74360 andn r12d,r8d,r10d4361 xor r13d,r15d4362 rorx r14d,r8d,64363 vpaddd ymm0,ymm0,ymm74364 lea r11d,[r12*1+r11]4365 xor r13d,r14d4366 mov r15d,eax4367 vpsrld ymm7,ymm4,34368 rorx r12d,eax,224369 lea r11d,[r13*1+r11]4370 xor r15d,ebx4371 vpslld ymm5,ymm4,144372 rorx r14d,eax,134373 rorx r13d,eax,24374 lea edx,[r11*1+rdx]4375 vpxor ymm4,ymm7,ymm64376 and edi,r15d4377 xor r14d,r12d4378 xor edi,ebx4379 vpshufd ymm7,ymm3,2504380 xor r14d,r13d4381 lea r11d,[rdi*1+r11]4382 mov r12d,r8d4383 vpsrld ymm6,ymm6,114384 add r10d,DWORD[((4+128))+rsp]4385 and r12d,edx4386 rorx r13d,edx,254387 vpxor ymm4,ymm4,ymm54388 rorx edi,edx,114389 lea r11d,[r14*1+r11]4390 lea r10d,[r12*1+r10]4391 vpslld ymm5,ymm5,114392 andn r12d,edx,r9d4393 xor r13d,edi4394 rorx r14d,edx,64395 vpxor ymm4,ymm4,ymm64396 lea r10d,[r12*1+r10]4397 xor r13d,r14d4398 mov edi,r11d4399 vpsrld ymm6,ymm7,104400 rorx r12d,r11d,224401 lea r10d,[r13*1+r10]4402 xor edi,eax4403 vpxor ymm4,ymm4,ymm54404 rorx r14d,r11d,134405 rorx r13d,r11d,24406 lea ecx,[r10*1+rcx]4407 vpsrlq ymm7,ymm7,174408 and r15d,edi4409 xor r14d,r12d4410 xor r15d,eax4411 vpaddd ymm0,ymm0,ymm44412 xor r14d,r13d4413 lea r10d,[r15*1+r10]4414 mov r12d,edx4415 vpxor ymm6,ymm6,ymm74416 add r9d,DWORD[((8+128))+rsp]4417 and r12d,ecx4418 rorx r13d,ecx,254419 vpsrlq ymm7,ymm7,24420 rorx r15d,ecx,114421 lea r10d,[r14*1+r10]4422 lea r9d,[r12*1+r9]4423 vpxor ymm6,ymm6,ymm74424 andn r12d,ecx,r8d4425 xor r13d,r15d4426 rorx r14d,ecx,64427 vpshufb ymm6,ymm6,ymm84428 lea r9d,[r12*1+r9]4429 xor r13d,r14d4430 mov r15d,r10d4431 vpaddd ymm0,ymm0,ymm64432 rorx r12d,r10d,224433 lea r9d,[r13*1+r9]4434 xor r15d,r11d4435 vpshufd ymm7,ymm0,804436 rorx r14d,r10d,134437 rorx r13d,r10d,24438 lea ebx,[r9*1+rbx]4439 vpsrld ymm6,ymm7,104440 and edi,r15d4441 xor r14d,r12d4442 xor edi,r11d4443 vpsrlq ymm7,ymm7,174444 xor r14d,r13d4445 lea r9d,[rdi*1+r9]4446 mov r12d,ecx4447 vpxor ymm6,ymm6,ymm74448 add r8d,DWORD[((12+128))+rsp]4449 and r12d,ebx4450 rorx r13d,ebx,254451 vpsrlq ymm7,ymm7,24452 rorx edi,ebx,114453 lea r9d,[r14*1+r9]4454 lea r8d,[r12*1+r8]4455 vpxor ymm6,ymm6,ymm74456 andn r12d,ebx,edx4457 xor r13d,edi4458 rorx r14d,ebx,64459 vpshufb ymm6,ymm6,ymm94460 lea r8d,[r12*1+r8]4461 xor r13d,r14d4462 mov edi,r9d4463 vpaddd ymm0,ymm0,ymm64464 rorx r12d,r9d,224465 lea r8d,[r13*1+r8]4466 xor edi,r10d4467 vpaddd ymm6,ymm0,YMMWORD[rbp]4468 rorx r14d,r9d,134469 rorx r13d,r9d,24470 lea eax,[r8*1+rax]4471 and r15d,edi4472 xor r14d,r12d4473 xor r15d,r10d4474 xor r14d,r13d4475 lea r8d,[r15*1+r8]4476 mov r12d,ebx4477 vmovdqa YMMWORD[rsp],ymm64478 vpalignr ymm4,ymm2,ymm1,44479 add edx,DWORD[((32+128))+rsp]4480 and r12d,eax4481 rorx r13d,eax,254482 vpalignr ymm7,ymm0,ymm3,44483 rorx r15d,eax,114484 lea r8d,[r14*1+r8]4485 lea edx,[r12*1+rdx]4486 vpsrld ymm6,ymm4,74487 andn r12d,eax,ecx4488 xor r13d,r15d4489 rorx r14d,eax,64490 vpaddd ymm1,ymm1,ymm74491 lea edx,[r12*1+rdx]4492 xor r13d,r14d4493 mov r15d,r8d4494 vpsrld ymm7,ymm4,34495 rorx r12d,r8d,224496 lea edx,[r13*1+rdx]4497 xor r15d,r9d4498 vpslld ymm5,ymm4,144499 rorx r14d,r8d,134500 rorx r13d,r8d,24501 lea r11d,[rdx*1+r11]4502 vpxor ymm4,ymm7,ymm64503 and edi,r15d4504 xor r14d,r12d4505 xor edi,r9d4506 vpshufd ymm7,ymm0,2504507 xor r14d,r13d4508 lea edx,[rdi*1+rdx]4509 mov r12d,eax4510 vpsrld ymm6,ymm6,114511 add ecx,DWORD[((36+128))+rsp]4512 and r12d,r11d4513 rorx r13d,r11d,254514 vpxor ymm4,ymm4,ymm54515 rorx edi,r11d,114516 lea edx,[r14*1+rdx]4517 lea ecx,[r12*1+rcx]4518 vpslld ymm5,ymm5,114519 andn r12d,r11d,ebx4520 xor r13d,edi4521 rorx r14d,r11d,64522 vpxor ymm4,ymm4,ymm64523 lea ecx,[r12*1+rcx]4524 xor r13d,r14d4525 mov edi,edx4526 vpsrld ymm6,ymm7,104527 rorx r12d,edx,224528 lea ecx,[r13*1+rcx]4529 xor edi,r8d4530 vpxor ymm4,ymm4,ymm54531 rorx r14d,edx,134532 rorx r13d,edx,24533 lea r10d,[rcx*1+r10]4534 vpsrlq ymm7,ymm7,174535 and r15d,edi4536 xor r14d,r12d4537 xor r15d,r8d4538 vpaddd ymm1,ymm1,ymm44539 xor r14d,r13d4540 lea ecx,[r15*1+rcx]4541 mov r12d,r11d4542 vpxor ymm6,ymm6,ymm74543 add ebx,DWORD[((40+128))+rsp]4544 and r12d,r10d4545 rorx r13d,r10d,254546 vpsrlq ymm7,ymm7,24547 rorx r15d,r10d,114548 lea ecx,[r14*1+rcx]4549 lea ebx,[r12*1+rbx]4550 vpxor ymm6,ymm6,ymm74551 andn r12d,r10d,eax4552 xor r13d,r15d4553 rorx r14d,r10d,64554 vpshufb ymm6,ymm6,ymm84555 lea ebx,[r12*1+rbx]4556 xor r13d,r14d4557 mov r15d,ecx4558 vpaddd ymm1,ymm1,ymm64559 rorx r12d,ecx,224560 lea ebx,[r13*1+rbx]4561 xor r15d,edx4562 vpshufd ymm7,ymm1,804563 rorx r14d,ecx,134564 rorx r13d,ecx,24565 lea r9d,[rbx*1+r9]4566 vpsrld ymm6,ymm7,104567 and edi,r15d4568 xor r14d,r12d4569 xor edi,edx4570 vpsrlq ymm7,ymm7,174571 xor r14d,r13d4572 lea ebx,[rdi*1+rbx]4573 mov r12d,r10d4574 vpxor ymm6,ymm6,ymm74575 add eax,DWORD[((44+128))+rsp]4576 and r12d,r9d4577 rorx r13d,r9d,254578 vpsrlq ymm7,ymm7,24579 rorx edi,r9d,114580 lea ebx,[r14*1+rbx]4581 lea eax,[r12*1+rax]4582 vpxor ymm6,ymm6,ymm74583 andn r12d,r9d,r11d4584 xor r13d,edi4585 rorx r14d,r9d,64586 vpshufb ymm6,ymm6,ymm94587 lea eax,[r12*1+rax]4588 xor r13d,r14d4589 mov edi,ebx4590 vpaddd ymm1,ymm1,ymm64591 rorx r12d,ebx,224592 lea eax,[r13*1+rax]4593 xor edi,ecx4594 vpaddd ymm6,ymm1,YMMWORD[32+rbp]4595 rorx r14d,ebx,134596 rorx r13d,ebx,24597 lea r8d,[rax*1+r8]4598 and r15d,edi4599 xor r14d,r12d4600 xor r15d,ecx4601 xor r14d,r13d4602 lea eax,[r15*1+rax]4603 mov r12d,r9d4604 vmovdqa YMMWORD[32+rsp],ymm64605 lea rsp,[((-64))+rsp]4606 vpalignr ymm4,ymm3,ymm2,44607 add r11d,DWORD[((0+128))+rsp]4608 and r12d,r8d4609 rorx r13d,r8d,254610 vpalignr ymm7,ymm1,ymm0,44611 rorx r15d,r8d,114612 lea eax,[r14*1+rax]4613 lea r11d,[r12*1+r11]4614 vpsrld ymm6,ymm4,74615 andn r12d,r8d,r10d4616 xor r13d,r15d4617 rorx r14d,r8d,64618 vpaddd ymm2,ymm2,ymm74619 lea r11d,[r12*1+r11]4620 xor r13d,r14d4621 mov r15d,eax4622 vpsrld ymm7,ymm4,34623 rorx r12d,eax,224624 lea r11d,[r13*1+r11]4625 xor r15d,ebx4626 vpslld ymm5,ymm4,144627 rorx r14d,eax,134628 rorx r13d,eax,24629 lea edx,[r11*1+rdx]4630 vpxor ymm4,ymm7,ymm64631 and edi,r15d4632 xor r14d,r12d4633 xor edi,ebx4634 vpshufd ymm7,ymm1,2504635 xor r14d,r13d4636 lea r11d,[rdi*1+r11]4637 mov r12d,r8d4638 vpsrld ymm6,ymm6,114639 add r10d,DWORD[((4+128))+rsp]4640 and r12d,edx4641 rorx r13d,edx,254642 vpxor ymm4,ymm4,ymm54643 rorx edi,edx,114644 lea r11d,[r14*1+r11]4645 lea r10d,[r12*1+r10]4646 vpslld ymm5,ymm5,114647 andn r12d,edx,r9d4648 xor r13d,edi4649 rorx r14d,edx,64650 vpxor ymm4,ymm4,ymm64651 lea r10d,[r12*1+r10]4652 xor r13d,r14d4653 mov edi,r11d4654 vpsrld ymm6,ymm7,104655 rorx r12d,r11d,224656 lea r10d,[r13*1+r10]4657 xor edi,eax4658 vpxor ymm4,ymm4,ymm54659 rorx r14d,r11d,134660 rorx r13d,r11d,24661 lea ecx,[r10*1+rcx]4662 vpsrlq ymm7,ymm7,174663 and r15d,edi4664 xor r14d,r12d4665 xor r15d,eax4666 vpaddd ymm2,ymm2,ymm44667 xor r14d,r13d4668 lea r10d,[r15*1+r10]4669 mov r12d,edx4670 vpxor ymm6,ymm6,ymm74671 add r9d,DWORD[((8+128))+rsp]4672 and r12d,ecx4673 rorx r13d,ecx,254674 vpsrlq ymm7,ymm7,24675 rorx r15d,ecx,114676 lea r10d,[r14*1+r10]4677 lea r9d,[r12*1+r9]4678 vpxor ymm6,ymm6,ymm74679 andn r12d,ecx,r8d4680 xor r13d,r15d4681 rorx r14d,ecx,64682 vpshufb ymm6,ymm6,ymm84683 lea r9d,[r12*1+r9]4684 xor r13d,r14d4685 mov r15d,r10d4686 vpaddd ymm2,ymm2,ymm64687 rorx r12d,r10d,224688 lea r9d,[r13*1+r9]4689 xor r15d,r11d4690 vpshufd ymm7,ymm2,804691 rorx r14d,r10d,134692 rorx r13d,r10d,24693 lea ebx,[r9*1+rbx]4694 vpsrld ymm6,ymm7,104695 and edi,r15d4696 xor r14d,r12d4697 xor edi,r11d4698 vpsrlq ymm7,ymm7,174699 xor r14d,r13d4700 lea r9d,[rdi*1+r9]4701 mov r12d,ecx4702 vpxor ymm6,ymm6,ymm74703 add r8d,DWORD[((12+128))+rsp]4704 and r12d,ebx4705 rorx r13d,ebx,254706 vpsrlq ymm7,ymm7,24707 rorx edi,ebx,114708 lea r9d,[r14*1+r9]4709 lea r8d,[r12*1+r8]4710 vpxor ymm6,ymm6,ymm74711 andn r12d,ebx,edx4712 xor r13d,edi4713 rorx r14d,ebx,64714 vpshufb ymm6,ymm6,ymm94715 lea r8d,[r12*1+r8]4716 xor r13d,r14d4717 mov edi,r9d4718 vpaddd ymm2,ymm2,ymm64719 rorx r12d,r9d,224720 lea r8d,[r13*1+r8]4721 xor edi,r10d4722 vpaddd ymm6,ymm2,YMMWORD[64+rbp]4723 rorx r14d,r9d,134724 rorx r13d,r9d,24725 lea eax,[r8*1+rax]4726 and r15d,edi4727 xor r14d,r12d4728 xor r15d,r10d4729 xor r14d,r13d4730 lea r8d,[r15*1+r8]4731 mov r12d,ebx4732 vmovdqa YMMWORD[rsp],ymm64733 vpalignr ymm4,ymm0,ymm3,44734 add edx,DWORD[((32+128))+rsp]4735 and r12d,eax4736 rorx r13d,eax,254737 vpalignr ymm7,ymm2,ymm1,44738 rorx r15d,eax,114739 lea r8d,[r14*1+r8]4740 lea edx,[r12*1+rdx]4741 vpsrld ymm6,ymm4,74742 andn r12d,eax,ecx4743 xor r13d,r15d4744 rorx r14d,eax,64745 vpaddd ymm3,ymm3,ymm74746 lea edx,[r12*1+rdx]4747 xor r13d,r14d4748 mov r15d,r8d4749 vpsrld ymm7,ymm4,34750 rorx r12d,r8d,224751 lea edx,[r13*1+rdx]4752 xor r15d,r9d4753 vpslld ymm5,ymm4,144754 rorx r14d,r8d,134755 rorx r13d,r8d,24756 lea r11d,[rdx*1+r11]4757 vpxor ymm4,ymm7,ymm64758 and edi,r15d4759 xor r14d,r12d4760 xor edi,r9d4761 vpshufd ymm7,ymm2,2504762 xor r14d,r13d4763 lea edx,[rdi*1+rdx]4764 mov r12d,eax4765 vpsrld ymm6,ymm6,114766 add ecx,DWORD[((36+128))+rsp]4767 and r12d,r11d4768 rorx r13d,r11d,254769 vpxor ymm4,ymm4,ymm54770 rorx edi,r11d,114771 lea edx,[r14*1+rdx]4772 lea ecx,[r12*1+rcx]4773 vpslld ymm5,ymm5,114774 andn r12d,r11d,ebx4775 xor r13d,edi4776 rorx r14d,r11d,64777 vpxor ymm4,ymm4,ymm64778 lea ecx,[r12*1+rcx]4779 xor r13d,r14d4780 mov edi,edx4781 vpsrld ymm6,ymm7,104782 rorx r12d,edx,224783 lea ecx,[r13*1+rcx]4784 xor edi,r8d4785 vpxor ymm4,ymm4,ymm54786 rorx r14d,edx,134787 rorx r13d,edx,24788 lea r10d,[rcx*1+r10]4789 vpsrlq ymm7,ymm7,174790 and r15d,edi4791 xor r14d,r12d4792 xor r15d,r8d4793 vpaddd ymm3,ymm3,ymm44794 xor r14d,r13d4795 lea ecx,[r15*1+rcx]4796 mov r12d,r11d4797 vpxor ymm6,ymm6,ymm74798 add ebx,DWORD[((40+128))+rsp]4799 and r12d,r10d4800 rorx r13d,r10d,254801 vpsrlq ymm7,ymm7,24802 rorx r15d,r10d,114803 lea ecx,[r14*1+rcx]4804 lea ebx,[r12*1+rbx]4805 vpxor ymm6,ymm6,ymm74806 andn r12d,r10d,eax4807 xor r13d,r15d4808 rorx r14d,r10d,64809 vpshufb ymm6,ymm6,ymm84810 lea ebx,[r12*1+rbx]4811 xor r13d,r14d4812 mov r15d,ecx4813 vpaddd ymm3,ymm3,ymm64814 rorx r12d,ecx,224815 lea ebx,[r13*1+rbx]4816 xor r15d,edx4817 vpshufd ymm7,ymm3,804818 rorx r14d,ecx,134819 rorx r13d,ecx,24820 lea r9d,[rbx*1+r9]4821 vpsrld ymm6,ymm7,104822 and edi,r15d4823 xor r14d,r12d4824 xor edi,edx4825 vpsrlq ymm7,ymm7,174826 xor r14d,r13d4827 lea ebx,[rdi*1+rbx]4828 mov r12d,r10d4829 vpxor ymm6,ymm6,ymm74830 add eax,DWORD[((44+128))+rsp]4831 and r12d,r9d4832 rorx r13d,r9d,254833 vpsrlq ymm7,ymm7,24834 rorx edi,r9d,114835 lea ebx,[r14*1+rbx]4836 lea eax,[r12*1+rax]4837 vpxor ymm6,ymm6,ymm74838 andn r12d,r9d,r11d4839 xor r13d,edi4840 rorx r14d,r9d,64841 vpshufb ymm6,ymm6,ymm94842 lea eax,[r12*1+rax]4843 xor r13d,r14d4844 mov edi,ebx4845 vpaddd ymm3,ymm3,ymm64846 rorx r12d,ebx,224847 lea eax,[r13*1+rax]4848 xor edi,ecx4849 vpaddd ymm6,ymm3,YMMWORD[96+rbp]4850 rorx r14d,ebx,134851 rorx r13d,ebx,24852 lea r8d,[rax*1+r8]4853 and r15d,edi4854 xor r14d,r12d4855 xor r15d,ecx4856 xor r14d,r13d4857 lea eax,[r15*1+rax]4858 mov r12d,r9d4859 vmovdqa YMMWORD[32+rsp],ymm64860 lea rbp,[128+rbp]4861 cmp BYTE[3+rbp],04862 jne NEAR $L$avx2_00_474863 add r11d,DWORD[((0+64))+rsp]4864 and r12d,r8d4865 rorx r13d,r8d,254866 rorx r15d,r8d,114867 lea eax,[r14*1+rax]4868 lea r11d,[r12*1+r11]4869 andn r12d,r8d,r10d4870 xor r13d,r15d4871 rorx r14d,r8d,64872 lea r11d,[r12*1+r11]4873 xor r13d,r14d4874 mov r15d,eax4875 rorx r12d,eax,224876 lea r11d,[r13*1+r11]4877 xor r15d,ebx4878 rorx r14d,eax,134879 rorx r13d,eax,24880 lea edx,[r11*1+rdx]4881 and edi,r15d4882 xor r14d,r12d4883 xor edi,ebx4884 xor r14d,r13d4885 lea r11d,[rdi*1+r11]4886 mov r12d,r8d4887 add r10d,DWORD[((4+64))+rsp]4888 and r12d,edx4889 rorx r13d,edx,254890 rorx edi,edx,114891 lea r11d,[r14*1+r11]4892 lea r10d,[r12*1+r10]4893 andn r12d,edx,r9d4894 xor r13d,edi4895 rorx r14d,edx,64896 lea r10d,[r12*1+r10]4897 xor r13d,r14d4898 mov edi,r11d4899 rorx r12d,r11d,224900 lea r10d,[r13*1+r10]4901 xor edi,eax4902 rorx r14d,r11d,134903 rorx r13d,r11d,24904 lea ecx,[r10*1+rcx]4905 and r15d,edi4906 xor r14d,r12d4907 xor r15d,eax4908 xor r14d,r13d4909 lea r10d,[r15*1+r10]4910 mov r12d,edx4911 add r9d,DWORD[((8+64))+rsp]4912 and r12d,ecx4913 rorx r13d,ecx,254914 rorx r15d,ecx,114915 lea r10d,[r14*1+r10]4916 lea r9d,[r12*1+r9]4917 andn r12d,ecx,r8d4918 xor r13d,r15d4919 rorx r14d,ecx,64920 lea r9d,[r12*1+r9]4921 xor r13d,r14d4922 mov r15d,r10d4923 rorx r12d,r10d,224924 lea r9d,[r13*1+r9]4925 xor r15d,r11d4926 rorx r14d,r10d,134927 rorx r13d,r10d,24928 lea ebx,[r9*1+rbx]4929 and edi,r15d4930 xor r14d,r12d4931 xor edi,r11d4932 xor r14d,r13d4933 lea r9d,[rdi*1+r9]4934 mov r12d,ecx4935 add r8d,DWORD[((12+64))+rsp]4936 and r12d,ebx4937 rorx r13d,ebx,254938 rorx edi,ebx,114939 lea r9d,[r14*1+r9]4940 lea r8d,[r12*1+r8]4941 andn r12d,ebx,edx4942 xor r13d,edi4943 rorx r14d,ebx,64944 lea r8d,[r12*1+r8]4945 xor r13d,r14d4946 mov edi,r9d4947 rorx r12d,r9d,224948 lea r8d,[r13*1+r8]4949 xor edi,r10d4950 rorx r14d,r9d,134951 rorx r13d,r9d,24952 lea eax,[r8*1+rax]4953 and r15d,edi4954 xor r14d,r12d4955 xor r15d,r10d4956 xor r14d,r13d4957 lea r8d,[r15*1+r8]4958 mov r12d,ebx4959 add edx,DWORD[((32+64))+rsp]4960 and r12d,eax4961 rorx r13d,eax,254962 rorx r15d,eax,114963 lea r8d,[r14*1+r8]4964 lea edx,[r12*1+rdx]4965 andn r12d,eax,ecx4966 xor r13d,r15d4967 rorx r14d,eax,64968 lea edx,[r12*1+rdx]4969 xor r13d,r14d4970 mov r15d,r8d4971 rorx r12d,r8d,224972 lea edx,[r13*1+rdx]4973 xor r15d,r9d4974 rorx r14d,r8d,134975 rorx r13d,r8d,24976 lea r11d,[rdx*1+r11]4977 and edi,r15d4978 xor r14d,r12d4979 xor edi,r9d4980 xor r14d,r13d4981 lea edx,[rdi*1+rdx]4982 mov r12d,eax4983 add ecx,DWORD[((36+64))+rsp]4984 and r12d,r11d4985 rorx r13d,r11d,254986 rorx edi,r11d,114987 lea edx,[r14*1+rdx]4988 lea ecx,[r12*1+rcx]4989 andn r12d,r11d,ebx4990 xor r13d,edi4991 rorx r14d,r11d,64992 lea ecx,[r12*1+rcx]4993 xor r13d,r14d4994 mov edi,edx4995 rorx r12d,edx,224996 lea ecx,[r13*1+rcx]4997 xor edi,r8d4998 rorx r14d,edx,134999 rorx r13d,edx,25000 lea r10d,[rcx*1+r10]5001 and r15d,edi5002 xor r14d,r12d5003 xor r15d,r8d5004 xor r14d,r13d5005 lea ecx,[r15*1+rcx]5006 mov r12d,r11d5007 add ebx,DWORD[((40+64))+rsp]5008 and r12d,r10d5009 rorx r13d,r10d,255010 rorx r15d,r10d,115011 lea ecx,[r14*1+rcx]5012 lea ebx,[r12*1+rbx]5013 andn r12d,r10d,eax5014 xor r13d,r15d5015 rorx r14d,r10d,65016 lea ebx,[r12*1+rbx]5017 xor r13d,r14d5018 mov r15d,ecx5019 rorx r12d,ecx,225020 lea ebx,[r13*1+rbx]5021 xor r15d,edx5022 rorx r14d,ecx,135023 rorx r13d,ecx,25024 lea r9d,[rbx*1+r9]5025 and edi,r15d5026 xor r14d,r12d5027 xor edi,edx5028 xor r14d,r13d5029 lea ebx,[rdi*1+rbx]5030 mov r12d,r10d5031 add eax,DWORD[((44+64))+rsp]5032 and r12d,r9d5033 rorx r13d,r9d,255034 rorx edi,r9d,115035 lea ebx,[r14*1+rbx]5036 lea eax,[r12*1+rax]5037 andn r12d,r9d,r11d5038 xor r13d,edi5039 rorx r14d,r9d,65040 lea eax,[r12*1+rax]5041 xor r13d,r14d5042 mov edi,ebx5043 rorx r12d,ebx,225044 lea eax,[r13*1+rax]5045 xor edi,ecx5046 rorx r14d,ebx,135047 rorx r13d,ebx,25048 lea r8d,[rax*1+r8]5049 and r15d,edi5050 xor r14d,r12d5051 xor r15d,ecx5052 xor r14d,r13d5053 lea eax,[r15*1+rax]5054 mov r12d,r9d5055 add r11d,DWORD[rsp]5056 and r12d,r8d5057 rorx r13d,r8d,255058 rorx r15d,r8d,115059 lea eax,[r14*1+rax]5060 lea r11d,[r12*1+r11]5061 andn r12d,r8d,r10d5062 xor r13d,r15d5063 rorx r14d,r8d,65064 lea r11d,[r12*1+r11]5065 xor r13d,r14d5066 mov r15d,eax5067 rorx r12d,eax,225068 lea r11d,[r13*1+r11]5069 xor r15d,ebx5070 rorx r14d,eax,135071 rorx r13d,eax,25072 lea edx,[r11*1+rdx]5073 and edi,r15d5074 xor r14d,r12d5075 xor edi,ebx5076 xor r14d,r13d5077 lea r11d,[rdi*1+r11]5078 mov r12d,r8d5079 add r10d,DWORD[4+rsp]5080 and r12d,edx5081 rorx r13d,edx,255082 rorx edi,edx,115083 lea r11d,[r14*1+r11]5084 lea r10d,[r12*1+r10]5085 andn r12d,edx,r9d5086 xor r13d,edi5087 rorx r14d,edx,65088 lea r10d,[r12*1+r10]5089 xor r13d,r14d5090 mov edi,r11d5091 rorx r12d,r11d,225092 lea r10d,[r13*1+r10]5093 xor edi,eax5094 rorx r14d,r11d,135095 rorx r13d,r11d,25096 lea ecx,[r10*1+rcx]5097 and r15d,edi5098 xor r14d,r12d5099 xor r15d,eax5100 xor r14d,r13d5101 lea r10d,[r15*1+r10]5102 mov r12d,edx5103 add r9d,DWORD[8+rsp]5104 and r12d,ecx5105 rorx r13d,ecx,255106 rorx r15d,ecx,115107 lea r10d,[r14*1+r10]5108 lea r9d,[r12*1+r9]5109 andn r12d,ecx,r8d5110 xor r13d,r15d5111 rorx r14d,ecx,65112 lea r9d,[r12*1+r9]5113 xor r13d,r14d5114 mov r15d,r10d5115 rorx r12d,r10d,225116 lea r9d,[r13*1+r9]5117 xor r15d,r11d5118 rorx r14d,r10d,135119 rorx r13d,r10d,25120 lea ebx,[r9*1+rbx]5121 and edi,r15d5122 xor r14d,r12d5123 xor edi,r11d5124 xor r14d,r13d5125 lea r9d,[rdi*1+r9]5126 mov r12d,ecx5127 add r8d,DWORD[12+rsp]5128 and r12d,ebx5129 rorx r13d,ebx,255130 rorx edi,ebx,115131 lea r9d,[r14*1+r9]5132 lea r8d,[r12*1+r8]5133 andn r12d,ebx,edx5134 xor r13d,edi5135 rorx r14d,ebx,65136 lea r8d,[r12*1+r8]5137 xor r13d,r14d5138 mov edi,r9d5139 rorx r12d,r9d,225140 lea r8d,[r13*1+r8]5141 xor edi,r10d5142 rorx r14d,r9d,135143 rorx r13d,r9d,25144 lea eax,[r8*1+rax]5145 and r15d,edi5146 xor r14d,r12d5147 xor r15d,r10d5148 xor r14d,r13d5149 lea r8d,[r15*1+r8]5150 mov r12d,ebx5151 add edx,DWORD[32+rsp]5152 and r12d,eax5153 rorx r13d,eax,255154 rorx r15d,eax,115155 lea r8d,[r14*1+r8]5156 lea edx,[r12*1+rdx]5157 andn r12d,eax,ecx5158 xor r13d,r15d5159 rorx r14d,eax,65160 lea edx,[r12*1+rdx]5161 xor r13d,r14d5162 mov r15d,r8d5163 rorx r12d,r8d,225164 lea edx,[r13*1+rdx]5165 xor r15d,r9d5166 rorx r14d,r8d,135167 rorx r13d,r8d,25168 lea r11d,[rdx*1+r11]5169 and edi,r15d5170 xor r14d,r12d5171 xor edi,r9d5172 xor r14d,r13d5173 lea edx,[rdi*1+rdx]5174 mov r12d,eax5175 add ecx,DWORD[36+rsp]5176 and r12d,r11d5177 rorx r13d,r11d,255178 rorx edi,r11d,115179 lea edx,[r14*1+rdx]5180 lea ecx,[r12*1+rcx]5181 andn r12d,r11d,ebx5182 xor r13d,edi5183 rorx r14d,r11d,65184 lea ecx,[r12*1+rcx]5185 xor r13d,r14d5186 mov edi,edx5187 rorx r12d,edx,225188 lea ecx,[r13*1+rcx]5189 xor edi,r8d5190 rorx r14d,edx,135191 rorx r13d,edx,25192 lea r10d,[rcx*1+r10]5193 and r15d,edi5194 xor r14d,r12d5195 xor r15d,r8d5196 xor r14d,r13d5197 lea ecx,[r15*1+rcx]5198 mov r12d,r11d5199 add ebx,DWORD[40+rsp]5200 and r12d,r10d5201 rorx r13d,r10d,255202 rorx r15d,r10d,115203 lea ecx,[r14*1+rcx]5204 lea ebx,[r12*1+rbx]5205 andn r12d,r10d,eax5206 xor r13d,r15d5207 rorx r14d,r10d,65208 lea ebx,[r12*1+rbx]5209 xor r13d,r14d5210 mov r15d,ecx5211 rorx r12d,ecx,225212 lea ebx,[r13*1+rbx]5213 xor r15d,edx5214 rorx r14d,ecx,135215 rorx r13d,ecx,25216 lea r9d,[rbx*1+r9]5217 and edi,r15d5218 xor r14d,r12d5219 xor edi,edx5220 xor r14d,r13d5221 lea ebx,[rdi*1+rbx]5222 mov r12d,r10d5223 add eax,DWORD[44+rsp]5224 and r12d,r9d5225 rorx r13d,r9d,255226 rorx edi,r9d,115227 lea ebx,[r14*1+rbx]5228 lea eax,[r12*1+rax]5229 andn r12d,r9d,r11d5230 xor r13d,edi5231 rorx r14d,r9d,65232 lea eax,[r12*1+rax]5233 xor r13d,r14d5234 mov edi,ebx5235 rorx r12d,ebx,225236 lea eax,[r13*1+rax]5237 xor edi,ecx5238 rorx r14d,ebx,135239 rorx r13d,ebx,25240 lea r8d,[rax*1+r8]5241 and r15d,edi5242 xor r14d,r12d5243 xor r15d,ecx5244 xor r14d,r13d5245 lea eax,[r15*1+rax]5246 mov r12d,r9d5247 mov rdi,QWORD[512+rsp]5248 add eax,r14d5249 5250 lea rbp,[448+rsp]5251 5252 add eax,DWORD[rdi]5253 add ebx,DWORD[4+rdi]5254 add ecx,DWORD[8+rdi]5255 add edx,DWORD[12+rdi]5256 add r8d,DWORD[16+rdi]5257 add r9d,DWORD[20+rdi]5258 add r10d,DWORD[24+rdi]5259 add r11d,DWORD[28+rdi]5260 5261 mov DWORD[rdi],eax5262 mov DWORD[4+rdi],ebx5263 mov DWORD[8+rdi],ecx5264 mov DWORD[12+rdi],edx5265 mov DWORD[16+rdi],r8d5266 mov DWORD[20+rdi],r9d5267 mov DWORD[24+rdi],r10d5268 mov DWORD[28+rdi],r11d5269 5270 cmp rsi,QWORD[80+rbp]5271 je NEAR $L$done_avx25272 5273 xor r14d,r14d5274 mov edi,ebx5275 xor edi,ecx5276 mov r12d,r9d5277 jmp NEAR $L$ower_avx25278 ALIGN 165279 $L$ower_avx2:5280 add r11d,DWORD[((0+16))+rbp]5281 and r12d,r8d5282 rorx r13d,r8d,255283 rorx r15d,r8d,115284 lea eax,[r14*1+rax]5285 lea r11d,[r12*1+r11]5286 andn r12d,r8d,r10d5287 xor r13d,r15d5288 rorx r14d,r8d,65289 lea r11d,[r12*1+r11]5290 xor r13d,r14d5291 mov r15d,eax5292 rorx r12d,eax,225293 lea r11d,[r13*1+r11]5294 xor r15d,ebx5295 rorx r14d,eax,135296 rorx r13d,eax,25297 lea edx,[r11*1+rdx]5298 and edi,r15d5299 xor r14d,r12d5300 xor edi,ebx5301 xor r14d,r13d5302 lea r11d,[rdi*1+r11]5303 mov r12d,r8d5304 add r10d,DWORD[((4+16))+rbp]5305 and r12d,edx5306 rorx r13d,edx,255307 rorx edi,edx,115308 lea r11d,[r14*1+r11]5309 lea r10d,[r12*1+r10]5310 andn r12d,edx,r9d5311 xor r13d,edi5312 rorx r14d,edx,65313 lea r10d,[r12*1+r10]5314 xor r13d,r14d5315 mov edi,r11d5316 rorx r12d,r11d,225317 lea r10d,[r13*1+r10]5318 xor edi,eax5319 rorx r14d,r11d,135320 rorx r13d,r11d,25321 lea ecx,[r10*1+rcx]5322 and r15d,edi5323 xor r14d,r12d5324 xor r15d,eax5325 xor r14d,r13d5326 lea r10d,[r15*1+r10]5327 mov r12d,edx5328 add r9d,DWORD[((8+16))+rbp]5329 and r12d,ecx5330 rorx r13d,ecx,255331 rorx r15d,ecx,115332 lea r10d,[r14*1+r10]5333 lea r9d,[r12*1+r9]5334 andn r12d,ecx,r8d5335 xor r13d,r15d5336 rorx r14d,ecx,65337 lea r9d,[r12*1+r9]5338 xor r13d,r14d5339 mov r15d,r10d5340 rorx r12d,r10d,225341 lea r9d,[r13*1+r9]5342 xor r15d,r11d5343 rorx r14d,r10d,135344 rorx r13d,r10d,25345 lea ebx,[r9*1+rbx]5346 and edi,r15d5347 xor r14d,r12d5348 xor edi,r11d5349 xor r14d,r13d5350 lea r9d,[rdi*1+r9]5351 mov r12d,ecx5352 add r8d,DWORD[((12+16))+rbp]5353 and r12d,ebx5354 rorx r13d,ebx,255355 rorx edi,ebx,115356 lea r9d,[r14*1+r9]5357 lea r8d,[r12*1+r8]5358 andn r12d,ebx,edx5359 xor r13d,edi5360 rorx r14d,ebx,65361 lea r8d,[r12*1+r8]5362 xor r13d,r14d5363 mov edi,r9d5364 rorx r12d,r9d,225365 lea r8d,[r13*1+r8]5366 xor edi,r10d5367 rorx r14d,r9d,135368 rorx r13d,r9d,25369 lea eax,[r8*1+rax]5370 and r15d,edi5371 xor r14d,r12d5372 xor r15d,r10d5373 xor r14d,r13d5374 lea r8d,[r15*1+r8]5375 mov r12d,ebx5376 add edx,DWORD[((32+16))+rbp]5377 and r12d,eax5378 rorx r13d,eax,255379 rorx r15d,eax,115380 lea r8d,[r14*1+r8]5381 lea edx,[r12*1+rdx]5382 andn r12d,eax,ecx5383 xor r13d,r15d5384 rorx r14d,eax,65385 lea edx,[r12*1+rdx]5386 xor r13d,r14d5387 mov r15d,r8d5388 rorx r12d,r8d,225389 lea edx,[r13*1+rdx]5390 xor r15d,r9d5391 rorx r14d,r8d,135392 rorx r13d,r8d,25393 lea r11d,[rdx*1+r11]5394 and edi,r15d5395 xor r14d,r12d5396 xor edi,r9d5397 xor r14d,r13d5398 lea edx,[rdi*1+rdx]5399 mov r12d,eax5400 add ecx,DWORD[((36+16))+rbp]5401 and r12d,r11d5402 rorx r13d,r11d,255403 rorx edi,r11d,115404 lea edx,[r14*1+rdx]5405 lea ecx,[r12*1+rcx]5406 andn r12d,r11d,ebx5407 xor r13d,edi5408 rorx r14d,r11d,65409 lea ecx,[r12*1+rcx]5410 xor r13d,r14d5411 mov edi,edx5412 rorx r12d,edx,225413 lea ecx,[r13*1+rcx]5414 xor edi,r8d5415 rorx r14d,edx,135416 rorx r13d,edx,25417 lea r10d,[rcx*1+r10]5418 and r15d,edi5419 xor r14d,r12d5420 xor r15d,r8d5421 xor r14d,r13d5422 lea ecx,[r15*1+rcx]5423 mov r12d,r11d5424 add ebx,DWORD[((40+16))+rbp]5425 and r12d,r10d5426 rorx r13d,r10d,255427 rorx r15d,r10d,115428 lea ecx,[r14*1+rcx]5429 lea ebx,[r12*1+rbx]5430 andn r12d,r10d,eax5431 xor r13d,r15d5432 rorx r14d,r10d,65433 lea ebx,[r12*1+rbx]5434 xor r13d,r14d5435 mov r15d,ecx5436 rorx r12d,ecx,225437 lea ebx,[r13*1+rbx]5438 xor r15d,edx5439 rorx r14d,ecx,135440 rorx r13d,ecx,25441 lea r9d,[rbx*1+r9]5442 and edi,r15d5443 xor r14d,r12d5444 xor edi,edx5445 xor r14d,r13d5446 lea ebx,[rdi*1+rbx]5447 mov r12d,r10d5448 add eax,DWORD[((44+16))+rbp]5449 and r12d,r9d5450 rorx r13d,r9d,255451 rorx edi,r9d,115452 lea ebx,[r14*1+rbx]5453 lea eax,[r12*1+rax]5454 andn r12d,r9d,r11d5455 xor r13d,edi5456 rorx r14d,r9d,65457 lea eax,[r12*1+rax]5458 xor r13d,r14d5459 mov edi,ebx5460 rorx r12d,ebx,225461 lea eax,[r13*1+rax]5462 xor edi,ecx5463 rorx r14d,ebx,135464 rorx r13d,ebx,25465 lea r8d,[rax*1+r8]5466 and r15d,edi5467 xor r14d,r12d5468 xor r15d,ecx5469 xor r14d,r13d5470 lea eax,[r15*1+rax]5471 mov r12d,r9d5472 lea rbp,[((-64))+rbp]5473 cmp rbp,rsp5474 jae NEAR $L$ower_avx25475 5476 mov rdi,QWORD[512+rsp]5477 add eax,r14d5478 5479 lea rsp,[448+rsp]5480 5481 5482 5483 add eax,DWORD[rdi]5484 add ebx,DWORD[4+rdi]5485 add ecx,DWORD[8+rdi]5486 add edx,DWORD[12+rdi]5487 add r8d,DWORD[16+rdi]5488 add r9d,DWORD[20+rdi]5489 lea rsi,[128+rsi]5490 add r10d,DWORD[24+rdi]5491 mov r12,rsi5492 add r11d,DWORD[28+rdi]5493 cmp rsi,QWORD[((64+16))+rsp]5494 5495 mov DWORD[rdi],eax5496 cmove r12,rsp5497 mov DWORD[4+rdi],ebx5498 mov DWORD[8+rdi],ecx5499 mov DWORD[12+rdi],edx5500 mov DWORD[16+rdi],r8d5501 mov DWORD[20+rdi],r9d5502 mov DWORD[24+rdi],r10d5503 mov DWORD[28+rdi],r11d5504 5505 jbe NEAR $L$oop_avx25506 lea rbp,[rsp]5507 5508 5509 5510 5511 $L$done_avx2:5512 mov rsi,QWORD[88+rbp]5513 5514 vzeroupper5515 movaps xmm6,XMMWORD[((64+32))+rbp]5516 movaps xmm7,XMMWORD[((64+48))+rbp]5517 movaps xmm8,XMMWORD[((64+64))+rbp]5518 movaps xmm9,XMMWORD[((64+80))+rbp]5519 mov r15,QWORD[((-48))+rsi]5520 5521 mov r14,QWORD[((-40))+rsi]5522 5523 mov r13,QWORD[((-32))+rsi]5524 5525 mov r12,QWORD[((-24))+rsi]5526 5527 mov rbp,QWORD[((-16))+rsi]5528 5529 mov rbx,QWORD[((-8))+rsi]5530 5531 lea rsp,[rsi]5532 5533 $L$epilogue_avx2:5534 mov rdi,QWORD[8+rsp] ;WIN64 epilogue5535 mov rsi,QWORD[16+rsp]5536 DB 0F3h,0C3h ;repret5537 5538 $L$SEH_end_sha256_block_data_order_avx2:5539 3152 EXTERN __imp_RtlVirtualUnwind 5540 3153 … … 5569 3182 cmp rbx,r10 5570 3183 jae NEAR $L$in_prologue 5571 lea r10,[$L$avx2_shortcut]5572 cmp rbx,r105573 jb NEAR $L$not_in_avx25574 5575 and rax,-256*45576 add rax,4485577 $L$not_in_avx2:5578 3184 mov rsi,rax 5579 3185 mov rax,QWORD[((64+24))+rax] … … 5683 3289 DD $L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase 5684 3290 DD $L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase 5685 DD $L$SEH_begin_sha256_block_data_order_avx wrt ..imagebase5686 DD $L$SEH_end_sha256_block_data_order_avx wrt ..imagebase5687 DD $L$SEH_info_sha256_block_data_order_avx wrt ..imagebase5688 DD $L$SEH_begin_sha256_block_data_order_avx2 wrt ..imagebase5689 DD $L$SEH_end_sha256_block_data_order_avx2 wrt ..imagebase5690 DD $L$SEH_info_sha256_block_data_order_avx2 wrt ..imagebase5691 3291 section .xdata rdata align=8 5692 3292 ALIGN 8 … … 5702 3302 DD se_handler wrt ..imagebase 5703 3303 DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase 5704 $L$SEH_info_sha256_block_data_order_avx:5705 DB 9,0,0,05706 DD se_handler wrt ..imagebase5707 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase5708 $L$SEH_info_sha256_block_data_order_avx2:5709 DB 9,0,0,05710 DD se_handler wrt ..imagebase5711 DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/sha512-x86_64.S
r94083 r95221 21 21 22 22 23 lea r11,[OPENSSL_ia32cap_P]24 mov r9d,DWORD[r11]25 mov r10d,DWORD[4+r11]26 mov r11d,DWORD[8+r11]27 test r10d,204828 jnz NEAR $L$xop_shortcut29 and r11d,29630 cmp r11d,29631 je NEAR $L$avx2_shortcut32 and r9d,107374182433 and r10d,26843596834 or r10d,r9d35 cmp r10d,134217779236 je NEAR $L$avx_shortcut37 23 mov rax,rsp 38 24 … … 1834 1820 DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 1835 1821 DB 111,114,103,62,0 1836 1837 ALIGN 641838 sha512_block_data_order_xop:1839 mov QWORD[8+rsp],rdi ;WIN64 prologue1840 mov QWORD[16+rsp],rsi1841 mov rax,rsp1842 $L$SEH_begin_sha512_block_data_order_xop:1843 mov rdi,rcx1844 mov rsi,rdx1845 mov rdx,r81846 1847 1848 1849 $L$xop_shortcut:1850 mov rax,rsp1851 1852 push rbx1853 1854 push rbp1855 1856 push r121857 1858 push r131859 1860 push r141861 1862 push r151863 1864 shl rdx,41865 sub rsp,2561866 lea rdx,[rdx*8+rsi]1867 and rsp,-641868 mov QWORD[((128+0))+rsp],rdi1869 mov QWORD[((128+8))+rsp],rsi1870 mov QWORD[((128+16))+rsp],rdx1871 mov QWORD[152+rsp],rax1872 1873 movaps XMMWORD[(128+32)+rsp],xmm61874 movaps XMMWORD[(128+48)+rsp],xmm71875 movaps XMMWORD[(128+64)+rsp],xmm81876 movaps XMMWORD[(128+80)+rsp],xmm91877 movaps XMMWORD[(128+96)+rsp],xmm101878 movaps XMMWORD[(128+112)+rsp],xmm111879 $L$prologue_xop:1880 1881 vzeroupper1882 mov rax,QWORD[rdi]1883 mov rbx,QWORD[8+rdi]1884 mov rcx,QWORD[16+rdi]1885 mov rdx,QWORD[24+rdi]1886 mov r8,QWORD[32+rdi]1887 mov r9,QWORD[40+rdi]1888 mov r10,QWORD[48+rdi]1889 mov r11,QWORD[56+rdi]1890 jmp NEAR $L$loop_xop1891 ALIGN 161892 $L$loop_xop:1893 vmovdqa xmm11,XMMWORD[((K512+1280))]1894 vmovdqu xmm0,XMMWORD[rsi]1895 lea rbp,[((K512+128))]1896 vmovdqu xmm1,XMMWORD[16+rsi]1897 vmovdqu xmm2,XMMWORD[32+rsi]1898 vpshufb xmm0,xmm0,xmm111899 vmovdqu xmm3,XMMWORD[48+rsi]1900 vpshufb xmm1,xmm1,xmm111901 vmovdqu xmm4,XMMWORD[64+rsi]1902 vpshufb xmm2,xmm2,xmm111903 vmovdqu xmm5,XMMWORD[80+rsi]1904 vpshufb xmm3,xmm3,xmm111905 vmovdqu xmm6,XMMWORD[96+rsi]1906 vpshufb xmm4,xmm4,xmm111907 vmovdqu xmm7,XMMWORD[112+rsi]1908 vpshufb xmm5,xmm5,xmm111909 vpaddq xmm8,xmm0,XMMWORD[((-128))+rbp]1910 vpshufb xmm6,xmm6,xmm111911 vpaddq xmm9,xmm1,XMMWORD[((-96))+rbp]1912 vpshufb xmm7,xmm7,xmm111913 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]1914 vpaddq xmm11,xmm3,XMMWORD[((-32))+rbp]1915 vmovdqa XMMWORD[rsp],xmm81916 vpaddq xmm8,xmm4,XMMWORD[rbp]1917 vmovdqa XMMWORD[16+rsp],xmm91918 vpaddq xmm9,xmm5,XMMWORD[32+rbp]1919 vmovdqa XMMWORD[32+rsp],xmm101920 vpaddq xmm10,xmm6,XMMWORD[64+rbp]1921 vmovdqa XMMWORD[48+rsp],xmm111922 vpaddq xmm11,xmm7,XMMWORD[96+rbp]1923 vmovdqa XMMWORD[64+rsp],xmm81924 mov r14,rax1925 vmovdqa XMMWORD[80+rsp],xmm91926 mov rdi,rbx1927 vmovdqa XMMWORD[96+rsp],xmm101928 xor rdi,rcx1929 vmovdqa XMMWORD[112+rsp],xmm111930 mov r13,r81931 jmp NEAR $L$xop_00_471932 1933 ALIGN 161934 $L$xop_00_47:1935 add rbp,2561936 vpalignr xmm8,xmm1,xmm0,81937 ror r13,231938 mov rax,r141939 vpalignr xmm11,xmm5,xmm4,81940 mov r12,r91941 ror r14,51942 DB 143,72,120,195,200,561943 xor r13,r81944 xor r12,r101945 vpsrlq xmm8,xmm8,71946 ror r13,41947 xor r14,rax1948 vpaddq xmm0,xmm0,xmm111949 and r12,r81950 xor r13,r81951 add r11,QWORD[rsp]1952 mov r15,rax1953 DB 143,72,120,195,209,71954 xor r12,r101955 ror r14,61956 vpxor xmm8,xmm8,xmm91957 xor r15,rbx1958 add r11,r121959 ror r13,141960 and rdi,r151961 DB 143,104,120,195,223,31962 xor r14,rax1963 add r11,r131964 vpxor xmm8,xmm8,xmm101965 xor rdi,rbx1966 ror r14,281967 vpsrlq xmm10,xmm7,61968 add rdx,r111969 add r11,rdi1970 vpaddq xmm0,xmm0,xmm81971 mov r13,rdx1972 add r14,r111973 DB 143,72,120,195,203,421974 ror r13,231975 mov r11,r141976 vpxor xmm11,xmm11,xmm101977 mov r12,r81978 ror r14,51979 xor r13,rdx1980 xor r12,r91981 vpxor xmm11,xmm11,xmm91982 ror r13,41983 xor r14,r111984 and r12,rdx1985 xor r13,rdx1986 vpaddq xmm0,xmm0,xmm111987 add r10,QWORD[8+rsp]1988 mov rdi,r111989 xor r12,r91990 ror r14,61991 vpaddq xmm10,xmm0,XMMWORD[((-128))+rbp]1992 xor rdi,rax1993 add r10,r121994 ror r13,141995 and r15,rdi1996 xor r14,r111997 add r10,r131998 xor r15,rax1999 ror r14,282000 add rcx,r102001 add r10,r152002 mov r13,rcx2003 add r14,r102004 vmovdqa XMMWORD[rsp],xmm102005 vpalignr xmm8,xmm2,xmm1,82006 ror r13,232007 mov r10,r142008 vpalignr xmm11,xmm6,xmm5,82009 mov r12,rdx2010 ror r14,52011 DB 143,72,120,195,200,562012 xor r13,rcx2013 xor r12,r82014 vpsrlq xmm8,xmm8,72015 ror r13,42016 xor r14,r102017 vpaddq xmm1,xmm1,xmm112018 and r12,rcx2019 xor r13,rcx2020 add r9,QWORD[16+rsp]2021 mov r15,r102022 DB 143,72,120,195,209,72023 xor r12,r82024 ror r14,62025 vpxor xmm8,xmm8,xmm92026 xor r15,r112027 add r9,r122028 ror r13,142029 and rdi,r152030 DB 143,104,120,195,216,32031 xor r14,r102032 add r9,r132033 vpxor xmm8,xmm8,xmm102034 xor rdi,r112035 ror r14,282036 vpsrlq xmm10,xmm0,62037 add rbx,r92038 add r9,rdi2039 vpaddq xmm1,xmm1,xmm82040 mov r13,rbx2041 add r14,r92042 DB 143,72,120,195,203,422043 ror r13,232044 mov r9,r142045 vpxor xmm11,xmm11,xmm102046 mov r12,rcx2047 ror r14,52048 xor r13,rbx2049 xor r12,rdx2050 vpxor xmm11,xmm11,xmm92051 ror r13,42052 xor r14,r92053 and r12,rbx2054 xor r13,rbx2055 vpaddq xmm1,xmm1,xmm112056 add r8,QWORD[24+rsp]2057 mov rdi,r92058 xor r12,rdx2059 ror r14,62060 vpaddq xmm10,xmm1,XMMWORD[((-96))+rbp]2061 xor rdi,r102062 add r8,r122063 ror r13,142064 and r15,rdi2065 xor r14,r92066 add r8,r132067 xor r15,r102068 ror r14,282069 add rax,r82070 add r8,r152071 mov r13,rax2072 add r14,r82073 vmovdqa XMMWORD[16+rsp],xmm102074 vpalignr xmm8,xmm3,xmm2,82075 ror r13,232076 mov r8,r142077 vpalignr xmm11,xmm7,xmm6,82078 mov r12,rbx2079 ror r14,52080 DB 143,72,120,195,200,562081 xor r13,rax2082 xor r12,rcx2083 vpsrlq xmm8,xmm8,72084 ror r13,42085 xor r14,r82086 vpaddq xmm2,xmm2,xmm112087 and r12,rax2088 xor r13,rax2089 add rdx,QWORD[32+rsp]2090 mov r15,r82091 DB 143,72,120,195,209,72092 xor r12,rcx2093 ror r14,62094 vpxor xmm8,xmm8,xmm92095 xor r15,r92096 add rdx,r122097 ror r13,142098 and rdi,r152099 DB 143,104,120,195,217,32100 xor r14,r82101 add rdx,r132102 vpxor xmm8,xmm8,xmm102103 xor rdi,r92104 ror r14,282105 vpsrlq xmm10,xmm1,62106 add r11,rdx2107 add rdx,rdi2108 vpaddq xmm2,xmm2,xmm82109 mov r13,r112110 add r14,rdx2111 DB 143,72,120,195,203,422112 ror r13,232113 mov rdx,r142114 vpxor xmm11,xmm11,xmm102115 mov r12,rax2116 ror r14,52117 xor r13,r112118 xor r12,rbx2119 vpxor xmm11,xmm11,xmm92120 ror r13,42121 xor r14,rdx2122 and r12,r112123 xor r13,r112124 vpaddq xmm2,xmm2,xmm112125 add rcx,QWORD[40+rsp]2126 mov rdi,rdx2127 xor r12,rbx2128 ror r14,62129 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]2130 xor rdi,r82131 add rcx,r122132 ror r13,142133 and r15,rdi2134 xor r14,rdx2135 add rcx,r132136 xor r15,r82137 ror r14,282138 add r10,rcx2139 add rcx,r152140 mov r13,r102141 add r14,rcx2142 vmovdqa XMMWORD[32+rsp],xmm102143 vpalignr xmm8,xmm4,xmm3,82144 ror r13,232145 mov rcx,r142146 vpalignr xmm11,xmm0,xmm7,82147 mov r12,r112148 ror r14,52149 DB 143,72,120,195,200,562150 xor r13,r102151 xor r12,rax2152 vpsrlq xmm8,xmm8,72153 ror r13,42154 xor r14,rcx2155 vpaddq xmm3,xmm3,xmm112156 and r12,r102157 xor r13,r102158 add rbx,QWORD[48+rsp]2159 mov r15,rcx2160 DB 143,72,120,195,209,72161 xor r12,rax2162 ror r14,62163 vpxor xmm8,xmm8,xmm92164 xor r15,rdx2165 add rbx,r122166 ror r13,142167 and rdi,r152168 DB 143,104,120,195,218,32169 xor r14,rcx2170 add rbx,r132171 vpxor xmm8,xmm8,xmm102172 xor rdi,rdx2173 ror r14,282174 vpsrlq xmm10,xmm2,62175 add r9,rbx2176 add rbx,rdi2177 vpaddq xmm3,xmm3,xmm82178 mov r13,r92179 add r14,rbx2180 DB 143,72,120,195,203,422181 ror r13,232182 mov rbx,r142183 vpxor xmm11,xmm11,xmm102184 mov r12,r102185 ror r14,52186 xor r13,r92187 xor r12,r112188 vpxor xmm11,xmm11,xmm92189 ror r13,42190 xor r14,rbx2191 and r12,r92192 xor r13,r92193 vpaddq xmm3,xmm3,xmm112194 add rax,QWORD[56+rsp]2195 mov rdi,rbx2196 xor r12,r112197 ror r14,62198 vpaddq xmm10,xmm3,XMMWORD[((-32))+rbp]2199 xor rdi,rcx2200 add rax,r122201 ror r13,142202 and r15,rdi2203 xor r14,rbx2204 add rax,r132205 xor r15,rcx2206 ror r14,282207 add r8,rax2208 add rax,r152209 mov r13,r82210 add r14,rax2211 vmovdqa XMMWORD[48+rsp],xmm102212 vpalignr xmm8,xmm5,xmm4,82213 ror r13,232214 mov rax,r142215 vpalignr xmm11,xmm1,xmm0,82216 mov r12,r92217 ror r14,52218 DB 143,72,120,195,200,562219 xor r13,r82220 xor r12,r102221 vpsrlq xmm8,xmm8,72222 ror r13,42223 xor r14,rax2224 vpaddq xmm4,xmm4,xmm112225 and r12,r82226 xor r13,r82227 add r11,QWORD[64+rsp]2228 mov r15,rax2229 DB 143,72,120,195,209,72230 xor r12,r102231 ror r14,62232 vpxor xmm8,xmm8,xmm92233 xor r15,rbx2234 add r11,r122235 ror r13,142236 and rdi,r152237 DB 143,104,120,195,219,32238 xor r14,rax2239 add r11,r132240 vpxor xmm8,xmm8,xmm102241 xor rdi,rbx2242 ror r14,282243 vpsrlq xmm10,xmm3,62244 add rdx,r112245 add r11,rdi2246 vpaddq xmm4,xmm4,xmm82247 mov r13,rdx2248 add r14,r112249 DB 143,72,120,195,203,422250 ror r13,232251 mov r11,r142252 vpxor xmm11,xmm11,xmm102253 mov r12,r82254 ror r14,52255 xor r13,rdx2256 xor r12,r92257 vpxor xmm11,xmm11,xmm92258 ror r13,42259 xor r14,r112260 and r12,rdx2261 xor r13,rdx2262 vpaddq xmm4,xmm4,xmm112263 add r10,QWORD[72+rsp]2264 mov rdi,r112265 xor r12,r92266 ror r14,62267 vpaddq xmm10,xmm4,XMMWORD[rbp]2268 xor rdi,rax2269 add r10,r122270 ror r13,142271 and r15,rdi2272 xor r14,r112273 add r10,r132274 xor r15,rax2275 ror r14,282276 add rcx,r102277 add r10,r152278 mov r13,rcx2279 add r14,r102280 vmovdqa XMMWORD[64+rsp],xmm102281 vpalignr xmm8,xmm6,xmm5,82282 ror r13,232283 mov r10,r142284 vpalignr xmm11,xmm2,xmm1,82285 mov r12,rdx2286 ror r14,52287 DB 143,72,120,195,200,562288 xor r13,rcx2289 xor r12,r82290 vpsrlq xmm8,xmm8,72291 ror r13,42292 xor r14,r102293 vpaddq xmm5,xmm5,xmm112294 and r12,rcx2295 xor r13,rcx2296 add r9,QWORD[80+rsp]2297 mov r15,r102298 DB 143,72,120,195,209,72299 xor r12,r82300 ror r14,62301 vpxor xmm8,xmm8,xmm92302 xor r15,r112303 add r9,r122304 ror r13,142305 and rdi,r152306 DB 143,104,120,195,220,32307 xor r14,r102308 add r9,r132309 vpxor xmm8,xmm8,xmm102310 xor rdi,r112311 ror r14,282312 vpsrlq xmm10,xmm4,62313 add rbx,r92314 add r9,rdi2315 vpaddq xmm5,xmm5,xmm82316 mov r13,rbx2317 add r14,r92318 DB 143,72,120,195,203,422319 ror r13,232320 mov r9,r142321 vpxor xmm11,xmm11,xmm102322 mov r12,rcx2323 ror r14,52324 xor r13,rbx2325 xor r12,rdx2326 vpxor xmm11,xmm11,xmm92327 ror r13,42328 xor r14,r92329 and r12,rbx2330 xor r13,rbx2331 vpaddq xmm5,xmm5,xmm112332 add r8,QWORD[88+rsp]2333 mov rdi,r92334 xor r12,rdx2335 ror r14,62336 vpaddq xmm10,xmm5,XMMWORD[32+rbp]2337 xor rdi,r102338 add r8,r122339 ror r13,142340 and r15,rdi2341 xor r14,r92342 add r8,r132343 xor r15,r102344 ror r14,282345 add rax,r82346 add r8,r152347 mov r13,rax2348 add r14,r82349 vmovdqa XMMWORD[80+rsp],xmm102350 vpalignr xmm8,xmm7,xmm6,82351 ror r13,232352 mov r8,r142353 vpalignr xmm11,xmm3,xmm2,82354 mov r12,rbx2355 ror r14,52356 DB 143,72,120,195,200,562357 xor r13,rax2358 xor r12,rcx2359 vpsrlq xmm8,xmm8,72360 ror r13,42361 xor r14,r82362 vpaddq xmm6,xmm6,xmm112363 and r12,rax2364 xor r13,rax2365 add rdx,QWORD[96+rsp]2366 mov r15,r82367 DB 143,72,120,195,209,72368 xor r12,rcx2369 ror r14,62370 vpxor xmm8,xmm8,xmm92371 xor r15,r92372 add rdx,r122373 ror r13,142374 and rdi,r152375 DB 143,104,120,195,221,32376 xor r14,r82377 add rdx,r132378 vpxor xmm8,xmm8,xmm102379 xor rdi,r92380 ror r14,282381 vpsrlq xmm10,xmm5,62382 add r11,rdx2383 add rdx,rdi2384 vpaddq xmm6,xmm6,xmm82385 mov r13,r112386 add r14,rdx2387 DB 143,72,120,195,203,422388 ror r13,232389 mov rdx,r142390 vpxor xmm11,xmm11,xmm102391 mov r12,rax2392 ror r14,52393 xor r13,r112394 xor r12,rbx2395 vpxor xmm11,xmm11,xmm92396 ror r13,42397 xor r14,rdx2398 and r12,r112399 xor r13,r112400 vpaddq xmm6,xmm6,xmm112401 add rcx,QWORD[104+rsp]2402 mov rdi,rdx2403 xor r12,rbx2404 ror r14,62405 vpaddq xmm10,xmm6,XMMWORD[64+rbp]2406 xor rdi,r82407 add rcx,r122408 ror r13,142409 and r15,rdi2410 xor r14,rdx2411 add rcx,r132412 xor r15,r82413 ror r14,282414 add r10,rcx2415 add rcx,r152416 mov r13,r102417 add r14,rcx2418 vmovdqa XMMWORD[96+rsp],xmm102419 vpalignr xmm8,xmm0,xmm7,82420 ror r13,232421 mov rcx,r142422 vpalignr xmm11,xmm4,xmm3,82423 mov r12,r112424 ror r14,52425 DB 143,72,120,195,200,562426 xor r13,r102427 xor r12,rax2428 vpsrlq xmm8,xmm8,72429 ror r13,42430 xor r14,rcx2431 vpaddq xmm7,xmm7,xmm112432 and r12,r102433 xor r13,r102434 add rbx,QWORD[112+rsp]2435 mov r15,rcx2436 DB 143,72,120,195,209,72437 xor r12,rax2438 ror r14,62439 vpxor xmm8,xmm8,xmm92440 xor r15,rdx2441 add rbx,r122442 ror r13,142443 and rdi,r152444 DB 143,104,120,195,222,32445 xor r14,rcx2446 add rbx,r132447 vpxor xmm8,xmm8,xmm102448 xor rdi,rdx2449 ror r14,282450 vpsrlq xmm10,xmm6,62451 add r9,rbx2452 add rbx,rdi2453 vpaddq xmm7,xmm7,xmm82454 mov r13,r92455 add r14,rbx2456 DB 143,72,120,195,203,422457 ror r13,232458 mov rbx,r142459 vpxor xmm11,xmm11,xmm102460 mov r12,r102461 ror r14,52462 xor r13,r92463 xor r12,r112464 vpxor xmm11,xmm11,xmm92465 ror r13,42466 xor r14,rbx2467 and r12,r92468 xor r13,r92469 vpaddq xmm7,xmm7,xmm112470 add rax,QWORD[120+rsp]2471 mov rdi,rbx2472 xor r12,r112473 ror r14,62474 vpaddq xmm10,xmm7,XMMWORD[96+rbp]2475 xor rdi,rcx2476 add rax,r122477 ror r13,142478 and r15,rdi2479 xor r14,rbx2480 add rax,r132481 xor r15,rcx2482 ror r14,282483 add r8,rax2484 add rax,r152485 mov r13,r82486 add r14,rax2487 vmovdqa XMMWORD[112+rsp],xmm102488 cmp BYTE[135+rbp],02489 jne NEAR $L$xop_00_472490 ror r13,232491 mov rax,r142492 mov r12,r92493 ror r14,52494 xor r13,r82495 xor r12,r102496 ror r13,42497 xor r14,rax2498 and r12,r82499 xor r13,r82500 add r11,QWORD[rsp]2501 mov r15,rax2502 xor r12,r102503 ror r14,62504 xor r15,rbx2505 add r11,r122506 ror r13,142507 and rdi,r152508 xor r14,rax2509 add r11,r132510 xor rdi,rbx2511 ror r14,282512 add rdx,r112513 add r11,rdi2514 mov r13,rdx2515 add r14,r112516 ror r13,232517 mov r11,r142518 mov r12,r82519 ror r14,52520 xor r13,rdx2521 xor r12,r92522 ror r13,42523 xor r14,r112524 and r12,rdx2525 xor r13,rdx2526 add r10,QWORD[8+rsp]2527 mov rdi,r112528 xor r12,r92529 ror r14,62530 xor rdi,rax2531 add r10,r122532 ror r13,142533 and r15,rdi2534 xor r14,r112535 add r10,r132536 xor r15,rax2537 ror r14,282538 add rcx,r102539 add r10,r152540 mov r13,rcx2541 add r14,r102542 ror r13,232543 mov r10,r142544 mov r12,rdx2545 ror r14,52546 xor r13,rcx2547 xor r12,r82548 ror r13,42549 xor r14,r102550 and r12,rcx2551 xor r13,rcx2552 add r9,QWORD[16+rsp]2553 mov r15,r102554 xor r12,r82555 ror r14,62556 xor r15,r112557 add r9,r122558 ror r13,142559 and rdi,r152560 xor r14,r102561 add r9,r132562 xor rdi,r112563 ror r14,282564 add rbx,r92565 add r9,rdi2566 mov r13,rbx2567 add r14,r92568 ror r13,232569 mov r9,r142570 mov r12,rcx2571 ror r14,52572 xor r13,rbx2573 xor r12,rdx2574 ror r13,42575 xor r14,r92576 and r12,rbx2577 xor r13,rbx2578 add r8,QWORD[24+rsp]2579 mov rdi,r92580 xor r12,rdx2581 ror r14,62582 xor rdi,r102583 add r8,r122584 ror r13,142585 and r15,rdi2586 xor r14,r92587 add r8,r132588 xor r15,r102589 ror r14,282590 add rax,r82591 add r8,r152592 mov r13,rax2593 add r14,r82594 ror r13,232595 mov r8,r142596 mov r12,rbx2597 ror r14,52598 xor r13,rax2599 xor r12,rcx2600 ror r13,42601 xor r14,r82602 and r12,rax2603 xor r13,rax2604 add rdx,QWORD[32+rsp]2605 mov r15,r82606 xor r12,rcx2607 ror r14,62608 xor r15,r92609 add rdx,r122610 ror r13,142611 and rdi,r152612 xor r14,r82613 add rdx,r132614 xor rdi,r92615 ror r14,282616 add r11,rdx2617 add rdx,rdi2618 mov r13,r112619 add r14,rdx2620 ror r13,232621 mov rdx,r142622 mov r12,rax2623 ror r14,52624 xor r13,r112625 xor r12,rbx2626 ror r13,42627 xor r14,rdx2628 and r12,r112629 xor r13,r112630 add rcx,QWORD[40+rsp]2631 mov rdi,rdx2632 xor r12,rbx2633 ror r14,62634 xor rdi,r82635 add rcx,r122636 ror r13,142637 and r15,rdi2638 xor r14,rdx2639 add rcx,r132640 xor r15,r82641 ror r14,282642 add r10,rcx2643 add rcx,r152644 mov r13,r102645 add r14,rcx2646 ror r13,232647 mov rcx,r142648 mov r12,r112649 ror r14,52650 xor r13,r102651 xor r12,rax2652 ror r13,42653 xor r14,rcx2654 and r12,r102655 xor r13,r102656 add rbx,QWORD[48+rsp]2657 mov r15,rcx2658 xor r12,rax2659 ror r14,62660 xor r15,rdx2661 add rbx,r122662 ror r13,142663 and rdi,r152664 xor r14,rcx2665 add rbx,r132666 xor rdi,rdx2667 ror r14,282668 add r9,rbx2669 add rbx,rdi2670 mov r13,r92671 add r14,rbx2672 ror r13,232673 mov rbx,r142674 mov r12,r102675 ror r14,52676 xor r13,r92677 xor r12,r112678 ror r13,42679 xor r14,rbx2680 and r12,r92681 xor r13,r92682 add rax,QWORD[56+rsp]2683 mov rdi,rbx2684 xor r12,r112685 ror r14,62686 xor rdi,rcx2687 add rax,r122688 ror r13,142689 and r15,rdi2690 xor r14,rbx2691 add rax,r132692 xor r15,rcx2693 ror r14,282694 add r8,rax2695 add rax,r152696 mov r13,r82697 add r14,rax2698 ror r13,232699 mov rax,r142700 mov r12,r92701 ror r14,52702 xor r13,r82703 xor r12,r102704 ror r13,42705 xor r14,rax2706 and r12,r82707 xor r13,r82708 add r11,QWORD[64+rsp]2709 mov r15,rax2710 xor r12,r102711 ror r14,62712 xor r15,rbx2713 add r11,r122714 ror r13,142715 and rdi,r152716 xor r14,rax2717 add r11,r132718 xor rdi,rbx2719 ror r14,282720 add rdx,r112721 add r11,rdi2722 mov r13,rdx2723 add r14,r112724 ror r13,232725 mov r11,r142726 mov r12,r82727 ror r14,52728 xor r13,rdx2729 xor r12,r92730 ror r13,42731 xor r14,r112732 and r12,rdx2733 xor r13,rdx2734 add r10,QWORD[72+rsp]2735 mov rdi,r112736 xor r12,r92737 ror r14,62738 xor rdi,rax2739 add r10,r122740 ror r13,142741 and r15,rdi2742 xor r14,r112743 add r10,r132744 xor r15,rax2745 ror r14,282746 add rcx,r102747 add r10,r152748 mov r13,rcx2749 add r14,r102750 ror r13,232751 mov r10,r142752 mov r12,rdx2753 ror r14,52754 xor r13,rcx2755 xor r12,r82756 ror r13,42757 xor r14,r102758 and r12,rcx2759 xor r13,rcx2760 add r9,QWORD[80+rsp]2761 mov r15,r102762 xor r12,r82763 ror r14,62764 xor r15,r112765 add r9,r122766 ror r13,142767 and rdi,r152768 xor r14,r102769 add r9,r132770 xor rdi,r112771 ror r14,282772 add rbx,r92773 add r9,rdi2774 mov r13,rbx2775 add r14,r92776 ror r13,232777 mov r9,r142778 mov r12,rcx2779 ror r14,52780 xor r13,rbx2781 xor r12,rdx2782 ror r13,42783 xor r14,r92784 and r12,rbx2785 xor r13,rbx2786 add r8,QWORD[88+rsp]2787 mov rdi,r92788 xor r12,rdx2789 ror r14,62790 xor rdi,r102791 add r8,r122792 ror r13,142793 and r15,rdi2794 xor r14,r92795 add r8,r132796 xor r15,r102797 ror r14,282798 add rax,r82799 add r8,r152800 mov r13,rax2801 add r14,r82802 ror r13,232803 mov r8,r142804 mov r12,rbx2805 ror r14,52806 xor r13,rax2807 xor r12,rcx2808 ror r13,42809 xor r14,r82810 and r12,rax2811 xor r13,rax2812 add rdx,QWORD[96+rsp]2813 mov r15,r82814 xor r12,rcx2815 ror r14,62816 xor r15,r92817 add rdx,r122818 ror r13,142819 and rdi,r152820 xor r14,r82821 add rdx,r132822 xor rdi,r92823 ror r14,282824 add r11,rdx2825 add rdx,rdi2826 mov r13,r112827 add r14,rdx2828 ror r13,232829 mov rdx,r142830 mov r12,rax2831 ror r14,52832 xor r13,r112833 xor r12,rbx2834 ror r13,42835 xor r14,rdx2836 and r12,r112837 xor r13,r112838 add rcx,QWORD[104+rsp]2839 mov rdi,rdx2840 xor r12,rbx2841 ror r14,62842 xor rdi,r82843 add rcx,r122844 ror r13,142845 and r15,rdi2846 xor r14,rdx2847 add rcx,r132848 xor r15,r82849 ror r14,282850 add r10,rcx2851 add rcx,r152852 mov r13,r102853 add r14,rcx2854 ror r13,232855 mov rcx,r142856 mov r12,r112857 ror r14,52858 xor r13,r102859 xor r12,rax2860 ror r13,42861 xor r14,rcx2862 and r12,r102863 xor r13,r102864 add rbx,QWORD[112+rsp]2865 mov r15,rcx2866 xor r12,rax2867 ror r14,62868 xor r15,rdx2869 add rbx,r122870 ror r13,142871 and rdi,r152872 xor r14,rcx2873 add rbx,r132874 xor rdi,rdx2875 ror r14,282876 add r9,rbx2877 add rbx,rdi2878 mov r13,r92879 add r14,rbx2880 ror r13,232881 mov rbx,r142882 mov r12,r102883 ror r14,52884 xor r13,r92885 xor r12,r112886 ror r13,42887 xor r14,rbx2888 and r12,r92889 xor r13,r92890 add rax,QWORD[120+rsp]2891 mov rdi,rbx2892 xor r12,r112893 ror r14,62894 xor rdi,rcx2895 add rax,r122896 ror r13,142897 and r15,rdi2898 xor r14,rbx2899 add rax,r132900 xor r15,rcx2901 ror r14,282902 add r8,rax2903 add rax,r152904 mov r13,r82905 add r14,rax2906 mov rdi,QWORD[((128+0))+rsp]2907 mov rax,r142908 2909 add rax,QWORD[rdi]2910 lea rsi,[128+rsi]2911 add rbx,QWORD[8+rdi]2912 add rcx,QWORD[16+rdi]2913 add rdx,QWORD[24+rdi]2914 add r8,QWORD[32+rdi]2915 add r9,QWORD[40+rdi]2916 add r10,QWORD[48+rdi]2917 add r11,QWORD[56+rdi]2918 2919 cmp rsi,QWORD[((128+16))+rsp]2920 2921 mov QWORD[rdi],rax2922 mov QWORD[8+rdi],rbx2923 mov QWORD[16+rdi],rcx2924 mov QWORD[24+rdi],rdx2925 mov QWORD[32+rdi],r82926 mov QWORD[40+rdi],r92927 mov QWORD[48+rdi],r102928 mov QWORD[56+rdi],r112929 jb NEAR $L$loop_xop2930 2931 mov rsi,QWORD[152+rsp]2932 2933 vzeroupper2934 movaps xmm6,XMMWORD[((128+32))+rsp]2935 movaps xmm7,XMMWORD[((128+48))+rsp]2936 movaps xmm8,XMMWORD[((128+64))+rsp]2937 movaps xmm9,XMMWORD[((128+80))+rsp]2938 movaps xmm10,XMMWORD[((128+96))+rsp]2939 movaps xmm11,XMMWORD[((128+112))+rsp]2940 mov r15,QWORD[((-48))+rsi]2941 2942 mov r14,QWORD[((-40))+rsi]2943 2944 mov r13,QWORD[((-32))+rsi]2945 2946 mov r12,QWORD[((-24))+rsi]2947 2948 mov rbp,QWORD[((-16))+rsi]2949 2950 mov rbx,QWORD[((-8))+rsi]2951 2952 lea rsp,[rsi]2953 2954 $L$epilogue_xop:2955 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2956 mov rsi,QWORD[16+rsp]2957 DB 0F3h,0C3h ;repret2958 2959 $L$SEH_end_sha512_block_data_order_xop:2960 2961 ALIGN 642962 sha512_block_data_order_avx:2963 mov QWORD[8+rsp],rdi ;WIN64 prologue2964 mov QWORD[16+rsp],rsi2965 mov rax,rsp2966 $L$SEH_begin_sha512_block_data_order_avx:2967 mov rdi,rcx2968 mov rsi,rdx2969 mov rdx,r82970 2971 2972 2973 $L$avx_shortcut:2974 mov rax,rsp2975 2976 push rbx2977 2978 push rbp2979 2980 push r122981 2982 push r132983 2984 push r142985 2986 push r152987 2988 shl rdx,42989 sub rsp,2562990 lea rdx,[rdx*8+rsi]2991 and rsp,-642992 mov QWORD[((128+0))+rsp],rdi2993 mov QWORD[((128+8))+rsp],rsi2994 mov QWORD[((128+16))+rsp],rdx2995 mov QWORD[152+rsp],rax2996 2997 movaps XMMWORD[(128+32)+rsp],xmm62998 movaps XMMWORD[(128+48)+rsp],xmm72999 movaps XMMWORD[(128+64)+rsp],xmm83000 movaps XMMWORD[(128+80)+rsp],xmm93001 movaps XMMWORD[(128+96)+rsp],xmm103002 movaps XMMWORD[(128+112)+rsp],xmm113003 $L$prologue_avx:3004 3005 vzeroupper3006 mov rax,QWORD[rdi]3007 mov rbx,QWORD[8+rdi]3008 mov rcx,QWORD[16+rdi]3009 mov rdx,QWORD[24+rdi]3010 mov r8,QWORD[32+rdi]3011 mov r9,QWORD[40+rdi]3012 mov r10,QWORD[48+rdi]3013 mov r11,QWORD[56+rdi]3014 jmp NEAR $L$loop_avx3015 ALIGN 163016 $L$loop_avx:3017 vmovdqa xmm11,XMMWORD[((K512+1280))]3018 vmovdqu xmm0,XMMWORD[rsi]3019 lea rbp,[((K512+128))]3020 vmovdqu xmm1,XMMWORD[16+rsi]3021 vmovdqu xmm2,XMMWORD[32+rsi]3022 vpshufb xmm0,xmm0,xmm113023 vmovdqu xmm3,XMMWORD[48+rsi]3024 vpshufb xmm1,xmm1,xmm113025 vmovdqu xmm4,XMMWORD[64+rsi]3026 vpshufb xmm2,xmm2,xmm113027 vmovdqu xmm5,XMMWORD[80+rsi]3028 vpshufb xmm3,xmm3,xmm113029 vmovdqu xmm6,XMMWORD[96+rsi]3030 vpshufb xmm4,xmm4,xmm113031 vmovdqu xmm7,XMMWORD[112+rsi]3032 vpshufb xmm5,xmm5,xmm113033 vpaddq xmm8,xmm0,XMMWORD[((-128))+rbp]3034 vpshufb xmm6,xmm6,xmm113035 vpaddq xmm9,xmm1,XMMWORD[((-96))+rbp]3036 vpshufb xmm7,xmm7,xmm113037 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]3038 vpaddq xmm11,xmm3,XMMWORD[((-32))+rbp]3039 vmovdqa XMMWORD[rsp],xmm83040 vpaddq xmm8,xmm4,XMMWORD[rbp]3041 vmovdqa XMMWORD[16+rsp],xmm93042 vpaddq xmm9,xmm5,XMMWORD[32+rbp]3043 vmovdqa XMMWORD[32+rsp],xmm103044 vpaddq xmm10,xmm6,XMMWORD[64+rbp]3045 vmovdqa XMMWORD[48+rsp],xmm113046 vpaddq xmm11,xmm7,XMMWORD[96+rbp]3047 vmovdqa XMMWORD[64+rsp],xmm83048 mov r14,rax3049 vmovdqa XMMWORD[80+rsp],xmm93050 mov rdi,rbx3051 vmovdqa XMMWORD[96+rsp],xmm103052 xor rdi,rcx3053 vmovdqa XMMWORD[112+rsp],xmm113054 mov r13,r83055 jmp NEAR $L$avx_00_473056 3057 ALIGN 163058 $L$avx_00_47:3059 add rbp,2563060 vpalignr xmm8,xmm1,xmm0,83061 shrd r13,r13,233062 mov rax,r143063 vpalignr xmm11,xmm5,xmm4,83064 mov r12,r93065 shrd r14,r14,53066 vpsrlq xmm10,xmm8,13067 xor r13,r83068 xor r12,r103069 vpaddq xmm0,xmm0,xmm113070 shrd r13,r13,43071 xor r14,rax3072 vpsrlq xmm11,xmm8,73073 and r12,r83074 xor r13,r83075 vpsllq xmm9,xmm8,563076 add r11,QWORD[rsp]3077 mov r15,rax3078 vpxor xmm8,xmm11,xmm103079 xor r12,r103080 shrd r14,r14,63081 vpsrlq xmm10,xmm10,73082 xor r15,rbx3083 add r11,r123084 vpxor xmm8,xmm8,xmm93085 shrd r13,r13,143086 and rdi,r153087 vpsllq xmm9,xmm9,73088 xor r14,rax3089 add r11,r133090 vpxor xmm8,xmm8,xmm103091 xor rdi,rbx3092 shrd r14,r14,283093 vpsrlq xmm11,xmm7,63094 add rdx,r113095 add r11,rdi3096 vpxor xmm8,xmm8,xmm93097 mov r13,rdx3098 add r14,r113099 vpsllq xmm10,xmm7,33100 shrd r13,r13,233101 mov r11,r143102 vpaddq xmm0,xmm0,xmm83103 mov r12,r83104 shrd r14,r14,53105 vpsrlq xmm9,xmm7,193106 xor r13,rdx3107 xor r12,r93108 vpxor xmm11,xmm11,xmm103109 shrd r13,r13,43110 xor r14,r113111 vpsllq xmm10,xmm10,423112 and r12,rdx3113 xor r13,rdx3114 vpxor xmm11,xmm11,xmm93115 add r10,QWORD[8+rsp]3116 mov rdi,r113117 vpsrlq xmm9,xmm9,423118 xor r12,r93119 shrd r14,r14,63120 vpxor xmm11,xmm11,xmm103121 xor rdi,rax3122 add r10,r123123 vpxor xmm11,xmm11,xmm93124 shrd r13,r13,143125 and r15,rdi3126 vpaddq xmm0,xmm0,xmm113127 xor r14,r113128 add r10,r133129 vpaddq xmm10,xmm0,XMMWORD[((-128))+rbp]3130 xor r15,rax3131 shrd r14,r14,283132 add rcx,r103133 add r10,r153134 mov r13,rcx3135 add r14,r103136 vmovdqa XMMWORD[rsp],xmm103137 vpalignr xmm8,xmm2,xmm1,83138 shrd r13,r13,233139 mov r10,r143140 vpalignr xmm11,xmm6,xmm5,83141 mov r12,rdx3142 shrd r14,r14,53143 vpsrlq xmm10,xmm8,13144 xor r13,rcx3145 xor r12,r83146 vpaddq xmm1,xmm1,xmm113147 shrd r13,r13,43148 xor r14,r103149 vpsrlq xmm11,xmm8,73150 and r12,rcx3151 xor r13,rcx3152 vpsllq xmm9,xmm8,563153 add r9,QWORD[16+rsp]3154 mov r15,r103155 vpxor xmm8,xmm11,xmm103156 xor r12,r83157 shrd r14,r14,63158 vpsrlq xmm10,xmm10,73159 xor r15,r113160 add r9,r123161 vpxor xmm8,xmm8,xmm93162 shrd r13,r13,143163 and rdi,r153164 vpsllq xmm9,xmm9,73165 xor r14,r103166 add r9,r133167 vpxor xmm8,xmm8,xmm103168 xor rdi,r113169 shrd r14,r14,283170 vpsrlq xmm11,xmm0,63171 add rbx,r93172 add r9,rdi3173 vpxor xmm8,xmm8,xmm93174 mov r13,rbx3175 add r14,r93176 vpsllq xmm10,xmm0,33177 shrd r13,r13,233178 mov r9,r143179 vpaddq xmm1,xmm1,xmm83180 mov r12,rcx3181 shrd r14,r14,53182 vpsrlq xmm9,xmm0,193183 xor r13,rbx3184 xor r12,rdx3185 vpxor xmm11,xmm11,xmm103186 shrd r13,r13,43187 xor r14,r93188 vpsllq xmm10,xmm10,423189 and r12,rbx3190 xor r13,rbx3191 vpxor xmm11,xmm11,xmm93192 add r8,QWORD[24+rsp]3193 mov rdi,r93194 vpsrlq xmm9,xmm9,423195 xor r12,rdx3196 shrd r14,r14,63197 vpxor xmm11,xmm11,xmm103198 xor rdi,r103199 add r8,r123200 vpxor xmm11,xmm11,xmm93201 shrd r13,r13,143202 and r15,rdi3203 vpaddq xmm1,xmm1,xmm113204 xor r14,r93205 add r8,r133206 vpaddq xmm10,xmm1,XMMWORD[((-96))+rbp]3207 xor r15,r103208 shrd r14,r14,283209 add rax,r83210 add r8,r153211 mov r13,rax3212 add r14,r83213 vmovdqa XMMWORD[16+rsp],xmm103214 vpalignr xmm8,xmm3,xmm2,83215 shrd r13,r13,233216 mov r8,r143217 vpalignr xmm11,xmm7,xmm6,83218 mov r12,rbx3219 shrd r14,r14,53220 vpsrlq xmm10,xmm8,13221 xor r13,rax3222 xor r12,rcx3223 vpaddq xmm2,xmm2,xmm113224 shrd r13,r13,43225 xor r14,r83226 vpsrlq xmm11,xmm8,73227 and r12,rax3228 xor r13,rax3229 vpsllq xmm9,xmm8,563230 add rdx,QWORD[32+rsp]3231 mov r15,r83232 vpxor xmm8,xmm11,xmm103233 xor r12,rcx3234 shrd r14,r14,63235 vpsrlq xmm10,xmm10,73236 xor r15,r93237 add rdx,r123238 vpxor xmm8,xmm8,xmm93239 shrd r13,r13,143240 and rdi,r153241 vpsllq xmm9,xmm9,73242 xor r14,r83243 add rdx,r133244 vpxor xmm8,xmm8,xmm103245 xor rdi,r93246 shrd r14,r14,283247 vpsrlq xmm11,xmm1,63248 add r11,rdx3249 add rdx,rdi3250 vpxor xmm8,xmm8,xmm93251 mov r13,r113252 add r14,rdx3253 vpsllq xmm10,xmm1,33254 shrd r13,r13,233255 mov rdx,r143256 vpaddq xmm2,xmm2,xmm83257 mov r12,rax3258 shrd r14,r14,53259 vpsrlq xmm9,xmm1,193260 xor r13,r113261 xor r12,rbx3262 vpxor xmm11,xmm11,xmm103263 shrd r13,r13,43264 xor r14,rdx3265 vpsllq xmm10,xmm10,423266 and r12,r113267 xor r13,r113268 vpxor xmm11,xmm11,xmm93269 add rcx,QWORD[40+rsp]3270 mov rdi,rdx3271 vpsrlq xmm9,xmm9,423272 xor r12,rbx3273 shrd r14,r14,63274 vpxor xmm11,xmm11,xmm103275 xor rdi,r83276 add rcx,r123277 vpxor xmm11,xmm11,xmm93278 shrd r13,r13,143279 and r15,rdi3280 vpaddq xmm2,xmm2,xmm113281 xor r14,rdx3282 add rcx,r133283 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp]3284 xor r15,r83285 shrd r14,r14,283286 add r10,rcx3287 add rcx,r153288 mov r13,r103289 add r14,rcx3290 vmovdqa XMMWORD[32+rsp],xmm103291 vpalignr xmm8,xmm4,xmm3,83292 shrd r13,r13,233293 mov rcx,r143294 vpalignr xmm11,xmm0,xmm7,83295 mov r12,r113296 shrd r14,r14,53297 vpsrlq xmm10,xmm8,13298 xor r13,r103299 xor r12,rax3300 vpaddq xmm3,xmm3,xmm113301 shrd r13,r13,43302 xor r14,rcx3303 vpsrlq xmm11,xmm8,73304 and r12,r103305 xor r13,r103306 vpsllq xmm9,xmm8,563307 add rbx,QWORD[48+rsp]3308 mov r15,rcx3309 vpxor xmm8,xmm11,xmm103310 xor r12,rax3311 shrd r14,r14,63312 vpsrlq xmm10,xmm10,73313 xor r15,rdx3314 add rbx,r123315 vpxor xmm8,xmm8,xmm93316 shrd r13,r13,143317 and rdi,r153318 vpsllq xmm9,xmm9,73319 xor r14,rcx3320 add rbx,r133321 vpxor xmm8,xmm8,xmm103322 xor rdi,rdx3323 shrd r14,r14,283324 vpsrlq xmm11,xmm2,63325 add r9,rbx3326 add rbx,rdi3327 vpxor xmm8,xmm8,xmm93328 mov r13,r93329 add r14,rbx3330 vpsllq xmm10,xmm2,33331 shrd r13,r13,233332 mov rbx,r143333 vpaddq xmm3,xmm3,xmm83334 mov r12,r103335 shrd r14,r14,53336 vpsrlq xmm9,xmm2,193337 xor r13,r93338 xor r12,r113339 vpxor xmm11,xmm11,xmm103340 shrd r13,r13,43341 xor r14,rbx3342 vpsllq xmm10,xmm10,423343 and r12,r93344 xor r13,r93345 vpxor xmm11,xmm11,xmm93346 add rax,QWORD[56+rsp]3347 mov rdi,rbx3348 vpsrlq xmm9,xmm9,423349 xor r12,r113350 shrd r14,r14,63351 vpxor xmm11,xmm11,xmm103352 xor rdi,rcx3353 add rax,r123354 vpxor xmm11,xmm11,xmm93355 shrd r13,r13,143356 and r15,rdi3357 vpaddq xmm3,xmm3,xmm113358 xor r14,rbx3359 add rax,r133360 vpaddq xmm10,xmm3,XMMWORD[((-32))+rbp]3361 xor r15,rcx3362 shrd r14,r14,283363 add r8,rax3364 add rax,r153365 mov r13,r83366 add r14,rax3367 vmovdqa XMMWORD[48+rsp],xmm103368 vpalignr xmm8,xmm5,xmm4,83369 shrd r13,r13,233370 mov rax,r143371 vpalignr xmm11,xmm1,xmm0,83372 mov r12,r93373 shrd r14,r14,53374 vpsrlq xmm10,xmm8,13375 xor r13,r83376 xor r12,r103377 vpaddq xmm4,xmm4,xmm113378 shrd r13,r13,43379 xor r14,rax3380 vpsrlq xmm11,xmm8,73381 and r12,r83382 xor r13,r83383 vpsllq xmm9,xmm8,563384 add r11,QWORD[64+rsp]3385 mov r15,rax3386 vpxor xmm8,xmm11,xmm103387 xor r12,r103388 shrd r14,r14,63389 vpsrlq xmm10,xmm10,73390 xor r15,rbx3391 add r11,r123392 vpxor xmm8,xmm8,xmm93393 shrd r13,r13,143394 and rdi,r153395 vpsllq xmm9,xmm9,73396 xor r14,rax3397 add r11,r133398 vpxor xmm8,xmm8,xmm103399 xor rdi,rbx3400 shrd r14,r14,283401 vpsrlq xmm11,xmm3,63402 add rdx,r113403 add r11,rdi3404 vpxor xmm8,xmm8,xmm93405 mov r13,rdx3406 add r14,r113407 vpsllq xmm10,xmm3,33408 shrd r13,r13,233409 mov r11,r143410 vpaddq xmm4,xmm4,xmm83411 mov r12,r83412 shrd r14,r14,53413 vpsrlq xmm9,xmm3,193414 xor r13,rdx3415 xor r12,r93416 vpxor xmm11,xmm11,xmm103417 shrd r13,r13,43418 xor r14,r113419 vpsllq xmm10,xmm10,423420 and r12,rdx3421 xor r13,rdx3422 vpxor xmm11,xmm11,xmm93423 add r10,QWORD[72+rsp]3424 mov rdi,r113425 vpsrlq xmm9,xmm9,423426 xor r12,r93427 shrd r14,r14,63428 vpxor xmm11,xmm11,xmm103429 xor rdi,rax3430 add r10,r123431 vpxor xmm11,xmm11,xmm93432 shrd r13,r13,143433 and r15,rdi3434 vpaddq xmm4,xmm4,xmm113435 xor r14,r113436 add r10,r133437 vpaddq xmm10,xmm4,XMMWORD[rbp]3438 xor r15,rax3439 shrd r14,r14,283440 add rcx,r103441 add r10,r153442 mov r13,rcx3443 add r14,r103444 vmovdqa XMMWORD[64+rsp],xmm103445 vpalignr xmm8,xmm6,xmm5,83446 shrd r13,r13,233447 mov r10,r143448 vpalignr xmm11,xmm2,xmm1,83449 mov r12,rdx3450 shrd r14,r14,53451 vpsrlq xmm10,xmm8,13452 xor r13,rcx3453 xor r12,r83454 vpaddq xmm5,xmm5,xmm113455 shrd r13,r13,43456 xor r14,r103457 vpsrlq xmm11,xmm8,73458 and r12,rcx3459 xor r13,rcx3460 vpsllq xmm9,xmm8,563461 add r9,QWORD[80+rsp]3462 mov r15,r103463 vpxor xmm8,xmm11,xmm103464 xor r12,r83465 shrd r14,r14,63466 vpsrlq xmm10,xmm10,73467 xor r15,r113468 add r9,r123469 vpxor xmm8,xmm8,xmm93470 shrd r13,r13,143471 and rdi,r153472 vpsllq xmm9,xmm9,73473 xor r14,r103474 add r9,r133475 vpxor xmm8,xmm8,xmm103476 xor rdi,r113477 shrd r14,r14,283478 vpsrlq xmm11,xmm4,63479 add rbx,r93480 add r9,rdi3481 vpxor xmm8,xmm8,xmm93482 mov r13,rbx3483 add r14,r93484 vpsllq xmm10,xmm4,33485 shrd r13,r13,233486 mov r9,r143487 vpaddq xmm5,xmm5,xmm83488 mov r12,rcx3489 shrd r14,r14,53490 vpsrlq xmm9,xmm4,193491 xor r13,rbx3492 xor r12,rdx3493 vpxor xmm11,xmm11,xmm103494 shrd r13,r13,43495 xor r14,r93496 vpsllq xmm10,xmm10,423497 and r12,rbx3498 xor r13,rbx3499 vpxor xmm11,xmm11,xmm93500 add r8,QWORD[88+rsp]3501 mov rdi,r93502 vpsrlq xmm9,xmm9,423503 xor r12,rdx3504 shrd r14,r14,63505 vpxor xmm11,xmm11,xmm103506 xor rdi,r103507 add r8,r123508 vpxor xmm11,xmm11,xmm93509 shrd r13,r13,143510 and r15,rdi3511 vpaddq xmm5,xmm5,xmm113512 xor r14,r93513 add r8,r133514 vpaddq xmm10,xmm5,XMMWORD[32+rbp]3515 xor r15,r103516 shrd r14,r14,283517 add rax,r83518 add r8,r153519 mov r13,rax3520 add r14,r83521 vmovdqa XMMWORD[80+rsp],xmm103522 vpalignr xmm8,xmm7,xmm6,83523 shrd r13,r13,233524 mov r8,r143525 vpalignr xmm11,xmm3,xmm2,83526 mov r12,rbx3527 shrd r14,r14,53528 vpsrlq xmm10,xmm8,13529 xor r13,rax3530 xor r12,rcx3531 vpaddq xmm6,xmm6,xmm113532 shrd r13,r13,43533 xor r14,r83534 vpsrlq xmm11,xmm8,73535 and r12,rax3536 xor r13,rax3537 vpsllq xmm9,xmm8,563538 add rdx,QWORD[96+rsp]3539 mov r15,r83540 vpxor xmm8,xmm11,xmm103541 xor r12,rcx3542 shrd r14,r14,63543 vpsrlq xmm10,xmm10,73544 xor r15,r93545 add rdx,r123546 vpxor xmm8,xmm8,xmm93547 shrd r13,r13,143548 and rdi,r153549 vpsllq xmm9,xmm9,73550 xor r14,r83551 add rdx,r133552 vpxor xmm8,xmm8,xmm103553 xor rdi,r93554 shrd r14,r14,283555 vpsrlq xmm11,xmm5,63556 add r11,rdx3557 add rdx,rdi3558 vpxor xmm8,xmm8,xmm93559 mov r13,r113560 add r14,rdx3561 vpsllq xmm10,xmm5,33562 shrd r13,r13,233563 mov rdx,r143564 vpaddq xmm6,xmm6,xmm83565 mov r12,rax3566 shrd r14,r14,53567 vpsrlq xmm9,xmm5,193568 xor r13,r113569 xor r12,rbx3570 vpxor xmm11,xmm11,xmm103571 shrd r13,r13,43572 xor r14,rdx3573 vpsllq xmm10,xmm10,423574 and r12,r113575 xor r13,r113576 vpxor xmm11,xmm11,xmm93577 add rcx,QWORD[104+rsp]3578 mov rdi,rdx3579 vpsrlq xmm9,xmm9,423580 xor r12,rbx3581 shrd r14,r14,63582 vpxor xmm11,xmm11,xmm103583 xor rdi,r83584 add rcx,r123585 vpxor xmm11,xmm11,xmm93586 shrd r13,r13,143587 and r15,rdi3588 vpaddq xmm6,xmm6,xmm113589 xor r14,rdx3590 add rcx,r133591 vpaddq xmm10,xmm6,XMMWORD[64+rbp]3592 xor r15,r83593 shrd r14,r14,283594 add r10,rcx3595 add rcx,r153596 mov r13,r103597 add r14,rcx3598 vmovdqa XMMWORD[96+rsp],xmm103599 vpalignr xmm8,xmm0,xmm7,83600 shrd r13,r13,233601 mov rcx,r143602 vpalignr xmm11,xmm4,xmm3,83603 mov r12,r113604 shrd r14,r14,53605 vpsrlq xmm10,xmm8,13606 xor r13,r103607 xor r12,rax3608 vpaddq xmm7,xmm7,xmm113609 shrd r13,r13,43610 xor r14,rcx3611 vpsrlq xmm11,xmm8,73612 and r12,r103613 xor r13,r103614 vpsllq xmm9,xmm8,563615 add rbx,QWORD[112+rsp]3616 mov r15,rcx3617 vpxor xmm8,xmm11,xmm103618 xor r12,rax3619 shrd r14,r14,63620 vpsrlq xmm10,xmm10,73621 xor r15,rdx3622 add rbx,r123623 vpxor xmm8,xmm8,xmm93624 shrd r13,r13,143625 and rdi,r153626 vpsllq xmm9,xmm9,73627 xor r14,rcx3628 add rbx,r133629 vpxor xmm8,xmm8,xmm103630 xor rdi,rdx3631 shrd r14,r14,283632 vpsrlq xmm11,xmm6,63633 add r9,rbx3634 add rbx,rdi3635 vpxor xmm8,xmm8,xmm93636 mov r13,r93637 add r14,rbx3638 vpsllq xmm10,xmm6,33639 shrd r13,r13,233640 mov rbx,r143641 vpaddq xmm7,xmm7,xmm83642 mov r12,r103643 shrd r14,r14,53644 vpsrlq xmm9,xmm6,193645 xor r13,r93646 xor r12,r113647 vpxor xmm11,xmm11,xmm103648 shrd r13,r13,43649 xor r14,rbx3650 vpsllq xmm10,xmm10,423651 and r12,r93652 xor r13,r93653 vpxor xmm11,xmm11,xmm93654 add rax,QWORD[120+rsp]3655 mov rdi,rbx3656 vpsrlq xmm9,xmm9,423657 xor r12,r113658 shrd r14,r14,63659 vpxor xmm11,xmm11,xmm103660 xor rdi,rcx3661 add rax,r123662 vpxor xmm11,xmm11,xmm93663 shrd r13,r13,143664 and r15,rdi3665 vpaddq xmm7,xmm7,xmm113666 xor r14,rbx3667 add rax,r133668 vpaddq xmm10,xmm7,XMMWORD[96+rbp]3669 xor r15,rcx3670 shrd r14,r14,283671 add r8,rax3672 add rax,r153673 mov r13,r83674 add r14,rax3675 vmovdqa XMMWORD[112+rsp],xmm103676 cmp BYTE[135+rbp],03677 jne NEAR $L$avx_00_473678 shrd r13,r13,233679 mov rax,r143680 mov r12,r93681 shrd r14,r14,53682 xor r13,r83683 xor r12,r103684 shrd r13,r13,43685 xor r14,rax3686 and r12,r83687 xor r13,r83688 add r11,QWORD[rsp]3689 mov r15,rax3690 xor r12,r103691 shrd r14,r14,63692 xor r15,rbx3693 add r11,r123694 shrd r13,r13,143695 and rdi,r153696 xor r14,rax3697 add r11,r133698 xor rdi,rbx3699 shrd r14,r14,283700 add rdx,r113701 add r11,rdi3702 mov r13,rdx3703 add r14,r113704 shrd r13,r13,233705 mov r11,r143706 mov r12,r83707 shrd r14,r14,53708 xor r13,rdx3709 xor r12,r93710 shrd r13,r13,43711 xor r14,r113712 and r12,rdx3713 xor r13,rdx3714 add r10,QWORD[8+rsp]3715 mov rdi,r113716 xor r12,r93717 shrd r14,r14,63718 xor rdi,rax3719 add r10,r123720 shrd r13,r13,143721 and r15,rdi3722 xor r14,r113723 add r10,r133724 xor r15,rax3725 shrd r14,r14,283726 add rcx,r103727 add r10,r153728 mov r13,rcx3729 add r14,r103730 shrd r13,r13,233731 mov r10,r143732 mov r12,rdx3733 shrd r14,r14,53734 xor r13,rcx3735 xor r12,r83736 shrd r13,r13,43737 xor r14,r103738 and r12,rcx3739 xor r13,rcx3740 add r9,QWORD[16+rsp]3741 mov r15,r103742 xor r12,r83743 shrd r14,r14,63744 xor r15,r113745 add r9,r123746 shrd r13,r13,143747 and rdi,r153748 xor r14,r103749 add r9,r133750 xor rdi,r113751 shrd r14,r14,283752 add rbx,r93753 add r9,rdi3754 mov r13,rbx3755 add r14,r93756 shrd r13,r13,233757 mov r9,r143758 mov r12,rcx3759 shrd r14,r14,53760 xor r13,rbx3761 xor r12,rdx3762 shrd r13,r13,43763 xor r14,r93764 and r12,rbx3765 xor r13,rbx3766 add r8,QWORD[24+rsp]3767 mov rdi,r93768 xor r12,rdx3769 shrd r14,r14,63770 xor rdi,r103771 add r8,r123772 shrd r13,r13,143773 and r15,rdi3774 xor r14,r93775 add r8,r133776 xor r15,r103777 shrd r14,r14,283778 add rax,r83779 add r8,r153780 mov r13,rax3781 add r14,r83782 shrd r13,r13,233783 mov r8,r143784 mov r12,rbx3785 shrd r14,r14,53786 xor r13,rax3787 xor r12,rcx3788 shrd r13,r13,43789 xor r14,r83790 and r12,rax3791 xor r13,rax3792 add rdx,QWORD[32+rsp]3793 mov r15,r83794 xor r12,rcx3795 shrd r14,r14,63796 xor r15,r93797 add rdx,r123798 shrd r13,r13,143799 and rdi,r153800 xor r14,r83801 add rdx,r133802 xor rdi,r93803 shrd r14,r14,283804 add r11,rdx3805 add rdx,rdi3806 mov r13,r113807 add r14,rdx3808 shrd r13,r13,233809 mov rdx,r143810 mov r12,rax3811 shrd r14,r14,53812 xor r13,r113813 xor r12,rbx3814 shrd r13,r13,43815 xor r14,rdx3816 and r12,r113817 xor r13,r113818 add rcx,QWORD[40+rsp]3819 mov rdi,rdx3820 xor r12,rbx3821 shrd r14,r14,63822 xor rdi,r83823 add rcx,r123824 shrd r13,r13,143825 and r15,rdi3826 xor r14,rdx3827 add rcx,r133828 xor r15,r83829 shrd r14,r14,283830 add r10,rcx3831 add rcx,r153832 mov r13,r103833 add r14,rcx3834 shrd r13,r13,233835 mov rcx,r143836 mov r12,r113837 shrd r14,r14,53838 xor r13,r103839 xor r12,rax3840 shrd r13,r13,43841 xor r14,rcx3842 and r12,r103843 xor r13,r103844 add rbx,QWORD[48+rsp]3845 mov r15,rcx3846 xor r12,rax3847 shrd r14,r14,63848 xor r15,rdx3849 add rbx,r123850 shrd r13,r13,143851 and rdi,r153852 xor r14,rcx3853 add rbx,r133854 xor rdi,rdx3855 shrd r14,r14,283856 add r9,rbx3857 add rbx,rdi3858 mov r13,r93859 add r14,rbx3860 shrd r13,r13,233861 mov rbx,r143862 mov r12,r103863 shrd r14,r14,53864 xor r13,r93865 xor r12,r113866 shrd r13,r13,43867 xor r14,rbx3868 and r12,r93869 xor r13,r93870 add rax,QWORD[56+rsp]3871 mov rdi,rbx3872 xor r12,r113873 shrd r14,r14,63874 xor rdi,rcx3875 add rax,r123876 shrd r13,r13,143877 and r15,rdi3878 xor r14,rbx3879 add rax,r133880 xor r15,rcx3881 shrd r14,r14,283882 add r8,rax3883 add rax,r153884 mov r13,r83885 add r14,rax3886 shrd r13,r13,233887 mov rax,r143888 mov r12,r93889 shrd r14,r14,53890 xor r13,r83891 xor r12,r103892 shrd r13,r13,43893 xor r14,rax3894 and r12,r83895 xor r13,r83896 add r11,QWORD[64+rsp]3897 mov r15,rax3898 xor r12,r103899 shrd r14,r14,63900 xor r15,rbx3901 add r11,r123902 shrd r13,r13,143903 and rdi,r153904 xor r14,rax3905 add r11,r133906 xor rdi,rbx3907 shrd r14,r14,283908 add rdx,r113909 add r11,rdi3910 mov r13,rdx3911 add r14,r113912 shrd r13,r13,233913 mov r11,r143914 mov r12,r83915 shrd r14,r14,53916 xor r13,rdx3917 xor r12,r93918 shrd r13,r13,43919 xor r14,r113920 and r12,rdx3921 xor r13,rdx3922 add r10,QWORD[72+rsp]3923 mov rdi,r113924 xor r12,r93925 shrd r14,r14,63926 xor rdi,rax3927 add r10,r123928 shrd r13,r13,143929 and r15,rdi3930 xor r14,r113931 add r10,r133932 xor r15,rax3933 shrd r14,r14,283934 add rcx,r103935 add r10,r153936 mov r13,rcx3937 add r14,r103938 shrd r13,r13,233939 mov r10,r143940 mov r12,rdx3941 shrd r14,r14,53942 xor r13,rcx3943 xor r12,r83944 shrd r13,r13,43945 xor r14,r103946 and r12,rcx3947 xor r13,rcx3948 add r9,QWORD[80+rsp]3949 mov r15,r103950 xor r12,r83951 shrd r14,r14,63952 xor r15,r113953 add r9,r123954 shrd r13,r13,143955 and rdi,r153956 xor r14,r103957 add r9,r133958 xor rdi,r113959 shrd r14,r14,283960 add rbx,r93961 add r9,rdi3962 mov r13,rbx3963 add r14,r93964 shrd r13,r13,233965 mov r9,r143966 mov r12,rcx3967 shrd r14,r14,53968 xor r13,rbx3969 xor r12,rdx3970 shrd r13,r13,43971 xor r14,r93972 and r12,rbx3973 xor r13,rbx3974 add r8,QWORD[88+rsp]3975 mov rdi,r93976 xor r12,rdx3977 shrd r14,r14,63978 xor rdi,r103979 add r8,r123980 shrd r13,r13,143981 and r15,rdi3982 xor r14,r93983 add r8,r133984 xor r15,r103985 shrd r14,r14,283986 add rax,r83987 add r8,r153988 mov r13,rax3989 add r14,r83990 shrd r13,r13,233991 mov r8,r143992 mov r12,rbx3993 shrd r14,r14,53994 xor r13,rax3995 xor r12,rcx3996 shrd r13,r13,43997 xor r14,r83998 and r12,rax3999 xor r13,rax4000 add rdx,QWORD[96+rsp]4001 mov r15,r84002 xor r12,rcx4003 shrd r14,r14,64004 xor r15,r94005 add rdx,r124006 shrd r13,r13,144007 and rdi,r154008 xor r14,r84009 add rdx,r134010 xor rdi,r94011 shrd r14,r14,284012 add r11,rdx4013 add rdx,rdi4014 mov r13,r114015 add r14,rdx4016 shrd r13,r13,234017 mov rdx,r144018 mov r12,rax4019 shrd r14,r14,54020 xor r13,r114021 xor r12,rbx4022 shrd r13,r13,44023 xor r14,rdx4024 and r12,r114025 xor r13,r114026 add rcx,QWORD[104+rsp]4027 mov rdi,rdx4028 xor r12,rbx4029 shrd r14,r14,64030 xor rdi,r84031 add rcx,r124032 shrd r13,r13,144033 and r15,rdi4034 xor r14,rdx4035 add rcx,r134036 xor r15,r84037 shrd r14,r14,284038 add r10,rcx4039 add rcx,r154040 mov r13,r104041 add r14,rcx4042 shrd r13,r13,234043 mov rcx,r144044 mov r12,r114045 shrd r14,r14,54046 xor r13,r104047 xor r12,rax4048 shrd r13,r13,44049 xor r14,rcx4050 and r12,r104051 xor r13,r104052 add rbx,QWORD[112+rsp]4053 mov r15,rcx4054 xor r12,rax4055 shrd r14,r14,64056 xor r15,rdx4057 add rbx,r124058 shrd r13,r13,144059 and rdi,r154060 xor r14,rcx4061 add rbx,r134062 xor rdi,rdx4063 shrd r14,r14,284064 add r9,rbx4065 add rbx,rdi4066 mov r13,r94067 add r14,rbx4068 shrd r13,r13,234069 mov rbx,r144070 mov r12,r104071 shrd r14,r14,54072 xor r13,r94073 xor r12,r114074 shrd r13,r13,44075 xor r14,rbx4076 and r12,r94077 xor r13,r94078 add rax,QWORD[120+rsp]4079 mov rdi,rbx4080 xor r12,r114081 shrd r14,r14,64082 xor rdi,rcx4083 add rax,r124084 shrd r13,r13,144085 and r15,rdi4086 xor r14,rbx4087 add rax,r134088 xor r15,rcx4089 shrd r14,r14,284090 add r8,rax4091 add rax,r154092 mov r13,r84093 add r14,rax4094 mov rdi,QWORD[((128+0))+rsp]4095 mov rax,r144096 4097 add rax,QWORD[rdi]4098 lea rsi,[128+rsi]4099 add rbx,QWORD[8+rdi]4100 add rcx,QWORD[16+rdi]4101 add rdx,QWORD[24+rdi]4102 add r8,QWORD[32+rdi]4103 add r9,QWORD[40+rdi]4104 add r10,QWORD[48+rdi]4105 add r11,QWORD[56+rdi]4106 4107 cmp rsi,QWORD[((128+16))+rsp]4108 4109 mov QWORD[rdi],rax4110 mov QWORD[8+rdi],rbx4111 mov QWORD[16+rdi],rcx4112 mov QWORD[24+rdi],rdx4113 mov QWORD[32+rdi],r84114 mov QWORD[40+rdi],r94115 mov QWORD[48+rdi],r104116 mov QWORD[56+rdi],r114117 jb NEAR $L$loop_avx4118 4119 mov rsi,QWORD[152+rsp]4120 4121 vzeroupper4122 movaps xmm6,XMMWORD[((128+32))+rsp]4123 movaps xmm7,XMMWORD[((128+48))+rsp]4124 movaps xmm8,XMMWORD[((128+64))+rsp]4125 movaps xmm9,XMMWORD[((128+80))+rsp]4126 movaps xmm10,XMMWORD[((128+96))+rsp]4127 movaps xmm11,XMMWORD[((128+112))+rsp]4128 mov r15,QWORD[((-48))+rsi]4129 4130 mov r14,QWORD[((-40))+rsi]4131 4132 mov r13,QWORD[((-32))+rsi]4133 4134 mov r12,QWORD[((-24))+rsi]4135 4136 mov rbp,QWORD[((-16))+rsi]4137 4138 mov rbx,QWORD[((-8))+rsi]4139 4140 lea rsp,[rsi]4141 4142 $L$epilogue_avx:4143 mov rdi,QWORD[8+rsp] ;WIN64 epilogue4144 mov rsi,QWORD[16+rsp]4145 DB 0F3h,0C3h ;repret4146 4147 $L$SEH_end_sha512_block_data_order_avx:4148 4149 ALIGN 644150 sha512_block_data_order_avx2:4151 mov QWORD[8+rsp],rdi ;WIN64 prologue4152 mov QWORD[16+rsp],rsi4153 mov rax,rsp4154 $L$SEH_begin_sha512_block_data_order_avx2:4155 mov rdi,rcx4156 mov rsi,rdx4157 mov rdx,r84158 4159 4160 4161 $L$avx2_shortcut:4162 mov rax,rsp4163 4164 push rbx4165 4166 push rbp4167 4168 push r124169 4170 push r134171 4172 push r144173 4174 push r154175 4176 sub rsp,14084177 shl rdx,44178 and rsp,-256*84179 lea rdx,[rdx*8+rsi]4180 add rsp,11524181 mov QWORD[((128+0))+rsp],rdi4182 mov QWORD[((128+8))+rsp],rsi4183 mov QWORD[((128+16))+rsp],rdx4184 mov QWORD[152+rsp],rax4185 4186 movaps XMMWORD[(128+32)+rsp],xmm64187 movaps XMMWORD[(128+48)+rsp],xmm74188 movaps XMMWORD[(128+64)+rsp],xmm84189 movaps XMMWORD[(128+80)+rsp],xmm94190 movaps XMMWORD[(128+96)+rsp],xmm104191 movaps XMMWORD[(128+112)+rsp],xmm114192 $L$prologue_avx2:4193 4194 vzeroupper4195 sub rsi,-16*84196 mov rax,QWORD[rdi]4197 mov r12,rsi4198 mov rbx,QWORD[8+rdi]4199 cmp rsi,rdx4200 mov rcx,QWORD[16+rdi]4201 cmove r12,rsp4202 mov rdx,QWORD[24+rdi]4203 mov r8,QWORD[32+rdi]4204 mov r9,QWORD[40+rdi]4205 mov r10,QWORD[48+rdi]4206 mov r11,QWORD[56+rdi]4207 jmp NEAR $L$oop_avx24208 ALIGN 164209 $L$oop_avx2:4210 vmovdqu xmm0,XMMWORD[((-128))+rsi]4211 vmovdqu xmm1,XMMWORD[((-128+16))+rsi]4212 vmovdqu xmm2,XMMWORD[((-128+32))+rsi]4213 lea rbp,[((K512+128))]4214 vmovdqu xmm3,XMMWORD[((-128+48))+rsi]4215 vmovdqu xmm4,XMMWORD[((-128+64))+rsi]4216 vmovdqu xmm5,XMMWORD[((-128+80))+rsi]4217 vmovdqu xmm6,XMMWORD[((-128+96))+rsi]4218 vmovdqu xmm7,XMMWORD[((-128+112))+rsi]4219 4220 vmovdqa ymm10,YMMWORD[1152+rbp]4221 vinserti128 ymm0,ymm0,XMMWORD[r12],14222 vinserti128 ymm1,ymm1,XMMWORD[16+r12],14223 vpshufb ymm0,ymm0,ymm104224 vinserti128 ymm2,ymm2,XMMWORD[32+r12],14225 vpshufb ymm1,ymm1,ymm104226 vinserti128 ymm3,ymm3,XMMWORD[48+r12],14227 vpshufb ymm2,ymm2,ymm104228 vinserti128 ymm4,ymm4,XMMWORD[64+r12],14229 vpshufb ymm3,ymm3,ymm104230 vinserti128 ymm5,ymm5,XMMWORD[80+r12],14231 vpshufb ymm4,ymm4,ymm104232 vinserti128 ymm6,ymm6,XMMWORD[96+r12],14233 vpshufb ymm5,ymm5,ymm104234 vinserti128 ymm7,ymm7,XMMWORD[112+r12],14235 4236 vpaddq ymm8,ymm0,YMMWORD[((-128))+rbp]4237 vpshufb ymm6,ymm6,ymm104238 vpaddq ymm9,ymm1,YMMWORD[((-96))+rbp]4239 vpshufb ymm7,ymm7,ymm104240 vpaddq ymm10,ymm2,YMMWORD[((-64))+rbp]4241 vpaddq ymm11,ymm3,YMMWORD[((-32))+rbp]4242 vmovdqa YMMWORD[rsp],ymm84243 vpaddq ymm8,ymm4,YMMWORD[rbp]4244 vmovdqa YMMWORD[32+rsp],ymm94245 vpaddq ymm9,ymm5,YMMWORD[32+rbp]4246 vmovdqa YMMWORD[64+rsp],ymm104247 vpaddq ymm10,ymm6,YMMWORD[64+rbp]4248 vmovdqa YMMWORD[96+rsp],ymm114249 lea rsp,[((-128))+rsp]4250 vpaddq ymm11,ymm7,YMMWORD[96+rbp]4251 vmovdqa YMMWORD[rsp],ymm84252 xor r14,r144253 vmovdqa YMMWORD[32+rsp],ymm94254 mov rdi,rbx4255 vmovdqa YMMWORD[64+rsp],ymm104256 xor rdi,rcx4257 vmovdqa YMMWORD[96+rsp],ymm114258 mov r12,r94259 add rbp,16*2*84260 jmp NEAR $L$avx2_00_474261 4262 ALIGN 164263 $L$avx2_00_47:4264 lea rsp,[((-128))+rsp]4265 vpalignr ymm8,ymm1,ymm0,84266 add r11,QWORD[((0+256))+rsp]4267 and r12,r84268 rorx r13,r8,414269 vpalignr ymm11,ymm5,ymm4,84270 rorx r15,r8,184271 lea rax,[r14*1+rax]4272 lea r11,[r12*1+r11]4273 vpsrlq ymm10,ymm8,14274 andn r12,r8,r104275 xor r13,r154276 rorx r14,r8,144277 vpaddq ymm0,ymm0,ymm114278 vpsrlq ymm11,ymm8,74279 lea r11,[r12*1+r11]4280 xor r13,r144281 mov r15,rax4282 vpsllq ymm9,ymm8,564283 vpxor ymm8,ymm11,ymm104284 rorx r12,rax,394285 lea r11,[r13*1+r11]4286 xor r15,rbx4287 vpsrlq ymm10,ymm10,74288 vpxor ymm8,ymm8,ymm94289 rorx r14,rax,344290 rorx r13,rax,284291 lea rdx,[r11*1+rdx]4292 vpsllq ymm9,ymm9,74293 vpxor ymm8,ymm8,ymm104294 and rdi,r154295 xor r14,r124296 xor rdi,rbx4297 vpsrlq ymm11,ymm7,64298 vpxor ymm8,ymm8,ymm94299 xor r14,r134300 lea r11,[rdi*1+r11]4301 mov r12,r84302 vpsllq ymm10,ymm7,34303 vpaddq ymm0,ymm0,ymm84304 add r10,QWORD[((8+256))+rsp]4305 and r12,rdx4306 rorx r13,rdx,414307 vpsrlq ymm9,ymm7,194308 vpxor ymm11,ymm11,ymm104309 rorx rdi,rdx,184310 lea r11,[r14*1+r11]4311 lea r10,[r12*1+r10]4312 vpsllq ymm10,ymm10,424313 vpxor ymm11,ymm11,ymm94314 andn r12,rdx,r94315 xor r13,rdi4316 rorx r14,rdx,144317 vpsrlq ymm9,ymm9,424318 vpxor ymm11,ymm11,ymm104319 lea r10,[r12*1+r10]4320 xor r13,r144321 mov rdi,r114322 vpxor ymm11,ymm11,ymm94323 rorx r12,r11,394324 lea r10,[r13*1+r10]4325 xor rdi,rax4326 vpaddq ymm0,ymm0,ymm114327 rorx r14,r11,344328 rorx r13,r11,284329 lea rcx,[r10*1+rcx]4330 vpaddq ymm10,ymm0,YMMWORD[((-128))+rbp]4331 and r15,rdi4332 xor r14,r124333 xor r15,rax4334 xor r14,r134335 lea r10,[r15*1+r10]4336 mov r12,rdx4337 vmovdqa YMMWORD[rsp],ymm104338 vpalignr ymm8,ymm2,ymm1,84339 add r9,QWORD[((32+256))+rsp]4340 and r12,rcx4341 rorx r13,rcx,414342 vpalignr ymm11,ymm6,ymm5,84343 rorx r15,rcx,184344 lea r10,[r14*1+r10]4345 lea r9,[r12*1+r9]4346 vpsrlq ymm10,ymm8,14347 andn r12,rcx,r84348 xor r13,r154349 rorx r14,rcx,144350 vpaddq ymm1,ymm1,ymm114351 vpsrlq ymm11,ymm8,74352 lea r9,[r12*1+r9]4353 xor r13,r144354 mov r15,r104355 vpsllq ymm9,ymm8,564356 vpxor ymm8,ymm11,ymm104357 rorx r12,r10,394358 lea r9,[r13*1+r9]4359 xor r15,r114360 vpsrlq ymm10,ymm10,74361 vpxor ymm8,ymm8,ymm94362 rorx r14,r10,344363 rorx r13,r10,284364 lea rbx,[r9*1+rbx]4365 vpsllq ymm9,ymm9,74366 vpxor ymm8,ymm8,ymm104367 and rdi,r154368 xor r14,r124369 xor rdi,r114370 vpsrlq ymm11,ymm0,64371 vpxor ymm8,ymm8,ymm94372 xor r14,r134373 lea r9,[rdi*1+r9]4374 mov r12,rcx4375 vpsllq ymm10,ymm0,34376 vpaddq ymm1,ymm1,ymm84377 add r8,QWORD[((40+256))+rsp]4378 and r12,rbx4379 rorx r13,rbx,414380 vpsrlq ymm9,ymm0,194381 vpxor ymm11,ymm11,ymm104382 rorx rdi,rbx,184383 lea r9,[r14*1+r9]4384 lea r8,[r12*1+r8]4385 vpsllq ymm10,ymm10,424386 vpxor ymm11,ymm11,ymm94387 andn r12,rbx,rdx4388 xor r13,rdi4389 rorx r14,rbx,144390 vpsrlq ymm9,ymm9,424391 vpxor ymm11,ymm11,ymm104392 lea r8,[r12*1+r8]4393 xor r13,r144394 mov rdi,r94395 vpxor ymm11,ymm11,ymm94396 rorx r12,r9,394397 lea r8,[r13*1+r8]4398 xor rdi,r104399 vpaddq ymm1,ymm1,ymm114400 rorx r14,r9,344401 rorx r13,r9,284402 lea rax,[r8*1+rax]4403 vpaddq ymm10,ymm1,YMMWORD[((-96))+rbp]4404 and r15,rdi4405 xor r14,r124406 xor r15,r104407 xor r14,r134408 lea r8,[r15*1+r8]4409 mov r12,rbx4410 vmovdqa YMMWORD[32+rsp],ymm104411 vpalignr ymm8,ymm3,ymm2,84412 add rdx,QWORD[((64+256))+rsp]4413 and r12,rax4414 rorx r13,rax,414415 vpalignr ymm11,ymm7,ymm6,84416 rorx r15,rax,184417 lea r8,[r14*1+r8]4418 lea rdx,[r12*1+rdx]4419 vpsrlq ymm10,ymm8,14420 andn r12,rax,rcx4421 xor r13,r154422 rorx r14,rax,144423 vpaddq ymm2,ymm2,ymm114424 vpsrlq ymm11,ymm8,74425 lea rdx,[r12*1+rdx]4426 xor r13,r144427 mov r15,r84428 vpsllq ymm9,ymm8,564429 vpxor ymm8,ymm11,ymm104430 rorx r12,r8,394431 lea rdx,[r13*1+rdx]4432 xor r15,r94433 vpsrlq ymm10,ymm10,74434 vpxor ymm8,ymm8,ymm94435 rorx r14,r8,344436 rorx r13,r8,284437 lea r11,[rdx*1+r11]4438 vpsllq ymm9,ymm9,74439 vpxor ymm8,ymm8,ymm104440 and rdi,r154441 xor r14,r124442 xor rdi,r94443 vpsrlq ymm11,ymm1,64444 vpxor ymm8,ymm8,ymm94445 xor r14,r134446 lea rdx,[rdi*1+rdx]4447 mov r12,rax4448 vpsllq ymm10,ymm1,34449 vpaddq ymm2,ymm2,ymm84450 add rcx,QWORD[((72+256))+rsp]4451 and r12,r114452 rorx r13,r11,414453 vpsrlq ymm9,ymm1,194454 vpxor ymm11,ymm11,ymm104455 rorx rdi,r11,184456 lea rdx,[r14*1+rdx]4457 lea rcx,[r12*1+rcx]4458 vpsllq ymm10,ymm10,424459 vpxor ymm11,ymm11,ymm94460 andn r12,r11,rbx4461 xor r13,rdi4462 rorx r14,r11,144463 vpsrlq ymm9,ymm9,424464 vpxor ymm11,ymm11,ymm104465 lea rcx,[r12*1+rcx]4466 xor r13,r144467 mov rdi,rdx4468 vpxor ymm11,ymm11,ymm94469 rorx r12,rdx,394470 lea rcx,[r13*1+rcx]4471 xor rdi,r84472 vpaddq ymm2,ymm2,ymm114473 rorx r14,rdx,344474 rorx r13,rdx,284475 lea r10,[rcx*1+r10]4476 vpaddq ymm10,ymm2,YMMWORD[((-64))+rbp]4477 and r15,rdi4478 xor r14,r124479 xor r15,r84480 xor r14,r134481 lea rcx,[r15*1+rcx]4482 mov r12,r114483 vmovdqa YMMWORD[64+rsp],ymm104484 vpalignr ymm8,ymm4,ymm3,84485 add rbx,QWORD[((96+256))+rsp]4486 and r12,r104487 rorx r13,r10,414488 vpalignr ymm11,ymm0,ymm7,84489 rorx r15,r10,184490 lea rcx,[r14*1+rcx]4491 lea rbx,[r12*1+rbx]4492 vpsrlq ymm10,ymm8,14493 andn r12,r10,rax4494 xor r13,r154495 rorx r14,r10,144496 vpaddq ymm3,ymm3,ymm114497 vpsrlq ymm11,ymm8,74498 lea rbx,[r12*1+rbx]4499 xor r13,r144500 mov r15,rcx4501 vpsllq ymm9,ymm8,564502 vpxor ymm8,ymm11,ymm104503 rorx r12,rcx,394504 lea rbx,[r13*1+rbx]4505 xor r15,rdx4506 vpsrlq ymm10,ymm10,74507 vpxor ymm8,ymm8,ymm94508 rorx r14,rcx,344509 rorx r13,rcx,284510 lea r9,[rbx*1+r9]4511 vpsllq ymm9,ymm9,74512 vpxor ymm8,ymm8,ymm104513 and rdi,r154514 xor r14,r124515 xor rdi,rdx4516 vpsrlq ymm11,ymm2,64517 vpxor ymm8,ymm8,ymm94518 xor r14,r134519 lea rbx,[rdi*1+rbx]4520 mov r12,r104521 vpsllq ymm10,ymm2,34522 vpaddq ymm3,ymm3,ymm84523 add rax,QWORD[((104+256))+rsp]4524 and r12,r94525 rorx r13,r9,414526 vpsrlq ymm9,ymm2,194527 vpxor ymm11,ymm11,ymm104528 rorx rdi,r9,184529 lea rbx,[r14*1+rbx]4530 lea rax,[r12*1+rax]4531 vpsllq ymm10,ymm10,424532 vpxor ymm11,ymm11,ymm94533 andn r12,r9,r114534 xor r13,rdi4535 rorx r14,r9,144536 vpsrlq ymm9,ymm9,424537 vpxor ymm11,ymm11,ymm104538 lea rax,[r12*1+rax]4539 xor r13,r144540 mov rdi,rbx4541 vpxor ymm11,ymm11,ymm94542 rorx r12,rbx,394543 lea rax,[r13*1+rax]4544 xor rdi,rcx4545 vpaddq ymm3,ymm3,ymm114546 rorx r14,rbx,344547 rorx r13,rbx,284548 lea r8,[rax*1+r8]4549 vpaddq ymm10,ymm3,YMMWORD[((-32))+rbp]4550 and r15,rdi4551 xor r14,r124552 xor r15,rcx4553 xor r14,r134554 lea rax,[r15*1+rax]4555 mov r12,r94556 vmovdqa YMMWORD[96+rsp],ymm104557 lea rsp,[((-128))+rsp]4558 vpalignr ymm8,ymm5,ymm4,84559 add r11,QWORD[((0+256))+rsp]4560 and r12,r84561 rorx r13,r8,414562 vpalignr ymm11,ymm1,ymm0,84563 rorx r15,r8,184564 lea rax,[r14*1+rax]4565 lea r11,[r12*1+r11]4566 vpsrlq ymm10,ymm8,14567 andn r12,r8,r104568 xor r13,r154569 rorx r14,r8,144570 vpaddq ymm4,ymm4,ymm114571 vpsrlq ymm11,ymm8,74572 lea r11,[r12*1+r11]4573 xor r13,r144574 mov r15,rax4575 vpsllq ymm9,ymm8,564576 vpxor ymm8,ymm11,ymm104577 rorx r12,rax,394578 lea r11,[r13*1+r11]4579 xor r15,rbx4580 vpsrlq ymm10,ymm10,74581 vpxor ymm8,ymm8,ymm94582 rorx r14,rax,344583 rorx r13,rax,284584 lea rdx,[r11*1+rdx]4585 vpsllq ymm9,ymm9,74586 vpxor ymm8,ymm8,ymm104587 and rdi,r154588 xor r14,r124589 xor rdi,rbx4590 vpsrlq ymm11,ymm3,64591 vpxor ymm8,ymm8,ymm94592 xor r14,r134593 lea r11,[rdi*1+r11]4594 mov r12,r84595 vpsllq ymm10,ymm3,34596 vpaddq ymm4,ymm4,ymm84597 add r10,QWORD[((8+256))+rsp]4598 and r12,rdx4599 rorx r13,rdx,414600 vpsrlq ymm9,ymm3,194601 vpxor ymm11,ymm11,ymm104602 rorx rdi,rdx,184603 lea r11,[r14*1+r11]4604 lea r10,[r12*1+r10]4605 vpsllq ymm10,ymm10,424606 vpxor ymm11,ymm11,ymm94607 andn r12,rdx,r94608 xor r13,rdi4609 rorx r14,rdx,144610 vpsrlq ymm9,ymm9,424611 vpxor ymm11,ymm11,ymm104612 lea r10,[r12*1+r10]4613 xor r13,r144614 mov rdi,r114615 vpxor ymm11,ymm11,ymm94616 rorx r12,r11,394617 lea r10,[r13*1+r10]4618 xor rdi,rax4619 vpaddq ymm4,ymm4,ymm114620 rorx r14,r11,344621 rorx r13,r11,284622 lea rcx,[r10*1+rcx]4623 vpaddq ymm10,ymm4,YMMWORD[rbp]4624 and r15,rdi4625 xor r14,r124626 xor r15,rax4627 xor r14,r134628 lea r10,[r15*1+r10]4629 mov r12,rdx4630 vmovdqa YMMWORD[rsp],ymm104631 vpalignr ymm8,ymm6,ymm5,84632 add r9,QWORD[((32+256))+rsp]4633 and r12,rcx4634 rorx r13,rcx,414635 vpalignr ymm11,ymm2,ymm1,84636 rorx r15,rcx,184637 lea r10,[r14*1+r10]4638 lea r9,[r12*1+r9]4639 vpsrlq ymm10,ymm8,14640 andn r12,rcx,r84641 xor r13,r154642 rorx r14,rcx,144643 vpaddq ymm5,ymm5,ymm114644 vpsrlq ymm11,ymm8,74645 lea r9,[r12*1+r9]4646 xor r13,r144647 mov r15,r104648 vpsllq ymm9,ymm8,564649 vpxor ymm8,ymm11,ymm104650 rorx r12,r10,394651 lea r9,[r13*1+r9]4652 xor r15,r114653 vpsrlq ymm10,ymm10,74654 vpxor ymm8,ymm8,ymm94655 rorx r14,r10,344656 rorx r13,r10,284657 lea rbx,[r9*1+rbx]4658 vpsllq ymm9,ymm9,74659 vpxor ymm8,ymm8,ymm104660 and rdi,r154661 xor r14,r124662 xor rdi,r114663 vpsrlq ymm11,ymm4,64664 vpxor ymm8,ymm8,ymm94665 xor r14,r134666 lea r9,[rdi*1+r9]4667 mov r12,rcx4668 vpsllq ymm10,ymm4,34669 vpaddq ymm5,ymm5,ymm84670 add r8,QWORD[((40+256))+rsp]4671 and r12,rbx4672 rorx r13,rbx,414673 vpsrlq ymm9,ymm4,194674 vpxor ymm11,ymm11,ymm104675 rorx rdi,rbx,184676 lea r9,[r14*1+r9]4677 lea r8,[r12*1+r8]4678 vpsllq ymm10,ymm10,424679 vpxor ymm11,ymm11,ymm94680 andn r12,rbx,rdx4681 xor r13,rdi4682 rorx r14,rbx,144683 vpsrlq ymm9,ymm9,424684 vpxor ymm11,ymm11,ymm104685 lea r8,[r12*1+r8]4686 xor r13,r144687 mov rdi,r94688 vpxor ymm11,ymm11,ymm94689 rorx r12,r9,394690 lea r8,[r13*1+r8]4691 xor rdi,r104692 vpaddq ymm5,ymm5,ymm114693 rorx r14,r9,344694 rorx r13,r9,284695 lea rax,[r8*1+rax]4696 vpaddq ymm10,ymm5,YMMWORD[32+rbp]4697 and r15,rdi4698 xor r14,r124699 xor r15,r104700 xor r14,r134701 lea r8,[r15*1+r8]4702 mov r12,rbx4703 vmovdqa YMMWORD[32+rsp],ymm104704 vpalignr ymm8,ymm7,ymm6,84705 add rdx,QWORD[((64+256))+rsp]4706 and r12,rax4707 rorx r13,rax,414708 vpalignr ymm11,ymm3,ymm2,84709 rorx r15,rax,184710 lea r8,[r14*1+r8]4711 lea rdx,[r12*1+rdx]4712 vpsrlq ymm10,ymm8,14713 andn r12,rax,rcx4714 xor r13,r154715 rorx r14,rax,144716 vpaddq ymm6,ymm6,ymm114717 vpsrlq ymm11,ymm8,74718 lea rdx,[r12*1+rdx]4719 xor r13,r144720 mov r15,r84721 vpsllq ymm9,ymm8,564722 vpxor ymm8,ymm11,ymm104723 rorx r12,r8,394724 lea rdx,[r13*1+rdx]4725 xor r15,r94726 vpsrlq ymm10,ymm10,74727 vpxor ymm8,ymm8,ymm94728 rorx r14,r8,344729 rorx r13,r8,284730 lea r11,[rdx*1+r11]4731 vpsllq ymm9,ymm9,74732 vpxor ymm8,ymm8,ymm104733 and rdi,r154734 xor r14,r124735 xor rdi,r94736 vpsrlq ymm11,ymm5,64737 vpxor ymm8,ymm8,ymm94738 xor r14,r134739 lea rdx,[rdi*1+rdx]4740 mov r12,rax4741 vpsllq ymm10,ymm5,34742 vpaddq ymm6,ymm6,ymm84743 add rcx,QWORD[((72+256))+rsp]4744 and r12,r114745 rorx r13,r11,414746 vpsrlq ymm9,ymm5,194747 vpxor ymm11,ymm11,ymm104748 rorx rdi,r11,184749 lea rdx,[r14*1+rdx]4750 lea rcx,[r12*1+rcx]4751 vpsllq ymm10,ymm10,424752 vpxor ymm11,ymm11,ymm94753 andn r12,r11,rbx4754 xor r13,rdi4755 rorx r14,r11,144756 vpsrlq ymm9,ymm9,424757 vpxor ymm11,ymm11,ymm104758 lea rcx,[r12*1+rcx]4759 xor r13,r144760 mov rdi,rdx4761 vpxor ymm11,ymm11,ymm94762 rorx r12,rdx,394763 lea rcx,[r13*1+rcx]4764 xor rdi,r84765 vpaddq ymm6,ymm6,ymm114766 rorx r14,rdx,344767 rorx r13,rdx,284768 lea r10,[rcx*1+r10]4769 vpaddq ymm10,ymm6,YMMWORD[64+rbp]4770 and r15,rdi4771 xor r14,r124772 xor r15,r84773 xor r14,r134774 lea rcx,[r15*1+rcx]4775 mov r12,r114776 vmovdqa YMMWORD[64+rsp],ymm104777 vpalignr ymm8,ymm0,ymm7,84778 add rbx,QWORD[((96+256))+rsp]4779 and r12,r104780 rorx r13,r10,414781 vpalignr ymm11,ymm4,ymm3,84782 rorx r15,r10,184783 lea rcx,[r14*1+rcx]4784 lea rbx,[r12*1+rbx]4785 vpsrlq ymm10,ymm8,14786 andn r12,r10,rax4787 xor r13,r154788 rorx r14,r10,144789 vpaddq ymm7,ymm7,ymm114790 vpsrlq ymm11,ymm8,74791 lea rbx,[r12*1+rbx]4792 xor r13,r144793 mov r15,rcx4794 vpsllq ymm9,ymm8,564795 vpxor ymm8,ymm11,ymm104796 rorx r12,rcx,394797 lea rbx,[r13*1+rbx]4798 xor r15,rdx4799 vpsrlq ymm10,ymm10,74800 vpxor ymm8,ymm8,ymm94801 rorx r14,rcx,344802 rorx r13,rcx,284803 lea r9,[rbx*1+r9]4804 vpsllq ymm9,ymm9,74805 vpxor ymm8,ymm8,ymm104806 and rdi,r154807 xor r14,r124808 xor rdi,rdx4809 vpsrlq ymm11,ymm6,64810 vpxor ymm8,ymm8,ymm94811 xor r14,r134812 lea rbx,[rdi*1+rbx]4813 mov r12,r104814 vpsllq ymm10,ymm6,34815 vpaddq ymm7,ymm7,ymm84816 add rax,QWORD[((104+256))+rsp]4817 and r12,r94818 rorx r13,r9,414819 vpsrlq ymm9,ymm6,194820 vpxor ymm11,ymm11,ymm104821 rorx rdi,r9,184822 lea rbx,[r14*1+rbx]4823 lea rax,[r12*1+rax]4824 vpsllq ymm10,ymm10,424825 vpxor ymm11,ymm11,ymm94826 andn r12,r9,r114827 xor r13,rdi4828 rorx r14,r9,144829 vpsrlq ymm9,ymm9,424830 vpxor ymm11,ymm11,ymm104831 lea rax,[r12*1+rax]4832 xor r13,r144833 mov rdi,rbx4834 vpxor ymm11,ymm11,ymm94835 rorx r12,rbx,394836 lea rax,[r13*1+rax]4837 xor rdi,rcx4838 vpaddq ymm7,ymm7,ymm114839 rorx r14,rbx,344840 rorx r13,rbx,284841 lea r8,[rax*1+r8]4842 vpaddq ymm10,ymm7,YMMWORD[96+rbp]4843 and r15,rdi4844 xor r14,r124845 xor r15,rcx4846 xor r14,r134847 lea rax,[r15*1+rax]4848 mov r12,r94849 vmovdqa YMMWORD[96+rsp],ymm104850 lea rbp,[256+rbp]4851 cmp BYTE[((-121))+rbp],04852 jne NEAR $L$avx2_00_474853 add r11,QWORD[((0+128))+rsp]4854 and r12,r84855 rorx r13,r8,414856 rorx r15,r8,184857 lea rax,[r14*1+rax]4858 lea r11,[r12*1+r11]4859 andn r12,r8,r104860 xor r13,r154861 rorx r14,r8,144862 lea r11,[r12*1+r11]4863 xor r13,r144864 mov r15,rax4865 rorx r12,rax,394866 lea r11,[r13*1+r11]4867 xor r15,rbx4868 rorx r14,rax,344869 rorx r13,rax,284870 lea rdx,[r11*1+rdx]4871 and rdi,r154872 xor r14,r124873 xor rdi,rbx4874 xor r14,r134875 lea r11,[rdi*1+r11]4876 mov r12,r84877 add r10,QWORD[((8+128))+rsp]4878 and r12,rdx4879 rorx r13,rdx,414880 rorx rdi,rdx,184881 lea r11,[r14*1+r11]4882 lea r10,[r12*1+r10]4883 andn r12,rdx,r94884 xor r13,rdi4885 rorx r14,rdx,144886 lea r10,[r12*1+r10]4887 xor r13,r144888 mov rdi,r114889 rorx r12,r11,394890 lea r10,[r13*1+r10]4891 xor rdi,rax4892 rorx r14,r11,344893 rorx r13,r11,284894 lea rcx,[r10*1+rcx]4895 and r15,rdi4896 xor r14,r124897 xor r15,rax4898 xor r14,r134899 lea r10,[r15*1+r10]4900 mov r12,rdx4901 add r9,QWORD[((32+128))+rsp]4902 and r12,rcx4903 rorx r13,rcx,414904 rorx r15,rcx,184905 lea r10,[r14*1+r10]4906 lea r9,[r12*1+r9]4907 andn r12,rcx,r84908 xor r13,r154909 rorx r14,rcx,144910 lea r9,[r12*1+r9]4911 xor r13,r144912 mov r15,r104913 rorx r12,r10,394914 lea r9,[r13*1+r9]4915 xor r15,r114916 rorx r14,r10,344917 rorx r13,r10,284918 lea rbx,[r9*1+rbx]4919 and rdi,r154920 xor r14,r124921 xor rdi,r114922 xor r14,r134923 lea r9,[rdi*1+r9]4924 mov r12,rcx4925 add r8,QWORD[((40+128))+rsp]4926 and r12,rbx4927 rorx r13,rbx,414928 rorx rdi,rbx,184929 lea r9,[r14*1+r9]4930 lea r8,[r12*1+r8]4931 andn r12,rbx,rdx4932 xor r13,rdi4933 rorx r14,rbx,144934 lea r8,[r12*1+r8]4935 xor r13,r144936 mov rdi,r94937 rorx r12,r9,394938 lea r8,[r13*1+r8]4939 xor rdi,r104940 rorx r14,r9,344941 rorx r13,r9,284942 lea rax,[r8*1+rax]4943 and r15,rdi4944 xor r14,r124945 xor r15,r104946 xor r14,r134947 lea r8,[r15*1+r8]4948 mov r12,rbx4949 add rdx,QWORD[((64+128))+rsp]4950 and r12,rax4951 rorx r13,rax,414952 rorx r15,rax,184953 lea r8,[r14*1+r8]4954 lea rdx,[r12*1+rdx]4955 andn r12,rax,rcx4956 xor r13,r154957 rorx r14,rax,144958 lea rdx,[r12*1+rdx]4959 xor r13,r144960 mov r15,r84961 rorx r12,r8,394962 lea rdx,[r13*1+rdx]4963 xor r15,r94964 rorx r14,r8,344965 rorx r13,r8,284966 lea r11,[rdx*1+r11]4967 and rdi,r154968 xor r14,r124969 xor rdi,r94970 xor r14,r134971 lea rdx,[rdi*1+rdx]4972 mov r12,rax4973 add rcx,QWORD[((72+128))+rsp]4974 and r12,r114975 rorx r13,r11,414976 rorx rdi,r11,184977 lea rdx,[r14*1+rdx]4978 lea rcx,[r12*1+rcx]4979 andn r12,r11,rbx4980 xor r13,rdi4981 rorx r14,r11,144982 lea rcx,[r12*1+rcx]4983 xor r13,r144984 mov rdi,rdx4985 rorx r12,rdx,394986 lea rcx,[r13*1+rcx]4987 xor rdi,r84988 rorx r14,rdx,344989 rorx r13,rdx,284990 lea r10,[rcx*1+r10]4991 and r15,rdi4992 xor r14,r124993 xor r15,r84994 xor r14,r134995 lea rcx,[r15*1+rcx]4996 mov r12,r114997 add rbx,QWORD[((96+128))+rsp]4998 and r12,r104999 rorx r13,r10,415000 rorx r15,r10,185001 lea rcx,[r14*1+rcx]5002 lea rbx,[r12*1+rbx]5003 andn r12,r10,rax5004 xor r13,r155005 rorx r14,r10,145006 lea rbx,[r12*1+rbx]5007 xor r13,r145008 mov r15,rcx5009 rorx r12,rcx,395010 lea rbx,[r13*1+rbx]5011 xor r15,rdx5012 rorx r14,rcx,345013 rorx r13,rcx,285014 lea r9,[rbx*1+r9]5015 and rdi,r155016 xor r14,r125017 xor rdi,rdx5018 xor r14,r135019 lea rbx,[rdi*1+rbx]5020 mov r12,r105021 add rax,QWORD[((104+128))+rsp]5022 and r12,r95023 rorx r13,r9,415024 rorx rdi,r9,185025 lea rbx,[r14*1+rbx]5026 lea rax,[r12*1+rax]5027 andn r12,r9,r115028 xor r13,rdi5029 rorx r14,r9,145030 lea rax,[r12*1+rax]5031 xor r13,r145032 mov rdi,rbx5033 rorx r12,rbx,395034 lea rax,[r13*1+rax]5035 xor rdi,rcx5036 rorx r14,rbx,345037 rorx r13,rbx,285038 lea r8,[rax*1+r8]5039 and r15,rdi5040 xor r14,r125041 xor r15,rcx5042 xor r14,r135043 lea rax,[r15*1+rax]5044 mov r12,r95045 add r11,QWORD[rsp]5046 and r12,r85047 rorx r13,r8,415048 rorx r15,r8,185049 lea rax,[r14*1+rax]5050 lea r11,[r12*1+r11]5051 andn r12,r8,r105052 xor r13,r155053 rorx r14,r8,145054 lea r11,[r12*1+r11]5055 xor r13,r145056 mov r15,rax5057 rorx r12,rax,395058 lea r11,[r13*1+r11]5059 xor r15,rbx5060 rorx r14,rax,345061 rorx r13,rax,285062 lea rdx,[r11*1+rdx]5063 and rdi,r155064 xor r14,r125065 xor rdi,rbx5066 xor r14,r135067 lea r11,[rdi*1+r11]5068 mov r12,r85069 add r10,QWORD[8+rsp]5070 and r12,rdx5071 rorx r13,rdx,415072 rorx rdi,rdx,185073 lea r11,[r14*1+r11]5074 lea r10,[r12*1+r10]5075 andn r12,rdx,r95076 xor r13,rdi5077 rorx r14,rdx,145078 lea r10,[r12*1+r10]5079 xor r13,r145080 mov rdi,r115081 rorx r12,r11,395082 lea r10,[r13*1+r10]5083 xor rdi,rax5084 rorx r14,r11,345085 rorx r13,r11,285086 lea rcx,[r10*1+rcx]5087 and r15,rdi5088 xor r14,r125089 xor r15,rax5090 xor r14,r135091 lea r10,[r15*1+r10]5092 mov r12,rdx5093 add r9,QWORD[32+rsp]5094 and r12,rcx5095 rorx r13,rcx,415096 rorx r15,rcx,185097 lea r10,[r14*1+r10]5098 lea r9,[r12*1+r9]5099 andn r12,rcx,r85100 xor r13,r155101 rorx r14,rcx,145102 lea r9,[r12*1+r9]5103 xor r13,r145104 mov r15,r105105 rorx r12,r10,395106 lea r9,[r13*1+r9]5107 xor r15,r115108 rorx r14,r10,345109 rorx r13,r10,285110 lea rbx,[r9*1+rbx]5111 and rdi,r155112 xor r14,r125113 xor rdi,r115114 xor r14,r135115 lea r9,[rdi*1+r9]5116 mov r12,rcx5117 add r8,QWORD[40+rsp]5118 and r12,rbx5119 rorx r13,rbx,415120 rorx rdi,rbx,185121 lea r9,[r14*1+r9]5122 lea r8,[r12*1+r8]5123 andn r12,rbx,rdx5124 xor r13,rdi5125 rorx r14,rbx,145126 lea r8,[r12*1+r8]5127 xor r13,r145128 mov rdi,r95129 rorx r12,r9,395130 lea r8,[r13*1+r8]5131 xor rdi,r105132 rorx r14,r9,345133 rorx r13,r9,285134 lea rax,[r8*1+rax]5135 and r15,rdi5136 xor r14,r125137 xor r15,r105138 xor r14,r135139 lea r8,[r15*1+r8]5140 mov r12,rbx5141 add rdx,QWORD[64+rsp]5142 and r12,rax5143 rorx r13,rax,415144 rorx r15,rax,185145 lea r8,[r14*1+r8]5146 lea rdx,[r12*1+rdx]5147 andn r12,rax,rcx5148 xor r13,r155149 rorx r14,rax,145150 lea rdx,[r12*1+rdx]5151 xor r13,r145152 mov r15,r85153 rorx r12,r8,395154 lea rdx,[r13*1+rdx]5155 xor r15,r95156 rorx r14,r8,345157 rorx r13,r8,285158 lea r11,[rdx*1+r11]5159 and rdi,r155160 xor r14,r125161 xor rdi,r95162 xor r14,r135163 lea rdx,[rdi*1+rdx]5164 mov r12,rax5165 add rcx,QWORD[72+rsp]5166 and r12,r115167 rorx r13,r11,415168 rorx rdi,r11,185169 lea rdx,[r14*1+rdx]5170 lea rcx,[r12*1+rcx]5171 andn r12,r11,rbx5172 xor r13,rdi5173 rorx r14,r11,145174 lea rcx,[r12*1+rcx]5175 xor r13,r145176 mov rdi,rdx5177 rorx r12,rdx,395178 lea rcx,[r13*1+rcx]5179 xor rdi,r85180 rorx r14,rdx,345181 rorx r13,rdx,285182 lea r10,[rcx*1+r10]5183 and r15,rdi5184 xor r14,r125185 xor r15,r85186 xor r14,r135187 lea rcx,[r15*1+rcx]5188 mov r12,r115189 add rbx,QWORD[96+rsp]5190 and r12,r105191 rorx r13,r10,415192 rorx r15,r10,185193 lea rcx,[r14*1+rcx]5194 lea rbx,[r12*1+rbx]5195 andn r12,r10,rax5196 xor r13,r155197 rorx r14,r10,145198 lea rbx,[r12*1+rbx]5199 xor r13,r145200 mov r15,rcx5201 rorx r12,rcx,395202 lea rbx,[r13*1+rbx]5203 xor r15,rdx5204 rorx r14,rcx,345205 rorx r13,rcx,285206 lea r9,[rbx*1+r9]5207 and rdi,r155208 xor r14,r125209 xor rdi,rdx5210 xor r14,r135211 lea rbx,[rdi*1+rbx]5212 mov r12,r105213 add rax,QWORD[104+rsp]5214 and r12,r95215 rorx r13,r9,415216 rorx rdi,r9,185217 lea rbx,[r14*1+rbx]5218 lea rax,[r12*1+rax]5219 andn r12,r9,r115220 xor r13,rdi5221 rorx r14,r9,145222 lea rax,[r12*1+rax]5223 xor r13,r145224 mov rdi,rbx5225 rorx r12,rbx,395226 lea rax,[r13*1+rax]5227 xor rdi,rcx5228 rorx r14,rbx,345229 rorx r13,rbx,285230 lea r8,[rax*1+r8]5231 and r15,rdi5232 xor r14,r125233 xor r15,rcx5234 xor r14,r135235 lea rax,[r15*1+rax]5236 mov r12,r95237 mov rdi,QWORD[1280+rsp]5238 add rax,r145239 5240 lea rbp,[1152+rsp]5241 5242 add rax,QWORD[rdi]5243 add rbx,QWORD[8+rdi]5244 add rcx,QWORD[16+rdi]5245 add rdx,QWORD[24+rdi]5246 add r8,QWORD[32+rdi]5247 add r9,QWORD[40+rdi]5248 add r10,QWORD[48+rdi]5249 add r11,QWORD[56+rdi]5250 5251 mov QWORD[rdi],rax5252 mov QWORD[8+rdi],rbx5253 mov QWORD[16+rdi],rcx5254 mov QWORD[24+rdi],rdx5255 mov QWORD[32+rdi],r85256 mov QWORD[40+rdi],r95257 mov QWORD[48+rdi],r105258 mov QWORD[56+rdi],r115259 5260 cmp rsi,QWORD[144+rbp]5261 je NEAR $L$done_avx25262 5263 xor r14,r145264 mov rdi,rbx5265 xor rdi,rcx5266 mov r12,r95267 jmp NEAR $L$ower_avx25268 ALIGN 165269 $L$ower_avx2:5270 add r11,QWORD[((0+16))+rbp]5271 and r12,r85272 rorx r13,r8,415273 rorx r15,r8,185274 lea rax,[r14*1+rax]5275 lea r11,[r12*1+r11]5276 andn r12,r8,r105277 xor r13,r155278 rorx r14,r8,145279 lea r11,[r12*1+r11]5280 xor r13,r145281 mov r15,rax5282 rorx r12,rax,395283 lea r11,[r13*1+r11]5284 xor r15,rbx5285 rorx r14,rax,345286 rorx r13,rax,285287 lea rdx,[r11*1+rdx]5288 and rdi,r155289 xor r14,r125290 xor rdi,rbx5291 xor r14,r135292 lea r11,[rdi*1+r11]5293 mov r12,r85294 add r10,QWORD[((8+16))+rbp]5295 and r12,rdx5296 rorx r13,rdx,415297 rorx rdi,rdx,185298 lea r11,[r14*1+r11]5299 lea r10,[r12*1+r10]5300 andn r12,rdx,r95301 xor r13,rdi5302 rorx r14,rdx,145303 lea r10,[r12*1+r10]5304 xor r13,r145305 mov rdi,r115306 rorx r12,r11,395307 lea r10,[r13*1+r10]5308 xor rdi,rax5309 rorx r14,r11,345310 rorx r13,r11,285311 lea rcx,[r10*1+rcx]5312 and r15,rdi5313 xor r14,r125314 xor r15,rax5315 xor r14,r135316 lea r10,[r15*1+r10]5317 mov r12,rdx5318 add r9,QWORD[((32+16))+rbp]5319 and r12,rcx5320 rorx r13,rcx,415321 rorx r15,rcx,185322 lea r10,[r14*1+r10]5323 lea r9,[r12*1+r9]5324 andn r12,rcx,r85325 xor r13,r155326 rorx r14,rcx,145327 lea r9,[r12*1+r9]5328 xor r13,r145329 mov r15,r105330 rorx r12,r10,395331 lea r9,[r13*1+r9]5332 xor r15,r115333 rorx r14,r10,345334 rorx r13,r10,285335 lea rbx,[r9*1+rbx]5336 and rdi,r155337 xor r14,r125338 xor rdi,r115339 xor r14,r135340 lea r9,[rdi*1+r9]5341 mov r12,rcx5342 add r8,QWORD[((40+16))+rbp]5343 and r12,rbx5344 rorx r13,rbx,415345 rorx rdi,rbx,185346 lea r9,[r14*1+r9]5347 lea r8,[r12*1+r8]5348 andn r12,rbx,rdx5349 xor r13,rdi5350 rorx r14,rbx,145351 lea r8,[r12*1+r8]5352 xor r13,r145353 mov rdi,r95354 rorx r12,r9,395355 lea r8,[r13*1+r8]5356 xor rdi,r105357 rorx r14,r9,345358 rorx r13,r9,285359 lea rax,[r8*1+rax]5360 and r15,rdi5361 xor r14,r125362 xor r15,r105363 xor r14,r135364 lea r8,[r15*1+r8]5365 mov r12,rbx5366 add rdx,QWORD[((64+16))+rbp]5367 and r12,rax5368 rorx r13,rax,415369 rorx r15,rax,185370 lea r8,[r14*1+r8]5371 lea rdx,[r12*1+rdx]5372 andn r12,rax,rcx5373 xor r13,r155374 rorx r14,rax,145375 lea rdx,[r12*1+rdx]5376 xor r13,r145377 mov r15,r85378 rorx r12,r8,395379 lea rdx,[r13*1+rdx]5380 xor r15,r95381 rorx r14,r8,345382 rorx r13,r8,285383 lea r11,[rdx*1+r11]5384 and rdi,r155385 xor r14,r125386 xor rdi,r95387 xor r14,r135388 lea rdx,[rdi*1+rdx]5389 mov r12,rax5390 add rcx,QWORD[((72+16))+rbp]5391 and r12,r115392 rorx r13,r11,415393 rorx rdi,r11,185394 lea rdx,[r14*1+rdx]5395 lea rcx,[r12*1+rcx]5396 andn r12,r11,rbx5397 xor r13,rdi5398 rorx r14,r11,145399 lea rcx,[r12*1+rcx]5400 xor r13,r145401 mov rdi,rdx5402 rorx r12,rdx,395403 lea rcx,[r13*1+rcx]5404 xor rdi,r85405 rorx r14,rdx,345406 rorx r13,rdx,285407 lea r10,[rcx*1+r10]5408 and r15,rdi5409 xor r14,r125410 xor r15,r85411 xor r14,r135412 lea rcx,[r15*1+rcx]5413 mov r12,r115414 add rbx,QWORD[((96+16))+rbp]5415 and r12,r105416 rorx r13,r10,415417 rorx r15,r10,185418 lea rcx,[r14*1+rcx]5419 lea rbx,[r12*1+rbx]5420 andn r12,r10,rax5421 xor r13,r155422 rorx r14,r10,145423 lea rbx,[r12*1+rbx]5424 xor r13,r145425 mov r15,rcx5426 rorx r12,rcx,395427 lea rbx,[r13*1+rbx]5428 xor r15,rdx5429 rorx r14,rcx,345430 rorx r13,rcx,285431 lea r9,[rbx*1+r9]5432 and rdi,r155433 xor r14,r125434 xor rdi,rdx5435 xor r14,r135436 lea rbx,[rdi*1+rbx]5437 mov r12,r105438 add rax,QWORD[((104+16))+rbp]5439 and r12,r95440 rorx r13,r9,415441 rorx rdi,r9,185442 lea rbx,[r14*1+rbx]5443 lea rax,[r12*1+rax]5444 andn r12,r9,r115445 xor r13,rdi5446 rorx r14,r9,145447 lea rax,[r12*1+rax]5448 xor r13,r145449 mov rdi,rbx5450 rorx r12,rbx,395451 lea rax,[r13*1+rax]5452 xor rdi,rcx5453 rorx r14,rbx,345454 rorx r13,rbx,285455 lea r8,[rax*1+r8]5456 and r15,rdi5457 xor r14,r125458 xor r15,rcx5459 xor r14,r135460 lea rax,[r15*1+rax]5461 mov r12,r95462 lea rbp,[((-128))+rbp]5463 cmp rbp,rsp5464 jae NEAR $L$ower_avx25465 5466 mov rdi,QWORD[1280+rsp]5467 add rax,r145468 5469 lea rsp,[1152+rsp]5470 5471 5472 5473 add rax,QWORD[rdi]5474 add rbx,QWORD[8+rdi]5475 add rcx,QWORD[16+rdi]5476 add rdx,QWORD[24+rdi]5477 add r8,QWORD[32+rdi]5478 add r9,QWORD[40+rdi]5479 lea rsi,[256+rsi]5480 add r10,QWORD[48+rdi]5481 mov r12,rsi5482 add r11,QWORD[56+rdi]5483 cmp rsi,QWORD[((128+16))+rsp]5484 5485 mov QWORD[rdi],rax5486 cmove r12,rsp5487 mov QWORD[8+rdi],rbx5488 mov QWORD[16+rdi],rcx5489 mov QWORD[24+rdi],rdx5490 mov QWORD[32+rdi],r85491 mov QWORD[40+rdi],r95492 mov QWORD[48+rdi],r105493 mov QWORD[56+rdi],r115494 5495 jbe NEAR $L$oop_avx25496 lea rbp,[rsp]5497 5498 5499 5500 5501 $L$done_avx2:5502 mov rsi,QWORD[152+rbp]5503 5504 vzeroupper5505 movaps xmm6,XMMWORD[((128+32))+rbp]5506 movaps xmm7,XMMWORD[((128+48))+rbp]5507 movaps xmm8,XMMWORD[((128+64))+rbp]5508 movaps xmm9,XMMWORD[((128+80))+rbp]5509 movaps xmm10,XMMWORD[((128+96))+rbp]5510 movaps xmm11,XMMWORD[((128+112))+rbp]5511 mov r15,QWORD[((-48))+rsi]5512 5513 mov r14,QWORD[((-40))+rsi]5514 5515 mov r13,QWORD[((-32))+rsi]5516 5517 mov r12,QWORD[((-24))+rsi]5518 5519 mov rbp,QWORD[((-16))+rsi]5520 5521 mov rbx,QWORD[((-8))+rsi]5522 5523 lea rsp,[rsi]5524 5525 $L$epilogue_avx2:5526 mov rdi,QWORD[8+rsp] ;WIN64 epilogue5527 mov rsi,QWORD[16+rsp]5528 DB 0F3h,0C3h ;repret5529 5530 $L$SEH_end_sha512_block_data_order_avx2:5531 1822 EXTERN __imp_RtlVirtualUnwind 5532 1823 … … 5561 1852 cmp rbx,r10 5562 1853 jae NEAR $L$in_prologue 5563 lea r10,[$L$avx2_shortcut]5564 cmp rbx,r105565 jb NEAR $L$not_in_avx25566 5567 and rax,-256*85568 add rax,11525569 $L$not_in_avx2:5570 1854 mov rsi,rax 5571 1855 mov rax,QWORD[((128+24))+rax] … … 5637 1921 DD $L$SEH_end_sha512_block_data_order wrt ..imagebase 5638 1922 DD $L$SEH_info_sha512_block_data_order wrt ..imagebase 5639 DD $L$SEH_begin_sha512_block_data_order_xop wrt ..imagebase5640 DD $L$SEH_end_sha512_block_data_order_xop wrt ..imagebase5641 DD $L$SEH_info_sha512_block_data_order_xop wrt ..imagebase5642 DD $L$SEH_begin_sha512_block_data_order_avx wrt ..imagebase5643 DD $L$SEH_end_sha512_block_data_order_avx wrt ..imagebase5644 DD $L$SEH_info_sha512_block_data_order_avx wrt ..imagebase5645 DD $L$SEH_begin_sha512_block_data_order_avx2 wrt ..imagebase5646 DD $L$SEH_end_sha512_block_data_order_avx2 wrt ..imagebase5647 DD $L$SEH_info_sha512_block_data_order_avx2 wrt ..imagebase5648 1923 section .xdata rdata align=8 5649 1924 ALIGN 8 … … 5652 1927 DD se_handler wrt ..imagebase 5653 1928 DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase 5654 $L$SEH_info_sha512_block_data_order_xop:5655 DB 9,0,0,05656 DD se_handler wrt ..imagebase5657 DD $L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase5658 $L$SEH_info_sha512_block_data_order_avx:5659 DB 9,0,0,05660 DD se_handler wrt ..imagebase5661 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase5662 $L$SEH_info_sha512_block_data_order_avx2:5663 DB 9,0,0,05664 DD se_handler wrt ..imagebase5665 DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/x25519-x86_64.S
r94083 r95221 410 410 411 411 $L$SEH_end_x25519_fe51_mul121666: 412 EXTERN OPENSSL_ia32cap_P413 412 global x25519_fe64_eligible 414 413 … … 416 415 x25519_fe64_eligible: 417 416 418 mov ecx,DWORD[((OPENSSL_ia32cap_P+8))]419 417 xor eax,eax 420 and ecx,0x80100421 cmp ecx,0x80100422 cmove eax,ecx423 418 DB 0F3h,0C3h ;repret 424 419 … … 427 422 global x25519_fe64_mul 428 423 429 ALIGN 32 424 global x25519_fe64_sqr 425 global x25519_fe64_mul121666 426 global x25519_fe64_add 427 global x25519_fe64_sub 428 global x25519_fe64_tobytes 430 429 x25519_fe64_mul: 431 mov QWORD[8+rsp],rdi ;WIN64 prologue432 mov QWORD[16+rsp],rsi433 mov rax,rsp434 $L$SEH_begin_x25519_fe64_mul:435 mov rdi,rcx436 mov rsi,rdx437 mov rdx,r8438 439 440 441 push rbp442 443 push rbx444 445 push r12446 447 push r13448 449 push r14450 451 push r15452 453 push rdi454 455 lea rsp,[((-16))+rsp]456 457 $L$fe64_mul_body:458 459 mov rax,rdx460 mov rbp,QWORD[rdx]461 mov rdx,QWORD[rsi]462 mov rcx,QWORD[8+rax]463 mov r14,QWORD[16+rax]464 mov r15,QWORD[24+rax]465 466 mulx rax,r8,rbp467 xor edi,edi468 mulx rbx,r9,rcx469 adcx r9,rax470 mulx rax,r10,r14471 adcx r10,rbx472 mulx r12,r11,r15473 mov rdx,QWORD[8+rsi]474 adcx r11,rax475 mov QWORD[rsp],r14476 adcx r12,rdi477 478 mulx rbx,rax,rbp479 adox r9,rax480 adcx r10,rbx481 mulx rbx,rax,rcx482 adox r10,rax483 adcx r11,rbx484 mulx rbx,rax,r14485 adox r11,rax486 adcx r12,rbx487 mulx r13,rax,r15488 mov rdx,QWORD[16+rsi]489 adox r12,rax490 adcx r13,rdi491 adox r13,rdi492 493 mulx rbx,rax,rbp494 adcx r10,rax495 adox r11,rbx496 mulx rbx,rax,rcx497 adcx r11,rax498 adox r12,rbx499 mulx rbx,rax,r14500 adcx r12,rax501 adox r13,rbx502 mulx r14,rax,r15503 mov rdx,QWORD[24+rsi]504 adcx r13,rax505 adox r14,rdi506 adcx r14,rdi507 508 mulx rbx,rax,rbp509 adox r11,rax510 adcx r12,rbx511 mulx rbx,rax,rcx512 adox r12,rax513 adcx r13,rbx514 mulx rbx,rax,QWORD[rsp]515 adox r13,rax516 adcx r14,rbx517 mulx r15,rax,r15518 mov edx,38519 adox r14,rax520 adcx r15,rdi521 adox r15,rdi522 523 jmp NEAR $L$reduce64524 $L$fe64_mul_epilogue:525 526 $L$SEH_end_x25519_fe64_mul:527 528 global x25519_fe64_sqr529 530 ALIGN 32531 430 x25519_fe64_sqr: 532 mov QWORD[8+rsp],rdi ;WIN64 prologue 533 mov QWORD[16+rsp],rsi 534 mov rax,rsp 535 $L$SEH_begin_x25519_fe64_sqr: 536 mov rdi,rcx 537 mov rsi,rdx 538 539 540 541 push rbp 542 543 push rbx 544 545 push r12 546 547 push r13 548 549 push r14 550 551 push r15 552 553 push rdi 554 555 lea rsp,[((-16))+rsp] 556 557 $L$fe64_sqr_body: 558 559 mov rdx,QWORD[rsi] 560 mov rcx,QWORD[8+rsi] 561 mov rbp,QWORD[16+rsi] 562 mov rsi,QWORD[24+rsi] 563 564 565 mulx r15,r8,rdx 566 mulx rax,r9,rcx 567 xor edi,edi 568 mulx rbx,r10,rbp 569 adcx r10,rax 570 mulx r12,r11,rsi 571 mov rdx,rcx 572 adcx r11,rbx 573 adcx r12,rdi 574 575 576 mulx rbx,rax,rbp 577 adox r11,rax 578 adcx r12,rbx 579 mulx r13,rax,rsi 580 mov rdx,rbp 581 adox r12,rax 582 adcx r13,rdi 583 584 585 mulx r14,rax,rsi 586 mov rdx,rcx 587 adox r13,rax 588 adcx r14,rdi 589 adox r14,rdi 590 591 adcx r9,r9 592 adox r9,r15 593 adcx r10,r10 594 mulx rbx,rax,rdx 595 mov rdx,rbp 596 adcx r11,r11 597 adox r10,rax 598 adcx r12,r12 599 adox r11,rbx 600 mulx rbx,rax,rdx 601 mov rdx,rsi 602 adcx r13,r13 603 adox r12,rax 604 adcx r14,r14 605 adox r13,rbx 606 mulx r15,rax,rdx 607 mov edx,38 608 adox r14,rax 609 adcx r15,rdi 610 adox r15,rdi 611 jmp NEAR $L$reduce64 612 613 ALIGN 32 614 $L$reduce64: 615 mulx rbx,rax,r12 616 adcx r8,rax 617 adox r9,rbx 618 mulx rbx,rax,r13 619 adcx r9,rax 620 adox r10,rbx 621 mulx rbx,rax,r14 622 adcx r10,rax 623 adox r11,rbx 624 mulx r12,rax,r15 625 adcx r11,rax 626 adox r12,rdi 627 adcx r12,rdi 628 629 mov rdi,QWORD[16+rsp] 630 imul r12,rdx 631 632 add r8,r12 633 adc r9,0 634 adc r10,0 635 adc r11,0 636 637 sbb rax,rax 638 and rax,38 639 640 add r8,rax 641 mov QWORD[8+rdi],r9 642 mov QWORD[16+rdi],r10 643 mov QWORD[24+rdi],r11 644 mov QWORD[rdi],r8 645 646 mov r15,QWORD[24+rsp] 647 648 mov r14,QWORD[32+rsp] 649 650 mov r13,QWORD[40+rsp] 651 652 mov r12,QWORD[48+rsp] 653 654 mov rbx,QWORD[56+rsp] 655 656 mov rbp,QWORD[64+rsp] 657 658 lea rsp,[72+rsp] 659 660 $L$fe64_sqr_epilogue: 661 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 662 mov rsi,QWORD[16+rsp] 431 x25519_fe64_mul121666: 432 x25519_fe64_add: 433 x25519_fe64_sub: 434 x25519_fe64_tobytes: 435 436 DB 0x0f,0x0b 663 437 DB 0F3h,0C3h ;repret 664 438 665 $L$SEH_end_x25519_fe64_sqr: 666 667 global x25519_fe64_mul121666 668 669 ALIGN 32 670 x25519_fe64_mul121666: 671 mov QWORD[8+rsp],rdi ;WIN64 prologue 672 mov QWORD[16+rsp],rsi 673 mov rax,rsp 674 $L$SEH_begin_x25519_fe64_mul121666: 675 mov rdi,rcx 676 mov rsi,rdx 677 678 679 $L$fe64_mul121666_body: 680 681 mov edx,121666 682 mulx rcx,r8,QWORD[rsi] 683 mulx rax,r9,QWORD[8+rsi] 684 add r9,rcx 685 mulx rcx,r10,QWORD[16+rsi] 686 adc r10,rax 687 mulx rax,r11,QWORD[24+rsi] 688 adc r11,rcx 689 adc rax,0 690 691 imul rax,rax,38 692 693 add r8,rax 694 adc r9,0 695 adc r10,0 696 adc r11,0 697 698 sbb rax,rax 699 and rax,38 700 701 add r8,rax 702 mov QWORD[8+rdi],r9 703 mov QWORD[16+rdi],r10 704 mov QWORD[24+rdi],r11 705 mov QWORD[rdi],r8 706 707 $L$fe64_mul121666_epilogue: 708 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 709 mov rsi,QWORD[16+rsp] 710 DB 0F3h,0C3h ;repret 711 712 $L$SEH_end_x25519_fe64_mul121666: 713 714 global x25519_fe64_add 715 716 ALIGN 32 717 x25519_fe64_add: 718 mov QWORD[8+rsp],rdi ;WIN64 prologue 719 mov QWORD[16+rsp],rsi 720 mov rax,rsp 721 $L$SEH_begin_x25519_fe64_add: 722 mov rdi,rcx 723 mov rsi,rdx 724 mov rdx,r8 725 726 727 $L$fe64_add_body: 728 729 mov r8,QWORD[rsi] 730 mov r9,QWORD[8+rsi] 731 mov r10,QWORD[16+rsi] 732 mov r11,QWORD[24+rsi] 733 734 add r8,QWORD[rdx] 735 adc r9,QWORD[8+rdx] 736 adc r10,QWORD[16+rdx] 737 adc r11,QWORD[24+rdx] 738 739 sbb rax,rax 740 and rax,38 741 742 add r8,rax 743 adc r9,0 744 adc r10,0 745 mov QWORD[8+rdi],r9 746 adc r11,0 747 mov QWORD[16+rdi],r10 748 sbb rax,rax 749 mov QWORD[24+rdi],r11 750 and rax,38 751 752 add r8,rax 753 mov QWORD[rdi],r8 754 755 $L$fe64_add_epilogue: 756 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 757 mov rsi,QWORD[16+rsp] 758 DB 0F3h,0C3h ;repret 759 760 $L$SEH_end_x25519_fe64_add: 761 762 global x25519_fe64_sub 763 764 ALIGN 32 765 x25519_fe64_sub: 766 mov QWORD[8+rsp],rdi ;WIN64 prologue 767 mov QWORD[16+rsp],rsi 768 mov rax,rsp 769 $L$SEH_begin_x25519_fe64_sub: 770 mov rdi,rcx 771 mov rsi,rdx 772 mov rdx,r8 773 774 775 $L$fe64_sub_body: 776 777 mov r8,QWORD[rsi] 778 mov r9,QWORD[8+rsi] 779 mov r10,QWORD[16+rsi] 780 mov r11,QWORD[24+rsi] 781 782 sub r8,QWORD[rdx] 783 sbb r9,QWORD[8+rdx] 784 sbb r10,QWORD[16+rdx] 785 sbb r11,QWORD[24+rdx] 786 787 sbb rax,rax 788 and rax,38 789 790 sub r8,rax 791 sbb r9,0 792 sbb r10,0 793 mov QWORD[8+rdi],r9 794 sbb r11,0 795 mov QWORD[16+rdi],r10 796 sbb rax,rax 797 mov QWORD[24+rdi],r11 798 and rax,38 799 800 sub r8,rax 801 mov QWORD[rdi],r8 802 803 $L$fe64_sub_epilogue: 804 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 805 mov rsi,QWORD[16+rsp] 806 DB 0F3h,0C3h ;repret 807 808 $L$SEH_end_x25519_fe64_sub: 809 810 global x25519_fe64_tobytes 811 812 ALIGN 32 813 x25519_fe64_tobytes: 814 mov QWORD[8+rsp],rdi ;WIN64 prologue 815 mov QWORD[16+rsp],rsi 816 mov rax,rsp 817 $L$SEH_begin_x25519_fe64_tobytes: 818 mov rdi,rcx 819 mov rsi,rdx 820 821 822 $L$fe64_to_body: 823 824 mov r8,QWORD[rsi] 825 mov r9,QWORD[8+rsi] 826 mov r10,QWORD[16+rsi] 827 mov r11,QWORD[24+rsi] 828 829 830 lea rax,[r11*1+r11] 831 sar r11,63 832 shr rax,1 833 and r11,19 834 add r11,19 835 836 add r8,r11 837 adc r9,0 838 adc r10,0 839 adc rax,0 840 841 lea r11,[rax*1+rax] 842 sar rax,63 843 shr r11,1 844 not rax 845 and rax,19 846 847 sub r8,rax 848 sbb r9,0 849 sbb r10,0 850 sbb r11,0 851 852 mov QWORD[rdi],r8 853 mov QWORD[8+rdi],r9 854 mov QWORD[16+rdi],r10 855 mov QWORD[24+rdi],r11 856 857 $L$fe64_to_epilogue: 858 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 859 mov rsi,QWORD[16+rsp] 860 DB 0F3h,0C3h ;repret 861 862 $L$SEH_end_x25519_fe64_tobytes: 439 863 440 DB 88,50,53,53,49,57,32,112,114,105,109,105,116,105,118,101 864 441 DB 115,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82 … … 997 574 DD $L$SEH_end_x25519_fe51_mul121666 wrt ..imagebase 998 575 DD $L$SEH_info_x25519_fe51_mul121666 wrt ..imagebase 999 DD $L$SEH_begin_x25519_fe64_mul wrt ..imagebase1000 DD $L$SEH_end_x25519_fe64_mul wrt ..imagebase1001 DD $L$SEH_info_x25519_fe64_mul wrt ..imagebase1002 1003 DD $L$SEH_begin_x25519_fe64_sqr wrt ..imagebase1004 DD $L$SEH_end_x25519_fe64_sqr wrt ..imagebase1005 DD $L$SEH_info_x25519_fe64_sqr wrt ..imagebase1006 1007 DD $L$SEH_begin_x25519_fe64_mul121666 wrt ..imagebase1008 DD $L$SEH_end_x25519_fe64_mul121666 wrt ..imagebase1009 DD $L$SEH_info_x25519_fe64_mul121666 wrt ..imagebase1010 1011 DD $L$SEH_begin_x25519_fe64_add wrt ..imagebase1012 DD $L$SEH_end_x25519_fe64_add wrt ..imagebase1013 DD $L$SEH_info_x25519_fe64_add wrt ..imagebase1014 1015 DD $L$SEH_begin_x25519_fe64_sub wrt ..imagebase1016 DD $L$SEH_end_x25519_fe64_sub wrt ..imagebase1017 DD $L$SEH_info_x25519_fe64_sub wrt ..imagebase1018 1019 DD $L$SEH_begin_x25519_fe64_tobytes wrt ..imagebase1020 DD $L$SEH_end_x25519_fe64_tobytes wrt ..imagebase1021 DD $L$SEH_info_x25519_fe64_tobytes wrt ..imagebase1022 576 section .xdata rdata align=8 1023 577 ALIGN 8 … … 1037 591 DD $L$fe51_mul121666_body wrt ..imagebase,$L$fe51_mul121666_epilogue wrt ..imagebase 1038 592 DD 88,0 1039 $L$SEH_info_x25519_fe64_mul:1040 DB 9,0,0,01041 DD full_handler wrt ..imagebase1042 DD $L$fe64_mul_body wrt ..imagebase,$L$fe64_mul_epilogue wrt ..imagebase1043 DD 72,01044 $L$SEH_info_x25519_fe64_sqr:1045 DB 9,0,0,01046 DD full_handler wrt ..imagebase1047 DD $L$fe64_sqr_body wrt ..imagebase,$L$fe64_sqr_epilogue wrt ..imagebase1048 DD 72,01049 $L$SEH_info_x25519_fe64_mul121666:1050 DB 9,0,0,01051 DD short_handler wrt ..imagebase1052 DD $L$fe64_mul121666_body wrt ..imagebase,$L$fe64_mul121666_epilogue wrt ..imagebase1053 $L$SEH_info_x25519_fe64_add:1054 DB 9,0,0,01055 DD short_handler wrt ..imagebase1056 DD $L$fe64_add_body wrt ..imagebase,$L$fe64_add_epilogue wrt ..imagebase1057 $L$SEH_info_x25519_fe64_sub:1058 DB 9,0,0,01059 DD short_handler wrt ..imagebase1060 DD $L$fe64_sub_body wrt ..imagebase,$L$fe64_sub_epilogue wrt ..imagebase1061 $L$SEH_info_x25519_fe64_tobytes:1062 DB 9,0,0,01063 DD short_handler wrt ..imagebase1064 DD $L$fe64_to_body wrt ..imagebase,$L$fe64_to_epilogue wrt ..imagebase -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/x86_64-mont.S
r95219 r95221 32 32 cmp r9d,8 33 33 jb NEAR $L$mul_enter 34 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]35 34 cmp rdx,rsi 36 35 jne NEAR $L$mul4x_enter … … 295 294 296 295 $L$mul4x_enter: 297 and r11d,0x80100298 cmp r11d,0x80100299 je NEAR $L$mulx4x_enter300 296 push rbx 301 297 … … 723 719 724 720 $L$SEH_end_bn_mul4x_mont: 725 EXTERN bn_sqrx8x_internal726 721 EXTERN bn_sqr8x_internal 727 722 … … 819 814 DB 102,72,15,110,207 820 815 DB 102,73,15,110,218 821 mov eax,DWORD[((OPENSSL_ia32cap_P+8))]822 and eax,0x80100823 cmp eax,0x80100824 jne NEAR $L$sqr8x_nox825 826 call bn_sqrx8x_internal827 828 829 830 831 lea rbx,[rcx*1+r8]832 mov r9,rcx833 mov rdx,rcx834 DB 102,72,15,126,207835 sar rcx,3+2836 jmp NEAR $L$sqr8x_sub837 838 ALIGN 32839 $L$sqr8x_nox:840 816 call bn_sqr8x_internal 841 817 … … 927 903 928 904 $L$SEH_end_bn_sqr8x_mont: 929 930 ALIGN 32931 bn_mulx4x_mont:932 mov QWORD[8+rsp],rdi ;WIN64 prologue933 mov QWORD[16+rsp],rsi934 mov rax,rsp935 $L$SEH_begin_bn_mulx4x_mont:936 mov rdi,rcx937 mov rsi,rdx938 mov rdx,r8939 mov rcx,r9940 mov r8,QWORD[40+rsp]941 mov r9,QWORD[48+rsp]942 943 944 945 mov rax,rsp946 947 $L$mulx4x_enter:948 push rbx949 950 push rbp951 952 push r12953 954 push r13955 956 push r14957 958 push r15959 960 $L$mulx4x_prologue:961 962 shl r9d,3963 xor r10,r10964 sub r10,r9965 mov r8,QWORD[r8]966 lea rbp,[((-72))+r10*1+rsp]967 and rbp,-128968 mov r11,rsp969 sub r11,rbp970 and r11,-4096971 lea rsp,[rbp*1+r11]972 mov r10,QWORD[rsp]973 cmp rsp,rbp974 ja NEAR $L$mulx4x_page_walk975 jmp NEAR $L$mulx4x_page_walk_done976 977 ALIGN 16978 $L$mulx4x_page_walk:979 lea rsp,[((-4096))+rsp]980 mov r10,QWORD[rsp]981 cmp rsp,rbp982 ja NEAR $L$mulx4x_page_walk983 $L$mulx4x_page_walk_done:984 985 lea r10,[r9*1+rdx]986 987 988 989 990 991 992 993 994 995 996 997 998 mov QWORD[rsp],r9999 shr r9,51000 mov QWORD[16+rsp],r101001 sub r9,11002 mov QWORD[24+rsp],r81003 mov QWORD[32+rsp],rdi1004 mov QWORD[40+rsp],rax1005 1006 mov QWORD[48+rsp],r91007 jmp NEAR $L$mulx4x_body1008 1009 ALIGN 321010 $L$mulx4x_body:1011 lea rdi,[8+rdx]1012 mov rdx,QWORD[rdx]1013 lea rbx,[((64+32))+rsp]1014 mov r9,rdx1015 1016 mulx rax,r8,QWORD[rsi]1017 mulx r14,r11,QWORD[8+rsi]1018 add r11,rax1019 mov QWORD[8+rsp],rdi1020 mulx r13,r12,QWORD[16+rsi]1021 adc r12,r141022 adc r13,01023 1024 mov rdi,r81025 imul r8,QWORD[24+rsp]1026 xor rbp,rbp1027 1028 mulx r14,rax,QWORD[24+rsi]1029 mov rdx,r81030 lea rsi,[32+rsi]1031 adcx r13,rax1032 adcx r14,rbp1033 1034 mulx r10,rax,QWORD[rcx]1035 adcx rdi,rax1036 adox r10,r111037 mulx r11,rax,QWORD[8+rcx]1038 adcx r10,rax1039 adox r11,r121040 DB 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x001041 mov rdi,QWORD[48+rsp]1042 mov QWORD[((-32))+rbx],r101043 adcx r11,rax1044 adox r12,r131045 mulx r15,rax,QWORD[24+rcx]1046 mov rdx,r91047 mov QWORD[((-24))+rbx],r111048 adcx r12,rax1049 adox r15,rbp1050 lea rcx,[32+rcx]1051 mov QWORD[((-16))+rbx],r121052 1053 jmp NEAR $L$mulx4x_1st1054 1055 ALIGN 321056 $L$mulx4x_1st:1057 adcx r15,rbp1058 mulx rax,r10,QWORD[rsi]1059 adcx r10,r141060 mulx r14,r11,QWORD[8+rsi]1061 adcx r11,rax1062 mulx rax,r12,QWORD[16+rsi]1063 adcx r12,r141064 mulx r14,r13,QWORD[24+rsi]1065 DB 0x67,0x671066 mov rdx,r81067 adcx r13,rax1068 adcx r14,rbp1069 lea rsi,[32+rsi]1070 lea rbx,[32+rbx]1071 1072 adox r10,r151073 mulx r15,rax,QWORD[rcx]1074 adcx r10,rax1075 adox r11,r151076 mulx r15,rax,QWORD[8+rcx]1077 adcx r11,rax1078 adox r12,r151079 mulx r15,rax,QWORD[16+rcx]1080 mov QWORD[((-40))+rbx],r101081 adcx r12,rax1082 mov QWORD[((-32))+rbx],r111083 adox r13,r151084 mulx r15,rax,QWORD[24+rcx]1085 mov rdx,r91086 mov QWORD[((-24))+rbx],r121087 adcx r13,rax1088 adox r15,rbp1089 lea rcx,[32+rcx]1090 mov QWORD[((-16))+rbx],r131091 1092 dec rdi1093 jnz NEAR $L$mulx4x_1st1094 1095 mov rax,QWORD[rsp]1096 mov rdi,QWORD[8+rsp]1097 adc r15,rbp1098 add r14,r151099 sbb r15,r151100 mov QWORD[((-8))+rbx],r141101 jmp NEAR $L$mulx4x_outer1102 1103 ALIGN 321104 $L$mulx4x_outer:1105 mov rdx,QWORD[rdi]1106 lea rdi,[8+rdi]1107 sub rsi,rax1108 mov QWORD[rbx],r151109 lea rbx,[((64+32))+rsp]1110 sub rcx,rax1111 1112 mulx r11,r8,QWORD[rsi]1113 xor ebp,ebp1114 mov r9,rdx1115 mulx r12,r14,QWORD[8+rsi]1116 adox r8,QWORD[((-32))+rbx]1117 adcx r11,r141118 mulx r13,r15,QWORD[16+rsi]1119 adox r11,QWORD[((-24))+rbx]1120 adcx r12,r151121 adox r12,QWORD[((-16))+rbx]1122 adcx r13,rbp1123 adox r13,rbp1124 1125 mov QWORD[8+rsp],rdi1126 mov r15,r81127 imul r8,QWORD[24+rsp]1128 xor ebp,ebp1129 1130 mulx r14,rax,QWORD[24+rsi]1131 mov rdx,r81132 adcx r13,rax1133 adox r13,QWORD[((-8))+rbx]1134 adcx r14,rbp1135 lea rsi,[32+rsi]1136 adox r14,rbp1137 1138 mulx r10,rax,QWORD[rcx]1139 adcx r15,rax1140 adox r10,r111141 mulx r11,rax,QWORD[8+rcx]1142 adcx r10,rax1143 adox r11,r121144 mulx r12,rax,QWORD[16+rcx]1145 mov QWORD[((-32))+rbx],r101146 adcx r11,rax1147 adox r12,r131148 mulx r15,rax,QWORD[24+rcx]1149 mov rdx,r91150 mov QWORD[((-24))+rbx],r111151 lea rcx,[32+rcx]1152 adcx r12,rax1153 adox r15,rbp1154 mov rdi,QWORD[48+rsp]1155 mov QWORD[((-16))+rbx],r121156 1157 jmp NEAR $L$mulx4x_inner1158 1159 ALIGN 321160 $L$mulx4x_inner:1161 mulx rax,r10,QWORD[rsi]1162 adcx r15,rbp1163 adox r10,r141164 mulx r14,r11,QWORD[8+rsi]1165 adcx r10,QWORD[rbx]1166 adox r11,rax1167 mulx rax,r12,QWORD[16+rsi]1168 adcx r11,QWORD[8+rbx]1169 adox r12,r141170 mulx r14,r13,QWORD[24+rsi]1171 mov rdx,r81172 adcx r12,QWORD[16+rbx]1173 adox r13,rax1174 adcx r13,QWORD[24+rbx]1175 adox r14,rbp1176 lea rsi,[32+rsi]1177 lea rbx,[32+rbx]1178 adcx r14,rbp1179 1180 adox r10,r151181 mulx r15,rax,QWORD[rcx]1182 adcx r10,rax1183 adox r11,r151184 mulx r15,rax,QWORD[8+rcx]1185 adcx r11,rax1186 adox r12,r151187 mulx r15,rax,QWORD[16+rcx]1188 mov QWORD[((-40))+rbx],r101189 adcx r12,rax1190 adox r13,r151191 mulx r15,rax,QWORD[24+rcx]1192 mov rdx,r91193 mov QWORD[((-32))+rbx],r111194 mov QWORD[((-24))+rbx],r121195 adcx r13,rax1196 adox r15,rbp1197 lea rcx,[32+rcx]1198 mov QWORD[((-16))+rbx],r131199 1200 dec rdi1201 jnz NEAR $L$mulx4x_inner1202 1203 mov rax,QWORD[rsp]1204 mov rdi,QWORD[8+rsp]1205 adc r15,rbp1206 sub rbp,QWORD[rbx]1207 adc r14,r151208 sbb r15,r151209 mov QWORD[((-8))+rbx],r141210 1211 cmp rdi,QWORD[16+rsp]1212 jne NEAR $L$mulx4x_outer1213 1214 lea rbx,[64+rsp]1215 sub rcx,rax1216 neg r151217 mov rdx,rax1218 shr rax,3+21219 mov rdi,QWORD[32+rsp]1220 jmp NEAR $L$mulx4x_sub1221 1222 ALIGN 321223 $L$mulx4x_sub:1224 mov r11,QWORD[rbx]1225 mov r12,QWORD[8+rbx]1226 mov r13,QWORD[16+rbx]1227 mov r14,QWORD[24+rbx]1228 lea rbx,[32+rbx]1229 sbb r11,QWORD[rcx]1230 sbb r12,QWORD[8+rcx]1231 sbb r13,QWORD[16+rcx]1232 sbb r14,QWORD[24+rcx]1233 lea rcx,[32+rcx]1234 mov QWORD[rdi],r111235 mov QWORD[8+rdi],r121236 mov QWORD[16+rdi],r131237 mov QWORD[24+rdi],r141238 lea rdi,[32+rdi]1239 dec rax1240 jnz NEAR $L$mulx4x_sub1241 1242 sbb r15,01243 lea rbx,[64+rsp]1244 sub rdi,rdx1245 1246 DB 102,73,15,110,2071247 pxor xmm0,xmm01248 pshufd xmm1,xmm1,01249 mov rsi,QWORD[40+rsp]1250 1251 jmp NEAR $L$mulx4x_cond_copy1252 1253 ALIGN 321254 $L$mulx4x_cond_copy:1255 movdqa xmm2,XMMWORD[rbx]1256 movdqa xmm3,XMMWORD[16+rbx]1257 lea rbx,[32+rbx]1258 movdqu xmm4,XMMWORD[rdi]1259 movdqu xmm5,XMMWORD[16+rdi]1260 lea rdi,[32+rdi]1261 movdqa XMMWORD[(-32)+rbx],xmm01262 movdqa XMMWORD[(-16)+rbx],xmm01263 pcmpeqd xmm0,xmm11264 pand xmm2,xmm11265 pand xmm3,xmm11266 pand xmm4,xmm01267 pand xmm5,xmm01268 pxor xmm0,xmm01269 por xmm4,xmm21270 por xmm5,xmm31271 movdqu XMMWORD[(-32)+rdi],xmm41272 movdqu XMMWORD[(-16)+rdi],xmm51273 sub rdx,321274 jnz NEAR $L$mulx4x_cond_copy1275 1276 mov QWORD[rbx],rdx1277 1278 mov rax,11279 mov r15,QWORD[((-48))+rsi]1280 1281 mov r14,QWORD[((-40))+rsi]1282 1283 mov r13,QWORD[((-32))+rsi]1284 1285 mov r12,QWORD[((-24))+rsi]1286 1287 mov rbp,QWORD[((-16))+rsi]1288 1289 mov rbx,QWORD[((-8))+rsi]1290 1291 lea rsp,[rsi]1292 1293 $L$mulx4x_epilogue:1294 mov rdi,QWORD[8+rsp] ;WIN64 epilogue1295 mov rsi,QWORD[16+rsp]1296 DB 0F3h,0C3h ;repret1297 1298 $L$SEH_end_bn_mulx4x_mont:1299 905 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 1300 906 DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 … … 1448 1054 DD $L$SEH_end_bn_sqr8x_mont wrt ..imagebase 1449 1055 DD $L$SEH_info_bn_sqr8x_mont wrt ..imagebase 1450 DD $L$SEH_begin_bn_mulx4x_mont wrt ..imagebase1451 DD $L$SEH_end_bn_mulx4x_mont wrt ..imagebase1452 DD $L$SEH_info_bn_mulx4x_mont wrt ..imagebase1453 1056 section .xdata rdata align=8 1454 1057 ALIGN 8 … … 1466 1069 DD $L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase 1467 1070 ALIGN 8 1468 $L$SEH_info_bn_mulx4x_mont:1469 DB 9,0,0,01470 DD sqr_handler wrt ..imagebase1471 DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase1472 ALIGN 8 -
trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/x86_64-mont5.S
r95219 r95221 30 30 test r9d,7 31 31 jnz NEAR $L$mul_enter 32 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]33 32 jmp NEAR $L$mul4x_enter 34 33 … … 481 480 482 481 $L$mul4x_enter: 483 and r11d,0x80108484 cmp r11d,0x80108485 je NEAR $L$mulx4x_enter486 482 push rbx 487 483 … … 1127 1123 mov rax,rsp 1128 1124 1129 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]1130 and r11d,0x801081131 cmp r11d,0x801081132 je NEAR $L$powerx5_enter1133 1125 push rbx 1134 1126 … … 2234 2226 mov rbp,rcx 2235 2227 DB 102,73,15,110,218 2236 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]2237 and r11d,0x801082238 cmp r11d,0x801082239 jne NEAR $L$from_mont_nox2240 2241 lea rdi,[r9*1+rax]2242 call __bn_sqrx8x_reduction2243 call __bn_postx4x_internal2244 2245 pxor xmm0,xmm02246 lea rax,[48+rsp]2247 jmp NEAR $L$from_mont_zero2248 2249 ALIGN 322250 $L$from_mont_nox:2251 2228 call __bn_sqr8x_reduction 2252 2229 call __bn_post4x_internal … … 2289 2266 2290 2267 $L$SEH_end_bn_from_mont8x: 2291 2292 ALIGN 322293 bn_mulx4x_mont_gather5:2294 mov QWORD[8+rsp],rdi ;WIN64 prologue2295 mov QWORD[16+rsp],rsi2296 mov rax,rsp2297 $L$SEH_begin_bn_mulx4x_mont_gather5:2298 mov rdi,rcx2299 mov rsi,rdx2300 mov rdx,r82301 mov rcx,r92302 mov r8,QWORD[40+rsp]2303 mov r9,QWORD[48+rsp]2304 2305 2306 2307 mov rax,rsp2308 2309 $L$mulx4x_enter:2310 push rbx2311 2312 push rbp2313 2314 push r122315 2316 push r132317 2318 push r142319 2320 push r152321 2322 $L$mulx4x_prologue:2323 2324 shl r9d,32325 lea r10,[r9*2+r9]2326 neg r92327 mov r8,QWORD[r8]2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 lea r11,[((-320))+r9*2+rsp]2339 mov rbp,rsp2340 sub r11,rdi2341 and r11,40952342 cmp r10,r112343 jb NEAR $L$mulx4xsp_alt2344 sub rbp,r112345 lea rbp,[((-320))+r9*2+rbp]2346 jmp NEAR $L$mulx4xsp_done2347 2348 $L$mulx4xsp_alt:2349 lea r10,[((4096-320))+r9*2]2350 lea rbp,[((-320))+r9*2+rbp]2351 sub r11,r102352 mov r10,02353 cmovc r11,r102354 sub rbp,r112355 $L$mulx4xsp_done:2356 and rbp,-642357 mov r11,rsp2358 sub r11,rbp2359 and r11,-40962360 lea rsp,[rbp*1+r11]2361 mov r10,QWORD[rsp]2362 cmp rsp,rbp2363 ja NEAR $L$mulx4x_page_walk2364 jmp NEAR $L$mulx4x_page_walk_done2365 2366 $L$mulx4x_page_walk:2367 lea rsp,[((-4096))+rsp]2368 mov r10,QWORD[rsp]2369 cmp rsp,rbp2370 ja NEAR $L$mulx4x_page_walk2371 $L$mulx4x_page_walk_done:2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 mov QWORD[32+rsp],r82386 mov QWORD[40+rsp],rax2387 2388 $L$mulx4x_body:2389 call mulx4x_internal2390 2391 mov rsi,QWORD[40+rsp]2392 2393 mov rax,12394 2395 mov r15,QWORD[((-48))+rsi]2396 2397 mov r14,QWORD[((-40))+rsi]2398 2399 mov r13,QWORD[((-32))+rsi]2400 2401 mov r12,QWORD[((-24))+rsi]2402 2403 mov rbp,QWORD[((-16))+rsi]2404 2405 mov rbx,QWORD[((-8))+rsi]2406 2407 lea rsp,[rsi]2408 2409 $L$mulx4x_epilogue:2410 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2411 mov rsi,QWORD[16+rsp]2412 DB 0F3h,0C3h ;repret2413 2414 $L$SEH_end_bn_mulx4x_mont_gather5:2415 2416 2417 ALIGN 322418 mulx4x_internal:2419 2420 mov QWORD[8+rsp],r92421 mov r10,r92422 neg r92423 shl r9,52424 neg r102425 lea r13,[128+r9*1+rdx]2426 shr r9,5+52427 movd xmm5,DWORD[56+rax]2428 sub r9,12429 lea rax,[$L$inc]2430 mov QWORD[((16+8))+rsp],r132431 mov QWORD[((24+8))+rsp],r92432 mov QWORD[((56+8))+rsp],rdi2433 movdqa xmm0,XMMWORD[rax]2434 movdqa xmm1,XMMWORD[16+rax]2435 lea r10,[((88-112))+r10*1+rsp]2436 lea rdi,[128+rdx]2437 2438 pshufd xmm5,xmm5,02439 movdqa xmm4,xmm12440 DB 0x672441 movdqa xmm2,xmm12442 DB 0x672443 paddd xmm1,xmm02444 pcmpeqd xmm0,xmm52445 movdqa xmm3,xmm42446 paddd xmm2,xmm12447 pcmpeqd xmm1,xmm52448 movdqa XMMWORD[112+r10],xmm02449 movdqa xmm0,xmm42450 2451 paddd xmm3,xmm22452 pcmpeqd xmm2,xmm52453 movdqa XMMWORD[128+r10],xmm12454 movdqa xmm1,xmm42455 2456 paddd xmm0,xmm32457 pcmpeqd xmm3,xmm52458 movdqa XMMWORD[144+r10],xmm22459 movdqa xmm2,xmm42460 2461 paddd xmm1,xmm02462 pcmpeqd xmm0,xmm52463 movdqa XMMWORD[160+r10],xmm32464 movdqa xmm3,xmm42465 paddd xmm2,xmm12466 pcmpeqd xmm1,xmm52467 movdqa XMMWORD[176+r10],xmm02468 movdqa xmm0,xmm42469 2470 paddd xmm3,xmm22471 pcmpeqd xmm2,xmm52472 movdqa XMMWORD[192+r10],xmm12473 movdqa xmm1,xmm42474 2475 paddd xmm0,xmm32476 pcmpeqd xmm3,xmm52477 movdqa XMMWORD[208+r10],xmm22478 movdqa xmm2,xmm42479 2480 paddd xmm1,xmm02481 pcmpeqd xmm0,xmm52482 movdqa XMMWORD[224+r10],xmm32483 movdqa xmm3,xmm42484 paddd xmm2,xmm12485 pcmpeqd xmm1,xmm52486 movdqa XMMWORD[240+r10],xmm02487 movdqa xmm0,xmm42488 2489 paddd xmm3,xmm22490 pcmpeqd xmm2,xmm52491 movdqa XMMWORD[256+r10],xmm12492 movdqa xmm1,xmm42493 2494 paddd xmm0,xmm32495 pcmpeqd xmm3,xmm52496 movdqa XMMWORD[272+r10],xmm22497 movdqa xmm2,xmm42498 2499 paddd xmm1,xmm02500 pcmpeqd xmm0,xmm52501 movdqa XMMWORD[288+r10],xmm32502 movdqa xmm3,xmm42503 DB 0x672504 paddd xmm2,xmm12505 pcmpeqd xmm1,xmm52506 movdqa XMMWORD[304+r10],xmm02507 2508 paddd xmm3,xmm22509 pcmpeqd xmm2,xmm52510 movdqa XMMWORD[320+r10],xmm12511 2512 pcmpeqd xmm3,xmm52513 movdqa XMMWORD[336+r10],xmm22514 2515 pand xmm0,XMMWORD[64+rdi]2516 pand xmm1,XMMWORD[80+rdi]2517 pand xmm2,XMMWORD[96+rdi]2518 movdqa XMMWORD[352+r10],xmm32519 pand xmm3,XMMWORD[112+rdi]2520 por xmm0,xmm22521 por xmm1,xmm32522 movdqa xmm4,XMMWORD[((-128))+rdi]2523 movdqa xmm5,XMMWORD[((-112))+rdi]2524 movdqa xmm2,XMMWORD[((-96))+rdi]2525 pand xmm4,XMMWORD[112+r10]2526 movdqa xmm3,XMMWORD[((-80))+rdi]2527 pand xmm5,XMMWORD[128+r10]2528 por xmm0,xmm42529 pand xmm2,XMMWORD[144+r10]2530 por xmm1,xmm52531 pand xmm3,XMMWORD[160+r10]2532 por xmm0,xmm22533 por xmm1,xmm32534 movdqa xmm4,XMMWORD[((-64))+rdi]2535 movdqa xmm5,XMMWORD[((-48))+rdi]2536 movdqa xmm2,XMMWORD[((-32))+rdi]2537 pand xmm4,XMMWORD[176+r10]2538 movdqa xmm3,XMMWORD[((-16))+rdi]2539 pand xmm5,XMMWORD[192+r10]2540 por xmm0,xmm42541 pand xmm2,XMMWORD[208+r10]2542 por xmm1,xmm52543 pand xmm3,XMMWORD[224+r10]2544 por xmm0,xmm22545 por xmm1,xmm32546 movdqa xmm4,XMMWORD[rdi]2547 movdqa xmm5,XMMWORD[16+rdi]2548 movdqa xmm2,XMMWORD[32+rdi]2549 pand xmm4,XMMWORD[240+r10]2550 movdqa xmm3,XMMWORD[48+rdi]2551 pand xmm5,XMMWORD[256+r10]2552 por xmm0,xmm42553 pand xmm2,XMMWORD[272+r10]2554 por xmm1,xmm52555 pand xmm3,XMMWORD[288+r10]2556 por xmm0,xmm22557 por xmm1,xmm32558 pxor xmm0,xmm12559 pshufd xmm1,xmm0,0x4e2560 por xmm0,xmm12561 lea rdi,[256+rdi]2562 DB 102,72,15,126,1942563 lea rbx,[((64+32+8))+rsp]2564 2565 mov r9,rdx2566 mulx rax,r8,QWORD[rsi]2567 mulx r12,r11,QWORD[8+rsi]2568 add r11,rax2569 mulx r13,rax,QWORD[16+rsi]2570 adc r12,rax2571 adc r13,02572 mulx r14,rax,QWORD[24+rsi]2573 2574 mov r15,r82575 imul r8,QWORD[((32+8))+rsp]2576 xor rbp,rbp2577 mov rdx,r82578 2579 mov QWORD[((8+8))+rsp],rdi2580 2581 lea rsi,[32+rsi]2582 adcx r13,rax2583 adcx r14,rbp2584 2585 mulx r10,rax,QWORD[rcx]2586 adcx r15,rax2587 adox r10,r112588 mulx r11,rax,QWORD[8+rcx]2589 adcx r10,rax2590 adox r11,r122591 mulx r12,rax,QWORD[16+rcx]2592 mov rdi,QWORD[((24+8))+rsp]2593 mov QWORD[((-32))+rbx],r102594 adcx r11,rax2595 adox r12,r132596 mulx r15,rax,QWORD[24+rcx]2597 mov rdx,r92598 mov QWORD[((-24))+rbx],r112599 adcx r12,rax2600 adox r15,rbp2601 lea rcx,[32+rcx]2602 mov QWORD[((-16))+rbx],r122603 jmp NEAR $L$mulx4x_1st2604 2605 ALIGN 322606 $L$mulx4x_1st:2607 adcx r15,rbp2608 mulx rax,r10,QWORD[rsi]2609 adcx r10,r142610 mulx r14,r11,QWORD[8+rsi]2611 adcx r11,rax2612 mulx rax,r12,QWORD[16+rsi]2613 adcx r12,r142614 mulx r14,r13,QWORD[24+rsi]2615 DB 0x67,0x672616 mov rdx,r82617 adcx r13,rax2618 adcx r14,rbp2619 lea rsi,[32+rsi]2620 lea rbx,[32+rbx]2621 2622 adox r10,r152623 mulx r15,rax,QWORD[rcx]2624 adcx r10,rax2625 adox r11,r152626 mulx r15,rax,QWORD[8+rcx]2627 adcx r11,rax2628 adox r12,r152629 mulx r15,rax,QWORD[16+rcx]2630 mov QWORD[((-40))+rbx],r102631 adcx r12,rax2632 mov QWORD[((-32))+rbx],r112633 adox r13,r152634 mulx r15,rax,QWORD[24+rcx]2635 mov rdx,r92636 mov QWORD[((-24))+rbx],r122637 adcx r13,rax2638 adox r15,rbp2639 lea rcx,[32+rcx]2640 mov QWORD[((-16))+rbx],r132641 2642 dec rdi2643 jnz NEAR $L$mulx4x_1st2644 2645 mov rax,QWORD[8+rsp]2646 adc r15,rbp2647 lea rsi,[rax*1+rsi]2648 add r14,r152649 mov rdi,QWORD[((8+8))+rsp]2650 adc rbp,rbp2651 mov QWORD[((-8))+rbx],r142652 jmp NEAR $L$mulx4x_outer2653 2654 ALIGN 322655 $L$mulx4x_outer:2656 lea r10,[((16-256))+rbx]2657 pxor xmm4,xmm42658 DB 0x67,0x672659 pxor xmm5,xmm52660 movdqa xmm0,XMMWORD[((-128))+rdi]2661 movdqa xmm1,XMMWORD[((-112))+rdi]2662 movdqa xmm2,XMMWORD[((-96))+rdi]2663 pand xmm0,XMMWORD[256+r10]2664 movdqa xmm3,XMMWORD[((-80))+rdi]2665 pand xmm1,XMMWORD[272+r10]2666 por xmm4,xmm02667 pand xmm2,XMMWORD[288+r10]2668 por xmm5,xmm12669 pand xmm3,XMMWORD[304+r10]2670 por xmm4,xmm22671 por xmm5,xmm32672 movdqa xmm0,XMMWORD[((-64))+rdi]2673 movdqa xmm1,XMMWORD[((-48))+rdi]2674 movdqa xmm2,XMMWORD[((-32))+rdi]2675 pand xmm0,XMMWORD[320+r10]2676 movdqa xmm3,XMMWORD[((-16))+rdi]2677 pand xmm1,XMMWORD[336+r10]2678 por xmm4,xmm02679 pand xmm2,XMMWORD[352+r10]2680 por xmm5,xmm12681 pand xmm3,XMMWORD[368+r10]2682 por xmm4,xmm22683 por xmm5,xmm32684 movdqa xmm0,XMMWORD[rdi]2685 movdqa xmm1,XMMWORD[16+rdi]2686 movdqa xmm2,XMMWORD[32+rdi]2687 pand xmm0,XMMWORD[384+r10]2688 movdqa xmm3,XMMWORD[48+rdi]2689 pand xmm1,XMMWORD[400+r10]2690 por xmm4,xmm02691 pand xmm2,XMMWORD[416+r10]2692 por xmm5,xmm12693 pand xmm3,XMMWORD[432+r10]2694 por xmm4,xmm22695 por xmm5,xmm32696 movdqa xmm0,XMMWORD[64+rdi]2697 movdqa xmm1,XMMWORD[80+rdi]2698 movdqa xmm2,XMMWORD[96+rdi]2699 pand xmm0,XMMWORD[448+r10]2700 movdqa xmm3,XMMWORD[112+rdi]2701 pand xmm1,XMMWORD[464+r10]2702 por xmm4,xmm02703 pand xmm2,XMMWORD[480+r10]2704 por xmm5,xmm12705 pand xmm3,XMMWORD[496+r10]2706 por xmm4,xmm22707 por xmm5,xmm32708 por xmm4,xmm52709 pshufd xmm0,xmm4,0x4e2710 por xmm0,xmm42711 lea rdi,[256+rdi]2712 DB 102,72,15,126,1942713 2714 mov QWORD[rbx],rbp2715 lea rbx,[32+rax*1+rbx]2716 mulx r11,r8,QWORD[rsi]2717 xor rbp,rbp2718 mov r9,rdx2719 mulx r12,r14,QWORD[8+rsi]2720 adox r8,QWORD[((-32))+rbx]2721 adcx r11,r142722 mulx r13,r15,QWORD[16+rsi]2723 adox r11,QWORD[((-24))+rbx]2724 adcx r12,r152725 mulx r14,rdx,QWORD[24+rsi]2726 adox r12,QWORD[((-16))+rbx]2727 adcx r13,rdx2728 lea rcx,[rax*1+rcx]2729 lea rsi,[32+rsi]2730 adox r13,QWORD[((-8))+rbx]2731 adcx r14,rbp2732 adox r14,rbp2733 2734 mov r15,r82735 imul r8,QWORD[((32+8))+rsp]2736 2737 mov rdx,r82738 xor rbp,rbp2739 mov QWORD[((8+8))+rsp],rdi2740 2741 mulx r10,rax,QWORD[rcx]2742 adcx r15,rax2743 adox r10,r112744 mulx r11,rax,QWORD[8+rcx]2745 adcx r10,rax2746 adox r11,r122747 mulx r12,rax,QWORD[16+rcx]2748 adcx r11,rax2749 adox r12,r132750 mulx r15,rax,QWORD[24+rcx]2751 mov rdx,r92752 mov rdi,QWORD[((24+8))+rsp]2753 mov QWORD[((-32))+rbx],r102754 adcx r12,rax2755 mov QWORD[((-24))+rbx],r112756 adox r15,rbp2757 mov QWORD[((-16))+rbx],r122758 lea rcx,[32+rcx]2759 jmp NEAR $L$mulx4x_inner2760 2761 ALIGN 322762 $L$mulx4x_inner:2763 mulx rax,r10,QWORD[rsi]2764 adcx r15,rbp2765 adox r10,r142766 mulx r14,r11,QWORD[8+rsi]2767 adcx r10,QWORD[rbx]2768 adox r11,rax2769 mulx rax,r12,QWORD[16+rsi]2770 adcx r11,QWORD[8+rbx]2771 adox r12,r142772 mulx r14,r13,QWORD[24+rsi]2773 mov rdx,r82774 adcx r12,QWORD[16+rbx]2775 adox r13,rax2776 adcx r13,QWORD[24+rbx]2777 adox r14,rbp2778 lea rsi,[32+rsi]2779 lea rbx,[32+rbx]2780 adcx r14,rbp2781 2782 adox r10,r152783 mulx r15,rax,QWORD[rcx]2784 adcx r10,rax2785 adox r11,r152786 mulx r15,rax,QWORD[8+rcx]2787 adcx r11,rax2788 adox r12,r152789 mulx r15,rax,QWORD[16+rcx]2790 mov QWORD[((-40))+rbx],r102791 adcx r12,rax2792 adox r13,r152793 mov QWORD[((-32))+rbx],r112794 mulx r15,rax,QWORD[24+rcx]2795 mov rdx,r92796 lea rcx,[32+rcx]2797 mov QWORD[((-24))+rbx],r122798 adcx r13,rax2799 adox r15,rbp2800 mov QWORD[((-16))+rbx],r132801 2802 dec rdi2803 jnz NEAR $L$mulx4x_inner2804 2805 mov rax,QWORD[((0+8))+rsp]2806 adc r15,rbp2807 sub rdi,QWORD[rbx]2808 mov rdi,QWORD[((8+8))+rsp]2809 mov r10,QWORD[((16+8))+rsp]2810 adc r14,r152811 lea rsi,[rax*1+rsi]2812 adc rbp,rbp2813 mov QWORD[((-8))+rbx],r142814 2815 cmp rdi,r102816 jb NEAR $L$mulx4x_outer2817 2818 mov r10,QWORD[((-8))+rcx]2819 mov r8,rbp2820 mov r12,QWORD[rax*1+rcx]2821 lea rbp,[rax*1+rcx]2822 mov rcx,rax2823 lea rdi,[rax*1+rbx]2824 xor eax,eax2825 xor r15,r152826 sub r10,r142827 adc r15,r152828 or r8,r152829 sar rcx,3+22830 sub rax,r82831 mov rdx,QWORD[((56+8))+rsp]2832 dec r122833 mov r13,QWORD[8+rbp]2834 xor r8,r82835 mov r14,QWORD[16+rbp]2836 mov r15,QWORD[24+rbp]2837 jmp NEAR $L$sqrx4x_sub_entry2838 2839 2840 2841 ALIGN 322842 bn_powerx5:2843 mov QWORD[8+rsp],rdi ;WIN64 prologue2844 mov QWORD[16+rsp],rsi2845 mov rax,rsp2846 $L$SEH_begin_bn_powerx5:2847 mov rdi,rcx2848 mov rsi,rdx2849 mov rdx,r82850 mov rcx,r92851 mov r8,QWORD[40+rsp]2852 mov r9,QWORD[48+rsp]2853 2854 2855 2856 mov rax,rsp2857 2858 $L$powerx5_enter:2859 push rbx2860 2861 push rbp2862 2863 push r122864 2865 push r132866 2867 push r142868 2869 push r152870 2871 $L$powerx5_prologue:2872 2873 shl r9d,32874 lea r10,[r9*2+r9]2875 neg r92876 mov r8,QWORD[r8]2877 2878 2879 2880 2881 2882 2883 2884 2885 lea r11,[((-320))+r9*2+rsp]2886 mov rbp,rsp2887 sub r11,rdi2888 and r11,40952889 cmp r10,r112890 jb NEAR $L$pwrx_sp_alt2891 sub rbp,r112892 lea rbp,[((-320))+r9*2+rbp]2893 jmp NEAR $L$pwrx_sp_done2894 2895 ALIGN 322896 $L$pwrx_sp_alt:2897 lea r10,[((4096-320))+r9*2]2898 lea rbp,[((-320))+r9*2+rbp]2899 sub r11,r102900 mov r10,02901 cmovc r11,r102902 sub rbp,r112903 $L$pwrx_sp_done:2904 and rbp,-642905 mov r11,rsp2906 sub r11,rbp2907 and r11,-40962908 lea rsp,[rbp*1+r11]2909 mov r10,QWORD[rsp]2910 cmp rsp,rbp2911 ja NEAR $L$pwrx_page_walk2912 jmp NEAR $L$pwrx_page_walk_done2913 2914 $L$pwrx_page_walk:2915 lea rsp,[((-4096))+rsp]2916 mov r10,QWORD[rsp]2917 cmp rsp,rbp2918 ja NEAR $L$pwrx_page_walk2919 $L$pwrx_page_walk_done:2920 2921 mov r10,r92922 neg r92923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 pxor xmm0,xmm02936 DB 102,72,15,110,2072937 DB 102,72,15,110,2092938 DB 102,73,15,110,2182939 DB 102,72,15,110,2262940 mov QWORD[32+rsp],r82941 mov QWORD[40+rsp],rax2942 2943 $L$powerx5_body:2944 2945 call __bn_sqrx8x_internal2946 call __bn_postx4x_internal2947 call __bn_sqrx8x_internal2948 call __bn_postx4x_internal2949 call __bn_sqrx8x_internal2950 call __bn_postx4x_internal2951 call __bn_sqrx8x_internal2952 call __bn_postx4x_internal2953 call __bn_sqrx8x_internal2954 call __bn_postx4x_internal2955 2956 mov r9,r102957 mov rdi,rsi2958 DB 102,72,15,126,2092959 DB 102,72,15,126,2262960 mov rax,QWORD[40+rsp]2961 2962 call mulx4x_internal2963 2964 mov rsi,QWORD[40+rsp]2965 2966 mov rax,12967 2968 mov r15,QWORD[((-48))+rsi]2969 2970 mov r14,QWORD[((-40))+rsi]2971 2972 mov r13,QWORD[((-32))+rsi]2973 2974 mov r12,QWORD[((-24))+rsi]2975 2976 mov rbp,QWORD[((-16))+rsi]2977 2978 mov rbx,QWORD[((-8))+rsi]2979 2980 lea rsp,[rsi]2981 2982 $L$powerx5_epilogue:2983 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2984 mov rsi,QWORD[16+rsp]2985 DB 0F3h,0C3h ;repret2986 2987 $L$SEH_end_bn_powerx5:2988 2989 global bn_sqrx8x_internal2990 2991 2992 ALIGN 322993 bn_sqrx8x_internal:2994 __bn_sqrx8x_internal:2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 lea rdi,[((48+8))+rsp]3037 lea rbp,[r9*1+rsi]3038 mov QWORD[((0+8))+rsp],r93039 mov QWORD[((8+8))+rsp],rbp3040 jmp NEAR $L$sqr8x_zero_start3041 3042 ALIGN 323043 DB 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x003044 $L$sqrx8x_zero:3045 DB 0x3e3046 movdqa XMMWORD[rdi],xmm03047 movdqa XMMWORD[16+rdi],xmm03048 movdqa XMMWORD[32+rdi],xmm03049 movdqa XMMWORD[48+rdi],xmm03050 $L$sqr8x_zero_start:3051 movdqa XMMWORD[64+rdi],xmm03052 movdqa XMMWORD[80+rdi],xmm03053 movdqa XMMWORD[96+rdi],xmm03054 movdqa XMMWORD[112+rdi],xmm03055 lea rdi,[128+rdi]3056 sub r9,643057 jnz NEAR $L$sqrx8x_zero3058 3059 mov rdx,QWORD[rsi]3060 3061 xor r10,r103062 xor r11,r113063 xor r12,r123064 xor r13,r133065 xor r14,r143066 xor r15,r153067 lea rdi,[((48+8))+rsp]3068 xor rbp,rbp3069 jmp NEAR $L$sqrx8x_outer_loop3070 3071 ALIGN 323072 $L$sqrx8x_outer_loop:3073 mulx rax,r8,QWORD[8+rsi]3074 adcx r8,r93075 adox r10,rax3076 mulx rax,r9,QWORD[16+rsi]3077 adcx r9,r103078 adox r11,rax3079 DB 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x003080 adcx r10,r113081 adox r12,rax3082 DB 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x003083 adcx r11,r123084 adox r13,rax3085 mulx rax,r12,QWORD[40+rsi]3086 adcx r12,r133087 adox r14,rax3088 mulx rax,r13,QWORD[48+rsi]3089 adcx r13,r143090 adox rax,r153091 mulx r15,r14,QWORD[56+rsi]3092 mov rdx,QWORD[8+rsi]3093 adcx r14,rax3094 adox r15,rbp3095 adc r15,QWORD[64+rdi]3096 mov QWORD[8+rdi],r83097 mov QWORD[16+rdi],r93098 sbb rcx,rcx3099 xor rbp,rbp3100 3101 3102 mulx rbx,r8,QWORD[16+rsi]3103 mulx rax,r9,QWORD[24+rsi]3104 adcx r8,r103105 adox r9,rbx3106 mulx rbx,r10,QWORD[32+rsi]3107 adcx r9,r113108 adox r10,rax3109 DB 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x003110 adcx r10,r123111 adox r11,rbx3112 DB 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x003113 adcx r11,r133114 adox r12,r143115 DB 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x003116 mov rdx,QWORD[16+rsi]3117 adcx r12,rax3118 adox r13,rbx3119 adcx r13,r153120 adox r14,rbp3121 adcx r14,rbp3122 3123 mov QWORD[24+rdi],r83124 mov QWORD[32+rdi],r93125 3126 mulx rbx,r8,QWORD[24+rsi]3127 mulx rax,r9,QWORD[32+rsi]3128 adcx r8,r103129 adox r9,rbx3130 mulx rbx,r10,QWORD[40+rsi]3131 adcx r9,r113132 adox r10,rax3133 DB 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x003134 adcx r10,r123135 adox r11,r133136 DB 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x003137 DB 0x3e3138 mov rdx,QWORD[24+rsi]3139 adcx r11,rbx3140 adox r12,rax3141 adcx r12,r143142 mov QWORD[40+rdi],r83143 mov QWORD[48+rdi],r93144 mulx rax,r8,QWORD[32+rsi]3145 adox r13,rbp3146 adcx r13,rbp3147 3148 mulx rbx,r9,QWORD[40+rsi]3149 adcx r8,r103150 adox r9,rax3151 mulx rax,r10,QWORD[48+rsi]3152 adcx r9,r113153 adox r10,r123154 mulx r12,r11,QWORD[56+rsi]3155 mov rdx,QWORD[32+rsi]3156 mov r14,QWORD[40+rsi]3157 adcx r10,rbx3158 adox r11,rax3159 mov r15,QWORD[48+rsi]3160 adcx r11,r133161 adox r12,rbp3162 adcx r12,rbp3163 3164 mov QWORD[56+rdi],r83165 mov QWORD[64+rdi],r93166 3167 mulx rax,r9,r143168 mov r8,QWORD[56+rsi]3169 adcx r9,r103170 mulx rbx,r10,r153171 adox r10,rax3172 adcx r10,r113173 mulx rax,r11,r83174 mov rdx,r143175 adox r11,rbx3176 adcx r11,r123177 3178 adcx rax,rbp3179 3180 mulx rbx,r14,r153181 mulx r13,r12,r83182 mov rdx,r153183 lea rsi,[64+rsi]3184 adcx r11,r143185 adox r12,rbx3186 adcx r12,rax3187 adox r13,rbp3188 3189 DB 0x67,0x673190 mulx r14,r8,r83191 adcx r13,r83192 adcx r14,rbp3193 3194 cmp rsi,QWORD[((8+8))+rsp]3195 je NEAR $L$sqrx8x_outer_break3196 3197 neg rcx3198 mov rcx,-83199 mov r15,rbp3200 mov r8,QWORD[64+rdi]3201 adcx r9,QWORD[72+rdi]3202 adcx r10,QWORD[80+rdi]3203 adcx r11,QWORD[88+rdi]3204 adc r12,QWORD[96+rdi]3205 adc r13,QWORD[104+rdi]3206 adc r14,QWORD[112+rdi]3207 adc r15,QWORD[120+rdi]3208 lea rbp,[rsi]3209 lea rdi,[128+rdi]3210 sbb rax,rax3211 3212 mov rdx,QWORD[((-64))+rsi]3213 mov QWORD[((16+8))+rsp],rax3214 mov QWORD[((24+8))+rsp],rdi3215 3216 3217 xor eax,eax3218 jmp NEAR $L$sqrx8x_loop3219 3220 ALIGN 323221 $L$sqrx8x_loop:3222 mov rbx,r83223 mulx r8,rax,QWORD[rbp]3224 adcx rbx,rax3225 adox r8,r93226 3227 mulx r9,rax,QWORD[8+rbp]3228 adcx r8,rax3229 adox r9,r103230 3231 mulx r10,rax,QWORD[16+rbp]3232 adcx r9,rax3233 adox r10,r113234 3235 mulx r11,rax,QWORD[24+rbp]3236 adcx r10,rax3237 adox r11,r123238 3239 DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x003240 adcx r11,rax3241 adox r12,r133242 3243 mulx r13,rax,QWORD[40+rbp]3244 adcx r12,rax3245 adox r13,r143246 3247 mulx r14,rax,QWORD[48+rbp]3248 mov QWORD[rcx*8+rdi],rbx3249 mov ebx,03250 adcx r13,rax3251 adox r14,r153252 3253 DB 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x003254 mov rdx,QWORD[8+rcx*8+rsi]3255 adcx r14,rax3256 adox r15,rbx3257 adcx r15,rbx3258 3259 DB 0x673260 inc rcx3261 jnz NEAR $L$sqrx8x_loop3262 3263 lea rbp,[64+rbp]3264 mov rcx,-83265 cmp rbp,QWORD[((8+8))+rsp]3266 je NEAR $L$sqrx8x_break3267 3268 sub rbx,QWORD[((16+8))+rsp]3269 DB 0x663270 mov rdx,QWORD[((-64))+rsi]3271 adcx r8,QWORD[rdi]3272 adcx r9,QWORD[8+rdi]3273 adc r10,QWORD[16+rdi]3274 adc r11,QWORD[24+rdi]3275 adc r12,QWORD[32+rdi]3276 adc r13,QWORD[40+rdi]3277 adc r14,QWORD[48+rdi]3278 adc r15,QWORD[56+rdi]3279 lea rdi,[64+rdi]3280 DB 0x673281 sbb rax,rax3282 xor ebx,ebx3283 mov QWORD[((16+8))+rsp],rax3284 jmp NEAR $L$sqrx8x_loop3285 3286 ALIGN 323287 $L$sqrx8x_break:3288 xor rbp,rbp3289 sub rbx,QWORD[((16+8))+rsp]3290 adcx r8,rbp3291 mov rcx,QWORD[((24+8))+rsp]3292 adcx r9,rbp3293 mov rdx,QWORD[rsi]3294 adc r10,03295 mov QWORD[rdi],r83296 adc r11,03297 adc r12,03298 adc r13,03299 adc r14,03300 adc r15,03301 cmp rdi,rcx3302 je NEAR $L$sqrx8x_outer_loop3303 3304 mov QWORD[8+rdi],r93305 mov r9,QWORD[8+rcx]3306 mov QWORD[16+rdi],r103307 mov r10,QWORD[16+rcx]3308 mov QWORD[24+rdi],r113309 mov r11,QWORD[24+rcx]3310 mov QWORD[32+rdi],r123311 mov r12,QWORD[32+rcx]3312 mov QWORD[40+rdi],r133313 mov r13,QWORD[40+rcx]3314 mov QWORD[48+rdi],r143315 mov r14,QWORD[48+rcx]3316 mov QWORD[56+rdi],r153317 mov r15,QWORD[56+rcx]3318 mov rdi,rcx3319 jmp NEAR $L$sqrx8x_outer_loop3320 3321 ALIGN 323322 $L$sqrx8x_outer_break:3323 mov QWORD[72+rdi],r93324 DB 102,72,15,126,2173325 mov QWORD[80+rdi],r103326 mov QWORD[88+rdi],r113327 mov QWORD[96+rdi],r123328 mov QWORD[104+rdi],r133329 mov QWORD[112+rdi],r143330 lea rdi,[((48+8))+rsp]3331 mov rdx,QWORD[rcx*1+rsi]3332 3333 mov r11,QWORD[8+rdi]3334 xor r10,r103335 mov r9,QWORD[((0+8))+rsp]3336 adox r11,r113337 mov r12,QWORD[16+rdi]3338 mov r13,QWORD[24+rdi]3339 3340 3341 ALIGN 323342 $L$sqrx4x_shift_n_add:3343 mulx rbx,rax,rdx3344 adox r12,r123345 adcx rax,r103346 DB 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x003347 DB 0x4c,0x8b,0x97,0x20,0x00,0x00,0x003348 adox r13,r133349 adcx rbx,r113350 mov r11,QWORD[40+rdi]3351 mov QWORD[rdi],rax3352 mov QWORD[8+rdi],rbx3353 3354 mulx rbx,rax,rdx3355 adox r10,r103356 adcx rax,r123357 mov rdx,QWORD[16+rcx*1+rsi]3358 mov r12,QWORD[48+rdi]3359 adox r11,r113360 adcx rbx,r133361 mov r13,QWORD[56+rdi]3362 mov QWORD[16+rdi],rax3363 mov QWORD[24+rdi],rbx3364 3365 mulx rbx,rax,rdx3366 adox r12,r123367 adcx rax,r103368 mov rdx,QWORD[24+rcx*1+rsi]3369 lea rcx,[32+rcx]3370 mov r10,QWORD[64+rdi]3371 adox r13,r133372 adcx rbx,r113373 mov r11,QWORD[72+rdi]3374 mov QWORD[32+rdi],rax3375 mov QWORD[40+rdi],rbx3376 3377 mulx rbx,rax,rdx3378 adox r10,r103379 adcx rax,r123380 jrcxz $L$sqrx4x_shift_n_add_break3381 DB 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x003382 adox r11,r113383 adcx rbx,r133384 mov r12,QWORD[80+rdi]3385 mov r13,QWORD[88+rdi]3386 mov QWORD[48+rdi],rax3387 mov QWORD[56+rdi],rbx3388 lea rdi,[64+rdi]3389 nop3390 jmp NEAR $L$sqrx4x_shift_n_add3391 3392 ALIGN 323393 $L$sqrx4x_shift_n_add_break:3394 adcx rbx,r133395 mov QWORD[48+rdi],rax3396 mov QWORD[56+rdi],rbx3397 lea rdi,[64+rdi]3398 DB 102,72,15,126,2133399 __bn_sqrx8x_reduction:3400 xor eax,eax3401 mov rbx,QWORD[((32+8))+rsp]3402 mov rdx,QWORD[((48+8))+rsp]3403 lea rcx,[((-64))+r9*1+rbp]3404 3405 mov QWORD[((0+8))+rsp],rcx3406 mov QWORD[((8+8))+rsp],rdi3407 3408 lea rdi,[((48+8))+rsp]3409 jmp NEAR $L$sqrx8x_reduction_loop3410 3411 ALIGN 323412 $L$sqrx8x_reduction_loop:3413 mov r9,QWORD[8+rdi]3414 mov r10,QWORD[16+rdi]3415 mov r11,QWORD[24+rdi]3416 mov r12,QWORD[32+rdi]3417 mov r8,rdx3418 imul rdx,rbx3419 mov r13,QWORD[40+rdi]3420 mov r14,QWORD[48+rdi]3421 mov r15,QWORD[56+rdi]3422 mov QWORD[((24+8))+rsp],rax3423 3424 lea rdi,[64+rdi]3425 xor rsi,rsi3426 mov rcx,-83427 jmp NEAR $L$sqrx8x_reduce3428 3429 ALIGN 323430 $L$sqrx8x_reduce:3431 mov rbx,r83432 mulx r8,rax,QWORD[rbp]3433 adcx rax,rbx3434 adox r8,r93435 3436 mulx r9,rbx,QWORD[8+rbp]3437 adcx r8,rbx3438 adox r9,r103439 3440 mulx r10,rbx,QWORD[16+rbp]3441 adcx r9,rbx3442 adox r10,r113443 3444 mulx r11,rbx,QWORD[24+rbp]3445 adcx r10,rbx3446 adox r11,r123447 3448 DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x003449 mov rax,rdx3450 mov rdx,r83451 adcx r11,rbx3452 adox r12,r133453 3454 mulx rdx,rbx,QWORD[((32+8))+rsp]3455 mov rdx,rax3456 mov QWORD[((64+48+8))+rcx*8+rsp],rax3457 3458 mulx r13,rax,QWORD[40+rbp]3459 adcx r12,rax3460 adox r13,r143461 3462 mulx r14,rax,QWORD[48+rbp]3463 adcx r13,rax3464 adox r14,r153465 3466 mulx r15,rax,QWORD[56+rbp]3467 mov rdx,rbx3468 adcx r14,rax3469 adox r15,rsi3470 adcx r15,rsi3471 3472 DB 0x67,0x67,0x673473 inc rcx3474 jnz NEAR $L$sqrx8x_reduce3475 3476 mov rax,rsi3477 cmp rbp,QWORD[((0+8))+rsp]3478 jae NEAR $L$sqrx8x_no_tail3479 3480 mov rdx,QWORD[((48+8))+rsp]3481 add r8,QWORD[rdi]3482 lea rbp,[64+rbp]3483 mov rcx,-83484 adcx r9,QWORD[8+rdi]3485 adcx r10,QWORD[16+rdi]3486 adc r11,QWORD[24+rdi]3487 adc r12,QWORD[32+rdi]3488 adc r13,QWORD[40+rdi]3489 adc r14,QWORD[48+rdi]3490 adc r15,QWORD[56+rdi]3491 lea rdi,[64+rdi]3492 sbb rax,rax3493 3494 xor rsi,rsi3495 mov QWORD[((16+8))+rsp],rax3496 jmp NEAR $L$sqrx8x_tail3497 3498 ALIGN 323499 $L$sqrx8x_tail:3500 mov rbx,r83501 mulx r8,rax,QWORD[rbp]3502 adcx rbx,rax3503 adox r8,r93504 3505 mulx r9,rax,QWORD[8+rbp]3506 adcx r8,rax3507 adox r9,r103508 3509 mulx r10,rax,QWORD[16+rbp]3510 adcx r9,rax3511 adox r10,r113512 3513 mulx r11,rax,QWORD[24+rbp]3514 adcx r10,rax3515 adox r11,r123516 3517 DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x003518 adcx r11,rax3519 adox r12,r133520 3521 mulx r13,rax,QWORD[40+rbp]3522 adcx r12,rax3523 adox r13,r143524 3525 mulx r14,rax,QWORD[48+rbp]3526 adcx r13,rax3527 adox r14,r153528 3529 mulx r15,rax,QWORD[56+rbp]3530 mov rdx,QWORD[((72+48+8))+rcx*8+rsp]3531 adcx r14,rax3532 adox r15,rsi3533 mov QWORD[rcx*8+rdi],rbx3534 mov rbx,r83535 adcx r15,rsi3536 3537 inc rcx3538 jnz NEAR $L$sqrx8x_tail3539 3540 cmp rbp,QWORD[((0+8))+rsp]3541 jae NEAR $L$sqrx8x_tail_done3542 3543 sub rsi,QWORD[((16+8))+rsp]3544 mov rdx,QWORD[((48+8))+rsp]3545 lea rbp,[64+rbp]3546 adc r8,QWORD[rdi]3547 adc r9,QWORD[8+rdi]3548 adc r10,QWORD[16+rdi]3549 adc r11,QWORD[24+rdi]3550 adc r12,QWORD[32+rdi]3551 adc r13,QWORD[40+rdi]3552 adc r14,QWORD[48+rdi]3553 adc r15,QWORD[56+rdi]3554 lea rdi,[64+rdi]3555 sbb rax,rax3556 sub rcx,83557 3558 xor rsi,rsi3559 mov QWORD[((16+8))+rsp],rax3560 jmp NEAR $L$sqrx8x_tail3561 3562 ALIGN 323563 $L$sqrx8x_tail_done:3564 xor rax,rax3565 add r8,QWORD[((24+8))+rsp]3566 adc r9,03567 adc r10,03568 adc r11,03569 adc r12,03570 adc r13,03571 adc r14,03572 adc r15,03573 adc rax,03574 3575 sub rsi,QWORD[((16+8))+rsp]3576 $L$sqrx8x_no_tail:3577 adc r8,QWORD[rdi]3578 DB 102,72,15,126,2173579 adc r9,QWORD[8+rdi]3580 mov rsi,QWORD[56+rbp]3581 DB 102,72,15,126,2133582 adc r10,QWORD[16+rdi]3583 adc r11,QWORD[24+rdi]3584 adc r12,QWORD[32+rdi]3585 adc r13,QWORD[40+rdi]3586 adc r14,QWORD[48+rdi]3587 adc r15,QWORD[56+rdi]3588 adc rax,03589 3590 mov rbx,QWORD[((32+8))+rsp]3591 mov rdx,QWORD[64+rcx*1+rdi]3592 3593 mov QWORD[rdi],r83594 lea r8,[64+rdi]3595 mov QWORD[8+rdi],r93596 mov QWORD[16+rdi],r103597 mov QWORD[24+rdi],r113598 mov QWORD[32+rdi],r123599 mov QWORD[40+rdi],r133600 mov QWORD[48+rdi],r143601 mov QWORD[56+rdi],r153602 3603 lea rdi,[64+rcx*1+rdi]3604 cmp r8,QWORD[((8+8))+rsp]3605 jb NEAR $L$sqrx8x_reduction_loop3606 DB 0F3h,0C3h ;repret3607 3608 3609 ALIGN 323610 __bn_postx4x_internal:3611 3612 mov r12,QWORD[rbp]3613 mov r10,rcx3614 mov r9,rcx3615 neg rax3616 sar rcx,3+23617 3618 DB 102,72,15,126,2023619 DB 102,72,15,126,2063620 dec r123621 mov r13,QWORD[8+rbp]3622 xor r8,r83623 mov r14,QWORD[16+rbp]3624 mov r15,QWORD[24+rbp]3625 jmp NEAR $L$sqrx4x_sub_entry3626 3627 ALIGN 163628 $L$sqrx4x_sub:3629 mov r12,QWORD[rbp]3630 mov r13,QWORD[8+rbp]3631 mov r14,QWORD[16+rbp]3632 mov r15,QWORD[24+rbp]3633 $L$sqrx4x_sub_entry:3634 andn r12,r12,rax3635 lea rbp,[32+rbp]3636 andn r13,r13,rax3637 andn r14,r14,rax3638 andn r15,r15,rax3639 3640 neg r83641 adc r12,QWORD[rdi]3642 adc r13,QWORD[8+rdi]3643 adc r14,QWORD[16+rdi]3644 adc r15,QWORD[24+rdi]3645 mov QWORD[rdx],r123646 lea rdi,[32+rdi]3647 mov QWORD[8+rdx],r133648 sbb r8,r83649 mov QWORD[16+rdx],r143650 mov QWORD[24+rdx],r153651 lea rdx,[32+rdx]3652 3653 inc rcx3654 jnz NEAR $L$sqrx4x_sub3655 3656 neg r93657 3658 DB 0F3h,0C3h ;repret3659 3660 3661 2268 global bn_get_bits5 3662 2269 … … 3995 2602 DD $L$SEH_end_bn_from_mont8x wrt ..imagebase 3996 2603 DD $L$SEH_info_bn_from_mont8x wrt ..imagebase 3997 DD $L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase3998 DD $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase3999 DD $L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase4000 4001 DD $L$SEH_begin_bn_powerx5 wrt ..imagebase4002 DD $L$SEH_end_bn_powerx5 wrt ..imagebase4003 DD $L$SEH_info_bn_powerx5 wrt ..imagebase4004 2604 DD $L$SEH_begin_bn_gather5 wrt ..imagebase 4005 2605 DD $L$SEH_end_bn_gather5 wrt ..imagebase … … 4028 2628 DD $L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase 4029 2629 ALIGN 8 4030 $L$SEH_info_bn_mulx4x_mont_gather5:4031 DB 9,0,0,04032 DD mul_handler wrt ..imagebase4033 DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase4034 ALIGN 84035 $L$SEH_info_bn_powerx5:4036 DB 9,0,0,04037 DD mul_handler wrt ..imagebase4038 DD $L$powerx5_prologue wrt ..imagebase,$L$powerx5_body wrt ..imagebase,$L$powerx5_epilogue wrt ..imagebase4039 ALIGN 84040 2630 $L$SEH_info_bn_gather5: 4041 2631 DB 0x01,0x0b,0x03,0x0a
Note:
See TracChangeset
for help on using the changeset viewer.