Changeset 99371 in vbox
- Timestamp:
- Apr 11, 2023 10:16:56 AM (20 months ago)
- Location:
- trunk/src/libs/openssl-3.1.0
- Files:
-
- 29 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/libs/openssl-3.1.0/crypto/bn/Makefile.kmk
r99366 r99371 94 94 $(VBOX_PATH_CRYPTO_BN)/asm/x86-gf2m.pl \ 95 95 $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-avx2.pl \ 96 $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-avx512.pl \97 96 $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-x86_64.pl \ 98 97 $(VBOX_PATH_CRYPTO_BN)/asm/x86_64-gf2m.pl \ … … 104 103 perl $(VBOX_PATH_CRYPTO_BN)/asm/x86-gf2m.pl elf -fPIC $(VBOX_PATH_CRYPTO)/genasm-elf/x86-gf2m.S 105 104 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-avx2.pl elf $(VBOX_PATH_CRYPTO)/genasm-elf/rsaz-avx2.S 106 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-avx512.pl elf $(VBOX_PATH_CRYPTO)/genasm-elf/rsaz-avx512.S107 105 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-x86_64.pl elf $(VBOX_PATH_CRYPTO)/genasm-elf/rsaz-x86_64.S 108 106 perl $(VBOX_PATH_CRYPTO_BN)/asm/x86_64-gf2m.pl elf $(VBOX_PATH_CRYPTO)/genasm-elf/x86_64-gf2m.S … … 114 112 perl $(VBOX_PATH_CRYPTO_BN)/asm/x86-gf2m.pl macosx $(VBOX_PATH_CRYPTO)/genasm-macosx/x86-gf2m.S 115 113 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-avx2.pl macosx $(VBOX_PATH_CRYPTO)/genasm-macosx/rsaz-avx2.S 116 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-avx512.pl macosx $(VBOX_PATH_CRYPTO)/genasm-macosx/rsaz-avx512.S117 114 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-x86_64.pl macosx $(VBOX_PATH_CRYPTO)/genasm-macosx/rsaz-x86_64.S 118 115 perl $(VBOX_PATH_CRYPTO_BN)/asm/x86_64-gf2m.pl macosx $(VBOX_PATH_CRYPTO)/genasm-macosx/x86_64-gf2m.S … … 124 121 perl $(VBOX_PATH_CRYPTO_BN)/asm/x86-gf2m.pl win32n $(VBOX_PATH_CRYPTO)/genasm-nasm/x86-gf2m.S 125 122 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-avx2.pl nasm $(VBOX_PATH_CRYPTO)/genasm-nasm/rsaz-avx2.S 126 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-avx512.pl nasm $(VBOX_PATH_CRYPTO)/genasm-nasm/rsaz-avx512.S127 123 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-x86_64.pl nasm $(VBOX_PATH_CRYPTO)/genasm-nasm/rsaz-x86_64.S 128 124 perl $(VBOX_PATH_CRYPTO_BN)/asm/x86_64-gf2m.pl nasm $(VBOX_PATH_CRYPTO)/genasm-nasm/x86_64-gf2m.S -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/aesni-gcm-x86_64.S
r97373 r99371 6 6 7 7 8 9 ALIGN 32 10 _aesni_ctr32_ghash_6x: 11 12 vmovdqu xmm2,XMMWORD[32+r11] 13 sub rdx,6 14 vpxor xmm4,xmm4,xmm4 15 vmovdqu xmm15,XMMWORD[((0-128))+rcx] 16 vpaddb xmm10,xmm1,xmm2 17 vpaddb xmm11,xmm10,xmm2 18 vpaddb xmm12,xmm11,xmm2 19 vpaddb xmm13,xmm12,xmm2 20 vpaddb xmm14,xmm13,xmm2 21 vpxor xmm9,xmm1,xmm15 22 vmovdqu XMMWORD[(16+8)+rsp],xmm4 23 jmp NEAR $L$oop6x 24 25 ALIGN 32 26 $L$oop6x: 27 add ebx,100663296 28 jc NEAR $L$handle_ctr32 29 vmovdqu xmm3,XMMWORD[((0-32))+r9] 30 vpaddb xmm1,xmm14,xmm2 31 vpxor xmm10,xmm10,xmm15 32 vpxor xmm11,xmm11,xmm15 33 34 $L$resume_ctr32: 35 vmovdqu XMMWORD[r8],xmm1 36 vpclmulqdq xmm5,xmm7,xmm3,0x10 37 vpxor xmm12,xmm12,xmm15 38 vmovups xmm2,XMMWORD[((16-128))+rcx] 39 vpclmulqdq xmm6,xmm7,xmm3,0x01 40 xor r12,r12 41 cmp r15,r14 42 43 vaesenc xmm9,xmm9,xmm2 44 vmovdqu xmm0,XMMWORD[((48+8))+rsp] 45 vpxor xmm13,xmm13,xmm15 46 vpclmulqdq xmm1,xmm7,xmm3,0x00 47 vaesenc xmm10,xmm10,xmm2 48 vpxor xmm14,xmm14,xmm15 49 setnc r12b 50 vpclmulqdq xmm7,xmm7,xmm3,0x11 51 vaesenc xmm11,xmm11,xmm2 52 vmovdqu xmm3,XMMWORD[((16-32))+r9] 53 neg r12 54 vaesenc xmm12,xmm12,xmm2 55 vpxor xmm6,xmm6,xmm5 56 vpclmulqdq xmm5,xmm0,xmm3,0x00 57 vpxor xmm8,xmm8,xmm4 58 vaesenc xmm13,xmm13,xmm2 59 vpxor xmm4,xmm1,xmm5 60 and r12,0x60 61 vmovups xmm15,XMMWORD[((32-128))+rcx] 62 vpclmulqdq xmm1,xmm0,xmm3,0x10 63 vaesenc xmm14,xmm14,xmm2 64 65 vpclmulqdq xmm2,xmm0,xmm3,0x01 66 lea r14,[r12*1+r14] 67 vaesenc xmm9,xmm9,xmm15 68 vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp] 69 vpclmulqdq xmm3,xmm0,xmm3,0x11 70 vmovdqu xmm0,XMMWORD[((64+8))+rsp] 71 vaesenc xmm10,xmm10,xmm15 72 movbe r13,QWORD[88+r14] 73 vaesenc xmm11,xmm11,xmm15 74 movbe r12,QWORD[80+r14] 75 vaesenc xmm12,xmm12,xmm15 76 mov QWORD[((32+8))+rsp],r13 77 vaesenc xmm13,xmm13,xmm15 78 mov QWORD[((40+8))+rsp],r12 79 vmovdqu xmm5,XMMWORD[((48-32))+r9] 80 vaesenc xmm14,xmm14,xmm15 81 82 vmovups xmm15,XMMWORD[((48-128))+rcx] 83 vpxor xmm6,xmm6,xmm1 84 vpclmulqdq xmm1,xmm0,xmm5,0x00 85 vaesenc xmm9,xmm9,xmm15 86 vpxor xmm6,xmm6,xmm2 87 vpclmulqdq xmm2,xmm0,xmm5,0x10 88 vaesenc xmm10,xmm10,xmm15 89 vpxor xmm7,xmm7,xmm3 90 vpclmulqdq xmm3,xmm0,xmm5,0x01 91 vaesenc xmm11,xmm11,xmm15 92 vpclmulqdq xmm5,xmm0,xmm5,0x11 93 vmovdqu xmm0,XMMWORD[((80+8))+rsp] 94 vaesenc xmm12,xmm12,xmm15 95 vaesenc xmm13,xmm13,xmm15 96 vpxor xmm4,xmm4,xmm1 97 vmovdqu xmm1,XMMWORD[((64-32))+r9] 98 vaesenc xmm14,xmm14,xmm15 99 100 vmovups xmm15,XMMWORD[((64-128))+rcx] 101 vpxor xmm6,xmm6,xmm2 102 vpclmulqdq xmm2,xmm0,xmm1,0x00 103 vaesenc xmm9,xmm9,xmm15 104 vpxor xmm6,xmm6,xmm3 105 vpclmulqdq xmm3,xmm0,xmm1,0x10 106 vaesenc xmm10,xmm10,xmm15 107 movbe r13,QWORD[72+r14] 108 vpxor xmm7,xmm7,xmm5 109 vpclmulqdq xmm5,xmm0,xmm1,0x01 110 vaesenc xmm11,xmm11,xmm15 111 movbe r12,QWORD[64+r14] 112 vpclmulqdq xmm1,xmm0,xmm1,0x11 113 vmovdqu xmm0,XMMWORD[((96+8))+rsp] 114 vaesenc xmm12,xmm12,xmm15 115 mov QWORD[((48+8))+rsp],r13 116 vaesenc xmm13,xmm13,xmm15 117 mov QWORD[((56+8))+rsp],r12 118 vpxor xmm4,xmm4,xmm2 119 vmovdqu xmm2,XMMWORD[((96-32))+r9] 120 vaesenc xmm14,xmm14,xmm15 121 122 vmovups xmm15,XMMWORD[((80-128))+rcx] 123 vpxor xmm6,xmm6,xmm3 124 vpclmulqdq xmm3,xmm0,xmm2,0x00 125 vaesenc xmm9,xmm9,xmm15 126 vpxor xmm6,xmm6,xmm5 127 vpclmulqdq xmm5,xmm0,xmm2,0x10 128 vaesenc xmm10,xmm10,xmm15 129 movbe r13,QWORD[56+r14] 130 vpxor xmm7,xmm7,xmm1 131 vpclmulqdq xmm1,xmm0,xmm2,0x01 132 vpxor xmm8,xmm8,XMMWORD[((112+8))+rsp] 133 vaesenc xmm11,xmm11,xmm15 134 movbe r12,QWORD[48+r14] 135 vpclmulqdq xmm2,xmm0,xmm2,0x11 136 vaesenc xmm12,xmm12,xmm15 137 mov QWORD[((64+8))+rsp],r13 138 vaesenc xmm13,xmm13,xmm15 139 mov QWORD[((72+8))+rsp],r12 140 vpxor xmm4,xmm4,xmm3 141 vmovdqu xmm3,XMMWORD[((112-32))+r9] 142 vaesenc xmm14,xmm14,xmm15 143 144 vmovups xmm15,XMMWORD[((96-128))+rcx] 145 vpxor xmm6,xmm6,xmm5 146 vpclmulqdq xmm5,xmm8,xmm3,0x10 147 vaesenc xmm9,xmm9,xmm15 148 vpxor xmm6,xmm6,xmm1 149 vpclmulqdq xmm1,xmm8,xmm3,0x01 150 vaesenc xmm10,xmm10,xmm15 151 movbe r13,QWORD[40+r14] 152 vpxor xmm7,xmm7,xmm2 153 vpclmulqdq xmm2,xmm8,xmm3,0x00 154 vaesenc xmm11,xmm11,xmm15 155 movbe r12,QWORD[32+r14] 156 vpclmulqdq xmm8,xmm8,xmm3,0x11 157 vaesenc xmm12,xmm12,xmm15 158 mov QWORD[((80+8))+rsp],r13 159 vaesenc xmm13,xmm13,xmm15 160 mov QWORD[((88+8))+rsp],r12 161 vpxor xmm6,xmm6,xmm5 162 vaesenc xmm14,xmm14,xmm15 163 vpxor xmm6,xmm6,xmm1 164 165 vmovups xmm15,XMMWORD[((112-128))+rcx] 166 vpslldq xmm5,xmm6,8 167 vpxor xmm4,xmm4,xmm2 168 vmovdqu xmm3,XMMWORD[16+r11] 169 170 vaesenc xmm9,xmm9,xmm15 171 vpxor xmm7,xmm7,xmm8 172 vaesenc xmm10,xmm10,xmm15 173 vpxor xmm4,xmm4,xmm5 174 movbe r13,QWORD[24+r14] 175 vaesenc xmm11,xmm11,xmm15 176 movbe r12,QWORD[16+r14] 177 vpalignr xmm0,xmm4,xmm4,8 178 vpclmulqdq xmm4,xmm4,xmm3,0x10 179 mov QWORD[((96+8))+rsp],r13 180 vaesenc xmm12,xmm12,xmm15 181 mov QWORD[((104+8))+rsp],r12 182 vaesenc xmm13,xmm13,xmm15 183 vmovups xmm1,XMMWORD[((128-128))+rcx] 184 vaesenc xmm14,xmm14,xmm15 185 186 vaesenc xmm9,xmm9,xmm1 187 vmovups xmm15,XMMWORD[((144-128))+rcx] 188 vaesenc xmm10,xmm10,xmm1 189 vpsrldq xmm6,xmm6,8 190 vaesenc xmm11,xmm11,xmm1 191 vpxor xmm7,xmm7,xmm6 192 vaesenc xmm12,xmm12,xmm1 193 vpxor xmm4,xmm4,xmm0 194 movbe r13,QWORD[8+r14] 195 vaesenc xmm13,xmm13,xmm1 196 movbe r12,QWORD[r14] 197 vaesenc xmm14,xmm14,xmm1 198 vmovups xmm1,XMMWORD[((160-128))+rcx] 199 cmp ebp,11 200 jb NEAR $L$enc_tail 201 202 vaesenc xmm9,xmm9,xmm15 203 vaesenc xmm10,xmm10,xmm15 204 vaesenc xmm11,xmm11,xmm15 205 vaesenc xmm12,xmm12,xmm15 206 vaesenc xmm13,xmm13,xmm15 207 vaesenc xmm14,xmm14,xmm15 208 209 vaesenc xmm9,xmm9,xmm1 210 vaesenc xmm10,xmm10,xmm1 211 vaesenc xmm11,xmm11,xmm1 212 vaesenc xmm12,xmm12,xmm1 213 vaesenc xmm13,xmm13,xmm1 214 vmovups xmm15,XMMWORD[((176-128))+rcx] 215 vaesenc xmm14,xmm14,xmm1 216 vmovups xmm1,XMMWORD[((192-128))+rcx] 217 je NEAR $L$enc_tail 218 219 vaesenc xmm9,xmm9,xmm15 220 vaesenc xmm10,xmm10,xmm15 221 vaesenc xmm11,xmm11,xmm15 222 vaesenc xmm12,xmm12,xmm15 223 vaesenc xmm13,xmm13,xmm15 224 vaesenc xmm14,xmm14,xmm15 225 226 vaesenc xmm9,xmm9,xmm1 227 vaesenc xmm10,xmm10,xmm1 228 vaesenc xmm11,xmm11,xmm1 229 vaesenc xmm12,xmm12,xmm1 230 vaesenc xmm13,xmm13,xmm1 231 vmovups xmm15,XMMWORD[((208-128))+rcx] 232 vaesenc xmm14,xmm14,xmm1 233 vmovups xmm1,XMMWORD[((224-128))+rcx] 234 jmp NEAR $L$enc_tail 235 236 ALIGN 32 237 $L$handle_ctr32: 238 vmovdqu xmm0,XMMWORD[r11] 239 vpshufb xmm6,xmm1,xmm0 240 vmovdqu xmm5,XMMWORD[48+r11] 241 vpaddd xmm10,xmm6,XMMWORD[64+r11] 242 vpaddd xmm11,xmm6,xmm5 243 vmovdqu xmm3,XMMWORD[((0-32))+r9] 244 vpaddd xmm12,xmm10,xmm5 245 vpshufb xmm10,xmm10,xmm0 246 vpaddd xmm13,xmm11,xmm5 247 vpshufb xmm11,xmm11,xmm0 248 vpxor xmm10,xmm10,xmm15 249 vpaddd xmm14,xmm12,xmm5 250 vpshufb xmm12,xmm12,xmm0 251 vpxor xmm11,xmm11,xmm15 252 vpaddd xmm1,xmm13,xmm5 253 vpshufb xmm13,xmm13,xmm0 254 vpshufb xmm14,xmm14,xmm0 255 vpshufb xmm1,xmm1,xmm0 256 jmp NEAR $L$resume_ctr32 257 258 ALIGN 32 259 $L$enc_tail: 260 vaesenc xmm9,xmm9,xmm15 261 vmovdqu XMMWORD[(16+8)+rsp],xmm7 262 vpalignr xmm8,xmm4,xmm4,8 263 vaesenc xmm10,xmm10,xmm15 264 vpclmulqdq xmm4,xmm4,xmm3,0x10 265 vpxor xmm2,xmm1,XMMWORD[rdi] 266 vaesenc xmm11,xmm11,xmm15 267 vpxor xmm0,xmm1,XMMWORD[16+rdi] 268 vaesenc xmm12,xmm12,xmm15 269 vpxor xmm5,xmm1,XMMWORD[32+rdi] 270 vaesenc xmm13,xmm13,xmm15 271 vpxor xmm6,xmm1,XMMWORD[48+rdi] 272 vaesenc xmm14,xmm14,xmm15 273 vpxor xmm7,xmm1,XMMWORD[64+rdi] 274 vpxor xmm3,xmm1,XMMWORD[80+rdi] 275 vmovdqu xmm1,XMMWORD[r8] 276 277 vaesenclast xmm9,xmm9,xmm2 278 vmovdqu xmm2,XMMWORD[32+r11] 279 vaesenclast xmm10,xmm10,xmm0 280 vpaddb xmm0,xmm1,xmm2 281 mov QWORD[((112+8))+rsp],r13 282 lea rdi,[96+rdi] 283 vaesenclast xmm11,xmm11,xmm5 284 vpaddb xmm5,xmm0,xmm2 285 mov QWORD[((120+8))+rsp],r12 286 lea rsi,[96+rsi] 287 vmovdqu xmm15,XMMWORD[((0-128))+rcx] 288 vaesenclast xmm12,xmm12,xmm6 289 vpaddb xmm6,xmm5,xmm2 290 vaesenclast xmm13,xmm13,xmm7 291 vpaddb xmm7,xmm6,xmm2 292 vaesenclast xmm14,xmm14,xmm3 293 vpaddb xmm3,xmm7,xmm2 294 295 add r10,0x60 296 sub rdx,0x6 297 jc NEAR $L$6x_done 298 299 vmovups XMMWORD[(-96)+rsi],xmm9 300 vpxor xmm9,xmm1,xmm15 301 vmovups XMMWORD[(-80)+rsi],xmm10 302 vmovdqa xmm10,xmm0 303 vmovups XMMWORD[(-64)+rsi],xmm11 304 vmovdqa xmm11,xmm5 305 vmovups XMMWORD[(-48)+rsi],xmm12 306 vmovdqa xmm12,xmm6 307 vmovups XMMWORD[(-32)+rsi],xmm13 308 vmovdqa xmm13,xmm7 309 vmovups XMMWORD[(-16)+rsi],xmm14 310 vmovdqa xmm14,xmm3 311 vmovdqu xmm7,XMMWORD[((32+8))+rsp] 312 jmp NEAR $L$oop6x 313 314 $L$6x_done: 315 vpxor xmm8,xmm8,XMMWORD[((16+8))+rsp] 316 vpxor xmm8,xmm8,xmm4 317 318 DB 0F3h,0C3h ;repret 319 320 321 global aesni_gcm_decrypt 322 323 ALIGN 32 324 aesni_gcm_decrypt: 325 mov QWORD[8+rsp],rdi ;WIN64 prologue 326 mov QWORD[16+rsp],rsi 327 mov rax,rsp 328 $L$SEH_begin_aesni_gcm_decrypt: 329 mov rdi,rcx 330 mov rsi,rdx 331 mov rdx,r8 332 mov rcx,r9 333 mov r8,QWORD[40+rsp] 334 mov r9,QWORD[48+rsp] 335 336 337 338 xor r10,r10 339 cmp rdx,0x60 340 jb NEAR $L$gcm_dec_abort 341 342 lea rax,[rsp] 343 344 push rbx 345 346 push rbp 347 348 push r12 349 350 push r13 351 352 push r14 353 354 push r15 355 356 lea rsp,[((-168))+rsp] 357 movaps XMMWORD[(-216)+rax],xmm6 358 movaps XMMWORD[(-200)+rax],xmm7 359 movaps XMMWORD[(-184)+rax],xmm8 360 movaps XMMWORD[(-168)+rax],xmm9 361 movaps XMMWORD[(-152)+rax],xmm10 362 movaps XMMWORD[(-136)+rax],xmm11 363 movaps XMMWORD[(-120)+rax],xmm12 364 movaps XMMWORD[(-104)+rax],xmm13 365 movaps XMMWORD[(-88)+rax],xmm14 366 movaps XMMWORD[(-72)+rax],xmm15 367 $L$gcm_dec_body: 368 vzeroupper 369 370 vmovdqu xmm1,XMMWORD[r8] 371 add rsp,-128 372 mov ebx,DWORD[12+r8] 373 lea r11,[$L$bswap_mask] 374 lea r14,[((-128))+rcx] 375 mov r15,0xf80 376 vmovdqu xmm8,XMMWORD[r9] 377 and rsp,-128 378 vmovdqu xmm0,XMMWORD[r11] 379 lea rcx,[128+rcx] 380 lea r9,[((32+32))+r9] 381 mov ebp,DWORD[((240-128))+rcx] 382 vpshufb xmm8,xmm8,xmm0 383 384 and r14,r15 385 and r15,rsp 386 sub r15,r14 387 jc NEAR $L$dec_no_key_aliasing 388 cmp r15,768 389 jnc NEAR $L$dec_no_key_aliasing 390 sub rsp,r15 391 $L$dec_no_key_aliasing: 392 393 vmovdqu xmm7,XMMWORD[80+rdi] 394 lea r14,[rdi] 395 vmovdqu xmm4,XMMWORD[64+rdi] 396 lea r15,[((-192))+rdx*1+rdi] 397 vmovdqu xmm5,XMMWORD[48+rdi] 398 shr rdx,4 399 xor r10,r10 400 vmovdqu xmm6,XMMWORD[32+rdi] 401 vpshufb xmm7,xmm7,xmm0 402 vmovdqu xmm2,XMMWORD[16+rdi] 403 vpshufb xmm4,xmm4,xmm0 404 vmovdqu xmm3,XMMWORD[rdi] 405 vpshufb xmm5,xmm5,xmm0 406 vmovdqu XMMWORD[48+rsp],xmm4 407 vpshufb xmm6,xmm6,xmm0 408 vmovdqu XMMWORD[64+rsp],xmm5 409 vpshufb xmm2,xmm2,xmm0 410 vmovdqu XMMWORD[80+rsp],xmm6 411 vpshufb xmm3,xmm3,xmm0 412 vmovdqu XMMWORD[96+rsp],xmm2 413 vmovdqu XMMWORD[112+rsp],xmm3 414 415 call _aesni_ctr32_ghash_6x 416 417 vmovups XMMWORD[(-96)+rsi],xmm9 418 vmovups XMMWORD[(-80)+rsi],xmm10 419 vmovups XMMWORD[(-64)+rsi],xmm11 420 vmovups XMMWORD[(-48)+rsi],xmm12 421 vmovups XMMWORD[(-32)+rsi],xmm13 422 vmovups XMMWORD[(-16)+rsi],xmm14 423 424 vpshufb xmm8,xmm8,XMMWORD[r11] 425 vmovdqu XMMWORD[(-64)+r9],xmm8 426 427 vzeroupper 428 movaps xmm6,XMMWORD[((-216))+rax] 429 movaps xmm7,XMMWORD[((-200))+rax] 430 movaps xmm8,XMMWORD[((-184))+rax] 431 movaps xmm9,XMMWORD[((-168))+rax] 432 movaps xmm10,XMMWORD[((-152))+rax] 433 movaps xmm11,XMMWORD[((-136))+rax] 434 movaps xmm12,XMMWORD[((-120))+rax] 435 movaps xmm13,XMMWORD[((-104))+rax] 436 movaps xmm14,XMMWORD[((-88))+rax] 437 movaps xmm15,XMMWORD[((-72))+rax] 438 mov r15,QWORD[((-48))+rax] 439 440 mov r14,QWORD[((-40))+rax] 441 442 mov r13,QWORD[((-32))+rax] 443 444 mov r12,QWORD[((-24))+rax] 445 446 mov rbp,QWORD[((-16))+rax] 447 448 mov rbx,QWORD[((-8))+rax] 449 450 lea rsp,[rax] 451 452 $L$gcm_dec_abort: 453 mov rax,r10 454 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 455 mov rsi,QWORD[16+rsp] 456 DB 0F3h,0C3h ;repret 457 458 $L$SEH_end_aesni_gcm_decrypt: 459 460 ALIGN 32 461 _aesni_ctr32_6x: 462 463 vmovdqu xmm4,XMMWORD[((0-128))+rcx] 464 vmovdqu xmm2,XMMWORD[32+r11] 465 lea r13,[((-1))+rbp] 466 vmovups xmm15,XMMWORD[((16-128))+rcx] 467 lea r12,[((32-128))+rcx] 468 vpxor xmm9,xmm1,xmm4 469 add ebx,100663296 470 jc NEAR $L$handle_ctr32_2 471 vpaddb xmm10,xmm1,xmm2 472 vpaddb xmm11,xmm10,xmm2 473 vpxor xmm10,xmm10,xmm4 474 vpaddb xmm12,xmm11,xmm2 475 vpxor xmm11,xmm11,xmm4 476 vpaddb xmm13,xmm12,xmm2 477 vpxor xmm12,xmm12,xmm4 478 vpaddb xmm14,xmm13,xmm2 479 vpxor xmm13,xmm13,xmm4 480 vpaddb xmm1,xmm14,xmm2 481 vpxor xmm14,xmm14,xmm4 482 jmp NEAR $L$oop_ctr32 483 484 ALIGN 16 485 $L$oop_ctr32: 486 vaesenc xmm9,xmm9,xmm15 487 vaesenc xmm10,xmm10,xmm15 488 vaesenc xmm11,xmm11,xmm15 489 vaesenc xmm12,xmm12,xmm15 490 vaesenc xmm13,xmm13,xmm15 491 vaesenc xmm14,xmm14,xmm15 492 vmovups xmm15,XMMWORD[r12] 493 lea r12,[16+r12] 494 dec r13d 495 jnz NEAR $L$oop_ctr32 496 497 vmovdqu xmm3,XMMWORD[r12] 498 vaesenc xmm9,xmm9,xmm15 499 vpxor xmm4,xmm3,XMMWORD[rdi] 500 vaesenc xmm10,xmm10,xmm15 501 vpxor xmm5,xmm3,XMMWORD[16+rdi] 502 vaesenc xmm11,xmm11,xmm15 503 vpxor xmm6,xmm3,XMMWORD[32+rdi] 504 vaesenc xmm12,xmm12,xmm15 505 vpxor xmm8,xmm3,XMMWORD[48+rdi] 506 vaesenc xmm13,xmm13,xmm15 507 vpxor xmm2,xmm3,XMMWORD[64+rdi] 508 vaesenc xmm14,xmm14,xmm15 509 vpxor xmm3,xmm3,XMMWORD[80+rdi] 510 lea rdi,[96+rdi] 511 512 vaesenclast xmm9,xmm9,xmm4 513 vaesenclast xmm10,xmm10,xmm5 514 vaesenclast xmm11,xmm11,xmm6 515 vaesenclast xmm12,xmm12,xmm8 516 vaesenclast xmm13,xmm13,xmm2 517 vaesenclast xmm14,xmm14,xmm3 518 vmovups XMMWORD[rsi],xmm9 519 vmovups XMMWORD[16+rsi],xmm10 520 vmovups XMMWORD[32+rsi],xmm11 521 vmovups XMMWORD[48+rsi],xmm12 522 vmovups XMMWORD[64+rsi],xmm13 523 vmovups XMMWORD[80+rsi],xmm14 524 lea rsi,[96+rsi] 525 526 DB 0F3h,0C3h ;repret 527 ALIGN 32 528 $L$handle_ctr32_2: 529 vpshufb xmm6,xmm1,xmm0 530 vmovdqu xmm5,XMMWORD[48+r11] 531 vpaddd xmm10,xmm6,XMMWORD[64+r11] 532 vpaddd xmm11,xmm6,xmm5 533 vpaddd xmm12,xmm10,xmm5 534 vpshufb xmm10,xmm10,xmm0 535 vpaddd xmm13,xmm11,xmm5 536 vpshufb xmm11,xmm11,xmm0 537 vpxor xmm10,xmm10,xmm4 538 vpaddd xmm14,xmm12,xmm5 539 vpshufb xmm12,xmm12,xmm0 540 vpxor xmm11,xmm11,xmm4 541 vpaddd xmm1,xmm13,xmm5 542 vpshufb xmm13,xmm13,xmm0 543 vpxor xmm12,xmm12,xmm4 544 vpshufb xmm14,xmm14,xmm0 545 vpxor xmm13,xmm13,xmm4 546 vpshufb xmm1,xmm1,xmm0 547 vpxor xmm14,xmm14,xmm4 548 jmp NEAR $L$oop_ctr32 549 550 551 8 552 global aesni_gcm_encrypt 9 553 554 ALIGN 32 10 555 aesni_gcm_encrypt: 11 12 xor eax,eax 556 mov QWORD[8+rsp],rdi ;WIN64 prologue 557 mov QWORD[16+rsp],rsi 558 mov rax,rsp 559 $L$SEH_begin_aesni_gcm_encrypt: 560 mov rdi,rcx 561 mov rsi,rdx 562 mov rdx,r8 563 mov rcx,r9 564 mov r8,QWORD[40+rsp] 565 mov r9,QWORD[48+rsp] 566 567 568 569 xor r10,r10 570 cmp rdx,0x60*3 571 jb NEAR $L$gcm_enc_abort 572 573 lea rax,[rsp] 574 575 push rbx 576 577 push rbp 578 579 push r12 580 581 push r13 582 583 push r14 584 585 push r15 586 587 lea rsp,[((-168))+rsp] 588 movaps XMMWORD[(-216)+rax],xmm6 589 movaps XMMWORD[(-200)+rax],xmm7 590 movaps XMMWORD[(-184)+rax],xmm8 591 movaps XMMWORD[(-168)+rax],xmm9 592 movaps XMMWORD[(-152)+rax],xmm10 593 movaps XMMWORD[(-136)+rax],xmm11 594 movaps XMMWORD[(-120)+rax],xmm12 595 movaps XMMWORD[(-104)+rax],xmm13 596 movaps XMMWORD[(-88)+rax],xmm14 597 movaps XMMWORD[(-72)+rax],xmm15 598 $L$gcm_enc_body: 599 vzeroupper 600 601 vmovdqu xmm1,XMMWORD[r8] 602 add rsp,-128 603 mov ebx,DWORD[12+r8] 604 lea r11,[$L$bswap_mask] 605 lea r14,[((-128))+rcx] 606 mov r15,0xf80 607 lea rcx,[128+rcx] 608 vmovdqu xmm0,XMMWORD[r11] 609 and rsp,-128 610 mov ebp,DWORD[((240-128))+rcx] 611 612 and r14,r15 613 and r15,rsp 614 sub r15,r14 615 jc NEAR $L$enc_no_key_aliasing 616 cmp r15,768 617 jnc NEAR $L$enc_no_key_aliasing 618 sub rsp,r15 619 $L$enc_no_key_aliasing: 620 621 lea r14,[rsi] 622 lea r15,[((-192))+rdx*1+rsi] 623 shr rdx,4 624 625 call _aesni_ctr32_6x 626 vpshufb xmm8,xmm9,xmm0 627 vpshufb xmm2,xmm10,xmm0 628 vmovdqu XMMWORD[112+rsp],xmm8 629 vpshufb xmm4,xmm11,xmm0 630 vmovdqu XMMWORD[96+rsp],xmm2 631 vpshufb xmm5,xmm12,xmm0 632 vmovdqu XMMWORD[80+rsp],xmm4 633 vpshufb xmm6,xmm13,xmm0 634 vmovdqu XMMWORD[64+rsp],xmm5 635 vpshufb xmm7,xmm14,xmm0 636 vmovdqu XMMWORD[48+rsp],xmm6 637 638 call _aesni_ctr32_6x 639 640 vmovdqu xmm8,XMMWORD[r9] 641 lea r9,[((32+32))+r9] 642 sub rdx,12 643 mov r10,0x60*2 644 vpshufb xmm8,xmm8,xmm0 645 646 call _aesni_ctr32_ghash_6x 647 vmovdqu xmm7,XMMWORD[32+rsp] 648 vmovdqu xmm0,XMMWORD[r11] 649 vmovdqu xmm3,XMMWORD[((0-32))+r9] 650 vpunpckhqdq xmm1,xmm7,xmm7 651 vmovdqu xmm15,XMMWORD[((32-32))+r9] 652 vmovups XMMWORD[(-96)+rsi],xmm9 653 vpshufb xmm9,xmm9,xmm0 654 vpxor xmm1,xmm1,xmm7 655 vmovups XMMWORD[(-80)+rsi],xmm10 656 vpshufb xmm10,xmm10,xmm0 657 vmovups XMMWORD[(-64)+rsi],xmm11 658 vpshufb xmm11,xmm11,xmm0 659 vmovups XMMWORD[(-48)+rsi],xmm12 660 vpshufb xmm12,xmm12,xmm0 661 vmovups XMMWORD[(-32)+rsi],xmm13 662 vpshufb xmm13,xmm13,xmm0 663 vmovups XMMWORD[(-16)+rsi],xmm14 664 vpshufb xmm14,xmm14,xmm0 665 vmovdqu XMMWORD[16+rsp],xmm9 666 vmovdqu xmm6,XMMWORD[48+rsp] 667 vmovdqu xmm0,XMMWORD[((16-32))+r9] 668 vpunpckhqdq xmm2,xmm6,xmm6 669 vpclmulqdq xmm5,xmm7,xmm3,0x00 670 vpxor xmm2,xmm2,xmm6 671 vpclmulqdq xmm7,xmm7,xmm3,0x11 672 vpclmulqdq xmm1,xmm1,xmm15,0x00 673 674 vmovdqu xmm9,XMMWORD[64+rsp] 675 vpclmulqdq xmm4,xmm6,xmm0,0x00 676 vmovdqu xmm3,XMMWORD[((48-32))+r9] 677 vpxor xmm4,xmm4,xmm5 678 vpunpckhqdq xmm5,xmm9,xmm9 679 vpclmulqdq xmm6,xmm6,xmm0,0x11 680 vpxor xmm5,xmm5,xmm9 681 vpxor xmm6,xmm6,xmm7 682 vpclmulqdq xmm2,xmm2,xmm15,0x10 683 vmovdqu xmm15,XMMWORD[((80-32))+r9] 684 vpxor xmm2,xmm2,xmm1 685 686 vmovdqu xmm1,XMMWORD[80+rsp] 687 vpclmulqdq xmm7,xmm9,xmm3,0x00 688 vmovdqu xmm0,XMMWORD[((64-32))+r9] 689 vpxor xmm7,xmm7,xmm4 690 vpunpckhqdq xmm4,xmm1,xmm1 691 vpclmulqdq xmm9,xmm9,xmm3,0x11 692 vpxor xmm4,xmm4,xmm1 693 vpxor xmm9,xmm9,xmm6 694 vpclmulqdq xmm5,xmm5,xmm15,0x00 695 vpxor xmm5,xmm5,xmm2 696 697 vmovdqu xmm2,XMMWORD[96+rsp] 698 vpclmulqdq xmm6,xmm1,xmm0,0x00 699 vmovdqu xmm3,XMMWORD[((96-32))+r9] 700 vpxor xmm6,xmm6,xmm7 701 vpunpckhqdq xmm7,xmm2,xmm2 702 vpclmulqdq xmm1,xmm1,xmm0,0x11 703 vpxor xmm7,xmm7,xmm2 704 vpxor xmm1,xmm1,xmm9 705 vpclmulqdq xmm4,xmm4,xmm15,0x10 706 vmovdqu xmm15,XMMWORD[((128-32))+r9] 707 vpxor xmm4,xmm4,xmm5 708 709 vpxor xmm8,xmm8,XMMWORD[112+rsp] 710 vpclmulqdq xmm5,xmm2,xmm3,0x00 711 vmovdqu xmm0,XMMWORD[((112-32))+r9] 712 vpunpckhqdq xmm9,xmm8,xmm8 713 vpxor xmm5,xmm5,xmm6 714 vpclmulqdq xmm2,xmm2,xmm3,0x11 715 vpxor xmm9,xmm9,xmm8 716 vpxor xmm2,xmm2,xmm1 717 vpclmulqdq xmm7,xmm7,xmm15,0x00 718 vpxor xmm4,xmm7,xmm4 719 720 vpclmulqdq xmm6,xmm8,xmm0,0x00 721 vmovdqu xmm3,XMMWORD[((0-32))+r9] 722 vpunpckhqdq xmm1,xmm14,xmm14 723 vpclmulqdq xmm8,xmm8,xmm0,0x11 724 vpxor xmm1,xmm1,xmm14 725 vpxor xmm5,xmm6,xmm5 726 vpclmulqdq xmm9,xmm9,xmm15,0x10 727 vmovdqu xmm15,XMMWORD[((32-32))+r9] 728 vpxor xmm7,xmm8,xmm2 729 vpxor xmm6,xmm9,xmm4 730 731 vmovdqu xmm0,XMMWORD[((16-32))+r9] 732 vpxor xmm9,xmm7,xmm5 733 vpclmulqdq xmm4,xmm14,xmm3,0x00 734 vpxor xmm6,xmm6,xmm9 735 vpunpckhqdq xmm2,xmm13,xmm13 736 vpclmulqdq xmm14,xmm14,xmm3,0x11 737 vpxor xmm2,xmm2,xmm13 738 vpslldq xmm9,xmm6,8 739 vpclmulqdq xmm1,xmm1,xmm15,0x00 740 vpxor xmm8,xmm5,xmm9 741 vpsrldq xmm6,xmm6,8 742 vpxor xmm7,xmm7,xmm6 743 744 vpclmulqdq xmm5,xmm13,xmm0,0x00 745 vmovdqu xmm3,XMMWORD[((48-32))+r9] 746 vpxor xmm5,xmm5,xmm4 747 vpunpckhqdq xmm9,xmm12,xmm12 748 vpclmulqdq xmm13,xmm13,xmm0,0x11 749 vpxor xmm9,xmm9,xmm12 750 vpxor xmm13,xmm13,xmm14 751 vpalignr xmm14,xmm8,xmm8,8 752 vpclmulqdq xmm2,xmm2,xmm15,0x10 753 vmovdqu xmm15,XMMWORD[((80-32))+r9] 754 vpxor xmm2,xmm2,xmm1 755 756 vpclmulqdq xmm4,xmm12,xmm3,0x00 757 vmovdqu xmm0,XMMWORD[((64-32))+r9] 758 vpxor xmm4,xmm4,xmm5 759 vpunpckhqdq xmm1,xmm11,xmm11 760 vpclmulqdq xmm12,xmm12,xmm3,0x11 761 vpxor xmm1,xmm1,xmm11 762 vpxor xmm12,xmm12,xmm13 763 vxorps xmm7,xmm7,XMMWORD[16+rsp] 764 vpclmulqdq xmm9,xmm9,xmm15,0x00 765 vpxor xmm9,xmm9,xmm2 766 767 vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10 768 vxorps xmm8,xmm8,xmm14 769 770 vpclmulqdq xmm5,xmm11,xmm0,0x00 771 vmovdqu xmm3,XMMWORD[((96-32))+r9] 772 vpxor xmm5,xmm5,xmm4 773 vpunpckhqdq xmm2,xmm10,xmm10 774 vpclmulqdq xmm11,xmm11,xmm0,0x11 775 vpxor xmm2,xmm2,xmm10 776 vpalignr xmm14,xmm8,xmm8,8 777 vpxor xmm11,xmm11,xmm12 778 vpclmulqdq xmm1,xmm1,xmm15,0x10 779 vmovdqu xmm15,XMMWORD[((128-32))+r9] 780 vpxor xmm1,xmm1,xmm9 781 782 vxorps xmm14,xmm14,xmm7 783 vpclmulqdq xmm8,xmm8,XMMWORD[16+r11],0x10 784 vxorps xmm8,xmm8,xmm14 785 786 vpclmulqdq xmm4,xmm10,xmm3,0x00 787 vmovdqu xmm0,XMMWORD[((112-32))+r9] 788 vpxor xmm4,xmm4,xmm5 789 vpunpckhqdq xmm9,xmm8,xmm8 790 vpclmulqdq xmm10,xmm10,xmm3,0x11 791 vpxor xmm9,xmm9,xmm8 792 vpxor xmm10,xmm10,xmm11 793 vpclmulqdq xmm2,xmm2,xmm15,0x00 794 vpxor xmm2,xmm2,xmm1 795 796 vpclmulqdq xmm5,xmm8,xmm0,0x00 797 vpclmulqdq xmm7,xmm8,xmm0,0x11 798 vpxor xmm5,xmm5,xmm4 799 vpclmulqdq xmm6,xmm9,xmm15,0x10 800 vpxor xmm7,xmm7,xmm10 801 vpxor xmm6,xmm6,xmm2 802 803 vpxor xmm4,xmm7,xmm5 804 vpxor xmm6,xmm6,xmm4 805 vpslldq xmm1,xmm6,8 806 vmovdqu xmm3,XMMWORD[16+r11] 807 vpsrldq xmm6,xmm6,8 808 vpxor xmm8,xmm5,xmm1 809 vpxor xmm7,xmm7,xmm6 810 811 vpalignr xmm2,xmm8,xmm8,8 812 vpclmulqdq xmm8,xmm8,xmm3,0x10 813 vpxor xmm8,xmm8,xmm2 814 815 vpalignr xmm2,xmm8,xmm8,8 816 vpclmulqdq xmm8,xmm8,xmm3,0x10 817 vpxor xmm2,xmm2,xmm7 818 vpxor xmm8,xmm8,xmm2 819 vpshufb xmm8,xmm8,XMMWORD[r11] 820 vmovdqu XMMWORD[(-64)+r9],xmm8 821 822 vzeroupper 823 movaps xmm6,XMMWORD[((-216))+rax] 824 movaps xmm7,XMMWORD[((-200))+rax] 825 movaps xmm8,XMMWORD[((-184))+rax] 826 movaps xmm9,XMMWORD[((-168))+rax] 827 movaps xmm10,XMMWORD[((-152))+rax] 828 movaps xmm11,XMMWORD[((-136))+rax] 829 movaps xmm12,XMMWORD[((-120))+rax] 830 movaps xmm13,XMMWORD[((-104))+rax] 831 movaps xmm14,XMMWORD[((-88))+rax] 832 movaps xmm15,XMMWORD[((-72))+rax] 833 mov r15,QWORD[((-48))+rax] 834 835 mov r14,QWORD[((-40))+rax] 836 837 mov r13,QWORD[((-32))+rax] 838 839 mov r12,QWORD[((-24))+rax] 840 841 mov rbp,QWORD[((-16))+rax] 842 843 mov rbx,QWORD[((-8))+rax] 844 845 lea rsp,[rax] 846 847 $L$gcm_enc_abort: 848 mov rax,r10 849 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 850 mov rsi,QWORD[16+rsp] 13 851 DB 0F3h,0C3h ;repret 14 852 15 16 17 global aesni_gcm_decrypt 18 19 aesni_gcm_decrypt: 20 21 xor eax,eax 853 $L$SEH_end_aesni_gcm_encrypt: 854 ALIGN 64 855 $L$bswap_mask: 856 DB 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 857 $L$poly: 858 DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 859 $L$one_msb: 860 DB 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 861 $L$two_lsb: 862 DB 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 863 $L$one_lsb: 864 DB 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 865 DB 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108 866 DB 101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82 867 DB 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 868 DB 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 869 ALIGN 64 870 EXTERN __imp_RtlVirtualUnwind 871 872 ALIGN 16 873 gcm_se_handler: 874 push rsi 875 push rdi 876 push rbx 877 push rbp 878 push r12 879 push r13 880 push r14 881 push r15 882 pushfq 883 sub rsp,64 884 885 mov rax,QWORD[120+r8] 886 mov rbx,QWORD[248+r8] 887 888 mov rsi,QWORD[8+r9] 889 mov r11,QWORD[56+r9] 890 891 mov r10d,DWORD[r11] 892 lea r10,[r10*1+rsi] 893 cmp rbx,r10 894 jb NEAR $L$common_seh_tail 895 896 mov rax,QWORD[152+r8] 897 898 mov r10d,DWORD[4+r11] 899 lea r10,[r10*1+rsi] 900 cmp rbx,r10 901 jae NEAR $L$common_seh_tail 902 903 mov rax,QWORD[120+r8] 904 905 mov r15,QWORD[((-48))+rax] 906 mov r14,QWORD[((-40))+rax] 907 mov r13,QWORD[((-32))+rax] 908 mov r12,QWORD[((-24))+rax] 909 mov rbp,QWORD[((-16))+rax] 910 mov rbx,QWORD[((-8))+rax] 911 mov QWORD[240+r8],r15 912 mov QWORD[232+r8],r14 913 mov QWORD[224+r8],r13 914 mov QWORD[216+r8],r12 915 mov QWORD[160+r8],rbp 916 mov QWORD[144+r8],rbx 917 918 lea rsi,[((-216))+rax] 919 lea rdi,[512+r8] 920 mov ecx,20 921 DD 0xa548f3fc 922 923 $L$common_seh_tail: 924 mov rdi,QWORD[8+rax] 925 mov rsi,QWORD[16+rax] 926 mov QWORD[152+r8],rax 927 mov QWORD[168+r8],rsi 928 mov QWORD[176+r8],rdi 929 930 mov rdi,QWORD[40+r9] 931 mov rsi,r8 932 mov ecx,154 933 DD 0xa548f3fc 934 935 mov rsi,r9 936 xor rcx,rcx 937 mov rdx,QWORD[8+rsi] 938 mov r8,QWORD[rsi] 939 mov r9,QWORD[16+rsi] 940 mov r10,QWORD[40+rsi] 941 lea r11,[56+rsi] 942 lea r12,[24+rsi] 943 mov QWORD[32+rsp],r10 944 mov QWORD[40+rsp],r11 945 mov QWORD[48+rsp],r12 946 mov QWORD[56+rsp],rcx 947 call QWORD[__imp_RtlVirtualUnwind] 948 949 mov eax,1 950 add rsp,64 951 popfq 952 pop r15 953 pop r14 954 pop r13 955 pop r12 956 pop rbp 957 pop rbx 958 pop rdi 959 pop rsi 22 960 DB 0F3h,0C3h ;repret 23 961 24 962 963 section .pdata rdata align=4 964 ALIGN 4 965 DD $L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase 966 DD $L$SEH_end_aesni_gcm_decrypt wrt ..imagebase 967 DD $L$SEH_gcm_dec_info wrt ..imagebase 968 969 DD $L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase 970 DD $L$SEH_end_aesni_gcm_encrypt wrt ..imagebase 971 DD $L$SEH_gcm_enc_info wrt ..imagebase 972 section .xdata rdata align=8 973 ALIGN 8 974 $L$SEH_gcm_dec_info: 975 DB 9,0,0,0 976 DD gcm_se_handler wrt ..imagebase 977 DD $L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase 978 $L$SEH_gcm_enc_info: 979 DB 9,0,0,0 980 DD gcm_se_handler wrt ..imagebase 981 DD $L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/aesni-mb-x86_64.S
r97373 r99371 22 22 23 23 24 cmp edx,2 25 jb NEAR $L$enc_non_avx 26 mov ecx,DWORD[((OPENSSL_ia32cap_P+4))] 27 test ecx,268435456 28 jnz NEAR _avx_cbc_enc_shortcut 29 jmp NEAR $L$enc_non_avx 30 ALIGN 16 31 $L$enc_non_avx: 24 32 mov rax,rsp 25 33 … … 337 345 338 346 347 cmp edx,2 348 jb NEAR $L$dec_non_avx 349 mov ecx,DWORD[((OPENSSL_ia32cap_P+4))] 350 test ecx,268435456 351 jnz NEAR _avx_cbc_dec_shortcut 352 jmp NEAR $L$dec_non_avx 353 ALIGN 16 354 $L$dec_non_avx: 339 355 mov rax,rsp 340 356 … … 627 643 628 644 $L$SEH_end_aesni_multi_cbc_decrypt: 645 646 ALIGN 32 647 aesni_multi_cbc_encrypt_avx: 648 mov QWORD[8+rsp],rdi ;WIN64 prologue 649 mov QWORD[16+rsp],rsi 650 mov rax,rsp 651 $L$SEH_begin_aesni_multi_cbc_encrypt_avx: 652 mov rdi,rcx 653 mov rsi,rdx 654 mov rdx,r8 655 656 657 658 _avx_cbc_enc_shortcut: 659 mov rax,rsp 660 661 push rbx 662 663 push rbp 664 665 push r12 666 667 push r13 668 669 push r14 670 671 push r15 672 673 lea rsp,[((-168))+rsp] 674 movaps XMMWORD[rsp],xmm6 675 movaps XMMWORD[16+rsp],xmm7 676 movaps XMMWORD[32+rsp],xmm8 677 movaps XMMWORD[48+rsp],xmm9 678 movaps XMMWORD[64+rsp],xmm10 679 movaps XMMWORD[80+rsp],xmm11 680 movaps XMMWORD[(-120)+rax],xmm12 681 movaps XMMWORD[(-104)+rax],xmm13 682 movaps XMMWORD[(-88)+rax],xmm14 683 movaps XMMWORD[(-72)+rax],xmm15 684 685 686 687 688 689 690 691 692 sub rsp,192 693 and rsp,-128 694 mov QWORD[16+rsp],rax 695 696 697 $L$enc8x_body: 698 vzeroupper 699 vmovdqu xmm15,XMMWORD[rsi] 700 lea rsi,[120+rsi] 701 lea rdi,[160+rdi] 702 shr edx,1 703 704 $L$enc8x_loop_grande: 705 706 xor edx,edx 707 708 mov ecx,DWORD[((-144))+rdi] 709 710 mov r8,QWORD[((-160))+rdi] 711 cmp ecx,edx 712 713 mov rbx,QWORD[((-152))+rdi] 714 cmovg edx,ecx 715 test ecx,ecx 716 717 vmovdqu xmm2,XMMWORD[((-136))+rdi] 718 mov DWORD[32+rsp],ecx 719 cmovle r8,rsp 720 sub rbx,r8 721 mov QWORD[64+rsp],rbx 722 723 mov ecx,DWORD[((-104))+rdi] 724 725 mov r9,QWORD[((-120))+rdi] 726 cmp ecx,edx 727 728 mov rbp,QWORD[((-112))+rdi] 729 cmovg edx,ecx 730 test ecx,ecx 731 732 vmovdqu xmm3,XMMWORD[((-96))+rdi] 733 mov DWORD[36+rsp],ecx 734 cmovle r9,rsp 735 sub rbp,r9 736 mov QWORD[72+rsp],rbp 737 738 mov ecx,DWORD[((-64))+rdi] 739 740 mov r10,QWORD[((-80))+rdi] 741 cmp ecx,edx 742 743 mov rbp,QWORD[((-72))+rdi] 744 cmovg edx,ecx 745 test ecx,ecx 746 747 vmovdqu xmm4,XMMWORD[((-56))+rdi] 748 mov DWORD[40+rsp],ecx 749 cmovle r10,rsp 750 sub rbp,r10 751 mov QWORD[80+rsp],rbp 752 753 mov ecx,DWORD[((-24))+rdi] 754 755 mov r11,QWORD[((-40))+rdi] 756 cmp ecx,edx 757 758 mov rbp,QWORD[((-32))+rdi] 759 cmovg edx,ecx 760 test ecx,ecx 761 762 vmovdqu xmm5,XMMWORD[((-16))+rdi] 763 mov DWORD[44+rsp],ecx 764 cmovle r11,rsp 765 sub rbp,r11 766 mov QWORD[88+rsp],rbp 767 768 mov ecx,DWORD[16+rdi] 769 770 mov r12,QWORD[rdi] 771 cmp ecx,edx 772 773 mov rbp,QWORD[8+rdi] 774 cmovg edx,ecx 775 test ecx,ecx 776 777 vmovdqu xmm6,XMMWORD[24+rdi] 778 mov DWORD[48+rsp],ecx 779 cmovle r12,rsp 780 sub rbp,r12 781 mov QWORD[96+rsp],rbp 782 783 mov ecx,DWORD[56+rdi] 784 785 mov r13,QWORD[40+rdi] 786 cmp ecx,edx 787 788 mov rbp,QWORD[48+rdi] 789 cmovg edx,ecx 790 test ecx,ecx 791 792 vmovdqu xmm7,XMMWORD[64+rdi] 793 mov DWORD[52+rsp],ecx 794 cmovle r13,rsp 795 sub rbp,r13 796 mov QWORD[104+rsp],rbp 797 798 mov ecx,DWORD[96+rdi] 799 800 mov r14,QWORD[80+rdi] 801 cmp ecx,edx 802 803 mov rbp,QWORD[88+rdi] 804 cmovg edx,ecx 805 test ecx,ecx 806 807 vmovdqu xmm8,XMMWORD[104+rdi] 808 mov DWORD[56+rsp],ecx 809 cmovle r14,rsp 810 sub rbp,r14 811 mov QWORD[112+rsp],rbp 812 813 mov ecx,DWORD[136+rdi] 814 815 mov r15,QWORD[120+rdi] 816 cmp ecx,edx 817 818 mov rbp,QWORD[128+rdi] 819 cmovg edx,ecx 820 test ecx,ecx 821 822 vmovdqu xmm9,XMMWORD[144+rdi] 823 mov DWORD[60+rsp],ecx 824 cmovle r15,rsp 825 sub rbp,r15 826 mov QWORD[120+rsp],rbp 827 test edx,edx 828 jz NEAR $L$enc8x_done 829 830 vmovups xmm1,XMMWORD[((16-120))+rsi] 831 vmovups xmm0,XMMWORD[((32-120))+rsi] 832 mov eax,DWORD[((240-120))+rsi] 833 834 vpxor xmm10,xmm15,XMMWORD[r8] 835 lea rbp,[128+rsp] 836 vpxor xmm11,xmm15,XMMWORD[r9] 837 vpxor xmm12,xmm15,XMMWORD[r10] 838 vpxor xmm13,xmm15,XMMWORD[r11] 839 vpxor xmm2,xmm2,xmm10 840 vpxor xmm10,xmm15,XMMWORD[r12] 841 vpxor xmm3,xmm3,xmm11 842 vpxor xmm11,xmm15,XMMWORD[r13] 843 vpxor xmm4,xmm4,xmm12 844 vpxor xmm12,xmm15,XMMWORD[r14] 845 vpxor xmm5,xmm5,xmm13 846 vpxor xmm13,xmm15,XMMWORD[r15] 847 vpxor xmm6,xmm6,xmm10 848 mov ecx,1 849 vpxor xmm7,xmm7,xmm11 850 vpxor xmm8,xmm8,xmm12 851 vpxor xmm9,xmm9,xmm13 852 jmp NEAR $L$oop_enc8x 853 854 ALIGN 32 855 $L$oop_enc8x: 856 vaesenc xmm2,xmm2,xmm1 857 cmp ecx,DWORD[((32+0))+rsp] 858 vaesenc xmm3,xmm3,xmm1 859 prefetcht0 [31+r8] 860 vaesenc xmm4,xmm4,xmm1 861 vaesenc xmm5,xmm5,xmm1 862 lea rbx,[rbx*1+r8] 863 cmovge r8,rsp 864 vaesenc xmm6,xmm6,xmm1 865 cmovg rbx,rsp 866 vaesenc xmm7,xmm7,xmm1 867 sub rbx,r8 868 vaesenc xmm8,xmm8,xmm1 869 vpxor xmm10,xmm15,XMMWORD[16+r8] 870 mov QWORD[((64+0))+rsp],rbx 871 vaesenc xmm9,xmm9,xmm1 872 vmovups xmm1,XMMWORD[((-72))+rsi] 873 lea r8,[16+rbx*1+r8] 874 vmovdqu XMMWORD[rbp],xmm10 875 vaesenc xmm2,xmm2,xmm0 876 cmp ecx,DWORD[((32+4))+rsp] 877 mov rbx,QWORD[((64+8))+rsp] 878 vaesenc xmm3,xmm3,xmm0 879 prefetcht0 [31+r9] 880 vaesenc xmm4,xmm4,xmm0 881 vaesenc xmm5,xmm5,xmm0 882 lea rbx,[rbx*1+r9] 883 cmovge r9,rsp 884 vaesenc xmm6,xmm6,xmm0 885 cmovg rbx,rsp 886 vaesenc xmm7,xmm7,xmm0 887 sub rbx,r9 888 vaesenc xmm8,xmm8,xmm0 889 vpxor xmm11,xmm15,XMMWORD[16+r9] 890 mov QWORD[((64+8))+rsp],rbx 891 vaesenc xmm9,xmm9,xmm0 892 vmovups xmm0,XMMWORD[((-56))+rsi] 893 lea r9,[16+rbx*1+r9] 894 vmovdqu XMMWORD[16+rbp],xmm11 895 vaesenc xmm2,xmm2,xmm1 896 cmp ecx,DWORD[((32+8))+rsp] 897 mov rbx,QWORD[((64+16))+rsp] 898 vaesenc xmm3,xmm3,xmm1 899 prefetcht0 [31+r10] 900 vaesenc xmm4,xmm4,xmm1 901 prefetcht0 [15+r8] 902 vaesenc xmm5,xmm5,xmm1 903 lea rbx,[rbx*1+r10] 904 cmovge r10,rsp 905 vaesenc xmm6,xmm6,xmm1 906 cmovg rbx,rsp 907 vaesenc xmm7,xmm7,xmm1 908 sub rbx,r10 909 vaesenc xmm8,xmm8,xmm1 910 vpxor xmm12,xmm15,XMMWORD[16+r10] 911 mov QWORD[((64+16))+rsp],rbx 912 vaesenc xmm9,xmm9,xmm1 913 vmovups xmm1,XMMWORD[((-40))+rsi] 914 lea r10,[16+rbx*1+r10] 915 vmovdqu XMMWORD[32+rbp],xmm12 916 vaesenc xmm2,xmm2,xmm0 917 cmp ecx,DWORD[((32+12))+rsp] 918 mov rbx,QWORD[((64+24))+rsp] 919 vaesenc xmm3,xmm3,xmm0 920 prefetcht0 [31+r11] 921 vaesenc xmm4,xmm4,xmm0 922 prefetcht0 [15+r9] 923 vaesenc xmm5,xmm5,xmm0 924 lea rbx,[rbx*1+r11] 925 cmovge r11,rsp 926 vaesenc xmm6,xmm6,xmm0 927 cmovg rbx,rsp 928 vaesenc xmm7,xmm7,xmm0 929 sub rbx,r11 930 vaesenc xmm8,xmm8,xmm0 931 vpxor xmm13,xmm15,XMMWORD[16+r11] 932 mov QWORD[((64+24))+rsp],rbx 933 vaesenc xmm9,xmm9,xmm0 934 vmovups xmm0,XMMWORD[((-24))+rsi] 935 lea r11,[16+rbx*1+r11] 936 vmovdqu XMMWORD[48+rbp],xmm13 937 vaesenc xmm2,xmm2,xmm1 938 cmp ecx,DWORD[((32+16))+rsp] 939 mov rbx,QWORD[((64+32))+rsp] 940 vaesenc xmm3,xmm3,xmm1 941 prefetcht0 [31+r12] 942 vaesenc xmm4,xmm4,xmm1 943 prefetcht0 [15+r10] 944 vaesenc xmm5,xmm5,xmm1 945 lea rbx,[rbx*1+r12] 946 cmovge r12,rsp 947 vaesenc xmm6,xmm6,xmm1 948 cmovg rbx,rsp 949 vaesenc xmm7,xmm7,xmm1 950 sub rbx,r12 951 vaesenc xmm8,xmm8,xmm1 952 vpxor xmm10,xmm15,XMMWORD[16+r12] 953 mov QWORD[((64+32))+rsp],rbx 954 vaesenc xmm9,xmm9,xmm1 955 vmovups xmm1,XMMWORD[((-8))+rsi] 956 lea r12,[16+rbx*1+r12] 957 vaesenc xmm2,xmm2,xmm0 958 cmp ecx,DWORD[((32+20))+rsp] 959 mov rbx,QWORD[((64+40))+rsp] 960 vaesenc xmm3,xmm3,xmm0 961 prefetcht0 [31+r13] 962 vaesenc xmm4,xmm4,xmm0 963 prefetcht0 [15+r11] 964 vaesenc xmm5,xmm5,xmm0 965 lea rbx,[r13*1+rbx] 966 cmovge r13,rsp 967 vaesenc xmm6,xmm6,xmm0 968 cmovg rbx,rsp 969 vaesenc xmm7,xmm7,xmm0 970 sub rbx,r13 971 vaesenc xmm8,xmm8,xmm0 972 vpxor xmm11,xmm15,XMMWORD[16+r13] 973 mov QWORD[((64+40))+rsp],rbx 974 vaesenc xmm9,xmm9,xmm0 975 vmovups xmm0,XMMWORD[8+rsi] 976 lea r13,[16+rbx*1+r13] 977 vaesenc xmm2,xmm2,xmm1 978 cmp ecx,DWORD[((32+24))+rsp] 979 mov rbx,QWORD[((64+48))+rsp] 980 vaesenc xmm3,xmm3,xmm1 981 prefetcht0 [31+r14] 982 vaesenc xmm4,xmm4,xmm1 983 prefetcht0 [15+r12] 984 vaesenc xmm5,xmm5,xmm1 985 lea rbx,[rbx*1+r14] 986 cmovge r14,rsp 987 vaesenc xmm6,xmm6,xmm1 988 cmovg rbx,rsp 989 vaesenc xmm7,xmm7,xmm1 990 sub rbx,r14 991 vaesenc xmm8,xmm8,xmm1 992 vpxor xmm12,xmm15,XMMWORD[16+r14] 993 mov QWORD[((64+48))+rsp],rbx 994 vaesenc xmm9,xmm9,xmm1 995 vmovups xmm1,XMMWORD[24+rsi] 996 lea r14,[16+rbx*1+r14] 997 vaesenc xmm2,xmm2,xmm0 998 cmp ecx,DWORD[((32+28))+rsp] 999 mov rbx,QWORD[((64+56))+rsp] 1000 vaesenc xmm3,xmm3,xmm0 1001 prefetcht0 [31+r15] 1002 vaesenc xmm4,xmm4,xmm0 1003 prefetcht0 [15+r13] 1004 vaesenc xmm5,xmm5,xmm0 1005 lea rbx,[rbx*1+r15] 1006 cmovge r15,rsp 1007 vaesenc xmm6,xmm6,xmm0 1008 cmovg rbx,rsp 1009 vaesenc xmm7,xmm7,xmm0 1010 sub rbx,r15 1011 vaesenc xmm8,xmm8,xmm0 1012 vpxor xmm13,xmm15,XMMWORD[16+r15] 1013 mov QWORD[((64+56))+rsp],rbx 1014 vaesenc xmm9,xmm9,xmm0 1015 vmovups xmm0,XMMWORD[40+rsi] 1016 lea r15,[16+rbx*1+r15] 1017 vmovdqu xmm14,XMMWORD[32+rsp] 1018 prefetcht0 [15+r14] 1019 prefetcht0 [15+r15] 1020 cmp eax,11 1021 jb NEAR $L$enc8x_tail 1022 1023 vaesenc xmm2,xmm2,xmm1 1024 vaesenc xmm3,xmm3,xmm1 1025 vaesenc xmm4,xmm4,xmm1 1026 vaesenc xmm5,xmm5,xmm1 1027 vaesenc xmm6,xmm6,xmm1 1028 vaesenc xmm7,xmm7,xmm1 1029 vaesenc xmm8,xmm8,xmm1 1030 vaesenc xmm9,xmm9,xmm1 1031 vmovups xmm1,XMMWORD[((176-120))+rsi] 1032 1033 vaesenc xmm2,xmm2,xmm0 1034 vaesenc xmm3,xmm3,xmm0 1035 vaesenc xmm4,xmm4,xmm0 1036 vaesenc xmm5,xmm5,xmm0 1037 vaesenc xmm6,xmm6,xmm0 1038 vaesenc xmm7,xmm7,xmm0 1039 vaesenc xmm8,xmm8,xmm0 1040 vaesenc xmm9,xmm9,xmm0 1041 vmovups xmm0,XMMWORD[((192-120))+rsi] 1042 je NEAR $L$enc8x_tail 1043 1044 vaesenc xmm2,xmm2,xmm1 1045 vaesenc xmm3,xmm3,xmm1 1046 vaesenc xmm4,xmm4,xmm1 1047 vaesenc xmm5,xmm5,xmm1 1048 vaesenc xmm6,xmm6,xmm1 1049 vaesenc xmm7,xmm7,xmm1 1050 vaesenc xmm8,xmm8,xmm1 1051 vaesenc xmm9,xmm9,xmm1 1052 vmovups xmm1,XMMWORD[((208-120))+rsi] 1053 1054 vaesenc xmm2,xmm2,xmm0 1055 vaesenc xmm3,xmm3,xmm0 1056 vaesenc xmm4,xmm4,xmm0 1057 vaesenc xmm5,xmm5,xmm0 1058 vaesenc xmm6,xmm6,xmm0 1059 vaesenc xmm7,xmm7,xmm0 1060 vaesenc xmm8,xmm8,xmm0 1061 vaesenc xmm9,xmm9,xmm0 1062 vmovups xmm0,XMMWORD[((224-120))+rsi] 1063 1064 $L$enc8x_tail: 1065 vaesenc xmm2,xmm2,xmm1 1066 vpxor xmm15,xmm15,xmm15 1067 vaesenc xmm3,xmm3,xmm1 1068 vaesenc xmm4,xmm4,xmm1 1069 vpcmpgtd xmm15,xmm14,xmm15 1070 vaesenc xmm5,xmm5,xmm1 1071 vaesenc xmm6,xmm6,xmm1 1072 vpaddd xmm15,xmm15,xmm14 1073 vmovdqu xmm14,XMMWORD[48+rsp] 1074 vaesenc xmm7,xmm7,xmm1 1075 mov rbx,QWORD[64+rsp] 1076 vaesenc xmm8,xmm8,xmm1 1077 vaesenc xmm9,xmm9,xmm1 1078 vmovups xmm1,XMMWORD[((16-120))+rsi] 1079 1080 vaesenclast xmm2,xmm2,xmm0 1081 vmovdqa XMMWORD[32+rsp],xmm15 1082 vpxor xmm15,xmm15,xmm15 1083 vaesenclast xmm3,xmm3,xmm0 1084 vaesenclast xmm4,xmm4,xmm0 1085 vpcmpgtd xmm15,xmm14,xmm15 1086 vaesenclast xmm5,xmm5,xmm0 1087 vaesenclast xmm6,xmm6,xmm0 1088 vpaddd xmm14,xmm14,xmm15 1089 vmovdqu xmm15,XMMWORD[((-120))+rsi] 1090 vaesenclast xmm7,xmm7,xmm0 1091 vaesenclast xmm8,xmm8,xmm0 1092 vmovdqa XMMWORD[48+rsp],xmm14 1093 vaesenclast xmm9,xmm9,xmm0 1094 vmovups xmm0,XMMWORD[((32-120))+rsi] 1095 1096 vmovups XMMWORD[(-16)+r8],xmm2 1097 sub r8,rbx 1098 vpxor xmm2,xmm2,XMMWORD[rbp] 1099 vmovups XMMWORD[(-16)+r9],xmm3 1100 sub r9,QWORD[72+rsp] 1101 vpxor xmm3,xmm3,XMMWORD[16+rbp] 1102 vmovups XMMWORD[(-16)+r10],xmm4 1103 sub r10,QWORD[80+rsp] 1104 vpxor xmm4,xmm4,XMMWORD[32+rbp] 1105 vmovups XMMWORD[(-16)+r11],xmm5 1106 sub r11,QWORD[88+rsp] 1107 vpxor xmm5,xmm5,XMMWORD[48+rbp] 1108 vmovups XMMWORD[(-16)+r12],xmm6 1109 sub r12,QWORD[96+rsp] 1110 vpxor xmm6,xmm6,xmm10 1111 vmovups XMMWORD[(-16)+r13],xmm7 1112 sub r13,QWORD[104+rsp] 1113 vpxor xmm7,xmm7,xmm11 1114 vmovups XMMWORD[(-16)+r14],xmm8 1115 sub r14,QWORD[112+rsp] 1116 vpxor xmm8,xmm8,xmm12 1117 vmovups XMMWORD[(-16)+r15],xmm9 1118 sub r15,QWORD[120+rsp] 1119 vpxor xmm9,xmm9,xmm13 1120 1121 dec edx 1122 jnz NEAR $L$oop_enc8x 1123 1124 mov rax,QWORD[16+rsp] 1125 1126 1127 1128 1129 1130 1131 $L$enc8x_done: 1132 vzeroupper 1133 movaps xmm6,XMMWORD[((-216))+rax] 1134 movaps xmm7,XMMWORD[((-200))+rax] 1135 movaps xmm8,XMMWORD[((-184))+rax] 1136 movaps xmm9,XMMWORD[((-168))+rax] 1137 movaps xmm10,XMMWORD[((-152))+rax] 1138 movaps xmm11,XMMWORD[((-136))+rax] 1139 movaps xmm12,XMMWORD[((-120))+rax] 1140 movaps xmm13,XMMWORD[((-104))+rax] 1141 movaps xmm14,XMMWORD[((-88))+rax] 1142 movaps xmm15,XMMWORD[((-72))+rax] 1143 mov r15,QWORD[((-48))+rax] 1144 1145 mov r14,QWORD[((-40))+rax] 1146 1147 mov r13,QWORD[((-32))+rax] 1148 1149 mov r12,QWORD[((-24))+rax] 1150 1151 mov rbp,QWORD[((-16))+rax] 1152 1153 mov rbx,QWORD[((-8))+rax] 1154 1155 lea rsp,[rax] 1156 1157 $L$enc8x_epilogue: 1158 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1159 mov rsi,QWORD[16+rsp] 1160 DB 0F3h,0C3h ;repret 1161 1162 $L$SEH_end_aesni_multi_cbc_encrypt_avx: 1163 1164 1165 ALIGN 32 1166 aesni_multi_cbc_decrypt_avx: 1167 mov QWORD[8+rsp],rdi ;WIN64 prologue 1168 mov QWORD[16+rsp],rsi 1169 mov rax,rsp 1170 $L$SEH_begin_aesni_multi_cbc_decrypt_avx: 1171 mov rdi,rcx 1172 mov rsi,rdx 1173 mov rdx,r8 1174 1175 1176 1177 _avx_cbc_dec_shortcut: 1178 mov rax,rsp 1179 1180 push rbx 1181 1182 push rbp 1183 1184 push r12 1185 1186 push r13 1187 1188 push r14 1189 1190 push r15 1191 1192 lea rsp,[((-168))+rsp] 1193 movaps XMMWORD[rsp],xmm6 1194 movaps XMMWORD[16+rsp],xmm7 1195 movaps XMMWORD[32+rsp],xmm8 1196 movaps XMMWORD[48+rsp],xmm9 1197 movaps XMMWORD[64+rsp],xmm10 1198 movaps XMMWORD[80+rsp],xmm11 1199 movaps XMMWORD[(-120)+rax],xmm12 1200 movaps XMMWORD[(-104)+rax],xmm13 1201 movaps XMMWORD[(-88)+rax],xmm14 1202 movaps XMMWORD[(-72)+rax],xmm15 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 sub rsp,256 1213 and rsp,-256 1214 sub rsp,192 1215 mov QWORD[16+rsp],rax 1216 1217 1218 $L$dec8x_body: 1219 vzeroupper 1220 vmovdqu xmm15,XMMWORD[rsi] 1221 lea rsi,[120+rsi] 1222 lea rdi,[160+rdi] 1223 shr edx,1 1224 1225 $L$dec8x_loop_grande: 1226 1227 xor edx,edx 1228 1229 mov ecx,DWORD[((-144))+rdi] 1230 1231 mov r8,QWORD[((-160))+rdi] 1232 cmp ecx,edx 1233 1234 mov rbx,QWORD[((-152))+rdi] 1235 cmovg edx,ecx 1236 test ecx,ecx 1237 1238 vmovdqu xmm2,XMMWORD[((-136))+rdi] 1239 mov DWORD[32+rsp],ecx 1240 cmovle r8,rsp 1241 sub rbx,r8 1242 mov QWORD[64+rsp],rbx 1243 vmovdqu XMMWORD[192+rsp],xmm2 1244 1245 mov ecx,DWORD[((-104))+rdi] 1246 1247 mov r9,QWORD[((-120))+rdi] 1248 cmp ecx,edx 1249 1250 mov rbp,QWORD[((-112))+rdi] 1251 cmovg edx,ecx 1252 test ecx,ecx 1253 1254 vmovdqu xmm3,XMMWORD[((-96))+rdi] 1255 mov DWORD[36+rsp],ecx 1256 cmovle r9,rsp 1257 sub rbp,r9 1258 mov QWORD[72+rsp],rbp 1259 vmovdqu XMMWORD[208+rsp],xmm3 1260 1261 mov ecx,DWORD[((-64))+rdi] 1262 1263 mov r10,QWORD[((-80))+rdi] 1264 cmp ecx,edx 1265 1266 mov rbp,QWORD[((-72))+rdi] 1267 cmovg edx,ecx 1268 test ecx,ecx 1269 1270 vmovdqu xmm4,XMMWORD[((-56))+rdi] 1271 mov DWORD[40+rsp],ecx 1272 cmovle r10,rsp 1273 sub rbp,r10 1274 mov QWORD[80+rsp],rbp 1275 vmovdqu XMMWORD[224+rsp],xmm4 1276 1277 mov ecx,DWORD[((-24))+rdi] 1278 1279 mov r11,QWORD[((-40))+rdi] 1280 cmp ecx,edx 1281 1282 mov rbp,QWORD[((-32))+rdi] 1283 cmovg edx,ecx 1284 test ecx,ecx 1285 1286 vmovdqu xmm5,XMMWORD[((-16))+rdi] 1287 mov DWORD[44+rsp],ecx 1288 cmovle r11,rsp 1289 sub rbp,r11 1290 mov QWORD[88+rsp],rbp 1291 vmovdqu XMMWORD[240+rsp],xmm5 1292 1293 mov ecx,DWORD[16+rdi] 1294 1295 mov r12,QWORD[rdi] 1296 cmp ecx,edx 1297 1298 mov rbp,QWORD[8+rdi] 1299 cmovg edx,ecx 1300 test ecx,ecx 1301 1302 vmovdqu xmm6,XMMWORD[24+rdi] 1303 mov DWORD[48+rsp],ecx 1304 cmovle r12,rsp 1305 sub rbp,r12 1306 mov QWORD[96+rsp],rbp 1307 vmovdqu XMMWORD[256+rsp],xmm6 1308 1309 mov ecx,DWORD[56+rdi] 1310 1311 mov r13,QWORD[40+rdi] 1312 cmp ecx,edx 1313 1314 mov rbp,QWORD[48+rdi] 1315 cmovg edx,ecx 1316 test ecx,ecx 1317 1318 vmovdqu xmm7,XMMWORD[64+rdi] 1319 mov DWORD[52+rsp],ecx 1320 cmovle r13,rsp 1321 sub rbp,r13 1322 mov QWORD[104+rsp],rbp 1323 vmovdqu XMMWORD[272+rsp],xmm7 1324 1325 mov ecx,DWORD[96+rdi] 1326 1327 mov r14,QWORD[80+rdi] 1328 cmp ecx,edx 1329 1330 mov rbp,QWORD[88+rdi] 1331 cmovg edx,ecx 1332 test ecx,ecx 1333 1334 vmovdqu xmm8,XMMWORD[104+rdi] 1335 mov DWORD[56+rsp],ecx 1336 cmovle r14,rsp 1337 sub rbp,r14 1338 mov QWORD[112+rsp],rbp 1339 vmovdqu XMMWORD[288+rsp],xmm8 1340 1341 mov ecx,DWORD[136+rdi] 1342 1343 mov r15,QWORD[120+rdi] 1344 cmp ecx,edx 1345 1346 mov rbp,QWORD[128+rdi] 1347 cmovg edx,ecx 1348 test ecx,ecx 1349 1350 vmovdqu xmm9,XMMWORD[144+rdi] 1351 mov DWORD[60+rsp],ecx 1352 cmovle r15,rsp 1353 sub rbp,r15 1354 mov QWORD[120+rsp],rbp 1355 vmovdqu XMMWORD[304+rsp],xmm9 1356 test edx,edx 1357 jz NEAR $L$dec8x_done 1358 1359 vmovups xmm1,XMMWORD[((16-120))+rsi] 1360 vmovups xmm0,XMMWORD[((32-120))+rsi] 1361 mov eax,DWORD[((240-120))+rsi] 1362 lea rbp,[((192+128))+rsp] 1363 1364 vmovdqu xmm2,XMMWORD[r8] 1365 vmovdqu xmm3,XMMWORD[r9] 1366 vmovdqu xmm4,XMMWORD[r10] 1367 vmovdqu xmm5,XMMWORD[r11] 1368 vmovdqu xmm6,XMMWORD[r12] 1369 vmovdqu xmm7,XMMWORD[r13] 1370 vmovdqu xmm8,XMMWORD[r14] 1371 vmovdqu xmm9,XMMWORD[r15] 1372 vmovdqu XMMWORD[rbp],xmm2 1373 vpxor xmm2,xmm2,xmm15 1374 vmovdqu XMMWORD[16+rbp],xmm3 1375 vpxor xmm3,xmm3,xmm15 1376 vmovdqu XMMWORD[32+rbp],xmm4 1377 vpxor xmm4,xmm4,xmm15 1378 vmovdqu XMMWORD[48+rbp],xmm5 1379 vpxor xmm5,xmm5,xmm15 1380 vmovdqu XMMWORD[64+rbp],xmm6 1381 vpxor xmm6,xmm6,xmm15 1382 vmovdqu XMMWORD[80+rbp],xmm7 1383 vpxor xmm7,xmm7,xmm15 1384 vmovdqu XMMWORD[96+rbp],xmm8 1385 vpxor xmm8,xmm8,xmm15 1386 vmovdqu XMMWORD[112+rbp],xmm9 1387 vpxor xmm9,xmm9,xmm15 1388 xor rbp,0x80 1389 mov ecx,1 1390 jmp NEAR $L$oop_dec8x 1391 1392 ALIGN 32 1393 $L$oop_dec8x: 1394 vaesdec xmm2,xmm2,xmm1 1395 cmp ecx,DWORD[((32+0))+rsp] 1396 vaesdec xmm3,xmm3,xmm1 1397 prefetcht0 [31+r8] 1398 vaesdec xmm4,xmm4,xmm1 1399 vaesdec xmm5,xmm5,xmm1 1400 lea rbx,[rbx*1+r8] 1401 cmovge r8,rsp 1402 vaesdec xmm6,xmm6,xmm1 1403 cmovg rbx,rsp 1404 vaesdec xmm7,xmm7,xmm1 1405 sub rbx,r8 1406 vaesdec xmm8,xmm8,xmm1 1407 vmovdqu xmm10,XMMWORD[16+r8] 1408 mov QWORD[((64+0))+rsp],rbx 1409 vaesdec xmm9,xmm9,xmm1 1410 vmovups xmm1,XMMWORD[((-72))+rsi] 1411 lea r8,[16+rbx*1+r8] 1412 vmovdqu XMMWORD[128+rsp],xmm10 1413 vaesdec xmm2,xmm2,xmm0 1414 cmp ecx,DWORD[((32+4))+rsp] 1415 mov rbx,QWORD[((64+8))+rsp] 1416 vaesdec xmm3,xmm3,xmm0 1417 prefetcht0 [31+r9] 1418 vaesdec xmm4,xmm4,xmm0 1419 vaesdec xmm5,xmm5,xmm0 1420 lea rbx,[rbx*1+r9] 1421 cmovge r9,rsp 1422 vaesdec xmm6,xmm6,xmm0 1423 cmovg rbx,rsp 1424 vaesdec xmm7,xmm7,xmm0 1425 sub rbx,r9 1426 vaesdec xmm8,xmm8,xmm0 1427 vmovdqu xmm11,XMMWORD[16+r9] 1428 mov QWORD[((64+8))+rsp],rbx 1429 vaesdec xmm9,xmm9,xmm0 1430 vmovups xmm0,XMMWORD[((-56))+rsi] 1431 lea r9,[16+rbx*1+r9] 1432 vmovdqu XMMWORD[144+rsp],xmm11 1433 vaesdec xmm2,xmm2,xmm1 1434 cmp ecx,DWORD[((32+8))+rsp] 1435 mov rbx,QWORD[((64+16))+rsp] 1436 vaesdec xmm3,xmm3,xmm1 1437 prefetcht0 [31+r10] 1438 vaesdec xmm4,xmm4,xmm1 1439 prefetcht0 [15+r8] 1440 vaesdec xmm5,xmm5,xmm1 1441 lea rbx,[rbx*1+r10] 1442 cmovge r10,rsp 1443 vaesdec xmm6,xmm6,xmm1 1444 cmovg rbx,rsp 1445 vaesdec xmm7,xmm7,xmm1 1446 sub rbx,r10 1447 vaesdec xmm8,xmm8,xmm1 1448 vmovdqu xmm12,XMMWORD[16+r10] 1449 mov QWORD[((64+16))+rsp],rbx 1450 vaesdec xmm9,xmm9,xmm1 1451 vmovups xmm1,XMMWORD[((-40))+rsi] 1452 lea r10,[16+rbx*1+r10] 1453 vmovdqu XMMWORD[160+rsp],xmm12 1454 vaesdec xmm2,xmm2,xmm0 1455 cmp ecx,DWORD[((32+12))+rsp] 1456 mov rbx,QWORD[((64+24))+rsp] 1457 vaesdec xmm3,xmm3,xmm0 1458 prefetcht0 [31+r11] 1459 vaesdec xmm4,xmm4,xmm0 1460 prefetcht0 [15+r9] 1461 vaesdec xmm5,xmm5,xmm0 1462 lea rbx,[rbx*1+r11] 1463 cmovge r11,rsp 1464 vaesdec xmm6,xmm6,xmm0 1465 cmovg rbx,rsp 1466 vaesdec xmm7,xmm7,xmm0 1467 sub rbx,r11 1468 vaesdec xmm8,xmm8,xmm0 1469 vmovdqu xmm13,XMMWORD[16+r11] 1470 mov QWORD[((64+24))+rsp],rbx 1471 vaesdec xmm9,xmm9,xmm0 1472 vmovups xmm0,XMMWORD[((-24))+rsi] 1473 lea r11,[16+rbx*1+r11] 1474 vmovdqu XMMWORD[176+rsp],xmm13 1475 vaesdec xmm2,xmm2,xmm1 1476 cmp ecx,DWORD[((32+16))+rsp] 1477 mov rbx,QWORD[((64+32))+rsp] 1478 vaesdec xmm3,xmm3,xmm1 1479 prefetcht0 [31+r12] 1480 vaesdec xmm4,xmm4,xmm1 1481 prefetcht0 [15+r10] 1482 vaesdec xmm5,xmm5,xmm1 1483 lea rbx,[rbx*1+r12] 1484 cmovge r12,rsp 1485 vaesdec xmm6,xmm6,xmm1 1486 cmovg rbx,rsp 1487 vaesdec xmm7,xmm7,xmm1 1488 sub rbx,r12 1489 vaesdec xmm8,xmm8,xmm1 1490 vmovdqu xmm10,XMMWORD[16+r12] 1491 mov QWORD[((64+32))+rsp],rbx 1492 vaesdec xmm9,xmm9,xmm1 1493 vmovups xmm1,XMMWORD[((-8))+rsi] 1494 lea r12,[16+rbx*1+r12] 1495 vaesdec xmm2,xmm2,xmm0 1496 cmp ecx,DWORD[((32+20))+rsp] 1497 mov rbx,QWORD[((64+40))+rsp] 1498 vaesdec xmm3,xmm3,xmm0 1499 prefetcht0 [31+r13] 1500 vaesdec xmm4,xmm4,xmm0 1501 prefetcht0 [15+r11] 1502 vaesdec xmm5,xmm5,xmm0 1503 lea rbx,[r13*1+rbx] 1504 cmovge r13,rsp 1505 vaesdec xmm6,xmm6,xmm0 1506 cmovg rbx,rsp 1507 vaesdec xmm7,xmm7,xmm0 1508 sub rbx,r13 1509 vaesdec xmm8,xmm8,xmm0 1510 vmovdqu xmm11,XMMWORD[16+r13] 1511 mov QWORD[((64+40))+rsp],rbx 1512 vaesdec xmm9,xmm9,xmm0 1513 vmovups xmm0,XMMWORD[8+rsi] 1514 lea r13,[16+rbx*1+r13] 1515 vaesdec xmm2,xmm2,xmm1 1516 cmp ecx,DWORD[((32+24))+rsp] 1517 mov rbx,QWORD[((64+48))+rsp] 1518 vaesdec xmm3,xmm3,xmm1 1519 prefetcht0 [31+r14] 1520 vaesdec xmm4,xmm4,xmm1 1521 prefetcht0 [15+r12] 1522 vaesdec xmm5,xmm5,xmm1 1523 lea rbx,[rbx*1+r14] 1524 cmovge r14,rsp 1525 vaesdec xmm6,xmm6,xmm1 1526 cmovg rbx,rsp 1527 vaesdec xmm7,xmm7,xmm1 1528 sub rbx,r14 1529 vaesdec xmm8,xmm8,xmm1 1530 vmovdqu xmm12,XMMWORD[16+r14] 1531 mov QWORD[((64+48))+rsp],rbx 1532 vaesdec xmm9,xmm9,xmm1 1533 vmovups xmm1,XMMWORD[24+rsi] 1534 lea r14,[16+rbx*1+r14] 1535 vaesdec xmm2,xmm2,xmm0 1536 cmp ecx,DWORD[((32+28))+rsp] 1537 mov rbx,QWORD[((64+56))+rsp] 1538 vaesdec xmm3,xmm3,xmm0 1539 prefetcht0 [31+r15] 1540 vaesdec xmm4,xmm4,xmm0 1541 prefetcht0 [15+r13] 1542 vaesdec xmm5,xmm5,xmm0 1543 lea rbx,[rbx*1+r15] 1544 cmovge r15,rsp 1545 vaesdec xmm6,xmm6,xmm0 1546 cmovg rbx,rsp 1547 vaesdec xmm7,xmm7,xmm0 1548 sub rbx,r15 1549 vaesdec xmm8,xmm8,xmm0 1550 vmovdqu xmm13,XMMWORD[16+r15] 1551 mov QWORD[((64+56))+rsp],rbx 1552 vaesdec xmm9,xmm9,xmm0 1553 vmovups xmm0,XMMWORD[40+rsi] 1554 lea r15,[16+rbx*1+r15] 1555 vmovdqu xmm14,XMMWORD[32+rsp] 1556 prefetcht0 [15+r14] 1557 prefetcht0 [15+r15] 1558 cmp eax,11 1559 jb NEAR $L$dec8x_tail 1560 1561 vaesdec xmm2,xmm2,xmm1 1562 vaesdec xmm3,xmm3,xmm1 1563 vaesdec xmm4,xmm4,xmm1 1564 vaesdec xmm5,xmm5,xmm1 1565 vaesdec xmm6,xmm6,xmm1 1566 vaesdec xmm7,xmm7,xmm1 1567 vaesdec xmm8,xmm8,xmm1 1568 vaesdec xmm9,xmm9,xmm1 1569 vmovups xmm1,XMMWORD[((176-120))+rsi] 1570 1571 vaesdec xmm2,xmm2,xmm0 1572 vaesdec xmm3,xmm3,xmm0 1573 vaesdec xmm4,xmm4,xmm0 1574 vaesdec xmm5,xmm5,xmm0 1575 vaesdec xmm6,xmm6,xmm0 1576 vaesdec xmm7,xmm7,xmm0 1577 vaesdec xmm8,xmm8,xmm0 1578 vaesdec xmm9,xmm9,xmm0 1579 vmovups xmm0,XMMWORD[((192-120))+rsi] 1580 je NEAR $L$dec8x_tail 1581 1582 vaesdec xmm2,xmm2,xmm1 1583 vaesdec xmm3,xmm3,xmm1 1584 vaesdec xmm4,xmm4,xmm1 1585 vaesdec xmm5,xmm5,xmm1 1586 vaesdec xmm6,xmm6,xmm1 1587 vaesdec xmm7,xmm7,xmm1 1588 vaesdec xmm8,xmm8,xmm1 1589 vaesdec xmm9,xmm9,xmm1 1590 vmovups xmm1,XMMWORD[((208-120))+rsi] 1591 1592 vaesdec xmm2,xmm2,xmm0 1593 vaesdec xmm3,xmm3,xmm0 1594 vaesdec xmm4,xmm4,xmm0 1595 vaesdec xmm5,xmm5,xmm0 1596 vaesdec xmm6,xmm6,xmm0 1597 vaesdec xmm7,xmm7,xmm0 1598 vaesdec xmm8,xmm8,xmm0 1599 vaesdec xmm9,xmm9,xmm0 1600 vmovups xmm0,XMMWORD[((224-120))+rsi] 1601 1602 $L$dec8x_tail: 1603 vaesdec xmm2,xmm2,xmm1 1604 vpxor xmm15,xmm15,xmm15 1605 vaesdec xmm3,xmm3,xmm1 1606 vaesdec xmm4,xmm4,xmm1 1607 vpcmpgtd xmm15,xmm14,xmm15 1608 vaesdec xmm5,xmm5,xmm1 1609 vaesdec xmm6,xmm6,xmm1 1610 vpaddd xmm15,xmm15,xmm14 1611 vmovdqu xmm14,XMMWORD[48+rsp] 1612 vaesdec xmm7,xmm7,xmm1 1613 mov rbx,QWORD[64+rsp] 1614 vaesdec xmm8,xmm8,xmm1 1615 vaesdec xmm9,xmm9,xmm1 1616 vmovups xmm1,XMMWORD[((16-120))+rsi] 1617 1618 vaesdeclast xmm2,xmm2,xmm0 1619 vmovdqa XMMWORD[32+rsp],xmm15 1620 vpxor xmm15,xmm15,xmm15 1621 vaesdeclast xmm3,xmm3,xmm0 1622 vpxor xmm2,xmm2,XMMWORD[rbp] 1623 vaesdeclast xmm4,xmm4,xmm0 1624 vpxor xmm3,xmm3,XMMWORD[16+rbp] 1625 vpcmpgtd xmm15,xmm14,xmm15 1626 vaesdeclast xmm5,xmm5,xmm0 1627 vpxor xmm4,xmm4,XMMWORD[32+rbp] 1628 vaesdeclast xmm6,xmm6,xmm0 1629 vpxor xmm5,xmm5,XMMWORD[48+rbp] 1630 vpaddd xmm14,xmm14,xmm15 1631 vmovdqu xmm15,XMMWORD[((-120))+rsi] 1632 vaesdeclast xmm7,xmm7,xmm0 1633 vpxor xmm6,xmm6,XMMWORD[64+rbp] 1634 vaesdeclast xmm8,xmm8,xmm0 1635 vpxor xmm7,xmm7,XMMWORD[80+rbp] 1636 vmovdqa XMMWORD[48+rsp],xmm14 1637 vaesdeclast xmm9,xmm9,xmm0 1638 vpxor xmm8,xmm8,XMMWORD[96+rbp] 1639 vmovups xmm0,XMMWORD[((32-120))+rsi] 1640 1641 vmovups XMMWORD[(-16)+r8],xmm2 1642 sub r8,rbx 1643 vmovdqu xmm2,XMMWORD[((128+0))+rsp] 1644 vpxor xmm9,xmm9,XMMWORD[112+rbp] 1645 vmovups XMMWORD[(-16)+r9],xmm3 1646 sub r9,QWORD[72+rsp] 1647 vmovdqu XMMWORD[rbp],xmm2 1648 vpxor xmm2,xmm2,xmm15 1649 vmovdqu xmm3,XMMWORD[((128+16))+rsp] 1650 vmovups XMMWORD[(-16)+r10],xmm4 1651 sub r10,QWORD[80+rsp] 1652 vmovdqu XMMWORD[16+rbp],xmm3 1653 vpxor xmm3,xmm3,xmm15 1654 vmovdqu xmm4,XMMWORD[((128+32))+rsp] 1655 vmovups XMMWORD[(-16)+r11],xmm5 1656 sub r11,QWORD[88+rsp] 1657 vmovdqu XMMWORD[32+rbp],xmm4 1658 vpxor xmm4,xmm4,xmm15 1659 vmovdqu xmm5,XMMWORD[((128+48))+rsp] 1660 vmovups XMMWORD[(-16)+r12],xmm6 1661 sub r12,QWORD[96+rsp] 1662 vmovdqu XMMWORD[48+rbp],xmm5 1663 vpxor xmm5,xmm5,xmm15 1664 vmovdqu XMMWORD[64+rbp],xmm10 1665 vpxor xmm6,xmm15,xmm10 1666 vmovups XMMWORD[(-16)+r13],xmm7 1667 sub r13,QWORD[104+rsp] 1668 vmovdqu XMMWORD[80+rbp],xmm11 1669 vpxor xmm7,xmm15,xmm11 1670 vmovups XMMWORD[(-16)+r14],xmm8 1671 sub r14,QWORD[112+rsp] 1672 vmovdqu XMMWORD[96+rbp],xmm12 1673 vpxor xmm8,xmm15,xmm12 1674 vmovups XMMWORD[(-16)+r15],xmm9 1675 sub r15,QWORD[120+rsp] 1676 vmovdqu XMMWORD[112+rbp],xmm13 1677 vpxor xmm9,xmm15,xmm13 1678 1679 xor rbp,128 1680 dec edx 1681 jnz NEAR $L$oop_dec8x 1682 1683 mov rax,QWORD[16+rsp] 1684 1685 1686 1687 1688 1689 1690 $L$dec8x_done: 1691 vzeroupper 1692 movaps xmm6,XMMWORD[((-216))+rax] 1693 movaps xmm7,XMMWORD[((-200))+rax] 1694 movaps xmm8,XMMWORD[((-184))+rax] 1695 movaps xmm9,XMMWORD[((-168))+rax] 1696 movaps xmm10,XMMWORD[((-152))+rax] 1697 movaps xmm11,XMMWORD[((-136))+rax] 1698 movaps xmm12,XMMWORD[((-120))+rax] 1699 movaps xmm13,XMMWORD[((-104))+rax] 1700 movaps xmm14,XMMWORD[((-88))+rax] 1701 movaps xmm15,XMMWORD[((-72))+rax] 1702 mov r15,QWORD[((-48))+rax] 1703 1704 mov r14,QWORD[((-40))+rax] 1705 1706 mov r13,QWORD[((-32))+rax] 1707 1708 mov r12,QWORD[((-24))+rax] 1709 1710 mov rbp,QWORD[((-16))+rax] 1711 1712 mov rbx,QWORD[((-8))+rax] 1713 1714 lea rsp,[rax] 1715 1716 $L$dec8x_epilogue: 1717 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1718 mov rsi,QWORD[16+rsp] 1719 DB 0F3h,0C3h ;repret 1720 1721 $L$SEH_end_aesni_multi_cbc_decrypt_avx: 629 1722 EXTERN __imp_RtlVirtualUnwind 630 1723 … … 728 1821 DD $L$SEH_end_aesni_multi_cbc_decrypt wrt ..imagebase 729 1822 DD $L$SEH_info_aesni_multi_cbc_decrypt wrt ..imagebase 1823 DD $L$SEH_begin_aesni_multi_cbc_encrypt_avx wrt ..imagebase 1824 DD $L$SEH_end_aesni_multi_cbc_encrypt_avx wrt ..imagebase 1825 DD $L$SEH_info_aesni_multi_cbc_encrypt_avx wrt ..imagebase 1826 DD $L$SEH_begin_aesni_multi_cbc_decrypt_avx wrt ..imagebase 1827 DD $L$SEH_end_aesni_multi_cbc_decrypt_avx wrt ..imagebase 1828 DD $L$SEH_info_aesni_multi_cbc_decrypt_avx wrt ..imagebase 730 1829 section .xdata rdata align=8 731 1830 ALIGN 8 … … 738 1837 DD se_handler wrt ..imagebase 739 1838 DD $L$dec4x_body wrt ..imagebase,$L$dec4x_epilogue wrt ..imagebase 1839 $L$SEH_info_aesni_multi_cbc_encrypt_avx: 1840 DB 9,0,0,0 1841 DD se_handler wrt ..imagebase 1842 DD $L$enc8x_body wrt ..imagebase,$L$enc8x_epilogue wrt ..imagebase 1843 $L$SEH_info_aesni_multi_cbc_decrypt_avx: 1844 DB 9,0,0,0 1845 DD se_handler wrt ..imagebase 1846 DD $L$dec8x_body wrt ..imagebase,$L$dec8x_epilogue wrt ..imagebase -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/aesni-sha1-x86_64.S
r97373 r99371 17 17 bt r11,61 18 18 jc NEAR aesni_cbc_sha1_enc_shaext 19 and r11d,268435456 20 and r10d,1073741824 21 or r10d,r11d 22 cmp r10d,1342177280 23 je NEAR aesni_cbc_sha1_enc_avx 19 24 jmp NEAR aesni_cbc_sha1_enc_ssse3 20 25 DB 0F3h,0C3h ;repret … … 1427 1432 1428 1433 $L$SEH_end_aesni_cbc_sha1_enc_ssse3: 1434 1435 ALIGN 32 1436 aesni_cbc_sha1_enc_avx: 1437 mov QWORD[8+rsp],rdi ;WIN64 prologue 1438 mov QWORD[16+rsp],rsi 1439 mov rax,rsp 1440 $L$SEH_begin_aesni_cbc_sha1_enc_avx: 1441 mov rdi,rcx 1442 mov rsi,rdx 1443 mov rdx,r8 1444 mov rcx,r9 1445 mov r8,QWORD[40+rsp] 1446 mov r9,QWORD[48+rsp] 1447 1448 1449 1450 mov r10,QWORD[56+rsp] 1451 1452 1453 push rbx 1454 1455 push rbp 1456 1457 push r12 1458 1459 push r13 1460 1461 push r14 1462 1463 push r15 1464 1465 lea rsp,[((-264))+rsp] 1466 1467 1468 1469 movaps XMMWORD[(96+0)+rsp],xmm6 1470 movaps XMMWORD[(96+16)+rsp],xmm7 1471 movaps XMMWORD[(96+32)+rsp],xmm8 1472 movaps XMMWORD[(96+48)+rsp],xmm9 1473 movaps XMMWORD[(96+64)+rsp],xmm10 1474 movaps XMMWORD[(96+80)+rsp],xmm11 1475 movaps XMMWORD[(96+96)+rsp],xmm12 1476 movaps XMMWORD[(96+112)+rsp],xmm13 1477 movaps XMMWORD[(96+128)+rsp],xmm14 1478 movaps XMMWORD[(96+144)+rsp],xmm15 1479 $L$prologue_avx: 1480 vzeroall 1481 mov r12,rdi 1482 mov r13,rsi 1483 mov r14,rdx 1484 lea r15,[112+rcx] 1485 vmovdqu xmm12,XMMWORD[r8] 1486 mov QWORD[88+rsp],r8 1487 shl r14,6 1488 sub r13,r12 1489 mov r8d,DWORD[((240-112))+r15] 1490 add r14,r10 1491 1492 lea r11,[K_XX_XX] 1493 mov eax,DWORD[r9] 1494 mov ebx,DWORD[4+r9] 1495 mov ecx,DWORD[8+r9] 1496 mov edx,DWORD[12+r9] 1497 mov esi,ebx 1498 mov ebp,DWORD[16+r9] 1499 mov edi,ecx 1500 xor edi,edx 1501 and esi,edi 1502 1503 vmovdqa xmm6,XMMWORD[64+r11] 1504 vmovdqa xmm10,XMMWORD[r11] 1505 vmovdqu xmm0,XMMWORD[r10] 1506 vmovdqu xmm1,XMMWORD[16+r10] 1507 vmovdqu xmm2,XMMWORD[32+r10] 1508 vmovdqu xmm3,XMMWORD[48+r10] 1509 vpshufb xmm0,xmm0,xmm6 1510 add r10,64 1511 vpshufb xmm1,xmm1,xmm6 1512 vpshufb xmm2,xmm2,xmm6 1513 vpshufb xmm3,xmm3,xmm6 1514 vpaddd xmm4,xmm0,xmm10 1515 vpaddd xmm5,xmm1,xmm10 1516 vpaddd xmm6,xmm2,xmm10 1517 vmovdqa XMMWORD[rsp],xmm4 1518 vmovdqa XMMWORD[16+rsp],xmm5 1519 vmovdqa XMMWORD[32+rsp],xmm6 1520 vmovups xmm15,XMMWORD[((-112))+r15] 1521 vmovups xmm14,XMMWORD[((16-112))+r15] 1522 jmp NEAR $L$oop_avx 1523 ALIGN 32 1524 $L$oop_avx: 1525 shrd ebx,ebx,2 1526 vmovdqu xmm13,XMMWORD[r12] 1527 vpxor xmm13,xmm13,xmm15 1528 vpxor xmm12,xmm12,xmm13 1529 vaesenc xmm12,xmm12,xmm14 1530 vmovups xmm15,XMMWORD[((-80))+r15] 1531 xor esi,edx 1532 vpalignr xmm4,xmm1,xmm0,8 1533 mov edi,eax 1534 add ebp,DWORD[rsp] 1535 vpaddd xmm9,xmm10,xmm3 1536 xor ebx,ecx 1537 shld eax,eax,5 1538 vpsrldq xmm8,xmm3,4 1539 add ebp,esi 1540 and edi,ebx 1541 vpxor xmm4,xmm4,xmm0 1542 xor ebx,ecx 1543 add ebp,eax 1544 vpxor xmm8,xmm8,xmm2 1545 shrd eax,eax,7 1546 xor edi,ecx 1547 mov esi,ebp 1548 add edx,DWORD[4+rsp] 1549 vpxor xmm4,xmm4,xmm8 1550 xor eax,ebx 1551 shld ebp,ebp,5 1552 vmovdqa XMMWORD[48+rsp],xmm9 1553 add edx,edi 1554 vaesenc xmm12,xmm12,xmm15 1555 vmovups xmm14,XMMWORD[((-64))+r15] 1556 and esi,eax 1557 vpsrld xmm8,xmm4,31 1558 xor eax,ebx 1559 add edx,ebp 1560 shrd ebp,ebp,7 1561 xor esi,ebx 1562 vpslldq xmm9,xmm4,12 1563 vpaddd xmm4,xmm4,xmm4 1564 mov edi,edx 1565 add ecx,DWORD[8+rsp] 1566 xor ebp,eax 1567 shld edx,edx,5 1568 vpor xmm4,xmm4,xmm8 1569 vpsrld xmm8,xmm9,30 1570 add ecx,esi 1571 and edi,ebp 1572 xor ebp,eax 1573 add ecx,edx 1574 vpslld xmm9,xmm9,2 1575 vpxor xmm4,xmm4,xmm8 1576 shrd edx,edx,7 1577 xor edi,eax 1578 mov esi,ecx 1579 add ebx,DWORD[12+rsp] 1580 vaesenc xmm12,xmm12,xmm14 1581 vmovups xmm15,XMMWORD[((-48))+r15] 1582 vpxor xmm4,xmm4,xmm9 1583 xor edx,ebp 1584 shld ecx,ecx,5 1585 add ebx,edi 1586 and esi,edx 1587 xor edx,ebp 1588 add ebx,ecx 1589 shrd ecx,ecx,7 1590 xor esi,ebp 1591 vpalignr xmm5,xmm2,xmm1,8 1592 mov edi,ebx 1593 add eax,DWORD[16+rsp] 1594 vpaddd xmm9,xmm10,xmm4 1595 xor ecx,edx 1596 shld ebx,ebx,5 1597 vpsrldq xmm8,xmm4,4 1598 add eax,esi 1599 and edi,ecx 1600 vpxor xmm5,xmm5,xmm1 1601 xor ecx,edx 1602 add eax,ebx 1603 vpxor xmm8,xmm8,xmm3 1604 shrd ebx,ebx,7 1605 vaesenc xmm12,xmm12,xmm15 1606 vmovups xmm14,XMMWORD[((-32))+r15] 1607 xor edi,edx 1608 mov esi,eax 1609 add ebp,DWORD[20+rsp] 1610 vpxor xmm5,xmm5,xmm8 1611 xor ebx,ecx 1612 shld eax,eax,5 1613 vmovdqa XMMWORD[rsp],xmm9 1614 add ebp,edi 1615 and esi,ebx 1616 vpsrld xmm8,xmm5,31 1617 xor ebx,ecx 1618 add ebp,eax 1619 shrd eax,eax,7 1620 xor esi,ecx 1621 vpslldq xmm9,xmm5,12 1622 vpaddd xmm5,xmm5,xmm5 1623 mov edi,ebp 1624 add edx,DWORD[24+rsp] 1625 xor eax,ebx 1626 shld ebp,ebp,5 1627 vpor xmm5,xmm5,xmm8 1628 vpsrld xmm8,xmm9,30 1629 add edx,esi 1630 vaesenc xmm12,xmm12,xmm14 1631 vmovups xmm15,XMMWORD[((-16))+r15] 1632 and edi,eax 1633 xor eax,ebx 1634 add edx,ebp 1635 vpslld xmm9,xmm9,2 1636 vpxor xmm5,xmm5,xmm8 1637 shrd ebp,ebp,7 1638 xor edi,ebx 1639 mov esi,edx 1640 add ecx,DWORD[28+rsp] 1641 vpxor xmm5,xmm5,xmm9 1642 xor ebp,eax 1643 shld edx,edx,5 1644 vmovdqa xmm10,XMMWORD[16+r11] 1645 add ecx,edi 1646 and esi,ebp 1647 xor ebp,eax 1648 add ecx,edx 1649 shrd edx,edx,7 1650 xor esi,eax 1651 vpalignr xmm6,xmm3,xmm2,8 1652 mov edi,ecx 1653 add ebx,DWORD[32+rsp] 1654 vaesenc xmm12,xmm12,xmm15 1655 vmovups xmm14,XMMWORD[r15] 1656 vpaddd xmm9,xmm10,xmm5 1657 xor edx,ebp 1658 shld ecx,ecx,5 1659 vpsrldq xmm8,xmm5,4 1660 add ebx,esi 1661 and edi,edx 1662 vpxor xmm6,xmm6,xmm2 1663 xor edx,ebp 1664 add ebx,ecx 1665 vpxor xmm8,xmm8,xmm4 1666 shrd ecx,ecx,7 1667 xor edi,ebp 1668 mov esi,ebx 1669 add eax,DWORD[36+rsp] 1670 vpxor xmm6,xmm6,xmm8 1671 xor ecx,edx 1672 shld ebx,ebx,5 1673 vmovdqa XMMWORD[16+rsp],xmm9 1674 add eax,edi 1675 and esi,ecx 1676 vpsrld xmm8,xmm6,31 1677 xor ecx,edx 1678 add eax,ebx 1679 shrd ebx,ebx,7 1680 vaesenc xmm12,xmm12,xmm14 1681 vmovups xmm15,XMMWORD[16+r15] 1682 xor esi,edx 1683 vpslldq xmm9,xmm6,12 1684 vpaddd xmm6,xmm6,xmm6 1685 mov edi,eax 1686 add ebp,DWORD[40+rsp] 1687 xor ebx,ecx 1688 shld eax,eax,5 1689 vpor xmm6,xmm6,xmm8 1690 vpsrld xmm8,xmm9,30 1691 add ebp,esi 1692 and edi,ebx 1693 xor ebx,ecx 1694 add ebp,eax 1695 vpslld xmm9,xmm9,2 1696 vpxor xmm6,xmm6,xmm8 1697 shrd eax,eax,7 1698 xor edi,ecx 1699 mov esi,ebp 1700 add edx,DWORD[44+rsp] 1701 vpxor xmm6,xmm6,xmm9 1702 xor eax,ebx 1703 shld ebp,ebp,5 1704 add edx,edi 1705 vaesenc xmm12,xmm12,xmm15 1706 vmovups xmm14,XMMWORD[32+r15] 1707 and esi,eax 1708 xor eax,ebx 1709 add edx,ebp 1710 shrd ebp,ebp,7 1711 xor esi,ebx 1712 vpalignr xmm7,xmm4,xmm3,8 1713 mov edi,edx 1714 add ecx,DWORD[48+rsp] 1715 vpaddd xmm9,xmm10,xmm6 1716 xor ebp,eax 1717 shld edx,edx,5 1718 vpsrldq xmm8,xmm6,4 1719 add ecx,esi 1720 and edi,ebp 1721 vpxor xmm7,xmm7,xmm3 1722 xor ebp,eax 1723 add ecx,edx 1724 vpxor xmm8,xmm8,xmm5 1725 shrd edx,edx,7 1726 xor edi,eax 1727 mov esi,ecx 1728 add ebx,DWORD[52+rsp] 1729 vaesenc xmm12,xmm12,xmm14 1730 vmovups xmm15,XMMWORD[48+r15] 1731 vpxor xmm7,xmm7,xmm8 1732 xor edx,ebp 1733 shld ecx,ecx,5 1734 vmovdqa XMMWORD[32+rsp],xmm9 1735 add ebx,edi 1736 and esi,edx 1737 vpsrld xmm8,xmm7,31 1738 xor edx,ebp 1739 add ebx,ecx 1740 shrd ecx,ecx,7 1741 xor esi,ebp 1742 vpslldq xmm9,xmm7,12 1743 vpaddd xmm7,xmm7,xmm7 1744 mov edi,ebx 1745 add eax,DWORD[56+rsp] 1746 xor ecx,edx 1747 shld ebx,ebx,5 1748 vpor xmm7,xmm7,xmm8 1749 vpsrld xmm8,xmm9,30 1750 add eax,esi 1751 and edi,ecx 1752 xor ecx,edx 1753 add eax,ebx 1754 vpslld xmm9,xmm9,2 1755 vpxor xmm7,xmm7,xmm8 1756 shrd ebx,ebx,7 1757 cmp r8d,11 1758 jb NEAR $L$vaesenclast6 1759 vaesenc xmm12,xmm12,xmm15 1760 vmovups xmm14,XMMWORD[64+r15] 1761 vaesenc xmm12,xmm12,xmm14 1762 vmovups xmm15,XMMWORD[80+r15] 1763 je NEAR $L$vaesenclast6 1764 vaesenc xmm12,xmm12,xmm15 1765 vmovups xmm14,XMMWORD[96+r15] 1766 vaesenc xmm12,xmm12,xmm14 1767 vmovups xmm15,XMMWORD[112+r15] 1768 $L$vaesenclast6: 1769 vaesenclast xmm12,xmm12,xmm15 1770 vmovups xmm15,XMMWORD[((-112))+r15] 1771 vmovups xmm14,XMMWORD[((16-112))+r15] 1772 xor edi,edx 1773 mov esi,eax 1774 add ebp,DWORD[60+rsp] 1775 vpxor xmm7,xmm7,xmm9 1776 xor ebx,ecx 1777 shld eax,eax,5 1778 add ebp,edi 1779 and esi,ebx 1780 xor ebx,ecx 1781 add ebp,eax 1782 vpalignr xmm8,xmm7,xmm6,8 1783 vpxor xmm0,xmm0,xmm4 1784 shrd eax,eax,7 1785 xor esi,ecx 1786 mov edi,ebp 1787 add edx,DWORD[rsp] 1788 vpxor xmm0,xmm0,xmm1 1789 xor eax,ebx 1790 shld ebp,ebp,5 1791 vpaddd xmm9,xmm10,xmm7 1792 add edx,esi 1793 vmovdqu xmm13,XMMWORD[16+r12] 1794 vpxor xmm13,xmm13,xmm15 1795 vmovups XMMWORD[r13*1+r12],xmm12 1796 vpxor xmm12,xmm12,xmm13 1797 vaesenc xmm12,xmm12,xmm14 1798 vmovups xmm15,XMMWORD[((-80))+r15] 1799 and edi,eax 1800 vpxor xmm0,xmm0,xmm8 1801 xor eax,ebx 1802 add edx,ebp 1803 shrd ebp,ebp,7 1804 xor edi,ebx 1805 vpsrld xmm8,xmm0,30 1806 vmovdqa XMMWORD[48+rsp],xmm9 1807 mov esi,edx 1808 add ecx,DWORD[4+rsp] 1809 xor ebp,eax 1810 shld edx,edx,5 1811 vpslld xmm0,xmm0,2 1812 add ecx,edi 1813 and esi,ebp 1814 xor ebp,eax 1815 add ecx,edx 1816 shrd edx,edx,7 1817 xor esi,eax 1818 mov edi,ecx 1819 add ebx,DWORD[8+rsp] 1820 vaesenc xmm12,xmm12,xmm15 1821 vmovups xmm14,XMMWORD[((-64))+r15] 1822 vpor xmm0,xmm0,xmm8 1823 xor edx,ebp 1824 shld ecx,ecx,5 1825 add ebx,esi 1826 and edi,edx 1827 xor edx,ebp 1828 add ebx,ecx 1829 add eax,DWORD[12+rsp] 1830 xor edi,ebp 1831 mov esi,ebx 1832 shld ebx,ebx,5 1833 add eax,edi 1834 xor esi,edx 1835 shrd ecx,ecx,7 1836 add eax,ebx 1837 vpalignr xmm8,xmm0,xmm7,8 1838 vpxor xmm1,xmm1,xmm5 1839 add ebp,DWORD[16+rsp] 1840 vaesenc xmm12,xmm12,xmm14 1841 vmovups xmm15,XMMWORD[((-48))+r15] 1842 xor esi,ecx 1843 mov edi,eax 1844 shld eax,eax,5 1845 vpxor xmm1,xmm1,xmm2 1846 add ebp,esi 1847 xor edi,ecx 1848 vpaddd xmm9,xmm10,xmm0 1849 shrd ebx,ebx,7 1850 add ebp,eax 1851 vpxor xmm1,xmm1,xmm8 1852 add edx,DWORD[20+rsp] 1853 xor edi,ebx 1854 mov esi,ebp 1855 shld ebp,ebp,5 1856 vpsrld xmm8,xmm1,30 1857 vmovdqa XMMWORD[rsp],xmm9 1858 add edx,edi 1859 xor esi,ebx 1860 shrd eax,eax,7 1861 add edx,ebp 1862 vpslld xmm1,xmm1,2 1863 add ecx,DWORD[24+rsp] 1864 xor esi,eax 1865 mov edi,edx 1866 shld edx,edx,5 1867 add ecx,esi 1868 vaesenc xmm12,xmm12,xmm15 1869 vmovups xmm14,XMMWORD[((-32))+r15] 1870 xor edi,eax 1871 shrd ebp,ebp,7 1872 add ecx,edx 1873 vpor xmm1,xmm1,xmm8 1874 add ebx,DWORD[28+rsp] 1875 xor edi,ebp 1876 mov esi,ecx 1877 shld ecx,ecx,5 1878 add ebx,edi 1879 xor esi,ebp 1880 shrd edx,edx,7 1881 add ebx,ecx 1882 vpalignr xmm8,xmm1,xmm0,8 1883 vpxor xmm2,xmm2,xmm6 1884 add eax,DWORD[32+rsp] 1885 xor esi,edx 1886 mov edi,ebx 1887 shld ebx,ebx,5 1888 vpxor xmm2,xmm2,xmm3 1889 add eax,esi 1890 xor edi,edx 1891 vpaddd xmm9,xmm10,xmm1 1892 vmovdqa xmm10,XMMWORD[32+r11] 1893 shrd ecx,ecx,7 1894 add eax,ebx 1895 vpxor xmm2,xmm2,xmm8 1896 add ebp,DWORD[36+rsp] 1897 vaesenc xmm12,xmm12,xmm14 1898 vmovups xmm15,XMMWORD[((-16))+r15] 1899 xor edi,ecx 1900 mov esi,eax 1901 shld eax,eax,5 1902 vpsrld xmm8,xmm2,30 1903 vmovdqa XMMWORD[16+rsp],xmm9 1904 add ebp,edi 1905 xor esi,ecx 1906 shrd ebx,ebx,7 1907 add ebp,eax 1908 vpslld xmm2,xmm2,2 1909 add edx,DWORD[40+rsp] 1910 xor esi,ebx 1911 mov edi,ebp 1912 shld ebp,ebp,5 1913 add edx,esi 1914 xor edi,ebx 1915 shrd eax,eax,7 1916 add edx,ebp 1917 vpor xmm2,xmm2,xmm8 1918 add ecx,DWORD[44+rsp] 1919 xor edi,eax 1920 mov esi,edx 1921 shld edx,edx,5 1922 add ecx,edi 1923 vaesenc xmm12,xmm12,xmm15 1924 vmovups xmm14,XMMWORD[r15] 1925 xor esi,eax 1926 shrd ebp,ebp,7 1927 add ecx,edx 1928 vpalignr xmm8,xmm2,xmm1,8 1929 vpxor xmm3,xmm3,xmm7 1930 add ebx,DWORD[48+rsp] 1931 xor esi,ebp 1932 mov edi,ecx 1933 shld ecx,ecx,5 1934 vpxor xmm3,xmm3,xmm4 1935 add ebx,esi 1936 xor edi,ebp 1937 vpaddd xmm9,xmm10,xmm2 1938 shrd edx,edx,7 1939 add ebx,ecx 1940 vpxor xmm3,xmm3,xmm8 1941 add eax,DWORD[52+rsp] 1942 xor edi,edx 1943 mov esi,ebx 1944 shld ebx,ebx,5 1945 vpsrld xmm8,xmm3,30 1946 vmovdqa XMMWORD[32+rsp],xmm9 1947 add eax,edi 1948 xor esi,edx 1949 shrd ecx,ecx,7 1950 add eax,ebx 1951 vpslld xmm3,xmm3,2 1952 add ebp,DWORD[56+rsp] 1953 vaesenc xmm12,xmm12,xmm14 1954 vmovups xmm15,XMMWORD[16+r15] 1955 xor esi,ecx 1956 mov edi,eax 1957 shld eax,eax,5 1958 add ebp,esi 1959 xor edi,ecx 1960 shrd ebx,ebx,7 1961 add ebp,eax 1962 vpor xmm3,xmm3,xmm8 1963 add edx,DWORD[60+rsp] 1964 xor edi,ebx 1965 mov esi,ebp 1966 shld ebp,ebp,5 1967 add edx,edi 1968 xor esi,ebx 1969 shrd eax,eax,7 1970 add edx,ebp 1971 vpalignr xmm8,xmm3,xmm2,8 1972 vpxor xmm4,xmm4,xmm0 1973 add ecx,DWORD[rsp] 1974 xor esi,eax 1975 mov edi,edx 1976 shld edx,edx,5 1977 vpxor xmm4,xmm4,xmm5 1978 add ecx,esi 1979 vaesenc xmm12,xmm12,xmm15 1980 vmovups xmm14,XMMWORD[32+r15] 1981 xor edi,eax 1982 vpaddd xmm9,xmm10,xmm3 1983 shrd ebp,ebp,7 1984 add ecx,edx 1985 vpxor xmm4,xmm4,xmm8 1986 add ebx,DWORD[4+rsp] 1987 xor edi,ebp 1988 mov esi,ecx 1989 shld ecx,ecx,5 1990 vpsrld xmm8,xmm4,30 1991 vmovdqa XMMWORD[48+rsp],xmm9 1992 add ebx,edi 1993 xor esi,ebp 1994 shrd edx,edx,7 1995 add ebx,ecx 1996 vpslld xmm4,xmm4,2 1997 add eax,DWORD[8+rsp] 1998 xor esi,edx 1999 mov edi,ebx 2000 shld ebx,ebx,5 2001 add eax,esi 2002 xor edi,edx 2003 shrd ecx,ecx,7 2004 add eax,ebx 2005 vpor xmm4,xmm4,xmm8 2006 add ebp,DWORD[12+rsp] 2007 vaesenc xmm12,xmm12,xmm14 2008 vmovups xmm15,XMMWORD[48+r15] 2009 xor edi,ecx 2010 mov esi,eax 2011 shld eax,eax,5 2012 add ebp,edi 2013 xor esi,ecx 2014 shrd ebx,ebx,7 2015 add ebp,eax 2016 vpalignr xmm8,xmm4,xmm3,8 2017 vpxor xmm5,xmm5,xmm1 2018 add edx,DWORD[16+rsp] 2019 xor esi,ebx 2020 mov edi,ebp 2021 shld ebp,ebp,5 2022 vpxor xmm5,xmm5,xmm6 2023 add edx,esi 2024 xor edi,ebx 2025 vpaddd xmm9,xmm10,xmm4 2026 shrd eax,eax,7 2027 add edx,ebp 2028 vpxor xmm5,xmm5,xmm8 2029 add ecx,DWORD[20+rsp] 2030 xor edi,eax 2031 mov esi,edx 2032 shld edx,edx,5 2033 vpsrld xmm8,xmm5,30 2034 vmovdqa XMMWORD[rsp],xmm9 2035 add ecx,edi 2036 cmp r8d,11 2037 jb NEAR $L$vaesenclast7 2038 vaesenc xmm12,xmm12,xmm15 2039 vmovups xmm14,XMMWORD[64+r15] 2040 vaesenc xmm12,xmm12,xmm14 2041 vmovups xmm15,XMMWORD[80+r15] 2042 je NEAR $L$vaesenclast7 2043 vaesenc xmm12,xmm12,xmm15 2044 vmovups xmm14,XMMWORD[96+r15] 2045 vaesenc xmm12,xmm12,xmm14 2046 vmovups xmm15,XMMWORD[112+r15] 2047 $L$vaesenclast7: 2048 vaesenclast xmm12,xmm12,xmm15 2049 vmovups xmm15,XMMWORD[((-112))+r15] 2050 vmovups xmm14,XMMWORD[((16-112))+r15] 2051 xor esi,eax 2052 shrd ebp,ebp,7 2053 add ecx,edx 2054 vpslld xmm5,xmm5,2 2055 add ebx,DWORD[24+rsp] 2056 xor esi,ebp 2057 mov edi,ecx 2058 shld ecx,ecx,5 2059 add ebx,esi 2060 xor edi,ebp 2061 shrd edx,edx,7 2062 add ebx,ecx 2063 vpor xmm5,xmm5,xmm8 2064 add eax,DWORD[28+rsp] 2065 shrd ecx,ecx,7 2066 mov esi,ebx 2067 xor edi,edx 2068 shld ebx,ebx,5 2069 add eax,edi 2070 xor esi,ecx 2071 xor ecx,edx 2072 add eax,ebx 2073 vpalignr xmm8,xmm5,xmm4,8 2074 vpxor xmm6,xmm6,xmm2 2075 add ebp,DWORD[32+rsp] 2076 vmovdqu xmm13,XMMWORD[32+r12] 2077 vpxor xmm13,xmm13,xmm15 2078 vmovups XMMWORD[16+r12*1+r13],xmm12 2079 vpxor xmm12,xmm12,xmm13 2080 vaesenc xmm12,xmm12,xmm14 2081 vmovups xmm15,XMMWORD[((-80))+r15] 2082 and esi,ecx 2083 xor ecx,edx 2084 shrd ebx,ebx,7 2085 vpxor xmm6,xmm6,xmm7 2086 mov edi,eax 2087 xor esi,ecx 2088 vpaddd xmm9,xmm10,xmm5 2089 shld eax,eax,5 2090 add ebp,esi 2091 vpxor xmm6,xmm6,xmm8 2092 xor edi,ebx 2093 xor ebx,ecx 2094 add ebp,eax 2095 add edx,DWORD[36+rsp] 2096 vpsrld xmm8,xmm6,30 2097 vmovdqa XMMWORD[16+rsp],xmm9 2098 and edi,ebx 2099 xor ebx,ecx 2100 shrd eax,eax,7 2101 mov esi,ebp 2102 vpslld xmm6,xmm6,2 2103 xor edi,ebx 2104 shld ebp,ebp,5 2105 add edx,edi 2106 vaesenc xmm12,xmm12,xmm15 2107 vmovups xmm14,XMMWORD[((-64))+r15] 2108 xor esi,eax 2109 xor eax,ebx 2110 add edx,ebp 2111 add ecx,DWORD[40+rsp] 2112 and esi,eax 2113 vpor xmm6,xmm6,xmm8 2114 xor eax,ebx 2115 shrd ebp,ebp,7 2116 mov edi,edx 2117 xor esi,eax 2118 shld edx,edx,5 2119 add ecx,esi 2120 xor edi,ebp 2121 xor ebp,eax 2122 add ecx,edx 2123 add ebx,DWORD[44+rsp] 2124 and edi,ebp 2125 xor ebp,eax 2126 shrd edx,edx,7 2127 vaesenc xmm12,xmm12,xmm14 2128 vmovups xmm15,XMMWORD[((-48))+r15] 2129 mov esi,ecx 2130 xor edi,ebp 2131 shld ecx,ecx,5 2132 add ebx,edi 2133 xor esi,edx 2134 xor edx,ebp 2135 add ebx,ecx 2136 vpalignr xmm8,xmm6,xmm5,8 2137 vpxor xmm7,xmm7,xmm3 2138 add eax,DWORD[48+rsp] 2139 and esi,edx 2140 xor edx,ebp 2141 shrd ecx,ecx,7 2142 vpxor xmm7,xmm7,xmm0 2143 mov edi,ebx 2144 xor esi,edx 2145 vpaddd xmm9,xmm10,xmm6 2146 vmovdqa xmm10,XMMWORD[48+r11] 2147 shld ebx,ebx,5 2148 add eax,esi 2149 vpxor xmm7,xmm7,xmm8 2150 xor edi,ecx 2151 xor ecx,edx 2152 add eax,ebx 2153 add ebp,DWORD[52+rsp] 2154 vaesenc xmm12,xmm12,xmm15 2155 vmovups xmm14,XMMWORD[((-32))+r15] 2156 vpsrld xmm8,xmm7,30 2157 vmovdqa XMMWORD[32+rsp],xmm9 2158 and edi,ecx 2159 xor ecx,edx 2160 shrd ebx,ebx,7 2161 mov esi,eax 2162 vpslld xmm7,xmm7,2 2163 xor edi,ecx 2164 shld eax,eax,5 2165 add ebp,edi 2166 xor esi,ebx 2167 xor ebx,ecx 2168 add ebp,eax 2169 add edx,DWORD[56+rsp] 2170 and esi,ebx 2171 vpor xmm7,xmm7,xmm8 2172 xor ebx,ecx 2173 shrd eax,eax,7 2174 mov edi,ebp 2175 xor esi,ebx 2176 shld ebp,ebp,5 2177 add edx,esi 2178 vaesenc xmm12,xmm12,xmm14 2179 vmovups xmm15,XMMWORD[((-16))+r15] 2180 xor edi,eax 2181 xor eax,ebx 2182 add edx,ebp 2183 add ecx,DWORD[60+rsp] 2184 and edi,eax 2185 xor eax,ebx 2186 shrd ebp,ebp,7 2187 mov esi,edx 2188 xor edi,eax 2189 shld edx,edx,5 2190 add ecx,edi 2191 xor esi,ebp 2192 xor ebp,eax 2193 add ecx,edx 2194 vpalignr xmm8,xmm7,xmm6,8 2195 vpxor xmm0,xmm0,xmm4 2196 add ebx,DWORD[rsp] 2197 and esi,ebp 2198 xor ebp,eax 2199 shrd edx,edx,7 2200 vaesenc xmm12,xmm12,xmm15 2201 vmovups xmm14,XMMWORD[r15] 2202 vpxor xmm0,xmm0,xmm1 2203 mov edi,ecx 2204 xor esi,ebp 2205 vpaddd xmm9,xmm10,xmm7 2206 shld ecx,ecx,5 2207 add ebx,esi 2208 vpxor xmm0,xmm0,xmm8 2209 xor edi,edx 2210 xor edx,ebp 2211 add ebx,ecx 2212 add eax,DWORD[4+rsp] 2213 vpsrld xmm8,xmm0,30 2214 vmovdqa XMMWORD[48+rsp],xmm9 2215 and edi,edx 2216 xor edx,ebp 2217 shrd ecx,ecx,7 2218 mov esi,ebx 2219 vpslld xmm0,xmm0,2 2220 xor edi,edx 2221 shld ebx,ebx,5 2222 add eax,edi 2223 xor esi,ecx 2224 xor ecx,edx 2225 add eax,ebx 2226 add ebp,DWORD[8+rsp] 2227 vaesenc xmm12,xmm12,xmm14 2228 vmovups xmm15,XMMWORD[16+r15] 2229 and esi,ecx 2230 vpor xmm0,xmm0,xmm8 2231 xor ecx,edx 2232 shrd ebx,ebx,7 2233 mov edi,eax 2234 xor esi,ecx 2235 shld eax,eax,5 2236 add ebp,esi 2237 xor edi,ebx 2238 xor ebx,ecx 2239 add ebp,eax 2240 add edx,DWORD[12+rsp] 2241 and edi,ebx 2242 xor ebx,ecx 2243 shrd eax,eax,7 2244 mov esi,ebp 2245 xor edi,ebx 2246 shld ebp,ebp,5 2247 add edx,edi 2248 vaesenc xmm12,xmm12,xmm15 2249 vmovups xmm14,XMMWORD[32+r15] 2250 xor esi,eax 2251 xor eax,ebx 2252 add edx,ebp 2253 vpalignr xmm8,xmm0,xmm7,8 2254 vpxor xmm1,xmm1,xmm5 2255 add ecx,DWORD[16+rsp] 2256 and esi,eax 2257 xor eax,ebx 2258 shrd ebp,ebp,7 2259 vpxor xmm1,xmm1,xmm2 2260 mov edi,edx 2261 xor esi,eax 2262 vpaddd xmm9,xmm10,xmm0 2263 shld edx,edx,5 2264 add ecx,esi 2265 vpxor xmm1,xmm1,xmm8 2266 xor edi,ebp 2267 xor ebp,eax 2268 add ecx,edx 2269 add ebx,DWORD[20+rsp] 2270 vpsrld xmm8,xmm1,30 2271 vmovdqa XMMWORD[rsp],xmm9 2272 and edi,ebp 2273 xor ebp,eax 2274 shrd edx,edx,7 2275 vaesenc xmm12,xmm12,xmm14 2276 vmovups xmm15,XMMWORD[48+r15] 2277 mov esi,ecx 2278 vpslld xmm1,xmm1,2 2279 xor edi,ebp 2280 shld ecx,ecx,5 2281 add ebx,edi 2282 xor esi,edx 2283 xor edx,ebp 2284 add ebx,ecx 2285 add eax,DWORD[24+rsp] 2286 and esi,edx 2287 vpor xmm1,xmm1,xmm8 2288 xor edx,ebp 2289 shrd ecx,ecx,7 2290 mov edi,ebx 2291 xor esi,edx 2292 shld ebx,ebx,5 2293 add eax,esi 2294 xor edi,ecx 2295 xor ecx,edx 2296 add eax,ebx 2297 add ebp,DWORD[28+rsp] 2298 cmp r8d,11 2299 jb NEAR $L$vaesenclast8 2300 vaesenc xmm12,xmm12,xmm15 2301 vmovups xmm14,XMMWORD[64+r15] 2302 vaesenc xmm12,xmm12,xmm14 2303 vmovups xmm15,XMMWORD[80+r15] 2304 je NEAR $L$vaesenclast8 2305 vaesenc xmm12,xmm12,xmm15 2306 vmovups xmm14,XMMWORD[96+r15] 2307 vaesenc xmm12,xmm12,xmm14 2308 vmovups xmm15,XMMWORD[112+r15] 2309 $L$vaesenclast8: 2310 vaesenclast xmm12,xmm12,xmm15 2311 vmovups xmm15,XMMWORD[((-112))+r15] 2312 vmovups xmm14,XMMWORD[((16-112))+r15] 2313 and edi,ecx 2314 xor ecx,edx 2315 shrd ebx,ebx,7 2316 mov esi,eax 2317 xor edi,ecx 2318 shld eax,eax,5 2319 add ebp,edi 2320 xor esi,ebx 2321 xor ebx,ecx 2322 add ebp,eax 2323 vpalignr xmm8,xmm1,xmm0,8 2324 vpxor xmm2,xmm2,xmm6 2325 add edx,DWORD[32+rsp] 2326 and esi,ebx 2327 xor ebx,ecx 2328 shrd eax,eax,7 2329 vpxor xmm2,xmm2,xmm3 2330 mov edi,ebp 2331 xor esi,ebx 2332 vpaddd xmm9,xmm10,xmm1 2333 shld ebp,ebp,5 2334 add edx,esi 2335 vmovdqu xmm13,XMMWORD[48+r12] 2336 vpxor xmm13,xmm13,xmm15 2337 vmovups XMMWORD[32+r12*1+r13],xmm12 2338 vpxor xmm12,xmm12,xmm13 2339 vaesenc xmm12,xmm12,xmm14 2340 vmovups xmm15,XMMWORD[((-80))+r15] 2341 vpxor xmm2,xmm2,xmm8 2342 xor edi,eax 2343 xor eax,ebx 2344 add edx,ebp 2345 add ecx,DWORD[36+rsp] 2346 vpsrld xmm8,xmm2,30 2347 vmovdqa XMMWORD[16+rsp],xmm9 2348 and edi,eax 2349 xor eax,ebx 2350 shrd ebp,ebp,7 2351 mov esi,edx 2352 vpslld xmm2,xmm2,2 2353 xor edi,eax 2354 shld edx,edx,5 2355 add ecx,edi 2356 xor esi,ebp 2357 xor ebp,eax 2358 add ecx,edx 2359 add ebx,DWORD[40+rsp] 2360 and esi,ebp 2361 vpor xmm2,xmm2,xmm8 2362 xor ebp,eax 2363 shrd edx,edx,7 2364 vaesenc xmm12,xmm12,xmm15 2365 vmovups xmm14,XMMWORD[((-64))+r15] 2366 mov edi,ecx 2367 xor esi,ebp 2368 shld ecx,ecx,5 2369 add ebx,esi 2370 xor edi,edx 2371 xor edx,ebp 2372 add ebx,ecx 2373 add eax,DWORD[44+rsp] 2374 and edi,edx 2375 xor edx,ebp 2376 shrd ecx,ecx,7 2377 mov esi,ebx 2378 xor edi,edx 2379 shld ebx,ebx,5 2380 add eax,edi 2381 xor esi,edx 2382 add eax,ebx 2383 vpalignr xmm8,xmm2,xmm1,8 2384 vpxor xmm3,xmm3,xmm7 2385 add ebp,DWORD[48+rsp] 2386 vaesenc xmm12,xmm12,xmm14 2387 vmovups xmm15,XMMWORD[((-48))+r15] 2388 xor esi,ecx 2389 mov edi,eax 2390 shld eax,eax,5 2391 vpxor xmm3,xmm3,xmm4 2392 add ebp,esi 2393 xor edi,ecx 2394 vpaddd xmm9,xmm10,xmm2 2395 shrd ebx,ebx,7 2396 add ebp,eax 2397 vpxor xmm3,xmm3,xmm8 2398 add edx,DWORD[52+rsp] 2399 xor edi,ebx 2400 mov esi,ebp 2401 shld ebp,ebp,5 2402 vpsrld xmm8,xmm3,30 2403 vmovdqa XMMWORD[32+rsp],xmm9 2404 add edx,edi 2405 xor esi,ebx 2406 shrd eax,eax,7 2407 add edx,ebp 2408 vpslld xmm3,xmm3,2 2409 add ecx,DWORD[56+rsp] 2410 xor esi,eax 2411 mov edi,edx 2412 shld edx,edx,5 2413 add ecx,esi 2414 vaesenc xmm12,xmm12,xmm15 2415 vmovups xmm14,XMMWORD[((-32))+r15] 2416 xor edi,eax 2417 shrd ebp,ebp,7 2418 add ecx,edx 2419 vpor xmm3,xmm3,xmm8 2420 add ebx,DWORD[60+rsp] 2421 xor edi,ebp 2422 mov esi,ecx 2423 shld ecx,ecx,5 2424 add ebx,edi 2425 xor esi,ebp 2426 shrd edx,edx,7 2427 add ebx,ecx 2428 add eax,DWORD[rsp] 2429 vpaddd xmm9,xmm10,xmm3 2430 xor esi,edx 2431 mov edi,ebx 2432 shld ebx,ebx,5 2433 add eax,esi 2434 vmovdqa XMMWORD[48+rsp],xmm9 2435 xor edi,edx 2436 shrd ecx,ecx,7 2437 add eax,ebx 2438 add ebp,DWORD[4+rsp] 2439 vaesenc xmm12,xmm12,xmm14 2440 vmovups xmm15,XMMWORD[((-16))+r15] 2441 xor edi,ecx 2442 mov esi,eax 2443 shld eax,eax,5 2444 add ebp,edi 2445 xor esi,ecx 2446 shrd ebx,ebx,7 2447 add ebp,eax 2448 add edx,DWORD[8+rsp] 2449 xor esi,ebx 2450 mov edi,ebp 2451 shld ebp,ebp,5 2452 add edx,esi 2453 xor edi,ebx 2454 shrd eax,eax,7 2455 add edx,ebp 2456 add ecx,DWORD[12+rsp] 2457 xor edi,eax 2458 mov esi,edx 2459 shld edx,edx,5 2460 add ecx,edi 2461 vaesenc xmm12,xmm12,xmm15 2462 vmovups xmm14,XMMWORD[r15] 2463 xor esi,eax 2464 shrd ebp,ebp,7 2465 add ecx,edx 2466 cmp r10,r14 2467 je NEAR $L$done_avx 2468 vmovdqa xmm9,XMMWORD[64+r11] 2469 vmovdqa xmm10,XMMWORD[r11] 2470 vmovdqu xmm0,XMMWORD[r10] 2471 vmovdqu xmm1,XMMWORD[16+r10] 2472 vmovdqu xmm2,XMMWORD[32+r10] 2473 vmovdqu xmm3,XMMWORD[48+r10] 2474 vpshufb xmm0,xmm0,xmm9 2475 add r10,64 2476 add ebx,DWORD[16+rsp] 2477 xor esi,ebp 2478 vpshufb xmm1,xmm1,xmm9 2479 mov edi,ecx 2480 shld ecx,ecx,5 2481 vpaddd xmm8,xmm0,xmm10 2482 add ebx,esi 2483 xor edi,ebp 2484 shrd edx,edx,7 2485 add ebx,ecx 2486 vmovdqa XMMWORD[rsp],xmm8 2487 add eax,DWORD[20+rsp] 2488 xor edi,edx 2489 mov esi,ebx 2490 shld ebx,ebx,5 2491 add eax,edi 2492 xor esi,edx 2493 shrd ecx,ecx,7 2494 add eax,ebx 2495 add ebp,DWORD[24+rsp] 2496 vaesenc xmm12,xmm12,xmm14 2497 vmovups xmm15,XMMWORD[16+r15] 2498 xor esi,ecx 2499 mov edi,eax 2500 shld eax,eax,5 2501 add ebp,esi 2502 xor edi,ecx 2503 shrd ebx,ebx,7 2504 add ebp,eax 2505 add edx,DWORD[28+rsp] 2506 xor edi,ebx 2507 mov esi,ebp 2508 shld ebp,ebp,5 2509 add edx,edi 2510 xor esi,ebx 2511 shrd eax,eax,7 2512 add edx,ebp 2513 add ecx,DWORD[32+rsp] 2514 xor esi,eax 2515 vpshufb xmm2,xmm2,xmm9 2516 mov edi,edx 2517 shld edx,edx,5 2518 vpaddd xmm8,xmm1,xmm10 2519 add ecx,esi 2520 vaesenc xmm12,xmm12,xmm15 2521 vmovups xmm14,XMMWORD[32+r15] 2522 xor edi,eax 2523 shrd ebp,ebp,7 2524 add ecx,edx 2525 vmovdqa XMMWORD[16+rsp],xmm8 2526 add ebx,DWORD[36+rsp] 2527 xor edi,ebp 2528 mov esi,ecx 2529 shld ecx,ecx,5 2530 add ebx,edi 2531 xor esi,ebp 2532 shrd edx,edx,7 2533 add ebx,ecx 2534 add eax,DWORD[40+rsp] 2535 xor esi,edx 2536 mov edi,ebx 2537 shld ebx,ebx,5 2538 add eax,esi 2539 xor edi,edx 2540 shrd ecx,ecx,7 2541 add eax,ebx 2542 add ebp,DWORD[44+rsp] 2543 vaesenc xmm12,xmm12,xmm14 2544 vmovups xmm15,XMMWORD[48+r15] 2545 xor edi,ecx 2546 mov esi,eax 2547 shld eax,eax,5 2548 add ebp,edi 2549 xor esi,ecx 2550 shrd ebx,ebx,7 2551 add ebp,eax 2552 add edx,DWORD[48+rsp] 2553 xor esi,ebx 2554 vpshufb xmm3,xmm3,xmm9 2555 mov edi,ebp 2556 shld ebp,ebp,5 2557 vpaddd xmm8,xmm2,xmm10 2558 add edx,esi 2559 xor edi,ebx 2560 shrd eax,eax,7 2561 add edx,ebp 2562 vmovdqa XMMWORD[32+rsp],xmm8 2563 add ecx,DWORD[52+rsp] 2564 xor edi,eax 2565 mov esi,edx 2566 shld edx,edx,5 2567 add ecx,edi 2568 cmp r8d,11 2569 jb NEAR $L$vaesenclast9 2570 vaesenc xmm12,xmm12,xmm15 2571 vmovups xmm14,XMMWORD[64+r15] 2572 vaesenc xmm12,xmm12,xmm14 2573 vmovups xmm15,XMMWORD[80+r15] 2574 je NEAR $L$vaesenclast9 2575 vaesenc xmm12,xmm12,xmm15 2576 vmovups xmm14,XMMWORD[96+r15] 2577 vaesenc xmm12,xmm12,xmm14 2578 vmovups xmm15,XMMWORD[112+r15] 2579 $L$vaesenclast9: 2580 vaesenclast xmm12,xmm12,xmm15 2581 vmovups xmm15,XMMWORD[((-112))+r15] 2582 vmovups xmm14,XMMWORD[((16-112))+r15] 2583 xor esi,eax 2584 shrd ebp,ebp,7 2585 add ecx,edx 2586 add ebx,DWORD[56+rsp] 2587 xor esi,ebp 2588 mov edi,ecx 2589 shld ecx,ecx,5 2590 add ebx,esi 2591 xor edi,ebp 2592 shrd edx,edx,7 2593 add ebx,ecx 2594 add eax,DWORD[60+rsp] 2595 xor edi,edx 2596 mov esi,ebx 2597 shld ebx,ebx,5 2598 add eax,edi 2599 shrd ecx,ecx,7 2600 add eax,ebx 2601 vmovups XMMWORD[48+r12*1+r13],xmm12 2602 lea r12,[64+r12] 2603 2604 add eax,DWORD[r9] 2605 add esi,DWORD[4+r9] 2606 add ecx,DWORD[8+r9] 2607 add edx,DWORD[12+r9] 2608 mov DWORD[r9],eax 2609 add ebp,DWORD[16+r9] 2610 mov DWORD[4+r9],esi 2611 mov ebx,esi 2612 mov DWORD[8+r9],ecx 2613 mov edi,ecx 2614 mov DWORD[12+r9],edx 2615 xor edi,edx 2616 mov DWORD[16+r9],ebp 2617 and esi,edi 2618 jmp NEAR $L$oop_avx 2619 2620 $L$done_avx: 2621 add ebx,DWORD[16+rsp] 2622 xor esi,ebp 2623 mov edi,ecx 2624 shld ecx,ecx,5 2625 add ebx,esi 2626 xor edi,ebp 2627 shrd edx,edx,7 2628 add ebx,ecx 2629 add eax,DWORD[20+rsp] 2630 xor edi,edx 2631 mov esi,ebx 2632 shld ebx,ebx,5 2633 add eax,edi 2634 xor esi,edx 2635 shrd ecx,ecx,7 2636 add eax,ebx 2637 add ebp,DWORD[24+rsp] 2638 vaesenc xmm12,xmm12,xmm14 2639 vmovups xmm15,XMMWORD[16+r15] 2640 xor esi,ecx 2641 mov edi,eax 2642 shld eax,eax,5 2643 add ebp,esi 2644 xor edi,ecx 2645 shrd ebx,ebx,7 2646 add ebp,eax 2647 add edx,DWORD[28+rsp] 2648 xor edi,ebx 2649 mov esi,ebp 2650 shld ebp,ebp,5 2651 add edx,edi 2652 xor esi,ebx 2653 shrd eax,eax,7 2654 add edx,ebp 2655 add ecx,DWORD[32+rsp] 2656 xor esi,eax 2657 mov edi,edx 2658 shld edx,edx,5 2659 add ecx,esi 2660 vaesenc xmm12,xmm12,xmm15 2661 vmovups xmm14,XMMWORD[32+r15] 2662 xor edi,eax 2663 shrd ebp,ebp,7 2664 add ecx,edx 2665 add ebx,DWORD[36+rsp] 2666 xor edi,ebp 2667 mov esi,ecx 2668 shld ecx,ecx,5 2669 add ebx,edi 2670 xor esi,ebp 2671 shrd edx,edx,7 2672 add ebx,ecx 2673 add eax,DWORD[40+rsp] 2674 xor esi,edx 2675 mov edi,ebx 2676 shld ebx,ebx,5 2677 add eax,esi 2678 xor edi,edx 2679 shrd ecx,ecx,7 2680 add eax,ebx 2681 add ebp,DWORD[44+rsp] 2682 vaesenc xmm12,xmm12,xmm14 2683 vmovups xmm15,XMMWORD[48+r15] 2684 xor edi,ecx 2685 mov esi,eax 2686 shld eax,eax,5 2687 add ebp,edi 2688 xor esi,ecx 2689 shrd ebx,ebx,7 2690 add ebp,eax 2691 add edx,DWORD[48+rsp] 2692 xor esi,ebx 2693 mov edi,ebp 2694 shld ebp,ebp,5 2695 add edx,esi 2696 xor edi,ebx 2697 shrd eax,eax,7 2698 add edx,ebp 2699 add ecx,DWORD[52+rsp] 2700 xor edi,eax 2701 mov esi,edx 2702 shld edx,edx,5 2703 add ecx,edi 2704 cmp r8d,11 2705 jb NEAR $L$vaesenclast10 2706 vaesenc xmm12,xmm12,xmm15 2707 vmovups xmm14,XMMWORD[64+r15] 2708 vaesenc xmm12,xmm12,xmm14 2709 vmovups xmm15,XMMWORD[80+r15] 2710 je NEAR $L$vaesenclast10 2711 vaesenc xmm12,xmm12,xmm15 2712 vmovups xmm14,XMMWORD[96+r15] 2713 vaesenc xmm12,xmm12,xmm14 2714 vmovups xmm15,XMMWORD[112+r15] 2715 $L$vaesenclast10: 2716 vaesenclast xmm12,xmm12,xmm15 2717 vmovups xmm15,XMMWORD[((-112))+r15] 2718 vmovups xmm14,XMMWORD[((16-112))+r15] 2719 xor esi,eax 2720 shrd ebp,ebp,7 2721 add ecx,edx 2722 add ebx,DWORD[56+rsp] 2723 xor esi,ebp 2724 mov edi,ecx 2725 shld ecx,ecx,5 2726 add ebx,esi 2727 xor edi,ebp 2728 shrd edx,edx,7 2729 add ebx,ecx 2730 add eax,DWORD[60+rsp] 2731 xor edi,edx 2732 mov esi,ebx 2733 shld ebx,ebx,5 2734 add eax,edi 2735 shrd ecx,ecx,7 2736 add eax,ebx 2737 vmovups XMMWORD[48+r12*1+r13],xmm12 2738 mov r8,QWORD[88+rsp] 2739 2740 add eax,DWORD[r9] 2741 add esi,DWORD[4+r9] 2742 add ecx,DWORD[8+r9] 2743 mov DWORD[r9],eax 2744 add edx,DWORD[12+r9] 2745 mov DWORD[4+r9],esi 2746 add ebp,DWORD[16+r9] 2747 mov DWORD[8+r9],ecx 2748 mov DWORD[12+r9],edx 2749 mov DWORD[16+r9],ebp 2750 vmovups XMMWORD[r8],xmm12 2751 vzeroall 2752 movaps xmm6,XMMWORD[((96+0))+rsp] 2753 movaps xmm7,XMMWORD[((96+16))+rsp] 2754 movaps xmm8,XMMWORD[((96+32))+rsp] 2755 movaps xmm9,XMMWORD[((96+48))+rsp] 2756 movaps xmm10,XMMWORD[((96+64))+rsp] 2757 movaps xmm11,XMMWORD[((96+80))+rsp] 2758 movaps xmm12,XMMWORD[((96+96))+rsp] 2759 movaps xmm13,XMMWORD[((96+112))+rsp] 2760 movaps xmm14,XMMWORD[((96+128))+rsp] 2761 movaps xmm15,XMMWORD[((96+144))+rsp] 2762 lea rsi,[264+rsp] 2763 2764 mov r15,QWORD[rsi] 2765 2766 mov r14,QWORD[8+rsi] 2767 2768 mov r13,QWORD[16+rsi] 2769 2770 mov r12,QWORD[24+rsi] 2771 2772 mov rbp,QWORD[32+rsi] 2773 2774 mov rbx,QWORD[40+rsi] 2775 2776 lea rsp,[48+rsi] 2777 2778 $L$epilogue_avx: 2779 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2780 mov rsi,QWORD[16+rsp] 2781 DB 0F3h,0C3h ;repret 2782 2783 $L$SEH_end_aesni_cbc_sha1_enc_avx: 1429 2784 ALIGN 64 1430 2785 K_XX_XX: … … 1546 2901 DB 15,56,201,243 1547 2902 cmp r11d,11 1548 jb NEAR $L$aesenclast 62903 jb NEAR $L$aesenclast11 1549 2904 movups xmm0,XMMWORD[64+rcx] 1550 2905 DB 102,15,56,220,209 1551 2906 movups xmm1,XMMWORD[80+rcx] 1552 2907 DB 102,15,56,220,208 1553 je NEAR $L$aesenclast 62908 je NEAR $L$aesenclast11 1554 2909 movups xmm0,XMMWORD[96+rcx] 1555 2910 DB 102,15,56,220,209 1556 2911 movups xmm1,XMMWORD[112+rcx] 1557 2912 DB 102,15,56,220,208 1558 $L$aesenclast 6:2913 $L$aesenclast11: 1559 2914 DB 102,15,56,221,209 1560 2915 movups xmm0,XMMWORD[((16-112))+rcx] … … 1612 2967 DB 15,56,201,220 1613 2968 cmp r11d,11 1614 jb NEAR $L$aesenclast 72969 jb NEAR $L$aesenclast12 1615 2970 movups xmm0,XMMWORD[64+rcx] 1616 2971 DB 102,15,56,220,209 1617 2972 movups xmm1,XMMWORD[80+rcx] 1618 2973 DB 102,15,56,220,208 1619 je NEAR $L$aesenclast 72974 je NEAR $L$aesenclast12 1620 2975 movups xmm0,XMMWORD[96+rcx] 1621 2976 DB 102,15,56,220,209 1622 2977 movups xmm1,XMMWORD[112+rcx] 1623 2978 DB 102,15,56,220,208 1624 $L$aesenclast 7:2979 $L$aesenclast12: 1625 2980 DB 102,15,56,221,209 1626 2981 movups xmm0,XMMWORD[((16-112))+rcx] … … 1678 3033 DB 15,56,201,229 1679 3034 cmp r11d,11 1680 jb NEAR $L$aesenclast 83035 jb NEAR $L$aesenclast13 1681 3036 movups xmm0,XMMWORD[64+rcx] 1682 3037 DB 102,15,56,220,209 1683 3038 movups xmm1,XMMWORD[80+rcx] 1684 3039 DB 102,15,56,220,208 1685 je NEAR $L$aesenclast 83040 je NEAR $L$aesenclast13 1686 3041 movups xmm0,XMMWORD[96+rcx] 1687 3042 DB 102,15,56,220,209 1688 3043 movups xmm1,XMMWORD[112+rcx] 1689 3044 DB 102,15,56,220,208 1690 $L$aesenclast 8:3045 $L$aesenclast13: 1691 3046 DB 102,15,56,221,209 1692 3047 movups xmm0,XMMWORD[((16-112))+rcx] … … 1742 3097 DB 102,15,56,220,208 1743 3098 cmp r11d,11 1744 jb NEAR $L$aesenclast 93099 jb NEAR $L$aesenclast14 1745 3100 movups xmm0,XMMWORD[64+rcx] 1746 3101 DB 102,15,56,220,209 1747 3102 movups xmm1,XMMWORD[80+rcx] 1748 3103 DB 102,15,56,220,208 1749 je NEAR $L$aesenclast 93104 je NEAR $L$aesenclast14 1750 3105 movups xmm0,XMMWORD[96+rcx] 1751 3106 DB 102,15,56,220,209 1752 3107 movups xmm1,XMMWORD[112+rcx] 1753 3108 DB 102,15,56,220,208 1754 $L$aesenclast 9:3109 $L$aesenclast14: 1755 3110 DB 102,15,56,221,209 1756 3111 movups xmm0,XMMWORD[((16-112))+rcx] … … 1892 3247 DD $L$SEH_end_aesni_cbc_sha1_enc_ssse3 wrt ..imagebase 1893 3248 DD $L$SEH_info_aesni_cbc_sha1_enc_ssse3 wrt ..imagebase 3249 DD $L$SEH_begin_aesni_cbc_sha1_enc_avx wrt ..imagebase 3250 DD $L$SEH_end_aesni_cbc_sha1_enc_avx wrt ..imagebase 3251 DD $L$SEH_info_aesni_cbc_sha1_enc_avx wrt ..imagebase 1894 3252 DD $L$SEH_begin_aesni_cbc_sha1_enc_shaext wrt ..imagebase 1895 3253 DD $L$SEH_end_aesni_cbc_sha1_enc_shaext wrt ..imagebase … … 1901 3259 DD ssse3_handler wrt ..imagebase 1902 3260 DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase 3261 $L$SEH_info_aesni_cbc_sha1_enc_avx: 3262 DB 9,0,0,0 3263 DD ssse3_handler wrt ..imagebase 3264 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase 1903 3265 $L$SEH_info_aesni_cbc_sha1_enc_shaext: 1904 3266 DB 9,0,0,0 -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/aesni-sha256-x86_64.S
r97373 r99371 12 12 aesni_cbc_sha256_enc: 13 13 14 lea r11,[OPENSSL_ia32cap_P] 15 mov eax,1 16 cmp rcx,0 17 je NEAR $L$probe 18 mov eax,DWORD[r11] 19 mov r10,QWORD[4+r11] 20 bt r10,61 21 jc NEAR aesni_cbc_sha256_enc_shaext 22 mov r11,r10 23 shr r11,32 24 25 test r10d,2048 26 jnz NEAR aesni_cbc_sha256_enc_xop 27 and r11d,296 28 cmp r11d,296 29 je NEAR aesni_cbc_sha256_enc_avx2 30 and r10d,268435456 31 jnz NEAR aesni_cbc_sha256_enc_avx 32 ud2 14 33 xor eax,eax 15 34 cmp rcx,0 … … 67 86 DB 46,111,114,103,62,0 68 87 ALIGN 64 88 89 ALIGN 64 90 aesni_cbc_sha256_enc_xop: 91 mov QWORD[8+rsp],rdi ;WIN64 prologue 92 mov QWORD[16+rsp],rsi 93 mov rax,rsp 94 $L$SEH_begin_aesni_cbc_sha256_enc_xop: 95 mov rdi,rcx 96 mov rsi,rdx 97 mov rdx,r8 98 mov rcx,r9 99 mov r8,QWORD[40+rsp] 100 mov r9,QWORD[48+rsp] 101 102 103 104 $L$xop_shortcut: 105 mov r10,QWORD[56+rsp] 106 mov rax,rsp 107 108 push rbx 109 110 push rbp 111 112 push r12 113 114 push r13 115 116 push r14 117 118 push r15 119 120 sub rsp,288 121 and rsp,-64 122 123 shl rdx,6 124 sub rsi,rdi 125 sub r10,rdi 126 add rdx,rdi 127 128 129 mov QWORD[((64+8))+rsp],rsi 130 mov QWORD[((64+16))+rsp],rdx 131 132 mov QWORD[((64+32))+rsp],r8 133 mov QWORD[((64+40))+rsp],r9 134 mov QWORD[((64+48))+rsp],r10 135 mov QWORD[120+rsp],rax 136 137 movaps XMMWORD[128+rsp],xmm6 138 movaps XMMWORD[144+rsp],xmm7 139 movaps XMMWORD[160+rsp],xmm8 140 movaps XMMWORD[176+rsp],xmm9 141 movaps XMMWORD[192+rsp],xmm10 142 movaps XMMWORD[208+rsp],xmm11 143 movaps XMMWORD[224+rsp],xmm12 144 movaps XMMWORD[240+rsp],xmm13 145 movaps XMMWORD[256+rsp],xmm14 146 movaps XMMWORD[272+rsp],xmm15 147 $L$prologue_xop: 148 vzeroall 149 150 mov r12,rdi 151 lea rdi,[128+rcx] 152 lea r13,[((K256+544))] 153 mov r14d,DWORD[((240-128))+rdi] 154 mov r15,r9 155 mov rsi,r10 156 vmovdqu xmm8,XMMWORD[r8] 157 sub r14,9 158 159 mov eax,DWORD[r15] 160 mov ebx,DWORD[4+r15] 161 mov ecx,DWORD[8+r15] 162 mov edx,DWORD[12+r15] 163 mov r8d,DWORD[16+r15] 164 mov r9d,DWORD[20+r15] 165 mov r10d,DWORD[24+r15] 166 mov r11d,DWORD[28+r15] 167 168 vmovdqa xmm14,XMMWORD[r14*8+r13] 169 vmovdqa xmm13,XMMWORD[16+r14*8+r13] 170 vmovdqa xmm12,XMMWORD[32+r14*8+r13] 171 vmovdqu xmm10,XMMWORD[((0-128))+rdi] 172 jmp NEAR $L$loop_xop 173 ALIGN 16 174 $L$loop_xop: 175 vmovdqa xmm7,XMMWORD[((K256+512))] 176 vmovdqu xmm0,XMMWORD[r12*1+rsi] 177 vmovdqu xmm1,XMMWORD[16+r12*1+rsi] 178 vmovdqu xmm2,XMMWORD[32+r12*1+rsi] 179 vmovdqu xmm3,XMMWORD[48+r12*1+rsi] 180 vpshufb xmm0,xmm0,xmm7 181 lea rbp,[K256] 182 vpshufb xmm1,xmm1,xmm7 183 vpshufb xmm2,xmm2,xmm7 184 vpaddd xmm4,xmm0,XMMWORD[rbp] 185 vpshufb xmm3,xmm3,xmm7 186 vpaddd xmm5,xmm1,XMMWORD[32+rbp] 187 vpaddd xmm6,xmm2,XMMWORD[64+rbp] 188 vpaddd xmm7,xmm3,XMMWORD[96+rbp] 189 vmovdqa XMMWORD[rsp],xmm4 190 mov r14d,eax 191 vmovdqa XMMWORD[16+rsp],xmm5 192 mov esi,ebx 193 vmovdqa XMMWORD[32+rsp],xmm6 194 xor esi,ecx 195 vmovdqa XMMWORD[48+rsp],xmm7 196 mov r13d,r8d 197 jmp NEAR $L$xop_00_47 198 199 ALIGN 16 200 $L$xop_00_47: 201 sub rbp,-16*2*4 202 vmovdqu xmm9,XMMWORD[r12] 203 mov QWORD[((64+0))+rsp],r12 204 vpalignr xmm4,xmm1,xmm0,4 205 ror r13d,14 206 mov eax,r14d 207 vpalignr xmm7,xmm3,xmm2,4 208 mov r12d,r9d 209 xor r13d,r8d 210 DB 143,232,120,194,236,14 211 ror r14d,9 212 xor r12d,r10d 213 vpsrld xmm4,xmm4,3 214 ror r13d,5 215 xor r14d,eax 216 vpaddd xmm0,xmm0,xmm7 217 and r12d,r8d 218 vpxor xmm9,xmm9,xmm10 219 vmovdqu xmm10,XMMWORD[((16-128))+rdi] 220 xor r13d,r8d 221 add r11d,DWORD[rsp] 222 mov r15d,eax 223 DB 143,232,120,194,245,11 224 ror r14d,11 225 xor r12d,r10d 226 vpxor xmm4,xmm4,xmm5 227 xor r15d,ebx 228 ror r13d,6 229 add r11d,r12d 230 and esi,r15d 231 DB 143,232,120,194,251,13 232 xor r14d,eax 233 add r11d,r13d 234 vpxor xmm4,xmm4,xmm6 235 xor esi,ebx 236 add edx,r11d 237 vpsrld xmm6,xmm3,10 238 ror r14d,2 239 add r11d,esi 240 vpaddd xmm0,xmm0,xmm4 241 mov r13d,edx 242 add r14d,r11d 243 DB 143,232,120,194,239,2 244 ror r13d,14 245 mov r11d,r14d 246 vpxor xmm7,xmm7,xmm6 247 mov r12d,r8d 248 xor r13d,edx 249 ror r14d,9 250 xor r12d,r9d 251 vpxor xmm7,xmm7,xmm5 252 ror r13d,5 253 xor r14d,r11d 254 and r12d,edx 255 vpxor xmm9,xmm9,xmm8 256 xor r13d,edx 257 vpsrldq xmm7,xmm7,8 258 add r10d,DWORD[4+rsp] 259 mov esi,r11d 260 ror r14d,11 261 xor r12d,r9d 262 vpaddd xmm0,xmm0,xmm7 263 xor esi,eax 264 ror r13d,6 265 add r10d,r12d 266 and r15d,esi 267 DB 143,232,120,194,248,13 268 xor r14d,r11d 269 add r10d,r13d 270 vpsrld xmm6,xmm0,10 271 xor r15d,eax 272 add ecx,r10d 273 DB 143,232,120,194,239,2 274 ror r14d,2 275 add r10d,r15d 276 vpxor xmm7,xmm7,xmm6 277 mov r13d,ecx 278 add r14d,r10d 279 ror r13d,14 280 mov r10d,r14d 281 vpxor xmm7,xmm7,xmm5 282 mov r12d,edx 283 xor r13d,ecx 284 ror r14d,9 285 xor r12d,r8d 286 vpslldq xmm7,xmm7,8 287 ror r13d,5 288 xor r14d,r10d 289 and r12d,ecx 290 vaesenc xmm9,xmm9,xmm10 291 vmovdqu xmm10,XMMWORD[((32-128))+rdi] 292 xor r13d,ecx 293 vpaddd xmm0,xmm0,xmm7 294 add r9d,DWORD[8+rsp] 295 mov r15d,r10d 296 ror r14d,11 297 xor r12d,r8d 298 vpaddd xmm6,xmm0,XMMWORD[rbp] 299 xor r15d,r11d 300 ror r13d,6 301 add r9d,r12d 302 and esi,r15d 303 xor r14d,r10d 304 add r9d,r13d 305 xor esi,r11d 306 add ebx,r9d 307 ror r14d,2 308 add r9d,esi 309 mov r13d,ebx 310 add r14d,r9d 311 ror r13d,14 312 mov r9d,r14d 313 mov r12d,ecx 314 xor r13d,ebx 315 ror r14d,9 316 xor r12d,edx 317 ror r13d,5 318 xor r14d,r9d 319 and r12d,ebx 320 vaesenc xmm9,xmm9,xmm10 321 vmovdqu xmm10,XMMWORD[((48-128))+rdi] 322 xor r13d,ebx 323 add r8d,DWORD[12+rsp] 324 mov esi,r9d 325 ror r14d,11 326 xor r12d,edx 327 xor esi,r10d 328 ror r13d,6 329 add r8d,r12d 330 and r15d,esi 331 xor r14d,r9d 332 add r8d,r13d 333 xor r15d,r10d 334 add eax,r8d 335 ror r14d,2 336 add r8d,r15d 337 mov r13d,eax 338 add r14d,r8d 339 vmovdqa XMMWORD[rsp],xmm6 340 vpalignr xmm4,xmm2,xmm1,4 341 ror r13d,14 342 mov r8d,r14d 343 vpalignr xmm7,xmm0,xmm3,4 344 mov r12d,ebx 345 xor r13d,eax 346 DB 143,232,120,194,236,14 347 ror r14d,9 348 xor r12d,ecx 349 vpsrld xmm4,xmm4,3 350 ror r13d,5 351 xor r14d,r8d 352 vpaddd xmm1,xmm1,xmm7 353 and r12d,eax 354 vaesenc xmm9,xmm9,xmm10 355 vmovdqu xmm10,XMMWORD[((64-128))+rdi] 356 xor r13d,eax 357 add edx,DWORD[16+rsp] 358 mov r15d,r8d 359 DB 143,232,120,194,245,11 360 ror r14d,11 361 xor r12d,ecx 362 vpxor xmm4,xmm4,xmm5 363 xor r15d,r9d 364 ror r13d,6 365 add edx,r12d 366 and esi,r15d 367 DB 143,232,120,194,248,13 368 xor r14d,r8d 369 add edx,r13d 370 vpxor xmm4,xmm4,xmm6 371 xor esi,r9d 372 add r11d,edx 373 vpsrld xmm6,xmm0,10 374 ror r14d,2 375 add edx,esi 376 vpaddd xmm1,xmm1,xmm4 377 mov r13d,r11d 378 add r14d,edx 379 DB 143,232,120,194,239,2 380 ror r13d,14 381 mov edx,r14d 382 vpxor xmm7,xmm7,xmm6 383 mov r12d,eax 384 xor r13d,r11d 385 ror r14d,9 386 xor r12d,ebx 387 vpxor xmm7,xmm7,xmm5 388 ror r13d,5 389 xor r14d,edx 390 and r12d,r11d 391 vaesenc xmm9,xmm9,xmm10 392 vmovdqu xmm10,XMMWORD[((80-128))+rdi] 393 xor r13d,r11d 394 vpsrldq xmm7,xmm7,8 395 add ecx,DWORD[20+rsp] 396 mov esi,edx 397 ror r14d,11 398 xor r12d,ebx 399 vpaddd xmm1,xmm1,xmm7 400 xor esi,r8d 401 ror r13d,6 402 add ecx,r12d 403 and r15d,esi 404 DB 143,232,120,194,249,13 405 xor r14d,edx 406 add ecx,r13d 407 vpsrld xmm6,xmm1,10 408 xor r15d,r8d 409 add r10d,ecx 410 DB 143,232,120,194,239,2 411 ror r14d,2 412 add ecx,r15d 413 vpxor xmm7,xmm7,xmm6 414 mov r13d,r10d 415 add r14d,ecx 416 ror r13d,14 417 mov ecx,r14d 418 vpxor xmm7,xmm7,xmm5 419 mov r12d,r11d 420 xor r13d,r10d 421 ror r14d,9 422 xor r12d,eax 423 vpslldq xmm7,xmm7,8 424 ror r13d,5 425 xor r14d,ecx 426 and r12d,r10d 427 vaesenc xmm9,xmm9,xmm10 428 vmovdqu xmm10,XMMWORD[((96-128))+rdi] 429 xor r13d,r10d 430 vpaddd xmm1,xmm1,xmm7 431 add ebx,DWORD[24+rsp] 432 mov r15d,ecx 433 ror r14d,11 434 xor r12d,eax 435 vpaddd xmm6,xmm1,XMMWORD[32+rbp] 436 xor r15d,edx 437 ror r13d,6 438 add ebx,r12d 439 and esi,r15d 440 xor r14d,ecx 441 add ebx,r13d 442 xor esi,edx 443 add r9d,ebx 444 ror r14d,2 445 add ebx,esi 446 mov r13d,r9d 447 add r14d,ebx 448 ror r13d,14 449 mov ebx,r14d 450 mov r12d,r10d 451 xor r13d,r9d 452 ror r14d,9 453 xor r12d,r11d 454 ror r13d,5 455 xor r14d,ebx 456 and r12d,r9d 457 vaesenc xmm9,xmm9,xmm10 458 vmovdqu xmm10,XMMWORD[((112-128))+rdi] 459 xor r13d,r9d 460 add eax,DWORD[28+rsp] 461 mov esi,ebx 462 ror r14d,11 463 xor r12d,r11d 464 xor esi,ecx 465 ror r13d,6 466 add eax,r12d 467 and r15d,esi 468 xor r14d,ebx 469 add eax,r13d 470 xor r15d,ecx 471 add r8d,eax 472 ror r14d,2 473 add eax,r15d 474 mov r13d,r8d 475 add r14d,eax 476 vmovdqa XMMWORD[16+rsp],xmm6 477 vpalignr xmm4,xmm3,xmm2,4 478 ror r13d,14 479 mov eax,r14d 480 vpalignr xmm7,xmm1,xmm0,4 481 mov r12d,r9d 482 xor r13d,r8d 483 DB 143,232,120,194,236,14 484 ror r14d,9 485 xor r12d,r10d 486 vpsrld xmm4,xmm4,3 487 ror r13d,5 488 xor r14d,eax 489 vpaddd xmm2,xmm2,xmm7 490 and r12d,r8d 491 vaesenc xmm9,xmm9,xmm10 492 vmovdqu xmm10,XMMWORD[((128-128))+rdi] 493 xor r13d,r8d 494 add r11d,DWORD[32+rsp] 495 mov r15d,eax 496 DB 143,232,120,194,245,11 497 ror r14d,11 498 xor r12d,r10d 499 vpxor xmm4,xmm4,xmm5 500 xor r15d,ebx 501 ror r13d,6 502 add r11d,r12d 503 and esi,r15d 504 DB 143,232,120,194,249,13 505 xor r14d,eax 506 add r11d,r13d 507 vpxor xmm4,xmm4,xmm6 508 xor esi,ebx 509 add edx,r11d 510 vpsrld xmm6,xmm1,10 511 ror r14d,2 512 add r11d,esi 513 vpaddd xmm2,xmm2,xmm4 514 mov r13d,edx 515 add r14d,r11d 516 DB 143,232,120,194,239,2 517 ror r13d,14 518 mov r11d,r14d 519 vpxor xmm7,xmm7,xmm6 520 mov r12d,r8d 521 xor r13d,edx 522 ror r14d,9 523 xor r12d,r9d 524 vpxor xmm7,xmm7,xmm5 525 ror r13d,5 526 xor r14d,r11d 527 and r12d,edx 528 vaesenc xmm9,xmm9,xmm10 529 vmovdqu xmm10,XMMWORD[((144-128))+rdi] 530 xor r13d,edx 531 vpsrldq xmm7,xmm7,8 532 add r10d,DWORD[36+rsp] 533 mov esi,r11d 534 ror r14d,11 535 xor r12d,r9d 536 vpaddd xmm2,xmm2,xmm7 537 xor esi,eax 538 ror r13d,6 539 add r10d,r12d 540 and r15d,esi 541 DB 143,232,120,194,250,13 542 xor r14d,r11d 543 add r10d,r13d 544 vpsrld xmm6,xmm2,10 545 xor r15d,eax 546 add ecx,r10d 547 DB 143,232,120,194,239,2 548 ror r14d,2 549 add r10d,r15d 550 vpxor xmm7,xmm7,xmm6 551 mov r13d,ecx 552 add r14d,r10d 553 ror r13d,14 554 mov r10d,r14d 555 vpxor xmm7,xmm7,xmm5 556 mov r12d,edx 557 xor r13d,ecx 558 ror r14d,9 559 xor r12d,r8d 560 vpslldq xmm7,xmm7,8 561 ror r13d,5 562 xor r14d,r10d 563 and r12d,ecx 564 vaesenc xmm9,xmm9,xmm10 565 vmovdqu xmm10,XMMWORD[((160-128))+rdi] 566 xor r13d,ecx 567 vpaddd xmm2,xmm2,xmm7 568 add r9d,DWORD[40+rsp] 569 mov r15d,r10d 570 ror r14d,11 571 xor r12d,r8d 572 vpaddd xmm6,xmm2,XMMWORD[64+rbp] 573 xor r15d,r11d 574 ror r13d,6 575 add r9d,r12d 576 and esi,r15d 577 xor r14d,r10d 578 add r9d,r13d 579 xor esi,r11d 580 add ebx,r9d 581 ror r14d,2 582 add r9d,esi 583 mov r13d,ebx 584 add r14d,r9d 585 ror r13d,14 586 mov r9d,r14d 587 mov r12d,ecx 588 xor r13d,ebx 589 ror r14d,9 590 xor r12d,edx 591 ror r13d,5 592 xor r14d,r9d 593 and r12d,ebx 594 vaesenclast xmm11,xmm9,xmm10 595 vaesenc xmm9,xmm9,xmm10 596 vmovdqu xmm10,XMMWORD[((176-128))+rdi] 597 xor r13d,ebx 598 add r8d,DWORD[44+rsp] 599 mov esi,r9d 600 ror r14d,11 601 xor r12d,edx 602 xor esi,r10d 603 ror r13d,6 604 add r8d,r12d 605 and r15d,esi 606 xor r14d,r9d 607 add r8d,r13d 608 xor r15d,r10d 609 add eax,r8d 610 ror r14d,2 611 add r8d,r15d 612 mov r13d,eax 613 add r14d,r8d 614 vmovdqa XMMWORD[32+rsp],xmm6 615 vpalignr xmm4,xmm0,xmm3,4 616 ror r13d,14 617 mov r8d,r14d 618 vpalignr xmm7,xmm2,xmm1,4 619 mov r12d,ebx 620 xor r13d,eax 621 DB 143,232,120,194,236,14 622 ror r14d,9 623 xor r12d,ecx 624 vpsrld xmm4,xmm4,3 625 ror r13d,5 626 xor r14d,r8d 627 vpaddd xmm3,xmm3,xmm7 628 and r12d,eax 629 vpand xmm8,xmm11,xmm12 630 vaesenc xmm9,xmm9,xmm10 631 vmovdqu xmm10,XMMWORD[((192-128))+rdi] 632 xor r13d,eax 633 add edx,DWORD[48+rsp] 634 mov r15d,r8d 635 DB 143,232,120,194,245,11 636 ror r14d,11 637 xor r12d,ecx 638 vpxor xmm4,xmm4,xmm5 639 xor r15d,r9d 640 ror r13d,6 641 add edx,r12d 642 and esi,r15d 643 DB 143,232,120,194,250,13 644 xor r14d,r8d 645 add edx,r13d 646 vpxor xmm4,xmm4,xmm6 647 xor esi,r9d 648 add r11d,edx 649 vpsrld xmm6,xmm2,10 650 ror r14d,2 651 add edx,esi 652 vpaddd xmm3,xmm3,xmm4 653 mov r13d,r11d 654 add r14d,edx 655 DB 143,232,120,194,239,2 656 ror r13d,14 657 mov edx,r14d 658 vpxor xmm7,xmm7,xmm6 659 mov r12d,eax 660 xor r13d,r11d 661 ror r14d,9 662 xor r12d,ebx 663 vpxor xmm7,xmm7,xmm5 664 ror r13d,5 665 xor r14d,edx 666 and r12d,r11d 667 vaesenclast xmm11,xmm9,xmm10 668 vaesenc xmm9,xmm9,xmm10 669 vmovdqu xmm10,XMMWORD[((208-128))+rdi] 670 xor r13d,r11d 671 vpsrldq xmm7,xmm7,8 672 add ecx,DWORD[52+rsp] 673 mov esi,edx 674 ror r14d,11 675 xor r12d,ebx 676 vpaddd xmm3,xmm3,xmm7 677 xor esi,r8d 678 ror r13d,6 679 add ecx,r12d 680 and r15d,esi 681 DB 143,232,120,194,251,13 682 xor r14d,edx 683 add ecx,r13d 684 vpsrld xmm6,xmm3,10 685 xor r15d,r8d 686 add r10d,ecx 687 DB 143,232,120,194,239,2 688 ror r14d,2 689 add ecx,r15d 690 vpxor xmm7,xmm7,xmm6 691 mov r13d,r10d 692 add r14d,ecx 693 ror r13d,14 694 mov ecx,r14d 695 vpxor xmm7,xmm7,xmm5 696 mov r12d,r11d 697 xor r13d,r10d 698 ror r14d,9 699 xor r12d,eax 700 vpslldq xmm7,xmm7,8 701 ror r13d,5 702 xor r14d,ecx 703 and r12d,r10d 704 vpand xmm11,xmm11,xmm13 705 vaesenc xmm9,xmm9,xmm10 706 vmovdqu xmm10,XMMWORD[((224-128))+rdi] 707 xor r13d,r10d 708 vpaddd xmm3,xmm3,xmm7 709 add ebx,DWORD[56+rsp] 710 mov r15d,ecx 711 ror r14d,11 712 xor r12d,eax 713 vpaddd xmm6,xmm3,XMMWORD[96+rbp] 714 xor r15d,edx 715 ror r13d,6 716 add ebx,r12d 717 and esi,r15d 718 xor r14d,ecx 719 add ebx,r13d 720 xor esi,edx 721 add r9d,ebx 722 ror r14d,2 723 add ebx,esi 724 mov r13d,r9d 725 add r14d,ebx 726 ror r13d,14 727 mov ebx,r14d 728 mov r12d,r10d 729 xor r13d,r9d 730 ror r14d,9 731 xor r12d,r11d 732 ror r13d,5 733 xor r14d,ebx 734 and r12d,r9d 735 vpor xmm8,xmm8,xmm11 736 vaesenclast xmm11,xmm9,xmm10 737 vmovdqu xmm10,XMMWORD[((0-128))+rdi] 738 xor r13d,r9d 739 add eax,DWORD[60+rsp] 740 mov esi,ebx 741 ror r14d,11 742 xor r12d,r11d 743 xor esi,ecx 744 ror r13d,6 745 add eax,r12d 746 and r15d,esi 747 xor r14d,ebx 748 add eax,r13d 749 xor r15d,ecx 750 add r8d,eax 751 ror r14d,2 752 add eax,r15d 753 mov r13d,r8d 754 add r14d,eax 755 vmovdqa XMMWORD[48+rsp],xmm6 756 mov r12,QWORD[((64+0))+rsp] 757 vpand xmm11,xmm11,xmm14 758 mov r15,QWORD[((64+8))+rsp] 759 vpor xmm8,xmm8,xmm11 760 vmovdqu XMMWORD[r12*1+r15],xmm8 761 lea r12,[16+r12] 762 cmp BYTE[131+rbp],0 763 jne NEAR $L$xop_00_47 764 vmovdqu xmm9,XMMWORD[r12] 765 mov QWORD[((64+0))+rsp],r12 766 ror r13d,14 767 mov eax,r14d 768 mov r12d,r9d 769 xor r13d,r8d 770 ror r14d,9 771 xor r12d,r10d 772 ror r13d,5 773 xor r14d,eax 774 and r12d,r8d 775 vpxor xmm9,xmm9,xmm10 776 vmovdqu xmm10,XMMWORD[((16-128))+rdi] 777 xor r13d,r8d 778 add r11d,DWORD[rsp] 779 mov r15d,eax 780 ror r14d,11 781 xor r12d,r10d 782 xor r15d,ebx 783 ror r13d,6 784 add r11d,r12d 785 and esi,r15d 786 xor r14d,eax 787 add r11d,r13d 788 xor esi,ebx 789 add edx,r11d 790 ror r14d,2 791 add r11d,esi 792 mov r13d,edx 793 add r14d,r11d 794 ror r13d,14 795 mov r11d,r14d 796 mov r12d,r8d 797 xor r13d,edx 798 ror r14d,9 799 xor r12d,r9d 800 ror r13d,5 801 xor r14d,r11d 802 and r12d,edx 803 vpxor xmm9,xmm9,xmm8 804 xor r13d,edx 805 add r10d,DWORD[4+rsp] 806 mov esi,r11d 807 ror r14d,11 808 xor r12d,r9d 809 xor esi,eax 810 ror r13d,6 811 add r10d,r12d 812 and r15d,esi 813 xor r14d,r11d 814 add r10d,r13d 815 xor r15d,eax 816 add ecx,r10d 817 ror r14d,2 818 add r10d,r15d 819 mov r13d,ecx 820 add r14d,r10d 821 ror r13d,14 822 mov r10d,r14d 823 mov r12d,edx 824 xor r13d,ecx 825 ror r14d,9 826 xor r12d,r8d 827 ror r13d,5 828 xor r14d,r10d 829 and r12d,ecx 830 vaesenc xmm9,xmm9,xmm10 831 vmovdqu xmm10,XMMWORD[((32-128))+rdi] 832 xor r13d,ecx 833 add r9d,DWORD[8+rsp] 834 mov r15d,r10d 835 ror r14d,11 836 xor r12d,r8d 837 xor r15d,r11d 838 ror r13d,6 839 add r9d,r12d 840 and esi,r15d 841 xor r14d,r10d 842 add r9d,r13d 843 xor esi,r11d 844 add ebx,r9d 845 ror r14d,2 846 add r9d,esi 847 mov r13d,ebx 848 add r14d,r9d 849 ror r13d,14 850 mov r9d,r14d 851 mov r12d,ecx 852 xor r13d,ebx 853 ror r14d,9 854 xor r12d,edx 855 ror r13d,5 856 xor r14d,r9d 857 and r12d,ebx 858 vaesenc xmm9,xmm9,xmm10 859 vmovdqu xmm10,XMMWORD[((48-128))+rdi] 860 xor r13d,ebx 861 add r8d,DWORD[12+rsp] 862 mov esi,r9d 863 ror r14d,11 864 xor r12d,edx 865 xor esi,r10d 866 ror r13d,6 867 add r8d,r12d 868 and r15d,esi 869 xor r14d,r9d 870 add r8d,r13d 871 xor r15d,r10d 872 add eax,r8d 873 ror r14d,2 874 add r8d,r15d 875 mov r13d,eax 876 add r14d,r8d 877 ror r13d,14 878 mov r8d,r14d 879 mov r12d,ebx 880 xor r13d,eax 881 ror r14d,9 882 xor r12d,ecx 883 ror r13d,5 884 xor r14d,r8d 885 and r12d,eax 886 vaesenc xmm9,xmm9,xmm10 887 vmovdqu xmm10,XMMWORD[((64-128))+rdi] 888 xor r13d,eax 889 add edx,DWORD[16+rsp] 890 mov r15d,r8d 891 ror r14d,11 892 xor r12d,ecx 893 xor r15d,r9d 894 ror r13d,6 895 add edx,r12d 896 and esi,r15d 897 xor r14d,r8d 898 add edx,r13d 899 xor esi,r9d 900 add r11d,edx 901 ror r14d,2 902 add edx,esi 903 mov r13d,r11d 904 add r14d,edx 905 ror r13d,14 906 mov edx,r14d 907 mov r12d,eax 908 xor r13d,r11d 909 ror r14d,9 910 xor r12d,ebx 911 ror r13d,5 912 xor r14d,edx 913 and r12d,r11d 914 vaesenc xmm9,xmm9,xmm10 915 vmovdqu xmm10,XMMWORD[((80-128))+rdi] 916 xor r13d,r11d 917 add ecx,DWORD[20+rsp] 918 mov esi,edx 919 ror r14d,11 920 xor r12d,ebx 921 xor esi,r8d 922 ror r13d,6 923 add ecx,r12d 924 and r15d,esi 925 xor r14d,edx 926 add ecx,r13d 927 xor r15d,r8d 928 add r10d,ecx 929 ror r14d,2 930 add ecx,r15d 931 mov r13d,r10d 932 add r14d,ecx 933 ror r13d,14 934 mov ecx,r14d 935 mov r12d,r11d 936 xor r13d,r10d 937 ror r14d,9 938 xor r12d,eax 939 ror r13d,5 940 xor r14d,ecx 941 and r12d,r10d 942 vaesenc xmm9,xmm9,xmm10 943 vmovdqu xmm10,XMMWORD[((96-128))+rdi] 944 xor r13d,r10d 945 add ebx,DWORD[24+rsp] 946 mov r15d,ecx 947 ror r14d,11 948 xor r12d,eax 949 xor r15d,edx 950 ror r13d,6 951 add ebx,r12d 952 and esi,r15d 953 xor r14d,ecx 954 add ebx,r13d 955 xor esi,edx 956 add r9d,ebx 957 ror r14d,2 958 add ebx,esi 959 mov r13d,r9d 960 add r14d,ebx 961 ror r13d,14 962 mov ebx,r14d 963 mov r12d,r10d 964 xor r13d,r9d 965 ror r14d,9 966 xor r12d,r11d 967 ror r13d,5 968 xor r14d,ebx 969 and r12d,r9d 970 vaesenc xmm9,xmm9,xmm10 971 vmovdqu xmm10,XMMWORD[((112-128))+rdi] 972 xor r13d,r9d 973 add eax,DWORD[28+rsp] 974 mov esi,ebx 975 ror r14d,11 976 xor r12d,r11d 977 xor esi,ecx 978 ror r13d,6 979 add eax,r12d 980 and r15d,esi 981 xor r14d,ebx 982 add eax,r13d 983 xor r15d,ecx 984 add r8d,eax 985 ror r14d,2 986 add eax,r15d 987 mov r13d,r8d 988 add r14d,eax 989 ror r13d,14 990 mov eax,r14d 991 mov r12d,r9d 992 xor r13d,r8d 993 ror r14d,9 994 xor r12d,r10d 995 ror r13d,5 996 xor r14d,eax 997 and r12d,r8d 998 vaesenc xmm9,xmm9,xmm10 999 vmovdqu xmm10,XMMWORD[((128-128))+rdi] 1000 xor r13d,r8d 1001 add r11d,DWORD[32+rsp] 1002 mov r15d,eax 1003 ror r14d,11 1004 xor r12d,r10d 1005 xor r15d,ebx 1006 ror r13d,6 1007 add r11d,r12d 1008 and esi,r15d 1009 xor r14d,eax 1010 add r11d,r13d 1011 xor esi,ebx 1012 add edx,r11d 1013 ror r14d,2 1014 add r11d,esi 1015 mov r13d,edx 1016 add r14d,r11d 1017 ror r13d,14 1018 mov r11d,r14d 1019 mov r12d,r8d 1020 xor r13d,edx 1021 ror r14d,9 1022 xor r12d,r9d 1023 ror r13d,5 1024 xor r14d,r11d 1025 and r12d,edx 1026 vaesenc xmm9,xmm9,xmm10 1027 vmovdqu xmm10,XMMWORD[((144-128))+rdi] 1028 xor r13d,edx 1029 add r10d,DWORD[36+rsp] 1030 mov esi,r11d 1031 ror r14d,11 1032 xor r12d,r9d 1033 xor esi,eax 1034 ror r13d,6 1035 add r10d,r12d 1036 and r15d,esi 1037 xor r14d,r11d 1038 add r10d,r13d 1039 xor r15d,eax 1040 add ecx,r10d 1041 ror r14d,2 1042 add r10d,r15d 1043 mov r13d,ecx 1044 add r14d,r10d 1045 ror r13d,14 1046 mov r10d,r14d 1047 mov r12d,edx 1048 xor r13d,ecx 1049 ror r14d,9 1050 xor r12d,r8d 1051 ror r13d,5 1052 xor r14d,r10d 1053 and r12d,ecx 1054 vaesenc xmm9,xmm9,xmm10 1055 vmovdqu xmm10,XMMWORD[((160-128))+rdi] 1056 xor r13d,ecx 1057 add r9d,DWORD[40+rsp] 1058 mov r15d,r10d 1059 ror r14d,11 1060 xor r12d,r8d 1061 xor r15d,r11d 1062 ror r13d,6 1063 add r9d,r12d 1064 and esi,r15d 1065 xor r14d,r10d 1066 add r9d,r13d 1067 xor esi,r11d 1068 add ebx,r9d 1069 ror r14d,2 1070 add r9d,esi 1071 mov r13d,ebx 1072 add r14d,r9d 1073 ror r13d,14 1074 mov r9d,r14d 1075 mov r12d,ecx 1076 xor r13d,ebx 1077 ror r14d,9 1078 xor r12d,edx 1079 ror r13d,5 1080 xor r14d,r9d 1081 and r12d,ebx 1082 vaesenclast xmm11,xmm9,xmm10 1083 vaesenc xmm9,xmm9,xmm10 1084 vmovdqu xmm10,XMMWORD[((176-128))+rdi] 1085 xor r13d,ebx 1086 add r8d,DWORD[44+rsp] 1087 mov esi,r9d 1088 ror r14d,11 1089 xor r12d,edx 1090 xor esi,r10d 1091 ror r13d,6 1092 add r8d,r12d 1093 and r15d,esi 1094 xor r14d,r9d 1095 add r8d,r13d 1096 xor r15d,r10d 1097 add eax,r8d 1098 ror r14d,2 1099 add r8d,r15d 1100 mov r13d,eax 1101 add r14d,r8d 1102 ror r13d,14 1103 mov r8d,r14d 1104 mov r12d,ebx 1105 xor r13d,eax 1106 ror r14d,9 1107 xor r12d,ecx 1108 ror r13d,5 1109 xor r14d,r8d 1110 and r12d,eax 1111 vpand xmm8,xmm11,xmm12 1112 vaesenc xmm9,xmm9,xmm10 1113 vmovdqu xmm10,XMMWORD[((192-128))+rdi] 1114 xor r13d,eax 1115 add edx,DWORD[48+rsp] 1116 mov r15d,r8d 1117 ror r14d,11 1118 xor r12d,ecx 1119 xor r15d,r9d 1120 ror r13d,6 1121 add edx,r12d 1122 and esi,r15d 1123 xor r14d,r8d 1124 add edx,r13d 1125 xor esi,r9d 1126 add r11d,edx 1127 ror r14d,2 1128 add edx,esi 1129 mov r13d,r11d 1130 add r14d,edx 1131 ror r13d,14 1132 mov edx,r14d 1133 mov r12d,eax 1134 xor r13d,r11d 1135 ror r14d,9 1136 xor r12d,ebx 1137 ror r13d,5 1138 xor r14d,edx 1139 and r12d,r11d 1140 vaesenclast xmm11,xmm9,xmm10 1141 vaesenc xmm9,xmm9,xmm10 1142 vmovdqu xmm10,XMMWORD[((208-128))+rdi] 1143 xor r13d,r11d 1144 add ecx,DWORD[52+rsp] 1145 mov esi,edx 1146 ror r14d,11 1147 xor r12d,ebx 1148 xor esi,r8d 1149 ror r13d,6 1150 add ecx,r12d 1151 and r15d,esi 1152 xor r14d,edx 1153 add ecx,r13d 1154 xor r15d,r8d 1155 add r10d,ecx 1156 ror r14d,2 1157 add ecx,r15d 1158 mov r13d,r10d 1159 add r14d,ecx 1160 ror r13d,14 1161 mov ecx,r14d 1162 mov r12d,r11d 1163 xor r13d,r10d 1164 ror r14d,9 1165 xor r12d,eax 1166 ror r13d,5 1167 xor r14d,ecx 1168 and r12d,r10d 1169 vpand xmm11,xmm11,xmm13 1170 vaesenc xmm9,xmm9,xmm10 1171 vmovdqu xmm10,XMMWORD[((224-128))+rdi] 1172 xor r13d,r10d 1173 add ebx,DWORD[56+rsp] 1174 mov r15d,ecx 1175 ror r14d,11 1176 xor r12d,eax 1177 xor r15d,edx 1178 ror r13d,6 1179 add ebx,r12d 1180 and esi,r15d 1181 xor r14d,ecx 1182 add ebx,r13d 1183 xor esi,edx 1184 add r9d,ebx 1185 ror r14d,2 1186 add ebx,esi 1187 mov r13d,r9d 1188 add r14d,ebx 1189 ror r13d,14 1190 mov ebx,r14d 1191 mov r12d,r10d 1192 xor r13d,r9d 1193 ror r14d,9 1194 xor r12d,r11d 1195 ror r13d,5 1196 xor r14d,ebx 1197 and r12d,r9d 1198 vpor xmm8,xmm8,xmm11 1199 vaesenclast xmm11,xmm9,xmm10 1200 vmovdqu xmm10,XMMWORD[((0-128))+rdi] 1201 xor r13d,r9d 1202 add eax,DWORD[60+rsp] 1203 mov esi,ebx 1204 ror r14d,11 1205 xor r12d,r11d 1206 xor esi,ecx 1207 ror r13d,6 1208 add eax,r12d 1209 and r15d,esi 1210 xor r14d,ebx 1211 add eax,r13d 1212 xor r15d,ecx 1213 add r8d,eax 1214 ror r14d,2 1215 add eax,r15d 1216 mov r13d,r8d 1217 add r14d,eax 1218 mov r12,QWORD[((64+0))+rsp] 1219 mov r13,QWORD[((64+8))+rsp] 1220 mov r15,QWORD[((64+40))+rsp] 1221 mov rsi,QWORD[((64+48))+rsp] 1222 1223 vpand xmm11,xmm11,xmm14 1224 mov eax,r14d 1225 vpor xmm8,xmm8,xmm11 1226 vmovdqu XMMWORD[r13*1+r12],xmm8 1227 lea r12,[16+r12] 1228 1229 add eax,DWORD[r15] 1230 add ebx,DWORD[4+r15] 1231 add ecx,DWORD[8+r15] 1232 add edx,DWORD[12+r15] 1233 add r8d,DWORD[16+r15] 1234 add r9d,DWORD[20+r15] 1235 add r10d,DWORD[24+r15] 1236 add r11d,DWORD[28+r15] 1237 1238 cmp r12,QWORD[((64+16))+rsp] 1239 1240 mov DWORD[r15],eax 1241 mov DWORD[4+r15],ebx 1242 mov DWORD[8+r15],ecx 1243 mov DWORD[12+r15],edx 1244 mov DWORD[16+r15],r8d 1245 mov DWORD[20+r15],r9d 1246 mov DWORD[24+r15],r10d 1247 mov DWORD[28+r15],r11d 1248 1249 jb NEAR $L$loop_xop 1250 1251 mov r8,QWORD[((64+32))+rsp] 1252 mov rsi,QWORD[120+rsp] 1253 1254 vmovdqu XMMWORD[r8],xmm8 1255 vzeroall 1256 movaps xmm6,XMMWORD[128+rsp] 1257 movaps xmm7,XMMWORD[144+rsp] 1258 movaps xmm8,XMMWORD[160+rsp] 1259 movaps xmm9,XMMWORD[176+rsp] 1260 movaps xmm10,XMMWORD[192+rsp] 1261 movaps xmm11,XMMWORD[208+rsp] 1262 movaps xmm12,XMMWORD[224+rsp] 1263 movaps xmm13,XMMWORD[240+rsp] 1264 movaps xmm14,XMMWORD[256+rsp] 1265 movaps xmm15,XMMWORD[272+rsp] 1266 mov r15,QWORD[((-48))+rsi] 1267 1268 mov r14,QWORD[((-40))+rsi] 1269 1270 mov r13,QWORD[((-32))+rsi] 1271 1272 mov r12,QWORD[((-24))+rsi] 1273 1274 mov rbp,QWORD[((-16))+rsi] 1275 1276 mov rbx,QWORD[((-8))+rsi] 1277 1278 lea rsp,[rsi] 1279 1280 $L$epilogue_xop: 1281 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1282 mov rsi,QWORD[16+rsp] 1283 DB 0F3h,0C3h ;repret 1284 1285 $L$SEH_end_aesni_cbc_sha256_enc_xop: 1286 1287 ALIGN 64 1288 aesni_cbc_sha256_enc_avx: 1289 mov QWORD[8+rsp],rdi ;WIN64 prologue 1290 mov QWORD[16+rsp],rsi 1291 mov rax,rsp 1292 $L$SEH_begin_aesni_cbc_sha256_enc_avx: 1293 mov rdi,rcx 1294 mov rsi,rdx 1295 mov rdx,r8 1296 mov rcx,r9 1297 mov r8,QWORD[40+rsp] 1298 mov r9,QWORD[48+rsp] 1299 1300 1301 1302 $L$avx_shortcut: 1303 mov r10,QWORD[56+rsp] 1304 mov rax,rsp 1305 1306 push rbx 1307 1308 push rbp 1309 1310 push r12 1311 1312 push r13 1313 1314 push r14 1315 1316 push r15 1317 1318 sub rsp,288 1319 and rsp,-64 1320 1321 shl rdx,6 1322 sub rsi,rdi 1323 sub r10,rdi 1324 add rdx,rdi 1325 1326 1327 mov QWORD[((64+8))+rsp],rsi 1328 mov QWORD[((64+16))+rsp],rdx 1329 1330 mov QWORD[((64+32))+rsp],r8 1331 mov QWORD[((64+40))+rsp],r9 1332 mov QWORD[((64+48))+rsp],r10 1333 mov QWORD[120+rsp],rax 1334 1335 movaps XMMWORD[128+rsp],xmm6 1336 movaps XMMWORD[144+rsp],xmm7 1337 movaps XMMWORD[160+rsp],xmm8 1338 movaps XMMWORD[176+rsp],xmm9 1339 movaps XMMWORD[192+rsp],xmm10 1340 movaps XMMWORD[208+rsp],xmm11 1341 movaps XMMWORD[224+rsp],xmm12 1342 movaps XMMWORD[240+rsp],xmm13 1343 movaps XMMWORD[256+rsp],xmm14 1344 movaps XMMWORD[272+rsp],xmm15 1345 $L$prologue_avx: 1346 vzeroall 1347 1348 mov r12,rdi 1349 lea rdi,[128+rcx] 1350 lea r13,[((K256+544))] 1351 mov r14d,DWORD[((240-128))+rdi] 1352 mov r15,r9 1353 mov rsi,r10 1354 vmovdqu xmm8,XMMWORD[r8] 1355 sub r14,9 1356 1357 mov eax,DWORD[r15] 1358 mov ebx,DWORD[4+r15] 1359 mov ecx,DWORD[8+r15] 1360 mov edx,DWORD[12+r15] 1361 mov r8d,DWORD[16+r15] 1362 mov r9d,DWORD[20+r15] 1363 mov r10d,DWORD[24+r15] 1364 mov r11d,DWORD[28+r15] 1365 1366 vmovdqa xmm14,XMMWORD[r14*8+r13] 1367 vmovdqa xmm13,XMMWORD[16+r14*8+r13] 1368 vmovdqa xmm12,XMMWORD[32+r14*8+r13] 1369 vmovdqu xmm10,XMMWORD[((0-128))+rdi] 1370 jmp NEAR $L$loop_avx 1371 ALIGN 16 1372 $L$loop_avx: 1373 vmovdqa xmm7,XMMWORD[((K256+512))] 1374 vmovdqu xmm0,XMMWORD[r12*1+rsi] 1375 vmovdqu xmm1,XMMWORD[16+r12*1+rsi] 1376 vmovdqu xmm2,XMMWORD[32+r12*1+rsi] 1377 vmovdqu xmm3,XMMWORD[48+r12*1+rsi] 1378 vpshufb xmm0,xmm0,xmm7 1379 lea rbp,[K256] 1380 vpshufb xmm1,xmm1,xmm7 1381 vpshufb xmm2,xmm2,xmm7 1382 vpaddd xmm4,xmm0,XMMWORD[rbp] 1383 vpshufb xmm3,xmm3,xmm7 1384 vpaddd xmm5,xmm1,XMMWORD[32+rbp] 1385 vpaddd xmm6,xmm2,XMMWORD[64+rbp] 1386 vpaddd xmm7,xmm3,XMMWORD[96+rbp] 1387 vmovdqa XMMWORD[rsp],xmm4 1388 mov r14d,eax 1389 vmovdqa XMMWORD[16+rsp],xmm5 1390 mov esi,ebx 1391 vmovdqa XMMWORD[32+rsp],xmm6 1392 xor esi,ecx 1393 vmovdqa XMMWORD[48+rsp],xmm7 1394 mov r13d,r8d 1395 jmp NEAR $L$avx_00_47 1396 1397 ALIGN 16 1398 $L$avx_00_47: 1399 sub rbp,-16*2*4 1400 vmovdqu xmm9,XMMWORD[r12] 1401 mov QWORD[((64+0))+rsp],r12 1402 vpalignr xmm4,xmm1,xmm0,4 1403 shrd r13d,r13d,14 1404 mov eax,r14d 1405 mov r12d,r9d 1406 vpalignr xmm7,xmm3,xmm2,4 1407 xor r13d,r8d 1408 shrd r14d,r14d,9 1409 xor r12d,r10d 1410 vpsrld xmm6,xmm4,7 1411 shrd r13d,r13d,5 1412 xor r14d,eax 1413 and r12d,r8d 1414 vpaddd xmm0,xmm0,xmm7 1415 vpxor xmm9,xmm9,xmm10 1416 vmovdqu xmm10,XMMWORD[((16-128))+rdi] 1417 xor r13d,r8d 1418 add r11d,DWORD[rsp] 1419 mov r15d,eax 1420 vpsrld xmm7,xmm4,3 1421 shrd r14d,r14d,11 1422 xor r12d,r10d 1423 xor r15d,ebx 1424 vpslld xmm5,xmm4,14 1425 shrd r13d,r13d,6 1426 add r11d,r12d 1427 and esi,r15d 1428 vpxor xmm4,xmm7,xmm6 1429 xor r14d,eax 1430 add r11d,r13d 1431 xor esi,ebx 1432 vpshufd xmm7,xmm3,250 1433 add edx,r11d 1434 shrd r14d,r14d,2 1435 add r11d,esi 1436 vpsrld xmm6,xmm6,11 1437 mov r13d,edx 1438 add r14d,r11d 1439 shrd r13d,r13d,14 1440 vpxor xmm4,xmm4,xmm5 1441 mov r11d,r14d 1442 mov r12d,r8d 1443 xor r13d,edx 1444 vpslld xmm5,xmm5,11 1445 shrd r14d,r14d,9 1446 xor r12d,r9d 1447 shrd r13d,r13d,5 1448 vpxor xmm4,xmm4,xmm6 1449 xor r14d,r11d 1450 and r12d,edx 1451 vpxor xmm9,xmm9,xmm8 1452 xor r13d,edx 1453 vpsrld xmm6,xmm7,10 1454 add r10d,DWORD[4+rsp] 1455 mov esi,r11d 1456 shrd r14d,r14d,11 1457 vpxor xmm4,xmm4,xmm5 1458 xor r12d,r9d 1459 xor esi,eax 1460 shrd r13d,r13d,6 1461 vpsrlq xmm7,xmm7,17 1462 add r10d,r12d 1463 and r15d,esi 1464 xor r14d,r11d 1465 vpaddd xmm0,xmm0,xmm4 1466 add r10d,r13d 1467 xor r15d,eax 1468 add ecx,r10d 1469 vpxor xmm6,xmm6,xmm7 1470 shrd r14d,r14d,2 1471 add r10d,r15d 1472 mov r13d,ecx 1473 vpsrlq xmm7,xmm7,2 1474 add r14d,r10d 1475 shrd r13d,r13d,14 1476 mov r10d,r14d 1477 vpxor xmm6,xmm6,xmm7 1478 mov r12d,edx 1479 xor r13d,ecx 1480 shrd r14d,r14d,9 1481 vpshufd xmm6,xmm6,132 1482 xor r12d,r8d 1483 shrd r13d,r13d,5 1484 xor r14d,r10d 1485 vpsrldq xmm6,xmm6,8 1486 and r12d,ecx 1487 vaesenc xmm9,xmm9,xmm10 1488 vmovdqu xmm10,XMMWORD[((32-128))+rdi] 1489 xor r13d,ecx 1490 add r9d,DWORD[8+rsp] 1491 vpaddd xmm0,xmm0,xmm6 1492 mov r15d,r10d 1493 shrd r14d,r14d,11 1494 xor r12d,r8d 1495 vpshufd xmm7,xmm0,80 1496 xor r15d,r11d 1497 shrd r13d,r13d,6 1498 add r9d,r12d 1499 vpsrld xmm6,xmm7,10 1500 and esi,r15d 1501 xor r14d,r10d 1502 add r9d,r13d 1503 vpsrlq xmm7,xmm7,17 1504 xor esi,r11d 1505 add ebx,r9d 1506 shrd r14d,r14d,2 1507 vpxor xmm6,xmm6,xmm7 1508 add r9d,esi 1509 mov r13d,ebx 1510 add r14d,r9d 1511 vpsrlq xmm7,xmm7,2 1512 shrd r13d,r13d,14 1513 mov r9d,r14d 1514 mov r12d,ecx 1515 vpxor xmm6,xmm6,xmm7 1516 xor r13d,ebx 1517 shrd r14d,r14d,9 1518 xor r12d,edx 1519 vpshufd xmm6,xmm6,232 1520 shrd r13d,r13d,5 1521 xor r14d,r9d 1522 and r12d,ebx 1523 vpslldq xmm6,xmm6,8 1524 vaesenc xmm9,xmm9,xmm10 1525 vmovdqu xmm10,XMMWORD[((48-128))+rdi] 1526 xor r13d,ebx 1527 add r8d,DWORD[12+rsp] 1528 mov esi,r9d 1529 vpaddd xmm0,xmm0,xmm6 1530 shrd r14d,r14d,11 1531 xor r12d,edx 1532 xor esi,r10d 1533 vpaddd xmm6,xmm0,XMMWORD[rbp] 1534 shrd r13d,r13d,6 1535 add r8d,r12d 1536 and r15d,esi 1537 xor r14d,r9d 1538 add r8d,r13d 1539 xor r15d,r10d 1540 add eax,r8d 1541 shrd r14d,r14d,2 1542 add r8d,r15d 1543 mov r13d,eax 1544 add r14d,r8d 1545 vmovdqa XMMWORD[rsp],xmm6 1546 vpalignr xmm4,xmm2,xmm1,4 1547 shrd r13d,r13d,14 1548 mov r8d,r14d 1549 mov r12d,ebx 1550 vpalignr xmm7,xmm0,xmm3,4 1551 xor r13d,eax 1552 shrd r14d,r14d,9 1553 xor r12d,ecx 1554 vpsrld xmm6,xmm4,7 1555 shrd r13d,r13d,5 1556 xor r14d,r8d 1557 and r12d,eax 1558 vpaddd xmm1,xmm1,xmm7 1559 vaesenc xmm9,xmm9,xmm10 1560 vmovdqu xmm10,XMMWORD[((64-128))+rdi] 1561 xor r13d,eax 1562 add edx,DWORD[16+rsp] 1563 mov r15d,r8d 1564 vpsrld xmm7,xmm4,3 1565 shrd r14d,r14d,11 1566 xor r12d,ecx 1567 xor r15d,r9d 1568 vpslld xmm5,xmm4,14 1569 shrd r13d,r13d,6 1570 add edx,r12d 1571 and esi,r15d 1572 vpxor xmm4,xmm7,xmm6 1573 xor r14d,r8d 1574 add edx,r13d 1575 xor esi,r9d 1576 vpshufd xmm7,xmm0,250 1577 add r11d,edx 1578 shrd r14d,r14d,2 1579 add edx,esi 1580 vpsrld xmm6,xmm6,11 1581 mov r13d,r11d 1582 add r14d,edx 1583 shrd r13d,r13d,14 1584 vpxor xmm4,xmm4,xmm5 1585 mov edx,r14d 1586 mov r12d,eax 1587 xor r13d,r11d 1588 vpslld xmm5,xmm5,11 1589 shrd r14d,r14d,9 1590 xor r12d,ebx 1591 shrd r13d,r13d,5 1592 vpxor xmm4,xmm4,xmm6 1593 xor r14d,edx 1594 and r12d,r11d 1595 vaesenc xmm9,xmm9,xmm10 1596 vmovdqu xmm10,XMMWORD[((80-128))+rdi] 1597 xor r13d,r11d 1598 vpsrld xmm6,xmm7,10 1599 add ecx,DWORD[20+rsp] 1600 mov esi,edx 1601 shrd r14d,r14d,11 1602 vpxor xmm4,xmm4,xmm5 1603 xor r12d,ebx 1604 xor esi,r8d 1605 shrd r13d,r13d,6 1606 vpsrlq xmm7,xmm7,17 1607 add ecx,r12d 1608 and r15d,esi 1609 xor r14d,edx 1610 vpaddd xmm1,xmm1,xmm4 1611 add ecx,r13d 1612 xor r15d,r8d 1613 add r10d,ecx 1614 vpxor xmm6,xmm6,xmm7 1615 shrd r14d,r14d,2 1616 add ecx,r15d 1617 mov r13d,r10d 1618 vpsrlq xmm7,xmm7,2 1619 add r14d,ecx 1620 shrd r13d,r13d,14 1621 mov ecx,r14d 1622 vpxor xmm6,xmm6,xmm7 1623 mov r12d,r11d 1624 xor r13d,r10d 1625 shrd r14d,r14d,9 1626 vpshufd xmm6,xmm6,132 1627 xor r12d,eax 1628 shrd r13d,r13d,5 1629 xor r14d,ecx 1630 vpsrldq xmm6,xmm6,8 1631 and r12d,r10d 1632 vaesenc xmm9,xmm9,xmm10 1633 vmovdqu xmm10,XMMWORD[((96-128))+rdi] 1634 xor r13d,r10d 1635 add ebx,DWORD[24+rsp] 1636 vpaddd xmm1,xmm1,xmm6 1637 mov r15d,ecx 1638 shrd r14d,r14d,11 1639 xor r12d,eax 1640 vpshufd xmm7,xmm1,80 1641 xor r15d,edx 1642 shrd r13d,r13d,6 1643 add ebx,r12d 1644 vpsrld xmm6,xmm7,10 1645 and esi,r15d 1646 xor r14d,ecx 1647 add ebx,r13d 1648 vpsrlq xmm7,xmm7,17 1649 xor esi,edx 1650 add r9d,ebx 1651 shrd r14d,r14d,2 1652 vpxor xmm6,xmm6,xmm7 1653 add ebx,esi 1654 mov r13d,r9d 1655 add r14d,ebx 1656 vpsrlq xmm7,xmm7,2 1657 shrd r13d,r13d,14 1658 mov ebx,r14d 1659 mov r12d,r10d 1660 vpxor xmm6,xmm6,xmm7 1661 xor r13d,r9d 1662 shrd r14d,r14d,9 1663 xor r12d,r11d 1664 vpshufd xmm6,xmm6,232 1665 shrd r13d,r13d,5 1666 xor r14d,ebx 1667 and r12d,r9d 1668 vpslldq xmm6,xmm6,8 1669 vaesenc xmm9,xmm9,xmm10 1670 vmovdqu xmm10,XMMWORD[((112-128))+rdi] 1671 xor r13d,r9d 1672 add eax,DWORD[28+rsp] 1673 mov esi,ebx 1674 vpaddd xmm1,xmm1,xmm6 1675 shrd r14d,r14d,11 1676 xor r12d,r11d 1677 xor esi,ecx 1678 vpaddd xmm6,xmm1,XMMWORD[32+rbp] 1679 shrd r13d,r13d,6 1680 add eax,r12d 1681 and r15d,esi 1682 xor r14d,ebx 1683 add eax,r13d 1684 xor r15d,ecx 1685 add r8d,eax 1686 shrd r14d,r14d,2 1687 add eax,r15d 1688 mov r13d,r8d 1689 add r14d,eax 1690 vmovdqa XMMWORD[16+rsp],xmm6 1691 vpalignr xmm4,xmm3,xmm2,4 1692 shrd r13d,r13d,14 1693 mov eax,r14d 1694 mov r12d,r9d 1695 vpalignr xmm7,xmm1,xmm0,4 1696 xor r13d,r8d 1697 shrd r14d,r14d,9 1698 xor r12d,r10d 1699 vpsrld xmm6,xmm4,7 1700 shrd r13d,r13d,5 1701 xor r14d,eax 1702 and r12d,r8d 1703 vpaddd xmm2,xmm2,xmm7 1704 vaesenc xmm9,xmm9,xmm10 1705 vmovdqu xmm10,XMMWORD[((128-128))+rdi] 1706 xor r13d,r8d 1707 add r11d,DWORD[32+rsp] 1708 mov r15d,eax 1709 vpsrld xmm7,xmm4,3 1710 shrd r14d,r14d,11 1711 xor r12d,r10d 1712 xor r15d,ebx 1713 vpslld xmm5,xmm4,14 1714 shrd r13d,r13d,6 1715 add r11d,r12d 1716 and esi,r15d 1717 vpxor xmm4,xmm7,xmm6 1718 xor r14d,eax 1719 add r11d,r13d 1720 xor esi,ebx 1721 vpshufd xmm7,xmm1,250 1722 add edx,r11d 1723 shrd r14d,r14d,2 1724 add r11d,esi 1725 vpsrld xmm6,xmm6,11 1726 mov r13d,edx 1727 add r14d,r11d 1728 shrd r13d,r13d,14 1729 vpxor xmm4,xmm4,xmm5 1730 mov r11d,r14d 1731 mov r12d,r8d 1732 xor r13d,edx 1733 vpslld xmm5,xmm5,11 1734 shrd r14d,r14d,9 1735 xor r12d,r9d 1736 shrd r13d,r13d,5 1737 vpxor xmm4,xmm4,xmm6 1738 xor r14d,r11d 1739 and r12d,edx 1740 vaesenc xmm9,xmm9,xmm10 1741 vmovdqu xmm10,XMMWORD[((144-128))+rdi] 1742 xor r13d,edx 1743 vpsrld xmm6,xmm7,10 1744 add r10d,DWORD[36+rsp] 1745 mov esi,r11d 1746 shrd r14d,r14d,11 1747 vpxor xmm4,xmm4,xmm5 1748 xor r12d,r9d 1749 xor esi,eax 1750 shrd r13d,r13d,6 1751 vpsrlq xmm7,xmm7,17 1752 add r10d,r12d 1753 and r15d,esi 1754 xor r14d,r11d 1755 vpaddd xmm2,xmm2,xmm4 1756 add r10d,r13d 1757 xor r15d,eax 1758 add ecx,r10d 1759 vpxor xmm6,xmm6,xmm7 1760 shrd r14d,r14d,2 1761 add r10d,r15d 1762 mov r13d,ecx 1763 vpsrlq xmm7,xmm7,2 1764 add r14d,r10d 1765 shrd r13d,r13d,14 1766 mov r10d,r14d 1767 vpxor xmm6,xmm6,xmm7 1768 mov r12d,edx 1769 xor r13d,ecx 1770 shrd r14d,r14d,9 1771 vpshufd xmm6,xmm6,132 1772 xor r12d,r8d 1773 shrd r13d,r13d,5 1774 xor r14d,r10d 1775 vpsrldq xmm6,xmm6,8 1776 and r12d,ecx 1777 vaesenc xmm9,xmm9,xmm10 1778 vmovdqu xmm10,XMMWORD[((160-128))+rdi] 1779 xor r13d,ecx 1780 add r9d,DWORD[40+rsp] 1781 vpaddd xmm2,xmm2,xmm6 1782 mov r15d,r10d 1783 shrd r14d,r14d,11 1784 xor r12d,r8d 1785 vpshufd xmm7,xmm2,80 1786 xor r15d,r11d 1787 shrd r13d,r13d,6 1788 add r9d,r12d 1789 vpsrld xmm6,xmm7,10 1790 and esi,r15d 1791 xor r14d,r10d 1792 add r9d,r13d 1793 vpsrlq xmm7,xmm7,17 1794 xor esi,r11d 1795 add ebx,r9d 1796 shrd r14d,r14d,2 1797 vpxor xmm6,xmm6,xmm7 1798 add r9d,esi 1799 mov r13d,ebx 1800 add r14d,r9d 1801 vpsrlq xmm7,xmm7,2 1802 shrd r13d,r13d,14 1803 mov r9d,r14d 1804 mov r12d,ecx 1805 vpxor xmm6,xmm6,xmm7 1806 xor r13d,ebx 1807 shrd r14d,r14d,9 1808 xor r12d,edx 1809 vpshufd xmm6,xmm6,232 1810 shrd r13d,r13d,5 1811 xor r14d,r9d 1812 and r12d,ebx 1813 vpslldq xmm6,xmm6,8 1814 vaesenclast xmm11,xmm9,xmm10 1815 vaesenc xmm9,xmm9,xmm10 1816 vmovdqu xmm10,XMMWORD[((176-128))+rdi] 1817 xor r13d,ebx 1818 add r8d,DWORD[44+rsp] 1819 mov esi,r9d 1820 vpaddd xmm2,xmm2,xmm6 1821 shrd r14d,r14d,11 1822 xor r12d,edx 1823 xor esi,r10d 1824 vpaddd xmm6,xmm2,XMMWORD[64+rbp] 1825 shrd r13d,r13d,6 1826 add r8d,r12d 1827 and r15d,esi 1828 xor r14d,r9d 1829 add r8d,r13d 1830 xor r15d,r10d 1831 add eax,r8d 1832 shrd r14d,r14d,2 1833 add r8d,r15d 1834 mov r13d,eax 1835 add r14d,r8d 1836 vmovdqa XMMWORD[32+rsp],xmm6 1837 vpalignr xmm4,xmm0,xmm3,4 1838 shrd r13d,r13d,14 1839 mov r8d,r14d 1840 mov r12d,ebx 1841 vpalignr xmm7,xmm2,xmm1,4 1842 xor r13d,eax 1843 shrd r14d,r14d,9 1844 xor r12d,ecx 1845 vpsrld xmm6,xmm4,7 1846 shrd r13d,r13d,5 1847 xor r14d,r8d 1848 and r12d,eax 1849 vpaddd xmm3,xmm3,xmm7 1850 vpand xmm8,xmm11,xmm12 1851 vaesenc xmm9,xmm9,xmm10 1852 vmovdqu xmm10,XMMWORD[((192-128))+rdi] 1853 xor r13d,eax 1854 add edx,DWORD[48+rsp] 1855 mov r15d,r8d 1856 vpsrld xmm7,xmm4,3 1857 shrd r14d,r14d,11 1858 xor r12d,ecx 1859 xor r15d,r9d 1860 vpslld xmm5,xmm4,14 1861 shrd r13d,r13d,6 1862 add edx,r12d 1863 and esi,r15d 1864 vpxor xmm4,xmm7,xmm6 1865 xor r14d,r8d 1866 add edx,r13d 1867 xor esi,r9d 1868 vpshufd xmm7,xmm2,250 1869 add r11d,edx 1870 shrd r14d,r14d,2 1871 add edx,esi 1872 vpsrld xmm6,xmm6,11 1873 mov r13d,r11d 1874 add r14d,edx 1875 shrd r13d,r13d,14 1876 vpxor xmm4,xmm4,xmm5 1877 mov edx,r14d 1878 mov r12d,eax 1879 xor r13d,r11d 1880 vpslld xmm5,xmm5,11 1881 shrd r14d,r14d,9 1882 xor r12d,ebx 1883 shrd r13d,r13d,5 1884 vpxor xmm4,xmm4,xmm6 1885 xor r14d,edx 1886 and r12d,r11d 1887 vaesenclast xmm11,xmm9,xmm10 1888 vaesenc xmm9,xmm9,xmm10 1889 vmovdqu xmm10,XMMWORD[((208-128))+rdi] 1890 xor r13d,r11d 1891 vpsrld xmm6,xmm7,10 1892 add ecx,DWORD[52+rsp] 1893 mov esi,edx 1894 shrd r14d,r14d,11 1895 vpxor xmm4,xmm4,xmm5 1896 xor r12d,ebx 1897 xor esi,r8d 1898 shrd r13d,r13d,6 1899 vpsrlq xmm7,xmm7,17 1900 add ecx,r12d 1901 and r15d,esi 1902 xor r14d,edx 1903 vpaddd xmm3,xmm3,xmm4 1904 add ecx,r13d 1905 xor r15d,r8d 1906 add r10d,ecx 1907 vpxor xmm6,xmm6,xmm7 1908 shrd r14d,r14d,2 1909 add ecx,r15d 1910 mov r13d,r10d 1911 vpsrlq xmm7,xmm7,2 1912 add r14d,ecx 1913 shrd r13d,r13d,14 1914 mov ecx,r14d 1915 vpxor xmm6,xmm6,xmm7 1916 mov r12d,r11d 1917 xor r13d,r10d 1918 shrd r14d,r14d,9 1919 vpshufd xmm6,xmm6,132 1920 xor r12d,eax 1921 shrd r13d,r13d,5 1922 xor r14d,ecx 1923 vpsrldq xmm6,xmm6,8 1924 and r12d,r10d 1925 vpand xmm11,xmm11,xmm13 1926 vaesenc xmm9,xmm9,xmm10 1927 vmovdqu xmm10,XMMWORD[((224-128))+rdi] 1928 xor r13d,r10d 1929 add ebx,DWORD[56+rsp] 1930 vpaddd xmm3,xmm3,xmm6 1931 mov r15d,ecx 1932 shrd r14d,r14d,11 1933 xor r12d,eax 1934 vpshufd xmm7,xmm3,80 1935 xor r15d,edx 1936 shrd r13d,r13d,6 1937 add ebx,r12d 1938 vpsrld xmm6,xmm7,10 1939 and esi,r15d 1940 xor r14d,ecx 1941 add ebx,r13d 1942 vpsrlq xmm7,xmm7,17 1943 xor esi,edx 1944 add r9d,ebx 1945 shrd r14d,r14d,2 1946 vpxor xmm6,xmm6,xmm7 1947 add ebx,esi 1948 mov r13d,r9d 1949 add r14d,ebx 1950 vpsrlq xmm7,xmm7,2 1951 shrd r13d,r13d,14 1952 mov ebx,r14d 1953 mov r12d,r10d 1954 vpxor xmm6,xmm6,xmm7 1955 xor r13d,r9d 1956 shrd r14d,r14d,9 1957 xor r12d,r11d 1958 vpshufd xmm6,xmm6,232 1959 shrd r13d,r13d,5 1960 xor r14d,ebx 1961 and r12d,r9d 1962 vpslldq xmm6,xmm6,8 1963 vpor xmm8,xmm8,xmm11 1964 vaesenclast xmm11,xmm9,xmm10 1965 vmovdqu xmm10,XMMWORD[((0-128))+rdi] 1966 xor r13d,r9d 1967 add eax,DWORD[60+rsp] 1968 mov esi,ebx 1969 vpaddd xmm3,xmm3,xmm6 1970 shrd r14d,r14d,11 1971 xor r12d,r11d 1972 xor esi,ecx 1973 vpaddd xmm6,xmm3,XMMWORD[96+rbp] 1974 shrd r13d,r13d,6 1975 add eax,r12d 1976 and r15d,esi 1977 xor r14d,ebx 1978 add eax,r13d 1979 xor r15d,ecx 1980 add r8d,eax 1981 shrd r14d,r14d,2 1982 add eax,r15d 1983 mov r13d,r8d 1984 add r14d,eax 1985 vmovdqa XMMWORD[48+rsp],xmm6 1986 mov r12,QWORD[((64+0))+rsp] 1987 vpand xmm11,xmm11,xmm14 1988 mov r15,QWORD[((64+8))+rsp] 1989 vpor xmm8,xmm8,xmm11 1990 vmovdqu XMMWORD[r12*1+r15],xmm8 1991 lea r12,[16+r12] 1992 cmp BYTE[131+rbp],0 1993 jne NEAR $L$avx_00_47 1994 vmovdqu xmm9,XMMWORD[r12] 1995 mov QWORD[((64+0))+rsp],r12 1996 shrd r13d,r13d,14 1997 mov eax,r14d 1998 mov r12d,r9d 1999 xor r13d,r8d 2000 shrd r14d,r14d,9 2001 xor r12d,r10d 2002 shrd r13d,r13d,5 2003 xor r14d,eax 2004 and r12d,r8d 2005 vpxor xmm9,xmm9,xmm10 2006 vmovdqu xmm10,XMMWORD[((16-128))+rdi] 2007 xor r13d,r8d 2008 add r11d,DWORD[rsp] 2009 mov r15d,eax 2010 shrd r14d,r14d,11 2011 xor r12d,r10d 2012 xor r15d,ebx 2013 shrd r13d,r13d,6 2014 add r11d,r12d 2015 and esi,r15d 2016 xor r14d,eax 2017 add r11d,r13d 2018 xor esi,ebx 2019 add edx,r11d 2020 shrd r14d,r14d,2 2021 add r11d,esi 2022 mov r13d,edx 2023 add r14d,r11d 2024 shrd r13d,r13d,14 2025 mov r11d,r14d 2026 mov r12d,r8d 2027 xor r13d,edx 2028 shrd r14d,r14d,9 2029 xor r12d,r9d 2030 shrd r13d,r13d,5 2031 xor r14d,r11d 2032 and r12d,edx 2033 vpxor xmm9,xmm9,xmm8 2034 xor r13d,edx 2035 add r10d,DWORD[4+rsp] 2036 mov esi,r11d 2037 shrd r14d,r14d,11 2038 xor r12d,r9d 2039 xor esi,eax 2040 shrd r13d,r13d,6 2041 add r10d,r12d 2042 and r15d,esi 2043 xor r14d,r11d 2044 add r10d,r13d 2045 xor r15d,eax 2046 add ecx,r10d 2047 shrd r14d,r14d,2 2048 add r10d,r15d 2049 mov r13d,ecx 2050 add r14d,r10d 2051 shrd r13d,r13d,14 2052 mov r10d,r14d 2053 mov r12d,edx 2054 xor r13d,ecx 2055 shrd r14d,r14d,9 2056 xor r12d,r8d 2057 shrd r13d,r13d,5 2058 xor r14d,r10d 2059 and r12d,ecx 2060 vaesenc xmm9,xmm9,xmm10 2061 vmovdqu xmm10,XMMWORD[((32-128))+rdi] 2062 xor r13d,ecx 2063 add r9d,DWORD[8+rsp] 2064 mov r15d,r10d 2065 shrd r14d,r14d,11 2066 xor r12d,r8d 2067 xor r15d,r11d 2068 shrd r13d,r13d,6 2069 add r9d,r12d 2070 and esi,r15d 2071 xor r14d,r10d 2072 add r9d,r13d 2073 xor esi,r11d 2074 add ebx,r9d 2075 shrd r14d,r14d,2 2076 add r9d,esi 2077 mov r13d,ebx 2078 add r14d,r9d 2079 shrd r13d,r13d,14 2080 mov r9d,r14d 2081 mov r12d,ecx 2082 xor r13d,ebx 2083 shrd r14d,r14d,9 2084 xor r12d,edx 2085 shrd r13d,r13d,5 2086 xor r14d,r9d 2087 and r12d,ebx 2088 vaesenc xmm9,xmm9,xmm10 2089 vmovdqu xmm10,XMMWORD[((48-128))+rdi] 2090 xor r13d,ebx 2091 add r8d,DWORD[12+rsp] 2092 mov esi,r9d 2093 shrd r14d,r14d,11 2094 xor r12d,edx 2095 xor esi,r10d 2096 shrd r13d,r13d,6 2097 add r8d,r12d 2098 and r15d,esi 2099 xor r14d,r9d 2100 add r8d,r13d 2101 xor r15d,r10d 2102 add eax,r8d 2103 shrd r14d,r14d,2 2104 add r8d,r15d 2105 mov r13d,eax 2106 add r14d,r8d 2107 shrd r13d,r13d,14 2108 mov r8d,r14d 2109 mov r12d,ebx 2110 xor r13d,eax 2111 shrd r14d,r14d,9 2112 xor r12d,ecx 2113 shrd r13d,r13d,5 2114 xor r14d,r8d 2115 and r12d,eax 2116 vaesenc xmm9,xmm9,xmm10 2117 vmovdqu xmm10,XMMWORD[((64-128))+rdi] 2118 xor r13d,eax 2119 add edx,DWORD[16+rsp] 2120 mov r15d,r8d 2121 shrd r14d,r14d,11 2122 xor r12d,ecx 2123 xor r15d,r9d 2124 shrd r13d,r13d,6 2125 add edx,r12d 2126 and esi,r15d 2127 xor r14d,r8d 2128 add edx,r13d 2129 xor esi,r9d 2130 add r11d,edx 2131 shrd r14d,r14d,2 2132 add edx,esi 2133 mov r13d,r11d 2134 add r14d,edx 2135 shrd r13d,r13d,14 2136 mov edx,r14d 2137 mov r12d,eax 2138 xor r13d,r11d 2139 shrd r14d,r14d,9 2140 xor r12d,ebx 2141 shrd r13d,r13d,5 2142 xor r14d,edx 2143 and r12d,r11d 2144 vaesenc xmm9,xmm9,xmm10 2145 vmovdqu xmm10,XMMWORD[((80-128))+rdi] 2146 xor r13d,r11d 2147 add ecx,DWORD[20+rsp] 2148 mov esi,edx 2149 shrd r14d,r14d,11 2150 xor r12d,ebx 2151 xor esi,r8d 2152 shrd r13d,r13d,6 2153 add ecx,r12d 2154 and r15d,esi 2155 xor r14d,edx 2156 add ecx,r13d 2157 xor r15d,r8d 2158 add r10d,ecx 2159 shrd r14d,r14d,2 2160 add ecx,r15d 2161 mov r13d,r10d 2162 add r14d,ecx 2163 shrd r13d,r13d,14 2164 mov ecx,r14d 2165 mov r12d,r11d 2166 xor r13d,r10d 2167 shrd r14d,r14d,9 2168 xor r12d,eax 2169 shrd r13d,r13d,5 2170 xor r14d,ecx 2171 and r12d,r10d 2172 vaesenc xmm9,xmm9,xmm10 2173 vmovdqu xmm10,XMMWORD[((96-128))+rdi] 2174 xor r13d,r10d 2175 add ebx,DWORD[24+rsp] 2176 mov r15d,ecx 2177 shrd r14d,r14d,11 2178 xor r12d,eax 2179 xor r15d,edx 2180 shrd r13d,r13d,6 2181 add ebx,r12d 2182 and esi,r15d 2183 xor r14d,ecx 2184 add ebx,r13d 2185 xor esi,edx 2186 add r9d,ebx 2187 shrd r14d,r14d,2 2188 add ebx,esi 2189 mov r13d,r9d 2190 add r14d,ebx 2191 shrd r13d,r13d,14 2192 mov ebx,r14d 2193 mov r12d,r10d 2194 xor r13d,r9d 2195 shrd r14d,r14d,9 2196 xor r12d,r11d 2197 shrd r13d,r13d,5 2198 xor r14d,ebx 2199 and r12d,r9d 2200 vaesenc xmm9,xmm9,xmm10 2201 vmovdqu xmm10,XMMWORD[((112-128))+rdi] 2202 xor r13d,r9d 2203 add eax,DWORD[28+rsp] 2204 mov esi,ebx 2205 shrd r14d,r14d,11 2206 xor r12d,r11d 2207 xor esi,ecx 2208 shrd r13d,r13d,6 2209 add eax,r12d 2210 and r15d,esi 2211 xor r14d,ebx 2212 add eax,r13d 2213 xor r15d,ecx 2214 add r8d,eax 2215 shrd r14d,r14d,2 2216 add eax,r15d 2217 mov r13d,r8d 2218 add r14d,eax 2219 shrd r13d,r13d,14 2220 mov eax,r14d 2221 mov r12d,r9d 2222 xor r13d,r8d 2223 shrd r14d,r14d,9 2224 xor r12d,r10d 2225 shrd r13d,r13d,5 2226 xor r14d,eax 2227 and r12d,r8d 2228 vaesenc xmm9,xmm9,xmm10 2229 vmovdqu xmm10,XMMWORD[((128-128))+rdi] 2230 xor r13d,r8d 2231 add r11d,DWORD[32+rsp] 2232 mov r15d,eax 2233 shrd r14d,r14d,11 2234 xor r12d,r10d 2235 xor r15d,ebx 2236 shrd r13d,r13d,6 2237 add r11d,r12d 2238 and esi,r15d 2239 xor r14d,eax 2240 add r11d,r13d 2241 xor esi,ebx 2242 add edx,r11d 2243 shrd r14d,r14d,2 2244 add r11d,esi 2245 mov r13d,edx 2246 add r14d,r11d 2247 shrd r13d,r13d,14 2248 mov r11d,r14d 2249 mov r12d,r8d 2250 xor r13d,edx 2251 shrd r14d,r14d,9 2252 xor r12d,r9d 2253 shrd r13d,r13d,5 2254 xor r14d,r11d 2255 and r12d,edx 2256 vaesenc xmm9,xmm9,xmm10 2257 vmovdqu xmm10,XMMWORD[((144-128))+rdi] 2258 xor r13d,edx 2259 add r10d,DWORD[36+rsp] 2260 mov esi,r11d 2261 shrd r14d,r14d,11 2262 xor r12d,r9d 2263 xor esi,eax 2264 shrd r13d,r13d,6 2265 add r10d,r12d 2266 and r15d,esi 2267 xor r14d,r11d 2268 add r10d,r13d 2269 xor r15d,eax 2270 add ecx,r10d 2271 shrd r14d,r14d,2 2272 add r10d,r15d 2273 mov r13d,ecx 2274 add r14d,r10d 2275 shrd r13d,r13d,14 2276 mov r10d,r14d 2277 mov r12d,edx 2278 xor r13d,ecx 2279 shrd r14d,r14d,9 2280 xor r12d,r8d 2281 shrd r13d,r13d,5 2282 xor r14d,r10d 2283 and r12d,ecx 2284 vaesenc xmm9,xmm9,xmm10 2285 vmovdqu xmm10,XMMWORD[((160-128))+rdi] 2286 xor r13d,ecx 2287 add r9d,DWORD[40+rsp] 2288 mov r15d,r10d 2289 shrd r14d,r14d,11 2290 xor r12d,r8d 2291 xor r15d,r11d 2292 shrd r13d,r13d,6 2293 add r9d,r12d 2294 and esi,r15d 2295 xor r14d,r10d 2296 add r9d,r13d 2297 xor esi,r11d 2298 add ebx,r9d 2299 shrd r14d,r14d,2 2300 add r9d,esi 2301 mov r13d,ebx 2302 add r14d,r9d 2303 shrd r13d,r13d,14 2304 mov r9d,r14d 2305 mov r12d,ecx 2306 xor r13d,ebx 2307 shrd r14d,r14d,9 2308 xor r12d,edx 2309 shrd r13d,r13d,5 2310 xor r14d,r9d 2311 and r12d,ebx 2312 vaesenclast xmm11,xmm9,xmm10 2313 vaesenc xmm9,xmm9,xmm10 2314 vmovdqu xmm10,XMMWORD[((176-128))+rdi] 2315 xor r13d,ebx 2316 add r8d,DWORD[44+rsp] 2317 mov esi,r9d 2318 shrd r14d,r14d,11 2319 xor r12d,edx 2320 xor esi,r10d 2321 shrd r13d,r13d,6 2322 add r8d,r12d 2323 and r15d,esi 2324 xor r14d,r9d 2325 add r8d,r13d 2326 xor r15d,r10d 2327 add eax,r8d 2328 shrd r14d,r14d,2 2329 add r8d,r15d 2330 mov r13d,eax 2331 add r14d,r8d 2332 shrd r13d,r13d,14 2333 mov r8d,r14d 2334 mov r12d,ebx 2335 xor r13d,eax 2336 shrd r14d,r14d,9 2337 xor r12d,ecx 2338 shrd r13d,r13d,5 2339 xor r14d,r8d 2340 and r12d,eax 2341 vpand xmm8,xmm11,xmm12 2342 vaesenc xmm9,xmm9,xmm10 2343 vmovdqu xmm10,XMMWORD[((192-128))+rdi] 2344 xor r13d,eax 2345 add edx,DWORD[48+rsp] 2346 mov r15d,r8d 2347 shrd r14d,r14d,11 2348 xor r12d,ecx 2349 xor r15d,r9d 2350 shrd r13d,r13d,6 2351 add edx,r12d 2352 and esi,r15d 2353 xor r14d,r8d 2354 add edx,r13d 2355 xor esi,r9d 2356 add r11d,edx 2357 shrd r14d,r14d,2 2358 add edx,esi 2359 mov r13d,r11d 2360 add r14d,edx 2361 shrd r13d,r13d,14 2362 mov edx,r14d 2363 mov r12d,eax 2364 xor r13d,r11d 2365 shrd r14d,r14d,9 2366 xor r12d,ebx 2367 shrd r13d,r13d,5 2368 xor r14d,edx 2369 and r12d,r11d 2370 vaesenclast xmm11,xmm9,xmm10 2371 vaesenc xmm9,xmm9,xmm10 2372 vmovdqu xmm10,XMMWORD[((208-128))+rdi] 2373 xor r13d,r11d 2374 add ecx,DWORD[52+rsp] 2375 mov esi,edx 2376 shrd r14d,r14d,11 2377 xor r12d,ebx 2378 xor esi,r8d 2379 shrd r13d,r13d,6 2380 add ecx,r12d 2381 and r15d,esi 2382 xor r14d,edx 2383 add ecx,r13d 2384 xor r15d,r8d 2385 add r10d,ecx 2386 shrd r14d,r14d,2 2387 add ecx,r15d 2388 mov r13d,r10d 2389 add r14d,ecx 2390 shrd r13d,r13d,14 2391 mov ecx,r14d 2392 mov r12d,r11d 2393 xor r13d,r10d 2394 shrd r14d,r14d,9 2395 xor r12d,eax 2396 shrd r13d,r13d,5 2397 xor r14d,ecx 2398 and r12d,r10d 2399 vpand xmm11,xmm11,xmm13 2400 vaesenc xmm9,xmm9,xmm10 2401 vmovdqu xmm10,XMMWORD[((224-128))+rdi] 2402 xor r13d,r10d 2403 add ebx,DWORD[56+rsp] 2404 mov r15d,ecx 2405 shrd r14d,r14d,11 2406 xor r12d,eax 2407 xor r15d,edx 2408 shrd r13d,r13d,6 2409 add ebx,r12d 2410 and esi,r15d 2411 xor r14d,ecx 2412 add ebx,r13d 2413 xor esi,edx 2414 add r9d,ebx 2415 shrd r14d,r14d,2 2416 add ebx,esi 2417 mov r13d,r9d 2418 add r14d,ebx 2419 shrd r13d,r13d,14 2420 mov ebx,r14d 2421 mov r12d,r10d 2422 xor r13d,r9d 2423 shrd r14d,r14d,9 2424 xor r12d,r11d 2425 shrd r13d,r13d,5 2426 xor r14d,ebx 2427 and r12d,r9d 2428 vpor xmm8,xmm8,xmm11 2429 vaesenclast xmm11,xmm9,xmm10 2430 vmovdqu xmm10,XMMWORD[((0-128))+rdi] 2431 xor r13d,r9d 2432 add eax,DWORD[60+rsp] 2433 mov esi,ebx 2434 shrd r14d,r14d,11 2435 xor r12d,r11d 2436 xor esi,ecx 2437 shrd r13d,r13d,6 2438 add eax,r12d 2439 and r15d,esi 2440 xor r14d,ebx 2441 add eax,r13d 2442 xor r15d,ecx 2443 add r8d,eax 2444 shrd r14d,r14d,2 2445 add eax,r15d 2446 mov r13d,r8d 2447 add r14d,eax 2448 mov r12,QWORD[((64+0))+rsp] 2449 mov r13,QWORD[((64+8))+rsp] 2450 mov r15,QWORD[((64+40))+rsp] 2451 mov rsi,QWORD[((64+48))+rsp] 2452 2453 vpand xmm11,xmm11,xmm14 2454 mov eax,r14d 2455 vpor xmm8,xmm8,xmm11 2456 vmovdqu XMMWORD[r13*1+r12],xmm8 2457 lea r12,[16+r12] 2458 2459 add eax,DWORD[r15] 2460 add ebx,DWORD[4+r15] 2461 add ecx,DWORD[8+r15] 2462 add edx,DWORD[12+r15] 2463 add r8d,DWORD[16+r15] 2464 add r9d,DWORD[20+r15] 2465 add r10d,DWORD[24+r15] 2466 add r11d,DWORD[28+r15] 2467 2468 cmp r12,QWORD[((64+16))+rsp] 2469 2470 mov DWORD[r15],eax 2471 mov DWORD[4+r15],ebx 2472 mov DWORD[8+r15],ecx 2473 mov DWORD[12+r15],edx 2474 mov DWORD[16+r15],r8d 2475 mov DWORD[20+r15],r9d 2476 mov DWORD[24+r15],r10d 2477 mov DWORD[28+r15],r11d 2478 jb NEAR $L$loop_avx 2479 2480 mov r8,QWORD[((64+32))+rsp] 2481 mov rsi,QWORD[120+rsp] 2482 2483 vmovdqu XMMWORD[r8],xmm8 2484 vzeroall 2485 movaps xmm6,XMMWORD[128+rsp] 2486 movaps xmm7,XMMWORD[144+rsp] 2487 movaps xmm8,XMMWORD[160+rsp] 2488 movaps xmm9,XMMWORD[176+rsp] 2489 movaps xmm10,XMMWORD[192+rsp] 2490 movaps xmm11,XMMWORD[208+rsp] 2491 movaps xmm12,XMMWORD[224+rsp] 2492 movaps xmm13,XMMWORD[240+rsp] 2493 movaps xmm14,XMMWORD[256+rsp] 2494 movaps xmm15,XMMWORD[272+rsp] 2495 mov r15,QWORD[((-48))+rsi] 2496 2497 mov r14,QWORD[((-40))+rsi] 2498 2499 mov r13,QWORD[((-32))+rsi] 2500 2501 mov r12,QWORD[((-24))+rsi] 2502 2503 mov rbp,QWORD[((-16))+rsi] 2504 2505 mov rbx,QWORD[((-8))+rsi] 2506 2507 lea rsp,[rsi] 2508 2509 $L$epilogue_avx: 2510 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2511 mov rsi,QWORD[16+rsp] 2512 DB 0F3h,0C3h ;repret 2513 2514 $L$SEH_end_aesni_cbc_sha256_enc_avx: 2515 2516 ALIGN 64 2517 aesni_cbc_sha256_enc_avx2: 2518 mov QWORD[8+rsp],rdi ;WIN64 prologue 2519 mov QWORD[16+rsp],rsi 2520 mov rax,rsp 2521 $L$SEH_begin_aesni_cbc_sha256_enc_avx2: 2522 mov rdi,rcx 2523 mov rsi,rdx 2524 mov rdx,r8 2525 mov rcx,r9 2526 mov r8,QWORD[40+rsp] 2527 mov r9,QWORD[48+rsp] 2528 2529 2530 2531 $L$avx2_shortcut: 2532 mov r10,QWORD[56+rsp] 2533 mov rax,rsp 2534 2535 push rbx 2536 2537 push rbp 2538 2539 push r12 2540 2541 push r13 2542 2543 push r14 2544 2545 push r15 2546 2547 sub rsp,736 2548 and rsp,-256*4 2549 add rsp,448 2550 2551 shl rdx,6 2552 sub rsi,rdi 2553 sub r10,rdi 2554 add rdx,rdi 2555 2556 2557 2558 mov QWORD[((64+16))+rsp],rdx 2559 2560 mov QWORD[((64+32))+rsp],r8 2561 mov QWORD[((64+40))+rsp],r9 2562 mov QWORD[((64+48))+rsp],r10 2563 mov QWORD[120+rsp],rax 2564 2565 movaps XMMWORD[128+rsp],xmm6 2566 movaps XMMWORD[144+rsp],xmm7 2567 movaps XMMWORD[160+rsp],xmm8 2568 movaps XMMWORD[176+rsp],xmm9 2569 movaps XMMWORD[192+rsp],xmm10 2570 movaps XMMWORD[208+rsp],xmm11 2571 movaps XMMWORD[224+rsp],xmm12 2572 movaps XMMWORD[240+rsp],xmm13 2573 movaps XMMWORD[256+rsp],xmm14 2574 movaps XMMWORD[272+rsp],xmm15 2575 $L$prologue_avx2: 2576 vzeroall 2577 2578 mov r13,rdi 2579 vpinsrq xmm15,xmm15,rsi,1 2580 lea rdi,[128+rcx] 2581 lea r12,[((K256+544))] 2582 mov r14d,DWORD[((240-128))+rdi] 2583 mov r15,r9 2584 mov rsi,r10 2585 vmovdqu xmm8,XMMWORD[r8] 2586 lea r14,[((-9))+r14] 2587 2588 vmovdqa xmm14,XMMWORD[r14*8+r12] 2589 vmovdqa xmm13,XMMWORD[16+r14*8+r12] 2590 vmovdqa xmm12,XMMWORD[32+r14*8+r12] 2591 2592 sub r13,-16*4 2593 mov eax,DWORD[r15] 2594 lea r12,[r13*1+rsi] 2595 mov ebx,DWORD[4+r15] 2596 cmp r13,rdx 2597 mov ecx,DWORD[8+r15] 2598 cmove r12,rsp 2599 mov edx,DWORD[12+r15] 2600 mov r8d,DWORD[16+r15] 2601 mov r9d,DWORD[20+r15] 2602 mov r10d,DWORD[24+r15] 2603 mov r11d,DWORD[28+r15] 2604 vmovdqu xmm10,XMMWORD[((0-128))+rdi] 2605 jmp NEAR $L$oop_avx2 2606 ALIGN 16 2607 $L$oop_avx2: 2608 vmovdqa ymm7,YMMWORD[((K256+512))] 2609 vmovdqu xmm0,XMMWORD[((-64+0))+r13*1+rsi] 2610 vmovdqu xmm1,XMMWORD[((-64+16))+r13*1+rsi] 2611 vmovdqu xmm2,XMMWORD[((-64+32))+r13*1+rsi] 2612 vmovdqu xmm3,XMMWORD[((-64+48))+r13*1+rsi] 2613 2614 vinserti128 ymm0,ymm0,XMMWORD[r12],1 2615 vinserti128 ymm1,ymm1,XMMWORD[16+r12],1 2616 vpshufb ymm0,ymm0,ymm7 2617 vinserti128 ymm2,ymm2,XMMWORD[32+r12],1 2618 vpshufb ymm1,ymm1,ymm7 2619 vinserti128 ymm3,ymm3,XMMWORD[48+r12],1 2620 2621 lea rbp,[K256] 2622 vpshufb ymm2,ymm2,ymm7 2623 lea r13,[((-64))+r13] 2624 vpaddd ymm4,ymm0,YMMWORD[rbp] 2625 vpshufb ymm3,ymm3,ymm7 2626 vpaddd ymm5,ymm1,YMMWORD[32+rbp] 2627 vpaddd ymm6,ymm2,YMMWORD[64+rbp] 2628 vpaddd ymm7,ymm3,YMMWORD[96+rbp] 2629 vmovdqa YMMWORD[rsp],ymm4 2630 xor r14d,r14d 2631 vmovdqa YMMWORD[32+rsp],ymm5 2632 lea rsp,[((-64))+rsp] 2633 mov esi,ebx 2634 vmovdqa YMMWORD[rsp],ymm6 2635 xor esi,ecx 2636 vmovdqa YMMWORD[32+rsp],ymm7 2637 mov r12d,r9d 2638 sub rbp,-16*2*4 2639 jmp NEAR $L$avx2_00_47 2640 2641 ALIGN 16 2642 $L$avx2_00_47: 2643 vmovdqu xmm9,XMMWORD[r13] 2644 vpinsrq xmm15,xmm15,r13,0 2645 lea rsp,[((-64))+rsp] 2646 vpalignr ymm4,ymm1,ymm0,4 2647 add r11d,DWORD[((0+128))+rsp] 2648 and r12d,r8d 2649 rorx r13d,r8d,25 2650 vpalignr ymm7,ymm3,ymm2,4 2651 rorx r15d,r8d,11 2652 lea eax,[r14*1+rax] 2653 lea r11d,[r12*1+r11] 2654 vpsrld ymm6,ymm4,7 2655 andn r12d,r8d,r10d 2656 xor r13d,r15d 2657 rorx r14d,r8d,6 2658 vpaddd ymm0,ymm0,ymm7 2659 lea r11d,[r12*1+r11] 2660 xor r13d,r14d 2661 mov r15d,eax 2662 vpsrld ymm7,ymm4,3 2663 rorx r12d,eax,22 2664 lea r11d,[r13*1+r11] 2665 xor r15d,ebx 2666 vpslld ymm5,ymm4,14 2667 rorx r14d,eax,13 2668 rorx r13d,eax,2 2669 lea edx,[r11*1+rdx] 2670 vpxor ymm4,ymm7,ymm6 2671 and esi,r15d 2672 vpxor xmm9,xmm9,xmm10 2673 vmovdqu xmm10,XMMWORD[((16-128))+rdi] 2674 xor r14d,r12d 2675 xor esi,ebx 2676 vpshufd ymm7,ymm3,250 2677 xor r14d,r13d 2678 lea r11d,[rsi*1+r11] 2679 mov r12d,r8d 2680 vpsrld ymm6,ymm6,11 2681 add r10d,DWORD[((4+128))+rsp] 2682 and r12d,edx 2683 rorx r13d,edx,25 2684 vpxor ymm4,ymm4,ymm5 2685 rorx esi,edx,11 2686 lea r11d,[r14*1+r11] 2687 lea r10d,[r12*1+r10] 2688 vpslld ymm5,ymm5,11 2689 andn r12d,edx,r9d 2690 xor r13d,esi 2691 rorx r14d,edx,6 2692 vpxor ymm4,ymm4,ymm6 2693 lea r10d,[r12*1+r10] 2694 xor r13d,r14d 2695 mov esi,r11d 2696 vpsrld ymm6,ymm7,10 2697 rorx r12d,r11d,22 2698 lea r10d,[r13*1+r10] 2699 xor esi,eax 2700 vpxor ymm4,ymm4,ymm5 2701 rorx r14d,r11d,13 2702 rorx r13d,r11d,2 2703 lea ecx,[r10*1+rcx] 2704 vpsrlq ymm7,ymm7,17 2705 and r15d,esi 2706 vpxor xmm9,xmm9,xmm8 2707 xor r14d,r12d 2708 xor r15d,eax 2709 vpaddd ymm0,ymm0,ymm4 2710 xor r14d,r13d 2711 lea r10d,[r15*1+r10] 2712 mov r12d,edx 2713 vpxor ymm6,ymm6,ymm7 2714 add r9d,DWORD[((8+128))+rsp] 2715 and r12d,ecx 2716 rorx r13d,ecx,25 2717 vpsrlq ymm7,ymm7,2 2718 rorx r15d,ecx,11 2719 lea r10d,[r14*1+r10] 2720 lea r9d,[r12*1+r9] 2721 vpxor ymm6,ymm6,ymm7 2722 andn r12d,ecx,r8d 2723 xor r13d,r15d 2724 rorx r14d,ecx,6 2725 vpshufd ymm6,ymm6,132 2726 lea r9d,[r12*1+r9] 2727 xor r13d,r14d 2728 mov r15d,r10d 2729 vpsrldq ymm6,ymm6,8 2730 rorx r12d,r10d,22 2731 lea r9d,[r13*1+r9] 2732 xor r15d,r11d 2733 vpaddd ymm0,ymm0,ymm6 2734 rorx r14d,r10d,13 2735 rorx r13d,r10d,2 2736 lea ebx,[r9*1+rbx] 2737 vpshufd ymm7,ymm0,80 2738 and esi,r15d 2739 vaesenc xmm9,xmm9,xmm10 2740 vmovdqu xmm10,XMMWORD[((32-128))+rdi] 2741 xor r14d,r12d 2742 xor esi,r11d 2743 vpsrld ymm6,ymm7,10 2744 xor r14d,r13d 2745 lea r9d,[rsi*1+r9] 2746 mov r12d,ecx 2747 vpsrlq ymm7,ymm7,17 2748 add r8d,DWORD[((12+128))+rsp] 2749 and r12d,ebx 2750 rorx r13d,ebx,25 2751 vpxor ymm6,ymm6,ymm7 2752 rorx esi,ebx,11 2753 lea r9d,[r14*1+r9] 2754 lea r8d,[r12*1+r8] 2755 vpsrlq ymm7,ymm7,2 2756 andn r12d,ebx,edx 2757 xor r13d,esi 2758 rorx r14d,ebx,6 2759 vpxor ymm6,ymm6,ymm7 2760 lea r8d,[r12*1+r8] 2761 xor r13d,r14d 2762 mov esi,r9d 2763 vpshufd ymm6,ymm6,232 2764 rorx r12d,r9d,22 2765 lea r8d,[r13*1+r8] 2766 xor esi,r10d 2767 vpslldq ymm6,ymm6,8 2768 rorx r14d,r9d,13 2769 rorx r13d,r9d,2 2770 lea eax,[r8*1+rax] 2771 vpaddd ymm0,ymm0,ymm6 2772 and r15d,esi 2773 vaesenc xmm9,xmm9,xmm10 2774 vmovdqu xmm10,XMMWORD[((48-128))+rdi] 2775 xor r14d,r12d 2776 xor r15d,r10d 2777 vpaddd ymm6,ymm0,YMMWORD[rbp] 2778 xor r14d,r13d 2779 lea r8d,[r15*1+r8] 2780 mov r12d,ebx 2781 vmovdqa YMMWORD[rsp],ymm6 2782 vpalignr ymm4,ymm2,ymm1,4 2783 add edx,DWORD[((32+128))+rsp] 2784 and r12d,eax 2785 rorx r13d,eax,25 2786 vpalignr ymm7,ymm0,ymm3,4 2787 rorx r15d,eax,11 2788 lea r8d,[r14*1+r8] 2789 lea edx,[r12*1+rdx] 2790 vpsrld ymm6,ymm4,7 2791 andn r12d,eax,ecx 2792 xor r13d,r15d 2793 rorx r14d,eax,6 2794 vpaddd ymm1,ymm1,ymm7 2795 lea edx,[r12*1+rdx] 2796 xor r13d,r14d 2797 mov r15d,r8d 2798 vpsrld ymm7,ymm4,3 2799 rorx r12d,r8d,22 2800 lea edx,[r13*1+rdx] 2801 xor r15d,r9d 2802 vpslld ymm5,ymm4,14 2803 rorx r14d,r8d,13 2804 rorx r13d,r8d,2 2805 lea r11d,[rdx*1+r11] 2806 vpxor ymm4,ymm7,ymm6 2807 and esi,r15d 2808 vaesenc xmm9,xmm9,xmm10 2809 vmovdqu xmm10,XMMWORD[((64-128))+rdi] 2810 xor r14d,r12d 2811 xor esi,r9d 2812 vpshufd ymm7,ymm0,250 2813 xor r14d,r13d 2814 lea edx,[rsi*1+rdx] 2815 mov r12d,eax 2816 vpsrld ymm6,ymm6,11 2817 add ecx,DWORD[((36+128))+rsp] 2818 and r12d,r11d 2819 rorx r13d,r11d,25 2820 vpxor ymm4,ymm4,ymm5 2821 rorx esi,r11d,11 2822 lea edx,[r14*1+rdx] 2823 lea ecx,[r12*1+rcx] 2824 vpslld ymm5,ymm5,11 2825 andn r12d,r11d,ebx 2826 xor r13d,esi 2827 rorx r14d,r11d,6 2828 vpxor ymm4,ymm4,ymm6 2829 lea ecx,[r12*1+rcx] 2830 xor r13d,r14d 2831 mov esi,edx 2832 vpsrld ymm6,ymm7,10 2833 rorx r12d,edx,22 2834 lea ecx,[r13*1+rcx] 2835 xor esi,r8d 2836 vpxor ymm4,ymm4,ymm5 2837 rorx r14d,edx,13 2838 rorx r13d,edx,2 2839 lea r10d,[rcx*1+r10] 2840 vpsrlq ymm7,ymm7,17 2841 and r15d,esi 2842 vaesenc xmm9,xmm9,xmm10 2843 vmovdqu xmm10,XMMWORD[((80-128))+rdi] 2844 xor r14d,r12d 2845 xor r15d,r8d 2846 vpaddd ymm1,ymm1,ymm4 2847 xor r14d,r13d 2848 lea ecx,[r15*1+rcx] 2849 mov r12d,r11d 2850 vpxor ymm6,ymm6,ymm7 2851 add ebx,DWORD[((40+128))+rsp] 2852 and r12d,r10d 2853 rorx r13d,r10d,25 2854 vpsrlq ymm7,ymm7,2 2855 rorx r15d,r10d,11 2856 lea ecx,[r14*1+rcx] 2857 lea ebx,[r12*1+rbx] 2858 vpxor ymm6,ymm6,ymm7 2859 andn r12d,r10d,eax 2860 xor r13d,r15d 2861 rorx r14d,r10d,6 2862 vpshufd ymm6,ymm6,132 2863 lea ebx,[r12*1+rbx] 2864 xor r13d,r14d 2865 mov r15d,ecx 2866 vpsrldq ymm6,ymm6,8 2867 rorx r12d,ecx,22 2868 lea ebx,[r13*1+rbx] 2869 xor r15d,edx 2870 vpaddd ymm1,ymm1,ymm6 2871 rorx r14d,ecx,13 2872 rorx r13d,ecx,2 2873 lea r9d,[rbx*1+r9] 2874 vpshufd ymm7,ymm1,80 2875 and esi,r15d 2876 vaesenc xmm9,xmm9,xmm10 2877 vmovdqu xmm10,XMMWORD[((96-128))+rdi] 2878 xor r14d,r12d 2879 xor esi,edx 2880 vpsrld ymm6,ymm7,10 2881 xor r14d,r13d 2882 lea ebx,[rsi*1+rbx] 2883 mov r12d,r10d 2884 vpsrlq ymm7,ymm7,17 2885 add eax,DWORD[((44+128))+rsp] 2886 and r12d,r9d 2887 rorx r13d,r9d,25 2888 vpxor ymm6,ymm6,ymm7 2889 rorx esi,r9d,11 2890 lea ebx,[r14*1+rbx] 2891 lea eax,[r12*1+rax] 2892 vpsrlq ymm7,ymm7,2 2893 andn r12d,r9d,r11d 2894 xor r13d,esi 2895 rorx r14d,r9d,6 2896 vpxor ymm6,ymm6,ymm7 2897 lea eax,[r12*1+rax] 2898 xor r13d,r14d 2899 mov esi,ebx 2900 vpshufd ymm6,ymm6,232 2901 rorx r12d,ebx,22 2902 lea eax,[r13*1+rax] 2903 xor esi,ecx 2904 vpslldq ymm6,ymm6,8 2905 rorx r14d,ebx,13 2906 rorx r13d,ebx,2 2907 lea r8d,[rax*1+r8] 2908 vpaddd ymm1,ymm1,ymm6 2909 and r15d,esi 2910 vaesenc xmm9,xmm9,xmm10 2911 vmovdqu xmm10,XMMWORD[((112-128))+rdi] 2912 xor r14d,r12d 2913 xor r15d,ecx 2914 vpaddd ymm6,ymm1,YMMWORD[32+rbp] 2915 xor r14d,r13d 2916 lea eax,[r15*1+rax] 2917 mov r12d,r9d 2918 vmovdqa YMMWORD[32+rsp],ymm6 2919 lea rsp,[((-64))+rsp] 2920 vpalignr ymm4,ymm3,ymm2,4 2921 add r11d,DWORD[((0+128))+rsp] 2922 and r12d,r8d 2923 rorx r13d,r8d,25 2924 vpalignr ymm7,ymm1,ymm0,4 2925 rorx r15d,r8d,11 2926 lea eax,[r14*1+rax] 2927 lea r11d,[r12*1+r11] 2928 vpsrld ymm6,ymm4,7 2929 andn r12d,r8d,r10d 2930 xor r13d,r15d 2931 rorx r14d,r8d,6 2932 vpaddd ymm2,ymm2,ymm7 2933 lea r11d,[r12*1+r11] 2934 xor r13d,r14d 2935 mov r15d,eax 2936 vpsrld ymm7,ymm4,3 2937 rorx r12d,eax,22 2938 lea r11d,[r13*1+r11] 2939 xor r15d,ebx 2940 vpslld ymm5,ymm4,14 2941 rorx r14d,eax,13 2942 rorx r13d,eax,2 2943 lea edx,[r11*1+rdx] 2944 vpxor ymm4,ymm7,ymm6 2945 and esi,r15d 2946 vaesenc xmm9,xmm9,xmm10 2947 vmovdqu xmm10,XMMWORD[((128-128))+rdi] 2948 xor r14d,r12d 2949 xor esi,ebx 2950 vpshufd ymm7,ymm1,250 2951 xor r14d,r13d 2952 lea r11d,[rsi*1+r11] 2953 mov r12d,r8d 2954 vpsrld ymm6,ymm6,11 2955 add r10d,DWORD[((4+128))+rsp] 2956 and r12d,edx 2957 rorx r13d,edx,25 2958 vpxor ymm4,ymm4,ymm5 2959 rorx esi,edx,11 2960 lea r11d,[r14*1+r11] 2961 lea r10d,[r12*1+r10] 2962 vpslld ymm5,ymm5,11 2963 andn r12d,edx,r9d 2964 xor r13d,esi 2965 rorx r14d,edx,6 2966 vpxor ymm4,ymm4,ymm6 2967 lea r10d,[r12*1+r10] 2968 xor r13d,r14d 2969 mov esi,r11d 2970 vpsrld ymm6,ymm7,10 2971 rorx r12d,r11d,22 2972 lea r10d,[r13*1+r10] 2973 xor esi,eax 2974 vpxor ymm4,ymm4,ymm5 2975 rorx r14d,r11d,13 2976 rorx r13d,r11d,2 2977 lea ecx,[r10*1+rcx] 2978 vpsrlq ymm7,ymm7,17 2979 and r15d,esi 2980 vaesenc xmm9,xmm9,xmm10 2981 vmovdqu xmm10,XMMWORD[((144-128))+rdi] 2982 xor r14d,r12d 2983 xor r15d,eax 2984 vpaddd ymm2,ymm2,ymm4 2985 xor r14d,r13d 2986 lea r10d,[r15*1+r10] 2987 mov r12d,edx 2988 vpxor ymm6,ymm6,ymm7 2989 add r9d,DWORD[((8+128))+rsp] 2990 and r12d,ecx 2991 rorx r13d,ecx,25 2992 vpsrlq ymm7,ymm7,2 2993 rorx r15d,ecx,11 2994 lea r10d,[r14*1+r10] 2995 lea r9d,[r12*1+r9] 2996 vpxor ymm6,ymm6,ymm7 2997 andn r12d,ecx,r8d 2998 xor r13d,r15d 2999 rorx r14d,ecx,6 3000 vpshufd ymm6,ymm6,132 3001 lea r9d,[r12*1+r9] 3002 xor r13d,r14d 3003 mov r15d,r10d 3004 vpsrldq ymm6,ymm6,8 3005 rorx r12d,r10d,22 3006 lea r9d,[r13*1+r9] 3007 xor r15d,r11d 3008 vpaddd ymm2,ymm2,ymm6 3009 rorx r14d,r10d,13 3010 rorx r13d,r10d,2 3011 lea ebx,[r9*1+rbx] 3012 vpshufd ymm7,ymm2,80 3013 and esi,r15d 3014 vaesenc xmm9,xmm9,xmm10 3015 vmovdqu xmm10,XMMWORD[((160-128))+rdi] 3016 xor r14d,r12d 3017 xor esi,r11d 3018 vpsrld ymm6,ymm7,10 3019 xor r14d,r13d 3020 lea r9d,[rsi*1+r9] 3021 mov r12d,ecx 3022 vpsrlq ymm7,ymm7,17 3023 add r8d,DWORD[((12+128))+rsp] 3024 and r12d,ebx 3025 rorx r13d,ebx,25 3026 vpxor ymm6,ymm6,ymm7 3027 rorx esi,ebx,11 3028 lea r9d,[r14*1+r9] 3029 lea r8d,[r12*1+r8] 3030 vpsrlq ymm7,ymm7,2 3031 andn r12d,ebx,edx 3032 xor r13d,esi 3033 rorx r14d,ebx,6 3034 vpxor ymm6,ymm6,ymm7 3035 lea r8d,[r12*1+r8] 3036 xor r13d,r14d 3037 mov esi,r9d 3038 vpshufd ymm6,ymm6,232 3039 rorx r12d,r9d,22 3040 lea r8d,[r13*1+r8] 3041 xor esi,r10d 3042 vpslldq ymm6,ymm6,8 3043 rorx r14d,r9d,13 3044 rorx r13d,r9d,2 3045 lea eax,[r8*1+rax] 3046 vpaddd ymm2,ymm2,ymm6 3047 and r15d,esi 3048 vaesenclast xmm11,xmm9,xmm10 3049 vaesenc xmm9,xmm9,xmm10 3050 vmovdqu xmm10,XMMWORD[((176-128))+rdi] 3051 xor r14d,r12d 3052 xor r15d,r10d 3053 vpaddd ymm6,ymm2,YMMWORD[64+rbp] 3054 xor r14d,r13d 3055 lea r8d,[r15*1+r8] 3056 mov r12d,ebx 3057 vmovdqa YMMWORD[rsp],ymm6 3058 vpalignr ymm4,ymm0,ymm3,4 3059 add edx,DWORD[((32+128))+rsp] 3060 and r12d,eax 3061 rorx r13d,eax,25 3062 vpalignr ymm7,ymm2,ymm1,4 3063 rorx r15d,eax,11 3064 lea r8d,[r14*1+r8] 3065 lea edx,[r12*1+rdx] 3066 vpsrld ymm6,ymm4,7 3067 andn r12d,eax,ecx 3068 xor r13d,r15d 3069 rorx r14d,eax,6 3070 vpaddd ymm3,ymm3,ymm7 3071 lea edx,[r12*1+rdx] 3072 xor r13d,r14d 3073 mov r15d,r8d 3074 vpsrld ymm7,ymm4,3 3075 rorx r12d,r8d,22 3076 lea edx,[r13*1+rdx] 3077 xor r15d,r9d 3078 vpslld ymm5,ymm4,14 3079 rorx r14d,r8d,13 3080 rorx r13d,r8d,2 3081 lea r11d,[rdx*1+r11] 3082 vpxor ymm4,ymm7,ymm6 3083 and esi,r15d 3084 vpand xmm8,xmm11,xmm12 3085 vaesenc xmm9,xmm9,xmm10 3086 vmovdqu xmm10,XMMWORD[((192-128))+rdi] 3087 xor r14d,r12d 3088 xor esi,r9d 3089 vpshufd ymm7,ymm2,250 3090 xor r14d,r13d 3091 lea edx,[rsi*1+rdx] 3092 mov r12d,eax 3093 vpsrld ymm6,ymm6,11 3094 add ecx,DWORD[((36+128))+rsp] 3095 and r12d,r11d 3096 rorx r13d,r11d,25 3097 vpxor ymm4,ymm4,ymm5 3098 rorx esi,r11d,11 3099 lea edx,[r14*1+rdx] 3100 lea ecx,[r12*1+rcx] 3101 vpslld ymm5,ymm5,11 3102 andn r12d,r11d,ebx 3103 xor r13d,esi 3104 rorx r14d,r11d,6 3105 vpxor ymm4,ymm4,ymm6 3106 lea ecx,[r12*1+rcx] 3107 xor r13d,r14d 3108 mov esi,edx 3109 vpsrld ymm6,ymm7,10 3110 rorx r12d,edx,22 3111 lea ecx,[r13*1+rcx] 3112 xor esi,r8d 3113 vpxor ymm4,ymm4,ymm5 3114 rorx r14d,edx,13 3115 rorx r13d,edx,2 3116 lea r10d,[rcx*1+r10] 3117 vpsrlq ymm7,ymm7,17 3118 and r15d,esi 3119 vaesenclast xmm11,xmm9,xmm10 3120 vaesenc xmm9,xmm9,xmm10 3121 vmovdqu xmm10,XMMWORD[((208-128))+rdi] 3122 xor r14d,r12d 3123 xor r15d,r8d 3124 vpaddd ymm3,ymm3,ymm4 3125 xor r14d,r13d 3126 lea ecx,[r15*1+rcx] 3127 mov r12d,r11d 3128 vpxor ymm6,ymm6,ymm7 3129 add ebx,DWORD[((40+128))+rsp] 3130 and r12d,r10d 3131 rorx r13d,r10d,25 3132 vpsrlq ymm7,ymm7,2 3133 rorx r15d,r10d,11 3134 lea ecx,[r14*1+rcx] 3135 lea ebx,[r12*1+rbx] 3136 vpxor ymm6,ymm6,ymm7 3137 andn r12d,r10d,eax 3138 xor r13d,r15d 3139 rorx r14d,r10d,6 3140 vpshufd ymm6,ymm6,132 3141 lea ebx,[r12*1+rbx] 3142 xor r13d,r14d 3143 mov r15d,ecx 3144 vpsrldq ymm6,ymm6,8 3145 rorx r12d,ecx,22 3146 lea ebx,[r13*1+rbx] 3147 xor r15d,edx 3148 vpaddd ymm3,ymm3,ymm6 3149 rorx r14d,ecx,13 3150 rorx r13d,ecx,2 3151 lea r9d,[rbx*1+r9] 3152 vpshufd ymm7,ymm3,80 3153 and esi,r15d 3154 vpand xmm11,xmm11,xmm13 3155 vaesenc xmm9,xmm9,xmm10 3156 vmovdqu xmm10,XMMWORD[((224-128))+rdi] 3157 xor r14d,r12d 3158 xor esi,edx 3159 vpsrld ymm6,ymm7,10 3160 xor r14d,r13d 3161 lea ebx,[rsi*1+rbx] 3162 mov r12d,r10d 3163 vpsrlq ymm7,ymm7,17 3164 add eax,DWORD[((44+128))+rsp] 3165 and r12d,r9d 3166 rorx r13d,r9d,25 3167 vpxor ymm6,ymm6,ymm7 3168 rorx esi,r9d,11 3169 lea ebx,[r14*1+rbx] 3170 lea eax,[r12*1+rax] 3171 vpsrlq ymm7,ymm7,2 3172 andn r12d,r9d,r11d 3173 xor r13d,esi 3174 rorx r14d,r9d,6 3175 vpxor ymm6,ymm6,ymm7 3176 lea eax,[r12*1+rax] 3177 xor r13d,r14d 3178 mov esi,ebx 3179 vpshufd ymm6,ymm6,232 3180 rorx r12d,ebx,22 3181 lea eax,[r13*1+rax] 3182 xor esi,ecx 3183 vpslldq ymm6,ymm6,8 3184 rorx r14d,ebx,13 3185 rorx r13d,ebx,2 3186 lea r8d,[rax*1+r8] 3187 vpaddd ymm3,ymm3,ymm6 3188 and r15d,esi 3189 vpor xmm8,xmm8,xmm11 3190 vaesenclast xmm11,xmm9,xmm10 3191 vmovdqu xmm10,XMMWORD[((0-128))+rdi] 3192 xor r14d,r12d 3193 xor r15d,ecx 3194 vpaddd ymm6,ymm3,YMMWORD[96+rbp] 3195 xor r14d,r13d 3196 lea eax,[r15*1+rax] 3197 mov r12d,r9d 3198 vmovdqa YMMWORD[32+rsp],ymm6 3199 vmovq r13,xmm15 3200 vpextrq r15,xmm15,1 3201 vpand xmm11,xmm11,xmm14 3202 vpor xmm8,xmm8,xmm11 3203 vmovdqu XMMWORD[r13*1+r15],xmm8 3204 lea r13,[16+r13] 3205 lea rbp,[128+rbp] 3206 cmp BYTE[3+rbp],0 3207 jne NEAR $L$avx2_00_47 3208 vmovdqu xmm9,XMMWORD[r13] 3209 vpinsrq xmm15,xmm15,r13,0 3210 add r11d,DWORD[((0+64))+rsp] 3211 and r12d,r8d 3212 rorx r13d,r8d,25 3213 rorx r15d,r8d,11 3214 lea eax,[r14*1+rax] 3215 lea r11d,[r12*1+r11] 3216 andn r12d,r8d,r10d 3217 xor r13d,r15d 3218 rorx r14d,r8d,6 3219 lea r11d,[r12*1+r11] 3220 xor r13d,r14d 3221 mov r15d,eax 3222 rorx r12d,eax,22 3223 lea r11d,[r13*1+r11] 3224 xor r15d,ebx 3225 rorx r14d,eax,13 3226 rorx r13d,eax,2 3227 lea edx,[r11*1+rdx] 3228 and esi,r15d 3229 vpxor xmm9,xmm9,xmm10 3230 vmovdqu xmm10,XMMWORD[((16-128))+rdi] 3231 xor r14d,r12d 3232 xor esi,ebx 3233 xor r14d,r13d 3234 lea r11d,[rsi*1+r11] 3235 mov r12d,r8d 3236 add r10d,DWORD[((4+64))+rsp] 3237 and r12d,edx 3238 rorx r13d,edx,25 3239 rorx esi,edx,11 3240 lea r11d,[r14*1+r11] 3241 lea r10d,[r12*1+r10] 3242 andn r12d,edx,r9d 3243 xor r13d,esi 3244 rorx r14d,edx,6 3245 lea r10d,[r12*1+r10] 3246 xor r13d,r14d 3247 mov esi,r11d 3248 rorx r12d,r11d,22 3249 lea r10d,[r13*1+r10] 3250 xor esi,eax 3251 rorx r14d,r11d,13 3252 rorx r13d,r11d,2 3253 lea ecx,[r10*1+rcx] 3254 and r15d,esi 3255 vpxor xmm9,xmm9,xmm8 3256 xor r14d,r12d 3257 xor r15d,eax 3258 xor r14d,r13d 3259 lea r10d,[r15*1+r10] 3260 mov r12d,edx 3261 add r9d,DWORD[((8+64))+rsp] 3262 and r12d,ecx 3263 rorx r13d,ecx,25 3264 rorx r15d,ecx,11 3265 lea r10d,[r14*1+r10] 3266 lea r9d,[r12*1+r9] 3267 andn r12d,ecx,r8d 3268 xor r13d,r15d 3269 rorx r14d,ecx,6 3270 lea r9d,[r12*1+r9] 3271 xor r13d,r14d 3272 mov r15d,r10d 3273 rorx r12d,r10d,22 3274 lea r9d,[r13*1+r9] 3275 xor r15d,r11d 3276 rorx r14d,r10d,13 3277 rorx r13d,r10d,2 3278 lea ebx,[r9*1+rbx] 3279 and esi,r15d 3280 vaesenc xmm9,xmm9,xmm10 3281 vmovdqu xmm10,XMMWORD[((32-128))+rdi] 3282 xor r14d,r12d 3283 xor esi,r11d 3284 xor r14d,r13d 3285 lea r9d,[rsi*1+r9] 3286 mov r12d,ecx 3287 add r8d,DWORD[((12+64))+rsp] 3288 and r12d,ebx 3289 rorx r13d,ebx,25 3290 rorx esi,ebx,11 3291 lea r9d,[r14*1+r9] 3292 lea r8d,[r12*1+r8] 3293 andn r12d,ebx,edx 3294 xor r13d,esi 3295 rorx r14d,ebx,6 3296 lea r8d,[r12*1+r8] 3297 xor r13d,r14d 3298 mov esi,r9d 3299 rorx r12d,r9d,22 3300 lea r8d,[r13*1+r8] 3301 xor esi,r10d 3302 rorx r14d,r9d,13 3303 rorx r13d,r9d,2 3304 lea eax,[r8*1+rax] 3305 and r15d,esi 3306 vaesenc xmm9,xmm9,xmm10 3307 vmovdqu xmm10,XMMWORD[((48-128))+rdi] 3308 xor r14d,r12d 3309 xor r15d,r10d 3310 xor r14d,r13d 3311 lea r8d,[r15*1+r8] 3312 mov r12d,ebx 3313 add edx,DWORD[((32+64))+rsp] 3314 and r12d,eax 3315 rorx r13d,eax,25 3316 rorx r15d,eax,11 3317 lea r8d,[r14*1+r8] 3318 lea edx,[r12*1+rdx] 3319 andn r12d,eax,ecx 3320 xor r13d,r15d 3321 rorx r14d,eax,6 3322 lea edx,[r12*1+rdx] 3323 xor r13d,r14d 3324 mov r15d,r8d 3325 rorx r12d,r8d,22 3326 lea edx,[r13*1+rdx] 3327 xor r15d,r9d 3328 rorx r14d,r8d,13 3329 rorx r13d,r8d,2 3330 lea r11d,[rdx*1+r11] 3331 and esi,r15d 3332 vaesenc xmm9,xmm9,xmm10 3333 vmovdqu xmm10,XMMWORD[((64-128))+rdi] 3334 xor r14d,r12d 3335 xor esi,r9d 3336 xor r14d,r13d 3337 lea edx,[rsi*1+rdx] 3338 mov r12d,eax 3339 add ecx,DWORD[((36+64))+rsp] 3340 and r12d,r11d 3341 rorx r13d,r11d,25 3342 rorx esi,r11d,11 3343 lea edx,[r14*1+rdx] 3344 lea ecx,[r12*1+rcx] 3345 andn r12d,r11d,ebx 3346 xor r13d,esi 3347 rorx r14d,r11d,6 3348 lea ecx,[r12*1+rcx] 3349 xor r13d,r14d 3350 mov esi,edx 3351 rorx r12d,edx,22 3352 lea ecx,[r13*1+rcx] 3353 xor esi,r8d 3354 rorx r14d,edx,13 3355 rorx r13d,edx,2 3356 lea r10d,[rcx*1+r10] 3357 and r15d,esi 3358 vaesenc xmm9,xmm9,xmm10 3359 vmovdqu xmm10,XMMWORD[((80-128))+rdi] 3360 xor r14d,r12d 3361 xor r15d,r8d 3362 xor r14d,r13d 3363 lea ecx,[r15*1+rcx] 3364 mov r12d,r11d 3365 add ebx,DWORD[((40+64))+rsp] 3366 and r12d,r10d 3367 rorx r13d,r10d,25 3368 rorx r15d,r10d,11 3369 lea ecx,[r14*1+rcx] 3370 lea ebx,[r12*1+rbx] 3371 andn r12d,r10d,eax 3372 xor r13d,r15d 3373 rorx r14d,r10d,6 3374 lea ebx,[r12*1+rbx] 3375 xor r13d,r14d 3376 mov r15d,ecx 3377 rorx r12d,ecx,22 3378 lea ebx,[r13*1+rbx] 3379 xor r15d,edx 3380 rorx r14d,ecx,13 3381 rorx r13d,ecx,2 3382 lea r9d,[rbx*1+r9] 3383 and esi,r15d 3384 vaesenc xmm9,xmm9,xmm10 3385 vmovdqu xmm10,XMMWORD[((96-128))+rdi] 3386 xor r14d,r12d 3387 xor esi,edx 3388 xor r14d,r13d 3389 lea ebx,[rsi*1+rbx] 3390 mov r12d,r10d 3391 add eax,DWORD[((44+64))+rsp] 3392 and r12d,r9d 3393 rorx r13d,r9d,25 3394 rorx esi,r9d,11 3395 lea ebx,[r14*1+rbx] 3396 lea eax,[r12*1+rax] 3397 andn r12d,r9d,r11d 3398 xor r13d,esi 3399 rorx r14d,r9d,6 3400 lea eax,[r12*1+rax] 3401 xor r13d,r14d 3402 mov esi,ebx 3403 rorx r12d,ebx,22 3404 lea eax,[r13*1+rax] 3405 xor esi,ecx 3406 rorx r14d,ebx,13 3407 rorx r13d,ebx,2 3408 lea r8d,[rax*1+r8] 3409 and r15d,esi 3410 vaesenc xmm9,xmm9,xmm10 3411 vmovdqu xmm10,XMMWORD[((112-128))+rdi] 3412 xor r14d,r12d 3413 xor r15d,ecx 3414 xor r14d,r13d 3415 lea eax,[r15*1+rax] 3416 mov r12d,r9d 3417 add r11d,DWORD[rsp] 3418 and r12d,r8d 3419 rorx r13d,r8d,25 3420 rorx r15d,r8d,11 3421 lea eax,[r14*1+rax] 3422 lea r11d,[r12*1+r11] 3423 andn r12d,r8d,r10d 3424 xor r13d,r15d 3425 rorx r14d,r8d,6 3426 lea r11d,[r12*1+r11] 3427 xor r13d,r14d 3428 mov r15d,eax 3429 rorx r12d,eax,22 3430 lea r11d,[r13*1+r11] 3431 xor r15d,ebx 3432 rorx r14d,eax,13 3433 rorx r13d,eax,2 3434 lea edx,[r11*1+rdx] 3435 and esi,r15d 3436 vaesenc xmm9,xmm9,xmm10 3437 vmovdqu xmm10,XMMWORD[((128-128))+rdi] 3438 xor r14d,r12d 3439 xor esi,ebx 3440 xor r14d,r13d 3441 lea r11d,[rsi*1+r11] 3442 mov r12d,r8d 3443 add r10d,DWORD[4+rsp] 3444 and r12d,edx 3445 rorx r13d,edx,25 3446 rorx esi,edx,11 3447 lea r11d,[r14*1+r11] 3448 lea r10d,[r12*1+r10] 3449 andn r12d,edx,r9d 3450 xor r13d,esi 3451 rorx r14d,edx,6 3452 lea r10d,[r12*1+r10] 3453 xor r13d,r14d 3454 mov esi,r11d 3455 rorx r12d,r11d,22 3456 lea r10d,[r13*1+r10] 3457 xor esi,eax 3458 rorx r14d,r11d,13 3459 rorx r13d,r11d,2 3460 lea ecx,[r10*1+rcx] 3461 and r15d,esi 3462 vaesenc xmm9,xmm9,xmm10 3463 vmovdqu xmm10,XMMWORD[((144-128))+rdi] 3464 xor r14d,r12d 3465 xor r15d,eax 3466 xor r14d,r13d 3467 lea r10d,[r15*1+r10] 3468 mov r12d,edx 3469 add r9d,DWORD[8+rsp] 3470 and r12d,ecx 3471 rorx r13d,ecx,25 3472 rorx r15d,ecx,11 3473 lea r10d,[r14*1+r10] 3474 lea r9d,[r12*1+r9] 3475 andn r12d,ecx,r8d 3476 xor r13d,r15d 3477 rorx r14d,ecx,6 3478 lea r9d,[r12*1+r9] 3479 xor r13d,r14d 3480 mov r15d,r10d 3481 rorx r12d,r10d,22 3482 lea r9d,[r13*1+r9] 3483 xor r15d,r11d 3484 rorx r14d,r10d,13 3485 rorx r13d,r10d,2 3486 lea ebx,[r9*1+rbx] 3487 and esi,r15d 3488 vaesenc xmm9,xmm9,xmm10 3489 vmovdqu xmm10,XMMWORD[((160-128))+rdi] 3490 xor r14d,r12d 3491 xor esi,r11d 3492 xor r14d,r13d 3493 lea r9d,[rsi*1+r9] 3494 mov r12d,ecx 3495 add r8d,DWORD[12+rsp] 3496 and r12d,ebx 3497 rorx r13d,ebx,25 3498 rorx esi,ebx,11 3499 lea r9d,[r14*1+r9] 3500 lea r8d,[r12*1+r8] 3501 andn r12d,ebx,edx 3502 xor r13d,esi 3503 rorx r14d,ebx,6 3504 lea r8d,[r12*1+r8] 3505 xor r13d,r14d 3506 mov esi,r9d 3507 rorx r12d,r9d,22 3508 lea r8d,[r13*1+r8] 3509 xor esi,r10d 3510 rorx r14d,r9d,13 3511 rorx r13d,r9d,2 3512 lea eax,[r8*1+rax] 3513 and r15d,esi 3514 vaesenclast xmm11,xmm9,xmm10 3515 vaesenc xmm9,xmm9,xmm10 3516 vmovdqu xmm10,XMMWORD[((176-128))+rdi] 3517 xor r14d,r12d 3518 xor r15d,r10d 3519 xor r14d,r13d 3520 lea r8d,[r15*1+r8] 3521 mov r12d,ebx 3522 add edx,DWORD[32+rsp] 3523 and r12d,eax 3524 rorx r13d,eax,25 3525 rorx r15d,eax,11 3526 lea r8d,[r14*1+r8] 3527 lea edx,[r12*1+rdx] 3528 andn r12d,eax,ecx 3529 xor r13d,r15d 3530 rorx r14d,eax,6 3531 lea edx,[r12*1+rdx] 3532 xor r13d,r14d 3533 mov r15d,r8d 3534 rorx r12d,r8d,22 3535 lea edx,[r13*1+rdx] 3536 xor r15d,r9d 3537 rorx r14d,r8d,13 3538 rorx r13d,r8d,2 3539 lea r11d,[rdx*1+r11] 3540 and esi,r15d 3541 vpand xmm8,xmm11,xmm12 3542 vaesenc xmm9,xmm9,xmm10 3543 vmovdqu xmm10,XMMWORD[((192-128))+rdi] 3544 xor r14d,r12d 3545 xor esi,r9d 3546 xor r14d,r13d 3547 lea edx,[rsi*1+rdx] 3548 mov r12d,eax 3549 add ecx,DWORD[36+rsp] 3550 and r12d,r11d 3551 rorx r13d,r11d,25 3552 rorx esi,r11d,11 3553 lea edx,[r14*1+rdx] 3554 lea ecx,[r12*1+rcx] 3555 andn r12d,r11d,ebx 3556 xor r13d,esi 3557 rorx r14d,r11d,6 3558 lea ecx,[r12*1+rcx] 3559 xor r13d,r14d 3560 mov esi,edx 3561 rorx r12d,edx,22 3562 lea ecx,[r13*1+rcx] 3563 xor esi,r8d 3564 rorx r14d,edx,13 3565 rorx r13d,edx,2 3566 lea r10d,[rcx*1+r10] 3567 and r15d,esi 3568 vaesenclast xmm11,xmm9,xmm10 3569 vaesenc xmm9,xmm9,xmm10 3570 vmovdqu xmm10,XMMWORD[((208-128))+rdi] 3571 xor r14d,r12d 3572 xor r15d,r8d 3573 xor r14d,r13d 3574 lea ecx,[r15*1+rcx] 3575 mov r12d,r11d 3576 add ebx,DWORD[40+rsp] 3577 and r12d,r10d 3578 rorx r13d,r10d,25 3579 rorx r15d,r10d,11 3580 lea ecx,[r14*1+rcx] 3581 lea ebx,[r12*1+rbx] 3582 andn r12d,r10d,eax 3583 xor r13d,r15d 3584 rorx r14d,r10d,6 3585 lea ebx,[r12*1+rbx] 3586 xor r13d,r14d 3587 mov r15d,ecx 3588 rorx r12d,ecx,22 3589 lea ebx,[r13*1+rbx] 3590 xor r15d,edx 3591 rorx r14d,ecx,13 3592 rorx r13d,ecx,2 3593 lea r9d,[rbx*1+r9] 3594 and esi,r15d 3595 vpand xmm11,xmm11,xmm13 3596 vaesenc xmm9,xmm9,xmm10 3597 vmovdqu xmm10,XMMWORD[((224-128))+rdi] 3598 xor r14d,r12d 3599 xor esi,edx 3600 xor r14d,r13d 3601 lea ebx,[rsi*1+rbx] 3602 mov r12d,r10d 3603 add eax,DWORD[44+rsp] 3604 and r12d,r9d 3605 rorx r13d,r9d,25 3606 rorx esi,r9d,11 3607 lea ebx,[r14*1+rbx] 3608 lea eax,[r12*1+rax] 3609 andn r12d,r9d,r11d 3610 xor r13d,esi 3611 rorx r14d,r9d,6 3612 lea eax,[r12*1+rax] 3613 xor r13d,r14d 3614 mov esi,ebx 3615 rorx r12d,ebx,22 3616 lea eax,[r13*1+rax] 3617 xor esi,ecx 3618 rorx r14d,ebx,13 3619 rorx r13d,ebx,2 3620 lea r8d,[rax*1+r8] 3621 and r15d,esi 3622 vpor xmm8,xmm8,xmm11 3623 vaesenclast xmm11,xmm9,xmm10 3624 vmovdqu xmm10,XMMWORD[((0-128))+rdi] 3625 xor r14d,r12d 3626 xor r15d,ecx 3627 xor r14d,r13d 3628 lea eax,[r15*1+rax] 3629 mov r12d,r9d 3630 vpextrq r12,xmm15,1 3631 vmovq r13,xmm15 3632 mov r15,QWORD[552+rsp] 3633 add eax,r14d 3634 lea rbp,[448+rsp] 3635 3636 vpand xmm11,xmm11,xmm14 3637 vpor xmm8,xmm8,xmm11 3638 vmovdqu XMMWORD[r13*1+r12],xmm8 3639 lea r13,[16+r13] 3640 3641 add eax,DWORD[r15] 3642 add ebx,DWORD[4+r15] 3643 add ecx,DWORD[8+r15] 3644 add edx,DWORD[12+r15] 3645 add r8d,DWORD[16+r15] 3646 add r9d,DWORD[20+r15] 3647 add r10d,DWORD[24+r15] 3648 add r11d,DWORD[28+r15] 3649 3650 mov DWORD[r15],eax 3651 mov DWORD[4+r15],ebx 3652 mov DWORD[8+r15],ecx 3653 mov DWORD[12+r15],edx 3654 mov DWORD[16+r15],r8d 3655 mov DWORD[20+r15],r9d 3656 mov DWORD[24+r15],r10d 3657 mov DWORD[28+r15],r11d 3658 3659 cmp r13,QWORD[80+rbp] 3660 je NEAR $L$done_avx2 3661 3662 xor r14d,r14d 3663 mov esi,ebx 3664 mov r12d,r9d 3665 xor esi,ecx 3666 jmp NEAR $L$ower_avx2 3667 ALIGN 16 3668 $L$ower_avx2: 3669 vmovdqu xmm9,XMMWORD[r13] 3670 vpinsrq xmm15,xmm15,r13,0 3671 add r11d,DWORD[((0+16))+rbp] 3672 and r12d,r8d 3673 rorx r13d,r8d,25 3674 rorx r15d,r8d,11 3675 lea eax,[r14*1+rax] 3676 lea r11d,[r12*1+r11] 3677 andn r12d,r8d,r10d 3678 xor r13d,r15d 3679 rorx r14d,r8d,6 3680 lea r11d,[r12*1+r11] 3681 xor r13d,r14d 3682 mov r15d,eax 3683 rorx r12d,eax,22 3684 lea r11d,[r13*1+r11] 3685 xor r15d,ebx 3686 rorx r14d,eax,13 3687 rorx r13d,eax,2 3688 lea edx,[r11*1+rdx] 3689 and esi,r15d 3690 vpxor xmm9,xmm9,xmm10 3691 vmovdqu xmm10,XMMWORD[((16-128))+rdi] 3692 xor r14d,r12d 3693 xor esi,ebx 3694 xor r14d,r13d 3695 lea r11d,[rsi*1+r11] 3696 mov r12d,r8d 3697 add r10d,DWORD[((4+16))+rbp] 3698 and r12d,edx 3699 rorx r13d,edx,25 3700 rorx esi,edx,11 3701 lea r11d,[r14*1+r11] 3702 lea r10d,[r12*1+r10] 3703 andn r12d,edx,r9d 3704 xor r13d,esi 3705 rorx r14d,edx,6 3706 lea r10d,[r12*1+r10] 3707 xor r13d,r14d 3708 mov esi,r11d 3709 rorx r12d,r11d,22 3710 lea r10d,[r13*1+r10] 3711 xor esi,eax 3712 rorx r14d,r11d,13 3713 rorx r13d,r11d,2 3714 lea ecx,[r10*1+rcx] 3715 and r15d,esi 3716 vpxor xmm9,xmm9,xmm8 3717 xor r14d,r12d 3718 xor r15d,eax 3719 xor r14d,r13d 3720 lea r10d,[r15*1+r10] 3721 mov r12d,edx 3722 add r9d,DWORD[((8+16))+rbp] 3723 and r12d,ecx 3724 rorx r13d,ecx,25 3725 rorx r15d,ecx,11 3726 lea r10d,[r14*1+r10] 3727 lea r9d,[r12*1+r9] 3728 andn r12d,ecx,r8d 3729 xor r13d,r15d 3730 rorx r14d,ecx,6 3731 lea r9d,[r12*1+r9] 3732 xor r13d,r14d 3733 mov r15d,r10d 3734 rorx r12d,r10d,22 3735 lea r9d,[r13*1+r9] 3736 xor r15d,r11d 3737 rorx r14d,r10d,13 3738 rorx r13d,r10d,2 3739 lea ebx,[r9*1+rbx] 3740 and esi,r15d 3741 vaesenc xmm9,xmm9,xmm10 3742 vmovdqu xmm10,XMMWORD[((32-128))+rdi] 3743 xor r14d,r12d 3744 xor esi,r11d 3745 xor r14d,r13d 3746 lea r9d,[rsi*1+r9] 3747 mov r12d,ecx 3748 add r8d,DWORD[((12+16))+rbp] 3749 and r12d,ebx 3750 rorx r13d,ebx,25 3751 rorx esi,ebx,11 3752 lea r9d,[r14*1+r9] 3753 lea r8d,[r12*1+r8] 3754 andn r12d,ebx,edx 3755 xor r13d,esi 3756 rorx r14d,ebx,6 3757 lea r8d,[r12*1+r8] 3758 xor r13d,r14d 3759 mov esi,r9d 3760 rorx r12d,r9d,22 3761 lea r8d,[r13*1+r8] 3762 xor esi,r10d 3763 rorx r14d,r9d,13 3764 rorx r13d,r9d,2 3765 lea eax,[r8*1+rax] 3766 and r15d,esi 3767 vaesenc xmm9,xmm9,xmm10 3768 vmovdqu xmm10,XMMWORD[((48-128))+rdi] 3769 xor r14d,r12d 3770 xor r15d,r10d 3771 xor r14d,r13d 3772 lea r8d,[r15*1+r8] 3773 mov r12d,ebx 3774 add edx,DWORD[((32+16))+rbp] 3775 and r12d,eax 3776 rorx r13d,eax,25 3777 rorx r15d,eax,11 3778 lea r8d,[r14*1+r8] 3779 lea edx,[r12*1+rdx] 3780 andn r12d,eax,ecx 3781 xor r13d,r15d 3782 rorx r14d,eax,6 3783 lea edx,[r12*1+rdx] 3784 xor r13d,r14d 3785 mov r15d,r8d 3786 rorx r12d,r8d,22 3787 lea edx,[r13*1+rdx] 3788 xor r15d,r9d 3789 rorx r14d,r8d,13 3790 rorx r13d,r8d,2 3791 lea r11d,[rdx*1+r11] 3792 and esi,r15d 3793 vaesenc xmm9,xmm9,xmm10 3794 vmovdqu xmm10,XMMWORD[((64-128))+rdi] 3795 xor r14d,r12d 3796 xor esi,r9d 3797 xor r14d,r13d 3798 lea edx,[rsi*1+rdx] 3799 mov r12d,eax 3800 add ecx,DWORD[((36+16))+rbp] 3801 and r12d,r11d 3802 rorx r13d,r11d,25 3803 rorx esi,r11d,11 3804 lea edx,[r14*1+rdx] 3805 lea ecx,[r12*1+rcx] 3806 andn r12d,r11d,ebx 3807 xor r13d,esi 3808 rorx r14d,r11d,6 3809 lea ecx,[r12*1+rcx] 3810 xor r13d,r14d 3811 mov esi,edx 3812 rorx r12d,edx,22 3813 lea ecx,[r13*1+rcx] 3814 xor esi,r8d 3815 rorx r14d,edx,13 3816 rorx r13d,edx,2 3817 lea r10d,[rcx*1+r10] 3818 and r15d,esi 3819 vaesenc xmm9,xmm9,xmm10 3820 vmovdqu xmm10,XMMWORD[((80-128))+rdi] 3821 xor r14d,r12d 3822 xor r15d,r8d 3823 xor r14d,r13d 3824 lea ecx,[r15*1+rcx] 3825 mov r12d,r11d 3826 add ebx,DWORD[((40+16))+rbp] 3827 and r12d,r10d 3828 rorx r13d,r10d,25 3829 rorx r15d,r10d,11 3830 lea ecx,[r14*1+rcx] 3831 lea ebx,[r12*1+rbx] 3832 andn r12d,r10d,eax 3833 xor r13d,r15d 3834 rorx r14d,r10d,6 3835 lea ebx,[r12*1+rbx] 3836 xor r13d,r14d 3837 mov r15d,ecx 3838 rorx r12d,ecx,22 3839 lea ebx,[r13*1+rbx] 3840 xor r15d,edx 3841 rorx r14d,ecx,13 3842 rorx r13d,ecx,2 3843 lea r9d,[rbx*1+r9] 3844 and esi,r15d 3845 vaesenc xmm9,xmm9,xmm10 3846 vmovdqu xmm10,XMMWORD[((96-128))+rdi] 3847 xor r14d,r12d 3848 xor esi,edx 3849 xor r14d,r13d 3850 lea ebx,[rsi*1+rbx] 3851 mov r12d,r10d 3852 add eax,DWORD[((44+16))+rbp] 3853 and r12d,r9d 3854 rorx r13d,r9d,25 3855 rorx esi,r9d,11 3856 lea ebx,[r14*1+rbx] 3857 lea eax,[r12*1+rax] 3858 andn r12d,r9d,r11d 3859 xor r13d,esi 3860 rorx r14d,r9d,6 3861 lea eax,[r12*1+rax] 3862 xor r13d,r14d 3863 mov esi,ebx 3864 rorx r12d,ebx,22 3865 lea eax,[r13*1+rax] 3866 xor esi,ecx 3867 rorx r14d,ebx,13 3868 rorx r13d,ebx,2 3869 lea r8d,[rax*1+r8] 3870 and r15d,esi 3871 vaesenc xmm9,xmm9,xmm10 3872 vmovdqu xmm10,XMMWORD[((112-128))+rdi] 3873 xor r14d,r12d 3874 xor r15d,ecx 3875 xor r14d,r13d 3876 lea eax,[r15*1+rax] 3877 mov r12d,r9d 3878 lea rbp,[((-64))+rbp] 3879 add r11d,DWORD[((0+16))+rbp] 3880 and r12d,r8d 3881 rorx r13d,r8d,25 3882 rorx r15d,r8d,11 3883 lea eax,[r14*1+rax] 3884 lea r11d,[r12*1+r11] 3885 andn r12d,r8d,r10d 3886 xor r13d,r15d 3887 rorx r14d,r8d,6 3888 lea r11d,[r12*1+r11] 3889 xor r13d,r14d 3890 mov r15d,eax 3891 rorx r12d,eax,22 3892 lea r11d,[r13*1+r11] 3893 xor r15d,ebx 3894 rorx r14d,eax,13 3895 rorx r13d,eax,2 3896 lea edx,[r11*1+rdx] 3897 and esi,r15d 3898 vaesenc xmm9,xmm9,xmm10 3899 vmovdqu xmm10,XMMWORD[((128-128))+rdi] 3900 xor r14d,r12d 3901 xor esi,ebx 3902 xor r14d,r13d 3903 lea r11d,[rsi*1+r11] 3904 mov r12d,r8d 3905 add r10d,DWORD[((4+16))+rbp] 3906 and r12d,edx 3907 rorx r13d,edx,25 3908 rorx esi,edx,11 3909 lea r11d,[r14*1+r11] 3910 lea r10d,[r12*1+r10] 3911 andn r12d,edx,r9d 3912 xor r13d,esi 3913 rorx r14d,edx,6 3914 lea r10d,[r12*1+r10] 3915 xor r13d,r14d 3916 mov esi,r11d 3917 rorx r12d,r11d,22 3918 lea r10d,[r13*1+r10] 3919 xor esi,eax 3920 rorx r14d,r11d,13 3921 rorx r13d,r11d,2 3922 lea ecx,[r10*1+rcx] 3923 and r15d,esi 3924 vaesenc xmm9,xmm9,xmm10 3925 vmovdqu xmm10,XMMWORD[((144-128))+rdi] 3926 xor r14d,r12d 3927 xor r15d,eax 3928 xor r14d,r13d 3929 lea r10d,[r15*1+r10] 3930 mov r12d,edx 3931 add r9d,DWORD[((8+16))+rbp] 3932 and r12d,ecx 3933 rorx r13d,ecx,25 3934 rorx r15d,ecx,11 3935 lea r10d,[r14*1+r10] 3936 lea r9d,[r12*1+r9] 3937 andn r12d,ecx,r8d 3938 xor r13d,r15d 3939 rorx r14d,ecx,6 3940 lea r9d,[r12*1+r9] 3941 xor r13d,r14d 3942 mov r15d,r10d 3943 rorx r12d,r10d,22 3944 lea r9d,[r13*1+r9] 3945 xor r15d,r11d 3946 rorx r14d,r10d,13 3947 rorx r13d,r10d,2 3948 lea ebx,[r9*1+rbx] 3949 and esi,r15d 3950 vaesenc xmm9,xmm9,xmm10 3951 vmovdqu xmm10,XMMWORD[((160-128))+rdi] 3952 xor r14d,r12d 3953 xor esi,r11d 3954 xor r14d,r13d 3955 lea r9d,[rsi*1+r9] 3956 mov r12d,ecx 3957 add r8d,DWORD[((12+16))+rbp] 3958 and r12d,ebx 3959 rorx r13d,ebx,25 3960 rorx esi,ebx,11 3961 lea r9d,[r14*1+r9] 3962 lea r8d,[r12*1+r8] 3963 andn r12d,ebx,edx 3964 xor r13d,esi 3965 rorx r14d,ebx,6 3966 lea r8d,[r12*1+r8] 3967 xor r13d,r14d 3968 mov esi,r9d 3969 rorx r12d,r9d,22 3970 lea r8d,[r13*1+r8] 3971 xor esi,r10d 3972 rorx r14d,r9d,13 3973 rorx r13d,r9d,2 3974 lea eax,[r8*1+rax] 3975 and r15d,esi 3976 vaesenclast xmm11,xmm9,xmm10 3977 vaesenc xmm9,xmm9,xmm10 3978 vmovdqu xmm10,XMMWORD[((176-128))+rdi] 3979 xor r14d,r12d 3980 xor r15d,r10d 3981 xor r14d,r13d 3982 lea r8d,[r15*1+r8] 3983 mov r12d,ebx 3984 add edx,DWORD[((32+16))+rbp] 3985 and r12d,eax 3986 rorx r13d,eax,25 3987 rorx r15d,eax,11 3988 lea r8d,[r14*1+r8] 3989 lea edx,[r12*1+rdx] 3990 andn r12d,eax,ecx 3991 xor r13d,r15d 3992 rorx r14d,eax,6 3993 lea edx,[r12*1+rdx] 3994 xor r13d,r14d 3995 mov r15d,r8d 3996 rorx r12d,r8d,22 3997 lea edx,[r13*1+rdx] 3998 xor r15d,r9d 3999 rorx r14d,r8d,13 4000 rorx r13d,r8d,2 4001 lea r11d,[rdx*1+r11] 4002 and esi,r15d 4003 vpand xmm8,xmm11,xmm12 4004 vaesenc xmm9,xmm9,xmm10 4005 vmovdqu xmm10,XMMWORD[((192-128))+rdi] 4006 xor r14d,r12d 4007 xor esi,r9d 4008 xor r14d,r13d 4009 lea edx,[rsi*1+rdx] 4010 mov r12d,eax 4011 add ecx,DWORD[((36+16))+rbp] 4012 and r12d,r11d 4013 rorx r13d,r11d,25 4014 rorx esi,r11d,11 4015 lea edx,[r14*1+rdx] 4016 lea ecx,[r12*1+rcx] 4017 andn r12d,r11d,ebx 4018 xor r13d,esi 4019 rorx r14d,r11d,6 4020 lea ecx,[r12*1+rcx] 4021 xor r13d,r14d 4022 mov esi,edx 4023 rorx r12d,edx,22 4024 lea ecx,[r13*1+rcx] 4025 xor esi,r8d 4026 rorx r14d,edx,13 4027 rorx r13d,edx,2 4028 lea r10d,[rcx*1+r10] 4029 and r15d,esi 4030 vaesenclast xmm11,xmm9,xmm10 4031 vaesenc xmm9,xmm9,xmm10 4032 vmovdqu xmm10,XMMWORD[((208-128))+rdi] 4033 xor r14d,r12d 4034 xor r15d,r8d 4035 xor r14d,r13d 4036 lea ecx,[r15*1+rcx] 4037 mov r12d,r11d 4038 add ebx,DWORD[((40+16))+rbp] 4039 and r12d,r10d 4040 rorx r13d,r10d,25 4041 rorx r15d,r10d,11 4042 lea ecx,[r14*1+rcx] 4043 lea ebx,[r12*1+rbx] 4044 andn r12d,r10d,eax 4045 xor r13d,r15d 4046 rorx r14d,r10d,6 4047 lea ebx,[r12*1+rbx] 4048 xor r13d,r14d 4049 mov r15d,ecx 4050 rorx r12d,ecx,22 4051 lea ebx,[r13*1+rbx] 4052 xor r15d,edx 4053 rorx r14d,ecx,13 4054 rorx r13d,ecx,2 4055 lea r9d,[rbx*1+r9] 4056 and esi,r15d 4057 vpand xmm11,xmm11,xmm13 4058 vaesenc xmm9,xmm9,xmm10 4059 vmovdqu xmm10,XMMWORD[((224-128))+rdi] 4060 xor r14d,r12d 4061 xor esi,edx 4062 xor r14d,r13d 4063 lea ebx,[rsi*1+rbx] 4064 mov r12d,r10d 4065 add eax,DWORD[((44+16))+rbp] 4066 and r12d,r9d 4067 rorx r13d,r9d,25 4068 rorx esi,r9d,11 4069 lea ebx,[r14*1+rbx] 4070 lea eax,[r12*1+rax] 4071 andn r12d,r9d,r11d 4072 xor r13d,esi 4073 rorx r14d,r9d,6 4074 lea eax,[r12*1+rax] 4075 xor r13d,r14d 4076 mov esi,ebx 4077 rorx r12d,ebx,22 4078 lea eax,[r13*1+rax] 4079 xor esi,ecx 4080 rorx r14d,ebx,13 4081 rorx r13d,ebx,2 4082 lea r8d,[rax*1+r8] 4083 and r15d,esi 4084 vpor xmm8,xmm8,xmm11 4085 vaesenclast xmm11,xmm9,xmm10 4086 vmovdqu xmm10,XMMWORD[((0-128))+rdi] 4087 xor r14d,r12d 4088 xor r15d,ecx 4089 xor r14d,r13d 4090 lea eax,[r15*1+rax] 4091 mov r12d,r9d 4092 vmovq r13,xmm15 4093 vpextrq r15,xmm15,1 4094 vpand xmm11,xmm11,xmm14 4095 vpor xmm8,xmm8,xmm11 4096 lea rbp,[((-64))+rbp] 4097 vmovdqu XMMWORD[r13*1+r15],xmm8 4098 lea r13,[16+r13] 4099 cmp rbp,rsp 4100 jae NEAR $L$ower_avx2 4101 4102 mov r15,QWORD[552+rsp] 4103 lea r13,[64+r13] 4104 mov rsi,QWORD[560+rsp] 4105 add eax,r14d 4106 lea rsp,[448+rsp] 4107 4108 add eax,DWORD[r15] 4109 add ebx,DWORD[4+r15] 4110 add ecx,DWORD[8+r15] 4111 add edx,DWORD[12+r15] 4112 add r8d,DWORD[16+r15] 4113 add r9d,DWORD[20+r15] 4114 add r10d,DWORD[24+r15] 4115 lea r12,[r13*1+rsi] 4116 add r11d,DWORD[28+r15] 4117 4118 cmp r13,QWORD[((64+16))+rsp] 4119 4120 mov DWORD[r15],eax 4121 cmove r12,rsp 4122 mov DWORD[4+r15],ebx 4123 mov DWORD[8+r15],ecx 4124 mov DWORD[12+r15],edx 4125 mov DWORD[16+r15],r8d 4126 mov DWORD[20+r15],r9d 4127 mov DWORD[24+r15],r10d 4128 mov DWORD[28+r15],r11d 4129 4130 jbe NEAR $L$oop_avx2 4131 lea rbp,[rsp] 4132 4133 4134 4135 4136 $L$done_avx2: 4137 mov r8,QWORD[((64+32))+rbp] 4138 mov rsi,QWORD[((64+56))+rbp] 4139 4140 vmovdqu XMMWORD[r8],xmm8 4141 vzeroall 4142 movaps xmm6,XMMWORD[128+rbp] 4143 movaps xmm7,XMMWORD[144+rbp] 4144 movaps xmm8,XMMWORD[160+rbp] 4145 movaps xmm9,XMMWORD[176+rbp] 4146 movaps xmm10,XMMWORD[192+rbp] 4147 movaps xmm11,XMMWORD[208+rbp] 4148 movaps xmm12,XMMWORD[224+rbp] 4149 movaps xmm13,XMMWORD[240+rbp] 4150 movaps xmm14,XMMWORD[256+rbp] 4151 movaps xmm15,XMMWORD[272+rbp] 4152 mov r15,QWORD[((-48))+rsi] 4153 4154 mov r14,QWORD[((-40))+rsi] 4155 4156 mov r13,QWORD[((-32))+rsi] 4157 4158 mov r12,QWORD[((-24))+rsi] 4159 4160 mov rbp,QWORD[((-16))+rsi] 4161 4162 mov rbx,QWORD[((-8))+rsi] 4163 4164 lea rsp,[rsi] 4165 4166 $L$epilogue_avx2: 4167 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 4168 mov rsi,QWORD[16+rsp] 4169 DB 0F3h,0C3h ;repret 4170 4171 $L$SEH_end_aesni_cbc_sha256_enc_avx2: 4172 4173 ALIGN 32 4174 aesni_cbc_sha256_enc_shaext: 4175 mov QWORD[8+rsp],rdi ;WIN64 prologue 4176 mov QWORD[16+rsp],rsi 4177 mov rax,rsp 4178 $L$SEH_begin_aesni_cbc_sha256_enc_shaext: 4179 mov rdi,rcx 4180 mov rsi,rdx 4181 mov rdx,r8 4182 mov rcx,r9 4183 mov r8,QWORD[40+rsp] 4184 mov r9,QWORD[48+rsp] 4185 4186 4187 4188 mov r10,QWORD[56+rsp] 4189 lea rsp,[((-168))+rsp] 4190 movaps XMMWORD[(-8-160)+rax],xmm6 4191 movaps XMMWORD[(-8-144)+rax],xmm7 4192 movaps XMMWORD[(-8-128)+rax],xmm8 4193 movaps XMMWORD[(-8-112)+rax],xmm9 4194 movaps XMMWORD[(-8-96)+rax],xmm10 4195 movaps XMMWORD[(-8-80)+rax],xmm11 4196 movaps XMMWORD[(-8-64)+rax],xmm12 4197 movaps XMMWORD[(-8-48)+rax],xmm13 4198 movaps XMMWORD[(-8-32)+rax],xmm14 4199 movaps XMMWORD[(-8-16)+rax],xmm15 4200 $L$prologue_shaext: 4201 lea rax,[((K256+128))] 4202 movdqu xmm1,XMMWORD[r9] 4203 movdqu xmm2,XMMWORD[16+r9] 4204 movdqa xmm3,XMMWORD[((512-128))+rax] 4205 4206 mov r11d,DWORD[240+rcx] 4207 sub rsi,rdi 4208 movups xmm15,XMMWORD[rcx] 4209 movups xmm6,XMMWORD[r8] 4210 movups xmm4,XMMWORD[16+rcx] 4211 lea rcx,[112+rcx] 4212 4213 pshufd xmm0,xmm1,0x1b 4214 pshufd xmm1,xmm1,0xb1 4215 pshufd xmm2,xmm2,0x1b 4216 movdqa xmm7,xmm3 4217 DB 102,15,58,15,202,8 4218 punpcklqdq xmm2,xmm0 4219 4220 jmp NEAR $L$oop_shaext 4221 4222 ALIGN 16 4223 $L$oop_shaext: 4224 movdqu xmm10,XMMWORD[r10] 4225 movdqu xmm11,XMMWORD[16+r10] 4226 movdqu xmm12,XMMWORD[32+r10] 4227 DB 102,68,15,56,0,211 4228 movdqu xmm13,XMMWORD[48+r10] 4229 4230 movdqa xmm0,XMMWORD[((0-128))+rax] 4231 paddd xmm0,xmm10 4232 DB 102,68,15,56,0,219 4233 movdqa xmm9,xmm2 4234 movdqa xmm8,xmm1 4235 movups xmm14,XMMWORD[rdi] 4236 xorps xmm14,xmm15 4237 xorps xmm6,xmm14 4238 movups xmm5,XMMWORD[((-80))+rcx] 4239 aesenc xmm6,xmm4 4240 DB 15,56,203,209 4241 pshufd xmm0,xmm0,0x0e 4242 movups xmm4,XMMWORD[((-64))+rcx] 4243 aesenc xmm6,xmm5 4244 DB 15,56,203,202 4245 4246 movdqa xmm0,XMMWORD[((32-128))+rax] 4247 paddd xmm0,xmm11 4248 DB 102,68,15,56,0,227 4249 lea r10,[64+r10] 4250 movups xmm5,XMMWORD[((-48))+rcx] 4251 aesenc xmm6,xmm4 4252 DB 15,56,203,209 4253 pshufd xmm0,xmm0,0x0e 4254 movups xmm4,XMMWORD[((-32))+rcx] 4255 aesenc xmm6,xmm5 4256 DB 15,56,203,202 4257 4258 movdqa xmm0,XMMWORD[((64-128))+rax] 4259 paddd xmm0,xmm12 4260 DB 102,68,15,56,0,235 4261 DB 69,15,56,204,211 4262 movups xmm5,XMMWORD[((-16))+rcx] 4263 aesenc xmm6,xmm4 4264 DB 15,56,203,209 4265 pshufd xmm0,xmm0,0x0e 4266 movdqa xmm3,xmm13 4267 DB 102,65,15,58,15,220,4 4268 paddd xmm10,xmm3 4269 movups xmm4,XMMWORD[rcx] 4270 aesenc xmm6,xmm5 4271 DB 15,56,203,202 4272 4273 movdqa xmm0,XMMWORD[((96-128))+rax] 4274 paddd xmm0,xmm13 4275 DB 69,15,56,205,213 4276 DB 69,15,56,204,220 4277 movups xmm5,XMMWORD[16+rcx] 4278 aesenc xmm6,xmm4 4279 DB 15,56,203,209 4280 pshufd xmm0,xmm0,0x0e 4281 movups xmm4,XMMWORD[32+rcx] 4282 aesenc xmm6,xmm5 4283 movdqa xmm3,xmm10 4284 DB 102,65,15,58,15,221,4 4285 paddd xmm11,xmm3 4286 DB 15,56,203,202 4287 movdqa xmm0,XMMWORD[((128-128))+rax] 4288 paddd xmm0,xmm10 4289 DB 69,15,56,205,218 4290 DB 69,15,56,204,229 4291 movups xmm5,XMMWORD[48+rcx] 4292 aesenc xmm6,xmm4 4293 DB 15,56,203,209 4294 pshufd xmm0,xmm0,0x0e 4295 movdqa xmm3,xmm11 4296 DB 102,65,15,58,15,218,4 4297 paddd xmm12,xmm3 4298 cmp r11d,11 4299 jb NEAR $L$aesenclast1 4300 movups xmm4,XMMWORD[64+rcx] 4301 aesenc xmm6,xmm5 4302 movups xmm5,XMMWORD[80+rcx] 4303 aesenc xmm6,xmm4 4304 je NEAR $L$aesenclast1 4305 movups xmm4,XMMWORD[96+rcx] 4306 aesenc xmm6,xmm5 4307 movups xmm5,XMMWORD[112+rcx] 4308 aesenc xmm6,xmm4 4309 $L$aesenclast1: 4310 aesenclast xmm6,xmm5 4311 movups xmm4,XMMWORD[((16-112))+rcx] 4312 nop 4313 DB 15,56,203,202 4314 movups xmm14,XMMWORD[16+rdi] 4315 xorps xmm14,xmm15 4316 movups XMMWORD[rdi*1+rsi],xmm6 4317 xorps xmm6,xmm14 4318 movups xmm5,XMMWORD[((-80))+rcx] 4319 aesenc xmm6,xmm4 4320 movdqa xmm0,XMMWORD[((160-128))+rax] 4321 paddd xmm0,xmm11 4322 DB 69,15,56,205,227 4323 DB 69,15,56,204,234 4324 movups xmm4,XMMWORD[((-64))+rcx] 4325 aesenc xmm6,xmm5 4326 DB 15,56,203,209 4327 pshufd xmm0,xmm0,0x0e 4328 movdqa xmm3,xmm12 4329 DB 102,65,15,58,15,219,4 4330 paddd xmm13,xmm3 4331 movups xmm5,XMMWORD[((-48))+rcx] 4332 aesenc xmm6,xmm4 4333 DB 15,56,203,202 4334 movdqa xmm0,XMMWORD[((192-128))+rax] 4335 paddd xmm0,xmm12 4336 DB 69,15,56,205,236 4337 DB 69,15,56,204,211 4338 movups xmm4,XMMWORD[((-32))+rcx] 4339 aesenc xmm6,xmm5 4340 DB 15,56,203,209 4341 pshufd xmm0,xmm0,0x0e 4342 movdqa xmm3,xmm13 4343 DB 102,65,15,58,15,220,4 4344 paddd xmm10,xmm3 4345 movups xmm5,XMMWORD[((-16))+rcx] 4346 aesenc xmm6,xmm4 4347 DB 15,56,203,202 4348 movdqa xmm0,XMMWORD[((224-128))+rax] 4349 paddd xmm0,xmm13 4350 DB 69,15,56,205,213 4351 DB 69,15,56,204,220 4352 movups xmm4,XMMWORD[rcx] 4353 aesenc xmm6,xmm5 4354 DB 15,56,203,209 4355 pshufd xmm0,xmm0,0x0e 4356 movdqa xmm3,xmm10 4357 DB 102,65,15,58,15,221,4 4358 paddd xmm11,xmm3 4359 movups xmm5,XMMWORD[16+rcx] 4360 aesenc xmm6,xmm4 4361 DB 15,56,203,202 4362 movdqa xmm0,XMMWORD[((256-128))+rax] 4363 paddd xmm0,xmm10 4364 DB 69,15,56,205,218 4365 DB 69,15,56,204,229 4366 movups xmm4,XMMWORD[32+rcx] 4367 aesenc xmm6,xmm5 4368 DB 15,56,203,209 4369 pshufd xmm0,xmm0,0x0e 4370 movdqa xmm3,xmm11 4371 DB 102,65,15,58,15,218,4 4372 paddd xmm12,xmm3 4373 movups xmm5,XMMWORD[48+rcx] 4374 aesenc xmm6,xmm4 4375 cmp r11d,11 4376 jb NEAR $L$aesenclast2 4377 movups xmm4,XMMWORD[64+rcx] 4378 aesenc xmm6,xmm5 4379 movups xmm5,XMMWORD[80+rcx] 4380 aesenc xmm6,xmm4 4381 je NEAR $L$aesenclast2 4382 movups xmm4,XMMWORD[96+rcx] 4383 aesenc xmm6,xmm5 4384 movups xmm5,XMMWORD[112+rcx] 4385 aesenc xmm6,xmm4 4386 $L$aesenclast2: 4387 aesenclast xmm6,xmm5 4388 movups xmm4,XMMWORD[((16-112))+rcx] 4389 nop 4390 DB 15,56,203,202 4391 movups xmm14,XMMWORD[32+rdi] 4392 xorps xmm14,xmm15 4393 movups XMMWORD[16+rdi*1+rsi],xmm6 4394 xorps xmm6,xmm14 4395 movups xmm5,XMMWORD[((-80))+rcx] 4396 aesenc xmm6,xmm4 4397 movdqa xmm0,XMMWORD[((288-128))+rax] 4398 paddd xmm0,xmm11 4399 DB 69,15,56,205,227 4400 DB 69,15,56,204,234 4401 movups xmm4,XMMWORD[((-64))+rcx] 4402 aesenc xmm6,xmm5 4403 DB 15,56,203,209 4404 pshufd xmm0,xmm0,0x0e 4405 movdqa xmm3,xmm12 4406 DB 102,65,15,58,15,219,4 4407 paddd xmm13,xmm3 4408 movups xmm5,XMMWORD[((-48))+rcx] 4409 aesenc xmm6,xmm4 4410 DB 15,56,203,202 4411 movdqa xmm0,XMMWORD[((320-128))+rax] 4412 paddd xmm0,xmm12 4413 DB 69,15,56,205,236 4414 DB 69,15,56,204,211 4415 movups xmm4,XMMWORD[((-32))+rcx] 4416 aesenc xmm6,xmm5 4417 DB 15,56,203,209 4418 pshufd xmm0,xmm0,0x0e 4419 movdqa xmm3,xmm13 4420 DB 102,65,15,58,15,220,4 4421 paddd xmm10,xmm3 4422 movups xmm5,XMMWORD[((-16))+rcx] 4423 aesenc xmm6,xmm4 4424 DB 15,56,203,202 4425 movdqa xmm0,XMMWORD[((352-128))+rax] 4426 paddd xmm0,xmm13 4427 DB 69,15,56,205,213 4428 DB 69,15,56,204,220 4429 movups xmm4,XMMWORD[rcx] 4430 aesenc xmm6,xmm5 4431 DB 15,56,203,209 4432 pshufd xmm0,xmm0,0x0e 4433 movdqa xmm3,xmm10 4434 DB 102,65,15,58,15,221,4 4435 paddd xmm11,xmm3 4436 movups xmm5,XMMWORD[16+rcx] 4437 aesenc xmm6,xmm4 4438 DB 15,56,203,202 4439 movdqa xmm0,XMMWORD[((384-128))+rax] 4440 paddd xmm0,xmm10 4441 DB 69,15,56,205,218 4442 DB 69,15,56,204,229 4443 movups xmm4,XMMWORD[32+rcx] 4444 aesenc xmm6,xmm5 4445 DB 15,56,203,209 4446 pshufd xmm0,xmm0,0x0e 4447 movdqa xmm3,xmm11 4448 DB 102,65,15,58,15,218,4 4449 paddd xmm12,xmm3 4450 movups xmm5,XMMWORD[48+rcx] 4451 aesenc xmm6,xmm4 4452 DB 15,56,203,202 4453 movdqa xmm0,XMMWORD[((416-128))+rax] 4454 paddd xmm0,xmm11 4455 DB 69,15,56,205,227 4456 DB 69,15,56,204,234 4457 cmp r11d,11 4458 jb NEAR $L$aesenclast3 4459 movups xmm4,XMMWORD[64+rcx] 4460 aesenc xmm6,xmm5 4461 movups xmm5,XMMWORD[80+rcx] 4462 aesenc xmm6,xmm4 4463 je NEAR $L$aesenclast3 4464 movups xmm4,XMMWORD[96+rcx] 4465 aesenc xmm6,xmm5 4466 movups xmm5,XMMWORD[112+rcx] 4467 aesenc xmm6,xmm4 4468 $L$aesenclast3: 4469 aesenclast xmm6,xmm5 4470 movups xmm4,XMMWORD[((16-112))+rcx] 4471 nop 4472 DB 15,56,203,209 4473 pshufd xmm0,xmm0,0x0e 4474 movdqa xmm3,xmm12 4475 DB 102,65,15,58,15,219,4 4476 paddd xmm13,xmm3 4477 movups xmm14,XMMWORD[48+rdi] 4478 xorps xmm14,xmm15 4479 movups XMMWORD[32+rdi*1+rsi],xmm6 4480 xorps xmm6,xmm14 4481 movups xmm5,XMMWORD[((-80))+rcx] 4482 aesenc xmm6,xmm4 4483 movups xmm4,XMMWORD[((-64))+rcx] 4484 aesenc xmm6,xmm5 4485 DB 15,56,203,202 4486 4487 movdqa xmm0,XMMWORD[((448-128))+rax] 4488 paddd xmm0,xmm12 4489 DB 69,15,56,205,236 4490 movdqa xmm3,xmm7 4491 movups xmm5,XMMWORD[((-48))+rcx] 4492 aesenc xmm6,xmm4 4493 DB 15,56,203,209 4494 pshufd xmm0,xmm0,0x0e 4495 movups xmm4,XMMWORD[((-32))+rcx] 4496 aesenc xmm6,xmm5 4497 DB 15,56,203,202 4498 4499 movdqa xmm0,XMMWORD[((480-128))+rax] 4500 paddd xmm0,xmm13 4501 movups xmm5,XMMWORD[((-16))+rcx] 4502 aesenc xmm6,xmm4 4503 movups xmm4,XMMWORD[rcx] 4504 aesenc xmm6,xmm5 4505 DB 15,56,203,209 4506 pshufd xmm0,xmm0,0x0e 4507 movups xmm5,XMMWORD[16+rcx] 4508 aesenc xmm6,xmm4 4509 DB 15,56,203,202 4510 4511 movups xmm4,XMMWORD[32+rcx] 4512 aesenc xmm6,xmm5 4513 movups xmm5,XMMWORD[48+rcx] 4514 aesenc xmm6,xmm4 4515 cmp r11d,11 4516 jb NEAR $L$aesenclast4 4517 movups xmm4,XMMWORD[64+rcx] 4518 aesenc xmm6,xmm5 4519 movups xmm5,XMMWORD[80+rcx] 4520 aesenc xmm6,xmm4 4521 je NEAR $L$aesenclast4 4522 movups xmm4,XMMWORD[96+rcx] 4523 aesenc xmm6,xmm5 4524 movups xmm5,XMMWORD[112+rcx] 4525 aesenc xmm6,xmm4 4526 $L$aesenclast4: 4527 aesenclast xmm6,xmm5 4528 movups xmm4,XMMWORD[((16-112))+rcx] 4529 nop 4530 4531 paddd xmm2,xmm9 4532 paddd xmm1,xmm8 4533 4534 dec rdx 4535 movups XMMWORD[48+rdi*1+rsi],xmm6 4536 lea rdi,[64+rdi] 4537 jnz NEAR $L$oop_shaext 4538 4539 pshufd xmm2,xmm2,0xb1 4540 pshufd xmm3,xmm1,0x1b 4541 pshufd xmm1,xmm1,0xb1 4542 punpckhqdq xmm1,xmm2 4543 DB 102,15,58,15,211,8 4544 4545 movups XMMWORD[r8],xmm6 4546 movdqu XMMWORD[r9],xmm1 4547 movdqu XMMWORD[16+r9],xmm2 4548 movaps xmm6,XMMWORD[rsp] 4549 movaps xmm7,XMMWORD[16+rsp] 4550 movaps xmm8,XMMWORD[32+rsp] 4551 movaps xmm9,XMMWORD[48+rsp] 4552 movaps xmm10,XMMWORD[64+rsp] 4553 movaps xmm11,XMMWORD[80+rsp] 4554 movaps xmm12,XMMWORD[96+rsp] 4555 movaps xmm13,XMMWORD[112+rsp] 4556 movaps xmm14,XMMWORD[128+rsp] 4557 movaps xmm15,XMMWORD[144+rsp] 4558 lea rsp,[((8+160))+rsp] 4559 $L$epilogue_shaext: 4560 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 4561 mov rsi,QWORD[16+rsp] 4562 DB 0F3h,0C3h ;repret 4563 4564 $L$SEH_end_aesni_cbc_sha256_enc_shaext: 4565 EXTERN __imp_RtlVirtualUnwind 4566 4567 ALIGN 16 4568 se_handler: 4569 push rsi 4570 push rdi 4571 push rbx 4572 push rbp 4573 push r12 4574 push r13 4575 push r14 4576 push r15 4577 pushfq 4578 sub rsp,64 4579 4580 mov rax,QWORD[120+r8] 4581 mov rbx,QWORD[248+r8] 4582 4583 mov rsi,QWORD[8+r9] 4584 mov r11,QWORD[56+r9] 4585 4586 mov r10d,DWORD[r11] 4587 lea r10,[r10*1+rsi] 4588 cmp rbx,r10 4589 jb NEAR $L$in_prologue 4590 4591 mov rax,QWORD[152+r8] 4592 4593 mov r10d,DWORD[4+r11] 4594 lea r10,[r10*1+rsi] 4595 cmp rbx,r10 4596 jae NEAR $L$in_prologue 4597 lea r10,[aesni_cbc_sha256_enc_shaext] 4598 cmp rbx,r10 4599 jb NEAR $L$not_in_shaext 4600 4601 lea rsi,[rax] 4602 lea rdi,[512+r8] 4603 mov ecx,20 4604 DD 0xa548f3fc 4605 lea rax,[168+rax] 4606 jmp NEAR $L$in_prologue 4607 $L$not_in_shaext: 4608 lea r10,[$L$avx2_shortcut] 4609 cmp rbx,r10 4610 jb NEAR $L$not_in_avx2 4611 4612 and rax,-256*4 4613 add rax,448 4614 $L$not_in_avx2: 4615 mov rsi,rax 4616 mov rax,QWORD[((64+56))+rax] 4617 4618 mov rbx,QWORD[((-8))+rax] 4619 mov rbp,QWORD[((-16))+rax] 4620 mov r12,QWORD[((-24))+rax] 4621 mov r13,QWORD[((-32))+rax] 4622 mov r14,QWORD[((-40))+rax] 4623 mov r15,QWORD[((-48))+rax] 4624 mov QWORD[144+r8],rbx 4625 mov QWORD[160+r8],rbp 4626 mov QWORD[216+r8],r12 4627 mov QWORD[224+r8],r13 4628 mov QWORD[232+r8],r14 4629 mov QWORD[240+r8],r15 4630 4631 lea rsi,[((64+64))+rsi] 4632 lea rdi,[512+r8] 4633 mov ecx,20 4634 DD 0xa548f3fc 4635 4636 $L$in_prologue: 4637 mov rdi,QWORD[8+rax] 4638 mov rsi,QWORD[16+rax] 4639 mov QWORD[152+r8],rax 4640 mov QWORD[168+r8],rsi 4641 mov QWORD[176+r8],rdi 4642 4643 mov rdi,QWORD[40+r9] 4644 mov rsi,r8 4645 mov ecx,154 4646 DD 0xa548f3fc 4647 4648 mov rsi,r9 4649 xor rcx,rcx 4650 mov rdx,QWORD[8+rsi] 4651 mov r8,QWORD[rsi] 4652 mov r9,QWORD[16+rsi] 4653 mov r10,QWORD[40+rsi] 4654 lea r11,[56+rsi] 4655 lea r12,[24+rsi] 4656 mov QWORD[32+rsp],r10 4657 mov QWORD[40+rsp],r11 4658 mov QWORD[48+rsp],r12 4659 mov QWORD[56+rsp],rcx 4660 call QWORD[__imp_RtlVirtualUnwind] 4661 4662 mov eax,1 4663 add rsp,64 4664 popfq 4665 pop r15 4666 pop r14 4667 pop r13 4668 pop r12 4669 pop rbp 4670 pop rbx 4671 pop rdi 4672 pop rsi 4673 DB 0F3h,0C3h ;repret 4674 4675 4676 section .pdata rdata align=4 4677 DD $L$SEH_begin_aesni_cbc_sha256_enc_xop wrt ..imagebase 4678 DD $L$SEH_end_aesni_cbc_sha256_enc_xop wrt ..imagebase 4679 DD $L$SEH_info_aesni_cbc_sha256_enc_xop wrt ..imagebase 4680 4681 DD $L$SEH_begin_aesni_cbc_sha256_enc_avx wrt ..imagebase 4682 DD $L$SEH_end_aesni_cbc_sha256_enc_avx wrt ..imagebase 4683 DD $L$SEH_info_aesni_cbc_sha256_enc_avx wrt ..imagebase 4684 DD $L$SEH_begin_aesni_cbc_sha256_enc_avx2 wrt ..imagebase 4685 DD $L$SEH_end_aesni_cbc_sha256_enc_avx2 wrt ..imagebase 4686 DD $L$SEH_info_aesni_cbc_sha256_enc_avx2 wrt ..imagebase 4687 DD $L$SEH_begin_aesni_cbc_sha256_enc_shaext wrt ..imagebase 4688 DD $L$SEH_end_aesni_cbc_sha256_enc_shaext wrt ..imagebase 4689 DD $L$SEH_info_aesni_cbc_sha256_enc_shaext wrt ..imagebase 4690 section .xdata rdata align=8 4691 ALIGN 8 4692 $L$SEH_info_aesni_cbc_sha256_enc_xop: 4693 DB 9,0,0,0 4694 DD se_handler wrt ..imagebase 4695 DD $L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase 4696 4697 $L$SEH_info_aesni_cbc_sha256_enc_avx: 4698 DB 9,0,0,0 4699 DD se_handler wrt ..imagebase 4700 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase 4701 $L$SEH_info_aesni_cbc_sha256_enc_avx2: 4702 DB 9,0,0,0 4703 DD se_handler wrt ..imagebase 4704 DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase 4705 $L$SEH_info_aesni_cbc_sha256_enc_shaext: 4706 DB 9,0,0,0 4707 DD se_handler wrt ..imagebase 4708 DD $L$prologue_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/chacha-x86_64.S
r97373 r99371 62 62 je NEAR $L$no_data 63 63 mov r10,QWORD[((OPENSSL_ia32cap_P+4))] 64 bt r10,48 65 jc NEAR $L$ChaCha20_avx512 66 test r10,r10 67 js NEAR $L$ChaCha20_avx512vl 64 68 test r10d,512 65 69 jnz NEAR $L$ChaCha20_ssse3 … … 357 361 mov r9,rsp 358 362 363 test r10d,2048 364 jnz NEAR $L$ChaCha20_4xop 359 365 cmp rdx,128 360 366 je NEAR $L$ChaCha20_128 … … 696 702 697 703 mov r11,r10 704 shr r10,32 705 test r10,32 706 jnz NEAR $L$ChaCha20_8x 698 707 cmp rdx,192 699 708 ja NEAR $L$proceed4x … … 1260 1269 1261 1270 $L$SEH_end_ChaCha20_4x: 1271 1272 ALIGN 32 1273 ChaCha20_4xop: 1274 mov QWORD[8+rsp],rdi ;WIN64 prologue 1275 mov QWORD[16+rsp],rsi 1276 mov rax,rsp 1277 $L$SEH_begin_ChaCha20_4xop: 1278 mov rdi,rcx 1279 mov rsi,rdx 1280 mov rdx,r8 1281 mov rcx,r9 1282 mov r8,QWORD[40+rsp] 1283 1284 1285 1286 $L$ChaCha20_4xop: 1287 mov r9,rsp 1288 1289 sub rsp,0x140+168 1290 movaps XMMWORD[(-168)+r9],xmm6 1291 movaps XMMWORD[(-152)+r9],xmm7 1292 movaps XMMWORD[(-136)+r9],xmm8 1293 movaps XMMWORD[(-120)+r9],xmm9 1294 movaps XMMWORD[(-104)+r9],xmm10 1295 movaps XMMWORD[(-88)+r9],xmm11 1296 movaps XMMWORD[(-72)+r9],xmm12 1297 movaps XMMWORD[(-56)+r9],xmm13 1298 movaps XMMWORD[(-40)+r9],xmm14 1299 movaps XMMWORD[(-24)+r9],xmm15 1300 $L$4xop_body: 1301 vzeroupper 1302 1303 vmovdqa xmm11,XMMWORD[$L$sigma] 1304 vmovdqu xmm3,XMMWORD[rcx] 1305 vmovdqu xmm15,XMMWORD[16+rcx] 1306 vmovdqu xmm7,XMMWORD[r8] 1307 lea rcx,[256+rsp] 1308 1309 vpshufd xmm8,xmm11,0x00 1310 vpshufd xmm9,xmm11,0x55 1311 vmovdqa XMMWORD[64+rsp],xmm8 1312 vpshufd xmm10,xmm11,0xaa 1313 vmovdqa XMMWORD[80+rsp],xmm9 1314 vpshufd xmm11,xmm11,0xff 1315 vmovdqa XMMWORD[96+rsp],xmm10 1316 vmovdqa XMMWORD[112+rsp],xmm11 1317 1318 vpshufd xmm0,xmm3,0x00 1319 vpshufd xmm1,xmm3,0x55 1320 vmovdqa XMMWORD[(128-256)+rcx],xmm0 1321 vpshufd xmm2,xmm3,0xaa 1322 vmovdqa XMMWORD[(144-256)+rcx],xmm1 1323 vpshufd xmm3,xmm3,0xff 1324 vmovdqa XMMWORD[(160-256)+rcx],xmm2 1325 vmovdqa XMMWORD[(176-256)+rcx],xmm3 1326 1327 vpshufd xmm12,xmm15,0x00 1328 vpshufd xmm13,xmm15,0x55 1329 vmovdqa XMMWORD[(192-256)+rcx],xmm12 1330 vpshufd xmm14,xmm15,0xaa 1331 vmovdqa XMMWORD[(208-256)+rcx],xmm13 1332 vpshufd xmm15,xmm15,0xff 1333 vmovdqa XMMWORD[(224-256)+rcx],xmm14 1334 vmovdqa XMMWORD[(240-256)+rcx],xmm15 1335 1336 vpshufd xmm4,xmm7,0x00 1337 vpshufd xmm5,xmm7,0x55 1338 vpaddd xmm4,xmm4,XMMWORD[$L$inc] 1339 vpshufd xmm6,xmm7,0xaa 1340 vmovdqa XMMWORD[(272-256)+rcx],xmm5 1341 vpshufd xmm7,xmm7,0xff 1342 vmovdqa XMMWORD[(288-256)+rcx],xmm6 1343 vmovdqa XMMWORD[(304-256)+rcx],xmm7 1344 1345 jmp NEAR $L$oop_enter4xop 1346 1347 ALIGN 32 1348 $L$oop_outer4xop: 1349 vmovdqa xmm8,XMMWORD[64+rsp] 1350 vmovdqa xmm9,XMMWORD[80+rsp] 1351 vmovdqa xmm10,XMMWORD[96+rsp] 1352 vmovdqa xmm11,XMMWORD[112+rsp] 1353 vmovdqa xmm0,XMMWORD[((128-256))+rcx] 1354 vmovdqa xmm1,XMMWORD[((144-256))+rcx] 1355 vmovdqa xmm2,XMMWORD[((160-256))+rcx] 1356 vmovdqa xmm3,XMMWORD[((176-256))+rcx] 1357 vmovdqa xmm12,XMMWORD[((192-256))+rcx] 1358 vmovdqa xmm13,XMMWORD[((208-256))+rcx] 1359 vmovdqa xmm14,XMMWORD[((224-256))+rcx] 1360 vmovdqa xmm15,XMMWORD[((240-256))+rcx] 1361 vmovdqa xmm4,XMMWORD[((256-256))+rcx] 1362 vmovdqa xmm5,XMMWORD[((272-256))+rcx] 1363 vmovdqa xmm6,XMMWORD[((288-256))+rcx] 1364 vmovdqa xmm7,XMMWORD[((304-256))+rcx] 1365 vpaddd xmm4,xmm4,XMMWORD[$L$four] 1366 1367 $L$oop_enter4xop: 1368 mov eax,10 1369 vmovdqa XMMWORD[(256-256)+rcx],xmm4 1370 jmp NEAR $L$oop4xop 1371 1372 ALIGN 32 1373 $L$oop4xop: 1374 vpaddd xmm8,xmm8,xmm0 1375 vpaddd xmm9,xmm9,xmm1 1376 vpaddd xmm10,xmm10,xmm2 1377 vpaddd xmm11,xmm11,xmm3 1378 vpxor xmm4,xmm8,xmm4 1379 vpxor xmm5,xmm9,xmm5 1380 vpxor xmm6,xmm10,xmm6 1381 vpxor xmm7,xmm11,xmm7 1382 DB 143,232,120,194,228,16 1383 DB 143,232,120,194,237,16 1384 DB 143,232,120,194,246,16 1385 DB 143,232,120,194,255,16 1386 vpaddd xmm12,xmm12,xmm4 1387 vpaddd xmm13,xmm13,xmm5 1388 vpaddd xmm14,xmm14,xmm6 1389 vpaddd xmm15,xmm15,xmm7 1390 vpxor xmm0,xmm12,xmm0 1391 vpxor xmm1,xmm13,xmm1 1392 vpxor xmm2,xmm2,xmm14 1393 vpxor xmm3,xmm3,xmm15 1394 DB 143,232,120,194,192,12 1395 DB 143,232,120,194,201,12 1396 DB 143,232,120,194,210,12 1397 DB 143,232,120,194,219,12 1398 vpaddd xmm8,xmm0,xmm8 1399 vpaddd xmm9,xmm1,xmm9 1400 vpaddd xmm10,xmm10,xmm2 1401 vpaddd xmm11,xmm11,xmm3 1402 vpxor xmm4,xmm8,xmm4 1403 vpxor xmm5,xmm9,xmm5 1404 vpxor xmm6,xmm10,xmm6 1405 vpxor xmm7,xmm11,xmm7 1406 DB 143,232,120,194,228,8 1407 DB 143,232,120,194,237,8 1408 DB 143,232,120,194,246,8 1409 DB 143,232,120,194,255,8 1410 vpaddd xmm12,xmm12,xmm4 1411 vpaddd xmm13,xmm13,xmm5 1412 vpaddd xmm14,xmm14,xmm6 1413 vpaddd xmm15,xmm15,xmm7 1414 vpxor xmm0,xmm12,xmm0 1415 vpxor xmm1,xmm13,xmm1 1416 vpxor xmm2,xmm2,xmm14 1417 vpxor xmm3,xmm3,xmm15 1418 DB 143,232,120,194,192,7 1419 DB 143,232,120,194,201,7 1420 DB 143,232,120,194,210,7 1421 DB 143,232,120,194,219,7 1422 vpaddd xmm8,xmm8,xmm1 1423 vpaddd xmm9,xmm9,xmm2 1424 vpaddd xmm10,xmm10,xmm3 1425 vpaddd xmm11,xmm11,xmm0 1426 vpxor xmm7,xmm8,xmm7 1427 vpxor xmm4,xmm9,xmm4 1428 vpxor xmm5,xmm10,xmm5 1429 vpxor xmm6,xmm11,xmm6 1430 DB 143,232,120,194,255,16 1431 DB 143,232,120,194,228,16 1432 DB 143,232,120,194,237,16 1433 DB 143,232,120,194,246,16 1434 vpaddd xmm14,xmm14,xmm7 1435 vpaddd xmm15,xmm15,xmm4 1436 vpaddd xmm12,xmm12,xmm5 1437 vpaddd xmm13,xmm13,xmm6 1438 vpxor xmm1,xmm14,xmm1 1439 vpxor xmm2,xmm15,xmm2 1440 vpxor xmm3,xmm3,xmm12 1441 vpxor xmm0,xmm0,xmm13 1442 DB 143,232,120,194,201,12 1443 DB 143,232,120,194,210,12 1444 DB 143,232,120,194,219,12 1445 DB 143,232,120,194,192,12 1446 vpaddd xmm8,xmm1,xmm8 1447 vpaddd xmm9,xmm2,xmm9 1448 vpaddd xmm10,xmm10,xmm3 1449 vpaddd xmm11,xmm11,xmm0 1450 vpxor xmm7,xmm8,xmm7 1451 vpxor xmm4,xmm9,xmm4 1452 vpxor xmm5,xmm10,xmm5 1453 vpxor xmm6,xmm11,xmm6 1454 DB 143,232,120,194,255,8 1455 DB 143,232,120,194,228,8 1456 DB 143,232,120,194,237,8 1457 DB 143,232,120,194,246,8 1458 vpaddd xmm14,xmm14,xmm7 1459 vpaddd xmm15,xmm15,xmm4 1460 vpaddd xmm12,xmm12,xmm5 1461 vpaddd xmm13,xmm13,xmm6 1462 vpxor xmm1,xmm14,xmm1 1463 vpxor xmm2,xmm15,xmm2 1464 vpxor xmm3,xmm3,xmm12 1465 vpxor xmm0,xmm0,xmm13 1466 DB 143,232,120,194,201,7 1467 DB 143,232,120,194,210,7 1468 DB 143,232,120,194,219,7 1469 DB 143,232,120,194,192,7 1470 dec eax 1471 jnz NEAR $L$oop4xop 1472 1473 vpaddd xmm8,xmm8,XMMWORD[64+rsp] 1474 vpaddd xmm9,xmm9,XMMWORD[80+rsp] 1475 vpaddd xmm10,xmm10,XMMWORD[96+rsp] 1476 vpaddd xmm11,xmm11,XMMWORD[112+rsp] 1477 1478 vmovdqa XMMWORD[32+rsp],xmm14 1479 vmovdqa XMMWORD[48+rsp],xmm15 1480 1481 vpunpckldq xmm14,xmm8,xmm9 1482 vpunpckldq xmm15,xmm10,xmm11 1483 vpunpckhdq xmm8,xmm8,xmm9 1484 vpunpckhdq xmm10,xmm10,xmm11 1485 vpunpcklqdq xmm9,xmm14,xmm15 1486 vpunpckhqdq xmm14,xmm14,xmm15 1487 vpunpcklqdq xmm11,xmm8,xmm10 1488 vpunpckhqdq xmm8,xmm8,xmm10 1489 vpaddd xmm0,xmm0,XMMWORD[((128-256))+rcx] 1490 vpaddd xmm1,xmm1,XMMWORD[((144-256))+rcx] 1491 vpaddd xmm2,xmm2,XMMWORD[((160-256))+rcx] 1492 vpaddd xmm3,xmm3,XMMWORD[((176-256))+rcx] 1493 1494 vmovdqa XMMWORD[rsp],xmm9 1495 vmovdqa XMMWORD[16+rsp],xmm14 1496 vmovdqa xmm9,XMMWORD[32+rsp] 1497 vmovdqa xmm14,XMMWORD[48+rsp] 1498 1499 vpunpckldq xmm10,xmm0,xmm1 1500 vpunpckldq xmm15,xmm2,xmm3 1501 vpunpckhdq xmm0,xmm0,xmm1 1502 vpunpckhdq xmm2,xmm2,xmm3 1503 vpunpcklqdq xmm1,xmm10,xmm15 1504 vpunpckhqdq xmm10,xmm10,xmm15 1505 vpunpcklqdq xmm3,xmm0,xmm2 1506 vpunpckhqdq xmm0,xmm0,xmm2 1507 vpaddd xmm12,xmm12,XMMWORD[((192-256))+rcx] 1508 vpaddd xmm13,xmm13,XMMWORD[((208-256))+rcx] 1509 vpaddd xmm9,xmm9,XMMWORD[((224-256))+rcx] 1510 vpaddd xmm14,xmm14,XMMWORD[((240-256))+rcx] 1511 1512 vpunpckldq xmm2,xmm12,xmm13 1513 vpunpckldq xmm15,xmm9,xmm14 1514 vpunpckhdq xmm12,xmm12,xmm13 1515 vpunpckhdq xmm9,xmm9,xmm14 1516 vpunpcklqdq xmm13,xmm2,xmm15 1517 vpunpckhqdq xmm2,xmm2,xmm15 1518 vpunpcklqdq xmm14,xmm12,xmm9 1519 vpunpckhqdq xmm12,xmm12,xmm9 1520 vpaddd xmm4,xmm4,XMMWORD[((256-256))+rcx] 1521 vpaddd xmm5,xmm5,XMMWORD[((272-256))+rcx] 1522 vpaddd xmm6,xmm6,XMMWORD[((288-256))+rcx] 1523 vpaddd xmm7,xmm7,XMMWORD[((304-256))+rcx] 1524 1525 vpunpckldq xmm9,xmm4,xmm5 1526 vpunpckldq xmm15,xmm6,xmm7 1527 vpunpckhdq xmm4,xmm4,xmm5 1528 vpunpckhdq xmm6,xmm6,xmm7 1529 vpunpcklqdq xmm5,xmm9,xmm15 1530 vpunpckhqdq xmm9,xmm9,xmm15 1531 vpunpcklqdq xmm7,xmm4,xmm6 1532 vpunpckhqdq xmm4,xmm4,xmm6 1533 vmovdqa xmm6,XMMWORD[rsp] 1534 vmovdqa xmm15,XMMWORD[16+rsp] 1535 1536 cmp rdx,64*4 1537 jb NEAR $L$tail4xop 1538 1539 vpxor xmm6,xmm6,XMMWORD[rsi] 1540 vpxor xmm1,xmm1,XMMWORD[16+rsi] 1541 vpxor xmm13,xmm13,XMMWORD[32+rsi] 1542 vpxor xmm5,xmm5,XMMWORD[48+rsi] 1543 vpxor xmm15,xmm15,XMMWORD[64+rsi] 1544 vpxor xmm10,xmm10,XMMWORD[80+rsi] 1545 vpxor xmm2,xmm2,XMMWORD[96+rsi] 1546 vpxor xmm9,xmm9,XMMWORD[112+rsi] 1547 lea rsi,[128+rsi] 1548 vpxor xmm11,xmm11,XMMWORD[rsi] 1549 vpxor xmm3,xmm3,XMMWORD[16+rsi] 1550 vpxor xmm14,xmm14,XMMWORD[32+rsi] 1551 vpxor xmm7,xmm7,XMMWORD[48+rsi] 1552 vpxor xmm8,xmm8,XMMWORD[64+rsi] 1553 vpxor xmm0,xmm0,XMMWORD[80+rsi] 1554 vpxor xmm12,xmm12,XMMWORD[96+rsi] 1555 vpxor xmm4,xmm4,XMMWORD[112+rsi] 1556 lea rsi,[128+rsi] 1557 1558 vmovdqu XMMWORD[rdi],xmm6 1559 vmovdqu XMMWORD[16+rdi],xmm1 1560 vmovdqu XMMWORD[32+rdi],xmm13 1561 vmovdqu XMMWORD[48+rdi],xmm5 1562 vmovdqu XMMWORD[64+rdi],xmm15 1563 vmovdqu XMMWORD[80+rdi],xmm10 1564 vmovdqu XMMWORD[96+rdi],xmm2 1565 vmovdqu XMMWORD[112+rdi],xmm9 1566 lea rdi,[128+rdi] 1567 vmovdqu XMMWORD[rdi],xmm11 1568 vmovdqu XMMWORD[16+rdi],xmm3 1569 vmovdqu XMMWORD[32+rdi],xmm14 1570 vmovdqu XMMWORD[48+rdi],xmm7 1571 vmovdqu XMMWORD[64+rdi],xmm8 1572 vmovdqu XMMWORD[80+rdi],xmm0 1573 vmovdqu XMMWORD[96+rdi],xmm12 1574 vmovdqu XMMWORD[112+rdi],xmm4 1575 lea rdi,[128+rdi] 1576 1577 sub rdx,64*4 1578 jnz NEAR $L$oop_outer4xop 1579 1580 jmp NEAR $L$done4xop 1581 1582 ALIGN 32 1583 $L$tail4xop: 1584 cmp rdx,192 1585 jae NEAR $L$192_or_more4xop 1586 cmp rdx,128 1587 jae NEAR $L$128_or_more4xop 1588 cmp rdx,64 1589 jae NEAR $L$64_or_more4xop 1590 1591 xor r10,r10 1592 vmovdqa XMMWORD[rsp],xmm6 1593 vmovdqa XMMWORD[16+rsp],xmm1 1594 vmovdqa XMMWORD[32+rsp],xmm13 1595 vmovdqa XMMWORD[48+rsp],xmm5 1596 jmp NEAR $L$oop_tail4xop 1597 1598 ALIGN 32 1599 $L$64_or_more4xop: 1600 vpxor xmm6,xmm6,XMMWORD[rsi] 1601 vpxor xmm1,xmm1,XMMWORD[16+rsi] 1602 vpxor xmm13,xmm13,XMMWORD[32+rsi] 1603 vpxor xmm5,xmm5,XMMWORD[48+rsi] 1604 vmovdqu XMMWORD[rdi],xmm6 1605 vmovdqu XMMWORD[16+rdi],xmm1 1606 vmovdqu XMMWORD[32+rdi],xmm13 1607 vmovdqu XMMWORD[48+rdi],xmm5 1608 je NEAR $L$done4xop 1609 1610 lea rsi,[64+rsi] 1611 vmovdqa XMMWORD[rsp],xmm15 1612 xor r10,r10 1613 vmovdqa XMMWORD[16+rsp],xmm10 1614 lea rdi,[64+rdi] 1615 vmovdqa XMMWORD[32+rsp],xmm2 1616 sub rdx,64 1617 vmovdqa XMMWORD[48+rsp],xmm9 1618 jmp NEAR $L$oop_tail4xop 1619 1620 ALIGN 32 1621 $L$128_or_more4xop: 1622 vpxor xmm6,xmm6,XMMWORD[rsi] 1623 vpxor xmm1,xmm1,XMMWORD[16+rsi] 1624 vpxor xmm13,xmm13,XMMWORD[32+rsi] 1625 vpxor xmm5,xmm5,XMMWORD[48+rsi] 1626 vpxor xmm15,xmm15,XMMWORD[64+rsi] 1627 vpxor xmm10,xmm10,XMMWORD[80+rsi] 1628 vpxor xmm2,xmm2,XMMWORD[96+rsi] 1629 vpxor xmm9,xmm9,XMMWORD[112+rsi] 1630 1631 vmovdqu XMMWORD[rdi],xmm6 1632 vmovdqu XMMWORD[16+rdi],xmm1 1633 vmovdqu XMMWORD[32+rdi],xmm13 1634 vmovdqu XMMWORD[48+rdi],xmm5 1635 vmovdqu XMMWORD[64+rdi],xmm15 1636 vmovdqu XMMWORD[80+rdi],xmm10 1637 vmovdqu XMMWORD[96+rdi],xmm2 1638 vmovdqu XMMWORD[112+rdi],xmm9 1639 je NEAR $L$done4xop 1640 1641 lea rsi,[128+rsi] 1642 vmovdqa XMMWORD[rsp],xmm11 1643 xor r10,r10 1644 vmovdqa XMMWORD[16+rsp],xmm3 1645 lea rdi,[128+rdi] 1646 vmovdqa XMMWORD[32+rsp],xmm14 1647 sub rdx,128 1648 vmovdqa XMMWORD[48+rsp],xmm7 1649 jmp NEAR $L$oop_tail4xop 1650 1651 ALIGN 32 1652 $L$192_or_more4xop: 1653 vpxor xmm6,xmm6,XMMWORD[rsi] 1654 vpxor xmm1,xmm1,XMMWORD[16+rsi] 1655 vpxor xmm13,xmm13,XMMWORD[32+rsi] 1656 vpxor xmm5,xmm5,XMMWORD[48+rsi] 1657 vpxor xmm15,xmm15,XMMWORD[64+rsi] 1658 vpxor xmm10,xmm10,XMMWORD[80+rsi] 1659 vpxor xmm2,xmm2,XMMWORD[96+rsi] 1660 vpxor xmm9,xmm9,XMMWORD[112+rsi] 1661 lea rsi,[128+rsi] 1662 vpxor xmm11,xmm11,XMMWORD[rsi] 1663 vpxor xmm3,xmm3,XMMWORD[16+rsi] 1664 vpxor xmm14,xmm14,XMMWORD[32+rsi] 1665 vpxor xmm7,xmm7,XMMWORD[48+rsi] 1666 1667 vmovdqu XMMWORD[rdi],xmm6 1668 vmovdqu XMMWORD[16+rdi],xmm1 1669 vmovdqu XMMWORD[32+rdi],xmm13 1670 vmovdqu XMMWORD[48+rdi],xmm5 1671 vmovdqu XMMWORD[64+rdi],xmm15 1672 vmovdqu XMMWORD[80+rdi],xmm10 1673 vmovdqu XMMWORD[96+rdi],xmm2 1674 vmovdqu XMMWORD[112+rdi],xmm9 1675 lea rdi,[128+rdi] 1676 vmovdqu XMMWORD[rdi],xmm11 1677 vmovdqu XMMWORD[16+rdi],xmm3 1678 vmovdqu XMMWORD[32+rdi],xmm14 1679 vmovdqu XMMWORD[48+rdi],xmm7 1680 je NEAR $L$done4xop 1681 1682 lea rsi,[64+rsi] 1683 vmovdqa XMMWORD[rsp],xmm8 1684 xor r10,r10 1685 vmovdqa XMMWORD[16+rsp],xmm0 1686 lea rdi,[64+rdi] 1687 vmovdqa XMMWORD[32+rsp],xmm12 1688 sub rdx,192 1689 vmovdqa XMMWORD[48+rsp],xmm4 1690 1691 $L$oop_tail4xop: 1692 movzx eax,BYTE[r10*1+rsi] 1693 movzx ecx,BYTE[r10*1+rsp] 1694 lea r10,[1+r10] 1695 xor eax,ecx 1696 mov BYTE[((-1))+r10*1+rdi],al 1697 dec rdx 1698 jnz NEAR $L$oop_tail4xop 1699 1700 $L$done4xop: 1701 vzeroupper 1702 movaps xmm6,XMMWORD[((-168))+r9] 1703 movaps xmm7,XMMWORD[((-152))+r9] 1704 movaps xmm8,XMMWORD[((-136))+r9] 1705 movaps xmm9,XMMWORD[((-120))+r9] 1706 movaps xmm10,XMMWORD[((-104))+r9] 1707 movaps xmm11,XMMWORD[((-88))+r9] 1708 movaps xmm12,XMMWORD[((-72))+r9] 1709 movaps xmm13,XMMWORD[((-56))+r9] 1710 movaps xmm14,XMMWORD[((-40))+r9] 1711 movaps xmm15,XMMWORD[((-24))+r9] 1712 lea rsp,[r9] 1713 1714 $L$4xop_epilogue: 1715 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1716 mov rsi,QWORD[16+rsp] 1717 DB 0F3h,0C3h ;repret 1718 1719 $L$SEH_end_ChaCha20_4xop: 1720 1721 ALIGN 32 1722 ChaCha20_8x: 1723 mov QWORD[8+rsp],rdi ;WIN64 prologue 1724 mov QWORD[16+rsp],rsi 1725 mov rax,rsp 1726 $L$SEH_begin_ChaCha20_8x: 1727 mov rdi,rcx 1728 mov rsi,rdx 1729 mov rdx,r8 1730 mov rcx,r9 1731 mov r8,QWORD[40+rsp] 1732 1733 1734 1735 $L$ChaCha20_8x: 1736 mov r9,rsp 1737 1738 sub rsp,0x280+168 1739 and rsp,-32 1740 movaps XMMWORD[(-168)+r9],xmm6 1741 movaps XMMWORD[(-152)+r9],xmm7 1742 movaps XMMWORD[(-136)+r9],xmm8 1743 movaps XMMWORD[(-120)+r9],xmm9 1744 movaps XMMWORD[(-104)+r9],xmm10 1745 movaps XMMWORD[(-88)+r9],xmm11 1746 movaps XMMWORD[(-72)+r9],xmm12 1747 movaps XMMWORD[(-56)+r9],xmm13 1748 movaps XMMWORD[(-40)+r9],xmm14 1749 movaps XMMWORD[(-24)+r9],xmm15 1750 $L$8x_body: 1751 vzeroupper 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 vbroadcasti128 ymm11,XMMWORD[$L$sigma] 1763 vbroadcasti128 ymm3,XMMWORD[rcx] 1764 vbroadcasti128 ymm15,XMMWORD[16+rcx] 1765 vbroadcasti128 ymm7,XMMWORD[r8] 1766 lea rcx,[256+rsp] 1767 lea rax,[512+rsp] 1768 lea r10,[$L$rot16] 1769 lea r11,[$L$rot24] 1770 1771 vpshufd ymm8,ymm11,0x00 1772 vpshufd ymm9,ymm11,0x55 1773 vmovdqa YMMWORD[(128-256)+rcx],ymm8 1774 vpshufd ymm10,ymm11,0xaa 1775 vmovdqa YMMWORD[(160-256)+rcx],ymm9 1776 vpshufd ymm11,ymm11,0xff 1777 vmovdqa YMMWORD[(192-256)+rcx],ymm10 1778 vmovdqa YMMWORD[(224-256)+rcx],ymm11 1779 1780 vpshufd ymm0,ymm3,0x00 1781 vpshufd ymm1,ymm3,0x55 1782 vmovdqa YMMWORD[(256-256)+rcx],ymm0 1783 vpshufd ymm2,ymm3,0xaa 1784 vmovdqa YMMWORD[(288-256)+rcx],ymm1 1785 vpshufd ymm3,ymm3,0xff 1786 vmovdqa YMMWORD[(320-256)+rcx],ymm2 1787 vmovdqa YMMWORD[(352-256)+rcx],ymm3 1788 1789 vpshufd ymm12,ymm15,0x00 1790 vpshufd ymm13,ymm15,0x55 1791 vmovdqa YMMWORD[(384-512)+rax],ymm12 1792 vpshufd ymm14,ymm15,0xaa 1793 vmovdqa YMMWORD[(416-512)+rax],ymm13 1794 vpshufd ymm15,ymm15,0xff 1795 vmovdqa YMMWORD[(448-512)+rax],ymm14 1796 vmovdqa YMMWORD[(480-512)+rax],ymm15 1797 1798 vpshufd ymm4,ymm7,0x00 1799 vpshufd ymm5,ymm7,0x55 1800 vpaddd ymm4,ymm4,YMMWORD[$L$incy] 1801 vpshufd ymm6,ymm7,0xaa 1802 vmovdqa YMMWORD[(544-512)+rax],ymm5 1803 vpshufd ymm7,ymm7,0xff 1804 vmovdqa YMMWORD[(576-512)+rax],ymm6 1805 vmovdqa YMMWORD[(608-512)+rax],ymm7 1806 1807 jmp NEAR $L$oop_enter8x 1808 1809 ALIGN 32 1810 $L$oop_outer8x: 1811 vmovdqa ymm8,YMMWORD[((128-256))+rcx] 1812 vmovdqa ymm9,YMMWORD[((160-256))+rcx] 1813 vmovdqa ymm10,YMMWORD[((192-256))+rcx] 1814 vmovdqa ymm11,YMMWORD[((224-256))+rcx] 1815 vmovdqa ymm0,YMMWORD[((256-256))+rcx] 1816 vmovdqa ymm1,YMMWORD[((288-256))+rcx] 1817 vmovdqa ymm2,YMMWORD[((320-256))+rcx] 1818 vmovdqa ymm3,YMMWORD[((352-256))+rcx] 1819 vmovdqa ymm12,YMMWORD[((384-512))+rax] 1820 vmovdqa ymm13,YMMWORD[((416-512))+rax] 1821 vmovdqa ymm14,YMMWORD[((448-512))+rax] 1822 vmovdqa ymm15,YMMWORD[((480-512))+rax] 1823 vmovdqa ymm4,YMMWORD[((512-512))+rax] 1824 vmovdqa ymm5,YMMWORD[((544-512))+rax] 1825 vmovdqa ymm6,YMMWORD[((576-512))+rax] 1826 vmovdqa ymm7,YMMWORD[((608-512))+rax] 1827 vpaddd ymm4,ymm4,YMMWORD[$L$eight] 1828 1829 $L$oop_enter8x: 1830 vmovdqa YMMWORD[64+rsp],ymm14 1831 vmovdqa YMMWORD[96+rsp],ymm15 1832 vbroadcasti128 ymm15,XMMWORD[r10] 1833 vmovdqa YMMWORD[(512-512)+rax],ymm4 1834 mov eax,10 1835 jmp NEAR $L$oop8x 1836 1837 ALIGN 32 1838 $L$oop8x: 1839 vpaddd ymm8,ymm8,ymm0 1840 vpxor ymm4,ymm8,ymm4 1841 vpshufb ymm4,ymm4,ymm15 1842 vpaddd ymm9,ymm9,ymm1 1843 vpxor ymm5,ymm9,ymm5 1844 vpshufb ymm5,ymm5,ymm15 1845 vpaddd ymm12,ymm12,ymm4 1846 vpxor ymm0,ymm12,ymm0 1847 vpslld ymm14,ymm0,12 1848 vpsrld ymm0,ymm0,20 1849 vpor ymm0,ymm14,ymm0 1850 vbroadcasti128 ymm14,XMMWORD[r11] 1851 vpaddd ymm13,ymm13,ymm5 1852 vpxor ymm1,ymm13,ymm1 1853 vpslld ymm15,ymm1,12 1854 vpsrld ymm1,ymm1,20 1855 vpor ymm1,ymm15,ymm1 1856 vpaddd ymm8,ymm8,ymm0 1857 vpxor ymm4,ymm8,ymm4 1858 vpshufb ymm4,ymm4,ymm14 1859 vpaddd ymm9,ymm9,ymm1 1860 vpxor ymm5,ymm9,ymm5 1861 vpshufb ymm5,ymm5,ymm14 1862 vpaddd ymm12,ymm12,ymm4 1863 vpxor ymm0,ymm12,ymm0 1864 vpslld ymm15,ymm0,7 1865 vpsrld ymm0,ymm0,25 1866 vpor ymm0,ymm15,ymm0 1867 vbroadcasti128 ymm15,XMMWORD[r10] 1868 vpaddd ymm13,ymm13,ymm5 1869 vpxor ymm1,ymm13,ymm1 1870 vpslld ymm14,ymm1,7 1871 vpsrld ymm1,ymm1,25 1872 vpor ymm1,ymm14,ymm1 1873 vmovdqa YMMWORD[rsp],ymm12 1874 vmovdqa YMMWORD[32+rsp],ymm13 1875 vmovdqa ymm12,YMMWORD[64+rsp] 1876 vmovdqa ymm13,YMMWORD[96+rsp] 1877 vpaddd ymm10,ymm10,ymm2 1878 vpxor ymm6,ymm10,ymm6 1879 vpshufb ymm6,ymm6,ymm15 1880 vpaddd ymm11,ymm11,ymm3 1881 vpxor ymm7,ymm11,ymm7 1882 vpshufb ymm7,ymm7,ymm15 1883 vpaddd ymm12,ymm12,ymm6 1884 vpxor ymm2,ymm12,ymm2 1885 vpslld ymm14,ymm2,12 1886 vpsrld ymm2,ymm2,20 1887 vpor ymm2,ymm14,ymm2 1888 vbroadcasti128 ymm14,XMMWORD[r11] 1889 vpaddd ymm13,ymm13,ymm7 1890 vpxor ymm3,ymm13,ymm3 1891 vpslld ymm15,ymm3,12 1892 vpsrld ymm3,ymm3,20 1893 vpor ymm3,ymm15,ymm3 1894 vpaddd ymm10,ymm10,ymm2 1895 vpxor ymm6,ymm10,ymm6 1896 vpshufb ymm6,ymm6,ymm14 1897 vpaddd ymm11,ymm11,ymm3 1898 vpxor ymm7,ymm11,ymm7 1899 vpshufb ymm7,ymm7,ymm14 1900 vpaddd ymm12,ymm12,ymm6 1901 vpxor ymm2,ymm12,ymm2 1902 vpslld ymm15,ymm2,7 1903 vpsrld ymm2,ymm2,25 1904 vpor ymm2,ymm15,ymm2 1905 vbroadcasti128 ymm15,XMMWORD[r10] 1906 vpaddd ymm13,ymm13,ymm7 1907 vpxor ymm3,ymm13,ymm3 1908 vpslld ymm14,ymm3,7 1909 vpsrld ymm3,ymm3,25 1910 vpor ymm3,ymm14,ymm3 1911 vpaddd ymm8,ymm8,ymm1 1912 vpxor ymm7,ymm8,ymm7 1913 vpshufb ymm7,ymm7,ymm15 1914 vpaddd ymm9,ymm9,ymm2 1915 vpxor ymm4,ymm9,ymm4 1916 vpshufb ymm4,ymm4,ymm15 1917 vpaddd ymm12,ymm12,ymm7 1918 vpxor ymm1,ymm12,ymm1 1919 vpslld ymm14,ymm1,12 1920 vpsrld ymm1,ymm1,20 1921 vpor ymm1,ymm14,ymm1 1922 vbroadcasti128 ymm14,XMMWORD[r11] 1923 vpaddd ymm13,ymm13,ymm4 1924 vpxor ymm2,ymm13,ymm2 1925 vpslld ymm15,ymm2,12 1926 vpsrld ymm2,ymm2,20 1927 vpor ymm2,ymm15,ymm2 1928 vpaddd ymm8,ymm8,ymm1 1929 vpxor ymm7,ymm8,ymm7 1930 vpshufb ymm7,ymm7,ymm14 1931 vpaddd ymm9,ymm9,ymm2 1932 vpxor ymm4,ymm9,ymm4 1933 vpshufb ymm4,ymm4,ymm14 1934 vpaddd ymm12,ymm12,ymm7 1935 vpxor ymm1,ymm12,ymm1 1936 vpslld ymm15,ymm1,7 1937 vpsrld ymm1,ymm1,25 1938 vpor ymm1,ymm15,ymm1 1939 vbroadcasti128 ymm15,XMMWORD[r10] 1940 vpaddd ymm13,ymm13,ymm4 1941 vpxor ymm2,ymm13,ymm2 1942 vpslld ymm14,ymm2,7 1943 vpsrld ymm2,ymm2,25 1944 vpor ymm2,ymm14,ymm2 1945 vmovdqa YMMWORD[64+rsp],ymm12 1946 vmovdqa YMMWORD[96+rsp],ymm13 1947 vmovdqa ymm12,YMMWORD[rsp] 1948 vmovdqa ymm13,YMMWORD[32+rsp] 1949 vpaddd ymm10,ymm10,ymm3 1950 vpxor ymm5,ymm10,ymm5 1951 vpshufb ymm5,ymm5,ymm15 1952 vpaddd ymm11,ymm11,ymm0 1953 vpxor ymm6,ymm11,ymm6 1954 vpshufb ymm6,ymm6,ymm15 1955 vpaddd ymm12,ymm12,ymm5 1956 vpxor ymm3,ymm12,ymm3 1957 vpslld ymm14,ymm3,12 1958 vpsrld ymm3,ymm3,20 1959 vpor ymm3,ymm14,ymm3 1960 vbroadcasti128 ymm14,XMMWORD[r11] 1961 vpaddd ymm13,ymm13,ymm6 1962 vpxor ymm0,ymm13,ymm0 1963 vpslld ymm15,ymm0,12 1964 vpsrld ymm0,ymm0,20 1965 vpor ymm0,ymm15,ymm0 1966 vpaddd ymm10,ymm10,ymm3 1967 vpxor ymm5,ymm10,ymm5 1968 vpshufb ymm5,ymm5,ymm14 1969 vpaddd ymm11,ymm11,ymm0 1970 vpxor ymm6,ymm11,ymm6 1971 vpshufb ymm6,ymm6,ymm14 1972 vpaddd ymm12,ymm12,ymm5 1973 vpxor ymm3,ymm12,ymm3 1974 vpslld ymm15,ymm3,7 1975 vpsrld ymm3,ymm3,25 1976 vpor ymm3,ymm15,ymm3 1977 vbroadcasti128 ymm15,XMMWORD[r10] 1978 vpaddd ymm13,ymm13,ymm6 1979 vpxor ymm0,ymm13,ymm0 1980 vpslld ymm14,ymm0,7 1981 vpsrld ymm0,ymm0,25 1982 vpor ymm0,ymm14,ymm0 1983 dec eax 1984 jnz NEAR $L$oop8x 1985 1986 lea rax,[512+rsp] 1987 vpaddd ymm8,ymm8,YMMWORD[((128-256))+rcx] 1988 vpaddd ymm9,ymm9,YMMWORD[((160-256))+rcx] 1989 vpaddd ymm10,ymm10,YMMWORD[((192-256))+rcx] 1990 vpaddd ymm11,ymm11,YMMWORD[((224-256))+rcx] 1991 1992 vpunpckldq ymm14,ymm8,ymm9 1993 vpunpckldq ymm15,ymm10,ymm11 1994 vpunpckhdq ymm8,ymm8,ymm9 1995 vpunpckhdq ymm10,ymm10,ymm11 1996 vpunpcklqdq ymm9,ymm14,ymm15 1997 vpunpckhqdq ymm14,ymm14,ymm15 1998 vpunpcklqdq ymm11,ymm8,ymm10 1999 vpunpckhqdq ymm8,ymm8,ymm10 2000 vpaddd ymm0,ymm0,YMMWORD[((256-256))+rcx] 2001 vpaddd ymm1,ymm1,YMMWORD[((288-256))+rcx] 2002 vpaddd ymm2,ymm2,YMMWORD[((320-256))+rcx] 2003 vpaddd ymm3,ymm3,YMMWORD[((352-256))+rcx] 2004 2005 vpunpckldq ymm10,ymm0,ymm1 2006 vpunpckldq ymm15,ymm2,ymm3 2007 vpunpckhdq ymm0,ymm0,ymm1 2008 vpunpckhdq ymm2,ymm2,ymm3 2009 vpunpcklqdq ymm1,ymm10,ymm15 2010 vpunpckhqdq ymm10,ymm10,ymm15 2011 vpunpcklqdq ymm3,ymm0,ymm2 2012 vpunpckhqdq ymm0,ymm0,ymm2 2013 vperm2i128 ymm15,ymm9,ymm1,0x20 2014 vperm2i128 ymm1,ymm9,ymm1,0x31 2015 vperm2i128 ymm9,ymm14,ymm10,0x20 2016 vperm2i128 ymm10,ymm14,ymm10,0x31 2017 vperm2i128 ymm14,ymm11,ymm3,0x20 2018 vperm2i128 ymm3,ymm11,ymm3,0x31 2019 vperm2i128 ymm11,ymm8,ymm0,0x20 2020 vperm2i128 ymm0,ymm8,ymm0,0x31 2021 vmovdqa YMMWORD[rsp],ymm15 2022 vmovdqa YMMWORD[32+rsp],ymm9 2023 vmovdqa ymm15,YMMWORD[64+rsp] 2024 vmovdqa ymm9,YMMWORD[96+rsp] 2025 2026 vpaddd ymm12,ymm12,YMMWORD[((384-512))+rax] 2027 vpaddd ymm13,ymm13,YMMWORD[((416-512))+rax] 2028 vpaddd ymm15,ymm15,YMMWORD[((448-512))+rax] 2029 vpaddd ymm9,ymm9,YMMWORD[((480-512))+rax] 2030 2031 vpunpckldq ymm2,ymm12,ymm13 2032 vpunpckldq ymm8,ymm15,ymm9 2033 vpunpckhdq ymm12,ymm12,ymm13 2034 vpunpckhdq ymm15,ymm15,ymm9 2035 vpunpcklqdq ymm13,ymm2,ymm8 2036 vpunpckhqdq ymm2,ymm2,ymm8 2037 vpunpcklqdq ymm9,ymm12,ymm15 2038 vpunpckhqdq ymm12,ymm12,ymm15 2039 vpaddd ymm4,ymm4,YMMWORD[((512-512))+rax] 2040 vpaddd ymm5,ymm5,YMMWORD[((544-512))+rax] 2041 vpaddd ymm6,ymm6,YMMWORD[((576-512))+rax] 2042 vpaddd ymm7,ymm7,YMMWORD[((608-512))+rax] 2043 2044 vpunpckldq ymm15,ymm4,ymm5 2045 vpunpckldq ymm8,ymm6,ymm7 2046 vpunpckhdq ymm4,ymm4,ymm5 2047 vpunpckhdq ymm6,ymm6,ymm7 2048 vpunpcklqdq ymm5,ymm15,ymm8 2049 vpunpckhqdq ymm15,ymm15,ymm8 2050 vpunpcklqdq ymm7,ymm4,ymm6 2051 vpunpckhqdq ymm4,ymm4,ymm6 2052 vperm2i128 ymm8,ymm13,ymm5,0x20 2053 vperm2i128 ymm5,ymm13,ymm5,0x31 2054 vperm2i128 ymm13,ymm2,ymm15,0x20 2055 vperm2i128 ymm15,ymm2,ymm15,0x31 2056 vperm2i128 ymm2,ymm9,ymm7,0x20 2057 vperm2i128 ymm7,ymm9,ymm7,0x31 2058 vperm2i128 ymm9,ymm12,ymm4,0x20 2059 vperm2i128 ymm4,ymm12,ymm4,0x31 2060 vmovdqa ymm6,YMMWORD[rsp] 2061 vmovdqa ymm12,YMMWORD[32+rsp] 2062 2063 cmp rdx,64*8 2064 jb NEAR $L$tail8x 2065 2066 vpxor ymm6,ymm6,YMMWORD[rsi] 2067 vpxor ymm8,ymm8,YMMWORD[32+rsi] 2068 vpxor ymm1,ymm1,YMMWORD[64+rsi] 2069 vpxor ymm5,ymm5,YMMWORD[96+rsi] 2070 lea rsi,[128+rsi] 2071 vmovdqu YMMWORD[rdi],ymm6 2072 vmovdqu YMMWORD[32+rdi],ymm8 2073 vmovdqu YMMWORD[64+rdi],ymm1 2074 vmovdqu YMMWORD[96+rdi],ymm5 2075 lea rdi,[128+rdi] 2076 2077 vpxor ymm12,ymm12,YMMWORD[rsi] 2078 vpxor ymm13,ymm13,YMMWORD[32+rsi] 2079 vpxor ymm10,ymm10,YMMWORD[64+rsi] 2080 vpxor ymm15,ymm15,YMMWORD[96+rsi] 2081 lea rsi,[128+rsi] 2082 vmovdqu YMMWORD[rdi],ymm12 2083 vmovdqu YMMWORD[32+rdi],ymm13 2084 vmovdqu YMMWORD[64+rdi],ymm10 2085 vmovdqu YMMWORD[96+rdi],ymm15 2086 lea rdi,[128+rdi] 2087 2088 vpxor ymm14,ymm14,YMMWORD[rsi] 2089 vpxor ymm2,ymm2,YMMWORD[32+rsi] 2090 vpxor ymm3,ymm3,YMMWORD[64+rsi] 2091 vpxor ymm7,ymm7,YMMWORD[96+rsi] 2092 lea rsi,[128+rsi] 2093 vmovdqu YMMWORD[rdi],ymm14 2094 vmovdqu YMMWORD[32+rdi],ymm2 2095 vmovdqu YMMWORD[64+rdi],ymm3 2096 vmovdqu YMMWORD[96+rdi],ymm7 2097 lea rdi,[128+rdi] 2098 2099 vpxor ymm11,ymm11,YMMWORD[rsi] 2100 vpxor ymm9,ymm9,YMMWORD[32+rsi] 2101 vpxor ymm0,ymm0,YMMWORD[64+rsi] 2102 vpxor ymm4,ymm4,YMMWORD[96+rsi] 2103 lea rsi,[128+rsi] 2104 vmovdqu YMMWORD[rdi],ymm11 2105 vmovdqu YMMWORD[32+rdi],ymm9 2106 vmovdqu YMMWORD[64+rdi],ymm0 2107 vmovdqu YMMWORD[96+rdi],ymm4 2108 lea rdi,[128+rdi] 2109 2110 sub rdx,64*8 2111 jnz NEAR $L$oop_outer8x 2112 2113 jmp NEAR $L$done8x 2114 2115 $L$tail8x: 2116 cmp rdx,448 2117 jae NEAR $L$448_or_more8x 2118 cmp rdx,384 2119 jae NEAR $L$384_or_more8x 2120 cmp rdx,320 2121 jae NEAR $L$320_or_more8x 2122 cmp rdx,256 2123 jae NEAR $L$256_or_more8x 2124 cmp rdx,192 2125 jae NEAR $L$192_or_more8x 2126 cmp rdx,128 2127 jae NEAR $L$128_or_more8x 2128 cmp rdx,64 2129 jae NEAR $L$64_or_more8x 2130 2131 xor r10,r10 2132 vmovdqa YMMWORD[rsp],ymm6 2133 vmovdqa YMMWORD[32+rsp],ymm8 2134 jmp NEAR $L$oop_tail8x 2135 2136 ALIGN 32 2137 $L$64_or_more8x: 2138 vpxor ymm6,ymm6,YMMWORD[rsi] 2139 vpxor ymm8,ymm8,YMMWORD[32+rsi] 2140 vmovdqu YMMWORD[rdi],ymm6 2141 vmovdqu YMMWORD[32+rdi],ymm8 2142 je NEAR $L$done8x 2143 2144 lea rsi,[64+rsi] 2145 xor r10,r10 2146 vmovdqa YMMWORD[rsp],ymm1 2147 lea rdi,[64+rdi] 2148 sub rdx,64 2149 vmovdqa YMMWORD[32+rsp],ymm5 2150 jmp NEAR $L$oop_tail8x 2151 2152 ALIGN 32 2153 $L$128_or_more8x: 2154 vpxor ymm6,ymm6,YMMWORD[rsi] 2155 vpxor ymm8,ymm8,YMMWORD[32+rsi] 2156 vpxor ymm1,ymm1,YMMWORD[64+rsi] 2157 vpxor ymm5,ymm5,YMMWORD[96+rsi] 2158 vmovdqu YMMWORD[rdi],ymm6 2159 vmovdqu YMMWORD[32+rdi],ymm8 2160 vmovdqu YMMWORD[64+rdi],ymm1 2161 vmovdqu YMMWORD[96+rdi],ymm5 2162 je NEAR $L$done8x 2163 2164 lea rsi,[128+rsi] 2165 xor r10,r10 2166 vmovdqa YMMWORD[rsp],ymm12 2167 lea rdi,[128+rdi] 2168 sub rdx,128 2169 vmovdqa YMMWORD[32+rsp],ymm13 2170 jmp NEAR $L$oop_tail8x 2171 2172 ALIGN 32 2173 $L$192_or_more8x: 2174 vpxor ymm6,ymm6,YMMWORD[rsi] 2175 vpxor ymm8,ymm8,YMMWORD[32+rsi] 2176 vpxor ymm1,ymm1,YMMWORD[64+rsi] 2177 vpxor ymm5,ymm5,YMMWORD[96+rsi] 2178 vpxor ymm12,ymm12,YMMWORD[128+rsi] 2179 vpxor ymm13,ymm13,YMMWORD[160+rsi] 2180 vmovdqu YMMWORD[rdi],ymm6 2181 vmovdqu YMMWORD[32+rdi],ymm8 2182 vmovdqu YMMWORD[64+rdi],ymm1 2183 vmovdqu YMMWORD[96+rdi],ymm5 2184 vmovdqu YMMWORD[128+rdi],ymm12 2185 vmovdqu YMMWORD[160+rdi],ymm13 2186 je NEAR $L$done8x 2187 2188 lea rsi,[192+rsi] 2189 xor r10,r10 2190 vmovdqa YMMWORD[rsp],ymm10 2191 lea rdi,[192+rdi] 2192 sub rdx,192 2193 vmovdqa YMMWORD[32+rsp],ymm15 2194 jmp NEAR $L$oop_tail8x 2195 2196 ALIGN 32 2197 $L$256_or_more8x: 2198 vpxor ymm6,ymm6,YMMWORD[rsi] 2199 vpxor ymm8,ymm8,YMMWORD[32+rsi] 2200 vpxor ymm1,ymm1,YMMWORD[64+rsi] 2201 vpxor ymm5,ymm5,YMMWORD[96+rsi] 2202 vpxor ymm12,ymm12,YMMWORD[128+rsi] 2203 vpxor ymm13,ymm13,YMMWORD[160+rsi] 2204 vpxor ymm10,ymm10,YMMWORD[192+rsi] 2205 vpxor ymm15,ymm15,YMMWORD[224+rsi] 2206 vmovdqu YMMWORD[rdi],ymm6 2207 vmovdqu YMMWORD[32+rdi],ymm8 2208 vmovdqu YMMWORD[64+rdi],ymm1 2209 vmovdqu YMMWORD[96+rdi],ymm5 2210 vmovdqu YMMWORD[128+rdi],ymm12 2211 vmovdqu YMMWORD[160+rdi],ymm13 2212 vmovdqu YMMWORD[192+rdi],ymm10 2213 vmovdqu YMMWORD[224+rdi],ymm15 2214 je NEAR $L$done8x 2215 2216 lea rsi,[256+rsi] 2217 xor r10,r10 2218 vmovdqa YMMWORD[rsp],ymm14 2219 lea rdi,[256+rdi] 2220 sub rdx,256 2221 vmovdqa YMMWORD[32+rsp],ymm2 2222 jmp NEAR $L$oop_tail8x 2223 2224 ALIGN 32 2225 $L$320_or_more8x: 2226 vpxor ymm6,ymm6,YMMWORD[rsi] 2227 vpxor ymm8,ymm8,YMMWORD[32+rsi] 2228 vpxor ymm1,ymm1,YMMWORD[64+rsi] 2229 vpxor ymm5,ymm5,YMMWORD[96+rsi] 2230 vpxor ymm12,ymm12,YMMWORD[128+rsi] 2231 vpxor ymm13,ymm13,YMMWORD[160+rsi] 2232 vpxor ymm10,ymm10,YMMWORD[192+rsi] 2233 vpxor ymm15,ymm15,YMMWORD[224+rsi] 2234 vpxor ymm14,ymm14,YMMWORD[256+rsi] 2235 vpxor ymm2,ymm2,YMMWORD[288+rsi] 2236 vmovdqu YMMWORD[rdi],ymm6 2237 vmovdqu YMMWORD[32+rdi],ymm8 2238 vmovdqu YMMWORD[64+rdi],ymm1 2239 vmovdqu YMMWORD[96+rdi],ymm5 2240 vmovdqu YMMWORD[128+rdi],ymm12 2241 vmovdqu YMMWORD[160+rdi],ymm13 2242 vmovdqu YMMWORD[192+rdi],ymm10 2243 vmovdqu YMMWORD[224+rdi],ymm15 2244 vmovdqu YMMWORD[256+rdi],ymm14 2245 vmovdqu YMMWORD[288+rdi],ymm2 2246 je NEAR $L$done8x 2247 2248 lea rsi,[320+rsi] 2249 xor r10,r10 2250 vmovdqa YMMWORD[rsp],ymm3 2251 lea rdi,[320+rdi] 2252 sub rdx,320 2253 vmovdqa YMMWORD[32+rsp],ymm7 2254 jmp NEAR $L$oop_tail8x 2255 2256 ALIGN 32 2257 $L$384_or_more8x: 2258 vpxor ymm6,ymm6,YMMWORD[rsi] 2259 vpxor ymm8,ymm8,YMMWORD[32+rsi] 2260 vpxor ymm1,ymm1,YMMWORD[64+rsi] 2261 vpxor ymm5,ymm5,YMMWORD[96+rsi] 2262 vpxor ymm12,ymm12,YMMWORD[128+rsi] 2263 vpxor ymm13,ymm13,YMMWORD[160+rsi] 2264 vpxor ymm10,ymm10,YMMWORD[192+rsi] 2265 vpxor ymm15,ymm15,YMMWORD[224+rsi] 2266 vpxor ymm14,ymm14,YMMWORD[256+rsi] 2267 vpxor ymm2,ymm2,YMMWORD[288+rsi] 2268 vpxor ymm3,ymm3,YMMWORD[320+rsi] 2269 vpxor ymm7,ymm7,YMMWORD[352+rsi] 2270 vmovdqu YMMWORD[rdi],ymm6 2271 vmovdqu YMMWORD[32+rdi],ymm8 2272 vmovdqu YMMWORD[64+rdi],ymm1 2273 vmovdqu YMMWORD[96+rdi],ymm5 2274 vmovdqu YMMWORD[128+rdi],ymm12 2275 vmovdqu YMMWORD[160+rdi],ymm13 2276 vmovdqu YMMWORD[192+rdi],ymm10 2277 vmovdqu YMMWORD[224+rdi],ymm15 2278 vmovdqu YMMWORD[256+rdi],ymm14 2279 vmovdqu YMMWORD[288+rdi],ymm2 2280 vmovdqu YMMWORD[320+rdi],ymm3 2281 vmovdqu YMMWORD[352+rdi],ymm7 2282 je NEAR $L$done8x 2283 2284 lea rsi,[384+rsi] 2285 xor r10,r10 2286 vmovdqa YMMWORD[rsp],ymm11 2287 lea rdi,[384+rdi] 2288 sub rdx,384 2289 vmovdqa YMMWORD[32+rsp],ymm9 2290 jmp NEAR $L$oop_tail8x 2291 2292 ALIGN 32 2293 $L$448_or_more8x: 2294 vpxor ymm6,ymm6,YMMWORD[rsi] 2295 vpxor ymm8,ymm8,YMMWORD[32+rsi] 2296 vpxor ymm1,ymm1,YMMWORD[64+rsi] 2297 vpxor ymm5,ymm5,YMMWORD[96+rsi] 2298 vpxor ymm12,ymm12,YMMWORD[128+rsi] 2299 vpxor ymm13,ymm13,YMMWORD[160+rsi] 2300 vpxor ymm10,ymm10,YMMWORD[192+rsi] 2301 vpxor ymm15,ymm15,YMMWORD[224+rsi] 2302 vpxor ymm14,ymm14,YMMWORD[256+rsi] 2303 vpxor ymm2,ymm2,YMMWORD[288+rsi] 2304 vpxor ymm3,ymm3,YMMWORD[320+rsi] 2305 vpxor ymm7,ymm7,YMMWORD[352+rsi] 2306 vpxor ymm11,ymm11,YMMWORD[384+rsi] 2307 vpxor ymm9,ymm9,YMMWORD[416+rsi] 2308 vmovdqu YMMWORD[rdi],ymm6 2309 vmovdqu YMMWORD[32+rdi],ymm8 2310 vmovdqu YMMWORD[64+rdi],ymm1 2311 vmovdqu YMMWORD[96+rdi],ymm5 2312 vmovdqu YMMWORD[128+rdi],ymm12 2313 vmovdqu YMMWORD[160+rdi],ymm13 2314 vmovdqu YMMWORD[192+rdi],ymm10 2315 vmovdqu YMMWORD[224+rdi],ymm15 2316 vmovdqu YMMWORD[256+rdi],ymm14 2317 vmovdqu YMMWORD[288+rdi],ymm2 2318 vmovdqu YMMWORD[320+rdi],ymm3 2319 vmovdqu YMMWORD[352+rdi],ymm7 2320 vmovdqu YMMWORD[384+rdi],ymm11 2321 vmovdqu YMMWORD[416+rdi],ymm9 2322 je NEAR $L$done8x 2323 2324 lea rsi,[448+rsi] 2325 xor r10,r10 2326 vmovdqa YMMWORD[rsp],ymm0 2327 lea rdi,[448+rdi] 2328 sub rdx,448 2329 vmovdqa YMMWORD[32+rsp],ymm4 2330 2331 $L$oop_tail8x: 2332 movzx eax,BYTE[r10*1+rsi] 2333 movzx ecx,BYTE[r10*1+rsp] 2334 lea r10,[1+r10] 2335 xor eax,ecx 2336 mov BYTE[((-1))+r10*1+rdi],al 2337 dec rdx 2338 jnz NEAR $L$oop_tail8x 2339 2340 $L$done8x: 2341 vzeroall 2342 movaps xmm6,XMMWORD[((-168))+r9] 2343 movaps xmm7,XMMWORD[((-152))+r9] 2344 movaps xmm8,XMMWORD[((-136))+r9] 2345 movaps xmm9,XMMWORD[((-120))+r9] 2346 movaps xmm10,XMMWORD[((-104))+r9] 2347 movaps xmm11,XMMWORD[((-88))+r9] 2348 movaps xmm12,XMMWORD[((-72))+r9] 2349 movaps xmm13,XMMWORD[((-56))+r9] 2350 movaps xmm14,XMMWORD[((-40))+r9] 2351 movaps xmm15,XMMWORD[((-24))+r9] 2352 lea rsp,[r9] 2353 2354 $L$8x_epilogue: 2355 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2356 mov rsi,QWORD[16+rsp] 2357 DB 0F3h,0C3h ;repret 2358 2359 $L$SEH_end_ChaCha20_8x: 2360 2361 ALIGN 32 2362 ChaCha20_avx512: 2363 mov QWORD[8+rsp],rdi ;WIN64 prologue 2364 mov QWORD[16+rsp],rsi 2365 mov rax,rsp 2366 $L$SEH_begin_ChaCha20_avx512: 2367 mov rdi,rcx 2368 mov rsi,rdx 2369 mov rdx,r8 2370 mov rcx,r9 2371 mov r8,QWORD[40+rsp] 2372 2373 2374 2375 $L$ChaCha20_avx512: 2376 mov r9,rsp 2377 2378 cmp rdx,512 2379 ja NEAR $L$ChaCha20_16x 2380 2381 sub rsp,64+168 2382 movaps XMMWORD[(-168)+r9],xmm6 2383 movaps XMMWORD[(-152)+r9],xmm7 2384 movaps XMMWORD[(-136)+r9],xmm8 2385 movaps XMMWORD[(-120)+r9],xmm9 2386 movaps XMMWORD[(-104)+r9],xmm10 2387 movaps XMMWORD[(-88)+r9],xmm11 2388 movaps XMMWORD[(-72)+r9],xmm12 2389 movaps XMMWORD[(-56)+r9],xmm13 2390 movaps XMMWORD[(-40)+r9],xmm14 2391 movaps XMMWORD[(-24)+r9],xmm15 2392 $L$avx512_body: 2393 vbroadcasti32x4 zmm0,ZMMWORD[$L$sigma] 2394 vbroadcasti32x4 zmm1,ZMMWORD[rcx] 2395 vbroadcasti32x4 zmm2,ZMMWORD[16+rcx] 2396 vbroadcasti32x4 zmm3,ZMMWORD[r8] 2397 2398 vmovdqa32 zmm16,zmm0 2399 vmovdqa32 zmm17,zmm1 2400 vmovdqa32 zmm18,zmm2 2401 vpaddd zmm3,zmm3,ZMMWORD[$L$zeroz] 2402 vmovdqa32 zmm20,ZMMWORD[$L$fourz] 2403 mov r8,10 2404 vmovdqa32 zmm19,zmm3 2405 jmp NEAR $L$oop_avx512 2406 2407 ALIGN 16 2408 $L$oop_outer_avx512: 2409 vmovdqa32 zmm0,zmm16 2410 vmovdqa32 zmm1,zmm17 2411 vmovdqa32 zmm2,zmm18 2412 vpaddd zmm3,zmm19,zmm20 2413 mov r8,10 2414 vmovdqa32 zmm19,zmm3 2415 jmp NEAR $L$oop_avx512 2416 2417 ALIGN 32 2418 $L$oop_avx512: 2419 vpaddd zmm0,zmm0,zmm1 2420 vpxord zmm3,zmm3,zmm0 2421 vprold zmm3,zmm3,16 2422 vpaddd zmm2,zmm2,zmm3 2423 vpxord zmm1,zmm1,zmm2 2424 vprold zmm1,zmm1,12 2425 vpaddd zmm0,zmm0,zmm1 2426 vpxord zmm3,zmm3,zmm0 2427 vprold zmm3,zmm3,8 2428 vpaddd zmm2,zmm2,zmm3 2429 vpxord zmm1,zmm1,zmm2 2430 vprold zmm1,zmm1,7 2431 vpshufd zmm2,zmm2,78 2432 vpshufd zmm1,zmm1,57 2433 vpshufd zmm3,zmm3,147 2434 vpaddd zmm0,zmm0,zmm1 2435 vpxord zmm3,zmm3,zmm0 2436 vprold zmm3,zmm3,16 2437 vpaddd zmm2,zmm2,zmm3 2438 vpxord zmm1,zmm1,zmm2 2439 vprold zmm1,zmm1,12 2440 vpaddd zmm0,zmm0,zmm1 2441 vpxord zmm3,zmm3,zmm0 2442 vprold zmm3,zmm3,8 2443 vpaddd zmm2,zmm2,zmm3 2444 vpxord zmm1,zmm1,zmm2 2445 vprold zmm1,zmm1,7 2446 vpshufd zmm2,zmm2,78 2447 vpshufd zmm1,zmm1,147 2448 vpshufd zmm3,zmm3,57 2449 dec r8 2450 jnz NEAR $L$oop_avx512 2451 vpaddd zmm0,zmm0,zmm16 2452 vpaddd zmm1,zmm1,zmm17 2453 vpaddd zmm2,zmm2,zmm18 2454 vpaddd zmm3,zmm3,zmm19 2455 2456 sub rdx,64 2457 jb NEAR $L$tail64_avx512 2458 2459 vpxor xmm4,xmm0,XMMWORD[rsi] 2460 vpxor xmm5,xmm1,XMMWORD[16+rsi] 2461 vpxor xmm6,xmm2,XMMWORD[32+rsi] 2462 vpxor xmm7,xmm3,XMMWORD[48+rsi] 2463 lea rsi,[64+rsi] 2464 2465 vmovdqu XMMWORD[rdi],xmm4 2466 vmovdqu XMMWORD[16+rdi],xmm5 2467 vmovdqu XMMWORD[32+rdi],xmm6 2468 vmovdqu XMMWORD[48+rdi],xmm7 2469 lea rdi,[64+rdi] 2470 2471 jz NEAR $L$done_avx512 2472 2473 vextracti32x4 xmm4,zmm0,1 2474 vextracti32x4 xmm5,zmm1,1 2475 vextracti32x4 xmm6,zmm2,1 2476 vextracti32x4 xmm7,zmm3,1 2477 2478 sub rdx,64 2479 jb NEAR $L$tail_avx512 2480 2481 vpxor xmm4,xmm4,XMMWORD[rsi] 2482 vpxor xmm5,xmm5,XMMWORD[16+rsi] 2483 vpxor xmm6,xmm6,XMMWORD[32+rsi] 2484 vpxor xmm7,xmm7,XMMWORD[48+rsi] 2485 lea rsi,[64+rsi] 2486 2487 vmovdqu XMMWORD[rdi],xmm4 2488 vmovdqu XMMWORD[16+rdi],xmm5 2489 vmovdqu XMMWORD[32+rdi],xmm6 2490 vmovdqu XMMWORD[48+rdi],xmm7 2491 lea rdi,[64+rdi] 2492 2493 jz NEAR $L$done_avx512 2494 2495 vextracti32x4 xmm4,zmm0,2 2496 vextracti32x4 xmm5,zmm1,2 2497 vextracti32x4 xmm6,zmm2,2 2498 vextracti32x4 xmm7,zmm3,2 2499 2500 sub rdx,64 2501 jb NEAR $L$tail_avx512 2502 2503 vpxor xmm4,xmm4,XMMWORD[rsi] 2504 vpxor xmm5,xmm5,XMMWORD[16+rsi] 2505 vpxor xmm6,xmm6,XMMWORD[32+rsi] 2506 vpxor xmm7,xmm7,XMMWORD[48+rsi] 2507 lea rsi,[64+rsi] 2508 2509 vmovdqu XMMWORD[rdi],xmm4 2510 vmovdqu XMMWORD[16+rdi],xmm5 2511 vmovdqu XMMWORD[32+rdi],xmm6 2512 vmovdqu XMMWORD[48+rdi],xmm7 2513 lea rdi,[64+rdi] 2514 2515 jz NEAR $L$done_avx512 2516 2517 vextracti32x4 xmm4,zmm0,3 2518 vextracti32x4 xmm5,zmm1,3 2519 vextracti32x4 xmm6,zmm2,3 2520 vextracti32x4 xmm7,zmm3,3 2521 2522 sub rdx,64 2523 jb NEAR $L$tail_avx512 2524 2525 vpxor xmm4,xmm4,XMMWORD[rsi] 2526 vpxor xmm5,xmm5,XMMWORD[16+rsi] 2527 vpxor xmm6,xmm6,XMMWORD[32+rsi] 2528 vpxor xmm7,xmm7,XMMWORD[48+rsi] 2529 lea rsi,[64+rsi] 2530 2531 vmovdqu XMMWORD[rdi],xmm4 2532 vmovdqu XMMWORD[16+rdi],xmm5 2533 vmovdqu XMMWORD[32+rdi],xmm6 2534 vmovdqu XMMWORD[48+rdi],xmm7 2535 lea rdi,[64+rdi] 2536 2537 jnz NEAR $L$oop_outer_avx512 2538 2539 jmp NEAR $L$done_avx512 2540 2541 ALIGN 16 2542 $L$tail64_avx512: 2543 vmovdqa XMMWORD[rsp],xmm0 2544 vmovdqa XMMWORD[16+rsp],xmm1 2545 vmovdqa XMMWORD[32+rsp],xmm2 2546 vmovdqa XMMWORD[48+rsp],xmm3 2547 add rdx,64 2548 jmp NEAR $L$oop_tail_avx512 2549 2550 ALIGN 16 2551 $L$tail_avx512: 2552 vmovdqa XMMWORD[rsp],xmm4 2553 vmovdqa XMMWORD[16+rsp],xmm5 2554 vmovdqa XMMWORD[32+rsp],xmm6 2555 vmovdqa XMMWORD[48+rsp],xmm7 2556 add rdx,64 2557 2558 $L$oop_tail_avx512: 2559 movzx eax,BYTE[r8*1+rsi] 2560 movzx ecx,BYTE[r8*1+rsp] 2561 lea r8,[1+r8] 2562 xor eax,ecx 2563 mov BYTE[((-1))+r8*1+rdi],al 2564 dec rdx 2565 jnz NEAR $L$oop_tail_avx512 2566 2567 vmovdqu32 ZMMWORD[rsp],zmm16 2568 2569 $L$done_avx512: 2570 vzeroall 2571 movaps xmm6,XMMWORD[((-168))+r9] 2572 movaps xmm7,XMMWORD[((-152))+r9] 2573 movaps xmm8,XMMWORD[((-136))+r9] 2574 movaps xmm9,XMMWORD[((-120))+r9] 2575 movaps xmm10,XMMWORD[((-104))+r9] 2576 movaps xmm11,XMMWORD[((-88))+r9] 2577 movaps xmm12,XMMWORD[((-72))+r9] 2578 movaps xmm13,XMMWORD[((-56))+r9] 2579 movaps xmm14,XMMWORD[((-40))+r9] 2580 movaps xmm15,XMMWORD[((-24))+r9] 2581 lea rsp,[r9] 2582 2583 $L$avx512_epilogue: 2584 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2585 mov rsi,QWORD[16+rsp] 2586 DB 0F3h,0C3h ;repret 2587 2588 $L$SEH_end_ChaCha20_avx512: 2589 2590 ALIGN 32 2591 ChaCha20_avx512vl: 2592 mov QWORD[8+rsp],rdi ;WIN64 prologue 2593 mov QWORD[16+rsp],rsi 2594 mov rax,rsp 2595 $L$SEH_begin_ChaCha20_avx512vl: 2596 mov rdi,rcx 2597 mov rsi,rdx 2598 mov rdx,r8 2599 mov rcx,r9 2600 mov r8,QWORD[40+rsp] 2601 2602 2603 2604 $L$ChaCha20_avx512vl: 2605 mov r9,rsp 2606 2607 cmp rdx,128 2608 ja NEAR $L$ChaCha20_8xvl 2609 2610 sub rsp,64+168 2611 movaps XMMWORD[(-168)+r9],xmm6 2612 movaps XMMWORD[(-152)+r9],xmm7 2613 movaps XMMWORD[(-136)+r9],xmm8 2614 movaps XMMWORD[(-120)+r9],xmm9 2615 movaps XMMWORD[(-104)+r9],xmm10 2616 movaps XMMWORD[(-88)+r9],xmm11 2617 movaps XMMWORD[(-72)+r9],xmm12 2618 movaps XMMWORD[(-56)+r9],xmm13 2619 movaps XMMWORD[(-40)+r9],xmm14 2620 movaps XMMWORD[(-24)+r9],xmm15 2621 $L$avx512vl_body: 2622 vbroadcasti128 ymm0,XMMWORD[$L$sigma] 2623 vbroadcasti128 ymm1,XMMWORD[rcx] 2624 vbroadcasti128 ymm2,XMMWORD[16+rcx] 2625 vbroadcasti128 ymm3,XMMWORD[r8] 2626 2627 vmovdqa32 ymm16,ymm0 2628 vmovdqa32 ymm17,ymm1 2629 vmovdqa32 ymm18,ymm2 2630 vpaddd ymm3,ymm3,YMMWORD[$L$zeroz] 2631 vmovdqa32 ymm20,YMMWORD[$L$twoy] 2632 mov r8,10 2633 vmovdqa32 ymm19,ymm3 2634 jmp NEAR $L$oop_avx512vl 2635 2636 ALIGN 16 2637 $L$oop_outer_avx512vl: 2638 vmovdqa32 ymm2,ymm18 2639 vpaddd ymm3,ymm19,ymm20 2640 mov r8,10 2641 vmovdqa32 ymm19,ymm3 2642 jmp NEAR $L$oop_avx512vl 2643 2644 ALIGN 32 2645 $L$oop_avx512vl: 2646 vpaddd ymm0,ymm0,ymm1 2647 vpxor ymm3,ymm3,ymm0 2648 vprold ymm3,ymm3,16 2649 vpaddd ymm2,ymm2,ymm3 2650 vpxor ymm1,ymm1,ymm2 2651 vprold ymm1,ymm1,12 2652 vpaddd ymm0,ymm0,ymm1 2653 vpxor ymm3,ymm3,ymm0 2654 vprold ymm3,ymm3,8 2655 vpaddd ymm2,ymm2,ymm3 2656 vpxor ymm1,ymm1,ymm2 2657 vprold ymm1,ymm1,7 2658 vpshufd ymm2,ymm2,78 2659 vpshufd ymm1,ymm1,57 2660 vpshufd ymm3,ymm3,147 2661 vpaddd ymm0,ymm0,ymm1 2662 vpxor ymm3,ymm3,ymm0 2663 vprold ymm3,ymm3,16 2664 vpaddd ymm2,ymm2,ymm3 2665 vpxor ymm1,ymm1,ymm2 2666 vprold ymm1,ymm1,12 2667 vpaddd ymm0,ymm0,ymm1 2668 vpxor ymm3,ymm3,ymm0 2669 vprold ymm3,ymm3,8 2670 vpaddd ymm2,ymm2,ymm3 2671 vpxor ymm1,ymm1,ymm2 2672 vprold ymm1,ymm1,7 2673 vpshufd ymm2,ymm2,78 2674 vpshufd ymm1,ymm1,147 2675 vpshufd ymm3,ymm3,57 2676 dec r8 2677 jnz NEAR $L$oop_avx512vl 2678 vpaddd ymm0,ymm0,ymm16 2679 vpaddd ymm1,ymm1,ymm17 2680 vpaddd ymm2,ymm2,ymm18 2681 vpaddd ymm3,ymm3,ymm19 2682 2683 sub rdx,64 2684 jb NEAR $L$tail64_avx512vl 2685 2686 vpxor xmm4,xmm0,XMMWORD[rsi] 2687 vpxor xmm5,xmm1,XMMWORD[16+rsi] 2688 vpxor xmm6,xmm2,XMMWORD[32+rsi] 2689 vpxor xmm7,xmm3,XMMWORD[48+rsi] 2690 lea rsi,[64+rsi] 2691 2692 vmovdqu XMMWORD[rdi],xmm4 2693 vmovdqu XMMWORD[16+rdi],xmm5 2694 vmovdqu XMMWORD[32+rdi],xmm6 2695 vmovdqu XMMWORD[48+rdi],xmm7 2696 lea rdi,[64+rdi] 2697 2698 jz NEAR $L$done_avx512vl 2699 2700 vextracti128 xmm4,ymm0,1 2701 vextracti128 xmm5,ymm1,1 2702 vextracti128 xmm6,ymm2,1 2703 vextracti128 xmm7,ymm3,1 2704 2705 sub rdx,64 2706 jb NEAR $L$tail_avx512vl 2707 2708 vpxor xmm4,xmm4,XMMWORD[rsi] 2709 vpxor xmm5,xmm5,XMMWORD[16+rsi] 2710 vpxor xmm6,xmm6,XMMWORD[32+rsi] 2711 vpxor xmm7,xmm7,XMMWORD[48+rsi] 2712 lea rsi,[64+rsi] 2713 2714 vmovdqu XMMWORD[rdi],xmm4 2715 vmovdqu XMMWORD[16+rdi],xmm5 2716 vmovdqu XMMWORD[32+rdi],xmm6 2717 vmovdqu XMMWORD[48+rdi],xmm7 2718 lea rdi,[64+rdi] 2719 2720 vmovdqa32 ymm0,ymm16 2721 vmovdqa32 ymm1,ymm17 2722 jnz NEAR $L$oop_outer_avx512vl 2723 2724 jmp NEAR $L$done_avx512vl 2725 2726 ALIGN 16 2727 $L$tail64_avx512vl: 2728 vmovdqa XMMWORD[rsp],xmm0 2729 vmovdqa XMMWORD[16+rsp],xmm1 2730 vmovdqa XMMWORD[32+rsp],xmm2 2731 vmovdqa XMMWORD[48+rsp],xmm3 2732 add rdx,64 2733 jmp NEAR $L$oop_tail_avx512vl 2734 2735 ALIGN 16 2736 $L$tail_avx512vl: 2737 vmovdqa XMMWORD[rsp],xmm4 2738 vmovdqa XMMWORD[16+rsp],xmm5 2739 vmovdqa XMMWORD[32+rsp],xmm6 2740 vmovdqa XMMWORD[48+rsp],xmm7 2741 add rdx,64 2742 2743 $L$oop_tail_avx512vl: 2744 movzx eax,BYTE[r8*1+rsi] 2745 movzx ecx,BYTE[r8*1+rsp] 2746 lea r8,[1+r8] 2747 xor eax,ecx 2748 mov BYTE[((-1))+r8*1+rdi],al 2749 dec rdx 2750 jnz NEAR $L$oop_tail_avx512vl 2751 2752 vmovdqu32 YMMWORD[rsp],ymm16 2753 vmovdqu32 YMMWORD[32+rsp],ymm16 2754 2755 $L$done_avx512vl: 2756 vzeroall 2757 movaps xmm6,XMMWORD[((-168))+r9] 2758 movaps xmm7,XMMWORD[((-152))+r9] 2759 movaps xmm8,XMMWORD[((-136))+r9] 2760 movaps xmm9,XMMWORD[((-120))+r9] 2761 movaps xmm10,XMMWORD[((-104))+r9] 2762 movaps xmm11,XMMWORD[((-88))+r9] 2763 movaps xmm12,XMMWORD[((-72))+r9] 2764 movaps xmm13,XMMWORD[((-56))+r9] 2765 movaps xmm14,XMMWORD[((-40))+r9] 2766 movaps xmm15,XMMWORD[((-24))+r9] 2767 lea rsp,[r9] 2768 2769 $L$avx512vl_epilogue: 2770 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2771 mov rsi,QWORD[16+rsp] 2772 DB 0F3h,0C3h ;repret 2773 2774 $L$SEH_end_ChaCha20_avx512vl: 2775 2776 ALIGN 32 2777 ChaCha20_16x: 2778 mov QWORD[8+rsp],rdi ;WIN64 prologue 2779 mov QWORD[16+rsp],rsi 2780 mov rax,rsp 2781 $L$SEH_begin_ChaCha20_16x: 2782 mov rdi,rcx 2783 mov rsi,rdx 2784 mov rdx,r8 2785 mov rcx,r9 2786 mov r8,QWORD[40+rsp] 2787 2788 2789 2790 $L$ChaCha20_16x: 2791 mov r9,rsp 2792 2793 sub rsp,64+168 2794 and rsp,-64 2795 movaps XMMWORD[(-168)+r9],xmm6 2796 movaps XMMWORD[(-152)+r9],xmm7 2797 movaps XMMWORD[(-136)+r9],xmm8 2798 movaps XMMWORD[(-120)+r9],xmm9 2799 movaps XMMWORD[(-104)+r9],xmm10 2800 movaps XMMWORD[(-88)+r9],xmm11 2801 movaps XMMWORD[(-72)+r9],xmm12 2802 movaps XMMWORD[(-56)+r9],xmm13 2803 movaps XMMWORD[(-40)+r9],xmm14 2804 movaps XMMWORD[(-24)+r9],xmm15 2805 $L$16x_body: 2806 vzeroupper 2807 2808 lea r10,[$L$sigma] 2809 vbroadcasti32x4 zmm3,ZMMWORD[r10] 2810 vbroadcasti32x4 zmm7,ZMMWORD[rcx] 2811 vbroadcasti32x4 zmm11,ZMMWORD[16+rcx] 2812 vbroadcasti32x4 zmm15,ZMMWORD[r8] 2813 2814 vpshufd zmm0,zmm3,0x00 2815 vpshufd zmm1,zmm3,0x55 2816 vpshufd zmm2,zmm3,0xaa 2817 vpshufd zmm3,zmm3,0xff 2818 vmovdqa64 zmm16,zmm0 2819 vmovdqa64 zmm17,zmm1 2820 vmovdqa64 zmm18,zmm2 2821 vmovdqa64 zmm19,zmm3 2822 2823 vpshufd zmm4,zmm7,0x00 2824 vpshufd zmm5,zmm7,0x55 2825 vpshufd zmm6,zmm7,0xaa 2826 vpshufd zmm7,zmm7,0xff 2827 vmovdqa64 zmm20,zmm4 2828 vmovdqa64 zmm21,zmm5 2829 vmovdqa64 zmm22,zmm6 2830 vmovdqa64 zmm23,zmm7 2831 2832 vpshufd zmm8,zmm11,0x00 2833 vpshufd zmm9,zmm11,0x55 2834 vpshufd zmm10,zmm11,0xaa 2835 vpshufd zmm11,zmm11,0xff 2836 vmovdqa64 zmm24,zmm8 2837 vmovdqa64 zmm25,zmm9 2838 vmovdqa64 zmm26,zmm10 2839 vmovdqa64 zmm27,zmm11 2840 2841 vpshufd zmm12,zmm15,0x00 2842 vpshufd zmm13,zmm15,0x55 2843 vpshufd zmm14,zmm15,0xaa 2844 vpshufd zmm15,zmm15,0xff 2845 vpaddd zmm12,zmm12,ZMMWORD[$L$incz] 2846 vmovdqa64 zmm28,zmm12 2847 vmovdqa64 zmm29,zmm13 2848 vmovdqa64 zmm30,zmm14 2849 vmovdqa64 zmm31,zmm15 2850 2851 mov eax,10 2852 jmp NEAR $L$oop16x 2853 2854 ALIGN 32 2855 $L$oop_outer16x: 2856 vpbroadcastd zmm0,DWORD[r10] 2857 vpbroadcastd zmm1,DWORD[4+r10] 2858 vpbroadcastd zmm2,DWORD[8+r10] 2859 vpbroadcastd zmm3,DWORD[12+r10] 2860 vpaddd zmm28,zmm28,ZMMWORD[$L$sixteen] 2861 vmovdqa64 zmm4,zmm20 2862 vmovdqa64 zmm5,zmm21 2863 vmovdqa64 zmm6,zmm22 2864 vmovdqa64 zmm7,zmm23 2865 vmovdqa64 zmm8,zmm24 2866 vmovdqa64 zmm9,zmm25 2867 vmovdqa64 zmm10,zmm26 2868 vmovdqa64 zmm11,zmm27 2869 vmovdqa64 zmm12,zmm28 2870 vmovdqa64 zmm13,zmm29 2871 vmovdqa64 zmm14,zmm30 2872 vmovdqa64 zmm15,zmm31 2873 2874 vmovdqa64 zmm16,zmm0 2875 vmovdqa64 zmm17,zmm1 2876 vmovdqa64 zmm18,zmm2 2877 vmovdqa64 zmm19,zmm3 2878 2879 mov eax,10 2880 jmp NEAR $L$oop16x 2881 2882 ALIGN 32 2883 $L$oop16x: 2884 vpaddd zmm0,zmm0,zmm4 2885 vpaddd zmm1,zmm1,zmm5 2886 vpaddd zmm2,zmm2,zmm6 2887 vpaddd zmm3,zmm3,zmm7 2888 vpxord zmm12,zmm12,zmm0 2889 vpxord zmm13,zmm13,zmm1 2890 vpxord zmm14,zmm14,zmm2 2891 vpxord zmm15,zmm15,zmm3 2892 vprold zmm12,zmm12,16 2893 vprold zmm13,zmm13,16 2894 vprold zmm14,zmm14,16 2895 vprold zmm15,zmm15,16 2896 vpaddd zmm8,zmm8,zmm12 2897 vpaddd zmm9,zmm9,zmm13 2898 vpaddd zmm10,zmm10,zmm14 2899 vpaddd zmm11,zmm11,zmm15 2900 vpxord zmm4,zmm4,zmm8 2901 vpxord zmm5,zmm5,zmm9 2902 vpxord zmm6,zmm6,zmm10 2903 vpxord zmm7,zmm7,zmm11 2904 vprold zmm4,zmm4,12 2905 vprold zmm5,zmm5,12 2906 vprold zmm6,zmm6,12 2907 vprold zmm7,zmm7,12 2908 vpaddd zmm0,zmm0,zmm4 2909 vpaddd zmm1,zmm1,zmm5 2910 vpaddd zmm2,zmm2,zmm6 2911 vpaddd zmm3,zmm3,zmm7 2912 vpxord zmm12,zmm12,zmm0 2913 vpxord zmm13,zmm13,zmm1 2914 vpxord zmm14,zmm14,zmm2 2915 vpxord zmm15,zmm15,zmm3 2916 vprold zmm12,zmm12,8 2917 vprold zmm13,zmm13,8 2918 vprold zmm14,zmm14,8 2919 vprold zmm15,zmm15,8 2920 vpaddd zmm8,zmm8,zmm12 2921 vpaddd zmm9,zmm9,zmm13 2922 vpaddd zmm10,zmm10,zmm14 2923 vpaddd zmm11,zmm11,zmm15 2924 vpxord zmm4,zmm4,zmm8 2925 vpxord zmm5,zmm5,zmm9 2926 vpxord zmm6,zmm6,zmm10 2927 vpxord zmm7,zmm7,zmm11 2928 vprold zmm4,zmm4,7 2929 vprold zmm5,zmm5,7 2930 vprold zmm6,zmm6,7 2931 vprold zmm7,zmm7,7 2932 vpaddd zmm0,zmm0,zmm5 2933 vpaddd zmm1,zmm1,zmm6 2934 vpaddd zmm2,zmm2,zmm7 2935 vpaddd zmm3,zmm3,zmm4 2936 vpxord zmm15,zmm15,zmm0 2937 vpxord zmm12,zmm12,zmm1 2938 vpxord zmm13,zmm13,zmm2 2939 vpxord zmm14,zmm14,zmm3 2940 vprold zmm15,zmm15,16 2941 vprold zmm12,zmm12,16 2942 vprold zmm13,zmm13,16 2943 vprold zmm14,zmm14,16 2944 vpaddd zmm10,zmm10,zmm15 2945 vpaddd zmm11,zmm11,zmm12 2946 vpaddd zmm8,zmm8,zmm13 2947 vpaddd zmm9,zmm9,zmm14 2948 vpxord zmm5,zmm5,zmm10 2949 vpxord zmm6,zmm6,zmm11 2950 vpxord zmm7,zmm7,zmm8 2951 vpxord zmm4,zmm4,zmm9 2952 vprold zmm5,zmm5,12 2953 vprold zmm6,zmm6,12 2954 vprold zmm7,zmm7,12 2955 vprold zmm4,zmm4,12 2956 vpaddd zmm0,zmm0,zmm5 2957 vpaddd zmm1,zmm1,zmm6 2958 vpaddd zmm2,zmm2,zmm7 2959 vpaddd zmm3,zmm3,zmm4 2960 vpxord zmm15,zmm15,zmm0 2961 vpxord zmm12,zmm12,zmm1 2962 vpxord zmm13,zmm13,zmm2 2963 vpxord zmm14,zmm14,zmm3 2964 vprold zmm15,zmm15,8 2965 vprold zmm12,zmm12,8 2966 vprold zmm13,zmm13,8 2967 vprold zmm14,zmm14,8 2968 vpaddd zmm10,zmm10,zmm15 2969 vpaddd zmm11,zmm11,zmm12 2970 vpaddd zmm8,zmm8,zmm13 2971 vpaddd zmm9,zmm9,zmm14 2972 vpxord zmm5,zmm5,zmm10 2973 vpxord zmm6,zmm6,zmm11 2974 vpxord zmm7,zmm7,zmm8 2975 vpxord zmm4,zmm4,zmm9 2976 vprold zmm5,zmm5,7 2977 vprold zmm6,zmm6,7 2978 vprold zmm7,zmm7,7 2979 vprold zmm4,zmm4,7 2980 dec eax 2981 jnz NEAR $L$oop16x 2982 2983 vpaddd zmm0,zmm0,zmm16 2984 vpaddd zmm1,zmm1,zmm17 2985 vpaddd zmm2,zmm2,zmm18 2986 vpaddd zmm3,zmm3,zmm19 2987 2988 vpunpckldq zmm18,zmm0,zmm1 2989 vpunpckldq zmm19,zmm2,zmm3 2990 vpunpckhdq zmm0,zmm0,zmm1 2991 vpunpckhdq zmm2,zmm2,zmm3 2992 vpunpcklqdq zmm1,zmm18,zmm19 2993 vpunpckhqdq zmm18,zmm18,zmm19 2994 vpunpcklqdq zmm3,zmm0,zmm2 2995 vpunpckhqdq zmm0,zmm0,zmm2 2996 vpaddd zmm4,zmm4,zmm20 2997 vpaddd zmm5,zmm5,zmm21 2998 vpaddd zmm6,zmm6,zmm22 2999 vpaddd zmm7,zmm7,zmm23 3000 3001 vpunpckldq zmm2,zmm4,zmm5 3002 vpunpckldq zmm19,zmm6,zmm7 3003 vpunpckhdq zmm4,zmm4,zmm5 3004 vpunpckhdq zmm6,zmm6,zmm7 3005 vpunpcklqdq zmm5,zmm2,zmm19 3006 vpunpckhqdq zmm2,zmm2,zmm19 3007 vpunpcklqdq zmm7,zmm4,zmm6 3008 vpunpckhqdq zmm4,zmm4,zmm6 3009 vshufi32x4 zmm19,zmm1,zmm5,0x44 3010 vshufi32x4 zmm5,zmm1,zmm5,0xee 3011 vshufi32x4 zmm1,zmm18,zmm2,0x44 3012 vshufi32x4 zmm2,zmm18,zmm2,0xee 3013 vshufi32x4 zmm18,zmm3,zmm7,0x44 3014 vshufi32x4 zmm7,zmm3,zmm7,0xee 3015 vshufi32x4 zmm3,zmm0,zmm4,0x44 3016 vshufi32x4 zmm4,zmm0,zmm4,0xee 3017 vpaddd zmm8,zmm8,zmm24 3018 vpaddd zmm9,zmm9,zmm25 3019 vpaddd zmm10,zmm10,zmm26 3020 vpaddd zmm11,zmm11,zmm27 3021 3022 vpunpckldq zmm6,zmm8,zmm9 3023 vpunpckldq zmm0,zmm10,zmm11 3024 vpunpckhdq zmm8,zmm8,zmm9 3025 vpunpckhdq zmm10,zmm10,zmm11 3026 vpunpcklqdq zmm9,zmm6,zmm0 3027 vpunpckhqdq zmm6,zmm6,zmm0 3028 vpunpcklqdq zmm11,zmm8,zmm10 3029 vpunpckhqdq zmm8,zmm8,zmm10 3030 vpaddd zmm12,zmm12,zmm28 3031 vpaddd zmm13,zmm13,zmm29 3032 vpaddd zmm14,zmm14,zmm30 3033 vpaddd zmm15,zmm15,zmm31 3034 3035 vpunpckldq zmm10,zmm12,zmm13 3036 vpunpckldq zmm0,zmm14,zmm15 3037 vpunpckhdq zmm12,zmm12,zmm13 3038 vpunpckhdq zmm14,zmm14,zmm15 3039 vpunpcklqdq zmm13,zmm10,zmm0 3040 vpunpckhqdq zmm10,zmm10,zmm0 3041 vpunpcklqdq zmm15,zmm12,zmm14 3042 vpunpckhqdq zmm12,zmm12,zmm14 3043 vshufi32x4 zmm0,zmm9,zmm13,0x44 3044 vshufi32x4 zmm13,zmm9,zmm13,0xee 3045 vshufi32x4 zmm9,zmm6,zmm10,0x44 3046 vshufi32x4 zmm10,zmm6,zmm10,0xee 3047 vshufi32x4 zmm6,zmm11,zmm15,0x44 3048 vshufi32x4 zmm15,zmm11,zmm15,0xee 3049 vshufi32x4 zmm11,zmm8,zmm12,0x44 3050 vshufi32x4 zmm12,zmm8,zmm12,0xee 3051 vshufi32x4 zmm16,zmm19,zmm0,0x88 3052 vshufi32x4 zmm19,zmm19,zmm0,0xdd 3053 vshufi32x4 zmm0,zmm5,zmm13,0x88 3054 vshufi32x4 zmm13,zmm5,zmm13,0xdd 3055 vshufi32x4 zmm17,zmm1,zmm9,0x88 3056 vshufi32x4 zmm1,zmm1,zmm9,0xdd 3057 vshufi32x4 zmm9,zmm2,zmm10,0x88 3058 vshufi32x4 zmm10,zmm2,zmm10,0xdd 3059 vshufi32x4 zmm14,zmm18,zmm6,0x88 3060 vshufi32x4 zmm18,zmm18,zmm6,0xdd 3061 vshufi32x4 zmm6,zmm7,zmm15,0x88 3062 vshufi32x4 zmm15,zmm7,zmm15,0xdd 3063 vshufi32x4 zmm8,zmm3,zmm11,0x88 3064 vshufi32x4 zmm3,zmm3,zmm11,0xdd 3065 vshufi32x4 zmm11,zmm4,zmm12,0x88 3066 vshufi32x4 zmm12,zmm4,zmm12,0xdd 3067 cmp rdx,64*16 3068 jb NEAR $L$tail16x 3069 3070 vpxord zmm16,zmm16,ZMMWORD[rsi] 3071 vpxord zmm17,zmm17,ZMMWORD[64+rsi] 3072 vpxord zmm14,zmm14,ZMMWORD[128+rsi] 3073 vpxord zmm8,zmm8,ZMMWORD[192+rsi] 3074 vmovdqu32 ZMMWORD[rdi],zmm16 3075 vmovdqu32 ZMMWORD[64+rdi],zmm17 3076 vmovdqu32 ZMMWORD[128+rdi],zmm14 3077 vmovdqu32 ZMMWORD[192+rdi],zmm8 3078 3079 vpxord zmm19,zmm19,ZMMWORD[256+rsi] 3080 vpxord zmm1,zmm1,ZMMWORD[320+rsi] 3081 vpxord zmm18,zmm18,ZMMWORD[384+rsi] 3082 vpxord zmm3,zmm3,ZMMWORD[448+rsi] 3083 vmovdqu32 ZMMWORD[256+rdi],zmm19 3084 vmovdqu32 ZMMWORD[320+rdi],zmm1 3085 vmovdqu32 ZMMWORD[384+rdi],zmm18 3086 vmovdqu32 ZMMWORD[448+rdi],zmm3 3087 3088 vpxord zmm0,zmm0,ZMMWORD[512+rsi] 3089 vpxord zmm9,zmm9,ZMMWORD[576+rsi] 3090 vpxord zmm6,zmm6,ZMMWORD[640+rsi] 3091 vpxord zmm11,zmm11,ZMMWORD[704+rsi] 3092 vmovdqu32 ZMMWORD[512+rdi],zmm0 3093 vmovdqu32 ZMMWORD[576+rdi],zmm9 3094 vmovdqu32 ZMMWORD[640+rdi],zmm6 3095 vmovdqu32 ZMMWORD[704+rdi],zmm11 3096 3097 vpxord zmm13,zmm13,ZMMWORD[768+rsi] 3098 vpxord zmm10,zmm10,ZMMWORD[832+rsi] 3099 vpxord zmm15,zmm15,ZMMWORD[896+rsi] 3100 vpxord zmm12,zmm12,ZMMWORD[960+rsi] 3101 lea rsi,[1024+rsi] 3102 vmovdqu32 ZMMWORD[768+rdi],zmm13 3103 vmovdqu32 ZMMWORD[832+rdi],zmm10 3104 vmovdqu32 ZMMWORD[896+rdi],zmm15 3105 vmovdqu32 ZMMWORD[960+rdi],zmm12 3106 lea rdi,[1024+rdi] 3107 3108 sub rdx,64*16 3109 jnz NEAR $L$oop_outer16x 3110 3111 jmp NEAR $L$done16x 3112 3113 ALIGN 32 3114 $L$tail16x: 3115 xor r10,r10 3116 sub rdi,rsi 3117 cmp rdx,64*1 3118 jb NEAR $L$ess_than_64_16x 3119 vpxord zmm16,zmm16,ZMMWORD[rsi] 3120 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm16 3121 je NEAR $L$done16x 3122 vmovdqa32 zmm16,zmm17 3123 lea rsi,[64+rsi] 3124 3125 cmp rdx,64*2 3126 jb NEAR $L$ess_than_64_16x 3127 vpxord zmm17,zmm17,ZMMWORD[rsi] 3128 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm17 3129 je NEAR $L$done16x 3130 vmovdqa32 zmm16,zmm14 3131 lea rsi,[64+rsi] 3132 3133 cmp rdx,64*3 3134 jb NEAR $L$ess_than_64_16x 3135 vpxord zmm14,zmm14,ZMMWORD[rsi] 3136 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm14 3137 je NEAR $L$done16x 3138 vmovdqa32 zmm16,zmm8 3139 lea rsi,[64+rsi] 3140 3141 cmp rdx,64*4 3142 jb NEAR $L$ess_than_64_16x 3143 vpxord zmm8,zmm8,ZMMWORD[rsi] 3144 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm8 3145 je NEAR $L$done16x 3146 vmovdqa32 zmm16,zmm19 3147 lea rsi,[64+rsi] 3148 3149 cmp rdx,64*5 3150 jb NEAR $L$ess_than_64_16x 3151 vpxord zmm19,zmm19,ZMMWORD[rsi] 3152 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm19 3153 je NEAR $L$done16x 3154 vmovdqa32 zmm16,zmm1 3155 lea rsi,[64+rsi] 3156 3157 cmp rdx,64*6 3158 jb NEAR $L$ess_than_64_16x 3159 vpxord zmm1,zmm1,ZMMWORD[rsi] 3160 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm1 3161 je NEAR $L$done16x 3162 vmovdqa32 zmm16,zmm18 3163 lea rsi,[64+rsi] 3164 3165 cmp rdx,64*7 3166 jb NEAR $L$ess_than_64_16x 3167 vpxord zmm18,zmm18,ZMMWORD[rsi] 3168 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm18 3169 je NEAR $L$done16x 3170 vmovdqa32 zmm16,zmm3 3171 lea rsi,[64+rsi] 3172 3173 cmp rdx,64*8 3174 jb NEAR $L$ess_than_64_16x 3175 vpxord zmm3,zmm3,ZMMWORD[rsi] 3176 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm3 3177 je NEAR $L$done16x 3178 vmovdqa32 zmm16,zmm0 3179 lea rsi,[64+rsi] 3180 3181 cmp rdx,64*9 3182 jb NEAR $L$ess_than_64_16x 3183 vpxord zmm0,zmm0,ZMMWORD[rsi] 3184 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm0 3185 je NEAR $L$done16x 3186 vmovdqa32 zmm16,zmm9 3187 lea rsi,[64+rsi] 3188 3189 cmp rdx,64*10 3190 jb NEAR $L$ess_than_64_16x 3191 vpxord zmm9,zmm9,ZMMWORD[rsi] 3192 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm9 3193 je NEAR $L$done16x 3194 vmovdqa32 zmm16,zmm6 3195 lea rsi,[64+rsi] 3196 3197 cmp rdx,64*11 3198 jb NEAR $L$ess_than_64_16x 3199 vpxord zmm6,zmm6,ZMMWORD[rsi] 3200 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm6 3201 je NEAR $L$done16x 3202 vmovdqa32 zmm16,zmm11 3203 lea rsi,[64+rsi] 3204 3205 cmp rdx,64*12 3206 jb NEAR $L$ess_than_64_16x 3207 vpxord zmm11,zmm11,ZMMWORD[rsi] 3208 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm11 3209 je NEAR $L$done16x 3210 vmovdqa32 zmm16,zmm13 3211 lea rsi,[64+rsi] 3212 3213 cmp rdx,64*13 3214 jb NEAR $L$ess_than_64_16x 3215 vpxord zmm13,zmm13,ZMMWORD[rsi] 3216 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm13 3217 je NEAR $L$done16x 3218 vmovdqa32 zmm16,zmm10 3219 lea rsi,[64+rsi] 3220 3221 cmp rdx,64*14 3222 jb NEAR $L$ess_than_64_16x 3223 vpxord zmm10,zmm10,ZMMWORD[rsi] 3224 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm10 3225 je NEAR $L$done16x 3226 vmovdqa32 zmm16,zmm15 3227 lea rsi,[64+rsi] 3228 3229 cmp rdx,64*15 3230 jb NEAR $L$ess_than_64_16x 3231 vpxord zmm15,zmm15,ZMMWORD[rsi] 3232 vmovdqu32 ZMMWORD[rsi*1+rdi],zmm15 3233 je NEAR $L$done16x 3234 vmovdqa32 zmm16,zmm12 3235 lea rsi,[64+rsi] 3236 3237 $L$ess_than_64_16x: 3238 vmovdqa32 ZMMWORD[rsp],zmm16 3239 lea rdi,[rsi*1+rdi] 3240 and rdx,63 3241 3242 $L$oop_tail16x: 3243 movzx eax,BYTE[r10*1+rsi] 3244 movzx ecx,BYTE[r10*1+rsp] 3245 lea r10,[1+r10] 3246 xor eax,ecx 3247 mov BYTE[((-1))+r10*1+rdi],al 3248 dec rdx 3249 jnz NEAR $L$oop_tail16x 3250 3251 vpxord zmm16,zmm16,zmm16 3252 vmovdqa32 ZMMWORD[rsp],zmm16 3253 3254 $L$done16x: 3255 vzeroall 3256 movaps xmm6,XMMWORD[((-168))+r9] 3257 movaps xmm7,XMMWORD[((-152))+r9] 3258 movaps xmm8,XMMWORD[((-136))+r9] 3259 movaps xmm9,XMMWORD[((-120))+r9] 3260 movaps xmm10,XMMWORD[((-104))+r9] 3261 movaps xmm11,XMMWORD[((-88))+r9] 3262 movaps xmm12,XMMWORD[((-72))+r9] 3263 movaps xmm13,XMMWORD[((-56))+r9] 3264 movaps xmm14,XMMWORD[((-40))+r9] 3265 movaps xmm15,XMMWORD[((-24))+r9] 3266 lea rsp,[r9] 3267 3268 $L$16x_epilogue: 3269 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 3270 mov rsi,QWORD[16+rsp] 3271 DB 0F3h,0C3h ;repret 3272 3273 $L$SEH_end_ChaCha20_16x: 3274 3275 ALIGN 32 3276 ChaCha20_8xvl: 3277 mov QWORD[8+rsp],rdi ;WIN64 prologue 3278 mov QWORD[16+rsp],rsi 3279 mov rax,rsp 3280 $L$SEH_begin_ChaCha20_8xvl: 3281 mov rdi,rcx 3282 mov rsi,rdx 3283 mov rdx,r8 3284 mov rcx,r9 3285 mov r8,QWORD[40+rsp] 3286 3287 3288 3289 $L$ChaCha20_8xvl: 3290 mov r9,rsp 3291 3292 sub rsp,64+168 3293 and rsp,-64 3294 movaps XMMWORD[(-168)+r9],xmm6 3295 movaps XMMWORD[(-152)+r9],xmm7 3296 movaps XMMWORD[(-136)+r9],xmm8 3297 movaps XMMWORD[(-120)+r9],xmm9 3298 movaps XMMWORD[(-104)+r9],xmm10 3299 movaps XMMWORD[(-88)+r9],xmm11 3300 movaps XMMWORD[(-72)+r9],xmm12 3301 movaps XMMWORD[(-56)+r9],xmm13 3302 movaps XMMWORD[(-40)+r9],xmm14 3303 movaps XMMWORD[(-24)+r9],xmm15 3304 $L$8xvl_body: 3305 vzeroupper 3306 3307 lea r10,[$L$sigma] 3308 vbroadcasti128 ymm3,XMMWORD[r10] 3309 vbroadcasti128 ymm7,XMMWORD[rcx] 3310 vbroadcasti128 ymm11,XMMWORD[16+rcx] 3311 vbroadcasti128 ymm15,XMMWORD[r8] 3312 3313 vpshufd ymm0,ymm3,0x00 3314 vpshufd ymm1,ymm3,0x55 3315 vpshufd ymm2,ymm3,0xaa 3316 vpshufd ymm3,ymm3,0xff 3317 vmovdqa64 ymm16,ymm0 3318 vmovdqa64 ymm17,ymm1 3319 vmovdqa64 ymm18,ymm2 3320 vmovdqa64 ymm19,ymm3 3321 3322 vpshufd ymm4,ymm7,0x00 3323 vpshufd ymm5,ymm7,0x55 3324 vpshufd ymm6,ymm7,0xaa 3325 vpshufd ymm7,ymm7,0xff 3326 vmovdqa64 ymm20,ymm4 3327 vmovdqa64 ymm21,ymm5 3328 vmovdqa64 ymm22,ymm6 3329 vmovdqa64 ymm23,ymm7 3330 3331 vpshufd ymm8,ymm11,0x00 3332 vpshufd ymm9,ymm11,0x55 3333 vpshufd ymm10,ymm11,0xaa 3334 vpshufd ymm11,ymm11,0xff 3335 vmovdqa64 ymm24,ymm8 3336 vmovdqa64 ymm25,ymm9 3337 vmovdqa64 ymm26,ymm10 3338 vmovdqa64 ymm27,ymm11 3339 3340 vpshufd ymm12,ymm15,0x00 3341 vpshufd ymm13,ymm15,0x55 3342 vpshufd ymm14,ymm15,0xaa 3343 vpshufd ymm15,ymm15,0xff 3344 vpaddd ymm12,ymm12,YMMWORD[$L$incy] 3345 vmovdqa64 ymm28,ymm12 3346 vmovdqa64 ymm29,ymm13 3347 vmovdqa64 ymm30,ymm14 3348 vmovdqa64 ymm31,ymm15 3349 3350 mov eax,10 3351 jmp NEAR $L$oop8xvl 3352 3353 ALIGN 32 3354 $L$oop_outer8xvl: 3355 3356 3357 vpbroadcastd ymm2,DWORD[8+r10] 3358 vpbroadcastd ymm3,DWORD[12+r10] 3359 vpaddd ymm28,ymm28,YMMWORD[$L$eight] 3360 vmovdqa64 ymm4,ymm20 3361 vmovdqa64 ymm5,ymm21 3362 vmovdqa64 ymm6,ymm22 3363 vmovdqa64 ymm7,ymm23 3364 vmovdqa64 ymm8,ymm24 3365 vmovdqa64 ymm9,ymm25 3366 vmovdqa64 ymm10,ymm26 3367 vmovdqa64 ymm11,ymm27 3368 vmovdqa64 ymm12,ymm28 3369 vmovdqa64 ymm13,ymm29 3370 vmovdqa64 ymm14,ymm30 3371 vmovdqa64 ymm15,ymm31 3372 3373 vmovdqa64 ymm16,ymm0 3374 vmovdqa64 ymm17,ymm1 3375 vmovdqa64 ymm18,ymm2 3376 vmovdqa64 ymm19,ymm3 3377 3378 mov eax,10 3379 jmp NEAR $L$oop8xvl 3380 3381 ALIGN 32 3382 $L$oop8xvl: 3383 vpaddd ymm0,ymm0,ymm4 3384 vpaddd ymm1,ymm1,ymm5 3385 vpaddd ymm2,ymm2,ymm6 3386 vpaddd ymm3,ymm3,ymm7 3387 vpxor ymm12,ymm12,ymm0 3388 vpxor ymm13,ymm13,ymm1 3389 vpxor ymm14,ymm14,ymm2 3390 vpxor ymm15,ymm15,ymm3 3391 vprold ymm12,ymm12,16 3392 vprold ymm13,ymm13,16 3393 vprold ymm14,ymm14,16 3394 vprold ymm15,ymm15,16 3395 vpaddd ymm8,ymm8,ymm12 3396 vpaddd ymm9,ymm9,ymm13 3397 vpaddd ymm10,ymm10,ymm14 3398 vpaddd ymm11,ymm11,ymm15 3399 vpxor ymm4,ymm4,ymm8 3400 vpxor ymm5,ymm5,ymm9 3401 vpxor ymm6,ymm6,ymm10 3402 vpxor ymm7,ymm7,ymm11 3403 vprold ymm4,ymm4,12 3404 vprold ymm5,ymm5,12 3405 vprold ymm6,ymm6,12 3406 vprold ymm7,ymm7,12 3407 vpaddd ymm0,ymm0,ymm4 3408 vpaddd ymm1,ymm1,ymm5 3409 vpaddd ymm2,ymm2,ymm6 3410 vpaddd ymm3,ymm3,ymm7 3411 vpxor ymm12,ymm12,ymm0 3412 vpxor ymm13,ymm13,ymm1 3413 vpxor ymm14,ymm14,ymm2 3414 vpxor ymm15,ymm15,ymm3 3415 vprold ymm12,ymm12,8 3416 vprold ymm13,ymm13,8 3417 vprold ymm14,ymm14,8 3418 vprold ymm15,ymm15,8 3419 vpaddd ymm8,ymm8,ymm12 3420 vpaddd ymm9,ymm9,ymm13 3421 vpaddd ymm10,ymm10,ymm14 3422 vpaddd ymm11,ymm11,ymm15 3423 vpxor ymm4,ymm4,ymm8 3424 vpxor ymm5,ymm5,ymm9 3425 vpxor ymm6,ymm6,ymm10 3426 vpxor ymm7,ymm7,ymm11 3427 vprold ymm4,ymm4,7 3428 vprold ymm5,ymm5,7 3429 vprold ymm6,ymm6,7 3430 vprold ymm7,ymm7,7 3431 vpaddd ymm0,ymm0,ymm5 3432 vpaddd ymm1,ymm1,ymm6 3433 vpaddd ymm2,ymm2,ymm7 3434 vpaddd ymm3,ymm3,ymm4 3435 vpxor ymm15,ymm15,ymm0 3436 vpxor ymm12,ymm12,ymm1 3437 vpxor ymm13,ymm13,ymm2 3438 vpxor ymm14,ymm14,ymm3 3439 vprold ymm15,ymm15,16 3440 vprold ymm12,ymm12,16 3441 vprold ymm13,ymm13,16 3442 vprold ymm14,ymm14,16 3443 vpaddd ymm10,ymm10,ymm15 3444 vpaddd ymm11,ymm11,ymm12 3445 vpaddd ymm8,ymm8,ymm13 3446 vpaddd ymm9,ymm9,ymm14 3447 vpxor ymm5,ymm5,ymm10 3448 vpxor ymm6,ymm6,ymm11 3449 vpxor ymm7,ymm7,ymm8 3450 vpxor ymm4,ymm4,ymm9 3451 vprold ymm5,ymm5,12 3452 vprold ymm6,ymm6,12 3453 vprold ymm7,ymm7,12 3454 vprold ymm4,ymm4,12 3455 vpaddd ymm0,ymm0,ymm5 3456 vpaddd ymm1,ymm1,ymm6 3457 vpaddd ymm2,ymm2,ymm7 3458 vpaddd ymm3,ymm3,ymm4 3459 vpxor ymm15,ymm15,ymm0 3460 vpxor ymm12,ymm12,ymm1 3461 vpxor ymm13,ymm13,ymm2 3462 vpxor ymm14,ymm14,ymm3 3463 vprold ymm15,ymm15,8 3464 vprold ymm12,ymm12,8 3465 vprold ymm13,ymm13,8 3466 vprold ymm14,ymm14,8 3467 vpaddd ymm10,ymm10,ymm15 3468 vpaddd ymm11,ymm11,ymm12 3469 vpaddd ymm8,ymm8,ymm13 3470 vpaddd ymm9,ymm9,ymm14 3471 vpxor ymm5,ymm5,ymm10 3472 vpxor ymm6,ymm6,ymm11 3473 vpxor ymm7,ymm7,ymm8 3474 vpxor ymm4,ymm4,ymm9 3475 vprold ymm5,ymm5,7 3476 vprold ymm6,ymm6,7 3477 vprold ymm7,ymm7,7 3478 vprold ymm4,ymm4,7 3479 dec eax 3480 jnz NEAR $L$oop8xvl 3481 3482 vpaddd ymm0,ymm0,ymm16 3483 vpaddd ymm1,ymm1,ymm17 3484 vpaddd ymm2,ymm2,ymm18 3485 vpaddd ymm3,ymm3,ymm19 3486 3487 vpunpckldq ymm18,ymm0,ymm1 3488 vpunpckldq ymm19,ymm2,ymm3 3489 vpunpckhdq ymm0,ymm0,ymm1 3490 vpunpckhdq ymm2,ymm2,ymm3 3491 vpunpcklqdq ymm1,ymm18,ymm19 3492 vpunpckhqdq ymm18,ymm18,ymm19 3493 vpunpcklqdq ymm3,ymm0,ymm2 3494 vpunpckhqdq ymm0,ymm0,ymm2 3495 vpaddd ymm4,ymm4,ymm20 3496 vpaddd ymm5,ymm5,ymm21 3497 vpaddd ymm6,ymm6,ymm22 3498 vpaddd ymm7,ymm7,ymm23 3499 3500 vpunpckldq ymm2,ymm4,ymm5 3501 vpunpckldq ymm19,ymm6,ymm7 3502 vpunpckhdq ymm4,ymm4,ymm5 3503 vpunpckhdq ymm6,ymm6,ymm7 3504 vpunpcklqdq ymm5,ymm2,ymm19 3505 vpunpckhqdq ymm2,ymm2,ymm19 3506 vpunpcklqdq ymm7,ymm4,ymm6 3507 vpunpckhqdq ymm4,ymm4,ymm6 3508 vshufi32x4 ymm19,ymm1,ymm5,0 3509 vshufi32x4 ymm5,ymm1,ymm5,3 3510 vshufi32x4 ymm1,ymm18,ymm2,0 3511 vshufi32x4 ymm2,ymm18,ymm2,3 3512 vshufi32x4 ymm18,ymm3,ymm7,0 3513 vshufi32x4 ymm7,ymm3,ymm7,3 3514 vshufi32x4 ymm3,ymm0,ymm4,0 3515 vshufi32x4 ymm4,ymm0,ymm4,3 3516 vpaddd ymm8,ymm8,ymm24 3517 vpaddd ymm9,ymm9,ymm25 3518 vpaddd ymm10,ymm10,ymm26 3519 vpaddd ymm11,ymm11,ymm27 3520 3521 vpunpckldq ymm6,ymm8,ymm9 3522 vpunpckldq ymm0,ymm10,ymm11 3523 vpunpckhdq ymm8,ymm8,ymm9 3524 vpunpckhdq ymm10,ymm10,ymm11 3525 vpunpcklqdq ymm9,ymm6,ymm0 3526 vpunpckhqdq ymm6,ymm6,ymm0 3527 vpunpcklqdq ymm11,ymm8,ymm10 3528 vpunpckhqdq ymm8,ymm8,ymm10 3529 vpaddd ymm12,ymm12,ymm28 3530 vpaddd ymm13,ymm13,ymm29 3531 vpaddd ymm14,ymm14,ymm30 3532 vpaddd ymm15,ymm15,ymm31 3533 3534 vpunpckldq ymm10,ymm12,ymm13 3535 vpunpckldq ymm0,ymm14,ymm15 3536 vpunpckhdq ymm12,ymm12,ymm13 3537 vpunpckhdq ymm14,ymm14,ymm15 3538 vpunpcklqdq ymm13,ymm10,ymm0 3539 vpunpckhqdq ymm10,ymm10,ymm0 3540 vpunpcklqdq ymm15,ymm12,ymm14 3541 vpunpckhqdq ymm12,ymm12,ymm14 3542 vperm2i128 ymm0,ymm9,ymm13,0x20 3543 vperm2i128 ymm13,ymm9,ymm13,0x31 3544 vperm2i128 ymm9,ymm6,ymm10,0x20 3545 vperm2i128 ymm10,ymm6,ymm10,0x31 3546 vperm2i128 ymm6,ymm11,ymm15,0x20 3547 vperm2i128 ymm15,ymm11,ymm15,0x31 3548 vperm2i128 ymm11,ymm8,ymm12,0x20 3549 vperm2i128 ymm12,ymm8,ymm12,0x31 3550 cmp rdx,64*8 3551 jb NEAR $L$tail8xvl 3552 3553 mov eax,0x80 3554 vpxord ymm19,ymm19,YMMWORD[rsi] 3555 vpxor ymm0,ymm0,YMMWORD[32+rsi] 3556 vpxor ymm5,ymm5,YMMWORD[64+rsi] 3557 vpxor ymm13,ymm13,YMMWORD[96+rsi] 3558 lea rsi,[rax*1+rsi] 3559 vmovdqu32 YMMWORD[rdi],ymm19 3560 vmovdqu YMMWORD[32+rdi],ymm0 3561 vmovdqu YMMWORD[64+rdi],ymm5 3562 vmovdqu YMMWORD[96+rdi],ymm13 3563 lea rdi,[rax*1+rdi] 3564 3565 vpxor ymm1,ymm1,YMMWORD[rsi] 3566 vpxor ymm9,ymm9,YMMWORD[32+rsi] 3567 vpxor ymm2,ymm2,YMMWORD[64+rsi] 3568 vpxor ymm10,ymm10,YMMWORD[96+rsi] 3569 lea rsi,[rax*1+rsi] 3570 vmovdqu YMMWORD[rdi],ymm1 3571 vmovdqu YMMWORD[32+rdi],ymm9 3572 vmovdqu YMMWORD[64+rdi],ymm2 3573 vmovdqu YMMWORD[96+rdi],ymm10 3574 lea rdi,[rax*1+rdi] 3575 3576 vpxord ymm18,ymm18,YMMWORD[rsi] 3577 vpxor ymm6,ymm6,YMMWORD[32+rsi] 3578 vpxor ymm7,ymm7,YMMWORD[64+rsi] 3579 vpxor ymm15,ymm15,YMMWORD[96+rsi] 3580 lea rsi,[rax*1+rsi] 3581 vmovdqu32 YMMWORD[rdi],ymm18 3582 vmovdqu YMMWORD[32+rdi],ymm6 3583 vmovdqu YMMWORD[64+rdi],ymm7 3584 vmovdqu YMMWORD[96+rdi],ymm15 3585 lea rdi,[rax*1+rdi] 3586 3587 vpxor ymm3,ymm3,YMMWORD[rsi] 3588 vpxor ymm11,ymm11,YMMWORD[32+rsi] 3589 vpxor ymm4,ymm4,YMMWORD[64+rsi] 3590 vpxor ymm12,ymm12,YMMWORD[96+rsi] 3591 lea rsi,[rax*1+rsi] 3592 vmovdqu YMMWORD[rdi],ymm3 3593 vmovdqu YMMWORD[32+rdi],ymm11 3594 vmovdqu YMMWORD[64+rdi],ymm4 3595 vmovdqu YMMWORD[96+rdi],ymm12 3596 lea rdi,[rax*1+rdi] 3597 3598 vpbroadcastd ymm0,DWORD[r10] 3599 vpbroadcastd ymm1,DWORD[4+r10] 3600 3601 sub rdx,64*8 3602 jnz NEAR $L$oop_outer8xvl 3603 3604 jmp NEAR $L$done8xvl 3605 3606 ALIGN 32 3607 $L$tail8xvl: 3608 vmovdqa64 ymm8,ymm19 3609 xor r10,r10 3610 sub rdi,rsi 3611 cmp rdx,64*1 3612 jb NEAR $L$ess_than_64_8xvl 3613 vpxor ymm8,ymm8,YMMWORD[rsi] 3614 vpxor ymm0,ymm0,YMMWORD[32+rsi] 3615 vmovdqu YMMWORD[rsi*1+rdi],ymm8 3616 vmovdqu YMMWORD[32+rsi*1+rdi],ymm0 3617 je NEAR $L$done8xvl 3618 vmovdqa ymm8,ymm5 3619 vmovdqa ymm0,ymm13 3620 lea rsi,[64+rsi] 3621 3622 cmp rdx,64*2 3623 jb NEAR $L$ess_than_64_8xvl 3624 vpxor ymm5,ymm5,YMMWORD[rsi] 3625 vpxor ymm13,ymm13,YMMWORD[32+rsi] 3626 vmovdqu YMMWORD[rsi*1+rdi],ymm5 3627 vmovdqu YMMWORD[32+rsi*1+rdi],ymm13 3628 je NEAR $L$done8xvl 3629 vmovdqa ymm8,ymm1 3630 vmovdqa ymm0,ymm9 3631 lea rsi,[64+rsi] 3632 3633 cmp rdx,64*3 3634 jb NEAR $L$ess_than_64_8xvl 3635 vpxor ymm1,ymm1,YMMWORD[rsi] 3636 vpxor ymm9,ymm9,YMMWORD[32+rsi] 3637 vmovdqu YMMWORD[rsi*1+rdi],ymm1 3638 vmovdqu YMMWORD[32+rsi*1+rdi],ymm9 3639 je NEAR $L$done8xvl 3640 vmovdqa ymm8,ymm2 3641 vmovdqa ymm0,ymm10 3642 lea rsi,[64+rsi] 3643 3644 cmp rdx,64*4 3645 jb NEAR $L$ess_than_64_8xvl 3646 vpxor ymm2,ymm2,YMMWORD[rsi] 3647 vpxor ymm10,ymm10,YMMWORD[32+rsi] 3648 vmovdqu YMMWORD[rsi*1+rdi],ymm2 3649 vmovdqu YMMWORD[32+rsi*1+rdi],ymm10 3650 je NEAR $L$done8xvl 3651 vmovdqa32 ymm8,ymm18 3652 vmovdqa ymm0,ymm6 3653 lea rsi,[64+rsi] 3654 3655 cmp rdx,64*5 3656 jb NEAR $L$ess_than_64_8xvl 3657 vpxord ymm18,ymm18,YMMWORD[rsi] 3658 vpxor ymm6,ymm6,YMMWORD[32+rsi] 3659 vmovdqu32 YMMWORD[rsi*1+rdi],ymm18 3660 vmovdqu YMMWORD[32+rsi*1+rdi],ymm6 3661 je NEAR $L$done8xvl 3662 vmovdqa ymm8,ymm7 3663 vmovdqa ymm0,ymm15 3664 lea rsi,[64+rsi] 3665 3666 cmp rdx,64*6 3667 jb NEAR $L$ess_than_64_8xvl 3668 vpxor ymm7,ymm7,YMMWORD[rsi] 3669 vpxor ymm15,ymm15,YMMWORD[32+rsi] 3670 vmovdqu YMMWORD[rsi*1+rdi],ymm7 3671 vmovdqu YMMWORD[32+rsi*1+rdi],ymm15 3672 je NEAR $L$done8xvl 3673 vmovdqa ymm8,ymm3 3674 vmovdqa ymm0,ymm11 3675 lea rsi,[64+rsi] 3676 3677 cmp rdx,64*7 3678 jb NEAR $L$ess_than_64_8xvl 3679 vpxor ymm3,ymm3,YMMWORD[rsi] 3680 vpxor ymm11,ymm11,YMMWORD[32+rsi] 3681 vmovdqu YMMWORD[rsi*1+rdi],ymm3 3682 vmovdqu YMMWORD[32+rsi*1+rdi],ymm11 3683 je NEAR $L$done8xvl 3684 vmovdqa ymm8,ymm4 3685 vmovdqa ymm0,ymm12 3686 lea rsi,[64+rsi] 3687 3688 $L$ess_than_64_8xvl: 3689 vmovdqa YMMWORD[rsp],ymm8 3690 vmovdqa YMMWORD[32+rsp],ymm0 3691 lea rdi,[rsi*1+rdi] 3692 and rdx,63 3693 3694 $L$oop_tail8xvl: 3695 movzx eax,BYTE[r10*1+rsi] 3696 movzx ecx,BYTE[r10*1+rsp] 3697 lea r10,[1+r10] 3698 xor eax,ecx 3699 mov BYTE[((-1))+r10*1+rdi],al 3700 dec rdx 3701 jnz NEAR $L$oop_tail8xvl 3702 3703 vpxor ymm8,ymm8,ymm8 3704 vmovdqa YMMWORD[rsp],ymm8 3705 vmovdqa YMMWORD[32+rsp],ymm8 3706 3707 $L$done8xvl: 3708 vzeroall 3709 movaps xmm6,XMMWORD[((-168))+r9] 3710 movaps xmm7,XMMWORD[((-152))+r9] 3711 movaps xmm8,XMMWORD[((-136))+r9] 3712 movaps xmm9,XMMWORD[((-120))+r9] 3713 movaps xmm10,XMMWORD[((-104))+r9] 3714 movaps xmm11,XMMWORD[((-88))+r9] 3715 movaps xmm12,XMMWORD[((-72))+r9] 3716 movaps xmm13,XMMWORD[((-56))+r9] 3717 movaps xmm14,XMMWORD[((-40))+r9] 3718 movaps xmm15,XMMWORD[((-24))+r9] 3719 lea rsp,[r9] 3720 3721 $L$8xvl_epilogue: 3722 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 3723 mov rsi,QWORD[16+rsp] 3724 DB 0F3h,0C3h ;repret 3725 3726 $L$SEH_end_ChaCha20_8xvl: 1262 3727 EXTERN __imp_RtlVirtualUnwind 1263 3728 … … 1406 3871 DD $L$SEH_end_ChaCha20_4x wrt ..imagebase 1407 3872 DD $L$SEH_info_ChaCha20_4x wrt ..imagebase 3873 DD $L$SEH_begin_ChaCha20_4xop wrt ..imagebase 3874 DD $L$SEH_end_ChaCha20_4xop wrt ..imagebase 3875 DD $L$SEH_info_ChaCha20_4xop wrt ..imagebase 3876 DD $L$SEH_begin_ChaCha20_8x wrt ..imagebase 3877 DD $L$SEH_end_ChaCha20_8x wrt ..imagebase 3878 DD $L$SEH_info_ChaCha20_8x wrt ..imagebase 3879 DD $L$SEH_begin_ChaCha20_avx512 wrt ..imagebase 3880 DD $L$SEH_end_ChaCha20_avx512 wrt ..imagebase 3881 DD $L$SEH_info_ChaCha20_avx512 wrt ..imagebase 3882 3883 DD $L$SEH_begin_ChaCha20_avx512vl wrt ..imagebase 3884 DD $L$SEH_end_ChaCha20_avx512vl wrt ..imagebase 3885 DD $L$SEH_info_ChaCha20_avx512vl wrt ..imagebase 3886 3887 DD $L$SEH_begin_ChaCha20_16x wrt ..imagebase 3888 DD $L$SEH_end_ChaCha20_16x wrt ..imagebase 3889 DD $L$SEH_info_ChaCha20_16x wrt ..imagebase 3890 3891 DD $L$SEH_begin_ChaCha20_8xvl wrt ..imagebase 3892 DD $L$SEH_end_ChaCha20_8xvl wrt ..imagebase 3893 DD $L$SEH_info_ChaCha20_8xvl wrt ..imagebase 1408 3894 section .xdata rdata align=8 1409 3895 ALIGN 8 … … 1429 3915 DD $L$4x_body wrt ..imagebase,$L$4x_epilogue wrt ..imagebase 1430 3916 DD 0xa0,0 3917 $L$SEH_info_ChaCha20_4xop: 3918 DB 9,0,0,0 3919 DD simd_handler wrt ..imagebase 3920 DD $L$4xop_body wrt ..imagebase,$L$4xop_epilogue wrt ..imagebase 3921 DD 0xa0,0 3922 $L$SEH_info_ChaCha20_8x: 3923 DB 9,0,0,0 3924 DD simd_handler wrt ..imagebase 3925 DD $L$8x_body wrt ..imagebase,$L$8x_epilogue wrt ..imagebase 3926 DD 0xa0,0 3927 $L$SEH_info_ChaCha20_avx512: 3928 DB 9,0,0,0 3929 DD simd_handler wrt ..imagebase 3930 DD $L$avx512_body wrt ..imagebase,$L$avx512_epilogue wrt ..imagebase 3931 DD 0x20,0 3932 3933 $L$SEH_info_ChaCha20_avx512vl: 3934 DB 9,0,0,0 3935 DD simd_handler wrt ..imagebase 3936 DD $L$avx512vl_body wrt ..imagebase,$L$avx512vl_epilogue wrt ..imagebase 3937 DD 0x20,0 3938 3939 $L$SEH_info_ChaCha20_16x: 3940 DB 9,0,0,0 3941 DD simd_handler wrt ..imagebase 3942 DD $L$16x_body wrt ..imagebase,$L$16x_epilogue wrt ..imagebase 3943 DD 0xa0,0 3944 3945 $L$SEH_info_ChaCha20_8xvl: 3946 DB 9,0,0,0 3947 DD simd_handler wrt ..imagebase 3948 DD $L$8xvl_body wrt ..imagebase,$L$8xvl_epilogue wrt ..imagebase 3949 DD 0xa0,0 -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/ecp_nistz256-x86_64.S
r97373 r99371 2854 2854 2855 2855 2856 mov ecx,0x80100 2857 and ecx,DWORD[((OPENSSL_ia32cap_P+8))] 2858 cmp ecx,0x80100 2859 je NEAR $L$ecp_nistz256_ord_mul_montx 2856 2860 push rbp 2857 2861 … … 3187 3191 3188 3192 3193 mov ecx,0x80100 3194 and ecx,DWORD[((OPENSSL_ia32cap_P+8))] 3195 cmp ecx,0x80100 3196 je NEAR $L$ecp_nistz256_ord_sqr_montx 3189 3197 push rbp 3190 3198 … … 3471 3479 3472 3480 3481 ALIGN 32 3482 ecp_nistz256_ord_mul_montx: 3483 mov QWORD[8+rsp],rdi ;WIN64 prologue 3484 mov QWORD[16+rsp],rsi 3485 mov rax,rsp 3486 $L$SEH_begin_ecp_nistz256_ord_mul_montx: 3487 mov rdi,rcx 3488 mov rsi,rdx 3489 mov rdx,r8 3490 3491 3492 3493 $L$ecp_nistz256_ord_mul_montx: 3494 push rbp 3495 3496 push rbx 3497 3498 push r12 3499 3500 push r13 3501 3502 push r14 3503 3504 push r15 3505 3506 $L$ord_mulx_body: 3507 3508 mov rbx,rdx 3509 mov rdx,QWORD[rdx] 3510 mov r9,QWORD[rsi] 3511 mov r10,QWORD[8+rsi] 3512 mov r11,QWORD[16+rsi] 3513 mov r12,QWORD[24+rsi] 3514 lea rsi,[((-128))+rsi] 3515 lea r14,[(($L$ord-128))] 3516 mov r15,QWORD[$L$ordK] 3517 3518 3519 mulx r9,r8,r9 3520 mulx r10,rcx,r10 3521 mulx r11,rbp,r11 3522 add r9,rcx 3523 mulx r12,rcx,r12 3524 mov rdx,r8 3525 mulx rax,rdx,r15 3526 adc r10,rbp 3527 adc r11,rcx 3528 adc r12,0 3529 3530 3531 xor r13,r13 3532 mulx rbp,rcx,QWORD[((0+128))+r14] 3533 adcx r8,rcx 3534 adox r9,rbp 3535 3536 mulx rbp,rcx,QWORD[((8+128))+r14] 3537 adcx r9,rcx 3538 adox r10,rbp 3539 3540 mulx rbp,rcx,QWORD[((16+128))+r14] 3541 adcx r10,rcx 3542 adox r11,rbp 3543 3544 mulx rbp,rcx,QWORD[((24+128))+r14] 3545 mov rdx,QWORD[8+rbx] 3546 adcx r11,rcx 3547 adox r12,rbp 3548 adcx r12,r8 3549 adox r13,r8 3550 adc r13,0 3551 3552 3553 mulx rbp,rcx,QWORD[((0+128))+rsi] 3554 adcx r9,rcx 3555 adox r10,rbp 3556 3557 mulx rbp,rcx,QWORD[((8+128))+rsi] 3558 adcx r10,rcx 3559 adox r11,rbp 3560 3561 mulx rbp,rcx,QWORD[((16+128))+rsi] 3562 adcx r11,rcx 3563 adox r12,rbp 3564 3565 mulx rbp,rcx,QWORD[((24+128))+rsi] 3566 mov rdx,r9 3567 mulx rax,rdx,r15 3568 adcx r12,rcx 3569 adox r13,rbp 3570 3571 adcx r13,r8 3572 adox r8,r8 3573 adc r8,0 3574 3575 3576 mulx rbp,rcx,QWORD[((0+128))+r14] 3577 adcx r9,rcx 3578 adox r10,rbp 3579 3580 mulx rbp,rcx,QWORD[((8+128))+r14] 3581 adcx r10,rcx 3582 adox r11,rbp 3583 3584 mulx rbp,rcx,QWORD[((16+128))+r14] 3585 adcx r11,rcx 3586 adox r12,rbp 3587 3588 mulx rbp,rcx,QWORD[((24+128))+r14] 3589 mov rdx,QWORD[16+rbx] 3590 adcx r12,rcx 3591 adox r13,rbp 3592 adcx r13,r9 3593 adox r8,r9 3594 adc r8,0 3595 3596 3597 mulx rbp,rcx,QWORD[((0+128))+rsi] 3598 adcx r10,rcx 3599 adox r11,rbp 3600 3601 mulx rbp,rcx,QWORD[((8+128))+rsi] 3602 adcx r11,rcx 3603 adox r12,rbp 3604 3605 mulx rbp,rcx,QWORD[((16+128))+rsi] 3606 adcx r12,rcx 3607 adox r13,rbp 3608 3609 mulx rbp,rcx,QWORD[((24+128))+rsi] 3610 mov rdx,r10 3611 mulx rax,rdx,r15 3612 adcx r13,rcx 3613 adox r8,rbp 3614 3615 adcx r8,r9 3616 adox r9,r9 3617 adc r9,0 3618 3619 3620 mulx rbp,rcx,QWORD[((0+128))+r14] 3621 adcx r10,rcx 3622 adox r11,rbp 3623 3624 mulx rbp,rcx,QWORD[((8+128))+r14] 3625 adcx r11,rcx 3626 adox r12,rbp 3627 3628 mulx rbp,rcx,QWORD[((16+128))+r14] 3629 adcx r12,rcx 3630 adox r13,rbp 3631 3632 mulx rbp,rcx,QWORD[((24+128))+r14] 3633 mov rdx,QWORD[24+rbx] 3634 adcx r13,rcx 3635 adox r8,rbp 3636 adcx r8,r10 3637 adox r9,r10 3638 adc r9,0 3639 3640 3641 mulx rbp,rcx,QWORD[((0+128))+rsi] 3642 adcx r11,rcx 3643 adox r12,rbp 3644 3645 mulx rbp,rcx,QWORD[((8+128))+rsi] 3646 adcx r12,rcx 3647 adox r13,rbp 3648 3649 mulx rbp,rcx,QWORD[((16+128))+rsi] 3650 adcx r13,rcx 3651 adox r8,rbp 3652 3653 mulx rbp,rcx,QWORD[((24+128))+rsi] 3654 mov rdx,r11 3655 mulx rax,rdx,r15 3656 adcx r8,rcx 3657 adox r9,rbp 3658 3659 adcx r9,r10 3660 adox r10,r10 3661 adc r10,0 3662 3663 3664 mulx rbp,rcx,QWORD[((0+128))+r14] 3665 adcx r11,rcx 3666 adox r12,rbp 3667 3668 mulx rbp,rcx,QWORD[((8+128))+r14] 3669 adcx r12,rcx 3670 adox r13,rbp 3671 3672 mulx rbp,rcx,QWORD[((16+128))+r14] 3673 adcx r13,rcx 3674 adox r8,rbp 3675 3676 mulx rbp,rcx,QWORD[((24+128))+r14] 3677 lea r14,[128+r14] 3678 mov rbx,r12 3679 adcx r8,rcx 3680 adox r9,rbp 3681 mov rdx,r13 3682 adcx r9,r11 3683 adox r10,r11 3684 adc r10,0 3685 3686 3687 3688 mov rcx,r8 3689 sub r12,QWORD[r14] 3690 sbb r13,QWORD[8+r14] 3691 sbb r8,QWORD[16+r14] 3692 mov rbp,r9 3693 sbb r9,QWORD[24+r14] 3694 sbb r10,0 3695 3696 cmovc r12,rbx 3697 cmovc r13,rdx 3698 cmovc r8,rcx 3699 cmovc r9,rbp 3700 3701 mov QWORD[rdi],r12 3702 mov QWORD[8+rdi],r13 3703 mov QWORD[16+rdi],r8 3704 mov QWORD[24+rdi],r9 3705 3706 mov r15,QWORD[rsp] 3707 3708 mov r14,QWORD[8+rsp] 3709 3710 mov r13,QWORD[16+rsp] 3711 3712 mov r12,QWORD[24+rsp] 3713 3714 mov rbx,QWORD[32+rsp] 3715 3716 mov rbp,QWORD[40+rsp] 3717 3718 lea rsp,[48+rsp] 3719 3720 $L$ord_mulx_epilogue: 3721 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 3722 mov rsi,QWORD[16+rsp] 3723 DB 0F3h,0C3h ;repret 3724 3725 $L$SEH_end_ecp_nistz256_ord_mul_montx: 3726 3727 3728 ALIGN 32 3729 ecp_nistz256_ord_sqr_montx: 3730 mov QWORD[8+rsp],rdi ;WIN64 prologue 3731 mov QWORD[16+rsp],rsi 3732 mov rax,rsp 3733 $L$SEH_begin_ecp_nistz256_ord_sqr_montx: 3734 mov rdi,rcx 3735 mov rsi,rdx 3736 mov rdx,r8 3737 3738 3739 3740 $L$ecp_nistz256_ord_sqr_montx: 3741 push rbp 3742 3743 push rbx 3744 3745 push r12 3746 3747 push r13 3748 3749 push r14 3750 3751 push r15 3752 3753 $L$ord_sqrx_body: 3754 3755 mov rbx,rdx 3756 mov rdx,QWORD[rsi] 3757 mov r14,QWORD[8+rsi] 3758 mov r15,QWORD[16+rsi] 3759 mov r8,QWORD[24+rsi] 3760 lea rsi,[$L$ord] 3761 jmp NEAR $L$oop_ord_sqrx 3762 3763 ALIGN 32 3764 $L$oop_ord_sqrx: 3765 mulx r10,r9,r14 3766 mulx r11,rcx,r15 3767 mov rax,rdx 3768 DB 102,73,15,110,206 3769 mulx r12,rbp,r8 3770 mov rdx,r14 3771 add r10,rcx 3772 DB 102,73,15,110,215 3773 adc r11,rbp 3774 adc r12,0 3775 xor r13,r13 3776 3777 mulx rbp,rcx,r15 3778 adcx r11,rcx 3779 adox r12,rbp 3780 3781 mulx rbp,rcx,r8 3782 mov rdx,r15 3783 adcx r12,rcx 3784 adox r13,rbp 3785 adc r13,0 3786 3787 mulx r14,rcx,r8 3788 mov rdx,rax 3789 DB 102,73,15,110,216 3790 xor r15,r15 3791 adcx r9,r9 3792 adox r13,rcx 3793 adcx r10,r10 3794 adox r14,r15 3795 3796 3797 mulx rbp,r8,rdx 3798 DB 102,72,15,126,202 3799 adcx r11,r11 3800 adox r9,rbp 3801 adcx r12,r12 3802 mulx rax,rcx,rdx 3803 DB 102,72,15,126,210 3804 adcx r13,r13 3805 adox r10,rcx 3806 adcx r14,r14 3807 mulx rbp,rcx,rdx 3808 DB 0x67 3809 DB 102,72,15,126,218 3810 adox r11,rax 3811 adcx r15,r15 3812 adox r12,rcx 3813 adox r13,rbp 3814 mulx rax,rcx,rdx 3815 adox r14,rcx 3816 adox r15,rax 3817 3818 3819 mov rdx,r8 3820 mulx rcx,rdx,QWORD[32+rsi] 3821 3822 xor rax,rax 3823 mulx rbp,rcx,QWORD[rsi] 3824 adcx r8,rcx 3825 adox r9,rbp 3826 mulx rbp,rcx,QWORD[8+rsi] 3827 adcx r9,rcx 3828 adox r10,rbp 3829 mulx rbp,rcx,QWORD[16+rsi] 3830 adcx r10,rcx 3831 adox r11,rbp 3832 mulx rbp,rcx,QWORD[24+rsi] 3833 adcx r11,rcx 3834 adox r8,rbp 3835 adcx r8,rax 3836 3837 3838 mov rdx,r9 3839 mulx rcx,rdx,QWORD[32+rsi] 3840 3841 mulx rbp,rcx,QWORD[rsi] 3842 adox r9,rcx 3843 adcx r10,rbp 3844 mulx rbp,rcx,QWORD[8+rsi] 3845 adox r10,rcx 3846 adcx r11,rbp 3847 mulx rbp,rcx,QWORD[16+rsi] 3848 adox r11,rcx 3849 adcx r8,rbp 3850 mulx rbp,rcx,QWORD[24+rsi] 3851 adox r8,rcx 3852 adcx r9,rbp 3853 adox r9,rax 3854 3855 3856 mov rdx,r10 3857 mulx rcx,rdx,QWORD[32+rsi] 3858 3859 mulx rbp,rcx,QWORD[rsi] 3860 adcx r10,rcx 3861 adox r11,rbp 3862 mulx rbp,rcx,QWORD[8+rsi] 3863 adcx r11,rcx 3864 adox r8,rbp 3865 mulx rbp,rcx,QWORD[16+rsi] 3866 adcx r8,rcx 3867 adox r9,rbp 3868 mulx rbp,rcx,QWORD[24+rsi] 3869 adcx r9,rcx 3870 adox r10,rbp 3871 adcx r10,rax 3872 3873 3874 mov rdx,r11 3875 mulx rcx,rdx,QWORD[32+rsi] 3876 3877 mulx rbp,rcx,QWORD[rsi] 3878 adox r11,rcx 3879 adcx r8,rbp 3880 mulx rbp,rcx,QWORD[8+rsi] 3881 adox r8,rcx 3882 adcx r9,rbp 3883 mulx rbp,rcx,QWORD[16+rsi] 3884 adox r9,rcx 3885 adcx r10,rbp 3886 mulx rbp,rcx,QWORD[24+rsi] 3887 adox r10,rcx 3888 adcx r11,rbp 3889 adox r11,rax 3890 3891 3892 add r12,r8 3893 adc r9,r13 3894 mov rdx,r12 3895 adc r10,r14 3896 adc r11,r15 3897 mov r14,r9 3898 adc rax,0 3899 3900 3901 sub r12,QWORD[rsi] 3902 mov r15,r10 3903 sbb r9,QWORD[8+rsi] 3904 sbb r10,QWORD[16+rsi] 3905 mov r8,r11 3906 sbb r11,QWORD[24+rsi] 3907 sbb rax,0 3908 3909 cmovnc rdx,r12 3910 cmovnc r14,r9 3911 cmovnc r15,r10 3912 cmovnc r8,r11 3913 3914 dec rbx 3915 jnz NEAR $L$oop_ord_sqrx 3916 3917 mov QWORD[rdi],rdx 3918 mov QWORD[8+rdi],r14 3919 pxor xmm1,xmm1 3920 mov QWORD[16+rdi],r15 3921 pxor xmm2,xmm2 3922 mov QWORD[24+rdi],r8 3923 pxor xmm3,xmm3 3924 3925 mov r15,QWORD[rsp] 3926 3927 mov r14,QWORD[8+rsp] 3928 3929 mov r13,QWORD[16+rsp] 3930 3931 mov r12,QWORD[24+rsp] 3932 3933 mov rbx,QWORD[32+rsp] 3934 3935 mov rbp,QWORD[40+rsp] 3936 3937 lea rsp,[48+rsp] 3938 3939 $L$ord_sqrx_epilogue: 3940 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 3941 mov rsi,QWORD[16+rsp] 3942 DB 0F3h,0C3h ;repret 3943 3944 $L$SEH_end_ecp_nistz256_ord_sqr_montx: 3945 3946 3473 3947 3474 3948 … … 3486 3960 3487 3961 3962 mov ecx,0x80100 3963 and ecx,DWORD[((OPENSSL_ia32cap_P+8))] 3488 3964 lea rdx,[$L$RR] 3489 3965 jmp NEAR $L$mul_mont … … 3511 3987 3512 3988 3989 mov ecx,0x80100 3990 and ecx,DWORD[((OPENSSL_ia32cap_P+8))] 3513 3991 $L$mul_mont: 3514 3992 push rbp … … 3525 4003 3526 4004 $L$mul_body: 4005 cmp ecx,0x80100 4006 je NEAR $L$mul_montx 3527 4007 mov rbx,rdx 3528 4008 mov rax,QWORD[rdx] … … 3533 4013 3534 4014 call __ecp_nistz256_mul_montq 4015 jmp NEAR $L$mul_mont_done 4016 4017 ALIGN 32 4018 $L$mul_montx: 4019 mov rbx,rdx 4020 mov rdx,QWORD[rdx] 4021 mov r9,QWORD[rsi] 4022 mov r10,QWORD[8+rsi] 4023 mov r11,QWORD[16+rsi] 4024 mov r12,QWORD[24+rsi] 4025 lea rsi,[((-128))+rsi] 4026 4027 call __ecp_nistz256_mul_montx 3535 4028 $L$mul_mont_done: 3536 4029 mov r15,QWORD[rsp] … … 3793 4286 3794 4287 4288 mov ecx,0x80100 4289 and ecx,DWORD[((OPENSSL_ia32cap_P+8))] 3795 4290 push rbp 3796 4291 … … 3806 4301 3807 4302 $L$sqr_body: 4303 cmp ecx,0x80100 4304 je NEAR $L$sqr_montx 3808 4305 mov rax,QWORD[rsi] 3809 4306 mov r14,QWORD[8+rsi] … … 3812 4309 3813 4310 call __ecp_nistz256_sqr_montq 4311 jmp NEAR $L$sqr_mont_done 4312 4313 ALIGN 32 4314 $L$sqr_montx: 4315 mov rdx,QWORD[rsi] 4316 mov r14,QWORD[8+rsi] 4317 mov r15,QWORD[16+rsi] 4318 mov r8,QWORD[24+rsi] 4319 lea rsi,[((-128))+rsi] 4320 4321 call __ecp_nistz256_sqr_montx 3814 4322 $L$sqr_mont_done: 3815 4323 mov r15,QWORD[rsp] … … 3991 4499 mov QWORD[8+rdi],r13 3992 4500 cmovc r15,rcx 4501 mov QWORD[16+rdi],r14 4502 mov QWORD[24+rdi],r15 4503 4504 DB 0F3h,0C3h ;repret 4505 4506 4507 4508 ALIGN 32 4509 __ecp_nistz256_mul_montx: 4510 4511 4512 4513 mulx r9,r8,r9 4514 mulx r10,rcx,r10 4515 mov r14,32 4516 xor r13,r13 4517 mulx r11,rbp,r11 4518 mov r15,QWORD[(($L$poly+24))] 4519 adc r9,rcx 4520 mulx r12,rcx,r12 4521 mov rdx,r8 4522 adc r10,rbp 4523 shlx rbp,r8,r14 4524 adc r11,rcx 4525 shrx rcx,r8,r14 4526 adc r12,0 4527 4528 4529 4530 add r9,rbp 4531 adc r10,rcx 4532 4533 mulx rbp,rcx,r15 4534 mov rdx,QWORD[8+rbx] 4535 adc r11,rcx 4536 adc r12,rbp 4537 adc r13,0 4538 xor r8,r8 4539 4540 4541 4542 mulx rbp,rcx,QWORD[((0+128))+rsi] 4543 adcx r9,rcx 4544 adox r10,rbp 4545 4546 mulx rbp,rcx,QWORD[((8+128))+rsi] 4547 adcx r10,rcx 4548 adox r11,rbp 4549 4550 mulx rbp,rcx,QWORD[((16+128))+rsi] 4551 adcx r11,rcx 4552 adox r12,rbp 4553 4554 mulx rbp,rcx,QWORD[((24+128))+rsi] 4555 mov rdx,r9 4556 adcx r12,rcx 4557 shlx rcx,r9,r14 4558 adox r13,rbp 4559 shrx rbp,r9,r14 4560 4561 adcx r13,r8 4562 adox r8,r8 4563 adc r8,0 4564 4565 4566 4567 add r10,rcx 4568 adc r11,rbp 4569 4570 mulx rbp,rcx,r15 4571 mov rdx,QWORD[16+rbx] 4572 adc r12,rcx 4573 adc r13,rbp 4574 adc r8,0 4575 xor r9,r9 4576 4577 4578 4579 mulx rbp,rcx,QWORD[((0+128))+rsi] 4580 adcx r10,rcx 4581 adox r11,rbp 4582 4583 mulx rbp,rcx,QWORD[((8+128))+rsi] 4584 adcx r11,rcx 4585 adox r12,rbp 4586 4587 mulx rbp,rcx,QWORD[((16+128))+rsi] 4588 adcx r12,rcx 4589 adox r13,rbp 4590 4591 mulx rbp,rcx,QWORD[((24+128))+rsi] 4592 mov rdx,r10 4593 adcx r13,rcx 4594 shlx rcx,r10,r14 4595 adox r8,rbp 4596 shrx rbp,r10,r14 4597 4598 adcx r8,r9 4599 adox r9,r9 4600 adc r9,0 4601 4602 4603 4604 add r11,rcx 4605 adc r12,rbp 4606 4607 mulx rbp,rcx,r15 4608 mov rdx,QWORD[24+rbx] 4609 adc r13,rcx 4610 adc r8,rbp 4611 adc r9,0 4612 xor r10,r10 4613 4614 4615 4616 mulx rbp,rcx,QWORD[((0+128))+rsi] 4617 adcx r11,rcx 4618 adox r12,rbp 4619 4620 mulx rbp,rcx,QWORD[((8+128))+rsi] 4621 adcx r12,rcx 4622 adox r13,rbp 4623 4624 mulx rbp,rcx,QWORD[((16+128))+rsi] 4625 adcx r13,rcx 4626 adox r8,rbp 4627 4628 mulx rbp,rcx,QWORD[((24+128))+rsi] 4629 mov rdx,r11 4630 adcx r8,rcx 4631 shlx rcx,r11,r14 4632 adox r9,rbp 4633 shrx rbp,r11,r14 4634 4635 adcx r9,r10 4636 adox r10,r10 4637 adc r10,0 4638 4639 4640 4641 add r12,rcx 4642 adc r13,rbp 4643 4644 mulx rbp,rcx,r15 4645 mov rbx,r12 4646 mov r14,QWORD[(($L$poly+8))] 4647 adc r8,rcx 4648 mov rdx,r13 4649 adc r9,rbp 4650 adc r10,0 4651 4652 4653 4654 xor eax,eax 4655 mov rcx,r8 4656 sbb r12,-1 4657 sbb r13,r14 4658 sbb r8,0 4659 mov rbp,r9 4660 sbb r9,r15 4661 sbb r10,0 4662 4663 cmovc r12,rbx 4664 cmovc r13,rdx 4665 mov QWORD[rdi],r12 4666 cmovc r8,rcx 4667 mov QWORD[8+rdi],r13 4668 cmovc r9,rbp 4669 mov QWORD[16+rdi],r8 4670 mov QWORD[24+rdi],r9 4671 4672 DB 0F3h,0C3h ;repret 4673 4674 4675 4676 4677 ALIGN 32 4678 __ecp_nistz256_sqr_montx: 4679 4680 mulx r10,r9,r14 4681 mulx r11,rcx,r15 4682 xor eax,eax 4683 adc r10,rcx 4684 mulx r12,rbp,r8 4685 mov rdx,r14 4686 adc r11,rbp 4687 adc r12,0 4688 xor r13,r13 4689 4690 4691 mulx rbp,rcx,r15 4692 adcx r11,rcx 4693 adox r12,rbp 4694 4695 mulx rbp,rcx,r8 4696 mov rdx,r15 4697 adcx r12,rcx 4698 adox r13,rbp 4699 adc r13,0 4700 4701 4702 mulx r14,rcx,r8 4703 mov rdx,QWORD[((0+128))+rsi] 4704 xor r15,r15 4705 adcx r9,r9 4706 adox r13,rcx 4707 adcx r10,r10 4708 adox r14,r15 4709 4710 mulx rbp,r8,rdx 4711 mov rdx,QWORD[((8+128))+rsi] 4712 adcx r11,r11 4713 adox r9,rbp 4714 adcx r12,r12 4715 mulx rax,rcx,rdx 4716 mov rdx,QWORD[((16+128))+rsi] 4717 adcx r13,r13 4718 adox r10,rcx 4719 adcx r14,r14 4720 DB 0x67 4721 mulx rbp,rcx,rdx 4722 mov rdx,QWORD[((24+128))+rsi] 4723 adox r11,rax 4724 adcx r15,r15 4725 adox r12,rcx 4726 mov rsi,32 4727 adox r13,rbp 4728 DB 0x67,0x67 4729 mulx rax,rcx,rdx 4730 mov rdx,QWORD[(($L$poly+24))] 4731 adox r14,rcx 4732 shlx rcx,r8,rsi 4733 adox r15,rax 4734 shrx rax,r8,rsi 4735 mov rbp,rdx 4736 4737 4738 add r9,rcx 4739 adc r10,rax 4740 4741 mulx r8,rcx,r8 4742 adc r11,rcx 4743 shlx rcx,r9,rsi 4744 adc r8,0 4745 shrx rax,r9,rsi 4746 4747 4748 add r10,rcx 4749 adc r11,rax 4750 4751 mulx r9,rcx,r9 4752 adc r8,rcx 4753 shlx rcx,r10,rsi 4754 adc r9,0 4755 shrx rax,r10,rsi 4756 4757 4758 add r11,rcx 4759 adc r8,rax 4760 4761 mulx r10,rcx,r10 4762 adc r9,rcx 4763 shlx rcx,r11,rsi 4764 adc r10,0 4765 shrx rax,r11,rsi 4766 4767 4768 add r8,rcx 4769 adc r9,rax 4770 4771 mulx r11,rcx,r11 4772 adc r10,rcx 4773 adc r11,0 4774 4775 xor rdx,rdx 4776 add r12,r8 4777 mov rsi,QWORD[(($L$poly+8))] 4778 adc r13,r9 4779 mov r8,r12 4780 adc r14,r10 4781 adc r15,r11 4782 mov r9,r13 4783 adc rdx,0 4784 4785 sub r12,-1 4786 mov r10,r14 4787 sbb r13,rsi 4788 sbb r14,0 4789 mov r11,r15 4790 sbb r15,rbp 4791 sbb rdx,0 4792 4793 cmovc r12,r8 4794 cmovc r13,r9 4795 mov QWORD[rdi],r12 4796 cmovc r14,r10 4797 mov QWORD[8+rdi],r13 4798 cmovc r15,r11 3993 4799 mov QWORD[16+rdi],r14 3994 4800 mov QWORD[24+rdi],r15 … … 4146 4952 ecp_nistz256_gather_w5: 4147 4953 4954 mov eax,DWORD[((OPENSSL_ia32cap_P+8))] 4955 test eax,32 4956 jnz NEAR $L$avx2_gather_w5 4148 4957 lea rax,[((-136))+rsp] 4149 4958 $L$SEH_begin_ecp_nistz256_gather_w5: … … 4253 5062 ecp_nistz256_gather_w7: 4254 5063 5064 mov eax,DWORD[((OPENSSL_ia32cap_P+8))] 5065 test eax,32 5066 jnz NEAR $L$avx2_gather_w7 4255 5067 lea rax,[((-136))+rsp] 4256 5068 $L$SEH_begin_ecp_nistz256_gather_w7: … … 4320 5132 $L$SEH_end_ecp_nistz256_gather_w7: 4321 5133 5134 5135 5136 5137 ALIGN 32 5138 ecp_nistz256_avx2_gather_w5: 5139 5140 $L$avx2_gather_w5: 5141 vzeroupper 5142 lea rax,[((-136))+rsp] 5143 mov r11,rsp 5144 $L$SEH_begin_ecp_nistz256_avx2_gather_w5: 5145 DB 0x48,0x8d,0x60,0xe0 5146 DB 0xc5,0xf8,0x29,0x70,0xe0 5147 DB 0xc5,0xf8,0x29,0x78,0xf0 5148 DB 0xc5,0x78,0x29,0x40,0x00 5149 DB 0xc5,0x78,0x29,0x48,0x10 5150 DB 0xc5,0x78,0x29,0x50,0x20 5151 DB 0xc5,0x78,0x29,0x58,0x30 5152 DB 0xc5,0x78,0x29,0x60,0x40 5153 DB 0xc5,0x78,0x29,0x68,0x50 5154 DB 0xc5,0x78,0x29,0x70,0x60 5155 DB 0xc5,0x78,0x29,0x78,0x70 5156 vmovdqa ymm0,YMMWORD[$L$Two] 5157 5158 vpxor ymm2,ymm2,ymm2 5159 vpxor ymm3,ymm3,ymm3 5160 vpxor ymm4,ymm4,ymm4 5161 5162 vmovdqa ymm5,YMMWORD[$L$One] 5163 vmovdqa ymm10,YMMWORD[$L$Two] 5164 5165 vmovd xmm1,r8d 5166 vpermd ymm1,ymm2,ymm1 5167 5168 mov rax,8 5169 $L$select_loop_avx2_w5: 5170 5171 vmovdqa ymm6,YMMWORD[rdx] 5172 vmovdqa ymm7,YMMWORD[32+rdx] 5173 vmovdqa ymm8,YMMWORD[64+rdx] 5174 5175 vmovdqa ymm11,YMMWORD[96+rdx] 5176 vmovdqa ymm12,YMMWORD[128+rdx] 5177 vmovdqa ymm13,YMMWORD[160+rdx] 5178 5179 vpcmpeqd ymm9,ymm5,ymm1 5180 vpcmpeqd ymm14,ymm10,ymm1 5181 5182 vpaddd ymm5,ymm5,ymm0 5183 vpaddd ymm10,ymm10,ymm0 5184 lea rdx,[192+rdx] 5185 5186 vpand ymm6,ymm6,ymm9 5187 vpand ymm7,ymm7,ymm9 5188 vpand ymm8,ymm8,ymm9 5189 vpand ymm11,ymm11,ymm14 5190 vpand ymm12,ymm12,ymm14 5191 vpand ymm13,ymm13,ymm14 5192 5193 vpxor ymm2,ymm2,ymm6 5194 vpxor ymm3,ymm3,ymm7 5195 vpxor ymm4,ymm4,ymm8 5196 vpxor ymm2,ymm2,ymm11 5197 vpxor ymm3,ymm3,ymm12 5198 vpxor ymm4,ymm4,ymm13 5199 5200 dec rax 5201 jnz NEAR $L$select_loop_avx2_w5 5202 5203 vmovdqu YMMWORD[rcx],ymm2 5204 vmovdqu YMMWORD[32+rcx],ymm3 5205 vmovdqu YMMWORD[64+rcx],ymm4 5206 vzeroupper 5207 movaps xmm6,XMMWORD[rsp] 5208 movaps xmm7,XMMWORD[16+rsp] 5209 movaps xmm8,XMMWORD[32+rsp] 5210 movaps xmm9,XMMWORD[48+rsp] 5211 movaps xmm10,XMMWORD[64+rsp] 5212 movaps xmm11,XMMWORD[80+rsp] 5213 movaps xmm12,XMMWORD[96+rsp] 5214 movaps xmm13,XMMWORD[112+rsp] 5215 movaps xmm14,XMMWORD[128+rsp] 5216 movaps xmm15,XMMWORD[144+rsp] 5217 lea rsp,[r11] 5218 DB 0F3h,0C3h ;repret 5219 5220 $L$SEH_end_ecp_nistz256_avx2_gather_w5: 5221 5222 5223 5224 4322 5225 global ecp_nistz256_avx2_gather_w7 4323 5226 4324 5227 ALIGN 32 4325 5228 ecp_nistz256_avx2_gather_w7: 4326 mov QWORD[8+rsp],rdi ;WIN64 prologue 4327 mov QWORD[16+rsp],rsi 4328 mov rax,rsp 5229 5230 $L$avx2_gather_w7: 5231 vzeroupper 5232 mov r11,rsp 5233 lea rax,[((-136))+rsp] 4329 5234 $L$SEH_begin_ecp_nistz256_avx2_gather_w7: 4330 mov rdi,rcx 4331 mov rsi,rdx 4332 mov rdx,r8 4333 4334 4335 4336 DB 0x0f,0x0b 4337 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 4338 mov rsi,QWORD[16+rsp] 5235 DB 0x48,0x8d,0x60,0xe0 5236 DB 0xc5,0xf8,0x29,0x70,0xe0 5237 DB 0xc5,0xf8,0x29,0x78,0xf0 5238 DB 0xc5,0x78,0x29,0x40,0x00 5239 DB 0xc5,0x78,0x29,0x48,0x10 5240 DB 0xc5,0x78,0x29,0x50,0x20 5241 DB 0xc5,0x78,0x29,0x58,0x30 5242 DB 0xc5,0x78,0x29,0x60,0x40 5243 DB 0xc5,0x78,0x29,0x68,0x50 5244 DB 0xc5,0x78,0x29,0x70,0x60 5245 DB 0xc5,0x78,0x29,0x78,0x70 5246 vmovdqa ymm0,YMMWORD[$L$Three] 5247 5248 vpxor ymm2,ymm2,ymm2 5249 vpxor ymm3,ymm3,ymm3 5250 5251 vmovdqa ymm4,YMMWORD[$L$One] 5252 vmovdqa ymm8,YMMWORD[$L$Two] 5253 vmovdqa ymm12,YMMWORD[$L$Three] 5254 5255 vmovd xmm1,r8d 5256 vpermd ymm1,ymm2,ymm1 5257 5258 5259 mov rax,21 5260 $L$select_loop_avx2_w7: 5261 5262 vmovdqa ymm5,YMMWORD[rdx] 5263 vmovdqa ymm6,YMMWORD[32+rdx] 5264 5265 vmovdqa ymm9,YMMWORD[64+rdx] 5266 vmovdqa ymm10,YMMWORD[96+rdx] 5267 5268 vmovdqa ymm13,YMMWORD[128+rdx] 5269 vmovdqa ymm14,YMMWORD[160+rdx] 5270 5271 vpcmpeqd ymm7,ymm4,ymm1 5272 vpcmpeqd ymm11,ymm8,ymm1 5273 vpcmpeqd ymm15,ymm12,ymm1 5274 5275 vpaddd ymm4,ymm4,ymm0 5276 vpaddd ymm8,ymm8,ymm0 5277 vpaddd ymm12,ymm12,ymm0 5278 lea rdx,[192+rdx] 5279 5280 vpand ymm5,ymm5,ymm7 5281 vpand ymm6,ymm6,ymm7 5282 vpand ymm9,ymm9,ymm11 5283 vpand ymm10,ymm10,ymm11 5284 vpand ymm13,ymm13,ymm15 5285 vpand ymm14,ymm14,ymm15 5286 5287 vpxor ymm2,ymm2,ymm5 5288 vpxor ymm3,ymm3,ymm6 5289 vpxor ymm2,ymm2,ymm9 5290 vpxor ymm3,ymm3,ymm10 5291 vpxor ymm2,ymm2,ymm13 5292 vpxor ymm3,ymm3,ymm14 5293 5294 dec rax 5295 jnz NEAR $L$select_loop_avx2_w7 5296 5297 5298 vmovdqa ymm5,YMMWORD[rdx] 5299 vmovdqa ymm6,YMMWORD[32+rdx] 5300 5301 vpcmpeqd ymm7,ymm4,ymm1 5302 5303 vpand ymm5,ymm5,ymm7 5304 vpand ymm6,ymm6,ymm7 5305 5306 vpxor ymm2,ymm2,ymm5 5307 vpxor ymm3,ymm3,ymm6 5308 5309 vmovdqu YMMWORD[rcx],ymm2 5310 vmovdqu YMMWORD[32+rcx],ymm3 5311 vzeroupper 5312 movaps xmm6,XMMWORD[rsp] 5313 movaps xmm7,XMMWORD[16+rsp] 5314 movaps xmm8,XMMWORD[32+rsp] 5315 movaps xmm9,XMMWORD[48+rsp] 5316 movaps xmm10,XMMWORD[64+rsp] 5317 movaps xmm11,XMMWORD[80+rsp] 5318 movaps xmm12,XMMWORD[96+rsp] 5319 movaps xmm13,XMMWORD[112+rsp] 5320 movaps xmm14,XMMWORD[128+rsp] 5321 movaps xmm15,XMMWORD[144+rsp] 5322 lea rsp,[r11] 4339 5323 DB 0F3h,0C3h ;repret 4340 5324 4341 5325 $L$SEH_end_ecp_nistz256_avx2_gather_w7: 5326 4342 5327 4343 5328 ALIGN 32 … … 4482 5467 4483 5468 5469 mov ecx,0x80100 5470 and ecx,DWORD[((OPENSSL_ia32cap_P+8))] 5471 cmp ecx,0x80100 5472 je NEAR $L$point_doublex 4484 5473 push rbp 4485 5474 … … 4715 5704 4716 5705 5706 mov ecx,0x80100 5707 and ecx,DWORD[((OPENSSL_ia32cap_P+8))] 5708 cmp ecx,0x80100 5709 je NEAR $L$point_addx 4717 5710 push rbp 4718 5711 … … 5134 6127 5135 6128 6129 mov ecx,0x80100 6130 and ecx,DWORD[((OPENSSL_ia32cap_P+8))] 6131 cmp ecx,0x80100 6132 je NEAR $L$point_add_affinex 5136 6133 push rbp 5137 6134 … … 5453 6450 5454 6451 $L$SEH_end_ecp_nistz256_point_add_affine: 6452 6453 ALIGN 32 6454 __ecp_nistz256_add_tox: 6455 6456 xor r11,r11 6457 adc r12,QWORD[rbx] 6458 adc r13,QWORD[8+rbx] 6459 mov rax,r12 6460 adc r8,QWORD[16+rbx] 6461 adc r9,QWORD[24+rbx] 6462 mov rbp,r13 6463 adc r11,0 6464 6465 xor r10,r10 6466 sbb r12,-1 6467 mov rcx,r8 6468 sbb r13,r14 6469 sbb r8,0 6470 mov r10,r9 6471 sbb r9,r15 6472 sbb r11,0 6473 6474 cmovc r12,rax 6475 cmovc r13,rbp 6476 mov QWORD[rdi],r12 6477 cmovc r8,rcx 6478 mov QWORD[8+rdi],r13 6479 cmovc r9,r10 6480 mov QWORD[16+rdi],r8 6481 mov QWORD[24+rdi],r9 6482 6483 DB 0F3h,0C3h ;repret 6484 6485 6486 6487 6488 ALIGN 32 6489 __ecp_nistz256_sub_fromx: 6490 6491 xor r11,r11 6492 sbb r12,QWORD[rbx] 6493 sbb r13,QWORD[8+rbx] 6494 mov rax,r12 6495 sbb r8,QWORD[16+rbx] 6496 sbb r9,QWORD[24+rbx] 6497 mov rbp,r13 6498 sbb r11,0 6499 6500 xor r10,r10 6501 adc r12,-1 6502 mov rcx,r8 6503 adc r13,r14 6504 adc r8,0 6505 mov r10,r9 6506 adc r9,r15 6507 6508 bt r11,0 6509 cmovnc r12,rax 6510 cmovnc r13,rbp 6511 mov QWORD[rdi],r12 6512 cmovnc r8,rcx 6513 mov QWORD[8+rdi],r13 6514 cmovnc r9,r10 6515 mov QWORD[16+rdi],r8 6516 mov QWORD[24+rdi],r9 6517 6518 DB 0F3h,0C3h ;repret 6519 6520 6521 6522 6523 ALIGN 32 6524 __ecp_nistz256_subx: 6525 6526 xor r11,r11 6527 sbb rax,r12 6528 sbb rbp,r13 6529 mov r12,rax 6530 sbb rcx,r8 6531 sbb r10,r9 6532 mov r13,rbp 6533 sbb r11,0 6534 6535 xor r9,r9 6536 adc rax,-1 6537 mov r8,rcx 6538 adc rbp,r14 6539 adc rcx,0 6540 mov r9,r10 6541 adc r10,r15 6542 6543 bt r11,0 6544 cmovc r12,rax 6545 cmovc r13,rbp 6546 cmovc r8,rcx 6547 cmovc r9,r10 6548 6549 DB 0F3h,0C3h ;repret 6550 6551 6552 6553 6554 ALIGN 32 6555 __ecp_nistz256_mul_by_2x: 6556 6557 xor r11,r11 6558 adc r12,r12 6559 adc r13,r13 6560 mov rax,r12 6561 adc r8,r8 6562 adc r9,r9 6563 mov rbp,r13 6564 adc r11,0 6565 6566 xor r10,r10 6567 sbb r12,-1 6568 mov rcx,r8 6569 sbb r13,r14 6570 sbb r8,0 6571 mov r10,r9 6572 sbb r9,r15 6573 sbb r11,0 6574 6575 cmovc r12,rax 6576 cmovc r13,rbp 6577 mov QWORD[rdi],r12 6578 cmovc r8,rcx 6579 mov QWORD[8+rdi],r13 6580 cmovc r9,r10 6581 mov QWORD[16+rdi],r8 6582 mov QWORD[24+rdi],r9 6583 6584 DB 0F3h,0C3h ;repret 6585 6586 6587 6588 ALIGN 32 6589 ecp_nistz256_point_doublex: 6590 mov QWORD[8+rsp],rdi ;WIN64 prologue 6591 mov QWORD[16+rsp],rsi 6592 mov rax,rsp 6593 $L$SEH_begin_ecp_nistz256_point_doublex: 6594 mov rdi,rcx 6595 mov rsi,rdx 6596 6597 6598 6599 $L$point_doublex: 6600 push rbp 6601 6602 push rbx 6603 6604 push r12 6605 6606 push r13 6607 6608 push r14 6609 6610 push r15 6611 6612 sub rsp,32*5+8 6613 6614 $L$point_doublex_body: 6615 6616 $L$point_double_shortcutx: 6617 movdqu xmm0,XMMWORD[rsi] 6618 mov rbx,rsi 6619 movdqu xmm1,XMMWORD[16+rsi] 6620 mov r12,QWORD[((32+0))+rsi] 6621 mov r13,QWORD[((32+8))+rsi] 6622 mov r8,QWORD[((32+16))+rsi] 6623 mov r9,QWORD[((32+24))+rsi] 6624 mov r14,QWORD[(($L$poly+8))] 6625 mov r15,QWORD[(($L$poly+24))] 6626 movdqa XMMWORD[96+rsp],xmm0 6627 movdqa XMMWORD[(96+16)+rsp],xmm1 6628 lea r10,[32+rdi] 6629 lea r11,[64+rdi] 6630 DB 102,72,15,110,199 6631 DB 102,73,15,110,202 6632 DB 102,73,15,110,211 6633 6634 lea rdi,[rsp] 6635 call __ecp_nistz256_mul_by_2x 6636 6637 mov rdx,QWORD[((64+0))+rsi] 6638 mov r14,QWORD[((64+8))+rsi] 6639 mov r15,QWORD[((64+16))+rsi] 6640 mov r8,QWORD[((64+24))+rsi] 6641 lea rsi,[((64-128))+rsi] 6642 lea rdi,[64+rsp] 6643 call __ecp_nistz256_sqr_montx 6644 6645 mov rdx,QWORD[((0+0))+rsp] 6646 mov r14,QWORD[((8+0))+rsp] 6647 lea rsi,[((-128+0))+rsp] 6648 mov r15,QWORD[((16+0))+rsp] 6649 mov r8,QWORD[((24+0))+rsp] 6650 lea rdi,[rsp] 6651 call __ecp_nistz256_sqr_montx 6652 6653 mov rdx,QWORD[32+rbx] 6654 mov r9,QWORD[((64+0))+rbx] 6655 mov r10,QWORD[((64+8))+rbx] 6656 mov r11,QWORD[((64+16))+rbx] 6657 mov r12,QWORD[((64+24))+rbx] 6658 lea rsi,[((64-128))+rbx] 6659 lea rbx,[32+rbx] 6660 DB 102,72,15,126,215 6661 call __ecp_nistz256_mul_montx 6662 call __ecp_nistz256_mul_by_2x 6663 6664 mov r12,QWORD[((96+0))+rsp] 6665 mov r13,QWORD[((96+8))+rsp] 6666 lea rbx,[64+rsp] 6667 mov r8,QWORD[((96+16))+rsp] 6668 mov r9,QWORD[((96+24))+rsp] 6669 lea rdi,[32+rsp] 6670 call __ecp_nistz256_add_tox 6671 6672 mov r12,QWORD[((96+0))+rsp] 6673 mov r13,QWORD[((96+8))+rsp] 6674 lea rbx,[64+rsp] 6675 mov r8,QWORD[((96+16))+rsp] 6676 mov r9,QWORD[((96+24))+rsp] 6677 lea rdi,[64+rsp] 6678 call __ecp_nistz256_sub_fromx 6679 6680 mov rdx,QWORD[((0+0))+rsp] 6681 mov r14,QWORD[((8+0))+rsp] 6682 lea rsi,[((-128+0))+rsp] 6683 mov r15,QWORD[((16+0))+rsp] 6684 mov r8,QWORD[((24+0))+rsp] 6685 DB 102,72,15,126,207 6686 call __ecp_nistz256_sqr_montx 6687 xor r9,r9 6688 mov rax,r12 6689 add r12,-1 6690 mov r10,r13 6691 adc r13,rsi 6692 mov rcx,r14 6693 adc r14,0 6694 mov r8,r15 6695 adc r15,rbp 6696 adc r9,0 6697 xor rsi,rsi 6698 test rax,1 6699 6700 cmovz r12,rax 6701 cmovz r13,r10 6702 cmovz r14,rcx 6703 cmovz r15,r8 6704 cmovz r9,rsi 6705 6706 mov rax,r13 6707 shr r12,1 6708 shl rax,63 6709 mov r10,r14 6710 shr r13,1 6711 or r12,rax 6712 shl r10,63 6713 mov rcx,r15 6714 shr r14,1 6715 or r13,r10 6716 shl rcx,63 6717 mov QWORD[rdi],r12 6718 shr r15,1 6719 mov QWORD[8+rdi],r13 6720 shl r9,63 6721 or r14,rcx 6722 or r15,r9 6723 mov QWORD[16+rdi],r14 6724 mov QWORD[24+rdi],r15 6725 mov rdx,QWORD[64+rsp] 6726 lea rbx,[64+rsp] 6727 mov r9,QWORD[((0+32))+rsp] 6728 mov r10,QWORD[((8+32))+rsp] 6729 lea rsi,[((-128+32))+rsp] 6730 mov r11,QWORD[((16+32))+rsp] 6731 mov r12,QWORD[((24+32))+rsp] 6732 lea rdi,[32+rsp] 6733 call __ecp_nistz256_mul_montx 6734 6735 lea rdi,[128+rsp] 6736 call __ecp_nistz256_mul_by_2x 6737 6738 lea rbx,[32+rsp] 6739 lea rdi,[32+rsp] 6740 call __ecp_nistz256_add_tox 6741 6742 mov rdx,QWORD[96+rsp] 6743 lea rbx,[96+rsp] 6744 mov r9,QWORD[((0+0))+rsp] 6745 mov r10,QWORD[((8+0))+rsp] 6746 lea rsi,[((-128+0))+rsp] 6747 mov r11,QWORD[((16+0))+rsp] 6748 mov r12,QWORD[((24+0))+rsp] 6749 lea rdi,[rsp] 6750 call __ecp_nistz256_mul_montx 6751 6752 lea rdi,[128+rsp] 6753 call __ecp_nistz256_mul_by_2x 6754 6755 mov rdx,QWORD[((0+32))+rsp] 6756 mov r14,QWORD[((8+32))+rsp] 6757 lea rsi,[((-128+32))+rsp] 6758 mov r15,QWORD[((16+32))+rsp] 6759 mov r8,QWORD[((24+32))+rsp] 6760 DB 102,72,15,126,199 6761 call __ecp_nistz256_sqr_montx 6762 6763 lea rbx,[128+rsp] 6764 mov r8,r14 6765 mov r9,r15 6766 mov r14,rsi 6767 mov r15,rbp 6768 call __ecp_nistz256_sub_fromx 6769 6770 mov rax,QWORD[((0+0))+rsp] 6771 mov rbp,QWORD[((0+8))+rsp] 6772 mov rcx,QWORD[((0+16))+rsp] 6773 mov r10,QWORD[((0+24))+rsp] 6774 lea rdi,[rsp] 6775 call __ecp_nistz256_subx 6776 6777 mov rdx,QWORD[32+rsp] 6778 lea rbx,[32+rsp] 6779 mov r14,r12 6780 xor ecx,ecx 6781 mov QWORD[((0+0))+rsp],r12 6782 mov r10,r13 6783 mov QWORD[((0+8))+rsp],r13 6784 cmovz r11,r8 6785 mov QWORD[((0+16))+rsp],r8 6786 lea rsi,[((0-128))+rsp] 6787 cmovz r12,r9 6788 mov QWORD[((0+24))+rsp],r9 6789 mov r9,r14 6790 lea rdi,[rsp] 6791 call __ecp_nistz256_mul_montx 6792 6793 DB 102,72,15,126,203 6794 DB 102,72,15,126,207 6795 call __ecp_nistz256_sub_fromx 6796 6797 lea rsi,[((160+56))+rsp] 6798 6799 mov r15,QWORD[((-48))+rsi] 6800 6801 mov r14,QWORD[((-40))+rsi] 6802 6803 mov r13,QWORD[((-32))+rsi] 6804 6805 mov r12,QWORD[((-24))+rsi] 6806 6807 mov rbx,QWORD[((-16))+rsi] 6808 6809 mov rbp,QWORD[((-8))+rsi] 6810 6811 lea rsp,[rsi] 6812 6813 $L$point_doublex_epilogue: 6814 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 6815 mov rsi,QWORD[16+rsp] 6816 DB 0F3h,0C3h ;repret 6817 6818 $L$SEH_end_ecp_nistz256_point_doublex: 6819 6820 ALIGN 32 6821 ecp_nistz256_point_addx: 6822 mov QWORD[8+rsp],rdi ;WIN64 prologue 6823 mov QWORD[16+rsp],rsi 6824 mov rax,rsp 6825 $L$SEH_begin_ecp_nistz256_point_addx: 6826 mov rdi,rcx 6827 mov rsi,rdx 6828 mov rdx,r8 6829 6830 6831 6832 $L$point_addx: 6833 push rbp 6834 6835 push rbx 6836 6837 push r12 6838 6839 push r13 6840 6841 push r14 6842 6843 push r15 6844 6845 sub rsp,32*18+8 6846 6847 $L$point_addx_body: 6848 6849 movdqu xmm0,XMMWORD[rsi] 6850 movdqu xmm1,XMMWORD[16+rsi] 6851 movdqu xmm2,XMMWORD[32+rsi] 6852 movdqu xmm3,XMMWORD[48+rsi] 6853 movdqu xmm4,XMMWORD[64+rsi] 6854 movdqu xmm5,XMMWORD[80+rsi] 6855 mov rbx,rsi 6856 mov rsi,rdx 6857 movdqa XMMWORD[384+rsp],xmm0 6858 movdqa XMMWORD[(384+16)+rsp],xmm1 6859 movdqa XMMWORD[416+rsp],xmm2 6860 movdqa XMMWORD[(416+16)+rsp],xmm3 6861 movdqa XMMWORD[448+rsp],xmm4 6862 movdqa XMMWORD[(448+16)+rsp],xmm5 6863 por xmm5,xmm4 6864 6865 movdqu xmm0,XMMWORD[rsi] 6866 pshufd xmm3,xmm5,0xb1 6867 movdqu xmm1,XMMWORD[16+rsi] 6868 movdqu xmm2,XMMWORD[32+rsi] 6869 por xmm5,xmm3 6870 movdqu xmm3,XMMWORD[48+rsi] 6871 mov rdx,QWORD[((64+0))+rsi] 6872 mov r14,QWORD[((64+8))+rsi] 6873 mov r15,QWORD[((64+16))+rsi] 6874 mov r8,QWORD[((64+24))+rsi] 6875 movdqa XMMWORD[480+rsp],xmm0 6876 pshufd xmm4,xmm5,0x1e 6877 movdqa XMMWORD[(480+16)+rsp],xmm1 6878 movdqu xmm0,XMMWORD[64+rsi] 6879 movdqu xmm1,XMMWORD[80+rsi] 6880 movdqa XMMWORD[512+rsp],xmm2 6881 movdqa XMMWORD[(512+16)+rsp],xmm3 6882 por xmm5,xmm4 6883 pxor xmm4,xmm4 6884 por xmm1,xmm0 6885 DB 102,72,15,110,199 6886 6887 lea rsi,[((64-128))+rsi] 6888 mov QWORD[((544+0))+rsp],rdx 6889 mov QWORD[((544+8))+rsp],r14 6890 mov QWORD[((544+16))+rsp],r15 6891 mov QWORD[((544+24))+rsp],r8 6892 lea rdi,[96+rsp] 6893 call __ecp_nistz256_sqr_montx 6894 6895 pcmpeqd xmm5,xmm4 6896 pshufd xmm4,xmm1,0xb1 6897 por xmm4,xmm1 6898 pshufd xmm5,xmm5,0 6899 pshufd xmm3,xmm4,0x1e 6900 por xmm4,xmm3 6901 pxor xmm3,xmm3 6902 pcmpeqd xmm4,xmm3 6903 pshufd xmm4,xmm4,0 6904 mov rdx,QWORD[((64+0))+rbx] 6905 mov r14,QWORD[((64+8))+rbx] 6906 mov r15,QWORD[((64+16))+rbx] 6907 mov r8,QWORD[((64+24))+rbx] 6908 DB 102,72,15,110,203 6909 6910 lea rsi,[((64-128))+rbx] 6911 lea rdi,[32+rsp] 6912 call __ecp_nistz256_sqr_montx 6913 6914 mov rdx,QWORD[544+rsp] 6915 lea rbx,[544+rsp] 6916 mov r9,QWORD[((0+96))+rsp] 6917 mov r10,QWORD[((8+96))+rsp] 6918 lea rsi,[((-128+96))+rsp] 6919 mov r11,QWORD[((16+96))+rsp] 6920 mov r12,QWORD[((24+96))+rsp] 6921 lea rdi,[224+rsp] 6922 call __ecp_nistz256_mul_montx 6923 6924 mov rdx,QWORD[448+rsp] 6925 lea rbx,[448+rsp] 6926 mov r9,QWORD[((0+32))+rsp] 6927 mov r10,QWORD[((8+32))+rsp] 6928 lea rsi,[((-128+32))+rsp] 6929 mov r11,QWORD[((16+32))+rsp] 6930 mov r12,QWORD[((24+32))+rsp] 6931 lea rdi,[256+rsp] 6932 call __ecp_nistz256_mul_montx 6933 6934 mov rdx,QWORD[416+rsp] 6935 lea rbx,[416+rsp] 6936 mov r9,QWORD[((0+224))+rsp] 6937 mov r10,QWORD[((8+224))+rsp] 6938 lea rsi,[((-128+224))+rsp] 6939 mov r11,QWORD[((16+224))+rsp] 6940 mov r12,QWORD[((24+224))+rsp] 6941 lea rdi,[224+rsp] 6942 call __ecp_nistz256_mul_montx 6943 6944 mov rdx,QWORD[512+rsp] 6945 lea rbx,[512+rsp] 6946 mov r9,QWORD[((0+256))+rsp] 6947 mov r10,QWORD[((8+256))+rsp] 6948 lea rsi,[((-128+256))+rsp] 6949 mov r11,QWORD[((16+256))+rsp] 6950 mov r12,QWORD[((24+256))+rsp] 6951 lea rdi,[256+rsp] 6952 call __ecp_nistz256_mul_montx 6953 6954 lea rbx,[224+rsp] 6955 lea rdi,[64+rsp] 6956 call __ecp_nistz256_sub_fromx 6957 6958 or r12,r13 6959 movdqa xmm2,xmm4 6960 or r12,r8 6961 or r12,r9 6962 por xmm2,xmm5 6963 DB 102,73,15,110,220 6964 6965 mov rdx,QWORD[384+rsp] 6966 lea rbx,[384+rsp] 6967 mov r9,QWORD[((0+96))+rsp] 6968 mov r10,QWORD[((8+96))+rsp] 6969 lea rsi,[((-128+96))+rsp] 6970 mov r11,QWORD[((16+96))+rsp] 6971 mov r12,QWORD[((24+96))+rsp] 6972 lea rdi,[160+rsp] 6973 call __ecp_nistz256_mul_montx 6974 6975 mov rdx,QWORD[480+rsp] 6976 lea rbx,[480+rsp] 6977 mov r9,QWORD[((0+32))+rsp] 6978 mov r10,QWORD[((8+32))+rsp] 6979 lea rsi,[((-128+32))+rsp] 6980 mov r11,QWORD[((16+32))+rsp] 6981 mov r12,QWORD[((24+32))+rsp] 6982 lea rdi,[192+rsp] 6983 call __ecp_nistz256_mul_montx 6984 6985 lea rbx,[160+rsp] 6986 lea rdi,[rsp] 6987 call __ecp_nistz256_sub_fromx 6988 6989 or r12,r13 6990 or r12,r8 6991 or r12,r9 6992 6993 DB 102,73,15,126,208 6994 DB 102,73,15,126,217 6995 6996 or r12,r8 6997 or r12,r9 6998 6999 7000 DB 0x3e 7001 jnz NEAR $L$add_proceedx 7002 7003 $L$add_doublex: 7004 DB 102,72,15,126,206 7005 DB 102,72,15,126,199 7006 add rsp,416 7007 7008 jmp NEAR $L$point_double_shortcutx 7009 7010 7011 ALIGN 32 7012 $L$add_proceedx: 7013 mov rdx,QWORD[((0+64))+rsp] 7014 mov r14,QWORD[((8+64))+rsp] 7015 lea rsi,[((-128+64))+rsp] 7016 mov r15,QWORD[((16+64))+rsp] 7017 mov r8,QWORD[((24+64))+rsp] 7018 lea rdi,[96+rsp] 7019 call __ecp_nistz256_sqr_montx 7020 7021 mov rdx,QWORD[448+rsp] 7022 lea rbx,[448+rsp] 7023 mov r9,QWORD[((0+0))+rsp] 7024 mov r10,QWORD[((8+0))+rsp] 7025 lea rsi,[((-128+0))+rsp] 7026 mov r11,QWORD[((16+0))+rsp] 7027 mov r12,QWORD[((24+0))+rsp] 7028 lea rdi,[352+rsp] 7029 call __ecp_nistz256_mul_montx 7030 7031 mov rdx,QWORD[((0+0))+rsp] 7032 mov r14,QWORD[((8+0))+rsp] 7033 lea rsi,[((-128+0))+rsp] 7034 mov r15,QWORD[((16+0))+rsp] 7035 mov r8,QWORD[((24+0))+rsp] 7036 lea rdi,[32+rsp] 7037 call __ecp_nistz256_sqr_montx 7038 7039 mov rdx,QWORD[544+rsp] 7040 lea rbx,[544+rsp] 7041 mov r9,QWORD[((0+352))+rsp] 7042 mov r10,QWORD[((8+352))+rsp] 7043 lea rsi,[((-128+352))+rsp] 7044 mov r11,QWORD[((16+352))+rsp] 7045 mov r12,QWORD[((24+352))+rsp] 7046 lea rdi,[352+rsp] 7047 call __ecp_nistz256_mul_montx 7048 7049 mov rdx,QWORD[rsp] 7050 lea rbx,[rsp] 7051 mov r9,QWORD[((0+32))+rsp] 7052 mov r10,QWORD[((8+32))+rsp] 7053 lea rsi,[((-128+32))+rsp] 7054 mov r11,QWORD[((16+32))+rsp] 7055 mov r12,QWORD[((24+32))+rsp] 7056 lea rdi,[128+rsp] 7057 call __ecp_nistz256_mul_montx 7058 7059 mov rdx,QWORD[160+rsp] 7060 lea rbx,[160+rsp] 7061 mov r9,QWORD[((0+32))+rsp] 7062 mov r10,QWORD[((8+32))+rsp] 7063 lea rsi,[((-128+32))+rsp] 7064 mov r11,QWORD[((16+32))+rsp] 7065 mov r12,QWORD[((24+32))+rsp] 7066 lea rdi,[192+rsp] 7067 call __ecp_nistz256_mul_montx 7068 7069 7070 7071 7072 xor r11,r11 7073 add r12,r12 7074 lea rsi,[96+rsp] 7075 adc r13,r13 7076 mov rax,r12 7077 adc r8,r8 7078 adc r9,r9 7079 mov rbp,r13 7080 adc r11,0 7081 7082 sub r12,-1 7083 mov rcx,r8 7084 sbb r13,r14 7085 sbb r8,0 7086 mov r10,r9 7087 sbb r9,r15 7088 sbb r11,0 7089 7090 cmovc r12,rax 7091 mov rax,QWORD[rsi] 7092 cmovc r13,rbp 7093 mov rbp,QWORD[8+rsi] 7094 cmovc r8,rcx 7095 mov rcx,QWORD[16+rsi] 7096 cmovc r9,r10 7097 mov r10,QWORD[24+rsi] 7098 7099 call __ecp_nistz256_subx 7100 7101 lea rbx,[128+rsp] 7102 lea rdi,[288+rsp] 7103 call __ecp_nistz256_sub_fromx 7104 7105 mov rax,QWORD[((192+0))+rsp] 7106 mov rbp,QWORD[((192+8))+rsp] 7107 mov rcx,QWORD[((192+16))+rsp] 7108 mov r10,QWORD[((192+24))+rsp] 7109 lea rdi,[320+rsp] 7110 7111 call __ecp_nistz256_subx 7112 7113 mov QWORD[rdi],r12 7114 mov QWORD[8+rdi],r13 7115 mov QWORD[16+rdi],r8 7116 mov QWORD[24+rdi],r9 7117 mov rdx,QWORD[128+rsp] 7118 lea rbx,[128+rsp] 7119 mov r9,QWORD[((0+224))+rsp] 7120 mov r10,QWORD[((8+224))+rsp] 7121 lea rsi,[((-128+224))+rsp] 7122 mov r11,QWORD[((16+224))+rsp] 7123 mov r12,QWORD[((24+224))+rsp] 7124 lea rdi,[256+rsp] 7125 call __ecp_nistz256_mul_montx 7126 7127 mov rdx,QWORD[320+rsp] 7128 lea rbx,[320+rsp] 7129 mov r9,QWORD[((0+64))+rsp] 7130 mov r10,QWORD[((8+64))+rsp] 7131 lea rsi,[((-128+64))+rsp] 7132 mov r11,QWORD[((16+64))+rsp] 7133 mov r12,QWORD[((24+64))+rsp] 7134 lea rdi,[320+rsp] 7135 call __ecp_nistz256_mul_montx 7136 7137 lea rbx,[256+rsp] 7138 lea rdi,[320+rsp] 7139 call __ecp_nistz256_sub_fromx 7140 7141 DB 102,72,15,126,199 7142 7143 movdqa xmm0,xmm5 7144 movdqa xmm1,xmm5 7145 pandn xmm0,XMMWORD[352+rsp] 7146 movdqa xmm2,xmm5 7147 pandn xmm1,XMMWORD[((352+16))+rsp] 7148 movdqa xmm3,xmm5 7149 pand xmm2,XMMWORD[544+rsp] 7150 pand xmm3,XMMWORD[((544+16))+rsp] 7151 por xmm2,xmm0 7152 por xmm3,xmm1 7153 7154 movdqa xmm0,xmm4 7155 movdqa xmm1,xmm4 7156 pandn xmm0,xmm2 7157 movdqa xmm2,xmm4 7158 pandn xmm1,xmm3 7159 movdqa xmm3,xmm4 7160 pand xmm2,XMMWORD[448+rsp] 7161 pand xmm3,XMMWORD[((448+16))+rsp] 7162 por xmm2,xmm0 7163 por xmm3,xmm1 7164 movdqu XMMWORD[64+rdi],xmm2 7165 movdqu XMMWORD[80+rdi],xmm3 7166 7167 movdqa xmm0,xmm5 7168 movdqa xmm1,xmm5 7169 pandn xmm0,XMMWORD[288+rsp] 7170 movdqa xmm2,xmm5 7171 pandn xmm1,XMMWORD[((288+16))+rsp] 7172 movdqa xmm3,xmm5 7173 pand xmm2,XMMWORD[480+rsp] 7174 pand xmm3,XMMWORD[((480+16))+rsp] 7175 por xmm2,xmm0 7176 por xmm3,xmm1 7177 7178 movdqa xmm0,xmm4 7179 movdqa xmm1,xmm4 7180 pandn xmm0,xmm2 7181 movdqa xmm2,xmm4 7182 pandn xmm1,xmm3 7183 movdqa xmm3,xmm4 7184 pand xmm2,XMMWORD[384+rsp] 7185 pand xmm3,XMMWORD[((384+16))+rsp] 7186 por xmm2,xmm0 7187 por xmm3,xmm1 7188 movdqu XMMWORD[rdi],xmm2 7189 movdqu XMMWORD[16+rdi],xmm3 7190 7191 movdqa xmm0,xmm5 7192 movdqa xmm1,xmm5 7193 pandn xmm0,XMMWORD[320+rsp] 7194 movdqa xmm2,xmm5 7195 pandn xmm1,XMMWORD[((320+16))+rsp] 7196 movdqa xmm3,xmm5 7197 pand xmm2,XMMWORD[512+rsp] 7198 pand xmm3,XMMWORD[((512+16))+rsp] 7199 por xmm2,xmm0 7200 por xmm3,xmm1 7201 7202 movdqa xmm0,xmm4 7203 movdqa xmm1,xmm4 7204 pandn xmm0,xmm2 7205 movdqa xmm2,xmm4 7206 pandn xmm1,xmm3 7207 movdqa xmm3,xmm4 7208 pand xmm2,XMMWORD[416+rsp] 7209 pand xmm3,XMMWORD[((416+16))+rsp] 7210 por xmm2,xmm0 7211 por xmm3,xmm1 7212 movdqu XMMWORD[32+rdi],xmm2 7213 movdqu XMMWORD[48+rdi],xmm3 7214 7215 $L$add_donex: 7216 lea rsi,[((576+56))+rsp] 7217 7218 mov r15,QWORD[((-48))+rsi] 7219 7220 mov r14,QWORD[((-40))+rsi] 7221 7222 mov r13,QWORD[((-32))+rsi] 7223 7224 mov r12,QWORD[((-24))+rsi] 7225 7226 mov rbx,QWORD[((-16))+rsi] 7227 7228 mov rbp,QWORD[((-8))+rsi] 7229 7230 lea rsp,[rsi] 7231 7232 $L$point_addx_epilogue: 7233 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 7234 mov rsi,QWORD[16+rsp] 7235 DB 0F3h,0C3h ;repret 7236 7237 $L$SEH_end_ecp_nistz256_point_addx: 7238 7239 ALIGN 32 7240 ecp_nistz256_point_add_affinex: 7241 mov QWORD[8+rsp],rdi ;WIN64 prologue 7242 mov QWORD[16+rsp],rsi 7243 mov rax,rsp 7244 $L$SEH_begin_ecp_nistz256_point_add_affinex: 7245 mov rdi,rcx 7246 mov rsi,rdx 7247 mov rdx,r8 7248 7249 7250 7251 $L$point_add_affinex: 7252 push rbp 7253 7254 push rbx 7255 7256 push r12 7257 7258 push r13 7259 7260 push r14 7261 7262 push r15 7263 7264 sub rsp,32*15+8 7265 7266 $L$add_affinex_body: 7267 7268 movdqu xmm0,XMMWORD[rsi] 7269 mov rbx,rdx 7270 movdqu xmm1,XMMWORD[16+rsi] 7271 movdqu xmm2,XMMWORD[32+rsi] 7272 movdqu xmm3,XMMWORD[48+rsi] 7273 movdqu xmm4,XMMWORD[64+rsi] 7274 movdqu xmm5,XMMWORD[80+rsi] 7275 mov rdx,QWORD[((64+0))+rsi] 7276 mov r14,QWORD[((64+8))+rsi] 7277 mov r15,QWORD[((64+16))+rsi] 7278 mov r8,QWORD[((64+24))+rsi] 7279 movdqa XMMWORD[320+rsp],xmm0 7280 movdqa XMMWORD[(320+16)+rsp],xmm1 7281 movdqa XMMWORD[352+rsp],xmm2 7282 movdqa XMMWORD[(352+16)+rsp],xmm3 7283 movdqa XMMWORD[384+rsp],xmm4 7284 movdqa XMMWORD[(384+16)+rsp],xmm5 7285 por xmm5,xmm4 7286 7287 movdqu xmm0,XMMWORD[rbx] 7288 pshufd xmm3,xmm5,0xb1 7289 movdqu xmm1,XMMWORD[16+rbx] 7290 movdqu xmm2,XMMWORD[32+rbx] 7291 por xmm5,xmm3 7292 movdqu xmm3,XMMWORD[48+rbx] 7293 movdqa XMMWORD[416+rsp],xmm0 7294 pshufd xmm4,xmm5,0x1e 7295 movdqa XMMWORD[(416+16)+rsp],xmm1 7296 por xmm1,xmm0 7297 DB 102,72,15,110,199 7298 movdqa XMMWORD[448+rsp],xmm2 7299 movdqa XMMWORD[(448+16)+rsp],xmm3 7300 por xmm3,xmm2 7301 por xmm5,xmm4 7302 pxor xmm4,xmm4 7303 por xmm3,xmm1 7304 7305 lea rsi,[((64-128))+rsi] 7306 lea rdi,[32+rsp] 7307 call __ecp_nistz256_sqr_montx 7308 7309 pcmpeqd xmm5,xmm4 7310 pshufd xmm4,xmm3,0xb1 7311 mov rdx,QWORD[rbx] 7312 7313 mov r9,r12 7314 por xmm4,xmm3 7315 pshufd xmm5,xmm5,0 7316 pshufd xmm3,xmm4,0x1e 7317 mov r10,r13 7318 por xmm4,xmm3 7319 pxor xmm3,xmm3 7320 mov r11,r14 7321 pcmpeqd xmm4,xmm3 7322 pshufd xmm4,xmm4,0 7323 7324 lea rsi,[((32-128))+rsp] 7325 mov r12,r15 7326 lea rdi,[rsp] 7327 call __ecp_nistz256_mul_montx 7328 7329 lea rbx,[320+rsp] 7330 lea rdi,[64+rsp] 7331 call __ecp_nistz256_sub_fromx 7332 7333 mov rdx,QWORD[384+rsp] 7334 lea rbx,[384+rsp] 7335 mov r9,QWORD[((0+32))+rsp] 7336 mov r10,QWORD[((8+32))+rsp] 7337 lea rsi,[((-128+32))+rsp] 7338 mov r11,QWORD[((16+32))+rsp] 7339 mov r12,QWORD[((24+32))+rsp] 7340 lea rdi,[32+rsp] 7341 call __ecp_nistz256_mul_montx 7342 7343 mov rdx,QWORD[384+rsp] 7344 lea rbx,[384+rsp] 7345 mov r9,QWORD[((0+64))+rsp] 7346 mov r10,QWORD[((8+64))+rsp] 7347 lea rsi,[((-128+64))+rsp] 7348 mov r11,QWORD[((16+64))+rsp] 7349 mov r12,QWORD[((24+64))+rsp] 7350 lea rdi,[288+rsp] 7351 call __ecp_nistz256_mul_montx 7352 7353 mov rdx,QWORD[448+rsp] 7354 lea rbx,[448+rsp] 7355 mov r9,QWORD[((0+32))+rsp] 7356 mov r10,QWORD[((8+32))+rsp] 7357 lea rsi,[((-128+32))+rsp] 7358 mov r11,QWORD[((16+32))+rsp] 7359 mov r12,QWORD[((24+32))+rsp] 7360 lea rdi,[32+rsp] 7361 call __ecp_nistz256_mul_montx 7362 7363 lea rbx,[352+rsp] 7364 lea rdi,[96+rsp] 7365 call __ecp_nistz256_sub_fromx 7366 7367 mov rdx,QWORD[((0+64))+rsp] 7368 mov r14,QWORD[((8+64))+rsp] 7369 lea rsi,[((-128+64))+rsp] 7370 mov r15,QWORD[((16+64))+rsp] 7371 mov r8,QWORD[((24+64))+rsp] 7372 lea rdi,[128+rsp] 7373 call __ecp_nistz256_sqr_montx 7374 7375 mov rdx,QWORD[((0+96))+rsp] 7376 mov r14,QWORD[((8+96))+rsp] 7377 lea rsi,[((-128+96))+rsp] 7378 mov r15,QWORD[((16+96))+rsp] 7379 mov r8,QWORD[((24+96))+rsp] 7380 lea rdi,[192+rsp] 7381 call __ecp_nistz256_sqr_montx 7382 7383 mov rdx,QWORD[128+rsp] 7384 lea rbx,[128+rsp] 7385 mov r9,QWORD[((0+64))+rsp] 7386 mov r10,QWORD[((8+64))+rsp] 7387 lea rsi,[((-128+64))+rsp] 7388 mov r11,QWORD[((16+64))+rsp] 7389 mov r12,QWORD[((24+64))+rsp] 7390 lea rdi,[160+rsp] 7391 call __ecp_nistz256_mul_montx 7392 7393 mov rdx,QWORD[320+rsp] 7394 lea rbx,[320+rsp] 7395 mov r9,QWORD[((0+128))+rsp] 7396 mov r10,QWORD[((8+128))+rsp] 7397 lea rsi,[((-128+128))+rsp] 7398 mov r11,QWORD[((16+128))+rsp] 7399 mov r12,QWORD[((24+128))+rsp] 7400 lea rdi,[rsp] 7401 call __ecp_nistz256_mul_montx 7402 7403 7404 7405 7406 xor r11,r11 7407 add r12,r12 7408 lea rsi,[192+rsp] 7409 adc r13,r13 7410 mov rax,r12 7411 adc r8,r8 7412 adc r9,r9 7413 mov rbp,r13 7414 adc r11,0 7415 7416 sub r12,-1 7417 mov rcx,r8 7418 sbb r13,r14 7419 sbb r8,0 7420 mov r10,r9 7421 sbb r9,r15 7422 sbb r11,0 7423 7424 cmovc r12,rax 7425 mov rax,QWORD[rsi] 7426 cmovc r13,rbp 7427 mov rbp,QWORD[8+rsi] 7428 cmovc r8,rcx 7429 mov rcx,QWORD[16+rsi] 7430 cmovc r9,r10 7431 mov r10,QWORD[24+rsi] 7432 7433 call __ecp_nistz256_subx 7434 7435 lea rbx,[160+rsp] 7436 lea rdi,[224+rsp] 7437 call __ecp_nistz256_sub_fromx 7438 7439 mov rax,QWORD[((0+0))+rsp] 7440 mov rbp,QWORD[((0+8))+rsp] 7441 mov rcx,QWORD[((0+16))+rsp] 7442 mov r10,QWORD[((0+24))+rsp] 7443 lea rdi,[64+rsp] 7444 7445 call __ecp_nistz256_subx 7446 7447 mov QWORD[rdi],r12 7448 mov QWORD[8+rdi],r13 7449 mov QWORD[16+rdi],r8 7450 mov QWORD[24+rdi],r9 7451 mov rdx,QWORD[352+rsp] 7452 lea rbx,[352+rsp] 7453 mov r9,QWORD[((0+160))+rsp] 7454 mov r10,QWORD[((8+160))+rsp] 7455 lea rsi,[((-128+160))+rsp] 7456 mov r11,QWORD[((16+160))+rsp] 7457 mov r12,QWORD[((24+160))+rsp] 7458 lea rdi,[32+rsp] 7459 call __ecp_nistz256_mul_montx 7460 7461 mov rdx,QWORD[96+rsp] 7462 lea rbx,[96+rsp] 7463 mov r9,QWORD[((0+64))+rsp] 7464 mov r10,QWORD[((8+64))+rsp] 7465 lea rsi,[((-128+64))+rsp] 7466 mov r11,QWORD[((16+64))+rsp] 7467 mov r12,QWORD[((24+64))+rsp] 7468 lea rdi,[64+rsp] 7469 call __ecp_nistz256_mul_montx 7470 7471 lea rbx,[32+rsp] 7472 lea rdi,[256+rsp] 7473 call __ecp_nistz256_sub_fromx 7474 7475 DB 102,72,15,126,199 7476 7477 movdqa xmm0,xmm5 7478 movdqa xmm1,xmm5 7479 pandn xmm0,XMMWORD[288+rsp] 7480 movdqa xmm2,xmm5 7481 pandn xmm1,XMMWORD[((288+16))+rsp] 7482 movdqa xmm3,xmm5 7483 pand xmm2,XMMWORD[$L$ONE_mont] 7484 pand xmm3,XMMWORD[(($L$ONE_mont+16))] 7485 por xmm2,xmm0 7486 por xmm3,xmm1 7487 7488 movdqa xmm0,xmm4 7489 movdqa xmm1,xmm4 7490 pandn xmm0,xmm2 7491 movdqa xmm2,xmm4 7492 pandn xmm1,xmm3 7493 movdqa xmm3,xmm4 7494 pand xmm2,XMMWORD[384+rsp] 7495 pand xmm3,XMMWORD[((384+16))+rsp] 7496 por xmm2,xmm0 7497 por xmm3,xmm1 7498 movdqu XMMWORD[64+rdi],xmm2 7499 movdqu XMMWORD[80+rdi],xmm3 7500 7501 movdqa xmm0,xmm5 7502 movdqa xmm1,xmm5 7503 pandn xmm0,XMMWORD[224+rsp] 7504 movdqa xmm2,xmm5 7505 pandn xmm1,XMMWORD[((224+16))+rsp] 7506 movdqa xmm3,xmm5 7507 pand xmm2,XMMWORD[416+rsp] 7508 pand xmm3,XMMWORD[((416+16))+rsp] 7509 por xmm2,xmm0 7510 por xmm3,xmm1 7511 7512 movdqa xmm0,xmm4 7513 movdqa xmm1,xmm4 7514 pandn xmm0,xmm2 7515 movdqa xmm2,xmm4 7516 pandn xmm1,xmm3 7517 movdqa xmm3,xmm4 7518 pand xmm2,XMMWORD[320+rsp] 7519 pand xmm3,XMMWORD[((320+16))+rsp] 7520 por xmm2,xmm0 7521 por xmm3,xmm1 7522 movdqu XMMWORD[rdi],xmm2 7523 movdqu XMMWORD[16+rdi],xmm3 7524 7525 movdqa xmm0,xmm5 7526 movdqa xmm1,xmm5 7527 pandn xmm0,XMMWORD[256+rsp] 7528 movdqa xmm2,xmm5 7529 pandn xmm1,XMMWORD[((256+16))+rsp] 7530 movdqa xmm3,xmm5 7531 pand xmm2,XMMWORD[448+rsp] 7532 pand xmm3,XMMWORD[((448+16))+rsp] 7533 por xmm2,xmm0 7534 por xmm3,xmm1 7535 7536 movdqa xmm0,xmm4 7537 movdqa xmm1,xmm4 7538 pandn xmm0,xmm2 7539 movdqa xmm2,xmm4 7540 pandn xmm1,xmm3 7541 movdqa xmm3,xmm4 7542 pand xmm2,XMMWORD[352+rsp] 7543 pand xmm3,XMMWORD[((352+16))+rsp] 7544 por xmm2,xmm0 7545 por xmm3,xmm1 7546 movdqu XMMWORD[32+rdi],xmm2 7547 movdqu XMMWORD[48+rdi],xmm3 7548 7549 lea rsi,[((480+56))+rsp] 7550 7551 mov r15,QWORD[((-48))+rsi] 7552 7553 mov r14,QWORD[((-40))+rsi] 7554 7555 mov r13,QWORD[((-32))+rsi] 7556 7557 mov r12,QWORD[((-24))+rsi] 7558 7559 mov rbx,QWORD[((-16))+rsi] 7560 7561 mov rbp,QWORD[((-8))+rsi] 7562 7563 lea rsp,[rsi] 7564 7565 $L$add_affinex_epilogue: 7566 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 7567 mov rsi,QWORD[16+rsp] 7568 DB 0F3h,0C3h ;repret 7569 7570 $L$SEH_end_ecp_nistz256_point_add_affinex: 5455 7571 EXTERN __imp_RtlVirtualUnwind 5456 7572 … … 5618 7734 DD $L$SEH_end_ecp_nistz256_ord_sqr_mont wrt ..imagebase 5619 7735 DD $L$SEH_info_ecp_nistz256_ord_sqr_mont wrt ..imagebase 7736 DD $L$SEH_begin_ecp_nistz256_ord_mul_montx wrt ..imagebase 7737 DD $L$SEH_end_ecp_nistz256_ord_mul_montx wrt ..imagebase 7738 DD $L$SEH_info_ecp_nistz256_ord_mul_montx wrt ..imagebase 7739 7740 DD $L$SEH_begin_ecp_nistz256_ord_sqr_montx wrt ..imagebase 7741 DD $L$SEH_end_ecp_nistz256_ord_sqr_montx wrt ..imagebase 7742 DD $L$SEH_info_ecp_nistz256_ord_sqr_montx wrt ..imagebase 5620 7743 DD $L$SEH_begin_ecp_nistz256_to_mont wrt ..imagebase 5621 7744 DD $L$SEH_end_ecp_nistz256_to_mont wrt ..imagebase … … 5641 7764 DD $L$SEH_end_ecp_nistz256_gather_w7 wrt ..imagebase 5642 7765 DD $L$SEH_info_ecp_nistz256_gather_wX wrt ..imagebase 7766 DD $L$SEH_begin_ecp_nistz256_avx2_gather_w5 wrt ..imagebase 7767 DD $L$SEH_end_ecp_nistz256_avx2_gather_w5 wrt ..imagebase 7768 DD $L$SEH_info_ecp_nistz256_avx2_gather_wX wrt ..imagebase 7769 7770 DD $L$SEH_begin_ecp_nistz256_avx2_gather_w7 wrt ..imagebase 7771 DD $L$SEH_end_ecp_nistz256_avx2_gather_w7 wrt ..imagebase 7772 DD $L$SEH_info_ecp_nistz256_avx2_gather_wX wrt ..imagebase 5643 7773 DD $L$SEH_begin_ecp_nistz256_point_double wrt ..imagebase 5644 7774 DD $L$SEH_end_ecp_nistz256_point_double wrt ..imagebase … … 5652 7782 DD $L$SEH_end_ecp_nistz256_point_add_affine wrt ..imagebase 5653 7783 DD $L$SEH_info_ecp_nistz256_point_add_affine wrt ..imagebase 7784 DD $L$SEH_begin_ecp_nistz256_point_doublex wrt ..imagebase 7785 DD $L$SEH_end_ecp_nistz256_point_doublex wrt ..imagebase 7786 DD $L$SEH_info_ecp_nistz256_point_doublex wrt ..imagebase 7787 7788 DD $L$SEH_begin_ecp_nistz256_point_addx wrt ..imagebase 7789 DD $L$SEH_end_ecp_nistz256_point_addx wrt ..imagebase 7790 DD $L$SEH_info_ecp_nistz256_point_addx wrt ..imagebase 7791 7792 DD $L$SEH_begin_ecp_nistz256_point_add_affinex wrt ..imagebase 7793 DD $L$SEH_end_ecp_nistz256_point_add_affinex wrt ..imagebase 7794 DD $L$SEH_info_ecp_nistz256_point_add_affinex wrt ..imagebase 5654 7795 5655 7796 section .xdata rdata align=8 … … 5689 7830 DD $L$ord_sqr_body wrt ..imagebase,$L$ord_sqr_epilogue wrt ..imagebase 5690 7831 DD 48,0 7832 $L$SEH_info_ecp_nistz256_ord_mul_montx: 7833 DB 9,0,0,0 7834 DD full_handler wrt ..imagebase 7835 DD $L$ord_mulx_body wrt ..imagebase,$L$ord_mulx_epilogue wrt ..imagebase 7836 DD 48,0 7837 $L$SEH_info_ecp_nistz256_ord_sqr_montx: 7838 DB 9,0,0,0 7839 DD full_handler wrt ..imagebase 7840 DD $L$ord_sqrx_body wrt ..imagebase,$L$ord_sqrx_epilogue wrt ..imagebase 7841 DD 48,0 5691 7842 $L$SEH_info_ecp_nistz256_to_mont: 5692 7843 DB 9,0,0,0 … … 5722 7873 DB 0x04,0x01,0x15,0x00 5723 7874 ALIGN 8 7875 $L$SEH_info_ecp_nistz256_avx2_gather_wX: 7876 DB 0x01,0x36,0x17,0x0b 7877 DB 0x36,0xf8,0x09,0x00 7878 DB 0x31,0xe8,0x08,0x00 7879 DB 0x2c,0xd8,0x07,0x00 7880 DB 0x27,0xc8,0x06,0x00 7881 DB 0x22,0xb8,0x05,0x00 7882 DB 0x1d,0xa8,0x04,0x00 7883 DB 0x18,0x98,0x03,0x00 7884 DB 0x13,0x88,0x02,0x00 7885 DB 0x0e,0x78,0x01,0x00 7886 DB 0x09,0x68,0x00,0x00 7887 DB 0x04,0x01,0x15,0x00 7888 DB 0x00,0xb3,0x00,0x00 7889 ALIGN 8 5724 7890 $L$SEH_info_ecp_nistz256_point_double: 5725 7891 DB 9,0,0,0 … … 5737 7903 DD $L$add_affineq_body wrt ..imagebase,$L$add_affineq_epilogue wrt ..imagebase 5738 7904 DD 32*15+56,0 7905 ALIGN 8 7906 $L$SEH_info_ecp_nistz256_point_doublex: 7907 DB 9,0,0,0 7908 DD full_handler wrt ..imagebase 7909 DD $L$point_doublex_body wrt ..imagebase,$L$point_doublex_epilogue wrt ..imagebase 7910 DD 32*5+56,0 7911 $L$SEH_info_ecp_nistz256_point_addx: 7912 DB 9,0,0,0 7913 DD full_handler wrt ..imagebase 7914 DD $L$point_addx_body wrt ..imagebase,$L$point_addx_epilogue wrt ..imagebase 7915 DD 32*18+56,0 7916 $L$SEH_info_ecp_nistz256_point_add_affinex: 7917 DB 9,0,0,0 7918 DD full_handler wrt ..imagebase 7919 DD $L$add_affinex_body wrt ..imagebase,$L$add_affinex_epilogue wrt ..imagebase 7920 DD 32*15+56,0 -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/ghash-x86_64.S
r97373 r99371 1355 1355 gcm_init_avx: 1356 1356 1357 jmp NEAR $L$_init_clmul 1357 $L$SEH_begin_gcm_init_avx: 1358 1359 DB 0x48,0x83,0xec,0x18 1360 DB 0x0f,0x29,0x34,0x24 1361 vzeroupper 1362 1363 vmovdqu xmm2,XMMWORD[rdx] 1364 vpshufd xmm2,xmm2,78 1365 1366 1367 vpshufd xmm4,xmm2,255 1368 vpsrlq xmm3,xmm2,63 1369 vpsllq xmm2,xmm2,1 1370 vpxor xmm5,xmm5,xmm5 1371 vpcmpgtd xmm5,xmm5,xmm4 1372 vpslldq xmm3,xmm3,8 1373 vpor xmm2,xmm2,xmm3 1374 1375 1376 vpand xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial] 1377 vpxor xmm2,xmm2,xmm5 1378 1379 vpunpckhqdq xmm6,xmm2,xmm2 1380 vmovdqa xmm0,xmm2 1381 vpxor xmm6,xmm6,xmm2 1382 mov r10,4 1383 jmp NEAR $L$init_start_avx 1384 ALIGN 32 1385 $L$init_loop_avx: 1386 vpalignr xmm5,xmm4,xmm3,8 1387 vmovdqu XMMWORD[(-16)+rcx],xmm5 1388 vpunpckhqdq xmm3,xmm0,xmm0 1389 vpxor xmm3,xmm3,xmm0 1390 vpclmulqdq xmm1,xmm0,xmm2,0x11 1391 vpclmulqdq xmm0,xmm0,xmm2,0x00 1392 vpclmulqdq xmm3,xmm3,xmm6,0x00 1393 vpxor xmm4,xmm1,xmm0 1394 vpxor xmm3,xmm3,xmm4 1395 1396 vpslldq xmm4,xmm3,8 1397 vpsrldq xmm3,xmm3,8 1398 vpxor xmm0,xmm0,xmm4 1399 vpxor xmm1,xmm1,xmm3 1400 vpsllq xmm3,xmm0,57 1401 vpsllq xmm4,xmm0,62 1402 vpxor xmm4,xmm4,xmm3 1403 vpsllq xmm3,xmm0,63 1404 vpxor xmm4,xmm4,xmm3 1405 vpslldq xmm3,xmm4,8 1406 vpsrldq xmm4,xmm4,8 1407 vpxor xmm0,xmm0,xmm3 1408 vpxor xmm1,xmm1,xmm4 1409 1410 vpsrlq xmm4,xmm0,1 1411 vpxor xmm1,xmm1,xmm0 1412 vpxor xmm0,xmm0,xmm4 1413 vpsrlq xmm4,xmm4,5 1414 vpxor xmm0,xmm0,xmm4 1415 vpsrlq xmm0,xmm0,1 1416 vpxor xmm0,xmm0,xmm1 1417 $L$init_start_avx: 1418 vmovdqa xmm5,xmm0 1419 vpunpckhqdq xmm3,xmm0,xmm0 1420 vpxor xmm3,xmm3,xmm0 1421 vpclmulqdq xmm1,xmm0,xmm2,0x11 1422 vpclmulqdq xmm0,xmm0,xmm2,0x00 1423 vpclmulqdq xmm3,xmm3,xmm6,0x00 1424 vpxor xmm4,xmm1,xmm0 1425 vpxor xmm3,xmm3,xmm4 1426 1427 vpslldq xmm4,xmm3,8 1428 vpsrldq xmm3,xmm3,8 1429 vpxor xmm0,xmm0,xmm4 1430 vpxor xmm1,xmm1,xmm3 1431 vpsllq xmm3,xmm0,57 1432 vpsllq xmm4,xmm0,62 1433 vpxor xmm4,xmm4,xmm3 1434 vpsllq xmm3,xmm0,63 1435 vpxor xmm4,xmm4,xmm3 1436 vpslldq xmm3,xmm4,8 1437 vpsrldq xmm4,xmm4,8 1438 vpxor xmm0,xmm0,xmm3 1439 vpxor xmm1,xmm1,xmm4 1440 1441 vpsrlq xmm4,xmm0,1 1442 vpxor xmm1,xmm1,xmm0 1443 vpxor xmm0,xmm0,xmm4 1444 vpsrlq xmm4,xmm4,5 1445 vpxor xmm0,xmm0,xmm4 1446 vpsrlq xmm0,xmm0,1 1447 vpxor xmm0,xmm0,xmm1 1448 vpshufd xmm3,xmm5,78 1449 vpshufd xmm4,xmm0,78 1450 vpxor xmm3,xmm3,xmm5 1451 vmovdqu XMMWORD[rcx],xmm5 1452 vpxor xmm4,xmm4,xmm0 1453 vmovdqu XMMWORD[16+rcx],xmm0 1454 lea rcx,[48+rcx] 1455 sub r10,1 1456 jnz NEAR $L$init_loop_avx 1457 1458 vpalignr xmm5,xmm3,xmm4,8 1459 vmovdqu XMMWORD[(-16)+rcx],xmm5 1460 1461 vzeroupper 1462 movaps xmm6,XMMWORD[rsp] 1463 lea rsp,[24+rsp] 1464 $L$SEH_end_gcm_init_avx: 1465 DB 0F3h,0C3h ;repret 1358 1466 1359 1467 … … 1373 1481 1374 1482 DB 243,15,30,250 1375 jmp NEAR $L$_ghash_clmul 1483 lea rax,[((-136))+rsp] 1484 $L$SEH_begin_gcm_ghash_avx: 1485 1486 DB 0x48,0x8d,0x60,0xe0 1487 DB 0x0f,0x29,0x70,0xe0 1488 DB 0x0f,0x29,0x78,0xf0 1489 DB 0x44,0x0f,0x29,0x00 1490 DB 0x44,0x0f,0x29,0x48,0x10 1491 DB 0x44,0x0f,0x29,0x50,0x20 1492 DB 0x44,0x0f,0x29,0x58,0x30 1493 DB 0x44,0x0f,0x29,0x60,0x40 1494 DB 0x44,0x0f,0x29,0x68,0x50 1495 DB 0x44,0x0f,0x29,0x70,0x60 1496 DB 0x44,0x0f,0x29,0x78,0x70 1497 vzeroupper 1498 1499 vmovdqu xmm10,XMMWORD[rcx] 1500 lea r10,[$L$0x1c2_polynomial] 1501 lea rdx,[64+rdx] 1502 vmovdqu xmm13,XMMWORD[$L$bswap_mask] 1503 vpshufb xmm10,xmm10,xmm13 1504 cmp r9,0x80 1505 jb NEAR $L$short_avx 1506 sub r9,0x80 1507 1508 vmovdqu xmm14,XMMWORD[112+r8] 1509 vmovdqu xmm6,XMMWORD[((0-64))+rdx] 1510 vpshufb xmm14,xmm14,xmm13 1511 vmovdqu xmm7,XMMWORD[((32-64))+rdx] 1512 1513 vpunpckhqdq xmm9,xmm14,xmm14 1514 vmovdqu xmm15,XMMWORD[96+r8] 1515 vpclmulqdq xmm0,xmm14,xmm6,0x00 1516 vpxor xmm9,xmm9,xmm14 1517 vpshufb xmm15,xmm15,xmm13 1518 vpclmulqdq xmm1,xmm14,xmm6,0x11 1519 vmovdqu xmm6,XMMWORD[((16-64))+rdx] 1520 vpunpckhqdq xmm8,xmm15,xmm15 1521 vmovdqu xmm14,XMMWORD[80+r8] 1522 vpclmulqdq xmm2,xmm9,xmm7,0x00 1523 vpxor xmm8,xmm8,xmm15 1524 1525 vpshufb xmm14,xmm14,xmm13 1526 vpclmulqdq xmm3,xmm15,xmm6,0x00 1527 vpunpckhqdq xmm9,xmm14,xmm14 1528 vpclmulqdq xmm4,xmm15,xmm6,0x11 1529 vmovdqu xmm6,XMMWORD[((48-64))+rdx] 1530 vpxor xmm9,xmm9,xmm14 1531 vmovdqu xmm15,XMMWORD[64+r8] 1532 vpclmulqdq xmm5,xmm8,xmm7,0x10 1533 vmovdqu xmm7,XMMWORD[((80-64))+rdx] 1534 1535 vpshufb xmm15,xmm15,xmm13 1536 vpxor xmm3,xmm3,xmm0 1537 vpclmulqdq xmm0,xmm14,xmm6,0x00 1538 vpxor xmm4,xmm4,xmm1 1539 vpunpckhqdq xmm8,xmm15,xmm15 1540 vpclmulqdq xmm1,xmm14,xmm6,0x11 1541 vmovdqu xmm6,XMMWORD[((64-64))+rdx] 1542 vpxor xmm5,xmm5,xmm2 1543 vpclmulqdq xmm2,xmm9,xmm7,0x00 1544 vpxor xmm8,xmm8,xmm15 1545 1546 vmovdqu xmm14,XMMWORD[48+r8] 1547 vpxor xmm0,xmm0,xmm3 1548 vpclmulqdq xmm3,xmm15,xmm6,0x00 1549 vpxor xmm1,xmm1,xmm4 1550 vpshufb xmm14,xmm14,xmm13 1551 vpclmulqdq xmm4,xmm15,xmm6,0x11 1552 vmovdqu xmm6,XMMWORD[((96-64))+rdx] 1553 vpxor xmm2,xmm2,xmm5 1554 vpunpckhqdq xmm9,xmm14,xmm14 1555 vpclmulqdq xmm5,xmm8,xmm7,0x10 1556 vmovdqu xmm7,XMMWORD[((128-64))+rdx] 1557 vpxor xmm9,xmm9,xmm14 1558 1559 vmovdqu xmm15,XMMWORD[32+r8] 1560 vpxor xmm3,xmm3,xmm0 1561 vpclmulqdq xmm0,xmm14,xmm6,0x00 1562 vpxor xmm4,xmm4,xmm1 1563 vpshufb xmm15,xmm15,xmm13 1564 vpclmulqdq xmm1,xmm14,xmm6,0x11 1565 vmovdqu xmm6,XMMWORD[((112-64))+rdx] 1566 vpxor xmm5,xmm5,xmm2 1567 vpunpckhqdq xmm8,xmm15,xmm15 1568 vpclmulqdq xmm2,xmm9,xmm7,0x00 1569 vpxor xmm8,xmm8,xmm15 1570 1571 vmovdqu xmm14,XMMWORD[16+r8] 1572 vpxor xmm0,xmm0,xmm3 1573 vpclmulqdq xmm3,xmm15,xmm6,0x00 1574 vpxor xmm1,xmm1,xmm4 1575 vpshufb xmm14,xmm14,xmm13 1576 vpclmulqdq xmm4,xmm15,xmm6,0x11 1577 vmovdqu xmm6,XMMWORD[((144-64))+rdx] 1578 vpxor xmm2,xmm2,xmm5 1579 vpunpckhqdq xmm9,xmm14,xmm14 1580 vpclmulqdq xmm5,xmm8,xmm7,0x10 1581 vmovdqu xmm7,XMMWORD[((176-64))+rdx] 1582 vpxor xmm9,xmm9,xmm14 1583 1584 vmovdqu xmm15,XMMWORD[r8] 1585 vpxor xmm3,xmm3,xmm0 1586 vpclmulqdq xmm0,xmm14,xmm6,0x00 1587 vpxor xmm4,xmm4,xmm1 1588 vpshufb xmm15,xmm15,xmm13 1589 vpclmulqdq xmm1,xmm14,xmm6,0x11 1590 vmovdqu xmm6,XMMWORD[((160-64))+rdx] 1591 vpxor xmm5,xmm5,xmm2 1592 vpclmulqdq xmm2,xmm9,xmm7,0x10 1593 1594 lea r8,[128+r8] 1595 cmp r9,0x80 1596 jb NEAR $L$tail_avx 1597 1598 vpxor xmm15,xmm15,xmm10 1599 sub r9,0x80 1600 jmp NEAR $L$oop8x_avx 1601 1602 ALIGN 32 1603 $L$oop8x_avx: 1604 vpunpckhqdq xmm8,xmm15,xmm15 1605 vmovdqu xmm14,XMMWORD[112+r8] 1606 vpxor xmm3,xmm3,xmm0 1607 vpxor xmm8,xmm8,xmm15 1608 vpclmulqdq xmm10,xmm15,xmm6,0x00 1609 vpshufb xmm14,xmm14,xmm13 1610 vpxor xmm4,xmm4,xmm1 1611 vpclmulqdq xmm11,xmm15,xmm6,0x11 1612 vmovdqu xmm6,XMMWORD[((0-64))+rdx] 1613 vpunpckhqdq xmm9,xmm14,xmm14 1614 vpxor xmm5,xmm5,xmm2 1615 vpclmulqdq xmm12,xmm8,xmm7,0x00 1616 vmovdqu xmm7,XMMWORD[((32-64))+rdx] 1617 vpxor xmm9,xmm9,xmm14 1618 1619 vmovdqu xmm15,XMMWORD[96+r8] 1620 vpclmulqdq xmm0,xmm14,xmm6,0x00 1621 vpxor xmm10,xmm10,xmm3 1622 vpshufb xmm15,xmm15,xmm13 1623 vpclmulqdq xmm1,xmm14,xmm6,0x11 1624 vxorps xmm11,xmm11,xmm4 1625 vmovdqu xmm6,XMMWORD[((16-64))+rdx] 1626 vpunpckhqdq xmm8,xmm15,xmm15 1627 vpclmulqdq xmm2,xmm9,xmm7,0x00 1628 vpxor xmm12,xmm12,xmm5 1629 vxorps xmm8,xmm8,xmm15 1630 1631 vmovdqu xmm14,XMMWORD[80+r8] 1632 vpxor xmm12,xmm12,xmm10 1633 vpclmulqdq xmm3,xmm15,xmm6,0x00 1634 vpxor xmm12,xmm12,xmm11 1635 vpslldq xmm9,xmm12,8 1636 vpxor xmm3,xmm3,xmm0 1637 vpclmulqdq xmm4,xmm15,xmm6,0x11 1638 vpsrldq xmm12,xmm12,8 1639 vpxor xmm10,xmm10,xmm9 1640 vmovdqu xmm6,XMMWORD[((48-64))+rdx] 1641 vpshufb xmm14,xmm14,xmm13 1642 vxorps xmm11,xmm11,xmm12 1643 vpxor xmm4,xmm4,xmm1 1644 vpunpckhqdq xmm9,xmm14,xmm14 1645 vpclmulqdq xmm5,xmm8,xmm7,0x10 1646 vmovdqu xmm7,XMMWORD[((80-64))+rdx] 1647 vpxor xmm9,xmm9,xmm14 1648 vpxor xmm5,xmm5,xmm2 1649 1650 vmovdqu xmm15,XMMWORD[64+r8] 1651 vpalignr xmm12,xmm10,xmm10,8 1652 vpclmulqdq xmm0,xmm14,xmm6,0x00 1653 vpshufb xmm15,xmm15,xmm13 1654 vpxor xmm0,xmm0,xmm3 1655 vpclmulqdq xmm1,xmm14,xmm6,0x11 1656 vmovdqu xmm6,XMMWORD[((64-64))+rdx] 1657 vpunpckhqdq xmm8,xmm15,xmm15 1658 vpxor xmm1,xmm1,xmm4 1659 vpclmulqdq xmm2,xmm9,xmm7,0x00 1660 vxorps xmm8,xmm8,xmm15 1661 vpxor xmm2,xmm2,xmm5 1662 1663 vmovdqu xmm14,XMMWORD[48+r8] 1664 vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10 1665 vpclmulqdq xmm3,xmm15,xmm6,0x00 1666 vpshufb xmm14,xmm14,xmm13 1667 vpxor xmm3,xmm3,xmm0 1668 vpclmulqdq xmm4,xmm15,xmm6,0x11 1669 vmovdqu xmm6,XMMWORD[((96-64))+rdx] 1670 vpunpckhqdq xmm9,xmm14,xmm14 1671 vpxor xmm4,xmm4,xmm1 1672 vpclmulqdq xmm5,xmm8,xmm7,0x10 1673 vmovdqu xmm7,XMMWORD[((128-64))+rdx] 1674 vpxor xmm9,xmm9,xmm14 1675 vpxor xmm5,xmm5,xmm2 1676 1677 vmovdqu xmm15,XMMWORD[32+r8] 1678 vpclmulqdq xmm0,xmm14,xmm6,0x00 1679 vpshufb xmm15,xmm15,xmm13 1680 vpxor xmm0,xmm0,xmm3 1681 vpclmulqdq xmm1,xmm14,xmm6,0x11 1682 vmovdqu xmm6,XMMWORD[((112-64))+rdx] 1683 vpunpckhqdq xmm8,xmm15,xmm15 1684 vpxor xmm1,xmm1,xmm4 1685 vpclmulqdq xmm2,xmm9,xmm7,0x00 1686 vpxor xmm8,xmm8,xmm15 1687 vpxor xmm2,xmm2,xmm5 1688 vxorps xmm10,xmm10,xmm12 1689 1690 vmovdqu xmm14,XMMWORD[16+r8] 1691 vpalignr xmm12,xmm10,xmm10,8 1692 vpclmulqdq xmm3,xmm15,xmm6,0x00 1693 vpshufb xmm14,xmm14,xmm13 1694 vpxor xmm3,xmm3,xmm0 1695 vpclmulqdq xmm4,xmm15,xmm6,0x11 1696 vmovdqu xmm6,XMMWORD[((144-64))+rdx] 1697 vpclmulqdq xmm10,xmm10,XMMWORD[r10],0x10 1698 vxorps xmm12,xmm12,xmm11 1699 vpunpckhqdq xmm9,xmm14,xmm14 1700 vpxor xmm4,xmm4,xmm1 1701 vpclmulqdq xmm5,xmm8,xmm7,0x10 1702 vmovdqu xmm7,XMMWORD[((176-64))+rdx] 1703 vpxor xmm9,xmm9,xmm14 1704 vpxor xmm5,xmm5,xmm2 1705 1706 vmovdqu xmm15,XMMWORD[r8] 1707 vpclmulqdq xmm0,xmm14,xmm6,0x00 1708 vpshufb xmm15,xmm15,xmm13 1709 vpclmulqdq xmm1,xmm14,xmm6,0x11 1710 vmovdqu xmm6,XMMWORD[((160-64))+rdx] 1711 vpxor xmm15,xmm15,xmm12 1712 vpclmulqdq xmm2,xmm9,xmm7,0x10 1713 vpxor xmm15,xmm15,xmm10 1714 1715 lea r8,[128+r8] 1716 sub r9,0x80 1717 jnc NEAR $L$oop8x_avx 1718 1719 add r9,0x80 1720 jmp NEAR $L$tail_no_xor_avx 1721 1722 ALIGN 32 1723 $L$short_avx: 1724 vmovdqu xmm14,XMMWORD[((-16))+r9*1+r8] 1725 lea r8,[r9*1+r8] 1726 vmovdqu xmm6,XMMWORD[((0-64))+rdx] 1727 vmovdqu xmm7,XMMWORD[((32-64))+rdx] 1728 vpshufb xmm15,xmm14,xmm13 1729 1730 vmovdqa xmm3,xmm0 1731 vmovdqa xmm4,xmm1 1732 vmovdqa xmm5,xmm2 1733 sub r9,0x10 1734 jz NEAR $L$tail_avx 1735 1736 vpunpckhqdq xmm8,xmm15,xmm15 1737 vpxor xmm3,xmm3,xmm0 1738 vpclmulqdq xmm0,xmm15,xmm6,0x00 1739 vpxor xmm8,xmm8,xmm15 1740 vmovdqu xmm14,XMMWORD[((-32))+r8] 1741 vpxor xmm4,xmm4,xmm1 1742 vpclmulqdq xmm1,xmm15,xmm6,0x11 1743 vmovdqu xmm6,XMMWORD[((16-64))+rdx] 1744 vpshufb xmm15,xmm14,xmm13 1745 vpxor xmm5,xmm5,xmm2 1746 vpclmulqdq xmm2,xmm8,xmm7,0x00 1747 vpsrldq xmm7,xmm7,8 1748 sub r9,0x10 1749 jz NEAR $L$tail_avx 1750 1751 vpunpckhqdq xmm8,xmm15,xmm15 1752 vpxor xmm3,xmm3,xmm0 1753 vpclmulqdq xmm0,xmm15,xmm6,0x00 1754 vpxor xmm8,xmm8,xmm15 1755 vmovdqu xmm14,XMMWORD[((-48))+r8] 1756 vpxor xmm4,xmm4,xmm1 1757 vpclmulqdq xmm1,xmm15,xmm6,0x11 1758 vmovdqu xmm6,XMMWORD[((48-64))+rdx] 1759 vpshufb xmm15,xmm14,xmm13 1760 vpxor xmm5,xmm5,xmm2 1761 vpclmulqdq xmm2,xmm8,xmm7,0x00 1762 vmovdqu xmm7,XMMWORD[((80-64))+rdx] 1763 sub r9,0x10 1764 jz NEAR $L$tail_avx 1765 1766 vpunpckhqdq xmm8,xmm15,xmm15 1767 vpxor xmm3,xmm3,xmm0 1768 vpclmulqdq xmm0,xmm15,xmm6,0x00 1769 vpxor xmm8,xmm8,xmm15 1770 vmovdqu xmm14,XMMWORD[((-64))+r8] 1771 vpxor xmm4,xmm4,xmm1 1772 vpclmulqdq xmm1,xmm15,xmm6,0x11 1773 vmovdqu xmm6,XMMWORD[((64-64))+rdx] 1774 vpshufb xmm15,xmm14,xmm13 1775 vpxor xmm5,xmm5,xmm2 1776 vpclmulqdq xmm2,xmm8,xmm7,0x00 1777 vpsrldq xmm7,xmm7,8 1778 sub r9,0x10 1779 jz NEAR $L$tail_avx 1780 1781 vpunpckhqdq xmm8,xmm15,xmm15 1782 vpxor xmm3,xmm3,xmm0 1783 vpclmulqdq xmm0,xmm15,xmm6,0x00 1784 vpxor xmm8,xmm8,xmm15 1785 vmovdqu xmm14,XMMWORD[((-80))+r8] 1786 vpxor xmm4,xmm4,xmm1 1787 vpclmulqdq xmm1,xmm15,xmm6,0x11 1788 vmovdqu xmm6,XMMWORD[((96-64))+rdx] 1789 vpshufb xmm15,xmm14,xmm13 1790 vpxor xmm5,xmm5,xmm2 1791 vpclmulqdq xmm2,xmm8,xmm7,0x00 1792 vmovdqu xmm7,XMMWORD[((128-64))+rdx] 1793 sub r9,0x10 1794 jz NEAR $L$tail_avx 1795 1796 vpunpckhqdq xmm8,xmm15,xmm15 1797 vpxor xmm3,xmm3,xmm0 1798 vpclmulqdq xmm0,xmm15,xmm6,0x00 1799 vpxor xmm8,xmm8,xmm15 1800 vmovdqu xmm14,XMMWORD[((-96))+r8] 1801 vpxor xmm4,xmm4,xmm1 1802 vpclmulqdq xmm1,xmm15,xmm6,0x11 1803 vmovdqu xmm6,XMMWORD[((112-64))+rdx] 1804 vpshufb xmm15,xmm14,xmm13 1805 vpxor xmm5,xmm5,xmm2 1806 vpclmulqdq xmm2,xmm8,xmm7,0x00 1807 vpsrldq xmm7,xmm7,8 1808 sub r9,0x10 1809 jz NEAR $L$tail_avx 1810 1811 vpunpckhqdq xmm8,xmm15,xmm15 1812 vpxor xmm3,xmm3,xmm0 1813 vpclmulqdq xmm0,xmm15,xmm6,0x00 1814 vpxor xmm8,xmm8,xmm15 1815 vmovdqu xmm14,XMMWORD[((-112))+r8] 1816 vpxor xmm4,xmm4,xmm1 1817 vpclmulqdq xmm1,xmm15,xmm6,0x11 1818 vmovdqu xmm6,XMMWORD[((144-64))+rdx] 1819 vpshufb xmm15,xmm14,xmm13 1820 vpxor xmm5,xmm5,xmm2 1821 vpclmulqdq xmm2,xmm8,xmm7,0x00 1822 vmovq xmm7,QWORD[((184-64))+rdx] 1823 sub r9,0x10 1824 jmp NEAR $L$tail_avx 1825 1826 ALIGN 32 1827 $L$tail_avx: 1828 vpxor xmm15,xmm15,xmm10 1829 $L$tail_no_xor_avx: 1830 vpunpckhqdq xmm8,xmm15,xmm15 1831 vpxor xmm3,xmm3,xmm0 1832 vpclmulqdq xmm0,xmm15,xmm6,0x00 1833 vpxor xmm8,xmm8,xmm15 1834 vpxor xmm4,xmm4,xmm1 1835 vpclmulqdq xmm1,xmm15,xmm6,0x11 1836 vpxor xmm5,xmm5,xmm2 1837 vpclmulqdq xmm2,xmm8,xmm7,0x00 1838 1839 vmovdqu xmm12,XMMWORD[r10] 1840 1841 vpxor xmm10,xmm3,xmm0 1842 vpxor xmm11,xmm4,xmm1 1843 vpxor xmm5,xmm5,xmm2 1844 1845 vpxor xmm5,xmm5,xmm10 1846 vpxor xmm5,xmm5,xmm11 1847 vpslldq xmm9,xmm5,8 1848 vpsrldq xmm5,xmm5,8 1849 vpxor xmm10,xmm10,xmm9 1850 vpxor xmm11,xmm11,xmm5 1851 1852 vpclmulqdq xmm9,xmm10,xmm12,0x10 1853 vpalignr xmm10,xmm10,xmm10,8 1854 vpxor xmm10,xmm10,xmm9 1855 1856 vpclmulqdq xmm9,xmm10,xmm12,0x10 1857 vpalignr xmm10,xmm10,xmm10,8 1858 vpxor xmm10,xmm10,xmm11 1859 vpxor xmm10,xmm10,xmm9 1860 1861 cmp r9,0 1862 jne NEAR $L$short_avx 1863 1864 vpshufb xmm10,xmm10,xmm13 1865 vmovdqu XMMWORD[rcx],xmm10 1866 vzeroupper 1867 movaps xmm6,XMMWORD[rsp] 1868 movaps xmm7,XMMWORD[16+rsp] 1869 movaps xmm8,XMMWORD[32+rsp] 1870 movaps xmm9,XMMWORD[48+rsp] 1871 movaps xmm10,XMMWORD[64+rsp] 1872 movaps xmm11,XMMWORD[80+rsp] 1873 movaps xmm12,XMMWORD[96+rsp] 1874 movaps xmm13,XMMWORD[112+rsp] 1875 movaps xmm14,XMMWORD[128+rsp] 1876 movaps xmm15,XMMWORD[144+rsp] 1877 lea rsp,[168+rsp] 1878 $L$SEH_end_gcm_ghash_avx: 1879 DB 0F3h,0C3h ;repret 1376 1880 1377 1881 … … 1537 2041 DD $L$SEH_end_gcm_ghash_clmul wrt ..imagebase 1538 2042 DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase 2043 DD $L$SEH_begin_gcm_init_avx wrt ..imagebase 2044 DD $L$SEH_end_gcm_init_avx wrt ..imagebase 2045 DD $L$SEH_info_gcm_init_clmul wrt ..imagebase 2046 2047 DD $L$SEH_begin_gcm_ghash_avx wrt ..imagebase 2048 DD $L$SEH_end_gcm_ghash_avx wrt ..imagebase 2049 DD $L$SEH_info_gcm_ghash_clmul wrt ..imagebase 1539 2050 section .xdata rdata align=8 1540 2051 ALIGN 8 -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/poly1305-x86_64.S
r97373 r99371 38 38 lea r10,[poly1305_blocks] 39 39 lea r11,[poly1305_emit] 40 mov r9,QWORD[((OPENSSL_ia32cap_P+4))] 41 lea rax,[poly1305_blocks_avx] 42 lea rcx,[poly1305_emit_avx] 43 bt r9,28 44 cmovc r10,rax 45 cmovc r11,rcx 46 lea rax,[poly1305_blocks_avx2] 47 bt r9,37 48 cmovc r10,rax 49 mov rax,2149646336 50 shr r9,32 51 and r9,rax 52 cmp r9,rax 53 je NEAR $L$init_base2_44 40 54 mov rax,0x0ffffffc0fffffff 41 55 mov rcx,0x0ffffffc0ffffffc … … 212 226 213 227 $L$SEH_end_poly1305_emit: 228 229 ALIGN 32 230 __poly1305_block: 231 232 mul r14 233 mov r9,rax 234 mov rax,r11 235 mov r10,rdx 236 237 mul r14 238 mov r14,rax 239 mov rax,r11 240 mov r8,rdx 241 242 mul rbx 243 add r9,rax 244 mov rax,r13 245 adc r10,rdx 246 247 mul rbx 248 mov rbx,rbp 249 add r14,rax 250 adc r8,rdx 251 252 imul rbx,r13 253 add r9,rbx 254 mov rbx,r8 255 adc r10,0 256 257 imul rbp,r11 258 add rbx,r9 259 mov rax,-4 260 adc r10,rbp 261 262 and rax,r10 263 mov rbp,r10 264 shr r10,2 265 and rbp,3 266 add rax,r10 267 add r14,rax 268 adc rbx,0 269 adc rbp,0 270 DB 0F3h,0C3h ;repret 271 272 273 274 275 ALIGN 32 276 __poly1305_init_avx: 277 278 mov r14,r11 279 mov rbx,r12 280 xor rbp,rbp 281 282 lea rdi,[((48+64))+rdi] 283 284 mov rax,r12 285 call __poly1305_block 286 287 mov eax,0x3ffffff 288 mov edx,0x3ffffff 289 mov r8,r14 290 and eax,r14d 291 mov r9,r11 292 and edx,r11d 293 mov DWORD[((-64))+rdi],eax 294 shr r8,26 295 mov DWORD[((-60))+rdi],edx 296 shr r9,26 297 298 mov eax,0x3ffffff 299 mov edx,0x3ffffff 300 and eax,r8d 301 and edx,r9d 302 mov DWORD[((-48))+rdi],eax 303 lea eax,[rax*4+rax] 304 mov DWORD[((-44))+rdi],edx 305 lea edx,[rdx*4+rdx] 306 mov DWORD[((-32))+rdi],eax 307 shr r8,26 308 mov DWORD[((-28))+rdi],edx 309 shr r9,26 310 311 mov rax,rbx 312 mov rdx,r12 313 shl rax,12 314 shl rdx,12 315 or rax,r8 316 or rdx,r9 317 and eax,0x3ffffff 318 and edx,0x3ffffff 319 mov DWORD[((-16))+rdi],eax 320 lea eax,[rax*4+rax] 321 mov DWORD[((-12))+rdi],edx 322 lea edx,[rdx*4+rdx] 323 mov DWORD[rdi],eax 324 mov r8,rbx 325 mov DWORD[4+rdi],edx 326 mov r9,r12 327 328 mov eax,0x3ffffff 329 mov edx,0x3ffffff 330 shr r8,14 331 shr r9,14 332 and eax,r8d 333 and edx,r9d 334 mov DWORD[16+rdi],eax 335 lea eax,[rax*4+rax] 336 mov DWORD[20+rdi],edx 337 lea edx,[rdx*4+rdx] 338 mov DWORD[32+rdi],eax 339 shr r8,26 340 mov DWORD[36+rdi],edx 341 shr r9,26 342 343 mov rax,rbp 344 shl rax,24 345 or r8,rax 346 mov DWORD[48+rdi],r8d 347 lea r8,[r8*4+r8] 348 mov DWORD[52+rdi],r9d 349 lea r9,[r9*4+r9] 350 mov DWORD[64+rdi],r8d 351 mov DWORD[68+rdi],r9d 352 353 mov rax,r12 354 call __poly1305_block 355 356 mov eax,0x3ffffff 357 mov r8,r14 358 and eax,r14d 359 shr r8,26 360 mov DWORD[((-52))+rdi],eax 361 362 mov edx,0x3ffffff 363 and edx,r8d 364 mov DWORD[((-36))+rdi],edx 365 lea edx,[rdx*4+rdx] 366 shr r8,26 367 mov DWORD[((-20))+rdi],edx 368 369 mov rax,rbx 370 shl rax,12 371 or rax,r8 372 and eax,0x3ffffff 373 mov DWORD[((-4))+rdi],eax 374 lea eax,[rax*4+rax] 375 mov r8,rbx 376 mov DWORD[12+rdi],eax 377 378 mov edx,0x3ffffff 379 shr r8,14 380 and edx,r8d 381 mov DWORD[28+rdi],edx 382 lea edx,[rdx*4+rdx] 383 shr r8,26 384 mov DWORD[44+rdi],edx 385 386 mov rax,rbp 387 shl rax,24 388 or r8,rax 389 mov DWORD[60+rdi],r8d 390 lea r8,[r8*4+r8] 391 mov DWORD[76+rdi],r8d 392 393 mov rax,r12 394 call __poly1305_block 395 396 mov eax,0x3ffffff 397 mov r8,r14 398 and eax,r14d 399 shr r8,26 400 mov DWORD[((-56))+rdi],eax 401 402 mov edx,0x3ffffff 403 and edx,r8d 404 mov DWORD[((-40))+rdi],edx 405 lea edx,[rdx*4+rdx] 406 shr r8,26 407 mov DWORD[((-24))+rdi],edx 408 409 mov rax,rbx 410 shl rax,12 411 or rax,r8 412 and eax,0x3ffffff 413 mov DWORD[((-8))+rdi],eax 414 lea eax,[rax*4+rax] 415 mov r8,rbx 416 mov DWORD[8+rdi],eax 417 418 mov edx,0x3ffffff 419 shr r8,14 420 and edx,r8d 421 mov DWORD[24+rdi],edx 422 lea edx,[rdx*4+rdx] 423 shr r8,26 424 mov DWORD[40+rdi],edx 425 426 mov rax,rbp 427 shl rax,24 428 or r8,rax 429 mov DWORD[56+rdi],r8d 430 lea r8,[r8*4+r8] 431 mov DWORD[72+rdi],r8d 432 433 lea rdi,[((-48-64))+rdi] 434 DB 0F3h,0C3h ;repret 435 436 437 438 439 ALIGN 32 440 poly1305_blocks_avx: 441 mov QWORD[8+rsp],rdi ;WIN64 prologue 442 mov QWORD[16+rsp],rsi 443 mov rax,rsp 444 $L$SEH_begin_poly1305_blocks_avx: 445 mov rdi,rcx 446 mov rsi,rdx 447 mov rdx,r8 448 mov rcx,r9 449 450 451 452 mov r8d,DWORD[20+rdi] 453 cmp rdx,128 454 jae NEAR $L$blocks_avx 455 test r8d,r8d 456 jz NEAR $L$blocks 457 458 $L$blocks_avx: 459 and rdx,-16 460 jz NEAR $L$no_data_avx 461 462 vzeroupper 463 464 test r8d,r8d 465 jz NEAR $L$base2_64_avx 466 467 test rdx,31 468 jz NEAR $L$even_avx 469 470 push rbx 471 472 push rbp 473 474 push r12 475 476 push r13 477 478 push r14 479 480 push r15 481 482 $L$blocks_avx_body: 483 484 mov r15,rdx 485 486 mov r8,QWORD[rdi] 487 mov r9,QWORD[8+rdi] 488 mov ebp,DWORD[16+rdi] 489 490 mov r11,QWORD[24+rdi] 491 mov r13,QWORD[32+rdi] 492 493 494 mov r14d,r8d 495 and r8,-2147483648 496 mov r12,r9 497 mov ebx,r9d 498 and r9,-2147483648 499 500 shr r8,6 501 shl r12,52 502 add r14,r8 503 shr rbx,12 504 shr r9,18 505 add r14,r12 506 adc rbx,r9 507 508 mov r8,rbp 509 shl r8,40 510 shr rbp,24 511 add rbx,r8 512 adc rbp,0 513 514 mov r9,-4 515 mov r8,rbp 516 and r9,rbp 517 shr r8,2 518 and rbp,3 519 add r8,r9 520 add r14,r8 521 adc rbx,0 522 adc rbp,0 523 524 mov r12,r13 525 mov rax,r13 526 shr r13,2 527 add r13,r12 528 529 add r14,QWORD[rsi] 530 adc rbx,QWORD[8+rsi] 531 lea rsi,[16+rsi] 532 adc rbp,rcx 533 534 call __poly1305_block 535 536 test rcx,rcx 537 jz NEAR $L$store_base2_64_avx 538 539 540 mov rax,r14 541 mov rdx,r14 542 shr r14,52 543 mov r11,rbx 544 mov r12,rbx 545 shr rdx,26 546 and rax,0x3ffffff 547 shl r11,12 548 and rdx,0x3ffffff 549 shr rbx,14 550 or r14,r11 551 shl rbp,24 552 and r14,0x3ffffff 553 shr r12,40 554 and rbx,0x3ffffff 555 or rbp,r12 556 557 sub r15,16 558 jz NEAR $L$store_base2_26_avx 559 560 vmovd xmm0,eax 561 vmovd xmm1,edx 562 vmovd xmm2,r14d 563 vmovd xmm3,ebx 564 vmovd xmm4,ebp 565 jmp NEAR $L$proceed_avx 566 567 ALIGN 32 568 $L$store_base2_64_avx: 569 mov QWORD[rdi],r14 570 mov QWORD[8+rdi],rbx 571 mov QWORD[16+rdi],rbp 572 jmp NEAR $L$done_avx 573 574 ALIGN 16 575 $L$store_base2_26_avx: 576 mov DWORD[rdi],eax 577 mov DWORD[4+rdi],edx 578 mov DWORD[8+rdi],r14d 579 mov DWORD[12+rdi],ebx 580 mov DWORD[16+rdi],ebp 581 ALIGN 16 582 $L$done_avx: 583 mov r15,QWORD[rsp] 584 585 mov r14,QWORD[8+rsp] 586 587 mov r13,QWORD[16+rsp] 588 589 mov r12,QWORD[24+rsp] 590 591 mov rbp,QWORD[32+rsp] 592 593 mov rbx,QWORD[40+rsp] 594 595 lea rsp,[48+rsp] 596 597 $L$no_data_avx: 598 $L$blocks_avx_epilogue: 599 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 600 mov rsi,QWORD[16+rsp] 601 DB 0F3h,0C3h ;repret 602 603 604 ALIGN 32 605 $L$base2_64_avx: 606 607 push rbx 608 609 push rbp 610 611 push r12 612 613 push r13 614 615 push r14 616 617 push r15 618 619 $L$base2_64_avx_body: 620 621 mov r15,rdx 622 623 mov r11,QWORD[24+rdi] 624 mov r13,QWORD[32+rdi] 625 626 mov r14,QWORD[rdi] 627 mov rbx,QWORD[8+rdi] 628 mov ebp,DWORD[16+rdi] 629 630 mov r12,r13 631 mov rax,r13 632 shr r13,2 633 add r13,r12 634 635 test rdx,31 636 jz NEAR $L$init_avx 637 638 add r14,QWORD[rsi] 639 adc rbx,QWORD[8+rsi] 640 lea rsi,[16+rsi] 641 adc rbp,rcx 642 sub r15,16 643 644 call __poly1305_block 645 646 $L$init_avx: 647 648 mov rax,r14 649 mov rdx,r14 650 shr r14,52 651 mov r8,rbx 652 mov r9,rbx 653 shr rdx,26 654 and rax,0x3ffffff 655 shl r8,12 656 and rdx,0x3ffffff 657 shr rbx,14 658 or r14,r8 659 shl rbp,24 660 and r14,0x3ffffff 661 shr r9,40 662 and rbx,0x3ffffff 663 or rbp,r9 664 665 vmovd xmm0,eax 666 vmovd xmm1,edx 667 vmovd xmm2,r14d 668 vmovd xmm3,ebx 669 vmovd xmm4,ebp 670 mov DWORD[20+rdi],1 671 672 call __poly1305_init_avx 673 674 $L$proceed_avx: 675 mov rdx,r15 676 677 mov r15,QWORD[rsp] 678 679 mov r14,QWORD[8+rsp] 680 681 mov r13,QWORD[16+rsp] 682 683 mov r12,QWORD[24+rsp] 684 685 mov rbp,QWORD[32+rsp] 686 687 mov rbx,QWORD[40+rsp] 688 689 lea rax,[48+rsp] 690 lea rsp,[48+rsp] 691 692 $L$base2_64_avx_epilogue: 693 jmp NEAR $L$do_avx 694 695 696 ALIGN 32 697 $L$even_avx: 698 699 vmovd xmm0,DWORD[rdi] 700 vmovd xmm1,DWORD[4+rdi] 701 vmovd xmm2,DWORD[8+rdi] 702 vmovd xmm3,DWORD[12+rdi] 703 vmovd xmm4,DWORD[16+rdi] 704 705 $L$do_avx: 706 lea r11,[((-248))+rsp] 707 sub rsp,0x218 708 vmovdqa XMMWORD[80+r11],xmm6 709 vmovdqa XMMWORD[96+r11],xmm7 710 vmovdqa XMMWORD[112+r11],xmm8 711 vmovdqa XMMWORD[128+r11],xmm9 712 vmovdqa XMMWORD[144+r11],xmm10 713 vmovdqa XMMWORD[160+r11],xmm11 714 vmovdqa XMMWORD[176+r11],xmm12 715 vmovdqa XMMWORD[192+r11],xmm13 716 vmovdqa XMMWORD[208+r11],xmm14 717 vmovdqa XMMWORD[224+r11],xmm15 718 $L$do_avx_body: 719 sub rdx,64 720 lea rax,[((-32))+rsi] 721 cmovc rsi,rax 722 723 vmovdqu xmm14,XMMWORD[48+rdi] 724 lea rdi,[112+rdi] 725 lea rcx,[$L$const] 726 727 728 729 vmovdqu xmm5,XMMWORD[32+rsi] 730 vmovdqu xmm6,XMMWORD[48+rsi] 731 vmovdqa xmm15,XMMWORD[64+rcx] 732 733 vpsrldq xmm7,xmm5,6 734 vpsrldq xmm8,xmm6,6 735 vpunpckhqdq xmm9,xmm5,xmm6 736 vpunpcklqdq xmm5,xmm5,xmm6 737 vpunpcklqdq xmm8,xmm7,xmm8 738 739 vpsrlq xmm9,xmm9,40 740 vpsrlq xmm6,xmm5,26 741 vpand xmm5,xmm5,xmm15 742 vpsrlq xmm7,xmm8,4 743 vpand xmm6,xmm6,xmm15 744 vpsrlq xmm8,xmm8,30 745 vpand xmm7,xmm7,xmm15 746 vpand xmm8,xmm8,xmm15 747 vpor xmm9,xmm9,XMMWORD[32+rcx] 748 749 jbe NEAR $L$skip_loop_avx 750 751 752 vmovdqu xmm11,XMMWORD[((-48))+rdi] 753 vmovdqu xmm12,XMMWORD[((-32))+rdi] 754 vpshufd xmm13,xmm14,0xEE 755 vpshufd xmm10,xmm14,0x44 756 vmovdqa XMMWORD[(-144)+r11],xmm13 757 vmovdqa XMMWORD[rsp],xmm10 758 vpshufd xmm14,xmm11,0xEE 759 vmovdqu xmm10,XMMWORD[((-16))+rdi] 760 vpshufd xmm11,xmm11,0x44 761 vmovdqa XMMWORD[(-128)+r11],xmm14 762 vmovdqa XMMWORD[16+rsp],xmm11 763 vpshufd xmm13,xmm12,0xEE 764 vmovdqu xmm11,XMMWORD[rdi] 765 vpshufd xmm12,xmm12,0x44 766 vmovdqa XMMWORD[(-112)+r11],xmm13 767 vmovdqa XMMWORD[32+rsp],xmm12 768 vpshufd xmm14,xmm10,0xEE 769 vmovdqu xmm12,XMMWORD[16+rdi] 770 vpshufd xmm10,xmm10,0x44 771 vmovdqa XMMWORD[(-96)+r11],xmm14 772 vmovdqa XMMWORD[48+rsp],xmm10 773 vpshufd xmm13,xmm11,0xEE 774 vmovdqu xmm10,XMMWORD[32+rdi] 775 vpshufd xmm11,xmm11,0x44 776 vmovdqa XMMWORD[(-80)+r11],xmm13 777 vmovdqa XMMWORD[64+rsp],xmm11 778 vpshufd xmm14,xmm12,0xEE 779 vmovdqu xmm11,XMMWORD[48+rdi] 780 vpshufd xmm12,xmm12,0x44 781 vmovdqa XMMWORD[(-64)+r11],xmm14 782 vmovdqa XMMWORD[80+rsp],xmm12 783 vpshufd xmm13,xmm10,0xEE 784 vmovdqu xmm12,XMMWORD[64+rdi] 785 vpshufd xmm10,xmm10,0x44 786 vmovdqa XMMWORD[(-48)+r11],xmm13 787 vmovdqa XMMWORD[96+rsp],xmm10 788 vpshufd xmm14,xmm11,0xEE 789 vpshufd xmm11,xmm11,0x44 790 vmovdqa XMMWORD[(-32)+r11],xmm14 791 vmovdqa XMMWORD[112+rsp],xmm11 792 vpshufd xmm13,xmm12,0xEE 793 vmovdqa xmm14,XMMWORD[rsp] 794 vpshufd xmm12,xmm12,0x44 795 vmovdqa XMMWORD[(-16)+r11],xmm13 796 vmovdqa XMMWORD[128+rsp],xmm12 797 798 jmp NEAR $L$oop_avx 799 800 ALIGN 32 801 $L$oop_avx: 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 vpmuludq xmm10,xmm14,xmm5 823 vpmuludq xmm11,xmm14,xmm6 824 vmovdqa XMMWORD[32+r11],xmm2 825 vpmuludq xmm12,xmm14,xmm7 826 vmovdqa xmm2,XMMWORD[16+rsp] 827 vpmuludq xmm13,xmm14,xmm8 828 vpmuludq xmm14,xmm14,xmm9 829 830 vmovdqa XMMWORD[r11],xmm0 831 vpmuludq xmm0,xmm9,XMMWORD[32+rsp] 832 vmovdqa XMMWORD[16+r11],xmm1 833 vpmuludq xmm1,xmm2,xmm8 834 vpaddq xmm10,xmm10,xmm0 835 vpaddq xmm14,xmm14,xmm1 836 vmovdqa XMMWORD[48+r11],xmm3 837 vpmuludq xmm0,xmm2,xmm7 838 vpmuludq xmm1,xmm2,xmm6 839 vpaddq xmm13,xmm13,xmm0 840 vmovdqa xmm3,XMMWORD[48+rsp] 841 vpaddq xmm12,xmm12,xmm1 842 vmovdqa XMMWORD[64+r11],xmm4 843 vpmuludq xmm2,xmm2,xmm5 844 vpmuludq xmm0,xmm3,xmm7 845 vpaddq xmm11,xmm11,xmm2 846 847 vmovdqa xmm4,XMMWORD[64+rsp] 848 vpaddq xmm14,xmm14,xmm0 849 vpmuludq xmm1,xmm3,xmm6 850 vpmuludq xmm3,xmm3,xmm5 851 vpaddq xmm13,xmm13,xmm1 852 vmovdqa xmm2,XMMWORD[80+rsp] 853 vpaddq xmm12,xmm12,xmm3 854 vpmuludq xmm0,xmm4,xmm9 855 vpmuludq xmm4,xmm4,xmm8 856 vpaddq xmm11,xmm11,xmm0 857 vmovdqa xmm3,XMMWORD[96+rsp] 858 vpaddq xmm10,xmm10,xmm4 859 860 vmovdqa xmm4,XMMWORD[128+rsp] 861 vpmuludq xmm1,xmm2,xmm6 862 vpmuludq xmm2,xmm2,xmm5 863 vpaddq xmm14,xmm14,xmm1 864 vpaddq xmm13,xmm13,xmm2 865 vpmuludq xmm0,xmm3,xmm9 866 vpmuludq xmm1,xmm3,xmm8 867 vpaddq xmm12,xmm12,xmm0 868 vmovdqu xmm0,XMMWORD[rsi] 869 vpaddq xmm11,xmm11,xmm1 870 vpmuludq xmm3,xmm3,xmm7 871 vpmuludq xmm7,xmm4,xmm7 872 vpaddq xmm10,xmm10,xmm3 873 874 vmovdqu xmm1,XMMWORD[16+rsi] 875 vpaddq xmm11,xmm11,xmm7 876 vpmuludq xmm8,xmm4,xmm8 877 vpmuludq xmm9,xmm4,xmm9 878 vpsrldq xmm2,xmm0,6 879 vpaddq xmm12,xmm12,xmm8 880 vpaddq xmm13,xmm13,xmm9 881 vpsrldq xmm3,xmm1,6 882 vpmuludq xmm9,xmm5,XMMWORD[112+rsp] 883 vpmuludq xmm5,xmm4,xmm6 884 vpunpckhqdq xmm4,xmm0,xmm1 885 vpaddq xmm14,xmm14,xmm9 886 vmovdqa xmm9,XMMWORD[((-144))+r11] 887 vpaddq xmm10,xmm10,xmm5 888 889 vpunpcklqdq xmm0,xmm0,xmm1 890 vpunpcklqdq xmm3,xmm2,xmm3 891 892 893 vpsrldq xmm4,xmm4,5 894 vpsrlq xmm1,xmm0,26 895 vpand xmm0,xmm0,xmm15 896 vpsrlq xmm2,xmm3,4 897 vpand xmm1,xmm1,xmm15 898 vpand xmm4,xmm4,XMMWORD[rcx] 899 vpsrlq xmm3,xmm3,30 900 vpand xmm2,xmm2,xmm15 901 vpand xmm3,xmm3,xmm15 902 vpor xmm4,xmm4,XMMWORD[32+rcx] 903 904 vpaddq xmm0,xmm0,XMMWORD[r11] 905 vpaddq xmm1,xmm1,XMMWORD[16+r11] 906 vpaddq xmm2,xmm2,XMMWORD[32+r11] 907 vpaddq xmm3,xmm3,XMMWORD[48+r11] 908 vpaddq xmm4,xmm4,XMMWORD[64+r11] 909 910 lea rax,[32+rsi] 911 lea rsi,[64+rsi] 912 sub rdx,64 913 cmovc rsi,rax 914 915 916 917 918 919 920 921 922 923 924 vpmuludq xmm5,xmm9,xmm0 925 vpmuludq xmm6,xmm9,xmm1 926 vpaddq xmm10,xmm10,xmm5 927 vpaddq xmm11,xmm11,xmm6 928 vmovdqa xmm7,XMMWORD[((-128))+r11] 929 vpmuludq xmm5,xmm9,xmm2 930 vpmuludq xmm6,xmm9,xmm3 931 vpaddq xmm12,xmm12,xmm5 932 vpaddq xmm13,xmm13,xmm6 933 vpmuludq xmm9,xmm9,xmm4 934 vpmuludq xmm5,xmm4,XMMWORD[((-112))+r11] 935 vpaddq xmm14,xmm14,xmm9 936 937 vpaddq xmm10,xmm10,xmm5 938 vpmuludq xmm6,xmm7,xmm2 939 vpmuludq xmm5,xmm7,xmm3 940 vpaddq xmm13,xmm13,xmm6 941 vmovdqa xmm8,XMMWORD[((-96))+r11] 942 vpaddq xmm14,xmm14,xmm5 943 vpmuludq xmm6,xmm7,xmm1 944 vpmuludq xmm7,xmm7,xmm0 945 vpaddq xmm12,xmm12,xmm6 946 vpaddq xmm11,xmm11,xmm7 947 948 vmovdqa xmm9,XMMWORD[((-80))+r11] 949 vpmuludq xmm5,xmm8,xmm2 950 vpmuludq xmm6,xmm8,xmm1 951 vpaddq xmm14,xmm14,xmm5 952 vpaddq xmm13,xmm13,xmm6 953 vmovdqa xmm7,XMMWORD[((-64))+r11] 954 vpmuludq xmm8,xmm8,xmm0 955 vpmuludq xmm5,xmm9,xmm4 956 vpaddq xmm12,xmm12,xmm8 957 vpaddq xmm11,xmm11,xmm5 958 vmovdqa xmm8,XMMWORD[((-48))+r11] 959 vpmuludq xmm9,xmm9,xmm3 960 vpmuludq xmm6,xmm7,xmm1 961 vpaddq xmm10,xmm10,xmm9 962 963 vmovdqa xmm9,XMMWORD[((-16))+r11] 964 vpaddq xmm14,xmm14,xmm6 965 vpmuludq xmm7,xmm7,xmm0 966 vpmuludq xmm5,xmm8,xmm4 967 vpaddq xmm13,xmm13,xmm7 968 vpaddq xmm12,xmm12,xmm5 969 vmovdqu xmm5,XMMWORD[32+rsi] 970 vpmuludq xmm7,xmm8,xmm3 971 vpmuludq xmm8,xmm8,xmm2 972 vpaddq xmm11,xmm11,xmm7 973 vmovdqu xmm6,XMMWORD[48+rsi] 974 vpaddq xmm10,xmm10,xmm8 975 976 vpmuludq xmm2,xmm9,xmm2 977 vpmuludq xmm3,xmm9,xmm3 978 vpsrldq xmm7,xmm5,6 979 vpaddq xmm11,xmm11,xmm2 980 vpmuludq xmm4,xmm9,xmm4 981 vpsrldq xmm8,xmm6,6 982 vpaddq xmm2,xmm12,xmm3 983 vpaddq xmm3,xmm13,xmm4 984 vpmuludq xmm4,xmm0,XMMWORD[((-32))+r11] 985 vpmuludq xmm0,xmm9,xmm1 986 vpunpckhqdq xmm9,xmm5,xmm6 987 vpaddq xmm4,xmm14,xmm4 988 vpaddq xmm0,xmm10,xmm0 989 990 vpunpcklqdq xmm5,xmm5,xmm6 991 vpunpcklqdq xmm8,xmm7,xmm8 992 993 994 vpsrldq xmm9,xmm9,5 995 vpsrlq xmm6,xmm5,26 996 vmovdqa xmm14,XMMWORD[rsp] 997 vpand xmm5,xmm5,xmm15 998 vpsrlq xmm7,xmm8,4 999 vpand xmm6,xmm6,xmm15 1000 vpand xmm9,xmm9,XMMWORD[rcx] 1001 vpsrlq xmm8,xmm8,30 1002 vpand xmm7,xmm7,xmm15 1003 vpand xmm8,xmm8,xmm15 1004 vpor xmm9,xmm9,XMMWORD[32+rcx] 1005 1006 1007 1008 1009 1010 vpsrlq xmm13,xmm3,26 1011 vpand xmm3,xmm3,xmm15 1012 vpaddq xmm4,xmm4,xmm13 1013 1014 vpsrlq xmm10,xmm0,26 1015 vpand xmm0,xmm0,xmm15 1016 vpaddq xmm1,xmm11,xmm10 1017 1018 vpsrlq xmm10,xmm4,26 1019 vpand xmm4,xmm4,xmm15 1020 1021 vpsrlq xmm11,xmm1,26 1022 vpand xmm1,xmm1,xmm15 1023 vpaddq xmm2,xmm2,xmm11 1024 1025 vpaddq xmm0,xmm0,xmm10 1026 vpsllq xmm10,xmm10,2 1027 vpaddq xmm0,xmm0,xmm10 1028 1029 vpsrlq xmm12,xmm2,26 1030 vpand xmm2,xmm2,xmm15 1031 vpaddq xmm3,xmm3,xmm12 1032 1033 vpsrlq xmm10,xmm0,26 1034 vpand xmm0,xmm0,xmm15 1035 vpaddq xmm1,xmm1,xmm10 1036 1037 vpsrlq xmm13,xmm3,26 1038 vpand xmm3,xmm3,xmm15 1039 vpaddq xmm4,xmm4,xmm13 1040 1041 ja NEAR $L$oop_avx 1042 1043 $L$skip_loop_avx: 1044 1045 1046 1047 vpshufd xmm14,xmm14,0x10 1048 add rdx,32 1049 jnz NEAR $L$ong_tail_avx 1050 1051 vpaddq xmm7,xmm7,xmm2 1052 vpaddq xmm5,xmm5,xmm0 1053 vpaddq xmm6,xmm6,xmm1 1054 vpaddq xmm8,xmm8,xmm3 1055 vpaddq xmm9,xmm9,xmm4 1056 1057 $L$ong_tail_avx: 1058 vmovdqa XMMWORD[32+r11],xmm2 1059 vmovdqa XMMWORD[r11],xmm0 1060 vmovdqa XMMWORD[16+r11],xmm1 1061 vmovdqa XMMWORD[48+r11],xmm3 1062 vmovdqa XMMWORD[64+r11],xmm4 1063 1064 1065 1066 1067 1068 1069 1070 vpmuludq xmm12,xmm14,xmm7 1071 vpmuludq xmm10,xmm14,xmm5 1072 vpshufd xmm2,XMMWORD[((-48))+rdi],0x10 1073 vpmuludq xmm11,xmm14,xmm6 1074 vpmuludq xmm13,xmm14,xmm8 1075 vpmuludq xmm14,xmm14,xmm9 1076 1077 vpmuludq xmm0,xmm2,xmm8 1078 vpaddq xmm14,xmm14,xmm0 1079 vpshufd xmm3,XMMWORD[((-32))+rdi],0x10 1080 vpmuludq xmm1,xmm2,xmm7 1081 vpaddq xmm13,xmm13,xmm1 1082 vpshufd xmm4,XMMWORD[((-16))+rdi],0x10 1083 vpmuludq xmm0,xmm2,xmm6 1084 vpaddq xmm12,xmm12,xmm0 1085 vpmuludq xmm2,xmm2,xmm5 1086 vpaddq xmm11,xmm11,xmm2 1087 vpmuludq xmm3,xmm3,xmm9 1088 vpaddq xmm10,xmm10,xmm3 1089 1090 vpshufd xmm2,XMMWORD[rdi],0x10 1091 vpmuludq xmm1,xmm4,xmm7 1092 vpaddq xmm14,xmm14,xmm1 1093 vpmuludq xmm0,xmm4,xmm6 1094 vpaddq xmm13,xmm13,xmm0 1095 vpshufd xmm3,XMMWORD[16+rdi],0x10 1096 vpmuludq xmm4,xmm4,xmm5 1097 vpaddq xmm12,xmm12,xmm4 1098 vpmuludq xmm1,xmm2,xmm9 1099 vpaddq xmm11,xmm11,xmm1 1100 vpshufd xmm4,XMMWORD[32+rdi],0x10 1101 vpmuludq xmm2,xmm2,xmm8 1102 vpaddq xmm10,xmm10,xmm2 1103 1104 vpmuludq xmm0,xmm3,xmm6 1105 vpaddq xmm14,xmm14,xmm0 1106 vpmuludq xmm3,xmm3,xmm5 1107 vpaddq xmm13,xmm13,xmm3 1108 vpshufd xmm2,XMMWORD[48+rdi],0x10 1109 vpmuludq xmm1,xmm4,xmm9 1110 vpaddq xmm12,xmm12,xmm1 1111 vpshufd xmm3,XMMWORD[64+rdi],0x10 1112 vpmuludq xmm0,xmm4,xmm8 1113 vpaddq xmm11,xmm11,xmm0 1114 vpmuludq xmm4,xmm4,xmm7 1115 vpaddq xmm10,xmm10,xmm4 1116 1117 vpmuludq xmm2,xmm2,xmm5 1118 vpaddq xmm14,xmm14,xmm2 1119 vpmuludq xmm1,xmm3,xmm9 1120 vpaddq xmm13,xmm13,xmm1 1121 vpmuludq xmm0,xmm3,xmm8 1122 vpaddq xmm12,xmm12,xmm0 1123 vpmuludq xmm1,xmm3,xmm7 1124 vpaddq xmm11,xmm11,xmm1 1125 vpmuludq xmm3,xmm3,xmm6 1126 vpaddq xmm10,xmm10,xmm3 1127 1128 jz NEAR $L$short_tail_avx 1129 1130 vmovdqu xmm0,XMMWORD[rsi] 1131 vmovdqu xmm1,XMMWORD[16+rsi] 1132 1133 vpsrldq xmm2,xmm0,6 1134 vpsrldq xmm3,xmm1,6 1135 vpunpckhqdq xmm4,xmm0,xmm1 1136 vpunpcklqdq xmm0,xmm0,xmm1 1137 vpunpcklqdq xmm3,xmm2,xmm3 1138 1139 vpsrlq xmm4,xmm4,40 1140 vpsrlq xmm1,xmm0,26 1141 vpand xmm0,xmm0,xmm15 1142 vpsrlq xmm2,xmm3,4 1143 vpand xmm1,xmm1,xmm15 1144 vpsrlq xmm3,xmm3,30 1145 vpand xmm2,xmm2,xmm15 1146 vpand xmm3,xmm3,xmm15 1147 vpor xmm4,xmm4,XMMWORD[32+rcx] 1148 1149 vpshufd xmm9,XMMWORD[((-64))+rdi],0x32 1150 vpaddq xmm0,xmm0,XMMWORD[r11] 1151 vpaddq xmm1,xmm1,XMMWORD[16+r11] 1152 vpaddq xmm2,xmm2,XMMWORD[32+r11] 1153 vpaddq xmm3,xmm3,XMMWORD[48+r11] 1154 vpaddq xmm4,xmm4,XMMWORD[64+r11] 1155 1156 1157 1158 1159 vpmuludq xmm5,xmm9,xmm0 1160 vpaddq xmm10,xmm10,xmm5 1161 vpmuludq xmm6,xmm9,xmm1 1162 vpaddq xmm11,xmm11,xmm6 1163 vpmuludq xmm5,xmm9,xmm2 1164 vpaddq xmm12,xmm12,xmm5 1165 vpshufd xmm7,XMMWORD[((-48))+rdi],0x32 1166 vpmuludq xmm6,xmm9,xmm3 1167 vpaddq xmm13,xmm13,xmm6 1168 vpmuludq xmm9,xmm9,xmm4 1169 vpaddq xmm14,xmm14,xmm9 1170 1171 vpmuludq xmm5,xmm7,xmm3 1172 vpaddq xmm14,xmm14,xmm5 1173 vpshufd xmm8,XMMWORD[((-32))+rdi],0x32 1174 vpmuludq xmm6,xmm7,xmm2 1175 vpaddq xmm13,xmm13,xmm6 1176 vpshufd xmm9,XMMWORD[((-16))+rdi],0x32 1177 vpmuludq xmm5,xmm7,xmm1 1178 vpaddq xmm12,xmm12,xmm5 1179 vpmuludq xmm7,xmm7,xmm0 1180 vpaddq xmm11,xmm11,xmm7 1181 vpmuludq xmm8,xmm8,xmm4 1182 vpaddq xmm10,xmm10,xmm8 1183 1184 vpshufd xmm7,XMMWORD[rdi],0x32 1185 vpmuludq xmm6,xmm9,xmm2 1186 vpaddq xmm14,xmm14,xmm6 1187 vpmuludq xmm5,xmm9,xmm1 1188 vpaddq xmm13,xmm13,xmm5 1189 vpshufd xmm8,XMMWORD[16+rdi],0x32 1190 vpmuludq xmm9,xmm9,xmm0 1191 vpaddq xmm12,xmm12,xmm9 1192 vpmuludq xmm6,xmm7,xmm4 1193 vpaddq xmm11,xmm11,xmm6 1194 vpshufd xmm9,XMMWORD[32+rdi],0x32 1195 vpmuludq xmm7,xmm7,xmm3 1196 vpaddq xmm10,xmm10,xmm7 1197 1198 vpmuludq xmm5,xmm8,xmm1 1199 vpaddq xmm14,xmm14,xmm5 1200 vpmuludq xmm8,xmm8,xmm0 1201 vpaddq xmm13,xmm13,xmm8 1202 vpshufd xmm7,XMMWORD[48+rdi],0x32 1203 vpmuludq xmm6,xmm9,xmm4 1204 vpaddq xmm12,xmm12,xmm6 1205 vpshufd xmm8,XMMWORD[64+rdi],0x32 1206 vpmuludq xmm5,xmm9,xmm3 1207 vpaddq xmm11,xmm11,xmm5 1208 vpmuludq xmm9,xmm9,xmm2 1209 vpaddq xmm10,xmm10,xmm9 1210 1211 vpmuludq xmm7,xmm7,xmm0 1212 vpaddq xmm14,xmm14,xmm7 1213 vpmuludq xmm6,xmm8,xmm4 1214 vpaddq xmm13,xmm13,xmm6 1215 vpmuludq xmm5,xmm8,xmm3 1216 vpaddq xmm12,xmm12,xmm5 1217 vpmuludq xmm6,xmm8,xmm2 1218 vpaddq xmm11,xmm11,xmm6 1219 vpmuludq xmm8,xmm8,xmm1 1220 vpaddq xmm10,xmm10,xmm8 1221 1222 $L$short_tail_avx: 1223 1224 1225 1226 vpsrldq xmm9,xmm14,8 1227 vpsrldq xmm8,xmm13,8 1228 vpsrldq xmm6,xmm11,8 1229 vpsrldq xmm5,xmm10,8 1230 vpsrldq xmm7,xmm12,8 1231 vpaddq xmm13,xmm13,xmm8 1232 vpaddq xmm14,xmm14,xmm9 1233 vpaddq xmm10,xmm10,xmm5 1234 vpaddq xmm11,xmm11,xmm6 1235 vpaddq xmm12,xmm12,xmm7 1236 1237 1238 1239 1240 vpsrlq xmm3,xmm13,26 1241 vpand xmm13,xmm13,xmm15 1242 vpaddq xmm14,xmm14,xmm3 1243 1244 vpsrlq xmm0,xmm10,26 1245 vpand xmm10,xmm10,xmm15 1246 vpaddq xmm11,xmm11,xmm0 1247 1248 vpsrlq xmm4,xmm14,26 1249 vpand xmm14,xmm14,xmm15 1250 1251 vpsrlq xmm1,xmm11,26 1252 vpand xmm11,xmm11,xmm15 1253 vpaddq xmm12,xmm12,xmm1 1254 1255 vpaddq xmm10,xmm10,xmm4 1256 vpsllq xmm4,xmm4,2 1257 vpaddq xmm10,xmm10,xmm4 1258 1259 vpsrlq xmm2,xmm12,26 1260 vpand xmm12,xmm12,xmm15 1261 vpaddq xmm13,xmm13,xmm2 1262 1263 vpsrlq xmm0,xmm10,26 1264 vpand xmm10,xmm10,xmm15 1265 vpaddq xmm11,xmm11,xmm0 1266 1267 vpsrlq xmm3,xmm13,26 1268 vpand xmm13,xmm13,xmm15 1269 vpaddq xmm14,xmm14,xmm3 1270 1271 vmovd DWORD[(-112)+rdi],xmm10 1272 vmovd DWORD[(-108)+rdi],xmm11 1273 vmovd DWORD[(-104)+rdi],xmm12 1274 vmovd DWORD[(-100)+rdi],xmm13 1275 vmovd DWORD[(-96)+rdi],xmm14 1276 vmovdqa xmm6,XMMWORD[80+r11] 1277 vmovdqa xmm7,XMMWORD[96+r11] 1278 vmovdqa xmm8,XMMWORD[112+r11] 1279 vmovdqa xmm9,XMMWORD[128+r11] 1280 vmovdqa xmm10,XMMWORD[144+r11] 1281 vmovdqa xmm11,XMMWORD[160+r11] 1282 vmovdqa xmm12,XMMWORD[176+r11] 1283 vmovdqa xmm13,XMMWORD[192+r11] 1284 vmovdqa xmm14,XMMWORD[208+r11] 1285 vmovdqa xmm15,XMMWORD[224+r11] 1286 lea rsp,[248+r11] 1287 $L$do_avx_epilogue: 1288 vzeroupper 1289 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1290 mov rsi,QWORD[16+rsp] 1291 DB 0F3h,0C3h ;repret 1292 1293 $L$SEH_end_poly1305_blocks_avx: 1294 1295 1296 ALIGN 32 1297 poly1305_emit_avx: 1298 mov QWORD[8+rsp],rdi ;WIN64 prologue 1299 mov QWORD[16+rsp],rsi 1300 mov rax,rsp 1301 $L$SEH_begin_poly1305_emit_avx: 1302 mov rdi,rcx 1303 mov rsi,rdx 1304 mov rdx,r8 1305 1306 1307 1308 cmp DWORD[20+rdi],0 1309 je NEAR $L$emit 1310 1311 mov eax,DWORD[rdi] 1312 mov ecx,DWORD[4+rdi] 1313 mov r8d,DWORD[8+rdi] 1314 mov r11d,DWORD[12+rdi] 1315 mov r10d,DWORD[16+rdi] 1316 1317 shl rcx,26 1318 mov r9,r8 1319 shl r8,52 1320 add rax,rcx 1321 shr r9,12 1322 add r8,rax 1323 adc r9,0 1324 1325 shl r11,14 1326 mov rax,r10 1327 shr r10,24 1328 add r9,r11 1329 shl rax,40 1330 add r9,rax 1331 adc r10,0 1332 1333 mov rax,r10 1334 mov rcx,r10 1335 and r10,3 1336 shr rax,2 1337 and rcx,-4 1338 add rax,rcx 1339 add r8,rax 1340 adc r9,0 1341 adc r10,0 1342 1343 mov rax,r8 1344 add r8,5 1345 mov rcx,r9 1346 adc r9,0 1347 adc r10,0 1348 shr r10,2 1349 cmovnz rax,r8 1350 cmovnz rcx,r9 1351 1352 add rax,QWORD[rdx] 1353 adc rcx,QWORD[8+rdx] 1354 mov QWORD[rsi],rax 1355 mov QWORD[8+rsi],rcx 1356 1357 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1358 mov rsi,QWORD[16+rsp] 1359 DB 0F3h,0C3h ;repret 1360 1361 $L$SEH_end_poly1305_emit_avx: 1362 1363 ALIGN 32 1364 poly1305_blocks_avx2: 1365 mov QWORD[8+rsp],rdi ;WIN64 prologue 1366 mov QWORD[16+rsp],rsi 1367 mov rax,rsp 1368 $L$SEH_begin_poly1305_blocks_avx2: 1369 mov rdi,rcx 1370 mov rsi,rdx 1371 mov rdx,r8 1372 mov rcx,r9 1373 1374 1375 1376 mov r8d,DWORD[20+rdi] 1377 cmp rdx,128 1378 jae NEAR $L$blocks_avx2 1379 test r8d,r8d 1380 jz NEAR $L$blocks 1381 1382 $L$blocks_avx2: 1383 and rdx,-16 1384 jz NEAR $L$no_data_avx2 1385 1386 vzeroupper 1387 1388 test r8d,r8d 1389 jz NEAR $L$base2_64_avx2 1390 1391 test rdx,63 1392 jz NEAR $L$even_avx2 1393 1394 push rbx 1395 1396 push rbp 1397 1398 push r12 1399 1400 push r13 1401 1402 push r14 1403 1404 push r15 1405 1406 $L$blocks_avx2_body: 1407 1408 mov r15,rdx 1409 1410 mov r8,QWORD[rdi] 1411 mov r9,QWORD[8+rdi] 1412 mov ebp,DWORD[16+rdi] 1413 1414 mov r11,QWORD[24+rdi] 1415 mov r13,QWORD[32+rdi] 1416 1417 1418 mov r14d,r8d 1419 and r8,-2147483648 1420 mov r12,r9 1421 mov ebx,r9d 1422 and r9,-2147483648 1423 1424 shr r8,6 1425 shl r12,52 1426 add r14,r8 1427 shr rbx,12 1428 shr r9,18 1429 add r14,r12 1430 adc rbx,r9 1431 1432 mov r8,rbp 1433 shl r8,40 1434 shr rbp,24 1435 add rbx,r8 1436 adc rbp,0 1437 1438 mov r9,-4 1439 mov r8,rbp 1440 and r9,rbp 1441 shr r8,2 1442 and rbp,3 1443 add r8,r9 1444 add r14,r8 1445 adc rbx,0 1446 adc rbp,0 1447 1448 mov r12,r13 1449 mov rax,r13 1450 shr r13,2 1451 add r13,r12 1452 1453 $L$base2_26_pre_avx2: 1454 add r14,QWORD[rsi] 1455 adc rbx,QWORD[8+rsi] 1456 lea rsi,[16+rsi] 1457 adc rbp,rcx 1458 sub r15,16 1459 1460 call __poly1305_block 1461 mov rax,r12 1462 1463 test r15,63 1464 jnz NEAR $L$base2_26_pre_avx2 1465 1466 test rcx,rcx 1467 jz NEAR $L$store_base2_64_avx2 1468 1469 1470 mov rax,r14 1471 mov rdx,r14 1472 shr r14,52 1473 mov r11,rbx 1474 mov r12,rbx 1475 shr rdx,26 1476 and rax,0x3ffffff 1477 shl r11,12 1478 and rdx,0x3ffffff 1479 shr rbx,14 1480 or r14,r11 1481 shl rbp,24 1482 and r14,0x3ffffff 1483 shr r12,40 1484 and rbx,0x3ffffff 1485 or rbp,r12 1486 1487 test r15,r15 1488 jz NEAR $L$store_base2_26_avx2 1489 1490 vmovd xmm0,eax 1491 vmovd xmm1,edx 1492 vmovd xmm2,r14d 1493 vmovd xmm3,ebx 1494 vmovd xmm4,ebp 1495 jmp NEAR $L$proceed_avx2 1496 1497 ALIGN 32 1498 $L$store_base2_64_avx2: 1499 mov QWORD[rdi],r14 1500 mov QWORD[8+rdi],rbx 1501 mov QWORD[16+rdi],rbp 1502 jmp NEAR $L$done_avx2 1503 1504 ALIGN 16 1505 $L$store_base2_26_avx2: 1506 mov DWORD[rdi],eax 1507 mov DWORD[4+rdi],edx 1508 mov DWORD[8+rdi],r14d 1509 mov DWORD[12+rdi],ebx 1510 mov DWORD[16+rdi],ebp 1511 ALIGN 16 1512 $L$done_avx2: 1513 mov r15,QWORD[rsp] 1514 1515 mov r14,QWORD[8+rsp] 1516 1517 mov r13,QWORD[16+rsp] 1518 1519 mov r12,QWORD[24+rsp] 1520 1521 mov rbp,QWORD[32+rsp] 1522 1523 mov rbx,QWORD[40+rsp] 1524 1525 lea rsp,[48+rsp] 1526 1527 $L$no_data_avx2: 1528 $L$blocks_avx2_epilogue: 1529 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1530 mov rsi,QWORD[16+rsp] 1531 DB 0F3h,0C3h ;repret 1532 1533 1534 ALIGN 32 1535 $L$base2_64_avx2: 1536 1537 push rbx 1538 1539 push rbp 1540 1541 push r12 1542 1543 push r13 1544 1545 push r14 1546 1547 push r15 1548 1549 $L$base2_64_avx2_body: 1550 1551 mov r15,rdx 1552 1553 mov r11,QWORD[24+rdi] 1554 mov r13,QWORD[32+rdi] 1555 1556 mov r14,QWORD[rdi] 1557 mov rbx,QWORD[8+rdi] 1558 mov ebp,DWORD[16+rdi] 1559 1560 mov r12,r13 1561 mov rax,r13 1562 shr r13,2 1563 add r13,r12 1564 1565 test rdx,63 1566 jz NEAR $L$init_avx2 1567 1568 $L$base2_64_pre_avx2: 1569 add r14,QWORD[rsi] 1570 adc rbx,QWORD[8+rsi] 1571 lea rsi,[16+rsi] 1572 adc rbp,rcx 1573 sub r15,16 1574 1575 call __poly1305_block 1576 mov rax,r12 1577 1578 test r15,63 1579 jnz NEAR $L$base2_64_pre_avx2 1580 1581 $L$init_avx2: 1582 1583 mov rax,r14 1584 mov rdx,r14 1585 shr r14,52 1586 mov r8,rbx 1587 mov r9,rbx 1588 shr rdx,26 1589 and rax,0x3ffffff 1590 shl r8,12 1591 and rdx,0x3ffffff 1592 shr rbx,14 1593 or r14,r8 1594 shl rbp,24 1595 and r14,0x3ffffff 1596 shr r9,40 1597 and rbx,0x3ffffff 1598 or rbp,r9 1599 1600 vmovd xmm0,eax 1601 vmovd xmm1,edx 1602 vmovd xmm2,r14d 1603 vmovd xmm3,ebx 1604 vmovd xmm4,ebp 1605 mov DWORD[20+rdi],1 1606 1607 call __poly1305_init_avx 1608 1609 $L$proceed_avx2: 1610 mov rdx,r15 1611 mov r10d,DWORD[((OPENSSL_ia32cap_P+8))] 1612 mov r11d,3221291008 1613 1614 mov r15,QWORD[rsp] 1615 1616 mov r14,QWORD[8+rsp] 1617 1618 mov r13,QWORD[16+rsp] 1619 1620 mov r12,QWORD[24+rsp] 1621 1622 mov rbp,QWORD[32+rsp] 1623 1624 mov rbx,QWORD[40+rsp] 1625 1626 lea rax,[48+rsp] 1627 lea rsp,[48+rsp] 1628 1629 $L$base2_64_avx2_epilogue: 1630 jmp NEAR $L$do_avx2 1631 1632 1633 ALIGN 32 1634 $L$even_avx2: 1635 1636 mov r10d,DWORD[((OPENSSL_ia32cap_P+8))] 1637 vmovd xmm0,DWORD[rdi] 1638 vmovd xmm1,DWORD[4+rdi] 1639 vmovd xmm2,DWORD[8+rdi] 1640 vmovd xmm3,DWORD[12+rdi] 1641 vmovd xmm4,DWORD[16+rdi] 1642 1643 $L$do_avx2: 1644 cmp rdx,512 1645 jb NEAR $L$skip_avx512 1646 and r10d,r11d 1647 test r10d,65536 1648 jnz NEAR $L$blocks_avx512 1649 $L$skip_avx512: 1650 lea r11,[((-248))+rsp] 1651 sub rsp,0x1c8 1652 vmovdqa XMMWORD[80+r11],xmm6 1653 vmovdqa XMMWORD[96+r11],xmm7 1654 vmovdqa XMMWORD[112+r11],xmm8 1655 vmovdqa XMMWORD[128+r11],xmm9 1656 vmovdqa XMMWORD[144+r11],xmm10 1657 vmovdqa XMMWORD[160+r11],xmm11 1658 vmovdqa XMMWORD[176+r11],xmm12 1659 vmovdqa XMMWORD[192+r11],xmm13 1660 vmovdqa XMMWORD[208+r11],xmm14 1661 vmovdqa XMMWORD[224+r11],xmm15 1662 $L$do_avx2_body: 1663 lea rcx,[$L$const] 1664 lea rdi,[((48+64))+rdi] 1665 vmovdqa ymm7,YMMWORD[96+rcx] 1666 1667 1668 vmovdqu xmm9,XMMWORD[((-64))+rdi] 1669 and rsp,-512 1670 vmovdqu xmm10,XMMWORD[((-48))+rdi] 1671 vmovdqu xmm6,XMMWORD[((-32))+rdi] 1672 vmovdqu xmm11,XMMWORD[((-16))+rdi] 1673 vmovdqu xmm12,XMMWORD[rdi] 1674 vmovdqu xmm13,XMMWORD[16+rdi] 1675 lea rax,[144+rsp] 1676 vmovdqu xmm14,XMMWORD[32+rdi] 1677 vpermd ymm9,ymm7,ymm9 1678 vmovdqu xmm15,XMMWORD[48+rdi] 1679 vpermd ymm10,ymm7,ymm10 1680 vmovdqu xmm5,XMMWORD[64+rdi] 1681 vpermd ymm6,ymm7,ymm6 1682 vmovdqa YMMWORD[rsp],ymm9 1683 vpermd ymm11,ymm7,ymm11 1684 vmovdqa YMMWORD[(32-144)+rax],ymm10 1685 vpermd ymm12,ymm7,ymm12 1686 vmovdqa YMMWORD[(64-144)+rax],ymm6 1687 vpermd ymm13,ymm7,ymm13 1688 vmovdqa YMMWORD[(96-144)+rax],ymm11 1689 vpermd ymm14,ymm7,ymm14 1690 vmovdqa YMMWORD[(128-144)+rax],ymm12 1691 vpermd ymm15,ymm7,ymm15 1692 vmovdqa YMMWORD[(160-144)+rax],ymm13 1693 vpermd ymm5,ymm7,ymm5 1694 vmovdqa YMMWORD[(192-144)+rax],ymm14 1695 vmovdqa YMMWORD[(224-144)+rax],ymm15 1696 vmovdqa YMMWORD[(256-144)+rax],ymm5 1697 vmovdqa ymm5,YMMWORD[64+rcx] 1698 1699 1700 1701 vmovdqu xmm7,XMMWORD[rsi] 1702 vmovdqu xmm8,XMMWORD[16+rsi] 1703 vinserti128 ymm7,ymm7,XMMWORD[32+rsi],1 1704 vinserti128 ymm8,ymm8,XMMWORD[48+rsi],1 1705 lea rsi,[64+rsi] 1706 1707 vpsrldq ymm9,ymm7,6 1708 vpsrldq ymm10,ymm8,6 1709 vpunpckhqdq ymm6,ymm7,ymm8 1710 vpunpcklqdq ymm9,ymm9,ymm10 1711 vpunpcklqdq ymm7,ymm7,ymm8 1712 1713 vpsrlq ymm10,ymm9,30 1714 vpsrlq ymm9,ymm9,4 1715 vpsrlq ymm8,ymm7,26 1716 vpsrlq ymm6,ymm6,40 1717 vpand ymm9,ymm9,ymm5 1718 vpand ymm7,ymm7,ymm5 1719 vpand ymm8,ymm8,ymm5 1720 vpand ymm10,ymm10,ymm5 1721 vpor ymm6,ymm6,YMMWORD[32+rcx] 1722 1723 vpaddq ymm2,ymm9,ymm2 1724 sub rdx,64 1725 jz NEAR $L$tail_avx2 1726 jmp NEAR $L$oop_avx2 1727 1728 ALIGN 32 1729 $L$oop_avx2: 1730 1731 1732 1733 1734 1735 1736 1737 1738 vpaddq ymm0,ymm7,ymm0 1739 vmovdqa ymm7,YMMWORD[rsp] 1740 vpaddq ymm1,ymm8,ymm1 1741 vmovdqa ymm8,YMMWORD[32+rsp] 1742 vpaddq ymm3,ymm10,ymm3 1743 vmovdqa ymm9,YMMWORD[96+rsp] 1744 vpaddq ymm4,ymm6,ymm4 1745 vmovdqa ymm10,YMMWORD[48+rax] 1746 vmovdqa ymm5,YMMWORD[112+rax] 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 vpmuludq ymm13,ymm7,ymm2 1764 vpmuludq ymm14,ymm8,ymm2 1765 vpmuludq ymm15,ymm9,ymm2 1766 vpmuludq ymm11,ymm10,ymm2 1767 vpmuludq ymm12,ymm5,ymm2 1768 1769 vpmuludq ymm6,ymm8,ymm0 1770 vpmuludq ymm2,ymm8,ymm1 1771 vpaddq ymm12,ymm12,ymm6 1772 vpaddq ymm13,ymm13,ymm2 1773 vpmuludq ymm6,ymm8,ymm3 1774 vpmuludq ymm2,ymm4,YMMWORD[64+rsp] 1775 vpaddq ymm15,ymm15,ymm6 1776 vpaddq ymm11,ymm11,ymm2 1777 vmovdqa ymm8,YMMWORD[((-16))+rax] 1778 1779 vpmuludq ymm6,ymm7,ymm0 1780 vpmuludq ymm2,ymm7,ymm1 1781 vpaddq ymm11,ymm11,ymm6 1782 vpaddq ymm12,ymm12,ymm2 1783 vpmuludq ymm6,ymm7,ymm3 1784 vpmuludq ymm2,ymm7,ymm4 1785 vmovdqu xmm7,XMMWORD[rsi] 1786 vpaddq ymm14,ymm14,ymm6 1787 vpaddq ymm15,ymm15,ymm2 1788 vinserti128 ymm7,ymm7,XMMWORD[32+rsi],1 1789 1790 vpmuludq ymm6,ymm8,ymm3 1791 vpmuludq ymm2,ymm8,ymm4 1792 vmovdqu xmm8,XMMWORD[16+rsi] 1793 vpaddq ymm11,ymm11,ymm6 1794 vpaddq ymm12,ymm12,ymm2 1795 vmovdqa ymm2,YMMWORD[16+rax] 1796 vpmuludq ymm6,ymm9,ymm1 1797 vpmuludq ymm9,ymm9,ymm0 1798 vpaddq ymm14,ymm14,ymm6 1799 vpaddq ymm13,ymm13,ymm9 1800 vinserti128 ymm8,ymm8,XMMWORD[48+rsi],1 1801 lea rsi,[64+rsi] 1802 1803 vpmuludq ymm6,ymm2,ymm1 1804 vpmuludq ymm2,ymm2,ymm0 1805 vpsrldq ymm9,ymm7,6 1806 vpaddq ymm15,ymm15,ymm6 1807 vpaddq ymm14,ymm14,ymm2 1808 vpmuludq ymm6,ymm10,ymm3 1809 vpmuludq ymm2,ymm10,ymm4 1810 vpsrldq ymm10,ymm8,6 1811 vpaddq ymm12,ymm12,ymm6 1812 vpaddq ymm13,ymm13,ymm2 1813 vpunpckhqdq ymm6,ymm7,ymm8 1814 1815 vpmuludq ymm3,ymm5,ymm3 1816 vpmuludq ymm4,ymm5,ymm4 1817 vpunpcklqdq ymm7,ymm7,ymm8 1818 vpaddq ymm2,ymm13,ymm3 1819 vpaddq ymm3,ymm14,ymm4 1820 vpunpcklqdq ymm10,ymm9,ymm10 1821 vpmuludq ymm4,ymm0,YMMWORD[80+rax] 1822 vpmuludq ymm0,ymm5,ymm1 1823 vmovdqa ymm5,YMMWORD[64+rcx] 1824 vpaddq ymm4,ymm15,ymm4 1825 vpaddq ymm0,ymm11,ymm0 1826 1827 1828 1829 1830 vpsrlq ymm14,ymm3,26 1831 vpand ymm3,ymm3,ymm5 1832 vpaddq ymm4,ymm4,ymm14 1833 1834 vpsrlq ymm11,ymm0,26 1835 vpand ymm0,ymm0,ymm5 1836 vpaddq ymm1,ymm12,ymm11 1837 1838 vpsrlq ymm15,ymm4,26 1839 vpand ymm4,ymm4,ymm5 1840 1841 vpsrlq ymm9,ymm10,4 1842 1843 vpsrlq ymm12,ymm1,26 1844 vpand ymm1,ymm1,ymm5 1845 vpaddq ymm2,ymm2,ymm12 1846 1847 vpaddq ymm0,ymm0,ymm15 1848 vpsllq ymm15,ymm15,2 1849 vpaddq ymm0,ymm0,ymm15 1850 1851 vpand ymm9,ymm9,ymm5 1852 vpsrlq ymm8,ymm7,26 1853 1854 vpsrlq ymm13,ymm2,26 1855 vpand ymm2,ymm2,ymm5 1856 vpaddq ymm3,ymm3,ymm13 1857 1858 vpaddq ymm2,ymm2,ymm9 1859 vpsrlq ymm10,ymm10,30 1860 1861 vpsrlq ymm11,ymm0,26 1862 vpand ymm0,ymm0,ymm5 1863 vpaddq ymm1,ymm1,ymm11 1864 1865 vpsrlq ymm6,ymm6,40 1866 1867 vpsrlq ymm14,ymm3,26 1868 vpand ymm3,ymm3,ymm5 1869 vpaddq ymm4,ymm4,ymm14 1870 1871 vpand ymm7,ymm7,ymm5 1872 vpand ymm8,ymm8,ymm5 1873 vpand ymm10,ymm10,ymm5 1874 vpor ymm6,ymm6,YMMWORD[32+rcx] 1875 1876 sub rdx,64 1877 jnz NEAR $L$oop_avx2 1878 1879 DB 0x66,0x90 1880 $L$tail_avx2: 1881 1882 1883 1884 1885 1886 1887 1888 vpaddq ymm0,ymm7,ymm0 1889 vmovdqu ymm7,YMMWORD[4+rsp] 1890 vpaddq ymm1,ymm8,ymm1 1891 vmovdqu ymm8,YMMWORD[36+rsp] 1892 vpaddq ymm3,ymm10,ymm3 1893 vmovdqu ymm9,YMMWORD[100+rsp] 1894 vpaddq ymm4,ymm6,ymm4 1895 vmovdqu ymm10,YMMWORD[52+rax] 1896 vmovdqu ymm5,YMMWORD[116+rax] 1897 1898 vpmuludq ymm13,ymm7,ymm2 1899 vpmuludq ymm14,ymm8,ymm2 1900 vpmuludq ymm15,ymm9,ymm2 1901 vpmuludq ymm11,ymm10,ymm2 1902 vpmuludq ymm12,ymm5,ymm2 1903 1904 vpmuludq ymm6,ymm8,ymm0 1905 vpmuludq ymm2,ymm8,ymm1 1906 vpaddq ymm12,ymm12,ymm6 1907 vpaddq ymm13,ymm13,ymm2 1908 vpmuludq ymm6,ymm8,ymm3 1909 vpmuludq ymm2,ymm4,YMMWORD[68+rsp] 1910 vpaddq ymm15,ymm15,ymm6 1911 vpaddq ymm11,ymm11,ymm2 1912 1913 vpmuludq ymm6,ymm7,ymm0 1914 vpmuludq ymm2,ymm7,ymm1 1915 vpaddq ymm11,ymm11,ymm6 1916 vmovdqu ymm8,YMMWORD[((-12))+rax] 1917 vpaddq ymm12,ymm12,ymm2 1918 vpmuludq ymm6,ymm7,ymm3 1919 vpmuludq ymm2,ymm7,ymm4 1920 vpaddq ymm14,ymm14,ymm6 1921 vpaddq ymm15,ymm15,ymm2 1922 1923 vpmuludq ymm6,ymm8,ymm3 1924 vpmuludq ymm2,ymm8,ymm4 1925 vpaddq ymm11,ymm11,ymm6 1926 vpaddq ymm12,ymm12,ymm2 1927 vmovdqu ymm2,YMMWORD[20+rax] 1928 vpmuludq ymm6,ymm9,ymm1 1929 vpmuludq ymm9,ymm9,ymm0 1930 vpaddq ymm14,ymm14,ymm6 1931 vpaddq ymm13,ymm13,ymm9 1932 1933 vpmuludq ymm6,ymm2,ymm1 1934 vpmuludq ymm2,ymm2,ymm0 1935 vpaddq ymm15,ymm15,ymm6 1936 vpaddq ymm14,ymm14,ymm2 1937 vpmuludq ymm6,ymm10,ymm3 1938 vpmuludq ymm2,ymm10,ymm4 1939 vpaddq ymm12,ymm12,ymm6 1940 vpaddq ymm13,ymm13,ymm2 1941 1942 vpmuludq ymm3,ymm5,ymm3 1943 vpmuludq ymm4,ymm5,ymm4 1944 vpaddq ymm2,ymm13,ymm3 1945 vpaddq ymm3,ymm14,ymm4 1946 vpmuludq ymm4,ymm0,YMMWORD[84+rax] 1947 vpmuludq ymm0,ymm5,ymm1 1948 vmovdqa ymm5,YMMWORD[64+rcx] 1949 vpaddq ymm4,ymm15,ymm4 1950 vpaddq ymm0,ymm11,ymm0 1951 1952 1953 1954 1955 vpsrldq ymm8,ymm12,8 1956 vpsrldq ymm9,ymm2,8 1957 vpsrldq ymm10,ymm3,8 1958 vpsrldq ymm6,ymm4,8 1959 vpsrldq ymm7,ymm0,8 1960 vpaddq ymm12,ymm12,ymm8 1961 vpaddq ymm2,ymm2,ymm9 1962 vpaddq ymm3,ymm3,ymm10 1963 vpaddq ymm4,ymm4,ymm6 1964 vpaddq ymm0,ymm0,ymm7 1965 1966 vpermq ymm10,ymm3,0x2 1967 vpermq ymm6,ymm4,0x2 1968 vpermq ymm7,ymm0,0x2 1969 vpermq ymm8,ymm12,0x2 1970 vpermq ymm9,ymm2,0x2 1971 vpaddq ymm3,ymm3,ymm10 1972 vpaddq ymm4,ymm4,ymm6 1973 vpaddq ymm0,ymm0,ymm7 1974 vpaddq ymm12,ymm12,ymm8 1975 vpaddq ymm2,ymm2,ymm9 1976 1977 1978 1979 1980 vpsrlq ymm14,ymm3,26 1981 vpand ymm3,ymm3,ymm5 1982 vpaddq ymm4,ymm4,ymm14 1983 1984 vpsrlq ymm11,ymm0,26 1985 vpand ymm0,ymm0,ymm5 1986 vpaddq ymm1,ymm12,ymm11 1987 1988 vpsrlq ymm15,ymm4,26 1989 vpand ymm4,ymm4,ymm5 1990 1991 vpsrlq ymm12,ymm1,26 1992 vpand ymm1,ymm1,ymm5 1993 vpaddq ymm2,ymm2,ymm12 1994 1995 vpaddq ymm0,ymm0,ymm15 1996 vpsllq ymm15,ymm15,2 1997 vpaddq ymm0,ymm0,ymm15 1998 1999 vpsrlq ymm13,ymm2,26 2000 vpand ymm2,ymm2,ymm5 2001 vpaddq ymm3,ymm3,ymm13 2002 2003 vpsrlq ymm11,ymm0,26 2004 vpand ymm0,ymm0,ymm5 2005 vpaddq ymm1,ymm1,ymm11 2006 2007 vpsrlq ymm14,ymm3,26 2008 vpand ymm3,ymm3,ymm5 2009 vpaddq ymm4,ymm4,ymm14 2010 2011 vmovd DWORD[(-112)+rdi],xmm0 2012 vmovd DWORD[(-108)+rdi],xmm1 2013 vmovd DWORD[(-104)+rdi],xmm2 2014 vmovd DWORD[(-100)+rdi],xmm3 2015 vmovd DWORD[(-96)+rdi],xmm4 2016 vmovdqa xmm6,XMMWORD[80+r11] 2017 vmovdqa xmm7,XMMWORD[96+r11] 2018 vmovdqa xmm8,XMMWORD[112+r11] 2019 vmovdqa xmm9,XMMWORD[128+r11] 2020 vmovdqa xmm10,XMMWORD[144+r11] 2021 vmovdqa xmm11,XMMWORD[160+r11] 2022 vmovdqa xmm12,XMMWORD[176+r11] 2023 vmovdqa xmm13,XMMWORD[192+r11] 2024 vmovdqa xmm14,XMMWORD[208+r11] 2025 vmovdqa xmm15,XMMWORD[224+r11] 2026 lea rsp,[248+r11] 2027 $L$do_avx2_epilogue: 2028 vzeroupper 2029 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2030 mov rsi,QWORD[16+rsp] 2031 DB 0F3h,0C3h ;repret 2032 2033 $L$SEH_end_poly1305_blocks_avx2: 2034 2035 ALIGN 32 2036 poly1305_blocks_avx512: 2037 mov QWORD[8+rsp],rdi ;WIN64 prologue 2038 mov QWORD[16+rsp],rsi 2039 mov rax,rsp 2040 $L$SEH_begin_poly1305_blocks_avx512: 2041 mov rdi,rcx 2042 mov rsi,rdx 2043 mov rdx,r8 2044 mov rcx,r9 2045 2046 2047 2048 $L$blocks_avx512: 2049 mov eax,15 2050 kmovw k2,eax 2051 lea r11,[((-248))+rsp] 2052 sub rsp,0x1c8 2053 vmovdqa XMMWORD[80+r11],xmm6 2054 vmovdqa XMMWORD[96+r11],xmm7 2055 vmovdqa XMMWORD[112+r11],xmm8 2056 vmovdqa XMMWORD[128+r11],xmm9 2057 vmovdqa XMMWORD[144+r11],xmm10 2058 vmovdqa XMMWORD[160+r11],xmm11 2059 vmovdqa XMMWORD[176+r11],xmm12 2060 vmovdqa XMMWORD[192+r11],xmm13 2061 vmovdqa XMMWORD[208+r11],xmm14 2062 vmovdqa XMMWORD[224+r11],xmm15 2063 $L$do_avx512_body: 2064 lea rcx,[$L$const] 2065 lea rdi,[((48+64))+rdi] 2066 vmovdqa ymm9,YMMWORD[96+rcx] 2067 2068 2069 vmovdqu xmm11,XMMWORD[((-64))+rdi] 2070 and rsp,-512 2071 vmovdqu xmm12,XMMWORD[((-48))+rdi] 2072 mov rax,0x20 2073 vmovdqu xmm7,XMMWORD[((-32))+rdi] 2074 vmovdqu xmm13,XMMWORD[((-16))+rdi] 2075 vmovdqu xmm8,XMMWORD[rdi] 2076 vmovdqu xmm14,XMMWORD[16+rdi] 2077 vmovdqu xmm10,XMMWORD[32+rdi] 2078 vmovdqu xmm15,XMMWORD[48+rdi] 2079 vmovdqu xmm6,XMMWORD[64+rdi] 2080 vpermd zmm16,zmm9,zmm11 2081 vpbroadcastq zmm5,QWORD[64+rcx] 2082 vpermd zmm17,zmm9,zmm12 2083 vpermd zmm21,zmm9,zmm7 2084 vpermd zmm18,zmm9,zmm13 2085 vmovdqa64 ZMMWORD[rsp]{k2},zmm16 2086 vpsrlq zmm7,zmm16,32 2087 vpermd zmm22,zmm9,zmm8 2088 vmovdqu64 ZMMWORD[rax*1+rsp]{k2},zmm17 2089 vpsrlq zmm8,zmm17,32 2090 vpermd zmm19,zmm9,zmm14 2091 vmovdqa64 ZMMWORD[64+rsp]{k2},zmm21 2092 vpermd zmm23,zmm9,zmm10 2093 vpermd zmm20,zmm9,zmm15 2094 vmovdqu64 ZMMWORD[64+rax*1+rsp]{k2},zmm18 2095 vpermd zmm24,zmm9,zmm6 2096 vmovdqa64 ZMMWORD[128+rsp]{k2},zmm22 2097 vmovdqu64 ZMMWORD[128+rax*1+rsp]{k2},zmm19 2098 vmovdqa64 ZMMWORD[192+rsp]{k2},zmm23 2099 vmovdqu64 ZMMWORD[192+rax*1+rsp]{k2},zmm20 2100 vmovdqa64 ZMMWORD[256+rsp]{k2},zmm24 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 vpmuludq zmm11,zmm16,zmm7 2112 vpmuludq zmm12,zmm17,zmm7 2113 vpmuludq zmm13,zmm18,zmm7 2114 vpmuludq zmm14,zmm19,zmm7 2115 vpmuludq zmm15,zmm20,zmm7 2116 vpsrlq zmm9,zmm18,32 2117 2118 vpmuludq zmm25,zmm24,zmm8 2119 vpmuludq zmm26,zmm16,zmm8 2120 vpmuludq zmm27,zmm17,zmm8 2121 vpmuludq zmm28,zmm18,zmm8 2122 vpmuludq zmm29,zmm19,zmm8 2123 vpsrlq zmm10,zmm19,32 2124 vpaddq zmm11,zmm11,zmm25 2125 vpaddq zmm12,zmm12,zmm26 2126 vpaddq zmm13,zmm13,zmm27 2127 vpaddq zmm14,zmm14,zmm28 2128 vpaddq zmm15,zmm15,zmm29 2129 2130 vpmuludq zmm25,zmm23,zmm9 2131 vpmuludq zmm26,zmm24,zmm9 2132 vpmuludq zmm28,zmm17,zmm9 2133 vpmuludq zmm29,zmm18,zmm9 2134 vpmuludq zmm27,zmm16,zmm9 2135 vpsrlq zmm6,zmm20,32 2136 vpaddq zmm11,zmm11,zmm25 2137 vpaddq zmm12,zmm12,zmm26 2138 vpaddq zmm14,zmm14,zmm28 2139 vpaddq zmm15,zmm15,zmm29 2140 vpaddq zmm13,zmm13,zmm27 2141 2142 vpmuludq zmm25,zmm22,zmm10 2143 vpmuludq zmm28,zmm16,zmm10 2144 vpmuludq zmm29,zmm17,zmm10 2145 vpmuludq zmm26,zmm23,zmm10 2146 vpmuludq zmm27,zmm24,zmm10 2147 vpaddq zmm11,zmm11,zmm25 2148 vpaddq zmm14,zmm14,zmm28 2149 vpaddq zmm15,zmm15,zmm29 2150 vpaddq zmm12,zmm12,zmm26 2151 vpaddq zmm13,zmm13,zmm27 2152 2153 vpmuludq zmm28,zmm24,zmm6 2154 vpmuludq zmm29,zmm16,zmm6 2155 vpmuludq zmm25,zmm21,zmm6 2156 vpmuludq zmm26,zmm22,zmm6 2157 vpmuludq zmm27,zmm23,zmm6 2158 vpaddq zmm14,zmm14,zmm28 2159 vpaddq zmm15,zmm15,zmm29 2160 vpaddq zmm11,zmm11,zmm25 2161 vpaddq zmm12,zmm12,zmm26 2162 vpaddq zmm13,zmm13,zmm27 2163 2164 2165 2166 vmovdqu64 zmm10,ZMMWORD[rsi] 2167 vmovdqu64 zmm6,ZMMWORD[64+rsi] 2168 lea rsi,[128+rsi] 2169 2170 2171 2172 2173 vpsrlq zmm28,zmm14,26 2174 vpandq zmm14,zmm14,zmm5 2175 vpaddq zmm15,zmm15,zmm28 2176 2177 vpsrlq zmm25,zmm11,26 2178 vpandq zmm11,zmm11,zmm5 2179 vpaddq zmm12,zmm12,zmm25 2180 2181 vpsrlq zmm29,zmm15,26 2182 vpandq zmm15,zmm15,zmm5 2183 2184 vpsrlq zmm26,zmm12,26 2185 vpandq zmm12,zmm12,zmm5 2186 vpaddq zmm13,zmm13,zmm26 2187 2188 vpaddq zmm11,zmm11,zmm29 2189 vpsllq zmm29,zmm29,2 2190 vpaddq zmm11,zmm11,zmm29 2191 2192 vpsrlq zmm27,zmm13,26 2193 vpandq zmm13,zmm13,zmm5 2194 vpaddq zmm14,zmm14,zmm27 2195 2196 vpsrlq zmm25,zmm11,26 2197 vpandq zmm11,zmm11,zmm5 2198 vpaddq zmm12,zmm12,zmm25 2199 2200 vpsrlq zmm28,zmm14,26 2201 vpandq zmm14,zmm14,zmm5 2202 vpaddq zmm15,zmm15,zmm28 2203 2204 2205 2206 2207 2208 vpunpcklqdq zmm7,zmm10,zmm6 2209 vpunpckhqdq zmm6,zmm10,zmm6 2210 2211 2212 2213 2214 2215 2216 vmovdqa32 zmm25,ZMMWORD[128+rcx] 2217 mov eax,0x7777 2218 kmovw k1,eax 2219 2220 vpermd zmm16,zmm25,zmm16 2221 vpermd zmm17,zmm25,zmm17 2222 vpermd zmm18,zmm25,zmm18 2223 vpermd zmm19,zmm25,zmm19 2224 vpermd zmm20,zmm25,zmm20 2225 2226 vpermd zmm16{k1},zmm25,zmm11 2227 vpermd zmm17{k1},zmm25,zmm12 2228 vpermd zmm18{k1},zmm25,zmm13 2229 vpermd zmm19{k1},zmm25,zmm14 2230 vpermd zmm20{k1},zmm25,zmm15 2231 2232 vpslld zmm21,zmm17,2 2233 vpslld zmm22,zmm18,2 2234 vpslld zmm23,zmm19,2 2235 vpslld zmm24,zmm20,2 2236 vpaddd zmm21,zmm21,zmm17 2237 vpaddd zmm22,zmm22,zmm18 2238 vpaddd zmm23,zmm23,zmm19 2239 vpaddd zmm24,zmm24,zmm20 2240 2241 vpbroadcastq zmm30,QWORD[32+rcx] 2242 2243 vpsrlq zmm9,zmm7,52 2244 vpsllq zmm10,zmm6,12 2245 vporq zmm9,zmm9,zmm10 2246 vpsrlq zmm8,zmm7,26 2247 vpsrlq zmm10,zmm6,14 2248 vpsrlq zmm6,zmm6,40 2249 vpandq zmm9,zmm9,zmm5 2250 vpandq zmm7,zmm7,zmm5 2251 2252 2253 2254 2255 vpaddq zmm2,zmm9,zmm2 2256 sub rdx,192 2257 jbe NEAR $L$tail_avx512 2258 jmp NEAR $L$oop_avx512 2259 2260 ALIGN 32 2261 $L$oop_avx512: 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 vpmuludq zmm14,zmm17,zmm2 2291 vpaddq zmm0,zmm7,zmm0 2292 vpmuludq zmm15,zmm18,zmm2 2293 vpandq zmm8,zmm8,zmm5 2294 vpmuludq zmm11,zmm23,zmm2 2295 vpandq zmm10,zmm10,zmm5 2296 vpmuludq zmm12,zmm24,zmm2 2297 vporq zmm6,zmm6,zmm30 2298 vpmuludq zmm13,zmm16,zmm2 2299 vpaddq zmm1,zmm8,zmm1 2300 vpaddq zmm3,zmm10,zmm3 2301 vpaddq zmm4,zmm6,zmm4 2302 2303 vmovdqu64 zmm10,ZMMWORD[rsi] 2304 vmovdqu64 zmm6,ZMMWORD[64+rsi] 2305 lea rsi,[128+rsi] 2306 vpmuludq zmm28,zmm19,zmm0 2307 vpmuludq zmm29,zmm20,zmm0 2308 vpmuludq zmm25,zmm16,zmm0 2309 vpmuludq zmm26,zmm17,zmm0 2310 vpaddq zmm14,zmm14,zmm28 2311 vpaddq zmm15,zmm15,zmm29 2312 vpaddq zmm11,zmm11,zmm25 2313 vpaddq zmm12,zmm12,zmm26 2314 2315 vpmuludq zmm28,zmm18,zmm1 2316 vpmuludq zmm29,zmm19,zmm1 2317 vpmuludq zmm25,zmm24,zmm1 2318 vpmuludq zmm27,zmm18,zmm0 2319 vpaddq zmm14,zmm14,zmm28 2320 vpaddq zmm15,zmm15,zmm29 2321 vpaddq zmm11,zmm11,zmm25 2322 vpaddq zmm13,zmm13,zmm27 2323 2324 vpunpcklqdq zmm7,zmm10,zmm6 2325 vpunpckhqdq zmm6,zmm10,zmm6 2326 2327 vpmuludq zmm28,zmm16,zmm3 2328 vpmuludq zmm29,zmm17,zmm3 2329 vpmuludq zmm26,zmm16,zmm1 2330 vpmuludq zmm27,zmm17,zmm1 2331 vpaddq zmm14,zmm14,zmm28 2332 vpaddq zmm15,zmm15,zmm29 2333 vpaddq zmm12,zmm12,zmm26 2334 vpaddq zmm13,zmm13,zmm27 2335 2336 vpmuludq zmm28,zmm24,zmm4 2337 vpmuludq zmm29,zmm16,zmm4 2338 vpmuludq zmm25,zmm22,zmm3 2339 vpmuludq zmm26,zmm23,zmm3 2340 vpaddq zmm14,zmm14,zmm28 2341 vpmuludq zmm27,zmm24,zmm3 2342 vpaddq zmm15,zmm15,zmm29 2343 vpaddq zmm11,zmm11,zmm25 2344 vpaddq zmm12,zmm12,zmm26 2345 vpaddq zmm13,zmm13,zmm27 2346 2347 vpmuludq zmm25,zmm21,zmm4 2348 vpmuludq zmm26,zmm22,zmm4 2349 vpmuludq zmm27,zmm23,zmm4 2350 vpaddq zmm0,zmm11,zmm25 2351 vpaddq zmm1,zmm12,zmm26 2352 vpaddq zmm2,zmm13,zmm27 2353 2354 2355 2356 2357 vpsrlq zmm9,zmm7,52 2358 vpsllq zmm10,zmm6,12 2359 2360 vpsrlq zmm3,zmm14,26 2361 vpandq zmm14,zmm14,zmm5 2362 vpaddq zmm4,zmm15,zmm3 2363 2364 vporq zmm9,zmm9,zmm10 2365 2366 vpsrlq zmm11,zmm0,26 2367 vpandq zmm0,zmm0,zmm5 2368 vpaddq zmm1,zmm1,zmm11 2369 2370 vpandq zmm9,zmm9,zmm5 2371 2372 vpsrlq zmm15,zmm4,26 2373 vpandq zmm4,zmm4,zmm5 2374 2375 vpsrlq zmm12,zmm1,26 2376 vpandq zmm1,zmm1,zmm5 2377 vpaddq zmm2,zmm2,zmm12 2378 2379 vpaddq zmm0,zmm0,zmm15 2380 vpsllq zmm15,zmm15,2 2381 vpaddq zmm0,zmm0,zmm15 2382 2383 vpaddq zmm2,zmm2,zmm9 2384 vpsrlq zmm8,zmm7,26 2385 2386 vpsrlq zmm13,zmm2,26 2387 vpandq zmm2,zmm2,zmm5 2388 vpaddq zmm3,zmm14,zmm13 2389 2390 vpsrlq zmm10,zmm6,14 2391 2392 vpsrlq zmm11,zmm0,26 2393 vpandq zmm0,zmm0,zmm5 2394 vpaddq zmm1,zmm1,zmm11 2395 2396 vpsrlq zmm6,zmm6,40 2397 2398 vpsrlq zmm14,zmm3,26 2399 vpandq zmm3,zmm3,zmm5 2400 vpaddq zmm4,zmm4,zmm14 2401 2402 vpandq zmm7,zmm7,zmm5 2403 2404 2405 2406 2407 sub rdx,128 2408 ja NEAR $L$oop_avx512 2409 2410 $L$tail_avx512: 2411 2412 2413 2414 2415 2416 vpsrlq zmm16,zmm16,32 2417 vpsrlq zmm17,zmm17,32 2418 vpsrlq zmm18,zmm18,32 2419 vpsrlq zmm23,zmm23,32 2420 vpsrlq zmm24,zmm24,32 2421 vpsrlq zmm19,zmm19,32 2422 vpsrlq zmm20,zmm20,32 2423 vpsrlq zmm21,zmm21,32 2424 vpsrlq zmm22,zmm22,32 2425 2426 2427 2428 lea rsi,[rdx*1+rsi] 2429 2430 2431 vpaddq zmm0,zmm7,zmm0 2432 2433 vpmuludq zmm14,zmm17,zmm2 2434 vpmuludq zmm15,zmm18,zmm2 2435 vpmuludq zmm11,zmm23,zmm2 2436 vpandq zmm8,zmm8,zmm5 2437 vpmuludq zmm12,zmm24,zmm2 2438 vpandq zmm10,zmm10,zmm5 2439 vpmuludq zmm13,zmm16,zmm2 2440 vporq zmm6,zmm6,zmm30 2441 vpaddq zmm1,zmm8,zmm1 2442 vpaddq zmm3,zmm10,zmm3 2443 vpaddq zmm4,zmm6,zmm4 2444 2445 vmovdqu xmm7,XMMWORD[rsi] 2446 vpmuludq zmm28,zmm19,zmm0 2447 vpmuludq zmm29,zmm20,zmm0 2448 vpmuludq zmm25,zmm16,zmm0 2449 vpmuludq zmm26,zmm17,zmm0 2450 vpaddq zmm14,zmm14,zmm28 2451 vpaddq zmm15,zmm15,zmm29 2452 vpaddq zmm11,zmm11,zmm25 2453 vpaddq zmm12,zmm12,zmm26 2454 2455 vmovdqu xmm8,XMMWORD[16+rsi] 2456 vpmuludq zmm28,zmm18,zmm1 2457 vpmuludq zmm29,zmm19,zmm1 2458 vpmuludq zmm25,zmm24,zmm1 2459 vpmuludq zmm27,zmm18,zmm0 2460 vpaddq zmm14,zmm14,zmm28 2461 vpaddq zmm15,zmm15,zmm29 2462 vpaddq zmm11,zmm11,zmm25 2463 vpaddq zmm13,zmm13,zmm27 2464 2465 vinserti128 ymm7,ymm7,XMMWORD[32+rsi],1 2466 vpmuludq zmm28,zmm16,zmm3 2467 vpmuludq zmm29,zmm17,zmm3 2468 vpmuludq zmm26,zmm16,zmm1 2469 vpmuludq zmm27,zmm17,zmm1 2470 vpaddq zmm14,zmm14,zmm28 2471 vpaddq zmm15,zmm15,zmm29 2472 vpaddq zmm12,zmm12,zmm26 2473 vpaddq zmm13,zmm13,zmm27 2474 2475 vinserti128 ymm8,ymm8,XMMWORD[48+rsi],1 2476 vpmuludq zmm28,zmm24,zmm4 2477 vpmuludq zmm29,zmm16,zmm4 2478 vpmuludq zmm25,zmm22,zmm3 2479 vpmuludq zmm26,zmm23,zmm3 2480 vpmuludq zmm27,zmm24,zmm3 2481 vpaddq zmm3,zmm14,zmm28 2482 vpaddq zmm15,zmm15,zmm29 2483 vpaddq zmm11,zmm11,zmm25 2484 vpaddq zmm12,zmm12,zmm26 2485 vpaddq zmm13,zmm13,zmm27 2486 2487 vpmuludq zmm25,zmm21,zmm4 2488 vpmuludq zmm26,zmm22,zmm4 2489 vpmuludq zmm27,zmm23,zmm4 2490 vpaddq zmm0,zmm11,zmm25 2491 vpaddq zmm1,zmm12,zmm26 2492 vpaddq zmm2,zmm13,zmm27 2493 2494 2495 2496 2497 mov eax,1 2498 vpermq zmm14,zmm3,0xb1 2499 vpermq zmm4,zmm15,0xb1 2500 vpermq zmm11,zmm0,0xb1 2501 vpermq zmm12,zmm1,0xb1 2502 vpermq zmm13,zmm2,0xb1 2503 vpaddq zmm3,zmm3,zmm14 2504 vpaddq zmm4,zmm4,zmm15 2505 vpaddq zmm0,zmm0,zmm11 2506 vpaddq zmm1,zmm1,zmm12 2507 vpaddq zmm2,zmm2,zmm13 2508 2509 kmovw k3,eax 2510 vpermq zmm14,zmm3,0x2 2511 vpermq zmm15,zmm4,0x2 2512 vpermq zmm11,zmm0,0x2 2513 vpermq zmm12,zmm1,0x2 2514 vpermq zmm13,zmm2,0x2 2515 vpaddq zmm3,zmm3,zmm14 2516 vpaddq zmm4,zmm4,zmm15 2517 vpaddq zmm0,zmm0,zmm11 2518 vpaddq zmm1,zmm1,zmm12 2519 vpaddq zmm2,zmm2,zmm13 2520 2521 vextracti64x4 ymm14,zmm3,0x1 2522 vextracti64x4 ymm15,zmm4,0x1 2523 vextracti64x4 ymm11,zmm0,0x1 2524 vextracti64x4 ymm12,zmm1,0x1 2525 vextracti64x4 ymm13,zmm2,0x1 2526 vpaddq zmm3{k3}{z},zmm3,zmm14 2527 vpaddq zmm4{k3}{z},zmm4,zmm15 2528 vpaddq zmm0{k3}{z},zmm0,zmm11 2529 vpaddq zmm1{k3}{z},zmm1,zmm12 2530 vpaddq zmm2{k3}{z},zmm2,zmm13 2531 2532 2533 2534 vpsrlq ymm14,ymm3,26 2535 vpand ymm3,ymm3,ymm5 2536 vpsrldq ymm9,ymm7,6 2537 vpsrldq ymm10,ymm8,6 2538 vpunpckhqdq ymm6,ymm7,ymm8 2539 vpaddq ymm4,ymm4,ymm14 2540 2541 vpsrlq ymm11,ymm0,26 2542 vpand ymm0,ymm0,ymm5 2543 vpunpcklqdq ymm9,ymm9,ymm10 2544 vpunpcklqdq ymm7,ymm7,ymm8 2545 vpaddq ymm1,ymm1,ymm11 2546 2547 vpsrlq ymm15,ymm4,26 2548 vpand ymm4,ymm4,ymm5 2549 2550 vpsrlq ymm12,ymm1,26 2551 vpand ymm1,ymm1,ymm5 2552 vpsrlq ymm10,ymm9,30 2553 vpsrlq ymm9,ymm9,4 2554 vpaddq ymm2,ymm2,ymm12 2555 2556 vpaddq ymm0,ymm0,ymm15 2557 vpsllq ymm15,ymm15,2 2558 vpsrlq ymm8,ymm7,26 2559 vpsrlq ymm6,ymm6,40 2560 vpaddq ymm0,ymm0,ymm15 2561 2562 vpsrlq ymm13,ymm2,26 2563 vpand ymm2,ymm2,ymm5 2564 vpand ymm9,ymm9,ymm5 2565 vpand ymm7,ymm7,ymm5 2566 vpaddq ymm3,ymm3,ymm13 2567 2568 vpsrlq ymm11,ymm0,26 2569 vpand ymm0,ymm0,ymm5 2570 vpaddq ymm2,ymm9,ymm2 2571 vpand ymm8,ymm8,ymm5 2572 vpaddq ymm1,ymm1,ymm11 2573 2574 vpsrlq ymm14,ymm3,26 2575 vpand ymm3,ymm3,ymm5 2576 vpand ymm10,ymm10,ymm5 2577 vpor ymm6,ymm6,YMMWORD[32+rcx] 2578 vpaddq ymm4,ymm4,ymm14 2579 2580 lea rax,[144+rsp] 2581 add rdx,64 2582 jnz NEAR $L$tail_avx2 2583 2584 vpsubq ymm2,ymm2,ymm9 2585 vmovd DWORD[(-112)+rdi],xmm0 2586 vmovd DWORD[(-108)+rdi],xmm1 2587 vmovd DWORD[(-104)+rdi],xmm2 2588 vmovd DWORD[(-100)+rdi],xmm3 2589 vmovd DWORD[(-96)+rdi],xmm4 2590 vzeroall 2591 movdqa xmm6,XMMWORD[80+r11] 2592 movdqa xmm7,XMMWORD[96+r11] 2593 movdqa xmm8,XMMWORD[112+r11] 2594 movdqa xmm9,XMMWORD[128+r11] 2595 movdqa xmm10,XMMWORD[144+r11] 2596 movdqa xmm11,XMMWORD[160+r11] 2597 movdqa xmm12,XMMWORD[176+r11] 2598 movdqa xmm13,XMMWORD[192+r11] 2599 movdqa xmm14,XMMWORD[208+r11] 2600 movdqa xmm15,XMMWORD[224+r11] 2601 lea rsp,[248+r11] 2602 $L$do_avx512_epilogue: 2603 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2604 mov rsi,QWORD[16+rsp] 2605 DB 0F3h,0C3h ;repret 2606 2607 $L$SEH_end_poly1305_blocks_avx512: 2608 2609 ALIGN 32 2610 poly1305_init_base2_44: 2611 mov QWORD[8+rsp],rdi ;WIN64 prologue 2612 mov QWORD[16+rsp],rsi 2613 mov rax,rsp 2614 $L$SEH_begin_poly1305_init_base2_44: 2615 mov rdi,rcx 2616 mov rsi,rdx 2617 mov rdx,r8 2618 2619 2620 2621 xor rax,rax 2622 mov QWORD[rdi],rax 2623 mov QWORD[8+rdi],rax 2624 mov QWORD[16+rdi],rax 2625 2626 $L$init_base2_44: 2627 lea r10,[poly1305_blocks_vpmadd52] 2628 lea r11,[poly1305_emit_base2_44] 2629 2630 mov rax,0x0ffffffc0fffffff 2631 mov rcx,0x0ffffffc0ffffffc 2632 and rax,QWORD[rsi] 2633 mov r8,0x00000fffffffffff 2634 and rcx,QWORD[8+rsi] 2635 mov r9,0x00000fffffffffff 2636 and r8,rax 2637 shrd rax,rcx,44 2638 mov QWORD[40+rdi],r8 2639 and rax,r9 2640 shr rcx,24 2641 mov QWORD[48+rdi],rax 2642 lea rax,[rax*4+rax] 2643 mov QWORD[56+rdi],rcx 2644 shl rax,2 2645 lea rcx,[rcx*4+rcx] 2646 shl rcx,2 2647 mov QWORD[24+rdi],rax 2648 mov QWORD[32+rdi],rcx 2649 mov QWORD[64+rdi],-1 2650 mov QWORD[rdx],r10 2651 mov QWORD[8+rdx],r11 2652 mov eax,1 2653 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2654 mov rsi,QWORD[16+rsp] 2655 DB 0F3h,0C3h ;repret 2656 2657 $L$SEH_end_poly1305_init_base2_44: 2658 2659 ALIGN 32 2660 poly1305_blocks_vpmadd52: 2661 mov QWORD[8+rsp],rdi ;WIN64 prologue 2662 mov QWORD[16+rsp],rsi 2663 mov rax,rsp 2664 $L$SEH_begin_poly1305_blocks_vpmadd52: 2665 mov rdi,rcx 2666 mov rsi,rdx 2667 mov rdx,r8 2668 mov rcx,r9 2669 2670 2671 2672 DB 243,15,30,250 2673 shr rdx,4 2674 jz NEAR $L$no_data_vpmadd52 2675 2676 shl rcx,40 2677 mov r8,QWORD[64+rdi] 2678 2679 2680 2681 2682 2683 2684 mov rax,3 2685 mov r10,1 2686 cmp rdx,4 2687 cmovae rax,r10 2688 test r8,r8 2689 cmovns rax,r10 2690 2691 and rax,rdx 2692 jz NEAR $L$blocks_vpmadd52_4x 2693 2694 sub rdx,rax 2695 mov r10d,7 2696 mov r11d,1 2697 kmovw k7,r10d 2698 lea r10,[$L$2_44_inp_permd] 2699 kmovw k1,r11d 2700 2701 vmovq xmm21,rcx 2702 vmovdqa64 ymm19,YMMWORD[r10] 2703 vmovdqa64 ymm20,YMMWORD[32+r10] 2704 vpermq ymm21,ymm21,0xcf 2705 vmovdqa64 ymm22,YMMWORD[64+r10] 2706 2707 vmovdqu64 ymm16{k7}{z},[rdi] 2708 vmovdqu64 ymm3{k7}{z},[40+rdi] 2709 vmovdqu64 ymm4{k7}{z},[32+rdi] 2710 vmovdqu64 ymm5{k7}{z},[24+rdi] 2711 2712 vmovdqa64 ymm23,YMMWORD[96+r10] 2713 vmovdqa64 ymm24,YMMWORD[128+r10] 2714 2715 jmp NEAR $L$oop_vpmadd52 2716 2717 ALIGN 32 2718 $L$oop_vpmadd52: 2719 vmovdqu32 xmm18,XMMWORD[rsi] 2720 lea rsi,[16+rsi] 2721 2722 vpermd ymm18,ymm19,ymm18 2723 vpsrlvq ymm18,ymm18,ymm20 2724 vpandq ymm18,ymm18,ymm22 2725 vporq ymm18,ymm18,ymm21 2726 2727 vpaddq ymm16,ymm16,ymm18 2728 2729 vpermq ymm0{k7}{z},ymm16,0 2730 vpermq ymm1{k7}{z},ymm16,85 2731 vpermq ymm2{k7}{z},ymm16,170 2732 2733 vpxord ymm16,ymm16,ymm16 2734 vpxord ymm17,ymm17,ymm17 2735 2736 vpmadd52luq ymm16,ymm0,ymm3 2737 vpmadd52huq ymm17,ymm0,ymm3 2738 2739 vpmadd52luq ymm16,ymm1,ymm4 2740 vpmadd52huq ymm17,ymm1,ymm4 2741 2742 vpmadd52luq ymm16,ymm2,ymm5 2743 vpmadd52huq ymm17,ymm2,ymm5 2744 2745 vpsrlvq ymm18,ymm16,ymm23 2746 vpsllvq ymm17,ymm17,ymm24 2747 vpandq ymm16,ymm16,ymm22 2748 2749 vpaddq ymm17,ymm17,ymm18 2750 2751 vpermq ymm17,ymm17,147 2752 2753 vpaddq ymm16,ymm16,ymm17 2754 2755 vpsrlvq ymm18,ymm16,ymm23 2756 vpandq ymm16,ymm16,ymm22 2757 2758 vpermq ymm18,ymm18,147 2759 2760 vpaddq ymm16,ymm16,ymm18 2761 2762 vpermq ymm18{k1}{z},ymm16,147 2763 2764 vpaddq ymm16,ymm16,ymm18 2765 vpsllq ymm18,ymm18,2 2766 2767 vpaddq ymm16,ymm16,ymm18 2768 2769 dec rax 2770 jnz NEAR $L$oop_vpmadd52 2771 2772 vmovdqu64 YMMWORD[rdi]{k7},ymm16 2773 2774 test rdx,rdx 2775 jnz NEAR $L$blocks_vpmadd52_4x 2776 2777 $L$no_data_vpmadd52: 2778 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2779 mov rsi,QWORD[16+rsp] 2780 DB 0F3h,0C3h ;repret 2781 2782 $L$SEH_end_poly1305_blocks_vpmadd52: 2783 2784 ALIGN 32 2785 poly1305_blocks_vpmadd52_4x: 2786 mov QWORD[8+rsp],rdi ;WIN64 prologue 2787 mov QWORD[16+rsp],rsi 2788 mov rax,rsp 2789 $L$SEH_begin_poly1305_blocks_vpmadd52_4x: 2790 mov rdi,rcx 2791 mov rsi,rdx 2792 mov rdx,r8 2793 mov rcx,r9 2794 2795 2796 2797 shr rdx,4 2798 jz NEAR $L$no_data_vpmadd52_4x 2799 2800 shl rcx,40 2801 mov r8,QWORD[64+rdi] 2802 2803 $L$blocks_vpmadd52_4x: 2804 vpbroadcastq ymm31,rcx 2805 2806 vmovdqa64 ymm28,YMMWORD[$L$x_mask44] 2807 mov eax,5 2808 vmovdqa64 ymm29,YMMWORD[$L$x_mask42] 2809 kmovw k1,eax 2810 2811 test r8,r8 2812 js NEAR $L$init_vpmadd52 2813 2814 vmovq xmm0,QWORD[rdi] 2815 vmovq xmm1,QWORD[8+rdi] 2816 vmovq xmm2,QWORD[16+rdi] 2817 2818 test rdx,3 2819 jnz NEAR $L$blocks_vpmadd52_2x_do 2820 2821 $L$blocks_vpmadd52_4x_do: 2822 vpbroadcastq ymm3,QWORD[64+rdi] 2823 vpbroadcastq ymm4,QWORD[96+rdi] 2824 vpbroadcastq ymm5,QWORD[128+rdi] 2825 vpbroadcastq ymm16,QWORD[160+rdi] 2826 2827 $L$blocks_vpmadd52_4x_key_loaded: 2828 vpsllq ymm17,ymm5,2 2829 vpaddq ymm17,ymm17,ymm5 2830 vpsllq ymm17,ymm17,2 2831 2832 test rdx,7 2833 jz NEAR $L$blocks_vpmadd52_8x 2834 2835 vmovdqu64 ymm26,YMMWORD[rsi] 2836 vmovdqu64 ymm27,YMMWORD[32+rsi] 2837 lea rsi,[64+rsi] 2838 2839 vpunpcklqdq ymm25,ymm26,ymm27 2840 vpunpckhqdq ymm27,ymm26,ymm27 2841 2842 2843 2844 vpsrlq ymm26,ymm27,24 2845 vporq ymm26,ymm26,ymm31 2846 vpaddq ymm2,ymm2,ymm26 2847 vpandq ymm24,ymm25,ymm28 2848 vpsrlq ymm25,ymm25,44 2849 vpsllq ymm27,ymm27,20 2850 vporq ymm25,ymm25,ymm27 2851 vpandq ymm25,ymm25,ymm28 2852 2853 sub rdx,4 2854 jz NEAR $L$tail_vpmadd52_4x 2855 jmp NEAR $L$oop_vpmadd52_4x 2856 ud2 2857 2858 ALIGN 32 2859 $L$init_vpmadd52: 2860 vmovq xmm16,QWORD[24+rdi] 2861 vmovq xmm2,QWORD[56+rdi] 2862 vmovq xmm17,QWORD[32+rdi] 2863 vmovq xmm3,QWORD[40+rdi] 2864 vmovq xmm4,QWORD[48+rdi] 2865 2866 vmovdqa ymm0,ymm3 2867 vmovdqa ymm1,ymm4 2868 vmovdqa ymm5,ymm2 2869 2870 mov eax,2 2871 2872 $L$mul_init_vpmadd52: 2873 vpxorq ymm18,ymm18,ymm18 2874 vpmadd52luq ymm18,ymm16,ymm2 2875 vpxorq ymm19,ymm19,ymm19 2876 vpmadd52huq ymm19,ymm16,ymm2 2877 vpxorq ymm20,ymm20,ymm20 2878 vpmadd52luq ymm20,ymm17,ymm2 2879 vpxorq ymm21,ymm21,ymm21 2880 vpmadd52huq ymm21,ymm17,ymm2 2881 vpxorq ymm22,ymm22,ymm22 2882 vpmadd52luq ymm22,ymm3,ymm2 2883 vpxorq ymm23,ymm23,ymm23 2884 vpmadd52huq ymm23,ymm3,ymm2 2885 2886 vpmadd52luq ymm18,ymm3,ymm0 2887 vpmadd52huq ymm19,ymm3,ymm0 2888 vpmadd52luq ymm20,ymm4,ymm0 2889 vpmadd52huq ymm21,ymm4,ymm0 2890 vpmadd52luq ymm22,ymm5,ymm0 2891 vpmadd52huq ymm23,ymm5,ymm0 2892 2893 vpmadd52luq ymm18,ymm17,ymm1 2894 vpmadd52huq ymm19,ymm17,ymm1 2895 vpmadd52luq ymm20,ymm3,ymm1 2896 vpmadd52huq ymm21,ymm3,ymm1 2897 vpmadd52luq ymm22,ymm4,ymm1 2898 vpmadd52huq ymm23,ymm4,ymm1 2899 2900 2901 2902 vpsrlq ymm30,ymm18,44 2903 vpsllq ymm19,ymm19,8 2904 vpandq ymm0,ymm18,ymm28 2905 vpaddq ymm19,ymm19,ymm30 2906 2907 vpaddq ymm20,ymm20,ymm19 2908 2909 vpsrlq ymm30,ymm20,44 2910 vpsllq ymm21,ymm21,8 2911 vpandq ymm1,ymm20,ymm28 2912 vpaddq ymm21,ymm21,ymm30 2913 2914 vpaddq ymm22,ymm22,ymm21 2915 2916 vpsrlq ymm30,ymm22,42 2917 vpsllq ymm23,ymm23,10 2918 vpandq ymm2,ymm22,ymm29 2919 vpaddq ymm23,ymm23,ymm30 2920 2921 vpaddq ymm0,ymm0,ymm23 2922 vpsllq ymm23,ymm23,2 2923 2924 vpaddq ymm0,ymm0,ymm23 2925 2926 vpsrlq ymm30,ymm0,44 2927 vpandq ymm0,ymm0,ymm28 2928 2929 vpaddq ymm1,ymm1,ymm30 2930 2931 dec eax 2932 jz NEAR $L$done_init_vpmadd52 2933 2934 vpunpcklqdq ymm4,ymm1,ymm4 2935 vpbroadcastq xmm1,xmm1 2936 vpunpcklqdq ymm5,ymm2,ymm5 2937 vpbroadcastq xmm2,xmm2 2938 vpunpcklqdq ymm3,ymm0,ymm3 2939 vpbroadcastq xmm0,xmm0 2940 2941 vpsllq ymm16,ymm4,2 2942 vpsllq ymm17,ymm5,2 2943 vpaddq ymm16,ymm16,ymm4 2944 vpaddq ymm17,ymm17,ymm5 2945 vpsllq ymm16,ymm16,2 2946 vpsllq ymm17,ymm17,2 2947 2948 jmp NEAR $L$mul_init_vpmadd52 2949 ud2 2950 2951 ALIGN 32 2952 $L$done_init_vpmadd52: 2953 vinserti128 ymm4,ymm1,xmm4,1 2954 vinserti128 ymm5,ymm2,xmm5,1 2955 vinserti128 ymm3,ymm0,xmm3,1 2956 2957 vpermq ymm4,ymm4,216 2958 vpermq ymm5,ymm5,216 2959 vpermq ymm3,ymm3,216 2960 2961 vpsllq ymm16,ymm4,2 2962 vpaddq ymm16,ymm16,ymm4 2963 vpsllq ymm16,ymm16,2 2964 2965 vmovq xmm0,QWORD[rdi] 2966 vmovq xmm1,QWORD[8+rdi] 2967 vmovq xmm2,QWORD[16+rdi] 2968 2969 test rdx,3 2970 jnz NEAR $L$done_init_vpmadd52_2x 2971 2972 vmovdqu64 YMMWORD[64+rdi],ymm3 2973 vpbroadcastq ymm3,xmm3 2974 vmovdqu64 YMMWORD[96+rdi],ymm4 2975 vpbroadcastq ymm4,xmm4 2976 vmovdqu64 YMMWORD[128+rdi],ymm5 2977 vpbroadcastq ymm5,xmm5 2978 vmovdqu64 YMMWORD[160+rdi],ymm16 2979 vpbroadcastq ymm16,xmm16 2980 2981 jmp NEAR $L$blocks_vpmadd52_4x_key_loaded 2982 ud2 2983 2984 ALIGN 32 2985 $L$done_init_vpmadd52_2x: 2986 vmovdqu64 YMMWORD[64+rdi],ymm3 2987 vpsrldq ymm3,ymm3,8 2988 vmovdqu64 YMMWORD[96+rdi],ymm4 2989 vpsrldq ymm4,ymm4,8 2990 vmovdqu64 YMMWORD[128+rdi],ymm5 2991 vpsrldq ymm5,ymm5,8 2992 vmovdqu64 YMMWORD[160+rdi],ymm16 2993 vpsrldq ymm16,ymm16,8 2994 jmp NEAR $L$blocks_vpmadd52_2x_key_loaded 2995 ud2 2996 2997 ALIGN 32 2998 $L$blocks_vpmadd52_2x_do: 2999 vmovdqu64 ymm5{k1}{z},[((128+8))+rdi] 3000 vmovdqu64 ymm16{k1}{z},[((160+8))+rdi] 3001 vmovdqu64 ymm3{k1}{z},[((64+8))+rdi] 3002 vmovdqu64 ymm4{k1}{z},[((96+8))+rdi] 3003 3004 $L$blocks_vpmadd52_2x_key_loaded: 3005 vmovdqu64 ymm26,YMMWORD[rsi] 3006 vpxorq ymm27,ymm27,ymm27 3007 lea rsi,[32+rsi] 3008 3009 vpunpcklqdq ymm25,ymm26,ymm27 3010 vpunpckhqdq ymm27,ymm26,ymm27 3011 3012 3013 3014 vpsrlq ymm26,ymm27,24 3015 vporq ymm26,ymm26,ymm31 3016 vpaddq ymm2,ymm2,ymm26 3017 vpandq ymm24,ymm25,ymm28 3018 vpsrlq ymm25,ymm25,44 3019 vpsllq ymm27,ymm27,20 3020 vporq ymm25,ymm25,ymm27 3021 vpandq ymm25,ymm25,ymm28 3022 3023 jmp NEAR $L$tail_vpmadd52_2x 3024 ud2 3025 3026 ALIGN 32 3027 $L$oop_vpmadd52_4x: 3028 3029 vpaddq ymm0,ymm0,ymm24 3030 vpaddq ymm1,ymm1,ymm25 3031 3032 vpxorq ymm18,ymm18,ymm18 3033 vpmadd52luq ymm18,ymm16,ymm2 3034 vpxorq ymm19,ymm19,ymm19 3035 vpmadd52huq ymm19,ymm16,ymm2 3036 vpxorq ymm20,ymm20,ymm20 3037 vpmadd52luq ymm20,ymm17,ymm2 3038 vpxorq ymm21,ymm21,ymm21 3039 vpmadd52huq ymm21,ymm17,ymm2 3040 vpxorq ymm22,ymm22,ymm22 3041 vpmadd52luq ymm22,ymm3,ymm2 3042 vpxorq ymm23,ymm23,ymm23 3043 vpmadd52huq ymm23,ymm3,ymm2 3044 3045 vmovdqu64 ymm26,YMMWORD[rsi] 3046 vmovdqu64 ymm27,YMMWORD[32+rsi] 3047 lea rsi,[64+rsi] 3048 vpmadd52luq ymm18,ymm3,ymm0 3049 vpmadd52huq ymm19,ymm3,ymm0 3050 vpmadd52luq ymm20,ymm4,ymm0 3051 vpmadd52huq ymm21,ymm4,ymm0 3052 vpmadd52luq ymm22,ymm5,ymm0 3053 vpmadd52huq ymm23,ymm5,ymm0 3054 3055 vpunpcklqdq ymm25,ymm26,ymm27 3056 vpunpckhqdq ymm27,ymm26,ymm27 3057 vpmadd52luq ymm18,ymm17,ymm1 3058 vpmadd52huq ymm19,ymm17,ymm1 3059 vpmadd52luq ymm20,ymm3,ymm1 3060 vpmadd52huq ymm21,ymm3,ymm1 3061 vpmadd52luq ymm22,ymm4,ymm1 3062 vpmadd52huq ymm23,ymm4,ymm1 3063 3064 3065 3066 vpsrlq ymm30,ymm18,44 3067 vpsllq ymm19,ymm19,8 3068 vpandq ymm0,ymm18,ymm28 3069 vpaddq ymm19,ymm19,ymm30 3070 3071 vpsrlq ymm26,ymm27,24 3072 vporq ymm26,ymm26,ymm31 3073 vpaddq ymm20,ymm20,ymm19 3074 3075 vpsrlq ymm30,ymm20,44 3076 vpsllq ymm21,ymm21,8 3077 vpandq ymm1,ymm20,ymm28 3078 vpaddq ymm21,ymm21,ymm30 3079 3080 vpandq ymm24,ymm25,ymm28 3081 vpsrlq ymm25,ymm25,44 3082 vpsllq ymm27,ymm27,20 3083 vpaddq ymm22,ymm22,ymm21 3084 3085 vpsrlq ymm30,ymm22,42 3086 vpsllq ymm23,ymm23,10 3087 vpandq ymm2,ymm22,ymm29 3088 vpaddq ymm23,ymm23,ymm30 3089 3090 vpaddq ymm2,ymm2,ymm26 3091 vpaddq ymm0,ymm0,ymm23 3092 vpsllq ymm23,ymm23,2 3093 3094 vpaddq ymm0,ymm0,ymm23 3095 vporq ymm25,ymm25,ymm27 3096 vpandq ymm25,ymm25,ymm28 3097 3098 vpsrlq ymm30,ymm0,44 3099 vpandq ymm0,ymm0,ymm28 3100 3101 vpaddq ymm1,ymm1,ymm30 3102 3103 sub rdx,4 3104 jnz NEAR $L$oop_vpmadd52_4x 3105 3106 $L$tail_vpmadd52_4x: 3107 vmovdqu64 ymm5,YMMWORD[128+rdi] 3108 vmovdqu64 ymm16,YMMWORD[160+rdi] 3109 vmovdqu64 ymm3,YMMWORD[64+rdi] 3110 vmovdqu64 ymm4,YMMWORD[96+rdi] 3111 3112 $L$tail_vpmadd52_2x: 3113 vpsllq ymm17,ymm5,2 3114 vpaddq ymm17,ymm17,ymm5 3115 vpsllq ymm17,ymm17,2 3116 3117 3118 vpaddq ymm0,ymm0,ymm24 3119 vpaddq ymm1,ymm1,ymm25 3120 3121 vpxorq ymm18,ymm18,ymm18 3122 vpmadd52luq ymm18,ymm16,ymm2 3123 vpxorq ymm19,ymm19,ymm19 3124 vpmadd52huq ymm19,ymm16,ymm2 3125 vpxorq ymm20,ymm20,ymm20 3126 vpmadd52luq ymm20,ymm17,ymm2 3127 vpxorq ymm21,ymm21,ymm21 3128 vpmadd52huq ymm21,ymm17,ymm2 3129 vpxorq ymm22,ymm22,ymm22 3130 vpmadd52luq ymm22,ymm3,ymm2 3131 vpxorq ymm23,ymm23,ymm23 3132 vpmadd52huq ymm23,ymm3,ymm2 3133 3134 vpmadd52luq ymm18,ymm3,ymm0 3135 vpmadd52huq ymm19,ymm3,ymm0 3136 vpmadd52luq ymm20,ymm4,ymm0 3137 vpmadd52huq ymm21,ymm4,ymm0 3138 vpmadd52luq ymm22,ymm5,ymm0 3139 vpmadd52huq ymm23,ymm5,ymm0 3140 3141 vpmadd52luq ymm18,ymm17,ymm1 3142 vpmadd52huq ymm19,ymm17,ymm1 3143 vpmadd52luq ymm20,ymm3,ymm1 3144 vpmadd52huq ymm21,ymm3,ymm1 3145 vpmadd52luq ymm22,ymm4,ymm1 3146 vpmadd52huq ymm23,ymm4,ymm1 3147 3148 3149 3150 3151 mov eax,1 3152 kmovw k1,eax 3153 vpsrldq ymm24,ymm18,8 3154 vpsrldq ymm0,ymm19,8 3155 vpsrldq ymm25,ymm20,8 3156 vpsrldq ymm1,ymm21,8 3157 vpaddq ymm18,ymm18,ymm24 3158 vpaddq ymm19,ymm19,ymm0 3159 vpsrldq ymm26,ymm22,8 3160 vpsrldq ymm2,ymm23,8 3161 vpaddq ymm20,ymm20,ymm25 3162 vpaddq ymm21,ymm21,ymm1 3163 vpermq ymm24,ymm18,0x2 3164 vpermq ymm0,ymm19,0x2 3165 vpaddq ymm22,ymm22,ymm26 3166 vpaddq ymm23,ymm23,ymm2 3167 3168 vpermq ymm25,ymm20,0x2 3169 vpermq ymm1,ymm21,0x2 3170 vpaddq ymm18{k1}{z},ymm18,ymm24 3171 vpaddq ymm19{k1}{z},ymm19,ymm0 3172 vpermq ymm26,ymm22,0x2 3173 vpermq ymm2,ymm23,0x2 3174 vpaddq ymm20{k1}{z},ymm20,ymm25 3175 vpaddq ymm21{k1}{z},ymm21,ymm1 3176 vpaddq ymm22{k1}{z},ymm22,ymm26 3177 vpaddq ymm23{k1}{z},ymm23,ymm2 3178 3179 3180 3181 vpsrlq ymm30,ymm18,44 3182 vpsllq ymm19,ymm19,8 3183 vpandq ymm0,ymm18,ymm28 3184 vpaddq ymm19,ymm19,ymm30 3185 3186 vpaddq ymm20,ymm20,ymm19 3187 3188 vpsrlq ymm30,ymm20,44 3189 vpsllq ymm21,ymm21,8 3190 vpandq ymm1,ymm20,ymm28 3191 vpaddq ymm21,ymm21,ymm30 3192 3193 vpaddq ymm22,ymm22,ymm21 3194 3195 vpsrlq ymm30,ymm22,42 3196 vpsllq ymm23,ymm23,10 3197 vpandq ymm2,ymm22,ymm29 3198 vpaddq ymm23,ymm23,ymm30 3199 3200 vpaddq ymm0,ymm0,ymm23 3201 vpsllq ymm23,ymm23,2 3202 3203 vpaddq ymm0,ymm0,ymm23 3204 3205 vpsrlq ymm30,ymm0,44 3206 vpandq ymm0,ymm0,ymm28 3207 3208 vpaddq ymm1,ymm1,ymm30 3209 3210 3211 sub rdx,2 3212 ja NEAR $L$blocks_vpmadd52_4x_do 3213 3214 vmovq QWORD[rdi],xmm0 3215 vmovq QWORD[8+rdi],xmm1 3216 vmovq QWORD[16+rdi],xmm2 3217 vzeroall 3218 3219 $L$no_data_vpmadd52_4x: 3220 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 3221 mov rsi,QWORD[16+rsp] 3222 DB 0F3h,0C3h ;repret 3223 3224 $L$SEH_end_poly1305_blocks_vpmadd52_4x: 3225 3226 ALIGN 32 3227 poly1305_blocks_vpmadd52_8x: 3228 mov QWORD[8+rsp],rdi ;WIN64 prologue 3229 mov QWORD[16+rsp],rsi 3230 mov rax,rsp 3231 $L$SEH_begin_poly1305_blocks_vpmadd52_8x: 3232 mov rdi,rcx 3233 mov rsi,rdx 3234 mov rdx,r8 3235 mov rcx,r9 3236 3237 3238 3239 shr rdx,4 3240 jz NEAR $L$no_data_vpmadd52_8x 3241 3242 shl rcx,40 3243 mov r8,QWORD[64+rdi] 3244 3245 vmovdqa64 ymm28,YMMWORD[$L$x_mask44] 3246 vmovdqa64 ymm29,YMMWORD[$L$x_mask42] 3247 3248 test r8,r8 3249 js NEAR $L$init_vpmadd52 3250 3251 vmovq xmm0,QWORD[rdi] 3252 vmovq xmm1,QWORD[8+rdi] 3253 vmovq xmm2,QWORD[16+rdi] 3254 3255 $L$blocks_vpmadd52_8x: 3256 3257 3258 3259 vmovdqu64 ymm5,YMMWORD[128+rdi] 3260 vmovdqu64 ymm16,YMMWORD[160+rdi] 3261 vmovdqu64 ymm3,YMMWORD[64+rdi] 3262 vmovdqu64 ymm4,YMMWORD[96+rdi] 3263 3264 vpsllq ymm17,ymm5,2 3265 vpaddq ymm17,ymm17,ymm5 3266 vpsllq ymm17,ymm17,2 3267 3268 vpbroadcastq ymm8,xmm5 3269 vpbroadcastq ymm6,xmm3 3270 vpbroadcastq ymm7,xmm4 3271 3272 vpxorq ymm18,ymm18,ymm18 3273 vpmadd52luq ymm18,ymm16,ymm8 3274 vpxorq ymm19,ymm19,ymm19 3275 vpmadd52huq ymm19,ymm16,ymm8 3276 vpxorq ymm20,ymm20,ymm20 3277 vpmadd52luq ymm20,ymm17,ymm8 3278 vpxorq ymm21,ymm21,ymm21 3279 vpmadd52huq ymm21,ymm17,ymm8 3280 vpxorq ymm22,ymm22,ymm22 3281 vpmadd52luq ymm22,ymm3,ymm8 3282 vpxorq ymm23,ymm23,ymm23 3283 vpmadd52huq ymm23,ymm3,ymm8 3284 3285 vpmadd52luq ymm18,ymm3,ymm6 3286 vpmadd52huq ymm19,ymm3,ymm6 3287 vpmadd52luq ymm20,ymm4,ymm6 3288 vpmadd52huq ymm21,ymm4,ymm6 3289 vpmadd52luq ymm22,ymm5,ymm6 3290 vpmadd52huq ymm23,ymm5,ymm6 3291 3292 vpmadd52luq ymm18,ymm17,ymm7 3293 vpmadd52huq ymm19,ymm17,ymm7 3294 vpmadd52luq ymm20,ymm3,ymm7 3295 vpmadd52huq ymm21,ymm3,ymm7 3296 vpmadd52luq ymm22,ymm4,ymm7 3297 vpmadd52huq ymm23,ymm4,ymm7 3298 3299 3300 3301 vpsrlq ymm30,ymm18,44 3302 vpsllq ymm19,ymm19,8 3303 vpandq ymm6,ymm18,ymm28 3304 vpaddq ymm19,ymm19,ymm30 3305 3306 vpaddq ymm20,ymm20,ymm19 3307 3308 vpsrlq ymm30,ymm20,44 3309 vpsllq ymm21,ymm21,8 3310 vpandq ymm7,ymm20,ymm28 3311 vpaddq ymm21,ymm21,ymm30 3312 3313 vpaddq ymm22,ymm22,ymm21 3314 3315 vpsrlq ymm30,ymm22,42 3316 vpsllq ymm23,ymm23,10 3317 vpandq ymm8,ymm22,ymm29 3318 vpaddq ymm23,ymm23,ymm30 3319 3320 vpaddq ymm6,ymm6,ymm23 3321 vpsllq ymm23,ymm23,2 3322 3323 vpaddq ymm6,ymm6,ymm23 3324 3325 vpsrlq ymm30,ymm6,44 3326 vpandq ymm6,ymm6,ymm28 3327 3328 vpaddq ymm7,ymm7,ymm30 3329 3330 3331 3332 3333 3334 vpunpcklqdq ymm26,ymm8,ymm5 3335 vpunpckhqdq ymm5,ymm8,ymm5 3336 vpunpcklqdq ymm24,ymm6,ymm3 3337 vpunpckhqdq ymm3,ymm6,ymm3 3338 vpunpcklqdq ymm25,ymm7,ymm4 3339 vpunpckhqdq ymm4,ymm7,ymm4 3340 vshufi64x2 zmm8,zmm26,zmm5,0x44 3341 vshufi64x2 zmm6,zmm24,zmm3,0x44 3342 vshufi64x2 zmm7,zmm25,zmm4,0x44 3343 3344 vmovdqu64 zmm26,ZMMWORD[rsi] 3345 vmovdqu64 zmm27,ZMMWORD[64+rsi] 3346 lea rsi,[128+rsi] 3347 3348 vpsllq zmm10,zmm8,2 3349 vpsllq zmm9,zmm7,2 3350 vpaddq zmm10,zmm10,zmm8 3351 vpaddq zmm9,zmm9,zmm7 3352 vpsllq zmm10,zmm10,2 3353 vpsllq zmm9,zmm9,2 3354 3355 vpbroadcastq zmm31,rcx 3356 vpbroadcastq zmm28,xmm28 3357 vpbroadcastq zmm29,xmm29 3358 3359 vpbroadcastq zmm16,xmm9 3360 vpbroadcastq zmm17,xmm10 3361 vpbroadcastq zmm3,xmm6 3362 vpbroadcastq zmm4,xmm7 3363 vpbroadcastq zmm5,xmm8 3364 3365 vpunpcklqdq zmm25,zmm26,zmm27 3366 vpunpckhqdq zmm27,zmm26,zmm27 3367 3368 3369 3370 vpsrlq zmm26,zmm27,24 3371 vporq zmm26,zmm26,zmm31 3372 vpaddq zmm2,zmm2,zmm26 3373 vpandq zmm24,zmm25,zmm28 3374 vpsrlq zmm25,zmm25,44 3375 vpsllq zmm27,zmm27,20 3376 vporq zmm25,zmm25,zmm27 3377 vpandq zmm25,zmm25,zmm28 3378 3379 sub rdx,8 3380 jz NEAR $L$tail_vpmadd52_8x 3381 jmp NEAR $L$oop_vpmadd52_8x 3382 3383 ALIGN 32 3384 $L$oop_vpmadd52_8x: 3385 3386 vpaddq zmm0,zmm0,zmm24 3387 vpaddq zmm1,zmm1,zmm25 3388 3389 vpxorq zmm18,zmm18,zmm18 3390 vpmadd52luq zmm18,zmm16,zmm2 3391 vpxorq zmm19,zmm19,zmm19 3392 vpmadd52huq zmm19,zmm16,zmm2 3393 vpxorq zmm20,zmm20,zmm20 3394 vpmadd52luq zmm20,zmm17,zmm2 3395 vpxorq zmm21,zmm21,zmm21 3396 vpmadd52huq zmm21,zmm17,zmm2 3397 vpxorq zmm22,zmm22,zmm22 3398 vpmadd52luq zmm22,zmm3,zmm2 3399 vpxorq zmm23,zmm23,zmm23 3400 vpmadd52huq zmm23,zmm3,zmm2 3401 3402 vmovdqu64 zmm26,ZMMWORD[rsi] 3403 vmovdqu64 zmm27,ZMMWORD[64+rsi] 3404 lea rsi,[128+rsi] 3405 vpmadd52luq zmm18,zmm3,zmm0 3406 vpmadd52huq zmm19,zmm3,zmm0 3407 vpmadd52luq zmm20,zmm4,zmm0 3408 vpmadd52huq zmm21,zmm4,zmm0 3409 vpmadd52luq zmm22,zmm5,zmm0 3410 vpmadd52huq zmm23,zmm5,zmm0 3411 3412 vpunpcklqdq zmm25,zmm26,zmm27 3413 vpunpckhqdq zmm27,zmm26,zmm27 3414 vpmadd52luq zmm18,zmm17,zmm1 3415 vpmadd52huq zmm19,zmm17,zmm1 3416 vpmadd52luq zmm20,zmm3,zmm1 3417 vpmadd52huq zmm21,zmm3,zmm1 3418 vpmadd52luq zmm22,zmm4,zmm1 3419 vpmadd52huq zmm23,zmm4,zmm1 3420 3421 3422 3423 vpsrlq zmm30,zmm18,44 3424 vpsllq zmm19,zmm19,8 3425 vpandq zmm0,zmm18,zmm28 3426 vpaddq zmm19,zmm19,zmm30 3427 3428 vpsrlq zmm26,zmm27,24 3429 vporq zmm26,zmm26,zmm31 3430 vpaddq zmm20,zmm20,zmm19 3431 3432 vpsrlq zmm30,zmm20,44 3433 vpsllq zmm21,zmm21,8 3434 vpandq zmm1,zmm20,zmm28 3435 vpaddq zmm21,zmm21,zmm30 3436 3437 vpandq zmm24,zmm25,zmm28 3438 vpsrlq zmm25,zmm25,44 3439 vpsllq zmm27,zmm27,20 3440 vpaddq zmm22,zmm22,zmm21 3441 3442 vpsrlq zmm30,zmm22,42 3443 vpsllq zmm23,zmm23,10 3444 vpandq zmm2,zmm22,zmm29 3445 vpaddq zmm23,zmm23,zmm30 3446 3447 vpaddq zmm2,zmm2,zmm26 3448 vpaddq zmm0,zmm0,zmm23 3449 vpsllq zmm23,zmm23,2 3450 3451 vpaddq zmm0,zmm0,zmm23 3452 vporq zmm25,zmm25,zmm27 3453 vpandq zmm25,zmm25,zmm28 3454 3455 vpsrlq zmm30,zmm0,44 3456 vpandq zmm0,zmm0,zmm28 3457 3458 vpaddq zmm1,zmm1,zmm30 3459 3460 sub rdx,8 3461 jnz NEAR $L$oop_vpmadd52_8x 3462 3463 $L$tail_vpmadd52_8x: 3464 3465 vpaddq zmm0,zmm0,zmm24 3466 vpaddq zmm1,zmm1,zmm25 3467 3468 vpxorq zmm18,zmm18,zmm18 3469 vpmadd52luq zmm18,zmm9,zmm2 3470 vpxorq zmm19,zmm19,zmm19 3471 vpmadd52huq zmm19,zmm9,zmm2 3472 vpxorq zmm20,zmm20,zmm20 3473 vpmadd52luq zmm20,zmm10,zmm2 3474 vpxorq zmm21,zmm21,zmm21 3475 vpmadd52huq zmm21,zmm10,zmm2 3476 vpxorq zmm22,zmm22,zmm22 3477 vpmadd52luq zmm22,zmm6,zmm2 3478 vpxorq zmm23,zmm23,zmm23 3479 vpmadd52huq zmm23,zmm6,zmm2 3480 3481 vpmadd52luq zmm18,zmm6,zmm0 3482 vpmadd52huq zmm19,zmm6,zmm0 3483 vpmadd52luq zmm20,zmm7,zmm0 3484 vpmadd52huq zmm21,zmm7,zmm0 3485 vpmadd52luq zmm22,zmm8,zmm0 3486 vpmadd52huq zmm23,zmm8,zmm0 3487 3488 vpmadd52luq zmm18,zmm10,zmm1 3489 vpmadd52huq zmm19,zmm10,zmm1 3490 vpmadd52luq zmm20,zmm6,zmm1 3491 vpmadd52huq zmm21,zmm6,zmm1 3492 vpmadd52luq zmm22,zmm7,zmm1 3493 vpmadd52huq zmm23,zmm7,zmm1 3494 3495 3496 3497 3498 mov eax,1 3499 kmovw k1,eax 3500 vpsrldq zmm24,zmm18,8 3501 vpsrldq zmm0,zmm19,8 3502 vpsrldq zmm25,zmm20,8 3503 vpsrldq zmm1,zmm21,8 3504 vpaddq zmm18,zmm18,zmm24 3505 vpaddq zmm19,zmm19,zmm0 3506 vpsrldq zmm26,zmm22,8 3507 vpsrldq zmm2,zmm23,8 3508 vpaddq zmm20,zmm20,zmm25 3509 vpaddq zmm21,zmm21,zmm1 3510 vpermq zmm24,zmm18,0x2 3511 vpermq zmm0,zmm19,0x2 3512 vpaddq zmm22,zmm22,zmm26 3513 vpaddq zmm23,zmm23,zmm2 3514 3515 vpermq zmm25,zmm20,0x2 3516 vpermq zmm1,zmm21,0x2 3517 vpaddq zmm18,zmm18,zmm24 3518 vpaddq zmm19,zmm19,zmm0 3519 vpermq zmm26,zmm22,0x2 3520 vpermq zmm2,zmm23,0x2 3521 vpaddq zmm20,zmm20,zmm25 3522 vpaddq zmm21,zmm21,zmm1 3523 vextracti64x4 ymm24,zmm18,1 3524 vextracti64x4 ymm0,zmm19,1 3525 vpaddq zmm22,zmm22,zmm26 3526 vpaddq zmm23,zmm23,zmm2 3527 3528 vextracti64x4 ymm25,zmm20,1 3529 vextracti64x4 ymm1,zmm21,1 3530 vextracti64x4 ymm26,zmm22,1 3531 vextracti64x4 ymm2,zmm23,1 3532 vpaddq ymm18{k1}{z},ymm18,ymm24 3533 vpaddq ymm19{k1}{z},ymm19,ymm0 3534 vpaddq ymm20{k1}{z},ymm20,ymm25 3535 vpaddq ymm21{k1}{z},ymm21,ymm1 3536 vpaddq ymm22{k1}{z},ymm22,ymm26 3537 vpaddq ymm23{k1}{z},ymm23,ymm2 3538 3539 3540 3541 vpsrlq ymm30,ymm18,44 3542 vpsllq ymm19,ymm19,8 3543 vpandq ymm0,ymm18,ymm28 3544 vpaddq ymm19,ymm19,ymm30 3545 3546 vpaddq ymm20,ymm20,ymm19 3547 3548 vpsrlq ymm30,ymm20,44 3549 vpsllq ymm21,ymm21,8 3550 vpandq ymm1,ymm20,ymm28 3551 vpaddq ymm21,ymm21,ymm30 3552 3553 vpaddq ymm22,ymm22,ymm21 3554 3555 vpsrlq ymm30,ymm22,42 3556 vpsllq ymm23,ymm23,10 3557 vpandq ymm2,ymm22,ymm29 3558 vpaddq ymm23,ymm23,ymm30 3559 3560 vpaddq ymm0,ymm0,ymm23 3561 vpsllq ymm23,ymm23,2 3562 3563 vpaddq ymm0,ymm0,ymm23 3564 3565 vpsrlq ymm30,ymm0,44 3566 vpandq ymm0,ymm0,ymm28 3567 3568 vpaddq ymm1,ymm1,ymm30 3569 3570 3571 3572 vmovq QWORD[rdi],xmm0 3573 vmovq QWORD[8+rdi],xmm1 3574 vmovq QWORD[16+rdi],xmm2 3575 vzeroall 3576 3577 $L$no_data_vpmadd52_8x: 3578 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 3579 mov rsi,QWORD[16+rsp] 3580 DB 0F3h,0C3h ;repret 3581 3582 $L$SEH_end_poly1305_blocks_vpmadd52_8x: 3583 3584 ALIGN 32 3585 poly1305_emit_base2_44: 3586 mov QWORD[8+rsp],rdi ;WIN64 prologue 3587 mov QWORD[16+rsp],rsi 3588 mov rax,rsp 3589 $L$SEH_begin_poly1305_emit_base2_44: 3590 mov rdi,rcx 3591 mov rsi,rdx 3592 mov rdx,r8 3593 3594 3595 3596 DB 243,15,30,250 3597 mov r8,QWORD[rdi] 3598 mov r9,QWORD[8+rdi] 3599 mov r10,QWORD[16+rdi] 3600 3601 mov rax,r9 3602 shr r9,20 3603 shl rax,44 3604 mov rcx,r10 3605 shr r10,40 3606 shl rcx,24 3607 3608 add r8,rax 3609 adc r9,rcx 3610 adc r10,0 3611 3612 mov rax,r8 3613 add r8,5 3614 mov rcx,r9 3615 adc r9,0 3616 adc r10,0 3617 shr r10,2 3618 cmovnz rax,r8 3619 cmovnz rcx,r9 3620 3621 add rax,QWORD[rdx] 3622 adc rcx,QWORD[8+rdx] 3623 mov QWORD[rsi],rax 3624 mov QWORD[8+rsi],rcx 3625 3626 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 3627 mov rsi,QWORD[16+rsp] 3628 DB 0F3h,0C3h ;repret 3629 3630 $L$SEH_end_poly1305_emit_base2_44: 3631 ALIGN 64 3632 $L$const: 3633 $L$mask24: 3634 DD 0x0ffffff,0,0x0ffffff,0,0x0ffffff,0,0x0ffffff,0 3635 $L$129: 3636 DD 16777216,0,16777216,0,16777216,0,16777216,0 3637 $L$mask26: 3638 DD 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0 3639 $L$permd_avx2: 3640 DD 2,2,2,3,2,0,2,1 3641 $L$permd_avx512: 3642 DD 0,0,0,1,0,2,0,3,0,4,0,5,0,6,0,7 3643 3644 $L$2_44_inp_permd: 3645 DD 0,1,1,2,2,3,7,7 3646 $L$2_44_inp_shift: 3647 DQ 0,12,24,64 3648 $L$2_44_mask: 3649 DQ 0xfffffffffff,0xfffffffffff,0x3ffffffffff,0xffffffffffffffff 3650 $L$2_44_shift_rgt: 3651 DQ 44,44,42,64 3652 $L$2_44_shift_lft: 3653 DQ 8,8,10,64 3654 3655 ALIGN 64 3656 $L$x_mask44: 3657 DQ 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff 3658 DQ 0xfffffffffff,0xfffffffffff,0xfffffffffff,0xfffffffffff 3659 $L$x_mask42: 3660 DQ 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff 3661 DQ 0x3ffffffffff,0x3ffffffffff,0x3ffffffffff,0x3ffffffffff 214 3662 DB 80,111,108,121,49,51,48,53,32,102,111,114,32,120,56,54 215 3663 DB 95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32 … … 462 3910 DD $L$SEH_end_poly1305_emit wrt ..imagebase 463 3911 DD $L$SEH_info_poly1305_emit wrt ..imagebase 3912 DD $L$SEH_begin_poly1305_blocks_avx wrt ..imagebase 3913 DD $L$base2_64_avx wrt ..imagebase 3914 DD $L$SEH_info_poly1305_blocks_avx_1 wrt ..imagebase 3915 3916 DD $L$base2_64_avx wrt ..imagebase 3917 DD $L$even_avx wrt ..imagebase 3918 DD $L$SEH_info_poly1305_blocks_avx_2 wrt ..imagebase 3919 3920 DD $L$even_avx wrt ..imagebase 3921 DD $L$SEH_end_poly1305_blocks_avx wrt ..imagebase 3922 DD $L$SEH_info_poly1305_blocks_avx_3 wrt ..imagebase 3923 3924 DD $L$SEH_begin_poly1305_emit_avx wrt ..imagebase 3925 DD $L$SEH_end_poly1305_emit_avx wrt ..imagebase 3926 DD $L$SEH_info_poly1305_emit_avx wrt ..imagebase 3927 DD $L$SEH_begin_poly1305_blocks_avx2 wrt ..imagebase 3928 DD $L$base2_64_avx2 wrt ..imagebase 3929 DD $L$SEH_info_poly1305_blocks_avx2_1 wrt ..imagebase 3930 3931 DD $L$base2_64_avx2 wrt ..imagebase 3932 DD $L$even_avx2 wrt ..imagebase 3933 DD $L$SEH_info_poly1305_blocks_avx2_2 wrt ..imagebase 3934 3935 DD $L$even_avx2 wrt ..imagebase 3936 DD $L$SEH_end_poly1305_blocks_avx2 wrt ..imagebase 3937 DD $L$SEH_info_poly1305_blocks_avx2_3 wrt ..imagebase 3938 DD $L$SEH_begin_poly1305_blocks_avx512 wrt ..imagebase 3939 DD $L$SEH_end_poly1305_blocks_avx512 wrt ..imagebase 3940 DD $L$SEH_info_poly1305_blocks_avx512 wrt ..imagebase 464 3941 section .xdata rdata align=8 465 3942 ALIGN 8 … … 478 3955 DD se_handler wrt ..imagebase 479 3956 DD $L$SEH_begin_poly1305_emit wrt ..imagebase,$L$SEH_begin_poly1305_emit wrt ..imagebase 3957 $L$SEH_info_poly1305_blocks_avx_1: 3958 DB 9,0,0,0 3959 DD se_handler wrt ..imagebase 3960 DD $L$blocks_avx_body wrt ..imagebase,$L$blocks_avx_epilogue wrt ..imagebase 3961 3962 $L$SEH_info_poly1305_blocks_avx_2: 3963 DB 9,0,0,0 3964 DD se_handler wrt ..imagebase 3965 DD $L$base2_64_avx_body wrt ..imagebase,$L$base2_64_avx_epilogue wrt ..imagebase 3966 3967 $L$SEH_info_poly1305_blocks_avx_3: 3968 DB 9,0,0,0 3969 DD avx_handler wrt ..imagebase 3970 DD $L$do_avx_body wrt ..imagebase,$L$do_avx_epilogue wrt ..imagebase 3971 3972 $L$SEH_info_poly1305_emit_avx: 3973 DB 9,0,0,0 3974 DD se_handler wrt ..imagebase 3975 DD $L$SEH_begin_poly1305_emit_avx wrt ..imagebase,$L$SEH_begin_poly1305_emit_avx wrt ..imagebase 3976 $L$SEH_info_poly1305_blocks_avx2_1: 3977 DB 9,0,0,0 3978 DD se_handler wrt ..imagebase 3979 DD $L$blocks_avx2_body wrt ..imagebase,$L$blocks_avx2_epilogue wrt ..imagebase 3980 3981 $L$SEH_info_poly1305_blocks_avx2_2: 3982 DB 9,0,0,0 3983 DD se_handler wrt ..imagebase 3984 DD $L$base2_64_avx2_body wrt ..imagebase,$L$base2_64_avx2_epilogue wrt ..imagebase 3985 3986 $L$SEH_info_poly1305_blocks_avx2_3: 3987 DB 9,0,0,0 3988 DD avx_handler wrt ..imagebase 3989 DD $L$do_avx2_body wrt ..imagebase,$L$do_avx2_epilogue wrt ..imagebase 3990 $L$SEH_info_poly1305_blocks_avx512: 3991 DB 9,0,0,0 3992 DD avx_handler wrt ..imagebase 3993 DD $L$do_avx512_body wrt ..imagebase,$L$do_avx512_epilogue wrt ..imagebase -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/rsaz-avx2.S
r97373 r99371 6 6 7 7 8 global rsaz_1024_sqr_avx2 9 10 ALIGN 64 11 rsaz_1024_sqr_avx2: 12 mov QWORD[8+rsp],rdi ;WIN64 prologue 13 mov QWORD[16+rsp],rsi 14 mov rax,rsp 15 $L$SEH_begin_rsaz_1024_sqr_avx2: 16 mov rdi,rcx 17 mov rsi,rdx 18 mov rdx,r8 19 mov rcx,r9 20 mov r8,QWORD[40+rsp] 21 22 23 24 lea rax,[rsp] 25 26 push rbx 27 28 push rbp 29 30 push r12 31 32 push r13 33 34 push r14 35 36 push r15 37 38 vzeroupper 39 lea rsp,[((-168))+rsp] 40 vmovaps XMMWORD[(-216)+rax],xmm6 41 vmovaps XMMWORD[(-200)+rax],xmm7 42 vmovaps XMMWORD[(-184)+rax],xmm8 43 vmovaps XMMWORD[(-168)+rax],xmm9 44 vmovaps XMMWORD[(-152)+rax],xmm10 45 vmovaps XMMWORD[(-136)+rax],xmm11 46 vmovaps XMMWORD[(-120)+rax],xmm12 47 vmovaps XMMWORD[(-104)+rax],xmm13 48 vmovaps XMMWORD[(-88)+rax],xmm14 49 vmovaps XMMWORD[(-72)+rax],xmm15 50 $L$sqr_1024_body: 51 mov rbp,rax 52 53 mov r13,rdx 54 sub rsp,832 55 mov r15,r13 56 sub rdi,-128 57 sub rsi,-128 58 sub r13,-128 59 60 and r15,4095 61 add r15,32*10 62 shr r15,12 63 vpxor ymm9,ymm9,ymm9 64 jz NEAR $L$sqr_1024_no_n_copy 65 66 67 68 69 70 sub rsp,32*10 71 vmovdqu ymm0,YMMWORD[((0-128))+r13] 72 and rsp,-2048 73 vmovdqu ymm1,YMMWORD[((32-128))+r13] 74 vmovdqu ymm2,YMMWORD[((64-128))+r13] 75 vmovdqu ymm3,YMMWORD[((96-128))+r13] 76 vmovdqu ymm4,YMMWORD[((128-128))+r13] 77 vmovdqu ymm5,YMMWORD[((160-128))+r13] 78 vmovdqu ymm6,YMMWORD[((192-128))+r13] 79 vmovdqu ymm7,YMMWORD[((224-128))+r13] 80 vmovdqu ymm8,YMMWORD[((256-128))+r13] 81 lea r13,[((832+128))+rsp] 82 vmovdqu YMMWORD[(0-128)+r13],ymm0 83 vmovdqu YMMWORD[(32-128)+r13],ymm1 84 vmovdqu YMMWORD[(64-128)+r13],ymm2 85 vmovdqu YMMWORD[(96-128)+r13],ymm3 86 vmovdqu YMMWORD[(128-128)+r13],ymm4 87 vmovdqu YMMWORD[(160-128)+r13],ymm5 88 vmovdqu YMMWORD[(192-128)+r13],ymm6 89 vmovdqu YMMWORD[(224-128)+r13],ymm7 90 vmovdqu YMMWORD[(256-128)+r13],ymm8 91 vmovdqu YMMWORD[(288-128)+r13],ymm9 92 93 $L$sqr_1024_no_n_copy: 94 and rsp,-1024 95 96 vmovdqu ymm1,YMMWORD[((32-128))+rsi] 97 vmovdqu ymm2,YMMWORD[((64-128))+rsi] 98 vmovdqu ymm3,YMMWORD[((96-128))+rsi] 99 vmovdqu ymm4,YMMWORD[((128-128))+rsi] 100 vmovdqu ymm5,YMMWORD[((160-128))+rsi] 101 vmovdqu ymm6,YMMWORD[((192-128))+rsi] 102 vmovdqu ymm7,YMMWORD[((224-128))+rsi] 103 vmovdqu ymm8,YMMWORD[((256-128))+rsi] 104 105 lea rbx,[192+rsp] 106 vmovdqu ymm15,YMMWORD[$L$and_mask] 107 jmp NEAR $L$OOP_GRANDE_SQR_1024 108 109 ALIGN 32 110 $L$OOP_GRANDE_SQR_1024: 111 lea r9,[((576+128))+rsp] 112 lea r12,[448+rsp] 113 114 115 116 117 vpaddq ymm1,ymm1,ymm1 118 vpbroadcastq ymm10,QWORD[((0-128))+rsi] 119 vpaddq ymm2,ymm2,ymm2 120 vmovdqa YMMWORD[(0-128)+r9],ymm1 121 vpaddq ymm3,ymm3,ymm3 122 vmovdqa YMMWORD[(32-128)+r9],ymm2 123 vpaddq ymm4,ymm4,ymm4 124 vmovdqa YMMWORD[(64-128)+r9],ymm3 125 vpaddq ymm5,ymm5,ymm5 126 vmovdqa YMMWORD[(96-128)+r9],ymm4 127 vpaddq ymm6,ymm6,ymm6 128 vmovdqa YMMWORD[(128-128)+r9],ymm5 129 vpaddq ymm7,ymm7,ymm7 130 vmovdqa YMMWORD[(160-128)+r9],ymm6 131 vpaddq ymm8,ymm8,ymm8 132 vmovdqa YMMWORD[(192-128)+r9],ymm7 133 vpxor ymm9,ymm9,ymm9 134 vmovdqa YMMWORD[(224-128)+r9],ymm8 135 136 vpmuludq ymm0,ymm10,YMMWORD[((0-128))+rsi] 137 vpbroadcastq ymm11,QWORD[((32-128))+rsi] 138 vmovdqu YMMWORD[(288-192)+rbx],ymm9 139 vpmuludq ymm1,ymm1,ymm10 140 vmovdqu YMMWORD[(320-448)+r12],ymm9 141 vpmuludq ymm2,ymm2,ymm10 142 vmovdqu YMMWORD[(352-448)+r12],ymm9 143 vpmuludq ymm3,ymm3,ymm10 144 vmovdqu YMMWORD[(384-448)+r12],ymm9 145 vpmuludq ymm4,ymm4,ymm10 146 vmovdqu YMMWORD[(416-448)+r12],ymm9 147 vpmuludq ymm5,ymm5,ymm10 148 vmovdqu YMMWORD[(448-448)+r12],ymm9 149 vpmuludq ymm6,ymm6,ymm10 150 vmovdqu YMMWORD[(480-448)+r12],ymm9 151 vpmuludq ymm7,ymm7,ymm10 152 vmovdqu YMMWORD[(512-448)+r12],ymm9 153 vpmuludq ymm8,ymm8,ymm10 154 vpbroadcastq ymm10,QWORD[((64-128))+rsi] 155 vmovdqu YMMWORD[(544-448)+r12],ymm9 156 157 mov r15,rsi 158 mov r14d,4 159 jmp NEAR $L$sqr_entry_1024 160 ALIGN 32 161 $L$OOP_SQR_1024: 162 vpbroadcastq ymm11,QWORD[((32-128))+r15] 163 vpmuludq ymm0,ymm10,YMMWORD[((0-128))+rsi] 164 vpaddq ymm0,ymm0,YMMWORD[((0-192))+rbx] 165 vpmuludq ymm1,ymm10,YMMWORD[((0-128))+r9] 166 vpaddq ymm1,ymm1,YMMWORD[((32-192))+rbx] 167 vpmuludq ymm2,ymm10,YMMWORD[((32-128))+r9] 168 vpaddq ymm2,ymm2,YMMWORD[((64-192))+rbx] 169 vpmuludq ymm3,ymm10,YMMWORD[((64-128))+r9] 170 vpaddq ymm3,ymm3,YMMWORD[((96-192))+rbx] 171 vpmuludq ymm4,ymm10,YMMWORD[((96-128))+r9] 172 vpaddq ymm4,ymm4,YMMWORD[((128-192))+rbx] 173 vpmuludq ymm5,ymm10,YMMWORD[((128-128))+r9] 174 vpaddq ymm5,ymm5,YMMWORD[((160-192))+rbx] 175 vpmuludq ymm6,ymm10,YMMWORD[((160-128))+r9] 176 vpaddq ymm6,ymm6,YMMWORD[((192-192))+rbx] 177 vpmuludq ymm7,ymm10,YMMWORD[((192-128))+r9] 178 vpaddq ymm7,ymm7,YMMWORD[((224-192))+rbx] 179 vpmuludq ymm8,ymm10,YMMWORD[((224-128))+r9] 180 vpbroadcastq ymm10,QWORD[((64-128))+r15] 181 vpaddq ymm8,ymm8,YMMWORD[((256-192))+rbx] 182 $L$sqr_entry_1024: 183 vmovdqu YMMWORD[(0-192)+rbx],ymm0 184 vmovdqu YMMWORD[(32-192)+rbx],ymm1 185 186 vpmuludq ymm12,ymm11,YMMWORD[((32-128))+rsi] 187 vpaddq ymm2,ymm2,ymm12 188 vpmuludq ymm14,ymm11,YMMWORD[((32-128))+r9] 189 vpaddq ymm3,ymm3,ymm14 190 vpmuludq ymm13,ymm11,YMMWORD[((64-128))+r9] 191 vpaddq ymm4,ymm4,ymm13 192 vpmuludq ymm12,ymm11,YMMWORD[((96-128))+r9] 193 vpaddq ymm5,ymm5,ymm12 194 vpmuludq ymm14,ymm11,YMMWORD[((128-128))+r9] 195 vpaddq ymm6,ymm6,ymm14 196 vpmuludq ymm13,ymm11,YMMWORD[((160-128))+r9] 197 vpaddq ymm7,ymm7,ymm13 198 vpmuludq ymm12,ymm11,YMMWORD[((192-128))+r9] 199 vpaddq ymm8,ymm8,ymm12 200 vpmuludq ymm0,ymm11,YMMWORD[((224-128))+r9] 201 vpbroadcastq ymm11,QWORD[((96-128))+r15] 202 vpaddq ymm0,ymm0,YMMWORD[((288-192))+rbx] 203 204 vmovdqu YMMWORD[(64-192)+rbx],ymm2 205 vmovdqu YMMWORD[(96-192)+rbx],ymm3 206 207 vpmuludq ymm13,ymm10,YMMWORD[((64-128))+rsi] 208 vpaddq ymm4,ymm4,ymm13 209 vpmuludq ymm12,ymm10,YMMWORD[((64-128))+r9] 210 vpaddq ymm5,ymm5,ymm12 211 vpmuludq ymm14,ymm10,YMMWORD[((96-128))+r9] 212 vpaddq ymm6,ymm6,ymm14 213 vpmuludq ymm13,ymm10,YMMWORD[((128-128))+r9] 214 vpaddq ymm7,ymm7,ymm13 215 vpmuludq ymm12,ymm10,YMMWORD[((160-128))+r9] 216 vpaddq ymm8,ymm8,ymm12 217 vpmuludq ymm14,ymm10,YMMWORD[((192-128))+r9] 218 vpaddq ymm0,ymm0,ymm14 219 vpmuludq ymm1,ymm10,YMMWORD[((224-128))+r9] 220 vpbroadcastq ymm10,QWORD[((128-128))+r15] 221 vpaddq ymm1,ymm1,YMMWORD[((320-448))+r12] 222 223 vmovdqu YMMWORD[(128-192)+rbx],ymm4 224 vmovdqu YMMWORD[(160-192)+rbx],ymm5 225 226 vpmuludq ymm12,ymm11,YMMWORD[((96-128))+rsi] 227 vpaddq ymm6,ymm6,ymm12 228 vpmuludq ymm14,ymm11,YMMWORD[((96-128))+r9] 229 vpaddq ymm7,ymm7,ymm14 230 vpmuludq ymm13,ymm11,YMMWORD[((128-128))+r9] 231 vpaddq ymm8,ymm8,ymm13 232 vpmuludq ymm12,ymm11,YMMWORD[((160-128))+r9] 233 vpaddq ymm0,ymm0,ymm12 234 vpmuludq ymm14,ymm11,YMMWORD[((192-128))+r9] 235 vpaddq ymm1,ymm1,ymm14 236 vpmuludq ymm2,ymm11,YMMWORD[((224-128))+r9] 237 vpbroadcastq ymm11,QWORD[((160-128))+r15] 238 vpaddq ymm2,ymm2,YMMWORD[((352-448))+r12] 239 240 vmovdqu YMMWORD[(192-192)+rbx],ymm6 241 vmovdqu YMMWORD[(224-192)+rbx],ymm7 242 243 vpmuludq ymm12,ymm10,YMMWORD[((128-128))+rsi] 244 vpaddq ymm8,ymm8,ymm12 245 vpmuludq ymm14,ymm10,YMMWORD[((128-128))+r9] 246 vpaddq ymm0,ymm0,ymm14 247 vpmuludq ymm13,ymm10,YMMWORD[((160-128))+r9] 248 vpaddq ymm1,ymm1,ymm13 249 vpmuludq ymm12,ymm10,YMMWORD[((192-128))+r9] 250 vpaddq ymm2,ymm2,ymm12 251 vpmuludq ymm3,ymm10,YMMWORD[((224-128))+r9] 252 vpbroadcastq ymm10,QWORD[((192-128))+r15] 253 vpaddq ymm3,ymm3,YMMWORD[((384-448))+r12] 254 255 vmovdqu YMMWORD[(256-192)+rbx],ymm8 256 vmovdqu YMMWORD[(288-192)+rbx],ymm0 257 lea rbx,[8+rbx] 258 259 vpmuludq ymm13,ymm11,YMMWORD[((160-128))+rsi] 260 vpaddq ymm1,ymm1,ymm13 261 vpmuludq ymm12,ymm11,YMMWORD[((160-128))+r9] 262 vpaddq ymm2,ymm2,ymm12 263 vpmuludq ymm14,ymm11,YMMWORD[((192-128))+r9] 264 vpaddq ymm3,ymm3,ymm14 265 vpmuludq ymm4,ymm11,YMMWORD[((224-128))+r9] 266 vpbroadcastq ymm11,QWORD[((224-128))+r15] 267 vpaddq ymm4,ymm4,YMMWORD[((416-448))+r12] 268 269 vmovdqu YMMWORD[(320-448)+r12],ymm1 270 vmovdqu YMMWORD[(352-448)+r12],ymm2 271 272 vpmuludq ymm12,ymm10,YMMWORD[((192-128))+rsi] 273 vpaddq ymm3,ymm3,ymm12 274 vpmuludq ymm14,ymm10,YMMWORD[((192-128))+r9] 275 vpbroadcastq ymm0,QWORD[((256-128))+r15] 276 vpaddq ymm4,ymm4,ymm14 277 vpmuludq ymm5,ymm10,YMMWORD[((224-128))+r9] 278 vpbroadcastq ymm10,QWORD[((0+8-128))+r15] 279 vpaddq ymm5,ymm5,YMMWORD[((448-448))+r12] 280 281 vmovdqu YMMWORD[(384-448)+r12],ymm3 282 vmovdqu YMMWORD[(416-448)+r12],ymm4 283 lea r15,[8+r15] 284 285 vpmuludq ymm12,ymm11,YMMWORD[((224-128))+rsi] 286 vpaddq ymm5,ymm5,ymm12 287 vpmuludq ymm6,ymm11,YMMWORD[((224-128))+r9] 288 vpaddq ymm6,ymm6,YMMWORD[((480-448))+r12] 289 290 vpmuludq ymm7,ymm0,YMMWORD[((256-128))+rsi] 291 vmovdqu YMMWORD[(448-448)+r12],ymm5 292 vpaddq ymm7,ymm7,YMMWORD[((512-448))+r12] 293 vmovdqu YMMWORD[(480-448)+r12],ymm6 294 vmovdqu YMMWORD[(512-448)+r12],ymm7 295 lea r12,[8+r12] 296 297 dec r14d 298 jnz NEAR $L$OOP_SQR_1024 299 300 vmovdqu ymm8,YMMWORD[256+rsp] 301 vmovdqu ymm1,YMMWORD[288+rsp] 302 vmovdqu ymm2,YMMWORD[320+rsp] 303 lea rbx,[192+rsp] 304 305 vpsrlq ymm14,ymm8,29 306 vpand ymm8,ymm8,ymm15 307 vpsrlq ymm11,ymm1,29 308 vpand ymm1,ymm1,ymm15 309 310 vpermq ymm14,ymm14,0x93 311 vpxor ymm9,ymm9,ymm9 312 vpermq ymm11,ymm11,0x93 313 314 vpblendd ymm10,ymm14,ymm9,3 315 vpblendd ymm14,ymm11,ymm14,3 316 vpaddq ymm8,ymm8,ymm10 317 vpblendd ymm11,ymm9,ymm11,3 318 vpaddq ymm1,ymm1,ymm14 319 vpaddq ymm2,ymm2,ymm11 320 vmovdqu YMMWORD[(288-192)+rbx],ymm1 321 vmovdqu YMMWORD[(320-192)+rbx],ymm2 322 323 mov rax,QWORD[rsp] 324 mov r10,QWORD[8+rsp] 325 mov r11,QWORD[16+rsp] 326 mov r12,QWORD[24+rsp] 327 vmovdqu ymm1,YMMWORD[32+rsp] 328 vmovdqu ymm2,YMMWORD[((64-192))+rbx] 329 vmovdqu ymm3,YMMWORD[((96-192))+rbx] 330 vmovdqu ymm4,YMMWORD[((128-192))+rbx] 331 vmovdqu ymm5,YMMWORD[((160-192))+rbx] 332 vmovdqu ymm6,YMMWORD[((192-192))+rbx] 333 vmovdqu ymm7,YMMWORD[((224-192))+rbx] 334 335 mov r9,rax 336 imul eax,ecx 337 and eax,0x1fffffff 338 vmovd xmm12,eax 339 340 mov rdx,rax 341 imul rax,QWORD[((-128))+r13] 342 vpbroadcastq ymm12,xmm12 343 add r9,rax 344 mov rax,rdx 345 imul rax,QWORD[((8-128))+r13] 346 shr r9,29 347 add r10,rax 348 mov rax,rdx 349 imul rax,QWORD[((16-128))+r13] 350 add r10,r9 351 add r11,rax 352 imul rdx,QWORD[((24-128))+r13] 353 add r12,rdx 354 355 mov rax,r10 356 imul eax,ecx 357 and eax,0x1fffffff 358 359 mov r14d,9 360 jmp NEAR $L$OOP_REDUCE_1024 361 362 ALIGN 32 363 $L$OOP_REDUCE_1024: 364 vmovd xmm13,eax 365 vpbroadcastq ymm13,xmm13 366 367 vpmuludq ymm10,ymm12,YMMWORD[((32-128))+r13] 368 mov rdx,rax 369 imul rax,QWORD[((-128))+r13] 370 vpaddq ymm1,ymm1,ymm10 371 add r10,rax 372 vpmuludq ymm14,ymm12,YMMWORD[((64-128))+r13] 373 mov rax,rdx 374 imul rax,QWORD[((8-128))+r13] 375 vpaddq ymm2,ymm2,ymm14 376 vpmuludq ymm11,ymm12,YMMWORD[((96-128))+r13] 377 DB 0x67 378 add r11,rax 379 DB 0x67 380 mov rax,rdx 381 imul rax,QWORD[((16-128))+r13] 382 shr r10,29 383 vpaddq ymm3,ymm3,ymm11 384 vpmuludq ymm10,ymm12,YMMWORD[((128-128))+r13] 385 add r12,rax 386 add r11,r10 387 vpaddq ymm4,ymm4,ymm10 388 vpmuludq ymm14,ymm12,YMMWORD[((160-128))+r13] 389 mov rax,r11 390 imul eax,ecx 391 vpaddq ymm5,ymm5,ymm14 392 vpmuludq ymm11,ymm12,YMMWORD[((192-128))+r13] 393 and eax,0x1fffffff 394 vpaddq ymm6,ymm6,ymm11 395 vpmuludq ymm10,ymm12,YMMWORD[((224-128))+r13] 396 vpaddq ymm7,ymm7,ymm10 397 vpmuludq ymm14,ymm12,YMMWORD[((256-128))+r13] 398 vmovd xmm12,eax 399 400 vpaddq ymm8,ymm8,ymm14 401 402 vpbroadcastq ymm12,xmm12 403 404 vpmuludq ymm11,ymm13,YMMWORD[((32-8-128))+r13] 405 vmovdqu ymm14,YMMWORD[((96-8-128))+r13] 406 mov rdx,rax 407 imul rax,QWORD[((-128))+r13] 408 vpaddq ymm1,ymm1,ymm11 409 vpmuludq ymm10,ymm13,YMMWORD[((64-8-128))+r13] 410 vmovdqu ymm11,YMMWORD[((128-8-128))+r13] 411 add r11,rax 412 mov rax,rdx 413 imul rax,QWORD[((8-128))+r13] 414 vpaddq ymm2,ymm2,ymm10 415 add rax,r12 416 shr r11,29 417 vpmuludq ymm14,ymm14,ymm13 418 vmovdqu ymm10,YMMWORD[((160-8-128))+r13] 419 add rax,r11 420 vpaddq ymm3,ymm3,ymm14 421 vpmuludq ymm11,ymm11,ymm13 422 vmovdqu ymm14,YMMWORD[((192-8-128))+r13] 423 DB 0x67 424 mov r12,rax 425 imul eax,ecx 426 vpaddq ymm4,ymm4,ymm11 427 vpmuludq ymm10,ymm10,ymm13 428 DB 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 429 and eax,0x1fffffff 430 vpaddq ymm5,ymm5,ymm10 431 vpmuludq ymm14,ymm14,ymm13 432 vmovdqu ymm10,YMMWORD[((256-8-128))+r13] 433 vpaddq ymm6,ymm6,ymm14 434 vpmuludq ymm11,ymm11,ymm13 435 vmovdqu ymm9,YMMWORD[((288-8-128))+r13] 436 vmovd xmm0,eax 437 imul rax,QWORD[((-128))+r13] 438 vpaddq ymm7,ymm7,ymm11 439 vpmuludq ymm10,ymm10,ymm13 440 vmovdqu ymm14,YMMWORD[((32-16-128))+r13] 441 vpbroadcastq ymm0,xmm0 442 vpaddq ymm8,ymm8,ymm10 443 vpmuludq ymm9,ymm9,ymm13 444 vmovdqu ymm11,YMMWORD[((64-16-128))+r13] 445 add r12,rax 446 447 vmovdqu ymm13,YMMWORD[((32-24-128))+r13] 448 vpmuludq ymm14,ymm14,ymm12 449 vmovdqu ymm10,YMMWORD[((96-16-128))+r13] 450 vpaddq ymm1,ymm1,ymm14 451 vpmuludq ymm13,ymm13,ymm0 452 vpmuludq ymm11,ymm11,ymm12 453 DB 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff 454 vpaddq ymm13,ymm13,ymm1 455 vpaddq ymm2,ymm2,ymm11 456 vpmuludq ymm10,ymm10,ymm12 457 vmovdqu ymm11,YMMWORD[((160-16-128))+r13] 458 DB 0x67 459 vmovq rax,xmm13 460 vmovdqu YMMWORD[rsp],ymm13 461 vpaddq ymm3,ymm3,ymm10 462 vpmuludq ymm14,ymm14,ymm12 463 vmovdqu ymm10,YMMWORD[((192-16-128))+r13] 464 vpaddq ymm4,ymm4,ymm14 465 vpmuludq ymm11,ymm11,ymm12 466 vmovdqu ymm14,YMMWORD[((224-16-128))+r13] 467 vpaddq ymm5,ymm5,ymm11 468 vpmuludq ymm10,ymm10,ymm12 469 vmovdqu ymm11,YMMWORD[((256-16-128))+r13] 470 vpaddq ymm6,ymm6,ymm10 471 vpmuludq ymm14,ymm14,ymm12 472 shr r12,29 473 vmovdqu ymm10,YMMWORD[((288-16-128))+r13] 474 add rax,r12 475 vpaddq ymm7,ymm7,ymm14 476 vpmuludq ymm11,ymm11,ymm12 477 478 mov r9,rax 479 imul eax,ecx 480 vpaddq ymm8,ymm8,ymm11 481 vpmuludq ymm10,ymm10,ymm12 482 and eax,0x1fffffff 483 vmovd xmm12,eax 484 vmovdqu ymm11,YMMWORD[((96-24-128))+r13] 485 DB 0x67 486 vpaddq ymm9,ymm9,ymm10 487 vpbroadcastq ymm12,xmm12 488 489 vpmuludq ymm14,ymm0,YMMWORD[((64-24-128))+r13] 490 vmovdqu ymm10,YMMWORD[((128-24-128))+r13] 491 mov rdx,rax 492 imul rax,QWORD[((-128))+r13] 493 mov r10,QWORD[8+rsp] 494 vpaddq ymm1,ymm2,ymm14 495 vpmuludq ymm11,ymm11,ymm0 496 vmovdqu ymm14,YMMWORD[((160-24-128))+r13] 497 add r9,rax 498 mov rax,rdx 499 imul rax,QWORD[((8-128))+r13] 500 DB 0x67 501 shr r9,29 502 mov r11,QWORD[16+rsp] 503 vpaddq ymm2,ymm3,ymm11 504 vpmuludq ymm10,ymm10,ymm0 505 vmovdqu ymm11,YMMWORD[((192-24-128))+r13] 506 add r10,rax 507 mov rax,rdx 508 imul rax,QWORD[((16-128))+r13] 509 vpaddq ymm3,ymm4,ymm10 510 vpmuludq ymm14,ymm14,ymm0 511 vmovdqu ymm10,YMMWORD[((224-24-128))+r13] 512 imul rdx,QWORD[((24-128))+r13] 513 add r11,rax 514 lea rax,[r10*1+r9] 515 vpaddq ymm4,ymm5,ymm14 516 vpmuludq ymm11,ymm11,ymm0 517 vmovdqu ymm14,YMMWORD[((256-24-128))+r13] 518 mov r10,rax 519 imul eax,ecx 520 vpmuludq ymm10,ymm10,ymm0 521 vpaddq ymm5,ymm6,ymm11 522 vmovdqu ymm11,YMMWORD[((288-24-128))+r13] 523 and eax,0x1fffffff 524 vpaddq ymm6,ymm7,ymm10 525 vpmuludq ymm14,ymm14,ymm0 526 add rdx,QWORD[24+rsp] 527 vpaddq ymm7,ymm8,ymm14 528 vpmuludq ymm11,ymm11,ymm0 529 vpaddq ymm8,ymm9,ymm11 530 vmovq xmm9,r12 531 mov r12,rdx 532 533 dec r14d 534 jnz NEAR $L$OOP_REDUCE_1024 535 lea r12,[448+rsp] 536 vpaddq ymm0,ymm13,ymm9 537 vpxor ymm9,ymm9,ymm9 538 539 vpaddq ymm0,ymm0,YMMWORD[((288-192))+rbx] 540 vpaddq ymm1,ymm1,YMMWORD[((320-448))+r12] 541 vpaddq ymm2,ymm2,YMMWORD[((352-448))+r12] 542 vpaddq ymm3,ymm3,YMMWORD[((384-448))+r12] 543 vpaddq ymm4,ymm4,YMMWORD[((416-448))+r12] 544 vpaddq ymm5,ymm5,YMMWORD[((448-448))+r12] 545 vpaddq ymm6,ymm6,YMMWORD[((480-448))+r12] 546 vpaddq ymm7,ymm7,YMMWORD[((512-448))+r12] 547 vpaddq ymm8,ymm8,YMMWORD[((544-448))+r12] 548 549 vpsrlq ymm14,ymm0,29 550 vpand ymm0,ymm0,ymm15 551 vpsrlq ymm11,ymm1,29 552 vpand ymm1,ymm1,ymm15 553 vpsrlq ymm12,ymm2,29 554 vpermq ymm14,ymm14,0x93 555 vpand ymm2,ymm2,ymm15 556 vpsrlq ymm13,ymm3,29 557 vpermq ymm11,ymm11,0x93 558 vpand ymm3,ymm3,ymm15 559 vpermq ymm12,ymm12,0x93 560 561 vpblendd ymm10,ymm14,ymm9,3 562 vpermq ymm13,ymm13,0x93 563 vpblendd ymm14,ymm11,ymm14,3 564 vpaddq ymm0,ymm0,ymm10 565 vpblendd ymm11,ymm12,ymm11,3 566 vpaddq ymm1,ymm1,ymm14 567 vpblendd ymm12,ymm13,ymm12,3 568 vpaddq ymm2,ymm2,ymm11 569 vpblendd ymm13,ymm9,ymm13,3 570 vpaddq ymm3,ymm3,ymm12 571 vpaddq ymm4,ymm4,ymm13 572 573 vpsrlq ymm14,ymm0,29 574 vpand ymm0,ymm0,ymm15 575 vpsrlq ymm11,ymm1,29 576 vpand ymm1,ymm1,ymm15 577 vpsrlq ymm12,ymm2,29 578 vpermq ymm14,ymm14,0x93 579 vpand ymm2,ymm2,ymm15 580 vpsrlq ymm13,ymm3,29 581 vpermq ymm11,ymm11,0x93 582 vpand ymm3,ymm3,ymm15 583 vpermq ymm12,ymm12,0x93 584 585 vpblendd ymm10,ymm14,ymm9,3 586 vpermq ymm13,ymm13,0x93 587 vpblendd ymm14,ymm11,ymm14,3 588 vpaddq ymm0,ymm0,ymm10 589 vpblendd ymm11,ymm12,ymm11,3 590 vpaddq ymm1,ymm1,ymm14 591 vmovdqu YMMWORD[(0-128)+rdi],ymm0 592 vpblendd ymm12,ymm13,ymm12,3 593 vpaddq ymm2,ymm2,ymm11 594 vmovdqu YMMWORD[(32-128)+rdi],ymm1 595 vpblendd ymm13,ymm9,ymm13,3 596 vpaddq ymm3,ymm3,ymm12 597 vmovdqu YMMWORD[(64-128)+rdi],ymm2 598 vpaddq ymm4,ymm4,ymm13 599 vmovdqu YMMWORD[(96-128)+rdi],ymm3 600 vpsrlq ymm14,ymm4,29 601 vpand ymm4,ymm4,ymm15 602 vpsrlq ymm11,ymm5,29 603 vpand ymm5,ymm5,ymm15 604 vpsrlq ymm12,ymm6,29 605 vpermq ymm14,ymm14,0x93 606 vpand ymm6,ymm6,ymm15 607 vpsrlq ymm13,ymm7,29 608 vpermq ymm11,ymm11,0x93 609 vpand ymm7,ymm7,ymm15 610 vpsrlq ymm0,ymm8,29 611 vpermq ymm12,ymm12,0x93 612 vpand ymm8,ymm8,ymm15 613 vpermq ymm13,ymm13,0x93 614 615 vpblendd ymm10,ymm14,ymm9,3 616 vpermq ymm0,ymm0,0x93 617 vpblendd ymm14,ymm11,ymm14,3 618 vpaddq ymm4,ymm4,ymm10 619 vpblendd ymm11,ymm12,ymm11,3 620 vpaddq ymm5,ymm5,ymm14 621 vpblendd ymm12,ymm13,ymm12,3 622 vpaddq ymm6,ymm6,ymm11 623 vpblendd ymm13,ymm0,ymm13,3 624 vpaddq ymm7,ymm7,ymm12 625 vpaddq ymm8,ymm8,ymm13 626 627 vpsrlq ymm14,ymm4,29 628 vpand ymm4,ymm4,ymm15 629 vpsrlq ymm11,ymm5,29 630 vpand ymm5,ymm5,ymm15 631 vpsrlq ymm12,ymm6,29 632 vpermq ymm14,ymm14,0x93 633 vpand ymm6,ymm6,ymm15 634 vpsrlq ymm13,ymm7,29 635 vpermq ymm11,ymm11,0x93 636 vpand ymm7,ymm7,ymm15 637 vpsrlq ymm0,ymm8,29 638 vpermq ymm12,ymm12,0x93 639 vpand ymm8,ymm8,ymm15 640 vpermq ymm13,ymm13,0x93 641 642 vpblendd ymm10,ymm14,ymm9,3 643 vpermq ymm0,ymm0,0x93 644 vpblendd ymm14,ymm11,ymm14,3 645 vpaddq ymm4,ymm4,ymm10 646 vpblendd ymm11,ymm12,ymm11,3 647 vpaddq ymm5,ymm5,ymm14 648 vmovdqu YMMWORD[(128-128)+rdi],ymm4 649 vpblendd ymm12,ymm13,ymm12,3 650 vpaddq ymm6,ymm6,ymm11 651 vmovdqu YMMWORD[(160-128)+rdi],ymm5 652 vpblendd ymm13,ymm0,ymm13,3 653 vpaddq ymm7,ymm7,ymm12 654 vmovdqu YMMWORD[(192-128)+rdi],ymm6 655 vpaddq ymm8,ymm8,ymm13 656 vmovdqu YMMWORD[(224-128)+rdi],ymm7 657 vmovdqu YMMWORD[(256-128)+rdi],ymm8 658 659 mov rsi,rdi 660 dec r8d 661 jne NEAR $L$OOP_GRANDE_SQR_1024 662 663 vzeroall 664 mov rax,rbp 665 666 $L$sqr_1024_in_tail: 667 movaps xmm6,XMMWORD[((-216))+rax] 668 movaps xmm7,XMMWORD[((-200))+rax] 669 movaps xmm8,XMMWORD[((-184))+rax] 670 movaps xmm9,XMMWORD[((-168))+rax] 671 movaps xmm10,XMMWORD[((-152))+rax] 672 movaps xmm11,XMMWORD[((-136))+rax] 673 movaps xmm12,XMMWORD[((-120))+rax] 674 movaps xmm13,XMMWORD[((-104))+rax] 675 movaps xmm14,XMMWORD[((-88))+rax] 676 movaps xmm15,XMMWORD[((-72))+rax] 677 mov r15,QWORD[((-48))+rax] 678 679 mov r14,QWORD[((-40))+rax] 680 681 mov r13,QWORD[((-32))+rax] 682 683 mov r12,QWORD[((-24))+rax] 684 685 mov rbp,QWORD[((-16))+rax] 686 687 mov rbx,QWORD[((-8))+rax] 688 689 lea rsp,[rax] 690 691 $L$sqr_1024_epilogue: 692 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 693 mov rsi,QWORD[16+rsp] 694 DB 0F3h,0C3h ;repret 695 696 $L$SEH_end_rsaz_1024_sqr_avx2: 697 global rsaz_1024_mul_avx2 698 699 ALIGN 64 700 rsaz_1024_mul_avx2: 701 mov QWORD[8+rsp],rdi ;WIN64 prologue 702 mov QWORD[16+rsp],rsi 703 mov rax,rsp 704 $L$SEH_begin_rsaz_1024_mul_avx2: 705 mov rdi,rcx 706 mov rsi,rdx 707 mov rdx,r8 708 mov rcx,r9 709 mov r8,QWORD[40+rsp] 710 711 712 713 lea rax,[rsp] 714 715 push rbx 716 717 push rbp 718 719 push r12 720 721 push r13 722 723 push r14 724 725 push r15 726 727 vzeroupper 728 lea rsp,[((-168))+rsp] 729 vmovaps XMMWORD[(-216)+rax],xmm6 730 vmovaps XMMWORD[(-200)+rax],xmm7 731 vmovaps XMMWORD[(-184)+rax],xmm8 732 vmovaps XMMWORD[(-168)+rax],xmm9 733 vmovaps XMMWORD[(-152)+rax],xmm10 734 vmovaps XMMWORD[(-136)+rax],xmm11 735 vmovaps XMMWORD[(-120)+rax],xmm12 736 vmovaps XMMWORD[(-104)+rax],xmm13 737 vmovaps XMMWORD[(-88)+rax],xmm14 738 vmovaps XMMWORD[(-72)+rax],xmm15 739 $L$mul_1024_body: 740 mov rbp,rax 741 742 vzeroall 743 mov r13,rdx 744 sub rsp,64 745 746 747 748 749 750 751 DB 0x67,0x67 752 mov r15,rsi 753 and r15,4095 754 add r15,32*10 755 shr r15,12 756 mov r15,rsi 757 cmovnz rsi,r13 758 cmovnz r13,r15 759 760 mov r15,rcx 761 sub rsi,-128 762 sub rcx,-128 763 sub rdi,-128 764 765 and r15,4095 766 add r15,32*10 767 DB 0x67,0x67 768 shr r15,12 769 jz NEAR $L$mul_1024_no_n_copy 770 771 772 773 774 775 sub rsp,32*10 776 vmovdqu ymm0,YMMWORD[((0-128))+rcx] 777 and rsp,-512 778 vmovdqu ymm1,YMMWORD[((32-128))+rcx] 779 vmovdqu ymm2,YMMWORD[((64-128))+rcx] 780 vmovdqu ymm3,YMMWORD[((96-128))+rcx] 781 vmovdqu ymm4,YMMWORD[((128-128))+rcx] 782 vmovdqu ymm5,YMMWORD[((160-128))+rcx] 783 vmovdqu ymm6,YMMWORD[((192-128))+rcx] 784 vmovdqu ymm7,YMMWORD[((224-128))+rcx] 785 vmovdqu ymm8,YMMWORD[((256-128))+rcx] 786 lea rcx,[((64+128))+rsp] 787 vmovdqu YMMWORD[(0-128)+rcx],ymm0 788 vpxor ymm0,ymm0,ymm0 789 vmovdqu YMMWORD[(32-128)+rcx],ymm1 790 vpxor ymm1,ymm1,ymm1 791 vmovdqu YMMWORD[(64-128)+rcx],ymm2 792 vpxor ymm2,ymm2,ymm2 793 vmovdqu YMMWORD[(96-128)+rcx],ymm3 794 vpxor ymm3,ymm3,ymm3 795 vmovdqu YMMWORD[(128-128)+rcx],ymm4 796 vpxor ymm4,ymm4,ymm4 797 vmovdqu YMMWORD[(160-128)+rcx],ymm5 798 vpxor ymm5,ymm5,ymm5 799 vmovdqu YMMWORD[(192-128)+rcx],ymm6 800 vpxor ymm6,ymm6,ymm6 801 vmovdqu YMMWORD[(224-128)+rcx],ymm7 802 vpxor ymm7,ymm7,ymm7 803 vmovdqu YMMWORD[(256-128)+rcx],ymm8 804 vmovdqa ymm8,ymm0 805 vmovdqu YMMWORD[(288-128)+rcx],ymm9 806 $L$mul_1024_no_n_copy: 807 and rsp,-64 808 809 mov rbx,QWORD[r13] 810 vpbroadcastq ymm10,QWORD[r13] 811 vmovdqu YMMWORD[rsp],ymm0 812 xor r9,r9 813 DB 0x67 814 xor r10,r10 815 xor r11,r11 816 xor r12,r12 817 818 vmovdqu ymm15,YMMWORD[$L$and_mask] 819 mov r14d,9 820 vmovdqu YMMWORD[(288-128)+rdi],ymm9 821 jmp NEAR $L$oop_mul_1024 822 823 ALIGN 32 824 $L$oop_mul_1024: 825 vpsrlq ymm9,ymm3,29 826 mov rax,rbx 827 imul rax,QWORD[((-128))+rsi] 828 add rax,r9 829 mov r10,rbx 830 imul r10,QWORD[((8-128))+rsi] 831 add r10,QWORD[8+rsp] 832 833 mov r9,rax 834 imul eax,r8d 835 and eax,0x1fffffff 836 837 mov r11,rbx 838 imul r11,QWORD[((16-128))+rsi] 839 add r11,QWORD[16+rsp] 840 841 mov r12,rbx 842 imul r12,QWORD[((24-128))+rsi] 843 add r12,QWORD[24+rsp] 844 vpmuludq ymm0,ymm10,YMMWORD[((32-128))+rsi] 845 vmovd xmm11,eax 846 vpaddq ymm1,ymm1,ymm0 847 vpmuludq ymm12,ymm10,YMMWORD[((64-128))+rsi] 848 vpbroadcastq ymm11,xmm11 849 vpaddq ymm2,ymm2,ymm12 850 vpmuludq ymm13,ymm10,YMMWORD[((96-128))+rsi] 851 vpand ymm3,ymm3,ymm15 852 vpaddq ymm3,ymm3,ymm13 853 vpmuludq ymm0,ymm10,YMMWORD[((128-128))+rsi] 854 vpaddq ymm4,ymm4,ymm0 855 vpmuludq ymm12,ymm10,YMMWORD[((160-128))+rsi] 856 vpaddq ymm5,ymm5,ymm12 857 vpmuludq ymm13,ymm10,YMMWORD[((192-128))+rsi] 858 vpaddq ymm6,ymm6,ymm13 859 vpmuludq ymm0,ymm10,YMMWORD[((224-128))+rsi] 860 vpermq ymm9,ymm9,0x93 861 vpaddq ymm7,ymm7,ymm0 862 vpmuludq ymm12,ymm10,YMMWORD[((256-128))+rsi] 863 vpbroadcastq ymm10,QWORD[8+r13] 864 vpaddq ymm8,ymm8,ymm12 865 866 mov rdx,rax 867 imul rax,QWORD[((-128))+rcx] 868 add r9,rax 869 mov rax,rdx 870 imul rax,QWORD[((8-128))+rcx] 871 add r10,rax 872 mov rax,rdx 873 imul rax,QWORD[((16-128))+rcx] 874 add r11,rax 875 shr r9,29 876 imul rdx,QWORD[((24-128))+rcx] 877 add r12,rdx 878 add r10,r9 879 880 vpmuludq ymm13,ymm11,YMMWORD[((32-128))+rcx] 881 vmovq rbx,xmm10 882 vpaddq ymm1,ymm1,ymm13 883 vpmuludq ymm0,ymm11,YMMWORD[((64-128))+rcx] 884 vpaddq ymm2,ymm2,ymm0 885 vpmuludq ymm12,ymm11,YMMWORD[((96-128))+rcx] 886 vpaddq ymm3,ymm3,ymm12 887 vpmuludq ymm13,ymm11,YMMWORD[((128-128))+rcx] 888 vpaddq ymm4,ymm4,ymm13 889 vpmuludq ymm0,ymm11,YMMWORD[((160-128))+rcx] 890 vpaddq ymm5,ymm5,ymm0 891 vpmuludq ymm12,ymm11,YMMWORD[((192-128))+rcx] 892 vpaddq ymm6,ymm6,ymm12 893 vpmuludq ymm13,ymm11,YMMWORD[((224-128))+rcx] 894 vpblendd ymm12,ymm9,ymm14,3 895 vpaddq ymm7,ymm7,ymm13 896 vpmuludq ymm0,ymm11,YMMWORD[((256-128))+rcx] 897 vpaddq ymm3,ymm3,ymm12 898 vpaddq ymm8,ymm8,ymm0 899 900 mov rax,rbx 901 imul rax,QWORD[((-128))+rsi] 902 add r10,rax 903 vmovdqu ymm12,YMMWORD[((-8+32-128))+rsi] 904 mov rax,rbx 905 imul rax,QWORD[((8-128))+rsi] 906 add r11,rax 907 vmovdqu ymm13,YMMWORD[((-8+64-128))+rsi] 908 909 mov rax,r10 910 vpblendd ymm9,ymm9,ymm14,0xfc 911 imul eax,r8d 912 vpaddq ymm4,ymm4,ymm9 913 and eax,0x1fffffff 914 915 imul rbx,QWORD[((16-128))+rsi] 916 add r12,rbx 917 vpmuludq ymm12,ymm12,ymm10 918 vmovd xmm11,eax 919 vmovdqu ymm0,YMMWORD[((-8+96-128))+rsi] 920 vpaddq ymm1,ymm1,ymm12 921 vpmuludq ymm13,ymm13,ymm10 922 vpbroadcastq ymm11,xmm11 923 vmovdqu ymm12,YMMWORD[((-8+128-128))+rsi] 924 vpaddq ymm2,ymm2,ymm13 925 vpmuludq ymm0,ymm0,ymm10 926 vmovdqu ymm13,YMMWORD[((-8+160-128))+rsi] 927 vpaddq ymm3,ymm3,ymm0 928 vpmuludq ymm12,ymm12,ymm10 929 vmovdqu ymm0,YMMWORD[((-8+192-128))+rsi] 930 vpaddq ymm4,ymm4,ymm12 931 vpmuludq ymm13,ymm13,ymm10 932 vmovdqu ymm12,YMMWORD[((-8+224-128))+rsi] 933 vpaddq ymm5,ymm5,ymm13 934 vpmuludq ymm0,ymm0,ymm10 935 vmovdqu ymm13,YMMWORD[((-8+256-128))+rsi] 936 vpaddq ymm6,ymm6,ymm0 937 vpmuludq ymm12,ymm12,ymm10 938 vmovdqu ymm9,YMMWORD[((-8+288-128))+rsi] 939 vpaddq ymm7,ymm7,ymm12 940 vpmuludq ymm13,ymm13,ymm10 941 vpaddq ymm8,ymm8,ymm13 942 vpmuludq ymm9,ymm9,ymm10 943 vpbroadcastq ymm10,QWORD[16+r13] 944 945 mov rdx,rax 946 imul rax,QWORD[((-128))+rcx] 947 add r10,rax 948 vmovdqu ymm0,YMMWORD[((-8+32-128))+rcx] 949 mov rax,rdx 950 imul rax,QWORD[((8-128))+rcx] 951 add r11,rax 952 vmovdqu ymm12,YMMWORD[((-8+64-128))+rcx] 953 shr r10,29 954 imul rdx,QWORD[((16-128))+rcx] 955 add r12,rdx 956 add r11,r10 957 958 vpmuludq ymm0,ymm0,ymm11 959 vmovq rbx,xmm10 960 vmovdqu ymm13,YMMWORD[((-8+96-128))+rcx] 961 vpaddq ymm1,ymm1,ymm0 962 vpmuludq ymm12,ymm12,ymm11 963 vmovdqu ymm0,YMMWORD[((-8+128-128))+rcx] 964 vpaddq ymm2,ymm2,ymm12 965 vpmuludq ymm13,ymm13,ymm11 966 vmovdqu ymm12,YMMWORD[((-8+160-128))+rcx] 967 vpaddq ymm3,ymm3,ymm13 968 vpmuludq ymm0,ymm0,ymm11 969 vmovdqu ymm13,YMMWORD[((-8+192-128))+rcx] 970 vpaddq ymm4,ymm4,ymm0 971 vpmuludq ymm12,ymm12,ymm11 972 vmovdqu ymm0,YMMWORD[((-8+224-128))+rcx] 973 vpaddq ymm5,ymm5,ymm12 974 vpmuludq ymm13,ymm13,ymm11 975 vmovdqu ymm12,YMMWORD[((-8+256-128))+rcx] 976 vpaddq ymm6,ymm6,ymm13 977 vpmuludq ymm0,ymm0,ymm11 978 vmovdqu ymm13,YMMWORD[((-8+288-128))+rcx] 979 vpaddq ymm7,ymm7,ymm0 980 vpmuludq ymm12,ymm12,ymm11 981 vpaddq ymm8,ymm8,ymm12 982 vpmuludq ymm13,ymm13,ymm11 983 vpaddq ymm9,ymm9,ymm13 984 985 vmovdqu ymm0,YMMWORD[((-16+32-128))+rsi] 986 mov rax,rbx 987 imul rax,QWORD[((-128))+rsi] 988 add rax,r11 989 990 vmovdqu ymm12,YMMWORD[((-16+64-128))+rsi] 991 mov r11,rax 992 imul eax,r8d 993 and eax,0x1fffffff 994 995 imul rbx,QWORD[((8-128))+rsi] 996 add r12,rbx 997 vpmuludq ymm0,ymm0,ymm10 998 vmovd xmm11,eax 999 vmovdqu ymm13,YMMWORD[((-16+96-128))+rsi] 1000 vpaddq ymm1,ymm1,ymm0 1001 vpmuludq ymm12,ymm12,ymm10 1002 vpbroadcastq ymm11,xmm11 1003 vmovdqu ymm0,YMMWORD[((-16+128-128))+rsi] 1004 vpaddq ymm2,ymm2,ymm12 1005 vpmuludq ymm13,ymm13,ymm10 1006 vmovdqu ymm12,YMMWORD[((-16+160-128))+rsi] 1007 vpaddq ymm3,ymm3,ymm13 1008 vpmuludq ymm0,ymm0,ymm10 1009 vmovdqu ymm13,YMMWORD[((-16+192-128))+rsi] 1010 vpaddq ymm4,ymm4,ymm0 1011 vpmuludq ymm12,ymm12,ymm10 1012 vmovdqu ymm0,YMMWORD[((-16+224-128))+rsi] 1013 vpaddq ymm5,ymm5,ymm12 1014 vpmuludq ymm13,ymm13,ymm10 1015 vmovdqu ymm12,YMMWORD[((-16+256-128))+rsi] 1016 vpaddq ymm6,ymm6,ymm13 1017 vpmuludq ymm0,ymm0,ymm10 1018 vmovdqu ymm13,YMMWORD[((-16+288-128))+rsi] 1019 vpaddq ymm7,ymm7,ymm0 1020 vpmuludq ymm12,ymm12,ymm10 1021 vpaddq ymm8,ymm8,ymm12 1022 vpmuludq ymm13,ymm13,ymm10 1023 vpbroadcastq ymm10,QWORD[24+r13] 1024 vpaddq ymm9,ymm9,ymm13 1025 1026 vmovdqu ymm0,YMMWORD[((-16+32-128))+rcx] 1027 mov rdx,rax 1028 imul rax,QWORD[((-128))+rcx] 1029 add r11,rax 1030 vmovdqu ymm12,YMMWORD[((-16+64-128))+rcx] 1031 imul rdx,QWORD[((8-128))+rcx] 1032 add r12,rdx 1033 shr r11,29 1034 1035 vpmuludq ymm0,ymm0,ymm11 1036 vmovq rbx,xmm10 1037 vmovdqu ymm13,YMMWORD[((-16+96-128))+rcx] 1038 vpaddq ymm1,ymm1,ymm0 1039 vpmuludq ymm12,ymm12,ymm11 1040 vmovdqu ymm0,YMMWORD[((-16+128-128))+rcx] 1041 vpaddq ymm2,ymm2,ymm12 1042 vpmuludq ymm13,ymm13,ymm11 1043 vmovdqu ymm12,YMMWORD[((-16+160-128))+rcx] 1044 vpaddq ymm3,ymm3,ymm13 1045 vpmuludq ymm0,ymm0,ymm11 1046 vmovdqu ymm13,YMMWORD[((-16+192-128))+rcx] 1047 vpaddq ymm4,ymm4,ymm0 1048 vpmuludq ymm12,ymm12,ymm11 1049 vmovdqu ymm0,YMMWORD[((-16+224-128))+rcx] 1050 vpaddq ymm5,ymm5,ymm12 1051 vpmuludq ymm13,ymm13,ymm11 1052 vmovdqu ymm12,YMMWORD[((-16+256-128))+rcx] 1053 vpaddq ymm6,ymm6,ymm13 1054 vpmuludq ymm0,ymm0,ymm11 1055 vmovdqu ymm13,YMMWORD[((-16+288-128))+rcx] 1056 vpaddq ymm7,ymm7,ymm0 1057 vpmuludq ymm12,ymm12,ymm11 1058 vmovdqu ymm0,YMMWORD[((-24+32-128))+rsi] 1059 vpaddq ymm8,ymm8,ymm12 1060 vpmuludq ymm13,ymm13,ymm11 1061 vmovdqu ymm12,YMMWORD[((-24+64-128))+rsi] 1062 vpaddq ymm9,ymm9,ymm13 1063 1064 add r12,r11 1065 imul rbx,QWORD[((-128))+rsi] 1066 add r12,rbx 1067 1068 mov rax,r12 1069 imul eax,r8d 1070 and eax,0x1fffffff 1071 1072 vpmuludq ymm0,ymm0,ymm10 1073 vmovd xmm11,eax 1074 vmovdqu ymm13,YMMWORD[((-24+96-128))+rsi] 1075 vpaddq ymm1,ymm1,ymm0 1076 vpmuludq ymm12,ymm12,ymm10 1077 vpbroadcastq ymm11,xmm11 1078 vmovdqu ymm0,YMMWORD[((-24+128-128))+rsi] 1079 vpaddq ymm2,ymm2,ymm12 1080 vpmuludq ymm13,ymm13,ymm10 1081 vmovdqu ymm12,YMMWORD[((-24+160-128))+rsi] 1082 vpaddq ymm3,ymm3,ymm13 1083 vpmuludq ymm0,ymm0,ymm10 1084 vmovdqu ymm13,YMMWORD[((-24+192-128))+rsi] 1085 vpaddq ymm4,ymm4,ymm0 1086 vpmuludq ymm12,ymm12,ymm10 1087 vmovdqu ymm0,YMMWORD[((-24+224-128))+rsi] 1088 vpaddq ymm5,ymm5,ymm12 1089 vpmuludq ymm13,ymm13,ymm10 1090 vmovdqu ymm12,YMMWORD[((-24+256-128))+rsi] 1091 vpaddq ymm6,ymm6,ymm13 1092 vpmuludq ymm0,ymm0,ymm10 1093 vmovdqu ymm13,YMMWORD[((-24+288-128))+rsi] 1094 vpaddq ymm7,ymm7,ymm0 1095 vpmuludq ymm12,ymm12,ymm10 1096 vpaddq ymm8,ymm8,ymm12 1097 vpmuludq ymm13,ymm13,ymm10 1098 vpbroadcastq ymm10,QWORD[32+r13] 1099 vpaddq ymm9,ymm9,ymm13 1100 add r13,32 1101 1102 vmovdqu ymm0,YMMWORD[((-24+32-128))+rcx] 1103 imul rax,QWORD[((-128))+rcx] 1104 add r12,rax 1105 shr r12,29 1106 1107 vmovdqu ymm12,YMMWORD[((-24+64-128))+rcx] 1108 vpmuludq ymm0,ymm0,ymm11 1109 vmovq rbx,xmm10 1110 vmovdqu ymm13,YMMWORD[((-24+96-128))+rcx] 1111 vpaddq ymm0,ymm1,ymm0 1112 vpmuludq ymm12,ymm12,ymm11 1113 vmovdqu YMMWORD[rsp],ymm0 1114 vpaddq ymm1,ymm2,ymm12 1115 vmovdqu ymm0,YMMWORD[((-24+128-128))+rcx] 1116 vpmuludq ymm13,ymm13,ymm11 1117 vmovdqu ymm12,YMMWORD[((-24+160-128))+rcx] 1118 vpaddq ymm2,ymm3,ymm13 1119 vpmuludq ymm0,ymm0,ymm11 1120 vmovdqu ymm13,YMMWORD[((-24+192-128))+rcx] 1121 vpaddq ymm3,ymm4,ymm0 1122 vpmuludq ymm12,ymm12,ymm11 1123 vmovdqu ymm0,YMMWORD[((-24+224-128))+rcx] 1124 vpaddq ymm4,ymm5,ymm12 1125 vpmuludq ymm13,ymm13,ymm11 1126 vmovdqu ymm12,YMMWORD[((-24+256-128))+rcx] 1127 vpaddq ymm5,ymm6,ymm13 1128 vpmuludq ymm0,ymm0,ymm11 1129 vmovdqu ymm13,YMMWORD[((-24+288-128))+rcx] 1130 mov r9,r12 1131 vpaddq ymm6,ymm7,ymm0 1132 vpmuludq ymm12,ymm12,ymm11 1133 add r9,QWORD[rsp] 1134 vpaddq ymm7,ymm8,ymm12 1135 vpmuludq ymm13,ymm13,ymm11 1136 vmovq xmm12,r12 1137 vpaddq ymm8,ymm9,ymm13 1138 1139 dec r14d 1140 jnz NEAR $L$oop_mul_1024 1141 vpaddq ymm0,ymm12,YMMWORD[rsp] 1142 1143 vpsrlq ymm12,ymm0,29 1144 vpand ymm0,ymm0,ymm15 1145 vpsrlq ymm13,ymm1,29 1146 vpand ymm1,ymm1,ymm15 1147 vpsrlq ymm10,ymm2,29 1148 vpermq ymm12,ymm12,0x93 1149 vpand ymm2,ymm2,ymm15 1150 vpsrlq ymm11,ymm3,29 1151 vpermq ymm13,ymm13,0x93 1152 vpand ymm3,ymm3,ymm15 1153 1154 vpblendd ymm9,ymm12,ymm14,3 1155 vpermq ymm10,ymm10,0x93 1156 vpblendd ymm12,ymm13,ymm12,3 1157 vpermq ymm11,ymm11,0x93 1158 vpaddq ymm0,ymm0,ymm9 1159 vpblendd ymm13,ymm10,ymm13,3 1160 vpaddq ymm1,ymm1,ymm12 1161 vpblendd ymm10,ymm11,ymm10,3 1162 vpaddq ymm2,ymm2,ymm13 1163 vpblendd ymm11,ymm14,ymm11,3 1164 vpaddq ymm3,ymm3,ymm10 1165 vpaddq ymm4,ymm4,ymm11 1166 1167 vpsrlq ymm12,ymm0,29 1168 vpand ymm0,ymm0,ymm15 1169 vpsrlq ymm13,ymm1,29 1170 vpand ymm1,ymm1,ymm15 1171 vpsrlq ymm10,ymm2,29 1172 vpermq ymm12,ymm12,0x93 1173 vpand ymm2,ymm2,ymm15 1174 vpsrlq ymm11,ymm3,29 1175 vpermq ymm13,ymm13,0x93 1176 vpand ymm3,ymm3,ymm15 1177 vpermq ymm10,ymm10,0x93 1178 1179 vpblendd ymm9,ymm12,ymm14,3 1180 vpermq ymm11,ymm11,0x93 1181 vpblendd ymm12,ymm13,ymm12,3 1182 vpaddq ymm0,ymm0,ymm9 1183 vpblendd ymm13,ymm10,ymm13,3 1184 vpaddq ymm1,ymm1,ymm12 1185 vpblendd ymm10,ymm11,ymm10,3 1186 vpaddq ymm2,ymm2,ymm13 1187 vpblendd ymm11,ymm14,ymm11,3 1188 vpaddq ymm3,ymm3,ymm10 1189 vpaddq ymm4,ymm4,ymm11 1190 1191 vmovdqu YMMWORD[(0-128)+rdi],ymm0 1192 vmovdqu YMMWORD[(32-128)+rdi],ymm1 1193 vmovdqu YMMWORD[(64-128)+rdi],ymm2 1194 vmovdqu YMMWORD[(96-128)+rdi],ymm3 1195 vpsrlq ymm12,ymm4,29 1196 vpand ymm4,ymm4,ymm15 1197 vpsrlq ymm13,ymm5,29 1198 vpand ymm5,ymm5,ymm15 1199 vpsrlq ymm10,ymm6,29 1200 vpermq ymm12,ymm12,0x93 1201 vpand ymm6,ymm6,ymm15 1202 vpsrlq ymm11,ymm7,29 1203 vpermq ymm13,ymm13,0x93 1204 vpand ymm7,ymm7,ymm15 1205 vpsrlq ymm0,ymm8,29 1206 vpermq ymm10,ymm10,0x93 1207 vpand ymm8,ymm8,ymm15 1208 vpermq ymm11,ymm11,0x93 1209 1210 vpblendd ymm9,ymm12,ymm14,3 1211 vpermq ymm0,ymm0,0x93 1212 vpblendd ymm12,ymm13,ymm12,3 1213 vpaddq ymm4,ymm4,ymm9 1214 vpblendd ymm13,ymm10,ymm13,3 1215 vpaddq ymm5,ymm5,ymm12 1216 vpblendd ymm10,ymm11,ymm10,3 1217 vpaddq ymm6,ymm6,ymm13 1218 vpblendd ymm11,ymm0,ymm11,3 1219 vpaddq ymm7,ymm7,ymm10 1220 vpaddq ymm8,ymm8,ymm11 1221 1222 vpsrlq ymm12,ymm4,29 1223 vpand ymm4,ymm4,ymm15 1224 vpsrlq ymm13,ymm5,29 1225 vpand ymm5,ymm5,ymm15 1226 vpsrlq ymm10,ymm6,29 1227 vpermq ymm12,ymm12,0x93 1228 vpand ymm6,ymm6,ymm15 1229 vpsrlq ymm11,ymm7,29 1230 vpermq ymm13,ymm13,0x93 1231 vpand ymm7,ymm7,ymm15 1232 vpsrlq ymm0,ymm8,29 1233 vpermq ymm10,ymm10,0x93 1234 vpand ymm8,ymm8,ymm15 1235 vpermq ymm11,ymm11,0x93 1236 1237 vpblendd ymm9,ymm12,ymm14,3 1238 vpermq ymm0,ymm0,0x93 1239 vpblendd ymm12,ymm13,ymm12,3 1240 vpaddq ymm4,ymm4,ymm9 1241 vpblendd ymm13,ymm10,ymm13,3 1242 vpaddq ymm5,ymm5,ymm12 1243 vpblendd ymm10,ymm11,ymm10,3 1244 vpaddq ymm6,ymm6,ymm13 1245 vpblendd ymm11,ymm0,ymm11,3 1246 vpaddq ymm7,ymm7,ymm10 1247 vpaddq ymm8,ymm8,ymm11 1248 1249 vmovdqu YMMWORD[(128-128)+rdi],ymm4 1250 vmovdqu YMMWORD[(160-128)+rdi],ymm5 1251 vmovdqu YMMWORD[(192-128)+rdi],ymm6 1252 vmovdqu YMMWORD[(224-128)+rdi],ymm7 1253 vmovdqu YMMWORD[(256-128)+rdi],ymm8 1254 vzeroupper 1255 1256 mov rax,rbp 1257 1258 $L$mul_1024_in_tail: 1259 movaps xmm6,XMMWORD[((-216))+rax] 1260 movaps xmm7,XMMWORD[((-200))+rax] 1261 movaps xmm8,XMMWORD[((-184))+rax] 1262 movaps xmm9,XMMWORD[((-168))+rax] 1263 movaps xmm10,XMMWORD[((-152))+rax] 1264 movaps xmm11,XMMWORD[((-136))+rax] 1265 movaps xmm12,XMMWORD[((-120))+rax] 1266 movaps xmm13,XMMWORD[((-104))+rax] 1267 movaps xmm14,XMMWORD[((-88))+rax] 1268 movaps xmm15,XMMWORD[((-72))+rax] 1269 mov r15,QWORD[((-48))+rax] 1270 1271 mov r14,QWORD[((-40))+rax] 1272 1273 mov r13,QWORD[((-32))+rax] 1274 1275 mov r12,QWORD[((-24))+rax] 1276 1277 mov rbp,QWORD[((-16))+rax] 1278 1279 mov rbx,QWORD[((-8))+rax] 1280 1281 lea rsp,[rax] 1282 1283 $L$mul_1024_epilogue: 1284 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1285 mov rsi,QWORD[16+rsp] 1286 DB 0F3h,0C3h ;repret 1287 1288 $L$SEH_end_rsaz_1024_mul_avx2: 1289 global rsaz_1024_red2norm_avx2 1290 1291 ALIGN 32 1292 rsaz_1024_red2norm_avx2: 1293 1294 sub rdx,-128 1295 xor rax,rax 1296 mov r8,QWORD[((-128))+rdx] 1297 mov r9,QWORD[((-120))+rdx] 1298 mov r10,QWORD[((-112))+rdx] 1299 shl r8,0 1300 shl r9,29 1301 mov r11,r10 1302 shl r10,58 1303 shr r11,6 1304 add rax,r8 1305 add rax,r9 1306 add rax,r10 1307 adc r11,0 1308 mov QWORD[rcx],rax 1309 mov rax,r11 1310 mov r8,QWORD[((-104))+rdx] 1311 mov r9,QWORD[((-96))+rdx] 1312 shl r8,23 1313 mov r10,r9 1314 shl r9,52 1315 shr r10,12 1316 add rax,r8 1317 add rax,r9 1318 adc r10,0 1319 mov QWORD[8+rcx],rax 1320 mov rax,r10 1321 mov r11,QWORD[((-88))+rdx] 1322 mov r8,QWORD[((-80))+rdx] 1323 shl r11,17 1324 mov r9,r8 1325 shl r8,46 1326 shr r9,18 1327 add rax,r11 1328 add rax,r8 1329 adc r9,0 1330 mov QWORD[16+rcx],rax 1331 mov rax,r9 1332 mov r10,QWORD[((-72))+rdx] 1333 mov r11,QWORD[((-64))+rdx] 1334 shl r10,11 1335 mov r8,r11 1336 shl r11,40 1337 shr r8,24 1338 add rax,r10 1339 add rax,r11 1340 adc r8,0 1341 mov QWORD[24+rcx],rax 1342 mov rax,r8 1343 mov r9,QWORD[((-56))+rdx] 1344 mov r10,QWORD[((-48))+rdx] 1345 mov r11,QWORD[((-40))+rdx] 1346 shl r9,5 1347 shl r10,34 1348 mov r8,r11 1349 shl r11,63 1350 shr r8,1 1351 add rax,r9 1352 add rax,r10 1353 add rax,r11 1354 adc r8,0 1355 mov QWORD[32+rcx],rax 1356 mov rax,r8 1357 mov r9,QWORD[((-32))+rdx] 1358 mov r10,QWORD[((-24))+rdx] 1359 shl r9,28 1360 mov r11,r10 1361 shl r10,57 1362 shr r11,7 1363 add rax,r9 1364 add rax,r10 1365 adc r11,0 1366 mov QWORD[40+rcx],rax 1367 mov rax,r11 1368 mov r8,QWORD[((-16))+rdx] 1369 mov r9,QWORD[((-8))+rdx] 1370 shl r8,22 1371 mov r10,r9 1372 shl r9,51 1373 shr r10,13 1374 add rax,r8 1375 add rax,r9 1376 adc r10,0 1377 mov QWORD[48+rcx],rax 1378 mov rax,r10 1379 mov r11,QWORD[rdx] 1380 mov r8,QWORD[8+rdx] 1381 shl r11,16 1382 mov r9,r8 1383 shl r8,45 1384 shr r9,19 1385 add rax,r11 1386 add rax,r8 1387 adc r9,0 1388 mov QWORD[56+rcx],rax 1389 mov rax,r9 1390 mov r10,QWORD[16+rdx] 1391 mov r11,QWORD[24+rdx] 1392 shl r10,10 1393 mov r8,r11 1394 shl r11,39 1395 shr r8,25 1396 add rax,r10 1397 add rax,r11 1398 adc r8,0 1399 mov QWORD[64+rcx],rax 1400 mov rax,r8 1401 mov r9,QWORD[32+rdx] 1402 mov r10,QWORD[40+rdx] 1403 mov r11,QWORD[48+rdx] 1404 shl r9,4 1405 shl r10,33 1406 mov r8,r11 1407 shl r11,62 1408 shr r8,2 1409 add rax,r9 1410 add rax,r10 1411 add rax,r11 1412 adc r8,0 1413 mov QWORD[72+rcx],rax 1414 mov rax,r8 1415 mov r9,QWORD[56+rdx] 1416 mov r10,QWORD[64+rdx] 1417 shl r9,27 1418 mov r11,r10 1419 shl r10,56 1420 shr r11,8 1421 add rax,r9 1422 add rax,r10 1423 adc r11,0 1424 mov QWORD[80+rcx],rax 1425 mov rax,r11 1426 mov r8,QWORD[72+rdx] 1427 mov r9,QWORD[80+rdx] 1428 shl r8,21 1429 mov r10,r9 1430 shl r9,50 1431 shr r10,14 1432 add rax,r8 1433 add rax,r9 1434 adc r10,0 1435 mov QWORD[88+rcx],rax 1436 mov rax,r10 1437 mov r11,QWORD[88+rdx] 1438 mov r8,QWORD[96+rdx] 1439 shl r11,15 1440 mov r9,r8 1441 shl r8,44 1442 shr r9,20 1443 add rax,r11 1444 add rax,r8 1445 adc r9,0 1446 mov QWORD[96+rcx],rax 1447 mov rax,r9 1448 mov r10,QWORD[104+rdx] 1449 mov r11,QWORD[112+rdx] 1450 shl r10,9 1451 mov r8,r11 1452 shl r11,38 1453 shr r8,26 1454 add rax,r10 1455 add rax,r11 1456 adc r8,0 1457 mov QWORD[104+rcx],rax 1458 mov rax,r8 1459 mov r9,QWORD[120+rdx] 1460 mov r10,QWORD[128+rdx] 1461 mov r11,QWORD[136+rdx] 1462 shl r9,3 1463 shl r10,32 1464 mov r8,r11 1465 shl r11,61 1466 shr r8,3 1467 add rax,r9 1468 add rax,r10 1469 add rax,r11 1470 adc r8,0 1471 mov QWORD[112+rcx],rax 1472 mov rax,r8 1473 mov r9,QWORD[144+rdx] 1474 mov r10,QWORD[152+rdx] 1475 shl r9,26 1476 mov r11,r10 1477 shl r10,55 1478 shr r11,9 1479 add rax,r9 1480 add rax,r10 1481 adc r11,0 1482 mov QWORD[120+rcx],rax 1483 mov rax,r11 1484 DB 0F3h,0C3h ;repret 1485 1486 1487 1488 global rsaz_1024_norm2red_avx2 1489 1490 ALIGN 32 1491 rsaz_1024_norm2red_avx2: 1492 1493 sub rcx,-128 1494 mov r8,QWORD[rdx] 1495 mov eax,0x1fffffff 1496 mov r9,QWORD[8+rdx] 1497 mov r11,r8 1498 shr r11,0 1499 and r11,rax 1500 mov QWORD[((-128))+rcx],r11 1501 mov r10,r8 1502 shr r10,29 1503 and r10,rax 1504 mov QWORD[((-120))+rcx],r10 1505 shrd r8,r9,58 1506 and r8,rax 1507 mov QWORD[((-112))+rcx],r8 1508 mov r10,QWORD[16+rdx] 1509 mov r8,r9 1510 shr r8,23 1511 and r8,rax 1512 mov QWORD[((-104))+rcx],r8 1513 shrd r9,r10,52 1514 and r9,rax 1515 mov QWORD[((-96))+rcx],r9 1516 mov r11,QWORD[24+rdx] 1517 mov r9,r10 1518 shr r9,17 1519 and r9,rax 1520 mov QWORD[((-88))+rcx],r9 1521 shrd r10,r11,46 1522 and r10,rax 1523 mov QWORD[((-80))+rcx],r10 1524 mov r8,QWORD[32+rdx] 1525 mov r10,r11 1526 shr r10,11 1527 and r10,rax 1528 mov QWORD[((-72))+rcx],r10 1529 shrd r11,r8,40 1530 and r11,rax 1531 mov QWORD[((-64))+rcx],r11 1532 mov r9,QWORD[40+rdx] 1533 mov r11,r8 1534 shr r11,5 1535 and r11,rax 1536 mov QWORD[((-56))+rcx],r11 1537 mov r10,r8 1538 shr r10,34 1539 and r10,rax 1540 mov QWORD[((-48))+rcx],r10 1541 shrd r8,r9,63 1542 and r8,rax 1543 mov QWORD[((-40))+rcx],r8 1544 mov r10,QWORD[48+rdx] 1545 mov r8,r9 1546 shr r8,28 1547 and r8,rax 1548 mov QWORD[((-32))+rcx],r8 1549 shrd r9,r10,57 1550 and r9,rax 1551 mov QWORD[((-24))+rcx],r9 1552 mov r11,QWORD[56+rdx] 1553 mov r9,r10 1554 shr r9,22 1555 and r9,rax 1556 mov QWORD[((-16))+rcx],r9 1557 shrd r10,r11,51 1558 and r10,rax 1559 mov QWORD[((-8))+rcx],r10 1560 mov r8,QWORD[64+rdx] 1561 mov r10,r11 1562 shr r10,16 1563 and r10,rax 1564 mov QWORD[rcx],r10 1565 shrd r11,r8,45 1566 and r11,rax 1567 mov QWORD[8+rcx],r11 1568 mov r9,QWORD[72+rdx] 1569 mov r11,r8 1570 shr r11,10 1571 and r11,rax 1572 mov QWORD[16+rcx],r11 1573 shrd r8,r9,39 1574 and r8,rax 1575 mov QWORD[24+rcx],r8 1576 mov r10,QWORD[80+rdx] 1577 mov r8,r9 1578 shr r8,4 1579 and r8,rax 1580 mov QWORD[32+rcx],r8 1581 mov r11,r9 1582 shr r11,33 1583 and r11,rax 1584 mov QWORD[40+rcx],r11 1585 shrd r9,r10,62 1586 and r9,rax 1587 mov QWORD[48+rcx],r9 1588 mov r11,QWORD[88+rdx] 1589 mov r9,r10 1590 shr r9,27 1591 and r9,rax 1592 mov QWORD[56+rcx],r9 1593 shrd r10,r11,56 1594 and r10,rax 1595 mov QWORD[64+rcx],r10 1596 mov r8,QWORD[96+rdx] 1597 mov r10,r11 1598 shr r10,21 1599 and r10,rax 1600 mov QWORD[72+rcx],r10 1601 shrd r11,r8,50 1602 and r11,rax 1603 mov QWORD[80+rcx],r11 1604 mov r9,QWORD[104+rdx] 1605 mov r11,r8 1606 shr r11,15 1607 and r11,rax 1608 mov QWORD[88+rcx],r11 1609 shrd r8,r9,44 1610 and r8,rax 1611 mov QWORD[96+rcx],r8 1612 mov r10,QWORD[112+rdx] 1613 mov r8,r9 1614 shr r8,9 1615 and r8,rax 1616 mov QWORD[104+rcx],r8 1617 shrd r9,r10,38 1618 and r9,rax 1619 mov QWORD[112+rcx],r9 1620 mov r11,QWORD[120+rdx] 1621 mov r9,r10 1622 shr r9,3 1623 and r9,rax 1624 mov QWORD[120+rcx],r9 1625 mov r8,r10 1626 shr r8,32 1627 and r8,rax 1628 mov QWORD[128+rcx],r8 1629 shrd r10,r11,61 1630 and r10,rax 1631 mov QWORD[136+rcx],r10 1632 xor r8,r8 1633 mov r10,r11 1634 shr r10,26 1635 and r10,rax 1636 mov QWORD[144+rcx],r10 1637 shrd r11,r8,55 1638 and r11,rax 1639 mov QWORD[152+rcx],r11 1640 mov QWORD[160+rcx],r8 1641 mov QWORD[168+rcx],r8 1642 mov QWORD[176+rcx],r8 1643 mov QWORD[184+rcx],r8 1644 DB 0F3h,0C3h ;repret 1645 1646 1647 global rsaz_1024_scatter5_avx2 1648 1649 ALIGN 32 1650 rsaz_1024_scatter5_avx2: 1651 1652 vzeroupper 1653 vmovdqu ymm5,YMMWORD[$L$scatter_permd] 1654 shl r8d,4 1655 lea rcx,[r8*1+rcx] 1656 mov eax,9 1657 jmp NEAR $L$oop_scatter_1024 1658 1659 ALIGN 32 1660 $L$oop_scatter_1024: 1661 vmovdqu ymm0,YMMWORD[rdx] 1662 lea rdx,[32+rdx] 1663 vpermd ymm0,ymm5,ymm0 1664 vmovdqu XMMWORD[rcx],xmm0 1665 lea rcx,[512+rcx] 1666 dec eax 1667 jnz NEAR $L$oop_scatter_1024 1668 1669 vzeroupper 1670 DB 0F3h,0C3h ;repret 1671 1672 1673 1674 global rsaz_1024_gather5_avx2 1675 1676 ALIGN 32 1677 rsaz_1024_gather5_avx2: 1678 1679 vzeroupper 1680 mov r11,rsp 1681 1682 lea rax,[((-136))+rsp] 1683 $L$SEH_begin_rsaz_1024_gather5: 1684 1685 DB 0x48,0x8d,0x60,0xe0 1686 DB 0xc5,0xf8,0x29,0x70,0xe0 1687 DB 0xc5,0xf8,0x29,0x78,0xf0 1688 DB 0xc5,0x78,0x29,0x40,0x00 1689 DB 0xc5,0x78,0x29,0x48,0x10 1690 DB 0xc5,0x78,0x29,0x50,0x20 1691 DB 0xc5,0x78,0x29,0x58,0x30 1692 DB 0xc5,0x78,0x29,0x60,0x40 1693 DB 0xc5,0x78,0x29,0x68,0x50 1694 DB 0xc5,0x78,0x29,0x70,0x60 1695 DB 0xc5,0x78,0x29,0x78,0x70 1696 lea rsp,[((-256))+rsp] 1697 and rsp,-32 1698 lea r10,[$L$inc] 1699 lea rax,[((-128))+rsp] 1700 1701 vmovd xmm4,r8d 1702 vmovdqa ymm0,YMMWORD[r10] 1703 vmovdqa ymm1,YMMWORD[32+r10] 1704 vmovdqa ymm5,YMMWORD[64+r10] 1705 vpbroadcastd ymm4,xmm4 1706 1707 vpaddd ymm2,ymm0,ymm5 1708 vpcmpeqd ymm0,ymm0,ymm4 1709 vpaddd ymm3,ymm1,ymm5 1710 vpcmpeqd ymm1,ymm1,ymm4 1711 vmovdqa YMMWORD[(0+128)+rax],ymm0 1712 vpaddd ymm0,ymm2,ymm5 1713 vpcmpeqd ymm2,ymm2,ymm4 1714 vmovdqa YMMWORD[(32+128)+rax],ymm1 1715 vpaddd ymm1,ymm3,ymm5 1716 vpcmpeqd ymm3,ymm3,ymm4 1717 vmovdqa YMMWORD[(64+128)+rax],ymm2 1718 vpaddd ymm2,ymm0,ymm5 1719 vpcmpeqd ymm0,ymm0,ymm4 1720 vmovdqa YMMWORD[(96+128)+rax],ymm3 1721 vpaddd ymm3,ymm1,ymm5 1722 vpcmpeqd ymm1,ymm1,ymm4 1723 vmovdqa YMMWORD[(128+128)+rax],ymm0 1724 vpaddd ymm8,ymm2,ymm5 1725 vpcmpeqd ymm2,ymm2,ymm4 1726 vmovdqa YMMWORD[(160+128)+rax],ymm1 1727 vpaddd ymm9,ymm3,ymm5 1728 vpcmpeqd ymm3,ymm3,ymm4 1729 vmovdqa YMMWORD[(192+128)+rax],ymm2 1730 vpaddd ymm10,ymm8,ymm5 1731 vpcmpeqd ymm8,ymm8,ymm4 1732 vmovdqa YMMWORD[(224+128)+rax],ymm3 1733 vpaddd ymm11,ymm9,ymm5 1734 vpcmpeqd ymm9,ymm9,ymm4 1735 vpaddd ymm12,ymm10,ymm5 1736 vpcmpeqd ymm10,ymm10,ymm4 1737 vpaddd ymm13,ymm11,ymm5 1738 vpcmpeqd ymm11,ymm11,ymm4 1739 vpaddd ymm14,ymm12,ymm5 1740 vpcmpeqd ymm12,ymm12,ymm4 1741 vpaddd ymm15,ymm13,ymm5 1742 vpcmpeqd ymm13,ymm13,ymm4 1743 vpcmpeqd ymm14,ymm14,ymm4 1744 vpcmpeqd ymm15,ymm15,ymm4 1745 1746 vmovdqa ymm7,YMMWORD[((-32))+r10] 1747 lea rdx,[128+rdx] 1748 mov r8d,9 1749 1750 $L$oop_gather_1024: 1751 vmovdqa ymm0,YMMWORD[((0-128))+rdx] 1752 vmovdqa ymm1,YMMWORD[((32-128))+rdx] 1753 vmovdqa ymm2,YMMWORD[((64-128))+rdx] 1754 vmovdqa ymm3,YMMWORD[((96-128))+rdx] 1755 vpand ymm0,ymm0,YMMWORD[((0+128))+rax] 1756 vpand ymm1,ymm1,YMMWORD[((32+128))+rax] 1757 vpand ymm2,ymm2,YMMWORD[((64+128))+rax] 1758 vpor ymm4,ymm1,ymm0 1759 vpand ymm3,ymm3,YMMWORD[((96+128))+rax] 1760 vmovdqa ymm0,YMMWORD[((128-128))+rdx] 1761 vmovdqa ymm1,YMMWORD[((160-128))+rdx] 1762 vpor ymm5,ymm3,ymm2 1763 vmovdqa ymm2,YMMWORD[((192-128))+rdx] 1764 vmovdqa ymm3,YMMWORD[((224-128))+rdx] 1765 vpand ymm0,ymm0,YMMWORD[((128+128))+rax] 1766 vpand ymm1,ymm1,YMMWORD[((160+128))+rax] 1767 vpand ymm2,ymm2,YMMWORD[((192+128))+rax] 1768 vpor ymm4,ymm4,ymm0 1769 vpand ymm3,ymm3,YMMWORD[((224+128))+rax] 1770 vpand ymm0,ymm8,YMMWORD[((256-128))+rdx] 1771 vpor ymm5,ymm5,ymm1 1772 vpand ymm1,ymm9,YMMWORD[((288-128))+rdx] 1773 vpor ymm4,ymm4,ymm2 1774 vpand ymm2,ymm10,YMMWORD[((320-128))+rdx] 1775 vpor ymm5,ymm5,ymm3 1776 vpand ymm3,ymm11,YMMWORD[((352-128))+rdx] 1777 vpor ymm4,ymm4,ymm0 1778 vpand ymm0,ymm12,YMMWORD[((384-128))+rdx] 1779 vpor ymm5,ymm5,ymm1 1780 vpand ymm1,ymm13,YMMWORD[((416-128))+rdx] 1781 vpor ymm4,ymm4,ymm2 1782 vpand ymm2,ymm14,YMMWORD[((448-128))+rdx] 1783 vpor ymm5,ymm5,ymm3 1784 vpand ymm3,ymm15,YMMWORD[((480-128))+rdx] 1785 lea rdx,[512+rdx] 1786 vpor ymm4,ymm4,ymm0 1787 vpor ymm5,ymm5,ymm1 1788 vpor ymm4,ymm4,ymm2 1789 vpor ymm5,ymm5,ymm3 1790 1791 vpor ymm4,ymm4,ymm5 1792 vextracti128 xmm5,ymm4,1 1793 vpor xmm5,xmm5,xmm4 1794 vpermd ymm5,ymm7,ymm5 1795 vmovdqu YMMWORD[rcx],ymm5 1796 lea rcx,[32+rcx] 1797 dec r8d 1798 jnz NEAR $L$oop_gather_1024 1799 1800 vpxor ymm0,ymm0,ymm0 1801 vmovdqu YMMWORD[rcx],ymm0 1802 vzeroupper 1803 movaps xmm6,XMMWORD[((-168))+r11] 1804 movaps xmm7,XMMWORD[((-152))+r11] 1805 movaps xmm8,XMMWORD[((-136))+r11] 1806 movaps xmm9,XMMWORD[((-120))+r11] 1807 movaps xmm10,XMMWORD[((-104))+r11] 1808 movaps xmm11,XMMWORD[((-88))+r11] 1809 movaps xmm12,XMMWORD[((-72))+r11] 1810 movaps xmm13,XMMWORD[((-56))+r11] 1811 movaps xmm14,XMMWORD[((-40))+r11] 1812 movaps xmm15,XMMWORD[((-24))+r11] 1813 lea rsp,[r11] 1814 1815 DB 0F3h,0C3h ;repret 1816 1817 $L$SEH_end_rsaz_1024_gather5: 1818 1819 EXTERN OPENSSL_ia32cap_P 8 1820 global rsaz_avx2_eligible 9 1821 1822 ALIGN 32 10 1823 rsaz_avx2_eligible: 11 xor eax,eax 1824 mov eax,DWORD[((OPENSSL_ia32cap_P+8))] 1825 mov ecx,524544 1826 mov edx,0 1827 and ecx,eax 1828 cmp ecx,524544 1829 cmove eax,edx 1830 and eax,32 1831 shr eax,5 12 1832 DB 0F3h,0C3h ;repret 13 1833 14 1834 15 global rsaz_1024_sqr_avx2 16 global rsaz_1024_mul_avx2 17 global rsaz_1024_norm2red_avx2 18 global rsaz_1024_red2norm_avx2 19 global rsaz_1024_scatter5_avx2 20 global rsaz_1024_gather5_avx2 21 22 rsaz_1024_sqr_avx2: 23 rsaz_1024_mul_avx2: 24 rsaz_1024_norm2red_avx2: 25 rsaz_1024_red2norm_avx2: 26 rsaz_1024_scatter5_avx2: 27 rsaz_1024_gather5_avx2: 28 DB 0x0f,0x0b 1835 ALIGN 64 1836 $L$and_mask: 1837 DQ 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff 1838 $L$scatter_permd: 1839 DD 0,2,4,6,7,7,7,7 1840 $L$gather_permd: 1841 DD 0,7,1,7,2,7,3,7 1842 $L$inc: 1843 DD 0,0,0,0,1,1,1,1 1844 DD 2,2,2,2,3,3,3,3 1845 DD 4,4,4,4,4,4,4,4 1846 ALIGN 64 1847 EXTERN __imp_RtlVirtualUnwind 1848 1849 ALIGN 16 1850 rsaz_se_handler: 1851 push rsi 1852 push rdi 1853 push rbx 1854 push rbp 1855 push r12 1856 push r13 1857 push r14 1858 push r15 1859 pushfq 1860 sub rsp,64 1861 1862 mov rax,QWORD[120+r8] 1863 mov rbx,QWORD[248+r8] 1864 1865 mov rsi,QWORD[8+r9] 1866 mov r11,QWORD[56+r9] 1867 1868 mov r10d,DWORD[r11] 1869 lea r10,[r10*1+rsi] 1870 cmp rbx,r10 1871 jb NEAR $L$common_seh_tail 1872 1873 mov r10d,DWORD[4+r11] 1874 lea r10,[r10*1+rsi] 1875 cmp rbx,r10 1876 jae NEAR $L$common_seh_tail 1877 1878 mov rbp,QWORD[160+r8] 1879 1880 mov r10d,DWORD[8+r11] 1881 lea r10,[r10*1+rsi] 1882 cmp rbx,r10 1883 cmovc rax,rbp 1884 1885 mov r15,QWORD[((-48))+rax] 1886 mov r14,QWORD[((-40))+rax] 1887 mov r13,QWORD[((-32))+rax] 1888 mov r12,QWORD[((-24))+rax] 1889 mov rbp,QWORD[((-16))+rax] 1890 mov rbx,QWORD[((-8))+rax] 1891 mov QWORD[240+r8],r15 1892 mov QWORD[232+r8],r14 1893 mov QWORD[224+r8],r13 1894 mov QWORD[216+r8],r12 1895 mov QWORD[160+r8],rbp 1896 mov QWORD[144+r8],rbx 1897 1898 lea rsi,[((-216))+rax] 1899 lea rdi,[512+r8] 1900 mov ecx,20 1901 DD 0xa548f3fc 1902 1903 $L$common_seh_tail: 1904 mov rdi,QWORD[8+rax] 1905 mov rsi,QWORD[16+rax] 1906 mov QWORD[152+r8],rax 1907 mov QWORD[168+r8],rsi 1908 mov QWORD[176+r8],rdi 1909 1910 mov rdi,QWORD[40+r9] 1911 mov rsi,r8 1912 mov ecx,154 1913 DD 0xa548f3fc 1914 1915 mov rsi,r9 1916 xor rcx,rcx 1917 mov rdx,QWORD[8+rsi] 1918 mov r8,QWORD[rsi] 1919 mov r9,QWORD[16+rsi] 1920 mov r10,QWORD[40+rsi] 1921 lea r11,[56+rsi] 1922 lea r12,[24+rsi] 1923 mov QWORD[32+rsp],r10 1924 mov QWORD[40+rsp],r11 1925 mov QWORD[48+rsp],r12 1926 mov QWORD[56+rsp],rcx 1927 call QWORD[__imp_RtlVirtualUnwind] 1928 1929 mov eax,1 1930 add rsp,64 1931 popfq 1932 pop r15 1933 pop r14 1934 pop r13 1935 pop r12 1936 pop rbp 1937 pop rbx 1938 pop rdi 1939 pop rsi 29 1940 DB 0F3h,0C3h ;repret 30 1941 1942 1943 section .pdata rdata align=4 1944 ALIGN 4 1945 DD $L$SEH_begin_rsaz_1024_sqr_avx2 wrt ..imagebase 1946 DD $L$SEH_end_rsaz_1024_sqr_avx2 wrt ..imagebase 1947 DD $L$SEH_info_rsaz_1024_sqr_avx2 wrt ..imagebase 1948 1949 DD $L$SEH_begin_rsaz_1024_mul_avx2 wrt ..imagebase 1950 DD $L$SEH_end_rsaz_1024_mul_avx2 wrt ..imagebase 1951 DD $L$SEH_info_rsaz_1024_mul_avx2 wrt ..imagebase 1952 1953 DD $L$SEH_begin_rsaz_1024_gather5 wrt ..imagebase 1954 DD $L$SEH_end_rsaz_1024_gather5 wrt ..imagebase 1955 DD $L$SEH_info_rsaz_1024_gather5 wrt ..imagebase 1956 section .xdata rdata align=8 1957 ALIGN 8 1958 $L$SEH_info_rsaz_1024_sqr_avx2: 1959 DB 9,0,0,0 1960 DD rsaz_se_handler wrt ..imagebase 1961 DD $L$sqr_1024_body wrt ..imagebase,$L$sqr_1024_epilogue wrt ..imagebase,$L$sqr_1024_in_tail wrt ..imagebase 1962 DD 0 1963 $L$SEH_info_rsaz_1024_mul_avx2: 1964 DB 9,0,0,0 1965 DD rsaz_se_handler wrt ..imagebase 1966 DD $L$mul_1024_body wrt ..imagebase,$L$mul_1024_epilogue wrt ..imagebase,$L$mul_1024_in_tail wrt ..imagebase 1967 DD 0 1968 $L$SEH_info_rsaz_1024_gather5: 1969 DB 0x01,0x36,0x17,0x0b 1970 DB 0x36,0xf8,0x09,0x00 1971 DB 0x31,0xe8,0x08,0x00 1972 DB 0x2c,0xd8,0x07,0x00 1973 DB 0x27,0xc8,0x06,0x00 1974 DB 0x22,0xb8,0x05,0x00 1975 DB 0x1d,0xa8,0x04,0x00 1976 DB 0x18,0x98,0x03,0x00 1977 DB 0x13,0x88,0x02,0x00 1978 DB 0x0e,0x78,0x01,0x00 1979 DB 0x09,0x68,0x00,0x00 1980 DB 0x04,0x01,0x15,0x00 1981 DB 0x00,0xb3,0x00,0x00 -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/rsaz-x86_64.S
r97373 r99371 44 44 mov rax,QWORD[8+rsi] 45 45 mov QWORD[128+rsp],rcx 46 mov r11d,0x80100 47 and r11d,DWORD[((OPENSSL_ia32cap_P+8))] 48 cmp r11d,0x80100 49 je NEAR $L$oop_sqrx 46 50 jmp NEAR $L$oop_sqr 47 51 … … 414 418 dec r8d 415 419 jnz NEAR $L$oop_sqr 420 jmp NEAR $L$sqr_tail 421 422 ALIGN 32 423 $L$oop_sqrx: 424 mov DWORD[((128+8))+rsp],r8d 425 DB 102,72,15,110,199 426 427 mulx r9,r8,rax 428 mov rbx,rax 429 430 mulx r10,rcx,QWORD[16+rsi] 431 xor rbp,rbp 432 433 mulx r11,rax,QWORD[24+rsi] 434 adcx r9,rcx 435 436 DB 0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00 437 adcx r10,rax 438 439 DB 0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00 440 adcx r11,rcx 441 442 mulx r14,rcx,QWORD[48+rsi] 443 adcx r12,rax 444 adcx r13,rcx 445 446 mulx r15,rax,QWORD[56+rsi] 447 adcx r14,rax 448 adcx r15,rbp 449 450 mulx rdi,rax,rdx 451 mov rdx,rbx 452 xor rcx,rcx 453 adox r8,r8 454 adcx r8,rdi 455 adox rcx,rbp 456 adcx rcx,rbp 457 458 mov QWORD[rsp],rax 459 mov QWORD[8+rsp],r8 460 461 462 DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00 463 adox r10,rax 464 adcx r11,rbx 465 466 mulx r8,rdi,QWORD[24+rsi] 467 adox r11,rdi 468 DB 0x66 469 adcx r12,r8 470 471 mulx rbx,rax,QWORD[32+rsi] 472 adox r12,rax 473 adcx r13,rbx 474 475 mulx r8,rdi,QWORD[40+rsi] 476 adox r13,rdi 477 adcx r14,r8 478 479 DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 480 adox r14,rax 481 adcx r15,rbx 482 483 DB 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00 484 adox r15,rdi 485 adcx r8,rbp 486 mulx rdi,rax,rdx 487 adox r8,rbp 488 DB 0x48,0x8b,0x96,0x10,0x00,0x00,0x00 489 490 xor rbx,rbx 491 adox r9,r9 492 493 adcx rax,rcx 494 adox r10,r10 495 adcx r9,rax 496 adox rbx,rbp 497 adcx r10,rdi 498 adcx rbx,rbp 499 500 mov QWORD[16+rsp],r9 501 DB 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 502 503 504 mulx r9,rdi,QWORD[24+rsi] 505 adox r12,rdi 506 adcx r13,r9 507 508 mulx rcx,rax,QWORD[32+rsi] 509 adox r13,rax 510 adcx r14,rcx 511 512 DB 0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00 513 adox r14,rdi 514 adcx r15,r9 515 516 DB 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00 517 adox r15,rax 518 adcx r8,rcx 519 520 mulx r9,rdi,QWORD[56+rsi] 521 adox r8,rdi 522 adcx r9,rbp 523 mulx rdi,rax,rdx 524 adox r9,rbp 525 mov rdx,QWORD[24+rsi] 526 527 xor rcx,rcx 528 adox r11,r11 529 530 adcx rax,rbx 531 adox r12,r12 532 adcx r11,rax 533 adox rcx,rbp 534 adcx r12,rdi 535 adcx rcx,rbp 536 537 mov QWORD[32+rsp],r11 538 mov QWORD[40+rsp],r12 539 540 541 mulx rbx,rax,QWORD[32+rsi] 542 adox r14,rax 543 adcx r15,rbx 544 545 mulx r10,rdi,QWORD[40+rsi] 546 adox r15,rdi 547 adcx r8,r10 548 549 mulx rbx,rax,QWORD[48+rsi] 550 adox r8,rax 551 adcx r9,rbx 552 553 mulx r10,rdi,QWORD[56+rsi] 554 adox r9,rdi 555 adcx r10,rbp 556 mulx rdi,rax,rdx 557 adox r10,rbp 558 mov rdx,QWORD[32+rsi] 559 560 xor rbx,rbx 561 adox r13,r13 562 563 adcx rax,rcx 564 adox r14,r14 565 adcx r13,rax 566 adox rbx,rbp 567 adcx r14,rdi 568 adcx rbx,rbp 569 570 mov QWORD[48+rsp],r13 571 mov QWORD[56+rsp],r14 572 573 574 mulx r11,rdi,QWORD[40+rsi] 575 adox r8,rdi 576 adcx r9,r11 577 578 mulx rcx,rax,QWORD[48+rsi] 579 adox r9,rax 580 adcx r10,rcx 581 582 mulx r11,rdi,QWORD[56+rsi] 583 adox r10,rdi 584 adcx r11,rbp 585 mulx rdi,rax,rdx 586 mov rdx,QWORD[40+rsi] 587 adox r11,rbp 588 589 xor rcx,rcx 590 adox r15,r15 591 592 adcx rax,rbx 593 adox r8,r8 594 adcx r15,rax 595 adox rcx,rbp 596 adcx r8,rdi 597 adcx rcx,rbp 598 599 mov QWORD[64+rsp],r15 600 mov QWORD[72+rsp],r8 601 602 603 DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 604 adox r10,rax 605 adcx r11,rbx 606 607 DB 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00 608 adox r11,rdi 609 adcx r12,rbp 610 mulx rdi,rax,rdx 611 adox r12,rbp 612 mov rdx,QWORD[48+rsi] 613 614 xor rbx,rbx 615 adox r9,r9 616 617 adcx rax,rcx 618 adox r10,r10 619 adcx r9,rax 620 adcx r10,rdi 621 adox rbx,rbp 622 adcx rbx,rbp 623 624 mov QWORD[80+rsp],r9 625 mov QWORD[88+rsp],r10 626 627 628 DB 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00 629 adox r12,rax 630 adox r13,rbp 631 632 mulx rdi,rax,rdx 633 xor rcx,rcx 634 mov rdx,QWORD[56+rsi] 635 adox r11,r11 636 637 adcx rax,rbx 638 adox r12,r12 639 adcx r11,rax 640 adox rcx,rbp 641 adcx r12,rdi 642 adcx rcx,rbp 643 644 DB 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00 645 DB 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00 646 647 648 mulx rdx,rax,rdx 649 xor rbx,rbx 650 adox r13,r13 651 652 adcx rax,rcx 653 adox rbx,rbp 654 adcx rax,r13 655 adcx rbx,rdx 656 657 DB 102,72,15,126,199 658 DB 102,72,15,126,205 659 660 mov rdx,QWORD[128+rsp] 661 mov r8,QWORD[rsp] 662 mov r9,QWORD[8+rsp] 663 mov r10,QWORD[16+rsp] 664 mov r11,QWORD[24+rsp] 665 mov r12,QWORD[32+rsp] 666 mov r13,QWORD[40+rsp] 667 mov r14,QWORD[48+rsp] 668 mov r15,QWORD[56+rsp] 669 670 mov QWORD[112+rsp],rax 671 mov QWORD[120+rsp],rbx 672 673 call __rsaz_512_reducex 674 675 add r8,QWORD[64+rsp] 676 adc r9,QWORD[72+rsp] 677 adc r10,QWORD[80+rsp] 678 adc r11,QWORD[88+rsp] 679 adc r12,QWORD[96+rsp] 680 adc r13,QWORD[104+rsp] 681 adc r14,QWORD[112+rsp] 682 adc r15,QWORD[120+rsp] 683 sbb rcx,rcx 684 685 call __rsaz_512_subtract 686 687 mov rdx,r8 688 mov rax,r9 689 mov r8d,DWORD[((128+8))+rsp] 690 mov rsi,rdi 691 692 dec r8d 693 jnz NEAR $L$oop_sqrx 694 695 $L$sqr_tail: 416 696 417 697 lea rax,[((128+24+48))+rsp] … … 472 752 DB 102,72,15,110,201 473 753 mov QWORD[128+rsp],r8 754 mov r11d,0x80100 755 and r11d,DWORD[((OPENSSL_ia32cap_P+8))] 756 cmp r11d,0x80100 757 je NEAR $L$mulx 474 758 mov rbx,QWORD[rdx] 475 759 mov rbp,rdx … … 489 773 490 774 call __rsaz_512_reduce 775 jmp NEAR $L$mul_tail 776 777 ALIGN 32 778 $L$mulx: 779 mov rbp,rdx 780 mov rdx,QWORD[rdx] 781 call __rsaz_512_mulx 782 783 DB 102,72,15,126,199 784 DB 102,72,15,126,205 785 786 mov rdx,QWORD[128+rsp] 787 mov r8,QWORD[rsp] 788 mov r9,QWORD[8+rsp] 789 mov r10,QWORD[16+rsp] 790 mov r11,QWORD[24+rsp] 791 mov r12,QWORD[32+rsp] 792 mov r13,QWORD[40+rsp] 793 mov r14,QWORD[48+rsp] 794 mov r15,QWORD[56+rsp] 795 796 call __rsaz_512_reducex 797 $L$mul_tail: 491 798 add r8,QWORD[64+rsp] 492 799 adc r9,QWORD[72+rsp] … … 620 927 pshufd xmm9,xmm8,0x4e 621 928 por xmm8,xmm9 929 mov r11d,0x80100 930 and r11d,DWORD[((OPENSSL_ia32cap_P+8))] 931 cmp r11d,0x80100 932 je NEAR $L$mulx_gather 622 933 DB 102,76,15,126,195 623 934 … … 800 1111 801 1112 call __rsaz_512_reduce 1113 jmp NEAR $L$mul_gather_tail 1114 1115 ALIGN 32 1116 $L$mulx_gather: 1117 DB 102,76,15,126,194 1118 1119 mov QWORD[128+rsp],r8 1120 mov QWORD[((128+8))+rsp],rdi 1121 mov QWORD[((128+16))+rsp],rcx 1122 1123 mulx r8,rbx,QWORD[rsi] 1124 mov QWORD[rsp],rbx 1125 xor edi,edi 1126 1127 mulx r9,rax,QWORD[8+rsi] 1128 1129 mulx r10,rbx,QWORD[16+rsi] 1130 adcx r8,rax 1131 1132 mulx r11,rax,QWORD[24+rsi] 1133 adcx r9,rbx 1134 1135 mulx r12,rbx,QWORD[32+rsi] 1136 adcx r10,rax 1137 1138 mulx r13,rax,QWORD[40+rsi] 1139 adcx r11,rbx 1140 1141 mulx r14,rbx,QWORD[48+rsi] 1142 adcx r12,rax 1143 1144 mulx r15,rax,QWORD[56+rsi] 1145 adcx r13,rbx 1146 adcx r14,rax 1147 DB 0x67 1148 mov rbx,r8 1149 adcx r15,rdi 1150 1151 mov rcx,-7 1152 jmp NEAR $L$oop_mulx_gather 1153 1154 ALIGN 32 1155 $L$oop_mulx_gather: 1156 movdqa xmm8,XMMWORD[rbp] 1157 movdqa xmm9,XMMWORD[16+rbp] 1158 movdqa xmm10,XMMWORD[32+rbp] 1159 movdqa xmm11,XMMWORD[48+rbp] 1160 pand xmm8,xmm0 1161 movdqa xmm12,XMMWORD[64+rbp] 1162 pand xmm9,xmm1 1163 movdqa xmm13,XMMWORD[80+rbp] 1164 pand xmm10,xmm2 1165 movdqa xmm14,XMMWORD[96+rbp] 1166 pand xmm11,xmm3 1167 movdqa xmm15,XMMWORD[112+rbp] 1168 lea rbp,[128+rbp] 1169 pand xmm12,xmm4 1170 pand xmm13,xmm5 1171 pand xmm14,xmm6 1172 pand xmm15,xmm7 1173 por xmm8,xmm10 1174 por xmm9,xmm11 1175 por xmm8,xmm12 1176 por xmm9,xmm13 1177 por xmm8,xmm14 1178 por xmm9,xmm15 1179 1180 por xmm8,xmm9 1181 pshufd xmm9,xmm8,0x4e 1182 por xmm8,xmm9 1183 DB 102,76,15,126,194 1184 1185 DB 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00 1186 adcx rbx,rax 1187 adox r8,r9 1188 1189 mulx r9,rax,QWORD[8+rsi] 1190 adcx r8,rax 1191 adox r9,r10 1192 1193 mulx r10,rax,QWORD[16+rsi] 1194 adcx r9,rax 1195 adox r10,r11 1196 1197 DB 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00 1198 adcx r10,rax 1199 adox r11,r12 1200 1201 mulx r12,rax,QWORD[32+rsi] 1202 adcx r11,rax 1203 adox r12,r13 1204 1205 mulx r13,rax,QWORD[40+rsi] 1206 adcx r12,rax 1207 adox r13,r14 1208 1209 DB 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 1210 adcx r13,rax 1211 DB 0x67 1212 adox r14,r15 1213 1214 mulx r15,rax,QWORD[56+rsi] 1215 mov QWORD[64+rcx*8+rsp],rbx 1216 adcx r14,rax 1217 adox r15,rdi 1218 mov rbx,r8 1219 adcx r15,rdi 1220 1221 inc rcx 1222 jnz NEAR $L$oop_mulx_gather 1223 1224 mov QWORD[64+rsp],r8 1225 mov QWORD[((64+8))+rsp],r9 1226 mov QWORD[((64+16))+rsp],r10 1227 mov QWORD[((64+24))+rsp],r11 1228 mov QWORD[((64+32))+rsp],r12 1229 mov QWORD[((64+40))+rsp],r13 1230 mov QWORD[((64+48))+rsp],r14 1231 mov QWORD[((64+56))+rsp],r15 1232 1233 mov rdx,QWORD[128+rsp] 1234 mov rdi,QWORD[((128+8))+rsp] 1235 mov rbp,QWORD[((128+16))+rsp] 1236 1237 mov r8,QWORD[rsp] 1238 mov r9,QWORD[8+rsp] 1239 mov r10,QWORD[16+rsp] 1240 mov r11,QWORD[24+rsp] 1241 mov r12,QWORD[32+rsp] 1242 mov r13,QWORD[40+rsp] 1243 mov r14,QWORD[48+rsp] 1244 mov r15,QWORD[56+rsp] 1245 1246 call __rsaz_512_reducex 1247 1248 $L$mul_gather_tail: 802 1249 add r8,QWORD[64+rsp] 803 1250 adc r9,QWORD[72+rsp] … … 886 1333 887 1334 mov rbp,rdi 1335 mov r11d,0x80100 1336 and r11d,DWORD[((OPENSSL_ia32cap_P+8))] 1337 cmp r11d,0x80100 1338 je NEAR $L$mulx_scatter 888 1339 mov rbx,QWORD[rdi] 889 1340 call __rsaz_512_mul … … 902 1353 903 1354 call __rsaz_512_reduce 1355 jmp NEAR $L$mul_scatter_tail 1356 1357 ALIGN 32 1358 $L$mulx_scatter: 1359 mov rdx,QWORD[rdi] 1360 call __rsaz_512_mulx 1361 1362 DB 102,72,15,126,199 1363 DB 102,72,15,126,205 1364 1365 mov rdx,QWORD[128+rsp] 1366 mov r8,QWORD[rsp] 1367 mov r9,QWORD[8+rsp] 1368 mov r10,QWORD[16+rsp] 1369 mov r11,QWORD[24+rsp] 1370 mov r12,QWORD[32+rsp] 1371 mov r13,QWORD[40+rsp] 1372 mov r14,QWORD[48+rsp] 1373 mov r15,QWORD[56+rsp] 1374 1375 call __rsaz_512_reducex 1376 1377 $L$mul_scatter_tail: 904 1378 add r8,QWORD[64+rsp] 905 1379 adc r9,QWORD[72+rsp] … … 977 1451 978 1452 $L$mul_by_one_body: 1453 mov eax,DWORD[((OPENSSL_ia32cap_P+8))] 979 1454 mov rbp,rdx 980 1455 mov QWORD[128+rsp],rcx … … 997 1472 movdqa XMMWORD[80+rsp],xmm0 998 1473 movdqa XMMWORD[96+rsp],xmm0 1474 and eax,0x80100 1475 cmp eax,0x80100 1476 je NEAR $L$by_one_callx 999 1477 call __rsaz_512_reduce 1478 jmp NEAR $L$by_one_tail 1479 ALIGN 32 1480 $L$by_one_callx: 1481 mov rdx,QWORD[128+rsp] 1482 call __rsaz_512_reducex 1483 $L$by_one_tail: 1000 1484 mov QWORD[rdi],r8 1001 1485 mov QWORD[8+rdi],r9 … … 1110 1594 dec ecx 1111 1595 jne NEAR $L$reduction_loop 1596 1597 DB 0F3h,0C3h ;repret 1598 1599 1600 1601 ALIGN 32 1602 __rsaz_512_reducex: 1603 1604 1605 imul rdx,r8 1606 xor rsi,rsi 1607 mov ecx,8 1608 jmp NEAR $L$reduction_loopx 1609 1610 ALIGN 32 1611 $L$reduction_loopx: 1612 mov rbx,r8 1613 mulx r8,rax,QWORD[rbp] 1614 adcx rax,rbx 1615 adox r8,r9 1616 1617 mulx r9,rax,QWORD[8+rbp] 1618 adcx r8,rax 1619 adox r9,r10 1620 1621 mulx r10,rbx,QWORD[16+rbp] 1622 adcx r9,rbx 1623 adox r10,r11 1624 1625 mulx r11,rbx,QWORD[24+rbp] 1626 adcx r10,rbx 1627 adox r11,r12 1628 1629 DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 1630 mov rax,rdx 1631 mov rdx,r8 1632 adcx r11,rbx 1633 adox r12,r13 1634 1635 mulx rdx,rbx,QWORD[((128+8))+rsp] 1636 mov rdx,rax 1637 1638 mulx r13,rax,QWORD[40+rbp] 1639 adcx r12,rax 1640 adox r13,r14 1641 1642 DB 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00 1643 adcx r13,rax 1644 adox r14,r15 1645 1646 mulx r15,rax,QWORD[56+rbp] 1647 mov rdx,rbx 1648 adcx r14,rax 1649 adox r15,rsi 1650 adcx r15,rsi 1651 1652 dec ecx 1653 jne NEAR $L$reduction_loopx 1112 1654 1113 1655 DB 0F3h,0C3h ;repret … … 1313 1855 mov QWORD[48+rdi],r14 1314 1856 mov QWORD[56+rdi],r15 1857 1858 DB 0F3h,0C3h ;repret 1859 1860 1861 1862 ALIGN 32 1863 __rsaz_512_mulx: 1864 1865 mulx r8,rbx,QWORD[rsi] 1866 mov rcx,-6 1867 1868 mulx r9,rax,QWORD[8+rsi] 1869 mov QWORD[8+rsp],rbx 1870 1871 mulx r10,rbx,QWORD[16+rsi] 1872 adc r8,rax 1873 1874 mulx r11,rax,QWORD[24+rsi] 1875 adc r9,rbx 1876 1877 mulx r12,rbx,QWORD[32+rsi] 1878 adc r10,rax 1879 1880 mulx r13,rax,QWORD[40+rsi] 1881 adc r11,rbx 1882 1883 mulx r14,rbx,QWORD[48+rsi] 1884 adc r12,rax 1885 1886 mulx r15,rax,QWORD[56+rsi] 1887 mov rdx,QWORD[8+rbp] 1888 adc r13,rbx 1889 adc r14,rax 1890 adc r15,0 1891 1892 xor rdi,rdi 1893 jmp NEAR $L$oop_mulx 1894 1895 ALIGN 32 1896 $L$oop_mulx: 1897 mov rbx,r8 1898 mulx r8,rax,QWORD[rsi] 1899 adcx rbx,rax 1900 adox r8,r9 1901 1902 mulx r9,rax,QWORD[8+rsi] 1903 adcx r8,rax 1904 adox r9,r10 1905 1906 mulx r10,rax,QWORD[16+rsi] 1907 adcx r9,rax 1908 adox r10,r11 1909 1910 mulx r11,rax,QWORD[24+rsi] 1911 adcx r10,rax 1912 adox r11,r12 1913 1914 DB 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00 1915 adcx r11,rax 1916 adox r12,r13 1917 1918 mulx r13,rax,QWORD[40+rsi] 1919 adcx r12,rax 1920 adox r13,r14 1921 1922 mulx r14,rax,QWORD[48+rsi] 1923 adcx r13,rax 1924 adox r14,r15 1925 1926 mulx r15,rax,QWORD[56+rsi] 1927 mov rdx,QWORD[64+rcx*8+rbp] 1928 mov QWORD[((8+64-8))+rcx*8+rsp],rbx 1929 adcx r14,rax 1930 adox r15,rdi 1931 adcx r15,rdi 1932 1933 inc rcx 1934 jnz NEAR $L$oop_mulx 1935 1936 mov rbx,r8 1937 mulx r8,rax,QWORD[rsi] 1938 adcx rbx,rax 1939 adox r8,r9 1940 1941 DB 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00 1942 adcx r8,rax 1943 adox r9,r10 1944 1945 DB 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00 1946 adcx r9,rax 1947 adox r10,r11 1948 1949 mulx r11,rax,QWORD[24+rsi] 1950 adcx r10,rax 1951 adox r11,r12 1952 1953 mulx r12,rax,QWORD[32+rsi] 1954 adcx r11,rax 1955 adox r12,r13 1956 1957 mulx r13,rax,QWORD[40+rsi] 1958 adcx r12,rax 1959 adox r13,r14 1960 1961 DB 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 1962 adcx r13,rax 1963 adox r14,r15 1964 1965 DB 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 1966 adcx r14,rax 1967 adox r15,rdi 1968 adcx r15,rdi 1969 1970 mov QWORD[((8+64-8))+rsp],rbx 1971 mov QWORD[((8+64))+rsp],r8 1972 mov QWORD[((8+64+8))+rsp],r9 1973 mov QWORD[((8+64+16))+rsp],r10 1974 mov QWORD[((8+64+24))+rsp],r11 1975 mov QWORD[((8+64+32))+rsp],r12 1976 mov QWORD[((8+64+40))+rsp],r13 1977 mov QWORD[((8+64+48))+rsp],r14 1978 mov QWORD[((8+64+56))+rsp],r15 1315 1979 1316 1980 DB 0F3h,0C3h ;repret -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/sha1-mb-x86_64.S
r97373 r99371 25 25 bt rcx,61 26 26 jc NEAR _shaext_shortcut 27 test ecx,268435456 28 jnz NEAR _avx_shortcut 27 29 mov rax,rsp 28 30 … … 3018 3020 $L$SEH_end_sha1_multi_block_shaext: 3019 3021 3022 ALIGN 32 3023 sha1_multi_block_avx: 3024 mov QWORD[8+rsp],rdi ;WIN64 prologue 3025 mov QWORD[16+rsp],rsi 3026 mov rax,rsp 3027 $L$SEH_begin_sha1_multi_block_avx: 3028 mov rdi,rcx 3029 mov rsi,rdx 3030 mov rdx,r8 3031 3032 3033 3034 _avx_shortcut: 3035 shr rcx,32 3036 cmp edx,2 3037 jb NEAR $L$avx 3038 test ecx,32 3039 jnz NEAR _avx2_shortcut 3040 jmp NEAR $L$avx 3041 ALIGN 32 3042 $L$avx: 3043 mov rax,rsp 3044 3045 push rbx 3046 3047 push rbp 3048 3049 lea rsp,[((-168))+rsp] 3050 movaps XMMWORD[rsp],xmm6 3051 movaps XMMWORD[16+rsp],xmm7 3052 movaps XMMWORD[32+rsp],xmm8 3053 movaps XMMWORD[48+rsp],xmm9 3054 movaps XMMWORD[(-120)+rax],xmm10 3055 movaps XMMWORD[(-104)+rax],xmm11 3056 movaps XMMWORD[(-88)+rax],xmm12 3057 movaps XMMWORD[(-72)+rax],xmm13 3058 movaps XMMWORD[(-56)+rax],xmm14 3059 movaps XMMWORD[(-40)+rax],xmm15 3060 sub rsp,288 3061 and rsp,-256 3062 mov QWORD[272+rsp],rax 3063 3064 $L$body_avx: 3065 lea rbp,[K_XX_XX] 3066 lea rbx,[256+rsp] 3067 3068 vzeroupper 3069 $L$oop_grande_avx: 3070 mov DWORD[280+rsp],edx 3071 xor edx,edx 3072 3073 mov r8,QWORD[rsi] 3074 3075 mov ecx,DWORD[8+rsi] 3076 cmp ecx,edx 3077 cmovg edx,ecx 3078 test ecx,ecx 3079 mov DWORD[rbx],ecx 3080 cmovle r8,rbp 3081 3082 mov r9,QWORD[16+rsi] 3083 3084 mov ecx,DWORD[24+rsi] 3085 cmp ecx,edx 3086 cmovg edx,ecx 3087 test ecx,ecx 3088 mov DWORD[4+rbx],ecx 3089 cmovle r9,rbp 3090 3091 mov r10,QWORD[32+rsi] 3092 3093 mov ecx,DWORD[40+rsi] 3094 cmp ecx,edx 3095 cmovg edx,ecx 3096 test ecx,ecx 3097 mov DWORD[8+rbx],ecx 3098 cmovle r10,rbp 3099 3100 mov r11,QWORD[48+rsi] 3101 3102 mov ecx,DWORD[56+rsi] 3103 cmp ecx,edx 3104 cmovg edx,ecx 3105 test ecx,ecx 3106 mov DWORD[12+rbx],ecx 3107 cmovle r11,rbp 3108 test edx,edx 3109 jz NEAR $L$done_avx 3110 3111 vmovdqu xmm10,XMMWORD[rdi] 3112 lea rax,[128+rsp] 3113 vmovdqu xmm11,XMMWORD[32+rdi] 3114 vmovdqu xmm12,XMMWORD[64+rdi] 3115 vmovdqu xmm13,XMMWORD[96+rdi] 3116 vmovdqu xmm14,XMMWORD[128+rdi] 3117 vmovdqu xmm5,XMMWORD[96+rbp] 3118 jmp NEAR $L$oop_avx 3119 3120 ALIGN 32 3121 $L$oop_avx: 3122 vmovdqa xmm15,XMMWORD[((-32))+rbp] 3123 vmovd xmm0,DWORD[r8] 3124 lea r8,[64+r8] 3125 vmovd xmm2,DWORD[r9] 3126 lea r9,[64+r9] 3127 vpinsrd xmm0,xmm0,DWORD[r10],1 3128 lea r10,[64+r10] 3129 vpinsrd xmm2,xmm2,DWORD[r11],1 3130 lea r11,[64+r11] 3131 vmovd xmm1,DWORD[((-60))+r8] 3132 vpunpckldq xmm0,xmm0,xmm2 3133 vmovd xmm9,DWORD[((-60))+r9] 3134 vpshufb xmm0,xmm0,xmm5 3135 vpinsrd xmm1,xmm1,DWORD[((-60))+r10],1 3136 vpinsrd xmm9,xmm9,DWORD[((-60))+r11],1 3137 vpaddd xmm14,xmm14,xmm15 3138 vpslld xmm8,xmm10,5 3139 vpandn xmm7,xmm11,xmm13 3140 vpand xmm6,xmm11,xmm12 3141 3142 vmovdqa XMMWORD[(0-128)+rax],xmm0 3143 vpaddd xmm14,xmm14,xmm0 3144 vpunpckldq xmm1,xmm1,xmm9 3145 vpsrld xmm9,xmm10,27 3146 vpxor xmm6,xmm6,xmm7 3147 vmovd xmm2,DWORD[((-56))+r8] 3148 3149 vpslld xmm7,xmm11,30 3150 vpor xmm8,xmm8,xmm9 3151 vmovd xmm9,DWORD[((-56))+r9] 3152 vpaddd xmm14,xmm14,xmm6 3153 3154 vpsrld xmm11,xmm11,2 3155 vpaddd xmm14,xmm14,xmm8 3156 vpshufb xmm1,xmm1,xmm5 3157 vpor xmm11,xmm11,xmm7 3158 vpinsrd xmm2,xmm2,DWORD[((-56))+r10],1 3159 vpinsrd xmm9,xmm9,DWORD[((-56))+r11],1 3160 vpaddd xmm13,xmm13,xmm15 3161 vpslld xmm8,xmm14,5 3162 vpandn xmm7,xmm10,xmm12 3163 vpand xmm6,xmm10,xmm11 3164 3165 vmovdqa XMMWORD[(16-128)+rax],xmm1 3166 vpaddd xmm13,xmm13,xmm1 3167 vpunpckldq xmm2,xmm2,xmm9 3168 vpsrld xmm9,xmm14,27 3169 vpxor xmm6,xmm6,xmm7 3170 vmovd xmm3,DWORD[((-52))+r8] 3171 3172 vpslld xmm7,xmm10,30 3173 vpor xmm8,xmm8,xmm9 3174 vmovd xmm9,DWORD[((-52))+r9] 3175 vpaddd xmm13,xmm13,xmm6 3176 3177 vpsrld xmm10,xmm10,2 3178 vpaddd xmm13,xmm13,xmm8 3179 vpshufb xmm2,xmm2,xmm5 3180 vpor xmm10,xmm10,xmm7 3181 vpinsrd xmm3,xmm3,DWORD[((-52))+r10],1 3182 vpinsrd xmm9,xmm9,DWORD[((-52))+r11],1 3183 vpaddd xmm12,xmm12,xmm15 3184 vpslld xmm8,xmm13,5 3185 vpandn xmm7,xmm14,xmm11 3186 vpand xmm6,xmm14,xmm10 3187 3188 vmovdqa XMMWORD[(32-128)+rax],xmm2 3189 vpaddd xmm12,xmm12,xmm2 3190 vpunpckldq xmm3,xmm3,xmm9 3191 vpsrld xmm9,xmm13,27 3192 vpxor xmm6,xmm6,xmm7 3193 vmovd xmm4,DWORD[((-48))+r8] 3194 3195 vpslld xmm7,xmm14,30 3196 vpor xmm8,xmm8,xmm9 3197 vmovd xmm9,DWORD[((-48))+r9] 3198 vpaddd xmm12,xmm12,xmm6 3199 3200 vpsrld xmm14,xmm14,2 3201 vpaddd xmm12,xmm12,xmm8 3202 vpshufb xmm3,xmm3,xmm5 3203 vpor xmm14,xmm14,xmm7 3204 vpinsrd xmm4,xmm4,DWORD[((-48))+r10],1 3205 vpinsrd xmm9,xmm9,DWORD[((-48))+r11],1 3206 vpaddd xmm11,xmm11,xmm15 3207 vpslld xmm8,xmm12,5 3208 vpandn xmm7,xmm13,xmm10 3209 vpand xmm6,xmm13,xmm14 3210 3211 vmovdqa XMMWORD[(48-128)+rax],xmm3 3212 vpaddd xmm11,xmm11,xmm3 3213 vpunpckldq xmm4,xmm4,xmm9 3214 vpsrld xmm9,xmm12,27 3215 vpxor xmm6,xmm6,xmm7 3216 vmovd xmm0,DWORD[((-44))+r8] 3217 3218 vpslld xmm7,xmm13,30 3219 vpor xmm8,xmm8,xmm9 3220 vmovd xmm9,DWORD[((-44))+r9] 3221 vpaddd xmm11,xmm11,xmm6 3222 3223 vpsrld xmm13,xmm13,2 3224 vpaddd xmm11,xmm11,xmm8 3225 vpshufb xmm4,xmm4,xmm5 3226 vpor xmm13,xmm13,xmm7 3227 vpinsrd xmm0,xmm0,DWORD[((-44))+r10],1 3228 vpinsrd xmm9,xmm9,DWORD[((-44))+r11],1 3229 vpaddd xmm10,xmm10,xmm15 3230 vpslld xmm8,xmm11,5 3231 vpandn xmm7,xmm12,xmm14 3232 vpand xmm6,xmm12,xmm13 3233 3234 vmovdqa XMMWORD[(64-128)+rax],xmm4 3235 vpaddd xmm10,xmm10,xmm4 3236 vpunpckldq xmm0,xmm0,xmm9 3237 vpsrld xmm9,xmm11,27 3238 vpxor xmm6,xmm6,xmm7 3239 vmovd xmm1,DWORD[((-40))+r8] 3240 3241 vpslld xmm7,xmm12,30 3242 vpor xmm8,xmm8,xmm9 3243 vmovd xmm9,DWORD[((-40))+r9] 3244 vpaddd xmm10,xmm10,xmm6 3245 3246 vpsrld xmm12,xmm12,2 3247 vpaddd xmm10,xmm10,xmm8 3248 vpshufb xmm0,xmm0,xmm5 3249 vpor xmm12,xmm12,xmm7 3250 vpinsrd xmm1,xmm1,DWORD[((-40))+r10],1 3251 vpinsrd xmm9,xmm9,DWORD[((-40))+r11],1 3252 vpaddd xmm14,xmm14,xmm15 3253 vpslld xmm8,xmm10,5 3254 vpandn xmm7,xmm11,xmm13 3255 vpand xmm6,xmm11,xmm12 3256 3257 vmovdqa XMMWORD[(80-128)+rax],xmm0 3258 vpaddd xmm14,xmm14,xmm0 3259 vpunpckldq xmm1,xmm1,xmm9 3260 vpsrld xmm9,xmm10,27 3261 vpxor xmm6,xmm6,xmm7 3262 vmovd xmm2,DWORD[((-36))+r8] 3263 3264 vpslld xmm7,xmm11,30 3265 vpor xmm8,xmm8,xmm9 3266 vmovd xmm9,DWORD[((-36))+r9] 3267 vpaddd xmm14,xmm14,xmm6 3268 3269 vpsrld xmm11,xmm11,2 3270 vpaddd xmm14,xmm14,xmm8 3271 vpshufb xmm1,xmm1,xmm5 3272 vpor xmm11,xmm11,xmm7 3273 vpinsrd xmm2,xmm2,DWORD[((-36))+r10],1 3274 vpinsrd xmm9,xmm9,DWORD[((-36))+r11],1 3275 vpaddd xmm13,xmm13,xmm15 3276 vpslld xmm8,xmm14,5 3277 vpandn xmm7,xmm10,xmm12 3278 vpand xmm6,xmm10,xmm11 3279 3280 vmovdqa XMMWORD[(96-128)+rax],xmm1 3281 vpaddd xmm13,xmm13,xmm1 3282 vpunpckldq xmm2,xmm2,xmm9 3283 vpsrld xmm9,xmm14,27 3284 vpxor xmm6,xmm6,xmm7 3285 vmovd xmm3,DWORD[((-32))+r8] 3286 3287 vpslld xmm7,xmm10,30 3288 vpor xmm8,xmm8,xmm9 3289 vmovd xmm9,DWORD[((-32))+r9] 3290 vpaddd xmm13,xmm13,xmm6 3291 3292 vpsrld xmm10,xmm10,2 3293 vpaddd xmm13,xmm13,xmm8 3294 vpshufb xmm2,xmm2,xmm5 3295 vpor xmm10,xmm10,xmm7 3296 vpinsrd xmm3,xmm3,DWORD[((-32))+r10],1 3297 vpinsrd xmm9,xmm9,DWORD[((-32))+r11],1 3298 vpaddd xmm12,xmm12,xmm15 3299 vpslld xmm8,xmm13,5 3300 vpandn xmm7,xmm14,xmm11 3301 vpand xmm6,xmm14,xmm10 3302 3303 vmovdqa XMMWORD[(112-128)+rax],xmm2 3304 vpaddd xmm12,xmm12,xmm2 3305 vpunpckldq xmm3,xmm3,xmm9 3306 vpsrld xmm9,xmm13,27 3307 vpxor xmm6,xmm6,xmm7 3308 vmovd xmm4,DWORD[((-28))+r8] 3309 3310 vpslld xmm7,xmm14,30 3311 vpor xmm8,xmm8,xmm9 3312 vmovd xmm9,DWORD[((-28))+r9] 3313 vpaddd xmm12,xmm12,xmm6 3314 3315 vpsrld xmm14,xmm14,2 3316 vpaddd xmm12,xmm12,xmm8 3317 vpshufb xmm3,xmm3,xmm5 3318 vpor xmm14,xmm14,xmm7 3319 vpinsrd xmm4,xmm4,DWORD[((-28))+r10],1 3320 vpinsrd xmm9,xmm9,DWORD[((-28))+r11],1 3321 vpaddd xmm11,xmm11,xmm15 3322 vpslld xmm8,xmm12,5 3323 vpandn xmm7,xmm13,xmm10 3324 vpand xmm6,xmm13,xmm14 3325 3326 vmovdqa XMMWORD[(128-128)+rax],xmm3 3327 vpaddd xmm11,xmm11,xmm3 3328 vpunpckldq xmm4,xmm4,xmm9 3329 vpsrld xmm9,xmm12,27 3330 vpxor xmm6,xmm6,xmm7 3331 vmovd xmm0,DWORD[((-24))+r8] 3332 3333 vpslld xmm7,xmm13,30 3334 vpor xmm8,xmm8,xmm9 3335 vmovd xmm9,DWORD[((-24))+r9] 3336 vpaddd xmm11,xmm11,xmm6 3337 3338 vpsrld xmm13,xmm13,2 3339 vpaddd xmm11,xmm11,xmm8 3340 vpshufb xmm4,xmm4,xmm5 3341 vpor xmm13,xmm13,xmm7 3342 vpinsrd xmm0,xmm0,DWORD[((-24))+r10],1 3343 vpinsrd xmm9,xmm9,DWORD[((-24))+r11],1 3344 vpaddd xmm10,xmm10,xmm15 3345 vpslld xmm8,xmm11,5 3346 vpandn xmm7,xmm12,xmm14 3347 vpand xmm6,xmm12,xmm13 3348 3349 vmovdqa XMMWORD[(144-128)+rax],xmm4 3350 vpaddd xmm10,xmm10,xmm4 3351 vpunpckldq xmm0,xmm0,xmm9 3352 vpsrld xmm9,xmm11,27 3353 vpxor xmm6,xmm6,xmm7 3354 vmovd xmm1,DWORD[((-20))+r8] 3355 3356 vpslld xmm7,xmm12,30 3357 vpor xmm8,xmm8,xmm9 3358 vmovd xmm9,DWORD[((-20))+r9] 3359 vpaddd xmm10,xmm10,xmm6 3360 3361 vpsrld xmm12,xmm12,2 3362 vpaddd xmm10,xmm10,xmm8 3363 vpshufb xmm0,xmm0,xmm5 3364 vpor xmm12,xmm12,xmm7 3365 vpinsrd xmm1,xmm1,DWORD[((-20))+r10],1 3366 vpinsrd xmm9,xmm9,DWORD[((-20))+r11],1 3367 vpaddd xmm14,xmm14,xmm15 3368 vpslld xmm8,xmm10,5 3369 vpandn xmm7,xmm11,xmm13 3370 vpand xmm6,xmm11,xmm12 3371 3372 vmovdqa XMMWORD[(160-128)+rax],xmm0 3373 vpaddd xmm14,xmm14,xmm0 3374 vpunpckldq xmm1,xmm1,xmm9 3375 vpsrld xmm9,xmm10,27 3376 vpxor xmm6,xmm6,xmm7 3377 vmovd xmm2,DWORD[((-16))+r8] 3378 3379 vpslld xmm7,xmm11,30 3380 vpor xmm8,xmm8,xmm9 3381 vmovd xmm9,DWORD[((-16))+r9] 3382 vpaddd xmm14,xmm14,xmm6 3383 3384 vpsrld xmm11,xmm11,2 3385 vpaddd xmm14,xmm14,xmm8 3386 vpshufb xmm1,xmm1,xmm5 3387 vpor xmm11,xmm11,xmm7 3388 vpinsrd xmm2,xmm2,DWORD[((-16))+r10],1 3389 vpinsrd xmm9,xmm9,DWORD[((-16))+r11],1 3390 vpaddd xmm13,xmm13,xmm15 3391 vpslld xmm8,xmm14,5 3392 vpandn xmm7,xmm10,xmm12 3393 vpand xmm6,xmm10,xmm11 3394 3395 vmovdqa XMMWORD[(176-128)+rax],xmm1 3396 vpaddd xmm13,xmm13,xmm1 3397 vpunpckldq xmm2,xmm2,xmm9 3398 vpsrld xmm9,xmm14,27 3399 vpxor xmm6,xmm6,xmm7 3400 vmovd xmm3,DWORD[((-12))+r8] 3401 3402 vpslld xmm7,xmm10,30 3403 vpor xmm8,xmm8,xmm9 3404 vmovd xmm9,DWORD[((-12))+r9] 3405 vpaddd xmm13,xmm13,xmm6 3406 3407 vpsrld xmm10,xmm10,2 3408 vpaddd xmm13,xmm13,xmm8 3409 vpshufb xmm2,xmm2,xmm5 3410 vpor xmm10,xmm10,xmm7 3411 vpinsrd xmm3,xmm3,DWORD[((-12))+r10],1 3412 vpinsrd xmm9,xmm9,DWORD[((-12))+r11],1 3413 vpaddd xmm12,xmm12,xmm15 3414 vpslld xmm8,xmm13,5 3415 vpandn xmm7,xmm14,xmm11 3416 vpand xmm6,xmm14,xmm10 3417 3418 vmovdqa XMMWORD[(192-128)+rax],xmm2 3419 vpaddd xmm12,xmm12,xmm2 3420 vpunpckldq xmm3,xmm3,xmm9 3421 vpsrld xmm9,xmm13,27 3422 vpxor xmm6,xmm6,xmm7 3423 vmovd xmm4,DWORD[((-8))+r8] 3424 3425 vpslld xmm7,xmm14,30 3426 vpor xmm8,xmm8,xmm9 3427 vmovd xmm9,DWORD[((-8))+r9] 3428 vpaddd xmm12,xmm12,xmm6 3429 3430 vpsrld xmm14,xmm14,2 3431 vpaddd xmm12,xmm12,xmm8 3432 vpshufb xmm3,xmm3,xmm5 3433 vpor xmm14,xmm14,xmm7 3434 vpinsrd xmm4,xmm4,DWORD[((-8))+r10],1 3435 vpinsrd xmm9,xmm9,DWORD[((-8))+r11],1 3436 vpaddd xmm11,xmm11,xmm15 3437 vpslld xmm8,xmm12,5 3438 vpandn xmm7,xmm13,xmm10 3439 vpand xmm6,xmm13,xmm14 3440 3441 vmovdqa XMMWORD[(208-128)+rax],xmm3 3442 vpaddd xmm11,xmm11,xmm3 3443 vpunpckldq xmm4,xmm4,xmm9 3444 vpsrld xmm9,xmm12,27 3445 vpxor xmm6,xmm6,xmm7 3446 vmovd xmm0,DWORD[((-4))+r8] 3447 3448 vpslld xmm7,xmm13,30 3449 vpor xmm8,xmm8,xmm9 3450 vmovd xmm9,DWORD[((-4))+r9] 3451 vpaddd xmm11,xmm11,xmm6 3452 3453 vpsrld xmm13,xmm13,2 3454 vpaddd xmm11,xmm11,xmm8 3455 vpshufb xmm4,xmm4,xmm5 3456 vpor xmm13,xmm13,xmm7 3457 vmovdqa xmm1,XMMWORD[((0-128))+rax] 3458 vpinsrd xmm0,xmm0,DWORD[((-4))+r10],1 3459 vpinsrd xmm9,xmm9,DWORD[((-4))+r11],1 3460 vpaddd xmm10,xmm10,xmm15 3461 prefetcht0 [63+r8] 3462 vpslld xmm8,xmm11,5 3463 vpandn xmm7,xmm12,xmm14 3464 vpand xmm6,xmm12,xmm13 3465 3466 vmovdqa XMMWORD[(224-128)+rax],xmm4 3467 vpaddd xmm10,xmm10,xmm4 3468 vpunpckldq xmm0,xmm0,xmm9 3469 vpsrld xmm9,xmm11,27 3470 prefetcht0 [63+r9] 3471 vpxor xmm6,xmm6,xmm7 3472 3473 vpslld xmm7,xmm12,30 3474 vpor xmm8,xmm8,xmm9 3475 prefetcht0 [63+r10] 3476 vpaddd xmm10,xmm10,xmm6 3477 3478 vpsrld xmm12,xmm12,2 3479 vpaddd xmm10,xmm10,xmm8 3480 prefetcht0 [63+r11] 3481 vpshufb xmm0,xmm0,xmm5 3482 vpor xmm12,xmm12,xmm7 3483 vmovdqa xmm2,XMMWORD[((16-128))+rax] 3484 vpxor xmm1,xmm1,xmm3 3485 vmovdqa xmm3,XMMWORD[((32-128))+rax] 3486 3487 vpaddd xmm14,xmm14,xmm15 3488 vpslld xmm8,xmm10,5 3489 vpandn xmm7,xmm11,xmm13 3490 3491 vpand xmm6,xmm11,xmm12 3492 3493 vmovdqa XMMWORD[(240-128)+rax],xmm0 3494 vpaddd xmm14,xmm14,xmm0 3495 vpxor xmm1,xmm1,XMMWORD[((128-128))+rax] 3496 vpsrld xmm9,xmm10,27 3497 vpxor xmm6,xmm6,xmm7 3498 vpxor xmm1,xmm1,xmm3 3499 3500 3501 vpslld xmm7,xmm11,30 3502 vpor xmm8,xmm8,xmm9 3503 vpaddd xmm14,xmm14,xmm6 3504 3505 vpsrld xmm5,xmm1,31 3506 vpaddd xmm1,xmm1,xmm1 3507 3508 vpsrld xmm11,xmm11,2 3509 3510 vpaddd xmm14,xmm14,xmm8 3511 vpor xmm1,xmm1,xmm5 3512 vpor xmm11,xmm11,xmm7 3513 vpxor xmm2,xmm2,xmm4 3514 vmovdqa xmm4,XMMWORD[((48-128))+rax] 3515 3516 vpaddd xmm13,xmm13,xmm15 3517 vpslld xmm8,xmm14,5 3518 vpandn xmm7,xmm10,xmm12 3519 3520 vpand xmm6,xmm10,xmm11 3521 3522 vmovdqa XMMWORD[(0-128)+rax],xmm1 3523 vpaddd xmm13,xmm13,xmm1 3524 vpxor xmm2,xmm2,XMMWORD[((144-128))+rax] 3525 vpsrld xmm9,xmm14,27 3526 vpxor xmm6,xmm6,xmm7 3527 vpxor xmm2,xmm2,xmm4 3528 3529 3530 vpslld xmm7,xmm10,30 3531 vpor xmm8,xmm8,xmm9 3532 vpaddd xmm13,xmm13,xmm6 3533 3534 vpsrld xmm5,xmm2,31 3535 vpaddd xmm2,xmm2,xmm2 3536 3537 vpsrld xmm10,xmm10,2 3538 3539 vpaddd xmm13,xmm13,xmm8 3540 vpor xmm2,xmm2,xmm5 3541 vpor xmm10,xmm10,xmm7 3542 vpxor xmm3,xmm3,xmm0 3543 vmovdqa xmm0,XMMWORD[((64-128))+rax] 3544 3545 vpaddd xmm12,xmm12,xmm15 3546 vpslld xmm8,xmm13,5 3547 vpandn xmm7,xmm14,xmm11 3548 3549 vpand xmm6,xmm14,xmm10 3550 3551 vmovdqa XMMWORD[(16-128)+rax],xmm2 3552 vpaddd xmm12,xmm12,xmm2 3553 vpxor xmm3,xmm3,XMMWORD[((160-128))+rax] 3554 vpsrld xmm9,xmm13,27 3555 vpxor xmm6,xmm6,xmm7 3556 vpxor xmm3,xmm3,xmm0 3557 3558 3559 vpslld xmm7,xmm14,30 3560 vpor xmm8,xmm8,xmm9 3561 vpaddd xmm12,xmm12,xmm6 3562 3563 vpsrld xmm5,xmm3,31 3564 vpaddd xmm3,xmm3,xmm3 3565 3566 vpsrld xmm14,xmm14,2 3567 3568 vpaddd xmm12,xmm12,xmm8 3569 vpor xmm3,xmm3,xmm5 3570 vpor xmm14,xmm14,xmm7 3571 vpxor xmm4,xmm4,xmm1 3572 vmovdqa xmm1,XMMWORD[((80-128))+rax] 3573 3574 vpaddd xmm11,xmm11,xmm15 3575 vpslld xmm8,xmm12,5 3576 vpandn xmm7,xmm13,xmm10 3577 3578 vpand xmm6,xmm13,xmm14 3579 3580 vmovdqa XMMWORD[(32-128)+rax],xmm3 3581 vpaddd xmm11,xmm11,xmm3 3582 vpxor xmm4,xmm4,XMMWORD[((176-128))+rax] 3583 vpsrld xmm9,xmm12,27 3584 vpxor xmm6,xmm6,xmm7 3585 vpxor xmm4,xmm4,xmm1 3586 3587 3588 vpslld xmm7,xmm13,30 3589 vpor xmm8,xmm8,xmm9 3590 vpaddd xmm11,xmm11,xmm6 3591 3592 vpsrld xmm5,xmm4,31 3593 vpaddd xmm4,xmm4,xmm4 3594 3595 vpsrld xmm13,xmm13,2 3596 3597 vpaddd xmm11,xmm11,xmm8 3598 vpor xmm4,xmm4,xmm5 3599 vpor xmm13,xmm13,xmm7 3600 vpxor xmm0,xmm0,xmm2 3601 vmovdqa xmm2,XMMWORD[((96-128))+rax] 3602 3603 vpaddd xmm10,xmm10,xmm15 3604 vpslld xmm8,xmm11,5 3605 vpandn xmm7,xmm12,xmm14 3606 3607 vpand xmm6,xmm12,xmm13 3608 3609 vmovdqa XMMWORD[(48-128)+rax],xmm4 3610 vpaddd xmm10,xmm10,xmm4 3611 vpxor xmm0,xmm0,XMMWORD[((192-128))+rax] 3612 vpsrld xmm9,xmm11,27 3613 vpxor xmm6,xmm6,xmm7 3614 vpxor xmm0,xmm0,xmm2 3615 3616 3617 vpslld xmm7,xmm12,30 3618 vpor xmm8,xmm8,xmm9 3619 vpaddd xmm10,xmm10,xmm6 3620 3621 vpsrld xmm5,xmm0,31 3622 vpaddd xmm0,xmm0,xmm0 3623 3624 vpsrld xmm12,xmm12,2 3625 3626 vpaddd xmm10,xmm10,xmm8 3627 vpor xmm0,xmm0,xmm5 3628 vpor xmm12,xmm12,xmm7 3629 vmovdqa xmm15,XMMWORD[rbp] 3630 vpxor xmm1,xmm1,xmm3 3631 vmovdqa xmm3,XMMWORD[((112-128))+rax] 3632 3633 vpslld xmm8,xmm10,5 3634 vpaddd xmm14,xmm14,xmm15 3635 vpxor xmm6,xmm13,xmm11 3636 vmovdqa XMMWORD[(64-128)+rax],xmm0 3637 vpaddd xmm14,xmm14,xmm0 3638 vpxor xmm1,xmm1,XMMWORD[((208-128))+rax] 3639 vpsrld xmm9,xmm10,27 3640 vpxor xmm6,xmm6,xmm12 3641 vpxor xmm1,xmm1,xmm3 3642 3643 vpslld xmm7,xmm11,30 3644 vpor xmm8,xmm8,xmm9 3645 vpaddd xmm14,xmm14,xmm6 3646 vpsrld xmm5,xmm1,31 3647 vpaddd xmm1,xmm1,xmm1 3648 3649 vpsrld xmm11,xmm11,2 3650 vpaddd xmm14,xmm14,xmm8 3651 vpor xmm1,xmm1,xmm5 3652 vpor xmm11,xmm11,xmm7 3653 vpxor xmm2,xmm2,xmm4 3654 vmovdqa xmm4,XMMWORD[((128-128))+rax] 3655 3656 vpslld xmm8,xmm14,5 3657 vpaddd xmm13,xmm13,xmm15 3658 vpxor xmm6,xmm12,xmm10 3659 vmovdqa XMMWORD[(80-128)+rax],xmm1 3660 vpaddd xmm13,xmm13,xmm1 3661 vpxor xmm2,xmm2,XMMWORD[((224-128))+rax] 3662 vpsrld xmm9,xmm14,27 3663 vpxor xmm6,xmm6,xmm11 3664 vpxor xmm2,xmm2,xmm4 3665 3666 vpslld xmm7,xmm10,30 3667 vpor xmm8,xmm8,xmm9 3668 vpaddd xmm13,xmm13,xmm6 3669 vpsrld xmm5,xmm2,31 3670 vpaddd xmm2,xmm2,xmm2 3671 3672 vpsrld xmm10,xmm10,2 3673 vpaddd xmm13,xmm13,xmm8 3674 vpor xmm2,xmm2,xmm5 3675 vpor xmm10,xmm10,xmm7 3676 vpxor xmm3,xmm3,xmm0 3677 vmovdqa xmm0,XMMWORD[((144-128))+rax] 3678 3679 vpslld xmm8,xmm13,5 3680 vpaddd xmm12,xmm12,xmm15 3681 vpxor xmm6,xmm11,xmm14 3682 vmovdqa XMMWORD[(96-128)+rax],xmm2 3683 vpaddd xmm12,xmm12,xmm2 3684 vpxor xmm3,xmm3,XMMWORD[((240-128))+rax] 3685 vpsrld xmm9,xmm13,27 3686 vpxor xmm6,xmm6,xmm10 3687 vpxor xmm3,xmm3,xmm0 3688 3689 vpslld xmm7,xmm14,30 3690 vpor xmm8,xmm8,xmm9 3691 vpaddd xmm12,xmm12,xmm6 3692 vpsrld xmm5,xmm3,31 3693 vpaddd xmm3,xmm3,xmm3 3694 3695 vpsrld xmm14,xmm14,2 3696 vpaddd xmm12,xmm12,xmm8 3697 vpor xmm3,xmm3,xmm5 3698 vpor xmm14,xmm14,xmm7 3699 vpxor xmm4,xmm4,xmm1 3700 vmovdqa xmm1,XMMWORD[((160-128))+rax] 3701 3702 vpslld xmm8,xmm12,5 3703 vpaddd xmm11,xmm11,xmm15 3704 vpxor xmm6,xmm10,xmm13 3705 vmovdqa XMMWORD[(112-128)+rax],xmm3 3706 vpaddd xmm11,xmm11,xmm3 3707 vpxor xmm4,xmm4,XMMWORD[((0-128))+rax] 3708 vpsrld xmm9,xmm12,27 3709 vpxor xmm6,xmm6,xmm14 3710 vpxor xmm4,xmm4,xmm1 3711 3712 vpslld xmm7,xmm13,30 3713 vpor xmm8,xmm8,xmm9 3714 vpaddd xmm11,xmm11,xmm6 3715 vpsrld xmm5,xmm4,31 3716 vpaddd xmm4,xmm4,xmm4 3717 3718 vpsrld xmm13,xmm13,2 3719 vpaddd xmm11,xmm11,xmm8 3720 vpor xmm4,xmm4,xmm5 3721 vpor xmm13,xmm13,xmm7 3722 vpxor xmm0,xmm0,xmm2 3723 vmovdqa xmm2,XMMWORD[((176-128))+rax] 3724 3725 vpslld xmm8,xmm11,5 3726 vpaddd xmm10,xmm10,xmm15 3727 vpxor xmm6,xmm14,xmm12 3728 vmovdqa XMMWORD[(128-128)+rax],xmm4 3729 vpaddd xmm10,xmm10,xmm4 3730 vpxor xmm0,xmm0,XMMWORD[((16-128))+rax] 3731 vpsrld xmm9,xmm11,27 3732 vpxor xmm6,xmm6,xmm13 3733 vpxor xmm0,xmm0,xmm2 3734 3735 vpslld xmm7,xmm12,30 3736 vpor xmm8,xmm8,xmm9 3737 vpaddd xmm10,xmm10,xmm6 3738 vpsrld xmm5,xmm0,31 3739 vpaddd xmm0,xmm0,xmm0 3740 3741 vpsrld xmm12,xmm12,2 3742 vpaddd xmm10,xmm10,xmm8 3743 vpor xmm0,xmm0,xmm5 3744 vpor xmm12,xmm12,xmm7 3745 vpxor xmm1,xmm1,xmm3 3746 vmovdqa xmm3,XMMWORD[((192-128))+rax] 3747 3748 vpslld xmm8,xmm10,5 3749 vpaddd xmm14,xmm14,xmm15 3750 vpxor xmm6,xmm13,xmm11 3751 vmovdqa XMMWORD[(144-128)+rax],xmm0 3752 vpaddd xmm14,xmm14,xmm0 3753 vpxor xmm1,xmm1,XMMWORD[((32-128))+rax] 3754 vpsrld xmm9,xmm10,27 3755 vpxor xmm6,xmm6,xmm12 3756 vpxor xmm1,xmm1,xmm3 3757 3758 vpslld xmm7,xmm11,30 3759 vpor xmm8,xmm8,xmm9 3760 vpaddd xmm14,xmm14,xmm6 3761 vpsrld xmm5,xmm1,31 3762 vpaddd xmm1,xmm1,xmm1 3763 3764 vpsrld xmm11,xmm11,2 3765 vpaddd xmm14,xmm14,xmm8 3766 vpor xmm1,xmm1,xmm5 3767 vpor xmm11,xmm11,xmm7 3768 vpxor xmm2,xmm2,xmm4 3769 vmovdqa xmm4,XMMWORD[((208-128))+rax] 3770 3771 vpslld xmm8,xmm14,5 3772 vpaddd xmm13,xmm13,xmm15 3773 vpxor xmm6,xmm12,xmm10 3774 vmovdqa XMMWORD[(160-128)+rax],xmm1 3775 vpaddd xmm13,xmm13,xmm1 3776 vpxor xmm2,xmm2,XMMWORD[((48-128))+rax] 3777 vpsrld xmm9,xmm14,27 3778 vpxor xmm6,xmm6,xmm11 3779 vpxor xmm2,xmm2,xmm4 3780 3781 vpslld xmm7,xmm10,30 3782 vpor xmm8,xmm8,xmm9 3783 vpaddd xmm13,xmm13,xmm6 3784 vpsrld xmm5,xmm2,31 3785 vpaddd xmm2,xmm2,xmm2 3786 3787 vpsrld xmm10,xmm10,2 3788 vpaddd xmm13,xmm13,xmm8 3789 vpor xmm2,xmm2,xmm5 3790 vpor xmm10,xmm10,xmm7 3791 vpxor xmm3,xmm3,xmm0 3792 vmovdqa xmm0,XMMWORD[((224-128))+rax] 3793 3794 vpslld xmm8,xmm13,5 3795 vpaddd xmm12,xmm12,xmm15 3796 vpxor xmm6,xmm11,xmm14 3797 vmovdqa XMMWORD[(176-128)+rax],xmm2 3798 vpaddd xmm12,xmm12,xmm2 3799 vpxor xmm3,xmm3,XMMWORD[((64-128))+rax] 3800 vpsrld xmm9,xmm13,27 3801 vpxor xmm6,xmm6,xmm10 3802 vpxor xmm3,xmm3,xmm0 3803 3804 vpslld xmm7,xmm14,30 3805 vpor xmm8,xmm8,xmm9 3806 vpaddd xmm12,xmm12,xmm6 3807 vpsrld xmm5,xmm3,31 3808 vpaddd xmm3,xmm3,xmm3 3809 3810 vpsrld xmm14,xmm14,2 3811 vpaddd xmm12,xmm12,xmm8 3812 vpor xmm3,xmm3,xmm5 3813 vpor xmm14,xmm14,xmm7 3814 vpxor xmm4,xmm4,xmm1 3815 vmovdqa xmm1,XMMWORD[((240-128))+rax] 3816 3817 vpslld xmm8,xmm12,5 3818 vpaddd xmm11,xmm11,xmm15 3819 vpxor xmm6,xmm10,xmm13 3820 vmovdqa XMMWORD[(192-128)+rax],xmm3 3821 vpaddd xmm11,xmm11,xmm3 3822 vpxor xmm4,xmm4,XMMWORD[((80-128))+rax] 3823 vpsrld xmm9,xmm12,27 3824 vpxor xmm6,xmm6,xmm14 3825 vpxor xmm4,xmm4,xmm1 3826 3827 vpslld xmm7,xmm13,30 3828 vpor xmm8,xmm8,xmm9 3829 vpaddd xmm11,xmm11,xmm6 3830 vpsrld xmm5,xmm4,31 3831 vpaddd xmm4,xmm4,xmm4 3832 3833 vpsrld xmm13,xmm13,2 3834 vpaddd xmm11,xmm11,xmm8 3835 vpor xmm4,xmm4,xmm5 3836 vpor xmm13,xmm13,xmm7 3837 vpxor xmm0,xmm0,xmm2 3838 vmovdqa xmm2,XMMWORD[((0-128))+rax] 3839 3840 vpslld xmm8,xmm11,5 3841 vpaddd xmm10,xmm10,xmm15 3842 vpxor xmm6,xmm14,xmm12 3843 vmovdqa XMMWORD[(208-128)+rax],xmm4 3844 vpaddd xmm10,xmm10,xmm4 3845 vpxor xmm0,xmm0,XMMWORD[((96-128))+rax] 3846 vpsrld xmm9,xmm11,27 3847 vpxor xmm6,xmm6,xmm13 3848 vpxor xmm0,xmm0,xmm2 3849 3850 vpslld xmm7,xmm12,30 3851 vpor xmm8,xmm8,xmm9 3852 vpaddd xmm10,xmm10,xmm6 3853 vpsrld xmm5,xmm0,31 3854 vpaddd xmm0,xmm0,xmm0 3855 3856 vpsrld xmm12,xmm12,2 3857 vpaddd xmm10,xmm10,xmm8 3858 vpor xmm0,xmm0,xmm5 3859 vpor xmm12,xmm12,xmm7 3860 vpxor xmm1,xmm1,xmm3 3861 vmovdqa xmm3,XMMWORD[((16-128))+rax] 3862 3863 vpslld xmm8,xmm10,5 3864 vpaddd xmm14,xmm14,xmm15 3865 vpxor xmm6,xmm13,xmm11 3866 vmovdqa XMMWORD[(224-128)+rax],xmm0 3867 vpaddd xmm14,xmm14,xmm0 3868 vpxor xmm1,xmm1,XMMWORD[((112-128))+rax] 3869 vpsrld xmm9,xmm10,27 3870 vpxor xmm6,xmm6,xmm12 3871 vpxor xmm1,xmm1,xmm3 3872 3873 vpslld xmm7,xmm11,30 3874 vpor xmm8,xmm8,xmm9 3875 vpaddd xmm14,xmm14,xmm6 3876 vpsrld xmm5,xmm1,31 3877 vpaddd xmm1,xmm1,xmm1 3878 3879 vpsrld xmm11,xmm11,2 3880 vpaddd xmm14,xmm14,xmm8 3881 vpor xmm1,xmm1,xmm5 3882 vpor xmm11,xmm11,xmm7 3883 vpxor xmm2,xmm2,xmm4 3884 vmovdqa xmm4,XMMWORD[((32-128))+rax] 3885 3886 vpslld xmm8,xmm14,5 3887 vpaddd xmm13,xmm13,xmm15 3888 vpxor xmm6,xmm12,xmm10 3889 vmovdqa XMMWORD[(240-128)+rax],xmm1 3890 vpaddd xmm13,xmm13,xmm1 3891 vpxor xmm2,xmm2,XMMWORD[((128-128))+rax] 3892 vpsrld xmm9,xmm14,27 3893 vpxor xmm6,xmm6,xmm11 3894 vpxor xmm2,xmm2,xmm4 3895 3896 vpslld xmm7,xmm10,30 3897 vpor xmm8,xmm8,xmm9 3898 vpaddd xmm13,xmm13,xmm6 3899 vpsrld xmm5,xmm2,31 3900 vpaddd xmm2,xmm2,xmm2 3901 3902 vpsrld xmm10,xmm10,2 3903 vpaddd xmm13,xmm13,xmm8 3904 vpor xmm2,xmm2,xmm5 3905 vpor xmm10,xmm10,xmm7 3906 vpxor xmm3,xmm3,xmm0 3907 vmovdqa xmm0,XMMWORD[((48-128))+rax] 3908 3909 vpslld xmm8,xmm13,5 3910 vpaddd xmm12,xmm12,xmm15 3911 vpxor xmm6,xmm11,xmm14 3912 vmovdqa XMMWORD[(0-128)+rax],xmm2 3913 vpaddd xmm12,xmm12,xmm2 3914 vpxor xmm3,xmm3,XMMWORD[((144-128))+rax] 3915 vpsrld xmm9,xmm13,27 3916 vpxor xmm6,xmm6,xmm10 3917 vpxor xmm3,xmm3,xmm0 3918 3919 vpslld xmm7,xmm14,30 3920 vpor xmm8,xmm8,xmm9 3921 vpaddd xmm12,xmm12,xmm6 3922 vpsrld xmm5,xmm3,31 3923 vpaddd xmm3,xmm3,xmm3 3924 3925 vpsrld xmm14,xmm14,2 3926 vpaddd xmm12,xmm12,xmm8 3927 vpor xmm3,xmm3,xmm5 3928 vpor xmm14,xmm14,xmm7 3929 vpxor xmm4,xmm4,xmm1 3930 vmovdqa xmm1,XMMWORD[((64-128))+rax] 3931 3932 vpslld xmm8,xmm12,5 3933 vpaddd xmm11,xmm11,xmm15 3934 vpxor xmm6,xmm10,xmm13 3935 vmovdqa XMMWORD[(16-128)+rax],xmm3 3936 vpaddd xmm11,xmm11,xmm3 3937 vpxor xmm4,xmm4,XMMWORD[((160-128))+rax] 3938 vpsrld xmm9,xmm12,27 3939 vpxor xmm6,xmm6,xmm14 3940 vpxor xmm4,xmm4,xmm1 3941 3942 vpslld xmm7,xmm13,30 3943 vpor xmm8,xmm8,xmm9 3944 vpaddd xmm11,xmm11,xmm6 3945 vpsrld xmm5,xmm4,31 3946 vpaddd xmm4,xmm4,xmm4 3947 3948 vpsrld xmm13,xmm13,2 3949 vpaddd xmm11,xmm11,xmm8 3950 vpor xmm4,xmm4,xmm5 3951 vpor xmm13,xmm13,xmm7 3952 vpxor xmm0,xmm0,xmm2 3953 vmovdqa xmm2,XMMWORD[((80-128))+rax] 3954 3955 vpslld xmm8,xmm11,5 3956 vpaddd xmm10,xmm10,xmm15 3957 vpxor xmm6,xmm14,xmm12 3958 vmovdqa XMMWORD[(32-128)+rax],xmm4 3959 vpaddd xmm10,xmm10,xmm4 3960 vpxor xmm0,xmm0,XMMWORD[((176-128))+rax] 3961 vpsrld xmm9,xmm11,27 3962 vpxor xmm6,xmm6,xmm13 3963 vpxor xmm0,xmm0,xmm2 3964 3965 vpslld xmm7,xmm12,30 3966 vpor xmm8,xmm8,xmm9 3967 vpaddd xmm10,xmm10,xmm6 3968 vpsrld xmm5,xmm0,31 3969 vpaddd xmm0,xmm0,xmm0 3970 3971 vpsrld xmm12,xmm12,2 3972 vpaddd xmm10,xmm10,xmm8 3973 vpor xmm0,xmm0,xmm5 3974 vpor xmm12,xmm12,xmm7 3975 vpxor xmm1,xmm1,xmm3 3976 vmovdqa xmm3,XMMWORD[((96-128))+rax] 3977 3978 vpslld xmm8,xmm10,5 3979 vpaddd xmm14,xmm14,xmm15 3980 vpxor xmm6,xmm13,xmm11 3981 vmovdqa XMMWORD[(48-128)+rax],xmm0 3982 vpaddd xmm14,xmm14,xmm0 3983 vpxor xmm1,xmm1,XMMWORD[((192-128))+rax] 3984 vpsrld xmm9,xmm10,27 3985 vpxor xmm6,xmm6,xmm12 3986 vpxor xmm1,xmm1,xmm3 3987 3988 vpslld xmm7,xmm11,30 3989 vpor xmm8,xmm8,xmm9 3990 vpaddd xmm14,xmm14,xmm6 3991 vpsrld xmm5,xmm1,31 3992 vpaddd xmm1,xmm1,xmm1 3993 3994 vpsrld xmm11,xmm11,2 3995 vpaddd xmm14,xmm14,xmm8 3996 vpor xmm1,xmm1,xmm5 3997 vpor xmm11,xmm11,xmm7 3998 vpxor xmm2,xmm2,xmm4 3999 vmovdqa xmm4,XMMWORD[((112-128))+rax] 4000 4001 vpslld xmm8,xmm14,5 4002 vpaddd xmm13,xmm13,xmm15 4003 vpxor xmm6,xmm12,xmm10 4004 vmovdqa XMMWORD[(64-128)+rax],xmm1 4005 vpaddd xmm13,xmm13,xmm1 4006 vpxor xmm2,xmm2,XMMWORD[((208-128))+rax] 4007 vpsrld xmm9,xmm14,27 4008 vpxor xmm6,xmm6,xmm11 4009 vpxor xmm2,xmm2,xmm4 4010 4011 vpslld xmm7,xmm10,30 4012 vpor xmm8,xmm8,xmm9 4013 vpaddd xmm13,xmm13,xmm6 4014 vpsrld xmm5,xmm2,31 4015 vpaddd xmm2,xmm2,xmm2 4016 4017 vpsrld xmm10,xmm10,2 4018 vpaddd xmm13,xmm13,xmm8 4019 vpor xmm2,xmm2,xmm5 4020 vpor xmm10,xmm10,xmm7 4021 vpxor xmm3,xmm3,xmm0 4022 vmovdqa xmm0,XMMWORD[((128-128))+rax] 4023 4024 vpslld xmm8,xmm13,5 4025 vpaddd xmm12,xmm12,xmm15 4026 vpxor xmm6,xmm11,xmm14 4027 vmovdqa XMMWORD[(80-128)+rax],xmm2 4028 vpaddd xmm12,xmm12,xmm2 4029 vpxor xmm3,xmm3,XMMWORD[((224-128))+rax] 4030 vpsrld xmm9,xmm13,27 4031 vpxor xmm6,xmm6,xmm10 4032 vpxor xmm3,xmm3,xmm0 4033 4034 vpslld xmm7,xmm14,30 4035 vpor xmm8,xmm8,xmm9 4036 vpaddd xmm12,xmm12,xmm6 4037 vpsrld xmm5,xmm3,31 4038 vpaddd xmm3,xmm3,xmm3 4039 4040 vpsrld xmm14,xmm14,2 4041 vpaddd xmm12,xmm12,xmm8 4042 vpor xmm3,xmm3,xmm5 4043 vpor xmm14,xmm14,xmm7 4044 vpxor xmm4,xmm4,xmm1 4045 vmovdqa xmm1,XMMWORD[((144-128))+rax] 4046 4047 vpslld xmm8,xmm12,5 4048 vpaddd xmm11,xmm11,xmm15 4049 vpxor xmm6,xmm10,xmm13 4050 vmovdqa XMMWORD[(96-128)+rax],xmm3 4051 vpaddd xmm11,xmm11,xmm3 4052 vpxor xmm4,xmm4,XMMWORD[((240-128))+rax] 4053 vpsrld xmm9,xmm12,27 4054 vpxor xmm6,xmm6,xmm14 4055 vpxor xmm4,xmm4,xmm1 4056 4057 vpslld xmm7,xmm13,30 4058 vpor xmm8,xmm8,xmm9 4059 vpaddd xmm11,xmm11,xmm6 4060 vpsrld xmm5,xmm4,31 4061 vpaddd xmm4,xmm4,xmm4 4062 4063 vpsrld xmm13,xmm13,2 4064 vpaddd xmm11,xmm11,xmm8 4065 vpor xmm4,xmm4,xmm5 4066 vpor xmm13,xmm13,xmm7 4067 vpxor xmm0,xmm0,xmm2 4068 vmovdqa xmm2,XMMWORD[((160-128))+rax] 4069 4070 vpslld xmm8,xmm11,5 4071 vpaddd xmm10,xmm10,xmm15 4072 vpxor xmm6,xmm14,xmm12 4073 vmovdqa XMMWORD[(112-128)+rax],xmm4 4074 vpaddd xmm10,xmm10,xmm4 4075 vpxor xmm0,xmm0,XMMWORD[((0-128))+rax] 4076 vpsrld xmm9,xmm11,27 4077 vpxor xmm6,xmm6,xmm13 4078 vpxor xmm0,xmm0,xmm2 4079 4080 vpslld xmm7,xmm12,30 4081 vpor xmm8,xmm8,xmm9 4082 vpaddd xmm10,xmm10,xmm6 4083 vpsrld xmm5,xmm0,31 4084 vpaddd xmm0,xmm0,xmm0 4085 4086 vpsrld xmm12,xmm12,2 4087 vpaddd xmm10,xmm10,xmm8 4088 vpor xmm0,xmm0,xmm5 4089 vpor xmm12,xmm12,xmm7 4090 vmovdqa xmm15,XMMWORD[32+rbp] 4091 vpxor xmm1,xmm1,xmm3 4092 vmovdqa xmm3,XMMWORD[((176-128))+rax] 4093 4094 vpaddd xmm14,xmm14,xmm15 4095 vpslld xmm8,xmm10,5 4096 vpand xmm7,xmm13,xmm12 4097 vpxor xmm1,xmm1,XMMWORD[((16-128))+rax] 4098 4099 vpaddd xmm14,xmm14,xmm7 4100 vpsrld xmm9,xmm10,27 4101 vpxor xmm6,xmm13,xmm12 4102 vpxor xmm1,xmm1,xmm3 4103 4104 vmovdqu XMMWORD[(128-128)+rax],xmm0 4105 vpaddd xmm14,xmm14,xmm0 4106 vpor xmm8,xmm8,xmm9 4107 vpsrld xmm5,xmm1,31 4108 vpand xmm6,xmm6,xmm11 4109 vpaddd xmm1,xmm1,xmm1 4110 4111 vpslld xmm7,xmm11,30 4112 vpaddd xmm14,xmm14,xmm6 4113 4114 vpsrld xmm11,xmm11,2 4115 vpaddd xmm14,xmm14,xmm8 4116 vpor xmm1,xmm1,xmm5 4117 vpor xmm11,xmm11,xmm7 4118 vpxor xmm2,xmm2,xmm4 4119 vmovdqa xmm4,XMMWORD[((192-128))+rax] 4120 4121 vpaddd xmm13,xmm13,xmm15 4122 vpslld xmm8,xmm14,5 4123 vpand xmm7,xmm12,xmm11 4124 vpxor xmm2,xmm2,XMMWORD[((32-128))+rax] 4125 4126 vpaddd xmm13,xmm13,xmm7 4127 vpsrld xmm9,xmm14,27 4128 vpxor xmm6,xmm12,xmm11 4129 vpxor xmm2,xmm2,xmm4 4130 4131 vmovdqu XMMWORD[(144-128)+rax],xmm1 4132 vpaddd xmm13,xmm13,xmm1 4133 vpor xmm8,xmm8,xmm9 4134 vpsrld xmm5,xmm2,31 4135 vpand xmm6,xmm6,xmm10 4136 vpaddd xmm2,xmm2,xmm2 4137 4138 vpslld xmm7,xmm10,30 4139 vpaddd xmm13,xmm13,xmm6 4140 4141 vpsrld xmm10,xmm10,2 4142 vpaddd xmm13,xmm13,xmm8 4143 vpor xmm2,xmm2,xmm5 4144 vpor xmm10,xmm10,xmm7 4145 vpxor xmm3,xmm3,xmm0 4146 vmovdqa xmm0,XMMWORD[((208-128))+rax] 4147 4148 vpaddd xmm12,xmm12,xmm15 4149 vpslld xmm8,xmm13,5 4150 vpand xmm7,xmm11,xmm10 4151 vpxor xmm3,xmm3,XMMWORD[((48-128))+rax] 4152 4153 vpaddd xmm12,xmm12,xmm7 4154 vpsrld xmm9,xmm13,27 4155 vpxor xmm6,xmm11,xmm10 4156 vpxor xmm3,xmm3,xmm0 4157 4158 vmovdqu XMMWORD[(160-128)+rax],xmm2 4159 vpaddd xmm12,xmm12,xmm2 4160 vpor xmm8,xmm8,xmm9 4161 vpsrld xmm5,xmm3,31 4162 vpand xmm6,xmm6,xmm14 4163 vpaddd xmm3,xmm3,xmm3 4164 4165 vpslld xmm7,xmm14,30 4166 vpaddd xmm12,xmm12,xmm6 4167 4168 vpsrld xmm14,xmm14,2 4169 vpaddd xmm12,xmm12,xmm8 4170 vpor xmm3,xmm3,xmm5 4171 vpor xmm14,xmm14,xmm7 4172 vpxor xmm4,xmm4,xmm1 4173 vmovdqa xmm1,XMMWORD[((224-128))+rax] 4174 4175 vpaddd xmm11,xmm11,xmm15 4176 vpslld xmm8,xmm12,5 4177 vpand xmm7,xmm10,xmm14 4178 vpxor xmm4,xmm4,XMMWORD[((64-128))+rax] 4179 4180 vpaddd xmm11,xmm11,xmm7 4181 vpsrld xmm9,xmm12,27 4182 vpxor xmm6,xmm10,xmm14 4183 vpxor xmm4,xmm4,xmm1 4184 4185 vmovdqu XMMWORD[(176-128)+rax],xmm3 4186 vpaddd xmm11,xmm11,xmm3 4187 vpor xmm8,xmm8,xmm9 4188 vpsrld xmm5,xmm4,31 4189 vpand xmm6,xmm6,xmm13 4190 vpaddd xmm4,xmm4,xmm4 4191 4192 vpslld xmm7,xmm13,30 4193 vpaddd xmm11,xmm11,xmm6 4194 4195 vpsrld xmm13,xmm13,2 4196 vpaddd xmm11,xmm11,xmm8 4197 vpor xmm4,xmm4,xmm5 4198 vpor xmm13,xmm13,xmm7 4199 vpxor xmm0,xmm0,xmm2 4200 vmovdqa xmm2,XMMWORD[((240-128))+rax] 4201 4202 vpaddd xmm10,xmm10,xmm15 4203 vpslld xmm8,xmm11,5 4204 vpand xmm7,xmm14,xmm13 4205 vpxor xmm0,xmm0,XMMWORD[((80-128))+rax] 4206 4207 vpaddd xmm10,xmm10,xmm7 4208 vpsrld xmm9,xmm11,27 4209 vpxor xmm6,xmm14,xmm13 4210 vpxor xmm0,xmm0,xmm2 4211 4212 vmovdqu XMMWORD[(192-128)+rax],xmm4 4213 vpaddd xmm10,xmm10,xmm4 4214 vpor xmm8,xmm8,xmm9 4215 vpsrld xmm5,xmm0,31 4216 vpand xmm6,xmm6,xmm12 4217 vpaddd xmm0,xmm0,xmm0 4218 4219 vpslld xmm7,xmm12,30 4220 vpaddd xmm10,xmm10,xmm6 4221 4222 vpsrld xmm12,xmm12,2 4223 vpaddd xmm10,xmm10,xmm8 4224 vpor xmm0,xmm0,xmm5 4225 vpor xmm12,xmm12,xmm7 4226 vpxor xmm1,xmm1,xmm3 4227 vmovdqa xmm3,XMMWORD[((0-128))+rax] 4228 4229 vpaddd xmm14,xmm14,xmm15 4230 vpslld xmm8,xmm10,5 4231 vpand xmm7,xmm13,xmm12 4232 vpxor xmm1,xmm1,XMMWORD[((96-128))+rax] 4233 4234 vpaddd xmm14,xmm14,xmm7 4235 vpsrld xmm9,xmm10,27 4236 vpxor xmm6,xmm13,xmm12 4237 vpxor xmm1,xmm1,xmm3 4238 4239 vmovdqu XMMWORD[(208-128)+rax],xmm0 4240 vpaddd xmm14,xmm14,xmm0 4241 vpor xmm8,xmm8,xmm9 4242 vpsrld xmm5,xmm1,31 4243 vpand xmm6,xmm6,xmm11 4244 vpaddd xmm1,xmm1,xmm1 4245 4246 vpslld xmm7,xmm11,30 4247 vpaddd xmm14,xmm14,xmm6 4248 4249 vpsrld xmm11,xmm11,2 4250 vpaddd xmm14,xmm14,xmm8 4251 vpor xmm1,xmm1,xmm5 4252 vpor xmm11,xmm11,xmm7 4253 vpxor xmm2,xmm2,xmm4 4254 vmovdqa xmm4,XMMWORD[((16-128))+rax] 4255 4256 vpaddd xmm13,xmm13,xmm15 4257 vpslld xmm8,xmm14,5 4258 vpand xmm7,xmm12,xmm11 4259 vpxor xmm2,xmm2,XMMWORD[((112-128))+rax] 4260 4261 vpaddd xmm13,xmm13,xmm7 4262 vpsrld xmm9,xmm14,27 4263 vpxor xmm6,xmm12,xmm11 4264 vpxor xmm2,xmm2,xmm4 4265 4266 vmovdqu XMMWORD[(224-128)+rax],xmm1 4267 vpaddd xmm13,xmm13,xmm1 4268 vpor xmm8,xmm8,xmm9 4269 vpsrld xmm5,xmm2,31 4270 vpand xmm6,xmm6,xmm10 4271 vpaddd xmm2,xmm2,xmm2 4272 4273 vpslld xmm7,xmm10,30 4274 vpaddd xmm13,xmm13,xmm6 4275 4276 vpsrld xmm10,xmm10,2 4277 vpaddd xmm13,xmm13,xmm8 4278 vpor xmm2,xmm2,xmm5 4279 vpor xmm10,xmm10,xmm7 4280 vpxor xmm3,xmm3,xmm0 4281 vmovdqa xmm0,XMMWORD[((32-128))+rax] 4282 4283 vpaddd xmm12,xmm12,xmm15 4284 vpslld xmm8,xmm13,5 4285 vpand xmm7,xmm11,xmm10 4286 vpxor xmm3,xmm3,XMMWORD[((128-128))+rax] 4287 4288 vpaddd xmm12,xmm12,xmm7 4289 vpsrld xmm9,xmm13,27 4290 vpxor xmm6,xmm11,xmm10 4291 vpxor xmm3,xmm3,xmm0 4292 4293 vmovdqu XMMWORD[(240-128)+rax],xmm2 4294 vpaddd xmm12,xmm12,xmm2 4295 vpor xmm8,xmm8,xmm9 4296 vpsrld xmm5,xmm3,31 4297 vpand xmm6,xmm6,xmm14 4298 vpaddd xmm3,xmm3,xmm3 4299 4300 vpslld xmm7,xmm14,30 4301 vpaddd xmm12,xmm12,xmm6 4302 4303 vpsrld xmm14,xmm14,2 4304 vpaddd xmm12,xmm12,xmm8 4305 vpor xmm3,xmm3,xmm5 4306 vpor xmm14,xmm14,xmm7 4307 vpxor xmm4,xmm4,xmm1 4308 vmovdqa xmm1,XMMWORD[((48-128))+rax] 4309 4310 vpaddd xmm11,xmm11,xmm15 4311 vpslld xmm8,xmm12,5 4312 vpand xmm7,xmm10,xmm14 4313 vpxor xmm4,xmm4,XMMWORD[((144-128))+rax] 4314 4315 vpaddd xmm11,xmm11,xmm7 4316 vpsrld xmm9,xmm12,27 4317 vpxor xmm6,xmm10,xmm14 4318 vpxor xmm4,xmm4,xmm1 4319 4320 vmovdqu XMMWORD[(0-128)+rax],xmm3 4321 vpaddd xmm11,xmm11,xmm3 4322 vpor xmm8,xmm8,xmm9 4323 vpsrld xmm5,xmm4,31 4324 vpand xmm6,xmm6,xmm13 4325 vpaddd xmm4,xmm4,xmm4 4326 4327 vpslld xmm7,xmm13,30 4328 vpaddd xmm11,xmm11,xmm6 4329 4330 vpsrld xmm13,xmm13,2 4331 vpaddd xmm11,xmm11,xmm8 4332 vpor xmm4,xmm4,xmm5 4333 vpor xmm13,xmm13,xmm7 4334 vpxor xmm0,xmm0,xmm2 4335 vmovdqa xmm2,XMMWORD[((64-128))+rax] 4336 4337 vpaddd xmm10,xmm10,xmm15 4338 vpslld xmm8,xmm11,5 4339 vpand xmm7,xmm14,xmm13 4340 vpxor xmm0,xmm0,XMMWORD[((160-128))+rax] 4341 4342 vpaddd xmm10,xmm10,xmm7 4343 vpsrld xmm9,xmm11,27 4344 vpxor xmm6,xmm14,xmm13 4345 vpxor xmm0,xmm0,xmm2 4346 4347 vmovdqu XMMWORD[(16-128)+rax],xmm4 4348 vpaddd xmm10,xmm10,xmm4 4349 vpor xmm8,xmm8,xmm9 4350 vpsrld xmm5,xmm0,31 4351 vpand xmm6,xmm6,xmm12 4352 vpaddd xmm0,xmm0,xmm0 4353 4354 vpslld xmm7,xmm12,30 4355 vpaddd xmm10,xmm10,xmm6 4356 4357 vpsrld xmm12,xmm12,2 4358 vpaddd xmm10,xmm10,xmm8 4359 vpor xmm0,xmm0,xmm5 4360 vpor xmm12,xmm12,xmm7 4361 vpxor xmm1,xmm1,xmm3 4362 vmovdqa xmm3,XMMWORD[((80-128))+rax] 4363 4364 vpaddd xmm14,xmm14,xmm15 4365 vpslld xmm8,xmm10,5 4366 vpand xmm7,xmm13,xmm12 4367 vpxor xmm1,xmm1,XMMWORD[((176-128))+rax] 4368 4369 vpaddd xmm14,xmm14,xmm7 4370 vpsrld xmm9,xmm10,27 4371 vpxor xmm6,xmm13,xmm12 4372 vpxor xmm1,xmm1,xmm3 4373 4374 vmovdqu XMMWORD[(32-128)+rax],xmm0 4375 vpaddd xmm14,xmm14,xmm0 4376 vpor xmm8,xmm8,xmm9 4377 vpsrld xmm5,xmm1,31 4378 vpand xmm6,xmm6,xmm11 4379 vpaddd xmm1,xmm1,xmm1 4380 4381 vpslld xmm7,xmm11,30 4382 vpaddd xmm14,xmm14,xmm6 4383 4384 vpsrld xmm11,xmm11,2 4385 vpaddd xmm14,xmm14,xmm8 4386 vpor xmm1,xmm1,xmm5 4387 vpor xmm11,xmm11,xmm7 4388 vpxor xmm2,xmm2,xmm4 4389 vmovdqa xmm4,XMMWORD[((96-128))+rax] 4390 4391 vpaddd xmm13,xmm13,xmm15 4392 vpslld xmm8,xmm14,5 4393 vpand xmm7,xmm12,xmm11 4394 vpxor xmm2,xmm2,XMMWORD[((192-128))+rax] 4395 4396 vpaddd xmm13,xmm13,xmm7 4397 vpsrld xmm9,xmm14,27 4398 vpxor xmm6,xmm12,xmm11 4399 vpxor xmm2,xmm2,xmm4 4400 4401 vmovdqu XMMWORD[(48-128)+rax],xmm1 4402 vpaddd xmm13,xmm13,xmm1 4403 vpor xmm8,xmm8,xmm9 4404 vpsrld xmm5,xmm2,31 4405 vpand xmm6,xmm6,xmm10 4406 vpaddd xmm2,xmm2,xmm2 4407 4408 vpslld xmm7,xmm10,30 4409 vpaddd xmm13,xmm13,xmm6 4410 4411 vpsrld xmm10,xmm10,2 4412 vpaddd xmm13,xmm13,xmm8 4413 vpor xmm2,xmm2,xmm5 4414 vpor xmm10,xmm10,xmm7 4415 vpxor xmm3,xmm3,xmm0 4416 vmovdqa xmm0,XMMWORD[((112-128))+rax] 4417 4418 vpaddd xmm12,xmm12,xmm15 4419 vpslld xmm8,xmm13,5 4420 vpand xmm7,xmm11,xmm10 4421 vpxor xmm3,xmm3,XMMWORD[((208-128))+rax] 4422 4423 vpaddd xmm12,xmm12,xmm7 4424 vpsrld xmm9,xmm13,27 4425 vpxor xmm6,xmm11,xmm10 4426 vpxor xmm3,xmm3,xmm0 4427 4428 vmovdqu XMMWORD[(64-128)+rax],xmm2 4429 vpaddd xmm12,xmm12,xmm2 4430 vpor xmm8,xmm8,xmm9 4431 vpsrld xmm5,xmm3,31 4432 vpand xmm6,xmm6,xmm14 4433 vpaddd xmm3,xmm3,xmm3 4434 4435 vpslld xmm7,xmm14,30 4436 vpaddd xmm12,xmm12,xmm6 4437 4438 vpsrld xmm14,xmm14,2 4439 vpaddd xmm12,xmm12,xmm8 4440 vpor xmm3,xmm3,xmm5 4441 vpor xmm14,xmm14,xmm7 4442 vpxor xmm4,xmm4,xmm1 4443 vmovdqa xmm1,XMMWORD[((128-128))+rax] 4444 4445 vpaddd xmm11,xmm11,xmm15 4446 vpslld xmm8,xmm12,5 4447 vpand xmm7,xmm10,xmm14 4448 vpxor xmm4,xmm4,XMMWORD[((224-128))+rax] 4449 4450 vpaddd xmm11,xmm11,xmm7 4451 vpsrld xmm9,xmm12,27 4452 vpxor xmm6,xmm10,xmm14 4453 vpxor xmm4,xmm4,xmm1 4454 4455 vmovdqu XMMWORD[(80-128)+rax],xmm3 4456 vpaddd xmm11,xmm11,xmm3 4457 vpor xmm8,xmm8,xmm9 4458 vpsrld xmm5,xmm4,31 4459 vpand xmm6,xmm6,xmm13 4460 vpaddd xmm4,xmm4,xmm4 4461 4462 vpslld xmm7,xmm13,30 4463 vpaddd xmm11,xmm11,xmm6 4464 4465 vpsrld xmm13,xmm13,2 4466 vpaddd xmm11,xmm11,xmm8 4467 vpor xmm4,xmm4,xmm5 4468 vpor xmm13,xmm13,xmm7 4469 vpxor xmm0,xmm0,xmm2 4470 vmovdqa xmm2,XMMWORD[((144-128))+rax] 4471 4472 vpaddd xmm10,xmm10,xmm15 4473 vpslld xmm8,xmm11,5 4474 vpand xmm7,xmm14,xmm13 4475 vpxor xmm0,xmm0,XMMWORD[((240-128))+rax] 4476 4477 vpaddd xmm10,xmm10,xmm7 4478 vpsrld xmm9,xmm11,27 4479 vpxor xmm6,xmm14,xmm13 4480 vpxor xmm0,xmm0,xmm2 4481 4482 vmovdqu XMMWORD[(96-128)+rax],xmm4 4483 vpaddd xmm10,xmm10,xmm4 4484 vpor xmm8,xmm8,xmm9 4485 vpsrld xmm5,xmm0,31 4486 vpand xmm6,xmm6,xmm12 4487 vpaddd xmm0,xmm0,xmm0 4488 4489 vpslld xmm7,xmm12,30 4490 vpaddd xmm10,xmm10,xmm6 4491 4492 vpsrld xmm12,xmm12,2 4493 vpaddd xmm10,xmm10,xmm8 4494 vpor xmm0,xmm0,xmm5 4495 vpor xmm12,xmm12,xmm7 4496 vpxor xmm1,xmm1,xmm3 4497 vmovdqa xmm3,XMMWORD[((160-128))+rax] 4498 4499 vpaddd xmm14,xmm14,xmm15 4500 vpslld xmm8,xmm10,5 4501 vpand xmm7,xmm13,xmm12 4502 vpxor xmm1,xmm1,XMMWORD[((0-128))+rax] 4503 4504 vpaddd xmm14,xmm14,xmm7 4505 vpsrld xmm9,xmm10,27 4506 vpxor xmm6,xmm13,xmm12 4507 vpxor xmm1,xmm1,xmm3 4508 4509 vmovdqu XMMWORD[(112-128)+rax],xmm0 4510 vpaddd xmm14,xmm14,xmm0 4511 vpor xmm8,xmm8,xmm9 4512 vpsrld xmm5,xmm1,31 4513 vpand xmm6,xmm6,xmm11 4514 vpaddd xmm1,xmm1,xmm1 4515 4516 vpslld xmm7,xmm11,30 4517 vpaddd xmm14,xmm14,xmm6 4518 4519 vpsrld xmm11,xmm11,2 4520 vpaddd xmm14,xmm14,xmm8 4521 vpor xmm1,xmm1,xmm5 4522 vpor xmm11,xmm11,xmm7 4523 vpxor xmm2,xmm2,xmm4 4524 vmovdqa xmm4,XMMWORD[((176-128))+rax] 4525 4526 vpaddd xmm13,xmm13,xmm15 4527 vpslld xmm8,xmm14,5 4528 vpand xmm7,xmm12,xmm11 4529 vpxor xmm2,xmm2,XMMWORD[((16-128))+rax] 4530 4531 vpaddd xmm13,xmm13,xmm7 4532 vpsrld xmm9,xmm14,27 4533 vpxor xmm6,xmm12,xmm11 4534 vpxor xmm2,xmm2,xmm4 4535 4536 vmovdqu XMMWORD[(128-128)+rax],xmm1 4537 vpaddd xmm13,xmm13,xmm1 4538 vpor xmm8,xmm8,xmm9 4539 vpsrld xmm5,xmm2,31 4540 vpand xmm6,xmm6,xmm10 4541 vpaddd xmm2,xmm2,xmm2 4542 4543 vpslld xmm7,xmm10,30 4544 vpaddd xmm13,xmm13,xmm6 4545 4546 vpsrld xmm10,xmm10,2 4547 vpaddd xmm13,xmm13,xmm8 4548 vpor xmm2,xmm2,xmm5 4549 vpor xmm10,xmm10,xmm7 4550 vpxor xmm3,xmm3,xmm0 4551 vmovdqa xmm0,XMMWORD[((192-128))+rax] 4552 4553 vpaddd xmm12,xmm12,xmm15 4554 vpslld xmm8,xmm13,5 4555 vpand xmm7,xmm11,xmm10 4556 vpxor xmm3,xmm3,XMMWORD[((32-128))+rax] 4557 4558 vpaddd xmm12,xmm12,xmm7 4559 vpsrld xmm9,xmm13,27 4560 vpxor xmm6,xmm11,xmm10 4561 vpxor xmm3,xmm3,xmm0 4562 4563 vmovdqu XMMWORD[(144-128)+rax],xmm2 4564 vpaddd xmm12,xmm12,xmm2 4565 vpor xmm8,xmm8,xmm9 4566 vpsrld xmm5,xmm3,31 4567 vpand xmm6,xmm6,xmm14 4568 vpaddd xmm3,xmm3,xmm3 4569 4570 vpslld xmm7,xmm14,30 4571 vpaddd xmm12,xmm12,xmm6 4572 4573 vpsrld xmm14,xmm14,2 4574 vpaddd xmm12,xmm12,xmm8 4575 vpor xmm3,xmm3,xmm5 4576 vpor xmm14,xmm14,xmm7 4577 vpxor xmm4,xmm4,xmm1 4578 vmovdqa xmm1,XMMWORD[((208-128))+rax] 4579 4580 vpaddd xmm11,xmm11,xmm15 4581 vpslld xmm8,xmm12,5 4582 vpand xmm7,xmm10,xmm14 4583 vpxor xmm4,xmm4,XMMWORD[((48-128))+rax] 4584 4585 vpaddd xmm11,xmm11,xmm7 4586 vpsrld xmm9,xmm12,27 4587 vpxor xmm6,xmm10,xmm14 4588 vpxor xmm4,xmm4,xmm1 4589 4590 vmovdqu XMMWORD[(160-128)+rax],xmm3 4591 vpaddd xmm11,xmm11,xmm3 4592 vpor xmm8,xmm8,xmm9 4593 vpsrld xmm5,xmm4,31 4594 vpand xmm6,xmm6,xmm13 4595 vpaddd xmm4,xmm4,xmm4 4596 4597 vpslld xmm7,xmm13,30 4598 vpaddd xmm11,xmm11,xmm6 4599 4600 vpsrld xmm13,xmm13,2 4601 vpaddd xmm11,xmm11,xmm8 4602 vpor xmm4,xmm4,xmm5 4603 vpor xmm13,xmm13,xmm7 4604 vpxor xmm0,xmm0,xmm2 4605 vmovdqa xmm2,XMMWORD[((224-128))+rax] 4606 4607 vpaddd xmm10,xmm10,xmm15 4608 vpslld xmm8,xmm11,5 4609 vpand xmm7,xmm14,xmm13 4610 vpxor xmm0,xmm0,XMMWORD[((64-128))+rax] 4611 4612 vpaddd xmm10,xmm10,xmm7 4613 vpsrld xmm9,xmm11,27 4614 vpxor xmm6,xmm14,xmm13 4615 vpxor xmm0,xmm0,xmm2 4616 4617 vmovdqu XMMWORD[(176-128)+rax],xmm4 4618 vpaddd xmm10,xmm10,xmm4 4619 vpor xmm8,xmm8,xmm9 4620 vpsrld xmm5,xmm0,31 4621 vpand xmm6,xmm6,xmm12 4622 vpaddd xmm0,xmm0,xmm0 4623 4624 vpslld xmm7,xmm12,30 4625 vpaddd xmm10,xmm10,xmm6 4626 4627 vpsrld xmm12,xmm12,2 4628 vpaddd xmm10,xmm10,xmm8 4629 vpor xmm0,xmm0,xmm5 4630 vpor xmm12,xmm12,xmm7 4631 vmovdqa xmm15,XMMWORD[64+rbp] 4632 vpxor xmm1,xmm1,xmm3 4633 vmovdqa xmm3,XMMWORD[((240-128))+rax] 4634 4635 vpslld xmm8,xmm10,5 4636 vpaddd xmm14,xmm14,xmm15 4637 vpxor xmm6,xmm13,xmm11 4638 vmovdqa XMMWORD[(192-128)+rax],xmm0 4639 vpaddd xmm14,xmm14,xmm0 4640 vpxor xmm1,xmm1,XMMWORD[((80-128))+rax] 4641 vpsrld xmm9,xmm10,27 4642 vpxor xmm6,xmm6,xmm12 4643 vpxor xmm1,xmm1,xmm3 4644 4645 vpslld xmm7,xmm11,30 4646 vpor xmm8,xmm8,xmm9 4647 vpaddd xmm14,xmm14,xmm6 4648 vpsrld xmm5,xmm1,31 4649 vpaddd xmm1,xmm1,xmm1 4650 4651 vpsrld xmm11,xmm11,2 4652 vpaddd xmm14,xmm14,xmm8 4653 vpor xmm1,xmm1,xmm5 4654 vpor xmm11,xmm11,xmm7 4655 vpxor xmm2,xmm2,xmm4 4656 vmovdqa xmm4,XMMWORD[((0-128))+rax] 4657 4658 vpslld xmm8,xmm14,5 4659 vpaddd xmm13,xmm13,xmm15 4660 vpxor xmm6,xmm12,xmm10 4661 vmovdqa XMMWORD[(208-128)+rax],xmm1 4662 vpaddd xmm13,xmm13,xmm1 4663 vpxor xmm2,xmm2,XMMWORD[((96-128))+rax] 4664 vpsrld xmm9,xmm14,27 4665 vpxor xmm6,xmm6,xmm11 4666 vpxor xmm2,xmm2,xmm4 4667 4668 vpslld xmm7,xmm10,30 4669 vpor xmm8,xmm8,xmm9 4670 vpaddd xmm13,xmm13,xmm6 4671 vpsrld xmm5,xmm2,31 4672 vpaddd xmm2,xmm2,xmm2 4673 4674 vpsrld xmm10,xmm10,2 4675 vpaddd xmm13,xmm13,xmm8 4676 vpor xmm2,xmm2,xmm5 4677 vpor xmm10,xmm10,xmm7 4678 vpxor xmm3,xmm3,xmm0 4679 vmovdqa xmm0,XMMWORD[((16-128))+rax] 4680 4681 vpslld xmm8,xmm13,5 4682 vpaddd xmm12,xmm12,xmm15 4683 vpxor xmm6,xmm11,xmm14 4684 vmovdqa XMMWORD[(224-128)+rax],xmm2 4685 vpaddd xmm12,xmm12,xmm2 4686 vpxor xmm3,xmm3,XMMWORD[((112-128))+rax] 4687 vpsrld xmm9,xmm13,27 4688 vpxor xmm6,xmm6,xmm10 4689 vpxor xmm3,xmm3,xmm0 4690 4691 vpslld xmm7,xmm14,30 4692 vpor xmm8,xmm8,xmm9 4693 vpaddd xmm12,xmm12,xmm6 4694 vpsrld xmm5,xmm3,31 4695 vpaddd xmm3,xmm3,xmm3 4696 4697 vpsrld xmm14,xmm14,2 4698 vpaddd xmm12,xmm12,xmm8 4699 vpor xmm3,xmm3,xmm5 4700 vpor xmm14,xmm14,xmm7 4701 vpxor xmm4,xmm4,xmm1 4702 vmovdqa xmm1,XMMWORD[((32-128))+rax] 4703 4704 vpslld xmm8,xmm12,5 4705 vpaddd xmm11,xmm11,xmm15 4706 vpxor xmm6,xmm10,xmm13 4707 vmovdqa XMMWORD[(240-128)+rax],xmm3 4708 vpaddd xmm11,xmm11,xmm3 4709 vpxor xmm4,xmm4,XMMWORD[((128-128))+rax] 4710 vpsrld xmm9,xmm12,27 4711 vpxor xmm6,xmm6,xmm14 4712 vpxor xmm4,xmm4,xmm1 4713 4714 vpslld xmm7,xmm13,30 4715 vpor xmm8,xmm8,xmm9 4716 vpaddd xmm11,xmm11,xmm6 4717 vpsrld xmm5,xmm4,31 4718 vpaddd xmm4,xmm4,xmm4 4719 4720 vpsrld xmm13,xmm13,2 4721 vpaddd xmm11,xmm11,xmm8 4722 vpor xmm4,xmm4,xmm5 4723 vpor xmm13,xmm13,xmm7 4724 vpxor xmm0,xmm0,xmm2 4725 vmovdqa xmm2,XMMWORD[((48-128))+rax] 4726 4727 vpslld xmm8,xmm11,5 4728 vpaddd xmm10,xmm10,xmm15 4729 vpxor xmm6,xmm14,xmm12 4730 vmovdqa XMMWORD[(0-128)+rax],xmm4 4731 vpaddd xmm10,xmm10,xmm4 4732 vpxor xmm0,xmm0,XMMWORD[((144-128))+rax] 4733 vpsrld xmm9,xmm11,27 4734 vpxor xmm6,xmm6,xmm13 4735 vpxor xmm0,xmm0,xmm2 4736 4737 vpslld xmm7,xmm12,30 4738 vpor xmm8,xmm8,xmm9 4739 vpaddd xmm10,xmm10,xmm6 4740 vpsrld xmm5,xmm0,31 4741 vpaddd xmm0,xmm0,xmm0 4742 4743 vpsrld xmm12,xmm12,2 4744 vpaddd xmm10,xmm10,xmm8 4745 vpor xmm0,xmm0,xmm5 4746 vpor xmm12,xmm12,xmm7 4747 vpxor xmm1,xmm1,xmm3 4748 vmovdqa xmm3,XMMWORD[((64-128))+rax] 4749 4750 vpslld xmm8,xmm10,5 4751 vpaddd xmm14,xmm14,xmm15 4752 vpxor xmm6,xmm13,xmm11 4753 vmovdqa XMMWORD[(16-128)+rax],xmm0 4754 vpaddd xmm14,xmm14,xmm0 4755 vpxor xmm1,xmm1,XMMWORD[((160-128))+rax] 4756 vpsrld xmm9,xmm10,27 4757 vpxor xmm6,xmm6,xmm12 4758 vpxor xmm1,xmm1,xmm3 4759 4760 vpslld xmm7,xmm11,30 4761 vpor xmm8,xmm8,xmm9 4762 vpaddd xmm14,xmm14,xmm6 4763 vpsrld xmm5,xmm1,31 4764 vpaddd xmm1,xmm1,xmm1 4765 4766 vpsrld xmm11,xmm11,2 4767 vpaddd xmm14,xmm14,xmm8 4768 vpor xmm1,xmm1,xmm5 4769 vpor xmm11,xmm11,xmm7 4770 vpxor xmm2,xmm2,xmm4 4771 vmovdqa xmm4,XMMWORD[((80-128))+rax] 4772 4773 vpslld xmm8,xmm14,5 4774 vpaddd xmm13,xmm13,xmm15 4775 vpxor xmm6,xmm12,xmm10 4776 vmovdqa XMMWORD[(32-128)+rax],xmm1 4777 vpaddd xmm13,xmm13,xmm1 4778 vpxor xmm2,xmm2,XMMWORD[((176-128))+rax] 4779 vpsrld xmm9,xmm14,27 4780 vpxor xmm6,xmm6,xmm11 4781 vpxor xmm2,xmm2,xmm4 4782 4783 vpslld xmm7,xmm10,30 4784 vpor xmm8,xmm8,xmm9 4785 vpaddd xmm13,xmm13,xmm6 4786 vpsrld xmm5,xmm2,31 4787 vpaddd xmm2,xmm2,xmm2 4788 4789 vpsrld xmm10,xmm10,2 4790 vpaddd xmm13,xmm13,xmm8 4791 vpor xmm2,xmm2,xmm5 4792 vpor xmm10,xmm10,xmm7 4793 vpxor xmm3,xmm3,xmm0 4794 vmovdqa xmm0,XMMWORD[((96-128))+rax] 4795 4796 vpslld xmm8,xmm13,5 4797 vpaddd xmm12,xmm12,xmm15 4798 vpxor xmm6,xmm11,xmm14 4799 vmovdqa XMMWORD[(48-128)+rax],xmm2 4800 vpaddd xmm12,xmm12,xmm2 4801 vpxor xmm3,xmm3,XMMWORD[((192-128))+rax] 4802 vpsrld xmm9,xmm13,27 4803 vpxor xmm6,xmm6,xmm10 4804 vpxor xmm3,xmm3,xmm0 4805 4806 vpslld xmm7,xmm14,30 4807 vpor xmm8,xmm8,xmm9 4808 vpaddd xmm12,xmm12,xmm6 4809 vpsrld xmm5,xmm3,31 4810 vpaddd xmm3,xmm3,xmm3 4811 4812 vpsrld xmm14,xmm14,2 4813 vpaddd xmm12,xmm12,xmm8 4814 vpor xmm3,xmm3,xmm5 4815 vpor xmm14,xmm14,xmm7 4816 vpxor xmm4,xmm4,xmm1 4817 vmovdqa xmm1,XMMWORD[((112-128))+rax] 4818 4819 vpslld xmm8,xmm12,5 4820 vpaddd xmm11,xmm11,xmm15 4821 vpxor xmm6,xmm10,xmm13 4822 vmovdqa XMMWORD[(64-128)+rax],xmm3 4823 vpaddd xmm11,xmm11,xmm3 4824 vpxor xmm4,xmm4,XMMWORD[((208-128))+rax] 4825 vpsrld xmm9,xmm12,27 4826 vpxor xmm6,xmm6,xmm14 4827 vpxor xmm4,xmm4,xmm1 4828 4829 vpslld xmm7,xmm13,30 4830 vpor xmm8,xmm8,xmm9 4831 vpaddd xmm11,xmm11,xmm6 4832 vpsrld xmm5,xmm4,31 4833 vpaddd xmm4,xmm4,xmm4 4834 4835 vpsrld xmm13,xmm13,2 4836 vpaddd xmm11,xmm11,xmm8 4837 vpor xmm4,xmm4,xmm5 4838 vpor xmm13,xmm13,xmm7 4839 vpxor xmm0,xmm0,xmm2 4840 vmovdqa xmm2,XMMWORD[((128-128))+rax] 4841 4842 vpslld xmm8,xmm11,5 4843 vpaddd xmm10,xmm10,xmm15 4844 vpxor xmm6,xmm14,xmm12 4845 vmovdqa XMMWORD[(80-128)+rax],xmm4 4846 vpaddd xmm10,xmm10,xmm4 4847 vpxor xmm0,xmm0,XMMWORD[((224-128))+rax] 4848 vpsrld xmm9,xmm11,27 4849 vpxor xmm6,xmm6,xmm13 4850 vpxor xmm0,xmm0,xmm2 4851 4852 vpslld xmm7,xmm12,30 4853 vpor xmm8,xmm8,xmm9 4854 vpaddd xmm10,xmm10,xmm6 4855 vpsrld xmm5,xmm0,31 4856 vpaddd xmm0,xmm0,xmm0 4857 4858 vpsrld xmm12,xmm12,2 4859 vpaddd xmm10,xmm10,xmm8 4860 vpor xmm0,xmm0,xmm5 4861 vpor xmm12,xmm12,xmm7 4862 vpxor xmm1,xmm1,xmm3 4863 vmovdqa xmm3,XMMWORD[((144-128))+rax] 4864 4865 vpslld xmm8,xmm10,5 4866 vpaddd xmm14,xmm14,xmm15 4867 vpxor xmm6,xmm13,xmm11 4868 vmovdqa XMMWORD[(96-128)+rax],xmm0 4869 vpaddd xmm14,xmm14,xmm0 4870 vpxor xmm1,xmm1,XMMWORD[((240-128))+rax] 4871 vpsrld xmm9,xmm10,27 4872 vpxor xmm6,xmm6,xmm12 4873 vpxor xmm1,xmm1,xmm3 4874 4875 vpslld xmm7,xmm11,30 4876 vpor xmm8,xmm8,xmm9 4877 vpaddd xmm14,xmm14,xmm6 4878 vpsrld xmm5,xmm1,31 4879 vpaddd xmm1,xmm1,xmm1 4880 4881 vpsrld xmm11,xmm11,2 4882 vpaddd xmm14,xmm14,xmm8 4883 vpor xmm1,xmm1,xmm5 4884 vpor xmm11,xmm11,xmm7 4885 vpxor xmm2,xmm2,xmm4 4886 vmovdqa xmm4,XMMWORD[((160-128))+rax] 4887 4888 vpslld xmm8,xmm14,5 4889 vpaddd xmm13,xmm13,xmm15 4890 vpxor xmm6,xmm12,xmm10 4891 vmovdqa XMMWORD[(112-128)+rax],xmm1 4892 vpaddd xmm13,xmm13,xmm1 4893 vpxor xmm2,xmm2,XMMWORD[((0-128))+rax] 4894 vpsrld xmm9,xmm14,27 4895 vpxor xmm6,xmm6,xmm11 4896 vpxor xmm2,xmm2,xmm4 4897 4898 vpslld xmm7,xmm10,30 4899 vpor xmm8,xmm8,xmm9 4900 vpaddd xmm13,xmm13,xmm6 4901 vpsrld xmm5,xmm2,31 4902 vpaddd xmm2,xmm2,xmm2 4903 4904 vpsrld xmm10,xmm10,2 4905 vpaddd xmm13,xmm13,xmm8 4906 vpor xmm2,xmm2,xmm5 4907 vpor xmm10,xmm10,xmm7 4908 vpxor xmm3,xmm3,xmm0 4909 vmovdqa xmm0,XMMWORD[((176-128))+rax] 4910 4911 vpslld xmm8,xmm13,5 4912 vpaddd xmm12,xmm12,xmm15 4913 vpxor xmm6,xmm11,xmm14 4914 vpaddd xmm12,xmm12,xmm2 4915 vpxor xmm3,xmm3,XMMWORD[((16-128))+rax] 4916 vpsrld xmm9,xmm13,27 4917 vpxor xmm6,xmm6,xmm10 4918 vpxor xmm3,xmm3,xmm0 4919 4920 vpslld xmm7,xmm14,30 4921 vpor xmm8,xmm8,xmm9 4922 vpaddd xmm12,xmm12,xmm6 4923 vpsrld xmm5,xmm3,31 4924 vpaddd xmm3,xmm3,xmm3 4925 4926 vpsrld xmm14,xmm14,2 4927 vpaddd xmm12,xmm12,xmm8 4928 vpor xmm3,xmm3,xmm5 4929 vpor xmm14,xmm14,xmm7 4930 vpxor xmm4,xmm4,xmm1 4931 vmovdqa xmm1,XMMWORD[((192-128))+rax] 4932 4933 vpslld xmm8,xmm12,5 4934 vpaddd xmm11,xmm11,xmm15 4935 vpxor xmm6,xmm10,xmm13 4936 vpaddd xmm11,xmm11,xmm3 4937 vpxor xmm4,xmm4,XMMWORD[((32-128))+rax] 4938 vpsrld xmm9,xmm12,27 4939 vpxor xmm6,xmm6,xmm14 4940 vpxor xmm4,xmm4,xmm1 4941 4942 vpslld xmm7,xmm13,30 4943 vpor xmm8,xmm8,xmm9 4944 vpaddd xmm11,xmm11,xmm6 4945 vpsrld xmm5,xmm4,31 4946 vpaddd xmm4,xmm4,xmm4 4947 4948 vpsrld xmm13,xmm13,2 4949 vpaddd xmm11,xmm11,xmm8 4950 vpor xmm4,xmm4,xmm5 4951 vpor xmm13,xmm13,xmm7 4952 vpxor xmm0,xmm0,xmm2 4953 vmovdqa xmm2,XMMWORD[((208-128))+rax] 4954 4955 vpslld xmm8,xmm11,5 4956 vpaddd xmm10,xmm10,xmm15 4957 vpxor xmm6,xmm14,xmm12 4958 vpaddd xmm10,xmm10,xmm4 4959 vpxor xmm0,xmm0,XMMWORD[((48-128))+rax] 4960 vpsrld xmm9,xmm11,27 4961 vpxor xmm6,xmm6,xmm13 4962 vpxor xmm0,xmm0,xmm2 4963 4964 vpslld xmm7,xmm12,30 4965 vpor xmm8,xmm8,xmm9 4966 vpaddd xmm10,xmm10,xmm6 4967 vpsrld xmm5,xmm0,31 4968 vpaddd xmm0,xmm0,xmm0 4969 4970 vpsrld xmm12,xmm12,2 4971 vpaddd xmm10,xmm10,xmm8 4972 vpor xmm0,xmm0,xmm5 4973 vpor xmm12,xmm12,xmm7 4974 vpxor xmm1,xmm1,xmm3 4975 vmovdqa xmm3,XMMWORD[((224-128))+rax] 4976 4977 vpslld xmm8,xmm10,5 4978 vpaddd xmm14,xmm14,xmm15 4979 vpxor xmm6,xmm13,xmm11 4980 vpaddd xmm14,xmm14,xmm0 4981 vpxor xmm1,xmm1,XMMWORD[((64-128))+rax] 4982 vpsrld xmm9,xmm10,27 4983 vpxor xmm6,xmm6,xmm12 4984 vpxor xmm1,xmm1,xmm3 4985 4986 vpslld xmm7,xmm11,30 4987 vpor xmm8,xmm8,xmm9 4988 vpaddd xmm14,xmm14,xmm6 4989 vpsrld xmm5,xmm1,31 4990 vpaddd xmm1,xmm1,xmm1 4991 4992 vpsrld xmm11,xmm11,2 4993 vpaddd xmm14,xmm14,xmm8 4994 vpor xmm1,xmm1,xmm5 4995 vpor xmm11,xmm11,xmm7 4996 vpxor xmm2,xmm2,xmm4 4997 vmovdqa xmm4,XMMWORD[((240-128))+rax] 4998 4999 vpslld xmm8,xmm14,5 5000 vpaddd xmm13,xmm13,xmm15 5001 vpxor xmm6,xmm12,xmm10 5002 vpaddd xmm13,xmm13,xmm1 5003 vpxor xmm2,xmm2,XMMWORD[((80-128))+rax] 5004 vpsrld xmm9,xmm14,27 5005 vpxor xmm6,xmm6,xmm11 5006 vpxor xmm2,xmm2,xmm4 5007 5008 vpslld xmm7,xmm10,30 5009 vpor xmm8,xmm8,xmm9 5010 vpaddd xmm13,xmm13,xmm6 5011 vpsrld xmm5,xmm2,31 5012 vpaddd xmm2,xmm2,xmm2 5013 5014 vpsrld xmm10,xmm10,2 5015 vpaddd xmm13,xmm13,xmm8 5016 vpor xmm2,xmm2,xmm5 5017 vpor xmm10,xmm10,xmm7 5018 vpxor xmm3,xmm3,xmm0 5019 vmovdqa xmm0,XMMWORD[((0-128))+rax] 5020 5021 vpslld xmm8,xmm13,5 5022 vpaddd xmm12,xmm12,xmm15 5023 vpxor xmm6,xmm11,xmm14 5024 vpaddd xmm12,xmm12,xmm2 5025 vpxor xmm3,xmm3,XMMWORD[((96-128))+rax] 5026 vpsrld xmm9,xmm13,27 5027 vpxor xmm6,xmm6,xmm10 5028 vpxor xmm3,xmm3,xmm0 5029 5030 vpslld xmm7,xmm14,30 5031 vpor xmm8,xmm8,xmm9 5032 vpaddd xmm12,xmm12,xmm6 5033 vpsrld xmm5,xmm3,31 5034 vpaddd xmm3,xmm3,xmm3 5035 5036 vpsrld xmm14,xmm14,2 5037 vpaddd xmm12,xmm12,xmm8 5038 vpor xmm3,xmm3,xmm5 5039 vpor xmm14,xmm14,xmm7 5040 vpxor xmm4,xmm4,xmm1 5041 vmovdqa xmm1,XMMWORD[((16-128))+rax] 5042 5043 vpslld xmm8,xmm12,5 5044 vpaddd xmm11,xmm11,xmm15 5045 vpxor xmm6,xmm10,xmm13 5046 vpaddd xmm11,xmm11,xmm3 5047 vpxor xmm4,xmm4,XMMWORD[((112-128))+rax] 5048 vpsrld xmm9,xmm12,27 5049 vpxor xmm6,xmm6,xmm14 5050 vpxor xmm4,xmm4,xmm1 5051 5052 vpslld xmm7,xmm13,30 5053 vpor xmm8,xmm8,xmm9 5054 vpaddd xmm11,xmm11,xmm6 5055 vpsrld xmm5,xmm4,31 5056 vpaddd xmm4,xmm4,xmm4 5057 5058 vpsrld xmm13,xmm13,2 5059 vpaddd xmm11,xmm11,xmm8 5060 vpor xmm4,xmm4,xmm5 5061 vpor xmm13,xmm13,xmm7 5062 vpslld xmm8,xmm11,5 5063 vpaddd xmm10,xmm10,xmm15 5064 vpxor xmm6,xmm14,xmm12 5065 5066 vpsrld xmm9,xmm11,27 5067 vpaddd xmm10,xmm10,xmm4 5068 vpxor xmm6,xmm6,xmm13 5069 5070 vpslld xmm7,xmm12,30 5071 vpor xmm8,xmm8,xmm9 5072 vpaddd xmm10,xmm10,xmm6 5073 5074 vpsrld xmm12,xmm12,2 5075 vpaddd xmm10,xmm10,xmm8 5076 vpor xmm12,xmm12,xmm7 5077 mov ecx,1 5078 cmp ecx,DWORD[rbx] 5079 cmovge r8,rbp 5080 cmp ecx,DWORD[4+rbx] 5081 cmovge r9,rbp 5082 cmp ecx,DWORD[8+rbx] 5083 cmovge r10,rbp 5084 cmp ecx,DWORD[12+rbx] 5085 cmovge r11,rbp 5086 vmovdqu xmm6,XMMWORD[rbx] 5087 vpxor xmm8,xmm8,xmm8 5088 vmovdqa xmm7,xmm6 5089 vpcmpgtd xmm7,xmm7,xmm8 5090 vpaddd xmm6,xmm6,xmm7 5091 5092 vpand xmm10,xmm10,xmm7 5093 vpand xmm11,xmm11,xmm7 5094 vpaddd xmm10,xmm10,XMMWORD[rdi] 5095 vpand xmm12,xmm12,xmm7 5096 vpaddd xmm11,xmm11,XMMWORD[32+rdi] 5097 vpand xmm13,xmm13,xmm7 5098 vpaddd xmm12,xmm12,XMMWORD[64+rdi] 5099 vpand xmm14,xmm14,xmm7 5100 vpaddd xmm13,xmm13,XMMWORD[96+rdi] 5101 vpaddd xmm14,xmm14,XMMWORD[128+rdi] 5102 vmovdqu XMMWORD[rdi],xmm10 5103 vmovdqu XMMWORD[32+rdi],xmm11 5104 vmovdqu XMMWORD[64+rdi],xmm12 5105 vmovdqu XMMWORD[96+rdi],xmm13 5106 vmovdqu XMMWORD[128+rdi],xmm14 5107 5108 vmovdqu XMMWORD[rbx],xmm6 5109 vmovdqu xmm5,XMMWORD[96+rbp] 5110 dec edx 5111 jnz NEAR $L$oop_avx 5112 5113 mov edx,DWORD[280+rsp] 5114 lea rdi,[16+rdi] 5115 lea rsi,[64+rsi] 5116 dec edx 5117 jnz NEAR $L$oop_grande_avx 5118 5119 $L$done_avx: 5120 mov rax,QWORD[272+rsp] 5121 5122 vzeroupper 5123 movaps xmm6,XMMWORD[((-184))+rax] 5124 movaps xmm7,XMMWORD[((-168))+rax] 5125 movaps xmm8,XMMWORD[((-152))+rax] 5126 movaps xmm9,XMMWORD[((-136))+rax] 5127 movaps xmm10,XMMWORD[((-120))+rax] 5128 movaps xmm11,XMMWORD[((-104))+rax] 5129 movaps xmm12,XMMWORD[((-88))+rax] 5130 movaps xmm13,XMMWORD[((-72))+rax] 5131 movaps xmm14,XMMWORD[((-56))+rax] 5132 movaps xmm15,XMMWORD[((-40))+rax] 5133 mov rbp,QWORD[((-16))+rax] 5134 5135 mov rbx,QWORD[((-8))+rax] 5136 5137 lea rsp,[rax] 5138 5139 $L$epilogue_avx: 5140 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 5141 mov rsi,QWORD[16+rsp] 5142 DB 0F3h,0C3h ;repret 5143 5144 $L$SEH_end_sha1_multi_block_avx: 5145 5146 ALIGN 32 5147 sha1_multi_block_avx2: 5148 mov QWORD[8+rsp],rdi ;WIN64 prologue 5149 mov QWORD[16+rsp],rsi 5150 mov rax,rsp 5151 $L$SEH_begin_sha1_multi_block_avx2: 5152 mov rdi,rcx 5153 mov rsi,rdx 5154 mov rdx,r8 5155 5156 5157 5158 _avx2_shortcut: 5159 mov rax,rsp 5160 5161 push rbx 5162 5163 push rbp 5164 5165 push r12 5166 5167 push r13 5168 5169 push r14 5170 5171 push r15 5172 5173 lea rsp,[((-168))+rsp] 5174 movaps XMMWORD[rsp],xmm6 5175 movaps XMMWORD[16+rsp],xmm7 5176 movaps XMMWORD[32+rsp],xmm8 5177 movaps XMMWORD[48+rsp],xmm9 5178 movaps XMMWORD[64+rsp],xmm10 5179 movaps XMMWORD[80+rsp],xmm11 5180 movaps XMMWORD[(-120)+rax],xmm12 5181 movaps XMMWORD[(-104)+rax],xmm13 5182 movaps XMMWORD[(-88)+rax],xmm14 5183 movaps XMMWORD[(-72)+rax],xmm15 5184 sub rsp,576 5185 and rsp,-256 5186 mov QWORD[544+rsp],rax 5187 5188 $L$body_avx2: 5189 lea rbp,[K_XX_XX] 5190 shr edx,1 5191 5192 vzeroupper 5193 $L$oop_grande_avx2: 5194 mov DWORD[552+rsp],edx 5195 xor edx,edx 5196 lea rbx,[512+rsp] 5197 5198 mov r12,QWORD[rsi] 5199 5200 mov ecx,DWORD[8+rsi] 5201 cmp ecx,edx 5202 cmovg edx,ecx 5203 test ecx,ecx 5204 mov DWORD[rbx],ecx 5205 cmovle r12,rbp 5206 5207 mov r13,QWORD[16+rsi] 5208 5209 mov ecx,DWORD[24+rsi] 5210 cmp ecx,edx 5211 cmovg edx,ecx 5212 test ecx,ecx 5213 mov DWORD[4+rbx],ecx 5214 cmovle r13,rbp 5215 5216 mov r14,QWORD[32+rsi] 5217 5218 mov ecx,DWORD[40+rsi] 5219 cmp ecx,edx 5220 cmovg edx,ecx 5221 test ecx,ecx 5222 mov DWORD[8+rbx],ecx 5223 cmovle r14,rbp 5224 5225 mov r15,QWORD[48+rsi] 5226 5227 mov ecx,DWORD[56+rsi] 5228 cmp ecx,edx 5229 cmovg edx,ecx 5230 test ecx,ecx 5231 mov DWORD[12+rbx],ecx 5232 cmovle r15,rbp 5233 5234 mov r8,QWORD[64+rsi] 5235 5236 mov ecx,DWORD[72+rsi] 5237 cmp ecx,edx 5238 cmovg edx,ecx 5239 test ecx,ecx 5240 mov DWORD[16+rbx],ecx 5241 cmovle r8,rbp 5242 5243 mov r9,QWORD[80+rsi] 5244 5245 mov ecx,DWORD[88+rsi] 5246 cmp ecx,edx 5247 cmovg edx,ecx 5248 test ecx,ecx 5249 mov DWORD[20+rbx],ecx 5250 cmovle r9,rbp 5251 5252 mov r10,QWORD[96+rsi] 5253 5254 mov ecx,DWORD[104+rsi] 5255 cmp ecx,edx 5256 cmovg edx,ecx 5257 test ecx,ecx 5258 mov DWORD[24+rbx],ecx 5259 cmovle r10,rbp 5260 5261 mov r11,QWORD[112+rsi] 5262 5263 mov ecx,DWORD[120+rsi] 5264 cmp ecx,edx 5265 cmovg edx,ecx 5266 test ecx,ecx 5267 mov DWORD[28+rbx],ecx 5268 cmovle r11,rbp 5269 vmovdqu ymm0,YMMWORD[rdi] 5270 lea rax,[128+rsp] 5271 vmovdqu ymm1,YMMWORD[32+rdi] 5272 lea rbx,[((256+128))+rsp] 5273 vmovdqu ymm2,YMMWORD[64+rdi] 5274 vmovdqu ymm3,YMMWORD[96+rdi] 5275 vmovdqu ymm4,YMMWORD[128+rdi] 5276 vmovdqu ymm9,YMMWORD[96+rbp] 5277 jmp NEAR $L$oop_avx2 5278 5279 ALIGN 32 5280 $L$oop_avx2: 5281 vmovdqa ymm15,YMMWORD[((-32))+rbp] 5282 vmovd xmm10,DWORD[r12] 5283 lea r12,[64+r12] 5284 vmovd xmm12,DWORD[r8] 5285 lea r8,[64+r8] 5286 vmovd xmm7,DWORD[r13] 5287 lea r13,[64+r13] 5288 vmovd xmm6,DWORD[r9] 5289 lea r9,[64+r9] 5290 vpinsrd xmm10,xmm10,DWORD[r14],1 5291 lea r14,[64+r14] 5292 vpinsrd xmm12,xmm12,DWORD[r10],1 5293 lea r10,[64+r10] 5294 vpinsrd xmm7,xmm7,DWORD[r15],1 5295 lea r15,[64+r15] 5296 vpunpckldq ymm10,ymm10,ymm7 5297 vpinsrd xmm6,xmm6,DWORD[r11],1 5298 lea r11,[64+r11] 5299 vpunpckldq ymm12,ymm12,ymm6 5300 vmovd xmm11,DWORD[((-60))+r12] 5301 vinserti128 ymm10,ymm10,xmm12,1 5302 vmovd xmm8,DWORD[((-60))+r8] 5303 vpshufb ymm10,ymm10,ymm9 5304 vmovd xmm7,DWORD[((-60))+r13] 5305 vmovd xmm6,DWORD[((-60))+r9] 5306 vpinsrd xmm11,xmm11,DWORD[((-60))+r14],1 5307 vpinsrd xmm8,xmm8,DWORD[((-60))+r10],1 5308 vpinsrd xmm7,xmm7,DWORD[((-60))+r15],1 5309 vpunpckldq ymm11,ymm11,ymm7 5310 vpinsrd xmm6,xmm6,DWORD[((-60))+r11],1 5311 vpunpckldq ymm8,ymm8,ymm6 5312 vpaddd ymm4,ymm4,ymm15 5313 vpslld ymm7,ymm0,5 5314 vpandn ymm6,ymm1,ymm3 5315 vpand ymm5,ymm1,ymm2 5316 5317 vmovdqa YMMWORD[(0-128)+rax],ymm10 5318 vpaddd ymm4,ymm4,ymm10 5319 vinserti128 ymm11,ymm11,xmm8,1 5320 vpsrld ymm8,ymm0,27 5321 vpxor ymm5,ymm5,ymm6 5322 vmovd xmm12,DWORD[((-56))+r12] 5323 5324 vpslld ymm6,ymm1,30 5325 vpor ymm7,ymm7,ymm8 5326 vmovd xmm8,DWORD[((-56))+r8] 5327 vpaddd ymm4,ymm4,ymm5 5328 5329 vpsrld ymm1,ymm1,2 5330 vpaddd ymm4,ymm4,ymm7 5331 vpshufb ymm11,ymm11,ymm9 5332 vpor ymm1,ymm1,ymm6 5333 vmovd xmm7,DWORD[((-56))+r13] 5334 vmovd xmm6,DWORD[((-56))+r9] 5335 vpinsrd xmm12,xmm12,DWORD[((-56))+r14],1 5336 vpinsrd xmm8,xmm8,DWORD[((-56))+r10],1 5337 vpinsrd xmm7,xmm7,DWORD[((-56))+r15],1 5338 vpunpckldq ymm12,ymm12,ymm7 5339 vpinsrd xmm6,xmm6,DWORD[((-56))+r11],1 5340 vpunpckldq ymm8,ymm8,ymm6 5341 vpaddd ymm3,ymm3,ymm15 5342 vpslld ymm7,ymm4,5 5343 vpandn ymm6,ymm0,ymm2 5344 vpand ymm5,ymm0,ymm1 5345 5346 vmovdqa YMMWORD[(32-128)+rax],ymm11 5347 vpaddd ymm3,ymm3,ymm11 5348 vinserti128 ymm12,ymm12,xmm8,1 5349 vpsrld ymm8,ymm4,27 5350 vpxor ymm5,ymm5,ymm6 5351 vmovd xmm13,DWORD[((-52))+r12] 5352 5353 vpslld ymm6,ymm0,30 5354 vpor ymm7,ymm7,ymm8 5355 vmovd xmm8,DWORD[((-52))+r8] 5356 vpaddd ymm3,ymm3,ymm5 5357 5358 vpsrld ymm0,ymm0,2 5359 vpaddd ymm3,ymm3,ymm7 5360 vpshufb ymm12,ymm12,ymm9 5361 vpor ymm0,ymm0,ymm6 5362 vmovd xmm7,DWORD[((-52))+r13] 5363 vmovd xmm6,DWORD[((-52))+r9] 5364 vpinsrd xmm13,xmm13,DWORD[((-52))+r14],1 5365 vpinsrd xmm8,xmm8,DWORD[((-52))+r10],1 5366 vpinsrd xmm7,xmm7,DWORD[((-52))+r15],1 5367 vpunpckldq ymm13,ymm13,ymm7 5368 vpinsrd xmm6,xmm6,DWORD[((-52))+r11],1 5369 vpunpckldq ymm8,ymm8,ymm6 5370 vpaddd ymm2,ymm2,ymm15 5371 vpslld ymm7,ymm3,5 5372 vpandn ymm6,ymm4,ymm1 5373 vpand ymm5,ymm4,ymm0 5374 5375 vmovdqa YMMWORD[(64-128)+rax],ymm12 5376 vpaddd ymm2,ymm2,ymm12 5377 vinserti128 ymm13,ymm13,xmm8,1 5378 vpsrld ymm8,ymm3,27 5379 vpxor ymm5,ymm5,ymm6 5380 vmovd xmm14,DWORD[((-48))+r12] 5381 5382 vpslld ymm6,ymm4,30 5383 vpor ymm7,ymm7,ymm8 5384 vmovd xmm8,DWORD[((-48))+r8] 5385 vpaddd ymm2,ymm2,ymm5 5386 5387 vpsrld ymm4,ymm4,2 5388 vpaddd ymm2,ymm2,ymm7 5389 vpshufb ymm13,ymm13,ymm9 5390 vpor ymm4,ymm4,ymm6 5391 vmovd xmm7,DWORD[((-48))+r13] 5392 vmovd xmm6,DWORD[((-48))+r9] 5393 vpinsrd xmm14,xmm14,DWORD[((-48))+r14],1 5394 vpinsrd xmm8,xmm8,DWORD[((-48))+r10],1 5395 vpinsrd xmm7,xmm7,DWORD[((-48))+r15],1 5396 vpunpckldq ymm14,ymm14,ymm7 5397 vpinsrd xmm6,xmm6,DWORD[((-48))+r11],1 5398 vpunpckldq ymm8,ymm8,ymm6 5399 vpaddd ymm1,ymm1,ymm15 5400 vpslld ymm7,ymm2,5 5401 vpandn ymm6,ymm3,ymm0 5402 vpand ymm5,ymm3,ymm4 5403 5404 vmovdqa YMMWORD[(96-128)+rax],ymm13 5405 vpaddd ymm1,ymm1,ymm13 5406 vinserti128 ymm14,ymm14,xmm8,1 5407 vpsrld ymm8,ymm2,27 5408 vpxor ymm5,ymm5,ymm6 5409 vmovd xmm10,DWORD[((-44))+r12] 5410 5411 vpslld ymm6,ymm3,30 5412 vpor ymm7,ymm7,ymm8 5413 vmovd xmm8,DWORD[((-44))+r8] 5414 vpaddd ymm1,ymm1,ymm5 5415 5416 vpsrld ymm3,ymm3,2 5417 vpaddd ymm1,ymm1,ymm7 5418 vpshufb ymm14,ymm14,ymm9 5419 vpor ymm3,ymm3,ymm6 5420 vmovd xmm7,DWORD[((-44))+r13] 5421 vmovd xmm6,DWORD[((-44))+r9] 5422 vpinsrd xmm10,xmm10,DWORD[((-44))+r14],1 5423 vpinsrd xmm8,xmm8,DWORD[((-44))+r10],1 5424 vpinsrd xmm7,xmm7,DWORD[((-44))+r15],1 5425 vpunpckldq ymm10,ymm10,ymm7 5426 vpinsrd xmm6,xmm6,DWORD[((-44))+r11],1 5427 vpunpckldq ymm8,ymm8,ymm6 5428 vpaddd ymm0,ymm0,ymm15 5429 vpslld ymm7,ymm1,5 5430 vpandn ymm6,ymm2,ymm4 5431 vpand ymm5,ymm2,ymm3 5432 5433 vmovdqa YMMWORD[(128-128)+rax],ymm14 5434 vpaddd ymm0,ymm0,ymm14 5435 vinserti128 ymm10,ymm10,xmm8,1 5436 vpsrld ymm8,ymm1,27 5437 vpxor ymm5,ymm5,ymm6 5438 vmovd xmm11,DWORD[((-40))+r12] 5439 5440 vpslld ymm6,ymm2,30 5441 vpor ymm7,ymm7,ymm8 5442 vmovd xmm8,DWORD[((-40))+r8] 5443 vpaddd ymm0,ymm0,ymm5 5444 5445 vpsrld ymm2,ymm2,2 5446 vpaddd ymm0,ymm0,ymm7 5447 vpshufb ymm10,ymm10,ymm9 5448 vpor ymm2,ymm2,ymm6 5449 vmovd xmm7,DWORD[((-40))+r13] 5450 vmovd xmm6,DWORD[((-40))+r9] 5451 vpinsrd xmm11,xmm11,DWORD[((-40))+r14],1 5452 vpinsrd xmm8,xmm8,DWORD[((-40))+r10],1 5453 vpinsrd xmm7,xmm7,DWORD[((-40))+r15],1 5454 vpunpckldq ymm11,ymm11,ymm7 5455 vpinsrd xmm6,xmm6,DWORD[((-40))+r11],1 5456 vpunpckldq ymm8,ymm8,ymm6 5457 vpaddd ymm4,ymm4,ymm15 5458 vpslld ymm7,ymm0,5 5459 vpandn ymm6,ymm1,ymm3 5460 vpand ymm5,ymm1,ymm2 5461 5462 vmovdqa YMMWORD[(160-128)+rax],ymm10 5463 vpaddd ymm4,ymm4,ymm10 5464 vinserti128 ymm11,ymm11,xmm8,1 5465 vpsrld ymm8,ymm0,27 5466 vpxor ymm5,ymm5,ymm6 5467 vmovd xmm12,DWORD[((-36))+r12] 5468 5469 vpslld ymm6,ymm1,30 5470 vpor ymm7,ymm7,ymm8 5471 vmovd xmm8,DWORD[((-36))+r8] 5472 vpaddd ymm4,ymm4,ymm5 5473 5474 vpsrld ymm1,ymm1,2 5475 vpaddd ymm4,ymm4,ymm7 5476 vpshufb ymm11,ymm11,ymm9 5477 vpor ymm1,ymm1,ymm6 5478 vmovd xmm7,DWORD[((-36))+r13] 5479 vmovd xmm6,DWORD[((-36))+r9] 5480 vpinsrd xmm12,xmm12,DWORD[((-36))+r14],1 5481 vpinsrd xmm8,xmm8,DWORD[((-36))+r10],1 5482 vpinsrd xmm7,xmm7,DWORD[((-36))+r15],1 5483 vpunpckldq ymm12,ymm12,ymm7 5484 vpinsrd xmm6,xmm6,DWORD[((-36))+r11],1 5485 vpunpckldq ymm8,ymm8,ymm6 5486 vpaddd ymm3,ymm3,ymm15 5487 vpslld ymm7,ymm4,5 5488 vpandn ymm6,ymm0,ymm2 5489 vpand ymm5,ymm0,ymm1 5490 5491 vmovdqa YMMWORD[(192-128)+rax],ymm11 5492 vpaddd ymm3,ymm3,ymm11 5493 vinserti128 ymm12,ymm12,xmm8,1 5494 vpsrld ymm8,ymm4,27 5495 vpxor ymm5,ymm5,ymm6 5496 vmovd xmm13,DWORD[((-32))+r12] 5497 5498 vpslld ymm6,ymm0,30 5499 vpor ymm7,ymm7,ymm8 5500 vmovd xmm8,DWORD[((-32))+r8] 5501 vpaddd ymm3,ymm3,ymm5 5502 5503 vpsrld ymm0,ymm0,2 5504 vpaddd ymm3,ymm3,ymm7 5505 vpshufb ymm12,ymm12,ymm9 5506 vpor ymm0,ymm0,ymm6 5507 vmovd xmm7,DWORD[((-32))+r13] 5508 vmovd xmm6,DWORD[((-32))+r9] 5509 vpinsrd xmm13,xmm13,DWORD[((-32))+r14],1 5510 vpinsrd xmm8,xmm8,DWORD[((-32))+r10],1 5511 vpinsrd xmm7,xmm7,DWORD[((-32))+r15],1 5512 vpunpckldq ymm13,ymm13,ymm7 5513 vpinsrd xmm6,xmm6,DWORD[((-32))+r11],1 5514 vpunpckldq ymm8,ymm8,ymm6 5515 vpaddd ymm2,ymm2,ymm15 5516 vpslld ymm7,ymm3,5 5517 vpandn ymm6,ymm4,ymm1 5518 vpand ymm5,ymm4,ymm0 5519 5520 vmovdqa YMMWORD[(224-128)+rax],ymm12 5521 vpaddd ymm2,ymm2,ymm12 5522 vinserti128 ymm13,ymm13,xmm8,1 5523 vpsrld ymm8,ymm3,27 5524 vpxor ymm5,ymm5,ymm6 5525 vmovd xmm14,DWORD[((-28))+r12] 5526 5527 vpslld ymm6,ymm4,30 5528 vpor ymm7,ymm7,ymm8 5529 vmovd xmm8,DWORD[((-28))+r8] 5530 vpaddd ymm2,ymm2,ymm5 5531 5532 vpsrld ymm4,ymm4,2 5533 vpaddd ymm2,ymm2,ymm7 5534 vpshufb ymm13,ymm13,ymm9 5535 vpor ymm4,ymm4,ymm6 5536 vmovd xmm7,DWORD[((-28))+r13] 5537 vmovd xmm6,DWORD[((-28))+r9] 5538 vpinsrd xmm14,xmm14,DWORD[((-28))+r14],1 5539 vpinsrd xmm8,xmm8,DWORD[((-28))+r10],1 5540 vpinsrd xmm7,xmm7,DWORD[((-28))+r15],1 5541 vpunpckldq ymm14,ymm14,ymm7 5542 vpinsrd xmm6,xmm6,DWORD[((-28))+r11],1 5543 vpunpckldq ymm8,ymm8,ymm6 5544 vpaddd ymm1,ymm1,ymm15 5545 vpslld ymm7,ymm2,5 5546 vpandn ymm6,ymm3,ymm0 5547 vpand ymm5,ymm3,ymm4 5548 5549 vmovdqa YMMWORD[(256-256-128)+rbx],ymm13 5550 vpaddd ymm1,ymm1,ymm13 5551 vinserti128 ymm14,ymm14,xmm8,1 5552 vpsrld ymm8,ymm2,27 5553 vpxor ymm5,ymm5,ymm6 5554 vmovd xmm10,DWORD[((-24))+r12] 5555 5556 vpslld ymm6,ymm3,30 5557 vpor ymm7,ymm7,ymm8 5558 vmovd xmm8,DWORD[((-24))+r8] 5559 vpaddd ymm1,ymm1,ymm5 5560 5561 vpsrld ymm3,ymm3,2 5562 vpaddd ymm1,ymm1,ymm7 5563 vpshufb ymm14,ymm14,ymm9 5564 vpor ymm3,ymm3,ymm6 5565 vmovd xmm7,DWORD[((-24))+r13] 5566 vmovd xmm6,DWORD[((-24))+r9] 5567 vpinsrd xmm10,xmm10,DWORD[((-24))+r14],1 5568 vpinsrd xmm8,xmm8,DWORD[((-24))+r10],1 5569 vpinsrd xmm7,xmm7,DWORD[((-24))+r15],1 5570 vpunpckldq ymm10,ymm10,ymm7 5571 vpinsrd xmm6,xmm6,DWORD[((-24))+r11],1 5572 vpunpckldq ymm8,ymm8,ymm6 5573 vpaddd ymm0,ymm0,ymm15 5574 vpslld ymm7,ymm1,5 5575 vpandn ymm6,ymm2,ymm4 5576 vpand ymm5,ymm2,ymm3 5577 5578 vmovdqa YMMWORD[(288-256-128)+rbx],ymm14 5579 vpaddd ymm0,ymm0,ymm14 5580 vinserti128 ymm10,ymm10,xmm8,1 5581 vpsrld ymm8,ymm1,27 5582 vpxor ymm5,ymm5,ymm6 5583 vmovd xmm11,DWORD[((-20))+r12] 5584 5585 vpslld ymm6,ymm2,30 5586 vpor ymm7,ymm7,ymm8 5587 vmovd xmm8,DWORD[((-20))+r8] 5588 vpaddd ymm0,ymm0,ymm5 5589 5590 vpsrld ymm2,ymm2,2 5591 vpaddd ymm0,ymm0,ymm7 5592 vpshufb ymm10,ymm10,ymm9 5593 vpor ymm2,ymm2,ymm6 5594 vmovd xmm7,DWORD[((-20))+r13] 5595 vmovd xmm6,DWORD[((-20))+r9] 5596 vpinsrd xmm11,xmm11,DWORD[((-20))+r14],1 5597 vpinsrd xmm8,xmm8,DWORD[((-20))+r10],1 5598 vpinsrd xmm7,xmm7,DWORD[((-20))+r15],1 5599 vpunpckldq ymm11,ymm11,ymm7 5600 vpinsrd xmm6,xmm6,DWORD[((-20))+r11],1 5601 vpunpckldq ymm8,ymm8,ymm6 5602 vpaddd ymm4,ymm4,ymm15 5603 vpslld ymm7,ymm0,5 5604 vpandn ymm6,ymm1,ymm3 5605 vpand ymm5,ymm1,ymm2 5606 5607 vmovdqa YMMWORD[(320-256-128)+rbx],ymm10 5608 vpaddd ymm4,ymm4,ymm10 5609 vinserti128 ymm11,ymm11,xmm8,1 5610 vpsrld ymm8,ymm0,27 5611 vpxor ymm5,ymm5,ymm6 5612 vmovd xmm12,DWORD[((-16))+r12] 5613 5614 vpslld ymm6,ymm1,30 5615 vpor ymm7,ymm7,ymm8 5616 vmovd xmm8,DWORD[((-16))+r8] 5617 vpaddd ymm4,ymm4,ymm5 5618 5619 vpsrld ymm1,ymm1,2 5620 vpaddd ymm4,ymm4,ymm7 5621 vpshufb ymm11,ymm11,ymm9 5622 vpor ymm1,ymm1,ymm6 5623 vmovd xmm7,DWORD[((-16))+r13] 5624 vmovd xmm6,DWORD[((-16))+r9] 5625 vpinsrd xmm12,xmm12,DWORD[((-16))+r14],1 5626 vpinsrd xmm8,xmm8,DWORD[((-16))+r10],1 5627 vpinsrd xmm7,xmm7,DWORD[((-16))+r15],1 5628 vpunpckldq ymm12,ymm12,ymm7 5629 vpinsrd xmm6,xmm6,DWORD[((-16))+r11],1 5630 vpunpckldq ymm8,ymm8,ymm6 5631 vpaddd ymm3,ymm3,ymm15 5632 vpslld ymm7,ymm4,5 5633 vpandn ymm6,ymm0,ymm2 5634 vpand ymm5,ymm0,ymm1 5635 5636 vmovdqa YMMWORD[(352-256-128)+rbx],ymm11 5637 vpaddd ymm3,ymm3,ymm11 5638 vinserti128 ymm12,ymm12,xmm8,1 5639 vpsrld ymm8,ymm4,27 5640 vpxor ymm5,ymm5,ymm6 5641 vmovd xmm13,DWORD[((-12))+r12] 5642 5643 vpslld ymm6,ymm0,30 5644 vpor ymm7,ymm7,ymm8 5645 vmovd xmm8,DWORD[((-12))+r8] 5646 vpaddd ymm3,ymm3,ymm5 5647 5648 vpsrld ymm0,ymm0,2 5649 vpaddd ymm3,ymm3,ymm7 5650 vpshufb ymm12,ymm12,ymm9 5651 vpor ymm0,ymm0,ymm6 5652 vmovd xmm7,DWORD[((-12))+r13] 5653 vmovd xmm6,DWORD[((-12))+r9] 5654 vpinsrd xmm13,xmm13,DWORD[((-12))+r14],1 5655 vpinsrd xmm8,xmm8,DWORD[((-12))+r10],1 5656 vpinsrd xmm7,xmm7,DWORD[((-12))+r15],1 5657 vpunpckldq ymm13,ymm13,ymm7 5658 vpinsrd xmm6,xmm6,DWORD[((-12))+r11],1 5659 vpunpckldq ymm8,ymm8,ymm6 5660 vpaddd ymm2,ymm2,ymm15 5661 vpslld ymm7,ymm3,5 5662 vpandn ymm6,ymm4,ymm1 5663 vpand ymm5,ymm4,ymm0 5664 5665 vmovdqa YMMWORD[(384-256-128)+rbx],ymm12 5666 vpaddd ymm2,ymm2,ymm12 5667 vinserti128 ymm13,ymm13,xmm8,1 5668 vpsrld ymm8,ymm3,27 5669 vpxor ymm5,ymm5,ymm6 5670 vmovd xmm14,DWORD[((-8))+r12] 5671 5672 vpslld ymm6,ymm4,30 5673 vpor ymm7,ymm7,ymm8 5674 vmovd xmm8,DWORD[((-8))+r8] 5675 vpaddd ymm2,ymm2,ymm5 5676 5677 vpsrld ymm4,ymm4,2 5678 vpaddd ymm2,ymm2,ymm7 5679 vpshufb ymm13,ymm13,ymm9 5680 vpor ymm4,ymm4,ymm6 5681 vmovd xmm7,DWORD[((-8))+r13] 5682 vmovd xmm6,DWORD[((-8))+r9] 5683 vpinsrd xmm14,xmm14,DWORD[((-8))+r14],1 5684 vpinsrd xmm8,xmm8,DWORD[((-8))+r10],1 5685 vpinsrd xmm7,xmm7,DWORD[((-8))+r15],1 5686 vpunpckldq ymm14,ymm14,ymm7 5687 vpinsrd xmm6,xmm6,DWORD[((-8))+r11],1 5688 vpunpckldq ymm8,ymm8,ymm6 5689 vpaddd ymm1,ymm1,ymm15 5690 vpslld ymm7,ymm2,5 5691 vpandn ymm6,ymm3,ymm0 5692 vpand ymm5,ymm3,ymm4 5693 5694 vmovdqa YMMWORD[(416-256-128)+rbx],ymm13 5695 vpaddd ymm1,ymm1,ymm13 5696 vinserti128 ymm14,ymm14,xmm8,1 5697 vpsrld ymm8,ymm2,27 5698 vpxor ymm5,ymm5,ymm6 5699 vmovd xmm10,DWORD[((-4))+r12] 5700 5701 vpslld ymm6,ymm3,30 5702 vpor ymm7,ymm7,ymm8 5703 vmovd xmm8,DWORD[((-4))+r8] 5704 vpaddd ymm1,ymm1,ymm5 5705 5706 vpsrld ymm3,ymm3,2 5707 vpaddd ymm1,ymm1,ymm7 5708 vpshufb ymm14,ymm14,ymm9 5709 vpor ymm3,ymm3,ymm6 5710 vmovdqa ymm11,YMMWORD[((0-128))+rax] 5711 vmovd xmm7,DWORD[((-4))+r13] 5712 vmovd xmm6,DWORD[((-4))+r9] 5713 vpinsrd xmm10,xmm10,DWORD[((-4))+r14],1 5714 vpinsrd xmm8,xmm8,DWORD[((-4))+r10],1 5715 vpinsrd xmm7,xmm7,DWORD[((-4))+r15],1 5716 vpunpckldq ymm10,ymm10,ymm7 5717 vpinsrd xmm6,xmm6,DWORD[((-4))+r11],1 5718 vpunpckldq ymm8,ymm8,ymm6 5719 vpaddd ymm0,ymm0,ymm15 5720 prefetcht0 [63+r12] 5721 vpslld ymm7,ymm1,5 5722 vpandn ymm6,ymm2,ymm4 5723 vpand ymm5,ymm2,ymm3 5724 5725 vmovdqa YMMWORD[(448-256-128)+rbx],ymm14 5726 vpaddd ymm0,ymm0,ymm14 5727 vinserti128 ymm10,ymm10,xmm8,1 5728 vpsrld ymm8,ymm1,27 5729 prefetcht0 [63+r13] 5730 vpxor ymm5,ymm5,ymm6 5731 5732 vpslld ymm6,ymm2,30 5733 vpor ymm7,ymm7,ymm8 5734 prefetcht0 [63+r14] 5735 vpaddd ymm0,ymm0,ymm5 5736 5737 vpsrld ymm2,ymm2,2 5738 vpaddd ymm0,ymm0,ymm7 5739 prefetcht0 [63+r15] 5740 vpshufb ymm10,ymm10,ymm9 5741 vpor ymm2,ymm2,ymm6 5742 vmovdqa ymm12,YMMWORD[((32-128))+rax] 5743 vpxor ymm11,ymm11,ymm13 5744 vmovdqa ymm13,YMMWORD[((64-128))+rax] 5745 5746 vpaddd ymm4,ymm4,ymm15 5747 vpslld ymm7,ymm0,5 5748 vpandn ymm6,ymm1,ymm3 5749 prefetcht0 [63+r8] 5750 vpand ymm5,ymm1,ymm2 5751 5752 vmovdqa YMMWORD[(480-256-128)+rbx],ymm10 5753 vpaddd ymm4,ymm4,ymm10 5754 vpxor ymm11,ymm11,YMMWORD[((256-256-128))+rbx] 5755 vpsrld ymm8,ymm0,27 5756 vpxor ymm5,ymm5,ymm6 5757 vpxor ymm11,ymm11,ymm13 5758 prefetcht0 [63+r9] 5759 5760 vpslld ymm6,ymm1,30 5761 vpor ymm7,ymm7,ymm8 5762 vpaddd ymm4,ymm4,ymm5 5763 prefetcht0 [63+r10] 5764 vpsrld ymm9,ymm11,31 5765 vpaddd ymm11,ymm11,ymm11 5766 5767 vpsrld ymm1,ymm1,2 5768 prefetcht0 [63+r11] 5769 vpaddd ymm4,ymm4,ymm7 5770 vpor ymm11,ymm11,ymm9 5771 vpor ymm1,ymm1,ymm6 5772 vpxor ymm12,ymm12,ymm14 5773 vmovdqa ymm14,YMMWORD[((96-128))+rax] 5774 5775 vpaddd ymm3,ymm3,ymm15 5776 vpslld ymm7,ymm4,5 5777 vpandn ymm6,ymm0,ymm2 5778 5779 vpand ymm5,ymm0,ymm1 5780 5781 vmovdqa YMMWORD[(0-128)+rax],ymm11 5782 vpaddd ymm3,ymm3,ymm11 5783 vpxor ymm12,ymm12,YMMWORD[((288-256-128))+rbx] 5784 vpsrld ymm8,ymm4,27 5785 vpxor ymm5,ymm5,ymm6 5786 vpxor ymm12,ymm12,ymm14 5787 5788 5789 vpslld ymm6,ymm0,30 5790 vpor ymm7,ymm7,ymm8 5791 vpaddd ymm3,ymm3,ymm5 5792 5793 vpsrld ymm9,ymm12,31 5794 vpaddd ymm12,ymm12,ymm12 5795 5796 vpsrld ymm0,ymm0,2 5797 5798 vpaddd ymm3,ymm3,ymm7 5799 vpor ymm12,ymm12,ymm9 5800 vpor ymm0,ymm0,ymm6 5801 vpxor ymm13,ymm13,ymm10 5802 vmovdqa ymm10,YMMWORD[((128-128))+rax] 5803 5804 vpaddd ymm2,ymm2,ymm15 5805 vpslld ymm7,ymm3,5 5806 vpandn ymm6,ymm4,ymm1 5807 5808 vpand ymm5,ymm4,ymm0 5809 5810 vmovdqa YMMWORD[(32-128)+rax],ymm12 5811 vpaddd ymm2,ymm2,ymm12 5812 vpxor ymm13,ymm13,YMMWORD[((320-256-128))+rbx] 5813 vpsrld ymm8,ymm3,27 5814 vpxor ymm5,ymm5,ymm6 5815 vpxor ymm13,ymm13,ymm10 5816 5817 5818 vpslld ymm6,ymm4,30 5819 vpor ymm7,ymm7,ymm8 5820 vpaddd ymm2,ymm2,ymm5 5821 5822 vpsrld ymm9,ymm13,31 5823 vpaddd ymm13,ymm13,ymm13 5824 5825 vpsrld ymm4,ymm4,2 5826 5827 vpaddd ymm2,ymm2,ymm7 5828 vpor ymm13,ymm13,ymm9 5829 vpor ymm4,ymm4,ymm6 5830 vpxor ymm14,ymm14,ymm11 5831 vmovdqa ymm11,YMMWORD[((160-128))+rax] 5832 5833 vpaddd ymm1,ymm1,ymm15 5834 vpslld ymm7,ymm2,5 5835 vpandn ymm6,ymm3,ymm0 5836 5837 vpand ymm5,ymm3,ymm4 5838 5839 vmovdqa YMMWORD[(64-128)+rax],ymm13 5840 vpaddd ymm1,ymm1,ymm13 5841 vpxor ymm14,ymm14,YMMWORD[((352-256-128))+rbx] 5842 vpsrld ymm8,ymm2,27 5843 vpxor ymm5,ymm5,ymm6 5844 vpxor ymm14,ymm14,ymm11 5845 5846 5847 vpslld ymm6,ymm3,30 5848 vpor ymm7,ymm7,ymm8 5849 vpaddd ymm1,ymm1,ymm5 5850 5851 vpsrld ymm9,ymm14,31 5852 vpaddd ymm14,ymm14,ymm14 5853 5854 vpsrld ymm3,ymm3,2 5855 5856 vpaddd ymm1,ymm1,ymm7 5857 vpor ymm14,ymm14,ymm9 5858 vpor ymm3,ymm3,ymm6 5859 vpxor ymm10,ymm10,ymm12 5860 vmovdqa ymm12,YMMWORD[((192-128))+rax] 5861 5862 vpaddd ymm0,ymm0,ymm15 5863 vpslld ymm7,ymm1,5 5864 vpandn ymm6,ymm2,ymm4 5865 5866 vpand ymm5,ymm2,ymm3 5867 5868 vmovdqa YMMWORD[(96-128)+rax],ymm14 5869 vpaddd ymm0,ymm0,ymm14 5870 vpxor ymm10,ymm10,YMMWORD[((384-256-128))+rbx] 5871 vpsrld ymm8,ymm1,27 5872 vpxor ymm5,ymm5,ymm6 5873 vpxor ymm10,ymm10,ymm12 5874 5875 5876 vpslld ymm6,ymm2,30 5877 vpor ymm7,ymm7,ymm8 5878 vpaddd ymm0,ymm0,ymm5 5879 5880 vpsrld ymm9,ymm10,31 5881 vpaddd ymm10,ymm10,ymm10 5882 5883 vpsrld ymm2,ymm2,2 5884 5885 vpaddd ymm0,ymm0,ymm7 5886 vpor ymm10,ymm10,ymm9 5887 vpor ymm2,ymm2,ymm6 5888 vmovdqa ymm15,YMMWORD[rbp] 5889 vpxor ymm11,ymm11,ymm13 5890 vmovdqa ymm13,YMMWORD[((224-128))+rax] 5891 5892 vpslld ymm7,ymm0,5 5893 vpaddd ymm4,ymm4,ymm15 5894 vpxor ymm5,ymm3,ymm1 5895 vmovdqa YMMWORD[(128-128)+rax],ymm10 5896 vpaddd ymm4,ymm4,ymm10 5897 vpxor ymm11,ymm11,YMMWORD[((416-256-128))+rbx] 5898 vpsrld ymm8,ymm0,27 5899 vpxor ymm5,ymm5,ymm2 5900 vpxor ymm11,ymm11,ymm13 5901 5902 vpslld ymm6,ymm1,30 5903 vpor ymm7,ymm7,ymm8 5904 vpaddd ymm4,ymm4,ymm5 5905 vpsrld ymm9,ymm11,31 5906 vpaddd ymm11,ymm11,ymm11 5907 5908 vpsrld ymm1,ymm1,2 5909 vpaddd ymm4,ymm4,ymm7 5910 vpor ymm11,ymm11,ymm9 5911 vpor ymm1,ymm1,ymm6 5912 vpxor ymm12,ymm12,ymm14 5913 vmovdqa ymm14,YMMWORD[((256-256-128))+rbx] 5914 5915 vpslld ymm7,ymm4,5 5916 vpaddd ymm3,ymm3,ymm15 5917 vpxor ymm5,ymm2,ymm0 5918 vmovdqa YMMWORD[(160-128)+rax],ymm11 5919 vpaddd ymm3,ymm3,ymm11 5920 vpxor ymm12,ymm12,YMMWORD[((448-256-128))+rbx] 5921 vpsrld ymm8,ymm4,27 5922 vpxor ymm5,ymm5,ymm1 5923 vpxor ymm12,ymm12,ymm14 5924 5925 vpslld ymm6,ymm0,30 5926 vpor ymm7,ymm7,ymm8 5927 vpaddd ymm3,ymm3,ymm5 5928 vpsrld ymm9,ymm12,31 5929 vpaddd ymm12,ymm12,ymm12 5930 5931 vpsrld ymm0,ymm0,2 5932 vpaddd ymm3,ymm3,ymm7 5933 vpor ymm12,ymm12,ymm9 5934 vpor ymm0,ymm0,ymm6 5935 vpxor ymm13,ymm13,ymm10 5936 vmovdqa ymm10,YMMWORD[((288-256-128))+rbx] 5937 5938 vpslld ymm7,ymm3,5 5939 vpaddd ymm2,ymm2,ymm15 5940 vpxor ymm5,ymm1,ymm4 5941 vmovdqa YMMWORD[(192-128)+rax],ymm12 5942 vpaddd ymm2,ymm2,ymm12 5943 vpxor ymm13,ymm13,YMMWORD[((480-256-128))+rbx] 5944 vpsrld ymm8,ymm3,27 5945 vpxor ymm5,ymm5,ymm0 5946 vpxor ymm13,ymm13,ymm10 5947 5948 vpslld ymm6,ymm4,30 5949 vpor ymm7,ymm7,ymm8 5950 vpaddd ymm2,ymm2,ymm5 5951 vpsrld ymm9,ymm13,31 5952 vpaddd ymm13,ymm13,ymm13 5953 5954 vpsrld ymm4,ymm4,2 5955 vpaddd ymm2,ymm2,ymm7 5956 vpor ymm13,ymm13,ymm9 5957 vpor ymm4,ymm4,ymm6 5958 vpxor ymm14,ymm14,ymm11 5959 vmovdqa ymm11,YMMWORD[((320-256-128))+rbx] 5960 5961 vpslld ymm7,ymm2,5 5962 vpaddd ymm1,ymm1,ymm15 5963 vpxor ymm5,ymm0,ymm3 5964 vmovdqa YMMWORD[(224-128)+rax],ymm13 5965 vpaddd ymm1,ymm1,ymm13 5966 vpxor ymm14,ymm14,YMMWORD[((0-128))+rax] 5967 vpsrld ymm8,ymm2,27 5968 vpxor ymm5,ymm5,ymm4 5969 vpxor ymm14,ymm14,ymm11 5970 5971 vpslld ymm6,ymm3,30 5972 vpor ymm7,ymm7,ymm8 5973 vpaddd ymm1,ymm1,ymm5 5974 vpsrld ymm9,ymm14,31 5975 vpaddd ymm14,ymm14,ymm14 5976 5977 vpsrld ymm3,ymm3,2 5978 vpaddd ymm1,ymm1,ymm7 5979 vpor ymm14,ymm14,ymm9 5980 vpor ymm3,ymm3,ymm6 5981 vpxor ymm10,ymm10,ymm12 5982 vmovdqa ymm12,YMMWORD[((352-256-128))+rbx] 5983 5984 vpslld ymm7,ymm1,5 5985 vpaddd ymm0,ymm0,ymm15 5986 vpxor ymm5,ymm4,ymm2 5987 vmovdqa YMMWORD[(256-256-128)+rbx],ymm14 5988 vpaddd ymm0,ymm0,ymm14 5989 vpxor ymm10,ymm10,YMMWORD[((32-128))+rax] 5990 vpsrld ymm8,ymm1,27 5991 vpxor ymm5,ymm5,ymm3 5992 vpxor ymm10,ymm10,ymm12 5993 5994 vpslld ymm6,ymm2,30 5995 vpor ymm7,ymm7,ymm8 5996 vpaddd ymm0,ymm0,ymm5 5997 vpsrld ymm9,ymm10,31 5998 vpaddd ymm10,ymm10,ymm10 5999 6000 vpsrld ymm2,ymm2,2 6001 vpaddd ymm0,ymm0,ymm7 6002 vpor ymm10,ymm10,ymm9 6003 vpor ymm2,ymm2,ymm6 6004 vpxor ymm11,ymm11,ymm13 6005 vmovdqa ymm13,YMMWORD[((384-256-128))+rbx] 6006 6007 vpslld ymm7,ymm0,5 6008 vpaddd ymm4,ymm4,ymm15 6009 vpxor ymm5,ymm3,ymm1 6010 vmovdqa YMMWORD[(288-256-128)+rbx],ymm10 6011 vpaddd ymm4,ymm4,ymm10 6012 vpxor ymm11,ymm11,YMMWORD[((64-128))+rax] 6013 vpsrld ymm8,ymm0,27 6014 vpxor ymm5,ymm5,ymm2 6015 vpxor ymm11,ymm11,ymm13 6016 6017 vpslld ymm6,ymm1,30 6018 vpor ymm7,ymm7,ymm8 6019 vpaddd ymm4,ymm4,ymm5 6020 vpsrld ymm9,ymm11,31 6021 vpaddd ymm11,ymm11,ymm11 6022 6023 vpsrld ymm1,ymm1,2 6024 vpaddd ymm4,ymm4,ymm7 6025 vpor ymm11,ymm11,ymm9 6026 vpor ymm1,ymm1,ymm6 6027 vpxor ymm12,ymm12,ymm14 6028 vmovdqa ymm14,YMMWORD[((416-256-128))+rbx] 6029 6030 vpslld ymm7,ymm4,5 6031 vpaddd ymm3,ymm3,ymm15 6032 vpxor ymm5,ymm2,ymm0 6033 vmovdqa YMMWORD[(320-256-128)+rbx],ymm11 6034 vpaddd ymm3,ymm3,ymm11 6035 vpxor ymm12,ymm12,YMMWORD[((96-128))+rax] 6036 vpsrld ymm8,ymm4,27 6037 vpxor ymm5,ymm5,ymm1 6038 vpxor ymm12,ymm12,ymm14 6039 6040 vpslld ymm6,ymm0,30 6041 vpor ymm7,ymm7,ymm8 6042 vpaddd ymm3,ymm3,ymm5 6043 vpsrld ymm9,ymm12,31 6044 vpaddd ymm12,ymm12,ymm12 6045 6046 vpsrld ymm0,ymm0,2 6047 vpaddd ymm3,ymm3,ymm7 6048 vpor ymm12,ymm12,ymm9 6049 vpor ymm0,ymm0,ymm6 6050 vpxor ymm13,ymm13,ymm10 6051 vmovdqa ymm10,YMMWORD[((448-256-128))+rbx] 6052 6053 vpslld ymm7,ymm3,5 6054 vpaddd ymm2,ymm2,ymm15 6055 vpxor ymm5,ymm1,ymm4 6056 vmovdqa YMMWORD[(352-256-128)+rbx],ymm12 6057 vpaddd ymm2,ymm2,ymm12 6058 vpxor ymm13,ymm13,YMMWORD[((128-128))+rax] 6059 vpsrld ymm8,ymm3,27 6060 vpxor ymm5,ymm5,ymm0 6061 vpxor ymm13,ymm13,ymm10 6062 6063 vpslld ymm6,ymm4,30 6064 vpor ymm7,ymm7,ymm8 6065 vpaddd ymm2,ymm2,ymm5 6066 vpsrld ymm9,ymm13,31 6067 vpaddd ymm13,ymm13,ymm13 6068 6069 vpsrld ymm4,ymm4,2 6070 vpaddd ymm2,ymm2,ymm7 6071 vpor ymm13,ymm13,ymm9 6072 vpor ymm4,ymm4,ymm6 6073 vpxor ymm14,ymm14,ymm11 6074 vmovdqa ymm11,YMMWORD[((480-256-128))+rbx] 6075 6076 vpslld ymm7,ymm2,5 6077 vpaddd ymm1,ymm1,ymm15 6078 vpxor ymm5,ymm0,ymm3 6079 vmovdqa YMMWORD[(384-256-128)+rbx],ymm13 6080 vpaddd ymm1,ymm1,ymm13 6081 vpxor ymm14,ymm14,YMMWORD[((160-128))+rax] 6082 vpsrld ymm8,ymm2,27 6083 vpxor ymm5,ymm5,ymm4 6084 vpxor ymm14,ymm14,ymm11 6085 6086 vpslld ymm6,ymm3,30 6087 vpor ymm7,ymm7,ymm8 6088 vpaddd ymm1,ymm1,ymm5 6089 vpsrld ymm9,ymm14,31 6090 vpaddd ymm14,ymm14,ymm14 6091 6092 vpsrld ymm3,ymm3,2 6093 vpaddd ymm1,ymm1,ymm7 6094 vpor ymm14,ymm14,ymm9 6095 vpor ymm3,ymm3,ymm6 6096 vpxor ymm10,ymm10,ymm12 6097 vmovdqa ymm12,YMMWORD[((0-128))+rax] 6098 6099 vpslld ymm7,ymm1,5 6100 vpaddd ymm0,ymm0,ymm15 6101 vpxor ymm5,ymm4,ymm2 6102 vmovdqa YMMWORD[(416-256-128)+rbx],ymm14 6103 vpaddd ymm0,ymm0,ymm14 6104 vpxor ymm10,ymm10,YMMWORD[((192-128))+rax] 6105 vpsrld ymm8,ymm1,27 6106 vpxor ymm5,ymm5,ymm3 6107 vpxor ymm10,ymm10,ymm12 6108 6109 vpslld ymm6,ymm2,30 6110 vpor ymm7,ymm7,ymm8 6111 vpaddd ymm0,ymm0,ymm5 6112 vpsrld ymm9,ymm10,31 6113 vpaddd ymm10,ymm10,ymm10 6114 6115 vpsrld ymm2,ymm2,2 6116 vpaddd ymm0,ymm0,ymm7 6117 vpor ymm10,ymm10,ymm9 6118 vpor ymm2,ymm2,ymm6 6119 vpxor ymm11,ymm11,ymm13 6120 vmovdqa ymm13,YMMWORD[((32-128))+rax] 6121 6122 vpslld ymm7,ymm0,5 6123 vpaddd ymm4,ymm4,ymm15 6124 vpxor ymm5,ymm3,ymm1 6125 vmovdqa YMMWORD[(448-256-128)+rbx],ymm10 6126 vpaddd ymm4,ymm4,ymm10 6127 vpxor ymm11,ymm11,YMMWORD[((224-128))+rax] 6128 vpsrld ymm8,ymm0,27 6129 vpxor ymm5,ymm5,ymm2 6130 vpxor ymm11,ymm11,ymm13 6131 6132 vpslld ymm6,ymm1,30 6133 vpor ymm7,ymm7,ymm8 6134 vpaddd ymm4,ymm4,ymm5 6135 vpsrld ymm9,ymm11,31 6136 vpaddd ymm11,ymm11,ymm11 6137 6138 vpsrld ymm1,ymm1,2 6139 vpaddd ymm4,ymm4,ymm7 6140 vpor ymm11,ymm11,ymm9 6141 vpor ymm1,ymm1,ymm6 6142 vpxor ymm12,ymm12,ymm14 6143 vmovdqa ymm14,YMMWORD[((64-128))+rax] 6144 6145 vpslld ymm7,ymm4,5 6146 vpaddd ymm3,ymm3,ymm15 6147 vpxor ymm5,ymm2,ymm0 6148 vmovdqa YMMWORD[(480-256-128)+rbx],ymm11 6149 vpaddd ymm3,ymm3,ymm11 6150 vpxor ymm12,ymm12,YMMWORD[((256-256-128))+rbx] 6151 vpsrld ymm8,ymm4,27 6152 vpxor ymm5,ymm5,ymm1 6153 vpxor ymm12,ymm12,ymm14 6154 6155 vpslld ymm6,ymm0,30 6156 vpor ymm7,ymm7,ymm8 6157 vpaddd ymm3,ymm3,ymm5 6158 vpsrld ymm9,ymm12,31 6159 vpaddd ymm12,ymm12,ymm12 6160 6161 vpsrld ymm0,ymm0,2 6162 vpaddd ymm3,ymm3,ymm7 6163 vpor ymm12,ymm12,ymm9 6164 vpor ymm0,ymm0,ymm6 6165 vpxor ymm13,ymm13,ymm10 6166 vmovdqa ymm10,YMMWORD[((96-128))+rax] 6167 6168 vpslld ymm7,ymm3,5 6169 vpaddd ymm2,ymm2,ymm15 6170 vpxor ymm5,ymm1,ymm4 6171 vmovdqa YMMWORD[(0-128)+rax],ymm12 6172 vpaddd ymm2,ymm2,ymm12 6173 vpxor ymm13,ymm13,YMMWORD[((288-256-128))+rbx] 6174 vpsrld ymm8,ymm3,27 6175 vpxor ymm5,ymm5,ymm0 6176 vpxor ymm13,ymm13,ymm10 6177 6178 vpslld ymm6,ymm4,30 6179 vpor ymm7,ymm7,ymm8 6180 vpaddd ymm2,ymm2,ymm5 6181 vpsrld ymm9,ymm13,31 6182 vpaddd ymm13,ymm13,ymm13 6183 6184 vpsrld ymm4,ymm4,2 6185 vpaddd ymm2,ymm2,ymm7 6186 vpor ymm13,ymm13,ymm9 6187 vpor ymm4,ymm4,ymm6 6188 vpxor ymm14,ymm14,ymm11 6189 vmovdqa ymm11,YMMWORD[((128-128))+rax] 6190 6191 vpslld ymm7,ymm2,5 6192 vpaddd ymm1,ymm1,ymm15 6193 vpxor ymm5,ymm0,ymm3 6194 vmovdqa YMMWORD[(32-128)+rax],ymm13 6195 vpaddd ymm1,ymm1,ymm13 6196 vpxor ymm14,ymm14,YMMWORD[((320-256-128))+rbx] 6197 vpsrld ymm8,ymm2,27 6198 vpxor ymm5,ymm5,ymm4 6199 vpxor ymm14,ymm14,ymm11 6200 6201 vpslld ymm6,ymm3,30 6202 vpor ymm7,ymm7,ymm8 6203 vpaddd ymm1,ymm1,ymm5 6204 vpsrld ymm9,ymm14,31 6205 vpaddd ymm14,ymm14,ymm14 6206 6207 vpsrld ymm3,ymm3,2 6208 vpaddd ymm1,ymm1,ymm7 6209 vpor ymm14,ymm14,ymm9 6210 vpor ymm3,ymm3,ymm6 6211 vpxor ymm10,ymm10,ymm12 6212 vmovdqa ymm12,YMMWORD[((160-128))+rax] 6213 6214 vpslld ymm7,ymm1,5 6215 vpaddd ymm0,ymm0,ymm15 6216 vpxor ymm5,ymm4,ymm2 6217 vmovdqa YMMWORD[(64-128)+rax],ymm14 6218 vpaddd ymm0,ymm0,ymm14 6219 vpxor ymm10,ymm10,YMMWORD[((352-256-128))+rbx] 6220 vpsrld ymm8,ymm1,27 6221 vpxor ymm5,ymm5,ymm3 6222 vpxor ymm10,ymm10,ymm12 6223 6224 vpslld ymm6,ymm2,30 6225 vpor ymm7,ymm7,ymm8 6226 vpaddd ymm0,ymm0,ymm5 6227 vpsrld ymm9,ymm10,31 6228 vpaddd ymm10,ymm10,ymm10 6229 6230 vpsrld ymm2,ymm2,2 6231 vpaddd ymm0,ymm0,ymm7 6232 vpor ymm10,ymm10,ymm9 6233 vpor ymm2,ymm2,ymm6 6234 vpxor ymm11,ymm11,ymm13 6235 vmovdqa ymm13,YMMWORD[((192-128))+rax] 6236 6237 vpslld ymm7,ymm0,5 6238 vpaddd ymm4,ymm4,ymm15 6239 vpxor ymm5,ymm3,ymm1 6240 vmovdqa YMMWORD[(96-128)+rax],ymm10 6241 vpaddd ymm4,ymm4,ymm10 6242 vpxor ymm11,ymm11,YMMWORD[((384-256-128))+rbx] 6243 vpsrld ymm8,ymm0,27 6244 vpxor ymm5,ymm5,ymm2 6245 vpxor ymm11,ymm11,ymm13 6246 6247 vpslld ymm6,ymm1,30 6248 vpor ymm7,ymm7,ymm8 6249 vpaddd ymm4,ymm4,ymm5 6250 vpsrld ymm9,ymm11,31 6251 vpaddd ymm11,ymm11,ymm11 6252 6253 vpsrld ymm1,ymm1,2 6254 vpaddd ymm4,ymm4,ymm7 6255 vpor ymm11,ymm11,ymm9 6256 vpor ymm1,ymm1,ymm6 6257 vpxor ymm12,ymm12,ymm14 6258 vmovdqa ymm14,YMMWORD[((224-128))+rax] 6259 6260 vpslld ymm7,ymm4,5 6261 vpaddd ymm3,ymm3,ymm15 6262 vpxor ymm5,ymm2,ymm0 6263 vmovdqa YMMWORD[(128-128)+rax],ymm11 6264 vpaddd ymm3,ymm3,ymm11 6265 vpxor ymm12,ymm12,YMMWORD[((416-256-128))+rbx] 6266 vpsrld ymm8,ymm4,27 6267 vpxor ymm5,ymm5,ymm1 6268 vpxor ymm12,ymm12,ymm14 6269 6270 vpslld ymm6,ymm0,30 6271 vpor ymm7,ymm7,ymm8 6272 vpaddd ymm3,ymm3,ymm5 6273 vpsrld ymm9,ymm12,31 6274 vpaddd ymm12,ymm12,ymm12 6275 6276 vpsrld ymm0,ymm0,2 6277 vpaddd ymm3,ymm3,ymm7 6278 vpor ymm12,ymm12,ymm9 6279 vpor ymm0,ymm0,ymm6 6280 vpxor ymm13,ymm13,ymm10 6281 vmovdqa ymm10,YMMWORD[((256-256-128))+rbx] 6282 6283 vpslld ymm7,ymm3,5 6284 vpaddd ymm2,ymm2,ymm15 6285 vpxor ymm5,ymm1,ymm4 6286 vmovdqa YMMWORD[(160-128)+rax],ymm12 6287 vpaddd ymm2,ymm2,ymm12 6288 vpxor ymm13,ymm13,YMMWORD[((448-256-128))+rbx] 6289 vpsrld ymm8,ymm3,27 6290 vpxor ymm5,ymm5,ymm0 6291 vpxor ymm13,ymm13,ymm10 6292 6293 vpslld ymm6,ymm4,30 6294 vpor ymm7,ymm7,ymm8 6295 vpaddd ymm2,ymm2,ymm5 6296 vpsrld ymm9,ymm13,31 6297 vpaddd ymm13,ymm13,ymm13 6298 6299 vpsrld ymm4,ymm4,2 6300 vpaddd ymm2,ymm2,ymm7 6301 vpor ymm13,ymm13,ymm9 6302 vpor ymm4,ymm4,ymm6 6303 vpxor ymm14,ymm14,ymm11 6304 vmovdqa ymm11,YMMWORD[((288-256-128))+rbx] 6305 6306 vpslld ymm7,ymm2,5 6307 vpaddd ymm1,ymm1,ymm15 6308 vpxor ymm5,ymm0,ymm3 6309 vmovdqa YMMWORD[(192-128)+rax],ymm13 6310 vpaddd ymm1,ymm1,ymm13 6311 vpxor ymm14,ymm14,YMMWORD[((480-256-128))+rbx] 6312 vpsrld ymm8,ymm2,27 6313 vpxor ymm5,ymm5,ymm4 6314 vpxor ymm14,ymm14,ymm11 6315 6316 vpslld ymm6,ymm3,30 6317 vpor ymm7,ymm7,ymm8 6318 vpaddd ymm1,ymm1,ymm5 6319 vpsrld ymm9,ymm14,31 6320 vpaddd ymm14,ymm14,ymm14 6321 6322 vpsrld ymm3,ymm3,2 6323 vpaddd ymm1,ymm1,ymm7 6324 vpor ymm14,ymm14,ymm9 6325 vpor ymm3,ymm3,ymm6 6326 vpxor ymm10,ymm10,ymm12 6327 vmovdqa ymm12,YMMWORD[((320-256-128))+rbx] 6328 6329 vpslld ymm7,ymm1,5 6330 vpaddd ymm0,ymm0,ymm15 6331 vpxor ymm5,ymm4,ymm2 6332 vmovdqa YMMWORD[(224-128)+rax],ymm14 6333 vpaddd ymm0,ymm0,ymm14 6334 vpxor ymm10,ymm10,YMMWORD[((0-128))+rax] 6335 vpsrld ymm8,ymm1,27 6336 vpxor ymm5,ymm5,ymm3 6337 vpxor ymm10,ymm10,ymm12 6338 6339 vpslld ymm6,ymm2,30 6340 vpor ymm7,ymm7,ymm8 6341 vpaddd ymm0,ymm0,ymm5 6342 vpsrld ymm9,ymm10,31 6343 vpaddd ymm10,ymm10,ymm10 6344 6345 vpsrld ymm2,ymm2,2 6346 vpaddd ymm0,ymm0,ymm7 6347 vpor ymm10,ymm10,ymm9 6348 vpor ymm2,ymm2,ymm6 6349 vmovdqa ymm15,YMMWORD[32+rbp] 6350 vpxor ymm11,ymm11,ymm13 6351 vmovdqa ymm13,YMMWORD[((352-256-128))+rbx] 6352 6353 vpaddd ymm4,ymm4,ymm15 6354 vpslld ymm7,ymm0,5 6355 vpand ymm6,ymm3,ymm2 6356 vpxor ymm11,ymm11,YMMWORD[((32-128))+rax] 6357 6358 vpaddd ymm4,ymm4,ymm6 6359 vpsrld ymm8,ymm0,27 6360 vpxor ymm5,ymm3,ymm2 6361 vpxor ymm11,ymm11,ymm13 6362 6363 vmovdqu YMMWORD[(256-256-128)+rbx],ymm10 6364 vpaddd ymm4,ymm4,ymm10 6365 vpor ymm7,ymm7,ymm8 6366 vpsrld ymm9,ymm11,31 6367 vpand ymm5,ymm5,ymm1 6368 vpaddd ymm11,ymm11,ymm11 6369 6370 vpslld ymm6,ymm1,30 6371 vpaddd ymm4,ymm4,ymm5 6372 6373 vpsrld ymm1,ymm1,2 6374 vpaddd ymm4,ymm4,ymm7 6375 vpor ymm11,ymm11,ymm9 6376 vpor ymm1,ymm1,ymm6 6377 vpxor ymm12,ymm12,ymm14 6378 vmovdqa ymm14,YMMWORD[((384-256-128))+rbx] 6379 6380 vpaddd ymm3,ymm3,ymm15 6381 vpslld ymm7,ymm4,5 6382 vpand ymm6,ymm2,ymm1 6383 vpxor ymm12,ymm12,YMMWORD[((64-128))+rax] 6384 6385 vpaddd ymm3,ymm3,ymm6 6386 vpsrld ymm8,ymm4,27 6387 vpxor ymm5,ymm2,ymm1 6388 vpxor ymm12,ymm12,ymm14 6389 6390 vmovdqu YMMWORD[(288-256-128)+rbx],ymm11 6391 vpaddd ymm3,ymm3,ymm11 6392 vpor ymm7,ymm7,ymm8 6393 vpsrld ymm9,ymm12,31 6394 vpand ymm5,ymm5,ymm0 6395 vpaddd ymm12,ymm12,ymm12 6396 6397 vpslld ymm6,ymm0,30 6398 vpaddd ymm3,ymm3,ymm5 6399 6400 vpsrld ymm0,ymm0,2 6401 vpaddd ymm3,ymm3,ymm7 6402 vpor ymm12,ymm12,ymm9 6403 vpor ymm0,ymm0,ymm6 6404 vpxor ymm13,ymm13,ymm10 6405 vmovdqa ymm10,YMMWORD[((416-256-128))+rbx] 6406 6407 vpaddd ymm2,ymm2,ymm15 6408 vpslld ymm7,ymm3,5 6409 vpand ymm6,ymm1,ymm0 6410 vpxor ymm13,ymm13,YMMWORD[((96-128))+rax] 6411 6412 vpaddd ymm2,ymm2,ymm6 6413 vpsrld ymm8,ymm3,27 6414 vpxor ymm5,ymm1,ymm0 6415 vpxor ymm13,ymm13,ymm10 6416 6417 vmovdqu YMMWORD[(320-256-128)+rbx],ymm12 6418 vpaddd ymm2,ymm2,ymm12 6419 vpor ymm7,ymm7,ymm8 6420 vpsrld ymm9,ymm13,31 6421 vpand ymm5,ymm5,ymm4 6422 vpaddd ymm13,ymm13,ymm13 6423 6424 vpslld ymm6,ymm4,30 6425 vpaddd ymm2,ymm2,ymm5 6426 6427 vpsrld ymm4,ymm4,2 6428 vpaddd ymm2,ymm2,ymm7 6429 vpor ymm13,ymm13,ymm9 6430 vpor ymm4,ymm4,ymm6 6431 vpxor ymm14,ymm14,ymm11 6432 vmovdqa ymm11,YMMWORD[((448-256-128))+rbx] 6433 6434 vpaddd ymm1,ymm1,ymm15 6435 vpslld ymm7,ymm2,5 6436 vpand ymm6,ymm0,ymm4 6437 vpxor ymm14,ymm14,YMMWORD[((128-128))+rax] 6438 6439 vpaddd ymm1,ymm1,ymm6 6440 vpsrld ymm8,ymm2,27 6441 vpxor ymm5,ymm0,ymm4 6442 vpxor ymm14,ymm14,ymm11 6443 6444 vmovdqu YMMWORD[(352-256-128)+rbx],ymm13 6445 vpaddd ymm1,ymm1,ymm13 6446 vpor ymm7,ymm7,ymm8 6447 vpsrld ymm9,ymm14,31 6448 vpand ymm5,ymm5,ymm3 6449 vpaddd ymm14,ymm14,ymm14 6450 6451 vpslld ymm6,ymm3,30 6452 vpaddd ymm1,ymm1,ymm5 6453 6454 vpsrld ymm3,ymm3,2 6455 vpaddd ymm1,ymm1,ymm7 6456 vpor ymm14,ymm14,ymm9 6457 vpor ymm3,ymm3,ymm6 6458 vpxor ymm10,ymm10,ymm12 6459 vmovdqa ymm12,YMMWORD[((480-256-128))+rbx] 6460 6461 vpaddd ymm0,ymm0,ymm15 6462 vpslld ymm7,ymm1,5 6463 vpand ymm6,ymm4,ymm3 6464 vpxor ymm10,ymm10,YMMWORD[((160-128))+rax] 6465 6466 vpaddd ymm0,ymm0,ymm6 6467 vpsrld ymm8,ymm1,27 6468 vpxor ymm5,ymm4,ymm3 6469 vpxor ymm10,ymm10,ymm12 6470 6471 vmovdqu YMMWORD[(384-256-128)+rbx],ymm14 6472 vpaddd ymm0,ymm0,ymm14 6473 vpor ymm7,ymm7,ymm8 6474 vpsrld ymm9,ymm10,31 6475 vpand ymm5,ymm5,ymm2 6476 vpaddd ymm10,ymm10,ymm10 6477 6478 vpslld ymm6,ymm2,30 6479 vpaddd ymm0,ymm0,ymm5 6480 6481 vpsrld ymm2,ymm2,2 6482 vpaddd ymm0,ymm0,ymm7 6483 vpor ymm10,ymm10,ymm9 6484 vpor ymm2,ymm2,ymm6 6485 vpxor ymm11,ymm11,ymm13 6486 vmovdqa ymm13,YMMWORD[((0-128))+rax] 6487 6488 vpaddd ymm4,ymm4,ymm15 6489 vpslld ymm7,ymm0,5 6490 vpand ymm6,ymm3,ymm2 6491 vpxor ymm11,ymm11,YMMWORD[((192-128))+rax] 6492 6493 vpaddd ymm4,ymm4,ymm6 6494 vpsrld ymm8,ymm0,27 6495 vpxor ymm5,ymm3,ymm2 6496 vpxor ymm11,ymm11,ymm13 6497 6498 vmovdqu YMMWORD[(416-256-128)+rbx],ymm10 6499 vpaddd ymm4,ymm4,ymm10 6500 vpor ymm7,ymm7,ymm8 6501 vpsrld ymm9,ymm11,31 6502 vpand ymm5,ymm5,ymm1 6503 vpaddd ymm11,ymm11,ymm11 6504 6505 vpslld ymm6,ymm1,30 6506 vpaddd ymm4,ymm4,ymm5 6507 6508 vpsrld ymm1,ymm1,2 6509 vpaddd ymm4,ymm4,ymm7 6510 vpor ymm11,ymm11,ymm9 6511 vpor ymm1,ymm1,ymm6 6512 vpxor ymm12,ymm12,ymm14 6513 vmovdqa ymm14,YMMWORD[((32-128))+rax] 6514 6515 vpaddd ymm3,ymm3,ymm15 6516 vpslld ymm7,ymm4,5 6517 vpand ymm6,ymm2,ymm1 6518 vpxor ymm12,ymm12,YMMWORD[((224-128))+rax] 6519 6520 vpaddd ymm3,ymm3,ymm6 6521 vpsrld ymm8,ymm4,27 6522 vpxor ymm5,ymm2,ymm1 6523 vpxor ymm12,ymm12,ymm14 6524 6525 vmovdqu YMMWORD[(448-256-128)+rbx],ymm11 6526 vpaddd ymm3,ymm3,ymm11 6527 vpor ymm7,ymm7,ymm8 6528 vpsrld ymm9,ymm12,31 6529 vpand ymm5,ymm5,ymm0 6530 vpaddd ymm12,ymm12,ymm12 6531 6532 vpslld ymm6,ymm0,30 6533 vpaddd ymm3,ymm3,ymm5 6534 6535 vpsrld ymm0,ymm0,2 6536 vpaddd ymm3,ymm3,ymm7 6537 vpor ymm12,ymm12,ymm9 6538 vpor ymm0,ymm0,ymm6 6539 vpxor ymm13,ymm13,ymm10 6540 vmovdqa ymm10,YMMWORD[((64-128))+rax] 6541 6542 vpaddd ymm2,ymm2,ymm15 6543 vpslld ymm7,ymm3,5 6544 vpand ymm6,ymm1,ymm0 6545 vpxor ymm13,ymm13,YMMWORD[((256-256-128))+rbx] 6546 6547 vpaddd ymm2,ymm2,ymm6 6548 vpsrld ymm8,ymm3,27 6549 vpxor ymm5,ymm1,ymm0 6550 vpxor ymm13,ymm13,ymm10 6551 6552 vmovdqu YMMWORD[(480-256-128)+rbx],ymm12 6553 vpaddd ymm2,ymm2,ymm12 6554 vpor ymm7,ymm7,ymm8 6555 vpsrld ymm9,ymm13,31 6556 vpand ymm5,ymm5,ymm4 6557 vpaddd ymm13,ymm13,ymm13 6558 6559 vpslld ymm6,ymm4,30 6560 vpaddd ymm2,ymm2,ymm5 6561 6562 vpsrld ymm4,ymm4,2 6563 vpaddd ymm2,ymm2,ymm7 6564 vpor ymm13,ymm13,ymm9 6565 vpor ymm4,ymm4,ymm6 6566 vpxor ymm14,ymm14,ymm11 6567 vmovdqa ymm11,YMMWORD[((96-128))+rax] 6568 6569 vpaddd ymm1,ymm1,ymm15 6570 vpslld ymm7,ymm2,5 6571 vpand ymm6,ymm0,ymm4 6572 vpxor ymm14,ymm14,YMMWORD[((288-256-128))+rbx] 6573 6574 vpaddd ymm1,ymm1,ymm6 6575 vpsrld ymm8,ymm2,27 6576 vpxor ymm5,ymm0,ymm4 6577 vpxor ymm14,ymm14,ymm11 6578 6579 vmovdqu YMMWORD[(0-128)+rax],ymm13 6580 vpaddd ymm1,ymm1,ymm13 6581 vpor ymm7,ymm7,ymm8 6582 vpsrld ymm9,ymm14,31 6583 vpand ymm5,ymm5,ymm3 6584 vpaddd ymm14,ymm14,ymm14 6585 6586 vpslld ymm6,ymm3,30 6587 vpaddd ymm1,ymm1,ymm5 6588 6589 vpsrld ymm3,ymm3,2 6590 vpaddd ymm1,ymm1,ymm7 6591 vpor ymm14,ymm14,ymm9 6592 vpor ymm3,ymm3,ymm6 6593 vpxor ymm10,ymm10,ymm12 6594 vmovdqa ymm12,YMMWORD[((128-128))+rax] 6595 6596 vpaddd ymm0,ymm0,ymm15 6597 vpslld ymm7,ymm1,5 6598 vpand ymm6,ymm4,ymm3 6599 vpxor ymm10,ymm10,YMMWORD[((320-256-128))+rbx] 6600 6601 vpaddd ymm0,ymm0,ymm6 6602 vpsrld ymm8,ymm1,27 6603 vpxor ymm5,ymm4,ymm3 6604 vpxor ymm10,ymm10,ymm12 6605 6606 vmovdqu YMMWORD[(32-128)+rax],ymm14 6607 vpaddd ymm0,ymm0,ymm14 6608 vpor ymm7,ymm7,ymm8 6609 vpsrld ymm9,ymm10,31 6610 vpand ymm5,ymm5,ymm2 6611 vpaddd ymm10,ymm10,ymm10 6612 6613 vpslld ymm6,ymm2,30 6614 vpaddd ymm0,ymm0,ymm5 6615 6616 vpsrld ymm2,ymm2,2 6617 vpaddd ymm0,ymm0,ymm7 6618 vpor ymm10,ymm10,ymm9 6619 vpor ymm2,ymm2,ymm6 6620 vpxor ymm11,ymm11,ymm13 6621 vmovdqa ymm13,YMMWORD[((160-128))+rax] 6622 6623 vpaddd ymm4,ymm4,ymm15 6624 vpslld ymm7,ymm0,5 6625 vpand ymm6,ymm3,ymm2 6626 vpxor ymm11,ymm11,YMMWORD[((352-256-128))+rbx] 6627 6628 vpaddd ymm4,ymm4,ymm6 6629 vpsrld ymm8,ymm0,27 6630 vpxor ymm5,ymm3,ymm2 6631 vpxor ymm11,ymm11,ymm13 6632 6633 vmovdqu YMMWORD[(64-128)+rax],ymm10 6634 vpaddd ymm4,ymm4,ymm10 6635 vpor ymm7,ymm7,ymm8 6636 vpsrld ymm9,ymm11,31 6637 vpand ymm5,ymm5,ymm1 6638 vpaddd ymm11,ymm11,ymm11 6639 6640 vpslld ymm6,ymm1,30 6641 vpaddd ymm4,ymm4,ymm5 6642 6643 vpsrld ymm1,ymm1,2 6644 vpaddd ymm4,ymm4,ymm7 6645 vpor ymm11,ymm11,ymm9 6646 vpor ymm1,ymm1,ymm6 6647 vpxor ymm12,ymm12,ymm14 6648 vmovdqa ymm14,YMMWORD[((192-128))+rax] 6649 6650 vpaddd ymm3,ymm3,ymm15 6651 vpslld ymm7,ymm4,5 6652 vpand ymm6,ymm2,ymm1 6653 vpxor ymm12,ymm12,YMMWORD[((384-256-128))+rbx] 6654 6655 vpaddd ymm3,ymm3,ymm6 6656 vpsrld ymm8,ymm4,27 6657 vpxor ymm5,ymm2,ymm1 6658 vpxor ymm12,ymm12,ymm14 6659 6660 vmovdqu YMMWORD[(96-128)+rax],ymm11 6661 vpaddd ymm3,ymm3,ymm11 6662 vpor ymm7,ymm7,ymm8 6663 vpsrld ymm9,ymm12,31 6664 vpand ymm5,ymm5,ymm0 6665 vpaddd ymm12,ymm12,ymm12 6666 6667 vpslld ymm6,ymm0,30 6668 vpaddd ymm3,ymm3,ymm5 6669 6670 vpsrld ymm0,ymm0,2 6671 vpaddd ymm3,ymm3,ymm7 6672 vpor ymm12,ymm12,ymm9 6673 vpor ymm0,ymm0,ymm6 6674 vpxor ymm13,ymm13,ymm10 6675 vmovdqa ymm10,YMMWORD[((224-128))+rax] 6676 6677 vpaddd ymm2,ymm2,ymm15 6678 vpslld ymm7,ymm3,5 6679 vpand ymm6,ymm1,ymm0 6680 vpxor ymm13,ymm13,YMMWORD[((416-256-128))+rbx] 6681 6682 vpaddd ymm2,ymm2,ymm6 6683 vpsrld ymm8,ymm3,27 6684 vpxor ymm5,ymm1,ymm0 6685 vpxor ymm13,ymm13,ymm10 6686 6687 vmovdqu YMMWORD[(128-128)+rax],ymm12 6688 vpaddd ymm2,ymm2,ymm12 6689 vpor ymm7,ymm7,ymm8 6690 vpsrld ymm9,ymm13,31 6691 vpand ymm5,ymm5,ymm4 6692 vpaddd ymm13,ymm13,ymm13 6693 6694 vpslld ymm6,ymm4,30 6695 vpaddd ymm2,ymm2,ymm5 6696 6697 vpsrld ymm4,ymm4,2 6698 vpaddd ymm2,ymm2,ymm7 6699 vpor ymm13,ymm13,ymm9 6700 vpor ymm4,ymm4,ymm6 6701 vpxor ymm14,ymm14,ymm11 6702 vmovdqa ymm11,YMMWORD[((256-256-128))+rbx] 6703 6704 vpaddd ymm1,ymm1,ymm15 6705 vpslld ymm7,ymm2,5 6706 vpand ymm6,ymm0,ymm4 6707 vpxor ymm14,ymm14,YMMWORD[((448-256-128))+rbx] 6708 6709 vpaddd ymm1,ymm1,ymm6 6710 vpsrld ymm8,ymm2,27 6711 vpxor ymm5,ymm0,ymm4 6712 vpxor ymm14,ymm14,ymm11 6713 6714 vmovdqu YMMWORD[(160-128)+rax],ymm13 6715 vpaddd ymm1,ymm1,ymm13 6716 vpor ymm7,ymm7,ymm8 6717 vpsrld ymm9,ymm14,31 6718 vpand ymm5,ymm5,ymm3 6719 vpaddd ymm14,ymm14,ymm14 6720 6721 vpslld ymm6,ymm3,30 6722 vpaddd ymm1,ymm1,ymm5 6723 6724 vpsrld ymm3,ymm3,2 6725 vpaddd ymm1,ymm1,ymm7 6726 vpor ymm14,ymm14,ymm9 6727 vpor ymm3,ymm3,ymm6 6728 vpxor ymm10,ymm10,ymm12 6729 vmovdqa ymm12,YMMWORD[((288-256-128))+rbx] 6730 6731 vpaddd ymm0,ymm0,ymm15 6732 vpslld ymm7,ymm1,5 6733 vpand ymm6,ymm4,ymm3 6734 vpxor ymm10,ymm10,YMMWORD[((480-256-128))+rbx] 6735 6736 vpaddd ymm0,ymm0,ymm6 6737 vpsrld ymm8,ymm1,27 6738 vpxor ymm5,ymm4,ymm3 6739 vpxor ymm10,ymm10,ymm12 6740 6741 vmovdqu YMMWORD[(192-128)+rax],ymm14 6742 vpaddd ymm0,ymm0,ymm14 6743 vpor ymm7,ymm7,ymm8 6744 vpsrld ymm9,ymm10,31 6745 vpand ymm5,ymm5,ymm2 6746 vpaddd ymm10,ymm10,ymm10 6747 6748 vpslld ymm6,ymm2,30 6749 vpaddd ymm0,ymm0,ymm5 6750 6751 vpsrld ymm2,ymm2,2 6752 vpaddd ymm0,ymm0,ymm7 6753 vpor ymm10,ymm10,ymm9 6754 vpor ymm2,ymm2,ymm6 6755 vpxor ymm11,ymm11,ymm13 6756 vmovdqa ymm13,YMMWORD[((320-256-128))+rbx] 6757 6758 vpaddd ymm4,ymm4,ymm15 6759 vpslld ymm7,ymm0,5 6760 vpand ymm6,ymm3,ymm2 6761 vpxor ymm11,ymm11,YMMWORD[((0-128))+rax] 6762 6763 vpaddd ymm4,ymm4,ymm6 6764 vpsrld ymm8,ymm0,27 6765 vpxor ymm5,ymm3,ymm2 6766 vpxor ymm11,ymm11,ymm13 6767 6768 vmovdqu YMMWORD[(224-128)+rax],ymm10 6769 vpaddd ymm4,ymm4,ymm10 6770 vpor ymm7,ymm7,ymm8 6771 vpsrld ymm9,ymm11,31 6772 vpand ymm5,ymm5,ymm1 6773 vpaddd ymm11,ymm11,ymm11 6774 6775 vpslld ymm6,ymm1,30 6776 vpaddd ymm4,ymm4,ymm5 6777 6778 vpsrld ymm1,ymm1,2 6779 vpaddd ymm4,ymm4,ymm7 6780 vpor ymm11,ymm11,ymm9 6781 vpor ymm1,ymm1,ymm6 6782 vpxor ymm12,ymm12,ymm14 6783 vmovdqa ymm14,YMMWORD[((352-256-128))+rbx] 6784 6785 vpaddd ymm3,ymm3,ymm15 6786 vpslld ymm7,ymm4,5 6787 vpand ymm6,ymm2,ymm1 6788 vpxor ymm12,ymm12,YMMWORD[((32-128))+rax] 6789 6790 vpaddd ymm3,ymm3,ymm6 6791 vpsrld ymm8,ymm4,27 6792 vpxor ymm5,ymm2,ymm1 6793 vpxor ymm12,ymm12,ymm14 6794 6795 vmovdqu YMMWORD[(256-256-128)+rbx],ymm11 6796 vpaddd ymm3,ymm3,ymm11 6797 vpor ymm7,ymm7,ymm8 6798 vpsrld ymm9,ymm12,31 6799 vpand ymm5,ymm5,ymm0 6800 vpaddd ymm12,ymm12,ymm12 6801 6802 vpslld ymm6,ymm0,30 6803 vpaddd ymm3,ymm3,ymm5 6804 6805 vpsrld ymm0,ymm0,2 6806 vpaddd ymm3,ymm3,ymm7 6807 vpor ymm12,ymm12,ymm9 6808 vpor ymm0,ymm0,ymm6 6809 vpxor ymm13,ymm13,ymm10 6810 vmovdqa ymm10,YMMWORD[((384-256-128))+rbx] 6811 6812 vpaddd ymm2,ymm2,ymm15 6813 vpslld ymm7,ymm3,5 6814 vpand ymm6,ymm1,ymm0 6815 vpxor ymm13,ymm13,YMMWORD[((64-128))+rax] 6816 6817 vpaddd ymm2,ymm2,ymm6 6818 vpsrld ymm8,ymm3,27 6819 vpxor ymm5,ymm1,ymm0 6820 vpxor ymm13,ymm13,ymm10 6821 6822 vmovdqu YMMWORD[(288-256-128)+rbx],ymm12 6823 vpaddd ymm2,ymm2,ymm12 6824 vpor ymm7,ymm7,ymm8 6825 vpsrld ymm9,ymm13,31 6826 vpand ymm5,ymm5,ymm4 6827 vpaddd ymm13,ymm13,ymm13 6828 6829 vpslld ymm6,ymm4,30 6830 vpaddd ymm2,ymm2,ymm5 6831 6832 vpsrld ymm4,ymm4,2 6833 vpaddd ymm2,ymm2,ymm7 6834 vpor ymm13,ymm13,ymm9 6835 vpor ymm4,ymm4,ymm6 6836 vpxor ymm14,ymm14,ymm11 6837 vmovdqa ymm11,YMMWORD[((416-256-128))+rbx] 6838 6839 vpaddd ymm1,ymm1,ymm15 6840 vpslld ymm7,ymm2,5 6841 vpand ymm6,ymm0,ymm4 6842 vpxor ymm14,ymm14,YMMWORD[((96-128))+rax] 6843 6844 vpaddd ymm1,ymm1,ymm6 6845 vpsrld ymm8,ymm2,27 6846 vpxor ymm5,ymm0,ymm4 6847 vpxor ymm14,ymm14,ymm11 6848 6849 vmovdqu YMMWORD[(320-256-128)+rbx],ymm13 6850 vpaddd ymm1,ymm1,ymm13 6851 vpor ymm7,ymm7,ymm8 6852 vpsrld ymm9,ymm14,31 6853 vpand ymm5,ymm5,ymm3 6854 vpaddd ymm14,ymm14,ymm14 6855 6856 vpslld ymm6,ymm3,30 6857 vpaddd ymm1,ymm1,ymm5 6858 6859 vpsrld ymm3,ymm3,2 6860 vpaddd ymm1,ymm1,ymm7 6861 vpor ymm14,ymm14,ymm9 6862 vpor ymm3,ymm3,ymm6 6863 vpxor ymm10,ymm10,ymm12 6864 vmovdqa ymm12,YMMWORD[((448-256-128))+rbx] 6865 6866 vpaddd ymm0,ymm0,ymm15 6867 vpslld ymm7,ymm1,5 6868 vpand ymm6,ymm4,ymm3 6869 vpxor ymm10,ymm10,YMMWORD[((128-128))+rax] 6870 6871 vpaddd ymm0,ymm0,ymm6 6872 vpsrld ymm8,ymm1,27 6873 vpxor ymm5,ymm4,ymm3 6874 vpxor ymm10,ymm10,ymm12 6875 6876 vmovdqu YMMWORD[(352-256-128)+rbx],ymm14 6877 vpaddd ymm0,ymm0,ymm14 6878 vpor ymm7,ymm7,ymm8 6879 vpsrld ymm9,ymm10,31 6880 vpand ymm5,ymm5,ymm2 6881 vpaddd ymm10,ymm10,ymm10 6882 6883 vpslld ymm6,ymm2,30 6884 vpaddd ymm0,ymm0,ymm5 6885 6886 vpsrld ymm2,ymm2,2 6887 vpaddd ymm0,ymm0,ymm7 6888 vpor ymm10,ymm10,ymm9 6889 vpor ymm2,ymm2,ymm6 6890 vmovdqa ymm15,YMMWORD[64+rbp] 6891 vpxor ymm11,ymm11,ymm13 6892 vmovdqa ymm13,YMMWORD[((480-256-128))+rbx] 6893 6894 vpslld ymm7,ymm0,5 6895 vpaddd ymm4,ymm4,ymm15 6896 vpxor ymm5,ymm3,ymm1 6897 vmovdqa YMMWORD[(384-256-128)+rbx],ymm10 6898 vpaddd ymm4,ymm4,ymm10 6899 vpxor ymm11,ymm11,YMMWORD[((160-128))+rax] 6900 vpsrld ymm8,ymm0,27 6901 vpxor ymm5,ymm5,ymm2 6902 vpxor ymm11,ymm11,ymm13 6903 6904 vpslld ymm6,ymm1,30 6905 vpor ymm7,ymm7,ymm8 6906 vpaddd ymm4,ymm4,ymm5 6907 vpsrld ymm9,ymm11,31 6908 vpaddd ymm11,ymm11,ymm11 6909 6910 vpsrld ymm1,ymm1,2 6911 vpaddd ymm4,ymm4,ymm7 6912 vpor ymm11,ymm11,ymm9 6913 vpor ymm1,ymm1,ymm6 6914 vpxor ymm12,ymm12,ymm14 6915 vmovdqa ymm14,YMMWORD[((0-128))+rax] 6916 6917 vpslld ymm7,ymm4,5 6918 vpaddd ymm3,ymm3,ymm15 6919 vpxor ymm5,ymm2,ymm0 6920 vmovdqa YMMWORD[(416-256-128)+rbx],ymm11 6921 vpaddd ymm3,ymm3,ymm11 6922 vpxor ymm12,ymm12,YMMWORD[((192-128))+rax] 6923 vpsrld ymm8,ymm4,27 6924 vpxor ymm5,ymm5,ymm1 6925 vpxor ymm12,ymm12,ymm14 6926 6927 vpslld ymm6,ymm0,30 6928 vpor ymm7,ymm7,ymm8 6929 vpaddd ymm3,ymm3,ymm5 6930 vpsrld ymm9,ymm12,31 6931 vpaddd ymm12,ymm12,ymm12 6932 6933 vpsrld ymm0,ymm0,2 6934 vpaddd ymm3,ymm3,ymm7 6935 vpor ymm12,ymm12,ymm9 6936 vpor ymm0,ymm0,ymm6 6937 vpxor ymm13,ymm13,ymm10 6938 vmovdqa ymm10,YMMWORD[((32-128))+rax] 6939 6940 vpslld ymm7,ymm3,5 6941 vpaddd ymm2,ymm2,ymm15 6942 vpxor ymm5,ymm1,ymm4 6943 vmovdqa YMMWORD[(448-256-128)+rbx],ymm12 6944 vpaddd ymm2,ymm2,ymm12 6945 vpxor ymm13,ymm13,YMMWORD[((224-128))+rax] 6946 vpsrld ymm8,ymm3,27 6947 vpxor ymm5,ymm5,ymm0 6948 vpxor ymm13,ymm13,ymm10 6949 6950 vpslld ymm6,ymm4,30 6951 vpor ymm7,ymm7,ymm8 6952 vpaddd ymm2,ymm2,ymm5 6953 vpsrld ymm9,ymm13,31 6954 vpaddd ymm13,ymm13,ymm13 6955 6956 vpsrld ymm4,ymm4,2 6957 vpaddd ymm2,ymm2,ymm7 6958 vpor ymm13,ymm13,ymm9 6959 vpor ymm4,ymm4,ymm6 6960 vpxor ymm14,ymm14,ymm11 6961 vmovdqa ymm11,YMMWORD[((64-128))+rax] 6962 6963 vpslld ymm7,ymm2,5 6964 vpaddd ymm1,ymm1,ymm15 6965 vpxor ymm5,ymm0,ymm3 6966 vmovdqa YMMWORD[(480-256-128)+rbx],ymm13 6967 vpaddd ymm1,ymm1,ymm13 6968 vpxor ymm14,ymm14,YMMWORD[((256-256-128))+rbx] 6969 vpsrld ymm8,ymm2,27 6970 vpxor ymm5,ymm5,ymm4 6971 vpxor ymm14,ymm14,ymm11 6972 6973 vpslld ymm6,ymm3,30 6974 vpor ymm7,ymm7,ymm8 6975 vpaddd ymm1,ymm1,ymm5 6976 vpsrld ymm9,ymm14,31 6977 vpaddd ymm14,ymm14,ymm14 6978 6979 vpsrld ymm3,ymm3,2 6980 vpaddd ymm1,ymm1,ymm7 6981 vpor ymm14,ymm14,ymm9 6982 vpor ymm3,ymm3,ymm6 6983 vpxor ymm10,ymm10,ymm12 6984 vmovdqa ymm12,YMMWORD[((96-128))+rax] 6985 6986 vpslld ymm7,ymm1,5 6987 vpaddd ymm0,ymm0,ymm15 6988 vpxor ymm5,ymm4,ymm2 6989 vmovdqa YMMWORD[(0-128)+rax],ymm14 6990 vpaddd ymm0,ymm0,ymm14 6991 vpxor ymm10,ymm10,YMMWORD[((288-256-128))+rbx] 6992 vpsrld ymm8,ymm1,27 6993 vpxor ymm5,ymm5,ymm3 6994 vpxor ymm10,ymm10,ymm12 6995 6996 vpslld ymm6,ymm2,30 6997 vpor ymm7,ymm7,ymm8 6998 vpaddd ymm0,ymm0,ymm5 6999 vpsrld ymm9,ymm10,31 7000 vpaddd ymm10,ymm10,ymm10 7001 7002 vpsrld ymm2,ymm2,2 7003 vpaddd ymm0,ymm0,ymm7 7004 vpor ymm10,ymm10,ymm9 7005 vpor ymm2,ymm2,ymm6 7006 vpxor ymm11,ymm11,ymm13 7007 vmovdqa ymm13,YMMWORD[((128-128))+rax] 7008 7009 vpslld ymm7,ymm0,5 7010 vpaddd ymm4,ymm4,ymm15 7011 vpxor ymm5,ymm3,ymm1 7012 vmovdqa YMMWORD[(32-128)+rax],ymm10 7013 vpaddd ymm4,ymm4,ymm10 7014 vpxor ymm11,ymm11,YMMWORD[((320-256-128))+rbx] 7015 vpsrld ymm8,ymm0,27 7016 vpxor ymm5,ymm5,ymm2 7017 vpxor ymm11,ymm11,ymm13 7018 7019 vpslld ymm6,ymm1,30 7020 vpor ymm7,ymm7,ymm8 7021 vpaddd ymm4,ymm4,ymm5 7022 vpsrld ymm9,ymm11,31 7023 vpaddd ymm11,ymm11,ymm11 7024 7025 vpsrld ymm1,ymm1,2 7026 vpaddd ymm4,ymm4,ymm7 7027 vpor ymm11,ymm11,ymm9 7028 vpor ymm1,ymm1,ymm6 7029 vpxor ymm12,ymm12,ymm14 7030 vmovdqa ymm14,YMMWORD[((160-128))+rax] 7031 7032 vpslld ymm7,ymm4,5 7033 vpaddd ymm3,ymm3,ymm15 7034 vpxor ymm5,ymm2,ymm0 7035 vmovdqa YMMWORD[(64-128)+rax],ymm11 7036 vpaddd ymm3,ymm3,ymm11 7037 vpxor ymm12,ymm12,YMMWORD[((352-256-128))+rbx] 7038 vpsrld ymm8,ymm4,27 7039 vpxor ymm5,ymm5,ymm1 7040 vpxor ymm12,ymm12,ymm14 7041 7042 vpslld ymm6,ymm0,30 7043 vpor ymm7,ymm7,ymm8 7044 vpaddd ymm3,ymm3,ymm5 7045 vpsrld ymm9,ymm12,31 7046 vpaddd ymm12,ymm12,ymm12 7047 7048 vpsrld ymm0,ymm0,2 7049 vpaddd ymm3,ymm3,ymm7 7050 vpor ymm12,ymm12,ymm9 7051 vpor ymm0,ymm0,ymm6 7052 vpxor ymm13,ymm13,ymm10 7053 vmovdqa ymm10,YMMWORD[((192-128))+rax] 7054 7055 vpslld ymm7,ymm3,5 7056 vpaddd ymm2,ymm2,ymm15 7057 vpxor ymm5,ymm1,ymm4 7058 vmovdqa YMMWORD[(96-128)+rax],ymm12 7059 vpaddd ymm2,ymm2,ymm12 7060 vpxor ymm13,ymm13,YMMWORD[((384-256-128))+rbx] 7061 vpsrld ymm8,ymm3,27 7062 vpxor ymm5,ymm5,ymm0 7063 vpxor ymm13,ymm13,ymm10 7064 7065 vpslld ymm6,ymm4,30 7066 vpor ymm7,ymm7,ymm8 7067 vpaddd ymm2,ymm2,ymm5 7068 vpsrld ymm9,ymm13,31 7069 vpaddd ymm13,ymm13,ymm13 7070 7071 vpsrld ymm4,ymm4,2 7072 vpaddd ymm2,ymm2,ymm7 7073 vpor ymm13,ymm13,ymm9 7074 vpor ymm4,ymm4,ymm6 7075 vpxor ymm14,ymm14,ymm11 7076 vmovdqa ymm11,YMMWORD[((224-128))+rax] 7077 7078 vpslld ymm7,ymm2,5 7079 vpaddd ymm1,ymm1,ymm15 7080 vpxor ymm5,ymm0,ymm3 7081 vmovdqa YMMWORD[(128-128)+rax],ymm13 7082 vpaddd ymm1,ymm1,ymm13 7083 vpxor ymm14,ymm14,YMMWORD[((416-256-128))+rbx] 7084 vpsrld ymm8,ymm2,27 7085 vpxor ymm5,ymm5,ymm4 7086 vpxor ymm14,ymm14,ymm11 7087 7088 vpslld ymm6,ymm3,30 7089 vpor ymm7,ymm7,ymm8 7090 vpaddd ymm1,ymm1,ymm5 7091 vpsrld ymm9,ymm14,31 7092 vpaddd ymm14,ymm14,ymm14 7093 7094 vpsrld ymm3,ymm3,2 7095 vpaddd ymm1,ymm1,ymm7 7096 vpor ymm14,ymm14,ymm9 7097 vpor ymm3,ymm3,ymm6 7098 vpxor ymm10,ymm10,ymm12 7099 vmovdqa ymm12,YMMWORD[((256-256-128))+rbx] 7100 7101 vpslld ymm7,ymm1,5 7102 vpaddd ymm0,ymm0,ymm15 7103 vpxor ymm5,ymm4,ymm2 7104 vmovdqa YMMWORD[(160-128)+rax],ymm14 7105 vpaddd ymm0,ymm0,ymm14 7106 vpxor ymm10,ymm10,YMMWORD[((448-256-128))+rbx] 7107 vpsrld ymm8,ymm1,27 7108 vpxor ymm5,ymm5,ymm3 7109 vpxor ymm10,ymm10,ymm12 7110 7111 vpslld ymm6,ymm2,30 7112 vpor ymm7,ymm7,ymm8 7113 vpaddd ymm0,ymm0,ymm5 7114 vpsrld ymm9,ymm10,31 7115 vpaddd ymm10,ymm10,ymm10 7116 7117 vpsrld ymm2,ymm2,2 7118 vpaddd ymm0,ymm0,ymm7 7119 vpor ymm10,ymm10,ymm9 7120 vpor ymm2,ymm2,ymm6 7121 vpxor ymm11,ymm11,ymm13 7122 vmovdqa ymm13,YMMWORD[((288-256-128))+rbx] 7123 7124 vpslld ymm7,ymm0,5 7125 vpaddd ymm4,ymm4,ymm15 7126 vpxor ymm5,ymm3,ymm1 7127 vmovdqa YMMWORD[(192-128)+rax],ymm10 7128 vpaddd ymm4,ymm4,ymm10 7129 vpxor ymm11,ymm11,YMMWORD[((480-256-128))+rbx] 7130 vpsrld ymm8,ymm0,27 7131 vpxor ymm5,ymm5,ymm2 7132 vpxor ymm11,ymm11,ymm13 7133 7134 vpslld ymm6,ymm1,30 7135 vpor ymm7,ymm7,ymm8 7136 vpaddd ymm4,ymm4,ymm5 7137 vpsrld ymm9,ymm11,31 7138 vpaddd ymm11,ymm11,ymm11 7139 7140 vpsrld ymm1,ymm1,2 7141 vpaddd ymm4,ymm4,ymm7 7142 vpor ymm11,ymm11,ymm9 7143 vpor ymm1,ymm1,ymm6 7144 vpxor ymm12,ymm12,ymm14 7145 vmovdqa ymm14,YMMWORD[((320-256-128))+rbx] 7146 7147 vpslld ymm7,ymm4,5 7148 vpaddd ymm3,ymm3,ymm15 7149 vpxor ymm5,ymm2,ymm0 7150 vmovdqa YMMWORD[(224-128)+rax],ymm11 7151 vpaddd ymm3,ymm3,ymm11 7152 vpxor ymm12,ymm12,YMMWORD[((0-128))+rax] 7153 vpsrld ymm8,ymm4,27 7154 vpxor ymm5,ymm5,ymm1 7155 vpxor ymm12,ymm12,ymm14 7156 7157 vpslld ymm6,ymm0,30 7158 vpor ymm7,ymm7,ymm8 7159 vpaddd ymm3,ymm3,ymm5 7160 vpsrld ymm9,ymm12,31 7161 vpaddd ymm12,ymm12,ymm12 7162 7163 vpsrld ymm0,ymm0,2 7164 vpaddd ymm3,ymm3,ymm7 7165 vpor ymm12,ymm12,ymm9 7166 vpor ymm0,ymm0,ymm6 7167 vpxor ymm13,ymm13,ymm10 7168 vmovdqa ymm10,YMMWORD[((352-256-128))+rbx] 7169 7170 vpslld ymm7,ymm3,5 7171 vpaddd ymm2,ymm2,ymm15 7172 vpxor ymm5,ymm1,ymm4 7173 vpaddd ymm2,ymm2,ymm12 7174 vpxor ymm13,ymm13,YMMWORD[((32-128))+rax] 7175 vpsrld ymm8,ymm3,27 7176 vpxor ymm5,ymm5,ymm0 7177 vpxor ymm13,ymm13,ymm10 7178 7179 vpslld ymm6,ymm4,30 7180 vpor ymm7,ymm7,ymm8 7181 vpaddd ymm2,ymm2,ymm5 7182 vpsrld ymm9,ymm13,31 7183 vpaddd ymm13,ymm13,ymm13 7184 7185 vpsrld ymm4,ymm4,2 7186 vpaddd ymm2,ymm2,ymm7 7187 vpor ymm13,ymm13,ymm9 7188 vpor ymm4,ymm4,ymm6 7189 vpxor ymm14,ymm14,ymm11 7190 vmovdqa ymm11,YMMWORD[((384-256-128))+rbx] 7191 7192 vpslld ymm7,ymm2,5 7193 vpaddd ymm1,ymm1,ymm15 7194 vpxor ymm5,ymm0,ymm3 7195 vpaddd ymm1,ymm1,ymm13 7196 vpxor ymm14,ymm14,YMMWORD[((64-128))+rax] 7197 vpsrld ymm8,ymm2,27 7198 vpxor ymm5,ymm5,ymm4 7199 vpxor ymm14,ymm14,ymm11 7200 7201 vpslld ymm6,ymm3,30 7202 vpor ymm7,ymm7,ymm8 7203 vpaddd ymm1,ymm1,ymm5 7204 vpsrld ymm9,ymm14,31 7205 vpaddd ymm14,ymm14,ymm14 7206 7207 vpsrld ymm3,ymm3,2 7208 vpaddd ymm1,ymm1,ymm7 7209 vpor ymm14,ymm14,ymm9 7210 vpor ymm3,ymm3,ymm6 7211 vpxor ymm10,ymm10,ymm12 7212 vmovdqa ymm12,YMMWORD[((416-256-128))+rbx] 7213 7214 vpslld ymm7,ymm1,5 7215 vpaddd ymm0,ymm0,ymm15 7216 vpxor ymm5,ymm4,ymm2 7217 vpaddd ymm0,ymm0,ymm14 7218 vpxor ymm10,ymm10,YMMWORD[((96-128))+rax] 7219 vpsrld ymm8,ymm1,27 7220 vpxor ymm5,ymm5,ymm3 7221 vpxor ymm10,ymm10,ymm12 7222 7223 vpslld ymm6,ymm2,30 7224 vpor ymm7,ymm7,ymm8 7225 vpaddd ymm0,ymm0,ymm5 7226 vpsrld ymm9,ymm10,31 7227 vpaddd ymm10,ymm10,ymm10 7228 7229 vpsrld ymm2,ymm2,2 7230 vpaddd ymm0,ymm0,ymm7 7231 vpor ymm10,ymm10,ymm9 7232 vpor ymm2,ymm2,ymm6 7233 vpxor ymm11,ymm11,ymm13 7234 vmovdqa ymm13,YMMWORD[((448-256-128))+rbx] 7235 7236 vpslld ymm7,ymm0,5 7237 vpaddd ymm4,ymm4,ymm15 7238 vpxor ymm5,ymm3,ymm1 7239 vpaddd ymm4,ymm4,ymm10 7240 vpxor ymm11,ymm11,YMMWORD[((128-128))+rax] 7241 vpsrld ymm8,ymm0,27 7242 vpxor ymm5,ymm5,ymm2 7243 vpxor ymm11,ymm11,ymm13 7244 7245 vpslld ymm6,ymm1,30 7246 vpor ymm7,ymm7,ymm8 7247 vpaddd ymm4,ymm4,ymm5 7248 vpsrld ymm9,ymm11,31 7249 vpaddd ymm11,ymm11,ymm11 7250 7251 vpsrld ymm1,ymm1,2 7252 vpaddd ymm4,ymm4,ymm7 7253 vpor ymm11,ymm11,ymm9 7254 vpor ymm1,ymm1,ymm6 7255 vpxor ymm12,ymm12,ymm14 7256 vmovdqa ymm14,YMMWORD[((480-256-128))+rbx] 7257 7258 vpslld ymm7,ymm4,5 7259 vpaddd ymm3,ymm3,ymm15 7260 vpxor ymm5,ymm2,ymm0 7261 vpaddd ymm3,ymm3,ymm11 7262 vpxor ymm12,ymm12,YMMWORD[((160-128))+rax] 7263 vpsrld ymm8,ymm4,27 7264 vpxor ymm5,ymm5,ymm1 7265 vpxor ymm12,ymm12,ymm14 7266 7267 vpslld ymm6,ymm0,30 7268 vpor ymm7,ymm7,ymm8 7269 vpaddd ymm3,ymm3,ymm5 7270 vpsrld ymm9,ymm12,31 7271 vpaddd ymm12,ymm12,ymm12 7272 7273 vpsrld ymm0,ymm0,2 7274 vpaddd ymm3,ymm3,ymm7 7275 vpor ymm12,ymm12,ymm9 7276 vpor ymm0,ymm0,ymm6 7277 vpxor ymm13,ymm13,ymm10 7278 vmovdqa ymm10,YMMWORD[((0-128))+rax] 7279 7280 vpslld ymm7,ymm3,5 7281 vpaddd ymm2,ymm2,ymm15 7282 vpxor ymm5,ymm1,ymm4 7283 vpaddd ymm2,ymm2,ymm12 7284 vpxor ymm13,ymm13,YMMWORD[((192-128))+rax] 7285 vpsrld ymm8,ymm3,27 7286 vpxor ymm5,ymm5,ymm0 7287 vpxor ymm13,ymm13,ymm10 7288 7289 vpslld ymm6,ymm4,30 7290 vpor ymm7,ymm7,ymm8 7291 vpaddd ymm2,ymm2,ymm5 7292 vpsrld ymm9,ymm13,31 7293 vpaddd ymm13,ymm13,ymm13 7294 7295 vpsrld ymm4,ymm4,2 7296 vpaddd ymm2,ymm2,ymm7 7297 vpor ymm13,ymm13,ymm9 7298 vpor ymm4,ymm4,ymm6 7299 vpxor ymm14,ymm14,ymm11 7300 vmovdqa ymm11,YMMWORD[((32-128))+rax] 7301 7302 vpslld ymm7,ymm2,5 7303 vpaddd ymm1,ymm1,ymm15 7304 vpxor ymm5,ymm0,ymm3 7305 vpaddd ymm1,ymm1,ymm13 7306 vpxor ymm14,ymm14,YMMWORD[((224-128))+rax] 7307 vpsrld ymm8,ymm2,27 7308 vpxor ymm5,ymm5,ymm4 7309 vpxor ymm14,ymm14,ymm11 7310 7311 vpslld ymm6,ymm3,30 7312 vpor ymm7,ymm7,ymm8 7313 vpaddd ymm1,ymm1,ymm5 7314 vpsrld ymm9,ymm14,31 7315 vpaddd ymm14,ymm14,ymm14 7316 7317 vpsrld ymm3,ymm3,2 7318 vpaddd ymm1,ymm1,ymm7 7319 vpor ymm14,ymm14,ymm9 7320 vpor ymm3,ymm3,ymm6 7321 vpslld ymm7,ymm1,5 7322 vpaddd ymm0,ymm0,ymm15 7323 vpxor ymm5,ymm4,ymm2 7324 7325 vpsrld ymm8,ymm1,27 7326 vpaddd ymm0,ymm0,ymm14 7327 vpxor ymm5,ymm5,ymm3 7328 7329 vpslld ymm6,ymm2,30 7330 vpor ymm7,ymm7,ymm8 7331 vpaddd ymm0,ymm0,ymm5 7332 7333 vpsrld ymm2,ymm2,2 7334 vpaddd ymm0,ymm0,ymm7 7335 vpor ymm2,ymm2,ymm6 7336 mov ecx,1 7337 lea rbx,[512+rsp] 7338 cmp ecx,DWORD[rbx] 7339 cmovge r12,rbp 7340 cmp ecx,DWORD[4+rbx] 7341 cmovge r13,rbp 7342 cmp ecx,DWORD[8+rbx] 7343 cmovge r14,rbp 7344 cmp ecx,DWORD[12+rbx] 7345 cmovge r15,rbp 7346 cmp ecx,DWORD[16+rbx] 7347 cmovge r8,rbp 7348 cmp ecx,DWORD[20+rbx] 7349 cmovge r9,rbp 7350 cmp ecx,DWORD[24+rbx] 7351 cmovge r10,rbp 7352 cmp ecx,DWORD[28+rbx] 7353 cmovge r11,rbp 7354 vmovdqu ymm5,YMMWORD[rbx] 7355 vpxor ymm7,ymm7,ymm7 7356 vmovdqa ymm6,ymm5 7357 vpcmpgtd ymm6,ymm6,ymm7 7358 vpaddd ymm5,ymm5,ymm6 7359 7360 vpand ymm0,ymm0,ymm6 7361 vpand ymm1,ymm1,ymm6 7362 vpaddd ymm0,ymm0,YMMWORD[rdi] 7363 vpand ymm2,ymm2,ymm6 7364 vpaddd ymm1,ymm1,YMMWORD[32+rdi] 7365 vpand ymm3,ymm3,ymm6 7366 vpaddd ymm2,ymm2,YMMWORD[64+rdi] 7367 vpand ymm4,ymm4,ymm6 7368 vpaddd ymm3,ymm3,YMMWORD[96+rdi] 7369 vpaddd ymm4,ymm4,YMMWORD[128+rdi] 7370 vmovdqu YMMWORD[rdi],ymm0 7371 vmovdqu YMMWORD[32+rdi],ymm1 7372 vmovdqu YMMWORD[64+rdi],ymm2 7373 vmovdqu YMMWORD[96+rdi],ymm3 7374 vmovdqu YMMWORD[128+rdi],ymm4 7375 7376 vmovdqu YMMWORD[rbx],ymm5 7377 lea rbx,[((256+128))+rsp] 7378 vmovdqu ymm9,YMMWORD[96+rbp] 7379 dec edx 7380 jnz NEAR $L$oop_avx2 7381 7382 7383 7384 7385 7386 7387 7388 $L$done_avx2: 7389 mov rax,QWORD[544+rsp] 7390 7391 vzeroupper 7392 movaps xmm6,XMMWORD[((-216))+rax] 7393 movaps xmm7,XMMWORD[((-200))+rax] 7394 movaps xmm8,XMMWORD[((-184))+rax] 7395 movaps xmm9,XMMWORD[((-168))+rax] 7396 movaps xmm10,XMMWORD[((-152))+rax] 7397 movaps xmm11,XMMWORD[((-136))+rax] 7398 movaps xmm12,XMMWORD[((-120))+rax] 7399 movaps xmm13,XMMWORD[((-104))+rax] 7400 movaps xmm14,XMMWORD[((-88))+rax] 7401 movaps xmm15,XMMWORD[((-72))+rax] 7402 mov r15,QWORD[((-48))+rax] 7403 7404 mov r14,QWORD[((-40))+rax] 7405 7406 mov r13,QWORD[((-32))+rax] 7407 7408 mov r12,QWORD[((-24))+rax] 7409 7410 mov rbp,QWORD[((-16))+rax] 7411 7412 mov rbx,QWORD[((-8))+rax] 7413 7414 lea rsp,[rax] 7415 7416 $L$epilogue_avx2: 7417 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 7418 mov rsi,QWORD[16+rsp] 7419 DB 0F3h,0C3h ;repret 7420 7421 $L$SEH_end_sha1_multi_block_avx2: 7422 3020 7423 ALIGN 256 3021 7424 DD 0x5a827999,0x5a827999,0x5a827999,0x5a827999 … … 3120 7523 DB 0F3h,0C3h ;repret 3121 7524 7525 7526 ALIGN 16 7527 avx2_handler: 7528 push rsi 7529 push rdi 7530 push rbx 7531 push rbp 7532 push r12 7533 push r13 7534 push r14 7535 push r15 7536 pushfq 7537 sub rsp,64 7538 7539 mov rax,QWORD[120+r8] 7540 mov rbx,QWORD[248+r8] 7541 7542 mov rsi,QWORD[8+r9] 7543 mov r11,QWORD[56+r9] 7544 7545 mov r10d,DWORD[r11] 7546 lea r10,[r10*1+rsi] 7547 cmp rbx,r10 7548 jb NEAR $L$in_prologue 7549 7550 mov rax,QWORD[152+r8] 7551 7552 mov r10d,DWORD[4+r11] 7553 lea r10,[r10*1+rsi] 7554 cmp rbx,r10 7555 jae NEAR $L$in_prologue 7556 7557 mov rax,QWORD[544+r8] 7558 7559 mov rbx,QWORD[((-8))+rax] 7560 mov rbp,QWORD[((-16))+rax] 7561 mov r12,QWORD[((-24))+rax] 7562 mov r13,QWORD[((-32))+rax] 7563 mov r14,QWORD[((-40))+rax] 7564 mov r15,QWORD[((-48))+rax] 7565 mov QWORD[144+r8],rbx 7566 mov QWORD[160+r8],rbp 7567 mov QWORD[216+r8],r12 7568 mov QWORD[224+r8],r13 7569 mov QWORD[232+r8],r14 7570 mov QWORD[240+r8],r15 7571 7572 lea rsi,[((-56-160))+rax] 7573 lea rdi,[512+r8] 7574 mov ecx,20 7575 DD 0xa548f3fc 7576 7577 jmp NEAR $L$in_prologue 7578 3122 7579 section .pdata rdata align=4 3123 7580 ALIGN 4 … … 3128 7585 DD $L$SEH_end_sha1_multi_block_shaext wrt ..imagebase 3129 7586 DD $L$SEH_info_sha1_multi_block_shaext wrt ..imagebase 7587 DD $L$SEH_begin_sha1_multi_block_avx wrt ..imagebase 7588 DD $L$SEH_end_sha1_multi_block_avx wrt ..imagebase 7589 DD $L$SEH_info_sha1_multi_block_avx wrt ..imagebase 7590 DD $L$SEH_begin_sha1_multi_block_avx2 wrt ..imagebase 7591 DD $L$SEH_end_sha1_multi_block_avx2 wrt ..imagebase 7592 DD $L$SEH_info_sha1_multi_block_avx2 wrt ..imagebase 3130 7593 section .xdata rdata align=8 3131 7594 ALIGN 8 … … 3138 7601 DD se_handler wrt ..imagebase 3139 7602 DD $L$body_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase 7603 $L$SEH_info_sha1_multi_block_avx: 7604 DB 9,0,0,0 7605 DD se_handler wrt ..imagebase 7606 DD $L$body_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase 7607 $L$SEH_info_sha1_multi_block_avx2: 7608 DB 9,0,0,0 7609 DD avx2_handler wrt ..imagebase 7610 DD $L$body_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/sha1-x86_64.S
r97373 r99371 28 28 test r10d,536870912 29 29 jnz NEAR _shaext_shortcut 30 and r10d,296 31 cmp r10d,296 32 je NEAR _avx2_shortcut 33 and r8d,268435456 34 and r9d,1073741824 35 or r8d,r9d 36 cmp r8d,1342177280 37 je NEAR _avx_shortcut 30 38 jmp NEAR _ssse3_shortcut 31 39 … … 2668 2676 2669 2677 $L$SEH_end_sha1_block_data_order_ssse3: 2678 2679 ALIGN 16 2680 sha1_block_data_order_avx: 2681 mov QWORD[8+rsp],rdi ;WIN64 prologue 2682 mov QWORD[16+rsp],rsi 2683 mov rax,rsp 2684 $L$SEH_begin_sha1_block_data_order_avx: 2685 mov rdi,rcx 2686 mov rsi,rdx 2687 mov rdx,r8 2688 2689 2690 _avx_shortcut: 2691 2692 mov r11,rsp 2693 2694 push rbx 2695 2696 push rbp 2697 2698 push r12 2699 2700 push r13 2701 2702 push r14 2703 2704 lea rsp,[((-160))+rsp] 2705 vzeroupper 2706 vmovaps XMMWORD[(-40-96)+r11],xmm6 2707 vmovaps XMMWORD[(-40-80)+r11],xmm7 2708 vmovaps XMMWORD[(-40-64)+r11],xmm8 2709 vmovaps XMMWORD[(-40-48)+r11],xmm9 2710 vmovaps XMMWORD[(-40-32)+r11],xmm10 2711 vmovaps XMMWORD[(-40-16)+r11],xmm11 2712 $L$prologue_avx: 2713 and rsp,-64 2714 mov r8,rdi 2715 mov r9,rsi 2716 mov r10,rdx 2717 2718 shl r10,6 2719 add r10,r9 2720 lea r14,[((K_XX_XX+64))] 2721 2722 mov eax,DWORD[r8] 2723 mov ebx,DWORD[4+r8] 2724 mov ecx,DWORD[8+r8] 2725 mov edx,DWORD[12+r8] 2726 mov esi,ebx 2727 mov ebp,DWORD[16+r8] 2728 mov edi,ecx 2729 xor edi,edx 2730 and esi,edi 2731 2732 vmovdqa xmm6,XMMWORD[64+r14] 2733 vmovdqa xmm11,XMMWORD[((-64))+r14] 2734 vmovdqu xmm0,XMMWORD[r9] 2735 vmovdqu xmm1,XMMWORD[16+r9] 2736 vmovdqu xmm2,XMMWORD[32+r9] 2737 vmovdqu xmm3,XMMWORD[48+r9] 2738 vpshufb xmm0,xmm0,xmm6 2739 add r9,64 2740 vpshufb xmm1,xmm1,xmm6 2741 vpshufb xmm2,xmm2,xmm6 2742 vpshufb xmm3,xmm3,xmm6 2743 vpaddd xmm4,xmm0,xmm11 2744 vpaddd xmm5,xmm1,xmm11 2745 vpaddd xmm6,xmm2,xmm11 2746 vmovdqa XMMWORD[rsp],xmm4 2747 vmovdqa XMMWORD[16+rsp],xmm5 2748 vmovdqa XMMWORD[32+rsp],xmm6 2749 jmp NEAR $L$oop_avx 2750 ALIGN 16 2751 $L$oop_avx: 2752 shrd ebx,ebx,2 2753 xor esi,edx 2754 vpalignr xmm4,xmm1,xmm0,8 2755 mov edi,eax 2756 add ebp,DWORD[rsp] 2757 vpaddd xmm9,xmm11,xmm3 2758 xor ebx,ecx 2759 shld eax,eax,5 2760 vpsrldq xmm8,xmm3,4 2761 add ebp,esi 2762 and edi,ebx 2763 vpxor xmm4,xmm4,xmm0 2764 xor ebx,ecx 2765 add ebp,eax 2766 vpxor xmm8,xmm8,xmm2 2767 shrd eax,eax,7 2768 xor edi,ecx 2769 mov esi,ebp 2770 add edx,DWORD[4+rsp] 2771 vpxor xmm4,xmm4,xmm8 2772 xor eax,ebx 2773 shld ebp,ebp,5 2774 vmovdqa XMMWORD[48+rsp],xmm9 2775 add edx,edi 2776 and esi,eax 2777 vpsrld xmm8,xmm4,31 2778 xor eax,ebx 2779 add edx,ebp 2780 shrd ebp,ebp,7 2781 xor esi,ebx 2782 vpslldq xmm10,xmm4,12 2783 vpaddd xmm4,xmm4,xmm4 2784 mov edi,edx 2785 add ecx,DWORD[8+rsp] 2786 xor ebp,eax 2787 shld edx,edx,5 2788 vpsrld xmm9,xmm10,30 2789 vpor xmm4,xmm4,xmm8 2790 add ecx,esi 2791 and edi,ebp 2792 xor ebp,eax 2793 add ecx,edx 2794 vpslld xmm10,xmm10,2 2795 vpxor xmm4,xmm4,xmm9 2796 shrd edx,edx,7 2797 xor edi,eax 2798 mov esi,ecx 2799 add ebx,DWORD[12+rsp] 2800 vpxor xmm4,xmm4,xmm10 2801 xor edx,ebp 2802 shld ecx,ecx,5 2803 add ebx,edi 2804 and esi,edx 2805 xor edx,ebp 2806 add ebx,ecx 2807 shrd ecx,ecx,7 2808 xor esi,ebp 2809 vpalignr xmm5,xmm2,xmm1,8 2810 mov edi,ebx 2811 add eax,DWORD[16+rsp] 2812 vpaddd xmm9,xmm11,xmm4 2813 xor ecx,edx 2814 shld ebx,ebx,5 2815 vpsrldq xmm8,xmm4,4 2816 add eax,esi 2817 and edi,ecx 2818 vpxor xmm5,xmm5,xmm1 2819 xor ecx,edx 2820 add eax,ebx 2821 vpxor xmm8,xmm8,xmm3 2822 shrd ebx,ebx,7 2823 xor edi,edx 2824 mov esi,eax 2825 add ebp,DWORD[20+rsp] 2826 vpxor xmm5,xmm5,xmm8 2827 xor ebx,ecx 2828 shld eax,eax,5 2829 vmovdqa XMMWORD[rsp],xmm9 2830 add ebp,edi 2831 and esi,ebx 2832 vpsrld xmm8,xmm5,31 2833 xor ebx,ecx 2834 add ebp,eax 2835 shrd eax,eax,7 2836 xor esi,ecx 2837 vpslldq xmm10,xmm5,12 2838 vpaddd xmm5,xmm5,xmm5 2839 mov edi,ebp 2840 add edx,DWORD[24+rsp] 2841 xor eax,ebx 2842 shld ebp,ebp,5 2843 vpsrld xmm9,xmm10,30 2844 vpor xmm5,xmm5,xmm8 2845 add edx,esi 2846 and edi,eax 2847 xor eax,ebx 2848 add edx,ebp 2849 vpslld xmm10,xmm10,2 2850 vpxor xmm5,xmm5,xmm9 2851 shrd ebp,ebp,7 2852 xor edi,ebx 2853 mov esi,edx 2854 add ecx,DWORD[28+rsp] 2855 vpxor xmm5,xmm5,xmm10 2856 xor ebp,eax 2857 shld edx,edx,5 2858 vmovdqa xmm11,XMMWORD[((-32))+r14] 2859 add ecx,edi 2860 and esi,ebp 2861 xor ebp,eax 2862 add ecx,edx 2863 shrd edx,edx,7 2864 xor esi,eax 2865 vpalignr xmm6,xmm3,xmm2,8 2866 mov edi,ecx 2867 add ebx,DWORD[32+rsp] 2868 vpaddd xmm9,xmm11,xmm5 2869 xor edx,ebp 2870 shld ecx,ecx,5 2871 vpsrldq xmm8,xmm5,4 2872 add ebx,esi 2873 and edi,edx 2874 vpxor xmm6,xmm6,xmm2 2875 xor edx,ebp 2876 add ebx,ecx 2877 vpxor xmm8,xmm8,xmm4 2878 shrd ecx,ecx,7 2879 xor edi,ebp 2880 mov esi,ebx 2881 add eax,DWORD[36+rsp] 2882 vpxor xmm6,xmm6,xmm8 2883 xor ecx,edx 2884 shld ebx,ebx,5 2885 vmovdqa XMMWORD[16+rsp],xmm9 2886 add eax,edi 2887 and esi,ecx 2888 vpsrld xmm8,xmm6,31 2889 xor ecx,edx 2890 add eax,ebx 2891 shrd ebx,ebx,7 2892 xor esi,edx 2893 vpslldq xmm10,xmm6,12 2894 vpaddd xmm6,xmm6,xmm6 2895 mov edi,eax 2896 add ebp,DWORD[40+rsp] 2897 xor ebx,ecx 2898 shld eax,eax,5 2899 vpsrld xmm9,xmm10,30 2900 vpor xmm6,xmm6,xmm8 2901 add ebp,esi 2902 and edi,ebx 2903 xor ebx,ecx 2904 add ebp,eax 2905 vpslld xmm10,xmm10,2 2906 vpxor xmm6,xmm6,xmm9 2907 shrd eax,eax,7 2908 xor edi,ecx 2909 mov esi,ebp 2910 add edx,DWORD[44+rsp] 2911 vpxor xmm6,xmm6,xmm10 2912 xor eax,ebx 2913 shld ebp,ebp,5 2914 add edx,edi 2915 and esi,eax 2916 xor eax,ebx 2917 add edx,ebp 2918 shrd ebp,ebp,7 2919 xor esi,ebx 2920 vpalignr xmm7,xmm4,xmm3,8 2921 mov edi,edx 2922 add ecx,DWORD[48+rsp] 2923 vpaddd xmm9,xmm11,xmm6 2924 xor ebp,eax 2925 shld edx,edx,5 2926 vpsrldq xmm8,xmm6,4 2927 add ecx,esi 2928 and edi,ebp 2929 vpxor xmm7,xmm7,xmm3 2930 xor ebp,eax 2931 add ecx,edx 2932 vpxor xmm8,xmm8,xmm5 2933 shrd edx,edx,7 2934 xor edi,eax 2935 mov esi,ecx 2936 add ebx,DWORD[52+rsp] 2937 vpxor xmm7,xmm7,xmm8 2938 xor edx,ebp 2939 shld ecx,ecx,5 2940 vmovdqa XMMWORD[32+rsp],xmm9 2941 add ebx,edi 2942 and esi,edx 2943 vpsrld xmm8,xmm7,31 2944 xor edx,ebp 2945 add ebx,ecx 2946 shrd ecx,ecx,7 2947 xor esi,ebp 2948 vpslldq xmm10,xmm7,12 2949 vpaddd xmm7,xmm7,xmm7 2950 mov edi,ebx 2951 add eax,DWORD[56+rsp] 2952 xor ecx,edx 2953 shld ebx,ebx,5 2954 vpsrld xmm9,xmm10,30 2955 vpor xmm7,xmm7,xmm8 2956 add eax,esi 2957 and edi,ecx 2958 xor ecx,edx 2959 add eax,ebx 2960 vpslld xmm10,xmm10,2 2961 vpxor xmm7,xmm7,xmm9 2962 shrd ebx,ebx,7 2963 xor edi,edx 2964 mov esi,eax 2965 add ebp,DWORD[60+rsp] 2966 vpxor xmm7,xmm7,xmm10 2967 xor ebx,ecx 2968 shld eax,eax,5 2969 add ebp,edi 2970 and esi,ebx 2971 xor ebx,ecx 2972 add ebp,eax 2973 vpalignr xmm8,xmm7,xmm6,8 2974 vpxor xmm0,xmm0,xmm4 2975 shrd eax,eax,7 2976 xor esi,ecx 2977 mov edi,ebp 2978 add edx,DWORD[rsp] 2979 vpxor xmm0,xmm0,xmm1 2980 xor eax,ebx 2981 shld ebp,ebp,5 2982 vpaddd xmm9,xmm11,xmm7 2983 add edx,esi 2984 and edi,eax 2985 vpxor xmm0,xmm0,xmm8 2986 xor eax,ebx 2987 add edx,ebp 2988 shrd ebp,ebp,7 2989 xor edi,ebx 2990 vpsrld xmm8,xmm0,30 2991 vmovdqa XMMWORD[48+rsp],xmm9 2992 mov esi,edx 2993 add ecx,DWORD[4+rsp] 2994 xor ebp,eax 2995 shld edx,edx,5 2996 vpslld xmm0,xmm0,2 2997 add ecx,edi 2998 and esi,ebp 2999 xor ebp,eax 3000 add ecx,edx 3001 shrd edx,edx,7 3002 xor esi,eax 3003 mov edi,ecx 3004 add ebx,DWORD[8+rsp] 3005 vpor xmm0,xmm0,xmm8 3006 xor edx,ebp 3007 shld ecx,ecx,5 3008 add ebx,esi 3009 and edi,edx 3010 xor edx,ebp 3011 add ebx,ecx 3012 add eax,DWORD[12+rsp] 3013 xor edi,ebp 3014 mov esi,ebx 3015 shld ebx,ebx,5 3016 add eax,edi 3017 xor esi,edx 3018 shrd ecx,ecx,7 3019 add eax,ebx 3020 vpalignr xmm8,xmm0,xmm7,8 3021 vpxor xmm1,xmm1,xmm5 3022 add ebp,DWORD[16+rsp] 3023 xor esi,ecx 3024 mov edi,eax 3025 shld eax,eax,5 3026 vpxor xmm1,xmm1,xmm2 3027 add ebp,esi 3028 xor edi,ecx 3029 vpaddd xmm9,xmm11,xmm0 3030 shrd ebx,ebx,7 3031 add ebp,eax 3032 vpxor xmm1,xmm1,xmm8 3033 add edx,DWORD[20+rsp] 3034 xor edi,ebx 3035 mov esi,ebp 3036 shld ebp,ebp,5 3037 vpsrld xmm8,xmm1,30 3038 vmovdqa XMMWORD[rsp],xmm9 3039 add edx,edi 3040 xor esi,ebx 3041 shrd eax,eax,7 3042 add edx,ebp 3043 vpslld xmm1,xmm1,2 3044 add ecx,DWORD[24+rsp] 3045 xor esi,eax 3046 mov edi,edx 3047 shld edx,edx,5 3048 add ecx,esi 3049 xor edi,eax 3050 shrd ebp,ebp,7 3051 add ecx,edx 3052 vpor xmm1,xmm1,xmm8 3053 add ebx,DWORD[28+rsp] 3054 xor edi,ebp 3055 mov esi,ecx 3056 shld ecx,ecx,5 3057 add ebx,edi 3058 xor esi,ebp 3059 shrd edx,edx,7 3060 add ebx,ecx 3061 vpalignr xmm8,xmm1,xmm0,8 3062 vpxor xmm2,xmm2,xmm6 3063 add eax,DWORD[32+rsp] 3064 xor esi,edx 3065 mov edi,ebx 3066 shld ebx,ebx,5 3067 vpxor xmm2,xmm2,xmm3 3068 add eax,esi 3069 xor edi,edx 3070 vpaddd xmm9,xmm11,xmm1 3071 vmovdqa xmm11,XMMWORD[r14] 3072 shrd ecx,ecx,7 3073 add eax,ebx 3074 vpxor xmm2,xmm2,xmm8 3075 add ebp,DWORD[36+rsp] 3076 xor edi,ecx 3077 mov esi,eax 3078 shld eax,eax,5 3079 vpsrld xmm8,xmm2,30 3080 vmovdqa XMMWORD[16+rsp],xmm9 3081 add ebp,edi 3082 xor esi,ecx 3083 shrd ebx,ebx,7 3084 add ebp,eax 3085 vpslld xmm2,xmm2,2 3086 add edx,DWORD[40+rsp] 3087 xor esi,ebx 3088 mov edi,ebp 3089 shld ebp,ebp,5 3090 add edx,esi 3091 xor edi,ebx 3092 shrd eax,eax,7 3093 add edx,ebp 3094 vpor xmm2,xmm2,xmm8 3095 add ecx,DWORD[44+rsp] 3096 xor edi,eax 3097 mov esi,edx 3098 shld edx,edx,5 3099 add ecx,edi 3100 xor esi,eax 3101 shrd ebp,ebp,7 3102 add ecx,edx 3103 vpalignr xmm8,xmm2,xmm1,8 3104 vpxor xmm3,xmm3,xmm7 3105 add ebx,DWORD[48+rsp] 3106 xor esi,ebp 3107 mov edi,ecx 3108 shld ecx,ecx,5 3109 vpxor xmm3,xmm3,xmm4 3110 add ebx,esi 3111 xor edi,ebp 3112 vpaddd xmm9,xmm11,xmm2 3113 shrd edx,edx,7 3114 add ebx,ecx 3115 vpxor xmm3,xmm3,xmm8 3116 add eax,DWORD[52+rsp] 3117 xor edi,edx 3118 mov esi,ebx 3119 shld ebx,ebx,5 3120 vpsrld xmm8,xmm3,30 3121 vmovdqa XMMWORD[32+rsp],xmm9 3122 add eax,edi 3123 xor esi,edx 3124 shrd ecx,ecx,7 3125 add eax,ebx 3126 vpslld xmm3,xmm3,2 3127 add ebp,DWORD[56+rsp] 3128 xor esi,ecx 3129 mov edi,eax 3130 shld eax,eax,5 3131 add ebp,esi 3132 xor edi,ecx 3133 shrd ebx,ebx,7 3134 add ebp,eax 3135 vpor xmm3,xmm3,xmm8 3136 add edx,DWORD[60+rsp] 3137 xor edi,ebx 3138 mov esi,ebp 3139 shld ebp,ebp,5 3140 add edx,edi 3141 xor esi,ebx 3142 shrd eax,eax,7 3143 add edx,ebp 3144 vpalignr xmm8,xmm3,xmm2,8 3145 vpxor xmm4,xmm4,xmm0 3146 add ecx,DWORD[rsp] 3147 xor esi,eax 3148 mov edi,edx 3149 shld edx,edx,5 3150 vpxor xmm4,xmm4,xmm5 3151 add ecx,esi 3152 xor edi,eax 3153 vpaddd xmm9,xmm11,xmm3 3154 shrd ebp,ebp,7 3155 add ecx,edx 3156 vpxor xmm4,xmm4,xmm8 3157 add ebx,DWORD[4+rsp] 3158 xor edi,ebp 3159 mov esi,ecx 3160 shld ecx,ecx,5 3161 vpsrld xmm8,xmm4,30 3162 vmovdqa XMMWORD[48+rsp],xmm9 3163 add ebx,edi 3164 xor esi,ebp 3165 shrd edx,edx,7 3166 add ebx,ecx 3167 vpslld xmm4,xmm4,2 3168 add eax,DWORD[8+rsp] 3169 xor esi,edx 3170 mov edi,ebx 3171 shld ebx,ebx,5 3172 add eax,esi 3173 xor edi,edx 3174 shrd ecx,ecx,7 3175 add eax,ebx 3176 vpor xmm4,xmm4,xmm8 3177 add ebp,DWORD[12+rsp] 3178 xor edi,ecx 3179 mov esi,eax 3180 shld eax,eax,5 3181 add ebp,edi 3182 xor esi,ecx 3183 shrd ebx,ebx,7 3184 add ebp,eax 3185 vpalignr xmm8,xmm4,xmm3,8 3186 vpxor xmm5,xmm5,xmm1 3187 add edx,DWORD[16+rsp] 3188 xor esi,ebx 3189 mov edi,ebp 3190 shld ebp,ebp,5 3191 vpxor xmm5,xmm5,xmm6 3192 add edx,esi 3193 xor edi,ebx 3194 vpaddd xmm9,xmm11,xmm4 3195 shrd eax,eax,7 3196 add edx,ebp 3197 vpxor xmm5,xmm5,xmm8 3198 add ecx,DWORD[20+rsp] 3199 xor edi,eax 3200 mov esi,edx 3201 shld edx,edx,5 3202 vpsrld xmm8,xmm5,30 3203 vmovdqa XMMWORD[rsp],xmm9 3204 add ecx,edi 3205 xor esi,eax 3206 shrd ebp,ebp,7 3207 add ecx,edx 3208 vpslld xmm5,xmm5,2 3209 add ebx,DWORD[24+rsp] 3210 xor esi,ebp 3211 mov edi,ecx 3212 shld ecx,ecx,5 3213 add ebx,esi 3214 xor edi,ebp 3215 shrd edx,edx,7 3216 add ebx,ecx 3217 vpor xmm5,xmm5,xmm8 3218 add eax,DWORD[28+rsp] 3219 shrd ecx,ecx,7 3220 mov esi,ebx 3221 xor edi,edx 3222 shld ebx,ebx,5 3223 add eax,edi 3224 xor esi,ecx 3225 xor ecx,edx 3226 add eax,ebx 3227 vpalignr xmm8,xmm5,xmm4,8 3228 vpxor xmm6,xmm6,xmm2 3229 add ebp,DWORD[32+rsp] 3230 and esi,ecx 3231 xor ecx,edx 3232 shrd ebx,ebx,7 3233 vpxor xmm6,xmm6,xmm7 3234 mov edi,eax 3235 xor esi,ecx 3236 vpaddd xmm9,xmm11,xmm5 3237 shld eax,eax,5 3238 add ebp,esi 3239 vpxor xmm6,xmm6,xmm8 3240 xor edi,ebx 3241 xor ebx,ecx 3242 add ebp,eax 3243 add edx,DWORD[36+rsp] 3244 vpsrld xmm8,xmm6,30 3245 vmovdqa XMMWORD[16+rsp],xmm9 3246 and edi,ebx 3247 xor ebx,ecx 3248 shrd eax,eax,7 3249 mov esi,ebp 3250 vpslld xmm6,xmm6,2 3251 xor edi,ebx 3252 shld ebp,ebp,5 3253 add edx,edi 3254 xor esi,eax 3255 xor eax,ebx 3256 add edx,ebp 3257 add ecx,DWORD[40+rsp] 3258 and esi,eax 3259 vpor xmm6,xmm6,xmm8 3260 xor eax,ebx 3261 shrd ebp,ebp,7 3262 mov edi,edx 3263 xor esi,eax 3264 shld edx,edx,5 3265 add ecx,esi 3266 xor edi,ebp 3267 xor ebp,eax 3268 add ecx,edx 3269 add ebx,DWORD[44+rsp] 3270 and edi,ebp 3271 xor ebp,eax 3272 shrd edx,edx,7 3273 mov esi,ecx 3274 xor edi,ebp 3275 shld ecx,ecx,5 3276 add ebx,edi 3277 xor esi,edx 3278 xor edx,ebp 3279 add ebx,ecx 3280 vpalignr xmm8,xmm6,xmm5,8 3281 vpxor xmm7,xmm7,xmm3 3282 add eax,DWORD[48+rsp] 3283 and esi,edx 3284 xor edx,ebp 3285 shrd ecx,ecx,7 3286 vpxor xmm7,xmm7,xmm0 3287 mov edi,ebx 3288 xor esi,edx 3289 vpaddd xmm9,xmm11,xmm6 3290 vmovdqa xmm11,XMMWORD[32+r14] 3291 shld ebx,ebx,5 3292 add eax,esi 3293 vpxor xmm7,xmm7,xmm8 3294 xor edi,ecx 3295 xor ecx,edx 3296 add eax,ebx 3297 add ebp,DWORD[52+rsp] 3298 vpsrld xmm8,xmm7,30 3299 vmovdqa XMMWORD[32+rsp],xmm9 3300 and edi,ecx 3301 xor ecx,edx 3302 shrd ebx,ebx,7 3303 mov esi,eax 3304 vpslld xmm7,xmm7,2 3305 xor edi,ecx 3306 shld eax,eax,5 3307 add ebp,edi 3308 xor esi,ebx 3309 xor ebx,ecx 3310 add ebp,eax 3311 add edx,DWORD[56+rsp] 3312 and esi,ebx 3313 vpor xmm7,xmm7,xmm8 3314 xor ebx,ecx 3315 shrd eax,eax,7 3316 mov edi,ebp 3317 xor esi,ebx 3318 shld ebp,ebp,5 3319 add edx,esi 3320 xor edi,eax 3321 xor eax,ebx 3322 add edx,ebp 3323 add ecx,DWORD[60+rsp] 3324 and edi,eax 3325 xor eax,ebx 3326 shrd ebp,ebp,7 3327 mov esi,edx 3328 xor edi,eax 3329 shld edx,edx,5 3330 add ecx,edi 3331 xor esi,ebp 3332 xor ebp,eax 3333 add ecx,edx 3334 vpalignr xmm8,xmm7,xmm6,8 3335 vpxor xmm0,xmm0,xmm4 3336 add ebx,DWORD[rsp] 3337 and esi,ebp 3338 xor ebp,eax 3339 shrd edx,edx,7 3340 vpxor xmm0,xmm0,xmm1 3341 mov edi,ecx 3342 xor esi,ebp 3343 vpaddd xmm9,xmm11,xmm7 3344 shld ecx,ecx,5 3345 add ebx,esi 3346 vpxor xmm0,xmm0,xmm8 3347 xor edi,edx 3348 xor edx,ebp 3349 add ebx,ecx 3350 add eax,DWORD[4+rsp] 3351 vpsrld xmm8,xmm0,30 3352 vmovdqa XMMWORD[48+rsp],xmm9 3353 and edi,edx 3354 xor edx,ebp 3355 shrd ecx,ecx,7 3356 mov esi,ebx 3357 vpslld xmm0,xmm0,2 3358 xor edi,edx 3359 shld ebx,ebx,5 3360 add eax,edi 3361 xor esi,ecx 3362 xor ecx,edx 3363 add eax,ebx 3364 add ebp,DWORD[8+rsp] 3365 and esi,ecx 3366 vpor xmm0,xmm0,xmm8 3367 xor ecx,edx 3368 shrd ebx,ebx,7 3369 mov edi,eax 3370 xor esi,ecx 3371 shld eax,eax,5 3372 add ebp,esi 3373 xor edi,ebx 3374 xor ebx,ecx 3375 add ebp,eax 3376 add edx,DWORD[12+rsp] 3377 and edi,ebx 3378 xor ebx,ecx 3379 shrd eax,eax,7 3380 mov esi,ebp 3381 xor edi,ebx 3382 shld ebp,ebp,5 3383 add edx,edi 3384 xor esi,eax 3385 xor eax,ebx 3386 add edx,ebp 3387 vpalignr xmm8,xmm0,xmm7,8 3388 vpxor xmm1,xmm1,xmm5 3389 add ecx,DWORD[16+rsp] 3390 and esi,eax 3391 xor eax,ebx 3392 shrd ebp,ebp,7 3393 vpxor xmm1,xmm1,xmm2 3394 mov edi,edx 3395 xor esi,eax 3396 vpaddd xmm9,xmm11,xmm0 3397 shld edx,edx,5 3398 add ecx,esi 3399 vpxor xmm1,xmm1,xmm8 3400 xor edi,ebp 3401 xor ebp,eax 3402 add ecx,edx 3403 add ebx,DWORD[20+rsp] 3404 vpsrld xmm8,xmm1,30 3405 vmovdqa XMMWORD[rsp],xmm9 3406 and edi,ebp 3407 xor ebp,eax 3408 shrd edx,edx,7 3409 mov esi,ecx 3410 vpslld xmm1,xmm1,2 3411 xor edi,ebp 3412 shld ecx,ecx,5 3413 add ebx,edi 3414 xor esi,edx 3415 xor edx,ebp 3416 add ebx,ecx 3417 add eax,DWORD[24+rsp] 3418 and esi,edx 3419 vpor xmm1,xmm1,xmm8 3420 xor edx,ebp 3421 shrd ecx,ecx,7 3422 mov edi,ebx 3423 xor esi,edx 3424 shld ebx,ebx,5 3425 add eax,esi 3426 xor edi,ecx 3427 xor ecx,edx 3428 add eax,ebx 3429 add ebp,DWORD[28+rsp] 3430 and edi,ecx 3431 xor ecx,edx 3432 shrd ebx,ebx,7 3433 mov esi,eax 3434 xor edi,ecx 3435 shld eax,eax,5 3436 add ebp,edi 3437 xor esi,ebx 3438 xor ebx,ecx 3439 add ebp,eax 3440 vpalignr xmm8,xmm1,xmm0,8 3441 vpxor xmm2,xmm2,xmm6 3442 add edx,DWORD[32+rsp] 3443 and esi,ebx 3444 xor ebx,ecx 3445 shrd eax,eax,7 3446 vpxor xmm2,xmm2,xmm3 3447 mov edi,ebp 3448 xor esi,ebx 3449 vpaddd xmm9,xmm11,xmm1 3450 shld ebp,ebp,5 3451 add edx,esi 3452 vpxor xmm2,xmm2,xmm8 3453 xor edi,eax 3454 xor eax,ebx 3455 add edx,ebp 3456 add ecx,DWORD[36+rsp] 3457 vpsrld xmm8,xmm2,30 3458 vmovdqa XMMWORD[16+rsp],xmm9 3459 and edi,eax 3460 xor eax,ebx 3461 shrd ebp,ebp,7 3462 mov esi,edx 3463 vpslld xmm2,xmm2,2 3464 xor edi,eax 3465 shld edx,edx,5 3466 add ecx,edi 3467 xor esi,ebp 3468 xor ebp,eax 3469 add ecx,edx 3470 add ebx,DWORD[40+rsp] 3471 and esi,ebp 3472 vpor xmm2,xmm2,xmm8 3473 xor ebp,eax 3474 shrd edx,edx,7 3475 mov edi,ecx 3476 xor esi,ebp 3477 shld ecx,ecx,5 3478 add ebx,esi 3479 xor edi,edx 3480 xor edx,ebp 3481 add ebx,ecx 3482 add eax,DWORD[44+rsp] 3483 and edi,edx 3484 xor edx,ebp 3485 shrd ecx,ecx,7 3486 mov esi,ebx 3487 xor edi,edx 3488 shld ebx,ebx,5 3489 add eax,edi 3490 xor esi,edx 3491 add eax,ebx 3492 vpalignr xmm8,xmm2,xmm1,8 3493 vpxor xmm3,xmm3,xmm7 3494 add ebp,DWORD[48+rsp] 3495 xor esi,ecx 3496 mov edi,eax 3497 shld eax,eax,5 3498 vpxor xmm3,xmm3,xmm4 3499 add ebp,esi 3500 xor edi,ecx 3501 vpaddd xmm9,xmm11,xmm2 3502 shrd ebx,ebx,7 3503 add ebp,eax 3504 vpxor xmm3,xmm3,xmm8 3505 add edx,DWORD[52+rsp] 3506 xor edi,ebx 3507 mov esi,ebp 3508 shld ebp,ebp,5 3509 vpsrld xmm8,xmm3,30 3510 vmovdqa XMMWORD[32+rsp],xmm9 3511 add edx,edi 3512 xor esi,ebx 3513 shrd eax,eax,7 3514 add edx,ebp 3515 vpslld xmm3,xmm3,2 3516 add ecx,DWORD[56+rsp] 3517 xor esi,eax 3518 mov edi,edx 3519 shld edx,edx,5 3520 add ecx,esi 3521 xor edi,eax 3522 shrd ebp,ebp,7 3523 add ecx,edx 3524 vpor xmm3,xmm3,xmm8 3525 add ebx,DWORD[60+rsp] 3526 xor edi,ebp 3527 mov esi,ecx 3528 shld ecx,ecx,5 3529 add ebx,edi 3530 xor esi,ebp 3531 shrd edx,edx,7 3532 add ebx,ecx 3533 add eax,DWORD[rsp] 3534 vpaddd xmm9,xmm11,xmm3 3535 xor esi,edx 3536 mov edi,ebx 3537 shld ebx,ebx,5 3538 add eax,esi 3539 vmovdqa XMMWORD[48+rsp],xmm9 3540 xor edi,edx 3541 shrd ecx,ecx,7 3542 add eax,ebx 3543 add ebp,DWORD[4+rsp] 3544 xor edi,ecx 3545 mov esi,eax 3546 shld eax,eax,5 3547 add ebp,edi 3548 xor esi,ecx 3549 shrd ebx,ebx,7 3550 add ebp,eax 3551 add edx,DWORD[8+rsp] 3552 xor esi,ebx 3553 mov edi,ebp 3554 shld ebp,ebp,5 3555 add edx,esi 3556 xor edi,ebx 3557 shrd eax,eax,7 3558 add edx,ebp 3559 add ecx,DWORD[12+rsp] 3560 xor edi,eax 3561 mov esi,edx 3562 shld edx,edx,5 3563 add ecx,edi 3564 xor esi,eax 3565 shrd ebp,ebp,7 3566 add ecx,edx 3567 cmp r9,r10 3568 je NEAR $L$done_avx 3569 vmovdqa xmm6,XMMWORD[64+r14] 3570 vmovdqa xmm11,XMMWORD[((-64))+r14] 3571 vmovdqu xmm0,XMMWORD[r9] 3572 vmovdqu xmm1,XMMWORD[16+r9] 3573 vmovdqu xmm2,XMMWORD[32+r9] 3574 vmovdqu xmm3,XMMWORD[48+r9] 3575 vpshufb xmm0,xmm0,xmm6 3576 add r9,64 3577 add ebx,DWORD[16+rsp] 3578 xor esi,ebp 3579 vpshufb xmm1,xmm1,xmm6 3580 mov edi,ecx 3581 shld ecx,ecx,5 3582 vpaddd xmm4,xmm0,xmm11 3583 add ebx,esi 3584 xor edi,ebp 3585 shrd edx,edx,7 3586 add ebx,ecx 3587 vmovdqa XMMWORD[rsp],xmm4 3588 add eax,DWORD[20+rsp] 3589 xor edi,edx 3590 mov esi,ebx 3591 shld ebx,ebx,5 3592 add eax,edi 3593 xor esi,edx 3594 shrd ecx,ecx,7 3595 add eax,ebx 3596 add ebp,DWORD[24+rsp] 3597 xor esi,ecx 3598 mov edi,eax 3599 shld eax,eax,5 3600 add ebp,esi 3601 xor edi,ecx 3602 shrd ebx,ebx,7 3603 add ebp,eax 3604 add edx,DWORD[28+rsp] 3605 xor edi,ebx 3606 mov esi,ebp 3607 shld ebp,ebp,5 3608 add edx,edi 3609 xor esi,ebx 3610 shrd eax,eax,7 3611 add edx,ebp 3612 add ecx,DWORD[32+rsp] 3613 xor esi,eax 3614 vpshufb xmm2,xmm2,xmm6 3615 mov edi,edx 3616 shld edx,edx,5 3617 vpaddd xmm5,xmm1,xmm11 3618 add ecx,esi 3619 xor edi,eax 3620 shrd ebp,ebp,7 3621 add ecx,edx 3622 vmovdqa XMMWORD[16+rsp],xmm5 3623 add ebx,DWORD[36+rsp] 3624 xor edi,ebp 3625 mov esi,ecx 3626 shld ecx,ecx,5 3627 add ebx,edi 3628 xor esi,ebp 3629 shrd edx,edx,7 3630 add ebx,ecx 3631 add eax,DWORD[40+rsp] 3632 xor esi,edx 3633 mov edi,ebx 3634 shld ebx,ebx,5 3635 add eax,esi 3636 xor edi,edx 3637 shrd ecx,ecx,7 3638 add eax,ebx 3639 add ebp,DWORD[44+rsp] 3640 xor edi,ecx 3641 mov esi,eax 3642 shld eax,eax,5 3643 add ebp,edi 3644 xor esi,ecx 3645 shrd ebx,ebx,7 3646 add ebp,eax 3647 add edx,DWORD[48+rsp] 3648 xor esi,ebx 3649 vpshufb xmm3,xmm3,xmm6 3650 mov edi,ebp 3651 shld ebp,ebp,5 3652 vpaddd xmm6,xmm2,xmm11 3653 add edx,esi 3654 xor edi,ebx 3655 shrd eax,eax,7 3656 add edx,ebp 3657 vmovdqa XMMWORD[32+rsp],xmm6 3658 add ecx,DWORD[52+rsp] 3659 xor edi,eax 3660 mov esi,edx 3661 shld edx,edx,5 3662 add ecx,edi 3663 xor esi,eax 3664 shrd ebp,ebp,7 3665 add ecx,edx 3666 add ebx,DWORD[56+rsp] 3667 xor esi,ebp 3668 mov edi,ecx 3669 shld ecx,ecx,5 3670 add ebx,esi 3671 xor edi,ebp 3672 shrd edx,edx,7 3673 add ebx,ecx 3674 add eax,DWORD[60+rsp] 3675 xor edi,edx 3676 mov esi,ebx 3677 shld ebx,ebx,5 3678 add eax,edi 3679 shrd ecx,ecx,7 3680 add eax,ebx 3681 add eax,DWORD[r8] 3682 add esi,DWORD[4+r8] 3683 add ecx,DWORD[8+r8] 3684 add edx,DWORD[12+r8] 3685 mov DWORD[r8],eax 3686 add ebp,DWORD[16+r8] 3687 mov DWORD[4+r8],esi 3688 mov ebx,esi 3689 mov DWORD[8+r8],ecx 3690 mov edi,ecx 3691 mov DWORD[12+r8],edx 3692 xor edi,edx 3693 mov DWORD[16+r8],ebp 3694 and esi,edi 3695 jmp NEAR $L$oop_avx 3696 3697 ALIGN 16 3698 $L$done_avx: 3699 add ebx,DWORD[16+rsp] 3700 xor esi,ebp 3701 mov edi,ecx 3702 shld ecx,ecx,5 3703 add ebx,esi 3704 xor edi,ebp 3705 shrd edx,edx,7 3706 add ebx,ecx 3707 add eax,DWORD[20+rsp] 3708 xor edi,edx 3709 mov esi,ebx 3710 shld ebx,ebx,5 3711 add eax,edi 3712 xor esi,edx 3713 shrd ecx,ecx,7 3714 add eax,ebx 3715 add ebp,DWORD[24+rsp] 3716 xor esi,ecx 3717 mov edi,eax 3718 shld eax,eax,5 3719 add ebp,esi 3720 xor edi,ecx 3721 shrd ebx,ebx,7 3722 add ebp,eax 3723 add edx,DWORD[28+rsp] 3724 xor edi,ebx 3725 mov esi,ebp 3726 shld ebp,ebp,5 3727 add edx,edi 3728 xor esi,ebx 3729 shrd eax,eax,7 3730 add edx,ebp 3731 add ecx,DWORD[32+rsp] 3732 xor esi,eax 3733 mov edi,edx 3734 shld edx,edx,5 3735 add ecx,esi 3736 xor edi,eax 3737 shrd ebp,ebp,7 3738 add ecx,edx 3739 add ebx,DWORD[36+rsp] 3740 xor edi,ebp 3741 mov esi,ecx 3742 shld ecx,ecx,5 3743 add ebx,edi 3744 xor esi,ebp 3745 shrd edx,edx,7 3746 add ebx,ecx 3747 add eax,DWORD[40+rsp] 3748 xor esi,edx 3749 mov edi,ebx 3750 shld ebx,ebx,5 3751 add eax,esi 3752 xor edi,edx 3753 shrd ecx,ecx,7 3754 add eax,ebx 3755 add ebp,DWORD[44+rsp] 3756 xor edi,ecx 3757 mov esi,eax 3758 shld eax,eax,5 3759 add ebp,edi 3760 xor esi,ecx 3761 shrd ebx,ebx,7 3762 add ebp,eax 3763 add edx,DWORD[48+rsp] 3764 xor esi,ebx 3765 mov edi,ebp 3766 shld ebp,ebp,5 3767 add edx,esi 3768 xor edi,ebx 3769 shrd eax,eax,7 3770 add edx,ebp 3771 add ecx,DWORD[52+rsp] 3772 xor edi,eax 3773 mov esi,edx 3774 shld edx,edx,5 3775 add ecx,edi 3776 xor esi,eax 3777 shrd ebp,ebp,7 3778 add ecx,edx 3779 add ebx,DWORD[56+rsp] 3780 xor esi,ebp 3781 mov edi,ecx 3782 shld ecx,ecx,5 3783 add ebx,esi 3784 xor edi,ebp 3785 shrd edx,edx,7 3786 add ebx,ecx 3787 add eax,DWORD[60+rsp] 3788 xor edi,edx 3789 mov esi,ebx 3790 shld ebx,ebx,5 3791 add eax,edi 3792 shrd ecx,ecx,7 3793 add eax,ebx 3794 vzeroupper 3795 3796 add eax,DWORD[r8] 3797 add esi,DWORD[4+r8] 3798 add ecx,DWORD[8+r8] 3799 mov DWORD[r8],eax 3800 add edx,DWORD[12+r8] 3801 mov DWORD[4+r8],esi 3802 add ebp,DWORD[16+r8] 3803 mov DWORD[8+r8],ecx 3804 mov DWORD[12+r8],edx 3805 mov DWORD[16+r8],ebp 3806 movaps xmm6,XMMWORD[((-40-96))+r11] 3807 movaps xmm7,XMMWORD[((-40-80))+r11] 3808 movaps xmm8,XMMWORD[((-40-64))+r11] 3809 movaps xmm9,XMMWORD[((-40-48))+r11] 3810 movaps xmm10,XMMWORD[((-40-32))+r11] 3811 movaps xmm11,XMMWORD[((-40-16))+r11] 3812 mov r14,QWORD[((-40))+r11] 3813 3814 mov r13,QWORD[((-32))+r11] 3815 3816 mov r12,QWORD[((-24))+r11] 3817 3818 mov rbp,QWORD[((-16))+r11] 3819 3820 mov rbx,QWORD[((-8))+r11] 3821 3822 lea rsp,[r11] 3823 3824 $L$epilogue_avx: 3825 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 3826 mov rsi,QWORD[16+rsp] 3827 DB 0F3h,0C3h ;repret 3828 3829 $L$SEH_end_sha1_block_data_order_avx: 3830 3831 ALIGN 16 3832 sha1_block_data_order_avx2: 3833 mov QWORD[8+rsp],rdi ;WIN64 prologue 3834 mov QWORD[16+rsp],rsi 3835 mov rax,rsp 3836 $L$SEH_begin_sha1_block_data_order_avx2: 3837 mov rdi,rcx 3838 mov rsi,rdx 3839 mov rdx,r8 3840 3841 3842 _avx2_shortcut: 3843 3844 mov r11,rsp 3845 3846 push rbx 3847 3848 push rbp 3849 3850 push r12 3851 3852 push r13 3853 3854 push r14 3855 3856 vzeroupper 3857 lea rsp,[((-96))+rsp] 3858 vmovaps XMMWORD[(-40-96)+r11],xmm6 3859 vmovaps XMMWORD[(-40-80)+r11],xmm7 3860 vmovaps XMMWORD[(-40-64)+r11],xmm8 3861 vmovaps XMMWORD[(-40-48)+r11],xmm9 3862 vmovaps XMMWORD[(-40-32)+r11],xmm10 3863 vmovaps XMMWORD[(-40-16)+r11],xmm11 3864 $L$prologue_avx2: 3865 mov r8,rdi 3866 mov r9,rsi 3867 mov r10,rdx 3868 3869 lea rsp,[((-640))+rsp] 3870 shl r10,6 3871 lea r13,[64+r9] 3872 and rsp,-128 3873 add r10,r9 3874 lea r14,[((K_XX_XX+64))] 3875 3876 mov eax,DWORD[r8] 3877 cmp r13,r10 3878 cmovae r13,r9 3879 mov ebp,DWORD[4+r8] 3880 mov ecx,DWORD[8+r8] 3881 mov edx,DWORD[12+r8] 3882 mov esi,DWORD[16+r8] 3883 vmovdqu ymm6,YMMWORD[64+r14] 3884 3885 vmovdqu xmm0,XMMWORD[r9] 3886 vmovdqu xmm1,XMMWORD[16+r9] 3887 vmovdqu xmm2,XMMWORD[32+r9] 3888 vmovdqu xmm3,XMMWORD[48+r9] 3889 lea r9,[64+r9] 3890 vinserti128 ymm0,ymm0,XMMWORD[r13],1 3891 vinserti128 ymm1,ymm1,XMMWORD[16+r13],1 3892 vpshufb ymm0,ymm0,ymm6 3893 vinserti128 ymm2,ymm2,XMMWORD[32+r13],1 3894 vpshufb ymm1,ymm1,ymm6 3895 vinserti128 ymm3,ymm3,XMMWORD[48+r13],1 3896 vpshufb ymm2,ymm2,ymm6 3897 vmovdqu ymm11,YMMWORD[((-64))+r14] 3898 vpshufb ymm3,ymm3,ymm6 3899 3900 vpaddd ymm4,ymm0,ymm11 3901 vpaddd ymm5,ymm1,ymm11 3902 vmovdqu YMMWORD[rsp],ymm4 3903 vpaddd ymm6,ymm2,ymm11 3904 vmovdqu YMMWORD[32+rsp],ymm5 3905 vpaddd ymm7,ymm3,ymm11 3906 vmovdqu YMMWORD[64+rsp],ymm6 3907 vmovdqu YMMWORD[96+rsp],ymm7 3908 vpalignr ymm4,ymm1,ymm0,8 3909 vpsrldq ymm8,ymm3,4 3910 vpxor ymm4,ymm4,ymm0 3911 vpxor ymm8,ymm8,ymm2 3912 vpxor ymm4,ymm4,ymm8 3913 vpsrld ymm8,ymm4,31 3914 vpslldq ymm10,ymm4,12 3915 vpaddd ymm4,ymm4,ymm4 3916 vpsrld ymm9,ymm10,30 3917 vpor ymm4,ymm4,ymm8 3918 vpslld ymm10,ymm10,2 3919 vpxor ymm4,ymm4,ymm9 3920 vpxor ymm4,ymm4,ymm10 3921 vpaddd ymm9,ymm4,ymm11 3922 vmovdqu YMMWORD[128+rsp],ymm9 3923 vpalignr ymm5,ymm2,ymm1,8 3924 vpsrldq ymm8,ymm4,4 3925 vpxor ymm5,ymm5,ymm1 3926 vpxor ymm8,ymm8,ymm3 3927 vpxor ymm5,ymm5,ymm8 3928 vpsrld ymm8,ymm5,31 3929 vmovdqu ymm11,YMMWORD[((-32))+r14] 3930 vpslldq ymm10,ymm5,12 3931 vpaddd ymm5,ymm5,ymm5 3932 vpsrld ymm9,ymm10,30 3933 vpor ymm5,ymm5,ymm8 3934 vpslld ymm10,ymm10,2 3935 vpxor ymm5,ymm5,ymm9 3936 vpxor ymm5,ymm5,ymm10 3937 vpaddd ymm9,ymm5,ymm11 3938 vmovdqu YMMWORD[160+rsp],ymm9 3939 vpalignr ymm6,ymm3,ymm2,8 3940 vpsrldq ymm8,ymm5,4 3941 vpxor ymm6,ymm6,ymm2 3942 vpxor ymm8,ymm8,ymm4 3943 vpxor ymm6,ymm6,ymm8 3944 vpsrld ymm8,ymm6,31 3945 vpslldq ymm10,ymm6,12 3946 vpaddd ymm6,ymm6,ymm6 3947 vpsrld ymm9,ymm10,30 3948 vpor ymm6,ymm6,ymm8 3949 vpslld ymm10,ymm10,2 3950 vpxor ymm6,ymm6,ymm9 3951 vpxor ymm6,ymm6,ymm10 3952 vpaddd ymm9,ymm6,ymm11 3953 vmovdqu YMMWORD[192+rsp],ymm9 3954 vpalignr ymm7,ymm4,ymm3,8 3955 vpsrldq ymm8,ymm6,4 3956 vpxor ymm7,ymm7,ymm3 3957 vpxor ymm8,ymm8,ymm5 3958 vpxor ymm7,ymm7,ymm8 3959 vpsrld ymm8,ymm7,31 3960 vpslldq ymm10,ymm7,12 3961 vpaddd ymm7,ymm7,ymm7 3962 vpsrld ymm9,ymm10,30 3963 vpor ymm7,ymm7,ymm8 3964 vpslld ymm10,ymm10,2 3965 vpxor ymm7,ymm7,ymm9 3966 vpxor ymm7,ymm7,ymm10 3967 vpaddd ymm9,ymm7,ymm11 3968 vmovdqu YMMWORD[224+rsp],ymm9 3969 lea r13,[128+rsp] 3970 jmp NEAR $L$oop_avx2 3971 ALIGN 32 3972 $L$oop_avx2: 3973 rorx ebx,ebp,2 3974 andn edi,ebp,edx 3975 and ebp,ecx 3976 xor ebp,edi 3977 jmp NEAR $L$align32_1 3978 ALIGN 32 3979 $L$align32_1: 3980 vpalignr ymm8,ymm7,ymm6,8 3981 vpxor ymm0,ymm0,ymm4 3982 add esi,DWORD[((-128))+r13] 3983 andn edi,eax,ecx 3984 vpxor ymm0,ymm0,ymm1 3985 add esi,ebp 3986 rorx r12d,eax,27 3987 rorx ebp,eax,2 3988 vpxor ymm0,ymm0,ymm8 3989 and eax,ebx 3990 add esi,r12d 3991 xor eax,edi 3992 vpsrld ymm8,ymm0,30 3993 vpslld ymm0,ymm0,2 3994 add edx,DWORD[((-124))+r13] 3995 andn edi,esi,ebx 3996 add edx,eax 3997 rorx r12d,esi,27 3998 rorx eax,esi,2 3999 and esi,ebp 4000 vpor ymm0,ymm0,ymm8 4001 add edx,r12d 4002 xor esi,edi 4003 add ecx,DWORD[((-120))+r13] 4004 andn edi,edx,ebp 4005 vpaddd ymm9,ymm0,ymm11 4006 add ecx,esi 4007 rorx r12d,edx,27 4008 rorx esi,edx,2 4009 and edx,eax 4010 vmovdqu YMMWORD[256+rsp],ymm9 4011 add ecx,r12d 4012 xor edx,edi 4013 add ebx,DWORD[((-116))+r13] 4014 andn edi,ecx,eax 4015 add ebx,edx 4016 rorx r12d,ecx,27 4017 rorx edx,ecx,2 4018 and ecx,esi 4019 add ebx,r12d 4020 xor ecx,edi 4021 add ebp,DWORD[((-96))+r13] 4022 andn edi,ebx,esi 4023 add ebp,ecx 4024 rorx r12d,ebx,27 4025 rorx ecx,ebx,2 4026 and ebx,edx 4027 add ebp,r12d 4028 xor ebx,edi 4029 vpalignr ymm8,ymm0,ymm7,8 4030 vpxor ymm1,ymm1,ymm5 4031 add eax,DWORD[((-92))+r13] 4032 andn edi,ebp,edx 4033 vpxor ymm1,ymm1,ymm2 4034 add eax,ebx 4035 rorx r12d,ebp,27 4036 rorx ebx,ebp,2 4037 vpxor ymm1,ymm1,ymm8 4038 and ebp,ecx 4039 add eax,r12d 4040 xor ebp,edi 4041 vpsrld ymm8,ymm1,30 4042 vpslld ymm1,ymm1,2 4043 add esi,DWORD[((-88))+r13] 4044 andn edi,eax,ecx 4045 add esi,ebp 4046 rorx r12d,eax,27 4047 rorx ebp,eax,2 4048 and eax,ebx 4049 vpor ymm1,ymm1,ymm8 4050 add esi,r12d 4051 xor eax,edi 4052 add edx,DWORD[((-84))+r13] 4053 andn edi,esi,ebx 4054 vpaddd ymm9,ymm1,ymm11 4055 add edx,eax 4056 rorx r12d,esi,27 4057 rorx eax,esi,2 4058 and esi,ebp 4059 vmovdqu YMMWORD[288+rsp],ymm9 4060 add edx,r12d 4061 xor esi,edi 4062 add ecx,DWORD[((-64))+r13] 4063 andn edi,edx,ebp 4064 add ecx,esi 4065 rorx r12d,edx,27 4066 rorx esi,edx,2 4067 and edx,eax 4068 add ecx,r12d 4069 xor edx,edi 4070 add ebx,DWORD[((-60))+r13] 4071 andn edi,ecx,eax 4072 add ebx,edx 4073 rorx r12d,ecx,27 4074 rorx edx,ecx,2 4075 and ecx,esi 4076 add ebx,r12d 4077 xor ecx,edi 4078 vpalignr ymm8,ymm1,ymm0,8 4079 vpxor ymm2,ymm2,ymm6 4080 add ebp,DWORD[((-56))+r13] 4081 andn edi,ebx,esi 4082 vpxor ymm2,ymm2,ymm3 4083 vmovdqu ymm11,YMMWORD[r14] 4084 add ebp,ecx 4085 rorx r12d,ebx,27 4086 rorx ecx,ebx,2 4087 vpxor ymm2,ymm2,ymm8 4088 and ebx,edx 4089 add ebp,r12d 4090 xor ebx,edi 4091 vpsrld ymm8,ymm2,30 4092 vpslld ymm2,ymm2,2 4093 add eax,DWORD[((-52))+r13] 4094 andn edi,ebp,edx 4095 add eax,ebx 4096 rorx r12d,ebp,27 4097 rorx ebx,ebp,2 4098 and ebp,ecx 4099 vpor ymm2,ymm2,ymm8 4100 add eax,r12d 4101 xor ebp,edi 4102 add esi,DWORD[((-32))+r13] 4103 andn edi,eax,ecx 4104 vpaddd ymm9,ymm2,ymm11 4105 add esi,ebp 4106 rorx r12d,eax,27 4107 rorx ebp,eax,2 4108 and eax,ebx 4109 vmovdqu YMMWORD[320+rsp],ymm9 4110 add esi,r12d 4111 xor eax,edi 4112 add edx,DWORD[((-28))+r13] 4113 andn edi,esi,ebx 4114 add edx,eax 4115 rorx r12d,esi,27 4116 rorx eax,esi,2 4117 and esi,ebp 4118 add edx,r12d 4119 xor esi,edi 4120 add ecx,DWORD[((-24))+r13] 4121 andn edi,edx,ebp 4122 add ecx,esi 4123 rorx r12d,edx,27 4124 rorx esi,edx,2 4125 and edx,eax 4126 add ecx,r12d 4127 xor edx,edi 4128 vpalignr ymm8,ymm2,ymm1,8 4129 vpxor ymm3,ymm3,ymm7 4130 add ebx,DWORD[((-20))+r13] 4131 andn edi,ecx,eax 4132 vpxor ymm3,ymm3,ymm4 4133 add ebx,edx 4134 rorx r12d,ecx,27 4135 rorx edx,ecx,2 4136 vpxor ymm3,ymm3,ymm8 4137 and ecx,esi 4138 add ebx,r12d 4139 xor ecx,edi 4140 vpsrld ymm8,ymm3,30 4141 vpslld ymm3,ymm3,2 4142 add ebp,DWORD[r13] 4143 andn edi,ebx,esi 4144 add ebp,ecx 4145 rorx r12d,ebx,27 4146 rorx ecx,ebx,2 4147 and ebx,edx 4148 vpor ymm3,ymm3,ymm8 4149 add ebp,r12d 4150 xor ebx,edi 4151 add eax,DWORD[4+r13] 4152 andn edi,ebp,edx 4153 vpaddd ymm9,ymm3,ymm11 4154 add eax,ebx 4155 rorx r12d,ebp,27 4156 rorx ebx,ebp,2 4157 and ebp,ecx 4158 vmovdqu YMMWORD[352+rsp],ymm9 4159 add eax,r12d 4160 xor ebp,edi 4161 add esi,DWORD[8+r13] 4162 andn edi,eax,ecx 4163 add esi,ebp 4164 rorx r12d,eax,27 4165 rorx ebp,eax,2 4166 and eax,ebx 4167 add esi,r12d 4168 xor eax,edi 4169 add edx,DWORD[12+r13] 4170 lea edx,[rax*1+rdx] 4171 rorx r12d,esi,27 4172 rorx eax,esi,2 4173 xor esi,ebp 4174 add edx,r12d 4175 xor esi,ebx 4176 vpalignr ymm8,ymm3,ymm2,8 4177 vpxor ymm4,ymm4,ymm0 4178 add ecx,DWORD[32+r13] 4179 lea ecx,[rsi*1+rcx] 4180 vpxor ymm4,ymm4,ymm5 4181 rorx r12d,edx,27 4182 rorx esi,edx,2 4183 xor edx,eax 4184 vpxor ymm4,ymm4,ymm8 4185 add ecx,r12d 4186 xor edx,ebp 4187 add ebx,DWORD[36+r13] 4188 vpsrld ymm8,ymm4,30 4189 vpslld ymm4,ymm4,2 4190 lea ebx,[rdx*1+rbx] 4191 rorx r12d,ecx,27 4192 rorx edx,ecx,2 4193 xor ecx,esi 4194 add ebx,r12d 4195 xor ecx,eax 4196 vpor ymm4,ymm4,ymm8 4197 add ebp,DWORD[40+r13] 4198 lea ebp,[rbp*1+rcx] 4199 rorx r12d,ebx,27 4200 rorx ecx,ebx,2 4201 vpaddd ymm9,ymm4,ymm11 4202 xor ebx,edx 4203 add ebp,r12d 4204 xor ebx,esi 4205 add eax,DWORD[44+r13] 4206 vmovdqu YMMWORD[384+rsp],ymm9 4207 lea eax,[rbx*1+rax] 4208 rorx r12d,ebp,27 4209 rorx ebx,ebp,2 4210 xor ebp,ecx 4211 add eax,r12d 4212 xor ebp,edx 4213 add esi,DWORD[64+r13] 4214 lea esi,[rbp*1+rsi] 4215 rorx r12d,eax,27 4216 rorx ebp,eax,2 4217 xor eax,ebx 4218 add esi,r12d 4219 xor eax,ecx 4220 vpalignr ymm8,ymm4,ymm3,8 4221 vpxor ymm5,ymm5,ymm1 4222 add edx,DWORD[68+r13] 4223 lea edx,[rax*1+rdx] 4224 vpxor ymm5,ymm5,ymm6 4225 rorx r12d,esi,27 4226 rorx eax,esi,2 4227 xor esi,ebp 4228 vpxor ymm5,ymm5,ymm8 4229 add edx,r12d 4230 xor esi,ebx 4231 add ecx,DWORD[72+r13] 4232 vpsrld ymm8,ymm5,30 4233 vpslld ymm5,ymm5,2 4234 lea ecx,[rsi*1+rcx] 4235 rorx r12d,edx,27 4236 rorx esi,edx,2 4237 xor edx,eax 4238 add ecx,r12d 4239 xor edx,ebp 4240 vpor ymm5,ymm5,ymm8 4241 add ebx,DWORD[76+r13] 4242 lea ebx,[rdx*1+rbx] 4243 rorx r12d,ecx,27 4244 rorx edx,ecx,2 4245 vpaddd ymm9,ymm5,ymm11 4246 xor ecx,esi 4247 add ebx,r12d 4248 xor ecx,eax 4249 add ebp,DWORD[96+r13] 4250 vmovdqu YMMWORD[416+rsp],ymm9 4251 lea ebp,[rbp*1+rcx] 4252 rorx r12d,ebx,27 4253 rorx ecx,ebx,2 4254 xor ebx,edx 4255 add ebp,r12d 4256 xor ebx,esi 4257 add eax,DWORD[100+r13] 4258 lea eax,[rbx*1+rax] 4259 rorx r12d,ebp,27 4260 rorx ebx,ebp,2 4261 xor ebp,ecx 4262 add eax,r12d 4263 xor ebp,edx 4264 vpalignr ymm8,ymm5,ymm4,8 4265 vpxor ymm6,ymm6,ymm2 4266 add esi,DWORD[104+r13] 4267 lea esi,[rbp*1+rsi] 4268 vpxor ymm6,ymm6,ymm7 4269 rorx r12d,eax,27 4270 rorx ebp,eax,2 4271 xor eax,ebx 4272 vpxor ymm6,ymm6,ymm8 4273 add esi,r12d 4274 xor eax,ecx 4275 add edx,DWORD[108+r13] 4276 lea r13,[256+r13] 4277 vpsrld ymm8,ymm6,30 4278 vpslld ymm6,ymm6,2 4279 lea edx,[rax*1+rdx] 4280 rorx r12d,esi,27 4281 rorx eax,esi,2 4282 xor esi,ebp 4283 add edx,r12d 4284 xor esi,ebx 4285 vpor ymm6,ymm6,ymm8 4286 add ecx,DWORD[((-128))+r13] 4287 lea ecx,[rsi*1+rcx] 4288 rorx r12d,edx,27 4289 rorx esi,edx,2 4290 vpaddd ymm9,ymm6,ymm11 4291 xor edx,eax 4292 add ecx,r12d 4293 xor edx,ebp 4294 add ebx,DWORD[((-124))+r13] 4295 vmovdqu YMMWORD[448+rsp],ymm9 4296 lea ebx,[rdx*1+rbx] 4297 rorx r12d,ecx,27 4298 rorx edx,ecx,2 4299 xor ecx,esi 4300 add ebx,r12d 4301 xor ecx,eax 4302 add ebp,DWORD[((-120))+r13] 4303 lea ebp,[rbp*1+rcx] 4304 rorx r12d,ebx,27 4305 rorx ecx,ebx,2 4306 xor ebx,edx 4307 add ebp,r12d 4308 xor ebx,esi 4309 vpalignr ymm8,ymm6,ymm5,8 4310 vpxor ymm7,ymm7,ymm3 4311 add eax,DWORD[((-116))+r13] 4312 lea eax,[rbx*1+rax] 4313 vpxor ymm7,ymm7,ymm0 4314 vmovdqu ymm11,YMMWORD[32+r14] 4315 rorx r12d,ebp,27 4316 rorx ebx,ebp,2 4317 xor ebp,ecx 4318 vpxor ymm7,ymm7,ymm8 4319 add eax,r12d 4320 xor ebp,edx 4321 add esi,DWORD[((-96))+r13] 4322 vpsrld ymm8,ymm7,30 4323 vpslld ymm7,ymm7,2 4324 lea esi,[rbp*1+rsi] 4325 rorx r12d,eax,27 4326 rorx ebp,eax,2 4327 xor eax,ebx 4328 add esi,r12d 4329 xor eax,ecx 4330 vpor ymm7,ymm7,ymm8 4331 add edx,DWORD[((-92))+r13] 4332 lea edx,[rax*1+rdx] 4333 rorx r12d,esi,27 4334 rorx eax,esi,2 4335 vpaddd ymm9,ymm7,ymm11 4336 xor esi,ebp 4337 add edx,r12d 4338 xor esi,ebx 4339 add ecx,DWORD[((-88))+r13] 4340 vmovdqu YMMWORD[480+rsp],ymm9 4341 lea ecx,[rsi*1+rcx] 4342 rorx r12d,edx,27 4343 rorx esi,edx,2 4344 xor edx,eax 4345 add ecx,r12d 4346 xor edx,ebp 4347 add ebx,DWORD[((-84))+r13] 4348 mov edi,esi 4349 xor edi,eax 4350 lea ebx,[rdx*1+rbx] 4351 rorx r12d,ecx,27 4352 rorx edx,ecx,2 4353 xor ecx,esi 4354 add ebx,r12d 4355 and ecx,edi 4356 jmp NEAR $L$align32_2 4357 ALIGN 32 4358 $L$align32_2: 4359 vpalignr ymm8,ymm7,ymm6,8 4360 vpxor ymm0,ymm0,ymm4 4361 add ebp,DWORD[((-64))+r13] 4362 xor ecx,esi 4363 vpxor ymm0,ymm0,ymm1 4364 mov edi,edx 4365 xor edi,esi 4366 lea ebp,[rbp*1+rcx] 4367 vpxor ymm0,ymm0,ymm8 4368 rorx r12d,ebx,27 4369 rorx ecx,ebx,2 4370 xor ebx,edx 4371 vpsrld ymm8,ymm0,30 4372 vpslld ymm0,ymm0,2 4373 add ebp,r12d 4374 and ebx,edi 4375 add eax,DWORD[((-60))+r13] 4376 xor ebx,edx 4377 mov edi,ecx 4378 xor edi,edx 4379 vpor ymm0,ymm0,ymm8 4380 lea eax,[rbx*1+rax] 4381 rorx r12d,ebp,27 4382 rorx ebx,ebp,2 4383 xor ebp,ecx 4384 vpaddd ymm9,ymm0,ymm11 4385 add eax,r12d 4386 and ebp,edi 4387 add esi,DWORD[((-56))+r13] 4388 xor ebp,ecx 4389 vmovdqu YMMWORD[512+rsp],ymm9 4390 mov edi,ebx 4391 xor edi,ecx 4392 lea esi,[rbp*1+rsi] 4393 rorx r12d,eax,27 4394 rorx ebp,eax,2 4395 xor eax,ebx 4396 add esi,r12d 4397 and eax,edi 4398 add edx,DWORD[((-52))+r13] 4399 xor eax,ebx 4400 mov edi,ebp 4401 xor edi,ebx 4402 lea edx,[rax*1+rdx] 4403 rorx r12d,esi,27 4404 rorx eax,esi,2 4405 xor esi,ebp 4406 add edx,r12d 4407 and esi,edi 4408 add ecx,DWORD[((-32))+r13] 4409 xor esi,ebp 4410 mov edi,eax 4411 xor edi,ebp 4412 lea ecx,[rsi*1+rcx] 4413 rorx r12d,edx,27 4414 rorx esi,edx,2 4415 xor edx,eax 4416 add ecx,r12d 4417 and edx,edi 4418 vpalignr ymm8,ymm0,ymm7,8 4419 vpxor ymm1,ymm1,ymm5 4420 add ebx,DWORD[((-28))+r13] 4421 xor edx,eax 4422 vpxor ymm1,ymm1,ymm2 4423 mov edi,esi 4424 xor edi,eax 4425 lea ebx,[rdx*1+rbx] 4426 vpxor ymm1,ymm1,ymm8 4427 rorx r12d,ecx,27 4428 rorx edx,ecx,2 4429 xor ecx,esi 4430 vpsrld ymm8,ymm1,30 4431 vpslld ymm1,ymm1,2 4432 add ebx,r12d 4433 and ecx,edi 4434 add ebp,DWORD[((-24))+r13] 4435 xor ecx,esi 4436 mov edi,edx 4437 xor edi,esi 4438 vpor ymm1,ymm1,ymm8 4439 lea ebp,[rbp*1+rcx] 4440 rorx r12d,ebx,27 4441 rorx ecx,ebx,2 4442 xor ebx,edx 4443 vpaddd ymm9,ymm1,ymm11 4444 add ebp,r12d 4445 and ebx,edi 4446 add eax,DWORD[((-20))+r13] 4447 xor ebx,edx 4448 vmovdqu YMMWORD[544+rsp],ymm9 4449 mov edi,ecx 4450 xor edi,edx 4451 lea eax,[rbx*1+rax] 4452 rorx r12d,ebp,27 4453 rorx ebx,ebp,2 4454 xor ebp,ecx 4455 add eax,r12d 4456 and ebp,edi 4457 add esi,DWORD[r13] 4458 xor ebp,ecx 4459 mov edi,ebx 4460 xor edi,ecx 4461 lea esi,[rbp*1+rsi] 4462 rorx r12d,eax,27 4463 rorx ebp,eax,2 4464 xor eax,ebx 4465 add esi,r12d 4466 and eax,edi 4467 add edx,DWORD[4+r13] 4468 xor eax,ebx 4469 mov edi,ebp 4470 xor edi,ebx 4471 lea edx,[rax*1+rdx] 4472 rorx r12d,esi,27 4473 rorx eax,esi,2 4474 xor esi,ebp 4475 add edx,r12d 4476 and esi,edi 4477 vpalignr ymm8,ymm1,ymm0,8 4478 vpxor ymm2,ymm2,ymm6 4479 add ecx,DWORD[8+r13] 4480 xor esi,ebp 4481 vpxor ymm2,ymm2,ymm3 4482 mov edi,eax 4483 xor edi,ebp 4484 lea ecx,[rsi*1+rcx] 4485 vpxor ymm2,ymm2,ymm8 4486 rorx r12d,edx,27 4487 rorx esi,edx,2 4488 xor edx,eax 4489 vpsrld ymm8,ymm2,30 4490 vpslld ymm2,ymm2,2 4491 add ecx,r12d 4492 and edx,edi 4493 add ebx,DWORD[12+r13] 4494 xor edx,eax 4495 mov edi,esi 4496 xor edi,eax 4497 vpor ymm2,ymm2,ymm8 4498 lea ebx,[rdx*1+rbx] 4499 rorx r12d,ecx,27 4500 rorx edx,ecx,2 4501 xor ecx,esi 4502 vpaddd ymm9,ymm2,ymm11 4503 add ebx,r12d 4504 and ecx,edi 4505 add ebp,DWORD[32+r13] 4506 xor ecx,esi 4507 vmovdqu YMMWORD[576+rsp],ymm9 4508 mov edi,edx 4509 xor edi,esi 4510 lea ebp,[rbp*1+rcx] 4511 rorx r12d,ebx,27 4512 rorx ecx,ebx,2 4513 xor ebx,edx 4514 add ebp,r12d 4515 and ebx,edi 4516 add eax,DWORD[36+r13] 4517 xor ebx,edx 4518 mov edi,ecx 4519 xor edi,edx 4520 lea eax,[rbx*1+rax] 4521 rorx r12d,ebp,27 4522 rorx ebx,ebp,2 4523 xor ebp,ecx 4524 add eax,r12d 4525 and ebp,edi 4526 add esi,DWORD[40+r13] 4527 xor ebp,ecx 4528 mov edi,ebx 4529 xor edi,ecx 4530 lea esi,[rbp*1+rsi] 4531 rorx r12d,eax,27 4532 rorx ebp,eax,2 4533 xor eax,ebx 4534 add esi,r12d 4535 and eax,edi 4536 vpalignr ymm8,ymm2,ymm1,8 4537 vpxor ymm3,ymm3,ymm7 4538 add edx,DWORD[44+r13] 4539 xor eax,ebx 4540 vpxor ymm3,ymm3,ymm4 4541 mov edi,ebp 4542 xor edi,ebx 4543 lea edx,[rax*1+rdx] 4544 vpxor ymm3,ymm3,ymm8 4545 rorx r12d,esi,27 4546 rorx eax,esi,2 4547 xor esi,ebp 4548 vpsrld ymm8,ymm3,30 4549 vpslld ymm3,ymm3,2 4550 add edx,r12d 4551 and esi,edi 4552 add ecx,DWORD[64+r13] 4553 xor esi,ebp 4554 mov edi,eax 4555 xor edi,ebp 4556 vpor ymm3,ymm3,ymm8 4557 lea ecx,[rsi*1+rcx] 4558 rorx r12d,edx,27 4559 rorx esi,edx,2 4560 xor edx,eax 4561 vpaddd ymm9,ymm3,ymm11 4562 add ecx,r12d 4563 and edx,edi 4564 add ebx,DWORD[68+r13] 4565 xor edx,eax 4566 vmovdqu YMMWORD[608+rsp],ymm9 4567 mov edi,esi 4568 xor edi,eax 4569 lea ebx,[rdx*1+rbx] 4570 rorx r12d,ecx,27 4571 rorx edx,ecx,2 4572 xor ecx,esi 4573 add ebx,r12d 4574 and ecx,edi 4575 add ebp,DWORD[72+r13] 4576 xor ecx,esi 4577 mov edi,edx 4578 xor edi,esi 4579 lea ebp,[rbp*1+rcx] 4580 rorx r12d,ebx,27 4581 rorx ecx,ebx,2 4582 xor ebx,edx 4583 add ebp,r12d 4584 and ebx,edi 4585 add eax,DWORD[76+r13] 4586 xor ebx,edx 4587 lea eax,[rbx*1+rax] 4588 rorx r12d,ebp,27 4589 rorx ebx,ebp,2 4590 xor ebp,ecx 4591 add eax,r12d 4592 xor ebp,edx 4593 add esi,DWORD[96+r13] 4594 lea esi,[rbp*1+rsi] 4595 rorx r12d,eax,27 4596 rorx ebp,eax,2 4597 xor eax,ebx 4598 add esi,r12d 4599 xor eax,ecx 4600 add edx,DWORD[100+r13] 4601 lea edx,[rax*1+rdx] 4602 rorx r12d,esi,27 4603 rorx eax,esi,2 4604 xor esi,ebp 4605 add edx,r12d 4606 xor esi,ebx 4607 add ecx,DWORD[104+r13] 4608 lea ecx,[rsi*1+rcx] 4609 rorx r12d,edx,27 4610 rorx esi,edx,2 4611 xor edx,eax 4612 add ecx,r12d 4613 xor edx,ebp 4614 add ebx,DWORD[108+r13] 4615 lea r13,[256+r13] 4616 lea ebx,[rdx*1+rbx] 4617 rorx r12d,ecx,27 4618 rorx edx,ecx,2 4619 xor ecx,esi 4620 add ebx,r12d 4621 xor ecx,eax 4622 add ebp,DWORD[((-128))+r13] 4623 lea ebp,[rbp*1+rcx] 4624 rorx r12d,ebx,27 4625 rorx ecx,ebx,2 4626 xor ebx,edx 4627 add ebp,r12d 4628 xor ebx,esi 4629 add eax,DWORD[((-124))+r13] 4630 lea eax,[rbx*1+rax] 4631 rorx r12d,ebp,27 4632 rorx ebx,ebp,2 4633 xor ebp,ecx 4634 add eax,r12d 4635 xor ebp,edx 4636 add esi,DWORD[((-120))+r13] 4637 lea esi,[rbp*1+rsi] 4638 rorx r12d,eax,27 4639 rorx ebp,eax,2 4640 xor eax,ebx 4641 add esi,r12d 4642 xor eax,ecx 4643 add edx,DWORD[((-116))+r13] 4644 lea edx,[rax*1+rdx] 4645 rorx r12d,esi,27 4646 rorx eax,esi,2 4647 xor esi,ebp 4648 add edx,r12d 4649 xor esi,ebx 4650 add ecx,DWORD[((-96))+r13] 4651 lea ecx,[rsi*1+rcx] 4652 rorx r12d,edx,27 4653 rorx esi,edx,2 4654 xor edx,eax 4655 add ecx,r12d 4656 xor edx,ebp 4657 add ebx,DWORD[((-92))+r13] 4658 lea ebx,[rdx*1+rbx] 4659 rorx r12d,ecx,27 4660 rorx edx,ecx,2 4661 xor ecx,esi 4662 add ebx,r12d 4663 xor ecx,eax 4664 add ebp,DWORD[((-88))+r13] 4665 lea ebp,[rbp*1+rcx] 4666 rorx r12d,ebx,27 4667 rorx ecx,ebx,2 4668 xor ebx,edx 4669 add ebp,r12d 4670 xor ebx,esi 4671 add eax,DWORD[((-84))+r13] 4672 lea eax,[rbx*1+rax] 4673 rorx r12d,ebp,27 4674 rorx ebx,ebp,2 4675 xor ebp,ecx 4676 add eax,r12d 4677 xor ebp,edx 4678 add esi,DWORD[((-64))+r13] 4679 lea esi,[rbp*1+rsi] 4680 rorx r12d,eax,27 4681 rorx ebp,eax,2 4682 xor eax,ebx 4683 add esi,r12d 4684 xor eax,ecx 4685 add edx,DWORD[((-60))+r13] 4686 lea edx,[rax*1+rdx] 4687 rorx r12d,esi,27 4688 rorx eax,esi,2 4689 xor esi,ebp 4690 add edx,r12d 4691 xor esi,ebx 4692 add ecx,DWORD[((-56))+r13] 4693 lea ecx,[rsi*1+rcx] 4694 rorx r12d,edx,27 4695 rorx esi,edx,2 4696 xor edx,eax 4697 add ecx,r12d 4698 xor edx,ebp 4699 add ebx,DWORD[((-52))+r13] 4700 lea ebx,[rdx*1+rbx] 4701 rorx r12d,ecx,27 4702 rorx edx,ecx,2 4703 xor ecx,esi 4704 add ebx,r12d 4705 xor ecx,eax 4706 add ebp,DWORD[((-32))+r13] 4707 lea ebp,[rbp*1+rcx] 4708 rorx r12d,ebx,27 4709 rorx ecx,ebx,2 4710 xor ebx,edx 4711 add ebp,r12d 4712 xor ebx,esi 4713 add eax,DWORD[((-28))+r13] 4714 lea eax,[rbx*1+rax] 4715 rorx r12d,ebp,27 4716 rorx ebx,ebp,2 4717 xor ebp,ecx 4718 add eax,r12d 4719 xor ebp,edx 4720 add esi,DWORD[((-24))+r13] 4721 lea esi,[rbp*1+rsi] 4722 rorx r12d,eax,27 4723 rorx ebp,eax,2 4724 xor eax,ebx 4725 add esi,r12d 4726 xor eax,ecx 4727 add edx,DWORD[((-20))+r13] 4728 lea edx,[rax*1+rdx] 4729 rorx r12d,esi,27 4730 add edx,r12d 4731 lea r13,[128+r9] 4732 lea rdi,[128+r9] 4733 cmp r13,r10 4734 cmovae r13,r9 4735 4736 4737 add edx,DWORD[r8] 4738 add esi,DWORD[4+r8] 4739 add ebp,DWORD[8+r8] 4740 mov DWORD[r8],edx 4741 add ebx,DWORD[12+r8] 4742 mov DWORD[4+r8],esi 4743 mov eax,edx 4744 add ecx,DWORD[16+r8] 4745 mov r12d,ebp 4746 mov DWORD[8+r8],ebp 4747 mov edx,ebx 4748 4749 mov DWORD[12+r8],ebx 4750 mov ebp,esi 4751 mov DWORD[16+r8],ecx 4752 4753 mov esi,ecx 4754 mov ecx,r12d 4755 4756 4757 cmp r9,r10 4758 je NEAR $L$done_avx2 4759 vmovdqu ymm6,YMMWORD[64+r14] 4760 cmp rdi,r10 4761 ja NEAR $L$ast_avx2 4762 4763 vmovdqu xmm0,XMMWORD[((-64))+rdi] 4764 vmovdqu xmm1,XMMWORD[((-48))+rdi] 4765 vmovdqu xmm2,XMMWORD[((-32))+rdi] 4766 vmovdqu xmm3,XMMWORD[((-16))+rdi] 4767 vinserti128 ymm0,ymm0,XMMWORD[r13],1 4768 vinserti128 ymm1,ymm1,XMMWORD[16+r13],1 4769 vinserti128 ymm2,ymm2,XMMWORD[32+r13],1 4770 vinserti128 ymm3,ymm3,XMMWORD[48+r13],1 4771 jmp NEAR $L$ast_avx2 4772 4773 ALIGN 32 4774 $L$ast_avx2: 4775 lea r13,[((128+16))+rsp] 4776 rorx ebx,ebp,2 4777 andn edi,ebp,edx 4778 and ebp,ecx 4779 xor ebp,edi 4780 sub r9,-128 4781 add esi,DWORD[((-128))+r13] 4782 andn edi,eax,ecx 4783 add esi,ebp 4784 rorx r12d,eax,27 4785 rorx ebp,eax,2 4786 and eax,ebx 4787 add esi,r12d 4788 xor eax,edi 4789 add edx,DWORD[((-124))+r13] 4790 andn edi,esi,ebx 4791 add edx,eax 4792 rorx r12d,esi,27 4793 rorx eax,esi,2 4794 and esi,ebp 4795 add edx,r12d 4796 xor esi,edi 4797 add ecx,DWORD[((-120))+r13] 4798 andn edi,edx,ebp 4799 add ecx,esi 4800 rorx r12d,edx,27 4801 rorx esi,edx,2 4802 and edx,eax 4803 add ecx,r12d 4804 xor edx,edi 4805 add ebx,DWORD[((-116))+r13] 4806 andn edi,ecx,eax 4807 add ebx,edx 4808 rorx r12d,ecx,27 4809 rorx edx,ecx,2 4810 and ecx,esi 4811 add ebx,r12d 4812 xor ecx,edi 4813 add ebp,DWORD[((-96))+r13] 4814 andn edi,ebx,esi 4815 add ebp,ecx 4816 rorx r12d,ebx,27 4817 rorx ecx,ebx,2 4818 and ebx,edx 4819 add ebp,r12d 4820 xor ebx,edi 4821 add eax,DWORD[((-92))+r13] 4822 andn edi,ebp,edx 4823 add eax,ebx 4824 rorx r12d,ebp,27 4825 rorx ebx,ebp,2 4826 and ebp,ecx 4827 add eax,r12d 4828 xor ebp,edi 4829 add esi,DWORD[((-88))+r13] 4830 andn edi,eax,ecx 4831 add esi,ebp 4832 rorx r12d,eax,27 4833 rorx ebp,eax,2 4834 and eax,ebx 4835 add esi,r12d 4836 xor eax,edi 4837 add edx,DWORD[((-84))+r13] 4838 andn edi,esi,ebx 4839 add edx,eax 4840 rorx r12d,esi,27 4841 rorx eax,esi,2 4842 and esi,ebp 4843 add edx,r12d 4844 xor esi,edi 4845 add ecx,DWORD[((-64))+r13] 4846 andn edi,edx,ebp 4847 add ecx,esi 4848 rorx r12d,edx,27 4849 rorx esi,edx,2 4850 and edx,eax 4851 add ecx,r12d 4852 xor edx,edi 4853 add ebx,DWORD[((-60))+r13] 4854 andn edi,ecx,eax 4855 add ebx,edx 4856 rorx r12d,ecx,27 4857 rorx edx,ecx,2 4858 and ecx,esi 4859 add ebx,r12d 4860 xor ecx,edi 4861 add ebp,DWORD[((-56))+r13] 4862 andn edi,ebx,esi 4863 add ebp,ecx 4864 rorx r12d,ebx,27 4865 rorx ecx,ebx,2 4866 and ebx,edx 4867 add ebp,r12d 4868 xor ebx,edi 4869 add eax,DWORD[((-52))+r13] 4870 andn edi,ebp,edx 4871 add eax,ebx 4872 rorx r12d,ebp,27 4873 rorx ebx,ebp,2 4874 and ebp,ecx 4875 add eax,r12d 4876 xor ebp,edi 4877 add esi,DWORD[((-32))+r13] 4878 andn edi,eax,ecx 4879 add esi,ebp 4880 rorx r12d,eax,27 4881 rorx ebp,eax,2 4882 and eax,ebx 4883 add esi,r12d 4884 xor eax,edi 4885 add edx,DWORD[((-28))+r13] 4886 andn edi,esi,ebx 4887 add edx,eax 4888 rorx r12d,esi,27 4889 rorx eax,esi,2 4890 and esi,ebp 4891 add edx,r12d 4892 xor esi,edi 4893 add ecx,DWORD[((-24))+r13] 4894 andn edi,edx,ebp 4895 add ecx,esi 4896 rorx r12d,edx,27 4897 rorx esi,edx,2 4898 and edx,eax 4899 add ecx,r12d 4900 xor edx,edi 4901 add ebx,DWORD[((-20))+r13] 4902 andn edi,ecx,eax 4903 add ebx,edx 4904 rorx r12d,ecx,27 4905 rorx edx,ecx,2 4906 and ecx,esi 4907 add ebx,r12d 4908 xor ecx,edi 4909 add ebp,DWORD[r13] 4910 andn edi,ebx,esi 4911 add ebp,ecx 4912 rorx r12d,ebx,27 4913 rorx ecx,ebx,2 4914 and ebx,edx 4915 add ebp,r12d 4916 xor ebx,edi 4917 add eax,DWORD[4+r13] 4918 andn edi,ebp,edx 4919 add eax,ebx 4920 rorx r12d,ebp,27 4921 rorx ebx,ebp,2 4922 and ebp,ecx 4923 add eax,r12d 4924 xor ebp,edi 4925 add esi,DWORD[8+r13] 4926 andn edi,eax,ecx 4927 add esi,ebp 4928 rorx r12d,eax,27 4929 rorx ebp,eax,2 4930 and eax,ebx 4931 add esi,r12d 4932 xor eax,edi 4933 add edx,DWORD[12+r13] 4934 lea edx,[rax*1+rdx] 4935 rorx r12d,esi,27 4936 rorx eax,esi,2 4937 xor esi,ebp 4938 add edx,r12d 4939 xor esi,ebx 4940 add ecx,DWORD[32+r13] 4941 lea ecx,[rsi*1+rcx] 4942 rorx r12d,edx,27 4943 rorx esi,edx,2 4944 xor edx,eax 4945 add ecx,r12d 4946 xor edx,ebp 4947 add ebx,DWORD[36+r13] 4948 lea ebx,[rdx*1+rbx] 4949 rorx r12d,ecx,27 4950 rorx edx,ecx,2 4951 xor ecx,esi 4952 add ebx,r12d 4953 xor ecx,eax 4954 add ebp,DWORD[40+r13] 4955 lea ebp,[rbp*1+rcx] 4956 rorx r12d,ebx,27 4957 rorx ecx,ebx,2 4958 xor ebx,edx 4959 add ebp,r12d 4960 xor ebx,esi 4961 add eax,DWORD[44+r13] 4962 lea eax,[rbx*1+rax] 4963 rorx r12d,ebp,27 4964 rorx ebx,ebp,2 4965 xor ebp,ecx 4966 add eax,r12d 4967 xor ebp,edx 4968 add esi,DWORD[64+r13] 4969 lea esi,[rbp*1+rsi] 4970 rorx r12d,eax,27 4971 rorx ebp,eax,2 4972 xor eax,ebx 4973 add esi,r12d 4974 xor eax,ecx 4975 vmovdqu ymm11,YMMWORD[((-64))+r14] 4976 vpshufb ymm0,ymm0,ymm6 4977 add edx,DWORD[68+r13] 4978 lea edx,[rax*1+rdx] 4979 rorx r12d,esi,27 4980 rorx eax,esi,2 4981 xor esi,ebp 4982 add edx,r12d 4983 xor esi,ebx 4984 add ecx,DWORD[72+r13] 4985 lea ecx,[rsi*1+rcx] 4986 rorx r12d,edx,27 4987 rorx esi,edx,2 4988 xor edx,eax 4989 add ecx,r12d 4990 xor edx,ebp 4991 add ebx,DWORD[76+r13] 4992 lea ebx,[rdx*1+rbx] 4993 rorx r12d,ecx,27 4994 rorx edx,ecx,2 4995 xor ecx,esi 4996 add ebx,r12d 4997 xor ecx,eax 4998 add ebp,DWORD[96+r13] 4999 lea ebp,[rbp*1+rcx] 5000 rorx r12d,ebx,27 5001 rorx ecx,ebx,2 5002 xor ebx,edx 5003 add ebp,r12d 5004 xor ebx,esi 5005 add eax,DWORD[100+r13] 5006 lea eax,[rbx*1+rax] 5007 rorx r12d,ebp,27 5008 rorx ebx,ebp,2 5009 xor ebp,ecx 5010 add eax,r12d 5011 xor ebp,edx 5012 vpshufb ymm1,ymm1,ymm6 5013 vpaddd ymm8,ymm0,ymm11 5014 add esi,DWORD[104+r13] 5015 lea esi,[rbp*1+rsi] 5016 rorx r12d,eax,27 5017 rorx ebp,eax,2 5018 xor eax,ebx 5019 add esi,r12d 5020 xor eax,ecx 5021 add edx,DWORD[108+r13] 5022 lea r13,[256+r13] 5023 lea edx,[rax*1+rdx] 5024 rorx r12d,esi,27 5025 rorx eax,esi,2 5026 xor esi,ebp 5027 add edx,r12d 5028 xor esi,ebx 5029 add ecx,DWORD[((-128))+r13] 5030 lea ecx,[rsi*1+rcx] 5031 rorx r12d,edx,27 5032 rorx esi,edx,2 5033 xor edx,eax 5034 add ecx,r12d 5035 xor edx,ebp 5036 add ebx,DWORD[((-124))+r13] 5037 lea ebx,[rdx*1+rbx] 5038 rorx r12d,ecx,27 5039 rorx edx,ecx,2 5040 xor ecx,esi 5041 add ebx,r12d 5042 xor ecx,eax 5043 add ebp,DWORD[((-120))+r13] 5044 lea ebp,[rbp*1+rcx] 5045 rorx r12d,ebx,27 5046 rorx ecx,ebx,2 5047 xor ebx,edx 5048 add ebp,r12d 5049 xor ebx,esi 5050 vmovdqu YMMWORD[rsp],ymm8 5051 vpshufb ymm2,ymm2,ymm6 5052 vpaddd ymm9,ymm1,ymm11 5053 add eax,DWORD[((-116))+r13] 5054 lea eax,[rbx*1+rax] 5055 rorx r12d,ebp,27 5056 rorx ebx,ebp,2 5057 xor ebp,ecx 5058 add eax,r12d 5059 xor ebp,edx 5060 add esi,DWORD[((-96))+r13] 5061 lea esi,[rbp*1+rsi] 5062 rorx r12d,eax,27 5063 rorx ebp,eax,2 5064 xor eax,ebx 5065 add esi,r12d 5066 xor eax,ecx 5067 add edx,DWORD[((-92))+r13] 5068 lea edx,[rax*1+rdx] 5069 rorx r12d,esi,27 5070 rorx eax,esi,2 5071 xor esi,ebp 5072 add edx,r12d 5073 xor esi,ebx 5074 add ecx,DWORD[((-88))+r13] 5075 lea ecx,[rsi*1+rcx] 5076 rorx r12d,edx,27 5077 rorx esi,edx,2 5078 xor edx,eax 5079 add ecx,r12d 5080 xor edx,ebp 5081 add ebx,DWORD[((-84))+r13] 5082 mov edi,esi 5083 xor edi,eax 5084 lea ebx,[rdx*1+rbx] 5085 rorx r12d,ecx,27 5086 rorx edx,ecx,2 5087 xor ecx,esi 5088 add ebx,r12d 5089 and ecx,edi 5090 vmovdqu YMMWORD[32+rsp],ymm9 5091 vpshufb ymm3,ymm3,ymm6 5092 vpaddd ymm6,ymm2,ymm11 5093 add ebp,DWORD[((-64))+r13] 5094 xor ecx,esi 5095 mov edi,edx 5096 xor edi,esi 5097 lea ebp,[rbp*1+rcx] 5098 rorx r12d,ebx,27 5099 rorx ecx,ebx,2 5100 xor ebx,edx 5101 add ebp,r12d 5102 and ebx,edi 5103 add eax,DWORD[((-60))+r13] 5104 xor ebx,edx 5105 mov edi,ecx 5106 xor edi,edx 5107 lea eax,[rbx*1+rax] 5108 rorx r12d,ebp,27 5109 rorx ebx,ebp,2 5110 xor ebp,ecx 5111 add eax,r12d 5112 and ebp,edi 5113 add esi,DWORD[((-56))+r13] 5114 xor ebp,ecx 5115 mov edi,ebx 5116 xor edi,ecx 5117 lea esi,[rbp*1+rsi] 5118 rorx r12d,eax,27 5119 rorx ebp,eax,2 5120 xor eax,ebx 5121 add esi,r12d 5122 and eax,edi 5123 add edx,DWORD[((-52))+r13] 5124 xor eax,ebx 5125 mov edi,ebp 5126 xor edi,ebx 5127 lea edx,[rax*1+rdx] 5128 rorx r12d,esi,27 5129 rorx eax,esi,2 5130 xor esi,ebp 5131 add edx,r12d 5132 and esi,edi 5133 add ecx,DWORD[((-32))+r13] 5134 xor esi,ebp 5135 mov edi,eax 5136 xor edi,ebp 5137 lea ecx,[rsi*1+rcx] 5138 rorx r12d,edx,27 5139 rorx esi,edx,2 5140 xor edx,eax 5141 add ecx,r12d 5142 and edx,edi 5143 jmp NEAR $L$align32_3 5144 ALIGN 32 5145 $L$align32_3: 5146 vmovdqu YMMWORD[64+rsp],ymm6 5147 vpaddd ymm7,ymm3,ymm11 5148 add ebx,DWORD[((-28))+r13] 5149 xor edx,eax 5150 mov edi,esi 5151 xor edi,eax 5152 lea ebx,[rdx*1+rbx] 5153 rorx r12d,ecx,27 5154 rorx edx,ecx,2 5155 xor ecx,esi 5156 add ebx,r12d 5157 and ecx,edi 5158 add ebp,DWORD[((-24))+r13] 5159 xor ecx,esi 5160 mov edi,edx 5161 xor edi,esi 5162 lea ebp,[rbp*1+rcx] 5163 rorx r12d,ebx,27 5164 rorx ecx,ebx,2 5165 xor ebx,edx 5166 add ebp,r12d 5167 and ebx,edi 5168 add eax,DWORD[((-20))+r13] 5169 xor ebx,edx 5170 mov edi,ecx 5171 xor edi,edx 5172 lea eax,[rbx*1+rax] 5173 rorx r12d,ebp,27 5174 rorx ebx,ebp,2 5175 xor ebp,ecx 5176 add eax,r12d 5177 and ebp,edi 5178 add esi,DWORD[r13] 5179 xor ebp,ecx 5180 mov edi,ebx 5181 xor edi,ecx 5182 lea esi,[rbp*1+rsi] 5183 rorx r12d,eax,27 5184 rorx ebp,eax,2 5185 xor eax,ebx 5186 add esi,r12d 5187 and eax,edi 5188 add edx,DWORD[4+r13] 5189 xor eax,ebx 5190 mov edi,ebp 5191 xor edi,ebx 5192 lea edx,[rax*1+rdx] 5193 rorx r12d,esi,27 5194 rorx eax,esi,2 5195 xor esi,ebp 5196 add edx,r12d 5197 and esi,edi 5198 vmovdqu YMMWORD[96+rsp],ymm7 5199 add ecx,DWORD[8+r13] 5200 xor esi,ebp 5201 mov edi,eax 5202 xor edi,ebp 5203 lea ecx,[rsi*1+rcx] 5204 rorx r12d,edx,27 5205 rorx esi,edx,2 5206 xor edx,eax 5207 add ecx,r12d 5208 and edx,edi 5209 add ebx,DWORD[12+r13] 5210 xor edx,eax 5211 mov edi,esi 5212 xor edi,eax 5213 lea ebx,[rdx*1+rbx] 5214 rorx r12d,ecx,27 5215 rorx edx,ecx,2 5216 xor ecx,esi 5217 add ebx,r12d 5218 and ecx,edi 5219 add ebp,DWORD[32+r13] 5220 xor ecx,esi 5221 mov edi,edx 5222 xor edi,esi 5223 lea ebp,[rbp*1+rcx] 5224 rorx r12d,ebx,27 5225 rorx ecx,ebx,2 5226 xor ebx,edx 5227 add ebp,r12d 5228 and ebx,edi 5229 add eax,DWORD[36+r13] 5230 xor ebx,edx 5231 mov edi,ecx 5232 xor edi,edx 5233 lea eax,[rbx*1+rax] 5234 rorx r12d,ebp,27 5235 rorx ebx,ebp,2 5236 xor ebp,ecx 5237 add eax,r12d 5238 and ebp,edi 5239 add esi,DWORD[40+r13] 5240 xor ebp,ecx 5241 mov edi,ebx 5242 xor edi,ecx 5243 lea esi,[rbp*1+rsi] 5244 rorx r12d,eax,27 5245 rorx ebp,eax,2 5246 xor eax,ebx 5247 add esi,r12d 5248 and eax,edi 5249 vpalignr ymm4,ymm1,ymm0,8 5250 add edx,DWORD[44+r13] 5251 xor eax,ebx 5252 mov edi,ebp 5253 xor edi,ebx 5254 vpsrldq ymm8,ymm3,4 5255 lea edx,[rax*1+rdx] 5256 rorx r12d,esi,27 5257 rorx eax,esi,2 5258 vpxor ymm4,ymm4,ymm0 5259 vpxor ymm8,ymm8,ymm2 5260 xor esi,ebp 5261 add edx,r12d 5262 vpxor ymm4,ymm4,ymm8 5263 and esi,edi 5264 add ecx,DWORD[64+r13] 5265 xor esi,ebp 5266 mov edi,eax 5267 vpsrld ymm8,ymm4,31 5268 xor edi,ebp 5269 lea ecx,[rsi*1+rcx] 5270 rorx r12d,edx,27 5271 vpslldq ymm10,ymm4,12 5272 vpaddd ymm4,ymm4,ymm4 5273 rorx esi,edx,2 5274 xor edx,eax 5275 vpsrld ymm9,ymm10,30 5276 vpor ymm4,ymm4,ymm8 5277 add ecx,r12d 5278 and edx,edi 5279 vpslld ymm10,ymm10,2 5280 vpxor ymm4,ymm4,ymm9 5281 add ebx,DWORD[68+r13] 5282 xor edx,eax 5283 vpxor ymm4,ymm4,ymm10 5284 mov edi,esi 5285 xor edi,eax 5286 lea ebx,[rdx*1+rbx] 5287 vpaddd ymm9,ymm4,ymm11 5288 rorx r12d,ecx,27 5289 rorx edx,ecx,2 5290 xor ecx,esi 5291 vmovdqu YMMWORD[128+rsp],ymm9 5292 add ebx,r12d 5293 and ecx,edi 5294 add ebp,DWORD[72+r13] 5295 xor ecx,esi 5296 mov edi,edx 5297 xor edi,esi 5298 lea ebp,[rbp*1+rcx] 5299 rorx r12d,ebx,27 5300 rorx ecx,ebx,2 5301 xor ebx,edx 5302 add ebp,r12d 5303 and ebx,edi 5304 add eax,DWORD[76+r13] 5305 xor ebx,edx 5306 lea eax,[rbx*1+rax] 5307 rorx r12d,ebp,27 5308 rorx ebx,ebp,2 5309 xor ebp,ecx 5310 add eax,r12d 5311 xor ebp,edx 5312 vpalignr ymm5,ymm2,ymm1,8 5313 add esi,DWORD[96+r13] 5314 lea esi,[rbp*1+rsi] 5315 rorx r12d,eax,27 5316 rorx ebp,eax,2 5317 vpsrldq ymm8,ymm4,4 5318 xor eax,ebx 5319 add esi,r12d 5320 xor eax,ecx 5321 vpxor ymm5,ymm5,ymm1 5322 vpxor ymm8,ymm8,ymm3 5323 add edx,DWORD[100+r13] 5324 lea edx,[rax*1+rdx] 5325 vpxor ymm5,ymm5,ymm8 5326 rorx r12d,esi,27 5327 rorx eax,esi,2 5328 xor esi,ebp 5329 add edx,r12d 5330 vpsrld ymm8,ymm5,31 5331 vmovdqu ymm11,YMMWORD[((-32))+r14] 5332 xor esi,ebx 5333 add ecx,DWORD[104+r13] 5334 lea ecx,[rsi*1+rcx] 5335 vpslldq ymm10,ymm5,12 5336 vpaddd ymm5,ymm5,ymm5 5337 rorx r12d,edx,27 5338 rorx esi,edx,2 5339 vpsrld ymm9,ymm10,30 5340 vpor ymm5,ymm5,ymm8 5341 xor edx,eax 5342 add ecx,r12d 5343 vpslld ymm10,ymm10,2 5344 vpxor ymm5,ymm5,ymm9 5345 xor edx,ebp 5346 add ebx,DWORD[108+r13] 5347 lea r13,[256+r13] 5348 vpxor ymm5,ymm5,ymm10 5349 lea ebx,[rdx*1+rbx] 5350 rorx r12d,ecx,27 5351 rorx edx,ecx,2 5352 vpaddd ymm9,ymm5,ymm11 5353 xor ecx,esi 5354 add ebx,r12d 5355 xor ecx,eax 5356 vmovdqu YMMWORD[160+rsp],ymm9 5357 add ebp,DWORD[((-128))+r13] 5358 lea ebp,[rbp*1+rcx] 5359 rorx r12d,ebx,27 5360 rorx ecx,ebx,2 5361 xor ebx,edx 5362 add ebp,r12d 5363 xor ebx,esi 5364 vpalignr ymm6,ymm3,ymm2,8 5365 add eax,DWORD[((-124))+r13] 5366 lea eax,[rbx*1+rax] 5367 rorx r12d,ebp,27 5368 rorx ebx,ebp,2 5369 vpsrldq ymm8,ymm5,4 5370 xor ebp,ecx 5371 add eax,r12d 5372 xor ebp,edx 5373 vpxor ymm6,ymm6,ymm2 5374 vpxor ymm8,ymm8,ymm4 5375 add esi,DWORD[((-120))+r13] 5376 lea esi,[rbp*1+rsi] 5377 vpxor ymm6,ymm6,ymm8 5378 rorx r12d,eax,27 5379 rorx ebp,eax,2 5380 xor eax,ebx 5381 add esi,r12d 5382 vpsrld ymm8,ymm6,31 5383 xor eax,ecx 5384 add edx,DWORD[((-116))+r13] 5385 lea edx,[rax*1+rdx] 5386 vpslldq ymm10,ymm6,12 5387 vpaddd ymm6,ymm6,ymm6 5388 rorx r12d,esi,27 5389 rorx eax,esi,2 5390 vpsrld ymm9,ymm10,30 5391 vpor ymm6,ymm6,ymm8 5392 xor esi,ebp 5393 add edx,r12d 5394 vpslld ymm10,ymm10,2 5395 vpxor ymm6,ymm6,ymm9 5396 xor esi,ebx 5397 add ecx,DWORD[((-96))+r13] 5398 vpxor ymm6,ymm6,ymm10 5399 lea ecx,[rsi*1+rcx] 5400 rorx r12d,edx,27 5401 rorx esi,edx,2 5402 vpaddd ymm9,ymm6,ymm11 5403 xor edx,eax 5404 add ecx,r12d 5405 xor edx,ebp 5406 vmovdqu YMMWORD[192+rsp],ymm9 5407 add ebx,DWORD[((-92))+r13] 5408 lea ebx,[rdx*1+rbx] 5409 rorx r12d,ecx,27 5410 rorx edx,ecx,2 5411 xor ecx,esi 5412 add ebx,r12d 5413 xor ecx,eax 5414 vpalignr ymm7,ymm4,ymm3,8 5415 add ebp,DWORD[((-88))+r13] 5416 lea ebp,[rbp*1+rcx] 5417 rorx r12d,ebx,27 5418 rorx ecx,ebx,2 5419 vpsrldq ymm8,ymm6,4 5420 xor ebx,edx 5421 add ebp,r12d 5422 xor ebx,esi 5423 vpxor ymm7,ymm7,ymm3 5424 vpxor ymm8,ymm8,ymm5 5425 add eax,DWORD[((-84))+r13] 5426 lea eax,[rbx*1+rax] 5427 vpxor ymm7,ymm7,ymm8 5428 rorx r12d,ebp,27 5429 rorx ebx,ebp,2 5430 xor ebp,ecx 5431 add eax,r12d 5432 vpsrld ymm8,ymm7,31 5433 xor ebp,edx 5434 add esi,DWORD[((-64))+r13] 5435 lea esi,[rbp*1+rsi] 5436 vpslldq ymm10,ymm7,12 5437 vpaddd ymm7,ymm7,ymm7 5438 rorx r12d,eax,27 5439 rorx ebp,eax,2 5440 vpsrld ymm9,ymm10,30 5441 vpor ymm7,ymm7,ymm8 5442 xor eax,ebx 5443 add esi,r12d 5444 vpslld ymm10,ymm10,2 5445 vpxor ymm7,ymm7,ymm9 5446 xor eax,ecx 5447 add edx,DWORD[((-60))+r13] 5448 vpxor ymm7,ymm7,ymm10 5449 lea edx,[rax*1+rdx] 5450 rorx r12d,esi,27 5451 rorx eax,esi,2 5452 vpaddd ymm9,ymm7,ymm11 5453 xor esi,ebp 5454 add edx,r12d 5455 xor esi,ebx 5456 vmovdqu YMMWORD[224+rsp],ymm9 5457 add ecx,DWORD[((-56))+r13] 5458 lea ecx,[rsi*1+rcx] 5459 rorx r12d,edx,27 5460 rorx esi,edx,2 5461 xor edx,eax 5462 add ecx,r12d 5463 xor edx,ebp 5464 add ebx,DWORD[((-52))+r13] 5465 lea ebx,[rdx*1+rbx] 5466 rorx r12d,ecx,27 5467 rorx edx,ecx,2 5468 xor ecx,esi 5469 add ebx,r12d 5470 xor ecx,eax 5471 add ebp,DWORD[((-32))+r13] 5472 lea ebp,[rbp*1+rcx] 5473 rorx r12d,ebx,27 5474 rorx ecx,ebx,2 5475 xor ebx,edx 5476 add ebp,r12d 5477 xor ebx,esi 5478 add eax,DWORD[((-28))+r13] 5479 lea eax,[rbx*1+rax] 5480 rorx r12d,ebp,27 5481 rorx ebx,ebp,2 5482 xor ebp,ecx 5483 add eax,r12d 5484 xor ebp,edx 5485 add esi,DWORD[((-24))+r13] 5486 lea esi,[rbp*1+rsi] 5487 rorx r12d,eax,27 5488 rorx ebp,eax,2 5489 xor eax,ebx 5490 add esi,r12d 5491 xor eax,ecx 5492 add edx,DWORD[((-20))+r13] 5493 lea edx,[rax*1+rdx] 5494 rorx r12d,esi,27 5495 add edx,r12d 5496 lea r13,[128+rsp] 5497 5498 5499 add edx,DWORD[r8] 5500 add esi,DWORD[4+r8] 5501 add ebp,DWORD[8+r8] 5502 mov DWORD[r8],edx 5503 add ebx,DWORD[12+r8] 5504 mov DWORD[4+r8],esi 5505 mov eax,edx 5506 add ecx,DWORD[16+r8] 5507 mov r12d,ebp 5508 mov DWORD[8+r8],ebp 5509 mov edx,ebx 5510 5511 mov DWORD[12+r8],ebx 5512 mov ebp,esi 5513 mov DWORD[16+r8],ecx 5514 5515 mov esi,ecx 5516 mov ecx,r12d 5517 5518 5519 cmp r9,r10 5520 jbe NEAR $L$oop_avx2 5521 5522 $L$done_avx2: 5523 vzeroupper 5524 movaps xmm6,XMMWORD[((-40-96))+r11] 5525 movaps xmm7,XMMWORD[((-40-80))+r11] 5526 movaps xmm8,XMMWORD[((-40-64))+r11] 5527 movaps xmm9,XMMWORD[((-40-48))+r11] 5528 movaps xmm10,XMMWORD[((-40-32))+r11] 5529 movaps xmm11,XMMWORD[((-40-16))+r11] 5530 mov r14,QWORD[((-40))+r11] 5531 5532 mov r13,QWORD[((-32))+r11] 5533 5534 mov r12,QWORD[((-24))+r11] 5535 5536 mov rbp,QWORD[((-16))+r11] 5537 5538 mov rbx,QWORD[((-8))+r11] 5539 5540 lea rsp,[r11] 5541 5542 $L$epilogue_avx2: 5543 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 5544 mov rsi,QWORD[16+rsp] 5545 DB 0F3h,0C3h ;repret 5546 5547 $L$SEH_end_sha1_block_data_order_avx2: 2670 5548 ALIGN 64 2671 5549 K_XX_XX: … … 2861 5739 DD $L$SEH_end_sha1_block_data_order_ssse3 wrt ..imagebase 2862 5740 DD $L$SEH_info_sha1_block_data_order_ssse3 wrt ..imagebase 5741 DD $L$SEH_begin_sha1_block_data_order_avx wrt ..imagebase 5742 DD $L$SEH_end_sha1_block_data_order_avx wrt ..imagebase 5743 DD $L$SEH_info_sha1_block_data_order_avx wrt ..imagebase 5744 DD $L$SEH_begin_sha1_block_data_order_avx2 wrt ..imagebase 5745 DD $L$SEH_end_sha1_block_data_order_avx2 wrt ..imagebase 5746 DD $L$SEH_info_sha1_block_data_order_avx2 wrt ..imagebase 2863 5747 section .xdata rdata align=8 2864 5748 ALIGN 8 … … 2873 5757 DD ssse3_handler wrt ..imagebase 2874 5758 DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase 5759 $L$SEH_info_sha1_block_data_order_avx: 5760 DB 9,0,0,0 5761 DD ssse3_handler wrt ..imagebase 5762 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase 5763 $L$SEH_info_sha1_block_data_order_avx2: 5764 DB 9,0,0,0 5765 DD ssse3_handler wrt ..imagebase 5766 DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/sha256-mb-x86_64.S
r97373 r99371 25 25 bt rcx,61 26 26 jc NEAR _shaext_shortcut 27 test ecx,268435456 28 jnz NEAR _avx_shortcut 27 29 mov rax,rsp 28 30 … … 3205 3207 3206 3208 $L$SEH_end_sha256_multi_block_shaext: 3209 3210 ALIGN 32 3211 sha256_multi_block_avx: 3212 mov QWORD[8+rsp],rdi ;WIN64 prologue 3213 mov QWORD[16+rsp],rsi 3214 mov rax,rsp 3215 $L$SEH_begin_sha256_multi_block_avx: 3216 mov rdi,rcx 3217 mov rsi,rdx 3218 mov rdx,r8 3219 3220 3221 3222 _avx_shortcut: 3223 shr rcx,32 3224 cmp edx,2 3225 jb NEAR $L$avx 3226 test ecx,32 3227 jnz NEAR _avx2_shortcut 3228 jmp NEAR $L$avx 3229 ALIGN 32 3230 $L$avx: 3231 mov rax,rsp 3232 3233 push rbx 3234 3235 push rbp 3236 3237 lea rsp,[((-168))+rsp] 3238 movaps XMMWORD[rsp],xmm6 3239 movaps XMMWORD[16+rsp],xmm7 3240 movaps XMMWORD[32+rsp],xmm8 3241 movaps XMMWORD[48+rsp],xmm9 3242 movaps XMMWORD[(-120)+rax],xmm10 3243 movaps XMMWORD[(-104)+rax],xmm11 3244 movaps XMMWORD[(-88)+rax],xmm12 3245 movaps XMMWORD[(-72)+rax],xmm13 3246 movaps XMMWORD[(-56)+rax],xmm14 3247 movaps XMMWORD[(-40)+rax],xmm15 3248 sub rsp,288 3249 and rsp,-256 3250 mov QWORD[272+rsp],rax 3251 3252 $L$body_avx: 3253 lea rbp,[((K256+128))] 3254 lea rbx,[256+rsp] 3255 lea rdi,[128+rdi] 3256 3257 $L$oop_grande_avx: 3258 mov DWORD[280+rsp],edx 3259 xor edx,edx 3260 3261 mov r8,QWORD[rsi] 3262 3263 mov ecx,DWORD[8+rsi] 3264 cmp ecx,edx 3265 cmovg edx,ecx 3266 test ecx,ecx 3267 mov DWORD[rbx],ecx 3268 cmovle r8,rbp 3269 3270 mov r9,QWORD[16+rsi] 3271 3272 mov ecx,DWORD[24+rsi] 3273 cmp ecx,edx 3274 cmovg edx,ecx 3275 test ecx,ecx 3276 mov DWORD[4+rbx],ecx 3277 cmovle r9,rbp 3278 3279 mov r10,QWORD[32+rsi] 3280 3281 mov ecx,DWORD[40+rsi] 3282 cmp ecx,edx 3283 cmovg edx,ecx 3284 test ecx,ecx 3285 mov DWORD[8+rbx],ecx 3286 cmovle r10,rbp 3287 3288 mov r11,QWORD[48+rsi] 3289 3290 mov ecx,DWORD[56+rsi] 3291 cmp ecx,edx 3292 cmovg edx,ecx 3293 test ecx,ecx 3294 mov DWORD[12+rbx],ecx 3295 cmovle r11,rbp 3296 test edx,edx 3297 jz NEAR $L$done_avx 3298 3299 vmovdqu xmm8,XMMWORD[((0-128))+rdi] 3300 lea rax,[128+rsp] 3301 vmovdqu xmm9,XMMWORD[((32-128))+rdi] 3302 vmovdqu xmm10,XMMWORD[((64-128))+rdi] 3303 vmovdqu xmm11,XMMWORD[((96-128))+rdi] 3304 vmovdqu xmm12,XMMWORD[((128-128))+rdi] 3305 vmovdqu xmm13,XMMWORD[((160-128))+rdi] 3306 vmovdqu xmm14,XMMWORD[((192-128))+rdi] 3307 vmovdqu xmm15,XMMWORD[((224-128))+rdi] 3308 vmovdqu xmm6,XMMWORD[$L$pbswap] 3309 jmp NEAR $L$oop_avx 3310 3311 ALIGN 32 3312 $L$oop_avx: 3313 vpxor xmm4,xmm10,xmm9 3314 vmovd xmm5,DWORD[r8] 3315 vmovd xmm0,DWORD[r9] 3316 vpinsrd xmm5,xmm5,DWORD[r10],1 3317 vpinsrd xmm0,xmm0,DWORD[r11],1 3318 vpunpckldq xmm5,xmm5,xmm0 3319 vpshufb xmm5,xmm5,xmm6 3320 vpsrld xmm7,xmm12,6 3321 vpslld xmm2,xmm12,26 3322 vmovdqu XMMWORD[(0-128)+rax],xmm5 3323 vpaddd xmm5,xmm5,xmm15 3324 3325 vpsrld xmm1,xmm12,11 3326 vpxor xmm7,xmm7,xmm2 3327 vpslld xmm2,xmm12,21 3328 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp] 3329 vpxor xmm7,xmm7,xmm1 3330 3331 vpsrld xmm1,xmm12,25 3332 vpxor xmm7,xmm7,xmm2 3333 3334 vpslld xmm2,xmm12,7 3335 vpandn xmm0,xmm12,xmm14 3336 vpand xmm3,xmm12,xmm13 3337 3338 vpxor xmm7,xmm7,xmm1 3339 3340 vpsrld xmm15,xmm8,2 3341 vpxor xmm7,xmm7,xmm2 3342 3343 vpslld xmm1,xmm8,30 3344 vpxor xmm0,xmm0,xmm3 3345 vpxor xmm3,xmm9,xmm8 3346 3347 vpxor xmm15,xmm15,xmm1 3348 vpaddd xmm5,xmm5,xmm7 3349 3350 vpsrld xmm1,xmm8,13 3351 3352 vpslld xmm2,xmm8,19 3353 vpaddd xmm5,xmm5,xmm0 3354 vpand xmm4,xmm4,xmm3 3355 3356 vpxor xmm7,xmm15,xmm1 3357 3358 vpsrld xmm1,xmm8,22 3359 vpxor xmm7,xmm7,xmm2 3360 3361 vpslld xmm2,xmm8,10 3362 vpxor xmm15,xmm9,xmm4 3363 vpaddd xmm11,xmm11,xmm5 3364 3365 vpxor xmm7,xmm7,xmm1 3366 vpxor xmm7,xmm7,xmm2 3367 3368 vpaddd xmm15,xmm15,xmm5 3369 vpaddd xmm15,xmm15,xmm7 3370 vmovd xmm5,DWORD[4+r8] 3371 vmovd xmm0,DWORD[4+r9] 3372 vpinsrd xmm5,xmm5,DWORD[4+r10],1 3373 vpinsrd xmm0,xmm0,DWORD[4+r11],1 3374 vpunpckldq xmm5,xmm5,xmm0 3375 vpshufb xmm5,xmm5,xmm6 3376 vpsrld xmm7,xmm11,6 3377 vpslld xmm2,xmm11,26 3378 vmovdqu XMMWORD[(16-128)+rax],xmm5 3379 vpaddd xmm5,xmm5,xmm14 3380 3381 vpsrld xmm1,xmm11,11 3382 vpxor xmm7,xmm7,xmm2 3383 vpslld xmm2,xmm11,21 3384 vpaddd xmm5,xmm5,XMMWORD[((-96))+rbp] 3385 vpxor xmm7,xmm7,xmm1 3386 3387 vpsrld xmm1,xmm11,25 3388 vpxor xmm7,xmm7,xmm2 3389 3390 vpslld xmm2,xmm11,7 3391 vpandn xmm0,xmm11,xmm13 3392 vpand xmm4,xmm11,xmm12 3393 3394 vpxor xmm7,xmm7,xmm1 3395 3396 vpsrld xmm14,xmm15,2 3397 vpxor xmm7,xmm7,xmm2 3398 3399 vpslld xmm1,xmm15,30 3400 vpxor xmm0,xmm0,xmm4 3401 vpxor xmm4,xmm8,xmm15 3402 3403 vpxor xmm14,xmm14,xmm1 3404 vpaddd xmm5,xmm5,xmm7 3405 3406 vpsrld xmm1,xmm15,13 3407 3408 vpslld xmm2,xmm15,19 3409 vpaddd xmm5,xmm5,xmm0 3410 vpand xmm3,xmm3,xmm4 3411 3412 vpxor xmm7,xmm14,xmm1 3413 3414 vpsrld xmm1,xmm15,22 3415 vpxor xmm7,xmm7,xmm2 3416 3417 vpslld xmm2,xmm15,10 3418 vpxor xmm14,xmm8,xmm3 3419 vpaddd xmm10,xmm10,xmm5 3420 3421 vpxor xmm7,xmm7,xmm1 3422 vpxor xmm7,xmm7,xmm2 3423 3424 vpaddd xmm14,xmm14,xmm5 3425 vpaddd xmm14,xmm14,xmm7 3426 vmovd xmm5,DWORD[8+r8] 3427 vmovd xmm0,DWORD[8+r9] 3428 vpinsrd xmm5,xmm5,DWORD[8+r10],1 3429 vpinsrd xmm0,xmm0,DWORD[8+r11],1 3430 vpunpckldq xmm5,xmm5,xmm0 3431 vpshufb xmm5,xmm5,xmm6 3432 vpsrld xmm7,xmm10,6 3433 vpslld xmm2,xmm10,26 3434 vmovdqu XMMWORD[(32-128)+rax],xmm5 3435 vpaddd xmm5,xmm5,xmm13 3436 3437 vpsrld xmm1,xmm10,11 3438 vpxor xmm7,xmm7,xmm2 3439 vpslld xmm2,xmm10,21 3440 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp] 3441 vpxor xmm7,xmm7,xmm1 3442 3443 vpsrld xmm1,xmm10,25 3444 vpxor xmm7,xmm7,xmm2 3445 3446 vpslld xmm2,xmm10,7 3447 vpandn xmm0,xmm10,xmm12 3448 vpand xmm3,xmm10,xmm11 3449 3450 vpxor xmm7,xmm7,xmm1 3451 3452 vpsrld xmm13,xmm14,2 3453 vpxor xmm7,xmm7,xmm2 3454 3455 vpslld xmm1,xmm14,30 3456 vpxor xmm0,xmm0,xmm3 3457 vpxor xmm3,xmm15,xmm14 3458 3459 vpxor xmm13,xmm13,xmm1 3460 vpaddd xmm5,xmm5,xmm7 3461 3462 vpsrld xmm1,xmm14,13 3463 3464 vpslld xmm2,xmm14,19 3465 vpaddd xmm5,xmm5,xmm0 3466 vpand xmm4,xmm4,xmm3 3467 3468 vpxor xmm7,xmm13,xmm1 3469 3470 vpsrld xmm1,xmm14,22 3471 vpxor xmm7,xmm7,xmm2 3472 3473 vpslld xmm2,xmm14,10 3474 vpxor xmm13,xmm15,xmm4 3475 vpaddd xmm9,xmm9,xmm5 3476 3477 vpxor xmm7,xmm7,xmm1 3478 vpxor xmm7,xmm7,xmm2 3479 3480 vpaddd xmm13,xmm13,xmm5 3481 vpaddd xmm13,xmm13,xmm7 3482 vmovd xmm5,DWORD[12+r8] 3483 vmovd xmm0,DWORD[12+r9] 3484 vpinsrd xmm5,xmm5,DWORD[12+r10],1 3485 vpinsrd xmm0,xmm0,DWORD[12+r11],1 3486 vpunpckldq xmm5,xmm5,xmm0 3487 vpshufb xmm5,xmm5,xmm6 3488 vpsrld xmm7,xmm9,6 3489 vpslld xmm2,xmm9,26 3490 vmovdqu XMMWORD[(48-128)+rax],xmm5 3491 vpaddd xmm5,xmm5,xmm12 3492 3493 vpsrld xmm1,xmm9,11 3494 vpxor xmm7,xmm7,xmm2 3495 vpslld xmm2,xmm9,21 3496 vpaddd xmm5,xmm5,XMMWORD[((-32))+rbp] 3497 vpxor xmm7,xmm7,xmm1 3498 3499 vpsrld xmm1,xmm9,25 3500 vpxor xmm7,xmm7,xmm2 3501 3502 vpslld xmm2,xmm9,7 3503 vpandn xmm0,xmm9,xmm11 3504 vpand xmm4,xmm9,xmm10 3505 3506 vpxor xmm7,xmm7,xmm1 3507 3508 vpsrld xmm12,xmm13,2 3509 vpxor xmm7,xmm7,xmm2 3510 3511 vpslld xmm1,xmm13,30 3512 vpxor xmm0,xmm0,xmm4 3513 vpxor xmm4,xmm14,xmm13 3514 3515 vpxor xmm12,xmm12,xmm1 3516 vpaddd xmm5,xmm5,xmm7 3517 3518 vpsrld xmm1,xmm13,13 3519 3520 vpslld xmm2,xmm13,19 3521 vpaddd xmm5,xmm5,xmm0 3522 vpand xmm3,xmm3,xmm4 3523 3524 vpxor xmm7,xmm12,xmm1 3525 3526 vpsrld xmm1,xmm13,22 3527 vpxor xmm7,xmm7,xmm2 3528 3529 vpslld xmm2,xmm13,10 3530 vpxor xmm12,xmm14,xmm3 3531 vpaddd xmm8,xmm8,xmm5 3532 3533 vpxor xmm7,xmm7,xmm1 3534 vpxor xmm7,xmm7,xmm2 3535 3536 vpaddd xmm12,xmm12,xmm5 3537 vpaddd xmm12,xmm12,xmm7 3538 vmovd xmm5,DWORD[16+r8] 3539 vmovd xmm0,DWORD[16+r9] 3540 vpinsrd xmm5,xmm5,DWORD[16+r10],1 3541 vpinsrd xmm0,xmm0,DWORD[16+r11],1 3542 vpunpckldq xmm5,xmm5,xmm0 3543 vpshufb xmm5,xmm5,xmm6 3544 vpsrld xmm7,xmm8,6 3545 vpslld xmm2,xmm8,26 3546 vmovdqu XMMWORD[(64-128)+rax],xmm5 3547 vpaddd xmm5,xmm5,xmm11 3548 3549 vpsrld xmm1,xmm8,11 3550 vpxor xmm7,xmm7,xmm2 3551 vpslld xmm2,xmm8,21 3552 vpaddd xmm5,xmm5,XMMWORD[rbp] 3553 vpxor xmm7,xmm7,xmm1 3554 3555 vpsrld xmm1,xmm8,25 3556 vpxor xmm7,xmm7,xmm2 3557 3558 vpslld xmm2,xmm8,7 3559 vpandn xmm0,xmm8,xmm10 3560 vpand xmm3,xmm8,xmm9 3561 3562 vpxor xmm7,xmm7,xmm1 3563 3564 vpsrld xmm11,xmm12,2 3565 vpxor xmm7,xmm7,xmm2 3566 3567 vpslld xmm1,xmm12,30 3568 vpxor xmm0,xmm0,xmm3 3569 vpxor xmm3,xmm13,xmm12 3570 3571 vpxor xmm11,xmm11,xmm1 3572 vpaddd xmm5,xmm5,xmm7 3573 3574 vpsrld xmm1,xmm12,13 3575 3576 vpslld xmm2,xmm12,19 3577 vpaddd xmm5,xmm5,xmm0 3578 vpand xmm4,xmm4,xmm3 3579 3580 vpxor xmm7,xmm11,xmm1 3581 3582 vpsrld xmm1,xmm12,22 3583 vpxor xmm7,xmm7,xmm2 3584 3585 vpslld xmm2,xmm12,10 3586 vpxor xmm11,xmm13,xmm4 3587 vpaddd xmm15,xmm15,xmm5 3588 3589 vpxor xmm7,xmm7,xmm1 3590 vpxor xmm7,xmm7,xmm2 3591 3592 vpaddd xmm11,xmm11,xmm5 3593 vpaddd xmm11,xmm11,xmm7 3594 vmovd xmm5,DWORD[20+r8] 3595 vmovd xmm0,DWORD[20+r9] 3596 vpinsrd xmm5,xmm5,DWORD[20+r10],1 3597 vpinsrd xmm0,xmm0,DWORD[20+r11],1 3598 vpunpckldq xmm5,xmm5,xmm0 3599 vpshufb xmm5,xmm5,xmm6 3600 vpsrld xmm7,xmm15,6 3601 vpslld xmm2,xmm15,26 3602 vmovdqu XMMWORD[(80-128)+rax],xmm5 3603 vpaddd xmm5,xmm5,xmm10 3604 3605 vpsrld xmm1,xmm15,11 3606 vpxor xmm7,xmm7,xmm2 3607 vpslld xmm2,xmm15,21 3608 vpaddd xmm5,xmm5,XMMWORD[32+rbp] 3609 vpxor xmm7,xmm7,xmm1 3610 3611 vpsrld xmm1,xmm15,25 3612 vpxor xmm7,xmm7,xmm2 3613 3614 vpslld xmm2,xmm15,7 3615 vpandn xmm0,xmm15,xmm9 3616 vpand xmm4,xmm15,xmm8 3617 3618 vpxor xmm7,xmm7,xmm1 3619 3620 vpsrld xmm10,xmm11,2 3621 vpxor xmm7,xmm7,xmm2 3622 3623 vpslld xmm1,xmm11,30 3624 vpxor xmm0,xmm0,xmm4 3625 vpxor xmm4,xmm12,xmm11 3626 3627 vpxor xmm10,xmm10,xmm1 3628 vpaddd xmm5,xmm5,xmm7 3629 3630 vpsrld xmm1,xmm11,13 3631 3632 vpslld xmm2,xmm11,19 3633 vpaddd xmm5,xmm5,xmm0 3634 vpand xmm3,xmm3,xmm4 3635 3636 vpxor xmm7,xmm10,xmm1 3637 3638 vpsrld xmm1,xmm11,22 3639 vpxor xmm7,xmm7,xmm2 3640 3641 vpslld xmm2,xmm11,10 3642 vpxor xmm10,xmm12,xmm3 3643 vpaddd xmm14,xmm14,xmm5 3644 3645 vpxor xmm7,xmm7,xmm1 3646 vpxor xmm7,xmm7,xmm2 3647 3648 vpaddd xmm10,xmm10,xmm5 3649 vpaddd xmm10,xmm10,xmm7 3650 vmovd xmm5,DWORD[24+r8] 3651 vmovd xmm0,DWORD[24+r9] 3652 vpinsrd xmm5,xmm5,DWORD[24+r10],1 3653 vpinsrd xmm0,xmm0,DWORD[24+r11],1 3654 vpunpckldq xmm5,xmm5,xmm0 3655 vpshufb xmm5,xmm5,xmm6 3656 vpsrld xmm7,xmm14,6 3657 vpslld xmm2,xmm14,26 3658 vmovdqu XMMWORD[(96-128)+rax],xmm5 3659 vpaddd xmm5,xmm5,xmm9 3660 3661 vpsrld xmm1,xmm14,11 3662 vpxor xmm7,xmm7,xmm2 3663 vpslld xmm2,xmm14,21 3664 vpaddd xmm5,xmm5,XMMWORD[64+rbp] 3665 vpxor xmm7,xmm7,xmm1 3666 3667 vpsrld xmm1,xmm14,25 3668 vpxor xmm7,xmm7,xmm2 3669 3670 vpslld xmm2,xmm14,7 3671 vpandn xmm0,xmm14,xmm8 3672 vpand xmm3,xmm14,xmm15 3673 3674 vpxor xmm7,xmm7,xmm1 3675 3676 vpsrld xmm9,xmm10,2 3677 vpxor xmm7,xmm7,xmm2 3678 3679 vpslld xmm1,xmm10,30 3680 vpxor xmm0,xmm0,xmm3 3681 vpxor xmm3,xmm11,xmm10 3682 3683 vpxor xmm9,xmm9,xmm1 3684 vpaddd xmm5,xmm5,xmm7 3685 3686 vpsrld xmm1,xmm10,13 3687 3688 vpslld xmm2,xmm10,19 3689 vpaddd xmm5,xmm5,xmm0 3690 vpand xmm4,xmm4,xmm3 3691 3692 vpxor xmm7,xmm9,xmm1 3693 3694 vpsrld xmm1,xmm10,22 3695 vpxor xmm7,xmm7,xmm2 3696 3697 vpslld xmm2,xmm10,10 3698 vpxor xmm9,xmm11,xmm4 3699 vpaddd xmm13,xmm13,xmm5 3700 3701 vpxor xmm7,xmm7,xmm1 3702 vpxor xmm7,xmm7,xmm2 3703 3704 vpaddd xmm9,xmm9,xmm5 3705 vpaddd xmm9,xmm9,xmm7 3706 vmovd xmm5,DWORD[28+r8] 3707 vmovd xmm0,DWORD[28+r9] 3708 vpinsrd xmm5,xmm5,DWORD[28+r10],1 3709 vpinsrd xmm0,xmm0,DWORD[28+r11],1 3710 vpunpckldq xmm5,xmm5,xmm0 3711 vpshufb xmm5,xmm5,xmm6 3712 vpsrld xmm7,xmm13,6 3713 vpslld xmm2,xmm13,26 3714 vmovdqu XMMWORD[(112-128)+rax],xmm5 3715 vpaddd xmm5,xmm5,xmm8 3716 3717 vpsrld xmm1,xmm13,11 3718 vpxor xmm7,xmm7,xmm2 3719 vpslld xmm2,xmm13,21 3720 vpaddd xmm5,xmm5,XMMWORD[96+rbp] 3721 vpxor xmm7,xmm7,xmm1 3722 3723 vpsrld xmm1,xmm13,25 3724 vpxor xmm7,xmm7,xmm2 3725 3726 vpslld xmm2,xmm13,7 3727 vpandn xmm0,xmm13,xmm15 3728 vpand xmm4,xmm13,xmm14 3729 3730 vpxor xmm7,xmm7,xmm1 3731 3732 vpsrld xmm8,xmm9,2 3733 vpxor xmm7,xmm7,xmm2 3734 3735 vpslld xmm1,xmm9,30 3736 vpxor xmm0,xmm0,xmm4 3737 vpxor xmm4,xmm10,xmm9 3738 3739 vpxor xmm8,xmm8,xmm1 3740 vpaddd xmm5,xmm5,xmm7 3741 3742 vpsrld xmm1,xmm9,13 3743 3744 vpslld xmm2,xmm9,19 3745 vpaddd xmm5,xmm5,xmm0 3746 vpand xmm3,xmm3,xmm4 3747 3748 vpxor xmm7,xmm8,xmm1 3749 3750 vpsrld xmm1,xmm9,22 3751 vpxor xmm7,xmm7,xmm2 3752 3753 vpslld xmm2,xmm9,10 3754 vpxor xmm8,xmm10,xmm3 3755 vpaddd xmm12,xmm12,xmm5 3756 3757 vpxor xmm7,xmm7,xmm1 3758 vpxor xmm7,xmm7,xmm2 3759 3760 vpaddd xmm8,xmm8,xmm5 3761 vpaddd xmm8,xmm8,xmm7 3762 add rbp,256 3763 vmovd xmm5,DWORD[32+r8] 3764 vmovd xmm0,DWORD[32+r9] 3765 vpinsrd xmm5,xmm5,DWORD[32+r10],1 3766 vpinsrd xmm0,xmm0,DWORD[32+r11],1 3767 vpunpckldq xmm5,xmm5,xmm0 3768 vpshufb xmm5,xmm5,xmm6 3769 vpsrld xmm7,xmm12,6 3770 vpslld xmm2,xmm12,26 3771 vmovdqu XMMWORD[(128-128)+rax],xmm5 3772 vpaddd xmm5,xmm5,xmm15 3773 3774 vpsrld xmm1,xmm12,11 3775 vpxor xmm7,xmm7,xmm2 3776 vpslld xmm2,xmm12,21 3777 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp] 3778 vpxor xmm7,xmm7,xmm1 3779 3780 vpsrld xmm1,xmm12,25 3781 vpxor xmm7,xmm7,xmm2 3782 3783 vpslld xmm2,xmm12,7 3784 vpandn xmm0,xmm12,xmm14 3785 vpand xmm3,xmm12,xmm13 3786 3787 vpxor xmm7,xmm7,xmm1 3788 3789 vpsrld xmm15,xmm8,2 3790 vpxor xmm7,xmm7,xmm2 3791 3792 vpslld xmm1,xmm8,30 3793 vpxor xmm0,xmm0,xmm3 3794 vpxor xmm3,xmm9,xmm8 3795 3796 vpxor xmm15,xmm15,xmm1 3797 vpaddd xmm5,xmm5,xmm7 3798 3799 vpsrld xmm1,xmm8,13 3800 3801 vpslld xmm2,xmm8,19 3802 vpaddd xmm5,xmm5,xmm0 3803 vpand xmm4,xmm4,xmm3 3804 3805 vpxor xmm7,xmm15,xmm1 3806 3807 vpsrld xmm1,xmm8,22 3808 vpxor xmm7,xmm7,xmm2 3809 3810 vpslld xmm2,xmm8,10 3811 vpxor xmm15,xmm9,xmm4 3812 vpaddd xmm11,xmm11,xmm5 3813 3814 vpxor xmm7,xmm7,xmm1 3815 vpxor xmm7,xmm7,xmm2 3816 3817 vpaddd xmm15,xmm15,xmm5 3818 vpaddd xmm15,xmm15,xmm7 3819 vmovd xmm5,DWORD[36+r8] 3820 vmovd xmm0,DWORD[36+r9] 3821 vpinsrd xmm5,xmm5,DWORD[36+r10],1 3822 vpinsrd xmm0,xmm0,DWORD[36+r11],1 3823 vpunpckldq xmm5,xmm5,xmm0 3824 vpshufb xmm5,xmm5,xmm6 3825 vpsrld xmm7,xmm11,6 3826 vpslld xmm2,xmm11,26 3827 vmovdqu XMMWORD[(144-128)+rax],xmm5 3828 vpaddd xmm5,xmm5,xmm14 3829 3830 vpsrld xmm1,xmm11,11 3831 vpxor xmm7,xmm7,xmm2 3832 vpslld xmm2,xmm11,21 3833 vpaddd xmm5,xmm5,XMMWORD[((-96))+rbp] 3834 vpxor xmm7,xmm7,xmm1 3835 3836 vpsrld xmm1,xmm11,25 3837 vpxor xmm7,xmm7,xmm2 3838 3839 vpslld xmm2,xmm11,7 3840 vpandn xmm0,xmm11,xmm13 3841 vpand xmm4,xmm11,xmm12 3842 3843 vpxor xmm7,xmm7,xmm1 3844 3845 vpsrld xmm14,xmm15,2 3846 vpxor xmm7,xmm7,xmm2 3847 3848 vpslld xmm1,xmm15,30 3849 vpxor xmm0,xmm0,xmm4 3850 vpxor xmm4,xmm8,xmm15 3851 3852 vpxor xmm14,xmm14,xmm1 3853 vpaddd xmm5,xmm5,xmm7 3854 3855 vpsrld xmm1,xmm15,13 3856 3857 vpslld xmm2,xmm15,19 3858 vpaddd xmm5,xmm5,xmm0 3859 vpand xmm3,xmm3,xmm4 3860 3861 vpxor xmm7,xmm14,xmm1 3862 3863 vpsrld xmm1,xmm15,22 3864 vpxor xmm7,xmm7,xmm2 3865 3866 vpslld xmm2,xmm15,10 3867 vpxor xmm14,xmm8,xmm3 3868 vpaddd xmm10,xmm10,xmm5 3869 3870 vpxor xmm7,xmm7,xmm1 3871 vpxor xmm7,xmm7,xmm2 3872 3873 vpaddd xmm14,xmm14,xmm5 3874 vpaddd xmm14,xmm14,xmm7 3875 vmovd xmm5,DWORD[40+r8] 3876 vmovd xmm0,DWORD[40+r9] 3877 vpinsrd xmm5,xmm5,DWORD[40+r10],1 3878 vpinsrd xmm0,xmm0,DWORD[40+r11],1 3879 vpunpckldq xmm5,xmm5,xmm0 3880 vpshufb xmm5,xmm5,xmm6 3881 vpsrld xmm7,xmm10,6 3882 vpslld xmm2,xmm10,26 3883 vmovdqu XMMWORD[(160-128)+rax],xmm5 3884 vpaddd xmm5,xmm5,xmm13 3885 3886 vpsrld xmm1,xmm10,11 3887 vpxor xmm7,xmm7,xmm2 3888 vpslld xmm2,xmm10,21 3889 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp] 3890 vpxor xmm7,xmm7,xmm1 3891 3892 vpsrld xmm1,xmm10,25 3893 vpxor xmm7,xmm7,xmm2 3894 3895 vpslld xmm2,xmm10,7 3896 vpandn xmm0,xmm10,xmm12 3897 vpand xmm3,xmm10,xmm11 3898 3899 vpxor xmm7,xmm7,xmm1 3900 3901 vpsrld xmm13,xmm14,2 3902 vpxor xmm7,xmm7,xmm2 3903 3904 vpslld xmm1,xmm14,30 3905 vpxor xmm0,xmm0,xmm3 3906 vpxor xmm3,xmm15,xmm14 3907 3908 vpxor xmm13,xmm13,xmm1 3909 vpaddd xmm5,xmm5,xmm7 3910 3911 vpsrld xmm1,xmm14,13 3912 3913 vpslld xmm2,xmm14,19 3914 vpaddd xmm5,xmm5,xmm0 3915 vpand xmm4,xmm4,xmm3 3916 3917 vpxor xmm7,xmm13,xmm1 3918 3919 vpsrld xmm1,xmm14,22 3920 vpxor xmm7,xmm7,xmm2 3921 3922 vpslld xmm2,xmm14,10 3923 vpxor xmm13,xmm15,xmm4 3924 vpaddd xmm9,xmm9,xmm5 3925 3926 vpxor xmm7,xmm7,xmm1 3927 vpxor xmm7,xmm7,xmm2 3928 3929 vpaddd xmm13,xmm13,xmm5 3930 vpaddd xmm13,xmm13,xmm7 3931 vmovd xmm5,DWORD[44+r8] 3932 vmovd xmm0,DWORD[44+r9] 3933 vpinsrd xmm5,xmm5,DWORD[44+r10],1 3934 vpinsrd xmm0,xmm0,DWORD[44+r11],1 3935 vpunpckldq xmm5,xmm5,xmm0 3936 vpshufb xmm5,xmm5,xmm6 3937 vpsrld xmm7,xmm9,6 3938 vpslld xmm2,xmm9,26 3939 vmovdqu XMMWORD[(176-128)+rax],xmm5 3940 vpaddd xmm5,xmm5,xmm12 3941 3942 vpsrld xmm1,xmm9,11 3943 vpxor xmm7,xmm7,xmm2 3944 vpslld xmm2,xmm9,21 3945 vpaddd xmm5,xmm5,XMMWORD[((-32))+rbp] 3946 vpxor xmm7,xmm7,xmm1 3947 3948 vpsrld xmm1,xmm9,25 3949 vpxor xmm7,xmm7,xmm2 3950 3951 vpslld xmm2,xmm9,7 3952 vpandn xmm0,xmm9,xmm11 3953 vpand xmm4,xmm9,xmm10 3954 3955 vpxor xmm7,xmm7,xmm1 3956 3957 vpsrld xmm12,xmm13,2 3958 vpxor xmm7,xmm7,xmm2 3959 3960 vpslld xmm1,xmm13,30 3961 vpxor xmm0,xmm0,xmm4 3962 vpxor xmm4,xmm14,xmm13 3963 3964 vpxor xmm12,xmm12,xmm1 3965 vpaddd xmm5,xmm5,xmm7 3966 3967 vpsrld xmm1,xmm13,13 3968 3969 vpslld xmm2,xmm13,19 3970 vpaddd xmm5,xmm5,xmm0 3971 vpand xmm3,xmm3,xmm4 3972 3973 vpxor xmm7,xmm12,xmm1 3974 3975 vpsrld xmm1,xmm13,22 3976 vpxor xmm7,xmm7,xmm2 3977 3978 vpslld xmm2,xmm13,10 3979 vpxor xmm12,xmm14,xmm3 3980 vpaddd xmm8,xmm8,xmm5 3981 3982 vpxor xmm7,xmm7,xmm1 3983 vpxor xmm7,xmm7,xmm2 3984 3985 vpaddd xmm12,xmm12,xmm5 3986 vpaddd xmm12,xmm12,xmm7 3987 vmovd xmm5,DWORD[48+r8] 3988 vmovd xmm0,DWORD[48+r9] 3989 vpinsrd xmm5,xmm5,DWORD[48+r10],1 3990 vpinsrd xmm0,xmm0,DWORD[48+r11],1 3991 vpunpckldq xmm5,xmm5,xmm0 3992 vpshufb xmm5,xmm5,xmm6 3993 vpsrld xmm7,xmm8,6 3994 vpslld xmm2,xmm8,26 3995 vmovdqu XMMWORD[(192-128)+rax],xmm5 3996 vpaddd xmm5,xmm5,xmm11 3997 3998 vpsrld xmm1,xmm8,11 3999 vpxor xmm7,xmm7,xmm2 4000 vpslld xmm2,xmm8,21 4001 vpaddd xmm5,xmm5,XMMWORD[rbp] 4002 vpxor xmm7,xmm7,xmm1 4003 4004 vpsrld xmm1,xmm8,25 4005 vpxor xmm7,xmm7,xmm2 4006 4007 vpslld xmm2,xmm8,7 4008 vpandn xmm0,xmm8,xmm10 4009 vpand xmm3,xmm8,xmm9 4010 4011 vpxor xmm7,xmm7,xmm1 4012 4013 vpsrld xmm11,xmm12,2 4014 vpxor xmm7,xmm7,xmm2 4015 4016 vpslld xmm1,xmm12,30 4017 vpxor xmm0,xmm0,xmm3 4018 vpxor xmm3,xmm13,xmm12 4019 4020 vpxor xmm11,xmm11,xmm1 4021 vpaddd xmm5,xmm5,xmm7 4022 4023 vpsrld xmm1,xmm12,13 4024 4025 vpslld xmm2,xmm12,19 4026 vpaddd xmm5,xmm5,xmm0 4027 vpand xmm4,xmm4,xmm3 4028 4029 vpxor xmm7,xmm11,xmm1 4030 4031 vpsrld xmm1,xmm12,22 4032 vpxor xmm7,xmm7,xmm2 4033 4034 vpslld xmm2,xmm12,10 4035 vpxor xmm11,xmm13,xmm4 4036 vpaddd xmm15,xmm15,xmm5 4037 4038 vpxor xmm7,xmm7,xmm1 4039 vpxor xmm7,xmm7,xmm2 4040 4041 vpaddd xmm11,xmm11,xmm5 4042 vpaddd xmm11,xmm11,xmm7 4043 vmovd xmm5,DWORD[52+r8] 4044 vmovd xmm0,DWORD[52+r9] 4045 vpinsrd xmm5,xmm5,DWORD[52+r10],1 4046 vpinsrd xmm0,xmm0,DWORD[52+r11],1 4047 vpunpckldq xmm5,xmm5,xmm0 4048 vpshufb xmm5,xmm5,xmm6 4049 vpsrld xmm7,xmm15,6 4050 vpslld xmm2,xmm15,26 4051 vmovdqu XMMWORD[(208-128)+rax],xmm5 4052 vpaddd xmm5,xmm5,xmm10 4053 4054 vpsrld xmm1,xmm15,11 4055 vpxor xmm7,xmm7,xmm2 4056 vpslld xmm2,xmm15,21 4057 vpaddd xmm5,xmm5,XMMWORD[32+rbp] 4058 vpxor xmm7,xmm7,xmm1 4059 4060 vpsrld xmm1,xmm15,25 4061 vpxor xmm7,xmm7,xmm2 4062 4063 vpslld xmm2,xmm15,7 4064 vpandn xmm0,xmm15,xmm9 4065 vpand xmm4,xmm15,xmm8 4066 4067 vpxor xmm7,xmm7,xmm1 4068 4069 vpsrld xmm10,xmm11,2 4070 vpxor xmm7,xmm7,xmm2 4071 4072 vpslld xmm1,xmm11,30 4073 vpxor xmm0,xmm0,xmm4 4074 vpxor xmm4,xmm12,xmm11 4075 4076 vpxor xmm10,xmm10,xmm1 4077 vpaddd xmm5,xmm5,xmm7 4078 4079 vpsrld xmm1,xmm11,13 4080 4081 vpslld xmm2,xmm11,19 4082 vpaddd xmm5,xmm5,xmm0 4083 vpand xmm3,xmm3,xmm4 4084 4085 vpxor xmm7,xmm10,xmm1 4086 4087 vpsrld xmm1,xmm11,22 4088 vpxor xmm7,xmm7,xmm2 4089 4090 vpslld xmm2,xmm11,10 4091 vpxor xmm10,xmm12,xmm3 4092 vpaddd xmm14,xmm14,xmm5 4093 4094 vpxor xmm7,xmm7,xmm1 4095 vpxor xmm7,xmm7,xmm2 4096 4097 vpaddd xmm10,xmm10,xmm5 4098 vpaddd xmm10,xmm10,xmm7 4099 vmovd xmm5,DWORD[56+r8] 4100 vmovd xmm0,DWORD[56+r9] 4101 vpinsrd xmm5,xmm5,DWORD[56+r10],1 4102 vpinsrd xmm0,xmm0,DWORD[56+r11],1 4103 vpunpckldq xmm5,xmm5,xmm0 4104 vpshufb xmm5,xmm5,xmm6 4105 vpsrld xmm7,xmm14,6 4106 vpslld xmm2,xmm14,26 4107 vmovdqu XMMWORD[(224-128)+rax],xmm5 4108 vpaddd xmm5,xmm5,xmm9 4109 4110 vpsrld xmm1,xmm14,11 4111 vpxor xmm7,xmm7,xmm2 4112 vpslld xmm2,xmm14,21 4113 vpaddd xmm5,xmm5,XMMWORD[64+rbp] 4114 vpxor xmm7,xmm7,xmm1 4115 4116 vpsrld xmm1,xmm14,25 4117 vpxor xmm7,xmm7,xmm2 4118 4119 vpslld xmm2,xmm14,7 4120 vpandn xmm0,xmm14,xmm8 4121 vpand xmm3,xmm14,xmm15 4122 4123 vpxor xmm7,xmm7,xmm1 4124 4125 vpsrld xmm9,xmm10,2 4126 vpxor xmm7,xmm7,xmm2 4127 4128 vpslld xmm1,xmm10,30 4129 vpxor xmm0,xmm0,xmm3 4130 vpxor xmm3,xmm11,xmm10 4131 4132 vpxor xmm9,xmm9,xmm1 4133 vpaddd xmm5,xmm5,xmm7 4134 4135 vpsrld xmm1,xmm10,13 4136 4137 vpslld xmm2,xmm10,19 4138 vpaddd xmm5,xmm5,xmm0 4139 vpand xmm4,xmm4,xmm3 4140 4141 vpxor xmm7,xmm9,xmm1 4142 4143 vpsrld xmm1,xmm10,22 4144 vpxor xmm7,xmm7,xmm2 4145 4146 vpslld xmm2,xmm10,10 4147 vpxor xmm9,xmm11,xmm4 4148 vpaddd xmm13,xmm13,xmm5 4149 4150 vpxor xmm7,xmm7,xmm1 4151 vpxor xmm7,xmm7,xmm2 4152 4153 vpaddd xmm9,xmm9,xmm5 4154 vpaddd xmm9,xmm9,xmm7 4155 vmovd xmm5,DWORD[60+r8] 4156 lea r8,[64+r8] 4157 vmovd xmm0,DWORD[60+r9] 4158 lea r9,[64+r9] 4159 vpinsrd xmm5,xmm5,DWORD[60+r10],1 4160 lea r10,[64+r10] 4161 vpinsrd xmm0,xmm0,DWORD[60+r11],1 4162 lea r11,[64+r11] 4163 vpunpckldq xmm5,xmm5,xmm0 4164 vpshufb xmm5,xmm5,xmm6 4165 vpsrld xmm7,xmm13,6 4166 vpslld xmm2,xmm13,26 4167 vmovdqu XMMWORD[(240-128)+rax],xmm5 4168 vpaddd xmm5,xmm5,xmm8 4169 4170 vpsrld xmm1,xmm13,11 4171 vpxor xmm7,xmm7,xmm2 4172 vpslld xmm2,xmm13,21 4173 vpaddd xmm5,xmm5,XMMWORD[96+rbp] 4174 vpxor xmm7,xmm7,xmm1 4175 4176 vpsrld xmm1,xmm13,25 4177 vpxor xmm7,xmm7,xmm2 4178 prefetcht0 [63+r8] 4179 vpslld xmm2,xmm13,7 4180 vpandn xmm0,xmm13,xmm15 4181 vpand xmm4,xmm13,xmm14 4182 prefetcht0 [63+r9] 4183 vpxor xmm7,xmm7,xmm1 4184 4185 vpsrld xmm8,xmm9,2 4186 vpxor xmm7,xmm7,xmm2 4187 prefetcht0 [63+r10] 4188 vpslld xmm1,xmm9,30 4189 vpxor xmm0,xmm0,xmm4 4190 vpxor xmm4,xmm10,xmm9 4191 prefetcht0 [63+r11] 4192 vpxor xmm8,xmm8,xmm1 4193 vpaddd xmm5,xmm5,xmm7 4194 4195 vpsrld xmm1,xmm9,13 4196 4197 vpslld xmm2,xmm9,19 4198 vpaddd xmm5,xmm5,xmm0 4199 vpand xmm3,xmm3,xmm4 4200 4201 vpxor xmm7,xmm8,xmm1 4202 4203 vpsrld xmm1,xmm9,22 4204 vpxor xmm7,xmm7,xmm2 4205 4206 vpslld xmm2,xmm9,10 4207 vpxor xmm8,xmm10,xmm3 4208 vpaddd xmm12,xmm12,xmm5 4209 4210 vpxor xmm7,xmm7,xmm1 4211 vpxor xmm7,xmm7,xmm2 4212 4213 vpaddd xmm8,xmm8,xmm5 4214 vpaddd xmm8,xmm8,xmm7 4215 add rbp,256 4216 vmovdqu xmm5,XMMWORD[((0-128))+rax] 4217 mov ecx,3 4218 jmp NEAR $L$oop_16_xx_avx 4219 ALIGN 32 4220 $L$oop_16_xx_avx: 4221 vmovdqu xmm6,XMMWORD[((16-128))+rax] 4222 vpaddd xmm5,xmm5,XMMWORD[((144-128))+rax] 4223 4224 vpsrld xmm7,xmm6,3 4225 vpsrld xmm1,xmm6,7 4226 vpslld xmm2,xmm6,25 4227 vpxor xmm7,xmm7,xmm1 4228 vpsrld xmm1,xmm6,18 4229 vpxor xmm7,xmm7,xmm2 4230 vpslld xmm2,xmm6,14 4231 vmovdqu xmm0,XMMWORD[((224-128))+rax] 4232 vpsrld xmm3,xmm0,10 4233 4234 vpxor xmm7,xmm7,xmm1 4235 vpsrld xmm1,xmm0,17 4236 vpxor xmm7,xmm7,xmm2 4237 vpslld xmm2,xmm0,15 4238 vpaddd xmm5,xmm5,xmm7 4239 vpxor xmm7,xmm3,xmm1 4240 vpsrld xmm1,xmm0,19 4241 vpxor xmm7,xmm7,xmm2 4242 vpslld xmm2,xmm0,13 4243 vpxor xmm7,xmm7,xmm1 4244 vpxor xmm7,xmm7,xmm2 4245 vpaddd xmm5,xmm5,xmm7 4246 vpsrld xmm7,xmm12,6 4247 vpslld xmm2,xmm12,26 4248 vmovdqu XMMWORD[(0-128)+rax],xmm5 4249 vpaddd xmm5,xmm5,xmm15 4250 4251 vpsrld xmm1,xmm12,11 4252 vpxor xmm7,xmm7,xmm2 4253 vpslld xmm2,xmm12,21 4254 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp] 4255 vpxor xmm7,xmm7,xmm1 4256 4257 vpsrld xmm1,xmm12,25 4258 vpxor xmm7,xmm7,xmm2 4259 4260 vpslld xmm2,xmm12,7 4261 vpandn xmm0,xmm12,xmm14 4262 vpand xmm3,xmm12,xmm13 4263 4264 vpxor xmm7,xmm7,xmm1 4265 4266 vpsrld xmm15,xmm8,2 4267 vpxor xmm7,xmm7,xmm2 4268 4269 vpslld xmm1,xmm8,30 4270 vpxor xmm0,xmm0,xmm3 4271 vpxor xmm3,xmm9,xmm8 4272 4273 vpxor xmm15,xmm15,xmm1 4274 vpaddd xmm5,xmm5,xmm7 4275 4276 vpsrld xmm1,xmm8,13 4277 4278 vpslld xmm2,xmm8,19 4279 vpaddd xmm5,xmm5,xmm0 4280 vpand xmm4,xmm4,xmm3 4281 4282 vpxor xmm7,xmm15,xmm1 4283 4284 vpsrld xmm1,xmm8,22 4285 vpxor xmm7,xmm7,xmm2 4286 4287 vpslld xmm2,xmm8,10 4288 vpxor xmm15,xmm9,xmm4 4289 vpaddd xmm11,xmm11,xmm5 4290 4291 vpxor xmm7,xmm7,xmm1 4292 vpxor xmm7,xmm7,xmm2 4293 4294 vpaddd xmm15,xmm15,xmm5 4295 vpaddd xmm15,xmm15,xmm7 4296 vmovdqu xmm5,XMMWORD[((32-128))+rax] 4297 vpaddd xmm6,xmm6,XMMWORD[((160-128))+rax] 4298 4299 vpsrld xmm7,xmm5,3 4300 vpsrld xmm1,xmm5,7 4301 vpslld xmm2,xmm5,25 4302 vpxor xmm7,xmm7,xmm1 4303 vpsrld xmm1,xmm5,18 4304 vpxor xmm7,xmm7,xmm2 4305 vpslld xmm2,xmm5,14 4306 vmovdqu xmm0,XMMWORD[((240-128))+rax] 4307 vpsrld xmm4,xmm0,10 4308 4309 vpxor xmm7,xmm7,xmm1 4310 vpsrld xmm1,xmm0,17 4311 vpxor xmm7,xmm7,xmm2 4312 vpslld xmm2,xmm0,15 4313 vpaddd xmm6,xmm6,xmm7 4314 vpxor xmm7,xmm4,xmm1 4315 vpsrld xmm1,xmm0,19 4316 vpxor xmm7,xmm7,xmm2 4317 vpslld xmm2,xmm0,13 4318 vpxor xmm7,xmm7,xmm1 4319 vpxor xmm7,xmm7,xmm2 4320 vpaddd xmm6,xmm6,xmm7 4321 vpsrld xmm7,xmm11,6 4322 vpslld xmm2,xmm11,26 4323 vmovdqu XMMWORD[(16-128)+rax],xmm6 4324 vpaddd xmm6,xmm6,xmm14 4325 4326 vpsrld xmm1,xmm11,11 4327 vpxor xmm7,xmm7,xmm2 4328 vpslld xmm2,xmm11,21 4329 vpaddd xmm6,xmm6,XMMWORD[((-96))+rbp] 4330 vpxor xmm7,xmm7,xmm1 4331 4332 vpsrld xmm1,xmm11,25 4333 vpxor xmm7,xmm7,xmm2 4334 4335 vpslld xmm2,xmm11,7 4336 vpandn xmm0,xmm11,xmm13 4337 vpand xmm4,xmm11,xmm12 4338 4339 vpxor xmm7,xmm7,xmm1 4340 4341 vpsrld xmm14,xmm15,2 4342 vpxor xmm7,xmm7,xmm2 4343 4344 vpslld xmm1,xmm15,30 4345 vpxor xmm0,xmm0,xmm4 4346 vpxor xmm4,xmm8,xmm15 4347 4348 vpxor xmm14,xmm14,xmm1 4349 vpaddd xmm6,xmm6,xmm7 4350 4351 vpsrld xmm1,xmm15,13 4352 4353 vpslld xmm2,xmm15,19 4354 vpaddd xmm6,xmm6,xmm0 4355 vpand xmm3,xmm3,xmm4 4356 4357 vpxor xmm7,xmm14,xmm1 4358 4359 vpsrld xmm1,xmm15,22 4360 vpxor xmm7,xmm7,xmm2 4361 4362 vpslld xmm2,xmm15,10 4363 vpxor xmm14,xmm8,xmm3 4364 vpaddd xmm10,xmm10,xmm6 4365 4366 vpxor xmm7,xmm7,xmm1 4367 vpxor xmm7,xmm7,xmm2 4368 4369 vpaddd xmm14,xmm14,xmm6 4370 vpaddd xmm14,xmm14,xmm7 4371 vmovdqu xmm6,XMMWORD[((48-128))+rax] 4372 vpaddd xmm5,xmm5,XMMWORD[((176-128))+rax] 4373 4374 vpsrld xmm7,xmm6,3 4375 vpsrld xmm1,xmm6,7 4376 vpslld xmm2,xmm6,25 4377 vpxor xmm7,xmm7,xmm1 4378 vpsrld xmm1,xmm6,18 4379 vpxor xmm7,xmm7,xmm2 4380 vpslld xmm2,xmm6,14 4381 vmovdqu xmm0,XMMWORD[((0-128))+rax] 4382 vpsrld xmm3,xmm0,10 4383 4384 vpxor xmm7,xmm7,xmm1 4385 vpsrld xmm1,xmm0,17 4386 vpxor xmm7,xmm7,xmm2 4387 vpslld xmm2,xmm0,15 4388 vpaddd xmm5,xmm5,xmm7 4389 vpxor xmm7,xmm3,xmm1 4390 vpsrld xmm1,xmm0,19 4391 vpxor xmm7,xmm7,xmm2 4392 vpslld xmm2,xmm0,13 4393 vpxor xmm7,xmm7,xmm1 4394 vpxor xmm7,xmm7,xmm2 4395 vpaddd xmm5,xmm5,xmm7 4396 vpsrld xmm7,xmm10,6 4397 vpslld xmm2,xmm10,26 4398 vmovdqu XMMWORD[(32-128)+rax],xmm5 4399 vpaddd xmm5,xmm5,xmm13 4400 4401 vpsrld xmm1,xmm10,11 4402 vpxor xmm7,xmm7,xmm2 4403 vpslld xmm2,xmm10,21 4404 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp] 4405 vpxor xmm7,xmm7,xmm1 4406 4407 vpsrld xmm1,xmm10,25 4408 vpxor xmm7,xmm7,xmm2 4409 4410 vpslld xmm2,xmm10,7 4411 vpandn xmm0,xmm10,xmm12 4412 vpand xmm3,xmm10,xmm11 4413 4414 vpxor xmm7,xmm7,xmm1 4415 4416 vpsrld xmm13,xmm14,2 4417 vpxor xmm7,xmm7,xmm2 4418 4419 vpslld xmm1,xmm14,30 4420 vpxor xmm0,xmm0,xmm3 4421 vpxor xmm3,xmm15,xmm14 4422 4423 vpxor xmm13,xmm13,xmm1 4424 vpaddd xmm5,xmm5,xmm7 4425 4426 vpsrld xmm1,xmm14,13 4427 4428 vpslld xmm2,xmm14,19 4429 vpaddd xmm5,xmm5,xmm0 4430 vpand xmm4,xmm4,xmm3 4431 4432 vpxor xmm7,xmm13,xmm1 4433 4434 vpsrld xmm1,xmm14,22 4435 vpxor xmm7,xmm7,xmm2 4436 4437 vpslld xmm2,xmm14,10 4438 vpxor xmm13,xmm15,xmm4 4439 vpaddd xmm9,xmm9,xmm5 4440 4441 vpxor xmm7,xmm7,xmm1 4442 vpxor xmm7,xmm7,xmm2 4443 4444 vpaddd xmm13,xmm13,xmm5 4445 vpaddd xmm13,xmm13,xmm7 4446 vmovdqu xmm5,XMMWORD[((64-128))+rax] 4447 vpaddd xmm6,xmm6,XMMWORD[((192-128))+rax] 4448 4449 vpsrld xmm7,xmm5,3 4450 vpsrld xmm1,xmm5,7 4451 vpslld xmm2,xmm5,25 4452 vpxor xmm7,xmm7,xmm1 4453 vpsrld xmm1,xmm5,18 4454 vpxor xmm7,xmm7,xmm2 4455 vpslld xmm2,xmm5,14 4456 vmovdqu xmm0,XMMWORD[((16-128))+rax] 4457 vpsrld xmm4,xmm0,10 4458 4459 vpxor xmm7,xmm7,xmm1 4460 vpsrld xmm1,xmm0,17 4461 vpxor xmm7,xmm7,xmm2 4462 vpslld xmm2,xmm0,15 4463 vpaddd xmm6,xmm6,xmm7 4464 vpxor xmm7,xmm4,xmm1 4465 vpsrld xmm1,xmm0,19 4466 vpxor xmm7,xmm7,xmm2 4467 vpslld xmm2,xmm0,13 4468 vpxor xmm7,xmm7,xmm1 4469 vpxor xmm7,xmm7,xmm2 4470 vpaddd xmm6,xmm6,xmm7 4471 vpsrld xmm7,xmm9,6 4472 vpslld xmm2,xmm9,26 4473 vmovdqu XMMWORD[(48-128)+rax],xmm6 4474 vpaddd xmm6,xmm6,xmm12 4475 4476 vpsrld xmm1,xmm9,11 4477 vpxor xmm7,xmm7,xmm2 4478 vpslld xmm2,xmm9,21 4479 vpaddd xmm6,xmm6,XMMWORD[((-32))+rbp] 4480 vpxor xmm7,xmm7,xmm1 4481 4482 vpsrld xmm1,xmm9,25 4483 vpxor xmm7,xmm7,xmm2 4484 4485 vpslld xmm2,xmm9,7 4486 vpandn xmm0,xmm9,xmm11 4487 vpand xmm4,xmm9,xmm10 4488 4489 vpxor xmm7,xmm7,xmm1 4490 4491 vpsrld xmm12,xmm13,2 4492 vpxor xmm7,xmm7,xmm2 4493 4494 vpslld xmm1,xmm13,30 4495 vpxor xmm0,xmm0,xmm4 4496 vpxor xmm4,xmm14,xmm13 4497 4498 vpxor xmm12,xmm12,xmm1 4499 vpaddd xmm6,xmm6,xmm7 4500 4501 vpsrld xmm1,xmm13,13 4502 4503 vpslld xmm2,xmm13,19 4504 vpaddd xmm6,xmm6,xmm0 4505 vpand xmm3,xmm3,xmm4 4506 4507 vpxor xmm7,xmm12,xmm1 4508 4509 vpsrld xmm1,xmm13,22 4510 vpxor xmm7,xmm7,xmm2 4511 4512 vpslld xmm2,xmm13,10 4513 vpxor xmm12,xmm14,xmm3 4514 vpaddd xmm8,xmm8,xmm6 4515 4516 vpxor xmm7,xmm7,xmm1 4517 vpxor xmm7,xmm7,xmm2 4518 4519 vpaddd xmm12,xmm12,xmm6 4520 vpaddd xmm12,xmm12,xmm7 4521 vmovdqu xmm6,XMMWORD[((80-128))+rax] 4522 vpaddd xmm5,xmm5,XMMWORD[((208-128))+rax] 4523 4524 vpsrld xmm7,xmm6,3 4525 vpsrld xmm1,xmm6,7 4526 vpslld xmm2,xmm6,25 4527 vpxor xmm7,xmm7,xmm1 4528 vpsrld xmm1,xmm6,18 4529 vpxor xmm7,xmm7,xmm2 4530 vpslld xmm2,xmm6,14 4531 vmovdqu xmm0,XMMWORD[((32-128))+rax] 4532 vpsrld xmm3,xmm0,10 4533 4534 vpxor xmm7,xmm7,xmm1 4535 vpsrld xmm1,xmm0,17 4536 vpxor xmm7,xmm7,xmm2 4537 vpslld xmm2,xmm0,15 4538 vpaddd xmm5,xmm5,xmm7 4539 vpxor xmm7,xmm3,xmm1 4540 vpsrld xmm1,xmm0,19 4541 vpxor xmm7,xmm7,xmm2 4542 vpslld xmm2,xmm0,13 4543 vpxor xmm7,xmm7,xmm1 4544 vpxor xmm7,xmm7,xmm2 4545 vpaddd xmm5,xmm5,xmm7 4546 vpsrld xmm7,xmm8,6 4547 vpslld xmm2,xmm8,26 4548 vmovdqu XMMWORD[(64-128)+rax],xmm5 4549 vpaddd xmm5,xmm5,xmm11 4550 4551 vpsrld xmm1,xmm8,11 4552 vpxor xmm7,xmm7,xmm2 4553 vpslld xmm2,xmm8,21 4554 vpaddd xmm5,xmm5,XMMWORD[rbp] 4555 vpxor xmm7,xmm7,xmm1 4556 4557 vpsrld xmm1,xmm8,25 4558 vpxor xmm7,xmm7,xmm2 4559 4560 vpslld xmm2,xmm8,7 4561 vpandn xmm0,xmm8,xmm10 4562 vpand xmm3,xmm8,xmm9 4563 4564 vpxor xmm7,xmm7,xmm1 4565 4566 vpsrld xmm11,xmm12,2 4567 vpxor xmm7,xmm7,xmm2 4568 4569 vpslld xmm1,xmm12,30 4570 vpxor xmm0,xmm0,xmm3 4571 vpxor xmm3,xmm13,xmm12 4572 4573 vpxor xmm11,xmm11,xmm1 4574 vpaddd xmm5,xmm5,xmm7 4575 4576 vpsrld xmm1,xmm12,13 4577 4578 vpslld xmm2,xmm12,19 4579 vpaddd xmm5,xmm5,xmm0 4580 vpand xmm4,xmm4,xmm3 4581 4582 vpxor xmm7,xmm11,xmm1 4583 4584 vpsrld xmm1,xmm12,22 4585 vpxor xmm7,xmm7,xmm2 4586 4587 vpslld xmm2,xmm12,10 4588 vpxor xmm11,xmm13,xmm4 4589 vpaddd xmm15,xmm15,xmm5 4590 4591 vpxor xmm7,xmm7,xmm1 4592 vpxor xmm7,xmm7,xmm2 4593 4594 vpaddd xmm11,xmm11,xmm5 4595 vpaddd xmm11,xmm11,xmm7 4596 vmovdqu xmm5,XMMWORD[((96-128))+rax] 4597 vpaddd xmm6,xmm6,XMMWORD[((224-128))+rax] 4598 4599 vpsrld xmm7,xmm5,3 4600 vpsrld xmm1,xmm5,7 4601 vpslld xmm2,xmm5,25 4602 vpxor xmm7,xmm7,xmm1 4603 vpsrld xmm1,xmm5,18 4604 vpxor xmm7,xmm7,xmm2 4605 vpslld xmm2,xmm5,14 4606 vmovdqu xmm0,XMMWORD[((48-128))+rax] 4607 vpsrld xmm4,xmm0,10 4608 4609 vpxor xmm7,xmm7,xmm1 4610 vpsrld xmm1,xmm0,17 4611 vpxor xmm7,xmm7,xmm2 4612 vpslld xmm2,xmm0,15 4613 vpaddd xmm6,xmm6,xmm7 4614 vpxor xmm7,xmm4,xmm1 4615 vpsrld xmm1,xmm0,19 4616 vpxor xmm7,xmm7,xmm2 4617 vpslld xmm2,xmm0,13 4618 vpxor xmm7,xmm7,xmm1 4619 vpxor xmm7,xmm7,xmm2 4620 vpaddd xmm6,xmm6,xmm7 4621 vpsrld xmm7,xmm15,6 4622 vpslld xmm2,xmm15,26 4623 vmovdqu XMMWORD[(80-128)+rax],xmm6 4624 vpaddd xmm6,xmm6,xmm10 4625 4626 vpsrld xmm1,xmm15,11 4627 vpxor xmm7,xmm7,xmm2 4628 vpslld xmm2,xmm15,21 4629 vpaddd xmm6,xmm6,XMMWORD[32+rbp] 4630 vpxor xmm7,xmm7,xmm1 4631 4632 vpsrld xmm1,xmm15,25 4633 vpxor xmm7,xmm7,xmm2 4634 4635 vpslld xmm2,xmm15,7 4636 vpandn xmm0,xmm15,xmm9 4637 vpand xmm4,xmm15,xmm8 4638 4639 vpxor xmm7,xmm7,xmm1 4640 4641 vpsrld xmm10,xmm11,2 4642 vpxor xmm7,xmm7,xmm2 4643 4644 vpslld xmm1,xmm11,30 4645 vpxor xmm0,xmm0,xmm4 4646 vpxor xmm4,xmm12,xmm11 4647 4648 vpxor xmm10,xmm10,xmm1 4649 vpaddd xmm6,xmm6,xmm7 4650 4651 vpsrld xmm1,xmm11,13 4652 4653 vpslld xmm2,xmm11,19 4654 vpaddd xmm6,xmm6,xmm0 4655 vpand xmm3,xmm3,xmm4 4656 4657 vpxor xmm7,xmm10,xmm1 4658 4659 vpsrld xmm1,xmm11,22 4660 vpxor xmm7,xmm7,xmm2 4661 4662 vpslld xmm2,xmm11,10 4663 vpxor xmm10,xmm12,xmm3 4664 vpaddd xmm14,xmm14,xmm6 4665 4666 vpxor xmm7,xmm7,xmm1 4667 vpxor xmm7,xmm7,xmm2 4668 4669 vpaddd xmm10,xmm10,xmm6 4670 vpaddd xmm10,xmm10,xmm7 4671 vmovdqu xmm6,XMMWORD[((112-128))+rax] 4672 vpaddd xmm5,xmm5,XMMWORD[((240-128))+rax] 4673 4674 vpsrld xmm7,xmm6,3 4675 vpsrld xmm1,xmm6,7 4676 vpslld xmm2,xmm6,25 4677 vpxor xmm7,xmm7,xmm1 4678 vpsrld xmm1,xmm6,18 4679 vpxor xmm7,xmm7,xmm2 4680 vpslld xmm2,xmm6,14 4681 vmovdqu xmm0,XMMWORD[((64-128))+rax] 4682 vpsrld xmm3,xmm0,10 4683 4684 vpxor xmm7,xmm7,xmm1 4685 vpsrld xmm1,xmm0,17 4686 vpxor xmm7,xmm7,xmm2 4687 vpslld xmm2,xmm0,15 4688 vpaddd xmm5,xmm5,xmm7 4689 vpxor xmm7,xmm3,xmm1 4690 vpsrld xmm1,xmm0,19 4691 vpxor xmm7,xmm7,xmm2 4692 vpslld xmm2,xmm0,13 4693 vpxor xmm7,xmm7,xmm1 4694 vpxor xmm7,xmm7,xmm2 4695 vpaddd xmm5,xmm5,xmm7 4696 vpsrld xmm7,xmm14,6 4697 vpslld xmm2,xmm14,26 4698 vmovdqu XMMWORD[(96-128)+rax],xmm5 4699 vpaddd xmm5,xmm5,xmm9 4700 4701 vpsrld xmm1,xmm14,11 4702 vpxor xmm7,xmm7,xmm2 4703 vpslld xmm2,xmm14,21 4704 vpaddd xmm5,xmm5,XMMWORD[64+rbp] 4705 vpxor xmm7,xmm7,xmm1 4706 4707 vpsrld xmm1,xmm14,25 4708 vpxor xmm7,xmm7,xmm2 4709 4710 vpslld xmm2,xmm14,7 4711 vpandn xmm0,xmm14,xmm8 4712 vpand xmm3,xmm14,xmm15 4713 4714 vpxor xmm7,xmm7,xmm1 4715 4716 vpsrld xmm9,xmm10,2 4717 vpxor xmm7,xmm7,xmm2 4718 4719 vpslld xmm1,xmm10,30 4720 vpxor xmm0,xmm0,xmm3 4721 vpxor xmm3,xmm11,xmm10 4722 4723 vpxor xmm9,xmm9,xmm1 4724 vpaddd xmm5,xmm5,xmm7 4725 4726 vpsrld xmm1,xmm10,13 4727 4728 vpslld xmm2,xmm10,19 4729 vpaddd xmm5,xmm5,xmm0 4730 vpand xmm4,xmm4,xmm3 4731 4732 vpxor xmm7,xmm9,xmm1 4733 4734 vpsrld xmm1,xmm10,22 4735 vpxor xmm7,xmm7,xmm2 4736 4737 vpslld xmm2,xmm10,10 4738 vpxor xmm9,xmm11,xmm4 4739 vpaddd xmm13,xmm13,xmm5 4740 4741 vpxor xmm7,xmm7,xmm1 4742 vpxor xmm7,xmm7,xmm2 4743 4744 vpaddd xmm9,xmm9,xmm5 4745 vpaddd xmm9,xmm9,xmm7 4746 vmovdqu xmm5,XMMWORD[((128-128))+rax] 4747 vpaddd xmm6,xmm6,XMMWORD[((0-128))+rax] 4748 4749 vpsrld xmm7,xmm5,3 4750 vpsrld xmm1,xmm5,7 4751 vpslld xmm2,xmm5,25 4752 vpxor xmm7,xmm7,xmm1 4753 vpsrld xmm1,xmm5,18 4754 vpxor xmm7,xmm7,xmm2 4755 vpslld xmm2,xmm5,14 4756 vmovdqu xmm0,XMMWORD[((80-128))+rax] 4757 vpsrld xmm4,xmm0,10 4758 4759 vpxor xmm7,xmm7,xmm1 4760 vpsrld xmm1,xmm0,17 4761 vpxor xmm7,xmm7,xmm2 4762 vpslld xmm2,xmm0,15 4763 vpaddd xmm6,xmm6,xmm7 4764 vpxor xmm7,xmm4,xmm1 4765 vpsrld xmm1,xmm0,19 4766 vpxor xmm7,xmm7,xmm2 4767 vpslld xmm2,xmm0,13 4768 vpxor xmm7,xmm7,xmm1 4769 vpxor xmm7,xmm7,xmm2 4770 vpaddd xmm6,xmm6,xmm7 4771 vpsrld xmm7,xmm13,6 4772 vpslld xmm2,xmm13,26 4773 vmovdqu XMMWORD[(112-128)+rax],xmm6 4774 vpaddd xmm6,xmm6,xmm8 4775 4776 vpsrld xmm1,xmm13,11 4777 vpxor xmm7,xmm7,xmm2 4778 vpslld xmm2,xmm13,21 4779 vpaddd xmm6,xmm6,XMMWORD[96+rbp] 4780 vpxor xmm7,xmm7,xmm1 4781 4782 vpsrld xmm1,xmm13,25 4783 vpxor xmm7,xmm7,xmm2 4784 4785 vpslld xmm2,xmm13,7 4786 vpandn xmm0,xmm13,xmm15 4787 vpand xmm4,xmm13,xmm14 4788 4789 vpxor xmm7,xmm7,xmm1 4790 4791 vpsrld xmm8,xmm9,2 4792 vpxor xmm7,xmm7,xmm2 4793 4794 vpslld xmm1,xmm9,30 4795 vpxor xmm0,xmm0,xmm4 4796 vpxor xmm4,xmm10,xmm9 4797 4798 vpxor xmm8,xmm8,xmm1 4799 vpaddd xmm6,xmm6,xmm7 4800 4801 vpsrld xmm1,xmm9,13 4802 4803 vpslld xmm2,xmm9,19 4804 vpaddd xmm6,xmm6,xmm0 4805 vpand xmm3,xmm3,xmm4 4806 4807 vpxor xmm7,xmm8,xmm1 4808 4809 vpsrld xmm1,xmm9,22 4810 vpxor xmm7,xmm7,xmm2 4811 4812 vpslld xmm2,xmm9,10 4813 vpxor xmm8,xmm10,xmm3 4814 vpaddd xmm12,xmm12,xmm6 4815 4816 vpxor xmm7,xmm7,xmm1 4817 vpxor xmm7,xmm7,xmm2 4818 4819 vpaddd xmm8,xmm8,xmm6 4820 vpaddd xmm8,xmm8,xmm7 4821 add rbp,256 4822 vmovdqu xmm6,XMMWORD[((144-128))+rax] 4823 vpaddd xmm5,xmm5,XMMWORD[((16-128))+rax] 4824 4825 vpsrld xmm7,xmm6,3 4826 vpsrld xmm1,xmm6,7 4827 vpslld xmm2,xmm6,25 4828 vpxor xmm7,xmm7,xmm1 4829 vpsrld xmm1,xmm6,18 4830 vpxor xmm7,xmm7,xmm2 4831 vpslld xmm2,xmm6,14 4832 vmovdqu xmm0,XMMWORD[((96-128))+rax] 4833 vpsrld xmm3,xmm0,10 4834 4835 vpxor xmm7,xmm7,xmm1 4836 vpsrld xmm1,xmm0,17 4837 vpxor xmm7,xmm7,xmm2 4838 vpslld xmm2,xmm0,15 4839 vpaddd xmm5,xmm5,xmm7 4840 vpxor xmm7,xmm3,xmm1 4841 vpsrld xmm1,xmm0,19 4842 vpxor xmm7,xmm7,xmm2 4843 vpslld xmm2,xmm0,13 4844 vpxor xmm7,xmm7,xmm1 4845 vpxor xmm7,xmm7,xmm2 4846 vpaddd xmm5,xmm5,xmm7 4847 vpsrld xmm7,xmm12,6 4848 vpslld xmm2,xmm12,26 4849 vmovdqu XMMWORD[(128-128)+rax],xmm5 4850 vpaddd xmm5,xmm5,xmm15 4851 4852 vpsrld xmm1,xmm12,11 4853 vpxor xmm7,xmm7,xmm2 4854 vpslld xmm2,xmm12,21 4855 vpaddd xmm5,xmm5,XMMWORD[((-128))+rbp] 4856 vpxor xmm7,xmm7,xmm1 4857 4858 vpsrld xmm1,xmm12,25 4859 vpxor xmm7,xmm7,xmm2 4860 4861 vpslld xmm2,xmm12,7 4862 vpandn xmm0,xmm12,xmm14 4863 vpand xmm3,xmm12,xmm13 4864 4865 vpxor xmm7,xmm7,xmm1 4866 4867 vpsrld xmm15,xmm8,2 4868 vpxor xmm7,xmm7,xmm2 4869 4870 vpslld xmm1,xmm8,30 4871 vpxor xmm0,xmm0,xmm3 4872 vpxor xmm3,xmm9,xmm8 4873 4874 vpxor xmm15,xmm15,xmm1 4875 vpaddd xmm5,xmm5,xmm7 4876 4877 vpsrld xmm1,xmm8,13 4878 4879 vpslld xmm2,xmm8,19 4880 vpaddd xmm5,xmm5,xmm0 4881 vpand xmm4,xmm4,xmm3 4882 4883 vpxor xmm7,xmm15,xmm1 4884 4885 vpsrld xmm1,xmm8,22 4886 vpxor xmm7,xmm7,xmm2 4887 4888 vpslld xmm2,xmm8,10 4889 vpxor xmm15,xmm9,xmm4 4890 vpaddd xmm11,xmm11,xmm5 4891 4892 vpxor xmm7,xmm7,xmm1 4893 vpxor xmm7,xmm7,xmm2 4894 4895 vpaddd xmm15,xmm15,xmm5 4896 vpaddd xmm15,xmm15,xmm7 4897 vmovdqu xmm5,XMMWORD[((160-128))+rax] 4898 vpaddd xmm6,xmm6,XMMWORD[((32-128))+rax] 4899 4900 vpsrld xmm7,xmm5,3 4901 vpsrld xmm1,xmm5,7 4902 vpslld xmm2,xmm5,25 4903 vpxor xmm7,xmm7,xmm1 4904 vpsrld xmm1,xmm5,18 4905 vpxor xmm7,xmm7,xmm2 4906 vpslld xmm2,xmm5,14 4907 vmovdqu xmm0,XMMWORD[((112-128))+rax] 4908 vpsrld xmm4,xmm0,10 4909 4910 vpxor xmm7,xmm7,xmm1 4911 vpsrld xmm1,xmm0,17 4912 vpxor xmm7,xmm7,xmm2 4913 vpslld xmm2,xmm0,15 4914 vpaddd xmm6,xmm6,xmm7 4915 vpxor xmm7,xmm4,xmm1 4916 vpsrld xmm1,xmm0,19 4917 vpxor xmm7,xmm7,xmm2 4918 vpslld xmm2,xmm0,13 4919 vpxor xmm7,xmm7,xmm1 4920 vpxor xmm7,xmm7,xmm2 4921 vpaddd xmm6,xmm6,xmm7 4922 vpsrld xmm7,xmm11,6 4923 vpslld xmm2,xmm11,26 4924 vmovdqu XMMWORD[(144-128)+rax],xmm6 4925 vpaddd xmm6,xmm6,xmm14 4926 4927 vpsrld xmm1,xmm11,11 4928 vpxor xmm7,xmm7,xmm2 4929 vpslld xmm2,xmm11,21 4930 vpaddd xmm6,xmm6,XMMWORD[((-96))+rbp] 4931 vpxor xmm7,xmm7,xmm1 4932 4933 vpsrld xmm1,xmm11,25 4934 vpxor xmm7,xmm7,xmm2 4935 4936 vpslld xmm2,xmm11,7 4937 vpandn xmm0,xmm11,xmm13 4938 vpand xmm4,xmm11,xmm12 4939 4940 vpxor xmm7,xmm7,xmm1 4941 4942 vpsrld xmm14,xmm15,2 4943 vpxor xmm7,xmm7,xmm2 4944 4945 vpslld xmm1,xmm15,30 4946 vpxor xmm0,xmm0,xmm4 4947 vpxor xmm4,xmm8,xmm15 4948 4949 vpxor xmm14,xmm14,xmm1 4950 vpaddd xmm6,xmm6,xmm7 4951 4952 vpsrld xmm1,xmm15,13 4953 4954 vpslld xmm2,xmm15,19 4955 vpaddd xmm6,xmm6,xmm0 4956 vpand xmm3,xmm3,xmm4 4957 4958 vpxor xmm7,xmm14,xmm1 4959 4960 vpsrld xmm1,xmm15,22 4961 vpxor xmm7,xmm7,xmm2 4962 4963 vpslld xmm2,xmm15,10 4964 vpxor xmm14,xmm8,xmm3 4965 vpaddd xmm10,xmm10,xmm6 4966 4967 vpxor xmm7,xmm7,xmm1 4968 vpxor xmm7,xmm7,xmm2 4969 4970 vpaddd xmm14,xmm14,xmm6 4971 vpaddd xmm14,xmm14,xmm7 4972 vmovdqu xmm6,XMMWORD[((176-128))+rax] 4973 vpaddd xmm5,xmm5,XMMWORD[((48-128))+rax] 4974 4975 vpsrld xmm7,xmm6,3 4976 vpsrld xmm1,xmm6,7 4977 vpslld xmm2,xmm6,25 4978 vpxor xmm7,xmm7,xmm1 4979 vpsrld xmm1,xmm6,18 4980 vpxor xmm7,xmm7,xmm2 4981 vpslld xmm2,xmm6,14 4982 vmovdqu xmm0,XMMWORD[((128-128))+rax] 4983 vpsrld xmm3,xmm0,10 4984 4985 vpxor xmm7,xmm7,xmm1 4986 vpsrld xmm1,xmm0,17 4987 vpxor xmm7,xmm7,xmm2 4988 vpslld xmm2,xmm0,15 4989 vpaddd xmm5,xmm5,xmm7 4990 vpxor xmm7,xmm3,xmm1 4991 vpsrld xmm1,xmm0,19 4992 vpxor xmm7,xmm7,xmm2 4993 vpslld xmm2,xmm0,13 4994 vpxor xmm7,xmm7,xmm1 4995 vpxor xmm7,xmm7,xmm2 4996 vpaddd xmm5,xmm5,xmm7 4997 vpsrld xmm7,xmm10,6 4998 vpslld xmm2,xmm10,26 4999 vmovdqu XMMWORD[(160-128)+rax],xmm5 5000 vpaddd xmm5,xmm5,xmm13 5001 5002 vpsrld xmm1,xmm10,11 5003 vpxor xmm7,xmm7,xmm2 5004 vpslld xmm2,xmm10,21 5005 vpaddd xmm5,xmm5,XMMWORD[((-64))+rbp] 5006 vpxor xmm7,xmm7,xmm1 5007 5008 vpsrld xmm1,xmm10,25 5009 vpxor xmm7,xmm7,xmm2 5010 5011 vpslld xmm2,xmm10,7 5012 vpandn xmm0,xmm10,xmm12 5013 vpand xmm3,xmm10,xmm11 5014 5015 vpxor xmm7,xmm7,xmm1 5016 5017 vpsrld xmm13,xmm14,2 5018 vpxor xmm7,xmm7,xmm2 5019 5020 vpslld xmm1,xmm14,30 5021 vpxor xmm0,xmm0,xmm3 5022 vpxor xmm3,xmm15,xmm14 5023 5024 vpxor xmm13,xmm13,xmm1 5025 vpaddd xmm5,xmm5,xmm7 5026 5027 vpsrld xmm1,xmm14,13 5028 5029 vpslld xmm2,xmm14,19 5030 vpaddd xmm5,xmm5,xmm0 5031 vpand xmm4,xmm4,xmm3 5032 5033 vpxor xmm7,xmm13,xmm1 5034 5035 vpsrld xmm1,xmm14,22 5036 vpxor xmm7,xmm7,xmm2 5037 5038 vpslld xmm2,xmm14,10 5039 vpxor xmm13,xmm15,xmm4 5040 vpaddd xmm9,xmm9,xmm5 5041 5042 vpxor xmm7,xmm7,xmm1 5043 vpxor xmm7,xmm7,xmm2 5044 5045 vpaddd xmm13,xmm13,xmm5 5046 vpaddd xmm13,xmm13,xmm7 5047 vmovdqu xmm5,XMMWORD[((192-128))+rax] 5048 vpaddd xmm6,xmm6,XMMWORD[((64-128))+rax] 5049 5050 vpsrld xmm7,xmm5,3 5051 vpsrld xmm1,xmm5,7 5052 vpslld xmm2,xmm5,25 5053 vpxor xmm7,xmm7,xmm1 5054 vpsrld xmm1,xmm5,18 5055 vpxor xmm7,xmm7,xmm2 5056 vpslld xmm2,xmm5,14 5057 vmovdqu xmm0,XMMWORD[((144-128))+rax] 5058 vpsrld xmm4,xmm0,10 5059 5060 vpxor xmm7,xmm7,xmm1 5061 vpsrld xmm1,xmm0,17 5062 vpxor xmm7,xmm7,xmm2 5063 vpslld xmm2,xmm0,15 5064 vpaddd xmm6,xmm6,xmm7 5065 vpxor xmm7,xmm4,xmm1 5066 vpsrld xmm1,xmm0,19 5067 vpxor xmm7,xmm7,xmm2 5068 vpslld xmm2,xmm0,13 5069 vpxor xmm7,xmm7,xmm1 5070 vpxor xmm7,xmm7,xmm2 5071 vpaddd xmm6,xmm6,xmm7 5072 vpsrld xmm7,xmm9,6 5073 vpslld xmm2,xmm9,26 5074 vmovdqu XMMWORD[(176-128)+rax],xmm6 5075 vpaddd xmm6,xmm6,xmm12 5076 5077 vpsrld xmm1,xmm9,11 5078 vpxor xmm7,xmm7,xmm2 5079 vpslld xmm2,xmm9,21 5080 vpaddd xmm6,xmm6,XMMWORD[((-32))+rbp] 5081 vpxor xmm7,xmm7,xmm1 5082 5083 vpsrld xmm1,xmm9,25 5084 vpxor xmm7,xmm7,xmm2 5085 5086 vpslld xmm2,xmm9,7 5087 vpandn xmm0,xmm9,xmm11 5088 vpand xmm4,xmm9,xmm10 5089 5090 vpxor xmm7,xmm7,xmm1 5091 5092 vpsrld xmm12,xmm13,2 5093 vpxor xmm7,xmm7,xmm2 5094 5095 vpslld xmm1,xmm13,30 5096 vpxor xmm0,xmm0,xmm4 5097 vpxor xmm4,xmm14,xmm13 5098 5099 vpxor xmm12,xmm12,xmm1 5100 vpaddd xmm6,xmm6,xmm7 5101 5102 vpsrld xmm1,xmm13,13 5103 5104 vpslld xmm2,xmm13,19 5105 vpaddd xmm6,xmm6,xmm0 5106 vpand xmm3,xmm3,xmm4 5107 5108 vpxor xmm7,xmm12,xmm1 5109 5110 vpsrld xmm1,xmm13,22 5111 vpxor xmm7,xmm7,xmm2 5112 5113 vpslld xmm2,xmm13,10 5114 vpxor xmm12,xmm14,xmm3 5115 vpaddd xmm8,xmm8,xmm6 5116 5117 vpxor xmm7,xmm7,xmm1 5118 vpxor xmm7,xmm7,xmm2 5119 5120 vpaddd xmm12,xmm12,xmm6 5121 vpaddd xmm12,xmm12,xmm7 5122 vmovdqu xmm6,XMMWORD[((208-128))+rax] 5123 vpaddd xmm5,xmm5,XMMWORD[((80-128))+rax] 5124 5125 vpsrld xmm7,xmm6,3 5126 vpsrld xmm1,xmm6,7 5127 vpslld xmm2,xmm6,25 5128 vpxor xmm7,xmm7,xmm1 5129 vpsrld xmm1,xmm6,18 5130 vpxor xmm7,xmm7,xmm2 5131 vpslld xmm2,xmm6,14 5132 vmovdqu xmm0,XMMWORD[((160-128))+rax] 5133 vpsrld xmm3,xmm0,10 5134 5135 vpxor xmm7,xmm7,xmm1 5136 vpsrld xmm1,xmm0,17 5137 vpxor xmm7,xmm7,xmm2 5138 vpslld xmm2,xmm0,15 5139 vpaddd xmm5,xmm5,xmm7 5140 vpxor xmm7,xmm3,xmm1 5141 vpsrld xmm1,xmm0,19 5142 vpxor xmm7,xmm7,xmm2 5143 vpslld xmm2,xmm0,13 5144 vpxor xmm7,xmm7,xmm1 5145 vpxor xmm7,xmm7,xmm2 5146 vpaddd xmm5,xmm5,xmm7 5147 vpsrld xmm7,xmm8,6 5148 vpslld xmm2,xmm8,26 5149 vmovdqu XMMWORD[(192-128)+rax],xmm5 5150 vpaddd xmm5,xmm5,xmm11 5151 5152 vpsrld xmm1,xmm8,11 5153 vpxor xmm7,xmm7,xmm2 5154 vpslld xmm2,xmm8,21 5155 vpaddd xmm5,xmm5,XMMWORD[rbp] 5156 vpxor xmm7,xmm7,xmm1 5157 5158 vpsrld xmm1,xmm8,25 5159 vpxor xmm7,xmm7,xmm2 5160 5161 vpslld xmm2,xmm8,7 5162 vpandn xmm0,xmm8,xmm10 5163 vpand xmm3,xmm8,xmm9 5164 5165 vpxor xmm7,xmm7,xmm1 5166 5167 vpsrld xmm11,xmm12,2 5168 vpxor xmm7,xmm7,xmm2 5169 5170 vpslld xmm1,xmm12,30 5171 vpxor xmm0,xmm0,xmm3 5172 vpxor xmm3,xmm13,xmm12 5173 5174 vpxor xmm11,xmm11,xmm1 5175 vpaddd xmm5,xmm5,xmm7 5176 5177 vpsrld xmm1,xmm12,13 5178 5179 vpslld xmm2,xmm12,19 5180 vpaddd xmm5,xmm5,xmm0 5181 vpand xmm4,xmm4,xmm3 5182 5183 vpxor xmm7,xmm11,xmm1 5184 5185 vpsrld xmm1,xmm12,22 5186 vpxor xmm7,xmm7,xmm2 5187 5188 vpslld xmm2,xmm12,10 5189 vpxor xmm11,xmm13,xmm4 5190 vpaddd xmm15,xmm15,xmm5 5191 5192 vpxor xmm7,xmm7,xmm1 5193 vpxor xmm7,xmm7,xmm2 5194 5195 vpaddd xmm11,xmm11,xmm5 5196 vpaddd xmm11,xmm11,xmm7 5197 vmovdqu xmm5,XMMWORD[((224-128))+rax] 5198 vpaddd xmm6,xmm6,XMMWORD[((96-128))+rax] 5199 5200 vpsrld xmm7,xmm5,3 5201 vpsrld xmm1,xmm5,7 5202 vpslld xmm2,xmm5,25 5203 vpxor xmm7,xmm7,xmm1 5204 vpsrld xmm1,xmm5,18 5205 vpxor xmm7,xmm7,xmm2 5206 vpslld xmm2,xmm5,14 5207 vmovdqu xmm0,XMMWORD[((176-128))+rax] 5208 vpsrld xmm4,xmm0,10 5209 5210 vpxor xmm7,xmm7,xmm1 5211 vpsrld xmm1,xmm0,17 5212 vpxor xmm7,xmm7,xmm2 5213 vpslld xmm2,xmm0,15 5214 vpaddd xmm6,xmm6,xmm7 5215 vpxor xmm7,xmm4,xmm1 5216 vpsrld xmm1,xmm0,19 5217 vpxor xmm7,xmm7,xmm2 5218 vpslld xmm2,xmm0,13 5219 vpxor xmm7,xmm7,xmm1 5220 vpxor xmm7,xmm7,xmm2 5221 vpaddd xmm6,xmm6,xmm7 5222 vpsrld xmm7,xmm15,6 5223 vpslld xmm2,xmm15,26 5224 vmovdqu XMMWORD[(208-128)+rax],xmm6 5225 vpaddd xmm6,xmm6,xmm10 5226 5227 vpsrld xmm1,xmm15,11 5228 vpxor xmm7,xmm7,xmm2 5229 vpslld xmm2,xmm15,21 5230 vpaddd xmm6,xmm6,XMMWORD[32+rbp] 5231 vpxor xmm7,xmm7,xmm1 5232 5233 vpsrld xmm1,xmm15,25 5234 vpxor xmm7,xmm7,xmm2 5235 5236 vpslld xmm2,xmm15,7 5237 vpandn xmm0,xmm15,xmm9 5238 vpand xmm4,xmm15,xmm8 5239 5240 vpxor xmm7,xmm7,xmm1 5241 5242 vpsrld xmm10,xmm11,2 5243 vpxor xmm7,xmm7,xmm2 5244 5245 vpslld xmm1,xmm11,30 5246 vpxor xmm0,xmm0,xmm4 5247 vpxor xmm4,xmm12,xmm11 5248 5249 vpxor xmm10,xmm10,xmm1 5250 vpaddd xmm6,xmm6,xmm7 5251 5252 vpsrld xmm1,xmm11,13 5253 5254 vpslld xmm2,xmm11,19 5255 vpaddd xmm6,xmm6,xmm0 5256 vpand xmm3,xmm3,xmm4 5257 5258 vpxor xmm7,xmm10,xmm1 5259 5260 vpsrld xmm1,xmm11,22 5261 vpxor xmm7,xmm7,xmm2 5262 5263 vpslld xmm2,xmm11,10 5264 vpxor xmm10,xmm12,xmm3 5265 vpaddd xmm14,xmm14,xmm6 5266 5267 vpxor xmm7,xmm7,xmm1 5268 vpxor xmm7,xmm7,xmm2 5269 5270 vpaddd xmm10,xmm10,xmm6 5271 vpaddd xmm10,xmm10,xmm7 5272 vmovdqu xmm6,XMMWORD[((240-128))+rax] 5273 vpaddd xmm5,xmm5,XMMWORD[((112-128))+rax] 5274 5275 vpsrld xmm7,xmm6,3 5276 vpsrld xmm1,xmm6,7 5277 vpslld xmm2,xmm6,25 5278 vpxor xmm7,xmm7,xmm1 5279 vpsrld xmm1,xmm6,18 5280 vpxor xmm7,xmm7,xmm2 5281 vpslld xmm2,xmm6,14 5282 vmovdqu xmm0,XMMWORD[((192-128))+rax] 5283 vpsrld xmm3,xmm0,10 5284 5285 vpxor xmm7,xmm7,xmm1 5286 vpsrld xmm1,xmm0,17 5287 vpxor xmm7,xmm7,xmm2 5288 vpslld xmm2,xmm0,15 5289 vpaddd xmm5,xmm5,xmm7 5290 vpxor xmm7,xmm3,xmm1 5291 vpsrld xmm1,xmm0,19 5292 vpxor xmm7,xmm7,xmm2 5293 vpslld xmm2,xmm0,13 5294 vpxor xmm7,xmm7,xmm1 5295 vpxor xmm7,xmm7,xmm2 5296 vpaddd xmm5,xmm5,xmm7 5297 vpsrld xmm7,xmm14,6 5298 vpslld xmm2,xmm14,26 5299 vmovdqu XMMWORD[(224-128)+rax],xmm5 5300 vpaddd xmm5,xmm5,xmm9 5301 5302 vpsrld xmm1,xmm14,11 5303 vpxor xmm7,xmm7,xmm2 5304 vpslld xmm2,xmm14,21 5305 vpaddd xmm5,xmm5,XMMWORD[64+rbp] 5306 vpxor xmm7,xmm7,xmm1 5307 5308 vpsrld xmm1,xmm14,25 5309 vpxor xmm7,xmm7,xmm2 5310 5311 vpslld xmm2,xmm14,7 5312 vpandn xmm0,xmm14,xmm8 5313 vpand xmm3,xmm14,xmm15 5314 5315 vpxor xmm7,xmm7,xmm1 5316 5317 vpsrld xmm9,xmm10,2 5318 vpxor xmm7,xmm7,xmm2 5319 5320 vpslld xmm1,xmm10,30 5321 vpxor xmm0,xmm0,xmm3 5322 vpxor xmm3,xmm11,xmm10 5323 5324 vpxor xmm9,xmm9,xmm1 5325 vpaddd xmm5,xmm5,xmm7 5326 5327 vpsrld xmm1,xmm10,13 5328 5329 vpslld xmm2,xmm10,19 5330 vpaddd xmm5,xmm5,xmm0 5331 vpand xmm4,xmm4,xmm3 5332 5333 vpxor xmm7,xmm9,xmm1 5334 5335 vpsrld xmm1,xmm10,22 5336 vpxor xmm7,xmm7,xmm2 5337 5338 vpslld xmm2,xmm10,10 5339 vpxor xmm9,xmm11,xmm4 5340 vpaddd xmm13,xmm13,xmm5 5341 5342 vpxor xmm7,xmm7,xmm1 5343 vpxor xmm7,xmm7,xmm2 5344 5345 vpaddd xmm9,xmm9,xmm5 5346 vpaddd xmm9,xmm9,xmm7 5347 vmovdqu xmm5,XMMWORD[((0-128))+rax] 5348 vpaddd xmm6,xmm6,XMMWORD[((128-128))+rax] 5349 5350 vpsrld xmm7,xmm5,3 5351 vpsrld xmm1,xmm5,7 5352 vpslld xmm2,xmm5,25 5353 vpxor xmm7,xmm7,xmm1 5354 vpsrld xmm1,xmm5,18 5355 vpxor xmm7,xmm7,xmm2 5356 vpslld xmm2,xmm5,14 5357 vmovdqu xmm0,XMMWORD[((208-128))+rax] 5358 vpsrld xmm4,xmm0,10 5359 5360 vpxor xmm7,xmm7,xmm1 5361 vpsrld xmm1,xmm0,17 5362 vpxor xmm7,xmm7,xmm2 5363 vpslld xmm2,xmm0,15 5364 vpaddd xmm6,xmm6,xmm7 5365 vpxor xmm7,xmm4,xmm1 5366 vpsrld xmm1,xmm0,19 5367 vpxor xmm7,xmm7,xmm2 5368 vpslld xmm2,xmm0,13 5369 vpxor xmm7,xmm7,xmm1 5370 vpxor xmm7,xmm7,xmm2 5371 vpaddd xmm6,xmm6,xmm7 5372 vpsrld xmm7,xmm13,6 5373 vpslld xmm2,xmm13,26 5374 vmovdqu XMMWORD[(240-128)+rax],xmm6 5375 vpaddd xmm6,xmm6,xmm8 5376 5377 vpsrld xmm1,xmm13,11 5378 vpxor xmm7,xmm7,xmm2 5379 vpslld xmm2,xmm13,21 5380 vpaddd xmm6,xmm6,XMMWORD[96+rbp] 5381 vpxor xmm7,xmm7,xmm1 5382 5383 vpsrld xmm1,xmm13,25 5384 vpxor xmm7,xmm7,xmm2 5385 5386 vpslld xmm2,xmm13,7 5387 vpandn xmm0,xmm13,xmm15 5388 vpand xmm4,xmm13,xmm14 5389 5390 vpxor xmm7,xmm7,xmm1 5391 5392 vpsrld xmm8,xmm9,2 5393 vpxor xmm7,xmm7,xmm2 5394 5395 vpslld xmm1,xmm9,30 5396 vpxor xmm0,xmm0,xmm4 5397 vpxor xmm4,xmm10,xmm9 5398 5399 vpxor xmm8,xmm8,xmm1 5400 vpaddd xmm6,xmm6,xmm7 5401 5402 vpsrld xmm1,xmm9,13 5403 5404 vpslld xmm2,xmm9,19 5405 vpaddd xmm6,xmm6,xmm0 5406 vpand xmm3,xmm3,xmm4 5407 5408 vpxor xmm7,xmm8,xmm1 5409 5410 vpsrld xmm1,xmm9,22 5411 vpxor xmm7,xmm7,xmm2 5412 5413 vpslld xmm2,xmm9,10 5414 vpxor xmm8,xmm10,xmm3 5415 vpaddd xmm12,xmm12,xmm6 5416 5417 vpxor xmm7,xmm7,xmm1 5418 vpxor xmm7,xmm7,xmm2 5419 5420 vpaddd xmm8,xmm8,xmm6 5421 vpaddd xmm8,xmm8,xmm7 5422 add rbp,256 5423 dec ecx 5424 jnz NEAR $L$oop_16_xx_avx 5425 5426 mov ecx,1 5427 lea rbp,[((K256+128))] 5428 cmp ecx,DWORD[rbx] 5429 cmovge r8,rbp 5430 cmp ecx,DWORD[4+rbx] 5431 cmovge r9,rbp 5432 cmp ecx,DWORD[8+rbx] 5433 cmovge r10,rbp 5434 cmp ecx,DWORD[12+rbx] 5435 cmovge r11,rbp 5436 vmovdqa xmm7,XMMWORD[rbx] 5437 vpxor xmm0,xmm0,xmm0 5438 vmovdqa xmm6,xmm7 5439 vpcmpgtd xmm6,xmm6,xmm0 5440 vpaddd xmm7,xmm7,xmm6 5441 5442 vmovdqu xmm0,XMMWORD[((0-128))+rdi] 5443 vpand xmm8,xmm8,xmm6 5444 vmovdqu xmm1,XMMWORD[((32-128))+rdi] 5445 vpand xmm9,xmm9,xmm6 5446 vmovdqu xmm2,XMMWORD[((64-128))+rdi] 5447 vpand xmm10,xmm10,xmm6 5448 vmovdqu xmm5,XMMWORD[((96-128))+rdi] 5449 vpand xmm11,xmm11,xmm6 5450 vpaddd xmm8,xmm8,xmm0 5451 vmovdqu xmm0,XMMWORD[((128-128))+rdi] 5452 vpand xmm12,xmm12,xmm6 5453 vpaddd xmm9,xmm9,xmm1 5454 vmovdqu xmm1,XMMWORD[((160-128))+rdi] 5455 vpand xmm13,xmm13,xmm6 5456 vpaddd xmm10,xmm10,xmm2 5457 vmovdqu xmm2,XMMWORD[((192-128))+rdi] 5458 vpand xmm14,xmm14,xmm6 5459 vpaddd xmm11,xmm11,xmm5 5460 vmovdqu xmm5,XMMWORD[((224-128))+rdi] 5461 vpand xmm15,xmm15,xmm6 5462 vpaddd xmm12,xmm12,xmm0 5463 vpaddd xmm13,xmm13,xmm1 5464 vmovdqu XMMWORD[(0-128)+rdi],xmm8 5465 vpaddd xmm14,xmm14,xmm2 5466 vmovdqu XMMWORD[(32-128)+rdi],xmm9 5467 vpaddd xmm15,xmm15,xmm5 5468 vmovdqu XMMWORD[(64-128)+rdi],xmm10 5469 vmovdqu XMMWORD[(96-128)+rdi],xmm11 5470 vmovdqu XMMWORD[(128-128)+rdi],xmm12 5471 vmovdqu XMMWORD[(160-128)+rdi],xmm13 5472 vmovdqu XMMWORD[(192-128)+rdi],xmm14 5473 vmovdqu XMMWORD[(224-128)+rdi],xmm15 5474 5475 vmovdqu XMMWORD[rbx],xmm7 5476 vmovdqu xmm6,XMMWORD[$L$pbswap] 5477 dec edx 5478 jnz NEAR $L$oop_avx 5479 5480 mov edx,DWORD[280+rsp] 5481 lea rdi,[16+rdi] 5482 lea rsi,[64+rsi] 5483 dec edx 5484 jnz NEAR $L$oop_grande_avx 5485 5486 $L$done_avx: 5487 mov rax,QWORD[272+rsp] 5488 5489 vzeroupper 5490 movaps xmm6,XMMWORD[((-184))+rax] 5491 movaps xmm7,XMMWORD[((-168))+rax] 5492 movaps xmm8,XMMWORD[((-152))+rax] 5493 movaps xmm9,XMMWORD[((-136))+rax] 5494 movaps xmm10,XMMWORD[((-120))+rax] 5495 movaps xmm11,XMMWORD[((-104))+rax] 5496 movaps xmm12,XMMWORD[((-88))+rax] 5497 movaps xmm13,XMMWORD[((-72))+rax] 5498 movaps xmm14,XMMWORD[((-56))+rax] 5499 movaps xmm15,XMMWORD[((-40))+rax] 5500 mov rbp,QWORD[((-16))+rax] 5501 5502 mov rbx,QWORD[((-8))+rax] 5503 5504 lea rsp,[rax] 5505 5506 $L$epilogue_avx: 5507 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 5508 mov rsi,QWORD[16+rsp] 5509 DB 0F3h,0C3h ;repret 5510 5511 $L$SEH_end_sha256_multi_block_avx: 5512 5513 ALIGN 32 5514 sha256_multi_block_avx2: 5515 mov QWORD[8+rsp],rdi ;WIN64 prologue 5516 mov QWORD[16+rsp],rsi 5517 mov rax,rsp 5518 $L$SEH_begin_sha256_multi_block_avx2: 5519 mov rdi,rcx 5520 mov rsi,rdx 5521 mov rdx,r8 5522 5523 5524 5525 _avx2_shortcut: 5526 mov rax,rsp 5527 5528 push rbx 5529 5530 push rbp 5531 5532 push r12 5533 5534 push r13 5535 5536 push r14 5537 5538 push r15 5539 5540 lea rsp,[((-168))+rsp] 5541 movaps XMMWORD[rsp],xmm6 5542 movaps XMMWORD[16+rsp],xmm7 5543 movaps XMMWORD[32+rsp],xmm8 5544 movaps XMMWORD[48+rsp],xmm9 5545 movaps XMMWORD[64+rsp],xmm10 5546 movaps XMMWORD[80+rsp],xmm11 5547 movaps XMMWORD[(-120)+rax],xmm12 5548 movaps XMMWORD[(-104)+rax],xmm13 5549 movaps XMMWORD[(-88)+rax],xmm14 5550 movaps XMMWORD[(-72)+rax],xmm15 5551 sub rsp,576 5552 and rsp,-256 5553 mov QWORD[544+rsp],rax 5554 5555 $L$body_avx2: 5556 lea rbp,[((K256+128))] 5557 lea rdi,[128+rdi] 5558 5559 $L$oop_grande_avx2: 5560 mov DWORD[552+rsp],edx 5561 xor edx,edx 5562 lea rbx,[512+rsp] 5563 5564 mov r12,QWORD[rsi] 5565 5566 mov ecx,DWORD[8+rsi] 5567 cmp ecx,edx 5568 cmovg edx,ecx 5569 test ecx,ecx 5570 mov DWORD[rbx],ecx 5571 cmovle r12,rbp 5572 5573 mov r13,QWORD[16+rsi] 5574 5575 mov ecx,DWORD[24+rsi] 5576 cmp ecx,edx 5577 cmovg edx,ecx 5578 test ecx,ecx 5579 mov DWORD[4+rbx],ecx 5580 cmovle r13,rbp 5581 5582 mov r14,QWORD[32+rsi] 5583 5584 mov ecx,DWORD[40+rsi] 5585 cmp ecx,edx 5586 cmovg edx,ecx 5587 test ecx,ecx 5588 mov DWORD[8+rbx],ecx 5589 cmovle r14,rbp 5590 5591 mov r15,QWORD[48+rsi] 5592 5593 mov ecx,DWORD[56+rsi] 5594 cmp ecx,edx 5595 cmovg edx,ecx 5596 test ecx,ecx 5597 mov DWORD[12+rbx],ecx 5598 cmovle r15,rbp 5599 5600 mov r8,QWORD[64+rsi] 5601 5602 mov ecx,DWORD[72+rsi] 5603 cmp ecx,edx 5604 cmovg edx,ecx 5605 test ecx,ecx 5606 mov DWORD[16+rbx],ecx 5607 cmovle r8,rbp 5608 5609 mov r9,QWORD[80+rsi] 5610 5611 mov ecx,DWORD[88+rsi] 5612 cmp ecx,edx 5613 cmovg edx,ecx 5614 test ecx,ecx 5615 mov DWORD[20+rbx],ecx 5616 cmovle r9,rbp 5617 5618 mov r10,QWORD[96+rsi] 5619 5620 mov ecx,DWORD[104+rsi] 5621 cmp ecx,edx 5622 cmovg edx,ecx 5623 test ecx,ecx 5624 mov DWORD[24+rbx],ecx 5625 cmovle r10,rbp 5626 5627 mov r11,QWORD[112+rsi] 5628 5629 mov ecx,DWORD[120+rsi] 5630 cmp ecx,edx 5631 cmovg edx,ecx 5632 test ecx,ecx 5633 mov DWORD[28+rbx],ecx 5634 cmovle r11,rbp 5635 vmovdqu ymm8,YMMWORD[((0-128))+rdi] 5636 lea rax,[128+rsp] 5637 vmovdqu ymm9,YMMWORD[((32-128))+rdi] 5638 lea rbx,[((256+128))+rsp] 5639 vmovdqu ymm10,YMMWORD[((64-128))+rdi] 5640 vmovdqu ymm11,YMMWORD[((96-128))+rdi] 5641 vmovdqu ymm12,YMMWORD[((128-128))+rdi] 5642 vmovdqu ymm13,YMMWORD[((160-128))+rdi] 5643 vmovdqu ymm14,YMMWORD[((192-128))+rdi] 5644 vmovdqu ymm15,YMMWORD[((224-128))+rdi] 5645 vmovdqu ymm6,YMMWORD[$L$pbswap] 5646 jmp NEAR $L$oop_avx2 5647 5648 ALIGN 32 5649 $L$oop_avx2: 5650 vpxor ymm4,ymm10,ymm9 5651 vmovd xmm5,DWORD[r12] 5652 vmovd xmm0,DWORD[r8] 5653 vmovd xmm1,DWORD[r13] 5654 vmovd xmm2,DWORD[r9] 5655 vpinsrd xmm5,xmm5,DWORD[r14],1 5656 vpinsrd xmm0,xmm0,DWORD[r10],1 5657 vpinsrd xmm1,xmm1,DWORD[r15],1 5658 vpunpckldq ymm5,ymm5,ymm1 5659 vpinsrd xmm2,xmm2,DWORD[r11],1 5660 vpunpckldq ymm0,ymm0,ymm2 5661 vinserti128 ymm5,ymm5,xmm0,1 5662 vpshufb ymm5,ymm5,ymm6 5663 vpsrld ymm7,ymm12,6 5664 vpslld ymm2,ymm12,26 5665 vmovdqu YMMWORD[(0-128)+rax],ymm5 5666 vpaddd ymm5,ymm5,ymm15 5667 5668 vpsrld ymm1,ymm12,11 5669 vpxor ymm7,ymm7,ymm2 5670 vpslld ymm2,ymm12,21 5671 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp] 5672 vpxor ymm7,ymm7,ymm1 5673 5674 vpsrld ymm1,ymm12,25 5675 vpxor ymm7,ymm7,ymm2 5676 5677 vpslld ymm2,ymm12,7 5678 vpandn ymm0,ymm12,ymm14 5679 vpand ymm3,ymm12,ymm13 5680 5681 vpxor ymm7,ymm7,ymm1 5682 5683 vpsrld ymm15,ymm8,2 5684 vpxor ymm7,ymm7,ymm2 5685 5686 vpslld ymm1,ymm8,30 5687 vpxor ymm0,ymm0,ymm3 5688 vpxor ymm3,ymm9,ymm8 5689 5690 vpxor ymm15,ymm15,ymm1 5691 vpaddd ymm5,ymm5,ymm7 5692 5693 vpsrld ymm1,ymm8,13 5694 5695 vpslld ymm2,ymm8,19 5696 vpaddd ymm5,ymm5,ymm0 5697 vpand ymm4,ymm4,ymm3 5698 5699 vpxor ymm7,ymm15,ymm1 5700 5701 vpsrld ymm1,ymm8,22 5702 vpxor ymm7,ymm7,ymm2 5703 5704 vpslld ymm2,ymm8,10 5705 vpxor ymm15,ymm9,ymm4 5706 vpaddd ymm11,ymm11,ymm5 5707 5708 vpxor ymm7,ymm7,ymm1 5709 vpxor ymm7,ymm7,ymm2 5710 5711 vpaddd ymm15,ymm15,ymm5 5712 vpaddd ymm15,ymm15,ymm7 5713 vmovd xmm5,DWORD[4+r12] 5714 vmovd xmm0,DWORD[4+r8] 5715 vmovd xmm1,DWORD[4+r13] 5716 vmovd xmm2,DWORD[4+r9] 5717 vpinsrd xmm5,xmm5,DWORD[4+r14],1 5718 vpinsrd xmm0,xmm0,DWORD[4+r10],1 5719 vpinsrd xmm1,xmm1,DWORD[4+r15],1 5720 vpunpckldq ymm5,ymm5,ymm1 5721 vpinsrd xmm2,xmm2,DWORD[4+r11],1 5722 vpunpckldq ymm0,ymm0,ymm2 5723 vinserti128 ymm5,ymm5,xmm0,1 5724 vpshufb ymm5,ymm5,ymm6 5725 vpsrld ymm7,ymm11,6 5726 vpslld ymm2,ymm11,26 5727 vmovdqu YMMWORD[(32-128)+rax],ymm5 5728 vpaddd ymm5,ymm5,ymm14 5729 5730 vpsrld ymm1,ymm11,11 5731 vpxor ymm7,ymm7,ymm2 5732 vpslld ymm2,ymm11,21 5733 vpaddd ymm5,ymm5,YMMWORD[((-96))+rbp] 5734 vpxor ymm7,ymm7,ymm1 5735 5736 vpsrld ymm1,ymm11,25 5737 vpxor ymm7,ymm7,ymm2 5738 5739 vpslld ymm2,ymm11,7 5740 vpandn ymm0,ymm11,ymm13 5741 vpand ymm4,ymm11,ymm12 5742 5743 vpxor ymm7,ymm7,ymm1 5744 5745 vpsrld ymm14,ymm15,2 5746 vpxor ymm7,ymm7,ymm2 5747 5748 vpslld ymm1,ymm15,30 5749 vpxor ymm0,ymm0,ymm4 5750 vpxor ymm4,ymm8,ymm15 5751 5752 vpxor ymm14,ymm14,ymm1 5753 vpaddd ymm5,ymm5,ymm7 5754 5755 vpsrld ymm1,ymm15,13 5756 5757 vpslld ymm2,ymm15,19 5758 vpaddd ymm5,ymm5,ymm0 5759 vpand ymm3,ymm3,ymm4 5760 5761 vpxor ymm7,ymm14,ymm1 5762 5763 vpsrld ymm1,ymm15,22 5764 vpxor ymm7,ymm7,ymm2 5765 5766 vpslld ymm2,ymm15,10 5767 vpxor ymm14,ymm8,ymm3 5768 vpaddd ymm10,ymm10,ymm5 5769 5770 vpxor ymm7,ymm7,ymm1 5771 vpxor ymm7,ymm7,ymm2 5772 5773 vpaddd ymm14,ymm14,ymm5 5774 vpaddd ymm14,ymm14,ymm7 5775 vmovd xmm5,DWORD[8+r12] 5776 vmovd xmm0,DWORD[8+r8] 5777 vmovd xmm1,DWORD[8+r13] 5778 vmovd xmm2,DWORD[8+r9] 5779 vpinsrd xmm5,xmm5,DWORD[8+r14],1 5780 vpinsrd xmm0,xmm0,DWORD[8+r10],1 5781 vpinsrd xmm1,xmm1,DWORD[8+r15],1 5782 vpunpckldq ymm5,ymm5,ymm1 5783 vpinsrd xmm2,xmm2,DWORD[8+r11],1 5784 vpunpckldq ymm0,ymm0,ymm2 5785 vinserti128 ymm5,ymm5,xmm0,1 5786 vpshufb ymm5,ymm5,ymm6 5787 vpsrld ymm7,ymm10,6 5788 vpslld ymm2,ymm10,26 5789 vmovdqu YMMWORD[(64-128)+rax],ymm5 5790 vpaddd ymm5,ymm5,ymm13 5791 5792 vpsrld ymm1,ymm10,11 5793 vpxor ymm7,ymm7,ymm2 5794 vpslld ymm2,ymm10,21 5795 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp] 5796 vpxor ymm7,ymm7,ymm1 5797 5798 vpsrld ymm1,ymm10,25 5799 vpxor ymm7,ymm7,ymm2 5800 5801 vpslld ymm2,ymm10,7 5802 vpandn ymm0,ymm10,ymm12 5803 vpand ymm3,ymm10,ymm11 5804 5805 vpxor ymm7,ymm7,ymm1 5806 5807 vpsrld ymm13,ymm14,2 5808 vpxor ymm7,ymm7,ymm2 5809 5810 vpslld ymm1,ymm14,30 5811 vpxor ymm0,ymm0,ymm3 5812 vpxor ymm3,ymm15,ymm14 5813 5814 vpxor ymm13,ymm13,ymm1 5815 vpaddd ymm5,ymm5,ymm7 5816 5817 vpsrld ymm1,ymm14,13 5818 5819 vpslld ymm2,ymm14,19 5820 vpaddd ymm5,ymm5,ymm0 5821 vpand ymm4,ymm4,ymm3 5822 5823 vpxor ymm7,ymm13,ymm1 5824 5825 vpsrld ymm1,ymm14,22 5826 vpxor ymm7,ymm7,ymm2 5827 5828 vpslld ymm2,ymm14,10 5829 vpxor ymm13,ymm15,ymm4 5830 vpaddd ymm9,ymm9,ymm5 5831 5832 vpxor ymm7,ymm7,ymm1 5833 vpxor ymm7,ymm7,ymm2 5834 5835 vpaddd ymm13,ymm13,ymm5 5836 vpaddd ymm13,ymm13,ymm7 5837 vmovd xmm5,DWORD[12+r12] 5838 vmovd xmm0,DWORD[12+r8] 5839 vmovd xmm1,DWORD[12+r13] 5840 vmovd xmm2,DWORD[12+r9] 5841 vpinsrd xmm5,xmm5,DWORD[12+r14],1 5842 vpinsrd xmm0,xmm0,DWORD[12+r10],1 5843 vpinsrd xmm1,xmm1,DWORD[12+r15],1 5844 vpunpckldq ymm5,ymm5,ymm1 5845 vpinsrd xmm2,xmm2,DWORD[12+r11],1 5846 vpunpckldq ymm0,ymm0,ymm2 5847 vinserti128 ymm5,ymm5,xmm0,1 5848 vpshufb ymm5,ymm5,ymm6 5849 vpsrld ymm7,ymm9,6 5850 vpslld ymm2,ymm9,26 5851 vmovdqu YMMWORD[(96-128)+rax],ymm5 5852 vpaddd ymm5,ymm5,ymm12 5853 5854 vpsrld ymm1,ymm9,11 5855 vpxor ymm7,ymm7,ymm2 5856 vpslld ymm2,ymm9,21 5857 vpaddd ymm5,ymm5,YMMWORD[((-32))+rbp] 5858 vpxor ymm7,ymm7,ymm1 5859 5860 vpsrld ymm1,ymm9,25 5861 vpxor ymm7,ymm7,ymm2 5862 5863 vpslld ymm2,ymm9,7 5864 vpandn ymm0,ymm9,ymm11 5865 vpand ymm4,ymm9,ymm10 5866 5867 vpxor ymm7,ymm7,ymm1 5868 5869 vpsrld ymm12,ymm13,2 5870 vpxor ymm7,ymm7,ymm2 5871 5872 vpslld ymm1,ymm13,30 5873 vpxor ymm0,ymm0,ymm4 5874 vpxor ymm4,ymm14,ymm13 5875 5876 vpxor ymm12,ymm12,ymm1 5877 vpaddd ymm5,ymm5,ymm7 5878 5879 vpsrld ymm1,ymm13,13 5880 5881 vpslld ymm2,ymm13,19 5882 vpaddd ymm5,ymm5,ymm0 5883 vpand ymm3,ymm3,ymm4 5884 5885 vpxor ymm7,ymm12,ymm1 5886 5887 vpsrld ymm1,ymm13,22 5888 vpxor ymm7,ymm7,ymm2 5889 5890 vpslld ymm2,ymm13,10 5891 vpxor ymm12,ymm14,ymm3 5892 vpaddd ymm8,ymm8,ymm5 5893 5894 vpxor ymm7,ymm7,ymm1 5895 vpxor ymm7,ymm7,ymm2 5896 5897 vpaddd ymm12,ymm12,ymm5 5898 vpaddd ymm12,ymm12,ymm7 5899 vmovd xmm5,DWORD[16+r12] 5900 vmovd xmm0,DWORD[16+r8] 5901 vmovd xmm1,DWORD[16+r13] 5902 vmovd xmm2,DWORD[16+r9] 5903 vpinsrd xmm5,xmm5,DWORD[16+r14],1 5904 vpinsrd xmm0,xmm0,DWORD[16+r10],1 5905 vpinsrd xmm1,xmm1,DWORD[16+r15],1 5906 vpunpckldq ymm5,ymm5,ymm1 5907 vpinsrd xmm2,xmm2,DWORD[16+r11],1 5908 vpunpckldq ymm0,ymm0,ymm2 5909 vinserti128 ymm5,ymm5,xmm0,1 5910 vpshufb ymm5,ymm5,ymm6 5911 vpsrld ymm7,ymm8,6 5912 vpslld ymm2,ymm8,26 5913 vmovdqu YMMWORD[(128-128)+rax],ymm5 5914 vpaddd ymm5,ymm5,ymm11 5915 5916 vpsrld ymm1,ymm8,11 5917 vpxor ymm7,ymm7,ymm2 5918 vpslld ymm2,ymm8,21 5919 vpaddd ymm5,ymm5,YMMWORD[rbp] 5920 vpxor ymm7,ymm7,ymm1 5921 5922 vpsrld ymm1,ymm8,25 5923 vpxor ymm7,ymm7,ymm2 5924 5925 vpslld ymm2,ymm8,7 5926 vpandn ymm0,ymm8,ymm10 5927 vpand ymm3,ymm8,ymm9 5928 5929 vpxor ymm7,ymm7,ymm1 5930 5931 vpsrld ymm11,ymm12,2 5932 vpxor ymm7,ymm7,ymm2 5933 5934 vpslld ymm1,ymm12,30 5935 vpxor ymm0,ymm0,ymm3 5936 vpxor ymm3,ymm13,ymm12 5937 5938 vpxor ymm11,ymm11,ymm1 5939 vpaddd ymm5,ymm5,ymm7 5940 5941 vpsrld ymm1,ymm12,13 5942 5943 vpslld ymm2,ymm12,19 5944 vpaddd ymm5,ymm5,ymm0 5945 vpand ymm4,ymm4,ymm3 5946 5947 vpxor ymm7,ymm11,ymm1 5948 5949 vpsrld ymm1,ymm12,22 5950 vpxor ymm7,ymm7,ymm2 5951 5952 vpslld ymm2,ymm12,10 5953 vpxor ymm11,ymm13,ymm4 5954 vpaddd ymm15,ymm15,ymm5 5955 5956 vpxor ymm7,ymm7,ymm1 5957 vpxor ymm7,ymm7,ymm2 5958 5959 vpaddd ymm11,ymm11,ymm5 5960 vpaddd ymm11,ymm11,ymm7 5961 vmovd xmm5,DWORD[20+r12] 5962 vmovd xmm0,DWORD[20+r8] 5963 vmovd xmm1,DWORD[20+r13] 5964 vmovd xmm2,DWORD[20+r9] 5965 vpinsrd xmm5,xmm5,DWORD[20+r14],1 5966 vpinsrd xmm0,xmm0,DWORD[20+r10],1 5967 vpinsrd xmm1,xmm1,DWORD[20+r15],1 5968 vpunpckldq ymm5,ymm5,ymm1 5969 vpinsrd xmm2,xmm2,DWORD[20+r11],1 5970 vpunpckldq ymm0,ymm0,ymm2 5971 vinserti128 ymm5,ymm5,xmm0,1 5972 vpshufb ymm5,ymm5,ymm6 5973 vpsrld ymm7,ymm15,6 5974 vpslld ymm2,ymm15,26 5975 vmovdqu YMMWORD[(160-128)+rax],ymm5 5976 vpaddd ymm5,ymm5,ymm10 5977 5978 vpsrld ymm1,ymm15,11 5979 vpxor ymm7,ymm7,ymm2 5980 vpslld ymm2,ymm15,21 5981 vpaddd ymm5,ymm5,YMMWORD[32+rbp] 5982 vpxor ymm7,ymm7,ymm1 5983 5984 vpsrld ymm1,ymm15,25 5985 vpxor ymm7,ymm7,ymm2 5986 5987 vpslld ymm2,ymm15,7 5988 vpandn ymm0,ymm15,ymm9 5989 vpand ymm4,ymm15,ymm8 5990 5991 vpxor ymm7,ymm7,ymm1 5992 5993 vpsrld ymm10,ymm11,2 5994 vpxor ymm7,ymm7,ymm2 5995 5996 vpslld ymm1,ymm11,30 5997 vpxor ymm0,ymm0,ymm4 5998 vpxor ymm4,ymm12,ymm11 5999 6000 vpxor ymm10,ymm10,ymm1 6001 vpaddd ymm5,ymm5,ymm7 6002 6003 vpsrld ymm1,ymm11,13 6004 6005 vpslld ymm2,ymm11,19 6006 vpaddd ymm5,ymm5,ymm0 6007 vpand ymm3,ymm3,ymm4 6008 6009 vpxor ymm7,ymm10,ymm1 6010 6011 vpsrld ymm1,ymm11,22 6012 vpxor ymm7,ymm7,ymm2 6013 6014 vpslld ymm2,ymm11,10 6015 vpxor ymm10,ymm12,ymm3 6016 vpaddd ymm14,ymm14,ymm5 6017 6018 vpxor ymm7,ymm7,ymm1 6019 vpxor ymm7,ymm7,ymm2 6020 6021 vpaddd ymm10,ymm10,ymm5 6022 vpaddd ymm10,ymm10,ymm7 6023 vmovd xmm5,DWORD[24+r12] 6024 vmovd xmm0,DWORD[24+r8] 6025 vmovd xmm1,DWORD[24+r13] 6026 vmovd xmm2,DWORD[24+r9] 6027 vpinsrd xmm5,xmm5,DWORD[24+r14],1 6028 vpinsrd xmm0,xmm0,DWORD[24+r10],1 6029 vpinsrd xmm1,xmm1,DWORD[24+r15],1 6030 vpunpckldq ymm5,ymm5,ymm1 6031 vpinsrd xmm2,xmm2,DWORD[24+r11],1 6032 vpunpckldq ymm0,ymm0,ymm2 6033 vinserti128 ymm5,ymm5,xmm0,1 6034 vpshufb ymm5,ymm5,ymm6 6035 vpsrld ymm7,ymm14,6 6036 vpslld ymm2,ymm14,26 6037 vmovdqu YMMWORD[(192-128)+rax],ymm5 6038 vpaddd ymm5,ymm5,ymm9 6039 6040 vpsrld ymm1,ymm14,11 6041 vpxor ymm7,ymm7,ymm2 6042 vpslld ymm2,ymm14,21 6043 vpaddd ymm5,ymm5,YMMWORD[64+rbp] 6044 vpxor ymm7,ymm7,ymm1 6045 6046 vpsrld ymm1,ymm14,25 6047 vpxor ymm7,ymm7,ymm2 6048 6049 vpslld ymm2,ymm14,7 6050 vpandn ymm0,ymm14,ymm8 6051 vpand ymm3,ymm14,ymm15 6052 6053 vpxor ymm7,ymm7,ymm1 6054 6055 vpsrld ymm9,ymm10,2 6056 vpxor ymm7,ymm7,ymm2 6057 6058 vpslld ymm1,ymm10,30 6059 vpxor ymm0,ymm0,ymm3 6060 vpxor ymm3,ymm11,ymm10 6061 6062 vpxor ymm9,ymm9,ymm1 6063 vpaddd ymm5,ymm5,ymm7 6064 6065 vpsrld ymm1,ymm10,13 6066 6067 vpslld ymm2,ymm10,19 6068 vpaddd ymm5,ymm5,ymm0 6069 vpand ymm4,ymm4,ymm3 6070 6071 vpxor ymm7,ymm9,ymm1 6072 6073 vpsrld ymm1,ymm10,22 6074 vpxor ymm7,ymm7,ymm2 6075 6076 vpslld ymm2,ymm10,10 6077 vpxor ymm9,ymm11,ymm4 6078 vpaddd ymm13,ymm13,ymm5 6079 6080 vpxor ymm7,ymm7,ymm1 6081 vpxor ymm7,ymm7,ymm2 6082 6083 vpaddd ymm9,ymm9,ymm5 6084 vpaddd ymm9,ymm9,ymm7 6085 vmovd xmm5,DWORD[28+r12] 6086 vmovd xmm0,DWORD[28+r8] 6087 vmovd xmm1,DWORD[28+r13] 6088 vmovd xmm2,DWORD[28+r9] 6089 vpinsrd xmm5,xmm5,DWORD[28+r14],1 6090 vpinsrd xmm0,xmm0,DWORD[28+r10],1 6091 vpinsrd xmm1,xmm1,DWORD[28+r15],1 6092 vpunpckldq ymm5,ymm5,ymm1 6093 vpinsrd xmm2,xmm2,DWORD[28+r11],1 6094 vpunpckldq ymm0,ymm0,ymm2 6095 vinserti128 ymm5,ymm5,xmm0,1 6096 vpshufb ymm5,ymm5,ymm6 6097 vpsrld ymm7,ymm13,6 6098 vpslld ymm2,ymm13,26 6099 vmovdqu YMMWORD[(224-128)+rax],ymm5 6100 vpaddd ymm5,ymm5,ymm8 6101 6102 vpsrld ymm1,ymm13,11 6103 vpxor ymm7,ymm7,ymm2 6104 vpslld ymm2,ymm13,21 6105 vpaddd ymm5,ymm5,YMMWORD[96+rbp] 6106 vpxor ymm7,ymm7,ymm1 6107 6108 vpsrld ymm1,ymm13,25 6109 vpxor ymm7,ymm7,ymm2 6110 6111 vpslld ymm2,ymm13,7 6112 vpandn ymm0,ymm13,ymm15 6113 vpand ymm4,ymm13,ymm14 6114 6115 vpxor ymm7,ymm7,ymm1 6116 6117 vpsrld ymm8,ymm9,2 6118 vpxor ymm7,ymm7,ymm2 6119 6120 vpslld ymm1,ymm9,30 6121 vpxor ymm0,ymm0,ymm4 6122 vpxor ymm4,ymm10,ymm9 6123 6124 vpxor ymm8,ymm8,ymm1 6125 vpaddd ymm5,ymm5,ymm7 6126 6127 vpsrld ymm1,ymm9,13 6128 6129 vpslld ymm2,ymm9,19 6130 vpaddd ymm5,ymm5,ymm0 6131 vpand ymm3,ymm3,ymm4 6132 6133 vpxor ymm7,ymm8,ymm1 6134 6135 vpsrld ymm1,ymm9,22 6136 vpxor ymm7,ymm7,ymm2 6137 6138 vpslld ymm2,ymm9,10 6139 vpxor ymm8,ymm10,ymm3 6140 vpaddd ymm12,ymm12,ymm5 6141 6142 vpxor ymm7,ymm7,ymm1 6143 vpxor ymm7,ymm7,ymm2 6144 6145 vpaddd ymm8,ymm8,ymm5 6146 vpaddd ymm8,ymm8,ymm7 6147 add rbp,256 6148 vmovd xmm5,DWORD[32+r12] 6149 vmovd xmm0,DWORD[32+r8] 6150 vmovd xmm1,DWORD[32+r13] 6151 vmovd xmm2,DWORD[32+r9] 6152 vpinsrd xmm5,xmm5,DWORD[32+r14],1 6153 vpinsrd xmm0,xmm0,DWORD[32+r10],1 6154 vpinsrd xmm1,xmm1,DWORD[32+r15],1 6155 vpunpckldq ymm5,ymm5,ymm1 6156 vpinsrd xmm2,xmm2,DWORD[32+r11],1 6157 vpunpckldq ymm0,ymm0,ymm2 6158 vinserti128 ymm5,ymm5,xmm0,1 6159 vpshufb ymm5,ymm5,ymm6 6160 vpsrld ymm7,ymm12,6 6161 vpslld ymm2,ymm12,26 6162 vmovdqu YMMWORD[(256-256-128)+rbx],ymm5 6163 vpaddd ymm5,ymm5,ymm15 6164 6165 vpsrld ymm1,ymm12,11 6166 vpxor ymm7,ymm7,ymm2 6167 vpslld ymm2,ymm12,21 6168 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp] 6169 vpxor ymm7,ymm7,ymm1 6170 6171 vpsrld ymm1,ymm12,25 6172 vpxor ymm7,ymm7,ymm2 6173 6174 vpslld ymm2,ymm12,7 6175 vpandn ymm0,ymm12,ymm14 6176 vpand ymm3,ymm12,ymm13 6177 6178 vpxor ymm7,ymm7,ymm1 6179 6180 vpsrld ymm15,ymm8,2 6181 vpxor ymm7,ymm7,ymm2 6182 6183 vpslld ymm1,ymm8,30 6184 vpxor ymm0,ymm0,ymm3 6185 vpxor ymm3,ymm9,ymm8 6186 6187 vpxor ymm15,ymm15,ymm1 6188 vpaddd ymm5,ymm5,ymm7 6189 6190 vpsrld ymm1,ymm8,13 6191 6192 vpslld ymm2,ymm8,19 6193 vpaddd ymm5,ymm5,ymm0 6194 vpand ymm4,ymm4,ymm3 6195 6196 vpxor ymm7,ymm15,ymm1 6197 6198 vpsrld ymm1,ymm8,22 6199 vpxor ymm7,ymm7,ymm2 6200 6201 vpslld ymm2,ymm8,10 6202 vpxor ymm15,ymm9,ymm4 6203 vpaddd ymm11,ymm11,ymm5 6204 6205 vpxor ymm7,ymm7,ymm1 6206 vpxor ymm7,ymm7,ymm2 6207 6208 vpaddd ymm15,ymm15,ymm5 6209 vpaddd ymm15,ymm15,ymm7 6210 vmovd xmm5,DWORD[36+r12] 6211 vmovd xmm0,DWORD[36+r8] 6212 vmovd xmm1,DWORD[36+r13] 6213 vmovd xmm2,DWORD[36+r9] 6214 vpinsrd xmm5,xmm5,DWORD[36+r14],1 6215 vpinsrd xmm0,xmm0,DWORD[36+r10],1 6216 vpinsrd xmm1,xmm1,DWORD[36+r15],1 6217 vpunpckldq ymm5,ymm5,ymm1 6218 vpinsrd xmm2,xmm2,DWORD[36+r11],1 6219 vpunpckldq ymm0,ymm0,ymm2 6220 vinserti128 ymm5,ymm5,xmm0,1 6221 vpshufb ymm5,ymm5,ymm6 6222 vpsrld ymm7,ymm11,6 6223 vpslld ymm2,ymm11,26 6224 vmovdqu YMMWORD[(288-256-128)+rbx],ymm5 6225 vpaddd ymm5,ymm5,ymm14 6226 6227 vpsrld ymm1,ymm11,11 6228 vpxor ymm7,ymm7,ymm2 6229 vpslld ymm2,ymm11,21 6230 vpaddd ymm5,ymm5,YMMWORD[((-96))+rbp] 6231 vpxor ymm7,ymm7,ymm1 6232 6233 vpsrld ymm1,ymm11,25 6234 vpxor ymm7,ymm7,ymm2 6235 6236 vpslld ymm2,ymm11,7 6237 vpandn ymm0,ymm11,ymm13 6238 vpand ymm4,ymm11,ymm12 6239 6240 vpxor ymm7,ymm7,ymm1 6241 6242 vpsrld ymm14,ymm15,2 6243 vpxor ymm7,ymm7,ymm2 6244 6245 vpslld ymm1,ymm15,30 6246 vpxor ymm0,ymm0,ymm4 6247 vpxor ymm4,ymm8,ymm15 6248 6249 vpxor ymm14,ymm14,ymm1 6250 vpaddd ymm5,ymm5,ymm7 6251 6252 vpsrld ymm1,ymm15,13 6253 6254 vpslld ymm2,ymm15,19 6255 vpaddd ymm5,ymm5,ymm0 6256 vpand ymm3,ymm3,ymm4 6257 6258 vpxor ymm7,ymm14,ymm1 6259 6260 vpsrld ymm1,ymm15,22 6261 vpxor ymm7,ymm7,ymm2 6262 6263 vpslld ymm2,ymm15,10 6264 vpxor ymm14,ymm8,ymm3 6265 vpaddd ymm10,ymm10,ymm5 6266 6267 vpxor ymm7,ymm7,ymm1 6268 vpxor ymm7,ymm7,ymm2 6269 6270 vpaddd ymm14,ymm14,ymm5 6271 vpaddd ymm14,ymm14,ymm7 6272 vmovd xmm5,DWORD[40+r12] 6273 vmovd xmm0,DWORD[40+r8] 6274 vmovd xmm1,DWORD[40+r13] 6275 vmovd xmm2,DWORD[40+r9] 6276 vpinsrd xmm5,xmm5,DWORD[40+r14],1 6277 vpinsrd xmm0,xmm0,DWORD[40+r10],1 6278 vpinsrd xmm1,xmm1,DWORD[40+r15],1 6279 vpunpckldq ymm5,ymm5,ymm1 6280 vpinsrd xmm2,xmm2,DWORD[40+r11],1 6281 vpunpckldq ymm0,ymm0,ymm2 6282 vinserti128 ymm5,ymm5,xmm0,1 6283 vpshufb ymm5,ymm5,ymm6 6284 vpsrld ymm7,ymm10,6 6285 vpslld ymm2,ymm10,26 6286 vmovdqu YMMWORD[(320-256-128)+rbx],ymm5 6287 vpaddd ymm5,ymm5,ymm13 6288 6289 vpsrld ymm1,ymm10,11 6290 vpxor ymm7,ymm7,ymm2 6291 vpslld ymm2,ymm10,21 6292 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp] 6293 vpxor ymm7,ymm7,ymm1 6294 6295 vpsrld ymm1,ymm10,25 6296 vpxor ymm7,ymm7,ymm2 6297 6298 vpslld ymm2,ymm10,7 6299 vpandn ymm0,ymm10,ymm12 6300 vpand ymm3,ymm10,ymm11 6301 6302 vpxor ymm7,ymm7,ymm1 6303 6304 vpsrld ymm13,ymm14,2 6305 vpxor ymm7,ymm7,ymm2 6306 6307 vpslld ymm1,ymm14,30 6308 vpxor ymm0,ymm0,ymm3 6309 vpxor ymm3,ymm15,ymm14 6310 6311 vpxor ymm13,ymm13,ymm1 6312 vpaddd ymm5,ymm5,ymm7 6313 6314 vpsrld ymm1,ymm14,13 6315 6316 vpslld ymm2,ymm14,19 6317 vpaddd ymm5,ymm5,ymm0 6318 vpand ymm4,ymm4,ymm3 6319 6320 vpxor ymm7,ymm13,ymm1 6321 6322 vpsrld ymm1,ymm14,22 6323 vpxor ymm7,ymm7,ymm2 6324 6325 vpslld ymm2,ymm14,10 6326 vpxor ymm13,ymm15,ymm4 6327 vpaddd ymm9,ymm9,ymm5 6328 6329 vpxor ymm7,ymm7,ymm1 6330 vpxor ymm7,ymm7,ymm2 6331 6332 vpaddd ymm13,ymm13,ymm5 6333 vpaddd ymm13,ymm13,ymm7 6334 vmovd xmm5,DWORD[44+r12] 6335 vmovd xmm0,DWORD[44+r8] 6336 vmovd xmm1,DWORD[44+r13] 6337 vmovd xmm2,DWORD[44+r9] 6338 vpinsrd xmm5,xmm5,DWORD[44+r14],1 6339 vpinsrd xmm0,xmm0,DWORD[44+r10],1 6340 vpinsrd xmm1,xmm1,DWORD[44+r15],1 6341 vpunpckldq ymm5,ymm5,ymm1 6342 vpinsrd xmm2,xmm2,DWORD[44+r11],1 6343 vpunpckldq ymm0,ymm0,ymm2 6344 vinserti128 ymm5,ymm5,xmm0,1 6345 vpshufb ymm5,ymm5,ymm6 6346 vpsrld ymm7,ymm9,6 6347 vpslld ymm2,ymm9,26 6348 vmovdqu YMMWORD[(352-256-128)+rbx],ymm5 6349 vpaddd ymm5,ymm5,ymm12 6350 6351 vpsrld ymm1,ymm9,11 6352 vpxor ymm7,ymm7,ymm2 6353 vpslld ymm2,ymm9,21 6354 vpaddd ymm5,ymm5,YMMWORD[((-32))+rbp] 6355 vpxor ymm7,ymm7,ymm1 6356 6357 vpsrld ymm1,ymm9,25 6358 vpxor ymm7,ymm7,ymm2 6359 6360 vpslld ymm2,ymm9,7 6361 vpandn ymm0,ymm9,ymm11 6362 vpand ymm4,ymm9,ymm10 6363 6364 vpxor ymm7,ymm7,ymm1 6365 6366 vpsrld ymm12,ymm13,2 6367 vpxor ymm7,ymm7,ymm2 6368 6369 vpslld ymm1,ymm13,30 6370 vpxor ymm0,ymm0,ymm4 6371 vpxor ymm4,ymm14,ymm13 6372 6373 vpxor ymm12,ymm12,ymm1 6374 vpaddd ymm5,ymm5,ymm7 6375 6376 vpsrld ymm1,ymm13,13 6377 6378 vpslld ymm2,ymm13,19 6379 vpaddd ymm5,ymm5,ymm0 6380 vpand ymm3,ymm3,ymm4 6381 6382 vpxor ymm7,ymm12,ymm1 6383 6384 vpsrld ymm1,ymm13,22 6385 vpxor ymm7,ymm7,ymm2 6386 6387 vpslld ymm2,ymm13,10 6388 vpxor ymm12,ymm14,ymm3 6389 vpaddd ymm8,ymm8,ymm5 6390 6391 vpxor ymm7,ymm7,ymm1 6392 vpxor ymm7,ymm7,ymm2 6393 6394 vpaddd ymm12,ymm12,ymm5 6395 vpaddd ymm12,ymm12,ymm7 6396 vmovd xmm5,DWORD[48+r12] 6397 vmovd xmm0,DWORD[48+r8] 6398 vmovd xmm1,DWORD[48+r13] 6399 vmovd xmm2,DWORD[48+r9] 6400 vpinsrd xmm5,xmm5,DWORD[48+r14],1 6401 vpinsrd xmm0,xmm0,DWORD[48+r10],1 6402 vpinsrd xmm1,xmm1,DWORD[48+r15],1 6403 vpunpckldq ymm5,ymm5,ymm1 6404 vpinsrd xmm2,xmm2,DWORD[48+r11],1 6405 vpunpckldq ymm0,ymm0,ymm2 6406 vinserti128 ymm5,ymm5,xmm0,1 6407 vpshufb ymm5,ymm5,ymm6 6408 vpsrld ymm7,ymm8,6 6409 vpslld ymm2,ymm8,26 6410 vmovdqu YMMWORD[(384-256-128)+rbx],ymm5 6411 vpaddd ymm5,ymm5,ymm11 6412 6413 vpsrld ymm1,ymm8,11 6414 vpxor ymm7,ymm7,ymm2 6415 vpslld ymm2,ymm8,21 6416 vpaddd ymm5,ymm5,YMMWORD[rbp] 6417 vpxor ymm7,ymm7,ymm1 6418 6419 vpsrld ymm1,ymm8,25 6420 vpxor ymm7,ymm7,ymm2 6421 6422 vpslld ymm2,ymm8,7 6423 vpandn ymm0,ymm8,ymm10 6424 vpand ymm3,ymm8,ymm9 6425 6426 vpxor ymm7,ymm7,ymm1 6427 6428 vpsrld ymm11,ymm12,2 6429 vpxor ymm7,ymm7,ymm2 6430 6431 vpslld ymm1,ymm12,30 6432 vpxor ymm0,ymm0,ymm3 6433 vpxor ymm3,ymm13,ymm12 6434 6435 vpxor ymm11,ymm11,ymm1 6436 vpaddd ymm5,ymm5,ymm7 6437 6438 vpsrld ymm1,ymm12,13 6439 6440 vpslld ymm2,ymm12,19 6441 vpaddd ymm5,ymm5,ymm0 6442 vpand ymm4,ymm4,ymm3 6443 6444 vpxor ymm7,ymm11,ymm1 6445 6446 vpsrld ymm1,ymm12,22 6447 vpxor ymm7,ymm7,ymm2 6448 6449 vpslld ymm2,ymm12,10 6450 vpxor ymm11,ymm13,ymm4 6451 vpaddd ymm15,ymm15,ymm5 6452 6453 vpxor ymm7,ymm7,ymm1 6454 vpxor ymm7,ymm7,ymm2 6455 6456 vpaddd ymm11,ymm11,ymm5 6457 vpaddd ymm11,ymm11,ymm7 6458 vmovd xmm5,DWORD[52+r12] 6459 vmovd xmm0,DWORD[52+r8] 6460 vmovd xmm1,DWORD[52+r13] 6461 vmovd xmm2,DWORD[52+r9] 6462 vpinsrd xmm5,xmm5,DWORD[52+r14],1 6463 vpinsrd xmm0,xmm0,DWORD[52+r10],1 6464 vpinsrd xmm1,xmm1,DWORD[52+r15],1 6465 vpunpckldq ymm5,ymm5,ymm1 6466 vpinsrd xmm2,xmm2,DWORD[52+r11],1 6467 vpunpckldq ymm0,ymm0,ymm2 6468 vinserti128 ymm5,ymm5,xmm0,1 6469 vpshufb ymm5,ymm5,ymm6 6470 vpsrld ymm7,ymm15,6 6471 vpslld ymm2,ymm15,26 6472 vmovdqu YMMWORD[(416-256-128)+rbx],ymm5 6473 vpaddd ymm5,ymm5,ymm10 6474 6475 vpsrld ymm1,ymm15,11 6476 vpxor ymm7,ymm7,ymm2 6477 vpslld ymm2,ymm15,21 6478 vpaddd ymm5,ymm5,YMMWORD[32+rbp] 6479 vpxor ymm7,ymm7,ymm1 6480 6481 vpsrld ymm1,ymm15,25 6482 vpxor ymm7,ymm7,ymm2 6483 6484 vpslld ymm2,ymm15,7 6485 vpandn ymm0,ymm15,ymm9 6486 vpand ymm4,ymm15,ymm8 6487 6488 vpxor ymm7,ymm7,ymm1 6489 6490 vpsrld ymm10,ymm11,2 6491 vpxor ymm7,ymm7,ymm2 6492 6493 vpslld ymm1,ymm11,30 6494 vpxor ymm0,ymm0,ymm4 6495 vpxor ymm4,ymm12,ymm11 6496 6497 vpxor ymm10,ymm10,ymm1 6498 vpaddd ymm5,ymm5,ymm7 6499 6500 vpsrld ymm1,ymm11,13 6501 6502 vpslld ymm2,ymm11,19 6503 vpaddd ymm5,ymm5,ymm0 6504 vpand ymm3,ymm3,ymm4 6505 6506 vpxor ymm7,ymm10,ymm1 6507 6508 vpsrld ymm1,ymm11,22 6509 vpxor ymm7,ymm7,ymm2 6510 6511 vpslld ymm2,ymm11,10 6512 vpxor ymm10,ymm12,ymm3 6513 vpaddd ymm14,ymm14,ymm5 6514 6515 vpxor ymm7,ymm7,ymm1 6516 vpxor ymm7,ymm7,ymm2 6517 6518 vpaddd ymm10,ymm10,ymm5 6519 vpaddd ymm10,ymm10,ymm7 6520 vmovd xmm5,DWORD[56+r12] 6521 vmovd xmm0,DWORD[56+r8] 6522 vmovd xmm1,DWORD[56+r13] 6523 vmovd xmm2,DWORD[56+r9] 6524 vpinsrd xmm5,xmm5,DWORD[56+r14],1 6525 vpinsrd xmm0,xmm0,DWORD[56+r10],1 6526 vpinsrd xmm1,xmm1,DWORD[56+r15],1 6527 vpunpckldq ymm5,ymm5,ymm1 6528 vpinsrd xmm2,xmm2,DWORD[56+r11],1 6529 vpunpckldq ymm0,ymm0,ymm2 6530 vinserti128 ymm5,ymm5,xmm0,1 6531 vpshufb ymm5,ymm5,ymm6 6532 vpsrld ymm7,ymm14,6 6533 vpslld ymm2,ymm14,26 6534 vmovdqu YMMWORD[(448-256-128)+rbx],ymm5 6535 vpaddd ymm5,ymm5,ymm9 6536 6537 vpsrld ymm1,ymm14,11 6538 vpxor ymm7,ymm7,ymm2 6539 vpslld ymm2,ymm14,21 6540 vpaddd ymm5,ymm5,YMMWORD[64+rbp] 6541 vpxor ymm7,ymm7,ymm1 6542 6543 vpsrld ymm1,ymm14,25 6544 vpxor ymm7,ymm7,ymm2 6545 6546 vpslld ymm2,ymm14,7 6547 vpandn ymm0,ymm14,ymm8 6548 vpand ymm3,ymm14,ymm15 6549 6550 vpxor ymm7,ymm7,ymm1 6551 6552 vpsrld ymm9,ymm10,2 6553 vpxor ymm7,ymm7,ymm2 6554 6555 vpslld ymm1,ymm10,30 6556 vpxor ymm0,ymm0,ymm3 6557 vpxor ymm3,ymm11,ymm10 6558 6559 vpxor ymm9,ymm9,ymm1 6560 vpaddd ymm5,ymm5,ymm7 6561 6562 vpsrld ymm1,ymm10,13 6563 6564 vpslld ymm2,ymm10,19 6565 vpaddd ymm5,ymm5,ymm0 6566 vpand ymm4,ymm4,ymm3 6567 6568 vpxor ymm7,ymm9,ymm1 6569 6570 vpsrld ymm1,ymm10,22 6571 vpxor ymm7,ymm7,ymm2 6572 6573 vpslld ymm2,ymm10,10 6574 vpxor ymm9,ymm11,ymm4 6575 vpaddd ymm13,ymm13,ymm5 6576 6577 vpxor ymm7,ymm7,ymm1 6578 vpxor ymm7,ymm7,ymm2 6579 6580 vpaddd ymm9,ymm9,ymm5 6581 vpaddd ymm9,ymm9,ymm7 6582 vmovd xmm5,DWORD[60+r12] 6583 lea r12,[64+r12] 6584 vmovd xmm0,DWORD[60+r8] 6585 lea r8,[64+r8] 6586 vmovd xmm1,DWORD[60+r13] 6587 lea r13,[64+r13] 6588 vmovd xmm2,DWORD[60+r9] 6589 lea r9,[64+r9] 6590 vpinsrd xmm5,xmm5,DWORD[60+r14],1 6591 lea r14,[64+r14] 6592 vpinsrd xmm0,xmm0,DWORD[60+r10],1 6593 lea r10,[64+r10] 6594 vpinsrd xmm1,xmm1,DWORD[60+r15],1 6595 lea r15,[64+r15] 6596 vpunpckldq ymm5,ymm5,ymm1 6597 vpinsrd xmm2,xmm2,DWORD[60+r11],1 6598 lea r11,[64+r11] 6599 vpunpckldq ymm0,ymm0,ymm2 6600 vinserti128 ymm5,ymm5,xmm0,1 6601 vpshufb ymm5,ymm5,ymm6 6602 vpsrld ymm7,ymm13,6 6603 vpslld ymm2,ymm13,26 6604 vmovdqu YMMWORD[(480-256-128)+rbx],ymm5 6605 vpaddd ymm5,ymm5,ymm8 6606 6607 vpsrld ymm1,ymm13,11 6608 vpxor ymm7,ymm7,ymm2 6609 vpslld ymm2,ymm13,21 6610 vpaddd ymm5,ymm5,YMMWORD[96+rbp] 6611 vpxor ymm7,ymm7,ymm1 6612 6613 vpsrld ymm1,ymm13,25 6614 vpxor ymm7,ymm7,ymm2 6615 prefetcht0 [63+r12] 6616 vpslld ymm2,ymm13,7 6617 vpandn ymm0,ymm13,ymm15 6618 vpand ymm4,ymm13,ymm14 6619 prefetcht0 [63+r13] 6620 vpxor ymm7,ymm7,ymm1 6621 6622 vpsrld ymm8,ymm9,2 6623 vpxor ymm7,ymm7,ymm2 6624 prefetcht0 [63+r14] 6625 vpslld ymm1,ymm9,30 6626 vpxor ymm0,ymm0,ymm4 6627 vpxor ymm4,ymm10,ymm9 6628 prefetcht0 [63+r15] 6629 vpxor ymm8,ymm8,ymm1 6630 vpaddd ymm5,ymm5,ymm7 6631 6632 vpsrld ymm1,ymm9,13 6633 prefetcht0 [63+r8] 6634 vpslld ymm2,ymm9,19 6635 vpaddd ymm5,ymm5,ymm0 6636 vpand ymm3,ymm3,ymm4 6637 prefetcht0 [63+r9] 6638 vpxor ymm7,ymm8,ymm1 6639 6640 vpsrld ymm1,ymm9,22 6641 vpxor ymm7,ymm7,ymm2 6642 prefetcht0 [63+r10] 6643 vpslld ymm2,ymm9,10 6644 vpxor ymm8,ymm10,ymm3 6645 vpaddd ymm12,ymm12,ymm5 6646 prefetcht0 [63+r11] 6647 vpxor ymm7,ymm7,ymm1 6648 vpxor ymm7,ymm7,ymm2 6649 6650 vpaddd ymm8,ymm8,ymm5 6651 vpaddd ymm8,ymm8,ymm7 6652 add rbp,256 6653 vmovdqu ymm5,YMMWORD[((0-128))+rax] 6654 mov ecx,3 6655 jmp NEAR $L$oop_16_xx_avx2 6656 ALIGN 32 6657 $L$oop_16_xx_avx2: 6658 vmovdqu ymm6,YMMWORD[((32-128))+rax] 6659 vpaddd ymm5,ymm5,YMMWORD[((288-256-128))+rbx] 6660 6661 vpsrld ymm7,ymm6,3 6662 vpsrld ymm1,ymm6,7 6663 vpslld ymm2,ymm6,25 6664 vpxor ymm7,ymm7,ymm1 6665 vpsrld ymm1,ymm6,18 6666 vpxor ymm7,ymm7,ymm2 6667 vpslld ymm2,ymm6,14 6668 vmovdqu ymm0,YMMWORD[((448-256-128))+rbx] 6669 vpsrld ymm3,ymm0,10 6670 6671 vpxor ymm7,ymm7,ymm1 6672 vpsrld ymm1,ymm0,17 6673 vpxor ymm7,ymm7,ymm2 6674 vpslld ymm2,ymm0,15 6675 vpaddd ymm5,ymm5,ymm7 6676 vpxor ymm7,ymm3,ymm1 6677 vpsrld ymm1,ymm0,19 6678 vpxor ymm7,ymm7,ymm2 6679 vpslld ymm2,ymm0,13 6680 vpxor ymm7,ymm7,ymm1 6681 vpxor ymm7,ymm7,ymm2 6682 vpaddd ymm5,ymm5,ymm7 6683 vpsrld ymm7,ymm12,6 6684 vpslld ymm2,ymm12,26 6685 vmovdqu YMMWORD[(0-128)+rax],ymm5 6686 vpaddd ymm5,ymm5,ymm15 6687 6688 vpsrld ymm1,ymm12,11 6689 vpxor ymm7,ymm7,ymm2 6690 vpslld ymm2,ymm12,21 6691 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp] 6692 vpxor ymm7,ymm7,ymm1 6693 6694 vpsrld ymm1,ymm12,25 6695 vpxor ymm7,ymm7,ymm2 6696 6697 vpslld ymm2,ymm12,7 6698 vpandn ymm0,ymm12,ymm14 6699 vpand ymm3,ymm12,ymm13 6700 6701 vpxor ymm7,ymm7,ymm1 6702 6703 vpsrld ymm15,ymm8,2 6704 vpxor ymm7,ymm7,ymm2 6705 6706 vpslld ymm1,ymm8,30 6707 vpxor ymm0,ymm0,ymm3 6708 vpxor ymm3,ymm9,ymm8 6709 6710 vpxor ymm15,ymm15,ymm1 6711 vpaddd ymm5,ymm5,ymm7 6712 6713 vpsrld ymm1,ymm8,13 6714 6715 vpslld ymm2,ymm8,19 6716 vpaddd ymm5,ymm5,ymm0 6717 vpand ymm4,ymm4,ymm3 6718 6719 vpxor ymm7,ymm15,ymm1 6720 6721 vpsrld ymm1,ymm8,22 6722 vpxor ymm7,ymm7,ymm2 6723 6724 vpslld ymm2,ymm8,10 6725 vpxor ymm15,ymm9,ymm4 6726 vpaddd ymm11,ymm11,ymm5 6727 6728 vpxor ymm7,ymm7,ymm1 6729 vpxor ymm7,ymm7,ymm2 6730 6731 vpaddd ymm15,ymm15,ymm5 6732 vpaddd ymm15,ymm15,ymm7 6733 vmovdqu ymm5,YMMWORD[((64-128))+rax] 6734 vpaddd ymm6,ymm6,YMMWORD[((320-256-128))+rbx] 6735 6736 vpsrld ymm7,ymm5,3 6737 vpsrld ymm1,ymm5,7 6738 vpslld ymm2,ymm5,25 6739 vpxor ymm7,ymm7,ymm1 6740 vpsrld ymm1,ymm5,18 6741 vpxor ymm7,ymm7,ymm2 6742 vpslld ymm2,ymm5,14 6743 vmovdqu ymm0,YMMWORD[((480-256-128))+rbx] 6744 vpsrld ymm4,ymm0,10 6745 6746 vpxor ymm7,ymm7,ymm1 6747 vpsrld ymm1,ymm0,17 6748 vpxor ymm7,ymm7,ymm2 6749 vpslld ymm2,ymm0,15 6750 vpaddd ymm6,ymm6,ymm7 6751 vpxor ymm7,ymm4,ymm1 6752 vpsrld ymm1,ymm0,19 6753 vpxor ymm7,ymm7,ymm2 6754 vpslld ymm2,ymm0,13 6755 vpxor ymm7,ymm7,ymm1 6756 vpxor ymm7,ymm7,ymm2 6757 vpaddd ymm6,ymm6,ymm7 6758 vpsrld ymm7,ymm11,6 6759 vpslld ymm2,ymm11,26 6760 vmovdqu YMMWORD[(32-128)+rax],ymm6 6761 vpaddd ymm6,ymm6,ymm14 6762 6763 vpsrld ymm1,ymm11,11 6764 vpxor ymm7,ymm7,ymm2 6765 vpslld ymm2,ymm11,21 6766 vpaddd ymm6,ymm6,YMMWORD[((-96))+rbp] 6767 vpxor ymm7,ymm7,ymm1 6768 6769 vpsrld ymm1,ymm11,25 6770 vpxor ymm7,ymm7,ymm2 6771 6772 vpslld ymm2,ymm11,7 6773 vpandn ymm0,ymm11,ymm13 6774 vpand ymm4,ymm11,ymm12 6775 6776 vpxor ymm7,ymm7,ymm1 6777 6778 vpsrld ymm14,ymm15,2 6779 vpxor ymm7,ymm7,ymm2 6780 6781 vpslld ymm1,ymm15,30 6782 vpxor ymm0,ymm0,ymm4 6783 vpxor ymm4,ymm8,ymm15 6784 6785 vpxor ymm14,ymm14,ymm1 6786 vpaddd ymm6,ymm6,ymm7 6787 6788 vpsrld ymm1,ymm15,13 6789 6790 vpslld ymm2,ymm15,19 6791 vpaddd ymm6,ymm6,ymm0 6792 vpand ymm3,ymm3,ymm4 6793 6794 vpxor ymm7,ymm14,ymm1 6795 6796 vpsrld ymm1,ymm15,22 6797 vpxor ymm7,ymm7,ymm2 6798 6799 vpslld ymm2,ymm15,10 6800 vpxor ymm14,ymm8,ymm3 6801 vpaddd ymm10,ymm10,ymm6 6802 6803 vpxor ymm7,ymm7,ymm1 6804 vpxor ymm7,ymm7,ymm2 6805 6806 vpaddd ymm14,ymm14,ymm6 6807 vpaddd ymm14,ymm14,ymm7 6808 vmovdqu ymm6,YMMWORD[((96-128))+rax] 6809 vpaddd ymm5,ymm5,YMMWORD[((352-256-128))+rbx] 6810 6811 vpsrld ymm7,ymm6,3 6812 vpsrld ymm1,ymm6,7 6813 vpslld ymm2,ymm6,25 6814 vpxor ymm7,ymm7,ymm1 6815 vpsrld ymm1,ymm6,18 6816 vpxor ymm7,ymm7,ymm2 6817 vpslld ymm2,ymm6,14 6818 vmovdqu ymm0,YMMWORD[((0-128))+rax] 6819 vpsrld ymm3,ymm0,10 6820 6821 vpxor ymm7,ymm7,ymm1 6822 vpsrld ymm1,ymm0,17 6823 vpxor ymm7,ymm7,ymm2 6824 vpslld ymm2,ymm0,15 6825 vpaddd ymm5,ymm5,ymm7 6826 vpxor ymm7,ymm3,ymm1 6827 vpsrld ymm1,ymm0,19 6828 vpxor ymm7,ymm7,ymm2 6829 vpslld ymm2,ymm0,13 6830 vpxor ymm7,ymm7,ymm1 6831 vpxor ymm7,ymm7,ymm2 6832 vpaddd ymm5,ymm5,ymm7 6833 vpsrld ymm7,ymm10,6 6834 vpslld ymm2,ymm10,26 6835 vmovdqu YMMWORD[(64-128)+rax],ymm5 6836 vpaddd ymm5,ymm5,ymm13 6837 6838 vpsrld ymm1,ymm10,11 6839 vpxor ymm7,ymm7,ymm2 6840 vpslld ymm2,ymm10,21 6841 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp] 6842 vpxor ymm7,ymm7,ymm1 6843 6844 vpsrld ymm1,ymm10,25 6845 vpxor ymm7,ymm7,ymm2 6846 6847 vpslld ymm2,ymm10,7 6848 vpandn ymm0,ymm10,ymm12 6849 vpand ymm3,ymm10,ymm11 6850 6851 vpxor ymm7,ymm7,ymm1 6852 6853 vpsrld ymm13,ymm14,2 6854 vpxor ymm7,ymm7,ymm2 6855 6856 vpslld ymm1,ymm14,30 6857 vpxor ymm0,ymm0,ymm3 6858 vpxor ymm3,ymm15,ymm14 6859 6860 vpxor ymm13,ymm13,ymm1 6861 vpaddd ymm5,ymm5,ymm7 6862 6863 vpsrld ymm1,ymm14,13 6864 6865 vpslld ymm2,ymm14,19 6866 vpaddd ymm5,ymm5,ymm0 6867 vpand ymm4,ymm4,ymm3 6868 6869 vpxor ymm7,ymm13,ymm1 6870 6871 vpsrld ymm1,ymm14,22 6872 vpxor ymm7,ymm7,ymm2 6873 6874 vpslld ymm2,ymm14,10 6875 vpxor ymm13,ymm15,ymm4 6876 vpaddd ymm9,ymm9,ymm5 6877 6878 vpxor ymm7,ymm7,ymm1 6879 vpxor ymm7,ymm7,ymm2 6880 6881 vpaddd ymm13,ymm13,ymm5 6882 vpaddd ymm13,ymm13,ymm7 6883 vmovdqu ymm5,YMMWORD[((128-128))+rax] 6884 vpaddd ymm6,ymm6,YMMWORD[((384-256-128))+rbx] 6885 6886 vpsrld ymm7,ymm5,3 6887 vpsrld ymm1,ymm5,7 6888 vpslld ymm2,ymm5,25 6889 vpxor ymm7,ymm7,ymm1 6890 vpsrld ymm1,ymm5,18 6891 vpxor ymm7,ymm7,ymm2 6892 vpslld ymm2,ymm5,14 6893 vmovdqu ymm0,YMMWORD[((32-128))+rax] 6894 vpsrld ymm4,ymm0,10 6895 6896 vpxor ymm7,ymm7,ymm1 6897 vpsrld ymm1,ymm0,17 6898 vpxor ymm7,ymm7,ymm2 6899 vpslld ymm2,ymm0,15 6900 vpaddd ymm6,ymm6,ymm7 6901 vpxor ymm7,ymm4,ymm1 6902 vpsrld ymm1,ymm0,19 6903 vpxor ymm7,ymm7,ymm2 6904 vpslld ymm2,ymm0,13 6905 vpxor ymm7,ymm7,ymm1 6906 vpxor ymm7,ymm7,ymm2 6907 vpaddd ymm6,ymm6,ymm7 6908 vpsrld ymm7,ymm9,6 6909 vpslld ymm2,ymm9,26 6910 vmovdqu YMMWORD[(96-128)+rax],ymm6 6911 vpaddd ymm6,ymm6,ymm12 6912 6913 vpsrld ymm1,ymm9,11 6914 vpxor ymm7,ymm7,ymm2 6915 vpslld ymm2,ymm9,21 6916 vpaddd ymm6,ymm6,YMMWORD[((-32))+rbp] 6917 vpxor ymm7,ymm7,ymm1 6918 6919 vpsrld ymm1,ymm9,25 6920 vpxor ymm7,ymm7,ymm2 6921 6922 vpslld ymm2,ymm9,7 6923 vpandn ymm0,ymm9,ymm11 6924 vpand ymm4,ymm9,ymm10 6925 6926 vpxor ymm7,ymm7,ymm1 6927 6928 vpsrld ymm12,ymm13,2 6929 vpxor ymm7,ymm7,ymm2 6930 6931 vpslld ymm1,ymm13,30 6932 vpxor ymm0,ymm0,ymm4 6933 vpxor ymm4,ymm14,ymm13 6934 6935 vpxor ymm12,ymm12,ymm1 6936 vpaddd ymm6,ymm6,ymm7 6937 6938 vpsrld ymm1,ymm13,13 6939 6940 vpslld ymm2,ymm13,19 6941 vpaddd ymm6,ymm6,ymm0 6942 vpand ymm3,ymm3,ymm4 6943 6944 vpxor ymm7,ymm12,ymm1 6945 6946 vpsrld ymm1,ymm13,22 6947 vpxor ymm7,ymm7,ymm2 6948 6949 vpslld ymm2,ymm13,10 6950 vpxor ymm12,ymm14,ymm3 6951 vpaddd ymm8,ymm8,ymm6 6952 6953 vpxor ymm7,ymm7,ymm1 6954 vpxor ymm7,ymm7,ymm2 6955 6956 vpaddd ymm12,ymm12,ymm6 6957 vpaddd ymm12,ymm12,ymm7 6958 vmovdqu ymm6,YMMWORD[((160-128))+rax] 6959 vpaddd ymm5,ymm5,YMMWORD[((416-256-128))+rbx] 6960 6961 vpsrld ymm7,ymm6,3 6962 vpsrld ymm1,ymm6,7 6963 vpslld ymm2,ymm6,25 6964 vpxor ymm7,ymm7,ymm1 6965 vpsrld ymm1,ymm6,18 6966 vpxor ymm7,ymm7,ymm2 6967 vpslld ymm2,ymm6,14 6968 vmovdqu ymm0,YMMWORD[((64-128))+rax] 6969 vpsrld ymm3,ymm0,10 6970 6971 vpxor ymm7,ymm7,ymm1 6972 vpsrld ymm1,ymm0,17 6973 vpxor ymm7,ymm7,ymm2 6974 vpslld ymm2,ymm0,15 6975 vpaddd ymm5,ymm5,ymm7 6976 vpxor ymm7,ymm3,ymm1 6977 vpsrld ymm1,ymm0,19 6978 vpxor ymm7,ymm7,ymm2 6979 vpslld ymm2,ymm0,13 6980 vpxor ymm7,ymm7,ymm1 6981 vpxor ymm7,ymm7,ymm2 6982 vpaddd ymm5,ymm5,ymm7 6983 vpsrld ymm7,ymm8,6 6984 vpslld ymm2,ymm8,26 6985 vmovdqu YMMWORD[(128-128)+rax],ymm5 6986 vpaddd ymm5,ymm5,ymm11 6987 6988 vpsrld ymm1,ymm8,11 6989 vpxor ymm7,ymm7,ymm2 6990 vpslld ymm2,ymm8,21 6991 vpaddd ymm5,ymm5,YMMWORD[rbp] 6992 vpxor ymm7,ymm7,ymm1 6993 6994 vpsrld ymm1,ymm8,25 6995 vpxor ymm7,ymm7,ymm2 6996 6997 vpslld ymm2,ymm8,7 6998 vpandn ymm0,ymm8,ymm10 6999 vpand ymm3,ymm8,ymm9 7000 7001 vpxor ymm7,ymm7,ymm1 7002 7003 vpsrld ymm11,ymm12,2 7004 vpxor ymm7,ymm7,ymm2 7005 7006 vpslld ymm1,ymm12,30 7007 vpxor ymm0,ymm0,ymm3 7008 vpxor ymm3,ymm13,ymm12 7009 7010 vpxor ymm11,ymm11,ymm1 7011 vpaddd ymm5,ymm5,ymm7 7012 7013 vpsrld ymm1,ymm12,13 7014 7015 vpslld ymm2,ymm12,19 7016 vpaddd ymm5,ymm5,ymm0 7017 vpand ymm4,ymm4,ymm3 7018 7019 vpxor ymm7,ymm11,ymm1 7020 7021 vpsrld ymm1,ymm12,22 7022 vpxor ymm7,ymm7,ymm2 7023 7024 vpslld ymm2,ymm12,10 7025 vpxor ymm11,ymm13,ymm4 7026 vpaddd ymm15,ymm15,ymm5 7027 7028 vpxor ymm7,ymm7,ymm1 7029 vpxor ymm7,ymm7,ymm2 7030 7031 vpaddd ymm11,ymm11,ymm5 7032 vpaddd ymm11,ymm11,ymm7 7033 vmovdqu ymm5,YMMWORD[((192-128))+rax] 7034 vpaddd ymm6,ymm6,YMMWORD[((448-256-128))+rbx] 7035 7036 vpsrld ymm7,ymm5,3 7037 vpsrld ymm1,ymm5,7 7038 vpslld ymm2,ymm5,25 7039 vpxor ymm7,ymm7,ymm1 7040 vpsrld ymm1,ymm5,18 7041 vpxor ymm7,ymm7,ymm2 7042 vpslld ymm2,ymm5,14 7043 vmovdqu ymm0,YMMWORD[((96-128))+rax] 7044 vpsrld ymm4,ymm0,10 7045 7046 vpxor ymm7,ymm7,ymm1 7047 vpsrld ymm1,ymm0,17 7048 vpxor ymm7,ymm7,ymm2 7049 vpslld ymm2,ymm0,15 7050 vpaddd ymm6,ymm6,ymm7 7051 vpxor ymm7,ymm4,ymm1 7052 vpsrld ymm1,ymm0,19 7053 vpxor ymm7,ymm7,ymm2 7054 vpslld ymm2,ymm0,13 7055 vpxor ymm7,ymm7,ymm1 7056 vpxor ymm7,ymm7,ymm2 7057 vpaddd ymm6,ymm6,ymm7 7058 vpsrld ymm7,ymm15,6 7059 vpslld ymm2,ymm15,26 7060 vmovdqu YMMWORD[(160-128)+rax],ymm6 7061 vpaddd ymm6,ymm6,ymm10 7062 7063 vpsrld ymm1,ymm15,11 7064 vpxor ymm7,ymm7,ymm2 7065 vpslld ymm2,ymm15,21 7066 vpaddd ymm6,ymm6,YMMWORD[32+rbp] 7067 vpxor ymm7,ymm7,ymm1 7068 7069 vpsrld ymm1,ymm15,25 7070 vpxor ymm7,ymm7,ymm2 7071 7072 vpslld ymm2,ymm15,7 7073 vpandn ymm0,ymm15,ymm9 7074 vpand ymm4,ymm15,ymm8 7075 7076 vpxor ymm7,ymm7,ymm1 7077 7078 vpsrld ymm10,ymm11,2 7079 vpxor ymm7,ymm7,ymm2 7080 7081 vpslld ymm1,ymm11,30 7082 vpxor ymm0,ymm0,ymm4 7083 vpxor ymm4,ymm12,ymm11 7084 7085 vpxor ymm10,ymm10,ymm1 7086 vpaddd ymm6,ymm6,ymm7 7087 7088 vpsrld ymm1,ymm11,13 7089 7090 vpslld ymm2,ymm11,19 7091 vpaddd ymm6,ymm6,ymm0 7092 vpand ymm3,ymm3,ymm4 7093 7094 vpxor ymm7,ymm10,ymm1 7095 7096 vpsrld ymm1,ymm11,22 7097 vpxor ymm7,ymm7,ymm2 7098 7099 vpslld ymm2,ymm11,10 7100 vpxor ymm10,ymm12,ymm3 7101 vpaddd ymm14,ymm14,ymm6 7102 7103 vpxor ymm7,ymm7,ymm1 7104 vpxor ymm7,ymm7,ymm2 7105 7106 vpaddd ymm10,ymm10,ymm6 7107 vpaddd ymm10,ymm10,ymm7 7108 vmovdqu ymm6,YMMWORD[((224-128))+rax] 7109 vpaddd ymm5,ymm5,YMMWORD[((480-256-128))+rbx] 7110 7111 vpsrld ymm7,ymm6,3 7112 vpsrld ymm1,ymm6,7 7113 vpslld ymm2,ymm6,25 7114 vpxor ymm7,ymm7,ymm1 7115 vpsrld ymm1,ymm6,18 7116 vpxor ymm7,ymm7,ymm2 7117 vpslld ymm2,ymm6,14 7118 vmovdqu ymm0,YMMWORD[((128-128))+rax] 7119 vpsrld ymm3,ymm0,10 7120 7121 vpxor ymm7,ymm7,ymm1 7122 vpsrld ymm1,ymm0,17 7123 vpxor ymm7,ymm7,ymm2 7124 vpslld ymm2,ymm0,15 7125 vpaddd ymm5,ymm5,ymm7 7126 vpxor ymm7,ymm3,ymm1 7127 vpsrld ymm1,ymm0,19 7128 vpxor ymm7,ymm7,ymm2 7129 vpslld ymm2,ymm0,13 7130 vpxor ymm7,ymm7,ymm1 7131 vpxor ymm7,ymm7,ymm2 7132 vpaddd ymm5,ymm5,ymm7 7133 vpsrld ymm7,ymm14,6 7134 vpslld ymm2,ymm14,26 7135 vmovdqu YMMWORD[(192-128)+rax],ymm5 7136 vpaddd ymm5,ymm5,ymm9 7137 7138 vpsrld ymm1,ymm14,11 7139 vpxor ymm7,ymm7,ymm2 7140 vpslld ymm2,ymm14,21 7141 vpaddd ymm5,ymm5,YMMWORD[64+rbp] 7142 vpxor ymm7,ymm7,ymm1 7143 7144 vpsrld ymm1,ymm14,25 7145 vpxor ymm7,ymm7,ymm2 7146 7147 vpslld ymm2,ymm14,7 7148 vpandn ymm0,ymm14,ymm8 7149 vpand ymm3,ymm14,ymm15 7150 7151 vpxor ymm7,ymm7,ymm1 7152 7153 vpsrld ymm9,ymm10,2 7154 vpxor ymm7,ymm7,ymm2 7155 7156 vpslld ymm1,ymm10,30 7157 vpxor ymm0,ymm0,ymm3 7158 vpxor ymm3,ymm11,ymm10 7159 7160 vpxor ymm9,ymm9,ymm1 7161 vpaddd ymm5,ymm5,ymm7 7162 7163 vpsrld ymm1,ymm10,13 7164 7165 vpslld ymm2,ymm10,19 7166 vpaddd ymm5,ymm5,ymm0 7167 vpand ymm4,ymm4,ymm3 7168 7169 vpxor ymm7,ymm9,ymm1 7170 7171 vpsrld ymm1,ymm10,22 7172 vpxor ymm7,ymm7,ymm2 7173 7174 vpslld ymm2,ymm10,10 7175 vpxor ymm9,ymm11,ymm4 7176 vpaddd ymm13,ymm13,ymm5 7177 7178 vpxor ymm7,ymm7,ymm1 7179 vpxor ymm7,ymm7,ymm2 7180 7181 vpaddd ymm9,ymm9,ymm5 7182 vpaddd ymm9,ymm9,ymm7 7183 vmovdqu ymm5,YMMWORD[((256-256-128))+rbx] 7184 vpaddd ymm6,ymm6,YMMWORD[((0-128))+rax] 7185 7186 vpsrld ymm7,ymm5,3 7187 vpsrld ymm1,ymm5,7 7188 vpslld ymm2,ymm5,25 7189 vpxor ymm7,ymm7,ymm1 7190 vpsrld ymm1,ymm5,18 7191 vpxor ymm7,ymm7,ymm2 7192 vpslld ymm2,ymm5,14 7193 vmovdqu ymm0,YMMWORD[((160-128))+rax] 7194 vpsrld ymm4,ymm0,10 7195 7196 vpxor ymm7,ymm7,ymm1 7197 vpsrld ymm1,ymm0,17 7198 vpxor ymm7,ymm7,ymm2 7199 vpslld ymm2,ymm0,15 7200 vpaddd ymm6,ymm6,ymm7 7201 vpxor ymm7,ymm4,ymm1 7202 vpsrld ymm1,ymm0,19 7203 vpxor ymm7,ymm7,ymm2 7204 vpslld ymm2,ymm0,13 7205 vpxor ymm7,ymm7,ymm1 7206 vpxor ymm7,ymm7,ymm2 7207 vpaddd ymm6,ymm6,ymm7 7208 vpsrld ymm7,ymm13,6 7209 vpslld ymm2,ymm13,26 7210 vmovdqu YMMWORD[(224-128)+rax],ymm6 7211 vpaddd ymm6,ymm6,ymm8 7212 7213 vpsrld ymm1,ymm13,11 7214 vpxor ymm7,ymm7,ymm2 7215 vpslld ymm2,ymm13,21 7216 vpaddd ymm6,ymm6,YMMWORD[96+rbp] 7217 vpxor ymm7,ymm7,ymm1 7218 7219 vpsrld ymm1,ymm13,25 7220 vpxor ymm7,ymm7,ymm2 7221 7222 vpslld ymm2,ymm13,7 7223 vpandn ymm0,ymm13,ymm15 7224 vpand ymm4,ymm13,ymm14 7225 7226 vpxor ymm7,ymm7,ymm1 7227 7228 vpsrld ymm8,ymm9,2 7229 vpxor ymm7,ymm7,ymm2 7230 7231 vpslld ymm1,ymm9,30 7232 vpxor ymm0,ymm0,ymm4 7233 vpxor ymm4,ymm10,ymm9 7234 7235 vpxor ymm8,ymm8,ymm1 7236 vpaddd ymm6,ymm6,ymm7 7237 7238 vpsrld ymm1,ymm9,13 7239 7240 vpslld ymm2,ymm9,19 7241 vpaddd ymm6,ymm6,ymm0 7242 vpand ymm3,ymm3,ymm4 7243 7244 vpxor ymm7,ymm8,ymm1 7245 7246 vpsrld ymm1,ymm9,22 7247 vpxor ymm7,ymm7,ymm2 7248 7249 vpslld ymm2,ymm9,10 7250 vpxor ymm8,ymm10,ymm3 7251 vpaddd ymm12,ymm12,ymm6 7252 7253 vpxor ymm7,ymm7,ymm1 7254 vpxor ymm7,ymm7,ymm2 7255 7256 vpaddd ymm8,ymm8,ymm6 7257 vpaddd ymm8,ymm8,ymm7 7258 add rbp,256 7259 vmovdqu ymm6,YMMWORD[((288-256-128))+rbx] 7260 vpaddd ymm5,ymm5,YMMWORD[((32-128))+rax] 7261 7262 vpsrld ymm7,ymm6,3 7263 vpsrld ymm1,ymm6,7 7264 vpslld ymm2,ymm6,25 7265 vpxor ymm7,ymm7,ymm1 7266 vpsrld ymm1,ymm6,18 7267 vpxor ymm7,ymm7,ymm2 7268 vpslld ymm2,ymm6,14 7269 vmovdqu ymm0,YMMWORD[((192-128))+rax] 7270 vpsrld ymm3,ymm0,10 7271 7272 vpxor ymm7,ymm7,ymm1 7273 vpsrld ymm1,ymm0,17 7274 vpxor ymm7,ymm7,ymm2 7275 vpslld ymm2,ymm0,15 7276 vpaddd ymm5,ymm5,ymm7 7277 vpxor ymm7,ymm3,ymm1 7278 vpsrld ymm1,ymm0,19 7279 vpxor ymm7,ymm7,ymm2 7280 vpslld ymm2,ymm0,13 7281 vpxor ymm7,ymm7,ymm1 7282 vpxor ymm7,ymm7,ymm2 7283 vpaddd ymm5,ymm5,ymm7 7284 vpsrld ymm7,ymm12,6 7285 vpslld ymm2,ymm12,26 7286 vmovdqu YMMWORD[(256-256-128)+rbx],ymm5 7287 vpaddd ymm5,ymm5,ymm15 7288 7289 vpsrld ymm1,ymm12,11 7290 vpxor ymm7,ymm7,ymm2 7291 vpslld ymm2,ymm12,21 7292 vpaddd ymm5,ymm5,YMMWORD[((-128))+rbp] 7293 vpxor ymm7,ymm7,ymm1 7294 7295 vpsrld ymm1,ymm12,25 7296 vpxor ymm7,ymm7,ymm2 7297 7298 vpslld ymm2,ymm12,7 7299 vpandn ymm0,ymm12,ymm14 7300 vpand ymm3,ymm12,ymm13 7301 7302 vpxor ymm7,ymm7,ymm1 7303 7304 vpsrld ymm15,ymm8,2 7305 vpxor ymm7,ymm7,ymm2 7306 7307 vpslld ymm1,ymm8,30 7308 vpxor ymm0,ymm0,ymm3 7309 vpxor ymm3,ymm9,ymm8 7310 7311 vpxor ymm15,ymm15,ymm1 7312 vpaddd ymm5,ymm5,ymm7 7313 7314 vpsrld ymm1,ymm8,13 7315 7316 vpslld ymm2,ymm8,19 7317 vpaddd ymm5,ymm5,ymm0 7318 vpand ymm4,ymm4,ymm3 7319 7320 vpxor ymm7,ymm15,ymm1 7321 7322 vpsrld ymm1,ymm8,22 7323 vpxor ymm7,ymm7,ymm2 7324 7325 vpslld ymm2,ymm8,10 7326 vpxor ymm15,ymm9,ymm4 7327 vpaddd ymm11,ymm11,ymm5 7328 7329 vpxor ymm7,ymm7,ymm1 7330 vpxor ymm7,ymm7,ymm2 7331 7332 vpaddd ymm15,ymm15,ymm5 7333 vpaddd ymm15,ymm15,ymm7 7334 vmovdqu ymm5,YMMWORD[((320-256-128))+rbx] 7335 vpaddd ymm6,ymm6,YMMWORD[((64-128))+rax] 7336 7337 vpsrld ymm7,ymm5,3 7338 vpsrld ymm1,ymm5,7 7339 vpslld ymm2,ymm5,25 7340 vpxor ymm7,ymm7,ymm1 7341 vpsrld ymm1,ymm5,18 7342 vpxor ymm7,ymm7,ymm2 7343 vpslld ymm2,ymm5,14 7344 vmovdqu ymm0,YMMWORD[((224-128))+rax] 7345 vpsrld ymm4,ymm0,10 7346 7347 vpxor ymm7,ymm7,ymm1 7348 vpsrld ymm1,ymm0,17 7349 vpxor ymm7,ymm7,ymm2 7350 vpslld ymm2,ymm0,15 7351 vpaddd ymm6,ymm6,ymm7 7352 vpxor ymm7,ymm4,ymm1 7353 vpsrld ymm1,ymm0,19 7354 vpxor ymm7,ymm7,ymm2 7355 vpslld ymm2,ymm0,13 7356 vpxor ymm7,ymm7,ymm1 7357 vpxor ymm7,ymm7,ymm2 7358 vpaddd ymm6,ymm6,ymm7 7359 vpsrld ymm7,ymm11,6 7360 vpslld ymm2,ymm11,26 7361 vmovdqu YMMWORD[(288-256-128)+rbx],ymm6 7362 vpaddd ymm6,ymm6,ymm14 7363 7364 vpsrld ymm1,ymm11,11 7365 vpxor ymm7,ymm7,ymm2 7366 vpslld ymm2,ymm11,21 7367 vpaddd ymm6,ymm6,YMMWORD[((-96))+rbp] 7368 vpxor ymm7,ymm7,ymm1 7369 7370 vpsrld ymm1,ymm11,25 7371 vpxor ymm7,ymm7,ymm2 7372 7373 vpslld ymm2,ymm11,7 7374 vpandn ymm0,ymm11,ymm13 7375 vpand ymm4,ymm11,ymm12 7376 7377 vpxor ymm7,ymm7,ymm1 7378 7379 vpsrld ymm14,ymm15,2 7380 vpxor ymm7,ymm7,ymm2 7381 7382 vpslld ymm1,ymm15,30 7383 vpxor ymm0,ymm0,ymm4 7384 vpxor ymm4,ymm8,ymm15 7385 7386 vpxor ymm14,ymm14,ymm1 7387 vpaddd ymm6,ymm6,ymm7 7388 7389 vpsrld ymm1,ymm15,13 7390 7391 vpslld ymm2,ymm15,19 7392 vpaddd ymm6,ymm6,ymm0 7393 vpand ymm3,ymm3,ymm4 7394 7395 vpxor ymm7,ymm14,ymm1 7396 7397 vpsrld ymm1,ymm15,22 7398 vpxor ymm7,ymm7,ymm2 7399 7400 vpslld ymm2,ymm15,10 7401 vpxor ymm14,ymm8,ymm3 7402 vpaddd ymm10,ymm10,ymm6 7403 7404 vpxor ymm7,ymm7,ymm1 7405 vpxor ymm7,ymm7,ymm2 7406 7407 vpaddd ymm14,ymm14,ymm6 7408 vpaddd ymm14,ymm14,ymm7 7409 vmovdqu ymm6,YMMWORD[((352-256-128))+rbx] 7410 vpaddd ymm5,ymm5,YMMWORD[((96-128))+rax] 7411 7412 vpsrld ymm7,ymm6,3 7413 vpsrld ymm1,ymm6,7 7414 vpslld ymm2,ymm6,25 7415 vpxor ymm7,ymm7,ymm1 7416 vpsrld ymm1,ymm6,18 7417 vpxor ymm7,ymm7,ymm2 7418 vpslld ymm2,ymm6,14 7419 vmovdqu ymm0,YMMWORD[((256-256-128))+rbx] 7420 vpsrld ymm3,ymm0,10 7421 7422 vpxor ymm7,ymm7,ymm1 7423 vpsrld ymm1,ymm0,17 7424 vpxor ymm7,ymm7,ymm2 7425 vpslld ymm2,ymm0,15 7426 vpaddd ymm5,ymm5,ymm7 7427 vpxor ymm7,ymm3,ymm1 7428 vpsrld ymm1,ymm0,19 7429 vpxor ymm7,ymm7,ymm2 7430 vpslld ymm2,ymm0,13 7431 vpxor ymm7,ymm7,ymm1 7432 vpxor ymm7,ymm7,ymm2 7433 vpaddd ymm5,ymm5,ymm7 7434 vpsrld ymm7,ymm10,6 7435 vpslld ymm2,ymm10,26 7436 vmovdqu YMMWORD[(320-256-128)+rbx],ymm5 7437 vpaddd ymm5,ymm5,ymm13 7438 7439 vpsrld ymm1,ymm10,11 7440 vpxor ymm7,ymm7,ymm2 7441 vpslld ymm2,ymm10,21 7442 vpaddd ymm5,ymm5,YMMWORD[((-64))+rbp] 7443 vpxor ymm7,ymm7,ymm1 7444 7445 vpsrld ymm1,ymm10,25 7446 vpxor ymm7,ymm7,ymm2 7447 7448 vpslld ymm2,ymm10,7 7449 vpandn ymm0,ymm10,ymm12 7450 vpand ymm3,ymm10,ymm11 7451 7452 vpxor ymm7,ymm7,ymm1 7453 7454 vpsrld ymm13,ymm14,2 7455 vpxor ymm7,ymm7,ymm2 7456 7457 vpslld ymm1,ymm14,30 7458 vpxor ymm0,ymm0,ymm3 7459 vpxor ymm3,ymm15,ymm14 7460 7461 vpxor ymm13,ymm13,ymm1 7462 vpaddd ymm5,ymm5,ymm7 7463 7464 vpsrld ymm1,ymm14,13 7465 7466 vpslld ymm2,ymm14,19 7467 vpaddd ymm5,ymm5,ymm0 7468 vpand ymm4,ymm4,ymm3 7469 7470 vpxor ymm7,ymm13,ymm1 7471 7472 vpsrld ymm1,ymm14,22 7473 vpxor ymm7,ymm7,ymm2 7474 7475 vpslld ymm2,ymm14,10 7476 vpxor ymm13,ymm15,ymm4 7477 vpaddd ymm9,ymm9,ymm5 7478 7479 vpxor ymm7,ymm7,ymm1 7480 vpxor ymm7,ymm7,ymm2 7481 7482 vpaddd ymm13,ymm13,ymm5 7483 vpaddd ymm13,ymm13,ymm7 7484 vmovdqu ymm5,YMMWORD[((384-256-128))+rbx] 7485 vpaddd ymm6,ymm6,YMMWORD[((128-128))+rax] 7486 7487 vpsrld ymm7,ymm5,3 7488 vpsrld ymm1,ymm5,7 7489 vpslld ymm2,ymm5,25 7490 vpxor ymm7,ymm7,ymm1 7491 vpsrld ymm1,ymm5,18 7492 vpxor ymm7,ymm7,ymm2 7493 vpslld ymm2,ymm5,14 7494 vmovdqu ymm0,YMMWORD[((288-256-128))+rbx] 7495 vpsrld ymm4,ymm0,10 7496 7497 vpxor ymm7,ymm7,ymm1 7498 vpsrld ymm1,ymm0,17 7499 vpxor ymm7,ymm7,ymm2 7500 vpslld ymm2,ymm0,15 7501 vpaddd ymm6,ymm6,ymm7 7502 vpxor ymm7,ymm4,ymm1 7503 vpsrld ymm1,ymm0,19 7504 vpxor ymm7,ymm7,ymm2 7505 vpslld ymm2,ymm0,13 7506 vpxor ymm7,ymm7,ymm1 7507 vpxor ymm7,ymm7,ymm2 7508 vpaddd ymm6,ymm6,ymm7 7509 vpsrld ymm7,ymm9,6 7510 vpslld ymm2,ymm9,26 7511 vmovdqu YMMWORD[(352-256-128)+rbx],ymm6 7512 vpaddd ymm6,ymm6,ymm12 7513 7514 vpsrld ymm1,ymm9,11 7515 vpxor ymm7,ymm7,ymm2 7516 vpslld ymm2,ymm9,21 7517 vpaddd ymm6,ymm6,YMMWORD[((-32))+rbp] 7518 vpxor ymm7,ymm7,ymm1 7519 7520 vpsrld ymm1,ymm9,25 7521 vpxor ymm7,ymm7,ymm2 7522 7523 vpslld ymm2,ymm9,7 7524 vpandn ymm0,ymm9,ymm11 7525 vpand ymm4,ymm9,ymm10 7526 7527 vpxor ymm7,ymm7,ymm1 7528 7529 vpsrld ymm12,ymm13,2 7530 vpxor ymm7,ymm7,ymm2 7531 7532 vpslld ymm1,ymm13,30 7533 vpxor ymm0,ymm0,ymm4 7534 vpxor ymm4,ymm14,ymm13 7535 7536 vpxor ymm12,ymm12,ymm1 7537 vpaddd ymm6,ymm6,ymm7 7538 7539 vpsrld ymm1,ymm13,13 7540 7541 vpslld ymm2,ymm13,19 7542 vpaddd ymm6,ymm6,ymm0 7543 vpand ymm3,ymm3,ymm4 7544 7545 vpxor ymm7,ymm12,ymm1 7546 7547 vpsrld ymm1,ymm13,22 7548 vpxor ymm7,ymm7,ymm2 7549 7550 vpslld ymm2,ymm13,10 7551 vpxor ymm12,ymm14,ymm3 7552 vpaddd ymm8,ymm8,ymm6 7553 7554 vpxor ymm7,ymm7,ymm1 7555 vpxor ymm7,ymm7,ymm2 7556 7557 vpaddd ymm12,ymm12,ymm6 7558 vpaddd ymm12,ymm12,ymm7 7559 vmovdqu ymm6,YMMWORD[((416-256-128))+rbx] 7560 vpaddd ymm5,ymm5,YMMWORD[((160-128))+rax] 7561 7562 vpsrld ymm7,ymm6,3 7563 vpsrld ymm1,ymm6,7 7564 vpslld ymm2,ymm6,25 7565 vpxor ymm7,ymm7,ymm1 7566 vpsrld ymm1,ymm6,18 7567 vpxor ymm7,ymm7,ymm2 7568 vpslld ymm2,ymm6,14 7569 vmovdqu ymm0,YMMWORD[((320-256-128))+rbx] 7570 vpsrld ymm3,ymm0,10 7571 7572 vpxor ymm7,ymm7,ymm1 7573 vpsrld ymm1,ymm0,17 7574 vpxor ymm7,ymm7,ymm2 7575 vpslld ymm2,ymm0,15 7576 vpaddd ymm5,ymm5,ymm7 7577 vpxor ymm7,ymm3,ymm1 7578 vpsrld ymm1,ymm0,19 7579 vpxor ymm7,ymm7,ymm2 7580 vpslld ymm2,ymm0,13 7581 vpxor ymm7,ymm7,ymm1 7582 vpxor ymm7,ymm7,ymm2 7583 vpaddd ymm5,ymm5,ymm7 7584 vpsrld ymm7,ymm8,6 7585 vpslld ymm2,ymm8,26 7586 vmovdqu YMMWORD[(384-256-128)+rbx],ymm5 7587 vpaddd ymm5,ymm5,ymm11 7588 7589 vpsrld ymm1,ymm8,11 7590 vpxor ymm7,ymm7,ymm2 7591 vpslld ymm2,ymm8,21 7592 vpaddd ymm5,ymm5,YMMWORD[rbp] 7593 vpxor ymm7,ymm7,ymm1 7594 7595 vpsrld ymm1,ymm8,25 7596 vpxor ymm7,ymm7,ymm2 7597 7598 vpslld ymm2,ymm8,7 7599 vpandn ymm0,ymm8,ymm10 7600 vpand ymm3,ymm8,ymm9 7601 7602 vpxor ymm7,ymm7,ymm1 7603 7604 vpsrld ymm11,ymm12,2 7605 vpxor ymm7,ymm7,ymm2 7606 7607 vpslld ymm1,ymm12,30 7608 vpxor ymm0,ymm0,ymm3 7609 vpxor ymm3,ymm13,ymm12 7610 7611 vpxor ymm11,ymm11,ymm1 7612 vpaddd ymm5,ymm5,ymm7 7613 7614 vpsrld ymm1,ymm12,13 7615 7616 vpslld ymm2,ymm12,19 7617 vpaddd ymm5,ymm5,ymm0 7618 vpand ymm4,ymm4,ymm3 7619 7620 vpxor ymm7,ymm11,ymm1 7621 7622 vpsrld ymm1,ymm12,22 7623 vpxor ymm7,ymm7,ymm2 7624 7625 vpslld ymm2,ymm12,10 7626 vpxor ymm11,ymm13,ymm4 7627 vpaddd ymm15,ymm15,ymm5 7628 7629 vpxor ymm7,ymm7,ymm1 7630 vpxor ymm7,ymm7,ymm2 7631 7632 vpaddd ymm11,ymm11,ymm5 7633 vpaddd ymm11,ymm11,ymm7 7634 vmovdqu ymm5,YMMWORD[((448-256-128))+rbx] 7635 vpaddd ymm6,ymm6,YMMWORD[((192-128))+rax] 7636 7637 vpsrld ymm7,ymm5,3 7638 vpsrld ymm1,ymm5,7 7639 vpslld ymm2,ymm5,25 7640 vpxor ymm7,ymm7,ymm1 7641 vpsrld ymm1,ymm5,18 7642 vpxor ymm7,ymm7,ymm2 7643 vpslld ymm2,ymm5,14 7644 vmovdqu ymm0,YMMWORD[((352-256-128))+rbx] 7645 vpsrld ymm4,ymm0,10 7646 7647 vpxor ymm7,ymm7,ymm1 7648 vpsrld ymm1,ymm0,17 7649 vpxor ymm7,ymm7,ymm2 7650 vpslld ymm2,ymm0,15 7651 vpaddd ymm6,ymm6,ymm7 7652 vpxor ymm7,ymm4,ymm1 7653 vpsrld ymm1,ymm0,19 7654 vpxor ymm7,ymm7,ymm2 7655 vpslld ymm2,ymm0,13 7656 vpxor ymm7,ymm7,ymm1 7657 vpxor ymm7,ymm7,ymm2 7658 vpaddd ymm6,ymm6,ymm7 7659 vpsrld ymm7,ymm15,6 7660 vpslld ymm2,ymm15,26 7661 vmovdqu YMMWORD[(416-256-128)+rbx],ymm6 7662 vpaddd ymm6,ymm6,ymm10 7663 7664 vpsrld ymm1,ymm15,11 7665 vpxor ymm7,ymm7,ymm2 7666 vpslld ymm2,ymm15,21 7667 vpaddd ymm6,ymm6,YMMWORD[32+rbp] 7668 vpxor ymm7,ymm7,ymm1 7669 7670 vpsrld ymm1,ymm15,25 7671 vpxor ymm7,ymm7,ymm2 7672 7673 vpslld ymm2,ymm15,7 7674 vpandn ymm0,ymm15,ymm9 7675 vpand ymm4,ymm15,ymm8 7676 7677 vpxor ymm7,ymm7,ymm1 7678 7679 vpsrld ymm10,ymm11,2 7680 vpxor ymm7,ymm7,ymm2 7681 7682 vpslld ymm1,ymm11,30 7683 vpxor ymm0,ymm0,ymm4 7684 vpxor ymm4,ymm12,ymm11 7685 7686 vpxor ymm10,ymm10,ymm1 7687 vpaddd ymm6,ymm6,ymm7 7688 7689 vpsrld ymm1,ymm11,13 7690 7691 vpslld ymm2,ymm11,19 7692 vpaddd ymm6,ymm6,ymm0 7693 vpand ymm3,ymm3,ymm4 7694 7695 vpxor ymm7,ymm10,ymm1 7696 7697 vpsrld ymm1,ymm11,22 7698 vpxor ymm7,ymm7,ymm2 7699 7700 vpslld ymm2,ymm11,10 7701 vpxor ymm10,ymm12,ymm3 7702 vpaddd ymm14,ymm14,ymm6 7703 7704 vpxor ymm7,ymm7,ymm1 7705 vpxor ymm7,ymm7,ymm2 7706 7707 vpaddd ymm10,ymm10,ymm6 7708 vpaddd ymm10,ymm10,ymm7 7709 vmovdqu ymm6,YMMWORD[((480-256-128))+rbx] 7710 vpaddd ymm5,ymm5,YMMWORD[((224-128))+rax] 7711 7712 vpsrld ymm7,ymm6,3 7713 vpsrld ymm1,ymm6,7 7714 vpslld ymm2,ymm6,25 7715 vpxor ymm7,ymm7,ymm1 7716 vpsrld ymm1,ymm6,18 7717 vpxor ymm7,ymm7,ymm2 7718 vpslld ymm2,ymm6,14 7719 vmovdqu ymm0,YMMWORD[((384-256-128))+rbx] 7720 vpsrld ymm3,ymm0,10 7721 7722 vpxor ymm7,ymm7,ymm1 7723 vpsrld ymm1,ymm0,17 7724 vpxor ymm7,ymm7,ymm2 7725 vpslld ymm2,ymm0,15 7726 vpaddd ymm5,ymm5,ymm7 7727 vpxor ymm7,ymm3,ymm1 7728 vpsrld ymm1,ymm0,19 7729 vpxor ymm7,ymm7,ymm2 7730 vpslld ymm2,ymm0,13 7731 vpxor ymm7,ymm7,ymm1 7732 vpxor ymm7,ymm7,ymm2 7733 vpaddd ymm5,ymm5,ymm7 7734 vpsrld ymm7,ymm14,6 7735 vpslld ymm2,ymm14,26 7736 vmovdqu YMMWORD[(448-256-128)+rbx],ymm5 7737 vpaddd ymm5,ymm5,ymm9 7738 7739 vpsrld ymm1,ymm14,11 7740 vpxor ymm7,ymm7,ymm2 7741 vpslld ymm2,ymm14,21 7742 vpaddd ymm5,ymm5,YMMWORD[64+rbp] 7743 vpxor ymm7,ymm7,ymm1 7744 7745 vpsrld ymm1,ymm14,25 7746 vpxor ymm7,ymm7,ymm2 7747 7748 vpslld ymm2,ymm14,7 7749 vpandn ymm0,ymm14,ymm8 7750 vpand ymm3,ymm14,ymm15 7751 7752 vpxor ymm7,ymm7,ymm1 7753 7754 vpsrld ymm9,ymm10,2 7755 vpxor ymm7,ymm7,ymm2 7756 7757 vpslld ymm1,ymm10,30 7758 vpxor ymm0,ymm0,ymm3 7759 vpxor ymm3,ymm11,ymm10 7760 7761 vpxor ymm9,ymm9,ymm1 7762 vpaddd ymm5,ymm5,ymm7 7763 7764 vpsrld ymm1,ymm10,13 7765 7766 vpslld ymm2,ymm10,19 7767 vpaddd ymm5,ymm5,ymm0 7768 vpand ymm4,ymm4,ymm3 7769 7770 vpxor ymm7,ymm9,ymm1 7771 7772 vpsrld ymm1,ymm10,22 7773 vpxor ymm7,ymm7,ymm2 7774 7775 vpslld ymm2,ymm10,10 7776 vpxor ymm9,ymm11,ymm4 7777 vpaddd ymm13,ymm13,ymm5 7778 7779 vpxor ymm7,ymm7,ymm1 7780 vpxor ymm7,ymm7,ymm2 7781 7782 vpaddd ymm9,ymm9,ymm5 7783 vpaddd ymm9,ymm9,ymm7 7784 vmovdqu ymm5,YMMWORD[((0-128))+rax] 7785 vpaddd ymm6,ymm6,YMMWORD[((256-256-128))+rbx] 7786 7787 vpsrld ymm7,ymm5,3 7788 vpsrld ymm1,ymm5,7 7789 vpslld ymm2,ymm5,25 7790 vpxor ymm7,ymm7,ymm1 7791 vpsrld ymm1,ymm5,18 7792 vpxor ymm7,ymm7,ymm2 7793 vpslld ymm2,ymm5,14 7794 vmovdqu ymm0,YMMWORD[((416-256-128))+rbx] 7795 vpsrld ymm4,ymm0,10 7796 7797 vpxor ymm7,ymm7,ymm1 7798 vpsrld ymm1,ymm0,17 7799 vpxor ymm7,ymm7,ymm2 7800 vpslld ymm2,ymm0,15 7801 vpaddd ymm6,ymm6,ymm7 7802 vpxor ymm7,ymm4,ymm1 7803 vpsrld ymm1,ymm0,19 7804 vpxor ymm7,ymm7,ymm2 7805 vpslld ymm2,ymm0,13 7806 vpxor ymm7,ymm7,ymm1 7807 vpxor ymm7,ymm7,ymm2 7808 vpaddd ymm6,ymm6,ymm7 7809 vpsrld ymm7,ymm13,6 7810 vpslld ymm2,ymm13,26 7811 vmovdqu YMMWORD[(480-256-128)+rbx],ymm6 7812 vpaddd ymm6,ymm6,ymm8 7813 7814 vpsrld ymm1,ymm13,11 7815 vpxor ymm7,ymm7,ymm2 7816 vpslld ymm2,ymm13,21 7817 vpaddd ymm6,ymm6,YMMWORD[96+rbp] 7818 vpxor ymm7,ymm7,ymm1 7819 7820 vpsrld ymm1,ymm13,25 7821 vpxor ymm7,ymm7,ymm2 7822 7823 vpslld ymm2,ymm13,7 7824 vpandn ymm0,ymm13,ymm15 7825 vpand ymm4,ymm13,ymm14 7826 7827 vpxor ymm7,ymm7,ymm1 7828 7829 vpsrld ymm8,ymm9,2 7830 vpxor ymm7,ymm7,ymm2 7831 7832 vpslld ymm1,ymm9,30 7833 vpxor ymm0,ymm0,ymm4 7834 vpxor ymm4,ymm10,ymm9 7835 7836 vpxor ymm8,ymm8,ymm1 7837 vpaddd ymm6,ymm6,ymm7 7838 7839 vpsrld ymm1,ymm9,13 7840 7841 vpslld ymm2,ymm9,19 7842 vpaddd ymm6,ymm6,ymm0 7843 vpand ymm3,ymm3,ymm4 7844 7845 vpxor ymm7,ymm8,ymm1 7846 7847 vpsrld ymm1,ymm9,22 7848 vpxor ymm7,ymm7,ymm2 7849 7850 vpslld ymm2,ymm9,10 7851 vpxor ymm8,ymm10,ymm3 7852 vpaddd ymm12,ymm12,ymm6 7853 7854 vpxor ymm7,ymm7,ymm1 7855 vpxor ymm7,ymm7,ymm2 7856 7857 vpaddd ymm8,ymm8,ymm6 7858 vpaddd ymm8,ymm8,ymm7 7859 add rbp,256 7860 dec ecx 7861 jnz NEAR $L$oop_16_xx_avx2 7862 7863 mov ecx,1 7864 lea rbx,[512+rsp] 7865 lea rbp,[((K256+128))] 7866 cmp ecx,DWORD[rbx] 7867 cmovge r12,rbp 7868 cmp ecx,DWORD[4+rbx] 7869 cmovge r13,rbp 7870 cmp ecx,DWORD[8+rbx] 7871 cmovge r14,rbp 7872 cmp ecx,DWORD[12+rbx] 7873 cmovge r15,rbp 7874 cmp ecx,DWORD[16+rbx] 7875 cmovge r8,rbp 7876 cmp ecx,DWORD[20+rbx] 7877 cmovge r9,rbp 7878 cmp ecx,DWORD[24+rbx] 7879 cmovge r10,rbp 7880 cmp ecx,DWORD[28+rbx] 7881 cmovge r11,rbp 7882 vmovdqa ymm7,YMMWORD[rbx] 7883 vpxor ymm0,ymm0,ymm0 7884 vmovdqa ymm6,ymm7 7885 vpcmpgtd ymm6,ymm6,ymm0 7886 vpaddd ymm7,ymm7,ymm6 7887 7888 vmovdqu ymm0,YMMWORD[((0-128))+rdi] 7889 vpand ymm8,ymm8,ymm6 7890 vmovdqu ymm1,YMMWORD[((32-128))+rdi] 7891 vpand ymm9,ymm9,ymm6 7892 vmovdqu ymm2,YMMWORD[((64-128))+rdi] 7893 vpand ymm10,ymm10,ymm6 7894 vmovdqu ymm5,YMMWORD[((96-128))+rdi] 7895 vpand ymm11,ymm11,ymm6 7896 vpaddd ymm8,ymm8,ymm0 7897 vmovdqu ymm0,YMMWORD[((128-128))+rdi] 7898 vpand ymm12,ymm12,ymm6 7899 vpaddd ymm9,ymm9,ymm1 7900 vmovdqu ymm1,YMMWORD[((160-128))+rdi] 7901 vpand ymm13,ymm13,ymm6 7902 vpaddd ymm10,ymm10,ymm2 7903 vmovdqu ymm2,YMMWORD[((192-128))+rdi] 7904 vpand ymm14,ymm14,ymm6 7905 vpaddd ymm11,ymm11,ymm5 7906 vmovdqu ymm5,YMMWORD[((224-128))+rdi] 7907 vpand ymm15,ymm15,ymm6 7908 vpaddd ymm12,ymm12,ymm0 7909 vpaddd ymm13,ymm13,ymm1 7910 vmovdqu YMMWORD[(0-128)+rdi],ymm8 7911 vpaddd ymm14,ymm14,ymm2 7912 vmovdqu YMMWORD[(32-128)+rdi],ymm9 7913 vpaddd ymm15,ymm15,ymm5 7914 vmovdqu YMMWORD[(64-128)+rdi],ymm10 7915 vmovdqu YMMWORD[(96-128)+rdi],ymm11 7916 vmovdqu YMMWORD[(128-128)+rdi],ymm12 7917 vmovdqu YMMWORD[(160-128)+rdi],ymm13 7918 vmovdqu YMMWORD[(192-128)+rdi],ymm14 7919 vmovdqu YMMWORD[(224-128)+rdi],ymm15 7920 7921 vmovdqu YMMWORD[rbx],ymm7 7922 lea rbx,[((256+128))+rsp] 7923 vmovdqu ymm6,YMMWORD[$L$pbswap] 7924 dec edx 7925 jnz NEAR $L$oop_avx2 7926 7927 7928 7929 7930 7931 7932 7933 $L$done_avx2: 7934 mov rax,QWORD[544+rsp] 7935 7936 vzeroupper 7937 movaps xmm6,XMMWORD[((-216))+rax] 7938 movaps xmm7,XMMWORD[((-200))+rax] 7939 movaps xmm8,XMMWORD[((-184))+rax] 7940 movaps xmm9,XMMWORD[((-168))+rax] 7941 movaps xmm10,XMMWORD[((-152))+rax] 7942 movaps xmm11,XMMWORD[((-136))+rax] 7943 movaps xmm12,XMMWORD[((-120))+rax] 7944 movaps xmm13,XMMWORD[((-104))+rax] 7945 movaps xmm14,XMMWORD[((-88))+rax] 7946 movaps xmm15,XMMWORD[((-72))+rax] 7947 mov r15,QWORD[((-48))+rax] 7948 7949 mov r14,QWORD[((-40))+rax] 7950 7951 mov r13,QWORD[((-32))+rax] 7952 7953 mov r12,QWORD[((-24))+rax] 7954 7955 mov rbp,QWORD[((-16))+rax] 7956 7957 mov rbx,QWORD[((-8))+rax] 7958 7959 lea rsp,[rax] 7960 7961 $L$epilogue_avx2: 7962 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 7963 mov rsi,QWORD[16+rsp] 7964 DB 0F3h,0C3h ;repret 7965 7966 $L$SEH_end_sha256_multi_block_avx2: 3207 7967 ALIGN 256 3208 7968 K256: … … 3444 8204 DB 0F3h,0C3h ;repret 3445 8205 8206 8207 ALIGN 16 8208 avx2_handler: 8209 push rsi 8210 push rdi 8211 push rbx 8212 push rbp 8213 push r12 8214 push r13 8215 push r14 8216 push r15 8217 pushfq 8218 sub rsp,64 8219 8220 mov rax,QWORD[120+r8] 8221 mov rbx,QWORD[248+r8] 8222 8223 mov rsi,QWORD[8+r9] 8224 mov r11,QWORD[56+r9] 8225 8226 mov r10d,DWORD[r11] 8227 lea r10,[r10*1+rsi] 8228 cmp rbx,r10 8229 jb NEAR $L$in_prologue 8230 8231 mov rax,QWORD[152+r8] 8232 8233 mov r10d,DWORD[4+r11] 8234 lea r10,[r10*1+rsi] 8235 cmp rbx,r10 8236 jae NEAR $L$in_prologue 8237 8238 mov rax,QWORD[544+r8] 8239 8240 mov rbx,QWORD[((-8))+rax] 8241 mov rbp,QWORD[((-16))+rax] 8242 mov r12,QWORD[((-24))+rax] 8243 mov r13,QWORD[((-32))+rax] 8244 mov r14,QWORD[((-40))+rax] 8245 mov r15,QWORD[((-48))+rax] 8246 mov QWORD[144+r8],rbx 8247 mov QWORD[160+r8],rbp 8248 mov QWORD[216+r8],r12 8249 mov QWORD[224+r8],r13 8250 mov QWORD[232+r8],r14 8251 mov QWORD[240+r8],r15 8252 8253 lea rsi,[((-56-160))+rax] 8254 lea rdi,[512+r8] 8255 mov ecx,20 8256 DD 0xa548f3fc 8257 8258 jmp NEAR $L$in_prologue 8259 3446 8260 section .pdata rdata align=4 3447 8261 ALIGN 4 … … 3452 8266 DD $L$SEH_end_sha256_multi_block_shaext wrt ..imagebase 3453 8267 DD $L$SEH_info_sha256_multi_block_shaext wrt ..imagebase 8268 DD $L$SEH_begin_sha256_multi_block_avx wrt ..imagebase 8269 DD $L$SEH_end_sha256_multi_block_avx wrt ..imagebase 8270 DD $L$SEH_info_sha256_multi_block_avx wrt ..imagebase 8271 DD $L$SEH_begin_sha256_multi_block_avx2 wrt ..imagebase 8272 DD $L$SEH_end_sha256_multi_block_avx2 wrt ..imagebase 8273 DD $L$SEH_info_sha256_multi_block_avx2 wrt ..imagebase 3454 8274 section .xdata rdata align=8 3455 8275 ALIGN 8 … … 3462 8282 DD se_handler wrt ..imagebase 3463 8283 DD $L$body_shaext wrt ..imagebase,$L$epilogue_shaext wrt ..imagebase 8284 $L$SEH_info_sha256_multi_block_avx: 8285 DB 9,0,0,0 8286 DD se_handler wrt ..imagebase 8287 DD $L$body_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase 8288 $L$SEH_info_sha256_multi_block_avx2: 8289 DB 9,0,0,0 8290 DD avx2_handler wrt ..imagebase 8291 DD $L$body_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/sha256-x86_64.S
r97373 r99371 27 27 test r11d,536870912 28 28 jnz NEAR _shaext_shortcut 29 and r11d,296 30 cmp r11d,296 31 je NEAR $L$avx2_shortcut 32 and r9d,1073741824 33 and r10d,268435968 34 or r10d,r9d 35 cmp r10d,1342177792 36 je NEAR $L$avx_shortcut 29 37 test r10d,512 30 38 jnz NEAR $L$ssse3_shortcut … … 3150 3158 3151 3159 $L$SEH_end_sha256_block_data_order_ssse3: 3160 3161 ALIGN 64 3162 sha256_block_data_order_avx: 3163 mov QWORD[8+rsp],rdi ;WIN64 prologue 3164 mov QWORD[16+rsp],rsi 3165 mov rax,rsp 3166 $L$SEH_begin_sha256_block_data_order_avx: 3167 mov rdi,rcx 3168 mov rsi,rdx 3169 mov rdx,r8 3170 3171 3172 3173 $L$avx_shortcut: 3174 mov rax,rsp 3175 3176 push rbx 3177 3178 push rbp 3179 3180 push r12 3181 3182 push r13 3183 3184 push r14 3185 3186 push r15 3187 3188 shl rdx,4 3189 sub rsp,160 3190 lea rdx,[rdx*4+rsi] 3191 and rsp,-64 3192 mov QWORD[((64+0))+rsp],rdi 3193 mov QWORD[((64+8))+rsp],rsi 3194 mov QWORD[((64+16))+rsp],rdx 3195 mov QWORD[88+rsp],rax 3196 3197 movaps XMMWORD[(64+32)+rsp],xmm6 3198 movaps XMMWORD[(64+48)+rsp],xmm7 3199 movaps XMMWORD[(64+64)+rsp],xmm8 3200 movaps XMMWORD[(64+80)+rsp],xmm9 3201 $L$prologue_avx: 3202 3203 vzeroupper 3204 mov eax,DWORD[rdi] 3205 mov ebx,DWORD[4+rdi] 3206 mov ecx,DWORD[8+rdi] 3207 mov edx,DWORD[12+rdi] 3208 mov r8d,DWORD[16+rdi] 3209 mov r9d,DWORD[20+rdi] 3210 mov r10d,DWORD[24+rdi] 3211 mov r11d,DWORD[28+rdi] 3212 vmovdqa xmm8,XMMWORD[((K256+512+32))] 3213 vmovdqa xmm9,XMMWORD[((K256+512+64))] 3214 jmp NEAR $L$loop_avx 3215 ALIGN 16 3216 $L$loop_avx: 3217 vmovdqa xmm7,XMMWORD[((K256+512))] 3218 vmovdqu xmm0,XMMWORD[rsi] 3219 vmovdqu xmm1,XMMWORD[16+rsi] 3220 vmovdqu xmm2,XMMWORD[32+rsi] 3221 vmovdqu xmm3,XMMWORD[48+rsi] 3222 vpshufb xmm0,xmm0,xmm7 3223 lea rbp,[K256] 3224 vpshufb xmm1,xmm1,xmm7 3225 vpshufb xmm2,xmm2,xmm7 3226 vpaddd xmm4,xmm0,XMMWORD[rbp] 3227 vpshufb xmm3,xmm3,xmm7 3228 vpaddd xmm5,xmm1,XMMWORD[32+rbp] 3229 vpaddd xmm6,xmm2,XMMWORD[64+rbp] 3230 vpaddd xmm7,xmm3,XMMWORD[96+rbp] 3231 vmovdqa XMMWORD[rsp],xmm4 3232 mov r14d,eax 3233 vmovdqa XMMWORD[16+rsp],xmm5 3234 mov edi,ebx 3235 vmovdqa XMMWORD[32+rsp],xmm6 3236 xor edi,ecx 3237 vmovdqa XMMWORD[48+rsp],xmm7 3238 mov r13d,r8d 3239 jmp NEAR $L$avx_00_47 3240 3241 ALIGN 16 3242 $L$avx_00_47: 3243 sub rbp,-128 3244 vpalignr xmm4,xmm1,xmm0,4 3245 shrd r13d,r13d,14 3246 mov eax,r14d 3247 mov r12d,r9d 3248 vpalignr xmm7,xmm3,xmm2,4 3249 shrd r14d,r14d,9 3250 xor r13d,r8d 3251 xor r12d,r10d 3252 vpsrld xmm6,xmm4,7 3253 shrd r13d,r13d,5 3254 xor r14d,eax 3255 and r12d,r8d 3256 vpaddd xmm0,xmm0,xmm7 3257 xor r13d,r8d 3258 add r11d,DWORD[rsp] 3259 mov r15d,eax 3260 vpsrld xmm7,xmm4,3 3261 xor r12d,r10d 3262 shrd r14d,r14d,11 3263 xor r15d,ebx 3264 vpslld xmm5,xmm4,14 3265 add r11d,r12d 3266 shrd r13d,r13d,6 3267 and edi,r15d 3268 vpxor xmm4,xmm7,xmm6 3269 xor r14d,eax 3270 add r11d,r13d 3271 xor edi,ebx 3272 vpshufd xmm7,xmm3,250 3273 shrd r14d,r14d,2 3274 add edx,r11d 3275 add r11d,edi 3276 vpsrld xmm6,xmm6,11 3277 mov r13d,edx 3278 add r14d,r11d 3279 shrd r13d,r13d,14 3280 vpxor xmm4,xmm4,xmm5 3281 mov r11d,r14d 3282 mov r12d,r8d 3283 shrd r14d,r14d,9 3284 vpslld xmm5,xmm5,11 3285 xor r13d,edx 3286 xor r12d,r9d 3287 shrd r13d,r13d,5 3288 vpxor xmm4,xmm4,xmm6 3289 xor r14d,r11d 3290 and r12d,edx 3291 xor r13d,edx 3292 vpsrld xmm6,xmm7,10 3293 add r10d,DWORD[4+rsp] 3294 mov edi,r11d 3295 xor r12d,r9d 3296 vpxor xmm4,xmm4,xmm5 3297 shrd r14d,r14d,11 3298 xor edi,eax 3299 add r10d,r12d 3300 vpsrlq xmm7,xmm7,17 3301 shrd r13d,r13d,6 3302 and r15d,edi 3303 xor r14d,r11d 3304 vpaddd xmm0,xmm0,xmm4 3305 add r10d,r13d 3306 xor r15d,eax 3307 shrd r14d,r14d,2 3308 vpxor xmm6,xmm6,xmm7 3309 add ecx,r10d 3310 add r10d,r15d 3311 mov r13d,ecx 3312 vpsrlq xmm7,xmm7,2 3313 add r14d,r10d 3314 shrd r13d,r13d,14 3315 mov r10d,r14d 3316 vpxor xmm6,xmm6,xmm7 3317 mov r12d,edx 3318 shrd r14d,r14d,9 3319 xor r13d,ecx 3320 vpshufb xmm6,xmm6,xmm8 3321 xor r12d,r8d 3322 shrd r13d,r13d,5 3323 xor r14d,r10d 3324 vpaddd xmm0,xmm0,xmm6 3325 and r12d,ecx 3326 xor r13d,ecx 3327 add r9d,DWORD[8+rsp] 3328 vpshufd xmm7,xmm0,80 3329 mov r15d,r10d 3330 xor r12d,r8d 3331 shrd r14d,r14d,11 3332 vpsrld xmm6,xmm7,10 3333 xor r15d,r11d 3334 add r9d,r12d 3335 shrd r13d,r13d,6 3336 vpsrlq xmm7,xmm7,17 3337 and edi,r15d 3338 xor r14d,r10d 3339 add r9d,r13d 3340 vpxor xmm6,xmm6,xmm7 3341 xor edi,r11d 3342 shrd r14d,r14d,2 3343 add ebx,r9d 3344 vpsrlq xmm7,xmm7,2 3345 add r9d,edi 3346 mov r13d,ebx 3347 add r14d,r9d 3348 vpxor xmm6,xmm6,xmm7 3349 shrd r13d,r13d,14 3350 mov r9d,r14d 3351 mov r12d,ecx 3352 vpshufb xmm6,xmm6,xmm9 3353 shrd r14d,r14d,9 3354 xor r13d,ebx 3355 xor r12d,edx 3356 vpaddd xmm0,xmm0,xmm6 3357 shrd r13d,r13d,5 3358 xor r14d,r9d 3359 and r12d,ebx 3360 vpaddd xmm6,xmm0,XMMWORD[rbp] 3361 xor r13d,ebx 3362 add r8d,DWORD[12+rsp] 3363 mov edi,r9d 3364 xor r12d,edx 3365 shrd r14d,r14d,11 3366 xor edi,r10d 3367 add r8d,r12d 3368 shrd r13d,r13d,6 3369 and r15d,edi 3370 xor r14d,r9d 3371 add r8d,r13d 3372 xor r15d,r10d 3373 shrd r14d,r14d,2 3374 add eax,r8d 3375 add r8d,r15d 3376 mov r13d,eax 3377 add r14d,r8d 3378 vmovdqa XMMWORD[rsp],xmm6 3379 vpalignr xmm4,xmm2,xmm1,4 3380 shrd r13d,r13d,14 3381 mov r8d,r14d 3382 mov r12d,ebx 3383 vpalignr xmm7,xmm0,xmm3,4 3384 shrd r14d,r14d,9 3385 xor r13d,eax 3386 xor r12d,ecx 3387 vpsrld xmm6,xmm4,7 3388 shrd r13d,r13d,5 3389 xor r14d,r8d 3390 and r12d,eax 3391 vpaddd xmm1,xmm1,xmm7 3392 xor r13d,eax 3393 add edx,DWORD[16+rsp] 3394 mov r15d,r8d 3395 vpsrld xmm7,xmm4,3 3396 xor r12d,ecx 3397 shrd r14d,r14d,11 3398 xor r15d,r9d 3399 vpslld xmm5,xmm4,14 3400 add edx,r12d 3401 shrd r13d,r13d,6 3402 and edi,r15d 3403 vpxor xmm4,xmm7,xmm6 3404 xor r14d,r8d 3405 add edx,r13d 3406 xor edi,r9d 3407 vpshufd xmm7,xmm0,250 3408 shrd r14d,r14d,2 3409 add r11d,edx 3410 add edx,edi 3411 vpsrld xmm6,xmm6,11 3412 mov r13d,r11d 3413 add r14d,edx 3414 shrd r13d,r13d,14 3415 vpxor xmm4,xmm4,xmm5 3416 mov edx,r14d 3417 mov r12d,eax 3418 shrd r14d,r14d,9 3419 vpslld xmm5,xmm5,11 3420 xor r13d,r11d 3421 xor r12d,ebx 3422 shrd r13d,r13d,5 3423 vpxor xmm4,xmm4,xmm6 3424 xor r14d,edx 3425 and r12d,r11d 3426 xor r13d,r11d 3427 vpsrld xmm6,xmm7,10 3428 add ecx,DWORD[20+rsp] 3429 mov edi,edx 3430 xor r12d,ebx 3431 vpxor xmm4,xmm4,xmm5 3432 shrd r14d,r14d,11 3433 xor edi,r8d 3434 add ecx,r12d 3435 vpsrlq xmm7,xmm7,17 3436 shrd r13d,r13d,6 3437 and r15d,edi 3438 xor r14d,edx 3439 vpaddd xmm1,xmm1,xmm4 3440 add ecx,r13d 3441 xor r15d,r8d 3442 shrd r14d,r14d,2 3443 vpxor xmm6,xmm6,xmm7 3444 add r10d,ecx 3445 add ecx,r15d 3446 mov r13d,r10d 3447 vpsrlq xmm7,xmm7,2 3448 add r14d,ecx 3449 shrd r13d,r13d,14 3450 mov ecx,r14d 3451 vpxor xmm6,xmm6,xmm7 3452 mov r12d,r11d 3453 shrd r14d,r14d,9 3454 xor r13d,r10d 3455 vpshufb xmm6,xmm6,xmm8 3456 xor r12d,eax 3457 shrd r13d,r13d,5 3458 xor r14d,ecx 3459 vpaddd xmm1,xmm1,xmm6 3460 and r12d,r10d 3461 xor r13d,r10d 3462 add ebx,DWORD[24+rsp] 3463 vpshufd xmm7,xmm1,80 3464 mov r15d,ecx 3465 xor r12d,eax 3466 shrd r14d,r14d,11 3467 vpsrld xmm6,xmm7,10 3468 xor r15d,edx 3469 add ebx,r12d 3470 shrd r13d,r13d,6 3471 vpsrlq xmm7,xmm7,17 3472 and edi,r15d 3473 xor r14d,ecx 3474 add ebx,r13d 3475 vpxor xmm6,xmm6,xmm7 3476 xor edi,edx 3477 shrd r14d,r14d,2 3478 add r9d,ebx 3479 vpsrlq xmm7,xmm7,2 3480 add ebx,edi 3481 mov r13d,r9d 3482 add r14d,ebx 3483 vpxor xmm6,xmm6,xmm7 3484 shrd r13d,r13d,14 3485 mov ebx,r14d 3486 mov r12d,r10d 3487 vpshufb xmm6,xmm6,xmm9 3488 shrd r14d,r14d,9 3489 xor r13d,r9d 3490 xor r12d,r11d 3491 vpaddd xmm1,xmm1,xmm6 3492 shrd r13d,r13d,5 3493 xor r14d,ebx 3494 and r12d,r9d 3495 vpaddd xmm6,xmm1,XMMWORD[32+rbp] 3496 xor r13d,r9d 3497 add eax,DWORD[28+rsp] 3498 mov edi,ebx 3499 xor r12d,r11d 3500 shrd r14d,r14d,11 3501 xor edi,ecx 3502 add eax,r12d 3503 shrd r13d,r13d,6 3504 and r15d,edi 3505 xor r14d,ebx 3506 add eax,r13d 3507 xor r15d,ecx 3508 shrd r14d,r14d,2 3509 add r8d,eax 3510 add eax,r15d 3511 mov r13d,r8d 3512 add r14d,eax 3513 vmovdqa XMMWORD[16+rsp],xmm6 3514 vpalignr xmm4,xmm3,xmm2,4 3515 shrd r13d,r13d,14 3516 mov eax,r14d 3517 mov r12d,r9d 3518 vpalignr xmm7,xmm1,xmm0,4 3519 shrd r14d,r14d,9 3520 xor r13d,r8d 3521 xor r12d,r10d 3522 vpsrld xmm6,xmm4,7 3523 shrd r13d,r13d,5 3524 xor r14d,eax 3525 and r12d,r8d 3526 vpaddd xmm2,xmm2,xmm7 3527 xor r13d,r8d 3528 add r11d,DWORD[32+rsp] 3529 mov r15d,eax 3530 vpsrld xmm7,xmm4,3 3531 xor r12d,r10d 3532 shrd r14d,r14d,11 3533 xor r15d,ebx 3534 vpslld xmm5,xmm4,14 3535 add r11d,r12d 3536 shrd r13d,r13d,6 3537 and edi,r15d 3538 vpxor xmm4,xmm7,xmm6 3539 xor r14d,eax 3540 add r11d,r13d 3541 xor edi,ebx 3542 vpshufd xmm7,xmm1,250 3543 shrd r14d,r14d,2 3544 add edx,r11d 3545 add r11d,edi 3546 vpsrld xmm6,xmm6,11 3547 mov r13d,edx 3548 add r14d,r11d 3549 shrd r13d,r13d,14 3550 vpxor xmm4,xmm4,xmm5 3551 mov r11d,r14d 3552 mov r12d,r8d 3553 shrd r14d,r14d,9 3554 vpslld xmm5,xmm5,11 3555 xor r13d,edx 3556 xor r12d,r9d 3557 shrd r13d,r13d,5 3558 vpxor xmm4,xmm4,xmm6 3559 xor r14d,r11d 3560 and r12d,edx 3561 xor r13d,edx 3562 vpsrld xmm6,xmm7,10 3563 add r10d,DWORD[36+rsp] 3564 mov edi,r11d 3565 xor r12d,r9d 3566 vpxor xmm4,xmm4,xmm5 3567 shrd r14d,r14d,11 3568 xor edi,eax 3569 add r10d,r12d 3570 vpsrlq xmm7,xmm7,17 3571 shrd r13d,r13d,6 3572 and r15d,edi 3573 xor r14d,r11d 3574 vpaddd xmm2,xmm2,xmm4 3575 add r10d,r13d 3576 xor r15d,eax 3577 shrd r14d,r14d,2 3578 vpxor xmm6,xmm6,xmm7 3579 add ecx,r10d 3580 add r10d,r15d 3581 mov r13d,ecx 3582 vpsrlq xmm7,xmm7,2 3583 add r14d,r10d 3584 shrd r13d,r13d,14 3585 mov r10d,r14d 3586 vpxor xmm6,xmm6,xmm7 3587 mov r12d,edx 3588 shrd r14d,r14d,9 3589 xor r13d,ecx 3590 vpshufb xmm6,xmm6,xmm8 3591 xor r12d,r8d 3592 shrd r13d,r13d,5 3593 xor r14d,r10d 3594 vpaddd xmm2,xmm2,xmm6 3595 and r12d,ecx 3596 xor r13d,ecx 3597 add r9d,DWORD[40+rsp] 3598 vpshufd xmm7,xmm2,80 3599 mov r15d,r10d 3600 xor r12d,r8d 3601 shrd r14d,r14d,11 3602 vpsrld xmm6,xmm7,10 3603 xor r15d,r11d 3604 add r9d,r12d 3605 shrd r13d,r13d,6 3606 vpsrlq xmm7,xmm7,17 3607 and edi,r15d 3608 xor r14d,r10d 3609 add r9d,r13d 3610 vpxor xmm6,xmm6,xmm7 3611 xor edi,r11d 3612 shrd r14d,r14d,2 3613 add ebx,r9d 3614 vpsrlq xmm7,xmm7,2 3615 add r9d,edi 3616 mov r13d,ebx 3617 add r14d,r9d 3618 vpxor xmm6,xmm6,xmm7 3619 shrd r13d,r13d,14 3620 mov r9d,r14d 3621 mov r12d,ecx 3622 vpshufb xmm6,xmm6,xmm9 3623 shrd r14d,r14d,9 3624 xor r13d,ebx 3625 xor r12d,edx 3626 vpaddd xmm2,xmm2,xmm6 3627 shrd r13d,r13d,5 3628 xor r14d,r9d 3629 and r12d,ebx 3630 vpaddd xmm6,xmm2,XMMWORD[64+rbp] 3631 xor r13d,ebx 3632 add r8d,DWORD[44+rsp] 3633 mov edi,r9d 3634 xor r12d,edx 3635 shrd r14d,r14d,11 3636 xor edi,r10d 3637 add r8d,r12d 3638 shrd r13d,r13d,6 3639 and r15d,edi 3640 xor r14d,r9d 3641 add r8d,r13d 3642 xor r15d,r10d 3643 shrd r14d,r14d,2 3644 add eax,r8d 3645 add r8d,r15d 3646 mov r13d,eax 3647 add r14d,r8d 3648 vmovdqa XMMWORD[32+rsp],xmm6 3649 vpalignr xmm4,xmm0,xmm3,4 3650 shrd r13d,r13d,14 3651 mov r8d,r14d 3652 mov r12d,ebx 3653 vpalignr xmm7,xmm2,xmm1,4 3654 shrd r14d,r14d,9 3655 xor r13d,eax 3656 xor r12d,ecx 3657 vpsrld xmm6,xmm4,7 3658 shrd r13d,r13d,5 3659 xor r14d,r8d 3660 and r12d,eax 3661 vpaddd xmm3,xmm3,xmm7 3662 xor r13d,eax 3663 add edx,DWORD[48+rsp] 3664 mov r15d,r8d 3665 vpsrld xmm7,xmm4,3 3666 xor r12d,ecx 3667 shrd r14d,r14d,11 3668 xor r15d,r9d 3669 vpslld xmm5,xmm4,14 3670 add edx,r12d 3671 shrd r13d,r13d,6 3672 and edi,r15d 3673 vpxor xmm4,xmm7,xmm6 3674 xor r14d,r8d 3675 add edx,r13d 3676 xor edi,r9d 3677 vpshufd xmm7,xmm2,250 3678 shrd r14d,r14d,2 3679 add r11d,edx 3680 add edx,edi 3681 vpsrld xmm6,xmm6,11 3682 mov r13d,r11d 3683 add r14d,edx 3684 shrd r13d,r13d,14 3685 vpxor xmm4,xmm4,xmm5 3686 mov edx,r14d 3687 mov r12d,eax 3688 shrd r14d,r14d,9 3689 vpslld xmm5,xmm5,11 3690 xor r13d,r11d 3691 xor r12d,ebx 3692 shrd r13d,r13d,5 3693 vpxor xmm4,xmm4,xmm6 3694 xor r14d,edx 3695 and r12d,r11d 3696 xor r13d,r11d 3697 vpsrld xmm6,xmm7,10 3698 add ecx,DWORD[52+rsp] 3699 mov edi,edx 3700 xor r12d,ebx 3701 vpxor xmm4,xmm4,xmm5 3702 shrd r14d,r14d,11 3703 xor edi,r8d 3704 add ecx,r12d 3705 vpsrlq xmm7,xmm7,17 3706 shrd r13d,r13d,6 3707 and r15d,edi 3708 xor r14d,edx 3709 vpaddd xmm3,xmm3,xmm4 3710 add ecx,r13d 3711 xor r15d,r8d 3712 shrd r14d,r14d,2 3713 vpxor xmm6,xmm6,xmm7 3714 add r10d,ecx 3715 add ecx,r15d 3716 mov r13d,r10d 3717 vpsrlq xmm7,xmm7,2 3718 add r14d,ecx 3719 shrd r13d,r13d,14 3720 mov ecx,r14d 3721 vpxor xmm6,xmm6,xmm7 3722 mov r12d,r11d 3723 shrd r14d,r14d,9 3724 xor r13d,r10d 3725 vpshufb xmm6,xmm6,xmm8 3726 xor r12d,eax 3727 shrd r13d,r13d,5 3728 xor r14d,ecx 3729 vpaddd xmm3,xmm3,xmm6 3730 and r12d,r10d 3731 xor r13d,r10d 3732 add ebx,DWORD[56+rsp] 3733 vpshufd xmm7,xmm3,80 3734 mov r15d,ecx 3735 xor r12d,eax 3736 shrd r14d,r14d,11 3737 vpsrld xmm6,xmm7,10 3738 xor r15d,edx 3739 add ebx,r12d 3740 shrd r13d,r13d,6 3741 vpsrlq xmm7,xmm7,17 3742 and edi,r15d 3743 xor r14d,ecx 3744 add ebx,r13d 3745 vpxor xmm6,xmm6,xmm7 3746 xor edi,edx 3747 shrd r14d,r14d,2 3748 add r9d,ebx 3749 vpsrlq xmm7,xmm7,2 3750 add ebx,edi 3751 mov r13d,r9d 3752 add r14d,ebx 3753 vpxor xmm6,xmm6,xmm7 3754 shrd r13d,r13d,14 3755 mov ebx,r14d 3756 mov r12d,r10d 3757 vpshufb xmm6,xmm6,xmm9 3758 shrd r14d,r14d,9 3759 xor r13d,r9d 3760 xor r12d,r11d 3761 vpaddd xmm3,xmm3,xmm6 3762 shrd r13d,r13d,5 3763 xor r14d,ebx 3764 and r12d,r9d 3765 vpaddd xmm6,xmm3,XMMWORD[96+rbp] 3766 xor r13d,r9d 3767 add eax,DWORD[60+rsp] 3768 mov edi,ebx 3769 xor r12d,r11d 3770 shrd r14d,r14d,11 3771 xor edi,ecx 3772 add eax,r12d 3773 shrd r13d,r13d,6 3774 and r15d,edi 3775 xor r14d,ebx 3776 add eax,r13d 3777 xor r15d,ecx 3778 shrd r14d,r14d,2 3779 add r8d,eax 3780 add eax,r15d 3781 mov r13d,r8d 3782 add r14d,eax 3783 vmovdqa XMMWORD[48+rsp],xmm6 3784 cmp BYTE[131+rbp],0 3785 jne NEAR $L$avx_00_47 3786 shrd r13d,r13d,14 3787 mov eax,r14d 3788 mov r12d,r9d 3789 shrd r14d,r14d,9 3790 xor r13d,r8d 3791 xor r12d,r10d 3792 shrd r13d,r13d,5 3793 xor r14d,eax 3794 and r12d,r8d 3795 xor r13d,r8d 3796 add r11d,DWORD[rsp] 3797 mov r15d,eax 3798 xor r12d,r10d 3799 shrd r14d,r14d,11 3800 xor r15d,ebx 3801 add r11d,r12d 3802 shrd r13d,r13d,6 3803 and edi,r15d 3804 xor r14d,eax 3805 add r11d,r13d 3806 xor edi,ebx 3807 shrd r14d,r14d,2 3808 add edx,r11d 3809 add r11d,edi 3810 mov r13d,edx 3811 add r14d,r11d 3812 shrd r13d,r13d,14 3813 mov r11d,r14d 3814 mov r12d,r8d 3815 shrd r14d,r14d,9 3816 xor r13d,edx 3817 xor r12d,r9d 3818 shrd r13d,r13d,5 3819 xor r14d,r11d 3820 and r12d,edx 3821 xor r13d,edx 3822 add r10d,DWORD[4+rsp] 3823 mov edi,r11d 3824 xor r12d,r9d 3825 shrd r14d,r14d,11 3826 xor edi,eax 3827 add r10d,r12d 3828 shrd r13d,r13d,6 3829 and r15d,edi 3830 xor r14d,r11d 3831 add r10d,r13d 3832 xor r15d,eax 3833 shrd r14d,r14d,2 3834 add ecx,r10d 3835 add r10d,r15d 3836 mov r13d,ecx 3837 add r14d,r10d 3838 shrd r13d,r13d,14 3839 mov r10d,r14d 3840 mov r12d,edx 3841 shrd r14d,r14d,9 3842 xor r13d,ecx 3843 xor r12d,r8d 3844 shrd r13d,r13d,5 3845 xor r14d,r10d 3846 and r12d,ecx 3847 xor r13d,ecx 3848 add r9d,DWORD[8+rsp] 3849 mov r15d,r10d 3850 xor r12d,r8d 3851 shrd r14d,r14d,11 3852 xor r15d,r11d 3853 add r9d,r12d 3854 shrd r13d,r13d,6 3855 and edi,r15d 3856 xor r14d,r10d 3857 add r9d,r13d 3858 xor edi,r11d 3859 shrd r14d,r14d,2 3860 add ebx,r9d 3861 add r9d,edi 3862 mov r13d,ebx 3863 add r14d,r9d 3864 shrd r13d,r13d,14 3865 mov r9d,r14d 3866 mov r12d,ecx 3867 shrd r14d,r14d,9 3868 xor r13d,ebx 3869 xor r12d,edx 3870 shrd r13d,r13d,5 3871 xor r14d,r9d 3872 and r12d,ebx 3873 xor r13d,ebx 3874 add r8d,DWORD[12+rsp] 3875 mov edi,r9d 3876 xor r12d,edx 3877 shrd r14d,r14d,11 3878 xor edi,r10d 3879 add r8d,r12d 3880 shrd r13d,r13d,6 3881 and r15d,edi 3882 xor r14d,r9d 3883 add r8d,r13d 3884 xor r15d,r10d 3885 shrd r14d,r14d,2 3886 add eax,r8d 3887 add r8d,r15d 3888 mov r13d,eax 3889 add r14d,r8d 3890 shrd r13d,r13d,14 3891 mov r8d,r14d 3892 mov r12d,ebx 3893 shrd r14d,r14d,9 3894 xor r13d,eax 3895 xor r12d,ecx 3896 shrd r13d,r13d,5 3897 xor r14d,r8d 3898 and r12d,eax 3899 xor r13d,eax 3900 add edx,DWORD[16+rsp] 3901 mov r15d,r8d 3902 xor r12d,ecx 3903 shrd r14d,r14d,11 3904 xor r15d,r9d 3905 add edx,r12d 3906 shrd r13d,r13d,6 3907 and edi,r15d 3908 xor r14d,r8d 3909 add edx,r13d 3910 xor edi,r9d 3911 shrd r14d,r14d,2 3912 add r11d,edx 3913 add edx,edi 3914 mov r13d,r11d 3915 add r14d,edx 3916 shrd r13d,r13d,14 3917 mov edx,r14d 3918 mov r12d,eax 3919 shrd r14d,r14d,9 3920 xor r13d,r11d 3921 xor r12d,ebx 3922 shrd r13d,r13d,5 3923 xor r14d,edx 3924 and r12d,r11d 3925 xor r13d,r11d 3926 add ecx,DWORD[20+rsp] 3927 mov edi,edx 3928 xor r12d,ebx 3929 shrd r14d,r14d,11 3930 xor edi,r8d 3931 add ecx,r12d 3932 shrd r13d,r13d,6 3933 and r15d,edi 3934 xor r14d,edx 3935 add ecx,r13d 3936 xor r15d,r8d 3937 shrd r14d,r14d,2 3938 add r10d,ecx 3939 add ecx,r15d 3940 mov r13d,r10d 3941 add r14d,ecx 3942 shrd r13d,r13d,14 3943 mov ecx,r14d 3944 mov r12d,r11d 3945 shrd r14d,r14d,9 3946 xor r13d,r10d 3947 xor r12d,eax 3948 shrd r13d,r13d,5 3949 xor r14d,ecx 3950 and r12d,r10d 3951 xor r13d,r10d 3952 add ebx,DWORD[24+rsp] 3953 mov r15d,ecx 3954 xor r12d,eax 3955 shrd r14d,r14d,11 3956 xor r15d,edx 3957 add ebx,r12d 3958 shrd r13d,r13d,6 3959 and edi,r15d 3960 xor r14d,ecx 3961 add ebx,r13d 3962 xor edi,edx 3963 shrd r14d,r14d,2 3964 add r9d,ebx 3965 add ebx,edi 3966 mov r13d,r9d 3967 add r14d,ebx 3968 shrd r13d,r13d,14 3969 mov ebx,r14d 3970 mov r12d,r10d 3971 shrd r14d,r14d,9 3972 xor r13d,r9d 3973 xor r12d,r11d 3974 shrd r13d,r13d,5 3975 xor r14d,ebx 3976 and r12d,r9d 3977 xor r13d,r9d 3978 add eax,DWORD[28+rsp] 3979 mov edi,ebx 3980 xor r12d,r11d 3981 shrd r14d,r14d,11 3982 xor edi,ecx 3983 add eax,r12d 3984 shrd r13d,r13d,6 3985 and r15d,edi 3986 xor r14d,ebx 3987 add eax,r13d 3988 xor r15d,ecx 3989 shrd r14d,r14d,2 3990 add r8d,eax 3991 add eax,r15d 3992 mov r13d,r8d 3993 add r14d,eax 3994 shrd r13d,r13d,14 3995 mov eax,r14d 3996 mov r12d,r9d 3997 shrd r14d,r14d,9 3998 xor r13d,r8d 3999 xor r12d,r10d 4000 shrd r13d,r13d,5 4001 xor r14d,eax 4002 and r12d,r8d 4003 xor r13d,r8d 4004 add r11d,DWORD[32+rsp] 4005 mov r15d,eax 4006 xor r12d,r10d 4007 shrd r14d,r14d,11 4008 xor r15d,ebx 4009 add r11d,r12d 4010 shrd r13d,r13d,6 4011 and edi,r15d 4012 xor r14d,eax 4013 add r11d,r13d 4014 xor edi,ebx 4015 shrd r14d,r14d,2 4016 add edx,r11d 4017 add r11d,edi 4018 mov r13d,edx 4019 add r14d,r11d 4020 shrd r13d,r13d,14 4021 mov r11d,r14d 4022 mov r12d,r8d 4023 shrd r14d,r14d,9 4024 xor r13d,edx 4025 xor r12d,r9d 4026 shrd r13d,r13d,5 4027 xor r14d,r11d 4028 and r12d,edx 4029 xor r13d,edx 4030 add r10d,DWORD[36+rsp] 4031 mov edi,r11d 4032 xor r12d,r9d 4033 shrd r14d,r14d,11 4034 xor edi,eax 4035 add r10d,r12d 4036 shrd r13d,r13d,6 4037 and r15d,edi 4038 xor r14d,r11d 4039 add r10d,r13d 4040 xor r15d,eax 4041 shrd r14d,r14d,2 4042 add ecx,r10d 4043 add r10d,r15d 4044 mov r13d,ecx 4045 add r14d,r10d 4046 shrd r13d,r13d,14 4047 mov r10d,r14d 4048 mov r12d,edx 4049 shrd r14d,r14d,9 4050 xor r13d,ecx 4051 xor r12d,r8d 4052 shrd r13d,r13d,5 4053 xor r14d,r10d 4054 and r12d,ecx 4055 xor r13d,ecx 4056 add r9d,DWORD[40+rsp] 4057 mov r15d,r10d 4058 xor r12d,r8d 4059 shrd r14d,r14d,11 4060 xor r15d,r11d 4061 add r9d,r12d 4062 shrd r13d,r13d,6 4063 and edi,r15d 4064 xor r14d,r10d 4065 add r9d,r13d 4066 xor edi,r11d 4067 shrd r14d,r14d,2 4068 add ebx,r9d 4069 add r9d,edi 4070 mov r13d,ebx 4071 add r14d,r9d 4072 shrd r13d,r13d,14 4073 mov r9d,r14d 4074 mov r12d,ecx 4075 shrd r14d,r14d,9 4076 xor r13d,ebx 4077 xor r12d,edx 4078 shrd r13d,r13d,5 4079 xor r14d,r9d 4080 and r12d,ebx 4081 xor r13d,ebx 4082 add r8d,DWORD[44+rsp] 4083 mov edi,r9d 4084 xor r12d,edx 4085 shrd r14d,r14d,11 4086 xor edi,r10d 4087 add r8d,r12d 4088 shrd r13d,r13d,6 4089 and r15d,edi 4090 xor r14d,r9d 4091 add r8d,r13d 4092 xor r15d,r10d 4093 shrd r14d,r14d,2 4094 add eax,r8d 4095 add r8d,r15d 4096 mov r13d,eax 4097 add r14d,r8d 4098 shrd r13d,r13d,14 4099 mov r8d,r14d 4100 mov r12d,ebx 4101 shrd r14d,r14d,9 4102 xor r13d,eax 4103 xor r12d,ecx 4104 shrd r13d,r13d,5 4105 xor r14d,r8d 4106 and r12d,eax 4107 xor r13d,eax 4108 add edx,DWORD[48+rsp] 4109 mov r15d,r8d 4110 xor r12d,ecx 4111 shrd r14d,r14d,11 4112 xor r15d,r9d 4113 add edx,r12d 4114 shrd r13d,r13d,6 4115 and edi,r15d 4116 xor r14d,r8d 4117 add edx,r13d 4118 xor edi,r9d 4119 shrd r14d,r14d,2 4120 add r11d,edx 4121 add edx,edi 4122 mov r13d,r11d 4123 add r14d,edx 4124 shrd r13d,r13d,14 4125 mov edx,r14d 4126 mov r12d,eax 4127 shrd r14d,r14d,9 4128 xor r13d,r11d 4129 xor r12d,ebx 4130 shrd r13d,r13d,5 4131 xor r14d,edx 4132 and r12d,r11d 4133 xor r13d,r11d 4134 add ecx,DWORD[52+rsp] 4135 mov edi,edx 4136 xor r12d,ebx 4137 shrd r14d,r14d,11 4138 xor edi,r8d 4139 add ecx,r12d 4140 shrd r13d,r13d,6 4141 and r15d,edi 4142 xor r14d,edx 4143 add ecx,r13d 4144 xor r15d,r8d 4145 shrd r14d,r14d,2 4146 add r10d,ecx 4147 add ecx,r15d 4148 mov r13d,r10d 4149 add r14d,ecx 4150 shrd r13d,r13d,14 4151 mov ecx,r14d 4152 mov r12d,r11d 4153 shrd r14d,r14d,9 4154 xor r13d,r10d 4155 xor r12d,eax 4156 shrd r13d,r13d,5 4157 xor r14d,ecx 4158 and r12d,r10d 4159 xor r13d,r10d 4160 add ebx,DWORD[56+rsp] 4161 mov r15d,ecx 4162 xor r12d,eax 4163 shrd r14d,r14d,11 4164 xor r15d,edx 4165 add ebx,r12d 4166 shrd r13d,r13d,6 4167 and edi,r15d 4168 xor r14d,ecx 4169 add ebx,r13d 4170 xor edi,edx 4171 shrd r14d,r14d,2 4172 add r9d,ebx 4173 add ebx,edi 4174 mov r13d,r9d 4175 add r14d,ebx 4176 shrd r13d,r13d,14 4177 mov ebx,r14d 4178 mov r12d,r10d 4179 shrd r14d,r14d,9 4180 xor r13d,r9d 4181 xor r12d,r11d 4182 shrd r13d,r13d,5 4183 xor r14d,ebx 4184 and r12d,r9d 4185 xor r13d,r9d 4186 add eax,DWORD[60+rsp] 4187 mov edi,ebx 4188 xor r12d,r11d 4189 shrd r14d,r14d,11 4190 xor edi,ecx 4191 add eax,r12d 4192 shrd r13d,r13d,6 4193 and r15d,edi 4194 xor r14d,ebx 4195 add eax,r13d 4196 xor r15d,ecx 4197 shrd r14d,r14d,2 4198 add r8d,eax 4199 add eax,r15d 4200 mov r13d,r8d 4201 add r14d,eax 4202 mov rdi,QWORD[((64+0))+rsp] 4203 mov eax,r14d 4204 4205 add eax,DWORD[rdi] 4206 lea rsi,[64+rsi] 4207 add ebx,DWORD[4+rdi] 4208 add ecx,DWORD[8+rdi] 4209 add edx,DWORD[12+rdi] 4210 add r8d,DWORD[16+rdi] 4211 add r9d,DWORD[20+rdi] 4212 add r10d,DWORD[24+rdi] 4213 add r11d,DWORD[28+rdi] 4214 4215 cmp rsi,QWORD[((64+16))+rsp] 4216 4217 mov DWORD[rdi],eax 4218 mov DWORD[4+rdi],ebx 4219 mov DWORD[8+rdi],ecx 4220 mov DWORD[12+rdi],edx 4221 mov DWORD[16+rdi],r8d 4222 mov DWORD[20+rdi],r9d 4223 mov DWORD[24+rdi],r10d 4224 mov DWORD[28+rdi],r11d 4225 jb NEAR $L$loop_avx 4226 4227 mov rsi,QWORD[88+rsp] 4228 4229 vzeroupper 4230 movaps xmm6,XMMWORD[((64+32))+rsp] 4231 movaps xmm7,XMMWORD[((64+48))+rsp] 4232 movaps xmm8,XMMWORD[((64+64))+rsp] 4233 movaps xmm9,XMMWORD[((64+80))+rsp] 4234 mov r15,QWORD[((-48))+rsi] 4235 4236 mov r14,QWORD[((-40))+rsi] 4237 4238 mov r13,QWORD[((-32))+rsi] 4239 4240 mov r12,QWORD[((-24))+rsi] 4241 4242 mov rbp,QWORD[((-16))+rsi] 4243 4244 mov rbx,QWORD[((-8))+rsi] 4245 4246 lea rsp,[rsi] 4247 4248 $L$epilogue_avx: 4249 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 4250 mov rsi,QWORD[16+rsp] 4251 DB 0F3h,0C3h ;repret 4252 4253 $L$SEH_end_sha256_block_data_order_avx: 4254 4255 ALIGN 64 4256 sha256_block_data_order_avx2: 4257 mov QWORD[8+rsp],rdi ;WIN64 prologue 4258 mov QWORD[16+rsp],rsi 4259 mov rax,rsp 4260 $L$SEH_begin_sha256_block_data_order_avx2: 4261 mov rdi,rcx 4262 mov rsi,rdx 4263 mov rdx,r8 4264 4265 4266 4267 $L$avx2_shortcut: 4268 mov rax,rsp 4269 4270 push rbx 4271 4272 push rbp 4273 4274 push r12 4275 4276 push r13 4277 4278 push r14 4279 4280 push r15 4281 4282 sub rsp,608 4283 shl rdx,4 4284 and rsp,-256*4 4285 lea rdx,[rdx*4+rsi] 4286 add rsp,448 4287 mov QWORD[((64+0))+rsp],rdi 4288 mov QWORD[((64+8))+rsp],rsi 4289 mov QWORD[((64+16))+rsp],rdx 4290 mov QWORD[88+rsp],rax 4291 4292 movaps XMMWORD[(64+32)+rsp],xmm6 4293 movaps XMMWORD[(64+48)+rsp],xmm7 4294 movaps XMMWORD[(64+64)+rsp],xmm8 4295 movaps XMMWORD[(64+80)+rsp],xmm9 4296 $L$prologue_avx2: 4297 4298 vzeroupper 4299 sub rsi,-16*4 4300 mov eax,DWORD[rdi] 4301 mov r12,rsi 4302 mov ebx,DWORD[4+rdi] 4303 cmp rsi,rdx 4304 mov ecx,DWORD[8+rdi] 4305 cmove r12,rsp 4306 mov edx,DWORD[12+rdi] 4307 mov r8d,DWORD[16+rdi] 4308 mov r9d,DWORD[20+rdi] 4309 mov r10d,DWORD[24+rdi] 4310 mov r11d,DWORD[28+rdi] 4311 vmovdqa ymm8,YMMWORD[((K256+512+32))] 4312 vmovdqa ymm9,YMMWORD[((K256+512+64))] 4313 jmp NEAR $L$oop_avx2 4314 ALIGN 16 4315 $L$oop_avx2: 4316 vmovdqa ymm7,YMMWORD[((K256+512))] 4317 vmovdqu xmm0,XMMWORD[((-64+0))+rsi] 4318 vmovdqu xmm1,XMMWORD[((-64+16))+rsi] 4319 vmovdqu xmm2,XMMWORD[((-64+32))+rsi] 4320 vmovdqu xmm3,XMMWORD[((-64+48))+rsi] 4321 4322 vinserti128 ymm0,ymm0,XMMWORD[r12],1 4323 vinserti128 ymm1,ymm1,XMMWORD[16+r12],1 4324 vpshufb ymm0,ymm0,ymm7 4325 vinserti128 ymm2,ymm2,XMMWORD[32+r12],1 4326 vpshufb ymm1,ymm1,ymm7 4327 vinserti128 ymm3,ymm3,XMMWORD[48+r12],1 4328 4329 lea rbp,[K256] 4330 vpshufb ymm2,ymm2,ymm7 4331 vpaddd ymm4,ymm0,YMMWORD[rbp] 4332 vpshufb ymm3,ymm3,ymm7 4333 vpaddd ymm5,ymm1,YMMWORD[32+rbp] 4334 vpaddd ymm6,ymm2,YMMWORD[64+rbp] 4335 vpaddd ymm7,ymm3,YMMWORD[96+rbp] 4336 vmovdqa YMMWORD[rsp],ymm4 4337 xor r14d,r14d 4338 vmovdqa YMMWORD[32+rsp],ymm5 4339 lea rsp,[((-64))+rsp] 4340 mov edi,ebx 4341 vmovdqa YMMWORD[rsp],ymm6 4342 xor edi,ecx 4343 vmovdqa YMMWORD[32+rsp],ymm7 4344 mov r12d,r9d 4345 sub rbp,-16*2*4 4346 jmp NEAR $L$avx2_00_47 4347 4348 ALIGN 16 4349 $L$avx2_00_47: 4350 lea rsp,[((-64))+rsp] 4351 vpalignr ymm4,ymm1,ymm0,4 4352 add r11d,DWORD[((0+128))+rsp] 4353 and r12d,r8d 4354 rorx r13d,r8d,25 4355 vpalignr ymm7,ymm3,ymm2,4 4356 rorx r15d,r8d,11 4357 lea eax,[r14*1+rax] 4358 lea r11d,[r12*1+r11] 4359 vpsrld ymm6,ymm4,7 4360 andn r12d,r8d,r10d 4361 xor r13d,r15d 4362 rorx r14d,r8d,6 4363 vpaddd ymm0,ymm0,ymm7 4364 lea r11d,[r12*1+r11] 4365 xor r13d,r14d 4366 mov r15d,eax 4367 vpsrld ymm7,ymm4,3 4368 rorx r12d,eax,22 4369 lea r11d,[r13*1+r11] 4370 xor r15d,ebx 4371 vpslld ymm5,ymm4,14 4372 rorx r14d,eax,13 4373 rorx r13d,eax,2 4374 lea edx,[r11*1+rdx] 4375 vpxor ymm4,ymm7,ymm6 4376 and edi,r15d 4377 xor r14d,r12d 4378 xor edi,ebx 4379 vpshufd ymm7,ymm3,250 4380 xor r14d,r13d 4381 lea r11d,[rdi*1+r11] 4382 mov r12d,r8d 4383 vpsrld ymm6,ymm6,11 4384 add r10d,DWORD[((4+128))+rsp] 4385 and r12d,edx 4386 rorx r13d,edx,25 4387 vpxor ymm4,ymm4,ymm5 4388 rorx edi,edx,11 4389 lea r11d,[r14*1+r11] 4390 lea r10d,[r12*1+r10] 4391 vpslld ymm5,ymm5,11 4392 andn r12d,edx,r9d 4393 xor r13d,edi 4394 rorx r14d,edx,6 4395 vpxor ymm4,ymm4,ymm6 4396 lea r10d,[r12*1+r10] 4397 xor r13d,r14d 4398 mov edi,r11d 4399 vpsrld ymm6,ymm7,10 4400 rorx r12d,r11d,22 4401 lea r10d,[r13*1+r10] 4402 xor edi,eax 4403 vpxor ymm4,ymm4,ymm5 4404 rorx r14d,r11d,13 4405 rorx r13d,r11d,2 4406 lea ecx,[r10*1+rcx] 4407 vpsrlq ymm7,ymm7,17 4408 and r15d,edi 4409 xor r14d,r12d 4410 xor r15d,eax 4411 vpaddd ymm0,ymm0,ymm4 4412 xor r14d,r13d 4413 lea r10d,[r15*1+r10] 4414 mov r12d,edx 4415 vpxor ymm6,ymm6,ymm7 4416 add r9d,DWORD[((8+128))+rsp] 4417 and r12d,ecx 4418 rorx r13d,ecx,25 4419 vpsrlq ymm7,ymm7,2 4420 rorx r15d,ecx,11 4421 lea r10d,[r14*1+r10] 4422 lea r9d,[r12*1+r9] 4423 vpxor ymm6,ymm6,ymm7 4424 andn r12d,ecx,r8d 4425 xor r13d,r15d 4426 rorx r14d,ecx,6 4427 vpshufb ymm6,ymm6,ymm8 4428 lea r9d,[r12*1+r9] 4429 xor r13d,r14d 4430 mov r15d,r10d 4431 vpaddd ymm0,ymm0,ymm6 4432 rorx r12d,r10d,22 4433 lea r9d,[r13*1+r9] 4434 xor r15d,r11d 4435 vpshufd ymm7,ymm0,80 4436 rorx r14d,r10d,13 4437 rorx r13d,r10d,2 4438 lea ebx,[r9*1+rbx] 4439 vpsrld ymm6,ymm7,10 4440 and edi,r15d 4441 xor r14d,r12d 4442 xor edi,r11d 4443 vpsrlq ymm7,ymm7,17 4444 xor r14d,r13d 4445 lea r9d,[rdi*1+r9] 4446 mov r12d,ecx 4447 vpxor ymm6,ymm6,ymm7 4448 add r8d,DWORD[((12+128))+rsp] 4449 and r12d,ebx 4450 rorx r13d,ebx,25 4451 vpsrlq ymm7,ymm7,2 4452 rorx edi,ebx,11 4453 lea r9d,[r14*1+r9] 4454 lea r8d,[r12*1+r8] 4455 vpxor ymm6,ymm6,ymm7 4456 andn r12d,ebx,edx 4457 xor r13d,edi 4458 rorx r14d,ebx,6 4459 vpshufb ymm6,ymm6,ymm9 4460 lea r8d,[r12*1+r8] 4461 xor r13d,r14d 4462 mov edi,r9d 4463 vpaddd ymm0,ymm0,ymm6 4464 rorx r12d,r9d,22 4465 lea r8d,[r13*1+r8] 4466 xor edi,r10d 4467 vpaddd ymm6,ymm0,YMMWORD[rbp] 4468 rorx r14d,r9d,13 4469 rorx r13d,r9d,2 4470 lea eax,[r8*1+rax] 4471 and r15d,edi 4472 xor r14d,r12d 4473 xor r15d,r10d 4474 xor r14d,r13d 4475 lea r8d,[r15*1+r8] 4476 mov r12d,ebx 4477 vmovdqa YMMWORD[rsp],ymm6 4478 vpalignr ymm4,ymm2,ymm1,4 4479 add edx,DWORD[((32+128))+rsp] 4480 and r12d,eax 4481 rorx r13d,eax,25 4482 vpalignr ymm7,ymm0,ymm3,4 4483 rorx r15d,eax,11 4484 lea r8d,[r14*1+r8] 4485 lea edx,[r12*1+rdx] 4486 vpsrld ymm6,ymm4,7 4487 andn r12d,eax,ecx 4488 xor r13d,r15d 4489 rorx r14d,eax,6 4490 vpaddd ymm1,ymm1,ymm7 4491 lea edx,[r12*1+rdx] 4492 xor r13d,r14d 4493 mov r15d,r8d 4494 vpsrld ymm7,ymm4,3 4495 rorx r12d,r8d,22 4496 lea edx,[r13*1+rdx] 4497 xor r15d,r9d 4498 vpslld ymm5,ymm4,14 4499 rorx r14d,r8d,13 4500 rorx r13d,r8d,2 4501 lea r11d,[rdx*1+r11] 4502 vpxor ymm4,ymm7,ymm6 4503 and edi,r15d 4504 xor r14d,r12d 4505 xor edi,r9d 4506 vpshufd ymm7,ymm0,250 4507 xor r14d,r13d 4508 lea edx,[rdi*1+rdx] 4509 mov r12d,eax 4510 vpsrld ymm6,ymm6,11 4511 add ecx,DWORD[((36+128))+rsp] 4512 and r12d,r11d 4513 rorx r13d,r11d,25 4514 vpxor ymm4,ymm4,ymm5 4515 rorx edi,r11d,11 4516 lea edx,[r14*1+rdx] 4517 lea ecx,[r12*1+rcx] 4518 vpslld ymm5,ymm5,11 4519 andn r12d,r11d,ebx 4520 xor r13d,edi 4521 rorx r14d,r11d,6 4522 vpxor ymm4,ymm4,ymm6 4523 lea ecx,[r12*1+rcx] 4524 xor r13d,r14d 4525 mov edi,edx 4526 vpsrld ymm6,ymm7,10 4527 rorx r12d,edx,22 4528 lea ecx,[r13*1+rcx] 4529 xor edi,r8d 4530 vpxor ymm4,ymm4,ymm5 4531 rorx r14d,edx,13 4532 rorx r13d,edx,2 4533 lea r10d,[rcx*1+r10] 4534 vpsrlq ymm7,ymm7,17 4535 and r15d,edi 4536 xor r14d,r12d 4537 xor r15d,r8d 4538 vpaddd ymm1,ymm1,ymm4 4539 xor r14d,r13d 4540 lea ecx,[r15*1+rcx] 4541 mov r12d,r11d 4542 vpxor ymm6,ymm6,ymm7 4543 add ebx,DWORD[((40+128))+rsp] 4544 and r12d,r10d 4545 rorx r13d,r10d,25 4546 vpsrlq ymm7,ymm7,2 4547 rorx r15d,r10d,11 4548 lea ecx,[r14*1+rcx] 4549 lea ebx,[r12*1+rbx] 4550 vpxor ymm6,ymm6,ymm7 4551 andn r12d,r10d,eax 4552 xor r13d,r15d 4553 rorx r14d,r10d,6 4554 vpshufb ymm6,ymm6,ymm8 4555 lea ebx,[r12*1+rbx] 4556 xor r13d,r14d 4557 mov r15d,ecx 4558 vpaddd ymm1,ymm1,ymm6 4559 rorx r12d,ecx,22 4560 lea ebx,[r13*1+rbx] 4561 xor r15d,edx 4562 vpshufd ymm7,ymm1,80 4563 rorx r14d,ecx,13 4564 rorx r13d,ecx,2 4565 lea r9d,[rbx*1+r9] 4566 vpsrld ymm6,ymm7,10 4567 and edi,r15d 4568 xor r14d,r12d 4569 xor edi,edx 4570 vpsrlq ymm7,ymm7,17 4571 xor r14d,r13d 4572 lea ebx,[rdi*1+rbx] 4573 mov r12d,r10d 4574 vpxor ymm6,ymm6,ymm7 4575 add eax,DWORD[((44+128))+rsp] 4576 and r12d,r9d 4577 rorx r13d,r9d,25 4578 vpsrlq ymm7,ymm7,2 4579 rorx edi,r9d,11 4580 lea ebx,[r14*1+rbx] 4581 lea eax,[r12*1+rax] 4582 vpxor ymm6,ymm6,ymm7 4583 andn r12d,r9d,r11d 4584 xor r13d,edi 4585 rorx r14d,r9d,6 4586 vpshufb ymm6,ymm6,ymm9 4587 lea eax,[r12*1+rax] 4588 xor r13d,r14d 4589 mov edi,ebx 4590 vpaddd ymm1,ymm1,ymm6 4591 rorx r12d,ebx,22 4592 lea eax,[r13*1+rax] 4593 xor edi,ecx 4594 vpaddd ymm6,ymm1,YMMWORD[32+rbp] 4595 rorx r14d,ebx,13 4596 rorx r13d,ebx,2 4597 lea r8d,[rax*1+r8] 4598 and r15d,edi 4599 xor r14d,r12d 4600 xor r15d,ecx 4601 xor r14d,r13d 4602 lea eax,[r15*1+rax] 4603 mov r12d,r9d 4604 vmovdqa YMMWORD[32+rsp],ymm6 4605 lea rsp,[((-64))+rsp] 4606 vpalignr ymm4,ymm3,ymm2,4 4607 add r11d,DWORD[((0+128))+rsp] 4608 and r12d,r8d 4609 rorx r13d,r8d,25 4610 vpalignr ymm7,ymm1,ymm0,4 4611 rorx r15d,r8d,11 4612 lea eax,[r14*1+rax] 4613 lea r11d,[r12*1+r11] 4614 vpsrld ymm6,ymm4,7 4615 andn r12d,r8d,r10d 4616 xor r13d,r15d 4617 rorx r14d,r8d,6 4618 vpaddd ymm2,ymm2,ymm7 4619 lea r11d,[r12*1+r11] 4620 xor r13d,r14d 4621 mov r15d,eax 4622 vpsrld ymm7,ymm4,3 4623 rorx r12d,eax,22 4624 lea r11d,[r13*1+r11] 4625 xor r15d,ebx 4626 vpslld ymm5,ymm4,14 4627 rorx r14d,eax,13 4628 rorx r13d,eax,2 4629 lea edx,[r11*1+rdx] 4630 vpxor ymm4,ymm7,ymm6 4631 and edi,r15d 4632 xor r14d,r12d 4633 xor edi,ebx 4634 vpshufd ymm7,ymm1,250 4635 xor r14d,r13d 4636 lea r11d,[rdi*1+r11] 4637 mov r12d,r8d 4638 vpsrld ymm6,ymm6,11 4639 add r10d,DWORD[((4+128))+rsp] 4640 and r12d,edx 4641 rorx r13d,edx,25 4642 vpxor ymm4,ymm4,ymm5 4643 rorx edi,edx,11 4644 lea r11d,[r14*1+r11] 4645 lea r10d,[r12*1+r10] 4646 vpslld ymm5,ymm5,11 4647 andn r12d,edx,r9d 4648 xor r13d,edi 4649 rorx r14d,edx,6 4650 vpxor ymm4,ymm4,ymm6 4651 lea r10d,[r12*1+r10] 4652 xor r13d,r14d 4653 mov edi,r11d 4654 vpsrld ymm6,ymm7,10 4655 rorx r12d,r11d,22 4656 lea r10d,[r13*1+r10] 4657 xor edi,eax 4658 vpxor ymm4,ymm4,ymm5 4659 rorx r14d,r11d,13 4660 rorx r13d,r11d,2 4661 lea ecx,[r10*1+rcx] 4662 vpsrlq ymm7,ymm7,17 4663 and r15d,edi 4664 xor r14d,r12d 4665 xor r15d,eax 4666 vpaddd ymm2,ymm2,ymm4 4667 xor r14d,r13d 4668 lea r10d,[r15*1+r10] 4669 mov r12d,edx 4670 vpxor ymm6,ymm6,ymm7 4671 add r9d,DWORD[((8+128))+rsp] 4672 and r12d,ecx 4673 rorx r13d,ecx,25 4674 vpsrlq ymm7,ymm7,2 4675 rorx r15d,ecx,11 4676 lea r10d,[r14*1+r10] 4677 lea r9d,[r12*1+r9] 4678 vpxor ymm6,ymm6,ymm7 4679 andn r12d,ecx,r8d 4680 xor r13d,r15d 4681 rorx r14d,ecx,6 4682 vpshufb ymm6,ymm6,ymm8 4683 lea r9d,[r12*1+r9] 4684 xor r13d,r14d 4685 mov r15d,r10d 4686 vpaddd ymm2,ymm2,ymm6 4687 rorx r12d,r10d,22 4688 lea r9d,[r13*1+r9] 4689 xor r15d,r11d 4690 vpshufd ymm7,ymm2,80 4691 rorx r14d,r10d,13 4692 rorx r13d,r10d,2 4693 lea ebx,[r9*1+rbx] 4694 vpsrld ymm6,ymm7,10 4695 and edi,r15d 4696 xor r14d,r12d 4697 xor edi,r11d 4698 vpsrlq ymm7,ymm7,17 4699 xor r14d,r13d 4700 lea r9d,[rdi*1+r9] 4701 mov r12d,ecx 4702 vpxor ymm6,ymm6,ymm7 4703 add r8d,DWORD[((12+128))+rsp] 4704 and r12d,ebx 4705 rorx r13d,ebx,25 4706 vpsrlq ymm7,ymm7,2 4707 rorx edi,ebx,11 4708 lea r9d,[r14*1+r9] 4709 lea r8d,[r12*1+r8] 4710 vpxor ymm6,ymm6,ymm7 4711 andn r12d,ebx,edx 4712 xor r13d,edi 4713 rorx r14d,ebx,6 4714 vpshufb ymm6,ymm6,ymm9 4715 lea r8d,[r12*1+r8] 4716 xor r13d,r14d 4717 mov edi,r9d 4718 vpaddd ymm2,ymm2,ymm6 4719 rorx r12d,r9d,22 4720 lea r8d,[r13*1+r8] 4721 xor edi,r10d 4722 vpaddd ymm6,ymm2,YMMWORD[64+rbp] 4723 rorx r14d,r9d,13 4724 rorx r13d,r9d,2 4725 lea eax,[r8*1+rax] 4726 and r15d,edi 4727 xor r14d,r12d 4728 xor r15d,r10d 4729 xor r14d,r13d 4730 lea r8d,[r15*1+r8] 4731 mov r12d,ebx 4732 vmovdqa YMMWORD[rsp],ymm6 4733 vpalignr ymm4,ymm0,ymm3,4 4734 add edx,DWORD[((32+128))+rsp] 4735 and r12d,eax 4736 rorx r13d,eax,25 4737 vpalignr ymm7,ymm2,ymm1,4 4738 rorx r15d,eax,11 4739 lea r8d,[r14*1+r8] 4740 lea edx,[r12*1+rdx] 4741 vpsrld ymm6,ymm4,7 4742 andn r12d,eax,ecx 4743 xor r13d,r15d 4744 rorx r14d,eax,6 4745 vpaddd ymm3,ymm3,ymm7 4746 lea edx,[r12*1+rdx] 4747 xor r13d,r14d 4748 mov r15d,r8d 4749 vpsrld ymm7,ymm4,3 4750 rorx r12d,r8d,22 4751 lea edx,[r13*1+rdx] 4752 xor r15d,r9d 4753 vpslld ymm5,ymm4,14 4754 rorx r14d,r8d,13 4755 rorx r13d,r8d,2 4756 lea r11d,[rdx*1+r11] 4757 vpxor ymm4,ymm7,ymm6 4758 and edi,r15d 4759 xor r14d,r12d 4760 xor edi,r9d 4761 vpshufd ymm7,ymm2,250 4762 xor r14d,r13d 4763 lea edx,[rdi*1+rdx] 4764 mov r12d,eax 4765 vpsrld ymm6,ymm6,11 4766 add ecx,DWORD[((36+128))+rsp] 4767 and r12d,r11d 4768 rorx r13d,r11d,25 4769 vpxor ymm4,ymm4,ymm5 4770 rorx edi,r11d,11 4771 lea edx,[r14*1+rdx] 4772 lea ecx,[r12*1+rcx] 4773 vpslld ymm5,ymm5,11 4774 andn r12d,r11d,ebx 4775 xor r13d,edi 4776 rorx r14d,r11d,6 4777 vpxor ymm4,ymm4,ymm6 4778 lea ecx,[r12*1+rcx] 4779 xor r13d,r14d 4780 mov edi,edx 4781 vpsrld ymm6,ymm7,10 4782 rorx r12d,edx,22 4783 lea ecx,[r13*1+rcx] 4784 xor edi,r8d 4785 vpxor ymm4,ymm4,ymm5 4786 rorx r14d,edx,13 4787 rorx r13d,edx,2 4788 lea r10d,[rcx*1+r10] 4789 vpsrlq ymm7,ymm7,17 4790 and r15d,edi 4791 xor r14d,r12d 4792 xor r15d,r8d 4793 vpaddd ymm3,ymm3,ymm4 4794 xor r14d,r13d 4795 lea ecx,[r15*1+rcx] 4796 mov r12d,r11d 4797 vpxor ymm6,ymm6,ymm7 4798 add ebx,DWORD[((40+128))+rsp] 4799 and r12d,r10d 4800 rorx r13d,r10d,25 4801 vpsrlq ymm7,ymm7,2 4802 rorx r15d,r10d,11 4803 lea ecx,[r14*1+rcx] 4804 lea ebx,[r12*1+rbx] 4805 vpxor ymm6,ymm6,ymm7 4806 andn r12d,r10d,eax 4807 xor r13d,r15d 4808 rorx r14d,r10d,6 4809 vpshufb ymm6,ymm6,ymm8 4810 lea ebx,[r12*1+rbx] 4811 xor r13d,r14d 4812 mov r15d,ecx 4813 vpaddd ymm3,ymm3,ymm6 4814 rorx r12d,ecx,22 4815 lea ebx,[r13*1+rbx] 4816 xor r15d,edx 4817 vpshufd ymm7,ymm3,80 4818 rorx r14d,ecx,13 4819 rorx r13d,ecx,2 4820 lea r9d,[rbx*1+r9] 4821 vpsrld ymm6,ymm7,10 4822 and edi,r15d 4823 xor r14d,r12d 4824 xor edi,edx 4825 vpsrlq ymm7,ymm7,17 4826 xor r14d,r13d 4827 lea ebx,[rdi*1+rbx] 4828 mov r12d,r10d 4829 vpxor ymm6,ymm6,ymm7 4830 add eax,DWORD[((44+128))+rsp] 4831 and r12d,r9d 4832 rorx r13d,r9d,25 4833 vpsrlq ymm7,ymm7,2 4834 rorx edi,r9d,11 4835 lea ebx,[r14*1+rbx] 4836 lea eax,[r12*1+rax] 4837 vpxor ymm6,ymm6,ymm7 4838 andn r12d,r9d,r11d 4839 xor r13d,edi 4840 rorx r14d,r9d,6 4841 vpshufb ymm6,ymm6,ymm9 4842 lea eax,[r12*1+rax] 4843 xor r13d,r14d 4844 mov edi,ebx 4845 vpaddd ymm3,ymm3,ymm6 4846 rorx r12d,ebx,22 4847 lea eax,[r13*1+rax] 4848 xor edi,ecx 4849 vpaddd ymm6,ymm3,YMMWORD[96+rbp] 4850 rorx r14d,ebx,13 4851 rorx r13d,ebx,2 4852 lea r8d,[rax*1+r8] 4853 and r15d,edi 4854 xor r14d,r12d 4855 xor r15d,ecx 4856 xor r14d,r13d 4857 lea eax,[r15*1+rax] 4858 mov r12d,r9d 4859 vmovdqa YMMWORD[32+rsp],ymm6 4860 lea rbp,[128+rbp] 4861 cmp BYTE[3+rbp],0 4862 jne NEAR $L$avx2_00_47 4863 add r11d,DWORD[((0+64))+rsp] 4864 and r12d,r8d 4865 rorx r13d,r8d,25 4866 rorx r15d,r8d,11 4867 lea eax,[r14*1+rax] 4868 lea r11d,[r12*1+r11] 4869 andn r12d,r8d,r10d 4870 xor r13d,r15d 4871 rorx r14d,r8d,6 4872 lea r11d,[r12*1+r11] 4873 xor r13d,r14d 4874 mov r15d,eax 4875 rorx r12d,eax,22 4876 lea r11d,[r13*1+r11] 4877 xor r15d,ebx 4878 rorx r14d,eax,13 4879 rorx r13d,eax,2 4880 lea edx,[r11*1+rdx] 4881 and edi,r15d 4882 xor r14d,r12d 4883 xor edi,ebx 4884 xor r14d,r13d 4885 lea r11d,[rdi*1+r11] 4886 mov r12d,r8d 4887 add r10d,DWORD[((4+64))+rsp] 4888 and r12d,edx 4889 rorx r13d,edx,25 4890 rorx edi,edx,11 4891 lea r11d,[r14*1+r11] 4892 lea r10d,[r12*1+r10] 4893 andn r12d,edx,r9d 4894 xor r13d,edi 4895 rorx r14d,edx,6 4896 lea r10d,[r12*1+r10] 4897 xor r13d,r14d 4898 mov edi,r11d 4899 rorx r12d,r11d,22 4900 lea r10d,[r13*1+r10] 4901 xor edi,eax 4902 rorx r14d,r11d,13 4903 rorx r13d,r11d,2 4904 lea ecx,[r10*1+rcx] 4905 and r15d,edi 4906 xor r14d,r12d 4907 xor r15d,eax 4908 xor r14d,r13d 4909 lea r10d,[r15*1+r10] 4910 mov r12d,edx 4911 add r9d,DWORD[((8+64))+rsp] 4912 and r12d,ecx 4913 rorx r13d,ecx,25 4914 rorx r15d,ecx,11 4915 lea r10d,[r14*1+r10] 4916 lea r9d,[r12*1+r9] 4917 andn r12d,ecx,r8d 4918 xor r13d,r15d 4919 rorx r14d,ecx,6 4920 lea r9d,[r12*1+r9] 4921 xor r13d,r14d 4922 mov r15d,r10d 4923 rorx r12d,r10d,22 4924 lea r9d,[r13*1+r9] 4925 xor r15d,r11d 4926 rorx r14d,r10d,13 4927 rorx r13d,r10d,2 4928 lea ebx,[r9*1+rbx] 4929 and edi,r15d 4930 xor r14d,r12d 4931 xor edi,r11d 4932 xor r14d,r13d 4933 lea r9d,[rdi*1+r9] 4934 mov r12d,ecx 4935 add r8d,DWORD[((12+64))+rsp] 4936 and r12d,ebx 4937 rorx r13d,ebx,25 4938 rorx edi,ebx,11 4939 lea r9d,[r14*1+r9] 4940 lea r8d,[r12*1+r8] 4941 andn r12d,ebx,edx 4942 xor r13d,edi 4943 rorx r14d,ebx,6 4944 lea r8d,[r12*1+r8] 4945 xor r13d,r14d 4946 mov edi,r9d 4947 rorx r12d,r9d,22 4948 lea r8d,[r13*1+r8] 4949 xor edi,r10d 4950 rorx r14d,r9d,13 4951 rorx r13d,r9d,2 4952 lea eax,[r8*1+rax] 4953 and r15d,edi 4954 xor r14d,r12d 4955 xor r15d,r10d 4956 xor r14d,r13d 4957 lea r8d,[r15*1+r8] 4958 mov r12d,ebx 4959 add edx,DWORD[((32+64))+rsp] 4960 and r12d,eax 4961 rorx r13d,eax,25 4962 rorx r15d,eax,11 4963 lea r8d,[r14*1+r8] 4964 lea edx,[r12*1+rdx] 4965 andn r12d,eax,ecx 4966 xor r13d,r15d 4967 rorx r14d,eax,6 4968 lea edx,[r12*1+rdx] 4969 xor r13d,r14d 4970 mov r15d,r8d 4971 rorx r12d,r8d,22 4972 lea edx,[r13*1+rdx] 4973 xor r15d,r9d 4974 rorx r14d,r8d,13 4975 rorx r13d,r8d,2 4976 lea r11d,[rdx*1+r11] 4977 and edi,r15d 4978 xor r14d,r12d 4979 xor edi,r9d 4980 xor r14d,r13d 4981 lea edx,[rdi*1+rdx] 4982 mov r12d,eax 4983 add ecx,DWORD[((36+64))+rsp] 4984 and r12d,r11d 4985 rorx r13d,r11d,25 4986 rorx edi,r11d,11 4987 lea edx,[r14*1+rdx] 4988 lea ecx,[r12*1+rcx] 4989 andn r12d,r11d,ebx 4990 xor r13d,edi 4991 rorx r14d,r11d,6 4992 lea ecx,[r12*1+rcx] 4993 xor r13d,r14d 4994 mov edi,edx 4995 rorx r12d,edx,22 4996 lea ecx,[r13*1+rcx] 4997 xor edi,r8d 4998 rorx r14d,edx,13 4999 rorx r13d,edx,2 5000 lea r10d,[rcx*1+r10] 5001 and r15d,edi 5002 xor r14d,r12d 5003 xor r15d,r8d 5004 xor r14d,r13d 5005 lea ecx,[r15*1+rcx] 5006 mov r12d,r11d 5007 add ebx,DWORD[((40+64))+rsp] 5008 and r12d,r10d 5009 rorx r13d,r10d,25 5010 rorx r15d,r10d,11 5011 lea ecx,[r14*1+rcx] 5012 lea ebx,[r12*1+rbx] 5013 andn r12d,r10d,eax 5014 xor r13d,r15d 5015 rorx r14d,r10d,6 5016 lea ebx,[r12*1+rbx] 5017 xor r13d,r14d 5018 mov r15d,ecx 5019 rorx r12d,ecx,22 5020 lea ebx,[r13*1+rbx] 5021 xor r15d,edx 5022 rorx r14d,ecx,13 5023 rorx r13d,ecx,2 5024 lea r9d,[rbx*1+r9] 5025 and edi,r15d 5026 xor r14d,r12d 5027 xor edi,edx 5028 xor r14d,r13d 5029 lea ebx,[rdi*1+rbx] 5030 mov r12d,r10d 5031 add eax,DWORD[((44+64))+rsp] 5032 and r12d,r9d 5033 rorx r13d,r9d,25 5034 rorx edi,r9d,11 5035 lea ebx,[r14*1+rbx] 5036 lea eax,[r12*1+rax] 5037 andn r12d,r9d,r11d 5038 xor r13d,edi 5039 rorx r14d,r9d,6 5040 lea eax,[r12*1+rax] 5041 xor r13d,r14d 5042 mov edi,ebx 5043 rorx r12d,ebx,22 5044 lea eax,[r13*1+rax] 5045 xor edi,ecx 5046 rorx r14d,ebx,13 5047 rorx r13d,ebx,2 5048 lea r8d,[rax*1+r8] 5049 and r15d,edi 5050 xor r14d,r12d 5051 xor r15d,ecx 5052 xor r14d,r13d 5053 lea eax,[r15*1+rax] 5054 mov r12d,r9d 5055 add r11d,DWORD[rsp] 5056 and r12d,r8d 5057 rorx r13d,r8d,25 5058 rorx r15d,r8d,11 5059 lea eax,[r14*1+rax] 5060 lea r11d,[r12*1+r11] 5061 andn r12d,r8d,r10d 5062 xor r13d,r15d 5063 rorx r14d,r8d,6 5064 lea r11d,[r12*1+r11] 5065 xor r13d,r14d 5066 mov r15d,eax 5067 rorx r12d,eax,22 5068 lea r11d,[r13*1+r11] 5069 xor r15d,ebx 5070 rorx r14d,eax,13 5071 rorx r13d,eax,2 5072 lea edx,[r11*1+rdx] 5073 and edi,r15d 5074 xor r14d,r12d 5075 xor edi,ebx 5076 xor r14d,r13d 5077 lea r11d,[rdi*1+r11] 5078 mov r12d,r8d 5079 add r10d,DWORD[4+rsp] 5080 and r12d,edx 5081 rorx r13d,edx,25 5082 rorx edi,edx,11 5083 lea r11d,[r14*1+r11] 5084 lea r10d,[r12*1+r10] 5085 andn r12d,edx,r9d 5086 xor r13d,edi 5087 rorx r14d,edx,6 5088 lea r10d,[r12*1+r10] 5089 xor r13d,r14d 5090 mov edi,r11d 5091 rorx r12d,r11d,22 5092 lea r10d,[r13*1+r10] 5093 xor edi,eax 5094 rorx r14d,r11d,13 5095 rorx r13d,r11d,2 5096 lea ecx,[r10*1+rcx] 5097 and r15d,edi 5098 xor r14d,r12d 5099 xor r15d,eax 5100 xor r14d,r13d 5101 lea r10d,[r15*1+r10] 5102 mov r12d,edx 5103 add r9d,DWORD[8+rsp] 5104 and r12d,ecx 5105 rorx r13d,ecx,25 5106 rorx r15d,ecx,11 5107 lea r10d,[r14*1+r10] 5108 lea r9d,[r12*1+r9] 5109 andn r12d,ecx,r8d 5110 xor r13d,r15d 5111 rorx r14d,ecx,6 5112 lea r9d,[r12*1+r9] 5113 xor r13d,r14d 5114 mov r15d,r10d 5115 rorx r12d,r10d,22 5116 lea r9d,[r13*1+r9] 5117 xor r15d,r11d 5118 rorx r14d,r10d,13 5119 rorx r13d,r10d,2 5120 lea ebx,[r9*1+rbx] 5121 and edi,r15d 5122 xor r14d,r12d 5123 xor edi,r11d 5124 xor r14d,r13d 5125 lea r9d,[rdi*1+r9] 5126 mov r12d,ecx 5127 add r8d,DWORD[12+rsp] 5128 and r12d,ebx 5129 rorx r13d,ebx,25 5130 rorx edi,ebx,11 5131 lea r9d,[r14*1+r9] 5132 lea r8d,[r12*1+r8] 5133 andn r12d,ebx,edx 5134 xor r13d,edi 5135 rorx r14d,ebx,6 5136 lea r8d,[r12*1+r8] 5137 xor r13d,r14d 5138 mov edi,r9d 5139 rorx r12d,r9d,22 5140 lea r8d,[r13*1+r8] 5141 xor edi,r10d 5142 rorx r14d,r9d,13 5143 rorx r13d,r9d,2 5144 lea eax,[r8*1+rax] 5145 and r15d,edi 5146 xor r14d,r12d 5147 xor r15d,r10d 5148 xor r14d,r13d 5149 lea r8d,[r15*1+r8] 5150 mov r12d,ebx 5151 add edx,DWORD[32+rsp] 5152 and r12d,eax 5153 rorx r13d,eax,25 5154 rorx r15d,eax,11 5155 lea r8d,[r14*1+r8] 5156 lea edx,[r12*1+rdx] 5157 andn r12d,eax,ecx 5158 xor r13d,r15d 5159 rorx r14d,eax,6 5160 lea edx,[r12*1+rdx] 5161 xor r13d,r14d 5162 mov r15d,r8d 5163 rorx r12d,r8d,22 5164 lea edx,[r13*1+rdx] 5165 xor r15d,r9d 5166 rorx r14d,r8d,13 5167 rorx r13d,r8d,2 5168 lea r11d,[rdx*1+r11] 5169 and edi,r15d 5170 xor r14d,r12d 5171 xor edi,r9d 5172 xor r14d,r13d 5173 lea edx,[rdi*1+rdx] 5174 mov r12d,eax 5175 add ecx,DWORD[36+rsp] 5176 and r12d,r11d 5177 rorx r13d,r11d,25 5178 rorx edi,r11d,11 5179 lea edx,[r14*1+rdx] 5180 lea ecx,[r12*1+rcx] 5181 andn r12d,r11d,ebx 5182 xor r13d,edi 5183 rorx r14d,r11d,6 5184 lea ecx,[r12*1+rcx] 5185 xor r13d,r14d 5186 mov edi,edx 5187 rorx r12d,edx,22 5188 lea ecx,[r13*1+rcx] 5189 xor edi,r8d 5190 rorx r14d,edx,13 5191 rorx r13d,edx,2 5192 lea r10d,[rcx*1+r10] 5193 and r15d,edi 5194 xor r14d,r12d 5195 xor r15d,r8d 5196 xor r14d,r13d 5197 lea ecx,[r15*1+rcx] 5198 mov r12d,r11d 5199 add ebx,DWORD[40+rsp] 5200 and r12d,r10d 5201 rorx r13d,r10d,25 5202 rorx r15d,r10d,11 5203 lea ecx,[r14*1+rcx] 5204 lea ebx,[r12*1+rbx] 5205 andn r12d,r10d,eax 5206 xor r13d,r15d 5207 rorx r14d,r10d,6 5208 lea ebx,[r12*1+rbx] 5209 xor r13d,r14d 5210 mov r15d,ecx 5211 rorx r12d,ecx,22 5212 lea ebx,[r13*1+rbx] 5213 xor r15d,edx 5214 rorx r14d,ecx,13 5215 rorx r13d,ecx,2 5216 lea r9d,[rbx*1+r9] 5217 and edi,r15d 5218 xor r14d,r12d 5219 xor edi,edx 5220 xor r14d,r13d 5221 lea ebx,[rdi*1+rbx] 5222 mov r12d,r10d 5223 add eax,DWORD[44+rsp] 5224 and r12d,r9d 5225 rorx r13d,r9d,25 5226 rorx edi,r9d,11 5227 lea ebx,[r14*1+rbx] 5228 lea eax,[r12*1+rax] 5229 andn r12d,r9d,r11d 5230 xor r13d,edi 5231 rorx r14d,r9d,6 5232 lea eax,[r12*1+rax] 5233 xor r13d,r14d 5234 mov edi,ebx 5235 rorx r12d,ebx,22 5236 lea eax,[r13*1+rax] 5237 xor edi,ecx 5238 rorx r14d,ebx,13 5239 rorx r13d,ebx,2 5240 lea r8d,[rax*1+r8] 5241 and r15d,edi 5242 xor r14d,r12d 5243 xor r15d,ecx 5244 xor r14d,r13d 5245 lea eax,[r15*1+rax] 5246 mov r12d,r9d 5247 mov rdi,QWORD[512+rsp] 5248 add eax,r14d 5249 5250 lea rbp,[448+rsp] 5251 5252 add eax,DWORD[rdi] 5253 add ebx,DWORD[4+rdi] 5254 add ecx,DWORD[8+rdi] 5255 add edx,DWORD[12+rdi] 5256 add r8d,DWORD[16+rdi] 5257 add r9d,DWORD[20+rdi] 5258 add r10d,DWORD[24+rdi] 5259 add r11d,DWORD[28+rdi] 5260 5261 mov DWORD[rdi],eax 5262 mov DWORD[4+rdi],ebx 5263 mov DWORD[8+rdi],ecx 5264 mov DWORD[12+rdi],edx 5265 mov DWORD[16+rdi],r8d 5266 mov DWORD[20+rdi],r9d 5267 mov DWORD[24+rdi],r10d 5268 mov DWORD[28+rdi],r11d 5269 5270 cmp rsi,QWORD[80+rbp] 5271 je NEAR $L$done_avx2 5272 5273 xor r14d,r14d 5274 mov edi,ebx 5275 xor edi,ecx 5276 mov r12d,r9d 5277 jmp NEAR $L$ower_avx2 5278 ALIGN 16 5279 $L$ower_avx2: 5280 add r11d,DWORD[((0+16))+rbp] 5281 and r12d,r8d 5282 rorx r13d,r8d,25 5283 rorx r15d,r8d,11 5284 lea eax,[r14*1+rax] 5285 lea r11d,[r12*1+r11] 5286 andn r12d,r8d,r10d 5287 xor r13d,r15d 5288 rorx r14d,r8d,6 5289 lea r11d,[r12*1+r11] 5290 xor r13d,r14d 5291 mov r15d,eax 5292 rorx r12d,eax,22 5293 lea r11d,[r13*1+r11] 5294 xor r15d,ebx 5295 rorx r14d,eax,13 5296 rorx r13d,eax,2 5297 lea edx,[r11*1+rdx] 5298 and edi,r15d 5299 xor r14d,r12d 5300 xor edi,ebx 5301 xor r14d,r13d 5302 lea r11d,[rdi*1+r11] 5303 mov r12d,r8d 5304 add r10d,DWORD[((4+16))+rbp] 5305 and r12d,edx 5306 rorx r13d,edx,25 5307 rorx edi,edx,11 5308 lea r11d,[r14*1+r11] 5309 lea r10d,[r12*1+r10] 5310 andn r12d,edx,r9d 5311 xor r13d,edi 5312 rorx r14d,edx,6 5313 lea r10d,[r12*1+r10] 5314 xor r13d,r14d 5315 mov edi,r11d 5316 rorx r12d,r11d,22 5317 lea r10d,[r13*1+r10] 5318 xor edi,eax 5319 rorx r14d,r11d,13 5320 rorx r13d,r11d,2 5321 lea ecx,[r10*1+rcx] 5322 and r15d,edi 5323 xor r14d,r12d 5324 xor r15d,eax 5325 xor r14d,r13d 5326 lea r10d,[r15*1+r10] 5327 mov r12d,edx 5328 add r9d,DWORD[((8+16))+rbp] 5329 and r12d,ecx 5330 rorx r13d,ecx,25 5331 rorx r15d,ecx,11 5332 lea r10d,[r14*1+r10] 5333 lea r9d,[r12*1+r9] 5334 andn r12d,ecx,r8d 5335 xor r13d,r15d 5336 rorx r14d,ecx,6 5337 lea r9d,[r12*1+r9] 5338 xor r13d,r14d 5339 mov r15d,r10d 5340 rorx r12d,r10d,22 5341 lea r9d,[r13*1+r9] 5342 xor r15d,r11d 5343 rorx r14d,r10d,13 5344 rorx r13d,r10d,2 5345 lea ebx,[r9*1+rbx] 5346 and edi,r15d 5347 xor r14d,r12d 5348 xor edi,r11d 5349 xor r14d,r13d 5350 lea r9d,[rdi*1+r9] 5351 mov r12d,ecx 5352 add r8d,DWORD[((12+16))+rbp] 5353 and r12d,ebx 5354 rorx r13d,ebx,25 5355 rorx edi,ebx,11 5356 lea r9d,[r14*1+r9] 5357 lea r8d,[r12*1+r8] 5358 andn r12d,ebx,edx 5359 xor r13d,edi 5360 rorx r14d,ebx,6 5361 lea r8d,[r12*1+r8] 5362 xor r13d,r14d 5363 mov edi,r9d 5364 rorx r12d,r9d,22 5365 lea r8d,[r13*1+r8] 5366 xor edi,r10d 5367 rorx r14d,r9d,13 5368 rorx r13d,r9d,2 5369 lea eax,[r8*1+rax] 5370 and r15d,edi 5371 xor r14d,r12d 5372 xor r15d,r10d 5373 xor r14d,r13d 5374 lea r8d,[r15*1+r8] 5375 mov r12d,ebx 5376 add edx,DWORD[((32+16))+rbp] 5377 and r12d,eax 5378 rorx r13d,eax,25 5379 rorx r15d,eax,11 5380 lea r8d,[r14*1+r8] 5381 lea edx,[r12*1+rdx] 5382 andn r12d,eax,ecx 5383 xor r13d,r15d 5384 rorx r14d,eax,6 5385 lea edx,[r12*1+rdx] 5386 xor r13d,r14d 5387 mov r15d,r8d 5388 rorx r12d,r8d,22 5389 lea edx,[r13*1+rdx] 5390 xor r15d,r9d 5391 rorx r14d,r8d,13 5392 rorx r13d,r8d,2 5393 lea r11d,[rdx*1+r11] 5394 and edi,r15d 5395 xor r14d,r12d 5396 xor edi,r9d 5397 xor r14d,r13d 5398 lea edx,[rdi*1+rdx] 5399 mov r12d,eax 5400 add ecx,DWORD[((36+16))+rbp] 5401 and r12d,r11d 5402 rorx r13d,r11d,25 5403 rorx edi,r11d,11 5404 lea edx,[r14*1+rdx] 5405 lea ecx,[r12*1+rcx] 5406 andn r12d,r11d,ebx 5407 xor r13d,edi 5408 rorx r14d,r11d,6 5409 lea ecx,[r12*1+rcx] 5410 xor r13d,r14d 5411 mov edi,edx 5412 rorx r12d,edx,22 5413 lea ecx,[r13*1+rcx] 5414 xor edi,r8d 5415 rorx r14d,edx,13 5416 rorx r13d,edx,2 5417 lea r10d,[rcx*1+r10] 5418 and r15d,edi 5419 xor r14d,r12d 5420 xor r15d,r8d 5421 xor r14d,r13d 5422 lea ecx,[r15*1+rcx] 5423 mov r12d,r11d 5424 add ebx,DWORD[((40+16))+rbp] 5425 and r12d,r10d 5426 rorx r13d,r10d,25 5427 rorx r15d,r10d,11 5428 lea ecx,[r14*1+rcx] 5429 lea ebx,[r12*1+rbx] 5430 andn r12d,r10d,eax 5431 xor r13d,r15d 5432 rorx r14d,r10d,6 5433 lea ebx,[r12*1+rbx] 5434 xor r13d,r14d 5435 mov r15d,ecx 5436 rorx r12d,ecx,22 5437 lea ebx,[r13*1+rbx] 5438 xor r15d,edx 5439 rorx r14d,ecx,13 5440 rorx r13d,ecx,2 5441 lea r9d,[rbx*1+r9] 5442 and edi,r15d 5443 xor r14d,r12d 5444 xor edi,edx 5445 xor r14d,r13d 5446 lea ebx,[rdi*1+rbx] 5447 mov r12d,r10d 5448 add eax,DWORD[((44+16))+rbp] 5449 and r12d,r9d 5450 rorx r13d,r9d,25 5451 rorx edi,r9d,11 5452 lea ebx,[r14*1+rbx] 5453 lea eax,[r12*1+rax] 5454 andn r12d,r9d,r11d 5455 xor r13d,edi 5456 rorx r14d,r9d,6 5457 lea eax,[r12*1+rax] 5458 xor r13d,r14d 5459 mov edi,ebx 5460 rorx r12d,ebx,22 5461 lea eax,[r13*1+rax] 5462 xor edi,ecx 5463 rorx r14d,ebx,13 5464 rorx r13d,ebx,2 5465 lea r8d,[rax*1+r8] 5466 and r15d,edi 5467 xor r14d,r12d 5468 xor r15d,ecx 5469 xor r14d,r13d 5470 lea eax,[r15*1+rax] 5471 mov r12d,r9d 5472 lea rbp,[((-64))+rbp] 5473 cmp rbp,rsp 5474 jae NEAR $L$ower_avx2 5475 5476 mov rdi,QWORD[512+rsp] 5477 add eax,r14d 5478 5479 lea rsp,[448+rsp] 5480 5481 5482 5483 add eax,DWORD[rdi] 5484 add ebx,DWORD[4+rdi] 5485 add ecx,DWORD[8+rdi] 5486 add edx,DWORD[12+rdi] 5487 add r8d,DWORD[16+rdi] 5488 add r9d,DWORD[20+rdi] 5489 lea rsi,[128+rsi] 5490 add r10d,DWORD[24+rdi] 5491 mov r12,rsi 5492 add r11d,DWORD[28+rdi] 5493 cmp rsi,QWORD[((64+16))+rsp] 5494 5495 mov DWORD[rdi],eax 5496 cmove r12,rsp 5497 mov DWORD[4+rdi],ebx 5498 mov DWORD[8+rdi],ecx 5499 mov DWORD[12+rdi],edx 5500 mov DWORD[16+rdi],r8d 5501 mov DWORD[20+rdi],r9d 5502 mov DWORD[24+rdi],r10d 5503 mov DWORD[28+rdi],r11d 5504 5505 jbe NEAR $L$oop_avx2 5506 lea rbp,[rsp] 5507 5508 5509 5510 5511 $L$done_avx2: 5512 mov rsi,QWORD[88+rbp] 5513 5514 vzeroupper 5515 movaps xmm6,XMMWORD[((64+32))+rbp] 5516 movaps xmm7,XMMWORD[((64+48))+rbp] 5517 movaps xmm8,XMMWORD[((64+64))+rbp] 5518 movaps xmm9,XMMWORD[((64+80))+rbp] 5519 mov r15,QWORD[((-48))+rsi] 5520 5521 mov r14,QWORD[((-40))+rsi] 5522 5523 mov r13,QWORD[((-32))+rsi] 5524 5525 mov r12,QWORD[((-24))+rsi] 5526 5527 mov rbp,QWORD[((-16))+rsi] 5528 5529 mov rbx,QWORD[((-8))+rsi] 5530 5531 lea rsp,[rsi] 5532 5533 $L$epilogue_avx2: 5534 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 5535 mov rsi,QWORD[16+rsp] 5536 DB 0F3h,0C3h ;repret 5537 5538 $L$SEH_end_sha256_block_data_order_avx2: 3152 5539 EXTERN __imp_RtlVirtualUnwind 3153 5540 … … 3182 5569 cmp rbx,r10 3183 5570 jae NEAR $L$in_prologue 5571 lea r10,[$L$avx2_shortcut] 5572 cmp rbx,r10 5573 jb NEAR $L$not_in_avx2 5574 5575 and rax,-256*4 5576 add rax,448 5577 $L$not_in_avx2: 3184 5578 mov rsi,rax 3185 5579 mov rax,QWORD[((64+24))+rax] … … 3289 5683 DD $L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase 3290 5684 DD $L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase 5685 DD $L$SEH_begin_sha256_block_data_order_avx wrt ..imagebase 5686 DD $L$SEH_end_sha256_block_data_order_avx wrt ..imagebase 5687 DD $L$SEH_info_sha256_block_data_order_avx wrt ..imagebase 5688 DD $L$SEH_begin_sha256_block_data_order_avx2 wrt ..imagebase 5689 DD $L$SEH_end_sha256_block_data_order_avx2 wrt ..imagebase 5690 DD $L$SEH_info_sha256_block_data_order_avx2 wrt ..imagebase 3291 5691 section .xdata rdata align=8 3292 5692 ALIGN 8 … … 3302 5702 DD se_handler wrt ..imagebase 3303 5703 DD $L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase 5704 $L$SEH_info_sha256_block_data_order_avx: 5705 DB 9,0,0,0 5706 DD se_handler wrt ..imagebase 5707 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase 5708 $L$SEH_info_sha256_block_data_order_avx2: 5709 DB 9,0,0,0 5710 DD se_handler wrt ..imagebase 5711 DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/sha512-x86_64.S
r97373 r99371 21 21 22 22 23 lea r11,[OPENSSL_ia32cap_P] 24 mov r9d,DWORD[r11] 25 mov r10d,DWORD[4+r11] 26 mov r11d,DWORD[8+r11] 27 test r10d,2048 28 jnz NEAR $L$xop_shortcut 29 and r11d,296 30 cmp r11d,296 31 je NEAR $L$avx2_shortcut 32 and r9d,1073741824 33 and r10d,268435968 34 or r10d,r9d 35 cmp r10d,1342177792 36 je NEAR $L$avx_shortcut 23 37 mov rax,rsp 24 38 … … 1820 1834 DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 1821 1835 DB 111,114,103,62,0 1836 1837 ALIGN 64 1838 sha512_block_data_order_xop: 1839 mov QWORD[8+rsp],rdi ;WIN64 prologue 1840 mov QWORD[16+rsp],rsi 1841 mov rax,rsp 1842 $L$SEH_begin_sha512_block_data_order_xop: 1843 mov rdi,rcx 1844 mov rsi,rdx 1845 mov rdx,r8 1846 1847 1848 1849 $L$xop_shortcut: 1850 mov rax,rsp 1851 1852 push rbx 1853 1854 push rbp 1855 1856 push r12 1857 1858 push r13 1859 1860 push r14 1861 1862 push r15 1863 1864 shl rdx,4 1865 sub rsp,256 1866 lea rdx,[rdx*8+rsi] 1867 and rsp,-64 1868 mov QWORD[((128+0))+rsp],rdi 1869 mov QWORD[((128+8))+rsp],rsi 1870 mov QWORD[((128+16))+rsp],rdx 1871 mov QWORD[152+rsp],rax 1872 1873 movaps XMMWORD[(128+32)+rsp],xmm6 1874 movaps XMMWORD[(128+48)+rsp],xmm7 1875 movaps XMMWORD[(128+64)+rsp],xmm8 1876 movaps XMMWORD[(128+80)+rsp],xmm9 1877 movaps XMMWORD[(128+96)+rsp],xmm10 1878 movaps XMMWORD[(128+112)+rsp],xmm11 1879 $L$prologue_xop: 1880 1881 vzeroupper 1882 mov rax,QWORD[rdi] 1883 mov rbx,QWORD[8+rdi] 1884 mov rcx,QWORD[16+rdi] 1885 mov rdx,QWORD[24+rdi] 1886 mov r8,QWORD[32+rdi] 1887 mov r9,QWORD[40+rdi] 1888 mov r10,QWORD[48+rdi] 1889 mov r11,QWORD[56+rdi] 1890 jmp NEAR $L$loop_xop 1891 ALIGN 16 1892 $L$loop_xop: 1893 vmovdqa xmm11,XMMWORD[((K512+1280))] 1894 vmovdqu xmm0,XMMWORD[rsi] 1895 lea rbp,[((K512+128))] 1896 vmovdqu xmm1,XMMWORD[16+rsi] 1897 vmovdqu xmm2,XMMWORD[32+rsi] 1898 vpshufb xmm0,xmm0,xmm11 1899 vmovdqu xmm3,XMMWORD[48+rsi] 1900 vpshufb xmm1,xmm1,xmm11 1901 vmovdqu xmm4,XMMWORD[64+rsi] 1902 vpshufb xmm2,xmm2,xmm11 1903 vmovdqu xmm5,XMMWORD[80+rsi] 1904 vpshufb xmm3,xmm3,xmm11 1905 vmovdqu xmm6,XMMWORD[96+rsi] 1906 vpshufb xmm4,xmm4,xmm11 1907 vmovdqu xmm7,XMMWORD[112+rsi] 1908 vpshufb xmm5,xmm5,xmm11 1909 vpaddq xmm8,xmm0,XMMWORD[((-128))+rbp] 1910 vpshufb xmm6,xmm6,xmm11 1911 vpaddq xmm9,xmm1,XMMWORD[((-96))+rbp] 1912 vpshufb xmm7,xmm7,xmm11 1913 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp] 1914 vpaddq xmm11,xmm3,XMMWORD[((-32))+rbp] 1915 vmovdqa XMMWORD[rsp],xmm8 1916 vpaddq xmm8,xmm4,XMMWORD[rbp] 1917 vmovdqa XMMWORD[16+rsp],xmm9 1918 vpaddq xmm9,xmm5,XMMWORD[32+rbp] 1919 vmovdqa XMMWORD[32+rsp],xmm10 1920 vpaddq xmm10,xmm6,XMMWORD[64+rbp] 1921 vmovdqa XMMWORD[48+rsp],xmm11 1922 vpaddq xmm11,xmm7,XMMWORD[96+rbp] 1923 vmovdqa XMMWORD[64+rsp],xmm8 1924 mov r14,rax 1925 vmovdqa XMMWORD[80+rsp],xmm9 1926 mov rdi,rbx 1927 vmovdqa XMMWORD[96+rsp],xmm10 1928 xor rdi,rcx 1929 vmovdqa XMMWORD[112+rsp],xmm11 1930 mov r13,r8 1931 jmp NEAR $L$xop_00_47 1932 1933 ALIGN 16 1934 $L$xop_00_47: 1935 add rbp,256 1936 vpalignr xmm8,xmm1,xmm0,8 1937 ror r13,23 1938 mov rax,r14 1939 vpalignr xmm11,xmm5,xmm4,8 1940 mov r12,r9 1941 ror r14,5 1942 DB 143,72,120,195,200,56 1943 xor r13,r8 1944 xor r12,r10 1945 vpsrlq xmm8,xmm8,7 1946 ror r13,4 1947 xor r14,rax 1948 vpaddq xmm0,xmm0,xmm11 1949 and r12,r8 1950 xor r13,r8 1951 add r11,QWORD[rsp] 1952 mov r15,rax 1953 DB 143,72,120,195,209,7 1954 xor r12,r10 1955 ror r14,6 1956 vpxor xmm8,xmm8,xmm9 1957 xor r15,rbx 1958 add r11,r12 1959 ror r13,14 1960 and rdi,r15 1961 DB 143,104,120,195,223,3 1962 xor r14,rax 1963 add r11,r13 1964 vpxor xmm8,xmm8,xmm10 1965 xor rdi,rbx 1966 ror r14,28 1967 vpsrlq xmm10,xmm7,6 1968 add rdx,r11 1969 add r11,rdi 1970 vpaddq xmm0,xmm0,xmm8 1971 mov r13,rdx 1972 add r14,r11 1973 DB 143,72,120,195,203,42 1974 ror r13,23 1975 mov r11,r14 1976 vpxor xmm11,xmm11,xmm10 1977 mov r12,r8 1978 ror r14,5 1979 xor r13,rdx 1980 xor r12,r9 1981 vpxor xmm11,xmm11,xmm9 1982 ror r13,4 1983 xor r14,r11 1984 and r12,rdx 1985 xor r13,rdx 1986 vpaddq xmm0,xmm0,xmm11 1987 add r10,QWORD[8+rsp] 1988 mov rdi,r11 1989 xor r12,r9 1990 ror r14,6 1991 vpaddq xmm10,xmm0,XMMWORD[((-128))+rbp] 1992 xor rdi,rax 1993 add r10,r12 1994 ror r13,14 1995 and r15,rdi 1996 xor r14,r11 1997 add r10,r13 1998 xor r15,rax 1999 ror r14,28 2000 add rcx,r10 2001 add r10,r15 2002 mov r13,rcx 2003 add r14,r10 2004 vmovdqa XMMWORD[rsp],xmm10 2005 vpalignr xmm8,xmm2,xmm1,8 2006 ror r13,23 2007 mov r10,r14 2008 vpalignr xmm11,xmm6,xmm5,8 2009 mov r12,rdx 2010 ror r14,5 2011 DB 143,72,120,195,200,56 2012 xor r13,rcx 2013 xor r12,r8 2014 vpsrlq xmm8,xmm8,7 2015 ror r13,4 2016 xor r14,r10 2017 vpaddq xmm1,xmm1,xmm11 2018 and r12,rcx 2019 xor r13,rcx 2020 add r9,QWORD[16+rsp] 2021 mov r15,r10 2022 DB 143,72,120,195,209,7 2023 xor r12,r8 2024 ror r14,6 2025 vpxor xmm8,xmm8,xmm9 2026 xor r15,r11 2027 add r9,r12 2028 ror r13,14 2029 and rdi,r15 2030 DB 143,104,120,195,216,3 2031 xor r14,r10 2032 add r9,r13 2033 vpxor xmm8,xmm8,xmm10 2034 xor rdi,r11 2035 ror r14,28 2036 vpsrlq xmm10,xmm0,6 2037 add rbx,r9 2038 add r9,rdi 2039 vpaddq xmm1,xmm1,xmm8 2040 mov r13,rbx 2041 add r14,r9 2042 DB 143,72,120,195,203,42 2043 ror r13,23 2044 mov r9,r14 2045 vpxor xmm11,xmm11,xmm10 2046 mov r12,rcx 2047 ror r14,5 2048 xor r13,rbx 2049 xor r12,rdx 2050 vpxor xmm11,xmm11,xmm9 2051 ror r13,4 2052 xor r14,r9 2053 and r12,rbx 2054 xor r13,rbx 2055 vpaddq xmm1,xmm1,xmm11 2056 add r8,QWORD[24+rsp] 2057 mov rdi,r9 2058 xor r12,rdx 2059 ror r14,6 2060 vpaddq xmm10,xmm1,XMMWORD[((-96))+rbp] 2061 xor rdi,r10 2062 add r8,r12 2063 ror r13,14 2064 and r15,rdi 2065 xor r14,r9 2066 add r8,r13 2067 xor r15,r10 2068 ror r14,28 2069 add rax,r8 2070 add r8,r15 2071 mov r13,rax 2072 add r14,r8 2073 vmovdqa XMMWORD[16+rsp],xmm10 2074 vpalignr xmm8,xmm3,xmm2,8 2075 ror r13,23 2076 mov r8,r14 2077 vpalignr xmm11,xmm7,xmm6,8 2078 mov r12,rbx 2079 ror r14,5 2080 DB 143,72,120,195,200,56 2081 xor r13,rax 2082 xor r12,rcx 2083 vpsrlq xmm8,xmm8,7 2084 ror r13,4 2085 xor r14,r8 2086 vpaddq xmm2,xmm2,xmm11 2087 and r12,rax 2088 xor r13,rax 2089 add rdx,QWORD[32+rsp] 2090 mov r15,r8 2091 DB 143,72,120,195,209,7 2092 xor r12,rcx 2093 ror r14,6 2094 vpxor xmm8,xmm8,xmm9 2095 xor r15,r9 2096 add rdx,r12 2097 ror r13,14 2098 and rdi,r15 2099 DB 143,104,120,195,217,3 2100 xor r14,r8 2101 add rdx,r13 2102 vpxor xmm8,xmm8,xmm10 2103 xor rdi,r9 2104 ror r14,28 2105 vpsrlq xmm10,xmm1,6 2106 add r11,rdx 2107 add rdx,rdi 2108 vpaddq xmm2,xmm2,xmm8 2109 mov r13,r11 2110 add r14,rdx 2111 DB 143,72,120,195,203,42 2112 ror r13,23 2113 mov rdx,r14 2114 vpxor xmm11,xmm11,xmm10 2115 mov r12,rax 2116 ror r14,5 2117 xor r13,r11 2118 xor r12,rbx 2119 vpxor xmm11,xmm11,xmm9 2120 ror r13,4 2121 xor r14,rdx 2122 and r12,r11 2123 xor r13,r11 2124 vpaddq xmm2,xmm2,xmm11 2125 add rcx,QWORD[40+rsp] 2126 mov rdi,rdx 2127 xor r12,rbx 2128 ror r14,6 2129 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp] 2130 xor rdi,r8 2131 add rcx,r12 2132 ror r13,14 2133 and r15,rdi 2134 xor r14,rdx 2135 add rcx,r13 2136 xor r15,r8 2137 ror r14,28 2138 add r10,rcx 2139 add rcx,r15 2140 mov r13,r10 2141 add r14,rcx 2142 vmovdqa XMMWORD[32+rsp],xmm10 2143 vpalignr xmm8,xmm4,xmm3,8 2144 ror r13,23 2145 mov rcx,r14 2146 vpalignr xmm11,xmm0,xmm7,8 2147 mov r12,r11 2148 ror r14,5 2149 DB 143,72,120,195,200,56 2150 xor r13,r10 2151 xor r12,rax 2152 vpsrlq xmm8,xmm8,7 2153 ror r13,4 2154 xor r14,rcx 2155 vpaddq xmm3,xmm3,xmm11 2156 and r12,r10 2157 xor r13,r10 2158 add rbx,QWORD[48+rsp] 2159 mov r15,rcx 2160 DB 143,72,120,195,209,7 2161 xor r12,rax 2162 ror r14,6 2163 vpxor xmm8,xmm8,xmm9 2164 xor r15,rdx 2165 add rbx,r12 2166 ror r13,14 2167 and rdi,r15 2168 DB 143,104,120,195,218,3 2169 xor r14,rcx 2170 add rbx,r13 2171 vpxor xmm8,xmm8,xmm10 2172 xor rdi,rdx 2173 ror r14,28 2174 vpsrlq xmm10,xmm2,6 2175 add r9,rbx 2176 add rbx,rdi 2177 vpaddq xmm3,xmm3,xmm8 2178 mov r13,r9 2179 add r14,rbx 2180 DB 143,72,120,195,203,42 2181 ror r13,23 2182 mov rbx,r14 2183 vpxor xmm11,xmm11,xmm10 2184 mov r12,r10 2185 ror r14,5 2186 xor r13,r9 2187 xor r12,r11 2188 vpxor xmm11,xmm11,xmm9 2189 ror r13,4 2190 xor r14,rbx 2191 and r12,r9 2192 xor r13,r9 2193 vpaddq xmm3,xmm3,xmm11 2194 add rax,QWORD[56+rsp] 2195 mov rdi,rbx 2196 xor r12,r11 2197 ror r14,6 2198 vpaddq xmm10,xmm3,XMMWORD[((-32))+rbp] 2199 xor rdi,rcx 2200 add rax,r12 2201 ror r13,14 2202 and r15,rdi 2203 xor r14,rbx 2204 add rax,r13 2205 xor r15,rcx 2206 ror r14,28 2207 add r8,rax 2208 add rax,r15 2209 mov r13,r8 2210 add r14,rax 2211 vmovdqa XMMWORD[48+rsp],xmm10 2212 vpalignr xmm8,xmm5,xmm4,8 2213 ror r13,23 2214 mov rax,r14 2215 vpalignr xmm11,xmm1,xmm0,8 2216 mov r12,r9 2217 ror r14,5 2218 DB 143,72,120,195,200,56 2219 xor r13,r8 2220 xor r12,r10 2221 vpsrlq xmm8,xmm8,7 2222 ror r13,4 2223 xor r14,rax 2224 vpaddq xmm4,xmm4,xmm11 2225 and r12,r8 2226 xor r13,r8 2227 add r11,QWORD[64+rsp] 2228 mov r15,rax 2229 DB 143,72,120,195,209,7 2230 xor r12,r10 2231 ror r14,6 2232 vpxor xmm8,xmm8,xmm9 2233 xor r15,rbx 2234 add r11,r12 2235 ror r13,14 2236 and rdi,r15 2237 DB 143,104,120,195,219,3 2238 xor r14,rax 2239 add r11,r13 2240 vpxor xmm8,xmm8,xmm10 2241 xor rdi,rbx 2242 ror r14,28 2243 vpsrlq xmm10,xmm3,6 2244 add rdx,r11 2245 add r11,rdi 2246 vpaddq xmm4,xmm4,xmm8 2247 mov r13,rdx 2248 add r14,r11 2249 DB 143,72,120,195,203,42 2250 ror r13,23 2251 mov r11,r14 2252 vpxor xmm11,xmm11,xmm10 2253 mov r12,r8 2254 ror r14,5 2255 xor r13,rdx 2256 xor r12,r9 2257 vpxor xmm11,xmm11,xmm9 2258 ror r13,4 2259 xor r14,r11 2260 and r12,rdx 2261 xor r13,rdx 2262 vpaddq xmm4,xmm4,xmm11 2263 add r10,QWORD[72+rsp] 2264 mov rdi,r11 2265 xor r12,r9 2266 ror r14,6 2267 vpaddq xmm10,xmm4,XMMWORD[rbp] 2268 xor rdi,rax 2269 add r10,r12 2270 ror r13,14 2271 and r15,rdi 2272 xor r14,r11 2273 add r10,r13 2274 xor r15,rax 2275 ror r14,28 2276 add rcx,r10 2277 add r10,r15 2278 mov r13,rcx 2279 add r14,r10 2280 vmovdqa XMMWORD[64+rsp],xmm10 2281 vpalignr xmm8,xmm6,xmm5,8 2282 ror r13,23 2283 mov r10,r14 2284 vpalignr xmm11,xmm2,xmm1,8 2285 mov r12,rdx 2286 ror r14,5 2287 DB 143,72,120,195,200,56 2288 xor r13,rcx 2289 xor r12,r8 2290 vpsrlq xmm8,xmm8,7 2291 ror r13,4 2292 xor r14,r10 2293 vpaddq xmm5,xmm5,xmm11 2294 and r12,rcx 2295 xor r13,rcx 2296 add r9,QWORD[80+rsp] 2297 mov r15,r10 2298 DB 143,72,120,195,209,7 2299 xor r12,r8 2300 ror r14,6 2301 vpxor xmm8,xmm8,xmm9 2302 xor r15,r11 2303 add r9,r12 2304 ror r13,14 2305 and rdi,r15 2306 DB 143,104,120,195,220,3 2307 xor r14,r10 2308 add r9,r13 2309 vpxor xmm8,xmm8,xmm10 2310 xor rdi,r11 2311 ror r14,28 2312 vpsrlq xmm10,xmm4,6 2313 add rbx,r9 2314 add r9,rdi 2315 vpaddq xmm5,xmm5,xmm8 2316 mov r13,rbx 2317 add r14,r9 2318 DB 143,72,120,195,203,42 2319 ror r13,23 2320 mov r9,r14 2321 vpxor xmm11,xmm11,xmm10 2322 mov r12,rcx 2323 ror r14,5 2324 xor r13,rbx 2325 xor r12,rdx 2326 vpxor xmm11,xmm11,xmm9 2327 ror r13,4 2328 xor r14,r9 2329 and r12,rbx 2330 xor r13,rbx 2331 vpaddq xmm5,xmm5,xmm11 2332 add r8,QWORD[88+rsp] 2333 mov rdi,r9 2334 xor r12,rdx 2335 ror r14,6 2336 vpaddq xmm10,xmm5,XMMWORD[32+rbp] 2337 xor rdi,r10 2338 add r8,r12 2339 ror r13,14 2340 and r15,rdi 2341 xor r14,r9 2342 add r8,r13 2343 xor r15,r10 2344 ror r14,28 2345 add rax,r8 2346 add r8,r15 2347 mov r13,rax 2348 add r14,r8 2349 vmovdqa XMMWORD[80+rsp],xmm10 2350 vpalignr xmm8,xmm7,xmm6,8 2351 ror r13,23 2352 mov r8,r14 2353 vpalignr xmm11,xmm3,xmm2,8 2354 mov r12,rbx 2355 ror r14,5 2356 DB 143,72,120,195,200,56 2357 xor r13,rax 2358 xor r12,rcx 2359 vpsrlq xmm8,xmm8,7 2360 ror r13,4 2361 xor r14,r8 2362 vpaddq xmm6,xmm6,xmm11 2363 and r12,rax 2364 xor r13,rax 2365 add rdx,QWORD[96+rsp] 2366 mov r15,r8 2367 DB 143,72,120,195,209,7 2368 xor r12,rcx 2369 ror r14,6 2370 vpxor xmm8,xmm8,xmm9 2371 xor r15,r9 2372 add rdx,r12 2373 ror r13,14 2374 and rdi,r15 2375 DB 143,104,120,195,221,3 2376 xor r14,r8 2377 add rdx,r13 2378 vpxor xmm8,xmm8,xmm10 2379 xor rdi,r9 2380 ror r14,28 2381 vpsrlq xmm10,xmm5,6 2382 add r11,rdx 2383 add rdx,rdi 2384 vpaddq xmm6,xmm6,xmm8 2385 mov r13,r11 2386 add r14,rdx 2387 DB 143,72,120,195,203,42 2388 ror r13,23 2389 mov rdx,r14 2390 vpxor xmm11,xmm11,xmm10 2391 mov r12,rax 2392 ror r14,5 2393 xor r13,r11 2394 xor r12,rbx 2395 vpxor xmm11,xmm11,xmm9 2396 ror r13,4 2397 xor r14,rdx 2398 and r12,r11 2399 xor r13,r11 2400 vpaddq xmm6,xmm6,xmm11 2401 add rcx,QWORD[104+rsp] 2402 mov rdi,rdx 2403 xor r12,rbx 2404 ror r14,6 2405 vpaddq xmm10,xmm6,XMMWORD[64+rbp] 2406 xor rdi,r8 2407 add rcx,r12 2408 ror r13,14 2409 and r15,rdi 2410 xor r14,rdx 2411 add rcx,r13 2412 xor r15,r8 2413 ror r14,28 2414 add r10,rcx 2415 add rcx,r15 2416 mov r13,r10 2417 add r14,rcx 2418 vmovdqa XMMWORD[96+rsp],xmm10 2419 vpalignr xmm8,xmm0,xmm7,8 2420 ror r13,23 2421 mov rcx,r14 2422 vpalignr xmm11,xmm4,xmm3,8 2423 mov r12,r11 2424 ror r14,5 2425 DB 143,72,120,195,200,56 2426 xor r13,r10 2427 xor r12,rax 2428 vpsrlq xmm8,xmm8,7 2429 ror r13,4 2430 xor r14,rcx 2431 vpaddq xmm7,xmm7,xmm11 2432 and r12,r10 2433 xor r13,r10 2434 add rbx,QWORD[112+rsp] 2435 mov r15,rcx 2436 DB 143,72,120,195,209,7 2437 xor r12,rax 2438 ror r14,6 2439 vpxor xmm8,xmm8,xmm9 2440 xor r15,rdx 2441 add rbx,r12 2442 ror r13,14 2443 and rdi,r15 2444 DB 143,104,120,195,222,3 2445 xor r14,rcx 2446 add rbx,r13 2447 vpxor xmm8,xmm8,xmm10 2448 xor rdi,rdx 2449 ror r14,28 2450 vpsrlq xmm10,xmm6,6 2451 add r9,rbx 2452 add rbx,rdi 2453 vpaddq xmm7,xmm7,xmm8 2454 mov r13,r9 2455 add r14,rbx 2456 DB 143,72,120,195,203,42 2457 ror r13,23 2458 mov rbx,r14 2459 vpxor xmm11,xmm11,xmm10 2460 mov r12,r10 2461 ror r14,5 2462 xor r13,r9 2463 xor r12,r11 2464 vpxor xmm11,xmm11,xmm9 2465 ror r13,4 2466 xor r14,rbx 2467 and r12,r9 2468 xor r13,r9 2469 vpaddq xmm7,xmm7,xmm11 2470 add rax,QWORD[120+rsp] 2471 mov rdi,rbx 2472 xor r12,r11 2473 ror r14,6 2474 vpaddq xmm10,xmm7,XMMWORD[96+rbp] 2475 xor rdi,rcx 2476 add rax,r12 2477 ror r13,14 2478 and r15,rdi 2479 xor r14,rbx 2480 add rax,r13 2481 xor r15,rcx 2482 ror r14,28 2483 add r8,rax 2484 add rax,r15 2485 mov r13,r8 2486 add r14,rax 2487 vmovdqa XMMWORD[112+rsp],xmm10 2488 cmp BYTE[135+rbp],0 2489 jne NEAR $L$xop_00_47 2490 ror r13,23 2491 mov rax,r14 2492 mov r12,r9 2493 ror r14,5 2494 xor r13,r8 2495 xor r12,r10 2496 ror r13,4 2497 xor r14,rax 2498 and r12,r8 2499 xor r13,r8 2500 add r11,QWORD[rsp] 2501 mov r15,rax 2502 xor r12,r10 2503 ror r14,6 2504 xor r15,rbx 2505 add r11,r12 2506 ror r13,14 2507 and rdi,r15 2508 xor r14,rax 2509 add r11,r13 2510 xor rdi,rbx 2511 ror r14,28 2512 add rdx,r11 2513 add r11,rdi 2514 mov r13,rdx 2515 add r14,r11 2516 ror r13,23 2517 mov r11,r14 2518 mov r12,r8 2519 ror r14,5 2520 xor r13,rdx 2521 xor r12,r9 2522 ror r13,4 2523 xor r14,r11 2524 and r12,rdx 2525 xor r13,rdx 2526 add r10,QWORD[8+rsp] 2527 mov rdi,r11 2528 xor r12,r9 2529 ror r14,6 2530 xor rdi,rax 2531 add r10,r12 2532 ror r13,14 2533 and r15,rdi 2534 xor r14,r11 2535 add r10,r13 2536 xor r15,rax 2537 ror r14,28 2538 add rcx,r10 2539 add r10,r15 2540 mov r13,rcx 2541 add r14,r10 2542 ror r13,23 2543 mov r10,r14 2544 mov r12,rdx 2545 ror r14,5 2546 xor r13,rcx 2547 xor r12,r8 2548 ror r13,4 2549 xor r14,r10 2550 and r12,rcx 2551 xor r13,rcx 2552 add r9,QWORD[16+rsp] 2553 mov r15,r10 2554 xor r12,r8 2555 ror r14,6 2556 xor r15,r11 2557 add r9,r12 2558 ror r13,14 2559 and rdi,r15 2560 xor r14,r10 2561 add r9,r13 2562 xor rdi,r11 2563 ror r14,28 2564 add rbx,r9 2565 add r9,rdi 2566 mov r13,rbx 2567 add r14,r9 2568 ror r13,23 2569 mov r9,r14 2570 mov r12,rcx 2571 ror r14,5 2572 xor r13,rbx 2573 xor r12,rdx 2574 ror r13,4 2575 xor r14,r9 2576 and r12,rbx 2577 xor r13,rbx 2578 add r8,QWORD[24+rsp] 2579 mov rdi,r9 2580 xor r12,rdx 2581 ror r14,6 2582 xor rdi,r10 2583 add r8,r12 2584 ror r13,14 2585 and r15,rdi 2586 xor r14,r9 2587 add r8,r13 2588 xor r15,r10 2589 ror r14,28 2590 add rax,r8 2591 add r8,r15 2592 mov r13,rax 2593 add r14,r8 2594 ror r13,23 2595 mov r8,r14 2596 mov r12,rbx 2597 ror r14,5 2598 xor r13,rax 2599 xor r12,rcx 2600 ror r13,4 2601 xor r14,r8 2602 and r12,rax 2603 xor r13,rax 2604 add rdx,QWORD[32+rsp] 2605 mov r15,r8 2606 xor r12,rcx 2607 ror r14,6 2608 xor r15,r9 2609 add rdx,r12 2610 ror r13,14 2611 and rdi,r15 2612 xor r14,r8 2613 add rdx,r13 2614 xor rdi,r9 2615 ror r14,28 2616 add r11,rdx 2617 add rdx,rdi 2618 mov r13,r11 2619 add r14,rdx 2620 ror r13,23 2621 mov rdx,r14 2622 mov r12,rax 2623 ror r14,5 2624 xor r13,r11 2625 xor r12,rbx 2626 ror r13,4 2627 xor r14,rdx 2628 and r12,r11 2629 xor r13,r11 2630 add rcx,QWORD[40+rsp] 2631 mov rdi,rdx 2632 xor r12,rbx 2633 ror r14,6 2634 xor rdi,r8 2635 add rcx,r12 2636 ror r13,14 2637 and r15,rdi 2638 xor r14,rdx 2639 add rcx,r13 2640 xor r15,r8 2641 ror r14,28 2642 add r10,rcx 2643 add rcx,r15 2644 mov r13,r10 2645 add r14,rcx 2646 ror r13,23 2647 mov rcx,r14 2648 mov r12,r11 2649 ror r14,5 2650 xor r13,r10 2651 xor r12,rax 2652 ror r13,4 2653 xor r14,rcx 2654 and r12,r10 2655 xor r13,r10 2656 add rbx,QWORD[48+rsp] 2657 mov r15,rcx 2658 xor r12,rax 2659 ror r14,6 2660 xor r15,rdx 2661 add rbx,r12 2662 ror r13,14 2663 and rdi,r15 2664 xor r14,rcx 2665 add rbx,r13 2666 xor rdi,rdx 2667 ror r14,28 2668 add r9,rbx 2669 add rbx,rdi 2670 mov r13,r9 2671 add r14,rbx 2672 ror r13,23 2673 mov rbx,r14 2674 mov r12,r10 2675 ror r14,5 2676 xor r13,r9 2677 xor r12,r11 2678 ror r13,4 2679 xor r14,rbx 2680 and r12,r9 2681 xor r13,r9 2682 add rax,QWORD[56+rsp] 2683 mov rdi,rbx 2684 xor r12,r11 2685 ror r14,6 2686 xor rdi,rcx 2687 add rax,r12 2688 ror r13,14 2689 and r15,rdi 2690 xor r14,rbx 2691 add rax,r13 2692 xor r15,rcx 2693 ror r14,28 2694 add r8,rax 2695 add rax,r15 2696 mov r13,r8 2697 add r14,rax 2698 ror r13,23 2699 mov rax,r14 2700 mov r12,r9 2701 ror r14,5 2702 xor r13,r8 2703 xor r12,r10 2704 ror r13,4 2705 xor r14,rax 2706 and r12,r8 2707 xor r13,r8 2708 add r11,QWORD[64+rsp] 2709 mov r15,rax 2710 xor r12,r10 2711 ror r14,6 2712 xor r15,rbx 2713 add r11,r12 2714 ror r13,14 2715 and rdi,r15 2716 xor r14,rax 2717 add r11,r13 2718 xor rdi,rbx 2719 ror r14,28 2720 add rdx,r11 2721 add r11,rdi 2722 mov r13,rdx 2723 add r14,r11 2724 ror r13,23 2725 mov r11,r14 2726 mov r12,r8 2727 ror r14,5 2728 xor r13,rdx 2729 xor r12,r9 2730 ror r13,4 2731 xor r14,r11 2732 and r12,rdx 2733 xor r13,rdx 2734 add r10,QWORD[72+rsp] 2735 mov rdi,r11 2736 xor r12,r9 2737 ror r14,6 2738 xor rdi,rax 2739 add r10,r12 2740 ror r13,14 2741 and r15,rdi 2742 xor r14,r11 2743 add r10,r13 2744 xor r15,rax 2745 ror r14,28 2746 add rcx,r10 2747 add r10,r15 2748 mov r13,rcx 2749 add r14,r10 2750 ror r13,23 2751 mov r10,r14 2752 mov r12,rdx 2753 ror r14,5 2754 xor r13,rcx 2755 xor r12,r8 2756 ror r13,4 2757 xor r14,r10 2758 and r12,rcx 2759 xor r13,rcx 2760 add r9,QWORD[80+rsp] 2761 mov r15,r10 2762 xor r12,r8 2763 ror r14,6 2764 xor r15,r11 2765 add r9,r12 2766 ror r13,14 2767 and rdi,r15 2768 xor r14,r10 2769 add r9,r13 2770 xor rdi,r11 2771 ror r14,28 2772 add rbx,r9 2773 add r9,rdi 2774 mov r13,rbx 2775 add r14,r9 2776 ror r13,23 2777 mov r9,r14 2778 mov r12,rcx 2779 ror r14,5 2780 xor r13,rbx 2781 xor r12,rdx 2782 ror r13,4 2783 xor r14,r9 2784 and r12,rbx 2785 xor r13,rbx 2786 add r8,QWORD[88+rsp] 2787 mov rdi,r9 2788 xor r12,rdx 2789 ror r14,6 2790 xor rdi,r10 2791 add r8,r12 2792 ror r13,14 2793 and r15,rdi 2794 xor r14,r9 2795 add r8,r13 2796 xor r15,r10 2797 ror r14,28 2798 add rax,r8 2799 add r8,r15 2800 mov r13,rax 2801 add r14,r8 2802 ror r13,23 2803 mov r8,r14 2804 mov r12,rbx 2805 ror r14,5 2806 xor r13,rax 2807 xor r12,rcx 2808 ror r13,4 2809 xor r14,r8 2810 and r12,rax 2811 xor r13,rax 2812 add rdx,QWORD[96+rsp] 2813 mov r15,r8 2814 xor r12,rcx 2815 ror r14,6 2816 xor r15,r9 2817 add rdx,r12 2818 ror r13,14 2819 and rdi,r15 2820 xor r14,r8 2821 add rdx,r13 2822 xor rdi,r9 2823 ror r14,28 2824 add r11,rdx 2825 add rdx,rdi 2826 mov r13,r11 2827 add r14,rdx 2828 ror r13,23 2829 mov rdx,r14 2830 mov r12,rax 2831 ror r14,5 2832 xor r13,r11 2833 xor r12,rbx 2834 ror r13,4 2835 xor r14,rdx 2836 and r12,r11 2837 xor r13,r11 2838 add rcx,QWORD[104+rsp] 2839 mov rdi,rdx 2840 xor r12,rbx 2841 ror r14,6 2842 xor rdi,r8 2843 add rcx,r12 2844 ror r13,14 2845 and r15,rdi 2846 xor r14,rdx 2847 add rcx,r13 2848 xor r15,r8 2849 ror r14,28 2850 add r10,rcx 2851 add rcx,r15 2852 mov r13,r10 2853 add r14,rcx 2854 ror r13,23 2855 mov rcx,r14 2856 mov r12,r11 2857 ror r14,5 2858 xor r13,r10 2859 xor r12,rax 2860 ror r13,4 2861 xor r14,rcx 2862 and r12,r10 2863 xor r13,r10 2864 add rbx,QWORD[112+rsp] 2865 mov r15,rcx 2866 xor r12,rax 2867 ror r14,6 2868 xor r15,rdx 2869 add rbx,r12 2870 ror r13,14 2871 and rdi,r15 2872 xor r14,rcx 2873 add rbx,r13 2874 xor rdi,rdx 2875 ror r14,28 2876 add r9,rbx 2877 add rbx,rdi 2878 mov r13,r9 2879 add r14,rbx 2880 ror r13,23 2881 mov rbx,r14 2882 mov r12,r10 2883 ror r14,5 2884 xor r13,r9 2885 xor r12,r11 2886 ror r13,4 2887 xor r14,rbx 2888 and r12,r9 2889 xor r13,r9 2890 add rax,QWORD[120+rsp] 2891 mov rdi,rbx 2892 xor r12,r11 2893 ror r14,6 2894 xor rdi,rcx 2895 add rax,r12 2896 ror r13,14 2897 and r15,rdi 2898 xor r14,rbx 2899 add rax,r13 2900 xor r15,rcx 2901 ror r14,28 2902 add r8,rax 2903 add rax,r15 2904 mov r13,r8 2905 add r14,rax 2906 mov rdi,QWORD[((128+0))+rsp] 2907 mov rax,r14 2908 2909 add rax,QWORD[rdi] 2910 lea rsi,[128+rsi] 2911 add rbx,QWORD[8+rdi] 2912 add rcx,QWORD[16+rdi] 2913 add rdx,QWORD[24+rdi] 2914 add r8,QWORD[32+rdi] 2915 add r9,QWORD[40+rdi] 2916 add r10,QWORD[48+rdi] 2917 add r11,QWORD[56+rdi] 2918 2919 cmp rsi,QWORD[((128+16))+rsp] 2920 2921 mov QWORD[rdi],rax 2922 mov QWORD[8+rdi],rbx 2923 mov QWORD[16+rdi],rcx 2924 mov QWORD[24+rdi],rdx 2925 mov QWORD[32+rdi],r8 2926 mov QWORD[40+rdi],r9 2927 mov QWORD[48+rdi],r10 2928 mov QWORD[56+rdi],r11 2929 jb NEAR $L$loop_xop 2930 2931 mov rsi,QWORD[152+rsp] 2932 2933 vzeroupper 2934 movaps xmm6,XMMWORD[((128+32))+rsp] 2935 movaps xmm7,XMMWORD[((128+48))+rsp] 2936 movaps xmm8,XMMWORD[((128+64))+rsp] 2937 movaps xmm9,XMMWORD[((128+80))+rsp] 2938 movaps xmm10,XMMWORD[((128+96))+rsp] 2939 movaps xmm11,XMMWORD[((128+112))+rsp] 2940 mov r15,QWORD[((-48))+rsi] 2941 2942 mov r14,QWORD[((-40))+rsi] 2943 2944 mov r13,QWORD[((-32))+rsi] 2945 2946 mov r12,QWORD[((-24))+rsi] 2947 2948 mov rbp,QWORD[((-16))+rsi] 2949 2950 mov rbx,QWORD[((-8))+rsi] 2951 2952 lea rsp,[rsi] 2953 2954 $L$epilogue_xop: 2955 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2956 mov rsi,QWORD[16+rsp] 2957 DB 0F3h,0C3h ;repret 2958 2959 $L$SEH_end_sha512_block_data_order_xop: 2960 2961 ALIGN 64 2962 sha512_block_data_order_avx: 2963 mov QWORD[8+rsp],rdi ;WIN64 prologue 2964 mov QWORD[16+rsp],rsi 2965 mov rax,rsp 2966 $L$SEH_begin_sha512_block_data_order_avx: 2967 mov rdi,rcx 2968 mov rsi,rdx 2969 mov rdx,r8 2970 2971 2972 2973 $L$avx_shortcut: 2974 mov rax,rsp 2975 2976 push rbx 2977 2978 push rbp 2979 2980 push r12 2981 2982 push r13 2983 2984 push r14 2985 2986 push r15 2987 2988 shl rdx,4 2989 sub rsp,256 2990 lea rdx,[rdx*8+rsi] 2991 and rsp,-64 2992 mov QWORD[((128+0))+rsp],rdi 2993 mov QWORD[((128+8))+rsp],rsi 2994 mov QWORD[((128+16))+rsp],rdx 2995 mov QWORD[152+rsp],rax 2996 2997 movaps XMMWORD[(128+32)+rsp],xmm6 2998 movaps XMMWORD[(128+48)+rsp],xmm7 2999 movaps XMMWORD[(128+64)+rsp],xmm8 3000 movaps XMMWORD[(128+80)+rsp],xmm9 3001 movaps XMMWORD[(128+96)+rsp],xmm10 3002 movaps XMMWORD[(128+112)+rsp],xmm11 3003 $L$prologue_avx: 3004 3005 vzeroupper 3006 mov rax,QWORD[rdi] 3007 mov rbx,QWORD[8+rdi] 3008 mov rcx,QWORD[16+rdi] 3009 mov rdx,QWORD[24+rdi] 3010 mov r8,QWORD[32+rdi] 3011 mov r9,QWORD[40+rdi] 3012 mov r10,QWORD[48+rdi] 3013 mov r11,QWORD[56+rdi] 3014 jmp NEAR $L$loop_avx 3015 ALIGN 16 3016 $L$loop_avx: 3017 vmovdqa xmm11,XMMWORD[((K512+1280))] 3018 vmovdqu xmm0,XMMWORD[rsi] 3019 lea rbp,[((K512+128))] 3020 vmovdqu xmm1,XMMWORD[16+rsi] 3021 vmovdqu xmm2,XMMWORD[32+rsi] 3022 vpshufb xmm0,xmm0,xmm11 3023 vmovdqu xmm3,XMMWORD[48+rsi] 3024 vpshufb xmm1,xmm1,xmm11 3025 vmovdqu xmm4,XMMWORD[64+rsi] 3026 vpshufb xmm2,xmm2,xmm11 3027 vmovdqu xmm5,XMMWORD[80+rsi] 3028 vpshufb xmm3,xmm3,xmm11 3029 vmovdqu xmm6,XMMWORD[96+rsi] 3030 vpshufb xmm4,xmm4,xmm11 3031 vmovdqu xmm7,XMMWORD[112+rsi] 3032 vpshufb xmm5,xmm5,xmm11 3033 vpaddq xmm8,xmm0,XMMWORD[((-128))+rbp] 3034 vpshufb xmm6,xmm6,xmm11 3035 vpaddq xmm9,xmm1,XMMWORD[((-96))+rbp] 3036 vpshufb xmm7,xmm7,xmm11 3037 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp] 3038 vpaddq xmm11,xmm3,XMMWORD[((-32))+rbp] 3039 vmovdqa XMMWORD[rsp],xmm8 3040 vpaddq xmm8,xmm4,XMMWORD[rbp] 3041 vmovdqa XMMWORD[16+rsp],xmm9 3042 vpaddq xmm9,xmm5,XMMWORD[32+rbp] 3043 vmovdqa XMMWORD[32+rsp],xmm10 3044 vpaddq xmm10,xmm6,XMMWORD[64+rbp] 3045 vmovdqa XMMWORD[48+rsp],xmm11 3046 vpaddq xmm11,xmm7,XMMWORD[96+rbp] 3047 vmovdqa XMMWORD[64+rsp],xmm8 3048 mov r14,rax 3049 vmovdqa XMMWORD[80+rsp],xmm9 3050 mov rdi,rbx 3051 vmovdqa XMMWORD[96+rsp],xmm10 3052 xor rdi,rcx 3053 vmovdqa XMMWORD[112+rsp],xmm11 3054 mov r13,r8 3055 jmp NEAR $L$avx_00_47 3056 3057 ALIGN 16 3058 $L$avx_00_47: 3059 add rbp,256 3060 vpalignr xmm8,xmm1,xmm0,8 3061 shrd r13,r13,23 3062 mov rax,r14 3063 vpalignr xmm11,xmm5,xmm4,8 3064 mov r12,r9 3065 shrd r14,r14,5 3066 vpsrlq xmm10,xmm8,1 3067 xor r13,r8 3068 xor r12,r10 3069 vpaddq xmm0,xmm0,xmm11 3070 shrd r13,r13,4 3071 xor r14,rax 3072 vpsrlq xmm11,xmm8,7 3073 and r12,r8 3074 xor r13,r8 3075 vpsllq xmm9,xmm8,56 3076 add r11,QWORD[rsp] 3077 mov r15,rax 3078 vpxor xmm8,xmm11,xmm10 3079 xor r12,r10 3080 shrd r14,r14,6 3081 vpsrlq xmm10,xmm10,7 3082 xor r15,rbx 3083 add r11,r12 3084 vpxor xmm8,xmm8,xmm9 3085 shrd r13,r13,14 3086 and rdi,r15 3087 vpsllq xmm9,xmm9,7 3088 xor r14,rax 3089 add r11,r13 3090 vpxor xmm8,xmm8,xmm10 3091 xor rdi,rbx 3092 shrd r14,r14,28 3093 vpsrlq xmm11,xmm7,6 3094 add rdx,r11 3095 add r11,rdi 3096 vpxor xmm8,xmm8,xmm9 3097 mov r13,rdx 3098 add r14,r11 3099 vpsllq xmm10,xmm7,3 3100 shrd r13,r13,23 3101 mov r11,r14 3102 vpaddq xmm0,xmm0,xmm8 3103 mov r12,r8 3104 shrd r14,r14,5 3105 vpsrlq xmm9,xmm7,19 3106 xor r13,rdx 3107 xor r12,r9 3108 vpxor xmm11,xmm11,xmm10 3109 shrd r13,r13,4 3110 xor r14,r11 3111 vpsllq xmm10,xmm10,42 3112 and r12,rdx 3113 xor r13,rdx 3114 vpxor xmm11,xmm11,xmm9 3115 add r10,QWORD[8+rsp] 3116 mov rdi,r11 3117 vpsrlq xmm9,xmm9,42 3118 xor r12,r9 3119 shrd r14,r14,6 3120 vpxor xmm11,xmm11,xmm10 3121 xor rdi,rax 3122 add r10,r12 3123 vpxor xmm11,xmm11,xmm9 3124 shrd r13,r13,14 3125 and r15,rdi 3126 vpaddq xmm0,xmm0,xmm11 3127 xor r14,r11 3128 add r10,r13 3129 vpaddq xmm10,xmm0,XMMWORD[((-128))+rbp] 3130 xor r15,rax 3131 shrd r14,r14,28 3132 add rcx,r10 3133 add r10,r15 3134 mov r13,rcx 3135 add r14,r10 3136 vmovdqa XMMWORD[rsp],xmm10 3137 vpalignr xmm8,xmm2,xmm1,8 3138 shrd r13,r13,23 3139 mov r10,r14 3140 vpalignr xmm11,xmm6,xmm5,8 3141 mov r12,rdx 3142 shrd r14,r14,5 3143 vpsrlq xmm10,xmm8,1 3144 xor r13,rcx 3145 xor r12,r8 3146 vpaddq xmm1,xmm1,xmm11 3147 shrd r13,r13,4 3148 xor r14,r10 3149 vpsrlq xmm11,xmm8,7 3150 and r12,rcx 3151 xor r13,rcx 3152 vpsllq xmm9,xmm8,56 3153 add r9,QWORD[16+rsp] 3154 mov r15,r10 3155 vpxor xmm8,xmm11,xmm10 3156 xor r12,r8 3157 shrd r14,r14,6 3158 vpsrlq xmm10,xmm10,7 3159 xor r15,r11 3160 add r9,r12 3161 vpxor xmm8,xmm8,xmm9 3162 shrd r13,r13,14 3163 and rdi,r15 3164 vpsllq xmm9,xmm9,7 3165 xor r14,r10 3166 add r9,r13 3167 vpxor xmm8,xmm8,xmm10 3168 xor rdi,r11 3169 shrd r14,r14,28 3170 vpsrlq xmm11,xmm0,6 3171 add rbx,r9 3172 add r9,rdi 3173 vpxor xmm8,xmm8,xmm9 3174 mov r13,rbx 3175 add r14,r9 3176 vpsllq xmm10,xmm0,3 3177 shrd r13,r13,23 3178 mov r9,r14 3179 vpaddq xmm1,xmm1,xmm8 3180 mov r12,rcx 3181 shrd r14,r14,5 3182 vpsrlq xmm9,xmm0,19 3183 xor r13,rbx 3184 xor r12,rdx 3185 vpxor xmm11,xmm11,xmm10 3186 shrd r13,r13,4 3187 xor r14,r9 3188 vpsllq xmm10,xmm10,42 3189 and r12,rbx 3190 xor r13,rbx 3191 vpxor xmm11,xmm11,xmm9 3192 add r8,QWORD[24+rsp] 3193 mov rdi,r9 3194 vpsrlq xmm9,xmm9,42 3195 xor r12,rdx 3196 shrd r14,r14,6 3197 vpxor xmm11,xmm11,xmm10 3198 xor rdi,r10 3199 add r8,r12 3200 vpxor xmm11,xmm11,xmm9 3201 shrd r13,r13,14 3202 and r15,rdi 3203 vpaddq xmm1,xmm1,xmm11 3204 xor r14,r9 3205 add r8,r13 3206 vpaddq xmm10,xmm1,XMMWORD[((-96))+rbp] 3207 xor r15,r10 3208 shrd r14,r14,28 3209 add rax,r8 3210 add r8,r15 3211 mov r13,rax 3212 add r14,r8 3213 vmovdqa XMMWORD[16+rsp],xmm10 3214 vpalignr xmm8,xmm3,xmm2,8 3215 shrd r13,r13,23 3216 mov r8,r14 3217 vpalignr xmm11,xmm7,xmm6,8 3218 mov r12,rbx 3219 shrd r14,r14,5 3220 vpsrlq xmm10,xmm8,1 3221 xor r13,rax 3222 xor r12,rcx 3223 vpaddq xmm2,xmm2,xmm11 3224 shrd r13,r13,4 3225 xor r14,r8 3226 vpsrlq xmm11,xmm8,7 3227 and r12,rax 3228 xor r13,rax 3229 vpsllq xmm9,xmm8,56 3230 add rdx,QWORD[32+rsp] 3231 mov r15,r8 3232 vpxor xmm8,xmm11,xmm10 3233 xor r12,rcx 3234 shrd r14,r14,6 3235 vpsrlq xmm10,xmm10,7 3236 xor r15,r9 3237 add rdx,r12 3238 vpxor xmm8,xmm8,xmm9 3239 shrd r13,r13,14 3240 and rdi,r15 3241 vpsllq xmm9,xmm9,7 3242 xor r14,r8 3243 add rdx,r13 3244 vpxor xmm8,xmm8,xmm10 3245 xor rdi,r9 3246 shrd r14,r14,28 3247 vpsrlq xmm11,xmm1,6 3248 add r11,rdx 3249 add rdx,rdi 3250 vpxor xmm8,xmm8,xmm9 3251 mov r13,r11 3252 add r14,rdx 3253 vpsllq xmm10,xmm1,3 3254 shrd r13,r13,23 3255 mov rdx,r14 3256 vpaddq xmm2,xmm2,xmm8 3257 mov r12,rax 3258 shrd r14,r14,5 3259 vpsrlq xmm9,xmm1,19 3260 xor r13,r11 3261 xor r12,rbx 3262 vpxor xmm11,xmm11,xmm10 3263 shrd r13,r13,4 3264 xor r14,rdx 3265 vpsllq xmm10,xmm10,42 3266 and r12,r11 3267 xor r13,r11 3268 vpxor xmm11,xmm11,xmm9 3269 add rcx,QWORD[40+rsp] 3270 mov rdi,rdx 3271 vpsrlq xmm9,xmm9,42 3272 xor r12,rbx 3273 shrd r14,r14,6 3274 vpxor xmm11,xmm11,xmm10 3275 xor rdi,r8 3276 add rcx,r12 3277 vpxor xmm11,xmm11,xmm9 3278 shrd r13,r13,14 3279 and r15,rdi 3280 vpaddq xmm2,xmm2,xmm11 3281 xor r14,rdx 3282 add rcx,r13 3283 vpaddq xmm10,xmm2,XMMWORD[((-64))+rbp] 3284 xor r15,r8 3285 shrd r14,r14,28 3286 add r10,rcx 3287 add rcx,r15 3288 mov r13,r10 3289 add r14,rcx 3290 vmovdqa XMMWORD[32+rsp],xmm10 3291 vpalignr xmm8,xmm4,xmm3,8 3292 shrd r13,r13,23 3293 mov rcx,r14 3294 vpalignr xmm11,xmm0,xmm7,8 3295 mov r12,r11 3296 shrd r14,r14,5 3297 vpsrlq xmm10,xmm8,1 3298 xor r13,r10 3299 xor r12,rax 3300 vpaddq xmm3,xmm3,xmm11 3301 shrd r13,r13,4 3302 xor r14,rcx 3303 vpsrlq xmm11,xmm8,7 3304 and r12,r10 3305 xor r13,r10 3306 vpsllq xmm9,xmm8,56 3307 add rbx,QWORD[48+rsp] 3308 mov r15,rcx 3309 vpxor xmm8,xmm11,xmm10 3310 xor r12,rax 3311 shrd r14,r14,6 3312 vpsrlq xmm10,xmm10,7 3313 xor r15,rdx 3314 add rbx,r12 3315 vpxor xmm8,xmm8,xmm9 3316 shrd r13,r13,14 3317 and rdi,r15 3318 vpsllq xmm9,xmm9,7 3319 xor r14,rcx 3320 add rbx,r13 3321 vpxor xmm8,xmm8,xmm10 3322 xor rdi,rdx 3323 shrd r14,r14,28 3324 vpsrlq xmm11,xmm2,6 3325 add r9,rbx 3326 add rbx,rdi 3327 vpxor xmm8,xmm8,xmm9 3328 mov r13,r9 3329 add r14,rbx 3330 vpsllq xmm10,xmm2,3 3331 shrd r13,r13,23 3332 mov rbx,r14 3333 vpaddq xmm3,xmm3,xmm8 3334 mov r12,r10 3335 shrd r14,r14,5 3336 vpsrlq xmm9,xmm2,19 3337 xor r13,r9 3338 xor r12,r11 3339 vpxor xmm11,xmm11,xmm10 3340 shrd r13,r13,4 3341 xor r14,rbx 3342 vpsllq xmm10,xmm10,42 3343 and r12,r9 3344 xor r13,r9 3345 vpxor xmm11,xmm11,xmm9 3346 add rax,QWORD[56+rsp] 3347 mov rdi,rbx 3348 vpsrlq xmm9,xmm9,42 3349 xor r12,r11 3350 shrd r14,r14,6 3351 vpxor xmm11,xmm11,xmm10 3352 xor rdi,rcx 3353 add rax,r12 3354 vpxor xmm11,xmm11,xmm9 3355 shrd r13,r13,14 3356 and r15,rdi 3357 vpaddq xmm3,xmm3,xmm11 3358 xor r14,rbx 3359 add rax,r13 3360 vpaddq xmm10,xmm3,XMMWORD[((-32))+rbp] 3361 xor r15,rcx 3362 shrd r14,r14,28 3363 add r8,rax 3364 add rax,r15 3365 mov r13,r8 3366 add r14,rax 3367 vmovdqa XMMWORD[48+rsp],xmm10 3368 vpalignr xmm8,xmm5,xmm4,8 3369 shrd r13,r13,23 3370 mov rax,r14 3371 vpalignr xmm11,xmm1,xmm0,8 3372 mov r12,r9 3373 shrd r14,r14,5 3374 vpsrlq xmm10,xmm8,1 3375 xor r13,r8 3376 xor r12,r10 3377 vpaddq xmm4,xmm4,xmm11 3378 shrd r13,r13,4 3379 xor r14,rax 3380 vpsrlq xmm11,xmm8,7 3381 and r12,r8 3382 xor r13,r8 3383 vpsllq xmm9,xmm8,56 3384 add r11,QWORD[64+rsp] 3385 mov r15,rax 3386 vpxor xmm8,xmm11,xmm10 3387 xor r12,r10 3388 shrd r14,r14,6 3389 vpsrlq xmm10,xmm10,7 3390 xor r15,rbx 3391 add r11,r12 3392 vpxor xmm8,xmm8,xmm9 3393 shrd r13,r13,14 3394 and rdi,r15 3395 vpsllq xmm9,xmm9,7 3396 xor r14,rax 3397 add r11,r13 3398 vpxor xmm8,xmm8,xmm10 3399 xor rdi,rbx 3400 shrd r14,r14,28 3401 vpsrlq xmm11,xmm3,6 3402 add rdx,r11 3403 add r11,rdi 3404 vpxor xmm8,xmm8,xmm9 3405 mov r13,rdx 3406 add r14,r11 3407 vpsllq xmm10,xmm3,3 3408 shrd r13,r13,23 3409 mov r11,r14 3410 vpaddq xmm4,xmm4,xmm8 3411 mov r12,r8 3412 shrd r14,r14,5 3413 vpsrlq xmm9,xmm3,19 3414 xor r13,rdx 3415 xor r12,r9 3416 vpxor xmm11,xmm11,xmm10 3417 shrd r13,r13,4 3418 xor r14,r11 3419 vpsllq xmm10,xmm10,42 3420 and r12,rdx 3421 xor r13,rdx 3422 vpxor xmm11,xmm11,xmm9 3423 add r10,QWORD[72+rsp] 3424 mov rdi,r11 3425 vpsrlq xmm9,xmm9,42 3426 xor r12,r9 3427 shrd r14,r14,6 3428 vpxor xmm11,xmm11,xmm10 3429 xor rdi,rax 3430 add r10,r12 3431 vpxor xmm11,xmm11,xmm9 3432 shrd r13,r13,14 3433 and r15,rdi 3434 vpaddq xmm4,xmm4,xmm11 3435 xor r14,r11 3436 add r10,r13 3437 vpaddq xmm10,xmm4,XMMWORD[rbp] 3438 xor r15,rax 3439 shrd r14,r14,28 3440 add rcx,r10 3441 add r10,r15 3442 mov r13,rcx 3443 add r14,r10 3444 vmovdqa XMMWORD[64+rsp],xmm10 3445 vpalignr xmm8,xmm6,xmm5,8 3446 shrd r13,r13,23 3447 mov r10,r14 3448 vpalignr xmm11,xmm2,xmm1,8 3449 mov r12,rdx 3450 shrd r14,r14,5 3451 vpsrlq xmm10,xmm8,1 3452 xor r13,rcx 3453 xor r12,r8 3454 vpaddq xmm5,xmm5,xmm11 3455 shrd r13,r13,4 3456 xor r14,r10 3457 vpsrlq xmm11,xmm8,7 3458 and r12,rcx 3459 xor r13,rcx 3460 vpsllq xmm9,xmm8,56 3461 add r9,QWORD[80+rsp] 3462 mov r15,r10 3463 vpxor xmm8,xmm11,xmm10 3464 xor r12,r8 3465 shrd r14,r14,6 3466 vpsrlq xmm10,xmm10,7 3467 xor r15,r11 3468 add r9,r12 3469 vpxor xmm8,xmm8,xmm9 3470 shrd r13,r13,14 3471 and rdi,r15 3472 vpsllq xmm9,xmm9,7 3473 xor r14,r10 3474 add r9,r13 3475 vpxor xmm8,xmm8,xmm10 3476 xor rdi,r11 3477 shrd r14,r14,28 3478 vpsrlq xmm11,xmm4,6 3479 add rbx,r9 3480 add r9,rdi 3481 vpxor xmm8,xmm8,xmm9 3482 mov r13,rbx 3483 add r14,r9 3484 vpsllq xmm10,xmm4,3 3485 shrd r13,r13,23 3486 mov r9,r14 3487 vpaddq xmm5,xmm5,xmm8 3488 mov r12,rcx 3489 shrd r14,r14,5 3490 vpsrlq xmm9,xmm4,19 3491 xor r13,rbx 3492 xor r12,rdx 3493 vpxor xmm11,xmm11,xmm10 3494 shrd r13,r13,4 3495 xor r14,r9 3496 vpsllq xmm10,xmm10,42 3497 and r12,rbx 3498 xor r13,rbx 3499 vpxor xmm11,xmm11,xmm9 3500 add r8,QWORD[88+rsp] 3501 mov rdi,r9 3502 vpsrlq xmm9,xmm9,42 3503 xor r12,rdx 3504 shrd r14,r14,6 3505 vpxor xmm11,xmm11,xmm10 3506 xor rdi,r10 3507 add r8,r12 3508 vpxor xmm11,xmm11,xmm9 3509 shrd r13,r13,14 3510 and r15,rdi 3511 vpaddq xmm5,xmm5,xmm11 3512 xor r14,r9 3513 add r8,r13 3514 vpaddq xmm10,xmm5,XMMWORD[32+rbp] 3515 xor r15,r10 3516 shrd r14,r14,28 3517 add rax,r8 3518 add r8,r15 3519 mov r13,rax 3520 add r14,r8 3521 vmovdqa XMMWORD[80+rsp],xmm10 3522 vpalignr xmm8,xmm7,xmm6,8 3523 shrd r13,r13,23 3524 mov r8,r14 3525 vpalignr xmm11,xmm3,xmm2,8 3526 mov r12,rbx 3527 shrd r14,r14,5 3528 vpsrlq xmm10,xmm8,1 3529 xor r13,rax 3530 xor r12,rcx 3531 vpaddq xmm6,xmm6,xmm11 3532 shrd r13,r13,4 3533 xor r14,r8 3534 vpsrlq xmm11,xmm8,7 3535 and r12,rax 3536 xor r13,rax 3537 vpsllq xmm9,xmm8,56 3538 add rdx,QWORD[96+rsp] 3539 mov r15,r8 3540 vpxor xmm8,xmm11,xmm10 3541 xor r12,rcx 3542 shrd r14,r14,6 3543 vpsrlq xmm10,xmm10,7 3544 xor r15,r9 3545 add rdx,r12 3546 vpxor xmm8,xmm8,xmm9 3547 shrd r13,r13,14 3548 and rdi,r15 3549 vpsllq xmm9,xmm9,7 3550 xor r14,r8 3551 add rdx,r13 3552 vpxor xmm8,xmm8,xmm10 3553 xor rdi,r9 3554 shrd r14,r14,28 3555 vpsrlq xmm11,xmm5,6 3556 add r11,rdx 3557 add rdx,rdi 3558 vpxor xmm8,xmm8,xmm9 3559 mov r13,r11 3560 add r14,rdx 3561 vpsllq xmm10,xmm5,3 3562 shrd r13,r13,23 3563 mov rdx,r14 3564 vpaddq xmm6,xmm6,xmm8 3565 mov r12,rax 3566 shrd r14,r14,5 3567 vpsrlq xmm9,xmm5,19 3568 xor r13,r11 3569 xor r12,rbx 3570 vpxor xmm11,xmm11,xmm10 3571 shrd r13,r13,4 3572 xor r14,rdx 3573 vpsllq xmm10,xmm10,42 3574 and r12,r11 3575 xor r13,r11 3576 vpxor xmm11,xmm11,xmm9 3577 add rcx,QWORD[104+rsp] 3578 mov rdi,rdx 3579 vpsrlq xmm9,xmm9,42 3580 xor r12,rbx 3581 shrd r14,r14,6 3582 vpxor xmm11,xmm11,xmm10 3583 xor rdi,r8 3584 add rcx,r12 3585 vpxor xmm11,xmm11,xmm9 3586 shrd r13,r13,14 3587 and r15,rdi 3588 vpaddq xmm6,xmm6,xmm11 3589 xor r14,rdx 3590 add rcx,r13 3591 vpaddq xmm10,xmm6,XMMWORD[64+rbp] 3592 xor r15,r8 3593 shrd r14,r14,28 3594 add r10,rcx 3595 add rcx,r15 3596 mov r13,r10 3597 add r14,rcx 3598 vmovdqa XMMWORD[96+rsp],xmm10 3599 vpalignr xmm8,xmm0,xmm7,8 3600 shrd r13,r13,23 3601 mov rcx,r14 3602 vpalignr xmm11,xmm4,xmm3,8 3603 mov r12,r11 3604 shrd r14,r14,5 3605 vpsrlq xmm10,xmm8,1 3606 xor r13,r10 3607 xor r12,rax 3608 vpaddq xmm7,xmm7,xmm11 3609 shrd r13,r13,4 3610 xor r14,rcx 3611 vpsrlq xmm11,xmm8,7 3612 and r12,r10 3613 xor r13,r10 3614 vpsllq xmm9,xmm8,56 3615 add rbx,QWORD[112+rsp] 3616 mov r15,rcx 3617 vpxor xmm8,xmm11,xmm10 3618 xor r12,rax 3619 shrd r14,r14,6 3620 vpsrlq xmm10,xmm10,7 3621 xor r15,rdx 3622 add rbx,r12 3623 vpxor xmm8,xmm8,xmm9 3624 shrd r13,r13,14 3625 and rdi,r15 3626 vpsllq xmm9,xmm9,7 3627 xor r14,rcx 3628 add rbx,r13 3629 vpxor xmm8,xmm8,xmm10 3630 xor rdi,rdx 3631 shrd r14,r14,28 3632 vpsrlq xmm11,xmm6,6 3633 add r9,rbx 3634 add rbx,rdi 3635 vpxor xmm8,xmm8,xmm9 3636 mov r13,r9 3637 add r14,rbx 3638 vpsllq xmm10,xmm6,3 3639 shrd r13,r13,23 3640 mov rbx,r14 3641 vpaddq xmm7,xmm7,xmm8 3642 mov r12,r10 3643 shrd r14,r14,5 3644 vpsrlq xmm9,xmm6,19 3645 xor r13,r9 3646 xor r12,r11 3647 vpxor xmm11,xmm11,xmm10 3648 shrd r13,r13,4 3649 xor r14,rbx 3650 vpsllq xmm10,xmm10,42 3651 and r12,r9 3652 xor r13,r9 3653 vpxor xmm11,xmm11,xmm9 3654 add rax,QWORD[120+rsp] 3655 mov rdi,rbx 3656 vpsrlq xmm9,xmm9,42 3657 xor r12,r11 3658 shrd r14,r14,6 3659 vpxor xmm11,xmm11,xmm10 3660 xor rdi,rcx 3661 add rax,r12 3662 vpxor xmm11,xmm11,xmm9 3663 shrd r13,r13,14 3664 and r15,rdi 3665 vpaddq xmm7,xmm7,xmm11 3666 xor r14,rbx 3667 add rax,r13 3668 vpaddq xmm10,xmm7,XMMWORD[96+rbp] 3669 xor r15,rcx 3670 shrd r14,r14,28 3671 add r8,rax 3672 add rax,r15 3673 mov r13,r8 3674 add r14,rax 3675 vmovdqa XMMWORD[112+rsp],xmm10 3676 cmp BYTE[135+rbp],0 3677 jne NEAR $L$avx_00_47 3678 shrd r13,r13,23 3679 mov rax,r14 3680 mov r12,r9 3681 shrd r14,r14,5 3682 xor r13,r8 3683 xor r12,r10 3684 shrd r13,r13,4 3685 xor r14,rax 3686 and r12,r8 3687 xor r13,r8 3688 add r11,QWORD[rsp] 3689 mov r15,rax 3690 xor r12,r10 3691 shrd r14,r14,6 3692 xor r15,rbx 3693 add r11,r12 3694 shrd r13,r13,14 3695 and rdi,r15 3696 xor r14,rax 3697 add r11,r13 3698 xor rdi,rbx 3699 shrd r14,r14,28 3700 add rdx,r11 3701 add r11,rdi 3702 mov r13,rdx 3703 add r14,r11 3704 shrd r13,r13,23 3705 mov r11,r14 3706 mov r12,r8 3707 shrd r14,r14,5 3708 xor r13,rdx 3709 xor r12,r9 3710 shrd r13,r13,4 3711 xor r14,r11 3712 and r12,rdx 3713 xor r13,rdx 3714 add r10,QWORD[8+rsp] 3715 mov rdi,r11 3716 xor r12,r9 3717 shrd r14,r14,6 3718 xor rdi,rax 3719 add r10,r12 3720 shrd r13,r13,14 3721 and r15,rdi 3722 xor r14,r11 3723 add r10,r13 3724 xor r15,rax 3725 shrd r14,r14,28 3726 add rcx,r10 3727 add r10,r15 3728 mov r13,rcx 3729 add r14,r10 3730 shrd r13,r13,23 3731 mov r10,r14 3732 mov r12,rdx 3733 shrd r14,r14,5 3734 xor r13,rcx 3735 xor r12,r8 3736 shrd r13,r13,4 3737 xor r14,r10 3738 and r12,rcx 3739 xor r13,rcx 3740 add r9,QWORD[16+rsp] 3741 mov r15,r10 3742 xor r12,r8 3743 shrd r14,r14,6 3744 xor r15,r11 3745 add r9,r12 3746 shrd r13,r13,14 3747 and rdi,r15 3748 xor r14,r10 3749 add r9,r13 3750 xor rdi,r11 3751 shrd r14,r14,28 3752 add rbx,r9 3753 add r9,rdi 3754 mov r13,rbx 3755 add r14,r9 3756 shrd r13,r13,23 3757 mov r9,r14 3758 mov r12,rcx 3759 shrd r14,r14,5 3760 xor r13,rbx 3761 xor r12,rdx 3762 shrd r13,r13,4 3763 xor r14,r9 3764 and r12,rbx 3765 xor r13,rbx 3766 add r8,QWORD[24+rsp] 3767 mov rdi,r9 3768 xor r12,rdx 3769 shrd r14,r14,6 3770 xor rdi,r10 3771 add r8,r12 3772 shrd r13,r13,14 3773 and r15,rdi 3774 xor r14,r9 3775 add r8,r13 3776 xor r15,r10 3777 shrd r14,r14,28 3778 add rax,r8 3779 add r8,r15 3780 mov r13,rax 3781 add r14,r8 3782 shrd r13,r13,23 3783 mov r8,r14 3784 mov r12,rbx 3785 shrd r14,r14,5 3786 xor r13,rax 3787 xor r12,rcx 3788 shrd r13,r13,4 3789 xor r14,r8 3790 and r12,rax 3791 xor r13,rax 3792 add rdx,QWORD[32+rsp] 3793 mov r15,r8 3794 xor r12,rcx 3795 shrd r14,r14,6 3796 xor r15,r9 3797 add rdx,r12 3798 shrd r13,r13,14 3799 and rdi,r15 3800 xor r14,r8 3801 add rdx,r13 3802 xor rdi,r9 3803 shrd r14,r14,28 3804 add r11,rdx 3805 add rdx,rdi 3806 mov r13,r11 3807 add r14,rdx 3808 shrd r13,r13,23 3809 mov rdx,r14 3810 mov r12,rax 3811 shrd r14,r14,5 3812 xor r13,r11 3813 xor r12,rbx 3814 shrd r13,r13,4 3815 xor r14,rdx 3816 and r12,r11 3817 xor r13,r11 3818 add rcx,QWORD[40+rsp] 3819 mov rdi,rdx 3820 xor r12,rbx 3821 shrd r14,r14,6 3822 xor rdi,r8 3823 add rcx,r12 3824 shrd r13,r13,14 3825 and r15,rdi 3826 xor r14,rdx 3827 add rcx,r13 3828 xor r15,r8 3829 shrd r14,r14,28 3830 add r10,rcx 3831 add rcx,r15 3832 mov r13,r10 3833 add r14,rcx 3834 shrd r13,r13,23 3835 mov rcx,r14 3836 mov r12,r11 3837 shrd r14,r14,5 3838 xor r13,r10 3839 xor r12,rax 3840 shrd r13,r13,4 3841 xor r14,rcx 3842 and r12,r10 3843 xor r13,r10 3844 add rbx,QWORD[48+rsp] 3845 mov r15,rcx 3846 xor r12,rax 3847 shrd r14,r14,6 3848 xor r15,rdx 3849 add rbx,r12 3850 shrd r13,r13,14 3851 and rdi,r15 3852 xor r14,rcx 3853 add rbx,r13 3854 xor rdi,rdx 3855 shrd r14,r14,28 3856 add r9,rbx 3857 add rbx,rdi 3858 mov r13,r9 3859 add r14,rbx 3860 shrd r13,r13,23 3861 mov rbx,r14 3862 mov r12,r10 3863 shrd r14,r14,5 3864 xor r13,r9 3865 xor r12,r11 3866 shrd r13,r13,4 3867 xor r14,rbx 3868 and r12,r9 3869 xor r13,r9 3870 add rax,QWORD[56+rsp] 3871 mov rdi,rbx 3872 xor r12,r11 3873 shrd r14,r14,6 3874 xor rdi,rcx 3875 add rax,r12 3876 shrd r13,r13,14 3877 and r15,rdi 3878 xor r14,rbx 3879 add rax,r13 3880 xor r15,rcx 3881 shrd r14,r14,28 3882 add r8,rax 3883 add rax,r15 3884 mov r13,r8 3885 add r14,rax 3886 shrd r13,r13,23 3887 mov rax,r14 3888 mov r12,r9 3889 shrd r14,r14,5 3890 xor r13,r8 3891 xor r12,r10 3892 shrd r13,r13,4 3893 xor r14,rax 3894 and r12,r8 3895 xor r13,r8 3896 add r11,QWORD[64+rsp] 3897 mov r15,rax 3898 xor r12,r10 3899 shrd r14,r14,6 3900 xor r15,rbx 3901 add r11,r12 3902 shrd r13,r13,14 3903 and rdi,r15 3904 xor r14,rax 3905 add r11,r13 3906 xor rdi,rbx 3907 shrd r14,r14,28 3908 add rdx,r11 3909 add r11,rdi 3910 mov r13,rdx 3911 add r14,r11 3912 shrd r13,r13,23 3913 mov r11,r14 3914 mov r12,r8 3915 shrd r14,r14,5 3916 xor r13,rdx 3917 xor r12,r9 3918 shrd r13,r13,4 3919 xor r14,r11 3920 and r12,rdx 3921 xor r13,rdx 3922 add r10,QWORD[72+rsp] 3923 mov rdi,r11 3924 xor r12,r9 3925 shrd r14,r14,6 3926 xor rdi,rax 3927 add r10,r12 3928 shrd r13,r13,14 3929 and r15,rdi 3930 xor r14,r11 3931 add r10,r13 3932 xor r15,rax 3933 shrd r14,r14,28 3934 add rcx,r10 3935 add r10,r15 3936 mov r13,rcx 3937 add r14,r10 3938 shrd r13,r13,23 3939 mov r10,r14 3940 mov r12,rdx 3941 shrd r14,r14,5 3942 xor r13,rcx 3943 xor r12,r8 3944 shrd r13,r13,4 3945 xor r14,r10 3946 and r12,rcx 3947 xor r13,rcx 3948 add r9,QWORD[80+rsp] 3949 mov r15,r10 3950 xor r12,r8 3951 shrd r14,r14,6 3952 xor r15,r11 3953 add r9,r12 3954 shrd r13,r13,14 3955 and rdi,r15 3956 xor r14,r10 3957 add r9,r13 3958 xor rdi,r11 3959 shrd r14,r14,28 3960 add rbx,r9 3961 add r9,rdi 3962 mov r13,rbx 3963 add r14,r9 3964 shrd r13,r13,23 3965 mov r9,r14 3966 mov r12,rcx 3967 shrd r14,r14,5 3968 xor r13,rbx 3969 xor r12,rdx 3970 shrd r13,r13,4 3971 xor r14,r9 3972 and r12,rbx 3973 xor r13,rbx 3974 add r8,QWORD[88+rsp] 3975 mov rdi,r9 3976 xor r12,rdx 3977 shrd r14,r14,6 3978 xor rdi,r10 3979 add r8,r12 3980 shrd r13,r13,14 3981 and r15,rdi 3982 xor r14,r9 3983 add r8,r13 3984 xor r15,r10 3985 shrd r14,r14,28 3986 add rax,r8 3987 add r8,r15 3988 mov r13,rax 3989 add r14,r8 3990 shrd r13,r13,23 3991 mov r8,r14 3992 mov r12,rbx 3993 shrd r14,r14,5 3994 xor r13,rax 3995 xor r12,rcx 3996 shrd r13,r13,4 3997 xor r14,r8 3998 and r12,rax 3999 xor r13,rax 4000 add rdx,QWORD[96+rsp] 4001 mov r15,r8 4002 xor r12,rcx 4003 shrd r14,r14,6 4004 xor r15,r9 4005 add rdx,r12 4006 shrd r13,r13,14 4007 and rdi,r15 4008 xor r14,r8 4009 add rdx,r13 4010 xor rdi,r9 4011 shrd r14,r14,28 4012 add r11,rdx 4013 add rdx,rdi 4014 mov r13,r11 4015 add r14,rdx 4016 shrd r13,r13,23 4017 mov rdx,r14 4018 mov r12,rax 4019 shrd r14,r14,5 4020 xor r13,r11 4021 xor r12,rbx 4022 shrd r13,r13,4 4023 xor r14,rdx 4024 and r12,r11 4025 xor r13,r11 4026 add rcx,QWORD[104+rsp] 4027 mov rdi,rdx 4028 xor r12,rbx 4029 shrd r14,r14,6 4030 xor rdi,r8 4031 add rcx,r12 4032 shrd r13,r13,14 4033 and r15,rdi 4034 xor r14,rdx 4035 add rcx,r13 4036 xor r15,r8 4037 shrd r14,r14,28 4038 add r10,rcx 4039 add rcx,r15 4040 mov r13,r10 4041 add r14,rcx 4042 shrd r13,r13,23 4043 mov rcx,r14 4044 mov r12,r11 4045 shrd r14,r14,5 4046 xor r13,r10 4047 xor r12,rax 4048 shrd r13,r13,4 4049 xor r14,rcx 4050 and r12,r10 4051 xor r13,r10 4052 add rbx,QWORD[112+rsp] 4053 mov r15,rcx 4054 xor r12,rax 4055 shrd r14,r14,6 4056 xor r15,rdx 4057 add rbx,r12 4058 shrd r13,r13,14 4059 and rdi,r15 4060 xor r14,rcx 4061 add rbx,r13 4062 xor rdi,rdx 4063 shrd r14,r14,28 4064 add r9,rbx 4065 add rbx,rdi 4066 mov r13,r9 4067 add r14,rbx 4068 shrd r13,r13,23 4069 mov rbx,r14 4070 mov r12,r10 4071 shrd r14,r14,5 4072 xor r13,r9 4073 xor r12,r11 4074 shrd r13,r13,4 4075 xor r14,rbx 4076 and r12,r9 4077 xor r13,r9 4078 add rax,QWORD[120+rsp] 4079 mov rdi,rbx 4080 xor r12,r11 4081 shrd r14,r14,6 4082 xor rdi,rcx 4083 add rax,r12 4084 shrd r13,r13,14 4085 and r15,rdi 4086 xor r14,rbx 4087 add rax,r13 4088 xor r15,rcx 4089 shrd r14,r14,28 4090 add r8,rax 4091 add rax,r15 4092 mov r13,r8 4093 add r14,rax 4094 mov rdi,QWORD[((128+0))+rsp] 4095 mov rax,r14 4096 4097 add rax,QWORD[rdi] 4098 lea rsi,[128+rsi] 4099 add rbx,QWORD[8+rdi] 4100 add rcx,QWORD[16+rdi] 4101 add rdx,QWORD[24+rdi] 4102 add r8,QWORD[32+rdi] 4103 add r9,QWORD[40+rdi] 4104 add r10,QWORD[48+rdi] 4105 add r11,QWORD[56+rdi] 4106 4107 cmp rsi,QWORD[((128+16))+rsp] 4108 4109 mov QWORD[rdi],rax 4110 mov QWORD[8+rdi],rbx 4111 mov QWORD[16+rdi],rcx 4112 mov QWORD[24+rdi],rdx 4113 mov QWORD[32+rdi],r8 4114 mov QWORD[40+rdi],r9 4115 mov QWORD[48+rdi],r10 4116 mov QWORD[56+rdi],r11 4117 jb NEAR $L$loop_avx 4118 4119 mov rsi,QWORD[152+rsp] 4120 4121 vzeroupper 4122 movaps xmm6,XMMWORD[((128+32))+rsp] 4123 movaps xmm7,XMMWORD[((128+48))+rsp] 4124 movaps xmm8,XMMWORD[((128+64))+rsp] 4125 movaps xmm9,XMMWORD[((128+80))+rsp] 4126 movaps xmm10,XMMWORD[((128+96))+rsp] 4127 movaps xmm11,XMMWORD[((128+112))+rsp] 4128 mov r15,QWORD[((-48))+rsi] 4129 4130 mov r14,QWORD[((-40))+rsi] 4131 4132 mov r13,QWORD[((-32))+rsi] 4133 4134 mov r12,QWORD[((-24))+rsi] 4135 4136 mov rbp,QWORD[((-16))+rsi] 4137 4138 mov rbx,QWORD[((-8))+rsi] 4139 4140 lea rsp,[rsi] 4141 4142 $L$epilogue_avx: 4143 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 4144 mov rsi,QWORD[16+rsp] 4145 DB 0F3h,0C3h ;repret 4146 4147 $L$SEH_end_sha512_block_data_order_avx: 4148 4149 ALIGN 64 4150 sha512_block_data_order_avx2: 4151 mov QWORD[8+rsp],rdi ;WIN64 prologue 4152 mov QWORD[16+rsp],rsi 4153 mov rax,rsp 4154 $L$SEH_begin_sha512_block_data_order_avx2: 4155 mov rdi,rcx 4156 mov rsi,rdx 4157 mov rdx,r8 4158 4159 4160 4161 $L$avx2_shortcut: 4162 mov rax,rsp 4163 4164 push rbx 4165 4166 push rbp 4167 4168 push r12 4169 4170 push r13 4171 4172 push r14 4173 4174 push r15 4175 4176 sub rsp,1408 4177 shl rdx,4 4178 and rsp,-256*8 4179 lea rdx,[rdx*8+rsi] 4180 add rsp,1152 4181 mov QWORD[((128+0))+rsp],rdi 4182 mov QWORD[((128+8))+rsp],rsi 4183 mov QWORD[((128+16))+rsp],rdx 4184 mov QWORD[152+rsp],rax 4185 4186 movaps XMMWORD[(128+32)+rsp],xmm6 4187 movaps XMMWORD[(128+48)+rsp],xmm7 4188 movaps XMMWORD[(128+64)+rsp],xmm8 4189 movaps XMMWORD[(128+80)+rsp],xmm9 4190 movaps XMMWORD[(128+96)+rsp],xmm10 4191 movaps XMMWORD[(128+112)+rsp],xmm11 4192 $L$prologue_avx2: 4193 4194 vzeroupper 4195 sub rsi,-16*8 4196 mov rax,QWORD[rdi] 4197 mov r12,rsi 4198 mov rbx,QWORD[8+rdi] 4199 cmp rsi,rdx 4200 mov rcx,QWORD[16+rdi] 4201 cmove r12,rsp 4202 mov rdx,QWORD[24+rdi] 4203 mov r8,QWORD[32+rdi] 4204 mov r9,QWORD[40+rdi] 4205 mov r10,QWORD[48+rdi] 4206 mov r11,QWORD[56+rdi] 4207 jmp NEAR $L$oop_avx2 4208 ALIGN 16 4209 $L$oop_avx2: 4210 vmovdqu xmm0,XMMWORD[((-128))+rsi] 4211 vmovdqu xmm1,XMMWORD[((-128+16))+rsi] 4212 vmovdqu xmm2,XMMWORD[((-128+32))+rsi] 4213 lea rbp,[((K512+128))] 4214 vmovdqu xmm3,XMMWORD[((-128+48))+rsi] 4215 vmovdqu xmm4,XMMWORD[((-128+64))+rsi] 4216 vmovdqu xmm5,XMMWORD[((-128+80))+rsi] 4217 vmovdqu xmm6,XMMWORD[((-128+96))+rsi] 4218 vmovdqu xmm7,XMMWORD[((-128+112))+rsi] 4219 4220 vmovdqa ymm10,YMMWORD[1152+rbp] 4221 vinserti128 ymm0,ymm0,XMMWORD[r12],1 4222 vinserti128 ymm1,ymm1,XMMWORD[16+r12],1 4223 vpshufb ymm0,ymm0,ymm10 4224 vinserti128 ymm2,ymm2,XMMWORD[32+r12],1 4225 vpshufb ymm1,ymm1,ymm10 4226 vinserti128 ymm3,ymm3,XMMWORD[48+r12],1 4227 vpshufb ymm2,ymm2,ymm10 4228 vinserti128 ymm4,ymm4,XMMWORD[64+r12],1 4229 vpshufb ymm3,ymm3,ymm10 4230 vinserti128 ymm5,ymm5,XMMWORD[80+r12],1 4231 vpshufb ymm4,ymm4,ymm10 4232 vinserti128 ymm6,ymm6,XMMWORD[96+r12],1 4233 vpshufb ymm5,ymm5,ymm10 4234 vinserti128 ymm7,ymm7,XMMWORD[112+r12],1 4235 4236 vpaddq ymm8,ymm0,YMMWORD[((-128))+rbp] 4237 vpshufb ymm6,ymm6,ymm10 4238 vpaddq ymm9,ymm1,YMMWORD[((-96))+rbp] 4239 vpshufb ymm7,ymm7,ymm10 4240 vpaddq ymm10,ymm2,YMMWORD[((-64))+rbp] 4241 vpaddq ymm11,ymm3,YMMWORD[((-32))+rbp] 4242 vmovdqa YMMWORD[rsp],ymm8 4243 vpaddq ymm8,ymm4,YMMWORD[rbp] 4244 vmovdqa YMMWORD[32+rsp],ymm9 4245 vpaddq ymm9,ymm5,YMMWORD[32+rbp] 4246 vmovdqa YMMWORD[64+rsp],ymm10 4247 vpaddq ymm10,ymm6,YMMWORD[64+rbp] 4248 vmovdqa YMMWORD[96+rsp],ymm11 4249 lea rsp,[((-128))+rsp] 4250 vpaddq ymm11,ymm7,YMMWORD[96+rbp] 4251 vmovdqa YMMWORD[rsp],ymm8 4252 xor r14,r14 4253 vmovdqa YMMWORD[32+rsp],ymm9 4254 mov rdi,rbx 4255 vmovdqa YMMWORD[64+rsp],ymm10 4256 xor rdi,rcx 4257 vmovdqa YMMWORD[96+rsp],ymm11 4258 mov r12,r9 4259 add rbp,16*2*8 4260 jmp NEAR $L$avx2_00_47 4261 4262 ALIGN 16 4263 $L$avx2_00_47: 4264 lea rsp,[((-128))+rsp] 4265 vpalignr ymm8,ymm1,ymm0,8 4266 add r11,QWORD[((0+256))+rsp] 4267 and r12,r8 4268 rorx r13,r8,41 4269 vpalignr ymm11,ymm5,ymm4,8 4270 rorx r15,r8,18 4271 lea rax,[r14*1+rax] 4272 lea r11,[r12*1+r11] 4273 vpsrlq ymm10,ymm8,1 4274 andn r12,r8,r10 4275 xor r13,r15 4276 rorx r14,r8,14 4277 vpaddq ymm0,ymm0,ymm11 4278 vpsrlq ymm11,ymm8,7 4279 lea r11,[r12*1+r11] 4280 xor r13,r14 4281 mov r15,rax 4282 vpsllq ymm9,ymm8,56 4283 vpxor ymm8,ymm11,ymm10 4284 rorx r12,rax,39 4285 lea r11,[r13*1+r11] 4286 xor r15,rbx 4287 vpsrlq ymm10,ymm10,7 4288 vpxor ymm8,ymm8,ymm9 4289 rorx r14,rax,34 4290 rorx r13,rax,28 4291 lea rdx,[r11*1+rdx] 4292 vpsllq ymm9,ymm9,7 4293 vpxor ymm8,ymm8,ymm10 4294 and rdi,r15 4295 xor r14,r12 4296 xor rdi,rbx 4297 vpsrlq ymm11,ymm7,6 4298 vpxor ymm8,ymm8,ymm9 4299 xor r14,r13 4300 lea r11,[rdi*1+r11] 4301 mov r12,r8 4302 vpsllq ymm10,ymm7,3 4303 vpaddq ymm0,ymm0,ymm8 4304 add r10,QWORD[((8+256))+rsp] 4305 and r12,rdx 4306 rorx r13,rdx,41 4307 vpsrlq ymm9,ymm7,19 4308 vpxor ymm11,ymm11,ymm10 4309 rorx rdi,rdx,18 4310 lea r11,[r14*1+r11] 4311 lea r10,[r12*1+r10] 4312 vpsllq ymm10,ymm10,42 4313 vpxor ymm11,ymm11,ymm9 4314 andn r12,rdx,r9 4315 xor r13,rdi 4316 rorx r14,rdx,14 4317 vpsrlq ymm9,ymm9,42 4318 vpxor ymm11,ymm11,ymm10 4319 lea r10,[r12*1+r10] 4320 xor r13,r14 4321 mov rdi,r11 4322 vpxor ymm11,ymm11,ymm9 4323 rorx r12,r11,39 4324 lea r10,[r13*1+r10] 4325 xor rdi,rax 4326 vpaddq ymm0,ymm0,ymm11 4327 rorx r14,r11,34 4328 rorx r13,r11,28 4329 lea rcx,[r10*1+rcx] 4330 vpaddq ymm10,ymm0,YMMWORD[((-128))+rbp] 4331 and r15,rdi 4332 xor r14,r12 4333 xor r15,rax 4334 xor r14,r13 4335 lea r10,[r15*1+r10] 4336 mov r12,rdx 4337 vmovdqa YMMWORD[rsp],ymm10 4338 vpalignr ymm8,ymm2,ymm1,8 4339 add r9,QWORD[((32+256))+rsp] 4340 and r12,rcx 4341 rorx r13,rcx,41 4342 vpalignr ymm11,ymm6,ymm5,8 4343 rorx r15,rcx,18 4344 lea r10,[r14*1+r10] 4345 lea r9,[r12*1+r9] 4346 vpsrlq ymm10,ymm8,1 4347 andn r12,rcx,r8 4348 xor r13,r15 4349 rorx r14,rcx,14 4350 vpaddq ymm1,ymm1,ymm11 4351 vpsrlq ymm11,ymm8,7 4352 lea r9,[r12*1+r9] 4353 xor r13,r14 4354 mov r15,r10 4355 vpsllq ymm9,ymm8,56 4356 vpxor ymm8,ymm11,ymm10 4357 rorx r12,r10,39 4358 lea r9,[r13*1+r9] 4359 xor r15,r11 4360 vpsrlq ymm10,ymm10,7 4361 vpxor ymm8,ymm8,ymm9 4362 rorx r14,r10,34 4363 rorx r13,r10,28 4364 lea rbx,[r9*1+rbx] 4365 vpsllq ymm9,ymm9,7 4366 vpxor ymm8,ymm8,ymm10 4367 and rdi,r15 4368 xor r14,r12 4369 xor rdi,r11 4370 vpsrlq ymm11,ymm0,6 4371 vpxor ymm8,ymm8,ymm9 4372 xor r14,r13 4373 lea r9,[rdi*1+r9] 4374 mov r12,rcx 4375 vpsllq ymm10,ymm0,3 4376 vpaddq ymm1,ymm1,ymm8 4377 add r8,QWORD[((40+256))+rsp] 4378 and r12,rbx 4379 rorx r13,rbx,41 4380 vpsrlq ymm9,ymm0,19 4381 vpxor ymm11,ymm11,ymm10 4382 rorx rdi,rbx,18 4383 lea r9,[r14*1+r9] 4384 lea r8,[r12*1+r8] 4385 vpsllq ymm10,ymm10,42 4386 vpxor ymm11,ymm11,ymm9 4387 andn r12,rbx,rdx 4388 xor r13,rdi 4389 rorx r14,rbx,14 4390 vpsrlq ymm9,ymm9,42 4391 vpxor ymm11,ymm11,ymm10 4392 lea r8,[r12*1+r8] 4393 xor r13,r14 4394 mov rdi,r9 4395 vpxor ymm11,ymm11,ymm9 4396 rorx r12,r9,39 4397 lea r8,[r13*1+r8] 4398 xor rdi,r10 4399 vpaddq ymm1,ymm1,ymm11 4400 rorx r14,r9,34 4401 rorx r13,r9,28 4402 lea rax,[r8*1+rax] 4403 vpaddq ymm10,ymm1,YMMWORD[((-96))+rbp] 4404 and r15,rdi 4405 xor r14,r12 4406 xor r15,r10 4407 xor r14,r13 4408 lea r8,[r15*1+r8] 4409 mov r12,rbx 4410 vmovdqa YMMWORD[32+rsp],ymm10 4411 vpalignr ymm8,ymm3,ymm2,8 4412 add rdx,QWORD[((64+256))+rsp] 4413 and r12,rax 4414 rorx r13,rax,41 4415 vpalignr ymm11,ymm7,ymm6,8 4416 rorx r15,rax,18 4417 lea r8,[r14*1+r8] 4418 lea rdx,[r12*1+rdx] 4419 vpsrlq ymm10,ymm8,1 4420 andn r12,rax,rcx 4421 xor r13,r15 4422 rorx r14,rax,14 4423 vpaddq ymm2,ymm2,ymm11 4424 vpsrlq ymm11,ymm8,7 4425 lea rdx,[r12*1+rdx] 4426 xor r13,r14 4427 mov r15,r8 4428 vpsllq ymm9,ymm8,56 4429 vpxor ymm8,ymm11,ymm10 4430 rorx r12,r8,39 4431 lea rdx,[r13*1+rdx] 4432 xor r15,r9 4433 vpsrlq ymm10,ymm10,7 4434 vpxor ymm8,ymm8,ymm9 4435 rorx r14,r8,34 4436 rorx r13,r8,28 4437 lea r11,[rdx*1+r11] 4438 vpsllq ymm9,ymm9,7 4439 vpxor ymm8,ymm8,ymm10 4440 and rdi,r15 4441 xor r14,r12 4442 xor rdi,r9 4443 vpsrlq ymm11,ymm1,6 4444 vpxor ymm8,ymm8,ymm9 4445 xor r14,r13 4446 lea rdx,[rdi*1+rdx] 4447 mov r12,rax 4448 vpsllq ymm10,ymm1,3 4449 vpaddq ymm2,ymm2,ymm8 4450 add rcx,QWORD[((72+256))+rsp] 4451 and r12,r11 4452 rorx r13,r11,41 4453 vpsrlq ymm9,ymm1,19 4454 vpxor ymm11,ymm11,ymm10 4455 rorx rdi,r11,18 4456 lea rdx,[r14*1+rdx] 4457 lea rcx,[r12*1+rcx] 4458 vpsllq ymm10,ymm10,42 4459 vpxor ymm11,ymm11,ymm9 4460 andn r12,r11,rbx 4461 xor r13,rdi 4462 rorx r14,r11,14 4463 vpsrlq ymm9,ymm9,42 4464 vpxor ymm11,ymm11,ymm10 4465 lea rcx,[r12*1+rcx] 4466 xor r13,r14 4467 mov rdi,rdx 4468 vpxor ymm11,ymm11,ymm9 4469 rorx r12,rdx,39 4470 lea rcx,[r13*1+rcx] 4471 xor rdi,r8 4472 vpaddq ymm2,ymm2,ymm11 4473 rorx r14,rdx,34 4474 rorx r13,rdx,28 4475 lea r10,[rcx*1+r10] 4476 vpaddq ymm10,ymm2,YMMWORD[((-64))+rbp] 4477 and r15,rdi 4478 xor r14,r12 4479 xor r15,r8 4480 xor r14,r13 4481 lea rcx,[r15*1+rcx] 4482 mov r12,r11 4483 vmovdqa YMMWORD[64+rsp],ymm10 4484 vpalignr ymm8,ymm4,ymm3,8 4485 add rbx,QWORD[((96+256))+rsp] 4486 and r12,r10 4487 rorx r13,r10,41 4488 vpalignr ymm11,ymm0,ymm7,8 4489 rorx r15,r10,18 4490 lea rcx,[r14*1+rcx] 4491 lea rbx,[r12*1+rbx] 4492 vpsrlq ymm10,ymm8,1 4493 andn r12,r10,rax 4494 xor r13,r15 4495 rorx r14,r10,14 4496 vpaddq ymm3,ymm3,ymm11 4497 vpsrlq ymm11,ymm8,7 4498 lea rbx,[r12*1+rbx] 4499 xor r13,r14 4500 mov r15,rcx 4501 vpsllq ymm9,ymm8,56 4502 vpxor ymm8,ymm11,ymm10 4503 rorx r12,rcx,39 4504 lea rbx,[r13*1+rbx] 4505 xor r15,rdx 4506 vpsrlq ymm10,ymm10,7 4507 vpxor ymm8,ymm8,ymm9 4508 rorx r14,rcx,34 4509 rorx r13,rcx,28 4510 lea r9,[rbx*1+r9] 4511 vpsllq ymm9,ymm9,7 4512 vpxor ymm8,ymm8,ymm10 4513 and rdi,r15 4514 xor r14,r12 4515 xor rdi,rdx 4516 vpsrlq ymm11,ymm2,6 4517 vpxor ymm8,ymm8,ymm9 4518 xor r14,r13 4519 lea rbx,[rdi*1+rbx] 4520 mov r12,r10 4521 vpsllq ymm10,ymm2,3 4522 vpaddq ymm3,ymm3,ymm8 4523 add rax,QWORD[((104+256))+rsp] 4524 and r12,r9 4525 rorx r13,r9,41 4526 vpsrlq ymm9,ymm2,19 4527 vpxor ymm11,ymm11,ymm10 4528 rorx rdi,r9,18 4529 lea rbx,[r14*1+rbx] 4530 lea rax,[r12*1+rax] 4531 vpsllq ymm10,ymm10,42 4532 vpxor ymm11,ymm11,ymm9 4533 andn r12,r9,r11 4534 xor r13,rdi 4535 rorx r14,r9,14 4536 vpsrlq ymm9,ymm9,42 4537 vpxor ymm11,ymm11,ymm10 4538 lea rax,[r12*1+rax] 4539 xor r13,r14 4540 mov rdi,rbx 4541 vpxor ymm11,ymm11,ymm9 4542 rorx r12,rbx,39 4543 lea rax,[r13*1+rax] 4544 xor rdi,rcx 4545 vpaddq ymm3,ymm3,ymm11 4546 rorx r14,rbx,34 4547 rorx r13,rbx,28 4548 lea r8,[rax*1+r8] 4549 vpaddq ymm10,ymm3,YMMWORD[((-32))+rbp] 4550 and r15,rdi 4551 xor r14,r12 4552 xor r15,rcx 4553 xor r14,r13 4554 lea rax,[r15*1+rax] 4555 mov r12,r9 4556 vmovdqa YMMWORD[96+rsp],ymm10 4557 lea rsp,[((-128))+rsp] 4558 vpalignr ymm8,ymm5,ymm4,8 4559 add r11,QWORD[((0+256))+rsp] 4560 and r12,r8 4561 rorx r13,r8,41 4562 vpalignr ymm11,ymm1,ymm0,8 4563 rorx r15,r8,18 4564 lea rax,[r14*1+rax] 4565 lea r11,[r12*1+r11] 4566 vpsrlq ymm10,ymm8,1 4567 andn r12,r8,r10 4568 xor r13,r15 4569 rorx r14,r8,14 4570 vpaddq ymm4,ymm4,ymm11 4571 vpsrlq ymm11,ymm8,7 4572 lea r11,[r12*1+r11] 4573 xor r13,r14 4574 mov r15,rax 4575 vpsllq ymm9,ymm8,56 4576 vpxor ymm8,ymm11,ymm10 4577 rorx r12,rax,39 4578 lea r11,[r13*1+r11] 4579 xor r15,rbx 4580 vpsrlq ymm10,ymm10,7 4581 vpxor ymm8,ymm8,ymm9 4582 rorx r14,rax,34 4583 rorx r13,rax,28 4584 lea rdx,[r11*1+rdx] 4585 vpsllq ymm9,ymm9,7 4586 vpxor ymm8,ymm8,ymm10 4587 and rdi,r15 4588 xor r14,r12 4589 xor rdi,rbx 4590 vpsrlq ymm11,ymm3,6 4591 vpxor ymm8,ymm8,ymm9 4592 xor r14,r13 4593 lea r11,[rdi*1+r11] 4594 mov r12,r8 4595 vpsllq ymm10,ymm3,3 4596 vpaddq ymm4,ymm4,ymm8 4597 add r10,QWORD[((8+256))+rsp] 4598 and r12,rdx 4599 rorx r13,rdx,41 4600 vpsrlq ymm9,ymm3,19 4601 vpxor ymm11,ymm11,ymm10 4602 rorx rdi,rdx,18 4603 lea r11,[r14*1+r11] 4604 lea r10,[r12*1+r10] 4605 vpsllq ymm10,ymm10,42 4606 vpxor ymm11,ymm11,ymm9 4607 andn r12,rdx,r9 4608 xor r13,rdi 4609 rorx r14,rdx,14 4610 vpsrlq ymm9,ymm9,42 4611 vpxor ymm11,ymm11,ymm10 4612 lea r10,[r12*1+r10] 4613 xor r13,r14 4614 mov rdi,r11 4615 vpxor ymm11,ymm11,ymm9 4616 rorx r12,r11,39 4617 lea r10,[r13*1+r10] 4618 xor rdi,rax 4619 vpaddq ymm4,ymm4,ymm11 4620 rorx r14,r11,34 4621 rorx r13,r11,28 4622 lea rcx,[r10*1+rcx] 4623 vpaddq ymm10,ymm4,YMMWORD[rbp] 4624 and r15,rdi 4625 xor r14,r12 4626 xor r15,rax 4627 xor r14,r13 4628 lea r10,[r15*1+r10] 4629 mov r12,rdx 4630 vmovdqa YMMWORD[rsp],ymm10 4631 vpalignr ymm8,ymm6,ymm5,8 4632 add r9,QWORD[((32+256))+rsp] 4633 and r12,rcx 4634 rorx r13,rcx,41 4635 vpalignr ymm11,ymm2,ymm1,8 4636 rorx r15,rcx,18 4637 lea r10,[r14*1+r10] 4638 lea r9,[r12*1+r9] 4639 vpsrlq ymm10,ymm8,1 4640 andn r12,rcx,r8 4641 xor r13,r15 4642 rorx r14,rcx,14 4643 vpaddq ymm5,ymm5,ymm11 4644 vpsrlq ymm11,ymm8,7 4645 lea r9,[r12*1+r9] 4646 xor r13,r14 4647 mov r15,r10 4648 vpsllq ymm9,ymm8,56 4649 vpxor ymm8,ymm11,ymm10 4650 rorx r12,r10,39 4651 lea r9,[r13*1+r9] 4652 xor r15,r11 4653 vpsrlq ymm10,ymm10,7 4654 vpxor ymm8,ymm8,ymm9 4655 rorx r14,r10,34 4656 rorx r13,r10,28 4657 lea rbx,[r9*1+rbx] 4658 vpsllq ymm9,ymm9,7 4659 vpxor ymm8,ymm8,ymm10 4660 and rdi,r15 4661 xor r14,r12 4662 xor rdi,r11 4663 vpsrlq ymm11,ymm4,6 4664 vpxor ymm8,ymm8,ymm9 4665 xor r14,r13 4666 lea r9,[rdi*1+r9] 4667 mov r12,rcx 4668 vpsllq ymm10,ymm4,3 4669 vpaddq ymm5,ymm5,ymm8 4670 add r8,QWORD[((40+256))+rsp] 4671 and r12,rbx 4672 rorx r13,rbx,41 4673 vpsrlq ymm9,ymm4,19 4674 vpxor ymm11,ymm11,ymm10 4675 rorx rdi,rbx,18 4676 lea r9,[r14*1+r9] 4677 lea r8,[r12*1+r8] 4678 vpsllq ymm10,ymm10,42 4679 vpxor ymm11,ymm11,ymm9 4680 andn r12,rbx,rdx 4681 xor r13,rdi 4682 rorx r14,rbx,14 4683 vpsrlq ymm9,ymm9,42 4684 vpxor ymm11,ymm11,ymm10 4685 lea r8,[r12*1+r8] 4686 xor r13,r14 4687 mov rdi,r9 4688 vpxor ymm11,ymm11,ymm9 4689 rorx r12,r9,39 4690 lea r8,[r13*1+r8] 4691 xor rdi,r10 4692 vpaddq ymm5,ymm5,ymm11 4693 rorx r14,r9,34 4694 rorx r13,r9,28 4695 lea rax,[r8*1+rax] 4696 vpaddq ymm10,ymm5,YMMWORD[32+rbp] 4697 and r15,rdi 4698 xor r14,r12 4699 xor r15,r10 4700 xor r14,r13 4701 lea r8,[r15*1+r8] 4702 mov r12,rbx 4703 vmovdqa YMMWORD[32+rsp],ymm10 4704 vpalignr ymm8,ymm7,ymm6,8 4705 add rdx,QWORD[((64+256))+rsp] 4706 and r12,rax 4707 rorx r13,rax,41 4708 vpalignr ymm11,ymm3,ymm2,8 4709 rorx r15,rax,18 4710 lea r8,[r14*1+r8] 4711 lea rdx,[r12*1+rdx] 4712 vpsrlq ymm10,ymm8,1 4713 andn r12,rax,rcx 4714 xor r13,r15 4715 rorx r14,rax,14 4716 vpaddq ymm6,ymm6,ymm11 4717 vpsrlq ymm11,ymm8,7 4718 lea rdx,[r12*1+rdx] 4719 xor r13,r14 4720 mov r15,r8 4721 vpsllq ymm9,ymm8,56 4722 vpxor ymm8,ymm11,ymm10 4723 rorx r12,r8,39 4724 lea rdx,[r13*1+rdx] 4725 xor r15,r9 4726 vpsrlq ymm10,ymm10,7 4727 vpxor ymm8,ymm8,ymm9 4728 rorx r14,r8,34 4729 rorx r13,r8,28 4730 lea r11,[rdx*1+r11] 4731 vpsllq ymm9,ymm9,7 4732 vpxor ymm8,ymm8,ymm10 4733 and rdi,r15 4734 xor r14,r12 4735 xor rdi,r9 4736 vpsrlq ymm11,ymm5,6 4737 vpxor ymm8,ymm8,ymm9 4738 xor r14,r13 4739 lea rdx,[rdi*1+rdx] 4740 mov r12,rax 4741 vpsllq ymm10,ymm5,3 4742 vpaddq ymm6,ymm6,ymm8 4743 add rcx,QWORD[((72+256))+rsp] 4744 and r12,r11 4745 rorx r13,r11,41 4746 vpsrlq ymm9,ymm5,19 4747 vpxor ymm11,ymm11,ymm10 4748 rorx rdi,r11,18 4749 lea rdx,[r14*1+rdx] 4750 lea rcx,[r12*1+rcx] 4751 vpsllq ymm10,ymm10,42 4752 vpxor ymm11,ymm11,ymm9 4753 andn r12,r11,rbx 4754 xor r13,rdi 4755 rorx r14,r11,14 4756 vpsrlq ymm9,ymm9,42 4757 vpxor ymm11,ymm11,ymm10 4758 lea rcx,[r12*1+rcx] 4759 xor r13,r14 4760 mov rdi,rdx 4761 vpxor ymm11,ymm11,ymm9 4762 rorx r12,rdx,39 4763 lea rcx,[r13*1+rcx] 4764 xor rdi,r8 4765 vpaddq ymm6,ymm6,ymm11 4766 rorx r14,rdx,34 4767 rorx r13,rdx,28 4768 lea r10,[rcx*1+r10] 4769 vpaddq ymm10,ymm6,YMMWORD[64+rbp] 4770 and r15,rdi 4771 xor r14,r12 4772 xor r15,r8 4773 xor r14,r13 4774 lea rcx,[r15*1+rcx] 4775 mov r12,r11 4776 vmovdqa YMMWORD[64+rsp],ymm10 4777 vpalignr ymm8,ymm0,ymm7,8 4778 add rbx,QWORD[((96+256))+rsp] 4779 and r12,r10 4780 rorx r13,r10,41 4781 vpalignr ymm11,ymm4,ymm3,8 4782 rorx r15,r10,18 4783 lea rcx,[r14*1+rcx] 4784 lea rbx,[r12*1+rbx] 4785 vpsrlq ymm10,ymm8,1 4786 andn r12,r10,rax 4787 xor r13,r15 4788 rorx r14,r10,14 4789 vpaddq ymm7,ymm7,ymm11 4790 vpsrlq ymm11,ymm8,7 4791 lea rbx,[r12*1+rbx] 4792 xor r13,r14 4793 mov r15,rcx 4794 vpsllq ymm9,ymm8,56 4795 vpxor ymm8,ymm11,ymm10 4796 rorx r12,rcx,39 4797 lea rbx,[r13*1+rbx] 4798 xor r15,rdx 4799 vpsrlq ymm10,ymm10,7 4800 vpxor ymm8,ymm8,ymm9 4801 rorx r14,rcx,34 4802 rorx r13,rcx,28 4803 lea r9,[rbx*1+r9] 4804 vpsllq ymm9,ymm9,7 4805 vpxor ymm8,ymm8,ymm10 4806 and rdi,r15 4807 xor r14,r12 4808 xor rdi,rdx 4809 vpsrlq ymm11,ymm6,6 4810 vpxor ymm8,ymm8,ymm9 4811 xor r14,r13 4812 lea rbx,[rdi*1+rbx] 4813 mov r12,r10 4814 vpsllq ymm10,ymm6,3 4815 vpaddq ymm7,ymm7,ymm8 4816 add rax,QWORD[((104+256))+rsp] 4817 and r12,r9 4818 rorx r13,r9,41 4819 vpsrlq ymm9,ymm6,19 4820 vpxor ymm11,ymm11,ymm10 4821 rorx rdi,r9,18 4822 lea rbx,[r14*1+rbx] 4823 lea rax,[r12*1+rax] 4824 vpsllq ymm10,ymm10,42 4825 vpxor ymm11,ymm11,ymm9 4826 andn r12,r9,r11 4827 xor r13,rdi 4828 rorx r14,r9,14 4829 vpsrlq ymm9,ymm9,42 4830 vpxor ymm11,ymm11,ymm10 4831 lea rax,[r12*1+rax] 4832 xor r13,r14 4833 mov rdi,rbx 4834 vpxor ymm11,ymm11,ymm9 4835 rorx r12,rbx,39 4836 lea rax,[r13*1+rax] 4837 xor rdi,rcx 4838 vpaddq ymm7,ymm7,ymm11 4839 rorx r14,rbx,34 4840 rorx r13,rbx,28 4841 lea r8,[rax*1+r8] 4842 vpaddq ymm10,ymm7,YMMWORD[96+rbp] 4843 and r15,rdi 4844 xor r14,r12 4845 xor r15,rcx 4846 xor r14,r13 4847 lea rax,[r15*1+rax] 4848 mov r12,r9 4849 vmovdqa YMMWORD[96+rsp],ymm10 4850 lea rbp,[256+rbp] 4851 cmp BYTE[((-121))+rbp],0 4852 jne NEAR $L$avx2_00_47 4853 add r11,QWORD[((0+128))+rsp] 4854 and r12,r8 4855 rorx r13,r8,41 4856 rorx r15,r8,18 4857 lea rax,[r14*1+rax] 4858 lea r11,[r12*1+r11] 4859 andn r12,r8,r10 4860 xor r13,r15 4861 rorx r14,r8,14 4862 lea r11,[r12*1+r11] 4863 xor r13,r14 4864 mov r15,rax 4865 rorx r12,rax,39 4866 lea r11,[r13*1+r11] 4867 xor r15,rbx 4868 rorx r14,rax,34 4869 rorx r13,rax,28 4870 lea rdx,[r11*1+rdx] 4871 and rdi,r15 4872 xor r14,r12 4873 xor rdi,rbx 4874 xor r14,r13 4875 lea r11,[rdi*1+r11] 4876 mov r12,r8 4877 add r10,QWORD[((8+128))+rsp] 4878 and r12,rdx 4879 rorx r13,rdx,41 4880 rorx rdi,rdx,18 4881 lea r11,[r14*1+r11] 4882 lea r10,[r12*1+r10] 4883 andn r12,rdx,r9 4884 xor r13,rdi 4885 rorx r14,rdx,14 4886 lea r10,[r12*1+r10] 4887 xor r13,r14 4888 mov rdi,r11 4889 rorx r12,r11,39 4890 lea r10,[r13*1+r10] 4891 xor rdi,rax 4892 rorx r14,r11,34 4893 rorx r13,r11,28 4894 lea rcx,[r10*1+rcx] 4895 and r15,rdi 4896 xor r14,r12 4897 xor r15,rax 4898 xor r14,r13 4899 lea r10,[r15*1+r10] 4900 mov r12,rdx 4901 add r9,QWORD[((32+128))+rsp] 4902 and r12,rcx 4903 rorx r13,rcx,41 4904 rorx r15,rcx,18 4905 lea r10,[r14*1+r10] 4906 lea r9,[r12*1+r9] 4907 andn r12,rcx,r8 4908 xor r13,r15 4909 rorx r14,rcx,14 4910 lea r9,[r12*1+r9] 4911 xor r13,r14 4912 mov r15,r10 4913 rorx r12,r10,39 4914 lea r9,[r13*1+r9] 4915 xor r15,r11 4916 rorx r14,r10,34 4917 rorx r13,r10,28 4918 lea rbx,[r9*1+rbx] 4919 and rdi,r15 4920 xor r14,r12 4921 xor rdi,r11 4922 xor r14,r13 4923 lea r9,[rdi*1+r9] 4924 mov r12,rcx 4925 add r8,QWORD[((40+128))+rsp] 4926 and r12,rbx 4927 rorx r13,rbx,41 4928 rorx rdi,rbx,18 4929 lea r9,[r14*1+r9] 4930 lea r8,[r12*1+r8] 4931 andn r12,rbx,rdx 4932 xor r13,rdi 4933 rorx r14,rbx,14 4934 lea r8,[r12*1+r8] 4935 xor r13,r14 4936 mov rdi,r9 4937 rorx r12,r9,39 4938 lea r8,[r13*1+r8] 4939 xor rdi,r10 4940 rorx r14,r9,34 4941 rorx r13,r9,28 4942 lea rax,[r8*1+rax] 4943 and r15,rdi 4944 xor r14,r12 4945 xor r15,r10 4946 xor r14,r13 4947 lea r8,[r15*1+r8] 4948 mov r12,rbx 4949 add rdx,QWORD[((64+128))+rsp] 4950 and r12,rax 4951 rorx r13,rax,41 4952 rorx r15,rax,18 4953 lea r8,[r14*1+r8] 4954 lea rdx,[r12*1+rdx] 4955 andn r12,rax,rcx 4956 xor r13,r15 4957 rorx r14,rax,14 4958 lea rdx,[r12*1+rdx] 4959 xor r13,r14 4960 mov r15,r8 4961 rorx r12,r8,39 4962 lea rdx,[r13*1+rdx] 4963 xor r15,r9 4964 rorx r14,r8,34 4965 rorx r13,r8,28 4966 lea r11,[rdx*1+r11] 4967 and rdi,r15 4968 xor r14,r12 4969 xor rdi,r9 4970 xor r14,r13 4971 lea rdx,[rdi*1+rdx] 4972 mov r12,rax 4973 add rcx,QWORD[((72+128))+rsp] 4974 and r12,r11 4975 rorx r13,r11,41 4976 rorx rdi,r11,18 4977 lea rdx,[r14*1+rdx] 4978 lea rcx,[r12*1+rcx] 4979 andn r12,r11,rbx 4980 xor r13,rdi 4981 rorx r14,r11,14 4982 lea rcx,[r12*1+rcx] 4983 xor r13,r14 4984 mov rdi,rdx 4985 rorx r12,rdx,39 4986 lea rcx,[r13*1+rcx] 4987 xor rdi,r8 4988 rorx r14,rdx,34 4989 rorx r13,rdx,28 4990 lea r10,[rcx*1+r10] 4991 and r15,rdi 4992 xor r14,r12 4993 xor r15,r8 4994 xor r14,r13 4995 lea rcx,[r15*1+rcx] 4996 mov r12,r11 4997 add rbx,QWORD[((96+128))+rsp] 4998 and r12,r10 4999 rorx r13,r10,41 5000 rorx r15,r10,18 5001 lea rcx,[r14*1+rcx] 5002 lea rbx,[r12*1+rbx] 5003 andn r12,r10,rax 5004 xor r13,r15 5005 rorx r14,r10,14 5006 lea rbx,[r12*1+rbx] 5007 xor r13,r14 5008 mov r15,rcx 5009 rorx r12,rcx,39 5010 lea rbx,[r13*1+rbx] 5011 xor r15,rdx 5012 rorx r14,rcx,34 5013 rorx r13,rcx,28 5014 lea r9,[rbx*1+r9] 5015 and rdi,r15 5016 xor r14,r12 5017 xor rdi,rdx 5018 xor r14,r13 5019 lea rbx,[rdi*1+rbx] 5020 mov r12,r10 5021 add rax,QWORD[((104+128))+rsp] 5022 and r12,r9 5023 rorx r13,r9,41 5024 rorx rdi,r9,18 5025 lea rbx,[r14*1+rbx] 5026 lea rax,[r12*1+rax] 5027 andn r12,r9,r11 5028 xor r13,rdi 5029 rorx r14,r9,14 5030 lea rax,[r12*1+rax] 5031 xor r13,r14 5032 mov rdi,rbx 5033 rorx r12,rbx,39 5034 lea rax,[r13*1+rax] 5035 xor rdi,rcx 5036 rorx r14,rbx,34 5037 rorx r13,rbx,28 5038 lea r8,[rax*1+r8] 5039 and r15,rdi 5040 xor r14,r12 5041 xor r15,rcx 5042 xor r14,r13 5043 lea rax,[r15*1+rax] 5044 mov r12,r9 5045 add r11,QWORD[rsp] 5046 and r12,r8 5047 rorx r13,r8,41 5048 rorx r15,r8,18 5049 lea rax,[r14*1+rax] 5050 lea r11,[r12*1+r11] 5051 andn r12,r8,r10 5052 xor r13,r15 5053 rorx r14,r8,14 5054 lea r11,[r12*1+r11] 5055 xor r13,r14 5056 mov r15,rax 5057 rorx r12,rax,39 5058 lea r11,[r13*1+r11] 5059 xor r15,rbx 5060 rorx r14,rax,34 5061 rorx r13,rax,28 5062 lea rdx,[r11*1+rdx] 5063 and rdi,r15 5064 xor r14,r12 5065 xor rdi,rbx 5066 xor r14,r13 5067 lea r11,[rdi*1+r11] 5068 mov r12,r8 5069 add r10,QWORD[8+rsp] 5070 and r12,rdx 5071 rorx r13,rdx,41 5072 rorx rdi,rdx,18 5073 lea r11,[r14*1+r11] 5074 lea r10,[r12*1+r10] 5075 andn r12,rdx,r9 5076 xor r13,rdi 5077 rorx r14,rdx,14 5078 lea r10,[r12*1+r10] 5079 xor r13,r14 5080 mov rdi,r11 5081 rorx r12,r11,39 5082 lea r10,[r13*1+r10] 5083 xor rdi,rax 5084 rorx r14,r11,34 5085 rorx r13,r11,28 5086 lea rcx,[r10*1+rcx] 5087 and r15,rdi 5088 xor r14,r12 5089 xor r15,rax 5090 xor r14,r13 5091 lea r10,[r15*1+r10] 5092 mov r12,rdx 5093 add r9,QWORD[32+rsp] 5094 and r12,rcx 5095 rorx r13,rcx,41 5096 rorx r15,rcx,18 5097 lea r10,[r14*1+r10] 5098 lea r9,[r12*1+r9] 5099 andn r12,rcx,r8 5100 xor r13,r15 5101 rorx r14,rcx,14 5102 lea r9,[r12*1+r9] 5103 xor r13,r14 5104 mov r15,r10 5105 rorx r12,r10,39 5106 lea r9,[r13*1+r9] 5107 xor r15,r11 5108 rorx r14,r10,34 5109 rorx r13,r10,28 5110 lea rbx,[r9*1+rbx] 5111 and rdi,r15 5112 xor r14,r12 5113 xor rdi,r11 5114 xor r14,r13 5115 lea r9,[rdi*1+r9] 5116 mov r12,rcx 5117 add r8,QWORD[40+rsp] 5118 and r12,rbx 5119 rorx r13,rbx,41 5120 rorx rdi,rbx,18 5121 lea r9,[r14*1+r9] 5122 lea r8,[r12*1+r8] 5123 andn r12,rbx,rdx 5124 xor r13,rdi 5125 rorx r14,rbx,14 5126 lea r8,[r12*1+r8] 5127 xor r13,r14 5128 mov rdi,r9 5129 rorx r12,r9,39 5130 lea r8,[r13*1+r8] 5131 xor rdi,r10 5132 rorx r14,r9,34 5133 rorx r13,r9,28 5134 lea rax,[r8*1+rax] 5135 and r15,rdi 5136 xor r14,r12 5137 xor r15,r10 5138 xor r14,r13 5139 lea r8,[r15*1+r8] 5140 mov r12,rbx 5141 add rdx,QWORD[64+rsp] 5142 and r12,rax 5143 rorx r13,rax,41 5144 rorx r15,rax,18 5145 lea r8,[r14*1+r8] 5146 lea rdx,[r12*1+rdx] 5147 andn r12,rax,rcx 5148 xor r13,r15 5149 rorx r14,rax,14 5150 lea rdx,[r12*1+rdx] 5151 xor r13,r14 5152 mov r15,r8 5153 rorx r12,r8,39 5154 lea rdx,[r13*1+rdx] 5155 xor r15,r9 5156 rorx r14,r8,34 5157 rorx r13,r8,28 5158 lea r11,[rdx*1+r11] 5159 and rdi,r15 5160 xor r14,r12 5161 xor rdi,r9 5162 xor r14,r13 5163 lea rdx,[rdi*1+rdx] 5164 mov r12,rax 5165 add rcx,QWORD[72+rsp] 5166 and r12,r11 5167 rorx r13,r11,41 5168 rorx rdi,r11,18 5169 lea rdx,[r14*1+rdx] 5170 lea rcx,[r12*1+rcx] 5171 andn r12,r11,rbx 5172 xor r13,rdi 5173 rorx r14,r11,14 5174 lea rcx,[r12*1+rcx] 5175 xor r13,r14 5176 mov rdi,rdx 5177 rorx r12,rdx,39 5178 lea rcx,[r13*1+rcx] 5179 xor rdi,r8 5180 rorx r14,rdx,34 5181 rorx r13,rdx,28 5182 lea r10,[rcx*1+r10] 5183 and r15,rdi 5184 xor r14,r12 5185 xor r15,r8 5186 xor r14,r13 5187 lea rcx,[r15*1+rcx] 5188 mov r12,r11 5189 add rbx,QWORD[96+rsp] 5190 and r12,r10 5191 rorx r13,r10,41 5192 rorx r15,r10,18 5193 lea rcx,[r14*1+rcx] 5194 lea rbx,[r12*1+rbx] 5195 andn r12,r10,rax 5196 xor r13,r15 5197 rorx r14,r10,14 5198 lea rbx,[r12*1+rbx] 5199 xor r13,r14 5200 mov r15,rcx 5201 rorx r12,rcx,39 5202 lea rbx,[r13*1+rbx] 5203 xor r15,rdx 5204 rorx r14,rcx,34 5205 rorx r13,rcx,28 5206 lea r9,[rbx*1+r9] 5207 and rdi,r15 5208 xor r14,r12 5209 xor rdi,rdx 5210 xor r14,r13 5211 lea rbx,[rdi*1+rbx] 5212 mov r12,r10 5213 add rax,QWORD[104+rsp] 5214 and r12,r9 5215 rorx r13,r9,41 5216 rorx rdi,r9,18 5217 lea rbx,[r14*1+rbx] 5218 lea rax,[r12*1+rax] 5219 andn r12,r9,r11 5220 xor r13,rdi 5221 rorx r14,r9,14 5222 lea rax,[r12*1+rax] 5223 xor r13,r14 5224 mov rdi,rbx 5225 rorx r12,rbx,39 5226 lea rax,[r13*1+rax] 5227 xor rdi,rcx 5228 rorx r14,rbx,34 5229 rorx r13,rbx,28 5230 lea r8,[rax*1+r8] 5231 and r15,rdi 5232 xor r14,r12 5233 xor r15,rcx 5234 xor r14,r13 5235 lea rax,[r15*1+rax] 5236 mov r12,r9 5237 mov rdi,QWORD[1280+rsp] 5238 add rax,r14 5239 5240 lea rbp,[1152+rsp] 5241 5242 add rax,QWORD[rdi] 5243 add rbx,QWORD[8+rdi] 5244 add rcx,QWORD[16+rdi] 5245 add rdx,QWORD[24+rdi] 5246 add r8,QWORD[32+rdi] 5247 add r9,QWORD[40+rdi] 5248 add r10,QWORD[48+rdi] 5249 add r11,QWORD[56+rdi] 5250 5251 mov QWORD[rdi],rax 5252 mov QWORD[8+rdi],rbx 5253 mov QWORD[16+rdi],rcx 5254 mov QWORD[24+rdi],rdx 5255 mov QWORD[32+rdi],r8 5256 mov QWORD[40+rdi],r9 5257 mov QWORD[48+rdi],r10 5258 mov QWORD[56+rdi],r11 5259 5260 cmp rsi,QWORD[144+rbp] 5261 je NEAR $L$done_avx2 5262 5263 xor r14,r14 5264 mov rdi,rbx 5265 xor rdi,rcx 5266 mov r12,r9 5267 jmp NEAR $L$ower_avx2 5268 ALIGN 16 5269 $L$ower_avx2: 5270 add r11,QWORD[((0+16))+rbp] 5271 and r12,r8 5272 rorx r13,r8,41 5273 rorx r15,r8,18 5274 lea rax,[r14*1+rax] 5275 lea r11,[r12*1+r11] 5276 andn r12,r8,r10 5277 xor r13,r15 5278 rorx r14,r8,14 5279 lea r11,[r12*1+r11] 5280 xor r13,r14 5281 mov r15,rax 5282 rorx r12,rax,39 5283 lea r11,[r13*1+r11] 5284 xor r15,rbx 5285 rorx r14,rax,34 5286 rorx r13,rax,28 5287 lea rdx,[r11*1+rdx] 5288 and rdi,r15 5289 xor r14,r12 5290 xor rdi,rbx 5291 xor r14,r13 5292 lea r11,[rdi*1+r11] 5293 mov r12,r8 5294 add r10,QWORD[((8+16))+rbp] 5295 and r12,rdx 5296 rorx r13,rdx,41 5297 rorx rdi,rdx,18 5298 lea r11,[r14*1+r11] 5299 lea r10,[r12*1+r10] 5300 andn r12,rdx,r9 5301 xor r13,rdi 5302 rorx r14,rdx,14 5303 lea r10,[r12*1+r10] 5304 xor r13,r14 5305 mov rdi,r11 5306 rorx r12,r11,39 5307 lea r10,[r13*1+r10] 5308 xor rdi,rax 5309 rorx r14,r11,34 5310 rorx r13,r11,28 5311 lea rcx,[r10*1+rcx] 5312 and r15,rdi 5313 xor r14,r12 5314 xor r15,rax 5315 xor r14,r13 5316 lea r10,[r15*1+r10] 5317 mov r12,rdx 5318 add r9,QWORD[((32+16))+rbp] 5319 and r12,rcx 5320 rorx r13,rcx,41 5321 rorx r15,rcx,18 5322 lea r10,[r14*1+r10] 5323 lea r9,[r12*1+r9] 5324 andn r12,rcx,r8 5325 xor r13,r15 5326 rorx r14,rcx,14 5327 lea r9,[r12*1+r9] 5328 xor r13,r14 5329 mov r15,r10 5330 rorx r12,r10,39 5331 lea r9,[r13*1+r9] 5332 xor r15,r11 5333 rorx r14,r10,34 5334 rorx r13,r10,28 5335 lea rbx,[r9*1+rbx] 5336 and rdi,r15 5337 xor r14,r12 5338 xor rdi,r11 5339 xor r14,r13 5340 lea r9,[rdi*1+r9] 5341 mov r12,rcx 5342 add r8,QWORD[((40+16))+rbp] 5343 and r12,rbx 5344 rorx r13,rbx,41 5345 rorx rdi,rbx,18 5346 lea r9,[r14*1+r9] 5347 lea r8,[r12*1+r8] 5348 andn r12,rbx,rdx 5349 xor r13,rdi 5350 rorx r14,rbx,14 5351 lea r8,[r12*1+r8] 5352 xor r13,r14 5353 mov rdi,r9 5354 rorx r12,r9,39 5355 lea r8,[r13*1+r8] 5356 xor rdi,r10 5357 rorx r14,r9,34 5358 rorx r13,r9,28 5359 lea rax,[r8*1+rax] 5360 and r15,rdi 5361 xor r14,r12 5362 xor r15,r10 5363 xor r14,r13 5364 lea r8,[r15*1+r8] 5365 mov r12,rbx 5366 add rdx,QWORD[((64+16))+rbp] 5367 and r12,rax 5368 rorx r13,rax,41 5369 rorx r15,rax,18 5370 lea r8,[r14*1+r8] 5371 lea rdx,[r12*1+rdx] 5372 andn r12,rax,rcx 5373 xor r13,r15 5374 rorx r14,rax,14 5375 lea rdx,[r12*1+rdx] 5376 xor r13,r14 5377 mov r15,r8 5378 rorx r12,r8,39 5379 lea rdx,[r13*1+rdx] 5380 xor r15,r9 5381 rorx r14,r8,34 5382 rorx r13,r8,28 5383 lea r11,[rdx*1+r11] 5384 and rdi,r15 5385 xor r14,r12 5386 xor rdi,r9 5387 xor r14,r13 5388 lea rdx,[rdi*1+rdx] 5389 mov r12,rax 5390 add rcx,QWORD[((72+16))+rbp] 5391 and r12,r11 5392 rorx r13,r11,41 5393 rorx rdi,r11,18 5394 lea rdx,[r14*1+rdx] 5395 lea rcx,[r12*1+rcx] 5396 andn r12,r11,rbx 5397 xor r13,rdi 5398 rorx r14,r11,14 5399 lea rcx,[r12*1+rcx] 5400 xor r13,r14 5401 mov rdi,rdx 5402 rorx r12,rdx,39 5403 lea rcx,[r13*1+rcx] 5404 xor rdi,r8 5405 rorx r14,rdx,34 5406 rorx r13,rdx,28 5407 lea r10,[rcx*1+r10] 5408 and r15,rdi 5409 xor r14,r12 5410 xor r15,r8 5411 xor r14,r13 5412 lea rcx,[r15*1+rcx] 5413 mov r12,r11 5414 add rbx,QWORD[((96+16))+rbp] 5415 and r12,r10 5416 rorx r13,r10,41 5417 rorx r15,r10,18 5418 lea rcx,[r14*1+rcx] 5419 lea rbx,[r12*1+rbx] 5420 andn r12,r10,rax 5421 xor r13,r15 5422 rorx r14,r10,14 5423 lea rbx,[r12*1+rbx] 5424 xor r13,r14 5425 mov r15,rcx 5426 rorx r12,rcx,39 5427 lea rbx,[r13*1+rbx] 5428 xor r15,rdx 5429 rorx r14,rcx,34 5430 rorx r13,rcx,28 5431 lea r9,[rbx*1+r9] 5432 and rdi,r15 5433 xor r14,r12 5434 xor rdi,rdx 5435 xor r14,r13 5436 lea rbx,[rdi*1+rbx] 5437 mov r12,r10 5438 add rax,QWORD[((104+16))+rbp] 5439 and r12,r9 5440 rorx r13,r9,41 5441 rorx rdi,r9,18 5442 lea rbx,[r14*1+rbx] 5443 lea rax,[r12*1+rax] 5444 andn r12,r9,r11 5445 xor r13,rdi 5446 rorx r14,r9,14 5447 lea rax,[r12*1+rax] 5448 xor r13,r14 5449 mov rdi,rbx 5450 rorx r12,rbx,39 5451 lea rax,[r13*1+rax] 5452 xor rdi,rcx 5453 rorx r14,rbx,34 5454 rorx r13,rbx,28 5455 lea r8,[rax*1+r8] 5456 and r15,rdi 5457 xor r14,r12 5458 xor r15,rcx 5459 xor r14,r13 5460 lea rax,[r15*1+rax] 5461 mov r12,r9 5462 lea rbp,[((-128))+rbp] 5463 cmp rbp,rsp 5464 jae NEAR $L$ower_avx2 5465 5466 mov rdi,QWORD[1280+rsp] 5467 add rax,r14 5468 5469 lea rsp,[1152+rsp] 5470 5471 5472 5473 add rax,QWORD[rdi] 5474 add rbx,QWORD[8+rdi] 5475 add rcx,QWORD[16+rdi] 5476 add rdx,QWORD[24+rdi] 5477 add r8,QWORD[32+rdi] 5478 add r9,QWORD[40+rdi] 5479 lea rsi,[256+rsi] 5480 add r10,QWORD[48+rdi] 5481 mov r12,rsi 5482 add r11,QWORD[56+rdi] 5483 cmp rsi,QWORD[((128+16))+rsp] 5484 5485 mov QWORD[rdi],rax 5486 cmove r12,rsp 5487 mov QWORD[8+rdi],rbx 5488 mov QWORD[16+rdi],rcx 5489 mov QWORD[24+rdi],rdx 5490 mov QWORD[32+rdi],r8 5491 mov QWORD[40+rdi],r9 5492 mov QWORD[48+rdi],r10 5493 mov QWORD[56+rdi],r11 5494 5495 jbe NEAR $L$oop_avx2 5496 lea rbp,[rsp] 5497 5498 5499 5500 5501 $L$done_avx2: 5502 mov rsi,QWORD[152+rbp] 5503 5504 vzeroupper 5505 movaps xmm6,XMMWORD[((128+32))+rbp] 5506 movaps xmm7,XMMWORD[((128+48))+rbp] 5507 movaps xmm8,XMMWORD[((128+64))+rbp] 5508 movaps xmm9,XMMWORD[((128+80))+rbp] 5509 movaps xmm10,XMMWORD[((128+96))+rbp] 5510 movaps xmm11,XMMWORD[((128+112))+rbp] 5511 mov r15,QWORD[((-48))+rsi] 5512 5513 mov r14,QWORD[((-40))+rsi] 5514 5515 mov r13,QWORD[((-32))+rsi] 5516 5517 mov r12,QWORD[((-24))+rsi] 5518 5519 mov rbp,QWORD[((-16))+rsi] 5520 5521 mov rbx,QWORD[((-8))+rsi] 5522 5523 lea rsp,[rsi] 5524 5525 $L$epilogue_avx2: 5526 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 5527 mov rsi,QWORD[16+rsp] 5528 DB 0F3h,0C3h ;repret 5529 5530 $L$SEH_end_sha512_block_data_order_avx2: 1822 5531 EXTERN __imp_RtlVirtualUnwind 1823 5532 … … 1852 5561 cmp rbx,r10 1853 5562 jae NEAR $L$in_prologue 5563 lea r10,[$L$avx2_shortcut] 5564 cmp rbx,r10 5565 jb NEAR $L$not_in_avx2 5566 5567 and rax,-256*8 5568 add rax,1152 5569 $L$not_in_avx2: 1854 5570 mov rsi,rax 1855 5571 mov rax,QWORD[((128+24))+rax] … … 1921 5637 DD $L$SEH_end_sha512_block_data_order wrt ..imagebase 1922 5638 DD $L$SEH_info_sha512_block_data_order wrt ..imagebase 5639 DD $L$SEH_begin_sha512_block_data_order_xop wrt ..imagebase 5640 DD $L$SEH_end_sha512_block_data_order_xop wrt ..imagebase 5641 DD $L$SEH_info_sha512_block_data_order_xop wrt ..imagebase 5642 DD $L$SEH_begin_sha512_block_data_order_avx wrt ..imagebase 5643 DD $L$SEH_end_sha512_block_data_order_avx wrt ..imagebase 5644 DD $L$SEH_info_sha512_block_data_order_avx wrt ..imagebase 5645 DD $L$SEH_begin_sha512_block_data_order_avx2 wrt ..imagebase 5646 DD $L$SEH_end_sha512_block_data_order_avx2 wrt ..imagebase 5647 DD $L$SEH_info_sha512_block_data_order_avx2 wrt ..imagebase 1923 5648 section .xdata rdata align=8 1924 5649 ALIGN 8 … … 1927 5652 DD se_handler wrt ..imagebase 1928 5653 DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase 5654 $L$SEH_info_sha512_block_data_order_xop: 5655 DB 9,0,0,0 5656 DD se_handler wrt ..imagebase 5657 DD $L$prologue_xop wrt ..imagebase,$L$epilogue_xop wrt ..imagebase 5658 $L$SEH_info_sha512_block_data_order_avx: 5659 DB 9,0,0,0 5660 DD se_handler wrt ..imagebase 5661 DD $L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase 5662 $L$SEH_info_sha512_block_data_order_avx2: 5663 DB 9,0,0,0 5664 DD se_handler wrt ..imagebase 5665 DD $L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/x25519-x86_64.S
r97373 r99371 410 410 411 411 $L$SEH_end_x25519_fe51_mul121666: 412 EXTERN OPENSSL_ia32cap_P 412 413 global x25519_fe64_eligible 413 414 … … 415 416 x25519_fe64_eligible: 416 417 418 mov ecx,DWORD[((OPENSSL_ia32cap_P+8))] 417 419 xor eax,eax 420 and ecx,0x80100 421 cmp ecx,0x80100 422 cmove eax,ecx 418 423 DB 0F3h,0C3h ;repret 419 424 … … 422 427 global x25519_fe64_mul 423 428 429 ALIGN 32 430 x25519_fe64_mul: 431 mov QWORD[8+rsp],rdi ;WIN64 prologue 432 mov QWORD[16+rsp],rsi 433 mov rax,rsp 434 $L$SEH_begin_x25519_fe64_mul: 435 mov rdi,rcx 436 mov rsi,rdx 437 mov rdx,r8 438 439 440 441 push rbp 442 443 push rbx 444 445 push r12 446 447 push r13 448 449 push r14 450 451 push r15 452 453 push rdi 454 455 lea rsp,[((-16))+rsp] 456 457 $L$fe64_mul_body: 458 459 mov rax,rdx 460 mov rbp,QWORD[rdx] 461 mov rdx,QWORD[rsi] 462 mov rcx,QWORD[8+rax] 463 mov r14,QWORD[16+rax] 464 mov r15,QWORD[24+rax] 465 466 mulx rax,r8,rbp 467 xor edi,edi 468 mulx rbx,r9,rcx 469 adcx r9,rax 470 mulx rax,r10,r14 471 adcx r10,rbx 472 mulx r12,r11,r15 473 mov rdx,QWORD[8+rsi] 474 adcx r11,rax 475 mov QWORD[rsp],r14 476 adcx r12,rdi 477 478 mulx rbx,rax,rbp 479 adox r9,rax 480 adcx r10,rbx 481 mulx rbx,rax,rcx 482 adox r10,rax 483 adcx r11,rbx 484 mulx rbx,rax,r14 485 adox r11,rax 486 adcx r12,rbx 487 mulx r13,rax,r15 488 mov rdx,QWORD[16+rsi] 489 adox r12,rax 490 adcx r13,rdi 491 adox r13,rdi 492 493 mulx rbx,rax,rbp 494 adcx r10,rax 495 adox r11,rbx 496 mulx rbx,rax,rcx 497 adcx r11,rax 498 adox r12,rbx 499 mulx rbx,rax,r14 500 adcx r12,rax 501 adox r13,rbx 502 mulx r14,rax,r15 503 mov rdx,QWORD[24+rsi] 504 adcx r13,rax 505 adox r14,rdi 506 adcx r14,rdi 507 508 mulx rbx,rax,rbp 509 adox r11,rax 510 adcx r12,rbx 511 mulx rbx,rax,rcx 512 adox r12,rax 513 adcx r13,rbx 514 mulx rbx,rax,QWORD[rsp] 515 adox r13,rax 516 adcx r14,rbx 517 mulx r15,rax,r15 518 mov edx,38 519 adox r14,rax 520 adcx r15,rdi 521 adox r15,rdi 522 523 jmp NEAR $L$reduce64 524 $L$fe64_mul_epilogue: 525 526 $L$SEH_end_x25519_fe64_mul: 527 424 528 global x25519_fe64_sqr 529 530 ALIGN 32 531 x25519_fe64_sqr: 532 mov QWORD[8+rsp],rdi ;WIN64 prologue 533 mov QWORD[16+rsp],rsi 534 mov rax,rsp 535 $L$SEH_begin_x25519_fe64_sqr: 536 mov rdi,rcx 537 mov rsi,rdx 538 539 540 541 push rbp 542 543 push rbx 544 545 push r12 546 547 push r13 548 549 push r14 550 551 push r15 552 553 push rdi 554 555 lea rsp,[((-16))+rsp] 556 557 $L$fe64_sqr_body: 558 559 mov rdx,QWORD[rsi] 560 mov rcx,QWORD[8+rsi] 561 mov rbp,QWORD[16+rsi] 562 mov rsi,QWORD[24+rsi] 563 564 565 mulx r15,r8,rdx 566 mulx rax,r9,rcx 567 xor edi,edi 568 mulx rbx,r10,rbp 569 adcx r10,rax 570 mulx r12,r11,rsi 571 mov rdx,rcx 572 adcx r11,rbx 573 adcx r12,rdi 574 575 576 mulx rbx,rax,rbp 577 adox r11,rax 578 adcx r12,rbx 579 mulx r13,rax,rsi 580 mov rdx,rbp 581 adox r12,rax 582 adcx r13,rdi 583 584 585 mulx r14,rax,rsi 586 mov rdx,rcx 587 adox r13,rax 588 adcx r14,rdi 589 adox r14,rdi 590 591 adcx r9,r9 592 adox r9,r15 593 adcx r10,r10 594 mulx rbx,rax,rdx 595 mov rdx,rbp 596 adcx r11,r11 597 adox r10,rax 598 adcx r12,r12 599 adox r11,rbx 600 mulx rbx,rax,rdx 601 mov rdx,rsi 602 adcx r13,r13 603 adox r12,rax 604 adcx r14,r14 605 adox r13,rbx 606 mulx r15,rax,rdx 607 mov edx,38 608 adox r14,rax 609 adcx r15,rdi 610 adox r15,rdi 611 jmp NEAR $L$reduce64 612 613 ALIGN 32 614 $L$reduce64: 615 mulx rbx,rax,r12 616 adcx r8,rax 617 adox r9,rbx 618 mulx rbx,rax,r13 619 adcx r9,rax 620 adox r10,rbx 621 mulx rbx,rax,r14 622 adcx r10,rax 623 adox r11,rbx 624 mulx r12,rax,r15 625 adcx r11,rax 626 adox r12,rdi 627 adcx r12,rdi 628 629 mov rdi,QWORD[16+rsp] 630 imul r12,rdx 631 632 add r8,r12 633 adc r9,0 634 adc r10,0 635 adc r11,0 636 637 sbb rax,rax 638 and rax,38 639 640 add r8,rax 641 mov QWORD[8+rdi],r9 642 mov QWORD[16+rdi],r10 643 mov QWORD[24+rdi],r11 644 mov QWORD[rdi],r8 645 646 mov r15,QWORD[24+rsp] 647 648 mov r14,QWORD[32+rsp] 649 650 mov r13,QWORD[40+rsp] 651 652 mov r12,QWORD[48+rsp] 653 654 mov rbx,QWORD[56+rsp] 655 656 mov rbp,QWORD[64+rsp] 657 658 lea rsp,[72+rsp] 659 660 $L$fe64_sqr_epilogue: 661 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 662 mov rsi,QWORD[16+rsp] 663 DB 0F3h,0C3h ;repret 664 665 $L$SEH_end_x25519_fe64_sqr: 666 425 667 global x25519_fe64_mul121666 668 669 ALIGN 32 670 x25519_fe64_mul121666: 671 mov QWORD[8+rsp],rdi ;WIN64 prologue 672 mov QWORD[16+rsp],rsi 673 mov rax,rsp 674 $L$SEH_begin_x25519_fe64_mul121666: 675 mov rdi,rcx 676 mov rsi,rdx 677 678 679 $L$fe64_mul121666_body: 680 681 mov edx,121666 682 mulx rcx,r8,QWORD[rsi] 683 mulx rax,r9,QWORD[8+rsi] 684 add r9,rcx 685 mulx rcx,r10,QWORD[16+rsi] 686 adc r10,rax 687 mulx rax,r11,QWORD[24+rsi] 688 adc r11,rcx 689 adc rax,0 690 691 imul rax,rax,38 692 693 add r8,rax 694 adc r9,0 695 adc r10,0 696 adc r11,0 697 698 sbb rax,rax 699 and rax,38 700 701 add r8,rax 702 mov QWORD[8+rdi],r9 703 mov QWORD[16+rdi],r10 704 mov QWORD[24+rdi],r11 705 mov QWORD[rdi],r8 706 707 $L$fe64_mul121666_epilogue: 708 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 709 mov rsi,QWORD[16+rsp] 710 DB 0F3h,0C3h ;repret 711 712 $L$SEH_end_x25519_fe64_mul121666: 713 426 714 global x25519_fe64_add 715 716 ALIGN 32 717 x25519_fe64_add: 718 mov QWORD[8+rsp],rdi ;WIN64 prologue 719 mov QWORD[16+rsp],rsi 720 mov rax,rsp 721 $L$SEH_begin_x25519_fe64_add: 722 mov rdi,rcx 723 mov rsi,rdx 724 mov rdx,r8 725 726 727 $L$fe64_add_body: 728 729 mov r8,QWORD[rsi] 730 mov r9,QWORD[8+rsi] 731 mov r10,QWORD[16+rsi] 732 mov r11,QWORD[24+rsi] 733 734 add r8,QWORD[rdx] 735 adc r9,QWORD[8+rdx] 736 adc r10,QWORD[16+rdx] 737 adc r11,QWORD[24+rdx] 738 739 sbb rax,rax 740 and rax,38 741 742 add r8,rax 743 adc r9,0 744 adc r10,0 745 mov QWORD[8+rdi],r9 746 adc r11,0 747 mov QWORD[16+rdi],r10 748 sbb rax,rax 749 mov QWORD[24+rdi],r11 750 and rax,38 751 752 add r8,rax 753 mov QWORD[rdi],r8 754 755 $L$fe64_add_epilogue: 756 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 757 mov rsi,QWORD[16+rsp] 758 DB 0F3h,0C3h ;repret 759 760 $L$SEH_end_x25519_fe64_add: 761 427 762 global x25519_fe64_sub 763 764 ALIGN 32 765 x25519_fe64_sub: 766 mov QWORD[8+rsp],rdi ;WIN64 prologue 767 mov QWORD[16+rsp],rsi 768 mov rax,rsp 769 $L$SEH_begin_x25519_fe64_sub: 770 mov rdi,rcx 771 mov rsi,rdx 772 mov rdx,r8 773 774 775 $L$fe64_sub_body: 776 777 mov r8,QWORD[rsi] 778 mov r9,QWORD[8+rsi] 779 mov r10,QWORD[16+rsi] 780 mov r11,QWORD[24+rsi] 781 782 sub r8,QWORD[rdx] 783 sbb r9,QWORD[8+rdx] 784 sbb r10,QWORD[16+rdx] 785 sbb r11,QWORD[24+rdx] 786 787 sbb rax,rax 788 and rax,38 789 790 sub r8,rax 791 sbb r9,0 792 sbb r10,0 793 mov QWORD[8+rdi],r9 794 sbb r11,0 795 mov QWORD[16+rdi],r10 796 sbb rax,rax 797 mov QWORD[24+rdi],r11 798 and rax,38 799 800 sub r8,rax 801 mov QWORD[rdi],r8 802 803 $L$fe64_sub_epilogue: 804 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 805 mov rsi,QWORD[16+rsp] 806 DB 0F3h,0C3h ;repret 807 808 $L$SEH_end_x25519_fe64_sub: 809 428 810 global x25519_fe64_tobytes 429 x25519_fe64_mul: 430 x25519_fe64_sqr: 431 x25519_fe64_mul121666: 432 x25519_fe64_add: 433 x25519_fe64_sub: 811 812 ALIGN 32 434 813 x25519_fe64_tobytes: 435 436 DB 0x0f,0x0b 814 mov QWORD[8+rsp],rdi ;WIN64 prologue 815 mov QWORD[16+rsp],rsi 816 mov rax,rsp 817 $L$SEH_begin_x25519_fe64_tobytes: 818 mov rdi,rcx 819 mov rsi,rdx 820 821 822 $L$fe64_to_body: 823 824 mov r8,QWORD[rsi] 825 mov r9,QWORD[8+rsi] 826 mov r10,QWORD[16+rsi] 827 mov r11,QWORD[24+rsi] 828 829 830 lea rax,[r11*1+r11] 831 sar r11,63 832 shr rax,1 833 and r11,19 834 add r11,19 835 836 add r8,r11 837 adc r9,0 838 adc r10,0 839 adc rax,0 840 841 lea r11,[rax*1+rax] 842 sar rax,63 843 shr r11,1 844 not rax 845 and rax,19 846 847 sub r8,rax 848 sbb r9,0 849 sbb r10,0 850 sbb r11,0 851 852 mov QWORD[rdi],r8 853 mov QWORD[8+rdi],r9 854 mov QWORD[16+rdi],r10 855 mov QWORD[24+rdi],r11 856 857 $L$fe64_to_epilogue: 858 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 859 mov rsi,QWORD[16+rsp] 437 860 DB 0F3h,0C3h ;repret 438 861 439 862 $L$SEH_end_x25519_fe64_tobytes: 440 863 DB 88,50,53,53,49,57,32,112,114,105,109,105,116,105,118,101 441 864 DB 115,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82 … … 574 997 DD $L$SEH_end_x25519_fe51_mul121666 wrt ..imagebase 575 998 DD $L$SEH_info_x25519_fe51_mul121666 wrt ..imagebase 999 DD $L$SEH_begin_x25519_fe64_mul wrt ..imagebase 1000 DD $L$SEH_end_x25519_fe64_mul wrt ..imagebase 1001 DD $L$SEH_info_x25519_fe64_mul wrt ..imagebase 1002 1003 DD $L$SEH_begin_x25519_fe64_sqr wrt ..imagebase 1004 DD $L$SEH_end_x25519_fe64_sqr wrt ..imagebase 1005 DD $L$SEH_info_x25519_fe64_sqr wrt ..imagebase 1006 1007 DD $L$SEH_begin_x25519_fe64_mul121666 wrt ..imagebase 1008 DD $L$SEH_end_x25519_fe64_mul121666 wrt ..imagebase 1009 DD $L$SEH_info_x25519_fe64_mul121666 wrt ..imagebase 1010 1011 DD $L$SEH_begin_x25519_fe64_add wrt ..imagebase 1012 DD $L$SEH_end_x25519_fe64_add wrt ..imagebase 1013 DD $L$SEH_info_x25519_fe64_add wrt ..imagebase 1014 1015 DD $L$SEH_begin_x25519_fe64_sub wrt ..imagebase 1016 DD $L$SEH_end_x25519_fe64_sub wrt ..imagebase 1017 DD $L$SEH_info_x25519_fe64_sub wrt ..imagebase 1018 1019 DD $L$SEH_begin_x25519_fe64_tobytes wrt ..imagebase 1020 DD $L$SEH_end_x25519_fe64_tobytes wrt ..imagebase 1021 DD $L$SEH_info_x25519_fe64_tobytes wrt ..imagebase 576 1022 section .xdata rdata align=8 577 1023 ALIGN 8 … … 591 1037 DD $L$fe51_mul121666_body wrt ..imagebase,$L$fe51_mul121666_epilogue wrt ..imagebase 592 1038 DD 88,0 1039 $L$SEH_info_x25519_fe64_mul: 1040 DB 9,0,0,0 1041 DD full_handler wrt ..imagebase 1042 DD $L$fe64_mul_body wrt ..imagebase,$L$fe64_mul_epilogue wrt ..imagebase 1043 DD 72,0 1044 $L$SEH_info_x25519_fe64_sqr: 1045 DB 9,0,0,0 1046 DD full_handler wrt ..imagebase 1047 DD $L$fe64_sqr_body wrt ..imagebase,$L$fe64_sqr_epilogue wrt ..imagebase 1048 DD 72,0 1049 $L$SEH_info_x25519_fe64_mul121666: 1050 DB 9,0,0,0 1051 DD short_handler wrt ..imagebase 1052 DD $L$fe64_mul121666_body wrt ..imagebase,$L$fe64_mul121666_epilogue wrt ..imagebase 1053 $L$SEH_info_x25519_fe64_add: 1054 DB 9,0,0,0 1055 DD short_handler wrt ..imagebase 1056 DD $L$fe64_add_body wrt ..imagebase,$L$fe64_add_epilogue wrt ..imagebase 1057 $L$SEH_info_x25519_fe64_sub: 1058 DB 9,0,0,0 1059 DD short_handler wrt ..imagebase 1060 DD $L$fe64_sub_body wrt ..imagebase,$L$fe64_sub_epilogue wrt ..imagebase 1061 $L$SEH_info_x25519_fe64_tobytes: 1062 DB 9,0,0,0 1063 DD short_handler wrt ..imagebase 1064 DD $L$fe64_to_body wrt ..imagebase,$L$fe64_to_epilogue wrt ..imagebase -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/x86_64-mont.S
r97373 r99371 32 32 cmp r9d,8 33 33 jb NEAR $L$mul_enter 34 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))] 34 35 cmp rdx,rsi 35 36 jne NEAR $L$mul4x_enter … … 294 295 295 296 $L$mul4x_enter: 297 and r11d,0x80100 298 cmp r11d,0x80100 299 je NEAR $L$mulx4x_enter 296 300 push rbx 297 301 … … 719 723 720 724 $L$SEH_end_bn_mul4x_mont: 725 EXTERN bn_sqrx8x_internal 721 726 EXTERN bn_sqr8x_internal 722 727 … … 814 819 DB 102,72,15,110,207 815 820 DB 102,73,15,110,218 821 mov eax,DWORD[((OPENSSL_ia32cap_P+8))] 822 and eax,0x80100 823 cmp eax,0x80100 824 jne NEAR $L$sqr8x_nox 825 826 call bn_sqrx8x_internal 827 828 829 830 831 lea rbx,[rcx*1+r8] 832 mov r9,rcx 833 mov rdx,rcx 834 DB 102,72,15,126,207 835 sar rcx,3+2 836 jmp NEAR $L$sqr8x_sub 837 838 ALIGN 32 839 $L$sqr8x_nox: 816 840 call bn_sqr8x_internal 817 841 … … 903 927 904 928 $L$SEH_end_bn_sqr8x_mont: 929 930 ALIGN 32 931 bn_mulx4x_mont: 932 mov QWORD[8+rsp],rdi ;WIN64 prologue 933 mov QWORD[16+rsp],rsi 934 mov rax,rsp 935 $L$SEH_begin_bn_mulx4x_mont: 936 mov rdi,rcx 937 mov rsi,rdx 938 mov rdx,r8 939 mov rcx,r9 940 mov r8,QWORD[40+rsp] 941 mov r9,QWORD[48+rsp] 942 943 944 945 mov rax,rsp 946 947 $L$mulx4x_enter: 948 push rbx 949 950 push rbp 951 952 push r12 953 954 push r13 955 956 push r14 957 958 push r15 959 960 $L$mulx4x_prologue: 961 962 shl r9d,3 963 xor r10,r10 964 sub r10,r9 965 mov r8,QWORD[r8] 966 lea rbp,[((-72))+r10*1+rsp] 967 and rbp,-128 968 mov r11,rsp 969 sub r11,rbp 970 and r11,-4096 971 lea rsp,[rbp*1+r11] 972 mov r10,QWORD[rsp] 973 cmp rsp,rbp 974 ja NEAR $L$mulx4x_page_walk 975 jmp NEAR $L$mulx4x_page_walk_done 976 977 ALIGN 16 978 $L$mulx4x_page_walk: 979 lea rsp,[((-4096))+rsp] 980 mov r10,QWORD[rsp] 981 cmp rsp,rbp 982 ja NEAR $L$mulx4x_page_walk 983 $L$mulx4x_page_walk_done: 984 985 lea r10,[r9*1+rdx] 986 987 988 989 990 991 992 993 994 995 996 997 998 mov QWORD[rsp],r9 999 shr r9,5 1000 mov QWORD[16+rsp],r10 1001 sub r9,1 1002 mov QWORD[24+rsp],r8 1003 mov QWORD[32+rsp],rdi 1004 mov QWORD[40+rsp],rax 1005 1006 mov QWORD[48+rsp],r9 1007 jmp NEAR $L$mulx4x_body 1008 1009 ALIGN 32 1010 $L$mulx4x_body: 1011 lea rdi,[8+rdx] 1012 mov rdx,QWORD[rdx] 1013 lea rbx,[((64+32))+rsp] 1014 mov r9,rdx 1015 1016 mulx rax,r8,QWORD[rsi] 1017 mulx r14,r11,QWORD[8+rsi] 1018 add r11,rax 1019 mov QWORD[8+rsp],rdi 1020 mulx r13,r12,QWORD[16+rsi] 1021 adc r12,r14 1022 adc r13,0 1023 1024 mov rdi,r8 1025 imul r8,QWORD[24+rsp] 1026 xor rbp,rbp 1027 1028 mulx r14,rax,QWORD[24+rsi] 1029 mov rdx,r8 1030 lea rsi,[32+rsi] 1031 adcx r13,rax 1032 adcx r14,rbp 1033 1034 mulx r10,rax,QWORD[rcx] 1035 adcx rdi,rax 1036 adox r10,r11 1037 mulx r11,rax,QWORD[8+rcx] 1038 adcx r10,rax 1039 adox r11,r12 1040 DB 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00 1041 mov rdi,QWORD[48+rsp] 1042 mov QWORD[((-32))+rbx],r10 1043 adcx r11,rax 1044 adox r12,r13 1045 mulx r15,rax,QWORD[24+rcx] 1046 mov rdx,r9 1047 mov QWORD[((-24))+rbx],r11 1048 adcx r12,rax 1049 adox r15,rbp 1050 lea rcx,[32+rcx] 1051 mov QWORD[((-16))+rbx],r12 1052 1053 jmp NEAR $L$mulx4x_1st 1054 1055 ALIGN 32 1056 $L$mulx4x_1st: 1057 adcx r15,rbp 1058 mulx rax,r10,QWORD[rsi] 1059 adcx r10,r14 1060 mulx r14,r11,QWORD[8+rsi] 1061 adcx r11,rax 1062 mulx rax,r12,QWORD[16+rsi] 1063 adcx r12,r14 1064 mulx r14,r13,QWORD[24+rsi] 1065 DB 0x67,0x67 1066 mov rdx,r8 1067 adcx r13,rax 1068 adcx r14,rbp 1069 lea rsi,[32+rsi] 1070 lea rbx,[32+rbx] 1071 1072 adox r10,r15 1073 mulx r15,rax,QWORD[rcx] 1074 adcx r10,rax 1075 adox r11,r15 1076 mulx r15,rax,QWORD[8+rcx] 1077 adcx r11,rax 1078 adox r12,r15 1079 mulx r15,rax,QWORD[16+rcx] 1080 mov QWORD[((-40))+rbx],r10 1081 adcx r12,rax 1082 mov QWORD[((-32))+rbx],r11 1083 adox r13,r15 1084 mulx r15,rax,QWORD[24+rcx] 1085 mov rdx,r9 1086 mov QWORD[((-24))+rbx],r12 1087 adcx r13,rax 1088 adox r15,rbp 1089 lea rcx,[32+rcx] 1090 mov QWORD[((-16))+rbx],r13 1091 1092 dec rdi 1093 jnz NEAR $L$mulx4x_1st 1094 1095 mov rax,QWORD[rsp] 1096 mov rdi,QWORD[8+rsp] 1097 adc r15,rbp 1098 add r14,r15 1099 sbb r15,r15 1100 mov QWORD[((-8))+rbx],r14 1101 jmp NEAR $L$mulx4x_outer 1102 1103 ALIGN 32 1104 $L$mulx4x_outer: 1105 mov rdx,QWORD[rdi] 1106 lea rdi,[8+rdi] 1107 sub rsi,rax 1108 mov QWORD[rbx],r15 1109 lea rbx,[((64+32))+rsp] 1110 sub rcx,rax 1111 1112 mulx r11,r8,QWORD[rsi] 1113 xor ebp,ebp 1114 mov r9,rdx 1115 mulx r12,r14,QWORD[8+rsi] 1116 adox r8,QWORD[((-32))+rbx] 1117 adcx r11,r14 1118 mulx r13,r15,QWORD[16+rsi] 1119 adox r11,QWORD[((-24))+rbx] 1120 adcx r12,r15 1121 adox r12,QWORD[((-16))+rbx] 1122 adcx r13,rbp 1123 adox r13,rbp 1124 1125 mov QWORD[8+rsp],rdi 1126 mov r15,r8 1127 imul r8,QWORD[24+rsp] 1128 xor ebp,ebp 1129 1130 mulx r14,rax,QWORD[24+rsi] 1131 mov rdx,r8 1132 adcx r13,rax 1133 adox r13,QWORD[((-8))+rbx] 1134 adcx r14,rbp 1135 lea rsi,[32+rsi] 1136 adox r14,rbp 1137 1138 mulx r10,rax,QWORD[rcx] 1139 adcx r15,rax 1140 adox r10,r11 1141 mulx r11,rax,QWORD[8+rcx] 1142 adcx r10,rax 1143 adox r11,r12 1144 mulx r12,rax,QWORD[16+rcx] 1145 mov QWORD[((-32))+rbx],r10 1146 adcx r11,rax 1147 adox r12,r13 1148 mulx r15,rax,QWORD[24+rcx] 1149 mov rdx,r9 1150 mov QWORD[((-24))+rbx],r11 1151 lea rcx,[32+rcx] 1152 adcx r12,rax 1153 adox r15,rbp 1154 mov rdi,QWORD[48+rsp] 1155 mov QWORD[((-16))+rbx],r12 1156 1157 jmp NEAR $L$mulx4x_inner 1158 1159 ALIGN 32 1160 $L$mulx4x_inner: 1161 mulx rax,r10,QWORD[rsi] 1162 adcx r15,rbp 1163 adox r10,r14 1164 mulx r14,r11,QWORD[8+rsi] 1165 adcx r10,QWORD[rbx] 1166 adox r11,rax 1167 mulx rax,r12,QWORD[16+rsi] 1168 adcx r11,QWORD[8+rbx] 1169 adox r12,r14 1170 mulx r14,r13,QWORD[24+rsi] 1171 mov rdx,r8 1172 adcx r12,QWORD[16+rbx] 1173 adox r13,rax 1174 adcx r13,QWORD[24+rbx] 1175 adox r14,rbp 1176 lea rsi,[32+rsi] 1177 lea rbx,[32+rbx] 1178 adcx r14,rbp 1179 1180 adox r10,r15 1181 mulx r15,rax,QWORD[rcx] 1182 adcx r10,rax 1183 adox r11,r15 1184 mulx r15,rax,QWORD[8+rcx] 1185 adcx r11,rax 1186 adox r12,r15 1187 mulx r15,rax,QWORD[16+rcx] 1188 mov QWORD[((-40))+rbx],r10 1189 adcx r12,rax 1190 adox r13,r15 1191 mulx r15,rax,QWORD[24+rcx] 1192 mov rdx,r9 1193 mov QWORD[((-32))+rbx],r11 1194 mov QWORD[((-24))+rbx],r12 1195 adcx r13,rax 1196 adox r15,rbp 1197 lea rcx,[32+rcx] 1198 mov QWORD[((-16))+rbx],r13 1199 1200 dec rdi 1201 jnz NEAR $L$mulx4x_inner 1202 1203 mov rax,QWORD[rsp] 1204 mov rdi,QWORD[8+rsp] 1205 adc r15,rbp 1206 sub rbp,QWORD[rbx] 1207 adc r14,r15 1208 sbb r15,r15 1209 mov QWORD[((-8))+rbx],r14 1210 1211 cmp rdi,QWORD[16+rsp] 1212 jne NEAR $L$mulx4x_outer 1213 1214 lea rbx,[64+rsp] 1215 sub rcx,rax 1216 neg r15 1217 mov rdx,rax 1218 shr rax,3+2 1219 mov rdi,QWORD[32+rsp] 1220 jmp NEAR $L$mulx4x_sub 1221 1222 ALIGN 32 1223 $L$mulx4x_sub: 1224 mov r11,QWORD[rbx] 1225 mov r12,QWORD[8+rbx] 1226 mov r13,QWORD[16+rbx] 1227 mov r14,QWORD[24+rbx] 1228 lea rbx,[32+rbx] 1229 sbb r11,QWORD[rcx] 1230 sbb r12,QWORD[8+rcx] 1231 sbb r13,QWORD[16+rcx] 1232 sbb r14,QWORD[24+rcx] 1233 lea rcx,[32+rcx] 1234 mov QWORD[rdi],r11 1235 mov QWORD[8+rdi],r12 1236 mov QWORD[16+rdi],r13 1237 mov QWORD[24+rdi],r14 1238 lea rdi,[32+rdi] 1239 dec rax 1240 jnz NEAR $L$mulx4x_sub 1241 1242 sbb r15,0 1243 lea rbx,[64+rsp] 1244 sub rdi,rdx 1245 1246 DB 102,73,15,110,207 1247 pxor xmm0,xmm0 1248 pshufd xmm1,xmm1,0 1249 mov rsi,QWORD[40+rsp] 1250 1251 jmp NEAR $L$mulx4x_cond_copy 1252 1253 ALIGN 32 1254 $L$mulx4x_cond_copy: 1255 movdqa xmm2,XMMWORD[rbx] 1256 movdqa xmm3,XMMWORD[16+rbx] 1257 lea rbx,[32+rbx] 1258 movdqu xmm4,XMMWORD[rdi] 1259 movdqu xmm5,XMMWORD[16+rdi] 1260 lea rdi,[32+rdi] 1261 movdqa XMMWORD[(-32)+rbx],xmm0 1262 movdqa XMMWORD[(-16)+rbx],xmm0 1263 pcmpeqd xmm0,xmm1 1264 pand xmm2,xmm1 1265 pand xmm3,xmm1 1266 pand xmm4,xmm0 1267 pand xmm5,xmm0 1268 pxor xmm0,xmm0 1269 por xmm4,xmm2 1270 por xmm5,xmm3 1271 movdqu XMMWORD[(-32)+rdi],xmm4 1272 movdqu XMMWORD[(-16)+rdi],xmm5 1273 sub rdx,32 1274 jnz NEAR $L$mulx4x_cond_copy 1275 1276 mov QWORD[rbx],rdx 1277 1278 mov rax,1 1279 mov r15,QWORD[((-48))+rsi] 1280 1281 mov r14,QWORD[((-40))+rsi] 1282 1283 mov r13,QWORD[((-32))+rsi] 1284 1285 mov r12,QWORD[((-24))+rsi] 1286 1287 mov rbp,QWORD[((-16))+rsi] 1288 1289 mov rbx,QWORD[((-8))+rsi] 1290 1291 lea rsp,[rsi] 1292 1293 $L$mulx4x_epilogue: 1294 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1295 mov rsi,QWORD[16+rsp] 1296 DB 0F3h,0C3h ;repret 1297 1298 $L$SEH_end_bn_mulx4x_mont: 905 1299 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 906 1300 DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 … … 1054 1448 DD $L$SEH_end_bn_sqr8x_mont wrt ..imagebase 1055 1449 DD $L$SEH_info_bn_sqr8x_mont wrt ..imagebase 1450 DD $L$SEH_begin_bn_mulx4x_mont wrt ..imagebase 1451 DD $L$SEH_end_bn_mulx4x_mont wrt ..imagebase 1452 DD $L$SEH_info_bn_mulx4x_mont wrt ..imagebase 1056 1453 section .xdata rdata align=8 1057 1454 ALIGN 8 … … 1069 1466 DD $L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase 1070 1467 ALIGN 8 1468 $L$SEH_info_bn_mulx4x_mont: 1469 DB 9,0,0,0 1470 DD sqr_handler wrt ..imagebase 1471 DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase 1472 ALIGN 8 -
trunk/src/libs/openssl-3.1.0/crypto/genasm-nasm/x86_64-mont5.S
r97373 r99371 30 30 test r9d,7 31 31 jnz NEAR $L$mul_enter 32 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))] 32 33 jmp NEAR $L$mul4x_enter 33 34 … … 480 481 481 482 $L$mul4x_enter: 483 and r11d,0x80108 484 cmp r11d,0x80108 485 je NEAR $L$mulx4x_enter 482 486 push rbx 483 487 … … 1123 1127 mov rax,rsp 1124 1128 1129 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))] 1130 and r11d,0x80108 1131 cmp r11d,0x80108 1132 je NEAR $L$powerx5_enter 1125 1133 push rbx 1126 1134 … … 2085 2093 mov r10,r9 2086 2094 neg r9 2095 DB 0F3h,0C3h ;repret 2096 2097 2098 2099 ALIGN 32 2100 bn_mulx4x_mont_gather5: 2101 mov QWORD[8+rsp],rdi ;WIN64 prologue 2102 mov QWORD[16+rsp],rsi 2103 mov rax,rsp 2104 $L$SEH_begin_bn_mulx4x_mont_gather5: 2105 mov rdi,rcx 2106 mov rsi,rdx 2107 mov rdx,r8 2108 mov rcx,r9 2109 mov r8,QWORD[40+rsp] 2110 mov r9,QWORD[48+rsp] 2111 2112 2113 2114 mov rax,rsp 2115 2116 $L$mulx4x_enter: 2117 push rbx 2118 2119 push rbp 2120 2121 push r12 2122 2123 push r13 2124 2125 push r14 2126 2127 push r15 2128 2129 $L$mulx4x_prologue: 2130 2131 shl r9d,3 2132 lea r10,[r9*2+r9] 2133 neg r9 2134 mov r8,QWORD[r8] 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 lea r11,[((-320))+r9*2+rsp] 2146 mov rbp,rsp 2147 sub r11,rdi 2148 and r11,4095 2149 cmp r10,r11 2150 jb NEAR $L$mulx4xsp_alt 2151 sub rbp,r11 2152 lea rbp,[((-320))+r9*2+rbp] 2153 jmp NEAR $L$mulx4xsp_done 2154 2155 $L$mulx4xsp_alt: 2156 lea r10,[((4096-320))+r9*2] 2157 lea rbp,[((-320))+r9*2+rbp] 2158 sub r11,r10 2159 mov r10,0 2160 cmovc r11,r10 2161 sub rbp,r11 2162 $L$mulx4xsp_done: 2163 and rbp,-64 2164 mov r11,rsp 2165 sub r11,rbp 2166 and r11,-4096 2167 lea rsp,[rbp*1+r11] 2168 mov r10,QWORD[rsp] 2169 cmp rsp,rbp 2170 ja NEAR $L$mulx4x_page_walk 2171 jmp NEAR $L$mulx4x_page_walk_done 2172 2173 $L$mulx4x_page_walk: 2174 lea rsp,[((-4096))+rsp] 2175 mov r10,QWORD[rsp] 2176 cmp rsp,rbp 2177 ja NEAR $L$mulx4x_page_walk 2178 $L$mulx4x_page_walk_done: 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 mov QWORD[32+rsp],r8 2193 mov QWORD[40+rsp],rax 2194 2195 $L$mulx4x_body: 2196 call mulx4x_internal 2197 2198 mov rsi,QWORD[40+rsp] 2199 2200 mov rax,1 2201 2202 mov r15,QWORD[((-48))+rsi] 2203 2204 mov r14,QWORD[((-40))+rsi] 2205 2206 mov r13,QWORD[((-32))+rsi] 2207 2208 mov r12,QWORD[((-24))+rsi] 2209 2210 mov rbp,QWORD[((-16))+rsi] 2211 2212 mov rbx,QWORD[((-8))+rsi] 2213 2214 lea rsp,[rsi] 2215 2216 $L$mulx4x_epilogue: 2217 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2218 mov rsi,QWORD[16+rsp] 2219 DB 0F3h,0C3h ;repret 2220 2221 $L$SEH_end_bn_mulx4x_mont_gather5: 2222 2223 2224 ALIGN 32 2225 mulx4x_internal: 2226 2227 mov QWORD[8+rsp],r9 2228 mov r10,r9 2229 neg r9 2230 shl r9,5 2231 neg r10 2232 lea r13,[128+r9*1+rdx] 2233 shr r9,5+5 2234 movd xmm5,DWORD[56+rax] 2235 sub r9,1 2236 lea rax,[$L$inc] 2237 mov QWORD[((16+8))+rsp],r13 2238 mov QWORD[((24+8))+rsp],r9 2239 mov QWORD[((56+8))+rsp],rdi 2240 movdqa xmm0,XMMWORD[rax] 2241 movdqa xmm1,XMMWORD[16+rax] 2242 lea r10,[((88-112))+r10*1+rsp] 2243 lea rdi,[128+rdx] 2244 2245 pshufd xmm5,xmm5,0 2246 movdqa xmm4,xmm1 2247 DB 0x67 2248 movdqa xmm2,xmm1 2249 DB 0x67 2250 paddd xmm1,xmm0 2251 pcmpeqd xmm0,xmm5 2252 movdqa xmm3,xmm4 2253 paddd xmm2,xmm1 2254 pcmpeqd xmm1,xmm5 2255 movdqa XMMWORD[112+r10],xmm0 2256 movdqa xmm0,xmm4 2257 2258 paddd xmm3,xmm2 2259 pcmpeqd xmm2,xmm5 2260 movdqa XMMWORD[128+r10],xmm1 2261 movdqa xmm1,xmm4 2262 2263 paddd xmm0,xmm3 2264 pcmpeqd xmm3,xmm5 2265 movdqa XMMWORD[144+r10],xmm2 2266 movdqa xmm2,xmm4 2267 2268 paddd xmm1,xmm0 2269 pcmpeqd xmm0,xmm5 2270 movdqa XMMWORD[160+r10],xmm3 2271 movdqa xmm3,xmm4 2272 paddd xmm2,xmm1 2273 pcmpeqd xmm1,xmm5 2274 movdqa XMMWORD[176+r10],xmm0 2275 movdqa xmm0,xmm4 2276 2277 paddd xmm3,xmm2 2278 pcmpeqd xmm2,xmm5 2279 movdqa XMMWORD[192+r10],xmm1 2280 movdqa xmm1,xmm4 2281 2282 paddd xmm0,xmm3 2283 pcmpeqd xmm3,xmm5 2284 movdqa XMMWORD[208+r10],xmm2 2285 movdqa xmm2,xmm4 2286 2287 paddd xmm1,xmm0 2288 pcmpeqd xmm0,xmm5 2289 movdqa XMMWORD[224+r10],xmm3 2290 movdqa xmm3,xmm4 2291 paddd xmm2,xmm1 2292 pcmpeqd xmm1,xmm5 2293 movdqa XMMWORD[240+r10],xmm0 2294 movdqa xmm0,xmm4 2295 2296 paddd xmm3,xmm2 2297 pcmpeqd xmm2,xmm5 2298 movdqa XMMWORD[256+r10],xmm1 2299 movdqa xmm1,xmm4 2300 2301 paddd xmm0,xmm3 2302 pcmpeqd xmm3,xmm5 2303 movdqa XMMWORD[272+r10],xmm2 2304 movdqa xmm2,xmm4 2305 2306 paddd xmm1,xmm0 2307 pcmpeqd xmm0,xmm5 2308 movdqa XMMWORD[288+r10],xmm3 2309 movdqa xmm3,xmm4 2310 DB 0x67 2311 paddd xmm2,xmm1 2312 pcmpeqd xmm1,xmm5 2313 movdqa XMMWORD[304+r10],xmm0 2314 2315 paddd xmm3,xmm2 2316 pcmpeqd xmm2,xmm5 2317 movdqa XMMWORD[320+r10],xmm1 2318 2319 pcmpeqd xmm3,xmm5 2320 movdqa XMMWORD[336+r10],xmm2 2321 2322 pand xmm0,XMMWORD[64+rdi] 2323 pand xmm1,XMMWORD[80+rdi] 2324 pand xmm2,XMMWORD[96+rdi] 2325 movdqa XMMWORD[352+r10],xmm3 2326 pand xmm3,XMMWORD[112+rdi] 2327 por xmm0,xmm2 2328 por xmm1,xmm3 2329 movdqa xmm4,XMMWORD[((-128))+rdi] 2330 movdqa xmm5,XMMWORD[((-112))+rdi] 2331 movdqa xmm2,XMMWORD[((-96))+rdi] 2332 pand xmm4,XMMWORD[112+r10] 2333 movdqa xmm3,XMMWORD[((-80))+rdi] 2334 pand xmm5,XMMWORD[128+r10] 2335 por xmm0,xmm4 2336 pand xmm2,XMMWORD[144+r10] 2337 por xmm1,xmm5 2338 pand xmm3,XMMWORD[160+r10] 2339 por xmm0,xmm2 2340 por xmm1,xmm3 2341 movdqa xmm4,XMMWORD[((-64))+rdi] 2342 movdqa xmm5,XMMWORD[((-48))+rdi] 2343 movdqa xmm2,XMMWORD[((-32))+rdi] 2344 pand xmm4,XMMWORD[176+r10] 2345 movdqa xmm3,XMMWORD[((-16))+rdi] 2346 pand xmm5,XMMWORD[192+r10] 2347 por xmm0,xmm4 2348 pand xmm2,XMMWORD[208+r10] 2349 por xmm1,xmm5 2350 pand xmm3,XMMWORD[224+r10] 2351 por xmm0,xmm2 2352 por xmm1,xmm3 2353 movdqa xmm4,XMMWORD[rdi] 2354 movdqa xmm5,XMMWORD[16+rdi] 2355 movdqa xmm2,XMMWORD[32+rdi] 2356 pand xmm4,XMMWORD[240+r10] 2357 movdqa xmm3,XMMWORD[48+rdi] 2358 pand xmm5,XMMWORD[256+r10] 2359 por xmm0,xmm4 2360 pand xmm2,XMMWORD[272+r10] 2361 por xmm1,xmm5 2362 pand xmm3,XMMWORD[288+r10] 2363 por xmm0,xmm2 2364 por xmm1,xmm3 2365 pxor xmm0,xmm1 2366 pshufd xmm1,xmm0,0x4e 2367 por xmm0,xmm1 2368 lea rdi,[256+rdi] 2369 DB 102,72,15,126,194 2370 lea rbx,[((64+32+8))+rsp] 2371 2372 mov r9,rdx 2373 mulx rax,r8,QWORD[rsi] 2374 mulx r12,r11,QWORD[8+rsi] 2375 add r11,rax 2376 mulx r13,rax,QWORD[16+rsi] 2377 adc r12,rax 2378 adc r13,0 2379 mulx r14,rax,QWORD[24+rsi] 2380 2381 mov r15,r8 2382 imul r8,QWORD[((32+8))+rsp] 2383 xor rbp,rbp 2384 mov rdx,r8 2385 2386 mov QWORD[((8+8))+rsp],rdi 2387 2388 lea rsi,[32+rsi] 2389 adcx r13,rax 2390 adcx r14,rbp 2391 2392 mulx r10,rax,QWORD[rcx] 2393 adcx r15,rax 2394 adox r10,r11 2395 mulx r11,rax,QWORD[8+rcx] 2396 adcx r10,rax 2397 adox r11,r12 2398 mulx r12,rax,QWORD[16+rcx] 2399 mov rdi,QWORD[((24+8))+rsp] 2400 mov QWORD[((-32))+rbx],r10 2401 adcx r11,rax 2402 adox r12,r13 2403 mulx r15,rax,QWORD[24+rcx] 2404 mov rdx,r9 2405 mov QWORD[((-24))+rbx],r11 2406 adcx r12,rax 2407 adox r15,rbp 2408 lea rcx,[32+rcx] 2409 mov QWORD[((-16))+rbx],r12 2410 jmp NEAR $L$mulx4x_1st 2411 2412 ALIGN 32 2413 $L$mulx4x_1st: 2414 adcx r15,rbp 2415 mulx rax,r10,QWORD[rsi] 2416 adcx r10,r14 2417 mulx r14,r11,QWORD[8+rsi] 2418 adcx r11,rax 2419 mulx rax,r12,QWORD[16+rsi] 2420 adcx r12,r14 2421 mulx r14,r13,QWORD[24+rsi] 2422 DB 0x67,0x67 2423 mov rdx,r8 2424 adcx r13,rax 2425 adcx r14,rbp 2426 lea rsi,[32+rsi] 2427 lea rbx,[32+rbx] 2428 2429 adox r10,r15 2430 mulx r15,rax,QWORD[rcx] 2431 adcx r10,rax 2432 adox r11,r15 2433 mulx r15,rax,QWORD[8+rcx] 2434 adcx r11,rax 2435 adox r12,r15 2436 mulx r15,rax,QWORD[16+rcx] 2437 mov QWORD[((-40))+rbx],r10 2438 adcx r12,rax 2439 mov QWORD[((-32))+rbx],r11 2440 adox r13,r15 2441 mulx r15,rax,QWORD[24+rcx] 2442 mov rdx,r9 2443 mov QWORD[((-24))+rbx],r12 2444 adcx r13,rax 2445 adox r15,rbp 2446 lea rcx,[32+rcx] 2447 mov QWORD[((-16))+rbx],r13 2448 2449 dec rdi 2450 jnz NEAR $L$mulx4x_1st 2451 2452 mov rax,QWORD[8+rsp] 2453 adc r15,rbp 2454 lea rsi,[rax*1+rsi] 2455 add r14,r15 2456 mov rdi,QWORD[((8+8))+rsp] 2457 adc rbp,rbp 2458 mov QWORD[((-8))+rbx],r14 2459 jmp NEAR $L$mulx4x_outer 2460 2461 ALIGN 32 2462 $L$mulx4x_outer: 2463 lea r10,[((16-256))+rbx] 2464 pxor xmm4,xmm4 2465 DB 0x67,0x67 2466 pxor xmm5,xmm5 2467 movdqa xmm0,XMMWORD[((-128))+rdi] 2468 movdqa xmm1,XMMWORD[((-112))+rdi] 2469 movdqa xmm2,XMMWORD[((-96))+rdi] 2470 pand xmm0,XMMWORD[256+r10] 2471 movdqa xmm3,XMMWORD[((-80))+rdi] 2472 pand xmm1,XMMWORD[272+r10] 2473 por xmm4,xmm0 2474 pand xmm2,XMMWORD[288+r10] 2475 por xmm5,xmm1 2476 pand xmm3,XMMWORD[304+r10] 2477 por xmm4,xmm2 2478 por xmm5,xmm3 2479 movdqa xmm0,XMMWORD[((-64))+rdi] 2480 movdqa xmm1,XMMWORD[((-48))+rdi] 2481 movdqa xmm2,XMMWORD[((-32))+rdi] 2482 pand xmm0,XMMWORD[320+r10] 2483 movdqa xmm3,XMMWORD[((-16))+rdi] 2484 pand xmm1,XMMWORD[336+r10] 2485 por xmm4,xmm0 2486 pand xmm2,XMMWORD[352+r10] 2487 por xmm5,xmm1 2488 pand xmm3,XMMWORD[368+r10] 2489 por xmm4,xmm2 2490 por xmm5,xmm3 2491 movdqa xmm0,XMMWORD[rdi] 2492 movdqa xmm1,XMMWORD[16+rdi] 2493 movdqa xmm2,XMMWORD[32+rdi] 2494 pand xmm0,XMMWORD[384+r10] 2495 movdqa xmm3,XMMWORD[48+rdi] 2496 pand xmm1,XMMWORD[400+r10] 2497 por xmm4,xmm0 2498 pand xmm2,XMMWORD[416+r10] 2499 por xmm5,xmm1 2500 pand xmm3,XMMWORD[432+r10] 2501 por xmm4,xmm2 2502 por xmm5,xmm3 2503 movdqa xmm0,XMMWORD[64+rdi] 2504 movdqa xmm1,XMMWORD[80+rdi] 2505 movdqa xmm2,XMMWORD[96+rdi] 2506 pand xmm0,XMMWORD[448+r10] 2507 movdqa xmm3,XMMWORD[112+rdi] 2508 pand xmm1,XMMWORD[464+r10] 2509 por xmm4,xmm0 2510 pand xmm2,XMMWORD[480+r10] 2511 por xmm5,xmm1 2512 pand xmm3,XMMWORD[496+r10] 2513 por xmm4,xmm2 2514 por xmm5,xmm3 2515 por xmm4,xmm5 2516 pshufd xmm0,xmm4,0x4e 2517 por xmm0,xmm4 2518 lea rdi,[256+rdi] 2519 DB 102,72,15,126,194 2520 2521 mov QWORD[rbx],rbp 2522 lea rbx,[32+rax*1+rbx] 2523 mulx r11,r8,QWORD[rsi] 2524 xor rbp,rbp 2525 mov r9,rdx 2526 mulx r12,r14,QWORD[8+rsi] 2527 adox r8,QWORD[((-32))+rbx] 2528 adcx r11,r14 2529 mulx r13,r15,QWORD[16+rsi] 2530 adox r11,QWORD[((-24))+rbx] 2531 adcx r12,r15 2532 mulx r14,rdx,QWORD[24+rsi] 2533 adox r12,QWORD[((-16))+rbx] 2534 adcx r13,rdx 2535 lea rcx,[rax*1+rcx] 2536 lea rsi,[32+rsi] 2537 adox r13,QWORD[((-8))+rbx] 2538 adcx r14,rbp 2539 adox r14,rbp 2540 2541 mov r15,r8 2542 imul r8,QWORD[((32+8))+rsp] 2543 2544 mov rdx,r8 2545 xor rbp,rbp 2546 mov QWORD[((8+8))+rsp],rdi 2547 2548 mulx r10,rax,QWORD[rcx] 2549 adcx r15,rax 2550 adox r10,r11 2551 mulx r11,rax,QWORD[8+rcx] 2552 adcx r10,rax 2553 adox r11,r12 2554 mulx r12,rax,QWORD[16+rcx] 2555 adcx r11,rax 2556 adox r12,r13 2557 mulx r15,rax,QWORD[24+rcx] 2558 mov rdx,r9 2559 mov rdi,QWORD[((24+8))+rsp] 2560 mov QWORD[((-32))+rbx],r10 2561 adcx r12,rax 2562 mov QWORD[((-24))+rbx],r11 2563 adox r15,rbp 2564 mov QWORD[((-16))+rbx],r12 2565 lea rcx,[32+rcx] 2566 jmp NEAR $L$mulx4x_inner 2567 2568 ALIGN 32 2569 $L$mulx4x_inner: 2570 mulx rax,r10,QWORD[rsi] 2571 adcx r15,rbp 2572 adox r10,r14 2573 mulx r14,r11,QWORD[8+rsi] 2574 adcx r10,QWORD[rbx] 2575 adox r11,rax 2576 mulx rax,r12,QWORD[16+rsi] 2577 adcx r11,QWORD[8+rbx] 2578 adox r12,r14 2579 mulx r14,r13,QWORD[24+rsi] 2580 mov rdx,r8 2581 adcx r12,QWORD[16+rbx] 2582 adox r13,rax 2583 adcx r13,QWORD[24+rbx] 2584 adox r14,rbp 2585 lea rsi,[32+rsi] 2586 lea rbx,[32+rbx] 2587 adcx r14,rbp 2588 2589 adox r10,r15 2590 mulx r15,rax,QWORD[rcx] 2591 adcx r10,rax 2592 adox r11,r15 2593 mulx r15,rax,QWORD[8+rcx] 2594 adcx r11,rax 2595 adox r12,r15 2596 mulx r15,rax,QWORD[16+rcx] 2597 mov QWORD[((-40))+rbx],r10 2598 adcx r12,rax 2599 adox r13,r15 2600 mov QWORD[((-32))+rbx],r11 2601 mulx r15,rax,QWORD[24+rcx] 2602 mov rdx,r9 2603 lea rcx,[32+rcx] 2604 mov QWORD[((-24))+rbx],r12 2605 adcx r13,rax 2606 adox r15,rbp 2607 mov QWORD[((-16))+rbx],r13 2608 2609 dec rdi 2610 jnz NEAR $L$mulx4x_inner 2611 2612 mov rax,QWORD[((0+8))+rsp] 2613 adc r15,rbp 2614 sub rdi,QWORD[rbx] 2615 mov rdi,QWORD[((8+8))+rsp] 2616 mov r10,QWORD[((16+8))+rsp] 2617 adc r14,r15 2618 lea rsi,[rax*1+rsi] 2619 adc rbp,rbp 2620 mov QWORD[((-8))+rbx],r14 2621 2622 cmp rdi,r10 2623 jb NEAR $L$mulx4x_outer 2624 2625 mov r10,QWORD[((-8))+rcx] 2626 mov r8,rbp 2627 mov r12,QWORD[rax*1+rcx] 2628 lea rbp,[rax*1+rcx] 2629 mov rcx,rax 2630 lea rdi,[rax*1+rbx] 2631 xor eax,eax 2632 xor r15,r15 2633 sub r10,r14 2634 adc r15,r15 2635 or r8,r15 2636 sar rcx,3+2 2637 sub rax,r8 2638 mov rdx,QWORD[((56+8))+rsp] 2639 dec r12 2640 mov r13,QWORD[8+rbp] 2641 xor r8,r8 2642 mov r14,QWORD[16+rbp] 2643 mov r15,QWORD[24+rbp] 2644 jmp NEAR $L$sqrx4x_sub_entry 2645 2646 2647 2648 ALIGN 32 2649 bn_powerx5: 2650 mov QWORD[8+rsp],rdi ;WIN64 prologue 2651 mov QWORD[16+rsp],rsi 2652 mov rax,rsp 2653 $L$SEH_begin_bn_powerx5: 2654 mov rdi,rcx 2655 mov rsi,rdx 2656 mov rdx,r8 2657 mov rcx,r9 2658 mov r8,QWORD[40+rsp] 2659 mov r9,QWORD[48+rsp] 2660 2661 2662 2663 mov rax,rsp 2664 2665 $L$powerx5_enter: 2666 push rbx 2667 2668 push rbp 2669 2670 push r12 2671 2672 push r13 2673 2674 push r14 2675 2676 push r15 2677 2678 $L$powerx5_prologue: 2679 2680 shl r9d,3 2681 lea r10,[r9*2+r9] 2682 neg r9 2683 mov r8,QWORD[r8] 2684 2685 2686 2687 2688 2689 2690 2691 2692 lea r11,[((-320))+r9*2+rsp] 2693 mov rbp,rsp 2694 sub r11,rdi 2695 and r11,4095 2696 cmp r10,r11 2697 jb NEAR $L$pwrx_sp_alt 2698 sub rbp,r11 2699 lea rbp,[((-320))+r9*2+rbp] 2700 jmp NEAR $L$pwrx_sp_done 2701 2702 ALIGN 32 2703 $L$pwrx_sp_alt: 2704 lea r10,[((4096-320))+r9*2] 2705 lea rbp,[((-320))+r9*2+rbp] 2706 sub r11,r10 2707 mov r10,0 2708 cmovc r11,r10 2709 sub rbp,r11 2710 $L$pwrx_sp_done: 2711 and rbp,-64 2712 mov r11,rsp 2713 sub r11,rbp 2714 and r11,-4096 2715 lea rsp,[rbp*1+r11] 2716 mov r10,QWORD[rsp] 2717 cmp rsp,rbp 2718 ja NEAR $L$pwrx_page_walk 2719 jmp NEAR $L$pwrx_page_walk_done 2720 2721 $L$pwrx_page_walk: 2722 lea rsp,[((-4096))+rsp] 2723 mov r10,QWORD[rsp] 2724 cmp rsp,rbp 2725 ja NEAR $L$pwrx_page_walk 2726 $L$pwrx_page_walk_done: 2727 2728 mov r10,r9 2729 neg r9 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 pxor xmm0,xmm0 2743 DB 102,72,15,110,207 2744 DB 102,72,15,110,209 2745 DB 102,73,15,110,218 2746 DB 102,72,15,110,226 2747 mov QWORD[32+rsp],r8 2748 mov QWORD[40+rsp],rax 2749 2750 $L$powerx5_body: 2751 2752 call __bn_sqrx8x_internal 2753 call __bn_postx4x_internal 2754 call __bn_sqrx8x_internal 2755 call __bn_postx4x_internal 2756 call __bn_sqrx8x_internal 2757 call __bn_postx4x_internal 2758 call __bn_sqrx8x_internal 2759 call __bn_postx4x_internal 2760 call __bn_sqrx8x_internal 2761 call __bn_postx4x_internal 2762 2763 mov r9,r10 2764 mov rdi,rsi 2765 DB 102,72,15,126,209 2766 DB 102,72,15,126,226 2767 mov rax,QWORD[40+rsp] 2768 2769 call mulx4x_internal 2770 2771 mov rsi,QWORD[40+rsp] 2772 2773 mov rax,1 2774 2775 mov r15,QWORD[((-48))+rsi] 2776 2777 mov r14,QWORD[((-40))+rsi] 2778 2779 mov r13,QWORD[((-32))+rsi] 2780 2781 mov r12,QWORD[((-24))+rsi] 2782 2783 mov rbp,QWORD[((-16))+rsi] 2784 2785 mov rbx,QWORD[((-8))+rsi] 2786 2787 lea rsp,[rsi] 2788 2789 $L$powerx5_epilogue: 2790 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 2791 mov rsi,QWORD[16+rsp] 2792 DB 0F3h,0C3h ;repret 2793 2794 $L$SEH_end_bn_powerx5: 2795 2796 global bn_sqrx8x_internal 2797 2798 2799 ALIGN 32 2800 bn_sqrx8x_internal: 2801 __bn_sqrx8x_internal: 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 lea rdi,[((48+8))+rsp] 2844 lea rbp,[r9*1+rsi] 2845 mov QWORD[((0+8))+rsp],r9 2846 mov QWORD[((8+8))+rsp],rbp 2847 jmp NEAR $L$sqr8x_zero_start 2848 2849 ALIGN 32 2850 DB 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 2851 $L$sqrx8x_zero: 2852 DB 0x3e 2853 movdqa XMMWORD[rdi],xmm0 2854 movdqa XMMWORD[16+rdi],xmm0 2855 movdqa XMMWORD[32+rdi],xmm0 2856 movdqa XMMWORD[48+rdi],xmm0 2857 $L$sqr8x_zero_start: 2858 movdqa XMMWORD[64+rdi],xmm0 2859 movdqa XMMWORD[80+rdi],xmm0 2860 movdqa XMMWORD[96+rdi],xmm0 2861 movdqa XMMWORD[112+rdi],xmm0 2862 lea rdi,[128+rdi] 2863 sub r9,64 2864 jnz NEAR $L$sqrx8x_zero 2865 2866 mov rdx,QWORD[rsi] 2867 2868 xor r10,r10 2869 xor r11,r11 2870 xor r12,r12 2871 xor r13,r13 2872 xor r14,r14 2873 xor r15,r15 2874 lea rdi,[((48+8))+rsp] 2875 xor rbp,rbp 2876 jmp NEAR $L$sqrx8x_outer_loop 2877 2878 ALIGN 32 2879 $L$sqrx8x_outer_loop: 2880 mulx rax,r8,QWORD[8+rsi] 2881 adcx r8,r9 2882 adox r10,rax 2883 mulx rax,r9,QWORD[16+rsi] 2884 adcx r9,r10 2885 adox r11,rax 2886 DB 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 2887 adcx r10,r11 2888 adox r12,rax 2889 DB 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 2890 adcx r11,r12 2891 adox r13,rax 2892 mulx rax,r12,QWORD[40+rsi] 2893 adcx r12,r13 2894 adox r14,rax 2895 mulx rax,r13,QWORD[48+rsi] 2896 adcx r13,r14 2897 adox rax,r15 2898 mulx r15,r14,QWORD[56+rsi] 2899 mov rdx,QWORD[8+rsi] 2900 adcx r14,rax 2901 adox r15,rbp 2902 adc r15,QWORD[64+rdi] 2903 mov QWORD[8+rdi],r8 2904 mov QWORD[16+rdi],r9 2905 sbb rcx,rcx 2906 xor rbp,rbp 2907 2908 2909 mulx rbx,r8,QWORD[16+rsi] 2910 mulx rax,r9,QWORD[24+rsi] 2911 adcx r8,r10 2912 adox r9,rbx 2913 mulx rbx,r10,QWORD[32+rsi] 2914 adcx r9,r11 2915 adox r10,rax 2916 DB 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 2917 adcx r10,r12 2918 adox r11,rbx 2919 DB 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 2920 adcx r11,r13 2921 adox r12,r14 2922 DB 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 2923 mov rdx,QWORD[16+rsi] 2924 adcx r12,rax 2925 adox r13,rbx 2926 adcx r13,r15 2927 adox r14,rbp 2928 adcx r14,rbp 2929 2930 mov QWORD[24+rdi],r8 2931 mov QWORD[32+rdi],r9 2932 2933 mulx rbx,r8,QWORD[24+rsi] 2934 mulx rax,r9,QWORD[32+rsi] 2935 adcx r8,r10 2936 adox r9,rbx 2937 mulx rbx,r10,QWORD[40+rsi] 2938 adcx r9,r11 2939 adox r10,rax 2940 DB 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 2941 adcx r10,r12 2942 adox r11,r13 2943 DB 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 2944 DB 0x3e 2945 mov rdx,QWORD[24+rsi] 2946 adcx r11,rbx 2947 adox r12,rax 2948 adcx r12,r14 2949 mov QWORD[40+rdi],r8 2950 mov QWORD[48+rdi],r9 2951 mulx rax,r8,QWORD[32+rsi] 2952 adox r13,rbp 2953 adcx r13,rbp 2954 2955 mulx rbx,r9,QWORD[40+rsi] 2956 adcx r8,r10 2957 adox r9,rax 2958 mulx rax,r10,QWORD[48+rsi] 2959 adcx r9,r11 2960 adox r10,r12 2961 mulx r12,r11,QWORD[56+rsi] 2962 mov rdx,QWORD[32+rsi] 2963 mov r14,QWORD[40+rsi] 2964 adcx r10,rbx 2965 adox r11,rax 2966 mov r15,QWORD[48+rsi] 2967 adcx r11,r13 2968 adox r12,rbp 2969 adcx r12,rbp 2970 2971 mov QWORD[56+rdi],r8 2972 mov QWORD[64+rdi],r9 2973 2974 mulx rax,r9,r14 2975 mov r8,QWORD[56+rsi] 2976 adcx r9,r10 2977 mulx rbx,r10,r15 2978 adox r10,rax 2979 adcx r10,r11 2980 mulx rax,r11,r8 2981 mov rdx,r14 2982 adox r11,rbx 2983 adcx r11,r12 2984 2985 adcx rax,rbp 2986 2987 mulx rbx,r14,r15 2988 mulx r13,r12,r8 2989 mov rdx,r15 2990 lea rsi,[64+rsi] 2991 adcx r11,r14 2992 adox r12,rbx 2993 adcx r12,rax 2994 adox r13,rbp 2995 2996 DB 0x67,0x67 2997 mulx r14,r8,r8 2998 adcx r13,r8 2999 adcx r14,rbp 3000 3001 cmp rsi,QWORD[((8+8))+rsp] 3002 je NEAR $L$sqrx8x_outer_break 3003 3004 neg rcx 3005 mov rcx,-8 3006 mov r15,rbp 3007 mov r8,QWORD[64+rdi] 3008 adcx r9,QWORD[72+rdi] 3009 adcx r10,QWORD[80+rdi] 3010 adcx r11,QWORD[88+rdi] 3011 adc r12,QWORD[96+rdi] 3012 adc r13,QWORD[104+rdi] 3013 adc r14,QWORD[112+rdi] 3014 adc r15,QWORD[120+rdi] 3015 lea rbp,[rsi] 3016 lea rdi,[128+rdi] 3017 sbb rax,rax 3018 3019 mov rdx,QWORD[((-64))+rsi] 3020 mov QWORD[((16+8))+rsp],rax 3021 mov QWORD[((24+8))+rsp],rdi 3022 3023 3024 xor eax,eax 3025 jmp NEAR $L$sqrx8x_loop 3026 3027 ALIGN 32 3028 $L$sqrx8x_loop: 3029 mov rbx,r8 3030 mulx r8,rax,QWORD[rbp] 3031 adcx rbx,rax 3032 adox r8,r9 3033 3034 mulx r9,rax,QWORD[8+rbp] 3035 adcx r8,rax 3036 adox r9,r10 3037 3038 mulx r10,rax,QWORD[16+rbp] 3039 adcx r9,rax 3040 adox r10,r11 3041 3042 mulx r11,rax,QWORD[24+rbp] 3043 adcx r10,rax 3044 adox r11,r12 3045 3046 DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3047 adcx r11,rax 3048 adox r12,r13 3049 3050 mulx r13,rax,QWORD[40+rbp] 3051 adcx r12,rax 3052 adox r13,r14 3053 3054 mulx r14,rax,QWORD[48+rbp] 3055 mov QWORD[rcx*8+rdi],rbx 3056 mov ebx,0 3057 adcx r13,rax 3058 adox r14,r15 3059 3060 DB 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 3061 mov rdx,QWORD[8+rcx*8+rsi] 3062 adcx r14,rax 3063 adox r15,rbx 3064 adcx r15,rbx 3065 3066 DB 0x67 3067 inc rcx 3068 jnz NEAR $L$sqrx8x_loop 3069 3070 lea rbp,[64+rbp] 3071 mov rcx,-8 3072 cmp rbp,QWORD[((8+8))+rsp] 3073 je NEAR $L$sqrx8x_break 3074 3075 sub rbx,QWORD[((16+8))+rsp] 3076 DB 0x66 3077 mov rdx,QWORD[((-64))+rsi] 3078 adcx r8,QWORD[rdi] 3079 adcx r9,QWORD[8+rdi] 3080 adc r10,QWORD[16+rdi] 3081 adc r11,QWORD[24+rdi] 3082 adc r12,QWORD[32+rdi] 3083 adc r13,QWORD[40+rdi] 3084 adc r14,QWORD[48+rdi] 3085 adc r15,QWORD[56+rdi] 3086 lea rdi,[64+rdi] 3087 DB 0x67 3088 sbb rax,rax 3089 xor ebx,ebx 3090 mov QWORD[((16+8))+rsp],rax 3091 jmp NEAR $L$sqrx8x_loop 3092 3093 ALIGN 32 3094 $L$sqrx8x_break: 3095 xor rbp,rbp 3096 sub rbx,QWORD[((16+8))+rsp] 3097 adcx r8,rbp 3098 mov rcx,QWORD[((24+8))+rsp] 3099 adcx r9,rbp 3100 mov rdx,QWORD[rsi] 3101 adc r10,0 3102 mov QWORD[rdi],r8 3103 adc r11,0 3104 adc r12,0 3105 adc r13,0 3106 adc r14,0 3107 adc r15,0 3108 cmp rdi,rcx 3109 je NEAR $L$sqrx8x_outer_loop 3110 3111 mov QWORD[8+rdi],r9 3112 mov r9,QWORD[8+rcx] 3113 mov QWORD[16+rdi],r10 3114 mov r10,QWORD[16+rcx] 3115 mov QWORD[24+rdi],r11 3116 mov r11,QWORD[24+rcx] 3117 mov QWORD[32+rdi],r12 3118 mov r12,QWORD[32+rcx] 3119 mov QWORD[40+rdi],r13 3120 mov r13,QWORD[40+rcx] 3121 mov QWORD[48+rdi],r14 3122 mov r14,QWORD[48+rcx] 3123 mov QWORD[56+rdi],r15 3124 mov r15,QWORD[56+rcx] 3125 mov rdi,rcx 3126 jmp NEAR $L$sqrx8x_outer_loop 3127 3128 ALIGN 32 3129 $L$sqrx8x_outer_break: 3130 mov QWORD[72+rdi],r9 3131 DB 102,72,15,126,217 3132 mov QWORD[80+rdi],r10 3133 mov QWORD[88+rdi],r11 3134 mov QWORD[96+rdi],r12 3135 mov QWORD[104+rdi],r13 3136 mov QWORD[112+rdi],r14 3137 lea rdi,[((48+8))+rsp] 3138 mov rdx,QWORD[rcx*1+rsi] 3139 3140 mov r11,QWORD[8+rdi] 3141 xor r10,r10 3142 mov r9,QWORD[((0+8))+rsp] 3143 adox r11,r11 3144 mov r12,QWORD[16+rdi] 3145 mov r13,QWORD[24+rdi] 3146 3147 3148 ALIGN 32 3149 $L$sqrx4x_shift_n_add: 3150 mulx rbx,rax,rdx 3151 adox r12,r12 3152 adcx rax,r10 3153 DB 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 3154 DB 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 3155 adox r13,r13 3156 adcx rbx,r11 3157 mov r11,QWORD[40+rdi] 3158 mov QWORD[rdi],rax 3159 mov QWORD[8+rdi],rbx 3160 3161 mulx rbx,rax,rdx 3162 adox r10,r10 3163 adcx rax,r12 3164 mov rdx,QWORD[16+rcx*1+rsi] 3165 mov r12,QWORD[48+rdi] 3166 adox r11,r11 3167 adcx rbx,r13 3168 mov r13,QWORD[56+rdi] 3169 mov QWORD[16+rdi],rax 3170 mov QWORD[24+rdi],rbx 3171 3172 mulx rbx,rax,rdx 3173 adox r12,r12 3174 adcx rax,r10 3175 mov rdx,QWORD[24+rcx*1+rsi] 3176 lea rcx,[32+rcx] 3177 mov r10,QWORD[64+rdi] 3178 adox r13,r13 3179 adcx rbx,r11 3180 mov r11,QWORD[72+rdi] 3181 mov QWORD[32+rdi],rax 3182 mov QWORD[40+rdi],rbx 3183 3184 mulx rbx,rax,rdx 3185 adox r10,r10 3186 adcx rax,r12 3187 jrcxz $L$sqrx4x_shift_n_add_break 3188 DB 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 3189 adox r11,r11 3190 adcx rbx,r13 3191 mov r12,QWORD[80+rdi] 3192 mov r13,QWORD[88+rdi] 3193 mov QWORD[48+rdi],rax 3194 mov QWORD[56+rdi],rbx 3195 lea rdi,[64+rdi] 3196 nop 3197 jmp NEAR $L$sqrx4x_shift_n_add 3198 3199 ALIGN 32 3200 $L$sqrx4x_shift_n_add_break: 3201 adcx rbx,r13 3202 mov QWORD[48+rdi],rax 3203 mov QWORD[56+rdi],rbx 3204 lea rdi,[64+rdi] 3205 DB 102,72,15,126,213 3206 __bn_sqrx8x_reduction: 3207 xor eax,eax 3208 mov rbx,QWORD[((32+8))+rsp] 3209 mov rdx,QWORD[((48+8))+rsp] 3210 lea rcx,[((-64))+r9*1+rbp] 3211 3212 mov QWORD[((0+8))+rsp],rcx 3213 mov QWORD[((8+8))+rsp],rdi 3214 3215 lea rdi,[((48+8))+rsp] 3216 jmp NEAR $L$sqrx8x_reduction_loop 3217 3218 ALIGN 32 3219 $L$sqrx8x_reduction_loop: 3220 mov r9,QWORD[8+rdi] 3221 mov r10,QWORD[16+rdi] 3222 mov r11,QWORD[24+rdi] 3223 mov r12,QWORD[32+rdi] 3224 mov r8,rdx 3225 imul rdx,rbx 3226 mov r13,QWORD[40+rdi] 3227 mov r14,QWORD[48+rdi] 3228 mov r15,QWORD[56+rdi] 3229 mov QWORD[((24+8))+rsp],rax 3230 3231 lea rdi,[64+rdi] 3232 xor rsi,rsi 3233 mov rcx,-8 3234 jmp NEAR $L$sqrx8x_reduce 3235 3236 ALIGN 32 3237 $L$sqrx8x_reduce: 3238 mov rbx,r8 3239 mulx r8,rax,QWORD[rbp] 3240 adcx rax,rbx 3241 adox r8,r9 3242 3243 mulx r9,rbx,QWORD[8+rbp] 3244 adcx r8,rbx 3245 adox r9,r10 3246 3247 mulx r10,rbx,QWORD[16+rbp] 3248 adcx r9,rbx 3249 adox r10,r11 3250 3251 mulx r11,rbx,QWORD[24+rbp] 3252 adcx r10,rbx 3253 adox r11,r12 3254 3255 DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 3256 mov rax,rdx 3257 mov rdx,r8 3258 adcx r11,rbx 3259 adox r12,r13 3260 3261 mulx rdx,rbx,QWORD[((32+8))+rsp] 3262 mov rdx,rax 3263 mov QWORD[((64+48+8))+rcx*8+rsp],rax 3264 3265 mulx r13,rax,QWORD[40+rbp] 3266 adcx r12,rax 3267 adox r13,r14 3268 3269 mulx r14,rax,QWORD[48+rbp] 3270 adcx r13,rax 3271 adox r14,r15 3272 3273 mulx r15,rax,QWORD[56+rbp] 3274 mov rdx,rbx 3275 adcx r14,rax 3276 adox r15,rsi 3277 adcx r15,rsi 3278 3279 DB 0x67,0x67,0x67 3280 inc rcx 3281 jnz NEAR $L$sqrx8x_reduce 3282 3283 mov rax,rsi 3284 cmp rbp,QWORD[((0+8))+rsp] 3285 jae NEAR $L$sqrx8x_no_tail 3286 3287 mov rdx,QWORD[((48+8))+rsp] 3288 add r8,QWORD[rdi] 3289 lea rbp,[64+rbp] 3290 mov rcx,-8 3291 adcx r9,QWORD[8+rdi] 3292 adcx r10,QWORD[16+rdi] 3293 adc r11,QWORD[24+rdi] 3294 adc r12,QWORD[32+rdi] 3295 adc r13,QWORD[40+rdi] 3296 adc r14,QWORD[48+rdi] 3297 adc r15,QWORD[56+rdi] 3298 lea rdi,[64+rdi] 3299 sbb rax,rax 3300 3301 xor rsi,rsi 3302 mov QWORD[((16+8))+rsp],rax 3303 jmp NEAR $L$sqrx8x_tail 3304 3305 ALIGN 32 3306 $L$sqrx8x_tail: 3307 mov rbx,r8 3308 mulx r8,rax,QWORD[rbp] 3309 adcx rbx,rax 3310 adox r8,r9 3311 3312 mulx r9,rax,QWORD[8+rbp] 3313 adcx r8,rax 3314 adox r9,r10 3315 3316 mulx r10,rax,QWORD[16+rbp] 3317 adcx r9,rax 3318 adox r10,r11 3319 3320 mulx r11,rax,QWORD[24+rbp] 3321 adcx r10,rax 3322 adox r11,r12 3323 3324 DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3325 adcx r11,rax 3326 adox r12,r13 3327 3328 mulx r13,rax,QWORD[40+rbp] 3329 adcx r12,rax 3330 adox r13,r14 3331 3332 mulx r14,rax,QWORD[48+rbp] 3333 adcx r13,rax 3334 adox r14,r15 3335 3336 mulx r15,rax,QWORD[56+rbp] 3337 mov rdx,QWORD[((72+48+8))+rcx*8+rsp] 3338 adcx r14,rax 3339 adox r15,rsi 3340 mov QWORD[rcx*8+rdi],rbx 3341 mov rbx,r8 3342 adcx r15,rsi 3343 3344 inc rcx 3345 jnz NEAR $L$sqrx8x_tail 3346 3347 cmp rbp,QWORD[((0+8))+rsp] 3348 jae NEAR $L$sqrx8x_tail_done 3349 3350 sub rsi,QWORD[((16+8))+rsp] 3351 mov rdx,QWORD[((48+8))+rsp] 3352 lea rbp,[64+rbp] 3353 adc r8,QWORD[rdi] 3354 adc r9,QWORD[8+rdi] 3355 adc r10,QWORD[16+rdi] 3356 adc r11,QWORD[24+rdi] 3357 adc r12,QWORD[32+rdi] 3358 adc r13,QWORD[40+rdi] 3359 adc r14,QWORD[48+rdi] 3360 adc r15,QWORD[56+rdi] 3361 lea rdi,[64+rdi] 3362 sbb rax,rax 3363 sub rcx,8 3364 3365 xor rsi,rsi 3366 mov QWORD[((16+8))+rsp],rax 3367 jmp NEAR $L$sqrx8x_tail 3368 3369 ALIGN 32 3370 $L$sqrx8x_tail_done: 3371 xor rax,rax 3372 add r8,QWORD[((24+8))+rsp] 3373 adc r9,0 3374 adc r10,0 3375 adc r11,0 3376 adc r12,0 3377 adc r13,0 3378 adc r14,0 3379 adc r15,0 3380 adc rax,0 3381 3382 sub rsi,QWORD[((16+8))+rsp] 3383 $L$sqrx8x_no_tail: 3384 adc r8,QWORD[rdi] 3385 DB 102,72,15,126,217 3386 adc r9,QWORD[8+rdi] 3387 mov rsi,QWORD[56+rbp] 3388 DB 102,72,15,126,213 3389 adc r10,QWORD[16+rdi] 3390 adc r11,QWORD[24+rdi] 3391 adc r12,QWORD[32+rdi] 3392 adc r13,QWORD[40+rdi] 3393 adc r14,QWORD[48+rdi] 3394 adc r15,QWORD[56+rdi] 3395 adc rax,0 3396 3397 mov rbx,QWORD[((32+8))+rsp] 3398 mov rdx,QWORD[64+rcx*1+rdi] 3399 3400 mov QWORD[rdi],r8 3401 lea r8,[64+rdi] 3402 mov QWORD[8+rdi],r9 3403 mov QWORD[16+rdi],r10 3404 mov QWORD[24+rdi],r11 3405 mov QWORD[32+rdi],r12 3406 mov QWORD[40+rdi],r13 3407 mov QWORD[48+rdi],r14 3408 mov QWORD[56+rdi],r15 3409 3410 lea rdi,[64+rcx*1+rdi] 3411 cmp r8,QWORD[((8+8))+rsp] 3412 jb NEAR $L$sqrx8x_reduction_loop 3413 DB 0F3h,0C3h ;repret 3414 3415 3416 ALIGN 32 3417 __bn_postx4x_internal: 3418 3419 mov r12,QWORD[rbp] 3420 mov r10,rcx 3421 mov r9,rcx 3422 neg rax 3423 sar rcx,3+2 3424 3425 DB 102,72,15,126,202 3426 DB 102,72,15,126,206 3427 dec r12 3428 mov r13,QWORD[8+rbp] 3429 xor r8,r8 3430 mov r14,QWORD[16+rbp] 3431 mov r15,QWORD[24+rbp] 3432 jmp NEAR $L$sqrx4x_sub_entry 3433 3434 ALIGN 16 3435 $L$sqrx4x_sub: 3436 mov r12,QWORD[rbp] 3437 mov r13,QWORD[8+rbp] 3438 mov r14,QWORD[16+rbp] 3439 mov r15,QWORD[24+rbp] 3440 $L$sqrx4x_sub_entry: 3441 andn r12,r12,rax 3442 lea rbp,[32+rbp] 3443 andn r13,r13,rax 3444 andn r14,r14,rax 3445 andn r15,r15,rax 3446 3447 neg r8 3448 adc r12,QWORD[rdi] 3449 adc r13,QWORD[8+rdi] 3450 adc r14,QWORD[16+rdi] 3451 adc r15,QWORD[24+rdi] 3452 mov QWORD[rdx],r12 3453 lea rdi,[32+rdi] 3454 mov QWORD[8+rdx],r13 3455 sbb r8,r8 3456 mov QWORD[16+rdx],r14 3457 mov QWORD[24+rdx],r15 3458 lea rdx,[32+rdx] 3459 3460 inc rcx 3461 jnz NEAR $L$sqrx4x_sub 3462 3463 neg r9 3464 2087 3465 DB 0F3h,0C3h ;repret 2088 3466 … … 2420 3798 DD $L$SEH_end_bn_power5 wrt ..imagebase 2421 3799 DD $L$SEH_info_bn_power5 wrt ..imagebase 3800 DD $L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase 3801 DD $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase 3802 DD $L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase 3803 3804 DD $L$SEH_begin_bn_powerx5 wrt ..imagebase 3805 DD $L$SEH_end_bn_powerx5 wrt ..imagebase 3806 DD $L$SEH_info_bn_powerx5 wrt ..imagebase 2422 3807 DD $L$SEH_begin_bn_gather5 wrt ..imagebase 2423 3808 DD $L$SEH_end_bn_gather5 wrt ..imagebase … … 2441 3826 DD $L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase 2442 3827 ALIGN 8 3828 $L$SEH_info_bn_mulx4x_mont_gather5: 3829 DB 9,0,0,0 3830 DD mul_handler wrt ..imagebase 3831 DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase 3832 ALIGN 8 3833 $L$SEH_info_bn_powerx5: 3834 DB 9,0,0,0 3835 DD mul_handler wrt ..imagebase 3836 DD $L$powerx5_prologue wrt ..imagebase,$L$powerx5_body wrt ..imagebase,$L$powerx5_epilogue wrt ..imagebase 3837 ALIGN 8 2443 3838 $L$SEH_info_bn_gather5: 2444 3839 DB 0x01,0x0b,0x03,0x0a -
trunk/src/libs/openssl-3.1.0/gen-includes/openssl/asn1.h
r94320 r99371 3 3 * Generated by Makefile from asn1.h.in 4 4 * 5 * Copyright 1995-202 1The OpenSSL Project Authors. All Rights Reserved.5 * Copyright 1995-2022 The OpenSSL Project Authors. All Rights Reserved. 6 6 * 7 7 * Licensed under the Apache License 2.0 (the "License"). You may not use … … 22 22 # endif 23 23 24 # ifndef OPENSSL_NO_STDIO 25 # include <stdio.h> 26 # endif 24 27 # include <time.h> 25 28 # include <openssl/e_os2.h> -
trunk/src/libs/openssl-3.1.0/gen-includes/openssl/cmp.h
r94320 r99371 3 3 * Generated by Makefile from cmp.h.in 4 4 * 5 * Copyright 2007-202 1The OpenSSL Project Authors. All Rights Reserved.5 * Copyright 2007-2022 The OpenSSL Project Authors. All Rights Reserved. 6 6 * Copyright Nokia 2007-2019 7 7 * Copyright Siemens AG 2015-2019 … … 194 194 * } 195 195 */ 196 # define OSSL_CMP_PKISTATUS_accepted 0 197 # define OSSL_CMP_PKISTATUS_grantedWithMods 1 198 # define OSSL_CMP_PKISTATUS_rejection 2 199 # define OSSL_CMP_PKISTATUS_waiting 3 200 # define OSSL_CMP_PKISTATUS_revocationWarning 4 196 # define OSSL_CMP_PKISTATUS_request -3 197 # define OSSL_CMP_PKISTATUS_trans -2 198 # define OSSL_CMP_PKISTATUS_unspecified -1 199 # define OSSL_CMP_PKISTATUS_accepted 0 200 # define OSSL_CMP_PKISTATUS_grantedWithMods 1 201 # define OSSL_CMP_PKISTATUS_rejection 2 202 # define OSSL_CMP_PKISTATUS_waiting 3 203 # define OSSL_CMP_PKISTATUS_revocationWarning 4 201 204 # define OSSL_CMP_PKISTATUS_revocationNotification 5 202 # define OSSL_CMP_PKISTATUS_keyUpdateWarning 6205 # define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 203 206 204 207 typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; … … 445 448 int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); 446 449 int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); 450 int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); 447 451 int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, 448 452 STACK_OF(X509) *extraCertsOut); -
trunk/src/libs/openssl-3.1.0/gen-includes/openssl/conf.h
r94320 r99371 3 3 * Generated by Makefile from conf.h.in 4 4 * 5 * Copyright 1995-202 1The OpenSSL Project Authors. All Rights Reserved.5 * Copyright 1995-2022 The OpenSSL Project Authors. All Rights Reserved. 6 6 * 7 7 * Licensed under the Apache License 2.0 (the "License"). You may not use … … 28 28 # include <openssl/types.h> 29 29 # include <openssl/conferr.h> 30 # ifndef OPENSSL_NO_STDIO 31 # include <stdio.h> 32 # endif 30 33 31 34 #ifdef __cplusplus -
trunk/src/libs/openssl-3.1.0/gen-includes/openssl/crypto.h
r95219 r99371 342 342 CRYPTO_free_fn *free_fn); 343 343 344 void *CRYPTO_malloc(size_t num, const char *file, int line);345 void *CRYPTO_zalloc(size_t num, const char *file, int line);346 void *CRYPTO_memdup(const void *str, size_t siz, const char *file, int line);347 char *CRYPTO_strdup(const char *str, const char *file, int line);348 char *CRYPTO_strndup(const char *str, size_t s, const char *file, int line);344 OSSL_CRYPTO_ALLOC void *CRYPTO_malloc(size_t num, const char *file, int line); 345 OSSL_CRYPTO_ALLOC void *CRYPTO_zalloc(size_t num, const char *file, int line); 346 OSSL_CRYPTO_ALLOC void *CRYPTO_memdup(const void *str, size_t siz, const char *file, int line); 347 OSSL_CRYPTO_ALLOC char *CRYPTO_strdup(const char *str, const char *file, int line); 348 OSSL_CRYPTO_ALLOC char *CRYPTO_strndup(const char *str, size_t s, const char *file, int line); 349 349 void CRYPTO_free(void *ptr, const char *file, int line); 350 350 void CRYPTO_clear_free(void *ptr, size_t num, const char *file, int line); … … 355 355 int CRYPTO_secure_malloc_init(size_t sz, size_t minsize); 356 356 int CRYPTO_secure_malloc_done(void); 357 void *CRYPTO_secure_malloc(size_t num, const char *file, int line);358 void *CRYPTO_secure_zalloc(size_t num, const char *file, int line);357 OSSL_CRYPTO_ALLOC void *CRYPTO_secure_malloc(size_t num, const char *file, int line); 358 OSSL_CRYPTO_ALLOC void *CRYPTO_secure_zalloc(size_t num, const char *file, int line); 359 359 void CRYPTO_secure_free(void *ptr, const char *file, int line); 360 360 void CRYPTO_secure_clear_free(void *ptr, size_t num, -
trunk/src/libs/openssl-3.1.0/gen-includes/openssl/lhash.h
r94320 r99371 1 1 /* 2 * Copyright 1995-202 1The OpenSSL Project Authors. All Rights Reserved.2 * Copyright 1995-2022 The OpenSSL Project Authors. All Rights Reserved. 3 3 * 4 4 * Licensed under the Apache License 2.0 (the "License"). You may not use … … 25 25 # include <openssl/e_os2.h> 26 26 # include <openssl/bio.h> 27 # ifndef OPENSSL_NO_STDIO 28 # include <stdio.h> 29 # endif 27 30 28 31 #ifdef __cplusplus … … 93 96 94 97 # ifndef OPENSSL_NO_STDIO 95 void OPENSSL_LH_stats(const OPENSSL_LHASH *lh, FILE *fp); 96 void OPENSSL_LH_node_stats(const OPENSSL_LHASH *lh, FILE *fp); 97 void OPENSSL_LH_node_usage_stats(const OPENSSL_LHASH *lh, FILE *fp); 98 # endif 99 void OPENSSL_LH_stats_bio(const OPENSSL_LHASH *lh, BIO *out); 100 void OPENSSL_LH_node_stats_bio(const OPENSSL_LHASH *lh, BIO *out); 101 void OPENSSL_LH_node_usage_stats_bio(const OPENSSL_LHASH *lh, BIO *out); 98 # ifndef OPENSSL_NO_DEPRECATED_3_1 99 OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_stats(const OPENSSL_LHASH *lh, FILE *fp); 100 OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_stats(const OPENSSL_LHASH *lh, FILE *fp); 101 OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_usage_stats(const OPENSSL_LHASH *lh, FILE *fp); 102 # endif 103 # endif 104 # ifndef OPENSSL_NO_DEPRECATED_3_1 105 OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_stats_bio(const OPENSSL_LHASH *lh, BIO *out); 106 OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_stats_bio(const OPENSSL_LHASH *lh, BIO *out); 107 OSSL_DEPRECATEDIN_3_1 void OPENSSL_LH_node_usage_stats_bio(const OPENSSL_LHASH *lh, BIO *out); 108 # endif 102 109 103 110 # ifndef OPENSSL_NO_DEPRECATED_1_1_0 … … 130 137 /* Helper macro for internal use */ 131 138 # define DEFINE_LHASH_OF_INTERNAL(type) \ 132 LHASH_OF(type) { union lh_##type##_dummy { void* d1; unsigned long d2; int d3; } dummy; }; \ 139 LHASH_OF(type) { \ 140 union lh_##type##_dummy { void* d1; unsigned long d2; int d3; } dummy; \ 141 }; \ 133 142 typedef int (*lh_##type##_compfunc)(const type *a, const type *b); \ 134 143 typedef unsigned long (*lh_##type##_hashfunc)(const type *a); \ 135 144 typedef void (*lh_##type##_doallfunc)(type *a); \ 136 static ossl_unused ossl_inline type *ossl_check_##type##_lh_plain_type(type *ptr) \ 145 static ossl_unused ossl_inline type *\ 146 ossl_check_##type##_lh_plain_type(type *ptr) \ 137 147 { \ 138 148 return ptr; \ 139 149 } \ 140 static ossl_unused ossl_inline const type *ossl_check_const_##type##_lh_plain_type(const type *ptr) \ 150 static ossl_unused ossl_inline const type * \ 151 ossl_check_const_##type##_lh_plain_type(const type *ptr) \ 141 152 { \ 142 153 return ptr; \ 143 154 } \ 144 static ossl_unused ossl_inline const OPENSSL_LHASH *ossl_check_const_##type##_lh_type(const LHASH_OF(type) *lh) \ 155 static ossl_unused ossl_inline const OPENSSL_LHASH * \ 156 ossl_check_const_##type##_lh_type(const LHASH_OF(type) *lh) \ 145 157 { \ 146 158 return (const OPENSSL_LHASH *)lh; \ 147 159 } \ 148 static ossl_unused ossl_inline OPENSSL_LHASH *ossl_check_##type##_lh_type(LHASH_OF(type) *lh) \ 160 static ossl_unused ossl_inline OPENSSL_LHASH * \ 161 ossl_check_##type##_lh_type(LHASH_OF(type) *lh) \ 149 162 { \ 150 163 return (OPENSSL_LHASH *)lh; \ 151 164 } \ 152 static ossl_unused ossl_inline OPENSSL_LH_COMPFUNC ossl_check_##type##_lh_compfunc_type(lh_##type##_compfunc cmp) \ 165 static ossl_unused ossl_inline OPENSSL_LH_COMPFUNC \ 166 ossl_check_##type##_lh_compfunc_type(lh_##type##_compfunc cmp) \ 153 167 { \ 154 168 return (OPENSSL_LH_COMPFUNC)cmp; \ 155 169 } \ 156 static ossl_unused ossl_inline OPENSSL_LH_HASHFUNC ossl_check_##type##_lh_hashfunc_type(lh_##type##_hashfunc hfn) \ 170 static ossl_unused ossl_inline OPENSSL_LH_HASHFUNC \ 171 ossl_check_##type##_lh_hashfunc_type(lh_##type##_hashfunc hfn) \ 157 172 { \ 158 173 return (OPENSSL_LH_HASHFUNC)hfn; \ 159 174 } \ 160 static ossl_unused ossl_inline OPENSSL_LH_DOALL_FUNC ossl_check_##type##_lh_doallfunc_type(lh_##type##_doallfunc dfn) \ 175 static ossl_unused ossl_inline OPENSSL_LH_DOALL_FUNC \ 176 ossl_check_##type##_lh_doallfunc_type(lh_##type##_doallfunc dfn) \ 161 177 { \ 162 178 return (OPENSSL_LH_DOALL_FUNC)dfn; \ … … 164 180 LHASH_OF(type) 165 181 166 # define DEFINE_LHASH_OF(type) \ 167 LHASH_OF(type) { union lh_##type##_dummy { void* d1; unsigned long d2; int d3; } dummy; }; \ 168 static ossl_unused ossl_inline LHASH_OF(type) *lh_##type##_new(unsigned long (*hfn)(const type *), \ 169 int (*cfn)(const type *, const type *)) \ 182 # ifndef OPENSSL_NO_DEPRECATED_3_1 183 # define DEFINE_LHASH_OF_DEPRECATED(type) \ 184 static ossl_unused ossl_inline void \ 185 lh_##type##_node_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ 186 { \ 187 OPENSSL_LH_node_stats_bio((const OPENSSL_LHASH *)lh, out); \ 188 } \ 189 static ossl_unused ossl_inline void \ 190 lh_##type##_node_usage_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ 191 { \ 192 OPENSSL_LH_node_usage_stats_bio((const OPENSSL_LHASH *)lh, out); \ 193 } \ 194 static ossl_unused ossl_inline void \ 195 lh_##type##_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ 196 { \ 197 OPENSSL_LH_stats_bio((const OPENSSL_LHASH *)lh, out); \ 198 } 199 # else 200 # define DEFINE_LHASH_OF_DEPRECATED(type) 201 # endif 202 203 # define DEFINE_LHASH_OF_EX(type) \ 204 LHASH_OF(type) { \ 205 union lh_##type##_dummy { void* d1; unsigned long d2; int d3; } dummy; \ 206 }; \ 207 static ossl_unused ossl_inline LHASH_OF(type) * \ 208 lh_##type##_new(unsigned long (*hfn)(const type *), \ 209 int (*cfn)(const type *, const type *)) \ 170 210 { \ 171 211 return (LHASH_OF(type) *) \ 172 212 OPENSSL_LH_new((OPENSSL_LH_HASHFUNC)hfn, (OPENSSL_LH_COMPFUNC)cfn); \ 173 213 } \ 174 static ossl_unused ossl_inline void lh_##type##_free(LHASH_OF(type) *lh) \ 214 static ossl_unused ossl_inline void \ 215 lh_##type##_free(LHASH_OF(type) *lh) \ 175 216 { \ 176 217 OPENSSL_LH_free((OPENSSL_LHASH *)lh); \ 177 218 } \ 178 static ossl_unused ossl_inline void lh_##type##_flush(LHASH_OF(type) *lh) \ 219 static ossl_unused ossl_inline void \ 220 lh_##type##_flush(LHASH_OF(type) *lh) \ 179 221 { \ 180 222 OPENSSL_LH_flush((OPENSSL_LHASH *)lh); \ 181 223 } \ 182 static ossl_unused ossl_inline type *lh_##type##_insert(LHASH_OF(type) *lh, type *d) \ 224 static ossl_unused ossl_inline type * \ 225 lh_##type##_insert(LHASH_OF(type) *lh, type *d) \ 183 226 { \ 184 227 return (type *)OPENSSL_LH_insert((OPENSSL_LHASH *)lh, d); \ 185 228 } \ 186 static ossl_unused ossl_inline type *lh_##type##_delete(LHASH_OF(type) *lh, const type *d) \ 229 static ossl_unused ossl_inline type * \ 230 lh_##type##_delete(LHASH_OF(type) *lh, const type *d) \ 187 231 { \ 188 232 return (type *)OPENSSL_LH_delete((OPENSSL_LHASH *)lh, d); \ 189 233 } \ 190 static ossl_unused ossl_inline type *lh_##type##_retrieve(LHASH_OF(type) *lh, const type *d) \ 234 static ossl_unused ossl_inline type * \ 235 lh_##type##_retrieve(LHASH_OF(type) *lh, const type *d) \ 191 236 { \ 192 237 return (type *)OPENSSL_LH_retrieve((OPENSSL_LHASH *)lh, d); \ 193 238 } \ 194 static ossl_unused ossl_inline int lh_##type##_error(LHASH_OF(type) *lh) \ 239 static ossl_unused ossl_inline int \ 240 lh_##type##_error(LHASH_OF(type) *lh) \ 195 241 { \ 196 242 return OPENSSL_LH_error((OPENSSL_LHASH *)lh); \ 197 243 } \ 198 static ossl_unused ossl_inline unsigned long lh_##type##_num_items(LHASH_OF(type) *lh) \ 244 static ossl_unused ossl_inline unsigned long \ 245 lh_##type##_num_items(LHASH_OF(type) *lh) \ 199 246 { \ 200 247 return OPENSSL_LH_num_items((OPENSSL_LHASH *)lh); \ 201 248 } \ 202 static ossl_unused ossl_inline void lh_##type##_node_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ 203 { \ 204 OPENSSL_LH_node_stats_bio((const OPENSSL_LHASH *)lh, out); \ 205 } \ 206 static ossl_unused ossl_inline void lh_##type##_node_usage_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ 207 { \ 208 OPENSSL_LH_node_usage_stats_bio((const OPENSSL_LHASH *)lh, out); \ 209 } \ 210 static ossl_unused ossl_inline void lh_##type##_stats_bio(const LHASH_OF(type) *lh, BIO *out) \ 211 { \ 212 OPENSSL_LH_stats_bio((const OPENSSL_LHASH *)lh, out); \ 213 } \ 214 static ossl_unused ossl_inline unsigned long lh_##type##_get_down_load(LHASH_OF(type) *lh) \ 249 static ossl_unused ossl_inline unsigned long \ 250 lh_##type##_get_down_load(LHASH_OF(type) *lh) \ 215 251 { \ 216 252 return OPENSSL_LH_get_down_load((OPENSSL_LHASH *)lh); \ 217 253 } \ 218 static ossl_unused ossl_inline void lh_##type##_set_down_load(LHASH_OF(type) *lh, unsigned long dl) \ 254 static ossl_unused ossl_inline void \ 255 lh_##type##_set_down_load(LHASH_OF(type) *lh, unsigned long dl) \ 219 256 { \ 220 257 OPENSSL_LH_set_down_load((OPENSSL_LHASH *)lh, dl); \ 221 258 } \ 222 static ossl_unused ossl_inline void lh_##type##_doall(LHASH_OF(type) *lh,\223 259 static ossl_unused ossl_inline void \ 260 lh_##type##_doall(LHASH_OF(type) *lh, void (*doall)(type *)) \ 224 261 { \ 225 262 OPENSSL_LH_doall((OPENSSL_LHASH *)lh, (OPENSSL_LH_DOALL_FUNC)doall); \ 226 263 } \ 227 static ossl_unused ossl_inline void lh_##type##_doall_arg(LHASH_OF(type) *lh,\228 void (*doallarg)(type *, void *), \229 264 static ossl_unused ossl_inline void \ 265 lh_##type##_doall_arg(LHASH_OF(type) *lh, \ 266 void (*doallarg)(type *, void *), void *arg) \ 230 267 { \ 231 268 OPENSSL_LH_doall_arg((OPENSSL_LHASH *)lh, \ 232 269 (OPENSSL_LH_DOALL_FUNCARG)doallarg, arg); \ 233 270 } \ 271 LHASH_OF(type) 272 273 # define DEFINE_LHASH_OF(type) \ 274 DEFINE_LHASH_OF_EX(type); \ 275 DEFINE_LHASH_OF_DEPRECATED(type) \ 234 276 LHASH_OF(type) 235 277 … … 246 288 argtype *arg) \ 247 289 { \ 248 OPENSSL_LH_doall_arg((OPENSSL_LHASH *)lh, (OPENSSL_LH_DOALL_FUNCARG)fn, (void *)arg); \ 290 OPENSSL_LH_doall_arg((OPENSSL_LHASH *)lh, \ 291 (OPENSSL_LH_DOALL_FUNCARG)fn, (void *)arg); \ 249 292 } \ 250 293 LHASH_OF(type) -
trunk/src/libs/openssl-3.1.0/gen-includes/openssl/pkcs12.h
r94320 r99371 3 3 * Generated by Makefile from pkcs12.h.in 4 4 * 5 * Copyright 1999-202 1The OpenSSL Project Authors. All Rights Reserved.5 * Copyright 1999-2022 The OpenSSL Project Authors. All Rights Reserved. 6 6 * 7 7 * Licensed under the Apache License 2.0 (the "License"). You may not use … … 26 26 # include <openssl/x509.h> 27 27 # include <openssl/pkcs12err.h> 28 # ifndef OPENSSL_NO_STDIO 29 # include <stdio.h> 30 # endif 28 31 29 32 #ifdef __cplusplus -
trunk/src/libs/openssl-3.1.0/gen-includes/openssl/pkcs7.h
r94320 r99371 3 3 * Generated by Makefile from pkcs7.h.in 4 4 * 5 * Copyright 1995-202 1The OpenSSL Project Authors. All Rights Reserved.5 * Copyright 1995-2022 The OpenSSL Project Authors. All Rights Reserved. 6 6 * 7 7 * Licensed under the Apache License 2.0 (the "License"). You may not use … … 29 29 # include <openssl/types.h> 30 30 # include <openssl/pkcs7err.h> 31 # ifndef OPENSSL_NO_STDIO 32 # include <stdio.h> 33 # endif 31 34 32 35 #ifdef __cplusplus -
trunk/src/libs/openssl-3.1.0/gen-includes/openssl/ssl.h
r95219 r99371 43 43 # include <openssl/sslerr.h> 44 44 # include <openssl/prov_ssl.h> 45 # ifndef OPENSSL_NO_STDIO 46 # include <stdio.h> 47 # endif 45 48 46 49 #ifdef __cplusplus -
trunk/src/libs/openssl-3.1.0/gen-includes/openssl/x509.h
r95219 r99371 41 41 # include <openssl/sha.h> 42 42 # include <openssl/x509err.h> 43 # ifndef OPENSSL_NO_STDIO 44 # include <stdio.h> 45 # endif 43 46 44 47 #ifdef __cplusplus -
trunk/src/libs/openssl-3.1.0/gen-includes/openssl/x509v3.h
r94320 r99371 3 3 * Generated by Makefile from x509v3.h.in 4 4 * 5 * Copyright 1999-202 1The OpenSSL Project Authors. All Rights Reserved.5 * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. 6 6 * 7 7 * Licensed under the Apache License 2.0 (the "License"). You may not use … … 26 26 # include <openssl/conf.h> 27 27 # include <openssl/x509v3err.h> 28 # ifndef OPENSSL_NO_STDIO 29 # include <stdio.h> 30 # endif 28 31 29 32 #ifdef __cplusplus … … 178 181 ASN1_IA5STRING *rfc822Name; 179 182 ASN1_IA5STRING *dNSName; 180 ASN1_ TYPE*x400Address;183 ASN1_STRING *x400Address; 181 184 X509_NAME *directoryName; 182 185 EDIPARTYNAME *ediPartyName;
Note:
See TracChangeset
for help on using the changeset viewer.