- Timestamp:
- Mar 7, 2022 4:12:02 PM (3 years ago)
- Location:
- trunk/src/libs/openssl-3.0.1/crypto
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/libs/openssl-3.0.1/crypto/bn/Makefile.kmk
r94107 r94114 100 100 perl $(VBOX_PATH_CRYPTO_BN)/asm/x86-gf2m.pl macosx $(VBOX_PATH_CRYPTO)/genasm-macosx/x86-gf2m.S 101 101 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-avx2.pl macosx $(VBOX_PATH_CRYPTO)/genasm-macosx/rsaz-avx2.S 102 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-avx512.pl macosx $(VBOX_PATH_CRYPTO)/genasm-macosx/rsaz-avx512.S 102 103 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-x86_64.pl macosx $(VBOX_PATH_CRYPTO)/genasm-macosx/rsaz-x86_64.S 103 104 perl $(VBOX_PATH_CRYPTO_BN)/asm/x86_64-gf2m.pl macosx $(VBOX_PATH_CRYPTO)/genasm-macosx/x86_64-gf2m.S … … 109 110 perl $(VBOX_PATH_CRYPTO_BN)/asm/x86-gf2m.pl win32n $(VBOX_PATH_CRYPTO)/genasm-nasm/x86-gf2m.S 110 111 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-avx2.pl nasm $(VBOX_PATH_CRYPTO)/genasm-nasm/rsaz-avx2.S 112 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-avx512.pl nasm $(VBOX_PATH_CRYPTO)/genasm-nasm/rsaz-avx512.S 111 113 perl $(VBOX_PATH_CRYPTO_BN)/asm/rsaz-x86_64.pl nasm $(VBOX_PATH_CRYPTO)/genasm-nasm/rsaz-x86_64.S 112 114 perl $(VBOX_PATH_CRYPTO_BN)/asm/x86_64-gf2m.pl nasm $(VBOX_PATH_CRYPTO)/genasm-nasm/x86_64-gf2m.S -
trunk/src/libs/openssl-3.0.1/crypto/genasm-nasm/rsaz-avx2.S
r94083 r94114 6 6 7 7 8 global rsaz_ 1024_sqr_avx28 global rsaz_avx2_eligible 9 9 10 ALIGN 64 11 rsaz_1024_sqr_avx2: 12 mov QWORD[8+rsp],rdi ;WIN64 prologue 13 mov QWORD[16+rsp],rsi 14 mov rax,rsp 15 $L$SEH_begin_rsaz_1024_sqr_avx2: 16 mov rdi,rcx 17 mov rsi,rdx 18 mov rdx,r8 19 mov rcx,r9 20 mov r8,QWORD[40+rsp] 21 22 23 24 lea rax,[rsp] 25 26 push rbx 27 28 push rbp 29 30 push r12 31 32 push r13 33 34 push r14 35 36 push r15 37 38 vzeroupper 39 lea rsp,[((-168))+rsp] 40 vmovaps XMMWORD[(-216)+rax],xmm6 41 vmovaps XMMWORD[(-200)+rax],xmm7 42 vmovaps XMMWORD[(-184)+rax],xmm8 43 vmovaps XMMWORD[(-168)+rax],xmm9 44 vmovaps XMMWORD[(-152)+rax],xmm10 45 vmovaps XMMWORD[(-136)+rax],xmm11 46 vmovaps XMMWORD[(-120)+rax],xmm12 47 vmovaps XMMWORD[(-104)+rax],xmm13 48 vmovaps XMMWORD[(-88)+rax],xmm14 49 vmovaps XMMWORD[(-72)+rax],xmm15 50 $L$sqr_1024_body: 51 mov rbp,rax 52 53 mov r13,rdx 54 sub rsp,832 55 mov r15,r13 56 sub rdi,-128 57 sub rsi,-128 58 sub r13,-128 59 60 and r15,4095 61 add r15,32*10 62 shr r15,12 63 vpxor ymm9,ymm9,ymm9 64 jz NEAR $L$sqr_1024_no_n_copy 65 66 67 68 69 70 sub rsp,32*10 71 vmovdqu ymm0,YMMWORD[((0-128))+r13] 72 and rsp,-2048 73 vmovdqu ymm1,YMMWORD[((32-128))+r13] 74 vmovdqu ymm2,YMMWORD[((64-128))+r13] 75 vmovdqu ymm3,YMMWORD[((96-128))+r13] 76 vmovdqu ymm4,YMMWORD[((128-128))+r13] 77 vmovdqu ymm5,YMMWORD[((160-128))+r13] 78 vmovdqu ymm6,YMMWORD[((192-128))+r13] 79 vmovdqu ymm7,YMMWORD[((224-128))+r13] 80 vmovdqu ymm8,YMMWORD[((256-128))+r13] 81 lea r13,[((832+128))+rsp] 82 vmovdqu YMMWORD[(0-128)+r13],ymm0 83 vmovdqu YMMWORD[(32-128)+r13],ymm1 84 vmovdqu YMMWORD[(64-128)+r13],ymm2 85 vmovdqu YMMWORD[(96-128)+r13],ymm3 86 vmovdqu YMMWORD[(128-128)+r13],ymm4 87 vmovdqu YMMWORD[(160-128)+r13],ymm5 88 vmovdqu YMMWORD[(192-128)+r13],ymm6 89 vmovdqu YMMWORD[(224-128)+r13],ymm7 90 vmovdqu YMMWORD[(256-128)+r13],ymm8 91 vmovdqu YMMWORD[(288-128)+r13],ymm9 92 93 $L$sqr_1024_no_n_copy: 94 and rsp,-1024 95 96 vmovdqu ymm1,YMMWORD[((32-128))+rsi] 97 vmovdqu ymm2,YMMWORD[((64-128))+rsi] 98 vmovdqu ymm3,YMMWORD[((96-128))+rsi] 99 vmovdqu ymm4,YMMWORD[((128-128))+rsi] 100 vmovdqu ymm5,YMMWORD[((160-128))+rsi] 101 vmovdqu ymm6,YMMWORD[((192-128))+rsi] 102 vmovdqu ymm7,YMMWORD[((224-128))+rsi] 103 vmovdqu ymm8,YMMWORD[((256-128))+rsi] 104 105 lea rbx,[192+rsp] 106 vmovdqu ymm15,YMMWORD[$L$and_mask] 107 jmp NEAR $L$OOP_GRANDE_SQR_1024 108 109 ALIGN 32 110 $L$OOP_GRANDE_SQR_1024: 111 lea r9,[((576+128))+rsp] 112 lea r12,[448+rsp] 113 114 115 116 117 vpaddq ymm1,ymm1,ymm1 118 vpbroadcastq ymm10,QWORD[((0-128))+rsi] 119 vpaddq ymm2,ymm2,ymm2 120 vmovdqa YMMWORD[(0-128)+r9],ymm1 121 vpaddq ymm3,ymm3,ymm3 122 vmovdqa YMMWORD[(32-128)+r9],ymm2 123 vpaddq ymm4,ymm4,ymm4 124 vmovdqa YMMWORD[(64-128)+r9],ymm3 125 vpaddq ymm5,ymm5,ymm5 126 vmovdqa YMMWORD[(96-128)+r9],ymm4 127 vpaddq ymm6,ymm6,ymm6 128 vmovdqa YMMWORD[(128-128)+r9],ymm5 129 vpaddq ymm7,ymm7,ymm7 130 vmovdqa YMMWORD[(160-128)+r9],ymm6 131 vpaddq ymm8,ymm8,ymm8 132 vmovdqa YMMWORD[(192-128)+r9],ymm7 133 vpxor ymm9,ymm9,ymm9 134 vmovdqa YMMWORD[(224-128)+r9],ymm8 135 136 vpmuludq ymm0,ymm10,YMMWORD[((0-128))+rsi] 137 vpbroadcastq ymm11,QWORD[((32-128))+rsi] 138 vmovdqu YMMWORD[(288-192)+rbx],ymm9 139 vpmuludq ymm1,ymm1,ymm10 140 vmovdqu YMMWORD[(320-448)+r12],ymm9 141 vpmuludq ymm2,ymm2,ymm10 142 vmovdqu YMMWORD[(352-448)+r12],ymm9 143 vpmuludq ymm3,ymm3,ymm10 144 vmovdqu YMMWORD[(384-448)+r12],ymm9 145 vpmuludq ymm4,ymm4,ymm10 146 vmovdqu YMMWORD[(416-448)+r12],ymm9 147 vpmuludq ymm5,ymm5,ymm10 148 vmovdqu YMMWORD[(448-448)+r12],ymm9 149 vpmuludq ymm6,ymm6,ymm10 150 vmovdqu YMMWORD[(480-448)+r12],ymm9 151 vpmuludq ymm7,ymm7,ymm10 152 vmovdqu YMMWORD[(512-448)+r12],ymm9 153 vpmuludq ymm8,ymm8,ymm10 154 vpbroadcastq ymm10,QWORD[((64-128))+rsi] 155 vmovdqu YMMWORD[(544-448)+r12],ymm9 156 157 mov r15,rsi 158 mov r14d,4 159 jmp NEAR $L$sqr_entry_1024 160 ALIGN 32 161 $L$OOP_SQR_1024: 162 vpbroadcastq ymm11,QWORD[((32-128))+r15] 163 vpmuludq ymm0,ymm10,YMMWORD[((0-128))+rsi] 164 vpaddq ymm0,ymm0,YMMWORD[((0-192))+rbx] 165 vpmuludq ymm1,ymm10,YMMWORD[((0-128))+r9] 166 vpaddq ymm1,ymm1,YMMWORD[((32-192))+rbx] 167 vpmuludq ymm2,ymm10,YMMWORD[((32-128))+r9] 168 vpaddq ymm2,ymm2,YMMWORD[((64-192))+rbx] 169 vpmuludq ymm3,ymm10,YMMWORD[((64-128))+r9] 170 vpaddq ymm3,ymm3,YMMWORD[((96-192))+rbx] 171 vpmuludq ymm4,ymm10,YMMWORD[((96-128))+r9] 172 vpaddq ymm4,ymm4,YMMWORD[((128-192))+rbx] 173 vpmuludq ymm5,ymm10,YMMWORD[((128-128))+r9] 174 vpaddq ymm5,ymm5,YMMWORD[((160-192))+rbx] 175 vpmuludq ymm6,ymm10,YMMWORD[((160-128))+r9] 176 vpaddq ymm6,ymm6,YMMWORD[((192-192))+rbx] 177 vpmuludq ymm7,ymm10,YMMWORD[((192-128))+r9] 178 vpaddq ymm7,ymm7,YMMWORD[((224-192))+rbx] 179 vpmuludq ymm8,ymm10,YMMWORD[((224-128))+r9] 180 vpbroadcastq ymm10,QWORD[((64-128))+r15] 181 vpaddq ymm8,ymm8,YMMWORD[((256-192))+rbx] 182 $L$sqr_entry_1024: 183 vmovdqu YMMWORD[(0-192)+rbx],ymm0 184 vmovdqu YMMWORD[(32-192)+rbx],ymm1 185 186 vpmuludq ymm12,ymm11,YMMWORD[((32-128))+rsi] 187 vpaddq ymm2,ymm2,ymm12 188 vpmuludq ymm14,ymm11,YMMWORD[((32-128))+r9] 189 vpaddq ymm3,ymm3,ymm14 190 vpmuludq ymm13,ymm11,YMMWORD[((64-128))+r9] 191 vpaddq ymm4,ymm4,ymm13 192 vpmuludq ymm12,ymm11,YMMWORD[((96-128))+r9] 193 vpaddq ymm5,ymm5,ymm12 194 vpmuludq ymm14,ymm11,YMMWORD[((128-128))+r9] 195 vpaddq ymm6,ymm6,ymm14 196 vpmuludq ymm13,ymm11,YMMWORD[((160-128))+r9] 197 vpaddq ymm7,ymm7,ymm13 198 vpmuludq ymm12,ymm11,YMMWORD[((192-128))+r9] 199 vpaddq ymm8,ymm8,ymm12 200 vpmuludq ymm0,ymm11,YMMWORD[((224-128))+r9] 201 vpbroadcastq ymm11,QWORD[((96-128))+r15] 202 vpaddq ymm0,ymm0,YMMWORD[((288-192))+rbx] 203 204 vmovdqu YMMWORD[(64-192)+rbx],ymm2 205 vmovdqu YMMWORD[(96-192)+rbx],ymm3 206 207 vpmuludq ymm13,ymm10,YMMWORD[((64-128))+rsi] 208 vpaddq ymm4,ymm4,ymm13 209 vpmuludq ymm12,ymm10,YMMWORD[((64-128))+r9] 210 vpaddq ymm5,ymm5,ymm12 211 vpmuludq ymm14,ymm10,YMMWORD[((96-128))+r9] 212 vpaddq ymm6,ymm6,ymm14 213 vpmuludq ymm13,ymm10,YMMWORD[((128-128))+r9] 214 vpaddq ymm7,ymm7,ymm13 215 vpmuludq ymm12,ymm10,YMMWORD[((160-128))+r9] 216 vpaddq ymm8,ymm8,ymm12 217 vpmuludq ymm14,ymm10,YMMWORD[((192-128))+r9] 218 vpaddq ymm0,ymm0,ymm14 219 vpmuludq ymm1,ymm10,YMMWORD[((224-128))+r9] 220 vpbroadcastq ymm10,QWORD[((128-128))+r15] 221 vpaddq ymm1,ymm1,YMMWORD[((320-448))+r12] 222 223 vmovdqu YMMWORD[(128-192)+rbx],ymm4 224 vmovdqu YMMWORD[(160-192)+rbx],ymm5 225 226 vpmuludq ymm12,ymm11,YMMWORD[((96-128))+rsi] 227 vpaddq ymm6,ymm6,ymm12 228 vpmuludq ymm14,ymm11,YMMWORD[((96-128))+r9] 229 vpaddq ymm7,ymm7,ymm14 230 vpmuludq ymm13,ymm11,YMMWORD[((128-128))+r9] 231 vpaddq ymm8,ymm8,ymm13 232 vpmuludq ymm12,ymm11,YMMWORD[((160-128))+r9] 233 vpaddq ymm0,ymm0,ymm12 234 vpmuludq ymm14,ymm11,YMMWORD[((192-128))+r9] 235 vpaddq ymm1,ymm1,ymm14 236 vpmuludq ymm2,ymm11,YMMWORD[((224-128))+r9] 237 vpbroadcastq ymm11,QWORD[((160-128))+r15] 238 vpaddq ymm2,ymm2,YMMWORD[((352-448))+r12] 239 240 vmovdqu YMMWORD[(192-192)+rbx],ymm6 241 vmovdqu YMMWORD[(224-192)+rbx],ymm7 242 243 vpmuludq ymm12,ymm10,YMMWORD[((128-128))+rsi] 244 vpaddq ymm8,ymm8,ymm12 245 vpmuludq ymm14,ymm10,YMMWORD[((128-128))+r9] 246 vpaddq ymm0,ymm0,ymm14 247 vpmuludq ymm13,ymm10,YMMWORD[((160-128))+r9] 248 vpaddq ymm1,ymm1,ymm13 249 vpmuludq ymm12,ymm10,YMMWORD[((192-128))+r9] 250 vpaddq ymm2,ymm2,ymm12 251 vpmuludq ymm3,ymm10,YMMWORD[((224-128))+r9] 252 vpbroadcastq ymm10,QWORD[((192-128))+r15] 253 vpaddq ymm3,ymm3,YMMWORD[((384-448))+r12] 254 255 vmovdqu YMMWORD[(256-192)+rbx],ymm8 256 vmovdqu YMMWORD[(288-192)+rbx],ymm0 257 lea rbx,[8+rbx] 258 259 vpmuludq ymm13,ymm11,YMMWORD[((160-128))+rsi] 260 vpaddq ymm1,ymm1,ymm13 261 vpmuludq ymm12,ymm11,YMMWORD[((160-128))+r9] 262 vpaddq ymm2,ymm2,ymm12 263 vpmuludq ymm14,ymm11,YMMWORD[((192-128))+r9] 264 vpaddq ymm3,ymm3,ymm14 265 vpmuludq ymm4,ymm11,YMMWORD[((224-128))+r9] 266 vpbroadcastq ymm11,QWORD[((224-128))+r15] 267 vpaddq ymm4,ymm4,YMMWORD[((416-448))+r12] 268 269 vmovdqu YMMWORD[(320-448)+r12],ymm1 270 vmovdqu YMMWORD[(352-448)+r12],ymm2 271 272 vpmuludq ymm12,ymm10,YMMWORD[((192-128))+rsi] 273 vpaddq ymm3,ymm3,ymm12 274 vpmuludq ymm14,ymm10,YMMWORD[((192-128))+r9] 275 vpbroadcastq ymm0,QWORD[((256-128))+r15] 276 vpaddq ymm4,ymm4,ymm14 277 vpmuludq ymm5,ymm10,YMMWORD[((224-128))+r9] 278 vpbroadcastq ymm10,QWORD[((0+8-128))+r15] 279 vpaddq ymm5,ymm5,YMMWORD[((448-448))+r12] 280 281 vmovdqu YMMWORD[(384-448)+r12],ymm3 282 vmovdqu YMMWORD[(416-448)+r12],ymm4 283 lea r15,[8+r15] 284 285 vpmuludq ymm12,ymm11,YMMWORD[((224-128))+rsi] 286 vpaddq ymm5,ymm5,ymm12 287 vpmuludq ymm6,ymm11,YMMWORD[((224-128))+r9] 288 vpaddq ymm6,ymm6,YMMWORD[((480-448))+r12] 289 290 vpmuludq ymm7,ymm0,YMMWORD[((256-128))+rsi] 291 vmovdqu YMMWORD[(448-448)+r12],ymm5 292 vpaddq ymm7,ymm7,YMMWORD[((512-448))+r12] 293 vmovdqu YMMWORD[(480-448)+r12],ymm6 294 vmovdqu YMMWORD[(512-448)+r12],ymm7 295 lea r12,[8+r12] 296 297 dec r14d 298 jnz NEAR $L$OOP_SQR_1024 299 300 vmovdqu ymm8,YMMWORD[256+rsp] 301 vmovdqu ymm1,YMMWORD[288+rsp] 302 vmovdqu ymm2,YMMWORD[320+rsp] 303 lea rbx,[192+rsp] 304 305 vpsrlq ymm14,ymm8,29 306 vpand ymm8,ymm8,ymm15 307 vpsrlq ymm11,ymm1,29 308 vpand ymm1,ymm1,ymm15 309 310 vpermq ymm14,ymm14,0x93 311 vpxor ymm9,ymm9,ymm9 312 vpermq ymm11,ymm11,0x93 313 314 vpblendd ymm10,ymm14,ymm9,3 315 vpblendd ymm14,ymm11,ymm14,3 316 vpaddq ymm8,ymm8,ymm10 317 vpblendd ymm11,ymm9,ymm11,3 318 vpaddq ymm1,ymm1,ymm14 319 vpaddq ymm2,ymm2,ymm11 320 vmovdqu YMMWORD[(288-192)+rbx],ymm1 321 vmovdqu YMMWORD[(320-192)+rbx],ymm2 322 323 mov rax,QWORD[rsp] 324 mov r10,QWORD[8+rsp] 325 mov r11,QWORD[16+rsp] 326 mov r12,QWORD[24+rsp] 327 vmovdqu ymm1,YMMWORD[32+rsp] 328 vmovdqu ymm2,YMMWORD[((64-192))+rbx] 329 vmovdqu ymm3,YMMWORD[((96-192))+rbx] 330 vmovdqu ymm4,YMMWORD[((128-192))+rbx] 331 vmovdqu ymm5,YMMWORD[((160-192))+rbx] 332 vmovdqu ymm6,YMMWORD[((192-192))+rbx] 333 vmovdqu ymm7,YMMWORD[((224-192))+rbx] 334 335 mov r9,rax 336 imul eax,ecx 337 and eax,0x1fffffff 338 vmovd xmm12,eax 339 340 mov rdx,rax 341 imul rax,QWORD[((-128))+r13] 342 vpbroadcastq ymm12,xmm12 343 add r9,rax 344 mov rax,rdx 345 imul rax,QWORD[((8-128))+r13] 346 shr r9,29 347 add r10,rax 348 mov rax,rdx 349 imul rax,QWORD[((16-128))+r13] 350 add r10,r9 351 add r11,rax 352 imul rdx,QWORD[((24-128))+r13] 353 add r12,rdx 354 355 mov rax,r10 356 imul eax,ecx 357 and eax,0x1fffffff 358 359 mov r14d,9 360 jmp NEAR $L$OOP_REDUCE_1024 361 362 ALIGN 32 363 $L$OOP_REDUCE_1024: 364 vmovd xmm13,eax 365 vpbroadcastq ymm13,xmm13 366 367 vpmuludq ymm10,ymm12,YMMWORD[((32-128))+r13] 368 mov rdx,rax 369 imul rax,QWORD[((-128))+r13] 370 vpaddq ymm1,ymm1,ymm10 371 add r10,rax 372 vpmuludq ymm14,ymm12,YMMWORD[((64-128))+r13] 373 mov rax,rdx 374 imul rax,QWORD[((8-128))+r13] 375 vpaddq ymm2,ymm2,ymm14 376 vpmuludq ymm11,ymm12,YMMWORD[((96-128))+r13] 377 DB 0x67 378 add r11,rax 379 DB 0x67 380 mov rax,rdx 381 imul rax,QWORD[((16-128))+r13] 382 shr r10,29 383 vpaddq ymm3,ymm3,ymm11 384 vpmuludq ymm10,ymm12,YMMWORD[((128-128))+r13] 385 add r12,rax 386 add r11,r10 387 vpaddq ymm4,ymm4,ymm10 388 vpmuludq ymm14,ymm12,YMMWORD[((160-128))+r13] 389 mov rax,r11 390 imul eax,ecx 391 vpaddq ymm5,ymm5,ymm14 392 vpmuludq ymm11,ymm12,YMMWORD[((192-128))+r13] 393 and eax,0x1fffffff 394 vpaddq ymm6,ymm6,ymm11 395 vpmuludq ymm10,ymm12,YMMWORD[((224-128))+r13] 396 vpaddq ymm7,ymm7,ymm10 397 vpmuludq ymm14,ymm12,YMMWORD[((256-128))+r13] 398 vmovd xmm12,eax 399 400 vpaddq ymm8,ymm8,ymm14 401 402 vpbroadcastq ymm12,xmm12 403 404 vpmuludq ymm11,ymm13,YMMWORD[((32-8-128))+r13] 405 vmovdqu ymm14,YMMWORD[((96-8-128))+r13] 406 mov rdx,rax 407 imul rax,QWORD[((-128))+r13] 408 vpaddq ymm1,ymm1,ymm11 409 vpmuludq ymm10,ymm13,YMMWORD[((64-8-128))+r13] 410 vmovdqu ymm11,YMMWORD[((128-8-128))+r13] 411 add r11,rax 412 mov rax,rdx 413 imul rax,QWORD[((8-128))+r13] 414 vpaddq ymm2,ymm2,ymm10 415 add rax,r12 416 shr r11,29 417 vpmuludq ymm14,ymm14,ymm13 418 vmovdqu ymm10,YMMWORD[((160-8-128))+r13] 419 add rax,r11 420 vpaddq ymm3,ymm3,ymm14 421 vpmuludq ymm11,ymm11,ymm13 422 vmovdqu ymm14,YMMWORD[((192-8-128))+r13] 423 DB 0x67 424 mov r12,rax 425 imul eax,ecx 426 vpaddq ymm4,ymm4,ymm11 427 vpmuludq ymm10,ymm10,ymm13 428 DB 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 429 and eax,0x1fffffff 430 vpaddq ymm5,ymm5,ymm10 431 vpmuludq ymm14,ymm14,ymm13 432 vmovdqu ymm10,YMMWORD[((256-8-128))+r13] 433 vpaddq ymm6,ymm6,ymm14 434 vpmuludq ymm11,ymm11,ymm13 435 vmovdqu ymm9,YMMWORD[((288-8-128))+r13] 436 vmovd xmm0,eax 437 imul rax,QWORD[((-128))+r13] 438 vpaddq ymm7,ymm7,ymm11 439 vpmuludq ymm10,ymm10,ymm13 440 vmovdqu ymm14,YMMWORD[((32-16-128))+r13] 441 vpbroadcastq ymm0,xmm0 442 vpaddq ymm8,ymm8,ymm10 443 vpmuludq ymm9,ymm9,ymm13 444 vmovdqu ymm11,YMMWORD[((64-16-128))+r13] 445 add r12,rax 446 447 vmovdqu ymm13,YMMWORD[((32-24-128))+r13] 448 vpmuludq ymm14,ymm14,ymm12 449 vmovdqu ymm10,YMMWORD[((96-16-128))+r13] 450 vpaddq ymm1,ymm1,ymm14 451 vpmuludq ymm13,ymm13,ymm0 452 vpmuludq ymm11,ymm11,ymm12 453 DB 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff 454 vpaddq ymm13,ymm13,ymm1 455 vpaddq ymm2,ymm2,ymm11 456 vpmuludq ymm10,ymm10,ymm12 457 vmovdqu ymm11,YMMWORD[((160-16-128))+r13] 458 DB 0x67 459 vmovq rax,xmm13 460 vmovdqu YMMWORD[rsp],ymm13 461 vpaddq ymm3,ymm3,ymm10 462 vpmuludq ymm14,ymm14,ymm12 463 vmovdqu ymm10,YMMWORD[((192-16-128))+r13] 464 vpaddq ymm4,ymm4,ymm14 465 vpmuludq ymm11,ymm11,ymm12 466 vmovdqu ymm14,YMMWORD[((224-16-128))+r13] 467 vpaddq ymm5,ymm5,ymm11 468 vpmuludq ymm10,ymm10,ymm12 469 vmovdqu ymm11,YMMWORD[((256-16-128))+r13] 470 vpaddq ymm6,ymm6,ymm10 471 vpmuludq ymm14,ymm14,ymm12 472 shr r12,29 473 vmovdqu ymm10,YMMWORD[((288-16-128))+r13] 474 add rax,r12 475 vpaddq ymm7,ymm7,ymm14 476 vpmuludq ymm11,ymm11,ymm12 477 478 mov r9,rax 479 imul eax,ecx 480 vpaddq ymm8,ymm8,ymm11 481 vpmuludq ymm10,ymm10,ymm12 482 and eax,0x1fffffff 483 vmovd xmm12,eax 484 vmovdqu ymm11,YMMWORD[((96-24-128))+r13] 485 DB 0x67 486 vpaddq ymm9,ymm9,ymm10 487 vpbroadcastq ymm12,xmm12 488 489 vpmuludq ymm14,ymm0,YMMWORD[((64-24-128))+r13] 490 vmovdqu ymm10,YMMWORD[((128-24-128))+r13] 491 mov rdx,rax 492 imul rax,QWORD[((-128))+r13] 493 mov r10,QWORD[8+rsp] 494 vpaddq ymm1,ymm2,ymm14 495 vpmuludq ymm11,ymm11,ymm0 496 vmovdqu ymm14,YMMWORD[((160-24-128))+r13] 497 add r9,rax 498 mov rax,rdx 499 imul rax,QWORD[((8-128))+r13] 500 DB 0x67 501 shr r9,29 502 mov r11,QWORD[16+rsp] 503 vpaddq ymm2,ymm3,ymm11 504 vpmuludq ymm10,ymm10,ymm0 505 vmovdqu ymm11,YMMWORD[((192-24-128))+r13] 506 add r10,rax 507 mov rax,rdx 508 imul rax,QWORD[((16-128))+r13] 509 vpaddq ymm3,ymm4,ymm10 510 vpmuludq ymm14,ymm14,ymm0 511 vmovdqu ymm10,YMMWORD[((224-24-128))+r13] 512 imul rdx,QWORD[((24-128))+r13] 513 add r11,rax 514 lea rax,[r10*1+r9] 515 vpaddq ymm4,ymm5,ymm14 516 vpmuludq ymm11,ymm11,ymm0 517 vmovdqu ymm14,YMMWORD[((256-24-128))+r13] 518 mov r10,rax 519 imul eax,ecx 520 vpmuludq ymm10,ymm10,ymm0 521 vpaddq ymm5,ymm6,ymm11 522 vmovdqu ymm11,YMMWORD[((288-24-128))+r13] 523 and eax,0x1fffffff 524 vpaddq ymm6,ymm7,ymm10 525 vpmuludq ymm14,ymm14,ymm0 526 add rdx,QWORD[24+rsp] 527 vpaddq ymm7,ymm8,ymm14 528 vpmuludq ymm11,ymm11,ymm0 529 vpaddq ymm8,ymm9,ymm11 530 vmovq xmm9,r12 531 mov r12,rdx 532 533 dec r14d 534 jnz NEAR $L$OOP_REDUCE_1024 535 lea r12,[448+rsp] 536 vpaddq ymm0,ymm13,ymm9 537 vpxor ymm9,ymm9,ymm9 538 539 vpaddq ymm0,ymm0,YMMWORD[((288-192))+rbx] 540 vpaddq ymm1,ymm1,YMMWORD[((320-448))+r12] 541 vpaddq ymm2,ymm2,YMMWORD[((352-448))+r12] 542 vpaddq ymm3,ymm3,YMMWORD[((384-448))+r12] 543 vpaddq ymm4,ymm4,YMMWORD[((416-448))+r12] 544 vpaddq ymm5,ymm5,YMMWORD[((448-448))+r12] 545 vpaddq ymm6,ymm6,YMMWORD[((480-448))+r12] 546 vpaddq ymm7,ymm7,YMMWORD[((512-448))+r12] 547 vpaddq ymm8,ymm8,YMMWORD[((544-448))+r12] 548 549 vpsrlq ymm14,ymm0,29 550 vpand ymm0,ymm0,ymm15 551 vpsrlq ymm11,ymm1,29 552 vpand ymm1,ymm1,ymm15 553 vpsrlq ymm12,ymm2,29 554 vpermq ymm14,ymm14,0x93 555 vpand ymm2,ymm2,ymm15 556 vpsrlq ymm13,ymm3,29 557 vpermq ymm11,ymm11,0x93 558 vpand ymm3,ymm3,ymm15 559 vpermq ymm12,ymm12,0x93 560 561 vpblendd ymm10,ymm14,ymm9,3 562 vpermq ymm13,ymm13,0x93 563 vpblendd ymm14,ymm11,ymm14,3 564 vpaddq ymm0,ymm0,ymm10 565 vpblendd ymm11,ymm12,ymm11,3 566 vpaddq ymm1,ymm1,ymm14 567 vpblendd ymm12,ymm13,ymm12,3 568 vpaddq ymm2,ymm2,ymm11 569 vpblendd ymm13,ymm9,ymm13,3 570 vpaddq ymm3,ymm3,ymm12 571 vpaddq ymm4,ymm4,ymm13 572 573 vpsrlq ymm14,ymm0,29 574 vpand ymm0,ymm0,ymm15 575 vpsrlq ymm11,ymm1,29 576 vpand ymm1,ymm1,ymm15 577 vpsrlq ymm12,ymm2,29 578 vpermq ymm14,ymm14,0x93 579 vpand ymm2,ymm2,ymm15 580 vpsrlq ymm13,ymm3,29 581 vpermq ymm11,ymm11,0x93 582 vpand ymm3,ymm3,ymm15 583 vpermq ymm12,ymm12,0x93 584 585 vpblendd ymm10,ymm14,ymm9,3 586 vpermq ymm13,ymm13,0x93 587 vpblendd ymm14,ymm11,ymm14,3 588 vpaddq ymm0,ymm0,ymm10 589 vpblendd ymm11,ymm12,ymm11,3 590 vpaddq ymm1,ymm1,ymm14 591 vmovdqu YMMWORD[(0-128)+rdi],ymm0 592 vpblendd ymm12,ymm13,ymm12,3 593 vpaddq ymm2,ymm2,ymm11 594 vmovdqu YMMWORD[(32-128)+rdi],ymm1 595 vpblendd ymm13,ymm9,ymm13,3 596 vpaddq ymm3,ymm3,ymm12 597 vmovdqu YMMWORD[(64-128)+rdi],ymm2 598 vpaddq ymm4,ymm4,ymm13 599 vmovdqu YMMWORD[(96-128)+rdi],ymm3 600 vpsrlq ymm14,ymm4,29 601 vpand ymm4,ymm4,ymm15 602 vpsrlq ymm11,ymm5,29 603 vpand ymm5,ymm5,ymm15 604 vpsrlq ymm12,ymm6,29 605 vpermq ymm14,ymm14,0x93 606 vpand ymm6,ymm6,ymm15 607 vpsrlq ymm13,ymm7,29 608 vpermq ymm11,ymm11,0x93 609 vpand ymm7,ymm7,ymm15 610 vpsrlq ymm0,ymm8,29 611 vpermq ymm12,ymm12,0x93 612 vpand ymm8,ymm8,ymm15 613 vpermq ymm13,ymm13,0x93 614 615 vpblendd ymm10,ymm14,ymm9,3 616 vpermq ymm0,ymm0,0x93 617 vpblendd ymm14,ymm11,ymm14,3 618 vpaddq ymm4,ymm4,ymm10 619 vpblendd ymm11,ymm12,ymm11,3 620 vpaddq ymm5,ymm5,ymm14 621 vpblendd ymm12,ymm13,ymm12,3 622 vpaddq ymm6,ymm6,ymm11 623 vpblendd ymm13,ymm0,ymm13,3 624 vpaddq ymm7,ymm7,ymm12 625 vpaddq ymm8,ymm8,ymm13 626 627 vpsrlq ymm14,ymm4,29 628 vpand ymm4,ymm4,ymm15 629 vpsrlq ymm11,ymm5,29 630 vpand ymm5,ymm5,ymm15 631 vpsrlq ymm12,ymm6,29 632 vpermq ymm14,ymm14,0x93 633 vpand ymm6,ymm6,ymm15 634 vpsrlq ymm13,ymm7,29 635 vpermq ymm11,ymm11,0x93 636 vpand ymm7,ymm7,ymm15 637 vpsrlq ymm0,ymm8,29 638 vpermq ymm12,ymm12,0x93 639 vpand ymm8,ymm8,ymm15 640 vpermq ymm13,ymm13,0x93 641 642 vpblendd ymm10,ymm14,ymm9,3 643 vpermq ymm0,ymm0,0x93 644 vpblendd ymm14,ymm11,ymm14,3 645 vpaddq ymm4,ymm4,ymm10 646 vpblendd ymm11,ymm12,ymm11,3 647 vpaddq ymm5,ymm5,ymm14 648 vmovdqu YMMWORD[(128-128)+rdi],ymm4 649 vpblendd ymm12,ymm13,ymm12,3 650 vpaddq ymm6,ymm6,ymm11 651 vmovdqu YMMWORD[(160-128)+rdi],ymm5 652 vpblendd ymm13,ymm0,ymm13,3 653 vpaddq ymm7,ymm7,ymm12 654 vmovdqu YMMWORD[(192-128)+rdi],ymm6 655 vpaddq ymm8,ymm8,ymm13 656 vmovdqu YMMWORD[(224-128)+rdi],ymm7 657 vmovdqu YMMWORD[(256-128)+rdi],ymm8 658 659 mov rsi,rdi 660 dec r8d 661 jne NEAR $L$OOP_GRANDE_SQR_1024 662 663 vzeroall 664 mov rax,rbp 665 666 $L$sqr_1024_in_tail: 667 movaps xmm6,XMMWORD[((-216))+rax] 668 movaps xmm7,XMMWORD[((-200))+rax] 669 movaps xmm8,XMMWORD[((-184))+rax] 670 movaps xmm9,XMMWORD[((-168))+rax] 671 movaps xmm10,XMMWORD[((-152))+rax] 672 movaps xmm11,XMMWORD[((-136))+rax] 673 movaps xmm12,XMMWORD[((-120))+rax] 674 movaps xmm13,XMMWORD[((-104))+rax] 675 movaps xmm14,XMMWORD[((-88))+rax] 676 movaps xmm15,XMMWORD[((-72))+rax] 677 mov r15,QWORD[((-48))+rax] 678 679 mov r14,QWORD[((-40))+rax] 680 681 mov r13,QWORD[((-32))+rax] 682 683 mov r12,QWORD[((-24))+rax] 684 685 mov rbp,QWORD[((-16))+rax] 686 687 mov rbx,QWORD[((-8))+rax] 688 689 lea rsp,[rax] 690 691 $L$sqr_1024_epilogue: 692 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 693 mov rsi,QWORD[16+rsp] 694 DB 0F3h,0C3h ;repret 695 696 $L$SEH_end_rsaz_1024_sqr_avx2: 697 global rsaz_1024_mul_avx2 698 699 ALIGN 64 700 rsaz_1024_mul_avx2: 701 mov QWORD[8+rsp],rdi ;WIN64 prologue 702 mov QWORD[16+rsp],rsi 703 mov rax,rsp 704 $L$SEH_begin_rsaz_1024_mul_avx2: 705 mov rdi,rcx 706 mov rsi,rdx 707 mov rdx,r8 708 mov rcx,r9 709 mov r8,QWORD[40+rsp] 710 711 712 713 lea rax,[rsp] 714 715 push rbx 716 717 push rbp 718 719 push r12 720 721 push r13 722 723 push r14 724 725 push r15 726 727 vzeroupper 728 lea rsp,[((-168))+rsp] 729 vmovaps XMMWORD[(-216)+rax],xmm6 730 vmovaps XMMWORD[(-200)+rax],xmm7 731 vmovaps XMMWORD[(-184)+rax],xmm8 732 vmovaps XMMWORD[(-168)+rax],xmm9 733 vmovaps XMMWORD[(-152)+rax],xmm10 734 vmovaps XMMWORD[(-136)+rax],xmm11 735 vmovaps XMMWORD[(-120)+rax],xmm12 736 vmovaps XMMWORD[(-104)+rax],xmm13 737 vmovaps XMMWORD[(-88)+rax],xmm14 738 vmovaps XMMWORD[(-72)+rax],xmm15 739 $L$mul_1024_body: 740 mov rbp,rax 741 742 vzeroall 743 mov r13,rdx 744 sub rsp,64 745 746 747 748 749 750 751 DB 0x67,0x67 752 mov r15,rsi 753 and r15,4095 754 add r15,32*10 755 shr r15,12 756 mov r15,rsi 757 cmovnz rsi,r13 758 cmovnz r13,r15 759 760 mov r15,rcx 761 sub rsi,-128 762 sub rcx,-128 763 sub rdi,-128 764 765 and r15,4095 766 add r15,32*10 767 DB 0x67,0x67 768 shr r15,12 769 jz NEAR $L$mul_1024_no_n_copy 770 771 772 773 774 775 sub rsp,32*10 776 vmovdqu ymm0,YMMWORD[((0-128))+rcx] 777 and rsp,-512 778 vmovdqu ymm1,YMMWORD[((32-128))+rcx] 779 vmovdqu ymm2,YMMWORD[((64-128))+rcx] 780 vmovdqu ymm3,YMMWORD[((96-128))+rcx] 781 vmovdqu ymm4,YMMWORD[((128-128))+rcx] 782 vmovdqu ymm5,YMMWORD[((160-128))+rcx] 783 vmovdqu ymm6,YMMWORD[((192-128))+rcx] 784 vmovdqu ymm7,YMMWORD[((224-128))+rcx] 785 vmovdqu ymm8,YMMWORD[((256-128))+rcx] 786 lea rcx,[((64+128))+rsp] 787 vmovdqu YMMWORD[(0-128)+rcx],ymm0 788 vpxor ymm0,ymm0,ymm0 789 vmovdqu YMMWORD[(32-128)+rcx],ymm1 790 vpxor ymm1,ymm1,ymm1 791 vmovdqu YMMWORD[(64-128)+rcx],ymm2 792 vpxor ymm2,ymm2,ymm2 793 vmovdqu YMMWORD[(96-128)+rcx],ymm3 794 vpxor ymm3,ymm3,ymm3 795 vmovdqu YMMWORD[(128-128)+rcx],ymm4 796 vpxor ymm4,ymm4,ymm4 797 vmovdqu YMMWORD[(160-128)+rcx],ymm5 798 vpxor ymm5,ymm5,ymm5 799 vmovdqu YMMWORD[(192-128)+rcx],ymm6 800 vpxor ymm6,ymm6,ymm6 801 vmovdqu YMMWORD[(224-128)+rcx],ymm7 802 vpxor ymm7,ymm7,ymm7 803 vmovdqu YMMWORD[(256-128)+rcx],ymm8 804 vmovdqa ymm8,ymm0 805 vmovdqu YMMWORD[(288-128)+rcx],ymm9 806 $L$mul_1024_no_n_copy: 807 and rsp,-64 808 809 mov rbx,QWORD[r13] 810 vpbroadcastq ymm10,QWORD[r13] 811 vmovdqu YMMWORD[rsp],ymm0 812 xor r9,r9 813 DB 0x67 814 xor r10,r10 815 xor r11,r11 816 xor r12,r12 817 818 vmovdqu ymm15,YMMWORD[$L$and_mask] 819 mov r14d,9 820 vmovdqu YMMWORD[(288-128)+rdi],ymm9 821 jmp NEAR $L$oop_mul_1024 822 823 ALIGN 32 824 $L$oop_mul_1024: 825 vpsrlq ymm9,ymm3,29 826 mov rax,rbx 827 imul rax,QWORD[((-128))+rsi] 828 add rax,r9 829 mov r10,rbx 830 imul r10,QWORD[((8-128))+rsi] 831 add r10,QWORD[8+rsp] 832 833 mov r9,rax 834 imul eax,r8d 835 and eax,0x1fffffff 836 837 mov r11,rbx 838 imul r11,QWORD[((16-128))+rsi] 839 add r11,QWORD[16+rsp] 840 841 mov r12,rbx 842 imul r12,QWORD[((24-128))+rsi] 843 add r12,QWORD[24+rsp] 844 vpmuludq ymm0,ymm10,YMMWORD[((32-128))+rsi] 845 vmovd xmm11,eax 846 vpaddq ymm1,ymm1,ymm0 847 vpmuludq ymm12,ymm10,YMMWORD[((64-128))+rsi] 848 vpbroadcastq ymm11,xmm11 849 vpaddq ymm2,ymm2,ymm12 850 vpmuludq ymm13,ymm10,YMMWORD[((96-128))+rsi] 851 vpand ymm3,ymm3,ymm15 852 vpaddq ymm3,ymm3,ymm13 853 vpmuludq ymm0,ymm10,YMMWORD[((128-128))+rsi] 854 vpaddq ymm4,ymm4,ymm0 855 vpmuludq ymm12,ymm10,YMMWORD[((160-128))+rsi] 856 vpaddq ymm5,ymm5,ymm12 857 vpmuludq ymm13,ymm10,YMMWORD[((192-128))+rsi] 858 vpaddq ymm6,ymm6,ymm13 859 vpmuludq ymm0,ymm10,YMMWORD[((224-128))+rsi] 860 vpermq ymm9,ymm9,0x93 861 vpaddq ymm7,ymm7,ymm0 862 vpmuludq ymm12,ymm10,YMMWORD[((256-128))+rsi] 863 vpbroadcastq ymm10,QWORD[8+r13] 864 vpaddq ymm8,ymm8,ymm12 865 866 mov rdx,rax 867 imul rax,QWORD[((-128))+rcx] 868 add r9,rax 869 mov rax,rdx 870 imul rax,QWORD[((8-128))+rcx] 871 add r10,rax 872 mov rax,rdx 873 imul rax,QWORD[((16-128))+rcx] 874 add r11,rax 875 shr r9,29 876 imul rdx,QWORD[((24-128))+rcx] 877 add r12,rdx 878 add r10,r9 879 880 vpmuludq ymm13,ymm11,YMMWORD[((32-128))+rcx] 881 vmovq rbx,xmm10 882 vpaddq ymm1,ymm1,ymm13 883 vpmuludq ymm0,ymm11,YMMWORD[((64-128))+rcx] 884 vpaddq ymm2,ymm2,ymm0 885 vpmuludq ymm12,ymm11,YMMWORD[((96-128))+rcx] 886 vpaddq ymm3,ymm3,ymm12 887 vpmuludq ymm13,ymm11,YMMWORD[((128-128))+rcx] 888 vpaddq ymm4,ymm4,ymm13 889 vpmuludq ymm0,ymm11,YMMWORD[((160-128))+rcx] 890 vpaddq ymm5,ymm5,ymm0 891 vpmuludq ymm12,ymm11,YMMWORD[((192-128))+rcx] 892 vpaddq ymm6,ymm6,ymm12 893 vpmuludq ymm13,ymm11,YMMWORD[((224-128))+rcx] 894 vpblendd ymm12,ymm9,ymm14,3 895 vpaddq ymm7,ymm7,ymm13 896 vpmuludq ymm0,ymm11,YMMWORD[((256-128))+rcx] 897 vpaddq ymm3,ymm3,ymm12 898 vpaddq ymm8,ymm8,ymm0 899 900 mov rax,rbx 901 imul rax,QWORD[((-128))+rsi] 902 add r10,rax 903 vmovdqu ymm12,YMMWORD[((-8+32-128))+rsi] 904 mov rax,rbx 905 imul rax,QWORD[((8-128))+rsi] 906 add r11,rax 907 vmovdqu ymm13,YMMWORD[((-8+64-128))+rsi] 908 909 mov rax,r10 910 vpblendd ymm9,ymm9,ymm14,0xfc 911 imul eax,r8d 912 vpaddq ymm4,ymm4,ymm9 913 and eax,0x1fffffff 914 915 imul rbx,QWORD[((16-128))+rsi] 916 add r12,rbx 917 vpmuludq ymm12,ymm12,ymm10 918 vmovd xmm11,eax 919 vmovdqu ymm0,YMMWORD[((-8+96-128))+rsi] 920 vpaddq ymm1,ymm1,ymm12 921 vpmuludq ymm13,ymm13,ymm10 922 vpbroadcastq ymm11,xmm11 923 vmovdqu ymm12,YMMWORD[((-8+128-128))+rsi] 924 vpaddq ymm2,ymm2,ymm13 925 vpmuludq ymm0,ymm0,ymm10 926 vmovdqu ymm13,YMMWORD[((-8+160-128))+rsi] 927 vpaddq ymm3,ymm3,ymm0 928 vpmuludq ymm12,ymm12,ymm10 929 vmovdqu ymm0,YMMWORD[((-8+192-128))+rsi] 930 vpaddq ymm4,ymm4,ymm12 931 vpmuludq ymm13,ymm13,ymm10 932 vmovdqu ymm12,YMMWORD[((-8+224-128))+rsi] 933 vpaddq ymm5,ymm5,ymm13 934 vpmuludq ymm0,ymm0,ymm10 935 vmovdqu ymm13,YMMWORD[((-8+256-128))+rsi] 936 vpaddq ymm6,ymm6,ymm0 937 vpmuludq ymm12,ymm12,ymm10 938 vmovdqu ymm9,YMMWORD[((-8+288-128))+rsi] 939 vpaddq ymm7,ymm7,ymm12 940 vpmuludq ymm13,ymm13,ymm10 941 vpaddq ymm8,ymm8,ymm13 942 vpmuludq ymm9,ymm9,ymm10 943 vpbroadcastq ymm10,QWORD[16+r13] 944 945 mov rdx,rax 946 imul rax,QWORD[((-128))+rcx] 947 add r10,rax 948 vmovdqu ymm0,YMMWORD[((-8+32-128))+rcx] 949 mov rax,rdx 950 imul rax,QWORD[((8-128))+rcx] 951 add r11,rax 952 vmovdqu ymm12,YMMWORD[((-8+64-128))+rcx] 953 shr r10,29 954 imul rdx,QWORD[((16-128))+rcx] 955 add r12,rdx 956 add r11,r10 957 958 vpmuludq ymm0,ymm0,ymm11 959 vmovq rbx,xmm10 960 vmovdqu ymm13,YMMWORD[((-8+96-128))+rcx] 961 vpaddq ymm1,ymm1,ymm0 962 vpmuludq ymm12,ymm12,ymm11 963 vmovdqu ymm0,YMMWORD[((-8+128-128))+rcx] 964 vpaddq ymm2,ymm2,ymm12 965 vpmuludq ymm13,ymm13,ymm11 966 vmovdqu ymm12,YMMWORD[((-8+160-128))+rcx] 967 vpaddq ymm3,ymm3,ymm13 968 vpmuludq ymm0,ymm0,ymm11 969 vmovdqu ymm13,YMMWORD[((-8+192-128))+rcx] 970 vpaddq ymm4,ymm4,ymm0 971 vpmuludq ymm12,ymm12,ymm11 972 vmovdqu ymm0,YMMWORD[((-8+224-128))+rcx] 973 vpaddq ymm5,ymm5,ymm12 974 vpmuludq ymm13,ymm13,ymm11 975 vmovdqu ymm12,YMMWORD[((-8+256-128))+rcx] 976 vpaddq ymm6,ymm6,ymm13 977 vpmuludq ymm0,ymm0,ymm11 978 vmovdqu ymm13,YMMWORD[((-8+288-128))+rcx] 979 vpaddq ymm7,ymm7,ymm0 980 vpmuludq ymm12,ymm12,ymm11 981 vpaddq ymm8,ymm8,ymm12 982 vpmuludq ymm13,ymm13,ymm11 983 vpaddq ymm9,ymm9,ymm13 984 985 vmovdqu ymm0,YMMWORD[((-16+32-128))+rsi] 986 mov rax,rbx 987 imul rax,QWORD[((-128))+rsi] 988 add rax,r11 989 990 vmovdqu ymm12,YMMWORD[((-16+64-128))+rsi] 991 mov r11,rax 992 imul eax,r8d 993 and eax,0x1fffffff 994 995 imul rbx,QWORD[((8-128))+rsi] 996 add r12,rbx 997 vpmuludq ymm0,ymm0,ymm10 998 vmovd xmm11,eax 999 vmovdqu ymm13,YMMWORD[((-16+96-128))+rsi] 1000 vpaddq ymm1,ymm1,ymm0 1001 vpmuludq ymm12,ymm12,ymm10 1002 vpbroadcastq ymm11,xmm11 1003 vmovdqu ymm0,YMMWORD[((-16+128-128))+rsi] 1004 vpaddq ymm2,ymm2,ymm12 1005 vpmuludq ymm13,ymm13,ymm10 1006 vmovdqu ymm12,YMMWORD[((-16+160-128))+rsi] 1007 vpaddq ymm3,ymm3,ymm13 1008 vpmuludq ymm0,ymm0,ymm10 1009 vmovdqu ymm13,YMMWORD[((-16+192-128))+rsi] 1010 vpaddq ymm4,ymm4,ymm0 1011 vpmuludq ymm12,ymm12,ymm10 1012 vmovdqu ymm0,YMMWORD[((-16+224-128))+rsi] 1013 vpaddq ymm5,ymm5,ymm12 1014 vpmuludq ymm13,ymm13,ymm10 1015 vmovdqu ymm12,YMMWORD[((-16+256-128))+rsi] 1016 vpaddq ymm6,ymm6,ymm13 1017 vpmuludq ymm0,ymm0,ymm10 1018 vmovdqu ymm13,YMMWORD[((-16+288-128))+rsi] 1019 vpaddq ymm7,ymm7,ymm0 1020 vpmuludq ymm12,ymm12,ymm10 1021 vpaddq ymm8,ymm8,ymm12 1022 vpmuludq ymm13,ymm13,ymm10 1023 vpbroadcastq ymm10,QWORD[24+r13] 1024 vpaddq ymm9,ymm9,ymm13 1025 1026 vmovdqu ymm0,YMMWORD[((-16+32-128))+rcx] 1027 mov rdx,rax 1028 imul rax,QWORD[((-128))+rcx] 1029 add r11,rax 1030 vmovdqu ymm12,YMMWORD[((-16+64-128))+rcx] 1031 imul rdx,QWORD[((8-128))+rcx] 1032 add r12,rdx 1033 shr r11,29 1034 1035 vpmuludq ymm0,ymm0,ymm11 1036 vmovq rbx,xmm10 1037 vmovdqu ymm13,YMMWORD[((-16+96-128))+rcx] 1038 vpaddq ymm1,ymm1,ymm0 1039 vpmuludq ymm12,ymm12,ymm11 1040 vmovdqu ymm0,YMMWORD[((-16+128-128))+rcx] 1041 vpaddq ymm2,ymm2,ymm12 1042 vpmuludq ymm13,ymm13,ymm11 1043 vmovdqu ymm12,YMMWORD[((-16+160-128))+rcx] 1044 vpaddq ymm3,ymm3,ymm13 1045 vpmuludq ymm0,ymm0,ymm11 1046 vmovdqu ymm13,YMMWORD[((-16+192-128))+rcx] 1047 vpaddq ymm4,ymm4,ymm0 1048 vpmuludq ymm12,ymm12,ymm11 1049 vmovdqu ymm0,YMMWORD[((-16+224-128))+rcx] 1050 vpaddq ymm5,ymm5,ymm12 1051 vpmuludq ymm13,ymm13,ymm11 1052 vmovdqu ymm12,YMMWORD[((-16+256-128))+rcx] 1053 vpaddq ymm6,ymm6,ymm13 1054 vpmuludq ymm0,ymm0,ymm11 1055 vmovdqu ymm13,YMMWORD[((-16+288-128))+rcx] 1056 vpaddq ymm7,ymm7,ymm0 1057 vpmuludq ymm12,ymm12,ymm11 1058 vmovdqu ymm0,YMMWORD[((-24+32-128))+rsi] 1059 vpaddq ymm8,ymm8,ymm12 1060 vpmuludq ymm13,ymm13,ymm11 1061 vmovdqu ymm12,YMMWORD[((-24+64-128))+rsi] 1062 vpaddq ymm9,ymm9,ymm13 1063 1064 add r12,r11 1065 imul rbx,QWORD[((-128))+rsi] 1066 add r12,rbx 1067 1068 mov rax,r12 1069 imul eax,r8d 1070 and eax,0x1fffffff 1071 1072 vpmuludq ymm0,ymm0,ymm10 1073 vmovd xmm11,eax 1074 vmovdqu ymm13,YMMWORD[((-24+96-128))+rsi] 1075 vpaddq ymm1,ymm1,ymm0 1076 vpmuludq ymm12,ymm12,ymm10 1077 vpbroadcastq ymm11,xmm11 1078 vmovdqu ymm0,YMMWORD[((-24+128-128))+rsi] 1079 vpaddq ymm2,ymm2,ymm12 1080 vpmuludq ymm13,ymm13,ymm10 1081 vmovdqu ymm12,YMMWORD[((-24+160-128))+rsi] 1082 vpaddq ymm3,ymm3,ymm13 1083 vpmuludq ymm0,ymm0,ymm10 1084 vmovdqu ymm13,YMMWORD[((-24+192-128))+rsi] 1085 vpaddq ymm4,ymm4,ymm0 1086 vpmuludq ymm12,ymm12,ymm10 1087 vmovdqu ymm0,YMMWORD[((-24+224-128))+rsi] 1088 vpaddq ymm5,ymm5,ymm12 1089 vpmuludq ymm13,ymm13,ymm10 1090 vmovdqu ymm12,YMMWORD[((-24+256-128))+rsi] 1091 vpaddq ymm6,ymm6,ymm13 1092 vpmuludq ymm0,ymm0,ymm10 1093 vmovdqu ymm13,YMMWORD[((-24+288-128))+rsi] 1094 vpaddq ymm7,ymm7,ymm0 1095 vpmuludq ymm12,ymm12,ymm10 1096 vpaddq ymm8,ymm8,ymm12 1097 vpmuludq ymm13,ymm13,ymm10 1098 vpbroadcastq ymm10,QWORD[32+r13] 1099 vpaddq ymm9,ymm9,ymm13 1100 add r13,32 1101 1102 vmovdqu ymm0,YMMWORD[((-24+32-128))+rcx] 1103 imul rax,QWORD[((-128))+rcx] 1104 add r12,rax 1105 shr r12,29 1106 1107 vmovdqu ymm12,YMMWORD[((-24+64-128))+rcx] 1108 vpmuludq ymm0,ymm0,ymm11 1109 vmovq rbx,xmm10 1110 vmovdqu ymm13,YMMWORD[((-24+96-128))+rcx] 1111 vpaddq ymm0,ymm1,ymm0 1112 vpmuludq ymm12,ymm12,ymm11 1113 vmovdqu YMMWORD[rsp],ymm0 1114 vpaddq ymm1,ymm2,ymm12 1115 vmovdqu ymm0,YMMWORD[((-24+128-128))+rcx] 1116 vpmuludq ymm13,ymm13,ymm11 1117 vmovdqu ymm12,YMMWORD[((-24+160-128))+rcx] 1118 vpaddq ymm2,ymm3,ymm13 1119 vpmuludq ymm0,ymm0,ymm11 1120 vmovdqu ymm13,YMMWORD[((-24+192-128))+rcx] 1121 vpaddq ymm3,ymm4,ymm0 1122 vpmuludq ymm12,ymm12,ymm11 1123 vmovdqu ymm0,YMMWORD[((-24+224-128))+rcx] 1124 vpaddq ymm4,ymm5,ymm12 1125 vpmuludq ymm13,ymm13,ymm11 1126 vmovdqu ymm12,YMMWORD[((-24+256-128))+rcx] 1127 vpaddq ymm5,ymm6,ymm13 1128 vpmuludq ymm0,ymm0,ymm11 1129 vmovdqu ymm13,YMMWORD[((-24+288-128))+rcx] 1130 mov r9,r12 1131 vpaddq ymm6,ymm7,ymm0 1132 vpmuludq ymm12,ymm12,ymm11 1133 add r9,QWORD[rsp] 1134 vpaddq ymm7,ymm8,ymm12 1135 vpmuludq ymm13,ymm13,ymm11 1136 vmovq xmm12,r12 1137 vpaddq ymm8,ymm9,ymm13 1138 1139 dec r14d 1140 jnz NEAR $L$oop_mul_1024 1141 vpaddq ymm0,ymm12,YMMWORD[rsp] 1142 1143 vpsrlq ymm12,ymm0,29 1144 vpand ymm0,ymm0,ymm15 1145 vpsrlq ymm13,ymm1,29 1146 vpand ymm1,ymm1,ymm15 1147 vpsrlq ymm10,ymm2,29 1148 vpermq ymm12,ymm12,0x93 1149 vpand ymm2,ymm2,ymm15 1150 vpsrlq ymm11,ymm3,29 1151 vpermq ymm13,ymm13,0x93 1152 vpand ymm3,ymm3,ymm15 1153 1154 vpblendd ymm9,ymm12,ymm14,3 1155 vpermq ymm10,ymm10,0x93 1156 vpblendd ymm12,ymm13,ymm12,3 1157 vpermq ymm11,ymm11,0x93 1158 vpaddq ymm0,ymm0,ymm9 1159 vpblendd ymm13,ymm10,ymm13,3 1160 vpaddq ymm1,ymm1,ymm12 1161 vpblendd ymm10,ymm11,ymm10,3 1162 vpaddq ymm2,ymm2,ymm13 1163 vpblendd ymm11,ymm14,ymm11,3 1164 vpaddq ymm3,ymm3,ymm10 1165 vpaddq ymm4,ymm4,ymm11 1166 1167 vpsrlq ymm12,ymm0,29 1168 vpand ymm0,ymm0,ymm15 1169 vpsrlq ymm13,ymm1,29 1170 vpand ymm1,ymm1,ymm15 1171 vpsrlq ymm10,ymm2,29 1172 vpermq ymm12,ymm12,0x93 1173 vpand ymm2,ymm2,ymm15 1174 vpsrlq ymm11,ymm3,29 1175 vpermq ymm13,ymm13,0x93 1176 vpand ymm3,ymm3,ymm15 1177 vpermq ymm10,ymm10,0x93 1178 1179 vpblendd ymm9,ymm12,ymm14,3 1180 vpermq ymm11,ymm11,0x93 1181 vpblendd ymm12,ymm13,ymm12,3 1182 vpaddq ymm0,ymm0,ymm9 1183 vpblendd ymm13,ymm10,ymm13,3 1184 vpaddq ymm1,ymm1,ymm12 1185 vpblendd ymm10,ymm11,ymm10,3 1186 vpaddq ymm2,ymm2,ymm13 1187 vpblendd ymm11,ymm14,ymm11,3 1188 vpaddq ymm3,ymm3,ymm10 1189 vpaddq ymm4,ymm4,ymm11 1190 1191 vmovdqu YMMWORD[(0-128)+rdi],ymm0 1192 vmovdqu YMMWORD[(32-128)+rdi],ymm1 1193 vmovdqu YMMWORD[(64-128)+rdi],ymm2 1194 vmovdqu YMMWORD[(96-128)+rdi],ymm3 1195 vpsrlq ymm12,ymm4,29 1196 vpand ymm4,ymm4,ymm15 1197 vpsrlq ymm13,ymm5,29 1198 vpand ymm5,ymm5,ymm15 1199 vpsrlq ymm10,ymm6,29 1200 vpermq ymm12,ymm12,0x93 1201 vpand ymm6,ymm6,ymm15 1202 vpsrlq ymm11,ymm7,29 1203 vpermq ymm13,ymm13,0x93 1204 vpand ymm7,ymm7,ymm15 1205 vpsrlq ymm0,ymm8,29 1206 vpermq ymm10,ymm10,0x93 1207 vpand ymm8,ymm8,ymm15 1208 vpermq ymm11,ymm11,0x93 1209 1210 vpblendd ymm9,ymm12,ymm14,3 1211 vpermq ymm0,ymm0,0x93 1212 vpblendd ymm12,ymm13,ymm12,3 1213 vpaddq ymm4,ymm4,ymm9 1214 vpblendd ymm13,ymm10,ymm13,3 1215 vpaddq ymm5,ymm5,ymm12 1216 vpblendd ymm10,ymm11,ymm10,3 1217 vpaddq ymm6,ymm6,ymm13 1218 vpblendd ymm11,ymm0,ymm11,3 1219 vpaddq ymm7,ymm7,ymm10 1220 vpaddq ymm8,ymm8,ymm11 1221 1222 vpsrlq ymm12,ymm4,29 1223 vpand ymm4,ymm4,ymm15 1224 vpsrlq ymm13,ymm5,29 1225 vpand ymm5,ymm5,ymm15 1226 vpsrlq ymm10,ymm6,29 1227 vpermq ymm12,ymm12,0x93 1228 vpand ymm6,ymm6,ymm15 1229 vpsrlq ymm11,ymm7,29 1230 vpermq ymm13,ymm13,0x93 1231 vpand ymm7,ymm7,ymm15 1232 vpsrlq ymm0,ymm8,29 1233 vpermq ymm10,ymm10,0x93 1234 vpand ymm8,ymm8,ymm15 1235 vpermq ymm11,ymm11,0x93 1236 1237 vpblendd ymm9,ymm12,ymm14,3 1238 vpermq ymm0,ymm0,0x93 1239 vpblendd ymm12,ymm13,ymm12,3 1240 vpaddq ymm4,ymm4,ymm9 1241 vpblendd ymm13,ymm10,ymm13,3 1242 vpaddq ymm5,ymm5,ymm12 1243 vpblendd ymm10,ymm11,ymm10,3 1244 vpaddq ymm6,ymm6,ymm13 1245 vpblendd ymm11,ymm0,ymm11,3 1246 vpaddq ymm7,ymm7,ymm10 1247 vpaddq ymm8,ymm8,ymm11 1248 1249 vmovdqu YMMWORD[(128-128)+rdi],ymm4 1250 vmovdqu YMMWORD[(160-128)+rdi],ymm5 1251 vmovdqu YMMWORD[(192-128)+rdi],ymm6 1252 vmovdqu YMMWORD[(224-128)+rdi],ymm7 1253 vmovdqu YMMWORD[(256-128)+rdi],ymm8 1254 vzeroupper 1255 1256 mov rax,rbp 1257 1258 $L$mul_1024_in_tail: 1259 movaps xmm6,XMMWORD[((-216))+rax] 1260 movaps xmm7,XMMWORD[((-200))+rax] 1261 movaps xmm8,XMMWORD[((-184))+rax] 1262 movaps xmm9,XMMWORD[((-168))+rax] 1263 movaps xmm10,XMMWORD[((-152))+rax] 1264 movaps xmm11,XMMWORD[((-136))+rax] 1265 movaps xmm12,XMMWORD[((-120))+rax] 1266 movaps xmm13,XMMWORD[((-104))+rax] 1267 movaps xmm14,XMMWORD[((-88))+rax] 1268 movaps xmm15,XMMWORD[((-72))+rax] 1269 mov r15,QWORD[((-48))+rax] 1270 1271 mov r14,QWORD[((-40))+rax] 1272 1273 mov r13,QWORD[((-32))+rax] 1274 1275 mov r12,QWORD[((-24))+rax] 1276 1277 mov rbp,QWORD[((-16))+rax] 1278 1279 mov rbx,QWORD[((-8))+rax] 1280 1281 lea rsp,[rax] 1282 1283 $L$mul_1024_epilogue: 1284 mov rdi,QWORD[8+rsp] ;WIN64 epilogue 1285 mov rsi,QWORD[16+rsp] 1286 DB 0F3h,0C3h ;repret 1287 1288 $L$SEH_end_rsaz_1024_mul_avx2: 1289 global rsaz_1024_red2norm_avx2 1290 1291 ALIGN 32 1292 rsaz_1024_red2norm_avx2: 1293 1294 sub rdx,-128 1295 xor rax,rax 1296 mov r8,QWORD[((-128))+rdx] 1297 mov r9,QWORD[((-120))+rdx] 1298 mov r10,QWORD[((-112))+rdx] 1299 shl r8,0 1300 shl r9,29 1301 mov r11,r10 1302 shl r10,58 1303 shr r11,6 1304 add rax,r8 1305 add rax,r9 1306 add rax,r10 1307 adc r11,0 1308 mov QWORD[rcx],rax 1309 mov rax,r11 1310 mov r8,QWORD[((-104))+rdx] 1311 mov r9,QWORD[((-96))+rdx] 1312 shl r8,23 1313 mov r10,r9 1314 shl r9,52 1315 shr r10,12 1316 add rax,r8 1317 add rax,r9 1318 adc r10,0 1319 mov QWORD[8+rcx],rax 1320 mov rax,r10 1321 mov r11,QWORD[((-88))+rdx] 1322 mov r8,QWORD[((-80))+rdx] 1323 shl r11,17 1324 mov r9,r8 1325 shl r8,46 1326 shr r9,18 1327 add rax,r11 1328 add rax,r8 1329 adc r9,0 1330 mov QWORD[16+rcx],rax 1331 mov rax,r9 1332 mov r10,QWORD[((-72))+rdx] 1333 mov r11,QWORD[((-64))+rdx] 1334 shl r10,11 1335 mov r8,r11 1336 shl r11,40 1337 shr r8,24 1338 add rax,r10 1339 add rax,r11 1340 adc r8,0 1341 mov QWORD[24+rcx],rax 1342 mov rax,r8 1343 mov r9,QWORD[((-56))+rdx] 1344 mov r10,QWORD[((-48))+rdx] 1345 mov r11,QWORD[((-40))+rdx] 1346 shl r9,5 1347 shl r10,34 1348 mov r8,r11 1349 shl r11,63 1350 shr r8,1 1351 add rax,r9 1352 add rax,r10 1353 add rax,r11 1354 adc r8,0 1355 mov QWORD[32+rcx],rax 1356 mov rax,r8 1357 mov r9,QWORD[((-32))+rdx] 1358 mov r10,QWORD[((-24))+rdx] 1359 shl r9,28 1360 mov r11,r10 1361 shl r10,57 1362 shr r11,7 1363 add rax,r9 1364 add rax,r10 1365 adc r11,0 1366 mov QWORD[40+rcx],rax 1367 mov rax,r11 1368 mov r8,QWORD[((-16))+rdx] 1369 mov r9,QWORD[((-8))+rdx] 1370 shl r8,22 1371 mov r10,r9 1372 shl r9,51 1373 shr r10,13 1374 add rax,r8 1375 add rax,r9 1376 adc r10,0 1377 mov QWORD[48+rcx],rax 1378 mov rax,r10 1379 mov r11,QWORD[rdx] 1380 mov r8,QWORD[8+rdx] 1381 shl r11,16 1382 mov r9,r8 1383 shl r8,45 1384 shr r9,19 1385 add rax,r11 1386 add rax,r8 1387 adc r9,0 1388 mov QWORD[56+rcx],rax 1389 mov rax,r9 1390 mov r10,QWORD[16+rdx] 1391 mov r11,QWORD[24+rdx] 1392 shl r10,10 1393 mov r8,r11 1394 shl r11,39 1395 shr r8,25 1396 add rax,r10 1397 add rax,r11 1398 adc r8,0 1399 mov QWORD[64+rcx],rax 1400 mov rax,r8 1401 mov r9,QWORD[32+rdx] 1402 mov r10,QWORD[40+rdx] 1403 mov r11,QWORD[48+rdx] 1404 shl r9,4 1405 shl r10,33 1406 mov r8,r11 1407 shl r11,62 1408 shr r8,2 1409 add rax,r9 1410 add rax,r10 1411 add rax,r11 1412 adc r8,0 1413 mov QWORD[72+rcx],rax 1414 mov rax,r8 1415 mov r9,QWORD[56+rdx] 1416 mov r10,QWORD[64+rdx] 1417 shl r9,27 1418 mov r11,r10 1419 shl r10,56 1420 shr r11,8 1421 add rax,r9 1422 add rax,r10 1423 adc r11,0 1424 mov QWORD[80+rcx],rax 1425 mov rax,r11 1426 mov r8,QWORD[72+rdx] 1427 mov r9,QWORD[80+rdx] 1428 shl r8,21 1429 mov r10,r9 1430 shl r9,50 1431 shr r10,14 1432 add rax,r8 1433 add rax,r9 1434 adc r10,0 1435 mov QWORD[88+rcx],rax 1436 mov rax,r10 1437 mov r11,QWORD[88+rdx] 1438 mov r8,QWORD[96+rdx] 1439 shl r11,15 1440 mov r9,r8 1441 shl r8,44 1442 shr r9,20 1443 add rax,r11 1444 add rax,r8 1445 adc r9,0 1446 mov QWORD[96+rcx],rax 1447 mov rax,r9 1448 mov r10,QWORD[104+rdx] 1449 mov r11,QWORD[112+rdx] 1450 shl r10,9 1451 mov r8,r11 1452 shl r11,38 1453 shr r8,26 1454 add rax,r10 1455 add rax,r11 1456 adc r8,0 1457 mov QWORD[104+rcx],rax 1458 mov rax,r8 1459 mov r9,QWORD[120+rdx] 1460 mov r10,QWORD[128+rdx] 1461 mov r11,QWORD[136+rdx] 1462 shl r9,3 1463 shl r10,32 1464 mov r8,r11 1465 shl r11,61 1466 shr r8,3 1467 add rax,r9 1468 add rax,r10 1469 add rax,r11 1470 adc r8,0 1471 mov QWORD[112+rcx],rax 1472 mov rax,r8 1473 mov r9,QWORD[144+rdx] 1474 mov r10,QWORD[152+rdx] 1475 shl r9,26 1476 mov r11,r10 1477 shl r10,55 1478 shr r11,9 1479 add rax,r9 1480 add rax,r10 1481 adc r11,0 1482 mov QWORD[120+rcx],rax 1483 mov rax,r11 10 rsaz_avx2_eligible: 11 xor eax,eax 1484 12 DB 0F3h,0C3h ;repret 1485 13 1486 14 15 global rsaz_1024_sqr_avx2 16 global rsaz_1024_mul_avx2 17 global rsaz_1024_norm2red_avx2 18 global rsaz_1024_red2norm_avx2 19 global rsaz_1024_scatter5_avx2 20 global rsaz_1024_gather5_avx2 1487 21 1488 global rsaz_1024_norm2red_avx2 1489 1490 ALIGN 32 22 rsaz_1024_sqr_avx2: 23 rsaz_1024_mul_avx2: 1491 24 rsaz_1024_norm2red_avx2: 1492 1493 sub rcx,-128 1494 mov r8,QWORD[rdx] 1495 mov eax,0x1fffffff 1496 mov r9,QWORD[8+rdx] 1497 mov r11,r8 1498 shr r11,0 1499 and r11,rax 1500 mov QWORD[((-128))+rcx],r11 1501 mov r10,r8 1502 shr r10,29 1503 and r10,rax 1504 mov QWORD[((-120))+rcx],r10 1505 shrd r8,r9,58 1506 and r8,rax 1507 mov QWORD[((-112))+rcx],r8 1508 mov r10,QWORD[16+rdx] 1509 mov r8,r9 1510 shr r8,23 1511 and r8,rax 1512 mov QWORD[((-104))+rcx],r8 1513 shrd r9,r10,52 1514 and r9,rax 1515 mov QWORD[((-96))+rcx],r9 1516 mov r11,QWORD[24+rdx] 1517 mov r9,r10 1518 shr r9,17 1519 and r9,rax 1520 mov QWORD[((-88))+rcx],r9 1521 shrd r10,r11,46 1522 and r10,rax 1523 mov QWORD[((-80))+rcx],r10 1524 mov r8,QWORD[32+rdx] 1525 mov r10,r11 1526 shr r10,11 1527 and r10,rax 1528 mov QWORD[((-72))+rcx],r10 1529 shrd r11,r8,40 1530 and r11,rax 1531 mov QWORD[((-64))+rcx],r11 1532 mov r9,QWORD[40+rdx] 1533 mov r11,r8 1534 shr r11,5 1535 and r11,rax 1536 mov QWORD[((-56))+rcx],r11 1537 mov r10,r8 1538 shr r10,34 1539 and r10,rax 1540 mov QWORD[((-48))+rcx],r10 1541 shrd r8,r9,63 1542 and r8,rax 1543 mov QWORD[((-40))+rcx],r8 1544 mov r10,QWORD[48+rdx] 1545 mov r8,r9 1546 shr r8,28 1547 and r8,rax 1548 mov QWORD[((-32))+rcx],r8 1549 shrd r9,r10,57 1550 and r9,rax 1551 mov QWORD[((-24))+rcx],r9 1552 mov r11,QWORD[56+rdx] 1553 mov r9,r10 1554 shr r9,22 1555 and r9,rax 1556 mov QWORD[((-16))+rcx],r9 1557 shrd r10,r11,51 1558 and r10,rax 1559 mov QWORD[((-8))+rcx],r10 1560 mov r8,QWORD[64+rdx] 1561 mov r10,r11 1562 shr r10,16 1563 and r10,rax 1564 mov QWORD[rcx],r10 1565 shrd r11,r8,45 1566 and r11,rax 1567 mov QWORD[8+rcx],r11 1568 mov r9,QWORD[72+rdx] 1569 mov r11,r8 1570 shr r11,10 1571 and r11,rax 1572 mov QWORD[16+rcx],r11 1573 shrd r8,r9,39 1574 and r8,rax 1575 mov QWORD[24+rcx],r8 1576 mov r10,QWORD[80+rdx] 1577 mov r8,r9 1578 shr r8,4 1579 and r8,rax 1580 mov QWORD[32+rcx],r8 1581 mov r11,r9 1582 shr r11,33 1583 and r11,rax 1584 mov QWORD[40+rcx],r11 1585 shrd r9,r10,62 1586 and r9,rax 1587 mov QWORD[48+rcx],r9 1588 mov r11,QWORD[88+rdx] 1589 mov r9,r10 1590 shr r9,27 1591 and r9,rax 1592 mov QWORD[56+rcx],r9 1593 shrd r10,r11,56 1594 and r10,rax 1595 mov QWORD[64+rcx],r10 1596 mov r8,QWORD[96+rdx] 1597 mov r10,r11 1598 shr r10,21 1599 and r10,rax 1600 mov QWORD[72+rcx],r10 1601 shrd r11,r8,50 1602 and r11,rax 1603 mov QWORD[80+rcx],r11 1604 mov r9,QWORD[104+rdx] 1605 mov r11,r8 1606 shr r11,15 1607 and r11,rax 1608 mov QWORD[88+rcx],r11 1609 shrd r8,r9,44 1610 and r8,rax 1611 mov QWORD[96+rcx],r8 1612 mov r10,QWORD[112+rdx] 1613 mov r8,r9 1614 shr r8,9 1615 and r8,rax 1616 mov QWORD[104+rcx],r8 1617 shrd r9,r10,38 1618 and r9,rax 1619 mov QWORD[112+rcx],r9 1620 mov r11,QWORD[120+rdx] 1621 mov r9,r10 1622 shr r9,3 1623 and r9,rax 1624 mov QWORD[120+rcx],r9 1625 mov r8,r10 1626 shr r8,32 1627 and r8,rax 1628 mov QWORD[128+rcx],r8 1629 shrd r10,r11,61 1630 and r10,rax 1631 mov QWORD[136+rcx],r10 1632 xor r8,r8 1633 mov r10,r11 1634 shr r10,26 1635 and r10,rax 1636 mov QWORD[144+rcx],r10 1637 shrd r11,r8,55 1638 and r11,rax 1639 mov QWORD[152+rcx],r11 1640 mov QWORD[160+rcx],r8 1641 mov QWORD[168+rcx],r8 1642 mov QWORD[176+rcx],r8 1643 mov QWORD[184+rcx],r8 25 rsaz_1024_red2norm_avx2: 26 rsaz_1024_scatter5_avx2: 27 rsaz_1024_gather5_avx2: 28 DB 0x0f,0x0b 1644 29 DB 0F3h,0C3h ;repret 1645 30 1646 1647 global rsaz_1024_scatter5_avx21648 1649 ALIGN 321650 rsaz_1024_scatter5_avx2:1651 1652 vzeroupper1653 vmovdqu ymm5,YMMWORD[$L$scatter_permd]1654 shl r8d,41655 lea rcx,[r8*1+rcx]1656 mov eax,91657 jmp NEAR $L$oop_scatter_10241658 1659 ALIGN 321660 $L$oop_scatter_1024:1661 vmovdqu ymm0,YMMWORD[rdx]1662 lea rdx,[32+rdx]1663 vpermd ymm0,ymm5,ymm01664 vmovdqu XMMWORD[rcx],xmm01665 lea rcx,[512+rcx]1666 dec eax1667 jnz NEAR $L$oop_scatter_10241668 1669 vzeroupper1670 DB 0F3h,0C3h ;repret1671 1672 1673 1674 global rsaz_1024_gather5_avx21675 1676 ALIGN 321677 rsaz_1024_gather5_avx2:1678 1679 vzeroupper1680 mov r11,rsp1681 1682 lea rax,[((-136))+rsp]1683 $L$SEH_begin_rsaz_1024_gather5:1684 1685 DB 0x48,0x8d,0x60,0xe01686 DB 0xc5,0xf8,0x29,0x70,0xe01687 DB 0xc5,0xf8,0x29,0x78,0xf01688 DB 0xc5,0x78,0x29,0x40,0x001689 DB 0xc5,0x78,0x29,0x48,0x101690 DB 0xc5,0x78,0x29,0x50,0x201691 DB 0xc5,0x78,0x29,0x58,0x301692 DB 0xc5,0x78,0x29,0x60,0x401693 DB 0xc5,0x78,0x29,0x68,0x501694 DB 0xc5,0x78,0x29,0x70,0x601695 DB 0xc5,0x78,0x29,0x78,0x701696 lea rsp,[((-256))+rsp]1697 and rsp,-321698 lea r10,[$L$inc]1699 lea rax,[((-128))+rsp]1700 1701 vmovd xmm4,r8d1702 vmovdqa ymm0,YMMWORD[r10]1703 vmovdqa ymm1,YMMWORD[32+r10]1704 vmovdqa ymm5,YMMWORD[64+r10]1705 vpbroadcastd ymm4,xmm41706 1707 vpaddd ymm2,ymm0,ymm51708 vpcmpeqd ymm0,ymm0,ymm41709 vpaddd ymm3,ymm1,ymm51710 vpcmpeqd ymm1,ymm1,ymm41711 vmovdqa YMMWORD[(0+128)+rax],ymm01712 vpaddd ymm0,ymm2,ymm51713 vpcmpeqd ymm2,ymm2,ymm41714 vmovdqa YMMWORD[(32+128)+rax],ymm11715 vpaddd ymm1,ymm3,ymm51716 vpcmpeqd ymm3,ymm3,ymm41717 vmovdqa YMMWORD[(64+128)+rax],ymm21718 vpaddd ymm2,ymm0,ymm51719 vpcmpeqd ymm0,ymm0,ymm41720 vmovdqa YMMWORD[(96+128)+rax],ymm31721 vpaddd ymm3,ymm1,ymm51722 vpcmpeqd ymm1,ymm1,ymm41723 vmovdqa YMMWORD[(128+128)+rax],ymm01724 vpaddd ymm8,ymm2,ymm51725 vpcmpeqd ymm2,ymm2,ymm41726 vmovdqa YMMWORD[(160+128)+rax],ymm11727 vpaddd ymm9,ymm3,ymm51728 vpcmpeqd ymm3,ymm3,ymm41729 vmovdqa YMMWORD[(192+128)+rax],ymm21730 vpaddd ymm10,ymm8,ymm51731 vpcmpeqd ymm8,ymm8,ymm41732 vmovdqa YMMWORD[(224+128)+rax],ymm31733 vpaddd ymm11,ymm9,ymm51734 vpcmpeqd ymm9,ymm9,ymm41735 vpaddd ymm12,ymm10,ymm51736 vpcmpeqd ymm10,ymm10,ymm41737 vpaddd ymm13,ymm11,ymm51738 vpcmpeqd ymm11,ymm11,ymm41739 vpaddd ymm14,ymm12,ymm51740 vpcmpeqd ymm12,ymm12,ymm41741 vpaddd ymm15,ymm13,ymm51742 vpcmpeqd ymm13,ymm13,ymm41743 vpcmpeqd ymm14,ymm14,ymm41744 vpcmpeqd ymm15,ymm15,ymm41745 1746 vmovdqa ymm7,YMMWORD[((-32))+r10]1747 lea rdx,[128+rdx]1748 mov r8d,91749 1750 $L$oop_gather_1024:1751 vmovdqa ymm0,YMMWORD[((0-128))+rdx]1752 vmovdqa ymm1,YMMWORD[((32-128))+rdx]1753 vmovdqa ymm2,YMMWORD[((64-128))+rdx]1754 vmovdqa ymm3,YMMWORD[((96-128))+rdx]1755 vpand ymm0,ymm0,YMMWORD[((0+128))+rax]1756 vpand ymm1,ymm1,YMMWORD[((32+128))+rax]1757 vpand ymm2,ymm2,YMMWORD[((64+128))+rax]1758 vpor ymm4,ymm1,ymm01759 vpand ymm3,ymm3,YMMWORD[((96+128))+rax]1760 vmovdqa ymm0,YMMWORD[((128-128))+rdx]1761 vmovdqa ymm1,YMMWORD[((160-128))+rdx]1762 vpor ymm5,ymm3,ymm21763 vmovdqa ymm2,YMMWORD[((192-128))+rdx]1764 vmovdqa ymm3,YMMWORD[((224-128))+rdx]1765 vpand ymm0,ymm0,YMMWORD[((128+128))+rax]1766 vpand ymm1,ymm1,YMMWORD[((160+128))+rax]1767 vpand ymm2,ymm2,YMMWORD[((192+128))+rax]1768 vpor ymm4,ymm4,ymm01769 vpand ymm3,ymm3,YMMWORD[((224+128))+rax]1770 vpand ymm0,ymm8,YMMWORD[((256-128))+rdx]1771 vpor ymm5,ymm5,ymm11772 vpand ymm1,ymm9,YMMWORD[((288-128))+rdx]1773 vpor ymm4,ymm4,ymm21774 vpand ymm2,ymm10,YMMWORD[((320-128))+rdx]1775 vpor ymm5,ymm5,ymm31776 vpand ymm3,ymm11,YMMWORD[((352-128))+rdx]1777 vpor ymm4,ymm4,ymm01778 vpand ymm0,ymm12,YMMWORD[((384-128))+rdx]1779 vpor ymm5,ymm5,ymm11780 vpand ymm1,ymm13,YMMWORD[((416-128))+rdx]1781 vpor ymm4,ymm4,ymm21782 vpand ymm2,ymm14,YMMWORD[((448-128))+rdx]1783 vpor ymm5,ymm5,ymm31784 vpand ymm3,ymm15,YMMWORD[((480-128))+rdx]1785 lea rdx,[512+rdx]1786 vpor ymm4,ymm4,ymm01787 vpor ymm5,ymm5,ymm11788 vpor ymm4,ymm4,ymm21789 vpor ymm5,ymm5,ymm31790 1791 vpor ymm4,ymm4,ymm51792 vextracti128 xmm5,ymm4,11793 vpor xmm5,xmm5,xmm41794 vpermd ymm5,ymm7,ymm51795 vmovdqu YMMWORD[rcx],ymm51796 lea rcx,[32+rcx]1797 dec r8d1798 jnz NEAR $L$oop_gather_10241799 1800 vpxor ymm0,ymm0,ymm01801 vmovdqu YMMWORD[rcx],ymm01802 vzeroupper1803 movaps xmm6,XMMWORD[((-168))+r11]1804 movaps xmm7,XMMWORD[((-152))+r11]1805 movaps xmm8,XMMWORD[((-136))+r11]1806 movaps xmm9,XMMWORD[((-120))+r11]1807 movaps xmm10,XMMWORD[((-104))+r11]1808 movaps xmm11,XMMWORD[((-88))+r11]1809 movaps xmm12,XMMWORD[((-72))+r11]1810 movaps xmm13,XMMWORD[((-56))+r11]1811 movaps xmm14,XMMWORD[((-40))+r11]1812 movaps xmm15,XMMWORD[((-24))+r11]1813 lea rsp,[r11]1814 1815 DB 0F3h,0C3h ;repret1816 1817 $L$SEH_end_rsaz_1024_gather5:1818 1819 EXTERN OPENSSL_ia32cap_P1820 global rsaz_avx2_eligible1821 1822 ALIGN 321823 rsaz_avx2_eligible:1824 mov eax,DWORD[((OPENSSL_ia32cap_P+8))]1825 mov ecx,5245441826 mov edx,01827 and ecx,eax1828 cmp ecx,5245441829 cmove eax,edx1830 and eax,321831 shr eax,51832 DB 0F3h,0C3h ;repret1833 1834 1835 ALIGN 641836 $L$and_mask:1837 DQ 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff1838 $L$scatter_permd:1839 DD 0,2,4,6,7,7,7,71840 $L$gather_permd:1841 DD 0,7,1,7,2,7,3,71842 $L$inc:1843 DD 0,0,0,0,1,1,1,11844 DD 2,2,2,2,3,3,3,31845 DD 4,4,4,4,4,4,4,41846 ALIGN 641847 EXTERN __imp_RtlVirtualUnwind1848 1849 ALIGN 161850 rsaz_se_handler:1851 push rsi1852 push rdi1853 push rbx1854 push rbp1855 push r121856 push r131857 push r141858 push r151859 pushfq1860 sub rsp,641861 1862 mov rax,QWORD[120+r8]1863 mov rbx,QWORD[248+r8]1864 1865 mov rsi,QWORD[8+r9]1866 mov r11,QWORD[56+r9]1867 1868 mov r10d,DWORD[r11]1869 lea r10,[r10*1+rsi]1870 cmp rbx,r101871 jb NEAR $L$common_seh_tail1872 1873 mov r10d,DWORD[4+r11]1874 lea r10,[r10*1+rsi]1875 cmp rbx,r101876 jae NEAR $L$common_seh_tail1877 1878 mov rbp,QWORD[160+r8]1879 1880 mov r10d,DWORD[8+r11]1881 lea r10,[r10*1+rsi]1882 cmp rbx,r101883 cmovc rax,rbp1884 1885 mov r15,QWORD[((-48))+rax]1886 mov r14,QWORD[((-40))+rax]1887 mov r13,QWORD[((-32))+rax]1888 mov r12,QWORD[((-24))+rax]1889 mov rbp,QWORD[((-16))+rax]1890 mov rbx,QWORD[((-8))+rax]1891 mov QWORD[240+r8],r151892 mov QWORD[232+r8],r141893 mov QWORD[224+r8],r131894 mov QWORD[216+r8],r121895 mov QWORD[160+r8],rbp1896 mov QWORD[144+r8],rbx1897 1898 lea rsi,[((-216))+rax]1899 lea rdi,[512+r8]1900 mov ecx,201901 DD 0xa548f3fc1902 1903 $L$common_seh_tail:1904 mov rdi,QWORD[8+rax]1905 mov rsi,QWORD[16+rax]1906 mov QWORD[152+r8],rax1907 mov QWORD[168+r8],rsi1908 mov QWORD[176+r8],rdi1909 1910 mov rdi,QWORD[40+r9]1911 mov rsi,r81912 mov ecx,1541913 DD 0xa548f3fc1914 1915 mov rsi,r91916 xor rcx,rcx1917 mov rdx,QWORD[8+rsi]1918 mov r8,QWORD[rsi]1919 mov r9,QWORD[16+rsi]1920 mov r10,QWORD[40+rsi]1921 lea r11,[56+rsi]1922 lea r12,[24+rsi]1923 mov QWORD[32+rsp],r101924 mov QWORD[40+rsp],r111925 mov QWORD[48+rsp],r121926 mov QWORD[56+rsp],rcx1927 call QWORD[__imp_RtlVirtualUnwind]1928 1929 mov eax,11930 add rsp,641931 popfq1932 pop r151933 pop r141934 pop r131935 pop r121936 pop rbp1937 pop rbx1938 pop rdi1939 pop rsi1940 DB 0F3h,0C3h ;repret1941 1942 1943 section .pdata rdata align=41944 ALIGN 41945 DD $L$SEH_begin_rsaz_1024_sqr_avx2 wrt ..imagebase1946 DD $L$SEH_end_rsaz_1024_sqr_avx2 wrt ..imagebase1947 DD $L$SEH_info_rsaz_1024_sqr_avx2 wrt ..imagebase1948 1949 DD $L$SEH_begin_rsaz_1024_mul_avx2 wrt ..imagebase1950 DD $L$SEH_end_rsaz_1024_mul_avx2 wrt ..imagebase1951 DD $L$SEH_info_rsaz_1024_mul_avx2 wrt ..imagebase1952 1953 DD $L$SEH_begin_rsaz_1024_gather5 wrt ..imagebase1954 DD $L$SEH_end_rsaz_1024_gather5 wrt ..imagebase1955 DD $L$SEH_info_rsaz_1024_gather5 wrt ..imagebase1956 section .xdata rdata align=81957 ALIGN 81958 $L$SEH_info_rsaz_1024_sqr_avx2:1959 DB 9,0,0,01960 DD rsaz_se_handler wrt ..imagebase1961 DD $L$sqr_1024_body wrt ..imagebase,$L$sqr_1024_epilogue wrt ..imagebase,$L$sqr_1024_in_tail wrt ..imagebase1962 DD 01963 $L$SEH_info_rsaz_1024_mul_avx2:1964 DB 9,0,0,01965 DD rsaz_se_handler wrt ..imagebase1966 DD $L$mul_1024_body wrt ..imagebase,$L$mul_1024_epilogue wrt ..imagebase,$L$mul_1024_in_tail wrt ..imagebase1967 DD 01968 $L$SEH_info_rsaz_1024_gather5:1969 DB 0x01,0x36,0x17,0x0b1970 DB 0x36,0xf8,0x09,0x001971 DB 0x31,0xe8,0x08,0x001972 DB 0x2c,0xd8,0x07,0x001973 DB 0x27,0xc8,0x06,0x001974 DB 0x22,0xb8,0x05,0x001975 DB 0x1d,0xa8,0x04,0x001976 DB 0x18,0x98,0x03,0x001977 DB 0x13,0x88,0x02,0x001978 DB 0x0e,0x78,0x01,0x001979 DB 0x09,0x68,0x00,0x001980 DB 0x04,0x01,0x15,0x001981 DB 0x00,0xb3,0x00,0x00 -
trunk/src/libs/openssl-3.0.1/crypto/genasm-nasm/rsaz-x86_64.S
r94083 r94114 44 44 mov rax,QWORD[8+rsi] 45 45 mov QWORD[128+rsp],rcx 46 mov r11d,0x8010047 and r11d,DWORD[((OPENSSL_ia32cap_P+8))]48 cmp r11d,0x8010049 je NEAR $L$oop_sqrx50 46 jmp NEAR $L$oop_sqr 51 47 … … 418 414 dec r8d 419 415 jnz NEAR $L$oop_sqr 420 jmp NEAR $L$sqr_tail421 422 ALIGN 32423 $L$oop_sqrx:424 mov DWORD[((128+8))+rsp],r8d425 DB 102,72,15,110,199426 427 mulx r9,r8,rax428 mov rbx,rax429 430 mulx r10,rcx,QWORD[16+rsi]431 xor rbp,rbp432 433 mulx r11,rax,QWORD[24+rsi]434 adcx r9,rcx435 436 DB 0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00437 adcx r10,rax438 439 DB 0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00440 adcx r11,rcx441 442 mulx r14,rcx,QWORD[48+rsi]443 adcx r12,rax444 adcx r13,rcx445 446 mulx r15,rax,QWORD[56+rsi]447 adcx r14,rax448 adcx r15,rbp449 450 mulx rdi,rax,rdx451 mov rdx,rbx452 xor rcx,rcx453 adox r8,r8454 adcx r8,rdi455 adox rcx,rbp456 adcx rcx,rbp457 458 mov QWORD[rsp],rax459 mov QWORD[8+rsp],r8460 461 462 DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00463 adox r10,rax464 adcx r11,rbx465 466 mulx r8,rdi,QWORD[24+rsi]467 adox r11,rdi468 DB 0x66469 adcx r12,r8470 471 mulx rbx,rax,QWORD[32+rsi]472 adox r12,rax473 adcx r13,rbx474 475 mulx r8,rdi,QWORD[40+rsi]476 adox r13,rdi477 adcx r14,r8478 479 DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00480 adox r14,rax481 adcx r15,rbx482 483 DB 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00484 adox r15,rdi485 adcx r8,rbp486 mulx rdi,rax,rdx487 adox r8,rbp488 DB 0x48,0x8b,0x96,0x10,0x00,0x00,0x00489 490 xor rbx,rbx491 adox r9,r9492 493 adcx rax,rcx494 adox r10,r10495 adcx r9,rax496 adox rbx,rbp497 adcx r10,rdi498 adcx rbx,rbp499 500 mov QWORD[16+rsp],r9501 DB 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00502 503 504 mulx r9,rdi,QWORD[24+rsi]505 adox r12,rdi506 adcx r13,r9507 508 mulx rcx,rax,QWORD[32+rsi]509 adox r13,rax510 adcx r14,rcx511 512 DB 0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00513 adox r14,rdi514 adcx r15,r9515 516 DB 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00517 adox r15,rax518 adcx r8,rcx519 520 mulx r9,rdi,QWORD[56+rsi]521 adox r8,rdi522 adcx r9,rbp523 mulx rdi,rax,rdx524 adox r9,rbp525 mov rdx,QWORD[24+rsi]526 527 xor rcx,rcx528 adox r11,r11529 530 adcx rax,rbx531 adox r12,r12532 adcx r11,rax533 adox rcx,rbp534 adcx r12,rdi535 adcx rcx,rbp536 537 mov QWORD[32+rsp],r11538 mov QWORD[40+rsp],r12539 540 541 mulx rbx,rax,QWORD[32+rsi]542 adox r14,rax543 adcx r15,rbx544 545 mulx r10,rdi,QWORD[40+rsi]546 adox r15,rdi547 adcx r8,r10548 549 mulx rbx,rax,QWORD[48+rsi]550 adox r8,rax551 adcx r9,rbx552 553 mulx r10,rdi,QWORD[56+rsi]554 adox r9,rdi555 adcx r10,rbp556 mulx rdi,rax,rdx557 adox r10,rbp558 mov rdx,QWORD[32+rsi]559 560 xor rbx,rbx561 adox r13,r13562 563 adcx rax,rcx564 adox r14,r14565 adcx r13,rax566 adox rbx,rbp567 adcx r14,rdi568 adcx rbx,rbp569 570 mov QWORD[48+rsp],r13571 mov QWORD[56+rsp],r14572 573 574 mulx r11,rdi,QWORD[40+rsi]575 adox r8,rdi576 adcx r9,r11577 578 mulx rcx,rax,QWORD[48+rsi]579 adox r9,rax580 adcx r10,rcx581 582 mulx r11,rdi,QWORD[56+rsi]583 adox r10,rdi584 adcx r11,rbp585 mulx rdi,rax,rdx586 mov rdx,QWORD[40+rsi]587 adox r11,rbp588 589 xor rcx,rcx590 adox r15,r15591 592 adcx rax,rbx593 adox r8,r8594 adcx r15,rax595 adox rcx,rbp596 adcx r8,rdi597 adcx rcx,rbp598 599 mov QWORD[64+rsp],r15600 mov QWORD[72+rsp],r8601 602 603 DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00604 adox r10,rax605 adcx r11,rbx606 607 DB 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00608 adox r11,rdi609 adcx r12,rbp610 mulx rdi,rax,rdx611 adox r12,rbp612 mov rdx,QWORD[48+rsi]613 614 xor rbx,rbx615 adox r9,r9616 617 adcx rax,rcx618 adox r10,r10619 adcx r9,rax620 adcx r10,rdi621 adox rbx,rbp622 adcx rbx,rbp623 624 mov QWORD[80+rsp],r9625 mov QWORD[88+rsp],r10626 627 628 DB 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00629 adox r12,rax630 adox r13,rbp631 632 mulx rdi,rax,rdx633 xor rcx,rcx634 mov rdx,QWORD[56+rsi]635 adox r11,r11636 637 adcx rax,rbx638 adox r12,r12639 adcx r11,rax640 adox rcx,rbp641 adcx r12,rdi642 adcx rcx,rbp643 644 DB 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00645 DB 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00646 647 648 mulx rdx,rax,rdx649 xor rbx,rbx650 adox r13,r13651 652 adcx rax,rcx653 adox rbx,rbp654 adcx rax,r13655 adcx rbx,rdx656 657 DB 102,72,15,126,199658 DB 102,72,15,126,205659 660 mov rdx,QWORD[128+rsp]661 mov r8,QWORD[rsp]662 mov r9,QWORD[8+rsp]663 mov r10,QWORD[16+rsp]664 mov r11,QWORD[24+rsp]665 mov r12,QWORD[32+rsp]666 mov r13,QWORD[40+rsp]667 mov r14,QWORD[48+rsp]668 mov r15,QWORD[56+rsp]669 670 mov QWORD[112+rsp],rax671 mov QWORD[120+rsp],rbx672 673 call __rsaz_512_reducex674 675 add r8,QWORD[64+rsp]676 adc r9,QWORD[72+rsp]677 adc r10,QWORD[80+rsp]678 adc r11,QWORD[88+rsp]679 adc r12,QWORD[96+rsp]680 adc r13,QWORD[104+rsp]681 adc r14,QWORD[112+rsp]682 adc r15,QWORD[120+rsp]683 sbb rcx,rcx684 685 call __rsaz_512_subtract686 687 mov rdx,r8688 mov rax,r9689 mov r8d,DWORD[((128+8))+rsp]690 mov rsi,rdi691 692 dec r8d693 jnz NEAR $L$oop_sqrx694 695 $L$sqr_tail:696 416 697 417 lea rax,[((128+24+48))+rsp] … … 752 472 DB 102,72,15,110,201 753 473 mov QWORD[128+rsp],r8 754 mov r11d,0x80100755 and r11d,DWORD[((OPENSSL_ia32cap_P+8))]756 cmp r11d,0x80100757 je NEAR $L$mulx758 474 mov rbx,QWORD[rdx] 759 475 mov rbp,rdx … … 773 489 774 490 call __rsaz_512_reduce 775 jmp NEAR $L$mul_tail776 777 ALIGN 32778 $L$mulx:779 mov rbp,rdx780 mov rdx,QWORD[rdx]781 call __rsaz_512_mulx782 783 DB 102,72,15,126,199784 DB 102,72,15,126,205785 786 mov rdx,QWORD[128+rsp]787 mov r8,QWORD[rsp]788 mov r9,QWORD[8+rsp]789 mov r10,QWORD[16+rsp]790 mov r11,QWORD[24+rsp]791 mov r12,QWORD[32+rsp]792 mov r13,QWORD[40+rsp]793 mov r14,QWORD[48+rsp]794 mov r15,QWORD[56+rsp]795 796 call __rsaz_512_reducex797 $L$mul_tail:798 491 add r8,QWORD[64+rsp] 799 492 adc r9,QWORD[72+rsp] … … 927 620 pshufd xmm9,xmm8,0x4e 928 621 por xmm8,xmm9 929 mov r11d,0x80100930 and r11d,DWORD[((OPENSSL_ia32cap_P+8))]931 cmp r11d,0x80100932 je NEAR $L$mulx_gather933 622 DB 102,76,15,126,195 934 623 … … 1111 800 1112 801 call __rsaz_512_reduce 1113 jmp NEAR $L$mul_gather_tail1114 1115 ALIGN 321116 $L$mulx_gather:1117 DB 102,76,15,126,1941118 1119 mov QWORD[128+rsp],r81120 mov QWORD[((128+8))+rsp],rdi1121 mov QWORD[((128+16))+rsp],rcx1122 1123 mulx r8,rbx,QWORD[rsi]1124 mov QWORD[rsp],rbx1125 xor edi,edi1126 1127 mulx r9,rax,QWORD[8+rsi]1128 1129 mulx r10,rbx,QWORD[16+rsi]1130 adcx r8,rax1131 1132 mulx r11,rax,QWORD[24+rsi]1133 adcx r9,rbx1134 1135 mulx r12,rbx,QWORD[32+rsi]1136 adcx r10,rax1137 1138 mulx r13,rax,QWORD[40+rsi]1139 adcx r11,rbx1140 1141 mulx r14,rbx,QWORD[48+rsi]1142 adcx r12,rax1143 1144 mulx r15,rax,QWORD[56+rsi]1145 adcx r13,rbx1146 adcx r14,rax1147 DB 0x671148 mov rbx,r81149 adcx r15,rdi1150 1151 mov rcx,-71152 jmp NEAR $L$oop_mulx_gather1153 1154 ALIGN 321155 $L$oop_mulx_gather:1156 movdqa xmm8,XMMWORD[rbp]1157 movdqa xmm9,XMMWORD[16+rbp]1158 movdqa xmm10,XMMWORD[32+rbp]1159 movdqa xmm11,XMMWORD[48+rbp]1160 pand xmm8,xmm01161 movdqa xmm12,XMMWORD[64+rbp]1162 pand xmm9,xmm11163 movdqa xmm13,XMMWORD[80+rbp]1164 pand xmm10,xmm21165 movdqa xmm14,XMMWORD[96+rbp]1166 pand xmm11,xmm31167 movdqa xmm15,XMMWORD[112+rbp]1168 lea rbp,[128+rbp]1169 pand xmm12,xmm41170 pand xmm13,xmm51171 pand xmm14,xmm61172 pand xmm15,xmm71173 por xmm8,xmm101174 por xmm9,xmm111175 por xmm8,xmm121176 por xmm9,xmm131177 por xmm8,xmm141178 por xmm9,xmm151179 1180 por xmm8,xmm91181 pshufd xmm9,xmm8,0x4e1182 por xmm8,xmm91183 DB 102,76,15,126,1941184 1185 DB 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x001186 adcx rbx,rax1187 adox r8,r91188 1189 mulx r9,rax,QWORD[8+rsi]1190 adcx r8,rax1191 adox r9,r101192 1193 mulx r10,rax,QWORD[16+rsi]1194 adcx r9,rax1195 adox r10,r111196 1197 DB 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x001198 adcx r10,rax1199 adox r11,r121200 1201 mulx r12,rax,QWORD[32+rsi]1202 adcx r11,rax1203 adox r12,r131204 1205 mulx r13,rax,QWORD[40+rsi]1206 adcx r12,rax1207 adox r13,r141208 1209 DB 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x001210 adcx r13,rax1211 DB 0x671212 adox r14,r151213 1214 mulx r15,rax,QWORD[56+rsi]1215 mov QWORD[64+rcx*8+rsp],rbx1216 adcx r14,rax1217 adox r15,rdi1218 mov rbx,r81219 adcx r15,rdi1220 1221 inc rcx1222 jnz NEAR $L$oop_mulx_gather1223 1224 mov QWORD[64+rsp],r81225 mov QWORD[((64+8))+rsp],r91226 mov QWORD[((64+16))+rsp],r101227 mov QWORD[((64+24))+rsp],r111228 mov QWORD[((64+32))+rsp],r121229 mov QWORD[((64+40))+rsp],r131230 mov QWORD[((64+48))+rsp],r141231 mov QWORD[((64+56))+rsp],r151232 1233 mov rdx,QWORD[128+rsp]1234 mov rdi,QWORD[((128+8))+rsp]1235 mov rbp,QWORD[((128+16))+rsp]1236 1237 mov r8,QWORD[rsp]1238 mov r9,QWORD[8+rsp]1239 mov r10,QWORD[16+rsp]1240 mov r11,QWORD[24+rsp]1241 mov r12,QWORD[32+rsp]1242 mov r13,QWORD[40+rsp]1243 mov r14,QWORD[48+rsp]1244 mov r15,QWORD[56+rsp]1245 1246 call __rsaz_512_reducex1247 1248 $L$mul_gather_tail:1249 802 add r8,QWORD[64+rsp] 1250 803 adc r9,QWORD[72+rsp] … … 1333 886 1334 887 mov rbp,rdi 1335 mov r11d,0x801001336 and r11d,DWORD[((OPENSSL_ia32cap_P+8))]1337 cmp r11d,0x801001338 je NEAR $L$mulx_scatter1339 888 mov rbx,QWORD[rdi] 1340 889 call __rsaz_512_mul … … 1353 902 1354 903 call __rsaz_512_reduce 1355 jmp NEAR $L$mul_scatter_tail1356 1357 ALIGN 321358 $L$mulx_scatter:1359 mov rdx,QWORD[rdi]1360 call __rsaz_512_mulx1361 1362 DB 102,72,15,126,1991363 DB 102,72,15,126,2051364 1365 mov rdx,QWORD[128+rsp]1366 mov r8,QWORD[rsp]1367 mov r9,QWORD[8+rsp]1368 mov r10,QWORD[16+rsp]1369 mov r11,QWORD[24+rsp]1370 mov r12,QWORD[32+rsp]1371 mov r13,QWORD[40+rsp]1372 mov r14,QWORD[48+rsp]1373 mov r15,QWORD[56+rsp]1374 1375 call __rsaz_512_reducex1376 1377 $L$mul_scatter_tail:1378 904 add r8,QWORD[64+rsp] 1379 905 adc r9,QWORD[72+rsp] … … 1451 977 1452 978 $L$mul_by_one_body: 1453 mov eax,DWORD[((OPENSSL_ia32cap_P+8))]1454 979 mov rbp,rdx 1455 980 mov QWORD[128+rsp],rcx … … 1472 997 movdqa XMMWORD[80+rsp],xmm0 1473 998 movdqa XMMWORD[96+rsp],xmm0 1474 and eax,0x801001475 cmp eax,0x801001476 je NEAR $L$by_one_callx1477 999 call __rsaz_512_reduce 1478 jmp NEAR $L$by_one_tail1479 ALIGN 321480 $L$by_one_callx:1481 mov rdx,QWORD[128+rsp]1482 call __rsaz_512_reducex1483 $L$by_one_tail:1484 1000 mov QWORD[rdi],r8 1485 1001 mov QWORD[8+rdi],r9 … … 1594 1110 dec ecx 1595 1111 jne NEAR $L$reduction_loop 1596 1597 DB 0F3h,0C3h ;repret1598 1599 1600 1601 ALIGN 321602 __rsaz_512_reducex:1603 1604 1605 imul rdx,r81606 xor rsi,rsi1607 mov ecx,81608 jmp NEAR $L$reduction_loopx1609 1610 ALIGN 321611 $L$reduction_loopx:1612 mov rbx,r81613 mulx r8,rax,QWORD[rbp]1614 adcx rax,rbx1615 adox r8,r91616 1617 mulx r9,rax,QWORD[8+rbp]1618 adcx r8,rax1619 adox r9,r101620 1621 mulx r10,rbx,QWORD[16+rbp]1622 adcx r9,rbx1623 adox r10,r111624 1625 mulx r11,rbx,QWORD[24+rbp]1626 adcx r10,rbx1627 adox r11,r121628 1629 DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x001630 mov rax,rdx1631 mov rdx,r81632 adcx r11,rbx1633 adox r12,r131634 1635 mulx rdx,rbx,QWORD[((128+8))+rsp]1636 mov rdx,rax1637 1638 mulx r13,rax,QWORD[40+rbp]1639 adcx r12,rax1640 adox r13,r141641 1642 DB 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x001643 adcx r13,rax1644 adox r14,r151645 1646 mulx r15,rax,QWORD[56+rbp]1647 mov rdx,rbx1648 adcx r14,rax1649 adox r15,rsi1650 adcx r15,rsi1651 1652 dec ecx1653 jne NEAR $L$reduction_loopx1654 1112 1655 1113 DB 0F3h,0C3h ;repret … … 1855 1313 mov QWORD[48+rdi],r14 1856 1314 mov QWORD[56+rdi],r15 1857 1858 DB 0F3h,0C3h ;repret1859 1860 1861 1862 ALIGN 321863 __rsaz_512_mulx:1864 1865 mulx r8,rbx,QWORD[rsi]1866 mov rcx,-61867 1868 mulx r9,rax,QWORD[8+rsi]1869 mov QWORD[8+rsp],rbx1870 1871 mulx r10,rbx,QWORD[16+rsi]1872 adc r8,rax1873 1874 mulx r11,rax,QWORD[24+rsi]1875 adc r9,rbx1876 1877 mulx r12,rbx,QWORD[32+rsi]1878 adc r10,rax1879 1880 mulx r13,rax,QWORD[40+rsi]1881 adc r11,rbx1882 1883 mulx r14,rbx,QWORD[48+rsi]1884 adc r12,rax1885 1886 mulx r15,rax,QWORD[56+rsi]1887 mov rdx,QWORD[8+rbp]1888 adc r13,rbx1889 adc r14,rax1890 adc r15,01891 1892 xor rdi,rdi1893 jmp NEAR $L$oop_mulx1894 1895 ALIGN 321896 $L$oop_mulx:1897 mov rbx,r81898 mulx r8,rax,QWORD[rsi]1899 adcx rbx,rax1900 adox r8,r91901 1902 mulx r9,rax,QWORD[8+rsi]1903 adcx r8,rax1904 adox r9,r101905 1906 mulx r10,rax,QWORD[16+rsi]1907 adcx r9,rax1908 adox r10,r111909 1910 mulx r11,rax,QWORD[24+rsi]1911 adcx r10,rax1912 adox r11,r121913 1914 DB 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x001915 adcx r11,rax1916 adox r12,r131917 1918 mulx r13,rax,QWORD[40+rsi]1919 adcx r12,rax1920 adox r13,r141921 1922 mulx r14,rax,QWORD[48+rsi]1923 adcx r13,rax1924 adox r14,r151925 1926 mulx r15,rax,QWORD[56+rsi]1927 mov rdx,QWORD[64+rcx*8+rbp]1928 mov QWORD[((8+64-8))+rcx*8+rsp],rbx1929 adcx r14,rax1930 adox r15,rdi1931 adcx r15,rdi1932 1933 inc rcx1934 jnz NEAR $L$oop_mulx1935 1936 mov rbx,r81937 mulx r8,rax,QWORD[rsi]1938 adcx rbx,rax1939 adox r8,r91940 1941 DB 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x001942 adcx r8,rax1943 adox r9,r101944 1945 DB 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x001946 adcx r9,rax1947 adox r10,r111948 1949 mulx r11,rax,QWORD[24+rsi]1950 adcx r10,rax1951 adox r11,r121952 1953 mulx r12,rax,QWORD[32+rsi]1954 adcx r11,rax1955 adox r12,r131956 1957 mulx r13,rax,QWORD[40+rsi]1958 adcx r12,rax1959 adox r13,r141960 1961 DB 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x001962 adcx r13,rax1963 adox r14,r151964 1965 DB 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x001966 adcx r14,rax1967 adox r15,rdi1968 adcx r15,rdi1969 1970 mov QWORD[((8+64-8))+rsp],rbx1971 mov QWORD[((8+64))+rsp],r81972 mov QWORD[((8+64+8))+rsp],r91973 mov QWORD[((8+64+16))+rsp],r101974 mov QWORD[((8+64+24))+rsp],r111975 mov QWORD[((8+64+32))+rsp],r121976 mov QWORD[((8+64+40))+rsp],r131977 mov QWORD[((8+64+48))+rsp],r141978 mov QWORD[((8+64+56))+rsp],r151979 1315 1980 1316 DB 0F3h,0C3h ;repret -
trunk/src/libs/openssl-3.0.1/crypto/genasm-nasm/x86_64-mont.S
r94083 r94114 32 32 cmp r9d,8 33 33 jb NEAR $L$mul_enter 34 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]35 34 cmp rdx,rsi 36 35 jne NEAR $L$mul4x_enter … … 295 294 296 295 $L$mul4x_enter: 297 and r11d,0x80100298 cmp r11d,0x80100299 je NEAR $L$mulx4x_enter300 296 push rbx 301 297 … … 723 719 724 720 $L$SEH_end_bn_mul4x_mont: 725 EXTERN bn_sqrx8x_internal726 721 EXTERN bn_sqr8x_internal 727 722 … … 819 814 DB 102,72,15,110,207 820 815 DB 102,73,15,110,218 821 mov eax,DWORD[((OPENSSL_ia32cap_P+8))]822 and eax,0x80100823 cmp eax,0x80100824 jne NEAR $L$sqr8x_nox825 826 call bn_sqrx8x_internal827 828 829 830 831 lea rbx,[rcx*1+r8]832 mov r9,rcx833 mov rdx,rcx834 DB 102,72,15,126,207835 sar rcx,3+2836 jmp NEAR $L$sqr8x_sub837 838 ALIGN 32839 $L$sqr8x_nox:840 816 call bn_sqr8x_internal 841 817 … … 927 903 928 904 $L$SEH_end_bn_sqr8x_mont: 929 930 ALIGN 32931 bn_mulx4x_mont:932 mov QWORD[8+rsp],rdi ;WIN64 prologue933 mov QWORD[16+rsp],rsi934 mov rax,rsp935 $L$SEH_begin_bn_mulx4x_mont:936 mov rdi,rcx937 mov rsi,rdx938 mov rdx,r8939 mov rcx,r9940 mov r8,QWORD[40+rsp]941 mov r9,QWORD[48+rsp]942 943 944 945 mov rax,rsp946 947 $L$mulx4x_enter:948 push rbx949 950 push rbp951 952 push r12953 954 push r13955 956 push r14957 958 push r15959 960 $L$mulx4x_prologue:961 962 shl r9d,3963 xor r10,r10964 sub r10,r9965 mov r8,QWORD[r8]966 lea rbp,[((-72))+r10*1+rsp]967 and rbp,-128968 mov r11,rsp969 sub r11,rbp970 and r11,-4096971 lea rsp,[rbp*1+r11]972 mov r10,QWORD[rsp]973 cmp rsp,rbp974 ja NEAR $L$mulx4x_page_walk975 jmp NEAR $L$mulx4x_page_walk_done976 977 ALIGN 16978 $L$mulx4x_page_walk:979 lea rsp,[((-4096))+rsp]980 mov r10,QWORD[rsp]981 cmp rsp,rbp982 ja NEAR $L$mulx4x_page_walk983 $L$mulx4x_page_walk_done:984 985 lea r10,[r9*1+rdx]986 987 988 989 990 991 992 993 994 995 996 997 998 mov QWORD[rsp],r9999 shr r9,51000 mov QWORD[16+rsp],r101001 sub r9,11002 mov QWORD[24+rsp],r81003 mov QWORD[32+rsp],rdi1004 mov QWORD[40+rsp],rax1005 1006 mov QWORD[48+rsp],r91007 jmp NEAR $L$mulx4x_body1008 1009 ALIGN 321010 $L$mulx4x_body:1011 lea rdi,[8+rdx]1012 mov rdx,QWORD[rdx]1013 lea rbx,[((64+32))+rsp]1014 mov r9,rdx1015 1016 mulx rax,r8,QWORD[rsi]1017 mulx r14,r11,QWORD[8+rsi]1018 add r11,rax1019 mov QWORD[8+rsp],rdi1020 mulx r13,r12,QWORD[16+rsi]1021 adc r12,r141022 adc r13,01023 1024 mov rdi,r81025 imul r8,QWORD[24+rsp]1026 xor rbp,rbp1027 1028 mulx r14,rax,QWORD[24+rsi]1029 mov rdx,r81030 lea rsi,[32+rsi]1031 adcx r13,rax1032 adcx r14,rbp1033 1034 mulx r10,rax,QWORD[rcx]1035 adcx rdi,rax1036 adox r10,r111037 mulx r11,rax,QWORD[8+rcx]1038 adcx r10,rax1039 adox r11,r121040 DB 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x001041 mov rdi,QWORD[48+rsp]1042 mov QWORD[((-32))+rbx],r101043 adcx r11,rax1044 adox r12,r131045 mulx r15,rax,QWORD[24+rcx]1046 mov rdx,r91047 mov QWORD[((-24))+rbx],r111048 adcx r12,rax1049 adox r15,rbp1050 lea rcx,[32+rcx]1051 mov QWORD[((-16))+rbx],r121052 1053 jmp NEAR $L$mulx4x_1st1054 1055 ALIGN 321056 $L$mulx4x_1st:1057 adcx r15,rbp1058 mulx rax,r10,QWORD[rsi]1059 adcx r10,r141060 mulx r14,r11,QWORD[8+rsi]1061 adcx r11,rax1062 mulx rax,r12,QWORD[16+rsi]1063 adcx r12,r141064 mulx r14,r13,QWORD[24+rsi]1065 DB 0x67,0x671066 mov rdx,r81067 adcx r13,rax1068 adcx r14,rbp1069 lea rsi,[32+rsi]1070 lea rbx,[32+rbx]1071 1072 adox r10,r151073 mulx r15,rax,QWORD[rcx]1074 adcx r10,rax1075 adox r11,r151076 mulx r15,rax,QWORD[8+rcx]1077 adcx r11,rax1078 adox r12,r151079 mulx r15,rax,QWORD[16+rcx]1080 mov QWORD[((-40))+rbx],r101081 adcx r12,rax1082 mov QWORD[((-32))+rbx],r111083 adox r13,r151084 mulx r15,rax,QWORD[24+rcx]1085 mov rdx,r91086 mov QWORD[((-24))+rbx],r121087 adcx r13,rax1088 adox r15,rbp1089 lea rcx,[32+rcx]1090 mov QWORD[((-16))+rbx],r131091 1092 dec rdi1093 jnz NEAR $L$mulx4x_1st1094 1095 mov rax,QWORD[rsp]1096 mov rdi,QWORD[8+rsp]1097 adc r15,rbp1098 add r14,r151099 sbb r15,r151100 mov QWORD[((-8))+rbx],r141101 jmp NEAR $L$mulx4x_outer1102 1103 ALIGN 321104 $L$mulx4x_outer:1105 mov rdx,QWORD[rdi]1106 lea rdi,[8+rdi]1107 sub rsi,rax1108 mov QWORD[rbx],r151109 lea rbx,[((64+32))+rsp]1110 sub rcx,rax1111 1112 mulx r11,r8,QWORD[rsi]1113 xor ebp,ebp1114 mov r9,rdx1115 mulx r12,r14,QWORD[8+rsi]1116 adox r8,QWORD[((-32))+rbx]1117 adcx r11,r141118 mulx r13,r15,QWORD[16+rsi]1119 adox r11,QWORD[((-24))+rbx]1120 adcx r12,r151121 adox r12,QWORD[((-16))+rbx]1122 adcx r13,rbp1123 adox r13,rbp1124 1125 mov QWORD[8+rsp],rdi1126 mov r15,r81127 imul r8,QWORD[24+rsp]1128 xor ebp,ebp1129 1130 mulx r14,rax,QWORD[24+rsi]1131 mov rdx,r81132 adcx r13,rax1133 adox r13,QWORD[((-8))+rbx]1134 adcx r14,rbp1135 lea rsi,[32+rsi]1136 adox r14,rbp1137 1138 mulx r10,rax,QWORD[rcx]1139 adcx r15,rax1140 adox r10,r111141 mulx r11,rax,QWORD[8+rcx]1142 adcx r10,rax1143 adox r11,r121144 mulx r12,rax,QWORD[16+rcx]1145 mov QWORD[((-32))+rbx],r101146 adcx r11,rax1147 adox r12,r131148 mulx r15,rax,QWORD[24+rcx]1149 mov rdx,r91150 mov QWORD[((-24))+rbx],r111151 lea rcx,[32+rcx]1152 adcx r12,rax1153 adox r15,rbp1154 mov rdi,QWORD[48+rsp]1155 mov QWORD[((-16))+rbx],r121156 1157 jmp NEAR $L$mulx4x_inner1158 1159 ALIGN 321160 $L$mulx4x_inner:1161 mulx rax,r10,QWORD[rsi]1162 adcx r15,rbp1163 adox r10,r141164 mulx r14,r11,QWORD[8+rsi]1165 adcx r10,QWORD[rbx]1166 adox r11,rax1167 mulx rax,r12,QWORD[16+rsi]1168 adcx r11,QWORD[8+rbx]1169 adox r12,r141170 mulx r14,r13,QWORD[24+rsi]1171 mov rdx,r81172 adcx r12,QWORD[16+rbx]1173 adox r13,rax1174 adcx r13,QWORD[24+rbx]1175 adox r14,rbp1176 lea rsi,[32+rsi]1177 lea rbx,[32+rbx]1178 adcx r14,rbp1179 1180 adox r10,r151181 mulx r15,rax,QWORD[rcx]1182 adcx r10,rax1183 adox r11,r151184 mulx r15,rax,QWORD[8+rcx]1185 adcx r11,rax1186 adox r12,r151187 mulx r15,rax,QWORD[16+rcx]1188 mov QWORD[((-40))+rbx],r101189 adcx r12,rax1190 adox r13,r151191 mulx r15,rax,QWORD[24+rcx]1192 mov rdx,r91193 mov QWORD[((-32))+rbx],r111194 mov QWORD[((-24))+rbx],r121195 adcx r13,rax1196 adox r15,rbp1197 lea rcx,[32+rcx]1198 mov QWORD[((-16))+rbx],r131199 1200 dec rdi1201 jnz NEAR $L$mulx4x_inner1202 1203 mov rax,QWORD[rsp]1204 mov rdi,QWORD[8+rsp]1205 adc r15,rbp1206 sub rbp,QWORD[rbx]1207 adc r14,r151208 sbb r15,r151209 mov QWORD[((-8))+rbx],r141210 1211 cmp rdi,QWORD[16+rsp]1212 jne NEAR $L$mulx4x_outer1213 1214 lea rbx,[64+rsp]1215 sub rcx,rax1216 neg r151217 mov rdx,rax1218 shr rax,3+21219 mov rdi,QWORD[32+rsp]1220 jmp NEAR $L$mulx4x_sub1221 1222 ALIGN 321223 $L$mulx4x_sub:1224 mov r11,QWORD[rbx]1225 mov r12,QWORD[8+rbx]1226 mov r13,QWORD[16+rbx]1227 mov r14,QWORD[24+rbx]1228 lea rbx,[32+rbx]1229 sbb r11,QWORD[rcx]1230 sbb r12,QWORD[8+rcx]1231 sbb r13,QWORD[16+rcx]1232 sbb r14,QWORD[24+rcx]1233 lea rcx,[32+rcx]1234 mov QWORD[rdi],r111235 mov QWORD[8+rdi],r121236 mov QWORD[16+rdi],r131237 mov QWORD[24+rdi],r141238 lea rdi,[32+rdi]1239 dec rax1240 jnz NEAR $L$mulx4x_sub1241 1242 sbb r15,01243 lea rbx,[64+rsp]1244 sub rdi,rdx1245 1246 DB 102,73,15,110,2071247 pxor xmm0,xmm01248 pshufd xmm1,xmm1,01249 mov rsi,QWORD[40+rsp]1250 1251 jmp NEAR $L$mulx4x_cond_copy1252 1253 ALIGN 321254 $L$mulx4x_cond_copy:1255 movdqa xmm2,XMMWORD[rbx]1256 movdqa xmm3,XMMWORD[16+rbx]1257 lea rbx,[32+rbx]1258 movdqu xmm4,XMMWORD[rdi]1259 movdqu xmm5,XMMWORD[16+rdi]1260 lea rdi,[32+rdi]1261 movdqa XMMWORD[(-32)+rbx],xmm01262 movdqa XMMWORD[(-16)+rbx],xmm01263 pcmpeqd xmm0,xmm11264 pand xmm2,xmm11265 pand xmm3,xmm11266 pand xmm4,xmm01267 pand xmm5,xmm01268 pxor xmm0,xmm01269 por xmm4,xmm21270 por xmm5,xmm31271 movdqu XMMWORD[(-32)+rdi],xmm41272 movdqu XMMWORD[(-16)+rdi],xmm51273 sub rdx,321274 jnz NEAR $L$mulx4x_cond_copy1275 1276 mov QWORD[rbx],rdx1277 1278 mov rax,11279 mov r15,QWORD[((-48))+rsi]1280 1281 mov r14,QWORD[((-40))+rsi]1282 1283 mov r13,QWORD[((-32))+rsi]1284 1285 mov r12,QWORD[((-24))+rsi]1286 1287 mov rbp,QWORD[((-16))+rsi]1288 1289 mov rbx,QWORD[((-8))+rsi]1290 1291 lea rsp,[rsi]1292 1293 $L$mulx4x_epilogue:1294 mov rdi,QWORD[8+rsp] ;WIN64 epilogue1295 mov rsi,QWORD[16+rsp]1296 DB 0F3h,0C3h ;repret1297 1298 $L$SEH_end_bn_mulx4x_mont:1299 905 DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 1300 906 DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 … … 1448 1054 DD $L$SEH_end_bn_sqr8x_mont wrt ..imagebase 1449 1055 DD $L$SEH_info_bn_sqr8x_mont wrt ..imagebase 1450 DD $L$SEH_begin_bn_mulx4x_mont wrt ..imagebase1451 DD $L$SEH_end_bn_mulx4x_mont wrt ..imagebase1452 DD $L$SEH_info_bn_mulx4x_mont wrt ..imagebase1453 1056 section .xdata rdata align=8 1454 1057 ALIGN 8 … … 1466 1069 DD $L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase 1467 1070 ALIGN 8 1468 $L$SEH_info_bn_mulx4x_mont:1469 DB 9,0,0,01470 DD sqr_handler wrt ..imagebase1471 DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase1472 ALIGN 8 -
trunk/src/libs/openssl-3.0.1/crypto/genasm-nasm/x86_64-mont5.S
r94083 r94114 30 30 test r9d,7 31 31 jnz NEAR $L$mul_enter 32 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]33 32 jmp NEAR $L$mul4x_enter 34 33 … … 481 480 482 481 $L$mul4x_enter: 483 and r11d,0x80108484 cmp r11d,0x80108485 je NEAR $L$mulx4x_enter486 482 push rbx 487 483 … … 1127 1123 mov rax,rsp 1128 1124 1129 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]1130 and r11d,0x801081131 cmp r11d,0x801081132 je NEAR $L$powerx5_enter1133 1125 push rbx 1134 1126 … … 2234 2226 mov rbp,rcx 2235 2227 DB 102,73,15,110,218 2236 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]2237 and r11d,0x801082238 cmp r11d,0x801082239 jne NEAR $L$from_mont_nox2240 2241 lea rdi,[r9*1+rax]2242 call __bn_sqrx8x_reduction2243 call __bn_postx4x_internal2244 2245 pxor xmm0,xmm02246 lea rax,[48+rsp]2247 jmp NEAR $L$from_mont_zero2248 2249 ALIGN 322250 $L$from_mont_nox:2251 2228 call __bn_sqr8x_reduction 2252 2229 call __bn_post4x_internal … … 2289 2266 2290 2267 $L$SEH_end_bn_from_mont8x: 2291 2292 ALIGN 322293 bn_mulx4x_mont_gather5:2294 mov QWORD[8+rsp],rdi ;WIN64 prologue2295 mov QWORD[16+rsp],rsi2296 mov rax,rsp2297 $L$SEH_begin_bn_mulx4x_mont_gather5:2298 mov rdi,rcx2299 mov rsi,rdx2300 mov rdx,r82301 mov rcx,r92302 mov r8,QWORD[40+rsp]2303 mov r9,QWORD[48+rsp]2304 2305 2306 2307 mov rax,rsp2308 2309 $L$mulx4x_enter:2310 push rbx2311 2312 push rbp2313 2314 push r122315 2316 push r132317 2318 push r142319 2320 push r152321 2322 $L$mulx4x_prologue:2323 2324 shl r9d,32325 lea r10,[r9*2+r9]2326 neg r92327 mov r8,QWORD[r8]2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 lea r11,[((-320))+r9*2+rsp]2339 mov rbp,rsp2340 sub r11,rdi2341 and r11,40952342 cmp r10,r112343 jb NEAR $L$mulx4xsp_alt2344 sub rbp,r112345 lea rbp,[((-320))+r9*2+rbp]2346 jmp NEAR $L$mulx4xsp_done2347 2348 $L$mulx4xsp_alt:2349 lea r10,[((4096-320))+r9*2]2350 lea rbp,[((-320))+r9*2+rbp]2351 sub r11,r102352 mov r10,02353 cmovc r11,r102354 sub rbp,r112355 $L$mulx4xsp_done:2356 and rbp,-642357 mov r11,rsp2358 sub r11,rbp2359 and r11,-40962360 lea rsp,[rbp*1+r11]2361 mov r10,QWORD[rsp]2362 cmp rsp,rbp2363 ja NEAR $L$mulx4x_page_walk2364 jmp NEAR $L$mulx4x_page_walk_done2365 2366 $L$mulx4x_page_walk:2367 lea rsp,[((-4096))+rsp]2368 mov r10,QWORD[rsp]2369 cmp rsp,rbp2370 ja NEAR $L$mulx4x_page_walk2371 $L$mulx4x_page_walk_done:2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 mov QWORD[32+rsp],r82386 mov QWORD[40+rsp],rax2387 2388 $L$mulx4x_body:2389 call mulx4x_internal2390 2391 mov rsi,QWORD[40+rsp]2392 2393 mov rax,12394 2395 mov r15,QWORD[((-48))+rsi]2396 2397 mov r14,QWORD[((-40))+rsi]2398 2399 mov r13,QWORD[((-32))+rsi]2400 2401 mov r12,QWORD[((-24))+rsi]2402 2403 mov rbp,QWORD[((-16))+rsi]2404 2405 mov rbx,QWORD[((-8))+rsi]2406 2407 lea rsp,[rsi]2408 2409 $L$mulx4x_epilogue:2410 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2411 mov rsi,QWORD[16+rsp]2412 DB 0F3h,0C3h ;repret2413 2414 $L$SEH_end_bn_mulx4x_mont_gather5:2415 2416 2417 ALIGN 322418 mulx4x_internal:2419 2420 mov QWORD[8+rsp],r92421 mov r10,r92422 neg r92423 shl r9,52424 neg r102425 lea r13,[128+r9*1+rdx]2426 shr r9,5+52427 movd xmm5,DWORD[56+rax]2428 sub r9,12429 lea rax,[$L$inc]2430 mov QWORD[((16+8))+rsp],r132431 mov QWORD[((24+8))+rsp],r92432 mov QWORD[((56+8))+rsp],rdi2433 movdqa xmm0,XMMWORD[rax]2434 movdqa xmm1,XMMWORD[16+rax]2435 lea r10,[((88-112))+r10*1+rsp]2436 lea rdi,[128+rdx]2437 2438 pshufd xmm5,xmm5,02439 movdqa xmm4,xmm12440 DB 0x672441 movdqa xmm2,xmm12442 DB 0x672443 paddd xmm1,xmm02444 pcmpeqd xmm0,xmm52445 movdqa xmm3,xmm42446 paddd xmm2,xmm12447 pcmpeqd xmm1,xmm52448 movdqa XMMWORD[112+r10],xmm02449 movdqa xmm0,xmm42450 2451 paddd xmm3,xmm22452 pcmpeqd xmm2,xmm52453 movdqa XMMWORD[128+r10],xmm12454 movdqa xmm1,xmm42455 2456 paddd xmm0,xmm32457 pcmpeqd xmm3,xmm52458 movdqa XMMWORD[144+r10],xmm22459 movdqa xmm2,xmm42460 2461 paddd xmm1,xmm02462 pcmpeqd xmm0,xmm52463 movdqa XMMWORD[160+r10],xmm32464 movdqa xmm3,xmm42465 paddd xmm2,xmm12466 pcmpeqd xmm1,xmm52467 movdqa XMMWORD[176+r10],xmm02468 movdqa xmm0,xmm42469 2470 paddd xmm3,xmm22471 pcmpeqd xmm2,xmm52472 movdqa XMMWORD[192+r10],xmm12473 movdqa xmm1,xmm42474 2475 paddd xmm0,xmm32476 pcmpeqd xmm3,xmm52477 movdqa XMMWORD[208+r10],xmm22478 movdqa xmm2,xmm42479 2480 paddd xmm1,xmm02481 pcmpeqd xmm0,xmm52482 movdqa XMMWORD[224+r10],xmm32483 movdqa xmm3,xmm42484 paddd xmm2,xmm12485 pcmpeqd xmm1,xmm52486 movdqa XMMWORD[240+r10],xmm02487 movdqa xmm0,xmm42488 2489 paddd xmm3,xmm22490 pcmpeqd xmm2,xmm52491 movdqa XMMWORD[256+r10],xmm12492 movdqa xmm1,xmm42493 2494 paddd xmm0,xmm32495 pcmpeqd xmm3,xmm52496 movdqa XMMWORD[272+r10],xmm22497 movdqa xmm2,xmm42498 2499 paddd xmm1,xmm02500 pcmpeqd xmm0,xmm52501 movdqa XMMWORD[288+r10],xmm32502 movdqa xmm3,xmm42503 DB 0x672504 paddd xmm2,xmm12505 pcmpeqd xmm1,xmm52506 movdqa XMMWORD[304+r10],xmm02507 2508 paddd xmm3,xmm22509 pcmpeqd xmm2,xmm52510 movdqa XMMWORD[320+r10],xmm12511 2512 pcmpeqd xmm3,xmm52513 movdqa XMMWORD[336+r10],xmm22514 2515 pand xmm0,XMMWORD[64+rdi]2516 pand xmm1,XMMWORD[80+rdi]2517 pand xmm2,XMMWORD[96+rdi]2518 movdqa XMMWORD[352+r10],xmm32519 pand xmm3,XMMWORD[112+rdi]2520 por xmm0,xmm22521 por xmm1,xmm32522 movdqa xmm4,XMMWORD[((-128))+rdi]2523 movdqa xmm5,XMMWORD[((-112))+rdi]2524 movdqa xmm2,XMMWORD[((-96))+rdi]2525 pand xmm4,XMMWORD[112+r10]2526 movdqa xmm3,XMMWORD[((-80))+rdi]2527 pand xmm5,XMMWORD[128+r10]2528 por xmm0,xmm42529 pand xmm2,XMMWORD[144+r10]2530 por xmm1,xmm52531 pand xmm3,XMMWORD[160+r10]2532 por xmm0,xmm22533 por xmm1,xmm32534 movdqa xmm4,XMMWORD[((-64))+rdi]2535 movdqa xmm5,XMMWORD[((-48))+rdi]2536 movdqa xmm2,XMMWORD[((-32))+rdi]2537 pand xmm4,XMMWORD[176+r10]2538 movdqa xmm3,XMMWORD[((-16))+rdi]2539 pand xmm5,XMMWORD[192+r10]2540 por xmm0,xmm42541 pand xmm2,XMMWORD[208+r10]2542 por xmm1,xmm52543 pand xmm3,XMMWORD[224+r10]2544 por xmm0,xmm22545 por xmm1,xmm32546 movdqa xmm4,XMMWORD[rdi]2547 movdqa xmm5,XMMWORD[16+rdi]2548 movdqa xmm2,XMMWORD[32+rdi]2549 pand xmm4,XMMWORD[240+r10]2550 movdqa xmm3,XMMWORD[48+rdi]2551 pand xmm5,XMMWORD[256+r10]2552 por xmm0,xmm42553 pand xmm2,XMMWORD[272+r10]2554 por xmm1,xmm52555 pand xmm3,XMMWORD[288+r10]2556 por xmm0,xmm22557 por xmm1,xmm32558 pxor xmm0,xmm12559 pshufd xmm1,xmm0,0x4e2560 por xmm0,xmm12561 lea rdi,[256+rdi]2562 DB 102,72,15,126,1942563 lea rbx,[((64+32+8))+rsp]2564 2565 mov r9,rdx2566 mulx rax,r8,QWORD[rsi]2567 mulx r12,r11,QWORD[8+rsi]2568 add r11,rax2569 mulx r13,rax,QWORD[16+rsi]2570 adc r12,rax2571 adc r13,02572 mulx r14,rax,QWORD[24+rsi]2573 2574 mov r15,r82575 imul r8,QWORD[((32+8))+rsp]2576 xor rbp,rbp2577 mov rdx,r82578 2579 mov QWORD[((8+8))+rsp],rdi2580 2581 lea rsi,[32+rsi]2582 adcx r13,rax2583 adcx r14,rbp2584 2585 mulx r10,rax,QWORD[rcx]2586 adcx r15,rax2587 adox r10,r112588 mulx r11,rax,QWORD[8+rcx]2589 adcx r10,rax2590 adox r11,r122591 mulx r12,rax,QWORD[16+rcx]2592 mov rdi,QWORD[((24+8))+rsp]2593 mov QWORD[((-32))+rbx],r102594 adcx r11,rax2595 adox r12,r132596 mulx r15,rax,QWORD[24+rcx]2597 mov rdx,r92598 mov QWORD[((-24))+rbx],r112599 adcx r12,rax2600 adox r15,rbp2601 lea rcx,[32+rcx]2602 mov QWORD[((-16))+rbx],r122603 jmp NEAR $L$mulx4x_1st2604 2605 ALIGN 322606 $L$mulx4x_1st:2607 adcx r15,rbp2608 mulx rax,r10,QWORD[rsi]2609 adcx r10,r142610 mulx r14,r11,QWORD[8+rsi]2611 adcx r11,rax2612 mulx rax,r12,QWORD[16+rsi]2613 adcx r12,r142614 mulx r14,r13,QWORD[24+rsi]2615 DB 0x67,0x672616 mov rdx,r82617 adcx r13,rax2618 adcx r14,rbp2619 lea rsi,[32+rsi]2620 lea rbx,[32+rbx]2621 2622 adox r10,r152623 mulx r15,rax,QWORD[rcx]2624 adcx r10,rax2625 adox r11,r152626 mulx r15,rax,QWORD[8+rcx]2627 adcx r11,rax2628 adox r12,r152629 mulx r15,rax,QWORD[16+rcx]2630 mov QWORD[((-40))+rbx],r102631 adcx r12,rax2632 mov QWORD[((-32))+rbx],r112633 adox r13,r152634 mulx r15,rax,QWORD[24+rcx]2635 mov rdx,r92636 mov QWORD[((-24))+rbx],r122637 adcx r13,rax2638 adox r15,rbp2639 lea rcx,[32+rcx]2640 mov QWORD[((-16))+rbx],r132641 2642 dec rdi2643 jnz NEAR $L$mulx4x_1st2644 2645 mov rax,QWORD[8+rsp]2646 adc r15,rbp2647 lea rsi,[rax*1+rsi]2648 add r14,r152649 mov rdi,QWORD[((8+8))+rsp]2650 adc rbp,rbp2651 mov QWORD[((-8))+rbx],r142652 jmp NEAR $L$mulx4x_outer2653 2654 ALIGN 322655 $L$mulx4x_outer:2656 lea r10,[((16-256))+rbx]2657 pxor xmm4,xmm42658 DB 0x67,0x672659 pxor xmm5,xmm52660 movdqa xmm0,XMMWORD[((-128))+rdi]2661 movdqa xmm1,XMMWORD[((-112))+rdi]2662 movdqa xmm2,XMMWORD[((-96))+rdi]2663 pand xmm0,XMMWORD[256+r10]2664 movdqa xmm3,XMMWORD[((-80))+rdi]2665 pand xmm1,XMMWORD[272+r10]2666 por xmm4,xmm02667 pand xmm2,XMMWORD[288+r10]2668 por xmm5,xmm12669 pand xmm3,XMMWORD[304+r10]2670 por xmm4,xmm22671 por xmm5,xmm32672 movdqa xmm0,XMMWORD[((-64))+rdi]2673 movdqa xmm1,XMMWORD[((-48))+rdi]2674 movdqa xmm2,XMMWORD[((-32))+rdi]2675 pand xmm0,XMMWORD[320+r10]2676 movdqa xmm3,XMMWORD[((-16))+rdi]2677 pand xmm1,XMMWORD[336+r10]2678 por xmm4,xmm02679 pand xmm2,XMMWORD[352+r10]2680 por xmm5,xmm12681 pand xmm3,XMMWORD[368+r10]2682 por xmm4,xmm22683 por xmm5,xmm32684 movdqa xmm0,XMMWORD[rdi]2685 movdqa xmm1,XMMWORD[16+rdi]2686 movdqa xmm2,XMMWORD[32+rdi]2687 pand xmm0,XMMWORD[384+r10]2688 movdqa xmm3,XMMWORD[48+rdi]2689 pand xmm1,XMMWORD[400+r10]2690 por xmm4,xmm02691 pand xmm2,XMMWORD[416+r10]2692 por xmm5,xmm12693 pand xmm3,XMMWORD[432+r10]2694 por xmm4,xmm22695 por xmm5,xmm32696 movdqa xmm0,XMMWORD[64+rdi]2697 movdqa xmm1,XMMWORD[80+rdi]2698 movdqa xmm2,XMMWORD[96+rdi]2699 pand xmm0,XMMWORD[448+r10]2700 movdqa xmm3,XMMWORD[112+rdi]2701 pand xmm1,XMMWORD[464+r10]2702 por xmm4,xmm02703 pand xmm2,XMMWORD[480+r10]2704 por xmm5,xmm12705 pand xmm3,XMMWORD[496+r10]2706 por xmm4,xmm22707 por xmm5,xmm32708 por xmm4,xmm52709 pshufd xmm0,xmm4,0x4e2710 por xmm0,xmm42711 lea rdi,[256+rdi]2712 DB 102,72,15,126,1942713 2714 mov QWORD[rbx],rbp2715 lea rbx,[32+rax*1+rbx]2716 mulx r11,r8,QWORD[rsi]2717 xor rbp,rbp2718 mov r9,rdx2719 mulx r12,r14,QWORD[8+rsi]2720 adox r8,QWORD[((-32))+rbx]2721 adcx r11,r142722 mulx r13,r15,QWORD[16+rsi]2723 adox r11,QWORD[((-24))+rbx]2724 adcx r12,r152725 mulx r14,rdx,QWORD[24+rsi]2726 adox r12,QWORD[((-16))+rbx]2727 adcx r13,rdx2728 lea rcx,[rax*1+rcx]2729 lea rsi,[32+rsi]2730 adox r13,QWORD[((-8))+rbx]2731 adcx r14,rbp2732 adox r14,rbp2733 2734 mov r15,r82735 imul r8,QWORD[((32+8))+rsp]2736 2737 mov rdx,r82738 xor rbp,rbp2739 mov QWORD[((8+8))+rsp],rdi2740 2741 mulx r10,rax,QWORD[rcx]2742 adcx r15,rax2743 adox r10,r112744 mulx r11,rax,QWORD[8+rcx]2745 adcx r10,rax2746 adox r11,r122747 mulx r12,rax,QWORD[16+rcx]2748 adcx r11,rax2749 adox r12,r132750 mulx r15,rax,QWORD[24+rcx]2751 mov rdx,r92752 mov rdi,QWORD[((24+8))+rsp]2753 mov QWORD[((-32))+rbx],r102754 adcx r12,rax2755 mov QWORD[((-24))+rbx],r112756 adox r15,rbp2757 mov QWORD[((-16))+rbx],r122758 lea rcx,[32+rcx]2759 jmp NEAR $L$mulx4x_inner2760 2761 ALIGN 322762 $L$mulx4x_inner:2763 mulx rax,r10,QWORD[rsi]2764 adcx r15,rbp2765 adox r10,r142766 mulx r14,r11,QWORD[8+rsi]2767 adcx r10,QWORD[rbx]2768 adox r11,rax2769 mulx rax,r12,QWORD[16+rsi]2770 adcx r11,QWORD[8+rbx]2771 adox r12,r142772 mulx r14,r13,QWORD[24+rsi]2773 mov rdx,r82774 adcx r12,QWORD[16+rbx]2775 adox r13,rax2776 adcx r13,QWORD[24+rbx]2777 adox r14,rbp2778 lea rsi,[32+rsi]2779 lea rbx,[32+rbx]2780 adcx r14,rbp2781 2782 adox r10,r152783 mulx r15,rax,QWORD[rcx]2784 adcx r10,rax2785 adox r11,r152786 mulx r15,rax,QWORD[8+rcx]2787 adcx r11,rax2788 adox r12,r152789 mulx r15,rax,QWORD[16+rcx]2790 mov QWORD[((-40))+rbx],r102791 adcx r12,rax2792 adox r13,r152793 mov QWORD[((-32))+rbx],r112794 mulx r15,rax,QWORD[24+rcx]2795 mov rdx,r92796 lea rcx,[32+rcx]2797 mov QWORD[((-24))+rbx],r122798 adcx r13,rax2799 adox r15,rbp2800 mov QWORD[((-16))+rbx],r132801 2802 dec rdi2803 jnz NEAR $L$mulx4x_inner2804 2805 mov rax,QWORD[((0+8))+rsp]2806 adc r15,rbp2807 sub rdi,QWORD[rbx]2808 mov rdi,QWORD[((8+8))+rsp]2809 mov r10,QWORD[((16+8))+rsp]2810 adc r14,r152811 lea rsi,[rax*1+rsi]2812 adc rbp,rbp2813 mov QWORD[((-8))+rbx],r142814 2815 cmp rdi,r102816 jb NEAR $L$mulx4x_outer2817 2818 mov r10,QWORD[((-8))+rcx]2819 mov r8,rbp2820 mov r12,QWORD[rax*1+rcx]2821 lea rbp,[rax*1+rcx]2822 mov rcx,rax2823 lea rdi,[rax*1+rbx]2824 xor eax,eax2825 xor r15,r152826 sub r10,r142827 adc r15,r152828 or r8,r152829 sar rcx,3+22830 sub rax,r82831 mov rdx,QWORD[((56+8))+rsp]2832 dec r122833 mov r13,QWORD[8+rbp]2834 xor r8,r82835 mov r14,QWORD[16+rbp]2836 mov r15,QWORD[24+rbp]2837 jmp NEAR $L$sqrx4x_sub_entry2838 2839 2840 2841 ALIGN 322842 bn_powerx5:2843 mov QWORD[8+rsp],rdi ;WIN64 prologue2844 mov QWORD[16+rsp],rsi2845 mov rax,rsp2846 $L$SEH_begin_bn_powerx5:2847 mov rdi,rcx2848 mov rsi,rdx2849 mov rdx,r82850 mov rcx,r92851 mov r8,QWORD[40+rsp]2852 mov r9,QWORD[48+rsp]2853 2854 2855 2856 mov rax,rsp2857 2858 $L$powerx5_enter:2859 push rbx2860 2861 push rbp2862 2863 push r122864 2865 push r132866 2867 push r142868 2869 push r152870 2871 $L$powerx5_prologue:2872 2873 shl r9d,32874 lea r10,[r9*2+r9]2875 neg r92876 mov r8,QWORD[r8]2877 2878 2879 2880 2881 2882 2883 2884 2885 lea r11,[((-320))+r9*2+rsp]2886 mov rbp,rsp2887 sub r11,rdi2888 and r11,40952889 cmp r10,r112890 jb NEAR $L$pwrx_sp_alt2891 sub rbp,r112892 lea rbp,[((-320))+r9*2+rbp]2893 jmp NEAR $L$pwrx_sp_done2894 2895 ALIGN 322896 $L$pwrx_sp_alt:2897 lea r10,[((4096-320))+r9*2]2898 lea rbp,[((-320))+r9*2+rbp]2899 sub r11,r102900 mov r10,02901 cmovc r11,r102902 sub rbp,r112903 $L$pwrx_sp_done:2904 and rbp,-642905 mov r11,rsp2906 sub r11,rbp2907 and r11,-40962908 lea rsp,[rbp*1+r11]2909 mov r10,QWORD[rsp]2910 cmp rsp,rbp2911 ja NEAR $L$pwrx_page_walk2912 jmp NEAR $L$pwrx_page_walk_done2913 2914 $L$pwrx_page_walk:2915 lea rsp,[((-4096))+rsp]2916 mov r10,QWORD[rsp]2917 cmp rsp,rbp2918 ja NEAR $L$pwrx_page_walk2919 $L$pwrx_page_walk_done:2920 2921 mov r10,r92922 neg r92923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 pxor xmm0,xmm02936 DB 102,72,15,110,2072937 DB 102,72,15,110,2092938 DB 102,73,15,110,2182939 DB 102,72,15,110,2262940 mov QWORD[32+rsp],r82941 mov QWORD[40+rsp],rax2942 2943 $L$powerx5_body:2944 2945 call __bn_sqrx8x_internal2946 call __bn_postx4x_internal2947 call __bn_sqrx8x_internal2948 call __bn_postx4x_internal2949 call __bn_sqrx8x_internal2950 call __bn_postx4x_internal2951 call __bn_sqrx8x_internal2952 call __bn_postx4x_internal2953 call __bn_sqrx8x_internal2954 call __bn_postx4x_internal2955 2956 mov r9,r102957 mov rdi,rsi2958 DB 102,72,15,126,2092959 DB 102,72,15,126,2262960 mov rax,QWORD[40+rsp]2961 2962 call mulx4x_internal2963 2964 mov rsi,QWORD[40+rsp]2965 2966 mov rax,12967 2968 mov r15,QWORD[((-48))+rsi]2969 2970 mov r14,QWORD[((-40))+rsi]2971 2972 mov r13,QWORD[((-32))+rsi]2973 2974 mov r12,QWORD[((-24))+rsi]2975 2976 mov rbp,QWORD[((-16))+rsi]2977 2978 mov rbx,QWORD[((-8))+rsi]2979 2980 lea rsp,[rsi]2981 2982 $L$powerx5_epilogue:2983 mov rdi,QWORD[8+rsp] ;WIN64 epilogue2984 mov rsi,QWORD[16+rsp]2985 DB 0F3h,0C3h ;repret2986 2987 $L$SEH_end_bn_powerx5:2988 2989 global bn_sqrx8x_internal2990 2991 2992 ALIGN 322993 bn_sqrx8x_internal:2994 __bn_sqrx8x_internal:2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 lea rdi,[((48+8))+rsp]3037 lea rbp,[r9*1+rsi]3038 mov QWORD[((0+8))+rsp],r93039 mov QWORD[((8+8))+rsp],rbp3040 jmp NEAR $L$sqr8x_zero_start3041 3042 ALIGN 323043 DB 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x003044 $L$sqrx8x_zero:3045 DB 0x3e3046 movdqa XMMWORD[rdi],xmm03047 movdqa XMMWORD[16+rdi],xmm03048 movdqa XMMWORD[32+rdi],xmm03049 movdqa XMMWORD[48+rdi],xmm03050 $L$sqr8x_zero_start:3051 movdqa XMMWORD[64+rdi],xmm03052 movdqa XMMWORD[80+rdi],xmm03053 movdqa XMMWORD[96+rdi],xmm03054 movdqa XMMWORD[112+rdi],xmm03055 lea rdi,[128+rdi]3056 sub r9,643057 jnz NEAR $L$sqrx8x_zero3058 3059 mov rdx,QWORD[rsi]3060 3061 xor r10,r103062 xor r11,r113063 xor r12,r123064 xor r13,r133065 xor r14,r143066 xor r15,r153067 lea rdi,[((48+8))+rsp]3068 xor rbp,rbp3069 jmp NEAR $L$sqrx8x_outer_loop3070 3071 ALIGN 323072 $L$sqrx8x_outer_loop:3073 mulx rax,r8,QWORD[8+rsi]3074 adcx r8,r93075 adox r10,rax3076 mulx rax,r9,QWORD[16+rsi]3077 adcx r9,r103078 adox r11,rax3079 DB 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x003080 adcx r10,r113081 adox r12,rax3082 DB 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x003083 adcx r11,r123084 adox r13,rax3085 mulx rax,r12,QWORD[40+rsi]3086 adcx r12,r133087 adox r14,rax3088 mulx rax,r13,QWORD[48+rsi]3089 adcx r13,r143090 adox rax,r153091 mulx r15,r14,QWORD[56+rsi]3092 mov rdx,QWORD[8+rsi]3093 adcx r14,rax3094 adox r15,rbp3095 adc r15,QWORD[64+rdi]3096 mov QWORD[8+rdi],r83097 mov QWORD[16+rdi],r93098 sbb rcx,rcx3099 xor rbp,rbp3100 3101 3102 mulx rbx,r8,QWORD[16+rsi]3103 mulx rax,r9,QWORD[24+rsi]3104 adcx r8,r103105 adox r9,rbx3106 mulx rbx,r10,QWORD[32+rsi]3107 adcx r9,r113108 adox r10,rax3109 DB 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x003110 adcx r10,r123111 adox r11,rbx3112 DB 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x003113 adcx r11,r133114 adox r12,r143115 DB 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x003116 mov rdx,QWORD[16+rsi]3117 adcx r12,rax3118 adox r13,rbx3119 adcx r13,r153120 adox r14,rbp3121 adcx r14,rbp3122 3123 mov QWORD[24+rdi],r83124 mov QWORD[32+rdi],r93125 3126 mulx rbx,r8,QWORD[24+rsi]3127 mulx rax,r9,QWORD[32+rsi]3128 adcx r8,r103129 adox r9,rbx3130 mulx rbx,r10,QWORD[40+rsi]3131 adcx r9,r113132 adox r10,rax3133 DB 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x003134 adcx r10,r123135 adox r11,r133136 DB 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x003137 DB 0x3e3138 mov rdx,QWORD[24+rsi]3139 adcx r11,rbx3140 adox r12,rax3141 adcx r12,r143142 mov QWORD[40+rdi],r83143 mov QWORD[48+rdi],r93144 mulx rax,r8,QWORD[32+rsi]3145 adox r13,rbp3146 adcx r13,rbp3147 3148 mulx rbx,r9,QWORD[40+rsi]3149 adcx r8,r103150 adox r9,rax3151 mulx rax,r10,QWORD[48+rsi]3152 adcx r9,r113153 adox r10,r123154 mulx r12,r11,QWORD[56+rsi]3155 mov rdx,QWORD[32+rsi]3156 mov r14,QWORD[40+rsi]3157 adcx r10,rbx3158 adox r11,rax3159 mov r15,QWORD[48+rsi]3160 adcx r11,r133161 adox r12,rbp3162 adcx r12,rbp3163 3164 mov QWORD[56+rdi],r83165 mov QWORD[64+rdi],r93166 3167 mulx rax,r9,r143168 mov r8,QWORD[56+rsi]3169 adcx r9,r103170 mulx rbx,r10,r153171 adox r10,rax3172 adcx r10,r113173 mulx rax,r11,r83174 mov rdx,r143175 adox r11,rbx3176 adcx r11,r123177 3178 adcx rax,rbp3179 3180 mulx rbx,r14,r153181 mulx r13,r12,r83182 mov rdx,r153183 lea rsi,[64+rsi]3184 adcx r11,r143185 adox r12,rbx3186 adcx r12,rax3187 adox r13,rbp3188 3189 DB 0x67,0x673190 mulx r14,r8,r83191 adcx r13,r83192 adcx r14,rbp3193 3194 cmp rsi,QWORD[((8+8))+rsp]3195 je NEAR $L$sqrx8x_outer_break3196 3197 neg rcx3198 mov rcx,-83199 mov r15,rbp3200 mov r8,QWORD[64+rdi]3201 adcx r9,QWORD[72+rdi]3202 adcx r10,QWORD[80+rdi]3203 adcx r11,QWORD[88+rdi]3204 adc r12,QWORD[96+rdi]3205 adc r13,QWORD[104+rdi]3206 adc r14,QWORD[112+rdi]3207 adc r15,QWORD[120+rdi]3208 lea rbp,[rsi]3209 lea rdi,[128+rdi]3210 sbb rax,rax3211 3212 mov rdx,QWORD[((-64))+rsi]3213 mov QWORD[((16+8))+rsp],rax3214 mov QWORD[((24+8))+rsp],rdi3215 3216 3217 xor eax,eax3218 jmp NEAR $L$sqrx8x_loop3219 3220 ALIGN 323221 $L$sqrx8x_loop:3222 mov rbx,r83223 mulx r8,rax,QWORD[rbp]3224 adcx rbx,rax3225 adox r8,r93226 3227 mulx r9,rax,QWORD[8+rbp]3228 adcx r8,rax3229 adox r9,r103230 3231 mulx r10,rax,QWORD[16+rbp]3232 adcx r9,rax3233 adox r10,r113234 3235 mulx r11,rax,QWORD[24+rbp]3236 adcx r10,rax3237 adox r11,r123238 3239 DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x003240 adcx r11,rax3241 adox r12,r133242 3243 mulx r13,rax,QWORD[40+rbp]3244 adcx r12,rax3245 adox r13,r143246 3247 mulx r14,rax,QWORD[48+rbp]3248 mov QWORD[rcx*8+rdi],rbx3249 mov ebx,03250 adcx r13,rax3251 adox r14,r153252 3253 DB 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x003254 mov rdx,QWORD[8+rcx*8+rsi]3255 adcx r14,rax3256 adox r15,rbx3257 adcx r15,rbx3258 3259 DB 0x673260 inc rcx3261 jnz NEAR $L$sqrx8x_loop3262 3263 lea rbp,[64+rbp]3264 mov rcx,-83265 cmp rbp,QWORD[((8+8))+rsp]3266 je NEAR $L$sqrx8x_break3267 3268 sub rbx,QWORD[((16+8))+rsp]3269 DB 0x663270 mov rdx,QWORD[((-64))+rsi]3271 adcx r8,QWORD[rdi]3272 adcx r9,QWORD[8+rdi]3273 adc r10,QWORD[16+rdi]3274 adc r11,QWORD[24+rdi]3275 adc r12,QWORD[32+rdi]3276 adc r13,QWORD[40+rdi]3277 adc r14,QWORD[48+rdi]3278 adc r15,QWORD[56+rdi]3279 lea rdi,[64+rdi]3280 DB 0x673281 sbb rax,rax3282 xor ebx,ebx3283 mov QWORD[((16+8))+rsp],rax3284 jmp NEAR $L$sqrx8x_loop3285 3286 ALIGN 323287 $L$sqrx8x_break:3288 xor rbp,rbp3289 sub rbx,QWORD[((16+8))+rsp]3290 adcx r8,rbp3291 mov rcx,QWORD[((24+8))+rsp]3292 adcx r9,rbp3293 mov rdx,QWORD[rsi]3294 adc r10,03295 mov QWORD[rdi],r83296 adc r11,03297 adc r12,03298 adc r13,03299 adc r14,03300 adc r15,03301 cmp rdi,rcx3302 je NEAR $L$sqrx8x_outer_loop3303 3304 mov QWORD[8+rdi],r93305 mov r9,QWORD[8+rcx]3306 mov QWORD[16+rdi],r103307 mov r10,QWORD[16+rcx]3308 mov QWORD[24+rdi],r113309 mov r11,QWORD[24+rcx]3310 mov QWORD[32+rdi],r123311 mov r12,QWORD[32+rcx]3312 mov QWORD[40+rdi],r133313 mov r13,QWORD[40+rcx]3314 mov QWORD[48+rdi],r143315 mov r14,QWORD[48+rcx]3316 mov QWORD[56+rdi],r153317 mov r15,QWORD[56+rcx]3318 mov rdi,rcx3319 jmp NEAR $L$sqrx8x_outer_loop3320 3321 ALIGN 323322 $L$sqrx8x_outer_break:3323 mov QWORD[72+rdi],r93324 DB 102,72,15,126,2173325 mov QWORD[80+rdi],r103326 mov QWORD[88+rdi],r113327 mov QWORD[96+rdi],r123328 mov QWORD[104+rdi],r133329 mov QWORD[112+rdi],r143330 lea rdi,[((48+8))+rsp]3331 mov rdx,QWORD[rcx*1+rsi]3332 3333 mov r11,QWORD[8+rdi]3334 xor r10,r103335 mov r9,QWORD[((0+8))+rsp]3336 adox r11,r113337 mov r12,QWORD[16+rdi]3338 mov r13,QWORD[24+rdi]3339 3340 3341 ALIGN 323342 $L$sqrx4x_shift_n_add:3343 mulx rbx,rax,rdx3344 adox r12,r123345 adcx rax,r103346 DB 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x003347 DB 0x4c,0x8b,0x97,0x20,0x00,0x00,0x003348 adox r13,r133349 adcx rbx,r113350 mov r11,QWORD[40+rdi]3351 mov QWORD[rdi],rax3352 mov QWORD[8+rdi],rbx3353 3354 mulx rbx,rax,rdx3355 adox r10,r103356 adcx rax,r123357 mov rdx,QWORD[16+rcx*1+rsi]3358 mov r12,QWORD[48+rdi]3359 adox r11,r113360 adcx rbx,r133361 mov r13,QWORD[56+rdi]3362 mov QWORD[16+rdi],rax3363 mov QWORD[24+rdi],rbx3364 3365 mulx rbx,rax,rdx3366 adox r12,r123367 adcx rax,r103368 mov rdx,QWORD[24+rcx*1+rsi]3369 lea rcx,[32+rcx]3370 mov r10,QWORD[64+rdi]3371 adox r13,r133372 adcx rbx,r113373 mov r11,QWORD[72+rdi]3374 mov QWORD[32+rdi],rax3375 mov QWORD[40+rdi],rbx3376 3377 mulx rbx,rax,rdx3378 adox r10,r103379 adcx rax,r123380 jrcxz $L$sqrx4x_shift_n_add_break3381 DB 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x003382 adox r11,r113383 adcx rbx,r133384 mov r12,QWORD[80+rdi]3385 mov r13,QWORD[88+rdi]3386 mov QWORD[48+rdi],rax3387 mov QWORD[56+rdi],rbx3388 lea rdi,[64+rdi]3389 nop3390 jmp NEAR $L$sqrx4x_shift_n_add3391 3392 ALIGN 323393 $L$sqrx4x_shift_n_add_break:3394 adcx rbx,r133395 mov QWORD[48+rdi],rax3396 mov QWORD[56+rdi],rbx3397 lea rdi,[64+rdi]3398 DB 102,72,15,126,2133399 __bn_sqrx8x_reduction:3400 xor eax,eax3401 mov rbx,QWORD[((32+8))+rsp]3402 mov rdx,QWORD[((48+8))+rsp]3403 lea rcx,[((-64))+r9*1+rbp]3404 3405 mov QWORD[((0+8))+rsp],rcx3406 mov QWORD[((8+8))+rsp],rdi3407 3408 lea rdi,[((48+8))+rsp]3409 jmp NEAR $L$sqrx8x_reduction_loop3410 3411 ALIGN 323412 $L$sqrx8x_reduction_loop:3413 mov r9,QWORD[8+rdi]3414 mov r10,QWORD[16+rdi]3415 mov r11,QWORD[24+rdi]3416 mov r12,QWORD[32+rdi]3417 mov r8,rdx3418 imul rdx,rbx3419 mov r13,QWORD[40+rdi]3420 mov r14,QWORD[48+rdi]3421 mov r15,QWORD[56+rdi]3422 mov QWORD[((24+8))+rsp],rax3423 3424 lea rdi,[64+rdi]3425 xor rsi,rsi3426 mov rcx,-83427 jmp NEAR $L$sqrx8x_reduce3428 3429 ALIGN 323430 $L$sqrx8x_reduce:3431 mov rbx,r83432 mulx r8,rax,QWORD[rbp]3433 adcx rax,rbx3434 adox r8,r93435 3436 mulx r9,rbx,QWORD[8+rbp]3437 adcx r8,rbx3438 adox r9,r103439 3440 mulx r10,rbx,QWORD[16+rbp]3441 adcx r9,rbx3442 adox r10,r113443 3444 mulx r11,rbx,QWORD[24+rbp]3445 adcx r10,rbx3446 adox r11,r123447 3448 DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x003449 mov rax,rdx3450 mov rdx,r83451 adcx r11,rbx3452 adox r12,r133453 3454 mulx rdx,rbx,QWORD[((32+8))+rsp]3455 mov rdx,rax3456 mov QWORD[((64+48+8))+rcx*8+rsp],rax3457 3458 mulx r13,rax,QWORD[40+rbp]3459 adcx r12,rax3460 adox r13,r143461 3462 mulx r14,rax,QWORD[48+rbp]3463 adcx r13,rax3464 adox r14,r153465 3466 mulx r15,rax,QWORD[56+rbp]3467 mov rdx,rbx3468 adcx r14,rax3469 adox r15,rsi3470 adcx r15,rsi3471 3472 DB 0x67,0x67,0x673473 inc rcx3474 jnz NEAR $L$sqrx8x_reduce3475 3476 mov rax,rsi3477 cmp rbp,QWORD[((0+8))+rsp]3478 jae NEAR $L$sqrx8x_no_tail3479 3480 mov rdx,QWORD[((48+8))+rsp]3481 add r8,QWORD[rdi]3482 lea rbp,[64+rbp]3483 mov rcx,-83484 adcx r9,QWORD[8+rdi]3485 adcx r10,QWORD[16+rdi]3486 adc r11,QWORD[24+rdi]3487 adc r12,QWORD[32+rdi]3488 adc r13,QWORD[40+rdi]3489 adc r14,QWORD[48+rdi]3490 adc r15,QWORD[56+rdi]3491 lea rdi,[64+rdi]3492 sbb rax,rax3493 3494 xor rsi,rsi3495 mov QWORD[((16+8))+rsp],rax3496 jmp NEAR $L$sqrx8x_tail3497 3498 ALIGN 323499 $L$sqrx8x_tail:3500 mov rbx,r83501 mulx r8,rax,QWORD[rbp]3502 adcx rbx,rax3503 adox r8,r93504 3505 mulx r9,rax,QWORD[8+rbp]3506 adcx r8,rax3507 adox r9,r103508 3509 mulx r10,rax,QWORD[16+rbp]3510 adcx r9,rax3511 adox r10,r113512 3513 mulx r11,rax,QWORD[24+rbp]3514 adcx r10,rax3515 adox r11,r123516 3517 DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x003518 adcx r11,rax3519 adox r12,r133520 3521 mulx r13,rax,QWORD[40+rbp]3522 adcx r12,rax3523 adox r13,r143524 3525 mulx r14,rax,QWORD[48+rbp]3526 adcx r13,rax3527 adox r14,r153528 3529 mulx r15,rax,QWORD[56+rbp]3530 mov rdx,QWORD[((72+48+8))+rcx*8+rsp]3531 adcx r14,rax3532 adox r15,rsi3533 mov QWORD[rcx*8+rdi],rbx3534 mov rbx,r83535 adcx r15,rsi3536 3537 inc rcx3538 jnz NEAR $L$sqrx8x_tail3539 3540 cmp rbp,QWORD[((0+8))+rsp]3541 jae NEAR $L$sqrx8x_tail_done3542 3543 sub rsi,QWORD[((16+8))+rsp]3544 mov rdx,QWORD[((48+8))+rsp]3545 lea rbp,[64+rbp]3546 adc r8,QWORD[rdi]3547 adc r9,QWORD[8+rdi]3548 adc r10,QWORD[16+rdi]3549 adc r11,QWORD[24+rdi]3550 adc r12,QWORD[32+rdi]3551 adc r13,QWORD[40+rdi]3552 adc r14,QWORD[48+rdi]3553 adc r15,QWORD[56+rdi]3554 lea rdi,[64+rdi]3555 sbb rax,rax3556 sub rcx,83557 3558 xor rsi,rsi3559 mov QWORD[((16+8))+rsp],rax3560 jmp NEAR $L$sqrx8x_tail3561 3562 ALIGN 323563 $L$sqrx8x_tail_done:3564 xor rax,rax3565 add r8,QWORD[((24+8))+rsp]3566 adc r9,03567 adc r10,03568 adc r11,03569 adc r12,03570 adc r13,03571 adc r14,03572 adc r15,03573 adc rax,03574 3575 sub rsi,QWORD[((16+8))+rsp]3576 $L$sqrx8x_no_tail:3577 adc r8,QWORD[rdi]3578 DB 102,72,15,126,2173579 adc r9,QWORD[8+rdi]3580 mov rsi,QWORD[56+rbp]3581 DB 102,72,15,126,2133582 adc r10,QWORD[16+rdi]3583 adc r11,QWORD[24+rdi]3584 adc r12,QWORD[32+rdi]3585 adc r13,QWORD[40+rdi]3586 adc r14,QWORD[48+rdi]3587 adc r15,QWORD[56+rdi]3588 adc rax,03589 3590 mov rbx,QWORD[((32+8))+rsp]3591 mov rdx,QWORD[64+rcx*1+rdi]3592 3593 mov QWORD[rdi],r83594 lea r8,[64+rdi]3595 mov QWORD[8+rdi],r93596 mov QWORD[16+rdi],r103597 mov QWORD[24+rdi],r113598 mov QWORD[32+rdi],r123599 mov QWORD[40+rdi],r133600 mov QWORD[48+rdi],r143601 mov QWORD[56+rdi],r153602 3603 lea rdi,[64+rcx*1+rdi]3604 cmp r8,QWORD[((8+8))+rsp]3605 jb NEAR $L$sqrx8x_reduction_loop3606 DB 0F3h,0C3h ;repret3607 3608 3609 ALIGN 323610 __bn_postx4x_internal:3611 3612 mov r12,QWORD[rbp]3613 mov r10,rcx3614 mov r9,rcx3615 neg rax3616 sar rcx,3+23617 3618 DB 102,72,15,126,2023619 DB 102,72,15,126,2063620 dec r123621 mov r13,QWORD[8+rbp]3622 xor r8,r83623 mov r14,QWORD[16+rbp]3624 mov r15,QWORD[24+rbp]3625 jmp NEAR $L$sqrx4x_sub_entry3626 3627 ALIGN 163628 $L$sqrx4x_sub:3629 mov r12,QWORD[rbp]3630 mov r13,QWORD[8+rbp]3631 mov r14,QWORD[16+rbp]3632 mov r15,QWORD[24+rbp]3633 $L$sqrx4x_sub_entry:3634 andn r12,r12,rax3635 lea rbp,[32+rbp]3636 andn r13,r13,rax3637 andn r14,r14,rax3638 andn r15,r15,rax3639 3640 neg r83641 adc r12,QWORD[rdi]3642 adc r13,QWORD[8+rdi]3643 adc r14,QWORD[16+rdi]3644 adc r15,QWORD[24+rdi]3645 mov QWORD[rdx],r123646 lea rdi,[32+rdi]3647 mov QWORD[8+rdx],r133648 sbb r8,r83649 mov QWORD[16+rdx],r143650 mov QWORD[24+rdx],r153651 lea rdx,[32+rdx]3652 3653 inc rcx3654 jnz NEAR $L$sqrx4x_sub3655 3656 neg r93657 3658 DB 0F3h,0C3h ;repret3659 3660 3661 2268 global bn_get_bits5 3662 2269 … … 3995 2602 DD $L$SEH_end_bn_from_mont8x wrt ..imagebase 3996 2603 DD $L$SEH_info_bn_from_mont8x wrt ..imagebase 3997 DD $L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase3998 DD $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase3999 DD $L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase4000 4001 DD $L$SEH_begin_bn_powerx5 wrt ..imagebase4002 DD $L$SEH_end_bn_powerx5 wrt ..imagebase4003 DD $L$SEH_info_bn_powerx5 wrt ..imagebase4004 2604 DD $L$SEH_begin_bn_gather5 wrt ..imagebase 4005 2605 DD $L$SEH_end_bn_gather5 wrt ..imagebase … … 4028 2628 DD $L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase 4029 2629 ALIGN 8 4030 $L$SEH_info_bn_mulx4x_mont_gather5:4031 DB 9,0,0,04032 DD mul_handler wrt ..imagebase4033 DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase4034 ALIGN 84035 $L$SEH_info_bn_powerx5:4036 DB 9,0,0,04037 DD mul_handler wrt ..imagebase4038 DD $L$powerx5_prologue wrt ..imagebase,$L$powerx5_body wrt ..imagebase,$L$powerx5_epilogue wrt ..imagebase4039 ALIGN 84040 2630 $L$SEH_info_bn_gather5: 4041 2631 DB 0x01,0x0b,0x03,0x0a
Note:
See TracChangeset
for help on using the changeset viewer.