VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/x86_64-mont5.S@ 95219

Last change on this file since 95219 was 95219, checked in by vboxsync, 3 years ago

libs/openssl: Switched to v3.0.3, bugref:10128

File size: 62.7 KB
Line 
1default rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
6
7
8EXTERN OPENSSL_ia32cap_P
9
;-----------------------------------------------------------------------
; bn_mul_mont_gather5 -- exported entry point (Win64 calling convention).
; Presumably OpenSSL's Montgomery multiplication whose multiplier is
; gathered from a table of powers (inferred from the symbol name only).
;
; Incoming (Win64): rcx, rdx, r8, r9 = args 1-4; [40+rsp], [48+rsp],
; [56+rsp] = args 5-7.  They are re-homed into the SysV-style registers
; used by the body:
;   rdi = arg1, rsi = arg2, rdx = arg3, rcx = arg4, r8 = arg5, r9 = arg6
; The dword at [56+rsp] (arg7) is loaded into xmm5 on the generic path.
; rax keeps the caller's rsp so the epilogue can restore it.
;-----------------------------------------------------------------------
10global bn_mul_mont_gather5
11
12ALIGN 64
13bn_mul_mont_gather5:
; Save the Win64 callee-saved rdi/rsi in the caller-provided home slots.
14 mov QWORD[8+rsp],rdi ;WIN64 prologue
15 mov QWORD[16+rsp],rsi
16 mov rax,rsp
17$L$SEH_begin_bn_mul_mont_gather5:
18 mov rdi,rcx
19 mov rsi,rdx
20 mov rdx,r8
21 mov rcx,r9
22 mov r8,QWORD[40+rsp]
23 mov r9,QWORD[48+rsp]
24
25
26
; Writing r9d zero-extends into r9: only the low 32 bits of arg6 are used.
27 mov r9d,r9d
28 mov rax,rsp
29
; Dispatch: if arg6 is not a multiple of 8 take the generic path below,
; otherwise load the dword at OPENSSL_ia32cap_P+8 into r11d and continue
; at $L$mul4x_enter (inside bn_mul4x_mont_gather5), which tests r11d.
30 test r9d,7
31 jnz NEAR $L$mul_enter
32 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]
33 jmp NEAR $L$mul4x_enter
34
35ALIGN 16
36$L$mul_enter:
; xmm5 = arg7 (presumably the table index to gather -- TODO confirm).
; Must be read before the pushes move rsp.
37 movd xmm5,DWORD[56+rsp]
; Save callee-saved GPRs; the epilogue reloads them from -48..-8 relative
; to the original rsp kept in rax.
38 push rbx
39
40 push rbp
41
42 push r12
43
44 push r13
45
46 push r14
47
48 push r15
49
50
; r10 = (rsp - 280 - 8*r9) rounded down to a 1024-byte boundary: the new
; stack bottom.  r9 is negated only to form the address, then restored.
51 neg r9
52 mov r11,rsp
53 lea r10,[((-280))+r9*8+rsp]
54 neg r9
55 and r10,-1024
56
57
58
59
60
61
62
63
64
; Move rsp down to r10 in 4096-byte steps, reading one qword at every
; step so each page is touched on the way down (stack probing).
65 sub r11,r10
66 and r11,-4096
67 lea rsp,[r11*1+r10]
68 mov r11,QWORD[rsp]
69 cmp rsp,r10
70 ja NEAR $L$mul_page_walk
71 jmp NEAR $L$mul_page_walk_done
72
73$L$mul_page_walk:
74 lea rsp,[((-4096))+rsp]
75 mov r11,QWORD[rsp]
76 cmp rsp,r10
77 ja NEAR $L$mul_page_walk
78$L$mul_page_walk_done:
79
; r10 = address of the $L$inc constants (defined outside this view).
; The original rsp (rax) is parked at frame slot [8+r9*8+rsp]; the
; epilogue reads it back from the same slot.
80 lea r10,[$L$inc]
81 mov QWORD[8+r9*8+rsp],rax
82
;-----------------------------------------------------------------------
; $L$mul_body: build 16 selection masks on the stack and gather the
; first multiplier word into rbx.
;
; On entry: r10 = &$L$inc, xmm5 = index dword (lane 0), rdx = table
; pointer (arg3), r9 = word count, rsp = scratch frame.
;
; Each mask is a pcmpeqd of a running counter vector against the
; broadcast index, so a lane is all-ones only where counter == index.
; The counter start/step come from $L$inc, whose contents are not visible
; here (presumably {0,0,1,1} and {2,2,2,2} -- TODO confirm).
; The masks are stored at [112+r10] .. [352+r10] (16 x 16 bytes) and are
; read back by the $L$outer loop.
;
; The raw "DB 0x67" bytes are address-size prefixes attached to the next
; instruction; presumably code-alignment padding -- TODO confirm.
;-----------------------------------------------------------------------
83$L$mul_body:
84
; r12 = table + 128 so the 256-byte row is addressed as -128..+112.
85 lea r12,[128+rdx]
86 movdqa xmm0,XMMWORD[r10]
87 movdqa xmm1,XMMWORD[16+r10]
; r10 is re-purposed: 16-byte-aligned base of the mask array (minus 112).
88 lea r10,[((24-112))+r9*8+rsp]
89 and r10,-16
90
; Broadcast the index to all four dword lanes; xmm4 keeps the step.
91 pshufd xmm5,xmm5,0
92 movdqa xmm4,xmm1
93 movdqa xmm2,xmm1
94 paddd xmm1,xmm0
95 pcmpeqd xmm0,xmm5
96DB 0x67
97 movdqa xmm3,xmm4
98 paddd xmm2,xmm1
99 pcmpeqd xmm1,xmm5
100 movdqa XMMWORD[112+r10],xmm0
101 movdqa xmm0,xmm4
102
103 paddd xmm3,xmm2
104 pcmpeqd xmm2,xmm5
105 movdqa XMMWORD[128+r10],xmm1
106 movdqa xmm1,xmm4
107
108 paddd xmm0,xmm3
109 pcmpeqd xmm3,xmm5
110 movdqa XMMWORD[144+r10],xmm2
111 movdqa xmm2,xmm4
112
113 paddd xmm1,xmm0
114 pcmpeqd xmm0,xmm5
115 movdqa XMMWORD[160+r10],xmm3
116 movdqa xmm3,xmm4
117 paddd xmm2,xmm1
118 pcmpeqd xmm1,xmm5
119 movdqa XMMWORD[176+r10],xmm0
120 movdqa xmm0,xmm4
121
122 paddd xmm3,xmm2
123 pcmpeqd xmm2,xmm5
124 movdqa XMMWORD[192+r10],xmm1
125 movdqa xmm1,xmm4
126
127 paddd xmm0,xmm3
128 pcmpeqd xmm3,xmm5
129 movdqa XMMWORD[208+r10],xmm2
130 movdqa xmm2,xmm4
131
132 paddd xmm1,xmm0
133 pcmpeqd xmm0,xmm5
134 movdqa XMMWORD[224+r10],xmm3
135 movdqa xmm3,xmm4
136 paddd xmm2,xmm1
137 pcmpeqd xmm1,xmm5
138 movdqa XMMWORD[240+r10],xmm0
139 movdqa xmm0,xmm4
140
141 paddd xmm3,xmm2
142 pcmpeqd xmm2,xmm5
143 movdqa XMMWORD[256+r10],xmm1
144 movdqa xmm1,xmm4
145
146 paddd xmm0,xmm3
147 pcmpeqd xmm3,xmm5
148 movdqa XMMWORD[272+r10],xmm2
149 movdqa xmm2,xmm4
150
151 paddd xmm1,xmm0
152 pcmpeqd xmm0,xmm5
153 movdqa XMMWORD[288+r10],xmm3
154 movdqa xmm3,xmm4
155 paddd xmm2,xmm1
156 pcmpeqd xmm1,xmm5
157 movdqa XMMWORD[304+r10],xmm0
158
159 paddd xmm3,xmm2
160DB 0x67
161 pcmpeqd xmm2,xmm5
162 movdqa XMMWORD[320+r10],xmm1
163
164 pcmpeqd xmm3,xmm5
165 movdqa XMMWORD[336+r10],xmm2
; Gather: AND every 16-byte slot of the 256-byte row with its mask and OR
; the results together.  The last four masks are still live in xmm0-xmm3;
; the other twelve are re-read from the stack.  All 16 slots are loaded
; whatever the index value is.
166 pand xmm0,XMMWORD[64+r12]
167
168 pand xmm1,XMMWORD[80+r12]
169 pand xmm2,XMMWORD[96+r12]
170 movdqa XMMWORD[352+r10],xmm3
171 pand xmm3,XMMWORD[112+r12]
172 por xmm0,xmm2
173 por xmm1,xmm3
174 movdqa xmm4,XMMWORD[((-128))+r12]
175 movdqa xmm5,XMMWORD[((-112))+r12]
176 movdqa xmm2,XMMWORD[((-96))+r12]
177 pand xmm4,XMMWORD[112+r10]
178 movdqa xmm3,XMMWORD[((-80))+r12]
179 pand xmm5,XMMWORD[128+r10]
180 por xmm0,xmm4
181 pand xmm2,XMMWORD[144+r10]
182 por xmm1,xmm5
183 pand xmm3,XMMWORD[160+r10]
184 por xmm0,xmm2
185 por xmm1,xmm3
186 movdqa xmm4,XMMWORD[((-64))+r12]
187 movdqa xmm5,XMMWORD[((-48))+r12]
188 movdqa xmm2,XMMWORD[((-32))+r12]
189 pand xmm4,XMMWORD[176+r10]
190 movdqa xmm3,XMMWORD[((-16))+r12]
191 pand xmm5,XMMWORD[192+r10]
192 por xmm0,xmm4
193 pand xmm2,XMMWORD[208+r10]
194 por xmm1,xmm5
195 pand xmm3,XMMWORD[224+r10]
196 por xmm0,xmm2
197 por xmm1,xmm3
198 movdqa xmm4,XMMWORD[r12]
199 movdqa xmm5,XMMWORD[16+r12]
200 movdqa xmm2,XMMWORD[32+r12]
201 pand xmm4,XMMWORD[240+r10]
202 movdqa xmm3,XMMWORD[48+r12]
203 pand xmm5,XMMWORD[256+r10]
204 por xmm0,xmm4
205 pand xmm2,XMMWORD[272+r10]
206 por xmm1,xmm5
207 pand xmm3,XMMWORD[288+r10]
208 por xmm0,xmm2
209 por xmm1,xmm3
; Fold the two accumulators, then fold the high qword onto the low one
; (pshufd 0x4e swaps the 64-bit halves).
210 por xmm0,xmm1
211 pshufd xmm1,xmm0,0x4e
212 por xmm0,xmm1
; Advance r12 to the next 256-byte table row.
213 lea r12,[256+r12]
; Raw encoding 66 48 0F 7E C3 = movq rbx,xmm0: rbx = gathered word.
214DB 102,72,15,126,195
215
;-----------------------------------------------------------------------
; First multiply/reduce pass (no previous partial result to add).
;
; Register roles in this pass, as used by the code below:
;   rbx = gathered multiplier word       rsi = arg2 word array ("a")
;   rcx = arg4 word array ("n")          r9  = word count
;   r8  = qword loaded from *arg5        rbp = r8 * (low word of a[0]*rbx)
;   r15 = word index j                   r14 = outer pass counter
;   rsp = scratch result vector ("tp"), written at [-16+j*8+rsp]
; Presumably a = multiplicand, n = modulus, r8 = n0 (-1/n mod 2^64) and
; rbp = the Montgomery quotient word -- TODO confirm against the caller.
;-----------------------------------------------------------------------
216 mov r8,QWORD[r8]
217 mov rax,QWORD[rsi]
218
219 xor r14,r14
220 xor r15,r15
221
; rdx:rax = a[0]*rbx; r10 = low word, r11 = high word.
222 mov rbp,r8
223 mul rbx
224 mov r10,rax
225 mov rax,QWORD[rcx]
226
227 imul rbp,r10
228 mov r11,rdx
229
; n[0]*rbp is added to r10 only for its carry; r10's sum is not stored.
230 mul rbp
231 add r10,rax
232 mov rax,QWORD[8+rsi]
233 adc rdx,0
234 mov r13,rdx
235
236 lea r15,[1+r15]
237 jmp NEAR $L$1st_enter
238
239ALIGN 16
; Loop body: finish word j-1 (n[j-1]*rbp product in rdx:rax, plus the
; a-side value in r11, plus carry r13) and store it two slots back.
240$L$1st:
241 add r13,rax
242 mov rax,QWORD[r15*8+rsi]
243 adc rdx,0
244 add r13,r11
245 mov r11,r10
246 adc rdx,0
247 mov QWORD[((-16))+r15*8+rsp],r13
248 mov r13,rdx
249
; Loop entry: rax = a[j]; multiply by rbx, then n[j] by rbp.
; lea/mov do not touch flags, so the add/adc pairs stay intact.
250$L$1st_enter:
251 mul rbx
252 add r11,rax
253 mov rax,QWORD[r15*8+rcx]
254 adc rdx,0
255 lea r15,[1+r15]
256 mov r10,rdx
257
258 mul rbp
259 cmp r15,r9
260 jne NEAR $L$1st
261
262
; Tail: store the last two words and the final carry at
; [-16+r9*8+rsp], [-8+r9*8+rsp] and [r9*8+rsp].
263 add r13,rax
264 adc rdx,0
265 add r13,r11
266 adc rdx,0
267 mov QWORD[((-16))+r9*8+rsp],r13
268 mov r13,rdx
269 mov r11,r10
270
271 xor rdx,rdx
272 add r13,r11
273 adc rdx,0
274 mov QWORD[((-8))+r9*8+rsp],r13
275 mov QWORD[r9*8+rsp],rdx
276
; One pass done; continue with the accumulate passes.
277 lea r14,[1+r14]
278 jmp NEAR $L$outer
;-----------------------------------------------------------------------
; $L$outer: one pass per remaining multiplier word (r14 counts passes up
; to r9).  Each pass
;   1. gathers the next word from the table row at r12 into rbx, using
;      the 16 masks stored on the stack at -128..+112 relative to rdx
;      (the array built at $L$mul_body);
;   2. runs $L$inner, which does the same multiply/reduce as the first
;      pass but also adds the previous scratch words read from [rsp].
; r8 is the value loaded from *arg5 before the first pass; rsi/rcx/r9
; keep their first-pass roles.
;-----------------------------------------------------------------------
279ALIGN 16
280$L$outer:
; rdx = mask array base + 128 (the same 16-byte-aligned frame area).
281 lea rdx,[((24+128))+r9*8+rsp]
282 and rdx,-16
283 pxor xmm4,xmm4
284 pxor xmm5,xmm5
; Masked gather over all 16 slots of the current 256-byte row.
285 movdqa xmm0,XMMWORD[((-128))+r12]
286 movdqa xmm1,XMMWORD[((-112))+r12]
287 movdqa xmm2,XMMWORD[((-96))+r12]
288 movdqa xmm3,XMMWORD[((-80))+r12]
289 pand xmm0,XMMWORD[((-128))+rdx]
290 pand xmm1,XMMWORD[((-112))+rdx]
291 por xmm4,xmm0
292 pand xmm2,XMMWORD[((-96))+rdx]
293 por xmm5,xmm1
294 pand xmm3,XMMWORD[((-80))+rdx]
295 por xmm4,xmm2
296 por xmm5,xmm3
297 movdqa xmm0,XMMWORD[((-64))+r12]
298 movdqa xmm1,XMMWORD[((-48))+r12]
299 movdqa xmm2,XMMWORD[((-32))+r12]
300 movdqa xmm3,XMMWORD[((-16))+r12]
301 pand xmm0,XMMWORD[((-64))+rdx]
302 pand xmm1,XMMWORD[((-48))+rdx]
303 por xmm4,xmm0
304 pand xmm2,XMMWORD[((-32))+rdx]
305 por xmm5,xmm1
306 pand xmm3,XMMWORD[((-16))+rdx]
307 por xmm4,xmm2
308 por xmm5,xmm3
309 movdqa xmm0,XMMWORD[r12]
310 movdqa xmm1,XMMWORD[16+r12]
311 movdqa xmm2,XMMWORD[32+r12]
312 movdqa xmm3,XMMWORD[48+r12]
313 pand xmm0,XMMWORD[rdx]
314 pand xmm1,XMMWORD[16+rdx]
315 por xmm4,xmm0
316 pand xmm2,XMMWORD[32+rdx]
317 por xmm5,xmm1
318 pand xmm3,XMMWORD[48+rdx]
319 por xmm4,xmm2
320 por xmm5,xmm3
321 movdqa xmm0,XMMWORD[64+r12]
322 movdqa xmm1,XMMWORD[80+r12]
323 movdqa xmm2,XMMWORD[96+r12]
324 movdqa xmm3,XMMWORD[112+r12]
325 pand xmm0,XMMWORD[64+rdx]
326 pand xmm1,XMMWORD[80+rdx]
327 por xmm4,xmm0
328 pand xmm2,XMMWORD[96+rdx]
329 por xmm5,xmm1
330 pand xmm3,XMMWORD[112+rdx]
331 por xmm4,xmm2
332 por xmm5,xmm3
; Fold accumulators and both 64-bit halves into the low qword of xmm0.
333 por xmm4,xmm5
334 pshufd xmm0,xmm4,0x4e
335 por xmm0,xmm4
336 lea r12,[256+r12]
337
338 mov rax,QWORD[rsi]
; Raw encoding 66 48 0F 7E C3 = movq rbx,xmm0: rbx = gathered word.
339DB 102,72,15,126,195
340
; r10 = tp[0] + low(a[0]*rbx); rbp = r8 * r10.
341 xor r15,r15
342 mov rbp,r8
343 mov r10,QWORD[rsp]
344
345 mul rbx
346 add r10,rax
347 mov rax,QWORD[rcx]
348 adc rdx,0
349
350 imul rbp,r10
351 mov r11,rdx
352
; n[0]*rbp is added to r10 only for its carry; r10 is then reloaded
; with tp[1].
353 mul rbp
354 add r10,rax
355 mov rax,QWORD[8+rsi]
356 adc rdx,0
357 mov r10,QWORD[8+rsp]
358 mov r13,rdx
359
360 lea r15,[1+r15]
361 jmp NEAR $L$inner_enter
362
363ALIGN 16
; Loop body: finish word j-1 and store it two slots back, then fetch the
; next previous-pass word tp[j] into r10.
364$L$inner:
365 add r13,rax
366 mov rax,QWORD[r15*8+rsi]
367 adc rdx,0
368 add r13,r10
369 mov r10,QWORD[r15*8+rsp]
370 adc rdx,0
371 mov QWORD[((-16))+r15*8+rsp],r13
372 mov r13,rdx
373
; Loop entry: a[j]*rbx is accumulated into r10 (with carry into r11),
; then n[j]*rbp is left in rdx:rax for the next iteration or the tail.
374$L$inner_enter:
375 mul rbx
376 add r11,rax
377 mov rax,QWORD[r15*8+rcx]
378 adc rdx,0
379 add r10,r11
380 mov r11,rdx
381 adc r11,0
382 lea r15,[1+r15]
383
384 mul rbp
385 cmp r15,r9
386 jne NEAR $L$inner
387
; Tail: last two words plus the previous top word [r9*8+rsp]; the new
; top carry (0..1 after two adc) is written back to [r9*8+rsp].
388 add r13,rax
389 adc rdx,0
390 add r13,r10
391 mov r10,QWORD[r9*8+rsp]
392 adc rdx,0
393 mov QWORD[((-16))+r9*8+rsp],r13
394 mov r13,rdx
395
396 xor rdx,rdx
397 add r13,r11
398 adc rdx,0
399 add r13,r10
400 adc rdx,0
401 mov QWORD[((-8))+r9*8+rsp],r13
402 mov QWORD[r9*8+rsp],rdx
403
; Next pass while r14 < r9 (unsigned compare).
404 lea r14,[1+r14]
405 cmp r14,r9
406 jb NEAR $L$outer
407
;-----------------------------------------------------------------------
; Final step: compute scratch - n into the output (rdi), then select
; either that difference or the unreduced scratch value with AND/OR
; masks (no data-dependent branch), and return 1 in rax.
;-----------------------------------------------------------------------
; xor clears CF, giving the first sbb a zero borrow-in.
408 xor r14,r14
409 mov rax,QWORD[rsp]
410 lea rsi,[rsp]
411 mov r15,r9
412 jmp NEAR $L$sub
413ALIGN 16
; rdi[i] = tp[i] - n[i] - borrow.  mov/lea/dec leave CF untouched, so
; the borrow propagates from one iteration to the next.
414$L$sub: sbb rax,QWORD[r14*8+rcx]
415 mov QWORD[r14*8+rdi],rax
416 mov rax,QWORD[8+r14*8+rsi]
417 lea r14,[1+r14]
418 dec r15
419 jnz NEAR $L$sub
420
; rax = top scratch word minus the final borrow; it is used below as an
; AND mask for the scratch value, with rbx = ~rax for the difference.
421 sbb rax,0
422 mov rbx,-1
423 xor rbx,rax
424 xor r14,r14
425 mov r15,r9
426
; rdi[i] = (rdi[i] & rbx) | (tp[i] & rax).  Each scratch word is then
; overwritten (with the loop index r14) so the intermediate value does
; not stay on the stack.
427$L$copy:
428 mov rcx,QWORD[r14*8+rdi]
429 mov rdx,QWORD[r14*8+rsp]
430 and rcx,rbx
431 and rdx,rax
432 mov QWORD[r14*8+rsp],r14
433 or rdx,rcx
434 mov QWORD[r14*8+rdi],rdx
435 lea r14,[1+r14]
436 sub r15,1
437 jnz NEAR $L$copy
438
; rsi = original rsp saved in the frame by the prologue.
439 mov rsi,QWORD[8+r9*8+rsp]
440
; Return value 1.
441 mov rax,1
442
; Restore the six callee-saved GPRs pushed on entry, then unwind rsp.
443 mov r15,QWORD[((-48))+rsi]
444
445 mov r14,QWORD[((-40))+rsi]
446
447 mov r13,QWORD[((-32))+rsi]
448
449 mov r12,QWORD[((-24))+rsi]
450
451 mov rbp,QWORD[((-16))+rsi]
452
453 mov rbx,QWORD[((-8))+rsi]
454
455 lea rsp,[rsi]
456
457$L$mul_epilogue:
; Reload rdi/rsi from the home slots filled by the Win64 prologue.
458 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
459 mov rsi,QWORD[16+rsp]
; F3 C3 = "rep ret".
460 DB 0F3h,0C3h ;repret
461
462$L$SEH_end_bn_mul_mont_gather5:
463
;-----------------------------------------------------------------------
; bn_mul4x_mont_gather5 -- frame setup around mul4x_internal.
; No "global" directive for this symbol is visible in this part of the
; file.  Its body is normally reached through $L$mul4x_enter, jumped to
; from bn_mul_mont_gather5 when the word count is a multiple of 8.
;
; Same Win64 argument re-homing as bn_mul_mont_gather5:
;   rdi = arg1, rsi = arg2, rdx = arg3, rcx = arg4, r8 = arg5, r9 = arg6
; rax = caller's rsp (mul4x_internal reads arg7 via [56+rax]).
;
; NOTE(review): r11d is tested at $L$mul4x_enter but is not loaded on the
; fall-through path from the bn_mul4x_mont_gather5 label itself; the jump
; from bn_mul_mont_gather5 loads it from OPENSSL_ia32cap_P+8 first.
;-----------------------------------------------------------------------
464ALIGN 32
465bn_mul4x_mont_gather5:
466 mov QWORD[8+rsp],rdi ;WIN64 prologue
467 mov QWORD[16+rsp],rsi
468 mov rax,rsp
469$L$SEH_begin_bn_mul4x_mont_gather5:
470 mov rdi,rcx
471 mov rsi,rdx
472 mov rdx,r8
473 mov rcx,r9
474 mov r8,QWORD[40+rsp]
475 mov r9,QWORD[48+rsp]
476
477
478
; Raw address-size prefix on the next instruction (presumably padding).
479DB 0x67
480 mov rax,rsp
481
482$L$mul4x_enter:
; If all bits of 0x80108 are set in r11d, use $L$mulx4x_enter (defined
; outside this view).  Presumably the BMI1/BMI2/ADX feature bits -- TODO
; confirm against the OPENSSL_ia32cap_P layout.
483 and r11d,0x80108
484 cmp r11d,0x80108
485 je NEAR $L$mulx4x_enter
486 push rbx
487
488 push rbp
489
490 push r12
491
492 push r13
493
494 push r14
495
496 push r15
497
498$L$mul4x_prologue:
499
; r9 = word count * 8 (bytes), r10 = 3 * bytes, then r9 = -bytes.
500DB 0x67
501 shl r9d,3
502 lea r10,[r9*2+r9]
503 neg r9
504
505
506
507
508
509
510
511
512
513
; Choose the frame address rbp.  r11 = (rsp - 320 - 2*bytes - rdi) mod
; 4096 is the page offset between the candidate frame and the output
; buffer; the frame is moved down by an amount derived from it.
; Presumably this avoids 4 KiB aliasing between the frame and the
; buffers at rdi -- TODO confirm.
514 lea r11,[((-320))+r9*2+rsp]
515 mov rbp,rsp
516 sub r11,rdi
517 and r11,4095
518 cmp r10,r11
519 jb NEAR $L$mul4xsp_alt
520 sub rbp,r11
521 lea rbp,[((-320))+r9*2+rbp]
522 jmp NEAR $L$mul4xsp_done
523
524ALIGN 32
525$L$mul4xsp_alt:
526 lea r10,[((4096-320))+r9*2]
527 lea rbp,[((-320))+r9*2+rbp]
528 sub r11,r10
; "mov r10,0" rather than xor: mov keeps the CF produced by the sub above,
; which the cmovc consumes (clamps r11 to 0 on borrow).
529 mov r10,0
530 cmovc r11,r10
531 sub rbp,r11
532$L$mul4xsp_done:
; Align the frame to 64 bytes, then lower rsp to it in 4096-byte steps,
; touching each step (stack probing).
533 and rbp,-64
534 mov r11,rsp
535 sub r11,rbp
536 and r11,-4096
537 lea rsp,[rbp*1+r11]
538 mov r10,QWORD[rsp]
539 cmp rsp,rbp
540 ja NEAR $L$mul4x_page_walk
541 jmp NEAR $L$mul4x_page_walk_done
542
543$L$mul4x_page_walk:
544 lea rsp,[((-4096))+rsp]
545 mov r10,QWORD[rsp]
546 cmp rsp,rbp
547 ja NEAR $L$mul4x_page_walk
548$L$mul4x_page_walk_done:
549
; r9 back to +bytes; park the caller's rsp at [40+rsp] for the epilogue.
550 neg r9
551
552 mov QWORD[40+rsp],rax
553
554$L$mul4x_body:
555
556 call mul4x_internal
557
; rsi = caller's rsp saved above; return value 1.
558 mov rsi,QWORD[40+rsp]
559
560 mov rax,1
561
; Restore the callee-saved GPRs pushed at $L$mul4x_enter and unwind rsp.
562 mov r15,QWORD[((-48))+rsi]
563
564 mov r14,QWORD[((-40))+rsi]
565
566 mov r13,QWORD[((-32))+rsi]
567
568 mov r12,QWORD[((-24))+rsi]
569
570 mov rbp,QWORD[((-16))+rsi]
571
572 mov rbx,QWORD[((-8))+rsi]
573
574 lea rsp,[rsi]
575
576$L$mul4x_epilogue:
577 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
578 mov rsi,QWORD[16+rsp]
; F3 C3 = "rep ret".
579 DB 0F3h,0C3h ;repret
580
581$L$SEH_end_bn_mul4x_mont_gather5:
582
583
;-----------------------------------------------------------------------
; mul4x_internal -- shared worker called by bn_mul4x_mont_gather5 and
; bn_power5 (not an ABI entry point: arguments arrive in registers).
;
; Inputs as read by the code:
;   rax = caller's original rsp; the index dword is read from [56+rax]
;   rdx = table pointer            r9  = length in bytes
;   rsi, rcx, r8, rdi = operand/result pointers used after this part
;   rsp = frame set up by the caller; the return address sits at [rsp],
;         which is why frame offsets below carry an extra "+8".
;
; This first part builds the 16 pcmpeqd selection masks at
; [112+r10] .. [352+r10] and gathers the first multiplier word into rbx.
; The counter constants come from $L$inc (not visible here; presumably
; {0,0,1,1} and {2,2,2,2} -- TODO confirm).  The raw "DB 0x67" bytes are
; address-size prefixes on the following instruction, presumably padding.
;-----------------------------------------------------------------------
584ALIGN 32
585mul4x_internal:
586
; r13 = rdx + 128 + 32*r9: the r12 value at which the outer loop stops.
; r9 is shifted left by 5 only to form that address, then restored.
587 shl r9,5
588 movd xmm5,DWORD[56+rax]
589 lea rax,[$L$inc]
590 lea r13,[128+r9*1+rdx]
591 shr r9,5
592 movdqa xmm0,XMMWORD[rax]
593 movdqa xmm1,XMMWORD[16+rax]
; r10 = mask array base (minus 112); r12 = table + 128.
594 lea r10,[((88-112))+r9*1+rsp]
595 lea r12,[128+rdx]
596
; Broadcast the index; xmm4 keeps the counter step.
597 pshufd xmm5,xmm5,0
598 movdqa xmm4,xmm1
599DB 0x67,0x67
600 movdqa xmm2,xmm1
601 paddd xmm1,xmm0
602 pcmpeqd xmm0,xmm5
603DB 0x67
604 movdqa xmm3,xmm4
605 paddd xmm2,xmm1
606 pcmpeqd xmm1,xmm5
607 movdqa XMMWORD[112+r10],xmm0
608 movdqa xmm0,xmm4
609
610 paddd xmm3,xmm2
611 pcmpeqd xmm2,xmm5
612 movdqa XMMWORD[128+r10],xmm1
613 movdqa xmm1,xmm4
614
615 paddd xmm0,xmm3
616 pcmpeqd xmm3,xmm5
617 movdqa XMMWORD[144+r10],xmm2
618 movdqa xmm2,xmm4
619
620 paddd xmm1,xmm0
621 pcmpeqd xmm0,xmm5
622 movdqa XMMWORD[160+r10],xmm3
623 movdqa xmm3,xmm4
624 paddd xmm2,xmm1
625 pcmpeqd xmm1,xmm5
626 movdqa XMMWORD[176+r10],xmm0
627 movdqa xmm0,xmm4
628
629 paddd xmm3,xmm2
630 pcmpeqd xmm2,xmm5
631 movdqa XMMWORD[192+r10],xmm1
632 movdqa xmm1,xmm4
633
634 paddd xmm0,xmm3
635 pcmpeqd xmm3,xmm5
636 movdqa XMMWORD[208+r10],xmm2
637 movdqa xmm2,xmm4
638
639 paddd xmm1,xmm0
640 pcmpeqd xmm0,xmm5
641 movdqa XMMWORD[224+r10],xmm3
642 movdqa xmm3,xmm4
643 paddd xmm2,xmm1
644 pcmpeqd xmm1,xmm5
645 movdqa XMMWORD[240+r10],xmm0
646 movdqa xmm0,xmm4
647
648 paddd xmm3,xmm2
649 pcmpeqd xmm2,xmm5
650 movdqa XMMWORD[256+r10],xmm1
651 movdqa xmm1,xmm4
652
653 paddd xmm0,xmm3
654 pcmpeqd xmm3,xmm5
655 movdqa XMMWORD[272+r10],xmm2
656 movdqa xmm2,xmm4
657
658 paddd xmm1,xmm0
659 pcmpeqd xmm0,xmm5
660 movdqa XMMWORD[288+r10],xmm3
661 movdqa xmm3,xmm4
662 paddd xmm2,xmm1
663 pcmpeqd xmm1,xmm5
664 movdqa XMMWORD[304+r10],xmm0
665
666 paddd xmm3,xmm2
667DB 0x67
668 pcmpeqd xmm2,xmm5
669 movdqa XMMWORD[320+r10],xmm1
670
671 pcmpeqd xmm3,xmm5
672 movdqa XMMWORD[336+r10],xmm2
; Gather: every 16-byte slot of the 256-byte row is ANDed with its mask
; and ORed into xmm0/xmm1; all 16 slots are read whatever the index is.
673 pand xmm0,XMMWORD[64+r12]
674
675 pand xmm1,XMMWORD[80+r12]
676 pand xmm2,XMMWORD[96+r12]
677 movdqa XMMWORD[352+r10],xmm3
678 pand xmm3,XMMWORD[112+r12]
679 por xmm0,xmm2
680 por xmm1,xmm3
681 movdqa xmm4,XMMWORD[((-128))+r12]
682 movdqa xmm5,XMMWORD[((-112))+r12]
683 movdqa xmm2,XMMWORD[((-96))+r12]
684 pand xmm4,XMMWORD[112+r10]
685 movdqa xmm3,XMMWORD[((-80))+r12]
686 pand xmm5,XMMWORD[128+r10]
687 por xmm0,xmm4
688 pand xmm2,XMMWORD[144+r10]
689 por xmm1,xmm5
690 pand xmm3,XMMWORD[160+r10]
691 por xmm0,xmm2
692 por xmm1,xmm3
693 movdqa xmm4,XMMWORD[((-64))+r12]
694 movdqa xmm5,XMMWORD[((-48))+r12]
695 movdqa xmm2,XMMWORD[((-32))+r12]
696 pand xmm4,XMMWORD[176+r10]
697 movdqa xmm3,XMMWORD[((-16))+r12]
698 pand xmm5,XMMWORD[192+r10]
699 por xmm0,xmm4
700 pand xmm2,XMMWORD[208+r10]
701 por xmm1,xmm5
702 pand xmm3,XMMWORD[224+r10]
703 por xmm0,xmm2
704 por xmm1,xmm3
705 movdqa xmm4,XMMWORD[r12]
706 movdqa xmm5,XMMWORD[16+r12]
707 movdqa xmm2,XMMWORD[32+r12]
708 pand xmm4,XMMWORD[240+r10]
709 movdqa xmm3,XMMWORD[48+r12]
710 pand xmm5,XMMWORD[256+r10]
711 por xmm0,xmm4
712 pand xmm2,XMMWORD[272+r10]
713 por xmm1,xmm5
714 pand xmm3,XMMWORD[288+r10]
715 por xmm0,xmm2
716 por xmm1,xmm3
; Fold accumulators and both 64-bit halves; advance r12 to the next row.
717 por xmm0,xmm1
718 pshufd xmm1,xmm0,0x4e
719 por xmm0,xmm1
720 lea r12,[256+r12]
; Raw encoding 66 48 0F 7E C3 = movq rbx,xmm0: rbx = gathered word.
721DB 102,72,15,126,195
722
;-----------------------------------------------------------------------
; mul4x_internal, first pass: four words per loop iteration.
;
; Set-up performed here:
;   [24+rsp] = r13 (table end, compared against r12 by the outer loop)
;   [64+rsp] = rdi (output pointer, reloaded after the last pass)
;   r8  = qword loaded from *r8
;   rsi = rsi + r9 (one past the end of "a"), then r9 = -bytes, so "a"
;         is indexed with negative offsets [r9+rsi+..] / [r15+rsi+..]
;         that count up towards zero
;   rbp = r8 * low(a[0]*rbx)
;   r14 = write cursor into the scratch vector, starting at [72+rsp]
; rdi and r13 alternate as the running carry; r10 and r11 alternate as
; the a[j]*rbx partial sum.  rcx walks "n" forwards and is rewound by
; adding r9 at the end.
; Presumably r8 = n0 and rbp = the Montgomery quotient word -- TODO
; confirm against the callers.
;-----------------------------------------------------------------------
723 mov QWORD[((16+8))+rsp],r13
724 mov QWORD[((56+8))+rsp],rdi
725
726 mov r8,QWORD[r8]
727 mov rax,QWORD[rsi]
728 lea rsi,[r9*1+rsi]
729 neg r9
730
731 mov rbp,r8
732 mul rbx
733 mov r10,rax
734 mov rax,QWORD[rcx]
735
736 imul rbp,r10
737 lea r14,[((64+8))+rsp]
738 mov r11,rdx
739
; n[0]*rbp is added to r10 only for its carry.
740 mul rbp
741 add r10,rax
742 mov rax,QWORD[8+r9*1+rsi]
743 adc rdx,0
744 mov rdi,rdx
745
746 mul rbx
747 add r11,rax
748 mov rax,QWORD[8+rcx]
749 adc rdx,0
750 mov r10,rdx
751
752 mul rbp
753 add rdi,rax
754 mov rax,QWORD[16+r9*1+rsi]
755 adc rdx,0
756 add rdi,r11
; r15 = r9 + 32: negative byte index for the loop, stepping by 32 to 0.
757 lea r15,[32+r9]
758 lea rcx,[32+rcx]
759 adc rdx,0
760 mov QWORD[r14],rdi
761 mov r13,rdx
762 jmp NEAR $L$1st4x
763
764ALIGN 32
; Four a[j]*rbx / n[j]*rbp pairs per iteration; results stored at
; [-24+r14], [-16+r14], [-8+r14], [r14] after r14 advances by 32.
765$L$1st4x:
766 mul rbx
767 add r10,rax
768 mov rax,QWORD[((-16))+rcx]
769 lea r14,[32+r14]
770 adc rdx,0
771 mov r11,rdx
772
773 mul rbp
774 add r13,rax
775 mov rax,QWORD[((-8))+r15*1+rsi]
776 adc rdx,0
777 add r13,r10
778 adc rdx,0
779 mov QWORD[((-24))+r14],r13
780 mov rdi,rdx
781
782 mul rbx
783 add r11,rax
784 mov rax,QWORD[((-8))+rcx]
785 adc rdx,0
786 mov r10,rdx
787
788 mul rbp
789 add rdi,rax
790 mov rax,QWORD[r15*1+rsi]
791 adc rdx,0
792 add rdi,r11
793 adc rdx,0
794 mov QWORD[((-16))+r14],rdi
795 mov r13,rdx
796
797 mul rbx
798 add r10,rax
799 mov rax,QWORD[rcx]
800 adc rdx,0
801 mov r11,rdx
802
803 mul rbp
804 add r13,rax
805 mov rax,QWORD[8+r15*1+rsi]
806 adc rdx,0
807 add r13,r10
808 adc rdx,0
809 mov QWORD[((-8))+r14],r13
810 mov rdi,rdx
811
812 mul rbx
813 add r11,rax
814 mov rax,QWORD[8+rcx]
815 adc rdx,0
816 mov r10,rdx
817
818 mul rbp
819 add rdi,rax
820 mov rax,QWORD[16+r15*1+rsi]
821 adc rdx,0
822 add rdi,r11
823 lea rcx,[32+rcx]
824 adc rdx,0
825 mov QWORD[r14],rdi
826 mov r13,rdx
827
; The loop ends when the negative index reaches zero.
828 add r15,32
829 jnz NEAR $L$1st4x
830
; Tail: the last two words of the pass.
831 mul rbx
832 add r10,rax
833 mov rax,QWORD[((-16))+rcx]
834 lea r14,[32+r14]
835 adc rdx,0
836 mov r11,rdx
837
838 mul rbp
839 add r13,rax
840 mov rax,QWORD[((-8))+rsi]
841 adc rdx,0
842 add r13,r10
843 adc rdx,0
844 mov QWORD[((-24))+r14],r13
845 mov rdi,rdx
846
847 mul rbx
848 add r11,rax
849 mov rax,QWORD[((-8))+rcx]
850 adc rdx,0
851 mov r10,rdx
852
853 mul rbp
854 add rdi,rax
; rax = a[0] again, ready for the next pass.
855 mov rax,QWORD[r9*1+rsi]
856 adc rdx,0
857 add rdi,r11
858 adc rdx,0
859 mov QWORD[((-16))+r14],rdi
860 mov r13,rdx
861
; Rewind rcx to the start of "n" (r9 is negative here).
862 lea rcx,[r9*1+rcx]
863
; Top word in [-8+r14]; the final carry stays in rdi for the next pass.
864 xor rdi,rdi
865 add r13,r10
866 adc rdi,0
867 mov QWORD[((-8))+r14],r13
868
869 jmp NEAR $L$outer4x
870
;-----------------------------------------------------------------------
; $L$outer4x: start of each later pass in mul4x_internal.
;   - rdx = r14 + 16 + 128 addresses the 16 stored masks as -128..+112;
;   - the next multiplier word is gathered from the row at r12 into rbx
;     (all 16 slots of the row are read);
;   - the carry left in rdi by the previous pass is stored at [r14], then
;     r14 is rewound by adding r9 (negative) so the inner loop revisits
;     the scratch words;
;   - the first two words of the pass are computed before $L$inner4x.
; rax holds a[0] on entry, loaded at the end of the previous pass.
;-----------------------------------------------------------------------
871ALIGN 32
872$L$outer4x:
873 lea rdx,[((16+128))+r14]
874 pxor xmm4,xmm4
875 pxor xmm5,xmm5
876 movdqa xmm0,XMMWORD[((-128))+r12]
877 movdqa xmm1,XMMWORD[((-112))+r12]
878 movdqa xmm2,XMMWORD[((-96))+r12]
879 movdqa xmm3,XMMWORD[((-80))+r12]
880 pand xmm0,XMMWORD[((-128))+rdx]
881 pand xmm1,XMMWORD[((-112))+rdx]
882 por xmm4,xmm0
883 pand xmm2,XMMWORD[((-96))+rdx]
884 por xmm5,xmm1
885 pand xmm3,XMMWORD[((-80))+rdx]
886 por xmm4,xmm2
887 por xmm5,xmm3
888 movdqa xmm0,XMMWORD[((-64))+r12]
889 movdqa xmm1,XMMWORD[((-48))+r12]
890 movdqa xmm2,XMMWORD[((-32))+r12]
891 movdqa xmm3,XMMWORD[((-16))+r12]
892 pand xmm0,XMMWORD[((-64))+rdx]
893 pand xmm1,XMMWORD[((-48))+rdx]
894 por xmm4,xmm0
895 pand xmm2,XMMWORD[((-32))+rdx]
896 por xmm5,xmm1
897 pand xmm3,XMMWORD[((-16))+rdx]
898 por xmm4,xmm2
899 por xmm5,xmm3
900 movdqa xmm0,XMMWORD[r12]
901 movdqa xmm1,XMMWORD[16+r12]
902 movdqa xmm2,XMMWORD[32+r12]
903 movdqa xmm3,XMMWORD[48+r12]
904 pand xmm0,XMMWORD[rdx]
905 pand xmm1,XMMWORD[16+rdx]
906 por xmm4,xmm0
907 pand xmm2,XMMWORD[32+rdx]
908 por xmm5,xmm1
909 pand xmm3,XMMWORD[48+rdx]
910 por xmm4,xmm2
911 por xmm5,xmm3
912 movdqa xmm0,XMMWORD[64+r12]
913 movdqa xmm1,XMMWORD[80+r12]
914 movdqa xmm2,XMMWORD[96+r12]
915 movdqa xmm3,XMMWORD[112+r12]
916 pand xmm0,XMMWORD[64+rdx]
917 pand xmm1,XMMWORD[80+rdx]
918 por xmm4,xmm0
919 pand xmm2,XMMWORD[96+rdx]
920 por xmm5,xmm1
921 pand xmm3,XMMWORD[112+rdx]
922 por xmm4,xmm2
923 por xmm5,xmm3
; Fold accumulators and both 64-bit halves; advance r12 to the next row.
924 por xmm4,xmm5
925 pshufd xmm0,xmm4,0x4e
926 por xmm0,xmm4
927 lea r12,[256+r12]
; Raw encoding 66 48 0F 7E C3 = movq rbx,xmm0: rbx = gathered word.
928DB 102,72,15,126,195
929
; r10 = scratch word 0 of the previous pass (r9 is negative) plus
; low(a[0]*rbx); rbp = r8 * r10.
930 mov r10,QWORD[r9*1+r14]
931 mov rbp,r8
932 mul rbx
933 add r10,rax
934 mov rax,QWORD[rcx]
935 adc rdx,0
936
937 imul rbp,r10
938 mov r11,rdx
; Store the previous pass's top carry before rewinding the cursor.
939 mov QWORD[r14],rdi
940
941 lea r14,[r9*1+r14]
942
; n[0]*rbp is added to r10 only for its carry.
943 mul rbp
944 add r10,rax
945 mov rax,QWORD[8+r9*1+rsi]
946 adc rdx,0
947 mov rdi,rdx
948
949 mul rbx
950 add r11,rax
951 mov rax,QWORD[8+rcx]
952 adc rdx,0
953 add r11,QWORD[8+r14]
954 adc rdx,0
955 mov r10,rdx
956
957 mul rbp
958 add rdi,rax
959 mov rax,QWORD[16+r9*1+rsi]
960 adc rdx,0
961 add rdi,r11
; r15 = r9 + 32: negative byte index for $L$inner4x, counting up to 0.
962 lea r15,[32+r9]
963 lea rcx,[32+rcx]
964 adc rdx,0
965 mov r13,rdx
966 jmp NEAR $L$inner4x
967
;-----------------------------------------------------------------------
; $L$inner4x: four words per iteration of a later pass.  Same structure
; as the first-pass loop, except that each a[j]*rbx partial sum also adds
; the previous-pass scratch word read through r14, and each result is
; written one slot behind the read position.
; After the last pass, control leaves through $L$sqr4x_sub_entry
; (defined outside this view) with the registers set up at the bottom.
;-----------------------------------------------------------------------
968ALIGN 32
969$L$inner4x:
970 mul rbx
971 add r10,rax
972 mov rax,QWORD[((-16))+rcx]
973 adc rdx,0
974 add r10,QWORD[16+r14]
975 lea r14,[32+r14]
976 adc rdx,0
977 mov r11,rdx
978
979 mul rbp
980 add r13,rax
981 mov rax,QWORD[((-8))+r15*1+rsi]
982 adc rdx,0
983 add r13,r10
984 adc rdx,0
985 mov QWORD[((-32))+r14],rdi
986 mov rdi,rdx
987
988 mul rbx
989 add r11,rax
990 mov rax,QWORD[((-8))+rcx]
991 adc rdx,0
992 add r11,QWORD[((-8))+r14]
993 adc rdx,0
994 mov r10,rdx
995
996 mul rbp
997 add rdi,rax
998 mov rax,QWORD[r15*1+rsi]
999 adc rdx,0
1000 add rdi,r11
1001 adc rdx,0
1002 mov QWORD[((-24))+r14],r13
1003 mov r13,rdx
1004
1005 mul rbx
1006 add r10,rax
1007 mov rax,QWORD[rcx]
1008 adc rdx,0
1009 add r10,QWORD[r14]
1010 adc rdx,0
1011 mov r11,rdx
1012
1013 mul rbp
1014 add r13,rax
1015 mov rax,QWORD[8+r15*1+rsi]
1016 adc rdx,0
1017 add r13,r10
1018 adc rdx,0
1019 mov QWORD[((-16))+r14],rdi
1020 mov rdi,rdx
1021
1022 mul rbx
1023 add r11,rax
1024 mov rax,QWORD[8+rcx]
1025 adc rdx,0
1026 add r11,QWORD[8+r14]
1027 adc rdx,0
1028 mov r10,rdx
1029
1030 mul rbp
1031 add rdi,rax
1032 mov rax,QWORD[16+r15*1+rsi]
1033 adc rdx,0
1034 add rdi,r11
1035 lea rcx,[32+rcx]
1036 adc rdx,0
1037 mov QWORD[((-8))+r14],r13
1038 mov r13,rdx
1039
; The loop ends when the negative index reaches zero.
1040 add r15,32
1041 jnz NEAR $L$inner4x
1042
; Tail: the last two words of the pass.
1043 mul rbx
1044 add r10,rax
1045 mov rax,QWORD[((-16))+rcx]
1046 adc rdx,0
1047 add r10,QWORD[16+r14]
1048 lea r14,[32+r14]
1049 adc rdx,0
1050 mov r11,rdx
1051
1052 mul rbp
1053 add r13,rax
1054 mov rax,QWORD[((-8))+rsi]
1055 adc rdx,0
1056 add r13,r10
1057 adc rdx,0
1058 mov QWORD[((-32))+r14],rdi
1059 mov rdi,rdx
1060
1061 mul rbx
1062 add r11,rax
; Operands swapped for the last multiply: rax = old rbp (the per-pass
; multiplier) and rbp = last word of "n".  The product is the same, and
; rbp keeps n's top word for the comparison after the loop.
1063 mov rax,rbp
1064 mov rbp,QWORD[((-8))+rcx]
1065 adc rdx,0
1066 add r11,QWORD[((-8))+r14]
1067 adc rdx,0
1068 mov r10,rdx
1069
1070 mul rbp
1071 add rdi,rax
; rax = a[0] again, ready for the next pass.
1072 mov rax,QWORD[r9*1+rsi]
1073 adc rdx,0
1074 add rdi,r11
1075 adc rdx,0
1076 mov QWORD[((-24))+r14],r13
1077 mov r13,rdx
1078
1079 mov QWORD[((-16))+r14],rdi
; Rewind rcx to the start of "n" (r9 is negative here).
1080 lea rcx,[r9*1+rcx]
1081
; Top word = carry + r10 + previous top ([r14]); overflow kept in rdi.
1082 xor rdi,rdi
1083 add r13,r10
1084 adc rdi,0
1085 add r13,QWORD[r14]
1086 adc rdi,0
1087 mov QWORD[((-8))+r14],r13
1088
; Another pass while r12 is below the table end saved at [24+rsp].
1089 cmp r12,QWORD[((16+8))+rsp]
1090 jb NEAR $L$outer4x
; After the last pass: CF = (top word of n < top scratch word r13).
; r15 is zero here (the loop index ran to 0), so adc r15,r15 captures CF.
; rax = -(rdi | CF), i.e. 0 or all-ones.
1091 xor rax,rax
1092 sub rbp,r13
1093 adc r15,r15
1094 or rdi,r15
1095 sub rax,rdi
; Hand-off registers for $L$sqr4x_sub_entry: rbx = start of the scratch
; vector, rbp = "n", r12..r15 = n[0]-1, n[1], n[2], n[3],
; rcx = r9 >> 5 (arithmetic; r9 is negative), rdi = saved output pointer.
; Presumably that code performs the final conditional subtraction of n
; -- TODO confirm where the label is defined.
1096 lea rbx,[r9*1+r14]
1097 mov r12,QWORD[rcx]
1098 lea rbp,[rcx]
1099 mov rcx,r9
1100 sar rcx,3+2
1101 mov rdi,QWORD[((56+8))+rsp]
1102 dec r12
1103 xor r10,r10
1104 mov r13,QWORD[8+rbp]
1105 mov r14,QWORD[16+rbp]
1106 mov r15,QWORD[24+rbp]
1107 jmp NEAR $L$sqr4x_sub_entry
1108
1109
;-----------------------------------------------------------------------
; bn_power5 -- exported entry point (Win64 calling convention).
; Calls __bn_sqr8x_internal + __bn_post4x_internal five times in a row,
; then mul4x_internal once, and returns 1.  Presumably this computes
; a^(2^5) times a gathered table entry in Montgomery form (inferred from
; the name and call sequence -- TODO confirm).
;
; Same Win64 argument re-homing as the other entry points:
;   rdi = arg1, rsi = arg2, rdx = arg3, rcx = arg4, r8 = arg5, r9 = arg6
; arg7 stays on the caller's stack and is read by mul4x_internal via
; [56+rax], where rax is the caller's rsp.
;-----------------------------------------------------------------------
1110global bn_power5
1111
1112ALIGN 32
1113bn_power5:
1114 mov QWORD[8+rsp],rdi ;WIN64 prologue
1115 mov QWORD[16+rsp],rsi
1116 mov rax,rsp
1117$L$SEH_begin_bn_power5:
1118 mov rdi,rcx
1119 mov rsi,rdx
1120 mov rdx,r8
1121 mov rcx,r9
1122 mov r8,QWORD[40+rsp]
1123 mov r9,QWORD[48+rsp]
1124
1125
1126
1127 mov rax,rsp
1128
; If all bits of 0x80108 are set in the dword at OPENSSL_ia32cap_P+8, use
; $L$powerx5_enter (defined outside this view).  Presumably the
; BMI1/BMI2/ADX feature bits -- TODO confirm.
1129 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]
1130 and r11d,0x80108
1131 cmp r11d,0x80108
1132 je NEAR $L$powerx5_enter
1133 push rbx
1134
1135 push rbp
1136
1137 push r12
1138
1139 push r13
1140
1141 push r14
1142
1143 push r15
1144
1145$L$power5_prologue:
1146
; r9 = word count * 8 (bytes; the 32-bit shl zero-extends), r10 = 3 *
; bytes, then r9 = -bytes.  r8 = qword loaded from *arg5.
1147 shl r9d,3
1148 lea r10d,[r9*2+r9]
1149 neg r9
1150 mov r8,QWORD[r8]
1151
1152
1153
1154
1155
1156
1157
1158
; Choose the frame address rbp from the page offset between the
; candidate frame and rdi (same computation as in
; bn_mul4x_mont_gather5; presumably to avoid 4 KiB aliasing with the
; buffers at rdi -- TODO confirm).
1159 lea r11,[((-320))+r9*2+rsp]
1160 mov rbp,rsp
1161 sub r11,rdi
1162 and r11,4095
1163 cmp r10,r11
1164 jb NEAR $L$pwr_sp_alt
1165 sub rbp,r11
1166 lea rbp,[((-320))+r9*2+rbp]
1167 jmp NEAR $L$pwr_sp_done
1168
1169ALIGN 32
1170$L$pwr_sp_alt:
1171 lea r10,[((4096-320))+r9*2]
1172 lea rbp,[((-320))+r9*2+rbp]
1173 sub r11,r10
; "mov r10,0" rather than xor: mov keeps the CF produced by the sub above,
; which the cmovc consumes (clamps r11 to 0 on borrow).
1174 mov r10,0
1175 cmovc r11,r10
1176 sub rbp,r11
1177$L$pwr_sp_done:
; Align the frame to 64 bytes, then lower rsp to it in 4096-byte steps,
; touching each step (stack probing).
1178 and rbp,-64
1179 mov r11,rsp
1180 sub r11,rbp
1181 and r11,-4096
1182 lea rsp,[rbp*1+r11]
1183 mov r10,QWORD[rsp]
1184 cmp rsp,rbp
1185 ja NEAR $L$pwr_page_walk
1186 jmp NEAR $L$pwr_page_walk_done
1187
1188$L$pwr_page_walk:
1189 lea rsp,[((-4096))+rsp]
1190 mov r10,QWORD[rsp]
1191 cmp rsp,rbp
1192 ja NEAR $L$pwr_page_walk
1193$L$pwr_page_walk_done:
1194
; r10 = -bytes (kept for the helpers), r9 = +bytes.
1195 mov r10,r9
1196 neg r9
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
; Frame slots: [32+rsp] = value loaded from *arg5, [40+rsp] = caller's
; rsp.
1207 mov QWORD[32+rsp],r8
1208 mov QWORD[40+rsp],rax
1209
1210$L$power5_body:
; Raw-encoded movq (66 REX.W 0F 6E /r) parking GPRs in XMM registers
; across the helper calls:
;   xmm1 = rdi, xmm2 = rcx, xmm3 = r10, xmm4 = rdx
1211DB 102,72,15,110,207
1212DB 102,72,15,110,209
1213DB 102,73,15,110,218
1214DB 102,72,15,110,226
1215
; Five square-then-post-process rounds.  __bn_post4x_internal is defined
; outside this view.
1216 call __bn_sqr8x_internal
1217 call __bn_post4x_internal
1218 call __bn_sqr8x_internal
1219 call __bn_post4x_internal
1220 call __bn_sqr8x_internal
1221 call __bn_post4x_internal
1222 call __bn_sqr8x_internal
1223 call __bn_post4x_internal
1224 call __bn_sqr8x_internal
1225 call __bn_post4x_internal
1226
; Raw-encoded movq (66 REX.W 0F 7E /r): rcx = xmm2, rdx = xmm4, i.e. the
; values parked above.
1227DB 102,72,15,126,209
1228DB 102,72,15,126,226
; Arguments for mul4x_internal: rdi = rsi, rax = caller's rsp, and r8
; points at the frame slot holding the value loaded from *arg5
; (mul4x_internal dereferences r8).
1229 mov rdi,rsi
1230 mov rax,QWORD[40+rsp]
1231 lea r8,[32+rsp]
1232
1233 call mul4x_internal
1234
; rsi = caller's rsp; return value 1.
1235 mov rsi,QWORD[40+rsp]
1236
1237 mov rax,1
; Restore the callee-saved GPRs pushed on entry and unwind rsp.
1238 mov r15,QWORD[((-48))+rsi]
1239
1240 mov r14,QWORD[((-40))+rsi]
1241
1242 mov r13,QWORD[((-32))+rsi]
1243
1244 mov r12,QWORD[((-24))+rsi]
1245
1246 mov rbp,QWORD[((-16))+rsi]
1247
1248 mov rbx,QWORD[((-8))+rsi]
1249
1250 lea rsp,[rsi]
1251
1252$L$power5_epilogue:
1253 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1254 mov rsi,QWORD[16+rsp]
; F3 C3 = "rep ret".
1255 DB 0F3h,0C3h ;repret
1256
1257$L$SEH_end_bn_power5:
1258
1259global bn_sqr8x_internal
1260
1261
1262ALIGN 32
1263bn_sqr8x_internal:
1264__bn_sqr8x_internal:
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339 lea rbp,[32+r10]
1340 lea rsi,[r9*1+rsi]
1341
1342 mov rcx,r9
1343
1344
1345 mov r14,QWORD[((-32))+rbp*1+rsi]
1346 lea rdi,[((48+8))+r9*2+rsp]
1347 mov rax,QWORD[((-24))+rbp*1+rsi]
1348 lea rdi,[((-32))+rbp*1+rdi]
1349 mov rbx,QWORD[((-16))+rbp*1+rsi]
1350 mov r15,rax
1351
1352 mul r14
1353 mov r10,rax
1354 mov rax,rbx
1355 mov r11,rdx
1356 mov QWORD[((-24))+rbp*1+rdi],r10
1357
1358 mul r14
1359 add r11,rax
1360 mov rax,rbx
1361 adc rdx,0
1362 mov QWORD[((-16))+rbp*1+rdi],r11
1363 mov r10,rdx
1364
1365
1366 mov rbx,QWORD[((-8))+rbp*1+rsi]
1367 mul r15
1368 mov r12,rax
1369 mov rax,rbx
1370 mov r13,rdx
1371
1372 lea rcx,[rbp]
1373 mul r14
1374 add r10,rax
1375 mov rax,rbx
1376 mov r11,rdx
1377 adc r11,0
1378 add r10,r12
1379 adc r11,0
1380 mov QWORD[((-8))+rcx*1+rdi],r10
1381 jmp NEAR $L$sqr4x_1st
1382
1383ALIGN 32
1384$L$sqr4x_1st:
1385 mov rbx,QWORD[rcx*1+rsi]
1386 mul r15
1387 add r13,rax
1388 mov rax,rbx
1389 mov r12,rdx
1390 adc r12,0
1391
1392 mul r14
1393 add r11,rax
1394 mov rax,rbx
1395 mov rbx,QWORD[8+rcx*1+rsi]
1396 mov r10,rdx
1397 adc r10,0
1398 add r11,r13
1399 adc r10,0
1400
1401
1402 mul r15
1403 add r12,rax
1404 mov rax,rbx
1405 mov QWORD[rcx*1+rdi],r11
1406 mov r13,rdx
1407 adc r13,0
1408
1409 mul r14
1410 add r10,rax
1411 mov rax,rbx
1412 mov rbx,QWORD[16+rcx*1+rsi]
1413 mov r11,rdx
1414 adc r11,0
1415 add r10,r12
1416 adc r11,0
1417
1418 mul r15
1419 add r13,rax
1420 mov rax,rbx
1421 mov QWORD[8+rcx*1+rdi],r10
1422 mov r12,rdx
1423 adc r12,0
1424
1425 mul r14
1426 add r11,rax
1427 mov rax,rbx
1428 mov rbx,QWORD[24+rcx*1+rsi]
1429 mov r10,rdx
1430 adc r10,0
1431 add r11,r13
1432 adc r10,0
1433
1434
1435 mul r15
1436 add r12,rax
1437 mov rax,rbx
1438 mov QWORD[16+rcx*1+rdi],r11
1439 mov r13,rdx
1440 adc r13,0
1441 lea rcx,[32+rcx]
1442
1443 mul r14
1444 add r10,rax
1445 mov rax,rbx
1446 mov r11,rdx
1447 adc r11,0
1448 add r10,r12
1449 adc r11,0
1450 mov QWORD[((-8))+rcx*1+rdi],r10
1451
1452 cmp rcx,0
1453 jne NEAR $L$sqr4x_1st
1454
1455 mul r15
1456 add r13,rax
1457 lea rbp,[16+rbp]
1458 adc rdx,0
1459 add r13,r11
1460 adc rdx,0
1461
1462 mov QWORD[rdi],r13
1463 mov r12,rdx
1464 mov QWORD[8+rdi],rdx
1465 jmp NEAR $L$sqr4x_outer
1466
1467ALIGN 32
1468$L$sqr4x_outer:
1469 mov r14,QWORD[((-32))+rbp*1+rsi]
1470 lea rdi,[((48+8))+r9*2+rsp]
1471 mov rax,QWORD[((-24))+rbp*1+rsi]
1472 lea rdi,[((-32))+rbp*1+rdi]
1473 mov rbx,QWORD[((-16))+rbp*1+rsi]
1474 mov r15,rax
1475
1476 mul r14
1477 mov r10,QWORD[((-24))+rbp*1+rdi]
1478 add r10,rax
1479 mov rax,rbx
1480 adc rdx,0
1481 mov QWORD[((-24))+rbp*1+rdi],r10
1482 mov r11,rdx
1483
1484 mul r14
1485 add r11,rax
1486 mov rax,rbx
1487 adc rdx,0
1488 add r11,QWORD[((-16))+rbp*1+rdi]
1489 mov r10,rdx
1490 adc r10,0
1491 mov QWORD[((-16))+rbp*1+rdi],r11
1492
1493 xor r12,r12
1494
1495 mov rbx,QWORD[((-8))+rbp*1+rsi]
1496 mul r15
1497 add r12,rax
1498 mov rax,rbx
1499 adc rdx,0
1500 add r12,QWORD[((-8))+rbp*1+rdi]
1501 mov r13,rdx
1502 adc r13,0
1503
1504 mul r14
1505 add r10,rax
1506 mov rax,rbx
1507 adc rdx,0
1508 add r10,r12
1509 mov r11,rdx
1510 adc r11,0
1511 mov QWORD[((-8))+rbp*1+rdi],r10
1512
1513 lea rcx,[rbp]
1514 jmp NEAR $L$sqr4x_inner
1515
1516ALIGN 32
1517$L$sqr4x_inner:
1518 mov rbx,QWORD[rcx*1+rsi]
1519 mul r15
1520 add r13,rax
1521 mov rax,rbx
1522 mov r12,rdx
1523 adc r12,0
1524 add r13,QWORD[rcx*1+rdi]
1525 adc r12,0
1526
1527DB 0x67
1528 mul r14
1529 add r11,rax
1530 mov rax,rbx
1531 mov rbx,QWORD[8+rcx*1+rsi]
1532 mov r10,rdx
1533 adc r10,0
1534 add r11,r13
1535 adc r10,0
1536
1537 mul r15
1538 add r12,rax
1539 mov QWORD[rcx*1+rdi],r11
1540 mov rax,rbx
1541 mov r13,rdx
1542 adc r13,0
1543 add r12,QWORD[8+rcx*1+rdi]
1544 lea rcx,[16+rcx]
1545 adc r13,0
1546
1547 mul r14
1548 add r10,rax
1549 mov rax,rbx
1550 adc rdx,0
1551 add r10,r12
1552 mov r11,rdx
1553 adc r11,0
1554 mov QWORD[((-8))+rcx*1+rdi],r10
1555
1556 cmp rcx,0
1557 jne NEAR $L$sqr4x_inner
1558
1559DB 0x67
1560 mul r15
1561 add r13,rax
1562 adc rdx,0
1563 add r13,r11
1564 adc rdx,0
1565
1566 mov QWORD[rdi],r13
1567 mov r12,rdx
1568 mov QWORD[8+rdi],rdx
1569
1570 add rbp,16
1571 jnz NEAR $L$sqr4x_outer
1572
1573
1574 mov r14,QWORD[((-32))+rsi]
1575 lea rdi,[((48+8))+r9*2+rsp]
1576 mov rax,QWORD[((-24))+rsi]
1577 lea rdi,[((-32))+rbp*1+rdi]
1578 mov rbx,QWORD[((-16))+rsi]
1579 mov r15,rax
1580
1581 mul r14
1582 add r10,rax
1583 mov rax,rbx
1584 mov r11,rdx
1585 adc r11,0
1586
1587 mul r14
1588 add r11,rax
1589 mov rax,rbx
1590 mov QWORD[((-24))+rdi],r10
1591 mov r10,rdx
1592 adc r10,0
1593 add r11,r13
1594 mov rbx,QWORD[((-8))+rsi]
1595 adc r10,0
1596
1597 mul r15
1598 add r12,rax
1599 mov rax,rbx
1600 mov QWORD[((-16))+rdi],r11
1601 mov r13,rdx
1602 adc r13,0
1603
1604 mul r14
1605 add r10,rax
1606 mov rax,rbx
1607 mov r11,rdx
1608 adc r11,0
1609 add r10,r12
1610 adc r11,0
1611 mov QWORD[((-8))+rdi],r10
1612
1613 mul r15
1614 add r13,rax
1615 mov rax,QWORD[((-16))+rsi]
1616 adc rdx,0
1617 add r13,r11
1618 adc rdx,0
1619
1620 mov QWORD[rdi],r13
1621 mov r12,rdx
1622 mov QWORD[8+rdi],rdx
1623
1624 mul rbx
1625 add rbp,16
1626 xor r14,r14
1627 sub rbp,r9
1628 xor r15,r15
1629
1630 add rax,r12
1631 adc rdx,0
1632 mov QWORD[8+rdi],rax
1633 mov QWORD[16+rdi],rdx
1634 mov QWORD[24+rdi],r15
1635
1636 mov rax,QWORD[((-16))+rbp*1+rsi]
1637 lea rdi,[((48+8))+rsp]
1638 xor r10,r10
1639 mov r11,QWORD[8+rdi]
1640
1641 lea r12,[r10*2+r14]
1642 shr r10,63
1643 lea r13,[r11*2+rcx]
1644 shr r11,63
1645 or r13,r10
1646 mov r10,QWORD[16+rdi]
1647 mov r14,r11
1648 mul rax
1649 neg r15
1650 mov r11,QWORD[24+rdi]
1651 adc r12,rax
1652 mov rax,QWORD[((-8))+rbp*1+rsi]
1653 mov QWORD[rdi],r12
1654 adc r13,rdx
1655
1656 lea rbx,[r10*2+r14]
1657 mov QWORD[8+rdi],r13
1658 sbb r15,r15
1659 shr r10,63
1660 lea r8,[r11*2+rcx]
1661 shr r11,63
1662 or r8,r10
1663 mov r10,QWORD[32+rdi]
1664 mov r14,r11
1665 mul rax
1666 neg r15
1667 mov r11,QWORD[40+rdi]
1668 adc rbx,rax
1669 mov rax,QWORD[rbp*1+rsi]
1670 mov QWORD[16+rdi],rbx
1671 adc r8,rdx
1672 lea rbp,[16+rbp]
1673 mov QWORD[24+rdi],r8
1674 sbb r15,r15
1675 lea rdi,[64+rdi]
1676 jmp NEAR $L$sqr4x_shift_n_add
1677
1678ALIGN 32
1679$L$sqr4x_shift_n_add:
1680 lea r12,[r10*2+r14]
1681 shr r10,63
1682 lea r13,[r11*2+rcx]
1683 shr r11,63
1684 or r13,r10
1685 mov r10,QWORD[((-16))+rdi]
1686 mov r14,r11
1687 mul rax
1688 neg r15
1689 mov r11,QWORD[((-8))+rdi]
1690 adc r12,rax
1691 mov rax,QWORD[((-8))+rbp*1+rsi]
1692 mov QWORD[((-32))+rdi],r12
1693 adc r13,rdx
1694
1695 lea rbx,[r10*2+r14]
1696 mov QWORD[((-24))+rdi],r13
1697 sbb r15,r15
1698 shr r10,63
1699 lea r8,[r11*2+rcx]
1700 shr r11,63
1701 or r8,r10
1702 mov r10,QWORD[rdi]
1703 mov r14,r11
1704 mul rax
1705 neg r15
1706 mov r11,QWORD[8+rdi]
1707 adc rbx,rax
1708 mov rax,QWORD[rbp*1+rsi]
1709 mov QWORD[((-16))+rdi],rbx
1710 adc r8,rdx
1711
1712 lea r12,[r10*2+r14]
1713 mov QWORD[((-8))+rdi],r8
1714 sbb r15,r15
1715 shr r10,63
1716 lea r13,[r11*2+rcx]
1717 shr r11,63
1718 or r13,r10
1719 mov r10,QWORD[16+rdi]
1720 mov r14,r11
1721 mul rax
1722 neg r15
1723 mov r11,QWORD[24+rdi]
1724 adc r12,rax
1725 mov rax,QWORD[8+rbp*1+rsi]
1726 mov QWORD[rdi],r12
1727 adc r13,rdx
1728
1729 lea rbx,[r10*2+r14]
1730 mov QWORD[8+rdi],r13
1731 sbb r15,r15
1732 shr r10,63
1733 lea r8,[r11*2+rcx]
1734 shr r11,63
1735 or r8,r10
1736 mov r10,QWORD[32+rdi]
1737 mov r14,r11
1738 mul rax
1739 neg r15
1740 mov r11,QWORD[40+rdi]
1741 adc rbx,rax
1742 mov rax,QWORD[16+rbp*1+rsi]
1743 mov QWORD[16+rdi],rbx
1744 adc r8,rdx
1745 mov QWORD[24+rdi],r8
1746 sbb r15,r15
1747 lea rdi,[64+rdi]
1748 add rbp,32
1749 jnz NEAR $L$sqr4x_shift_n_add
1750
1751 lea r12,[r10*2+r14]
1752DB 0x67
1753 shr r10,63
1754 lea r13,[r11*2+rcx]
1755 shr r11,63
1756 or r13,r10
1757 mov r10,QWORD[((-16))+rdi]
1758 mov r14,r11
1759 mul rax
1760 neg r15
1761 mov r11,QWORD[((-8))+rdi]
1762 adc r12,rax
1763 mov rax,QWORD[((-8))+rsi]
1764 mov QWORD[((-32))+rdi],r12
1765 adc r13,rdx
1766
1767 lea rbx,[r10*2+r14]
1768 mov QWORD[((-24))+rdi],r13
1769 sbb r15,r15
1770 shr r10,63
1771 lea r8,[r11*2+rcx]
1772 shr r11,63
1773 or r8,r10
1774 mul rax
1775 neg r15
1776 adc rbx,rax
1777 adc r8,rdx
1778 mov QWORD[((-16))+rdi],rbx
1779 mov QWORD[((-8))+rdi],r8
1780DB 102,72,15,126,213
; __bn_sqr8x_reduction
; Word-by-word reduction of the double-width value held in the stack buffer
; that starts at [48+8+rsp], processed eight 64-bit words per outer pass with
; plain MUL/ADC arithmetic.
; In (as read by the code below):
;   rbp         -> array of 64-bit words that every multiplier is applied to
;                  (presumably the modulus - confirm against callers)
;   r9          =  byte length; the buffer spans 2*r9 bytes
;   [32+8+rsp]  =  factor used to derive each multiplier; this is the slot the
;                  exported entry points fill at [32+rsp] (+8 = return address)
;   xmm2, xmm3  =  values copied back into rbp and r9 at the end of each pass
; Out: rax = carry out of the last pass, rcx = QWORD[-8+rbp] as loaded in the
;      last pass, rdx = end of the buffer.
; Scratch slots: [0+8+rsp] end pointer for rbp, [8+8+rsp] end of buffer,
;   [48+8+rsp]..[48+56+8+rsp] the eight multipliers of the current pass (these
;   overlay the first eight buffer words).
; Lone DB 0x66 / DB 0x67 bytes are prefixes glued onto the next instruction;
; presumably code-alignment padding - confirm against the generator script.
1781__bn_sqr8x_reduction:
1782 xor rax,rax
1783 lea rcx,[rbp*1+r9]
1784 lea rdx,[((48+8))+r9*2+rsp]
1785 mov QWORD[((0+8))+rsp],rcx
1786 lea rdi,[((48+8))+r9*1+rsp]
1787 mov QWORD[((8+8))+rsp],rdx
; r9 becomes negative so that the first "lea rdi,[r9+rdi]" lands on 48+8+rsp
1788 neg r9
1789 jmp NEAR $L$8x_reduction_loop
1790
1791ALIGN 32
; Outer pass: load the next eight buffer words into rbx,r9..r15.
1792$L$8x_reduction_loop:
1793 lea rdi,[r9*1+rdi]
1794DB 0x66
1795 mov rbx,QWORD[rdi]
1796 mov r9,QWORD[8+rdi]
1797 mov r10,QWORD[16+rdi]
1798 mov r11,QWORD[24+rdi]
1799 mov r12,QWORD[32+rdi]
1800 mov r13,QWORD[40+rdi]
1801 mov r14,QWORD[48+rdi]
1802 mov r15,QWORD[56+rdi]
; park the carry of the previous pass in the word at the end of the buffer
1803 mov QWORD[rdx],rax
1804 lea rdi,[64+rdi]
1805
1806DB 0x67
; r8 = low word, rbx = low word * [32+8+rsp] = first multiplier
1807 mov r8,rbx
1808 imul rbx,QWORD[((32+8))+rsp]
1809 mov rax,QWORD[rbp]
1810 mov ecx,8
1811 jmp NEAR $L$8x_reduce
1812
1813ALIGN 32
; Eight iterations; each adds rbx * (eight words at rbp) to r8..r15 and shifts
; the register window down by one word.
1814$L$8x_reduce:
1815 mul rbx
1816 mov rax,QWORD[8+rbp]
; The low product word is not added: NEG only produces CF = (r8 != 0).
; Presumably the low word cancels r8 by construction of the multiplier, so
; only the carry is needed - confirm.
1817 neg r8
1818 mov r8,rdx
1819 adc r8,0
1820
1821 mul rbx
1822 add r9,rax
1823 mov rax,QWORD[16+rbp]
1824 adc rdx,0
1825 add r8,r9
; remember this iteration's multiplier for the tail loop
1826 mov QWORD[((48-8+8))+rcx*8+rsp],rbx
1827 mov r9,rdx
1828 adc r9,0
1829
1830 mul rbx
1831 add r10,rax
1832 mov rax,QWORD[24+rbp]
1833 adc rdx,0
1834 add r9,r10
1835 mov rsi,QWORD[((32+8))+rsp]
1836 mov r10,rdx
1837 adc r10,0
1838
1839 mul rbx
1840 add r11,rax
1841 mov rax,QWORD[32+rbp]
1842 adc rdx,0
; rsi = new low word * [32+8+rsp] = multiplier for the next iteration
1843 imul rsi,r8
1844 add r10,r11
1845 mov r11,rdx
1846 adc r11,0
1847
1848 mul rbx
1849 add r12,rax
1850 mov rax,QWORD[40+rbp]
1851 adc rdx,0
1852 add r11,r12
1853 mov r12,rdx
1854 adc r12,0
1855
1856 mul rbx
1857 add r13,rax
1858 mov rax,QWORD[48+rbp]
1859 adc rdx,0
1860 add r12,r13
1861 mov r13,rdx
1862 adc r13,0
1863
1864 mul rbx
1865 add r14,rax
1866 mov rax,QWORD[56+rbp]
1867 adc rdx,0
1868 add r13,r14
1869 mov r14,rdx
1870 adc r14,0
1871
1872 mul rbx
1873 mov rbx,rsi
1874 add r15,rax
1875 mov rax,QWORD[rbp]
1876 adc rdx,0
1877 add r14,r15
1878 mov r15,rdx
1879 adc r15,0
1880
1881 dec ecx
1882 jnz NEAR $L$8x_reduce
1883
; Advance to the next eight words at rbp; if that is already the end pointer
; there is no tail to process (JAE is taken with CF clear).
1884 lea rbp,[64+rbp]
1885 xor rax,rax
1886 mov rdx,QWORD[((8+8))+rsp]
1887 cmp rbp,QWORD[((0+8))+rsp]
1888 jae NEAR $L$8x_no_tail
1889
1890DB 0x66
; fold in the next eight buffer words; rsi keeps the carry as 0 / -1
1891 add r8,QWORD[rdi]
1892 adc r9,QWORD[8+rdi]
1893 adc r10,QWORD[16+rdi]
1894 adc r11,QWORD[24+rdi]
1895 adc r12,QWORD[32+rdi]
1896 adc r13,QWORD[40+rdi]
1897 adc r14,QWORD[48+rdi]
1898 adc r15,QWORD[56+rdi]
1899 sbb rsi,rsi
1900
; first multiplier saved by the reduce loop above (stored when ecx was 8)
1901 mov rbx,QWORD[((48+56+8))+rsp]
1902 mov ecx,8
1903 mov rax,QWORD[rbp]
1904 jmp NEAR $L$8x_tail
1905
1906ALIGN 32
; Tail: apply the eight saved multipliers to the current eight words at rbp,
; writing one finished word to [rdi] per iteration.
1907$L$8x_tail:
1908 mul rbx
1909 add r8,rax
1910 mov rax,QWORD[8+rbp]
1911 mov QWORD[rdi],r8
1912 mov r8,rdx
1913 adc r8,0
1914
1915 mul rbx
1916 add r9,rax
1917 mov rax,QWORD[16+rbp]
1918 adc rdx,0
1919 add r8,r9
1920 lea rdi,[8+rdi]
1921 mov r9,rdx
1922 adc r9,0
1923
1924 mul rbx
1925 add r10,rax
1926 mov rax,QWORD[24+rbp]
1927 adc rdx,0
1928 add r9,r10
1929 mov r10,rdx
1930 adc r10,0
1931
1932 mul rbx
1933 add r11,rax
1934 mov rax,QWORD[32+rbp]
1935 adc rdx,0
1936 add r10,r11
1937 mov r11,rdx
1938 adc r11,0
1939
1940 mul rbx
1941 add r12,rax
1942 mov rax,QWORD[40+rbp]
1943 adc rdx,0
1944 add r11,r12
1945 mov r12,rdx
1946 adc r12,0
1947
1948 mul rbx
1949 add r13,rax
1950 mov rax,QWORD[48+rbp]
1951 adc rdx,0
1952 add r12,r13
1953 mov r13,rdx
1954 adc r13,0
1955
1956 mul rbx
1957 add r14,rax
1958 mov rax,QWORD[56+rbp]
1959 adc rdx,0
1960 add r13,r14
1961 mov r14,rdx
1962 adc r14,0
1963
1964 mul rbx
; fetch the next saved multiplier (the slot one below the current one)
1965 mov rbx,QWORD[((48-16+8))+rcx*8+rsp]
1966 add r15,rax
1967 adc rdx,0
1968 add r14,r15
1969 mov rax,QWORD[rbp]
1970 mov r15,rdx
1971 adc r15,0
1972
1973 dec ecx
1974 jnz NEAR $L$8x_tail
1975
1976 lea rbp,[64+rbp]
1977 mov rdx,QWORD[((8+8))+rsp]
1978 cmp rbp,QWORD[((0+8))+rsp]
1979 jae NEAR $L$8x_tail_done
1980
; more words at rbp remain: restart from the first saved multiplier
1981 mov rbx,QWORD[((48+56+8))+rsp]
; NEG turns the 0 / -1 in rsi back into CF for the ADC chain
1982 neg rsi
1983 mov rax,QWORD[rbp]
1984 adc r8,QWORD[rdi]
1985 adc r9,QWORD[8+rdi]
1986 adc r10,QWORD[16+rdi]
1987 adc r11,QWORD[24+rdi]
1988 adc r12,QWORD[32+rdi]
1989 adc r13,QWORD[40+rdi]
1990 adc r14,QWORD[48+rdi]
1991 adc r15,QWORD[56+rdi]
1992 sbb rsi,rsi
1993
1994 mov ecx,8
1995 jmp NEAR $L$8x_tail
1996
1997ALIGN 32
; All words at rbp consumed: add the carry parked at [rdx] by the outer loop
; and collect the overflow in rax.
1998$L$8x_tail_done:
1999 xor rax,rax
2000 add r8,QWORD[rdx]
2001 adc r9,0
2002 adc r10,0
2003 adc r11,0
2004 adc r12,0
2005 adc r13,0
2006 adc r14,0
2007 adc r15,0
2008 adc rax,0
2009
; restore the carry saved in rsi before the final accumulation
2010 neg rsi
; Reached either by fall-through (CF restored from rsi) or from the JAE above
; (CF clear).
2011$L$8x_no_tail:
2012 adc r8,QWORD[rdi]
2013 adc r9,QWORD[8+rdi]
2014 adc r10,QWORD[16+rdi]
2015 adc r11,QWORD[24+rdi]
2016 adc r12,QWORD[32+rdi]
2017 adc r13,QWORD[40+rdi]
2018 adc r14,QWORD[48+rdi]
2019 adc r15,QWORD[56+rdi]
2020 adc rax,0
; rbp is at its end pointer here, so this reads the last word of that array
2021 mov rcx,QWORD[((-8))+rbp]
2022 xor rsi,rsi
2023
; bytes 66 48 0F 7E D5 = movq rbp,xmm2 (rewind rbp for the next pass)
2024DB 102,72,15,126,213
2025
2026 mov QWORD[rdi],r8
2027 mov QWORD[8+rdi],r9
; bytes 66 49 0F 7E D9 = movq r9,xmm3 (reload the step added to rdi at loop top)
2028DB 102,73,15,126,217
2029 mov QWORD[16+rdi],r10
2030 mov QWORD[24+rdi],r11
2031 mov QWORD[32+rdi],r12
2032 mov QWORD[40+rdi],r13
2033 mov QWORD[48+rdi],r14
2034 mov QWORD[56+rdi],r15
2035 lea rdi,[64+rdi]
2036
; keep going until rdi reaches the end of the buffer
2037 cmp rdi,rdx
2038 jb NEAR $L$8x_reduction_loop
2039 DB 0F3h,0C3h ;repret
2040
2041
2042
2043ALIGN 32
; __bn_post4x_internal
; Masked subtraction, four 64-bit words per iteration:
;   out[i] = src[i] + ((NOT n[i]) AND mask)   with the carry chained through r10
; where n is the word array at rbp, src starts at rdi+r9 and out is the pointer
; recovered from xmm1. The first word of n is decremented before NOT, which
; yields its two's-complement negation (NOT(x-1) = -x), so the chain as a whole
; adds -n when mask is all ones and adds 0 when mask is zero.
; In:  rax = selector; it is negated to form the mask, so it is presumably
;            0 or 1 on entry - confirm against __bn_sqr8x_reduction's rax
;      rbp -> n, rdi+r9 -> src, xmm1 = out pointer,
;      r9  = byte length; must be negative for the inc/jnz loop to terminate
; Out: r10 = r9 as passed in, r9 = -r9; rsi = the pointer from xmm1;
;      rdi, rbx and rbp have advanced past the processed words.
2044__bn_post4x_internal:
2045
2046 mov r12,QWORD[rbp]
2047 lea rbx,[r9*1+rdi]
2048 mov rcx,r9
; bytes 66 48 0F 7E CF = movq rdi,xmm1
2049DB 102,72,15,126,207
2050 neg rax
; bytes 66 48 0F 7E CE = movq rsi,xmm1
2051DB 102,72,15,126,206
; rcx = r9 / 32 (arithmetic shift): signed count of 4-word groups
2052 sar rcx,3+2
; n[0]-1, so that the NOT below produces -n[0]; r10 = 0 means no carry-in
2053 dec r12
2054 xor r10,r10
2055 mov r13,QWORD[8+rbp]
2056 mov r14,QWORD[16+rbp]
2057 mov r15,QWORD[24+rbp]
2058 jmp NEAR $L$sqr4x_sub_entry
2059
2060ALIGN 16
2061$L$sqr4x_sub:
2062 mov r12,QWORD[rbp]
2063 mov r13,QWORD[8+rbp]
2064 mov r14,QWORD[16+rbp]
2065 mov r15,QWORD[24+rbp]
2066$L$sqr4x_sub_entry:
2067 lea rbp,[32+rbp]
2068 not r12
2069 not r13
2070 not r14
2071 not r15
; apply the mask: every word is read and processed whatever the selector is
2072 and r12,rax
2073 and r13,rax
2074 and r14,rax
2075 and r15,rax
2076
; NEG r10 (0 / -1) restores the carry of the previous group into CF
2077 neg r10
2078 adc r12,QWORD[rbx]
2079 adc r13,QWORD[8+rbx]
2080 adc r14,QWORD[16+rbx]
2081 adc r15,QWORD[24+rbx]
2082 mov QWORD[rdi],r12
2083 lea rbx,[32+rbx]
2084 mov QWORD[8+rdi],r13
; save the carry of this group as 0 / -1 (MOV and LEA do not touch flags)
2085 sbb r10,r10
2086 mov QWORD[16+rdi],r14
2087 mov QWORD[24+rdi],r15
2088 lea rdi,[32+rdi]
2089
2090 inc rcx
2091 jnz NEAR $L$sqr4x_sub
2092
2093 mov r10,r9
2094 neg r9
2095 DB 0F3h,0C3h ;repret
2096
2097
2098global bn_from_montgomery
2099
2100ALIGN 32
; bn_from_montgomery (exported, Win64 calling convention)
; Dispatcher: inspects the DWORD at [48+rsp], the stack slot that
; bn_from_mont8x later loads into r9 as the word count. If its low three bits
; are clear (count divisible by 8) control is transferred to bn_from_mont8x
; with the arguments untouched; otherwise the function returns 0 in eax
; without doing any work.
2101bn_from_montgomery:
2102
2103 test DWORD[48+rsp],7
2104 jz NEAR bn_from_mont8x
2105 xor eax,eax
2106 DB 0F3h,0C3h ;repret
2107
2108
2109
2110
2111ALIGN 32
; bn_from_mont8x (Win64 entry, reached from bn_from_montgomery)
; Arguments after the Win64 -> internal register shuffle below:
;   rdi = arg1, rsi = arg2, rdx = arg3, rcx = arg4, r8 = arg5, r9 = arg6
; As used by this routine: rdi is kept in xmm1 as the output pointer for the
; post-processing step, rsi is the source that gets copied, rcx is handed to
; the reduction in rbp, r8 points at a 64-bit value stored at [32+rsp] for the
; reduction, r9d is a count of 64-bit words (converted to bytes). rdx is not
; referenced here.
; Flow: build an aligned stack frame, copy the input into the lower half of a
; 2*num-byte buffer at 48+rsp while zeroing the upper half, run reduction and
; post-processing (MULX/ADX flavour when the capability bits allow), wipe the
; buffer, restore registers and return 1.
2112bn_from_mont8x:
2113 mov QWORD[8+rsp],rdi ;WIN64 prologue
2114 mov QWORD[16+rsp],rsi
2115 mov rax,rsp
2116$L$SEH_begin_bn_from_mont8x:
2117 mov rdi,rcx
2118 mov rsi,rdx
2119 mov rdx,r8
2120 mov rcx,r9
2121 mov r8,QWORD[40+rsp]
2122 mov r9,QWORD[48+rsp]
2123
2124
2125
; lone 0x67 prefix byte on the next instruction, presumably alignment padding
2126DB 0x67
; rax = stack pointer at entry; kept until it is stored at [40+rsp] below
2127 mov rax,rsp
2128
2129 push rbx
2130
2131 push rbp
2132
2133 push r12
2134
2135 push r13
2136
2137 push r14
2138
2139 push r15
2140
2141$L$from_prologue:
2142
; r9 = word count * 8 (the 32-bit shift also clears the upper half of r9),
; r10 = 3 * byte length, then r9 = -byte length; r8 = QWORD pointed to by arg5
2143 shl r9d,3
2144 lea r10,[r9*2+r9]
2145 neg r9
2146 mov r8,QWORD[r8]
2147
2148
2149
2150
2151
2152
2153
2154
; Frame placement: r11 = distance (mod 4096) between the tentative frame and
; rdi. NOTE(review): this looks like it keeps the frame from aliasing the
; caller's buffers modulo the page size - confirm against the perlasm source.
2155 lea r11,[((-320))+r9*2+rsp]
2156 mov rbp,rsp
2157 sub r11,rdi
2158 and r11,4095
2159 cmp r10,r11
2160 jb NEAR $L$from_sp_alt
2161 sub rbp,r11
2162 lea rbp,[((-320))+r9*2+rbp]
2163 jmp NEAR $L$from_sp_done
2164
2165ALIGN 32
2166$L$from_sp_alt:
2167 lea r10,[((4096-320))+r9*2]
2168 lea rbp,[((-320))+r9*2+rbp]
2169 sub r11,r10
; MOV (not XOR) is used to zero r10 so the flags of the SUB reach CMOVC
2170 mov r10,0
2171 cmovc r11,r10
2172 sub rbp,r11
2173$L$from_sp_done:
; 64-byte align the frame, then move rsp down to it touching one word in each
; 4096-byte step on the way
2174 and rbp,-64
2175 mov r11,rsp
2176 sub r11,rbp
2177 and r11,-4096
2178 lea rsp,[rbp*1+r11]
2179 mov r10,QWORD[rsp]
2180 cmp rsp,rbp
2181 ja NEAR $L$from_page_walk
2182 jmp NEAR $L$from_page_walk_done
2183
2184$L$from_page_walk:
2185 lea rsp,[((-4096))+rsp]
2186 mov r10,QWORD[rsp]
2187 cmp rsp,rbp
2188 ja NEAR $L$from_page_walk
2189$L$from_page_walk_done:
2190
; r10 = -byte length, r9 = +byte length
2191 mov r10,r9
2192 neg r9
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
; frame slots: [32+rsp] = value loaded through arg5, [40+rsp] = entry rsp
2203 mov QWORD[32+rsp],r8
2204 mov QWORD[40+rsp],rax
2205
2206$L$from_body:
2207 mov r11,r9
2208 lea rax,[48+rsp]
2209 pxor xmm0,xmm0
2210 jmp NEAR $L$mul_by_1
2211
2212ALIGN 32
; Per iteration: copy 64 bytes from rsi to [rax] and zero the 64 bytes at
; [rax+r9], i.e. the matching position in the upper half of the buffer.
2213$L$mul_by_1:
2214 movdqu xmm1,XMMWORD[rsi]
2215 movdqu xmm2,XMMWORD[16+rsi]
2216 movdqu xmm3,XMMWORD[32+rsi]
2217 movdqa XMMWORD[r9*1+rax],xmm0
2218 movdqu xmm4,XMMWORD[48+rsi]
2219 movdqa XMMWORD[16+r9*1+rax],xmm0
; bytes 48 8D B6 40 00 00 00 = lea rsi,[64+rsi] encoded with a 32-bit disp
2220DB 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00
2221 movdqa XMMWORD[rax],xmm1
2222 movdqa XMMWORD[32+r9*1+rax],xmm0
2223 movdqa XMMWORD[16+rax],xmm2
2224 movdqa XMMWORD[48+r9*1+rax],xmm0
2225 movdqa XMMWORD[32+rax],xmm3
2226 movdqa XMMWORD[48+rax],xmm4
2227 lea rax,[64+rax]
2228 sub r11,64
2229 jnz NEAR $L$mul_by_1
2230
; bytes 66 48 0F 6E CF = movq xmm1,rdi
2231DB 102,72,15,110,207
; bytes 66 48 0F 6E D1 = movq xmm2,rcx
2232DB 102,72,15,110,209
2233DB 0x67
2234 mov rbp,rcx
; bytes 66 49 0F 6E DA = movq xmm3,r10 (r10 = -byte length)
2235DB 102,73,15,110,218
; All three bits of 0x80108 must be set in the capability word to take the
; MULX/ADX path. NOTE(review): per the OPENSSL_ia32cap documentation these
; should be BMI1, BMI2 and ADX - confirm.
2236 mov r11d,DWORD[((OPENSSL_ia32cap_P+8))]
2237 and r11d,0x80108
2238 cmp r11d,0x80108
2239 jne NEAR $L$from_mont_nox
2240
2241 lea rdi,[r9*1+rax]
2242 call __bn_sqrx8x_reduction
2243 call __bn_postx4x_internal
2244
2245 pxor xmm0,xmm0
2246 lea rax,[48+rsp]
2247 jmp NEAR $L$from_mont_zero
2248
2249ALIGN 32
2250$L$from_mont_nox:
2251 call __bn_sqr8x_reduction
2252 call __bn_post4x_internal
2253
2254 pxor xmm0,xmm0
2255 lea rax,[48+rsp]
2256 jmp NEAR $L$from_mont_zero
2257
2258ALIGN 32
; Wipe the buffer: 64 bytes per iteration while r9 drops by 32, so 2*r9 bytes
; starting at 48+rsp are cleared. rsi (entry rsp) is reloaded every iteration.
2259$L$from_mont_zero:
2260 mov rsi,QWORD[40+rsp]
2261
2262 movdqa XMMWORD[rax],xmm0
2263 movdqa XMMWORD[16+rax],xmm0
2264 movdqa XMMWORD[32+rax],xmm0
2265 movdqa XMMWORD[48+rax],xmm0
2266 lea rax,[64+rax]
2267 sub r9,32
2268 jnz NEAR $L$from_mont_zero
2269
; return value 1; the six registers pushed in the prologue sit just below the
; entry rsp held in rsi
2270 mov rax,1
2271 mov r15,QWORD[((-48))+rsi]
2272
2273 mov r14,QWORD[((-40))+rsi]
2274
2275 mov r13,QWORD[((-32))+rsi]
2276
2277 mov r12,QWORD[((-24))+rsi]
2278
2279 mov rbp,QWORD[((-16))+rsi]
2280
2281 mov rbx,QWORD[((-8))+rsi]
2282
2283 lea rsp,[rsi]
2284
2285$L$from_epilogue:
2286 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2287 mov rsi,QWORD[16+rsp]
2288 DB 0F3h,0C3h ;repret
2289
2290$L$SEH_end_bn_from_mont8x:
2291
2292ALIGN 32
; bn_mulx4x_mont_gather5 (Win64 entry; also has the inner label $L$mulx4x_enter)
; Wrapper around mulx4x_internal. After the Win64 -> internal register shuffle:
;   rdi = arg1, rsi = arg2, rdx = arg3, rcx = arg4, r8 = arg5, r9 = arg6
; A further DWORD argument is fetched by mulx4x_internal itself from [56+rax],
; where rax is the stack pointer at entry.
; This routine only builds the frame (same layout as the sibling entry points:
; [32+rsp] = QWORD loaded through r8, [40+rsp] = entry rsp), calls the worker,
; restores the callee-saved registers and returns 1.
2293bn_mulx4x_mont_gather5:
2294 mov QWORD[8+rsp],rdi ;WIN64 prologue
2295 mov QWORD[16+rsp],rsi
2296 mov rax,rsp
2297$L$SEH_begin_bn_mulx4x_mont_gather5:
2298 mov rdi,rcx
2299 mov rsi,rdx
2300 mov rdx,r8
2301 mov rcx,r9
2302 mov r8,QWORD[40+rsp]
2303 mov r9,QWORD[48+rsp]
2304
2305
2306
; rax = stack pointer at entry; must stay intact until the call below
2307 mov rax,rsp
2308
2309$L$mulx4x_enter:
2310 push rbx
2311
2312 push rbp
2313
2314 push r12
2315
2316 push r13
2317
2318 push r14
2319
2320 push r15
2321
2322$L$mulx4x_prologue:
2323
; r9 = count * 8 bytes, r10 = 3 * byte length, then r9 = -byte length;
; r8 = QWORD pointed to by arg5
2324 shl r9d,3
2325 lea r10,[r9*2+r9]
2326 neg r9
2327 mov r8,QWORD[r8]
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
; Frame placement relative to rdi modulo 4096. NOTE(review): presumably to
; keep the frame from aliasing the caller's buffers - confirm against the
; perlasm source.
2338 lea r11,[((-320))+r9*2+rsp]
2339 mov rbp,rsp
2340 sub r11,rdi
2341 and r11,4095
2342 cmp r10,r11
2343 jb NEAR $L$mulx4xsp_alt
2344 sub rbp,r11
2345 lea rbp,[((-320))+r9*2+rbp]
2346 jmp NEAR $L$mulx4xsp_done
2347
2348$L$mulx4xsp_alt:
2349 lea r10,[((4096-320))+r9*2]
2350 lea rbp,[((-320))+r9*2+rbp]
2351 sub r11,r10
; MOV keeps the flags of the SUB alive for CMOVC
2352 mov r10,0
2353 cmovc r11,r10
2354 sub rbp,r11
2355$L$mulx4xsp_done:
; 64-byte align the frame and walk rsp down to it, touching one word per
; 4096-byte step
2356 and rbp,-64
2357 mov r11,rsp
2358 sub r11,rbp
2359 and r11,-4096
2360 lea rsp,[rbp*1+r11]
2361 mov r10,QWORD[rsp]
2362 cmp rsp,rbp
2363 ja NEAR $L$mulx4x_page_walk
2364 jmp NEAR $L$mulx4x_page_walk_done
2365
2366$L$mulx4x_page_walk:
2367 lea rsp,[((-4096))+rsp]
2368 mov r10,QWORD[rsp]
2369 cmp rsp,rbp
2370 ja NEAR $L$mulx4x_page_walk
2371$L$mulx4x_page_walk_done:
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
; frame slots read by the worker (at +8 there, because of the return address)
2385 mov QWORD[32+rsp],r8
2386 mov QWORD[40+rsp],rax
2387
2388$L$mulx4x_body:
; r9 still holds the negated byte length here
2389 call mulx4x_internal
2390
2391 mov rsi,QWORD[40+rsp]
2392
2393 mov rax,1
2394
2395 mov r15,QWORD[((-48))+rsi]
2396
2397 mov r14,QWORD[((-40))+rsi]
2398
2399 mov r13,QWORD[((-32))+rsi]
2400
2401 mov r12,QWORD[((-24))+rsi]
2402
2403 mov rbp,QWORD[((-16))+rsi]
2404
2405 mov rbx,QWORD[((-8))+rsi]
2406
2407 lea rsp,[rsi]
2408
2409$L$mulx4x_epilogue:
2410 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2411 mov rsi,QWORD[16+rsp]
2412 DB 0F3h,0C3h ;repret
2413
2414$L$SEH_end_bn_mulx4x_mont_gather5:
2415
2416
2417ALIGN 32
; mulx4x_internal
; Worker shared by bn_mulx4x_mont_gather5 and bn_powerx5. Uses MULX with the
; two independent carry chains of ADCX (CF) and ADOX (OF).
; In (as read by the code below):
;   rdi = output pointer (only saved at [56+8+rsp]; handed on in rdx at exit)
;   rsi -> first multiplicand words; rcx -> words used in the reduction half
;   rdx -> table from which one 64-bit word per outer step is gathered
;   r9  =  negated byte length (bn_mulx4x_mont_gather5 negates r9 before the
;          call; bn_powerx5 passes r10 - presumably also negated, confirm)
;   rax =  entry stack pointer of the exported caller; DWORD[56+rax] is the
;          gather index
;   [32+8+rsp] = factor used to derive the reduction multiplier
; Frame slots (+8 = return address): [0+8] negated length, [8+8] current table
;   pointer, [16+8] end of table, [24+8] inner loop count, [56+8] saved rdi.
; The routine does not return by itself: it ends with a jump to
; $L$sqrx4x_sub_entry, which is defined outside this part of the file.
; Lone DB 0x67 bytes are prefixes on the following instruction, presumably
; alignment padding.
2418mulx4x_internal:
2419
2420 mov QWORD[8+rsp],r9
2421 mov r10,r9
2422 neg r9
2423 shl r9,5
2424 neg r10
; r13 = rdx + 128 + 32*length = loop bound for the table pointer
2425 lea r13,[128+r9*1+rdx]
; r9 = length / 32 = number of 4-word groups
2426 shr r9,5+5
2427 movd xmm5,DWORD[56+rax]
; one group is handled ahead of the inner loop, hence the -1
2428 sub r9,1
2429 lea rax,[$L$inc]
2430 mov QWORD[((16+8))+rsp],r13
2431 mov QWORD[((24+8))+rsp],r9
2432 mov QWORD[((56+8))+rsp],rdi
; two 16-byte constants from $L$inc (defined elsewhere in the file)
2433 movdqa xmm0,XMMWORD[rax]
2434 movdqa xmm1,XMMWORD[16+rax]
2435 lea r10,[((88-112))+r10*1+rsp]
2436 lea rdi,[128+rdx]
2437
; Build sixteen 16-byte masks at [112+r10]..[352+r10]: the index in xmm5 is
; broadcast and compared (PCMPEQD) against a counter advanced with PADDD.
2438 pshufd xmm5,xmm5,0
2439 movdqa xmm4,xmm1
2440DB 0x67
2441 movdqa xmm2,xmm1
2442DB 0x67
2443 paddd xmm1,xmm0
2444 pcmpeqd xmm0,xmm5
2445 movdqa xmm3,xmm4
2446 paddd xmm2,xmm1
2447 pcmpeqd xmm1,xmm5
2448 movdqa XMMWORD[112+r10],xmm0
2449 movdqa xmm0,xmm4
2450
2451 paddd xmm3,xmm2
2452 pcmpeqd xmm2,xmm5
2453 movdqa XMMWORD[128+r10],xmm1
2454 movdqa xmm1,xmm4
2455
2456 paddd xmm0,xmm3
2457 pcmpeqd xmm3,xmm5
2458 movdqa XMMWORD[144+r10],xmm2
2459 movdqa xmm2,xmm4
2460
2461 paddd xmm1,xmm0
2462 pcmpeqd xmm0,xmm5
2463 movdqa XMMWORD[160+r10],xmm3
2464 movdqa xmm3,xmm4
2465 paddd xmm2,xmm1
2466 pcmpeqd xmm1,xmm5
2467 movdqa XMMWORD[176+r10],xmm0
2468 movdqa xmm0,xmm4
2469
2470 paddd xmm3,xmm2
2471 pcmpeqd xmm2,xmm5
2472 movdqa XMMWORD[192+r10],xmm1
2473 movdqa xmm1,xmm4
2474
2475 paddd xmm0,xmm3
2476 pcmpeqd xmm3,xmm5
2477 movdqa XMMWORD[208+r10],xmm2
2478 movdqa xmm2,xmm4
2479
2480 paddd xmm1,xmm0
2481 pcmpeqd xmm0,xmm5
2482 movdqa XMMWORD[224+r10],xmm3
2483 movdqa xmm3,xmm4
2484 paddd xmm2,xmm1
2485 pcmpeqd xmm1,xmm5
2486 movdqa XMMWORD[240+r10],xmm0
2487 movdqa xmm0,xmm4
2488
2489 paddd xmm3,xmm2
2490 pcmpeqd xmm2,xmm5
2491 movdqa XMMWORD[256+r10],xmm1
2492 movdqa xmm1,xmm4
2493
2494 paddd xmm0,xmm3
2495 pcmpeqd xmm3,xmm5
2496 movdqa XMMWORD[272+r10],xmm2
2497 movdqa xmm2,xmm4
2498
2499 paddd xmm1,xmm0
2500 pcmpeqd xmm0,xmm5
2501 movdqa XMMWORD[288+r10],xmm3
2502 movdqa xmm3,xmm4
2503DB 0x67
2504 paddd xmm2,xmm1
2505 pcmpeqd xmm1,xmm5
2506 movdqa XMMWORD[304+r10],xmm0
2507
2508 paddd xmm3,xmm2
2509 pcmpeqd xmm2,xmm5
2510 movdqa XMMWORD[320+r10],xmm1
2511
2512 pcmpeqd xmm3,xmm5
2513 movdqa XMMWORD[336+r10],xmm2
2514
; First gather: AND each of the sixteen 16-byte table lanes at rdi-128..rdi+127
; with its mask and OR everything together. All 256 bytes are read whatever
; the index is; the last four masks are still live in xmm0..xmm3.
2515 pand xmm0,XMMWORD[64+rdi]
2516 pand xmm1,XMMWORD[80+rdi]
2517 pand xmm2,XMMWORD[96+rdi]
2518 movdqa XMMWORD[352+r10],xmm3
2519 pand xmm3,XMMWORD[112+rdi]
2520 por xmm0,xmm2
2521 por xmm1,xmm3
2522 movdqa xmm4,XMMWORD[((-128))+rdi]
2523 movdqa xmm5,XMMWORD[((-112))+rdi]
2524 movdqa xmm2,XMMWORD[((-96))+rdi]
2525 pand xmm4,XMMWORD[112+r10]
2526 movdqa xmm3,XMMWORD[((-80))+rdi]
2527 pand xmm5,XMMWORD[128+r10]
2528 por xmm0,xmm4
2529 pand xmm2,XMMWORD[144+r10]
2530 por xmm1,xmm5
2531 pand xmm3,XMMWORD[160+r10]
2532 por xmm0,xmm2
2533 por xmm1,xmm3
2534 movdqa xmm4,XMMWORD[((-64))+rdi]
2535 movdqa xmm5,XMMWORD[((-48))+rdi]
2536 movdqa xmm2,XMMWORD[((-32))+rdi]
2537 pand xmm4,XMMWORD[176+r10]
2538 movdqa xmm3,XMMWORD[((-16))+rdi]
2539 pand xmm5,XMMWORD[192+r10]
2540 por xmm0,xmm4
2541 pand xmm2,XMMWORD[208+r10]
2542 por xmm1,xmm5
2543 pand xmm3,XMMWORD[224+r10]
2544 por xmm0,xmm2
2545 por xmm1,xmm3
2546 movdqa xmm4,XMMWORD[rdi]
2547 movdqa xmm5,XMMWORD[16+rdi]
2548 movdqa xmm2,XMMWORD[32+rdi]
2549 pand xmm4,XMMWORD[240+r10]
2550 movdqa xmm3,XMMWORD[48+rdi]
2551 pand xmm5,XMMWORD[256+r10]
2552 por xmm0,xmm4
2553 pand xmm2,XMMWORD[272+r10]
2554 por xmm1,xmm5
2555 pand xmm3,XMMWORD[288+r10]
2556 por xmm0,xmm2
2557 por xmm1,xmm3
; combine the two accumulators, then fold the high qword onto the low one
2558 pxor xmm0,xmm1
2559 pshufd xmm1,xmm0,0x4e
2560 por xmm0,xmm1
2561 lea rdi,[256+rdi]
; bytes 66 48 0F 7E C2 = movq rdx,xmm0 (gathered word becomes the MULX operand)
2562DB 102,72,15,126,194
2563 lea rbx,[((64+32+8))+rsp]
2564
; First pass, leading 4 words: products of rdx with [rsi..] ...
2565 mov r9,rdx
2566 mulx rax,r8,QWORD[rsi]
2567 mulx r12,r11,QWORD[8+rsi]
2568 add r11,rax
2569 mulx r13,rax,QWORD[16+rsi]
2570 adc r12,rax
2571 adc r13,0
2572 mulx r14,rax,QWORD[24+rsi]
2573
; ... then r8 = low word * [32+8+rsp] becomes the multiplier for [rcx..].
; XOR clears CF and OF to start both carry chains.
2574 mov r15,r8
2575 imul r8,QWORD[((32+8))+rsp]
2576 xor rbp,rbp
2577 mov rdx,r8
2578
2579 mov QWORD[((8+8))+rsp],rdi
2580
2581 lea rsi,[32+rsi]
2582 adcx r13,rax
2583 adcx r14,rbp
2584
2585 mulx r10,rax,QWORD[rcx]
2586 adcx r15,rax
2587 adox r10,r11
2588 mulx r11,rax,QWORD[8+rcx]
2589 adcx r10,rax
2590 adox r11,r12
2591 mulx r12,rax,QWORD[16+rcx]
2592 mov rdi,QWORD[((24+8))+rsp]
2593 mov QWORD[((-32))+rbx],r10
2594 adcx r11,rax
2595 adox r12,r13
2596 mulx r15,rax,QWORD[24+rcx]
2597 mov rdx,r9
2598 mov QWORD[((-24))+rbx],r11
2599 adcx r12,rax
2600 adox r15,rbp
2601 lea rcx,[32+rcx]
2602 mov QWORD[((-16))+rbx],r12
2603 jmp NEAR $L$mulx4x_1st
2604
2605ALIGN 32
; First pass, remaining groups: rdx alternates between r9 (gathered word) and
; r8 (reduction multiplier); rbp stays 0 and only feeds carries.
2606$L$mulx4x_1st:
2607 adcx r15,rbp
2608 mulx rax,r10,QWORD[rsi]
2609 adcx r10,r14
2610 mulx r14,r11,QWORD[8+rsi]
2611 adcx r11,rax
2612 mulx rax,r12,QWORD[16+rsi]
2613 adcx r12,r14
2614 mulx r14,r13,QWORD[24+rsi]
2615DB 0x67,0x67
2616 mov rdx,r8
2617 adcx r13,rax
2618 adcx r14,rbp
2619 lea rsi,[32+rsi]
2620 lea rbx,[32+rbx]
2621
2622 adox r10,r15
2623 mulx r15,rax,QWORD[rcx]
2624 adcx r10,rax
2625 adox r11,r15
2626 mulx r15,rax,QWORD[8+rcx]
2627 adcx r11,rax
2628 adox r12,r15
2629 mulx r15,rax,QWORD[16+rcx]
2630 mov QWORD[((-40))+rbx],r10
2631 adcx r12,rax
2632 mov QWORD[((-32))+rbx],r11
2633 adox r13,r15
2634 mulx r15,rax,QWORD[24+rcx]
2635 mov rdx,r9
2636 mov QWORD[((-24))+rbx],r12
2637 adcx r13,rax
2638 adox r15,rbp
2639 lea rcx,[32+rcx]
2640 mov QWORD[((-16))+rbx],r13
2641
; DEC leaves CF untouched, so the ADCX chain survives the loop branch
2642 dec rdi
2643 jnz NEAR $L$mulx4x_1st
2644
; rax = negated length: rewind rsi; rbp ends up holding the top carry
2645 mov rax,QWORD[8+rsp]
2646 adc r15,rbp
2647 lea rsi,[rax*1+rsi]
2648 add r14,r15
2649 mov rdi,QWORD[((8+8))+rsp]
2650 adc rbp,rbp
2651 mov QWORD[((-8))+rbx],r14
2652 jmp NEAR $L$mulx4x_outer
2653
2654ALIGN 32
; Outer step: gather the next table word (same mask-and-OR scheme, masks
; addressed relative to rbx), then multiply-accumulate one more row.
2655$L$mulx4x_outer:
2656 lea r10,[((16-256))+rbx]
2657 pxor xmm4,xmm4
2658DB 0x67,0x67
2659 pxor xmm5,xmm5
2660 movdqa xmm0,XMMWORD[((-128))+rdi]
2661 movdqa xmm1,XMMWORD[((-112))+rdi]
2662 movdqa xmm2,XMMWORD[((-96))+rdi]
2663 pand xmm0,XMMWORD[256+r10]
2664 movdqa xmm3,XMMWORD[((-80))+rdi]
2665 pand xmm1,XMMWORD[272+r10]
2666 por xmm4,xmm0
2667 pand xmm2,XMMWORD[288+r10]
2668 por xmm5,xmm1
2669 pand xmm3,XMMWORD[304+r10]
2670 por xmm4,xmm2
2671 por xmm5,xmm3
2672 movdqa xmm0,XMMWORD[((-64))+rdi]
2673 movdqa xmm1,XMMWORD[((-48))+rdi]
2674 movdqa xmm2,XMMWORD[((-32))+rdi]
2675 pand xmm0,XMMWORD[320+r10]
2676 movdqa xmm3,XMMWORD[((-16))+rdi]
2677 pand xmm1,XMMWORD[336+r10]
2678 por xmm4,xmm0
2679 pand xmm2,XMMWORD[352+r10]
2680 por xmm5,xmm1
2681 pand xmm3,XMMWORD[368+r10]
2682 por xmm4,xmm2
2683 por xmm5,xmm3
2684 movdqa xmm0,XMMWORD[rdi]
2685 movdqa xmm1,XMMWORD[16+rdi]
2686 movdqa xmm2,XMMWORD[32+rdi]
2687 pand xmm0,XMMWORD[384+r10]
2688 movdqa xmm3,XMMWORD[48+rdi]
2689 pand xmm1,XMMWORD[400+r10]
2690 por xmm4,xmm0
2691 pand xmm2,XMMWORD[416+r10]
2692 por xmm5,xmm1
2693 pand xmm3,XMMWORD[432+r10]
2694 por xmm4,xmm2
2695 por xmm5,xmm3
2696 movdqa xmm0,XMMWORD[64+rdi]
2697 movdqa xmm1,XMMWORD[80+rdi]
2698 movdqa xmm2,XMMWORD[96+rdi]
2699 pand xmm0,XMMWORD[448+r10]
2700 movdqa xmm3,XMMWORD[112+rdi]
2701 pand xmm1,XMMWORD[464+r10]
2702 por xmm4,xmm0
2703 pand xmm2,XMMWORD[480+r10]
2704 por xmm5,xmm1
2705 pand xmm3,XMMWORD[496+r10]
2706 por xmm4,xmm2
2707 por xmm5,xmm3
2708 por xmm4,xmm5
2709 pshufd xmm0,xmm4,0x4e
2710 por xmm0,xmm4
2711 lea rdi,[256+rdi]
; bytes 66 48 0F 7E C2 = movq rdx,xmm0
2712DB 102,72,15,126,194
2713
; store the top carry of the previous row, then rewind rbx (rax = -length)
2714 mov QWORD[rbx],rbp
2715 lea rbx,[32+rax*1+rbx]
2716 mulx r11,r8,QWORD[rsi]
2717 xor rbp,rbp
2718 mov r9,rdx
2719 mulx r12,r14,QWORD[8+rsi]
2720 adox r8,QWORD[((-32))+rbx]
2721 adcx r11,r14
2722 mulx r13,r15,QWORD[16+rsi]
2723 adox r11,QWORD[((-24))+rbx]
2724 adcx r12,r15
2725 mulx r14,rdx,QWORD[24+rsi]
2726 adox r12,QWORD[((-16))+rbx]
2727 adcx r13,rdx
; rewind rcx as well (LEA keeps both carry chains intact)
2728 lea rcx,[rax*1+rcx]
2729 lea rsi,[32+rsi]
2730 adox r13,QWORD[((-8))+rbx]
2731 adcx r14,rbp
2732 adox r14,rbp
2733
; reduction multiplier for this row
2734 mov r15,r8
2735 imul r8,QWORD[((32+8))+rsp]
2736
2737 mov rdx,r8
2738 xor rbp,rbp
2739 mov QWORD[((8+8))+rsp],rdi
2740
2741 mulx r10,rax,QWORD[rcx]
2742 adcx r15,rax
2743 adox r10,r11
2744 mulx r11,rax,QWORD[8+rcx]
2745 adcx r10,rax
2746 adox r11,r12
2747 mulx r12,rax,QWORD[16+rcx]
2748 adcx r11,rax
2749 adox r12,r13
2750 mulx r15,rax,QWORD[24+rcx]
2751 mov rdx,r9
2752 mov rdi,QWORD[((24+8))+rsp]
2753 mov QWORD[((-32))+rbx],r10
2754 adcx r12,rax
2755 mov QWORD[((-24))+rbx],r11
2756 adox r15,rbp
2757 mov QWORD[((-16))+rbx],r12
2758 lea rcx,[32+rcx]
2759 jmp NEAR $L$mulx4x_inner
2760
2761ALIGN 32
; Like $L$mulx4x_1st, but also accumulates the words already stored at [rbx].
2762$L$mulx4x_inner:
2763 mulx rax,r10,QWORD[rsi]
2764 adcx r15,rbp
2765 adox r10,r14
2766 mulx r14,r11,QWORD[8+rsi]
2767 adcx r10,QWORD[rbx]
2768 adox r11,rax
2769 mulx rax,r12,QWORD[16+rsi]
2770 adcx r11,QWORD[8+rbx]
2771 adox r12,r14
2772 mulx r14,r13,QWORD[24+rsi]
2773 mov rdx,r8
2774 adcx r12,QWORD[16+rbx]
2775 adox r13,rax
2776 adcx r13,QWORD[24+rbx]
2777 adox r14,rbp
2778 lea rsi,[32+rsi]
2779 lea rbx,[32+rbx]
2780 adcx r14,rbp
2781
2782 adox r10,r15
2783 mulx r15,rax,QWORD[rcx]
2784 adcx r10,rax
2785 adox r11,r15
2786 mulx r15,rax,QWORD[8+rcx]
2787 adcx r11,rax
2788 adox r12,r15
2789 mulx r15,rax,QWORD[16+rcx]
2790 mov QWORD[((-40))+rbx],r10
2791 adcx r12,rax
2792 adox r13,r15
2793 mov QWORD[((-32))+rbx],r11
2794 mulx r15,rax,QWORD[24+rcx]
2795 mov rdx,r9
2796 lea rcx,[32+rcx]
2797 mov QWORD[((-24))+rbx],r12
2798 adcx r13,rax
2799 adox r15,rbp
2800 mov QWORD[((-16))+rbx],r13
2801
2802 dec rdi
2803 jnz NEAR $L$mulx4x_inner
2804
2805 mov rax,QWORD[((0+8))+rsp]
2806 adc r15,rbp
; rdi is 0 after the loop, so this SUB only sets CF when the carry word saved
; at [rbx] is nonzero; its result is discarded by the reload on the next line
2807 sub rdi,QWORD[rbx]
2808 mov rdi,QWORD[((8+8))+rsp]
2809 mov r10,QWORD[((16+8))+rsp]
2810 adc r14,r15
2811 lea rsi,[rax*1+rsi]
2812 adc rbp,rbp
2813 mov QWORD[((-8))+rbx],r14
2814
; another row while the table pointer is below the bound computed at entry
2815 cmp rdi,r10
2816 jb NEAR $L$mulx4x_outer
2817
; Epilogue: set up the registers for the shared masked-subtraction loop.
;   rax = 0 or -1: -1 when the top carry (rbp) is set or the last word at
;         [-8+rcx] is below r14 (borrow from the SUB), else 0
;   rbp -> start of the rcx array, rdi -> start of the accumulated result,
;   rcx =  negated length / 32, rdx = output pointer saved at entry,
;   r12 =  first word of the rcx array minus 1, r13..r15 = next three words
2818 mov r10,QWORD[((-8))+rcx]
2819 mov r8,rbp
2820 mov r12,QWORD[rax*1+rcx]
2821 lea rbp,[rax*1+rcx]
2822 mov rcx,rax
2823 lea rdi,[rax*1+rbx]
2824 xor eax,eax
2825 xor r15,r15
2826 sub r10,r14
2827 adc r15,r15
2828 or r8,r15
2829 sar rcx,3+2
2830 sub rax,r8
2831 mov rdx,QWORD[((56+8))+rsp]
2832 dec r12
2833 mov r13,QWORD[8+rbp]
2834 xor r8,r8
2835 mov r14,QWORD[16+rbp]
2836 mov r15,QWORD[24+rbp]
; tail jump: the target routine performs the final RET
2837 jmp NEAR $L$sqrx4x_sub_entry
2838
2839
2840
2841ALIGN 32
; bn_powerx5 (Win64 entry; also has the inner label $L$powerx5_enter)
; After the Win64 -> internal register shuffle:
;   rdi = arg1, rsi = arg2, rdx = arg3, rcx = arg4, r8 = arg5, r9 = arg6
; Flow visible below: build the aligned frame, stash rdi/rcx/-len/rdx in
; xmm1..xmm4, run five rounds of __bn_sqrx8x_internal + __bn_postx4x_internal,
; then one mulx4x_internal call with rdx (table pointer) and rcx restored, and
; return 1. The two helpers called in the loop body are defined outside this
; block; what each round computes is not asserted here.
2842bn_powerx5:
2843 mov QWORD[8+rsp],rdi ;WIN64 prologue
2844 mov QWORD[16+rsp],rsi
2845 mov rax,rsp
2846$L$SEH_begin_bn_powerx5:
2847 mov rdi,rcx
2848 mov rsi,rdx
2849 mov rdx,r8
2850 mov rcx,r9
2851 mov r8,QWORD[40+rsp]
2852 mov r9,QWORD[48+rsp]
2853
2854
2855
; rax = stack pointer at entry, stored at [40+rsp] once the frame exists
2856 mov rax,rsp
2857
2858$L$powerx5_enter:
2859 push rbx
2860
2861 push rbp
2862
2863 push r12
2864
2865 push r13
2866
2867 push r14
2868
2869 push r15
2870
2871$L$powerx5_prologue:
2872
; r9 = count * 8 bytes, r10 = 3 * byte length, then r9 = -byte length;
; r8 = QWORD pointed to by arg5
2873 shl r9d,3
2874 lea r10,[r9*2+r9]
2875 neg r9
2876 mov r8,QWORD[r8]
2877
2878
2879
2880
2881
2882
2883
2884
; Frame placement relative to rdi modulo 4096. NOTE(review): presumably to
; keep the frame from aliasing the caller's buffers - confirm against the
; perlasm source.
2885 lea r11,[((-320))+r9*2+rsp]
2886 mov rbp,rsp
2887 sub r11,rdi
2888 and r11,4095
2889 cmp r10,r11
2890 jb NEAR $L$pwrx_sp_alt
2891 sub rbp,r11
2892 lea rbp,[((-320))+r9*2+rbp]
2893 jmp NEAR $L$pwrx_sp_done
2894
2895ALIGN 32
2896$L$pwrx_sp_alt:
2897 lea r10,[((4096-320))+r9*2]
2898 lea rbp,[((-320))+r9*2+rbp]
2899 sub r11,r10
; MOV keeps the flags of the SUB alive for CMOVC
2900 mov r10,0
2901 cmovc r11,r10
2902 sub rbp,r11
2903$L$pwrx_sp_done:
; 64-byte align the frame and walk rsp down to it, touching one word per
; 4096-byte step
2904 and rbp,-64
2905 mov r11,rsp
2906 sub r11,rbp
2907 and r11,-4096
2908 lea rsp,[rbp*1+r11]
2909 mov r10,QWORD[rsp]
2910 cmp rsp,rbp
2911 ja NEAR $L$pwrx_page_walk
2912 jmp NEAR $L$pwrx_page_walk_done
2913
2914$L$pwrx_page_walk:
2915 lea rsp,[((-4096))+rsp]
2916 mov r10,QWORD[rsp]
2917 cmp rsp,rbp
2918 ja NEAR $L$pwrx_page_walk
2919$L$pwrx_page_walk_done:
2920
; r10 = -byte length, r9 = +byte length
2921 mov r10,r9
2922 neg r9
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
; xmm0 = 0: __bn_sqrx8x_internal clears its scratch buffer from xmm0
2935 pxor xmm0,xmm0
; bytes 66 48 0F 6E CF = movq xmm1,rdi
2936DB 102,72,15,110,207
; bytes 66 48 0F 6E D1 = movq xmm2,rcx
2937DB 102,72,15,110,209
; bytes 66 49 0F 6E DA = movq xmm3,r10
2938DB 102,73,15,110,218
; bytes 66 48 0F 6E E2 = movq xmm4,rdx
2939DB 102,72,15,110,226
2940 mov QWORD[32+rsp],r8
2941 mov QWORD[40+rsp],rax
2942
2943$L$powerx5_body:
2944
; five identical rounds
2945 call __bn_sqrx8x_internal
2946 call __bn_postx4x_internal
2947 call __bn_sqrx8x_internal
2948 call __bn_postx4x_internal
2949 call __bn_sqrx8x_internal
2950 call __bn_postx4x_internal
2951 call __bn_sqrx8x_internal
2952 call __bn_postx4x_internal
2953 call __bn_sqrx8x_internal
2954 call __bn_postx4x_internal
2955
; Arguments for mulx4x_internal: r9 = r10 and rdi = rsi as left by the last
; helper call, rcx and rdx from the xmm copies made above, rax = entry rsp
; (the worker reads DWORD[56+rax])
2956 mov r9,r10
2957 mov rdi,rsi
; bytes 66 48 0F 7E D1 = movq rcx,xmm2
2958DB 102,72,15,126,209
; bytes 66 48 0F 7E E2 = movq rdx,xmm4
2959DB 102,72,15,126,226
2960 mov rax,QWORD[40+rsp]
2961
2962 call mulx4x_internal
2963
2964 mov rsi,QWORD[40+rsp]
2965
; return value 1; restore the six registers pushed in the prologue
2966 mov rax,1
2967
2968 mov r15,QWORD[((-48))+rsi]
2969
2970 mov r14,QWORD[((-40))+rsi]
2971
2972 mov r13,QWORD[((-32))+rsi]
2973
2974 mov r12,QWORD[((-24))+rsi]
2975
2976 mov rbp,QWORD[((-16))+rsi]
2977
2978 mov rbx,QWORD[((-8))+rsi]
2979
2980 lea rsp,[rsi]
2981
2982$L$powerx5_epilogue:
2983 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
2984 mov rsi,QWORD[16+rsp]
2985 DB 0F3h,0C3h ;repret
2986
2987$L$SEH_end_bn_powerx5:
2988
2989global bn_sqrx8x_internal
2990
2991
2992ALIGN 32
; bn_sqrx8x_internal / __bn_sqrx8x_internal
; MULX/ADCX/ADOX squaring of the word array at rsi into the stack buffer at
; [48+8+rsp]; execution then falls through into __bn_sqrx8x_reduction.
; In (as read by the code below):
;   rsi  -> input words, r9 = byte length
;   xmm0 =  zero (used to clear the buffer; bn_powerx5 clears it before calling)
;   xmm2 =  value moved into rbp at the very end (for the reduction that follows)
;   xmm3 =  value moved into rcx for the final pass; it is used as a negative
;           byte offset from the end of the input
; Frame slots (+8 = return address): [0+8] byte length, [8+8] end of input,
;   [16+8] saved carry as 0 / -1, [24+8] saved buffer pointer.
; Structure, as far as the instruction stream shows:
;   1. clear the buffer,
;   2. $L$sqrx8x_outer_loop / $L$sqrx8x_loop: accumulate products of distinct
;      input words (each pair once),
;   3. $L$sqrx4x_shift_n_add: double that sum (ADOX r,r) and add the square of
;      each input word (MULX with rdx as both operands).
; DB lines starting 0xc4 are MULX instructions written out byte by byte with a
; 32-bit displacement; lone 0x3e/0x66/0x67 bytes are prefixes on the following
; instruction - presumably all for code alignment, confirm with the generator.
2993bn_sqrx8x_internal:
2994__bn_sqrx8x_internal:
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036 lea rdi,[((48+8))+rsp]
3037 lea rbp,[r9*1+rsi]
3038 mov QWORD[((0+8))+rsp],r9
3039 mov QWORD[((8+8))+rsp],rbp
; the first trip enters mid-loop, so the first 64 bytes are not cleared here
3040 jmp NEAR $L$sqr8x_zero_start
3041
3042ALIGN 32
; 12 bytes: three 0x66 prefixes + 2E 0F 1F 84 00 <disp32> (a long NOP form)
3043DB 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
; clears 128 bytes per trip while r9 drops by 64; exits with r9 = 0
3044$L$sqrx8x_zero:
3045DB 0x3e
3046 movdqa XMMWORD[rdi],xmm0
3047 movdqa XMMWORD[16+rdi],xmm0
3048 movdqa XMMWORD[32+rdi],xmm0
3049 movdqa XMMWORD[48+rdi],xmm0
3050$L$sqr8x_zero_start:
3051 movdqa XMMWORD[64+rdi],xmm0
3052 movdqa XMMWORD[80+rdi],xmm0
3053 movdqa XMMWORD[96+rdi],xmm0
3054 movdqa XMMWORD[112+rdi],xmm0
3055 lea rdi,[128+rdi]
3056 sub r9,64
3057 jnz NEAR $L$sqrx8x_zero
3058
; rdx = first input word; accumulators cleared (the XORs also clear CF and OF)
3059 mov rdx,QWORD[rsi]
3060
3061 xor r10,r10
3062 xor r11,r11
3063 xor r12,r12
3064 xor r13,r13
3065 xor r14,r14
3066 xor r15,r15
3067 lea rdi,[((48+8))+rsp]
3068 xor rbp,rbp
3069 jmp NEAR $L$sqrx8x_outer_loop
3070
3071ALIGN 32
; One 8-word window at rsi: word 0 times words 1..7, then word 1 times words
; 2..7, and so on, accumulated in r8..r15 and flushed to [rdi] as words finish.
; rbp is kept at 0 and only feeds carries.
3072$L$sqrx8x_outer_loop:
3073 mulx rax,r8,QWORD[8+rsi]
3074 adcx r8,r9
3075 adox r10,rax
3076 mulx rax,r9,QWORD[16+rsi]
3077 adcx r9,r10
3078 adox r11,rax
; mulx rax,r10,QWORD[24+rsi]
3079DB 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
3080 adcx r10,r11
3081 adox r12,rax
; mulx rax,r11,QWORD[32+rsi]
3082DB 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
3083 adcx r11,r12
3084 adox r13,rax
3085 mulx rax,r12,QWORD[40+rsi]
3086 adcx r12,r13
3087 adox r14,rax
3088 mulx rax,r13,QWORD[48+rsi]
3089 adcx r13,r14
3090 adox rax,r15
3091 mulx r15,r14,QWORD[56+rsi]
3092 mov rdx,QWORD[8+rsi]
3093 adcx r14,rax
3094 adox r15,rbp
3095 adc r15,QWORD[64+rdi]
3096 mov QWORD[8+rdi],r8
3097 mov QWORD[16+rdi],r9
; rcx = 0 / -1 keeps the carry out of this row; XOR restarts both chains
3098 sbb rcx,rcx
3099 xor rbp,rbp
3100
3101
3102 mulx rbx,r8,QWORD[16+rsi]
3103 mulx rax,r9,QWORD[24+rsi]
3104 adcx r8,r10
3105 adox r9,rbx
3106 mulx rbx,r10,QWORD[32+rsi]
3107 adcx r9,r11
3108 adox r10,rax
; mulx rax,r11,QWORD[40+rsi]
3109DB 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
3110 adcx r10,r12
3111 adox r11,rbx
; mulx rbx,r12,QWORD[48+rsi]
3112DB 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
3113 adcx r11,r13
3114 adox r12,r14
; mulx r14,r13,QWORD[56+rsi]
3115DB 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
3116 mov rdx,QWORD[16+rsi]
3117 adcx r12,rax
3118 adox r13,rbx
3119 adcx r13,r15
3120 adox r14,rbp
3121 adcx r14,rbp
3122
3123 mov QWORD[24+rdi],r8
3124 mov QWORD[32+rdi],r9
3125
3126 mulx rbx,r8,QWORD[24+rsi]
3127 mulx rax,r9,QWORD[32+rsi]
3128 adcx r8,r10
3129 adox r9,rbx
3130 mulx rbx,r10,QWORD[40+rsi]
3131 adcx r9,r11
3132 adox r10,rax
; mulx rax,r11,QWORD[48+rsi]
3133DB 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
3134 adcx r10,r12
3135 adox r11,r13
; mulx r13,r12,QWORD[56+rsi]
3136DB 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
3137DB 0x3e
3138 mov rdx,QWORD[24+rsi]
3139 adcx r11,rbx
3140 adox r12,rax
3141 adcx r12,r14
3142 mov QWORD[40+rdi],r8
3143 mov QWORD[48+rdi],r9
3144 mulx rax,r8,QWORD[32+rsi]
3145 adox r13,rbp
3146 adcx r13,rbp
3147
3148 mulx rbx,r9,QWORD[40+rsi]
3149 adcx r8,r10
3150 adox r9,rax
3151 mulx rax,r10,QWORD[48+rsi]
3152 adcx r9,r11
3153 adox r10,r12
3154 mulx r12,r11,QWORD[56+rsi]
3155 mov rdx,QWORD[32+rsi]
3156 mov r14,QWORD[40+rsi]
3157 adcx r10,rbx
3158 adox r11,rax
3159 mov r15,QWORD[48+rsi]
3160 adcx r11,r13
3161 adox r12,rbp
3162 adcx r12,rbp
3163
3164 mov QWORD[56+rdi],r8
3165 mov QWORD[64+rdi],r9
3166
; last three words of the window are held in registers (r14, r15, r8)
3167 mulx rax,r9,r14
3168 mov r8,QWORD[56+rsi]
3169 adcx r9,r10
3170 mulx rbx,r10,r15
3171 adox r10,rax
3172 adcx r10,r11
3173 mulx rax,r11,r8
3174 mov rdx,r14
3175 adox r11,rbx
3176 adcx r11,r12
3177
3178 adcx rax,rbp
3179
3180 mulx rbx,r14,r15
3181 mulx r13,r12,r8
3182 mov rdx,r15
3183 lea rsi,[64+rsi]
3184 adcx r11,r14
3185 adox r12,rbx
3186 adcx r12,rax
3187 adox r13,rbp
3188
3189DB 0x67,0x67
3190 mulx r14,r8,r8
3191 adcx r13,r8
3192 adcx r14,rbp
3193
; window was the last one in the input: go double and add the squares
3194 cmp rsi,QWORD[((8+8))+rsp]
3195 je NEAR $L$sqrx8x_outer_break
3196
; NEG turns the 0 / -1 in rcx back into CF; MOV and LEA do not disturb it
3197 neg rcx
3198 mov rcx,-8
3199 mov r15,rbp
3200 mov r8,QWORD[64+rdi]
3201 adcx r9,QWORD[72+rdi]
3202 adcx r10,QWORD[80+rdi]
3203 adcx r11,QWORD[88+rdi]
3204 adc r12,QWORD[96+rdi]
3205 adc r13,QWORD[104+rdi]
3206 adc r14,QWORD[112+rdi]
3207 adc r15,QWORD[120+rdi]
3208 lea rbp,[rsi]
3209 lea rdi,[128+rdi]
3210 sbb rax,rax
3211
; rdx = first word of the window just finished; rbp -> the words after it
3212 mov rdx,QWORD[((-64))+rsi]
3213 mov QWORD[((16+8))+rsp],rax
3214 mov QWORD[((24+8))+rsp],rdi
3215
3216
3217 xor eax,eax
3218 jmp NEAR $L$sqrx8x_loop
3219
3220ALIGN 32
; Eight trips (rcx = -8..-1): multiply one word of the finished window (rdx)
; by the eight words at rbp, store one result word at [rcx*8+rdi] per trip.
3221$L$sqrx8x_loop:
3222 mov rbx,r8
3223 mulx r8,rax,QWORD[rbp]
3224 adcx rbx,rax
3225 adox r8,r9
3226
3227 mulx r9,rax,QWORD[8+rbp]
3228 adcx r8,rax
3229 adox r9,r10
3230
3231 mulx r10,rax,QWORD[16+rbp]
3232 adcx r9,rax
3233 adox r10,r11
3234
3235 mulx r11,rax,QWORD[24+rbp]
3236 adcx r10,rax
3237 adox r11,r12
3238
; mulx r12,rax,QWORD[32+rbp]
3239DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
3240 adcx r11,rax
3241 adox r12,r13
3242
3243 mulx r13,rax,QWORD[40+rbp]
3244 adcx r12,rax
3245 adox r13,r14
3246
3247 mulx r14,rax,QWORD[48+rbp]
3248 mov QWORD[rcx*8+rdi],rbx
; MOV (not XOR) zeroes ebx here so that CF and OF stay intact
3249 mov ebx,0
3250 adcx r13,rax
3251 adox r14,r15
3252
; mulx r15,rax,QWORD[56+rbp]
3253DB 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
; next multiplier word from the finished window
3254 mov rdx,QWORD[8+rcx*8+rsi]
3255 adcx r14,rax
3256 adox r15,rbx
3257 adcx r15,rbx
3258
3259DB 0x67
3260 inc rcx
3261 jnz NEAR $L$sqrx8x_loop
3262
3263 lea rbp,[64+rbp]
3264 mov rcx,-8
3265 cmp rbp,QWORD[((8+8))+rsp]
3266 je NEAR $L$sqrx8x_break
3267
; rbx is 0, the slot holds 0 / -1: the SUB recreates the saved carry in CF
3268 sub rbx,QWORD[((16+8))+rsp]
3269DB 0x66
3270 mov rdx,QWORD[((-64))+rsi]
3271 adcx r8,QWORD[rdi]
3272 adcx r9,QWORD[8+rdi]
3273 adc r10,QWORD[16+rdi]
3274 adc r11,QWORD[24+rdi]
3275 adc r12,QWORD[32+rdi]
3276 adc r13,QWORD[40+rdi]
3277 adc r14,QWORD[48+rdi]
3278 adc r15,QWORD[56+rdi]
3279 lea rdi,[64+rdi]
3280DB 0x67
3281 sbb rax,rax
3282 xor ebx,ebx
3283 mov QWORD[((16+8))+rsp],rax
3284 jmp NEAR $L$sqrx8x_loop
3285
3286ALIGN 32
; All later words consumed for this window: propagate the saved carry, then
; either continue straight into the next window (rdi already there) or park
; r9..r15 and reload them from the pointer saved at [24+8+rsp].
3287$L$sqrx8x_break:
3288 xor rbp,rbp
3289 sub rbx,QWORD[((16+8))+rsp]
3290 adcx r8,rbp
3291 mov rcx,QWORD[((24+8))+rsp]
3292 adcx r9,rbp
3293 mov rdx,QWORD[rsi]
3294 adc r10,0
3295 mov QWORD[rdi],r8
3296 adc r11,0
3297 adc r12,0
3298 adc r13,0
3299 adc r14,0
3300 adc r15,0
3301 cmp rdi,rcx
3302 je NEAR $L$sqrx8x_outer_loop
3303
3304 mov QWORD[8+rdi],r9
3305 mov r9,QWORD[8+rcx]
3306 mov QWORD[16+rdi],r10
3307 mov r10,QWORD[16+rcx]
3308 mov QWORD[24+rdi],r11
3309 mov r11,QWORD[24+rcx]
3310 mov QWORD[32+rdi],r12
3311 mov r12,QWORD[32+rcx]
3312 mov QWORD[40+rdi],r13
3313 mov r13,QWORD[40+rcx]
3314 mov QWORD[48+rdi],r14
3315 mov r14,QWORD[48+rcx]
3316 mov QWORD[56+rdi],r15
3317 mov r15,QWORD[56+rcx]
3318 mov rdi,rcx
3319 jmp NEAR $L$sqrx8x_outer_loop
3320
3321ALIGN 32
; Flush the remaining accumulators, then restart at the bottom of the buffer.
; rsi is at the end of the input and rcx (from xmm3) is used as a negative
; offset, so [rcx+rsi] addresses the input from its start.
3322$L$sqrx8x_outer_break:
3323 mov QWORD[72+rdi],r9
; bytes 66 48 0F 7E D9 = movq rcx,xmm3
3324DB 102,72,15,126,217
3325 mov QWORD[80+rdi],r10
3326 mov QWORD[88+rdi],r11
3327 mov QWORD[96+rdi],r12
3328 mov QWORD[104+rdi],r13
3329 mov QWORD[112+rdi],r14
3330 lea rdi,[((48+8))+rsp]
3331 mov rdx,QWORD[rcx*1+rsi]
3332
3333 mov r11,QWORD[8+rdi]
; XOR clears CF and OF; r9 = byte length saved at entry
3334 xor r10,r10
3335 mov r9,QWORD[((0+8))+rsp]
3336 adox r11,r11
3337 mov r12,QWORD[16+rdi]
3338 mov r13,QWORD[24+rdi]
3339
3340
3341ALIGN 32
; Four input words per trip: ADOX r,r doubles a buffer word (carry in OF),
; MULX rbx,rax,rdx squares the current input word, ADCX adds the square to
; the doubled words (carry in CF). Results are written back in place.
3342$L$sqrx4x_shift_n_add:
3343 mulx rbx,rax,rdx
3344 adox r12,r12
3345 adcx rax,r10
; bytes 48 8B 94 0E <disp32=8> = mov rdx,QWORD[8+rcx*1+rsi]
3346DB 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
; bytes 4C 8B 97 <disp32=32> = mov r10,QWORD[32+rdi]
3347DB 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
3348 adox r13,r13
3349 adcx rbx,r11
3350 mov r11,QWORD[40+rdi]
3351 mov QWORD[rdi],rax
3352 mov QWORD[8+rdi],rbx
3353
3354 mulx rbx,rax,rdx
3355 adox r10,r10
3356 adcx rax,r12
3357 mov rdx,QWORD[16+rcx*1+rsi]
3358 mov r12,QWORD[48+rdi]
3359 adox r11,r11
3360 adcx rbx,r13
3361 mov r13,QWORD[56+rdi]
3362 mov QWORD[16+rdi],rax
3363 mov QWORD[24+rdi],rbx
3364
3365 mulx rbx,rax,rdx
3366 adox r12,r12
3367 adcx rax,r10
3368 mov rdx,QWORD[24+rcx*1+rsi]
3369 lea rcx,[32+rcx]
3370 mov r10,QWORD[64+rdi]
3371 adox r13,r13
3372 adcx rbx,r11
3373 mov r11,QWORD[72+rdi]
3374 mov QWORD[32+rdi],rax
3375 mov QWORD[40+rdi],rbx
3376
3377 mulx rbx,rax,rdx
3378 adox r10,r10
3379 adcx rax,r12
; JRCXZ tests rcx without touching the flags, so both carry chains survive
3380 jrcxz $L$sqrx4x_shift_n_add_break
; bytes 48 8B 94 0E <disp32=0> = mov rdx,QWORD[0+rcx*1+rsi]
3381DB 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
3382 adox r11,r11
3383 adcx rbx,r13
3384 mov r12,QWORD[80+rdi]
3385 mov r13,QWORD[88+rdi]
3386 mov QWORD[48+rdi],rax
3387 mov QWORD[56+rdi],rbx
3388 lea rdi,[64+rdi]
3389 nop
3390 jmp NEAR $L$sqrx4x_shift_n_add
3391
3392ALIGN 32
3393$L$sqrx4x_shift_n_add_break:
3394 adcx rbx,r13
3395 mov QWORD[48+rdi],rax
3396 mov QWORD[56+rdi],rbx
3397 lea rdi,[64+rdi]
; bytes 66 48 0F 7E D5 = movq rbp,xmm2; execution continues into the
; reduction routine that follows this block
3398DB 102,72,15,126,213
; __bn_sqrx8x_reduction: word-by-word reduction of the buffer that starts at
; [48+8+rsp], eight 64-bit words per step, using MULX with the two independent
; ADCX (CF) / ADOX (OF) carry chains. The code above falls through into this
; label; it also ends in its own ret.
; Inputs as read below: rbp = pointer to the words multiplied against
; (presumably the modulus n[]), r9 = byte length used to form the end marker,
; rdi = end-of-buffer marker, xmm2/xmm3 = values copied back into rbp/rcx
; after every outer pass.
; Frame slots used here (all relative to rsp):
;   [0+8]  end marker compared against rbp     [8+8]  end marker compared against r8
;   [16+8] carry mask between tail passes       [24+8] top-most carry of the outer pass
;   [32+8] per-word multiplier factor (presumably Montgomery n0)
;   [48+8].. buffer; its first 8 slots are reused to save the 8 multipliers
; NOTE(review): the recurring "+8" presumably skips a return address -- confirm
; against the callers.
3399__bn_sqrx8x_reduction:
3400 xor eax,eax  ; rax = 0; spilled to [24+8+rsp] at the top of every outer pass
3401 mov rbx,QWORD[((32+8))+rsp]  ; rbx = multiplier factor
3402 mov rdx,QWORD[((48+8))+rsp]  ; rdx = word 0 of the buffer
3403 lea rcx,[((-64))+r9*1+rbp]  ; rcx = rbp + r9 - 64
3404
3405 mov QWORD[((0+8))+rsp],rcx  ; end marker, later compared against rbp
3406 mov QWORD[((8+8))+rsp],rdi  ; end marker, later compared against r8
3407
3408 lea rdi,[((48+8))+rsp]  ; rdi = start of the buffer
3409 jmp NEAR $L$sqrx8x_reduction_loop
3410
3411ALIGN 32
; Outer pass: load one 8-word window into r8..r15 and reduce it.
3412$L$sqrx8x_reduction_loop:
3413 mov r9,QWORD[8+rdi]  ; words 1..7 of the window go to r9..r15
3414 mov r10,QWORD[16+rdi]
3415 mov r11,QWORD[24+rdi]
3416 mov r12,QWORD[32+rdi]
3417 mov r8,rdx  ; word 0 arrives pre-loaded in rdx
3418 imul rdx,rbx  ; rdx = low64(word0 * rbx): multiplier for the first step
3419 mov r13,QWORD[40+rdi]
3420 mov r14,QWORD[48+rdi]
3421 mov r15,QWORD[56+rdi]
3422 mov QWORD[((24+8))+rsp],rax  ; spill the incoming top-most carry
3423
3424 lea rdi,[64+rdi]
3425 xor rsi,rsi  ; rsi = 0; also clears CF and OF for the adcx/adox chains
3426 mov rcx,-8  ; 8 steps, counted up to zero
3427 jmp NEAR $L$sqrx8x_reduce
3428
3429ALIGN 32
; One step: r8..r15 += rdx * [rbp+0..56], shifted down by one word.
3430$L$sqrx8x_reduce:
3431 mov rbx,r8
3432 mulx r8,rax,QWORD[rbp]  ; r8:rax = rdx * [rbp]
3433 adcx rax,rbx  ; sum is not read again; only the carry out (CF) matters
3434 adox r8,r9
3435
3436 mulx r9,rbx,QWORD[8+rbp]
3437 adcx r8,rbx
3438 adox r9,r10
3439
3440 mulx r10,rbx,QWORD[16+rbp]
3441 adcx r9,rbx
3442 adox r10,r11
3443
3444 mulx r11,rbx,QWORD[24+rbp]
3445 adcx r10,rbx
3446 adox r11,r12
3447
3448DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00  ; mulx r12,rbx,QWORD[32+rbp], hand-encoded with a 32-bit displacement
3449 mov rax,rdx  ; rax = current multiplier
3450 mov rdx,r8  ; rdx = new word 0, input to the next-multiplier computation
3451 adcx r11,rbx
3452 adox r12,r13
3453
3454 mulx rdx,rbx,QWORD[((32+8))+rsp]  ; rbx = low64(r8 * factor): multiplier for the next step
3455 mov rdx,rax  ; back to the current multiplier
3456 mov QWORD[((64+48+8))+rcx*8+rsp],rax  ; save it for the tail passes (rcx = -8..-1 maps to the first 8 buffer slots)
3457
3458 mulx r13,rax,QWORD[40+rbp]
3459 adcx r12,rax
3460 adox r13,r14
3461
3462 mulx r14,rax,QWORD[48+rbp]
3463 adcx r13,rax
3464 adox r14,r15
3465
3466 mulx r15,rax,QWORD[56+rbp]
3467 mov rdx,rbx  ; switch to the next multiplier
3468 adcx r14,rax
3469 adox r15,rsi  ; rsi = 0: fold the OF chain into r15
3470 adcx r15,rsi  ; ... and the CF chain
3471
3472DB 0x67,0x67,0x67  ; three address-size prefix bytes in front of the inc (presumably padding)
3473 inc rcx
3474 jnz NEAR $L$sqrx8x_reduce
3475
3476 mov rax,rsi  ; rax = 0
3477 cmp rbp,QWORD[((0+8))+rsp]  ; rbp already at the end marker?
3478 jae NEAR $L$sqrx8x_no_tail  ; yes: no further words at rbp; CF is 0 on this path
3479
; Prepare the first tail pass: add the next 8 buffer words to r8..r15.
3480 mov rdx,QWORD[((48+8))+rsp]  ; first saved multiplier
3481 add r8,QWORD[rdi]
3482 lea rbp,[64+rbp]  ; next 8 words at rbp
3483 mov rcx,-8
3484 adcx r9,QWORD[8+rdi]
3485 adcx r10,QWORD[16+rdi]
3486 adc r11,QWORD[24+rdi]
3487 adc r12,QWORD[32+rdi]
3488 adc r13,QWORD[40+rdi]
3489 adc r14,QWORD[48+rdi]
3490 adc r15,QWORD[56+rdi]
3491 lea rdi,[64+rdi]
3492 sbb rax,rax  ; rax = 0 or -1: carry out of the additions
3493
3494 xor rsi,rsi  ; rsi = 0, CF = OF = 0
3495 mov QWORD[((16+8))+rsp],rax  ; park the carry mask
3496 jmp NEAR $L$sqrx8x_tail
3497
3498ALIGN 32
; Tail step: same multiply-accumulate as above with the saved multipliers;
; the finished low word of each step is written back to the buffer.
3499$L$sqrx8x_tail:
3500 mov rbx,r8
3501 mulx r8,rax,QWORD[rbp]
3502 adcx rbx,rax  ; rbx = finished low word, stored below
3503 adox r8,r9
3504
3505 mulx r9,rax,QWORD[8+rbp]
3506 adcx r8,rax
3507 adox r9,r10
3508
3509 mulx r10,rax,QWORD[16+rbp]
3510 adcx r9,rax
3511 adox r10,r11
3512
3513 mulx r11,rax,QWORD[24+rbp]
3514 adcx r10,rax
3515 adox r11,r12
3516
3517DB 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00  ; mulx r12,rax,QWORD[32+rbp], hand-encoded with a 32-bit displacement
3518 adcx r11,rax
3519 adox r12,r13
3520
3521 mulx r13,rax,QWORD[40+rbp]
3522 adcx r12,rax
3523 adox r13,r14
3524
3525 mulx r14,rax,QWORD[48+rbp]
3526 adcx r13,rax
3527 adox r14,r15
3528
3529 mulx r15,rax,QWORD[56+rbp]
3530 mov rdx,QWORD[((72+48+8))+rcx*8+rsp]  ; load the multiplier slot for step rcx+1
3531 adcx r14,rax
3532 adox r15,rsi
3533 mov QWORD[rcx*8+rdi],rbx  ; store the finished word below rdi (rcx is negative)
3534 mov rbx,r8
3535 adcx r15,rsi
3536
3537 inc rcx
3538 jnz NEAR $L$sqrx8x_tail
3539
3540 cmp rbp,QWORD[((0+8))+rsp]  ; end marker reached?
3541 jae NEAR $L$sqrx8x_tail_done
3542
; Another tail pass: bring back the parked carry and add the next 8 words.
3543 sub rsi,QWORD[((16+8))+rsp]  ; 0 - mask: CF = 1 iff the parked carry was set
3544 mov rdx,QWORD[((48+8))+rsp]  ; rewind to the first saved multiplier
3545 lea rbp,[64+rbp]
3546 adc r8,QWORD[rdi]
3547 adc r9,QWORD[8+rdi]
3548 adc r10,QWORD[16+rdi]
3549 adc r11,QWORD[24+rdi]
3550 adc r12,QWORD[32+rdi]
3551 adc r13,QWORD[40+rdi]
3552 adc r14,QWORD[48+rdi]
3553 adc r15,QWORD[56+rdi]
3554 lea rdi,[64+rdi]
3555 sbb rax,rax  ; new carry mask
3556 sub rcx,8  ; rcx: 0 -> -8 for the next pass
3557
3558 xor rsi,rsi
3559 mov QWORD[((16+8))+rsp],rax
3560 jmp NEAR $L$sqrx8x_tail
3561
3562ALIGN 32
3563$L$sqrx8x_tail_done:
3564 xor rax,rax
3565 add r8,QWORD[((24+8))+rsp]  ; add the top-most carry spilled at the top of this outer pass
3566 adc r9,0
3567 adc r10,0
3568 adc r11,0
3569 adc r12,0
3570 adc r13,0
3571 adc r14,0
3572 adc r15,0
3573 adc rax,0  ; rax = carry out of that propagation
3574
3575 sub rsi,QWORD[((16+8))+rsp]  ; restore CF from the parked carry mask (rsi is 0 here)
3576$L$sqrx8x_no_tail:
3577 adc r8,QWORD[rdi]  ; add the 8 buffer words at rdi to r8..r15
3578DB 102,72,15,126,217  ; movq rcx,xmm3
3579 adc r9,QWORD[8+rdi]
3580 mov rsi,QWORD[56+rbp]  ; read [56+rbp] before rbp is replaced
3581DB 102,72,15,126,213  ; movq rbp,xmm2
3582 adc r10,QWORD[16+rdi]
3583 adc r11,QWORD[24+rdi]
3584 adc r12,QWORD[32+rdi]
3585 adc r13,QWORD[40+rdi]
3586 adc r14,QWORD[48+rdi]
3587 adc r15,QWORD[56+rdi]
3588 adc rax,0  ; rax = top-most carry handed to the next outer pass
3589
3590 mov rbx,QWORD[((32+8))+rsp]  ; reload the multiplier factor
3591 mov rdx,QWORD[64+rcx*1+rdi]  ; pre-load word 0 of the next window
3592
3593 mov QWORD[rdi],r8  ; write the 8 accumulated words back
3594 lea r8,[64+rdi]  ; r8 = address just past the words being stored
3595 mov QWORD[8+rdi],r9
3596 mov QWORD[16+rdi],r10
3597 mov QWORD[24+rdi],r11
3598 mov QWORD[32+rdi],r12
3599 mov QWORD[40+rdi],r13
3600 mov QWORD[48+rdi],r14
3601 mov QWORD[56+rdi],r15
3602
3603 lea rdi,[64+rcx*1+rdi]  ; rdi = start of the next window
3604 cmp r8,QWORD[((8+8))+rsp]  ; compare with the rdi value saved on entry
3605 jb NEAR $L$sqrx8x_reduction_loop  ; below it: another outer pass
3606 DB 0F3h,0C3h ;repret
3607
3608
; __bn_postx4x_internal: branch-free masked add, four 64-bit words per pass.
; Computes out[i] = in[i] + (~w[i] & mask) with carry propagation, where the
; very first w word is decremented before the complement. For a multi-word
; value w with a non-zero low word this adds the two's complement of w when
; mask is all-ones and adds zero when mask is 0 (presumably the final
; conditional subtraction of the modulus).
; Inputs as read below: rbp = pointer to w[], rdi = pointer to in[],
; xmm1 = output pointer, rax = 0/1 selector, rcx = negative byte count
; (the loop counts rcx/32 up to zero).
; On return r9 holds the negated entry value of rcx; r10 holds a copy of it.
3609ALIGN 32
3610__bn_postx4x_internal:
3611
3612 mov r12,QWORD[rbp]
3613 mov r10,rcx  ; r10 = copy of the entry count
3614 mov r9,rcx  ; r9 = copy of the entry count, negated before returning
3615 neg rax  ; selector 0/1 -> mask 0/all-ones
3616 sar rcx,3+2  ; bytes -> groups of four 8-byte words (stays negative)
3617
3618DB 102,72,15,126,202  ; movq rdx,xmm1 (output pointer)
3619DB 102,72,15,126,206  ; movq rsi,xmm1
3620 dec r12  ; ~(w[0]-1) == -w[0]: supplies the "+1" of the two's complement
3621 mov r13,QWORD[8+rbp]
3622 xor r8,r8  ; r8 = saved carry, initially none
3623 mov r14,QWORD[16+rbp]
3624 mov r15,QWORD[24+rbp]
3625 jmp NEAR $L$sqrx4x_sub_entry  ; first group is already loaded
3626
3627ALIGN 16
3628$L$sqrx4x_sub:
3629 mov r12,QWORD[rbp]
3630 mov r13,QWORD[8+rbp]
3631 mov r14,QWORD[16+rbp]
3632 mov r15,QWORD[24+rbp]
3633$L$sqrx4x_sub_entry:
3634 andn r12,r12,rax  ; r12 = ~r12 & mask
3635 lea rbp,[32+rbp]
3636 andn r13,r13,rax
3637 andn r14,r14,rax
3638 andn r15,r15,rax
3639
3640 neg r8  ; CF = (r8 != 0): restore the carry saved by the sbb below
3641 adc r12,QWORD[rdi]
3642 adc r13,QWORD[8+rdi]
3643 adc r14,QWORD[16+rdi]
3644 adc r15,QWORD[24+rdi]
3645 mov QWORD[rdx],r12
3646 lea rdi,[32+rdi]
3647 mov QWORD[8+rdx],r13
3648 sbb r8,r8  ; r8 = 0 or -1: keep the carry across the inc below
3649 mov QWORD[16+rdx],r14
3650 mov QWORD[24+rdx],r15
3651 lea rdx,[32+rdx]
3652
3653 inc rcx
3654 jnz NEAR $L$sqrx4x_sub
3655
3656 neg r9  ; r9 = -(entry rcx)
3657
3658 DB 0F3h,0C3h ;repret
3659
3660
; bn_get_bits5: return the 5-bit field that starts at bit offset edx of the
; buffer at rcx (Win64 register arguments; leaf, branch-free).
; The buffer is read as one 16-bit word. When the field starts above bit 11
; of its word it would cross the word boundary, so the load address is moved
; up one byte and the shift reduced by 8.
; In:  rcx = buffer pointer, edx = bit offset.   Out: eax = value 0..31.
3661global bn_get_bits5
3662
3663ALIGN 16
3664bn_get_bits5:
3665
3666 lea r10,[rcx]  ; r10 = buffer
3667 lea r11,[1+rcx]  ; r11 = buffer + 1 byte (alternative base)
3668 mov ecx,edx
3669 shr edx,4  ; edx = index of the 16-bit word
3670 and ecx,15  ; ecx = bit position inside that word
3671 lea eax,[((-8))+rcx]  ; eax = position - 8 (used with the alternative base)
3672 cmp ecx,11
3673 cmova r10,r11  ; position > 11: read from one byte further ...
3674 cmova ecx,eax  ; ... and shift by 8 bits less
3675 movzx eax,WORD[rdx*2+r10]
3676 shr eax,cl
3677 and eax,31  ; keep 5 bits
3678 DB 0F3h,0C3h ;repret
3679
3680
3681
; bn_scatter5: copy edx 64-bit words from rcx into a strided table
; (Win64 register arguments; leaf).
; Word i of the input is written to r8 + r9*8 + i*256, i.e. slot r9 of
; row i, each row being 256 bytes wide. This matches the row stride that
; bn_gather5 reads.
; In: rcx = source words, edx = word count (0 is a no-op), r8 = table base,
;     r9 = slot index.
3682global bn_scatter5
3683
3684ALIGN 16
3685bn_scatter5:
3686
3687 cmp edx,0
3688 jz NEAR $L$scatter_epilogue  ; nothing to copy
3689 lea r8,[r9*8+r8]  ; r8 = address of the chosen slot in row 0
3690$L$scatter:
3691 mov rax,QWORD[rcx]
3692 lea rcx,[8+rcx]
3693 mov QWORD[r8],rax
3694 lea r8,[256+r8]  ; same slot, next row
3695 sub edx,1
3696 jnz NEAR $L$scatter
3697$L$scatter_epilogue:
3698 DB 0F3h,0C3h ;repret
3699
3700
3701
; bn_gather5: read slot r9d from each 256-byte table row and write the
; selected 64-bit words to rcx (Win64 register arguments).
; Every row is loaded in full and combined with 16 precomputed 128-bit
; select masks, so the addresses touched do not depend on the index.
; In: rcx = output words, edx = number of rows/words, r8 = table base,
;     r9d = slot index (32 slots per row: 16 masks x 2 qwords).
; Stack: 0x108 bytes are reserved and rsp is aligned to 16 for the mask
; array; r10 keeps the entry rsp and is used to restore it.
; Only xmm0-xmm5 are used.
3702global bn_gather5
3703
3704ALIGN 32
3705bn_gather5:
3706$L$SEH_begin_bn_gather5:
3707
3708
3709DB 0x4c,0x8d,0x14,0x24  ; lea r10,[rsp] -- fixed encoding, described by the unwind codes in .xdata
3710DB 0x48,0x81,0xec,0x08,0x01,0x00,0x00  ; sub rsp,0x108
3711 lea rax,[$L$inc]
3712 and rsp,-16  ; movdqa stores below need 16-byte alignment
3713
3714 movd xmm5,r9d  ; slot index
3715 movdqa xmm0,XMMWORD[rax]  ; slot counters {0,0,1,1}
3716 movdqa xmm1,XMMWORD[16+rax]  ; step {2,2,2,2}
3717 lea r11,[128+r8]  ; table pointer biased by 128
3718 lea rax,[128+rsp]  ; mask array pointer biased by 128
3719
; Build 16 masks: mask k has its low qword all-ones iff index == 2k and its
; high qword all-ones iff index == 2k+1. Counters advance by the step kept
; in xmm4 while the previous counter is compared against the index.
3720 pshufd xmm5,xmm5,0  ; broadcast the index to all four dwords
3721 movdqa xmm4,xmm1  ; xmm4 = step constant
3722 movdqa xmm2,xmm1
3723 paddd xmm1,xmm0  ; {2,2,3,3}
3724 pcmpeqd xmm0,xmm5  ; mask for slots 0 and 1
3725 movdqa xmm3,xmm4
3726
3727 paddd xmm2,xmm1
3728 pcmpeqd xmm1,xmm5
3729 movdqa XMMWORD[(-128)+rax],xmm0
3730 movdqa xmm0,xmm4
3731
3732 paddd xmm3,xmm2
3733 pcmpeqd xmm2,xmm5
3734 movdqa XMMWORD[(-112)+rax],xmm1
3735 movdqa xmm1,xmm4
3736
3737 paddd xmm0,xmm3
3738 pcmpeqd xmm3,xmm5
3739 movdqa XMMWORD[(-96)+rax],xmm2
3740 movdqa xmm2,xmm4
3741 paddd xmm1,xmm0
3742 pcmpeqd xmm0,xmm5
3743 movdqa XMMWORD[(-80)+rax],xmm3
3744 movdqa xmm3,xmm4
3745
3746 paddd xmm2,xmm1
3747 pcmpeqd xmm1,xmm5
3748 movdqa XMMWORD[(-64)+rax],xmm0
3749 movdqa xmm0,xmm4
3750
3751 paddd xmm3,xmm2
3752 pcmpeqd xmm2,xmm5
3753 movdqa XMMWORD[(-48)+rax],xmm1
3754 movdqa xmm1,xmm4
3755
3756 paddd xmm0,xmm3
3757 pcmpeqd xmm3,xmm5
3758 movdqa XMMWORD[(-32)+rax],xmm2
3759 movdqa xmm2,xmm4
3760 paddd xmm1,xmm0
3761 pcmpeqd xmm0,xmm5
3762 movdqa XMMWORD[(-16)+rax],xmm3
3763 movdqa xmm3,xmm4
3764
3765 paddd xmm2,xmm1
3766 pcmpeqd xmm1,xmm5
3767 movdqa XMMWORD[rax],xmm0
3768 movdqa xmm0,xmm4
3769
3770 paddd xmm3,xmm2
3771 pcmpeqd xmm2,xmm5
3772 movdqa XMMWORD[16+rax],xmm1
3773 movdqa xmm1,xmm4
3774
3775 paddd xmm0,xmm3
3776 pcmpeqd xmm3,xmm5
3777 movdqa XMMWORD[32+rax],xmm2
3778 movdqa xmm2,xmm4
3779 paddd xmm1,xmm0
3780 pcmpeqd xmm0,xmm5
3781 movdqa XMMWORD[48+rax],xmm3
3782 movdqa xmm3,xmm4
3783
3784 paddd xmm2,xmm1
3785 pcmpeqd xmm1,xmm5
3786 movdqa XMMWORD[64+rax],xmm0
3787 movdqa xmm0,xmm4
3788
3789 paddd xmm3,xmm2
3790 pcmpeqd xmm2,xmm5
3791 movdqa XMMWORD[80+rax],xmm1
3792 movdqa xmm1,xmm4
3793
3794 paddd xmm0,xmm3
3795 pcmpeqd xmm3,xmm5
3796 movdqa XMMWORD[96+rax],xmm2
3797 movdqa xmm2,xmm4  ; value is not read again: xmm2 is reloaded in the loop below
3798 movdqa XMMWORD[112+rax],xmm3  ; 16th and last mask
3799 jmp NEAR $L$gather
3800
3801ALIGN 32
; One row per pass: AND all 16 vectors of the row with their masks and OR
; the results into two accumulators (xmm4, xmm5).
3802$L$gather:
3803 pxor xmm4,xmm4  ; clear both accumulators
3804 pxor xmm5,xmm5
3805 movdqa xmm0,XMMWORD[((-128))+r11]
3806 movdqa xmm1,XMMWORD[((-112))+r11]
3807 movdqa xmm2,XMMWORD[((-96))+r11]
3808 pand xmm0,XMMWORD[((-128))+rax]
3809 movdqa xmm3,XMMWORD[((-80))+r11]
3810 pand xmm1,XMMWORD[((-112))+rax]
3811 por xmm4,xmm0
3812 pand xmm2,XMMWORD[((-96))+rax]
3813 por xmm5,xmm1
3814 pand xmm3,XMMWORD[((-80))+rax]
3815 por xmm4,xmm2
3816 por xmm5,xmm3
3817 movdqa xmm0,XMMWORD[((-64))+r11]
3818 movdqa xmm1,XMMWORD[((-48))+r11]
3819 movdqa xmm2,XMMWORD[((-32))+r11]
3820 pand xmm0,XMMWORD[((-64))+rax]
3821 movdqa xmm3,XMMWORD[((-16))+r11]
3822 pand xmm1,XMMWORD[((-48))+rax]
3823 por xmm4,xmm0
3824 pand xmm2,XMMWORD[((-32))+rax]
3825 por xmm5,xmm1
3826 pand xmm3,XMMWORD[((-16))+rax]
3827 por xmm4,xmm2
3828 por xmm5,xmm3
3829 movdqa xmm0,XMMWORD[r11]
3830 movdqa xmm1,XMMWORD[16+r11]
3831 movdqa xmm2,XMMWORD[32+r11]
3832 pand xmm0,XMMWORD[rax]
3833 movdqa xmm3,XMMWORD[48+r11]
3834 pand xmm1,XMMWORD[16+rax]
3835 por xmm4,xmm0
3836 pand xmm2,XMMWORD[32+rax]
3837 por xmm5,xmm1
3838 pand xmm3,XMMWORD[48+rax]
3839 por xmm4,xmm2
3840 por xmm5,xmm3
3841 movdqa xmm0,XMMWORD[64+r11]
3842 movdqa xmm1,XMMWORD[80+r11]
3843 movdqa xmm2,XMMWORD[96+r11]
3844 pand xmm0,XMMWORD[64+rax]
3845 movdqa xmm3,XMMWORD[112+r11]
3846 pand xmm1,XMMWORD[80+rax]
3847 por xmm4,xmm0
3848 pand xmm2,XMMWORD[96+rax]
3849 por xmm5,xmm1
3850 pand xmm3,XMMWORD[112+rax]
3851 por xmm4,xmm2
3852 por xmm5,xmm3
3853 por xmm4,xmm5  ; merge the two accumulators
3854 lea r11,[256+r11]  ; next row
3855 pshufd xmm0,xmm4,0x4e  ; swap the two qwords ...
3856 por xmm0,xmm4  ; ... so the selected word ends up in the low qword
3857 movq QWORD[rcx],xmm0
3858 lea rcx,[8+rcx]
3859 sub edx,1
3860 jnz NEAR $L$gather
3861
3862 lea rsp,[r10]  ; restore the entry rsp saved in the prologue
3863 DB 0F3h,0C3h ;repret
3864$L$SEH_end_bn_gather5:
3865
3866
; Constants for the select-mask generation: the first vector holds the
; starting slot counters, the second the per-vector step. bn_gather5 loads
; them from [$L$inc] and [16+$L$inc]; $L$mul_body in bn_mul_mont_gather5
; does the same through r10.
3867ALIGN 64
3868$L$inc:
3869 DD 0,0,1,1
3870 DD 2,2,2,2
; NUL-terminated ASCII identification string:
; "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"
3871DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
3872DB 112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
3873DB 99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
3874DB 114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
3875DB 71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
3876DB 112,101,110,115,115,108,46,111,114,103,62,0
3877EXTERN __imp_RtlVirtualUnwind
3878
; mul_handler: Win64 language-specific exception handler referenced from the
; .xdata records of this file that start with DB 9,0,0,0.
; It recovers the interrupted function's entry-time rsp and its saved
; non-volatile registers, patches them into the CONTEXT record, copies that
; record into the dispatcher's context, calls RtlVirtualUnwind and returns 1.
; In (as used below): r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT.
; HandlerData is three image-relative addresses (see .xdata); the fault RIP
; is classified against them:
;   RIP <  HandlerData[0]            -> nothing pushed yet, context Rax is the entry rsp
;   RIP <  HandlerData[1]            -> registers pushed, context Rax is still the entry rsp
;   RIP >= HandlerData[2]            -> epilogue already restored everything
;   otherwise                        -> entry rsp is fetched from the function's frame
; NOTE(review): the numeric offsets into r8 and r9 follow the Win64 CONTEXT and
; DISPATCHER_CONTEXT layouts (Rax=120, Rbx=144, Rsp=152, Rbp=160, Rsi=168,
; Rdi=176, R9=192, R12..R15=216..240, Rip=248; ImageBase=8, FunctionEntry=16,
; EstablisherFrame=24, ContextRecord=40, HandlerData=56) -- confirm against
; the SDK headers when touching them.
3879ALIGN 16
3880mul_handler:
3881 push rsi
3882 push rdi
3883 push rbx
3884 push rbp
3885 push r12
3886 push r13
3887 push r14
3888 push r15
3889 pushfq
3890 sub rsp,64  ; 32 bytes shadow space + 4 stack arguments for the call below
3891
3892 mov rax,QWORD[120+r8]  ; context Rax
3893 mov rbx,QWORD[248+r8]  ; context Rip
3894
3895 mov rsi,QWORD[8+r9]  ; dispatcher ImageBase
3896 mov r11,QWORD[56+r9]  ; dispatcher HandlerData
3897
3898 mov r10d,DWORD[r11]  ; HandlerData[0]
3899 lea r10,[r10*1+rsi]  ; image-relative -> absolute
3900 cmp rbx,r10
3901 jb NEAR $L$common_seh_tail  ; before the first boundary: skip register recovery
3902
3903 mov r10d,DWORD[4+r11]  ; HandlerData[1]
3904 lea r10,[r10*1+rsi]
3905 cmp rbx,r10
3906 jb NEAR $L$common_pop_regs  ; between the boundaries: rax still holds the entry rsp
3907
3908 mov rax,QWORD[152+r8]  ; context Rsp
3909
3910 mov r10d,DWORD[8+r11]  ; HandlerData[2]
3911 lea r10,[r10*1+rsi]
3912 cmp rbx,r10
3913 jae NEAR $L$common_seh_tail  ; at or past the epilogue label
3914
3915 lea r10,[$L$mul_epilogue]
3916 cmp rbx,r10
3917 ja NEAR $L$body_40  ; functions located after $L$mul_epilogue use the other frame layout
3918
3919 mov r10,QWORD[192+r8]  ; context R9
3920 mov rax,QWORD[8+r10*8+rax]  ; entry rsp as stored by "mov QWORD[8+r9*8+rsp],rax" in bn_mul_mont_gather5
3921
3922 jmp NEAR $L$common_pop_regs
3923
3924$L$body_40:
3925 mov rax,QWORD[40+rax]  ; entry rsp kept at [40+rsp] (presumably by the 4x/x4x/power5 bodies)
3926$L$common_pop_regs:
3927 mov rbx,QWORD[((-8))+rax]  ; the six pushed registers sit just below the entry rsp
3928 mov rbp,QWORD[((-16))+rax]
3929 mov r12,QWORD[((-24))+rax]
3930 mov r13,QWORD[((-32))+rax]
3931 mov r14,QWORD[((-40))+rax]
3932 mov r15,QWORD[((-48))+rax]
3933 mov QWORD[144+r8],rbx  ; patch them into the context
3934 mov QWORD[160+r8],rbp
3935 mov QWORD[216+r8],r12
3936 mov QWORD[224+r8],r13
3937 mov QWORD[232+r8],r14
3938 mov QWORD[240+r8],r15
3939
3940$L$common_seh_tail:
3941 mov rdi,QWORD[8+rax]  ; rdi/rsi were stored at [8+rsp]/[16+rsp] by the WIN64 prologue
3942 mov rsi,QWORD[16+rax]
3943 mov QWORD[152+r8],rax  ; context Rsp = entry rsp
3944 mov QWORD[168+r8],rsi
3945 mov QWORD[176+r8],rdi
3946
3947 mov rdi,QWORD[40+r9]  ; destination: dispatcher ContextRecord
3948 mov rsi,r8  ; source: patched context
3949 mov ecx,154  ; 154 qwords = 1232 bytes
3950 DD 0xa548f3fc  ; bytes FC F3 48 A5: cld; rep movsq
3951
; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry,
;                  ContextRecord, &HandlerData, &EstablisherFrame, NULL)
3952 mov rsi,r9  ; rsi = dispatcher context
3953 xor rcx,rcx  ; first argument = 0
3954 mov rdx,QWORD[8+rsi]
3955 mov r8,QWORD[rsi]
3956 mov r9,QWORD[16+rsi]
3957 mov r10,QWORD[40+rsi]
3958 lea r11,[56+rsi]
3959 lea r12,[24+rsi]
3960 mov QWORD[32+rsp],r10
3961 mov QWORD[40+rsp],r11
3962 mov QWORD[48+rsp],r12
3963 mov QWORD[56+rsp],rcx
3964 call QWORD[__imp_RtlVirtualUnwind]
3965
3966 mov eax,1  ; handler result (ExceptionContinueSearch)
3967 add rsp,64
3968 popfq
3969 pop r15
3970 pop r14
3971 pop r13
3972 pop r12
3973 pop rbp
3974 pop rbx
3975 pop rdi
3976 pop rsi
3977 DB 0F3h,0C3h ;repret
3978
3979
; Function table: one record per function with unwind information, each made
; of three image-relative addresses (function begin, function end, unwind
; info in .xdata).
3980section .pdata rdata align=4
3981ALIGN 4
3982 DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase
3983 DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase
3984 DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase
3985
3986 DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase
3987 DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase
3988 DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase
3989
3990 DD $L$SEH_begin_bn_power5 wrt ..imagebase
3991 DD $L$SEH_end_bn_power5 wrt ..imagebase
3992 DD $L$SEH_info_bn_power5 wrt ..imagebase
3993
3994 DD $L$SEH_begin_bn_from_mont8x wrt ..imagebase
3995 DD $L$SEH_end_bn_from_mont8x wrt ..imagebase
3996 DD $L$SEH_info_bn_from_mont8x wrt ..imagebase
3997 DD $L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase
3998 DD $L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase
3999 DD $L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase
4000
4001 DD $L$SEH_begin_bn_powerx5 wrt ..imagebase
4002 DD $L$SEH_end_bn_powerx5 wrt ..imagebase
4003 DD $L$SEH_info_bn_powerx5 wrt ..imagebase
4004 DD $L$SEH_begin_bn_gather5 wrt ..imagebase
4005 DD $L$SEH_end_bn_gather5 wrt ..imagebase
4006 DD $L$SEH_info_bn_gather5 wrt ..imagebase
4007
; Unwind information referenced from .pdata.
; Records that start with DB 9,0,0,0 carry no unwind codes: byte 0 is
; version 1 with the "has exception handler" flag, followed by the handler
; address (mul_handler) and three image-relative labels that mul_handler
; reads as HandlerData[0..2] (two prologue/body boundaries and the epilogue).
4008section .xdata rdata align=8
4009ALIGN 8
4010$L$SEH_info_bn_mul_mont_gather5:
4011DB 9,0,0,0
4012 DD mul_handler wrt ..imagebase
4013 DD $L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
4014ALIGN 8
4015$L$SEH_info_bn_mul4x_mont_gather5:
4016DB 9,0,0,0
4017 DD mul_handler wrt ..imagebase
4018 DD $L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
4019ALIGN 8
4020$L$SEH_info_bn_power5:
4021DB 9,0,0,0
4022 DD mul_handler wrt ..imagebase
4023 DD $L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase
4024ALIGN 8
4025$L$SEH_info_bn_from_mont8x:
4026DB 9,0,0,0
4027 DD mul_handler wrt ..imagebase
4028 DD $L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase
4029ALIGN 8
4030$L$SEH_info_bn_mulx4x_mont_gather5:
4031DB 9,0,0,0
4032 DD mul_handler wrt ..imagebase
4033 DD $L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase
4034ALIGN 8
4035$L$SEH_info_bn_powerx5:
4036DB 9,0,0,0
4037 DD mul_handler wrt ..imagebase
4038 DD $L$powerx5_prologue wrt ..imagebase,$L$powerx5_body wrt ..imagebase,$L$powerx5_epilogue wrt ..imagebase
4039ALIGN 8
; bn_gather5 uses plain unwind codes instead of a handler. They describe the
; two hand-encoded prologue instructions (4 + 7 = 11 bytes), which is why
; those instructions are emitted as fixed DB sequences.
4040$L$SEH_info_bn_gather5:
4041DB 0x01,0x0b,0x03,0x0a  ; version 1 / no flags, prologue size 11, 3 code slots, frame register r10 with offset 0
4042DB 0x0b,0x01,0x21,0x00  ; at offset 11: large allocation of 0x21*8 = 0x108 bytes (sub rsp,0x108)
4043DB 0x04,0xa3,0x00,0x00  ; at offset 4: set frame register (lea r10,[rsp]); last two bytes pad the slot count to even
4044ALIGN 8
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette