VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.1j/crypto/genasm-nasm/x86_64-mont.S@ 88461

Last change on this file since 88461 was 83531, checked in by vboxsync, 5 years ago

setting svn:sync-process=export for openssl-1.1.1f, all files except tests

File size: 15.9 KB
Line 
; NASM (Intel syntax) preamble. The `wrt ..imagebase` relocations and the
; .pdata/.xdata sections further down indicate a Win64 object target.
; `default rel` makes memory operands without a base register RIP-relative.
1default rel
; The size keywords are defined to expand to nothing, so operands written as
; XMMWORD[...] assemble as plain memory references.
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
6
7
; Imported symbol; nothing in the code visible in this listing references it.
8EXTERN OPENSSL_ia32cap_P
9
; bn_mul_mont is the only symbol this listing declares global.
10global bn_mul_mont
11
;-----------------------------------------------------------------------
; bn_mul_mont -- Montgomery multiplication entry point: picks one of three
; code paths and implements the generic word-by-word path itself.
; ABI:  Microsoft x64 on entry (rcx, rdx, r8, r9, [rsp+40], [rsp+48]).
;       The prologue re-maps these onto rdi, rsi, rdx, rcx, r8, r9.
; Roles after the prologue, as the code below uses them:
;   rdi = result vector (written by $L$sub and $L$copy)
;   rsi = first operand vector, called a[] in the comments below
;   rdx = second operand vector (copied to r12), called b[]
;   rcx = vector subtracted from the result at the end, called n[]
;   r8  = pointer to a single word (loaded once, multiplied into rbp per row)
;   r9  = number of 64-bit words, called num (only the low 32 bits are used)
; NOTE(review): presumably OpenSSL's bn_mul_mont(rp, ap, bp, np, n0, num);
; confirm against the C prototype.
; Out:  rax = 1.
; Restores rbx, rbp, r12-r15 from the pushes and rdi/rsi from the home slots.
;-----------------------------------------------------------------------
12ALIGN 16
13bn_mul_mont:
; rdi/rsi are callee-saved on Win64: park them in the caller's home slots.
14 mov QWORD[8+rsp],rdi ;WIN64 prologue
15 mov QWORD[16+rsp],rsi
16 mov rax,rsp
17$L$SEH_begin_bn_mul_mont:
; Re-map the four register arguments.
18 mov rdi,rcx
19 mov rsi,rdx
20 mov rdx,r8
21 mov rcx,r9
; Arguments 5 and 6 come from the stack.
22 mov r8,QWORD[40+rsp]
23 mov r9,QWORD[48+rsp]
24
25
26
; A 32-bit register write zero-extends, so this clears bits 63..32 of r9.
27 mov r9d,r9d
; rax = entry rsp; it is stored in the frame later and read by the epilogue.
28 mov rax,rsp
29
; Path selection on the word count in r9d:
;   count not a multiple of 4, or count < 8  -> $L$mul_enter (generic loop)
;   rdx != rsi (two distinct operand vectors) -> $L$mul4x_enter
;   rdx == rsi and count a multiple of 8      -> $L$sqr8x_enter
;   rdx == rsi otherwise                      -> $L$mul4x_enter
30 test r9d,3
31 jnz NEAR $L$mul_enter
32 cmp r9d,8
33 jb NEAR $L$mul_enter
34 cmp rdx,rsi
35 jne NEAR $L$mul4x_enter
36 test r9d,7
37 jz NEAR $L$sqr8x_enter
38 jmp NEAR $L$mul4x_enter
39
40ALIGN 16
41$L$mul_enter:
; Save the remaining callee-saved registers. The epilogue reloads them from
; fixed offsets (-8 .. -48) below the entry rsp.
42 push rbx

44 push rbp

46 push r12

48 push r13

50 push r14

52 push r15
53
54
; r10 = (rsp - 8*num - 16) rounded down to a multiple of 1024. This becomes
; the new rsp and the base of the temporary vector, called tp[] below.
55 neg r9
56 mov r11,rsp
57 lea r10,[((-16))+r9*8+rsp]
58 neg r9
59 and r10,-1024
60
61
62
63
64
65
66
67
68
; Lower rsp to r10 in steps of at most 4096 bytes, reading one word at each
; step (presumably so every stack page is touched in order -- TODO confirm).
69 sub r11,r10
70 and r11,-4096
71 lea rsp,[r11*1+r10]
72 mov r11,QWORD[rsp]
73 cmp rsp,r10
74 ja NEAR $L$mul_page_walk
75 jmp NEAR $L$mul_page_walk_done
76
77ALIGN 16
78$L$mul_page_walk:
79 lea rsp,[((-4096))+rsp]
80 mov r11,QWORD[rsp]
81 cmp rsp,r10
82 ja NEAR $L$mul_page_walk
83$L$mul_page_walk_done:
84
; Keep the entry rsp in the slot just above tp[num]. The epilogue reads it
; back, and mul_handler computes the same slot from the saved Rsp and R9.
85 mov QWORD[8+r9*8+rsp],rax
86
87$L$mul_body:
; ---- First row (i = 0) --------------------------------------------------
; r12 = b, r8 = the word at [r8], rbx = b[0], rax = a[0].
88 mov r12,rdx
89 mov r8,QWORD[r8]
90 mov rbx,QWORD[r12]
91 mov rax,QWORD[rsi]
92
; r14 = outer index i, r15 = inner index j.
93 xor r14,r14
94 xor r15,r15
95
; r10:r11 = a[0]*b[0]; rbp = m = r8 * low word of that product.
96 mov rbp,r8
97 mul rbx
98 mov r10,rax
99 mov rax,QWORD[rcx]
100
101 imul rbp,r10
102 mov r11,rdx
103
; n[0]*m is added to the low word. Only the carry (r13) is kept; the low
; word itself is never stored.
104 mul rbp
105 add r10,rax
106 mov rax,QWORD[8+rsi]
107 adc rdx,0
108 mov r13,rdx
109
110 lea r15,[1+r15]
111 jmp NEAR $L$1st_enter
112
113ALIGN 16
; Loop body: combine the two products computed for word j with the carries
; and store the sum one word lower, at tp[j-1] (r15 is already j+1 here).
114$L$1st:
115 add r13,rax
116 mov rax,QWORD[r15*8+rsi]
117 adc rdx,0
118 add r13,r11
119 mov r11,r10
120 adc rdx,0
121 mov QWORD[((-16))+r15*8+rsp],r13
122 mov r13,rdx
123
; Compute a[j]*b[0] (accumulated in r11, carry to r10), then n[j]*m.
124$L$1st_enter:
125 mul rbx
126 add r11,rax
127 mov rax,QWORD[r15*8+rcx]
128 adc rdx,0
129 lea r15,[1+r15]
130 mov r10,rdx
131
132 mul rbp
133 cmp r15,r9
134 jne NEAR $L$1st
135
; Row tail: store tp[num-2], then tp[num-1] and the top carry tp[num].
; rax is reloaded with a[0] for the next row.
136 add r13,rax
137 mov rax,QWORD[rsi]
138 adc rdx,0
139 add r13,r11
140 adc rdx,0
141 mov QWORD[((-16))+r15*8+rsp],r13
142 mov r13,rdx
143 mov r11,r10
144
145 xor rdx,rdx
146 add r13,r11
147 adc rdx,0
148 mov QWORD[((-8))+r9*8+rsp],r13
149 mov QWORD[r9*8+rsp],rdx
150
151 lea r14,[1+r14]
152 jmp NEAR $L$outer
153ALIGN 16
; ---- Rows i = 1 .. num-1 ------------------------------------------------
; Same as the first row, but the existing tp[] words are added in as well.
154$L$outer:
155 mov rbx,QWORD[r14*8+r12]
156 xor r15,r15
157 mov rbp,r8
158 mov r10,QWORD[rsp]
159 mul rbx
160 add r10,rax
161 mov rax,QWORD[rcx]
162 adc rdx,0
163
; rbp = m = r8 * (tp[0] + low word of a[0]*b[i]).
164 imul rbp,r10
165 mov r11,rdx
166
167 mul rbp
168 add r10,rax
169 mov rax,QWORD[8+rsi]
170 adc rdx,0
171 mov r10,QWORD[8+rsp]
172 mov r13,rdx
173
174 lea r15,[1+r15]
175 jmp NEAR $L$inner_enter
176
177ALIGN 16
; r10 is pre-loaded with the next tp[] word while the current sum is stored.
178$L$inner:
179 add r13,rax
180 mov rax,QWORD[r15*8+rsi]
181 adc rdx,0
182 add r13,r10
183 mov r10,QWORD[r15*8+rsp]
184 adc rdx,0
185 mov QWORD[((-16))+r15*8+rsp],r13
186 mov r13,rdx
187
188$L$inner_enter:
189 mul rbx
190 add r11,rax
191 mov rax,QWORD[r15*8+rcx]
192 adc rdx,0
193 add r10,r11
194 mov r11,rdx
195 adc r11,0
196 lea r15,[1+r15]
197
198 mul rbp
199 cmp r15,r9
200 jne NEAR $L$inner
201
; Row tail: the old top word tp[num] (loaded into r10) joins the final sum.
202 add r13,rax
203 mov rax,QWORD[rsi]
204 adc rdx,0
205 add r13,r10
206 mov r10,QWORD[r15*8+rsp]
207 adc rdx,0
208 mov QWORD[((-16))+r15*8+rsp],r13
209 mov r13,rdx
210
211 xor rdx,rdx
212 add r13,r11
213 adc rdx,0
214 add r13,r10
215 adc rdx,0
216 mov QWORD[((-8))+r9*8+rsp],r13
217 mov QWORD[r9*8+rsp],rdx
218
219 lea r14,[1+r14]
220 cmp r14,r9
221 jb NEAR $L$outer
222
; ---- Final subtraction: result[] = tp[] - n[] ---------------------------
; The xor clears CF, so the first sbb starts without a borrow. mov, lea and
; dec do not modify CF, which keeps the borrow alive across iterations.
223 xor r14,r14
224 mov rax,QWORD[rsp]
225 mov r15,r9
226
227ALIGN 16
228$L$sub: sbb rax,QWORD[r14*8+rcx]
229 mov QWORD[r14*8+rdi],rax
230 mov rax,QWORD[8+r14*8+rsp]
231 lea r14,[1+r14]
232 dec r15
233 jnz NEAR $L$sub
234
; rax = tp[num] minus the final borrow, used as an AND mask; rbx = ~rax.
235 sbb rax,0
236 mov rbx,-1
237 xor rbx,rax
238 xor r14,r14
239 mov r15,r9
240
; Branch-free select per word: result[i] = (result[i] & rbx) | (tp[i] & rax).
; Each tp[i] is overwritten with r9 after it has been read.
241$L$copy:
242 mov rcx,QWORD[r14*8+rdi]
243 mov rdx,QWORD[r14*8+rsp]
244 and rcx,rbx
245 and rdx,rax
246 mov QWORD[r14*8+rsp],r9
247 or rdx,rcx
248 mov QWORD[r14*8+rdi],rdx
249 lea r14,[1+r14]
250 sub r15,1
251 jnz NEAR $L$copy
252
; rsi = entry rsp saved at $L$mul_page_walk_done.
253 mov rsi,QWORD[8+r9*8+rsp]
254
; Return value 1, then reload the six pushed registers relative to rsi.
255 mov rax,1
256 mov r15,QWORD[((-48))+rsi]
257
258 mov r14,QWORD[((-40))+rsi]
259
260 mov r13,QWORD[((-32))+rsi]
261
262 mov r12,QWORD[((-24))+rsi]
263
264 mov rbp,QWORD[((-16))+rsi]
265
266 mov rbx,QWORD[((-8))+rsi]
267
268 lea rsp,[rsi]
269
270$L$mul_epilogue:
271 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
272 mov rsi,QWORD[16+rsp]
273 DB 0F3h,0C3h ;repret
274
275$L$SEH_end_bn_mul_mont:
276
;-----------------------------------------------------------------------
; bn_mul4x_mont -- same multiply-and-reduce scheme as the generic loop in
; bn_mul_mont, with the inner loops unrolled to four words per pass.
; Not declared global in this listing. bn_mul_mont jumps to $L$mul4x_enter
; for word counts that are multiples of 4 and at least 8, unless it selects
; the squaring path. The bn_mul4x_mont label itself performs the same Win64
; argument re-mapping as bn_mul_mont.
; Roles after the prologue: rdi = result, rsi = a[], rdx = b[] (copied to
; r12), rcx = n[], r8 = pointer to one word, r9 = word count (num).
; rdi doubles as an accumulator inside the loops, so the result pointer is
; parked in the frame at [16+r9*8+rsp] and reloaded before the subtraction.
; Out:  rax = 1.
;-----------------------------------------------------------------------
277ALIGN 16
278bn_mul4x_mont:
279 mov QWORD[8+rsp],rdi ;WIN64 prologue
280 mov QWORD[16+rsp],rsi
281 mov rax,rsp
282$L$SEH_begin_bn_mul4x_mont:
283 mov rdi,rcx
284 mov rsi,rdx
285 mov rdx,r8
286 mov rcx,r9
287 mov r8,QWORD[40+rsp]
288 mov r9,QWORD[48+rsp]
289
290
291
; Zero-extend the word count; rax = entry rsp for the epilogue.
292 mov r9d,r9d
293 mov rax,rsp
294
295$L$mul4x_enter:
296 push rbx

298 push rbp

300 push r12

302 push r13

304 push r14

306 push r15
307
308
; r10 = (rsp - 8*num - 32) rounded down to a multiple of 1024: new rsp and
; base of the temporary vector tp[].
309 neg r9
310 mov r11,rsp
311 lea r10,[((-32))+r9*8+rsp]
312 neg r9
313 and r10,-1024
314
; Lower rsp to r10 in steps of at most 4096 bytes, reading a word per step
; (presumably to touch every stack page in order -- TODO confirm).
315 sub r11,r10
316 and r11,-4096
317 lea rsp,[r11*1+r10]
318 mov r11,QWORD[rsp]
319 cmp rsp,r10
320 ja NEAR $L$mul4x_page_walk
321 jmp NEAR $L$mul4x_page_walk_done
322
323$L$mul4x_page_walk:
324 lea rsp,[((-4096))+rsp]
325 mov r11,QWORD[rsp]
326 cmp rsp,r10
327 ja NEAR $L$mul4x_page_walk
328$L$mul4x_page_walk_done:
329
; Entry rsp goes in the slot above tp[num] (read by the epilogue and by
; mul_handler).
330 mov QWORD[8+r9*8+rsp],rax
331
332$L$mul4x_body:
; Park the result pointer; rdi is used as a carry register from here on.
333 mov QWORD[16+r9*8+rsp],rdi
334 mov r12,rdx
335 mov r8,QWORD[r8]
336 mov rbx,QWORD[r12]
337 mov rax,QWORD[rsi]
338
; r14 = outer index i, r15 = inner index j.
339 xor r14,r14
340 xor r15,r15
341
; ---- First row: rbx = b[0], rbp = m = r8 * low word of a[0]*b[0] --------
342 mov rbp,r8
343 mul rbx
344 mov r10,rax
345 mov rax,QWORD[rcx]
346
347 imul rbp,r10
348 mov r11,rdx
349
350 mul rbp
351 add r10,rax
352 mov rax,QWORD[8+rsi]
353 adc rdx,0
354 mov rdi,rdx
355
356 mul rbx
357 add r11,rax
358 mov rax,QWORD[8+rcx]
359 adc rdx,0
360 mov r10,rdx
361
362 mul rbp
363 add rdi,rax
364 mov rax,QWORD[16+rsi]
365 adc rdx,0
366 add rdi,r11
367 lea r15,[4+r15]
368 adc rdx,0
369 mov QWORD[rsp],rdi
370 mov r13,rdx
371 jmp NEAR $L$1st4x
372ALIGN 16
; Each pass handles four a[]/n[] words and stores four consecutive tp[]
; words. r10/r11 alternate as the a*b accumulator and rdi/r13 as the
; n*m accumulator.
373$L$1st4x:
374 mul rbx
375 add r10,rax
376 mov rax,QWORD[((-16))+r15*8+rcx]
377 adc rdx,0
378 mov r11,rdx
379
380 mul rbp
381 add r13,rax
382 mov rax,QWORD[((-8))+r15*8+rsi]
383 adc rdx,0
384 add r13,r10
385 adc rdx,0
386 mov QWORD[((-24))+r15*8+rsp],r13
387 mov rdi,rdx
388
389 mul rbx
390 add r11,rax
391 mov rax,QWORD[((-8))+r15*8+rcx]
392 adc rdx,0
393 mov r10,rdx
394
395 mul rbp
396 add rdi,rax
397 mov rax,QWORD[r15*8+rsi]
398 adc rdx,0
399 add rdi,r11
400 adc rdx,0
401 mov QWORD[((-16))+r15*8+rsp],rdi
402 mov r13,rdx
403
404 mul rbx
405 add r10,rax
406 mov rax,QWORD[r15*8+rcx]
407 adc rdx,0
408 mov r11,rdx
409
410 mul rbp
411 add r13,rax
412 mov rax,QWORD[8+r15*8+rsi]
413 adc rdx,0
414 add r13,r10
415 adc rdx,0
416 mov QWORD[((-8))+r15*8+rsp],r13
417 mov rdi,rdx
418
419 mul rbx
420 add r11,rax
421 mov rax,QWORD[8+r15*8+rcx]
422 adc rdx,0
423 lea r15,[4+r15]
424 mov r10,rdx
425
426 mul rbp
427 add rdi,rax
428 mov rax,QWORD[((-16))+r15*8+rsi]
429 adc rdx,0
430 add rdi,r11
431 adc rdx,0
432 mov QWORD[((-32))+r15*8+rsp],rdi
433 mov r13,rdx
434 cmp r15,r9
435 jb NEAR $L$1st4x
436
; Row tail: the last two word pairs, then tp[num-1] and the top carry
; tp[num]. rax is reloaded with a[0] for the next row.
437 mul rbx
438 add r10,rax
439 mov rax,QWORD[((-16))+r15*8+rcx]
440 adc rdx,0
441 mov r11,rdx
442
443 mul rbp
444 add r13,rax
445 mov rax,QWORD[((-8))+r15*8+rsi]
446 adc rdx,0
447 add r13,r10
448 adc rdx,0
449 mov QWORD[((-24))+r15*8+rsp],r13
450 mov rdi,rdx
451
452 mul rbx
453 add r11,rax
454 mov rax,QWORD[((-8))+r15*8+rcx]
455 adc rdx,0
456 mov r10,rdx
457
458 mul rbp
459 add rdi,rax
460 mov rax,QWORD[rsi]
461 adc rdx,0
462 add rdi,r11
463 adc rdx,0
464 mov QWORD[((-16))+r15*8+rsp],rdi
465 mov r13,rdx
466
467 xor rdi,rdi
468 add r13,r10
469 adc rdi,0
470 mov QWORD[((-8))+r15*8+rsp],r13
471 mov QWORD[r15*8+rsp],rdi
472
473 lea r14,[1+r14]
474ALIGN 4
; ---- Rows i = 1 .. num-1: as above, with the existing tp[] words added --
475$L$outer4x:
476 mov rbx,QWORD[r14*8+r12]
477 xor r15,r15
478 mov r10,QWORD[rsp]
479 mov rbp,r8
480 mul rbx
481 add r10,rax
482 mov rax,QWORD[rcx]
483 adc rdx,0
484
; rbp = m = r8 * (tp[0] + low word of a[0]*b[i]).
485 imul rbp,r10
486 mov r11,rdx
487
488 mul rbp
489 add r10,rax
490 mov rax,QWORD[8+rsi]
491 adc rdx,0
492 mov rdi,rdx
493
494 mul rbx
495 add r11,rax
496 mov rax,QWORD[8+rcx]
497 adc rdx,0
498 add r11,QWORD[8+rsp]
499 adc rdx,0
500 mov r10,rdx
501
502 mul rbp
503 add rdi,rax
504 mov rax,QWORD[16+rsi]
505 adc rdx,0
506 add rdi,r11
507 lea r15,[4+r15]
508 adc rdx,0
509 mov QWORD[rsp],rdi
510 mov r13,rdx
511 jmp NEAR $L$inner4x
512ALIGN 16
; Four words per pass. Each a*b partial product also absorbs the matching
; tp[] word read from memory.
513$L$inner4x:
514 mul rbx
515 add r10,rax
516 mov rax,QWORD[((-16))+r15*8+rcx]
517 adc rdx,0
518 add r10,QWORD[((-16))+r15*8+rsp]
519 adc rdx,0
520 mov r11,rdx
521
522 mul rbp
523 add r13,rax
524 mov rax,QWORD[((-8))+r15*8+rsi]
525 adc rdx,0
526 add r13,r10
527 adc rdx,0
528 mov QWORD[((-24))+r15*8+rsp],r13
529 mov rdi,rdx
530
531 mul rbx
532 add r11,rax
533 mov rax,QWORD[((-8))+r15*8+rcx]
534 adc rdx,0
535 add r11,QWORD[((-8))+r15*8+rsp]
536 adc rdx,0
537 mov r10,rdx
538
539 mul rbp
540 add rdi,rax
541 mov rax,QWORD[r15*8+rsi]
542 adc rdx,0
543 add rdi,r11
544 adc rdx,0
545 mov QWORD[((-16))+r15*8+rsp],rdi
546 mov r13,rdx
547
548 mul rbx
549 add r10,rax
550 mov rax,QWORD[r15*8+rcx]
551 adc rdx,0
552 add r10,QWORD[r15*8+rsp]
553 adc rdx,0
554 mov r11,rdx
555
556 mul rbp
557 add r13,rax
558 mov rax,QWORD[8+r15*8+rsi]
559 adc rdx,0
560 add r13,r10
561 adc rdx,0
562 mov QWORD[((-8))+r15*8+rsp],r13
563 mov rdi,rdx
564
565 mul rbx
566 add r11,rax
567 mov rax,QWORD[8+r15*8+rcx]
568 adc rdx,0
569 add r11,QWORD[8+r15*8+rsp]
570 adc rdx,0
571 lea r15,[4+r15]
572 mov r10,rdx
573
574 mul rbp
575 add rdi,rax
576 mov rax,QWORD[((-16))+r15*8+rsi]
577 adc rdx,0
578 add rdi,r11
579 adc rdx,0
580 mov QWORD[((-32))+r15*8+rsp],rdi
581 mov r13,rdx
582 cmp r15,r9
583 jb NEAR $L$inner4x
584
; Row tail. The outer index r14 is advanced in the middle of it.
585 mul rbx
586 add r10,rax
587 mov rax,QWORD[((-16))+r15*8+rcx]
588 adc rdx,0
589 add r10,QWORD[((-16))+r15*8+rsp]
590 adc rdx,0
591 mov r11,rdx
592
593 mul rbp
594 add r13,rax
595 mov rax,QWORD[((-8))+r15*8+rsi]
596 adc rdx,0
597 add r13,r10
598 adc rdx,0
599 mov QWORD[((-24))+r15*8+rsp],r13
600 mov rdi,rdx
601
602 mul rbx
603 add r11,rax
604 mov rax,QWORD[((-8))+r15*8+rcx]
605 adc rdx,0
606 add r11,QWORD[((-8))+r15*8+rsp]
607 adc rdx,0
608 lea r14,[1+r14]
609 mov r10,rdx
610
611 mul rbp
612 add rdi,rax
613 mov rax,QWORD[rsi]
614 adc rdx,0
615 add rdi,r11
616 adc rdx,0
617 mov QWORD[((-16))+r15*8+rsp],rdi
618 mov r13,rdx
619
; The previous top carry tp[num] is folded into the new tp[num-1]/tp[num].
620 xor rdi,rdi
621 add r13,r10
622 adc rdi,0
623 add r13,QWORD[r9*8+rsp]
624 adc rdi,0
625 mov QWORD[((-8))+r15*8+rsp],r13
626 mov QWORD[r15*8+rsp],rdi
627
628 cmp r14,r9
629 jb NEAR $L$outer4x
; ---- Final subtraction: result[] = tp[] - n[], four words per pass ------
; rdi = parked result pointer, r15 = (num-4)/4 loop passes, rsi = tp base.
630 mov rdi,QWORD[16+r9*8+rsp]
631 lea r15,[((-4))+r9]
632 mov rax,QWORD[rsp]
633 mov rdx,QWORD[8+rsp]
634 shr r15,2
635 lea rsi,[rsp]
636 xor r14,r14
637
; sub starts the borrow chain. The mov, lea and dec instructions in between
; leave CF untouched, so every following sbb continues the same chain.
638 sub rax,QWORD[rcx]
639 mov rbx,QWORD[16+rsi]
640 mov rbp,QWORD[24+rsi]
641 sbb rdx,QWORD[8+rcx]
642
643$L$sub4x:
644 mov QWORD[r14*8+rdi],rax
645 mov QWORD[8+r14*8+rdi],rdx
646 sbb rbx,QWORD[16+r14*8+rcx]
647 mov rax,QWORD[32+r14*8+rsi]
648 mov rdx,QWORD[40+r14*8+rsi]
649 sbb rbp,QWORD[24+r14*8+rcx]
650 mov QWORD[16+r14*8+rdi],rbx
651 mov QWORD[24+r14*8+rdi],rbp
652 sbb rax,QWORD[32+r14*8+rcx]
653 mov rbx,QWORD[48+r14*8+rsi]
654 mov rbp,QWORD[56+r14*8+rsi]
655 sbb rdx,QWORD[40+r14*8+rcx]
656 lea r14,[4+r14]
657 dec r15
658 jnz NEAR $L$sub4x
659
; Last four words; rax is then reloaded with tp[num] for the mask.
660 mov QWORD[r14*8+rdi],rax
661 mov rax,QWORD[32+r14*8+rsi]
662 sbb rbx,QWORD[16+r14*8+rcx]
663 mov QWORD[8+r14*8+rdi],rdx
664 sbb rbp,QWORD[24+r14*8+rcx]
665 mov QWORD[16+r14*8+rdi],rbx
666
; rax = tp[num] minus the final borrow: the select mask.
667 sbb rax,0
668 mov QWORD[24+r14*8+rdi],rbp
669 pxor xmm0,xmm0
; Hand-encoded `movq xmm4,rax` (66 48 0F 6E E0).
670DB 102,72,15,110,224
; xmm4 = low dword of the mask broadcast to all lanes; xmm5 = ~xmm4.
671 pcmpeqd xmm5,xmm5
672 pshufd xmm4,xmm4,0
673 mov r15,r9
674 pxor xmm5,xmm4
675 shr r15,2
676 xor eax,eax
677
678 jmp NEAR $L$copy4x
679ALIGN 16
; Branch-free select, 32 bytes per pass (rax = byte offset, r15 = num/4):
;   result = (tp & xmm4) | (result & xmm5); tp is overwritten with zeros.
; tp is accessed with aligned moves, the result vector with unaligned ones.
680$L$copy4x:
681 movdqa xmm1,XMMWORD[rax*1+rsp]
682 movdqu xmm2,XMMWORD[rax*1+rdi]
683 pand xmm1,xmm4
684 pand xmm2,xmm5
685 movdqa xmm3,XMMWORD[16+rax*1+rsp]
686 movdqa XMMWORD[rax*1+rsp],xmm0
687 por xmm1,xmm2
688 movdqu xmm2,XMMWORD[16+rax*1+rdi]
689 movdqu XMMWORD[rax*1+rdi],xmm1
690 pand xmm3,xmm4
691 pand xmm2,xmm5
692 movdqa XMMWORD[16+rax*1+rsp],xmm0
693 por xmm3,xmm2
694 movdqu XMMWORD[16+rax*1+rdi],xmm3
695 lea rax,[32+rax]
696 dec r15
697 jnz NEAR $L$copy4x
; rsi = entry rsp saved at $L$mul4x_page_walk_done.
698 mov rsi,QWORD[8+r9*8+rsp]
699
; Return value 1, then reload the six pushed registers relative to rsi.
700 mov rax,1
701 mov r15,QWORD[((-48))+rsi]
702
703 mov r14,QWORD[((-40))+rsi]
704
705 mov r13,QWORD[((-32))+rsi]
706
707 mov r12,QWORD[((-24))+rsi]
708
709 mov rbp,QWORD[((-16))+rsi]
710
711 mov rbx,QWORD[((-8))+rsi]
712
713 lea rsp,[rsi]
714
715$L$mul4x_epilogue:
716 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
717 mov rsi,QWORD[16+rsp]
718 DB 0F3h,0C3h ;repret
719
720$L$SEH_end_bn_mul4x_mont:
721EXTERN bn_sqr8x_internal
722
723
;-----------------------------------------------------------------------
; bn_sqr8x_mont -- squaring path: builds a frame, calls the external
; routine bn_sqr8x_internal, then performs the final subtraction and a
; branch-free conditional copy into the result vector.
; Not declared global in this listing. bn_mul_mont jumps to $L$sqr8x_enter
; when both operand pointers are equal and the word count is a multiple
; of 8. The bn_sqr8x_mont label itself performs the Win64 argument
; re-mapping (rcx, rdx, r8, r9, [rsp+40], [rsp+48] -> rdi, rsi, rdx, rcx,
; r8, r9).
; Roles after the prologue: rdi = result, rsi = input vector, rcx = the
; vector passed on in xmm2, r8 = pointer to one word, r9 = word count.
; The register state returned by bn_sqr8x_internal (rax, rbp, rdi, r9) is
; defined by that routine and is not visible in this listing.
; Out:  rax = 1.
;-----------------------------------------------------------------------
724ALIGN 32
725bn_sqr8x_mont:
726 mov QWORD[8+rsp],rdi ;WIN64 prologue
727 mov QWORD[16+rsp],rsi
728 mov rax,rsp
729$L$SEH_begin_bn_sqr8x_mont:
730 mov rdi,rcx
731 mov rsi,rdx
732 mov rdx,r8
733 mov rcx,r9
734 mov r8,QWORD[40+rsp]
735 mov r9,QWORD[48+rsp]
736
737
738
; rax = entry rsp, stored in the frame below and read by the epilogue.
739 mov rax,rsp
740
741$L$sqr8x_enter:
742 push rbx

744 push rbp

746 push r12

748 push r13

750 push r14

752 push r15
753
754$L$sqr8x_prologue:
755
; r10 = 32*num, r9 = -(8*num). The 32-bit writes zero-extend the count.
756 mov r10d,r9d
757 shl r9d,3
758 shl r10,3+2
759 neg r9
760
761
762
763
764
765
; Candidate frame base = rsp - 64 - 16*num. r11 = (candidate - rsi) mod 4096
; selects one of two placements for rbp. NOTE(review): presumably this keeps
; the frame from aliasing the input vector modulo 4096 -- confirm upstream.
766 lea r11,[((-64))+r9*2+rsp]
767 mov rbp,rsp
; r8 = the word at [r8]; it is stored in the frame at [32+rsp] below.
768 mov r8,QWORD[r8]
769 sub r11,rsi
770 and r11,4095
771 cmp r10,r11
772 jb NEAR $L$sqr8x_sp_alt
773 sub rbp,r11
774 lea rbp,[((-64))+r9*2+rbp]
775 jmp NEAR $L$sqr8x_sp_done
776
777ALIGN 32
778$L$sqr8x_sp_alt:
; r11 -= (4096 - 64 - 16*num), clamped to 0 when that subtraction borrows.
779 lea r10,[((4096-64))+r9*2]
780 lea rbp,[((-64))+r9*2+rbp]
781 sub r11,r10
782 mov r10,0
783 cmovc r11,r10
784 sub rbp,r11
785$L$sqr8x_sp_done:
; Align the frame base down to 64 bytes, then lower rsp to it in steps of at
; most 4096 bytes, reading one word per step.
786 and rbp,-64
787 mov r11,rsp
788 sub r11,rbp
789 and r11,-4096
790 lea rsp,[rbp*1+r11]
791 mov r10,QWORD[rsp]
792 cmp rsp,rbp
793 ja NEAR $L$sqr8x_page_walk
794 jmp NEAR $L$sqr8x_page_walk_done
795
796ALIGN 16
797$L$sqr8x_page_walk:
798 lea rsp,[((-4096))+rsp]
799 mov r10,QWORD[rsp]
800 cmp rsp,rbp
801 ja NEAR $L$sqr8x_page_walk
802$L$sqr8x_page_walk_done:
803
; r10 = -(8*num), r9 = 8*num.
804 mov r10,r9
805 neg r9
806
; Frame slots: [32+rsp] = word loaded from [r8], [40+rsp] = entry rsp
; (sqr_handler reads the latter).
807 mov QWORD[32+rsp],r8
808 mov QWORD[40+rsp],rax
809
810$L$sqr8x_body:
811
; Hand-encoded `movq xmm2,rcx`.
812DB 102,72,15,110,209
813 pxor xmm0,xmm0
; Hand-encoded `movq xmm1,rdi` (keeps the result pointer across the call).
814DB 102,72,15,110,207
; Hand-encoded `movq xmm3,r10`.
815DB 102,73,15,110,218
816 call bn_sqr8x_internal
817
818
819
820
; The loops below count r9-derived values UP to zero, so bn_sqr8x_internal
; must return with r9 negative (presumably -(8*num) -- TODO confirm).
821 lea rbx,[r9*1+rdi]
822 mov rcx,r9
823 mov rdx,r9
; Hand-encoded `movq rdi,xmm1`: rdi = result pointer again.
824DB 102,72,15,126,207
; rcx = r9/32 = negative count of 4-word passes. sar leaves CF equal to the
; last bit shifted out, which is 0 when r9 is a multiple of 32, so the sbb
; chain below starts without a borrow.
825 sar rcx,3+2
826 jmp NEAR $L$sqr8x_sub
827
828ALIGN 32
; result[] = [rbx...] - [rbp...], four words per pass. mov, lea and inc do
; not modify CF, so the borrow survives from one pass to the next.
829$L$sqr8x_sub:
830 mov r12,QWORD[rbx]
831 mov r13,QWORD[8+rbx]
832 mov r14,QWORD[16+rbx]
833 mov r15,QWORD[24+rbx]
834 lea rbx,[32+rbx]
835 sbb r12,QWORD[rbp]
836 sbb r13,QWORD[8+rbp]
837 sbb r14,QWORD[16+rbp]
838 sbb r15,QWORD[24+rbp]
839 lea rbp,[32+rbp]
840 mov QWORD[rdi],r12
841 mov QWORD[8+rdi],r13
842 mov QWORD[16+rdi],r14
843 mov QWORD[24+rdi],r15
844 lea rdi,[32+rdi]
845 inc rcx
846 jnz NEAR $L$sqr8x_sub
847
; rax (as left by bn_sqr8x_internal) minus the final borrow is the select
; mask. rbx and rdi are rewound by r9 to the start of their ranges.
848 sbb rax,0
849 lea rbx,[r9*1+rbx]
850 lea rdi,[r9*1+rdi]
851
; Hand-encoded `movq xmm1,rax`; pshufd then broadcasts its low dword.
852DB 102,72,15,110,200
853 pxor xmm0,xmm0
854 pshufd xmm1,xmm1,0
; rsi = entry rsp saved at [40+rsp].
855 mov rsi,QWORD[40+rsp]
856
857 jmp NEAR $L$sqr8x_cond_copy
858
859ALIGN 32
; Branch-free select, 32 bytes per pass:
;   result = (temp & mask) | (result & ~mask)
; where ~mask is produced by pcmpeqd of a zero register against the mask.
; The temporary words just read at rbx, and the words at the same position
; offset by rdx (the copy of r9 taken after the call), are zeroed.
860$L$sqr8x_cond_copy:
861 movdqa xmm2,XMMWORD[rbx]
862 movdqa xmm3,XMMWORD[16+rbx]
863 lea rbx,[32+rbx]
864 movdqu xmm4,XMMWORD[rdi]
865 movdqu xmm5,XMMWORD[16+rdi]
866 lea rdi,[32+rdi]
867 movdqa XMMWORD[(-32)+rbx],xmm0
868 movdqa XMMWORD[(-16)+rbx],xmm0
869 movdqa XMMWORD[(-32)+rdx*1+rbx],xmm0
870 movdqa XMMWORD[(-16)+rdx*1+rbx],xmm0
871 pcmpeqd xmm0,xmm1
872 pand xmm2,xmm1
873 pand xmm3,xmm1
874 pand xmm4,xmm0
875 pand xmm5,xmm0
; xmm0 is reset to zero for the stores in the next pass.
876 pxor xmm0,xmm0
877 por xmm4,xmm2
878 por xmm5,xmm3
879 movdqu XMMWORD[(-32)+rdi],xmm4
880 movdqu XMMWORD[(-16)+rdi],xmm5
881 add r9,32
882 jnz NEAR $L$sqr8x_cond_copy
883
; Return value 1, then reload the six pushed registers relative to rsi.
884 mov rax,1
885 mov r15,QWORD[((-48))+rsi]
886
887 mov r14,QWORD[((-40))+rsi]
888
889 mov r13,QWORD[((-32))+rsi]
890
891 mov r12,QWORD[((-24))+rsi]
892
893 mov rbp,QWORD[((-16))+rsi]
894
895 mov rbx,QWORD[((-8))+rsi]
896
897 lea rsp,[rsi]
898
899$L$sqr8x_epilogue:
900 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
901 mov rsi,QWORD[16+rsp]
902 DB 0F3h,0C3h ;repret
903
904$L$SEH_end_bn_sqr8x_mont:
; NUL-terminated ASCII banner embedded in .text; the bytes spell
; "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>".
905DB 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
906DB 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
907DB 54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83
908DB 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
909DB 115,108,46,111,114,103,62,0
910ALIGN 16
911EXTERN __imp_RtlVirtualUnwind
912
;-----------------------------------------------------------------------
; mul_handler -- Win64 exception handler named in the .xdata records of
; bn_mul_mont and bn_mul4x_mont. It reconstructs the interrupted function's
; caller state in the CONTEXT record, then continues in the shared
; $L$common_pop_regs / $L$common_seh_tail code located after sqr_handler.
; In:   r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT.
; NOTE(review): the numeric offsets are interpreted per the Win64 CONTEXT and
; DISPATCHER_CONTEXT layouts (120 = Rax, 152 = Rsp, 192 = R9, 248 = Rip;
; 8 = ImageBase, 56 = HandlerData) -- confirm against winnt.h.
; HandlerData for this handler = { body label RVA, epilogue label RVA }.
;-----------------------------------------------------------------------
913ALIGN 16
914mul_handler:
; Save every register the handler touches, plus flags, and reserve 64 bytes
; for the outgoing call (shadow space and four stack arguments).
915 push rsi
916 push rdi
917 push rbx
918 push rbp
919 push r12
920 push r13
921 push r14
922 push r15
923 pushfq
924 sub rsp,64
925
; rax = interrupted Rax, rbx = interrupted Rip.
926 mov rax,QWORD[120+r8]
927 mov rbx,QWORD[248+r8]
928
; rsi = image base, r11 = pointer to the HandlerData pair.
929 mov rsi,QWORD[8+r9]
930 mov r11,QWORD[56+r9]
931
; Rip before the body label: no frame yet. The functions still hold the
; entry rsp in rax there, so the interrupted Rax is used as the frame.
932 mov r10d,DWORD[r11]
933 lea r10,[r10*1+rsi]
934 cmp rbx,r10
935 jb NEAR $L$common_seh_tail
936
; Rip at or after the epilogue label: rsp is already restored, so the
; interrupted Rsp is used as the frame.
937 mov rax,QWORD[152+r8]
938
939 mov r10d,DWORD[4+r11]
940 lea r10,[r10*1+rsi]
941 cmp rbx,r10
942 jae NEAR $L$common_seh_tail
943
; Inside the body: fetch the entry rsp the function stored at
; [8 + num*8 + rsp], with num taken from the interrupted R9.
944 mov r10,QWORD[192+r8]
945 mov rax,QWORD[8+r10*8+rax]
946
947 jmp NEAR $L$common_pop_regs
948
949
950
;-----------------------------------------------------------------------
; sqr_handler -- Win64 exception handler named in the .xdata record of
; bn_sqr8x_mont. It is followed by $L$common_pop_regs and
; $L$common_seh_tail, which mul_handler also jumps into.
; In:   r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT.
; Out:  eax = 1 (presumably ExceptionContinueSearch -- TODO confirm).
; NOTE(review): the numeric offsets are interpreted per the Win64 CONTEXT and
; DISPATCHER_CONTEXT layouts (CONTEXT: 120 Rax, 144 Rbx, 152 Rsp, 160 Rbp,
; 168 Rsi, 176 Rdi, 216-240 R12-R15, 248 Rip; DISPATCHER_CONTEXT: 0 ControlPc,
; 8 ImageBase, 16 FunctionEntry, 24 EstablisherFrame, 40 ContextRecord,
; 56 HandlerData) -- confirm against winnt.h.
; HandlerData for this handler = { prologue, body, epilogue label RVAs }.
;-----------------------------------------------------------------------
951ALIGN 16
952sqr_handler:
953 push rsi
954 push rdi
955 push rbx
956 push rbp
957 push r12
958 push r13
959 push r14
960 push r15
961 pushfq
962 sub rsp,64
963
; rax = interrupted Rax, rbx = interrupted Rip.
964 mov rax,QWORD[120+r8]
965 mov rbx,QWORD[248+r8]
966
; rsi = image base, r11 = pointer to the HandlerData triple.
967 mov rsi,QWORD[8+r9]
968 mov r11,QWORD[56+r9]
969
; Rip before the prologue label: nothing has been pushed to the frame that
; needs restoring; the interrupted Rax is used as the frame.
970 mov r10d,DWORD[r11]
971 lea r10,[r10*1+rsi]
972 cmp rbx,r10
973 jb NEAR $L$common_seh_tail
974
; Rip between the prologue and body labels: registers are pushed and rax
; still holds the entry rsp, so restore them relative to the interrupted Rax.
975 mov r10d,DWORD[4+r11]
976 lea r10,[r10*1+rsi]
977 cmp rbx,r10
978 jb NEAR $L$common_pop_regs
979
; Rip at or after the epilogue label: the interrupted Rsp is the frame.
980 mov rax,QWORD[152+r8]
981
982 mov r10d,DWORD[8+r11]
983 lea r10,[r10*1+rsi]
984 cmp rbx,r10
985 jae NEAR $L$common_seh_tail
986
; Inside the body: the entry rsp was stored at [40+rsp] by bn_sqr8x_mont.
987 mov rax,QWORD[40+rax]
988
; Shared: rax = entry rsp of the interrupted function. Reload the six pushed
; registers from below it and write them into the CONTEXT record.
989$L$common_pop_regs:
990 mov rbx,QWORD[((-8))+rax]
991 mov rbp,QWORD[((-16))+rax]
992 mov r12,QWORD[((-24))+rax]
993 mov r13,QWORD[((-32))+rax]
994 mov r14,QWORD[((-40))+rax]
995 mov r15,QWORD[((-48))+rax]
996 mov QWORD[144+r8],rbx
997 mov QWORD[160+r8],rbp
998 mov QWORD[216+r8],r12
999 mov QWORD[224+r8],r13
1000 mov QWORD[232+r8],r14
1001 mov QWORD[240+r8],r15
1002
; Shared: recover rdi/rsi from the home slots at [8+rax]/[16+rax] and record
; Rsp, Rsi and Rdi in the CONTEXT record.
1003$L$common_seh_tail:
1004 mov rdi,QWORD[8+rax]
1005 mov rsi,QWORD[16+rax]
1006 mov QWORD[152+r8],rax
1007 mov QWORD[168+r8],rsi
1008 mov QWORD[176+r8],rdi
1009
; Copy 154 qwords from the CONTEXT at r8 to the record at [40+r9].
1010 mov rdi,QWORD[40+r9]
1011 mov rsi,r8
1012 mov ecx,154
; Little-endian bytes FC F3 48 A5 = `cld` followed by `rep movsq`.
1013 DD 0xa548f3fc
1014
; Call RtlVirtualUnwind: rcx = 0, rdx = [8+disp], r8 = [disp], r9 = [16+disp],
; stack arguments = [40+disp], address disp+56, address disp+24, 0.
1015 mov rsi,r9
1016 xor rcx,rcx
1017 mov rdx,QWORD[8+rsi]
1018 mov r8,QWORD[rsi]
1019 mov r9,QWORD[16+rsi]
1020 mov r10,QWORD[40+rsi]
1021 lea r11,[56+rsi]
1022 lea r12,[24+rsi]
1023 mov QWORD[32+rsp],r10
1024 mov QWORD[40+rsp],r11
1025 mov QWORD[48+rsp],r12
1026 mov QWORD[56+rsp],rcx
1027 call QWORD[__imp_RtlVirtualUnwind]
1028
; Return 1 and undo the handler's own frame in reverse push order.
1029 mov eax,1
1030 add rsp,64
1031 popfq
1032 pop r15
1033 pop r14
1034 pop r13
1035 pop r12
1036 pop rbp
1037 pop rbx
1038 pop rdi
1039 pop rsi
1040 DB 0F3h,0C3h ;repret
1041
1042
; .pdata: one three-dword record per function -- begin label, end label and
; the matching .xdata record, each as an image-relative address.
; NOTE(review): this matches the Win64 RUNTIME_FUNCTION layout -- confirm.
1043section .pdata rdata align=4
1044ALIGN 4
1045 DD $L$SEH_begin_bn_mul_mont wrt ..imagebase
1046 DD $L$SEH_end_bn_mul_mont wrt ..imagebase
1047 DD $L$SEH_info_bn_mul_mont wrt ..imagebase
1048
1049 DD $L$SEH_begin_bn_mul4x_mont wrt ..imagebase
1050 DD $L$SEH_end_bn_mul4x_mont wrt ..imagebase
1051 DD $L$SEH_info_bn_mul4x_mont wrt ..imagebase
1052
1053 DD $L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
1054 DD $L$SEH_end_bn_sqr8x_mont wrt ..imagebase
1055 DD $L$SEH_info_bn_sqr8x_mont wrt ..imagebase
; .xdata: per-function unwind records. Each starts with the bytes 9,0,0,0
; (presumably UNWIND_INFO version 1 with the exception-handler flag and no
; unwind codes -- TODO confirm), followed by the handler's address and the
; label addresses that the handler reads through HandlerData.
1056section .xdata rdata align=8
1057ALIGN 8
; mul_handler data: { body, epilogue }.
1058$L$SEH_info_bn_mul_mont:
1059DB 9,0,0,0
1060 DD mul_handler wrt ..imagebase
1061 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
1062$L$SEH_info_bn_mul4x_mont:
1063DB 9,0,0,0
1064 DD mul_handler wrt ..imagebase
1065 DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
; sqr_handler data: { prologue, body, epilogue }.
1066$L$SEH_info_bn_sqr8x_mont:
1067DB 9,0,0,0
1068 DD sqr_handler wrt ..imagebase
1069 DD $L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase
1070ALIGN 8
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette