VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.3/crypto/genasm-nasm/rsaz-x86_64.S@ 95219

Last change on this file since 95219 was 95219, checked in by vboxsync, 3 years ago

libs/openssl: Switched to v3.0.3, bugref:10128

File size: 33.8 KB
Line 
1default rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section .text code align=64
6
7
8EXTERN OPENSSL_ia32cap_P
9
;-----------------------------------------------------------------------
; rsaz_512_sqr -- repeated 512-bit (8 x 64-bit word) squaring, each
; squaring followed by a reduce + conditional-subtract step.
;
; ABI:  Win64 entry.  The five arguments (rcx, rdx, r8, r9, [40+rsp]) are
;       moved into rdi, rsi, rdx, rcx, r8 right after the prologue, and
;       rdi/rsi are saved in the caller's home space and restored on exit.
; In (after the shuffle):
;       rdi = destination, 8 qwords (written by __rsaz_512_subtract)
;       rsi = source operand, 8 qwords; after every pass rsi = rdi
;       rdx = pointer to 8 qwords handed to the reduce/subtract helpers
;             in rbp (presumably the modulus -- confirm against caller)
;       rcx = 64-bit constant kept at [128+rsp] for the reduce helpers
;             (presumably the Montgomery n0 value -- confirm)
;       r8d = number of squarings; the loop is "dec / jnz" shaped, so a
;             count of 0 would wrap instead of skipping the loop
; Frame (after "sub rsp,128+24"):
;       [0..127+rsp]  16-qword double-width square
;       [128+rsp]     rcx argument
;       [136+rsp]     remaining pass count (dword)
; Two code paths: a MUL/ADC path ($L$oop_sqr) and a MULX/ADCX/ADOX path
; ($L$oop_sqrx) selected from OPENSSL_ia32cap_P.
;-----------------------------------------------------------------------
10global rsaz_512_sqr
11
12ALIGN 32
13rsaz_512_sqr:
14 mov QWORD[8+rsp],rdi ;WIN64 prologue
15 mov QWORD[16+rsp],rsi
16 mov rax,rsp
17$L$SEH_begin_rsaz_512_sqr:
18 mov rdi,rcx
19 mov rsi,rdx
20 mov rdx,r8
21 mov rcx,r9
22 mov r8,QWORD[40+rsp]
23
24
25
26 push rbx
27
28 push rbp
29
30 push r12
31
32 push r13
33
34 push r14
35
36 push r15
37
38
39 sub rsp,128+24
40
41$L$sqr_body:
42DB 102,72,15,110,202 ; movq xmm1,rdx -- park the third argument in xmm1
43 mov rdx,QWORD[rsi]
44 mov rax,QWORD[8+rsi]
45 mov QWORD[128+rsp],rcx
; Take the MULX/ADCX/ADOX path only when both bits of 0x80100 are set in
; the dword at OPENSSL_ia32cap_P+8 (bits 8 and 19; per OpenSSL's
; OPENSSL_ia32cap documentation these are BMI2 and ADX).
46 mov r11d,0x80100
47 and r11d,DWORD[((OPENSSL_ia32cap_P+8))]
48 cmp r11d,0x80100
49 je NEAR $L$oop_sqrx
50 jmp NEAR $L$oop_sqr
51
52ALIGN 32
53$L$oop_sqr:
; One squaring pass, MUL/ADC flavour.  On entry rdx = a[0], rax = a[1],
; r8d = passes left.
54 mov DWORD[((128+8))+rsp],r8d
55
; Row 0: rbx = a[0], rbp = a[1]; a[0]*a[1..7] lands in r8..r15.
56 mov rbx,rdx
57 mov rbp,rax
58 mul rdx
59 mov r8,rax
60 mov rax,QWORD[16+rsi]
61 mov r9,rdx
62
63 mul rbx
64 add r9,rax
65 mov rax,QWORD[24+rsi]
66 mov r10,rdx
67 adc r10,0
68
69 mul rbx
70 add r10,rax
71 mov rax,QWORD[32+rsi]
72 mov r11,rdx
73 adc r11,0
74
75 mul rbx
76 add r11,rax
77 mov rax,QWORD[40+rsi]
78 mov r12,rdx
79 adc r12,0
80
81 mul rbx
82 add r12,rax
83 mov rax,QWORD[48+rsi]
84 mov r13,rdx
85 adc r13,0
86
87 mul rbx
88 add r13,rax
89 mov rax,QWORD[56+rsi]
90 mov r14,rdx
91 adc r14,0
92
93 mul rbx
94 add r14,rax
95 mov rax,rbx
96 adc rdx,0
97
; Double r8 (carry kept in rcx), add a[0]^2, store result words 0 and 1.
98 xor rcx,rcx
99 add r8,r8
100 mov r15,rdx
101 adc rcx,0
102
103 mul rax
104 add rdx,r8
105 adc rcx,0
106
107 mov QWORD[rsp],rax
108 mov QWORD[8+rsp],rdx
109
110
; Row 1: a[1]*a[2..7] (rbp = a[1]) added into r10..r15; new top word in r8.
111 mov rax,QWORD[16+rsi]
112 mul rbp
113 add r10,rax
114 mov rax,QWORD[24+rsi]
115 mov rbx,rdx
116 adc rbx,0
117
118 mul rbp
119 add r11,rax
120 mov rax,QWORD[32+rsi]
121 adc rdx,0
122 add r11,rbx
123 mov rbx,rdx
124 adc rbx,0
125
126 mul rbp
127 add r12,rax
128 mov rax,QWORD[40+rsi]
129 adc rdx,0
130 add r12,rbx
131 mov rbx,rdx
132 adc rbx,0
133
134 mul rbp
135 add r13,rax
136 mov rax,QWORD[48+rsi]
137 adc rdx,0
138 add r13,rbx
139 mov rbx,rdx
140 adc rbx,0
141
142 mul rbp
143 add r14,rax
144 mov rax,QWORD[56+rsi]
145 adc rdx,0
146 add r14,rbx
147 mov rbx,rdx
148 adc rbx,0
149
150 mul rbp
151 add r15,rax
152 mov rax,rbp
153 adc rdx,0
154 add r15,rbx
155 adc rdx,0
156
; Double r9:r10 (carry out in rbx), add a[1]^2 plus the carry left in rcx,
; store result words 2 and 3.  rbp is reloaded with a[2] for the next row.
157 xor rbx,rbx
158 add r9,r9
159 mov r8,rdx
160 adc r10,r10
161 adc rbx,0
162
163 mul rax
164
165 add rax,rcx
166 mov rbp,QWORD[16+rsi]
167 add r9,rax
168 mov rax,QWORD[24+rsi]
169 adc r10,rdx
170 adc rbx,0
171
172 mov QWORD[16+rsp],r9
173 mov QWORD[24+rsp],r10
174
175
; Row 2: a[2]*a[3..7] added into r12..r15,r8; new top word in r9.
176 mul rbp
177 add r12,rax
178 mov rax,QWORD[32+rsi]
179 mov rcx,rdx
180 adc rcx,0
181
182 mul rbp
183 add r13,rax
184 mov rax,QWORD[40+rsi]
185 adc rdx,0
186 add r13,rcx
187 mov rcx,rdx
188 adc rcx,0
189
190 mul rbp
191 add r14,rax
192 mov rax,QWORD[48+rsi]
193 adc rdx,0
194 add r14,rcx
195 mov rcx,rdx
196 adc rcx,0
197
198 mul rbp
199 add r15,rax
200 mov rax,QWORD[56+rsi]
201 adc rdx,0
202 add r15,rcx
203 mov rcx,rdx
204 adc rcx,0
205
206 mul rbp
207 add r8,rax
208 mov rax,rbp
209 adc rdx,0
210 add r8,rcx
211 adc rdx,0
212
; Double r11:r12 (carry out in rcx), add a[2]^2 plus the carry in rbx,
; store result words 4 and 5.  r10 is reloaded with a[3].
213 xor rcx,rcx
214 add r11,r11
215 mov r9,rdx
216 adc r12,r12
217 adc rcx,0
218
219 mul rax
220
221 add rax,rbx
222 mov r10,QWORD[24+rsi]
223 add r11,rax
224 mov rax,QWORD[32+rsi]
225 adc r12,rdx
226 adc rcx,0
227
228 mov QWORD[32+rsp],r11
229 mov QWORD[40+rsp],r12
230
231
; Row 3: a[3]*a[4..7].  Copies of a[4], a[5], a[6] are kept in r11, r12,
; rbp so the remaining rows need fewer loads.
232 mov r11,rax
233 mul r10
234 add r14,rax
235 mov rax,QWORD[40+rsi]
236 mov rbx,rdx
237 adc rbx,0
238
239 mov r12,rax
240 mul r10
241 add r15,rax
242 mov rax,QWORD[48+rsi]
243 adc rdx,0
244 add r15,rbx
245 mov rbx,rdx
246 adc rbx,0
247
248 mov rbp,rax
249 mul r10
250 add r8,rax
251 mov rax,QWORD[56+rsi]
252 adc rdx,0
253 add r8,rbx
254 mov rbx,rdx
255 adc rbx,0
256
257 mul r10
258 add r9,rax
259 mov rax,r10
260 adc rdx,0
261 add r9,rbx
262 adc rdx,0
263
; Double r13:r14, add a[3]^2 plus carry, store result words 6 and 7.
264 xor rbx,rbx
265 add r13,r13
266 mov r10,rdx
267 adc r14,r14
268 adc rbx,0
269
270 mul rax
271
272 add rax,rcx
273 add r13,rax
274 mov rax,r12
275 adc r14,rdx
276 adc rbx,0
277
278 mov QWORD[48+rsp],r13
279 mov QWORD[56+rsp],r14
280
281
; Row 4: a[4] (r11) times a[5], a[6], a[7]; a[7] is cached in r14.
282 mul r11
283 add r8,rax
284 mov rax,rbp
285 mov rcx,rdx
286 adc rcx,0
287
288 mul r11
289 add r9,rax
290 mov rax,QWORD[56+rsi]
291 adc rdx,0
292 add r9,rcx
293 mov rcx,rdx
294 adc rcx,0
295
296 mov r14,rax
297 mul r11
298 add r10,rax
299 mov rax,r11
300 adc rdx,0
301 add r10,rcx
302 adc rdx,0
303
; Double r15:r8, add a[4]^2 plus carry, store result words 8 and 9.
304 xor rcx,rcx
305 add r15,r15
306 mov r11,rdx
307 adc r8,r8
308 adc rcx,0
309
310 mul rax
311
312 add rax,rbx
313 add r15,rax
314 mov rax,rbp
315 adc r8,rdx
316 adc rcx,0
317
318 mov QWORD[64+rsp],r15
319 mov QWORD[72+rsp],r8
320
321
; Row 5: a[5] (r12) times a[6] (rbp) and a[7] (r14).
322 mul r12
323 add r10,rax
324 mov rax,r14
325 mov rbx,rdx
326 adc rbx,0
327
328 mul r12
329 add r11,rax
330 mov rax,r12
331 adc rdx,0
332 add r11,rbx
333 adc rdx,0
334
; Double r9:r10, add a[5]^2 plus carry, store result words 10 and 11.
335 xor rbx,rbx
336 add r9,r9
337 mov r12,rdx
338 adc r10,r10
339 adc rbx,0
340
341 mul rax
342
343 add rax,rcx
344 add r9,rax
345 mov rax,r14
346 adc r10,rdx
347 adc rbx,0
348
349 mov QWORD[80+rsp],r9
350 mov QWORD[88+rsp],r10
351
352
; Row 6: a[7] (rax) times a[6] (rbp).
353 mul rbp
354 add r12,rax
355 mov rax,rbp
356 adc rdx,0
357
; Double r11:r12, add a[6]^2 plus carry, store result words 12 and 13.
358 xor rcx,rcx
359 add r11,r11
360 mov r13,rdx
361 adc r12,r12
362 adc rcx,0
363
364 mul rax
365
366 add rax,rbx
367 add r11,rax
368 mov rax,r14
369 adc r12,rdx
370 adc rcx,0
371
372 mov QWORD[96+rsp],r11
373 mov QWORD[104+rsp],r12
374
375
; Double r13, add a[7]^2 plus carry: result words 14 and 15 (stored below).
376 xor rbx,rbx
377 add r13,r13
378 adc rbx,0
379
380 mul rax
381
382 add rax,rcx
383 add rax,r13
384 adc rdx,rbx
385
; Reload the low eight words for the reduce helper; rbp = pointer saved in xmm1.
386 mov r8,QWORD[rsp]
387 mov r9,QWORD[8+rsp]
388 mov r10,QWORD[16+rsp]
389 mov r11,QWORD[24+rsp]
390 mov r12,QWORD[32+rsp]
391 mov r13,QWORD[40+rsp]
392 mov r14,QWORD[48+rsp]
393 mov r15,QWORD[56+rsp]
394DB 102,72,15,126,205 ; movq rbp,xmm1
395
396 mov QWORD[112+rsp],rax
397 mov QWORD[120+rsp],rdx
398
399 call __rsaz_512_reduce
400
; Add the high eight words of the square; rcx = 0 or all-ones from the
; final carry, used as the mask by __rsaz_512_subtract.
401 add r8,QWORD[64+rsp]
402 adc r9,QWORD[72+rsp]
403 adc r10,QWORD[80+rsp]
404 adc r11,QWORD[88+rsp]
405 adc r12,QWORD[96+rsp]
406 adc r13,QWORD[104+rsp]
407 adc r14,QWORD[112+rsp]
408 adc r15,QWORD[120+rsp]
409 sbb rcx,rcx
410
411 call __rsaz_512_subtract
412
; Set up the next pass: rdx = word 0, rax = word 1, rsi = destination
; (the result just written becomes the next input).
413 mov rdx,r8
414 mov rax,r9
415 mov r8d,DWORD[((128+8))+rsp]
416 mov rsi,rdi
417
418 dec r8d
419 jnz NEAR $L$oop_sqr
420 jmp NEAR $L$sqr_tail
421
422ALIGN 32
423$L$oop_sqrx:
; One squaring pass, MULX/ADCX/ADOX flavour.  On entry rdx = a[0],
; rax = a[1], r8d = passes left.  rdi is used as scratch below, so the
; destination pointer is parked in xmm0 for the duration of the pass.
424 mov DWORD[((128+8))+rsp],r8d
425DB 102,72,15,110,199 ; movq xmm0,rdi
426
; Row 0: a[0]*a[1..7] into r8..r15.  rbp is zeroed (which also clears
; CF/OF) and serves as the zero operand that absorbs carries.
427 mulx r9,r8,rax
428 mov rbx,rax
429
430 mulx r10,rcx,QWORD[16+rsi]
431 xor rbp,rbp
432
433 mulx r11,rax,QWORD[24+rsi]
434 adcx r9,rcx
435
436DB 0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00 ; mulx r12,rcx,QWORD[32+rsi]
437 adcx r10,rax
438
439DB 0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00 ; mulx r13,rax,QWORD[40+rsi]
440 adcx r11,rcx
441
442 mulx r14,rcx,QWORD[48+rsi]
443 adcx r12,rax
444 adcx r13,rcx
445
446 mulx r15,rax,QWORD[56+rsi]
447 adcx r14,rax
448 adcx r15,rbp
449
; a[0]^2; rdx becomes a[1].  Double r8, add the high half of the square,
; store result words 0 and 1; the leftover carry goes to rcx.
450 mulx rdi,rax,rdx
451 mov rdx,rbx
452 xor rcx,rcx
453 adox r8,r8
454 adcx r8,rdi
455 adox rcx,rbp
456 adcx rcx,rbp
457
458 mov QWORD[rsp],rax
459 mov QWORD[8+rsp],r8
460
461
; Row 1: a[1]*a[2..7].
462DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00 ; mulx rbx,rax,QWORD[16+rsi]
463 adox r10,rax
464 adcx r11,rbx
465
466 mulx r8,rdi,QWORD[24+rsi]
467 adox r11,rdi
468DB 0x66 ; extra operand-size prefix on the next instruction (presumably padding)
469 adcx r12,r8
470
471 mulx rbx,rax,QWORD[32+rsi]
472 adox r12,rax
473 adcx r13,rbx
474
475 mulx r8,rdi,QWORD[40+rsi]
476 adox r13,rdi
477 adcx r14,r8
478
479DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 ; mulx rbx,rax,QWORD[48+rsi]
480 adox r14,rax
481 adcx r15,rbx
482
483DB 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00 ; mulx r8,rdi,QWORD[56+rsi]
484 adox r15,rdi
485 adcx r8,rbp
; a[1]^2; rdx becomes a[2].
486 mulx rdi,rax,rdx
487 adox r8,rbp
488DB 0x48,0x8b,0x96,0x10,0x00,0x00,0x00 ; mov rdx,QWORD[16+rsi]
489
; Double r9:r10, add a[1]^2 plus the carry in rcx, store result words 2, 3.
490 xor rbx,rbx
491 adox r9,r9
492
493 adcx rax,rcx
494 adox r10,r10
495 adcx r9,rax
496 adox rbx,rbp
497 adcx r10,rdi
498 adcx rbx,rbp
499
500 mov QWORD[16+rsp],r9
501DB 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00 ; mov QWORD[24+rsp],r10
502
503
; Row 2: a[2]*a[3..7].
504 mulx r9,rdi,QWORD[24+rsi]
505 adox r12,rdi
506 adcx r13,r9
507
508 mulx rcx,rax,QWORD[32+rsi]
509 adox r13,rax
510 adcx r14,rcx
511
512DB 0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00 ; mulx r9,rdi,QWORD[40+rsi]
513 adox r14,rdi
514 adcx r15,r9
515
516DB 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00 ; mulx rcx,rax,QWORD[48+rsi]
517 adox r15,rax
518 adcx r8,rcx
519
520 mulx r9,rdi,QWORD[56+rsi]
521 adox r8,rdi
522 adcx r9,rbp
; a[2]^2; rdx becomes a[3].
523 mulx rdi,rax,rdx
524 adox r9,rbp
525 mov rdx,QWORD[24+rsi]
526
; Double r11:r12, add a[2]^2 plus carry, store result words 4 and 5.
527 xor rcx,rcx
528 adox r11,r11
529
530 adcx rax,rbx
531 adox r12,r12
532 adcx r11,rax
533 adox rcx,rbp
534 adcx r12,rdi
535 adcx rcx,rbp
536
537 mov QWORD[32+rsp],r11
538 mov QWORD[40+rsp],r12
539
540
; Row 3: a[3]*a[4..7].
541 mulx rbx,rax,QWORD[32+rsi]
542 adox r14,rax
543 adcx r15,rbx
544
545 mulx r10,rdi,QWORD[40+rsi]
546 adox r15,rdi
547 adcx r8,r10
548
549 mulx rbx,rax,QWORD[48+rsi]
550 adox r8,rax
551 adcx r9,rbx
552
553 mulx r10,rdi,QWORD[56+rsi]
554 adox r9,rdi
555 adcx r10,rbp
; a[3]^2; rdx becomes a[4].
556 mulx rdi,rax,rdx
557 adox r10,rbp
558 mov rdx,QWORD[32+rsi]
559
; Double r13:r14, add a[3]^2 plus carry, store result words 6 and 7.
560 xor rbx,rbx
561 adox r13,r13
562
563 adcx rax,rcx
564 adox r14,r14
565 adcx r13,rax
566 adox rbx,rbp
567 adcx r14,rdi
568 adcx rbx,rbp
569
570 mov QWORD[48+rsp],r13
571 mov QWORD[56+rsp],r14
572
573
; Row 4: a[4]*a[5..7].
574 mulx r11,rdi,QWORD[40+rsi]
575 adox r8,rdi
576 adcx r9,r11
577
578 mulx rcx,rax,QWORD[48+rsi]
579 adox r9,rax
580 adcx r10,rcx
581
582 mulx r11,rdi,QWORD[56+rsi]
583 adox r10,rdi
584 adcx r11,rbp
; a[4]^2; rdx becomes a[5].
585 mulx rdi,rax,rdx
586 mov rdx,QWORD[40+rsi]
587 adox r11,rbp
588
; Double r15:r8, add a[4]^2 plus carry, store result words 8 and 9.
589 xor rcx,rcx
590 adox r15,r15
591
592 adcx rax,rbx
593 adox r8,r8
594 adcx r15,rax
595 adox rcx,rbp
596 adcx r8,rdi
597 adcx rcx,rbp
598
599 mov QWORD[64+rsp],r15
600 mov QWORD[72+rsp],r8
601
602
; Row 5: a[5]*a[6], a[5]*a[7].
603DB 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00 ; mulx rbx,rax,QWORD[48+rsi]
604 adox r10,rax
605 adcx r11,rbx
606
607DB 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00 ; mulx r12,rdi,QWORD[56+rsi]
608 adox r11,rdi
609 adcx r12,rbp
; a[5]^2; rdx becomes a[6].
610 mulx rdi,rax,rdx
611 adox r12,rbp
612 mov rdx,QWORD[48+rsi]
613
; Double r9:r10, add a[5]^2 plus carry, store result words 10 and 11.
614 xor rbx,rbx
615 adox r9,r9
616
617 adcx rax,rcx
618 adox r10,r10
619 adcx r9,rax
620 adcx r10,rdi
621 adox rbx,rbp
622 adcx rbx,rbp
623
624 mov QWORD[80+rsp],r9
625 mov QWORD[88+rsp],r10
626
627
; Row 6: a[6]*a[7].
628DB 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00 ; mulx r13,rax,QWORD[56+rsi]
629 adox r12,rax
630 adox r13,rbp
631
; a[6]^2; rdx becomes a[7].  Double r11:r12, add the square plus carry,
; store result words 12 and 13.
632 mulx rdi,rax,rdx
633 xor rcx,rcx
634 mov rdx,QWORD[56+rsi]
635 adox r11,r11
636
637 adcx rax,rbx
638 adox r12,r12
639 adcx r11,rax
640 adox rcx,rbp
641 adcx r12,rdi
642 adcx rcx,rbp
643
644DB 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00 ; mov QWORD[96+rsp],r11
645DB 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00 ; mov QWORD[104+rsp],r12
646
647
; a[7]^2; double r13 and fold everything into result words 14 (rax) and 15 (rbx).
648 mulx rdx,rax,rdx
649 xor rbx,rbx
650 adox r13,r13
651
652 adcx rax,rcx
653 adox rbx,rbp
654 adcx rax,r13
655 adcx rbx,rdx
656
657DB 102,72,15,126,199 ; movq rdi,xmm0 -- destination pointer back
658DB 102,72,15,126,205 ; movq rbp,xmm1 -- pointer for the reduce/subtract helpers
659
; rdx = constant saved at [128+rsp]; __rsaz_512_reducex expects it there.
660 mov rdx,QWORD[128+rsp]
661 mov r8,QWORD[rsp]
662 mov r9,QWORD[8+rsp]
663 mov r10,QWORD[16+rsp]
664 mov r11,QWORD[24+rsp]
665 mov r12,QWORD[32+rsp]
666 mov r13,QWORD[40+rsp]
667 mov r14,QWORD[48+rsp]
668 mov r15,QWORD[56+rsp]
669
670 mov QWORD[112+rsp],rax
671 mov QWORD[120+rsp],rbx
672
673 call __rsaz_512_reducex
674
; Add the high eight words; rcx = 0 or all-ones from the final carry.
675 add r8,QWORD[64+rsp]
676 adc r9,QWORD[72+rsp]
677 adc r10,QWORD[80+rsp]
678 adc r11,QWORD[88+rsp]
679 adc r12,QWORD[96+rsp]
680 adc r13,QWORD[104+rsp]
681 adc r14,QWORD[112+rsp]
682 adc r15,QWORD[120+rsp]
683 sbb rcx,rcx
684
685 call __rsaz_512_subtract
686
; Next pass reads the result just written: rdx = word 0, rax = word 1.
687 mov rdx,r8
688 mov rax,r9
689 mov r8d,DWORD[((128+8))+rsp]
690 mov rsi,rdi
691
692 dec r8d
693 jnz NEAR $L$oop_sqrx
694
695$L$sqr_tail:
696
; rax = stack pointer as it was before the six pushes; reload the saved
; registers relative to it and drop the frame.
697 lea rax,[((128+24+48))+rsp]
698
699 mov r15,QWORD[((-48))+rax]
700
701 mov r14,QWORD[((-40))+rax]
702
703 mov r13,QWORD[((-32))+rax]
704
705 mov r12,QWORD[((-24))+rax]
706
707 mov rbp,QWORD[((-16))+rax]
708
709 mov rbx,QWORD[((-8))+rax]
710
711 lea rsp,[rax]
712
713$L$sqr_epilogue:
714 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
715 mov rsi,QWORD[16+rsp]
716 DB 0F3h,0C3h ;repret
717
718$L$SEH_end_rsaz_512_sqr:
;-----------------------------------------------------------------------
; rsaz_512_mul -- 512-bit multiply followed by reduce + conditional subtract.
;
; ABI:  Win64 entry; the five arguments (rcx, rdx, r8, r9, [40+rsp]) are
;       moved into rdi, rsi, rdx, rcx, r8.
; In (after the shuffle):
;       rdi = destination, 8 qwords (written by __rsaz_512_subtract)
;       rsi = first operand, 8 qwords
;       rdx = second operand, 8 qwords
;       rcx = pointer handed to the reduce/subtract helpers in rbp
;             (presumably the modulus -- confirm against caller)
;       r8  = 64-bit constant stored at [128+rsp] for the reduce helpers
;             (presumably the Montgomery n0 value -- confirm)
; Frame: [0..127+rsp] 16-qword product, [128+rsp] = r8 argument.
; The multiply helpers overwrite rdi and rbp, so rdi and rcx are parked
; in xmm0 / xmm1 across the call.
;-----------------------------------------------------------------------
719global rsaz_512_mul
720
721ALIGN 32
722rsaz_512_mul:
723 mov QWORD[8+rsp],rdi ;WIN64 prologue
724 mov QWORD[16+rsp],rsi
725 mov rax,rsp
726$L$SEH_begin_rsaz_512_mul:
727 mov rdi,rcx
728 mov rsi,rdx
729 mov rdx,r8
730 mov rcx,r9
731 mov r8,QWORD[40+rsp]
732
733
734
735 push rbx
736
737 push rbp
738
739 push r12
740
741 push r13
742
743 push r14
744
745 push r15
746
747
748 sub rsp,128+24
749
750$L$mul_body:
751DB 102,72,15,110,199 ; movq xmm0,rdi
752DB 102,72,15,110,201 ; movq xmm1,rcx
753 mov QWORD[128+rsp],r8
; Use the MULX/ADCX/ADOX helpers only when both bits of 0x80100 are set in
; the dword at OPENSSL_ia32cap_P+8.
754 mov r11d,0x80100
755 and r11d,DWORD[((OPENSSL_ia32cap_P+8))]
756 cmp r11d,0x80100
757 je NEAR $L$mulx
; MUL/ADC path: rbx = first word of the second operand, rbp = its address.
758 mov rbx,QWORD[rdx]
759 mov rbp,rdx
760 call __rsaz_512_mul
761
762DB 102,72,15,126,199 ; movq rdi,xmm0
763DB 102,72,15,126,205 ; movq rbp,xmm1
764
; Low eight product words go to the reduce helper in r8..r15.
765 mov r8,QWORD[rsp]
766 mov r9,QWORD[8+rsp]
767 mov r10,QWORD[16+rsp]
768 mov r11,QWORD[24+rsp]
769 mov r12,QWORD[32+rsp]
770 mov r13,QWORD[40+rsp]
771 mov r14,QWORD[48+rsp]
772 mov r15,QWORD[56+rsp]
773
774 call __rsaz_512_reduce
775 jmp NEAR $L$mul_tail
776
777ALIGN 32
778$L$mulx:
; MULX path: rbp = address of the second operand, rdx = its first word.
; CF is clear here (the cmp above compared equal), which the first adc in
; __rsaz_512_mulx relies on.
779 mov rbp,rdx
780 mov rdx,QWORD[rdx]
781 call __rsaz_512_mulx
782
783DB 102,72,15,126,199 ; movq rdi,xmm0
784DB 102,72,15,126,205 ; movq rbp,xmm1
785
; rdx = constant saved at [128+rsp], as __rsaz_512_reducex expects.
786 mov rdx,QWORD[128+rsp]
787 mov r8,QWORD[rsp]
788 mov r9,QWORD[8+rsp]
789 mov r10,QWORD[16+rsp]
790 mov r11,QWORD[24+rsp]
791 mov r12,QWORD[32+rsp]
792 mov r13,QWORD[40+rsp]
793 mov r14,QWORD[48+rsp]
794 mov r15,QWORD[56+rsp]
795
796 call __rsaz_512_reducex
797$L$mul_tail:
; Add the high eight product words; rcx = 0 or all-ones from the final
; carry, used as the mask by __rsaz_512_subtract.
798 add r8,QWORD[64+rsp]
799 adc r9,QWORD[72+rsp]
800 adc r10,QWORD[80+rsp]
801 adc r11,QWORD[88+rsp]
802 adc r12,QWORD[96+rsp]
803 adc r13,QWORD[104+rsp]
804 adc r14,QWORD[112+rsp]
805 adc r15,QWORD[120+rsp]
806 sbb rcx,rcx
807
808 call __rsaz_512_subtract
809
; rax = stack pointer as it was before the six pushes.
810 lea rax,[((128+24+48))+rsp]
811
812 mov r15,QWORD[((-48))+rax]
813
814 mov r14,QWORD[((-40))+rax]
815
816 mov r13,QWORD[((-32))+rax]
817
818 mov r12,QWORD[((-24))+rax]
819
820 mov rbp,QWORD[((-16))+rax]
821
822 mov rbx,QWORD[((-8))+rax]
823
824 lea rsp,[rax]
825
826$L$mul_epilogue:
827 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
828 mov rsi,QWORD[16+rsp]
829 DB 0F3h,0C3h ;repret
830
831$L$SEH_end_rsaz_512_mul:
;-----------------------------------------------------------------------
; rsaz_512_mul_gather4 -- multiply an 8-qword operand by an 8-qword value
; selected from a table, then reduce + conditionally subtract.
;
; ABI:  Win64 entry; the six arguments (rcx, rdx, r8, r9, [40+rsp],
;       [48+rsp]) are moved into rdi, rsi, rdx, rcx, r8, r9.
; In (after the shuffle):
;       rdi = destination, 8 qwords
;       rsi = multiplicand, 8 qwords
;       rdx = table base; one 128-byte row per word of the multiplier
;             (must be 16-byte aligned: it is read with movdqa)
;       rcx = pointer handed to the reduce/subtract helpers in rbp
;             (presumably the modulus -- confirm against caller)
;       r8  = 64-bit constant for the reduce helpers (presumably n0)
;       r9d = index of the table entry to use
; Frame (after "sub rsp,328"):
;       [0..127+rsp]    16-qword product
;       [128+rsp]       r8 argument
;       [136+rsp]       rdi argument (rdi is reused as a scratch pointer)
;       [144+rsp]       rcx argument
;       [160..319+rsp]  xmm6-xmm15 (callee-saved under Win64)
; Every table row is read in full and combined with a mask, so the set of
; addresses touched does not depend on the index.
;-----------------------------------------------------------------------
832global rsaz_512_mul_gather4
833
834ALIGN 32
835rsaz_512_mul_gather4:
836 mov QWORD[8+rsp],rdi ;WIN64 prologue
837 mov QWORD[16+rsp],rsi
838 mov rax,rsp
839$L$SEH_begin_rsaz_512_mul_gather4:
840 mov rdi,rcx
841 mov rsi,rdx
842 mov rdx,r8
843 mov rcx,r9
844 mov r8,QWORD[40+rsp]
845 mov r9,QWORD[48+rsp]
846
847
848
849 push rbx
850
851 push rbp
852
853 push r12
854
855 push r13
856
857 push r14
858
859 push r15
860
861
862 sub rsp,328
863
864 movaps XMMWORD[160+rsp],xmm6
865 movaps XMMWORD[176+rsp],xmm7
866 movaps XMMWORD[192+rsp],xmm8
867 movaps XMMWORD[208+rsp],xmm9
868 movaps XMMWORD[224+rsp],xmm10
869 movaps XMMWORD[240+rsp],xmm11
870 movaps XMMWORD[256+rsp],xmm12
871 movaps XMMWORD[272+rsp],xmm13
872 movaps XMMWORD[288+rsp],xmm14
873 movaps XMMWORD[304+rsp],xmm15
874$L$mul_gather4_body:
; Build eight selection masks in xmm0..xmm7.  The index is broadcast to
; all four dword lanes of xmm8; lane counters start from the constants at
; $L$inc (defined outside this view) and are advanced by the step at
; $L$inc+16; pcmpeqd leaves all-ones only in lanes whose counter equals
; the index.
875 movd xmm8,r9d
876 movdqa xmm1,XMMWORD[(($L$inc+16))]
877 movdqa xmm0,XMMWORD[$L$inc]
878
879 pshufd xmm8,xmm8,0
880 movdqa xmm7,xmm1
881 movdqa xmm2,xmm1
882 paddd xmm1,xmm0
883 pcmpeqd xmm0,xmm8
884 movdqa xmm3,xmm7
885 paddd xmm2,xmm1
886 pcmpeqd xmm1,xmm8
887 movdqa xmm4,xmm7
888 paddd xmm3,xmm2
889 pcmpeqd xmm2,xmm8
890 movdqa xmm5,xmm7
891 paddd xmm4,xmm3
892 pcmpeqd xmm3,xmm8
893 movdqa xmm6,xmm7
894 paddd xmm5,xmm4
895 pcmpeqd xmm4,xmm8
896 paddd xmm6,xmm5
897 pcmpeqd xmm5,xmm8
898 paddd xmm7,xmm6
899 pcmpeqd xmm6,xmm8
900 pcmpeqd xmm7,xmm8
901
; Gather multiplier word 0: read the whole first 128-byte row, mask each
; 16-byte chunk, OR everything together.  rbp is left pointing at the
; next row.
902 movdqa xmm8,XMMWORD[rdx]
903 movdqa xmm9,XMMWORD[16+rdx]
904 movdqa xmm10,XMMWORD[32+rdx]
905 movdqa xmm11,XMMWORD[48+rdx]
906 pand xmm8,xmm0
907 movdqa xmm12,XMMWORD[64+rdx]
908 pand xmm9,xmm1
909 movdqa xmm13,XMMWORD[80+rdx]
910 pand xmm10,xmm2
911 movdqa xmm14,XMMWORD[96+rdx]
912 pand xmm11,xmm3
913 movdqa xmm15,XMMWORD[112+rdx]
914 lea rbp,[128+rdx]
915 pand xmm12,xmm4
916 pand xmm13,xmm5
917 pand xmm14,xmm6
918 pand xmm15,xmm7
919 por xmm8,xmm10
920 por xmm9,xmm11
921 por xmm8,xmm12
922 por xmm9,xmm13
923 por xmm8,xmm14
924 por xmm9,xmm15
925
; Fold the two qword halves together (0x4e swaps them) so the selected
; qword ends up in the low half of xmm8.
926 por xmm8,xmm9
927 pshufd xmm9,xmm8,0x4e
928 por xmm8,xmm9
; Use the MULX/ADCX/ADOX path only when both bits of 0x80100 are set in
; the dword at OPENSSL_ia32cap_P+8.
929 mov r11d,0x80100
930 and r11d,DWORD[((OPENSSL_ia32cap_P+8))]
931 cmp r11d,0x80100
932 je NEAR $L$mulx_gather
933DB 102,76,15,126,195 ; movq rbx,xmm8 -- gathered multiplier word 0
934
935 mov QWORD[128+rsp],r8
936 mov QWORD[((128+8))+rsp],rdi
937 mov QWORD[((128+16))+rsp],rcx
938
; First row: a[0..7] * rbx.  Product word 0 is stored at [rsp]; the
; running window is kept in r8..r15.
939 mov rax,QWORD[rsi]
940 mov rcx,QWORD[8+rsi]
941 mul rbx
942 mov QWORD[rsp],rax
943 mov rax,rcx
944 mov r8,rdx
945
946 mul rbx
947 add r8,rax
948 mov rax,QWORD[16+rsi]
949 mov r9,rdx
950 adc r9,0
951
952 mul rbx
953 add r9,rax
954 mov rax,QWORD[24+rsi]
955 mov r10,rdx
956 adc r10,0
957
958 mul rbx
959 add r10,rax
960 mov rax,QWORD[32+rsi]
961 mov r11,rdx
962 adc r11,0
963
964 mul rbx
965 add r11,rax
966 mov rax,QWORD[40+rsi]
967 mov r12,rdx
968 adc r12,0
969
970 mul rbx
971 add r12,rax
972 mov rax,QWORD[48+rsi]
973 mov r13,rdx
974 adc r13,0
975
976 mul rbx
977 add r13,rax
978 mov rax,QWORD[56+rsi]
979 mov r14,rdx
980 adc r14,0
981
982 mul rbx
983 add r14,rax
984 mov rax,QWORD[rsi]
985 mov r15,rdx
986 adc r15,0
987
; rdi = where the next finished product word goes; ecx = rows remaining.
988 lea rdi,[8+rsp]
989 mov ecx,7
990 jmp NEAR $L$oop_mul_gather
991
992ALIGN 32
993$L$oop_mul_gather:
; Gather the next multiplier word from the row at rbp (same full-row
; masked read as above), then advance rbp by one row.
994 movdqa xmm8,XMMWORD[rbp]
995 movdqa xmm9,XMMWORD[16+rbp]
996 movdqa xmm10,XMMWORD[32+rbp]
997 movdqa xmm11,XMMWORD[48+rbp]
998 pand xmm8,xmm0
999 movdqa xmm12,XMMWORD[64+rbp]
1000 pand xmm9,xmm1
1001 movdqa xmm13,XMMWORD[80+rbp]
1002 pand xmm10,xmm2
1003 movdqa xmm14,XMMWORD[96+rbp]
1004 pand xmm11,xmm3
1005 movdqa xmm15,XMMWORD[112+rbp]
1006 lea rbp,[128+rbp]
1007 pand xmm12,xmm4
1008 pand xmm13,xmm5
1009 pand xmm14,xmm6
1010 pand xmm15,xmm7
1011 por xmm8,xmm10
1012 por xmm9,xmm11
1013 por xmm8,xmm12
1014 por xmm9,xmm13
1015 por xmm8,xmm14
1016 por xmm9,xmm15
1017
1018 por xmm8,xmm9
1019 pshufd xmm9,xmm8,0x4e
1020 por xmm8,xmm9
1021DB 102,76,15,126,195 ; movq rbx,xmm8
1022
; a[0..7] * rbx added into the window; the lowest word is finished and
; stored through rdi, the rest shifts down one register.
1023 mul rbx
1024 add r8,rax
1025 mov rax,QWORD[8+rsi]
1026 mov QWORD[rdi],r8
1027 mov r8,rdx
1028 adc r8,0
1029
1030 mul rbx
1031 add r9,rax
1032 mov rax,QWORD[16+rsi]
1033 adc rdx,0
1034 add r8,r9
1035 mov r9,rdx
1036 adc r9,0
1037
1038 mul rbx
1039 add r10,rax
1040 mov rax,QWORD[24+rsi]
1041 adc rdx,0
1042 add r9,r10
1043 mov r10,rdx
1044 adc r10,0
1045
1046 mul rbx
1047 add r11,rax
1048 mov rax,QWORD[32+rsi]
1049 adc rdx,0
1050 add r10,r11
1051 mov r11,rdx
1052 adc r11,0
1053
1054 mul rbx
1055 add r12,rax
1056 mov rax,QWORD[40+rsi]
1057 adc rdx,0
1058 add r11,r12
1059 mov r12,rdx
1060 adc r12,0
1061
1062 mul rbx
1063 add r13,rax
1064 mov rax,QWORD[48+rsi]
1065 adc rdx,0
1066 add r12,r13
1067 mov r13,rdx
1068 adc r13,0
1069
1070 mul rbx
1071 add r14,rax
1072 mov rax,QWORD[56+rsi]
1073 adc rdx,0
1074 add r13,r14
1075 mov r14,rdx
1076 adc r14,0
1077
1078 mul rbx
1079 add r15,rax
1080 mov rax,QWORD[rsi]
1081 adc rdx,0
1082 add r14,r15
1083 mov r15,rdx
1084 adc r15,0
1085
1086 lea rdi,[8+rdi]
1087
1088 dec ecx
1089 jnz NEAR $L$oop_mul_gather
1090
; Flush the remaining window: these are the high eight product words.
1091 mov QWORD[rdi],r8
1092 mov QWORD[8+rdi],r9
1093 mov QWORD[16+rdi],r10
1094 mov QWORD[24+rdi],r11
1095 mov QWORD[32+rdi],r12
1096 mov QWORD[40+rdi],r13
1097 mov QWORD[48+rdi],r14
1098 mov QWORD[56+rdi],r15
1099
; Restore the destination pointer; rbp = saved rcx argument for the helpers.
1100 mov rdi,QWORD[((128+8))+rsp]
1101 mov rbp,QWORD[((128+16))+rsp]
1102
1103 mov r8,QWORD[rsp]
1104 mov r9,QWORD[8+rsp]
1105 mov r10,QWORD[16+rsp]
1106 mov r11,QWORD[24+rsp]
1107 mov r12,QWORD[32+rsp]
1108 mov r13,QWORD[40+rsp]
1109 mov r14,QWORD[48+rsp]
1110 mov r15,QWORD[56+rsp]
1111
1112 call __rsaz_512_reduce
1113 jmp NEAR $L$mul_gather_tail
1114
1115ALIGN 32
1116$L$mulx_gather:
1117DB 102,76,15,126,194 ; movq rdx,xmm8 -- gathered multiplier word 0 (implicit mulx operand)
1118
1119 mov QWORD[128+rsp],r8
1120 mov QWORD[((128+8))+rsp],rdi
1121 mov QWORD[((128+16))+rsp],rcx
1122
; First row: a[0..7] * rdx.  Product word 0 goes to [rsp]; "xor edi,edi"
; zeroes rdi (used below as the zero operand) and clears CF/OF.
1123 mulx r8,rbx,QWORD[rsi]
1124 mov QWORD[rsp],rbx
1125 xor edi,edi
1126
1127 mulx r9,rax,QWORD[8+rsi]
1128
1129 mulx r10,rbx,QWORD[16+rsi]
1130 adcx r8,rax
1131
1132 mulx r11,rax,QWORD[24+rsi]
1133 adcx r9,rbx
1134
1135 mulx r12,rbx,QWORD[32+rsi]
1136 adcx r10,rax
1137
1138 mulx r13,rax,QWORD[40+rsi]
1139 adcx r11,rbx
1140
1141 mulx r14,rbx,QWORD[48+rsi]
1142 adcx r12,rax
1143
1144 mulx r15,rax,QWORD[56+rsi]
1145 adcx r13,rbx
1146 adcx r14,rax
1147DB 0x67 ; address-size prefix on a register-only mov (presumably padding)
1148 mov rbx,r8
1149 adcx r15,rdi
1150
; rcx counts -7..-1; each iteration stores one word at [64+rcx*8+rsp],
; i.e. product words 1..7.
1151 mov rcx,-7
1152 jmp NEAR $L$oop_mulx_gather
1153
1154ALIGN 32
1155$L$oop_mulx_gather:
; Gather the next multiplier word from the row at rbp, advance rbp.
1156 movdqa xmm8,XMMWORD[rbp]
1157 movdqa xmm9,XMMWORD[16+rbp]
1158 movdqa xmm10,XMMWORD[32+rbp]
1159 movdqa xmm11,XMMWORD[48+rbp]
1160 pand xmm8,xmm0
1161 movdqa xmm12,XMMWORD[64+rbp]
1162 pand xmm9,xmm1
1163 movdqa xmm13,XMMWORD[80+rbp]
1164 pand xmm10,xmm2
1165 movdqa xmm14,XMMWORD[96+rbp]
1166 pand xmm11,xmm3
1167 movdqa xmm15,XMMWORD[112+rbp]
1168 lea rbp,[128+rbp]
1169 pand xmm12,xmm4
1170 pand xmm13,xmm5
1171 pand xmm14,xmm6
1172 pand xmm15,xmm7
1173 por xmm8,xmm10
1174 por xmm9,xmm11
1175 por xmm8,xmm12
1176 por xmm9,xmm13
1177 por xmm8,xmm14
1178 por xmm9,xmm15
1179
1180 por xmm8,xmm9
1181 pshufd xmm9,xmm8,0x4e
1182 por xmm8,xmm9
1183DB 102,76,15,126,194 ; movq rdx,xmm8
1184
; a[0..7] * rdx added into the window using two independent carry chains
; (adcx on CF, adox on OF).  rbx carries the word being finished.
1185DB 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00 ; mulx r8,rax,QWORD[0+rsi]
1186 adcx rbx,rax
1187 adox r8,r9
1188
1189 mulx r9,rax,QWORD[8+rsi]
1190 adcx r8,rax
1191 adox r9,r10
1192
1193 mulx r10,rax,QWORD[16+rsi]
1194 adcx r9,rax
1195 adox r10,r11
1196
1197DB 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00 ; mulx r11,rax,QWORD[24+rsi]
1198 adcx r10,rax
1199 adox r11,r12
1200
1201 mulx r12,rax,QWORD[32+rsi]
1202 adcx r11,rax
1203 adox r12,r13
1204
1205 mulx r13,rax,QWORD[40+rsi]
1206 adcx r12,rax
1207 adox r13,r14
1208
1209DB 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 ; mulx r14,rax,QWORD[48+rsi]
1210 adcx r13,rax
1211DB 0x67 ; address-size prefix on the next instruction (presumably padding)
1212 adox r14,r15
1213
1214 mulx r15,rax,QWORD[56+rsi]
1215 mov QWORD[64+rcx*8+rsp],rbx
1216 adcx r14,rax
1217 adox r15,rdi
1218 mov rbx,r8
1219 adcx r15,rdi
1220
1221 inc rcx
1222 jnz NEAR $L$oop_mulx_gather
1223
; Flush the remaining window: the high eight product words.
1224 mov QWORD[64+rsp],r8
1225 mov QWORD[((64+8))+rsp],r9
1226 mov QWORD[((64+16))+rsp],r10
1227 mov QWORD[((64+24))+rsp],r11
1228 mov QWORD[((64+32))+rsp],r12
1229 mov QWORD[((64+40))+rsp],r13
1230 mov QWORD[((64+48))+rsp],r14
1231 mov QWORD[((64+56))+rsp],r15
1232
; rdx = saved r8 argument (as __rsaz_512_reducex expects), rdi = saved
; destination, rbp = saved rcx argument.
1233 mov rdx,QWORD[128+rsp]
1234 mov rdi,QWORD[((128+8))+rsp]
1235 mov rbp,QWORD[((128+16))+rsp]
1236
1237 mov r8,QWORD[rsp]
1238 mov r9,QWORD[8+rsp]
1239 mov r10,QWORD[16+rsp]
1240 mov r11,QWORD[24+rsp]
1241 mov r12,QWORD[32+rsp]
1242 mov r13,QWORD[40+rsp]
1243 mov r14,QWORD[48+rsp]
1244 mov r15,QWORD[56+rsp]
1245
1246 call __rsaz_512_reducex
1247
1248$L$mul_gather_tail:
; Add the high eight product words; rcx = 0 or all-ones from the final
; carry, used as the mask by __rsaz_512_subtract.
1249 add r8,QWORD[64+rsp]
1250 adc r9,QWORD[72+rsp]
1251 adc r10,QWORD[80+rsp]
1252 adc r11,QWORD[88+rsp]
1253 adc r12,QWORD[96+rsp]
1254 adc r13,QWORD[104+rsp]
1255 adc r14,QWORD[112+rsp]
1256 adc r15,QWORD[120+rsp]
1257 sbb rcx,rcx
1258
1259 call __rsaz_512_subtract
1260
; Restore xmm6-xmm15 (offsets are written relative to rax = rsp+200),
; then move rax up to the pre-push stack pointer (rsp+328+48).
1261 lea rax,[((128+24+48))+rsp]
1262 movaps xmm6,XMMWORD[((160-200))+rax]
1263 movaps xmm7,XMMWORD[((176-200))+rax]
1264 movaps xmm8,XMMWORD[((192-200))+rax]
1265 movaps xmm9,XMMWORD[((208-200))+rax]
1266 movaps xmm10,XMMWORD[((224-200))+rax]
1267 movaps xmm11,XMMWORD[((240-200))+rax]
1268 movaps xmm12,XMMWORD[((256-200))+rax]
1269 movaps xmm13,XMMWORD[((272-200))+rax]
1270 movaps xmm14,XMMWORD[((288-200))+rax]
1271 movaps xmm15,XMMWORD[((304-200))+rax]
1272 lea rax,[176+rax]
1273
1274 mov r15,QWORD[((-48))+rax]
1275
1276 mov r14,QWORD[((-40))+rax]
1277
1278 mov r13,QWORD[((-32))+rax]
1279
1280 mov r12,QWORD[((-24))+rax]
1281
1282 mov rbp,QWORD[((-16))+rax]
1283
1284 mov rbx,QWORD[((-8))+rax]
1285
1286 lea rsp,[rax]
1287
1288$L$mul_gather4_epilogue:
1289 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1290 mov rsi,QWORD[16+rsp]
1291 DB 0F3h,0C3h ;repret
1292
1293$L$SEH_end_rsaz_512_mul_gather4:
;-----------------------------------------------------------------------
; rsaz_512_mul_scatter4 -- multiply the destination buffer by a second
; operand, reduce + conditionally subtract, write the result back to the
; destination and also scatter it into a table with a 128-byte stride.
;
; ABI:  Win64 entry; the six arguments (rcx, rdx, r8, r9, [40+rsp],
;       [48+rsp]) are moved into rdi, rsi, rdx, rcx, r8, r9.
; In (after the shuffle):
;       rdi = destination AND one multiplication operand, 8 qwords
;             (rbp = rdi is what the multiply helpers walk)
;       rsi = other operand, 8 qwords
;       rdx = pointer handed to the reduce/subtract helpers in rbp
;             (presumably the modulus -- confirm against caller)
;       rcx = 64-bit constant stored at [128+rsp] for the reduce helpers
;             (presumably the Montgomery n0 value -- confirm)
;       r8  = table base
;       r9d = slot index; result word k is stored at r8 + r9*8 + k*128
; Frame: [0..127+rsp] 16-qword product, [128+rsp] = rcx argument.
;-----------------------------------------------------------------------
1294global rsaz_512_mul_scatter4
1295
1296ALIGN 32
1297rsaz_512_mul_scatter4:
1298 mov QWORD[8+rsp],rdi ;WIN64 prologue
1299 mov QWORD[16+rsp],rsi
1300 mov rax,rsp
1301$L$SEH_begin_rsaz_512_mul_scatter4:
1302 mov rdi,rcx
1303 mov rsi,rdx
1304 mov rdx,r8
1305 mov rcx,r9
1306 mov r8,QWORD[40+rsp]
1307 mov r9,QWORD[48+rsp]
1308
1309
1310
1311 push rbx
1312
1313 push rbp
1314
1315 push r12
1316
1317 push r13
1318
1319 push r14
1320
1321 push r15
1322
1323
; Writing r9d zero-extends into r9, so the index is treated as unsigned
; 32-bit in the address computation below.
1324 mov r9d,r9d
1325 sub rsp,128+24
1326
1327$L$mul_scatter4_body:
; r8 = address of the first scattered word (table base + index*8).
1328 lea r8,[r9*8+r8]
1329DB 102,72,15,110,199 ; movq xmm0,rdi -- destination pointer
1330DB 102,72,15,110,202 ; movq xmm1,rdx -- pointer for the reduce/subtract helpers
1331DB 102,73,15,110,208 ; movq xmm2,r8 -- scatter target
1332 mov QWORD[128+rsp],rcx
1333
1334 mov rbp,rdi
; Use the MULX/ADCX/ADOX helpers only when both bits of 0x80100 are set in
; the dword at OPENSSL_ia32cap_P+8.
1335 mov r11d,0x80100
1336 and r11d,DWORD[((OPENSSL_ia32cap_P+8))]
1337 cmp r11d,0x80100
1338 je NEAR $L$mulx_scatter
; MUL/ADC path: rbx = first word of the operand at rbp.
1339 mov rbx,QWORD[rdi]
1340 call __rsaz_512_mul
1341
1342DB 102,72,15,126,199 ; movq rdi,xmm0
1343DB 102,72,15,126,205 ; movq rbp,xmm1
1344
1345 mov r8,QWORD[rsp]
1346 mov r9,QWORD[8+rsp]
1347 mov r10,QWORD[16+rsp]
1348 mov r11,QWORD[24+rsp]
1349 mov r12,QWORD[32+rsp]
1350 mov r13,QWORD[40+rsp]
1351 mov r14,QWORD[48+rsp]
1352 mov r15,QWORD[56+rsp]
1353
1354 call __rsaz_512_reduce
1355 jmp NEAR $L$mul_scatter_tail
1356
1357ALIGN 32
1358$L$mulx_scatter:
; MULX path: rdx = first word of the operand at rbp.  CF is clear here
; (the cmp above compared equal), which __rsaz_512_mulx relies on.
1359 mov rdx,QWORD[rdi]
1360 call __rsaz_512_mulx
1361
1362DB 102,72,15,126,199 ; movq rdi,xmm0
1363DB 102,72,15,126,205 ; movq rbp,xmm1
1364
; rdx = constant saved at [128+rsp], as __rsaz_512_reducex expects.
1365 mov rdx,QWORD[128+rsp]
1366 mov r8,QWORD[rsp]
1367 mov r9,QWORD[8+rsp]
1368 mov r10,QWORD[16+rsp]
1369 mov r11,QWORD[24+rsp]
1370 mov r12,QWORD[32+rsp]
1371 mov r13,QWORD[40+rsp]
1372 mov r14,QWORD[48+rsp]
1373 mov r15,QWORD[56+rsp]
1374
1375 call __rsaz_512_reducex
1376
1377$L$mul_scatter_tail:
; Add the high eight product words.  The movq in between does not touch
; flags, so "sbb rcx,rcx" still sees the carry of the last adc.
1378 add r8,QWORD[64+rsp]
1379 adc r9,QWORD[72+rsp]
1380 adc r10,QWORD[80+rsp]
1381 adc r11,QWORD[88+rsp]
1382 adc r12,QWORD[96+rsp]
1383 adc r13,QWORD[104+rsp]
1384 adc r14,QWORD[112+rsp]
1385 adc r15,QWORD[120+rsp]
1386DB 102,72,15,126,214 ; movq rsi,xmm2 -- scatter target
1387 sbb rcx,rcx
1388
1389 call __rsaz_512_subtract
1390
; Scatter the eight result words, one every 128 bytes.
1391 mov QWORD[rsi],r8
1392 mov QWORD[128+rsi],r9
1393 mov QWORD[256+rsi],r10
1394 mov QWORD[384+rsi],r11
1395 mov QWORD[512+rsi],r12
1396 mov QWORD[640+rsi],r13
1397 mov QWORD[768+rsi],r14
1398 mov QWORD[896+rsi],r15
1399
; rax = stack pointer as it was before the six pushes.
1400 lea rax,[((128+24+48))+rsp]
1401
1402 mov r15,QWORD[((-48))+rax]
1403
1404 mov r14,QWORD[((-40))+rax]
1405
1406 mov r13,QWORD[((-32))+rax]
1407
1408 mov r12,QWORD[((-24))+rax]
1409
1410 mov rbp,QWORD[((-16))+rax]
1411
1412 mov rbx,QWORD[((-8))+rax]
1413
1414 lea rsp,[rax]
1415
1416$L$mul_scatter4_epilogue:
1417 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1418 mov rsi,QWORD[16+rsp]
1419 DB 0F3h,0C3h ;repret
1420
1421$L$SEH_end_rsaz_512_mul_scatter4:
;-----------------------------------------------------------------------
; rsaz_512_mul_by_one -- run the reduce helper directly on an 8-qword
; input (no multiplication, no high half added, no final subtract) and
; store the eight resulting words.
;
; ABI:  Win64 entry; the four arguments (rcx, rdx, r8, r9) are moved into
;       rdi, rsi, rdx, rcx.
; In (after the shuffle):
;       rdi = destination, 8 qwords
;       rsi = source, 8 qwords
;       rdx = pointer handed to the reduce helper in rbp
;             (presumably the modulus -- confirm against caller)
;       rcx = 64-bit constant stored at [128+rsp] for the reduce helpers
;             (presumably the Montgomery n0 value -- confirm)
;-----------------------------------------------------------------------
1422global rsaz_512_mul_by_one
1423
1424ALIGN 32
1425rsaz_512_mul_by_one:
1426 mov QWORD[8+rsp],rdi ;WIN64 prologue
1427 mov QWORD[16+rsp],rsi
1428 mov rax,rsp
1429$L$SEH_begin_rsaz_512_mul_by_one:
1430 mov rdi,rcx
1431 mov rsi,rdx
1432 mov rdx,r8
1433 mov rcx,r9
1434
1435
1436
1437 push rbx
1438
1439 push rbp
1440
1441 push r12
1442
1443 push r13
1444
1445 push r14
1446
1447 push r15
1448
1449
1450 sub rsp,128+24
1451
1452$L$mul_by_one_body:
; eax = capability dword, tested further down (after the loads) against 0x80100.
1453 mov eax,DWORD[((OPENSSL_ia32cap_P+8))]
1454 mov rbp,rdx
1455 mov QWORD[128+rsp],rcx
1456
1457 mov r8,QWORD[rsi]
1458 pxor xmm0,xmm0
1459 mov r9,QWORD[8+rsi]
1460 mov r10,QWORD[16+rsi]
1461 mov r11,QWORD[24+rsi]
1462 mov r12,QWORD[32+rsi]
1463 mov r13,QWORD[40+rsi]
1464 mov r14,QWORD[48+rsi]
1465 mov r15,QWORD[56+rsi]
1466
; Zero the first 112 bytes of the frame ([0..111+rsp]).
1467 movdqa XMMWORD[rsp],xmm0
1468 movdqa XMMWORD[16+rsp],xmm0
1469 movdqa XMMWORD[32+rsp],xmm0
1470 movdqa XMMWORD[48+rsp],xmm0
1471 movdqa XMMWORD[64+rsp],xmm0
1472 movdqa XMMWORD[80+rsp],xmm0
1473 movdqa XMMWORD[96+rsp],xmm0
; Pick the MULX/ADCX/ADOX reduce helper only when both bits are set.
1474 and eax,0x80100
1475 cmp eax,0x80100
1476 je NEAR $L$by_one_callx
1477 call __rsaz_512_reduce
1478 jmp NEAR $L$by_one_tail
1479ALIGN 32
1480$L$by_one_callx:
; __rsaz_512_reducex expects the saved constant in rdx.
1481 mov rdx,QWORD[128+rsp]
1482 call __rsaz_512_reducex
1483$L$by_one_tail:
1484 mov QWORD[rdi],r8
1485 mov QWORD[8+rdi],r9
1486 mov QWORD[16+rdi],r10
1487 mov QWORD[24+rdi],r11
1488 mov QWORD[32+rdi],r12
1489 mov QWORD[40+rdi],r13
1490 mov QWORD[48+rdi],r14
1491 mov QWORD[56+rdi],r15
1492
; rax = stack pointer as it was before the six pushes.
1493 lea rax,[((128+24+48))+rsp]
1494
1495 mov r15,QWORD[((-48))+rax]
1496
1497 mov r14,QWORD[((-40))+rax]
1498
1499 mov r13,QWORD[((-32))+rax]
1500
1501 mov r12,QWORD[((-24))+rax]
1502
1503 mov rbp,QWORD[((-16))+rax]
1504
1505 mov rbx,QWORD[((-8))+rax]
1506
1507 lea rsp,[rax]
1508
1509$L$mul_by_one_epilogue:
1510 mov rdi,QWORD[8+rsp] ;WIN64 epilogue
1511 mov rsi,QWORD[16+rsp]
1512 DB 0F3h,0C3h ;repret
1513
1514$L$SEH_end_rsaz_512_mul_by_one:
1515
;-----------------------------------------------------------------------
; __rsaz_512_reduce -- internal helper, MUL/ADC flavour.  Eight rounds of
; word-by-word reduction of r8..r15 against the 8 qwords at rbp (this has
; the shape of a word-serial Montgomery reduction).
;
; In:    r8..r15 = eight 64-bit words, r8 least significant
;        rbp     = pointer to 8 qwords (multiplied by the per-round factor)
;        [128+8+rsp] = 64-bit constant.  rsp here already includes the
;                  return address, so this is the caller's [128+rsp].
; Out:   r8..r15 = reduced words
; Clobb: rax, rbx, rcx, rdx, rsi, flags
;-----------------------------------------------------------------------
1516ALIGN 32
1517__rsaz_512_reduce:
1518
; rbx = per-round factor = (lowest word) * constant.
1519 mov rbx,r8
1520 imul rbx,QWORD[((128+8))+rsp]
1521 mov rax,QWORD[rbp]
1522 mov ecx,8
1523 jmp NEAR $L$reduction_loop
1524
1525ALIGN 32
1526$L$reduction_loop:
; The low half of [rbp]*rbx is not added explicitly: "neg r8" sets CF
; exactly when r8 was non-zero, and that CF is folded into the high half.
; NOTE(review): this relies on the low word of r8 + [rbp]*rbx being zero
; by construction of rbx -- confirm against the constant's definition.
1527 mul rbx
1528 mov rax,QWORD[8+rbp]
1529 neg r8
1530 mov r8,rdx
1531 adc r8,0
1532
1533 mul rbx
1534 add r9,rax
1535 mov rax,QWORD[16+rbp]
1536 adc rdx,0
1537 add r8,r9
1538 mov r9,rdx
1539 adc r9,0
1540
1541 mul rbx
1542 add r10,rax
1543 mov rax,QWORD[24+rbp]
1544 adc rdx,0
1545 add r9,r10
1546 mov r10,rdx
1547 adc r10,0
1548
1549 mul rbx
1550 add r11,rax
1551 mov rax,QWORD[32+rbp]
1552 adc rdx,0
1553 add r10,r11
; Start computing the next round's factor early: rsi = constant, to be
; multiplied by the new lowest word r8 (final since the add above).
; mov does not touch flags, so the carry of "add r10,r11" survives.
1554 mov rsi,QWORD[((128+8))+rsp]
1555
1556
1557 adc rdx,0
1558 mov r11,rdx
1559
1560 mul rbx
1561 add r12,rax
1562 mov rax,QWORD[40+rbp]
1563 adc rdx,0
1564 imul rsi,r8
1565 add r11,r12
1566 mov r12,rdx
1567 adc r12,0
1568
1569 mul rbx
1570 add r13,rax
1571 mov rax,QWORD[48+rbp]
1572 adc rdx,0
1573 add r12,r13
1574 mov r13,rdx
1575 adc r13,0
1576
1577 mul rbx
1578 add r14,rax
1579 mov rax,QWORD[56+rbp]
1580 adc rdx,0
1581 add r13,r14
1582 mov r14,rdx
1583 adc r14,0
1584
; Last product of the round; rbx switches to the next round's factor and
; rax is preloaded with [rbp] for the next iteration.
1585 mul rbx
1586 mov rbx,rsi
1587 add r15,rax
1588 mov rax,QWORD[rbp]
1589 adc rdx,0
1590 add r14,r15
1591 mov r15,rdx
1592 adc r15,0
1593
1594 dec ecx
1595 jne NEAR $L$reduction_loop
1596
1597 DB 0F3h,0C3h ;repret
1598
1599
1600
;-----------------------------------------------------------------------
; __rsaz_512_reducex -- internal helper, MULX/ADCX/ADOX flavour of the
; eight-round word-by-word reduction.
;
; In:    r8..r15 = eight 64-bit words, r8 least significant
;        rbp     = pointer to 8 qwords (multiplied by the per-round factor)
;        rdx     = 64-bit constant (callers load it from their [128+rsp])
;        [128+8+rsp] = the same constant; rsp here includes the return
;                  address, so this is the caller's [128+rsp]
; Out:   r8..r15 = reduced words
; Clobb: rax, rbx, rcx, rdx, rsi, flags
;-----------------------------------------------------------------------
1601ALIGN 32
1602__rsaz_512_reducex:
1603
1604
; rdx = per-round factor = constant * lowest word.  "xor rsi,rsi" gives a
; zero operand for the final carry absorption and clears CF and OF.
1605 imul rdx,r8
1606 xor rsi,rsi
1607 mov ecx,8
1608 jmp NEAR $L$reduction_loopx
1609
1610ALIGN 32
1611$L$reduction_loopx:
; Two interleaved carry chains: adcx (CF) adds the low product halves,
; adox (OF) folds in the previous window words.  The first adcx result
; (rax) is discarded; only its carry is kept.
1612 mov rbx,r8
1613 mulx r8,rax,QWORD[rbp]
1614 adcx rax,rbx
1615 adox r8,r9
1616
1617 mulx r9,rax,QWORD[8+rbp]
1618 adcx r8,rax
1619 adox r9,r10
1620
1621 mulx r10,rbx,QWORD[16+rbp]
1622 adcx r9,rbx
1623 adox r10,r11
1624
1625 mulx r11,rbx,QWORD[24+rbp]
1626 adcx r10,rbx
1627 adox r11,r12
1628
1629DB 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 ; mulx r12,rbx,QWORD[32+rbp]
; r8 is final for this round: compute the next factor r8*constant into
; rbx (mulx leaves flags alone), keeping the current factor in rax.
1630 mov rax,rdx
1631 mov rdx,r8
1632 adcx r11,rbx
1633 adox r12,r13
1634
1635 mulx rdx,rbx,QWORD[((128+8))+rsp]
1636 mov rdx,rax
1637
1638 mulx r13,rax,QWORD[40+rbp]
1639 adcx r12,rax
1640 adox r13,r14
1641
1642DB 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00 ; mulx r14,rax,QWORD[48+rbp]
1643 adcx r13,rax
1644 adox r14,r15
1645
; Last product of the round; rdx switches to the next round's factor and
; both carry chains are drained into r15 through the zero in rsi.
1646 mulx r15,rax,QWORD[56+rbp]
1647 mov rdx,rbx
1648 adcx r14,rax
1649 adox r15,rsi
1650 adcx r15,rsi
1651
1652 dec ecx
1653 jne NEAR $L$reduction_loopx
1654
1655 DB 0F3h,0C3h ;repret
1656
1657
1658
;-----------------------------------------------------------------------
; __rsaz_512_subtract -- internal helper.  Stores r8..r15 to [rdi], then
; subtracts the 8 qwords at rbp from that value when the mask in rcx is
; all-ones (and subtracts nothing when rcx is zero), without branching.
;
; In:    r8..r15 = value, r8 least significant
;        rdi     = destination, 8 qwords
;        rbp     = pointer to the 8-qword subtrahend
;        rcx     = mask: 0 or all-ones (callers produce it with sbb rcx,rcx)
; Out:   [rdi] and r8..r15 = value - (subtrahend AND mask), mod 2^512
; Clobb: flags
;-----------------------------------------------------------------------
1659ALIGN 32
1660__rsaz_512_subtract:
1661
1662 mov QWORD[rdi],r8
1663 mov QWORD[8+rdi],r9
1664 mov QWORD[16+rdi],r10
1665 mov QWORD[24+rdi],r11
1666 mov QWORD[32+rdi],r12
1667 mov QWORD[40+rdi],r13
1668 mov QWORD[48+rdi],r14
1669 mov QWORD[56+rdi],r15
1670
; Build the masked two's-complement negation of the subtrahend: the low
; word is negated, the other seven are inverted.  That equals the full
; 512-bit negation only when the low word is non-zero (true for an odd
; subtrahend) -- NOTE(review): confirm callers guarantee this.
1671 mov r8,QWORD[rbp]
1672 mov r9,QWORD[8+rbp]
1673 neg r8
1674 not r9
1675 and r8,rcx
1676 mov r10,QWORD[16+rbp]
1677 and r9,rcx
1678 not r10
1679 mov r11,QWORD[24+rbp]
1680 and r10,rcx
1681 not r11
1682 mov r12,QWORD[32+rbp]
1683 and r11,rcx
1684 not r12
1685 mov r13,QWORD[40+rbp]
1686 and r12,rcx
1687 not r13
1688 mov r14,QWORD[48+rbp]
1689 and r13,rcx
1690 not r14
1691 mov r15,QWORD[56+rbp]
1692 and r14,rcx
1693 not r15
1694 and r15,rcx
1695
; Add the masked negation to the value just stored; the carry out of the
; top word is dropped.
1696 add r8,QWORD[rdi]
1697 adc r9,QWORD[8+rdi]
1698 adc r10,QWORD[16+rdi]
1699 adc r11,QWORD[24+rdi]
1700 adc r12,QWORD[32+rdi]
1701 adc r13,QWORD[40+rdi]
1702 adc r14,QWORD[48+rdi]
1703 adc r15,QWORD[56+rdi]
1704
1705 mov QWORD[rdi],r8
1706 mov QWORD[8+rdi],r9
1707 mov QWORD[16+rdi],r10
1708 mov QWORD[24+rdi],r11
1709 mov QWORD[32+rdi],r12
1710 mov QWORD[40+rdi],r13
1711 mov QWORD[48+rdi],r14
1712 mov QWORD[56+rdi],r15
1713
1714 DB 0F3h,0C3h ;repret
1715
1716
1717
;-----------------------------------------------------------------------
; __rsaz_512_mul -- internal helper, MUL/ADC flavour: 8x8-word schoolbook
; multiplication producing a 16-qword product.
;
; In:    rsi = pointer to the 8-qword multiplicand
;        rbp = pointer to the 8-qword multiplier
;        rbx = first multiplier word (callers preload it from [rbp])
; Out:   16 qwords at [8+rsp] as seen here, i.e. the caller's [rsp]
;        (the extra 8 is the return address)
; Clobb: rax, rbx, rcx, rdx, rdi, rbp, r8..r15, flags.  rdi is overwritten,
;        which is why callers park their destination pointer in xmm0.
;-----------------------------------------------------------------------
1718ALIGN 32
1719__rsaz_512_mul:
1720
1721 lea rdi,[8+rsp]
1722
; First row: a[0..7] * rbx.  Product word 0 is stored immediately; the
; running eight-word window lives in r8..r15.
1723 mov rax,QWORD[rsi]
1724 mul rbx
1725 mov QWORD[rdi],rax
1726 mov rax,QWORD[8+rsi]
1727 mov r8,rdx
1728
1729 mul rbx
1730 add r8,rax
1731 mov rax,QWORD[16+rsi]
1732 mov r9,rdx
1733 adc r9,0
1734
1735 mul rbx
1736 add r9,rax
1737 mov rax,QWORD[24+rsi]
1738 mov r10,rdx
1739 adc r10,0
1740
1741 mul rbx
1742 add r10,rax
1743 mov rax,QWORD[32+rsi]
1744 mov r11,rdx
1745 adc r11,0
1746
1747 mul rbx
1748 add r11,rax
1749 mov rax,QWORD[40+rsi]
1750 mov r12,rdx
1751 adc r12,0
1752
1753 mul rbx
1754 add r12,rax
1755 mov rax,QWORD[48+rsi]
1756 mov r13,rdx
1757 adc r13,0
1758
1759 mul rbx
1760 add r13,rax
1761 mov rax,QWORD[56+rsi]
1762 mov r14,rdx
1763 adc r14,0
1764
1765 mul rbx
1766 add r14,rax
1767 mov rax,QWORD[rsi]
1768 mov r15,rdx
1769 adc r15,0
1770
; Advance to the next multiplier word and the next output slot.
1771 lea rbp,[8+rbp]
1772 lea rdi,[8+rdi]
1773
; Seven more rows.
1774 mov ecx,7
1775 jmp NEAR $L$oop_mul
1776
1777ALIGN 32
1778$L$oop_mul:
; One row: a[0..7] * [rbp] added into the window.  The lowest word is
; finished and stored through rdi; the rest shifts down one register.
; rax already holds a[0] on entry.
1779 mov rbx,QWORD[rbp]
1780 mul rbx
1781 add r8,rax
1782 mov rax,QWORD[8+rsi]
1783 mov QWORD[rdi],r8
1784 mov r8,rdx
1785 adc r8,0
1786
1787 mul rbx
1788 add r9,rax
1789 mov rax,QWORD[16+rsi]
1790 adc rdx,0
1791 add r8,r9
1792 mov r9,rdx
1793 adc r9,0
1794
1795 mul rbx
1796 add r10,rax
1797 mov rax,QWORD[24+rsi]
1798 adc rdx,0
1799 add r9,r10
1800 mov r10,rdx
1801 adc r10,0
1802
1803 mul rbx
1804 add r11,rax
1805 mov rax,QWORD[32+rsi]
1806 adc rdx,0
1807 add r10,r11
1808 mov r11,rdx
1809 adc r11,0
1810
1811 mul rbx
1812 add r12,rax
1813 mov rax,QWORD[40+rsi]
1814 adc rdx,0
1815 add r11,r12
1816 mov r12,rdx
1817 adc r12,0
1818
1819 mul rbx
1820 add r13,rax
1821 mov rax,QWORD[48+rsi]
1822 adc rdx,0
1823 add r12,r13
1824 mov r13,rdx
1825 adc r13,0
1826
1827 mul rbx
1828 add r14,rax
1829 mov rax,QWORD[56+rsi]
1830 adc rdx,0
1831 add r13,r14
1832 mov r14,rdx
; lea leaves the flags alone, so the carry of "add r13,r14" is still
; available to the adc that follows.
1833 lea rbp,[8+rbp]
1834 adc r14,0
1835
1836 mul rbx
1837 add r15,rax
1838 mov rax,QWORD[rsi]
1839 adc rdx,0
1840 add r14,r15
1841 mov r15,rdx
1842 adc r15,0
1843
1844 lea rdi,[8+rdi]
1845
1846 dec ecx
1847 jnz NEAR $L$oop_mul
1848
; Flush the window: the high eight product words.
1849 mov QWORD[rdi],r8
1850 mov QWORD[8+rdi],r9
1851 mov QWORD[16+rdi],r10
1852 mov QWORD[24+rdi],r11
1853 mov QWORD[32+rdi],r12
1854 mov QWORD[40+rdi],r13
1855 mov QWORD[48+rdi],r14
1856 mov QWORD[56+rdi],r15
1857
1858 DB 0F3h,0C3h ;repret
1859
1860
1861
;-----------------------------------------------------------------------
; __rsaz_512_mulx -- 8x8-word multiply using MULX with ADCX/ADOX.
; The routine ends in a ret, so it is entered by call and [rsp] holds
; the return address while it runs.
; In:    rsi = pointer to 8 qwords (offsets 0..56 are read on each pass)
;        rdx = first multiplier word (MULX's implicit source operand),
;              already loaded by the caller
;        rbp = pointer to the multiplier; words at 8..56 are read here
; Out:   16 qwords stored at [8+rsp] .. [8+64+56+rsp]
; Clobb: rax, rbx, rcx, rdx, rdi, r8-r15, flags (rsi, rbp preserved)
; NOTE(review): the first adc below consumes CF exactly as it is on
; entry (mulx and mov do not modify flags), so callers presumably
; arrive with CF clear -- confirm at the call sites.
;-----------------------------------------------------------------------
1862ALIGN 32
1863__rsaz_512_mulx:
1864
; Pass 0: eight products by the preloaded rdx. mulx writes high:low to
; its two destinations without touching flags, so the adc chain can be
; interleaved with the multiplies.
1865 mulx r8,rbx,QWORD[rsi] ; r8:rbx = rdx * [rsi]
1866 mov rcx,-6 ; loop counter, counts up to zero: six looped passes
1867
1868 mulx r9,rax,QWORD[8+rsi]
1869 mov QWORD[8+rsp],rbx ; result word 0
1870
1871 mulx r10,rbx,QWORD[16+rsi]
1872 adc r8,rax
1873
1874 mulx r11,rax,QWORD[24+rsi]
1875 adc r9,rbx
1876
1877 mulx r12,rbx,QWORD[32+rsi]
1878 adc r10,rax
1879
1880 mulx r13,rax,QWORD[40+rsi]
1881 adc r11,rbx
1882
1883 mulx r14,rbx,QWORD[48+rsi]
1884 adc r12,rax
1885
1886 mulx r15,rax,QWORD[56+rsi]
1887 mov rdx,QWORD[8+rbp] ; multiplier word 1 for the first looped pass
1888 adc r13,rbx
1889 adc r14,rax
1890 adc r15,0
1891
1892 xor rdi,rdi ; rdi = 0 for the loop; xor also clears CF and OF
1893 jmp NEAR $L$oop_mulx ; skip the alignment padding that follows
1894
1895ALIGN 32
; Looped passes. Two independent carry chains run side by side:
; adcx (CF) adds the low product halves, adox (OF) adds the previous
; accumulator word into the new high half.
1896$L$oop_mulx:
1897 mov rbx,r8 ; lowest accumulator word; final once this pass adds to it
1898 mulx r8,rax,QWORD[rsi]
1899 adcx rbx,rax
1900 adox r8,r9
1901
1902 mulx r9,rax,QWORD[8+rsi]
1903 adcx r8,rax
1904 adox r9,r10
1905
1906 mulx r10,rax,QWORD[16+rsi]
1907 adcx r9,rax
1908 adox r10,r11
1909
1910 mulx r11,rax,QWORD[24+rsi]
1911 adcx r10,rax
1912 adox r11,r12
1913
1914DB 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00 ; raw bytes decode as mulx r12,rax,QWORD[32+rsi] (0x3e prefix, disp32)
1915 adcx r11,rax
1916 adox r12,r13
1917
1918 mulx r13,rax,QWORD[40+rsi]
1919 adcx r12,rax
1920 adox r13,r14
1921
1922 mulx r14,rax,QWORD[48+rsi]
1923 adcx r13,rax
1924 adox r14,r15
1925
1926 mulx r15,rax,QWORD[56+rsi]
1927 mov rdx,QWORD[64+rcx*8+rbp] ; next multiplier word: rcx=-6..-1 selects [16+rbp]..[56+rbp]
1928 mov QWORD[((8+64-8))+rcx*8+rsp],rbx ; finished word goes to [16+rsp]..[56+rsp]
1929 adcx r14,rax
1930 adox r15,rdi ; rdi is zero: fold the remaining OF carry into the top word
1931 adcx r15,rdi ; and likewise the remaining CF carry
1932
1933 inc rcx
1934 jnz NEAR $L$oop_mulx
1935
; Final pass, unrolled outside the loop: it uses the multiplier word
; loaded by the last iteration ([56+rbp]) and loads no further word.
1936 mov rbx,r8
1937 mulx r8,rax,QWORD[rsi]
1938 adcx rbx,rax
1939 adox r8,r9
1940
1941DB 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00 ; raw bytes decode as mulx r9,rax,QWORD[8+rsi] (disp32)
1942 adcx r8,rax
1943 adox r9,r10
1944
1945DB 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00 ; raw bytes decode as mulx r10,rax,QWORD[16+rsi] (disp32)
1946 adcx r9,rax
1947 adox r10,r11
1948
1949 mulx r11,rax,QWORD[24+rsi]
1950 adcx r10,rax
1951 adox r11,r12
1952
1953 mulx r12,rax,QWORD[32+rsi]
1954 adcx r11,rax
1955 adox r12,r13
1956
1957 mulx r13,rax,QWORD[40+rsi]
1958 adcx r12,rax
1959 adox r13,r14
1960
1961DB 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 ; raw bytes decode as mulx r14,rax,QWORD[48+rsi] (disp32)
1962 adcx r13,rax
1963 adox r14,r15
1964
1965DB 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00 ; raw bytes decode as mulx r15,rax,QWORD[56+rsi] (disp32)
1966 adcx r14,rax
1967 adox r15,rdi
1968 adcx r15,rdi
1969
; Store result word 7 (rbx) and words 8..15 (r8..r15).
1970 mov QWORD[((8+64-8))+rsp],rbx
1971 mov QWORD[((8+64))+rsp],r8
1972 mov QWORD[((8+64+8))+rsp],r9
1973 mov QWORD[((8+64+16))+rsp],r10
1974 mov QWORD[((8+64+24))+rsp],r11
1975 mov QWORD[((8+64+32))+rsp],r12
1976 mov QWORD[((8+64+40))+rsp],r13
1977 mov QWORD[((8+64+48))+rsp],r14
1978 mov QWORD[((8+64+56))+rsp],r15
1979
1980 DB 0F3h,0C3h ;repret
1981
1982
;-----------------------------------------------------------------------
; rsaz_512_scatter4 -- store 8 qwords into a table with a 128-byte stride.
; Arguments arrive in the Win64 registers used throughout this file.
; In:    rcx = table base
;        rdx = source, 8 consecutive qwords
;        r8  = slot index; all 64 bits are used, scaled by 8
; Out:   source word i is written to [rcx + r8*8 + i*128], i = 0..7
; Clobb: rax, rcx, rdx, r9, flags
; Presumably the C prototype is
;   void rsaz_512_scatter4(void *tbl, const BN_ULONG *val, int power);
; (not visible in this file -- confirm against the caller).
;-----------------------------------------------------------------------
1983global rsaz_512_scatter4
1984
1985ALIGN 16
1986rsaz_512_scatter4:
1987
1988 lea rcx,[r8*8+rcx] ; rcx = address of the selected slot in row 0
1989 mov r9d,8 ; eight words to store
1990 jmp NEAR $L$oop_scatter ; skip the alignment padding that follows
1991ALIGN 16
1992$L$oop_scatter:
1993 mov rax,QWORD[rdx] ; fetch the next source word
1994 lea rdx,[8+rdx]
1995 mov QWORD[rcx],rax
1996 lea rcx,[128+rcx] ; same slot, next 128-byte row
1997 dec r9d
1998 jnz NEAR $L$oop_scatter
1999 DB 0F3h,0C3h ;repret
2000
2001
2002
;-----------------------------------------------------------------------
; rsaz_512_gather4 -- read back one 8-qword value from a table laid out
; as 8 rows of 128 bytes (16 qword slots per row).
; Every slot of every row is loaded; the wanted slot is picked with
; compare masks instead of an index-dependent address.
; Arguments arrive in the Win64 registers used throughout this file.
; In:    rcx = destination, 8 qwords
;        rdx = table; read with movdqa, so it must be 16-byte aligned
;        r8d = slot index; values outside 0..15 match no mask and give
;              an all-zero result
; Clobb: rcx, rdx, r9, xmm0-xmm5, flags; xmm6-xmm15 are saved on the
;        stack in the prologue and restored before returning
;-----------------------------------------------------------------------
2003global rsaz_512_gather4
2004
2005ALIGN 16
2006rsaz_512_gather4:
2007
2008$L$SEH_begin_rsaz_512_gather4:
; Prologue emitted as raw bytes. The byte offsets of these instructions
; are the ones recorded in $L$SEH_info_rsaz_512_gather4.
2009DB 0x48,0x81,0xec,0xa8,0x00,0x00,0x00 ; sub rsp,0xa8 (ten 16-byte slots + 8 so rsp becomes 16-aligned)
2010DB 0x0f,0x29,0x34,0x24 ; movaps [rsp],xmm6
2011DB 0x0f,0x29,0x7c,0x24,0x10 ; movaps [rsp+0x10],xmm7
2012DB 0x44,0x0f,0x29,0x44,0x24,0x20 ; movaps [rsp+0x20],xmm8
2013DB 0x44,0x0f,0x29,0x4c,0x24,0x30 ; movaps [rsp+0x30],xmm9
2014DB 0x44,0x0f,0x29,0x54,0x24,0x40 ; movaps [rsp+0x40],xmm10
2015DB 0x44,0x0f,0x29,0x5c,0x24,0x50 ; movaps [rsp+0x50],xmm11
2016DB 0x44,0x0f,0x29,0x64,0x24,0x60 ; movaps [rsp+0x60],xmm12
2017DB 0x44,0x0f,0x29,0x6c,0x24,0x70 ; movaps [rsp+0x70],xmm13
2018DB 0x44,0x0f,0x29,0xb4,0x24,0x80,0,0,0 ; movaps [rsp+0x80],xmm14
2019DB 0x44,0x0f,0x29,0xbc,0x24,0x90,0,0,0 ; movaps [rsp+0x90],xmm15
; Build eight masks, xmm0..xmm7. Mask k is all-ones in its low qword
; when the index equals 2k and in its high qword when it equals 2k+1.
2020 movd xmm8,r8d
2021 movdqa xmm1,XMMWORD[(($L$inc+16))] ; step vector {2,2,2,2}
2022 movdqa xmm0,XMMWORD[$L$inc] ; start vector {0,0,1,1}
2023
2024 pshufd xmm8,xmm8,0 ; broadcast the index to all four dwords
2025 movdqa xmm7,xmm1 ; xmm7 keeps the step for the copies below
2026 movdqa xmm2,xmm1
2027 paddd xmm1,xmm0 ; xmm1 = {2,2,3,3}
2028 pcmpeqd xmm0,xmm8 ; mask for slots 0,1
2029 movdqa xmm3,xmm7
2030 paddd xmm2,xmm1 ; xmm2 = {4,4,5,5}
2031 pcmpeqd xmm1,xmm8 ; mask for slots 2,3
2032 movdqa xmm4,xmm7
2033 paddd xmm3,xmm2 ; xmm3 = {6,6,7,7}
2034 pcmpeqd xmm2,xmm8 ; mask for slots 4,5
2035 movdqa xmm5,xmm7
2036 paddd xmm4,xmm3 ; xmm4 = {8,8,9,9}
2037 pcmpeqd xmm3,xmm8 ; mask for slots 6,7
2038 movdqa xmm6,xmm7
2039 paddd xmm5,xmm4 ; xmm5 = {10,10,11,11}
2040 pcmpeqd xmm4,xmm8 ; mask for slots 8,9
2041 paddd xmm6,xmm5 ; xmm6 = {12,12,13,13}
2042 pcmpeqd xmm5,xmm8 ; mask for slots 10,11
2043 paddd xmm7,xmm6 ; xmm7 = {14,14,15,15}
2044 pcmpeqd xmm6,xmm8 ; mask for slots 12,13
2045 pcmpeqd xmm7,xmm8 ; mask for slots 14,15
2046 mov r9d,8 ; eight rows, one output qword each
2047 jmp NEAR $L$oop_gather ; skip the alignment padding that follows
2048ALIGN 16
; Per row: load all 128 bytes, mask each 16-byte pair, OR everything
; together and emit the single surviving qword.
2049$L$oop_gather:
2050 movdqa xmm8,XMMWORD[rdx] ; xmm8 is reused as data; the masks are already built
2051 movdqa xmm9,XMMWORD[16+rdx]
2052 movdqa xmm10,XMMWORD[32+rdx]
2053 movdqa xmm11,XMMWORD[48+rdx]
2054 pand xmm8,xmm0
2055 movdqa xmm12,XMMWORD[64+rdx]
2056 pand xmm9,xmm1
2057 movdqa xmm13,XMMWORD[80+rdx]
2058 pand xmm10,xmm2
2059 movdqa xmm14,XMMWORD[96+rdx]
2060 pand xmm11,xmm3
2061 movdqa xmm15,XMMWORD[112+rdx]
2062 lea rdx,[128+rdx] ; advance to the next row
2063 pand xmm12,xmm4
2064 pand xmm13,xmm5
2065 pand xmm14,xmm6
2066 pand xmm15,xmm7
2067 por xmm8,xmm10
2068 por xmm9,xmm11
2069 por xmm8,xmm12
2070 por xmm9,xmm13
2071 por xmm8,xmm14
2072 por xmm9,xmm15
2073
2074 por xmm8,xmm9
2075 pshufd xmm9,xmm8,0x4e ; swap the two qword halves
2076 por xmm8,xmm9 ; low qword now holds the selected slot
2077 movq QWORD[rcx],xmm8
2078 lea rcx,[8+rcx]
2079 dec r9d
2080 jnz NEAR $L$oop_gather
; Epilogue: reload the saved xmm6-xmm15 and release the frame.
2081 movaps xmm6,XMMWORD[rsp]
2082 movaps xmm7,XMMWORD[16+rsp]
2083 movaps xmm8,XMMWORD[32+rsp]
2084 movaps xmm9,XMMWORD[48+rsp]
2085 movaps xmm10,XMMWORD[64+rsp]
2086 movaps xmm11,XMMWORD[80+rsp]
2087 movaps xmm12,XMMWORD[96+rsp]
2088 movaps xmm13,XMMWORD[112+rsp]
2089 movaps xmm14,XMMWORD[128+rsp]
2090 movaps xmm15,XMMWORD[144+rsp]
2091 add rsp,0xa8
2092 DB 0F3h,0C3h ;repret
2093$L$SEH_end_rsaz_512_gather4:
2094
2095
2096
; Constants read by rsaz_512_gather4 when it builds its slot masks:
; the first 16 bytes are the starting index vector, the next 16 bytes
; (loaded from $L$inc+16) are the per-step increment.
2097ALIGN 64
2098$L$inc:
2099 DD 0,0,1,1 ; start: low qword tests index 0, high qword tests index 1
2100 DD 2,2,2,2 ; step added to move on to the next pair of indices
;-----------------------------------------------------------------------
; se_handler -- Win64 language-specific exception handler named by the
; $L$SEH_info_* records that start with DB 9,0,0,0.
; It rebuilds the caller's non-volatile registers in the CONTEXT record
; and then hands over to RtlVirtualUnwind.
; In:    r8 = CONTEXT *, r9 = DISPATCHER_CONTEXT * (rcx and rdx, the
;        first two handler arguments, are not read)
; Out:   eax = 1 (ExceptionContinueSearch)
; The numeric offsets are field offsets of the Windows x64 CONTEXT and
; DISPATCHER_CONTEXT structures (winnt.h); the field names given in the
; comments follow that layout.
;-----------------------------------------------------------------------
2101EXTERN __imp_RtlVirtualUnwind
2102
2103ALIGN 16
2104se_handler:
2105 push rsi
2106 push rdi
2107 push rbx
2108 push rbp
2109 push r12
2110 push r13
2111 push r14
2112 push r15
2113 pushfq
2114 sub rsp,64 ; 32 bytes of shadow space + four stack argument slots for the call below
2115
2116 mov rax,QWORD[120+r8] ; context->Rax
2117 mov rbx,QWORD[248+r8] ; context->Rip
2118
2119 mov rsi,QWORD[8+r9] ; disp->ImageBase
2120 mov r11,QWORD[56+r9] ; disp->HandlerData: the two RVAs stored after the handler in .xdata
2121
2122 mov r10d,DWORD[r11] ; HandlerData[0]: RVA of the function's body label
2123 lea r10,[r10*1+rsi] ; convert the RVA to an address
2124 cmp rbx,r10
2125 jb NEAR $L$common_seh_tail ; Rip is before the body label: rax is the value picked up from context->Rax
2126
2127 mov rax,QWORD[152+r8] ; context->Rsp
2128
2129 mov r10d,DWORD[4+r11] ; HandlerData[1]: RVA of the function's epilogue label
2130 lea r10,[r10*1+rsi]
2131 cmp rbx,r10
2132 jae NEAR $L$common_seh_tail ; Rip is at or past the epilogue label: use context->Rsp as is
2133
; Inside the body: step over the local area and the six pushed
; registers so that rax addresses the top of the register save area.
2134 lea rax,[((128+24+48))+rax]
2135
2136 lea rbx,[$L$mul_gather4_epilogue]
2137 cmp rbx,r10 ; is the interrupted function the one that owns this epilogue label?
2138 jne NEAR $L$se_not_in_mul_gather4
2139
; That function's frame is 176 bytes larger. The 20 qwords copied here
; are presumably its saved xmm6-xmm15 (its prologue is not visible
; in this part of the file -- confirm there).
2140 lea rax,[176+rax]
2141
2142 lea rsi,[((-48-168))+rax]
2143 lea rdi,[512+r8] ; context->Xmm6
2144 mov ecx,20
2145 DD 0xa548f3fc ; bytes fc f3 48 a5 = cld; rep movsq
2146
2147$L$se_not_in_mul_gather4:
2148 mov rbx,QWORD[((-8))+rax]
2149 mov rbp,QWORD[((-16))+rax]
2150 mov r12,QWORD[((-24))+rax]
2151 mov r13,QWORD[((-32))+rax]
2152 mov r14,QWORD[((-40))+rax]
2153 mov r15,QWORD[((-48))+rax]
2154 mov QWORD[144+r8],rbx ; context->Rbx
2155 mov QWORD[160+r8],rbp ; context->Rbp
2156 mov QWORD[216+r8],r12 ; context->R12
2157 mov QWORD[224+r8],r13 ; context->R13
2158 mov QWORD[232+r8],r14 ; context->R14
2159 mov QWORD[240+r8],r15 ; context->R15
2160
2161$L$common_seh_tail:
2162 mov rdi,QWORD[8+rax] ; the two slots above rax hold rdi and rsi
2163 mov rsi,QWORD[16+rax]
2164 mov QWORD[152+r8],rax ; context->Rsp
2165 mov QWORD[168+r8],rsi ; context->Rsi
2166 mov QWORD[176+r8],rdi ; context->Rdi
2167
2168 mov rdi,QWORD[40+r9] ; disp->ContextRecord
2169 mov rsi,r8
2170 mov ecx,154 ; 154 qwords = 1232 bytes, the whole CONTEXT
2171 DD 0xa548f3fc ; bytes fc f3 48 a5 = cld; rep movsq
2172
; Call RtlVirtualUnwind: four register arguments, then four more in
; the stack slots above the shadow space.
2173 mov rsi,r9
2174 xor rcx,rcx ; arg1 = 0 (UNW_FLAG_NHANDLER)
2175 mov rdx,QWORD[8+rsi] ; arg2 = disp->ImageBase
2176 mov r8,QWORD[rsi] ; arg3 = disp->ControlPc
2177 mov r9,QWORD[16+rsi] ; arg4 = disp->FunctionEntry
2178 mov r10,QWORD[40+rsi] ; disp->ContextRecord
2179 lea r11,[56+rsi] ; &disp->HandlerData
2180 lea r12,[24+rsi] ; &disp->EstablisherFrame
2181 mov QWORD[32+rsp],r10 ; arg5
2182 mov QWORD[40+rsp],r11 ; arg6
2183 mov QWORD[48+rsp],r12 ; arg7
2184 mov QWORD[56+rsp],rcx ; arg8 = 0
2185 call QWORD[__imp_RtlVirtualUnwind]
2186
2187 mov eax,1 ; ExceptionContinueSearch
2188 add rsp,64
2189 popfq
2190 pop r15
2191 pop r14
2192 pop r13
2193 pop r12
2194 pop rbp
2195 pop rbx
2196 pop rdi
2197 pop rsi
2198 DB 0F3h,0C3h ;repret
2199
2200
; Win64 function table. Each entry is three image-relative addresses:
; start of the function, end of the function, and its unwind info
; record in .xdata.
2201section .pdata rdata align=4
2202ALIGN 4
2203 DD $L$SEH_begin_rsaz_512_sqr wrt ..imagebase
2204 DD $L$SEH_end_rsaz_512_sqr wrt ..imagebase
2205 DD $L$SEH_info_rsaz_512_sqr wrt ..imagebase
2206
2207 DD $L$SEH_begin_rsaz_512_mul wrt ..imagebase
2208 DD $L$SEH_end_rsaz_512_mul wrt ..imagebase
2209 DD $L$SEH_info_rsaz_512_mul wrt ..imagebase
2210
2211 DD $L$SEH_begin_rsaz_512_mul_gather4 wrt ..imagebase
2212 DD $L$SEH_end_rsaz_512_mul_gather4 wrt ..imagebase
2213 DD $L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase
2214
2215 DD $L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase
2216 DD $L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase
2217 DD $L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase
2218
2219 DD $L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase
2220 DD $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase
2221 DD $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase
2222
2223 DD $L$SEH_begin_rsaz_512_gather4 wrt ..imagebase
2224 DD $L$SEH_end_rsaz_512_gather4 wrt ..imagebase
2225 DD $L$SEH_info_rsaz_512_gather4 wrt ..imagebase
2226
; Unwind info records referenced from .pdata.
; The first five records share one shape:
;   DB 9,0,0,0   version 1 with UNW_FLAG_EHANDLER (1 | 1<<3 = 9),
;                prologue size 0, no unwind codes, no frame register
;   DD handler   image-relative address of se_handler
;   DD body,epi  handler data: the two labels se_handler compares the
;                faulting Rip against
2227section .xdata rdata align=8
2228ALIGN 8
2229$L$SEH_info_rsaz_512_sqr:
2230DB 9,0,0,0
2231 DD se_handler wrt ..imagebase
2232 DD $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase
2233$L$SEH_info_rsaz_512_mul:
2234DB 9,0,0,0
2235 DD se_handler wrt ..imagebase
2236 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
2237$L$SEH_info_rsaz_512_mul_gather4:
2238DB 9,0,0,0
2239 DD se_handler wrt ..imagebase
2240 DD $L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt ..imagebase
2241$L$SEH_info_rsaz_512_mul_scatter4:
2242DB 9,0,0,0
2243 DD se_handler wrt ..imagebase
2244 DD $L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wrt ..imagebase
2245$L$SEH_info_rsaz_512_mul_by_one:
2246DB 9,0,0,0
2247 DD se_handler wrt ..imagebase
2248 DD $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..imagebase
; rsaz_512_gather4 has no handler; its raw-byte prologue is described
; with plain unwind codes instead. Each code row is: offset of the end
; of the instruction within the prologue, (register<<4 | operation),
; then a 16-bit operand. The codes are listed in reverse prologue order.
2249$L$SEH_info_rsaz_512_gather4:
2250DB 0x01,0x46,0x16,0x00 ; version 1, no flags; prologue is 0x46 bytes; 0x16 code slots; no frame register
2251DB 0x46,0xf8,0x09,0x00 ; save xmm15 at rsp+0x90 (operand is the offset / 16)
2252DB 0x3d,0xe8,0x08,0x00 ; save xmm14 at rsp+0x80
2253DB 0x34,0xd8,0x07,0x00 ; save xmm13 at rsp+0x70
2254DB 0x2e,0xc8,0x06,0x00 ; save xmm12 at rsp+0x60
2255DB 0x28,0xb8,0x05,0x00 ; save xmm11 at rsp+0x50
2256DB 0x22,0xa8,0x04,0x00 ; save xmm10 at rsp+0x40
2257DB 0x1c,0x98,0x03,0x00 ; save xmm9 at rsp+0x30
2258DB 0x16,0x88,0x02,0x00 ; save xmm8 at rsp+0x20
2259DB 0x10,0x78,0x01,0x00 ; save xmm7 at rsp+0x10
2260DB 0x0b,0x68,0x00,0x00 ; save xmm6 at rsp+0x00
2261DB 0x07,0x01,0x15,0x00 ; large stack allocation: 0x15 * 8 = 0xa8 bytes (the sub rsp,0xa8)
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette