VirtualBox

source: vbox/trunk/src/libs/openssl-3.1.7/crypto/genasm-macosx/rsaz-x86_64.S@ 107935

Last change on this file since 107935 was 94083, checked in by vboxsync, 3 years ago

libs/openssl-3.0.1: Recreate asm files, bugref:10128

File size: 17.6 KB
Line 
# _rsaz_512_sqr: squares an eight-limb (8 x 64-bit) value, reduces the 16-limb
# square back to eight limbs, and repeats the whole pass until the 32-bit
# counter passed in %r8d reaches zero.
#
# Registers on entry, as used by the code below:
#   %rdi  destination; __rsaz_512_subtract stores the eight result limbs here
#   %rsi  source limbs for the first pass; set to %rdi for every later pass
#   %rdx  pointer parked in %xmm1 and handed to the helpers in %rbp
#   %rcx  64-bit value saved at 128(%rsp); __rsaz_512_reduce multiplies by it
#   %r8d  pass counter
# Stack frame (128+24 bytes): 0..127(%rsp) holds the 16-limb square,
# 128(%rsp) the saved %rcx, 128+8(%rsp) the pass counter.
# NOTE(review): %rdx/%rcx look like the modulus pointer and Montgomery n0
# constant -- confirm against the C prototype, which is not visible here.
1.text
2
3
4
5.globl _rsaz_512_sqr
6
7.p2align 5
8_rsaz_512_sqr:
9
10 pushq %rbx
11
12 pushq %rbp
13
14 pushq %r12
15
16 pushq %r13
17
18 pushq %r14
19
20 pushq %r15
21
22
23 subq $128+24,%rsp
24
25L$sqr_body:
26.byte 102,72,15,110,202 # encoding of movq %rdx,%xmm1
27 movq (%rsi),%rdx
28 movq 8(%rsi),%rax
29 movq %rcx,128(%rsp)
30 jmp L$oop_sqr
31
32.p2align 5
33L$oop_sqr:
# Loop entry invariant: %rdx = limb 0 and %rax = limb 1 of the value to square.
34 movl %r8d,128+8(%rsp)
35
# Row 0: cross products a[0]*a[1..7] accumulate into %r8..%r14 and %rdx.
36 movq %rdx,%rbx
37 movq %rax,%rbp
38 mulq %rdx
39 movq %rax,%r8
40 movq 16(%rsi),%rax
41 movq %rdx,%r9
42
43 mulq %rbx
44 addq %rax,%r9
45 movq 24(%rsi),%rax
46 movq %rdx,%r10
47 adcq $0,%r10
48
49 mulq %rbx
50 addq %rax,%r10
51 movq 32(%rsi),%rax
52 movq %rdx,%r11
53 adcq $0,%r11
54
55 mulq %rbx
56 addq %rax,%r11
57 movq 40(%rsi),%rax
58 movq %rdx,%r12
59 adcq $0,%r12
60
61 mulq %rbx
62 addq %rax,%r12
63 movq 48(%rsi),%rax
64 movq %rdx,%r13
65 adcq $0,%r13
66
67 mulq %rbx
68 addq %rax,%r13
69 movq 56(%rsi),%rax
70 movq %rdx,%r14
71 adcq $0,%r14
72
73 mulq %rbx
74 addq %rax,%r14
75 movq %rbx,%rax
76 adcq $0,%rdx
77
# Double the lowest cross-product limb (carry kept in %rcx), add a[0]*a[0],
# and store result limbs 0 and 1.
78 xorq %rcx,%rcx
79 addq %r8,%r8
80 movq %rdx,%r15
81 adcq $0,%rcx
82
83 mulq %rax
84 addq %r8,%rdx
85 adcq $0,%rcx
86
87 movq %rax,(%rsp)
88 movq %rdx,8(%rsp)
89
90
# Row 1: cross products a[1]*a[2..7] (%rbp = a[1]); %rbx carries between columns.
91 movq 16(%rsi),%rax
92 mulq %rbp
93 addq %rax,%r10
94 movq 24(%rsi),%rax
95 movq %rdx,%rbx
96 adcq $0,%rbx
97
98 mulq %rbp
99 addq %rax,%r11
100 movq 32(%rsi),%rax
101 adcq $0,%rdx
102 addq %rbx,%r11
103 movq %rdx,%rbx
104 adcq $0,%rbx
105
106 mulq %rbp
107 addq %rax,%r12
108 movq 40(%rsi),%rax
109 adcq $0,%rdx
110 addq %rbx,%r12
111 movq %rdx,%rbx
112 adcq $0,%rbx
113
114 mulq %rbp
115 addq %rax,%r13
116 movq 48(%rsi),%rax
117 adcq $0,%rdx
118 addq %rbx,%r13
119 movq %rdx,%rbx
120 adcq $0,%rbx
121
122 mulq %rbp
123 addq %rax,%r14
124 movq 56(%rsi),%rax
125 adcq $0,%rdx
126 addq %rbx,%r14
127 movq %rdx,%rbx
128 adcq $0,%rbx
129
130 mulq %rbp
131 addq %rax,%r15
132 movq %rbp,%rax
133 adcq $0,%rdx
134 addq %rbx,%r15
135 adcq $0,%rdx
136
# Double %r9:%r10, add a[1]*a[1] plus the previous carry (%rcx), store limbs 2 and 3.
137 xorq %rbx,%rbx
138 addq %r9,%r9
139 movq %rdx,%r8
140 adcq %r10,%r10
141 adcq $0,%rbx
142
143 mulq %rax
144
145 addq %rcx,%rax
146 movq 16(%rsi),%rbp
147 addq %rax,%r9
148 movq 24(%rsi),%rax
149 adcq %rdx,%r10
150 adcq $0,%rbx
151
152 movq %r9,16(%rsp)
153 movq %r10,24(%rsp)
154
155
# Row 2: cross products a[2]*a[3..7] (%rbp = a[2]); %rcx carries between columns.
156 mulq %rbp
157 addq %rax,%r12
158 movq 32(%rsi),%rax
159 movq %rdx,%rcx
160 adcq $0,%rcx
161
162 mulq %rbp
163 addq %rax,%r13
164 movq 40(%rsi),%rax
165 adcq $0,%rdx
166 addq %rcx,%r13
167 movq %rdx,%rcx
168 adcq $0,%rcx
169
170 mulq %rbp
171 addq %rax,%r14
172 movq 48(%rsi),%rax
173 adcq $0,%rdx
174 addq %rcx,%r14
175 movq %rdx,%rcx
176 adcq $0,%rcx
177
178 mulq %rbp
179 addq %rax,%r15
180 movq 56(%rsi),%rax
181 adcq $0,%rdx
182 addq %rcx,%r15
183 movq %rdx,%rcx
184 adcq $0,%rcx
185
186 mulq %rbp
187 addq %rax,%r8
188 movq %rbp,%rax
189 adcq $0,%rdx
190 addq %rcx,%r8
191 adcq $0,%rdx
192
# Double %r11:%r12, add a[2]*a[2] plus the previous carry (%rbx), store limbs 4 and 5.
193 xorq %rcx,%rcx
194 addq %r11,%r11
195 movq %rdx,%r9
196 adcq %r12,%r12
197 adcq $0,%rcx
198
199 mulq %rax
200
201 addq %rbx,%rax
202 movq 24(%rsi),%r10
203 addq %rax,%r11
204 movq 32(%rsi),%rax
205 adcq %rdx,%r12
206 adcq $0,%rcx
207
208 movq %r11,32(%rsp)
209 movq %r12,40(%rsp)
210
211
# Row 3: cross products a[3]*a[4..7] (%r10 = a[3]). The operands a[4], a[5]
# and a[6] are also cached in %r11, %r12 and %rbp for the remaining rows.
212 movq %rax,%r11
213 mulq %r10
214 addq %rax,%r14
215 movq 40(%rsi),%rax
216 movq %rdx,%rbx
217 adcq $0,%rbx
218
219 movq %rax,%r12
220 mulq %r10
221 addq %rax,%r15
222 movq 48(%rsi),%rax
223 adcq $0,%rdx
224 addq %rbx,%r15
225 movq %rdx,%rbx
226 adcq $0,%rbx
227
228 movq %rax,%rbp
229 mulq %r10
230 addq %rax,%r8
231 movq 56(%rsi),%rax
232 adcq $0,%rdx
233 addq %rbx,%r8
234 movq %rdx,%rbx
235 adcq $0,%rbx
236
237 mulq %r10
238 addq %rax,%r9
239 movq %r10,%rax
240 adcq $0,%rdx
241 addq %rbx,%r9
242 adcq $0,%rdx
243
# Double %r13:%r14, add a[3]*a[3] plus the previous carry (%rcx), store limbs 6 and 7.
244 xorq %rbx,%rbx
245 addq %r13,%r13
246 movq %rdx,%r10
247 adcq %r14,%r14
248 adcq $0,%rbx
249
250 mulq %rax
251
252 addq %rcx,%rax
253 addq %rax,%r13
254 movq %r12,%rax
255 adcq %rdx,%r14
256 adcq $0,%rbx
257
258 movq %r13,48(%rsp)
259 movq %r14,56(%rsp)
260
261
# Row 4: cross products a[4]*a[5..7] (%r11 = a[4]); a[7] is cached in %r14.
262 mulq %r11
263 addq %rax,%r8
264 movq %rbp,%rax
265 movq %rdx,%rcx
266 adcq $0,%rcx
267
268 mulq %r11
269 addq %rax,%r9
270 movq 56(%rsi),%rax
271 adcq $0,%rdx
272 addq %rcx,%r9
273 movq %rdx,%rcx
274 adcq $0,%rcx
275
276 movq %rax,%r14
277 mulq %r11
278 addq %rax,%r10
279 movq %r11,%rax
280 adcq $0,%rdx
281 addq %rcx,%r10
282 adcq $0,%rdx
283
# Double %r15:%r8, add a[4]*a[4] plus the previous carry (%rbx), store limbs 8 and 9.
284 xorq %rcx,%rcx
285 addq %r15,%r15
286 movq %rdx,%r11
287 adcq %r8,%r8
288 adcq $0,%rcx
289
290 mulq %rax
291
292 addq %rbx,%rax
293 addq %rax,%r15
294 movq %rbp,%rax
295 adcq %rdx,%r8
296 adcq $0,%rcx
297
298 movq %r15,64(%rsp)
299 movq %r8,72(%rsp)
300
301
# Row 5: cross products a[5]*a[6..7] (%r12 = a[5]).
302 mulq %r12
303 addq %rax,%r10
304 movq %r14,%rax
305 movq %rdx,%rbx
306 adcq $0,%rbx
307
308 mulq %r12
309 addq %rax,%r11
310 movq %r12,%rax
311 adcq $0,%rdx
312 addq %rbx,%r11
313 adcq $0,%rdx
314
# Double %r9:%r10, add a[5]*a[5] plus the previous carry (%rcx), store limbs 10 and 11.
315 xorq %rbx,%rbx
316 addq %r9,%r9
317 movq %rdx,%r12
318 adcq %r10,%r10
319 adcq $0,%rbx
320
321 mulq %rax
322
323 addq %rcx,%rax
324 addq %rax,%r9
325 movq %r14,%rax
326 adcq %rdx,%r10
327 adcq $0,%rbx
328
329 movq %r9,80(%rsp)
330 movq %r10,88(%rsp)
331
332
# Row 6: the single cross product a[6]*a[7] (%rbp = a[6], %rax = a[7]).
333 mulq %rbp
334 addq %rax,%r12
335 movq %rbp,%rax
336 adcq $0,%rdx
337
# Double %r11:%r12, add a[6]*a[6] plus the previous carry (%rbx), store limbs 12 and 13.
338 xorq %rcx,%rcx
339 addq %r11,%r11
340 movq %rdx,%r13
341 adcq %r12,%r12
342 adcq $0,%rcx
343
344 mulq %rax
345
346 addq %rbx,%rax
347 addq %rax,%r11
348 movq %r14,%rax
349 adcq %rdx,%r12
350 adcq $0,%rcx
351
352 movq %r11,96(%rsp)
353 movq %r12,104(%rsp)
354
355
# Double the last cross-product limb (%r13) and add a[7]*a[7]; %rax:%rdx
# become limbs 14 and 15 and are stored further below.
356 xorq %rbx,%rbx
357 addq %r13,%r13
358 adcq $0,%rbx
359
360 mulq %rax
361
362 addq %rcx,%rax
363 addq %r13,%rax
364 adcq %rbx,%rdx
365
# Reload the low eight limbs of the square as input for __rsaz_512_reduce.
366 movq (%rsp),%r8
367 movq 8(%rsp),%r9
368 movq 16(%rsp),%r10
369 movq 24(%rsp),%r11
370 movq 32(%rsp),%r12
371 movq 40(%rsp),%r13
372 movq 48(%rsp),%r14
373 movq 56(%rsp),%r15
374.byte 102,72,15,126,205 # encoding of movq %xmm1,%rbp (pointer saved from %rdx)
375
376 movq %rax,112(%rsp)
377 movq %rdx,120(%rsp)
378
379 call __rsaz_512_reduce
380
# Add the high eight limbs of the square to the reduced low half.
381 addq 64(%rsp),%r8
382 adcq 72(%rsp),%r9
383 adcq 80(%rsp),%r10
384 adcq 88(%rsp),%r11
385 adcq 96(%rsp),%r12
386 adcq 104(%rsp),%r13
387 adcq 112(%rsp),%r14
388 adcq 120(%rsp),%r15
389 sbbq %rcx,%rcx # %rcx = all ones if the addition carried out, else zero
390
391 call __rsaz_512_subtract
392
# Prepare the next pass: limbs 0 and 1 of the fresh result go to %rdx/%rax and
# the destination buffer becomes the source.
393 movq %r8,%rdx
394 movq %r9,%rax
395 movl 128+8(%rsp),%r8d
396 movq %rdi,%rsi
397
398 decl %r8d
399 jnz L$oop_sqr
400
# Epilogue: %rax points just above the six saved registers; restore them and
# release the frame.
401 leaq 128+24+48(%rsp),%rax
402
403 movq -48(%rax),%r15
404
405 movq -40(%rax),%r14
406
407 movq -32(%rax),%r13
408
409 movq -24(%rax),%r12
410
411 movq -16(%rax),%rbp
412
413 movq -8(%rax),%rbx
414
415 leaq (%rax),%rsp
416
417L$sqr_epilogue:
418 .byte 0xf3,0xc3 # encoding of rep ret
419
420
# _rsaz_512_mul: multiplies two eight-limb values, reduces the 16-limb product
# with __rsaz_512_reduce, and stores the eight-limb result through %rdi.
#
# Registers on entry, as used by the code below:
#   %rdi  destination (parked in %xmm0 across the multiply)
#   %rsi  first operand, read by __rsaz_512_mul
#   %rdx  second operand, walked limb by limb via %rbp
#   %rcx  pointer parked in %xmm1, later passed to the helpers in %rbp
#   %r8   64-bit value saved at 128(%rsp); __rsaz_512_reduce multiplies by it
# NOTE(review): %rcx/%r8 look like the modulus pointer and Montgomery n0
# constant -- confirm against the C prototype.
421.globl _rsaz_512_mul
422
423.p2align 5
424_rsaz_512_mul:
425
426 pushq %rbx
427
428 pushq %rbp
429
430 pushq %r12
431
432 pushq %r13
433
434 pushq %r14
435
436 pushq %r15
437
438
439 subq $128+24,%rsp
440
441L$mul_body:
442.byte 102,72,15,110,199 # encoding of movq %rdi,%xmm0
443.byte 102,72,15,110,201 # encoding of movq %rcx,%xmm1
444 movq %r8,128(%rsp)
445 movq (%rdx),%rbx
446 movq %rdx,%rbp
447 call __rsaz_512_mul # writes the 16-limb product to 0..127(%rsp) of this frame
448
449.byte 102,72,15,126,199 # encoding of movq %xmm0,%rdi
450.byte 102,72,15,126,205 # encoding of movq %xmm1,%rbp
451
# Low eight product limbs are the input of the reduction.
452 movq (%rsp),%r8
453 movq 8(%rsp),%r9
454 movq 16(%rsp),%r10
455 movq 24(%rsp),%r11
456 movq 32(%rsp),%r12
457 movq 40(%rsp),%r13
458 movq 48(%rsp),%r14
459 movq 56(%rsp),%r15
460
461 call __rsaz_512_reduce
# Add the high eight product limbs; the carry-out becomes a mask in %rcx.
462 addq 64(%rsp),%r8
463 adcq 72(%rsp),%r9
464 adcq 80(%rsp),%r10
465 adcq 88(%rsp),%r11
466 adcq 96(%rsp),%r12
467 adcq 104(%rsp),%r13
468 adcq 112(%rsp),%r14
469 adcq 120(%rsp),%r15
470 sbbq %rcx,%rcx
471
472 call __rsaz_512_subtract
473
# Epilogue: restore the six saved registers and release the frame.
474 leaq 128+24+48(%rsp),%rax
475
476 movq -48(%rax),%r15
477
478 movq -40(%rax),%r14
479
480 movq -32(%rax),%r13
481
482 movq -24(%rax),%r12
483
484 movq -16(%rax),%rbp
485
486 movq -8(%rax),%rbx
487
488 leaq (%rax),%rsp
489
490L$mul_epilogue:
491 .byte 0xf3,0xc3 # encoding of rep ret
492
493
# _rsaz_512_mul_gather4: multiplies the eight-limb value at %rsi by an
# eight-limb value gathered from a table, reduces the product, and stores the
# result through %rdi.
#
# Registers on entry, as used by the code below:
#   %rdi  destination (saved at 128+8(%rsp))
#   %rsi  multiplicand limbs
#   %rdx  table base; each limb occupies one 128-byte row of sixteen 64-bit slots
#   %rcx  pointer saved at 128+16(%rsp), later passed to the helpers in %rbp
#   %r8   64-bit value saved at 128(%rsp); __rsaz_512_reduce multiplies by it
#   %r9d  slot index; the masks below cover the values 0..15
# Every row is read in full and combined with compare masks, so the addresses
# touched do not depend on %r9d.
# NOTE(review): movdqa requires the table rows to be 16-byte aligned -- confirm
# that callers guarantee this.
494.globl _rsaz_512_mul_gather4
495
496.p2align 5
497_rsaz_512_mul_gather4:
498
499 pushq %rbx
500
501 pushq %rbp
502
503 pushq %r12
504
505 pushq %r13
506
507 pushq %r14
508
509 pushq %r15
510
511
512 subq $152,%rsp
513
514L$mul_gather4_body:
# Build eight selection masks: %xmm0..%xmm7 start as the counter pairs
# {0,0,1,1}, {2,2,3,3}, ... {14,14,15,15} and are compared against the
# broadcast index, leaving all-ones only in the matching 64-bit lane.
515 movd %r9d,%xmm8
516 movdqa L$inc+16(%rip),%xmm1
517 movdqa L$inc(%rip),%xmm0
518
519 pshufd $0,%xmm8,%xmm8
520 movdqa %xmm1,%xmm7
521 movdqa %xmm1,%xmm2
522 paddd %xmm0,%xmm1
523 pcmpeqd %xmm8,%xmm0
524 movdqa %xmm7,%xmm3
525 paddd %xmm1,%xmm2
526 pcmpeqd %xmm8,%xmm1
527 movdqa %xmm7,%xmm4
528 paddd %xmm2,%xmm3
529 pcmpeqd %xmm8,%xmm2
530 movdqa %xmm7,%xmm5
531 paddd %xmm3,%xmm4
532 pcmpeqd %xmm8,%xmm3
533 movdqa %xmm7,%xmm6
534 paddd %xmm4,%xmm5
535 pcmpeqd %xmm8,%xmm4
536 paddd %xmm5,%xmm6
537 pcmpeqd %xmm8,%xmm5
538 paddd %xmm6,%xmm7
539 pcmpeqd %xmm8,%xmm6
540 pcmpeqd %xmm8,%xmm7
541
# Gather limb 0 of the table operand: mask the whole first row, OR everything
# together, and fold the two 64-bit halves into the low lane.
542 movdqa 0(%rdx),%xmm8
543 movdqa 16(%rdx),%xmm9
544 movdqa 32(%rdx),%xmm10
545 movdqa 48(%rdx),%xmm11
546 pand %xmm0,%xmm8
547 movdqa 64(%rdx),%xmm12
548 pand %xmm1,%xmm9
549 movdqa 80(%rdx),%xmm13
550 pand %xmm2,%xmm10
551 movdqa 96(%rdx),%xmm14
552 pand %xmm3,%xmm11
553 movdqa 112(%rdx),%xmm15
554 leaq 128(%rdx),%rbp
555 pand %xmm4,%xmm12
556 pand %xmm5,%xmm13
557 pand %xmm6,%xmm14
558 pand %xmm7,%xmm15
559 por %xmm10,%xmm8
560 por %xmm11,%xmm9
561 por %xmm12,%xmm8
562 por %xmm13,%xmm9
563 por %xmm14,%xmm8
564 por %xmm15,%xmm9
565
566 por %xmm9,%xmm8
567 pshufd $0x4e,%xmm8,%xmm9
568 por %xmm9,%xmm8
569.byte 102,76,15,126,195 # encoding of movq %xmm8,%rbx (gathered limb)
570
571 movq %r8,128(%rsp)
572 movq %rdi,128+8(%rsp)
573 movq %rcx,128+16(%rsp)
574
# First product row: a[0..7] times the gathered limb. Limb 0 of the product is
# stored; the running high part stays in %r8..%r15.
575 movq (%rsi),%rax
576 movq 8(%rsi),%rcx
577 mulq %rbx
578 movq %rax,(%rsp)
579 movq %rcx,%rax
580 movq %rdx,%r8
581
582 mulq %rbx
583 addq %rax,%r8
584 movq 16(%rsi),%rax
585 movq %rdx,%r9
586 adcq $0,%r9
587
588 mulq %rbx
589 addq %rax,%r9
590 movq 24(%rsi),%rax
591 movq %rdx,%r10
592 adcq $0,%r10
593
594 mulq %rbx
595 addq %rax,%r10
596 movq 32(%rsi),%rax
597 movq %rdx,%r11
598 adcq $0,%r11
599
600 mulq %rbx
601 addq %rax,%r11
602 movq 40(%rsi),%rax
603 movq %rdx,%r12
604 adcq $0,%r12
605
606 mulq %rbx
607 addq %rax,%r12
608 movq 48(%rsi),%rax
609 movq %rdx,%r13
610 adcq $0,%r13
611
612 mulq %rbx
613 addq %rax,%r13
614 movq 56(%rsi),%rax
615 movq %rdx,%r14
616 adcq $0,%r14
617
618 mulq %rbx
619 addq %rax,%r14
620 movq (%rsi),%rax
621 movq %rdx,%r15
622 adcq $0,%r15
623
624 leaq 8(%rsp),%rdi
625 movl $7,%ecx
626 jmp L$oop_mul_gather
627
628.p2align 5
629L$oop_mul_gather:
# Seven more rows. Each iteration gathers the next table limb from the row at
# %rbp, multiplies a[0..7] by it, adds the running high part, and stores one
# more product limb at (%rdi). %rax holds a[0] on entry.
630 movdqa 0(%rbp),%xmm8
631 movdqa 16(%rbp),%xmm9
632 movdqa 32(%rbp),%xmm10
633 movdqa 48(%rbp),%xmm11
634 pand %xmm0,%xmm8
635 movdqa 64(%rbp),%xmm12
636 pand %xmm1,%xmm9
637 movdqa 80(%rbp),%xmm13
638 pand %xmm2,%xmm10
639 movdqa 96(%rbp),%xmm14
640 pand %xmm3,%xmm11
641 movdqa 112(%rbp),%xmm15
642 leaq 128(%rbp),%rbp
643 pand %xmm4,%xmm12
644 pand %xmm5,%xmm13
645 pand %xmm6,%xmm14
646 pand %xmm7,%xmm15
647 por %xmm10,%xmm8
648 por %xmm11,%xmm9
649 por %xmm12,%xmm8
650 por %xmm13,%xmm9
651 por %xmm14,%xmm8
652 por %xmm15,%xmm9
653
654 por %xmm9,%xmm8
655 pshufd $0x4e,%xmm8,%xmm9
656 por %xmm9,%xmm8
657.byte 102,76,15,126,195 # encoding of movq %xmm8,%rbx (gathered limb)
658
659 mulq %rbx
660 addq %rax,%r8
661 movq 8(%rsi),%rax
662 movq %r8,(%rdi)
663 movq %rdx,%r8
664 adcq $0,%r8
665
666 mulq %rbx
667 addq %rax,%r9
668 movq 16(%rsi),%rax
669 adcq $0,%rdx
670 addq %r9,%r8
671 movq %rdx,%r9
672 adcq $0,%r9
673
674 mulq %rbx
675 addq %rax,%r10
676 movq 24(%rsi),%rax
677 adcq $0,%rdx
678 addq %r10,%r9
679 movq %rdx,%r10
680 adcq $0,%r10
681
682 mulq %rbx
683 addq %rax,%r11
684 movq 32(%rsi),%rax
685 adcq $0,%rdx
686 addq %r11,%r10
687 movq %rdx,%r11
688 adcq $0,%r11
689
690 mulq %rbx
691 addq %rax,%r12
692 movq 40(%rsi),%rax
693 adcq $0,%rdx
694 addq %r12,%r11
695 movq %rdx,%r12
696 adcq $0,%r12
697
698 mulq %rbx
699 addq %rax,%r13
700 movq 48(%rsi),%rax
701 adcq $0,%rdx
702 addq %r13,%r12
703 movq %rdx,%r13
704 adcq $0,%r13
705
706 mulq %rbx
707 addq %rax,%r14
708 movq 56(%rsi),%rax
709 adcq $0,%rdx
710 addq %r14,%r13
711 movq %rdx,%r14
712 adcq $0,%r14
713
714 mulq %rbx
715 addq %rax,%r15
716 movq (%rsi),%rax
717 adcq $0,%rdx
718 addq %r15,%r14
719 movq %rdx,%r15
720 adcq $0,%r15
721
722 leaq 8(%rdi),%rdi
723
724 decl %ecx
725 jnz L$oop_mul_gather
726
# Store the remaining eight high limbs of the product.
727 movq %r8,(%rdi)
728 movq %r9,8(%rdi)
729 movq %r10,16(%rdi)
730 movq %r11,24(%rdi)
731 movq %r12,32(%rdi)
732 movq %r13,40(%rdi)
733 movq %r14,48(%rdi)
734 movq %r15,56(%rdi)
735
# Recover the destination and the pointer originally passed in %rcx.
736 movq 128+8(%rsp),%rdi
737 movq 128+16(%rsp),%rbp
738
739 movq (%rsp),%r8
740 movq 8(%rsp),%r9
741 movq 16(%rsp),%r10
742 movq 24(%rsp),%r11
743 movq 32(%rsp),%r12
744 movq 40(%rsp),%r13
745 movq 48(%rsp),%r14
746 movq 56(%rsp),%r15
747
# NOTE(review): inside the helper, 128+8(%rsp) resolves to this frame's
# 128(%rsp) slot (the saved %r8), because the call pushes a return address.
748 call __rsaz_512_reduce
749 addq 64(%rsp),%r8
750 adcq 72(%rsp),%r9
751 adcq 80(%rsp),%r10
752 adcq 88(%rsp),%r11
753 adcq 96(%rsp),%r12
754 adcq 104(%rsp),%r13
755 adcq 112(%rsp),%r14
756 adcq 120(%rsp),%r15
757 sbbq %rcx,%rcx # %rcx = all ones if the addition carried out, else zero
758
759 call __rsaz_512_subtract
760
# Epilogue: restore the six saved registers and release the frame.
761 leaq 128+24+48(%rsp),%rax
762
763 movq -48(%rax),%r15
764
765 movq -40(%rax),%r14
766
767 movq -32(%rax),%r13
768
769 movq -24(%rax),%r12
770
771 movq -16(%rax),%rbp
772
773 movq -8(%rax),%rbx
774
775 leaq (%rax),%rsp
776
777L$mul_gather4_epilogue:
778 .byte 0xf3,0xc3 # encoding of rep ret
779
780
# _rsaz_512_mul_scatter4: multiplies the eight limbs at %rsi by the eight limbs
# at %rdi, reduces the product, writes the result back through %rdi, and also
# scatters it into a table with a 128-byte stride.
#
# Registers on entry, as used by the code below:
#   %rdi  second operand and destination (parked in %xmm0)
#   %rsi  first operand, read by __rsaz_512_mul
#   %rdx  pointer parked in %xmm1, later passed to the helpers in %rbp
#   %rcx  64-bit value saved at 128(%rsp); __rsaz_512_reduce multiplies by it
#   %r8   table base
#   %r9d  slot index; the table address used is %r8 + 8*%r9
781.globl _rsaz_512_mul_scatter4
782
783.p2align 5
784_rsaz_512_mul_scatter4:
785
786 pushq %rbx
787
788 pushq %rbp
789
790 pushq %r12
791
792 pushq %r13
793
794 pushq %r14
795
796 pushq %r15
797
798
799 movl %r9d,%r9d # 32-bit move clears the upper half of %r9 before it is used as an index
800 subq $128+24,%rsp
801
802L$mul_scatter4_body:
803 leaq (%r8,%r9,8),%r8
804.byte 102,72,15,110,199 # encoding of movq %rdi,%xmm0
805.byte 102,72,15,110,202 # encoding of movq %rdx,%xmm1
806.byte 102,73,15,110,208 # encoding of movq %r8,%xmm2 (table slot address)
807 movq %rcx,128(%rsp)
808
809 movq %rdi,%rbp
810 movq (%rdi),%rbx
811 call __rsaz_512_mul # writes the 16-limb product to 0..127(%rsp) of this frame
812
813.byte 102,72,15,126,199 # encoding of movq %xmm0,%rdi
814.byte 102,72,15,126,205 # encoding of movq %xmm1,%rbp
815
816 movq (%rsp),%r8
817 movq 8(%rsp),%r9
818 movq 16(%rsp),%r10
819 movq 24(%rsp),%r11
820 movq 32(%rsp),%r12
821 movq 40(%rsp),%r13
822 movq 48(%rsp),%r14
823 movq 56(%rsp),%r15
824
825 call __rsaz_512_reduce
826 addq 64(%rsp),%r8
827 adcq 72(%rsp),%r9
828 adcq 80(%rsp),%r10
829 adcq 88(%rsp),%r11
830 adcq 96(%rsp),%r12
831 adcq 104(%rsp),%r13
832 adcq 112(%rsp),%r14
833 adcq 120(%rsp),%r15
834.byte 102,72,15,126,214 # encoding of movq %xmm2,%rsi; does not modify the carry flag
835 sbbq %rcx,%rcx # %rcx = all ones if the addition carried out, else zero
836
837 call __rsaz_512_subtract
838
# Scatter the final limbs (left in %r8..%r15 by the helper) into the table,
# one limb per 128-byte row.
839 movq %r8,0(%rsi)
840 movq %r9,128(%rsi)
841 movq %r10,256(%rsi)
842 movq %r11,384(%rsi)
843 movq %r12,512(%rsi)
844 movq %r13,640(%rsi)
845 movq %r14,768(%rsi)
846 movq %r15,896(%rsi)
847
# Epilogue: restore the six saved registers and release the frame.
848 leaq 128+24+48(%rsp),%rax
849
850 movq -48(%rax),%r15
851
852 movq -40(%rax),%r14
853
854 movq -32(%rax),%r13
855
856 movq -24(%rax),%r12
857
858 movq -16(%rax),%rbp
859
860 movq -8(%rax),%rbx
861
862 leaq (%rax),%rsp
863
864L$mul_scatter4_epilogue:
865 .byte 0xf3,0xc3 # encoding of rep ret
866
867
# _rsaz_512_mul_by_one: runs __rsaz_512_reduce directly on the eight limbs at
# %rsi (no multiplication first) and stores the eight result limbs at %rdi.
#
# Registers on entry, as used by the code below:
#   %rdi  destination
#   %rsi  source limbs
#   %rdx  pointer passed to the helper in %rbp
#   %rcx  64-bit value saved at 128(%rsp); __rsaz_512_reduce multiplies by it
# NOTE(review): presumably this converts a value out of Montgomery form --
# confirm with the callers. No __rsaz_512_subtract step follows the reduction.
868.globl _rsaz_512_mul_by_one
869
870.p2align 5
871_rsaz_512_mul_by_one:
872
873 pushq %rbx
874
875 pushq %rbp
876
877 pushq %r12
878
879 pushq %r13
880
881 pushq %r14
882
883 pushq %r15
884
885
886 subq $128+24,%rsp
887
888L$mul_by_one_body:
889 movq %rdx,%rbp
890 movq %rcx,128(%rsp)
891
892 movq (%rsi),%r8
893 pxor %xmm0,%xmm0
894 movq 8(%rsi),%r9
895 movq 16(%rsi),%r10
896 movq 24(%rsi),%r11
897 movq 32(%rsi),%r12
898 movq 40(%rsi),%r13
899 movq 48(%rsi),%r14
900 movq 56(%rsi),%r15
901
# Zero the first 112 bytes of the scratch area.
902 movdqa %xmm0,(%rsp)
903 movdqa %xmm0,16(%rsp)
904 movdqa %xmm0,32(%rsp)
905 movdqa %xmm0,48(%rsp)
906 movdqa %xmm0,64(%rsp)
907 movdqa %xmm0,80(%rsp)
908 movdqa %xmm0,96(%rsp)
909 call __rsaz_512_reduce
910 movq %r8,(%rdi)
911 movq %r9,8(%rdi)
912 movq %r10,16(%rdi)
913 movq %r11,24(%rdi)
914 movq %r12,32(%rdi)
915 movq %r13,40(%rdi)
916 movq %r14,48(%rdi)
917 movq %r15,56(%rdi)
918
# Epilogue: restore the six saved registers and release the frame.
919 leaq 128+24+48(%rsp),%rax
920
921 movq -48(%rax),%r15
922
923 movq -40(%rax),%r14
924
925 movq -32(%rax),%r13
926
927 movq -24(%rax),%r12
928
929 movq -16(%rax),%rbp
930
931 movq -8(%rax),%rbx
932
933 leaq (%rax),%rsp
934
935L$mul_by_one_epilogue:
936 .byte 0xf3,0xc3 # encoding of rep ret
937
938
939
# __rsaz_512_reduce: file-local helper (no .globl) that performs eight
# reduction rounds on the limbs held in %r8..%r15.
#
# In:    %r8..%r15    eight 64-bit limbs, least significant in %r8
#        %rbp         pointer to the eight limbs multiplied in every round
#        128+8(%rsp)  64-bit multiplier constant. Measured from this routine's
#                     %rsp this is the caller's 128(%rsp) slot, because the
#                     call pushed an 8-byte return address.
# Out:   %r8..%r15    the eight limbs after the eight rounds
# Clobb: %rax, %rbx, %rcx, %rdx, %rsi, flags
# Each round computes m = constant * (current %r8), adds m * (%rbp)[0..7] to
# the limbs, and shifts the result down by one limb.
# NOTE(review): this has the shape of word-by-word Montgomery reduction with
# %rbp as the modulus and the constant as -1/modulus mod 2^64 -- confirm.
940.p2align 5
941__rsaz_512_reduce:
942
943 movq %r8,%rbx
944 imulq 128+8(%rsp),%rbx
945 movq 0(%rbp),%rax
946 movl $8,%ecx
947 jmp L$reduction_loop
948
949.p2align 5
950L$reduction_loop:
951 mulq %rbx
952 movq 8(%rbp),%rax
953 negq %r8 # sets the carry flag exactly when %r8 was non-zero
954 movq %rdx,%r8
955 adcq $0,%r8
956
957 mulq %rbx
958 addq %rax,%r9
959 movq 16(%rbp),%rax
960 adcq $0,%rdx
961 addq %r9,%r8
962 movq %rdx,%r9
963 adcq $0,%r9
964
965 mulq %rbx
966 addq %rax,%r10
967 movq 24(%rbp),%rax
968 adcq $0,%rdx
969 addq %r10,%r9
970 movq %rdx,%r10
971 adcq $0,%r10
972
973 mulq %rbx
974 addq %rax,%r11
975 movq 32(%rbp),%rax
976 adcq $0,%rdx
977 addq %r11,%r10
978 movq 128+8(%rsp),%rsi # reload the constant; movq leaves the pending carry intact
979
980
981 adcq $0,%rdx
982 movq %rdx,%r11
983
984 mulq %rbx
985 addq %rax,%r12
986 movq 40(%rbp),%rax
987 adcq $0,%rdx
988 imulq %r8,%rsi # multiplier for the next round, from the new lowest limb
989 addq %r12,%r11
990 movq %rdx,%r12
991 adcq $0,%r12
992
993 mulq %rbx
994 addq %rax,%r13
995 movq 48(%rbp),%rax
996 adcq $0,%rdx
997 addq %r13,%r12
998 movq %rdx,%r13
999 adcq $0,%r13
1000
1001 mulq %rbx
1002 addq %rax,%r14
1003 movq 56(%rbp),%rax
1004 adcq $0,%rdx
1005 addq %r14,%r13
1006 movq %rdx,%r14
1007 adcq $0,%r14
1008
1009 mulq %rbx
1010 movq %rsi,%rbx
1011 addq %rax,%r15
1012 movq 0(%rbp),%rax
1013 adcq $0,%rdx
1014 addq %r15,%r14
1015 movq %rdx,%r15
1016 adcq $0,%r15
1017
1018 decl %ecx
1019 jne L$reduction_loop
1020
1021 .byte 0xf3,0xc3 # encoding of rep ret
1022
1023
1024
# __rsaz_512_subtract: file-local helper that stores %r8..%r15 at %rdi and then
# adds the masked negation of the eight limbs at %rbp, without branching.
#
# In:    %r8..%r15  eight limbs to store
#        %rdi       destination buffer (eight limbs)
#        %rbp       pointer to the eight limbs to be conditionally subtracted
#        %rcx       mask: all ones to subtract, zero to leave the value as is
#                   (callers produce it with sbbq %rcx,%rcx)
# Out:   (%rdi) and %r8..%r15 hold the final eight limbs
# Clobb: flags
# NOTE(review): the negation is built as neg(limb 0) with not(limbs 1..7),
# which equals the full two's complement only when limb 0 is non-zero --
# presumably guaranteed because the modulus is odd; confirm.
1025.p2align 5
1026__rsaz_512_subtract:
1027
1028 movq %r8,(%rdi)
1029 movq %r9,8(%rdi)
1030 movq %r10,16(%rdi)
1031 movq %r11,24(%rdi)
1032 movq %r12,32(%rdi)
1033 movq %r13,40(%rdi)
1034 movq %r14,48(%rdi)
1035 movq %r15,56(%rdi)
1036
# Build mask & (negated limbs of %rbp) in %r8..%r15.
1037 movq 0(%rbp),%r8
1038 movq 8(%rbp),%r9
1039 negq %r8
1040 notq %r9
1041 andq %rcx,%r8
1042 movq 16(%rbp),%r10
1043 andq %rcx,%r9
1044 notq %r10
1045 movq 24(%rbp),%r11
1046 andq %rcx,%r10
1047 notq %r11
1048 movq 32(%rbp),%r12
1049 andq %rcx,%r11
1050 notq %r12
1051 movq 40(%rbp),%r13
1052 andq %rcx,%r12
1053 notq %r13
1054 movq 48(%rbp),%r14
1055 andq %rcx,%r13
1056 notq %r14
1057 movq 56(%rbp),%r15
1058 andq %rcx,%r14
1059 notq %r15
1060 andq %rcx,%r15
1061
# Add the stored value to the masked negation, with carry across all limbs.
1062 addq (%rdi),%r8
1063 adcq 8(%rdi),%r9
1064 adcq 16(%rdi),%r10
1065 adcq 24(%rdi),%r11
1066 adcq 32(%rdi),%r12
1067 adcq 40(%rdi),%r13
1068 adcq 48(%rdi),%r14
1069 adcq 56(%rdi),%r15
1070
1071 movq %r8,(%rdi)
1072 movq %r9,8(%rdi)
1073 movq %r10,16(%rdi)
1074 movq %r11,24(%rdi)
1075 movq %r12,32(%rdi)
1076 movq %r13,40(%rdi)
1077 movq %r14,48(%rdi)
1078 movq %r15,56(%rdi)
1079
1080 .byte 0xf3,0xc3 # encoding of rep ret
1081
1082
1083
# __rsaz_512_mul: file-local helper computing the full 16-limb product of two
# eight-limb values, one row per limb of the second operand.
#
# In:    %rsi  pointer to the first operand (eight limbs)
#        %rbp  pointer to the second operand (eight limbs)
#        %rbx  limb 0 of the second operand, preloaded by the caller
# Out:   the 16 product limbs at 8(%rsp)..135(%rsp) as seen from this routine,
#        which is 0..127(%rsp) of the caller because of the return address
# Clobb: %rax, %rbx, %rcx, %rdx, %rdi, %rbp, %r8..%r15, flags
1084.p2align 5
1085__rsaz_512_mul:
1086
1087 leaq 8(%rsp),%rdi
1088
# Row 0: a[0..7] * b[0]; product limb 0 is stored, the rest stays in %r8..%r15.
1089 movq (%rsi),%rax
1090 mulq %rbx
1091 movq %rax,(%rdi)
1092 movq 8(%rsi),%rax
1093 movq %rdx,%r8
1094
1095 mulq %rbx
1096 addq %rax,%r8
1097 movq 16(%rsi),%rax
1098 movq %rdx,%r9
1099 adcq $0,%r9
1100
1101 mulq %rbx
1102 addq %rax,%r9
1103 movq 24(%rsi),%rax
1104 movq %rdx,%r10
1105 adcq $0,%r10
1106
1107 mulq %rbx
1108 addq %rax,%r10
1109 movq 32(%rsi),%rax
1110 movq %rdx,%r11
1111 adcq $0,%r11
1112
1113 mulq %rbx
1114 addq %rax,%r11
1115 movq 40(%rsi),%rax
1116 movq %rdx,%r12
1117 adcq $0,%r12
1118
1119 mulq %rbx
1120 addq %rax,%r12
1121 movq 48(%rsi),%rax
1122 movq %rdx,%r13
1123 adcq $0,%r13
1124
1125 mulq %rbx
1126 addq %rax,%r13
1127 movq 56(%rsi),%rax
1128 movq %rdx,%r14
1129 adcq $0,%r14
1130
1131 mulq %rbx
1132 addq %rax,%r14
1133 movq (%rsi),%rax
1134 movq %rdx,%r15
1135 adcq $0,%r15
1136
1137 leaq 8(%rbp),%rbp
1138 leaq 8(%rdi),%rdi
1139
1140 movl $7,%ecx
1141 jmp L$oop_mul
1142
1143.p2align 5
1144L$oop_mul:
# Rows 1..7: load the next limb of the second operand, multiply a[0..7] by it,
# add the running high part, and store one more product limb at (%rdi).
# %rax holds a[0] on entry to every iteration.
1145 movq (%rbp),%rbx
1146 mulq %rbx
1147 addq %rax,%r8
1148 movq 8(%rsi),%rax
1149 movq %r8,(%rdi)
1150 movq %rdx,%r8
1151 adcq $0,%r8
1152
1153 mulq %rbx
1154 addq %rax,%r9
1155 movq 16(%rsi),%rax
1156 adcq $0,%rdx
1157 addq %r9,%r8
1158 movq %rdx,%r9
1159 adcq $0,%r9
1160
1161 mulq %rbx
1162 addq %rax,%r10
1163 movq 24(%rsi),%rax
1164 adcq $0,%rdx
1165 addq %r10,%r9
1166 movq %rdx,%r10
1167 adcq $0,%r10
1168
1169 mulq %rbx
1170 addq %rax,%r11
1171 movq 32(%rsi),%rax
1172 adcq $0,%rdx
1173 addq %r11,%r10
1174 movq %rdx,%r11
1175 adcq $0,%r11
1176
1177 mulq %rbx
1178 addq %rax,%r12
1179 movq 40(%rsi),%rax
1180 adcq $0,%rdx
1181 addq %r12,%r11
1182 movq %rdx,%r12
1183 adcq $0,%r12
1184
1185 mulq %rbx
1186 addq %rax,%r13
1187 movq 48(%rsi),%rax
1188 adcq $0,%rdx
1189 addq %r13,%r12
1190 movq %rdx,%r13
1191 adcq $0,%r13
1192
1193 mulq %rbx
1194 addq %rax,%r14
1195 movq 56(%rsi),%rax
1196 adcq $0,%rdx
1197 addq %r14,%r13
1198 movq %rdx,%r14
1199 leaq 8(%rbp),%rbp
1200 adcq $0,%r14
1201
1202 mulq %rbx
1203 addq %rax,%r15
1204 movq (%rsi),%rax
1205 adcq $0,%rdx
1206 addq %r15,%r14
1207 movq %rdx,%r15
1208 adcq $0,%r15
1209
1210 leaq 8(%rdi),%rdi
1211
1212 decl %ecx
1213 jnz L$oop_mul
1214
# Store the remaining eight high limbs of the product.
1215 movq %r8,(%rdi)
1216 movq %r9,8(%rdi)
1217 movq %r10,16(%rdi)
1218 movq %r11,24(%rdi)
1219 movq %r12,32(%rdi)
1220 movq %r13,40(%rdi)
1221 movq %r14,48(%rdi)
1222 movq %r15,56(%rdi)
1223
1224 .byte 0xf3,0xc3 # encoding of rep ret
1225
1226
# _rsaz_512_scatter4: copies eight consecutive 64-bit limbs from %rsi into a
# table, writing one limb per 128-byte row.
#
# In:    %rdi  table base
#        %rsi  source limbs
#        %rdx  slot index; the first store goes to %rdi + 8*%rdx
# Clobb: %rax, %rsi, %rdi, %r9, flags
1227.globl _rsaz_512_scatter4
1228
1229.p2align 4
1230_rsaz_512_scatter4:
1231
1232 leaq (%rdi,%rdx,8),%rdi
1233 movl $8,%r9d
1234 jmp L$oop_scatter
1235.p2align 4
1236L$oop_scatter:
1237 movq (%rsi),%rax
1238 leaq 8(%rsi),%rsi
1239 movq %rax,(%rdi)
1240 leaq 128(%rdi),%rdi # advance to the same slot in the next row
1241 decl %r9d
1242 jnz L$oop_scatter
1243 .byte 0xf3,0xc3 # encoding of rep ret
1244
1245
1246
# _rsaz_512_gather4: reads eight 64-bit limbs from a table laid out as 128-byte
# rows of sixteen slots and writes them contiguously to %rdi.
#
# In:    %rdi  destination (eight limbs)
#        %rsi  table base
#        %edx  slot index; the masks below cover the values 0..15
# Clobb: %rsi, %rdi, %r9, %xmm0..%xmm15, flags
# Every row is loaded in full and filtered with compare masks, so the addresses
# touched do not depend on %edx.
# NOTE(review): movdqa requires the table rows to be 16-byte aligned -- confirm
# that callers guarantee this.
1247.globl _rsaz_512_gather4
1248
1249.p2align 4
1250_rsaz_512_gather4:
1251
# Build eight selection masks: %xmm0..%xmm7 start as the counter pairs
# {0,0,1,1}, {2,2,3,3}, ... {14,14,15,15} and are compared against the
# broadcast index.
1252 movd %edx,%xmm8
1253 movdqa L$inc+16(%rip),%xmm1
1254 movdqa L$inc(%rip),%xmm0
1255
1256 pshufd $0,%xmm8,%xmm8
1257 movdqa %xmm1,%xmm7
1258 movdqa %xmm1,%xmm2
1259 paddd %xmm0,%xmm1
1260 pcmpeqd %xmm8,%xmm0
1261 movdqa %xmm7,%xmm3
1262 paddd %xmm1,%xmm2
1263 pcmpeqd %xmm8,%xmm1
1264 movdqa %xmm7,%xmm4
1265 paddd %xmm2,%xmm3
1266 pcmpeqd %xmm8,%xmm2
1267 movdqa %xmm7,%xmm5
1268 paddd %xmm3,%xmm4
1269 pcmpeqd %xmm8,%xmm3
1270 movdqa %xmm7,%xmm6
1271 paddd %xmm4,%xmm5
1272 pcmpeqd %xmm8,%xmm4
1273 paddd %xmm5,%xmm6
1274 pcmpeqd %xmm8,%xmm5
1275 paddd %xmm6,%xmm7
1276 pcmpeqd %xmm8,%xmm6
1277 pcmpeqd %xmm8,%xmm7
1278 movl $8,%r9d
1279 jmp L$oop_gather
1280.p2align 4
1281L$oop_gather:
# One row per iteration: mask all sixteen slots, OR them together, fold the two
# 64-bit halves, and store the selected limb.
1282 movdqa 0(%rsi),%xmm8
1283 movdqa 16(%rsi),%xmm9
1284 movdqa 32(%rsi),%xmm10
1285 movdqa 48(%rsi),%xmm11
1286 pand %xmm0,%xmm8
1287 movdqa 64(%rsi),%xmm12
1288 pand %xmm1,%xmm9
1289 movdqa 80(%rsi),%xmm13
1290 pand %xmm2,%xmm10
1291 movdqa 96(%rsi),%xmm14
1292 pand %xmm3,%xmm11
1293 movdqa 112(%rsi),%xmm15
1294 leaq 128(%rsi),%rsi
1295 pand %xmm4,%xmm12
1296 pand %xmm5,%xmm13
1297 pand %xmm6,%xmm14
1298 pand %xmm7,%xmm15
1299 por %xmm10,%xmm8
1300 por %xmm11,%xmm9
1301 por %xmm12,%xmm8
1302 por %xmm13,%xmm9
1303 por %xmm14,%xmm8
1304 por %xmm15,%xmm9
1305
1306 por %xmm9,%xmm8
1307 pshufd $0x4e,%xmm8,%xmm9
1308 por %xmm9,%xmm8
1309 movq %xmm8,(%rdi)
1310 leaq 8(%rdi),%rdi
1311 decl %r9d
1312 jnz L$oop_gather
1313 .byte 0xf3,0xc3 # encoding of rep ret
1314L$SEH_end_rsaz_512_gather4:
1315
1316
1317
# L$inc: 64-byte-aligned constants used to build the gather selection masks.
# The first 16 bytes {0,0,1,1} are the starting counter pair; the next 16 bytes
# {2,2,2,2} are the step added to obtain each following pair.
1318.p2align 6
1319L$inc:
1320.long 0,0, 1,1
1321.long 2,2, 2,2
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette