VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.9/crypto/genasm-elf/rsaz-x86_64.S@ 100942

Last change on this file since 100942 was 94096, checked in by vboxsync, 3 years ago

libs/openssl-3.0.1: Fix for GNU assemblers and recreate asm files, bugref:10128

File size: 21.0 KB
Line 
.text

#-----------------------------------------------------------------------
# void rsaz_512_sqr(uint64_t out[8], const uint64_t inp[8],
#                   const uint64_t mod[8], uint64_t n0, int times)
# ABI:  SysV AMD64 (ELF)
# In:   rdi = out, rsi = inp, rdx = mod, rcx = n0 (Montgomery word,
#       presumably -mod^-1 mod 2^64 -- standard for this reduction),
#       r8d = number of squarings to chain
# Performs `times` Montgomery squarings: full 512x512 square into a
# 1024-bit product on the stack, then word-by-word reduction.
# Stack frame: 128 bytes of product scratch, n0 at 128(%rsp),
# remaining iteration count at 128+8(%rsp).
#-----------------------------------------------------------------------
.globl	rsaz_512_sqr
.type	rsaz_512_sqr,@function
.align	32
rsaz_512_sqr:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lsqr_body:
.byte	102,72,15,110,202		# movq %rdx,%xmm1 - stash mod pointer
	movq	(%rsi),%rdx		# a[0]
	movq	8(%rsi),%rax		# a[1]
	movq	%rcx,128(%rsp)		# save n0
	jmp	.Loop_sqr

.align	32
.Loop_sqr:
	movl	%r8d,128+8(%rsp)	# save remaining iteration count

	# a[0]*a[1..7]: first row of cross products
	movq	%rdx,%rbx		# rbx = a[0]
	movq	%rax,%rbp		# rbp = a[1]
	mulq	%rdx
	movq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	%rbx,%rax
	adcq	$0,%rdx

	# double the off-diagonal part and add a[0]^2
	xorq	%rcx,%rcx
	addq	%r8,%r8
	movq	%rdx,%r15
	adcq	$0,%rcx

	mulq	%rax
	addq	%r8,%rdx
	adcq	$0,%rcx

	movq	%rax,(%rsp)
	movq	%rdx,8(%rsp)

	# a[1]*a[2..7]
	movq	16(%rsi),%rax
	mulq	%rbp
	addq	%rax,%r10
	movq	24(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r12
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r13
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r14
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%rbp
	addq	%rax,%r15
	movq	%rbp,%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	adcq	$0,%rdx

	xorq	%rbx,%rbx
	addq	%r9,%r9
	movq	%rdx,%r8
	adcq	%r10,%r10
	adcq	$0,%rbx

	mulq	%rax			# a[1]^2

	addq	%rcx,%rax
	movq	16(%rsi),%rbp
	addq	%rax,%r9
	movq	24(%rsi),%rax
	adcq	%rdx,%r10
	adcq	$0,%rbx

	movq	%r9,16(%rsp)
	movq	%r10,24(%rsp)

	# a[2]*a[3..7]
	mulq	%rbp
	addq	%rax,%r12
	movq	32(%rsi),%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r13
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r13
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r14
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r14
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r15
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r15
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%rbp
	addq	%rax,%r8
	movq	%rbp,%rax
	adcq	$0,%rdx
	addq	%rcx,%r8
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r11,%r11
	movq	%rdx,%r9
	adcq	%r12,%r12
	adcq	$0,%rcx

	mulq	%rax			# a[2]^2

	addq	%rbx,%rax
	movq	24(%rsi),%r10
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	%rdx,%r12
	adcq	$0,%rcx

	movq	%r11,32(%rsp)
	movq	%r12,40(%rsp)

	# a[3]*a[4..7]  (also cache a[4]->r11, a[5]->r12, a[6]->rbp)
	movq	%rax,%r11
	mulq	%r10
	addq	%rax,%r14
	movq	40(%rsi),%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	movq	%rax,%r12
	mulq	%r10
	addq	%rax,%r15
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r15
	movq	%rdx,%rbx
	adcq	$0,%rbx

	movq	%rax,%rbp
	mulq	%r10
	addq	%rax,%r8
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rbx,%r8
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r10
	addq	%rax,%r9
	movq	%r10,%rax
	adcq	$0,%rdx
	addq	%rbx,%r9
	adcq	$0,%rdx

	xorq	%rbx,%rbx
	addq	%r13,%r13
	movq	%rdx,%r10
	adcq	%r14,%r14
	adcq	$0,%rbx

	mulq	%rax			# a[3]^2

	addq	%rcx,%rax
	addq	%rax,%r13
	movq	%r12,%rax
	adcq	%rdx,%r14
	adcq	$0,%rbx

	movq	%r13,48(%rsp)
	movq	%r14,56(%rsp)

	# a[4]*a[5..7]
	mulq	%r11
	addq	%rax,%r8
	movq	%rbp,%rax
	movq	%rdx,%rcx
	adcq	$0,%rcx

	mulq	%r11
	addq	%rax,%r9
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%rcx,%r9
	movq	%rdx,%rcx
	adcq	$0,%rcx

	movq	%rax,%r14
	mulq	%r11
	addq	%rax,%r10
	movq	%r11,%rax
	adcq	$0,%rdx
	addq	%rcx,%r10
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r15,%r15
	movq	%rdx,%r11
	adcq	%r8,%r8
	adcq	$0,%rcx

	mulq	%rax			# a[4]^2

	addq	%rbx,%rax
	addq	%rax,%r15
	movq	%rbp,%rax
	adcq	%rdx,%r8
	adcq	$0,%rcx

	movq	%r15,64(%rsp)
	movq	%r8,72(%rsp)

	# a[5]*a[6..7]
	mulq	%r12
	addq	%rax,%r10
	movq	%r14,%rax
	movq	%rdx,%rbx
	adcq	$0,%rbx

	mulq	%r12
	addq	%rax,%r11
	movq	%r12,%rax
	adcq	$0,%rdx
	addq	%rbx,%r11
	adcq	$0,%rdx

	xorq	%rbx,%rbx
	addq	%r9,%r9
	movq	%rdx,%r12
	adcq	%r10,%r10
	adcq	$0,%rbx

	mulq	%rax			# a[5]^2

	addq	%rcx,%rax
	addq	%rax,%r9
	movq	%r14,%rax
	adcq	%rdx,%r10
	adcq	$0,%rbx

	movq	%r9,80(%rsp)
	movq	%r10,88(%rsp)

	# a[6]*a[7]
	mulq	%rbp
	addq	%rax,%r12
	movq	%rbp,%rax
	adcq	$0,%rdx

	xorq	%rcx,%rcx
	addq	%r11,%r11
	movq	%rdx,%r13
	adcq	%r12,%r12
	adcq	$0,%rcx

	mulq	%rax			# a[6]^2

	addq	%rbx,%rax
	addq	%rax,%r11
	movq	%r14,%rax
	adcq	%rdx,%r12
	adcq	$0,%rcx

	movq	%r11,96(%rsp)
	movq	%r12,104(%rsp)

	# final limb: a[7]^2
	xorq	%rbx,%rbx
	addq	%r13,%r13
	adcq	$0,%rbx

	mulq	%rax

	addq	%rcx,%rax
	addq	%r13,%rax
	adcq	%rbx,%rdx

	# reload low half of the product, restore mod pointer, reduce
	movq	(%rsp),%r8
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15
.byte	102,72,15,126,205		# movq %xmm1,%rbp - mod pointer

	movq	%rax,112(%rsp)
	movq	%rdx,120(%rsp)

	call	__rsaz_512_reduce

	addq	64(%rsp),%r8		# add high half of the product
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx		# rcx = borrow mask for final subtract

	call	__rsaz_512_subtract

	movq	%r8,%rdx		# next iteration squares out[] in place
	movq	%r9,%rax
	movl	128+8(%rsp),%r8d
	movq	%rdi,%rsi

	decl	%r8d
	jnz	.Loop_sqr

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lsqr_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	rsaz_512_sqr,.-rsaz_512_sqr
#-----------------------------------------------------------------------
# void rsaz_512_mul(uint64_t out[8], const uint64_t a[8],
#                   const uint64_t b[8], const uint64_t mod[8],
#                   uint64_t n0)
# ABI:  SysV AMD64 (ELF)
# In:   rdi = out, rsi = a, rdx = b, rcx = mod, r8 = n0
# 512x512 multiply (via __rsaz_512_mul) followed by Montgomery
# reduction and conditional final subtraction.
#-----------------------------------------------------------------------
.globl	rsaz_512_mul
.type	rsaz_512_mul,@function
.align	32
rsaz_512_mul:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_body:
.byte	102,72,15,110,199		# movq %rdi,%xmm0 - stash out pointer
.byte	102,72,15,110,201		# movq %rcx,%xmm1 - stash mod pointer
	movq	%r8,128(%rsp)		# save n0
	movq	(%rdx),%rbx		# b[0]
	movq	%rdx,%rbp		# rbp = b (walked by __rsaz_512_mul)
	call	__rsaz_512_mul

.byte	102,72,15,126,199		# movq %xmm0,%rdi - restore out
.byte	102,72,15,126,205		# movq %xmm1,%rbp - restore mod

	movq	(%rsp),%r8		# low half of the 1024-bit product
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	addq	64(%rsp),%r8		# add high half of the product
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx		# borrow mask for conditional subtract

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	rsaz_512_mul,.-rsaz_512_mul
#-----------------------------------------------------------------------
# void rsaz_512_mul_gather4(uint64_t out[8], const uint64_t a[8],
#                           const uint64_t *table, const uint64_t mod[8],
#                           uint64_t n0, int index)
# ABI:  SysV AMD64 (ELF)
# In:   rdi = out, rsi = a, rdx = gather table, rcx = mod,
#       r8 = n0, r9d = table index (0..15)
# Multiplies a[] by the table entry selected by r9d, then Montgomery
# reduces.  The entry is gathered with pcmpeqd/pand/por masks so every
# table slot is read regardless of index (constant-time, cache-attack
# resistant selection).
#-----------------------------------------------------------------------
.globl	rsaz_512_mul_gather4
.type	rsaz_512_mul_gather4,@function
.align	32
rsaz_512_mul_gather4:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$152,%rsp		# 128 scratch + n0/out/mod save slots
.cfi_adjust_cfa_offset	152
.Lmul_gather4_body:
	# build eight 128-bit masks: xmm0..xmm7 = (lane index == r9d) ? ~0 : 0
	movd	%r9d,%xmm8
	movdqa	.Linc+16(%rip),%xmm1
	movdqa	.Linc(%rip),%xmm0

	pshufd	$0,%xmm8,%xmm8
	movdqa	%xmm1,%xmm7
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm8,%xmm0
	movdqa	%xmm7,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm8,%xmm1
	movdqa	%xmm7,%xmm4
	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm8,%xmm2
	movdqa	%xmm7,%xmm5
	paddd	%xmm3,%xmm4
	pcmpeqd	%xmm8,%xmm3
	movdqa	%xmm7,%xmm6
	paddd	%xmm4,%xmm5
	pcmpeqd	%xmm8,%xmm4
	paddd	%xmm5,%xmm6
	pcmpeqd	%xmm8,%xmm5
	paddd	%xmm6,%xmm7
	pcmpeqd	%xmm8,%xmm6
	pcmpeqd	%xmm8,%xmm7

	# gather b[0]: AND every slot with its mask, OR them together
	movdqa	0(%rdx),%xmm8
	movdqa	16(%rdx),%xmm9
	movdqa	32(%rdx),%xmm10
	movdqa	48(%rdx),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rdx),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rdx),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rdx),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rdx),%xmm15
	leaq	128(%rdx),%rbp		# rbp = next table row
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9
	por	%xmm9,%xmm8
.byte	102,76,15,126,195		# movq %xmm8,%rbx - gathered b[0]

	movq	%r8,128(%rsp)		# save n0
	movq	%rdi,128+8(%rsp)	# save out pointer
	movq	%rcx,128+16(%rsp)	# save mod pointer

	# row 0: a[0..7] * b[0]
	movq	(%rsi),%rax
	movq	8(%rsi),%rcx
	mulq	%rbx
	movq	%rax,(%rsp)
	movq	%rcx,%rax
	movq	%rdx,%r8

	mulq	%rbx
	addq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	(%rsi),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rsp),%rdi
	movl	$7,%ecx			# rows 1..7 remain
	jmp	.Loop_mul_gather

.align	32
.Loop_mul_gather:
	# gather b[i] from the current table row (constant-time, as above)
	movdqa	0(%rbp),%xmm8
	movdqa	16(%rbp),%xmm9
	movdqa	32(%rbp),%xmm10
	movdqa	48(%rbp),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rbp),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rbp),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rbp),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rbp),%xmm15
	leaq	128(%rbp),%rbp
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9
	por	%xmm9,%xmm8
.byte	102,76,15,126,195		# movq %xmm8,%rbx - gathered b[i]

	# accumulate a[0..7] * b[i] into the running product
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r15
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rdi),%rdi

	decl	%ecx
	jnz	.Loop_mul_gather

	# store the top eight limbs of the 1024-bit product
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	movq	128+8(%rsp),%rdi	# restore out pointer
	movq	128+16(%rsp),%rbp	# restore mod pointer

	movq	(%rsp),%r8		# low half of the product
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	addq	64(%rsp),%r8		# add high half
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
	sbbq	%rcx,%rcx		# borrow mask for conditional subtract

	call	__rsaz_512_subtract

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_gather4_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
#-----------------------------------------------------------------------
# void rsaz_512_mul_scatter4(uint64_t out[8], const uint64_t a[8],
#                            const uint64_t mod[8], uint64_t n0,
#                            uint64_t *table, int power)
# ABI:  SysV AMD64 (ELF)
# In:   rdi = out, rsi = a, rdx = mod, rcx = n0, r8 = scatter table,
#       r9d = table column index
# Montgomery-multiplies out[] by a[], then scatters the 8 result limbs
# into the table at stride 128 (column layout matching the gather
# routines above).
#-----------------------------------------------------------------------
.globl	rsaz_512_mul_scatter4
.type	rsaz_512_mul_scatter4,@function
.align	32
rsaz_512_mul_scatter4:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	movl	%r9d,%r9d		# zero-extend the index
	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_scatter4_body:
	leaq	(%r8,%r9,8),%r8		# r8 = &table[power]
.byte	102,72,15,110,199		# movq %rdi,%xmm0 - stash out pointer
.byte	102,72,15,110,202		# movq %rdx,%xmm1 - stash mod pointer
.byte	102,73,15,110,208		# movq %r8,%xmm2  - stash scatter target
	movq	%rcx,128(%rsp)		# save n0

	movq	%rdi,%rbp		# b operand walked by __rsaz_512_mul
	movq	(%rdi),%rbx		# b[0]
	call	__rsaz_512_mul

.byte	102,72,15,126,199		# movq %xmm0,%rdi - restore out
.byte	102,72,15,126,205		# movq %xmm1,%rbp - restore mod

	movq	(%rsp),%r8		# low half of the product
	movq	8(%rsp),%r9
	movq	16(%rsp),%r10
	movq	24(%rsp),%r11
	movq	32(%rsp),%r12
	movq	40(%rsp),%r13
	movq	48(%rsp),%r14
	movq	56(%rsp),%r15

	call	__rsaz_512_reduce
	addq	64(%rsp),%r8		# add high half
	adcq	72(%rsp),%r9
	adcq	80(%rsp),%r10
	adcq	88(%rsp),%r11
	adcq	96(%rsp),%r12
	adcq	104(%rsp),%r13
	adcq	112(%rsp),%r14
	adcq	120(%rsp),%r15
.byte	102,72,15,126,214		# movq %xmm2,%rsi - scatter target
	sbbq	%rcx,%rcx		# borrow mask for conditional subtract

	call	__rsaz_512_subtract

	# scatter the 8 limbs at stride 128 bytes
	movq	%r8,0(%rsi)
	movq	%r9,128(%rsi)
	movq	%r10,256(%rsi)
	movq	%r11,384(%rsi)
	movq	%r12,512(%rsi)
	movq	%r13,640(%rsi)
	movq	%r14,768(%rsi)
	movq	%r15,896(%rsi)

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_scatter4_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
#-----------------------------------------------------------------------
# void rsaz_512_mul_by_one(uint64_t out[8], const uint64_t a[8],
#                          const uint64_t mod[8], uint64_t n0)
# ABI:  SysV AMD64 (ELF)
# In:   rdi = out, rsi = a, rdx = mod, rcx = n0
# Converts a[] out of Montgomery form: places a[] in the low half of a
# zeroed 1024-bit scratch buffer and runs the reduction once.
#-----------------------------------------------------------------------
.globl	rsaz_512_mul_by_one
.type	rsaz_512_mul_by_one,@function
.align	32
rsaz_512_mul_by_one:
.cfi_startproc
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	%r15,-56

	subq	$128+24,%rsp
.cfi_adjust_cfa_offset	128+24
.Lmul_by_one_body:
	movq	%rdx,%rbp		# rbp = mod (read by __rsaz_512_reduce)
	movq	%rcx,128(%rsp)		# save n0

	movq	(%rsi),%r8		# load a[] into the reduction registers
	pxor	%xmm0,%xmm0
	movq	8(%rsi),%r9
	movq	16(%rsi),%r10
	movq	24(%rsi),%r11
	movq	32(%rsi),%r12
	movq	40(%rsi),%r13
	movq	48(%rsi),%r14
	movq	56(%rsi),%r15

	# zero the 1024-bit scratch product (high half stays zero)
	movdqa	%xmm0,(%rsp)
	movdqa	%xmm0,16(%rsp)
	movdqa	%xmm0,32(%rsp)
	movdqa	%xmm0,48(%rsp)
	movdqa	%xmm0,64(%rsp)
	movdqa	%xmm0,80(%rsp)
	movdqa	%xmm0,96(%rsp)
	call	__rsaz_512_reduce
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	leaq	128+24+48(%rsp),%rax
.cfi_def_cfa	%rax,8
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lmul_by_one_epilogue:
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
#-----------------------------------------------------------------------
# __rsaz_512_reduce - Montgomery reduction helper (local).
# In:   r8..r15 = low 8 limbs of the product, rbp = mod pointer,
#       caller's 128(%rsp) = n0 (read here as 128+8(%rsp) because the
#       call pushed a return address).
# Out:  r8..r15 = reduced value; high product limbs are NOT added here
#       (callers add 64..120(%rsp) afterwards).
# Clobbers: rax, rbx, rcx, rdx, rsi, flags.
#-----------------------------------------------------------------------
.type	__rsaz_512_reduce,@function
.align	32
__rsaz_512_reduce:
.cfi_startproc
	movq	%r8,%rbx
	imulq	128+8(%rsp),%rbx	# rbx = m = r8 * n0 (first Montgomery multiplier)
	movq	0(%rbp),%rax
	movl	$8,%ecx			# 8 reduction steps, one per limb
	jmp	.Lreduction_loop

.align	32
.Lreduction_loop:
	# add m * mod[0..7] to r8..r15; low limb cancels (r8 + m*mod[0] == 0 mod 2^64)
	mulq	%rbx
	movq	8(%rbp),%rax
	negq	%r8			# sets CF iff r8 != 0; low limb folds into carry
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rbp),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rbp),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rbp),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	128+8(%rsp),%rsi	# rsi = n0 (hoisted for next multiplier)

	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rbp),%rax
	adcq	$0,%rdx
	imulq	%r8,%rsi		# rsi = next m, from the new low limb
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rbp),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rbp),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	movq	%rsi,%rbx		# rotate in the next multiplier
	addq	%rax,%r15
	movq	0(%rbp),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	decl	%ecx
	jne	.Lreduction_loop

	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	__rsaz_512_reduce,.-__rsaz_512_reduce
#-----------------------------------------------------------------------
# __rsaz_512_subtract - conditional final subtraction helper (local).
# In:   r8..r15 = candidate result, rdi = out, rbp = mod pointer,
#       rcx = all-ones mask when mod must be subtracted, 0 otherwise.
# Out:  out[0..7] and r8..r15 = result (branch-free: adds either
#       (-mod) or 0 depending on the mask).
# Clobbers: flags.
#-----------------------------------------------------------------------
.type	__rsaz_512_subtract,@function
.align	32
__rsaz_512_subtract:
.cfi_startproc
	movq	%r8,(%rdi)		# store the unreduced candidate first
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	# build (-mod) & mask:  -x == ~x + 1, the +1 rides the carry chain
	movq	0(%rbp),%r8
	movq	8(%rbp),%r9
	negq	%r8
	notq	%r9
	andq	%rcx,%r8
	movq	16(%rbp),%r10
	andq	%rcx,%r9
	notq	%r10
	movq	24(%rbp),%r11
	andq	%rcx,%r10
	notq	%r11
	movq	32(%rbp),%r12
	andq	%rcx,%r11
	notq	%r12
	movq	40(%rbp),%r13
	andq	%rcx,%r12
	notq	%r13
	movq	48(%rbp),%r14
	andq	%rcx,%r13
	notq	%r14
	movq	56(%rbp),%r15
	andq	%rcx,%r14
	notq	%r15
	andq	%rcx,%r15

	addq	(%rdi),%r8		# candidate + ((-mod) & mask)
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15

	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	__rsaz_512_subtract,.-__rsaz_512_subtract
#-----------------------------------------------------------------------
# __rsaz_512_mul - 512x512 -> 1024-bit schoolbook multiply (local).
# In:   rsi = a, rbp = b (rbp advances past b while multiplying),
#       rbx = b[0] preloaded by the caller.
# Out:  16-limb product at the caller's (%rsp).. (written via
#       8(%rsp)-relative rdi here, which is the caller's (%rsp)
#       because the call pushed a return address).
# Clobbers: rax, rbx, rcx, rdx, rdi, r8-r15, flags.
#-----------------------------------------------------------------------
.type	__rsaz_512_mul,@function
.align	32
__rsaz_512_mul:
.cfi_startproc
	leaq	8(%rsp),%rdi		# rdi = caller's product buffer

	# row 0: a[0..7] * b[0]
	movq	(%rsi),%rax
	mulq	%rbx
	movq	%rax,(%rdi)
	movq	8(%rsi),%rax
	movq	%rdx,%r8

	mulq	%rbx
	addq	%rax,%r8
	movq	16(%rsi),%rax
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r9
	movq	24(%rsi),%rax
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r10
	movq	32(%rsi),%rax
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r11
	movq	40(%rsi),%rax
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r12
	movq	48(%rsi),%rax
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r13
	movq	56(%rsi),%rax
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r14
	movq	(%rsi),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rbp),%rbp		# advance to b[1]
	leaq	8(%rdi),%rdi

	movl	$7,%ecx			# rows 1..7
	jmp	.Loop_mul

.align	32
.Loop_mul:
	# row i: accumulate a[0..7] * b[i], spilling the lowest limb
	movq	(%rbp),%rbx
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rsi),%rax
	movq	%r8,(%rdi)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rsi),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rsi),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rsi),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rsi),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	leaq	8(%rbp),%rbp		# advance b pointer for next row
	adcq	$0,%r14

	mulq	%rbx
	addq	%rax,%r15
	movq	(%rsi),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	leaq	8(%rdi),%rdi

	decl	%ecx
	jnz	.Loop_mul

	# store the top eight limbs
	movq	%r8,(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	__rsaz_512_mul,.-__rsaz_512_mul
#-----------------------------------------------------------------------
# void rsaz_512_scatter4(uint64_t *table, const uint64_t src[8],
#                        int power)
# ABI:  SysV AMD64 (ELF)
# In:   rdi = table, rsi = src, rdx = column index
# Writes the 8 limbs of src into table column `power`, one limb every
# 128 bytes (layout consumed by rsaz_512_gather4).
#-----------------------------------------------------------------------
.globl	rsaz_512_scatter4
.type	rsaz_512_scatter4,@function
.align	16
rsaz_512_scatter4:
.cfi_startproc
	leaq	(%rdi,%rdx,8),%rdi	# rdi = &table[power]
	movl	$8,%r9d
	jmp	.Loop_scatter
.align	16
.Loop_scatter:
	movq	(%rsi),%rax
	leaq	8(%rsi),%rsi
	movq	%rax,(%rdi)
	leaq	128(%rdi),%rdi		# stride one table row
	decl	%r9d
	jnz	.Loop_scatter
	.byte	0xf3,0xc3		# rep ret
.cfi_endproc
.size	rsaz_512_scatter4,.-rsaz_512_scatter4
1276
#-----------------------------------------------------------------------
# void rsaz_512_gather4(uint64_t dst[8], const uint64_t *table,
#                       int power)
# ABI:  SysV AMD64 (ELF)
# In:   rdi = dst, rsi = table, edx = column index
# Reads one 8-limb column out of the scatter table.  Every slot of each
# row is touched and masked with pcmpeqd/pand/por, so the memory access
# pattern is independent of `power` (constant-time selection).
#-----------------------------------------------------------------------
.globl	rsaz_512_gather4
.type	rsaz_512_gather4,@function
.align	16
rsaz_512_gather4:
.cfi_startproc
	# build masks xmm0..xmm7 = (lane index == edx) ? ~0 : 0
	movd	%edx,%xmm8
	movdqa	.Linc+16(%rip),%xmm1
	movdqa	.Linc(%rip),%xmm0

	pshufd	$0,%xmm8,%xmm8
	movdqa	%xmm1,%xmm7
	movdqa	%xmm1,%xmm2
	paddd	%xmm0,%xmm1
	pcmpeqd	%xmm8,%xmm0
	movdqa	%xmm7,%xmm3
	paddd	%xmm1,%xmm2
	pcmpeqd	%xmm8,%xmm1
	movdqa	%xmm7,%xmm4
	paddd	%xmm2,%xmm3
	pcmpeqd	%xmm8,%xmm2
	movdqa	%xmm7,%xmm5
	paddd	%xmm3,%xmm4
	pcmpeqd	%xmm8,%xmm3
	movdqa	%xmm7,%xmm6
	paddd	%xmm4,%xmm5
	pcmpeqd	%xmm8,%xmm4
	paddd	%xmm5,%xmm6
	pcmpeqd	%xmm8,%xmm5
	paddd	%xmm6,%xmm7
	pcmpeqd	%xmm8,%xmm6
	pcmpeqd	%xmm8,%xmm7
	movl	$8,%r9d			# 8 limbs to gather
	jmp	.Loop_gather
.align	16
.Loop_gather:
	movdqa	0(%rsi),%xmm8
	movdqa	16(%rsi),%xmm9
	movdqa	32(%rsi),%xmm10
	movdqa	48(%rsi),%xmm11
	pand	%xmm0,%xmm8
	movdqa	64(%rsi),%xmm12
	pand	%xmm1,%xmm9
	movdqa	80(%rsi),%xmm13
	pand	%xmm2,%xmm10
	movdqa	96(%rsi),%xmm14
	pand	%xmm3,%xmm11
	movdqa	112(%rsi),%xmm15
	leaq	128(%rsi),%rsi		# next table row
	pand	%xmm4,%xmm12
	pand	%xmm5,%xmm13
	pand	%xmm6,%xmm14
	pand	%xmm7,%xmm15
	por	%xmm10,%xmm8
	por	%xmm11,%xmm9
	por	%xmm12,%xmm8
	por	%xmm13,%xmm9
	por	%xmm14,%xmm8
	por	%xmm15,%xmm9

	por	%xmm9,%xmm8
	pshufd	$0x4e,%xmm8,%xmm9
	por	%xmm9,%xmm8		# fold to the selected 64-bit limb
	movq	%xmm8,(%rdi)
	leaq	8(%rdi),%rdi
	decl	%r9d
	jnz	.Loop_gather
	.byte	0xf3,0xc3		# rep ret
.LSEH_end_rsaz_512_gather4:
.cfi_endproc
.size	rsaz_512_gather4,.-rsaz_512_gather4
1347
# Lane-index increments used by the gather mask construction above.
.align	64
.Linc:
.long	0,0, 1,1
.long	2,2, 2,2
	# GNU property note (type 0xc0000002): marks the object as
	# compatible with Intel CET / IBT-aware linking.
	.section ".note.gnu.property", "a"
	.p2align 3
	.long 1f - 0f
	.long 4f - 1f
	.long 5
0:
	# "GNU" encoded with .byte, since .asciz isn't supported
	# on Solaris.
	.byte 0x47
	.byte 0x4e
	.byte 0x55
	.byte 0
1:
	.p2align 3
	.long 0xc0000002
	.long 3f - 2f
2:
	.long 3
3:
	.p2align 3
4:
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette