VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.1j/crypto/genasm-elf/rsaz-x86_64.S (revision 88461)

Last change to this file as of revision 88461 was revision 83531, checked in by vboxsync (5 years ago), with the commit message:

setting svn:sync-process=export for openssl-1.1.1f, all files except tests

File size: 20.7 KB
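
The listing below is the pre-generated ELF (AT&T-syntax) assembly of OpenSSL's rsaz-x86_64 module as bundled with VirtualBox's openssl-1.1.1j. It exports the 512-bit Montgomery primitives rsaz_512_sqr, rsaz_512_mul, rsaz_512_mul_gather4, rsaz_512_mul_scatter4 and rsaz_512_mul_by_one, plus the table helpers rsaz_512_scatter4 and rsaz_512_gather4, all built on the internal routines __rsaz_512_mul, __rsaz_512_reduce and __rsaz_512_subtract. The gather routines touch every table entry and pick out the wanted one with pcmpeqd/pand/por masking rather than an indexed load, the usual cache-timing countermeasure. Two short C reference sketches of the table layout and of the reduction step follow the listing.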
1.text
2
3
4
5.globl rsaz_512_sqr
6.type rsaz_512_sqr,@function
7.align 32
8rsaz_512_sqr:
9.cfi_startproc
10 pushq %rbx
11.cfi_adjust_cfa_offset 8
12.cfi_offset %rbx,-16
13 pushq %rbp
14.cfi_adjust_cfa_offset 8
15.cfi_offset %rbp,-24
16 pushq %r12
17.cfi_adjust_cfa_offset 8
18.cfi_offset %r12,-32
19 pushq %r13
20.cfi_adjust_cfa_offset 8
21.cfi_offset %r13,-40
22 pushq %r14
23.cfi_adjust_cfa_offset 8
24.cfi_offset %r14,-48
25 pushq %r15
26.cfi_adjust_cfa_offset 8
27.cfi_offset %r15,-56
28
29 subq $128+24,%rsp
30.cfi_adjust_cfa_offset 128+24
31.Lsqr_body:
32 movq %rdx,%rbp
33 movq (%rsi),%rdx
34 movq 8(%rsi),%rax
35 movq %rcx,128(%rsp)
36 jmp .Loop_sqr
37
38.align 32
39.Loop_sqr:
40 movl %r8d,128+8(%rsp)
41
42 movq %rdx,%rbx
43 mulq %rdx
44 movq %rax,%r8
45 movq 16(%rsi),%rax
46 movq %rdx,%r9
47
48 mulq %rbx
49 addq %rax,%r9
50 movq 24(%rsi),%rax
51 movq %rdx,%r10
52 adcq $0,%r10
53
54 mulq %rbx
55 addq %rax,%r10
56 movq 32(%rsi),%rax
57 movq %rdx,%r11
58 adcq $0,%r11
59
60 mulq %rbx
61 addq %rax,%r11
62 movq 40(%rsi),%rax
63 movq %rdx,%r12
64 adcq $0,%r12
65
66 mulq %rbx
67 addq %rax,%r12
68 movq 48(%rsi),%rax
69 movq %rdx,%r13
70 adcq $0,%r13
71
72 mulq %rbx
73 addq %rax,%r13
74 movq 56(%rsi),%rax
75 movq %rdx,%r14
76 adcq $0,%r14
77
78 mulq %rbx
79 addq %rax,%r14
80 movq %rbx,%rax
81 movq %rdx,%r15
82 adcq $0,%r15
83
84 addq %r8,%r8
85 movq %r9,%rcx
86 adcq %r9,%r9
87
88 mulq %rax
89 movq %rax,(%rsp)
90 addq %rdx,%r8
91 adcq $0,%r9
92
93 movq %r8,8(%rsp)
94 shrq $63,%rcx
95
96
97 movq 8(%rsi),%r8
98 movq 16(%rsi),%rax
99 mulq %r8
100 addq %rax,%r10
101 movq 24(%rsi),%rax
102 movq %rdx,%rbx
103 adcq $0,%rbx
104
105 mulq %r8
106 addq %rax,%r11
107 movq 32(%rsi),%rax
108 adcq $0,%rdx
109 addq %rbx,%r11
110 movq %rdx,%rbx
111 adcq $0,%rbx
112
113 mulq %r8
114 addq %rax,%r12
115 movq 40(%rsi),%rax
116 adcq $0,%rdx
117 addq %rbx,%r12
118 movq %rdx,%rbx
119 adcq $0,%rbx
120
121 mulq %r8
122 addq %rax,%r13
123 movq 48(%rsi),%rax
124 adcq $0,%rdx
125 addq %rbx,%r13
126 movq %rdx,%rbx
127 adcq $0,%rbx
128
129 mulq %r8
130 addq %rax,%r14
131 movq 56(%rsi),%rax
132 adcq $0,%rdx
133 addq %rbx,%r14
134 movq %rdx,%rbx
135 adcq $0,%rbx
136
137 mulq %r8
138 addq %rax,%r15
139 movq %r8,%rax
140 adcq $0,%rdx
141 addq %rbx,%r15
142 movq %rdx,%r8
143 movq %r10,%rdx
144 adcq $0,%r8
145
146 addq %rdx,%rdx
147 leaq (%rcx,%r10,2),%r10
148 movq %r11,%rbx
149 adcq %r11,%r11
150
151 mulq %rax
152 addq %rax,%r9
153 adcq %rdx,%r10
154 adcq $0,%r11
155
156 movq %r9,16(%rsp)
157 movq %r10,24(%rsp)
158 shrq $63,%rbx
159
160
161 movq 16(%rsi),%r9
162 movq 24(%rsi),%rax
163 mulq %r9
164 addq %rax,%r12
165 movq 32(%rsi),%rax
166 movq %rdx,%rcx
167 adcq $0,%rcx
168
169 mulq %r9
170 addq %rax,%r13
171 movq 40(%rsi),%rax
172 adcq $0,%rdx
173 addq %rcx,%r13
174 movq %rdx,%rcx
175 adcq $0,%rcx
176
177 mulq %r9
178 addq %rax,%r14
179 movq 48(%rsi),%rax
180 adcq $0,%rdx
181 addq %rcx,%r14
182 movq %rdx,%rcx
183 adcq $0,%rcx
184
185 mulq %r9
186 movq %r12,%r10
187 leaq (%rbx,%r12,2),%r12
188 addq %rax,%r15
189 movq 56(%rsi),%rax
190 adcq $0,%rdx
191 addq %rcx,%r15
192 movq %rdx,%rcx
193 adcq $0,%rcx
194
195 mulq %r9
196 shrq $63,%r10
197 addq %rax,%r8
198 movq %r9,%rax
199 adcq $0,%rdx
200 addq %rcx,%r8
201 movq %rdx,%r9
202 adcq $0,%r9
203
204 movq %r13,%rcx
205 leaq (%r10,%r13,2),%r13
206
207 mulq %rax
208 addq %rax,%r11
209 adcq %rdx,%r12
210 adcq $0,%r13
211
212 movq %r11,32(%rsp)
213 movq %r12,40(%rsp)
214 shrq $63,%rcx
215
216
217 movq 24(%rsi),%r10
218 movq 32(%rsi),%rax
219 mulq %r10
220 addq %rax,%r14
221 movq 40(%rsi),%rax
222 movq %rdx,%rbx
223 adcq $0,%rbx
224
225 mulq %r10
226 addq %rax,%r15
227 movq 48(%rsi),%rax
228 adcq $0,%rdx
229 addq %rbx,%r15
230 movq %rdx,%rbx
231 adcq $0,%rbx
232
233 mulq %r10
234 movq %r14,%r12
235 leaq (%rcx,%r14,2),%r14
236 addq %rax,%r8
237 movq 56(%rsi),%rax
238 adcq $0,%rdx
239 addq %rbx,%r8
240 movq %rdx,%rbx
241 adcq $0,%rbx
242
243 mulq %r10
244 shrq $63,%r12
245 addq %rax,%r9
246 movq %r10,%rax
247 adcq $0,%rdx
248 addq %rbx,%r9
249 movq %rdx,%r10
250 adcq $0,%r10
251
252 movq %r15,%rbx
253 leaq (%r12,%r15,2),%r15
254
255 mulq %rax
256 addq %rax,%r13
257 adcq %rdx,%r14
258 adcq $0,%r15
259
260 movq %r13,48(%rsp)
261 movq %r14,56(%rsp)
262 shrq $63,%rbx
263
264
265 movq 32(%rsi),%r11
266 movq 40(%rsi),%rax
267 mulq %r11
268 addq %rax,%r8
269 movq 48(%rsi),%rax
270 movq %rdx,%rcx
271 adcq $0,%rcx
272
273 mulq %r11
274 addq %rax,%r9
275 movq 56(%rsi),%rax
276 adcq $0,%rdx
277 movq %r8,%r12
278 leaq (%rbx,%r8,2),%r8
279 addq %rcx,%r9
280 movq %rdx,%rcx
281 adcq $0,%rcx
282
283 mulq %r11
284 shrq $63,%r12
285 addq %rax,%r10
286 movq %r11,%rax
287 adcq $0,%rdx
288 addq %rcx,%r10
289 movq %rdx,%r11
290 adcq $0,%r11
291
292 movq %r9,%rcx
293 leaq (%r12,%r9,2),%r9
294
295 mulq %rax
296 addq %rax,%r15
297 adcq %rdx,%r8
298 adcq $0,%r9
299
300 movq %r15,64(%rsp)
301 movq %r8,72(%rsp)
302 shrq $63,%rcx
303
304
305 movq 40(%rsi),%r12
306 movq 48(%rsi),%rax
307 mulq %r12
308 addq %rax,%r10
309 movq 56(%rsi),%rax
310 movq %rdx,%rbx
311 adcq $0,%rbx
312
313 mulq %r12
314 addq %rax,%r11
315 movq %r12,%rax
316 movq %r10,%r15
317 leaq (%rcx,%r10,2),%r10
318 adcq $0,%rdx
319 shrq $63,%r15
320 addq %rbx,%r11
321 movq %rdx,%r12
322 adcq $0,%r12
323
324 movq %r11,%rbx
325 leaq (%r15,%r11,2),%r11
326
327 mulq %rax
328 addq %rax,%r9
329 adcq %rdx,%r10
330 adcq $0,%r11
331
332 movq %r9,80(%rsp)
333 movq %r10,88(%rsp)
334
335
336 movq 48(%rsi),%r13
337 movq 56(%rsi),%rax
338 mulq %r13
339 addq %rax,%r12
340 movq %r13,%rax
341 movq %rdx,%r13
342 adcq $0,%r13
343
344 xorq %r14,%r14
345 shlq $1,%rbx
346 adcq %r12,%r12
347 adcq %r13,%r13
348 adcq %r14,%r14
349
350 mulq %rax
351 addq %rax,%r11
352 adcq %rdx,%r12
353 adcq $0,%r13
354
355 movq %r11,96(%rsp)
356 movq %r12,104(%rsp)
357
358
359 movq 56(%rsi),%rax
360 mulq %rax
361 addq %rax,%r13
362 adcq $0,%rdx
363
364 addq %rdx,%r14
365
366 movq %r13,112(%rsp)
367 movq %r14,120(%rsp)
368
369 movq (%rsp),%r8
370 movq 8(%rsp),%r9
371 movq 16(%rsp),%r10
372 movq 24(%rsp),%r11
373 movq 32(%rsp),%r12
374 movq 40(%rsp),%r13
375 movq 48(%rsp),%r14
376 movq 56(%rsp),%r15
377
378 call __rsaz_512_reduce
379
380 addq 64(%rsp),%r8
381 adcq 72(%rsp),%r9
382 adcq 80(%rsp),%r10
383 adcq 88(%rsp),%r11
384 adcq 96(%rsp),%r12
385 adcq 104(%rsp),%r13
386 adcq 112(%rsp),%r14
387 adcq 120(%rsp),%r15
388 sbbq %rcx,%rcx
389
390 call __rsaz_512_subtract
391
392 movq %r8,%rdx
393 movq %r9,%rax
394 movl 128+8(%rsp),%r8d
395 movq %rdi,%rsi
396
397 decl %r8d
398 jnz .Loop_sqr
399
400 leaq 128+24+48(%rsp),%rax
401.cfi_def_cfa %rax,8
402 movq -48(%rax),%r15
403.cfi_restore %r15
404 movq -40(%rax),%r14
405.cfi_restore %r14
406 movq -32(%rax),%r13
407.cfi_restore %r13
408 movq -24(%rax),%r12
409.cfi_restore %r12
410 movq -16(%rax),%rbp
411.cfi_restore %rbp
412 movq -8(%rax),%rbx
413.cfi_restore %rbx
414 leaq (%rax),%rsp
415.cfi_def_cfa_register %rsp
416.Lsqr_epilogue:
417 .byte 0xf3,0xc3
418.cfi_endproc
419.size rsaz_512_sqr,.-rsaz_512_sqr
420.globl rsaz_512_mul
421.type rsaz_512_mul,@function
422.align 32
423rsaz_512_mul:
424.cfi_startproc
425 pushq %rbx
426.cfi_adjust_cfa_offset 8
427.cfi_offset %rbx,-16
428 pushq %rbp
429.cfi_adjust_cfa_offset 8
430.cfi_offset %rbp,-24
431 pushq %r12
432.cfi_adjust_cfa_offset 8
433.cfi_offset %r12,-32
434 pushq %r13
435.cfi_adjust_cfa_offset 8
436.cfi_offset %r13,-40
437 pushq %r14
438.cfi_adjust_cfa_offset 8
439.cfi_offset %r14,-48
440 pushq %r15
441.cfi_adjust_cfa_offset 8
442.cfi_offset %r15,-56
443
444 subq $128+24,%rsp
445.cfi_adjust_cfa_offset 128+24
446.Lmul_body:
447.byte 102,72,15,110,199
448.byte 102,72,15,110,201
449 movq %r8,128(%rsp)
450 movq (%rdx),%rbx
451 movq %rdx,%rbp
452 call __rsaz_512_mul
453
454.byte 102,72,15,126,199
455.byte 102,72,15,126,205
456
457 movq (%rsp),%r8
458 movq 8(%rsp),%r9
459 movq 16(%rsp),%r10
460 movq 24(%rsp),%r11
461 movq 32(%rsp),%r12
462 movq 40(%rsp),%r13
463 movq 48(%rsp),%r14
464 movq 56(%rsp),%r15
465
466 call __rsaz_512_reduce
467 addq 64(%rsp),%r8
468 adcq 72(%rsp),%r9
469 adcq 80(%rsp),%r10
470 adcq 88(%rsp),%r11
471 adcq 96(%rsp),%r12
472 adcq 104(%rsp),%r13
473 adcq 112(%rsp),%r14
474 adcq 120(%rsp),%r15
475 sbbq %rcx,%rcx
476
477 call __rsaz_512_subtract
478
479 leaq 128+24+48(%rsp),%rax
480.cfi_def_cfa %rax,8
481 movq -48(%rax),%r15
482.cfi_restore %r15
483 movq -40(%rax),%r14
484.cfi_restore %r14
485 movq -32(%rax),%r13
486.cfi_restore %r13
487 movq -24(%rax),%r12
488.cfi_restore %r12
489 movq -16(%rax),%rbp
490.cfi_restore %rbp
491 movq -8(%rax),%rbx
492.cfi_restore %rbx
493 leaq (%rax),%rsp
494.cfi_def_cfa_register %rsp
495.Lmul_epilogue:
496 .byte 0xf3,0xc3
497.cfi_endproc
498.size rsaz_512_mul,.-rsaz_512_mul
499.globl rsaz_512_mul_gather4
500.type rsaz_512_mul_gather4,@function
501.align 32
502rsaz_512_mul_gather4:
503.cfi_startproc
504 pushq %rbx
505.cfi_adjust_cfa_offset 8
506.cfi_offset %rbx,-16
507 pushq %rbp
508.cfi_adjust_cfa_offset 8
509.cfi_offset %rbp,-24
510 pushq %r12
511.cfi_adjust_cfa_offset 8
512.cfi_offset %r12,-32
513 pushq %r13
514.cfi_adjust_cfa_offset 8
515.cfi_offset %r13,-40
516 pushq %r14
517.cfi_adjust_cfa_offset 8
518.cfi_offset %r14,-48
519 pushq %r15
520.cfi_adjust_cfa_offset 8
521.cfi_offset %r15,-56
522
523 subq $152,%rsp
524.cfi_adjust_cfa_offset 152
525.Lmul_gather4_body:
526 movd %r9d,%xmm8
527 movdqa .Linc+16(%rip),%xmm1
528 movdqa .Linc(%rip),%xmm0
529
530 pshufd $0,%xmm8,%xmm8
531 movdqa %xmm1,%xmm7
532 movdqa %xmm1,%xmm2
533 paddd %xmm0,%xmm1
534 pcmpeqd %xmm8,%xmm0
535 movdqa %xmm7,%xmm3
536 paddd %xmm1,%xmm2
537 pcmpeqd %xmm8,%xmm1
538 movdqa %xmm7,%xmm4
539 paddd %xmm2,%xmm3
540 pcmpeqd %xmm8,%xmm2
541 movdqa %xmm7,%xmm5
542 paddd %xmm3,%xmm4
543 pcmpeqd %xmm8,%xmm3
544 movdqa %xmm7,%xmm6
545 paddd %xmm4,%xmm5
546 pcmpeqd %xmm8,%xmm4
547 paddd %xmm5,%xmm6
548 pcmpeqd %xmm8,%xmm5
549 paddd %xmm6,%xmm7
550 pcmpeqd %xmm8,%xmm6
551 pcmpeqd %xmm8,%xmm7
552
553 movdqa 0(%rdx),%xmm8
554 movdqa 16(%rdx),%xmm9
555 movdqa 32(%rdx),%xmm10
556 movdqa 48(%rdx),%xmm11
557 pand %xmm0,%xmm8
558 movdqa 64(%rdx),%xmm12
559 pand %xmm1,%xmm9
560 movdqa 80(%rdx),%xmm13
561 pand %xmm2,%xmm10
562 movdqa 96(%rdx),%xmm14
563 pand %xmm3,%xmm11
564 movdqa 112(%rdx),%xmm15
565 leaq 128(%rdx),%rbp
566 pand %xmm4,%xmm12
567 pand %xmm5,%xmm13
568 pand %xmm6,%xmm14
569 pand %xmm7,%xmm15
570 por %xmm10,%xmm8
571 por %xmm11,%xmm9
572 por %xmm12,%xmm8
573 por %xmm13,%xmm9
574 por %xmm14,%xmm8
575 por %xmm15,%xmm9
576
577 por %xmm9,%xmm8
578 pshufd $0x4e,%xmm8,%xmm9
579 por %xmm9,%xmm8
580.byte 102,76,15,126,195
581
582 movq %r8,128(%rsp)
583 movq %rdi,128+8(%rsp)
584 movq %rcx,128+16(%rsp)
585
586 movq (%rsi),%rax
587 movq 8(%rsi),%rcx
588 mulq %rbx
589 movq %rax,(%rsp)
590 movq %rcx,%rax
591 movq %rdx,%r8
592
593 mulq %rbx
594 addq %rax,%r8
595 movq 16(%rsi),%rax
596 movq %rdx,%r9
597 adcq $0,%r9
598
599 mulq %rbx
600 addq %rax,%r9
601 movq 24(%rsi),%rax
602 movq %rdx,%r10
603 adcq $0,%r10
604
605 mulq %rbx
606 addq %rax,%r10
607 movq 32(%rsi),%rax
608 movq %rdx,%r11
609 adcq $0,%r11
610
611 mulq %rbx
612 addq %rax,%r11
613 movq 40(%rsi),%rax
614 movq %rdx,%r12
615 adcq $0,%r12
616
617 mulq %rbx
618 addq %rax,%r12
619 movq 48(%rsi),%rax
620 movq %rdx,%r13
621 adcq $0,%r13
622
623 mulq %rbx
624 addq %rax,%r13
625 movq 56(%rsi),%rax
626 movq %rdx,%r14
627 adcq $0,%r14
628
629 mulq %rbx
630 addq %rax,%r14
631 movq (%rsi),%rax
632 movq %rdx,%r15
633 adcq $0,%r15
634
635 leaq 8(%rsp),%rdi
636 movl $7,%ecx
637 jmp .Loop_mul_gather
638
639.align 32
640.Loop_mul_gather:
641 movdqa 0(%rbp),%xmm8
642 movdqa 16(%rbp),%xmm9
643 movdqa 32(%rbp),%xmm10
644 movdqa 48(%rbp),%xmm11
645 pand %xmm0,%xmm8
646 movdqa 64(%rbp),%xmm12
647 pand %xmm1,%xmm9
648 movdqa 80(%rbp),%xmm13
649 pand %xmm2,%xmm10
650 movdqa 96(%rbp),%xmm14
651 pand %xmm3,%xmm11
652 movdqa 112(%rbp),%xmm15
653 leaq 128(%rbp),%rbp
654 pand %xmm4,%xmm12
655 pand %xmm5,%xmm13
656 pand %xmm6,%xmm14
657 pand %xmm7,%xmm15
658 por %xmm10,%xmm8
659 por %xmm11,%xmm9
660 por %xmm12,%xmm8
661 por %xmm13,%xmm9
662 por %xmm14,%xmm8
663 por %xmm15,%xmm9
664
665 por %xmm9,%xmm8
666 pshufd $0x4e,%xmm8,%xmm9
667 por %xmm9,%xmm8
668.byte 102,76,15,126,195
669
670 mulq %rbx
671 addq %rax,%r8
672 movq 8(%rsi),%rax
673 movq %r8,(%rdi)
674 movq %rdx,%r8
675 adcq $0,%r8
676
677 mulq %rbx
678 addq %rax,%r9
679 movq 16(%rsi),%rax
680 adcq $0,%rdx
681 addq %r9,%r8
682 movq %rdx,%r9
683 adcq $0,%r9
684
685 mulq %rbx
686 addq %rax,%r10
687 movq 24(%rsi),%rax
688 adcq $0,%rdx
689 addq %r10,%r9
690 movq %rdx,%r10
691 adcq $0,%r10
692
693 mulq %rbx
694 addq %rax,%r11
695 movq 32(%rsi),%rax
696 adcq $0,%rdx
697 addq %r11,%r10
698 movq %rdx,%r11
699 adcq $0,%r11
700
701 mulq %rbx
702 addq %rax,%r12
703 movq 40(%rsi),%rax
704 adcq $0,%rdx
705 addq %r12,%r11
706 movq %rdx,%r12
707 adcq $0,%r12
708
709 mulq %rbx
710 addq %rax,%r13
711 movq 48(%rsi),%rax
712 adcq $0,%rdx
713 addq %r13,%r12
714 movq %rdx,%r13
715 adcq $0,%r13
716
717 mulq %rbx
718 addq %rax,%r14
719 movq 56(%rsi),%rax
720 adcq $0,%rdx
721 addq %r14,%r13
722 movq %rdx,%r14
723 adcq $0,%r14
724
725 mulq %rbx
726 addq %rax,%r15
727 movq (%rsi),%rax
728 adcq $0,%rdx
729 addq %r15,%r14
730 movq %rdx,%r15
731 adcq $0,%r15
732
733 leaq 8(%rdi),%rdi
734
735 decl %ecx
736 jnz .Loop_mul_gather
737
738 movq %r8,(%rdi)
739 movq %r9,8(%rdi)
740 movq %r10,16(%rdi)
741 movq %r11,24(%rdi)
742 movq %r12,32(%rdi)
743 movq %r13,40(%rdi)
744 movq %r14,48(%rdi)
745 movq %r15,56(%rdi)
746
747 movq 128+8(%rsp),%rdi
748 movq 128+16(%rsp),%rbp
749
750 movq (%rsp),%r8
751 movq 8(%rsp),%r9
752 movq 16(%rsp),%r10
753 movq 24(%rsp),%r11
754 movq 32(%rsp),%r12
755 movq 40(%rsp),%r13
756 movq 48(%rsp),%r14
757 movq 56(%rsp),%r15
758
759 call __rsaz_512_reduce
760 addq 64(%rsp),%r8
761 adcq 72(%rsp),%r9
762 adcq 80(%rsp),%r10
763 adcq 88(%rsp),%r11
764 adcq 96(%rsp),%r12
765 adcq 104(%rsp),%r13
766 adcq 112(%rsp),%r14
767 adcq 120(%rsp),%r15
768 sbbq %rcx,%rcx
769
770 call __rsaz_512_subtract
771
772 leaq 128+24+48(%rsp),%rax
773.cfi_def_cfa %rax,8
774 movq -48(%rax),%r15
775.cfi_restore %r15
776 movq -40(%rax),%r14
777.cfi_restore %r14
778 movq -32(%rax),%r13
779.cfi_restore %r13
780 movq -24(%rax),%r12
781.cfi_restore %r12
782 movq -16(%rax),%rbp
783.cfi_restore %rbp
784 movq -8(%rax),%rbx
785.cfi_restore %rbx
786 leaq (%rax),%rsp
787.cfi_def_cfa_register %rsp
788.Lmul_gather4_epilogue:
789 .byte 0xf3,0xc3
790.cfi_endproc
791.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
792.globl rsaz_512_mul_scatter4
793.type rsaz_512_mul_scatter4,@function
794.align 32
795rsaz_512_mul_scatter4:
796.cfi_startproc
797 pushq %rbx
798.cfi_adjust_cfa_offset 8
799.cfi_offset %rbx,-16
800 pushq %rbp
801.cfi_adjust_cfa_offset 8
802.cfi_offset %rbp,-24
803 pushq %r12
804.cfi_adjust_cfa_offset 8
805.cfi_offset %r12,-32
806 pushq %r13
807.cfi_adjust_cfa_offset 8
808.cfi_offset %r13,-40
809 pushq %r14
810.cfi_adjust_cfa_offset 8
811.cfi_offset %r14,-48
812 pushq %r15
813.cfi_adjust_cfa_offset 8
814.cfi_offset %r15,-56
815
816 movl %r9d,%r9d
817 subq $128+24,%rsp
818.cfi_adjust_cfa_offset 128+24
819.Lmul_scatter4_body:
820 leaq (%r8,%r9,8),%r8
821.byte 102,72,15,110,199
822.byte 102,72,15,110,202
823.byte 102,73,15,110,208
824 movq %rcx,128(%rsp)
825
826 movq %rdi,%rbp
827 movq (%rdi),%rbx
828 call __rsaz_512_mul
829
830.byte 102,72,15,126,199
831.byte 102,72,15,126,205
832
833 movq (%rsp),%r8
834 movq 8(%rsp),%r9
835 movq 16(%rsp),%r10
836 movq 24(%rsp),%r11
837 movq 32(%rsp),%r12
838 movq 40(%rsp),%r13
839 movq 48(%rsp),%r14
840 movq 56(%rsp),%r15
841
842 call __rsaz_512_reduce
843 addq 64(%rsp),%r8
844 adcq 72(%rsp),%r9
845 adcq 80(%rsp),%r10
846 adcq 88(%rsp),%r11
847 adcq 96(%rsp),%r12
848 adcq 104(%rsp),%r13
849 adcq 112(%rsp),%r14
850 adcq 120(%rsp),%r15
851.byte 102,72,15,126,214
852 sbbq %rcx,%rcx
853
854 call __rsaz_512_subtract
855
856 movq %r8,0(%rsi)
857 movq %r9,128(%rsi)
858 movq %r10,256(%rsi)
859 movq %r11,384(%rsi)
860 movq %r12,512(%rsi)
861 movq %r13,640(%rsi)
862 movq %r14,768(%rsi)
863 movq %r15,896(%rsi)
864
865 leaq 128+24+48(%rsp),%rax
866.cfi_def_cfa %rax,8
867 movq -48(%rax),%r15
868.cfi_restore %r15
869 movq -40(%rax),%r14
870.cfi_restore %r14
871 movq -32(%rax),%r13
872.cfi_restore %r13
873 movq -24(%rax),%r12
874.cfi_restore %r12
875 movq -16(%rax),%rbp
876.cfi_restore %rbp
877 movq -8(%rax),%rbx
878.cfi_restore %rbx
879 leaq (%rax),%rsp
880.cfi_def_cfa_register %rsp
881.Lmul_scatter4_epilogue:
882 .byte 0xf3,0xc3
883.cfi_endproc
884.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
885.globl rsaz_512_mul_by_one
886.type rsaz_512_mul_by_one,@function
887.align 32
888rsaz_512_mul_by_one:
889.cfi_startproc
890 pushq %rbx
891.cfi_adjust_cfa_offset 8
892.cfi_offset %rbx,-16
893 pushq %rbp
894.cfi_adjust_cfa_offset 8
895.cfi_offset %rbp,-24
896 pushq %r12
897.cfi_adjust_cfa_offset 8
898.cfi_offset %r12,-32
899 pushq %r13
900.cfi_adjust_cfa_offset 8
901.cfi_offset %r13,-40
902 pushq %r14
903.cfi_adjust_cfa_offset 8
904.cfi_offset %r14,-48
905 pushq %r15
906.cfi_adjust_cfa_offset 8
907.cfi_offset %r15,-56
908
909 subq $128+24,%rsp
910.cfi_adjust_cfa_offset 128+24
911.Lmul_by_one_body:
912 movq %rdx,%rbp
913 movq %rcx,128(%rsp)
914
915 movq (%rsi),%r8
916 pxor %xmm0,%xmm0
917 movq 8(%rsi),%r9
918 movq 16(%rsi),%r10
919 movq 24(%rsi),%r11
920 movq 32(%rsi),%r12
921 movq 40(%rsi),%r13
922 movq 48(%rsi),%r14
923 movq 56(%rsi),%r15
924
925 movdqa %xmm0,(%rsp)
926 movdqa %xmm0,16(%rsp)
927 movdqa %xmm0,32(%rsp)
928 movdqa %xmm0,48(%rsp)
929 movdqa %xmm0,64(%rsp)
930 movdqa %xmm0,80(%rsp)
931 movdqa %xmm0,96(%rsp)
932 call __rsaz_512_reduce
933 movq %r8,(%rdi)
934 movq %r9,8(%rdi)
935 movq %r10,16(%rdi)
936 movq %r11,24(%rdi)
937 movq %r12,32(%rdi)
938 movq %r13,40(%rdi)
939 movq %r14,48(%rdi)
940 movq %r15,56(%rdi)
941
942 leaq 128+24+48(%rsp),%rax
943.cfi_def_cfa %rax,8
944 movq -48(%rax),%r15
945.cfi_restore %r15
946 movq -40(%rax),%r14
947.cfi_restore %r14
948 movq -32(%rax),%r13
949.cfi_restore %r13
950 movq -24(%rax),%r12
951.cfi_restore %r12
952 movq -16(%rax),%rbp
953.cfi_restore %rbp
954 movq -8(%rax),%rbx
955.cfi_restore %rbx
956 leaq (%rax),%rsp
957.cfi_def_cfa_register %rsp
958.Lmul_by_one_epilogue:
959 .byte 0xf3,0xc3
960.cfi_endproc
961.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
962.type __rsaz_512_reduce,@function
963.align 32
964__rsaz_512_reduce:
965 movq %r8,%rbx
966 imulq 128+8(%rsp),%rbx
967 movq 0(%rbp),%rax
968 movl $8,%ecx
969 jmp .Lreduction_loop
970
971.align 32
972.Lreduction_loop:
973 mulq %rbx
974 movq 8(%rbp),%rax
975 negq %r8
976 movq %rdx,%r8
977 adcq $0,%r8
978
979 mulq %rbx
980 addq %rax,%r9
981 movq 16(%rbp),%rax
982 adcq $0,%rdx
983 addq %r9,%r8
984 movq %rdx,%r9
985 adcq $0,%r9
986
987 mulq %rbx
988 addq %rax,%r10
989 movq 24(%rbp),%rax
990 adcq $0,%rdx
991 addq %r10,%r9
992 movq %rdx,%r10
993 adcq $0,%r10
994
995 mulq %rbx
996 addq %rax,%r11
997 movq 32(%rbp),%rax
998 adcq $0,%rdx
999 addq %r11,%r10
1000 movq 128+8(%rsp),%rsi
1001
1002
1003 adcq $0,%rdx
1004 movq %rdx,%r11
1005
1006 mulq %rbx
1007 addq %rax,%r12
1008 movq 40(%rbp),%rax
1009 adcq $0,%rdx
1010 imulq %r8,%rsi
1011 addq %r12,%r11
1012 movq %rdx,%r12
1013 adcq $0,%r12
1014
1015 mulq %rbx
1016 addq %rax,%r13
1017 movq 48(%rbp),%rax
1018 adcq $0,%rdx
1019 addq %r13,%r12
1020 movq %rdx,%r13
1021 adcq $0,%r13
1022
1023 mulq %rbx
1024 addq %rax,%r14
1025 movq 56(%rbp),%rax
1026 adcq $0,%rdx
1027 addq %r14,%r13
1028 movq %rdx,%r14
1029 adcq $0,%r14
1030
1031 mulq %rbx
1032 movq %rsi,%rbx
1033 addq %rax,%r15
1034 movq 0(%rbp),%rax
1035 adcq $0,%rdx
1036 addq %r15,%r14
1037 movq %rdx,%r15
1038 adcq $0,%r15
1039
1040 decl %ecx
1041 jne .Lreduction_loop
1042
1043 .byte 0xf3,0xc3
1044.size __rsaz_512_reduce,.-__rsaz_512_reduce
1045.type __rsaz_512_subtract,@function
1046.align 32
1047__rsaz_512_subtract:
1048 movq %r8,(%rdi)
1049 movq %r9,8(%rdi)
1050 movq %r10,16(%rdi)
1051 movq %r11,24(%rdi)
1052 movq %r12,32(%rdi)
1053 movq %r13,40(%rdi)
1054 movq %r14,48(%rdi)
1055 movq %r15,56(%rdi)
1056
1057 movq 0(%rbp),%r8
1058 movq 8(%rbp),%r9
1059 negq %r8
1060 notq %r9
1061 andq %rcx,%r8
1062 movq 16(%rbp),%r10
1063 andq %rcx,%r9
1064 notq %r10
1065 movq 24(%rbp),%r11
1066 andq %rcx,%r10
1067 notq %r11
1068 movq 32(%rbp),%r12
1069 andq %rcx,%r11
1070 notq %r12
1071 movq 40(%rbp),%r13
1072 andq %rcx,%r12
1073 notq %r13
1074 movq 48(%rbp),%r14
1075 andq %rcx,%r13
1076 notq %r14
1077 movq 56(%rbp),%r15
1078 andq %rcx,%r14
1079 notq %r15
1080 andq %rcx,%r15
1081
1082 addq (%rdi),%r8
1083 adcq 8(%rdi),%r9
1084 adcq 16(%rdi),%r10
1085 adcq 24(%rdi),%r11
1086 adcq 32(%rdi),%r12
1087 adcq 40(%rdi),%r13
1088 adcq 48(%rdi),%r14
1089 adcq 56(%rdi),%r15
1090
1091 movq %r8,(%rdi)
1092 movq %r9,8(%rdi)
1093 movq %r10,16(%rdi)
1094 movq %r11,24(%rdi)
1095 movq %r12,32(%rdi)
1096 movq %r13,40(%rdi)
1097 movq %r14,48(%rdi)
1098 movq %r15,56(%rdi)
1099
1100 .byte 0xf3,0xc3
1101.size __rsaz_512_subtract,.-__rsaz_512_subtract
1102.type __rsaz_512_mul,@function
1103.align 32
1104__rsaz_512_mul:
1105 leaq 8(%rsp),%rdi
1106
1107 movq (%rsi),%rax
1108 mulq %rbx
1109 movq %rax,(%rdi)
1110 movq 8(%rsi),%rax
1111 movq %rdx,%r8
1112
1113 mulq %rbx
1114 addq %rax,%r8
1115 movq 16(%rsi),%rax
1116 movq %rdx,%r9
1117 adcq $0,%r9
1118
1119 mulq %rbx
1120 addq %rax,%r9
1121 movq 24(%rsi),%rax
1122 movq %rdx,%r10
1123 adcq $0,%r10
1124
1125 mulq %rbx
1126 addq %rax,%r10
1127 movq 32(%rsi),%rax
1128 movq %rdx,%r11
1129 adcq $0,%r11
1130
1131 mulq %rbx
1132 addq %rax,%r11
1133 movq 40(%rsi),%rax
1134 movq %rdx,%r12
1135 adcq $0,%r12
1136
1137 mulq %rbx
1138 addq %rax,%r12
1139 movq 48(%rsi),%rax
1140 movq %rdx,%r13
1141 adcq $0,%r13
1142
1143 mulq %rbx
1144 addq %rax,%r13
1145 movq 56(%rsi),%rax
1146 movq %rdx,%r14
1147 adcq $0,%r14
1148
1149 mulq %rbx
1150 addq %rax,%r14
1151 movq (%rsi),%rax
1152 movq %rdx,%r15
1153 adcq $0,%r15
1154
1155 leaq 8(%rbp),%rbp
1156 leaq 8(%rdi),%rdi
1157
1158 movl $7,%ecx
1159 jmp .Loop_mul
1160
1161.align 32
1162.Loop_mul:
1163 movq (%rbp),%rbx
1164 mulq %rbx
1165 addq %rax,%r8
1166 movq 8(%rsi),%rax
1167 movq %r8,(%rdi)
1168 movq %rdx,%r8
1169 adcq $0,%r8
1170
1171 mulq %rbx
1172 addq %rax,%r9
1173 movq 16(%rsi),%rax
1174 adcq $0,%rdx
1175 addq %r9,%r8
1176 movq %rdx,%r9
1177 adcq $0,%r9
1178
1179 mulq %rbx
1180 addq %rax,%r10
1181 movq 24(%rsi),%rax
1182 adcq $0,%rdx
1183 addq %r10,%r9
1184 movq %rdx,%r10
1185 adcq $0,%r10
1186
1187 mulq %rbx
1188 addq %rax,%r11
1189 movq 32(%rsi),%rax
1190 adcq $0,%rdx
1191 addq %r11,%r10
1192 movq %rdx,%r11
1193 adcq $0,%r11
1194
1195 mulq %rbx
1196 addq %rax,%r12
1197 movq 40(%rsi),%rax
1198 adcq $0,%rdx
1199 addq %r12,%r11
1200 movq %rdx,%r12
1201 adcq $0,%r12
1202
1203 mulq %rbx
1204 addq %rax,%r13
1205 movq 48(%rsi),%rax
1206 adcq $0,%rdx
1207 addq %r13,%r12
1208 movq %rdx,%r13
1209 adcq $0,%r13
1210
1211 mulq %rbx
1212 addq %rax,%r14
1213 movq 56(%rsi),%rax
1214 adcq $0,%rdx
1215 addq %r14,%r13
1216 movq %rdx,%r14
1217 leaq 8(%rbp),%rbp
1218 adcq $0,%r14
1219
1220 mulq %rbx
1221 addq %rax,%r15
1222 movq (%rsi),%rax
1223 adcq $0,%rdx
1224 addq %r15,%r14
1225 movq %rdx,%r15
1226 adcq $0,%r15
1227
1228 leaq 8(%rdi),%rdi
1229
1230 decl %ecx
1231 jnz .Loop_mul
1232
1233 movq %r8,(%rdi)
1234 movq %r9,8(%rdi)
1235 movq %r10,16(%rdi)
1236 movq %r11,24(%rdi)
1237 movq %r12,32(%rdi)
1238 movq %r13,40(%rdi)
1239 movq %r14,48(%rdi)
1240 movq %r15,56(%rdi)
1241
1242 .byte 0xf3,0xc3
1243.size __rsaz_512_mul,.-__rsaz_512_mul
1244.globl rsaz_512_scatter4
1245.type rsaz_512_scatter4,@function
1246.align 16
1247rsaz_512_scatter4:
1248 leaq (%rdi,%rdx,8),%rdi
1249 movl $8,%r9d
1250 jmp .Loop_scatter
1251.align 16
1252.Loop_scatter:
1253 movq (%rsi),%rax
1254 leaq 8(%rsi),%rsi
1255 movq %rax,(%rdi)
1256 leaq 128(%rdi),%rdi
1257 decl %r9d
1258 jnz .Loop_scatter
1259 .byte 0xf3,0xc3
1260.size rsaz_512_scatter4,.-rsaz_512_scatter4
1261
1262.globl rsaz_512_gather4
1263.type rsaz_512_gather4,@function
1264.align 16
1265rsaz_512_gather4:
1266 movd %edx,%xmm8
1267 movdqa .Linc+16(%rip),%xmm1
1268 movdqa .Linc(%rip),%xmm0
1269
1270 pshufd $0,%xmm8,%xmm8
1271 movdqa %xmm1,%xmm7
1272 movdqa %xmm1,%xmm2
1273 paddd %xmm0,%xmm1
1274 pcmpeqd %xmm8,%xmm0
1275 movdqa %xmm7,%xmm3
1276 paddd %xmm1,%xmm2
1277 pcmpeqd %xmm8,%xmm1
1278 movdqa %xmm7,%xmm4
1279 paddd %xmm2,%xmm3
1280 pcmpeqd %xmm8,%xmm2
1281 movdqa %xmm7,%xmm5
1282 paddd %xmm3,%xmm4
1283 pcmpeqd %xmm8,%xmm3
1284 movdqa %xmm7,%xmm6
1285 paddd %xmm4,%xmm5
1286 pcmpeqd %xmm8,%xmm4
1287 paddd %xmm5,%xmm6
1288 pcmpeqd %xmm8,%xmm5
1289 paddd %xmm6,%xmm7
1290 pcmpeqd %xmm8,%xmm6
1291 pcmpeqd %xmm8,%xmm7
1292 movl $8,%r9d
1293 jmp .Loop_gather
1294.align 16
1295.Loop_gather:
1296 movdqa 0(%rsi),%xmm8
1297 movdqa 16(%rsi),%xmm9
1298 movdqa 32(%rsi),%xmm10
1299 movdqa 48(%rsi),%xmm11
1300 pand %xmm0,%xmm8
1301 movdqa 64(%rsi),%xmm12
1302 pand %xmm1,%xmm9
1303 movdqa 80(%rsi),%xmm13
1304 pand %xmm2,%xmm10
1305 movdqa 96(%rsi),%xmm14
1306 pand %xmm3,%xmm11
1307 movdqa 112(%rsi),%xmm15
1308 leaq 128(%rsi),%rsi
1309 pand %xmm4,%xmm12
1310 pand %xmm5,%xmm13
1311 pand %xmm6,%xmm14
1312 pand %xmm7,%xmm15
1313 por %xmm10,%xmm8
1314 por %xmm11,%xmm9
1315 por %xmm12,%xmm8
1316 por %xmm13,%xmm9
1317 por %xmm14,%xmm8
1318 por %xmm15,%xmm9
1319
1320 por %xmm9,%xmm8
1321 pshufd $0x4e,%xmm8,%xmm9
1322 por %xmm9,%xmm8
1323 movq %xmm8,(%rdi)
1324 leaq 8(%rdi),%rdi
1325 decl %r9d
1326 jnz .Loop_gather
1327 .byte 0xf3,0xc3
1328.LSEH_end_rsaz_512_gather4:
1329.size rsaz_512_gather4,.-rsaz_512_gather4
1330
1331.align 64
1332.Linc:
1333.long 0,0, 1,1
1334.long 2,2, 2,2
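
As a reading aid, here is a minimal C model of the table access used by rsaz_512_scatter4 and rsaz_512_gather4, assuming the layout visible above: limb j of entry i sits at qword offset j*16 + i (a 128-byte stride per limb), and selection masks every entry so the memory access pattern does not depend on the index. The function and parameter names below are ours, not OpenSSL's.

    #include <stdint.h>
    #include <stddef.h>

    /* Illustrative sketch of the gather step: read entry idx (0..15) of the
     * 16-entry table without an index-dependent load address.  The SSE2 code
     * above does the same selection with pcmpeqd masks and pand/por. */
    static void gather4_ref(uint64_t out[8], const uint64_t table[8 * 16],
                            uint64_t idx)
    {
        for (size_t limb = 0; limb < 8; limb++) {
            uint64_t acc = 0;
            for (uint64_t i = 0; i < 16; i++) {
                uint64_t d = i ^ idx;                       /* 0 iff i == idx  */
                uint64_t mask = ((d | (0 - d)) >> 63) - 1;  /* ~0 iff i == idx */
                acc |= table[limb * 16 + i] & mask;
            }
            out[limb] = acc;
        }
    }

And a functional (not line-by-line) model of what __rsaz_512_reduce plus the callers' add and conditional-subtract sequence compute: a word-serial Montgomery reduction of the 1024-bit product, using n0 = -n[0]^-1 mod 2^64, the constant the callers stash at 128(%rsp). Again the names are ours; this is a reference sketch under those assumptions, not the implementation (it uses the GCC/Clang __int128 extension for the 64x64 products).

    #include <stdint.h>

    typedef unsigned __int128 u128;

    /* Reduce a 16-limb product t modulo the 8-limb odd modulus n, returning
     * t * 2^-512 mod n up to one final conditional subtraction of n (the part
     * __rsaz_512_subtract handles in the assembly above). */
    static uint64_t mont_reduce_ref(uint64_t r[8], const uint64_t t[16],
                                    const uint64_t n[8], uint64_t n0)
    {
        uint64_t tmp[17];
        for (int i = 0; i < 16; i++) tmp[i] = t[i];
        tmp[16] = 0;

        for (int i = 0; i < 8; i++) {
            uint64_t m = tmp[i] * n0;        /* makes limb i vanish mod 2^64 */
            uint64_t carry = 0;
            for (int j = 0; j < 8; j++) {
                u128 acc = (u128)m * n[j] + tmp[i + j] + carry;
                tmp[i + j] = (uint64_t)acc;
                carry = (uint64_t)(acc >> 64);
            }
            for (int k = i + 8; carry != 0 && k <= 16; k++) {
                u128 acc = (u128)tmp[k] + carry;
                tmp[k] = (uint64_t)acc;
                carry = (uint64_t)(acc >> 64);
            }
        }
        for (int i = 0; i < 8; i++) r[i] = tmp[i + 8];
        return tmp[16];     /* top carry; the caller folds it into the subtract */
    }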