VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.1i/crypto/genasm-elf/x86_64-mont.S@ 87083

Last change on this file since 87083 was 83531, checked in by vboxsync, 5 years ago

setting svn:sync-process=export for openssl-1.1.1f, all files except tests

File size: 13.6 KB
Line 
1.text
2
3
4
/*
 * bn_mul_mont: word-serial multiply-and-reduce on 64-bit limbs (GAS AT&T
 * syntax, ELF).  The .byte string at the end of this file decodes to
 * "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>".
 *
 * NOTE(review): every line of this listing starts with a decimal listing
 * number fused to the text (source-browser residue); it is not assembler
 * input until those prefixes are removed.
 *
 * Inputs, as the code below uses them:
 *   %rdi  r   - output array, num words (written in .Lsub and .Lcopy)
 *   %rsi  a   - first operand array, indexed by the inner counter
 *   %rdx  b   - second operand array, one word consumed per outer pass
 *   %rcx  n   - array scaled by the per-pass factor m and subtracted at
 *               the end (presumably the modulus - confirm with the caller)
 *   %r8   pointer to one 64-bit word n0 (presumably -1/n[0] mod 2^64 -
 *               confirm with the caller)
 *   %r9d  num - word count, zero-extended on entry
 * Output: %rax = 1.  %rbx, %rbp and %r12-%r15 are saved and restored.
 *
 * Scratch: tp[0..num] on the stack at (%rsp); tp[num+1] holds the entry
 * value of %rsp.
 *
 * NOTE(review): the inner loops advance the index to 2 before the first
 * "cmpq %r9 / jne" test, so the generic path assumes num >= 2; callers
 * are presumably expected to guarantee that.
 */
5.globl bn_mul_mont
6.type bn_mul_mont,@function
7.align 16
8bn_mul_mont:
9.cfi_startproc
/* Zero-extend num; %rax keeps the entry %rsp and becomes the CFA base. */
10 movl %r9d,%r9d
11 movq %rsp,%rax
12.cfi_def_cfa_register %rax
/* Dispatch: num not a multiple of 4, or num below 8 -> generic path. */
13 testl $3,%r9d
14 jnz .Lmul_enter
15 cmpl $8,%r9d
16 jb .Lmul_enter
/* Operand pointers differ -> four-words-per-trip multiply. */
17 cmpq %rsi,%rdx
18 jne .Lmul4x_enter
/* Same pointer for both operands: squaring path if num is a multiple of 8. */
19 testl $7,%r9d
20 jz .Lsqr8x_enter
21 jmp .Lmul4x_enter
22
23.align 16
24.Lmul_enter:
/* Save the callee-saved registers just below the entry %rsp. */
25 pushq %rbx
26.cfi_offset %rbx,-16
27 pushq %rbp
28.cfi_offset %rbp,-24
29 pushq %r12
30.cfi_offset %r12,-32
31 pushq %r13
32.cfi_offset %r13,-40
33 pushq %r14
34.cfi_offset %r14,-48
35 pushq %r15
36.cfi_offset %r15,-56
37
/*
 * r10 = (rsp - 8*num - 16) rounded down to a multiple of 1024: the new
 * stack top.  r9 is negated only for this address computation.
 */
38 negq %r9
39 movq %rsp,%r11
40 leaq -16(%rsp,%r9,8),%r10
41 negq %r9
42 andq $-1024,%r10
43
44
45
46
47
48
49
50
51
/*
 * Move %rsp to the highest address of the form r10 + k*4096 that is not
 * above the current %rsp and read one word there.
 */
52 subq %r10,%r11
53 andq $-4096,%r11
54 leaq (%r10,%r11,1),%rsp
55 movq (%rsp),%r11
56 cmpq %r10,%rsp
57 ja .Lmul_page_walk
58 jmp .Lmul_page_walk_done
59
60.align 16
/*
 * Step down 4096 bytes at a time, reading a word at each step, until
 * %rsp reaches r10 (presumably so that stack guard pages are touched in
 * order).
 */
61.Lmul_page_walk:
62 leaq -4096(%rsp),%rsp
63 movq (%rsp),%r11
64 cmpq %r10,%rsp
65 ja .Lmul_page_walk
66.Lmul_page_walk_done:
67
/*
 * tp[num+1] = entry %rsp.  The escape bytes are a DW_CFA_def_cfa_expression
 * giving CFA = *(rsp + 8 + r9*8) + 8, i.e. the value just stored, plus 8.
 */
68 movq %rax,8(%rsp,%r9,8)
69.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
70.Lmul_body:
/* r12 = b, r8 = n0 word, rbx = b[0], rax = a[0]. */
71 movq %rdx,%r12
72 movq (%r8),%r8
73 movq (%r12),%rbx
74 movq (%rsi),%rax
75
/* r14 = outer index i, r15 = inner index j; both start at 0. */
76 xorq %r14,%r14
77 xorq %r15,%r15
78
/* rdx:rax = a[0]*b[0]; r10 = low word; rax = n[0]. */
79 movq %r8,%rbp
80 mulq %rbx
81 movq %rax,%r10
82 movq (%rcx),%rax
83
/* rbp = m = low word * n0 (mod 2^64); r11 = high word of a[0]*b[0]. */
84 imulq %r10,%rbp
85 movq %rdx,%r11
86
/*
 * Add n[0]*m to the low word; only the carry out of that word is kept
 * (in r13), the low word itself is never stored.  rax = a[1].
 */
87 mulq %rbp
88 addq %rax,%r10
89 movq 8(%rsi),%rax
90 adcq $0,%rdx
91 movq %rdx,%r13
92
93 leaq 1(%r15),%r15
94 jmp .L1st_enter
95
96.align 16
/*
 * First pass (i = 0), one word per trip.  r11/r10 carry the a*b[0]
 * product stream, r13 carries the n*m stream.  The index is already one
 * past the word being finished, so the result lands at tp[r15-2].
 */
97.L1st:
98 addq %rax,%r13
99 movq (%rsi,%r15,8),%rax
100 adcq $0,%rdx
101 addq %r11,%r13
102 movq %r10,%r11
103 adcq $0,%rdx
104 movq %r13,-16(%rsp,%r15,8)
105 movq %rdx,%r13
106
107.L1st_enter:
/* a[j]*b[0] into r11 (high word to r10), then n[j]*m; j is advanced here. */
108 mulq %rbx
109 addq %rax,%r11
110 movq (%rcx,%r15,8),%rax
111 adcq $0,%rdx
112 leaq 1(%r15),%r15
113 movq %rdx,%r10
114
115 mulq %rbp
116 cmpq %r9,%r15
117 jne .L1st
118
/* Finish the last word (stored at tp[num-2]); rax = a[0] for the next pass. */
119 addq %rax,%r13
120 movq (%rsi),%rax
121 adcq $0,%rdx
122 addq %r11,%r13
123 adcq $0,%rdx
124 movq %r13,-16(%rsp,%r15,8)
125 movq %rdx,%r13
126 movq %r10,%r11
127
/* Sum the two remaining high words: tp[num-1] = sum, tp[num] = carry. */
128 xorq %rdx,%rdx
129 addq %r11,%r13
130 adcq $0,%rdx
131 movq %r13,-8(%rsp,%r9,8)
132 movq %rdx,(%rsp,%r9,8)
133
134 leaq 1(%r14),%r14
135 jmp .Louter
136.align 16
/*
 * Outer loop, i = 1 .. num-1: same shape as the first pass, but the
 * previous tp[] words are added in as well.
 */
137.Louter:
/* rbx = b[i]; r10 = tp[0] + low(a[0]*b[i]); rax = n[0]. */
138 movq (%r12,%r14,8),%rbx
139 xorq %r15,%r15
140 movq %r8,%rbp
141 movq (%rsp),%r10
142 mulq %rbx
143 addq %rax,%r10
144 movq (%rcx),%rax
145 adcq $0,%rdx
146
/* rbp = m = r10 * n0 (mod 2^64); r11 = high word so far. */
147 imulq %r10,%rbp
148 movq %rdx,%r11
149
/* Add n[0]*m, keep only the carry (r13); r10 = tp[1]; rax = a[1]. */
150 mulq %rbp
151 addq %rax,%r10
152 movq 8(%rsi),%rax
153 adcq $0,%rdx
154 movq 8(%rsp),%r10
155 movq %rdx,%r13
156
157 leaq 1(%r15),%r15
158 jmp .Linner_enter
159
160.align 16
/* One word per trip; r10 holds the a*b[i] word with the old tp[] word added. */
161.Linner:
162 addq %rax,%r13
163 movq (%rsi,%r15,8),%rax
164 adcq $0,%rdx
165 addq %r10,%r13
166 movq (%rsp,%r15,8),%r10
167 adcq $0,%rdx
168 movq %r13,-16(%rsp,%r15,8)
169 movq %rdx,%r13
170
171.Linner_enter:
/* r10 = tp[j] + low(a[j]*b[i]) + previous high word; r11 = new high word. */
172 mulq %rbx
173 addq %rax,%r11
174 movq (%rcx,%r15,8),%rax
175 adcq $0,%rdx
176 addq %r11,%r10
177 movq %rdx,%r11
178 adcq $0,%r11
179 leaq 1(%r15),%r15
180
181 mulq %rbp
182 cmpq %r9,%r15
183 jne .Linner
184
/* Finish the last word (tp[num-2]); r10 = old tp[num]; rax = a[0] again. */
185 addq %rax,%r13
186 movq (%rsi),%rax
187 adcq $0,%rdx
188 addq %r10,%r13
189 movq (%rsp,%r15,8),%r10
190 adcq $0,%rdx
191 movq %r13,-16(%rsp,%r15,8)
192 movq %rdx,%r13
193
/* tp[num-1] = r13 + r11 + old tp[num]; tp[num] = carry out of that sum. */
194 xorq %rdx,%rdx
195 addq %r11,%r13
196 adcq $0,%rdx
197 addq %r10,%r13
198 adcq $0,%rdx
199 movq %r13,-8(%rsp,%r9,8)
200 movq %rdx,(%rsp,%r9,8)
201
202 leaq 1(%r14),%r14
203 cmpq %r9,%r14
204 jb .Louter
205
/*
 * r = tp - n over num words.  The xorq clears CF for the first sbbq; the
 * borrow then stays in CF across trips because movq, leaq and decq do
 * not modify CF.
 */
206 xorq %r14,%r14
207 movq (%rsp),%rax
208 movq %r9,%r15
209
210.align 16
211.Lsub: sbbq (%rcx,%r14,8),%rax
212 movq %rax,(%rdi,%r14,8)
213 movq 8(%rsp,%r14,8),%rax
214 leaq 1(%r14),%r14
215 decq %r15
216 jnz .Lsub
217
/*
 * rax = tp[num] - final borrow, used as a select mask (presumably either
 * 0 or all ones); rbx = ~rax.
 */
218 sbbq $0,%rax
219 movq $-1,%rbx
220 xorq %rax,%rbx
221 xorq %r14,%r14
222 movq %r9,%r15
223
/*
 * Select without a branch, one word per trip:
 *   r[i] = (r[i] & ~mask) | (tp[i] & mask)
 * Each tp[i] is overwritten with the value of %r9 (num) once it is read.
 */
224.Lcopy:
225 movq (%rdi,%r14,8),%rcx
226 movq (%rsp,%r14,8),%rdx
227 andq %rbx,%rcx
228 andq %rax,%rdx
229 movq %r9,(%rsp,%r14,8)
230 orq %rcx,%rdx
231 movq %rdx,(%rdi,%r14,8)
232 leaq 1(%r14),%r14
233 subq $1,%r15
234 jnz .Lcopy
235
/*
 * rsi = entry %rsp from tp[num+1].  Callee-saved registers are reloaded
 * relative to it instead of being popped, then %rsp is restored.
 */
236 movq 8(%rsp,%r9,8),%rsi
237.cfi_def_cfa %rsi,8
238 movq $1,%rax
239 movq -48(%rsi),%r15
240.cfi_restore %r15
241 movq -40(%rsi),%r14
242.cfi_restore %r14
243 movq -32(%rsi),%r13
244.cfi_restore %r13
245 movq -24(%rsi),%r12
246.cfi_restore %r12
247 movq -16(%rsi),%rbp
248.cfi_restore %rbp
249 movq -8(%rsi),%rbx
250.cfi_restore %rbx
251 leaq (%rsi),%rsp
252.cfi_def_cfa_register %rsp
253.Lmul_epilogue:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding. */
254 .byte 0xf3,0xc3
255.cfi_endproc
256.size bn_mul_mont,.-bn_mul_mont
/*
 * bn_mul4x_mont: multiply-and-reduce on 64-bit limbs with the inner loops
 * unrolled to four words per trip.  No .globl directive for this symbol
 * appears in the listing; bn_mul_mont jumps to .Lmul4x_enter with %rax
 * already holding the entry %rsp and %r9 already zero-extended.
 *
 * NOTE(review): every line of this listing starts with a decimal listing
 * number fused to the text (source-browser residue); it is not assembler
 * input until those prefixes are removed.
 *
 * Inputs, as the code below uses them:
 *   %rdi  r   - output array (saved to the frame, then reused as scratch)
 *   %rsi  a   - first operand array
 *   %rdx  b   - second operand array, one word consumed per outer pass
 *   %rcx  n   - array scaled by the per-pass factor m and subtracted at
 *               the end (presumably the modulus - confirm with the caller)
 *   %r8   pointer to one 64-bit word n0 (presumably -1/n[0] mod 2^64)
 *   %r9d  num - word count
 * Output: %rax = 1.  %rbx, %rbp and %r12-%r15 are saved and restored.
 *
 * Scratch: tp[0..num] at (%rsp); tp[num+1] = entry %rsp; tp[num+2] = r.
 *
 * The loops start at index 4, always run at least one four-word trip, and
 * .Lsub4x counts (num-4)/4 trips down to zero, so num is assumed to be a
 * multiple of 4 and at least 8.  The dispatch in bn_mul_mont only reaches
 * .Lmul4x_enter under exactly those conditions.
 */
257.type bn_mul4x_mont,@function
258.align 16
259bn_mul4x_mont:
260.cfi_startproc
261 movl %r9d,%r9d
262 movq %rsp,%rax
263.cfi_def_cfa_register %rax
264.Lmul4x_enter:
/* Save the callee-saved registers just below the entry %rsp. */
265 pushq %rbx
266.cfi_offset %rbx,-16
267 pushq %rbp
268.cfi_offset %rbp,-24
269 pushq %r12
270.cfi_offset %r12,-32
271 pushq %r13
272.cfi_offset %r13,-40
273 pushq %r14
274.cfi_offset %r14,-48
275 pushq %r15
276.cfi_offset %r15,-56
277
/* r10 = (rsp - 8*num - 32) rounded down to a multiple of 1024: new stack top. */
278 negq %r9
279 movq %rsp,%r11
280 leaq -32(%rsp,%r9,8),%r10
281 negq %r9
282 andq $-1024,%r10
283
/*
 * Lower %rsp to r10 in steps of at most 4096 bytes, reading one word at
 * each step (presumably so that stack guard pages are touched in order).
 */
284 subq %r10,%r11
285 andq $-4096,%r11
286 leaq (%r10,%r11,1),%rsp
287 movq (%rsp),%r11
288 cmpq %r10,%rsp
289 ja .Lmul4x_page_walk
290 jmp .Lmul4x_page_walk_done
291
292.Lmul4x_page_walk:
293 leaq -4096(%rsp),%rsp
294 movq (%rsp),%r11
295 cmpq %r10,%rsp
296 ja .Lmul4x_page_walk
297.Lmul4x_page_walk_done:
298
/*
 * tp[num+1] = entry %rsp.  The escape bytes are a DW_CFA_def_cfa_expression
 * giving CFA = *(rsp + 8 + r9*8) + 8.
 */
299 movq %rax,8(%rsp,%r9,8)
300.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
301.Lmul4x_body:
/* tp[num+2] = r, because %rdi is used as an accumulator below. */
302 movq %rdi,16(%rsp,%r9,8)
/* r12 = b, r8 = n0 word, rbx = b[0], rax = a[0]. */
303 movq %rdx,%r12
304 movq (%r8),%r8
305 movq (%r12),%rbx
306 movq (%rsi),%rax
307
/* r14 = outer index i, r15 = inner index j. */
308 xorq %r14,%r14
309 xorq %r15,%r15
310
/* Word 0: r10 = low(a[0]*b[0]); rbp = m = r10 * n0 (mod 2^64). */
311 movq %r8,%rbp
312 mulq %rbx
313 movq %rax,%r10
314 movq (%rcx),%rax
315
316 imulq %r10,%rbp
317 movq %rdx,%r11
318
/* Add n[0]*m; only the carry out of word 0 is kept (in rdi). */
319 mulq %rbp
320 addq %rax,%r10
321 movq 8(%rsi),%rax
322 adcq $0,%rdx
323 movq %rdx,%rdi
324
/* Word 1: a[1]*b[0] and n[1]*m; the result is stored as tp[0]. */
325 mulq %rbx
326 addq %rax,%r11
327 movq 8(%rcx),%rax
328 adcq $0,%rdx
329 movq %rdx,%r10
330
331 mulq %rbp
332 addq %rax,%rdi
333 movq 16(%rsi),%rax
334 adcq $0,%rdx
335 addq %r11,%rdi
336 leaq 4(%r15),%r15
337 adcq $0,%rdx
338 movq %rdi,(%rsp)
339 movq %rdx,%r13
340 jmp .L1st4x
341.align 16
/*
 * First pass (i = 0), four words per trip.  r10 and r11 alternate as the
 * carry of the a*b[0] stream, r13 and rdi alternate as the accumulator of
 * the n*m stream.  r15 runs ahead of the word being processed, hence the
 * negative displacements; each result is stored one word lower than the
 * word index it came from.
 */
342.L1st4x:
343 mulq %rbx
344 addq %rax,%r10
345 movq -16(%rcx,%r15,8),%rax
346 adcq $0,%rdx
347 movq %rdx,%r11
348
349 mulq %rbp
350 addq %rax,%r13
351 movq -8(%rsi,%r15,8),%rax
352 adcq $0,%rdx
353 addq %r10,%r13
354 adcq $0,%rdx
355 movq %r13,-24(%rsp,%r15,8)
356 movq %rdx,%rdi
357
358 mulq %rbx
359 addq %rax,%r11
360 movq -8(%rcx,%r15,8),%rax
361 adcq $0,%rdx
362 movq %rdx,%r10
363
364 mulq %rbp
365 addq %rax,%rdi
366 movq (%rsi,%r15,8),%rax
367 adcq $0,%rdx
368 addq %r11,%rdi
369 adcq $0,%rdx
370 movq %rdi,-16(%rsp,%r15,8)
371 movq %rdx,%r13
372
373 mulq %rbx
374 addq %rax,%r10
375 movq (%rcx,%r15,8),%rax
376 adcq $0,%rdx
377 movq %rdx,%r11
378
379 mulq %rbp
380 addq %rax,%r13
381 movq 8(%rsi,%r15,8),%rax
382 adcq $0,%rdx
383 addq %r10,%r13
384 adcq $0,%rdx
385 movq %r13,-8(%rsp,%r15,8)
386 movq %rdx,%rdi
387
388 mulq %rbx
389 addq %rax,%r11
390 movq 8(%rcx,%r15,8),%rax
391 adcq $0,%rdx
392 leaq 4(%r15),%r15
393 movq %rdx,%r10
394
395 mulq %rbp
396 addq %rax,%rdi
397 movq -16(%rsi,%r15,8),%rax
398 adcq $0,%rdx
399 addq %r11,%rdi
400 adcq $0,%rdx
401 movq %rdi,-32(%rsp,%r15,8)
402 movq %rdx,%r13
403 cmpq %r9,%r15
404 jb .L1st4x
405
/* Tail of the first pass: the last two words (r15 == num here). */
406 mulq %rbx
407 addq %rax,%r10
408 movq -16(%rcx,%r15,8),%rax
409 adcq $0,%rdx
410 movq %rdx,%r11
411
412 mulq %rbp
413 addq %rax,%r13
414 movq -8(%rsi,%r15,8),%rax
415 adcq $0,%rdx
416 addq %r10,%r13
417 adcq $0,%rdx
418 movq %r13,-24(%rsp,%r15,8)
419 movq %rdx,%rdi
420
421 mulq %rbx
422 addq %rax,%r11
423 movq -8(%rcx,%r15,8),%rax
424 adcq $0,%rdx
425 movq %rdx,%r10
426
/* rax is reloaded with a[0] for the next outer pass. */
427 mulq %rbp
428 addq %rax,%rdi
429 movq (%rsi),%rax
430 adcq $0,%rdx
431 addq %r11,%rdi
432 adcq $0,%rdx
433 movq %rdi,-16(%rsp,%r15,8)
434 movq %rdx,%r13
435
/* tp[num-1] = sum of the two remaining high words, tp[num] = carry. */
436 xorq %rdi,%rdi
437 addq %r10,%r13
438 adcq $0,%rdi
439 movq %r13,-8(%rsp,%r15,8)
440 movq %rdi,(%rsp,%r15,8)
441
442 leaq 1(%r14),%r14
443.align 4
/*
 * Outer loop, i = 1 .. num-1: same shape as the first pass, with the
 * previous tp[] words added into the a*b[i] stream.
 */
444.Louter4x:
/* rbx = b[i]; r10 = tp[0] + low(a[0]*b[i]); rbp = m = r10 * n0 (mod 2^64). */
445 movq (%r12,%r14,8),%rbx
446 xorq %r15,%r15
447 movq (%rsp),%r10
448 movq %r8,%rbp
449 mulq %rbx
450 addq %rax,%r10
451 movq (%rcx),%rax
452 adcq $0,%rdx
453
454 imulq %r10,%rbp
455 movq %rdx,%r11
456
/* Add n[0]*m; only the carry out of word 0 is kept (in rdi). */
457 mulq %rbp
458 addq %rax,%r10
459 movq 8(%rsi),%rax
460 adcq $0,%rdx
461 movq %rdx,%rdi
462
/* Word 1, including the old tp[1]; the result becomes the new tp[0]. */
463 mulq %rbx
464 addq %rax,%r11
465 movq 8(%rcx),%rax
466 adcq $0,%rdx
467 addq 8(%rsp),%r11
468 adcq $0,%rdx
469 movq %rdx,%r10
470
471 mulq %rbp
472 addq %rax,%rdi
473 movq 16(%rsi),%rax
474 adcq $0,%rdx
475 addq %r11,%rdi
476 leaq 4(%r15),%r15
477 adcq $0,%rdx
478 movq %rdi,(%rsp)
479 movq %rdx,%r13
480 jmp .Linner4x
481.align 16
/* Four words per trip; each a*b[i] word also absorbs the old tp[] word. */
482.Linner4x:
483 mulq %rbx
484 addq %rax,%r10
485 movq -16(%rcx,%r15,8),%rax
486 adcq $0,%rdx
487 addq -16(%rsp,%r15,8),%r10
488 adcq $0,%rdx
489 movq %rdx,%r11
490
491 mulq %rbp
492 addq %rax,%r13
493 movq -8(%rsi,%r15,8),%rax
494 adcq $0,%rdx
495 addq %r10,%r13
496 adcq $0,%rdx
497 movq %r13,-24(%rsp,%r15,8)
498 movq %rdx,%rdi
499
500 mulq %rbx
501 addq %rax,%r11
502 movq -8(%rcx,%r15,8),%rax
503 adcq $0,%rdx
504 addq -8(%rsp,%r15,8),%r11
505 adcq $0,%rdx
506 movq %rdx,%r10
507
508 mulq %rbp
509 addq %rax,%rdi
510 movq (%rsi,%r15,8),%rax
511 adcq $0,%rdx
512 addq %r11,%rdi
513 adcq $0,%rdx
514 movq %rdi,-16(%rsp,%r15,8)
515 movq %rdx,%r13
516
517 mulq %rbx
518 addq %rax,%r10
519 movq (%rcx,%r15,8),%rax
520 adcq $0,%rdx
521 addq (%rsp,%r15,8),%r10
522 adcq $0,%rdx
523 movq %rdx,%r11
524
525 mulq %rbp
526 addq %rax,%r13
527 movq 8(%rsi,%r15,8),%rax
528 adcq $0,%rdx
529 addq %r10,%r13
530 adcq $0,%rdx
531 movq %r13,-8(%rsp,%r15,8)
532 movq %rdx,%rdi
533
534 mulq %rbx
535 addq %rax,%r11
536 movq 8(%rcx,%r15,8),%rax
537 adcq $0,%rdx
538 addq 8(%rsp,%r15,8),%r11
539 adcq $0,%rdx
540 leaq 4(%r15),%r15
541 movq %rdx,%r10
542
543 mulq %rbp
544 addq %rax,%rdi
545 movq -16(%rsi,%r15,8),%rax
546 adcq $0,%rdx
547 addq %r11,%rdi
548 adcq $0,%rdx
549 movq %rdi,-32(%rsp,%r15,8)
550 movq %rdx,%r13
551 cmpq %r9,%r15
552 jb .Linner4x
553
/* Tail of the pass: the last two words (r15 == num here). */
554 mulq %rbx
555 addq %rax,%r10
556 movq -16(%rcx,%r15,8),%rax
557 adcq $0,%rdx
558 addq -16(%rsp,%r15,8),%r10
559 adcq $0,%rdx
560 movq %rdx,%r11
561
562 mulq %rbp
563 addq %rax,%r13
564 movq -8(%rsi,%r15,8),%rax
565 adcq $0,%rdx
566 addq %r10,%r13
567 adcq $0,%rdx
568 movq %r13,-24(%rsp,%r15,8)
569 movq %rdx,%rdi
570
/* The outer index i is advanced here, in the middle of the tail. */
571 mulq %rbx
572 addq %rax,%r11
573 movq -8(%rcx,%r15,8),%rax
574 adcq $0,%rdx
575 addq -8(%rsp,%r15,8),%r11
576 adcq $0,%rdx
577 leaq 1(%r14),%r14
578 movq %rdx,%r10
579
/* rax is reloaded with a[0] for the next outer pass. */
580 mulq %rbp
581 addq %rax,%rdi
582 movq (%rsi),%rax
583 adcq $0,%rdx
584 addq %r11,%rdi
585 adcq $0,%rdx
586 movq %rdi,-16(%rsp,%r15,8)
587 movq %rdx,%r13
588
/* tp[num-1] = r13 + r10 + old tp[num]; tp[num] = carry out of that sum. */
589 xorq %rdi,%rdi
590 addq %r10,%r13
591 adcq $0,%rdi
592 addq (%rsp,%r9,8),%r13
593 adcq $0,%rdi
594 movq %r13,-8(%rsp,%r15,8)
595 movq %rdi,(%rsp,%r15,8)
596
597 cmpq %r9,%r14
598 jb .Louter4x
/*
 * r = tp - n, four words per trip.  rdi = r again (from tp[num+2]),
 * rsi = tp, r15 = (num-4)/4 loop trips; the last four words are finished
 * after the loop.
 */
599 movq 16(%rsp,%r9,8),%rdi
600 leaq -4(%r9),%r15
601 movq 0(%rsp),%rax
602 movq 8(%rsp),%rdx
603 shrq $2,%r15
604 leaq (%rsp),%rsi
605 xorq %r14,%r14
606
/*
 * subq starts the borrow chain; every later step uses sbbq.  The movq,
 * leaq and decq instructions in between do not modify CF.
 */
607 subq 0(%rcx),%rax
608 movq 16(%rsi),%rbx
609 movq 24(%rsi),%rbp
610 sbbq 8(%rcx),%rdx
611
612.Lsub4x:
613 movq %rax,0(%rdi,%r14,8)
614 movq %rdx,8(%rdi,%r14,8)
615 sbbq 16(%rcx,%r14,8),%rbx
616 movq 32(%rsi,%r14,8),%rax
617 movq 40(%rsi,%r14,8),%rdx
618 sbbq 24(%rcx,%r14,8),%rbp
619 movq %rbx,16(%rdi,%r14,8)
620 movq %rbp,24(%rdi,%r14,8)
621 sbbq 32(%rcx,%r14,8),%rax
622 movq 48(%rsi,%r14,8),%rbx
623 movq 56(%rsi,%r14,8),%rbp
624 sbbq 40(%rcx,%r14,8),%rdx
625 leaq 4(%r14),%r14
626 decq %r15
627 jnz .Lsub4x
628
/* Last four words; rax is then loaded with tp[num]. */
629 movq %rax,0(%rdi,%r14,8)
630 movq 32(%rsi,%r14,8),%rax
631 sbbq 16(%rcx,%r14,8),%rbx
632 movq %rdx,8(%rdi,%r14,8)
633 sbbq 24(%rcx,%r14,8),%rbp
634 movq %rbx,16(%rdi,%r14,8)
635
/*
 * rax = tp[num] - final borrow: the select mask (presumably either 0 or
 * all ones).  The .byte sequence encodes "movq %rax,%xmm4".  After the
 * pshufd broadcast xmm4 = mask in every dword, xmm5 = ~mask, xmm0 = 0.
 * r15 = num/4 trips; eax becomes the byte offset.
 */
636 sbbq $0,%rax
637 movq %rbp,24(%rdi,%r14,8)
638 pxor %xmm0,%xmm0
639.byte 102,72,15,110,224
640 pcmpeqd %xmm5,%xmm5
641 pshufd $0,%xmm4,%xmm4
642 movq %r9,%r15
643 pxor %xmm4,%xmm5
644 shrq $2,%r15
645 xorl %eax,%eax
646
647 jmp .Lcopy4x
648.align 16
/*
 * Select without a branch, 32 bytes per trip:
 *   r = (tp & mask) | (r & ~mask)
 * and each tp chunk is overwritten with zeros once it is read.  The
 * aligned movdqa accesses go to the stack frame, which was placed on a
 * 1024-byte boundary above; r is accessed with unaligned movdqu.
 */
649.Lcopy4x:
650 movdqa (%rsp,%rax,1),%xmm1
651 movdqu (%rdi,%rax,1),%xmm2
652 pand %xmm4,%xmm1
653 pand %xmm5,%xmm2
654 movdqa 16(%rsp,%rax,1),%xmm3
655 movdqa %xmm0,(%rsp,%rax,1)
656 por %xmm2,%xmm1
657 movdqu 16(%rdi,%rax,1),%xmm2
658 movdqu %xmm1,(%rdi,%rax,1)
659 pand %xmm4,%xmm3
660 pand %xmm5,%xmm2
661 movdqa %xmm0,16(%rsp,%rax,1)
662 por %xmm2,%xmm3
663 movdqu %xmm3,16(%rdi,%rax,1)
664 leaq 32(%rax),%rax
665 decq %r15
666 jnz .Lcopy4x
/*
 * rsi = entry %rsp from tp[num+1].  Callee-saved registers are reloaded
 * relative to it instead of being popped, then %rsp is restored.
 */
667 movq 8(%rsp,%r9,8),%rsi
668.cfi_def_cfa %rsi, 8
669 movq $1,%rax
670 movq -48(%rsi),%r15
671.cfi_restore %r15
672 movq -40(%rsi),%r14
673.cfi_restore %r14
674 movq -32(%rsi),%r13
675.cfi_restore %r13
676 movq -24(%rsi),%r12
677.cfi_restore %r12
678 movq -16(%rsi),%rbp
679.cfi_restore %rbp
680 movq -8(%rsi),%rbx
681.cfi_restore %rbx
682 leaq (%rsi),%rsp
683.cfi_def_cfa_register %rsp
684.Lmul4x_epilogue:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding. */
685 .byte 0xf3,0xc3
686.cfi_endproc
687.size bn_mul4x_mont,.-bn_mul4x_mont
688
689
/*
 * bn_sqr8x_mont: squaring front end.  bn_mul_mont jumps to .Lsqr8x_enter
 * when both operand pointers are equal and num is a multiple of 8, with
 * %rax already holding the entry %rsp.  This routine builds an aligned
 * scratch frame, calls bn_sqr8x_internal (not defined in the visible part
 * of this file), then performs the final subtraction and a branch-free
 * select into the output array.  No .globl directive for this symbol
 * appears in the listing.
 *
 * NOTE(review): every line of this listing starts with a decimal listing
 * number fused to the text (source-browser residue); it is not assembler
 * input until those prefixes are removed.
 *
 * Inputs, as the code below uses them:
 *   %rdi  r   - output array (parked in %xmm1 across the call)
 *   %rsi  a   - operand array (only its address is used in this block)
 *   %rcx  n   - copied to %xmm2 before the call (presumably the modulus)
 *   %r8   pointer to one 64-bit word n0, stored at 32(%rsp)
 *   %r9d  num - word count
 *   %rdx is not read in this block.
 * Output: %rax = 1.  %rbx, %rbp and %r12-%r15 are saved and restored.
 *
 * NOTE(review): the code after the call relies on values left behind by
 * bn_sqr8x_internal: %rdi and a negative %r9 (both loops below count up
 * to zero), %rbp as the pointer that is subtracted, and %rax as the word
 * the final borrow is taken from.  That contract is not visible here -
 * confirm against the callee.
 */
690.type bn_sqr8x_mont,@function
691.align 32
692bn_sqr8x_mont:
693.cfi_startproc
694 movq %rsp,%rax
695.cfi_def_cfa_register %rax
696.Lsqr8x_enter:
/* Save the callee-saved registers just below the entry %rsp. */
697 pushq %rbx
698.cfi_offset %rbx,-16
699 pushq %rbp
700.cfi_offset %rbp,-24
701 pushq %r12
702.cfi_offset %r12,-32
703 pushq %r13
704.cfi_offset %r13,-40
705 pushq %r14
706.cfi_offset %r14,-48
707 pushq %r15
708.cfi_offset %r15,-56
709.Lsqr8x_prologue:
710
/* r10 = 32*num, r9 = -8*num (byte count, negated). */
711 movl %r9d,%r10d
712 shll $3,%r9d
713 shlq $3+2,%r10
714 negq %r9
715
716
717
718
719
720
/*
 * r11 = ((rsp - 64 - 16*num) - a) mod 4096: distance, within a page, from
 * the operand to the prospective frame.  The frame is then shifted
 * according to r11 (presumably so that it does not alias the operand
 * modulo 4096 - confirm).  r8 = n0 word.
 */
721 leaq -64(%rsp,%r9,2),%r11
722 movq %rsp,%rbp
723 movq (%r8),%r8
724 subq %rsi,%r11
725 andq $4095,%r11
726 cmpq %r11,%r10
727 jb .Lsqr8x_sp_alt
/* 32*num >= r11: rbp = rsp - r11 - 64 - 16*num. */
728 subq %r11,%rbp
729 leaq -64(%rbp,%r9,2),%rbp
730 jmp .Lsqr8x_sp_done
731
732.align 32
/*
 * 32*num < r11: rbp = rsp - 64 - 16*num - max(0, r11 - (4096-64-16*num)).
 * movq leaves the flags from subq intact, so cmovc zeroes r11 on borrow.
 */
733.Lsqr8x_sp_alt:
734 leaq 4096-64(,%r9,2),%r10
735 leaq -64(%rbp,%r9,2),%rbp
736 subq %r10,%r11
737 movq $0,%r10
738 cmovcq %r10,%r11
739 subq %r11,%rbp
740.Lsqr8x_sp_done:
/*
 * Align the frame to 64 bytes, then lower %rsp to it in steps of at most
 * 4096 bytes, reading one word at each step (presumably so that stack
 * guard pages are touched in order).
 */
741 andq $-64,%rbp
742 movq %rsp,%r11
743 subq %rbp,%r11
744 andq $-4096,%r11
745 leaq (%r11,%rbp,1),%rsp
746 movq (%rsp),%r10
747 cmpq %rbp,%rsp
748 ja .Lsqr8x_page_walk
749 jmp .Lsqr8x_page_walk_done
750
751.align 16
752.Lsqr8x_page_walk:
753 leaq -4096(%rsp),%rsp
754 movq (%rsp),%r10
755 cmpq %rbp,%rsp
756 ja .Lsqr8x_page_walk
757.Lsqr8x_page_walk_done:
758
/* r10 = -8*num, r9 = 8*num. */
759 movq %r9,%r10
760 negq %r9
761
/*
 * 32(%rsp) = n0 word, 40(%rsp) = entry %rsp.  The escape bytes are a
 * DW_CFA_def_cfa_expression giving CFA = *(rsp + 40) + 8.
 */
762 movq %r8,32(%rsp)
763 movq %rax,40(%rsp)
764.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
765.Lsqr8x_body:
766
/*
 * The .byte sequences encode, in order: movq %rcx,%xmm2 ; movq %rdi,%xmm1 ;
 * movq %r10,%xmm3.  xmm0 is cleared before the call.
 */
767.byte 102,72,15,110,209
768 pxor %xmm0,%xmm0
769.byte 102,72,15,110,207
770.byte 102,73,15,110,218
771 call bn_sqr8x_internal
772
773
774
775
/*
 * rbx = rdi + r9 (source of the subtraction), rcx = rdx = r9, and the
 * .byte sequence encodes "movq %xmm1,%rdi" (r again).  rcx becomes r9/32,
 * a negative trip count.  The first sbbq below consumes the CF left by
 * sarq, which is 0 when bit 4 of r9 is clear (presumably always, given
 * that num is a multiple of 8 on this path - confirm).
 */
776 leaq (%rdi,%r9,1),%rbx
777 movq %r9,%rcx
778 movq %r9,%rdx
779.byte 102,72,15,126,207
780 sarq $3+2,%rcx
781 jmp .Lsqr8x_sub
782
783.align 32
/*
 * r = [rbx] - [rbp], four words per trip.  The borrow stays in CF across
 * trips: movq, leaq and incq do not modify CF.
 */
784.Lsqr8x_sub:
785 movq 0(%rbx),%r12
786 movq 8(%rbx),%r13
787 movq 16(%rbx),%r14
788 movq 24(%rbx),%r15
789 leaq 32(%rbx),%rbx
790 sbbq 0(%rbp),%r12
791 sbbq 8(%rbp),%r13
792 sbbq 16(%rbp),%r14
793 sbbq 24(%rbp),%r15
794 leaq 32(%rbp),%rbp
795 movq %r12,0(%rdi)
796 movq %r13,8(%rdi)
797 movq %r14,16(%rdi)
798 movq %r15,24(%rdi)
799 leaq 32(%rdi),%rdi
800 incq %rcx
801 jnz .Lsqr8x_sub
802
/*
 * rax = rax - final borrow: the select mask (presumably either 0 or all
 * ones).  rbx and rdi are rewound to where the subtraction started.
 */
803 sbbq $0,%rax
804 leaq (%rbx,%r9,1),%rbx
805 leaq (%rdi,%r9,1),%rdi
806
/*
 * The .byte sequence encodes "movq %rax,%xmm1"; pshufd broadcasts the low
 * dword of the mask.  rsi = entry %rsp from 40(%rsp).
 */
807.byte 102,72,15,110,200
808 pxor %xmm0,%xmm0
809 pshufd $0,%xmm1,%xmm1
810 movq 40(%rsp),%rsi
811.cfi_def_cfa %rsi,8
812 jmp .Lsqr8x_cond_copy
813
814.align 32
/*
 * Select without a branch, 32 bytes per trip:
 *   r = ([rbx] & mask) | (r & ~mask)
 * ~mask is rebuilt each trip as (xmm1 == 0) per dword, and xmm0 is then
 * cleared again.  The 32 source bytes at rbx are zeroed once read, and so
 * are the 32 bytes at the same position offset by rdx (negative, so at a
 * lower address).
 */
815.Lsqr8x_cond_copy:
816 movdqa 0(%rbx),%xmm2
817 movdqa 16(%rbx),%xmm3
818 leaq 32(%rbx),%rbx
819 movdqu 0(%rdi),%xmm4
820 movdqu 16(%rdi),%xmm5
821 leaq 32(%rdi),%rdi
822 movdqa %xmm0,-32(%rbx)
823 movdqa %xmm0,-16(%rbx)
824 movdqa %xmm0,-32(%rbx,%rdx,1)
825 movdqa %xmm0,-16(%rbx,%rdx,1)
826 pcmpeqd %xmm1,%xmm0
827 pand %xmm1,%xmm2
828 pand %xmm1,%xmm3
829 pand %xmm0,%xmm4
830 pand %xmm0,%xmm5
831 pxor %xmm0,%xmm0
832 por %xmm2,%xmm4
833 por %xmm3,%xmm5
834 movdqu %xmm4,-32(%rdi)
835 movdqu %xmm5,-16(%rdi)
836 addq $32,%r9
837 jnz .Lsqr8x_cond_copy
838
/*
 * Return 1.  Callee-saved registers are reloaded relative to the entry
 * %rsp (in rsi) instead of being popped, then %rsp is restored.
 */
839 movq $1,%rax
840 movq -48(%rsi),%r15
841.cfi_restore %r15
842 movq -40(%rsi),%r14
843.cfi_restore %r14
844 movq -32(%rsi),%r13
845.cfi_restore %r13
846 movq -24(%rsi),%r12
847.cfi_restore %r12
848 movq -16(%rsi),%rbp
849.cfi_restore %rbp
850 movq -8(%rsi),%rbx
851.cfi_restore %rbx
852 leaq (%rsi),%rsp
853.cfi_def_cfa_register %rsp
854.Lsqr8x_epilogue:
/* 0xf3,0xc3 is the two-byte "rep ret" encoding. */
855 .byte 0xf3,0xc3
856.cfi_endproc
857.size bn_sqr8x_mont,.-bn_sqr8x_mont
/*
 * NUL-terminated ASCII identification string emitted after the code:
 * "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
 * No instruction in the visible listing references it.
 */
858.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
859.align 16
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette