VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.1j/crypto/genasm-elf/x86_64-mont5.S@ 88461

Last change on this file since 88461 was 83531, checked in by vboxsync, 5 years ago

setting svn:sync-process=export for openssl-1.1.1f, all files except tests

File size: 37.1 KB
Line 
1.text
2
3
4
# bn_mul_mont_gather5 -- exported entry point.
#
# Register use visible in this routine (SysV AMD64 argument order):
#   %rdi     destination words (written in .Lsub and .Lcopy)
#   %rsi     first source operand, read one 64-bit word at a time
#   %rdx     base of a table read in 256-byte groups of sixteen 16-byte
#            entries; one 64-bit value per pass is selected with compare
#            masks instead of an indexed load
#   %rcx     second operand, multiplied by the per-pass factor %rbp and
#            subtracted at the end
#            NOTE(review): presumably the modulus -- confirm with caller
#   %r8      pointer to one 64-bit constant
#            NOTE(review): presumably the Montgomery n0 value -- confirm
#   %r9d     word count (zero-extended below)
#   8(%rsp)  at entry: 32-bit index used to build the selection masks
# Returns 1 in %rax.  Callee-saved registers are pushed on entry and
# reloaded from the saved frame in the epilogue.
# NOTE(review): by its name this is a Montgomery multiplication whose
# multiplier is gathered from a table; confirm against the C caller.
5.globl bn_mul_mont_gather5
6.type bn_mul_mont_gather5,@function
7.align 64
8bn_mul_mont_gather5:
9.cfi_startproc
# Zero-extend the 32-bit word count and keep the entry stack pointer in %rax.
10 movl %r9d,%r9d
11 movq %rsp,%rax
12.cfi_def_cfa_register %rax
# Word counts that are a multiple of 8 branch to .Lmul4x_enter (inside
# bn_mul4x_mont_gather5); every other count continues at .Lmul_enter.
13 testl $7,%r9d
14 jnz .Lmul_enter
15 jmp .Lmul4x_enter
16
17.align 16
18.Lmul_enter:
# Fetch the 32-bit stack argument before the pushes move %rsp.
19 movd 8(%rsp),%xmm5
20 pushq %rbx
21.cfi_offset %rbx,-16
22 pushq %rbp
23.cfi_offset %rbp,-24
24 pushq %r12
25.cfi_offset %r12,-32
26 pushq %r13
27.cfi_offset %r13,-40
28 pushq %r14
29.cfi_offset %r14,-48
30 pushq %r15
31.cfi_offset %r15,-56
32
# New frame base in %r10: 8*count + 280 bytes below the current %rsp,
# rounded down to a 1024-byte boundary.  %r9 is negated only for the
# address computation and restored right after.
33 negq %r9
34 movq %rsp,%r11
35 leaq -280(%rsp,%r9,8),%r10
36 negq %r9
37 andq $-1024,%r10
38
39
40
41
42
43
44
45
46
# Move %rsp down towards %r10 in 4096-byte steps, reading one word at each
# step, so every page between the old and new stack pointer is touched.
47 subq %r10,%r11
48 andq $-4096,%r11
49 leaq (%r10,%r11,1),%rsp
50 movq (%rsp),%r11
51 cmpq %r10,%rsp
52 ja .Lmul_page_walk
53 jmp .Lmul_page_walk_done
54
55.Lmul_page_walk:
56 leaq -4096(%rsp),%rsp
57 movq (%rsp),%r11
58 cmpq %r10,%rsp
59 ja .Lmul_page_walk
60.Lmul_page_walk_done:
61
# Save the entry stack pointer just above the count+1 scratch words; the
# epilogue reloads it from the same slot.
62 leaq .Linc(%rip),%r10
63 movq %rax,8(%rsp,%r9,8)
64.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
65.Lmul_body:
66
# Build 16 selection masks.  %xmm5 is the index broadcast to all lanes.
# Counter vectors start from the vector at 0(.Linc) and are stepped by the
# vector at 16(.Linc) (kept in %xmm4); each counter is compared for equality
# with the index and the result is stored at 112(%r10) .. 352(%r10).
# NOTE(review): .Linc is defined outside this block; its contents are not
# visible here.
# The lone .byte 0x67 lines emit an address-size prefix byte in front of the
# following instruction (NOTE(review): presumably padding -- confirm).
67 leaq 128(%rdx),%r12
68 movdqa 0(%r10),%xmm0
69 movdqa 16(%r10),%xmm1
70 leaq 24-112(%rsp,%r9,8),%r10
71 andq $-16,%r10
72
73 pshufd $0,%xmm5,%xmm5
74 movdqa %xmm1,%xmm4
75 movdqa %xmm1,%xmm2
76 paddd %xmm0,%xmm1
77 pcmpeqd %xmm5,%xmm0
78.byte 0x67
79 movdqa %xmm4,%xmm3
80 paddd %xmm1,%xmm2
81 pcmpeqd %xmm5,%xmm1
82 movdqa %xmm0,112(%r10)
83 movdqa %xmm4,%xmm0
84
85 paddd %xmm2,%xmm3
86 pcmpeqd %xmm5,%xmm2
87 movdqa %xmm1,128(%r10)
88 movdqa %xmm4,%xmm1
89
90 paddd %xmm3,%xmm0
91 pcmpeqd %xmm5,%xmm3
92 movdqa %xmm2,144(%r10)
93 movdqa %xmm4,%xmm2
94
95 paddd %xmm0,%xmm1
96 pcmpeqd %xmm5,%xmm0
97 movdqa %xmm3,160(%r10)
98 movdqa %xmm4,%xmm3
99 paddd %xmm1,%xmm2
100 pcmpeqd %xmm5,%xmm1
101 movdqa %xmm0,176(%r10)
102 movdqa %xmm4,%xmm0
103
104 paddd %xmm2,%xmm3
105 pcmpeqd %xmm5,%xmm2
106 movdqa %xmm1,192(%r10)
107 movdqa %xmm4,%xmm1
108
109 paddd %xmm3,%xmm0
110 pcmpeqd %xmm5,%xmm3
111 movdqa %xmm2,208(%r10)
112 movdqa %xmm4,%xmm2
113
114 paddd %xmm0,%xmm1
115 pcmpeqd %xmm5,%xmm0
116 movdqa %xmm3,224(%r10)
117 movdqa %xmm4,%xmm3
118 paddd %xmm1,%xmm2
119 pcmpeqd %xmm5,%xmm1
120 movdqa %xmm0,240(%r10)
121 movdqa %xmm4,%xmm0
122
123 paddd %xmm2,%xmm3
124 pcmpeqd %xmm5,%xmm2
125 movdqa %xmm1,256(%r10)
126 movdqa %xmm4,%xmm1
127
128 paddd %xmm3,%xmm0
129 pcmpeqd %xmm5,%xmm3
130 movdqa %xmm2,272(%r10)
131 movdqa %xmm4,%xmm2
132
133 paddd %xmm0,%xmm1
134 pcmpeqd %xmm5,%xmm0
135 movdqa %xmm3,288(%r10)
136 movdqa %xmm4,%xmm3
137 paddd %xmm1,%xmm2
138 pcmpeqd %xmm5,%xmm1
139 movdqa %xmm0,304(%r10)
140
141 paddd %xmm2,%xmm3
142.byte 0x67
143 pcmpeqd %xmm5,%xmm2
144 movdqa %xmm1,320(%r10)
145
146 pcmpeqd %xmm5,%xmm3
147 movdqa %xmm2,336(%r10)
# First gather: every one of the 16 table entries at -128(%r12) .. 112(%r12)
# is loaded and ANDed with its mask, and the results are ORed together, so
# the same addresses are read whatever the index is.  The last four masks
# are still in registers; the others are reloaded from the mask area.
148 pand 64(%r12),%xmm0
149
150 pand 80(%r12),%xmm1
151 pand 96(%r12),%xmm2
152 movdqa %xmm3,352(%r10)
153 pand 112(%r12),%xmm3
154 por %xmm2,%xmm0
155 por %xmm3,%xmm1
156 movdqa -128(%r12),%xmm4
157 movdqa -112(%r12),%xmm5
158 movdqa -96(%r12),%xmm2
159 pand 112(%r10),%xmm4
160 movdqa -80(%r12),%xmm3
161 pand 128(%r10),%xmm5
162 por %xmm4,%xmm0
163 pand 144(%r10),%xmm2
164 por %xmm5,%xmm1
165 pand 160(%r10),%xmm3
166 por %xmm2,%xmm0
167 por %xmm3,%xmm1
168 movdqa -64(%r12),%xmm4
169 movdqa -48(%r12),%xmm5
170 movdqa -32(%r12),%xmm2
171 pand 176(%r10),%xmm4
172 movdqa -16(%r12),%xmm3
173 pand 192(%r10),%xmm5
174 por %xmm4,%xmm0
175 pand 208(%r10),%xmm2
176 por %xmm5,%xmm1
177 pand 224(%r10),%xmm3
178 por %xmm2,%xmm0
179 por %xmm3,%xmm1
180 movdqa 0(%r12),%xmm4
181 movdqa 16(%r12),%xmm5
182 movdqa 32(%r12),%xmm2
183 pand 240(%r10),%xmm4
184 movdqa 48(%r12),%xmm3
185 pand 256(%r10),%xmm5
186 por %xmm4,%xmm0
187 pand 272(%r10),%xmm2
188 por %xmm5,%xmm1
189 pand 288(%r10),%xmm3
190 por %xmm2,%xmm0
191 por %xmm3,%xmm1
# Fold the two accumulators, then OR the upper 64-bit half onto the lower
# one (pshufd 0x4e swaps the halves).  %r12 advances to the next 256-byte
# group.  The .byte sequence encodes movq %xmm0,%rbx.
192 por %xmm1,%xmm0
193 pshufd $0x4e,%xmm0,%xmm1
194 por %xmm1,%xmm0
195 leaq 256(%r12),%r12
196.byte 102,72,15,126,195
197
# First pass.  %r8 is replaced by the 64-bit value it pointed to.
# %r14 = pass counter, %r15 = word index within the pass.
198 movq (%r8),%r8
199 movq (%rsi),%rax
200
201 xorq %r14,%r14
202 xorq %r15,%r15
203
# %rbp = (low word of first (%rsi) word times %rbx) times the %r8 constant;
# it is the multiplier applied to the (%rcx) words during this pass.
204 movq %r8,%rbp
205 mulq %rbx
206 movq %rax,%r10
207 movq (%rcx),%rax
208
209 imulq %r10,%rbp
210 movq %rdx,%r11
211
212 mulq %rbp
213 addq %rax,%r10
214 movq 8(%rsi),%rax
215 adcq $0,%rdx
216 movq %rdx,%r13
217
218 leaq 1(%r15),%r15
219 jmp .L1st_enter
220
221.align 16
# Loop body: accumulate the (%rsi)-word product (carried in %r11/%r10) and
# the (%rcx)-word product (carried in %r13) and store one scratch word at
# -16(%rsp,%r15,8) per iteration.
222.L1st:
223 addq %rax,%r13
224 movq (%rsi,%r15,8),%rax
225 adcq $0,%rdx
226 addq %r11,%r13
227 movq %r10,%r11
228 adcq $0,%rdx
229 movq %r13,-16(%rsp,%r15,8)
230 movq %rdx,%r13
231
232.L1st_enter:
233 mulq %rbx
234 addq %rax,%r11
235 movq (%rcx,%r15,8),%rax
236 adcq $0,%rdx
237 leaq 1(%r15),%r15
238 movq %rdx,%r10
239
240 mulq %rbp
241 cmpq %r9,%r15
242 jne .L1st
243
244
# Loop tail: store the last two words and the final carry (0 or 1) in the
# three scratch slots ending at (%rsp,%r9,8).
245 addq %rax,%r13
246 adcq $0,%rdx
247 addq %r11,%r13
248 adcq $0,%rdx
249 movq %r13,-16(%rsp,%r9,8)
250 movq %rdx,%r13
251 movq %r10,%r11
252
253 xorq %rdx,%rdx
254 addq %r11,%r13
255 adcq $0,%rdx
256 movq %r13,-8(%rsp,%r9,8)
257 movq %rdx,(%rsp,%r9,8)
258
259 leaq 1(%r14),%r14
260 jmp .Louter
261.align 16
# Remaining passes.  Each pass re-selects one 64-bit value from the next
# 256-byte table group using the masks saved earlier (%rdx is pointed 128
# bytes into the mask area), then adds that value times the (%rsi) words
# plus %rbp times the (%rcx) words onto the scratch words.
262.Louter:
263 leaq 24+128(%rsp,%r9,8),%rdx
264 andq $-16,%rdx
265 pxor %xmm4,%xmm4
266 pxor %xmm5,%xmm5
267 movdqa -128(%r12),%xmm0
268 movdqa -112(%r12),%xmm1
269 movdqa -96(%r12),%xmm2
270 movdqa -80(%r12),%xmm3
271 pand -128(%rdx),%xmm0
272 pand -112(%rdx),%xmm1
273 por %xmm0,%xmm4
274 pand -96(%rdx),%xmm2
275 por %xmm1,%xmm5
276 pand -80(%rdx),%xmm3
277 por %xmm2,%xmm4
278 por %xmm3,%xmm5
279 movdqa -64(%r12),%xmm0
280 movdqa -48(%r12),%xmm1
281 movdqa -32(%r12),%xmm2
282 movdqa -16(%r12),%xmm3
283 pand -64(%rdx),%xmm0
284 pand -48(%rdx),%xmm1
285 por %xmm0,%xmm4
286 pand -32(%rdx),%xmm2
287 por %xmm1,%xmm5
288 pand -16(%rdx),%xmm3
289 por %xmm2,%xmm4
290 por %xmm3,%xmm5
291 movdqa 0(%r12),%xmm0
292 movdqa 16(%r12),%xmm1
293 movdqa 32(%r12),%xmm2
294 movdqa 48(%r12),%xmm3
295 pand 0(%rdx),%xmm0
296 pand 16(%rdx),%xmm1
297 por %xmm0,%xmm4
298 pand 32(%rdx),%xmm2
299 por %xmm1,%xmm5
300 pand 48(%rdx),%xmm3
301 por %xmm2,%xmm4
302 por %xmm3,%xmm5
303 movdqa 64(%r12),%xmm0
304 movdqa 80(%r12),%xmm1
305 movdqa 96(%r12),%xmm2
306 movdqa 112(%r12),%xmm3
307 pand 64(%rdx),%xmm0
308 pand 80(%rdx),%xmm1
309 por %xmm0,%xmm4
310 pand 96(%rdx),%xmm2
311 por %xmm1,%xmm5
312 pand 112(%rdx),%xmm3
313 por %xmm2,%xmm4
314 por %xmm3,%xmm5
315 por %xmm5,%xmm4
316 pshufd $0x4e,%xmm4,%xmm0
317 por %xmm4,%xmm0
318 leaq 256(%r12),%r12
319
# The .byte sequence encodes movq %xmm0,%rbx (selected value for this pass).
320 movq (%rsi),%rax
321.byte 102,72,15,126,195
322
323 xorq %r15,%r15
324 movq %r8,%rbp
325 movq (%rsp),%r10
326
327 mulq %rbx
328 addq %rax,%r10
329 movq (%rcx),%rax
330 adcq $0,%rdx
331
# Per-pass multiplier, derived as in the first pass but from the low word
# of (scratch word 0 + first (%rsi) word times %rbx).
332 imulq %r10,%rbp
333 movq %rdx,%r11
334
335 mulq %rbp
336 addq %rax,%r10
337 movq 8(%rsi),%rax
338 adcq $0,%rdx
339 movq 8(%rsp),%r10
340 movq %rdx,%r13
341
342 leaq 1(%r15),%r15
343 jmp .Linner_enter
344
345.align 16
346.Linner:
347 addq %rax,%r13
348 movq (%rsi,%r15,8),%rax
349 adcq $0,%rdx
350 addq %r10,%r13
351 movq (%rsp,%r15,8),%r10
352 adcq $0,%rdx
353 movq %r13,-16(%rsp,%r15,8)
354 movq %rdx,%r13
355
356.Linner_enter:
357 mulq %rbx
358 addq %rax,%r11
359 movq (%rcx,%r15,8),%rax
360 adcq $0,%rdx
361 addq %r11,%r10
362 movq %rdx,%r11
363 adcq $0,%r11
364 leaq 1(%r15),%r15
365
366 mulq %rbp
367 cmpq %r9,%r15
368 jne .Linner
369
370 addq %rax,%r13
371 adcq $0,%rdx
372 addq %r10,%r13
373 movq (%rsp,%r9,8),%r10
374 adcq $0,%rdx
375 movq %r13,-16(%rsp,%r9,8)
376 movq %rdx,%r13
377
# Fold in the top scratch word left by the previous pass and store the new
# top two words.
378 xorq %rdx,%rdx
379 addq %r11,%r13
380 adcq $0,%rdx
381 addq %r10,%r13
382 adcq $0,%rdx
383 movq %r13,-8(%rsp,%r9,8)
384 movq %rdx,(%rsp,%r9,8)
385
386 leaq 1(%r14),%r14
387 cmpq %r9,%r14
388 jb .Louter
389
# Final subtraction: (%rdi) words = scratch words minus (%rcx) words with
# borrow.  The xorq clears CF for the first sbbq; the movq, leaq and decq
# inside the loop leave CF untouched, so the borrow survives each iteration.
390 xorq %r14,%r14
391 movq (%rsp),%rax
392 leaq (%rsp),%rsi
393 movq %r9,%r15
394 jmp .Lsub
395.align 16
396.Lsub: sbbq (%rcx,%r14,8),%rax
397 movq %rax,(%rdi,%r14,8)
398 movq 8(%rsi,%r14,8),%rax
399 leaq 1(%r14),%r14
400 decq %r15
401 jnz .Lsub
402
# %rax holds the top scratch word here; subtracting the final borrow turns
# it into a select mask and %rbx becomes its complement.
403 sbbq $0,%rax
404 movq $-1,%rbx
405 xorq %rax,%rbx
406 xorq %r14,%r14
407 movq %r9,%r15
408
# Branch-free select per word: (%rdi) word = (difference AND %rbx) OR
# (scratch AND %rax).  Each scratch word is overwritten with its own index,
# which erases the intermediate value.
409.Lcopy:
410 movq (%rdi,%r14,8),%rcx
411 movq (%rsp,%r14,8),%rdx
412 andq %rbx,%rcx
413 andq %rax,%rdx
414 movq %r14,(%rsp,%r14,8)
415 orq %rcx,%rdx
416 movq %rdx,(%rdi,%r14,8)
417 leaq 1(%r14),%r14
418 subq $1,%r15
419 jnz .Lcopy
420
# Epilogue: reload the entry stack pointer, restore the six pushed registers
# relative to it, return 1.  The .byte pair 0xf3,0xc3 encodes rep ret.
421 movq 8(%rsp,%r9,8),%rsi
422.cfi_def_cfa %rsi,8
423 movq $1,%rax
424
425 movq -48(%rsi),%r15
426.cfi_restore %r15
427 movq -40(%rsi),%r14
428.cfi_restore %r14
429 movq -32(%rsi),%r13
430.cfi_restore %r13
431 movq -24(%rsi),%r12
432.cfi_restore %r12
433 movq -16(%rsi),%rbp
434.cfi_restore %rbp
435 movq -8(%rsi),%rbx
436.cfi_restore %rbx
437 leaq (%rsi),%rsp
438.cfi_def_cfa_register %rsp
439.Lmul_epilogue:
440 .byte 0xf3,0xc3
441.cfi_endproc
442.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
# bn_mul4x_mont_gather5 -- file-local wrapper (no .globl directive in view)
# that builds a stack frame and calls mul4x_internal.
#
# The label .Lmul4x_enter below is also a jump target from
# bn_mul_mont_gather5, which arrives with %rax already holding the entry
# stack pointer.  Argument registers are passed through to mul4x_internal
# unchanged except %r9, which is converted from a word count to a byte
# count.  Returns 1 in %rax.
443.type bn_mul4x_mont_gather5,@function
444.align 32
445bn_mul4x_mont_gather5:
446.cfi_startproc
# The lone .byte 0x67 lines emit an address-size prefix byte in front of the
# following instruction (NOTE(review): presumably padding -- confirm).
447.byte 0x67
448 movq %rsp,%rax
449.cfi_def_cfa_register %rax
450.Lmul4x_enter:
451 pushq %rbx
452.cfi_offset %rbx,-16
453 pushq %rbp
454.cfi_offset %rbp,-24
455 pushq %r12
456.cfi_offset %r12,-32
457 pushq %r13
458.cfi_offset %r13,-40
459 pushq %r14
460.cfi_offset %r14,-48
461 pushq %r15
462.cfi_offset %r15,-56
463.Lmul4x_prologue:
464
# %r9 = byte count (8 * words), %r10 = 3 * byte count, then %r9 is negated
# so it can be used as a negative displacement.
465.byte 0x67
466 shll $3,%r9d
467 leaq (%r9,%r9,2),%r10
468 negq %r9
469
470
471
472
473
474
475
476
477
478
# Frame placement.  %r11 = distance, modulo 4096, between the candidate
# frame (2 * bytes + 320 below %rsp) and the output pointer %rdi.  The frame
# is shifted further down by an amount derived from that distance.
# NOTE(review): presumably this keeps the scratch frame and the output from
# sharing page offsets -- confirm against the perlasm source comments.
479 leaq -320(%rsp,%r9,2),%r11
480 movq %rsp,%rbp
481 subq %rdi,%r11
482 andq $4095,%r11
483 cmpq %r11,%r10
484 jb .Lmul4xsp_alt
485 subq %r11,%rbp
486 leaq -320(%rbp,%r9,2),%rbp
487 jmp .Lmul4xsp_done
488
489.align 32
# Alternative placement: subtract max(0, %r11 - (4096 - 320 - 2 * bytes));
# cmovc substitutes 0 when the subtraction borrows.
490.Lmul4xsp_alt:
491 leaq 4096-320(,%r9,2),%r10
492 leaq -320(%rbp,%r9,2),%rbp
493 subq %r10,%r11
494 movq $0,%r10
495 cmovcq %r10,%r11
496 subq %r11,%rbp
497.Lmul4xsp_done:
# Align the frame to 64 bytes, then step %rsp down to it 4096 bytes at a
# time, reading one word per step so that every page is touched.
498 andq $-64,%rbp
499 movq %rsp,%r11
500 subq %rbp,%r11
501 andq $-4096,%r11
502 leaq (%r11,%rbp,1),%rsp
503 movq (%rsp),%r10
504 cmpq %rbp,%rsp
505 ja .Lmul4x_page_walk
506 jmp .Lmul4x_page_walk_done
507
508.Lmul4x_page_walk:
509 leaq -4096(%rsp),%rsp
510 movq (%rsp),%r10
511 cmpq %rbp,%rsp
512 ja .Lmul4x_page_walk
513.Lmul4x_page_walk_done:
514
# Restore %r9 to the positive byte count and save the entry stack pointer in
# frame slot 40(%rsp).  %rax still holds it when mul4x_internal is called;
# that routine reads the stack argument through 8(%rax).
515 negq %r9
516
517 movq %rax,40(%rsp)
518.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
519.Lmul4x_body:
520
521 call mul4x_internal
522
# Epilogue: reload the entry stack pointer, restore the six pushed registers
# relative to it, return 1.  The .byte pair 0xf3,0xc3 encodes rep ret.
523 movq 40(%rsp),%rsi
524.cfi_def_cfa %rsi,8
525 movq $1,%rax
526
527 movq -48(%rsi),%r15
528.cfi_restore %r15
529 movq -40(%rsi),%r14
530.cfi_restore %r14
531 movq -32(%rsi),%r13
532.cfi_restore %r13
533 movq -24(%rsi),%r12
534.cfi_restore %r12
535 movq -16(%rsi),%rbp
536.cfi_restore %rbp
537 movq -8(%rsi),%rbx
538.cfi_restore %rbx
539 leaq (%rsi),%rsp
540.cfi_def_cfa_register %rsp
541.Lmul4x_epilogue:
542 .byte 0xf3,0xc3
543.cfi_endproc
544.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5
545
# mul4x_internal -- file-local worker reached by call from
# bn_mul4x_mont_gather5 and bn_power5.  It does not return by itself: it
# ends with a jump to .Lsqr4x_sub_entry, which lies outside this block.
#
# Inputs as used below:
#   %rax  address such that 8(%rax) holds the 32-bit selection index
#   %rdx  base of the table, read in 256-byte groups of sixteen entries
#   %rsi  first operand words      %rcx  second operand words
#   %r8   pointer to one 64-bit constant (dereferenced here)
#   %r9   operand length in bytes (positive on entry)
#   %rdi  output pointer, parked at 56+8(%rsp) and reloaded at the end
# Frame offsets are written as N+8 because the call pushed a return address
# on top of the frame built by the caller.
# NOTE(review): by name and structure this is the 4-words-per-iteration
# Montgomery multiply; confirm details against the perlasm source.
546.type mul4x_internal,@function
547.align 32
548mul4x_internal:
# %r13 = %rdx + 128 + 32 * bytes: the value %r12 reaches after one 256-byte
# table group per operand word; used as the loop bound at the end of
# .Louter4x.  %r10 addresses the mask area inside the frame.
549 shlq $5,%r9
550 movd 8(%rax),%xmm5
551 leaq .Linc(%rip),%rax
552 leaq 128(%rdx,%r9,1),%r13
553 shrq $5,%r9
554 movdqa 0(%rax),%xmm0
555 movdqa 16(%rax),%xmm1
556 leaq 88-112(%rsp,%r9,1),%r10
557 leaq 128(%rdx),%r12
558
# Build 16 selection masks.  %xmm5 is the index broadcast to all lanes.
# Counter vectors start from the vector at 0(.Linc) and are stepped by the
# vector at 16(.Linc) (kept in %xmm4); each is compared for equality with
# the index and stored at 112(%r10) .. 352(%r10).
# NOTE(review): .Linc is defined outside this block.
# The .byte 0x67 lines emit address-size prefix bytes in front of the next
# instruction (NOTE(review): presumably padding -- confirm).
559 pshufd $0,%xmm5,%xmm5
560 movdqa %xmm1,%xmm4
561.byte 0x67,0x67
562 movdqa %xmm1,%xmm2
563 paddd %xmm0,%xmm1
564 pcmpeqd %xmm5,%xmm0
565.byte 0x67
566 movdqa %xmm4,%xmm3
567 paddd %xmm1,%xmm2
568 pcmpeqd %xmm5,%xmm1
569 movdqa %xmm0,112(%r10)
570 movdqa %xmm4,%xmm0
571
572 paddd %xmm2,%xmm3
573 pcmpeqd %xmm5,%xmm2
574 movdqa %xmm1,128(%r10)
575 movdqa %xmm4,%xmm1
576
577 paddd %xmm3,%xmm0
578 pcmpeqd %xmm5,%xmm3
579 movdqa %xmm2,144(%r10)
580 movdqa %xmm4,%xmm2
581
582 paddd %xmm0,%xmm1
583 pcmpeqd %xmm5,%xmm0
584 movdqa %xmm3,160(%r10)
585 movdqa %xmm4,%xmm3
586 paddd %xmm1,%xmm2
587 pcmpeqd %xmm5,%xmm1
588 movdqa %xmm0,176(%r10)
589 movdqa %xmm4,%xmm0
590
591 paddd %xmm2,%xmm3
592 pcmpeqd %xmm5,%xmm2
593 movdqa %xmm1,192(%r10)
594 movdqa %xmm4,%xmm1
595
596 paddd %xmm3,%xmm0
597 pcmpeqd %xmm5,%xmm3
598 movdqa %xmm2,208(%r10)
599 movdqa %xmm4,%xmm2
600
601 paddd %xmm0,%xmm1
602 pcmpeqd %xmm5,%xmm0
603 movdqa %xmm3,224(%r10)
604 movdqa %xmm4,%xmm3
605 paddd %xmm1,%xmm2
606 pcmpeqd %xmm5,%xmm1
607 movdqa %xmm0,240(%r10)
608 movdqa %xmm4,%xmm0
609
610 paddd %xmm2,%xmm3
611 pcmpeqd %xmm5,%xmm2
612 movdqa %xmm1,256(%r10)
613 movdqa %xmm4,%xmm1
614
615 paddd %xmm3,%xmm0
616 pcmpeqd %xmm5,%xmm3
617 movdqa %xmm2,272(%r10)
618 movdqa %xmm4,%xmm2
619
620 paddd %xmm0,%xmm1
621 pcmpeqd %xmm5,%xmm0
622 movdqa %xmm3,288(%r10)
623 movdqa %xmm4,%xmm3
624 paddd %xmm1,%xmm2
625 pcmpeqd %xmm5,%xmm1
626 movdqa %xmm0,304(%r10)
627
628 paddd %xmm2,%xmm3
629.byte 0x67
630 pcmpeqd %xmm5,%xmm2
631 movdqa %xmm1,320(%r10)
632
633 pcmpeqd %xmm5,%xmm3
634 movdqa %xmm2,336(%r10)
# First gather: all 16 entries at -128(%r12) .. 112(%r12) are read, ANDed
# with their masks and ORed together, so the addresses touched do not depend
# on the index.
635 pand 64(%r12),%xmm0
636
637 pand 80(%r12),%xmm1
638 pand 96(%r12),%xmm2
639 movdqa %xmm3,352(%r10)
640 pand 112(%r12),%xmm3
641 por %xmm2,%xmm0
642 por %xmm3,%xmm1
643 movdqa -128(%r12),%xmm4
644 movdqa -112(%r12),%xmm5
645 movdqa -96(%r12),%xmm2
646 pand 112(%r10),%xmm4
647 movdqa -80(%r12),%xmm3
648 pand 128(%r10),%xmm5
649 por %xmm4,%xmm0
650 pand 144(%r10),%xmm2
651 por %xmm5,%xmm1
652 pand 160(%r10),%xmm3
653 por %xmm2,%xmm0
654 por %xmm3,%xmm1
655 movdqa -64(%r12),%xmm4
656 movdqa -48(%r12),%xmm5
657 movdqa -32(%r12),%xmm2
658 pand 176(%r10),%xmm4
659 movdqa -16(%r12),%xmm3
660 pand 192(%r10),%xmm5
661 por %xmm4,%xmm0
662 pand 208(%r10),%xmm2
663 por %xmm5,%xmm1
664 pand 224(%r10),%xmm3
665 por %xmm2,%xmm0
666 por %xmm3,%xmm1
667 movdqa 0(%r12),%xmm4
668 movdqa 16(%r12),%xmm5
669 movdqa 32(%r12),%xmm2
670 pand 240(%r10),%xmm4
671 movdqa 48(%r12),%xmm3
672 pand 256(%r10),%xmm5
673 por %xmm4,%xmm0
674 pand 272(%r10),%xmm2
675 por %xmm5,%xmm1
676 pand 288(%r10),%xmm3
677 por %xmm2,%xmm0
678 por %xmm3,%xmm1
# Fold both accumulators and both 64-bit halves; the .byte sequence encodes
# movq %xmm0,%rbx.
679 por %xmm1,%xmm0
680 pshufd $0x4e,%xmm0,%xmm1
681 por %xmm1,%xmm0
682 leaq 256(%r12),%r12
683.byte 102,72,15,126,195
684
# Park the table bound and the output pointer in the frame.
685 movq %r13,16+8(%rsp)
686 movq %rdi,56+8(%rsp)
687
# %rsi is moved to the end of its operand and %r9 is negated, so operand
# words are addressed with negative offsets that count up towards zero.
688 movq (%r8),%r8
689 movq (%rsi),%rax
690 leaq (%rsi,%r9,1),%rsi
691 negq %r9
692
# First pass.  %rbp = (low word of first (%rsi) word times %rbx) times the
# %r8 constant; %r14 walks the scratch words starting at 64+8(%rsp).
693 movq %r8,%rbp
694 mulq %rbx
695 movq %rax,%r10
696 movq (%rcx),%rax
697
698 imulq %r10,%rbp
699 leaq 64+8(%rsp),%r14
700 movq %rdx,%r11
701
702 mulq %rbp
703 addq %rax,%r10
704 movq 8(%rsi,%r9,1),%rax
705 adcq $0,%rdx
706 movq %rdx,%rdi
707
708 mulq %rbx
709 addq %rax,%r11
710 movq 8(%rcx),%rax
711 adcq $0,%rdx
712 movq %rdx,%r10
713
714 mulq %rbp
715 addq %rax,%rdi
716 movq 16(%rsi,%r9,1),%rax
717 adcq $0,%rdx
718 addq %r11,%rdi
719 leaq 32(%r9),%r15
720 leaq 32(%rcx),%rcx
721 adcq $0,%rdx
722 movq %rdi,(%r14)
723 movq %rdx,%r13
724 jmp .L1st4x
725
726.align 32
# Four operand words per iteration; %r15 is a negative byte offset that
# reaches zero when the loop is done.  Carries alternate between
# %r10/%r11 (products with %rbx) and %r13/%rdi (products with %rbp).
727.L1st4x:
728 mulq %rbx
729 addq %rax,%r10
730 movq -16(%rcx),%rax
731 leaq 32(%r14),%r14
732 adcq $0,%rdx
733 movq %rdx,%r11
734
735 mulq %rbp
736 addq %rax,%r13
737 movq -8(%rsi,%r15,1),%rax
738 adcq $0,%rdx
739 addq %r10,%r13
740 adcq $0,%rdx
741 movq %r13,-24(%r14)
742 movq %rdx,%rdi
743
744 mulq %rbx
745 addq %rax,%r11
746 movq -8(%rcx),%rax
747 adcq $0,%rdx
748 movq %rdx,%r10
749
750 mulq %rbp
751 addq %rax,%rdi
752 movq (%rsi,%r15,1),%rax
753 adcq $0,%rdx
754 addq %r11,%rdi
755 adcq $0,%rdx
756 movq %rdi,-16(%r14)
757 movq %rdx,%r13
758
759 mulq %rbx
760 addq %rax,%r10
761 movq 0(%rcx),%rax
762 adcq $0,%rdx
763 movq %rdx,%r11
764
765 mulq %rbp
766 addq %rax,%r13
767 movq 8(%rsi,%r15,1),%rax
768 adcq $0,%rdx
769 addq %r10,%r13
770 adcq $0,%rdx
771 movq %r13,-8(%r14)
772 movq %rdx,%rdi
773
774 mulq %rbx
775 addq %rax,%r11
776 movq 8(%rcx),%rax
777 adcq $0,%rdx
778 movq %rdx,%r10
779
780 mulq %rbp
781 addq %rax,%rdi
782 movq 16(%rsi,%r15,1),%rax
783 adcq $0,%rdx
784 addq %r11,%rdi
785 leaq 32(%rcx),%rcx
786 adcq $0,%rdx
787 movq %rdi,(%r14)
788 movq %rdx,%r13
789
790 addq $32,%r15
791 jnz .L1st4x
792
# Tail of the first pass: the last two word products.
793 mulq %rbx
794 addq %rax,%r10
795 movq -16(%rcx),%rax
796 leaq 32(%r14),%r14
797 adcq $0,%rdx
798 movq %rdx,%r11
799
800 mulq %rbp
801 addq %rax,%r13
802 movq -8(%rsi),%rax
803 adcq $0,%rdx
804 addq %r10,%r13
805 adcq $0,%rdx
806 movq %r13,-24(%r14)
807 movq %rdx,%rdi
808
809 mulq %rbx
810 addq %rax,%r11
811 movq -8(%rcx),%rax
812 adcq $0,%rdx
813 movq %rdx,%r10
814
# The load into %rax here fetches the first (%rsi) operand word again; it is
# consumed by the first mulq of the next pass in .Louter4x.
815 mulq %rbp
816 addq %rax,%rdi
817 movq (%rsi,%r9,1),%rax
818 adcq $0,%rdx
819 addq %r11,%rdi
820 adcq $0,%rdx
821 movq %rdi,-16(%r14)
822 movq %rdx,%r13
823
# Rewind %rcx to the start of its operand; %rdi keeps the top carry.
824 leaq (%rcx,%r9,1),%rcx
825
826 xorq %rdi,%rdi
827 addq %r10,%r13
828 adcq $0,%rdi
829 movq %r13,-8(%r14)
830
831 jmp .Louter4x
832
833.align 32
# Remaining passes.  %rdx is pointed into the mask area (relative to the
# scratch end pointer %r14) and the next table value is selected with the
# same read-everything AND/OR pattern as above.
834.Louter4x:
835 leaq 16+128(%r14),%rdx
836 pxor %xmm4,%xmm4
837 pxor %xmm5,%xmm5
838 movdqa -128(%r12),%xmm0
839 movdqa -112(%r12),%xmm1
840 movdqa -96(%r12),%xmm2
841 movdqa -80(%r12),%xmm3
842 pand -128(%rdx),%xmm0
843 pand -112(%rdx),%xmm1
844 por %xmm0,%xmm4
845 pand -96(%rdx),%xmm2
846 por %xmm1,%xmm5
847 pand -80(%rdx),%xmm3
848 por %xmm2,%xmm4
849 por %xmm3,%xmm5
850 movdqa -64(%r12),%xmm0
851 movdqa -48(%r12),%xmm1
852 movdqa -32(%r12),%xmm2
853 movdqa -16(%r12),%xmm3
854 pand -64(%rdx),%xmm0
855 pand -48(%rdx),%xmm1
856 por %xmm0,%xmm4
857 pand -32(%rdx),%xmm2
858 por %xmm1,%xmm5
859 pand -16(%rdx),%xmm3
860 por %xmm2,%xmm4
861 por %xmm3,%xmm5
862 movdqa 0(%r12),%xmm0
863 movdqa 16(%r12),%xmm1
864 movdqa 32(%r12),%xmm2
865 movdqa 48(%r12),%xmm3
866 pand 0(%rdx),%xmm0
867 pand 16(%rdx),%xmm1
868 por %xmm0,%xmm4
869 pand 32(%rdx),%xmm2
870 por %xmm1,%xmm5
871 pand 48(%rdx),%xmm3
872 por %xmm2,%xmm4
873 por %xmm3,%xmm5
874 movdqa 64(%r12),%xmm0
875 movdqa 80(%r12),%xmm1
876 movdqa 96(%r12),%xmm2
877 movdqa 112(%r12),%xmm3
878 pand 64(%rdx),%xmm0
879 pand 80(%rdx),%xmm1
880 por %xmm0,%xmm4
881 pand 96(%rdx),%xmm2
882 por %xmm1,%xmm5
883 pand 112(%rdx),%xmm3
884 por %xmm2,%xmm4
885 por %xmm3,%xmm5
886 por %xmm5,%xmm4
887 pshufd $0x4e,%xmm4,%xmm0
888 por %xmm4,%xmm0
889 leaq 256(%r12),%r12
# The .byte sequence encodes movq %xmm0,%rbx.
890.byte 102,72,15,126,195
891
# %rax still holds the first (%rsi) word loaded at the end of the previous
# pass.  The previous top carry in %rdi is stored at (%r14) before %r14 is
# rewound to the first scratch word.
892 movq (%r14,%r9,1),%r10
893 movq %r8,%rbp
894 mulq %rbx
895 addq %rax,%r10
896 movq (%rcx),%rax
897 adcq $0,%rdx
898
899 imulq %r10,%rbp
900 movq %rdx,%r11
901 movq %rdi,(%r14)
902
903 leaq (%r14,%r9,1),%r14
904
905 mulq %rbp
906 addq %rax,%r10
907 movq 8(%rsi,%r9,1),%rax
908 adcq $0,%rdx
909 movq %rdx,%rdi
910
911 mulq %rbx
912 addq %rax,%r11
913 movq 8(%rcx),%rax
914 adcq $0,%rdx
915 addq 8(%r14),%r11
916 adcq $0,%rdx
917 movq %rdx,%r10
918
919 mulq %rbp
920 addq %rax,%rdi
921 movq 16(%rsi,%r9,1),%rax
922 adcq $0,%rdx
923 addq %r11,%rdi
924 leaq 32(%r9),%r15
925 leaq 32(%rcx),%rcx
926 adcq $0,%rdx
927 movq %rdx,%r13
928 jmp .Linner4x
929
930.align 32
# Same four-word pipeline as .L1st4x, with the existing scratch words added
# in; each result word is stored one position lower than it was read.
931.Linner4x:
932 mulq %rbx
933 addq %rax,%r10
934 movq -16(%rcx),%rax
935 adcq $0,%rdx
936 addq 16(%r14),%r10
937 leaq 32(%r14),%r14
938 adcq $0,%rdx
939 movq %rdx,%r11
940
941 mulq %rbp
942 addq %rax,%r13
943 movq -8(%rsi,%r15,1),%rax
944 adcq $0,%rdx
945 addq %r10,%r13
946 adcq $0,%rdx
947 movq %rdi,-32(%r14)
948 movq %rdx,%rdi
949
950 mulq %rbx
951 addq %rax,%r11
952 movq -8(%rcx),%rax
953 adcq $0,%rdx
954 addq -8(%r14),%r11
955 adcq $0,%rdx
956 movq %rdx,%r10
957
958 mulq %rbp
959 addq %rax,%rdi
960 movq (%rsi,%r15,1),%rax
961 adcq $0,%rdx
962 addq %r11,%rdi
963 adcq $0,%rdx
964 movq %r13,-24(%r14)
965 movq %rdx,%r13
966
967 mulq %rbx
968 addq %rax,%r10
969 movq 0(%rcx),%rax
970 adcq $0,%rdx
971 addq (%r14),%r10
972 adcq $0,%rdx
973 movq %rdx,%r11
974
975 mulq %rbp
976 addq %rax,%r13
977 movq 8(%rsi,%r15,1),%rax
978 adcq $0,%rdx
979 addq %r10,%r13
980 adcq $0,%rdx
981 movq %rdi,-16(%r14)
982 movq %rdx,%rdi
983
984 mulq %rbx
985 addq %rax,%r11
986 movq 8(%rcx),%rax
987 adcq $0,%rdx
988 addq 8(%r14),%r11
989 adcq $0,%rdx
990 movq %rdx,%r10
991
992 mulq %rbp
993 addq %rax,%rdi
994 movq 16(%rsi,%r15,1),%rax
995 adcq $0,%rdx
996 addq %r11,%rdi
997 leaq 32(%rcx),%rcx
998 adcq $0,%rdx
999 movq %r13,-8(%r14)
1000 movq %rdx,%r13
1001
1002 addq $32,%r15
1003 jnz .Linner4x
1004
1005 mulq %rbx
1006 addq %rax,%r10
1007 movq -16(%rcx),%rax
1008 adcq $0,%rdx
1009 addq 16(%r14),%r10
1010 leaq 32(%r14),%r14
1011 adcq $0,%rdx
1012 movq %rdx,%r11
1013
1014 mulq %rbp
1015 addq %rax,%r13
1016 movq -8(%rsi),%rax
1017 adcq $0,%rdx
1018 addq %r10,%r13
1019 adcq $0,%rdx
1020 movq %rdi,-32(%r14)
1021 movq %rdx,%rdi
1022
# The roles of %rax and %rbp are swapped for the last product: the
# multiplier moves to %rax and %rbp receives the last (%rcx) operand word,
# which the comparison after the loop uses.
1023 mulq %rbx
1024 addq %rax,%r11
1025 movq %rbp,%rax
1026 movq -8(%rcx),%rbp
1027 adcq $0,%rdx
1028 addq -8(%r14),%r11
1029 adcq $0,%rdx
1030 movq %rdx,%r10
1031
1032 mulq %rbp
1033 addq %rax,%rdi
1034 movq (%rsi,%r9,1),%rax
1035 adcq $0,%rdx
1036 addq %r11,%rdi
1037 adcq $0,%rdx
1038 movq %r13,-24(%r14)
1039 movq %rdx,%r13
1040
1041 movq %rdi,-16(%r14)
1042 leaq (%rcx,%r9,1),%rcx
1043
1044 xorq %rdi,%rdi
1045 addq %r10,%r13
1046 adcq $0,%rdi
1047 addq (%r14),%r13
1048 adcq $0,%rdi
1049 movq %r13,-8(%r14)
1050
# Loop until %r12 reaches the table bound saved at 16+8(%rsp).
1051 cmpq 16+8(%rsp),%r12
1052 jb .Louter4x
# Build a mask in %rax: the borrow from (last (%rcx) word minus top result
# word) is shifted into %r15 (zero at this point), ORed with the top carry
# in %rdi, and negated.
1053 xorq %rax,%rax
1054 subq %r13,%rbp
1055 adcq %r15,%r15
1056 orq %r15,%rdi
1057 subq %rdi,%rax
# Register setup for the code at .Lsqr4x_sub_entry (outside this block):
# %rbx = first scratch word, %rbp = start of the (%rcx) operand,
# %rcx = negative byte count shifted right by 5, %rdi = saved output
# pointer, %r12 = first operand word minus 1, %r13..%r15 = next three
# operand words, %r10 = 0.
# NOTE(review): presumably the target performs the final conditional
# subtraction and returns to the caller -- confirm.
1058 leaq (%r14,%r9,1),%rbx
1059 movq (%rcx),%r12
1060 leaq (%rcx),%rbp
1061 movq %r9,%rcx
1062 sarq $3+2,%rcx
1063 movq 56+8(%rsp),%rdi
1064 decq %r12
1065 xorq %r10,%r10
1066 movq 8(%rbp),%r13
1067 movq 16(%rbp),%r14
1068 movq 24(%rbp),%r15
1069 jmp .Lsqr4x_sub_entry
1070.size mul4x_internal,.-mul4x_internal
# bn_power5 -- exported entry point.
#
# Builds a stack frame, calls __bn_sqr8x_internal followed by
# __bn_post4x_internal five times, then calls mul4x_internal once, and
# returns 1 in %rax.
# Registers on entry as used below: %rdi, %rsi, %rdx, %rcx are pointer
# arguments; %r8 points to a 64-bit constant that is loaded and stored in
# the frame; %r9d is a word count.  The stack argument is read later by
# mul4x_internal through the saved entry stack pointer.
# NOTE(review): by name and call pattern this computes five squarings and
# one multiplication by a gathered table entry; __bn_post4x_internal is
# defined outside this view -- confirm its contract there.
1071.globl bn_power5
1072.type bn_power5,@function
1073.align 32
1074bn_power5:
1075.cfi_startproc
1076 movq %rsp,%rax
1077.cfi_def_cfa_register %rax
1078 pushq %rbx
1079.cfi_offset %rbx,-16
1080 pushq %rbp
1081.cfi_offset %rbp,-24
1082 pushq %r12
1083.cfi_offset %r12,-32
1084 pushq %r13
1085.cfi_offset %r13,-40
1086 pushq %r14
1087.cfi_offset %r14,-48
1088 pushq %r15
1089.cfi_offset %r15,-56
1090.Lpower5_prologue:
1091
# %r9 = byte count (8 * words), %r10 = 3 * byte count, then %r9 is negated.
# %r8 is replaced by the value it pointed to.
1092 shll $3,%r9d
1093 leal (%r9,%r9,2),%r10d
1094 negq %r9
1095 movq (%r8),%r8
1096
1097
1098
1099
1100
1101
1102
1103
# Frame placement, same scheme as in bn_mul4x_mont_gather5: the frame sits
# 2 * bytes + 320 below %rsp and is shifted further down by an amount
# derived from its distance to %rdi modulo 4096.
# NOTE(review): presumably avoids page-offset aliasing with the output --
# confirm against the perlasm source comments.
1104 leaq -320(%rsp,%r9,2),%r11
1105 movq %rsp,%rbp
1106 subq %rdi,%r11
1107 andq $4095,%r11
1108 cmpq %r11,%r10
1109 jb .Lpwr_sp_alt
1110 subq %r11,%rbp
1111 leaq -320(%rbp,%r9,2),%rbp
1112 jmp .Lpwr_sp_done
1113
1114.align 32
1115.Lpwr_sp_alt:
1116 leaq 4096-320(,%r9,2),%r10
1117 leaq -320(%rbp,%r9,2),%rbp
1118 subq %r10,%r11
1119 movq $0,%r10
1120 cmovcq %r10,%r11
1121 subq %r11,%rbp
1122.Lpwr_sp_done:
# Align the frame to 64 bytes, then step %rsp down to it 4096 bytes at a
# time, reading one word per step so that every page is touched.
1123 andq $-64,%rbp
1124 movq %rsp,%r11
1125 subq %rbp,%r11
1126 andq $-4096,%r11
1127 leaq (%r11,%rbp,1),%rsp
1128 movq (%rsp),%r10
1129 cmpq %rbp,%rsp
1130 ja .Lpwr_page_walk
1131 jmp .Lpwr_page_walk_done
1132
1133.Lpwr_page_walk:
1134 leaq -4096(%rsp),%rsp
1135 movq (%rsp),%r10
1136 cmpq %rbp,%rsp
1137 ja .Lpwr_page_walk
1138.Lpwr_page_walk_done:
1139
# %r10 = negative byte count, %r9 = positive byte count.
1140 movq %r9,%r10
1141 negq %r9
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
# Frame slots: 32(%rsp) = the constant loaded from (%r8),
# 40(%rsp) = entry stack pointer.
1152 movq %r8,32(%rsp)
1153 movq %rax,40(%rsp)
1154.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
1155.Lpower5_body:
# The four .byte sequences encode, in order:
#   movq %rdi,%xmm1    movq %rcx,%xmm2    movq %r10,%xmm3    movq %rdx,%xmm4
# so these values are parked in SSE registers across the calls below.
1156.byte 102,72,15,110,207
1157.byte 102,72,15,110,209
1158.byte 102,73,15,110,218
1159.byte 102,72,15,110,226
1160
1161 call __bn_sqr8x_internal
1162 call __bn_post4x_internal
1163 call __bn_sqr8x_internal
1164 call __bn_post4x_internal
1165 call __bn_sqr8x_internal
1166 call __bn_post4x_internal
1167 call __bn_sqr8x_internal
1168 call __bn_post4x_internal
1169 call __bn_sqr8x_internal
1170 call __bn_post4x_internal
1171
# The two .byte sequences encode movq %xmm2,%rcx and movq %xmm4,%rdx,
# restoring the values parked above.  For the multiply, %rdi is set to
# %rsi, %rax to the saved entry stack pointer (mul4x_internal reads the
# index through 8(%rax)), and %r8 to the address of the frame slot holding
# the constant, because mul4x_internal dereferences %r8.
1172.byte 102,72,15,126,209
1173.byte 102,72,15,126,226
1174 movq %rsi,%rdi
1175 movq 40(%rsp),%rax
1176 leaq 32(%rsp),%r8
1177
1178 call mul4x_internal
1179
# Epilogue: reload the entry stack pointer, restore the six pushed registers
# relative to it, return 1.  The .byte pair 0xf3,0xc3 encodes rep ret.
1180 movq 40(%rsp),%rsi
1181.cfi_def_cfa %rsi,8
1182 movq $1,%rax
1183 movq -48(%rsi),%r15
1184.cfi_restore %r15
1185 movq -40(%rsi),%r14
1186.cfi_restore %r14
1187 movq -32(%rsi),%r13
1188.cfi_restore %r13
1189 movq -24(%rsi),%r12
1190.cfi_restore %r12
1191 movq -16(%rsi),%rbp
1192.cfi_restore %rbp
1193 movq -8(%rsi),%rbx
1194.cfi_restore %rbx
1195 leaq (%rsi),%rsp
1196.cfi_def_cfa_register %rsp
1197.Lpower5_epilogue:
1198 .byte 0xf3,0xc3
1199.cfi_endproc
1200.size bn_power5,.-bn_power5
1201
1202.globl bn_sqr8x_internal
1203.hidden bn_sqr8x_internal
1204.type bn_sqr8x_internal,@function
1205.align 32
1206bn_sqr8x_internal:
1207__bn_sqr8x_internal:
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281 leaq 32(%r10),%rbp
1282 leaq (%rsi,%r9,1),%rsi
1283
1284 movq %r9,%rcx
1285
1286
1287 movq -32(%rsi,%rbp,1),%r14
1288 leaq 48+8(%rsp,%r9,2),%rdi
1289 movq -24(%rsi,%rbp,1),%rax
1290 leaq -32(%rdi,%rbp,1),%rdi
1291 movq -16(%rsi,%rbp,1),%rbx
1292 movq %rax,%r15
1293
1294 mulq %r14
1295 movq %rax,%r10
1296 movq %rbx,%rax
1297 movq %rdx,%r11
1298 movq %r10,-24(%rdi,%rbp,1)
1299
1300 mulq %r14
1301 addq %rax,%r11
1302 movq %rbx,%rax
1303 adcq $0,%rdx
1304 movq %r11,-16(%rdi,%rbp,1)
1305 movq %rdx,%r10
1306
1307
1308 movq -8(%rsi,%rbp,1),%rbx
1309 mulq %r15
1310 movq %rax,%r12
1311 movq %rbx,%rax
1312 movq %rdx,%r13
1313
1314 leaq (%rbp),%rcx
1315 mulq %r14
1316 addq %rax,%r10
1317 movq %rbx,%rax
1318 movq %rdx,%r11
1319 adcq $0,%r11
1320 addq %r12,%r10
1321 adcq $0,%r11
1322 movq %r10,-8(%rdi,%rcx,1)
1323 jmp .Lsqr4x_1st
1324
1325.align 32
1326.Lsqr4x_1st:
1327 movq (%rsi,%rcx,1),%rbx
1328 mulq %r15
1329 addq %rax,%r13
1330 movq %rbx,%rax
1331 movq %rdx,%r12
1332 adcq $0,%r12
1333
1334 mulq %r14
1335 addq %rax,%r11
1336 movq %rbx,%rax
1337 movq 8(%rsi,%rcx,1),%rbx
1338 movq %rdx,%r10
1339 adcq $0,%r10
1340 addq %r13,%r11
1341 adcq $0,%r10
1342
1343
1344 mulq %r15
1345 addq %rax,%r12
1346 movq %rbx,%rax
1347 movq %r11,(%rdi,%rcx,1)
1348 movq %rdx,%r13
1349 adcq $0,%r13
1350
1351 mulq %r14
1352 addq %rax,%r10
1353 movq %rbx,%rax
1354 movq 16(%rsi,%rcx,1),%rbx
1355 movq %rdx,%r11
1356 adcq $0,%r11
1357 addq %r12,%r10
1358 adcq $0,%r11
1359
1360 mulq %r15
1361 addq %rax,%r13
1362 movq %rbx,%rax
1363 movq %r10,8(%rdi,%rcx,1)
1364 movq %rdx,%r12
1365 adcq $0,%r12
1366
1367 mulq %r14
1368 addq %rax,%r11
1369 movq %rbx,%rax
1370 movq 24(%rsi,%rcx,1),%rbx
1371 movq %rdx,%r10
1372 adcq $0,%r10
1373 addq %r13,%r11
1374 adcq $0,%r10
1375
1376
1377 mulq %r15
1378 addq %rax,%r12
1379 movq %rbx,%rax
1380 movq %r11,16(%rdi,%rcx,1)
1381 movq %rdx,%r13
1382 adcq $0,%r13
1383 leaq 32(%rcx),%rcx
1384
1385 mulq %r14
1386 addq %rax,%r10
1387 movq %rbx,%rax
1388 movq %rdx,%r11
1389 adcq $0,%r11
1390 addq %r12,%r10
1391 adcq $0,%r11
1392 movq %r10,-8(%rdi,%rcx,1)
1393
1394 cmpq $0,%rcx
1395 jne .Lsqr4x_1st
1396
1397 mulq %r15
1398 addq %rax,%r13
1399 leaq 16(%rbp),%rbp
1400 adcq $0,%rdx
1401 addq %r11,%r13
1402 adcq $0,%rdx
1403
1404 movq %r13,(%rdi)
1405 movq %rdx,%r12
1406 movq %rdx,8(%rdi)
1407 jmp .Lsqr4x_outer
1408
1409.align 32
1410.Lsqr4x_outer:
1411 movq -32(%rsi,%rbp,1),%r14
1412 leaq 48+8(%rsp,%r9,2),%rdi
1413 movq -24(%rsi,%rbp,1),%rax
1414 leaq -32(%rdi,%rbp,1),%rdi
1415 movq -16(%rsi,%rbp,1),%rbx
1416 movq %rax,%r15
1417
1418 mulq %r14
1419 movq -24(%rdi,%rbp,1),%r10
1420 addq %rax,%r10
1421 movq %rbx,%rax
1422 adcq $0,%rdx
1423 movq %r10,-24(%rdi,%rbp,1)
1424 movq %rdx,%r11
1425
1426 mulq %r14
1427 addq %rax,%r11
1428 movq %rbx,%rax
1429 adcq $0,%rdx
1430 addq -16(%rdi,%rbp,1),%r11
1431 movq %rdx,%r10
1432 adcq $0,%r10
1433 movq %r11,-16(%rdi,%rbp,1)
1434
1435 xorq %r12,%r12
1436
1437 movq -8(%rsi,%rbp,1),%rbx
1438 mulq %r15
1439 addq %rax,%r12
1440 movq %rbx,%rax
1441 adcq $0,%rdx
1442 addq -8(%rdi,%rbp,1),%r12
1443 movq %rdx,%r13
1444 adcq $0,%r13
1445
1446 mulq %r14
1447 addq %rax,%r10
1448 movq %rbx,%rax
1449 adcq $0,%rdx
1450 addq %r12,%r10
1451 movq %rdx,%r11
1452 adcq $0,%r11
1453 movq %r10,-8(%rdi,%rbp,1)
1454
1455 leaq (%rbp),%rcx
1456 jmp .Lsqr4x_inner
1457
1458.align 32
1459.Lsqr4x_inner:
1460 movq (%rsi,%rcx,1),%rbx
1461 mulq %r15
1462 addq %rax,%r13
1463 movq %rbx,%rax
1464 movq %rdx,%r12
1465 adcq $0,%r12
1466 addq (%rdi,%rcx,1),%r13
1467 adcq $0,%r12
1468
1469.byte 0x67
1470 mulq %r14
1471 addq %rax,%r11
1472 movq %rbx,%rax
1473 movq 8(%rsi,%rcx,1),%rbx
1474 movq %rdx,%r10
1475 adcq $0,%r10
1476 addq %r13,%r11
1477 adcq $0,%r10
1478
1479 mulq %r15
1480 addq %rax,%r12
1481 movq %r11,(%rdi,%rcx,1)
1482 movq %rbx,%rax
1483 movq %rdx,%r13
1484 adcq $0,%r13
1485 addq 8(%rdi,%rcx,1),%r12
1486 leaq 16(%rcx),%rcx
1487 adcq $0,%r13
1488
1489 mulq %r14
1490 addq %rax,%r10
1491 movq %rbx,%rax
1492 adcq $0,%rdx
1493 addq %r12,%r10
1494 movq %rdx,%r11
1495 adcq $0,%r11
1496 movq %r10,-8(%rdi,%rcx,1)
1497
1498 cmpq $0,%rcx
1499 jne .Lsqr4x_inner
1500
1501.byte 0x67
1502 mulq %r15
1503 addq %rax,%r13
1504 adcq $0,%rdx
1505 addq %r11,%r13
1506 adcq $0,%rdx
1507
1508 movq %r13,(%rdi)
1509 movq %rdx,%r12
1510 movq %rdx,8(%rdi)
1511
1512 addq $16,%rbp
1513 jnz .Lsqr4x_outer
1514
1515
1516 movq -32(%rsi),%r14
1517 leaq 48+8(%rsp,%r9,2),%rdi
1518 movq -24(%rsi),%rax
1519 leaq -32(%rdi,%rbp,1),%rdi
1520 movq -16(%rsi),%rbx
1521 movq %rax,%r15
1522
1523 mulq %r14
1524 addq %rax,%r10
1525 movq %rbx,%rax
1526 movq %rdx,%r11
1527 adcq $0,%r11
1528
1529 mulq %r14
1530 addq %rax,%r11
1531 movq %rbx,%rax
1532 movq %r10,-24(%rdi)
1533 movq %rdx,%r10
1534 adcq $0,%r10
1535 addq %r13,%r11
1536 movq -8(%rsi),%rbx
1537 adcq $0,%r10
1538
1539 mulq %r15
1540 addq %rax,%r12
1541 movq %rbx,%rax
1542 movq %r11,-16(%rdi)
1543 movq %rdx,%r13
1544 adcq $0,%r13
1545
1546 mulq %r14
1547 addq %rax,%r10
1548 movq %rbx,%rax
1549 movq %rdx,%r11
1550 adcq $0,%r11
1551 addq %r12,%r10
1552 adcq $0,%r11
1553 movq %r10,-8(%rdi)
1554
1555 mulq %r15
1556 addq %rax,%r13
1557 movq -16(%rsi),%rax
1558 adcq $0,%rdx
1559 addq %r11,%r13
1560 adcq $0,%rdx
1561
1562 movq %r13,(%rdi)
1563 movq %rdx,%r12
1564 movq %rdx,8(%rdi)
1565
1566 mulq %rbx
1567 addq $16,%rbp
1568 xorq %r14,%r14
1569 subq %r9,%rbp
1570 xorq %r15,%r15
1571
1572 addq %r12,%rax
1573 adcq $0,%rdx
1574 movq %rax,8(%rdi)
1575 movq %rdx,16(%rdi)
1576 movq %r15,24(%rdi)
1577
1578 movq -16(%rsi,%rbp,1),%rax
1579 leaq 48+8(%rsp),%rdi
1580 xorq %r10,%r10
1581 movq 8(%rdi),%r11
1582
1583 leaq (%r14,%r10,2),%r12
1584 shrq $63,%r10
1585 leaq (%rcx,%r11,2),%r13
1586 shrq $63,%r11
1587 orq %r10,%r13
1588 movq 16(%rdi),%r10
1589 movq %r11,%r14
1590 mulq %rax
1591 negq %r15
1592 movq 24(%rdi),%r11
1593 adcq %rax,%r12
1594 movq -8(%rsi,%rbp,1),%rax
1595 movq %r12,(%rdi)
1596 adcq %rdx,%r13
1597
1598 leaq (%r14,%r10,2),%rbx
1599 movq %r13,8(%rdi)
1600 sbbq %r15,%r15
1601 shrq $63,%r10
1602 leaq (%rcx,%r11,2),%r8
1603 shrq $63,%r11
1604 orq %r10,%r8
1605 movq 32(%rdi),%r10
1606 movq %r11,%r14
1607 mulq %rax
1608 negq %r15
1609 movq 40(%rdi),%r11
1610 adcq %rax,%rbx
1611 movq 0(%rsi,%rbp,1),%rax
1612 movq %rbx,16(%rdi)
1613 adcq %rdx,%r8
1614 leaq 16(%rbp),%rbp
1615 movq %r8,24(%rdi)
1616 sbbq %r15,%r15
1617 leaq 64(%rdi),%rdi
1618 jmp .Lsqr4x_shift_n_add
1619
1620.align 32
1621.Lsqr4x_shift_n_add:
1622 leaq (%r14,%r10,2),%r12
1623 shrq $63,%r10
1624 leaq (%rcx,%r11,2),%r13
1625 shrq $63,%r11
1626 orq %r10,%r13
1627 movq -16(%rdi),%r10
1628 movq %r11,%r14
1629 mulq %rax
1630 negq %r15
1631 movq -8(%rdi),%r11
1632 adcq %rax,%r12
1633 movq -8(%rsi,%rbp,1),%rax
1634 movq %r12,-32(%rdi)
1635 adcq %rdx,%r13
1636
1637 leaq (%r14,%r10,2),%rbx
1638 movq %r13,-24(%rdi)
1639 sbbq %r15,%r15
1640 shrq $63,%r10
1641 leaq (%rcx,%r11,2),%r8
1642 shrq $63,%r11
1643 orq %r10,%r8
1644 movq 0(%rdi),%r10
1645 movq %r11,%r14
1646 mulq %rax
1647 negq %r15
1648 movq 8(%rdi),%r11
1649 adcq %rax,%rbx
1650 movq 0(%rsi,%rbp,1),%rax
1651 movq %rbx,-16(%rdi)
1652 adcq %rdx,%r8
1653
1654 leaq (%r14,%r10,2),%r12
1655 movq %r8,-8(%rdi)
1656 sbbq %r15,%r15
1657 shrq $63,%r10
1658 leaq (%rcx,%r11,2),%r13
1659 shrq $63,%r11
1660 orq %r10,%r13
1661 movq 16(%rdi),%r10
1662 movq %r11,%r14
1663 mulq %rax
1664 negq %r15
1665 movq 24(%rdi),%r11
1666 adcq %rax,%r12
1667 movq 8(%rsi,%rbp,1),%rax
1668 movq %r12,0(%rdi)
1669 adcq %rdx,%r13
1670
1671 leaq (%r14,%r10,2),%rbx
1672 movq %r13,8(%rdi)
1673 sbbq %r15,%r15
1674 shrq $63,%r10
1675 leaq (%rcx,%r11,2),%r8
1676 shrq $63,%r11
1677 orq %r10,%r8
1678 movq 32(%rdi),%r10
1679 movq %r11,%r14
1680 mulq %rax
1681 negq %r15
1682 movq 40(%rdi),%r11
1683 adcq %rax,%rbx
1684 movq 16(%rsi,%rbp,1),%rax
1685 movq %rbx,16(%rdi)
1686 adcq %rdx,%r8
1687 movq %r8,24(%rdi)
1688 sbbq %r15,%r15
1689 leaq 64(%rdi),%rdi
1690 addq $32,%rbp
1691 jnz .Lsqr4x_shift_n_add
1692
1693 leaq (%r14,%r10,2),%r12
1694.byte 0x67
1695 shrq $63,%r10
1696 leaq (%rcx,%r11,2),%r13
1697 shrq $63,%r11
1698 orq %r10,%r13
1699 movq -16(%rdi),%r10
1700 movq %r11,%r14
1701 mulq %rax
1702 negq %r15
1703 movq -8(%rdi),%r11
1704 adcq %rax,%r12
1705 movq -8(%rsi),%rax
1706 movq %r12,-32(%rdi)
1707 adcq %rdx,%r13
1708
1709 leaq (%r14,%r10,2),%rbx
1710 movq %r13,-24(%rdi)
1711 sbbq %r15,%r15
1712 shrq $63,%r10
1713 leaq (%rcx,%r11,2),%r8
1714 shrq $63,%r11
1715 orq %r10,%r8
1716 mulq %rax
1717 negq %r15
1718 adcq %rax,%rbx
1719 adcq %rdx,%r8
1720 movq %rbx,-16(%rdi)
1721 movq %r8,-8(%rdi)
1722.byte 102,72,15,126,213
/*
 * __bn_sqr8x_reduction: Montgomery-style word-by-word reduction of a
 * double-width temporary held on the stack, processed eight 64-bit words per
 * pass. This label is an entry point inside bn_sqr8x_internal (the .size
 * directive at the end closes that function). It is reached with `call`, so
 * the "+8" in every N+8(%rsp) operand skips the return address.
 *
 * Inputs as read by the code below:
 *   %rbp         pointer to the first modulus word
 *   %r9          length in bytes
 *   32+8(%rsp)   per-word multiplier constant
 *   48+8(%rsp)   first word of the temporary (2 * %r9 bytes long)
 *   %xmm2        value moved back into %rbp at the end of every pass
 *   %xmm3        value moved back into %r9 at the end of every pass
 *                (NOTE(review): bn_from_mont8x stores the modulus pointer and
 *                the negated byte length there; confirm for other callers)
 * Scratch slots written here:
 *   0+8(%rsp)    end-of-modulus pointer
 *   8+8(%rsp)    end-of-temporary pointer
 *   48+8(%rsp) .. 48+56+8(%rsp)  the eight multipliers of the current pass;
 *                these overlay the first eight words of the temporary
 * On return: %rax holds the accumulated top carry, %rdi the end-of-temporary
 * pointer, %rcx the last modulus word, %rsi is zero.
 */
1723__bn_sqr8x_reduction:
1724 xorq %rax,%rax			/* no carry word from a previous pass yet */
1725 leaq (%r9,%rbp,1),%rcx		/* %rcx = end of modulus */
1726 leaq 48+8(%rsp,%r9,2),%rdx	/* %rdx = end of the double-width temporary */
1727 movq %rcx,0+8(%rsp)
1728 leaq 48+8(%rsp,%r9,1),%rdi
1729 movq %rdx,8+8(%rsp)
1730 negq %r9				/* negative byte length, used to step %rdi back */
1731 jmp .L8x_reduction_loop
1732
1733.align 32
/* One outer pass: retire the eight lowest live words of the temporary. */
1734.L8x_reduction_loop:
1735 leaq (%rdi,%r9,1),%rdi		/* rewind to the start of this pass's window */
1736.byte 0x66				/* lone prefix byte on the next instruction; presumably padding - TODO confirm */
1737 movq 0(%rdi),%rbx
1738 movq 8(%rdi),%r9
1739 movq 16(%rdi),%r10
1740 movq 24(%rdi),%r11
1741 movq 32(%rdi),%r12
1742 movq 40(%rdi),%r13
1743 movq 48(%rdi),%r14
1744 movq 56(%rdi),%r15
1745 movq %rax,(%rdx)			/* park the previous pass's top carry at the end of the temporary */
1746 leaq 64(%rdi),%rdi
1747
1748.byte 0x67				/* lone prefix byte; presumably padding - TODO confirm */
1749 movq %rbx,%r8
1750 imulq 32+8(%rsp),%rbx		/* %rbx = multiplier for the lowest word */
1751 movq 0(%rbp),%rax
1752 movl $8,%ecx
1753 jmp .L8x_reduce
1754
1755.align 32
/*
 * Eight iterations. Each one adds multiplier * (eight modulus words) to the
 * window %r8..%r15 and shifts the window down by one word: the new %r8 is the
 * old %r9 plus carries, and so on up to %r15.
 */
1756.L8x_reduce:
1757 mulq %rbx
1758 movq 8(%rbp),%rax
	/* negq sets CF exactly when %r8 was non-zero; the low product word itself
	   is discarded (presumably it cancels %r8 by construction of the
	   multiplier - TODO confirm) */
1759 negq %r8
1760 movq %rdx,%r8
1761 adcq $0,%r8
1762
1763 mulq %rbx
1764 addq %rax,%r9
1765 movq 16(%rbp),%rax
1766 adcq $0,%rdx
1767 addq %r9,%r8
1768 movq %rbx,48-8+8(%rsp,%rcx,8)	/* save this multiplier for the tail loop */
1769 movq %rdx,%r9
1770 adcq $0,%r9
1771
1772 mulq %rbx
1773 addq %rax,%r10
1774 movq 24(%rbp),%rax
1775 adcq $0,%rdx
1776 addq %r10,%r9
1777 movq 32+8(%rsp),%rsi		/* reload the multiplier constant ... */
1778 movq %rdx,%r10
1779 adcq $0,%r10
1780
1781 mulq %rbx
1782 addq %rax,%r11
1783 movq 32(%rbp),%rax
1784 adcq $0,%rdx
1785 imulq %r8,%rsi			/* ... and form the next multiplier from the new low word */
1786 addq %r11,%r10
1787 movq %rdx,%r11
1788 adcq $0,%r11
1789
1790 mulq %rbx
1791 addq %rax,%r12
1792 movq 40(%rbp),%rax
1793 adcq $0,%rdx
1794 addq %r12,%r11
1795 movq %rdx,%r12
1796 adcq $0,%r12
1797
1798 mulq %rbx
1799 addq %rax,%r13
1800 movq 48(%rbp),%rax
1801 adcq $0,%rdx
1802 addq %r13,%r12
1803 movq %rdx,%r13
1804 adcq $0,%r13
1805
1806 mulq %rbx
1807 addq %rax,%r14
1808 movq 56(%rbp),%rax
1809 adcq $0,%rdx
1810 addq %r14,%r13
1811 movq %rdx,%r14
1812 adcq $0,%r14
1813
1814 mulq %rbx
1815 movq %rsi,%rbx			/* switch to the next multiplier */
1816 addq %rax,%r15
1817 movq 0(%rbp),%rax
1818 adcq $0,%rdx
1819 addq %r15,%r14
1820 movq %rdx,%r15
1821 adcq $0,%r15
1822
1823 decl %ecx
1824 jnz .L8x_reduce
1825
1826 leaq 64(%rbp),%rbp		/* advance to the next eight modulus words */
1827 xorq %rax,%rax
1828 movq 8+8(%rsp),%rdx
1829 cmpq 0+8(%rsp),%rbp
1830 jae .L8x_no_tail			/* modulus exhausted; jae is taken with CF clear */
1831
	/* More modulus words remain: fold in the next eight temporary words and
	   keep the carry as 0/-1 in %rsi. */
1832.byte 0x66				/* lone prefix byte; presumably padding - TODO confirm */
1833 addq 0(%rdi),%r8
1834 adcq 8(%rdi),%r9
1835 adcq 16(%rdi),%r10
1836 adcq 24(%rdi),%r11
1837 adcq 32(%rdi),%r12
1838 adcq 40(%rdi),%r13
1839 adcq 48(%rdi),%r14
1840 adcq 56(%rdi),%r15
1841 sbbq %rsi,%rsi
1842
1843 movq 48+56+8(%rsp),%rbx		/* first multiplier saved by .L8x_reduce */
1844 movl $8,%ecx
1845 movq 0(%rbp),%rax
1846 jmp .L8x_tail
1847
1848.align 32
/*
 * Eight iterations, one per saved multiplier. Each multiplies the current
 * eight modulus words by that multiplier, stores the lowest finished word to
 * (%rdi), and shifts the register window down by one word.
 */
1849.L8x_tail:
1850 mulq %rbx
1851 addq %rax,%r8
1852 movq 8(%rbp),%rax
1853 movq %r8,(%rdi)
1854 movq %rdx,%r8
1855 adcq $0,%r8
1856
1857 mulq %rbx
1858 addq %rax,%r9
1859 movq 16(%rbp),%rax
1860 adcq $0,%rdx
1861 addq %r9,%r8
1862 leaq 8(%rdi),%rdi
1863 movq %rdx,%r9
1864 adcq $0,%r9
1865
1866 mulq %rbx
1867 addq %rax,%r10
1868 movq 24(%rbp),%rax
1869 adcq $0,%rdx
1870 addq %r10,%r9
1871 movq %rdx,%r10
1872 adcq $0,%r10
1873
1874 mulq %rbx
1875 addq %rax,%r11
1876 movq 32(%rbp),%rax
1877 adcq $0,%rdx
1878 addq %r11,%r10
1879 movq %rdx,%r11
1880 adcq $0,%r11
1881
1882 mulq %rbx
1883 addq %rax,%r12
1884 movq 40(%rbp),%rax
1885 adcq $0,%rdx
1886 addq %r12,%r11
1887 movq %rdx,%r12
1888 adcq $0,%r12
1889
1890 mulq %rbx
1891 addq %rax,%r13
1892 movq 48(%rbp),%rax
1893 adcq $0,%rdx
1894 addq %r13,%r12
1895 movq %rdx,%r13
1896 adcq $0,%r13
1897
1898 mulq %rbx
1899 addq %rax,%r14
1900 movq 56(%rbp),%rax
1901 adcq $0,%rdx
1902 addq %r14,%r13
1903 movq %rdx,%r14
1904 adcq $0,%r14
1905
1906 mulq %rbx
1907 movq 48-16+8(%rsp,%rcx,8),%rbx	/* fetch the multiplier for the next iteration */
1908 addq %rax,%r15
1909 adcq $0,%rdx
1910 addq %r15,%r14
1911 movq 0(%rbp),%rax
1912 movq %rdx,%r15
1913 adcq $0,%r15
1914
1915 decl %ecx
1916 jnz .L8x_tail
1917
1918 leaq 64(%rbp),%rbp
1919 movq 8+8(%rsp),%rdx
1920 cmpq 0+8(%rsp),%rbp
1921 jae .L8x_tail_done
1922
	/* Another group of modulus words follows: restore the saved carry, add
	   the next eight temporary words, save the carry again, and repeat. */
1923 movq 48+56+8(%rsp),%rbx
1924 negq %rsi				/* CF = saved carry */
1925 movq 0(%rbp),%rax
1926 adcq 0(%rdi),%r8
1927 adcq 8(%rdi),%r9
1928 adcq 16(%rdi),%r10
1929 adcq 24(%rdi),%r11
1930 adcq 32(%rdi),%r12
1931 adcq 40(%rdi),%r13
1932 adcq 48(%rdi),%r14
1933 adcq 56(%rdi),%r15
1934 sbbq %rsi,%rsi
1935
1936 movl $8,%ecx
1937 jmp .L8x_tail
1938
1939.align 32
/* Modulus exhausted after a tail: add the carry word parked at (%rdx) and
   ripple it through the window into %rax. */
1940.L8x_tail_done:
1941 xorq %rax,%rax
1942 addq (%rdx),%r8
1943 adcq $0,%r9
1944 adcq $0,%r10
1945 adcq $0,%r11
1946 adcq $0,%r12
1947 adcq $0,%r13
1948 adcq $0,%r14
1949 adcq $0,%r15
1950 adcq $0,%rax
1951
1952 negq %rsi				/* CF = carry saved before/inside the tail loop */
/* Add the eight temporary words at %rdi (plus CF) to the window, store the
   result back in place, and collect the carry out in %rax. */
1953.L8x_no_tail:
1954 adcq 0(%rdi),%r8
1955 adcq 8(%rdi),%r9
1956 adcq 16(%rdi),%r10
1957 adcq 24(%rdi),%r11
1958 adcq 32(%rdi),%r12
1959 adcq 40(%rdi),%r13
1960 adcq 48(%rdi),%r14
1961 adcq 56(%rdi),%r15
1962 adcq $0,%rax
1963 movq -8(%rbp),%rcx		/* %rbp is at the end of the modulus here: last modulus word */
1964 xorq %rsi,%rsi
1965
1966.byte 102,72,15,126,213		/* movq %xmm2,%rbp */
1967
1968 movq %r8,0(%rdi)
1969 movq %r9,8(%rdi)
1970.byte 102,73,15,126,217		/* movq %xmm3,%r9 */
1971 movq %r10,16(%rdi)
1972 movq %r11,24(%rdi)
1973 movq %r12,32(%rdi)
1974 movq %r13,40(%rdi)
1975 movq %r14,48(%rdi)
1976 movq %r15,56(%rdi)
1977 leaq 64(%rdi),%rdi
1978
1979 cmpq %rdx,%rdi			/* keep going until %rdi reaches the end of the temporary */
1980 jb .L8x_reduction_loop
1981 .byte 0xf3,0xc3			/* rep ret */
1982.size bn_sqr8x_internal,.-bn_sqr8x_internal
/*
 * __bn_post4x_internal: masked subtraction of the modulus, four words per
 * iteration, writing the result to the output buffer.
 *
 * Inputs as read by the code below:
 *   %rbp   pointer to the first modulus word
 *   %rdi   pointer such that %rdi + %r9 is the first source word
 *   %r9    byte length; the loop counts (%r9 >> 5) up to zero, so a negative
 *          multiple of 32 is expected here
 *   %rax   flag that is negated into an AND mask. For an incoming 1 the mask
 *          is all-ones and the loop computes source - modulus; for 0 the
 *          source is copied unchanged (NOTE(review): assumes callers only
 *          pass 0 or 1 - confirm)
 *   %xmm1  output pointer
 * On return: %rsi = start of the output, %rdi = one past the last word
 * written, %r10 = incoming %r9, %r9 = negated incoming %r9.
 */
1983.type __bn_post4x_internal,@function
1984.align 32
1985__bn_post4x_internal:
1986 movq 0(%rbp),%r12
1987 leaq (%rdi,%r9,1),%rbx		/* %rbx = first source word */
1988 movq %r9,%rcx
1989.byte 102,72,15,126,207		/* movq %xmm1,%rdi */
1990 negq %rax				/* turn the flag into a mask */
1991.byte 102,72,15,126,206		/* movq %xmm1,%rsi */
1992 sarq $3+2,%rcx			/* bytes -> groups of four 8-byte words (signed) */
	/* Decrementing the first word before the NOT below turns it into its
	   two's-complement negation, which supplies the "+1" of the subtraction. */
1993 decq %r12
1994 xorq %r10,%r10			/* saved carry = 0 */
1995 movq 8(%rbp),%r13
1996 movq 16(%rbp),%r14
1997 movq 24(%rbp),%r15
1998 jmp .Lsqr4x_sub_entry
1999
2000.align 16
2001.Lsqr4x_sub:
2002 movq 0(%rbp),%r12
2003 movq 8(%rbp),%r13
2004 movq 16(%rbp),%r14
2005 movq 24(%rbp),%r15
2006.Lsqr4x_sub_entry:
2007 leaq 32(%rbp),%rbp
2008 notq %r12
2009 notq %r13
2010 notq %r14
2011 notq %r15
2012 andq %rax,%r12			/* keep the complemented words only when the mask is set */
2013 andq %rax,%r13
2014 andq %rax,%r14
2015 andq %rax,%r15
2016
2017 negq %r10				/* CF = carry saved by the previous iteration */
2018 adcq 0(%rbx),%r12
2019 adcq 8(%rbx),%r13
2020 adcq 16(%rbx),%r14
2021 adcq 24(%rbx),%r15
2022 movq %r12,0(%rdi)
2023 leaq 32(%rbx),%rbx
2024 movq %r13,8(%rdi)
2025 sbbq %r10,%r10			/* save CF as 0/-1; the mov/lea in between leave flags alone */
2026 movq %r14,16(%rdi)
2027 movq %r15,24(%rdi)
2028 leaq 32(%rdi),%rdi
2029
2030 incq %rcx
2031 jnz .Lsqr4x_sub
2032
2033 movq %r9,%r10
2034 negq %r9
2035 .byte 0xf3,0xc3			/* rep ret */
2036.size __bn_post4x_internal,.-__bn_post4x_internal
/*
 * bn_from_montgomery: public entry point. Checks the low three bits of %r9d
 * (the sixth integer argument under the System V AMD64 convention; the word
 * count as used by bn_from_mont8x). If the count is a multiple of 8, control
 * transfers to bn_from_mont8x with all argument registers untouched; that
 * routine returns 1. Otherwise this returns 0 in %eax without doing any work.
 */
2037.globl bn_from_montgomery
2038.type bn_from_montgomery,@function
2039.align 32
2040bn_from_montgomery:
2041 testl $7,%r9d
2042 jz bn_from_mont8x
2043 xorl %eax,%eax			/* unsupported length: return 0 */
2044 .byte 0xf3,0xc3			/* rep ret */
2045.size bn_from_montgomery,.-bn_from_montgomery
2046
/*
 * bn_from_mont8x: worker behind bn_from_montgomery for word counts that are
 * multiples of 8. It copies the input into the low half of a double-width
 * stack temporary, zeroes the high half, runs __bn_sqr8x_reduction and
 * __bn_post4x_internal over it, wipes the temporary and returns 1.
 *
 * Registers on entry, as used below:
 *   %rdi  output pointer (kept in %xmm1 across the calls)
 *   %rsi  input words, copied into the temporary
 *   %rcx  modulus pointer (moved to %rbp and %xmm2)
 *   %r8   pointer whose first 64-bit word is loaded and stored at 32(%rsp)
 *   %r9d  word count
 * Frame after setup: 32(%rsp) = value loaded from (%r8), 40(%rsp) = the
 * caller's %rsp, 48(%rsp).. = the temporary of 2 * count words.
 */
2047.type bn_from_mont8x,@function
2048.align 32
2049bn_from_mont8x:
2050.cfi_startproc
2051.byte 0x67				/* lone prefix byte; presumably padding - TODO confirm */
2052 movq %rsp,%rax			/* %rax keeps the entry %rsp for the epilogue and CFI */
2053.cfi_def_cfa_register %rax
2054 pushq %rbx
2055.cfi_offset %rbx,-16
2056 pushq %rbp
2057.cfi_offset %rbp,-24
2058 pushq %r12
2059.cfi_offset %r12,-32
2060 pushq %r13
2061.cfi_offset %r13,-40
2062 pushq %r14
2063.cfi_offset %r14,-48
2064 pushq %r15
2065.cfi_offset %r15,-56
2066.Lfrom_prologue:
2067
2068 shll $3,%r9d			/* words -> bytes (the 32-bit write also clears the upper half of %r9) */
2069 leaq (%r9,%r9,2),%r10		/* %r10 = 3 * byte length */
2070 negq %r9
2071 movq (%r8),%r8
2072
2073
2074
2075
2076
2077
2078
2079
	/*
	 * Choose the frame base in %rbp. %r11 is the distance, modulo 4096,
	 * between the tentative frame (%rsp - 320 - 2*len) and the output
	 * pointer; the frame is moved down by an amount derived from it.
	 * NOTE(review): presumably this keeps the frame from aliasing the
	 * caller's buffers modulo the page size - confirm against the perlasm
	 * source.
	 */
2080 leaq -320(%rsp,%r9,2),%r11
2081 movq %rsp,%rbp
2082 subq %rdi,%r11
2083 andq $4095,%r11
2084 cmpq %r11,%r10
2085 jb .Lfrom_sp_alt			/* taken when 3*len < %r11 (unsigned) */
2086 subq %r11,%rbp
2087 leaq -320(%rbp,%r9,2),%rbp
2088 jmp .Lfrom_sp_done
2089
2090.align 32
2091.Lfrom_sp_alt:
2092 leaq 4096-320(,%r9,2),%r10	/* %r10 = 4096 - 320 - 2*len (%r9 is negative here) */
2093 leaq -320(%rbp,%r9,2),%rbp
2094 subq %r10,%r11
2095 movq $0,%r10			/* mov leaves CF from the subq intact for the cmovc */
2096 cmovcq %r10,%r11			/* clamp to 0 if the subtraction borrowed */
2097 subq %r11,%rbp
2098.Lfrom_sp_done:
2099 andq $-64,%rbp			/* 64-byte align the frame base */
	/* Lower %rsp to %rbp one 4096-byte step at a time, reading a word at
	   each step, instead of jumping there directly. */
2100 movq %rsp,%r11
2101 subq %rbp,%r11
2102 andq $-4096,%r11
2103 leaq (%r11,%rbp,1),%rsp
2104 movq (%rsp),%r10
2105 cmpq %rbp,%rsp
2106 ja .Lfrom_page_walk
2107 jmp .Lfrom_page_walk_done
2108
2109.Lfrom_page_walk:
2110 leaq -4096(%rsp),%rsp
2111 movq (%rsp),%r10
2112 cmpq %rbp,%rsp
2113 ja .Lfrom_page_walk
2114.Lfrom_page_walk_done:
2115
2116 movq %r9,%r10			/* %r10 = negative byte length */
2117 negq %r9				/* %r9 = positive byte length */
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128 movq %r8,32(%rsp)
2129 movq %rax,40(%rsp)		/* saved entry %rsp */
	/* DW_CFA_def_cfa_expression, 5 bytes: DW_OP_breg7 (%rsp) +40, DW_OP_deref,
	   DW_OP_plus_uconst 8 - i.e. CFA = *(%rsp + 40) + 8. */
2130.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
2131.Lfrom_body:
2132 movq %r9,%r11			/* bytes left to copy */
2133 leaq 48(%rsp),%rax		/* start of the temporary */
2134 pxor %xmm0,%xmm0
2135 jmp .Lmul_by_1
2136
2137.align 32
/* Per iteration: copy 64 input bytes into the low half of the temporary and
   store 64 zero bytes at the same offset in the high half (%rax + %r9). */
2138.Lmul_by_1:
2139 movdqu (%rsi),%xmm1
2140 movdqu 16(%rsi),%xmm2
2141 movdqu 32(%rsi),%xmm3
2142 movdqa %xmm0,(%rax,%r9,1)
2143 movdqu 48(%rsi),%xmm4
2144 movdqa %xmm0,16(%rax,%r9,1)
2145.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00	/* leaq 64(%rsi),%rsi, encoded with a 32-bit displacement */
2146 movdqa %xmm1,(%rax)
2147 movdqa %xmm0,32(%rax,%r9,1)
2148 movdqa %xmm2,16(%rax)
2149 movdqa %xmm0,48(%rax,%r9,1)
2150 movdqa %xmm3,32(%rax)
2151 movdqa %xmm4,48(%rax)
2152 leaq 64(%rax),%rax
2153 subq $64,%r11
2154 jnz .Lmul_by_1
2155
2156.byte 102,72,15,110,207		/* movq %rdi,%xmm1 : output pointer */
2157.byte 102,72,15,110,209		/* movq %rcx,%xmm2 : modulus pointer */
2158.byte 0x67				/* lone prefix byte; presumably padding - TODO confirm */
2159 movq %rcx,%rbp
2160.byte 102,73,15,110,218		/* movq %r10,%xmm3 : negative byte length */
2161 call __bn_sqr8x_reduction
2162 call __bn_post4x_internal
2163
2164 pxor %xmm0,%xmm0
2165 leaq 48(%rsp),%rax
2166 jmp .Lfrom_mont_zero
2167
2168.align 32
/* Wipe the temporary: 64 bytes are cleared per iteration while %r9 drops by
   32, so twice the byte count held in %r9 is zeroed in total. */
2169.Lfrom_mont_zero:
2170 movq 40(%rsp),%rsi		/* saved entry %rsp (reloaded on every iteration) */
2171.cfi_def_cfa %rsi,8
2172 movdqa %xmm0,0(%rax)
2173 movdqa %xmm0,16(%rax)
2174 movdqa %xmm0,32(%rax)
2175 movdqa %xmm0,48(%rax)
2176 leaq 64(%rax),%rax
2177 subq $32,%r9
2178 jnz .Lfrom_mont_zero
2179
2180 movq $1,%rax			/* return value: 1 */
	/* Restore the callee-saved registers from below the entry %rsp. */
2181 movq -48(%rsi),%r15
2182.cfi_restore %r15
2183 movq -40(%rsi),%r14
2184.cfi_restore %r14
2185 movq -32(%rsi),%r13
2186.cfi_restore %r13
2187 movq -24(%rsi),%r12
2188.cfi_restore %r12
2189 movq -16(%rsi),%rbp
2190.cfi_restore %rbp
2191 movq -8(%rsi),%rbx
2192.cfi_restore %rbx
2193 leaq (%rsi),%rsp
2194.cfi_def_cfa_register %rsp
2195.Lfrom_epilogue:
2196 .byte 0xf3,0xc3			/* rep ret */
2197.cfi_endproc
2198.size bn_from_mont8x,.-bn_from_mont8x
/*
 * bn_get_bits5: return, in %eax, the 5-bit field that starts at bit offset
 * %esi of the byte array at %rdi.
 *
 * The array is addressed in 16-bit units: unit index = offset >> 4, shift =
 * offset & 15. A shift above 11 would push the field past the end of a
 * 16-bit load, so in that case the load address is advanced by one byte and
 * the shift reduced by 8.
 * Clobbers %rcx, %rsi, %r10, %r11 and flags.
 */
2199.globl bn_get_bits5
2200.type bn_get_bits5,@function
2201.align 16
2202bn_get_bits5:
2203 leaq 0(%rdi),%r10			/* candidate base: array start */
2204 leaq 1(%rdi),%r11			/* candidate base: array start + 1 byte */
2205 movl %esi,%ecx
2206 shrl $4,%esi			/* 16-bit unit index */
2207 andl $15,%ecx			/* bit position inside the unit */
2208 leal -8(%rcx),%eax		/* shift to use with the +1 byte base */
2209 cmpl $11,%ecx
2210 cmovaq %r11,%r10			/* position > 11: use the +1 byte base ... */
2211 cmoval %eax,%ecx			/* ... and the reduced shift */
2212 movzwl (%r10,%rsi,2),%eax
2213 shrl %cl,%eax
2214 andl $31,%eax			/* keep 5 bits */
2215 .byte 0xf3,0xc3			/* rep ret */
2216.size bn_get_bits5,.-bn_get_bits5
2217
/*
 * bn_scatter5: store %esi 64-bit words from the array at %rdi into the table
 * at %rdx, in slot %rcx. Successive words are written 256 bytes apart,
 * starting at %rdx + 8 * %rcx, so each 256-byte row has room for 32 slots of
 * 8 bytes. Does nothing when %esi is zero.
 * Clobbers %rax, %rdx, %rdi, %esi and flags.
 */
2218.globl bn_scatter5
2219.type bn_scatter5,@function
2220.align 16
2221bn_scatter5:
2222 cmpl $0,%esi
2223 jz .Lscatter_epilogue		/* nothing to copy */
2224 leaq (%rdx,%rcx,8),%rdx		/* address of the chosen slot in the first row */
2225.Lscatter:
2226 movq (%rdi),%rax
2227 leaq 8(%rdi),%rdi
2228 movq %rax,(%rdx)
2229 leaq 256(%rdx),%rdx		/* same slot, next row */
2230 subl $1,%esi
2231 jnz .Lscatter
2232.Lscatter_epilogue:
2233 .byte 0xf3,0xc3			/* rep ret */
2234.size bn_scatter5,.-bn_scatter5
2235
/*
 * bn_gather5: read slot %ecx out of the table at %rdx (the layout written by
 * bn_scatter5: 256-byte rows of 32 eight-byte slots) and store %esi 64-bit
 * words to %rdi.
 *
 * The slot is selected by masking, not by address. Sixteen 128-bit masks are
 * built on the stack, one per pair of slots, and every iteration loads the
 * whole 256-byte row, ANDs it with the masks and ORs the pieces together. The
 * addresses read therefore do not depend on %ecx. If %ecx is 32 or more no
 * mask matches and zeros are written.
 *
 * The row loads use movdqa, so %rdx must be 16-byte aligned.
 * NOTE: unlike bn_scatter5 there is no check for %esi == 0; the loop body
 * runs once before the count is tested.
 * The .LSEH_* labels bracket the function; presumably they exist for the
 * Win64 unwind tables of the same generator - TODO confirm.
 */
2236.globl bn_gather5
2237.type bn_gather5,@function
2238.align 32
2239bn_gather5:
2240.LSEH_begin_bn_gather5:
2241
2242.byte 0x4c,0x8d,0x14,0x24		/* leaq (%rsp),%r10 : remember the entry %rsp */
2243.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00	/* subq $264,%rsp */
2244 leaq .Linc(%rip),%rax
2245 andq $-16,%rsp			/* movdqa stores below need 16-byte alignment */
2246
2247 movd %ecx,%xmm5
2248 movdqa 0(%rax),%xmm0		/* {0,0,1,1}: slot numbers of the first pair */
2249 movdqa 16(%rax),%xmm1		/* {2,2,2,2}: step between pairs */
2250 leaq 128(%rdx),%r11		/* bias both pointers by 128 so offsets run -128..112 */
2251 leaq 128(%rsp),%rax
2252
2253 pshufd $0,%xmm5,%xmm5		/* broadcast the slot number to all four lanes */
	/*
	 * Build the 16 masks. The counter vector advances by the step each time
	 * and is compared with the broadcast slot number; because each slot
	 * number appears in two adjacent 32-bit lanes, a match yields an
	 * all-ones 64-bit half. Masks are stored at -128(%rax) .. 112(%rax).
	 */
2254 movdqa %xmm1,%xmm4
2255 movdqa %xmm1,%xmm2
2256 paddd %xmm0,%xmm1
2257 pcmpeqd %xmm5,%xmm0
2258 movdqa %xmm4,%xmm3
2259
2260 paddd %xmm1,%xmm2
2261 pcmpeqd %xmm5,%xmm1
2262 movdqa %xmm0,-128(%rax)
2263 movdqa %xmm4,%xmm0
2264
2265 paddd %xmm2,%xmm3
2266 pcmpeqd %xmm5,%xmm2
2267 movdqa %xmm1,-112(%rax)
2268 movdqa %xmm4,%xmm1
2269
2270 paddd %xmm3,%xmm0
2271 pcmpeqd %xmm5,%xmm3
2272 movdqa %xmm2,-96(%rax)
2273 movdqa %xmm4,%xmm2
2274 paddd %xmm0,%xmm1
2275 pcmpeqd %xmm5,%xmm0
2276 movdqa %xmm3,-80(%rax)
2277 movdqa %xmm4,%xmm3
2278
2279 paddd %xmm1,%xmm2
2280 pcmpeqd %xmm5,%xmm1
2281 movdqa %xmm0,-64(%rax)
2282 movdqa %xmm4,%xmm0
2283
2284 paddd %xmm2,%xmm3
2285 pcmpeqd %xmm5,%xmm2
2286 movdqa %xmm1,-48(%rax)
2287 movdqa %xmm4,%xmm1
2288
2289 paddd %xmm3,%xmm0
2290 pcmpeqd %xmm5,%xmm3
2291 movdqa %xmm2,-32(%rax)
2292 movdqa %xmm4,%xmm2
2293 paddd %xmm0,%xmm1
2294 pcmpeqd %xmm5,%xmm0
2295 movdqa %xmm3,-16(%rax)
2296 movdqa %xmm4,%xmm3
2297
2298 paddd %xmm1,%xmm2
2299 pcmpeqd %xmm5,%xmm1
2300 movdqa %xmm0,0(%rax)
2301 movdqa %xmm4,%xmm0
2302
2303 paddd %xmm2,%xmm3
2304 pcmpeqd %xmm5,%xmm2
2305 movdqa %xmm1,16(%rax)
2306 movdqa %xmm4,%xmm1
2307
2308 paddd %xmm3,%xmm0
2309 pcmpeqd %xmm5,%xmm3
2310 movdqa %xmm2,32(%rax)
2311 movdqa %xmm4,%xmm2
2312 paddd %xmm0,%xmm1
2313 pcmpeqd %xmm5,%xmm0
2314 movdqa %xmm3,48(%rax)
2315 movdqa %xmm4,%xmm3
2316
2317 paddd %xmm1,%xmm2
2318 pcmpeqd %xmm5,%xmm1
2319 movdqa %xmm0,64(%rax)
2320 movdqa %xmm4,%xmm0
2321
2322 paddd %xmm2,%xmm3
2323 pcmpeqd %xmm5,%xmm2
2324 movdqa %xmm1,80(%rax)
2325 movdqa %xmm4,%xmm1
2326
2327 paddd %xmm3,%xmm0
2328 pcmpeqd %xmm5,%xmm3
2329 movdqa %xmm2,96(%rax)
2330 movdqa %xmm4,%xmm2
2331 movdqa %xmm3,112(%rax)
2332 jmp .Lgather
2333
2334.align 32
/* One output word per iteration: mask all 16 vectors of the current row and
   OR them into two accumulators (%xmm4, %xmm5). */
2335.Lgather:
2336 pxor %xmm4,%xmm4
2337 pxor %xmm5,%xmm5
2338 movdqa -128(%r11),%xmm0
2339 movdqa -112(%r11),%xmm1
2340 movdqa -96(%r11),%xmm2
2341 pand -128(%rax),%xmm0
2342 movdqa -80(%r11),%xmm3
2343 pand -112(%rax),%xmm1
2344 por %xmm0,%xmm4
2345 pand -96(%rax),%xmm2
2346 por %xmm1,%xmm5
2347 pand -80(%rax),%xmm3
2348 por %xmm2,%xmm4
2349 por %xmm3,%xmm5
2350 movdqa -64(%r11),%xmm0
2351 movdqa -48(%r11),%xmm1
2352 movdqa -32(%r11),%xmm2
2353 pand -64(%rax),%xmm0
2354 movdqa -16(%r11),%xmm3
2355 pand -48(%rax),%xmm1
2356 por %xmm0,%xmm4
2357 pand -32(%rax),%xmm2
2358 por %xmm1,%xmm5
2359 pand -16(%rax),%xmm3
2360 por %xmm2,%xmm4
2361 por %xmm3,%xmm5
2362 movdqa 0(%r11),%xmm0
2363 movdqa 16(%r11),%xmm1
2364 movdqa 32(%r11),%xmm2
2365 pand 0(%rax),%xmm0
2366 movdqa 48(%r11),%xmm3
2367 pand 16(%rax),%xmm1
2368 por %xmm0,%xmm4
2369 pand 32(%rax),%xmm2
2370 por %xmm1,%xmm5
2371 pand 48(%rax),%xmm3
2372 por %xmm2,%xmm4
2373 por %xmm3,%xmm5
2374 movdqa 64(%r11),%xmm0
2375 movdqa 80(%r11),%xmm1
2376 movdqa 96(%r11),%xmm2
2377 pand 64(%rax),%xmm0
2378 movdqa 112(%r11),%xmm3
2379 pand 80(%rax),%xmm1
2380 por %xmm0,%xmm4
2381 pand 96(%rax),%xmm2
2382 por %xmm1,%xmm5
2383 pand 112(%rax),%xmm3
2384 por %xmm2,%xmm4
2385 por %xmm3,%xmm5
2386 por %xmm5,%xmm4			/* merge the two accumulators */
2387 leaq 256(%r11),%r11		/* next table row */
2388 pshufd $0x4e,%xmm4,%xmm0		/* swap the two 64-bit halves ... */
2389 por %xmm4,%xmm0			/* ... so the low half holds the selected word */
2390 movq %xmm0,(%rdi)
2391 leaq 8(%rdi),%rdi
2392 subl $1,%esi
2393 jnz .Lgather
2394
2395 leaq (%r10),%rsp			/* restore the entry %rsp */
2396 .byte 0xf3,0xc3			/* rep ret */
2397.LSEH_end_bn_gather5:
2398.size bn_gather5,.-bn_gather5
/*
 * .Linc: two 128-bit constants used to build the slot-selection masks.
 * bn_gather5 loads offset 0 as the initial slot-number vector {0,0,1,1} and
 * offset 16 as the per-step increment {2,2,2,2}.
 * The .byte line that follows is the NUL-terminated ASCII string
 * "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by
 * <appro@openssl.org>".
 */
2399.align 64
2400.Linc:
2401.long 0,0, 1,1
2402.long 2,2, 2,2
2403.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette