VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.1/crypto/genasm-elf/aesni-x86_64.S@ 94083

Last change on this file since 94083 was 94083, checked in by vboxsync, 3 years ago

libs/openssl-3.0.1: Recreate asm files, bugref:10128

File size: 84.6 KB
Line 
1.text
2
/*
 * aesni_encrypt - run the AES-NI encryption rounds over one 16-byte block.
 *
 * Registers as used below:
 *   %rdi  address of the 16-byte input block
 *   %rsi  address the 16-byte result is written to
 *   %rdx  address of the round-key table; the 32-bit value at offset 240
 *         is the loop count (presumably the round count kept in the key
 *         structure - its layout is not visible in this file)
 * Modifies %eax, %rdx and the flags; %xmm0-%xmm2 are zero on return.
 */
3.globl aesni_encrypt
4.type aesni_encrypt,@function
5.align 16
6aesni_encrypt:
7.cfi_startproc
/* 243,15,30,250 = F3 0F 1E FA, the encoding of endbr64 */
8.byte 243,15,30,250
9 movups (%rdi),%xmm2
10 movl 240(%rdx),%eax
11 movups (%rdx),%xmm0
12 movups 16(%rdx),%xmm1
13 leaq 32(%rdx),%rdx
/* xmm2 = input block ^ first round key */
14 xorps %xmm0,%xmm2
15.Loop_enc1_1:
/* 102,15,56,220,209 = aesenc %xmm1,%xmm2 */
16.byte 102,15,56,220,209
/* movups and leaq leave the flags alone, so the jnz below tests this decl */
17 decl %eax
18 movups (%rdx),%xmm1
19 leaq 16(%rdx),%rdx
20 jnz .Loop_enc1_1
/* 102,15,56,221,209 = aesenclast %xmm1,%xmm2 */
21.byte 102,15,56,221,209
/* clear the round keys, store the result, then clear the result as well */
22 pxor %xmm0,%xmm0
23 pxor %xmm1,%xmm1
24 movups %xmm2,(%rsi)
25 pxor %xmm2,%xmm2
/* 0xf3,0xc3 = rep ret */
26 .byte 0xf3,0xc3
27.cfi_endproc
28.size aesni_encrypt,.-aesni_encrypt
29
/*
 * aesni_decrypt - run the AES-NI decryption rounds over one 16-byte block.
 *
 * Registers as used below:
 *   %rdi  address of the 16-byte input block
 *   %rsi  address the 16-byte result is written to
 *   %rdx  address of the round-key table; the 32-bit value at offset 240
 *         is the loop count (presumably the round count kept in the key
 *         structure - its layout is not visible in this file)
 * Modifies %eax, %rdx and the flags; %xmm0-%xmm2 are zero on return.
 */
30.globl aesni_decrypt
31.type aesni_decrypt,@function
32.align 16
33aesni_decrypt:
34.cfi_startproc
/* 243,15,30,250 = F3 0F 1E FA, the encoding of endbr64 */
35.byte 243,15,30,250
36 movups (%rdi),%xmm2
37 movl 240(%rdx),%eax
38 movups (%rdx),%xmm0
39 movups 16(%rdx),%xmm1
40 leaq 32(%rdx),%rdx
/* xmm2 = input block ^ first round key */
41 xorps %xmm0,%xmm2
42.Loop_dec1_2:
/* 102,15,56,222,209 = aesdec %xmm1,%xmm2 */
43.byte 102,15,56,222,209
/* movups and leaq leave the flags alone, so the jnz below tests this decl */
44 decl %eax
45 movups (%rdx),%xmm1
46 leaq 16(%rdx),%rdx
47 jnz .Loop_dec1_2
/* 102,15,56,223,209 = aesdeclast %xmm1,%xmm2 */
48.byte 102,15,56,223,209
/* clear the round keys, store the result, then clear the result as well */
49 pxor %xmm0,%xmm0
50 pxor %xmm1,%xmm1
51 movups %xmm2,(%rsi)
52 pxor %xmm2,%xmm2
/* 0xf3,0xc3 = rep ret */
53 .byte 0xf3,0xc3
54.cfi_endproc
55.size aesni_decrypt, .-aesni_decrypt
/*
 * _aesni_encrypt2 - file-local helper (no .globl) that runs the AES
 * encryption rounds over two blocks at once.
 *
 * In:  %rcx          address of the round-key table
 *      %eax          count n; the table is walked up to offset 16*(n+1)
 *      %xmm2, %xmm3  blocks, transformed in place
 * Modifies %rax, %rcx, %xmm0, %xmm1 and the flags; uses no stack.
 *
 * Byte-coded instructions: 102,15,56,220,M is aesenc and
 * 102,15,56,221,M is aesenclast.  M = 209/217 applies %xmm1 to
 * %xmm2/%xmm3, M = 208/216 applies %xmm0 to %xmm2/%xmm3.
 */
56.type _aesni_encrypt2,@function
57.align 16
58_aesni_encrypt2:
59.cfi_startproc
60 movups (%rcx),%xmm0
61 shll $4,%eax
62 movups 16(%rcx),%xmm1
/* both blocks ^= first round key */
63 xorps %xmm0,%xmm2
64 xorps %xmm0,%xmm3
65 movups 32(%rcx),%xmm0
/*
 * %rcx now points 32+16n bytes into the table and %rax = 16-16n is a
 * negative offset that climbs by 32 per iteration.  The loop exits when
 * the addq leaves exactly zero, so n must be odd and at least 3.
 */
66 leaq 32(%rcx,%rax,1),%rcx
67 negq %rax
68 addq $16,%rax
69
70.Lenc_loop2:
/* two rounds per iteration, alternating between %xmm1 and %xmm0 as key */
71.byte 102,15,56,220,209
72.byte 102,15,56,220,217
73 movups (%rcx,%rax,1),%xmm1
74 addq $32,%rax
75.byte 102,15,56,220,208
76.byte 102,15,56,220,216
/* movups does not change the flags: jnz tests the addq above */
77 movups -16(%rcx,%rax,1),%xmm0
78 jnz .Lenc_loop2
79
/* one more aesenc with %xmm1, then aesenclast with %xmm0 */
80.byte 102,15,56,220,209
81.byte 102,15,56,220,217
82.byte 102,15,56,221,208
83.byte 102,15,56,221,216
/* 0xf3,0xc3 = rep ret */
84 .byte 0xf3,0xc3
85.cfi_endproc
86.size _aesni_encrypt2,.-_aesni_encrypt2
/*
 * _aesni_decrypt2 - file-local helper (no .globl) that runs the AES
 * decryption rounds over two blocks at once.
 *
 * In:  %rcx          address of the round-key table
 *      %eax          count n; the table is walked up to offset 16*(n+1)
 *      %xmm2, %xmm3  blocks, transformed in place
 * Modifies %rax, %rcx, %xmm0, %xmm1 and the flags; uses no stack.
 *
 * Byte-coded instructions: 102,15,56,222,M is aesdec and
 * 102,15,56,223,M is aesdeclast.  M = 209/217 applies %xmm1 to
 * %xmm2/%xmm3, M = 208/216 applies %xmm0 to %xmm2/%xmm3.
 */
87.type _aesni_decrypt2,@function
88.align 16
89_aesni_decrypt2:
90.cfi_startproc
91 movups (%rcx),%xmm0
92 shll $4,%eax
93 movups 16(%rcx),%xmm1
/* both blocks ^= first round key */
94 xorps %xmm0,%xmm2
95 xorps %xmm0,%xmm3
96 movups 32(%rcx),%xmm0
/*
 * %rcx now points 32+16n bytes into the table and %rax = 16-16n is a
 * negative offset that climbs by 32 per iteration.  The loop exits when
 * the addq leaves exactly zero, so n must be odd and at least 3.
 */
97 leaq 32(%rcx,%rax,1),%rcx
98 negq %rax
99 addq $16,%rax
100
101.Ldec_loop2:
/* two rounds per iteration, alternating between %xmm1 and %xmm0 as key */
102.byte 102,15,56,222,209
103.byte 102,15,56,222,217
104 movups (%rcx,%rax,1),%xmm1
105 addq $32,%rax
106.byte 102,15,56,222,208
107.byte 102,15,56,222,216
/* movups does not change the flags: jnz tests the addq above */
108 movups -16(%rcx,%rax,1),%xmm0
109 jnz .Ldec_loop2
110
/* one more aesdec with %xmm1, then aesdeclast with %xmm0 */
111.byte 102,15,56,222,209
112.byte 102,15,56,222,217
113.byte 102,15,56,223,208
114.byte 102,15,56,223,216
/* 0xf3,0xc3 = rep ret */
115 .byte 0xf3,0xc3
116.cfi_endproc
117.size _aesni_decrypt2,.-_aesni_decrypt2
/*
 * _aesni_encrypt3 - file-local helper (no .globl) that runs the AES
 * encryption rounds over three blocks at once.
 *
 * In:  %rcx          address of the round-key table
 *      %eax          count n; the table is walked up to offset 16*(n+1)
 *      %xmm2-%xmm4   blocks, transformed in place
 * Modifies %rax, %rcx, %xmm0, %xmm1 and the flags; uses no stack.
 *
 * Byte-coded instructions: 102,15,56,220,M is aesenc and
 * 102,15,56,221,M is aesenclast.  M = 209/217/225 applies %xmm1 to
 * %xmm2/%xmm3/%xmm4, M = 208/216/224 applies %xmm0 to the same registers.
 */
118.type _aesni_encrypt3,@function
119.align 16
120_aesni_encrypt3:
121.cfi_startproc
122 movups (%rcx),%xmm0
123 shll $4,%eax
124 movups 16(%rcx),%xmm1
/* all three blocks ^= first round key */
125 xorps %xmm0,%xmm2
126 xorps %xmm0,%xmm3
127 xorps %xmm0,%xmm4
128 movups 32(%rcx),%xmm0
/*
 * %rcx now points 32+16n bytes into the table and %rax = 16-16n is a
 * negative offset that climbs by 32 per iteration.  The loop exits when
 * the addq leaves exactly zero, so n must be odd and at least 3.
 */
129 leaq 32(%rcx,%rax,1),%rcx
130 negq %rax
131 addq $16,%rax
132
133.Lenc_loop3:
/* two rounds per iteration, alternating between %xmm1 and %xmm0 as key */
134.byte 102,15,56,220,209
135.byte 102,15,56,220,217
136.byte 102,15,56,220,225
137 movups (%rcx,%rax,1),%xmm1
138 addq $32,%rax
139.byte 102,15,56,220,208
140.byte 102,15,56,220,216
141.byte 102,15,56,220,224
/* movups does not change the flags: jnz tests the addq above */
142 movups -16(%rcx,%rax,1),%xmm0
143 jnz .Lenc_loop3
144
/* one more aesenc with %xmm1, then aesenclast with %xmm0 */
145.byte 102,15,56,220,209
146.byte 102,15,56,220,217
147.byte 102,15,56,220,225
148.byte 102,15,56,221,208
149.byte 102,15,56,221,216
150.byte 102,15,56,221,224
/* 0xf3,0xc3 = rep ret */
151 .byte 0xf3,0xc3
152.cfi_endproc
153.size _aesni_encrypt3,.-_aesni_encrypt3
/*
 * _aesni_decrypt3 - file-local helper (no .globl) that runs the AES
 * decryption rounds over three blocks at once.
 *
 * In:  %rcx          address of the round-key table
 *      %eax          count n; the table is walked up to offset 16*(n+1)
 *      %xmm2-%xmm4   blocks, transformed in place
 * Modifies %rax, %rcx, %xmm0, %xmm1 and the flags; uses no stack.
 *
 * Byte-coded instructions: 102,15,56,222,M is aesdec and
 * 102,15,56,223,M is aesdeclast.  M = 209/217/225 applies %xmm1 to
 * %xmm2/%xmm3/%xmm4, M = 208/216/224 applies %xmm0 to the same registers.
 */
154.type _aesni_decrypt3,@function
155.align 16
156_aesni_decrypt3:
157.cfi_startproc
158 movups (%rcx),%xmm0
159 shll $4,%eax
160 movups 16(%rcx),%xmm1
/* all three blocks ^= first round key */
161 xorps %xmm0,%xmm2
162 xorps %xmm0,%xmm3
163 xorps %xmm0,%xmm4
164 movups 32(%rcx),%xmm0
/*
 * %rcx now points 32+16n bytes into the table and %rax = 16-16n is a
 * negative offset that climbs by 32 per iteration.  The loop exits when
 * the addq leaves exactly zero, so n must be odd and at least 3.
 */
165 leaq 32(%rcx,%rax,1),%rcx
166 negq %rax
167 addq $16,%rax
168
169.Ldec_loop3:
/* two rounds per iteration, alternating between %xmm1 and %xmm0 as key */
170.byte 102,15,56,222,209
171.byte 102,15,56,222,217
172.byte 102,15,56,222,225
173 movups (%rcx,%rax,1),%xmm1
174 addq $32,%rax
175.byte 102,15,56,222,208
176.byte 102,15,56,222,216
177.byte 102,15,56,222,224
/* movups does not change the flags: jnz tests the addq above */
178 movups -16(%rcx,%rax,1),%xmm0
179 jnz .Ldec_loop3
180
/* one more aesdec with %xmm1, then aesdeclast with %xmm0 */
181.byte 102,15,56,222,209
182.byte 102,15,56,222,217
183.byte 102,15,56,222,225
184.byte 102,15,56,223,208
185.byte 102,15,56,223,216
186.byte 102,15,56,223,224
/* 0xf3,0xc3 = rep ret */
187 .byte 0xf3,0xc3
188.cfi_endproc
189.size _aesni_decrypt3,.-_aesni_decrypt3
/*
 * _aesni_encrypt4 - file-local helper (no .globl) that runs the AES
 * encryption rounds over four blocks at once.
 *
 * In:  %rcx          address of the round-key table
 *      %eax          count n; the table is walked up to offset 16*(n+1)
 *      %xmm2-%xmm5   blocks, transformed in place
 * Modifies %rax, %rcx, %xmm0, %xmm1 and the flags; uses no stack.
 *
 * Byte-coded instructions: 102,15,56,220,M is aesenc and
 * 102,15,56,221,M is aesenclast.  M = 209/217/225/233 applies %xmm1 to
 * %xmm2/%xmm3/%xmm4/%xmm5, M = 208/216/224/232 applies %xmm0 to the same
 * registers.
 */
190.type _aesni_encrypt4,@function
191.align 16
192_aesni_encrypt4:
193.cfi_startproc
194 movups (%rcx),%xmm0
195 shll $4,%eax
196 movups 16(%rcx),%xmm1
/* all four blocks ^= first round key */
197 xorps %xmm0,%xmm2
198 xorps %xmm0,%xmm3
199 xorps %xmm0,%xmm4
200 xorps %xmm0,%xmm5
201 movups 32(%rcx),%xmm0
/*
 * %rcx now points 32+16n bytes into the table and %rax = 16-16n is a
 * negative offset that climbs by 32 per iteration.  The loop exits when
 * the addq leaves exactly zero, so n must be odd and at least 3.
 */
202 leaq 32(%rcx,%rax,1),%rcx
203 negq %rax
/* 0x0f,0x1f,0x00 = three-byte nop (nopl (%rax)); it has no effect on state */
204.byte 0x0f,0x1f,0x00
205 addq $16,%rax
206
207.Lenc_loop4:
/* two rounds per iteration, alternating between %xmm1 and %xmm0 as key */
208.byte 102,15,56,220,209
209.byte 102,15,56,220,217
210.byte 102,15,56,220,225
211.byte 102,15,56,220,233
212 movups (%rcx,%rax,1),%xmm1
213 addq $32,%rax
214.byte 102,15,56,220,208
215.byte 102,15,56,220,216
216.byte 102,15,56,220,224
217.byte 102,15,56,220,232
/* movups does not change the flags: jnz tests the addq above */
218 movups -16(%rcx,%rax,1),%xmm0
219 jnz .Lenc_loop4
220
/* one more aesenc with %xmm1, then aesenclast with %xmm0 */
221.byte 102,15,56,220,209
222.byte 102,15,56,220,217
223.byte 102,15,56,220,225
224.byte 102,15,56,220,233
225.byte 102,15,56,221,208
226.byte 102,15,56,221,216
227.byte 102,15,56,221,224
228.byte 102,15,56,221,232
/* 0xf3,0xc3 = rep ret */
229 .byte 0xf3,0xc3
230.cfi_endproc
231.size _aesni_encrypt4,.-_aesni_encrypt4
/*
 * _aesni_decrypt4 - file-local helper (no .globl) that runs the AES
 * decryption rounds over four blocks at once.
 *
 * In:  %rcx          address of the round-key table
 *      %eax          count n; the table is walked up to offset 16*(n+1)
 *      %xmm2-%xmm5   blocks, transformed in place
 * Modifies %rax, %rcx, %xmm0, %xmm1 and the flags; uses no stack.
 *
 * Byte-coded instructions: 102,15,56,222,M is aesdec and
 * 102,15,56,223,M is aesdeclast.  M = 209/217/225/233 applies %xmm1 to
 * %xmm2/%xmm3/%xmm4/%xmm5, M = 208/216/224/232 applies %xmm0 to the same
 * registers.
 */
232.type _aesni_decrypt4,@function
233.align 16
234_aesni_decrypt4:
235.cfi_startproc
236 movups (%rcx),%xmm0
237 shll $4,%eax
238 movups 16(%rcx),%xmm1
/* all four blocks ^= first round key */
239 xorps %xmm0,%xmm2
240 xorps %xmm0,%xmm3
241 xorps %xmm0,%xmm4
242 xorps %xmm0,%xmm5
243 movups 32(%rcx),%xmm0
/*
 * %rcx now points 32+16n bytes into the table and %rax = 16-16n is a
 * negative offset that climbs by 32 per iteration.  The loop exits when
 * the addq leaves exactly zero, so n must be odd and at least 3.
 */
244 leaq 32(%rcx,%rax,1),%rcx
245 negq %rax
/* 0x0f,0x1f,0x00 = three-byte nop (nopl (%rax)); it has no effect on state */
246.byte 0x0f,0x1f,0x00
247 addq $16,%rax
248
249.Ldec_loop4:
/* two rounds per iteration, alternating between %xmm1 and %xmm0 as key */
250.byte 102,15,56,222,209
251.byte 102,15,56,222,217
252.byte 102,15,56,222,225
253.byte 102,15,56,222,233
254 movups (%rcx,%rax,1),%xmm1
255 addq $32,%rax
256.byte 102,15,56,222,208
257.byte 102,15,56,222,216
258.byte 102,15,56,222,224
259.byte 102,15,56,222,232
/* movups does not change the flags: jnz tests the addq above */
260 movups -16(%rcx,%rax,1),%xmm0
261 jnz .Ldec_loop4
262
/* one more aesdec with %xmm1, then aesdeclast with %xmm0 */
263.byte 102,15,56,222,209
264.byte 102,15,56,222,217
265.byte 102,15,56,222,225
266.byte 102,15,56,222,233
267.byte 102,15,56,223,208
268.byte 102,15,56,223,216
269.byte 102,15,56,223,224
270.byte 102,15,56,223,232
/* 0xf3,0xc3 = rep ret */
271 .byte 0xf3,0xc3
272.cfi_endproc
273.size _aesni_decrypt4,.-_aesni_decrypt4
/*
 * _aesni_encrypt6 - file-local helper (no .globl) that runs the AES
 * encryption rounds over six blocks at once.
 *
 * In:  %rcx          address of the round-key table
 *      %eax          count n; the table is walked up to offset 16*(n+1)
 *      %xmm2-%xmm7   blocks, transformed in place
 * Modifies %rax, %rcx, %xmm0, %xmm1 and the flags; uses no stack.
 *
 * Byte-coded instructions: 102,15,56,220,M is aesenc and
 * 102,15,56,221,M is aesenclast.  M = 209/217/225/233/241/249 applies
 * %xmm1 to %xmm2..%xmm7, M = 208/216/224/232/240/248 applies %xmm0 to
 * %xmm2..%xmm7.
 *
 * The label .Lenc_loop6 is also the target of a call instruction in
 * aesni_ctr32_encrypt_blocks, so the code from that label to the return
 * acts as a second entry point.
 */
274.type _aesni_encrypt6,@function
275.align 16
276_aesni_encrypt6:
277.cfi_startproc
278 movups (%rcx),%xmm0
279 shll $4,%eax
280 movups 16(%rcx),%xmm1
/*
 * The initial xor with the first round key (%xmm0) is interleaved with the
 * first aesenc (key in %xmm1) on the blocks that are already xored.
 */
281 xorps %xmm0,%xmm2
282 pxor %xmm0,%xmm3
283 pxor %xmm0,%xmm4
284.byte 102,15,56,220,209
/* %rcx = table + 32 + 16n, %rax = -16n */
285 leaq 32(%rcx,%rax,1),%rcx
286 negq %rax
287.byte 102,15,56,220,217
288 pxor %xmm0,%xmm5
289 pxor %xmm0,%xmm6
290.byte 102,15,56,220,225
291 pxor %xmm0,%xmm7
/* (%rcx,%rax) is offset 32 of the table here: the third round key */
292 movups (%rcx,%rax,1),%xmm0
293 addq $16,%rax
/* %xmm2-%xmm4 already had their %xmm1 round, so enter the loop part-way */
294 jmp .Lenc_loop6_enter
295.align 16
296.Lenc_loop6:
297.byte 102,15,56,220,209
298.byte 102,15,56,220,217
299.byte 102,15,56,220,225
300.Lenc_loop6_enter:
301.byte 102,15,56,220,233
302.byte 102,15,56,220,241
303.byte 102,15,56,220,249
304 movups (%rcx,%rax,1),%xmm1
/* %rax climbs by 32 toward zero; the loop exits when it is exactly zero */
305 addq $32,%rax
306.byte 102,15,56,220,208
307.byte 102,15,56,220,216
308.byte 102,15,56,220,224
309.byte 102,15,56,220,232
310.byte 102,15,56,220,240
311.byte 102,15,56,220,248
/* movups does not change the flags: jnz tests the addq above */
312 movups -16(%rcx,%rax,1),%xmm0
313 jnz .Lenc_loop6
314
/* one more aesenc with %xmm1, then aesenclast with %xmm0 */
315.byte 102,15,56,220,209
316.byte 102,15,56,220,217
317.byte 102,15,56,220,225
318.byte 102,15,56,220,233
319.byte 102,15,56,220,241
320.byte 102,15,56,220,249
321.byte 102,15,56,221,208
322.byte 102,15,56,221,216
323.byte 102,15,56,221,224
324.byte 102,15,56,221,232
325.byte 102,15,56,221,240
326.byte 102,15,56,221,248
/* 0xf3,0xc3 = rep ret */
327 .byte 0xf3,0xc3
328.cfi_endproc
329.size _aesni_encrypt6,.-_aesni_encrypt6
/*
 * _aesni_decrypt6 - file-local helper (no .globl) that runs the AES
 * decryption rounds over six blocks at once.
 *
 * In:  %rcx          address of the round-key table
 *      %eax          count n; the table is walked up to offset 16*(n+1)
 *      %xmm2-%xmm7   blocks, transformed in place
 * Modifies %rax, %rcx, %xmm0, %xmm1 and the flags; uses no stack.
 *
 * Byte-coded instructions: 102,15,56,222,M is aesdec and
 * 102,15,56,223,M is aesdeclast.  M = 209/217/225/233/241/249 applies
 * %xmm1 to %xmm2..%xmm7, M = 208/216/224/232/240/248 applies %xmm0 to
 * %xmm2..%xmm7.
 */
330.type _aesni_decrypt6,@function
331.align 16
332_aesni_decrypt6:
333.cfi_startproc
334 movups (%rcx),%xmm0
335 shll $4,%eax
336 movups 16(%rcx),%xmm1
/*
 * The initial xor with the first round key (%xmm0) is interleaved with the
 * first aesdec (key in %xmm1) on the blocks that are already xored.
 */
337 xorps %xmm0,%xmm2
338 pxor %xmm0,%xmm3
339 pxor %xmm0,%xmm4
340.byte 102,15,56,222,209
/* %rcx = table + 32 + 16n, %rax = -16n */
341 leaq 32(%rcx,%rax,1),%rcx
342 negq %rax
343.byte 102,15,56,222,217
344 pxor %xmm0,%xmm5
345 pxor %xmm0,%xmm6
346.byte 102,15,56,222,225
347 pxor %xmm0,%xmm7
/* (%rcx,%rax) is offset 32 of the table here: the third round key */
348 movups (%rcx,%rax,1),%xmm0
349 addq $16,%rax
/* %xmm2-%xmm4 already had their %xmm1 round, so enter the loop part-way */
350 jmp .Ldec_loop6_enter
351.align 16
352.Ldec_loop6:
353.byte 102,15,56,222,209
354.byte 102,15,56,222,217
355.byte 102,15,56,222,225
356.Ldec_loop6_enter:
357.byte 102,15,56,222,233
358.byte 102,15,56,222,241
359.byte 102,15,56,222,249
360 movups (%rcx,%rax,1),%xmm1
/* %rax climbs by 32 toward zero; the loop exits when it is exactly zero */
361 addq $32,%rax
362.byte 102,15,56,222,208
363.byte 102,15,56,222,216
364.byte 102,15,56,222,224
365.byte 102,15,56,222,232
366.byte 102,15,56,222,240
367.byte 102,15,56,222,248
/* movups does not change the flags: jnz tests the addq above */
368 movups -16(%rcx,%rax,1),%xmm0
369 jnz .Ldec_loop6
370
/* one more aesdec with %xmm1, then aesdeclast with %xmm0 */
371.byte 102,15,56,222,209
372.byte 102,15,56,222,217
373.byte 102,15,56,222,225
374.byte 102,15,56,222,233
375.byte 102,15,56,222,241
376.byte 102,15,56,222,249
377.byte 102,15,56,223,208
378.byte 102,15,56,223,216
379.byte 102,15,56,223,224
380.byte 102,15,56,223,232
381.byte 102,15,56,223,240
382.byte 102,15,56,223,248
/* 0xf3,0xc3 = rep ret */
383 .byte 0xf3,0xc3
384.cfi_endproc
385.size _aesni_decrypt6,.-_aesni_decrypt6
/*
 * _aesni_encrypt8 - file-local helper (no .globl) that runs the AES
 * encryption rounds over eight blocks at once.
 *
 * In:  %rcx          address of the round-key table
 *      %eax          count n; the table is walked up to offset 16*(n+1)
 *      %xmm2-%xmm9   blocks, transformed in place
 * Modifies %rax, %rcx, %xmm0, %xmm1 and the flags; uses no stack.
 *
 * Byte-coded instructions: 102,15,56,220,M is aesenc and
 * 102,15,56,221,M is aesenclast.  M = 209/217/225/233/241/249 applies
 * %xmm1 to %xmm2..%xmm7, M = 208/216/224/232/240/248 applies %xmm0 to
 * %xmm2..%xmm7.  The six-byte forms 102,68,15,56,op,M carry a REX prefix
 * (68 = 0x44) that selects %xmm8/%xmm9 as destination: M = 193/201
 * applies %xmm1, M = 192/200 applies %xmm0.
 */
386.type _aesni_encrypt8,@function
387.align 16
388_aesni_encrypt8:
389.cfi_startproc
390 movups (%rcx),%xmm0
391 shll $4,%eax
392 movups 16(%rcx),%xmm1
/*
 * The initial xor with the first round key (%xmm0) is interleaved with the
 * first aesenc (key in %xmm1) on %xmm2 and %xmm3.
 */
393 xorps %xmm0,%xmm2
394 xorps %xmm0,%xmm3
395 pxor %xmm0,%xmm4
396 pxor %xmm0,%xmm5
397 pxor %xmm0,%xmm6
/* %rcx = table + 32 + 16n, %rax = -16n */
398 leaq 32(%rcx,%rax,1),%rcx
399 negq %rax
400.byte 102,15,56,220,209
401 pxor %xmm0,%xmm7
402 pxor %xmm0,%xmm8
403.byte 102,15,56,220,217
404 pxor %xmm0,%xmm9
/* (%rcx,%rax) is offset 32 of the table here: the third round key */
405 movups (%rcx,%rax,1),%xmm0
406 addq $16,%rax
/* %xmm2/%xmm3 already had their %xmm1 round, so enter the loop part-way */
407 jmp .Lenc_loop8_inner
408.align 16
409.Lenc_loop8:
410.byte 102,15,56,220,209
411.byte 102,15,56,220,217
412.Lenc_loop8_inner:
413.byte 102,15,56,220,225
414.byte 102,15,56,220,233
415.byte 102,15,56,220,241
416.byte 102,15,56,220,249
417.byte 102,68,15,56,220,193
418.byte 102,68,15,56,220,201
/* NOTE(review): this label has no reference in the part of the file reviewed */
419.Lenc_loop8_enter:
420 movups (%rcx,%rax,1),%xmm1
/* %rax climbs by 32 toward zero; the loop exits when it is exactly zero */
421 addq $32,%rax
422.byte 102,15,56,220,208
423.byte 102,15,56,220,216
424.byte 102,15,56,220,224
425.byte 102,15,56,220,232
426.byte 102,15,56,220,240
427.byte 102,15,56,220,248
428.byte 102,68,15,56,220,192
429.byte 102,68,15,56,220,200
/* movups does not change the flags: jnz tests the addq above */
430 movups -16(%rcx,%rax,1),%xmm0
431 jnz .Lenc_loop8
432
/* one more aesenc with %xmm1, then aesenclast with %xmm0 */
433.byte 102,15,56,220,209
434.byte 102,15,56,220,217
435.byte 102,15,56,220,225
436.byte 102,15,56,220,233
437.byte 102,15,56,220,241
438.byte 102,15,56,220,249
439.byte 102,68,15,56,220,193
440.byte 102,68,15,56,220,201
441.byte 102,15,56,221,208
442.byte 102,15,56,221,216
443.byte 102,15,56,221,224
444.byte 102,15,56,221,232
445.byte 102,15,56,221,240
446.byte 102,15,56,221,248
447.byte 102,68,15,56,221,192
448.byte 102,68,15,56,221,200
/* 0xf3,0xc3 = rep ret */
449 .byte 0xf3,0xc3
450.cfi_endproc
451.size _aesni_encrypt8,.-_aesni_encrypt8
/*
 * _aesni_decrypt8 - file-local helper (no .globl) that runs the AES
 * decryption rounds over eight blocks at once.
 *
 * In:  %rcx          address of the round-key table
 *      %eax          count n; the table is walked up to offset 16*(n+1)
 *      %xmm2-%xmm9   blocks, transformed in place
 * Modifies %rax, %rcx, %xmm0, %xmm1 and the flags; uses no stack.
 *
 * Byte-coded instructions: 102,15,56,222,M is aesdec and
 * 102,15,56,223,M is aesdeclast.  M = 209/217/225/233/241/249 applies
 * %xmm1 to %xmm2..%xmm7, M = 208/216/224/232/240/248 applies %xmm0 to
 * %xmm2..%xmm7.  The six-byte forms 102,68,15,56,op,M carry a REX prefix
 * (68 = 0x44) that selects %xmm8/%xmm9 as destination: M = 193/201
 * applies %xmm1, M = 192/200 applies %xmm0.
 */
452.type _aesni_decrypt8,@function
453.align 16
454_aesni_decrypt8:
455.cfi_startproc
456 movups (%rcx),%xmm0
457 shll $4,%eax
458 movups 16(%rcx),%xmm1
/*
 * The initial xor with the first round key (%xmm0) is interleaved with the
 * first aesdec (key in %xmm1) on %xmm2 and %xmm3.
 */
459 xorps %xmm0,%xmm2
460 xorps %xmm0,%xmm3
461 pxor %xmm0,%xmm4
462 pxor %xmm0,%xmm5
463 pxor %xmm0,%xmm6
/* %rcx = table + 32 + 16n, %rax = -16n */
464 leaq 32(%rcx,%rax,1),%rcx
465 negq %rax
466.byte 102,15,56,222,209
467 pxor %xmm0,%xmm7
468 pxor %xmm0,%xmm8
469.byte 102,15,56,222,217
470 pxor %xmm0,%xmm9
/* (%rcx,%rax) is offset 32 of the table here: the third round key */
471 movups (%rcx,%rax,1),%xmm0
472 addq $16,%rax
/* %xmm2/%xmm3 already had their %xmm1 round, so enter the loop part-way */
473 jmp .Ldec_loop8_inner
474.align 16
475.Ldec_loop8:
476.byte 102,15,56,222,209
477.byte 102,15,56,222,217
478.Ldec_loop8_inner:
479.byte 102,15,56,222,225
480.byte 102,15,56,222,233
481.byte 102,15,56,222,241
482.byte 102,15,56,222,249
483.byte 102,68,15,56,222,193
484.byte 102,68,15,56,222,201
/* NOTE(review): this label has no reference in the part of the file reviewed */
485.Ldec_loop8_enter:
486 movups (%rcx,%rax,1),%xmm1
/* %rax climbs by 32 toward zero; the loop exits when it is exactly zero */
487 addq $32,%rax
488.byte 102,15,56,222,208
489.byte 102,15,56,222,216
490.byte 102,15,56,222,224
491.byte 102,15,56,222,232
492.byte 102,15,56,222,240
493.byte 102,15,56,222,248
494.byte 102,68,15,56,222,192
495.byte 102,68,15,56,222,200
/* movups does not change the flags: jnz tests the addq above */
496 movups -16(%rcx,%rax,1),%xmm0
497 jnz .Ldec_loop8
498
/* one more aesdec with %xmm1, then aesdeclast with %xmm0 */
499.byte 102,15,56,222,209
500.byte 102,15,56,222,217
501.byte 102,15,56,222,225
502.byte 102,15,56,222,233
503.byte 102,15,56,222,241
504.byte 102,15,56,222,249
505.byte 102,68,15,56,222,193
506.byte 102,68,15,56,222,201
507.byte 102,15,56,223,208
508.byte 102,15,56,223,216
509.byte 102,15,56,223,224
510.byte 102,15,56,223,232
511.byte 102,15,56,223,240
512.byte 102,15,56,223,248
513.byte 102,68,15,56,223,192
514.byte 102,68,15,56,223,200
/* 0xf3,0xc3 = rep ret */
515 .byte 0xf3,0xc3
516.cfi_endproc
517.size _aesni_decrypt8,.-_aesni_decrypt8
/*
 * aesni_ecb_encrypt - transform a buffer as independent 16-byte blocks.
 *
 * Registers as used below:
 *   %rdi  source pointer
 *   %rsi  destination pointer
 *   %rdx  length in bytes; rounded down to a multiple of 16, and the
 *         function returns at once when nothing remains
 *   %rcx  address of the round-key table (loop count at offset 240)
 *   %r8d  direction: non-zero takes the encrypt path, zero the decrypt path
 *
 * Groups of eight blocks (0x80 bytes) go through _aesni_encrypt8 or
 * _aesni_decrypt8.  A remainder of one to seven blocks is dispatched to a
 * narrower path.  %r11 and %r10d keep copies of %rcx and %eax because the
 * helpers modify both.
 *
 * Byte-coded instructions in this function: 243,15,30,250 = endbr64,
 * 102,15,56,220,209 = aesenc %xmm1,%xmm2, 102,15,56,221,209 = aesenclast
 * %xmm1,%xmm2, 102,15,56,222,209 = aesdec %xmm1,%xmm2, 102,15,56,223,209 =
 * aesdeclast %xmm1,%xmm2, 0xf3,0xc3 = rep ret.
 */
518.globl aesni_ecb_encrypt
519.type aesni_ecb_encrypt,@function
520.align 16
521aesni_ecb_encrypt:
522.cfi_startproc
523.byte 243,15,30,250
/* drop any partial trailing block; leave if no whole block remains */
524 andq $-16,%rdx
525 jz .Lecb_ret
526
527 movl 240(%rcx),%eax
528 movups (%rcx),%xmm0
/* keep the key pointer and count for reloading after each helper call */
529 movq %rcx,%r11
530 movl %eax,%r10d
531 testl %r8d,%r8d
532 jz .Lecb_decrypt
533
/* ---- encrypt path ---- */
534 cmpq $0x80,%rdx
535 jb .Lecb_enc_tail
536
/* load the first eight blocks and skip the store half of the loop body */
537 movdqu (%rdi),%xmm2
538 movdqu 16(%rdi),%xmm3
539 movdqu 32(%rdi),%xmm4
540 movdqu 48(%rdi),%xmm5
541 movdqu 64(%rdi),%xmm6
542 movdqu 80(%rdi),%xmm7
543 movdqu 96(%rdi),%xmm8
544 movdqu 112(%rdi),%xmm9
545 leaq 128(%rdi),%rdi
546 subq $0x80,%rdx
547 jmp .Lecb_enc_loop8_enter
548.align 16
549.Lecb_enc_loop8:
/* store the previous eight results while loading the next eight blocks */
550 movups %xmm2,(%rsi)
551 movq %r11,%rcx
552 movdqu (%rdi),%xmm2
553 movl %r10d,%eax
554 movups %xmm3,16(%rsi)
555 movdqu 16(%rdi),%xmm3
556 movups %xmm4,32(%rsi)
557 movdqu 32(%rdi),%xmm4
558 movups %xmm5,48(%rsi)
559 movdqu 48(%rdi),%xmm5
560 movups %xmm6,64(%rsi)
561 movdqu 64(%rdi),%xmm6
562 movups %xmm7,80(%rsi)
563 movdqu 80(%rdi),%xmm7
564 movups %xmm8,96(%rsi)
565 movdqu 96(%rdi),%xmm8
566 movups %xmm9,112(%rsi)
567 leaq 128(%rsi),%rsi
568 movdqu 112(%rdi),%xmm9
569 leaq 128(%rdi),%rdi
570.Lecb_enc_loop8_enter:
571
572 call _aesni_encrypt8
573
/* carry clear means at least another 0x80 bytes were still available */
574 subq $0x80,%rdx
575 jnc .Lecb_enc_loop8
576
/* store the last full group and restore %rcx/%eax for the tail code */
577 movups %xmm2,(%rsi)
578 movq %r11,%rcx
579 movups %xmm3,16(%rsi)
580 movl %r10d,%eax
581 movups %xmm4,32(%rsi)
582 movups %xmm5,48(%rsi)
583 movups %xmm6,64(%rsi)
584 movups %xmm7,80(%rsi)
585 movups %xmm8,96(%rsi)
586 movups %xmm9,112(%rsi)
587 leaq 128(%rsi),%rsi
/* undo the last subtraction: %rdx = bytes left over (0 .. 0x70) */
588 addq $0x80,%rdx
589 jz .Lecb_ret
590
591.Lecb_enc_tail:
/*
 * One to seven blocks remain.  movups does not change the flags, so each
 * je below reuses the result of the cmpq before the preceding jb.
 */
592 movups (%rdi),%xmm2
593 cmpq $0x20,%rdx
594 jb .Lecb_enc_one
595 movups 16(%rdi),%xmm3
596 je .Lecb_enc_two
597 movups 32(%rdi),%xmm4
598 cmpq $0x40,%rdx
599 jb .Lecb_enc_three
600 movups 48(%rdi),%xmm5
601 je .Lecb_enc_four
602 movups 64(%rdi),%xmm6
603 cmpq $0x60,%rdx
604 jb .Lecb_enc_five
605 movups 80(%rdi),%xmm7
606 je .Lecb_enc_six
/* seven blocks: use the 8-wide helper with a zeroed eighth lane */
607 movdqu 96(%rdi),%xmm8
608 xorps %xmm9,%xmm9
609 call _aesni_encrypt8
610 movups %xmm2,(%rsi)
611 movups %xmm3,16(%rsi)
612 movups %xmm4,32(%rsi)
613 movups %xmm5,48(%rsi)
614 movups %xmm6,64(%rsi)
615 movups %xmm7,80(%rsi)
616 movups %xmm8,96(%rsi)
617 jmp .Lecb_ret
618.align 16
619.Lecb_enc_one:
/* single block: inline round loop, %eax iterations of aesenc */
620 movups (%rcx),%xmm0
621 movups 16(%rcx),%xmm1
622 leaq 32(%rcx),%rcx
623 xorps %xmm0,%xmm2
624.Loop_enc1_3:
625.byte 102,15,56,220,209
626 decl %eax
627 movups (%rcx),%xmm1
628 leaq 16(%rcx),%rcx
629 jnz .Loop_enc1_3
630.byte 102,15,56,221,209
631 movups %xmm2,(%rsi)
632 jmp .Lecb_ret
633.align 16
634.Lecb_enc_two:
635 call _aesni_encrypt2
636 movups %xmm2,(%rsi)
637 movups %xmm3,16(%rsi)
638 jmp .Lecb_ret
639.align 16
640.Lecb_enc_three:
641 call _aesni_encrypt3
642 movups %xmm2,(%rsi)
643 movups %xmm3,16(%rsi)
644 movups %xmm4,32(%rsi)
645 jmp .Lecb_ret
646.align 16
647.Lecb_enc_four:
648 call _aesni_encrypt4
649 movups %xmm2,(%rsi)
650 movups %xmm3,16(%rsi)
651 movups %xmm4,32(%rsi)
652 movups %xmm5,48(%rsi)
653 jmp .Lecb_ret
654.align 16
655.Lecb_enc_five:
/* five blocks: use the 6-wide helper with a zeroed sixth lane */
656 xorps %xmm7,%xmm7
657 call _aesni_encrypt6
658 movups %xmm2,(%rsi)
659 movups %xmm3,16(%rsi)
660 movups %xmm4,32(%rsi)
661 movups %xmm5,48(%rsi)
662 movups %xmm6,64(%rsi)
663 jmp .Lecb_ret
664.align 16
665.Lecb_enc_six:
666 call _aesni_encrypt6
667 movups %xmm2,(%rsi)
668 movups %xmm3,16(%rsi)
669 movups %xmm4,32(%rsi)
670 movups %xmm5,48(%rsi)
671 movups %xmm6,64(%rsi)
672 movups %xmm7,80(%rsi)
673 jmp .Lecb_ret
674
675.align 16
676.Lecb_decrypt:
/* ---- decrypt path: same structure as the encrypt path ---- */
677 cmpq $0x80,%rdx
678 jb .Lecb_dec_tail
679
680 movdqu (%rdi),%xmm2
681 movdqu 16(%rdi),%xmm3
682 movdqu 32(%rdi),%xmm4
683 movdqu 48(%rdi),%xmm5
684 movdqu 64(%rdi),%xmm6
685 movdqu 80(%rdi),%xmm7
686 movdqu 96(%rdi),%xmm8
687 movdqu 112(%rdi),%xmm9
688 leaq 128(%rdi),%rdi
689 subq $0x80,%rdx
690 jmp .Lecb_dec_loop8_enter
691.align 16
692.Lecb_dec_loop8:
/* store the previous eight results while loading the next eight blocks */
693 movups %xmm2,(%rsi)
694 movq %r11,%rcx
695 movdqu (%rdi),%xmm2
696 movl %r10d,%eax
697 movups %xmm3,16(%rsi)
698 movdqu 16(%rdi),%xmm3
699 movups %xmm4,32(%rsi)
700 movdqu 32(%rdi),%xmm4
701 movups %xmm5,48(%rsi)
702 movdqu 48(%rdi),%xmm5
703 movups %xmm6,64(%rsi)
704 movdqu 64(%rdi),%xmm6
705 movups %xmm7,80(%rsi)
706 movdqu 80(%rdi),%xmm7
707 movups %xmm8,96(%rsi)
708 movdqu 96(%rdi),%xmm8
709 movups %xmm9,112(%rsi)
710 leaq 128(%rsi),%rsi
711 movdqu 112(%rdi),%xmm9
712 leaq 128(%rdi),%rdi
713.Lecb_dec_loop8_enter:
714
715 call _aesni_decrypt8
716
/* %xmm0 = first 16 bytes of the key table again */
717 movups (%r11),%xmm0
718 subq $0x80,%rdx
719 jnc .Lecb_dec_loop8
720
/* store the last full group; each register is cleared right after its store */
721 movups %xmm2,(%rsi)
722 pxor %xmm2,%xmm2
723 movq %r11,%rcx
724 movups %xmm3,16(%rsi)
725 pxor %xmm3,%xmm3
726 movl %r10d,%eax
727 movups %xmm4,32(%rsi)
728 pxor %xmm4,%xmm4
729 movups %xmm5,48(%rsi)
730 pxor %xmm5,%xmm5
731 movups %xmm6,64(%rsi)
732 pxor %xmm6,%xmm6
733 movups %xmm7,80(%rsi)
734 pxor %xmm7,%xmm7
735 movups %xmm8,96(%rsi)
736 pxor %xmm8,%xmm8
737 movups %xmm9,112(%rsi)
738 pxor %xmm9,%xmm9
739 leaq 128(%rsi),%rsi
/* undo the last subtraction: %rdx = bytes left over (0 .. 0x70) */
740 addq $0x80,%rdx
741 jz .Lecb_ret
742
743.Lecb_dec_tail:
/*
 * One to seven blocks remain.  movups does not change the flags, so each
 * je below reuses the result of the cmpq before the preceding jb.
 */
744 movups (%rdi),%xmm2
745 cmpq $0x20,%rdx
746 jb .Lecb_dec_one
747 movups 16(%rdi),%xmm3
748 je .Lecb_dec_two
749 movups 32(%rdi),%xmm4
750 cmpq $0x40,%rdx
751 jb .Lecb_dec_three
752 movups 48(%rdi),%xmm5
753 je .Lecb_dec_four
754 movups 64(%rdi),%xmm6
755 cmpq $0x60,%rdx
756 jb .Lecb_dec_five
757 movups 80(%rdi),%xmm7
758 je .Lecb_dec_six
/* seven blocks: use the 8-wide helper with a zeroed eighth lane */
759 movups 96(%rdi),%xmm8
760 movups (%rcx),%xmm0
761 xorps %xmm9,%xmm9
762 call _aesni_decrypt8
763 movups %xmm2,(%rsi)
764 pxor %xmm2,%xmm2
765 movups %xmm3,16(%rsi)
766 pxor %xmm3,%xmm3
767 movups %xmm4,32(%rsi)
768 pxor %xmm4,%xmm4
769 movups %xmm5,48(%rsi)
770 pxor %xmm5,%xmm5
771 movups %xmm6,64(%rsi)
772 pxor %xmm6,%xmm6
773 movups %xmm7,80(%rsi)
774 pxor %xmm7,%xmm7
775 movups %xmm8,96(%rsi)
776 pxor %xmm8,%xmm8
777 pxor %xmm9,%xmm9
778 jmp .Lecb_ret
779.align 16
780.Lecb_dec_one:
/* single block: inline round loop, %eax iterations of aesdec */
781 movups (%rcx),%xmm0
782 movups 16(%rcx),%xmm1
783 leaq 32(%rcx),%rcx
784 xorps %xmm0,%xmm2
785.Loop_dec1_4:
786.byte 102,15,56,222,209
787 decl %eax
788 movups (%rcx),%xmm1
789 leaq 16(%rcx),%rcx
790 jnz .Loop_dec1_4
791.byte 102,15,56,223,209
792 movups %xmm2,(%rsi)
793 pxor %xmm2,%xmm2
794 jmp .Lecb_ret
795.align 16
796.Lecb_dec_two:
797 call _aesni_decrypt2
798 movups %xmm2,(%rsi)
799 pxor %xmm2,%xmm2
800 movups %xmm3,16(%rsi)
801 pxor %xmm3,%xmm3
802 jmp .Lecb_ret
803.align 16
804.Lecb_dec_three:
805 call _aesni_decrypt3
806 movups %xmm2,(%rsi)
807 pxor %xmm2,%xmm2
808 movups %xmm3,16(%rsi)
809 pxor %xmm3,%xmm3
810 movups %xmm4,32(%rsi)
811 pxor %xmm4,%xmm4
812 jmp .Lecb_ret
813.align 16
814.Lecb_dec_four:
815 call _aesni_decrypt4
816 movups %xmm2,(%rsi)
817 pxor %xmm2,%xmm2
818 movups %xmm3,16(%rsi)
819 pxor %xmm3,%xmm3
820 movups %xmm4,32(%rsi)
821 pxor %xmm4,%xmm4
822 movups %xmm5,48(%rsi)
823 pxor %xmm5,%xmm5
824 jmp .Lecb_ret
825.align 16
826.Lecb_dec_five:
/* five blocks: use the 6-wide helper with a zeroed sixth lane */
827 xorps %xmm7,%xmm7
828 call _aesni_decrypt6
829 movups %xmm2,(%rsi)
830 pxor %xmm2,%xmm2
831 movups %xmm3,16(%rsi)
832 pxor %xmm3,%xmm3
833 movups %xmm4,32(%rsi)
834 pxor %xmm4,%xmm4
835 movups %xmm5,48(%rsi)
836 pxor %xmm5,%xmm5
837 movups %xmm6,64(%rsi)
838 pxor %xmm6,%xmm6
839 pxor %xmm7,%xmm7
840 jmp .Lecb_ret
841.align 16
842.Lecb_dec_six:
/* this case has no jmp: it falls through into .Lecb_ret */
843 call _aesni_decrypt6
844 movups %xmm2,(%rsi)
845 pxor %xmm2,%xmm2
846 movups %xmm3,16(%rsi)
847 pxor %xmm3,%xmm3
848 movups %xmm4,32(%rsi)
849 pxor %xmm4,%xmm4
850 movups %xmm5,48(%rsi)
851 pxor %xmm5,%xmm5
852 movups %xmm6,64(%rsi)
853 pxor %xmm6,%xmm6
854 movups %xmm7,80(%rsi)
855 pxor %xmm7,%xmm7
856
857.Lecb_ret:
/* common exit: clear the registers that held round keys */
858 xorps %xmm0,%xmm0
859 pxor %xmm1,%xmm1
860 .byte 0xf3,0xc3
861.cfi_endproc
862.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
/*
 * aesni_ccm64_encrypt_blocks - per 16-byte block, encrypt a counter block
 * and a running value side by side, and xor the input with the encrypted
 * counter to form the output.
 *
 * Registers as used below:
 *   %rdi  source pointer
 *   %rsi  destination pointer
 *   %rdx  number of 16-byte blocks
 *   %rcx  address of the round-key table (count at offset 240)
 *   %r8   address of the 16-byte counter block (read only here)
 *   %r9   address of the 16-byte running value (read, updated, written
 *         back; judging by the function name this is the CCM MAC state)
 *
 * .Lincrement64 and .Lbswap_mask are constants defined outside the part
 * of the file reviewed here.
 *
 * NOTE(review): %rdx is decremented before it is tested, so a block count
 * of zero is not handled here - presumably callers never pass zero.
 *
 * Byte-coded instructions: 243,15,30,250 = endbr64; 102,15,56,0,247 =
 * pshufb %xmm7,%xmm6; 102,15,56,0,215 = pshufb %xmm7,%xmm2;
 * 102,15,56,220,M = aesenc and 102,15,56,221,M = aesenclast with
 * M = 209/217 for %xmm1 into %xmm2/%xmm3 and M = 208/216 for %xmm0 into
 * %xmm2/%xmm3; 0xf3,0xc3 = rep ret.
 */
863.globl aesni_ccm64_encrypt_blocks
864.type aesni_ccm64_encrypt_blocks,@function
865.align 16
866aesni_ccm64_encrypt_blocks:
867.cfi_startproc
868.byte 243,15,30,250
869 movl 240(%rcx),%eax
870 movdqu (%r8),%xmm6
871 movdqa .Lincrement64(%rip),%xmm9
872 movdqa .Lbswap_mask(%rip),%xmm7
873
/*
 * With n the count from offset 240: %r11 = table start, %rcx = table +
 * 32 + 16n, %r10 = 16 - 16n (the starting offset for the round loop).
 */
874 shll $4,%eax
875 movl $16,%r10d
876 leaq 0(%rcx),%r11
877 movdqu (%r9),%xmm3
/* %xmm2 = counter block as loaded; %xmm6 keeps a byte-shuffled copy */
878 movdqa %xmm6,%xmm2
879 leaq 32(%rcx,%rax,1),%rcx
880.byte 102,15,56,0,247
881 subq %rax,%r10
882 jmp .Lccm64_enc_outer
883.align 16
884.Lccm64_enc_outer:
885 movups (%r11),%xmm0
886 movq %r10,%rax
887 movups (%rdi),%xmm8
888
/* %xmm2 ^= key0;  %xmm3 ^= input ^ key0 */
889 xorps %xmm0,%xmm2
890 movups 16(%r11),%xmm1
891 xorps %xmm8,%xmm0
892 xorps %xmm0,%xmm3
893 movups 32(%r11),%xmm0
894
895.Lccm64_enc2_loop:
/* two lanes (%xmm2, %xmm3), two rounds per iteration */
896.byte 102,15,56,220,209
897.byte 102,15,56,220,217
898 movups (%rcx,%rax,1),%xmm1
899 addq $32,%rax
900.byte 102,15,56,220,208
901.byte 102,15,56,220,216
902 movups -16(%rcx,%rax,1),%xmm0
903 jnz .Lccm64_enc2_loop
904.byte 102,15,56,220,209
905.byte 102,15,56,220,217
/* advance the shuffled counter copy by the .Lincrement64 constant */
906 paddq %xmm9,%xmm6
/*
 * The flags set by this decq are consumed by the jnz at the end of the
 * outer loop; none of the instructions in between modifies the flags.
 */
907 decq %rdx
908.byte 102,15,56,221,208
909.byte 102,15,56,221,216
910
911 leaq 16(%rdi),%rdi
/* output block = input ^ encrypted counter */
912 xorps %xmm2,%xmm8
/* next counter block: shuffle the advanced copy with the same mask */
913 movdqa %xmm6,%xmm2
914 movups %xmm8,(%rsi)
915.byte 102,15,56,0,215
916 leaq 16(%rsi),%rsi
917 jnz .Lccm64_enc_outer
918
/* write the running value back and clear the working registers */
919 pxor %xmm0,%xmm0
920 pxor %xmm1,%xmm1
921 pxor %xmm2,%xmm2
922 movups %xmm3,(%r9)
923 pxor %xmm3,%xmm3
924 pxor %xmm8,%xmm8
925 pxor %xmm6,%xmm6
926 .byte 0xf3,0xc3
927.cfi_endproc
928.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
/*
 * aesni_ccm64_decrypt_blocks - per 16-byte block, xor the input with an
 * encrypted counter block to form the output, and fold that output into a
 * running value that is encrypted alongside the next counter block.
 *
 * Registers as used below:
 *   %rdi  source pointer
 *   %rsi  destination pointer
 *   %rdx  number of 16-byte blocks
 *   %rcx  address of the round-key table (count at offset 240)
 *   %r8   address of the 16-byte counter block (read only here)
 *   %r9   address of the 16-byte running value (read, updated, written
 *         back; judging by the function name this is the CCM MAC state)
 *
 * .Lincrement64 and .Lbswap_mask are constants defined outside the part
 * of the file reviewed here.
 *
 * NOTE(review): %rdx is decremented before it is tested, so a block count
 * of zero is not handled here - presumably callers never pass zero.
 *
 * Byte-coded instructions: 243,15,30,250 = endbr64; 102,15,56,0,247 =
 * pshufb %xmm7,%xmm6; 102,15,56,0,215 = pshufb %xmm7,%xmm2;
 * 102,15,56,220,M = aesenc and 102,15,56,221,M = aesenclast with
 * M = 209/217 for %xmm1 into %xmm2/%xmm3 and M = 208/216 for %xmm0 into
 * %xmm2/%xmm3; 0xf3,0xc3 = rep ret.  Only encryption rounds are used.
 */
929.globl aesni_ccm64_decrypt_blocks
930.type aesni_ccm64_decrypt_blocks,@function
931.align 16
932aesni_ccm64_decrypt_blocks:
933.cfi_startproc
934.byte 243,15,30,250
935 movl 240(%rcx),%eax
936 movups (%r8),%xmm6
937 movdqu (%r9),%xmm3
938 movdqa .Lincrement64(%rip),%xmm9
939 movdqa .Lbswap_mask(%rip),%xmm7
940
/* %xmm2 = counter block as loaded; %xmm6 keeps a byte-shuffled copy */
941 movaps %xmm6,%xmm2
942 movl %eax,%r10d
943 movq %rcx,%r11
944.byte 102,15,56,0,247
/* encrypt the first counter block on its own (single-lane round loop) */
945 movups (%rcx),%xmm0
946 movups 16(%rcx),%xmm1
947 leaq 32(%rcx),%rcx
948 xorps %xmm0,%xmm2
949.Loop_enc1_5:
950.byte 102,15,56,220,209
951 decl %eax
952 movups (%rcx),%xmm1
953 leaq 16(%rcx),%rcx
954 jnz .Loop_enc1_5
955.byte 102,15,56,221,209
/*
 * With n the count from offset 240: %rcx = table + 32 + 16n and
 * %r10 = 16 - 16n (the starting offset for the two-lane round loop).
 */
956 shll $4,%r10d
957 movl $16,%eax
958 movups (%rdi),%xmm8
959 paddq %xmm9,%xmm6
960 leaq 16(%rdi),%rdi
961 subq %r10,%rax
962 leaq 32(%r11,%r10,1),%rcx
963 movq %rax,%r10
964 jmp .Lccm64_dec_outer
965.align 16
966.Lccm64_dec_outer:
/* output block = input ^ encrypted counter; %xmm8 now holds the output */
967 xorps %xmm2,%xmm8
/* next counter block: shuffle the advanced copy with the same mask */
968 movdqa %xmm6,%xmm2
969 movups %xmm8,(%rsi)
970 leaq 16(%rsi),%rsi
971.byte 102,15,56,0,215
972
973 subq $1,%rdx
974 jz .Lccm64_dec_break
975
976 movups (%r11),%xmm0
977 movq %r10,%rax
978 movups 16(%r11),%xmm1
/* %xmm2 ^= key0;  %xmm3 ^= output ^ key0 */
979 xorps %xmm0,%xmm8
980 xorps %xmm0,%xmm2
981 xorps %xmm8,%xmm3
982 movups 32(%r11),%xmm0
983 jmp .Lccm64_dec2_loop
984.align 16
985.Lccm64_dec2_loop:
/* two lanes (%xmm2, %xmm3), two rounds per iteration */
986.byte 102,15,56,220,209
987.byte 102,15,56,220,217
988 movups (%rcx,%rax,1),%xmm1
989 addq $32,%rax
990.byte 102,15,56,220,208
991.byte 102,15,56,220,216
992 movups -16(%rcx,%rax,1),%xmm0
993 jnz .Lccm64_dec2_loop
/* fetch the next input block and advance the shuffled counter copy */
994 movups (%rdi),%xmm8
995 paddq %xmm9,%xmm6
996.byte 102,15,56,220,209
997.byte 102,15,56,220,217
998.byte 102,15,56,221,208
999.byte 102,15,56,221,216
1000 leaq 16(%rdi),%rdi
1001 jmp .Lccm64_dec_outer
1002
1003.align 16
1004.Lccm64_dec_break:
1005
/*
 * Last block: fold the final output into %xmm3 and encrypt %xmm3 alone
 * with a single-lane round loop that walks the key table through %r11.
 */
1006 movl 240(%r11),%eax
1007 movups (%r11),%xmm0
1008 movups 16(%r11),%xmm1
1009 xorps %xmm0,%xmm8
1010 leaq 32(%r11),%r11
1011 xorps %xmm8,%xmm3
1012.Loop_enc1_6:
1013.byte 102,15,56,220,217
1014 decl %eax
1015 movups (%r11),%xmm1
1016 leaq 16(%r11),%r11
1017 jnz .Loop_enc1_6
1018.byte 102,15,56,221,217
/* write the running value back and clear the working registers */
1019 pxor %xmm0,%xmm0
1020 pxor %xmm1,%xmm1
1021 pxor %xmm2,%xmm2
1022 movups %xmm3,(%r9)
1023 pxor %xmm3,%xmm3
1024 pxor %xmm8,%xmm8
1025 pxor %xmm6,%xmm6
1026 .byte 0xf3,0xc3
1027.cfi_endproc
1028.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
1029.globl aesni_ctr32_encrypt_blocks
1030.type aesni_ctr32_encrypt_blocks,@function
1031.align 16
1032aesni_ctr32_encrypt_blocks:
1033.cfi_startproc
1034.byte 243,15,30,250
1035 cmpq $1,%rdx
1036 jne .Lctr32_bulk
1037
1038
1039
1040 movups (%r8),%xmm2
1041 movups (%rdi),%xmm3
1042 movl 240(%rcx),%edx
1043 movups (%rcx),%xmm0
1044 movups 16(%rcx),%xmm1
1045 leaq 32(%rcx),%rcx
1046 xorps %xmm0,%xmm2
1047.Loop_enc1_7:
1048.byte 102,15,56,220,209
1049 decl %edx
1050 movups (%rcx),%xmm1
1051 leaq 16(%rcx),%rcx
1052 jnz .Loop_enc1_7
1053.byte 102,15,56,221,209
1054 pxor %xmm0,%xmm0
1055 pxor %xmm1,%xmm1
1056 xorps %xmm3,%xmm2
1057 pxor %xmm3,%xmm3
1058 movups %xmm2,(%rsi)
1059 xorps %xmm2,%xmm2
1060 jmp .Lctr32_epilogue
1061
1062.align 16
1063.Lctr32_bulk:
1064 leaq (%rsp),%r11
1065.cfi_def_cfa_register %r11
1066 pushq %rbp
1067.cfi_offset %rbp,-16
1068 subq $128,%rsp
1069 andq $-16,%rsp
1070
1071
1072
1073
1074 movdqu (%r8),%xmm2
1075 movdqu (%rcx),%xmm0
1076 movl 12(%r8),%r8d
1077 pxor %xmm0,%xmm2
1078 movl 12(%rcx),%ebp
1079 movdqa %xmm2,0(%rsp)
1080 bswapl %r8d
1081 movdqa %xmm2,%xmm3
1082 movdqa %xmm2,%xmm4
1083 movdqa %xmm2,%xmm5
1084 movdqa %xmm2,64(%rsp)
1085 movdqa %xmm2,80(%rsp)
1086 movdqa %xmm2,96(%rsp)
1087 movq %rdx,%r10
1088 movdqa %xmm2,112(%rsp)
1089
1090 leaq 1(%r8),%rax
1091 leaq 2(%r8),%rdx
1092 bswapl %eax
1093 bswapl %edx
1094 xorl %ebp,%eax
1095 xorl %ebp,%edx
1096.byte 102,15,58,34,216,3
1097 leaq 3(%r8),%rax
1098 movdqa %xmm3,16(%rsp)
1099.byte 102,15,58,34,226,3
1100 bswapl %eax
1101 movq %r10,%rdx
1102 leaq 4(%r8),%r10
1103 movdqa %xmm4,32(%rsp)
1104 xorl %ebp,%eax
1105 bswapl %r10d
1106.byte 102,15,58,34,232,3
1107 xorl %ebp,%r10d
1108 movdqa %xmm5,48(%rsp)
1109 leaq 5(%r8),%r9
1110 movl %r10d,64+12(%rsp)
1111 bswapl %r9d
1112 leaq 6(%r8),%r10
1113 movl 240(%rcx),%eax
1114 xorl %ebp,%r9d
1115 bswapl %r10d
1116 movl %r9d,80+12(%rsp)
1117 xorl %ebp,%r10d
1118 leaq 7(%r8),%r9
1119 movl %r10d,96+12(%rsp)
1120 bswapl %r9d
1121 movl OPENSSL_ia32cap_P+4(%rip),%r10d
1122 xorl %ebp,%r9d
1123 andl $71303168,%r10d
1124 movl %r9d,112+12(%rsp)
1125
1126 movups 16(%rcx),%xmm1
1127
1128 movdqa 64(%rsp),%xmm6
1129 movdqa 80(%rsp),%xmm7
1130
1131 cmpq $8,%rdx
1132 jb .Lctr32_tail
1133
1134 subq $6,%rdx
1135 cmpl $4194304,%r10d
1136 je .Lctr32_6x
1137
1138 leaq 128(%rcx),%rcx
1139 subq $2,%rdx
1140 jmp .Lctr32_loop8
1141
1142.align 16
1143.Lctr32_6x:
1144 shll $4,%eax
1145 movl $48,%r10d
1146 bswapl %ebp
1147 leaq 32(%rcx,%rax,1),%rcx
1148 subq %rax,%r10
1149 jmp .Lctr32_loop6
1150
1151.align 16
1152.Lctr32_loop6:
1153 addl $6,%r8d
1154 movups -48(%rcx,%r10,1),%xmm0
1155.byte 102,15,56,220,209
1156 movl %r8d,%eax
1157 xorl %ebp,%eax
1158.byte 102,15,56,220,217
1159.byte 0x0f,0x38,0xf1,0x44,0x24,12
1160 leal 1(%r8),%eax
1161.byte 102,15,56,220,225
1162 xorl %ebp,%eax
1163.byte 0x0f,0x38,0xf1,0x44,0x24,28
1164.byte 102,15,56,220,233
1165 leal 2(%r8),%eax
1166 xorl %ebp,%eax
1167.byte 102,15,56,220,241
1168.byte 0x0f,0x38,0xf1,0x44,0x24,44
1169 leal 3(%r8),%eax
1170.byte 102,15,56,220,249
1171 movups -32(%rcx,%r10,1),%xmm1
1172 xorl %ebp,%eax
1173
1174.byte 102,15,56,220,208
1175.byte 0x0f,0x38,0xf1,0x44,0x24,60
1176 leal 4(%r8),%eax
1177.byte 102,15,56,220,216
1178 xorl %ebp,%eax
1179.byte 0x0f,0x38,0xf1,0x44,0x24,76
1180.byte 102,15,56,220,224
1181 leal 5(%r8),%eax
1182 xorl %ebp,%eax
1183.byte 102,15,56,220,232
1184.byte 0x0f,0x38,0xf1,0x44,0x24,92
1185 movq %r10,%rax
1186.byte 102,15,56,220,240
1187.byte 102,15,56,220,248
1188 movups -16(%rcx,%r10,1),%xmm0
1189
1190 call .Lenc_loop6
1191
1192 movdqu (%rdi),%xmm8
1193 movdqu 16(%rdi),%xmm9
1194 movdqu 32(%rdi),%xmm10
1195 movdqu 48(%rdi),%xmm11
1196 movdqu 64(%rdi),%xmm12
1197 movdqu 80(%rdi),%xmm13
1198 leaq 96(%rdi),%rdi
1199 movups -64(%rcx,%r10,1),%xmm1
1200 pxor %xmm2,%xmm8
1201 movaps 0(%rsp),%xmm2
1202 pxor %xmm3,%xmm9
1203 movaps 16(%rsp),%xmm3
1204 pxor %xmm4,%xmm10
1205 movaps 32(%rsp),%xmm4
1206 pxor %xmm5,%xmm11
1207 movaps 48(%rsp),%xmm5
1208 pxor %xmm6,%xmm12
1209 movaps 64(%rsp),%xmm6
1210 pxor %xmm7,%xmm13
1211 movaps 80(%rsp),%xmm7
1212 movdqu %xmm8,(%rsi)
1213 movdqu %xmm9,16(%rsi)
1214 movdqu %xmm10,32(%rsi)
1215 movdqu %xmm11,48(%rsi)
1216 movdqu %xmm12,64(%rsi)
1217 movdqu %xmm13,80(%rsi)
1218 leaq 96(%rsi),%rsi
1219
1220 subq $6,%rdx
1221 jnc .Lctr32_loop6
1222
1223 addq $6,%rdx
1224 jz .Lctr32_done
1225
1226 leal -48(%r10),%eax
1227 leaq -80(%rcx,%r10,1),%rcx
1228 negl %eax
1229 shrl $4,%eax
1230 jmp .Lctr32_tail
1231
1232.align 32
1233.Lctr32_loop8:
1234 addl $8,%r8d
1235 movdqa 96(%rsp),%xmm8
1236.byte 102,15,56,220,209
1237 movl %r8d,%r9d
1238 movdqa 112(%rsp),%xmm9
1239.byte 102,15,56,220,217
1240 bswapl %r9d
1241 movups 32-128(%rcx),%xmm0
1242.byte 102,15,56,220,225
1243 xorl %ebp,%r9d
1244 nop
1245.byte 102,15,56,220,233
1246 movl %r9d,0+12(%rsp)
1247 leaq 1(%r8),%r9
1248.byte 102,15,56,220,241
1249.byte 102,15,56,220,249
1250.byte 102,68,15,56,220,193
1251.byte 102,68,15,56,220,201
1252 movups 48-128(%rcx),%xmm1
1253 bswapl %r9d
1254.byte 102,15,56,220,208
1255.byte 102,15,56,220,216
1256 xorl %ebp,%r9d
1257.byte 0x66,0x90
1258.byte 102,15,56,220,224
1259.byte 102,15,56,220,232
1260 movl %r9d,16+12(%rsp)
1261 leaq 2(%r8),%r9
1262.byte 102,15,56,220,240
1263.byte 102,15,56,220,248
1264.byte 102,68,15,56,220,192
1265.byte 102,68,15,56,220,200
1266 movups 64-128(%rcx),%xmm0
1267 bswapl %r9d
1268.byte 102,15,56,220,209
1269.byte 102,15,56,220,217
1270 xorl %ebp,%r9d
1271.byte 0x66,0x90
1272.byte 102,15,56,220,225
1273.byte 102,15,56,220,233
1274 movl %r9d,32+12(%rsp)
1275 leaq 3(%r8),%r9
1276.byte 102,15,56,220,241
1277.byte 102,15,56,220,249
1278.byte 102,68,15,56,220,193
1279.byte 102,68,15,56,220,201
1280 movups 80-128(%rcx),%xmm1
1281 bswapl %r9d
1282.byte 102,15,56,220,208
1283.byte 102,15,56,220,216
1284 xorl %ebp,%r9d
1285.byte 0x66,0x90
1286.byte 102,15,56,220,224
1287.byte 102,15,56,220,232
1288 movl %r9d,48+12(%rsp)
1289 leaq 4(%r8),%r9
1290.byte 102,15,56,220,240
1291.byte 102,15,56,220,248
1292.byte 102,68,15,56,220,192
1293.byte 102,68,15,56,220,200
1294 movups 96-128(%rcx),%xmm0
1295 bswapl %r9d
1296.byte 102,15,56,220,209
1297.byte 102,15,56,220,217
1298 xorl %ebp,%r9d
1299.byte 0x66,0x90
1300.byte 102,15,56,220,225
1301.byte 102,15,56,220,233
1302 movl %r9d,64+12(%rsp)
1303 leaq 5(%r8),%r9
1304.byte 102,15,56,220,241
1305.byte 102,15,56,220,249
1306.byte 102,68,15,56,220,193
1307.byte 102,68,15,56,220,201
1308 movups 112-128(%rcx),%xmm1
1309 bswapl %r9d
1310.byte 102,15,56,220,208
1311.byte 102,15,56,220,216
1312 xorl %ebp,%r9d
1313.byte 0x66,0x90
1314.byte 102,15,56,220,224
1315.byte 102,15,56,220,232
1316 movl %r9d,80+12(%rsp)
1317 leaq 6(%r8),%r9
1318.byte 102,15,56,220,240
1319.byte 102,15,56,220,248
1320.byte 102,68,15,56,220,192
1321.byte 102,68,15,56,220,200
1322 movups 128-128(%rcx),%xmm0
1323 bswapl %r9d
1324.byte 102,15,56,220,209
1325.byte 102,15,56,220,217
1326 xorl %ebp,%r9d
1327.byte 0x66,0x90
1328.byte 102,15,56,220,225
1329.byte 102,15,56,220,233
1330 movl %r9d,96+12(%rsp)
1331 leaq 7(%r8),%r9
1332.byte 102,15,56,220,241
1333.byte 102,15,56,220,249
1334.byte 102,68,15,56,220,193
1335.byte 102,68,15,56,220,201
1336 movups 144-128(%rcx),%xmm1
1337 bswapl %r9d
1338.byte 102,15,56,220,208
1339.byte 102,15,56,220,216
1340.byte 102,15,56,220,224
1341 xorl %ebp,%r9d
1342 movdqu 0(%rdi),%xmm10
1343.byte 102,15,56,220,232
1344 movl %r9d,112+12(%rsp)
1345 cmpl $11,%eax
1346.byte 102,15,56,220,240
1347.byte 102,15,56,220,248
1348.byte 102,68,15,56,220,192
1349.byte 102,68,15,56,220,200
1350 movups 160-128(%rcx),%xmm0
1351
1352 jb .Lctr32_enc_done
1353
1354.byte 102,15,56,220,209
1355.byte 102,15,56,220,217
1356.byte 102,15,56,220,225
1357.byte 102,15,56,220,233
1358.byte 102,15,56,220,241
1359.byte 102,15,56,220,249
1360.byte 102,68,15,56,220,193
1361.byte 102,68,15,56,220,201
1362 movups 176-128(%rcx),%xmm1
1363
1364.byte 102,15,56,220,208
1365.byte 102,15,56,220,216
1366.byte 102,15,56,220,224
1367.byte 102,15,56,220,232
1368.byte 102,15,56,220,240
1369.byte 102,15,56,220,248
1370.byte 102,68,15,56,220,192
1371.byte 102,68,15,56,220,200
1372 movups 192-128(%rcx),%xmm0
1373 je .Lctr32_enc_done
1374
1375.byte 102,15,56,220,209
1376.byte 102,15,56,220,217
1377.byte 102,15,56,220,225
1378.byte 102,15,56,220,233
1379.byte 102,15,56,220,241
1380.byte 102,15,56,220,249
1381.byte 102,68,15,56,220,193
1382.byte 102,68,15,56,220,201
1383 movups 208-128(%rcx),%xmm1
1384
1385.byte 102,15,56,220,208
1386.byte 102,15,56,220,216
1387.byte 102,15,56,220,224
1388.byte 102,15,56,220,232
1389.byte 102,15,56,220,240
1390.byte 102,15,56,220,248
1391.byte 102,68,15,56,220,192
1392.byte 102,68,15,56,220,200
1393 movups 224-128(%rcx),%xmm0
1394 jmp .Lctr32_enc_done
1395
1396.align 16
1397.Lctr32_enc_done:
1398 movdqu 16(%rdi),%xmm11
1399 pxor %xmm0,%xmm10
1400 movdqu 32(%rdi),%xmm12
1401 pxor %xmm0,%xmm11
1402 movdqu 48(%rdi),%xmm13
1403 pxor %xmm0,%xmm12
1404 movdqu 64(%rdi),%xmm14
1405 pxor %xmm0,%xmm13
1406 movdqu 80(%rdi),%xmm15
1407 pxor %xmm0,%xmm14
1408 pxor %xmm0,%xmm15
1409.byte 102,15,56,220,209
1410.byte 102,15,56,220,217
1411.byte 102,15,56,220,225
1412.byte 102,15,56,220,233
1413.byte 102,15,56,220,241
1414.byte 102,15,56,220,249
1415.byte 102,68,15,56,220,193
1416.byte 102,68,15,56,220,201
1417 movdqu 96(%rdi),%xmm1
1418 leaq 128(%rdi),%rdi
1419
1420.byte 102,65,15,56,221,210
1421 pxor %xmm0,%xmm1
1422 movdqu 112-128(%rdi),%xmm10
1423.byte 102,65,15,56,221,219
1424 pxor %xmm0,%xmm10
1425 movdqa 0(%rsp),%xmm11
1426.byte 102,65,15,56,221,228
1427.byte 102,65,15,56,221,237
1428 movdqa 16(%rsp),%xmm12
1429 movdqa 32(%rsp),%xmm13
1430.byte 102,65,15,56,221,246
1431.byte 102,65,15,56,221,255
1432 movdqa 48(%rsp),%xmm14
1433 movdqa 64(%rsp),%xmm15
1434.byte 102,68,15,56,221,193
1435 movdqa 80(%rsp),%xmm0
1436 movups 16-128(%rcx),%xmm1
1437.byte 102,69,15,56,221,202
1438
1439 movups %xmm2,(%rsi)
1440 movdqa %xmm11,%xmm2
1441 movups %xmm3,16(%rsi)
1442 movdqa %xmm12,%xmm3
1443 movups %xmm4,32(%rsi)
1444 movdqa %xmm13,%xmm4
1445 movups %xmm5,48(%rsi)
1446 movdqa %xmm14,%xmm5
1447 movups %xmm6,64(%rsi)
1448 movdqa %xmm15,%xmm6
1449 movups %xmm7,80(%rsi)
1450 movdqa %xmm0,%xmm7
1451 movups %xmm8,96(%rsi)
1452 movups %xmm9,112(%rsi)
1453 leaq 128(%rsi),%rsi
1454
1455 subq $8,%rdx
1456 jnc .Lctr32_loop8
1457
1458 addq $8,%rdx
1459 jz .Lctr32_done
1460 leaq -128(%rcx),%rcx
1461
1462.Lctr32_tail:
1463
1464
1465 leaq 16(%rcx),%rcx
1466 cmpq $4,%rdx
1467 jb .Lctr32_loop3
1468 je .Lctr32_loop4
1469
1470
1471 shll $4,%eax
1472 movdqa 96(%rsp),%xmm8
1473 pxor %xmm9,%xmm9
1474
1475 movups 16(%rcx),%xmm0
1476.byte 102,15,56,220,209
1477.byte 102,15,56,220,217
1478 leaq 32-16(%rcx,%rax,1),%rcx
1479 negq %rax
1480.byte 102,15,56,220,225
1481 addq $16,%rax
1482 movups (%rdi),%xmm10
1483.byte 102,15,56,220,233
1484.byte 102,15,56,220,241
1485 movups 16(%rdi),%xmm11
1486 movups 32(%rdi),%xmm12
1487.byte 102,15,56,220,249
1488.byte 102,68,15,56,220,193
1489
1490 call .Lenc_loop8_enter
1491
1492 movdqu 48(%rdi),%xmm13
1493 pxor %xmm10,%xmm2
1494 movdqu 64(%rdi),%xmm10
1495 pxor %xmm11,%xmm3
1496 movdqu %xmm2,(%rsi)
1497 pxor %xmm12,%xmm4
1498 movdqu %xmm3,16(%rsi)
1499 pxor %xmm13,%xmm5
1500 movdqu %xmm4,32(%rsi)
1501 pxor %xmm10,%xmm6
1502 movdqu %xmm5,48(%rsi)
1503 movdqu %xmm6,64(%rsi)
1504 cmpq $6,%rdx
1505 jb .Lctr32_done
1506
1507 movups 80(%rdi),%xmm11
1508 xorps %xmm11,%xmm7
1509 movups %xmm7,80(%rsi)
1510 je .Lctr32_done
1511
1512 movups 96(%rdi),%xmm12
1513 xorps %xmm12,%xmm8
1514 movups %xmm8,96(%rsi)
1515 jmp .Lctr32_done
1516
1517.align 32
1518.Lctr32_loop4:
1519.byte 102,15,56,220,209
1520 leaq 16(%rcx),%rcx
1521 decl %eax
1522.byte 102,15,56,220,217
1523.byte 102,15,56,220,225
1524.byte 102,15,56,220,233
1525 movups (%rcx),%xmm1
1526 jnz .Lctr32_loop4
1527.byte 102,15,56,221,209
1528.byte 102,15,56,221,217
1529 movups (%rdi),%xmm10
1530 movups 16(%rdi),%xmm11
1531.byte 102,15,56,221,225
1532.byte 102,15,56,221,233
1533 movups 32(%rdi),%xmm12
1534 movups 48(%rdi),%xmm13
1535
1536 xorps %xmm10,%xmm2
1537 movups %xmm2,(%rsi)
1538 xorps %xmm11,%xmm3
1539 movups %xmm3,16(%rsi)
1540 pxor %xmm12,%xmm4
1541 movdqu %xmm4,32(%rsi)
1542 pxor %xmm13,%xmm5
1543 movdqu %xmm5,48(%rsi)
1544 jmp .Lctr32_done
1545
1546.align 32
1547.Lctr32_loop3:
1548.byte 102,15,56,220,209
1549 leaq 16(%rcx),%rcx
1550 decl %eax
1551.byte 102,15,56,220,217
1552.byte 102,15,56,220,225
1553 movups (%rcx),%xmm1
1554 jnz .Lctr32_loop3
1555.byte 102,15,56,221,209
1556.byte 102,15,56,221,217
1557.byte 102,15,56,221,225
1558
1559 movups (%rdi),%xmm10
1560 xorps %xmm10,%xmm2
1561 movups %xmm2,(%rsi)
1562 cmpq $2,%rdx
1563 jb .Lctr32_done
1564
1565 movups 16(%rdi),%xmm11
1566 xorps %xmm11,%xmm3
1567 movups %xmm3,16(%rsi)
1568 je .Lctr32_done
1569
1570 movups 32(%rdi),%xmm12
1571 xorps %xmm12,%xmm4
1572 movups %xmm4,32(%rsi)
1573
1574.Lctr32_done:
1575 xorps %xmm0,%xmm0
1576 xorl %ebp,%ebp
1577 pxor %xmm1,%xmm1
1578 pxor %xmm2,%xmm2
1579 pxor %xmm3,%xmm3
1580 pxor %xmm4,%xmm4
1581 pxor %xmm5,%xmm5
1582 pxor %xmm6,%xmm6
1583 pxor %xmm7,%xmm7
1584 movaps %xmm0,0(%rsp)
1585 pxor %xmm8,%xmm8
1586 movaps %xmm0,16(%rsp)
1587 pxor %xmm9,%xmm9
1588 movaps %xmm0,32(%rsp)
1589 pxor %xmm10,%xmm10
1590 movaps %xmm0,48(%rsp)
1591 pxor %xmm11,%xmm11
1592 movaps %xmm0,64(%rsp)
1593 pxor %xmm12,%xmm12
1594 movaps %xmm0,80(%rsp)
1595 pxor %xmm13,%xmm13
1596 movaps %xmm0,96(%rsp)
1597 pxor %xmm14,%xmm14
1598 movaps %xmm0,112(%rsp)
1599 pxor %xmm15,%xmm15
1600 movq -8(%r11),%rbp
1601.cfi_restore %rbp
1602 leaq (%r11),%rsp
1603.cfi_def_cfa_register %rsp
1604.Lctr32_epilogue:
1605 .byte 0xf3,0xc3
1606.cfi_endproc
1607.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
/*
 * aesni_xts_encrypt -- XTS-mode encryption built on AES-NI (GAS, AT&T syntax).
 *
 * Register inputs, as consumed by the code below:
 *   %rdi  input pointer  (read 16 bytes at a time, advanced as data is consumed)
 *   %rsi  output pointer (advanced as data is produced)
 *   %rdx  length in bytes; a trailing partial block is handled by stealing
 *   %rcx  key schedule used for the data blocks   (loop count at offset 240)
 *   %r8   key schedule used to encrypt the tweak  (loop count at offset 240)
 *   %r9   pointer to the 16 bytes that are encrypted to form the first tweak
 *
 * Raw opcode bytes used throughout:
 *   243,15,30,250        endbr64
 *   102,15,56,220,modrm  aesenc      (209 = %xmm1 into %xmm2, 208 = %xmm0 into %xmm2, ...)
 *   102,15,56,221,modrm  aesenclast  (84,36,N = memory operand N(%rsp))
 *   0xf3,0xc3            rep ret
 *
 * Long-lived registers after the prologue:
 *   %rbp  copy of the data key pointer      %r9   original byte length
 *   %r10  loop count / pre-biased counter   %r11  stack pointer at entry
 *   %xmm10-%xmm14  tweaks for blocks 0..4   %xmm15 tweak for block 5
 *   %xmm8  .Lxts_magic constant (defined outside this block; presumably the
 *          carry/reduction mask for tweak doubling -- confirm at its definition)
 *
 * The 112-byte aligned stack area holds, at 0..80(%rsp), the six values fed to
 * aesenclast and, at 96(%rsp), round key 0 XOR the last round key.
 * All xmm registers and the stack area are zeroed before returning.
 */
1608.globl aesni_xts_encrypt
1609.type aesni_xts_encrypt,@function
1610.align 16
1611aesni_xts_encrypt:
1612.cfi_startproc
1613.byte 243,15,30,250
1614 leaq (%rsp),%r11 /* r11 = rsp at entry; the epilogue unwinds from it */
1615.cfi_def_cfa_register %r11
1616 pushq %rbp
1617.cfi_offset %rbp,-16
1618 subq $112,%rsp
1619 andq $-16,%rsp /* 16-byte align the scratch area for movdqa/movaps */
/* Encrypt the 16 bytes at (%r9) with the key schedule at %r8: result in xmm2 is the first tweak. */
1620 movups (%r9),%xmm2
1621 movl 240(%r8),%eax
1622 movl 240(%rcx),%r10d
1623 movups (%r8),%xmm0
1624 movups 16(%r8),%xmm1
1625 leaq 32(%r8),%r8
1626 xorps %xmm0,%xmm2
1627.Loop_enc1_8:
1628.byte 102,15,56,220,209
1629 decl %eax
1630 movups (%r8),%xmm1
1631 leaq 16(%r8),%r8
1632 jnz .Loop_enc1_8
1633.byte 102,15,56,221,209
/* Switch to the data key: xmm0 = round key 0, rbp = key pointer, eax = loop count, r10 = 16*loop count. */
1634 movups (%rcx),%xmm0
1635 movq %rcx,%rbp
1636 movl %r10d,%eax
1637 shll $4,%r10d
1638 movq %rdx,%r9 /* keep the exact byte length; its low 4 bits select stealing later */
1639 andq $-16,%rdx /* rdx = length rounded down to whole 16-byte blocks */
1640
1641 movups 16(%rcx,%r10,1),%xmm1 /* last round key */
1642
/*
 * Precompute the tweaks for the first six blocks.  Each step doubles xmm15:
 * paddq shifts both 64-bit halves left by one, and the mask in xmm14
 * (sign bits taken from xmm9, ANDed with the magic constant) is XORed in to
 * account for the bits shifted out of each half.  xmm10..xmm14 receive
 * tweak XOR round key 0; xmm15 keeps the sixth tweak unmodified.
 */
1643 movdqa .Lxts_magic(%rip),%xmm8
1644 movdqa %xmm2,%xmm15
1645 pshufd $0x5f,%xmm2,%xmm9 /* xmm9 = dwords 3,3,1,1 of the tweak: their sign bits are the carries */
1646 pxor %xmm0,%xmm1 /* xmm1 = round key 0 XOR last round key */
1647 movdqa %xmm9,%xmm14
1648 paddd %xmm9,%xmm9
1649 movdqa %xmm15,%xmm10
1650 psrad $31,%xmm14
1651 paddq %xmm15,%xmm15
1652 pand %xmm8,%xmm14
1653 pxor %xmm0,%xmm10
1654 pxor %xmm14,%xmm15
1655 movdqa %xmm9,%xmm14
1656 paddd %xmm9,%xmm9
1657 movdqa %xmm15,%xmm11
1658 psrad $31,%xmm14
1659 paddq %xmm15,%xmm15
1660 pand %xmm8,%xmm14
1661 pxor %xmm0,%xmm11
1662 pxor %xmm14,%xmm15
1663 movdqa %xmm9,%xmm14
1664 paddd %xmm9,%xmm9
1665 movdqa %xmm15,%xmm12
1666 psrad $31,%xmm14
1667 paddq %xmm15,%xmm15
1668 pand %xmm8,%xmm14
1669 pxor %xmm0,%xmm12
1670 pxor %xmm14,%xmm15
1671 movdqa %xmm9,%xmm14
1672 paddd %xmm9,%xmm9
1673 movdqa %xmm15,%xmm13
1674 psrad $31,%xmm14
1675 paddq %xmm15,%xmm15
1676 pand %xmm8,%xmm14
1677 pxor %xmm0,%xmm13
1678 pxor %xmm14,%xmm15
1679 movdqa %xmm15,%xmm14
1680 psrad $31,%xmm9
1681 paddq %xmm15,%xmm15
1682 pand %xmm8,%xmm9
1683 pxor %xmm0,%xmm14
1684 pxor %xmm9,%xmm15
1685 movaps %xmm1,96(%rsp) /* park (round key 0 XOR last round key) for the main loop */
1686
1687 subq $96,%rdx
1688 jc .Lxts_enc_short /* fewer than six whole blocks: skip the 6-way loop */
1689
/* Set up the 6-way loop: rcx points past the end of the key schedule and rax/r10 = 112 - 16*loop count, the starting counter for .Lxts_enc_loop6. */
1690 movl $16+96,%eax
1691 leaq 32(%rbp,%r10,1),%rcx
1692 subq %r10,%rax
1693 movups 16(%rbp),%xmm1
1694 movq %rax,%r10
1695 leaq .Lxts_magic(%rip),%r8
1696 jmp .Lxts_enc_grandloop
1697
1698.align 32
/*
 * Main loop, six blocks per iteration.  Inputs are XORed with
 * (tweak XOR round key 0), so the initial key addition is already included.
 * The tweaks are then XORed with 96(%rsp), giving (tweak XOR last round key),
 * and stored at 0..80(%rsp) to serve as the aesenclast memory operands; this
 * folds the final tweak XOR into the last round.
 */
1699.Lxts_enc_grandloop:
1700 movdqu 0(%rdi),%xmm2
1701 movdqa %xmm0,%xmm8
1702 movdqu 16(%rdi),%xmm3
1703 pxor %xmm10,%xmm2
1704 movdqu 32(%rdi),%xmm4
1705 pxor %xmm11,%xmm3
1706.byte 102,15,56,220,209
1707 movdqu 48(%rdi),%xmm5
1708 pxor %xmm12,%xmm4
1709.byte 102,15,56,220,217
1710 movdqu 64(%rdi),%xmm6
1711 pxor %xmm13,%xmm5
1712.byte 102,15,56,220,225
1713 movdqu 80(%rdi),%xmm7
1714 pxor %xmm15,%xmm8 /* xmm8 = sixth tweak XOR round key 0 */
1715 movdqa 96(%rsp),%xmm9
1716 pxor %xmm14,%xmm6
1717.byte 102,15,56,220,233
1718 movups 32(%rbp),%xmm0
1719 leaq 96(%rdi),%rdi
1720 pxor %xmm8,%xmm7
1721
1722 pxor %xmm9,%xmm10
1723.byte 102,15,56,220,241
1724 pxor %xmm9,%xmm11
1725 movdqa %xmm10,0(%rsp)
1726.byte 102,15,56,220,249
1727 movups 48(%rbp),%xmm1
1728 pxor %xmm9,%xmm12
1729
1730.byte 102,15,56,220,208
1731 pxor %xmm9,%xmm13
1732 movdqa %xmm11,16(%rsp)
1733.byte 102,15,56,220,216
1734 pxor %xmm9,%xmm14
1735 movdqa %xmm12,32(%rsp)
1736.byte 102,15,56,220,224
1737.byte 102,15,56,220,232
1738 pxor %xmm9,%xmm8
1739 movdqa %xmm14,64(%rsp)
1740.byte 102,15,56,220,240
1741.byte 102,15,56,220,248
1742 movups 64(%rbp),%xmm0
1743 movdqa %xmm8,80(%rsp)
1744 pshufd $0x5f,%xmm15,%xmm9 /* restart the carry tracker from the current tweak */
1745 jmp .Lxts_enc_loop6
1746.align 32
/* Middle rounds: two rounds per pass on all six blocks; rax steps by 32 until it reaches zero (jnz tests the addq result, movups leaves flags alone). */
1747.Lxts_enc_loop6:
1748.byte 102,15,56,220,209
1749.byte 102,15,56,220,217
1750.byte 102,15,56,220,225
1751.byte 102,15,56,220,233
1752.byte 102,15,56,220,241
1753.byte 102,15,56,220,249
1754 movups -64(%rcx,%rax,1),%xmm1
1755 addq $32,%rax
1756
1757.byte 102,15,56,220,208
1758.byte 102,15,56,220,216
1759.byte 102,15,56,220,224
1760.byte 102,15,56,220,232
1761.byte 102,15,56,220,240
1762.byte 102,15,56,220,248
1763 movups -80(%rcx,%rax,1),%xmm0
1764 jnz .Lxts_enc_loop6
1765
/* Remaining rounds, interleaved with computing the next six tweaks into xmm10..xmm15 (xmm10..xmm14 again XORed with round key 0 loaded from (%rbp)). */
1766 movdqa (%r8),%xmm8
1767 movdqa %xmm9,%xmm14
1768 paddd %xmm9,%xmm9
1769.byte 102,15,56,220,209
1770 paddq %xmm15,%xmm15
1771 psrad $31,%xmm14
1772.byte 102,15,56,220,217
1773 pand %xmm8,%xmm14
1774 movups (%rbp),%xmm10
1775.byte 102,15,56,220,225
1776.byte 102,15,56,220,233
1777.byte 102,15,56,220,241
1778 pxor %xmm14,%xmm15
1779 movaps %xmm10,%xmm11
1780.byte 102,15,56,220,249
1781 movups -64(%rcx),%xmm1
1782
1783 movdqa %xmm9,%xmm14
1784.byte 102,15,56,220,208
1785 paddd %xmm9,%xmm9
1786 pxor %xmm15,%xmm10
1787.byte 102,15,56,220,216
1788 psrad $31,%xmm14
1789 paddq %xmm15,%xmm15
1790.byte 102,15,56,220,224
1791.byte 102,15,56,220,232
1792 pand %xmm8,%xmm14
1793 movaps %xmm11,%xmm12
1794.byte 102,15,56,220,240
1795 pxor %xmm14,%xmm15
1796 movdqa %xmm9,%xmm14
1797.byte 102,15,56,220,248
1798 movups -48(%rcx),%xmm0
1799
1800 paddd %xmm9,%xmm9
1801.byte 102,15,56,220,209
1802 pxor %xmm15,%xmm11
1803 psrad $31,%xmm14
1804.byte 102,15,56,220,217
1805 paddq %xmm15,%xmm15
1806 pand %xmm8,%xmm14
1807.byte 102,15,56,220,225
1808.byte 102,15,56,220,233
1809 movdqa %xmm13,48(%rsp) /* the fourth aesenclast operand is stored here, just before xmm13 is reused */
1810 pxor %xmm14,%xmm15
1811.byte 102,15,56,220,241
1812 movaps %xmm12,%xmm13
1813 movdqa %xmm9,%xmm14
1814.byte 102,15,56,220,249
1815 movups -32(%rcx),%xmm1
1816
1817 paddd %xmm9,%xmm9
1818.byte 102,15,56,220,208
1819 pxor %xmm15,%xmm12
1820 psrad $31,%xmm14
1821.byte 102,15,56,220,216
1822 paddq %xmm15,%xmm15
1823 pand %xmm8,%xmm14
1824.byte 102,15,56,220,224
1825.byte 102,15,56,220,232
1826.byte 102,15,56,220,240
1827 pxor %xmm14,%xmm15
1828 movaps %xmm13,%xmm14
1829.byte 102,15,56,220,248
1830
1831 movdqa %xmm9,%xmm0
1832 paddd %xmm9,%xmm9
1833.byte 102,15,56,220,209
1834 pxor %xmm15,%xmm13
1835 psrad $31,%xmm0
1836.byte 102,15,56,220,217
1837 paddq %xmm15,%xmm15
1838 pand %xmm8,%xmm0
1839.byte 102,15,56,220,225
1840.byte 102,15,56,220,233
1841 pxor %xmm0,%xmm15
1842 movups (%rbp),%xmm0 /* reload round key 0 for the next iteration / short path */
1843.byte 102,15,56,220,241
1844.byte 102,15,56,220,249
1845 movups 16(%rbp),%xmm1
1846
/* Final round: aesenclast with the operands saved at 0..80(%rsp). */
1847 pxor %xmm15,%xmm14
1848.byte 102,15,56,221,84,36,0
1849 psrad $31,%xmm9
1850 paddq %xmm15,%xmm15
1851.byte 102,15,56,221,92,36,16
1852.byte 102,15,56,221,100,36,32
1853 pand %xmm8,%xmm9
1854 movq %r10,%rax /* rewind the round counter */
1855.byte 102,15,56,221,108,36,48
1856.byte 102,15,56,221,116,36,64
1857.byte 102,15,56,221,124,36,80
1858 pxor %xmm9,%xmm15
1859
1860 leaq 96(%rsi),%rsi
1861 movups %xmm2,-96(%rsi)
1862 movups %xmm3,-80(%rsi)
1863 movups %xmm4,-64(%rsi)
1864 movups %xmm5,-48(%rsi)
1865 movups %xmm6,-32(%rsi)
1866 movups %xmm7,-16(%rsi)
1867 subq $96,%rdx
1868 jnc .Lxts_enc_grandloop
1869
/* Leaving the 6-way loop: turn r10 back into the plain loop count (eax) and restore rcx = key pointer. */
1870 movl $16+96,%eax
1871 subl %r10d,%eax
1872 movq %rbp,%rcx
1873 shrl $4,%eax
1874
/*
 * 0..5 whole blocks remain.  Each tweak that is about to be used is XORed
 * with xmm0 (round key 0) to undo the pre-whitening, since the single/multi
 * block paths below add round key 0 themselves.  The pxor instructions sit
 * between cmpq and je on purpose: pxor does not modify flags.
 */
1875.Lxts_enc_short:
1876
1877 movl %eax,%r10d /* r10d = loop count, reused by the stealing path */
1878 pxor %xmm0,%xmm10
1879 addq $96,%rdx
1880 jz .Lxts_enc_done
1881
1882 pxor %xmm0,%xmm11
1883 cmpq $0x20,%rdx
1884 jb .Lxts_enc_one
1885 pxor %xmm0,%xmm12
1886 je .Lxts_enc_two
1887
1888 pxor %xmm0,%xmm13
1889 cmpq $0x40,%rdx
1890 jb .Lxts_enc_three
1891 pxor %xmm0,%xmm14
1892 je .Lxts_enc_four
1893
/* Five blocks: run the 6-way helper with a zeroed sixth lane. */
1894 movdqu (%rdi),%xmm2
1895 movdqu 16(%rdi),%xmm3
1896 movdqu 32(%rdi),%xmm4
1897 pxor %xmm10,%xmm2
1898 movdqu 48(%rdi),%xmm5
1899 pxor %xmm11,%xmm3
1900 movdqu 64(%rdi),%xmm6
1901 leaq 80(%rdi),%rdi
1902 pxor %xmm12,%xmm4
1903 pxor %xmm13,%xmm5
1904 pxor %xmm14,%xmm6
1905 pxor %xmm7,%xmm7
1906
1907 call _aesni_encrypt6
1908
1909 xorps %xmm10,%xmm2
1910 movdqa %xmm15,%xmm10 /* xmm10 = tweak following the last whole block */
1911 xorps %xmm11,%xmm3
1912 xorps %xmm12,%xmm4
1913 movdqu %xmm2,(%rsi)
1914 xorps %xmm13,%xmm5
1915 movdqu %xmm3,16(%rsi)
1916 xorps %xmm14,%xmm6
1917 movdqu %xmm4,32(%rsi)
1918 movdqu %xmm5,48(%rsi)
1919 movdqu %xmm6,64(%rsi)
1920 leaq 80(%rsi),%rsi
1921 jmp .Lxts_enc_done
1922
1923.align 16
/* One block: inline single-block encryption, tweak applied before and after. */
1924.Lxts_enc_one:
1925 movups (%rdi),%xmm2
1926 leaq 16(%rdi),%rdi
1927 xorps %xmm10,%xmm2
1928 movups (%rcx),%xmm0
1929 movups 16(%rcx),%xmm1
1930 leaq 32(%rcx),%rcx
1931 xorps %xmm0,%xmm2
1932.Loop_enc1_9:
1933.byte 102,15,56,220,209
1934 decl %eax
1935 movups (%rcx),%xmm1
1936 leaq 16(%rcx),%rcx
1937 jnz .Loop_enc1_9
1938.byte 102,15,56,221,209
1939 xorps %xmm10,%xmm2
1940 movdqa %xmm11,%xmm10 /* next tweak, for a possible stolen block */
1941 movups %xmm2,(%rsi)
1942 leaq 16(%rsi),%rsi
1943 jmp .Lxts_enc_done
1944
1945.align 16
/* Two blocks. */
1946.Lxts_enc_two:
1947 movups (%rdi),%xmm2
1948 movups 16(%rdi),%xmm3
1949 leaq 32(%rdi),%rdi
1950 xorps %xmm10,%xmm2
1951 xorps %xmm11,%xmm3
1952
1953 call _aesni_encrypt2
1954
1955 xorps %xmm10,%xmm2
1956 movdqa %xmm12,%xmm10
1957 xorps %xmm11,%xmm3
1958 movups %xmm2,(%rsi)
1959 movups %xmm3,16(%rsi)
1960 leaq 32(%rsi),%rsi
1961 jmp .Lxts_enc_done
1962
1963.align 16
/* Three blocks. */
1964.Lxts_enc_three:
1965 movups (%rdi),%xmm2
1966 movups 16(%rdi),%xmm3
1967 movups 32(%rdi),%xmm4
1968 leaq 48(%rdi),%rdi
1969 xorps %xmm10,%xmm2
1970 xorps %xmm11,%xmm3
1971 xorps %xmm12,%xmm4
1972
1973 call _aesni_encrypt3
1974
1975 xorps %xmm10,%xmm2
1976 movdqa %xmm13,%xmm10
1977 xorps %xmm11,%xmm3
1978 xorps %xmm12,%xmm4
1979 movups %xmm2,(%rsi)
1980 movups %xmm3,16(%rsi)
1981 movups %xmm4,32(%rsi)
1982 leaq 48(%rsi),%rsi
1983 jmp .Lxts_enc_done
1984
1985.align 16
/* Four blocks. */
1986.Lxts_enc_four:
1987 movups (%rdi),%xmm2
1988 movups 16(%rdi),%xmm3
1989 movups 32(%rdi),%xmm4
1990 xorps %xmm10,%xmm2
1991 movups 48(%rdi),%xmm5
1992 leaq 64(%rdi),%rdi
1993 xorps %xmm11,%xmm3
1994 xorps %xmm12,%xmm4
1995 xorps %xmm13,%xmm5
1996
1997 call _aesni_encrypt4
1998
1999 pxor %xmm10,%xmm2
2000 movdqa %xmm14,%xmm10
2001 pxor %xmm11,%xmm3
2002 pxor %xmm12,%xmm4
2003 movdqu %xmm2,(%rsi)
2004 pxor %xmm13,%xmm5
2005 movdqu %xmm3,16(%rsi)
2006 movdqu %xmm4,32(%rsi)
2007 movdqu %xmm5,48(%rsi)
2008 leaq 64(%rsi),%rsi
2009 jmp .Lxts_enc_done
2010
2011.align 16
/*
 * All whole blocks are written.  If the original length had a remainder
 * (r9 & 15), perform ciphertext stealing: for each tail byte, move the byte
 * of the last output block to the partial output position and replace it
 * with the corresponding input tail byte; then re-encrypt that last block
 * in place with the tweak held in xmm10.
 */
2012.Lxts_enc_done:
2013 andq $15,%r9
2014 jz .Lxts_enc_ret
2015 movq %r9,%rdx
2016
2017.Lxts_enc_steal:
2018 movzbl (%rdi),%eax
2019 movzbl -16(%rsi),%ecx
2020 leaq 1(%rdi),%rdi
2021 movb %al,-16(%rsi)
2022 movb %cl,0(%rsi)
2023 leaq 1(%rsi),%rsi
2024 subq $1,%rdx
2025 jnz .Lxts_enc_steal
2026
2027 subq %r9,%rsi /* rsi back to the end of the last whole output block */
2028 movq %rbp,%rcx
2029 movl %r10d,%eax
2030
2031 movups -16(%rsi),%xmm2
2032 xorps %xmm10,%xmm2
2033 movups (%rcx),%xmm0
2034 movups 16(%rcx),%xmm1
2035 leaq 32(%rcx),%rcx
2036 xorps %xmm0,%xmm2
2037.Loop_enc1_10:
2038.byte 102,15,56,220,209
2039 decl %eax
2040 movups (%rcx),%xmm1
2041 leaq 16(%rcx),%rcx
2042 jnz .Loop_enc1_10
2043.byte 102,15,56,221,209
2044 xorps %xmm10,%xmm2
2045 movups %xmm2,-16(%rsi)
2046
/* Epilogue: wipe every xmm register and the 112-byte scratch area, then restore rbp and rsp from r11. */
2047.Lxts_enc_ret:
2048 xorps %xmm0,%xmm0
2049 pxor %xmm1,%xmm1
2050 pxor %xmm2,%xmm2
2051 pxor %xmm3,%xmm3
2052 pxor %xmm4,%xmm4
2053 pxor %xmm5,%xmm5
2054 pxor %xmm6,%xmm6
2055 pxor %xmm7,%xmm7
2056 movaps %xmm0,0(%rsp)
2057 pxor %xmm8,%xmm8
2058 movaps %xmm0,16(%rsp)
2059 pxor %xmm9,%xmm9
2060 movaps %xmm0,32(%rsp)
2061 pxor %xmm10,%xmm10
2062 movaps %xmm0,48(%rsp)
2063 pxor %xmm11,%xmm11
2064 movaps %xmm0,64(%rsp)
2065 pxor %xmm12,%xmm12
2066 movaps %xmm0,80(%rsp)
2067 pxor %xmm13,%xmm13
2068 movaps %xmm0,96(%rsp)
2069 pxor %xmm14,%xmm14
2070 pxor %xmm15,%xmm15
2071 movq -8(%r11),%rbp
2072.cfi_restore %rbp
2073 leaq (%r11),%rsp
2074.cfi_def_cfa_register %rsp
2075.Lxts_enc_epilogue:
2076 .byte 0xf3,0xc3
2077.cfi_endproc
2078.size aesni_xts_encrypt,.-aesni_xts_encrypt
/*
 * aesni_xts_decrypt -- XTS-mode decryption built on AES-NI (GAS, AT&T syntax).
 *
 * Register inputs, as consumed by the code below:
 *   %rdi  input pointer  (advanced as data is consumed)
 *   %rsi  output pointer (advanced as data is produced)
 *   %rdx  length in bytes; a trailing partial block is handled by stealing
 *   %rcx  key schedule used to decrypt the data blocks (loop count at offset 240)
 *   %r8   key schedule used to ENCRYPT the tweak       (loop count at offset 240)
 *   %r9   pointer to the 16 bytes that are encrypted to form the first tweak
 *
 * Raw opcode bytes used throughout:
 *   243,15,30,250        endbr64
 *   102,15,56,220/221    aesenc / aesenclast (tweak generation only)
 *   102,15,56,222,modrm  aesdec
 *   102,15,56,223,modrm  aesdeclast (84,36,N = memory operand N(%rsp))
 *   0xf3,0xc3            rep ret
 *
 * Register roles after the prologue: %rbp = data key pointer, %r9 = byte
 * length (already reduced by 16 when a partial block exists), %r10 = loop
 * count / pre-biased counter, %r11 = stack pointer at entry,
 * %xmm10-%xmm15 = tweaks for six consecutive blocks, %xmm8 = .Lxts_magic
 * (defined outside this block; presumably the carry/reduction mask for tweak
 * doubling -- confirm at its definition).
 *
 * When the length is not a multiple of 16, the last whole block is held back
 * from the bulk paths and handled together with the partial tail, using the
 * two following tweaks in swapped order (see .Lxts_dec_done2).
 * All xmm registers and the stack scratch area are zeroed before returning.
 */
2079.globl aesni_xts_decrypt
2080.type aesni_xts_decrypt,@function
2081.align 16
2082aesni_xts_decrypt:
2083.cfi_startproc
2084.byte 243,15,30,250
2085 leaq (%rsp),%r11 /* r11 = rsp at entry; the epilogue unwinds from it */
2086.cfi_def_cfa_register %r11
2087 pushq %rbp
2088.cfi_offset %rbp,-16
2089 subq $112,%rsp
2090 andq $-16,%rsp /* 16-byte align the scratch area for movdqa/movaps */
/* Encrypt (not decrypt) the 16 bytes at (%r9) with the key schedule at %r8: xmm2 becomes the first tweak. */
2091 movups (%r9),%xmm2
2092 movl 240(%r8),%eax
2093 movl 240(%rcx),%r10d
2094 movups (%r8),%xmm0
2095 movups 16(%r8),%xmm1
2096 leaq 32(%r8),%r8
2097 xorps %xmm0,%xmm2
2098.Loop_enc1_11:
2099.byte 102,15,56,220,209
2100 decl %eax
2101 movups (%r8),%xmm1
2102 leaq 16(%r8),%r8
2103 jnz .Loop_enc1_11
2104.byte 102,15,56,221,209
/* If the length has a partial tail, take 16 bytes off rdx so the last whole block is left for the stealing path. */
2105 xorl %eax,%eax
2106 testq $15,%rdx
2107 setnz %al
2108 shlq $4,%rax
2109 subq %rax,%rdx
2110
/* Switch to the data key: xmm0 = round key 0, rbp = key pointer, eax = loop count, r10 = 16*loop count. */
2111 movups (%rcx),%xmm0
2112 movq %rcx,%rbp
2113 movl %r10d,%eax
2114 shll $4,%r10d
2115 movq %rdx,%r9 /* keep the byte length; its low 4 bits select stealing later */
2116 andq $-16,%rdx /* rdx = length rounded down to whole 16-byte blocks */
2117
2118 movups 16(%rcx,%r10,1),%xmm1 /* last round key */
2119
/*
 * Precompute the tweaks for the first six blocks.  Each step doubles xmm15:
 * paddq shifts both 64-bit halves left by one, and the mask in xmm14
 * (sign bits taken from xmm9, ANDed with the magic constant) is XORed in to
 * account for the bits shifted out of each half.  xmm10..xmm14 receive
 * tweak XOR round key 0; xmm15 keeps the sixth tweak unmodified.
 */
2120 movdqa .Lxts_magic(%rip),%xmm8
2121 movdqa %xmm2,%xmm15
2122 pshufd $0x5f,%xmm2,%xmm9 /* xmm9 = dwords 3,3,1,1 of the tweak: their sign bits are the carries */
2123 pxor %xmm0,%xmm1 /* xmm1 = round key 0 XOR last round key */
2124 movdqa %xmm9,%xmm14
2125 paddd %xmm9,%xmm9
2126 movdqa %xmm15,%xmm10
2127 psrad $31,%xmm14
2128 paddq %xmm15,%xmm15
2129 pand %xmm8,%xmm14
2130 pxor %xmm0,%xmm10
2131 pxor %xmm14,%xmm15
2132 movdqa %xmm9,%xmm14
2133 paddd %xmm9,%xmm9
2134 movdqa %xmm15,%xmm11
2135 psrad $31,%xmm14
2136 paddq %xmm15,%xmm15
2137 pand %xmm8,%xmm14
2138 pxor %xmm0,%xmm11
2139 pxor %xmm14,%xmm15
2140 movdqa %xmm9,%xmm14
2141 paddd %xmm9,%xmm9
2142 movdqa %xmm15,%xmm12
2143 psrad $31,%xmm14
2144 paddq %xmm15,%xmm15
2145 pand %xmm8,%xmm14
2146 pxor %xmm0,%xmm12
2147 pxor %xmm14,%xmm15
2148 movdqa %xmm9,%xmm14
2149 paddd %xmm9,%xmm9
2150 movdqa %xmm15,%xmm13
2151 psrad $31,%xmm14
2152 paddq %xmm15,%xmm15
2153 pand %xmm8,%xmm14
2154 pxor %xmm0,%xmm13
2155 pxor %xmm14,%xmm15
2156 movdqa %xmm15,%xmm14
2157 psrad $31,%xmm9
2158 paddq %xmm15,%xmm15
2159 pand %xmm8,%xmm9
2160 pxor %xmm0,%xmm14
2161 pxor %xmm9,%xmm15
2162 movaps %xmm1,96(%rsp) /* park (round key 0 XOR last round key) for the main loop */
2163
2164 subq $96,%rdx
2165 jc .Lxts_dec_short /* fewer than six whole blocks: skip the 6-way loop */
2166
/* Set up the 6-way loop: rcx points past the end of the key schedule and rax/r10 = 112 - 16*loop count, the starting counter for .Lxts_dec_loop6. */
2167 movl $16+96,%eax
2168 leaq 32(%rbp,%r10,1),%rcx
2169 subq %r10,%rax
2170 movups 16(%rbp),%xmm1
2171 movq %rax,%r10
2172 leaq .Lxts_magic(%rip),%r8
2173 jmp .Lxts_dec_grandloop
2174
2175.align 32
/*
 * Main loop, six blocks per iteration.  Inputs are XORed with
 * (tweak XOR round key 0); the tweaks are then XORed with 96(%rsp), giving
 * (tweak XOR last round key), and stored at 0..80(%rsp) as the aesdeclast
 * memory operands, which folds the final tweak XOR into the last round.
 */
2176.Lxts_dec_grandloop:
2177 movdqu 0(%rdi),%xmm2
2178 movdqa %xmm0,%xmm8
2179 movdqu 16(%rdi),%xmm3
2180 pxor %xmm10,%xmm2
2181 movdqu 32(%rdi),%xmm4
2182 pxor %xmm11,%xmm3
2183.byte 102,15,56,222,209
2184 movdqu 48(%rdi),%xmm5
2185 pxor %xmm12,%xmm4
2186.byte 102,15,56,222,217
2187 movdqu 64(%rdi),%xmm6
2188 pxor %xmm13,%xmm5
2189.byte 102,15,56,222,225
2190 movdqu 80(%rdi),%xmm7
2191 pxor %xmm15,%xmm8 /* xmm8 = sixth tweak XOR round key 0 */
2192 movdqa 96(%rsp),%xmm9
2193 pxor %xmm14,%xmm6
2194.byte 102,15,56,222,233
2195 movups 32(%rbp),%xmm0
2196 leaq 96(%rdi),%rdi
2197 pxor %xmm8,%xmm7
2198
2199 pxor %xmm9,%xmm10
2200.byte 102,15,56,222,241
2201 pxor %xmm9,%xmm11
2202 movdqa %xmm10,0(%rsp)
2203.byte 102,15,56,222,249
2204 movups 48(%rbp),%xmm1
2205 pxor %xmm9,%xmm12
2206
2207.byte 102,15,56,222,208
2208 pxor %xmm9,%xmm13
2209 movdqa %xmm11,16(%rsp)
2210.byte 102,15,56,222,216
2211 pxor %xmm9,%xmm14
2212 movdqa %xmm12,32(%rsp)
2213.byte 102,15,56,222,224
2214.byte 102,15,56,222,232
2215 pxor %xmm9,%xmm8
2216 movdqa %xmm14,64(%rsp)
2217.byte 102,15,56,222,240
2218.byte 102,15,56,222,248
2219 movups 64(%rbp),%xmm0
2220 movdqa %xmm8,80(%rsp)
2221 pshufd $0x5f,%xmm15,%xmm9 /* restart the carry tracker from the current tweak */
2222 jmp .Lxts_dec_loop6
2223.align 32
/* Middle rounds: two rounds per pass on all six blocks; rax steps by 32 until it reaches zero (jnz tests the addq result, movups leaves flags alone). */
2224.Lxts_dec_loop6:
2225.byte 102,15,56,222,209
2226.byte 102,15,56,222,217
2227.byte 102,15,56,222,225
2228.byte 102,15,56,222,233
2229.byte 102,15,56,222,241
2230.byte 102,15,56,222,249
2231 movups -64(%rcx,%rax,1),%xmm1
2232 addq $32,%rax
2233
2234.byte 102,15,56,222,208
2235.byte 102,15,56,222,216
2236.byte 102,15,56,222,224
2237.byte 102,15,56,222,232
2238.byte 102,15,56,222,240
2239.byte 102,15,56,222,248
2240 movups -80(%rcx,%rax,1),%xmm0
2241 jnz .Lxts_dec_loop6
2242
/* Remaining rounds, interleaved with computing the next six tweaks into xmm10..xmm15 (xmm10..xmm14 again XORed with round key 0 loaded from (%rbp)). */
2243 movdqa (%r8),%xmm8
2244 movdqa %xmm9,%xmm14
2245 paddd %xmm9,%xmm9
2246.byte 102,15,56,222,209
2247 paddq %xmm15,%xmm15
2248 psrad $31,%xmm14
2249.byte 102,15,56,222,217
2250 pand %xmm8,%xmm14
2251 movups (%rbp),%xmm10
2252.byte 102,15,56,222,225
2253.byte 102,15,56,222,233
2254.byte 102,15,56,222,241
2255 pxor %xmm14,%xmm15
2256 movaps %xmm10,%xmm11
2257.byte 102,15,56,222,249
2258 movups -64(%rcx),%xmm1
2259
2260 movdqa %xmm9,%xmm14
2261.byte 102,15,56,222,208
2262 paddd %xmm9,%xmm9
2263 pxor %xmm15,%xmm10
2264.byte 102,15,56,222,216
2265 psrad $31,%xmm14
2266 paddq %xmm15,%xmm15
2267.byte 102,15,56,222,224
2268.byte 102,15,56,222,232
2269 pand %xmm8,%xmm14
2270 movaps %xmm11,%xmm12
2271.byte 102,15,56,222,240
2272 pxor %xmm14,%xmm15
2273 movdqa %xmm9,%xmm14
2274.byte 102,15,56,222,248
2275 movups -48(%rcx),%xmm0
2276
2277 paddd %xmm9,%xmm9
2278.byte 102,15,56,222,209
2279 pxor %xmm15,%xmm11
2280 psrad $31,%xmm14
2281.byte 102,15,56,222,217
2282 paddq %xmm15,%xmm15
2283 pand %xmm8,%xmm14
2284.byte 102,15,56,222,225
2285.byte 102,15,56,222,233
2286 movdqa %xmm13,48(%rsp) /* the fourth aesdeclast operand is stored here, just before xmm13 is reused */
2287 pxor %xmm14,%xmm15
2288.byte 102,15,56,222,241
2289 movaps %xmm12,%xmm13
2290 movdqa %xmm9,%xmm14
2291.byte 102,15,56,222,249
2292 movups -32(%rcx),%xmm1
2293
2294 paddd %xmm9,%xmm9
2295.byte 102,15,56,222,208
2296 pxor %xmm15,%xmm12
2297 psrad $31,%xmm14
2298.byte 102,15,56,222,216
2299 paddq %xmm15,%xmm15
2300 pand %xmm8,%xmm14
2301.byte 102,15,56,222,224
2302.byte 102,15,56,222,232
2303.byte 102,15,56,222,240
2304 pxor %xmm14,%xmm15
2305 movaps %xmm13,%xmm14
2306.byte 102,15,56,222,248
2307
2308 movdqa %xmm9,%xmm0
2309 paddd %xmm9,%xmm9
2310.byte 102,15,56,222,209
2311 pxor %xmm15,%xmm13
2312 psrad $31,%xmm0
2313.byte 102,15,56,222,217
2314 paddq %xmm15,%xmm15
2315 pand %xmm8,%xmm0
2316.byte 102,15,56,222,225
2317.byte 102,15,56,222,233
2318 pxor %xmm0,%xmm15
2319 movups (%rbp),%xmm0 /* reload round key 0 for the next iteration / short path */
2320.byte 102,15,56,222,241
2321.byte 102,15,56,222,249
2322 movups 16(%rbp),%xmm1
2323
/* Final round: aesdeclast with the operands saved at 0..80(%rsp). */
2324 pxor %xmm15,%xmm14
2325.byte 102,15,56,223,84,36,0
2326 psrad $31,%xmm9
2327 paddq %xmm15,%xmm15
2328.byte 102,15,56,223,92,36,16
2329.byte 102,15,56,223,100,36,32
2330 pand %xmm8,%xmm9
2331 movq %r10,%rax /* rewind the round counter */
2332.byte 102,15,56,223,108,36,48
2333.byte 102,15,56,223,116,36,64
2334.byte 102,15,56,223,124,36,80
2335 pxor %xmm9,%xmm15
2336
2337 leaq 96(%rsi),%rsi
2338 movups %xmm2,-96(%rsi)
2339 movups %xmm3,-80(%rsi)
2340 movups %xmm4,-64(%rsi)
2341 movups %xmm5,-48(%rsi)
2342 movups %xmm6,-32(%rsi)
2343 movups %xmm7,-16(%rsi)
2344 subq $96,%rdx
2345 jnc .Lxts_dec_grandloop
2346
/* Leaving the 6-way loop: turn r10 back into the plain loop count (eax) and restore rcx = key pointer. */
2347 movl $16+96,%eax
2348 subl %r10d,%eax
2349 movq %rbp,%rcx
2350 shrl $4,%eax
2351
/*
 * 0..5 whole blocks remain.  Tweaks are XORed with xmm0 (round key 0) to
 * undo the pre-whitening.  Unlike the encrypt side, one tweak more than the
 * number of blocks is un-whitened on every path, because the stealing code
 * needs the two tweaks that follow the last bulk block.  The pxor
 * instructions between cmpq and je rely on pxor not modifying flags.
 */
2352.Lxts_dec_short:
2353
2354 movl %eax,%r10d /* r10d = loop count, reused by the stealing path */
2355 pxor %xmm0,%xmm10
2356 pxor %xmm0,%xmm11
2357 addq $96,%rdx
2358 jz .Lxts_dec_done
2359
2360 pxor %xmm0,%xmm12
2361 cmpq $0x20,%rdx
2362 jb .Lxts_dec_one
2363 pxor %xmm0,%xmm13
2364 je .Lxts_dec_two
2365
2366 pxor %xmm0,%xmm14
2367 cmpq $0x40,%rdx
2368 jb .Lxts_dec_three
2369 je .Lxts_dec_four
2370
/* Five blocks via the 6-way helper. */
2371 movdqu (%rdi),%xmm2
2372 movdqu 16(%rdi),%xmm3
2373 movdqu 32(%rdi),%xmm4
2374 pxor %xmm10,%xmm2
2375 movdqu 48(%rdi),%xmm5
2376 pxor %xmm11,%xmm3
2377 movdqu 64(%rdi),%xmm6
2378 leaq 80(%rdi),%rdi
2379 pxor %xmm12,%xmm4
2380 pxor %xmm13,%xmm5
2381 pxor %xmm14,%xmm6
2382
2383 call _aesni_decrypt6
2384
2385 xorps %xmm10,%xmm2
2386 xorps %xmm11,%xmm3
2387 xorps %xmm12,%xmm4
2388 movdqu %xmm2,(%rsi)
2389 xorps %xmm13,%xmm5
2390 movdqu %xmm3,16(%rsi)
2391 xorps %xmm14,%xmm6
2392 movdqu %xmm4,32(%rsi)
2393 pxor %xmm14,%xmm14
2394 movdqu %xmm5,48(%rsi)
2395 pcmpgtd %xmm15,%xmm14 /* xmm14 = all-ones in each dword of xmm15 that is negative */
2396 movdqu %xmm6,64(%rsi)
2397 leaq 80(%rsi),%rsi
2398 pshufd $0x13,%xmm14,%xmm11 /* arrange the sign masks as carry selectors for one more doubling */
2399 andq $15,%r9
2400 jz .Lxts_dec_ret
2401
/* A partial tail follows: xmm10 = tweak after the five blocks, xmm11 = that tweak doubled once more. */
2402 movdqa %xmm15,%xmm10
2403 paddq %xmm15,%xmm15
2404 pand %xmm8,%xmm11
2405 pxor %xmm15,%xmm11
2406 jmp .Lxts_dec_done2
2407
2408.align 16
/* One block: inline single-block decryption; afterwards xmm10/xmm11 = the next two tweaks. */
2409.Lxts_dec_one:
2410 movups (%rdi),%xmm2
2411 leaq 16(%rdi),%rdi
2412 xorps %xmm10,%xmm2
2413 movups (%rcx),%xmm0
2414 movups 16(%rcx),%xmm1
2415 leaq 32(%rcx),%rcx
2416 xorps %xmm0,%xmm2
2417.Loop_dec1_12:
2418.byte 102,15,56,222,209
2419 decl %eax
2420 movups (%rcx),%xmm1
2421 leaq 16(%rcx),%rcx
2422 jnz .Loop_dec1_12
2423.byte 102,15,56,223,209
2424 xorps %xmm10,%xmm2
2425 movdqa %xmm11,%xmm10
2426 movups %xmm2,(%rsi)
2427 movdqa %xmm12,%xmm11
2428 leaq 16(%rsi),%rsi
2429 jmp .Lxts_dec_done
2430
2431.align 16
/* Two blocks; afterwards xmm10/xmm11 = the next two tweaks. */
2432.Lxts_dec_two:
2433 movups (%rdi),%xmm2
2434 movups 16(%rdi),%xmm3
2435 leaq 32(%rdi),%rdi
2436 xorps %xmm10,%xmm2
2437 xorps %xmm11,%xmm3
2438
2439 call _aesni_decrypt2
2440
2441 xorps %xmm10,%xmm2
2442 movdqa %xmm12,%xmm10
2443 xorps %xmm11,%xmm3
2444 movdqa %xmm13,%xmm11
2445 movups %xmm2,(%rsi)
2446 movups %xmm3,16(%rsi)
2447 leaq 32(%rsi),%rsi
2448 jmp .Lxts_dec_done
2449
2450.align 16
/* Three blocks; afterwards xmm10/xmm11 = the next two tweaks. */
2451.Lxts_dec_three:
2452 movups (%rdi),%xmm2
2453 movups 16(%rdi),%xmm3
2454 movups 32(%rdi),%xmm4
2455 leaq 48(%rdi),%rdi
2456 xorps %xmm10,%xmm2
2457 xorps %xmm11,%xmm3
2458 xorps %xmm12,%xmm4
2459
2460 call _aesni_decrypt3
2461
2462 xorps %xmm10,%xmm2
2463 movdqa %xmm13,%xmm10
2464 xorps %xmm11,%xmm3
2465 movdqa %xmm14,%xmm11
2466 xorps %xmm12,%xmm4
2467 movups %xmm2,(%rsi)
2468 movups %xmm3,16(%rsi)
2469 movups %xmm4,32(%rsi)
2470 leaq 48(%rsi),%rsi
2471 jmp .Lxts_dec_done
2472
2473.align 16
/* Four blocks; afterwards xmm10/xmm11 = the next two tweaks (xmm15 was never pre-whitened). */
2474.Lxts_dec_four:
2475 movups (%rdi),%xmm2
2476 movups 16(%rdi),%xmm3
2477 movups 32(%rdi),%xmm4
2478 xorps %xmm10,%xmm2
2479 movups 48(%rdi),%xmm5
2480 leaq 64(%rdi),%rdi
2481 xorps %xmm11,%xmm3
2482 xorps %xmm12,%xmm4
2483 xorps %xmm13,%xmm5
2484
2485 call _aesni_decrypt4
2486
2487 pxor %xmm10,%xmm2
2488 movdqa %xmm14,%xmm10
2489 pxor %xmm11,%xmm3
2490 movdqa %xmm15,%xmm11
2491 pxor %xmm12,%xmm4
2492 movdqu %xmm2,(%rsi)
2493 pxor %xmm13,%xmm5
2494 movdqu %xmm3,16(%rsi)
2495 movdqu %xmm4,32(%rsi)
2496 movdqu %xmm5,48(%rsi)
2497 leaq 64(%rsi),%rsi
2498 jmp .Lxts_dec_done
2499
2500.align 16
/*
 * Bulk blocks are done.  If the length had a remainder (r9 & 15), finish
 * with ciphertext stealing:
 *   1. decrypt the held-back whole block at (%rdi) with the LATER tweak
 *      (xmm11) and store it at (%rsi);
 *   2. for each tail byte, move the byte just produced at (%rsi) out to the
 *      partial output at 16(%rsi) and replace it with the input tail byte
 *      from 16(%rdi);
 *   3. decrypt the reassembled block at (%rsi) in place with the EARLIER
 *      tweak (xmm10).
 */
2501.Lxts_dec_done:
2502 andq $15,%r9
2503 jz .Lxts_dec_ret
2504.Lxts_dec_done2:
2505 movq %r9,%rdx
2506 movq %rbp,%rcx
2507 movl %r10d,%eax
2508
2509 movups (%rdi),%xmm2
2510 xorps %xmm11,%xmm2
2511 movups (%rcx),%xmm0
2512 movups 16(%rcx),%xmm1
2513 leaq 32(%rcx),%rcx
2514 xorps %xmm0,%xmm2
2515.Loop_dec1_13:
2516.byte 102,15,56,222,209
2517 decl %eax
2518 movups (%rcx),%xmm1
2519 leaq 16(%rcx),%rcx
2520 jnz .Loop_dec1_13
2521.byte 102,15,56,223,209
2522 xorps %xmm11,%xmm2
2523 movups %xmm2,(%rsi)
2524
2525.Lxts_dec_steal:
2526 movzbl 16(%rdi),%eax
2527 movzbl (%rsi),%ecx
2528 leaq 1(%rdi),%rdi
2529 movb %al,(%rsi)
2530 movb %cl,16(%rsi)
2531 leaq 1(%rsi),%rsi
2532 subq $1,%rdx
2533 jnz .Lxts_dec_steal
2534
2535 subq %r9,%rsi /* rsi back to the start of the reassembled block */
2536 movq %rbp,%rcx
2537 movl %r10d,%eax
2538
2539 movups (%rsi),%xmm2
2540 xorps %xmm10,%xmm2
2541 movups (%rcx),%xmm0
2542 movups 16(%rcx),%xmm1
2543 leaq 32(%rcx),%rcx
2544 xorps %xmm0,%xmm2
2545.Loop_dec1_14:
2546.byte 102,15,56,222,209
2547 decl %eax
2548 movups (%rcx),%xmm1
2549 leaq 16(%rcx),%rcx
2550 jnz .Loop_dec1_14
2551.byte 102,15,56,223,209
2552 xorps %xmm10,%xmm2
2553 movups %xmm2,(%rsi)
2554
/* Epilogue: wipe every xmm register and the 112-byte scratch area, then restore rbp and rsp from r11. */
2555.Lxts_dec_ret:
2556 xorps %xmm0,%xmm0
2557 pxor %xmm1,%xmm1
2558 pxor %xmm2,%xmm2
2559 pxor %xmm3,%xmm3
2560 pxor %xmm4,%xmm4
2561 pxor %xmm5,%xmm5
2562 pxor %xmm6,%xmm6
2563 pxor %xmm7,%xmm7
2564 movaps %xmm0,0(%rsp)
2565 pxor %xmm8,%xmm8
2566 movaps %xmm0,16(%rsp)
2567 pxor %xmm9,%xmm9
2568 movaps %xmm0,32(%rsp)
2569 pxor %xmm10,%xmm10
2570 movaps %xmm0,48(%rsp)
2571 pxor %xmm11,%xmm11
2572 movaps %xmm0,64(%rsp)
2573 pxor %xmm12,%xmm12
2574 movaps %xmm0,80(%rsp)
2575 pxor %xmm13,%xmm13
2576 movaps %xmm0,96(%rsp)
2577 pxor %xmm14,%xmm14
2578 pxor %xmm15,%xmm15
2579 movq -8(%r11),%rbp
2580.cfi_restore %rbp
2581 leaq (%r11),%rsp
2582.cfi_def_cfa_register %rsp
2583.Lxts_dec_epilogue:
2584 .byte 0xf3,0xc3
2585.cfi_endproc
2586.size aesni_xts_decrypt,.-aesni_xts_decrypt
/*
 * aesni_ocb_encrypt -- OCB-mode bulk encryption driver (GAS, AT&T syntax).
 *
 * Inputs, as consumed by the code below:
 *   %rdi      input pointer, whole 16-byte blocks
 *   %rsi      output pointer
 *   %rdx      number of 16-byte blocks (decremented by 1 or 6 per step)
 *   %rcx      key schedule (loop count at offset 240)
 *   %r8       number of the first block; selects table entries via bsf
 *   %r9       pointer to a 16-byte running offset, read at entry and
 *             rewritten at exit
 *   8(%rsp)   at entry: pointer to a table of 16-byte entries, indexed by
 *             16*bsf(block number)          (kept in %rbx)
 *   16(%rsp)  at entry: pointer to a 16-byte accumulator, loaded into %xmm8
 *             at entry and stored back at exit (kept in %rbp); the visible
 *             part of __ocb_encrypt6 XORs each input block into %xmm8
 *
 * The actual AES work is done by __ocb_encrypt1/4/6, which take their inputs
 * implicitly in registers set up here: %r11 = key pointer, %rcx = pointer
 * past the key schedule, %r10 = 48 - 16*loop count, %xmm9 = round key 0 XOR
 * last round key, %xmm10 = table entry 0, %xmm15 = current offset XOR last
 * round key, %r12/%r13/%r14 = byte offsets of the next table entries.
 *
 * 243,15,30,250 is endbr64 and 0xf3,0xc3 is rep ret.  All xmm registers are
 * zeroed before returning.
 */
2587.globl aesni_ocb_encrypt
2588.type aesni_ocb_encrypt,@function
2589.align 32
2590aesni_ocb_encrypt:
2591.cfi_startproc
2592.byte 243,15,30,250
2593 leaq (%rsp),%rax /* rax = rsp at entry, used to reach the stack arguments */
2594 pushq %rbx
2595.cfi_adjust_cfa_offset 8
2596.cfi_offset %rbx,-16
2597 pushq %rbp
2598.cfi_adjust_cfa_offset 8
2599.cfi_offset %rbp,-24
2600 pushq %r12
2601.cfi_adjust_cfa_offset 8
2602.cfi_offset %r12,-32
2603 pushq %r13
2604.cfi_adjust_cfa_offset 8
2605.cfi_offset %r13,-40
2606 pushq %r14
2607.cfi_adjust_cfa_offset 8
2608.cfi_offset %r14,-48
2609 movq 8(%rax),%rbx /* first stack argument: table base */
2610 movq 8+8(%rax),%rbp /* second stack argument: accumulator pointer */
2611
2612 movl 240(%rcx),%r10d
2613 movq %rcx,%r11
2614 shll $4,%r10d /* r10 = 16 * loop count */
2615 movups (%rcx),%xmm9 /* round key 0 */
2616 movups 16(%rcx,%r10,1),%xmm1 /* last round key */
2617
2618 movdqu (%r9),%xmm15
2619 pxor %xmm1,%xmm9 /* xmm9 = round key 0 XOR last round key */
2620 pxor %xmm1,%xmm15 /* xmm15 = running offset XOR last round key */
2621
/* Pre-bias the round counter for the helpers: rcx points past the key schedule, r10 = 48 - 16*loop count. */
2622 movl $16+32,%eax
2623 leaq 32(%r11,%r10,1),%rcx
2624 movups 16(%r11),%xmm1
2625 subq %r10,%rax
2626 movq %rax,%r10
2627
2628 movdqu (%rbx),%xmm10 /* table entry 0 */
2629 movdqu (%rbp),%xmm8 /* accumulator */
2630
/* The 6-way path assumes an odd block number.  If r8 is even, encrypt one block first using the table entry selected by bsf(r8). */
2631 testq $1,%r8
2632 jnz .Locb_enc_odd
2633
2634 bsfq %r8,%r12
2635 addq $1,%r8
2636 shlq $4,%r12
2637 movdqu (%rbx,%r12,1),%xmm7
2638 movdqu (%rdi),%xmm2
2639 leaq 16(%rdi),%rdi
2640
2641 call __ocb_encrypt1
2642
2643 movdqa %xmm7,%xmm15 /* helper leaves the updated offset in xmm7 */
2644 movups %xmm2,(%rsi)
2645 leaq 16(%rsi),%rsi
2646 subq $1,%rdx
2647 jz .Locb_enc_done
2648
/* r8 is odd here.  Precompute table byte offsets 16*bsf(r8+1), 16*bsf(r8+3), 16*bsf(r8+5) and advance r8 by 6. */
2649.Locb_enc_odd:
2650 leaq 1(%r8),%r12
2651 leaq 3(%r8),%r13
2652 leaq 5(%r8),%r14
2653 leaq 6(%r8),%r8
2654 bsfq %r12,%r12
2655 bsfq %r13,%r13
2656 bsfq %r14,%r14
2657 shlq $4,%r12
2658 shlq $4,%r13
2659 shlq $4,%r14
2660
2661 subq $6,%rdx
2662 jc .Locb_enc_short
2663 jmp .Locb_enc_grandloop
2664
2665.align 32
/* Main loop: six blocks per iteration through __ocb_encrypt6. */
2666.Locb_enc_grandloop:
2667 movdqu 0(%rdi),%xmm2
2668 movdqu 16(%rdi),%xmm3
2669 movdqu 32(%rdi),%xmm4
2670 movdqu 48(%rdi),%xmm5
2671 movdqu 64(%rdi),%xmm6
2672 movdqu 80(%rdi),%xmm7
2673 leaq 96(%rdi),%rdi
2674
2675 call __ocb_encrypt6
2676
2677 movups %xmm2,0(%rsi)
2678 movups %xmm3,16(%rsi)
2679 movups %xmm4,32(%rsi)
2680 movups %xmm5,48(%rsi)
2681 movups %xmm6,64(%rsi)
2682 movups %xmm7,80(%rsi)
2683 leaq 96(%rsi),%rsi
2684 subq $6,%rdx
2685 jnc .Locb_enc_grandloop
2686
/*
 * 0..5 blocks remain.  Blocks are loaded between the compare and the
 * conditional jumps (movdqu does not modify flags).  Unused lanes are zeroed
 * before calling a wider helper, and xmm15 is then set from the offset
 * register that corresponds to the last real block.
 */
2687.Locb_enc_short:
2688 addq $6,%rdx
2689 jz .Locb_enc_done
2690
2691 movdqu 0(%rdi),%xmm2
2692 cmpq $2,%rdx
2693 jb .Locb_enc_one
2694 movdqu 16(%rdi),%xmm3
2695 je .Locb_enc_two
2696
2697 movdqu 32(%rdi),%xmm4
2698 cmpq $4,%rdx
2699 jb .Locb_enc_three
2700 movdqu 48(%rdi),%xmm5
2701 je .Locb_enc_four
2702
/* Five blocks: 6-way helper with a zeroed sixth lane. */
2703 movdqu 64(%rdi),%xmm6
2704 pxor %xmm7,%xmm7
2705
2706 call __ocb_encrypt6
2707
2708 movdqa %xmm14,%xmm15
2709 movups %xmm2,0(%rsi)
2710 movups %xmm3,16(%rsi)
2711 movups %xmm4,32(%rsi)
2712 movups %xmm5,48(%rsi)
2713 movups %xmm6,64(%rsi)
2714
2715 jmp .Locb_enc_done
2716
2717.align 16
/* One block: r8 is odd, so table entry 0 (xmm10) is the one to use. */
2718.Locb_enc_one:
2719 movdqa %xmm10,%xmm7
2720
2721 call __ocb_encrypt1
2722
2723 movdqa %xmm7,%xmm15
2724 movups %xmm2,0(%rsi)
2725 jmp .Locb_enc_done
2726
2727.align 16
/* Two blocks: 4-way helper with lanes 3 and 4 zeroed. */
2728.Locb_enc_two:
2729 pxor %xmm4,%xmm4
2730 pxor %xmm5,%xmm5
2731
2732 call __ocb_encrypt4
2733
2734 movdqa %xmm11,%xmm15
2735 movups %xmm2,0(%rsi)
2736 movups %xmm3,16(%rsi)
2737
2738 jmp .Locb_enc_done
2739
2740.align 16
/* Three blocks: 4-way helper with lane 4 zeroed. */
2741.Locb_enc_three:
2742 pxor %xmm5,%xmm5
2743
2744 call __ocb_encrypt4
2745
2746 movdqa %xmm12,%xmm15
2747 movups %xmm2,0(%rsi)
2748 movups %xmm3,16(%rsi)
2749 movups %xmm4,32(%rsi)
2750
2751 jmp .Locb_enc_done
2752
2753.align 16
/* Four blocks. */
2754.Locb_enc_four:
2755 call __ocb_encrypt4
2756
2757 movdqa %xmm13,%xmm15
2758 movups %xmm2,0(%rsi)
2759 movups %xmm3,16(%rsi)
2760 movups %xmm4,32(%rsi)
2761 movups %xmm5,48(%rsi)
2762
/* Write back the accumulator and the running offset, wipe all xmm registers, restore callee-saved registers. */
2763.Locb_enc_done:
2764 pxor %xmm0,%xmm15 /* NOTE(review): presumably xmm0 still holds the last round key left by the helper, so this undoes the XOR applied at entry -- confirm in __ocb_encrypt1/4/6 */
2765 movdqu %xmm8,(%rbp)
2766 movdqu %xmm15,(%r9)
2767
2768 xorps %xmm0,%xmm0
2769 pxor %xmm1,%xmm1
2770 pxor %xmm2,%xmm2
2771 pxor %xmm3,%xmm3
2772 pxor %xmm4,%xmm4
2773 pxor %xmm5,%xmm5
2774 pxor %xmm6,%xmm6
2775 pxor %xmm7,%xmm7
2776 pxor %xmm8,%xmm8
2777 pxor %xmm9,%xmm9
2778 pxor %xmm10,%xmm10
2779 pxor %xmm11,%xmm11
2780 pxor %xmm12,%xmm12
2781 pxor %xmm13,%xmm13
2782 pxor %xmm14,%xmm14
2783 pxor %xmm15,%xmm15
2784 leaq 40(%rsp),%rax /* rax = rsp at entry (five 8-byte pushes above) */
2785.cfi_def_cfa %rax,8
2786 movq -40(%rax),%r14
2787.cfi_restore %r14
2788 movq -32(%rax),%r13
2789.cfi_restore %r13
2790 movq -24(%rax),%r12
2791.cfi_restore %r12
2792 movq -16(%rax),%rbp
2793.cfi_restore %rbp
2794 movq -8(%rax),%rbx
2795.cfi_restore %rbx
2796 leaq (%rax),%rsp
2797.cfi_def_cfa_register %rsp
2798.Locb_enc_epilogue:
2799 .byte 0xf3,0xc3
2800.cfi_endproc
2801.size aesni_ocb_encrypt,.-aesni_ocb_encrypt
2802
# __ocb_encrypt6 - local helper: runs the six 128-bit blocks in %xmm2..%xmm7
# through the AES encryption rounds in parallel. Each block is XORed with its
# own mask before the first round, and the same mask is folded into the final
# round. The entry point that sets up the registers (aesni_ocb_encrypt) begins
# outside this excerpt, so the roles below are read off this code only:
#   %xmm2..%xmm7    in: input blocks, out: result blocks
#   %xmm8           accumulator, XORed with every input block before masking
#   %xmm9           constant XORed into %xmm15 on entry and into every mask
#                   after the mask has been applied to its block
#                   (NOTE(review): aesni_ocb_decrypt, later in this file, sets
#                   it to round key 0 XOR the final round key - presumably the
#                   encrypt entry point does the same; confirm)
#   %xmm10          in: entry value used as the repeating chain term,
#                   reloaded from (%rbx) before return
#   %xmm15          in: mask carried over from the previous call,
#                   out: mask of the sixth block
#   %xmm0, %xmm1    round-key scratch; %xmm1 is expected to hold the key at
#                   16(%r11) on entry and is reloaded with it before return
#   %rbx            base of a table of 16-byte entries
#   %r12, %r13, %r14  byte offsets into that table, recomputed for next call
#   %r8             counter, advanced by 6
#   %r11            key schedule base; %rcx + %rax address the later keys
#   %r10            value copied back into %rax before return
# Encodings: .byte 102,15,56,220,M is aesenc and .byte 102,65,15,56,221,M is
# aesenclast with a source register in %xmm8..%xmm15; .byte 0xf3,0xc3 is
# rep ret.
2803.type __ocb_encrypt6,@function
2804.align 32
2805__ocb_encrypt6:
2806.cfi_startproc
# Build the mask chain %xmm10..%xmm15: every mask is the previous mask XORed
# with either the entry value of %xmm10 or a table entry at offset %r12, %r13
# or %r14. Each input block is XORed into %xmm8 first, then with its mask.
2807 pxor %xmm9,%xmm15
2808 movdqu (%rbx,%r12,1),%xmm11
2809 movdqa %xmm10,%xmm12
2810 movdqu (%rbx,%r13,1),%xmm13
2811 movdqa %xmm10,%xmm14
2812 pxor %xmm15,%xmm10
2813 movdqu (%rbx,%r14,1),%xmm15
2814 pxor %xmm10,%xmm11
2815 pxor %xmm2,%xmm8
2816 pxor %xmm10,%xmm2
2817 pxor %xmm11,%xmm12
2818 pxor %xmm3,%xmm8
2819 pxor %xmm11,%xmm3
2820 pxor %xmm12,%xmm13
2821 pxor %xmm4,%xmm8
2822 pxor %xmm12,%xmm4
2823 pxor %xmm13,%xmm14
2824 pxor %xmm5,%xmm8
2825 pxor %xmm13,%xmm5
2826 pxor %xmm14,%xmm15
2827 pxor %xmm6,%xmm8
2828 pxor %xmm14,%xmm6
2829 pxor %xmm7,%xmm8
2830 pxor %xmm15,%xmm7
2831 movups 32(%r11),%xmm0
2832
# Table indices for the next call: bsf (lowest set bit) of %r8+1, %r8+3 and
# %r8+5. The scaling by 16 (shlq $4) is interleaved further down.
2833 leaq 1(%r8),%r12
2834 leaq 3(%r8),%r13
2835 leaq 5(%r8),%r14
2836 addq $6,%r8
2837 pxor %xmm9,%xmm10
2838 bsfq %r12,%r12
2839 bsfq %r13,%r13
2840 bsfq %r14,%r14
2841
# aesenc with %xmm1 on %xmm2..%xmm7 (ModRM 209..249). The interleaved pxor
# instructions fold %xmm9 into masks %xmm11..%xmm15 now that they have been
# applied to the data.
2842.byte 102,15,56,220,209
2843.byte 102,15,56,220,217
2844.byte 102,15,56,220,225
2845.byte 102,15,56,220,233
2846 pxor %xmm9,%xmm11
2847 pxor %xmm9,%xmm12
2848.byte 102,15,56,220,241
2849 pxor %xmm9,%xmm13
2850 pxor %xmm9,%xmm14
2851.byte 102,15,56,220,249
2852 movups 48(%r11),%xmm1
2853 pxor %xmm9,%xmm15
2854
# aesenc with %xmm0 on %xmm2..%xmm7 (ModRM 208..248).
2855.byte 102,15,56,220,208
2856.byte 102,15,56,220,216
2857.byte 102,15,56,220,224
2858.byte 102,15,56,220,232
2859.byte 102,15,56,220,240
2860.byte 102,15,56,220,248
2861 movups 64(%r11),%xmm0
2862 shlq $4,%r12
2863 shlq $4,%r13
2864 jmp .Locb_enc_loop6
2865
# Round loop, two rounds per iteration. %rax is a byte offset relative to
# %rcx that is stepped by 32; the jnz tests the flags left by addq (the
# SSE moves and AES instructions in between do not modify flags), so the loop
# ends when %rax reaches zero.
2866.align 32
2867.Locb_enc_loop6:
2868.byte 102,15,56,220,209
2869.byte 102,15,56,220,217
2870.byte 102,15,56,220,225
2871.byte 102,15,56,220,233
2872.byte 102,15,56,220,241
2873.byte 102,15,56,220,249
2874 movups (%rcx,%rax,1),%xmm1
2875 addq $32,%rax
2876
2877.byte 102,15,56,220,208
2878.byte 102,15,56,220,216
2879.byte 102,15,56,220,224
2880.byte 102,15,56,220,232
2881.byte 102,15,56,220,240
2882.byte 102,15,56,220,248
2883 movups -16(%rcx,%rax,1),%xmm0
2884 jnz .Locb_enc_loop6
2885
# One more aesenc round with %xmm1, then restore %xmm1 to the key at 16(%r11)
# and finish scaling %r14.
2886.byte 102,15,56,220,209
2887.byte 102,15,56,220,217
2888.byte 102,15,56,220,225
2889.byte 102,15,56,220,233
2890.byte 102,15,56,220,241
2891.byte 102,15,56,220,249
2892 movups 16(%r11),%xmm1
2893 shlq $4,%r14
2894
# aesenclast with masks %xmm10..%xmm15 as the final-round operand for
# %xmm2..%xmm7. %xmm10 is reloaded from (%rbx) right after its last use and
# %rax is reset from %r10 for the next helper call.
2895.byte 102,65,15,56,221,210
2896 movdqu (%rbx),%xmm10
2897 movq %r10,%rax
2898.byte 102,65,15,56,221,219
2899.byte 102,65,15,56,221,228
2900.byte 102,65,15,56,221,237
2901.byte 102,65,15,56,221,246
2902.byte 102,65,15,56,221,255
2903 .byte 0xf3,0xc3
2904.cfi_endproc
2905.size __ocb_encrypt6,.-__ocb_encrypt6
2906
# __ocb_encrypt4 - local helper: four-block variant of __ocb_encrypt6.
# Processes %xmm2..%xmm5 with masks %xmm10..%xmm13. Unlike the six-block
# helper it neither recomputes %r12/%r13 nor advances %r8, and it does not
# reload %xmm10 before returning. Callers visible in this file copy one of
# %xmm11..%xmm13 into %xmm15 afterwards.
#   %xmm2..%xmm5  in: input blocks, out: result blocks
#   %xmm8         accumulator, XORed with every input block before masking
#   %xmm9         constant XORed into %xmm15 on entry and into each mask after
#                 the mask has been applied
#   %xmm10        in: chain term; %xmm15 in: carried mask
#   %rbx,%r12,%r13  table base and byte offsets of the two table entries used
#   %r11          key schedule base; %rcx + %rax address the later round keys
#   %r10          value copied back into %rax before return
# Encodings: .byte 102,15,56,220,M is aesenc, .byte 102,65,15,56,221,M is
# aesenclast with a source in %xmm8..%xmm15, .byte 0xf3,0xc3 is rep ret.
2907.type __ocb_encrypt4,@function
2908.align 32
2909__ocb_encrypt4:
2910.cfi_startproc
# Mask chain: %xmm10 ^= %xmm15, %xmm11 = table[%r12] ^ %xmm10,
# %xmm12 = entry %xmm10 ^ %xmm11, %xmm13 = table[%r13] ^ %xmm12.
# Each block is XORed into %xmm8 and then with its mask.
2911 pxor %xmm9,%xmm15
2912 movdqu (%rbx,%r12,1),%xmm11
2913 movdqa %xmm10,%xmm12
2914 movdqu (%rbx,%r13,1),%xmm13
2915 pxor %xmm15,%xmm10
2916 pxor %xmm10,%xmm11
2917 pxor %xmm2,%xmm8
2918 pxor %xmm10,%xmm2
2919 pxor %xmm11,%xmm12
2920 pxor %xmm3,%xmm8
2921 pxor %xmm11,%xmm3
2922 pxor %xmm12,%xmm13
2923 pxor %xmm4,%xmm8
2924 pxor %xmm12,%xmm4
2925 pxor %xmm5,%xmm8
2926 pxor %xmm13,%xmm5
2927 movups 32(%r11),%xmm0
2928
# Fold %xmm9 into the masks so they can serve as final-round operands.
2929 pxor %xmm9,%xmm10
2930 pxor %xmm9,%xmm11
2931 pxor %xmm9,%xmm12
2932 pxor %xmm9,%xmm13
2933
# aesenc with %xmm1, then with %xmm0, on %xmm2..%xmm5.
2934.byte 102,15,56,220,209
2935.byte 102,15,56,220,217
2936.byte 102,15,56,220,225
2937.byte 102,15,56,220,233
2938 movups 48(%r11),%xmm1
2939
2940.byte 102,15,56,220,208
2941.byte 102,15,56,220,216
2942.byte 102,15,56,220,224
2943.byte 102,15,56,220,232
2944 movups 64(%r11),%xmm0
2945 jmp .Locb_enc_loop4
2946
# Round loop, two rounds per iteration; ends when addq leaves %rax at zero
# (the jnz consumes the addq flags).
2947.align 32
2948.Locb_enc_loop4:
2949.byte 102,15,56,220,209
2950.byte 102,15,56,220,217
2951.byte 102,15,56,220,225
2952.byte 102,15,56,220,233
2953 movups (%rcx,%rax,1),%xmm1
2954 addq $32,%rax
2955
2956.byte 102,15,56,220,208
2957.byte 102,15,56,220,216
2958.byte 102,15,56,220,224
2959.byte 102,15,56,220,232
2960 movups -16(%rcx,%rax,1),%xmm0
2961 jnz .Locb_enc_loop4
2962
# Last aesenc round with %xmm1; restore %xmm1 and %rax for the next call.
2963.byte 102,15,56,220,209
2964.byte 102,15,56,220,217
2965.byte 102,15,56,220,225
2966.byte 102,15,56,220,233
2967 movups 16(%r11),%xmm1
2968 movq %r10,%rax
2969
# aesenclast with masks %xmm10..%xmm13 on %xmm2..%xmm5.
2970.byte 102,65,15,56,221,210
2971.byte 102,65,15,56,221,219
2972.byte 102,65,15,56,221,228
2973.byte 102,65,15,56,221,237
2974 .byte 0xf3,0xc3
2975.cfi_endproc
2976.size __ocb_encrypt4,.-__ocb_encrypt4
2977
# __ocb_encrypt1 - local helper: single-block variant of the OCB encrypt
# helpers.
#   %xmm2   in: input block, out: result block
#   %xmm7   in: value combined with %xmm15 and %xmm9 to form the mask,
#           out: the mask with %xmm9 removed again (callers visible in this
#           file copy it into %xmm15)
#   %xmm8   accumulator, XORed with the input block before masking
#   %xmm9   constant folded into and later out of the mask
#   %xmm1   expected to hold the key at 16(%r11) on entry; reloaded on exit
#   %r11    key schedule base; %rcx + %rax address the later round keys
#   %r10    value copied back into %rax before return
# Encodings: .byte 102,15,56,220,M is aesenc, .byte 102,15,56,221,215 is
# aesenclast %xmm7,%xmm2, .byte 0xf3,0xc3 is rep ret.
2978.type __ocb_encrypt1,@function
2979.align 32
2980__ocb_encrypt1:
2981.cfi_startproc
# mask = %xmm7 ^ %xmm15 ^ %xmm9; accumulate the input, then mask it.
2982 pxor %xmm15,%xmm7
2983 pxor %xmm9,%xmm7
2984 pxor %xmm2,%xmm8
2985 pxor %xmm7,%xmm2
2986 movups 32(%r11),%xmm0
2987
# First two rounds; in between, %xmm9 is XORed into %xmm7 a second time,
# which cancels the earlier XOR and leaves %xmm7 ready for aesenclast.
2988.byte 102,15,56,220,209
2989 movups 48(%r11),%xmm1
2990 pxor %xmm9,%xmm7
2991
2992.byte 102,15,56,220,208
2993 movups 64(%r11),%xmm0
2994 jmp .Locb_enc_loop1
2995
# Round loop, two rounds per iteration; ends when addq leaves %rax at zero.
2996.align 32
2997.Locb_enc_loop1:
2998.byte 102,15,56,220,209
2999 movups (%rcx,%rax,1),%xmm1
3000 addq $32,%rax
3001
3002.byte 102,15,56,220,208
3003 movups -16(%rcx,%rax,1),%xmm0
3004 jnz .Locb_enc_loop1
3005
# Last aesenc round, restore %xmm1 and %rax, then the final round with the
# mask in %xmm7 as operand.
3006.byte 102,15,56,220,209
3007 movups 16(%r11),%xmm1
3008 movq %r10,%rax
3009
3010.byte 102,15,56,221,215
3011 .byte 0xf3,0xc3
3012.cfi_endproc
3013.size __ocb_encrypt1,.-__ocb_encrypt1
3014
# aesni_ocb_decrypt - exported entry point: processes a run of 16-byte blocks
# with the __ocb_decrypt1/4/6 helpers. Arguments as consumed by the code below
# (System V AMD64 register order; the two stack arguments are read relative
# to the entry value of %rsp):
#   %rdi   input pointer
#   %rsi   output pointer
#   %rdx   number of 16-byte blocks
#   %rcx   key schedule; the 32-bit word at offset 240 is scaled by 16 and
#          used to locate the final round key and to set up the loop offset
#   %r8    counter; its low bit and bsf results select table entries
#   %r9    pointer to 16 bytes read into %xmm15 at entry and written back
#          at exit
#   first stack argument   base of a table of 16-byte entries (kept in %rbx)
#   second stack argument  pointer to a 16-byte accumulator (kept in %rbp,
#          value in %xmm8); every output block is XORed into it and it is
#          written back at exit
# Callee-saved %rbx, %rbp, %r12, %r13, %r14 are pushed and restored. All
# sixteen XMM registers are zeroed before returning.
# NOTE(review): with an even %r8 one block is processed before %rdx is first
# tested, and bsf of a zero %r8 would leave %r12 undefined - the caller
# presumably guarantees %rdx >= 1 and %r8 >= 1; confirm.
3015.globl aesni_ocb_decrypt
3016.type aesni_ocb_decrypt,@function
3017.align 32
3018aesni_ocb_decrypt:
3019.cfi_startproc
# .byte 243,15,30,250 encodes endbr64.
3020.byte 243,15,30,250
3021 leaq (%rsp),%rax
3022 pushq %rbx
3023.cfi_adjust_cfa_offset 8
3024.cfi_offset %rbx,-16
3025 pushq %rbp
3026.cfi_adjust_cfa_offset 8
3027.cfi_offset %rbp,-24
3028 pushq %r12
3029.cfi_adjust_cfa_offset 8
3030.cfi_offset %r12,-32
3031 pushq %r13
3032.cfi_adjust_cfa_offset 8
3033.cfi_offset %r13,-40
3034 pushq %r14
3035.cfi_adjust_cfa_offset 8
3036.cfi_offset %r14,-48
# Fetch the two stack arguments (%rax still holds the entry %rsp).
3037 movq 8(%rax),%rbx
3038 movq 8+8(%rax),%rbp
3039
# %r10 = 16 * word at 240(%rcx); %r11 = key schedule base.
# %xmm9 = round key 0 XOR the key at 16(%rcx,%r10), and the same key is
# XORed into the state loaded from (%r9).
3040 movl 240(%rcx),%r10d
3041 movq %rcx,%r11
3042 shll $4,%r10d
3043 movups (%rcx),%xmm9
3044 movups 16(%rcx,%r10,1),%xmm1
3045
3046 movdqu (%r9),%xmm15
3047 pxor %xmm1,%xmm9
3048 pxor %xmm1,%xmm15
3049
# Helper contract: %rcx = %r11 + 32 + %r10, %xmm1 = key at 16(%r11), and
# %rax = %r10 = 48 minus the scaled count (the starting loop offset).
3050 movl $16+32,%eax
3051 leaq 32(%r11,%r10,1),%rcx
3052 movups 16(%r11),%xmm1
3053 subq %r10,%rax
3054 movq %rax,%r10
3055
3056 movdqu (%rbx),%xmm10
3057 movdqu (%rbp),%xmm8
3058
# Odd %r8: go straight to the six-block setup. Even %r8: process one block
# first, using the table entry selected by bsf(%r8).
3059 testq $1,%r8
3060 jnz .Locb_dec_odd
3061
3062 bsfq %r8,%r12
3063 addq $1,%r8
3064 shlq $4,%r12
3065 movdqu (%rbx,%r12,1),%xmm7
3066 movdqu (%rdi),%xmm2
3067 leaq 16(%rdi),%rdi
3068
3069 call __ocb_decrypt1
3070
3071 movdqa %xmm7,%xmm15
3072 movups %xmm2,(%rsi)
3073 xorps %xmm2,%xmm8
3074 leaq 16(%rsi),%rsi
3075 subq $1,%rdx
3076 jz .Locb_dec_done
3077
# Table byte offsets for the first group: 16 * bsf of %r8+1, %r8+3, %r8+5;
# %r8 is advanced by 6. Fewer than six blocks left goes to the short path.
3078.Locb_dec_odd:
3079 leaq 1(%r8),%r12
3080 leaq 3(%r8),%r13
3081 leaq 5(%r8),%r14
3082 leaq 6(%r8),%r8
3083 bsfq %r12,%r12
3084 bsfq %r13,%r13
3085 bsfq %r14,%r14
3086 shlq $4,%r12
3087 shlq $4,%r13
3088 shlq $4,%r14
3089
3090 subq $6,%rdx
3091 jc .Locb_dec_short
3092 jmp .Locb_dec_grandloop
3093
# Main loop: six blocks per iteration. Each output block is stored and XORed
# into the accumulator %xmm8.
3094.align 32
3095.Locb_dec_grandloop:
3096 movdqu 0(%rdi),%xmm2
3097 movdqu 16(%rdi),%xmm3
3098 movdqu 32(%rdi),%xmm4
3099 movdqu 48(%rdi),%xmm5
3100 movdqu 64(%rdi),%xmm6
3101 movdqu 80(%rdi),%xmm7
3102 leaq 96(%rdi),%rdi
3103
3104 call __ocb_decrypt6
3105
3106 movups %xmm2,0(%rsi)
3107 pxor %xmm2,%xmm8
3108 movups %xmm3,16(%rsi)
3109 pxor %xmm3,%xmm8
3110 movups %xmm4,32(%rsi)
3111 pxor %xmm4,%xmm8
3112 movups %xmm5,48(%rsi)
3113 pxor %xmm5,%xmm8
3114 movups %xmm6,64(%rsi)
3115 pxor %xmm6,%xmm8
3116 movups %xmm7,80(%rsi)
3117 pxor %xmm7,%xmm8
3118 leaq 96(%rsi),%rsi
3119 subq $6,%rdx
3120 jnc .Locb_dec_grandloop
3121
# Short path: undo the last subtraction; %rdx is now 0..5 remaining blocks.
# Blocks are loaded as the count is narrowed down (the movdqu loads do not
# disturb the flags from cmpq, so jb/je test the same comparison).
3122.Locb_dec_short:
3123 addq $6,%rdx
3124 jz .Locb_dec_done
3125
3126 movdqu 0(%rdi),%xmm2
3127 cmpq $2,%rdx
3128 jb .Locb_dec_one
3129 movdqu 16(%rdi),%xmm3
3130 je .Locb_dec_two
3131
3132 movdqu 32(%rdi),%xmm4
3133 cmpq $4,%rdx
3134 jb .Locb_dec_three
3135 movdqu 48(%rdi),%xmm5
3136 je .Locb_dec_four
3137
# Five blocks: run the six-block helper with a zero sixth input; its result
# in %xmm7 is not stored. The mask of the fifth block (%xmm14) is carried.
3138 movdqu 64(%rdi),%xmm6
3139 pxor %xmm7,%xmm7
3140
3141 call __ocb_decrypt6
3142
3143 movdqa %xmm14,%xmm15
3144 movups %xmm2,0(%rsi)
3145 pxor %xmm2,%xmm8
3146 movups %xmm3,16(%rsi)
3147 pxor %xmm3,%xmm8
3148 movups %xmm4,32(%rsi)
3149 pxor %xmm4,%xmm8
3150 movups %xmm5,48(%rsi)
3151 pxor %xmm5,%xmm8
3152 movups %xmm6,64(%rsi)
3153 pxor %xmm6,%xmm8
3154
3155 jmp .Locb_dec_done
3156
# One block: the mask input is the current %xmm10.
3157.align 16
3158.Locb_dec_one:
3159 movdqa %xmm10,%xmm7
3160
3161 call __ocb_decrypt1
3162
3163 movdqa %xmm7,%xmm15
3164 movups %xmm2,0(%rsi)
3165 xorps %xmm2,%xmm8
3166 jmp .Locb_dec_done
3167
# Two blocks: four-block helper with zero third and fourth inputs; the mask
# of the second block (%xmm11) is carried.
3168.align 16
3169.Locb_dec_two:
3170 pxor %xmm4,%xmm4
3171 pxor %xmm5,%xmm5
3172
3173 call __ocb_decrypt4
3174
3175 movdqa %xmm11,%xmm15
3176 movups %xmm2,0(%rsi)
3177 xorps %xmm2,%xmm8
3178 movups %xmm3,16(%rsi)
3179 xorps %xmm3,%xmm8
3180
3181 jmp .Locb_dec_done
3182
# Three blocks: four-block helper with a zero fourth input; %xmm12 carried.
3183.align 16
3184.Locb_dec_three:
3185 pxor %xmm5,%xmm5
3186
3187 call __ocb_decrypt4
3188
3189 movdqa %xmm12,%xmm15
3190 movups %xmm2,0(%rsi)
3191 xorps %xmm2,%xmm8
3192 movups %xmm3,16(%rsi)
3193 xorps %xmm3,%xmm8
3194 movups %xmm4,32(%rsi)
3195 xorps %xmm4,%xmm8
3196
3197 jmp .Locb_dec_done
3198
# Four blocks: %xmm13 carried.
3199.align 16
3200.Locb_dec_four:
3201 call __ocb_decrypt4
3202
3203 movdqa %xmm13,%xmm15
3204 movups %xmm2,0(%rsi)
3205 pxor %xmm2,%xmm8
3206 movups %xmm3,16(%rsi)
3207 pxor %xmm3,%xmm8
3208 movups %xmm4,32(%rsi)
3209 pxor %xmm4,%xmm8
3210 movups %xmm5,48(%rsi)
3211 pxor %xmm5,%xmm8
3212
# Exit: %xmm0 is whatever the last helper left there. In the helpers in this
# file the last load into %xmm0 is -16(%rcx,%rax,1) with %rax == 0, the same
# address as the key XORed into %xmm15 at entry, so this pxor removes it.
# Then write back the accumulator and the carried state.
3213.Locb_dec_done:
3214 pxor %xmm0,%xmm15
3215 movdqu %xmm8,(%rbp)
3216 movdqu %xmm15,(%r9)
3217
# Zero every XMM register before returning.
3218 xorps %xmm0,%xmm0
3219 pxor %xmm1,%xmm1
3220 pxor %xmm2,%xmm2
3221 pxor %xmm3,%xmm3
3222 pxor %xmm4,%xmm4
3223 pxor %xmm5,%xmm5
3224 pxor %xmm6,%xmm6
3225 pxor %xmm7,%xmm7
3226 pxor %xmm8,%xmm8
3227 pxor %xmm9,%xmm9
3228 pxor %xmm10,%xmm10
3229 pxor %xmm11,%xmm11
3230 pxor %xmm12,%xmm12
3231 pxor %xmm13,%xmm13
3232 pxor %xmm14,%xmm14
3233 pxor %xmm15,%xmm15
# Restore the five pushed registers by offset from %rax = %rsp + 40, then
# reset %rsp to that value.
3234 leaq 40(%rsp),%rax
3235.cfi_def_cfa %rax,8
3236 movq -40(%rax),%r14
3237.cfi_restore %r14
3238 movq -32(%rax),%r13
3239.cfi_restore %r13
3240 movq -24(%rax),%r12
3241.cfi_restore %r12
3242 movq -16(%rax),%rbp
3243.cfi_restore %rbp
3244 movq -8(%rax),%rbx
3245.cfi_restore %rbx
3246 leaq (%rax),%rsp
3247.cfi_def_cfa_register %rsp
3248.Locb_dec_epilogue:
# .byte 0xf3,0xc3 encodes rep ret.
3249 .byte 0xf3,0xc3
3250.cfi_endproc
3251.size aesni_ocb_decrypt,.-aesni_ocb_decrypt
3252
# __ocb_decrypt6 - local helper called from aesni_ocb_decrypt: runs the six
# blocks in %xmm2..%xmm7 through the AES decryption rounds in parallel. Each
# block is XORed with its own mask before the first round and the same mask
# is folded into the final round. Unlike the encrypt helpers it does not
# touch the accumulator %xmm8 (the caller accumulates the outputs).
#   %xmm2..%xmm7    in: input blocks, out: result blocks
#   %xmm9           round key 0 XOR final round key (set up by the caller)
#   %xmm10          in: contents of (%rbx); reloaded from (%rbx) on exit
#   %xmm15          in: carried mask, out: mask of the sixth block
#   %xmm1           in: key at 16(%r11); reloaded on exit
#   %rbx            base of a table of 16-byte entries
#   %r12,%r13,%r14  byte offsets into that table, recomputed for next call
#   %r8             counter, advanced by 6
#   %r11            key schedule base; %rcx + %rax address the later keys
#   %r10            starting loop offset, copied back into %rax on exit
# Encodings: .byte 102,15,56,222,M is aesdec and .byte 102,65,15,56,223,M is
# aesdeclast with a source in %xmm8..%xmm15; .byte 0xf3,0xc3 is rep ret.
3253.type __ocb_decrypt6,@function
3254.align 32
3255__ocb_decrypt6:
3256.cfi_startproc
# Build the mask chain %xmm10..%xmm15 (each mask = previous mask XOR either
# the entry value of %xmm10 or a table entry) and apply it to the blocks.
3257 pxor %xmm9,%xmm15
3258 movdqu (%rbx,%r12,1),%xmm11
3259 movdqa %xmm10,%xmm12
3260 movdqu (%rbx,%r13,1),%xmm13
3261 movdqa %xmm10,%xmm14
3262 pxor %xmm15,%xmm10
3263 movdqu (%rbx,%r14,1),%xmm15
3264 pxor %xmm10,%xmm11
3265 pxor %xmm10,%xmm2
3266 pxor %xmm11,%xmm12
3267 pxor %xmm11,%xmm3
3268 pxor %xmm12,%xmm13
3269 pxor %xmm12,%xmm4
3270 pxor %xmm13,%xmm14
3271 pxor %xmm13,%xmm5
3272 pxor %xmm14,%xmm15
3273 pxor %xmm14,%xmm6
3274 pxor %xmm15,%xmm7
3275 movups 32(%r11),%xmm0
3276
# Table indices for the next call: bsf of %r8+1, %r8+3, %r8+5 (scaled by 16
# further down); %r8 += 6.
3277 leaq 1(%r8),%r12
3278 leaq 3(%r8),%r13
3279 leaq 5(%r8),%r14
3280 addq $6,%r8
3281 pxor %xmm9,%xmm10
3282 bsfq %r12,%r12
3283 bsfq %r13,%r13
3284 bsfq %r14,%r14
3285
# aesdec with %xmm1 on %xmm2..%xmm7; the interleaved pxor fold %xmm9 into the
# already-applied masks %xmm11..%xmm15.
3286.byte 102,15,56,222,209
3287.byte 102,15,56,222,217
3288.byte 102,15,56,222,225
3289.byte 102,15,56,222,233
3290 pxor %xmm9,%xmm11
3291 pxor %xmm9,%xmm12
3292.byte 102,15,56,222,241
3293 pxor %xmm9,%xmm13
3294 pxor %xmm9,%xmm14
3295.byte 102,15,56,222,249
3296 movups 48(%r11),%xmm1
3297 pxor %xmm9,%xmm15
3298
# aesdec with %xmm0 on %xmm2..%xmm7.
3299.byte 102,15,56,222,208
3300.byte 102,15,56,222,216
3301.byte 102,15,56,222,224
3302.byte 102,15,56,222,232
3303.byte 102,15,56,222,240
3304.byte 102,15,56,222,248
3305 movups 64(%r11),%xmm0
3306 shlq $4,%r12
3307 shlq $4,%r13
3308 jmp .Locb_dec_loop6
3309
# Round loop, two rounds per iteration; the jnz consumes the flags from addq
# and the loop ends when %rax reaches zero.
3310.align 32
3311.Locb_dec_loop6:
3312.byte 102,15,56,222,209
3313.byte 102,15,56,222,217
3314.byte 102,15,56,222,225
3315.byte 102,15,56,222,233
3316.byte 102,15,56,222,241
3317.byte 102,15,56,222,249
3318 movups (%rcx,%rax,1),%xmm1
3319 addq $32,%rax
3320
3321.byte 102,15,56,222,208
3322.byte 102,15,56,222,216
3323.byte 102,15,56,222,224
3324.byte 102,15,56,222,232
3325.byte 102,15,56,222,240
3326.byte 102,15,56,222,248
3327 movups -16(%rcx,%rax,1),%xmm0
3328 jnz .Locb_dec_loop6
3329
# Last aesdec round with %xmm1; restore %xmm1 and finish scaling %r14.
3330.byte 102,15,56,222,209
3331.byte 102,15,56,222,217
3332.byte 102,15,56,222,225
3333.byte 102,15,56,222,233
3334.byte 102,15,56,222,241
3335.byte 102,15,56,222,249
3336 movups 16(%r11),%xmm1
3337 shlq $4,%r14
3338
# aesdeclast with masks %xmm10..%xmm15 on %xmm2..%xmm7; %xmm10 is reloaded
# from (%rbx) after its last use and %rax is reset from %r10.
3339.byte 102,65,15,56,223,210
3340 movdqu (%rbx),%xmm10
3341 movq %r10,%rax
3342.byte 102,65,15,56,223,219
3343.byte 102,65,15,56,223,228
3344.byte 102,65,15,56,223,237
3345.byte 102,65,15,56,223,246
3346.byte 102,65,15,56,223,255
3347 .byte 0xf3,0xc3
3348.cfi_endproc
3349.size __ocb_decrypt6,.-__ocb_decrypt6
3350
# __ocb_decrypt4 - local helper called from aesni_ocb_decrypt: four-block
# variant of __ocb_decrypt6. Processes %xmm2..%xmm5 with masks
# %xmm10..%xmm13. It does not recompute %r12/%r13, does not advance %r8 and
# does not reload %xmm10; the caller copies one of %xmm11..%xmm13 into %xmm15
# afterwards.
#   %xmm9           round key 0 XOR final round key (set up by the caller)
#   %xmm10, %xmm15  in: chain term and carried mask
#   %rbx,%r12,%r13  table base and byte offsets of the two entries used
#   %r11            key schedule base; %rcx + %rax address the later keys
#   %r10            starting loop offset, copied back into %rax on exit
# Encodings: .byte 102,15,56,222,M is aesdec, .byte 102,65,15,56,223,M is
# aesdeclast with a source in %xmm8..%xmm15, .byte 0xf3,0xc3 is rep ret.
3351.type __ocb_decrypt4,@function
3352.align 32
3353__ocb_decrypt4:
3354.cfi_startproc
# Mask chain: %xmm10 ^= %xmm15, %xmm11 = table[%r12] ^ %xmm10,
# %xmm12 = entry %xmm10 ^ %xmm11, %xmm13 = table[%r13] ^ %xmm12.
3355 pxor %xmm9,%xmm15
3356 movdqu (%rbx,%r12,1),%xmm11
3357 movdqa %xmm10,%xmm12
3358 movdqu (%rbx,%r13,1),%xmm13
3359 pxor %xmm15,%xmm10
3360 pxor %xmm10,%xmm11
3361 pxor %xmm10,%xmm2
3362 pxor %xmm11,%xmm12
3363 pxor %xmm11,%xmm3
3364 pxor %xmm12,%xmm13
3365 pxor %xmm12,%xmm4
3366 pxor %xmm13,%xmm5
3367 movups 32(%r11),%xmm0
3368
# Fold %xmm9 into the masks so they can serve as final-round operands.
3369 pxor %xmm9,%xmm10
3370 pxor %xmm9,%xmm11
3371 pxor %xmm9,%xmm12
3372 pxor %xmm9,%xmm13
3373
# aesdec with %xmm1, then with %xmm0, on %xmm2..%xmm5.
3374.byte 102,15,56,222,209
3375.byte 102,15,56,222,217
3376.byte 102,15,56,222,225
3377.byte 102,15,56,222,233
3378 movups 48(%r11),%xmm1
3379
3380.byte 102,15,56,222,208
3381.byte 102,15,56,222,216
3382.byte 102,15,56,222,224
3383.byte 102,15,56,222,232
3384 movups 64(%r11),%xmm0
3385 jmp .Locb_dec_loop4
3386
# Round loop, two rounds per iteration; ends when addq leaves %rax at zero.
3387.align 32
3388.Locb_dec_loop4:
3389.byte 102,15,56,222,209
3390.byte 102,15,56,222,217
3391.byte 102,15,56,222,225
3392.byte 102,15,56,222,233
3393 movups (%rcx,%rax,1),%xmm1
3394 addq $32,%rax
3395
3396.byte 102,15,56,222,208
3397.byte 102,15,56,222,216
3398.byte 102,15,56,222,224
3399.byte 102,15,56,222,232
3400 movups -16(%rcx,%rax,1),%xmm0
3401 jnz .Locb_dec_loop4
3402
# Last aesdec round with %xmm1; restore %xmm1 and %rax for the next call.
3403.byte 102,15,56,222,209
3404.byte 102,15,56,222,217
3405.byte 102,15,56,222,225
3406.byte 102,15,56,222,233
3407 movups 16(%r11),%xmm1
3408 movq %r10,%rax
3409
# aesdeclast with masks %xmm10..%xmm13 on %xmm2..%xmm5.
3410.byte 102,65,15,56,223,210
3411.byte 102,65,15,56,223,219
3412.byte 102,65,15,56,223,228
3413.byte 102,65,15,56,223,237
3414 .byte 0xf3,0xc3
3415.cfi_endproc
3416.size __ocb_decrypt4,.-__ocb_decrypt4
3417
# __ocb_decrypt1 - local helper called from aesni_ocb_decrypt: single-block
# variant of the OCB decrypt helpers.
#   %xmm2   in: input block, out: result block
#   %xmm7   in: value combined with %xmm15 and %xmm9 to form the mask,
#           out: the mask with %xmm9 removed again (the caller copies it into
#           %xmm15)
#   %xmm9   round key 0 XOR final round key (set up by the caller)
#   %xmm1   in: key at 16(%r11); reloaded on exit
#   %r11    key schedule base; %rcx + %rax address the later round keys
#   %r10    starting loop offset, copied back into %rax on exit
# Encodings: .byte 102,15,56,222,M is aesdec, .byte 102,15,56,223,215 is
# aesdeclast %xmm7,%xmm2, .byte 0xf3,0xc3 is rep ret.
3418.type __ocb_decrypt1,@function
3419.align 32
3420__ocb_decrypt1:
3421.cfi_startproc
# mask = %xmm7 ^ %xmm15 ^ %xmm9, applied to the input block.
3422 pxor %xmm15,%xmm7
3423 pxor %xmm9,%xmm7
3424 pxor %xmm7,%xmm2
3425 movups 32(%r11),%xmm0
3426
# First two rounds; the second XOR with %xmm9 cancels the first and leaves
# %xmm7 ready to be the aesdeclast operand.
3427.byte 102,15,56,222,209
3428 movups 48(%r11),%xmm1
3429 pxor %xmm9,%xmm7
3430
3431.byte 102,15,56,222,208
3432 movups 64(%r11),%xmm0
3433 jmp .Locb_dec_loop1
3434
# Round loop, two rounds per iteration; ends when addq leaves %rax at zero.
3435.align 32
3436.Locb_dec_loop1:
3437.byte 102,15,56,222,209
3438 movups (%rcx,%rax,1),%xmm1
3439 addq $32,%rax
3440
3441.byte 102,15,56,222,208
3442 movups -16(%rcx,%rax,1),%xmm0
3443 jnz .Locb_dec_loop1
3444
# Last aesdec round, restore %xmm1 and %rax, then the final round with the
# mask in %xmm7 as operand.
3445.byte 102,15,56,222,209
3446 movups 16(%r11),%xmm1
3447 movq %r10,%rax
3448
3449.byte 102,15,56,223,215
3450 .byte 0xf3,0xc3
3451.cfi_endproc
3452.size __ocb_decrypt1,.-__ocb_decrypt1
# aesni_cbc_encrypt - exported entry point for CBC-mode encryption and
# decryption. Arguments as consumed by the code below (System V AMD64
# register order):
#   %rdi   input pointer
#   %rsi   output pointer
#   %rdx   length in bytes; zero returns immediately
#   %rcx   key schedule; the 32-bit word at offset 240 is the round-loop count
#   %r8    pointer to the 16-byte chaining value (IV): read at entry and
#          updated before return
#   %r9d   direction: non-zero takes the encrypt path, zero the decrypt path
# Encodings used throughout: .byte 102,15,56,220/221,M are aesenc/aesenclast,
# .byte 102,15,56,222/223,M are aesdec/aesdeclast (a prefix byte 65, 68 or 69
# after the 102 is a REX prefix that selects %xmm8..%xmm15),
# .byte 243,15,30,250 is endbr64 and .byte 0xf3,0xc3 is rep ret.
# _aesni_decrypt2/3/4/6/8 are helpers defined outside this excerpt.
3453.globl aesni_cbc_encrypt
3454.type aesni_cbc_encrypt,@function
3455.align 16
3456aesni_cbc_encrypt:
3457.cfi_startproc
3458.byte 243,15,30,250
3459 testq %rdx,%rdx
3460 jz .Lcbc_ret
3461
# Keep copies of the round count (%r10d) and key pointer (%r11); the loops
# below consume %eax and %rcx.
3462 movl 240(%rcx),%r10d
3463 movq %rcx,%r11
3464 testl %r9d,%r9d
3465 jz .Lcbc_decrypt
3466
# Encrypt path. %xmm2 carries the previous ciphertext block, initially the
# IV. Fewer than 16 bytes goes directly to the partial-block code.
3467 movups (%r8),%xmm2
3468 movl %r10d,%eax
3469 cmpq $16,%rdx
3470 jb .Lcbc_enc_tail
3471 subq $16,%rdx
3472 jmp .Lcbc_enc_loop
3473.align 16
# One block per iteration: input XOR round key 0 XOR previous ciphertext,
# then the round loop. The result is stored and stays in %xmm2.
3474.Lcbc_enc_loop:
3475 movups (%rdi),%xmm3
3476 leaq 16(%rdi),%rdi
3477
3478 movups (%rcx),%xmm0
3479 movups 16(%rcx),%xmm1
3480 xorps %xmm0,%xmm3
3481 leaq 32(%rcx),%rcx
3482 xorps %xmm3,%xmm2
# Round loop: the jnz tests the flags from decl (movups and leaq leave the
# flags untouched).
3483.Loop_enc1_15:
3484.byte 102,15,56,220,209
3485 decl %eax
3486 movups (%rcx),%xmm1
3487 leaq 16(%rcx),%rcx
3488 jnz .Loop_enc1_15
3489.byte 102,15,56,221,209
3490 movl %r10d,%eax
3491 movq %r11,%rcx
3492 movups %xmm2,0(%rsi)
3493 leaq 16(%rsi),%rsi
3494 subq $16,%rdx
3495 jnc .Lcbc_enc_loop
# The subtraction borrowed: add 16 back. A non-zero remainder is a trailing
# partial block; otherwise write the last ciphertext block back as the new
# chaining value and clear the XMM registers used.
3496 addq $16,%rdx
3497 jnz .Lcbc_enc_tail
3498 pxor %xmm0,%xmm0
3499 pxor %xmm1,%xmm1
3500 movups %xmm2,(%r8)
3501 pxor %xmm2,%xmm2
3502 pxor %xmm3,%xmm3
3503 jmp .Lcbc_ret
3504
# Partial final block of %rdx bytes (less than 16): copy those bytes from the
# input to the output buffer (.long 0x9066A4F3 is rep movsb followed by a
# two-byte nop), pad with zero bytes up to 16 (.long 0x9066AAF3 is rep stosb
# followed by the same nop, with %al = 0), then re-enter the loop with input
# and output both pointing at the padded block and %rdx = 0.
3505.Lcbc_enc_tail:
3506 movq %rdx,%rcx
3507 xchgq %rdi,%rsi
3508.long 0x9066A4F3
3509 movl $16,%ecx
3510 subq %rdx,%rcx
3511 xorl %eax,%eax
3512.long 0x9066AAF3
3513 leaq -16(%rdi),%rdi
3514 movl %r10d,%eax
3515 movq %rdi,%rsi
3516 movq %r11,%rcx
3517 xorq %rdx,%rdx
3518 jmp .Lcbc_enc_loop
3519
# Decrypt path. A length of exactly 16 bytes is handled inline below; every
# other length goes to the bulk code.
3520.align 16
3521.Lcbc_decrypt:
3522 cmpq $16,%rdx
3523 jne .Lcbc_decrypt_bulk
3524
3525
3526
# Single block: %xmm4 keeps a copy of the ciphertext, which becomes the new
# chaining value; %xmm3 is the old chaining value XORed into the result.
3527 movdqu (%rdi),%xmm2
3528 movdqu (%r8),%xmm3
3529 movdqa %xmm2,%xmm4
3530 movups (%rcx),%xmm0
3531 movups 16(%rcx),%xmm1
3532 leaq 32(%rcx),%rcx
3533 xorps %xmm0,%xmm2
3534.Loop_dec1_16:
3535.byte 102,15,56,222,209
3536 decl %r10d
3537 movups (%rcx),%xmm1
3538 leaq 16(%rcx),%rcx
3539 jnz .Loop_dec1_16
3540.byte 102,15,56,223,209
3541 pxor %xmm0,%xmm0
3542 pxor %xmm1,%xmm1
3543 movdqu %xmm4,(%r8)
3544 xorps %xmm3,%xmm2
3545 pxor %xmm3,%xmm3
3546 movups %xmm2,(%rsi)
3547 pxor %xmm2,%xmm2
3548 jmp .Lcbc_ret
3549.align 16
# Bulk decrypt. %r11 remembers the entry %rsp, %rbp is pushed, and a 16-byte
# aligned scratch slot is reserved at (%rsp); it is used only by the
# partial-tail code. %rbp then holds the key pointer and %xmm10 the chaining
# value. Up to 0x50 bytes goes straight to the tail dispatcher.
3550.Lcbc_decrypt_bulk:
3551 leaq (%rsp),%r11
3552.cfi_def_cfa_register %r11
3553 pushq %rbp
3554.cfi_offset %rbp,-16
3555 subq $16,%rsp
3556 andq $-16,%rsp
3557 movq %rcx,%rbp
3558 movups (%r8),%xmm10
3559 movl %r10d,%eax
3560 cmpq $0x50,%rdx
3561 jbe .Lcbc_dec_tail
3562
# Load six ciphertext blocks into %xmm2..%xmm7 and keep copies of the first
# five in %xmm11..%xmm15 for the CBC XOR after decryption.
3563 movups (%rcx),%xmm0
3564 movdqu 0(%rdi),%xmm2
3565 movdqu 16(%rdi),%xmm3
3566 movdqa %xmm2,%xmm11
3567 movdqu 32(%rdi),%xmm4
3568 movdqa %xmm3,%xmm12
3569 movdqu 48(%rdi),%xmm5
3570 movdqa %xmm4,%xmm13
3571 movdqu 64(%rdi),%xmm6
3572 movdqa %xmm5,%xmm14
3573 movdqu 80(%rdi),%xmm7
3574 movdqa %xmm6,%xmm15
# %r9d = capability word at OPENSSL_ia32cap_P+4. The mask 71303168
# (0x4400000) keeps bits 22 and 26; when only bit 22 (4194304 = 0x400000) is
# set the 6-block loop is used, otherwise the 8-block loop.
# NOTE(review): presumably these are the CPUID.1:ECX MOVBE and XSAVE bits -
# confirm against the OPENSSL_ia32cap documentation.
3575 movl OPENSSL_ia32cap_P+4(%rip),%r9d
3576 cmpq $0x70,%rdx
3577 jbe .Lcbc_dec_six_or_seven
3578
3579 andl $71303168,%r9d
3580 subq $0x50,%rdx
3581 cmpl $4194304,%r9d
3582 je .Lcbc_dec_loop6_enter
3583 subq $0x20,%rdx
3584 leaq 112(%rcx),%rcx
3585 jmp .Lcbc_dec_loop8_enter
3586.align 16
# 8-block loop. %rcx is biased by +112, so round keys are addressed as
# N-112(%rcx). On re-entry the eighth result of the previous iteration
# (%xmm9) is stored first.
3587.Lcbc_dec_loop8:
3588 movups %xmm9,(%rsi)
3589 leaq 16(%rsi),%rsi
3590.Lcbc_dec_loop8_enter:
3591 movdqu 96(%rdi),%xmm8
3592 pxor %xmm0,%xmm2
3593 movdqu 112(%rdi),%xmm9
3594 pxor %xmm0,%xmm3
3595 movups 16-112(%rcx),%xmm1
3596 pxor %xmm0,%xmm4
# %rbp becomes %rdi + 128 when %rdx >= 0x70 and %rdi otherwise: cmpq sets the
# carry flag when %rdx < 0x70, and the adcq/andq/addq further down (with only
# flag-preserving SSE/AES instructions in between) turn -1 + carry into 128
# or 0. It is the address the next group of ciphertext is fetched from in
# .Lcbc_dec_done.
3597 movq $-1,%rbp
3598 cmpq $0x70,%rdx
3599 pxor %xmm0,%xmm5
3600 pxor %xmm0,%xmm6
3601 pxor %xmm0,%xmm7
3602 pxor %xmm0,%xmm8
3603
3604.byte 102,15,56,222,209
3605 pxor %xmm0,%xmm9
3606 movups 32-112(%rcx),%xmm0
3607.byte 102,15,56,222,217
3608.byte 102,15,56,222,225
3609.byte 102,15,56,222,233
3610.byte 102,15,56,222,241
3611.byte 102,15,56,222,249
3612.byte 102,68,15,56,222,193
3613 adcq $0,%rbp
3614 andq $128,%rbp
3615.byte 102,68,15,56,222,201
3616 addq %rdi,%rbp
3617 movups 48-112(%rcx),%xmm1
3618.byte 102,15,56,222,208
3619.byte 102,15,56,222,216
3620.byte 102,15,56,222,224
3621.byte 102,15,56,222,232
3622.byte 102,15,56,222,240
3623.byte 102,15,56,222,248
3624.byte 102,68,15,56,222,192
3625.byte 102,68,15,56,222,200
3626 movups 64-112(%rcx),%xmm0
3627 nop
3628.byte 102,15,56,222,209
3629.byte 102,15,56,222,217
3630.byte 102,15,56,222,225
3631.byte 102,15,56,222,233
3632.byte 102,15,56,222,241
3633.byte 102,15,56,222,249
3634.byte 102,68,15,56,222,193
3635.byte 102,68,15,56,222,201
3636 movups 80-112(%rcx),%xmm1
3637 nop
3638.byte 102,15,56,222,208
3639.byte 102,15,56,222,216
3640.byte 102,15,56,222,224
3641.byte 102,15,56,222,232
3642.byte 102,15,56,222,240
3643.byte 102,15,56,222,248
3644.byte 102,68,15,56,222,192
3645.byte 102,68,15,56,222,200
3646 movups 96-112(%rcx),%xmm0
3647 nop
3648.byte 102,15,56,222,209
3649.byte 102,15,56,222,217
3650.byte 102,15,56,222,225
3651.byte 102,15,56,222,233
3652.byte 102,15,56,222,241
3653.byte 102,15,56,222,249
3654.byte 102,68,15,56,222,193
3655.byte 102,68,15,56,222,201
3656 movups 112-112(%rcx),%xmm1
3657 nop
3658.byte 102,15,56,222,208
3659.byte 102,15,56,222,216
3660.byte 102,15,56,222,224
3661.byte 102,15,56,222,232
3662.byte 102,15,56,222,240
3663.byte 102,15,56,222,248
3664.byte 102,68,15,56,222,192
3665.byte 102,68,15,56,222,200
3666 movups 128-112(%rcx),%xmm0
3667 nop
3668.byte 102,15,56,222,209
3669.byte 102,15,56,222,217
3670.byte 102,15,56,222,225
3671.byte 102,15,56,222,233
3672.byte 102,15,56,222,241
3673.byte 102,15,56,222,249
3674.byte 102,68,15,56,222,193
3675.byte 102,68,15,56,222,201
3676 movups 144-112(%rcx),%xmm1
# The flags from this cmpl survive the following AES/move instructions and
# pick the number of extra rounds: round count below 11 jumps to the final
# rounds at the jb, equal to 11 after two more rounds at the je, otherwise
# four more rounds run before the jmp.
3677 cmpl $11,%eax
3678.byte 102,15,56,222,208
3679.byte 102,15,56,222,216
3680.byte 102,15,56,222,224
3681.byte 102,15,56,222,232
3682.byte 102,15,56,222,240
3683.byte 102,15,56,222,248
3684.byte 102,68,15,56,222,192
3685.byte 102,68,15,56,222,200
3686 movups 160-112(%rcx),%xmm0
3687 jb .Lcbc_dec_done
3688.byte 102,15,56,222,209
3689.byte 102,15,56,222,217
3690.byte 102,15,56,222,225
3691.byte 102,15,56,222,233
3692.byte 102,15,56,222,241
3693.byte 102,15,56,222,249
3694.byte 102,68,15,56,222,193
3695.byte 102,68,15,56,222,201
3696 movups 176-112(%rcx),%xmm1
3697 nop
3698.byte 102,15,56,222,208
3699.byte 102,15,56,222,216
3700.byte 102,15,56,222,224
3701.byte 102,15,56,222,232
3702.byte 102,15,56,222,240
3703.byte 102,15,56,222,248
3704.byte 102,68,15,56,222,192
3705.byte 102,68,15,56,222,200
3706 movups 192-112(%rcx),%xmm0
3707 je .Lcbc_dec_done
3708.byte 102,15,56,222,209
3709.byte 102,15,56,222,217
3710.byte 102,15,56,222,225
3711.byte 102,15,56,222,233
3712.byte 102,15,56,222,241
3713.byte 102,15,56,222,249
3714.byte 102,68,15,56,222,193
3715.byte 102,68,15,56,222,201
3716 movups 208-112(%rcx),%xmm1
3717 nop
3718.byte 102,15,56,222,208
3719.byte 102,15,56,222,216
3720.byte 102,15,56,222,224
3721.byte 102,15,56,222,232
3722.byte 102,15,56,222,240
3723.byte 102,15,56,222,248
3724.byte 102,68,15,56,222,192
3725.byte 102,68,15,56,222,200
3726 movups 224-112(%rcx),%xmm0
3727 jmp .Lcbc_dec_done
3728.align 16
# Final rounds of the 8-block loop. %xmm0 holds the last key loaded above;
# XORing it into the chaining value and the saved ciphertext copies lets each
# aesdeclast apply that key and the CBC XOR in a single step. The ciphertext
# block at 112(%rdi) becomes the next chaining value in %xmm10, the next six
# ciphertext blocks are prefetched from (%rbp), and %xmm0 is reloaded with
# round key 0.
3729.Lcbc_dec_done:
3730.byte 102,15,56,222,209
3731.byte 102,15,56,222,217
3732 pxor %xmm0,%xmm10
3733 pxor %xmm0,%xmm11
3734.byte 102,15,56,222,225
3735.byte 102,15,56,222,233
3736 pxor %xmm0,%xmm12
3737 pxor %xmm0,%xmm13
3738.byte 102,15,56,222,241
3739.byte 102,15,56,222,249
3740 pxor %xmm0,%xmm14
3741 pxor %xmm0,%xmm15
3742.byte 102,68,15,56,222,193
3743.byte 102,68,15,56,222,201
3744 movdqu 80(%rdi),%xmm1
3745
3746.byte 102,65,15,56,223,210
3747 movdqu 96(%rdi),%xmm10
3748 pxor %xmm0,%xmm1
3749.byte 102,65,15,56,223,219
3750 pxor %xmm0,%xmm10
3751 movdqu 112(%rdi),%xmm0
3752.byte 102,65,15,56,223,228
3753 leaq 128(%rdi),%rdi
3754 movdqu 0(%rbp),%xmm11
3755.byte 102,65,15,56,223,237
3756.byte 102,65,15,56,223,246
3757 movdqu 16(%rbp),%xmm12
3758 movdqu 32(%rbp),%xmm13
3759.byte 102,65,15,56,223,255
3760.byte 102,68,15,56,223,193
3761 movdqu 48(%rbp),%xmm14
3762 movdqu 64(%rbp),%xmm15
3763.byte 102,69,15,56,223,202
3764 movdqa %xmm0,%xmm10
3765 movdqu 80(%rbp),%xmm1
3766 movups -112(%rcx),%xmm0
3767
# Store seven results; the eighth stays in %xmm9 until it is known whether
# another iteration follows. The prefetched ciphertext moves into
# %xmm2..%xmm7 for the next iteration.
3768 movups %xmm2,(%rsi)
3769 movdqa %xmm11,%xmm2
3770 movups %xmm3,16(%rsi)
3771 movdqa %xmm12,%xmm3
3772 movups %xmm4,32(%rsi)
3773 movdqa %xmm13,%xmm4
3774 movups %xmm5,48(%rsi)
3775 movdqa %xmm14,%xmm5
3776 movups %xmm6,64(%rsi)
3777 movdqa %xmm15,%xmm6
3778 movups %xmm7,80(%rsi)
3779 movdqa %xmm1,%xmm7
3780 movups %xmm8,96(%rsi)
3781 leaq 112(%rsi),%rsi
3782
3783 subq $0x80,%rdx
3784 ja .Lcbc_dec_loop8
3785
# Loop exit: undo the %rcx bias. When %rdx + 0x70 is not positive, %xmm9 is
# the final block and is handed to the tail-collected code in %xmm2.
# Otherwise it is stored and the remaining blocks are processed.
3786 movaps %xmm9,%xmm2
3787 leaq -112(%rcx),%rcx
3788 addq $0x70,%rdx
3789 jle .Lcbc_dec_clear_tail_collected
3790 movups %xmm9,(%rsi)
3791 leaq 16(%rsi),%rsi
3792 cmpq $0x50,%rdx
3793 jbe .Lcbc_dec_tail
3794
3795 movaps %xmm11,%xmm2
# More than 0x50 bytes with six ciphertext blocks already in %xmm2..%xmm7:
# six blocks when %rdx <= 0x60, otherwise seven.
3796.Lcbc_dec_six_or_seven:
3797 cmpq $0x60,%rdx
3798 ja .Lcbc_dec_seven
3799
# Six blocks: %xmm8 keeps the sixth ciphertext block as the new chaining
# value. Five results are stored; the sixth is passed on in %xmm2.
3800 movaps %xmm7,%xmm8
3801 call _aesni_decrypt6
3802 pxor %xmm10,%xmm2
3803 movaps %xmm8,%xmm10
3804 pxor %xmm11,%xmm3
3805 movdqu %xmm2,(%rsi)
3806 pxor %xmm12,%xmm4
3807 movdqu %xmm3,16(%rsi)
3808 pxor %xmm3,%xmm3
3809 pxor %xmm13,%xmm5
3810 movdqu %xmm4,32(%rsi)
3811 pxor %xmm4,%xmm4
3812 pxor %xmm14,%xmm6
3813 movdqu %xmm5,48(%rsi)
3814 pxor %xmm5,%xmm5
3815 pxor %xmm15,%xmm7
3816 movdqu %xmm6,64(%rsi)
3817 pxor %xmm6,%xmm6
3818 leaq 80(%rsi),%rsi
3819 movdqa %xmm7,%xmm2
3820 pxor %xmm7,%xmm7
3821 jmp .Lcbc_dec_tail_collected
3822
# Seven blocks: the seventh is loaded into %xmm8 and %xmm9 is zeroed as a
# dummy eighth input. After the call the sixth and seventh ciphertext blocks
# are re-read from the input for the XOR and the new chaining value.
3823.align 16
3824.Lcbc_dec_seven:
3825 movups 96(%rdi),%xmm8
3826 xorps %xmm9,%xmm9
3827 call _aesni_decrypt8
3828 movups 80(%rdi),%xmm9
3829 pxor %xmm10,%xmm2
3830 movups 96(%rdi),%xmm10
3831 pxor %xmm11,%xmm3
3832 movdqu %xmm2,(%rsi)
3833 pxor %xmm12,%xmm4
3834 movdqu %xmm3,16(%rsi)
3835 pxor %xmm3,%xmm3
3836 pxor %xmm13,%xmm5
3837 movdqu %xmm4,32(%rsi)
3838 pxor %xmm4,%xmm4
3839 pxor %xmm14,%xmm6
3840 movdqu %xmm5,48(%rsi)
3841 pxor %xmm5,%xmm5
3842 pxor %xmm15,%xmm7
3843 movdqu %xmm6,64(%rsi)
3844 pxor %xmm6,%xmm6
3845 pxor %xmm9,%xmm8
3846 movdqu %xmm7,80(%rsi)
3847 pxor %xmm7,%xmm7
3848 leaq 96(%rsi),%rsi
3849 movdqa %xmm8,%xmm2
3850 pxor %xmm8,%xmm8
3851 pxor %xmm9,%xmm9
3852 jmp .Lcbc_dec_tail_collected
3853
# 6-block loop (selected above when only capability bit 22 is set). On
# re-entry the sixth result of the previous iteration (%xmm7) is stored
# first, then six new ciphertext blocks are loaded with copies kept in
# %xmm11..%xmm15.
3854.align 16
3855.Lcbc_dec_loop6:
3856 movups %xmm7,(%rsi)
3857 leaq 16(%rsi),%rsi
3858 movdqu 0(%rdi),%xmm2
3859 movdqu 16(%rdi),%xmm3
3860 movdqa %xmm2,%xmm11
3861 movdqu 32(%rdi),%xmm4
3862 movdqa %xmm3,%xmm12
3863 movdqu 48(%rdi),%xmm5
3864 movdqa %xmm4,%xmm13
3865 movdqu 64(%rdi),%xmm6
3866 movdqa %xmm5,%xmm14
3867 movdqu 80(%rdi),%xmm7
3868 movdqa %xmm6,%xmm15
3869.Lcbc_dec_loop6_enter:
3870 leaq 96(%rdi),%rdi
3871 movdqa %xmm7,%xmm8
3872
3873 call _aesni_decrypt6
3874
# XOR with the chaining value and saved ciphertext; the sixth ciphertext
# block (%xmm8) becomes the new chaining value. %rcx and %eax are reloaded
# from %rbp and %r10d for the next helper call.
3875 pxor %xmm10,%xmm2
3876 movdqa %xmm8,%xmm10
3877 pxor %xmm11,%xmm3
3878 movdqu %xmm2,(%rsi)
3879 pxor %xmm12,%xmm4
3880 movdqu %xmm3,16(%rsi)
3881 pxor %xmm13,%xmm5
3882 movdqu %xmm4,32(%rsi)
3883 pxor %xmm14,%xmm6
3884 movq %rbp,%rcx
3885 movdqu %xmm5,48(%rsi)
3886 pxor %xmm15,%xmm7
3887 movl %r10d,%eax
3888 movdqu %xmm6,64(%rsi)
3889 leaq 80(%rsi),%rsi
3890 subq $0x60,%rdx
3891 ja .Lcbc_dec_loop6
3892
# Loop exit: when %rdx + 0x50 is not positive, %xmm7 is the final block and
# is handed on in %xmm2; otherwise store it and fall into the tail code.
3893 movdqa %xmm7,%xmm2
3894 addq $0x50,%rdx
3895 jle .Lcbc_dec_clear_tail_collected
3896 movups %xmm7,(%rsi)
3897 leaq 16(%rsi),%rsi
3898
# Tail dispatcher: load blocks one at a time, subtracting 16 from %rdx after
# each, and branch on the first subtraction that reaches zero or borrows
# (jbe). Ciphertext copies go to %xmm11..%xmm15 as above.
3899.Lcbc_dec_tail:
3900 movups (%rdi),%xmm2
3901 subq $0x10,%rdx
3902 jbe .Lcbc_dec_one
3903
3904 movups 16(%rdi),%xmm3
3905 movaps %xmm2,%xmm11
3906 subq $0x10,%rdx
3907 jbe .Lcbc_dec_two
3908
3909 movups 32(%rdi),%xmm4
3910 movaps %xmm3,%xmm12
3911 subq $0x10,%rdx
3912 jbe .Lcbc_dec_three
3913
3914 movups 48(%rdi),%xmm5
3915 movaps %xmm4,%xmm13
3916 subq $0x10,%rdx
3917 jbe .Lcbc_dec_four
3918
# Five blocks: %xmm7 is zeroed as a dummy sixth input to _aesni_decrypt6 and
# the fifth ciphertext block (%xmm15) becomes the new chaining value.
3919 movups 64(%rdi),%xmm6
3920 movaps %xmm5,%xmm14
3921 movaps %xmm6,%xmm15
3922 xorps %xmm7,%xmm7
3923 call _aesni_decrypt6
3924 pxor %xmm10,%xmm2
3925 movaps %xmm15,%xmm10
3926 pxor %xmm11,%xmm3
3927 movdqu %xmm2,(%rsi)
3928 pxor %xmm12,%xmm4
3929 movdqu %xmm3,16(%rsi)
3930 pxor %xmm3,%xmm3
3931 pxor %xmm13,%xmm5
3932 movdqu %xmm4,32(%rsi)
3933 pxor %xmm4,%xmm4
3934 pxor %xmm14,%xmm6
3935 movdqu %xmm5,48(%rsi)
3936 pxor %xmm5,%xmm5
3937 leaq 64(%rsi),%rsi
3938 movdqa %xmm6,%xmm2
3939 pxor %xmm6,%xmm6
3940 pxor %xmm7,%xmm7
3941 subq $0x10,%rdx
3942 jmp .Lcbc_dec_tail_collected
3943
# One block: inline round loop counted down in %eax.
3944.align 16
3945.Lcbc_dec_one:
3946 movaps %xmm2,%xmm11
3947 movups (%rcx),%xmm0
3948 movups 16(%rcx),%xmm1
3949 leaq 32(%rcx),%rcx
3950 xorps %xmm0,%xmm2
3951.Loop_dec1_17:
3952.byte 102,15,56,222,209
3953 decl %eax
3954 movups (%rcx),%xmm1
3955 leaq 16(%rcx),%rcx
3956 jnz .Loop_dec1_17
3957.byte 102,15,56,223,209
3958 xorps %xmm10,%xmm2
3959 movaps %xmm11,%xmm10
3960 jmp .Lcbc_dec_tail_collected
# Two blocks: first result stored, second handed on in %xmm2.
3961.align 16
3962.Lcbc_dec_two:
3963 movaps %xmm3,%xmm12
3964 call _aesni_decrypt2
3965 pxor %xmm10,%xmm2
3966 movaps %xmm12,%xmm10
3967 pxor %xmm11,%xmm3
3968 movdqu %xmm2,(%rsi)
3969 movdqa %xmm3,%xmm2
3970 pxor %xmm3,%xmm3
3971 leaq 16(%rsi),%rsi
3972 jmp .Lcbc_dec_tail_collected
# Three blocks: two results stored, third handed on in %xmm2.
3973.align 16
3974.Lcbc_dec_three:
3975 movaps %xmm4,%xmm13
3976 call _aesni_decrypt3
3977 pxor %xmm10,%xmm2
3978 movaps %xmm13,%xmm10
3979 pxor %xmm11,%xmm3
3980 movdqu %xmm2,(%rsi)
3981 pxor %xmm12,%xmm4
3982 movdqu %xmm3,16(%rsi)
3983 pxor %xmm3,%xmm3
3984 movdqa %xmm4,%xmm2
3985 pxor %xmm4,%xmm4
3986 leaq 32(%rsi),%rsi
3987 jmp .Lcbc_dec_tail_collected
# Four blocks: three results stored, fourth handed on in %xmm2.
3988.align 16
3989.Lcbc_dec_four:
3990 movaps %xmm5,%xmm14
3991 call _aesni_decrypt4
3992 pxor %xmm10,%xmm2
3993 movaps %xmm14,%xmm10
3994 pxor %xmm11,%xmm3
3995 movdqu %xmm2,(%rsi)
3996 pxor %xmm12,%xmm4
3997 movdqu %xmm3,16(%rsi)
3998 pxor %xmm3,%xmm3
3999 pxor %xmm13,%xmm5
4000 movdqu %xmm4,32(%rsi)
4001 pxor %xmm4,%xmm4
4002 movdqa %xmm5,%xmm2
4003 pxor %xmm5,%xmm5
4004 leaq 48(%rsi),%rsi
4005 jmp .Lcbc_dec_tail_collected
4006
# Entry used by the main loops: zero %xmm3..%xmm9 before the common tail.
4007.align 16
4008.Lcbc_dec_clear_tail_collected:
4009 pxor %xmm3,%xmm3
4010 pxor %xmm4,%xmm4
4011 pxor %xmm5,%xmm5
4012 pxor %xmm6,%xmm6
4013 pxor %xmm7,%xmm7
4014 pxor %xmm8,%xmm8
4015 pxor %xmm9,%xmm9
# Common tail: %xmm10 is the new chaining value and %xmm2 the last result
# block, not yet stored. Write the chaining value back; when %rdx & 15 is
# zero store the whole block.
4016.Lcbc_dec_tail_collected:
4017 movups %xmm10,(%r8)
4018 andq $15,%rdx
4019 jnz .Lcbc_dec_tail_partial
4020 movups %xmm2,(%rsi)
4021 pxor %xmm2,%xmm2
4022 jmp .Lcbc_dec_ret
4023.align 16
# Otherwise spill the block to the aligned scratch slot at (%rsp), copy
# 16 - (%rdx & 15) bytes of it to the output with rep movsb
# (.long 0x9066A4F3), and overwrite the scratch slot with the zeroed %xmm2.
4024.Lcbc_dec_tail_partial:
4025 movaps %xmm2,(%rsp)
4026 pxor %xmm2,%xmm2
4027 movq $16,%rcx
4028 movq %rsi,%rdi
4029 subq %rdx,%rcx
4030 leaq (%rsp),%rsi
4031.long 0x9066A4F3
4032 movdqa %xmm2,(%rsp)
4033
# Bulk-path epilogue: clear key material from %xmm0/%xmm1, restore %rbp from
# just below the entry stack pointer saved in %r11, and restore %rsp.
4034.Lcbc_dec_ret:
4035 xorps %xmm0,%xmm0
4036 pxor %xmm1,%xmm1
4037 movq -8(%r11),%rbp
4038.cfi_restore %rbp
4039 leaq (%r11),%rsp
4040.cfi_def_cfa_register %rsp
4041.Lcbc_ret:
4042 .byte 0xf3,0xc3
4043.cfi_endproc
4044.size aesni_cbc_encrypt,.-aesni_cbc_encrypt
# aesni_set_decrypt_key - exported entry point. Calls __aesni_set_encrypt_key
# with the incoming argument registers untouched and, when that call returns
# zero in %eax, converts the key schedule in place for decryption: the order
# of the round keys is reversed and aesimc (.byte 102,15,56,219,M) is applied
# to every key except the two outermost ones. The value left in %eax by
# __aesni_set_encrypt_key is returned unchanged on failure; on success %eax
# is still the zero it returned.
# NOTE(review): the conversion relies on the callee leaving the key schedule
# pointer in %rdx and the round-count value in %esi - confirm against
# __aesni_set_encrypt_key.
4045.globl aesni_set_decrypt_key
4046.type aesni_set_decrypt_key,@function
4047.align 16
4048aesni_set_decrypt_key:
4049.cfi_startproc
# .byte 0x48,0x83,0xEC,0x08 encodes subq $8,%rsp (undone by the addq at
# .Ldec_key_ret).
4050.byte 0x48,0x83,0xEC,0x08
4051.cfi_adjust_cfa_offset 8
4052 call __aesni_set_encrypt_key
4053 shll $4,%esi
4054 testl %eax,%eax
4055 jnz .Ldec_key_ret
# %rdi = address of the last round key: 16 + %rdx + 16 * round-count value.
4056 leaq 16(%rdx,%rsi,1),%rdi
4057
# Swap the first and last round keys as they are (no aesimc), then step both
# pointers one key inwards.
4058 movups (%rdx),%xmm0
4059 movups (%rdi),%xmm1
4060 movups %xmm0,(%rdi)
4061 movups %xmm1,(%rdx)
4062 leaq 16(%rdx),%rdx
4063 leaq -16(%rdi),%rdi
4064
# Walk inwards from both ends: apply aesimc to the pair of keys and store
# each at the position the other came from. Continues while %rdi is still
# above %rdx.
4065.Ldec_key_inverse:
4066 movups (%rdx),%xmm0
4067 movups (%rdi),%xmm1
4068.byte 102,15,56,219,192
4069.byte 102,15,56,219,201
4070 leaq 16(%rdx),%rdx
4071 leaq -16(%rdi),%rdi
4072 movups %xmm0,16(%rdi)
4073 movups %xmm1,-16(%rdx)
4074 cmpq %rdx,%rdi
4075 ja .Ldec_key_inverse
4076
# Remaining middle key: aesimc and store through %rdi, then clear the two
# XMM registers that held key material.
4077 movups (%rdx),%xmm0
4078.byte 102,15,56,219,192
4079 pxor %xmm1,%xmm1
4080 movups %xmm0,(%rdi)
4081 pxor %xmm0,%xmm0
4082.Ldec_key_ret:
4083 addq $8,%rsp
4084.cfi_adjust_cfa_offset -8
# .byte 0xf3,0xc3 encodes rep ret.
4085 .byte 0xf3,0xc3
4086.cfi_endproc
4087.LSEH_end_set_decrypt_key:
4088.size aesni_set_decrypt_key,.-aesni_set_decrypt_key
4089.globl aesni_set_encrypt_key
4090.type aesni_set_encrypt_key,@function
4091.align 16
4092aesni_set_encrypt_key:
4093__aesni_set_encrypt_key:
4094.cfi_startproc
4095.byte 0x48,0x83,0xEC,0x08
4096.cfi_adjust_cfa_offset 8
4097 movq $-1,%rax
4098 testq %rdi,%rdi
4099 jz .Lenc_key_ret
4100 testq %rdx,%rdx
4101 jz .Lenc_key_ret
4102
4103 movl $268437504,%r10d
4104 movups (%rdi),%xmm0
4105 xorps %xmm4,%xmm4
4106 andl OPENSSL_ia32cap_P+4(%rip),%r10d
4107 leaq 16(%rdx),%rax
4108 cmpl $256,%esi
4109 je .L14rounds
4110 cmpl $192,%esi
4111 je .L12rounds
4112 cmpl $128,%esi
4113 jne .Lbad_keybits
4114
4115.L10rounds:
4116 movl $9,%esi
4117 cmpl $268435456,%r10d
4118 je .L10rounds_alt
4119
4120 movups %xmm0,(%rdx)
4121.byte 102,15,58,223,200,1
4122 call .Lkey_expansion_128_cold
4123.byte 102,15,58,223,200,2
4124 call .Lkey_expansion_128
4125.byte 102,15,58,223,200,4
4126 call .Lkey_expansion_128
4127.byte 102,15,58,223,200,8
4128 call .Lkey_expansion_128
4129.byte 102,15,58,223,200,16
4130 call .Lkey_expansion_128
4131.byte 102,15,58,223,200,32
4132 call .Lkey_expansion_128
4133.byte 102,15,58,223,200,64
4134 call .Lkey_expansion_128
4135.byte 102,15,58,223,200,128
4136 call .Lkey_expansion_128
4137.byte 102,15,58,223,200,27
4138 call .Lkey_expansion_128
4139.byte 102,15,58,223,200,54
4140 call .Lkey_expansion_128
4141 movups %xmm0,(%rax)
4142 movl %esi,80(%rax)
4143 xorl %eax,%eax
4144 jmp .Lenc_key_ret
4145
# Alternative AES-128 path, taken when %r10d == 0x10000000. It derives each
# round key with pshufb + aesenclast in place of aeskeygenassist.
# Setup: %xmm5 = byte-shuffle mask .Lkey_rotate, %xmm4 = round constant
# (starts at .Lkey_rcon1), %xmm2 = copy of the previous round key,
# %r10d = 8 loop iterations, (%rdx) = first round key (the user key).
4146.align 16
4147.L10rounds_alt:
4148 movdqa .Lkey_rotate(%rip),%xmm5
4149 movl $8,%r10d
4150 movdqa .Lkey_rcon1(%rip),%xmm4
4151 movdqa %xmm0,%xmm2
4152 movdqu %xmm0,(%rdx)
4153 jmp .Loop_key128
4154
4155.align 16
4156.Loop_key128:
# Hand-encoded pshufb %xmm5,%xmm0 followed by aesenclast %xmm4,%xmm0.
# The mask copies bytes 13,14,15,12 of the previous round key into every
# dword; aesenclast then applies the byte substitution and XORs in the round
# constant (all four columns are equal, so its row shift changes nothing).
4157.byte 102,15,56,0,197
4158.byte 102,15,56,221,196
# Double every dword of the round constant for the next use and advance the
# write cursor by one round key.
4159 pslld $1,%xmm4
4160 leaq 16(%rax),%rax
4161
# With k = previous round key, this ladder leaves
# %xmm2 = k ^ (k << 32) ^ (k << 64) ^ (k << 96) (whole-register shifts).
4162 movdqa %xmm2,%xmm3
4163 pslldq $4,%xmm2
4164 pxor %xmm2,%xmm3
4165 pslldq $4,%xmm2
4166 pxor %xmm2,%xmm3
4167 pslldq $4,%xmm2
4168 pxor %xmm3,%xmm2
4169
# New round key = transformed word ^ ladder result; store it in the slot the
# cursor just moved past and keep a copy in %xmm2 for the next step.
4170 pxor %xmm2,%xmm0
4171 movdqu %xmm0,-16(%rax)
4172 movdqa %xmm0,%xmm2
4173
4174 decl %r10d
4175 jnz .Loop_key128
4176
# Eight round keys are done (constants 1 through 128). The remaining two use
# the constants 0x1b and 0x36: load .Lkey_rcon1b and let pslld double it
# after its first use.
4177 movdqa .Lkey_rcon1b(%rip),%xmm4
4178
# Same step as the loop body, using constant 0x1b; the result is stored at
# (%rax) without moving the cursor.
4179.byte 102,15,56,0,197
4180.byte 102,15,56,221,196
4181 pslld $1,%xmm4
4182
4183 movdqa %xmm2,%xmm3
4184 pslldq $4,%xmm2
4185 pxor %xmm2,%xmm3
4186 pslldq $4,%xmm2
4187 pxor %xmm2,%xmm3
4188 pslldq $4,%xmm2
4189 pxor %xmm3,%xmm2
4190
4191 pxor %xmm2,%xmm0
4192 movdqu %xmm0,(%rax)
4193
# Final step, using constant 0x36; the result is stored at 16(%rax).
4194 movdqa %xmm0,%xmm2
4195.byte 102,15,56,0,197
4196.byte 102,15,56,221,196
4197
4198 movdqa %xmm2,%xmm3
4199 pslldq $4,%xmm2
4200 pxor %xmm2,%xmm3
4201 pslldq $4,%xmm2
4202 pxor %xmm2,%xmm3
4203 pslldq $4,%xmm2
4204 pxor %xmm3,%xmm2
4205
4206 pxor %xmm2,%xmm0
4207 movdqu %xmm0,16(%rax)
4208
# Here %rax is %rdx+144, so 96(%rax) is the round-count field at offset 240.
# Return zero through the common exit.
4209 movl %esi,96(%rax)
4210 xorl %eax,%eax
4211 jmp .Lenc_key_ret
4212
# AES-192 path based on aeskeygenassist.
# The low qword of %xmm2 receives key bytes 16..23 and %esi becomes 11, the
# value written to the round-count field. When %r10d equals 0x10000000 the
# pshufb/aesenclast variant at .L12rounds_alt is used instead.
4213.align 16
4214.L12rounds:
4215 movq 16(%rdi),%xmm2
4216 movl $11,%esi
4217 cmpl $268435456,%r10d
4218 je .L12rounds_alt
4219
# The first 16 bytes of the schedule are the first 16 key bytes.
4220 movups %xmm0,(%rdx)
# Each .byte 102,15,58,223,202,N line below is a hand-encoded
# aeskeygenassist $N,%xmm2,%xmm1. The helpers alternate because each
# expansion step yields 24 bytes while round keys are 16 bytes: the 192a
# helper stores 16 bytes per call (none through its _cold entry) and the
# 192b helper stores 32 bytes per call.
4221.byte 102,15,58,223,202,1
4222 call .Lkey_expansion_192a_cold
4223.byte 102,15,58,223,202,2
4224 call .Lkey_expansion_192b
4225.byte 102,15,58,223,202,4
4226 call .Lkey_expansion_192a
4227.byte 102,15,58,223,202,8
4228 call .Lkey_expansion_192b
4229.byte 102,15,58,223,202,16
4230 call .Lkey_expansion_192a
4231.byte 102,15,58,223,202,32
4232 call .Lkey_expansion_192b
4233.byte 102,15,58,223,202,64
4234 call .Lkey_expansion_192a
4235.byte 102,15,58,223,202,128
4236 call .Lkey_expansion_192b
# Here %rax is %rdx+192: store the final 16 bytes, then write %esi to the
# round-count field at %rdx+240 (48 bytes further on) and return zero.
4237 movups %xmm0,(%rax)
4238 movl %esi,48(%rax)
4239 xorq %rax,%rax
4240 jmp .Lenc_key_ret
4241
# Alternative AES-192 path (pshufb + aesenclast), taken when
# %r10d == 0x10000000.
# Setup: %xmm5 = byte-shuffle mask .Lkey_rotate192, %xmm4 = round constant
# (starts at .Lkey_rcon1), %r10d = 8 iterations, (%rdx) = first 16 key bytes.
# Every iteration writes 24 bytes of key schedule.
4242.align 16
4243.L12rounds_alt:
4244 movdqa .Lkey_rotate192(%rip),%xmm5
4245 movdqa .Lkey_rcon1(%rip),%xmm4
4246 movl $8,%r10d
4247 movdqu %xmm0,(%rdx)
4248 jmp .Loop_key192
4249
4250.align 16
4251.Loop_key192:
# Store the low 8 bytes of %xmm2 at the cursor and keep a copy of %xmm2 in
# %xmm1 before it is transformed.
4252 movq %xmm2,0(%rax)
4253 movdqa %xmm2,%xmm1
# Hand-encoded pshufb %xmm5,%xmm2 followed by aesenclast %xmm4,%xmm2.
# The mask copies bytes 5,6,7,4 of %xmm2 into every dword; aesenclast then
# applies the byte substitution and XORs in the round constant.
4254.byte 102,15,56,0,213
4255.byte 102,15,56,221,212
# Double the round constant and advance the cursor by 24 bytes.
4256 pslld $1,%xmm4
4257 leaq 24(%rax),%rax
4258
# With k = old %xmm0, this ladder leaves
# %xmm0 = k ^ (k << 32) ^ (k << 64) ^ (k << 96) (whole-register shifts).
4259 movdqa %xmm0,%xmm3
4260 pslldq $4,%xmm0
4261 pxor %xmm0,%xmm3
4262 pslldq $4,%xmm0
4263 pxor %xmm0,%xmm3
4264 pslldq $4,%xmm0
4265 pxor %xmm3,%xmm0
4266
# %xmm3 = top dword of the ladder result broadcast to all dwords, XORed with
# the saved %xmm1 and with %xmm1 shifted left by one dword.
4267 pshufd $0xff,%xmm0,%xmm3
4268 pxor %xmm1,%xmm3
4269 pslldq $4,%xmm1
4270 pxor %xmm1,%xmm3
4271
# %xmm0 ^= transformed word gives the next 16 bytes of schedule, stored right
# after the 8 bytes written at the top of this iteration. %xmm2 ^= %xmm3
# leaves in its low qword the 8 bytes that the next iteration stores first.
4272 pxor %xmm2,%xmm0
4273 pxor %xmm3,%xmm2
4274 movdqu %xmm0,-16(%rax)
4275
4276 decl %r10d
4277 jnz .Loop_key192
4278
# Here %rax is %rdx+208, so 32(%rax) is the round-count field at offset 240.
# Return zero through the common exit.
4279 movl %esi,32(%rax)
4280 xorl %eax,%eax
4281 jmp .Lenc_key_ret
4282
# AES-256 path based on aeskeygenassist.
# %xmm2 receives key bytes 16..31, %esi becomes 13 (the value written to the
# round-count field) and the cursor %rax moves on to %rdx+32 because the
# first two round keys are the user key itself. When %r10d equals 0x10000000
# the pshufb/aesenclast variant at .L14rounds_alt is used instead.
4283.align 16
4284.L14rounds:
4285 movups 16(%rdi),%xmm2
4286 movl $13,%esi
4287 leaq 16(%rax),%rax
4288 cmpl $268435456,%r10d
4289 je .L14rounds_alt
4290
4291 movups %xmm0,(%rdx)
4292 movups %xmm2,16(%rdx)
# Two hand-encoded forms alternate below:
#   .byte 102,15,58,223,202,N = aeskeygenassist $N,%xmm2,%xmm1, followed by
#   the 256a helper, which updates %xmm0 (and, except through its _cold
#   entry, first stores %xmm2 at the cursor);
#   .byte 102,15,58,223,200,N = aeskeygenassist $N,%xmm0,%xmm1, followed by
#   the 256b helper, which stores %xmm0 at the cursor and updates %xmm2.
# Both helpers advance %rax by 16 per store.
4293.byte 102,15,58,223,202,1
4294 call .Lkey_expansion_256a_cold
4295.byte 102,15,58,223,200,1
4296 call .Lkey_expansion_256b
4297.byte 102,15,58,223,202,2
4298 call .Lkey_expansion_256a
4299.byte 102,15,58,223,200,2
4300 call .Lkey_expansion_256b
4301.byte 102,15,58,223,202,4
4302 call .Lkey_expansion_256a
4303.byte 102,15,58,223,200,4
4304 call .Lkey_expansion_256b
4305.byte 102,15,58,223,202,8
4306 call .Lkey_expansion_256a
4307.byte 102,15,58,223,200,8
4308 call .Lkey_expansion_256b
4309.byte 102,15,58,223,202,16
4310 call .Lkey_expansion_256a
4311.byte 102,15,58,223,200,16
4312 call .Lkey_expansion_256b
4313.byte 102,15,58,223,202,32
4314 call .Lkey_expansion_256a
4315.byte 102,15,58,223,200,32
4316 call .Lkey_expansion_256b
4317.byte 102,15,58,223,202,64
4318 call .Lkey_expansion_256a
# Here %rax is %rdx+224: store the last round key, then write %esi to the
# round-count field at %rdx+240 (16 bytes further on) and return zero.
4319 movups %xmm0,(%rax)
4320 movl %esi,16(%rax)
4321 xorq %rax,%rax
4322 jmp .Lenc_key_ret
4323
# Alternative AES-256 path (pshufb + aesenclast), taken when
# %r10d == 0x10000000.
# Setup: %xmm5 = byte-shuffle mask .Lkey_rotate, %xmm4 = round constant
# (starts at .Lkey_rcon1), %r10d = 7 iterations, %xmm0 and %xmm2 = the two
# halves of the user key (also stored as the first two round keys),
# %xmm1 = copy of %xmm2. The cursor %rax is %rdx+32 on entry.
4324.align 16
4325.L14rounds_alt:
4326 movdqa .Lkey_rotate(%rip),%xmm5
4327 movdqa .Lkey_rcon1(%rip),%xmm4
4328 movl $7,%r10d
4329 movdqu %xmm0,0(%rdx)
4330 movdqa %xmm2,%xmm1
4331 movdqu %xmm2,16(%rdx)
4332 jmp .Loop_key256
4333
4334.align 16
4335.Loop_key256:
# Hand-encoded pshufb %xmm5,%xmm2 followed by aesenclast %xmm4,%xmm2.
# The mask copies bytes 13,14,15,12 of %xmm2 into every dword; aesenclast
# then applies the byte substitution and XORs in the round constant.
4336.byte 102,15,56,0,213
4337.byte 102,15,56,221,212
4338
# With k = old %xmm0, this ladder leaves
# %xmm0 = k ^ (k << 32) ^ (k << 64) ^ (k << 96); the round constant is
# doubled for the next iteration.
4339 movdqa %xmm0,%xmm3
4340 pslldq $4,%xmm0
4341 pxor %xmm0,%xmm3
4342 pslldq $4,%xmm0
4343 pxor %xmm0,%xmm3
4344 pslldq $4,%xmm0
4345 pxor %xmm3,%xmm0
4346 pslld $1,%xmm4
4347
# Combine with the transformed word and store the resulting round key at the
# cursor.
4348 pxor %xmm2,%xmm0
4349 movdqu %xmm0,(%rax)
4350
# The seventh pass stops here: its key is the last one of the schedule.
4351 decl %r10d
4352 jz .Ldone_key256
4353
# Second half of the step: broadcast the top dword of the new %xmm0 and run
# aesenclast against an all-zero %xmm3 (.byte 102,15,56,221,211 is
# aesenclast %xmm3,%xmm2), so only the byte substitution is applied, with no
# byte rotation and no round constant.
4354 pshufd $0xff,%xmm0,%xmm2
4355 pxor %xmm3,%xmm3
4356.byte 102,15,56,221,211
4357
# Same shift/XOR ladder, this time on %xmm1 (the previous key of this half).
4358 movdqa %xmm1,%xmm3
4359 pslldq $4,%xmm1
4360 pxor %xmm1,%xmm3
4361 pslldq $4,%xmm1
4362 pxor %xmm1,%xmm3
4363 pslldq $4,%xmm1
4364 pxor %xmm3,%xmm1
4365
# Store the second round key of this pass at 16(%rax), advance the cursor by
# 32 bytes and keep a copy of the key in %xmm1 for the next pass.
4366 pxor %xmm1,%xmm2
4367 movdqu %xmm2,16(%rax)
4368 leaq 32(%rax),%rax
4369 movdqa %xmm2,%xmm1
4370
4371 jmp .Loop_key256
4372
# Six full passes moved %rax to %rdx+224, so 16(%rax) is the round-count
# field at offset 240. Return zero through the common exit.
4373.Ldone_key256:
4374 movl %esi,16(%rax)
4375 xorl %eax,%eax
4376 jmp .Lenc_key_ret
4377
# Common exit of aesni_set_encrypt_key.
# .Lbad_keybits sets the return value to -2 (unsupported bit length) and
# falls through into .Lenc_key_ret; every other path arrives with %rax
# already set (-1 for a NULL argument, zero on success).
4378.align 16
4379.Lbad_keybits:
4380 movq $-2,%rax
4381.Lenc_key_ret:
# Clear %xmm0-%xmm5, which wipes the key material they held.
4382 pxor %xmm0,%xmm0
4383 pxor %xmm1,%xmm1
4384 pxor %xmm2,%xmm2
4385 pxor %xmm3,%xmm3
4386 pxor %xmm4,%xmm4
4387 pxor %xmm5,%xmm5
# Release the 8 bytes reserved by the hand-encoded sub at function entry.
4388 addq $8,%rsp
4389.cfi_adjust_cfa_offset -8
# Hand-encoded rep ret.
4390 .byte 0xf3,0xc3
# NOTE(review): this label is not referenced in the visible part of the
# file; presumably it is a marker left by the code generator.
4391.LSEH_end_set_encrypt_key:
4392
# .Lkey_expansion_128: one AES-128 key-expansion step, reached with call.
#   in:   %xmm0 = previous round key
#         %xmm1 = aeskeygenassist result for that key
#         %xmm4 = scratch whose low dword is zero (cleared at function entry)
#         %rax  = write cursor
#   out:  %xmm0 = next round key; %xmm1 and %xmm4 are modified
# The main entry first stores %xmm0 at (%rax) and advances %rax by 16; the
# _cold entry skips that store.
4393.align 16
4394.Lkey_expansion_128:
4395 movups %xmm0,(%rax)
4396 leaq 16(%rax),%rax
4397.Lkey_expansion_128_cold:
# The two shufps/xorps pairs leave dword i of %xmm0 equal to the XOR of its
# input dwords zero through i. The low dword of %xmm4 is never overwritten,
# so it is still zero for the next call.
4398 shufps $16,%xmm0,%xmm4
4399 xorps %xmm4,%xmm0
4400 shufps $140,%xmm0,%xmm4
4401 xorps %xmm4,%xmm0
# Broadcast dword 3 of the aeskeygenassist result and XOR it into every dword.
4402 shufps $255,%xmm1,%xmm1
4403 xorps %xmm1,%xmm0
# Hand-encoded rep ret.
4404 .byte 0xf3,0xc3
4405
# AES-192 key-expansion helpers, reached with call from .L12rounds.
# State on entry: %xmm0 = 16 bytes of current key material, low qword of
# %xmm2 = the following 8 bytes, %xmm1 = aeskeygenassist result,
# %xmm4 = scratch with a zero low dword, %rax = write cursor.
#
# .Lkey_expansion_192a       stores %xmm0 at (%rax), advances %rax by 16 and
#                            falls into the _cold entry.
# .Lkey_expansion_192a_cold  saves %xmm2 in %xmm5 for the next 192b call.
# .Lkey_expansion_192b_warm  is the update shared by both helpers.
# .Lkey_expansion_192b       stores 32 bytes built from %xmm5, %xmm0 and
#                            %xmm2, advances %rax by 32 and jumps to _warm.
4406.align 16
4407.Lkey_expansion_192a:
4408 movups %xmm0,(%rax)
4409 leaq 16(%rax),%rax
4410.Lkey_expansion_192a_cold:
4411 movaps %xmm2,%xmm5
4412.Lkey_expansion_192b_warm:
# Two interleaved computations:
#   %xmm0: the shufps/xorps ladder through %xmm4 makes dword i the XOR of
#          input dwords zero through i; then dword 1 of %xmm1, broadcast by
#          pshufd $85, is XORed into every dword.
#   %xmm2: XORed with itself shifted left by one dword (through %xmm3), then
#          with the top dword of the new %xmm0 broadcast by pshufd $255.
4413 shufps $16,%xmm0,%xmm4
4414 movdqa %xmm2,%xmm3
4415 xorps %xmm4,%xmm0
4416 shufps $140,%xmm0,%xmm4
4417 pslldq $4,%xmm3
4418 xorps %xmm4,%xmm0
4419 pshufd $85,%xmm1,%xmm1
4420 pxor %xmm3,%xmm2
4421 pxor %xmm1,%xmm0
4422 pshufd $255,%xmm0,%xmm3
4423 pxor %xmm3,%xmm2
# Hand-encoded rep ret.
4424 .byte 0xf3,0xc3
4425
4426.align 16
4427.Lkey_expansion_192b:
# shufps $68 makes %xmm5 = low qword of %xmm5 followed by low qword of %xmm0;
# shufps $78 makes %xmm3 = high qword of %xmm0 followed by low qword of %xmm2.
# Together they repack 24+8 bytes of key material into two 16-byte round keys.
4428 movaps %xmm0,%xmm3
4429 shufps $68,%xmm0,%xmm5
4430 movups %xmm5,(%rax)
4431 shufps $78,%xmm2,%xmm3
4432 movups %xmm3,16(%rax)
4433 leaq 32(%rax),%rax
# Continue with the shared update; %xmm5 is not refreshed on this route.
4434 jmp .Lkey_expansion_192b_warm
4435
# AES-256 key-expansion helpers, reached with call from .L14rounds.
# %xmm0 and %xmm2 hold the two most recent round keys, %xmm1 holds the
# aeskeygenassist result, %xmm4 is scratch with a zero low dword and %rax is
# the write cursor.
#
# .Lkey_expansion_256a stores %xmm2 at (%rax), advances %rax by 16 and then
# updates %xmm0; its _cold entry skips the store.
4436.align 16
4437.Lkey_expansion_256a:
4438 movups %xmm2,(%rax)
4439 leaq 16(%rax),%rax
4440.Lkey_expansion_256a_cold:
# Dword i of %xmm0 becomes the XOR of its input dwords zero through i, then
# dword 3 of %xmm1 (broadcast by shufps $255) is XORed into every dword.
4441 shufps $16,%xmm0,%xmm4
4442 xorps %xmm4,%xmm0
4443 shufps $140,%xmm0,%xmm4
4444 xorps %xmm4,%xmm0
4445 shufps $255,%xmm1,%xmm1
4446 xorps %xmm1,%xmm0
# Hand-encoded rep ret.
4447 .byte 0xf3,0xc3
4448
# .Lkey_expansion_256b stores %xmm0 at (%rax), advances %rax by 16 and then
# updates %xmm2 the same way, except that dword 2 of %xmm1 (broadcast by
# shufps $170) is the word XORed in.
4449.align 16
4450.Lkey_expansion_256b:
4451 movups %xmm0,(%rax)
4452 leaq 16(%rax),%rax
4453
4454 shufps $16,%xmm2,%xmm4
4455 xorps %xmm4,%xmm2
4456 shufps $140,%xmm2,%xmm4
4457 xorps %xmm4,%xmm2
4458 shufps $170,%xmm1,%xmm1
4459 xorps %xmm1,%xmm2
# Hand-encoded rep ret.
4460 .byte 0xf3,0xc3
# End of the CFI region opened at aesni_set_encrypt_key. Both symbol sizes
# are measured up to this point, so they include the key-expansion helpers.
4461.cfi_endproc
4462.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
4463.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
# Constant pool. It starts on a 64-byte boundary and every labelled entry is
# 16 bytes long, so each label is 16-byte aligned; the movdqa loads of
# .Lkey_rotate, .Lkey_rotate192, .Lkey_rcon1 and .Lkey_rcon1b in
# aesni_set_encrypt_key need that alignment.
4464.align 64
# Byte indices 15 down to zero.
# NOTE(review): presumably a byte-reversal shuffle mask; its users are
# outside the part of the file reviewed here.
4465.Lbswap_mask:
4466.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
# NOTE(review): the three increment tables and .Lxts_magic are not used by
# the key-setup code; presumably they serve the counter-mode and XTS routines
# elsewhere in this file. Confirm before changing any value.
4467.Lincrement32:
4468.long 6,6,6,0
4469.Lincrement64:
4470.long 1,0,0,0
4471.Lxts_magic:
4472.long 0x87,0,1,0
4473.Lincrement1:
4474.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
# Shuffle mask for the alternative AES-128 and AES-256 key-setup loops:
# copies source bytes 13,14,15,12 into every dword.
4475.Lkey_rotate:
4476.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
# Shuffle mask for the alternative AES-192 key-setup loop: copies source
# bytes 5,6,7,4 into every dword.
4477.Lkey_rotate192:
4478.long 0x04070605,0x04070605,0x04070605,0x04070605
# Initial round constant (1 in every dword); the key-setup loops double it
# with pslld after each use.
4479.Lkey_rcon1:
4480.long 1,1,1,1
# Round constant 0x1b in every dword, loaded by the alternative AES-128 path
# for its last two round keys.
4481.Lkey_rcon1b:
4482.long 0x1b,0x1b,0x1b,0x1b
4483
# NUL-terminated ASCII identification string:
# AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>
4484.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
4485.align 64
# ELF note placed in the .note.gnu.property section. Layout of the words
# emitted below, in order:
#   name size       = 1f - 0f  (the four bytes G, N, U, NUL)
#   descriptor size = 4f - 1f  (one property record padded to 8 bytes)
#   note type       = 5
#   name bytes, then the descriptor: property type 0xc0000002, data size
#   3f - 2f (one 32-bit word), data word 3, padding up to 8 bytes.
# NOTE(review): per the x86-64 psABI, note type 5 is NT_GNU_PROPERTY_TYPE_0,
# property 0xc0000002 is GNU_PROPERTY_X86_FEATURE_1_AND and the value 3 sets
# both the IBT and the SHSTK bits; confirm against the psABI text.
4486 .section .note.gnu.property, #alloc
4487 .p2align 3
4488 .long 1f - 0f
4489 .long 4f - 1f
4490 .long 5
44910:
4492 # "GNU" encoded with .byte, since .asciz isn't supported
4493 # on Solaris.
4494 .byte 0x47
4495 .byte 0x4e
4496 .byte 0x55
4497 .byte 0
44981:
4499 .p2align 3
4500 .long 0xc0000002
4501 .long 3f - 2f
45022:
4503 .long 3
45043:
4505 .p2align 3
45064:
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette