VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.1f/crypto/genasm-elf/aesni-x86_64.S@ 83531

Last change on this file since 83531 was 83531, checked in by vboxsync, 5 years ago

setting svn:sync-process=export for openssl-1.1.1f, all files except tests

File size: 83.8 KB
Line 
.text

/*
 * aesni_encrypt -- encrypt a single 16-byte block.
 *
 * Syntax/ABI: GNU as, AT&T operand order, System V AMD64 argument registers.
 *   %rdi = address of the 16-byte input block (unaligned access is used)
 *   %rsi = address of the 16-byte output block
 *   %rdx = key schedule: 16-byte round keys starting at offset 0 and a
 *          32-bit loop count N at offset 240 (presumably the OpenSSL
 *          AES_KEY layout -- confirm against the key-setup routine)
 *
 * The block is XORed with round key 0, run through N aesenc steps and
 * finished with one aesenclast.  AES-NI opcodes are emitted as raw .byte
 * sequences; the decoded instruction is given next to each one.
 *
 * Clobbers: %eax, %rdx, flags, %xmm0-%xmm2 (all three are zeroed before
 * returning so no key or state material is left in them).
 */
.globl aesni_encrypt
.type aesni_encrypt,@function
.align 16
aesni_encrypt:
.cfi_startproc
        movups (%rdi),%xmm2             # xmm2 = input block
        movl 240(%rdx),%eax             # eax  = loop count N
        movups (%rdx),%xmm0             # xmm0 = round key 0
        movups 16(%rdx),%xmm1           # xmm1 = round key 1
        leaq 32(%rdx),%rdx              # rdx -> round key 2
        xorps %xmm0,%xmm2               # initial key whitening
.Loop_enc1_1:
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        decl %eax                       # sets ZF for the jnz below
        movups (%rdx),%xmm1             # next round key (flags untouched)
        leaq 16(%rdx),%rdx              # advance key pointer (flags untouched)
        jnz .Loop_enc1_1
        .byte 102,15,56,221,209         # aesenclast %xmm1,%xmm2
        pxor %xmm0,%xmm0                # scrub key material
        pxor %xmm1,%xmm1
        movups %xmm2,(%rsi)             # store result
        pxor %xmm2,%xmm2                # scrub state
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size aesni_encrypt,.-aesni_encrypt

/*
 * aesni_decrypt -- decrypt a single 16-byte block.
 *
 * Syntax/ABI: GNU as, AT&T operand order, System V AMD64 argument registers.
 *   %rdi = address of the 16-byte input block (unaligned access is used)
 *   %rsi = address of the 16-byte output block
 *   %rdx = key schedule: 16-byte round keys starting at offset 0 and a
 *          32-bit loop count N at offset 240 (presumably a decryption
 *          key schedule in the OpenSSL AES_KEY layout -- confirm)
 *
 * The block is XORed with round key 0, run through N aesdec steps and
 * finished with one aesdeclast.  AES-NI opcodes are emitted as raw .byte
 * sequences; the decoded instruction is given next to each one.
 *
 * Clobbers: %eax, %rdx, flags, %xmm0-%xmm2 (all three are zeroed before
 * returning).
 */
.globl aesni_decrypt
.type aesni_decrypt,@function
.align 16
aesni_decrypt:
.cfi_startproc
        movups (%rdi),%xmm2             # xmm2 = input block
        movl 240(%rdx),%eax             # eax  = loop count N
        movups (%rdx),%xmm0             # xmm0 = round key 0
        movups 16(%rdx),%xmm1           # xmm1 = round key 1
        leaq 32(%rdx),%rdx              # rdx -> round key 2
        xorps %xmm0,%xmm2               # initial key whitening
.Loop_dec1_2:
        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        decl %eax                       # sets ZF for the jnz below
        movups (%rdx),%xmm1             # next round key (flags untouched)
        leaq 16(%rdx),%rdx              # advance key pointer (flags untouched)
        jnz .Loop_dec1_2
        .byte 102,15,56,223,209         # aesdeclast %xmm1,%xmm2
        pxor %xmm0,%xmm0                # scrub key material
        pxor %xmm1,%xmm1
        movups %xmm2,(%rsi)             # store result
        pxor %xmm2,%xmm2                # scrub state
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size aesni_decrypt, .-aesni_decrypt
/*
 * _aesni_encrypt2 -- file-local helper: encrypt two blocks in parallel.
 *
 * Private register contract (not the C calling convention):
 *   in:  %rcx = key schedule (16-byte round keys from offset 0)
 *        %eax = loop count N as stored at offset 240 of the schedule
 *        %xmm2, %xmm3 = the two blocks
 *   out: %xmm2, %xmm3 = processed blocks
 *
 * Setup turns N into a negative byte index: %rcx is moved to
 * key+32+16*N and %rax runs from 16-16*N up to 0 in steps of 32, so
 * (%rcx,%rax) walks the round keys and the addq that steps the index also
 * produces the loop-exit flag.  Each iteration applies two rounds
 * (keys alternate between %xmm1 and %xmm0), so the loop only terminates
 * when N is odd.
 *
 * Clobbers: %rax, %rcx, flags, %xmm0, %xmm1.
 */
.type _aesni_encrypt2,@function
.align 16
_aesni_encrypt2:
.cfi_startproc
        movups (%rcx),%xmm0             # round key 0
        shll $4,%eax                    # eax = 16*N (upper half of rax cleared)
        movups 16(%rcx),%xmm1           # round key 1
        xorps %xmm0,%xmm2               # whitening
        xorps %xmm0,%xmm3
        movups 32(%rcx),%xmm0           # round key 2
        leaq 32(%rcx,%rax,1),%rcx       # rcx = key + 32 + 16*N
        negq %rax
        addq $16,%rax                   # rax = 16 - 16*N

.Lenc_loop2:
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        movups (%rcx,%rax,1),%xmm1      # next odd-position key
        addq $32,%rax                   # ZF set when the index reaches 0
        .byte 102,15,56,220,208         # aesenc %xmm0,%xmm2
        .byte 102,15,56,220,216         # aesenc %xmm0,%xmm3
        movups -16(%rcx,%rax,1),%xmm0   # next even-position key (flags untouched)
        jnz .Lenc_loop2

        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        .byte 102,15,56,221,208         # aesenclast %xmm0,%xmm2
        .byte 102,15,56,221,216         # aesenclast %xmm0,%xmm3
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size _aesni_encrypt2,.-_aesni_encrypt2
/*
 * _aesni_decrypt2 -- file-local helper: decrypt two blocks in parallel.
 *
 * Private register contract (not the C calling convention):
 *   in:  %rcx = key schedule (16-byte round keys from offset 0)
 *        %eax = loop count N as stored at offset 240 of the schedule
 *        %xmm2, %xmm3 = the two blocks
 *   out: %xmm2, %xmm3 = processed blocks
 *
 * %rcx is moved to key+32+16*N and %rax runs from 16-16*N up to 0 in
 * steps of 32, so (%rcx,%rax) walks the round keys and the addq that
 * steps the index also produces the loop-exit flag.  Two rounds are
 * applied per iteration, so the loop only terminates when N is odd.
 *
 * Clobbers: %rax, %rcx, flags, %xmm0, %xmm1.
 */
.type _aesni_decrypt2,@function
.align 16
_aesni_decrypt2:
.cfi_startproc
        movups (%rcx),%xmm0             # round key 0
        shll $4,%eax                    # eax = 16*N (upper half of rax cleared)
        movups 16(%rcx),%xmm1           # round key 1
        xorps %xmm0,%xmm2               # whitening
        xorps %xmm0,%xmm3
        movups 32(%rcx),%xmm0           # round key 2
        leaq 32(%rcx,%rax,1),%rcx       # rcx = key + 32 + 16*N
        negq %rax
        addq $16,%rax                   # rax = 16 - 16*N

.Ldec_loop2:
        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        .byte 102,15,56,222,217         # aesdec %xmm1,%xmm3
        movups (%rcx,%rax,1),%xmm1      # next odd-position key
        addq $32,%rax                   # ZF set when the index reaches 0
        .byte 102,15,56,222,208         # aesdec %xmm0,%xmm2
        .byte 102,15,56,222,216         # aesdec %xmm0,%xmm3
        movups -16(%rcx,%rax,1),%xmm0   # next even-position key (flags untouched)
        jnz .Ldec_loop2

        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        .byte 102,15,56,222,217         # aesdec %xmm1,%xmm3
        .byte 102,15,56,223,208         # aesdeclast %xmm0,%xmm2
        .byte 102,15,56,223,216         # aesdeclast %xmm0,%xmm3
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size _aesni_decrypt2,.-_aesni_decrypt2
/*
 * _aesni_encrypt3 -- file-local helper: encrypt three blocks in parallel.
 *
 * Private register contract (not the C calling convention):
 *   in:  %rcx = key schedule (16-byte round keys from offset 0)
 *        %eax = loop count N as stored at offset 240 of the schedule
 *        %xmm2-%xmm4 = the three blocks
 *   out: %xmm2-%xmm4 = processed blocks
 *
 * %rcx is moved to key+32+16*N and %rax runs from 16-16*N up to 0 in
 * steps of 32; (%rcx,%rax) therefore walks the round keys.  Two rounds
 * are applied per iteration, so the loop only terminates when N is odd.
 *
 * Clobbers: %rax, %rcx, flags, %xmm0, %xmm1.
 */
.type _aesni_encrypt3,@function
.align 16
_aesni_encrypt3:
.cfi_startproc
        movups (%rcx),%xmm0             # round key 0
        shll $4,%eax                    # eax = 16*N
        movups 16(%rcx),%xmm1           # round key 1
        xorps %xmm0,%xmm2               # whitening
        xorps %xmm0,%xmm3
        xorps %xmm0,%xmm4
        movups 32(%rcx),%xmm0           # round key 2
        leaq 32(%rcx,%rax,1),%rcx       # rcx = key + 32 + 16*N
        negq %rax
        addq $16,%rax                   # rax = 16 - 16*N

.Lenc_loop3:
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        .byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
        movups (%rcx,%rax,1),%xmm1      # next odd-position key
        addq $32,%rax                   # ZF set when the index reaches 0
        .byte 102,15,56,220,208         # aesenc %xmm0,%xmm2
        .byte 102,15,56,220,216         # aesenc %xmm0,%xmm3
        .byte 102,15,56,220,224         # aesenc %xmm0,%xmm4
        movups -16(%rcx,%rax,1),%xmm0   # next even-position key (flags untouched)
        jnz .Lenc_loop3

        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        .byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
        .byte 102,15,56,221,208         # aesenclast %xmm0,%xmm2
        .byte 102,15,56,221,216         # aesenclast %xmm0,%xmm3
        .byte 102,15,56,221,224         # aesenclast %xmm0,%xmm4
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size _aesni_encrypt3,.-_aesni_encrypt3
/*
 * _aesni_decrypt3 -- file-local helper: decrypt three blocks in parallel.
 *
 * Private register contract (not the C calling convention):
 *   in:  %rcx = key schedule (16-byte round keys from offset 0)
 *        %eax = loop count N as stored at offset 240 of the schedule
 *        %xmm2-%xmm4 = the three blocks
 *   out: %xmm2-%xmm4 = processed blocks
 *
 * %rcx is moved to key+32+16*N and %rax runs from 16-16*N up to 0 in
 * steps of 32; (%rcx,%rax) therefore walks the round keys.  Two rounds
 * are applied per iteration, so the loop only terminates when N is odd.
 *
 * Clobbers: %rax, %rcx, flags, %xmm0, %xmm1.
 */
.type _aesni_decrypt3,@function
.align 16
_aesni_decrypt3:
.cfi_startproc
        movups (%rcx),%xmm0             # round key 0
        shll $4,%eax                    # eax = 16*N
        movups 16(%rcx),%xmm1           # round key 1
        xorps %xmm0,%xmm2               # whitening
        xorps %xmm0,%xmm3
        xorps %xmm0,%xmm4
        movups 32(%rcx),%xmm0           # round key 2
        leaq 32(%rcx,%rax,1),%rcx       # rcx = key + 32 + 16*N
        negq %rax
        addq $16,%rax                   # rax = 16 - 16*N

.Ldec_loop3:
        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        .byte 102,15,56,222,217         # aesdec %xmm1,%xmm3
        .byte 102,15,56,222,225         # aesdec %xmm1,%xmm4
        movups (%rcx,%rax,1),%xmm1      # next odd-position key
        addq $32,%rax                   # ZF set when the index reaches 0
        .byte 102,15,56,222,208         # aesdec %xmm0,%xmm2
        .byte 102,15,56,222,216         # aesdec %xmm0,%xmm3
        .byte 102,15,56,222,224         # aesdec %xmm0,%xmm4
        movups -16(%rcx,%rax,1),%xmm0   # next even-position key (flags untouched)
        jnz .Ldec_loop3

        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        .byte 102,15,56,222,217         # aesdec %xmm1,%xmm3
        .byte 102,15,56,222,225         # aesdec %xmm1,%xmm4
        .byte 102,15,56,223,208         # aesdeclast %xmm0,%xmm2
        .byte 102,15,56,223,216         # aesdeclast %xmm0,%xmm3
        .byte 102,15,56,223,224         # aesdeclast %xmm0,%xmm4
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size _aesni_decrypt3,.-_aesni_decrypt3
/*
 * _aesni_encrypt4 -- file-local helper: encrypt four blocks in parallel.
 *
 * Private register contract (not the C calling convention):
 *   in:  %rcx = key schedule (16-byte round keys from offset 0)
 *        %eax = loop count N as stored at offset 240 of the schedule
 *        %xmm2-%xmm5 = the four blocks
 *   out: %xmm2-%xmm5 = processed blocks
 *
 * %rcx is moved to key+32+16*N and %rax runs from 16-16*N up to 0 in
 * steps of 32; (%rcx,%rax) therefore walks the round keys.  Two rounds
 * are applied per iteration, so the loop only terminates when N is odd.
 *
 * Clobbers: %rax, %rcx, flags, %xmm0, %xmm1.
 */
.type _aesni_encrypt4,@function
.align 16
_aesni_encrypt4:
.cfi_startproc
        movups (%rcx),%xmm0             # round key 0
        shll $4,%eax                    # eax = 16*N
        movups 16(%rcx),%xmm1           # round key 1
        xorps %xmm0,%xmm2               # whitening
        xorps %xmm0,%xmm3
        xorps %xmm0,%xmm4
        xorps %xmm0,%xmm5
        movups 32(%rcx),%xmm0           # round key 2
        leaq 32(%rcx,%rax,1),%rcx       # rcx = key + 32 + 16*N
        negq %rax
        .byte 0x0f,0x1f,0x00            # nopl (%rax): 3-byte padding NOP
        addq $16,%rax                   # rax = 16 - 16*N

.Lenc_loop4:
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        .byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
        .byte 102,15,56,220,233         # aesenc %xmm1,%xmm5
        movups (%rcx,%rax,1),%xmm1      # next odd-position key
        addq $32,%rax                   # ZF set when the index reaches 0
        .byte 102,15,56,220,208         # aesenc %xmm0,%xmm2
        .byte 102,15,56,220,216         # aesenc %xmm0,%xmm3
        .byte 102,15,56,220,224         # aesenc %xmm0,%xmm4
        .byte 102,15,56,220,232         # aesenc %xmm0,%xmm5
        movups -16(%rcx,%rax,1),%xmm0   # next even-position key (flags untouched)
        jnz .Lenc_loop4

        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        .byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
        .byte 102,15,56,220,233         # aesenc %xmm1,%xmm5
        .byte 102,15,56,221,208         # aesenclast %xmm0,%xmm2
        .byte 102,15,56,221,216         # aesenclast %xmm0,%xmm3
        .byte 102,15,56,221,224         # aesenclast %xmm0,%xmm4
        .byte 102,15,56,221,232         # aesenclast %xmm0,%xmm5
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size _aesni_encrypt4,.-_aesni_encrypt4
/*
 * _aesni_decrypt4 -- file-local helper: decrypt four blocks in parallel.
 *
 * Private register contract (not the C calling convention):
 *   in:  %rcx = key schedule (16-byte round keys from offset 0)
 *        %eax = loop count N as stored at offset 240 of the schedule
 *        %xmm2-%xmm5 = the four blocks
 *   out: %xmm2-%xmm5 = processed blocks
 *
 * %rcx is moved to key+32+16*N and %rax runs from 16-16*N up to 0 in
 * steps of 32; (%rcx,%rax) therefore walks the round keys.  Two rounds
 * are applied per iteration, so the loop only terminates when N is odd.
 *
 * Clobbers: %rax, %rcx, flags, %xmm0, %xmm1.
 */
.type _aesni_decrypt4,@function
.align 16
_aesni_decrypt4:
.cfi_startproc
        movups (%rcx),%xmm0             # round key 0
        shll $4,%eax                    # eax = 16*N
        movups 16(%rcx),%xmm1           # round key 1
        xorps %xmm0,%xmm2               # whitening
        xorps %xmm0,%xmm3
        xorps %xmm0,%xmm4
        xorps %xmm0,%xmm5
        movups 32(%rcx),%xmm0           # round key 2
        leaq 32(%rcx,%rax,1),%rcx       # rcx = key + 32 + 16*N
        negq %rax
        .byte 0x0f,0x1f,0x00            # nopl (%rax): 3-byte padding NOP
        addq $16,%rax                   # rax = 16 - 16*N

.Ldec_loop4:
        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        .byte 102,15,56,222,217         # aesdec %xmm1,%xmm3
        .byte 102,15,56,222,225         # aesdec %xmm1,%xmm4
        .byte 102,15,56,222,233         # aesdec %xmm1,%xmm5
        movups (%rcx,%rax,1),%xmm1      # next odd-position key
        addq $32,%rax                   # ZF set when the index reaches 0
        .byte 102,15,56,222,208         # aesdec %xmm0,%xmm2
        .byte 102,15,56,222,216         # aesdec %xmm0,%xmm3
        .byte 102,15,56,222,224         # aesdec %xmm0,%xmm4
        .byte 102,15,56,222,232         # aesdec %xmm0,%xmm5
        movups -16(%rcx,%rax,1),%xmm0   # next even-position key (flags untouched)
        jnz .Ldec_loop4

        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        .byte 102,15,56,222,217         # aesdec %xmm1,%xmm3
        .byte 102,15,56,222,225         # aesdec %xmm1,%xmm4
        .byte 102,15,56,222,233         # aesdec %xmm1,%xmm5
        .byte 102,15,56,223,208         # aesdeclast %xmm0,%xmm2
        .byte 102,15,56,223,216         # aesdeclast %xmm0,%xmm3
        .byte 102,15,56,223,224         # aesdeclast %xmm0,%xmm4
        .byte 102,15,56,223,232         # aesdeclast %xmm0,%xmm5
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size _aesni_decrypt4,.-_aesni_decrypt4
/*
 * _aesni_encrypt6 -- file-local helper: encrypt six blocks in parallel.
 *
 * Private register contract (not the C calling convention):
 *   in:  %rcx = key schedule (16-byte round keys from offset 0)
 *        %eax = loop count N as stored at offset 240 of the schedule
 *        %xmm2-%xmm7 = the six blocks
 *   out: %xmm2-%xmm7 = processed blocks
 *
 * The prologue interleaves the round-key-0 whitening of the later blocks
 * with the first aesenc of the earlier ones, then jumps into the middle
 * of the loop (.Lenc_loop6_enter) to finish that first round for
 * %xmm5-%xmm7.  From there the loop applies two rounds per iteration with
 * (%rcx,%rax) walking the round keys; it only terminates when N is odd.
 *
 * .Lenc_loop6 is also used as a call target from elsewhere in this file,
 * so the label and the register state expected at it must be kept.
 *
 * Clobbers: %rax, %rcx, flags, %xmm0, %xmm1.
 */
.type _aesni_encrypt6,@function
.align 16
_aesni_encrypt6:
.cfi_startproc
        movups (%rcx),%xmm0             # round key 0
        shll $4,%eax                    # eax = 16*N
        movups 16(%rcx),%xmm1           # round key 1
        xorps %xmm0,%xmm2               # whitening, blocks 0-2
        pxor %xmm0,%xmm3
        pxor %xmm0,%xmm4
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        leaq 32(%rcx,%rax,1),%rcx       # rcx = key + 32 + 16*N
        negq %rax                       # rax = -16*N
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        pxor %xmm0,%xmm5                # whitening, blocks 3-5
        pxor %xmm0,%xmm6
        .byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
        pxor %xmm0,%xmm7
        movups (%rcx,%rax,1),%xmm0      # round key 2 (address = key + 32)
        addq $16,%rax                   # rax = 16 - 16*N
        jmp .Lenc_loop6_enter
.align 16
.Lenc_loop6:
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        .byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
.Lenc_loop6_enter:
        .byte 102,15,56,220,233         # aesenc %xmm1,%xmm5
        .byte 102,15,56,220,241         # aesenc %xmm1,%xmm6
        .byte 102,15,56,220,249         # aesenc %xmm1,%xmm7
        movups (%rcx,%rax,1),%xmm1      # next odd-position key
        addq $32,%rax                   # ZF set when the index reaches 0
        .byte 102,15,56,220,208         # aesenc %xmm0,%xmm2
        .byte 102,15,56,220,216         # aesenc %xmm0,%xmm3
        .byte 102,15,56,220,224         # aesenc %xmm0,%xmm4
        .byte 102,15,56,220,232         # aesenc %xmm0,%xmm5
        .byte 102,15,56,220,240         # aesenc %xmm0,%xmm6
        .byte 102,15,56,220,248         # aesenc %xmm0,%xmm7
        movups -16(%rcx,%rax,1),%xmm0   # next even-position key (flags untouched)
        jnz .Lenc_loop6

        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        .byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
        .byte 102,15,56,220,233         # aesenc %xmm1,%xmm5
        .byte 102,15,56,220,241         # aesenc %xmm1,%xmm6
        .byte 102,15,56,220,249         # aesenc %xmm1,%xmm7
        .byte 102,15,56,221,208         # aesenclast %xmm0,%xmm2
        .byte 102,15,56,221,216         # aesenclast %xmm0,%xmm3
        .byte 102,15,56,221,224         # aesenclast %xmm0,%xmm4
        .byte 102,15,56,221,232         # aesenclast %xmm0,%xmm5
        .byte 102,15,56,221,240         # aesenclast %xmm0,%xmm6
        .byte 102,15,56,221,248         # aesenclast %xmm0,%xmm7
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size _aesni_encrypt6,.-_aesni_encrypt6
/*
 * _aesni_decrypt6 -- file-local helper: decrypt six blocks in parallel.
 *
 * Private register contract (not the C calling convention):
 *   in:  %rcx = key schedule (16-byte round keys from offset 0)
 *        %eax = loop count N as stored at offset 240 of the schedule
 *        %xmm2-%xmm7 = the six blocks
 *   out: %xmm2-%xmm7 = processed blocks
 *
 * The prologue interleaves the round-key-0 whitening of the later blocks
 * with the first aesdec of the earlier ones, then jumps into the middle
 * of the loop (.Ldec_loop6_enter) to finish that first round for
 * %xmm5-%xmm7.  From there the loop applies two rounds per iteration with
 * (%rcx,%rax) walking the round keys; it only terminates when N is odd.
 *
 * Clobbers: %rax, %rcx, flags, %xmm0, %xmm1.
 */
.type _aesni_decrypt6,@function
.align 16
_aesni_decrypt6:
.cfi_startproc
        movups (%rcx),%xmm0             # round key 0
        shll $4,%eax                    # eax = 16*N
        movups 16(%rcx),%xmm1           # round key 1
        xorps %xmm0,%xmm2               # whitening, blocks 0-2
        pxor %xmm0,%xmm3
        pxor %xmm0,%xmm4
        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        leaq 32(%rcx,%rax,1),%rcx       # rcx = key + 32 + 16*N
        negq %rax                       # rax = -16*N
        .byte 102,15,56,222,217         # aesdec %xmm1,%xmm3
        pxor %xmm0,%xmm5                # whitening, blocks 3-5
        pxor %xmm0,%xmm6
        .byte 102,15,56,222,225         # aesdec %xmm1,%xmm4
        pxor %xmm0,%xmm7
        movups (%rcx,%rax,1),%xmm0      # round key 2 (address = key + 32)
        addq $16,%rax                   # rax = 16 - 16*N
        jmp .Ldec_loop6_enter
.align 16
.Ldec_loop6:
        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        .byte 102,15,56,222,217         # aesdec %xmm1,%xmm3
        .byte 102,15,56,222,225         # aesdec %xmm1,%xmm4
.Ldec_loop6_enter:
        .byte 102,15,56,222,233         # aesdec %xmm1,%xmm5
        .byte 102,15,56,222,241         # aesdec %xmm1,%xmm6
        .byte 102,15,56,222,249         # aesdec %xmm1,%xmm7
        movups (%rcx,%rax,1),%xmm1      # next odd-position key
        addq $32,%rax                   # ZF set when the index reaches 0
        .byte 102,15,56,222,208         # aesdec %xmm0,%xmm2
        .byte 102,15,56,222,216         # aesdec %xmm0,%xmm3
        .byte 102,15,56,222,224         # aesdec %xmm0,%xmm4
        .byte 102,15,56,222,232         # aesdec %xmm0,%xmm5
        .byte 102,15,56,222,240         # aesdec %xmm0,%xmm6
        .byte 102,15,56,222,248         # aesdec %xmm0,%xmm7
        movups -16(%rcx,%rax,1),%xmm0   # next even-position key (flags untouched)
        jnz .Ldec_loop6

        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        .byte 102,15,56,222,217         # aesdec %xmm1,%xmm3
        .byte 102,15,56,222,225         # aesdec %xmm1,%xmm4
        .byte 102,15,56,222,233         # aesdec %xmm1,%xmm5
        .byte 102,15,56,222,241         # aesdec %xmm1,%xmm6
        .byte 102,15,56,222,249         # aesdec %xmm1,%xmm7
        .byte 102,15,56,223,208         # aesdeclast %xmm0,%xmm2
        .byte 102,15,56,223,216         # aesdeclast %xmm0,%xmm3
        .byte 102,15,56,223,224         # aesdeclast %xmm0,%xmm4
        .byte 102,15,56,223,232         # aesdeclast %xmm0,%xmm5
        .byte 102,15,56,223,240         # aesdeclast %xmm0,%xmm6
        .byte 102,15,56,223,248         # aesdeclast %xmm0,%xmm7
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size _aesni_decrypt6,.-_aesni_decrypt6
/*
 * _aesni_encrypt8 -- file-local helper: encrypt eight blocks in parallel.
 *
 * Private register contract (not the C calling convention):
 *   in:  %rcx = key schedule (16-byte round keys from offset 0)
 *        %eax = loop count N as stored at offset 240 of the schedule
 *        %xmm2-%xmm9 = the eight blocks
 *   out: %xmm2-%xmm9 = processed blocks
 *
 * The prologue whitens all blocks with round key 0 while already starting
 * the first aesenc on %xmm2/%xmm3, then jumps to .Lenc_loop8_inner to
 * finish that round for %xmm4-%xmm9.  The loop then applies two rounds
 * per iteration with (%rcx,%rax) walking the round keys; it only
 * terminates when N is odd.
 *
 * .Lenc_loop8_enter has no reference inside this function; it is kept in
 * case code elsewhere in the file enters there.
 *
 * Clobbers: %rax, %rcx, flags, %xmm0, %xmm1.
 */
.type _aesni_encrypt8,@function
.align 16
_aesni_encrypt8:
.cfi_startproc
        movups (%rcx),%xmm0             # round key 0
        shll $4,%eax                    # eax = 16*N
        movups 16(%rcx),%xmm1           # round key 1
        xorps %xmm0,%xmm2               # whitening, blocks 0-4
        xorps %xmm0,%xmm3
        pxor %xmm0,%xmm4
        pxor %xmm0,%xmm5
        pxor %xmm0,%xmm6
        leaq 32(%rcx,%rax,1),%rcx       # rcx = key + 32 + 16*N
        negq %rax                       # rax = -16*N
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        pxor %xmm0,%xmm7                # whitening, blocks 5-7
        pxor %xmm0,%xmm8
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        pxor %xmm0,%xmm9
        movups (%rcx,%rax,1),%xmm0      # round key 2 (address = key + 32)
        addq $16,%rax                   # rax = 16 - 16*N
        jmp .Lenc_loop8_inner
.align 16
.Lenc_loop8:
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
.Lenc_loop8_inner:
        .byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
        .byte 102,15,56,220,233         # aesenc %xmm1,%xmm5
        .byte 102,15,56,220,241         # aesenc %xmm1,%xmm6
        .byte 102,15,56,220,249         # aesenc %xmm1,%xmm7
        .byte 102,68,15,56,220,193      # aesenc %xmm1,%xmm8
        .byte 102,68,15,56,220,201      # aesenc %xmm1,%xmm9
.Lenc_loop8_enter:
        movups (%rcx,%rax,1),%xmm1      # next odd-position key
        addq $32,%rax                   # ZF set when the index reaches 0
        .byte 102,15,56,220,208         # aesenc %xmm0,%xmm2
        .byte 102,15,56,220,216         # aesenc %xmm0,%xmm3
        .byte 102,15,56,220,224         # aesenc %xmm0,%xmm4
        .byte 102,15,56,220,232         # aesenc %xmm0,%xmm5
        .byte 102,15,56,220,240         # aesenc %xmm0,%xmm6
        .byte 102,15,56,220,248         # aesenc %xmm0,%xmm7
        .byte 102,68,15,56,220,192      # aesenc %xmm0,%xmm8
        .byte 102,68,15,56,220,200      # aesenc %xmm0,%xmm9
        movups -16(%rcx,%rax,1),%xmm0   # next even-position key (flags untouched)
        jnz .Lenc_loop8

        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        .byte 102,15,56,220,225         # aesenc %xmm1,%xmm4
        .byte 102,15,56,220,233         # aesenc %xmm1,%xmm5
        .byte 102,15,56,220,241         # aesenc %xmm1,%xmm6
        .byte 102,15,56,220,249         # aesenc %xmm1,%xmm7
        .byte 102,68,15,56,220,193      # aesenc %xmm1,%xmm8
        .byte 102,68,15,56,220,201      # aesenc %xmm1,%xmm9
        .byte 102,15,56,221,208         # aesenclast %xmm0,%xmm2
        .byte 102,15,56,221,216         # aesenclast %xmm0,%xmm3
        .byte 102,15,56,221,224         # aesenclast %xmm0,%xmm4
        .byte 102,15,56,221,232         # aesenclast %xmm0,%xmm5
        .byte 102,15,56,221,240         # aesenclast %xmm0,%xmm6
        .byte 102,15,56,221,248         # aesenclast %xmm0,%xmm7
        .byte 102,68,15,56,221,192      # aesenclast %xmm0,%xmm8
        .byte 102,68,15,56,221,200      # aesenclast %xmm0,%xmm9
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size _aesni_encrypt8,.-_aesni_encrypt8
/*
 * _aesni_decrypt8 -- file-local helper: decrypt eight blocks in parallel.
 *
 * Private register contract (not the C calling convention):
 *   in:  %rcx = key schedule (16-byte round keys from offset 0)
 *        %eax = loop count N as stored at offset 240 of the schedule
 *        %xmm2-%xmm9 = the eight blocks
 *   out: %xmm2-%xmm9 = processed blocks
 *
 * The prologue whitens all blocks with round key 0 while already starting
 * the first aesdec on %xmm2/%xmm3, then jumps to .Ldec_loop8_inner to
 * finish that round for %xmm4-%xmm9.  The loop then applies two rounds
 * per iteration with (%rcx,%rax) walking the round keys; it only
 * terminates when N is odd.
 *
 * .Ldec_loop8_enter has no reference inside this function; it is kept in
 * case code elsewhere in the file enters there.
 *
 * Clobbers: %rax, %rcx, flags, %xmm0, %xmm1.
 */
.type _aesni_decrypt8,@function
.align 16
_aesni_decrypt8:
.cfi_startproc
        movups (%rcx),%xmm0             # round key 0
        shll $4,%eax                    # eax = 16*N
        movups 16(%rcx),%xmm1           # round key 1
        xorps %xmm0,%xmm2               # whitening, blocks 0-4
        xorps %xmm0,%xmm3
        pxor %xmm0,%xmm4
        pxor %xmm0,%xmm5
        pxor %xmm0,%xmm6
        leaq 32(%rcx,%rax,1),%rcx       # rcx = key + 32 + 16*N
        negq %rax                       # rax = -16*N
        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        pxor %xmm0,%xmm7                # whitening, blocks 5-7
        pxor %xmm0,%xmm8
        .byte 102,15,56,222,217         # aesdec %xmm1,%xmm3
        pxor %xmm0,%xmm9
        movups (%rcx,%rax,1),%xmm0      # round key 2 (address = key + 32)
        addq $16,%rax                   # rax = 16 - 16*N
        jmp .Ldec_loop8_inner
.align 16
.Ldec_loop8:
        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        .byte 102,15,56,222,217         # aesdec %xmm1,%xmm3
.Ldec_loop8_inner:
        .byte 102,15,56,222,225         # aesdec %xmm1,%xmm4
        .byte 102,15,56,222,233         # aesdec %xmm1,%xmm5
        .byte 102,15,56,222,241         # aesdec %xmm1,%xmm6
        .byte 102,15,56,222,249         # aesdec %xmm1,%xmm7
        .byte 102,68,15,56,222,193      # aesdec %xmm1,%xmm8
        .byte 102,68,15,56,222,201      # aesdec %xmm1,%xmm9
.Ldec_loop8_enter:
        movups (%rcx,%rax,1),%xmm1      # next odd-position key
        addq $32,%rax                   # ZF set when the index reaches 0
        .byte 102,15,56,222,208         # aesdec %xmm0,%xmm2
        .byte 102,15,56,222,216         # aesdec %xmm0,%xmm3
        .byte 102,15,56,222,224         # aesdec %xmm0,%xmm4
        .byte 102,15,56,222,232         # aesdec %xmm0,%xmm5
        .byte 102,15,56,222,240         # aesdec %xmm0,%xmm6
        .byte 102,15,56,222,248         # aesdec %xmm0,%xmm7
        .byte 102,68,15,56,222,192      # aesdec %xmm0,%xmm8
        .byte 102,68,15,56,222,200      # aesdec %xmm0,%xmm9
        movups -16(%rcx,%rax,1),%xmm0   # next even-position key (flags untouched)
        jnz .Ldec_loop8

        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        .byte 102,15,56,222,217         # aesdec %xmm1,%xmm3
        .byte 102,15,56,222,225         # aesdec %xmm1,%xmm4
        .byte 102,15,56,222,233         # aesdec %xmm1,%xmm5
        .byte 102,15,56,222,241         # aesdec %xmm1,%xmm6
        .byte 102,15,56,222,249         # aesdec %xmm1,%xmm7
        .byte 102,68,15,56,222,193      # aesdec %xmm1,%xmm8
        .byte 102,68,15,56,222,201      # aesdec %xmm1,%xmm9
        .byte 102,15,56,223,208         # aesdeclast %xmm0,%xmm2
        .byte 102,15,56,223,216         # aesdeclast %xmm0,%xmm3
        .byte 102,15,56,223,224         # aesdeclast %xmm0,%xmm4
        .byte 102,15,56,223,232         # aesdeclast %xmm0,%xmm5
        .byte 102,15,56,223,240         # aesdeclast %xmm0,%xmm6
        .byte 102,15,56,223,248         # aesdeclast %xmm0,%xmm7
        .byte 102,68,15,56,223,192      # aesdeclast %xmm0,%xmm8
        .byte 102,68,15,56,223,200      # aesdeclast %xmm0,%xmm9
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size _aesni_decrypt8,.-_aesni_decrypt8
/*
 * aesni_ecb_encrypt -- process a buffer block by block (no chaining).
 *
 * Syntax/ABI: GNU as, AT&T operand order, System V AMD64 argument registers.
 *   %rdi = input buffer
 *   %rsi = output buffer
 *   %rdx = length in bytes; rounded DOWN to a multiple of 16, and a
 *          rounded length of 0 returns immediately
 *   %rcx = key schedule (16-byte round keys from offset 0, 32-bit loop
 *          count at offset 240)
 *   %r8d = direction: non-zero takes the encrypt path, zero the decrypt path
 *
 * Structure (same for both directions):
 *   - while at least 8 blocks remain, run the 8-way helper; stores of the
 *     previous results are interleaved with loads of the next inputs;
 *   - the 1..7 remaining blocks are dispatched to the 1/2/3/4/6/8-way
 *     code.  Five blocks use the 6-way helper and seven use the 8-way
 *     helper with the unused register zeroed; the extra result is not stored.
 *
 * The helpers destroy %rcx and %eax, so the key pointer and loop count are
 * kept in %r11 and %r10d and restored before each further call.
 * The decrypt path zeroes every data register right after storing it;
 * the encrypt path does not.  %xmm0/%xmm1 are always zeroed on exit.
 *
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, flags, %xmm0-%xmm9.
 */
.globl aesni_ecb_encrypt
.type aesni_ecb_encrypt,@function
.align 16
aesni_ecb_encrypt:
.cfi_startproc
        andq $-16,%rdx                  # drop any partial trailing block
        jz .Lecb_ret                    # nothing to do

        movl 240(%rcx),%eax             # loop count for the helpers
        movups (%rcx),%xmm0             # round key 0
        movq %rcx,%r11                  # r11  = saved key pointer
        movl %eax,%r10d                 # r10d = saved loop count
        testl %r8d,%r8d
        jz .Lecb_decrypt                # enc == 0 -> decrypt path

        /* ---------------- encrypt path ---------------- */
        cmpq $0x80,%rdx
        jb .Lecb_enc_tail               # fewer than 8 blocks

        movdqu (%rdi),%xmm2             # preload the first 8 blocks
        movdqu 16(%rdi),%xmm3
        movdqu 32(%rdi),%xmm4
        movdqu 48(%rdi),%xmm5
        movdqu 64(%rdi),%xmm6
        movdqu 80(%rdi),%xmm7
        movdqu 96(%rdi),%xmm8
        movdqu 112(%rdi),%xmm9
        leaq 128(%rdi),%rdi
        subq $0x80,%rdx                 # account for the preloaded blocks
        jmp .Lecb_enc_loop8_enter
.align 16
.Lecb_enc_loop8:
        /* store the 8 results just produced, load the next 8 inputs */
        movups %xmm2,(%rsi)
        movq %r11,%rcx                  # restore key pointer for the helper
        movdqu (%rdi),%xmm2
        movl %r10d,%eax                 # restore loop count for the helper
        movups %xmm3,16(%rsi)
        movdqu 16(%rdi),%xmm3
        movups %xmm4,32(%rsi)
        movdqu 32(%rdi),%xmm4
        movups %xmm5,48(%rsi)
        movdqu 48(%rdi),%xmm5
        movups %xmm6,64(%rsi)
        movdqu 64(%rdi),%xmm6
        movups %xmm7,80(%rsi)
        movdqu 80(%rdi),%xmm7
        movups %xmm8,96(%rsi)
        movdqu 96(%rdi),%xmm8
        movups %xmm9,112(%rsi)
        leaq 128(%rsi),%rsi
        movdqu 112(%rdi),%xmm9
        leaq 128(%rdi),%rdi
.Lecb_enc_loop8_enter:

        call _aesni_encrypt8

        subq $0x80,%rdx
        jnc .Lecb_enc_loop8             # no borrow: 8 more blocks are available

        movups %xmm2,(%rsi)             # flush the last full group of 8
        movq %r11,%rcx                  # restore key pointer for the tail
        movups %xmm3,16(%rsi)
        movl %r10d,%eax                 # restore loop count for the tail
        movups %xmm4,32(%rsi)
        movups %xmm5,48(%rsi)
        movups %xmm6,64(%rsi)
        movups %xmm7,80(%rsi)
        movups %xmm8,96(%rsi)
        movups %xmm9,112(%rsi)
        leaq 128(%rsi),%rsi
        addq $0x80,%rdx                 # undo the borrow: rdx = bytes left (0..0x70)
        jz .Lecb_ret

.Lecb_enc_tail:
        /*
         * 1..7 blocks remain.  The movups loads between each cmpq and
         * the second branch that consumes it leave the flags intact.
         */
        movups (%rdi),%xmm2
        cmpq $0x20,%rdx
        jb .Lecb_enc_one
        movups 16(%rdi),%xmm3
        je .Lecb_enc_two
        movups 32(%rdi),%xmm4
        cmpq $0x40,%rdx
        jb .Lecb_enc_three
        movups 48(%rdi),%xmm5
        je .Lecb_enc_four
        movups 64(%rdi),%xmm6
        cmpq $0x60,%rdx
        jb .Lecb_enc_five
        movups 80(%rdi),%xmm7
        je .Lecb_enc_six
        movdqu 96(%rdi),%xmm8           # seven blocks: use the 8-way helper
        xorps %xmm9,%xmm9               # eighth lane is a dummy zero block
        call _aesni_encrypt8
        movups %xmm2,(%rsi)
        movups %xmm3,16(%rsi)
        movups %xmm4,32(%rsi)
        movups %xmm5,48(%rsi)
        movups %xmm6,64(%rsi)
        movups %xmm7,80(%rsi)
        movups %xmm8,96(%rsi)
        jmp .Lecb_ret
.align 16
.Lecb_enc_one:
        /* single block: inline one-block loop, %eax aesenc steps + aesenclast */
        movups (%rcx),%xmm0             # round key 0
        movups 16(%rcx),%xmm1           # round key 1
        leaq 32(%rcx),%rcx              # rcx -> round key 2
        xorps %xmm0,%xmm2               # whitening
.Loop_enc1_3:
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        decl %eax                       # sets ZF for the jnz below
        movups (%rcx),%xmm1
        leaq 16(%rcx),%rcx
        jnz .Loop_enc1_3
        .byte 102,15,56,221,209         # aesenclast %xmm1,%xmm2
        movups %xmm2,(%rsi)
        jmp .Lecb_ret
.align 16
.Lecb_enc_two:
        call _aesni_encrypt2
        movups %xmm2,(%rsi)
        movups %xmm3,16(%rsi)
        jmp .Lecb_ret
.align 16
.Lecb_enc_three:
        call _aesni_encrypt3
        movups %xmm2,(%rsi)
        movups %xmm3,16(%rsi)
        movups %xmm4,32(%rsi)
        jmp .Lecb_ret
.align 16
.Lecb_enc_four:
        call _aesni_encrypt4
        movups %xmm2,(%rsi)
        movups %xmm3,16(%rsi)
        movups %xmm4,32(%rsi)
        movups %xmm5,48(%rsi)
        jmp .Lecb_ret
.align 16
.Lecb_enc_five:
        xorps %xmm7,%xmm7               # sixth lane is a dummy zero block
        call _aesni_encrypt6
        movups %xmm2,(%rsi)
        movups %xmm3,16(%rsi)
        movups %xmm4,32(%rsi)
        movups %xmm5,48(%rsi)
        movups %xmm6,64(%rsi)
        jmp .Lecb_ret
.align 16
.Lecb_enc_six:
        call _aesni_encrypt6
        movups %xmm2,(%rsi)
        movups %xmm3,16(%rsi)
        movups %xmm4,32(%rsi)
        movups %xmm5,48(%rsi)
        movups %xmm6,64(%rsi)
        movups %xmm7,80(%rsi)
        jmp .Lecb_ret

        /* ---------------- decrypt path ---------------- */
.align 16
.Lecb_decrypt:
        cmpq $0x80,%rdx
        jb .Lecb_dec_tail               # fewer than 8 blocks

        movdqu (%rdi),%xmm2             # preload the first 8 blocks
        movdqu 16(%rdi),%xmm3
        movdqu 32(%rdi),%xmm4
        movdqu 48(%rdi),%xmm5
        movdqu 64(%rdi),%xmm6
        movdqu 80(%rdi),%xmm7
        movdqu 96(%rdi),%xmm8
        movdqu 112(%rdi),%xmm9
        leaq 128(%rdi),%rdi
        subq $0x80,%rdx                 # account for the preloaded blocks
        jmp .Lecb_dec_loop8_enter
.align 16
.Lecb_dec_loop8:
        /* store the 8 results just produced, load the next 8 inputs */
        movups %xmm2,(%rsi)
        movq %r11,%rcx                  # restore key pointer for the helper
        movdqu (%rdi),%xmm2
        movl %r10d,%eax                 # restore loop count for the helper
        movups %xmm3,16(%rsi)
        movdqu 16(%rdi),%xmm3
        movups %xmm4,32(%rsi)
        movdqu 32(%rdi),%xmm4
        movups %xmm5,48(%rsi)
        movdqu 48(%rdi),%xmm5
        movups %xmm6,64(%rsi)
        movdqu 64(%rdi),%xmm6
        movups %xmm7,80(%rsi)
        movdqu 80(%rdi),%xmm7
        movups %xmm8,96(%rsi)
        movdqu 96(%rdi),%xmm8
        movups %xmm9,112(%rsi)
        leaq 128(%rsi),%rsi
        movdqu 112(%rdi),%xmm9
        leaq 128(%rdi),%rdi
.Lecb_dec_loop8_enter:

        call _aesni_decrypt8

        movups (%r11),%xmm0             # reload round key 0 into xmm0
        subq $0x80,%rdx
        jnc .Lecb_dec_loop8             # no borrow: 8 more blocks are available

        /* flush the last full group of 8, zeroing each register once stored */
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movq %r11,%rcx                  # restore key pointer for the tail
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        movl %r10d,%eax                 # restore loop count for the tail
        movups %xmm4,32(%rsi)
        pxor %xmm4,%xmm4
        movups %xmm5,48(%rsi)
        pxor %xmm5,%xmm5
        movups %xmm6,64(%rsi)
        pxor %xmm6,%xmm6
        movups %xmm7,80(%rsi)
        pxor %xmm7,%xmm7
        movups %xmm8,96(%rsi)
        pxor %xmm8,%xmm8
        movups %xmm9,112(%rsi)
        pxor %xmm9,%xmm9
        leaq 128(%rsi),%rsi
        addq $0x80,%rdx                 # undo the borrow: rdx = bytes left (0..0x70)
        jz .Lecb_ret

.Lecb_dec_tail:
        /* 1..7 blocks remain; same flag-preserving dispatch as the encrypt tail */
        movups (%rdi),%xmm2
        cmpq $0x20,%rdx
        jb .Lecb_dec_one
        movups 16(%rdi),%xmm3
        je .Lecb_dec_two
        movups 32(%rdi),%xmm4
        cmpq $0x40,%rdx
        jb .Lecb_dec_three
        movups 48(%rdi),%xmm5
        je .Lecb_dec_four
        movups 64(%rdi),%xmm6
        cmpq $0x60,%rdx
        jb .Lecb_dec_five
        movups 80(%rdi),%xmm7
        je .Lecb_dec_six
        movups 96(%rdi),%xmm8           # seven blocks: use the 8-way helper
        movups (%rcx),%xmm0             # round key 0
        xorps %xmm9,%xmm9               # eighth lane is a dummy zero block
        call _aesni_decrypt8
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        movups %xmm4,32(%rsi)
        pxor %xmm4,%xmm4
        movups %xmm5,48(%rsi)
        pxor %xmm5,%xmm5
        movups %xmm6,64(%rsi)
        pxor %xmm6,%xmm6
        movups %xmm7,80(%rsi)
        pxor %xmm7,%xmm7
        movups %xmm8,96(%rsi)
        pxor %xmm8,%xmm8
        pxor %xmm9,%xmm9                # dummy lane result is discarded
        jmp .Lecb_ret
.align 16
.Lecb_dec_one:
        /* single block: inline one-block loop, %eax aesdec steps + aesdeclast */
        movups (%rcx),%xmm0             # round key 0
        movups 16(%rcx),%xmm1           # round key 1
        leaq 32(%rcx),%rcx              # rcx -> round key 2
        xorps %xmm0,%xmm2               # whitening
.Loop_dec1_4:
        .byte 102,15,56,222,209         # aesdec %xmm1,%xmm2
        decl %eax                       # sets ZF for the jnz below
        movups (%rcx),%xmm1
        leaq 16(%rcx),%rcx
        jnz .Loop_dec1_4
        .byte 102,15,56,223,209         # aesdeclast %xmm1,%xmm2
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        jmp .Lecb_ret
.align 16
.Lecb_dec_two:
        call _aesni_decrypt2
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        jmp .Lecb_ret
.align 16
.Lecb_dec_three:
        call _aesni_decrypt3
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        movups %xmm4,32(%rsi)
        pxor %xmm4,%xmm4
        jmp .Lecb_ret
.align 16
.Lecb_dec_four:
        call _aesni_decrypt4
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        movups %xmm4,32(%rsi)
        pxor %xmm4,%xmm4
        movups %xmm5,48(%rsi)
        pxor %xmm5,%xmm5
        jmp .Lecb_ret
.align 16
.Lecb_dec_five:
        xorps %xmm7,%xmm7               # sixth lane is a dummy zero block
        call _aesni_decrypt6
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        movups %xmm4,32(%rsi)
        pxor %xmm4,%xmm4
        movups %xmm5,48(%rsi)
        pxor %xmm5,%xmm5
        movups %xmm6,64(%rsi)
        pxor %xmm6,%xmm6
        pxor %xmm7,%xmm7                # dummy lane result is discarded
        jmp .Lecb_ret
.align 16
.Lecb_dec_six:
        call _aesni_decrypt6
        movups %xmm2,(%rsi)
        pxor %xmm2,%xmm2
        movups %xmm3,16(%rsi)
        pxor %xmm3,%xmm3
        movups %xmm4,32(%rsi)
        pxor %xmm4,%xmm4
        movups %xmm5,48(%rsi)
        pxor %xmm5,%xmm5
        movups %xmm6,64(%rsi)
        pxor %xmm6,%xmm6
        movups %xmm7,80(%rsi)
        pxor %xmm7,%xmm7
        /* falls through to the common exit */

.Lecb_ret:
        xorps %xmm0,%xmm0               # scrub round-key registers
        pxor %xmm1,%xmm1
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
/*
 * aesni_ccm64_encrypt_blocks -- per block: XOR the input with an encrypted
 * counter block and fold the input into a running MAC, both with one key.
 *
 * Syntax/ABI: GNU as, AT&T operand order, System V AMD64 argument registers.
 *   %rdi = input buffer
 *   %rsi = output buffer
 *   %rdx = number of 16-byte blocks; there is no check for 0, the loop
 *          body always runs at least once (caller must pass >= 1)
 *   %rcx = key schedule (16-byte round keys from offset 0, 32-bit loop
 *          count N at offset 240)
 *   %r8  = 16-byte counter block (read only)
 *   %r9  = 16-byte MAC state, read on entry and written back on exit
 *
 * .Lincrement64 and .Lbswap_mask are 16-byte constants defined elsewhere
 * in this file; both are read with movdqa and so must be 16-byte aligned.
 * Presumably the mask byte-reverses the counter so that paddq can step it
 * -- confirm against the constant definitions.
 *
 * Each outer iteration runs two AES encryptions side by side:
 *   %xmm2 = counter block  -> keystream
 *   %xmm3 = MAC ^ input    -> new MAC
 * using the same negative-index key walk as the 2-way helper (%rcx points
 * past the schedule, %rax counts up to zero in steps of 32; N must be odd).
 *
 * No stack is used, so the default CFA set up by .cfi_startproc stays
 * valid for the whole function.
 *
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, flags, %xmm0-%xmm3,
 * %xmm6-%xmm9 (%xmm0-%xmm3, %xmm6 and %xmm8 are zeroed before returning).
 */
.globl aesni_ccm64_encrypt_blocks
.type aesni_ccm64_encrypt_blocks,@function
.align 16
aesni_ccm64_encrypt_blocks:
.cfi_startproc
        movl 240(%rcx),%eax             # loop count N
        movdqu (%r8),%xmm6              # counter block
        movdqa .Lincrement64(%rip),%xmm9
        movdqa .Lbswap_mask(%rip),%xmm7

        shll $4,%eax                    # eax = 16*N
        movl $16,%r10d
        leaq 0(%rcx),%r11               # r11 = key schedule base
        movdqu (%r9),%xmm3              # xmm3 = running MAC
        movdqa %xmm6,%xmm2              # xmm2 = first counter block, as given
        leaq 32(%rcx,%rax,1),%rcx       # rcx = key + 32 + 16*N
        .byte 102,15,56,0,247           # pshufb %xmm7,%xmm6
        subq %rax,%r10                  # r10 = 16 - 16*N (initial key index)
        jmp .Lccm64_enc_outer
.align 16
.Lccm64_enc_outer:
        movups (%r11),%xmm0             # round key 0
        movq %r10,%rax                  # reset key index
        movups (%rdi),%xmm8             # xmm8 = input block

        xorps %xmm0,%xmm2               # counter ^ rk0
        movups 16(%r11),%xmm1           # round key 1
        xorps %xmm8,%xmm0               # rk0 ^ input
        xorps %xmm0,%xmm3               # MAC ^ input ^ rk0
        movups 32(%r11),%xmm0           # round key 2

.Lccm64_enc2_loop:
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax                   # ZF set when the index reaches 0
        .byte 102,15,56,220,208         # aesenc %xmm0,%xmm2
        .byte 102,15,56,220,216         # aesenc %xmm0,%xmm3
        movups -16(%rcx,%rax,1),%xmm0   # flags untouched
        jnz .Lccm64_enc2_loop
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        paddq %xmm9,%xmm6               # step the counter
        decq %rdx                       # sets ZF for the jnz at the loop bottom
        .byte 102,15,56,221,208         # aesenclast %xmm0,%xmm2
        .byte 102,15,56,221,216         # aesenclast %xmm0,%xmm3

        /* nothing from here to the jnz modifies the flags set by decq */
        leaq 16(%rdi),%rdi
        xorps %xmm2,%xmm8               # output = input ^ keystream
        movdqa %xmm6,%xmm2              # next counter ...
        movups %xmm8,(%rsi)
        .byte 102,15,56,0,215           # pshufb %xmm7,%xmm2  (... shuffled back)
        leaq 16(%rsi),%rsi
        jnz .Lccm64_enc_outer

        pxor %xmm0,%xmm0                # scrub key and data registers
        pxor %xmm1,%xmm1
        pxor %xmm2,%xmm2
        movups %xmm3,(%r9)              # write back the MAC state
        pxor %xmm3,%xmm3
        pxor %xmm8,%xmm8
        pxor %xmm6,%xmm6
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
/*
 * aesni_ccm64_decrypt_blocks -- per block: XOR the input with an encrypted
 * counter block and fold the resulting output block into a running MAC.
 *
 * Syntax/ABI: GNU as, AT&T operand order, System V AMD64 argument registers.
 *   %rdi = input buffer
 *   %rsi = output buffer
 *   %rdx = number of 16-byte blocks; there is no check for 0, the first
 *          block is always processed (caller must pass >= 1)
 *   %rcx = key schedule (16-byte round keys from offset 0, 32-bit loop
 *          count N at offset 240)
 *   %r8  = 16-byte counter block (read only)
 *   %r9  = 16-byte MAC state, read on entry and written back on exit
 *
 * .Lincrement64 and .Lbswap_mask are 16-byte constants defined elsewhere
 * in this file; both are read with movdqa and so must be 16-byte aligned.
 *
 * Only the AES encrypt direction is used.  Because the MAC input is the
 * OUTPUT of the XOR step, the work is software-pipelined:
 *   1. the first counter block is encrypted on its own (.Loop_enc1_5);
 *   2. each outer iteration produces an output block, then encrypts the
 *      next counter (%xmm2) together with MAC ^ output (%xmm3) in a 2-way
 *      loop using the negative-index key walk (N must be odd);
 *   3. after the last output block, the final MAC update is done on its
 *      own (.Loop_enc1_6).
 *
 * No stack is used, so the default CFA set up by .cfi_startproc stays
 * valid for the whole function.
 *
 * Clobbers: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, flags, %xmm0-%xmm3,
 * %xmm6-%xmm9 (%xmm0-%xmm3, %xmm6 and %xmm8 are zeroed before returning).
 */
.globl aesni_ccm64_decrypt_blocks
.type aesni_ccm64_decrypt_blocks,@function
.align 16
aesni_ccm64_decrypt_blocks:
.cfi_startproc
        movl 240(%rcx),%eax             # loop count N
        movups (%r8),%xmm6              # counter block
        movdqu (%r9),%xmm3              # xmm3 = running MAC
        movdqa .Lincrement64(%rip),%xmm9
        movdqa .Lbswap_mask(%rip),%xmm7

        movaps %xmm6,%xmm2              # xmm2 = first counter block, as given
        movl %eax,%r10d                 # keep N
        movq %rcx,%r11                  # r11 = key schedule base
        .byte 102,15,56,0,247           # pshufb %xmm7,%xmm6
        movups (%rcx),%xmm0             # round key 0
        movups 16(%rcx),%xmm1           # round key 1
        leaq 32(%rcx),%rcx              # rcx -> round key 2
        xorps %xmm0,%xmm2               # whitening
.Loop_enc1_5:
        /* encrypt the first counter block: N aesenc steps + aesenclast */
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        decl %eax                       # sets ZF for the jnz below
        movups (%rcx),%xmm1
        leaq 16(%rcx),%rcx
        jnz .Loop_enc1_5
        .byte 102,15,56,221,209         # aesenclast %xmm1,%xmm2
        shll $4,%r10d                   # r10 = 16*N
        movl $16,%eax
        movups (%rdi),%xmm8             # xmm8 = first input block
        paddq %xmm9,%xmm6               # step the counter
        leaq 16(%rdi),%rdi
        subq %r10,%rax                  # rax = 16 - 16*N
        leaq 32(%r11,%r10,1),%rcx       # rcx = key + 32 + 16*N
        movq %rax,%r10                  # r10 = initial key index
        jmp .Lccm64_dec_outer
.align 16
.Lccm64_dec_outer:
        xorps %xmm2,%xmm8               # output = input ^ keystream
        movdqa %xmm6,%xmm2              # next counter ...
        movups %xmm8,(%rsi)
        leaq 16(%rsi),%rsi
        .byte 102,15,56,0,215           # pshufb %xmm7,%xmm2  (... shuffled back)

        subq $1,%rdx
        jz .Lccm64_dec_break            # that was the last block

        movups (%r11),%xmm0             # round key 0
        movq %r10,%rax                  # reset key index
        movups 16(%r11),%xmm1           # round key 1
        xorps %xmm0,%xmm8               # output ^ rk0
        xorps %xmm0,%xmm2               # counter ^ rk0
        xorps %xmm8,%xmm3               # MAC ^ output ^ rk0
        movups 32(%r11),%xmm0           # round key 2
        jmp .Lccm64_dec2_loop
.align 16
.Lccm64_dec2_loop:
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        movups (%rcx,%rax,1),%xmm1
        addq $32,%rax                   # ZF set when the index reaches 0
        .byte 102,15,56,220,208         # aesenc %xmm0,%xmm2
        .byte 102,15,56,220,216         # aesenc %xmm0,%xmm3
        movups -16(%rcx,%rax,1),%xmm0   # flags untouched
        jnz .Lccm64_dec2_loop
        movups (%rdi),%xmm8             # next input block
        paddq %xmm9,%xmm6               # step the counter
        .byte 102,15,56,220,209         # aesenc %xmm1,%xmm2
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        .byte 102,15,56,221,208         # aesenclast %xmm0,%xmm2
        .byte 102,15,56,221,216         # aesenclast %xmm0,%xmm3
        leaq 16(%rdi),%rdi
        jmp .Lccm64_dec_outer

.align 16
.Lccm64_dec_break:

        /* final MAC update with the last output block, one block on its own */
        movl 240(%r11),%eax             # loop count N
        movups (%r11),%xmm0             # round key 0
        movups 16(%r11),%xmm1           # round key 1
        xorps %xmm0,%xmm8               # output ^ rk0
        leaq 32(%r11),%r11              # r11 -> round key 2
        xorps %xmm8,%xmm3               # MAC ^ output ^ rk0
.Loop_enc1_6:
        .byte 102,15,56,220,217         # aesenc %xmm1,%xmm3
        decl %eax                       # sets ZF for the jnz below
        movups (%r11),%xmm1
        leaq 16(%r11),%r11
        jnz .Loop_enc1_6
        .byte 102,15,56,221,217         # aesenclast %xmm1,%xmm3
        pxor %xmm0,%xmm0                # scrub key and data registers
        pxor %xmm1,%xmm1
        pxor %xmm2,%xmm2
        movups %xmm3,(%r9)              # write back the MAC state
        pxor %xmm3,%xmm3
        pxor %xmm8,%xmm8
        pxor %xmm6,%xmm6
        .byte 0xf3,0xc3                 # rep ret
.cfi_endproc
.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
1020.globl aesni_ctr32_encrypt_blocks
1021.type aesni_ctr32_encrypt_blocks,@function
1022.align 16
1023aesni_ctr32_encrypt_blocks:
1024.cfi_startproc
1025 cmpq $1,%rdx
1026 jne .Lctr32_bulk
1027
1028
1029
1030 movups (%r8),%xmm2
1031 movups (%rdi),%xmm3
1032 movl 240(%rcx),%edx
1033 movups (%rcx),%xmm0
1034 movups 16(%rcx),%xmm1
1035 leaq 32(%rcx),%rcx
1036 xorps %xmm0,%xmm2
1037.Loop_enc1_7:
1038.byte 102,15,56,220,209
1039 decl %edx
1040 movups (%rcx),%xmm1
1041 leaq 16(%rcx),%rcx
1042 jnz .Loop_enc1_7
1043.byte 102,15,56,221,209
1044 pxor %xmm0,%xmm0
1045 pxor %xmm1,%xmm1
1046 xorps %xmm3,%xmm2
1047 pxor %xmm3,%xmm3
1048 movups %xmm2,(%rsi)
1049 xorps %xmm2,%xmm2
1050 jmp .Lctr32_epilogue
1051
1052.align 16
1053.Lctr32_bulk:
1054 leaq (%rsp),%r11
1055.cfi_def_cfa_register %r11
1056 pushq %rbp
1057.cfi_offset %rbp,-16
1058 subq $128,%rsp
1059 andq $-16,%rsp
1060
1061
1062
1063
1064 movdqu (%r8),%xmm2
1065 movdqu (%rcx),%xmm0
1066 movl 12(%r8),%r8d
1067 pxor %xmm0,%xmm2
1068 movl 12(%rcx),%ebp
1069 movdqa %xmm2,0(%rsp)
1070 bswapl %r8d
1071 movdqa %xmm2,%xmm3
1072 movdqa %xmm2,%xmm4
1073 movdqa %xmm2,%xmm5
1074 movdqa %xmm2,64(%rsp)
1075 movdqa %xmm2,80(%rsp)
1076 movdqa %xmm2,96(%rsp)
1077 movq %rdx,%r10
1078 movdqa %xmm2,112(%rsp)
1079
1080 leaq 1(%r8),%rax
1081 leaq 2(%r8),%rdx
1082 bswapl %eax
1083 bswapl %edx
1084 xorl %ebp,%eax
1085 xorl %ebp,%edx
1086.byte 102,15,58,34,216,3
1087 leaq 3(%r8),%rax
1088 movdqa %xmm3,16(%rsp)
1089.byte 102,15,58,34,226,3
1090 bswapl %eax
1091 movq %r10,%rdx
1092 leaq 4(%r8),%r10
1093 movdqa %xmm4,32(%rsp)
1094 xorl %ebp,%eax
1095 bswapl %r10d
1096.byte 102,15,58,34,232,3
1097 xorl %ebp,%r10d
1098 movdqa %xmm5,48(%rsp)
1099 leaq 5(%r8),%r9
1100 movl %r10d,64+12(%rsp)
1101 bswapl %r9d
1102 leaq 6(%r8),%r10
1103 movl 240(%rcx),%eax
1104 xorl %ebp,%r9d
1105 bswapl %r10d
1106 movl %r9d,80+12(%rsp)
1107 xorl %ebp,%r10d
1108 leaq 7(%r8),%r9
1109 movl %r10d,96+12(%rsp)
1110 bswapl %r9d
1111 movl OPENSSL_ia32cap_P+4(%rip),%r10d
1112 xorl %ebp,%r9d
1113 andl $71303168,%r10d
1114 movl %r9d,112+12(%rsp)
1115
1116 movups 16(%rcx),%xmm1
1117
1118 movdqa 64(%rsp),%xmm6
1119 movdqa 80(%rsp),%xmm7
1120
1121 cmpq $8,%rdx
1122 jb .Lctr32_tail
1123
1124 subq $6,%rdx
1125 cmpl $4194304,%r10d
1126 je .Lctr32_6x
1127
1128 leaq 128(%rcx),%rcx
1129 subq $2,%rdx
1130 jmp .Lctr32_loop8
1131
1132.align 16
1133.Lctr32_6x:
1134 shll $4,%eax
1135 movl $48,%r10d
1136 bswapl %ebp
1137 leaq 32(%rcx,%rax,1),%rcx
1138 subq %rax,%r10
1139 jmp .Lctr32_loop6
1140
1141.align 16
1142.Lctr32_loop6:
1143 addl $6,%r8d
1144 movups -48(%rcx,%r10,1),%xmm0
1145.byte 102,15,56,220,209
1146 movl %r8d,%eax
1147 xorl %ebp,%eax
1148.byte 102,15,56,220,217
1149.byte 0x0f,0x38,0xf1,0x44,0x24,12
1150 leal 1(%r8),%eax
1151.byte 102,15,56,220,225
1152 xorl %ebp,%eax
1153.byte 0x0f,0x38,0xf1,0x44,0x24,28
1154.byte 102,15,56,220,233
1155 leal 2(%r8),%eax
1156 xorl %ebp,%eax
1157.byte 102,15,56,220,241
1158.byte 0x0f,0x38,0xf1,0x44,0x24,44
1159 leal 3(%r8),%eax
1160.byte 102,15,56,220,249
1161 movups -32(%rcx,%r10,1),%xmm1
1162 xorl %ebp,%eax
1163
1164.byte 102,15,56,220,208
1165.byte 0x0f,0x38,0xf1,0x44,0x24,60
1166 leal 4(%r8),%eax
1167.byte 102,15,56,220,216
1168 xorl %ebp,%eax
1169.byte 0x0f,0x38,0xf1,0x44,0x24,76
1170.byte 102,15,56,220,224
1171 leal 5(%r8),%eax
1172 xorl %ebp,%eax
1173.byte 102,15,56,220,232
1174.byte 0x0f,0x38,0xf1,0x44,0x24,92
1175 movq %r10,%rax
1176.byte 102,15,56,220,240
1177.byte 102,15,56,220,248
1178 movups -16(%rcx,%r10,1),%xmm0
1179
1180 call .Lenc_loop6
1181
1182 movdqu (%rdi),%xmm8
1183 movdqu 16(%rdi),%xmm9
1184 movdqu 32(%rdi),%xmm10
1185 movdqu 48(%rdi),%xmm11
1186 movdqu 64(%rdi),%xmm12
1187 movdqu 80(%rdi),%xmm13
1188 leaq 96(%rdi),%rdi
1189 movups -64(%rcx,%r10,1),%xmm1
1190 pxor %xmm2,%xmm8
1191 movaps 0(%rsp),%xmm2
1192 pxor %xmm3,%xmm9
1193 movaps 16(%rsp),%xmm3
1194 pxor %xmm4,%xmm10
1195 movaps 32(%rsp),%xmm4
1196 pxor %xmm5,%xmm11
1197 movaps 48(%rsp),%xmm5
1198 pxor %xmm6,%xmm12
1199 movaps 64(%rsp),%xmm6
1200 pxor %xmm7,%xmm13
1201 movaps 80(%rsp),%xmm7
1202 movdqu %xmm8,(%rsi)
1203 movdqu %xmm9,16(%rsi)
1204 movdqu %xmm10,32(%rsi)
1205 movdqu %xmm11,48(%rsi)
1206 movdqu %xmm12,64(%rsi)
1207 movdqu %xmm13,80(%rsi)
1208 leaq 96(%rsi),%rsi
1209
1210 subq $6,%rdx
1211 jnc .Lctr32_loop6
1212
1213 addq $6,%rdx
1214 jz .Lctr32_done
1215
1216 leal -48(%r10),%eax
1217 leaq -80(%rcx,%r10,1),%rcx
1218 negl %eax
1219 shrl $4,%eax
1220 jmp .Lctr32_tail
1221
1222.align 32
1223.Lctr32_loop8:
1224 addl $8,%r8d
1225 movdqa 96(%rsp),%xmm8
1226.byte 102,15,56,220,209
1227 movl %r8d,%r9d
1228 movdqa 112(%rsp),%xmm9
1229.byte 102,15,56,220,217
1230 bswapl %r9d
1231 movups 32-128(%rcx),%xmm0
1232.byte 102,15,56,220,225
1233 xorl %ebp,%r9d
1234 nop
1235.byte 102,15,56,220,233
1236 movl %r9d,0+12(%rsp)
1237 leaq 1(%r8),%r9
1238.byte 102,15,56,220,241
1239.byte 102,15,56,220,249
1240.byte 102,68,15,56,220,193
1241.byte 102,68,15,56,220,201
1242 movups 48-128(%rcx),%xmm1
1243 bswapl %r9d
1244.byte 102,15,56,220,208
1245.byte 102,15,56,220,216
1246 xorl %ebp,%r9d
1247.byte 0x66,0x90
1248.byte 102,15,56,220,224
1249.byte 102,15,56,220,232
1250 movl %r9d,16+12(%rsp)
1251 leaq 2(%r8),%r9
1252.byte 102,15,56,220,240
1253.byte 102,15,56,220,248
1254.byte 102,68,15,56,220,192
1255.byte 102,68,15,56,220,200
1256 movups 64-128(%rcx),%xmm0
1257 bswapl %r9d
1258.byte 102,15,56,220,209
1259.byte 102,15,56,220,217
1260 xorl %ebp,%r9d
1261.byte 0x66,0x90
1262.byte 102,15,56,220,225
1263.byte 102,15,56,220,233
1264 movl %r9d,32+12(%rsp)
1265 leaq 3(%r8),%r9
1266.byte 102,15,56,220,241
1267.byte 102,15,56,220,249
1268.byte 102,68,15,56,220,193
1269.byte 102,68,15,56,220,201
1270 movups 80-128(%rcx),%xmm1
1271 bswapl %r9d
1272.byte 102,15,56,220,208
1273.byte 102,15,56,220,216
1274 xorl %ebp,%r9d
1275.byte 0x66,0x90
1276.byte 102,15,56,220,224
1277.byte 102,15,56,220,232
1278 movl %r9d,48+12(%rsp)
1279 leaq 4(%r8),%r9
1280.byte 102,15,56,220,240
1281.byte 102,15,56,220,248
1282.byte 102,68,15,56,220,192
1283.byte 102,68,15,56,220,200
1284 movups 96-128(%rcx),%xmm0
1285 bswapl %r9d
1286.byte 102,15,56,220,209
1287.byte 102,15,56,220,217
1288 xorl %ebp,%r9d
1289.byte 0x66,0x90
1290.byte 102,15,56,220,225
1291.byte 102,15,56,220,233
1292 movl %r9d,64+12(%rsp)
1293 leaq 5(%r8),%r9
1294.byte 102,15,56,220,241
1295.byte 102,15,56,220,249
1296.byte 102,68,15,56,220,193
1297.byte 102,68,15,56,220,201
1298 movups 112-128(%rcx),%xmm1
1299 bswapl %r9d
1300.byte 102,15,56,220,208
1301.byte 102,15,56,220,216
1302 xorl %ebp,%r9d
1303.byte 0x66,0x90
1304.byte 102,15,56,220,224
1305.byte 102,15,56,220,232
1306 movl %r9d,80+12(%rsp)
1307 leaq 6(%r8),%r9
1308.byte 102,15,56,220,240
1309.byte 102,15,56,220,248
1310.byte 102,68,15,56,220,192
1311.byte 102,68,15,56,220,200
1312 movups 128-128(%rcx),%xmm0
1313 bswapl %r9d
1314.byte 102,15,56,220,209
1315.byte 102,15,56,220,217
1316 xorl %ebp,%r9d
1317.byte 0x66,0x90
1318.byte 102,15,56,220,225
1319.byte 102,15,56,220,233
1320 movl %r9d,96+12(%rsp)
1321 leaq 7(%r8),%r9
1322.byte 102,15,56,220,241
1323.byte 102,15,56,220,249
1324.byte 102,68,15,56,220,193
1325.byte 102,68,15,56,220,201
1326 movups 144-128(%rcx),%xmm1
1327 bswapl %r9d
1328.byte 102,15,56,220,208
1329.byte 102,15,56,220,216
1330.byte 102,15,56,220,224
1331 xorl %ebp,%r9d
1332 movdqu 0(%rdi),%xmm10
1333.byte 102,15,56,220,232
1334 movl %r9d,112+12(%rsp)
1335 cmpl $11,%eax
1336.byte 102,15,56,220,240
1337.byte 102,15,56,220,248
1338.byte 102,68,15,56,220,192
1339.byte 102,68,15,56,220,200
1340 movups 160-128(%rcx),%xmm0
1341
1342 jb .Lctr32_enc_done
1343
1344.byte 102,15,56,220,209
1345.byte 102,15,56,220,217
1346.byte 102,15,56,220,225
1347.byte 102,15,56,220,233
1348.byte 102,15,56,220,241
1349.byte 102,15,56,220,249
1350.byte 102,68,15,56,220,193
1351.byte 102,68,15,56,220,201
1352 movups 176-128(%rcx),%xmm1
1353
1354.byte 102,15,56,220,208
1355.byte 102,15,56,220,216
1356.byte 102,15,56,220,224
1357.byte 102,15,56,220,232
1358.byte 102,15,56,220,240
1359.byte 102,15,56,220,248
1360.byte 102,68,15,56,220,192
1361.byte 102,68,15,56,220,200
1362 movups 192-128(%rcx),%xmm0
1363 je .Lctr32_enc_done
1364
1365.byte 102,15,56,220,209
1366.byte 102,15,56,220,217
1367.byte 102,15,56,220,225
1368.byte 102,15,56,220,233
1369.byte 102,15,56,220,241
1370.byte 102,15,56,220,249
1371.byte 102,68,15,56,220,193
1372.byte 102,68,15,56,220,201
1373 movups 208-128(%rcx),%xmm1
1374
1375.byte 102,15,56,220,208
1376.byte 102,15,56,220,216
1377.byte 102,15,56,220,224
1378.byte 102,15,56,220,232
1379.byte 102,15,56,220,240
1380.byte 102,15,56,220,248
1381.byte 102,68,15,56,220,192
1382.byte 102,68,15,56,220,200
1383 movups 224-128(%rcx),%xmm0
1384 jmp .Lctr32_enc_done
1385
1386.align 16
1387.Lctr32_enc_done:
1388 movdqu 16(%rdi),%xmm11
1389 pxor %xmm0,%xmm10
1390 movdqu 32(%rdi),%xmm12
1391 pxor %xmm0,%xmm11
1392 movdqu 48(%rdi),%xmm13
1393 pxor %xmm0,%xmm12
1394 movdqu 64(%rdi),%xmm14
1395 pxor %xmm0,%xmm13
1396 movdqu 80(%rdi),%xmm15
1397 pxor %xmm0,%xmm14
1398 pxor %xmm0,%xmm15
1399.byte 102,15,56,220,209
1400.byte 102,15,56,220,217
1401.byte 102,15,56,220,225
1402.byte 102,15,56,220,233
1403.byte 102,15,56,220,241
1404.byte 102,15,56,220,249
1405.byte 102,68,15,56,220,193
1406.byte 102,68,15,56,220,201
1407 movdqu 96(%rdi),%xmm1
1408 leaq 128(%rdi),%rdi
1409
1410.byte 102,65,15,56,221,210
1411 pxor %xmm0,%xmm1
1412 movdqu 112-128(%rdi),%xmm10
1413.byte 102,65,15,56,221,219
1414 pxor %xmm0,%xmm10
1415 movdqa 0(%rsp),%xmm11
1416.byte 102,65,15,56,221,228
1417.byte 102,65,15,56,221,237
1418 movdqa 16(%rsp),%xmm12
1419 movdqa 32(%rsp),%xmm13
1420.byte 102,65,15,56,221,246
1421.byte 102,65,15,56,221,255
1422 movdqa 48(%rsp),%xmm14
1423 movdqa 64(%rsp),%xmm15
1424.byte 102,68,15,56,221,193
1425 movdqa 80(%rsp),%xmm0
1426 movups 16-128(%rcx),%xmm1
1427.byte 102,69,15,56,221,202
1428
1429 movups %xmm2,(%rsi)
1430 movdqa %xmm11,%xmm2
1431 movups %xmm3,16(%rsi)
1432 movdqa %xmm12,%xmm3
1433 movups %xmm4,32(%rsi)
1434 movdqa %xmm13,%xmm4
1435 movups %xmm5,48(%rsi)
1436 movdqa %xmm14,%xmm5
1437 movups %xmm6,64(%rsi)
1438 movdqa %xmm15,%xmm6
1439 movups %xmm7,80(%rsi)
1440 movdqa %xmm0,%xmm7
1441 movups %xmm8,96(%rsi)
1442 movups %xmm9,112(%rsi)
1443 leaq 128(%rsi),%rsi
1444
1445 subq $8,%rdx
1446 jnc .Lctr32_loop8
1447
1448 addq $8,%rdx
1449 jz .Lctr32_done
1450 leaq -128(%rcx),%rcx
1451
1452.Lctr32_tail:
1453
1454
1455 leaq 16(%rcx),%rcx
1456 cmpq $4,%rdx
1457 jb .Lctr32_loop3
1458 je .Lctr32_loop4
1459
1460
1461 shll $4,%eax
1462 movdqa 96(%rsp),%xmm8
1463 pxor %xmm9,%xmm9
1464
1465 movups 16(%rcx),%xmm0
1466.byte 102,15,56,220,209
1467.byte 102,15,56,220,217
1468 leaq 32-16(%rcx,%rax,1),%rcx
1469 negq %rax
1470.byte 102,15,56,220,225
1471 addq $16,%rax
1472 movups (%rdi),%xmm10
1473.byte 102,15,56,220,233
1474.byte 102,15,56,220,241
1475 movups 16(%rdi),%xmm11
1476 movups 32(%rdi),%xmm12
1477.byte 102,15,56,220,249
1478.byte 102,68,15,56,220,193
1479
1480 call .Lenc_loop8_enter
1481
1482 movdqu 48(%rdi),%xmm13
1483 pxor %xmm10,%xmm2
1484 movdqu 64(%rdi),%xmm10
1485 pxor %xmm11,%xmm3
1486 movdqu %xmm2,(%rsi)
1487 pxor %xmm12,%xmm4
1488 movdqu %xmm3,16(%rsi)
1489 pxor %xmm13,%xmm5
1490 movdqu %xmm4,32(%rsi)
1491 pxor %xmm10,%xmm6
1492 movdqu %xmm5,48(%rsi)
1493 movdqu %xmm6,64(%rsi)
1494 cmpq $6,%rdx
1495 jb .Lctr32_done
1496
1497 movups 80(%rdi),%xmm11
1498 xorps %xmm11,%xmm7
1499 movups %xmm7,80(%rsi)
1500 je .Lctr32_done
1501
1502 movups 96(%rdi),%xmm12
1503 xorps %xmm12,%xmm8
1504 movups %xmm8,96(%rsi)
1505 jmp .Lctr32_done
1506
1507.align 32
1508.Lctr32_loop4:
1509.byte 102,15,56,220,209
1510 leaq 16(%rcx),%rcx
1511 decl %eax
1512.byte 102,15,56,220,217
1513.byte 102,15,56,220,225
1514.byte 102,15,56,220,233
1515 movups (%rcx),%xmm1
1516 jnz .Lctr32_loop4
1517.byte 102,15,56,221,209
1518.byte 102,15,56,221,217
1519 movups (%rdi),%xmm10
1520 movups 16(%rdi),%xmm11
1521.byte 102,15,56,221,225
1522.byte 102,15,56,221,233
1523 movups 32(%rdi),%xmm12
1524 movups 48(%rdi),%xmm13
1525
1526 xorps %xmm10,%xmm2
1527 movups %xmm2,(%rsi)
1528 xorps %xmm11,%xmm3
1529 movups %xmm3,16(%rsi)
1530 pxor %xmm12,%xmm4
1531 movdqu %xmm4,32(%rsi)
1532 pxor %xmm13,%xmm5
1533 movdqu %xmm5,48(%rsi)
1534 jmp .Lctr32_done
1535
1536.align 32
1537.Lctr32_loop3:
1538.byte 102,15,56,220,209
1539 leaq 16(%rcx),%rcx
1540 decl %eax
1541.byte 102,15,56,220,217
1542.byte 102,15,56,220,225
1543 movups (%rcx),%xmm1
1544 jnz .Lctr32_loop3
1545.byte 102,15,56,221,209
1546.byte 102,15,56,221,217
1547.byte 102,15,56,221,225
1548
1549 movups (%rdi),%xmm10
1550 xorps %xmm10,%xmm2
1551 movups %xmm2,(%rsi)
1552 cmpq $2,%rdx
1553 jb .Lctr32_done
1554
1555 movups 16(%rdi),%xmm11
1556 xorps %xmm11,%xmm3
1557 movups %xmm3,16(%rsi)
1558 je .Lctr32_done
1559
1560 movups 32(%rdi),%xmm12
1561 xorps %xmm12,%xmm4
1562 movups %xmm4,32(%rsi)
1563
1564.Lctr32_done:
1565 xorps %xmm0,%xmm0
1566 xorl %ebp,%ebp
1567 pxor %xmm1,%xmm1
1568 pxor %xmm2,%xmm2
1569 pxor %xmm3,%xmm3
1570 pxor %xmm4,%xmm4
1571 pxor %xmm5,%xmm5
1572 pxor %xmm6,%xmm6
1573 pxor %xmm7,%xmm7
1574 movaps %xmm0,0(%rsp)
1575 pxor %xmm8,%xmm8
1576 movaps %xmm0,16(%rsp)
1577 pxor %xmm9,%xmm9
1578 movaps %xmm0,32(%rsp)
1579 pxor %xmm10,%xmm10
1580 movaps %xmm0,48(%rsp)
1581 pxor %xmm11,%xmm11
1582 movaps %xmm0,64(%rsp)
1583 pxor %xmm12,%xmm12
1584 movaps %xmm0,80(%rsp)
1585 pxor %xmm13,%xmm13
1586 movaps %xmm0,96(%rsp)
1587 pxor %xmm14,%xmm14
1588 movaps %xmm0,112(%rsp)
1589 pxor %xmm15,%xmm15
1590 movq -8(%r11),%rbp
1591.cfi_restore %rbp
1592 leaq (%r11),%rsp
1593.cfi_def_cfa_register %rsp
1594.Lctr32_epilogue:
1595 .byte 0xf3,0xc3
1596.cfi_endproc
1597.size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
# ---------------------------------------------------------------------
# aesni_xts_encrypt
#
# Tweak-based block encryption built on AES-NI. The AES instructions are
# emitted as raw .byte sequences: 102,15,56,220,M is aesenc and
# 102,15,56,221,M is aesenclast, where M is the ModRM byte (209 means
# source xmm1, destination xmm2; 208 means source xmm0, destination xmm2).
#
# Register roles as used below (the C prototype is not visible in this
# chunk - TODO confirm parameter names against the caller):
#   rdi  source pointer, advanced as blocks are consumed
#   rsi  destination pointer, advanced as blocks are produced
#   rdx  byte count; rounded down to a multiple of 16 for the bulk work
#   rcx  key schedule for the data blocks (round count read at +240)
#   r8   key schedule used once to encrypt the 16 bytes at (r9)
#   r9   pointer to the 16-byte tweak input; later reused to hold the
#        original byte count (low 4 bits = length of the partial tail)
#
# Stack: 112 bytes of 16-byte aligned scratch. r11 keeps the entry rsp
# and the caller rbp is saved at -8(r11). The scratch area and all xmm
# registers are zeroed before returning.
# ---------------------------------------------------------------------
1598.globl aesni_xts_encrypt
1599.type aesni_xts_encrypt,@function
1600.align 16
1601aesni_xts_encrypt:
1602.cfi_startproc
# Frame: remember entry rsp in r11, save rbp, carve aligned scratch.
1603 leaq (%rsp),%r11
1604.cfi_def_cfa_register %r11
1605 pushq %rbp
1606.cfi_offset %rbp,-16
1607 subq $112,%rsp
1608 andq $-16,%rsp
# Encrypt the 16 bytes at (r9) with the schedule at r8; the result in
# xmm2 is the first tweak. eax = round count of the r8 schedule,
# r10d = round count of the rcx schedule.
1609 movups (%r9),%xmm2
1610 movl 240(%r8),%eax
1611 movl 240(%rcx),%r10d
1612 movups (%r8),%xmm0
1613 movups 16(%r8),%xmm1
1614 leaq 32(%r8),%r8
1615 xorps %xmm0,%xmm2
1616.Loop_enc1_8:
1617.byte 102,15,56,220,209
1618 decl %eax
1619 movups (%r8),%xmm1
1620 leaq 16(%r8),%r8
1621 jnz .Loop_enc1_8
1622.byte 102,15,56,221,209
# From here: xmm0 = round key 0 of the data schedule, rbp = schedule
# base, eax = round count, r10 = 16 * round count, r9 = full byte
# count, rdx = byte count rounded down to whole blocks.
1623 movups (%rcx),%xmm0
1624 movq %rcx,%rbp
1625 movl %r10d,%eax
1626 shll $4,%r10d
1627 movq %rdx,%r9
1628 andq $-16,%rdx
1629
# xmm1 = schedule entry at offset 16 + 16*rounds, i.e. the entry that a
# single-block loop like the one above ends up using for aesenclast.
1630 movups 16(%rcx,%r10,1),%xmm1
1631
# Tweak schedule. xmm15 carries the running tweak; each group below
# copies it into the next of xmm10..xmm14 (XORed with round key 0 so the
# initial AddRoundKey is folded in) and then advances xmm15: paddq
# doubles both 64-bit halves and a mask derived from the sign bits in
# xmm9 is ANDed with the constant from .Lxts_magic and XORed in.
# NOTE(review): .Lxts_magic is defined outside this chunk; presumably it
# holds the carry/reduction mask for doubling in GF(2^128) - confirm.
1632 movdqa .Lxts_magic(%rip),%xmm8
1633 movdqa %xmm2,%xmm15
1634 pshufd $0x5f,%xmm2,%xmm9
# xmm1 = round key 0 XOR final round key, parked at 96(%rsp) below.
1635 pxor %xmm0,%xmm1
1636 movdqa %xmm9,%xmm14
1637 paddd %xmm9,%xmm9
1638 movdqa %xmm15,%xmm10
1639 psrad $31,%xmm14
1640 paddq %xmm15,%xmm15
1641 pand %xmm8,%xmm14
1642 pxor %xmm0,%xmm10
1643 pxor %xmm14,%xmm15
1644 movdqa %xmm9,%xmm14
1645 paddd %xmm9,%xmm9
1646 movdqa %xmm15,%xmm11
1647 psrad $31,%xmm14
1648 paddq %xmm15,%xmm15
1649 pand %xmm8,%xmm14
1650 pxor %xmm0,%xmm11
1651 pxor %xmm14,%xmm15
1652 movdqa %xmm9,%xmm14
1653 paddd %xmm9,%xmm9
1654 movdqa %xmm15,%xmm12
1655 psrad $31,%xmm14
1656 paddq %xmm15,%xmm15
1657 pand %xmm8,%xmm14
1658 pxor %xmm0,%xmm12
1659 pxor %xmm14,%xmm15
1660 movdqa %xmm9,%xmm14
1661 paddd %xmm9,%xmm9
1662 movdqa %xmm15,%xmm13
1663 psrad $31,%xmm14
1664 paddq %xmm15,%xmm15
1665 pand %xmm8,%xmm14
1666 pxor %xmm0,%xmm13
1667 pxor %xmm14,%xmm15
1668 movdqa %xmm15,%xmm14
1669 psrad $31,%xmm9
1670 paddq %xmm15,%xmm15
1671 pand %xmm8,%xmm9
1672 pxor %xmm0,%xmm14
1673 pxor %xmm9,%xmm15
1674 movaps %xmm1,96(%rsp)
1675
# Fewer than 96 bytes (six blocks) of whole-block data: go to the
# short path. The borrow from this subq is what jc tests.
1676 subq $96,%rdx
1677 jc .Lxts_enc_short
1678
# Bulk setup: rcx points past the end of the round keys and rax/r10 hold
# 112 - 16*rounds, which the inner loop counts up in steps of 32 until
# it reaches zero. r8 is repointed at .Lxts_magic.
1679 movl $16+96,%eax
1680 leaq 32(%rbp,%r10,1),%rcx
1681 subq %r10,%rax
1682 movups 16(%rbp),%xmm1
1683 movq %rax,%r10
1684 leaq .Lxts_magic(%rip),%r8
1685 jmp .Lxts_enc_grandloop
1686
1687.align 32
# Six blocks per iteration. Inputs are XORed with (tweak ^ round key 0),
# and the first AES rounds are interleaved with the loads. Each tweak
# register is then XORed with the value from 96(%rsp), giving
# (tweak ^ final round key), and spilled to the stack so that the
# closing aesenclast can apply the last round key and the output tweak
# in one step.
1688.Lxts_enc_grandloop:
1689 movdqu 0(%rdi),%xmm2
1690 movdqa %xmm0,%xmm8
1691 movdqu 16(%rdi),%xmm3
1692 pxor %xmm10,%xmm2
1693 movdqu 32(%rdi),%xmm4
1694 pxor %xmm11,%xmm3
1695.byte 102,15,56,220,209
1696 movdqu 48(%rdi),%xmm5
1697 pxor %xmm12,%xmm4
1698.byte 102,15,56,220,217
1699 movdqu 64(%rdi),%xmm6
1700 pxor %xmm13,%xmm5
1701.byte 102,15,56,220,225
1702 movdqu 80(%rdi),%xmm7
1703 pxor %xmm15,%xmm8
1704 movdqa 96(%rsp),%xmm9
1705 pxor %xmm14,%xmm6
1706.byte 102,15,56,220,233
1707 movups 32(%rbp),%xmm0
1708 leaq 96(%rdi),%rdi
1709 pxor %xmm8,%xmm7
1710
1711 pxor %xmm9,%xmm10
1712.byte 102,15,56,220,241
1713 pxor %xmm9,%xmm11
1714 movdqa %xmm10,0(%rsp)
1715.byte 102,15,56,220,249
1716 movups 48(%rbp),%xmm1
1717 pxor %xmm9,%xmm12
1718
1719.byte 102,15,56,220,208
1720 pxor %xmm9,%xmm13
1721 movdqa %xmm11,16(%rsp)
1722.byte 102,15,56,220,216
1723 pxor %xmm9,%xmm14
1724 movdqa %xmm12,32(%rsp)
1725.byte 102,15,56,220,224
1726.byte 102,15,56,220,232
1727 pxor %xmm9,%xmm8
1728 movdqa %xmm14,64(%rsp)
1729.byte 102,15,56,220,240
1730.byte 102,15,56,220,248
1731 movups 64(%rbp),%xmm0
1732 movdqa %xmm8,80(%rsp)
1733 pshufd $0x5f,%xmm15,%xmm9
1734 jmp .Lxts_enc_loop6
1735.align 32
# Middle rounds: two rounds per pass for all six blocks, alternating
# xmm1 and xmm0 as the round key. addq sets ZF when rax reaches zero;
# the following .byte lines and movups leave the flags untouched for jnz.
1736.Lxts_enc_loop6:
1737.byte 102,15,56,220,209
1738.byte 102,15,56,220,217
1739.byte 102,15,56,220,225
1740.byte 102,15,56,220,233
1741.byte 102,15,56,220,241
1742.byte 102,15,56,220,249
1743 movups -64(%rcx,%rax,1),%xmm1
1744 addq $32,%rax
1745
1746.byte 102,15,56,220,208
1747.byte 102,15,56,220,216
1748.byte 102,15,56,220,224
1749.byte 102,15,56,220,232
1750.byte 102,15,56,220,240
1751.byte 102,15,56,220,248
1752 movups -80(%rcx,%rax,1),%xmm0
1753 jnz .Lxts_enc_loop6
1754
# Last rounds, interleaved with building the tweaks for the next
# iteration: xmm10..xmm14 are rebuilt as (round key 0 ^ next tweak) while
# xmm15 keeps advancing. xmm8 is reloaded with the constant at (r8).
1755 movdqa (%r8),%xmm8
1756 movdqa %xmm9,%xmm14
1757 paddd %xmm9,%xmm9
1758.byte 102,15,56,220,209
1759 paddq %xmm15,%xmm15
1760 psrad $31,%xmm14
1761.byte 102,15,56,220,217
1762 pand %xmm8,%xmm14
1763 movups (%rbp),%xmm10
1764.byte 102,15,56,220,225
1765.byte 102,15,56,220,233
1766.byte 102,15,56,220,241
1767 pxor %xmm14,%xmm15
1768 movaps %xmm10,%xmm11
1769.byte 102,15,56,220,249
1770 movups -64(%rcx),%xmm1
1771
1772 movdqa %xmm9,%xmm14
1773.byte 102,15,56,220,208
1774 paddd %xmm9,%xmm9
1775 pxor %xmm15,%xmm10
1776.byte 102,15,56,220,216
1777 psrad $31,%xmm14
1778 paddq %xmm15,%xmm15
1779.byte 102,15,56,220,224
1780.byte 102,15,56,220,232
1781 pand %xmm8,%xmm14
1782 movaps %xmm11,%xmm12
1783.byte 102,15,56,220,240
1784 pxor %xmm14,%xmm15
1785 movdqa %xmm9,%xmm14
1786.byte 102,15,56,220,248
1787 movups -48(%rcx),%xmm0
1788
1789 paddd %xmm9,%xmm9
1790.byte 102,15,56,220,209
1791 pxor %xmm15,%xmm11
1792 psrad $31,%xmm14
1793.byte 102,15,56,220,217
1794 paddq %xmm15,%xmm15
1795 pand %xmm8,%xmm14
1796.byte 102,15,56,220,225
1797.byte 102,15,56,220,233
# The fourth spill slot is written only now, just before xmm13 is
# overwritten with the next tweak base.
1798 movdqa %xmm13,48(%rsp)
1799 pxor %xmm14,%xmm15
1800.byte 102,15,56,220,241
1801 movaps %xmm12,%xmm13
1802 movdqa %xmm9,%xmm14
1803.byte 102,15,56,220,249
1804 movups -32(%rcx),%xmm1
1805
1806 paddd %xmm9,%xmm9
1807.byte 102,15,56,220,208
1808 pxor %xmm15,%xmm12
1809 psrad $31,%xmm14
1810.byte 102,15,56,220,216
1811 paddq %xmm15,%xmm15
1812 pand %xmm8,%xmm14
1813.byte 102,15,56,220,224
1814.byte 102,15,56,220,232
1815.byte 102,15,56,220,240
1816 pxor %xmm14,%xmm15
1817 movaps %xmm13,%xmm14
1818.byte 102,15,56,220,248
1819
1820 movdqa %xmm9,%xmm0
1821 paddd %xmm9,%xmm9
1822.byte 102,15,56,220,209
1823 pxor %xmm15,%xmm13
1824 psrad $31,%xmm0
1825.byte 102,15,56,220,217
1826 paddq %xmm15,%xmm15
1827 pand %xmm8,%xmm0
1828.byte 102,15,56,220,225
1829.byte 102,15,56,220,233
1830 pxor %xmm0,%xmm15
1831 movups (%rbp),%xmm0
1832.byte 102,15,56,220,241
1833.byte 102,15,56,220,249
1834 movups 16(%rbp),%xmm1
1835
# The 7-byte .byte forms are aesenclast with a memory operand: ModRM
# 84/92/100/108/116/124 select xmm2..xmm7, SIB 36 is (%rsp), and the last
# byte is the displacement of the spill slot (0,16,...,80).
# rax is reset from r10 for the next pass of the inner loop.
1836 pxor %xmm15,%xmm14
1837.byte 102,15,56,221,84,36,0
1838 psrad $31,%xmm9
1839 paddq %xmm15,%xmm15
1840.byte 102,15,56,221,92,36,16
1841.byte 102,15,56,221,100,36,32
1842 pand %xmm8,%xmm9
1843 movq %r10,%rax
1844.byte 102,15,56,221,108,36,48
1845.byte 102,15,56,221,116,36,64
1846.byte 102,15,56,221,124,36,80
1847 pxor %xmm9,%xmm15
1848
# Store six output blocks and loop while at least 96 bytes remain.
1849 leaq 96(%rsi),%rsi
1850 movups %xmm2,-96(%rsi)
1851 movups %xmm3,-80(%rsi)
1852 movups %xmm4,-64(%rsi)
1853 movups %xmm5,-48(%rsi)
1854 movups %xmm6,-32(%rsi)
1855 movups %xmm7,-16(%rsi)
1856 subq $96,%rdx
1857 jnc .Lxts_enc_grandloop
1858
# Leaving the bulk loop: recover the round count in eax
# ((112 - r10) / 16) and point rcx back at the schedule base.
1859 movl $16+96,%eax
1860 subl %r10d,%eax
1861 movq %rbp,%rcx
1862 shrl $4,%eax
1863
# Short path: 0..5 whole blocks remain (rdx + 96 bytes). The tweaks in
# xmm10..xmm14 still carry round key 0; it is XORed back out one register
# at a time, only as far as needed, because the helpers called below are
# handed plain tweak-masked input. r10d keeps the round count for later.
1864.Lxts_enc_short:
1865
1866 movl %eax,%r10d
1867 pxor %xmm0,%xmm10
1868 addq $96,%rdx
1869 jz .Lxts_enc_done
1870
1871 pxor %xmm0,%xmm11
1872 cmpq $0x20,%rdx
1873 jb .Lxts_enc_one
1874 pxor %xmm0,%xmm12
1875 je .Lxts_enc_two
1876
1877 pxor %xmm0,%xmm13
1878 cmpq $0x40,%rdx
1879 jb .Lxts_enc_three
1880 pxor %xmm0,%xmm14
1881 je .Lxts_enc_four
1882
# Five blocks: run the six-block helper with a zeroed sixth input.
# _aesni_encrypt6 is defined outside this chunk.
1883 movdqu (%rdi),%xmm2
1884 movdqu 16(%rdi),%xmm3
1885 movdqu 32(%rdi),%xmm4
1886 pxor %xmm10,%xmm2
1887 movdqu 48(%rdi),%xmm5
1888 pxor %xmm11,%xmm3
1889 movdqu 64(%rdi),%xmm6
1890 leaq 80(%rdi),%rdi
1891 pxor %xmm12,%xmm4
1892 pxor %xmm13,%xmm5
1893 pxor %xmm14,%xmm6
1894 pxor %xmm7,%xmm7
1895
1896 call _aesni_encrypt6
1897
# Apply the output tweaks; xmm10 receives the next unused tweak for the
# tail handling at .Lxts_enc_done.
1898 xorps %xmm10,%xmm2
1899 movdqa %xmm15,%xmm10
1900 xorps %xmm11,%xmm3
1901 xorps %xmm12,%xmm4
1902 movdqu %xmm2,(%rsi)
1903 xorps %xmm13,%xmm5
1904 movdqu %xmm3,16(%rsi)
1905 xorps %xmm14,%xmm6
1906 movdqu %xmm4,32(%rsi)
1907 movdqu %xmm5,48(%rsi)
1908 movdqu %xmm6,64(%rsi)
1909 leaq 80(%rsi),%rsi
1910 jmp .Lxts_enc_done
1911
1912.align 16
# One block: inline single-block encryption, tweak applied before and
# after. xmm10 then takes the next tweak (xmm11).
1913.Lxts_enc_one:
1914 movups (%rdi),%xmm2
1915 leaq 16(%rdi),%rdi
1916 xorps %xmm10,%xmm2
1917 movups (%rcx),%xmm0
1918 movups 16(%rcx),%xmm1
1919 leaq 32(%rcx),%rcx
1920 xorps %xmm0,%xmm2
1921.Loop_enc1_9:
1922.byte 102,15,56,220,209
1923 decl %eax
1924 movups (%rcx),%xmm1
1925 leaq 16(%rcx),%rcx
1926 jnz .Loop_enc1_9
1927.byte 102,15,56,221,209
1928 xorps %xmm10,%xmm2
1929 movdqa %xmm11,%xmm10
1930 movups %xmm2,(%rsi)
1931 leaq 16(%rsi),%rsi
1932 jmp .Lxts_enc_done
1933
1934.align 16
# Two blocks via _aesni_encrypt2 (takes rcx = schedule, eax = rounds,
# data in xmm2/xmm3). xmm10 then takes the next tweak (xmm12).
1935.Lxts_enc_two:
1936 movups (%rdi),%xmm2
1937 movups 16(%rdi),%xmm3
1938 leaq 32(%rdi),%rdi
1939 xorps %xmm10,%xmm2
1940 xorps %xmm11,%xmm3
1941
1942 call _aesni_encrypt2
1943
1944 xorps %xmm10,%xmm2
1945 movdqa %xmm12,%xmm10
1946 xorps %xmm11,%xmm3
1947 movups %xmm2,(%rsi)
1948 movups %xmm3,16(%rsi)
1949 leaq 32(%rsi),%rsi
1950 jmp .Lxts_enc_done
1951
1952.align 16
# Three blocks via _aesni_encrypt3 (defined outside this chunk).
# xmm10 then takes the next tweak (xmm13).
1953.Lxts_enc_three:
1954 movups (%rdi),%xmm2
1955 movups 16(%rdi),%xmm3
1956 movups 32(%rdi),%xmm4
1957 leaq 48(%rdi),%rdi
1958 xorps %xmm10,%xmm2
1959 xorps %xmm11,%xmm3
1960 xorps %xmm12,%xmm4
1961
1962 call _aesni_encrypt3
1963
1964 xorps %xmm10,%xmm2
1965 movdqa %xmm13,%xmm10
1966 xorps %xmm11,%xmm3
1967 xorps %xmm12,%xmm4
1968 movups %xmm2,(%rsi)
1969 movups %xmm3,16(%rsi)
1970 movups %xmm4,32(%rsi)
1971 leaq 48(%rsi),%rsi
1972 jmp .Lxts_enc_done
1973
1974.align 16
# Four blocks via _aesni_encrypt4 (defined outside this chunk).
# xmm10 then takes the next tweak (xmm14).
1975.Lxts_enc_four:
1976 movups (%rdi),%xmm2
1977 movups 16(%rdi),%xmm3
1978 movups 32(%rdi),%xmm4
1979 xorps %xmm10,%xmm2
1980 movups 48(%rdi),%xmm5
1981 leaq 64(%rdi),%rdi
1982 xorps %xmm11,%xmm3
1983 xorps %xmm12,%xmm4
1984 xorps %xmm13,%xmm5
1985
1986 call _aesni_encrypt4
1987
1988 pxor %xmm10,%xmm2
1989 movdqa %xmm14,%xmm10
1990 pxor %xmm11,%xmm3
1991 pxor %xmm12,%xmm4
1992 movdqu %xmm2,(%rsi)
1993 pxor %xmm13,%xmm5
1994 movdqu %xmm3,16(%rsi)
1995 movdqu %xmm4,32(%rsi)
1996 movdqu %xmm5,48(%rsi)
1997 leaq 64(%rsi),%rsi
1998 jmp .Lxts_enc_done
1999
2000.align 16
# All whole blocks are written. r9 & 15 is the length of a trailing
# partial block; zero means nothing more to do.
2001.Lxts_enc_done:
2002 andq $15,%r9
2003 jz .Lxts_enc_ret
2004 movq %r9,%rdx
2005
# Tail handling, one byte per pass: the input tail byte replaces a byte
# of the last full output block at -16(rsi), and the displaced output
# byte is written at 0(rsi) as the partial final block.
# NOTE(review): this reads -16(%rsi), so at least one full block must
# have been produced before a partial tail - confirm caller guarantees.
2006.Lxts_enc_steal:
2007 movzbl (%rdi),%eax
2008 movzbl -16(%rsi),%ecx
2009 leaq 1(%rdi),%rdi
2010 movb %al,-16(%rsi)
2011 movb %cl,0(%rsi)
2012 leaq 1(%rsi),%rsi
2013 subq $1,%rdx
2014 jnz .Lxts_enc_steal
2015
# Rewind rsi, then encrypt the patched block at -16(rsi) in place using
# the tweak held in xmm10 (rcx = schedule base, eax = round count).
2016 subq %r9,%rsi
2017 movq %rbp,%rcx
2018 movl %r10d,%eax
2019
2020 movups -16(%rsi),%xmm2
2021 xorps %xmm10,%xmm2
2022 movups (%rcx),%xmm0
2023 movups 16(%rcx),%xmm1
2024 leaq 32(%rcx),%rcx
2025 xorps %xmm0,%xmm2
2026.Loop_enc1_10:
2027.byte 102,15,56,220,209
2028 decl %eax
2029 movups (%rcx),%xmm1
2030 leaq 16(%rcx),%rcx
2031 jnz .Loop_enc1_10
2032.byte 102,15,56,221,209
2033 xorps %xmm10,%xmm2
2034 movups %xmm2,-16(%rsi)
2035
# Exit: wipe every xmm register and all seven 16-byte scratch slots so
# no key or tweak material is left behind, then restore rbp and rsp from
# the values anchored at r11.
2036.Lxts_enc_ret:
2037 xorps %xmm0,%xmm0
2038 pxor %xmm1,%xmm1
2039 pxor %xmm2,%xmm2
2040 pxor %xmm3,%xmm3
2041 pxor %xmm4,%xmm4
2042 pxor %xmm5,%xmm5
2043 pxor %xmm6,%xmm6
2044 pxor %xmm7,%xmm7
2045 movaps %xmm0,0(%rsp)
2046 pxor %xmm8,%xmm8
2047 movaps %xmm0,16(%rsp)
2048 pxor %xmm9,%xmm9
2049 movaps %xmm0,32(%rsp)
2050 pxor %xmm10,%xmm10
2051 movaps %xmm0,48(%rsp)
2052 pxor %xmm11,%xmm11
2053 movaps %xmm0,64(%rsp)
2054 pxor %xmm12,%xmm12
2055 movaps %xmm0,80(%rsp)
2056 pxor %xmm13,%xmm13
2057 movaps %xmm0,96(%rsp)
2058 pxor %xmm14,%xmm14
2059 pxor %xmm15,%xmm15
2060 movq -8(%r11),%rbp
2061.cfi_restore %rbp
2062 leaq (%r11),%rsp
2063.cfi_def_cfa_register %rsp
2064.Lxts_enc_epilogue:
# 0xf3,0xc3 is the two-byte "rep ret" return encoding.
2065 .byte 0xf3,0xc3
2066.cfi_endproc
2067.size aesni_xts_encrypt,.-aesni_xts_encrypt
# ---------------------------------------------------------------------
# aesni_xts_decrypt
#
# Decryption counterpart of aesni_xts_encrypt. Data blocks use aesdec
# (.byte 102,15,56,222,M) and aesdeclast (.byte 102,15,56,223,M); the
# initial tweak is still produced with aesenc/aesenclast (220/221) using
# the schedule at r8.
#
# Register roles as used below (the C prototype is not visible in this
# chunk - TODO confirm parameter names against the caller):
#   rdi  source pointer          rsi  destination pointer
#   rdx  byte count              rcx  key schedule for the data blocks
#   r8   key schedule for the tweak
#   r9   pointer to the 16-byte tweak input; later reused for the
#        adjusted byte count (low 4 bits = length of the partial tail)
#
# When the byte count is not a multiple of 16, one full block is held
# back from the bulk processing and handled together with the partial
# tail at .Lxts_dec_done2.
#
# Stack: 112 bytes of 16-byte aligned scratch, entry rsp kept in r11,
# caller rbp saved at -8(r11). Scratch and xmm registers are zeroed
# before returning.
# ---------------------------------------------------------------------
2068.globl aesni_xts_decrypt
2069.type aesni_xts_decrypt,@function
2070.align 16
2071aesni_xts_decrypt:
2072.cfi_startproc
# Frame: remember entry rsp in r11, save rbp, carve aligned scratch.
2073 leaq (%rsp),%r11
2074.cfi_def_cfa_register %r11
2075 pushq %rbp
2076.cfi_offset %rbp,-16
2077 subq $112,%rsp
2078 andq $-16,%rsp
# Encrypt the 16 bytes at (r9) with the schedule at r8; the result in
# xmm2 is the first tweak.
2079 movups (%r9),%xmm2
2080 movl 240(%r8),%eax
2081 movl 240(%rcx),%r10d
2082 movups (%r8),%xmm0
2083 movups 16(%r8),%xmm1
2084 leaq 32(%r8),%r8
2085 xorps %xmm0,%xmm2
2086.Loop_enc1_11:
2087.byte 102,15,56,220,209
2088 decl %eax
2089 movups (%r8),%xmm1
2090 leaq 16(%r8),%r8
2091 jnz .Loop_enc1_11
2092.byte 102,15,56,221,209
# rax = 16 when the byte count has a partial tail, else 0; subtracting
# it holds one full block back for the tail handling.
2093 xorl %eax,%eax
2094 testq $15,%rdx
2095 setnz %al
2096 shlq $4,%rax
2097 subq %rax,%rdx
2098
# xmm0 = round key 0, rbp = schedule base, eax = round count,
# r10 = 16 * round count, r9 = adjusted byte count, rdx = whole-block
# byte count.
2099 movups (%rcx),%xmm0
2100 movq %rcx,%rbp
2101 movl %r10d,%eax
2102 shll $4,%r10d
2103 movq %rdx,%r9
2104 andq $-16,%rdx
2105
# xmm1 = schedule entry at offset 16 + 16*rounds (the final round key).
2106 movups 16(%rcx,%r10,1),%xmm1
2107
# Tweak schedule, same construction as in the encrypt routine: xmm15 is
# the running tweak, xmm10..xmm14 receive successive tweaks XORed with
# round key 0, and xmm15 is advanced by a doubling step that uses the
# constant from .Lxts_magic.
# NOTE(review): .Lxts_magic is defined outside this chunk; presumably
# the carry/reduction mask for doubling in GF(2^128) - confirm.
2108 movdqa .Lxts_magic(%rip),%xmm8
2109 movdqa %xmm2,%xmm15
2110 pshufd $0x5f,%xmm2,%xmm9
# xmm1 = round key 0 XOR final round key, parked at 96(%rsp) below.
2111 pxor %xmm0,%xmm1
2112 movdqa %xmm9,%xmm14
2113 paddd %xmm9,%xmm9
2114 movdqa %xmm15,%xmm10
2115 psrad $31,%xmm14
2116 paddq %xmm15,%xmm15
2117 pand %xmm8,%xmm14
2118 pxor %xmm0,%xmm10
2119 pxor %xmm14,%xmm15
2120 movdqa %xmm9,%xmm14
2121 paddd %xmm9,%xmm9
2122 movdqa %xmm15,%xmm11
2123 psrad $31,%xmm14
2124 paddq %xmm15,%xmm15
2125 pand %xmm8,%xmm14
2126 pxor %xmm0,%xmm11
2127 pxor %xmm14,%xmm15
2128 movdqa %xmm9,%xmm14
2129 paddd %xmm9,%xmm9
2130 movdqa %xmm15,%xmm12
2131 psrad $31,%xmm14
2132 paddq %xmm15,%xmm15
2133 pand %xmm8,%xmm14
2134 pxor %xmm0,%xmm12
2135 pxor %xmm14,%xmm15
2136 movdqa %xmm9,%xmm14
2137 paddd %xmm9,%xmm9
2138 movdqa %xmm15,%xmm13
2139 psrad $31,%xmm14
2140 paddq %xmm15,%xmm15
2141 pand %xmm8,%xmm14
2142 pxor %xmm0,%xmm13
2143 pxor %xmm14,%xmm15
2144 movdqa %xmm15,%xmm14
2145 psrad $31,%xmm9
2146 paddq %xmm15,%xmm15
2147 pand %xmm8,%xmm9
2148 pxor %xmm0,%xmm14
2149 pxor %xmm9,%xmm15
2150 movaps %xmm1,96(%rsp)
2151
# Fewer than 96 bytes (six blocks) of whole-block data: short path.
2152 subq $96,%rdx
2153 jc .Lxts_dec_short
2154
# Bulk setup: rcx points past the end of the round keys and rax/r10 hold
# 112 - 16*rounds, counted up by 32 per inner-loop pass until zero.
2155 movl $16+96,%eax
2156 leaq 32(%rbp,%r10,1),%rcx
2157 subq %r10,%rax
2158 movups 16(%rbp),%xmm1
2159 movq %rax,%r10
2160 leaq .Lxts_magic(%rip),%r8
2161 jmp .Lxts_dec_grandloop
2162
2163.align 32
# Six blocks per iteration. Inputs are XORed with (tweak ^ round key 0);
# the tweak registers are then turned into (tweak ^ final round key) and
# spilled to the stack for use as the aesdeclast memory operand.
2164.Lxts_dec_grandloop:
2165 movdqu 0(%rdi),%xmm2
2166 movdqa %xmm0,%xmm8
2167 movdqu 16(%rdi),%xmm3
2168 pxor %xmm10,%xmm2
2169 movdqu 32(%rdi),%xmm4
2170 pxor %xmm11,%xmm3
2171.byte 102,15,56,222,209
2172 movdqu 48(%rdi),%xmm5
2173 pxor %xmm12,%xmm4
2174.byte 102,15,56,222,217
2175 movdqu 64(%rdi),%xmm6
2176 pxor %xmm13,%xmm5
2177.byte 102,15,56,222,225
2178 movdqu 80(%rdi),%xmm7
2179 pxor %xmm15,%xmm8
2180 movdqa 96(%rsp),%xmm9
2181 pxor %xmm14,%xmm6
2182.byte 102,15,56,222,233
2183 movups 32(%rbp),%xmm0
2184 leaq 96(%rdi),%rdi
2185 pxor %xmm8,%xmm7
2186
2187 pxor %xmm9,%xmm10
2188.byte 102,15,56,222,241
2189 pxor %xmm9,%xmm11
2190 movdqa %xmm10,0(%rsp)
2191.byte 102,15,56,222,249
2192 movups 48(%rbp),%xmm1
2193 pxor %xmm9,%xmm12
2194
2195.byte 102,15,56,222,208
2196 pxor %xmm9,%xmm13
2197 movdqa %xmm11,16(%rsp)
2198.byte 102,15,56,222,216
2199 pxor %xmm9,%xmm14
2200 movdqa %xmm12,32(%rsp)
2201.byte 102,15,56,222,224
2202.byte 102,15,56,222,232
2203 pxor %xmm9,%xmm8
2204 movdqa %xmm14,64(%rsp)
2205.byte 102,15,56,222,240
2206.byte 102,15,56,222,248
2207 movups 64(%rbp),%xmm0
2208 movdqa %xmm8,80(%rsp)
2209 pshufd $0x5f,%xmm15,%xmm9
2210 jmp .Lxts_dec_loop6
2211.align 32
# Middle rounds: two rounds per pass for all six blocks. addq sets ZF
# when rax reaches zero; nothing after it touches the flags before jnz.
2212.Lxts_dec_loop6:
2213.byte 102,15,56,222,209
2214.byte 102,15,56,222,217
2215.byte 102,15,56,222,225
2216.byte 102,15,56,222,233
2217.byte 102,15,56,222,241
2218.byte 102,15,56,222,249
2219 movups -64(%rcx,%rax,1),%xmm1
2220 addq $32,%rax
2221
2222.byte 102,15,56,222,208
2223.byte 102,15,56,222,216
2224.byte 102,15,56,222,224
2225.byte 102,15,56,222,232
2226.byte 102,15,56,222,240
2227.byte 102,15,56,222,248
2228 movups -80(%rcx,%rax,1),%xmm0
2229 jnz .Lxts_dec_loop6
2230
# Last rounds, interleaved with building the tweaks for the next
# iteration in xmm10..xmm14 (each as round key 0 ^ next tweak).
2231 movdqa (%r8),%xmm8
2232 movdqa %xmm9,%xmm14
2233 paddd %xmm9,%xmm9
2234.byte 102,15,56,222,209
2235 paddq %xmm15,%xmm15
2236 psrad $31,%xmm14
2237.byte 102,15,56,222,217
2238 pand %xmm8,%xmm14
2239 movups (%rbp),%xmm10
2240.byte 102,15,56,222,225
2241.byte 102,15,56,222,233
2242.byte 102,15,56,222,241
2243 pxor %xmm14,%xmm15
2244 movaps %xmm10,%xmm11
2245.byte 102,15,56,222,249
2246 movups -64(%rcx),%xmm1
2247
2248 movdqa %xmm9,%xmm14
2249.byte 102,15,56,222,208
2250 paddd %xmm9,%xmm9
2251 pxor %xmm15,%xmm10
2252.byte 102,15,56,222,216
2253 psrad $31,%xmm14
2254 paddq %xmm15,%xmm15
2255.byte 102,15,56,222,224
2256.byte 102,15,56,222,232
2257 pand %xmm8,%xmm14
2258 movaps %xmm11,%xmm12
2259.byte 102,15,56,222,240
2260 pxor %xmm14,%xmm15
2261 movdqa %xmm9,%xmm14
2262.byte 102,15,56,222,248
2263 movups -48(%rcx),%xmm0
2264
2265 paddd %xmm9,%xmm9
2266.byte 102,15,56,222,209
2267 pxor %xmm15,%xmm11
2268 psrad $31,%xmm14
2269.byte 102,15,56,222,217
2270 paddq %xmm15,%xmm15
2271 pand %xmm8,%xmm14
2272.byte 102,15,56,222,225
2273.byte 102,15,56,222,233
# The fourth spill slot is written only now, just before xmm13 is
# overwritten with the next tweak base.
2274 movdqa %xmm13,48(%rsp)
2275 pxor %xmm14,%xmm15
2276.byte 102,15,56,222,241
2277 movaps %xmm12,%xmm13
2278 movdqa %xmm9,%xmm14
2279.byte 102,15,56,222,249
2280 movups -32(%rcx),%xmm1
2281
2282 paddd %xmm9,%xmm9
2283.byte 102,15,56,222,208
2284 pxor %xmm15,%xmm12
2285 psrad $31,%xmm14
2286.byte 102,15,56,222,216
2287 paddq %xmm15,%xmm15
2288 pand %xmm8,%xmm14
2289.byte 102,15,56,222,224
2290.byte 102,15,56,222,232
2291.byte 102,15,56,222,240
2292 pxor %xmm14,%xmm15
2293 movaps %xmm13,%xmm14
2294.byte 102,15,56,222,248
2295
2296 movdqa %xmm9,%xmm0
2297 paddd %xmm9,%xmm9
2298.byte 102,15,56,222,209
2299 pxor %xmm15,%xmm13
2300 psrad $31,%xmm0
2301.byte 102,15,56,222,217
2302 paddq %xmm15,%xmm15
2303 pand %xmm8,%xmm0
2304.byte 102,15,56,222,225
2305.byte 102,15,56,222,233
2306 pxor %xmm0,%xmm15
2307 movups (%rbp),%xmm0
2308.byte 102,15,56,222,241
2309.byte 102,15,56,222,249
2310 movups 16(%rbp),%xmm1
2311
# The 7-byte .byte forms are aesdeclast with a memory operand: ModRM
# 84/92/100/108/116/124 select xmm2..xmm7, SIB 36 is (%rsp), and the last
# byte is the displacement of the spill slot (0,16,...,80).
2312 pxor %xmm15,%xmm14
2313.byte 102,15,56,223,84,36,0
2314 psrad $31,%xmm9
2315 paddq %xmm15,%xmm15
2316.byte 102,15,56,223,92,36,16
2317.byte 102,15,56,223,100,36,32
2318 pand %xmm8,%xmm9
2319 movq %r10,%rax
2320.byte 102,15,56,223,108,36,48
2321.byte 102,15,56,223,116,36,64
2322.byte 102,15,56,223,124,36,80
2323 pxor %xmm9,%xmm15
2324
# Store six output blocks and loop while at least 96 bytes remain.
2325 leaq 96(%rsi),%rsi
2326 movups %xmm2,-96(%rsi)
2327 movups %xmm3,-80(%rsi)
2328 movups %xmm4,-64(%rsi)
2329 movups %xmm5,-48(%rsi)
2330 movups %xmm6,-32(%rsi)
2331 movups %xmm7,-16(%rsi)
2332 subq $96,%rdx
2333 jnc .Lxts_dec_grandloop
2334
# Leaving the bulk loop: recover the round count in eax
# ((112 - r10) / 16) and point rcx back at the schedule base.
2335 movl $16+96,%eax
2336 subl %r10d,%eax
2337 movq %rbp,%rcx
2338 shrl $4,%eax
2339
# Short path: 0..5 whole blocks remain. Round key 0 is XORed back out of
# the tweak registers as far as needed. Unlike the encrypt routine, two
# tweaks (xmm10 and xmm11) are unfolded before the zero test because the
# tail handling at .Lxts_dec_done2 consumes both.
2340.Lxts_dec_short:
2341
2342 movl %eax,%r10d
2343 pxor %xmm0,%xmm10
2344 pxor %xmm0,%xmm11
2345 addq $96,%rdx
2346 jz .Lxts_dec_done
2347
2348 pxor %xmm0,%xmm12
2349 cmpq $0x20,%rdx
2350 jb .Lxts_dec_one
2351 pxor %xmm0,%xmm13
2352 je .Lxts_dec_two
2353
2354 pxor %xmm0,%xmm14
2355 cmpq $0x40,%rdx
2356 jb .Lxts_dec_three
2357 je .Lxts_dec_four
2358
# Five blocks through the six-block helper (_aesni_decrypt6 is defined
# outside this chunk). xmm7 is not initialised here and its result is
# not stored.
2359 movdqu (%rdi),%xmm2
2360 movdqu 16(%rdi),%xmm3
2361 movdqu 32(%rdi),%xmm4
2362 pxor %xmm10,%xmm2
2363 movdqu 48(%rdi),%xmm5
2364 pxor %xmm11,%xmm3
2365 movdqu 64(%rdi),%xmm6
2366 leaq 80(%rdi),%rdi
2367 pxor %xmm12,%xmm4
2368 pxor %xmm13,%xmm5
2369 pxor %xmm14,%xmm6
2370
2371 call _aesni_decrypt6
2372
# Apply output tweaks and store. In between, start computing one more
# tweak from xmm15: pcmpgtd against zero yields the sign mask and pshufd
# 0x13 rearranges it into xmm11 for the doubling step below.
2373 xorps %xmm10,%xmm2
2374 xorps %xmm11,%xmm3
2375 xorps %xmm12,%xmm4
2376 movdqu %xmm2,(%rsi)
2377 xorps %xmm13,%xmm5
2378 movdqu %xmm3,16(%rsi)
2379 xorps %xmm14,%xmm6
2380 movdqu %xmm4,32(%rsi)
2381 pxor %xmm14,%xmm14
2382 movdqu %xmm5,48(%rsi)
2383 pcmpgtd %xmm15,%xmm14
2384 movdqu %xmm6,64(%rsi)
2385 leaq 80(%rsi),%rsi
2386 pshufd $0x13,%xmm14,%xmm11
2387 andq $15,%r9
2388 jz .Lxts_dec_ret
2389
# Partial tail present: xmm10 = current tweak (xmm15), xmm11 = the one
# after it (doubled xmm15 with the masked correction XORed in).
# NOTE(review): relies on xmm8 still holding the .Lxts_magic constant
# after _aesni_decrypt6 returns - confirm the helper leaves it intact.
2390 movdqa %xmm15,%xmm10
2391 paddq %xmm15,%xmm15
2392 pand %xmm8,%xmm11
2393 pxor %xmm15,%xmm11
2394 jmp .Lxts_dec_done2
2395
2396.align 16
# One block: inline single-block decryption. Afterwards xmm10/xmm11 are
# loaded with the next two tweaks (xmm11, xmm12).
2397.Lxts_dec_one:
2398 movups (%rdi),%xmm2
2399 leaq 16(%rdi),%rdi
2400 xorps %xmm10,%xmm2
2401 movups (%rcx),%xmm0
2402 movups 16(%rcx),%xmm1
2403 leaq 32(%rcx),%rcx
2404 xorps %xmm0,%xmm2
2405.Loop_dec1_12:
2406.byte 102,15,56,222,209
2407 decl %eax
2408 movups (%rcx),%xmm1
2409 leaq 16(%rcx),%rcx
2410 jnz .Loop_dec1_12
2411.byte 102,15,56,223,209
2412 xorps %xmm10,%xmm2
2413 movdqa %xmm11,%xmm10
2414 movups %xmm2,(%rsi)
2415 movdqa %xmm12,%xmm11
2416 leaq 16(%rsi),%rsi
2417 jmp .Lxts_dec_done
2418
2419.align 16
# Two blocks via _aesni_decrypt2 (defined outside this chunk).
# Afterwards xmm10/xmm11 take the next two tweaks (xmm12, xmm13).
2420.Lxts_dec_two:
2421 movups (%rdi),%xmm2
2422 movups 16(%rdi),%xmm3
2423 leaq 32(%rdi),%rdi
2424 xorps %xmm10,%xmm2
2425 xorps %xmm11,%xmm3
2426
2427 call _aesni_decrypt2
2428
2429 xorps %xmm10,%xmm2
2430 movdqa %xmm12,%xmm10
2431 xorps %xmm11,%xmm3
2432 movdqa %xmm13,%xmm11
2433 movups %xmm2,(%rsi)
2434 movups %xmm3,16(%rsi)
2435 leaq 32(%rsi),%rsi
2436 jmp .Lxts_dec_done
2437
2438.align 16
# Three blocks via _aesni_decrypt3 (defined outside this chunk).
# Afterwards xmm10/xmm11 take the next two tweaks (xmm13, xmm14).
2439.Lxts_dec_three:
2440 movups (%rdi),%xmm2
2441 movups 16(%rdi),%xmm3
2442 movups 32(%rdi),%xmm4
2443 leaq 48(%rdi),%rdi
2444 xorps %xmm10,%xmm2
2445 xorps %xmm11,%xmm3
2446 xorps %xmm12,%xmm4
2447
2448 call _aesni_decrypt3
2449
2450 xorps %xmm10,%xmm2
2451 movdqa %xmm13,%xmm10
2452 xorps %xmm11,%xmm3
2453 movdqa %xmm14,%xmm11
2454 xorps %xmm12,%xmm4
2455 movups %xmm2,(%rsi)
2456 movups %xmm3,16(%rsi)
2457 movups %xmm4,32(%rsi)
2458 leaq 48(%rsi),%rsi
2459 jmp .Lxts_dec_done
2460
2461.align 16
# Four blocks via _aesni_decrypt4 (defined outside this chunk).
# Afterwards xmm10/xmm11 take the next two tweaks (xmm14, xmm15).
2462.Lxts_dec_four:
2463 movups (%rdi),%xmm2
2464 movups 16(%rdi),%xmm3
2465 movups 32(%rdi),%xmm4
2466 xorps %xmm10,%xmm2
2467 movups 48(%rdi),%xmm5
2468 leaq 64(%rdi),%rdi
2469 xorps %xmm11,%xmm3
2470 xorps %xmm12,%xmm4
2471 xorps %xmm13,%xmm5
2472
2473 call _aesni_decrypt4
2474
2475 pxor %xmm10,%xmm2
2476 movdqa %xmm14,%xmm10
2477 pxor %xmm11,%xmm3
2478 movdqa %xmm15,%xmm11
2479 pxor %xmm12,%xmm4
2480 movdqu %xmm2,(%rsi)
2481 pxor %xmm13,%xmm5
2482 movdqu %xmm3,16(%rsi)
2483 movdqu %xmm4,32(%rsi)
2484 movdqu %xmm5,48(%rsi)
2485 leaq 64(%rsi),%rsi
2486 jmp .Lxts_dec_done
2487
2488.align 16
# All bulk blocks are written. r9 & 15 is the partial tail length; zero
# means nothing more to do.
2489.Lxts_dec_done:
2490 andq $15,%r9
2491 jz .Lxts_dec_ret
# Tail handling, step one: decrypt the held-back full block at (rdi)
# using the LATER tweak (xmm11) and store it at (rsi).
2492.Lxts_dec_done2:
2493 movq %r9,%rdx
2494 movq %rbp,%rcx
2495 movl %r10d,%eax
2496
2497 movups (%rdi),%xmm2
2498 xorps %xmm11,%xmm2
2499 movups (%rcx),%xmm0
2500 movups 16(%rcx),%xmm1
2501 leaq 32(%rcx),%rcx
2502 xorps %xmm0,%xmm2
2503.Loop_dec1_13:
2504.byte 102,15,56,222,209
2505 decl %eax
2506 movups (%rcx),%xmm1
2507 leaq 16(%rcx),%rcx
2508 jnz .Loop_dec1_13
2509.byte 102,15,56,223,209
2510 xorps %xmm11,%xmm2
2511 movups %xmm2,(%rsi)
2512
# Step two, one byte per pass: the input tail byte at 16(rdi) replaces a
# byte of the block just written at (rsi), and the displaced byte is
# written at 16(rsi) as the partial final output block.
2513.Lxts_dec_steal:
2514 movzbl 16(%rdi),%eax
2515 movzbl (%rsi),%ecx
2516 leaq 1(%rdi),%rdi
2517 movb %al,(%rsi)
2518 movb %cl,16(%rsi)
2519 leaq 1(%rsi),%rsi
2520 subq $1,%rdx
2521 jnz .Lxts_dec_steal
2522
# Step three: rewind rsi and decrypt the patched block at (rsi) in place
# using the EARLIER tweak (xmm10).
2523 subq %r9,%rsi
2524 movq %rbp,%rcx
2525 movl %r10d,%eax
2526
2527 movups (%rsi),%xmm2
2528 xorps %xmm10,%xmm2
2529 movups (%rcx),%xmm0
2530 movups 16(%rcx),%xmm1
2531 leaq 32(%rcx),%rcx
2532 xorps %xmm0,%xmm2
2533.Loop_dec1_14:
2534.byte 102,15,56,222,209
2535 decl %eax
2536 movups (%rcx),%xmm1
2537 leaq 16(%rcx),%rcx
2538 jnz .Loop_dec1_14
2539.byte 102,15,56,223,209
2540 xorps %xmm10,%xmm2
2541 movups %xmm2,(%rsi)
2542
# Exit: wipe every xmm register and all seven 16-byte scratch slots,
# then restore rbp and rsp from the values anchored at r11.
2543.Lxts_dec_ret:
2544 xorps %xmm0,%xmm0
2545 pxor %xmm1,%xmm1
2546 pxor %xmm2,%xmm2
2547 pxor %xmm3,%xmm3
2548 pxor %xmm4,%xmm4
2549 pxor %xmm5,%xmm5
2550 pxor %xmm6,%xmm6
2551 pxor %xmm7,%xmm7
2552 movaps %xmm0,0(%rsp)
2553 pxor %xmm8,%xmm8
2554 movaps %xmm0,16(%rsp)
2555 pxor %xmm9,%xmm9
2556 movaps %xmm0,32(%rsp)
2557 pxor %xmm10,%xmm10
2558 movaps %xmm0,48(%rsp)
2559 pxor %xmm11,%xmm11
2560 movaps %xmm0,64(%rsp)
2561 pxor %xmm12,%xmm12
2562 movaps %xmm0,80(%rsp)
2563 pxor %xmm13,%xmm13
2564 movaps %xmm0,96(%rsp)
2565 pxor %xmm14,%xmm14
2566 pxor %xmm15,%xmm15
2567 movq -8(%r11),%rbp
2568.cfi_restore %rbp
2569 leaq (%r11),%rsp
2570.cfi_def_cfa_register %rsp
2571.Lxts_dec_epilogue:
# 0xf3,0xc3 is the two-byte "rep ret" return encoding.
2572 .byte 0xf3,0xc3
2573.cfi_endproc
2574.size aesni_xts_decrypt,.-aesni_xts_decrypt
# ---------------------------------------------------------------------
# aesni_ocb_encrypt
#
# Driver for OCB-style bulk encryption: it prepares key material and
# offsets, then hands blocks to the helpers __ocb_encrypt1,
# __ocb_encrypt4 and __ocb_encrypt6 (only the start of __ocb_encrypt6 is
# visible in this chunk, so helper behaviour noted below is partly
# assumed - TODO confirm).
#
# Register roles as used below (the C prototype is not visible in this
# chunk - TODO confirm parameter names against the caller):
#   rdi  source pointer          rsi  destination pointer
#   rdx  number of 16-byte blocks (counted down by 1 and by 6)
#   rcx  key schedule (round count read at +240)
#   r8   running block index (bsf of it selects a table entry)
#   r9   pointer to a 16-byte value loaded into xmm15 and written back
#        at exit (presumably the running offset)
#   first stack argument  -> rbx, base of a table of 16-byte entries
#   second stack argument -> rbp, pointer to a 16-byte value loaded into
#        xmm8 and written back at exit (presumably the checksum)
#
# Saves rbx, rbp, r12, r13, r14 with pushes and restores them on exit.
# All xmm registers are zeroed before returning.
# ---------------------------------------------------------------------
2575.globl aesni_ocb_encrypt
2576.type aesni_ocb_encrypt,@function
2577.align 32
2578aesni_ocb_encrypt:
2579.cfi_startproc
# rax = entry rsp, so 8(rax) and 16(rax) address the stack arguments
# that sit just above the return address.
2580 leaq (%rsp),%rax
2581 pushq %rbx
2582.cfi_adjust_cfa_offset 8
2583.cfi_offset %rbx,-16
2584 pushq %rbp
2585.cfi_adjust_cfa_offset 8
2586.cfi_offset %rbp,-24
2587 pushq %r12
2588.cfi_adjust_cfa_offset 8
2589.cfi_offset %r12,-32
2590 pushq %r13
2591.cfi_adjust_cfa_offset 8
2592.cfi_offset %r13,-40
2593 pushq %r14
2594.cfi_adjust_cfa_offset 8
2595.cfi_offset %r14,-48
2596 movq 8(%rax),%rbx
2597 movq 8+8(%rax),%rbp
2598
# r11 = schedule base, r10 = 16 * round count, xmm9 = round key 0,
# xmm1 = schedule entry at offset 16 + 16*rounds (final round key).
2599 movl 240(%rcx),%r10d
2600 movq %rcx,%r11
2601 shll $4,%r10d
2602 movups (%rcx),%xmm9
2603 movups 16(%rcx,%r10,1),%xmm1
2604
# Fold the final round key into both round key 0 (xmm9) and the value
# loaded from (r9) (xmm15); the matching XOR at .Locb_enc_done removes
# it from xmm15 again before the value is written back.
2605 movdqu (%r9),%xmm15
2606 pxor %xmm1,%xmm9
2607 pxor %xmm1,%xmm15
2608
# rcx points past the end of the round keys; rax and r10 hold
# 48 - 16*rounds as a negative-going index for the helper round loops.
2609 movl $16+32,%eax
2610 leaq 32(%r11,%r10,1),%rcx
2611 movups 16(%r11),%xmm1
2612 subq %r10,%rax
2613 movq %rax,%r10
2614
# xmm10 = first table entry at (rbx), xmm8 = value at (rbp).
2615 movdqu (%rbx),%xmm10
2616 movdqu (%rbp),%xmm8
2617
# Align the block index: when r8 is even, process a single block first
# so that the six-block path always starts on an odd index.
2618 testq $1,%r8
2619 jnz .Locb_enc_odd
2620
# Table entry selected by the lowest set bit of r8 (bsf), scaled by 16.
2621 bsfq %r8,%r12
2622 addq $1,%r8
2623 shlq $4,%r12
2624 movdqu (%rbx,%r12,1),%xmm7
2625 movdqu (%rdi),%xmm2
2626 leaq 16(%rdi),%rdi
2627
2628 call __ocb_encrypt1
2629
# xmm7 is taken as the updated running value after the helper returns.
2630 movdqa %xmm7,%xmm15
2631 movups %xmm2,(%rsi)
2632 leaq 16(%rsi),%rsi
2633 subq $1,%rdx
2634 jz .Locb_enc_done
2635
# r8 is odd here, so r8+1, r8+3 and r8+5 are the even indices among the
# next six; their bsf values (times 16) are the table offsets kept in
# r12/r13/r14 for the six-block helper.
2636.Locb_enc_odd:
2637 leaq 1(%r8),%r12
2638 leaq 3(%r8),%r13
2639 leaq 5(%r8),%r14
2640 leaq 6(%r8),%r8
2641 bsfq %r12,%r12
2642 bsfq %r13,%r13
2643 bsfq %r14,%r14
2644 shlq $4,%r12
2645 shlq $4,%r13
2646 shlq $4,%r14
2647
2648 subq $6,%rdx
2649 jc .Locb_enc_short
2650 jmp .Locb_enc_grandloop
2651
2652.align 32
# Six blocks per iteration through __ocb_encrypt6.
2653.Locb_enc_grandloop:
2654 movdqu 0(%rdi),%xmm2
2655 movdqu 16(%rdi),%xmm3
2656 movdqu 32(%rdi),%xmm4
2657 movdqu 48(%rdi),%xmm5
2658 movdqu 64(%rdi),%xmm6
2659 movdqu 80(%rdi),%xmm7
2660 leaq 96(%rdi),%rdi
2661
2662 call __ocb_encrypt6
2663
2664 movups %xmm2,0(%rsi)
2665 movups %xmm3,16(%rsi)
2666 movups %xmm4,32(%rsi)
2667 movups %xmm5,48(%rsi)
2668 movups %xmm6,64(%rsi)
2669 movups %xmm7,80(%rsi)
2670 leaq 96(%rsi),%rsi
2671 subq $6,%rdx
2672 jnc .Locb_enc_grandloop
2673
# Remainder of 0..5 blocks. Loads are interleaved with the compares;
# movdqu leaves the flags from cmpq intact for the je that follows.
2674.Locb_enc_short:
2675 addq $6,%rdx
2676 jz .Locb_enc_done
2677
2678 movdqu 0(%rdi),%xmm2
2679 cmpq $2,%rdx
2680 jb .Locb_enc_one
2681 movdqu 16(%rdi),%xmm3
2682 je .Locb_enc_two
2683
2684 movdqu 32(%rdi),%xmm4
2685 cmpq $4,%rdx
2686 jb .Locb_enc_three
2687 movdqu 48(%rdi),%xmm5
2688 je .Locb_enc_four
2689
# Five blocks: six-block helper with a zeroed sixth input; the running
# value is then taken from xmm14 and only five blocks are stored.
2690 movdqu 64(%rdi),%xmm6
2691 pxor %xmm7,%xmm7
2692
2693 call __ocb_encrypt6
2694
2695 movdqa %xmm14,%xmm15
2696 movups %xmm2,0(%rsi)
2697 movups %xmm3,16(%rsi)
2698 movups %xmm4,32(%rsi)
2699 movups %xmm5,48(%rsi)
2700 movups %xmm6,64(%rsi)
2701
2702 jmp .Locb_enc_done
2703
2704.align 16
# One block: the helper is given the first table entry (xmm10) in xmm7.
2705.Locb_enc_one:
2706 movdqa %xmm10,%xmm7
2707
2708 call __ocb_encrypt1
2709
2710 movdqa %xmm7,%xmm15
2711 movups %xmm2,0(%rsi)
2712 jmp .Locb_enc_done
2713
2714.align 16
# Two blocks: four-block helper with zeroed third and fourth inputs;
# the running value is then taken from xmm11.
2715.Locb_enc_two:
2716 pxor %xmm4,%xmm4
2717 pxor %xmm5,%xmm5
2718
2719 call __ocb_encrypt4
2720
2721 movdqa %xmm11,%xmm15
2722 movups %xmm2,0(%rsi)
2723 movups %xmm3,16(%rsi)
2724
2725 jmp .Locb_enc_done
2726
2727.align 16
# Three blocks: four-block helper with a zeroed fourth input; the
# running value is then taken from xmm12.
2728.Locb_enc_three:
2729 pxor %xmm5,%xmm5
2730
2731 call __ocb_encrypt4
2732
2733 movdqa %xmm12,%xmm15
2734 movups %xmm2,0(%rsi)
2735 movups %xmm3,16(%rsi)
2736 movups %xmm4,32(%rsi)
2737
2738 jmp .Locb_enc_done
2739
2740.align 16
# Four blocks: the running value is then taken from xmm13.
2741.Locb_enc_four:
2742 call __ocb_encrypt4
2743
2744 movdqa %xmm13,%xmm15
2745 movups %xmm2,0(%rsi)
2746 movups %xmm3,16(%rsi)
2747 movups %xmm4,32(%rsi)
2748 movups %xmm5,48(%rsi)
2749
# Write back state: xmm8 to (rbp) and xmm15 (after XOR with xmm0) to (r9).
# NOTE(review): xmm0 is never written in this function, so it is
# presumably left holding the final round key by whichever helper ran
# last. With a zero block count and odd r8 on entry no helper runs at
# all - confirm that callers never pass zero blocks.
2750.Locb_enc_done:
2751 pxor %xmm0,%xmm15
2752 movdqu %xmm8,(%rbp)
2753 movdqu %xmm15,(%r9)
2754
# Wipe all xmm registers, then unwind the five pushes: rax = rsp + 40 is
# the entry rsp, and the saved registers sit at -40..-8 below it.
2755 xorps %xmm0,%xmm0
2756 pxor %xmm1,%xmm1
2757 pxor %xmm2,%xmm2
2758 pxor %xmm3,%xmm3
2759 pxor %xmm4,%xmm4
2760 pxor %xmm5,%xmm5
2761 pxor %xmm6,%xmm6
2762 pxor %xmm7,%xmm7
2763 pxor %xmm8,%xmm8
2764 pxor %xmm9,%xmm9
2765 pxor %xmm10,%xmm10
2766 pxor %xmm11,%xmm11
2767 pxor %xmm12,%xmm12
2768 pxor %xmm13,%xmm13
2769 pxor %xmm14,%xmm14
2770 pxor %xmm15,%xmm15
2771 leaq 40(%rsp),%rax
2772.cfi_def_cfa %rax,8
2773 movq -40(%rax),%r14
2774.cfi_restore %r14
2775 movq -32(%rax),%r13
2776.cfi_restore %r13
2777 movq -24(%rax),%r12
2778.cfi_restore %r12
2779 movq -16(%rax),%rbp
2780.cfi_restore %rbp
2781 movq -8(%rax),%rbx
2782.cfi_restore %rbx
2783 leaq (%rax),%rsp
2784.cfi_def_cfa_register %rsp
2785.Locb_enc_epilogue:
# 0xf3,0xc3 is the two-byte "rep ret" return encoding.
2786 .byte 0xf3,0xc3
2787.cfi_endproc
2788.size aesni_ocb_encrypt,.-aesni_ocb_encrypt
2789
2790.type __ocb_encrypt6,@function
2791.align 32
# __ocb_encrypt6 -- internal helper (no .globl): runs the six 16-byte blocks
# held in %xmm2..%xmm7 through the AES encryption rounds, XORing a per-block
# mask in before the first round and again as the operand of the last round.
#
# Register usage, as read from the code below:
#   %xmm2..%xmm7   in: six input blocks; out: six result blocks
#   %xmm8          XOR accumulator of the unmasked inputs
#                  (presumably the OCB checksum -- confirm against caller)
#   %xmm9          constant XORed into / out of the masks (set up by caller)
#   %xmm10         in: seed for the mask chain; out: reloaded from (%rbx)
#   %xmm15         in: running mask; out: mask used for the sixth block
#   %xmm11..%xmm14 out: masks used for blocks 2..5
#   %rbx           base of a table of 16-byte entries, indexed by %r12/%r13/%r14
#   %r8            counter; advanced by 6, and %r12/%r13/%r14 are recomputed
#                  from it for the next call
#   %r11           key schedule base; %rcx+%rax addresses the remaining round
#                  keys, %rax counts up to zero and is reset from %r10 on exit
#   %xmm0,%xmm1    round keys; on exit %xmm1 = 16(%r11)
2792__ocb_encrypt6:
# Build the six masks as a chain: each one is the previous mask XOR a table
# entry ((%rbx) copy in %xmm10/%xmm12/%xmm14, or an entry at %r12/%r13/%r14).
2793 pxor %xmm9,%xmm15
2794 movdqu (%rbx,%r12,1),%xmm11
2795 movdqa %xmm10,%xmm12
2796 movdqu (%rbx,%r13,1),%xmm13
2797 movdqa %xmm10,%xmm14
2798 pxor %xmm15,%xmm10
2799 movdqu (%rbx,%r14,1),%xmm15
2800 pxor %xmm10,%xmm11
# For every block: fold the still-unmasked input into %xmm8, then mask it.
2801 pxor %xmm2,%xmm8
2802 pxor %xmm10,%xmm2
2803 pxor %xmm11,%xmm12
2804 pxor %xmm3,%xmm8
2805 pxor %xmm11,%xmm3
2806 pxor %xmm12,%xmm13
2807 pxor %xmm4,%xmm8
2808 pxor %xmm12,%xmm4
2809 pxor %xmm13,%xmm14
2810 pxor %xmm5,%xmm8
2811 pxor %xmm13,%xmm5
2812 pxor %xmm14,%xmm15
2813 pxor %xmm6,%xmm8
2814 pxor %xmm14,%xmm6
2815 pxor %xmm7,%xmm8
2816 pxor %xmm15,%xmm7
2817 movups 32(%r11),%xmm0
2818
# Precompute the table indices for the next call:
# bit-scan-forward of counter+1, +3 and +5 (scaled by 16 further down).
2819 leaq 1(%r8),%r12
2820 leaq 3(%r8),%r13
2821 leaq 5(%r8),%r14
2822 addq $6,%r8
# The pxor %xmm9 lines interleaved below XOR %xmm9 into each mask a second
# time, now that the mask has been applied to its input block.
2823 pxor %xmm9,%xmm10
2824 bsfq %r12,%r12
2825 bsfq %r13,%r13
2826 bsfq %r14,%r14
2827
# Hand-encoded: aesenc %xmm1,%xmm2 .. aesenc %xmm1,%xmm7
2828.byte 102,15,56,220,209
2829.byte 102,15,56,220,217
2830.byte 102,15,56,220,225
2831.byte 102,15,56,220,233
2832 pxor %xmm9,%xmm11
2833 pxor %xmm9,%xmm12
2834.byte 102,15,56,220,241
2835 pxor %xmm9,%xmm13
2836 pxor %xmm9,%xmm14
2837.byte 102,15,56,220,249
2838 movups 48(%r11),%xmm1
2839 pxor %xmm9,%xmm15
2840
# Hand-encoded: aesenc %xmm0,%xmm2 .. aesenc %xmm0,%xmm7
2841.byte 102,15,56,220,208
2842.byte 102,15,56,220,216
2843.byte 102,15,56,220,224
2844.byte 102,15,56,220,232
2845.byte 102,15,56,220,240
2846.byte 102,15,56,220,248
2847 movups 64(%r11),%xmm0
2848 shlq $4,%r12
2849 shlq $4,%r13
2850 jmp .Locb_enc_loop6
2851
2852.align 32
# Two rounds per iteration, alternating %xmm1 / %xmm0 as the round key.
# The loop ends when addq $32 brings %rax to zero; movups does not touch
# the flags, so jnz still sees the result of the addq.
2853.Locb_enc_loop6:
2854.byte 102,15,56,220,209
2855.byte 102,15,56,220,217
2856.byte 102,15,56,220,225
2857.byte 102,15,56,220,233
2858.byte 102,15,56,220,241
2859.byte 102,15,56,220,249
2860 movups (%rcx,%rax,1),%xmm1
2861 addq $32,%rax
2862
2863.byte 102,15,56,220,208
2864.byte 102,15,56,220,216
2865.byte 102,15,56,220,224
2866.byte 102,15,56,220,232
2867.byte 102,15,56,220,240
2868.byte 102,15,56,220,248
2869 movups -16(%rcx,%rax,1),%xmm0
2870 jnz .Locb_enc_loop6
2871
# Last aesenc round with %xmm1, then reload %xmm1 for the next call.
2872.byte 102,15,56,220,209
2873.byte 102,15,56,220,217
2874.byte 102,15,56,220,225
2875.byte 102,15,56,220,233
2876.byte 102,15,56,220,241
2877.byte 102,15,56,220,249
2878 movups 16(%r11),%xmm1
2879 shlq $4,%r14
2880
# Hand-encoded: aesenclast %xmm10,%xmm2 .. aesenclast %xmm15,%xmm7
# (each block's mask register is the final-round operand).
# %xmm10 is reloaded from (%rbx) right after its last use, and %rax is
# reset from %r10 for the next call.
2881.byte 102,65,15,56,221,210
2882 movdqu (%rbx),%xmm10
2883 movq %r10,%rax
2884.byte 102,65,15,56,221,219
2885.byte 102,65,15,56,221,228
2886.byte 102,65,15,56,221,237
2887.byte 102,65,15,56,221,246
2888.byte 102,65,15,56,221,255
# Hand-encoded "rep ret".
2889 .byte 0xf3,0xc3
2890.size __ocb_encrypt6,.-__ocb_encrypt6
2891
2892.type __ocb_encrypt4,@function
2893.align 32
# __ocb_encrypt4 -- internal helper (no .globl): four-block variant of
# __ocb_encrypt6. Processes %xmm2..%xmm5 with masks built in %xmm10..%xmm13.
#
# As read from the code below:
#   %xmm8          accumulates the XOR of the unmasked inputs
#   %xmm9          constant XORed into / out of the masks (set up by caller)
#   %xmm10,%xmm15  seed the mask chain; %xmm15 is left as seed ^ %xmm9
#   %rbx+%r12, %rbx+%r13  table entries for the 2nd and 4th mask
#   %r11, %rcx, %rax, %r10  round-key addressing as in __ocb_encrypt6
# Unlike the six-block helper, %r8, %r12..%r14 and %xmm10 are NOT refreshed
# for a following call.
2894__ocb_encrypt4:
# Mask chain: %xmm10, %xmm11, %xmm12, %xmm13, each derived from the previous.
2895 pxor %xmm9,%xmm15
2896 movdqu (%rbx,%r12,1),%xmm11
2897 movdqa %xmm10,%xmm12
2898 movdqu (%rbx,%r13,1),%xmm13
2899 pxor %xmm15,%xmm10
2900 pxor %xmm10,%xmm11
# Fold each unmasked input into %xmm8, then apply its mask.
2901 pxor %xmm2,%xmm8
2902 pxor %xmm10,%xmm2
2903 pxor %xmm11,%xmm12
2904 pxor %xmm3,%xmm8
2905 pxor %xmm11,%xmm3
2906 pxor %xmm12,%xmm13
2907 pxor %xmm4,%xmm8
2908 pxor %xmm12,%xmm4
2909 pxor %xmm5,%xmm8
2910 pxor %xmm13,%xmm5
2911 movups 32(%r11),%xmm0
2912
# XOR %xmm9 into the masks a second time before they serve as the
# final-round operands.
2913 pxor %xmm9,%xmm10
2914 pxor %xmm9,%xmm11
2915 pxor %xmm9,%xmm12
2916 pxor %xmm9,%xmm13
2917
# Hand-encoded: aesenc %xmm1,%xmm2 .. aesenc %xmm1,%xmm5
2918.byte 102,15,56,220,209
2919.byte 102,15,56,220,217
2920.byte 102,15,56,220,225
2921.byte 102,15,56,220,233
2922 movups 48(%r11),%xmm1
2923
# Hand-encoded: aesenc %xmm0,%xmm2 .. aesenc %xmm0,%xmm5
2924.byte 102,15,56,220,208
2925.byte 102,15,56,220,216
2926.byte 102,15,56,220,224
2927.byte 102,15,56,220,232
2928 movups 64(%r11),%xmm0
2929 jmp .Locb_enc_loop4
2930
2931.align 32
# Two rounds per iteration; exits when addq $32 brings %rax to zero.
2932.Locb_enc_loop4:
2933.byte 102,15,56,220,209
2934.byte 102,15,56,220,217
2935.byte 102,15,56,220,225
2936.byte 102,15,56,220,233
2937 movups (%rcx,%rax,1),%xmm1
2938 addq $32,%rax
2939
2940.byte 102,15,56,220,208
2941.byte 102,15,56,220,216
2942.byte 102,15,56,220,224
2943.byte 102,15,56,220,232
2944 movups -16(%rcx,%rax,1),%xmm0
2945 jnz .Locb_enc_loop4
2946
2947.byte 102,15,56,220,209
2948.byte 102,15,56,220,217
2949.byte 102,15,56,220,225
2950.byte 102,15,56,220,233
# Restore %xmm1 and %rax to their pre-call values.
2951 movups 16(%r11),%xmm1
2952 movq %r10,%rax
2953
# Hand-encoded: aesenclast %xmm10,%xmm2 .. aesenclast %xmm13,%xmm5
2954.byte 102,65,15,56,221,210
2955.byte 102,65,15,56,221,219
2956.byte 102,65,15,56,221,228
2957.byte 102,65,15,56,221,237
# Hand-encoded "rep ret".
2958 .byte 0xf3,0xc3
2959.size __ocb_encrypt4,.-__ocb_encrypt4
2960
2961.type __ocb_encrypt1,@function
2962.align 32
# __ocb_encrypt1 -- internal helper (no .globl): single-block variant.
#
# As read from the code below:
#   %xmm2   in: input block; out: result block
#   %xmm7   in: table entry chosen by the caller; out: the block's mask
#           (entry ^ %xmm15, with %xmm9 XORed in and back out)
#   %xmm8   accumulates the unmasked input
#   %xmm15  running mask from the caller (read only here)
#   %r11, %rcx, %rax, %r10  round-key addressing as in the wider helpers
2963__ocb_encrypt1:
2964 pxor %xmm15,%xmm7
2965 pxor %xmm9,%xmm7
2966 pxor %xmm2,%xmm8
2967 pxor %xmm7,%xmm2
2968 movups 32(%r11),%xmm0
2969
# Hand-encoded: aesenc %xmm1,%xmm2
2970.byte 102,15,56,220,209
2971 movups 48(%r11),%xmm1
# XOR %xmm9 into the mask a second time before it is used in the last round.
2972 pxor %xmm9,%xmm7
2973
# Hand-encoded: aesenc %xmm0,%xmm2
2974.byte 102,15,56,220,208
2975 movups 64(%r11),%xmm0
2976 jmp .Locb_enc_loop1
2977
2978.align 32
# Two rounds per iteration; exits when addq $32 brings %rax to zero.
2979.Locb_enc_loop1:
2980.byte 102,15,56,220,209
2981 movups (%rcx,%rax,1),%xmm1
2982 addq $32,%rax
2983
2984.byte 102,15,56,220,208
2985 movups -16(%rcx,%rax,1),%xmm0
2986 jnz .Locb_enc_loop1
2987
2988.byte 102,15,56,220,209
# Restore %xmm1 and %rax to their pre-call values.
2989 movups 16(%r11),%xmm1
2990 movq %r10,%rax
2991
# Hand-encoded: aesenclast %xmm7,%xmm2
2992.byte 102,15,56,221,215
# Hand-encoded "rep ret".
2993 .byte 0xf3,0xc3
2994.size __ocb_encrypt1,.-__ocb_encrypt1
2995
2996.globl aesni_ocb_decrypt
2997.type aesni_ocb_decrypt,@function
2998.align 32
# aesni_ocb_decrypt -- exported bulk decryption routine built on the
# __ocb_decrypt{1,4,6} helpers.
#
# Arguments, as used by the code below (register order matches the
# System V AMD64 convention):
#   %rdi       input pointer, read 16 bytes per block and advanced
#   %rsi       output pointer, written 16 bytes per block and advanced
#   %rdx       number of 16-byte blocks (decremented by 1 / 6 per step)
#   %rcx       key schedule; the round count is read from offset 240
#   %r8        starting block counter (its low bit and bit-scan pick the
#              table entries)
#   %r9        pointer to a 16-byte running mask, read on entry and updated
#              on exit (presumably the OCB offset -- confirm against caller)
#   8(%rsp)    pointer to a table of 16-byte entries          -> %rbx
#   16(%rsp)   pointer to a 16-byte XOR accumulator, read on entry and
#              updated on exit (presumably the OCB checksum)  -> %rbp
# NOTE(review): the exit path XORs %xmm0 into the running mask and relies on
# a helper having run at least once; a zero block count with an odd %r8
# would reach it without that -- confirm callers never pass zero blocks.
2999aesni_ocb_decrypt:
3000.cfi_startproc
# Keep the entry %rsp in %rax so the stack arguments can be addressed after
# the pushes; save the callee-saved registers used below.
3001 leaq (%rsp),%rax
3002 pushq %rbx
3003.cfi_adjust_cfa_offset 8
3004.cfi_offset %rbx,-16
3005 pushq %rbp
3006.cfi_adjust_cfa_offset 8
3007.cfi_offset %rbp,-24
3008 pushq %r12
3009.cfi_adjust_cfa_offset 8
3010.cfi_offset %r12,-32
3011 pushq %r13
3012.cfi_adjust_cfa_offset 8
3013.cfi_offset %r13,-40
3014 pushq %r14
3015.cfi_adjust_cfa_offset 8
3016.cfi_offset %r14,-48
3017 movq 8(%rax),%rbx
3018 movq 8+8(%rax),%rbp
3019
# %r10 = 16 * round count. %xmm9 = first round key ^ the key at
# 16 + 16*rounds (presumably the last round key -- confirm the meaning of the
# field at offset 240). That same key is also XORed into the running mask.
3020 movl 240(%rcx),%r10d
3021 movq %rcx,%r11
3022 shll $4,%r10d
3023 movups (%rcx),%xmm9
3024 movups 16(%rcx,%r10,1),%xmm1
3025
3026 movdqu (%r9),%xmm15
3027 pxor %xmm1,%xmm9
3028 pxor %xmm1,%xmm15
3029
# Set up the helpers' round-key addressing: %rcx = key + 32 + 16*rounds and
# %rax = %r10 = 48 - 16*rounds, so (%rcx,%rax) starts at key+80 and %rax
# reaches zero when the round keys are exhausted.
3030 movl $16+32,%eax
3031 leaq 32(%r11,%r10,1),%rcx
3032 movups 16(%r11),%xmm1
3033 subq %r10,%rax
3034 movq %rax,%r10
3035
3036 movdqu (%rbx),%xmm10
3037 movdqu (%rbp),%xmm8
3038
# If the block counter is even, process one block first so that the main
# loop always starts on an odd counter.
3039 testq $1,%r8
3040 jnz .Locb_dec_odd
3041
3042 bsfq %r8,%r12
3043 addq $1,%r8
3044 shlq $4,%r12
3045 movdqu (%rbx,%r12,1),%xmm7
3046 movdqu (%rdi),%xmm2
3047 leaq 16(%rdi),%rdi
3048
3049 call __ocb_decrypt1
3050
# The helper leaves the block's mask in %xmm7; it becomes the running mask.
# The decrypted block is stored and folded into the accumulator.
3051 movdqa %xmm7,%xmm15
3052 movups %xmm2,(%rsi)
3053 xorps %xmm2,%xmm8
3054 leaq 16(%rsi),%rsi
3055 subq $1,%rdx
3056 jz .Locb_dec_done
3057
3058.Locb_dec_odd:
# Table byte offsets for the next six blocks: 16 * bsf(counter+1/+3/+5).
3059 leaq 1(%r8),%r12
3060 leaq 3(%r8),%r13
3061 leaq 5(%r8),%r14
3062 leaq 6(%r8),%r8
3063 bsfq %r12,%r12
3064 bsfq %r13,%r13
3065 bsfq %r14,%r14
3066 shlq $4,%r12
3067 shlq $4,%r13
3068 shlq $4,%r14
3069
# Fewer than six blocks left -> short path (borrow from the subtraction).
3070 subq $6,%rdx
3071 jc .Locb_dec_short
3072 jmp .Locb_dec_grandloop
3073
3074.align 32
# Main loop: six blocks per iteration.
3075.Locb_dec_grandloop:
3076 movdqu 0(%rdi),%xmm2
3077 movdqu 16(%rdi),%xmm3
3078 movdqu 32(%rdi),%xmm4
3079 movdqu 48(%rdi),%xmm5
3080 movdqu 64(%rdi),%xmm6
3081 movdqu 80(%rdi),%xmm7
3082 leaq 96(%rdi),%rdi
3083
3084 call __ocb_decrypt6
3085
# Store each decrypted block and fold it into the accumulator %xmm8.
3086 movups %xmm2,0(%rsi)
3087 pxor %xmm2,%xmm8
3088 movups %xmm3,16(%rsi)
3089 pxor %xmm3,%xmm8
3090 movups %xmm4,32(%rsi)
3091 pxor %xmm4,%xmm8
3092 movups %xmm5,48(%rsi)
3093 pxor %xmm5,%xmm8
3094 movups %xmm6,64(%rsi)
3095 pxor %xmm6,%xmm8
3096 movups %xmm7,80(%rsi)
3097 pxor %xmm7,%xmm8
3098 leaq 96(%rsi),%rsi
3099 subq $6,%rdx
3100 jnc .Locb_dec_grandloop
3101
# Tail: undo the last subtraction; 0..5 blocks remain.
3102.Locb_dec_short:
3103 addq $6,%rdx
3104 jz .Locb_dec_done
3105
# Dispatch on the remaining count; each cmpq feeds both the jb and the je
# (the movdqu in between does not change the flags).
3106 movdqu 0(%rdi),%xmm2
3107 cmpq $2,%rdx
3108 jb .Locb_dec_one
3109 movdqu 16(%rdi),%xmm3
3110 je .Locb_dec_two
3111
3112 movdqu 32(%rdi),%xmm4
3113 cmpq $4,%rdx
3114 jb .Locb_dec_three
3115 movdqu 48(%rdi),%xmm5
3116 je .Locb_dec_four
3117
# Five blocks: use the six-block helper with a zeroed sixth lane; the sixth
# result is not stored.
3118 movdqu 64(%rdi),%xmm6
3119 pxor %xmm7,%xmm7
3120
3121 call __ocb_decrypt6
3122
# %xmm14 is the mask of the fifth (last real) block -> new running mask.
3123 movdqa %xmm14,%xmm15
3124 movups %xmm2,0(%rsi)
3125 pxor %xmm2,%xmm8
3126 movups %xmm3,16(%rsi)
3127 pxor %xmm3,%xmm8
3128 movups %xmm4,32(%rsi)
3129 pxor %xmm4,%xmm8
3130 movups %xmm5,48(%rsi)
3131 pxor %xmm5,%xmm8
3132 movups %xmm6,64(%rsi)
3133 pxor %xmm6,%xmm8
3134
3135 jmp .Locb_dec_done
3136
3137.align 16
# One block: the table entry is the copy of (%rbx) held in %xmm10.
3138.Locb_dec_one:
3139 movdqa %xmm10,%xmm7
3140
3141 call __ocb_decrypt1
3142
3143 movdqa %xmm7,%xmm15
3144 movups %xmm2,0(%rsi)
3145 xorps %xmm2,%xmm8
3146 jmp .Locb_dec_done
3147
3148.align 16
# Two blocks: four-block helper with lanes 3 and 4 zeroed; the mask of the
# second block (%xmm11) becomes the running mask.
3149.Locb_dec_two:
3150 pxor %xmm4,%xmm4
3151 pxor %xmm5,%xmm5
3152
3153 call __ocb_decrypt4
3154
3155 movdqa %xmm11,%xmm15
3156 movups %xmm2,0(%rsi)
3157 xorps %xmm2,%xmm8
3158 movups %xmm3,16(%rsi)
3159 xorps %xmm3,%xmm8
3160
3161 jmp .Locb_dec_done
3162
3163.align 16
# Three blocks: four-block helper with lane 4 zeroed; running mask = %xmm12.
3164.Locb_dec_three:
3165 pxor %xmm5,%xmm5
3166
3167 call __ocb_decrypt4
3168
3169 movdqa %xmm12,%xmm15
3170 movups %xmm2,0(%rsi)
3171 xorps %xmm2,%xmm8
3172 movups %xmm3,16(%rsi)
3173 xorps %xmm3,%xmm8
3174 movups %xmm4,32(%rsi)
3175 xorps %xmm4,%xmm8
3176
3177 jmp .Locb_dec_done
3178
3179.align 16
# Four blocks: running mask = %xmm13.
3180.Locb_dec_four:
3181 call __ocb_decrypt4
3182
3183 movdqa %xmm13,%xmm15
3184 movups %xmm2,0(%rsi)
3185 pxor %xmm2,%xmm8
3186 movups %xmm3,16(%rsi)
3187 pxor %xmm3,%xmm8
3188 movups %xmm4,32(%rsi)
3189 pxor %xmm4,%xmm8
3190 movups %xmm5,48(%rsi)
3191 pxor %xmm5,%xmm8
3192
3193.Locb_dec_done:
# %xmm0 holds the last key the helper loaded (from -16(%rcx), i.e. the key
# that was XORed into %xmm15 at entry); XORing it again removes it before the
# running mask is written back. The accumulator goes back to (%rbp).
3194 pxor %xmm0,%xmm15
3195 movdqu %xmm8,(%rbp)
3196 movdqu %xmm15,(%r9)
3197
# Wipe every XMM register so no key or data material is left behind.
3198 xorps %xmm0,%xmm0
3199 pxor %xmm1,%xmm1
3200 pxor %xmm2,%xmm2
3201 pxor %xmm3,%xmm3
3202 pxor %xmm4,%xmm4
3203 pxor %xmm5,%xmm5
3204 pxor %xmm6,%xmm6
3205 pxor %xmm7,%xmm7
3206 pxor %xmm8,%xmm8
3207 pxor %xmm9,%xmm9
3208 pxor %xmm10,%xmm10
3209 pxor %xmm11,%xmm11
3210 pxor %xmm12,%xmm12
3211 pxor %xmm13,%xmm13
3212 pxor %xmm14,%xmm14
3213 pxor %xmm15,%xmm15
# Restore the five saved registers via %rax = entry %rsp, then drop the frame.
3214 leaq 40(%rsp),%rax
3215.cfi_def_cfa %rax,8
3216 movq -40(%rax),%r14
3217.cfi_restore %r14
3218 movq -32(%rax),%r13
3219.cfi_restore %r13
3220 movq -24(%rax),%r12
3221.cfi_restore %r12
3222 movq -16(%rax),%rbp
3223.cfi_restore %rbp
3224 movq -8(%rax),%rbx
3225.cfi_restore %rbx
3226 leaq (%rax),%rsp
3227.cfi_def_cfa_register %rsp
3228.Locb_dec_epilogue:
# Hand-encoded "rep ret".
3229 .byte 0xf3,0xc3
3230.cfi_endproc
3231.size aesni_ocb_decrypt,.-aesni_ocb_decrypt
3232
3233.type __ocb_decrypt6,@function
3234.align 32
# __ocb_decrypt6 -- internal helper (no .globl): runs the six blocks in
# %xmm2..%xmm7 through the AES decryption rounds, XORing a per-block mask in
# before the first round and again as the operand of the last round.
#
# Register contract, as set up by aesni_ocb_decrypt:
#   %xmm9          constant XORed into / out of the masks
#   %xmm10         copy of the table entry at (%rbx); reloaded on exit
#   %xmm15         in: running mask; out: mask used for the sixth block
#   %xmm11..%xmm14 out: masks used for blocks 2..5
#   %rbx + %r12/%r13/%r14  table entries for the 2nd, 4th and 6th mask
#   %r8            counter; advanced by 6, %r12/%r13/%r14 recomputed from it
#   %r11           key schedule base; (%rcx,%rax) addresses the remaining
#                  round keys, %rax counts up to zero, reset from %r10 on exit
# Unlike the encrypt helper, %xmm8 is not touched here; the caller folds the
# outputs into it.
3235__ocb_decrypt6:
# Mask chain: each mask is the previous one XOR a table entry.
3236 pxor %xmm9,%xmm15
3237 movdqu (%rbx,%r12,1),%xmm11
3238 movdqa %xmm10,%xmm12
3239 movdqu (%rbx,%r13,1),%xmm13
3240 movdqa %xmm10,%xmm14
3241 pxor %xmm15,%xmm10
3242 movdqu (%rbx,%r14,1),%xmm15
3243 pxor %xmm10,%xmm11
3244 pxor %xmm10,%xmm2
3245 pxor %xmm11,%xmm12
3246 pxor %xmm11,%xmm3
3247 pxor %xmm12,%xmm13
3248 pxor %xmm12,%xmm4
3249 pxor %xmm13,%xmm14
3250 pxor %xmm13,%xmm5
3251 pxor %xmm14,%xmm15
3252 pxor %xmm14,%xmm6
3253 pxor %xmm15,%xmm7
3254 movups 32(%r11),%xmm0
3255
# Precompute the table indices for the next call
# (bit-scan-forward of counter+1/+3/+5, scaled by 16 further down).
3256 leaq 1(%r8),%r12
3257 leaq 3(%r8),%r13
3258 leaq 5(%r8),%r14
3259 addq $6,%r8
# The pxor %xmm9 lines interleaved below XOR %xmm9 into each mask a second
# time, now that the mask has been applied to its input block.
3260 pxor %xmm9,%xmm10
3261 bsfq %r12,%r12
3262 bsfq %r13,%r13
3263 bsfq %r14,%r14
3264
# Hand-encoded: aesdec %xmm1,%xmm2 .. aesdec %xmm1,%xmm7
3265.byte 102,15,56,222,209
3266.byte 102,15,56,222,217
3267.byte 102,15,56,222,225
3268.byte 102,15,56,222,233
3269 pxor %xmm9,%xmm11
3270 pxor %xmm9,%xmm12
3271.byte 102,15,56,222,241
3272 pxor %xmm9,%xmm13
3273 pxor %xmm9,%xmm14
3274.byte 102,15,56,222,249
3275 movups 48(%r11),%xmm1
3276 pxor %xmm9,%xmm15
3277
# Hand-encoded: aesdec %xmm0,%xmm2 .. aesdec %xmm0,%xmm7
3278.byte 102,15,56,222,208
3279.byte 102,15,56,222,216
3280.byte 102,15,56,222,224
3281.byte 102,15,56,222,232
3282.byte 102,15,56,222,240
3283.byte 102,15,56,222,248
3284 movups 64(%r11),%xmm0
3285 shlq $4,%r12
3286 shlq $4,%r13
3287 jmp .Locb_dec_loop6
3288
3289.align 32
# Two rounds per iteration; exits when addq $32 brings %rax to zero
# (movups leaves the flags untouched for the jnz).
3290.Locb_dec_loop6:
3291.byte 102,15,56,222,209
3292.byte 102,15,56,222,217
3293.byte 102,15,56,222,225
3294.byte 102,15,56,222,233
3295.byte 102,15,56,222,241
3296.byte 102,15,56,222,249
3297 movups (%rcx,%rax,1),%xmm1
3298 addq $32,%rax
3299
3300.byte 102,15,56,222,208
3301.byte 102,15,56,222,216
3302.byte 102,15,56,222,224
3303.byte 102,15,56,222,232
3304.byte 102,15,56,222,240
3305.byte 102,15,56,222,248
3306 movups -16(%rcx,%rax,1),%xmm0
3307 jnz .Locb_dec_loop6
3308
# Last aesdec round with %xmm1, then reload %xmm1 for the next call.
3309.byte 102,15,56,222,209
3310.byte 102,15,56,222,217
3311.byte 102,15,56,222,225
3312.byte 102,15,56,222,233
3313.byte 102,15,56,222,241
3314.byte 102,15,56,222,249
3315 movups 16(%r11),%xmm1
3316 shlq $4,%r14
3317
# Hand-encoded: aesdeclast %xmm10,%xmm2 .. aesdeclast %xmm15,%xmm7.
# %xmm10 is reloaded from (%rbx) right after its last use, and %rax is
# reset from %r10 for the next call.
3318.byte 102,65,15,56,223,210
3319 movdqu (%rbx),%xmm10
3320 movq %r10,%rax
3321.byte 102,65,15,56,223,219
3322.byte 102,65,15,56,223,228
3323.byte 102,65,15,56,223,237
3324.byte 102,65,15,56,223,246
3325.byte 102,65,15,56,223,255
# Hand-encoded "rep ret".
3326 .byte 0xf3,0xc3
3327.size __ocb_decrypt6,.-__ocb_decrypt6
3328
3329.type __ocb_decrypt4,@function
3330.align 32
# __ocb_decrypt4 -- internal helper (no .globl): four-block variant of
# __ocb_decrypt6. Processes %xmm2..%xmm5 with masks built in %xmm10..%xmm13
# from %xmm15, %xmm9, %xmm10 and the table entries at %rbx+%r12 / %rbx+%r13.
# It does not advance %r8 or refresh %r12..%r14 / %xmm10; aesni_ocb_decrypt
# only calls it on the tail of the data.
3331__ocb_decrypt4:
# Mask chain and input masking.
3332 pxor %xmm9,%xmm15
3333 movdqu (%rbx,%r12,1),%xmm11
3334 movdqa %xmm10,%xmm12
3335 movdqu (%rbx,%r13,1),%xmm13
3336 pxor %xmm15,%xmm10
3337 pxor %xmm10,%xmm11
3338 pxor %xmm10,%xmm2
3339 pxor %xmm11,%xmm12
3340 pxor %xmm11,%xmm3
3341 pxor %xmm12,%xmm13
3342 pxor %xmm12,%xmm4
3343 pxor %xmm13,%xmm5
3344 movups 32(%r11),%xmm0
3345
# XOR %xmm9 into the masks a second time before they serve as the
# final-round operands.
3346 pxor %xmm9,%xmm10
3347 pxor %xmm9,%xmm11
3348 pxor %xmm9,%xmm12
3349 pxor %xmm9,%xmm13
3350
# Hand-encoded: aesdec %xmm1,%xmm2 .. aesdec %xmm1,%xmm5
3351.byte 102,15,56,222,209
3352.byte 102,15,56,222,217
3353.byte 102,15,56,222,225
3354.byte 102,15,56,222,233
3355 movups 48(%r11),%xmm1
3356
# Hand-encoded: aesdec %xmm0,%xmm2 .. aesdec %xmm0,%xmm5
3357.byte 102,15,56,222,208
3358.byte 102,15,56,222,216
3359.byte 102,15,56,222,224
3360.byte 102,15,56,222,232
3361 movups 64(%r11),%xmm0
3362 jmp .Locb_dec_loop4
3363
3364.align 32
# Two rounds per iteration; exits when addq $32 brings %rax to zero.
3365.Locb_dec_loop4:
3366.byte 102,15,56,222,209
3367.byte 102,15,56,222,217
3368.byte 102,15,56,222,225
3369.byte 102,15,56,222,233
3370 movups (%rcx,%rax,1),%xmm1
3371 addq $32,%rax
3372
3373.byte 102,15,56,222,208
3374.byte 102,15,56,222,216
3375.byte 102,15,56,222,224
3376.byte 102,15,56,222,232
3377 movups -16(%rcx,%rax,1),%xmm0
3378 jnz .Locb_dec_loop4
3379
3380.byte 102,15,56,222,209
3381.byte 102,15,56,222,217
3382.byte 102,15,56,222,225
3383.byte 102,15,56,222,233
# Restore %xmm1 and %rax to their pre-call values.
3384 movups 16(%r11),%xmm1
3385 movq %r10,%rax
3386
# Hand-encoded: aesdeclast %xmm10,%xmm2 .. aesdeclast %xmm13,%xmm5
3387.byte 102,65,15,56,223,210
3388.byte 102,65,15,56,223,219
3389.byte 102,65,15,56,223,228
3390.byte 102,65,15,56,223,237
# Hand-encoded "rep ret".
3391 .byte 0xf3,0xc3
3392.size __ocb_decrypt4,.-__ocb_decrypt4
3393
3394.type __ocb_decrypt1,@function
3395.align 32
# __ocb_decrypt1 -- internal helper (no .globl): single-block variant.
#   %xmm2   in: input block; out: result block
#   %xmm7   in: table entry chosen by the caller; out: the block's mask
#           (entry ^ %xmm15, with %xmm9 XORed in and back out), which
#           aesni_ocb_decrypt then copies into %xmm15
#   %r11, %rcx, %rax, %r10  round-key addressing as set up by the caller
3396__ocb_decrypt1:
3397 pxor %xmm15,%xmm7
3398 pxor %xmm9,%xmm7
3399 pxor %xmm7,%xmm2
3400 movups 32(%r11),%xmm0
3401
# Hand-encoded: aesdec %xmm1,%xmm2
3402.byte 102,15,56,222,209
3403 movups 48(%r11),%xmm1
# XOR %xmm9 into the mask a second time before it is used in the last round.
3404 pxor %xmm9,%xmm7
3405
# Hand-encoded: aesdec %xmm0,%xmm2
3406.byte 102,15,56,222,208
3407 movups 64(%r11),%xmm0
3408 jmp .Locb_dec_loop1
3409
3410.align 32
# Two rounds per iteration; exits when addq $32 brings %rax to zero.
3411.Locb_dec_loop1:
3412.byte 102,15,56,222,209
3413 movups (%rcx,%rax,1),%xmm1
3414 addq $32,%rax
3415
3416.byte 102,15,56,222,208
3417 movups -16(%rcx,%rax,1),%xmm0
3418 jnz .Locb_dec_loop1
3419
3420.byte 102,15,56,222,209
# Restore %xmm1 and %rax to their pre-call values.
3421 movups 16(%r11),%xmm1
3422 movq %r10,%rax
3423
# Hand-encoded: aesdeclast %xmm7,%xmm2
3424.byte 102,15,56,223,215
# Hand-encoded "rep ret".
3425 .byte 0xf3,0xc3
3426.size __ocb_decrypt1,.-__ocb_decrypt1
3427.globl aesni_cbc_encrypt
3428.type aesni_cbc_encrypt,@function
3429.align 16
# aesni_cbc_encrypt -- exported CBC-mode encrypt/decrypt routine.
#
# Arguments, as used by the code below (register order matches the
# System V AMD64 convention):
#   %rdi   input pointer
#   %rsi   output pointer
#   %rdx   length in bytes (compared against 16 and multiples of 16);
#          zero returns immediately
#   %rcx   key schedule; the round count is read from offset 240
#   %r8    pointer to the 16-byte chaining value (IV), read on entry and
#          updated on exit
#   %r9d   zero selects decryption, non-zero selects encryption
#
# Structure: a serial one-block-at-a-time encrypt loop with an in-place
# zero-padded tail, and a decrypt path that works on 8 or 6 blocks in
# parallel and finishes 1..7 leftover blocks through the _aesni_decryptN
# helpers defined elsewhere in the file.
3430aesni_cbc_encrypt:
3431.cfi_startproc
3432 testq %rdx,%rdx
3433 jz .Lcbc_ret
3434
# %r10d = round count, %r11 = key schedule (both kept for reloading).
3435 movl 240(%rcx),%r10d
3436 movq %rcx,%r11
3437 testl %r9d,%r9d
3438 jz .Lcbc_decrypt
3439
# ---- Encryption: %xmm2 carries the chaining value ----
3440 movups (%r8),%xmm2
3441 movl %r10d,%eax
3442 cmpq $16,%rdx
3443 jb .Lcbc_enc_tail
3444 subq $16,%rdx
3445 jmp .Lcbc_enc_loop
3446.align 16
3447.Lcbc_enc_loop:
3448 movups (%rdi),%xmm3
3449 leaq 16(%rdi),%rdi
3450
# %xmm2 = previous ciphertext ^ plaintext ^ round key 0.
3451 movups (%rcx),%xmm0
3452 movups 16(%rcx),%xmm1
3453 xorps %xmm0,%xmm3
3454 leaq 32(%rcx),%rcx
3455 xorps %xmm3,%xmm2
# Hand-encoded: aesenc %xmm1,%xmm2 once per remaining round, then
# aesenclast %xmm1,%xmm2.
3456.Loop_enc1_15:
3457.byte 102,15,56,220,209
3458 decl %eax
3459 movups (%rcx),%xmm1
3460 leaq 16(%rcx),%rcx
3461 jnz .Loop_enc1_15
3462.byte 102,15,56,221,209
# Reset the round counter and key pointer for the next block.
3463 movl %r10d,%eax
3464 movq %r11,%rcx
3465 movups %xmm2,0(%rsi)
3466 leaq 16(%rsi),%rsi
3467 subq $16,%rdx
3468 jnc .Lcbc_enc_loop
# Borrow: fewer than 16 bytes were left. Non-zero remainder -> padded tail.
3469 addq $16,%rdx
3470 jnz .Lcbc_enc_tail
# Done: wipe registers and write the last ciphertext block back as the IV.
3471 pxor %xmm0,%xmm0
3472 pxor %xmm1,%xmm1
3473 movups %xmm2,(%r8)
3474 pxor %xmm2,%xmm2
3475 pxor %xmm3,%xmm3
3476 jmp .Lcbc_ret
3477
# Partial final block: copy the %rdx remaining input bytes to the output
# buffer, zero-fill it up to 16 bytes, then run the loop once more over that
# block in place (%rdi = %rsi = output block, %rdx = 0).
3478.Lcbc_enc_tail:
3479 movq %rdx,%rcx
3480 xchgq %rdi,%rsi
# Hand-encoded: rep movsb, followed by a 2-byte nop.
3481.long 0x9066A4F3
3482 movl $16,%ecx
3483 subq %rdx,%rcx
3484 xorl %eax,%eax
# Hand-encoded: rep stosb, followed by a 2-byte nop.
3485.long 0x9066AAF3
3486 leaq -16(%rdi),%rdi
3487 movl %r10d,%eax
3488 movq %rdi,%rsi
3489 movq %r11,%rcx
3490 xorq %rdx,%rdx
3491 jmp .Lcbc_enc_loop
3492
3493.align 16
# ---- Decryption ----
# Exactly one block is handled right here without touching the stack;
# every other length goes to the bulk path.
3494.Lcbc_decrypt:
3495 cmpq $16,%rdx
3496 jne .Lcbc_decrypt_bulk
3497
3498
3499
# %xmm4 keeps a copy of the ciphertext; it becomes the new IV.
3500 movdqu (%rdi),%xmm2
3501 movdqu (%r8),%xmm3
3502 movdqa %xmm2,%xmm4
3503 movups (%rcx),%xmm0
3504 movups 16(%rcx),%xmm1
3505 leaq 32(%rcx),%rcx
3506 xorps %xmm0,%xmm2
# Hand-encoded: aesdec %xmm1,%xmm2 per round, then aesdeclast %xmm1,%xmm2.
3507.Loop_dec1_16:
3508.byte 102,15,56,222,209
3509 decl %r10d
3510 movups (%rcx),%xmm1
3511 leaq 16(%rcx),%rcx
3512 jnz .Loop_dec1_16
3513.byte 102,15,56,223,209
3514 pxor %xmm0,%xmm0
3515 pxor %xmm1,%xmm1
3516 movdqu %xmm4,(%r8)
3517 xorps %xmm3,%xmm2
3518 pxor %xmm3,%xmm3
3519 movups %xmm2,(%rsi)
3520 pxor %xmm2,%xmm2
3521 jmp .Lcbc_ret
3522.align 16
# Bulk decrypt. %r11 remembers the entry %rsp; %rbp is saved and a 16-byte
# aligned scratch slot is carved out for the partial-tail copy.
# %rbp holds the key schedule pointer (the 8-block loop reuses it as a
# prefetch pointer), %xmm10 the chaining value.
3523.Lcbc_decrypt_bulk:
3524 leaq (%rsp),%r11
3525.cfi_def_cfa_register %r11
3526 pushq %rbp
3527.cfi_offset %rbp,-16
3528 subq $16,%rsp
3529 andq $-16,%rsp
3530 movq %rcx,%rbp
3531 movups (%r8),%xmm10
3532 movl %r10d,%eax
3533 cmpq $0x50,%rdx
3534 jbe .Lcbc_dec_tail
3535
# More than 5 blocks: load six ciphertext blocks, keeping copies of the
# first five in %xmm11..%xmm15 to serve as chaining values.
3536 movups (%rcx),%xmm0
3537 movdqu 0(%rdi),%xmm2
3538 movdqu 16(%rdi),%xmm3
3539 movdqa %xmm2,%xmm11
3540 movdqu 32(%rdi),%xmm4
3541 movdqa %xmm3,%xmm12
3542 movdqu 48(%rdi),%xmm5
3543 movdqa %xmm4,%xmm13
3544 movdqu 64(%rdi),%xmm6
3545 movdqa %xmm5,%xmm14
3546 movdqu 80(%rdi),%xmm7
3547 movdqa %xmm6,%xmm15
3548 movl OPENSSL_ia32cap_P+4(%rip),%r9d
3549 cmpq $0x70,%rdx
3550 jbe .Lcbc_dec_six_or_seven
3551
# 71303168 = 0x04400000, 4194304 = 0x00400000: take the 6-block loop when,
# of those two capability bits, only bit 22 is set.
# NOTE(review): presumably "MOVBE without XSAVE" used to detect Atom-class
# cores -- confirm against the OPENSSL_ia32cap documentation.
3552 andl $71303168,%r9d
3553 subq $0x50,%rdx
3554 cmpl $4194304,%r9d
3555 je .Lcbc_dec_loop6_enter
3556 subq $0x20,%rdx
# Bias %rcx by 112 so the round keys are addressed as N-112(%rcx).
3557 leaq 112(%rcx),%rcx
3558 jmp .Lcbc_dec_loop8_enter
3559.align 16
# 8-block loop. Re-entry stores the eighth plaintext of the previous
# iteration, which was still pending in %xmm9.
3560.Lcbc_dec_loop8:
3561 movups %xmm9,(%rsi)
3562 leaq 16(%rsi),%rsi
3563.Lcbc_dec_loop8_enter:
3564 movdqu 96(%rdi),%xmm8
3565 pxor %xmm0,%xmm2
3566 movdqu 112(%rdi),%xmm9
3567 pxor %xmm0,%xmm3
3568 movups 16-112(%rcx),%xmm1
3569 pxor %xmm0,%xmm4
# %rbp = %rdi + 128 if at least 0x70 bytes remain beyond this batch,
# else %rdi: the cmpq sets CF when %rdx < 0x70, and adcq/andq/addq below turn
# that into 0 or 128. It is the address the next six blocks are preloaded
# from at the end of the iteration.
3570 movq $-1,%rbp
3571 cmpq $0x70,%rdx
3572 pxor %xmm0,%xmm5
3573 pxor %xmm0,%xmm6
3574 pxor %xmm0,%xmm7
3575 pxor %xmm0,%xmm8
3576
# Hand-encoded rounds follow: "...,209" .. "...,249" are aesdec %xmm1 into
# %xmm2..%xmm7; "102,68,...,193/201" are aesdec %xmm1 into %xmm8/%xmm9;
# the "...,208".."248" and "102,68,...,192/200" forms use %xmm0 instead.
3577.byte 102,15,56,222,209
3578 pxor %xmm0,%xmm9
3579 movups 32-112(%rcx),%xmm0
3580.byte 102,15,56,222,217
3581.byte 102,15,56,222,225
3582.byte 102,15,56,222,233
3583.byte 102,15,56,222,241
3584.byte 102,15,56,222,249
3585.byte 102,68,15,56,222,193
3586 adcq $0,%rbp
3587 andq $128,%rbp
3588.byte 102,68,15,56,222,201
3589 addq %rdi,%rbp
3590 movups 48-112(%rcx),%xmm1
3591.byte 102,15,56,222,208
3592.byte 102,15,56,222,216
3593.byte 102,15,56,222,224
3594.byte 102,15,56,222,232
3595.byte 102,15,56,222,240
3596.byte 102,15,56,222,248
3597.byte 102,68,15,56,222,192
3598.byte 102,68,15,56,222,200
3599 movups 64-112(%rcx),%xmm0
3600 nop
3601.byte 102,15,56,222,209
3602.byte 102,15,56,222,217
3603.byte 102,15,56,222,225
3604.byte 102,15,56,222,233
3605.byte 102,15,56,222,241
3606.byte 102,15,56,222,249
3607.byte 102,68,15,56,222,193
3608.byte 102,68,15,56,222,201
3609 movups 80-112(%rcx),%xmm1
3610 nop
3611.byte 102,15,56,222,208
3612.byte 102,15,56,222,216
3613.byte 102,15,56,222,224
3614.byte 102,15,56,222,232
3615.byte 102,15,56,222,240
3616.byte 102,15,56,222,248
3617.byte 102,68,15,56,222,192
3618.byte 102,68,15,56,222,200
3619 movups 96-112(%rcx),%xmm0
3620 nop
3621.byte 102,15,56,222,209
3622.byte 102,15,56,222,217
3623.byte 102,15,56,222,225
3624.byte 102,15,56,222,233
3625.byte 102,15,56,222,241
3626.byte 102,15,56,222,249
3627.byte 102,68,15,56,222,193
3628.byte 102,68,15,56,222,201
3629 movups 112-112(%rcx),%xmm1
3630 nop
3631.byte 102,15,56,222,208
3632.byte 102,15,56,222,216
3633.byte 102,15,56,222,224
3634.byte 102,15,56,222,232
3635.byte 102,15,56,222,240
3636.byte 102,15,56,222,248
3637.byte 102,68,15,56,222,192
3638.byte 102,68,15,56,222,200
3639 movups 128-112(%rcx),%xmm0
3640 nop
3641.byte 102,15,56,222,209
3642.byte 102,15,56,222,217
3643.byte 102,15,56,222,225
3644.byte 102,15,56,222,233
3645.byte 102,15,56,222,241
3646.byte 102,15,56,222,249
3647.byte 102,68,15,56,222,193
3648.byte 102,68,15,56,222,201
3649 movups 144-112(%rcx),%xmm1
# Compare the round count with 11 once; the jb below and the je further
# down both consume these flags (aesdec and movups leave them untouched).
3650 cmpl $11,%eax
3651.byte 102,15,56,222,208
3652.byte 102,15,56,222,216
3653.byte 102,15,56,222,224
3654.byte 102,15,56,222,232
3655.byte 102,15,56,222,240
3656.byte 102,15,56,222,248
3657.byte 102,68,15,56,222,192
3658.byte 102,68,15,56,222,200
3659 movups 160-112(%rcx),%xmm0
# Round count below 11: %xmm0 already holds the final round key.
3660 jb .Lcbc_dec_done
3661.byte 102,15,56,222,209
3662.byte 102,15,56,222,217
3663.byte 102,15,56,222,225
3664.byte 102,15,56,222,233
3665.byte 102,15,56,222,241
3666.byte 102,15,56,222,249
3667.byte 102,68,15,56,222,193
3668.byte 102,68,15,56,222,201
3669 movups 176-112(%rcx),%xmm1
3670 nop
3671.byte 102,15,56,222,208
3672.byte 102,15,56,222,216
3673.byte 102,15,56,222,224
3674.byte 102,15,56,222,232
3675.byte 102,15,56,222,240
3676.byte 102,15,56,222,248
3677.byte 102,68,15,56,222,192
3678.byte 102,68,15,56,222,200
3679 movups 192-112(%rcx),%xmm0
# Round count equal to 11: two extra rounds were enough.
3680 je .Lcbc_dec_done
3681.byte 102,15,56,222,209
3682.byte 102,15,56,222,217
3683.byte 102,15,56,222,225
3684.byte 102,15,56,222,233
3685.byte 102,15,56,222,241
3686.byte 102,15,56,222,249
3687.byte 102,68,15,56,222,193
3688.byte 102,68,15,56,222,201
3689 movups 208-112(%rcx),%xmm1
3690 nop
3691.byte 102,15,56,222,208
3692.byte 102,15,56,222,216
3693.byte 102,15,56,222,224
3694.byte 102,15,56,222,232
3695.byte 102,15,56,222,240
3696.byte 102,15,56,222,248
3697.byte 102,68,15,56,222,192
3698.byte 102,68,15,56,222,200
3699 movups 224-112(%rcx),%xmm0
3700 jmp .Lcbc_dec_done
3701.align 16
# Final two rounds. Each chaining value (IV / previous ciphertext block in
# %xmm10..%xmm15, plus blocks 6 and 7 reloaded from 80/96(%rdi)) is XORed
# with the last round key in %xmm0 and then used as the aesdeclast operand,
# which performs the last AddRoundKey and the CBC XOR in one instruction.
3702.Lcbc_dec_done:
3703.byte 102,15,56,222,209
3704.byte 102,15,56,222,217
3705 pxor %xmm0,%xmm10
3706 pxor %xmm0,%xmm11
3707.byte 102,15,56,222,225
3708.byte 102,15,56,222,233
3709 pxor %xmm0,%xmm12
3710 pxor %xmm0,%xmm13
3711.byte 102,15,56,222,241
3712.byte 102,15,56,222,249
3713 pxor %xmm0,%xmm14
3714 pxor %xmm0,%xmm15
3715.byte 102,68,15,56,222,193
3716.byte 102,68,15,56,222,201
3717 movdqu 80(%rdi),%xmm1
3718
# Hand-encoded aesdeclast %xmm10..%xmm15 into %xmm2..%xmm7, then
# aesdeclast %xmm1,%xmm8 and aesdeclast %xmm10,%xmm9, interleaved with the
# preload of the next six ciphertext blocks from (%rbp).
3719.byte 102,65,15,56,223,210
3720 movdqu 96(%rdi),%xmm10
3721 pxor %xmm0,%xmm1
3722.byte 102,65,15,56,223,219
3723 pxor %xmm0,%xmm10
# Ciphertext block 8 of this batch: becomes the next chaining value.
3724 movdqu 112(%rdi),%xmm0
3725.byte 102,65,15,56,223,228
3726 leaq 128(%rdi),%rdi
3727 movdqu 0(%rbp),%xmm11
3728.byte 102,65,15,56,223,237
3729.byte 102,65,15,56,223,246
3730 movdqu 16(%rbp),%xmm12
3731 movdqu 32(%rbp),%xmm13
3732.byte 102,65,15,56,223,255
3733.byte 102,68,15,56,223,193
3734 movdqu 48(%rbp),%xmm14
3735 movdqu 64(%rbp),%xmm15
3736.byte 102,69,15,56,223,202
3737 movdqa %xmm0,%xmm10
3738 movdqu 80(%rbp),%xmm1
# Reload round key 0 for the next iteration.
3739 movups -112(%rcx),%xmm0
3740
# Store seven plaintext blocks (the eighth stays in %xmm9) and move the
# preloaded ciphertext into the working registers.
3741 movups %xmm2,(%rsi)
3742 movdqa %xmm11,%xmm2
3743 movups %xmm3,16(%rsi)
3744 movdqa %xmm12,%xmm3
3745 movups %xmm4,32(%rsi)
3746 movdqa %xmm13,%xmm4
3747 movups %xmm5,48(%rsi)
3748 movdqa %xmm14,%xmm5
3749 movups %xmm6,64(%rsi)
3750 movdqa %xmm15,%xmm6
3751 movups %xmm7,80(%rsi)
3752 movdqa %xmm1,%xmm7
3753 movups %xmm8,96(%rsi)
3754 leaq 112(%rsi),%rsi
3755
3756 subq $0x80,%rdx
3757 ja .Lcbc_dec_loop8
3758
# Leaving the 8-block loop: the pending plaintext is in %xmm9; undo the
# %rcx bias and the length bias to see how much is left.
3759 movaps %xmm9,%xmm2
3760 leaq -112(%rcx),%rcx
3761 addq $0x70,%rdx
3762 jle .Lcbc_dec_clear_tail_collected
3763 movups %xmm9,(%rsi)
3764 leaq 16(%rsi),%rsi
3765 cmpq $0x50,%rdx
3766 jbe .Lcbc_dec_tail
3767
3768 movaps %xmm11,%xmm2
# 6 or 7 blocks remain, with the first six already loaded in %xmm2..%xmm7.
3769.Lcbc_dec_six_or_seven:
3770 cmpq $0x60,%rdx
3771 ja .Lcbc_dec_seven
3772
# Six blocks: %xmm8 keeps the sixth ciphertext block as the next IV.
# The last plaintext block is left in %xmm2 for .Lcbc_dec_tail_collected.
3773 movaps %xmm7,%xmm8
3774 call _aesni_decrypt6
3775 pxor %xmm10,%xmm2
3776 movaps %xmm8,%xmm10
3777 pxor %xmm11,%xmm3
3778 movdqu %xmm2,(%rsi)
3779 pxor %xmm12,%xmm4
3780 movdqu %xmm3,16(%rsi)
3781 pxor %xmm3,%xmm3
3782 pxor %xmm13,%xmm5
3783 movdqu %xmm4,32(%rsi)
3784 pxor %xmm4,%xmm4
3785 pxor %xmm14,%xmm6
3786 movdqu %xmm5,48(%rsi)
3787 pxor %xmm5,%xmm5
3788 pxor %xmm15,%xmm7
3789 movdqu %xmm6,64(%rsi)
3790 pxor %xmm6,%xmm6
3791 leaq 80(%rsi),%rsi
3792 movdqa %xmm7,%xmm2
3793 pxor %xmm7,%xmm7
3794 jmp .Lcbc_dec_tail_collected
3795
3796.align 16
# Seven blocks: use the 8-block helper with a zeroed eighth lane. The sixth
# and seventh ciphertext blocks are reloaded afterwards as the chaining value
# for block 7 (%xmm9) and as the next IV (%xmm10).
3797.Lcbc_dec_seven:
3798 movups 96(%rdi),%xmm8
3799 xorps %xmm9,%xmm9
3800 call _aesni_decrypt8
3801 movups 80(%rdi),%xmm9
3802 pxor %xmm10,%xmm2
3803 movups 96(%rdi),%xmm10
3804 pxor %xmm11,%xmm3
3805 movdqu %xmm2,(%rsi)
3806 pxor %xmm12,%xmm4
3807 movdqu %xmm3,16(%rsi)
3808 pxor %xmm3,%xmm3
3809 pxor %xmm13,%xmm5
3810 movdqu %xmm4,32(%rsi)
3811 pxor %xmm4,%xmm4
3812 pxor %xmm14,%xmm6
3813 movdqu %xmm5,48(%rsi)
3814 pxor %xmm5,%xmm5
3815 pxor %xmm15,%xmm7
3816 movdqu %xmm6,64(%rsi)
3817 pxor %xmm6,%xmm6
3818 pxor %xmm9,%xmm8
3819 movdqu %xmm7,80(%rsi)
3820 pxor %xmm7,%xmm7
3821 leaq 96(%rsi),%rsi
3822 movdqa %xmm8,%xmm2
3823 pxor %xmm8,%xmm8
3824 pxor %xmm9,%xmm9
3825 jmp .Lcbc_dec_tail_collected
3826
3827.align 16
# 6-block loop (alternative to the 8-block loop). Re-entry stores the sixth
# plaintext of the previous iteration, pending in %xmm7, then loads the next
# six ciphertext blocks with copies for chaining.
3828.Lcbc_dec_loop6:
3829 movups %xmm7,(%rsi)
3830 leaq 16(%rsi),%rsi
3831 movdqu 0(%rdi),%xmm2
3832 movdqu 16(%rdi),%xmm3
3833 movdqa %xmm2,%xmm11
3834 movdqu 32(%rdi),%xmm4
3835 movdqa %xmm3,%xmm12
3836 movdqu 48(%rdi),%xmm5
3837 movdqa %xmm4,%xmm13
3838 movdqu 64(%rdi),%xmm6
3839 movdqa %xmm5,%xmm14
3840 movdqu 80(%rdi),%xmm7
3841 movdqa %xmm6,%xmm15
3842.Lcbc_dec_loop6_enter:
3843 leaq 96(%rdi),%rdi
3844 movdqa %xmm7,%xmm8
3845
3846 call _aesni_decrypt6
3847
# CBC XOR with the chaining values; %rcx and %eax are restored from
# %rbp / %r10d after the helper call.
3848 pxor %xmm10,%xmm2
3849 movdqa %xmm8,%xmm10
3850 pxor %xmm11,%xmm3
3851 movdqu %xmm2,(%rsi)
3852 pxor %xmm12,%xmm4
3853 movdqu %xmm3,16(%rsi)
3854 pxor %xmm13,%xmm5
3855 movdqu %xmm4,32(%rsi)
3856 pxor %xmm14,%xmm6
3857 movq %rbp,%rcx
3858 movdqu %xmm5,48(%rsi)
3859 pxor %xmm15,%xmm7
3860 movl %r10d,%eax
3861 movdqu %xmm6,64(%rsi)
3862 leaq 80(%rsi),%rsi
3863 subq $0x60,%rdx
3864 ja .Lcbc_dec_loop6
3865
3866 movdqa %xmm7,%xmm2
3867 addq $0x50,%rdx
3868 jle .Lcbc_dec_clear_tail_collected
3869 movups %xmm7,(%rsi)
3870 leaq 16(%rsi),%rsi
3871
# Tail: at most five blocks (the last possibly partial). Blocks are loaded
# one by one, subtracting 16 from %rdx each time, and the matching
# fixed-width routine is chosen as soon as the length runs out.
3872.Lcbc_dec_tail:
3873 movups (%rdi),%xmm2
3874 subq $0x10,%rdx
3875 jbe .Lcbc_dec_one
3876
3877 movups 16(%rdi),%xmm3
3878 movaps %xmm2,%xmm11
3879 subq $0x10,%rdx
3880 jbe .Lcbc_dec_two
3881
3882 movups 32(%rdi),%xmm4
3883 movaps %xmm3,%xmm12
3884 subq $0x10,%rdx
3885 jbe .Lcbc_dec_three
3886
3887 movups 48(%rdi),%xmm5
3888 movaps %xmm4,%xmm13
3889 subq $0x10,%rdx
3890 jbe .Lcbc_dec_four
3891
# Five blocks: 6-block helper with a zeroed sixth lane.
3892 movups 64(%rdi),%xmm6
3893 movaps %xmm5,%xmm14
3894 movaps %xmm6,%xmm15
3895 xorps %xmm7,%xmm7
3896 call _aesni_decrypt6
3897 pxor %xmm10,%xmm2
3898 movaps %xmm15,%xmm10
3899 pxor %xmm11,%xmm3
3900 movdqu %xmm2,(%rsi)
3901 pxor %xmm12,%xmm4
3902 movdqu %xmm3,16(%rsi)
3903 pxor %xmm3,%xmm3
3904 pxor %xmm13,%xmm5
3905 movdqu %xmm4,32(%rsi)
3906 pxor %xmm4,%xmm4
3907 pxor %xmm14,%xmm6
3908 movdqu %xmm5,48(%rsi)
3909 pxor %xmm5,%xmm5
3910 leaq 64(%rsi),%rsi
3911 movdqa %xmm6,%xmm2
3912 pxor %xmm6,%xmm6
3913 pxor %xmm7,%xmm7
3914 subq $0x10,%rdx
3915 jmp .Lcbc_dec_tail_collected
3916
3917.align 16
# One block, decrypted inline (aesdec / aesdeclast %xmm1,%xmm2).
3918.Lcbc_dec_one:
3919 movaps %xmm2,%xmm11
3920 movups (%rcx),%xmm0
3921 movups 16(%rcx),%xmm1
3922 leaq 32(%rcx),%rcx
3923 xorps %xmm0,%xmm2
3924.Loop_dec1_17:
3925.byte 102,15,56,222,209
3926 decl %eax
3927 movups (%rcx),%xmm1
3928 leaq 16(%rcx),%rcx
3929 jnz .Loop_dec1_17
3930.byte 102,15,56,223,209
3931 xorps %xmm10,%xmm2
3932 movaps %xmm11,%xmm10
3933 jmp .Lcbc_dec_tail_collected
3934.align 16
# Two blocks.
3935.Lcbc_dec_two:
3936 movaps %xmm3,%xmm12
3937 call _aesni_decrypt2
3938 pxor %xmm10,%xmm2
3939 movaps %xmm12,%xmm10
3940 pxor %xmm11,%xmm3
3941 movdqu %xmm2,(%rsi)
3942 movdqa %xmm3,%xmm2
3943 pxor %xmm3,%xmm3
3944 leaq 16(%rsi),%rsi
3945 jmp .Lcbc_dec_tail_collected
3946.align 16
# Three blocks.
3947.Lcbc_dec_three:
3948 movaps %xmm4,%xmm13
3949 call _aesni_decrypt3
3950 pxor %xmm10,%xmm2
3951 movaps %xmm13,%xmm10
3952 pxor %xmm11,%xmm3
3953 movdqu %xmm2,(%rsi)
3954 pxor %xmm12,%xmm4
3955 movdqu %xmm3,16(%rsi)
3956 pxor %xmm3,%xmm3
3957 movdqa %xmm4,%xmm2
3958 pxor %xmm4,%xmm4
3959 leaq 32(%rsi),%rsi
3960 jmp .Lcbc_dec_tail_collected
3961.align 16
# Four blocks.
3962.Lcbc_dec_four:
3963 movaps %xmm5,%xmm14
3964 call _aesni_decrypt4
3965 pxor %xmm10,%xmm2
3966 movaps %xmm14,%xmm10
3967 pxor %xmm11,%xmm3
3968 movdqu %xmm2,(%rsi)
3969 pxor %xmm12,%xmm4
3970 movdqu %xmm3,16(%rsi)
3971 pxor %xmm3,%xmm3
3972 pxor %xmm13,%xmm5
3973 movdqu %xmm4,32(%rsi)
3974 pxor %xmm4,%xmm4
3975 movdqa %xmm5,%xmm2
3976 pxor %xmm5,%xmm5
3977 leaq 48(%rsi),%rsi
3978 jmp .Lcbc_dec_tail_collected
3979
3980.align 16
# Entry used when leaving the bulk loops: wipe the plaintext registers that
# the per-count tail paths above clear individually.
3981.Lcbc_dec_clear_tail_collected:
3982 pxor %xmm3,%xmm3
3983 pxor %xmm4,%xmm4
3984 pxor %xmm5,%xmm5
3985 pxor %xmm6,%xmm6
3986 pxor %xmm7,%xmm7
3987 pxor %xmm8,%xmm8
3988 pxor %xmm9,%xmm9
# Common exit: %xmm10 = new IV, %xmm2 = last plaintext block not yet stored.
# If the low four bits of %rdx are zero the block is stored whole.
3989.Lcbc_dec_tail_collected:
3990 movups %xmm10,(%r8)
3991 andq $15,%rdx
3992 jnz .Lcbc_dec_tail_partial
3993 movups %xmm2,(%rsi)
3994 pxor %xmm2,%xmm2
3995 jmp .Lcbc_dec_ret
3996.align 16
# Partial last block: spill it to the aligned stack slot, copy
# 16 - (%rdx & 15) bytes from there to the output with rep movsb, then
# overwrite the slot with the (already zeroed) %xmm2.
3997.Lcbc_dec_tail_partial:
3998 movaps %xmm2,(%rsp)
3999 pxor %xmm2,%xmm2
4000 movq $16,%rcx
4001 movq %rsi,%rdi
4002 subq %rdx,%rcx
4003 leaq (%rsp),%rsi
# Hand-encoded: rep movsb, followed by a 2-byte nop.
4004.long 0x9066A4F3
4005 movdqa %xmm2,(%rsp)
4006
# Bulk-decrypt epilogue: wipe the key registers, restore %rbp and %rsp from
# the frame anchored at %r11.
4007.Lcbc_dec_ret:
4008 xorps %xmm0,%xmm0
4009 pxor %xmm1,%xmm1
4010 movq -8(%r11),%rbp
4011.cfi_restore %rbp
4012 leaq (%r11),%rsp
4013.cfi_def_cfa_register %rsp
4014.Lcbc_ret:
# Hand-encoded "rep ret".
4015 .byte 0xf3,0xc3
4016.cfi_endproc
4017.size aesni_cbc_encrypt,.-aesni_cbc_encrypt
4018.globl aesni_set_decrypt_key
4019.type aesni_set_decrypt_key,@function
4020.align 16
# aesni_set_decrypt_key -- exported: builds a decryption key schedule by
# running the encryption key setup and then converting the result in place.
#
# Arguments are passed straight through to __aesni_set_encrypt_key
# (%rdi, %esi, %rdx). Returns that routine's %eax unchanged: non-zero means
# it failed and the schedule is left as it produced it.
# NOTE(review): the conversion assumes that on success %esi holds the round
# count stored in the schedule and %rdx still points at the schedule --
# confirm against __aesni_set_encrypt_key.
4021aesni_set_decrypt_key:
4022.cfi_startproc
# Hand-encoded: sub $8,%rsp.
4023.byte 0x48,0x83,0xEC,0x08
4024.cfi_adjust_cfa_offset 8
4025 call __aesni_set_encrypt_key
4026 shll $4,%esi
4027 testl %eax,%eax
4028 jnz .Ldec_key_ret
# %rdi = address of the round key at 16 + 16*%esi, i.e. the far end of the
# schedule; %rdx walks up from the start.
4029 leaq 16(%rdx,%rsi,1),%rdi
4030
# Swap the two outermost round keys as they are (no transform applied).
4031 movups (%rdx),%xmm0
4032 movups (%rdi),%xmm1
4033 movups %xmm0,(%rdi)
4034 movups %xmm1,(%rdx)
4035 leaq 16(%rdx),%rdx
4036 leaq -16(%rdi),%rdi
4037
# Walk inwards from both ends: apply aesimc to each pair of round keys and
# store them swapped, until the two pointers meet.
4038.Ldec_key_inverse:
4039 movups (%rdx),%xmm0
4040 movups (%rdi),%xmm1
# Hand-encoded: aesimc %xmm0,%xmm0 ; aesimc %xmm1,%xmm1
4041.byte 102,15,56,219,192
4042.byte 102,15,56,219,201
4043 leaq 16(%rdx),%rdx
4044 leaq -16(%rdi),%rdi
4045 movups %xmm0,16(%rdi)
4046 movups %xmm1,-16(%rdx)
4047 cmpq %rdx,%rdi
4048 ja .Ldec_key_inverse
4049
# Remaining middle round key: transformed in place, then the temporaries
# are wiped.
4050 movups (%rdx),%xmm0
4051.byte 102,15,56,219,192
4052 pxor %xmm1,%xmm1
4053 movups %xmm0,(%rdi)
4054 pxor %xmm0,%xmm0
4055.Ldec_key_ret:
4056 addq $8,%rsp
4057.cfi_adjust_cfa_offset -8
# Hand-encoded "rep ret".
4058 .byte 0xf3,0xc3
4059.cfi_endproc
4060.LSEH_end_set_decrypt_key:
4061.size aesni_set_decrypt_key,.-aesni_set_decrypt_key
4062.globl aesni_set_encrypt_key
4063.type aesni_set_encrypt_key,@function
4064.align 16
4065aesni_set_encrypt_key:
4066__aesni_set_encrypt_key:
4067.cfi_startproc
4068.byte 0x48,0x83,0xEC,0x08
4069.cfi_adjust_cfa_offset 8
4070 movq $-1,%rax
4071 testq %rdi,%rdi
4072 jz .Lenc_key_ret
4073 testq %rdx,%rdx
4074 jz .Lenc_key_ret
4075
4076 movl $268437504,%r10d
4077 movups (%rdi),%xmm0
4078 xorps %xmm4,%xmm4
4079 andl OPENSSL_ia32cap_P+4(%rip),%r10d
4080 leaq 16(%rdx),%rax
4081 cmpl $256,%esi
4082 je .L14rounds
4083 cmpl $192,%esi
4084 je .L12rounds
4085 cmpl $128,%esi
4086 jne .Lbad_keybits
4087
4088.L10rounds:
4089 movl $9,%esi
4090 cmpl $268435456,%r10d
4091 je .L10rounds_alt
4092
4093 movups %xmm0,(%rdx)
4094.byte 102,15,58,223,200,1
4095 call .Lkey_expansion_128_cold
4096.byte 102,15,58,223,200,2
4097 call .Lkey_expansion_128
4098.byte 102,15,58,223,200,4
4099 call .Lkey_expansion_128
4100.byte 102,15,58,223,200,8
4101 call .Lkey_expansion_128
4102.byte 102,15,58,223,200,16
4103 call .Lkey_expansion_128
4104.byte 102,15,58,223,200,32
4105 call .Lkey_expansion_128
4106.byte 102,15,58,223,200,64
4107 call .Lkey_expansion_128
4108.byte 102,15,58,223,200,128
4109 call .Lkey_expansion_128
4110.byte 102,15,58,223,200,27
4111 call .Lkey_expansion_128
4112.byte 102,15,58,223,200,54
4113 call .Lkey_expansion_128
4114 movups %xmm0,(%rax)
4115 movl %esi,80(%rax)
4116 xorl %eax,%eax
4117 jmp .Lenc_key_ret
4118
4119.align 16
4120.L10rounds_alt:
4121 movdqa .Lkey_rotate(%rip),%xmm5
4122 movl $8,%r10d
4123 movdqa .Lkey_rcon1(%rip),%xmm4
4124 movdqa %xmm0,%xmm2
4125 movdqu %xmm0,(%rdx)
4126 jmp .Loop_key128
4127
4128.align 16
4129.Loop_key128:
4130.byte 102,15,56,0,197
4131.byte 102,15,56,221,196
4132 pslld $1,%xmm4
4133 leaq 16(%rax),%rax
4134
4135 movdqa %xmm2,%xmm3
4136 pslldq $4,%xmm2
4137 pxor %xmm2,%xmm3
4138 pslldq $4,%xmm2
4139 pxor %xmm2,%xmm3
4140 pslldq $4,%xmm2
4141 pxor %xmm3,%xmm2
4142
4143 pxor %xmm2,%xmm0
4144 movdqu %xmm0,-16(%rax)
4145 movdqa %xmm0,%xmm2
4146
4147 decl %r10d
4148 jnz .Loop_key128
4149
4150 movdqa .Lkey_rcon1b(%rip),%xmm4
4151
4152.byte 102,15,56,0,197
4153.byte 102,15,56,221,196
4154 pslld $1,%xmm4
4155
4156 movdqa %xmm2,%xmm3
4157 pslldq $4,%xmm2
4158 pxor %xmm2,%xmm3
4159 pslldq $4,%xmm2
4160 pxor %xmm2,%xmm3
4161 pslldq $4,%xmm2
4162 pxor %xmm3,%xmm2
4163
4164 pxor %xmm2,%xmm0
4165 movdqu %xmm0,(%rax)
4166
4167 movdqa %xmm0,%xmm2
4168.byte 102,15,56,0,197
4169.byte 102,15,56,221,196
4170
4171 movdqa %xmm2,%xmm3
4172 pslldq $4,%xmm2
4173 pxor %xmm2,%xmm3
4174 pslldq $4,%xmm2
4175 pxor %xmm2,%xmm3
4176 pslldq $4,%xmm2
4177 pxor %xmm3,%xmm2
4178
4179 pxor %xmm2,%xmm0
4180 movdqu %xmm0,16(%rax)
4181
4182 movl %esi,96(%rax)
4183 xorl %eax,%eax
4184 jmp .Lenc_key_ret
4185
4186.align 16
4187.L12rounds:
4188 movq 16(%rdi),%xmm2
4189 movl $11,%esi
4190 cmpl $268435456,%r10d
4191 je .L12rounds_alt
4192
4193 movups %xmm0,(%rdx)
4194.byte 102,15,58,223,202,1
4195 call .Lkey_expansion_192a_cold
4196.byte 102,15,58,223,202,2
4197 call .Lkey_expansion_192b
4198.byte 102,15,58,223,202,4
4199 call .Lkey_expansion_192a
4200.byte 102,15,58,223,202,8
4201 call .Lkey_expansion_192b
4202.byte 102,15,58,223,202,16
4203 call .Lkey_expansion_192a
4204.byte 102,15,58,223,202,32
4205 call .Lkey_expansion_192b
4206.byte 102,15,58,223,202,64
4207 call .Lkey_expansion_192a
4208.byte 102,15,58,223,202,128
4209 call .Lkey_expansion_192b
4210 movups %xmm0,(%rax)
4211 movl %esi,48(%rax)
4212 xorq %rax,%rax
4213 jmp .Lenc_key_ret
4214
4215.align 16
4216.L12rounds_alt:
4217 movdqa .Lkey_rotate192(%rip),%xmm5
4218 movdqa .Lkey_rcon1(%rip),%xmm4
4219 movl $8,%r10d
4220 movdqu %xmm0,(%rdx)
4221 jmp .Loop_key192
4222
4223.align 16
4224.Loop_key192:
4225 movq %xmm2,0(%rax)
4226 movdqa %xmm2,%xmm1
4227.byte 102,15,56,0,213
4228.byte 102,15,56,221,212
4229 pslld $1,%xmm4
4230 leaq 24(%rax),%rax
4231
4232 movdqa %xmm0,%xmm3
4233 pslldq $4,%xmm0
4234 pxor %xmm0,%xmm3
4235 pslldq $4,%xmm0
4236 pxor %xmm0,%xmm3
4237 pslldq $4,%xmm0
4238 pxor %xmm3,%xmm0
4239
4240 pshufd $0xff,%xmm0,%xmm3
4241 pxor %xmm1,%xmm3
4242 pslldq $4,%xmm1
4243 pxor %xmm1,%xmm3
4244
4245 pxor %xmm2,%xmm0
4246 pxor %xmm3,%xmm2
4247 movdqu %xmm0,-16(%rax)
4248
4249 decl %r10d
4250 jnz .Loop_key192
4251
4252 movl %esi,32(%rax)
4253 xorl %eax,%eax
4254 jmp .Lenc_key_ret
4255
4256.align 16
4257.L14rounds:
4258 movups 16(%rdi),%xmm2
4259 movl $13,%esi
4260 leaq 16(%rax),%rax
4261 cmpl $268435456,%r10d
4262 je .L14rounds_alt
4263
4264 movups %xmm0,(%rdx)
4265 movups %xmm2,16(%rdx)
4266.byte 102,15,58,223,202,1
4267 call .Lkey_expansion_256a_cold
4268.byte 102,15,58,223,200,1
4269 call .Lkey_expansion_256b
4270.byte 102,15,58,223,202,2
4271 call .Lkey_expansion_256a
4272.byte 102,15,58,223,200,2
4273 call .Lkey_expansion_256b
4274.byte 102,15,58,223,202,4
4275 call .Lkey_expansion_256a
4276.byte 102,15,58,223,200,4
4277 call .Lkey_expansion_256b
4278.byte 102,15,58,223,202,8
4279 call .Lkey_expansion_256a
4280.byte 102,15,58,223,200,8
4281 call .Lkey_expansion_256b
4282.byte 102,15,58,223,202,16
4283 call .Lkey_expansion_256a
4284.byte 102,15,58,223,200,16
4285 call .Lkey_expansion_256b
4286.byte 102,15,58,223,202,32
4287 call .Lkey_expansion_256a
4288.byte 102,15,58,223,200,32
4289 call .Lkey_expansion_256b
4290.byte 102,15,58,223,202,64
4291 call .Lkey_expansion_256a
4292 movups %xmm0,(%rax)
4293 movl %esi,16(%rax)
4294 xorq %rax,%rax
4295 jmp .Lenc_key_ret
4296
4297.align 16
4298.L14rounds_alt:
4299 movdqa .Lkey_rotate(%rip),%xmm5
4300 movdqa .Lkey_rcon1(%rip),%xmm4
4301 movl $7,%r10d
4302 movdqu %xmm0,0(%rdx)
4303 movdqa %xmm2,%xmm1
4304 movdqu %xmm2,16(%rdx)
4305 jmp .Loop_key256
4306
4307.align 16
4308.Loop_key256:
4309.byte 102,15,56,0,213
4310.byte 102,15,56,221,212
4311
4312 movdqa %xmm0,%xmm3
4313 pslldq $4,%xmm0
4314 pxor %xmm0,%xmm3
4315 pslldq $4,%xmm0
4316 pxor %xmm0,%xmm3
4317 pslldq $4,%xmm0
4318 pxor %xmm3,%xmm0
4319 pslld $1,%xmm4
4320
4321 pxor %xmm2,%xmm0
4322 movdqu %xmm0,(%rax)
4323
4324 decl %r10d
4325 jz .Ldone_key256
4326
4327 pshufd $0xff,%xmm0,%xmm2
4328 pxor %xmm3,%xmm3
4329.byte 102,15,56,221,211
4330
4331 movdqa %xmm1,%xmm3
4332 pslldq $4,%xmm1
4333 pxor %xmm1,%xmm3
4334 pslldq $4,%xmm1
4335 pxor %xmm1,%xmm3
4336 pslldq $4,%xmm1
4337 pxor %xmm3,%xmm1
4338
4339 pxor %xmm1,%xmm2
4340 movdqu %xmm2,16(%rax)
4341 leaq 32(%rax),%rax
4342 movdqa %xmm2,%xmm1
4343
4344 jmp .Loop_key256
4345
4346.Ldone_key256:
4347 movl %esi,16(%rax)
4348 xorl %eax,%eax
4349 jmp .Lenc_key_ret
4350
4351.align 16
4352.Lbad_keybits:
4353 movq $-2,%rax
4354.Lenc_key_ret:
4355 pxor %xmm0,%xmm0
4356 pxor %xmm1,%xmm1
4357 pxor %xmm2,%xmm2
4358 pxor %xmm3,%xmm3
4359 pxor %xmm4,%xmm4
4360 pxor %xmm5,%xmm5
4361 addq $8,%rsp
4362.cfi_adjust_cfa_offset -8
4363 .byte 0xf3,0xc3
4364.cfi_endproc
4365.LSEH_end_set_encrypt_key:
4366
4367.align 16
/*
 * .Lkey_expansion_128 / .Lkey_expansion_128_cold
 *
 * One step of the 128-bit key schedule, reached with "call" from the
 * .L10rounds path.
 *
 * In:   xmm0 = previous round key, dwords (k0,k1,k2,k3)
 *       xmm1 = output of the AESKEYGENASSIST that the caller emits as
 *              ".byte 102,15,58,223,200,rcon" just before the call
 *       xmm4 = scratch; dword 0 is zero (the caller clears xmm4 with xorps
 *              and both shufps below keep dword 0 in place)
 *       rax  = output pointer (used by the non-cold entry only)
 * Out:  xmm0 = next round key; rax advanced by 16 on the non-cold entry
 * Clobbers: xmm1, xmm4 (dwords 1..3)
 *
 * The _cold entry skips the store; it is used for the first step, where
 * the caller has already written xmm0 to (%rdx).
 */
4368.Lkey_expansion_128:
/* Write the current round key and step the output pointer to the next slot. */
4369 movups %xmm0,(%rax)
4370 leaq 16(%rax),%rax
4371.Lkey_expansion_128_cold:
/* xmm4 = (0, 0, k1, k0); xmm0 = (k0, k1, k2^k1, k3^k0) */
4372 shufps $16,%xmm0,%xmm4
4373 xorps %xmm4,%xmm0
/* xmm4 = (0, k0, k0, k1^k2); xmm0 = (k0, k0^k1, k0^k1^k2, k0^k1^k2^k3) */
4374 shufps $140,%xmm0,%xmm4
4375 xorps %xmm4,%xmm0
/* Broadcast dword 3 of the AESKEYGENASSIST result and fold it into all four words. */
4376 shufps $255,%xmm1,%xmm1
4377 xorps %xmm1,%xmm0
/* 0xf3,0xc3 encodes "rep ret". */
4378 .byte 0xf3,0xc3
4379
4380.align 16
/*
 * .Lkey_expansion_192a / .Lkey_expansion_192a_cold / .Lkey_expansion_192b_warm
 *
 * One step of the 192-bit key schedule, reached with "call" from the
 * .L12rounds path (and by a tail jump from .Lkey_expansion_192b).
 *
 * In:   xmm0 = four key words (k0,k1,k2,k3)
 *       xmm2 = two further key words in its low 64 bits
 *       xmm1 = output of the AESKEYGENASSIST on xmm2 that the caller emits
 *              as ".byte 102,15,58,223,202,rcon" just before the call
 *       xmm4 = scratch; dword 0 is zero and is kept zero by both shufps
 *       rax  = output pointer (used by the 192a entry only)
 * Out:  xmm0 and the low half of xmm2 updated to the next six key words;
 *       xmm5 = copy of the incoming xmm2 (192a and 192a_cold entries only)
 * Clobbers: xmm1, xmm3, xmm4 (dwords 1..3)
 *
 * Entry points:
 *   192a       stores xmm0 and advances rax by 16, then falls through
 *   192a_cold  skips the store (first step; caller already wrote xmm0)
 *   192b_warm  additionally skips the xmm2 -> xmm5 copy
 */
4381.Lkey_expansion_192a:
4382 movups %xmm0,(%rax)
4383 leaq 16(%rax),%rax
4384.Lkey_expansion_192a_cold:
/* Keep the incoming xmm2; .Lkey_expansion_192b reads xmm5 when it stores. */
4385 movaps %xmm2,%xmm5
4386.Lkey_expansion_192b_warm:
/* The xmm0/xmm4 shuffle-and-xor pair below builds the running XOR */
/* (k0, k0^k1, k0^k1^k2, k0^k1^k2^k3) in xmm0; the xmm2/xmm3 */
/* instructions interleaved with it do the same for the low words of xmm2. */
4387 shufps $16,%xmm0,%xmm4
4388 movdqa %xmm2,%xmm3
4389 xorps %xmm4,%xmm0
4390 shufps $140,%xmm0,%xmm4
/* xmm3 = xmm2 shifted left by one dword (4 bytes). */
4391 pslldq $4,%xmm3
4392 xorps %xmm4,%xmm0
/* Broadcast dword 1 of the AESKEYGENASSIST result ($85 = 0b01010101). */
4393 pshufd $85,%xmm1,%xmm1
/* xmm2 ^= xmm2 << 32, so dword 1 becomes (dword 0 ^ dword 1). */
4394 pxor %xmm3,%xmm2
4395 pxor %xmm1,%xmm0
/* Chain the last word of the new xmm0 into xmm2. */
4396 pshufd $255,%xmm0,%xmm3
4397 pxor %xmm3,%xmm2
/* 0xf3,0xc3 encodes "rep ret". */
4398 .byte 0xf3,0xc3
4399
4400.align 16
/*
 * .Lkey_expansion_192b
 *
 * Companion step to .Lkey_expansion_192a on the .L12rounds path. It
 * repacks the 64-bit halves held in xmm5, xmm0 and xmm2 into two 16-byte
 * blocks, stores both, advances rax by 32, and then tail-jumps to
 * .Lkey_expansion_192b_warm to compute the next key words. The "ret"
 * reached there returns to this routine's caller.
 *
 * In:   xmm0, xmm2 = current key words; xmm5 = copy of xmm2 saved by the
 *       preceding 192a / 192a_cold step; rax = output pointer
 * Clobbers: xmm3, xmm5, plus everything .Lkey_expansion_192b_warm touches
 */
4401.Lkey_expansion_192b:
4402 movaps %xmm0,%xmm3
/* $68 = 0b01000100: xmm5 = (xmm5 low qword, xmm0 low qword) */
4403 shufps $68,%xmm0,%xmm5
4404 movups %xmm5,(%rax)
/* $78 = 0b01001110: xmm3 = (xmm0 high qword, xmm2 low qword) */
4405 shufps $78,%xmm2,%xmm3
4406 movups %xmm3,16(%rax)
4407 leaq 32(%rax),%rax
4408 jmp .Lkey_expansion_192b_warm
4409
4410.align 16
/*
 * .Lkey_expansion_256a / .Lkey_expansion_256a_cold
 *
 * Step of the 256-bit key schedule that updates xmm0, reached with "call"
 * from the .L14rounds path.
 *
 * In:   xmm0 = round key to update, dwords (k0,k1,k2,k3)
 *       xmm2 = most recently produced round key (stored by the 256a entry)
 *       xmm1 = output of the AESKEYGENASSIST on xmm2 that the caller emits
 *              as ".byte 102,15,58,223,202,rcon" just before the call
 *       xmm4 = scratch; dword 0 is zero and is kept zero by both shufps
 *       rax  = output pointer (used by the non-cold entry only)
 * Out:  xmm0 = next round key; rax advanced by 16 on the non-cold entry
 * Clobbers: xmm1, xmm4 (dwords 1..3)
 *
 * The _cold entry skips the store; the caller has already written both
 * halves of the user key to (%rdx) and 16(%rdx) before the first step.
 */
4411.Lkey_expansion_256a:
4412 movups %xmm2,(%rax)
4413 leaq 16(%rax),%rax
4414.Lkey_expansion_256a_cold:
/* xmm4 = (0, 0, k1, k0); xmm0 = (k0, k1, k2^k1, k3^k0) */
4415 shufps $16,%xmm0,%xmm4
4416 xorps %xmm4,%xmm0
/* xmm4 = (0, k0, k0, k1^k2); xmm0 = (k0, k0^k1, k0^k1^k2, k0^k1^k2^k3) */
4417 shufps $140,%xmm0,%xmm4
4418 xorps %xmm4,%xmm0
/* Broadcast dword 3 of the AESKEYGENASSIST result and fold it in. */
4419 shufps $255,%xmm1,%xmm1
4420 xorps %xmm1,%xmm0
/* 0xf3,0xc3 encodes "rep ret". */
4421 .byte 0xf3,0xc3
4422
4423.align 16
/*
 * .Lkey_expansion_256b
 *
 * Step of the 256-bit key schedule that updates xmm2, reached with "call"
 * from the .L14rounds path. It stores the xmm0 produced by the preceding
 * 256a step, then applies the same running-XOR construction to xmm2.
 *
 * In:   xmm0 = round key just produced by 256a (stored here)
 *       xmm2 = round key to update, dwords (k0,k1,k2,k3)
 *       xmm1 = output of the AESKEYGENASSIST on xmm0 that the caller emits
 *              as ".byte 102,15,58,223,200,rcon" just before the call
 *       xmm4 = scratch; dword 0 is zero and is kept zero by both shufps
 *       rax  = output pointer
 * Out:  xmm2 = next round key; rax advanced by 16
 * Clobbers: xmm1, xmm4 (dwords 1..3)
 */
4424.Lkey_expansion_256b:
4425 movups %xmm0,(%rax)
4426 leaq 16(%rax),%rax
4427
/* xmm4 = (0, 0, k1, k0); xmm2 = (k0, k1, k2^k1, k3^k0) */
4428 shufps $16,%xmm2,%xmm4
4429 xorps %xmm4,%xmm2
/* xmm4 = (0, k0, k0, k1^k2); xmm2 = (k0, k0^k1, k0^k1^k2, k0^k1^k2^k3) */
4430 shufps $140,%xmm2,%xmm4
4431 xorps %xmm4,%xmm2
/* $170 = 0b10101010: broadcast dword 2 of the AESKEYGENASSIST result */
/* (256a uses dword 3 instead) and fold it in. */
4432 shufps $170,%xmm1,%xmm1
4433 xorps %xmm1,%xmm2
/* 0xf3,0xc3 encodes "rep ret". */
4434 .byte 0xf3,0xc3
4435.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
4436.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
4437.align 64
/*
 * Read-only 16-byte constants, reached RIP-relative from the code above.
 * Only the .Lkey_* entries are referenced in the key-schedule code near
 * this table; the notes on the others describe the values themselves.
 * NOTE(review): confirm the users of .Lbswap_mask, .Lincrement32,
 * .Lincrement64, .Lxts_magic and .Lincrement1 against the rest of the file.
 */
/* Byte indices 15..0: as a PSHUFB control this reverses all 16 bytes. */
4438.Lbswap_mask:
4439.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
/* Dwords (6,6,6,0) -- presumably a per-lane counter step; verify at the use site. */
4440.Lincrement32:
4441.long 6,6,6,0
/* 128-bit value with 1 in the lowest dword. */
4442.Lincrement64:
4443.long 1,0,0,0
/* Dwords (0x87,0,1,0) -- presumably the XTS tweak-doubling constants; verify at the use site. */
4444.Lxts_magic:
4445.long 0x87,0,1,0
/* Sixteen bytes with a single 1 in the last (highest-addressed) byte. */
4446.Lincrement1:
4447.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
/* PSHUFB control loaded into xmm5 by .L10rounds_alt and .L14rounds_alt. */
/* In memory each dword is bytes 0x0d,0x0e,0x0f,0x0c, so every output dword */
/* is source dword 3 rotated right by 8 bits. */
4448.Lkey_rotate:
4449.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
/* PSHUFB control loaded into xmm5 by .L12rounds_alt: same rotation, */
/* taken from source dword 1 (bytes 5,6,7,4). */
4450.Lkey_rotate192:
4451.long 0x04070605,0x04070605,0x04070605,0x04070605
/* Initial round constant 1 in every lane; loaded into xmm4 by the _alt */
/* paths and doubled with "pslld $1" after each use. */
4452.Lkey_rcon1:
4453.long 1,1,1,1
/* Round constant 0x1b in every lane; loaded into xmm4 after .Loop_key128 */
/* has run its eight iterations. */
4454.Lkey_rcon1b:
4455.long 0x1b,0x1b,0x1b,0x1b
4456
/* NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
4457.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
4458.align 64
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette