VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.7/crypto/genasm-macosx/aesni-x86_64.S@ 98024

Last change on this file since 98024 was 94083, checked in by vboxsync, 3 years ago

libs/openssl-3.0.1: Recreate asm files, bugref:10128

File size: 80.3 KB
Line 
1.text
2
/*
 * _aesni_encrypt: encrypt a single 16-byte block with AES-NI.
 *   %rdi = address of the input block
 *   %rsi = address of the output block
 *   %rdx = address of the key schedule; the 32-bit value at offset 240 is
 *          used as the counter for the aesenc loop below.
 * Presumably corresponds to the C prototype
 *   aesni_encrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key)
 * -- TODO confirm against the C declaration.
 * Modifies %eax, %rdx and flags; %xmm0-%xmm2 are zeroed before returning.
 */
3.globl _aesni_encrypt
4
5.p2align 4
6_aesni_encrypt:
7
8.byte 243,15,30,250 /* endbr64 */
9 movups (%rdi),%xmm2 /* xmm2 = input block */
10 movl 240(%rdx),%eax /* eax = loop counter */
11 movups (%rdx),%xmm0 /* xmm0 = round key 0 */
12 movups 16(%rdx),%xmm1 /* xmm1 = round key 1 */
13 leaq 32(%rdx),%rdx /* rdx -> round key 2 */
14 xorps %xmm0,%xmm2 /* initial key whitening */
/* One aesenc per iteration; the next round key is fetched behind it. */
15L$oop_enc1_1:
16.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
17 decl %eax
18 movups (%rdx),%xmm1
19 leaq 16(%rdx),%rdx
/* movups and leaq leave flags alone, so jnz tests the result of decl. */
20 jnz L$oop_enc1_1
21.byte 102,15,56,221,209 /* aesenclast %xmm1,%xmm2 */
/* Zero the key registers, store the result, then zero the data register. */
22 pxor %xmm0,%xmm0
23 pxor %xmm1,%xmm1
24 movups %xmm2,(%rsi)
25 pxor %xmm2,%xmm2
26 .byte 0xf3,0xc3 /* rep ret */
27
28
29
/*
 * _aesni_decrypt: decrypt a single 16-byte block with AES-NI.
 *   %rdi = address of the input block
 *   %rsi = address of the output block
 *   %rdx = address of the key schedule; the 32-bit value at offset 240 is
 *          used as the counter for the aesdec loop below.
 * Presumably corresponds to the C prototype
 *   aesni_decrypt(const unsigned char *in, unsigned char *out, const AES_KEY *key)
 * -- TODO confirm against the C declaration.
 * Modifies %eax, %rdx and flags; %xmm0-%xmm2 are zeroed before returning.
 */
30.globl _aesni_decrypt
31
32.p2align 4
33_aesni_decrypt:
34
35.byte 243,15,30,250 /* endbr64 */
36 movups (%rdi),%xmm2 /* xmm2 = input block */
37 movl 240(%rdx),%eax /* eax = loop counter */
38 movups (%rdx),%xmm0 /* xmm0 = round key 0 */
39 movups 16(%rdx),%xmm1 /* xmm1 = round key 1 */
40 leaq 32(%rdx),%rdx /* rdx -> round key 2 */
41 xorps %xmm0,%xmm2 /* initial key whitening */
/* One aesdec per iteration; the next round key is fetched behind it. */
42L$oop_dec1_2:
43.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
44 decl %eax
45 movups (%rdx),%xmm1
46 leaq 16(%rdx),%rdx
/* movups and leaq leave flags alone, so jnz tests the result of decl. */
47 jnz L$oop_dec1_2
48.byte 102,15,56,223,209 /* aesdeclast %xmm1,%xmm2 */
/* Zero the key registers, store the result, then zero the data register. */
49 pxor %xmm0,%xmm0
50 pxor %xmm1,%xmm1
51 movups %xmm2,(%rsi)
52 pxor %xmm2,%xmm2
53 .byte 0xf3,0xc3 /* rep ret */
54
55
56
/*
 * _aesni_encrypt2: helper that encrypts two blocks in parallel.
 *   In:  %xmm2,%xmm3 = blocks, %rcx = key schedule address,
 *        %eax = loop count (callers in this file load it from offset 240
 *        of the key schedule).
 *   Out: %xmm2,%xmm3 = results.
 *   Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 * No .globl directive accompanies this label in the visible part of the file.
 */
57.p2align 4
58_aesni_encrypt2:
59
60 movups (%rcx),%xmm0 /* round key 0 */
61 shll $4,%eax /* count * 16 = byte length */
62 movups 16(%rcx),%xmm1 /* round key 1 */
63 xorps %xmm0,%xmm2 /* whiten both blocks with round key 0 */
64 xorps %xmm0,%xmm3
65 movups 32(%rcx),%xmm0 /* round key 2 */
/* rcx is advanced by 32 + count*16 and rax becomes a negative byte offset, */
/* so (%rcx,%rax,1) walks the schedule while rax counts up towards zero.    */
66 leaq 32(%rcx,%rax,1),%rcx
67 negq %rax
68 addq $16,%rax
69
/* Two rounds per iteration: one with xmm1, one with xmm0. */
70L$enc_loop2:
71.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
72.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
73 movups (%rcx,%rax,1),%xmm1
74 addq $32,%rax /* sets ZF once the offset reaches zero */
75.byte 102,15,56,220,208 /* aesenc %xmm0,%xmm2 */
76.byte 102,15,56,220,216 /* aesenc %xmm0,%xmm3 */
77 movups -16(%rcx,%rax,1),%xmm0
/* aesenc and movups do not touch flags; jnz tests the addq above. */
78 jnz L$enc_loop2
79
/* Last full round with xmm1, then the final round with xmm0. */
80.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
81.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
82.byte 102,15,56,221,208 /* aesenclast %xmm0,%xmm2 */
83.byte 102,15,56,221,216 /* aesenclast %xmm0,%xmm3 */
84 .byte 0xf3,0xc3 /* rep ret */
85
86
87
/*
 * _aesni_decrypt2: helper that decrypts two blocks in parallel.
 *   In:  %xmm2,%xmm3 = blocks, %rcx = key schedule address,
 *        %eax = loop count (callers in this file load it from offset 240
 *        of the key schedule).
 *   Out: %xmm2,%xmm3 = results.
 *   Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 * No .globl directive accompanies this label in the visible part of the file.
 */
88.p2align 4
89_aesni_decrypt2:
90
91 movups (%rcx),%xmm0 /* round key 0 */
92 shll $4,%eax /* count * 16 = byte length */
93 movups 16(%rcx),%xmm1 /* round key 1 */
94 xorps %xmm0,%xmm2 /* whiten both blocks with round key 0 */
95 xorps %xmm0,%xmm3
96 movups 32(%rcx),%xmm0 /* round key 2 */
/* rcx is advanced by 32 + count*16 and rax becomes a negative byte offset, */
/* so (%rcx,%rax,1) walks the schedule while rax counts up towards zero.    */
97 leaq 32(%rcx,%rax,1),%rcx
98 negq %rax
99 addq $16,%rax
100
/* Two rounds per iteration: one with xmm1, one with xmm0. */
101L$dec_loop2:
102.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
103.byte 102,15,56,222,217 /* aesdec %xmm1,%xmm3 */
104 movups (%rcx,%rax,1),%xmm1
105 addq $32,%rax /* sets ZF once the offset reaches zero */
106.byte 102,15,56,222,208 /* aesdec %xmm0,%xmm2 */
107.byte 102,15,56,222,216 /* aesdec %xmm0,%xmm3 */
108 movups -16(%rcx,%rax,1),%xmm0
/* aesdec and movups do not touch flags; jnz tests the addq above. */
109 jnz L$dec_loop2
110
/* Last full round with xmm1, then the final round with xmm0. */
111.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
112.byte 102,15,56,222,217 /* aesdec %xmm1,%xmm3 */
113.byte 102,15,56,223,208 /* aesdeclast %xmm0,%xmm2 */
114.byte 102,15,56,223,216 /* aesdeclast %xmm0,%xmm3 */
115 .byte 0xf3,0xc3 /* rep ret */
116
117
118
/*
 * _aesni_encrypt3: helper that encrypts three blocks in parallel.
 *   In:  %xmm2-%xmm4 = blocks, %rcx = key schedule address,
 *        %eax = loop count (callers in this file load it from offset 240
 *        of the key schedule).
 *   Out: %xmm2-%xmm4 = results.
 *   Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 */
119.p2align 4
120_aesni_encrypt3:
121
122 movups (%rcx),%xmm0 /* round key 0 */
123 shll $4,%eax /* count * 16 = byte length */
124 movups 16(%rcx),%xmm1 /* round key 1 */
125 xorps %xmm0,%xmm2 /* whiten the three blocks with round key 0 */
126 xorps %xmm0,%xmm3
127 xorps %xmm0,%xmm4
128 movups 32(%rcx),%xmm0 /* round key 2 */
/* rcx is advanced by 32 + count*16; rax becomes a negative offset that */
/* counts up to zero in steps of 32 (two round keys per iteration).     */
129 leaq 32(%rcx,%rax,1),%rcx
130 negq %rax
131 addq $16,%rax
132
133L$enc_loop3:
/* round with xmm1 on xmm2..xmm4 */
134.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
135.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
136.byte 102,15,56,220,225 /* aesenc %xmm1,%xmm4 */
137 movups (%rcx,%rax,1),%xmm1
138 addq $32,%rax /* sets ZF once the offset reaches zero */
/* round with xmm0 on xmm2..xmm4 */
139.byte 102,15,56,220,208 /* aesenc %xmm0,%xmm2 */
140.byte 102,15,56,220,216 /* aesenc %xmm0,%xmm3 */
141.byte 102,15,56,220,224 /* aesenc %xmm0,%xmm4 */
142 movups -16(%rcx,%rax,1),%xmm0
/* flags are still those of the addq above */
143 jnz L$enc_loop3
144
/* Last full round with xmm1, then the final round with xmm0. */
145.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
146.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
147.byte 102,15,56,220,225 /* aesenc %xmm1,%xmm4 */
148.byte 102,15,56,221,208 /* aesenclast %xmm0,%xmm2 */
149.byte 102,15,56,221,216 /* aesenclast %xmm0,%xmm3 */
150.byte 102,15,56,221,224 /* aesenclast %xmm0,%xmm4 */
151 .byte 0xf3,0xc3 /* rep ret */
152
153
154
/*
 * _aesni_decrypt3: helper that decrypts three blocks in parallel.
 *   In:  %xmm2-%xmm4 = blocks, %rcx = key schedule address,
 *        %eax = loop count (callers in this file load it from offset 240
 *        of the key schedule).
 *   Out: %xmm2-%xmm4 = results.
 *   Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 */
155.p2align 4
156_aesni_decrypt3:
157
158 movups (%rcx),%xmm0 /* round key 0 */
159 shll $4,%eax /* count * 16 = byte length */
160 movups 16(%rcx),%xmm1 /* round key 1 */
161 xorps %xmm0,%xmm2 /* whiten the three blocks with round key 0 */
162 xorps %xmm0,%xmm3
163 xorps %xmm0,%xmm4
164 movups 32(%rcx),%xmm0 /* round key 2 */
/* rcx is advanced by 32 + count*16; rax becomes a negative offset that */
/* counts up to zero in steps of 32 (two round keys per iteration).     */
165 leaq 32(%rcx,%rax,1),%rcx
166 negq %rax
167 addq $16,%rax
168
169L$dec_loop3:
/* round with xmm1 on xmm2..xmm4 */
170.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
171.byte 102,15,56,222,217 /* aesdec %xmm1,%xmm3 */
172.byte 102,15,56,222,225 /* aesdec %xmm1,%xmm4 */
173 movups (%rcx,%rax,1),%xmm1
174 addq $32,%rax /* sets ZF once the offset reaches zero */
/* round with xmm0 on xmm2..xmm4 */
175.byte 102,15,56,222,208 /* aesdec %xmm0,%xmm2 */
176.byte 102,15,56,222,216 /* aesdec %xmm0,%xmm3 */
177.byte 102,15,56,222,224 /* aesdec %xmm0,%xmm4 */
178 movups -16(%rcx,%rax,1),%xmm0
/* flags are still those of the addq above */
179 jnz L$dec_loop3
180
/* Last full round with xmm1, then the final round with xmm0. */
181.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
182.byte 102,15,56,222,217 /* aesdec %xmm1,%xmm3 */
183.byte 102,15,56,222,225 /* aesdec %xmm1,%xmm4 */
184.byte 102,15,56,223,208 /* aesdeclast %xmm0,%xmm2 */
185.byte 102,15,56,223,216 /* aesdeclast %xmm0,%xmm3 */
186.byte 102,15,56,223,224 /* aesdeclast %xmm0,%xmm4 */
187 .byte 0xf3,0xc3 /* rep ret */
188
189
190
/*
 * _aesni_encrypt4: helper that encrypts four blocks in parallel.
 *   In:  %xmm2-%xmm5 = blocks, %rcx = key schedule address,
 *        %eax = loop count (callers in this file load it from offset 240
 *        of the key schedule).
 *   Out: %xmm2-%xmm5 = results.
 *   Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 */
191.p2align 4
192_aesni_encrypt4:
193
194 movups (%rcx),%xmm0 /* round key 0 */
195 shll $4,%eax /* count * 16 = byte length */
196 movups 16(%rcx),%xmm1 /* round key 1 */
197 xorps %xmm0,%xmm2 /* whiten the four blocks with round key 0 */
198 xorps %xmm0,%xmm3
199 xorps %xmm0,%xmm4
200 xorps %xmm0,%xmm5
201 movups 32(%rcx),%xmm0 /* round key 2 */
/* rcx is advanced by 32 + count*16; rax becomes a negative offset that */
/* counts up to zero in steps of 32 (two round keys per iteration).     */
202 leaq 32(%rcx,%rax,1),%rcx
203 negq %rax
204.byte 0x0f,0x1f,0x00 /* nopl (%rax): 3-byte no-op, presumably padding for loop alignment */
205 addq $16,%rax
206
207L$enc_loop4:
/* round with xmm1 on xmm2..xmm5 */
208.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
209.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
210.byte 102,15,56,220,225 /* aesenc %xmm1,%xmm4 */
211.byte 102,15,56,220,233 /* aesenc %xmm1,%xmm5 */
212 movups (%rcx,%rax,1),%xmm1
213 addq $32,%rax /* sets ZF once the offset reaches zero */
/* round with xmm0 on xmm2..xmm5 */
214.byte 102,15,56,220,208 /* aesenc %xmm0,%xmm2 */
215.byte 102,15,56,220,216 /* aesenc %xmm0,%xmm3 */
216.byte 102,15,56,220,224 /* aesenc %xmm0,%xmm4 */
217.byte 102,15,56,220,232 /* aesenc %xmm0,%xmm5 */
218 movups -16(%rcx,%rax,1),%xmm0
/* flags are still those of the addq above */
219 jnz L$enc_loop4
220
/* Last full round with xmm1, then the final round with xmm0. */
221.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
222.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
223.byte 102,15,56,220,225 /* aesenc %xmm1,%xmm4 */
224.byte 102,15,56,220,233 /* aesenc %xmm1,%xmm5 */
225.byte 102,15,56,221,208 /* aesenclast %xmm0,%xmm2 */
226.byte 102,15,56,221,216 /* aesenclast %xmm0,%xmm3 */
227.byte 102,15,56,221,224 /* aesenclast %xmm0,%xmm4 */
228.byte 102,15,56,221,232 /* aesenclast %xmm0,%xmm5 */
229 .byte 0xf3,0xc3 /* rep ret */
230
231
232
/*
 * _aesni_decrypt4: helper that decrypts four blocks in parallel.
 *   In:  %xmm2-%xmm5 = blocks, %rcx = key schedule address,
 *        %eax = loop count (callers in this file load it from offset 240
 *        of the key schedule).
 *   Out: %xmm2-%xmm5 = results.
 *   Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 */
233.p2align 4
234_aesni_decrypt4:
235
236 movups (%rcx),%xmm0 /* round key 0 */
237 shll $4,%eax /* count * 16 = byte length */
238 movups 16(%rcx),%xmm1 /* round key 1 */
239 xorps %xmm0,%xmm2 /* whiten the four blocks with round key 0 */
240 xorps %xmm0,%xmm3
241 xorps %xmm0,%xmm4
242 xorps %xmm0,%xmm5
243 movups 32(%rcx),%xmm0 /* round key 2 */
/* rcx is advanced by 32 + count*16; rax becomes a negative offset that */
/* counts up to zero in steps of 32 (two round keys per iteration).     */
244 leaq 32(%rcx,%rax,1),%rcx
245 negq %rax
246.byte 0x0f,0x1f,0x00 /* nopl (%rax): 3-byte no-op, presumably padding for loop alignment */
247 addq $16,%rax
248
249L$dec_loop4:
/* round with xmm1 on xmm2..xmm5 */
250.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
251.byte 102,15,56,222,217 /* aesdec %xmm1,%xmm3 */
252.byte 102,15,56,222,225 /* aesdec %xmm1,%xmm4 */
253.byte 102,15,56,222,233 /* aesdec %xmm1,%xmm5 */
254 movups (%rcx,%rax,1),%xmm1
255 addq $32,%rax /* sets ZF once the offset reaches zero */
/* round with xmm0 on xmm2..xmm5 */
256.byte 102,15,56,222,208 /* aesdec %xmm0,%xmm2 */
257.byte 102,15,56,222,216 /* aesdec %xmm0,%xmm3 */
258.byte 102,15,56,222,224 /* aesdec %xmm0,%xmm4 */
259.byte 102,15,56,222,232 /* aesdec %xmm0,%xmm5 */
260 movups -16(%rcx,%rax,1),%xmm0
/* flags are still those of the addq above */
261 jnz L$dec_loop4
262
/* Last full round with xmm1, then the final round with xmm0. */
263.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
264.byte 102,15,56,222,217 /* aesdec %xmm1,%xmm3 */
265.byte 102,15,56,222,225 /* aesdec %xmm1,%xmm4 */
266.byte 102,15,56,222,233 /* aesdec %xmm1,%xmm5 */
267.byte 102,15,56,223,208 /* aesdeclast %xmm0,%xmm2 */
268.byte 102,15,56,223,216 /* aesdeclast %xmm0,%xmm3 */
269.byte 102,15,56,223,224 /* aesdeclast %xmm0,%xmm4 */
270.byte 102,15,56,223,232 /* aesdeclast %xmm0,%xmm5 */
271 .byte 0xf3,0xc3 /* rep ret */
272
273
274
/*
 * _aesni_encrypt6: helper that encrypts six blocks in parallel.
 *   In:  %xmm2-%xmm7 = blocks, %rcx = key schedule address,
 *        %eax = loop count (callers in this file load it from offset 240
 *        of the key schedule).
 *   Out: %xmm2-%xmm7 = results.
 *   Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 * The first round with xmm1 is started on xmm2..xmm4 while xmm5..xmm7 are
 * still being whitened, which is why the loop is entered in the middle at
 * L$enc_loop6_enter. L$enc_loop6 is also a call target of
 * _aesni_ctr32_encrypt_blocks later in this file.
 */
275.p2align 4
276_aesni_encrypt6:
277
278 movups (%rcx),%xmm0 /* round key 0 */
279 shll $4,%eax /* count * 16 = byte length */
280 movups 16(%rcx),%xmm1 /* round key 1 */
281 xorps %xmm0,%xmm2 /* whiten xmm2..xmm4 */
282 pxor %xmm0,%xmm3
283 pxor %xmm0,%xmm4
284.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
285 leaq 32(%rcx,%rax,1),%rcx /* rcx += 32 + count*16 */
286 negq %rax /* rax = negative byte offset */
287.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
288 pxor %xmm0,%xmm5 /* whiten xmm5..xmm7 */
289 pxor %xmm0,%xmm6
290.byte 102,15,56,220,225 /* aesenc %xmm1,%xmm4 */
291 pxor %xmm0,%xmm7
292 movups (%rcx,%rax,1),%xmm0 /* round key 2 (address is original rcx + 32) */
293 addq $16,%rax
294 jmp L$enc_loop6_enter
295.p2align 4
296L$enc_loop6:
/* round with xmm1 on xmm2..xmm4 (already done for the first pass above) */
297.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
298.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
299.byte 102,15,56,220,225 /* aesenc %xmm1,%xmm4 */
300L$enc_loop6_enter:
/* round with xmm1 on xmm5..xmm7 */
301.byte 102,15,56,220,233 /* aesenc %xmm1,%xmm5 */
302.byte 102,15,56,220,241 /* aesenc %xmm1,%xmm6 */
303.byte 102,15,56,220,249 /* aesenc %xmm1,%xmm7 */
304 movups (%rcx,%rax,1),%xmm1
305 addq $32,%rax /* sets ZF once the offset reaches zero */
/* round with xmm0 on xmm2..xmm7 */
306.byte 102,15,56,220,208 /* aesenc %xmm0,%xmm2 */
307.byte 102,15,56,220,216 /* aesenc %xmm0,%xmm3 */
308.byte 102,15,56,220,224 /* aesenc %xmm0,%xmm4 */
309.byte 102,15,56,220,232 /* aesenc %xmm0,%xmm5 */
310.byte 102,15,56,220,240 /* aesenc %xmm0,%xmm6 */
311.byte 102,15,56,220,248 /* aesenc %xmm0,%xmm7 */
312 movups -16(%rcx,%rax,1),%xmm0
/* flags are still those of the addq above */
313 jnz L$enc_loop6
314
/* Last full round with xmm1, then the final round with xmm0. */
315.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
316.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
317.byte 102,15,56,220,225 /* aesenc %xmm1,%xmm4 */
318.byte 102,15,56,220,233 /* aesenc %xmm1,%xmm5 */
319.byte 102,15,56,220,241 /* aesenc %xmm1,%xmm6 */
320.byte 102,15,56,220,249 /* aesenc %xmm1,%xmm7 */
321.byte 102,15,56,221,208 /* aesenclast %xmm0,%xmm2 */
322.byte 102,15,56,221,216 /* aesenclast %xmm0,%xmm3 */
323.byte 102,15,56,221,224 /* aesenclast %xmm0,%xmm4 */
324.byte 102,15,56,221,232 /* aesenclast %xmm0,%xmm5 */
325.byte 102,15,56,221,240 /* aesenclast %xmm0,%xmm6 */
326.byte 102,15,56,221,248 /* aesenclast %xmm0,%xmm7 */
327 .byte 0xf3,0xc3 /* rep ret */
328
329
330
/*
 * _aesni_decrypt6: helper that decrypts six blocks in parallel.
 *   In:  %xmm2-%xmm7 = blocks, %rcx = key schedule address,
 *        %eax = loop count (callers in this file load it from offset 240
 *        of the key schedule).
 *   Out: %xmm2-%xmm7 = results.
 *   Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 * The first round with xmm1 is started on xmm2..xmm4 while xmm5..xmm7 are
 * still being whitened, which is why the loop is entered in the middle at
 * L$dec_loop6_enter.
 */
331.p2align 4
332_aesni_decrypt6:
333
334 movups (%rcx),%xmm0 /* round key 0 */
335 shll $4,%eax /* count * 16 = byte length */
336 movups 16(%rcx),%xmm1 /* round key 1 */
337 xorps %xmm0,%xmm2 /* whiten xmm2..xmm4 */
338 pxor %xmm0,%xmm3
339 pxor %xmm0,%xmm4
340.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
341 leaq 32(%rcx,%rax,1),%rcx /* rcx += 32 + count*16 */
342 negq %rax /* rax = negative byte offset */
343.byte 102,15,56,222,217 /* aesdec %xmm1,%xmm3 */
344 pxor %xmm0,%xmm5 /* whiten xmm5..xmm7 */
345 pxor %xmm0,%xmm6
346.byte 102,15,56,222,225 /* aesdec %xmm1,%xmm4 */
347 pxor %xmm0,%xmm7
348 movups (%rcx,%rax,1),%xmm0 /* round key 2 (address is original rcx + 32) */
349 addq $16,%rax
350 jmp L$dec_loop6_enter
351.p2align 4
352L$dec_loop6:
/* round with xmm1 on xmm2..xmm4 (already done for the first pass above) */
353.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
354.byte 102,15,56,222,217 /* aesdec %xmm1,%xmm3 */
355.byte 102,15,56,222,225 /* aesdec %xmm1,%xmm4 */
356L$dec_loop6_enter:
/* round with xmm1 on xmm5..xmm7 */
357.byte 102,15,56,222,233 /* aesdec %xmm1,%xmm5 */
358.byte 102,15,56,222,241 /* aesdec %xmm1,%xmm6 */
359.byte 102,15,56,222,249 /* aesdec %xmm1,%xmm7 */
360 movups (%rcx,%rax,1),%xmm1
361 addq $32,%rax /* sets ZF once the offset reaches zero */
/* round with xmm0 on xmm2..xmm7 */
362.byte 102,15,56,222,208 /* aesdec %xmm0,%xmm2 */
363.byte 102,15,56,222,216 /* aesdec %xmm0,%xmm3 */
364.byte 102,15,56,222,224 /* aesdec %xmm0,%xmm4 */
365.byte 102,15,56,222,232 /* aesdec %xmm0,%xmm5 */
366.byte 102,15,56,222,240 /* aesdec %xmm0,%xmm6 */
367.byte 102,15,56,222,248 /* aesdec %xmm0,%xmm7 */
368 movups -16(%rcx,%rax,1),%xmm0
/* flags are still those of the addq above */
369 jnz L$dec_loop6
370
/* Last full round with xmm1, then the final round with xmm0. */
371.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
372.byte 102,15,56,222,217 /* aesdec %xmm1,%xmm3 */
373.byte 102,15,56,222,225 /* aesdec %xmm1,%xmm4 */
374.byte 102,15,56,222,233 /* aesdec %xmm1,%xmm5 */
375.byte 102,15,56,222,241 /* aesdec %xmm1,%xmm6 */
376.byte 102,15,56,222,249 /* aesdec %xmm1,%xmm7 */
377.byte 102,15,56,223,208 /* aesdeclast %xmm0,%xmm2 */
378.byte 102,15,56,223,216 /* aesdeclast %xmm0,%xmm3 */
379.byte 102,15,56,223,224 /* aesdeclast %xmm0,%xmm4 */
380.byte 102,15,56,223,232 /* aesdeclast %xmm0,%xmm5 */
381.byte 102,15,56,223,240 /* aesdeclast %xmm0,%xmm6 */
382.byte 102,15,56,223,248 /* aesdeclast %xmm0,%xmm7 */
383 .byte 0xf3,0xc3 /* rep ret */
384
385
386
/*
 * _aesni_encrypt8: helper that encrypts eight blocks in parallel.
 *   In:  %xmm2-%xmm9 = blocks, %rcx = key schedule address,
 *        %eax = loop count (callers in this file load it from offset 240
 *        of the key schedule).
 *   Out: %xmm2-%xmm9 = results.
 *   Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 * The first round with xmm1 is started on xmm2 and xmm3 during whitening,
 * so the loop is entered at L$enc_loop8_inner. L$enc_loop8_enter is also a
 * call target of _aesni_ctr32_encrypt_blocks later in this file.
 */
387.p2align 4
388_aesni_encrypt8:
389
390 movups (%rcx),%xmm0 /* round key 0 */
391 shll $4,%eax /* count * 16 = byte length */
392 movups 16(%rcx),%xmm1 /* round key 1 */
393 xorps %xmm0,%xmm2 /* whiten xmm2..xmm6 */
394 xorps %xmm0,%xmm3
395 pxor %xmm0,%xmm4
396 pxor %xmm0,%xmm5
397 pxor %xmm0,%xmm6
398 leaq 32(%rcx,%rax,1),%rcx /* rcx += 32 + count*16 */
399 negq %rax /* rax = negative byte offset */
400.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
401 pxor %xmm0,%xmm7 /* whiten xmm7..xmm9 */
402 pxor %xmm0,%xmm8
403.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
404 pxor %xmm0,%xmm9
405 movups (%rcx,%rax,1),%xmm0 /* round key 2 (address is original rcx + 32) */
406 addq $16,%rax
407 jmp L$enc_loop8_inner
408.p2align 4
409L$enc_loop8:
/* round with xmm1 on xmm2, xmm3 (already done for the first pass above) */
410.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
411.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
412L$enc_loop8_inner:
/* round with xmm1 on xmm4..xmm9 */
413.byte 102,15,56,220,225 /* aesenc %xmm1,%xmm4 */
414.byte 102,15,56,220,233 /* aesenc %xmm1,%xmm5 */
415.byte 102,15,56,220,241 /* aesenc %xmm1,%xmm6 */
416.byte 102,15,56,220,249 /* aesenc %xmm1,%xmm7 */
417.byte 102,68,15,56,220,193 /* aesenc %xmm1,%xmm8 */
418.byte 102,68,15,56,220,201 /* aesenc %xmm1,%xmm9 */
419L$enc_loop8_enter:
420 movups (%rcx,%rax,1),%xmm1
421 addq $32,%rax /* sets ZF once the offset reaches zero */
/* round with xmm0 on xmm2..xmm9 */
422.byte 102,15,56,220,208 /* aesenc %xmm0,%xmm2 */
423.byte 102,15,56,220,216 /* aesenc %xmm0,%xmm3 */
424.byte 102,15,56,220,224 /* aesenc %xmm0,%xmm4 */
425.byte 102,15,56,220,232 /* aesenc %xmm0,%xmm5 */
426.byte 102,15,56,220,240 /* aesenc %xmm0,%xmm6 */
427.byte 102,15,56,220,248 /* aesenc %xmm0,%xmm7 */
428.byte 102,68,15,56,220,192 /* aesenc %xmm0,%xmm8 */
429.byte 102,68,15,56,220,200 /* aesenc %xmm0,%xmm9 */
430 movups -16(%rcx,%rax,1),%xmm0
/* flags are still those of the addq above */
431 jnz L$enc_loop8
432
/* Last full round with xmm1, then the final round with xmm0. */
433.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
434.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
435.byte 102,15,56,220,225 /* aesenc %xmm1,%xmm4 */
436.byte 102,15,56,220,233 /* aesenc %xmm1,%xmm5 */
437.byte 102,15,56,220,241 /* aesenc %xmm1,%xmm6 */
438.byte 102,15,56,220,249 /* aesenc %xmm1,%xmm7 */
439.byte 102,68,15,56,220,193 /* aesenc %xmm1,%xmm8 */
440.byte 102,68,15,56,220,201 /* aesenc %xmm1,%xmm9 */
441.byte 102,15,56,221,208 /* aesenclast %xmm0,%xmm2 */
442.byte 102,15,56,221,216 /* aesenclast %xmm0,%xmm3 */
443.byte 102,15,56,221,224 /* aesenclast %xmm0,%xmm4 */
444.byte 102,15,56,221,232 /* aesenclast %xmm0,%xmm5 */
445.byte 102,15,56,221,240 /* aesenclast %xmm0,%xmm6 */
446.byte 102,15,56,221,248 /* aesenclast %xmm0,%xmm7 */
447.byte 102,68,15,56,221,192 /* aesenclast %xmm0,%xmm8 */
448.byte 102,68,15,56,221,200 /* aesenclast %xmm0,%xmm9 */
449 .byte 0xf3,0xc3 /* rep ret */
450
451
452
/*
 * _aesni_decrypt8: helper that decrypts eight blocks in parallel.
 *   In:  %xmm2-%xmm9 = blocks, %rcx = key schedule address,
 *        %eax = loop count (callers in this file load it from offset 240
 *        of the key schedule).
 *   Out: %xmm2-%xmm9 = results.
 *   Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
 * The first round with xmm1 is started on xmm2 and xmm3 during whitening,
 * so the loop is entered at L$dec_loop8_inner.
 */
453.p2align 4
454_aesni_decrypt8:
455
456 movups (%rcx),%xmm0 /* round key 0 */
457 shll $4,%eax /* count * 16 = byte length */
458 movups 16(%rcx),%xmm1 /* round key 1 */
459 xorps %xmm0,%xmm2 /* whiten xmm2..xmm6 */
460 xorps %xmm0,%xmm3
461 pxor %xmm0,%xmm4
462 pxor %xmm0,%xmm5
463 pxor %xmm0,%xmm6
464 leaq 32(%rcx,%rax,1),%rcx /* rcx += 32 + count*16 */
465 negq %rax /* rax = negative byte offset */
466.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
467 pxor %xmm0,%xmm7 /* whiten xmm7..xmm9 */
468 pxor %xmm0,%xmm8
469.byte 102,15,56,222,217 /* aesdec %xmm1,%xmm3 */
470 pxor %xmm0,%xmm9
471 movups (%rcx,%rax,1),%xmm0 /* round key 2 (address is original rcx + 32) */
472 addq $16,%rax
473 jmp L$dec_loop8_inner
474.p2align 4
475L$dec_loop8:
/* round with xmm1 on xmm2, xmm3 (already done for the first pass above) */
476.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
477.byte 102,15,56,222,217 /* aesdec %xmm1,%xmm3 */
478L$dec_loop8_inner:
/* round with xmm1 on xmm4..xmm9 */
479.byte 102,15,56,222,225 /* aesdec %xmm1,%xmm4 */
480.byte 102,15,56,222,233 /* aesdec %xmm1,%xmm5 */
481.byte 102,15,56,222,241 /* aesdec %xmm1,%xmm6 */
482.byte 102,15,56,222,249 /* aesdec %xmm1,%xmm7 */
483.byte 102,68,15,56,222,193 /* aesdec %xmm1,%xmm8 */
484.byte 102,68,15,56,222,201 /* aesdec %xmm1,%xmm9 */
485L$dec_loop8_enter:
486 movups (%rcx,%rax,1),%xmm1
487 addq $32,%rax /* sets ZF once the offset reaches zero */
/* round with xmm0 on xmm2..xmm9 */
488.byte 102,15,56,222,208 /* aesdec %xmm0,%xmm2 */
489.byte 102,15,56,222,216 /* aesdec %xmm0,%xmm3 */
490.byte 102,15,56,222,224 /* aesdec %xmm0,%xmm4 */
491.byte 102,15,56,222,232 /* aesdec %xmm0,%xmm5 */
492.byte 102,15,56,222,240 /* aesdec %xmm0,%xmm6 */
493.byte 102,15,56,222,248 /* aesdec %xmm0,%xmm7 */
494.byte 102,68,15,56,222,192 /* aesdec %xmm0,%xmm8 */
495.byte 102,68,15,56,222,200 /* aesdec %xmm0,%xmm9 */
496 movups -16(%rcx,%rax,1),%xmm0
/* flags are still those of the addq above */
497 jnz L$dec_loop8
498
/* Last full round with xmm1, then the final round with xmm0. */
499.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
500.byte 102,15,56,222,217 /* aesdec %xmm1,%xmm3 */
501.byte 102,15,56,222,225 /* aesdec %xmm1,%xmm4 */
502.byte 102,15,56,222,233 /* aesdec %xmm1,%xmm5 */
503.byte 102,15,56,222,241 /* aesdec %xmm1,%xmm6 */
504.byte 102,15,56,222,249 /* aesdec %xmm1,%xmm7 */
505.byte 102,68,15,56,222,193 /* aesdec %xmm1,%xmm8 */
506.byte 102,68,15,56,222,201 /* aesdec %xmm1,%xmm9 */
507.byte 102,15,56,223,208 /* aesdeclast %xmm0,%xmm2 */
508.byte 102,15,56,223,216 /* aesdeclast %xmm0,%xmm3 */
509.byte 102,15,56,223,224 /* aesdeclast %xmm0,%xmm4 */
510.byte 102,15,56,223,232 /* aesdeclast %xmm0,%xmm5 */
511.byte 102,15,56,223,240 /* aesdeclast %xmm0,%xmm6 */
512.byte 102,15,56,223,248 /* aesdeclast %xmm0,%xmm7 */
513.byte 102,68,15,56,223,192 /* aesdeclast %xmm0,%xmm8 */
514.byte 102,68,15,56,223,200 /* aesdeclast %xmm0,%xmm9 */
515 .byte 0xf3,0xc3 /* rep ret */
516
517
/*
 * _aesni_ecb_encrypt: ECB-mode bulk encryption or decryption.
 *   %rdi = input address
 *   %rsi = output address
 *   %rdx = length in bytes; rounded down to a multiple of 16 on entry,
 *          and the function returns immediately if the result is zero
 *   %rcx = key schedule address (loop count read from offset 240)
 *   %r8d = direction: non-zero takes the encrypt path, zero jumps to
 *          L$ecb_decrypt
 * Presumably corresponds to a C prototype of the form
 *   aesni_ecb_encrypt(in, out, length, key, enc) -- TODO confirm.
 *
 * Data is processed eight blocks (0x80 bytes) per iteration while at least
 * that much remains. A remainder of 1..7 blocks is dispatched to the
 * 1/2/3/4/6/8-block code; the 5- and 7-block cases call the 6- and 8-block
 * helpers with a zeroed spare register and store only the blocks needed.
 * %r11 and %r10d keep copies of the key address and loop count because the
 * helpers modify %rcx and %rax.
 * The decrypt path zeroes each data register after storing it; every path
 * ends at L$ecb_ret, which zeroes %xmm0 and %xmm1.
 */
518.globl _aesni_ecb_encrypt
519
520.p2align 4
521_aesni_ecb_encrypt:
522
523.byte 243,15,30,250 /* endbr64 */
524 andq $-16,%rdx /* length &= ~15 */
525 jz L$ecb_ret
526
527 movl 240(%rcx),%eax /* eax = loop count */
528 movups (%rcx),%xmm0
529 movq %rcx,%r11 /* r11 = saved key address */
530 movl %eax,%r10d /* r10d = saved loop count */
531 testl %r8d,%r8d
532 jz L$ecb_decrypt
533
/* ---- encrypt path ---- */
534 cmpq $0x80,%rdx
535 jb L$ecb_enc_tail /* fewer than 8 blocks in total */
536
/* Load the first eight input blocks. */
537 movdqu (%rdi),%xmm2
538 movdqu 16(%rdi),%xmm3
539 movdqu 32(%rdi),%xmm4
540 movdqu 48(%rdi),%xmm5
541 movdqu 64(%rdi),%xmm6
542 movdqu 80(%rdi),%xmm7
543 movdqu 96(%rdi),%xmm8
544 movdqu 112(%rdi),%xmm9
545 leaq 128(%rdi),%rdi
546 subq $0x80,%rdx
547 jmp L$ecb_enc_loop8_enter
548.p2align 4
/* Store the eight results of the previous pass while loading the next */
/* eight inputs, and restore rcx/eax for the next helper call.         */
549L$ecb_enc_loop8:
550 movups %xmm2,(%rsi)
551 movq %r11,%rcx
552 movdqu (%rdi),%xmm2
553 movl %r10d,%eax
554 movups %xmm3,16(%rsi)
555 movdqu 16(%rdi),%xmm3
556 movups %xmm4,32(%rsi)
557 movdqu 32(%rdi),%xmm4
558 movups %xmm5,48(%rsi)
559 movdqu 48(%rdi),%xmm5
560 movups %xmm6,64(%rsi)
561 movdqu 64(%rdi),%xmm6
562 movups %xmm7,80(%rsi)
563 movdqu 80(%rdi),%xmm7
564 movups %xmm8,96(%rsi)
565 movdqu 96(%rdi),%xmm8
566 movups %xmm9,112(%rsi)
567 leaq 128(%rsi),%rsi
568 movdqu 112(%rdi),%xmm9
569 leaq 128(%rdi),%rdi
570L$ecb_enc_loop8_enter:
571
572 call _aesni_encrypt8
573
/* Loop again only if another full 0x80 bytes remained (no borrow). */
574 subq $0x80,%rdx
575 jnc L$ecb_enc_loop8
576
/* Store the last eight results and restore rcx/eax for the tail code. */
577 movups %xmm2,(%rsi)
578 movq %r11,%rcx
579 movups %xmm3,16(%rsi)
580 movl %r10d,%eax
581 movups %xmm4,32(%rsi)
582 movups %xmm5,48(%rsi)
583 movups %xmm6,64(%rsi)
584 movups %xmm7,80(%rsi)
585 movups %xmm8,96(%rsi)
586 movups %xmm9,112(%rsi)
587 leaq 128(%rsi),%rsi
588 addq $0x80,%rdx /* undo the last subtraction: rdx = bytes left */
589 jz L$ecb_ret
590
/* 1..7 blocks remain. movups does not change flags, so each cmpq feeds */
/* both the jb and the je that follow it.                              */
591L$ecb_enc_tail:
592 movups (%rdi),%xmm2
593 cmpq $0x20,%rdx
594 jb L$ecb_enc_one
595 movups 16(%rdi),%xmm3
596 je L$ecb_enc_two
597 movups 32(%rdi),%xmm4
598 cmpq $0x40,%rdx
599 jb L$ecb_enc_three
600 movups 48(%rdi),%xmm5
601 je L$ecb_enc_four
602 movups 64(%rdi),%xmm6
603 cmpq $0x60,%rdx
604 jb L$ecb_enc_five
605 movups 80(%rdi),%xmm7
606 je L$ecb_enc_six
/* Seven blocks: run the 8-block helper with a zero eighth block. */
607 movdqu 96(%rdi),%xmm8
608 xorps %xmm9,%xmm9
609 call _aesni_encrypt8
610 movups %xmm2,(%rsi)
611 movups %xmm3,16(%rsi)
612 movups %xmm4,32(%rsi)
613 movups %xmm5,48(%rsi)
614 movups %xmm6,64(%rsi)
615 movups %xmm7,80(%rsi)
616 movups %xmm8,96(%rsi)
617 jmp L$ecb_ret
618.p2align 4
/* One block: inline single-block encryption. */
619L$ecb_enc_one:
620 movups (%rcx),%xmm0
621 movups 16(%rcx),%xmm1
622 leaq 32(%rcx),%rcx
623 xorps %xmm0,%xmm2
624L$oop_enc1_3:
625.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
626 decl %eax
627 movups (%rcx),%xmm1
628 leaq 16(%rcx),%rcx
629 jnz L$oop_enc1_3
630.byte 102,15,56,221,209 /* aesenclast %xmm1,%xmm2 */
631 movups %xmm2,(%rsi)
632 jmp L$ecb_ret
633.p2align 4
634L$ecb_enc_two:
635 call _aesni_encrypt2
636 movups %xmm2,(%rsi)
637 movups %xmm3,16(%rsi)
638 jmp L$ecb_ret
639.p2align 4
640L$ecb_enc_three:
641 call _aesni_encrypt3
642 movups %xmm2,(%rsi)
643 movups %xmm3,16(%rsi)
644 movups %xmm4,32(%rsi)
645 jmp L$ecb_ret
646.p2align 4
647L$ecb_enc_four:
648 call _aesni_encrypt4
649 movups %xmm2,(%rsi)
650 movups %xmm3,16(%rsi)
651 movups %xmm4,32(%rsi)
652 movups %xmm5,48(%rsi)
653 jmp L$ecb_ret
654.p2align 4
/* Five blocks: run the 6-block helper with a zero sixth block. */
655L$ecb_enc_five:
656 xorps %xmm7,%xmm7
657 call _aesni_encrypt6
658 movups %xmm2,(%rsi)
659 movups %xmm3,16(%rsi)
660 movups %xmm4,32(%rsi)
661 movups %xmm5,48(%rsi)
662 movups %xmm6,64(%rsi)
663 jmp L$ecb_ret
664.p2align 4
665L$ecb_enc_six:
666 call _aesni_encrypt6
667 movups %xmm2,(%rsi)
668 movups %xmm3,16(%rsi)
669 movups %xmm4,32(%rsi)
670 movups %xmm5,48(%rsi)
671 movups %xmm6,64(%rsi)
672 movups %xmm7,80(%rsi)
673 jmp L$ecb_ret
674
675.p2align 4
/* ---- decrypt path: same structure as the encrypt path ---- */
676L$ecb_decrypt:
677 cmpq $0x80,%rdx
678 jb L$ecb_dec_tail /* fewer than 8 blocks in total */
679
/* Load the first eight input blocks. */
680 movdqu (%rdi),%xmm2
681 movdqu 16(%rdi),%xmm3
682 movdqu 32(%rdi),%xmm4
683 movdqu 48(%rdi),%xmm5
684 movdqu 64(%rdi),%xmm6
685 movdqu 80(%rdi),%xmm7
686 movdqu 96(%rdi),%xmm8
687 movdqu 112(%rdi),%xmm9
688 leaq 128(%rdi),%rdi
689 subq $0x80,%rdx
690 jmp L$ecb_dec_loop8_enter
691.p2align 4
/* Store the eight results of the previous pass while loading the next */
/* eight inputs, and restore rcx/eax for the next helper call.         */
692L$ecb_dec_loop8:
693 movups %xmm2,(%rsi)
694 movq %r11,%rcx
695 movdqu (%rdi),%xmm2
696 movl %r10d,%eax
697 movups %xmm3,16(%rsi)
698 movdqu 16(%rdi),%xmm3
699 movups %xmm4,32(%rsi)
700 movdqu 32(%rdi),%xmm4
701 movups %xmm5,48(%rsi)
702 movdqu 48(%rdi),%xmm5
703 movups %xmm6,64(%rsi)
704 movdqu 64(%rdi),%xmm6
705 movups %xmm7,80(%rsi)
706 movdqu 80(%rdi),%xmm7
707 movups %xmm8,96(%rsi)
708 movdqu 96(%rdi),%xmm8
709 movups %xmm9,112(%rsi)
710 leaq 128(%rsi),%rsi
711 movdqu 112(%rdi),%xmm9
712 leaq 128(%rdi),%rdi
713L$ecb_dec_loop8_enter:
714
715 call _aesni_decrypt8
716
717 movups (%r11),%xmm0 /* reload the first round key into xmm0 */
/* Loop again only if another full 0x80 bytes remained (no borrow). */
718 subq $0x80,%rdx
719 jnc L$ecb_dec_loop8
720
/* Store the last eight results, zeroing each data register after its */
/* store, and restore rcx/eax for the tail code.                      */
721 movups %xmm2,(%rsi)
722 pxor %xmm2,%xmm2
723 movq %r11,%rcx
724 movups %xmm3,16(%rsi)
725 pxor %xmm3,%xmm3
726 movl %r10d,%eax
727 movups %xmm4,32(%rsi)
728 pxor %xmm4,%xmm4
729 movups %xmm5,48(%rsi)
730 pxor %xmm5,%xmm5
731 movups %xmm6,64(%rsi)
732 pxor %xmm6,%xmm6
733 movups %xmm7,80(%rsi)
734 pxor %xmm7,%xmm7
735 movups %xmm8,96(%rsi)
736 pxor %xmm8,%xmm8
737 movups %xmm9,112(%rsi)
738 pxor %xmm9,%xmm9
739 leaq 128(%rsi),%rsi
740 addq $0x80,%rdx /* undo the last subtraction: rdx = bytes left */
741 jz L$ecb_ret
742
/* 1..7 blocks remain. movups does not change flags, so each cmpq feeds */
/* both the jb and the je that follow it.                              */
743L$ecb_dec_tail:
744 movups (%rdi),%xmm2
745 cmpq $0x20,%rdx
746 jb L$ecb_dec_one
747 movups 16(%rdi),%xmm3
748 je L$ecb_dec_two
749 movups 32(%rdi),%xmm4
750 cmpq $0x40,%rdx
751 jb L$ecb_dec_three
752 movups 48(%rdi),%xmm5
753 je L$ecb_dec_four
754 movups 64(%rdi),%xmm6
755 cmpq $0x60,%rdx
756 jb L$ecb_dec_five
757 movups 80(%rdi),%xmm7
758 je L$ecb_dec_six
/* Seven blocks: run the 8-block helper with a zero eighth block. */
759 movups 96(%rdi),%xmm8
760 movups (%rcx),%xmm0
761 xorps %xmm9,%xmm9
762 call _aesni_decrypt8
763 movups %xmm2,(%rsi)
764 pxor %xmm2,%xmm2
765 movups %xmm3,16(%rsi)
766 pxor %xmm3,%xmm3
767 movups %xmm4,32(%rsi)
768 pxor %xmm4,%xmm4
769 movups %xmm5,48(%rsi)
770 pxor %xmm5,%xmm5
771 movups %xmm6,64(%rsi)
772 pxor %xmm6,%xmm6
773 movups %xmm7,80(%rsi)
774 pxor %xmm7,%xmm7
775 movups %xmm8,96(%rsi)
776 pxor %xmm8,%xmm8
777 pxor %xmm9,%xmm9
778 jmp L$ecb_ret
779.p2align 4
/* One block: inline single-block decryption. */
780L$ecb_dec_one:
781 movups (%rcx),%xmm0
782 movups 16(%rcx),%xmm1
783 leaq 32(%rcx),%rcx
784 xorps %xmm0,%xmm2
785L$oop_dec1_4:
786.byte 102,15,56,222,209 /* aesdec %xmm1,%xmm2 */
787 decl %eax
788 movups (%rcx),%xmm1
789 leaq 16(%rcx),%rcx
790 jnz L$oop_dec1_4
791.byte 102,15,56,223,209 /* aesdeclast %xmm1,%xmm2 */
792 movups %xmm2,(%rsi)
793 pxor %xmm2,%xmm2
794 jmp L$ecb_ret
795.p2align 4
796L$ecb_dec_two:
797 call _aesni_decrypt2
798 movups %xmm2,(%rsi)
799 pxor %xmm2,%xmm2
800 movups %xmm3,16(%rsi)
801 pxor %xmm3,%xmm3
802 jmp L$ecb_ret
803.p2align 4
804L$ecb_dec_three:
805 call _aesni_decrypt3
806 movups %xmm2,(%rsi)
807 pxor %xmm2,%xmm2
808 movups %xmm3,16(%rsi)
809 pxor %xmm3,%xmm3
810 movups %xmm4,32(%rsi)
811 pxor %xmm4,%xmm4
812 jmp L$ecb_ret
813.p2align 4
814L$ecb_dec_four:
815 call _aesni_decrypt4
816 movups %xmm2,(%rsi)
817 pxor %xmm2,%xmm2
818 movups %xmm3,16(%rsi)
819 pxor %xmm3,%xmm3
820 movups %xmm4,32(%rsi)
821 pxor %xmm4,%xmm4
822 movups %xmm5,48(%rsi)
823 pxor %xmm5,%xmm5
824 jmp L$ecb_ret
825.p2align 4
/* Five blocks: run the 6-block helper with a zero sixth block. */
826L$ecb_dec_five:
827 xorps %xmm7,%xmm7
828 call _aesni_decrypt6
829 movups %xmm2,(%rsi)
830 pxor %xmm2,%xmm2
831 movups %xmm3,16(%rsi)
832 pxor %xmm3,%xmm3
833 movups %xmm4,32(%rsi)
834 pxor %xmm4,%xmm4
835 movups %xmm5,48(%rsi)
836 pxor %xmm5,%xmm5
837 movups %xmm6,64(%rsi)
838 pxor %xmm6,%xmm6
839 pxor %xmm7,%xmm7
840 jmp L$ecb_ret
841.p2align 4
/* Six blocks; this case falls through into L$ecb_ret. */
842L$ecb_dec_six:
843 call _aesni_decrypt6
844 movups %xmm2,(%rsi)
845 pxor %xmm2,%xmm2
846 movups %xmm3,16(%rsi)
847 pxor %xmm3,%xmm3
848 movups %xmm4,32(%rsi)
849 pxor %xmm4,%xmm4
850 movups %xmm5,48(%rsi)
851 pxor %xmm5,%xmm5
852 movups %xmm6,64(%rsi)
853 pxor %xmm6,%xmm6
854 movups %xmm7,80(%rsi)
855 pxor %xmm7,%xmm7
856
/* Common exit: zero the round-key registers and return. */
857L$ecb_ret:
858 xorps %xmm0,%xmm0
859 pxor %xmm1,%xmm1
860 .byte 0xf3,0xc3 /* rep ret */
861
862
/*
 * _aesni_ccm64_encrypt_blocks: per-block loop that encrypts data and
 * updates a second running 16-byte value with the same key, two AES
 * lanes at a time.
 *   %rdi = input address, %rsi = output address
 *   %rdx = number of 16-byte blocks; it is tested only after a block has
 *          been processed, so a count of zero is not handled here
 *   %rcx = key schedule address (loop count read from offset 240)
 *   %r8  = address of a 16-byte block loaded into xmm6 and used as the
 *          counter block (read only)
 *   %r9  = address of a 16-byte value loaded into xmm3 and written back
 *          on exit; presumably the CCM CBC-MAC state -- TODO confirm
 * For each block: lane xmm2 encrypts the current counter block and lane
 * xmm3 encrypts (xmm3 XOR input); output = input XOR encrypted counter.
 * L$increment64 and L$bswap_mask are defined outside this view.
 * Modifies %rax, %rcx, %rdx, %rdi, %rsi, %r10, %r11, flags and
 * %xmm0-%xmm3, %xmm6-%xmm9; all of those xmm registers except %xmm7 and
 * %xmm9 are zeroed before returning.
 */
863.globl _aesni_ccm64_encrypt_blocks
864
865.p2align 4
866_aesni_ccm64_encrypt_blocks:
867
868.byte 243,15,30,250 /* endbr64 */
869 movl 240(%rcx),%eax /* eax = loop count */
870 movdqu (%r8),%xmm6 /* xmm6 = counter block */
871 movdqa L$increment64(%rip),%xmm9
872 movdqa L$bswap_mask(%rip),%xmm7
873
874 shll $4,%eax /* count * 16 = byte length */
875 movl $16,%r10d
876 leaq 0(%rcx),%r11 /* r11 = start of key schedule */
877 movdqu (%r9),%xmm3 /* xmm3 = running value from (%r9) */
878 movdqa %xmm6,%xmm2 /* xmm2 = counter block as supplied */
879 leaq 32(%rcx,%rax,1),%rcx /* rcx = key + 32 + count*16 */
880.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6: shuffle counter bytes per L$bswap_mask */
881 subq %rax,%r10 /* r10 = 16 - count*16: initial negative key offset */
882 jmp L$ccm64_enc_outer
883.p2align 4
884L$ccm64_enc_outer:
885 movups (%r11),%xmm0 /* round key 0 */
886 movq %r10,%rax
887 movups (%rdi),%xmm8 /* xmm8 = input block */
888
889 xorps %xmm0,%xmm2 /* counter lane: whiten with round key 0 */
890 movups 16(%r11),%xmm1 /* round key 1 */
891 xorps %xmm8,%xmm0 /* xmm0 = round key 0 XOR input */
892 xorps %xmm0,%xmm3 /* second lane: xmm3 ^= input ^ round key 0 */
893 movups 32(%r11),%xmm0 /* round key 2 */
894
/* Two rounds per iteration on both lanes; rax counts up to zero. */
895L$ccm64_enc2_loop:
896.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
897.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
898 movups (%rcx,%rax,1),%xmm1
899 addq $32,%rax
900.byte 102,15,56,220,208 /* aesenc %xmm0,%xmm2 */
901.byte 102,15,56,220,216 /* aesenc %xmm0,%xmm3 */
902 movups -16(%rcx,%rax,1),%xmm0
903 jnz L$ccm64_enc2_loop
904.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
905.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
906 paddq %xmm9,%xmm6 /* advance the shuffled counter by L$increment64 */
/* ZF from this decq is consumed by the jnz at the bottom of the loop; */
/* none of the instructions in between modify flags.                   */
907 decq %rdx
908.byte 102,15,56,221,208 /* aesenclast %xmm0,%xmm2 */
909.byte 102,15,56,221,216 /* aesenclast %xmm0,%xmm3 */
910
911 leaq 16(%rdi),%rdi
912 xorps %xmm2,%xmm8 /* output = input XOR encrypted counter */
913 movdqa %xmm6,%xmm2 /* next counter, still in shuffled form */
914 movups %xmm8,(%rsi)
915.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2: shuffle the next counter back */
916 leaq 16(%rsi),%rsi
917 jnz L$ccm64_enc_outer
918
/* Zero working registers and write the running value back to (%r9). */
919 pxor %xmm0,%xmm0
920 pxor %xmm1,%xmm1
921 pxor %xmm2,%xmm2
922 movups %xmm3,(%r9)
923 pxor %xmm3,%xmm3
924 pxor %xmm8,%xmm8
925 pxor %xmm6,%xmm6
926 .byte 0xf3,0xc3 /* rep ret */
927
928
/*
 * _aesni_ccm64_decrypt_blocks: per-block loop that decrypts data and
 * updates a second running 16-byte value with the same key. Only AES
 * encryption rounds (aesenc/aesenclast) are used.
 *   %rdi = input address, %rsi = output address
 *   %rdx = number of 16-byte blocks; it is tested only after the first
 *          block has been written, so a count of zero is not handled here
 *   %rcx = key schedule address (loop count read from offset 240)
 *   %r8  = address of a 16-byte block loaded into xmm6 and used as the
 *          counter block (read only)
 *   %r9  = address of a 16-byte value loaded into xmm3 and written back
 *          on exit; presumably the CCM CBC-MAC state -- TODO confirm
 * The first counter block is encrypted alone. After that, each iteration
 * writes output = input XOR encrypted counter, then runs two lanes: xmm2
 * encrypts the next counter and xmm3 encrypts (xmm3 XOR that output).
 * For the final block the second lane is finished by itself at
 * L$ccm64_dec_break.
 * L$increment64 and L$bswap_mask are defined outside this view.
 * Modifies %rax, %rcx, %rdx, %rdi, %rsi, %r10, %r11, flags and
 * %xmm0-%xmm3, %xmm6-%xmm9; all of those xmm registers except %xmm7 and
 * %xmm9 are zeroed before returning.
 */
929.globl _aesni_ccm64_decrypt_blocks
930
931.p2align 4
932_aesni_ccm64_decrypt_blocks:
933
934.byte 243,15,30,250 /* endbr64 */
935 movl 240(%rcx),%eax /* eax = loop count */
936 movups (%r8),%xmm6 /* xmm6 = counter block */
937 movdqu (%r9),%xmm3 /* xmm3 = running value from (%r9) */
938 movdqa L$increment64(%rip),%xmm9
939 movdqa L$bswap_mask(%rip),%xmm7
940
941 movaps %xmm6,%xmm2 /* xmm2 = counter block as supplied */
942 movl %eax,%r10d /* keep the loop count */
943 movq %rcx,%r11 /* r11 = start of key schedule */
944.byte 102,15,56,0,247 /* pshufb %xmm7,%xmm6: shuffle counter bytes per L$bswap_mask */
/* Encrypt the first counter block alone. */
945 movups (%rcx),%xmm0
946 movups 16(%rcx),%xmm1
947 leaq 32(%rcx),%rcx
948 xorps %xmm0,%xmm2
949L$oop_enc1_5:
950.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
951 decl %eax
952 movups (%rcx),%xmm1
953 leaq 16(%rcx),%rcx
954 jnz L$oop_enc1_5
955.byte 102,15,56,221,209 /* aesenclast %xmm1,%xmm2 */
/* Set up the two-lane loop: rcx = key + 32 + count*16 and */
/* r10 = 16 - count*16 (initial negative key offset).      */
956 shll $4,%r10d
957 movl $16,%eax
958 movups (%rdi),%xmm8 /* xmm8 = first input block */
959 paddq %xmm9,%xmm6 /* advance the shuffled counter by L$increment64 */
960 leaq 16(%rdi),%rdi
961 subq %r10,%rax
962 leaq 32(%r11,%r10,1),%rcx
963 movq %rax,%r10
964 jmp L$ccm64_dec_outer
965.p2align 4
966L$ccm64_dec_outer:
967 xorps %xmm2,%xmm8 /* output = input XOR encrypted counter */
968 movdqa %xmm6,%xmm2 /* next counter, still in shuffled form */
969 movups %xmm8,(%rsi)
970 leaq 16(%rsi),%rsi
971.byte 102,15,56,0,215 /* pshufb %xmm7,%xmm2: shuffle the next counter back */
972
973 subq $1,%rdx
974 jz L$ccm64_dec_break /* that was the last block */
975
976 movups (%r11),%xmm0 /* round key 0 */
977 movq %r10,%rax
978 movups 16(%r11),%xmm1 /* round key 1 */
979 xorps %xmm0,%xmm8 /* xmm8 = output XOR round key 0 */
980 xorps %xmm0,%xmm2 /* counter lane: whiten with round key 0 */
981 xorps %xmm8,%xmm3 /* second lane: xmm3 ^= output ^ round key 0 */
982 movups 32(%r11),%xmm0 /* round key 2 */
983 jmp L$ccm64_dec2_loop
984.p2align 4
/* Two rounds per iteration on both lanes; rax counts up to zero. */
985L$ccm64_dec2_loop:
986.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
987.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
988 movups (%rcx,%rax,1),%xmm1
989 addq $32,%rax
990.byte 102,15,56,220,208 /* aesenc %xmm0,%xmm2 */
991.byte 102,15,56,220,216 /* aesenc %xmm0,%xmm3 */
992 movups -16(%rcx,%rax,1),%xmm0
993 jnz L$ccm64_dec2_loop
994 movups (%rdi),%xmm8 /* xmm8 = next input block */
995 paddq %xmm9,%xmm6 /* advance the shuffled counter */
996.byte 102,15,56,220,209 /* aesenc %xmm1,%xmm2 */
997.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
998.byte 102,15,56,221,208 /* aesenclast %xmm0,%xmm2 */
999.byte 102,15,56,221,216 /* aesenclast %xmm0,%xmm3 */
1000 leaq 16(%rdi),%rdi
1001 jmp L$ccm64_dec_outer
1002
1003.p2align 4
/* Last block: finish the second lane alone, walking the key schedule */
/* through r11 with a freshly loaded loop count.                      */
1004L$ccm64_dec_break:
1005
1006 movl 240(%r11),%eax
1007 movups (%r11),%xmm0
1008 movups 16(%r11),%xmm1
1009 xorps %xmm0,%xmm8 /* xmm8 = output XOR round key 0 */
1010 leaq 32(%r11),%r11
1011 xorps %xmm8,%xmm3
1012L$oop_enc1_6:
1013.byte 102,15,56,220,217 /* aesenc %xmm1,%xmm3 */
1014 decl %eax
1015 movups (%r11),%xmm1
1016 leaq 16(%r11),%r11
1017 jnz L$oop_enc1_6
1018.byte 102,15,56,221,217 /* aesenclast %xmm1,%xmm3 */
/* Zero working registers and write the running value back to (%r9). */
1019 pxor %xmm0,%xmm0
1020 pxor %xmm1,%xmm1
1021 pxor %xmm2,%xmm2
1022 movups %xmm3,(%r9)
1023 pxor %xmm3,%xmm3
1024 pxor %xmm8,%xmm8
1025 pxor %xmm6,%xmm6
1026 .byte 0xf3,0xc3 /* rep ret */
1027
1028
1029.globl _aesni_ctr32_encrypt_blocks
1030
1031.p2align 4
1032_aesni_ctr32_encrypt_blocks:
1033
1034.byte 243,15,30,250
1035 cmpq $1,%rdx
1036 jne L$ctr32_bulk
1037
1038
1039
1040 movups (%r8),%xmm2
1041 movups (%rdi),%xmm3
1042 movl 240(%rcx),%edx
1043 movups (%rcx),%xmm0
1044 movups 16(%rcx),%xmm1
1045 leaq 32(%rcx),%rcx
1046 xorps %xmm0,%xmm2
1047L$oop_enc1_7:
1048.byte 102,15,56,220,209
1049 decl %edx
1050 movups (%rcx),%xmm1
1051 leaq 16(%rcx),%rcx
1052 jnz L$oop_enc1_7
1053.byte 102,15,56,221,209
1054 pxor %xmm0,%xmm0
1055 pxor %xmm1,%xmm1
1056 xorps %xmm3,%xmm2
1057 pxor %xmm3,%xmm3
1058 movups %xmm2,(%rsi)
1059 xorps %xmm2,%xmm2
1060 jmp L$ctr32_epilogue
1061
1062.p2align 4
1063L$ctr32_bulk:
1064 leaq (%rsp),%r11
1065
1066 pushq %rbp
1067
1068 subq $128,%rsp
1069 andq $-16,%rsp
1070
1071
1072
1073
1074 movdqu (%r8),%xmm2
1075 movdqu (%rcx),%xmm0
1076 movl 12(%r8),%r8d
1077 pxor %xmm0,%xmm2
1078 movl 12(%rcx),%ebp
1079 movdqa %xmm2,0(%rsp)
1080 bswapl %r8d
1081 movdqa %xmm2,%xmm3
1082 movdqa %xmm2,%xmm4
1083 movdqa %xmm2,%xmm5
1084 movdqa %xmm2,64(%rsp)
1085 movdqa %xmm2,80(%rsp)
1086 movdqa %xmm2,96(%rsp)
1087 movq %rdx,%r10
1088 movdqa %xmm2,112(%rsp)
1089
1090 leaq 1(%r8),%rax
1091 leaq 2(%r8),%rdx
1092 bswapl %eax
1093 bswapl %edx
1094 xorl %ebp,%eax
1095 xorl %ebp,%edx
1096.byte 102,15,58,34,216,3
1097 leaq 3(%r8),%rax
1098 movdqa %xmm3,16(%rsp)
1099.byte 102,15,58,34,226,3
1100 bswapl %eax
1101 movq %r10,%rdx
1102 leaq 4(%r8),%r10
1103 movdqa %xmm4,32(%rsp)
1104 xorl %ebp,%eax
1105 bswapl %r10d
1106.byte 102,15,58,34,232,3
1107 xorl %ebp,%r10d
1108 movdqa %xmm5,48(%rsp)
1109 leaq 5(%r8),%r9
1110 movl %r10d,64+12(%rsp)
1111 bswapl %r9d
1112 leaq 6(%r8),%r10
1113 movl 240(%rcx),%eax
1114 xorl %ebp,%r9d
1115 bswapl %r10d
1116 movl %r9d,80+12(%rsp)
1117 xorl %ebp,%r10d
1118 leaq 7(%r8),%r9
1119 movl %r10d,96+12(%rsp)
1120 bswapl %r9d
1121 movl _OPENSSL_ia32cap_P+4(%rip),%r10d
1122 xorl %ebp,%r9d
1123 andl $71303168,%r10d
1124 movl %r9d,112+12(%rsp)
1125
1126 movups 16(%rcx),%xmm1
1127
1128 movdqa 64(%rsp),%xmm6
1129 movdqa 80(%rsp),%xmm7
1130
1131 cmpq $8,%rdx
1132 jb L$ctr32_tail
1133
1134 subq $6,%rdx
1135 cmpl $4194304,%r10d
1136 je L$ctr32_6x
1137
1138 leaq 128(%rcx),%rcx
1139 subq $2,%rdx
1140 jmp L$ctr32_loop8
1141
1142.p2align 4
1143L$ctr32_6x:
1144 shll $4,%eax
1145 movl $48,%r10d
1146 bswapl %ebp
1147 leaq 32(%rcx,%rax,1),%rcx
1148 subq %rax,%r10
1149 jmp L$ctr32_loop6
1150
1151.p2align 4
1152L$ctr32_loop6:
1153 addl $6,%r8d
1154 movups -48(%rcx,%r10,1),%xmm0
1155.byte 102,15,56,220,209
1156 movl %r8d,%eax
1157 xorl %ebp,%eax
1158.byte 102,15,56,220,217
1159.byte 0x0f,0x38,0xf1,0x44,0x24,12
1160 leal 1(%r8),%eax
1161.byte 102,15,56,220,225
1162 xorl %ebp,%eax
1163.byte 0x0f,0x38,0xf1,0x44,0x24,28
1164.byte 102,15,56,220,233
1165 leal 2(%r8),%eax
1166 xorl %ebp,%eax
1167.byte 102,15,56,220,241
1168.byte 0x0f,0x38,0xf1,0x44,0x24,44
1169 leal 3(%r8),%eax
1170.byte 102,15,56,220,249
1171 movups -32(%rcx,%r10,1),%xmm1
1172 xorl %ebp,%eax
1173
1174.byte 102,15,56,220,208
1175.byte 0x0f,0x38,0xf1,0x44,0x24,60
1176 leal 4(%r8),%eax
1177.byte 102,15,56,220,216
1178 xorl %ebp,%eax
1179.byte 0x0f,0x38,0xf1,0x44,0x24,76
1180.byte 102,15,56,220,224
1181 leal 5(%r8),%eax
1182 xorl %ebp,%eax
1183.byte 102,15,56,220,232
1184.byte 0x0f,0x38,0xf1,0x44,0x24,92
1185 movq %r10,%rax
1186.byte 102,15,56,220,240
1187.byte 102,15,56,220,248
1188 movups -16(%rcx,%r10,1),%xmm0
1189
1190 call L$enc_loop6
1191
1192 movdqu (%rdi),%xmm8
1193 movdqu 16(%rdi),%xmm9
1194 movdqu 32(%rdi),%xmm10
1195 movdqu 48(%rdi),%xmm11
1196 movdqu 64(%rdi),%xmm12
1197 movdqu 80(%rdi),%xmm13
1198 leaq 96(%rdi),%rdi
1199 movups -64(%rcx,%r10,1),%xmm1
1200 pxor %xmm2,%xmm8
1201 movaps 0(%rsp),%xmm2
1202 pxor %xmm3,%xmm9
1203 movaps 16(%rsp),%xmm3
1204 pxor %xmm4,%xmm10
1205 movaps 32(%rsp),%xmm4
1206 pxor %xmm5,%xmm11
1207 movaps 48(%rsp),%xmm5
1208 pxor %xmm6,%xmm12
1209 movaps 64(%rsp),%xmm6
1210 pxor %xmm7,%xmm13
1211 movaps 80(%rsp),%xmm7
1212 movdqu %xmm8,(%rsi)
1213 movdqu %xmm9,16(%rsi)
1214 movdqu %xmm10,32(%rsi)
1215 movdqu %xmm11,48(%rsi)
1216 movdqu %xmm12,64(%rsi)
1217 movdqu %xmm13,80(%rsi)
1218 leaq 96(%rsi),%rsi
1219
1220 subq $6,%rdx
1221 jnc L$ctr32_loop6
1222
1223 addq $6,%rdx
1224 jz L$ctr32_done
1225
1226 leal -48(%r10),%eax
1227 leaq -80(%rcx,%r10,1),%rcx
1228 negl %eax
1229 shrl $4,%eax
1230 jmp L$ctr32_tail
1231
1232.p2align 5
1233L$ctr32_loop8:
1234 addl $8,%r8d
1235 movdqa 96(%rsp),%xmm8
1236.byte 102,15,56,220,209
1237 movl %r8d,%r9d
1238 movdqa 112(%rsp),%xmm9
1239.byte 102,15,56,220,217
1240 bswapl %r9d
1241 movups 32-128(%rcx),%xmm0
1242.byte 102,15,56,220,225
1243 xorl %ebp,%r9d
1244 nop
1245.byte 102,15,56,220,233
1246 movl %r9d,0+12(%rsp)
1247 leaq 1(%r8),%r9
1248.byte 102,15,56,220,241
1249.byte 102,15,56,220,249
1250.byte 102,68,15,56,220,193
1251.byte 102,68,15,56,220,201
1252 movups 48-128(%rcx),%xmm1
1253 bswapl %r9d
1254.byte 102,15,56,220,208
1255.byte 102,15,56,220,216
1256 xorl %ebp,%r9d
1257.byte 0x66,0x90
1258.byte 102,15,56,220,224
1259.byte 102,15,56,220,232
1260 movl %r9d,16+12(%rsp)
1261 leaq 2(%r8),%r9
1262.byte 102,15,56,220,240
1263.byte 102,15,56,220,248
1264.byte 102,68,15,56,220,192
1265.byte 102,68,15,56,220,200
1266 movups 64-128(%rcx),%xmm0
1267 bswapl %r9d
1268.byte 102,15,56,220,209
1269.byte 102,15,56,220,217
1270 xorl %ebp,%r9d
1271.byte 0x66,0x90
1272.byte 102,15,56,220,225
1273.byte 102,15,56,220,233
1274 movl %r9d,32+12(%rsp)
1275 leaq 3(%r8),%r9
1276.byte 102,15,56,220,241
1277.byte 102,15,56,220,249
1278.byte 102,68,15,56,220,193
1279.byte 102,68,15,56,220,201
1280 movups 80-128(%rcx),%xmm1
1281 bswapl %r9d
1282.byte 102,15,56,220,208
1283.byte 102,15,56,220,216
1284 xorl %ebp,%r9d
1285.byte 0x66,0x90
1286.byte 102,15,56,220,224
1287.byte 102,15,56,220,232
1288 movl %r9d,48+12(%rsp)
1289 leaq 4(%r8),%r9
1290.byte 102,15,56,220,240
1291.byte 102,15,56,220,248
1292.byte 102,68,15,56,220,192
1293.byte 102,68,15,56,220,200
1294 movups 96-128(%rcx),%xmm0
1295 bswapl %r9d
1296.byte 102,15,56,220,209
1297.byte 102,15,56,220,217
1298 xorl %ebp,%r9d
1299.byte 0x66,0x90
1300.byte 102,15,56,220,225
1301.byte 102,15,56,220,233
1302 movl %r9d,64+12(%rsp)
1303 leaq 5(%r8),%r9
1304.byte 102,15,56,220,241
1305.byte 102,15,56,220,249
1306.byte 102,68,15,56,220,193
1307.byte 102,68,15,56,220,201
1308 movups 112-128(%rcx),%xmm1
1309 bswapl %r9d
1310.byte 102,15,56,220,208
1311.byte 102,15,56,220,216
1312 xorl %ebp,%r9d
1313.byte 0x66,0x90
1314.byte 102,15,56,220,224
1315.byte 102,15,56,220,232
1316 movl %r9d,80+12(%rsp)
1317 leaq 6(%r8),%r9
1318.byte 102,15,56,220,240
1319.byte 102,15,56,220,248
1320.byte 102,68,15,56,220,192
1321.byte 102,68,15,56,220,200
1322 movups 128-128(%rcx),%xmm0
1323 bswapl %r9d
1324.byte 102,15,56,220,209
1325.byte 102,15,56,220,217
1326 xorl %ebp,%r9d
1327.byte 0x66,0x90
1328.byte 102,15,56,220,225
1329.byte 102,15,56,220,233
1330 movl %r9d,96+12(%rsp)
1331 leaq 7(%r8),%r9
1332.byte 102,15,56,220,241
1333.byte 102,15,56,220,249
1334.byte 102,68,15,56,220,193
1335.byte 102,68,15,56,220,201
1336 movups 144-128(%rcx),%xmm1
1337 bswapl %r9d
1338.byte 102,15,56,220,208
1339.byte 102,15,56,220,216
1340.byte 102,15,56,220,224
1341 xorl %ebp,%r9d
1342 movdqu 0(%rdi),%xmm10
1343.byte 102,15,56,220,232
1344 movl %r9d,112+12(%rsp)
1345 cmpl $11,%eax
1346.byte 102,15,56,220,240
1347.byte 102,15,56,220,248
1348.byte 102,68,15,56,220,192
1349.byte 102,68,15,56,220,200
1350 movups 160-128(%rcx),%xmm0
1351
1352 jb L$ctr32_enc_done
1353
1354.byte 102,15,56,220,209
1355.byte 102,15,56,220,217
1356.byte 102,15,56,220,225
1357.byte 102,15,56,220,233
1358.byte 102,15,56,220,241
1359.byte 102,15,56,220,249
1360.byte 102,68,15,56,220,193
1361.byte 102,68,15,56,220,201
1362 movups 176-128(%rcx),%xmm1
1363
1364.byte 102,15,56,220,208
1365.byte 102,15,56,220,216
1366.byte 102,15,56,220,224
1367.byte 102,15,56,220,232
1368.byte 102,15,56,220,240
1369.byte 102,15,56,220,248
1370.byte 102,68,15,56,220,192
1371.byte 102,68,15,56,220,200
1372 movups 192-128(%rcx),%xmm0
1373 je L$ctr32_enc_done
1374
1375.byte 102,15,56,220,209
1376.byte 102,15,56,220,217
1377.byte 102,15,56,220,225
1378.byte 102,15,56,220,233
1379.byte 102,15,56,220,241
1380.byte 102,15,56,220,249
1381.byte 102,68,15,56,220,193
1382.byte 102,68,15,56,220,201
1383 movups 208-128(%rcx),%xmm1
1384
1385.byte 102,15,56,220,208
1386.byte 102,15,56,220,216
1387.byte 102,15,56,220,224
1388.byte 102,15,56,220,232
1389.byte 102,15,56,220,240
1390.byte 102,15,56,220,248
1391.byte 102,68,15,56,220,192
1392.byte 102,68,15,56,220,200
1393 movups 224-128(%rcx),%xmm0
1394 jmp L$ctr32_enc_done
1395
1396.p2align 4
1397L$ctr32_enc_done:
1398 movdqu 16(%rdi),%xmm11
1399 pxor %xmm0,%xmm10
1400 movdqu 32(%rdi),%xmm12
1401 pxor %xmm0,%xmm11
1402 movdqu 48(%rdi),%xmm13
1403 pxor %xmm0,%xmm12
1404 movdqu 64(%rdi),%xmm14
1405 pxor %xmm0,%xmm13
1406 movdqu 80(%rdi),%xmm15
1407 pxor %xmm0,%xmm14
1408 pxor %xmm0,%xmm15
1409.byte 102,15,56,220,209
1410.byte 102,15,56,220,217
1411.byte 102,15,56,220,225
1412.byte 102,15,56,220,233
1413.byte 102,15,56,220,241
1414.byte 102,15,56,220,249
1415.byte 102,68,15,56,220,193
1416.byte 102,68,15,56,220,201
1417 movdqu 96(%rdi),%xmm1
1418 leaq 128(%rdi),%rdi
1419
1420.byte 102,65,15,56,221,210
1421 pxor %xmm0,%xmm1
1422 movdqu 112-128(%rdi),%xmm10
1423.byte 102,65,15,56,221,219
1424 pxor %xmm0,%xmm10
1425 movdqa 0(%rsp),%xmm11
1426.byte 102,65,15,56,221,228
1427.byte 102,65,15,56,221,237
1428 movdqa 16(%rsp),%xmm12
1429 movdqa 32(%rsp),%xmm13
1430.byte 102,65,15,56,221,246
1431.byte 102,65,15,56,221,255
1432 movdqa 48(%rsp),%xmm14
1433 movdqa 64(%rsp),%xmm15
1434.byte 102,68,15,56,221,193
1435 movdqa 80(%rsp),%xmm0
1436 movups 16-128(%rcx),%xmm1
1437.byte 102,69,15,56,221,202
1438
1439 movups %xmm2,(%rsi)
1440 movdqa %xmm11,%xmm2
1441 movups %xmm3,16(%rsi)
1442 movdqa %xmm12,%xmm3
1443 movups %xmm4,32(%rsi)
1444 movdqa %xmm13,%xmm4
1445 movups %xmm5,48(%rsi)
1446 movdqa %xmm14,%xmm5
1447 movups %xmm6,64(%rsi)
1448 movdqa %xmm15,%xmm6
1449 movups %xmm7,80(%rsi)
1450 movdqa %xmm0,%xmm7
1451 movups %xmm8,96(%rsi)
1452 movups %xmm9,112(%rsi)
1453 leaq 128(%rsi),%rsi
1454
1455 subq $8,%rdx
1456 jnc L$ctr32_loop8
1457
1458 addq $8,%rdx
1459 jz L$ctr32_done
1460 leaq -128(%rcx),%rcx
1461
1462L$ctr32_tail:
1463
1464
1465 leaq 16(%rcx),%rcx
1466 cmpq $4,%rdx
1467 jb L$ctr32_loop3
1468 je L$ctr32_loop4
1469
1470
1471 shll $4,%eax
1472 movdqa 96(%rsp),%xmm8
1473 pxor %xmm9,%xmm9
1474
1475 movups 16(%rcx),%xmm0
1476.byte 102,15,56,220,209
1477.byte 102,15,56,220,217
1478 leaq 32-16(%rcx,%rax,1),%rcx
1479 negq %rax
1480.byte 102,15,56,220,225
1481 addq $16,%rax
1482 movups (%rdi),%xmm10
1483.byte 102,15,56,220,233
1484.byte 102,15,56,220,241
1485 movups 16(%rdi),%xmm11
1486 movups 32(%rdi),%xmm12
1487.byte 102,15,56,220,249
1488.byte 102,68,15,56,220,193
1489
1490 call L$enc_loop8_enter
1491
1492 movdqu 48(%rdi),%xmm13
1493 pxor %xmm10,%xmm2
1494 movdqu 64(%rdi),%xmm10
1495 pxor %xmm11,%xmm3
1496 movdqu %xmm2,(%rsi)
1497 pxor %xmm12,%xmm4
1498 movdqu %xmm3,16(%rsi)
1499 pxor %xmm13,%xmm5
1500 movdqu %xmm4,32(%rsi)
1501 pxor %xmm10,%xmm6
1502 movdqu %xmm5,48(%rsi)
1503 movdqu %xmm6,64(%rsi)
1504 cmpq $6,%rdx
1505 jb L$ctr32_done
1506
1507 movups 80(%rdi),%xmm11
1508 xorps %xmm11,%xmm7
1509 movups %xmm7,80(%rsi)
1510 je L$ctr32_done
1511
1512 movups 96(%rdi),%xmm12
1513 xorps %xmm12,%xmm8
1514 movups %xmm8,96(%rsi)
1515 jmp L$ctr32_done
1516
1517.p2align 5
1518L$ctr32_loop4:
1519.byte 102,15,56,220,209
1520 leaq 16(%rcx),%rcx
1521 decl %eax
1522.byte 102,15,56,220,217
1523.byte 102,15,56,220,225
1524.byte 102,15,56,220,233
1525 movups (%rcx),%xmm1
1526 jnz L$ctr32_loop4
1527.byte 102,15,56,221,209
1528.byte 102,15,56,221,217
1529 movups (%rdi),%xmm10
1530 movups 16(%rdi),%xmm11
1531.byte 102,15,56,221,225
1532.byte 102,15,56,221,233
1533 movups 32(%rdi),%xmm12
1534 movups 48(%rdi),%xmm13
1535
1536 xorps %xmm10,%xmm2
1537 movups %xmm2,(%rsi)
1538 xorps %xmm11,%xmm3
1539 movups %xmm3,16(%rsi)
1540 pxor %xmm12,%xmm4
1541 movdqu %xmm4,32(%rsi)
1542 pxor %xmm13,%xmm5
1543 movdqu %xmm5,48(%rsi)
1544 jmp L$ctr32_done
1545
1546.p2align 5
1547L$ctr32_loop3:
1548.byte 102,15,56,220,209
1549 leaq 16(%rcx),%rcx
1550 decl %eax
1551.byte 102,15,56,220,217
1552.byte 102,15,56,220,225
1553 movups (%rcx),%xmm1
1554 jnz L$ctr32_loop3
1555.byte 102,15,56,221,209
1556.byte 102,15,56,221,217
1557.byte 102,15,56,221,225
1558
1559 movups (%rdi),%xmm10
1560 xorps %xmm10,%xmm2
1561 movups %xmm2,(%rsi)
1562 cmpq $2,%rdx
1563 jb L$ctr32_done
1564
1565 movups 16(%rdi),%xmm11
1566 xorps %xmm11,%xmm3
1567 movups %xmm3,16(%rsi)
1568 je L$ctr32_done
1569
1570 movups 32(%rdi),%xmm12
1571 xorps %xmm12,%xmm4
1572 movups %xmm4,32(%rsi)
1573
1574L$ctr32_done:
1575 xorps %xmm0,%xmm0
1576 xorl %ebp,%ebp
1577 pxor %xmm1,%xmm1
1578 pxor %xmm2,%xmm2
1579 pxor %xmm3,%xmm3
1580 pxor %xmm4,%xmm4
1581 pxor %xmm5,%xmm5
1582 pxor %xmm6,%xmm6
1583 pxor %xmm7,%xmm7
1584 movaps %xmm0,0(%rsp)
1585 pxor %xmm8,%xmm8
1586 movaps %xmm0,16(%rsp)
1587 pxor %xmm9,%xmm9
1588 movaps %xmm0,32(%rsp)
1589 pxor %xmm10,%xmm10
1590 movaps %xmm0,48(%rsp)
1591 pxor %xmm11,%xmm11
1592 movaps %xmm0,64(%rsp)
1593 pxor %xmm12,%xmm12
1594 movaps %xmm0,80(%rsp)
1595 pxor %xmm13,%xmm13
1596 movaps %xmm0,96(%rsp)
1597 pxor %xmm14,%xmm14
1598 movaps %xmm0,112(%rsp)
1599 pxor %xmm15,%xmm15
1600 movq -8(%r11),%rbp
1601
1602 leaq (%r11),%rsp
1603
1604L$ctr32_epilogue:
1605 .byte 0xf3,0xc3
1606
1607
# _aesni_xts_encrypt -- XTS-style bulk encryption using AES-NI (generated code).
# NOTE(review): presumably the C prototype is
#   aesni_xts_encrypt(in, out, len, key1, key2, iv) -- confirm against the header.
# Register roles as used by the code below:
#   %rdi  input pointer, read in 16-byte blocks and advanced
#   %rsi  output pointer, written and advanced
#   %rdx  byte length; the low four bits select the byte-stealing tail
#   %rcx  data key schedule, round count read from offset 240
#   %r8   second key schedule, used only to encrypt the block at (%r9)
#   %r9   pointer to the 16-byte block that becomes the initial tweak
# Opaque .byte sequences:
#   243,15,30,250 = endbr64        0xf3,0xc3 = rep ret
#   102,15,56,220,xx = aesenc      102,15,56,221,xx = aesenclast
# Stack frame: %r11 keeps the entry %rsp, %rbp is saved at -8(%r11), and a
# 16-byte aligned area of 112 bytes is reserved:
#   0..80(%rsp)  six values of tweak ^ last round key
#   96(%rsp)     round 0 key ^ last round key
1608.globl _aesni_xts_encrypt
1609
1610.p2align 4
1611_aesni_xts_encrypt:
1612
1613.byte 243,15,30,250
# Remember the entry stack pointer for the epilogue.
1614 leaq (%rsp),%r11
1615
1616 pushq %rbp
1617
1618 subq $112,%rsp
1619 andq $-16,%rsp
# Encrypt the block at (%r9) with the %r8 key schedule; xmm2 = initial tweak.
1620 movups (%r9),%xmm2
1621 movl 240(%r8),%eax
1622 movl 240(%rcx),%r10d
1623 movups (%r8),%xmm0
1624 movups 16(%r8),%xmm1
1625 leaq 32(%r8),%r8
1626 xorps %xmm0,%xmm2
1627L$oop_enc1_8:
1628.byte 102,15,56,220,209
1629 decl %eax
1630 movups (%r8),%xmm1
1631 leaq 16(%r8),%r8
1632 jnz L$oop_enc1_8
1633.byte 102,15,56,221,209
# xmm0 = round 0 key of the data key, %rbp = key schedule base,
# %eax = rounds, %r10 = 16*rounds, %r9 = copy of the full length,
# %rdx = length rounded down to a multiple of 16.
1634 movups (%rcx),%xmm0
1635 movq %rcx,%rbp
1636 movl %r10d,%eax
1637 shll $4,%r10d
1638 movq %rdx,%r9
1639 andq $-16,%rdx
1640
# xmm1 = round key at offset 16+16*rounds (the key the final aesenclast uses).
1641 movups 16(%rcx,%r10,1),%xmm1
1642
# Derive five tweaks into xmm10..xmm14, each already xored with the round 0
# key in xmm0, and leave the following tweak (not xored) in xmm15.
# One step: paddq doubles both 64-bit halves of xmm15, and the carry bits are
# xored back in from xmm8 under the sign mask built with pshufd/psrad.
# L$xts_magic is defined outside this chunk.
1643 movdqa L$xts_magic(%rip),%xmm8
1644 movdqa %xmm2,%xmm15
1645 pshufd $0x5f,%xmm2,%xmm9
1646 pxor %xmm0,%xmm1
1647 movdqa %xmm9,%xmm14
1648 paddd %xmm9,%xmm9
1649 movdqa %xmm15,%xmm10
1650 psrad $31,%xmm14
1651 paddq %xmm15,%xmm15
1652 pand %xmm8,%xmm14
1653 pxor %xmm0,%xmm10
1654 pxor %xmm14,%xmm15
1655 movdqa %xmm9,%xmm14
1656 paddd %xmm9,%xmm9
1657 movdqa %xmm15,%xmm11
1658 psrad $31,%xmm14
1659 paddq %xmm15,%xmm15
1660 pand %xmm8,%xmm14
1661 pxor %xmm0,%xmm11
1662 pxor %xmm14,%xmm15
1663 movdqa %xmm9,%xmm14
1664 paddd %xmm9,%xmm9
1665 movdqa %xmm15,%xmm12
1666 psrad $31,%xmm14
1667 paddq %xmm15,%xmm15
1668 pand %xmm8,%xmm14
1669 pxor %xmm0,%xmm12
1670 pxor %xmm14,%xmm15
1671 movdqa %xmm9,%xmm14
1672 paddd %xmm9,%xmm9
1673 movdqa %xmm15,%xmm13
1674 psrad $31,%xmm14
1675 paddq %xmm15,%xmm15
1676 pand %xmm8,%xmm14
1677 pxor %xmm0,%xmm13
1678 pxor %xmm14,%xmm15
1679 movdqa %xmm15,%xmm14
1680 psrad $31,%xmm9
1681 paddq %xmm15,%xmm15
1682 pand %xmm8,%xmm9
1683 pxor %xmm0,%xmm14
1684 pxor %xmm9,%xmm15
# 96(%rsp) = round 0 key ^ last round key (xmm1 was xored with xmm0 above).
1685 movaps %xmm1,96(%rsp)
1686
# Fewer than six whole blocks: take the short path.
1687 subq $96,%rdx
1688 jc L$xts_enc_short
1689
# %rcx = key + 32 + 16*rounds; %rax = %r10 = 112 - 16*rounds, a negative
# index that L$xts_enc_loop6 steps by 32 until it reaches zero.
1690 movl $16+96,%eax
1691 leaq 32(%rbp,%r10,1),%rcx
1692 subq %r10,%rax
1693 movups 16(%rbp),%xmm1
1694 movq %rax,%r10
1695 leaq L$xts_magic(%rip),%r8
1696 jmp L$xts_enc_grandloop
1697
# Main loop, six blocks per iteration.
# xmm2..xmm7 = input ^ tweak ^ round 0 key; the first rounds are interleaved
# with the loads. xmm10..xmm14 and xmm8 are converted to
# tweak ^ last round key (xor with 96(%rsp)) and parked at 0..80(%rsp).
1698.p2align 5
1699L$xts_enc_grandloop:
1700 movdqu 0(%rdi),%xmm2
1701 movdqa %xmm0,%xmm8
1702 movdqu 16(%rdi),%xmm3
1703 pxor %xmm10,%xmm2
1704 movdqu 32(%rdi),%xmm4
1705 pxor %xmm11,%xmm3
1706.byte 102,15,56,220,209
1707 movdqu 48(%rdi),%xmm5
1708 pxor %xmm12,%xmm4
1709.byte 102,15,56,220,217
1710 movdqu 64(%rdi),%xmm6
1711 pxor %xmm13,%xmm5
1712.byte 102,15,56,220,225
1713 movdqu 80(%rdi),%xmm7
1714 pxor %xmm15,%xmm8
1715 movdqa 96(%rsp),%xmm9
1716 pxor %xmm14,%xmm6
1717.byte 102,15,56,220,233
1718 movups 32(%rbp),%xmm0
1719 leaq 96(%rdi),%rdi
1720 pxor %xmm8,%xmm7
1721
1722 pxor %xmm9,%xmm10
1723.byte 102,15,56,220,241
1724 pxor %xmm9,%xmm11
1725 movdqa %xmm10,0(%rsp)
1726.byte 102,15,56,220,249
1727 movups 48(%rbp),%xmm1
1728 pxor %xmm9,%xmm12
1729
1730.byte 102,15,56,220,208
1731 pxor %xmm9,%xmm13
1732 movdqa %xmm11,16(%rsp)
1733.byte 102,15,56,220,216
1734 pxor %xmm9,%xmm14
1735 movdqa %xmm12,32(%rsp)
1736.byte 102,15,56,220,224
1737.byte 102,15,56,220,232
1738 pxor %xmm9,%xmm8
1739 movdqa %xmm14,64(%rsp)
1740.byte 102,15,56,220,240
1741.byte 102,15,56,220,248
1742 movups 64(%rbp),%xmm0
1743 movdqa %xmm8,80(%rsp)
1744 pshufd $0x5f,%xmm15,%xmm9
1745 jmp L$xts_enc_loop6
# Middle rounds: two round keys per pass, alternating xmm1 and xmm0, until
# the index in %rax reaches zero (jnz tests the flags of the addq).
1746.p2align 5
1747L$xts_enc_loop6:
1748.byte 102,15,56,220,209
1749.byte 102,15,56,220,217
1750.byte 102,15,56,220,225
1751.byte 102,15,56,220,233
1752.byte 102,15,56,220,241
1753.byte 102,15,56,220,249
1754 movups -64(%rcx,%rax,1),%xmm1
1755 addq $32,%rax
1756
1757.byte 102,15,56,220,208
1758.byte 102,15,56,220,216
1759.byte 102,15,56,220,224
1760.byte 102,15,56,220,232
1761.byte 102,15,56,220,240
1762.byte 102,15,56,220,248
1763 movups -80(%rcx,%rax,1),%xmm0
1764 jnz L$xts_enc_loop6
1765
# Last five aesenc rounds, interleaved with the tweaks for the next
# iteration: xmm10..xmm14 are rebuilt as tweak ^ round 0 key (round 0 key
# reloaded from (%rbp)) and xmm15 ends up as the tweak after those.
# The store of xmm13 to 48(%rsp) is delayed until here.
1766 movdqa (%r8),%xmm8
1767 movdqa %xmm9,%xmm14
1768 paddd %xmm9,%xmm9
1769.byte 102,15,56,220,209
1770 paddq %xmm15,%xmm15
1771 psrad $31,%xmm14
1772.byte 102,15,56,220,217
1773 pand %xmm8,%xmm14
1774 movups (%rbp),%xmm10
1775.byte 102,15,56,220,225
1776.byte 102,15,56,220,233
1777.byte 102,15,56,220,241
1778 pxor %xmm14,%xmm15
1779 movaps %xmm10,%xmm11
1780.byte 102,15,56,220,249
1781 movups -64(%rcx),%xmm1
1782
1783 movdqa %xmm9,%xmm14
1784.byte 102,15,56,220,208
1785 paddd %xmm9,%xmm9
1786 pxor %xmm15,%xmm10
1787.byte 102,15,56,220,216
1788 psrad $31,%xmm14
1789 paddq %xmm15,%xmm15
1790.byte 102,15,56,220,224
1791.byte 102,15,56,220,232
1792 pand %xmm8,%xmm14
1793 movaps %xmm11,%xmm12
1794.byte 102,15,56,220,240
1795 pxor %xmm14,%xmm15
1796 movdqa %xmm9,%xmm14
1797.byte 102,15,56,220,248
1798 movups -48(%rcx),%xmm0
1799
1800 paddd %xmm9,%xmm9
1801.byte 102,15,56,220,209
1802 pxor %xmm15,%xmm11
1803 psrad $31,%xmm14
1804.byte 102,15,56,220,217
1805 paddq %xmm15,%xmm15
1806 pand %xmm8,%xmm14
1807.byte 102,15,56,220,225
1808.byte 102,15,56,220,233
1809 movdqa %xmm13,48(%rsp)
1810 pxor %xmm14,%xmm15
1811.byte 102,15,56,220,241
1812 movaps %xmm12,%xmm13
1813 movdqa %xmm9,%xmm14
1814.byte 102,15,56,220,249
1815 movups -32(%rcx),%xmm1
1816
1817 paddd %xmm9,%xmm9
1818.byte 102,15,56,220,208
1819 pxor %xmm15,%xmm12
1820 psrad $31,%xmm14
1821.byte 102,15,56,220,216
1822 paddq %xmm15,%xmm15
1823 pand %xmm8,%xmm14
1824.byte 102,15,56,220,224
1825.byte 102,15,56,220,232
1826.byte 102,15,56,220,240
1827 pxor %xmm14,%xmm15
1828 movaps %xmm13,%xmm14
1829.byte 102,15,56,220,248
1830
1831 movdqa %xmm9,%xmm0
1832 paddd %xmm9,%xmm9
1833.byte 102,15,56,220,209
1834 pxor %xmm15,%xmm13
1835 psrad $31,%xmm0
1836.byte 102,15,56,220,217
1837 paddq %xmm15,%xmm15
1838 pand %xmm8,%xmm0
1839.byte 102,15,56,220,225
1840.byte 102,15,56,220,233
1841 pxor %xmm0,%xmm15
1842 movups (%rbp),%xmm0
1843.byte 102,15,56,220,241
1844.byte 102,15,56,220,249
1845 movups 16(%rbp),%xmm1
1846
# aesenclast with memory operands 0(%rsp)..80(%rsp): the final round and the
# tweak xor are applied in one instruction per block (xmm2..xmm7).
# %rax is reloaded with the saved round index from %r10.
1847 pxor %xmm15,%xmm14
1848.byte 102,15,56,221,84,36,0
1849 psrad $31,%xmm9
1850 paddq %xmm15,%xmm15
1851.byte 102,15,56,221,92,36,16
1852.byte 102,15,56,221,100,36,32
1853 pand %xmm8,%xmm9
1854 movq %r10,%rax
1855.byte 102,15,56,221,108,36,48
1856.byte 102,15,56,221,116,36,64
1857.byte 102,15,56,221,124,36,80
1858 pxor %xmm9,%xmm15
1859
1860 leaq 96(%rsi),%rsi
1861 movups %xmm2,-96(%rsi)
1862 movups %xmm3,-80(%rsi)
1863 movups %xmm4,-64(%rsi)
1864 movups %xmm5,-48(%rsi)
1865 movups %xmm6,-32(%rsi)
1866 movups %xmm7,-16(%rsi)
1867 subq $96,%rdx
1868 jnc L$xts_enc_grandloop
1869
# Leaving the main loop: turn the index back into %eax = rounds
# ((112 - index) / 16) and point %rcx at the key schedule base again.
1870 movl $16+96,%eax
1871 subl %r10d,%eax
1872 movq %rbp,%rcx
1873 shrl $4,%eax
1874
# Tail: zero to five whole blocks remain. %r10d keeps the round count.
# Each pxor with xmm0 (round 0 key) strips the key folded into a tweak, so
# xmm10..xmm14 hold plain tweaks by the time they are used.
1875L$xts_enc_short:
1876
1877 movl %eax,%r10d
1878 pxor %xmm0,%xmm10
# Undo the earlier subtraction: %rdx = remaining bytes in whole blocks.
1879 addq $96,%rdx
1880 jz L$xts_enc_done
1881
# Dispatch on 16/32/48/64 bytes; fall through for 80 bytes (five blocks).
1882 pxor %xmm0,%xmm11
1883 cmpq $0x20,%rdx
1884 jb L$xts_enc_one
1885 pxor %xmm0,%xmm12
1886 je L$xts_enc_two
1887
1888 pxor %xmm0,%xmm13
1889 cmpq $0x40,%rdx
1890 jb L$xts_enc_three
1891 pxor %xmm0,%xmm14
1892 je L$xts_enc_four
1893
# Five blocks: xmm7 (sixth lane) is cleared before the six-block helper.
1894 movdqu (%rdi),%xmm2
1895 movdqu 16(%rdi),%xmm3
1896 movdqu 32(%rdi),%xmm4
1897 pxor %xmm10,%xmm2
1898 movdqu 48(%rdi),%xmm5
1899 pxor %xmm11,%xmm3
1900 movdqu 64(%rdi),%xmm6
1901 leaq 80(%rdi),%rdi
1902 pxor %xmm12,%xmm4
1903 pxor %xmm13,%xmm5
1904 pxor %xmm14,%xmm6
1905 pxor %xmm7,%xmm7
1906
# NOTE(review): _aesni_encrypt6 is defined outside this chunk; presumably it
# takes the key in %rcx and rounds in %eax like _aesni_encrypt2 -- confirm.
1907 call _aesni_encrypt6
1908
# Second tweak xor on the results; xmm10 = next tweak for the stealing step.
1909 xorps %xmm10,%xmm2
1910 movdqa %xmm15,%xmm10
1911 xorps %xmm11,%xmm3
1912 xorps %xmm12,%xmm4
1913 movdqu %xmm2,(%rsi)
1914 xorps %xmm13,%xmm5
1915 movdqu %xmm3,16(%rsi)
1916 xorps %xmm14,%xmm6
1917 movdqu %xmm4,32(%rsi)
1918 movdqu %xmm5,48(%rsi)
1919 movdqu %xmm6,64(%rsi)
1920 leaq 80(%rsi),%rsi
1921 jmp L$xts_enc_done
1922
# One block, encrypted inline; xmm10 = next tweak afterwards.
1923.p2align 4
1924L$xts_enc_one:
1925 movups (%rdi),%xmm2
1926 leaq 16(%rdi),%rdi
1927 xorps %xmm10,%xmm2
1928 movups (%rcx),%xmm0
1929 movups 16(%rcx),%xmm1
1930 leaq 32(%rcx),%rcx
1931 xorps %xmm0,%xmm2
1932L$oop_enc1_9:
1933.byte 102,15,56,220,209
1934 decl %eax
1935 movups (%rcx),%xmm1
1936 leaq 16(%rcx),%rcx
1937 jnz L$oop_enc1_9
1938.byte 102,15,56,221,209
1939 xorps %xmm10,%xmm2
1940 movdqa %xmm11,%xmm10
1941 movups %xmm2,(%rsi)
1942 leaq 16(%rsi),%rsi
1943 jmp L$xts_enc_done
1944
# Two blocks; xmm10 = next tweak afterwards.
1945.p2align 4
1946L$xts_enc_two:
1947 movups (%rdi),%xmm2
1948 movups 16(%rdi),%xmm3
1949 leaq 32(%rdi),%rdi
1950 xorps %xmm10,%xmm2
1951 xorps %xmm11,%xmm3
1952
1953 call _aesni_encrypt2
1954
1955 xorps %xmm10,%xmm2
1956 movdqa %xmm12,%xmm10
1957 xorps %xmm11,%xmm3
1958 movups %xmm2,(%rsi)
1959 movups %xmm3,16(%rsi)
1960 leaq 32(%rsi),%rsi
1961 jmp L$xts_enc_done
1962
# Three blocks; xmm10 = next tweak afterwards.
1963.p2align 4
1964L$xts_enc_three:
1965 movups (%rdi),%xmm2
1966 movups 16(%rdi),%xmm3
1967 movups 32(%rdi),%xmm4
1968 leaq 48(%rdi),%rdi
1969 xorps %xmm10,%xmm2
1970 xorps %xmm11,%xmm3
1971 xorps %xmm12,%xmm4
1972
1973 call _aesni_encrypt3
1974
1975 xorps %xmm10,%xmm2
1976 movdqa %xmm13,%xmm10
1977 xorps %xmm11,%xmm3
1978 xorps %xmm12,%xmm4
1979 movups %xmm2,(%rsi)
1980 movups %xmm3,16(%rsi)
1981 movups %xmm4,32(%rsi)
1982 leaq 48(%rsi),%rsi
1983 jmp L$xts_enc_done
1984
# Four blocks; xmm10 = next tweak afterwards.
1985.p2align 4
1986L$xts_enc_four:
1987 movups (%rdi),%xmm2
1988 movups 16(%rdi),%xmm3
1989 movups 32(%rdi),%xmm4
1990 xorps %xmm10,%xmm2
1991 movups 48(%rdi),%xmm5
1992 leaq 64(%rdi),%rdi
1993 xorps %xmm11,%xmm3
1994 xorps %xmm12,%xmm4
1995 xorps %xmm13,%xmm5
1996
1997 call _aesni_encrypt4
1998
1999 pxor %xmm10,%xmm2
2000 movdqa %xmm14,%xmm10
2001 pxor %xmm11,%xmm3
2002 pxor %xmm12,%xmm4
2003 movdqu %xmm2,(%rsi)
2004 pxor %xmm13,%xmm5
2005 movdqu %xmm3,16(%rsi)
2006 movdqu %xmm4,32(%rsi)
2007 movdqu %xmm5,48(%rsi)
2008 leaq 64(%rsi),%rsi
2009 jmp L$xts_enc_done
2010
# %r9 = length mod 16. Zero means no partial block, so finish.
2011.p2align 4
2012L$xts_enc_done:
2013 andq $15,%r9
2014 jz L$xts_enc_ret
2015 movq %r9,%rdx
2016
# Byte stealing, one byte per pass for %rdx bytes: the trailing input byte
# replaces a byte of the last full output block (at -16(%rsi)), and the
# displaced output byte becomes the partial output block at 0(%rsi).
2017L$xts_enc_steal:
2018 movzbl (%rdi),%eax
2019 movzbl -16(%rsi),%ecx
2020 leaq 1(%rdi),%rdi
2021 movb %al,-16(%rsi)
2022 movb %cl,0(%rsi)
2023 leaq 1(%rsi),%rsi
2024 subq $1,%rdx
2025 jnz L$xts_enc_steal
2026
# Rewind %rsi, reload key base and rounds from %rbp/%r10d, then encrypt the
# patched block at -16(%rsi) in place with the tweak kept in xmm10.
2027 subq %r9,%rsi
2028 movq %rbp,%rcx
2029 movl %r10d,%eax
2030
2031 movups -16(%rsi),%xmm2
2032 xorps %xmm10,%xmm2
2033 movups (%rcx),%xmm0
2034 movups 16(%rcx),%xmm1
2035 leaq 32(%rcx),%rcx
2036 xorps %xmm0,%xmm2
2037L$oop_enc1_10:
2038.byte 102,15,56,220,209
2039 decl %eax
2040 movups (%rcx),%xmm1
2041 leaq 16(%rcx),%rcx
2042 jnz L$oop_enc1_10
2043.byte 102,15,56,221,209
2044 xorps %xmm10,%xmm2
2045 movups %xmm2,-16(%rsi)
2046
# Epilogue: clear xmm0..xmm15 and the seven 16-byte stack slots, then
# restore %rbp from -8(%r11) and %rsp from %r11.
2047L$xts_enc_ret:
2048 xorps %xmm0,%xmm0
2049 pxor %xmm1,%xmm1
2050 pxor %xmm2,%xmm2
2051 pxor %xmm3,%xmm3
2052 pxor %xmm4,%xmm4
2053 pxor %xmm5,%xmm5
2054 pxor %xmm6,%xmm6
2055 pxor %xmm7,%xmm7
2056 movaps %xmm0,0(%rsp)
2057 pxor %xmm8,%xmm8
2058 movaps %xmm0,16(%rsp)
2059 pxor %xmm9,%xmm9
2060 movaps %xmm0,32(%rsp)
2061 pxor %xmm10,%xmm10
2062 movaps %xmm0,48(%rsp)
2063 pxor %xmm11,%xmm11
2064 movaps %xmm0,64(%rsp)
2065 pxor %xmm12,%xmm12
2066 movaps %xmm0,80(%rsp)
2067 pxor %xmm13,%xmm13
2068 movaps %xmm0,96(%rsp)
2069 pxor %xmm14,%xmm14
2070 pxor %xmm15,%xmm15
2071 movq -8(%r11),%rbp
2072
2073 leaq (%r11),%rsp
2074
2075L$xts_enc_epilogue:
2076 .byte 0xf3,0xc3
2077
2078
# _aesni_xts_decrypt -- XTS-style bulk decryption using AES-NI (generated code).
# NOTE(review): presumably the C prototype is
#   aesni_xts_decrypt(in, out, len, key1, key2, iv) -- confirm against the header.
# Register roles as used by the code below:
#   %rdi  input pointer, read in 16-byte blocks and advanced
#   %rsi  output pointer, written and advanced
#   %rdx  byte length
#   %rcx  data key schedule, round count read from offset 240
#   %r8   second key schedule, used only to ENCRYPT the block at (%r9)
#   %r9   pointer to the 16-byte block that becomes the initial tweak
# Opaque .byte sequences:
#   243,15,30,250 = endbr64        0xf3,0xc3 = rep ret
#   102,15,56,220,xx = aesenc      102,15,56,221,xx = aesenclast
#   102,15,56,222,xx = aesdec      102,15,56,223,xx = aesdeclast
# Stack frame: %r11 keeps the entry %rsp, %rbp is saved at -8(%r11), and a
# 16-byte aligned area of 112 bytes is reserved:
#   0..80(%rsp)  six values of tweak ^ last round key
#   96(%rsp)     round 0 key ^ last round key
2079.globl _aesni_xts_decrypt
2080
2081.p2align 4
2082_aesni_xts_decrypt:
2083
2084.byte 243,15,30,250
# Remember the entry stack pointer for the epilogue.
2085 leaq (%rsp),%r11
2086
2087 pushq %rbp
2088
2089 subq $112,%rsp
2090 andq $-16,%rsp
# The tweak is produced with aesenc/aesenclast (not aesdec) using the %r8
# key schedule; xmm2 = initial tweak.
2091 movups (%r9),%xmm2
2092 movl 240(%r8),%eax
2093 movl 240(%rcx),%r10d
2094 movups (%r8),%xmm0
2095 movups 16(%r8),%xmm1
2096 leaq 32(%r8),%r8
2097 xorps %xmm0,%xmm2
2098L$oop_enc1_11:
2099.byte 102,15,56,220,209
2100 decl %eax
2101 movups (%r8),%xmm1
2102 leaq 16(%r8),%r8
2103 jnz L$oop_enc1_11
2104.byte 102,15,56,221,209
# When the length is not a multiple of 16, subtract 16 from %rdx so that one
# extra full block is held back for the stealing path.
2105 xorl %eax,%eax
2106 testq $15,%rdx
2107 setnz %al
2108 shlq $4,%rax
2109 subq %rax,%rdx
2110
# xmm0 = round 0 key, %rbp = key schedule base, %eax = rounds,
# %r10 = 16*rounds, %r9 = copy of the adjusted length,
# %rdx = adjusted length rounded down to a multiple of 16.
2111 movups (%rcx),%xmm0
2112 movq %rcx,%rbp
2113 movl %r10d,%eax
2114 shll $4,%r10d
2115 movq %rdx,%r9
2116 andq $-16,%rdx
2117
# xmm1 = round key at offset 16+16*rounds (the key the final aesdeclast uses).
2118 movups 16(%rcx,%r10,1),%xmm1
2119
# Derive five tweaks into xmm10..xmm14, each already xored with the round 0
# key in xmm0, and leave the following tweak (not xored) in xmm15.
# One step: paddq doubles both 64-bit halves of xmm15, and the carry bits are
# xored back in from xmm8 under the sign mask built with pshufd/psrad.
# L$xts_magic is defined outside this chunk.
2120 movdqa L$xts_magic(%rip),%xmm8
2121 movdqa %xmm2,%xmm15
2122 pshufd $0x5f,%xmm2,%xmm9
2123 pxor %xmm0,%xmm1
2124 movdqa %xmm9,%xmm14
2125 paddd %xmm9,%xmm9
2126 movdqa %xmm15,%xmm10
2127 psrad $31,%xmm14
2128 paddq %xmm15,%xmm15
2129 pand %xmm8,%xmm14
2130 pxor %xmm0,%xmm10
2131 pxor %xmm14,%xmm15
2132 movdqa %xmm9,%xmm14
2133 paddd %xmm9,%xmm9
2134 movdqa %xmm15,%xmm11
2135 psrad $31,%xmm14
2136 paddq %xmm15,%xmm15
2137 pand %xmm8,%xmm14
2138 pxor %xmm0,%xmm11
2139 pxor %xmm14,%xmm15
2140 movdqa %xmm9,%xmm14
2141 paddd %xmm9,%xmm9
2142 movdqa %xmm15,%xmm12
2143 psrad $31,%xmm14
2144 paddq %xmm15,%xmm15
2145 pand %xmm8,%xmm14
2146 pxor %xmm0,%xmm12
2147 pxor %xmm14,%xmm15
2148 movdqa %xmm9,%xmm14
2149 paddd %xmm9,%xmm9
2150 movdqa %xmm15,%xmm13
2151 psrad $31,%xmm14
2152 paddq %xmm15,%xmm15
2153 pand %xmm8,%xmm14
2154 pxor %xmm0,%xmm13
2155 pxor %xmm14,%xmm15
2156 movdqa %xmm15,%xmm14
2157 psrad $31,%xmm9
2158 paddq %xmm15,%xmm15
2159 pand %xmm8,%xmm9
2160 pxor %xmm0,%xmm14
2161 pxor %xmm9,%xmm15
# 96(%rsp) = round 0 key ^ last round key (xmm1 was xored with xmm0 above).
2162 movaps %xmm1,96(%rsp)
2163
# Fewer than six whole blocks: take the short path.
2164 subq $96,%rdx
2165 jc L$xts_dec_short
2166
# %rcx = key + 32 + 16*rounds; %rax = %r10 = 112 - 16*rounds, a negative
# index that L$xts_dec_loop6 steps by 32 until it reaches zero.
2167 movl $16+96,%eax
2168 leaq 32(%rbp,%r10,1),%rcx
2169 subq %r10,%rax
2170 movups 16(%rbp),%xmm1
2171 movq %rax,%r10
2172 leaq L$xts_magic(%rip),%r8
2173 jmp L$xts_dec_grandloop
2174
# Main loop, six blocks per iteration.
# xmm2..xmm7 = input ^ tweak ^ round 0 key; the first rounds are interleaved
# with the loads. xmm10..xmm14 and xmm8 are converted to
# tweak ^ last round key (xor with 96(%rsp)) and parked at 0..80(%rsp).
2175.p2align 5
2176L$xts_dec_grandloop:
2177 movdqu 0(%rdi),%xmm2
2178 movdqa %xmm0,%xmm8
2179 movdqu 16(%rdi),%xmm3
2180 pxor %xmm10,%xmm2
2181 movdqu 32(%rdi),%xmm4
2182 pxor %xmm11,%xmm3
2183.byte 102,15,56,222,209
2184 movdqu 48(%rdi),%xmm5
2185 pxor %xmm12,%xmm4
2186.byte 102,15,56,222,217
2187 movdqu 64(%rdi),%xmm6
2188 pxor %xmm13,%xmm5
2189.byte 102,15,56,222,225
2190 movdqu 80(%rdi),%xmm7
2191 pxor %xmm15,%xmm8
2192 movdqa 96(%rsp),%xmm9
2193 pxor %xmm14,%xmm6
2194.byte 102,15,56,222,233
2195 movups 32(%rbp),%xmm0
2196 leaq 96(%rdi),%rdi
2197 pxor %xmm8,%xmm7
2198
2199 pxor %xmm9,%xmm10
2200.byte 102,15,56,222,241
2201 pxor %xmm9,%xmm11
2202 movdqa %xmm10,0(%rsp)
2203.byte 102,15,56,222,249
2204 movups 48(%rbp),%xmm1
2205 pxor %xmm9,%xmm12
2206
2207.byte 102,15,56,222,208
2208 pxor %xmm9,%xmm13
2209 movdqa %xmm11,16(%rsp)
2210.byte 102,15,56,222,216
2211 pxor %xmm9,%xmm14
2212 movdqa %xmm12,32(%rsp)
2213.byte 102,15,56,222,224
2214.byte 102,15,56,222,232
2215 pxor %xmm9,%xmm8
2216 movdqa %xmm14,64(%rsp)
2217.byte 102,15,56,222,240
2218.byte 102,15,56,222,248
2219 movups 64(%rbp),%xmm0
2220 movdqa %xmm8,80(%rsp)
2221 pshufd $0x5f,%xmm15,%xmm9
2222 jmp L$xts_dec_loop6
# Middle rounds: two round keys per pass, alternating xmm1 and xmm0, until
# the index in %rax reaches zero (jnz tests the flags of the addq).
2223.p2align 5
2224L$xts_dec_loop6:
2225.byte 102,15,56,222,209
2226.byte 102,15,56,222,217
2227.byte 102,15,56,222,225
2228.byte 102,15,56,222,233
2229.byte 102,15,56,222,241
2230.byte 102,15,56,222,249
2231 movups -64(%rcx,%rax,1),%xmm1
2232 addq $32,%rax
2233
2234.byte 102,15,56,222,208
2235.byte 102,15,56,222,216
2236.byte 102,15,56,222,224
2237.byte 102,15,56,222,232
2238.byte 102,15,56,222,240
2239.byte 102,15,56,222,248
2240 movups -80(%rcx,%rax,1),%xmm0
2241 jnz L$xts_dec_loop6
2242
# Last five aesdec rounds, interleaved with the tweaks for the next
# iteration: xmm10..xmm14 are rebuilt as tweak ^ round 0 key (round 0 key
# reloaded from (%rbp)) and xmm15 ends up as the tweak after those.
# The store of xmm13 to 48(%rsp) is delayed until here.
2243 movdqa (%r8),%xmm8
2244 movdqa %xmm9,%xmm14
2245 paddd %xmm9,%xmm9
2246.byte 102,15,56,222,209
2247 paddq %xmm15,%xmm15
2248 psrad $31,%xmm14
2249.byte 102,15,56,222,217
2250 pand %xmm8,%xmm14
2251 movups (%rbp),%xmm10
2252.byte 102,15,56,222,225
2253.byte 102,15,56,222,233
2254.byte 102,15,56,222,241
2255 pxor %xmm14,%xmm15
2256 movaps %xmm10,%xmm11
2257.byte 102,15,56,222,249
2258 movups -64(%rcx),%xmm1
2259
2260 movdqa %xmm9,%xmm14
2261.byte 102,15,56,222,208
2262 paddd %xmm9,%xmm9
2263 pxor %xmm15,%xmm10
2264.byte 102,15,56,222,216
2265 psrad $31,%xmm14
2266 paddq %xmm15,%xmm15
2267.byte 102,15,56,222,224
2268.byte 102,15,56,222,232
2269 pand %xmm8,%xmm14
2270 movaps %xmm11,%xmm12
2271.byte 102,15,56,222,240
2272 pxor %xmm14,%xmm15
2273 movdqa %xmm9,%xmm14
2274.byte 102,15,56,222,248
2275 movups -48(%rcx),%xmm0
2276
2277 paddd %xmm9,%xmm9
2278.byte 102,15,56,222,209
2279 pxor %xmm15,%xmm11
2280 psrad $31,%xmm14
2281.byte 102,15,56,222,217
2282 paddq %xmm15,%xmm15
2283 pand %xmm8,%xmm14
2284.byte 102,15,56,222,225
2285.byte 102,15,56,222,233
2286 movdqa %xmm13,48(%rsp)
2287 pxor %xmm14,%xmm15
2288.byte 102,15,56,222,241
2289 movaps %xmm12,%xmm13
2290 movdqa %xmm9,%xmm14
2291.byte 102,15,56,222,249
2292 movups -32(%rcx),%xmm1
2293
2294 paddd %xmm9,%xmm9
2295.byte 102,15,56,222,208
2296 pxor %xmm15,%xmm12
2297 psrad $31,%xmm14
2298.byte 102,15,56,222,216
2299 paddq %xmm15,%xmm15
2300 pand %xmm8,%xmm14
2301.byte 102,15,56,222,224
2302.byte 102,15,56,222,232
2303.byte 102,15,56,222,240
2304 pxor %xmm14,%xmm15
2305 movaps %xmm13,%xmm14
2306.byte 102,15,56,222,248
2307
2308 movdqa %xmm9,%xmm0
2309 paddd %xmm9,%xmm9
2310.byte 102,15,56,222,209
2311 pxor %xmm15,%xmm13
2312 psrad $31,%xmm0
2313.byte 102,15,56,222,217
2314 paddq %xmm15,%xmm15
2315 pand %xmm8,%xmm0
2316.byte 102,15,56,222,225
2317.byte 102,15,56,222,233
2318 pxor %xmm0,%xmm15
2319 movups (%rbp),%xmm0
2320.byte 102,15,56,222,241
2321.byte 102,15,56,222,249
2322 movups 16(%rbp),%xmm1
2323
# aesdeclast with memory operands 0(%rsp)..80(%rsp): the final round and the
# tweak xor are applied in one instruction per block (xmm2..xmm7).
# %rax is reloaded with the saved round index from %r10.
2324 pxor %xmm15,%xmm14
2325.byte 102,15,56,223,84,36,0
2326 psrad $31,%xmm9
2327 paddq %xmm15,%xmm15
2328.byte 102,15,56,223,92,36,16
2329.byte 102,15,56,223,100,36,32
2330 pand %xmm8,%xmm9
2331 movq %r10,%rax
2332.byte 102,15,56,223,108,36,48
2333.byte 102,15,56,223,116,36,64
2334.byte 102,15,56,223,124,36,80
2335 pxor %xmm9,%xmm15
2336
2337 leaq 96(%rsi),%rsi
2338 movups %xmm2,-96(%rsi)
2339 movups %xmm3,-80(%rsi)
2340 movups %xmm4,-64(%rsi)
2341 movups %xmm5,-48(%rsi)
2342 movups %xmm6,-32(%rsi)
2343 movups %xmm7,-16(%rsi)
2344 subq $96,%rdx
2345 jnc L$xts_dec_grandloop
2346
# Leaving the main loop: turn the index back into %eax = rounds
# ((112 - index) / 16) and point %rcx at the key schedule base again.
2347 movl $16+96,%eax
2348 subl %r10d,%eax
2349 movq %rbp,%rcx
2350 shrl $4,%eax
2351
# Tail: zero to five whole blocks remain. %r10d keeps the round count.
# Each pxor with xmm0 (round 0 key) strips the key folded into a tweak.
# xmm10 and xmm11 are both stripped before the first branch because the
# stealing path at L$xts_dec_done2 uses both.
2352L$xts_dec_short:
2353
2354 movl %eax,%r10d
2355 pxor %xmm0,%xmm10
2356 pxor %xmm0,%xmm11
# Undo the earlier subtraction: %rdx = remaining bytes in whole blocks.
2357 addq $96,%rdx
2358 jz L$xts_dec_done
2359
# Dispatch on 16/32/48/64 bytes; fall through for 80 bytes (five blocks).
2360 pxor %xmm0,%xmm12
2361 cmpq $0x20,%rdx
2362 jb L$xts_dec_one
2363 pxor %xmm0,%xmm13
2364 je L$xts_dec_two
2365
2366 pxor %xmm0,%xmm14
2367 cmpq $0x40,%rdx
2368 jb L$xts_dec_three
2369 je L$xts_dec_four
2370
2371 movdqu (%rdi),%xmm2
2372 movdqu 16(%rdi),%xmm3
2373 movdqu 32(%rdi),%xmm4
2374 pxor %xmm10,%xmm2
2375 movdqu 48(%rdi),%xmm5
2376 pxor %xmm11,%xmm3
2377 movdqu 64(%rdi),%xmm6
2378 leaq 80(%rdi),%rdi
2379 pxor %xmm12,%xmm4
2380 pxor %xmm13,%xmm5
2381 pxor %xmm14,%xmm6
2382
# NOTE(review): _aesni_decrypt6 is defined outside this chunk; presumably it
# takes the key in %rcx and rounds in %eax like the two-block helper -- confirm.
2383 call _aesni_decrypt6
2384
# Second tweak xor on the five results. In between, xmm14 is rebuilt as the
# mask of negative dwords of xmm15 (pcmpgtd against zero) and shuffled into
# xmm11, preparing the carry mask for one more tweak step.
2385 xorps %xmm10,%xmm2
2386 xorps %xmm11,%xmm3
2387 xorps %xmm12,%xmm4
2388 movdqu %xmm2,(%rsi)
2389 xorps %xmm13,%xmm5
2390 movdqu %xmm3,16(%rsi)
2391 xorps %xmm14,%xmm6
2392 movdqu %xmm4,32(%rsi)
2393 pxor %xmm14,%xmm14
2394 movdqu %xmm5,48(%rsi)
2395 pcmpgtd %xmm15,%xmm14
2396 movdqu %xmm6,64(%rsi)
2397 leaq 80(%rsi),%rsi
2398 pshufd $0x13,%xmm14,%xmm11
2399 andq $15,%r9
2400 jz L$xts_dec_ret
2401
# Partial block follows: xmm10 = tweak in xmm15, xmm11 = the tweak after it.
# NOTE(review): xmm8 is expected to still hold the L$xts_magic constant here;
# confirm that _aesni_decrypt6 does not clobber it.
2402 movdqa %xmm15,%xmm10
2403 paddq %xmm15,%xmm15
2404 pand %xmm8,%xmm11
2405 pxor %xmm15,%xmm11
2406 jmp L$xts_dec_done2
2407
# One block, decrypted inline; afterwards xmm10/xmm11 = the next two tweaks.
2408.p2align 4
2409L$xts_dec_one:
2410 movups (%rdi),%xmm2
2411 leaq 16(%rdi),%rdi
2412 xorps %xmm10,%xmm2
2413 movups (%rcx),%xmm0
2414 movups 16(%rcx),%xmm1
2415 leaq 32(%rcx),%rcx
2416 xorps %xmm0,%xmm2
2417L$oop_dec1_12:
2418.byte 102,15,56,222,209
2419 decl %eax
2420 movups (%rcx),%xmm1
2421 leaq 16(%rcx),%rcx
2422 jnz L$oop_dec1_12
2423.byte 102,15,56,223,209
2424 xorps %xmm10,%xmm2
2425 movdqa %xmm11,%xmm10
2426 movups %xmm2,(%rsi)
2427 movdqa %xmm12,%xmm11
2428 leaq 16(%rsi),%rsi
2429 jmp L$xts_dec_done
2430
# Two blocks; afterwards xmm10/xmm11 = the next two tweaks.
2431.p2align 4
2432L$xts_dec_two:
2433 movups (%rdi),%xmm2
2434 movups 16(%rdi),%xmm3
2435 leaq 32(%rdi),%rdi
2436 xorps %xmm10,%xmm2
2437 xorps %xmm11,%xmm3
2438
2439 call _aesni_decrypt2
2440
2441 xorps %xmm10,%xmm2
2442 movdqa %xmm12,%xmm10
2443 xorps %xmm11,%xmm3
2444 movdqa %xmm13,%xmm11
2445 movups %xmm2,(%rsi)
2446 movups %xmm3,16(%rsi)
2447 leaq 32(%rsi),%rsi
2448 jmp L$xts_dec_done
2449
# Three blocks; afterwards xmm10/xmm11 = the next two tweaks.
2450.p2align 4
2451L$xts_dec_three:
2452 movups (%rdi),%xmm2
2453 movups 16(%rdi),%xmm3
2454 movups 32(%rdi),%xmm4
2455 leaq 48(%rdi),%rdi
2456 xorps %xmm10,%xmm2
2457 xorps %xmm11,%xmm3
2458 xorps %xmm12,%xmm4
2459
2460 call _aesni_decrypt3
2461
2462 xorps %xmm10,%xmm2
2463 movdqa %xmm13,%xmm10
2464 xorps %xmm11,%xmm3
2465 movdqa %xmm14,%xmm11
2466 xorps %xmm12,%xmm4
2467 movups %xmm2,(%rsi)
2468 movups %xmm3,16(%rsi)
2469 movups %xmm4,32(%rsi)
2470 leaq 48(%rsi),%rsi
2471 jmp L$xts_dec_done
2472
# Four blocks; afterwards xmm10/xmm11 = the next two tweaks.
2473.p2align 4
2474L$xts_dec_four:
2475 movups (%rdi),%xmm2
2476 movups 16(%rdi),%xmm3
2477 movups 32(%rdi),%xmm4
2478 xorps %xmm10,%xmm2
2479 movups 48(%rdi),%xmm5
2480 leaq 64(%rdi),%rdi
2481 xorps %xmm11,%xmm3
2482 xorps %xmm12,%xmm4
2483 xorps %xmm13,%xmm5
2484
2485 call _aesni_decrypt4
2486
2487 pxor %xmm10,%xmm2
2488 movdqa %xmm14,%xmm10
2489 pxor %xmm11,%xmm3
2490 movdqa %xmm15,%xmm11
2491 pxor %xmm12,%xmm4
2492 movdqu %xmm2,(%rsi)
2493 pxor %xmm13,%xmm5
2494 movdqu %xmm3,16(%rsi)
2495 movdqu %xmm4,32(%rsi)
2496 movdqu %xmm5,48(%rsi)
2497 leaq 64(%rsi),%rsi
2498 jmp L$xts_dec_done
2499
# %r9 = length mod 16. Zero means no partial block, so finish.
2500.p2align 4
2501L$xts_dec_done:
2502 andq $15,%r9
2503 jz L$xts_dec_ret
# Stealing path. The full block at (%rdi) is decrypted first with the LATER
# tweak (xmm11) and stored at (%rsi).
2504L$xts_dec_done2:
2505 movq %r9,%rdx
2506 movq %rbp,%rcx
2507 movl %r10d,%eax
2508
2509 movups (%rdi),%xmm2
2510 xorps %xmm11,%xmm2
2511 movups (%rcx),%xmm0
2512 movups 16(%rcx),%xmm1
2513 leaq 32(%rcx),%rcx
2514 xorps %xmm0,%xmm2
2515L$oop_dec1_13:
2516.byte 102,15,56,222,209
2517 decl %eax
2518 movups (%rcx),%xmm1
2519 leaq 16(%rcx),%rcx
2520 jnz L$oop_dec1_13
2521.byte 102,15,56,223,209
2522 xorps %xmm11,%xmm2
2523 movups %xmm2,(%rsi)
2524
# Byte swap, one byte per pass for %rdx bytes: the trailing input byte at
# 16(%rdi) replaces a byte of the block just written at (%rsi), and the
# displaced byte becomes the partial output at 16(%rsi).
2525L$xts_dec_steal:
2526 movzbl 16(%rdi),%eax
2527 movzbl (%rsi),%ecx
2528 leaq 1(%rdi),%rdi
2529 movb %al,(%rsi)
2530 movb %cl,16(%rsi)
2531 leaq 1(%rsi),%rsi
2532 subq $1,%rdx
2533 jnz L$xts_dec_steal
2534
# Rewind %rsi, reload key base and rounds, then decrypt the patched block at
# (%rsi) in place with the EARLIER tweak (xmm10).
2535 subq %r9,%rsi
2536 movq %rbp,%rcx
2537 movl %r10d,%eax
2538
2539 movups (%rsi),%xmm2
2540 xorps %xmm10,%xmm2
2541 movups (%rcx),%xmm0
2542 movups 16(%rcx),%xmm1
2543 leaq 32(%rcx),%rcx
2544 xorps %xmm0,%xmm2
2545L$oop_dec1_14:
2546.byte 102,15,56,222,209
2547 decl %eax
2548 movups (%rcx),%xmm1
2549 leaq 16(%rcx),%rcx
2550 jnz L$oop_dec1_14
2551.byte 102,15,56,223,209
2552 xorps %xmm10,%xmm2
2553 movups %xmm2,(%rsi)
2554
# Epilogue: clear xmm0..xmm15 and the seven 16-byte stack slots, then
# restore %rbp from -8(%r11) and %rsp from %r11.
2555L$xts_dec_ret:
2556 xorps %xmm0,%xmm0
2557 pxor %xmm1,%xmm1
2558 pxor %xmm2,%xmm2
2559 pxor %xmm3,%xmm3
2560 pxor %xmm4,%xmm4
2561 pxor %xmm5,%xmm5
2562 pxor %xmm6,%xmm6
2563 pxor %xmm7,%xmm7
2564 movaps %xmm0,0(%rsp)
2565 pxor %xmm8,%xmm8
2566 movaps %xmm0,16(%rsp)
2567 pxor %xmm9,%xmm9
2568 movaps %xmm0,32(%rsp)
2569 pxor %xmm10,%xmm10
2570 movaps %xmm0,48(%rsp)
2571 pxor %xmm11,%xmm11
2572 movaps %xmm0,64(%rsp)
2573 pxor %xmm12,%xmm12
2574 movaps %xmm0,80(%rsp)
2575 pxor %xmm13,%xmm13
2576 movaps %xmm0,96(%rsp)
2577 pxor %xmm14,%xmm14
2578 pxor %xmm15,%xmm15
2579 movq -8(%r11),%rbp
2580
2581 leaq (%r11),%rsp
2582
2583L$xts_dec_epilogue:
2584 .byte 0xf3,0xc3
2585
2586
# _aesni_ocb_encrypt -- OCB-style bulk encryption driver (generated code).
# NOTE(review): presumably the C prototype is
#   aesni_ocb_encrypt(in, out, blocks, key, start_block_num, offset_i, L_, checksum)
# -- confirm against the header.
# Register roles as used by the code below:
#   %rdi  input pointer          %rsi  output pointer
#   %rdx  number of 16-byte blocks (decremented by 1 or 6)
#   %rcx  key schedule, round count read from offset 240; copied to %r11
#   %r8   running block index (its lowest set bit selects a table entry)
#   %r9   pointer to a 16-byte offset value, loaded at entry, stored at exit
#   8(entry %rsp)   -> %rbx: base of a table of 16-byte entries
#   16(entry %rsp)  -> %rbp: pointer to a 16-byte checksum, loaded and stored back
# Callee-saved %rbx, %rbp, %r12, %r13, %r14 are pushed and restored.
# .byte 243,15,30,250 = endbr64; .byte 0xf3,0xc3 = rep ret.
2587.globl _aesni_ocb_encrypt
2588
2589.p2align 5
2590_aesni_ocb_encrypt:
2591
2592.byte 243,15,30,250
2593 leaq (%rsp),%rax
2594 pushq %rbx
2595
2596 pushq %rbp
2597
2598 pushq %r12
2599
2600 pushq %r13
2601
2602 pushq %r14
2603
# Fetch the two stack-passed arguments relative to the entry %rsp.
2604 movq 8(%rax),%rbx
2605 movq 8+8(%rax),%rbp
2606
# %r10 = 16*rounds; xmm9 = round 0 key; xmm1 = round key at 16+16*rounds.
2607 movl 240(%rcx),%r10d
2608 movq %rcx,%r11
2609 shll $4,%r10d
2610 movups (%rcx),%xmm9
2611 movups 16(%rcx,%r10,1),%xmm1
2612
# xmm9 = round 0 key ^ last round key; xmm15 = offset ^ last round key.
2613 movdqu (%r9),%xmm15
2614 pxor %xmm1,%xmm9
2615 pxor %xmm1,%xmm15
2616
# %rcx = key + 32 + 16*rounds; %rax = %r10 = 48 - 16*rounds, the negative
# round-key index consumed by the helper loops; xmm1 = round key at offset 16.
2617 movl $16+32,%eax
2618 leaq 32(%r11,%r10,1),%rcx
2619 movups 16(%r11),%xmm1
2620 subq %r10,%rax
2621 movq %rax,%r10
2622
# xmm10 = first table entry, xmm8 = running checksum.
2623 movdqu (%rbx),%xmm10
2624 movdqu (%rbp),%xmm8
2625
# With an even block index, one block is processed alone first so that the
# index is odd on entry to L$ocb_enc_odd.
2626 testq $1,%r8
2627 jnz L$ocb_enc_odd
2628
# NOTE(review): bsfq leaves its destination undefined for a zero source;
# presumably callers never pass a block index of 0 -- confirm.
2629 bsfq %r8,%r12
2630 addq $1,%r8
2631 shlq $4,%r12
2632 movdqu (%rbx,%r12,1),%xmm7
2633 movdqu (%rdi),%xmm2
2634 leaq 16(%rdi),%rdi
2635
# __ocb_encrypt1 starts at the end of this chunk and is only partly visible;
# the caller takes the updated offset from xmm7 afterwards.
2636 call __ocb_encrypt1
2637
2638 movdqa %xmm7,%xmm15
2639 movups %xmm2,(%rsi)
2640 leaq 16(%rsi),%rsi
2641 subq $1,%rdx
2642 jz L$ocb_enc_done
2643
# %r12/%r13/%r14 = 16 * (index of lowest set bit) of %r8+1, %r8+3, %r8+5:
# byte offsets into the %rbx table for the even-numbered upcoming blocks.
# %r8 advances by 6.
2644L$ocb_enc_odd:
2645 leaq 1(%r8),%r12
2646 leaq 3(%r8),%r13
2647 leaq 5(%r8),%r14
2648 leaq 6(%r8),%r8
2649 bsfq %r12,%r12
2650 bsfq %r13,%r13
2651 bsfq %r14,%r14
2652 shlq $4,%r12
2653 shlq $4,%r13
2654 shlq $4,%r14
2655
2656 subq $6,%rdx
2657 jc L$ocb_enc_short
2658 jmp L$ocb_enc_grandloop
2659
# Main loop: six blocks per iteration through __ocb_encrypt6.
2660.p2align 5
2661L$ocb_enc_grandloop:
2662 movdqu 0(%rdi),%xmm2
2663 movdqu 16(%rdi),%xmm3
2664 movdqu 32(%rdi),%xmm4
2665 movdqu 48(%rdi),%xmm5
2666 movdqu 64(%rdi),%xmm6
2667 movdqu 80(%rdi),%xmm7
2668 leaq 96(%rdi),%rdi
2669
2670 call __ocb_encrypt6
2671
2672 movups %xmm2,0(%rsi)
2673 movups %xmm3,16(%rsi)
2674 movups %xmm4,32(%rsi)
2675 movups %xmm5,48(%rsi)
2676 movups %xmm6,64(%rsi)
2677 movups %xmm7,80(%rsi)
2678 leaq 96(%rsi),%rsi
2679 subq $6,%rdx
2680 jnc L$ocb_enc_grandloop
2681
# Tail: %rdx = 0..5 remaining blocks. Loads are interleaved with the
# dispatch (movdqu does not modify the flags set by cmpq).
2682L$ocb_enc_short:
2683 addq $6,%rdx
2684 jz L$ocb_enc_done
2685
2686 movdqu 0(%rdi),%xmm2
2687 cmpq $2,%rdx
2688 jb L$ocb_enc_one
2689 movdqu 16(%rdi),%xmm3
2690 je L$ocb_enc_two
2691
2692 movdqu 32(%rdi),%xmm4
2693 cmpq $4,%rdx
2694 jb L$ocb_enc_three
2695 movdqu 48(%rdi),%xmm5
2696 je L$ocb_enc_four
2697
# Five blocks: the sixth lane is zeroed; the new offset is taken from xmm14
# (the fifth offset computed by __ocb_encrypt6).
2698 movdqu 64(%rdi),%xmm6
2699 pxor %xmm7,%xmm7
2700
2701 call __ocb_encrypt6
2702
2703 movdqa %xmm14,%xmm15
2704 movups %xmm2,0(%rsi)
2705 movups %xmm3,16(%rsi)
2706 movups %xmm4,32(%rsi)
2707 movups %xmm5,48(%rsi)
2708 movups %xmm6,64(%rsi)
2709
2710 jmp L$ocb_enc_done
2711
# One block: xmm7 = first table entry (xmm10) as the offset increment.
2712.p2align 4
2713L$ocb_enc_one:
2714 movdqa %xmm10,%xmm7
2715
2716 call __ocb_encrypt1
2717
2718 movdqa %xmm7,%xmm15
2719 movups %xmm2,0(%rsi)
2720 jmp L$ocb_enc_done
2721
# Two blocks via the four-block helper with lanes 3 and 4 zeroed;
# new offset = xmm11 (second offset).
2722.p2align 4
2723L$ocb_enc_two:
2724 pxor %xmm4,%xmm4
2725 pxor %xmm5,%xmm5
2726
2727 call __ocb_encrypt4
2728
2729 movdqa %xmm11,%xmm15
2730 movups %xmm2,0(%rsi)
2731 movups %xmm3,16(%rsi)
2732
2733 jmp L$ocb_enc_done
2734
# Three blocks via the four-block helper with lane 4 zeroed;
# new offset = xmm12 (third offset).
2735.p2align 4
2736L$ocb_enc_three:
2737 pxor %xmm5,%xmm5
2738
2739 call __ocb_encrypt4
2740
2741 movdqa %xmm12,%xmm15
2742 movups %xmm2,0(%rsi)
2743 movups %xmm3,16(%rsi)
2744 movups %xmm4,32(%rsi)
2745
2746 jmp L$ocb_enc_done
2747
# Four blocks; new offset = xmm13 (fourth offset).
2748.p2align 4
2749L$ocb_enc_four:
2750 call __ocb_encrypt4
2751
2752 movdqa %xmm13,%xmm15
2753 movups %xmm2,0(%rsi)
2754 movups %xmm3,16(%rsi)
2755 movups %xmm4,32(%rsi)
2756 movups %xmm5,48(%rsi)
2757
# Write back checksum and offset.
# NOTE(review): xmm0 presumably still holds the last round key left by the
# helpers, so this pxor undoes the fold applied to xmm15 at entry -- confirm
# for the __ocb_encrypt1 path, which is not fully visible here.
2758L$ocb_enc_done:
2759 pxor %xmm0,%xmm15
2760 movdqu %xmm8,(%rbp)
2761 movdqu %xmm15,(%r9)
2762
# Clear all xmm registers before returning.
2763 xorps %xmm0,%xmm0
2764 pxor %xmm1,%xmm1
2765 pxor %xmm2,%xmm2
2766 pxor %xmm3,%xmm3
2767 pxor %xmm4,%xmm4
2768 pxor %xmm5,%xmm5
2769 pxor %xmm6,%xmm6
2770 pxor %xmm7,%xmm7
2771 pxor %xmm8,%xmm8
2772 pxor %xmm9,%xmm9
2773 pxor %xmm10,%xmm10
2774 pxor %xmm11,%xmm11
2775 pxor %xmm12,%xmm12
2776 pxor %xmm13,%xmm13
2777 pxor %xmm14,%xmm14
2778 pxor %xmm15,%xmm15
# Five pushes = 40 bytes: %rax = entry %rsp; restore callee-saved registers.
2779 leaq 40(%rsp),%rax
2780
2781 movq -40(%rax),%r14
2782
2783 movq -32(%rax),%r13
2784
2785 movq -24(%rax),%r12
2786
2787 movq -16(%rax),%rbp
2788
2789 movq -8(%rax),%rbx
2790
2791 leaq (%rax),%rsp
2792
2793L$ocb_enc_epilogue:
2794 .byte 0xf3,0xc3
2795
2796
2797
2798
# __ocb_encrypt6 -- internal helper: encrypt six blocks with chained offsets.
# Register contract as used by this code (set up by _aesni_ocb_encrypt):
#   xmm2..xmm7  six input blocks on entry, six output blocks on return
#   xmm8        running xor of the input blocks (checksum accumulator)
#   xmm9        round 0 key ^ last round key
#   xmm10       table entry at (%rbx) on entry; reloaded from (%rbx) on return
#   xmm15       current offset ^ last round key on entry; sixth offset ^ last
#               round key on return (xmm10..xmm14 on return are not offsets:
#               xmm10 is reloaded, xmm11..xmm14 hold offsets 2..5 in that form)
#   xmm1        round key at offset 16 on entry and on return
#   %rbx        table base; %r12/%r13/%r14 byte offsets of entries 2, 4, 6
#   %r11        key schedule base; %rcx key + 32 + 16*rounds
#   %rax        negative round-key index, restored from %r10 before return
#   %r8         block index; advanced by 6, and %r12/%r13/%r14 are recomputed
#               for the next call
# .byte 102,15,56,220,xx = aesenc; 102,65,15,56,221,xx = aesenclast with
# xmm10..xmm15 as the source operand.
2799.p2align 5
2800__ocb_encrypt6:
2801
# xmm15 becomes offset ^ round 0 key. Each following offset is the previous
# one xored with a table entry; each block is first folded into xmm8 and then
# xored with its offset (which also applies the round 0 key).
2802 pxor %xmm9,%xmm15
2803 movdqu (%rbx,%r12,1),%xmm11
2804 movdqa %xmm10,%xmm12
2805 movdqu (%rbx,%r13,1),%xmm13
2806 movdqa %xmm10,%xmm14
2807 pxor %xmm15,%xmm10
2808 movdqu (%rbx,%r14,1),%xmm15
2809 pxor %xmm10,%xmm11
2810 pxor %xmm2,%xmm8
2811 pxor %xmm10,%xmm2
2812 pxor %xmm11,%xmm12
2813 pxor %xmm3,%xmm8
2814 pxor %xmm11,%xmm3
2815 pxor %xmm12,%xmm13
2816 pxor %xmm4,%xmm8
2817 pxor %xmm12,%xmm4
2818 pxor %xmm13,%xmm14
2819 pxor %xmm5,%xmm8
2820 pxor %xmm13,%xmm5
2821 pxor %xmm14,%xmm15
2822 pxor %xmm6,%xmm8
2823 pxor %xmm14,%xmm6
2824 pxor %xmm7,%xmm8
2825 pxor %xmm15,%xmm7
2826 movups 32(%r11),%xmm0
2827
# Precompute table offsets for the next six blocks (lowest set bit of
# %r8+1, %r8+3, %r8+5); the shifts by 4 are spread out further down.
2828 leaq 1(%r8),%r12
2829 leaq 3(%r8),%r13
2830 leaq 5(%r8),%r14
2831 addq $6,%r8
2832 pxor %xmm9,%xmm10
2833 bsfq %r12,%r12
2834 bsfq %r13,%r13
2835 bsfq %r14,%r14
2836
# First round (xmm1), interleaved with converting xmm10..xmm15 from
# offset ^ round 0 key to offset ^ last round key (xor with xmm9).
2837.byte 102,15,56,220,209
2838.byte 102,15,56,220,217
2839.byte 102,15,56,220,225
2840.byte 102,15,56,220,233
2841 pxor %xmm9,%xmm11
2842 pxor %xmm9,%xmm12
2843.byte 102,15,56,220,241
2844 pxor %xmm9,%xmm13
2845 pxor %xmm9,%xmm14
2846.byte 102,15,56,220,249
2847 movups 48(%r11),%xmm1
2848 pxor %xmm9,%xmm15
2849
2850.byte 102,15,56,220,208
2851.byte 102,15,56,220,216
2852.byte 102,15,56,220,224
2853.byte 102,15,56,220,232
2854.byte 102,15,56,220,240
2855.byte 102,15,56,220,248
2856 movups 64(%r11),%xmm0
2857 shlq $4,%r12
2858 shlq $4,%r13
2859 jmp L$ocb_enc_loop6
2860
# Middle rounds: two round keys per pass, alternating xmm1 and xmm0, until
# the index in %rax reaches zero (jnz tests the flags of the addq).
2861.p2align 5
2862L$ocb_enc_loop6:
2863.byte 102,15,56,220,209
2864.byte 102,15,56,220,217
2865.byte 102,15,56,220,225
2866.byte 102,15,56,220,233
2867.byte 102,15,56,220,241
2868.byte 102,15,56,220,249
2869 movups (%rcx,%rax,1),%xmm1
2870 addq $32,%rax
2871
2872.byte 102,15,56,220,208
2873.byte 102,15,56,220,216
2874.byte 102,15,56,220,224
2875.byte 102,15,56,220,232
2876.byte 102,15,56,220,240
2877.byte 102,15,56,220,248
2878 movups -16(%rcx,%rax,1),%xmm0
2879 jnz L$ocb_enc_loop6
2880
# Last aesenc round, then reload xmm1 with the round key at offset 16 for
# the next call.
2881.byte 102,15,56,220,209
2882.byte 102,15,56,220,217
2883.byte 102,15,56,220,225
2884.byte 102,15,56,220,233
2885.byte 102,15,56,220,241
2886.byte 102,15,56,220,249
2887 movups 16(%r11),%xmm1
2888 shlq $4,%r14
2889
# aesenclast with xmm10..xmm15 (offset ^ last round key): the final round and
# the output offset xor in one instruction per block. xmm10 is reloaded with
# the first table entry right after its use; %rax is restored from %r10.
2890.byte 102,65,15,56,221,210
2891 movdqu (%rbx),%xmm10
2892 movq %r10,%rax
2893.byte 102,65,15,56,221,219
2894.byte 102,65,15,56,221,228
2895.byte 102,65,15,56,221,237
2896.byte 102,65,15,56,221,246
2897.byte 102,65,15,56,221,255
2898 .byte 0xf3,0xc3
2899
2900
2901
2902
# __ocb_encrypt4: internal helper that runs the four blocks in xmm2..xmm5
# through the AES encryption rounds in parallel, each with its own XOR mask.
# Inputs as used below:
#   xmm15, xmm9          XORed together to seed the mask chain
#   xmm10                16-byte value chained into masks one and three
#   (%rbx,%r12), (%rbx,%r13)  16-byte table entries chained into masks two and four
#   xmm1                 round key already loaded by the caller (reloaded from
#                        16(%r11) before returning)
#   r11                  key schedule base; rcx/rax = end pointer plus negative
#                        byte index for the round-key loop; r10 = value rax is
#                        reset to on exit
#   xmm8                 accumulates the XOR of the four unmasked inputs
# Outputs: xmm2..xmm5 = results, xmm10..xmm13 = the four masks (each with xmm9
# folded in), xmm0 = last round key loaded by the loop.
# NOTE(review): the caller is outside this view; presumably the register setup
# matches the one visible in _aesni_ocb_decrypt (masks = OCB offsets, xmm8 =
# OCB checksum) - confirm.
2903.p2align 5
2904__ocb_encrypt4:
2905
# Build the mask chain, fold each input into xmm8, then mask the input.
2906 pxor %xmm9,%xmm15
2907 movdqu (%rbx,%r12,1),%xmm11
2908 movdqa %xmm10,%xmm12
2909 movdqu (%rbx,%r13,1),%xmm13
2910 pxor %xmm15,%xmm10
2911 pxor %xmm10,%xmm11
2912 pxor %xmm2,%xmm8
2913 pxor %xmm10,%xmm2
2914 pxor %xmm11,%xmm12
2915 pxor %xmm3,%xmm8
2916 pxor %xmm11,%xmm3
2917 pxor %xmm12,%xmm13
2918 pxor %xmm4,%xmm8
2919 pxor %xmm12,%xmm4
2920 pxor %xmm5,%xmm8
2921 pxor %xmm13,%xmm5
2922 movups 32(%r11),%xmm0
2923
# Fold xmm9 into every mask; the masks serve as the aesenclast keys below.
2924 pxor %xmm9,%xmm10
2925 pxor %xmm9,%xmm11
2926 pxor %xmm9,%xmm12
2927 pxor %xmm9,%xmm13
2928
# Hand-encoded: aesenc %xmm1 into xmm2, xmm3, xmm4, xmm5
2929.byte 102,15,56,220,209
2930.byte 102,15,56,220,217
2931.byte 102,15,56,220,225
2932.byte 102,15,56,220,233
2933 movups 48(%r11),%xmm1
2934
# Hand-encoded: aesenc %xmm0 into xmm2, xmm3, xmm4, xmm5
2935.byte 102,15,56,220,208
2936.byte 102,15,56,220,216
2937.byte 102,15,56,220,224
2938.byte 102,15,56,220,232
2939 movups 64(%r11),%xmm0
# Jump over the alignment padding that precedes the loop label.
2940 jmp L$ocb_enc_loop4
2941
2942.p2align 5
# Two rounds per pass, alternating xmm1 and xmm0 as the round key. rax is a
# negative byte index that counts up; the loop ends when addq brings it to
# zero (the movups in between leaves the flags untouched).
2943L$ocb_enc_loop4:
2944.byte 102,15,56,220,209
2945.byte 102,15,56,220,217
2946.byte 102,15,56,220,225
2947.byte 102,15,56,220,233
2948 movups (%rcx,%rax,1),%xmm1
2949 addq $32,%rax
2950
2951.byte 102,15,56,220,208
2952.byte 102,15,56,220,216
2953.byte 102,15,56,220,224
2954.byte 102,15,56,220,232
2955 movups -16(%rcx,%rax,1),%xmm0
2956 jnz L$ocb_enc_loop4
2957
# One more aesenc with xmm1, then restore xmm1 and rax for the next call.
2958.byte 102,15,56,220,209
2959.byte 102,15,56,220,217
2960.byte 102,15,56,220,225
2961.byte 102,15,56,220,233
2962 movups 16(%r11),%xmm1
2963 movq %r10,%rax
2964
# Hand-encoded: aesenclast with xmm10, xmm11, xmm12, xmm13 into xmm2..xmm5
2965.byte 102,65,15,56,221,210
2966.byte 102,65,15,56,221,219
2967.byte 102,65,15,56,221,228
2968.byte 102,65,15,56,221,237
# Hand-encoded: rep ret
2969 .byte 0xf3,0xc3
2970
2971
2972
2973
# __ocb_encrypt1: internal helper that runs the single block in xmm2 through
# the AES encryption rounds with one XOR mask.
# Inputs as used below: xmm7 (16-byte value combined with xmm15 to form the
# mask), xmm15, xmm9, xmm1 (round key already loaded), r11 (key schedule
# base), rcx/rax (end pointer plus negative byte index for the round-key
# loop), r10 (value rax is reset to). xmm8 accumulates the unmasked input.
# Outputs: xmm2 = result, xmm7 = xmm7 XOR xmm15 (the aesenclast key),
# xmm0 = last round key loaded by the loop.
# NOTE(review): the caller is outside this view; presumably the setup matches
# the one visible in _aesni_ocb_decrypt - confirm.
2974.p2align 5
2975__ocb_encrypt1:
2976
# Mask = xmm7 ^ xmm15 ^ xmm9; fold the input into xmm8 before masking it.
2977 pxor %xmm15,%xmm7
2978 pxor %xmm9,%xmm7
2979 pxor %xmm2,%xmm8
2980 pxor %xmm7,%xmm2
2981 movups 32(%r11),%xmm0
2982
# Hand-encoded: aesenc %xmm1,%xmm2
2983.byte 102,15,56,220,209
2984 movups 48(%r11),%xmm1
# Second XOR with xmm9 cancels the first one, leaving xmm7 ^ xmm15 in xmm7.
2985 pxor %xmm9,%xmm7
2986
# Hand-encoded: aesenc %xmm0,%xmm2
2987.byte 102,15,56,220,208
2988 movups 64(%r11),%xmm0
# Jump over the alignment padding that precedes the loop label.
2989 jmp L$ocb_enc_loop1
2990
2991.p2align 5
# Two rounds per pass; ends when the negative index in rax reaches zero.
2992L$ocb_enc_loop1:
2993.byte 102,15,56,220,209
2994 movups (%rcx,%rax,1),%xmm1
2995 addq $32,%rax
2996
2997.byte 102,15,56,220,208
2998 movups -16(%rcx,%rax,1),%xmm0
2999 jnz L$ocb_enc_loop1
3000
# One more aesenc with xmm1, then restore xmm1 and rax for the next call.
3001.byte 102,15,56,220,209
3002 movups 16(%r11),%xmm1
3003 movq %r10,%rax
3004
# Hand-encoded: aesenclast %xmm7,%xmm2
3005.byte 102,15,56,221,215
# Hand-encoded: rep ret
3006 .byte 0xf3,0xc3
3007
3008
3009
# aesni_ocb_decrypt: bulk AES decryption of whole 16-byte blocks, each block
# XORed with its own mask before and after the cipher, six blocks per pass of
# the main loop.
# Arguments as used below (System V AMD64 register order):
#   rdi = input, rsi = output, rdx = number of 16-byte blocks,
#   rcx = key schedule (round count read from offset 240),
#   r8  = block counter; bsf of counter values selects table entries,
#   r9  = 16-byte running mask, read on entry and written back on exit,
#   first stack argument  -> rbx, base of a table of 16-byte entries,
#   second stack argument -> rbp, 16-byte accumulator that is loaded, XORed
#                            with every output block and stored back.
# Saves and restores rbx, rbp, r12, r13, r14; zeroes xmm0..xmm15 before return.
# NOTE(review): the stack arguments and r8/r9 presumably correspond to the
# OCB L_ table, checksum, start_block_num and offset_i - confirm against the
# C prototype.
# NOTE(review): a zero block count does not look supported (with an even
# counter one block is processed before rdx is tested) - confirm that callers
# never pass 0.
3010.globl _aesni_ocb_decrypt
3011
3012.p2align 5
3013_aesni_ocb_decrypt:
3014
# Hand-encoded: endbr64
3015.byte 243,15,30,250
# rax = entry rsp, used to reach the stack-passed arguments after the pushes.
3016 leaq (%rsp),%rax
3017 pushq %rbx
3018
3019 pushq %rbp
3020
3021 pushq %r12
3022
3023 pushq %r13
3024
3025 pushq %r14
3026
3027 movq 8(%rax),%rbx
3028 movq 8+8(%rax),%rbp
3029
# r10 = 16 * round count; xmm9 = first round key, xmm1 = round key at
# 16 + 16*rounds (the last one touched by the helper loops).
3030 movl 240(%rcx),%r10d
3031 movq %rcx,%r11
3032 shll $4,%r10d
3033 movups (%rcx),%xmm9
3034 movups 16(%rcx,%r10,1),%xmm1
3035
# Fold that last round key into xmm9 and into the running mask xmm15.
3036 movdqu (%r9),%xmm15
3037 pxor %xmm1,%xmm9
3038 pxor %xmm1,%xmm15
3039
# rcx = key + 16*rounds + 32; rax = r10 = 48 - 16*rounds, the negative byte
# index the helpers count up to zero. xmm1 = round key at offset 16.
3040 movl $16+32,%eax
3041 leaq 32(%r11,%r10,1),%rcx
3042 movups 16(%r11),%xmm1
3043 subq %r10,%rax
3044 movq %rax,%r10
3045
# xmm10 = first table entry, xmm8 = accumulator.
3046 movdqu (%rbx),%xmm10
3047 movdqu (%rbp),%xmm8
3048
# With an even counter, handle one block first so the counter becomes odd.
3049 testq $1,%r8
3050 jnz L$ocb_dec_odd
3051
3052 bsfq %r8,%r12
3053 addq $1,%r8
3054 shlq $4,%r12
3055 movdqu (%rbx,%r12,1),%xmm7
3056 movdqu (%rdi),%xmm2
3057 leaq 16(%rdi),%rdi
3058
3059 call __ocb_decrypt1
3060
# The helper leaves the updated mask in xmm7; it becomes the running mask.
3061 movdqa %xmm7,%xmm15
3062 movups %xmm2,(%rsi)
3063 xorps %xmm2,%xmm8
3064 leaq 16(%rsi),%rsi
3065 subq $1,%rdx
3066 jz L$ocb_dec_done
3067
# Table byte offsets for the next six blocks: 16 * bsf(counter + 1, 3, 5).
3068L$ocb_dec_odd:
3069 leaq 1(%r8),%r12
3070 leaq 3(%r8),%r13
3071 leaq 5(%r8),%r14
3072 leaq 6(%r8),%r8
3073 bsfq %r12,%r12
3074 bsfq %r13,%r13
3075 bsfq %r14,%r14
3076 shlq $4,%r12
3077 shlq $4,%r13
3078 shlq $4,%r14
3079
# Fewer than six blocks left -> short path (the borrow sets CF).
3080 subq $6,%rdx
3081 jc L$ocb_dec_short
3082 jmp L$ocb_dec_grandloop
3083
3084.p2align 5
# Main loop: six blocks per pass; the helper also advances r8, r12, r13, r14.
3085L$ocb_dec_grandloop:
3086 movdqu 0(%rdi),%xmm2
3087 movdqu 16(%rdi),%xmm3
3088 movdqu 32(%rdi),%xmm4
3089 movdqu 48(%rdi),%xmm5
3090 movdqu 64(%rdi),%xmm6
3091 movdqu 80(%rdi),%xmm7
3092 leaq 96(%rdi),%rdi
3093
3094 call __ocb_decrypt6
3095
# Store each output block and XOR it into the accumulator.
3096 movups %xmm2,0(%rsi)
3097 pxor %xmm2,%xmm8
3098 movups %xmm3,16(%rsi)
3099 pxor %xmm3,%xmm8
3100 movups %xmm4,32(%rsi)
3101 pxor %xmm4,%xmm8
3102 movups %xmm5,48(%rsi)
3103 pxor %xmm5,%xmm8
3104 movups %xmm6,64(%rsi)
3105 pxor %xmm6,%xmm8
3106 movups %xmm7,80(%rsi)
3107 pxor %xmm7,%xmm8
3108 leaq 96(%rsi),%rsi
3109 subq $6,%rdx
3110 jnc L$ocb_dec_grandloop
3111
# Remainder: undo the last subtraction, then dispatch on 1..5 blocks left.
3112L$ocb_dec_short:
3113 addq $6,%rdx
3114 jz L$ocb_dec_done
3115
3116 movdqu 0(%rdi),%xmm2
3117 cmpq $2,%rdx
3118 jb L$ocb_dec_one
3119 movdqu 16(%rdi),%xmm3
3120 je L$ocb_dec_two
3121
3122 movdqu 32(%rdi),%xmm4
3123 cmpq $4,%rdx
3124 jb L$ocb_dec_three
3125 movdqu 48(%rdi),%xmm5
3126 je L$ocb_dec_four
3127
# Five blocks: run the six-block helper with a zeroed sixth lane; the mask of
# the fifth block (xmm14) becomes the running mask.
3128 movdqu 64(%rdi),%xmm6
3129 pxor %xmm7,%xmm7
3130
3131 call __ocb_decrypt6
3132
3133 movdqa %xmm14,%xmm15
3134 movups %xmm2,0(%rsi)
3135 pxor %xmm2,%xmm8
3136 movups %xmm3,16(%rsi)
3137 pxor %xmm3,%xmm8
3138 movups %xmm4,32(%rsi)
3139 pxor %xmm4,%xmm8
3140 movups %xmm5,48(%rsi)
3141 pxor %xmm5,%xmm8
3142 movups %xmm6,64(%rsi)
3143 pxor %xmm6,%xmm8
3144
3145 jmp L$ocb_dec_done
3146
3147.p2align 4
# One block: the first table entry (xmm10) is the entry used for it.
3148L$ocb_dec_one:
3149 movdqa %xmm10,%xmm7
3150
3151 call __ocb_decrypt1
3152
3153 movdqa %xmm7,%xmm15
3154 movups %xmm2,0(%rsi)
3155 xorps %xmm2,%xmm8
3156 jmp L$ocb_dec_done
3157
3158.p2align 4
# Two blocks: four-block helper with lanes three and four zeroed; the mask of
# the second block (xmm11) becomes the running mask.
3159L$ocb_dec_two:
3160 pxor %xmm4,%xmm4
3161 pxor %xmm5,%xmm5
3162
3163 call __ocb_decrypt4
3164
3165 movdqa %xmm11,%xmm15
3166 movups %xmm2,0(%rsi)
3167 xorps %xmm2,%xmm8
3168 movups %xmm3,16(%rsi)
3169 xorps %xmm3,%xmm8
3170
3171 jmp L$ocb_dec_done
3172
3173.p2align 4
# Three blocks: four-block helper with lane four zeroed; xmm12 is kept.
3174L$ocb_dec_three:
3175 pxor %xmm5,%xmm5
3176
3177 call __ocb_decrypt4
3178
3179 movdqa %xmm12,%xmm15
3180 movups %xmm2,0(%rsi)
3181 xorps %xmm2,%xmm8
3182 movups %xmm3,16(%rsi)
3183 xorps %xmm3,%xmm8
3184 movups %xmm4,32(%rsi)
3185 xorps %xmm4,%xmm8
3186
3187 jmp L$ocb_dec_done
3188
3189.p2align 4
# Four blocks: xmm13 is kept as the running mask.
3190L$ocb_dec_four:
3191 call __ocb_decrypt4
3192
3193 movdqa %xmm13,%xmm15
3194 movups %xmm2,0(%rsi)
3195 pxor %xmm2,%xmm8
3196 movups %xmm3,16(%rsi)
3197 pxor %xmm3,%xmm8
3198 movups %xmm4,32(%rsi)
3199 pxor %xmm4,%xmm8
3200 movups %xmm5,48(%rsi)
3201 pxor %xmm5,%xmm8
3202
# After a helper call xmm0 holds the round key loaded from -16(%rcx), the same
# address as the key folded into xmm15 at entry, so this XOR removes it again
# before the mask is written back.
3203L$ocb_dec_done:
3204 pxor %xmm0,%xmm15
3205 movdqu %xmm8,(%rbp)
3206 movdqu %xmm15,(%r9)
3207
# Zero every xmm register so no key or data material stays behind.
3208 xorps %xmm0,%xmm0
3209 pxor %xmm1,%xmm1
3210 pxor %xmm2,%xmm2
3211 pxor %xmm3,%xmm3
3212 pxor %xmm4,%xmm4
3213 pxor %xmm5,%xmm5
3214 pxor %xmm6,%xmm6
3215 pxor %xmm7,%xmm7
3216 pxor %xmm8,%xmm8
3217 pxor %xmm9,%xmm9
3218 pxor %xmm10,%xmm10
3219 pxor %xmm11,%xmm11
3220 pxor %xmm12,%xmm12
3221 pxor %xmm13,%xmm13
3222 pxor %xmm14,%xmm14
3223 pxor %xmm15,%xmm15
# rax = entry rsp (five 8-byte pushes above the current rsp); reload the saved
# registers relative to it and drop the frame.
3224 leaq 40(%rsp),%rax
3225
3226 movq -40(%rax),%r14
3227
3228 movq -32(%rax),%r13
3229
3230 movq -24(%rax),%r12
3231
3232 movq -16(%rax),%rbp
3233
3234 movq -8(%rax),%rbx
3235
3236 leaq (%rax),%rsp
3237
3238L$ocb_dec_epilogue:
# Hand-encoded: rep ret
3239 .byte 0xf3,0xc3
3240
3241
3242
3243
# __ocb_decrypt6: internal helper of _aesni_ocb_decrypt that runs the six
# blocks in xmm2..xmm7 through the AES decryption rounds in parallel, each
# with its own XOR mask.
# Inputs as set up by _aesni_ocb_decrypt:
#   xmm9  = first round key XOR the round key at 16 + 16*rounds
#   xmm15 = running mask XOR that same round key
#   xmm10 = first table entry; r12, r13, r14 = byte offsets of three further
#           entries relative to rbx
#   xmm1  = round key at offset 16; r11 = key schedule base
#   rcx/rax = end pointer plus negative byte index; r10 = reset value for rax
#   r8    = block counter
# Outputs: xmm2..xmm7 = results, xmm10..xmm15 on exit of the XOR chain hold
# the six masks (xmm10 is then reloaded from (%rbx)), r8 advanced by 6,
# r12/r13/r14 recomputed for the next six blocks, xmm0 = last round key
# loaded by the loop.
3244.p2align 5
3245__ocb_decrypt6:
3246
# Build the six-mask chain and mask the inputs. The first pxor turns xmm15
# into running mask XOR first round key, so masking also applies round key 0.
3247 pxor %xmm9,%xmm15
3248 movdqu (%rbx,%r12,1),%xmm11
3249 movdqa %xmm10,%xmm12
3250 movdqu (%rbx,%r13,1),%xmm13
3251 movdqa %xmm10,%xmm14
3252 pxor %xmm15,%xmm10
3253 movdqu (%rbx,%r14,1),%xmm15
3254 pxor %xmm10,%xmm11
3255 pxor %xmm10,%xmm2
3256 pxor %xmm11,%xmm12
3257 pxor %xmm11,%xmm3
3258 pxor %xmm12,%xmm13
3259 pxor %xmm12,%xmm4
3260 pxor %xmm13,%xmm14
3261 pxor %xmm13,%xmm5
3262 pxor %xmm14,%xmm15
3263 pxor %xmm14,%xmm6
3264 pxor %xmm15,%xmm7
3265 movups 32(%r11),%xmm0
3266
# Advance the counter and start computing the table offsets for the next call.
3267 leaq 1(%r8),%r12
3268 leaq 3(%r8),%r13
3269 leaq 5(%r8),%r14
3270 addq $6,%r8
3271 pxor %xmm9,%xmm10
3272 bsfq %r12,%r12
3273 bsfq %r13,%r13
3274 bsfq %r14,%r14
3275
# Hand-encoded aesdec %xmm1 into xmm2..xmm7, interleaved with folding xmm9
# into the remaining masks (they become the aesdeclast keys).
3276.byte 102,15,56,222,209
3277.byte 102,15,56,222,217
3278.byte 102,15,56,222,225
3279.byte 102,15,56,222,233
3280 pxor %xmm9,%xmm11
3281 pxor %xmm9,%xmm12
3282.byte 102,15,56,222,241
3283 pxor %xmm9,%xmm13
3284 pxor %xmm9,%xmm14
3285.byte 102,15,56,222,249
3286 movups 48(%r11),%xmm1
3287 pxor %xmm9,%xmm15
3288
# Hand-encoded: aesdec %xmm0 into xmm2..xmm7
3289.byte 102,15,56,222,208
3290.byte 102,15,56,222,216
3291.byte 102,15,56,222,224
3292.byte 102,15,56,222,232
3293.byte 102,15,56,222,240
3294.byte 102,15,56,222,248
3295 movups 64(%r11),%xmm0
3296 shlq $4,%r12
3297 shlq $4,%r13
# Jump over the alignment padding that precedes the loop label.
3298 jmp L$ocb_dec_loop6
3299
3300.p2align 5
# Two rounds per pass; ends when the negative index in rax reaches zero.
3301L$ocb_dec_loop6:
3302.byte 102,15,56,222,209
3303.byte 102,15,56,222,217
3304.byte 102,15,56,222,225
3305.byte 102,15,56,222,233
3306.byte 102,15,56,222,241
3307.byte 102,15,56,222,249
3308 movups (%rcx,%rax,1),%xmm1
3309 addq $32,%rax
3310
3311.byte 102,15,56,222,208
3312.byte 102,15,56,222,216
3313.byte 102,15,56,222,224
3314.byte 102,15,56,222,232
3315.byte 102,15,56,222,240
3316.byte 102,15,56,222,248
3317 movups -16(%rcx,%rax,1),%xmm0
3318 jnz L$ocb_dec_loop6
3319
# One more aesdec with xmm1, then restore xmm1, finish r14 and reset rax.
3320.byte 102,15,56,222,209
3321.byte 102,15,56,222,217
3322.byte 102,15,56,222,225
3323.byte 102,15,56,222,233
3324.byte 102,15,56,222,241
3325.byte 102,15,56,222,249
3326 movups 16(%r11),%xmm1
3327 shlq $4,%r14
3328
# Hand-encoded aesdeclast with xmm10..xmm15 into xmm2..xmm7; xmm10 is reloaded
# with the first table entry right after its last use.
3329.byte 102,65,15,56,223,210
3330 movdqu (%rbx),%xmm10
3331 movq %r10,%rax
3332.byte 102,65,15,56,223,219
3333.byte 102,65,15,56,223,228
3334.byte 102,65,15,56,223,237
3335.byte 102,65,15,56,223,246
3336.byte 102,65,15,56,223,255
# Hand-encoded: rep ret
3337 .byte 0xf3,0xc3
3338
3339
3340
3341
# __ocb_decrypt4: internal helper of _aesni_ocb_decrypt that runs the four
# blocks in xmm2..xmm5 through the AES decryption rounds in parallel, each
# with its own XOR mask. Unused lanes are zeroed by the caller.
# Inputs as set up by _aesni_ocb_decrypt: xmm9 (first round key XOR the round
# key at 16 + 16*rounds), xmm15 (running mask XOR that round key), xmm10
# (first table entry), r12/r13 (byte offsets of two further entries relative
# to rbx), xmm1 (round key at offset 16), r11 (key schedule base), rcx/rax
# (end pointer plus negative byte index), r10 (reset value for rax).
# Outputs: xmm2..xmm5 = results, xmm10..xmm13 = the four masks with xmm9
# folded in, xmm0 = last round key loaded by the loop.
3342.p2align 5
3343__ocb_decrypt4:
3344
# Build the four-mask chain and mask the inputs.
3345 pxor %xmm9,%xmm15
3346 movdqu (%rbx,%r12,1),%xmm11
3347 movdqa %xmm10,%xmm12
3348 movdqu (%rbx,%r13,1),%xmm13
3349 pxor %xmm15,%xmm10
3350 pxor %xmm10,%xmm11
3351 pxor %xmm10,%xmm2
3352 pxor %xmm11,%xmm12
3353 pxor %xmm11,%xmm3
3354 pxor %xmm12,%xmm13
3355 pxor %xmm12,%xmm4
3356 pxor %xmm13,%xmm5
3357 movups 32(%r11),%xmm0
3358
# Fold xmm9 into every mask; the masks serve as the aesdeclast keys below.
3359 pxor %xmm9,%xmm10
3360 pxor %xmm9,%xmm11
3361 pxor %xmm9,%xmm12
3362 pxor %xmm9,%xmm13
3363
# Hand-encoded: aesdec %xmm1 into xmm2, xmm3, xmm4, xmm5
3364.byte 102,15,56,222,209
3365.byte 102,15,56,222,217
3366.byte 102,15,56,222,225
3367.byte 102,15,56,222,233
3368 movups 48(%r11),%xmm1
3369
# Hand-encoded: aesdec %xmm0 into xmm2, xmm3, xmm4, xmm5
3370.byte 102,15,56,222,208
3371.byte 102,15,56,222,216
3372.byte 102,15,56,222,224
3373.byte 102,15,56,222,232
3374 movups 64(%r11),%xmm0
# Jump over the alignment padding that precedes the loop label.
3375 jmp L$ocb_dec_loop4
3376
3377.p2align 5
# Two rounds per pass; ends when the negative index in rax reaches zero.
3378L$ocb_dec_loop4:
3379.byte 102,15,56,222,209
3380.byte 102,15,56,222,217
3381.byte 102,15,56,222,225
3382.byte 102,15,56,222,233
3383 movups (%rcx,%rax,1),%xmm1
3384 addq $32,%rax
3385
3386.byte 102,15,56,222,208
3387.byte 102,15,56,222,216
3388.byte 102,15,56,222,224
3389.byte 102,15,56,222,232
3390 movups -16(%rcx,%rax,1),%xmm0
3391 jnz L$ocb_dec_loop4
3392
# One more aesdec with xmm1, then restore xmm1 and rax for the next call.
3393.byte 102,15,56,222,209
3394.byte 102,15,56,222,217
3395.byte 102,15,56,222,225
3396.byte 102,15,56,222,233
3397 movups 16(%r11),%xmm1
3398 movq %r10,%rax
3399
# Hand-encoded: aesdeclast with xmm10, xmm11, xmm12, xmm13 into xmm2..xmm5
3400.byte 102,65,15,56,223,210
3401.byte 102,65,15,56,223,219
3402.byte 102,65,15,56,223,228
3403.byte 102,65,15,56,223,237
# Hand-encoded: rep ret
3404 .byte 0xf3,0xc3
3405
3406
3407
3408
# __ocb_decrypt1: internal helper of _aesni_ocb_decrypt that runs the single
# block in xmm2 through the AES decryption rounds with one XOR mask.
# Inputs as set up by _aesni_ocb_decrypt: xmm7 (table entry for this block),
# xmm15 (running mask XOR the round key at 16 + 16*rounds), xmm9 (first round
# key XOR that same round key), xmm1 (round key at offset 16), r11 (key
# schedule base), rcx/rax (end pointer plus negative byte index), r10 (reset
# value for rax).
# Outputs: xmm2 = result, xmm7 = xmm7 XOR xmm15 (the caller keeps it as the
# new running mask), xmm0 = last round key loaded by the loop.
3409.p2align 5
3410__ocb_decrypt1:
3411
# Mask = xmm7 ^ xmm15 ^ xmm9, applied to the input block.
3412 pxor %xmm15,%xmm7
3413 pxor %xmm9,%xmm7
3414 pxor %xmm7,%xmm2
3415 movups 32(%r11),%xmm0
3416
# Hand-encoded: aesdec %xmm1,%xmm2
3417.byte 102,15,56,222,209
3418 movups 48(%r11),%xmm1
# Second XOR with xmm9 cancels the first one, leaving xmm7 ^ xmm15 in xmm7.
3419 pxor %xmm9,%xmm7
3420
# Hand-encoded: aesdec %xmm0,%xmm2
3421.byte 102,15,56,222,208
3422 movups 64(%r11),%xmm0
# Jump over the alignment padding that precedes the loop label.
3423 jmp L$ocb_dec_loop1
3424
3425.p2align 5
# Two rounds per pass; ends when the negative index in rax reaches zero.
3426L$ocb_dec_loop1:
3427.byte 102,15,56,222,209
3428 movups (%rcx,%rax,1),%xmm1
3429 addq $32,%rax
3430
3431.byte 102,15,56,222,208
3432 movups -16(%rcx,%rax,1),%xmm0
3433 jnz L$ocb_dec_loop1
3434
# One more aesdec with xmm1, then restore xmm1 and rax for the next call.
3435.byte 102,15,56,222,209
3436 movups 16(%r11),%xmm1
3437 movq %r10,%rax
3438
# Hand-encoded: aesdeclast %xmm7,%xmm2
3439.byte 102,15,56,223,215
# Hand-encoded: rep ret
3440 .byte 0xf3,0xc3
3441
3442
# aesni_cbc_encrypt: AES-CBC encryption or decryption of a byte buffer.
# Arguments as used below (System V AMD64 register order):
#   rdi = input, rsi = output, rdx = length in bytes,
#   rcx = key schedule (round count read from offset 240),
#   r8  = 16-byte IV, read on entry and updated before return,
#   r9d = direction: non-zero selects encryption, zero selects decryption.
# Returns immediately when rdx is zero. The bulk decryption path saves rbp,
# reserves a 16-byte aligned scratch slot on the stack and restores both
# through r11. It also calls _aesni_decrypt2/3/4/6/8, defined elsewhere in
# this file.
3443.globl _aesni_cbc_encrypt
3444
3445.p2align 4
3446_aesni_cbc_encrypt:
3447
# Hand-encoded: endbr64
3448.byte 243,15,30,250
3449 testq %rdx,%rdx
3450 jz L$cbc_ret
3451
# r10d = round count, r11 = key schedule; both survive the inner loops.
3452 movl 240(%rcx),%r10d
3453 movq %rcx,%r11
3454 testl %r9d,%r9d
3455 jz L$cbc_decrypt
3456
# ---- Encryption: xmm2 carries the chaining value, one block per pass.
3457 movups (%r8),%xmm2
3458 movl %r10d,%eax
3459 cmpq $16,%rdx
3460 jb L$cbc_enc_tail
3461 subq $16,%rdx
3462 jmp L$cbc_enc_loop
3463.p2align 4
3464L$cbc_enc_loop:
3465 movups (%rdi),%xmm3
3466 leaq 16(%rdi),%rdi
3467
# xmm2 = chaining value ^ input block ^ first round key.
3468 movups (%rcx),%xmm0
3469 movups 16(%rcx),%xmm1
3470 xorps %xmm0,%xmm3
3471 leaq 32(%rcx),%rcx
3472 xorps %xmm3,%xmm2
3473L$oop_enc1_15:
# Hand-encoded: aesenc %xmm1,%xmm2
3474.byte 102,15,56,220,209
3475 decl %eax
3476 movups (%rcx),%xmm1
3477 leaq 16(%rcx),%rcx
3478 jnz L$oop_enc1_15
# Hand-encoded: aesenclast %xmm1,%xmm2
3479.byte 102,15,56,221,209
# Rewind the round counter and key pointer for the next block.
3480 movl %r10d,%eax
3481 movq %r11,%rcx
3482 movups %xmm2,0(%rsi)
3483 leaq 16(%rsi),%rsi
3484 subq $16,%rdx
3485 jnc L$cbc_enc_loop
# Undo the last subtraction; a non-zero rest is a partial final block.
3486 addq $16,%rdx
3487 jnz L$cbc_enc_tail
# Done: scrub registers and store the last ciphertext block as the new IV.
3488 pxor %xmm0,%xmm0
3489 pxor %xmm1,%xmm1
3490 movups %xmm2,(%r8)
3491 pxor %xmm2,%xmm2
3492 pxor %xmm3,%xmm3
3493 jmp L$cbc_ret
3494
# Partial final block: copy the rdx remaining input bytes to the output,
# zero-fill up to 16 bytes, then encrypt that padded block in place.
3495L$cbc_enc_tail:
3496 movq %rdx,%rcx
3497 xchgq %rdi,%rsi
# Hand-encoded: rep movsb followed by a two-byte nop
3498.long 0x9066A4F3
3499 movl $16,%ecx
3500 subq %rdx,%rcx
3501 xorl %eax,%eax
# Hand-encoded: rep stosb followed by a two-byte nop
3502.long 0x9066AAF3
# rdi is now 16 past the block start; point input and output both at it.
3503 leaq -16(%rdi),%rdi
3504 movl %r10d,%eax
3505 movq %rdi,%rsi
3506 movq %r11,%rcx
# rdx = 0 makes the loop stop after this one block.
3507 xorq %rdx,%rdx
3508 jmp L$cbc_enc_loop
3509
3510.p2align 4
# ---- Decryption. Exactly one block is handled inline; everything else goes
# through the bulk path.
3511L$cbc_decrypt:
3512 cmpq $16,%rdx
3513 jne L$cbc_decrypt_bulk
3514
3515
3516
# xmm2 = ciphertext, xmm3 = IV, xmm4 = copy of the ciphertext (next IV).
3517 movdqu (%rdi),%xmm2
3518 movdqu (%r8),%xmm3
3519 movdqa %xmm2,%xmm4
3520 movups (%rcx),%xmm0
3521 movups 16(%rcx),%xmm1
3522 leaq 32(%rcx),%rcx
3523 xorps %xmm0,%xmm2
3524L$oop_dec1_16:
# Hand-encoded: aesdec %xmm1,%xmm2
3525.byte 102,15,56,222,209
3526 decl %r10d
3527 movups (%rcx),%xmm1
3528 leaq 16(%rcx),%rcx
3529 jnz L$oop_dec1_16
# Hand-encoded: aesdeclast %xmm1,%xmm2
3530.byte 102,15,56,223,209
3531 pxor %xmm0,%xmm0
3532 pxor %xmm1,%xmm1
3533 movdqu %xmm4,(%r8)
3534 xorps %xmm3,%xmm2
3535 pxor %xmm3,%xmm3
3536 movups %xmm2,(%rsi)
3537 pxor %xmm2,%xmm2
3538 jmp L$cbc_ret
3539.p2align 4
# Bulk decryption. r11 = entry rsp (used by the epilogue), rbp is saved, and a
# 16-byte aligned scratch slot is reserved at (%rsp). rbp then holds the key
# schedule pointer and xmm10 the chaining value.
3540L$cbc_decrypt_bulk:
3541 leaq (%rsp),%r11
3542
3543 pushq %rbp
3544
3545 subq $16,%rsp
3546 andq $-16,%rsp
3547 movq %rcx,%rbp
3548 movups (%r8),%xmm10
3549 movl %r10d,%eax
3550 cmpq $0x50,%rdx
3551 jbe L$cbc_dec_tail
3552
# More than five blocks: preload six ciphertext blocks into xmm2..xmm7 and
# keep copies of the first five in xmm11..xmm15 for the CBC XOR.
3553 movups (%rcx),%xmm0
3554 movdqu 0(%rdi),%xmm2
3555 movdqu 16(%rdi),%xmm3
3556 movdqa %xmm2,%xmm11
3557 movdqu 32(%rdi),%xmm4
3558 movdqa %xmm3,%xmm12
3559 movdqu 48(%rdi),%xmm5
3560 movdqa %xmm4,%xmm13
3561 movdqu 64(%rdi),%xmm6
3562 movdqa %xmm5,%xmm14
3563 movdqu 80(%rdi),%xmm7
3564 movdqa %xmm6,%xmm15
3565 movl _OPENSSL_ia32cap_P+4(%rip),%r9d
3566 cmpq $0x70,%rdx
3567 jbe L$cbc_dec_six_or_seven
3568
# Keep bits 22 and 26 (mask 0x04400000) of the capability word; when only
# bit 22 is set the six-block loop is used in place of the eight-block loop.
# NOTE(review): presumably MOVBE and XSAVE, i.e. a check for a specific CPU
# family - confirm against the OPENSSL_ia32cap documentation.
3569 andl $71303168,%r9d
3570 subq $0x50,%rdx
3571 cmpl $4194304,%r9d
3572 je L$cbc_dec_loop6_enter
3573 subq $0x20,%rdx
# Bias rcx by 112 so the round-key loads below can use short displacements.
3574 leaq 112(%rcx),%rcx
3575 jmp L$cbc_dec_loop8_enter
3576.p2align 4
# Eight-block loop. The eighth result of the previous pass is still in xmm9
# and is stored first.
3577L$cbc_dec_loop8:
3578 movups %xmm9,(%rsi)
3579 leaq 16(%rsi),%rsi
3580L$cbc_dec_loop8_enter:
3581 movdqu 96(%rdi),%xmm8
3582 pxor %xmm0,%xmm2
3583 movdqu 112(%rdi),%xmm9
3584 pxor %xmm0,%xmm3
3585 movups 16-112(%rcx),%xmm1
3586 pxor %xmm0,%xmm4
# rbp is turned into a look-ahead pointer: the flags of this cmpq survive
# until the adcq below, because pxor, movups and the .byte-encoded AES
# instructions in between do not write flags.
3587 movq $-1,%rbp
3588 cmpq $0x70,%rdx
3589 pxor %xmm0,%xmm5
3590 pxor %xmm0,%xmm6
3591 pxor %xmm0,%xmm7
3592 pxor %xmm0,%xmm8
3593
# From here on each group of eight .byte lines is one AES round: aesdec with
# xmm1 or xmm0 (alternating) into xmm2..xmm9, with the next round key loaded
# in between.
3594.byte 102,15,56,222,209
3595 pxor %xmm0,%xmm9
3596 movups 32-112(%rcx),%xmm0
3597.byte 102,15,56,222,217
3598.byte 102,15,56,222,225
3599.byte 102,15,56,222,233
3600.byte 102,15,56,222,241
3601.byte 102,15,56,222,249
3602.byte 102,68,15,56,222,193
# rbp = -1 + CF, masked to 0 or 128, plus rdi: rdi + 128 when rdx >= 0x70,
# otherwise rdi itself, so the preload of the next batch never reads past the
# blocks already touched.
3603 adcq $0,%rbp
3604 andq $128,%rbp
3605.byte 102,68,15,56,222,201
3606 addq %rdi,%rbp
3607 movups 48-112(%rcx),%xmm1
3608.byte 102,15,56,222,208
3609.byte 102,15,56,222,216
3610.byte 102,15,56,222,224
3611.byte 102,15,56,222,232
3612.byte 102,15,56,222,240
3613.byte 102,15,56,222,248
3614.byte 102,68,15,56,222,192
3615.byte 102,68,15,56,222,200
3616 movups 64-112(%rcx),%xmm0
3617 nop
3618.byte 102,15,56,222,209
3619.byte 102,15,56,222,217
3620.byte 102,15,56,222,225
3621.byte 102,15,56,222,233
3622.byte 102,15,56,222,241
3623.byte 102,15,56,222,249
3624.byte 102,68,15,56,222,193
3625.byte 102,68,15,56,222,201
3626 movups 80-112(%rcx),%xmm1
3627 nop
3628.byte 102,15,56,222,208
3629.byte 102,15,56,222,216
3630.byte 102,15,56,222,224
3631.byte 102,15,56,222,232
3632.byte 102,15,56,222,240
3633.byte 102,15,56,222,248
3634.byte 102,68,15,56,222,192
3635.byte 102,68,15,56,222,200
3636 movups 96-112(%rcx),%xmm0
3637 nop
3638.byte 102,15,56,222,209
3639.byte 102,15,56,222,217
3640.byte 102,15,56,222,225
3641.byte 102,15,56,222,233
3642.byte 102,15,56,222,241
3643.byte 102,15,56,222,249
3644.byte 102,68,15,56,222,193
3645.byte 102,68,15,56,222,201
3646 movups 112-112(%rcx),%xmm1
3647 nop
3648.byte 102,15,56,222,208
3649.byte 102,15,56,222,216
3650.byte 102,15,56,222,224
3651.byte 102,15,56,222,232
3652.byte 102,15,56,222,240
3653.byte 102,15,56,222,248
3654.byte 102,68,15,56,222,192
3655.byte 102,68,15,56,222,200
3656 movups 128-112(%rcx),%xmm0
3657 nop
3658.byte 102,15,56,222,209
3659.byte 102,15,56,222,217
3660.byte 102,15,56,222,225
3661.byte 102,15,56,222,233
3662.byte 102,15,56,222,241
3663.byte 102,15,56,222,249
3664.byte 102,68,15,56,222,193
3665.byte 102,68,15,56,222,201
3666 movups 144-112(%rcx),%xmm1
# Compare the round count once; the jb and je further down both use these
# flags (nothing in between writes them).
3667 cmpl $11,%eax
3668.byte 102,15,56,222,208
3669.byte 102,15,56,222,216
3670.byte 102,15,56,222,224
3671.byte 102,15,56,222,232
3672.byte 102,15,56,222,240
3673.byte 102,15,56,222,248
3674.byte 102,68,15,56,222,192
3675.byte 102,68,15,56,222,200
3676 movups 160-112(%rcx),%xmm0
# Round count below 11: key schedule ends here.
3677 jb L$cbc_dec_done
3678.byte 102,15,56,222,209
3679.byte 102,15,56,222,217
3680.byte 102,15,56,222,225
3681.byte 102,15,56,222,233
3682.byte 102,15,56,222,241
3683.byte 102,15,56,222,249
3684.byte 102,68,15,56,222,193
3685.byte 102,68,15,56,222,201
3686 movups 176-112(%rcx),%xmm1
3687 nop
3688.byte 102,15,56,222,208
3689.byte 102,15,56,222,216
3690.byte 102,15,56,222,224
3691.byte 102,15,56,222,232
3692.byte 102,15,56,222,240
3693.byte 102,15,56,222,248
3694.byte 102,68,15,56,222,192
3695.byte 102,68,15,56,222,200
3696 movups 192-112(%rcx),%xmm0
# Round count equal to 11: key schedule ends here.
3697 je L$cbc_dec_done
3698.byte 102,15,56,222,209
3699.byte 102,15,56,222,217
3700.byte 102,15,56,222,225
3701.byte 102,15,56,222,233
3702.byte 102,15,56,222,241
3703.byte 102,15,56,222,249
3704.byte 102,68,15,56,222,193
3705.byte 102,68,15,56,222,201
3706 movups 208-112(%rcx),%xmm1
3707 nop
3708.byte 102,15,56,222,208
3709.byte 102,15,56,222,216
3710.byte 102,15,56,222,224
3711.byte 102,15,56,222,232
3712.byte 102,15,56,222,240
3713.byte 102,15,56,222,248
3714.byte 102,68,15,56,222,192
3715.byte 102,68,15,56,222,200
3716 movups 224-112(%rcx),%xmm0
3717 jmp L$cbc_dec_done
3718.p2align 4
# Final stage: one more aesdec with xmm1, then aesdeclast. The last round key
# (xmm0) is XORed into the chaining value and the saved ciphertext copies, and
# those serve as the aesdeclast keys, so the final AddRoundKey and the CBC XOR
# happen in one instruction.
3719L$cbc_dec_done:
3720.byte 102,15,56,222,209
3721.byte 102,15,56,222,217
3722 pxor %xmm0,%xmm10
3723 pxor %xmm0,%xmm11
3724.byte 102,15,56,222,225
3725.byte 102,15,56,222,233
3726 pxor %xmm0,%xmm12
3727 pxor %xmm0,%xmm13
3728.byte 102,15,56,222,241
3729.byte 102,15,56,222,249
3730 pxor %xmm0,%xmm14
3731 pxor %xmm0,%xmm15
3732.byte 102,68,15,56,222,193
3733.byte 102,68,15,56,222,201
# Ciphertext blocks six to eight are re-read from the input for the XOR.
3734 movdqu 80(%rdi),%xmm1
3735
# Hand-encoded aesdeclast with xmm10..xmm15, xmm1, xmm10 into xmm2..xmm9,
# interleaved with reloading those registers from the look-ahead pointer rbp.
3736.byte 102,65,15,56,223,210
3737 movdqu 96(%rdi),%xmm10
3738 pxor %xmm0,%xmm1
3739.byte 102,65,15,56,223,219
3740 pxor %xmm0,%xmm10
# xmm0 = eighth ciphertext block, which becomes the next chaining value.
3741 movdqu 112(%rdi),%xmm0
3742.byte 102,65,15,56,223,228
3743 leaq 128(%rdi),%rdi
3744 movdqu 0(%rbp),%xmm11
3745.byte 102,65,15,56,223,237
3746.byte 102,65,15,56,223,246
3747 movdqu 16(%rbp),%xmm12
3748 movdqu 32(%rbp),%xmm13
3749.byte 102,65,15,56,223,255
3750.byte 102,68,15,56,223,193
3751 movdqu 48(%rbp),%xmm14
3752 movdqu 64(%rbp),%xmm15
3753.byte 102,69,15,56,223,202
3754 movdqa %xmm0,%xmm10
3755 movdqu 80(%rbp),%xmm1
# Reload the first round key (rcx is still biased by 112).
3756 movups -112(%rcx),%xmm0
3757
# Store seven results and move the preloaded ciphertext into the working
# registers; the eighth result stays in xmm9 until the next pass or the exit.
3758 movups %xmm2,(%rsi)
3759 movdqa %xmm11,%xmm2
3760 movups %xmm3,16(%rsi)
3761 movdqa %xmm12,%xmm3
3762 movups %xmm4,32(%rsi)
3763 movdqa %xmm13,%xmm4
3764 movups %xmm5,48(%rsi)
3765 movdqa %xmm14,%xmm5
3766 movups %xmm6,64(%rsi)
3767 movdqa %xmm15,%xmm6
3768 movups %xmm7,80(%rsi)
3769 movdqa %xmm1,%xmm7
3770 movups %xmm8,96(%rsi)
3771 leaq 112(%rsi),%rsi
3772
3773 subq $0x80,%rdx
3774 ja L$cbc_dec_loop8
3775
# Leaving the eight-block loop: undo the rcx bias and the length bias, then
# pick the matching tail handler.
3776 movaps %xmm9,%xmm2
3777 leaq -112(%rcx),%rcx
3778 addq $0x70,%rdx
3779 jle L$cbc_dec_clear_tail_collected
3780 movups %xmm9,(%rsi)
3781 leaq 16(%rsi),%rsi
3782 cmpq $0x50,%rdx
3783 jbe L$cbc_dec_tail
3784
3785 movaps %xmm11,%xmm2
# Six or seven blocks, with the first six already loaded into xmm2..xmm7.
3786L$cbc_dec_six_or_seven:
3787 cmpq $0x60,%rdx
3788 ja L$cbc_dec_seven
3789
# Six blocks: xmm8 keeps the sixth ciphertext block as the next chaining value.
3790 movaps %xmm7,%xmm8
3791 call _aesni_decrypt6
3792 pxor %xmm10,%xmm2
3793 movaps %xmm8,%xmm10
3794 pxor %xmm11,%xmm3
3795 movdqu %xmm2,(%rsi)
3796 pxor %xmm12,%xmm4
3797 movdqu %xmm3,16(%rsi)
3798 pxor %xmm3,%xmm3
3799 pxor %xmm13,%xmm5
3800 movdqu %xmm4,32(%rsi)
3801 pxor %xmm4,%xmm4
3802 pxor %xmm14,%xmm6
3803 movdqu %xmm5,48(%rsi)
3804 pxor %xmm5,%xmm5
3805 pxor %xmm15,%xmm7
3806 movdqu %xmm6,64(%rsi)
3807 pxor %xmm6,%xmm6
3808 leaq 80(%rsi),%rsi
# The last result is handed to the common tail in xmm2.
3809 movdqa %xmm7,%xmm2
3810 pxor %xmm7,%xmm7
3811 jmp L$cbc_dec_tail_collected
3812
3813.p2align 4
# Seven blocks: eight-block routine with a zeroed eighth lane; ciphertext
# blocks six and seven are re-read from the input afterwards.
3814L$cbc_dec_seven:
3815 movups 96(%rdi),%xmm8
3816 xorps %xmm9,%xmm9
3817 call _aesni_decrypt8
3818 movups 80(%rdi),%xmm9
3819 pxor %xmm10,%xmm2
3820 movups 96(%rdi),%xmm10
3821 pxor %xmm11,%xmm3
3822 movdqu %xmm2,(%rsi)
3823 pxor %xmm12,%xmm4
3824 movdqu %xmm3,16(%rsi)
3825 pxor %xmm3,%xmm3
3826 pxor %xmm13,%xmm5
3827 movdqu %xmm4,32(%rsi)
3828 pxor %xmm4,%xmm4
3829 pxor %xmm14,%xmm6
3830 movdqu %xmm5,48(%rsi)
3831 pxor %xmm5,%xmm5
3832 pxor %xmm15,%xmm7
3833 movdqu %xmm6,64(%rsi)
3834 pxor %xmm6,%xmm6
3835 pxor %xmm9,%xmm8
3836 movdqu %xmm7,80(%rsi)
3837 pxor %xmm7,%xmm7
3838 leaq 96(%rsi),%rsi
3839 movdqa %xmm8,%xmm2
3840 pxor %xmm8,%xmm8
3841 pxor %xmm9,%xmm9
3842 jmp L$cbc_dec_tail_collected
3843
3844.p2align 4
# Six-block loop. The sixth result of the previous pass is still in xmm7 and
# is stored first.
3845L$cbc_dec_loop6:
3846 movups %xmm7,(%rsi)
3847 leaq 16(%rsi),%rsi
3848 movdqu 0(%rdi),%xmm2
3849 movdqu 16(%rdi),%xmm3
3850 movdqa %xmm2,%xmm11
3851 movdqu 32(%rdi),%xmm4
3852 movdqa %xmm3,%xmm12
3853 movdqu 48(%rdi),%xmm5
3854 movdqa %xmm4,%xmm13
3855 movdqu 64(%rdi),%xmm6
3856 movdqa %xmm5,%xmm14
3857 movdqu 80(%rdi),%xmm7
3858 movdqa %xmm6,%xmm15
3859L$cbc_dec_loop6_enter:
3860 leaq 96(%rdi),%rdi
3861 movdqa %xmm7,%xmm8
3862
3863 call _aesni_decrypt6
3864
# The key pointer and round counter are restored from rbp and r10d here.
3865 pxor %xmm10,%xmm2
3866 movdqa %xmm8,%xmm10
3867 pxor %xmm11,%xmm3
3868 movdqu %xmm2,(%rsi)
3869 pxor %xmm12,%xmm4
3870 movdqu %xmm3,16(%rsi)
3871 pxor %xmm13,%xmm5
3872 movdqu %xmm4,32(%rsi)
3873 pxor %xmm14,%xmm6
3874 movq %rbp,%rcx
3875 movdqu %xmm5,48(%rsi)
3876 pxor %xmm15,%xmm7
3877 movl %r10d,%eax
3878 movdqu %xmm6,64(%rsi)
3879 leaq 80(%rsi),%rsi
3880 subq $0x60,%rdx
3881 ja L$cbc_dec_loop6
3882
3883 movdqa %xmm7,%xmm2
3884 addq $0x50,%rdx
3885 jle L$cbc_dec_clear_tail_collected
3886 movups %xmm7,(%rsi)
3887 leaq 16(%rsi),%rsi
3888
# Tail: one to five blocks left. Each subq peels off one block; the saved
# ciphertext copies go to xmm11..xmm15 as before.
3889L$cbc_dec_tail:
3890 movups (%rdi),%xmm2
3891 subq $0x10,%rdx
3892 jbe L$cbc_dec_one
3893
3894 movups 16(%rdi),%xmm3
3895 movaps %xmm2,%xmm11
3896 subq $0x10,%rdx
3897 jbe L$cbc_dec_two
3898
3899 movups 32(%rdi),%xmm4
3900 movaps %xmm3,%xmm12
3901 subq $0x10,%rdx
3902 jbe L$cbc_dec_three
3903
3904 movups 48(%rdi),%xmm5
3905 movaps %xmm4,%xmm13
3906 subq $0x10,%rdx
3907 jbe L$cbc_dec_four
3908
# Five blocks: six-block routine with a zeroed sixth lane.
3909 movups 64(%rdi),%xmm6
3910 movaps %xmm5,%xmm14
3911 movaps %xmm6,%xmm15
3912 xorps %xmm7,%xmm7
3913 call _aesni_decrypt6
3914 pxor %xmm10,%xmm2
3915 movaps %xmm15,%xmm10
3916 pxor %xmm11,%xmm3
3917 movdqu %xmm2,(%rsi)
3918 pxor %xmm12,%xmm4
3919 movdqu %xmm3,16(%rsi)
3920 pxor %xmm3,%xmm3
3921 pxor %xmm13,%xmm5
3922 movdqu %xmm4,32(%rsi)
3923 pxor %xmm4,%xmm4
3924 pxor %xmm14,%xmm6
3925 movdqu %xmm5,48(%rsi)
3926 pxor %xmm5,%xmm5
3927 leaq 64(%rsi),%rsi
3928 movdqa %xmm6,%xmm2
3929 pxor %xmm6,%xmm6
3930 pxor %xmm7,%xmm7
# Account for the fifth block so rdx & 15 is right in the common tail.
3931 subq $0x10,%rdx
3932 jmp L$cbc_dec_tail_collected
3933
3934.p2align 4
# One block, decrypted inline with eax as the round counter.
3935L$cbc_dec_one:
3936 movaps %xmm2,%xmm11
3937 movups (%rcx),%xmm0
3938 movups 16(%rcx),%xmm1
3939 leaq 32(%rcx),%rcx
3940 xorps %xmm0,%xmm2
3941L$oop_dec1_17:
# Hand-encoded: aesdec %xmm1,%xmm2
3942.byte 102,15,56,222,209
3943 decl %eax
3944 movups (%rcx),%xmm1
3945 leaq 16(%rcx),%rcx
3946 jnz L$oop_dec1_17
# Hand-encoded: aesdeclast %xmm1,%xmm2
3947.byte 102,15,56,223,209
3948 xorps %xmm10,%xmm2
3949 movaps %xmm11,%xmm10
3950 jmp L$cbc_dec_tail_collected
3951.p2align 4
# Two blocks.
3952L$cbc_dec_two:
3953 movaps %xmm3,%xmm12
3954 call _aesni_decrypt2
3955 pxor %xmm10,%xmm2
3956 movaps %xmm12,%xmm10
3957 pxor %xmm11,%xmm3
3958 movdqu %xmm2,(%rsi)
3959 movdqa %xmm3,%xmm2
3960 pxor %xmm3,%xmm3
3961 leaq 16(%rsi),%rsi
3962 jmp L$cbc_dec_tail_collected
3963.p2align 4
# Three blocks.
3964L$cbc_dec_three:
3965 movaps %xmm4,%xmm13
3966 call _aesni_decrypt3
3967 pxor %xmm10,%xmm2
3968 movaps %xmm13,%xmm10
3969 pxor %xmm11,%xmm3
3970 movdqu %xmm2,(%rsi)
3971 pxor %xmm12,%xmm4
3972 movdqu %xmm3,16(%rsi)
3973 pxor %xmm3,%xmm3
3974 movdqa %xmm4,%xmm2
3975 pxor %xmm4,%xmm4
3976 leaq 32(%rsi),%rsi
3977 jmp L$cbc_dec_tail_collected
3978.p2align 4
# Four blocks.
3979L$cbc_dec_four:
3980 movaps %xmm5,%xmm14
3981 call _aesni_decrypt4
3982 pxor %xmm10,%xmm2
3983 movaps %xmm14,%xmm10
3984 pxor %xmm11,%xmm3
3985 movdqu %xmm2,(%rsi)
3986 pxor %xmm12,%xmm4
3987 movdqu %xmm3,16(%rsi)
3988 pxor %xmm3,%xmm3
3989 pxor %xmm13,%xmm5
3990 movdqu %xmm4,32(%rsi)
3991 pxor %xmm4,%xmm4
3992 movdqa %xmm5,%xmm2
3993 pxor %xmm5,%xmm5
3994 leaq 48(%rsi),%rsi
3995 jmp L$cbc_dec_tail_collected
3996
3997.p2align 4
# Entry used by the loops, which do not scrub their working registers.
3998L$cbc_dec_clear_tail_collected:
3999 pxor %xmm3,%xmm3
4000 pxor %xmm4,%xmm4
4001 pxor %xmm5,%xmm5
4002 pxor %xmm6,%xmm6
4003 pxor %xmm7,%xmm7
4004 pxor %xmm8,%xmm8
4005 pxor %xmm9,%xmm9
# Common tail: xmm10 = new IV, xmm2 = last result block, not yet stored.
4006L$cbc_dec_tail_collected:
4007 movups %xmm10,(%r8)
4008 andq $15,%rdx
4009 jnz L$cbc_dec_tail_partial
4010 movups %xmm2,(%rsi)
4011 pxor %xmm2,%xmm2
4012 jmp L$cbc_dec_ret
4013.p2align 4
# Length was not a multiple of 16: park the last block in the aligned stack
# slot and copy only 16 - (rdx & 15) bytes of it to the output, then overwrite
# the slot with the zeroed xmm2.
4014L$cbc_dec_tail_partial:
4015 movaps %xmm2,(%rsp)
4016 pxor %xmm2,%xmm2
4017 movq $16,%rcx
4018 movq %rsi,%rdi
4019 subq %rdx,%rcx
4020 leaq (%rsp),%rsi
# Hand-encoded: rep movsb followed by a two-byte nop
4021.long 0x9066A4F3
4022 movdqa %xmm2,(%rsp)
4023
# Bulk-path epilogue: restore rbp and rsp from the entry rsp kept in r11.
4024L$cbc_dec_ret:
4025 xorps %xmm0,%xmm0
4026 pxor %xmm1,%xmm1
4027 movq -8(%r11),%rbp
4028
4029 leaq (%r11),%rsp
4030
4031L$cbc_ret:
# Hand-encoded: rep ret
4032 .byte 0xf3,0xc3
4033
4034
# aesni_set_decrypt_key: builds a decryption key schedule in place.
# Takes the same arguments as __aesni_set_encrypt_key and passes them through
# untouched (rdi = user key, esi = key size in bits, rdx = schedule buffer).
# It first builds the encryption schedule, then reverses the order of the
# round keys and applies aesimc to every key except the first and last.
# Returns in eax whatever __aesni_set_encrypt_key returned (0 on success).
4035.globl _aesni_set_decrypt_key
4036
4037.p2align 4
4038_aesni_set_decrypt_key:
4039
# Hand-encoded: sub $8,%rsp (stack alignment for the call below)
4040.byte 0x48,0x83,0xEC,0x08
4041
4042 call __aesni_set_encrypt_key
# On success esi holds the stored round count (9, 11 or 13); scale it to a
# byte offset. shll is issued before the status test, testl rewrites the flags.
4043 shll $4,%esi
4044 testl %eax,%eax
4045 jnz L$dec_key_ret
# rdi = address of the last round key, rdx = address of the first.
4046 leaq 16(%rdx,%rsi,1),%rdi
4047
# Swap the first and last round keys unchanged.
4048 movups (%rdx),%xmm0
4049 movups (%rdi),%xmm1
4050 movups %xmm0,(%rdi)
4051 movups %xmm1,(%rdx)
4052 leaq 16(%rdx),%rdx
4053 leaq -16(%rdi),%rdi
4054
# Walk inwards from both ends: transform each pair with aesimc and swap it.
4055L$dec_key_inverse:
4056 movups (%rdx),%xmm0
4057 movups (%rdi),%xmm1
# Hand-encoded: aesimc %xmm0,%xmm0 and aesimc %xmm1,%xmm1
4058.byte 102,15,56,219,192
4059.byte 102,15,56,219,201
4060 leaq 16(%rdx),%rdx
4061 leaq -16(%rdi),%rdi
4062 movups %xmm0,16(%rdi)
4063 movups %xmm1,-16(%rdx)
4064 cmpq %rdx,%rdi
4065 ja L$dec_key_inverse
4066
# The two pointers have met: transform the middle round key in place.
4067 movups (%rdx),%xmm0
# Hand-encoded: aesimc %xmm0,%xmm0
4068.byte 102,15,56,219,192
4069 pxor %xmm1,%xmm1
4070 movups %xmm0,(%rdi)
4071 pxor %xmm0,%xmm0
4072L$dec_key_ret:
4073 addq $8,%rsp
4074
# Hand-encoded: rep ret
4075 .byte 0xf3,0xc3
4076
4077L$SEH_end_set_decrypt_key:
4078
4079.globl _aesni_set_encrypt_key
4080
4081.p2align 4
4082_aesni_set_encrypt_key:
4083__aesni_set_encrypt_key:
4084
4085.byte 0x48,0x83,0xEC,0x08
4086
4087 movq $-1,%rax
4088 testq %rdi,%rdi
4089 jz L$enc_key_ret
4090 testq %rdx,%rdx
4091 jz L$enc_key_ret
4092
4093 movl $268437504,%r10d
4094 movups (%rdi),%xmm0
4095 xorps %xmm4,%xmm4
4096 andl _OPENSSL_ia32cap_P+4(%rip),%r10d
4097 leaq 16(%rdx),%rax
4098 cmpl $256,%esi
4099 je L$14rounds
4100 cmpl $192,%esi
4101 je L$12rounds
4102 cmpl $128,%esi
4103 jne L$bad_keybits
4104
4105L$10rounds:
4106 movl $9,%esi
4107 cmpl $268435456,%r10d
4108 je L$10rounds_alt
4109
4110 movups %xmm0,(%rdx)
4111.byte 102,15,58,223,200,1
4112 call L$key_expansion_128_cold
4113.byte 102,15,58,223,200,2
4114 call L$key_expansion_128
4115.byte 102,15,58,223,200,4
4116 call L$key_expansion_128
4117.byte 102,15,58,223,200,8
4118 call L$key_expansion_128
4119.byte 102,15,58,223,200,16
4120 call L$key_expansion_128
4121.byte 102,15,58,223,200,32
4122 call L$key_expansion_128
4123.byte 102,15,58,223,200,64
4124 call L$key_expansion_128
4125.byte 102,15,58,223,200,128
4126 call L$key_expansion_128
4127.byte 102,15,58,223,200,27
4128 call L$key_expansion_128
4129.byte 102,15,58,223,200,54
4130 call L$key_expansion_128
4131 movups %xmm0,(%rax)
4132 movl %esi,80(%rax)
4133 xorl %eax,%eax
4134 jmp L$enc_key_ret
4135
4136.p2align 4
4137L$10rounds_alt:
4138 movdqa L$key_rotate(%rip),%xmm5
4139 movl $8,%r10d
4140 movdqa L$key_rcon1(%rip),%xmm4
4141 movdqa %xmm0,%xmm2
4142 movdqu %xmm0,(%rdx)
4143 jmp L$oop_key128
4144
4145.p2align 4
4146L$oop_key128:
4147.byte 102,15,56,0,197
4148.byte 102,15,56,221,196
4149 pslld $1,%xmm4
4150 leaq 16(%rax),%rax
4151
4152 movdqa %xmm2,%xmm3
4153 pslldq $4,%xmm2
4154 pxor %xmm2,%xmm3
4155 pslldq $4,%xmm2
4156 pxor %xmm2,%xmm3
4157 pslldq $4,%xmm2
4158 pxor %xmm3,%xmm2
4159
4160 pxor %xmm2,%xmm0
4161 movdqu %xmm0,-16(%rax)
4162 movdqa %xmm0,%xmm2
4163
4164 decl %r10d
4165 jnz L$oop_key128
4166
4167 movdqa L$key_rcon1b(%rip),%xmm4
4168
4169.byte 102,15,56,0,197
4170.byte 102,15,56,221,196
4171 pslld $1,%xmm4
4172
4173 movdqa %xmm2,%xmm3
4174 pslldq $4,%xmm2
4175 pxor %xmm2,%xmm3
4176 pslldq $4,%xmm2
4177 pxor %xmm2,%xmm3
4178 pslldq $4,%xmm2
4179 pxor %xmm3,%xmm2
4180
4181 pxor %xmm2,%xmm0
4182 movdqu %xmm0,(%rax)
4183
4184 movdqa %xmm0,%xmm2
4185.byte 102,15,56,0,197
4186.byte 102,15,56,221,196
4187
4188 movdqa %xmm2,%xmm3
4189 pslldq $4,%xmm2
4190 pxor %xmm2,%xmm3
4191 pslldq $4,%xmm2
4192 pxor %xmm2,%xmm3
4193 pslldq $4,%xmm2
4194 pxor %xmm3,%xmm2
4195
4196 pxor %xmm2,%xmm0
4197 movdqu %xmm0,16(%rax)
4198
4199 movl %esi,96(%rax)
4200 xorl %eax,%eax
4201 jmp L$enc_key_ret
4202
4203.p2align 4
4204L$12rounds:
4205 movq 16(%rdi),%xmm2
4206 movl $11,%esi
4207 cmpl $268435456,%r10d
4208 je L$12rounds_alt
4209
4210 movups %xmm0,(%rdx)
4211.byte 102,15,58,223,202,1
4212 call L$key_expansion_192a_cold
4213.byte 102,15,58,223,202,2
4214 call L$key_expansion_192b
4215.byte 102,15,58,223,202,4
4216 call L$key_expansion_192a
4217.byte 102,15,58,223,202,8
4218 call L$key_expansion_192b
4219.byte 102,15,58,223,202,16
4220 call L$key_expansion_192a
4221.byte 102,15,58,223,202,32
4222 call L$key_expansion_192b
4223.byte 102,15,58,223,202,64
4224 call L$key_expansion_192a
4225.byte 102,15,58,223,202,128
4226 call L$key_expansion_192b
4227 movups %xmm0,(%rax)
4228 movl %esi,48(%rax)
4229 xorq %rax,%rax
4230 jmp L$enc_key_ret
4231
4232.p2align 4
4233L$12rounds_alt:
4234 movdqa L$key_rotate192(%rip),%xmm5
4235 movdqa L$key_rcon1(%rip),%xmm4
4236 movl $8,%r10d
4237 movdqu %xmm0,(%rdx)
4238 jmp L$oop_key192
4239
4240.p2align 4
4241L$oop_key192:
4242 movq %xmm2,0(%rax)
4243 movdqa %xmm2,%xmm1
4244.byte 102,15,56,0,213
4245.byte 102,15,56,221,212
4246 pslld $1,%xmm4
4247 leaq 24(%rax),%rax
4248
4249 movdqa %xmm0,%xmm3
4250 pslldq $4,%xmm0
4251 pxor %xmm0,%xmm3
4252 pslldq $4,%xmm0
4253 pxor %xmm0,%xmm3
4254 pslldq $4,%xmm0
4255 pxor %xmm3,%xmm0
4256
4257 pshufd $0xff,%xmm0,%xmm3
4258 pxor %xmm1,%xmm3
4259 pslldq $4,%xmm1
4260 pxor %xmm1,%xmm3
4261
4262 pxor %xmm2,%xmm0
4263 pxor %xmm3,%xmm2
4264 movdqu %xmm0,-16(%rax)
4265
4266 decl %r10d
4267 jnz L$oop_key192
4268
4269 movl %esi,32(%rax)
4270 xorl %eax,%eax
4271 jmp L$enc_key_ret
4272
4273.p2align 4
4274L$14rounds:
4275 movups 16(%rdi),%xmm2
4276 movl $13,%esi
4277 leaq 16(%rax),%rax
4278 cmpl $268435456,%r10d
4279 je L$14rounds_alt
4280
4281 movups %xmm0,(%rdx)
4282 movups %xmm2,16(%rdx)
4283.byte 102,15,58,223,202,1
4284 call L$key_expansion_256a_cold
4285.byte 102,15,58,223,200,1
4286 call L$key_expansion_256b
4287.byte 102,15,58,223,202,2
4288 call L$key_expansion_256a
4289.byte 102,15,58,223,200,2
4290 call L$key_expansion_256b
4291.byte 102,15,58,223,202,4
4292 call L$key_expansion_256a
4293.byte 102,15,58,223,200,4
4294 call L$key_expansion_256b
4295.byte 102,15,58,223,202,8
4296 call L$key_expansion_256a
4297.byte 102,15,58,223,200,8
4298 call L$key_expansion_256b
4299.byte 102,15,58,223,202,16
4300 call L$key_expansion_256a
4301.byte 102,15,58,223,200,16
4302 call L$key_expansion_256b
4303.byte 102,15,58,223,202,32
4304 call L$key_expansion_256a
4305.byte 102,15,58,223,200,32
4306 call L$key_expansion_256b
4307.byte 102,15,58,223,202,64
4308 call L$key_expansion_256a
4309 movups %xmm0,(%rax)
4310 movl %esi,16(%rax)
4311 xorq %rax,%rax
4312 jmp L$enc_key_ret
4313
4314.p2align 4
4315L$14rounds_alt:
4316 movdqa L$key_rotate(%rip),%xmm5
4317 movdqa L$key_rcon1(%rip),%xmm4
4318 movl $7,%r10d
4319 movdqu %xmm0,0(%rdx)
4320 movdqa %xmm2,%xmm1
4321 movdqu %xmm2,16(%rdx)
4322 jmp L$oop_key256
4323
4324.p2align 4
# L$oop_key256 -- loop body of the alternative 256-bit key expansion.
# Register roles inside the loop, as established by L$14rounds_alt and the
# code below:
#   %xmm0 = previous even round key, replaced by the next even key
#   %xmm1 = previous odd round key,  replaced by the next odd key
#   %xmm2 = most recently produced key, input to the next substitution step
#   %xmm4 = round constant (doubled every iteration), %xmm5 = pshufb mask
#   %r10d = remaining iterations (starts at 7), %rax = write cursor
# Every iteration stores an even key at (%rax); all but the last one also
# store an odd key at 16(%rax) and advance %rax by 32. Together with the two
# keys stored before the loop this yields 2 + 7 + 6 = 15 round keys.
4325L$oop_key256:
4326.byte 102,15,56,0,213	# pshufb %xmm5,%xmm2: rotate the top dword by one byte and broadcast it
4327.byte 102,15,56,221,212	# aesenclast %xmm4,%xmm2: S-box substitution, then XOR with the round constant
4328
# Turn %xmm0 into the running XOR of its own dwords
# (k ^ k<<32 ^ k<<64 ^ k<<96), using %xmm3 as the accumulator.
4329 movdqa %xmm0,%xmm3
4330 pslldq $4,%xmm0
4331 pxor %xmm0,%xmm3
4332 pslldq $4,%xmm0
4333 pxor %xmm0,%xmm3
4334 pslldq $4,%xmm0
4335 pxor %xmm3,%xmm0
4336 pslld $1,%xmm4	# double the round constant for the next iteration
4337
4338 pxor %xmm2,%xmm0	# fold in the substituted word: next even round key
4339 movdqu %xmm0,(%rax)
4340
4341 decl %r10d
4342 jz L$done_key256	# last iteration emits only the even key
4343
4344 pshufd $0xff,%xmm0,%xmm2	# broadcast the top dword of the new even key
4345 pxor %xmm3,%xmm3	# all-zero round key for the next instruction
4346.byte 102,15,56,221,211	# aesenclast %xmm3,%xmm2: substitution only, no rotate, no round constant
4347
# Same running-XOR transform as above, this time applied to %xmm1.
4348 movdqa %xmm1,%xmm3
4349 pslldq $4,%xmm1
4350 pxor %xmm1,%xmm3
4351 pslldq $4,%xmm1
4352 pxor %xmm1,%xmm3
4353 pslldq $4,%xmm1
4354 pxor %xmm3,%xmm1
4355
4356 pxor %xmm1,%xmm2	# next odd round key
4357 movdqu %xmm2,16(%rax)
4358 leaq 32(%rax),%rax	# two keys written, advance the cursor
4359 movdqa %xmm2,%xmm1	# keep the odd key for the next iteration
4360
4361 jmp L$oop_key256
4362
# Loop exit: %rax was not advanced in the final iteration, so 16(%rax) is
# the first byte after the last round key stored at (%rax).
4363L$done_key256:
4364 movl %esi,16(%rax)	# write the value held in %esi right after the schedule
4365 xorl %eax,%eax	# %rax = 0 on this path
4366 jmp L$enc_key_ret
4367
4368.p2align 4
# L$bad_keybits -- error exit: leaves -2 in %rax and falls through to the
# shared epilogue.
# L$enc_key_ret -- shared epilogue for every path that jumps here: clears
# %xmm0-%xmm5 (which held key material on the paths visible in this file
# section), releases 8 bytes of stack and returns with %rax untouched.
4369L$bad_keybits:
4370 movq $-2,%rax
4371L$enc_key_ret:
4372 pxor %xmm0,%xmm0	# scrub the vector registers used by the key expansion
4373 pxor %xmm1,%xmm1
4374 pxor %xmm2,%xmm2
4375 pxor %xmm3,%xmm3
4376 pxor %xmm4,%xmm4
4377 pxor %xmm5,%xmm5
4378 addq $8,%rsp	# NOTE(review): presumably undoes an 8-byte adjustment made at entry (not visible here) -- confirm
4379
4380 .byte 0xf3,0xc3	# encoded "rep ret"
4381L$SEH_end_set_encrypt_key:
4382
4383.p2align 4
# L$key_expansion_128 -- local helper, reached with "call".
# In:    %xmm0 = current round key
#        %xmm1 = 128-bit value whose dword 3 is mixed into the result
#        %xmm4 = scratch; the prefix-XOR result requires dword 0 == 0 on
#                entry (NOTE(review): presumably cleared by the caller --
#                confirm; the code below keeps dword 0 unchanged)
#        %rax  = write cursor (main entry only)
# Out:   %xmm0 = next round key; %rax advanced by 16 (main entry only)
# Clobb: %xmm1, %xmm4
# The _cold entry performs the same computation without the store and
# without advancing %rax.
4384L$key_expansion_128:
4385 movups %xmm0,(%rax)	# emit the current round key
4386 leaq 16(%rax),%rax
4387L$key_expansion_128_cold:
# With %xmm4 dword 0 == 0 the two shufps/xorps pairs leave
# %xmm0 = (k0, k0^k1, k0^k1^k2, k0^k1^k2^k3).
4388 shufps $16,%xmm0,%xmm4	# %xmm4 = (x4[0], x4[0], k1, k0)
4389 xorps %xmm4,%xmm0
4390 shufps $140,%xmm0,%xmm4	# %xmm4 = (x4[0], x4[3], x0[0], x0[2])
4391 xorps %xmm4,%xmm0
4392 shufps $255,%xmm1,%xmm1	# broadcast dword 3 of %xmm1
4393 xorps %xmm1,%xmm0
4394 .byte 0xf3,0xc3	# encoded "rep ret"
4395
4396.p2align 4
# L$key_expansion_192a -- local helper with three entry points.
#   L$key_expansion_192a       store %xmm0 at (%rax), advance %rax by 16,
#                              then continue into the _cold entry
#   L$key_expansion_192a_cold  save %xmm2 in %xmm5, then continue
#   L$key_expansion_192b_warm  update step only (jumped to from
#                              L$key_expansion_192b)
# In:    %xmm0, %xmm2 = current key words; %xmm1 = 128-bit value whose
#        dword 1 is mixed in; %xmm4 = scratch whose dword 0 must be 0 for
#        the prefix-XOR of %xmm0 (NOTE(review): presumably cleared by the
#        caller -- confirm)
# Out:   %xmm0, %xmm2 updated; %xmm5 = copy of the incoming %xmm2 (192a and
#        _cold entries only)
# Clobb: %xmm1, %xmm3, %xmm4
4397L$key_expansion_192a:
4398 movups %xmm0,(%rax)
4399 leaq 16(%rax),%rax
4400L$key_expansion_192a_cold:
4401 movaps %xmm2,%xmm5	# remember %xmm2 before it is updated
4402L$key_expansion_192b_warm:
4403 shufps $16,%xmm0,%xmm4	# start of the prefix-XOR of %xmm0 through %xmm4
4404 movdqa %xmm2,%xmm3
4405 xorps %xmm4,%xmm0
4406 shufps $140,%xmm0,%xmm4
4407 pslldq $4,%xmm3	# %xmm3 = %xmm2 shifted left by one dword
4408 xorps %xmm4,%xmm0
4409 pshufd $85,%xmm1,%xmm1	# broadcast dword 1 of %xmm1
4410 pxor %xmm3,%xmm2	# %xmm2 ^= %xmm2 << 32
4411 pxor %xmm1,%xmm0	# finish the update of %xmm0
4412 pshufd $255,%xmm0,%xmm3	# broadcast dword 3 of the updated %xmm0
4413 pxor %xmm3,%xmm2	# chain it into %xmm2
4414 .byte 0xf3,0xc3	# encoded "rep ret"
4415
4416.p2align 4
# L$key_expansion_192b -- local helper, reached with "call". Repacks key
# words held in %xmm5, %xmm0 and %xmm2 into two 16-byte blocks, stores them
# at (%rax) and 16(%rax), advances %rax by 32 and then tail-jumps into
# L$key_expansion_192b_warm, whose return goes back to the caller of this
# helper.
# In:    %xmm0, %xmm2 = current key words; %xmm5 = value whose low qword
#        goes into the first stored block; %rax = write cursor
# Clobb: %xmm3, %xmm5, plus everything L$key_expansion_192b_warm changes
4417L$key_expansion_192b:
4418 movaps %xmm0,%xmm3
4419 shufps $68,%xmm0,%xmm5	# %xmm5 = (low qword of %xmm5, low qword of %xmm0)
4420 movups %xmm5,(%rax)
4421 shufps $78,%xmm2,%xmm3	# %xmm3 = (high qword of %xmm0, low qword of %xmm2)
4422 movups %xmm3,16(%rax)
4423 leaq 32(%rax),%rax
4424 jmp L$key_expansion_192b_warm	# entering at _warm skips the %xmm2 -> %xmm5 save
4425
4426.p2align 4
# L$key_expansion_256a -- local helper, reached with "call" from
# L$14rounds. Stores %xmm2 and derives the next value of %xmm0.
# In:    %xmm0 = key block to update; %xmm2 = key block to store (main
#        entry only); %xmm1 = aeskeygenassist output, dword 3 is used;
#        %xmm4 = scratch whose dword 0 must be 0 for the prefix-XOR
#        (NOTE(review): presumably cleared by the caller -- confirm);
#        %rax = write cursor (main entry only)
# Out:   %xmm0 updated; %rax advanced by 16 (main entry only)
# Clobb: %xmm1, %xmm4
# The _cold entry performs the same update without the store.
4427L$key_expansion_256a:
4428 movups %xmm2,(%rax)	# emit %xmm2 as the next round key
4429 leaq 16(%rax),%rax
4430L$key_expansion_256a_cold:
# With %xmm4 dword 0 == 0 the two shufps/xorps pairs leave
# %xmm0 = (k0, k0^k1, k0^k1^k2, k0^k1^k2^k3).
4431 shufps $16,%xmm0,%xmm4
4432 xorps %xmm4,%xmm0
4433 shufps $140,%xmm0,%xmm4
4434 xorps %xmm4,%xmm0
4435 shufps $255,%xmm1,%xmm1	# broadcast dword 3 of %xmm1
4436 xorps %xmm1,%xmm0
4437 .byte 0xf3,0xc3	# encoded "rep ret"
4438
4439.p2align 4
# L$key_expansion_256b -- local helper, reached with "call" from
# L$14rounds. Counterpart of L$key_expansion_256a with the roles of %xmm0
# and %xmm2 swapped: stores %xmm0 and derives the next value of %xmm2.
# In:    %xmm2 = key block to update; %xmm0 = key block to store;
#        %xmm1 = aeskeygenassist output, dword 2 is used;
#        %xmm4 = scratch whose dword 0 must be 0 for the prefix-XOR
#        (NOTE(review): presumably cleared by the caller -- confirm);
#        %rax = write cursor
# Out:   %xmm2 updated; %rax advanced by 16
# Clobb: %xmm1, %xmm4
4440L$key_expansion_256b:
4441 movups %xmm0,(%rax)	# emit %xmm0 as the next round key
4442 leaq 16(%rax),%rax
4443
# With %xmm4 dword 0 == 0 the two shufps/xorps pairs leave
# %xmm2 = (k0, k0^k1, k0^k1^k2, k0^k1^k2^k3).
4444 shufps $16,%xmm2,%xmm4
4445 xorps %xmm4,%xmm2
4446 shufps $140,%xmm2,%xmm4
4447 xorps %xmm4,%xmm2
4448 shufps $170,%xmm1,%xmm1	# broadcast dword 2 of %xmm1
4449 xorps %xmm1,%xmm2
4450 .byte 0xf3,0xc3	# encoded "rep ret"
4451
4452
4453
4454.p2align 6
# Constant pool, aligned to 64 bytes. Every entry is 16 bytes long, so each
# label stays 16-byte aligned, which the movdqa loads of L$key_rotate and
# L$key_rcon1 in L$14rounds_alt require. Within this part of the file only
# those two labels are referenced; the other entries are used by code
# outside this excerpt.
4455L$bswap_mask:
4456.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0	# byte indices 15..0, i.e. a full 16-byte reversal pattern
4457L$increment32:
4458.long 6,6,6,0
4459L$increment64:
4460.long 1,0,0,0
4461L$xts_magic:
4462.long 0x87,0,1,0
4463L$increment1:
4464.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1	# value 1 in the last byte only
4465L$key_rotate:
4466.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	# pshufb mask: bytes 13,14,15,12 copied into every dword
4467L$key_rotate192:
4468.long 0x04070605,0x04070605,0x04070605,0x04070605	# same pattern built from bytes 5,6,7,4
4469L$key_rcon1:
4470.long 1,1,1,1	# initial round constant, replicated per dword
4471L$key_rcon1b:
4472.long 0x1b,0x1b,0x1b,0x1b	# 0x1b replicated per dword
4473
# NUL-terminated ASCII identification string:
# "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
4474.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
4475.p2align 6
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette