VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.1j/crypto/genasm-elf/aesni-mb-x86_64.S@ 88461

Last change on this file since 88461 was 83531, checked in by vboxsync, 5 years ago

setting svn:sync-process=export for openssl-1.1.1f, all files except tests

File size: 10.1 KB
Line 
.text

/*
 * aesni_multi_cbc_encrypt -- AES-CBC encryption of four independent
 * streams ("lanes") in parallel using AES-NI.  GNU as, AT&T syntax.
 *
 * Register inputs as read by the code below:
 *   %rdi  base of an array of 40-byte lane descriptors, four per group
 *         (160 bytes per group).  Per descriptor:
 *           +0   qword   pointer the lane reads its blocks from
 *           +8   qword   pointer the lane writes its blocks to
 *           +16  dword   signed count of 16-byte blocks; <= 0 disables
 *                        the lane
 *           +24  16 bytes initial chaining value (IV)
 *   %rsi  key schedule: 16-byte round keys starting at offset 0 and a
 *         dword at offset 240 that selects how many rounds are run
 *         (< 11: 10 rounds, == 11: 12 rounds, otherwise 14 rounds).
 *   %edx  number of four-lane groups to process.
 *   NOTE(review): presumably these are the OpenSSL CIPH_DESC and AES_KEY
 *   layouts -- confirm against the C caller.
 *   NOTE(review): the group loop is bottom-tested, so %edx is assumed to
 *   be at least 1 on entry -- confirm callers never pass 0.
 *
 * Stack frame (after %rsp is aligned down to 64 bytes):
 *    0(%rsp) 16-byte sink that finished lanes store into
 *   16(%rsp) entry value of %rsp (also where finished lanes load from)
 *   24(%rsp) saved group counter
 *   32(%rsp) four dword per-lane remaining-block counters
 *
 * Register roles inside the block loop:
 *   %r8-%r11   lane input pointers     %r12-%r15  lane output pointers
 *   %rbx       byte offset of the NEXT block (advanced by 16 per pass)
 *   %rbp       sink base, chosen so that (%rbp,%rbx) == 16(%rsp)
 *   %edx       passes left = largest positive lane count
 *   %xmm2-5    lane state          %xmm6-9   next input blocks
 *   %xmm0/1    round keys in flight, %xmm12  round key 0
 *   %xmm10/11  lane counters and their decrement mask
 *
 * Clobbers %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0-%xmm12, flags.
 * %rbx, %rbp and %r12-%r15 are saved and restored.  %rax holds the entry
 * stack pointer on return; the descriptors themselves are not written.
 *
 * The AES-NI instructions are written as mnemonics; they assemble to the
 * same bytes as the 66 0F 38 DC/DD .byte sequences they replace.
 */
.globl	aesni_multi_cbc_encrypt
.type	aesni_multi_cbc_encrypt,@function
.align	32
aesni_multi_cbc_encrypt:
.cfi_startproc
	movq	%rsp,%rax			# frame anchor = entry rsp
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	subq	$48,%rsp
	andq	$-64,%rsp			# 64-byte aligned frame
	movq	%rax,16(%rsp)			# remember entry rsp
	/* DW_CFA_def_cfa_expression: CFA = *(rsp + 16) + 8 */
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Lenc4x_body:
	movdqu	(%rsi),%xmm12			# round key 0
	leaq	120(%rsi),%rsi			# key offsets below are relative to key+120
	leaq	80(%rdi),%rdi			# descriptor offsets below are relative to +80

.Lenc4x_loop_grande:
	movl	%edx,24(%rsp)			# park the group counter
	xorl	%edx,%edx			# edx accumulates max(0, lane counts)

	/* lane 0 */
	movl	-64(%rdi),%ecx			# block count
	movq	-80(%rdi),%r8			# input pointer
	cmpl	%edx,%ecx
	movq	-72(%rdi),%r12			# output pointer
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-56(%rdi),%xmm2			# IV
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8			# inactive lane reads from the stack

	/* lane 1 */
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-32(%rdi),%r13
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-16(%rdi),%xmm3
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9

	/* lane 2 */
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r10
	cmpl	%edx,%ecx
	movq	8(%rdi),%r14
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	24(%rdi),%xmm4
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10

	/* lane 3 */
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	48(%rdi),%r15
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	64(%rdi),%xmm5
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11

	/*
	 * NOTE(review): when no lane of this group has work the function
	 * returns; any later groups are not visited -- confirm intended.
	 */
	testl	%edx,%edx
	jz	.Lenc4x_done

	/* state = IV ^ first input block ^ round key 0, for each lane */
	movups	16-120(%rsi),%xmm1		# round key 1
	pxor	%xmm12,%xmm2
	movups	32-120(%rsi),%xmm0		# round key 2
	pxor	%xmm12,%xmm3
	movl	240-120(%rsi),%eax		# round selector
	pxor	%xmm12,%xmm4
	movdqu	(%r8),%xmm6
	pxor	%xmm12,%xmm5
	movdqu	(%r9),%xmm7
	pxor	%xmm6,%xmm2
	movdqu	(%r10),%xmm8
	pxor	%xmm7,%xmm3
	movdqu	(%r11),%xmm9
	pxor	%xmm8,%xmm4
	pxor	%xmm9,%xmm5
	movdqa	32(%rsp),%xmm10			# four lane counters
	xorq	%rbx,%rbx
	jmp	.Loop_enc4x

.align	32
.Loop_enc4x:
	addq	$16,%rbx			# offset of the next block
	leaq	16(%rsp),%rbp
	movl	$1,%ecx				# constant the counters are compared with
	subq	%rbx,%rbp			# sink base: (%rbp,%rbx) == 16(%rsp)

	aesenc	%xmm1,%xmm2			# round 1
	prefetcht0	31(%r8,%rbx,1)		# prefetch upcoming input, lanes 0..3
	prefetcht0	31(%r9,%rbx,1)
	aesenc	%xmm1,%xmm3
	prefetcht0	31(%r10,%rbx,1)
	prefetcht0	31(%r11,%rbx,1)		# lane 3 (was a second %r10 prefetch)
	aesenc	%xmm1,%xmm4
	aesenc	%xmm1,%xmm5
	movups	48-120(%rsi),%xmm1		# round key 3

	/*
	 * Per lane: with a count <= 1 there is no further input block, so
	 * the input pointer is switched to the sink; with a count <= 0 the
	 * lane is finished and its output pointer is switched as well.
	 */
	cmpl	32(%rsp),%ecx
	aesenc	%xmm0,%xmm2			# round 2
	aesenc	%xmm0,%xmm3
	aesenc	%xmm0,%xmm4
	cmovgeq	%rbp,%r8
	cmovgq	%rbp,%r12
	aesenc	%xmm0,%xmm5
	movups	64-120(%rsi),%xmm0		# round key 4
	cmpl	36(%rsp),%ecx
	aesenc	%xmm1,%xmm2			# round 3
	aesenc	%xmm1,%xmm3
	aesenc	%xmm1,%xmm4
	cmovgeq	%rbp,%r9
	cmovgq	%rbp,%r13
	aesenc	%xmm1,%xmm5
	movups	80-120(%rsi),%xmm1		# round key 5
	cmpl	40(%rsp),%ecx
	aesenc	%xmm0,%xmm2			# round 4
	aesenc	%xmm0,%xmm3
	aesenc	%xmm0,%xmm4
	cmovgeq	%rbp,%r10
	cmovgq	%rbp,%r14
	aesenc	%xmm0,%xmm5
	movups	96-120(%rsi),%xmm0		# round key 6
	cmpl	44(%rsp),%ecx
	aesenc	%xmm1,%xmm2			# round 5
	aesenc	%xmm1,%xmm3
	aesenc	%xmm1,%xmm4
	cmovgeq	%rbp,%r11
	cmovgq	%rbp,%r15
	aesenc	%xmm1,%xmm5
	movups	112-120(%rsi),%xmm1		# round key 7
	movdqa	%xmm10,%xmm11
	aesenc	%xmm0,%xmm2			# round 6
	prefetcht0	15(%r12,%rbx,1)		# prefetch upcoming output, lanes 0..3
	prefetcht0	15(%r13,%rbx,1)
	aesenc	%xmm0,%xmm3
	prefetcht0	15(%r14,%rbx,1)
	prefetcht0	15(%r15,%rbx,1)
	aesenc	%xmm0,%xmm4
	aesenc	%xmm0,%xmm5
	movups	128-120(%rsi),%xmm0		# round key 8
	pxor	%xmm12,%xmm12			# zero, borrowed for the compare below

	aesenc	%xmm1,%xmm2			# round 7
	pcmpgtd	%xmm12,%xmm11			# -1 in every lane whose counter is > 0
	movdqu	0-120(%rsi),%xmm12		# restore round key 0
	aesenc	%xmm1,%xmm3
	paddd	%xmm11,%xmm10			# decrement the positive counters
	movdqa	%xmm10,32(%rsp)
	aesenc	%xmm1,%xmm4
	aesenc	%xmm1,%xmm5
	movups	144-120(%rsi),%xmm1		# round key 9

	cmpl	$11,%eax			# flags survive until the je below

	aesenc	%xmm0,%xmm2			# round 8
	aesenc	%xmm0,%xmm3
	aesenc	%xmm0,%xmm4
	aesenc	%xmm0,%xmm5
	movups	160-120(%rsi),%xmm0		# round key 10

	jb	.Lenc4x_tail			# selector < 11: 10 rounds in total

	aesenc	%xmm1,%xmm2			# round 9
	aesenc	%xmm1,%xmm3
	aesenc	%xmm1,%xmm4
	aesenc	%xmm1,%xmm5
	movups	176-120(%rsi),%xmm1		# round key 11

	aesenc	%xmm0,%xmm2			# round 10
	aesenc	%xmm0,%xmm3
	aesenc	%xmm0,%xmm4
	aesenc	%xmm0,%xmm5
	movups	192-120(%rsi),%xmm0		# round key 12

	je	.Lenc4x_tail			# selector == 11: 12 rounds in total

	aesenc	%xmm1,%xmm2			# round 11
	aesenc	%xmm1,%xmm3
	aesenc	%xmm1,%xmm4
	aesenc	%xmm1,%xmm5
	movups	208-120(%rsi),%xmm1		# round key 13

	aesenc	%xmm0,%xmm2			# round 12
	aesenc	%xmm0,%xmm3
	aesenc	%xmm0,%xmm4
	aesenc	%xmm0,%xmm5
	movups	224-120(%rsi),%xmm0		# round key 14
	jmp	.Lenc4x_tail

.align	32
.Lenc4x_tail:
	/* Last two rounds; xmm1 = second-to-last key, xmm0 = last key. */
	aesenc	%xmm1,%xmm2
	aesenc	%xmm1,%xmm3
	aesenc	%xmm1,%xmm4
	aesenc	%xmm1,%xmm5
	movdqu	(%r8,%rbx,1),%xmm6		# next input block (or sink data)
	movdqu	16-120(%rsi),%xmm1		# round key 1 for the next pass

	aesenclast	%xmm0,%xmm2
	movdqu	(%r9,%rbx,1),%xmm7
	pxor	%xmm12,%xmm6			# next input ^ round key 0
	aesenclast	%xmm0,%xmm3
	movdqu	(%r10,%rbx,1),%xmm8
	pxor	%xmm12,%xmm7
	aesenclast	%xmm0,%xmm4
	movdqu	(%r11,%rbx,1),%xmm9
	pxor	%xmm12,%xmm8
	aesenclast	%xmm0,%xmm5
	movdqu	32-120(%rsi),%xmm0		# round key 2 for the next pass
	pxor	%xmm12,%xmm9

	/* Store this block, then chain it into the next state. */
	movups	%xmm2,-16(%r12,%rbx,1)
	pxor	%xmm6,%xmm2
	movups	%xmm3,-16(%r13,%rbx,1)
	pxor	%xmm7,%xmm3
	movups	%xmm4,-16(%r14,%rbx,1)
	pxor	%xmm8,%xmm4
	movups	%xmm5,-16(%r15,%rbx,1)
	pxor	%xmm9,%xmm5

	decl	%edx
	jnz	.Loop_enc4x

	movq	16(%rsp),%rax			# eax held the round selector; reload the anchor
.cfi_def_cfa	%rax,8
	movl	24(%rsp),%edx			# group counter

	leaq	160(%rdi),%rdi			# next group of four descriptors
	decl	%edx
	jnz	.Lenc4x_loop_grande

.Lenc4x_done:
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Lenc4x_epilogue:
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	aesni_multi_cbc_encrypt,.-aesni_multi_cbc_encrypt
278
/*
 * aesni_multi_cbc_decrypt -- AES-CBC decryption of four independent
 * streams ("lanes") in parallel using AES-NI.  GNU as, AT&T syntax.
 *
 * Register inputs as read by the code below:
 *   %rdi  base of an array of 40-byte lane descriptors, four per group
 *         (160 bytes per group).  Per descriptor:
 *           +0   qword   pointer the lane reads its blocks from
 *           +8   qword   pointer the lane writes its blocks to
 *           +16  dword   signed count of 16-byte blocks; <= 0 disables
 *                        the lane
 *           +24  16 bytes initial chaining value (IV)
 *   %rsi  key schedule: 16-byte round keys starting at offset 0 and a
 *         dword at offset 240 that selects how many rounds are run
 *         (< 11: 10 rounds, == 11: 12 rounds, otherwise 14 rounds).
 *   %edx  number of four-lane groups to process.
 *   NOTE(review): presumably these are the OpenSSL CIPH_DESC and AES_KEY
 *   layouts, with a decryption key schedule -- confirm against the caller.
 *   NOTE(review): the group loop is bottom-tested, so %edx is assumed to
 *   be at least 1 on entry -- confirm callers never pass 0.
 *
 * Stack frame (after %rsp is aligned down to 64 bytes):
 *    0(%rsp) 16-byte sink that finished lanes store into
 *   16(%rsp) entry value of %rsp (also where finished lanes load from)
 *   24(%rsp) saved group counter
 *   32(%rsp) four dword per-lane remaining-block counters
 *
 * Register roles inside the block loop:
 *   %r8-%r11   lane input pointers     %r12-%r15  lane output pointers
 *   %rbx       byte offset of the NEXT block (advanced by 16 per pass)
 *   %rbp       sink base, chosen so that (%rbp,%rbx) == 16(%rsp)
 *   %edx       passes left = largest positive lane count
 *   %xmm2-5    lane state          %xmm6-9   chaining values
 *   %xmm0/1    round keys in flight, %xmm12  round key 0
 *   %xmm10/11  lane counters and their decrement mask
 *
 * Clobbers %rax, %rcx, %rdx, %rsi, %rdi, %r8-%r11, %xmm0-%xmm12, flags.
 * %rbx, %rbp and %r12-%r15 are saved and restored.  %rax holds the entry
 * stack pointer on return; the descriptors themselves are not written.
 *
 * The AES-NI instructions are written as mnemonics; they assemble to the
 * same bytes as the 66 0F 38 DE/DF .byte sequences they stand for.
 */
.globl	aesni_multi_cbc_decrypt
.type	aesni_multi_cbc_decrypt,@function
.align	32
aesni_multi_cbc_decrypt:
.cfi_startproc
	movq	%rsp,%rax			# frame anchor = entry rsp
.cfi_def_cfa_register	%rax
	pushq	%rbx
.cfi_offset	%rbx,-16
	pushq	%rbp
.cfi_offset	%rbp,-24
	pushq	%r12
.cfi_offset	%r12,-32
	pushq	%r13
.cfi_offset	%r13,-40
	pushq	%r14
.cfi_offset	%r14,-48
	pushq	%r15
.cfi_offset	%r15,-56

	subq	$48,%rsp
	andq	$-64,%rsp			# 64-byte aligned frame
	movq	%rax,16(%rsp)			# remember entry rsp
	/* DW_CFA_def_cfa_expression: CFA = *(rsp + 16) + 8 */
.cfi_escape	0x0f,0x05,0x77,0x10,0x06,0x23,0x08

.Ldec4x_body:
	movdqu	(%rsi),%xmm12			# round key 0
	leaq	120(%rsi),%rsi			# key offsets below are relative to key+120
	leaq	80(%rdi),%rdi			# descriptor offsets below are relative to +80

.Ldec4x_loop_grande:
	movl	%edx,24(%rsp)			# park the group counter
	xorl	%edx,%edx			# edx accumulates max(0, lane counts)

	/* lane 0 */
	movl	-64(%rdi),%ecx			# block count
	movq	-80(%rdi),%r8			# input pointer
	cmpl	%edx,%ecx
	movq	-72(%rdi),%r12			# output pointer
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-56(%rdi),%xmm6			# IV
	movl	%ecx,32(%rsp)
	cmovleq	%rsp,%r8			# inactive lane reads from the stack

	/* lane 1 */
	movl	-24(%rdi),%ecx
	movq	-40(%rdi),%r9
	cmpl	%edx,%ecx
	movq	-32(%rdi),%r13
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	-16(%rdi),%xmm7
	movl	%ecx,36(%rsp)
	cmovleq	%rsp,%r9

	/* lane 2 */
	movl	16(%rdi),%ecx
	movq	0(%rdi),%r10
	cmpl	%edx,%ecx
	movq	8(%rdi),%r14
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	24(%rdi),%xmm8
	movl	%ecx,40(%rsp)
	cmovleq	%rsp,%r10

	/* lane 3 */
	movl	56(%rdi),%ecx
	movq	40(%rdi),%r11
	cmpl	%edx,%ecx
	movq	48(%rdi),%r15
	cmovgl	%ecx,%edx
	testl	%ecx,%ecx
	movdqu	64(%rdi),%xmm9
	movl	%ecx,44(%rsp)
	cmovleq	%rsp,%r11

	/*
	 * NOTE(review): when no lane of this group has work the function
	 * returns; any later groups are not visited -- confirm intended.
	 */
	testl	%edx,%edx
	jz	.Ldec4x_done

	/* state = first input block ^ round key 0, for each lane */
	movups	16-120(%rsi),%xmm1		# round key 1
	movups	32-120(%rsi),%xmm0		# round key 2
	movl	240-120(%rsi),%eax		# round selector
	movdqu	(%r8),%xmm2
	movdqu	(%r9),%xmm3
	pxor	%xmm12,%xmm2
	movdqu	(%r10),%xmm4
	pxor	%xmm12,%xmm3
	movdqu	(%r11),%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm12,%xmm5
	movdqa	32(%rsp),%xmm10			# four lane counters
	xorq	%rbx,%rbx
	jmp	.Loop_dec4x

.align	32
.Loop_dec4x:
	addq	$16,%rbx			# offset of the next block
	leaq	16(%rsp),%rbp
	movl	$1,%ecx				# constant the counters are compared with
	subq	%rbx,%rbp			# sink base: (%rbp,%rbx) == 16(%rsp)

	aesdec	%xmm1,%xmm2			# round 1
	prefetcht0	31(%r8,%rbx,1)		# prefetch upcoming input, lanes 0..3
	prefetcht0	31(%r9,%rbx,1)
	aesdec	%xmm1,%xmm3
	prefetcht0	31(%r10,%rbx,1)
	prefetcht0	31(%r11,%rbx,1)
	aesdec	%xmm1,%xmm4
	aesdec	%xmm1,%xmm5
	movups	48-120(%rsi),%xmm1		# round key 3

	/*
	 * Per lane: with a count <= 1 there is no further input block, so
	 * the input pointer is switched to the sink; with a count <= 0 the
	 * lane is finished and its output pointer is switched as well.
	 */
	cmpl	32(%rsp),%ecx
	aesdec	%xmm0,%xmm2			# round 2
	aesdec	%xmm0,%xmm3
	aesdec	%xmm0,%xmm4
	cmovgeq	%rbp,%r8
	cmovgq	%rbp,%r12
	aesdec	%xmm0,%xmm5
	movups	64-120(%rsi),%xmm0		# round key 4
	cmpl	36(%rsp),%ecx
	aesdec	%xmm1,%xmm2			# round 3
	aesdec	%xmm1,%xmm3
	aesdec	%xmm1,%xmm4
	cmovgeq	%rbp,%r9
	cmovgq	%rbp,%r13
	aesdec	%xmm1,%xmm5
	movups	80-120(%rsi),%xmm1		# round key 5
	cmpl	40(%rsp),%ecx
	aesdec	%xmm0,%xmm2			# round 4
	aesdec	%xmm0,%xmm3
	aesdec	%xmm0,%xmm4
	cmovgeq	%rbp,%r10
	cmovgq	%rbp,%r14
	aesdec	%xmm0,%xmm5
	movups	96-120(%rsi),%xmm0		# round key 6
	cmpl	44(%rsp),%ecx
	aesdec	%xmm1,%xmm2			# round 5
	aesdec	%xmm1,%xmm3
	aesdec	%xmm1,%xmm4
	cmovgeq	%rbp,%r11
	cmovgq	%rbp,%r15
	aesdec	%xmm1,%xmm5
	movups	112-120(%rsi),%xmm1		# round key 7
	movdqa	%xmm10,%xmm11
	aesdec	%xmm0,%xmm2			# round 6
	prefetcht0	15(%r12,%rbx,1)		# prefetch upcoming output, lanes 0..3
	prefetcht0	15(%r13,%rbx,1)
	aesdec	%xmm0,%xmm3
	prefetcht0	15(%r14,%rbx,1)
	prefetcht0	15(%r15,%rbx,1)
	aesdec	%xmm0,%xmm4
	aesdec	%xmm0,%xmm5
	movups	128-120(%rsi),%xmm0		# round key 8
	pxor	%xmm12,%xmm12			# zero, borrowed for the compare below

	aesdec	%xmm1,%xmm2			# round 7
	pcmpgtd	%xmm12,%xmm11			# -1 in every lane whose counter is > 0
	movdqu	0-120(%rsi),%xmm12		# restore round key 0
	aesdec	%xmm1,%xmm3
	paddd	%xmm11,%xmm10			# decrement the positive counters
	movdqa	%xmm10,32(%rsp)
	aesdec	%xmm1,%xmm4
	aesdec	%xmm1,%xmm5
	movups	144-120(%rsi),%xmm1		# round key 9

	cmpl	$11,%eax			# flags survive until the je below

	aesdec	%xmm0,%xmm2			# round 8
	aesdec	%xmm0,%xmm3
	aesdec	%xmm0,%xmm4
	aesdec	%xmm0,%xmm5
	movups	160-120(%rsi),%xmm0		# round key 10

	jb	.Ldec4x_tail			# selector < 11: 10 rounds in total

	aesdec	%xmm1,%xmm2			# round 9
	aesdec	%xmm1,%xmm3
	aesdec	%xmm1,%xmm4
	aesdec	%xmm1,%xmm5
	movups	176-120(%rsi),%xmm1		# round key 11

	aesdec	%xmm0,%xmm2			# round 10
	aesdec	%xmm0,%xmm3
	aesdec	%xmm0,%xmm4
	aesdec	%xmm0,%xmm5
	movups	192-120(%rsi),%xmm0		# round key 12

	je	.Ldec4x_tail			# selector == 11: 12 rounds in total

	aesdec	%xmm1,%xmm2			# round 11
	aesdec	%xmm1,%xmm3
	aesdec	%xmm1,%xmm4
	aesdec	%xmm1,%xmm5
	movups	208-120(%rsi),%xmm1		# round key 13

	aesdec	%xmm0,%xmm2			# round 12
	aesdec	%xmm0,%xmm3
	aesdec	%xmm0,%xmm4
	aesdec	%xmm0,%xmm5
	movups	224-120(%rsi),%xmm0		# round key 14
	jmp	.Ldec4x_tail

.align	32
.Ldec4x_tail:
	/*
	 * Last two rounds; xmm1 = second-to-last key, xmm0 = last key.
	 * The last key is folded into the chaining values first, so the
	 * final round also applies the CBC xor.
	 */
	aesdec	%xmm1,%xmm2
	aesdec	%xmm1,%xmm3
	aesdec	%xmm1,%xmm4
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
	aesdec	%xmm1,%xmm5
	movdqu	16-120(%rsi),%xmm1		# round key 1 for the next pass
	pxor	%xmm0,%xmm8
	pxor	%xmm0,%xmm9
	movdqu	32-120(%rsi),%xmm0		# round key 2 for the next pass

	aesdeclast	%xmm6,%xmm2
	aesdeclast	%xmm7,%xmm3
	movdqu	-16(%r8,%rbx,1),%xmm6		# block just consumed = next chaining value
	movdqu	-16(%r9,%rbx,1),%xmm7
	aesdeclast	%xmm8,%xmm4
	aesdeclast	%xmm9,%xmm5
	movdqu	-16(%r10,%rbx,1),%xmm8
	movdqu	-16(%r11,%rbx,1),%xmm9

	/* Store this block, then fetch and whiten the next input block. */
	movups	%xmm2,-16(%r12,%rbx,1)
	movdqu	(%r8,%rbx,1),%xmm2
	movups	%xmm3,-16(%r13,%rbx,1)
	movdqu	(%r9,%rbx,1),%xmm3
	pxor	%xmm12,%xmm2
	movups	%xmm4,-16(%r14,%rbx,1)
	movdqu	(%r10,%rbx,1),%xmm4
	pxor	%xmm12,%xmm3
	movups	%xmm5,-16(%r15,%rbx,1)
	movdqu	(%r11,%rbx,1),%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm12,%xmm5

	decl	%edx
	jnz	.Loop_dec4x

	movq	16(%rsp),%rax			# eax held the round selector; reload the anchor
.cfi_def_cfa	%rax,8
	movl	24(%rsp),%edx			# group counter

	leaq	160(%rdi),%rdi			# next group of four descriptors
	decl	%edx
	jnz	.Ldec4x_loop_grande

.Ldec4x_done:
	movq	-48(%rax),%r15
.cfi_restore	%r15
	movq	-40(%rax),%r14
.cfi_restore	%r14
	movq	-32(%rax),%r13
.cfi_restore	%r13
	movq	-24(%rax),%r12
.cfi_restore	%r12
	movq	-16(%rax),%rbp
.cfi_restore	%rbp
	movq	-8(%rax),%rbx
.cfi_restore	%rbx
	leaq	(%rax),%rsp
.cfi_def_cfa_register	%rsp
.Ldec4x_epilogue:
	.byte	0xf3,0xc3			# rep ret
.cfi_endproc
.size	aesni_multi_cbc_decrypt,.-aesni_multi_cbc_decrypt
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette