VirtualBox

source: vbox/trunk/src/libs/openssl-3.0.1/engines/asm/e_padlock-x86_64.pl@ 94082

Last change on this file since 94082 was 94082, checked in by vboxsync, 3 years ago

libs/openssl-3.0.1: started applying and adjusting our OpenSSL changes to 3.0.1. bugref:10128

File size: 13.1 KB
#! /usr/bin/env perl
# Copyright 2011-2018 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================

# September 2011
#
# Assembler helpers for the Padlock engine. See e_padlock-x86.pl for
# details.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../crypto/perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;
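# Everything printed below is piped through x86_64-xlate.pl, which converts
# the AT&T-style code to whatever the requested "flavour" needs (elf, macosx,
# mingw64, nasm, ...).  A typical stand-alone invocation, for illustration
# only (the output file name is arbitrary), would be:
#
#   perl e_padlock-x86_64.pl elf e_padlock-x86_64.s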

$code=".text\n";

%PADLOCK_PREFETCH=(ecb=>128, cbc=>64, ctr32=>32);	# prefetch errata
$PADLOCK_CHUNK=512;	# Must be a power of 2 between 32 and 2^20
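# $PADLOCK_CHUNK is the largest number of bytes handed to a single xcrypt
# invocation whenever data has to be bounced through the on-stack buffer set
# up below.  The %PADLOCK_PREFETCH values are the per-mode distances the
# hardware may prefetch past the end of its input (the "prefetch errata"
# above); the tail handling further down keeps such a prefetch from running
# into an unmapped page.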

$ctx="%rdx";
$out="%rdi";
$inp="%rsi";
$len="%rcx";
$chunk="%rbx";

($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

$code.=<<___;
.globl	padlock_capability
.type	padlock_capability,\@abi-omnipotent
.align	16
padlock_capability:
	mov	%rbx,%r8
	xor	%eax,%eax
	cpuid
	xor	%eax,%eax
	cmp	\$`"0x".unpack("H*",'tneC')`,%ebx
	jne	.Lzhaoxin
	cmp	\$`"0x".unpack("H*",'Hrua')`,%edx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'slua')`,%ecx
	jne	.Lnoluck
	jmp	.LzhaoxinEnd
.Lzhaoxin:
	cmp	\$`"0x".unpack("H*",'hS  ')`,%ebx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'hgna')`,%edx
	jne	.Lnoluck
	cmp	\$`"0x".unpack("H*",'  ia')`,%ecx
	jne	.Lnoluck
.LzhaoxinEnd:
	mov	\$0xC0000000,%eax
	cpuid
	mov	%eax,%edx
	xor	%eax,%eax
	cmp	\$0xC0000001,%edx
	jb	.Lnoluck
	mov	\$0xC0000001,%eax
	cpuid
	mov	%edx,%eax
	and	\$0xffffffef,%eax
	or	\$0x10,%eax		# set Nano bit#4
.Lnoluck:
	mov	%r8,%rbx
	ret
.size	padlock_capability,.-padlock_capability
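# padlock_capability() above matches the CPUID vendor string against
# "CentaurHauls" or "  Shanghai  " (the immediates are the string pieces as
# they appear in %ebx/%edx/%ecx), then queries the extended leaf 0xC0000001
# and returns its %edx feature flags with bit 4 ("Nano") forced on.  The C
# side of the engine presumably inspects those bits to decide which PadLock
# units (ACE, RNG, PHE) can be used.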

.globl	padlock_key_bswap
.type	padlock_key_bswap,\@abi-omnipotent,0
.align	16
padlock_key_bswap:
	mov	240($arg1),%edx
.Lbswap_loop:
	mov	($arg1),%eax
	bswap	%eax
	mov	%eax,($arg1)
	lea	4($arg1),$arg1
	sub	\$1,%edx
	jnz	.Lbswap_loop
	ret
.size	padlock_key_bswap,.-padlock_key_bswap

.globl	padlock_verify_context
.type	padlock_verify_context,\@abi-omnipotent
.align	16
padlock_verify_context:
	mov	$arg1,$ctx
	pushf
	lea	.Lpadlock_saved_context(%rip),%rax
	call	_padlock_verify_ctx
	lea	8(%rsp),%rsp
	ret
.size	padlock_verify_context,.-padlock_verify_context

.type	_padlock_verify_ctx,\@abi-omnipotent
.align	16
_padlock_verify_ctx:
	mov	8(%rsp),%r8
	bt	\$30,%r8
	jnc	.Lverified
	cmp	(%rax),$ctx
	je	.Lverified
	pushf
	popf
.Lverified:
	mov	$ctx,(%rax)
	ret
.size	_padlock_verify_ctx,.-_padlock_verify_ctx
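# PadLock caches the key material of the last context it operated on, so
# before switching to a different padlock_cipher_data the code must make the
# hardware reload it.  _padlock_verify_ctx checks bit 30 of the saved EFLAGS
# image and, if it is set and the requested context differs from the one
# recorded in .Lpadlock_saved_context, executes pushf/popf, which forces the
# engine to re-read the key material; padlock_reload_key below is nothing
# more than that pushf/popf sequence.  Either way the new context is
# recorded.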

.globl	padlock_reload_key
.type	padlock_reload_key,\@abi-omnipotent
.align	16
padlock_reload_key:
	pushf
	popf
	ret
.size	padlock_reload_key,.-padlock_reload_key

.globl	padlock_aes_block
.type	padlock_aes_block,\@function,3
.align	16
padlock_aes_block:
	mov	%rbx,%r8
	mov	\$1,$len
	lea	32($ctx),%rbx		# key
	lea	16($ctx),$ctx		# control word
	.byte	0xf3,0x0f,0xa7,0xc8	# rep xcryptecb
	mov	%r8,%rbx
	ret
.size	padlock_aes_block,.-padlock_aes_block

.globl	padlock_xstore
.type	padlock_xstore,\@function,2
.align	16
padlock_xstore:
	mov	%esi,%edx
	.byte	0x0f,0xa7,0xc0		# xstore
	ret
.size	padlock_xstore,.-padlock_xstore
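# padlock_xstore() takes a destination pointer and a second argument that is
# copied into %edx above; the xstore instruction writes random bytes from the
# hardware RNG to the buffer addressed by %rdi, with %edx understood to
# select the RNG "quality factor", and the status word it leaves in %eax is
# returned as is.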

.globl	padlock_sha1_oneshot
.type	padlock_sha1_oneshot,\@function,3
.align	16
padlock_sha1_oneshot:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	mov	16(%rdi),%eax
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	mov	%eax,16(%rsp)
	xor	%rax,%rax
	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
	movaps	(%rsp),%xmm0
	mov	16(%rsp),%eax
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	mov	%eax,16(%rdx)
	ret
.size	padlock_sha1_oneshot,.-padlock_sha1_oneshot
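# padlock_sha1_oneshot above and the remaining PHE helpers below all follow
# the same pattern: copy the caller's digest state into scratch space on the
# stack (the extra 128 bytes appear to be work area the xsha instructions
# expect beyond the state), point %rdi at that copy, run "rep xsha1" /
# "rep xsha256" / "rep xsha512", then copy the updated state back.  The only
# difference between the _oneshot and _blocks variants is the value preloaded
# into %rax (0 versus -1), which evidently selects between letting the
# hardware pad and finalize the message itself and processing complete
# blocks only.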

.globl	padlock_sha1_blocks
.type	padlock_sha1_blocks,\@function,3
.align	16
padlock_sha1_blocks:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	mov	16(%rdi),%eax
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	mov	%eax,16(%rsp)
	mov	\$-1,%rax
	.byte	0xf3,0x0f,0xa6,0xc8	# rep xsha1
	movaps	(%rsp),%xmm0
	mov	16(%rsp),%eax
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	mov	%eax,16(%rdx)
	ret
.size	padlock_sha1_blocks,.-padlock_sha1_blocks

.globl	padlock_sha256_oneshot
.type	padlock_sha256_oneshot,\@function,3
.align	16
padlock_sha256_oneshot:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	movups	16(%rdi),%xmm1
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	xor	%rax,%rax
	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	movups	%xmm1,16(%rdx)
	ret
.size	padlock_sha256_oneshot,.-padlock_sha256_oneshot

.globl	padlock_sha256_blocks
.type	padlock_sha256_blocks,\@function,3
.align	16
padlock_sha256_blocks:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	movups	16(%rdi),%xmm1
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	mov	\$-1,%rax
	.byte	0xf3,0x0f,0xa6,0xd0	# rep xsha256
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	movups	%xmm1,16(%rdx)
	ret
.size	padlock_sha256_blocks,.-padlock_sha256_blocks

.globl	padlock_sha512_blocks
.type	padlock_sha512_blocks,\@function,3
.align	16
padlock_sha512_blocks:
	mov	%rdx,%rcx
	mov	%rdi,%rdx		# put aside %rdi
	movups	(%rdi),%xmm0		# copy-in context
	sub	\$128+8,%rsp
	movups	16(%rdi),%xmm1
	movups	32(%rdi),%xmm2
	movups	48(%rdi),%xmm3
	movaps	%xmm0,(%rsp)
	mov	%rsp,%rdi
	movaps	%xmm1,16(%rsp)
	movaps	%xmm2,32(%rsp)
	movaps	%xmm3,48(%rsp)
	.byte	0xf3,0x0f,0xa6,0xe0	# rep xsha512
	movaps	(%rsp),%xmm0
	movaps	16(%rsp),%xmm1
	movaps	32(%rsp),%xmm2
	movaps	48(%rsp),%xmm3
	add	\$128+8,%rsp
	movups	%xmm0,(%rdx)		# copy-out context
	movups	%xmm1,16(%rdx)
	movups	%xmm2,32(%rdx)
	movups	%xmm3,48(%rdx)
	ret
.size	padlock_sha512_blocks,.-padlock_sha512_blocks
___
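
# generate_mode() below emits one padlock_<mode>_encrypt routine per xcrypt
# mode; $opcode is the ModR/M byte of the matching "rep xcrypt*" instruction
# (see the &generate_mode calls at the end of the file).  Each routine
# bounces misaligned or page-boundary-risky data through a scratch buffer
# carved out of the stack.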

sub generate_mode {
my ($mode,$opcode) = @_;
# int padlock_$mode_encrypt(void *out, const void *inp,
#	struct padlock_cipher_data *ctx, size_t len);
$code.=<<___;
.globl	padlock_${mode}_encrypt
.type	padlock_${mode}_encrypt,\@function,4
.align	16
padlock_${mode}_encrypt:
	push	%rbp
	push	%rbx

	xor	%eax,%eax
	test	\$15,$ctx
	jnz	.L${mode}_abort
	test	\$15,$len
	jnz	.L${mode}_abort
	lea	.Lpadlock_saved_context(%rip),%rax
	pushf
	cld
	call	_padlock_verify_ctx
	lea	16($ctx),$ctx		# control word
	xor	%eax,%eax
	xor	%ebx,%ebx
	testl	\$`1<<5`,($ctx)		# align bit in control word
	jnz	.L${mode}_aligned
	test	\$0x0f,$out
	setz	%al			# !out_misaligned
	test	\$0x0f,$inp
	setz	%bl			# !inp_misaligned
	test	%ebx,%eax
	jnz	.L${mode}_aligned
	neg	%rax
	mov	\$$PADLOCK_CHUNK,$chunk
	not	%rax			# out_misaligned?-1:0
	lea	(%rsp),%rbp
	cmp	$chunk,$len
	cmovc	$len,$chunk		# chunk=len>PADLOCK_CHUNK?PADLOCK_CHUNK:len
	and	$chunk,%rax		# out_misaligned?chunk:0
	mov	$len,$chunk
	neg	%rax
	and	\$$PADLOCK_CHUNK-1,$chunk	# chunk%=PADLOCK_CHUNK
	lea	(%rax,%rbp),%rsp
	mov	\$$PADLOCK_CHUNK,%rax
	cmovz	%rax,$chunk		# chunk=chunk?:PADLOCK_CHUNK
___
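# The prologue above sets up the bounce buffer: unless the control word says
# misaligned operands are acceptable (bit 5) or both pointers are already
# 16-byte aligned, data is processed in pieces of at most PADLOCK_CHUNK
# bytes.  Stack space is reserved only when the output itself is misaligned,
# %rbp keeps the original %rsp, and the first piece is sized to len modulo
# PADLOCK_CHUNK so that the following pieces are full chunks.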
$code.=<<___ if ($mode eq "ctr32");
.L${mode}_reenter:
	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	neg	%eax
	and	\$`$PADLOCK_CHUNK/16-1`,%eax
	mov	\$$PADLOCK_CHUNK,$chunk
	shl	\$4,%eax
	cmovz	$chunk,%rax
	cmp	%rax,$len
	cmova	%rax,$chunk		# don't let counter cross PADLOCK_CHUNK
	cmovbe	$len,$chunk
___
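# ctr32 only: the first chunk is clamped so that chunk boundaries fall on
# multiples of PADLOCK_CHUNK/16 counter values.  That way the low 16 bits of
# the big-endian counter can only wrap at a chunk boundary, where the carry
# into the upper half is applied by hand (.Lctr32_no_carry below), presumably
# because the engine itself does not propagate it.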
$code.=<<___ if ($PADLOCK_PREFETCH{$mode});
	cmp	$chunk,$len
	ja	.L${mode}_loop
	mov	$inp,%rax		# check if prefetch crosses page
	cmp	%rsp,%rbp
	cmove	$out,%rax
	add	$len,%rax
	neg	%rax
	and	\$0xfff,%rax		# distance to page boundary
	cmp	\$$PADLOCK_PREFETCH{$mode},%rax
	mov	\$-$PADLOCK_PREFETCH{$mode},%rax
	cmovae	$chunk,%rax		# mask=distance<prefetch?-prefetch:-1
	and	%rax,$chunk
	jz	.L${mode}_unaligned_tail
___
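# Prefetch-errata guard: when the remaining data fits in a single chunk and
# its end lies within $PADLOCK_PREFETCH{$mode} bytes of a page boundary, the
# chunk is rounded down (possibly to zero, in which case the tail is staged
# through the stack at .L${mode}_unaligned_tail) so the engine's prefetch
# cannot touch the next, potentially unmapped, page.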
$code.=<<___;
	jmp	.L${mode}_loop
.align	16
.L${mode}_loop:
	cmp	$len,$chunk		# ctr32 artefact
	cmova	$len,$chunk		# ctr32 artefact
	mov	$out,%r8		# save parameters
	mov	$inp,%r9
	mov	$len,%r10
	mov	$chunk,$len
	mov	$chunk,%r11
	test	\$0x0f,$out		# out_misaligned
	cmovnz	%rsp,$out
	test	\$0x0f,$inp		# inp_misaligned
	jz	.L${mode}_inp_aligned
	shr	\$3,$len
	.byte	0xf3,0x48,0xa5		# rep movsq
	sub	$chunk,$out
	mov	$chunk,$len
	mov	$out,$inp
.L${mode}_inp_aligned:
	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*
___
$code.=<<___ if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
$code.=<<___ if ($mode eq "ctr32");
	mov	-4($ctx),%eax		# pull 32-bit counter
	test	\$0xffff0000,%eax
	jnz	.L${mode}_no_carry
	bswap	%eax
	add	\$0x10000,%eax
	bswap	%eax
	mov	%eax,-4($ctx)
.L${mode}_no_carry:
___
$code.=<<___;
	mov	%r8,$out		# restore parameters
	mov	%r11,$chunk
	test	\$0x0f,$out
	jz	.L${mode}_out_aligned
	mov	$chunk,$len
	lea	(%rsp),$inp
	shr	\$3,$len
	.byte	0xf3,0x48,0xa5		# rep movsq
	sub	$chunk,$out
.L${mode}_out_aligned:
	mov	%r9,$inp
	mov	%r10,$len
	add	$chunk,$out
	add	$chunk,$inp
	sub	$chunk,$len
	mov	\$$PADLOCK_CHUNK,$chunk
___
	if (!$PADLOCK_PREFETCH{$mode}) {
$code.=<<___;
	jnz	.L${mode}_loop
___
	} else {
$code.=<<___;
	jz	.L${mode}_break
	cmp	$chunk,$len
	jae	.L${mode}_loop
___
$code.=<<___ if ($mode eq "ctr32");
	mov	$len,$chunk
	mov	$inp,%rax		# check if prefetch crosses page
	cmp	%rsp,%rbp
	cmove	$out,%rax
	add	$len,%rax
	neg	%rax
	and	\$0xfff,%rax		# distance to page boundary
	cmp	\$$PADLOCK_PREFETCH{$mode},%rax
	mov	\$-$PADLOCK_PREFETCH{$mode},%rax
	cmovae	$chunk,%rax
	and	%rax,$chunk
	jnz	.L${mode}_loop
___
$code.=<<___;
.L${mode}_unaligned_tail:
	xor	%eax,%eax
	cmp	%rsp,%rbp
	cmove	$len,%rax
	mov	$out,%r8		# save parameters
	mov	$len,$chunk
	sub	%rax,%rsp		# alloca
	shr	\$3,$len
	lea	(%rsp),$out
	.byte	0xf3,0x48,0xa5		# rep movsq
	mov	%rsp,$inp
	mov	%r8, $out		# restore parameters
	mov	$chunk,$len
	jmp	.L${mode}_loop
.align	16
.L${mode}_break:
___
	}
$code.=<<___;
	cmp	%rbp,%rsp
	je	.L${mode}_done

	pxor	%xmm0,%xmm0
	lea	(%rsp),%rax
.L${mode}_bzero:
	movaps	%xmm0,(%rax)
	lea	16(%rax),%rax
	cmp	%rax,%rbp
	ja	.L${mode}_bzero

.L${mode}_done:
	lea	(%rbp),%rsp
	jmp	.L${mode}_exit

.align	16
.L${mode}_aligned:
___
$code.=<<___ if ($mode eq "ctr32");
	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	neg	%eax
	and	\$0xffff,%eax
	mov	\$`16*0x10000`,$chunk
	shl	\$4,%eax
	cmovz	$chunk,%rax
	cmp	%rax,$len
	cmova	%rax,$chunk		# don't let counter cross 2^16
	cmovbe	$len,$chunk
	jbe	.L${mode}_aligned_skip

.L${mode}_aligned_loop:
	mov	$len,%r10		# save parameters
	mov	$chunk,$len
	mov	$chunk,%r11

	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len		# len/=AES_BLOCK_SIZE
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*

	mov	-4($ctx),%eax		# pull 32-bit counter
	bswap	%eax
	add	\$0x10000,%eax
	bswap	%eax
	mov	%eax,-4($ctx)

	mov	%r10,$len		# restore parameters
	sub	%r11,$len
	mov	\$`16*0x10000`,$chunk
	jz	.L${mode}_exit
	cmp	$chunk,$len
	jae	.L${mode}_aligned_loop

.L${mode}_aligned_skip:
___
$code.=<<___ if ($PADLOCK_PREFETCH{$mode});
	lea	($inp,$len),%rbp
	neg	%rbp
	and	\$0xfff,%rbp		# distance to page boundary
	xor	%eax,%eax
	cmp	\$$PADLOCK_PREFETCH{$mode},%rbp
	mov	\$$PADLOCK_PREFETCH{$mode}-1,%rbp
	cmovae	%rax,%rbp
	and	$len,%rbp		# remainder
	sub	%rbp,$len
	jz	.L${mode}_aligned_tail
___
$code.=<<___;
	lea	-16($ctx),%rax		# ivp
	lea	16($ctx),%rbx		# key
	shr	\$4,$len		# len/=AES_BLOCK_SIZE
	.byte	0xf3,0x0f,0xa7,$opcode	# rep xcrypt*
___
$code.=<<___ if ($mode !~ /ecb|ctr/);
	movdqa	(%rax),%xmm0
	movdqa	%xmm0,-16($ctx)		# copy [or refresh] iv
___
$code.=<<___ if ($PADLOCK_PREFETCH{$mode});
	test	%rbp,%rbp		# check remainder
	jz	.L${mode}_exit

.L${mode}_aligned_tail:
	mov	$out,%r8
	mov	%rbp,$chunk
	mov	%rbp,$len
	lea	(%rsp),%rbp
	sub	$len,%rsp
	shr	\$3,$len
	lea	(%rsp),$out
	.byte	0xf3,0x48,0xa5		# rep movsq
	lea	(%r8),$out
	lea	(%rsp),$inp
	mov	$chunk,$len
	jmp	.L${mode}_loop
___
$code.=<<___;
.L${mode}_exit:
	mov	\$1,%eax
	lea	8(%rsp),%rsp
.L${mode}_abort:
	pop	%rbx
	pop	%rbp
	ret
.size	padlock_${mode}_encrypt,.-padlock_${mode}_encrypt
___
}

&generate_mode("ecb",0xc8);
&generate_mode("cbc",0xd0);
&generate_mode("cfb",0xe0);
&generate_mode("ofb",0xe8);
&generate_mode("ctr32",0xd8);	# all 64-bit CPUs have working CTR...
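# The second argument is the ModR/M byte that follows the common
# 0xf3,0x0f,0xa7 (rep xcrypt) encoding: 0xc8 = xcryptecb, 0xd0 = xcryptcbc,
# 0xd8 = xcryptctr, 0xe0 = xcryptcfb, 0xe8 = xcryptofb.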

$code.=<<___;
.asciz	"VIA Padlock x86_64 module, CRYPTOGAMS by <appro\@openssl.org>"
.align	16
.data
.align	8
.Lpadlock_saved_context:
	.quad	0
___
$code =~ s/\`([^\`]*)\`/eval($1)/gem;

print $code;

close STDOUT;