VirtualBox

source: vbox/trunk/src/libs/openssl-1.1.1g/crypto/x86_64cpuid.pl@ 85855

Last change on this file since 85855 was 83916, checked in by vboxsync, 5 years ago

openssl-1.1.1g: Applied and adjusted our OpenSSL changes to 1.1.1g. bugref:9719

File size: 10.1 KB
Line 
#! /usr/bin/env perl
# Copyright 2005-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# Command line: [flavour] output.  If the first argument contains a dot it
# is taken to be the output file name and no flavour is assumed.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 argument order is selected by a nasm/masm/mingw64 flavour or by an
# output file name ending in ".asm".
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for x86_64-xlate.pl next to this script, then in its perlasm/
# subdirectory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the translator.
# Fail loudly if the pipe cannot be started instead of printing into nothing.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers holding the first four integer arguments for the target ABI.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

# Emit the .init hook that calls OPENSSL_cpuid_setup, the 16-byte
# OPENSSL_ia32cap_P capability vector, and the helpers shared by all
# flavours: OPENSSL_atomic_add, OPENSSL_rdtsc, OPENSSL_ia32_cpuid,
# OPENSSL_cleanse and CRYPTO_memcmp.  $arg1..$arg3 interpolate the argument
# registers selected for the target calling convention.
print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text

.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
.cfi_startproc
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.cfi_endproc
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
.cfi_startproc
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.cfi_endproc
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
.cfi_startproc
	mov	%rbx,%r8		# save %rbx
.cfi_register	%rbx,%r8

	xor	%eax,%eax
	mov	%rax,8(%rdi)		# clear extended feature flags
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	movd	%eax,%xmm0		# put aside processor id
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.LnotP4
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.LnotP4:
	cmp	\$6,%ah
	jne	.Lnotintel
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050670,%eax	# Knights Landing
	je	.Lknights
	cmp	\$0x00080650,%eax	# Knights Mill (according to sde)
	jne	.Lnotintel
.Lknights:
	and	\$0xfbffffff,%ecx	# clear XSAVE flag to mimic Silvermont

.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx

	cmp	\$7,%r11d
	jb	.Lno_extended_info
	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	bt	\$26,%r9d		# check XSAVE bit, cleared on Knights
	jc	.Lnotknights
	and	\$0xfff7ffff,%ebx	# clear ADCX/ADOX flag
.Lnotknights:
	movd	%xmm0,%eax		# restore processor id
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050650,%eax	# Skylake-X
	jne	.Lnotskylakex
	and	\$0xfffeffff,%ebx	# ~(1<<16)
					# suppress AVX512F flag on Skylake-X
.Lnotskylakex:
	mov	%ebx,8(%rdi)		# save extended feature flags
	mov	%ecx,12(%rdi)
.Lno_extended_info:

	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$0xe6,%eax		# isolate XMM, YMM and ZMM state support
	cmp	\$0xe6,%eax
	je	.Ldone
	andl	\$0x3fdeffff,8(%rdi)	# ~(1<<31|1<<30|1<<21|1<<16)
					# clear AVX512F+BW+VL+IFMA, all of
					# them are EVEX-encoded, which requires
					# ZMM state support even if one uses
					# only XMM and YMM :-(
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	mov	\$0x3fdeffdf,%eax	# ~(1<<31|1<<30|1<<21|1<<16|1<<5)
	and	%eax,8(%rdi)		# clear AVX2 and AVX512* bits
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
.cfi_restore	%rbx
	or	%r9,%rax
	ret
.cfi_endproc
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
.cfi_startproc
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2
	lea	8($arg1),$arg1
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.cfi_endproc
.size	OPENSSL_cleanse,.-OPENSSL_cleanse

.globl	CRYPTO_memcmp
.type	CRYPTO_memcmp,\@abi-omnipotent
.align	16
CRYPTO_memcmp:
.cfi_startproc
	xor	%rax,%rax
	xor	%r10,%r10
	cmp	\$0,$arg3
	je	.Lno_data
	cmp	\$16,$arg3
	jne	.Loop_cmp
	mov	($arg1),%r10
	mov	8($arg1),%r11
	mov	\$1,$arg3
	xor	($arg2),%r10
	xor	8($arg2),%r11
	or	%r11,%r10
	cmovnz	$arg3,%rax
	ret

.align	16
.Loop_cmp:
	mov	($arg1),%r10b
	lea	1($arg1),$arg1
	xor	($arg2),%r10b
	lea	1($arg2),$arg2
	or	%r10b,%al
	dec	$arg3
	jnz	.Loop_cmp
	neg	%rax
	shr	\$63,%rax
.Lno_data:
	ret
.cfi_endproc
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
___

# Non-Win64 flavours: OPENSSL_wipe_cpu zeroes %xmm0-%xmm15 and the integer
# registers %rcx, %rdx, %rsi, %rdi and %r8-%r11, then returns the address
# 8(%rsp) in %rax.
print<<___ if (!$win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
.cfi_startproc
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.cfi_endproc
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# Win64 flavours: this variant of OPENSSL_wipe_cpu zeroes only %xmm0-%xmm5
# and %rcx, %rdx, %r8-%r11, then returns the address 8(%rsp) in %rax.
print<<___ if ($win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# Emit OPENSSL_instrument_bus and OPENSSL_instrument_bus2.
#
# OPENSSL_instrument_bus(out, cnt): for each of cnt iterations, reads the
# time-stamp counter, flushes the cache line at the current output word,
# lock-adds the tick difference to it and advances by 4 bytes.  Returns cnt.
#
# OPENSSL_instrument_bus2(out, cnt, max): same probe, but the output pointer
# and cnt only advance when the tick difference changes from the previous
# one; stops after max probes or when cnt reaches zero.  Returns the original
# cnt minus what is left of it.
{
my $out="%r10";
my $cnt="%rcx";
my $max="%r11";
my $lasttick="%r8d";
my $lastdiff="%r9d";
# Scratch slot, relative to %rsp, where instrument_bus2 parks the original
# cnt: +8 when targeting Win64, -8 otherwise.  This must test the $win64
# variable; a bareword "win64" is an always-true string and would select +8
# for every target.
my $redzone=$win64?8:-8;

print<<___;
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
.cfi_startproc
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax
	mov	%edx,$lasttick
	mov	%eax,$lastdiff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)
	lea	4($out),$out
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.cfi_endproc
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
.cfi_startproc
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx
	setne	%dl
	sub	%rdx,$cnt	# conditional --$cnt
	lea	($out,%rdx,4),$out	# conditional ++$out
	jnz	.Loop2

.Ldone2:
	mov	$redzone(%rsp),%rax
	sub	$cnt,%rax
	ret
.cfi_endproc
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}

# gen_random($rdop): emit the routine OPENSSL_ia32_<rdop>_bytes, where $rdop
# is the instruction mnemonic to use for drawing 64-bit values.
#
# The generated routine takes a buffer pointer in $arg1 and a byte count in
# $arg2 and returns the number of bytes stored in %rax.  Each 64-bit draw is
# retried up to 8 times until the instruction sets the carry flag; if all 8
# attempts fail the routine returns what it has stored so far.  Whole 8-byte
# words are stored while at least 8 bytes remain, the tail is stored one
# byte at a time, and %r10 is cleared before returning.
sub gen_random {
my ($rdop) = @_;
print<<___;
.globl	OPENSSL_ia32_${rdop}_bytes
.type	OPENSSL_ia32_${rdop}_bytes,\@abi-omnipotent
.align	16
OPENSSL_ia32_${rdop}_bytes:
.cfi_startproc
	xor	%rax, %rax	# return value
	cmp	\$0,$arg2
	je	.Ldone_${rdop}_bytes

	mov	\$8,%r11
.Loop_${rdop}_bytes:
	${rdop}	%r10
	jc	.Lbreak_${rdop}_bytes
	dec	%r11
	jnz	.Loop_${rdop}_bytes
	jmp	.Ldone_${rdop}_bytes

.align	16
.Lbreak_${rdop}_bytes:
	cmp	\$8,$arg2
	jb	.Ltail_${rdop}_bytes
	mov	%r10,($arg1)
	lea	8($arg1),$arg1
	add	\$8,%rax
	sub	\$8,$arg2
	jz	.Ldone_${rdop}_bytes
	mov	\$8,%r11
	jmp	.Loop_${rdop}_bytes

.align	16
.Ltail_${rdop}_bytes:
	mov	%r10b,($arg1)
	lea	1($arg1),$arg1
	inc	%rax
	shr	\$8,%r10
	dec	$arg2
	jnz	.Ltail_${rdop}_bytes

.Ldone_${rdop}_bytes:
	xor	%r10,%r10	# Clear sensitive data from register
	ret
.cfi_endproc
.size	OPENSSL_ia32_${rdop}_bytes,.-OPENSSL_ia32_${rdop}_bytes
___
}
# Emit OPENSSL_ia32_rdrand_bytes and OPENSSL_ia32_rdseed_bytes.
gen_random("rdrand");
gen_random("rdseed");

# Closing STDOUT flushes the buffered output; a failure here is fatal.
close STDOUT or die "error closing STDOUT: $!";	# flush
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette