#! /usr/bin/env perl
# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# Command line: [flavour] output.  A first argument containing a dot is
# taken to be the output file name, in which case no flavour was given.
$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 argument registers are selected for [nm]asm/mingw64 flavours, or
# when the output file name ends in .asm.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the x86_64-xlate.pl translator next to this script first, then
# in a perlasm/ subdirectory; give up if neither exists.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Everything printed to STDOUT from here on is piped through the
# translator.  Die if the pipe cannot be started rather than silently
# producing no output.
open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers that carry the first four integer arguments.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") :	# Win64 order
				 ("%rdi","%rsi","%rdx","%rcx");	# Unix order

# Start of the generated assembly.  The heredoc opened here runs up to the
# next line consisting solely of ___, and Perl variables such as $arg1
# inside it are interpolated before the text is printed.
# This first part places a call to OPENSSL_cpuid_setup in the .init
# section, declares OPENSSL_ia32cap_P as a hidden common symbol
# (.comm arguments 16,4) and switches to .text.
print<<___;
.extern		OPENSSL_cpuid_setup
.hidden		OPENSSL_cpuid_setup
.section	.init
	call	OPENSSL_cpuid_setup

.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,16,4

.text

# OPENSSL_atomic_add: adds the second argument to the 32-bit value the
# first argument points at.  The sum is computed from a snapshot of the
# value and committed with lock cmpxchg; when the compare fails the loop
# retries with the value cmpxchg left in %eax.  Returns the new value,
# sign-extended to 64 bits.
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax
.Lspin:	leaq	($arg2,%rax),%r8
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)
	jne	.Lspin
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

# OPENSSL_rdtsc: executes rdtsc and returns the counter as one 64-bit
# value in %rax, built from %edx (high half) and %eax (low half).
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

# OPENSSL_ia32_cpuid: queries CPUID and returns the adjusted leaf 1
# feature words as one 64-bit value: the %edx word in the low half and
# the %ecx word in the high half.  The single argument (%rdi) points at
# a buffer whose dword at offset 8 is first zeroed and later receives
# leaf 7 %ebx when leaf 7 is available.
#
# Register roles inside the routine:
#   %r8   saved %rbx (cpuid overwrites %rbx)
#   %r11d highest standard leaf reported by leaf 0
#   %r9d  0 for an Intel vendor string, later the copy of %ecx
#   %r10d scratch for vendor/core-count checks, later the copy of %edx
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@function,1
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	mov	%eax,8(%rdi)		# clear extended feature flags
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel
	mov	%eax,%r10d
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	# The mask below also keeps bit 0, which still holds the not-Intel
	# flag computed above.
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

	# Reached for an Intel vendor string, for vendors that are neither
	# Intel nor AMD, and for AMD parts without the extended leaves
	# tested above.
.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.LnotP4
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.LnotP4:
	cmp	\$6,%ah
	jne	.Lnotintel
	and	\$0x0fff0ff0,%eax
	cmp	\$0x00050670,%eax	# Knights Landing
	je	.Lknights
	cmp	\$0x00080650,%eax	# Knights Mill (according to sde)
	jne	.Lnotintel
.Lknights:
	and	\$0xfbffffff,%ecx	# clear XSAVE flag to mimic Silvermont

.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx

	cmp	\$7,%r11d
	jb	.Lno_extended_info
	mov	\$7,%eax
	xor	%ecx,%ecx
	cpuid
	bt	\$26,%r9d		# check XSAVE bit, cleared on Knights
	jc	.Lnotknights
	and	\$0xfff7ffff,%ebx	# clear ADCX/ADOX flag
.Lnotknights:
	mov	%ebx,8(%rdi)		# save extended feature flags
.Lno_extended_info:

	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
	andl	\$0xffffffdf,8(%rdi)	# clear AVX2, ~(1<<5)
	# Pack the result: %r9d goes to the high half, %r10d to the low half.
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

# OPENSSL_cleanse: writes zero bytes over a buffer; the first argument
# is the start address and the second the length in bytes.
#  - length 0 returns immediately;
#  - lengths below 15 are cleared one byte at a time (.Little);
#  - longer buffers are advanced byte-wise until the address is a
#    multiple of 8 (.Lot), cleared 8 bytes per iteration (.Laligned),
#    and any remainder is finished by the byte loop.
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
	xor	%rax,%rax
	cmp	\$15,$arg2
	jae	.Lot
	cmp	\$0,$arg2
	je	.Lret
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1
	jnz	.Little
.Lret:
	ret
.align	16
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2
	lea	8($arg1),$arg1
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse

# CRYPTO_memcmp: compares two buffers (first and second argument) over
# the number of bytes given by the third argument.  Every byte pair is
# XORed and the result OR-ed into %al; the loop has no early exit.
# Returns 0 when the length is 0 or all bytes match, 1 otherwise
# (neg followed by shr 63 turns any non-zero accumulator into 1).
.globl	CRYPTO_memcmp
.type	CRYPTO_memcmp,\@abi-omnipotent
.align	16
CRYPTO_memcmp:
	xor	%rax,%rax
	xor	%r10,%r10
	cmp	\$0,$arg3
	je	.Lno_data
.Loop_cmp:
	mov	($arg1),%r10b
	lea	1($arg1),$arg1
	xor	($arg2),%r10b
	lea	1($arg2),$arg2
	or	%r10b,%al
	dec	$arg3
	jnz	.Loop_cmp
	neg	%rax
	shr	\$63,%rax
.Lno_data:
	ret
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
___

# OPENSSL_wipe_cpu as emitted when $win64 is false: zeroes %xmm0-%xmm15
# and %rcx, %rdx, %rsi, %rdi, %r8-%r11, then returns the address
# 8(%rsp) in %rax.
print<<___ if (!$win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# OPENSSL_wipe_cpu as emitted when $win64 is true: zeroes %xmm0-%xmm5
# and %rcx, %rdx, %r8-%r11 only, then returns the address 8(%rsp) in
# %rax.
# NOTE(review): presumably the shorter register list is limited to what
# the Win64 calling convention treats as volatile -- confirm.
print<<___ if ($win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
{
# Register allocation shared by the two bus-instrumentation routines.
my $out="%r10";
my $cnt="%rcx";
my $max="%r11";
my $lasttick="%r8d";
my $lastdiff="%r9d";
# Offset from %rsp of the slot where OPENSSL_instrument_bus2 parks its
# original count: 8 for Win64, -8 otherwise.  This has to test the
# $win64 variable; a bareword `win64` is a true string, which would
# select 8 on every platform and make non-Win64 code write above the
# return address.
my $redzone=$win64?8:-8;

print<<___;
# OPENSSL_instrument_bus: the first argument points at an array of
# 32-bit slots, the second is the slot count.  For every slot the cache
# line is flushed and the rdtsc delta since the previous sample is
# lock-added to it.  Returns the count that was passed in.
# NOTE(review): a count of 0 is not guarded against.
.globl	OPENSSL_instrument_bus
.type	OPENSSL_instrument_bus,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg2,$max

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0
	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)
	jmp	.Loop
.align	16
.Loop:	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax
	mov	%edx,$lasttick
	mov	%eax,$lastdiff
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)
	lea	4($out),$out
	sub	\$1,$cnt
	jnz	.Loop

	mov	$max,%rax
	ret
.size	OPENSSL_instrument_bus,.-OPENSSL_instrument_bus

# OPENSSL_instrument_bus2: the first argument points at an array of
# 32-bit slots, the second is the slot count and the third a probe
# limit.  Each probe lock-adds the current rdtsc delta to the current
# slot; the slot pointer advances (and the count drops) only when the
# new delta differs from the previous one.  The loop ends when the
# count or the probe limit reaches zero.  Returns the original count
# minus the remaining count.
.globl	OPENSSL_instrument_bus2
.type	OPENSSL_instrument_bus2,\@abi-omnipotent
.align	16
OPENSSL_instrument_bus2:
	mov	$arg1,$out	# tribute to Win64
	mov	$arg2,$cnt
	mov	$arg3,$max
	mov	$cnt,$redzone(%rsp)

	rdtsc			# collect 1st tick
	mov	%eax,$lasttick	# lasttick = tick
	mov	\$0,$lastdiff	# lastdiff = 0

	clflush	($out)
	.byte	0xf0		# lock
	add	$lastdiff,($out)

	rdtsc			# collect 1st diff
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	mov	%eax,$lastdiff	# lastdiff = diff
.Loop2:
	clflush	($out)
	.byte	0xf0		# lock
	add	%eax,($out)	# accumulate diff

	sub	\$1,$max
	jz	.Ldone2

	rdtsc
	mov	%eax,%edx
	sub	$lasttick,%eax	# diff
	mov	%edx,$lasttick	# lasttick = tick
	cmp	$lastdiff,%eax
	mov	%eax,$lastdiff	# lastdiff = diff
	mov	\$0,%edx
	setne	%dl
	sub	%rdx,$cnt	# conditional --$cnt
	lea	($out,%rdx,4),$out	# conditional ++$out
	jnz	.Loop2

.Ldone2:
	mov	$redzone(%rsp),%rax
	sub	$cnt,%rax
	ret
.size	OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
___
}

# gen_random(RDOP): prints two assembly routines built around the
# instruction mnemonic RDOP (the script calls this with "rdrand" and
# "rdseed"): OPENSSL_ia32_RDOP and OPENSSL_ia32_RDOP_bytes.  Uses the
# global $arg1/$arg2 register names; returns nothing of interest.
sub gen_random {
my $rdop = shift;
print<<___;
# OPENSSL_ia32_${rdop}: executes ${rdop} up to 8 times until the carry
# flag reports success.  Returns the value in %rax; a zero value is
# replaced by the remaining retry count left in %rcx.
.globl	OPENSSL_ia32_${rdop}
.type	OPENSSL_ia32_${rdop},\@abi-omnipotent
.align	16
OPENSSL_ia32_${rdop}:
	mov	\$8,%ecx
.Loop_${rdop}:
	${rdop}	%rax
	jc	.Lbreak_${rdop}
	loop	.Loop_${rdop}
.Lbreak_${rdop}:
	cmp	\$0,%rax
	cmove	%rcx,%rax
	ret
.size	OPENSSL_ia32_${rdop},.-OPENSSL_ia32_${rdop}

# OPENSSL_ia32_${rdop}_bytes: fills a buffer (first argument) of the
# length given by the second argument.  Every 64-bit value is retried up
# to 8 times; full 8-byte words are stored directly, and a final partial
# word is stored one byte at a time.  Returns the number of bytes
# written in %rax, which is short of the request when 8 attempts in a
# row fail.
.globl	OPENSSL_ia32_${rdop}_bytes
.type	OPENSSL_ia32_${rdop}_bytes,\@abi-omnipotent
.align	16
OPENSSL_ia32_${rdop}_bytes:
	xor	%rax, %rax	# return value
	cmp	\$0,$arg2
	je	.Ldone_${rdop}_bytes

	mov	\$8,%r11
.Loop_${rdop}_bytes:
	${rdop}	%r10
	jc	.Lbreak_${rdop}_bytes
	dec	%r11
	jnz	.Loop_${rdop}_bytes
	jmp	.Ldone_${rdop}_bytes

.align	16
.Lbreak_${rdop}_bytes:
	cmp	\$8,$arg2
	jb	.Ltail_${rdop}_bytes
	mov	%r10,($arg1)
	lea	8($arg1),$arg1
	add	\$8,%rax
	sub	\$8,$arg2
	jz	.Ldone_${rdop}_bytes
	mov	\$8,%r11
	jmp	.Loop_${rdop}_bytes

.align	16
.Ltail_${rdop}_bytes:
	mov	%r10b,($arg1)
	lea	1($arg1),$arg1
	inc	%rax
	shr	\$8,%r10	# next byte of the random value held in %r10
	dec	$arg2
	jnz	.Ltail_${rdop}_bytes

.Ldone_${rdop}_bytes:
	ret
.size	OPENSSL_ia32_${rdop}_bytes,.-OPENSSL_ia32_${rdop}_bytes
___
}
# Emit the rdrand and rdseed variants of the random-number routines.
gen_random("rdrand");
gen_random("rdseed");

# STDOUT is the pipe into the translator: closing it flushes the output
# and reports a translator failure, which must not pass unnoticed.
close STDOUT or die "error closing STDOUT: $! (exit status $?)";