1 | ! Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
|
---|
2 | !
|
---|
3 | ! Licensed under the OpenSSL license (the "License"). You may not use
|
---|
4 | ! this file except in compliance with the License. You can obtain a copy
|
---|
5 | ! in the file LICENSE in the source distribution or at
|
---|
6 | ! https://www.openssl.org/source/license.html
|
---|
7 |
|
---|
! ABI selection: a 64-bit build is recognised either from Sun Studio
! invoked with -xarch=v9 or from GCC invoked with -m64.
#if (defined(__SUNPRO_C) && defined(__sparcv9)) || \
    (defined(__GNUC__) && defined(__arch64__))
# define ABI64
#endif

! Per-ABI constants: the frame size handed to every save instruction
! and the offset that is added to %sp/%fp to form a real stack address.
! The 64-bit build additionally declares %g2 and %g3 as scratch.
#ifdef ABI64
	.register	%g2,#scratch
	.register	%g3,#scratch
# define	FRAME	-192
# define	BIAS	2047
#else
# define	FRAME	-96
# define	BIAS	0
#endif
23 |
|
---|
.text
.align	32
.global	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,#function
!
! OPENSSL_wipe_cpu -- scrub the CPU register file.
!
! Takes no arguments.  Zeroes %f0-%f31 (plus %f32-%f62 when the CPU
! identifies itself as V9) together with %o0-%o5, %o7, %l0-%l7,
! %i0-%i5 and %g1-%g5 of its own register window.  Before that it
! either issues the ST_CLEAN_WINDOWS trap (Sun builds) or recurses
! through .walk.reg.wins to clear deeper register windows.
!
! Registers only!  The backing store of the windows, which resides on
! the stack toward lower addresses, is NOT wiped here; that remains the
! duty of the caller.  To facilitate stack wiping the routine returns
! a pointer to the top of the stack of the *caller*.
!
OPENSSL_wipe_cpu:
	save	%sp,FRAME,%sp		! work in a fresh register window
	nop
#ifdef __sun
#include <sys/trap.h>
	ta	ST_CLEAN_WINDOWS
#else
	call	.walk.reg.wins		! clear deeper windows by recursion
#endif
	nop				! delay slot when the call is assembled
	call	.PIC.zero.up		! returns run-time address of .zero
	mov	.zero-(.-4),%o0		! delay slot: .zero minus address of the call
	ld	[%o0],%f0		! %f0 = 0
	ld	[%o0],%f1		! %f1 = 0

	subcc	%g0,1,%o0		! 0-1 sets the negative and borrow flags
	! The next word is the V9 instruction "rd %ccr,%o0".  The V8
	! specification says that this encoding ("rd %asr2,%o0" in V8
	! terms) does not cause an illegal_instruction trap, so it can be
	! executed on either generation to tell them apart: V9 returns
	! the distinct value 0x99, i.e. "negative" and "borrow" bits set
	! in both %icc and %xcc.
	.word	0x91408000		!rd	%ccr,%o0
	cmp	%o0,0x99
	bne	.Lwipe_v8		! not V9: skip the %f32-%f62 wipe
	nop
	! This routine does not use the upper floating-point bank
	! itself, but memcpy might have, so wipe it as well.
	.word	0xbfa00040		!fmovd	%f0,%f62
	.word	0xbba00040		!...
	.word	0xb7a00040
	.word	0xb3a00040
	.word	0xafa00040
	.word	0xaba00040
	.word	0xa7a00040
	.word	0xa3a00040
	.word	0x9fa00040
	.word	0x9ba00040
	.word	0x97a00040
	.word	0x93a00040
	.word	0x8fa00040
	.word	0x8ba00040
	.word	0x87a00040
	.word	0x83a00040		!fmovd	%f0,%f32
	! Common tail: propagate the zero in %f0/%f1 down through
	! %f31..%f2, interleaved with clearing of the integer registers.
.Lwipe_v8:
	fmovs	%f1,%f31
	clr	%o0
	fmovs	%f0,%f30
	clr	%o1
	fmovs	%f1,%f29
	clr	%o2
	fmovs	%f0,%f28
	clr	%o3
	fmovs	%f1,%f27
	clr	%o4
	fmovs	%f0,%f26
	clr	%o5
	fmovs	%f1,%f25
	clr	%o7
	fmovs	%f0,%f24
	clr	%l0
	fmovs	%f1,%f23
	clr	%l1
	fmovs	%f0,%f22
	clr	%l2
	fmovs	%f1,%f21
	clr	%l3
	fmovs	%f0,%f20
	clr	%l4
	fmovs	%f1,%f19
	clr	%l5
	fmovs	%f0,%f18
	clr	%l6
	fmovs	%f1,%f17
	clr	%l7
	fmovs	%f0,%f16
	clr	%i0
	fmovs	%f1,%f15
	clr	%i1
	fmovs	%f0,%f14
	clr	%i2
	fmovs	%f1,%f13
	clr	%i3
	fmovs	%f0,%f12
	clr	%i4
	fmovs	%f1,%f11
	clr	%i5
	fmovs	%f0,%f10
	clr	%g1
	fmovs	%f1,%f9
	clr	%g2
	fmovs	%f0,%f8
	clr	%g3
	fmovs	%f1,%f7
	clr	%g4
	fmovs	%f0,%f6
	clr	%g5
	fmovs	%f1,%f5
	fmovs	%f0,%f4
	fmovs	%f1,%f3
	fmovs	%f0,%f2

	add	%fp,BIAS,%i0		! result: top of stack of the caller

	ret
	restore				! %i0 becomes %o0 of the caller

! Eight bytes of zeros, the source for the loads into %f0 and %f1.
.zero:	.long	0x0,0x0
! Position-independent address helper: on entry %o0 holds an offset
! relative to the call instruction and %o7 holds the address of that
! call; the sum is handed back in %o0.
.PIC.zero.up:
	retl
	add	%o0,%o7,%o0		! delay slot
#ifdef DEBUG
.global	walk_reg_wins
.type	walk_reg_wins,#function
walk_reg_wins:
#endif
! Recursive register-window walker.  Each level opens a new window,
! recurses unless the %o7 found in that window equals %i7 or is zero,
! then clears %o2-%o5, %o7 and all locals on the way back.  The value
! passed back in %o0 grows by one per level that was cleared.
.walk.reg.wins:
	save	%sp,FRAME,%sp
	cmp	%i7,%o7
	be	.Lwalk_ret
	clr	%o0			! delay slot, executed either way
	cmp	%o7,0			! compiler never cleans %o7...
	be	.Lwalk_clear		! could have been a leaf function...
	clr	%o1			! delay slot, executed either way
	call	.walk.reg.wins
	nop
.Lwalk_clear:
	clr	%o2
	clr	%o3
	clr	%o4
	clr	%o5
	clr	%o7
	clr	%l0
	clr	%l1
	clr	%l2
	clr	%l3
	clr	%l4
	clr	%l5
	clr	%l6
	clr	%l7
	add	%o0,1,%i0		! depth counter, used for debugging
.Lwalk_ret:
	ret
	restore
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
174 |
|
---|
.global	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,#function
.align	32
!
! OPENSSL_atomic_add -- add to a 32-bit word in memory.
!
! In:	%o0 = address of the 32-bit word
!	%o1 = amount to add
! Out:	%o0 = the new value, sign-extended
!
! 64-bit builds always use the compare-and-swap loop at the end.
! 32-bit builds probe the CPU at run time (see the rd %ccr trick in
! OPENSSL_wipe_cpu) and fall back to a swap-based path that uses the
! value -1 as a "busy" marker and yields the CPU while waiting.
!
OPENSSL_atomic_add:
#ifndef ABI64
	subcc	%g0,1,%o2
	.word	0x95408000		!rd	%ccr,%o2
	cmp	%o2,0x99
	be	.Latomic_cas		! V9 CPU: cas is available
	nop
	save	%sp,FRAME,%sp		! the fallback path calls out, needs a frame
	ba	.Latomic_try
	nop
#ifdef __sun
! Note that you do not have to link with libthread to call thr_yield,
! as libc provides a stub, which is overloaded the moment you link
! with *either* libpthread or libthread...
#define	YIELD_CPU	thr_yield
#else
! applies at least to Linux and FreeBSD... Feedback expected...
#define	YIELD_CPU	sched_yield
#endif
.Latomic_spin:
	call	YIELD_CPU
	nop
.Latomic_try:
	ld	[%i0],%i2
	! NOTE(review): this pre-check compares against -4096 whereas
	! the busy marker stored below is -1 -- confirm the intent.
	cmp	%i2,-4096
	be	.Latomic_spin
	mov	-1,%i2			! delay slot: busy marker
	swap	[%i0],%i2		! plant marker, fetch previous value
	cmp	%i2,-1
	be	.Latomic_spin		! marker was already there: wait
	add	%i2,%i1,%i2		! delay slot: previous value + amount
	stbar
	st	%i2,[%i0]		! publish the sum, replacing the marker
	sra	%i2,%g0,%i0		! return value
	ret
	restore
.Latomic_cas:
#endif
	ld	[%o0],%o2		! expected old value
.Latomic_retry:
	add	%o1,%o2,%o3		! candidate new value
	.word	0xd7e2100a		!cas	[%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
	cmp	%o2,%o3
	bne	.Latomic_retry		! memory held something else: try again
	mov	%o3,%o2			! delay slot: cas always fetches to dest. register
	add	%o1,%o2,%o0		! OpenSSL expects the new value
	retl
	sra	%o0,%g0,%o0		! delay slot: the result is a signed int
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
224 |
|
---|
.global	_sparcv9_rdtick
.type	_sparcv9_rdtick,#function
.align	32
!
! _sparcv9_rdtick -- read the %tick counter if the CPU is V9.
!
! Out:	V9 CPU:	%o0 = %tick, %o1 = %tick shifted right by 32
!	otherwise:	%o0 = 0, %o1 = 0
!
_sparcv9_rdtick:
	subcc	%g0,1,%o0		! set negative and borrow flags
	.word	0x91408000		!rd	%ccr,%o0
	cmp	%o0,0x99		! 0x99 is what a V9 CPU reports
	bne	.Lrdtick_none
	xor	%o0,%o0,%o0		! delay slot: %o0 = 0 on both paths
	.word	0x91410000		!rd	%tick,%o0
	retl
	.word	0x93323020		!srlx	%o0,32,%o1
.Lrdtick_none:
	retl
	xor	%o1,%o1,%o1		! delay slot: %o1 = 0
.size	_sparcv9_rdtick,.-_sparcv9_rdtick
241 |
|
---|
.global	_sparcv9_vis1_probe
.type	_sparcv9_vis1_probe,#function
.align	8
!
! _sparcv9_vis1_probe -- execute two VIS1 instructions.
!
! Performs a 16-bit floating-point load from the stack (two bytes above
! the unbiased stack pointer) into %f0 and then clears %f0 with fxor.
! NOTE(review): presumably the caller runs this under an illegal
! instruction handler to learn whether VIS1 exists -- confirm.
!
_sparcv9_vis1_probe:
	add	%sp,BIAS+2,%o1		! address of a halfword on the stack
	.word	0xc19a5a40		!ldda	[%o1]ASI_FP16_P,%f0
	retl
	.word	0x81b00d80		!fxor	%f0,%f0,%f0
.size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe
251 |
|
---|
!
! _sparcv9_vis1_instrument -- probe and time VIS1 instructions.
!
! The result is the number of cycles needed to execute rdtick and a
! pair of VIS1 instructions.  The US-Tx VIS unit is slow (documented to
! be 6 cycles on T2) and the core is in-order single-issue, so it should
! be possible to distinguish Tx reliably.  Observed return values:
!
!	UltraSPARC IIe		7
!	UltraSPARC III		7
!	UltraSPARC T1		24
!	SPARC T4		65(*)
!
! (*)	this result has less to do with VIS instruction latencies, as
!	rdtick itself appears to be that slow, but it does the trick in
!	the sense that FP and VIS code paths are still slower than
!	integer-only ones.
!
! Numbers for T2 and SPARC64 V-VII are more than welcome.
!
! It would be possible to detect specifically US-T1 by instrumenting
! fmul8ulx16, which is emulated on T1 and as such accounts for quite
! a lot of %tick-s, a couple of thousand on Linux...
!
! Out:	%o0 = smallest of the four measured intervals
!
.global	_sparcv9_vis1_instrument
.type	_sparcv9_vis1_instrument,#function
.align	8
_sparcv9_vis1_instrument:
	! warm-up pair, then five time stamps with a pair between each
	.word	0x81b00d80		!fxor	%f0,%f0,%f0
	.word	0x85b08d82		!fxor	%f2,%f2,%f2
	.word	0x91410000		!rd	%tick,%o0
	.word	0x81b00d80		!fxor	%f0,%f0,%f0
	.word	0x85b08d82		!fxor	%f2,%f2,%f2
	.word	0x93410000		!rd	%tick,%o1
	.word	0x81b00d80		!fxor	%f0,%f0,%f0
	.word	0x85b08d82		!fxor	%f2,%f2,%f2
	.word	0x95410000		!rd	%tick,%o2
	.word	0x81b00d80		!fxor	%f0,%f0,%f0
	.word	0x85b08d82		!fxor	%f2,%f2,%f2
	.word	0x97410000		!rd	%tick,%o3
	.word	0x81b00d80		!fxor	%f0,%f0,%f0
	.word	0x85b08d82		!fxor	%f2,%f2,%f2
	.word	0x99410000		!rd	%tick,%o4

	! turn the five stamps into four intervals, %o0..%o3
	sub	%o1,%o0,%o0
	sub	%o2,%o1,%o1
	sub	%o3,%o2,%o2
	sub	%o4,%o3,%o3

	! keep the minimum in %o0; every mov below sits in the delay
	! slot of an annulling branch and so runs only when %o0 is the
	! larger (unsigned) of the two values just compared
	cmp	%o0,%o1
	.word	0x38680002		!bgu,a	%xcc,.+8
	mov	%o1,%o0
	cmp	%o0,%o2
	.word	0x38680002		!bgu,a	%xcc,.+8
	mov	%o2,%o0
	cmp	%o0,%o3
	.word	0x38680002		!bgu,a	%xcc,.+8
	mov	%o3,%o0

	retl
	nop
.size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
312 |
|
---|
.global	_sparcv9_vis2_probe
.type	_sparcv9_vis2_probe,#function
.align	8
!
! _sparcv9_vis2_probe -- execute one VIS2 instruction (bshuffle) in the
! delay slot of the return.  Only %f0 is written.
!
_sparcv9_vis2_probe:
	retl
	.word	0x81b00980		!bshuffle	%f0,%f0,%f0
.size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe
320 |
|
---|
.global	_sparcv9_fmadd_probe
.type	_sparcv9_fmadd_probe,#function
.align	8
!
! _sparcv9_fmadd_probe -- execute one fused multiply-add instruction.
! %f0 and %f2 are cleared with fxor first, then fmaddd runs in the
! delay slot of the return.
!
_sparcv9_fmadd_probe:
	.word	0x81b00d80		!fxor	%f0,%f0,%f0
	.word	0x85b08d82		!fxor	%f2,%f2,%f2
	retl
	.word	0x81b80440		!fmaddd	%f0,%f0,%f2,%f0
.size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
330 |
|
---|
.global	_sparcv9_rdcfr
.type	_sparcv9_rdcfr,#function
.align	8
!
! _sparcv9_rdcfr -- read ancillary state register 26.
!
! Out:	%o0 = contents of %asr26
!
_sparcv9_rdcfr:
	retl
	.word	0x91468000		!rd	%asr26,%o0
.size	_sparcv9_rdcfr,.-_sparcv9_rdcfr
338 |
|
---|
.global	_sparcv9_vis3_probe
.type	_sparcv9_vis3_probe,#function
.align	8
!
! _sparcv9_vis3_probe -- execute one VIS3 instruction (xmulx) in the
! delay slot of the return.  Source and destination are all %g0, so no
! register changes.
!
_sparcv9_vis3_probe:
	retl
	.word	0x81b022a0		!xmulx	%g0,%g0,%g0
.size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe
346 |
|
---|
.global	_sparcv9_random
.align	8
!
! _sparcv9_random -- execute the random instruction.
!
! Out:	%o0 = value produced by the instruction
!
_sparcv9_random:
	retl
	.word	0x91b002a0		!random	%o0
.type	_sparcv9_random,#function
! The size is measured from the start of this routine.  It used to be
! measured from _sparcv9_vis3_probe, which made the recorded symbol
! size cover the preceding routine as well.
.size	_sparcv9_random,.-_sparcv9_random
354 |
|
---|
.global	_sparcv9_fjaesx_probe
.type	_sparcv9_fjaesx_probe,#function	! typed like every other export here
.align	8
!
! _sparcv9_fjaesx_probe -- execute one faesencx instruction, then
! return.  Only %f0 is written.
!
_sparcv9_fjaesx_probe:
	.word	0x81b09206		!faesencx	%f2,%f6,%f0
	retl
	nop
.size	_sparcv9_fjaesx_probe,.-_sparcv9_fjaesx_probe
362 |
|
---|
.global	OPENSSL_cleanse
.type	OPENSSL_cleanse,#function
.align	32
!
! OPENSSL_cleanse -- overwrite a buffer with zero bytes.
!
! In:	%o0 = start of the buffer
!	%o1 = length in bytes
!
! Lengths of up to 14 bytes are cleared one byte at a time.  Longer
! buffers are first advanced byte-wise to an aligned address and then
! cleared with 8-byte stores (64-bit builds, or a 32-bit build that
! finds itself on a V9 CPU) or with 4-byte stores (V8 CPU); any tail
! is handed back to the byte loop.
!
OPENSSL_cleanse:
	cmp	%o1,14
	nop
#ifdef ABI64
	bgu	%xcc,.Lot
#else
	bgu	.Lot
#endif
	cmp	%o1,0			! delay slot: test for an empty buffer
	bne	.Little
	nop
	retl				! nothing to do
	nop

! Byte loop; requires %o1 to be non-zero on entry.
.Little:
	stb	%g0,[%o0]
	subcc	%o1,1,%o1
	bnz	.Little
	add	%o0,1,%o0		! delay slot: advance the pointer
	retl
	nop
.align	32
.Lot:
#ifndef ABI64
	subcc	%g0,1,%g1
	! same CPU generation test as in OPENSSL_wipe_cpu
	.word	0x83408000		!rd	%ccr,%g1
	cmp	%g1,0x99
	bne	.Lv8_head
	nop
#endif

! V9: clear single bytes until the pointer is 8-byte aligned.
.Lv9_head:
	andcc	%o0,7,%g0
	bz	.Lv9_words
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.Lv9_head
	add	%o0,1,%o0		! delay slot
.align	16,0x01000000			! padding is filled with nop words
! V9: 8 bytes per pass for as long as at least 8 bytes remain.
.Lv9_words:
	.word	0xc0720000		!stx	%g0,[%o0]
	sub	%o1,8,%o1
	andcc	%o1,-8,%g0
#ifdef ABI64
	.word	0x126ffffd		!bnz	%xcc,.Lv9_words
#else
	.word	0x124ffffd		!bnz	%icc,.Lv9_words
#endif
	add	%o0,8,%o0		! delay slot

	cmp	%o1,0			! any tail left?
	bne	.Little
	nop
	retl
	nop
#ifndef ABI64
! V8: clear single bytes until the pointer is 4-byte aligned.
.Lv8_head:
	andcc	%o0,3,%g0
	bz	.Lv8_words
	nop
	stb	%g0,[%o0]
	sub	%o1,1,%o1
	ba	.Lv8_head
	add	%o0,1,%o0		! delay slot
	nop
! V8: 4 bytes per pass for as long as at least 4 bytes remain.
.Lv8_words:
	st	%g0,[%o0]
	sub	%o1,4,%o1
	andcc	%o1,-4,%g0
	bnz	.Lv8_words
	add	%o0,4,%o0		! delay slot

	cmp	%o1,0			! any tail left?
	bne	.Little
	nop
	retl
	nop
#endif
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
445 |
|
---|
.global	CRYPTO_memcmp
.type	CRYPTO_memcmp,#function
.align	16
!
! CRYPTO_memcmp -- compare two byte strings without an early exit.
!
! In:	%o0 = first buffer
!	%o1 = second buffer
!	%o2 = number of bytes
! Out:	%o0 = 0 when all bytes match (or the length is zero), else 1
!
! Every byte pair is visited no matter where a mismatch occurs: the
! differences are OR-ed together in %g1 and only reduced to 0 or 1
! after the loop.
!
CRYPTO_memcmp:
	cmp	%o2,0
#ifdef ABI64
	beq,pn	%xcc,.Lcmp_ret
#else
	beq	.Lcmp_ret
#endif
	xor	%g1,%g1,%g1		! delay slot: accumulator = 0
	nop

.Lcmp_byte:
	ldub	[%o0],%o3
	add	%o0,1,%o0
	ldub	[%o1],%o4
	add	%o1,1,%o1
	subcc	%o2,1,%o2		! one byte fewer to go
	xor	%o3,%o4,%o4		! non-zero when the bytes differ
#ifdef ABI64
	bnz	%xcc,.Lcmp_byte
#else
	bnz	.Lcmp_byte
#endif
	or	%o4,%g1,%g1		! delay slot: fold into accumulator

	sub	%g0,%g1,%g1		! negative exactly when accumulator != 0
	srl	%g1,31,%g1		! keep the sign bit: 0 or 1
.Lcmp_ret:
	retl
	mov	%g1,%o0
.size	CRYPTO_memcmp,.-CRYPTO_memcmp
479 |
|
---|
.global	_sparcv9_vis1_instrument_bus
.type	_sparcv9_vis1_instrument_bus,#function
.align	8
!
! _sparcv9_vis1_instrument_bus -- collect %tick deltas around block
! load/store traffic on the output buffer.
!
! In:	%o0 = output array of 32-bit words
!	%o1 = number of words to fill (cnt)
! Out:	%o0 = cnt as passed in
!
! For each output word the routine measures the %tick difference since
! the previous pass, block-loads and block-stores the 64-byte line
! containing the word, and then adds the difference into the word with
! cas.  One such round is done up front with a difference of zero.
!
_sparcv9_vis1_instrument_bus:
	mov	%o1,%o3			! save cnt
	.word	0x99410000		!rd	%tick,%o4	! tick
	mov	%o4,%o5			! lasttick = tick
	set	0,%g4			! diff

	andn	%o0,63,%g1		! start of the 64-byte line
	.word	0xc1985e00		!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040		!membar	#Sync
	.word	0xc1b85c00		!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040		!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c		!cas	[%o0],%o4,%g4

.Lbus_loop:
	.word	0x99410000		!rd	%tick,%o4
	sub	%o4,%o5,%g4		! diff=tick-lasttick
	mov	%o4,%o5			! lasttick=tick

	andn	%o0,63,%g1
	.word	0xc1985e00		!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040		!membar	#Sync
	.word	0xc1b85c00		!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040		!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4		! word + diff
	.word	0xc9e2100c		!cas	[%o0],%o4,%g4
	subcc	%o1,1,%o1		! --$cnt
	bnz	.Lbus_loop
	add	%o0,4,%o0		! delay slot: ++$out

	retl
	mov	%o3,%o0			! delay slot: return the saved cnt
.size	_sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
517 |
|
---|
.global	_sparcv9_vis1_instrument_bus2
.type	_sparcv9_vis1_instrument_bus2,#function
.align	8
!
! _sparcv9_vis1_instrument_bus2 -- like _sparcv9_vis1_instrument_bus,
! but the output position advances only when the measured %tick
! difference changes from one pass to the next.
!
! In:	%o0 = output array of 32-bit words
!	%o1 = number of words to fill (cnt)
!	%o2 = upper bound on the number of passes (max)
! Out:	%o0 = cnt minus the number of words still unfilled on exit
!
_sparcv9_vis1_instrument_bus2:
	mov	%o1,%o3			! save cnt
	sll	%o1,2,%o1		! cnt*=4

	.word	0x99410000		!rd	%tick,%o4	! tick
	mov	%o4,%o5			! lasttick = tick
	set	0,%g4			! diff

	andn	%o0,63,%g1		! start of the 64-byte line
	.word	0xc1985e00		!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040		!membar	#Sync
	.word	0xc1b85c00		!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040		!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4
	.word	0xc9e2100c		!cas	[%o0],%o4,%g4

	.word	0x99410000		!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4		! diff=tick-lasttick
	mov	%o4,%o5			! lasttick=tick
	mov	%g4,%g5			! lastdiff=diff
.Lbus2_loop:
	andn	%o0,63,%g1
	.word	0xc1985e00		!ldda	[%g1]0xf0,%f0	! block load
	.word	0x8143e040		!membar	#Sync
	.word	0xc1b85c00		!stda	%f0,[%g1]0xe0	! block store and commit
	.word	0x8143e040		!membar	#Sync
	ld	[%o0],%o4
	add	%o4,%g4,%g4		! word + diff
	.word	0xc9e2100c		!cas	[%o0],%o4,%g4

	subcc	%o2,1,%o2		! --max
	bz	.Lbus2_done
	nop

	.word	0x99410000		!rd	%tick,%o4	! tick
	sub	%o4,%o5,%g4		! diff=tick-lasttick
	mov	%o4,%o5			! lasttick=tick
	cmp	%g4,%g5			! flags: diff versus lastdiff
	mov	%g4,%g5			! lastdiff=diff

	.word	0x83408000		!rd	%ccr,%g1
	and	%g1,4,%g1		! isolate zero flag
	xor	%g1,4,%g1		! flip zero flag: 4 if diff changed, else 0

	subcc	%o1,%g1,%o1		! conditional --$cnt
	bnz	.Lbus2_loop
	add	%o0,%g1,%o0		! delay slot: conditional ++$out

.Lbus2_done:
	srl	%o1,2,%o1		! bytes left -> words left
	retl
	sub	%o3,%o1,%o0		! delay slot: cnt - words left
.size	_sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
575 |
|
---|
! Fragment placed in the .init section: it calls OPENSSL_cpuid_setup,
! which is defined elsewhere, with a nop in the delay slot.
.section	".init",#alloc,#execinstr
	call	OPENSSL_cpuid_setup
	nop				! delay slot