VirtualBox

source: kStuff/trunk/kProfiler2/prfx86msc.asm@ 121

Last change on this file since 121 was 29, checked in by bird, 15 years ago

Finally got around to executing the switch to the MIT license.

  • Property svn:keywords set to Id Revision
File size: 10.6 KB
Line 
1; $Id: prfx86msc.asm 29 2009-07-01 20:30:29Z bird $
2;; @file
3; kProfiler Mark 2 - Microsoft C/C++ Compiler Interaction, x86.
4;
5
6;
7; Copyright (c) 2006-2007 Knut St. Osmundsen <[email protected]>
8;
9; Permission is hereby granted, free of charge, to any person
10; obtaining a copy of this software and associated documentation
11; files (the "Software"), to deal in the Software without
12; restriction, including without limitation the rights to use,
13; copy, modify, merge, publish, distribute, sublicense, and/or sell
14; copies of the Software, and to permit persons to whom the
15; Software is furnished to do so, subject to the following
16; conditions:
17;
18; The above copyright notice and this permission notice shall be
19; included in all copies or substantial portions of the Software.
20;
21; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23; OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27; FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28; OTHER DEALINGS IN THE SOFTWARE.
29;
30
31[section .data]
32; Calibration state shared by the profiling hooks in this file.
33g_fCalibrated: ; set to 1 by calibrate; common_return_path only calls calibrate while this is zero
34 dd 0
35g_OverheadAdj: ; tick adjustment tuned by calibrate; common_overhead subtracts it from the saved entry timestamp
36 dd 0
37
38[section .text]
39
40extern KPRF_ENTER ; called by __penter (cdecl); its eax result is consumed by common_return_path
41extern KPRF_LEAVE ; called by __pexit (cdecl); its eax result is consumed by common_return_path
42
43global __penter
44global __pexit
45
46;ifdef UNDEFINED - this guard is commented out, so the internal labels below are exported unconditionally
47global common_return_path
48global common_overhead
49global common_no_overhead
50global calibrate
51global calib_inner_update_minimum
52global calib_inner_next
53global calib_outer_dec
54global calib_outer_inc
55global calib_done
56global calib_nullproc
57;endif
58
59
60;;
61; __penter - function entry hook. On x86 the call to this function has been observed
62; to be put before creating the stack frame, as the very first instruction in the function.
63;
64; Thus the stack layout when KPRF_ENTER is called is as follows (hex offsets from esp):
65; 24 return address of the profiled function (into the function calling it).
66; 20 our return address - the address of the profiled function + 5 (the size of the call instruction).
67; 1c eax (saved)
68; 18 edx (saved)
69; 14 eflags (saved)
70; 10 ecx (saved)
71; c tsc high - param 3
72; 8 tsc low
73; 4 frame pointer - param 2
74; 0 function ptr - param 1
75;
76; The eax result of KPRF_ENTER is handed to common_return_path, which also unwinds this frame.
77align 16
78__penter:
79 ; save the volatile registers and get the time stamp.
80 push eax
81 push edx
82 rdtsc ; edx:eax = time stamp counter
83 pushfd
84 push ecx
85
86 ; setting up the enter call frame (cdecl).
87 sub esp, 4 + 4 + 8 ; param 1 (4 bytes) + param 2 (4 bytes) + param 3 (8 bytes)
88 mov [esp + 0ch], edx ; Param 3 - the timestamp
89 mov [esp + 08h], eax
90 lea edx, [esp + 24h] ; Param 2 - frame pointer (pointer to the return address of the function calling us)
91 mov [esp + 04h], edx
92 mov eax, [esp + 20h] ; Param 1 - The function address
93 sub eax, 5 ; step back over the call instruction to get the function start
94 mov [esp], eax
95
96 call KPRF_ENTER
97 jmp common_return_path ; eax = result of KPRF_ENTER
98
99
100;;
101; __pexit - function exit hook. On x86 the call to this function has been observed to be
102; put right before the return instruction. This fact matters since we have to calculate the same
103; stack address as in __penter.
104;
105; Thus the stack layout when KPRF_LEAVE is called is as follows (hex offsets from esp):
106; 24 return address of the profiled function (into the function calling it).
107; 20 our return address - the address right after the call to us in the profiled function.
108; 1c eax (saved)
109; 18 edx (saved)
110; 14 eflags (saved)
111; 10 ecx (saved)
112; c tsc high - param 3
113; 8 tsc low
114; 4 frame pointer - param 2
115; 0 function ptr - param 1
116;
117; The eax result of KPRF_LEAVE is handed to common_return_path, which also unwinds this frame.
118align 16
119__pexit:
120 ; save the volatile registers and get the time stamp.
121 push eax
122 push edx
123 rdtsc ; edx:eax = time stamp counter
124 pushfd
125 push ecx
126
127 ; setting up the leave call frame (cdecl).
128 sub esp, 4 + 4 + 8 ; param 1 (4 bytes) + param 2 (4 bytes) + param 3 (8 bytes)
129 mov [esp + 0ch], edx ; Param 3 - the timestamp
130 mov [esp + 08h], eax
131 lea edx, [esp + 24h] ; Param 2 - frame pointer (pointer to the return address of the function calling us)
132 mov [esp + 04h], edx
133 mov eax, [esp + 20h] ; Param 1 - Some address in the function.
134 sub eax, 5 ; step back over the call instruction
135 mov [esp], eax
136
137 call KPRF_LEAVE
138 jmp common_return_path ; eax = result of KPRF_LEAVE
139
140
141;;
142; This is the common return path for both the enter and exit hooks; on entry eax is zero or a pointer to a 64-bit overhead counter.
143; It's kept common because we can then use the same overhead adjustment
144; and save some calibration effort. It also saves space :-)
145align 16
146common_return_path:
147 ; Update overhead
148 test eax, eax
149 jz common_no_overhead ; zero result - skip the overhead accounting
150 cmp byte [g_fCalibrated], 0
151 jnz common_overhead
152 call calibrate ; first time only; calibrate saves and restores all registers and eflags
153common_overhead:
154 mov ecx, eax ; ecx <- pointer to overhead counter.
155 mov eax, [g_OverheadAdj] ; apply the adjustment before reading tsc
156 sub [esp + 08h], eax ; 64-bit subtract from the entry timestamp saved in the call frame
157 sbb dword [esp + 0ch], 0
158
159 rdtsc
160 sub eax, [esp + 08h] ; edx:eax = now - adjusted entry timestamp
161 sbb edx, [esp + 0ch]
162 add [ecx], eax ; add it to the 64-bit overhead counter
163 adc [ecx + 4], edx
164common_no_overhead:
165 add esp, 4 + 4 + 8 ; drop the call frame (param 1 + param 2 + param 3)
166
167 ; restore volatile registers.
168 pop ecx
169 popfd
170 pop edx
171 pop eax
172 ret ; returns to the caller of __penter / __pexit / calib_penter / calib_pexit
173
174;;
175; Data esi points to while we're calibrating (allocated on the stack by calibrate).
176struc CALIBDATA
177 .OverheadLo resd 1 ; 64-bit overhead counter that calib_penter hands to common_overhead
178 .OverheadHi resd 1
179 .ProfiledLo resd 1 ; 64-bit sum of (exit tsc - enter tsc), accumulated by calib_pexit
180 .ProfiledHi resd 1
181 .EnterTSLo resd 1 ; 64-bit timestamp stored by calib_penter
182 .EnterTSHi resd 1
183 .MinLo resd 1 ; 64-bit minimum of (profiled - overhead) tracked by the inner loop of calibrate
184 .MinHi resd 1
185endstruc
186
187
188
189align 16
190;;
191; Do necessary calibrations: tune g_OverheadAdj until the minimum (profiled - overhead) time of calib_nullproc is in the 20h..30h tick range.
192; Takes no input; all general registers and eflags are preserved (pushad/pushfd). Gives up after 200h adjustment steps.
193calibrate:
194 ; prolog
195 push ebp
196 mov ebp, esp
197 pushfd
198 pushad
199 sub esp, CALIBDATA_size
200 mov esi, esp ; esi points to the CALIBDATA
201
202 ;
203 ; Set the calibrated flag up front; common_return_path only calls us while it is zero.
204 ;
205 mov eax, 1
206 xchg dword [g_fCalibrated], eax
207
208 ;
209 ; The outer loop - find the right adjustment.
210 ;
211 mov ebx, 200h ; loop counter.
212calib_outer_loop:
213
214 ;
215 ; The inner loop - calls the function a number of times to establish a
216 ; good minimum value
217 ;
218 mov ecx, 200h
219 mov dword [esi + CALIBDATA.MinLo], 0ffffffffh ; start the minimum at the largest positive 64-bit value
220 mov dword [esi + CALIBDATA.MinHi], 07fffffffh
221calib_inner_loop:
222
223 ; zero the overhead and profiled times.
224 xor eax, eax
225 mov [esi + CALIBDATA.OverheadLo], eax
226 mov [esi + CALIBDATA.OverheadHi], eax
227 mov [esi + CALIBDATA.ProfiledLo], eax
228 mov [esi + CALIBDATA.ProfiledHi], eax
229 call calib_nullproc
230
231 ; subtract the overhead
232 mov eax, [esi + CALIBDATA.ProfiledLo]
233 mov edx, [esi + CALIBDATA.ProfiledHi]
234 sub eax, [esi + CALIBDATA.OverheadLo]
235 sbb edx, [esi + CALIBDATA.OverheadHi]
236
237 ; update the minimum value.
238 test edx, 080000000h
239 jnz near calib_outer_dec ; if negative, just simplify and shortcut
240 cmp edx, [esi + CALIBDATA.MinHi] ; both high dwords are non-negative here, so a signed compare is fine
241 jg calib_inner_next
242 jl calib_inner_update_minimum
243 cmp eax, [esi + CALIBDATA.MinLo]
244 jae calib_inner_next ; the low dwords are unsigned, so they must be compared unsigned
245calib_inner_update_minimum:
246 mov [esi + CALIBDATA.MinLo], eax
247 mov [esi + CALIBDATA.MinHi], edx
248calib_inner_next:
249 loop calib_inner_loop
250
251 ; Is the minimum value acceptable?
252 test dword [esi + CALIBDATA.MinHi], 80000000h
253 jnz calib_outer_dec ; simplify if negative.
254 cmp dword [esi + CALIBDATA.MinHi], 0
255 jnz calib_outer_inc ; this shouldn't be possible
256 cmp dword [esi + CALIBDATA.MinLo], 1fh
257 jbe calib_outer_dec ; too low - 2 ticks per pair is the minimum! (calib_nullproc runs 16 pairs)
258 cmp dword [esi + CALIBDATA.MinLo], 30h
259 jbe calib_done ; this is fine!
260calib_outer_inc:
261 inc dword [g_OverheadAdj]
262 jmp calib_outer_next
263calib_outer_dec:
264 cmp dword [g_OverheadAdj], 1
265 jbe calib_done ; stop at 0 or 1 - decrementing a zero adjustment would wrap it to 0ffffffffh
266 dec dword [g_OverheadAdj]
267calib_outer_next:
268 dec ebx
269 jnz calib_outer_loop
270calib_done:
271
272 ; epilog
273 add esp, CALIBDATA_size
274 popad
275 popfd
276 leave
277 ret
278
279
280
281
282;;
283; The calibration __penter - this must be identical to the real thing except for the KPRF call. Expects esi to point to the CALIBDATA.
284align 16
285calib_penter:
286 ; This part must be identical
287 push eax
288 push edx
289 rdtsc
290 pushfd
291 push ecx
292
293 ; store the entry timestamp for calib_pexit
294 mov [esi + CALIBDATA.EnterTSLo], eax
295 mov [esi + CALIBDATA.EnterTSHi], edx
296
297 ; create the call frame (same 16 byte layout as in __penter)
298 push edx ; tsc high - param 3
299 push eax ; tsc low
300 push 0 ; dummy param 2
301 push 0 ; dummy param 1
302
303 lea eax, [esi + CALIBDATA.OverheadLo] ; eax = overhead counter, standing in for the KPRF_ENTER result
304 jmp common_overhead ; enters past the NULL and g_fCalibrated checks of common_return_path
305
306
307;;
308; The calibration __pexit - this must be identical to the real thing except for the KPRF call. Expects esi to point to the CALIBDATA.
309align 16
310calib_pexit:
311 ; This part must be identical
312 push eax
313 push edx
314 rdtsc
315 pushfd
316 push ecx
317
318 ; update the time: Profiled += exit timestamp - EnterTS (edx:eax is preserved around the calculation)
319 push eax
320 push edx
321 sub eax, [esi + CALIBDATA.EnterTSLo]
322 sbb edx, [esi + CALIBDATA.EnterTSHi]
323 add [esi + CALIBDATA.ProfiledLo], eax
324 adc [esi + CALIBDATA.ProfiledHi], edx
325 pop edx
326 pop eax
327
328 ; create the call frame (same 16 byte layout as in __pexit)
329 push edx ; tsc high - param 3
330 push eax ; tsc low
331 push 0 ; dummy param 2
332 push 0 ; dummy param 1
333
334 lea eax, [esi + CALIBDATA.EnterTSLo] ; NOTE(review): the exit overhead is added to EnterTS (which the next calib_penter overwrites), not to Overhead - presumably intentional, confirm
335 jmp common_overhead ; enters past the NULL and g_fCalibrated checks of common_return_path
336
337
338;;
339; The 'function' we're profiling: 16 back-to-back calib_penter / calib_pexit pairs. Expects esi to point to the CALIBDATA.
340; The general idea is that each pair should take something like 2-10 ticks.
341;
342; (Btw. If we don't use multiple pairs here, we end up with the wrong result.)
343align 16
344calib_nullproc:
345 call calib_penter ;0
346 call calib_pexit
347
348 call calib_penter ;1
349 call calib_pexit
350
351 call calib_penter ;2
352 call calib_pexit
353
354 call calib_penter ;3
355 call calib_pexit
356
357 call calib_penter ;4
358 call calib_pexit
359
360 call calib_penter ;5
361 call calib_pexit
362
363 call calib_penter ;6
364 call calib_pexit
365
366 call calib_penter ;7
367 call calib_pexit
368
369 call calib_penter ;8
370 call calib_pexit
371
372 call calib_penter ;9
373 call calib_pexit
374
375 call calib_penter ;a
376 call calib_pexit
377
378 call calib_penter ;b
379 call calib_pexit
380
381 call calib_penter ;c
382 call calib_pexit
383
384 call calib_penter ;d
385 call calib_pexit
386
387 call calib_penter ;e
388 call calib_pexit
389
390 call calib_penter ;f
391 call calib_pexit
392 ret ; back to the inner loop of calibrate
393
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette