VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp@ 54489

Last change on this file since 54489 was 54489, checked in by vboxsync, 10 years ago

SUPDrvGip.cpp: Simplify the data structures for the TSC measurements, drop the per-method init/delete stuff.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 171.2 KB
Line 
1/* $Id: SUPDrvGip.cpp 54489 2015-02-25 13:02:11Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/uint128.h>
58#include <iprt/x86.h>
59
60#include <VBox/param.h>
61#include <VBox/log.h>
62#include <VBox/err.h>
63
64#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
65# include "dtrace/SUPDrv.h"
66#else
67/* ... */
68#endif
69
70
71/*******************************************************************************
72* Defined Constants And Macros *
73*******************************************************************************/
74/** The frequency by which we recalculate the u32UpdateHz and
75 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
76 *
77 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
78 */
79#define GIP_UPDATEHZ_RECALC_FREQ 0x800
80
81/** A reserved TSC value used for synchronization as well as measurement of
82 * TSC deltas. */
83#define GIP_TSC_DELTA_RSVD UINT64_MAX
84/** The number of TSC delta measurement loops in total (includes primer and
85 * read-time loops). */
86#define GIP_TSC_DELTA_LOOPS 96
87/** The number of cache primer loops. */
88#define GIP_TSC_DELTA_PRIMER_LOOPS 4
/** The number of loops until we keep computing the minimum read time. */
90#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
91
92/** @name Master / worker synchronization values.
93 * @{ */
94/** Stop measurement of TSC delta. */
95#define GIP_TSC_DELTA_SYNC_STOP UINT32_C(0)
96/** Start measurement of TSC delta. */
97#define GIP_TSC_DELTA_SYNC_START UINT32_C(1)
98/** Worker thread is ready for reading the TSC. */
99#define GIP_TSC_DELTA_SYNC_WORKER_READY UINT32_C(2)
100/** Worker thread is done updating TSC delta info. */
101#define GIP_TSC_DELTA_SYNC_WORKER_DONE UINT32_C(3)
/** When IPRT isn't concurrency safe: Master is ready and will wait for worker
 * with a timeout. */
104#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER UINT32_C(4)
105/** @} */
106
/** When IPRT isn't concurrency safe: Worker is ready after waiting for
 * master with a timeout. */
109#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
110/** The TSC-refinement interval in seconds. */
111#define GIP_TSC_REFINE_PERIOD_IN_SECS 5
112/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
113#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
114/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
115#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
116/** The TSC delta value for the initial GIP master - 0 in regular builds.
117 * To test the delta code this can be set to a non-zero value. */
118#if 0
119# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
120#else
121# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
122#endif
123
124AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
125AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
126
127/** @def VBOX_SVN_REV
128 * The makefile should define this if it can. */
129#ifndef VBOX_SVN_REV
130# define VBOX_SVN_REV 0
131#endif
132
133#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
134# define DO_NOT_START_GIP
135#endif
136
137
138/*******************************************************************************
139* Internal Functions *
140*******************************************************************************/
141static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
142static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
143static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz);
144#ifdef SUPDRV_USE_TSC_DELTA_THREAD
145static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
146static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
147static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt);
148#else
149static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt);
150static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
151#endif
152
153
154/*******************************************************************************
155* Global Variables *
156*******************************************************************************/
157DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
158
159
160
161/*
162 *
163 * Misc Common GIP Code
164 * Misc Common GIP Code
165 * Misc Common GIP Code
166 *
167 *
168 */
169
170
171/**
172 * Finds the GIP CPU index corresponding to @a idCpu.
173 *
174 * @returns GIP CPU array index, UINT32_MAX if not found.
175 * @param pGip The GIP.
176 * @param idCpu The CPU ID.
177 */
178static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
179{
180 uint32_t i;
181 for (i = 0; i < pGip->cCpus; i++)
182 if (pGip->aCPUs[i].idCpu == idCpu)
183 return i;
184 return UINT32_MAX;
185}
186
187
188
189/*
190 *
191 * GIP Mapping and Unmapping Related Code.
192 * GIP Mapping and Unmapping Related Code.
193 * GIP Mapping and Unmapping Related Code.
194 *
195 *
196 */
197
198
199/**
200 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
201 * updating.
202 *
203 * @param pGip Pointer to the GIP.
204 * @param pGipCpu The per CPU structure for this CPU.
205 * @param u64NanoTS The current time.
206 */
207static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
208{
209 /*
210 * Here we don't really care about applying the TSC delta. The re-initialization of this
211 * value is not relevant especially while (re)starting the GIP as the first few ones will
212 * be ignored anyway, see supdrvGipDoUpdateCpu().
213 */
214 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
215 pGipCpu->u64NanoTS = u64NanoTS;
216}
217
218
219/**
220 * Set the current TSC and NanoTS value for the CPU.
221 *
222 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
223 * @param pvUser1 Pointer to the ring-0 GIP mapping.
224 * @param pvUser2 Pointer to the variable holding the current time.
225 */
226static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
227{
228 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
229 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
230
231 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
232 supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
233
234 NOREF(pvUser2);
235 NOREF(idCpu);
236}
237
238
/**
 * State structure for supdrvGipDetectGetGipCpuCallback.
 *
 * A single instance is shared by all CPUs during the RTMpOnAll detection pass,
 * which is why the members are volatile and updated with atomic operations.
 */
typedef struct SUPDRVGIPDETECTGETCPU
{
    /** Bitmap of APIC IDs that has been seen (initialized to zero).
     * Used to detect duplicate APIC IDs (paranoia). */
    uint8_t volatile    bmApicId[256 / 8];
    /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
     * initially).  The callback clears the methods not detected, so after the
     * pass only methods working on every CPU remain set. */
    uint32_t volatile   fSupported;
    /** The first callback detecting any kind of range issues (initialized to
     * NIL_RTCPUID).  Set at most once via compare-exchange. */
    RTCPUID volatile    idCpuProblem;
} SUPDRVGIPDETECTGETCPU;
/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
256
257
/**
 * Checks for alternative ways of getting the CPU ID on the current CPU.
 *
 * This also checks the APIC ID, CPU ID and CPU set index values against the
 * GIP tables.  Results are accumulated into the shared state structure with
 * atomic operations (runs concurrently on all CPUs via RTMpOnAll).
 *
 * @param   idCpu       The CPU ID. Unused - we have to use the APIC ID.
 * @param   pvUser1     Pointer to the state structure (SUPDRVGIPDETECTGETCPU).
 * @param   pvUser2     Pointer to the GIP.
 */
static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPDRVGIPDETECTGETCPU  pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
    PSUPGLOBALINFOPAGE      pGip   = (PSUPGLOBALINFOPAGE)pvUser2;
    uint32_t                fSupported = 0; /* Methods working on THIS CPU; ANDed into pState->fSupported below. */
    uint16_t                idApic;
    int                     iCpuSet;

    AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */

    /*
     * Check that the CPU ID and CPU set index are interchangable.
     * The IDTR/RDTSCP tricks below only encode a set index, so they are only
     * usable when the two are the same value.
     */
    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    if ((RTCPUID)iCpuSet == idCpu)
    {
        AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
        if (   iCpuSet >= 0
            && iCpuSet < RTCPUSET_MAX_CPUS
            && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
        {
            /*
             * Check whether the IDTR.LIMIT contains a CPU number.
             * (Some hosts encode the CPU number as extra bytes beyond the
             * architecturally required IDT size; cbIdt is that base size.)
             */
#ifdef RT_ARCH_X86
            uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
#else
            uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
#endif
            RTIDTR Idtr;
            ASMGetIDTR(&Idtr);
            if (Idtr.cbIdt >= cbIdt)
            {
                uint32_t uTmp = Idtr.cbIdt - cbIdt;
                uTmp &= RTCPUSET_MAX_CPUS - 1;
                if (uTmp == idCpu)
                {
                    /* Re-read the IDTR and require a stable limit before trusting it. */
                    RTIDTR Idtr2;
                    ASMGetIDTR(&Idtr2);
                    if (Idtr2.cbIdt == Idtr.cbIdt)
                        fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
                }
            }

            /*
             * Check whether RDTSCP is an option: the CPU must report the
             * feature and TSC_AUX must actually contain this CPU's number.
             */
            if (ASMHasCpuId())
            {
                if (   ASMIsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
                    && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
                {
                    uint32_t uAux;
                    ASMReadTscWithAux(&uAux);
                    if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
                    {
                        /* Read TSC_AUX a second time to make sure the value is stable. */
                        ASMNopPause();
                        ASMReadTscWithAux(&uAux);
                        if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
                            fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
                    }
                }
            }
        }
    }

    /*
     * Check that the APIC ID is unique: it must fit the lookup table and not
     * have been claimed by another CPU already (atomic test-and-set).
     */
    idApic = ASMGetApicId();
    if (RT_LIKELY(   idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)
                  && !ASMAtomicBitTestAndSet(pState->bmApicId, idApic)))
        fSupported |= SUPGIPGETCPU_APIC_ID;
    else
    {
        AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
        /* Record the first problematic CPU only (cmpxchg against NIL). */
        ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
        LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - duplicate APIC ID.\n",
                idCpu, iCpuSet, idApic));
    }

    /*
     * Check that the iCpuSet is within the expected range.
     */
    if (RT_UNLIKELY(   iCpuSet < 0
                    || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
                    || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
    {
        ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
        LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
                idCpu, iCpuSet, idApic));
    }
    else
    {
        /* The CPU-ID / set-index mapping must round-trip. */
        RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
        if (RT_UNLIKELY(idCpu2 != idCpu))
        {
            ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
            LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
                    idCpu, iCpuSet, idApic, idCpu2));
        }
    }

    /*
     * Update the supported feature mask before we return.
     * (AND: a method must work on every CPU to remain in the mask.)
     */
    ASMAtomicAndU32(&pState->fSupported, fSupported);

    NOREF(pvUser2);
}
378
379
380/**
381 * Increase the timer freqency on hosts where this is possible (NT).
382 *
383 * The idea is that more interrupts is better for us... Also, it's better than
384 * we increase the timer frequence, because we might end up getting inaccurate
385 * callbacks if someone else does it.
386 *
387 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
388 */
389static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
390{
391 if (pDevExt->u32SystemTimerGranularityGrant == 0)
392 {
393 uint32_t u32SystemResolution;
394 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
395 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
396 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
397 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
398 )
399 {
400 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
401 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
402 }
403 }
404}
405
406
407/**
408 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
409 *
410 * @param pDevExt Clears u32SystemTimerGranularityGrant.
411 */
412static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
413{
414 if (pDevExt->u32SystemTimerGranularityGrant)
415 {
416 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
417 AssertRC(rc2);
418 pDevExt->u32SystemTimerGranularityGrant = 0;
419 }
420}
421
422
/**
 * Maps the GIP into userspace and/or get the physical address of the GIP.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 * @param   ppGipR3     Where to store the address of the ring-3 mapping. (optional)
 * @param   pHCPhysGip  Where to store the physical address. (optional)
 *
 * @remark  There is no reference counting on the mapping, so one call to this function
 *          counts globally as one reference.  One call to SUPR0GipUnmap() will unmap the
 *          GIP and remove the session as a GIP user.
 */
SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
{
    int             rc;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
    RTR3PTR         pGipR3  = NIL_RTR3PTR;
    RTHCPHYS        HCPhys  = NIL_RTHCPHYS;
    LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));

    /*
     * Validate
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
    AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);

    /* Everything below is serialized by the GIP mutex. */
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif
    if (pDevExt->pGip)
    {
        /*
         * Map it?  (Read-only ring-3 mapping, created once per session and
         * reused on subsequent calls.)
         */
        rc = VINF_SUCCESS;
        if (ppGipR3)
        {
            if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
                rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
                                       RTMEM_PROT_READ, RTR0ProcHandleSelf());
            if (RT_SUCCESS(rc))
                pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
        }

        /*
         * Get physical address.
         */
        if (pHCPhysGip && RT_SUCCESS(rc))
            HCPhys = pDevExt->HCPhysGip;

        /*
         * Reference globally.  The first user (re)starts the GIP updating.
         */
        if (!pSession->fGipReferenced && RT_SUCCESS(rc))
        {
            pSession->fGipReferenced = 1;
            pDevExt->cGipUsers++;
            if (pDevExt->cGipUsers == 1)
            {
                PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
                uint64_t u64NanoTS;

                /*
                 * GIP starts/resumes updating again.  On windows we bump the
                 * host timer frequency to make sure we don't get stuck in guest
                 * mode and to get better timer (and possibly clock) accuracy.
                 */
                LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));

                supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);

                /*
                 * Not the first time the GIP is used (CPU 0's transaction id
                 * has moved past its initial value of 2): round each CPU's
                 * transaction id up to the next GIP_UPDATEHZ_RECALC_FREQ * 2
                 * boundary and clear the last update-Hz timestamp, so the
                 * update-interval recalculation restarts cleanly after the
                 * pause.  (NOTE(review): inferred from the member names -
                 * confirm against supdrvGipDoUpdateCpu/the update code.)
                 */
                if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
                {
                    unsigned i;
                    for (i = 0; i < pGipR0->cCpus; i++)
                        ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
                                            (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
                                            & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
                    ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
                }

                /*
                 * Re-initialize the per-CPU TSC/NanoTS base values, backdated
                 * by one update interval.  With an invariant or synchronous
                 * TSC (or a single online CPU) the values sampled on this CPU
                 * are good for all; otherwise each CPU samples its own via
                 * RTMpOnAll.
                 */
                u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
                if (   pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
                    || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
                    || RTMpGetOnlineCount() == 1)
                    supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
                else
                    RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);

                /*
                 * Detect alternative ways to figure the CPU ID in ring-3 and
                 * raw-mode context.  Check the sanity of the APIC IDs, CPU IDs,
                 * and CPU set indexes while we're at it.
                 */
                if (RT_SUCCESS(rc))
                {
                    SUPDRVGIPDETECTGETCPU DetectState;
                    RT_BZERO((void *)&DetectState.bmApicId, sizeof(DetectState.bmApicId));
                    DetectState.fSupported = UINT32_MAX;
                    DetectState.idCpuProblem = NIL_RTCPUID;
                    rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, &DetectState, pGipR0);
                    if (DetectState.idCpuProblem == NIL_RTCPUID)
                    {
                        if (   DetectState.fSupported != UINT32_MAX
                            && DetectState.fSupported != 0)
                        {
                            /* Only write (and log) when the mask actually changed. */
                            if (pGipR0->fGetGipCpu != DetectState.fSupported)
                            {
                                pGipR0->fGetGipCpu = DetectState.fSupported;
                                LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", DetectState.fSupported));
                            }
                        }
                        else
                        {
                            LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
                                    DetectState.fSupported));
                            rc = VERR_UNSUPPORTED_CPU;
                        }
                    }
                    else
                    {
                        LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
                                DetectState.idCpuProblem, DetectState.idCpuProblem));
                        rc = VERR_INVALID_CPU_ID;
                    }
                }

                /*
                 * Start the GIP timer if all is well..
                 */
                if (RT_SUCCESS(rc))
                {
#ifndef DO_NOT_START_GIP
                    rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
#endif
                    rc = VINF_SUCCESS;
                }

                /*
                 * Bail out on error: undo the reference counting and the
                 * ring-3 mapping so the session is left in a clean state.
                 */
                if (RT_FAILURE(rc))
                {
                    LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
                    pDevExt->cGipUsers = 0;
                    pSession->fGipReferenced = 0;
                    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
                    {
                        int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
                        if (RT_SUCCESS(rc2))
                            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
                    }
                    HCPhys = NIL_RTHCPHYS;
                    pGipR3 = NIL_RTR3PTR;
                }
            }
        }
    }
    else
    {
        rc = VERR_GENERAL_FAILURE;
        Log(("SUPR0GipMap: GIP is not available!\n"));
    }
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    /*
     * Write returns.
     */
    if (pHCPhysGip)
        *pHCPhysGip = HCPhys;
    if (ppGipR3)
        *ppGipR3 = pGipR3;

#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#else
    LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#endif
    return rc;
}
616
617
/**
 * Unmaps any user mapping of the GIP and terminates all GIP access
 * from this session.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 */
SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
{
    int                     rc = VINF_SUCCESS;
    PSUPDRVDEVEXT           pDevExt = pSession->pDevExt;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
                pSession,
                pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
                pSession->GipMapObjR3));
#else
    LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
#endif
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);

    /* Serialize with SUPR0GipMap and other unmap calls. */
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    /*
     * Unmap anything?
     */
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
        AssertRC(rc);
        if (RT_SUCCESS(rc))
            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
    }

    /*
     * Dereference global GIP.  (Only when the unmap above didn't fail.)
     * The last user suspends GIP updating and restores the system timer
     * granularity.
     */
    if (pSession->fGipReferenced && !rc)
    {
        pSession->fGipReferenced = 0;
        if (   pDevExt->cGipUsers > 0
            && !--pDevExt->cGipUsers)
        {
            LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
#ifndef DO_NOT_START_GIP
            rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
#endif
            supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
        }
    }

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    return rc;
}
681
682
683/**
684 * Gets the GIP pointer.
685 *
686 * @returns Pointer to the GIP or NULL.
687 */
688SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
689{
690 return g_pSUPGlobalInfoPage;
691}
692
693
694
695
696
697/*
698 *
699 *
700 * GIP Initialization, Termination and CPU Offline / Online Related Code.
701 * GIP Initialization, Termination and CPU Offline / Online Related Code.
702 * GIP Initialization, Termination and CPU Offline / Online Related Code.
703 *
704 *
705 */
706
/**
 * Used by supdrvInitRefineInvariantTscFreqTimer and supdrvGipInitMeasureTscFreq
 * to update the TSC frequency related GIP variables.
 *
 * @param   pGip                The GIP.
 * @param   nsElapsed           The number of nano seconds elapsed.
 * @param   cElapsedTscTicks    The corresponding number of TSC ticks.
 * @param   iTick               The tick number for debugging.
 */
static void supdrvGipInitSetCpuFreq(PSUPGLOBALINFOPAGE pGip, uint64_t nsElapsed, uint64_t cElapsedTscTicks, uint32_t iTick)
{
    /*
     * Calculate the frequency: uCpuHz = cElapsedTscTicks * RT_NS_1SEC / nsElapsed.
     *
     * The guards ensure the 64x32/32 helper is only used when the
     * multiplication cannot overflow and the divisor fits in 32 bits;
     * otherwise fall back to full 128-bit arithmetic.
     */
    uint64_t uCpuHz;
    if (   cElapsedTscTicks < UINT64_MAX / RT_NS_1SEC
        && nsElapsed < UINT32_MAX)
        uCpuHz = ASMMultU64ByU32DivByU32(cElapsedTscTicks, RT_NS_1SEC, (uint32_t)nsElapsed);
    else
    {
        RTUINT128U CpuHz, Tmp, Divisor;
        CpuHz.s.Lo = CpuHz.s.Hi = 0;
        RTUInt128MulU64ByU64(&Tmp, cElapsedTscTicks, RT_NS_1SEC_64);
        RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, nsElapsed));
        uCpuHz = CpuHz.s.Lo; /* The result is assumed to fit 64 bits. */
    }

    /*
     * Update the GIP.
     */
    ASMAtomicWriteU64(&pGip->u64CpuHz, uCpuHz);
    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    {
        ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, uCpuHz);

        /* For inspecting the frequency calcs using tstGIP-2, debugger or similar. */
        if (iTick + 1 < pGip->cCpus)
            ASMAtomicWriteU64(&pGip->aCPUs[iTick + 1].u64CpuHz, uCpuHz);
    }
}
747
748
/**
 * Timer callback function for TSC frequency refinement in invariant GIP mode.
 *
 * This is started during driver init and fires once
 * GIP_TSC_REFINE_PERIOD_IN_SECS seconds later.
 *
 * @param   pTimer      The timer.
 * @param   pvUser      Opaque pointer to the device instance data.
 * @param   iTick       The timer tick.
 */
static DECLCALLBACK(void) supdrvInitRefineInvariantTscFreqTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PSUPDRVDEVEXT       pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE  pGip = pDevExt->pGip;
    RTCPUID             idCpu;
    uint64_t            cNsElapsed;
    uint64_t            cTscTicksElapsed;
    uint64_t            nsNow;
    uint64_t            uTsc;
    RTCCUINTREG         fEFlags;

    /* Paranoia. */
    AssertReturnVoid(pGip);
    AssertReturnVoid(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);

    /*
     * If we got a power event, stop the refinement process.
     */
    if (pDevExt->fInvTscRefinePowerEvent)
    {
        int rc = RTTimerStop(pTimer); AssertRC(rc);
        return;
    }

    /*
     * Try get close to the next clock tick as usual.
     *
     * PORTME: If timers are called from the clock interrupt handler, or
     *         an interrupt handler with higher priority than the clock
     *         interrupt, or spinning for ages in timer handlers is frowned
     *         upon, this loop must be disabled!
     *
     * Darwin, FreeBSD, Linux, Solaris, Windows 8.1+:
     *      High RTTimeSystemNanoTS resolution should prevent any noticeable
     *      spinning here.
     *
     * Windows 8.0 and earlier:
     *      We're running in a DPC here, so we may trigger the DPC watchdog?
     *
     * OS/2:
     *      Timer callbacks are done in the clock interrupt, so skip it.
     */
#if !defined(RT_OS_OS2)
    nsNow = RTTimeSystemNanoTS();
    while (RTTimeSystemNanoTS() == nsNow)
        ASMNopPause();
#endif

    /* Sample TSC, time and CPU ID atomically w.r.t. preemption/interrupts. */
    fEFlags = ASMIntDisableFlags();
    uTsc    = ASMReadTSC();
    nsNow   = RTTimeSystemNanoTS();
    idCpu   = RTMpCpuId();
    ASMSetFlags(fEFlags);

    cNsElapsed          = nsNow - pDevExt->nsStartInvarTscRefine;
    cTscTicksElapsed    = uTsc  - pDevExt->uTscStartInvarTscRefine;

    /*
     * If the above measurement was taken on a different CPU than the one we
     * started the process on, cTscTicksElapsed will need to be adjusted with
     * the TSC deltas of both the CPUs.
     *
     * We ASSUME that the delta calculation process takes less time than the
     * TSC frequency refinement timer.  If it doesn't, we'll complain and
     * drop the frequency refinement.
     *
     * Note! We cannot entirely trust enmUseTscDelta here because it's
     *       downgraded after each delta calculation.
     */
    if (   idCpu != pDevExt->idCpuInvarTscRefine
        && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        uint32_t iStartCpuSet   = RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine);
        uint32_t iStopCpuSet    = RTMpCpuIdToSetIndex(idCpu);
        uint16_t iStartGipCpu   = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
        uint16_t iStopGipCpu    = iStopCpuSet  < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet]  : UINT16_MAX;
        int64_t  iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
        int64_t  iStopTscDelta  = iStopGipCpu  < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta  : INT64_MAX;
        if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
        {
            /* Both deltas are known; adjust unless they are rated practically zero. */
            if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
            {
                /* cTscTicksElapsed = (uTsc - iStopTscDelta) - (pDevExt->uTscStartInvarTscRefine - iStartTscDelta); */
                cTscTicksElapsed += iStartTscDelta - iStopTscDelta;
            }
        }
        /*
         * Allow 5 times the refinement period to elapse before we give up on the TSC delta
         * calculations.
         */
        else if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * 5 * RT_NS_1SEC_64)
        {
            SUPR0Printf("vboxdrv: Failed to refine invariant TSC frequency because deltas are unavailable after %u (%u) seconds\n",
                        (uint32_t)(cNsElapsed / RT_NS_1SEC), GIP_TSC_REFINE_PERIOD_IN_SECS);
            SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
                        iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
            int rc = RTTimerStop(pTimer); AssertRC(rc);
            return;
        }
    }

    /*
     * Calculate and update the CPU frequency variables in GIP.
     *
     * If there is a GIP user already and we've already refined the frequency
     * a couple of times, don't update it as we want a stable frequency value
     * for all VMs.
     */
    if (   pDevExt->cGipUsers == 0
        || cNsElapsed < RT_NS_1SEC * 2)
    {
        supdrvGipInitSetCpuFreq(pGip, cNsElapsed, cTscTicksElapsed, (uint32_t)iTick);

        /*
         * Stop the timer once we've reached the defined refinement period.
         */
        if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * RT_NS_1SEC_64)
        {
            int rc = RTTimerStop(pTimer);
            AssertRC(rc);
        }
    }
    else
    {
        /* A VM is using the GIP; freeze the frequency by stopping the timer. */
        int rc = RTTimerStop(pTimer);
        AssertRC(rc);
    }
}
889
890
891/**
892 * @callback_method_impl{FNRTPOWERNOTIFICATION}
893 */
894static DECLCALLBACK(void) supdrvGipPowerNotificationCallback(RTPOWEREVENT enmEvent, void *pvUser)
895{
896 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
897
898 /*
899 * If the TSC frequency refinement timer we need to cancel it so it doesn't screw
900 * up the frequency after a long suspend.
901 */
902 if ( enmEvent == RTPOWEREVENT_SUSPEND
903 || enmEvent == RTPOWEREVENT_RESUME)
904 ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
905}
906
907
/**
 * Starts the TSC-frequency refinement timer for the invariant TSC GIP mode.
 *
 * We cannot use this in the synchronous and asynchronous TSC GIP modes because
 * the CPU may change the TSC frequency between now and when the timer fires
 * (the callback registered below is supdrvInitRefineInvariantTscFreqTimer).
 *
 * @param   pDevExt         Pointer to the device instance data.
 * @param   pGip            Pointer to the GIP.
 */
static void supdrvGipInitStartTimerForRefiningInvariantTscFreq(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip)
{
    uint64_t    u64NanoTS;
    RTCCUINTREG fEFlags;
    int         rc;

    /*
     * Register a power management callback (failure is non-fatal, we just
     * lose suspend/resume awareness for the refinement).
     */
    pDevExt->fInvTscRefinePowerEvent = true;
    rc = RTPowerNotificationRegister(supdrvGipPowerNotificationCallback, pDevExt);
    AssertRC(rc); /* ignore */

    /*
     * Record the TSC and NanoTS as the starting anchor point for refinement
     * of the TSC.  We try to get as close to a clock tick as possible on
     * systems which do not provide high resolution time.
     */
    u64NanoTS = RTTimeSystemNanoTS();
    while (RTTimeSystemNanoTS() == u64NanoTS)
        ASMNopPause();

    /* Disable interrupts so the TSC, timestamp and CPU ID below are sampled
       as one coherent snapshot on a single CPU. */
    fEFlags = ASMIntDisableFlags();
    pDevExt->uTscStartInvarTscRefine = ASMReadTSC();
    pDevExt->nsStartInvarTscRefine = RTTimeSystemNanoTS();
    pDevExt->idCpuInvarTscRefine = RTMpCpuId();
    ASMSetFlags(fEFlags);

/** @todo we need a power management callback that disables the timer if the
 *        system suspends/resumes. */

    /*
     * Create a timer that runs on the same CPU so we won't have a dependency
     * on the TSC-delta and can run in parallel to it.  On systems that do not
     * implement CPU specific timers we'll apply deltas in the timer callback,
     * just like we do for CPUs going offline.
     *
     * The longer the refinement interval the better the accuracy, at least in
     * theory.  If it's too long though, ring-3 may already be starting its
     * first VMs before we're done.  On most systems we will be loading the
     * support driver during boot and VMs won't be started for a while yet,
     * it is really only a problem during development (especially with
     * on-demand driver starting on windows).
     *
     * To avoid wasting time doing a long supdrvGipInitMeasureTscFreq() call
     * to calculate the frequency during driver loading, the timer is set
     * to fire after 200 ms the first time.  It will then reschedule itself
     * to fire every second until GIP_TSC_REFINE_PERIOD_IN_SECS has been
     * reached or it notices that there is a user land client with GIP
     * mapped (we want a stable frequency for all VMs).
     */
    rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC,
                         RTTIMER_FLAGS_CPU(RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine)),
                         supdrvInitRefineInvariantTscFreqTimer, pDevExt);
    if (RT_SUCCESS(rc))
    {
        rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
        if (RT_SUCCESS(rc))
            return;
        RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
    }

    /* Fall back to an any-CPU timer when CPU-specific timers aren't available
       or the anchor CPU has gone offline; deltas are applied in the callback. */
    if (rc == VERR_CPU_OFFLINE || rc == VERR_NOT_SUPPORTED)
    {
        rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC, RTTIMER_FLAGS_CPU_ANY,
                             supdrvInitRefineInvariantTscFreqTimer, pDevExt);
        if (RT_SUCCESS(rc))
        {
            rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
            if (RT_SUCCESS(rc))
                return;
            RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
        }
    }

    /* Non-fatal: we simply keep the rough frequency estimate. */
    pDevExt->pInvarTscRefineTimer = NULL;
    OSDBGPRINT(("vboxdrv: Failed to create or start TSC frequency refinement timer: rc=%Rrc\n", rc));
}
996
997
998/**
999 * @callback_method_impl{PFNRTMPWORKER,
1000 * RTMpOnSpecific callback for reading TSC and time on the CPU we started
1001 * the measurements on.}
1002 */
1003DECLCALLBACK(void) supdrvGipInitReadTscAndNanoTsOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1004{
1005 RTCCUINTREG fEFlags = ASMIntDisableFlags();
1006 uint64_t *puTscStop = (uint64_t *)pvUser1;
1007 uint64_t *pnsStop = (uint64_t *)pvUser2;
1008
1009 *puTscStop = ASMReadTSC();
1010 *pnsStop = RTTimeSystemNanoTS();
1011
1012 ASMSetFlags(fEFlags);
1013}
1014
1015
/**
 * Measures the TSC frequency of the system.
 *
 * The TSC frequency can vary on systems which are not reported as invariant.
 * On such systems the object of this function is to find out what the
 * nominal, maximum TSC frequency under 'normal' CPU operation is.
 *
 * @returns VBox status code.
 * @param   pDevExt        Pointer to the device instance.
 * @param   pGip           Pointer to the GIP.
 * @param   fRough         Set if we're doing the rough calculation that the
 *                         TSC measuring code needs, where accuracy isn't all
 *                         that important (too high is better than too low).
 *                         When clear we try for best accuracy that we can
 *                         achieve in reasonably short time.
 */
static int supdrvGipInitMeasureTscFreq(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, bool fRough)
{
    uint32_t nsTimerIncr = RTTimerGetSystemGranularity();
    int cTriesLeft = fRough ? 4 : 2; /* rough mode gets more retries since each attempt is much shorter */
    while (cTriesLeft-- > 0)
    {
        RTCCUINTREG fEFlags;
        uint64_t nsStart;
        uint64_t nsStop;
        uint64_t uTscStart;
        uint64_t uTscStop;
        RTCPUID idCpuStart;
        RTCPUID idCpuStop;

        /*
         * Synchronize with the host OS clock tick on systems without high
         * resolution time API (older Windows versions for example).
         */
        nsStart = RTTimeSystemNanoTS();
        while (RTTimeSystemNanoTS() == nsStart)
            ASMNopPause();

        /*
         * Read the TSC and current time, noting which CPU we're on.
         * Interrupts are disabled so all three reads happen on one CPU.
         */
        fEFlags = ASMIntDisableFlags();
        uTscStart = ASMReadTSC();
        nsStart = RTTimeSystemNanoTS();
        idCpuStart = RTMpCpuId();
        ASMSetFlags(fEFlags);

        /*
         * Delay for a while.
         */
        if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
        {
            /*
             * Sleep-wait since the TSC frequency is constant, it eases host load.
             * Shorter interval produces more variance in the frequency (esp. Windows).
             */
            uint64_t msElapsed = 0;
            /* Round the target up to a whole timer tick, then back off 100us so we
               wake just before the tick rather than just after it. */
            uint64_t msDelay = ( ((fRough ? 16 : 200) * RT_NS_1MS + nsTimerIncr - 1) / nsTimerIncr * nsTimerIncr - RT_NS_100US )
                             / RT_NS_1MS;
            do
            {
                RTThreadSleep((RTMSINTERVAL)(msDelay - msElapsed));
                nsStop = RTTimeSystemNanoTS();
                msElapsed = (nsStop - nsStart) / RT_NS_1MS;
            } while (msElapsed < msDelay);

            /* Spin the rest of the way to the next clock tick. */
            while (RTTimeSystemNanoTS() == nsStop)
                ASMNopPause();
        }
        else
        {
            /*
             * Busy-wait keeping the frequency up.
             */
            do
            {
                ASMNopPause();
                nsStop = RTTimeSystemNanoTS();
            } while (nsStop - nsStart < RT_NS_100MS);
        }

        /*
         * Read the TSC and time again.
         */
        fEFlags = ASMIntDisableFlags();
        uTscStop = ASMReadTSC();
        nsStop = RTTimeSystemNanoTS();
        idCpuStop = RTMpCpuId();
        ASMSetFlags(fEFlags);

        /*
         * If the CPU changes, things get a bit complicated and what we
         * can get away with depends on the GIP mode / TSC reliability.
         */
        if (idCpuStop != idCpuStart)
        {
            bool fDoXCall = false;

            /*
             * Synchronous TSC mode: we're probably fine as it's unlikely
             * that we were rescheduled because of TSC throttling or power
             * management reasons, so just go ahead.
             */
            if (pGip->u32Mode == SUPGIPMODE_SYNC_TSC)
            {
                /* Probably ok, maybe we should retry once?. */
                Assert(pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_NOT_APPLICABLE);
            }
            /*
             * If we're just doing the rough measurement, do the cross call and
             * get on with things (we don't have deltas!).
             */
            else if (fRough)
                fDoXCall = true;
            /*
             * Invariant TSC mode: It doesn't matter if we have delta available
             * for both CPUs.  That is not something we can assume at this point.
             *
             * Note! We cannot necessarily trust enmUseTscDelta here because it's
             *       downgraded after each delta calculation and the delta
             *       calculations may not be complete yet.
             */
            else if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
            {
/** @todo This section of code is never reached atm, consider dropping it later on... */
                if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
                {
                    uint32_t iStartCpuSet   = RTMpCpuIdToSetIndex(idCpuStart);
                    uint32_t iStopCpuSet    = RTMpCpuIdToSetIndex(idCpuStop);
                    uint16_t iStartGipCpu   = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                            ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
                    uint16_t iStopGipCpu    = iStopCpuSet  < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                            ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet]  : UINT16_MAX;
                    int64_t  iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
                    int64_t  iStopTscDelta  = iStopGipCpu  < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta  : INT64_MAX;
                    if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
                    {
                        /* Both deltas are known; normalize the two TSC reads so they
                           are comparable (skip when deltas are practically zero). */
                        if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
                        {
                            uTscStart -= iStartTscDelta;
                            uTscStop  -= iStopTscDelta;
                        }
                    }
                    /*
                     * Invalid CPU indexes are not caused by online/offline races, so
                     * we have to trigger driver load failure if that happens as GIP
                     * and IPRT assumptions are busted on this system.
                     */
                    else if (iStopGipCpu >= pGip->cCpus || iStartGipCpu >= pGip->cCpus)
                    {
                        SUPR0Printf("vboxdrv: Unexpected CPU index in supdrvGipInitMeasureTscFreq.\n");
                        SUPR0Printf("vboxdrv: start: %u, %u, %#llx  stop: %u, %u, %#llx\n",
                                    iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
                        return VERR_INVALID_CPU_INDEX;
                    }
                    /*
                     * No valid deltas.  We retry, if we're on our last retry
                     * we do the cross call instead just to get a result.  The
                     * frequency will be refined in a few seconds anyway.
                     */
                    else if (cTriesLeft > 0)
                        continue;
                    else
                        fDoXCall = true;
                }
            }
            /*
             * Asynchronous TSC mode: This is bad, as the reason we usually
             * use this mode is to deal with variable TSC frequencies and
             * deltas.  So, we need to get the TSC from the same CPU as
             * started it, we also need to keep that CPU busy.  So, retry
             * and fall back to the cross call on the last attempt.
             */
            else
            {
                Assert(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC);
                if (cTriesLeft > 0)
                    continue;
                fDoXCall = true;
            }

            if (fDoXCall)
            {
                /*
                 * Try read the TSC and timestamp on the start CPU.
                 */
                int rc = RTMpOnSpecific(idCpuStart, supdrvGipInitReadTscAndNanoTsOnCpu, &uTscStop, &nsStop);
                if (RT_FAILURE(rc) && (!fRough || cTriesLeft > 0))
                    continue;
                /* NOTE(review): on cross-call failure during the last rough try we
                   fall through and use the stop-CPU samples — best effort only. */
            }
        }

        /*
         * Calculate the TSC frequency and update it (shared with the refinement timer).
         */
        supdrvGipInitSetCpuFreq(pGip, nsStop - nsStart, uTscStop - uTscStart, 0);
        return VINF_SUCCESS;
    }

    Assert(!fRough); /* the rough path always yields a result above */
    return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
}
1218
1219
1220/**
1221 * Finds our (@a idCpu) entry, or allocates a new one if not found.
1222 *
1223 * @returns Index of the CPU in the cache set.
1224 * @param pGip The GIP.
1225 * @param idCpu The CPU ID.
1226 */
1227static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
1228{
1229 uint32_t i, cTries;
1230
1231 /*
1232 * ASSUMES that CPU IDs are constant.
1233 */
1234 for (i = 0; i < pGip->cCpus; i++)
1235 if (pGip->aCPUs[i].idCpu == idCpu)
1236 return i;
1237
1238 cTries = 0;
1239 do
1240 {
1241 for (i = 0; i < pGip->cCpus; i++)
1242 {
1243 bool fRc;
1244 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
1245 if (fRc)
1246 return i;
1247 }
1248 } while (cTries++ < 32);
1249 AssertReleaseFailed();
1250 return i - 1;
1251}
1252
1253
/**
 * The calling CPU should be accounted as online, update GIP accordingly.
 *
 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
 *
 * @param   pDevExt             The device extension.
 * @param   idCpu               The CPU ID.  Must equal the ID of the CPU we
 *                              are running on (asserted below).
 */
static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    int iCpuSet = 0;
    uint16_t idApic = UINT16_MAX;
    uint32_t i = 0;
    uint64_t u64NanoTS = 0;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    AssertPtrReturnVoid(pGip);
    Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
    AssertRelease(idCpu == RTMpCpuId());
    Assert(pGip->cPossibleCpus == RTMpGetCount());

    /*
     * Do this behind a spinlock with interrupts disabled as this can fire
     * on all CPUs simultaneously, see @bugref{6110}.
     */
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    /*
     * Update the globals.
     */
    ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
    ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    if (iCpuSet >= 0)
    {
        Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
        RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
        RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
    }

    /*
     * Update the entry.  Back-date the timestamp by one update interval so
     * the first timer tick produces a sane interval.
     */
    u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
    i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);

    supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, pGip->u64CpuHz);

    idApic = ASMGetApicId();
    ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
    ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
    ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);

    /*
     * Update the APIC ID and CPU set index mappings.
     */
    ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
    ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);

    /* Add this CPU to the set of CPUs we need to calculate the TSC-delta for. */
    RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, RTMpCpuIdToSetIndex(idCpu));

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);

    /* Commit it - the ONLINE state write must come last so lock-free readers
       never see a half-initialized entry marked online. */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}
1324
1325
1326/**
1327 * RTMpOnSpecific callback wrapper for supdrvGipMpEventOnlineOrInitOnCpu().
1328 *
1329 * @param idCpu The CPU ID we are running on.
1330 * @param pvUser1 Opaque pointer to the device instance data.
1331 * @param pvUser2 Not used.
1332 */
1333static DECLCALLBACK(void) supdrvGipMpEventOnlineCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1334{
1335 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
1336 NOREF(pvUser2);
1337 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1338}
1339
1340
/**
 * The CPU should be accounted as offline, update the GIP accordingly.
 *
 * This is used by supdrvGipMpEvent.
 *
 * @param   pDevExt             The device extension.
 * @param   idCpu               The CPU ID.
 */
static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    int iCpuSet;
    unsigned i;

    AssertPtrReturnVoid(pGip);
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    AssertReturnVoid(iCpuSet >= 0);

    /* Sanity: the set-index mapping must point at an entry owned by this CPU. */
    i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    AssertReturnVoid(i < pGip->cCpus);
    AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);

    Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
    RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);

    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        /* Reset the TSC delta, we will recalculate it lazily. */
        ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
        /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
        RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
    }

    /* Commit it - the OFFLINE state write comes last so readers stop using
       the entry before it gets recycled. */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}
1384
1385
/**
 * Multiprocessor event notification callback.
 *
 * This is used to make sure that the GIP master gets passed on to
 * another CPU.  It also updates the associated CPU data.
 *
 * @param   enmEvent            The event.
 * @param   idCpu               The cpu it applies to.
 * @param   pvUser              Pointer to the device extension.
 */
static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    if (pGip)
    {
        RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
        switch (enmEvent)
        {
            case RTMPEVENT_ONLINE:
            {
                /* The online worker must run on the CPU that just came online;
                   if we aren't on it, do a cross call instead. */
                RTThreadPreemptDisable(&PreemptState);
                if (idCpu == RTMpCpuId())
                {
                    supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
                    RTThreadPreemptRestore(&PreemptState);
                }
                else
                {
                    RTThreadPreemptRestore(&PreemptState);
                    RTMpOnSpecific(idCpu, supdrvGipMpEventOnlineCallback, pDevExt, NULL /* pvUser2 */);
                }

                /*
                 * Recompute TSC-delta for the newly online'd CPU.
                 */
                if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
                {
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
                    supdrvTscDeltaThreadStartMeasurement(pDevExt);
#else
                    uint32_t iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
                    supdrvMeasureTscDeltaOne(pDevExt, iCpu);
#endif
                }
                break;
            }

            case RTMPEVENT_OFFLINE:
                supdrvGipMpEventOffline(pDevExt, idCpu);
                break;
        }
    }

    /*
     * Make sure there is a master GIP.
     */
    if (enmEvent == RTMPEVENT_OFFLINE)
    {
        RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
        if (idGipMaster == idCpu)
        {
            /*
             * The GIP master is going offline, find a new one.
             */
            bool        fIgnored;
            unsigned    i;
            RTCPUID     idNewGipMaster = NIL_RTCPUID;
            RTCPUSET    OnlineCpus;
            RTMpGetOnlineSet(&OnlineCpus);

            /* Pick the first online CPU that isn't the outgoing master. */
            for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
                if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
                {
                    RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
                    if (idCurCpu != idGipMaster)
                    {
                        idNewGipMaster = idCurCpu;
                        break;
                    }
                }

            Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
            /* CmpXchg so we don't clobber a handover that raced us. */
            ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
            NOREF(fIgnored);
        }
    }
}
1475
1476
1477/**
1478 * On CPU initialization callback for RTMpOnAll.
1479 *
1480 * @param idCpu The CPU ID.
1481 * @param pvUser1 The device extension.
1482 * @param pvUser2 The GIP.
1483 */
1484static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1485{
1486 /* This is good enough, even though it will update some of the globals a
1487 bit to much. */
1488 supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
1489}
1490
1491
1492/**
1493 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
1494 *
1495 * @param idCpu Ignored.
1496 * @param pvUser1 Where to put the TSC.
1497 * @param pvUser2 Ignored.
1498 */
1499static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1500{
1501 Assert(RTMpCpuIdToSetIndex(idCpu) == (intptr_t)pvUser2);
1502 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
1503}
1504
1505
/**
 * Determine if Async GIP mode is required because of TSC drift.
 *
 * When using the default/normal timer code it is essential that the time stamp counter
 * (TSC) runs never backwards, that is, a read operation to the counter should return
 * a bigger value than any previous read operation.  This is guaranteed by the latest
 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4).  In any other
 * case we have to choose the asynchronous timer mode.
 *
 * @param   poffMin     Pointer to the determined difference between different
 *                      cores (optional, can be NULL).
 * @return  false if the time stamp counters appear to be synchronized, true otherwise.
 */
static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
{
    /*
     * Just iterate all the cpus 8 times and make sure that the TSC is
     * ever increasing.  We don't bother taking TSC rollover into account.
     */
    int      iEndCpu = RTMpGetArraySize();
    int      iCpu;
    int      cLoops  = 8;
    bool     fAsync  = false;
    int      rc      = VINF_SUCCESS;
    uint64_t offMax  = 0;
    uint64_t offMin  = ~(uint64_t)0;
    uint64_t PrevTsc = ASMReadTSC();

    while (cLoops-- > 0)
    {
        for (iCpu = 0; iCpu < iEndCpu; iCpu++)
        {
            uint64_t CurTsc;
            rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker,
                                &CurTsc, (void *)(uintptr_t)iCpu);
            if (RT_SUCCESS(rc))
            {
                /* A non-increasing TSC across CPUs means they are drifting:
                   async mode is required. */
                if (CurTsc <= PrevTsc)
                {
                    fAsync = true;
                    offMin = offMax = PrevTsc - CurTsc;
                    Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
                         iCpu, cLoops, CurTsc, PrevTsc));
                    break;
                }

                /* Gather statistics (except the first time). */
                if (iCpu != 0 || cLoops != 7)
                {
                    uint64_t off = CurTsc - PrevTsc;
                    if (off < offMin)
                        offMin = off;
                    if (off > offMax)
                        offMax = off;
                    Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
                }

                /* Next */
                PrevTsc = CurTsc;
            }
            else if (rc == VERR_NOT_SUPPORTED)
                break;
            else
                AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
        }

        /* broke out of the inner loop (drift detected or not supported). */
        if (iCpu < iEndCpu)
            break;
    }

    if (poffMin)
        *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
    Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
         fAsync, iEndCpu, rc, offMin, offMax));
#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
    OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
#endif
    return fAsync;
}
1586
1587
/**
 * supdrvGipInit() worker that determines the GIP TSC mode.
 *
 * @returns The most suitable TSC mode.
 * @param   pDevExt     Pointer to the device instance data.
 */
static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
{
    uint64_t u64DiffCoresIgnored;
    uint32_t uEAX, uEBX, uECX, uEDX;

    /*
     * Establish whether the CPU advertises TSC as invariant, we need that in
     * a couple of places below.  (CPUID leaf 0x80000007, EDX TscInvariant bit.)
     */
    bool fInvariantTsc = false;
    if (ASMHasCpuId())
    {
        uEAX = ASMCpuId_EAX(0x80000000);
        if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
        {
            uEDX = ASMCpuId_EDX(0x80000007);
            if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
                fInvariantTsc = true;
        }
    }

    /*
     * On single CPU systems, we don't need to consider ASYNC mode.
     */
    if (RTMpGetCount() <= 1)
        return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;

    /*
     * Allow the user and/or OS specific bits to force async mode.
     */
    if (supdrvOSGetForcedAsyncTscMode(pDevExt))
        return SUPGIPMODE_ASYNC_TSC;

    /*
     * Use invariant mode if the CPU says TSC is invariant.
     */
    if (fInvariantTsc)
        return SUPGIPMODE_INVARIANT_TSC;

    /*
     * TSC is not invariant and we're on SMP, this presents two problems:
     *
     *      (1) There might be a skew between the CPUs, so that cpu0
     *          returns a TSC that is slightly different from cpu1.
     *          This skew may be due to (2), bad TSC initialization
     *          or slightly different TSC rates.
     *
     *      (2) Power management (and other things) may cause the TSC
     *          to run at a non-constant speed, and cause the speed
     *          to be different on the cpus.  This will result in (1).
     *
     * If any of the above is detected, we will have to use ASYNC mode.
     */
    /* (1). Try check for current differences between the cpus. */
    if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
        return SUPGIPMODE_ASYNC_TSC;

    /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
    ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
    if (   ASMIsValidStdRange(uEAX)
        && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
    {
        /* Check for APM support. */
        uEAX = ASMCpuId_EAX(0x80000000);
        if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
        {
            uEDX = ASMCpuId_EDX(0x80000007);
            if (uEDX & 0x3e)  /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
                return SUPGIPMODE_ASYNC_TSC;
        }
    }

    return SUPGIPMODE_SYNC_TSC;
}
1668
1669
/**
 * Initializes per-CPU GIP information.
 *
 * @param   pGip        Pointer to the GIP.
 * @param   pCpu        Pointer to which GIP CPU to initialize.
 * @param   u64NanoTS   The current nanosecond timestamp.
 * @param   uCpuHz      The CPU frequency to set, 0 if the caller doesn't know.
 */
static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz)
{
    pCpu->u32TransactionId  = 2;
    pCpu->u64NanoTS         = u64NanoTS;
    pCpu->u64TSC            = ASMReadTSC();
    pCpu->u64TSCSample      = GIP_TSC_DELTA_RSVD;
    /* Deltas start out as "unknown" (INT64_MAX) when the delta machinery is in
       use; otherwise they are simply zero. */
    pCpu->i64TSCDelta       = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;

    /* Mark the entry invalid/unassigned until an online event fills it in. */
    ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
    ASMAtomicWriteSize(&pCpu->idCpu,    NIL_RTCPUID);
    ASMAtomicWriteS16(&pCpu->iCpuSet,   -1);
    ASMAtomicWriteU16(&pCpu->idApic,    UINT16_MAX);

    /*
     * The first time we're called, we don't have a CPU frequency handy,
     * so pretend it's a 4 GHz CPU.  On CPUs that are online, we'll get
     * called again and at that point we have a more plausible CPU frequency
     * value handy.  The frequency history will also be adjusted again on
     * the 2nd timer callout (maybe we can skip that now?).
     */
    if (!uCpuHz)
    {
        pCpu->u64CpuHz             = _4G - 1;
        pCpu->u32UpdateIntervalTSC = (uint32_t)((_4G - 1) / pGip->u32UpdateHz);
    }
    else
    {
        pCpu->u64CpuHz             = uCpuHz;
        pCpu->u32UpdateIntervalTSC = (uint32_t)(uCpuHz / pGip->u32UpdateHz);
    }
    /* Seed the whole TSC interval history with the initial estimate. */
    pCpu->au32TSCHistory[0]
        = pCpu->au32TSCHistory[1]
        = pCpu->au32TSCHistory[2]
        = pCpu->au32TSCHistory[3]
        = pCpu->au32TSCHistory[4]
        = pCpu->au32TSCHistory[5]
        = pCpu->au32TSCHistory[6]
        = pCpu->au32TSCHistory[7]
        = pCpu->u32UpdateIntervalTSC;
}
1718
1719
/**
 * Initializes the GIP data.
 *
 * @param   pDevExt             Pointer to the device instance data.
 * @param   pGip                Pointer to the read-write kernel mapping of the GIP.
 * @param   HCPhys              The physical address of the GIP.
 * @param   u64NanoTS           The current nanosecond timestamp.
 * @param   uUpdateHz           The update frequency.
 * @param   uUpdateIntervalNS   The update interval in nanoseconds.
 * @param   cCpus               The CPU count.
 */
static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
                          uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
{
    size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
    unsigned i;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#else
    LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#endif

    /*
     * Initialize the structure.
     */
    memset(pGip, 0, cbGip);

    pGip->u32Magic                = SUPGLOBALINFOPAGE_MAGIC;
    pGip->u32Version              = SUPGLOBALINFOPAGE_VERSION;
    pGip->u32Mode                 = supdrvGipInitDetermineTscMode(pDevExt);
    if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
        /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
        pGip->enmUseTscDelta      = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
                                  ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
    else
        pGip->enmUseTscDelta      = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
    pGip->cCpus                   = (uint16_t)cCpus;
    pGip->cPages                  = (uint16_t)(cbGip / PAGE_SIZE);
    pGip->u32UpdateHz             = uUpdateHz;
    pGip->u32UpdateIntervalNS     = uUpdateIntervalNS;
    pGip->fGetGipCpu              = SUPGIPGETCPU_APIC_ID;
    RTCpuSetEmpty(&pGip->OnlineCpuSet);
    RTCpuSetEmpty(&pGip->PresentCpuSet);
    RTMpGetSet(&pGip->PossibleCpuSet);
    pGip->cOnlineCpus             = RTMpGetOnlineCount();
    pGip->cPresentCpus            = RTMpGetPresentCount();
    pGip->cPossibleCpus           = RTMpGetCount();
    pGip->idCpuMax                = RTMpGetMaxCpuId();
    /* All mapping tables start out as "no entry" (UINT16_MAX). */
    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
        pGip->aiCpuFromApicId[i]    = UINT16_MAX;
    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
        pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
    for (i = 0; i < cCpus; i++)
        supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, 0 /*uCpuHz*/);

    /*
     * Link it to the device extension.
     */
    pDevExt->pGip      = pGip;
    pDevExt->HCPhysGip = HCPhys;
    pDevExt->cGipUsers = 0;
}
1782
1783
1784/**
1785 * Creates the GIP.
1786 *
1787 * @returns VBox status code.
1788 * @param pDevExt Instance data. GIP stuff may be updated.
1789 */
1790int VBOXCALL supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
1791{
1792 PSUPGLOBALINFOPAGE pGip;
1793 RTHCPHYS HCPhysGip;
1794 uint32_t u32SystemResolution;
1795 uint32_t u32Interval;
1796 uint32_t u32MinInterval;
1797 uint32_t uMod;
1798 unsigned cCpus;
1799 int rc;
1800
1801 LogFlow(("supdrvGipCreate:\n"));
1802
1803 /*
1804 * Assert order.
1805 */
1806 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
1807 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
1808 Assert(!pDevExt->pGipTimer);
1809#ifdef SUPDRV_USE_MUTEX_FOR_GIP
1810 Assert(pDevExt->mtxGip != NIL_RTSEMMUTEX);
1811 Assert(pDevExt->mtxTscDelta != NIL_RTSEMMUTEX);
1812#else
1813 Assert(pDevExt->mtxGip != NIL_RTSEMFASTMUTEX);
1814 Assert(pDevExt->mtxTscDelta != NIL_RTSEMFASTMUTEX);
1815#endif
1816
1817 /*
1818 * Check the CPU count.
1819 */
1820 cCpus = RTMpGetArraySize();
1821 if ( cCpus > RTCPUSET_MAX_CPUS
1822 || cCpus > 256 /* ApicId is used for the mappings */)
1823 {
1824 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
1825 return VERR_TOO_MANY_CPUS;
1826 }
1827
1828 /*
1829 * Allocate a contiguous set of pages with a default kernel mapping.
1830 */
1831 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
1832 if (RT_FAILURE(rc))
1833 {
1834 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
1835 return rc;
1836 }
1837 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
1838 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
1839
1840 /*
1841 * Find a reasonable update interval and initialize the structure.
1842 */
1843 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
1844 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
1845 * See @bugref{6710}. */
1846 u32MinInterval = RT_NS_10MS;
1847 u32SystemResolution = RTTimerGetSystemGranularity();
1848 u32Interval = u32MinInterval;
1849 uMod = u32MinInterval % u32SystemResolution;
1850 if (uMod)
1851 u32Interval += u32SystemResolution - uMod;
1852
1853 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
1854
1855 /*
1856 * Important sanity check...
1857 */
1858 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
1859 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
1860 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
1861 {
1862 /* Basically, invariant Windows boxes, should never be detected as async (i.e. TSC-deltas should be 0). */
1863 OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
1864 return VERR_INTERNAL_ERROR_2;
1865 }
1866
1867 /*
1868 * Do the TSC frequency measurements.
1869 *
1870 * If we're in invariant TSC mode, just to a quick preliminary measurement
1871 * that the TSC-delta measurement code can use to yield cross calls.
1872 *
1873 * If we're in any of the other two modes, neither which require MP init,
1874 * notifications or deltas for the job, do the full measurement now so
1875 * that supdrvGipInitOnCpu() can populate the TSC interval and history
1876 * array with more reasonable values.
1877 */
1878 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1879 {
1880 rc = supdrvGipInitMeasureTscFreq(pDevExt, pGip, true /*fRough*/); /* cannot fail */
1881 supdrvGipInitStartTimerForRefiningInvariantTscFreq(pDevExt, pGip);
1882 }
1883 else
1884 rc = supdrvGipInitMeasureTscFreq(pDevExt, pGip, false /*fRough*/);
1885 if (RT_SUCCESS(rc))
1886 {
1887 /*
1888 * Start TSC-delta measurement thread before we start getting MP
1889 * events that will try kick it into action (includes the
1890 * RTMpOnAll/supdrvGipInitOnCpu call below).
1891 */
1892 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
1893 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
1894#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1895 if ( pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
1896 && pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1897 rc = supdrvTscDeltaThreadInit(pDevExt);
1898#endif
1899 if (RT_SUCCESS(rc))
1900 {
1901 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
1902 if (RT_SUCCESS(rc))
1903 {
1904 /*
1905 * Do GIP initialization on all online CPUs. Wake up the
1906 * TSC-delta thread afterwards.
1907 */
1908 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
1909 if (RT_SUCCESS(rc))
1910 {
1911#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1912 supdrvTscDeltaThreadStartMeasurement(pDevExt);
1913#else
1914 uint16_t iCpu;
1915 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1916 {
1917 /*
1918 * Measure the TSC deltas now that we have MP notifications.
1919 */
1920 int cTries = 5;
1921 do
1922 {
1923 rc = supdrvMeasureInitialTscDeltas(pDevExt);
1924 if ( rc != VERR_TRY_AGAIN
1925 && rc != VERR_CPU_OFFLINE)
1926 break;
1927 } while (--cTries > 0);
1928 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1929 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
1930 }
1931 else
1932 {
1933 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1934 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
1935 }
1936 if (RT_SUCCESS(rc))
1937#endif
1938 {
1939 /*
1940 * Create the timer.
1941 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
1942 */
1943 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
1944 {
1945 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
1946 supdrvGipAsyncTimer, pDevExt);
1947 if (rc == VERR_NOT_SUPPORTED)
1948 {
1949 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
1950 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
1951 }
1952 }
1953 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1954 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
1955 supdrvGipSyncAndInvariantTimer, pDevExt);
1956 if (RT_SUCCESS(rc))
1957 {
1958 /*
1959 * We're good.
1960 */
1961 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
1962 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
1963
1964 g_pSUPGlobalInfoPage = pGip;
1965 return VINF_SUCCESS;
1966 }
1967
1968 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
1969 Assert(!pDevExt->pGipTimer);
1970 }
1971 }
1972 else
1973 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
1974 }
1975 else
1976 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
1977 }
1978 else
1979 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
1980 }
1981 else
1982 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureInitialTscDeltas failed. rc=%Rrc\n", rc));
1983
1984 /* Releases timer frequency increase too. */
1985 supdrvGipDestroy(pDevExt);
1986 return rc;
1987}
1988
1989
1990/**
1991 * Invalidates the GIP data upon termination.
1992 *
1993 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1994 */
1995static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
1996{
1997 unsigned i;
1998 pGip->u32Magic = 0;
1999 for (i = 0; i < pGip->cCpus; i++)
2000 {
2001 pGip->aCPUs[i].u64NanoTS = 0;
2002 pGip->aCPUs[i].u64TSC = 0;
2003 pGip->aCPUs[i].iTSCHistoryHead = 0;
2004 pGip->aCPUs[i].u64TSCSample = 0;
2005 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
2006 }
2007}
2008
2009
/**
 * Terminates the GIP.
 *
 * Tears down everything supdrvGipCreate() set up: MP notifications, the
 * TSC-delta thread (if used), the refinement and update timers, the GIP data
 * and its backing memory object, and finally the timer resolution request.
 *
 * @param   pDevExt     Instance data. GIP stuff may be updated.
 */
void VBOXCALL supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
{
    int rc;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
                pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
                pDevExt->pGipTimer, pDevExt->GipMemObj));
#endif

    /*
     * Stop receiving MP notifications before tearing anything else down.
     */
    RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /*
     * Terminate the TSC-delta measurement thread and resources.
     */
    supdrvTscDeltaTerm(pDevExt);
#endif

    /*
     * Destroy the TSC-refinement timer.
     */
    if (pDevExt->pInvarTscRefineTimer)
    {
        RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
        pDevExt->pInvarTscRefineTimer = NULL;
    }

    /*
     * Invalidate the GIP data.
     */
    if (pDevExt->pGip)
    {
        supdrvGipTerm(pDevExt->pGip);
        pDevExt->pGip = NULL;
    }
    g_pSUPGlobalInfoPage = NULL;

    /*
     * Destroy the timer and free the GIP memory object.
     */
    if (pDevExt->pGipTimer)
    {
        rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
        pDevExt->pGipTimer = NULL;
    }

    if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
        pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
    }

    /*
     * Finally, make sure we've released the system timer resolution request
     * if one actually succeeded and is still pending.
     */
    supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
}
2076
2077
2078
2079
2080/*
2081 *
2082 *
2083 * GIP Update Timer Related Code
2084 * GIP Update Timer Related Code
2085 * GIP Update Timer Related Code
2086 *
2087 *
2088 */
2089
2090
/**
 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
 * updates all the per cpu data except the transaction id.
 *
 * The caller owns the update transaction (odd u32TransactionId), so plain
 * atomic writes are sufficient for publishing the individual fields here.
 *
 * @param   pDevExt         The device extension.
 * @param   pGipCpu         Pointer to the per cpu data.
 * @param   u64NanoTS       The current time stamp.
 * @param   u64TSC          The current TSC.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
{
    uint64_t    u64TSCDelta;
    uint32_t    u32UpdateIntervalTSC;
    uint32_t    u32UpdateIntervalTSCSlack;
    unsigned    iTSCHistoryHead;
    uint64_t    u64CpuHz;
    uint32_t    u32TransactionId;

    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    /* Delta between this and the previous update. */
    ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));

    /*
     * Update the NanoTS.
     */
    ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);

    /*
     * Calc TSC delta.
     */
    u64TSCDelta = u64TSC - pGipCpu->u64TSC;
    ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);

    /*
     * We don't need to keep recalculating the frequency when it's invariant, so
     * the remainder of this function is only for the sync and async TSC modes.
     */
    if (pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC)
    {
        /* A delta that doesn't fit in 32 bits is bogus (missed ticks or TSC
           jump); reuse the previous interval and count it as an error. */
        if (u64TSCDelta >> 32)
        {
            u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
            pGipCpu->cErrors++;
        }

        /*
         * On the 2nd and 3rd callout, reset the history with the current TSC
         * interval since the values entered by supdrvGipInit are totally off.
         * The interval on the 1st callout is completely unreliable, the 2nd is a bit
         * better, while the 3rd should be most reliable.
         */
        /** @todo Could we drop this now that we initializes the history
         *        with nominal TSC frequency values? */
        u32TransactionId = pGipCpu->u32TransactionId;
        if (RT_UNLIKELY(   (   u32TransactionId == 5
                            || u32TransactionId == 7)
                        && (   iTick == 2
                            || iTick == 3) ))
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
                ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
        }

        /*
         * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
         * Wait until we have at least one full history since the above history reset. The
         * assumption is that the majority of the previous history values will be tolerable.
         * See @bugref{6710} comment #67.
         */
        /** @todo Could we drop the fudging here now that we initializes the history
         *        with nominal TSC frequency values? */
        if (   u32TransactionId > 23 /* 7 + (8 * 2) */
            && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        {
            uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
            if (   pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
                || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
            {
                /* Interval looks off; substitute the weighted average of the
                   8-entry history for the measured delta. */
                uint32_t u32;
                u32  = pGipCpu->au32TSCHistory[0];
                u32 += pGipCpu->au32TSCHistory[1];
                u32 += pGipCpu->au32TSCHistory[2];
                u32 += pGipCpu->au32TSCHistory[3];
                u32 >>= 2;
                u64TSCDelta  = pGipCpu->au32TSCHistory[4];
                u64TSCDelta += pGipCpu->au32TSCHistory[5];
                u64TSCDelta += pGipCpu->au32TSCHistory[6];
                u64TSCDelta += pGipCpu->au32TSCHistory[7];
                u64TSCDelta >>= 2;
                u64TSCDelta += u32;
                u64TSCDelta >>= 1;
            }
        }

        /*
         * TSC History.
         */
        Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
        iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
        ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
        ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);

        /*
         * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
         *
         * On Windows, we have an occasional (but recurring) sour value that messed up
         * the history but taking only 1 interval reduces the precision overall.
         */
        if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
            || pGip->u32UpdateHz >= 1000)
        {
            uint32_t u32;
            u32  = pGipCpu->au32TSCHistory[0];
            u32 += pGipCpu->au32TSCHistory[1];
            u32 += pGipCpu->au32TSCHistory[2];
            u32 += pGipCpu->au32TSCHistory[3];
            u32 >>= 2;
            u32UpdateIntervalTSC  = pGipCpu->au32TSCHistory[4];
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
            u32UpdateIntervalTSC >>= 2;
            u32UpdateIntervalTSC += u32;
            u32UpdateIntervalTSC >>= 1;

            /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
            u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
        }
        else if (pGip->u32UpdateHz >= 90)
        {
            u32UpdateIntervalTSC  = (uint32_t)u64TSCDelta;
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
            u32UpdateIntervalTSC >>= 1;

            /* value chosen on a 2GHz thinkpad running windows */
            u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
        }
        else
        {
            u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;

            /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
            u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
        }
        ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);

        /*
         * CpuHz = ticks-per-interval scaled to ticks-per-second.
         */
        u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
        u64CpuHz /= pGip->u32UpdateIntervalNS;
        ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
    }
}
2251
2252
/**
 * Updates the GIP.
 *
 * Picks the relevant per-CPU entry (entry 0 unless in async TSC mode),
 * opens an update transaction (odd transaction id), periodically recalculates
 * the update frequency, and delegates the field updates to
 * supdrvGipDoUpdateCpu() before closing the transaction.
 *
 * @param   pDevExt         The device extension.
 * @param   u64NanoTS       The current nanosecond timestamp.
 * @param   u64TSC          The current TSC timestamp.
 * @param   idCpu           The CPU ID.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
{
    /*
     * Determine the relevant CPU data.
     */
    PSUPGIPCPU pGipCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        pGipCpu = &pGip->aCPUs[0];
    else
    {
        /* Async mode: look up our own CPU's entry; bail quietly if the APIC id
           maps to an out-of-range index or a stale CPU id (racing hotplug). */
        unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
        if (RT_UNLIKELY(iCpu >= pGip->cCpus))
            return;
        pGipCpu = &pGip->aCPUs[iCpu];
        if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
            return;
    }

    /*
     * Start update transaction.
     */
    if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
    {
        /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
        AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
        ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        pGipCpu->cErrors++;
        return;
    }

    /*
     * Recalc the update frequency every 0x800th time.
     */
    if (   pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC   /* cuz we're not recalculating the frequency on invariant hosts. */
        && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
    {
        if (pGip->u64NanoTSLastUpdateHz)
        {
#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
            uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
            uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
            if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
            {
                /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
                 *        calculation on non-invariant hosts if it changes the history decision
                 *        taken in supdrvGipDoUpdateCpu(). */
                uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
                ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
                ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
            }
#endif
        }
        /* The |1 makes sure the value is never zero, so the "first time" check above works. */
        ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
    }

    /*
     * Update the data.
     */
    supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

    /*
     * Complete transaction.
     */
    ASMAtomicIncU32(&pGipCpu->u32TransactionId);
}
2332
2333
/**
 * Updates the per cpu GIP data for the calling cpu.
 *
 * Used in async TSC mode for all CPUs other than the GIP master.  Resolves
 * the per-CPU entry via the APIC id and performs a transactional update of
 * that entry only.
 *
 * @param   pDevExt         The device extension.
 * @param   u64NanoTS       The current nanosecond timestamp.
 * @param   u64TSC          The current TSC timestamp.
 * @param   idCpu           The CPU ID.
 * @param   idApic          The APIC id for the CPU index.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
                                  RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
{
    uint32_t iCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    /*
     * Avoid a potential race when a CPU online notification doesn't fire on
     * the onlined CPU but the tick creeps in before the event notification is
     * run.
     */
    if (RT_UNLIKELY(iTick == 1))
    {
        iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
        if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
            supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
    }

    iCpu = pGip->aiCpuFromApicId[idApic];
    if (RT_LIKELY(iCpu < pGip->cCpus))
    {
        PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
        /* Silently skip the update if the entry doesn't match our CPU id
           (stale mapping, e.g. around CPU hotplug). */
        if (pGipCpu->idCpu == idCpu)
        {
            /*
             * Start update transaction.
             */
            if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
            {
                AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
                ASMAtomicIncU32(&pGipCpu->u32TransactionId);
                pGipCpu->cErrors++;
                return;
            }

            /*
             * Update the data.
             */
            supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

            /*
             * Complete transaction.
             */
            ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        }
    }
}
2393
2394
/**
 * Timer callback function for the sync and invariant GIP modes.
 *
 * Reads the TSC and system time with interrupts disabled, applies the
 * per-CPU TSC delta when available, and feeds the result to supdrvGipUpdate().
 *
 * @param   pTimer      The timer.  (Unused here.)
 * @param   pvUser      Opaque pointer to the device extension.
 * @param   iTick       The timer tick.
 */
static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PSUPDRVDEVEXT      pDevExt   = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip      = pDevExt->pGip;
    RTCCUINTREG        fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
    uint64_t           u64TSC    = ASMReadTSC();
    uint64_t           u64NanoTS = RTTimeSystemNanoTS();

    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
    {
        /*
         * The calculations in supdrvGipUpdate() is somewhat timing sensitive,
         * missing timer ticks is not an option for GIP because the GIP users
         * will end up incrementing the time in 1ns per time getter call until
         * there is a complete timer update. So, if the delta has yet to be
         * calculated, we just pretend it is zero for now (the GIP users
         * probably won't have it for a wee while either and will do the same).
         *
         * We could maybe on some platforms try cross calling a CPU with a
         * working delta here, but it's not worth the hassle since the
         * likelihood of this happening is really low. On Windows, Linux, and
         * Solaris timers fire on the CPU they were registered/started on.
         * Darwin timers doesn't necessarily (they are high priority threads).
         */
        uint32_t iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
        uint16_t iGipCpu = RT_LIKELY(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
                         ? pGip->aiCpuFromCpuSetIdx[iCpuSet] : UINT16_MAX;
        Assert(!ASMIntAreEnabled());
        if (RT_LIKELY(iGipCpu < pGip->cCpus))
        {
            /* INT64_MAX means "delta not yet measured" - treat it as zero. */
            int64_t iTscDelta = pGip->aCPUs[iGipCpu].i64TSCDelta;
            if (iTscDelta != INT64_MAX)
                u64TSC -= iTscDelta;
        }
    }

    supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);

    ASMSetFlags(fOldFlags);
}
2442
2443
/**
 * Timer callback function for async GIP mode.
 *
 * Fires on every CPU (omni timer); the GIP master CPU takes the global
 * supdrvGipUpdate() path while every other CPU updates only its own entry.
 *
 * @param   pTimer      The timer.  (Unused here.)
 * @param   pvUser      Opaque pointer to the device extension.
 * @param   iTick       The timer tick.
 */
static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PSUPDRVDEVEXT   pDevExt   = (PSUPDRVDEVEXT)pvUser;
    RTCCUINTREG     fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
    RTCPUID         idCpu     = RTMpCpuId();
    uint64_t        u64TSC    = ASMReadTSC();
    uint64_t        NanoTS    = RTTimeSystemNanoTS();

    /** @todo reset the transaction number and whatnot when iTick == 1. */
    if (pDevExt->idGipMaster == idCpu)
        supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
    else
        supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);

    ASMSetFlags(fOldFlags);
}
2466
2467
2468
2469
2470/*
2471 *
2472 *
2473 * TSC Delta Measurements And Related Code
2474 * TSC Delta Measurements And Related Code
2475 * TSC Delta Measurements And Related Code
2476 *
2477 *
2478 */
2479
2480
/*
 * Select the TSC delta measurement algorithm (compile time switch; flip the
 * #if 0 to fall back to method #1).
 */
#if 0
# define GIP_TSC_DELTA_METHOD_1
#else
# define GIP_TSC_DELTA_METHOD_2
#endif

/** For padding variables to keep them away from other cache lines.  Better too
 * large than too small!
 * @remarks Current AMD64 and x86 CPUs seems to use 64 bytes.  There are claims
 *          that NetBurst had 128 byte cache lines while the 486 thru Pentium
 *          III had 32 bytes cache lines. */
#define GIP_TSC_DELTA_CACHE_LINE_SIZE   128
2496
2497
/**
 * TSC delta measurement algorithm \#2 result entry.
 */
typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
{
    /** This CPU's own sequence number when sampling (see the method \#2 code). */
    uint32_t    iSeqMine;
    /** The sequence number observed from the other CPU when sampling. */
    uint32_t    iSeqOther;
    /** The TSC value read for this entry. */
    uint64_t    uTsc;
} SUPDRVTSCDELTAMETHOD2ENTRY;
2507
/**
 * TSC delta measurement algorithm \#2 Data.
 */
typedef struct SUPDRVTSCDELTAMETHOD2
{
    /** Padding to make sure the iCurSeqNo is in its own cache line. */
    uint64_t                    au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
    /** The current sequence number of this worker. */
    uint32_t volatile           iCurSeqNo;
    /** Padding to make sure the iCurSeqNo is in its own cache line. */
    /* NOTE(review): member is uint32_t despite the 'au64' prefix; the element
       count is computed with sizeof(uint32_t) so the size is correct. */
    uint32_t                    au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint32_t) - 1];
    /** Result table. */
    SUPDRVTSCDELTAMETHOD2ENTRY  aResults[64];
} SUPDRVTSCDELTAMETHOD2;
/** Pointer to the data for TSC delta measurement algorithm \#2 .*/
typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
2524
2525
/**
 * The TSC delta synchronization struct, version 2.
 *
 * The synchronization variable is completely isolated in its own cache line
 * (provided our max cache line size estimate is correct).
 */
typedef struct SUPTSCDELTASYNC2
{
    /** Padding to make sure the uSyncVar is in its own cache line. */
    uint64_t            au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
    volatile uint32_t   uSyncVar;
    /** Sequence synchronizing variable used for post 'GO' synchronization. */
    volatile uint32_t   uSyncSeq;

    /** Padding to make sure the uSyncVar/uSyncSeq pair is in its own cache line. */
    uint64_t            au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t) - 2];

    /** Start RDTSC value.  Put here mainly to save stack space. */
    uint64_t            uTscStart;
    /** Copy of SUPDRVGIPTSCDELTARGS::cMaxTscTicks. */
    uint64_t            cMaxTscTicks;
} SUPTSCDELTASYNC2;
AssertCompileSize(SUPTSCDELTASYNC2, GIP_TSC_DELTA_CACHE_LINE_SIZE * 2 + sizeof(uint64_t));
typedef SUPTSCDELTASYNC2 *PSUPTSCDELTASYNC2;
2552
/** Prestart wait. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_WAIT    UINT32_C(0x0ffe)
/** Prestart aborted. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_ABORT   UINT32_C(0x0fff)
/** Ready (on your mark). */
#define GIP_TSC_DELTA_SYNC2_READY            UINT32_C(0x1000)
/** Steady (get set). */
#define GIP_TSC_DELTA_SYNC2_STEADY           UINT32_C(0x1001)
/** Go! */
#define GIP_TSC_DELTA_SYNC2_GO               UINT32_C(0x1002)
/** Used by the verification test. */
#define GIP_TSC_DELTA_SYNC2_GO_GO            UINT32_C(0x1003)

/** We reached the time limit. */
#define GIP_TSC_DELTA_SYNC2_TIMEOUT          UINT32_C(0x1ffe)
/** The other party won't touch the sync struct ever again. */
#define GIP_TSC_DELTA_SYNC2_FINAL            UINT32_C(0x1fff)
2570
2571
/**
 * Argument package/state passed by supdrvMeasureTscDeltaOne() to the RTMpOn
 * callback worker.
 *
 * Laid out so that the master's variables, the worker's variables, and the
 * shared setup fields each live in separate cache lines.
 */
typedef struct SUPDRVGIPTSCDELTARGS
{
    /** The device extension.   */
    PSUPDRVDEVEXT               pDevExt;
    /** Pointer to the GIP CPU array entry for the worker. */
    PSUPGIPCPU                  pWorker;
    /** Pointer to the GIP CPU array entry for the master. */
    PSUPGIPCPU                  pMaster;
    /** The maximum number of ticks to spend in supdrvMeasureTscDeltaCallback.
     * (This is what we need a rough TSC frequency for.)  */
    uint64_t                    cMaxTscTicks;
    /** Used to abort synchronization setup. */
    bool volatile               fAbortSetup;

#if 0
    /** Method 1 data. */
    struct
    {
    } M1;
#endif

#ifdef GIP_TSC_DELTA_METHOD_2
    struct
    {
        PSUPDRVTSCDELTAMETHOD2  pMasterData;
        PSUPDRVTSCDELTAMETHOD2  pWorkerData;
    } M2;
#endif


    /** Padding to make sure the master variables live in its own cache lines. */
    uint64_t                    au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
    /** The time the master spent in the MP worker.  */
    uint64_t                    cElapsedMasterTscTicks;
    /** The iTry value when stopped at. */
    uint32_t                    iTry;
    /** Set if the run timed out.   */
    bool volatile               fTimedOut;
    /** Pointer to the master's synchronization struct (on stack). */
    PSUPTSCDELTASYNC2 volatile  pSyncMaster;
    /** Master data union. */
    union
    {
        /** Data (master) for delta verification. */
        struct
        {
            /** Verification test TSC values for the master. */
            uint64_t volatile   auTscs[32];
        } Verify;
        /** Data (master) for measurement method \#2. */
        struct
        {
            /** Data and sequence number. */
            SUPDRVTSCDELTAMETHOD2   Data;
            /** The lag setting for the next run. */
            bool                fLag;
            /** Number of hits. */
            uint32_t            cHits;
        } M2;
    } uMaster;
    /** The verifier verdict, VINF_SUCCESS if ok, VERR_OUT_OF_RANGE if not,
     * VERR_TRY_AGAIN on timeout. */
    int32_t                     rcVerify;
#ifdef TSCDELTA_VERIFY_WITH_STATS
    /** The maximum difference between TSC read during delta verification. */
    int64_t                     cMaxVerifyTscTicks;
    /** The minimum difference between two TSC reads during verification. */
    int64_t                     cMinVerifyTscTicks;
    /** The bad TSC diff, worker relative to master (= worker - master).
     * Negative value means the worker is behind the master.  */
    int64_t                     iVerifyBadTscDiff;
#endif

    /** Padding to make sure the worker variables live in their own cache lines. */
    uint64_t                    au64CacheLinePaddingBetween[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
    /** Pointer to the worker's synchronization struct (on stack). */
    PSUPTSCDELTASYNC2 volatile  pSyncWorker;
    /** The time the worker spent in the MP worker.  */
    uint64_t                    cElapsedWorkerTscTicks;
    /** Worker data union. */
    union
    {
        /** Data (worker) for delta verification. */
        struct
        {
            /** Verification test TSC values for the worker. */
            uint64_t volatile   auTscs[32];
        } Verify;
        /** Data (worker) for measurement method \#2. */
        struct
        {
            /** Data and sequence number. */
            SUPDRVTSCDELTAMETHOD2   Data;
            /** The lag setting for the next run (set by master). */
            bool                fLag;
        } M2;
    } uWorker;

    /** Padding to make sure the above is in its own cache line. */
    uint64_t                    au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
} SUPDRVGIPTSCDELTARGS;
typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
2679
2680
/** @name Macros that implements the basic synchronization steps common to
 *        the algorithms.
 *
 * Must be used from loop as the timeouts are implemented via 'break' statements
 * at the moment.
 *
 * @{
 */
/* Loop watchdog: counts iterations and breakpoints on suspiciously long spins
   (active in debug builds only). */
#if defined(DEBUG_bird) /* || defined(VBOX_STRICT) */
# define TSCDELTA_DBG_VARS()            uint32_t iDbgCounter
# define TSCDELTA_DBG_START_LOOP()      do { iDbgCounter = 0; } while (0)
# define TSCDELTA_DBG_CHECK_LOOP() \
    do { iDbgCounter++; if ((iDbgCounter & UINT32_C(0x01ffffff)) == 0) RT_BREAKPOINT(); } while (0)
#else
# define TSCDELTA_DBG_VARS()            ((void)0)
# define TSCDELTA_DBG_START_LOOP()      ((void)0)
# define TSCDELTA_DBG_CHECK_LOOP()      ((void)0)
#endif
/* Optional tracing of the synchronization handshake; flip the #if 0 to 1 to
   route messages to SUPR0Printf at increasing verbosity levels. */
#if 0
# define TSCDELTA_DBG_SYNC_MSG(a_Args)  SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG(a_Args)  ((void)0)
#endif
#if 0
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) ((void)0)
#endif
#if 0
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) ((void)0)
#endif
2714
2715
/**
 * Performs the ready/steady/go handshake between the master and worker CPUs
 * prior to a TSC delta measurement run.
 *
 * On success, interrupts are left DISABLED on the calling CPU and the caller
 * must eventually re-enable them via the corresponding _After step.  On
 * failure, the EFLAGS are restored before returning.
 *
 * @returns true on success (both CPUs in lockstep, interrupts disabled),
 *          false on abort or timeout (interrupts restored).
 * @param   pMySync     This CPU's synchronization structure.
 * @param   pOtherSync  The other CPU's synchronization structure.
 * @param   fIsMaster   Whether the caller is the master (true) or the
 *                      worker (false).
 * @param   pfEFlags    Where to return the caller's interrupt-enabled EFLAGS
 *                      for restoring later.
 * @param   pArgs       The TSC delta measurement argument package (used for
 *                      flagging timeouts).
 */
static bool supdrvTscDeltaSync2_Before(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                       bool fIsMaster, PRTCCUINTREG pfEFlags, PSUPDRVGIPTSCDELTARGS pArgs)
{
    /* Master and worker use disjoint sequence ranges (0.. vs 256..) so that a
       stale value from the other party can never match our own sequence. */
    uint32_t        iMySeq  = fIsMaster ? 0 : 256;
    uint32_t const  iMaxSeq = iMySeq + 16;  /* For the last loop, darn linux/freebsd C-ishness. */
    uint32_t        u32Tmp;
    uint32_t        iSync2Loops = 0;
    RTCCUINTREG     fEFlags;
    TSCDELTA_DBG_VARS();

    *pfEFlags = X86_EFL_IF | X86_EFL_1; /* should shut up most nagging compilers. */

    /*
     * The master tells the worker to get on it's mark.
     */
    if (fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
        { /* likely*/ }
        else
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #1 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Wait for the on your mark signal (ack in the master case). We process timeouts here.
     */
    ASMAtomicWriteU32(&(pMySync)->uSyncSeq, 0);
    for (;;)
    {
        fEFlags = ASMIntDisableFlags();
        u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY)
            break;
        ASMSetFlags(fEFlags);
        ASMNopPause();

        /* Abort? */
        if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY)
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #2 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
            return false;
        }

        /* Check for timeouts every so often (not every loop in case RDTSC is
           trapping or something).  Must check the first time around. */
#if 0 /* For debugging the timeout paths. */
        static uint32_t volatile xxx;
#endif
        if (   (   (iSync2Loops & 0x3ff) == 0
                && ASMReadTSC() - pMySync->uTscStart > pMySync->cMaxTscTicks)
#if 0 /* This is crazy, I know, but enable this code and the results are markedly better when enabled on the 1.4GHz AMD (debug). */
            || (!fIsMaster && (++xxx & 0xf) == 0)
#endif
           )
        {
            /* Try switch our own state into timeout mode so the master cannot tell us to 'GO',
               ignore the timeout if we've got the go ahead already (simpler). */
            if (ASMAtomicCmpXchgU32(&pMySync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_READY))
            {
                TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: timeout\n", fIsMaster ? "master" : "worker"));
                ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_STEADY);
                ASMAtomicWriteBool(&pArgs->fTimedOut, true);
                return false;
            }
        }
        iSync2Loops++;
    }

    /*
     * Interrupts are now disabled and will remain disabled until we do
     * TSCDELTA_MASTER_SYNC_AFTER / TSCDELTA_OTHER_SYNC_AFTER.
     */
    *pfEFlags = fEFlags;

    /*
     * The worker tells the master that it is on its mark and that the master
     * need to get into position as well.
     */
    if (!fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #3 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * The master sends the 'go' to the worker and wait for ACK.
     */
    if (fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #4 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Wait for the 'go' signal (ack in the master case).
     */
    TSCDELTA_DBG_START_LOOP();
    for (;;)
    {
        u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (u32Tmp == GIP_TSC_DELTA_SYNC2_GO)
            break;
        if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #5 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
            return false;
        }

        TSCDELTA_DBG_CHECK_LOOP();
        ASMNopPause();
    }

    /*
     * The worker acks the 'go' (shouldn't fail).
     */
    if (!fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #6 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Try enter mostly lockstep execution with it.
     */
    for (;;)
    {
        uint32_t iOtherSeq1, iOtherSeq2;
        ASMCompilerBarrier();
        ASMSerializeInstruction();

        ASMAtomicWriteU32(&pMySync->uSyncSeq, iMySeq);
        ASMNopPause();
        iOtherSeq1 = ASMAtomicXchgU32(&pOtherSync->uSyncSeq, iMySeq);
        ASMNopPause();
        iOtherSeq2 = ASMAtomicReadU32(&pMySync->uSyncSeq);

        ASMCompilerBarrier();
        /* Both sides observed each other's latest sequence => in lockstep. */
        if (iOtherSeq1 == iOtherSeq2)
            return true;

        /* Did the other guy give up? Should we give up? */
        if (   iOtherSeq1 == UINT32_MAX
            || iOtherSeq2 == UINT32_MAX)
            return true;
        if (++iMySeq >= iMaxSeq)
        {
            ASMAtomicWriteU32(&pMySync->uSyncSeq, UINT32_MAX);
            return true;
        }
        ASMNopPause();
    }
}
2892
/**
 * Master-side wrapper for supdrvTscDeltaSync2_Before().
 *
 * On synchronization failure this expands to a 'break' statement, so it may
 * only be used directly inside a loop body.  The trailing
 * 'else do {} while (0)' makes the construct swallow the following semicolon
 * safely.
 */
#define TSCDELTA_MASTER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { /*likely*/ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/master: #89\n")); \
        break; \
    } else do {} while (0)
/**
 * Worker-side wrapper for supdrvTscDeltaSync2_Before().
 *
 * Expands to a 'break' on failure; loop context required (see the master
 * variant above).
 */
#define TSCDELTA_OTHER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { /*likely*/ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/other: #89\n")); \
        break; \
    } else do {} while (0)
2909
2910
/**
 * Second part of the synchronization dance: waits for the partner after a
 * measurement step has been performed.
 *
 * For the master this means waiting until the worker has completed its data
 * collection; for the worker it means waiting until the master is done
 * processing the data and signals the next round.
 *
 * @returns true on success, false if the sync variable is in an unexpected
 *          state (should never happen).
 * @param   pMySync     My synchronization structure.
 * @param   pOtherSync  My partner's synchronization structure (unused here).
 * @param   fIsMaster   Set if master, clear if worker.
 * @param   fEFlags     The EFLAGS value to restore (saved by the 'before'
 *                      synchronization step).
 */
static bool supdrvTscDeltaSync2_After(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                      bool fIsMaster, RTCCUINTREG fEFlags)
{
    TSCDELTA_DBG_VARS();

    /*
     * Wait for the 'ready' signal.  In the master's case, this means the
     * worker has completed its data collection, while in the worker's case it
     * means the master is done processing the data and it's time for the next
     * loop iteration (or whatever).
     */
    ASMSetFlags(fEFlags);
    TSCDELTA_DBG_START_LOOP();
    for (;;)
    {
        uint32_t u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (   u32Tmp == GIP_TSC_DELTA_SYNC2_READY
            || (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY && !fIsMaster) /* kicked twice => race */ )
            return true;
        ASMNopPause();
        /* Anything other than GO at this point means the state machine broke. */
        if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_GO))
        { /* likely */}
        else
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #1 u32Tmp=%#x\n", u32Tmp));
            return false; /* shouldn't ever happen! */
        }
        TSCDELTA_DBG_CHECK_LOOP();
        ASMNopPause();
    }
}
2942
/**
 * Master-side wrapper for supdrvTscDeltaSync2_After().
 *
 * Expands to a 'break' on failure, so it may only be used inside a loop; the
 * trailing 'else do {} while (0)' construct safely swallows the semicolon.
 */
#define TSCDELTA_MASTER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_fEFlags))) \
    { /* likely */ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/after/master: #97\n")); \
        break; \
    } else do {} while (0)

/**
 * Master only: flips the worker's sync variable from GO back to READY,
 * releasing it from supdrvTscDeltaSync2_After() so the next round can start.
 *
 * Expands to a 'break' on failure; loop context required.
 */
#define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pMySync, a_pOtherSync) \
    /* \
     * Tell the worker that we're done processing the data and ready for the next round. \
     */ \
    if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
    { /* likely */ } \
    else if (true)\
    { \
        TSCDELTA_DBG_SYNC_MSG(("sync/after/master: #99 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
        break; \
    } else do {} while (0)

/**
 * Worker-side wrapper for supdrvTscDeltaSync2_After().
 *
 * First acks the master's GO by flipping the master's sync variable back to
 * READY, then waits for the master to finish processing the collected data.
 * Expands to a 'break' on failure; loop context required.
 */
#define TSCDELTA_OTHER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (true) { \
        /* \
         * Tell the master that we're done collecting data and wait for the next round to start. \
         */ \
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
        { /* likely */ } \
        else \
        { \
            ASMSetFlags(a_fEFlags); \
            TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #0 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
            break; \
        } \
        if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_fEFlags))) \
        { /* likely */ } \
        else \
        { \
            TSCDELTA_DBG_SYNC_MSG9(("sync/after/other: #98\n")); \
            break; \
        } \
    } else do {} while (0)
2985/** @} */
2986
2987
2988#ifdef GIP_TSC_DELTA_METHOD_1
2989/**
2990 * TSC delta measurment algorithm \#1 (GIP_TSC_DELTA_METHOD_1).
2991 *
2992 *
2993 * We ignore the first few runs of the loop in order to prime the
2994 * cache. Also, we need to be careful about using 'pause' instruction
2995 * in critical busy-wait loops in this code - it can cause undesired
2996 * behaviour with hyperthreading.
2997 *
2998 * We try to minimize the measurement error by computing the minimum
2999 * read time of the compare statement in the worker by taking TSC
3000 * measurements across it.
3001 *
3002 * It must be noted that the computed minimum read time is mostly to
3003 * eliminate huge deltas when the worker is too early and doesn't by
3004 * itself help produce more accurate deltas. We allow two times the
3005 * computed minimum as an arbibtrary acceptable threshold. Therefore,
3006 * it is still possible to get negative deltas where there are none
3007 * when the worker is earlier. As long as these occasional negative
3008 * deltas are lower than the time it takes to exit guest-context and
3009 * the OS to reschedule EMT on a different CPU we won't expose a TSC
3010 * that jumped backwards. It is because of the existence of the
3011 * negative deltas we don't recompute the delta with the master and
3012 * worker interchanged to eliminate the remaining measurement error.
3013 *
3014 *
3015 * @param pArgs The argument/state data.
3016 * @param pMySync My synchronization structure.
3017 * @param pOtherSync My partner's synchronization structure.
3018 * @param fIsMaster Set if master, clear if worker.
3019 * @param iTry The attempt number.
3020 */
3021static void supdrvTscDeltaMethod1Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3022 bool fIsMaster, uint32_t iTry)
3023{
3024 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3025 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3026 uint64_t uMinCmpReadTime = UINT64_MAX;
3027 unsigned iLoop;
3028 NOREF(iTry);
3029
3030 for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
3031 {
3032 RTCCUINTREG fEFlags;
3033 if (fIsMaster)
3034 {
3035 /*
3036 * The master.
3037 */
3038 AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
3039 ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
3040 pGipCpuMaster->u64TSCSample, pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, pArgs->pDevExt->idGipMaster));
3041 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3042
3043 do
3044 {
3045 ASMSerializeInstruction();
3046 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
3047 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
3048
3049 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3050
3051 /* Process the data. */
3052 if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
3053 {
3054 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
3055 {
3056 int64_t iDelta = pGipCpuWorker->u64TSCSample
3057 - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
3058 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3059 ? iDelta < pGipCpuWorker->i64TSCDelta
3060 : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
3061 pGipCpuWorker->i64TSCDelta = iDelta;
3062 }
3063 }
3064
3065 /* Reset our TSC sample and tell the worker to move on. */
3066 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
3067 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3068 }
3069 else
3070 {
3071 /*
3072 * The worker.
3073 */
3074 uint64_t uTscWorker;
3075 uint64_t uTscWorkerFlushed;
3076 uint64_t uCmpReadTime;
3077
3078 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
3079 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3080
3081 /*
3082 * Keep reading the TSC until we notice that the master has read his. Reading
3083 * the TSC -after- the master has updated the memory is way too late. We thus
3084 * compensate by trying to measure how long it took for the worker to notice
3085 * the memory flushed from the master.
3086 */
3087 do
3088 {
3089 ASMSerializeInstruction();
3090 uTscWorker = ASMReadTSC();
3091 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
3092 ASMSerializeInstruction();
3093 uTscWorkerFlushed = ASMReadTSC();
3094
3095 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
3096 if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
3097 {
3098 /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
3099 if (uCmpReadTime < (uMinCmpReadTime << 1))
3100 {
3101 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
3102 if (uCmpReadTime < uMinCmpReadTime)
3103 uMinCmpReadTime = uCmpReadTime;
3104 }
3105 else
3106 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
3107 }
3108 else if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
3109 {
3110 if (uCmpReadTime < uMinCmpReadTime)
3111 uMinCmpReadTime = uCmpReadTime;
3112 }
3113
3114 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3115 }
3116 }
3117
3118 TSCDELTA_DBG_SYNC_MSG9(("sync/method1loop/%s: #92 iLoop=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iLoop,
3119 pMySync->uSyncVar));
3120
3121 /*
3122 * We must reset the worker TSC sample value in case it gets picked as a
3123 * GIP master later on (it's trashed above, naturally).
3124 */
3125 if (!fIsMaster)
3126 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
3127}
3128#endif /* GIP_TSC_DELTA_METHOD_1 */
3129
3130
3131#ifdef GIP_TSC_DELTA_METHOD_2
3132/*
3133 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
3134 */
3135
3136# define GIP_TSC_DELTA_M2_LOOPS (7 + GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3137# define GIP_TSC_DELTA_M2_PRIMER_LOOPS 0
3138
3139
3140static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs, uint32_t iLoop)
3141{
3142 PSUPDRVTSCDELTAMETHOD2 pMasterData = pArgs->M2.pMasterData;
3143 PSUPDRVTSCDELTAMETHOD2 pOtherData = pArgs->M2.pWorkerData;
3144 int64_t iMasterTscDelta = pArgs->pMaster->i64TSCDelta;
3145 int64_t iBestDelta = pArgs->pWorker->i64TSCDelta;
3146 uint32_t idxResult;
3147 uint32_t cHits = 0;
3148
3149 /*
3150 * Look for matching entries in the master and worker tables.
3151 */
3152 for (idxResult = 0; idxResult < RT_ELEMENTS(pMasterData->aResults); idxResult++)
3153 {
3154 uint32_t idxOther = pMasterData->aResults[idxResult].iSeqOther;
3155 if (idxOther & 1)
3156 {
3157 idxOther >>= 1;
3158 if (idxOther < RT_ELEMENTS(pOtherData->aResults))
3159 {
3160 if (pOtherData->aResults[idxOther].iSeqOther == pMasterData->aResults[idxResult].iSeqMine)
3161 {
3162 int64_t iDelta;
3163 iDelta = pOtherData->aResults[idxOther].uTsc
3164 - (pMasterData->aResults[idxResult].uTsc - iMasterTscDelta);
3165 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3166 ? iDelta < iBestDelta
3167 : iDelta > iBestDelta || iBestDelta == INT64_MAX)
3168 iBestDelta = iDelta;
3169 cHits++;
3170 }
3171 }
3172 }
3173 }
3174
3175 /*
3176 * Save the results.
3177 */
3178 if (cHits > 2)
3179 pArgs->pWorker->i64TSCDelta = iBestDelta;
3180 pArgs->uMaster.M2.cHits += cHits;
3181}
3182
3183
3184/**
3185 * The core function of the 2nd TSC delta mesurment algorithm.
3186 *
3187 * The idea here is that we have the two CPUs execute the exact same code
3188 * collecting a largish set of TSC samples. The code has one data dependency on
3189 * the other CPU which intention it is to synchronize the execution as well as
3190 * help cross references the two sets of TSC samples (the sequence numbers).
3191 *
3192 * The @a fLag parameter is used to modify the execution a tiny bit on one or
3193 * both of the CPUs. When @a fLag differs between the CPUs, it is thought that
3194 * it will help with making the CPUs enter lock step execution occationally.
3195 *
3196 */
3197static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
3198{
3199 SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
3200 uint32_t cLeft = RT_ELEMENTS(pMyData->aResults);
3201
3202 ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
3203 ASMSerializeInstruction();
3204 while (cLeft-- > 0)
3205 {
3206 uint64_t uTsc;
3207 uint32_t iSeqMine = ASMAtomicIncU32(&pMyData->iCurSeqNo);
3208 uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
3209 ASMCompilerBarrier();
3210 ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
3211 uTsc = ASMReadTSC();
3212 ASMAtomicIncU32(&pMyData->iCurSeqNo);
3213 ASMCompilerBarrier();
3214 ASMSerializeInstruction();
3215 pEntry->iSeqMine = iSeqMine;
3216 pEntry->iSeqOther = iSeqOther;
3217 pEntry->uTsc = uTsc;
3218 pEntry++;
3219 ASMSerializeInstruction();
3220 if (fLag)
3221 ASMNopPause();
3222 }
3223}
3224
3225
3226/**
3227 * TSC delta measurment algorithm \#2 (GIP_TSC_DELTA_METHOD_2).
3228 *
3229 * See supdrvTscDeltaMethod2CollectData for algorithm details.
3230 *
3231 * @param pArgs The argument/state data.
3232 * @param pMySync My synchronization structure.
3233 * @param pOtherSync My partner's synchronization structure.
3234 * @param fIsMaster Set if master, clear if worker.
3235 * @param iTry The attempt number.
3236 */
3237static void supdrvTscDeltaMethod2Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3238 bool fIsMaster, uint32_t iTry)
3239{
3240 unsigned iLoop;
3241
3242 for (iLoop = 0; iLoop < GIP_TSC_DELTA_M2_LOOPS; iLoop++)
3243 {
3244 RTCCUINTREG fEFlags;
3245 if (fIsMaster)
3246 {
3247 /*
3248 * Adjust the loop lag fudge.
3249 */
3250# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3251 if (iLoop < GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3252 {
3253 /* Lag during the priming to be nice to everyone.. */
3254 pArgs->uMaster.M2.fLag = true;
3255 pArgs->uWorker.M2.fLag = true;
3256 }
3257 else
3258# endif
3259 if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4)
3260 {
3261 /* 25 % of the body without lagging. */
3262 pArgs->uMaster.M2.fLag = false;
3263 pArgs->uWorker.M2.fLag = false;
3264 }
3265 else if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4 * 2)
3266 {
3267 /* 25 % of the body with both lagging. */
3268 pArgs->uMaster.M2.fLag = true;
3269 pArgs->uWorker.M2.fLag = true;
3270 }
3271 else
3272 {
3273 /* 50% of the body with alternating lag. */
3274 pArgs->uMaster.M2.fLag = (iLoop & 1) == 0;
3275 pArgs->uWorker.M2.fLag= (iLoop & 1) == 1;
3276 }
3277
3278 /*
3279 * Sync up with the worker and collect data.
3280 */
3281 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3282 supdrvTscDeltaMethod2CollectData(&pArgs->uMaster.M2.Data, &pArgs->uWorker.M2.Data.iCurSeqNo, pArgs->uMaster.M2.fLag);
3283 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3284
3285 /*
3286 * Process the data.
3287 */
3288# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3289 if (iLoop >= GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3290# endif
3291 supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs, iLoop);
3292
3293 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3294 }
3295 else
3296 {
3297 /*
3298 * The worker.
3299 */
3300 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3301 supdrvTscDeltaMethod2CollectData(&pArgs->uWorker.M2.Data, &pArgs->uMaster.M2.Data.iCurSeqNo, pArgs->uWorker.M2.fLag);
3302 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3303 }
3304 }
3305}
3306
3307#endif /* GIP_TSC_DELTA_METHOD_2 */
3308
3309
3310
/**
 * Verifies a TSC delta by having the two CPUs read the TSC in strict
 * alternation (master leads) and checking, on the master, that after applying
 * the deltas the combined sequence of readings never goes backwards in time.
 *
 * Note! The TSCDELTA_*_SYNC_* macros expand to 'break' statements on failure,
 *       so the enclosing for(;;) loop is effectively a one-shot body whose
 *       normal path returns from within; the code following the loop is the
 *       shared timeout/failure exit.
 *
 * @returns pArgs->rcVerify on a completed pass (VINF_SUCCESS when the
 *          readings are monotonic, VERR_OUT_OF_RANGE otherwise), or
 *          VERR_TIMEOUT when synchronization failed (rcVerify is then set to
 *          VERR_TRY_AGAIN so the caller can retry).
 * @param   pArgs            The argument/state data.
 * @param   pMySync          My synchronization structure.
 * @param   pOtherSync       My partner's synchronization structure.
 * @param   fIsMaster        Set if master, clear if worker.
 * @param   iWorkerTscDelta  The candidate worker TSC delta to verify.
 */
static int supdrvTscDeltaVerify(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync,
                                PSUPTSCDELTASYNC2 pOtherSync, bool fIsMaster, int64_t iWorkerTscDelta)
{
    /*PSUPGIPCPU pGipCpuWorker = pArgs->pWorker; - unused */
    PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
    uint32_t   i;
    TSCDELTA_DBG_VARS();

    for (;;)
    {
        RTCCUINTREG fEFlags;
        AssertCompile((RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) & 1) == 0);
        AssertCompile(RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) == RT_ELEMENTS(pArgs->uWorker.Verify.auTscs));

        if (fIsMaster)
        {
            uint64_t uTscWorker;
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /*
             * Collect TSC, master goes first.  The GO/GO_GO values are used as
             * a two-phase ping-pong so each side reads exactly once per phase.
             */
            for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i += 2)
            {
                /* Read, kick & wait #1. */
                uint64_t register uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
                ASMSerializeInstruction();
                pArgs->uMaster.Verify.auTscs[i] = uTsc;
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }

                /* Read, kick & wait #2. */
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
                ASMSerializeInstruction();
                pArgs->uMaster.Verify.auTscs[i + 1] = uTsc;
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
            }

            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /*
             * Process the data: interleave the two tables in collection order
             * and check that adjusted timestamps are monotonically increasing.
             */
#ifdef TSCDELTA_VERIFY_WITH_STATS
            pArgs->cMaxVerifyTscTicks = INT64_MIN;
            pArgs->cMinVerifyTscTicks = INT64_MAX;
            pArgs->iVerifyBadTscDiff  = 0;
#endif
            ASMAtomicWriteS32(&pArgs->rcVerify, VINF_SUCCESS);
            uTscWorker = 0;
            for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i++)
            {
                /* Master vs previous worker entry. */
                uint64_t uTscMaster = pArgs->uMaster.Verify.auTscs[i] - pGipCpuMaster->i64TSCDelta;
                int64_t  iDiff;
                if (i > 0)
                {
                    iDiff = uTscMaster - uTscWorker;
#ifdef TSCDELTA_VERIFY_WITH_STATS
                    if (iDiff > pArgs->cMaxVerifyTscTicks)
                        pArgs->cMaxVerifyTscTicks = iDiff;
                    if (iDiff < pArgs->cMinVerifyTscTicks)
                        pArgs->cMinVerifyTscTicks = iDiff;
#endif
                    if (iDiff < 0)
                    {
#ifdef TSCDELTA_VERIFY_WITH_STATS
                        pArgs->iVerifyBadTscDiff = -iDiff;
#endif
                        ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
                        break;
                    }
                }

                /* Worker vs master. */
                uTscWorker = pArgs->uWorker.Verify.auTscs[i] - iWorkerTscDelta;
                iDiff = uTscWorker - uTscMaster;
#ifdef TSCDELTA_VERIFY_WITH_STATS
                if (iDiff > pArgs->cMaxVerifyTscTicks)
                    pArgs->cMaxVerifyTscTicks = iDiff;
                if (iDiff < pArgs->cMinVerifyTscTicks)
                    pArgs->cMinVerifyTscTicks = iDiff;
#endif
                if (iDiff < 0)
                {
#ifdef TSCDELTA_VERIFY_WITH_STATS
                    pArgs->iVerifyBadTscDiff = iDiff;
#endif
                    ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
                    break;
                }
            }

            /* Done. */
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker, master leads.
             */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            for (i = 0; i < RT_ELEMENTS(pArgs->uWorker.Verify.auTscs); i += 2)
            {
                uint64_t register uTsc;

                /* Wait, Read and Kick #1. */
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
                ASMSerializeInstruction();
                pArgs->uWorker.Verify.auTscs[i] = uTsc;

                /* Wait, Read and Kick #2. */
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
                ASMSerializeInstruction();
                pArgs->uWorker.Verify.auTscs[i + 1] = uTsc;
            }

            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
        return pArgs->rcVerify;
    }

    /*
     * Timed out, please retry.  (Only reachable via the 'break' statements
     * hidden inside the synchronization macros above.)
     */
    ASMAtomicWriteS32(&pArgs->rcVerify, VERR_TRY_AGAIN);
    return VERR_TIMEOUT;
}
3465
3466
3467
3468/**
3469 * Handles the special abort procedure during synchronization setup in
3470 * supdrvMeasureTscDeltaCallbackUnwrapped().
3471 *
3472 * @returns 0 (dummy, ignored)
3473 * @param pArgs Pointer to argument/state data.
3474 * @param pMySync Pointer to my sync structure.
3475 * @param fIsMaster Set if we're the master, clear if worker.
3476 * @param fTimeout Set if it's a timeout.
3477 */
3478DECL_NO_INLINE(static, int)
3479supdrvMeasureTscDeltaCallbackAbortSyncSetup(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, bool fIsMaster, bool fTimeout)
3480{
3481 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3482 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3483 TSCDELTA_DBG_VARS();
3484
3485 /*
3486 * Clear our sync pointer and make sure the abort flag is set.
3487 */
3488 ASMAtomicWriteNullPtr(ppMySync);
3489 ASMAtomicWriteBool(&pArgs->fAbortSetup, true);
3490 if (fTimeout)
3491 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
3492
3493 /*
3494 * Make sure the other party is out of there and won't be touching our
3495 * sync state again (would cause stack corruption).
3496 */
3497 TSCDELTA_DBG_START_LOOP();
3498 while (ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2) != NULL)
3499 {
3500 ASMNopPause();
3501 ASMNopPause();
3502 ASMNopPause();
3503 TSCDELTA_DBG_CHECK_LOOP();
3504 }
3505
3506 return 0;
3507}
3508
3509
3510/**
3511 * This is used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
3512 * and compute the delta between them.
3513 *
3514 * To reduce code size a good when timeout handling was added, a dummy return
3515 * value had to be added (saves 1-3 lines per timeout case), thus this
3516 * 'Unwrapped' function and the dummy 0 return value.
3517 *
3518 * @returns 0 (dummy, ignored)
3519 * @param idCpu The CPU we are current scheduled on.
3520 * @param pArgs Pointer to a parameter package.
3521 *
3522 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
3523 * read the TSC at exactly the same time on both the master and the
3524 * worker CPUs. Due to DMA, bus arbitration, cache locality,
3525 * contention, SMI, pipelining etc. there is no guaranteed way of
3526 * doing this on x86 CPUs.
3527 */
3528static int supdrvMeasureTscDeltaCallbackUnwrapped(RTCPUID idCpu, PSUPDRVGIPTSCDELTARGS pArgs)
3529{
3530 PSUPDRVDEVEXT pDevExt = pArgs->pDevExt;
3531 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3532 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3533 bool const fIsMaster = idCpu == pGipCpuMaster->idCpu;
3534 uint32_t iTry;
3535 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3536 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3537 SUPTSCDELTASYNC2 MySync;
3538 PSUPTSCDELTASYNC2 pOtherSync;
3539 int rc;
3540 TSCDELTA_DBG_VARS();
3541
3542 /* A bit of paranoia first. */
3543 if (!pGipCpuMaster || !pGipCpuWorker)
3544 return 0;
3545
3546 /*
3547 * If the CPU isn't part of the measurement, return immediately.
3548 */
3549 if ( !fIsMaster
3550 && idCpu != pGipCpuWorker->idCpu)
3551 return 0;
3552
3553 /*
3554 * Set up my synchronization stuff and wait for the other party to show up.
3555 *
3556 * We don't wait forever since the other party may be off fishing (offline,
3557 * spinning with ints disables, whatever), we must play nice to the rest of
3558 * the system as this context generally isn't one in which we will get
3559 * preempted and we may hold up a number of lower priority interrupts.
3560 */
3561 ASMAtomicWriteU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT);
3562 ASMAtomicWritePtr(ppMySync, &MySync);
3563 MySync.uTscStart = ASMReadTSC();
3564 MySync.cMaxTscTicks = pArgs->cMaxTscTicks;
3565
3566 /* Look for the partner, might not be here yet... Special abort considerations. */
3567 iTry = 0;
3568 TSCDELTA_DBG_START_LOOP();
3569 while ((pOtherSync = ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2)) == NULL)
3570 {
3571 ASMNopPause();
3572 if ( ASMAtomicReadBool(&pArgs->fAbortSetup)
3573 || !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuWorker->idCpu) )
3574 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3575 if ( (iTry++ & 0xff) == 0
3576 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3577 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3578 TSCDELTA_DBG_CHECK_LOOP();
3579 ASMNopPause();
3580 }
3581
3582 /* I found my partner, waiting to be found... Special abort considerations. */
3583 if (fIsMaster)
3584 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* parnaoia */
3585 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3586
3587 iTry = 0;
3588 TSCDELTA_DBG_START_LOOP();
3589 while (ASMAtomicReadU32(&MySync.uSyncVar) == GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)
3590 {
3591 ASMNopPause();
3592 if (ASMAtomicReadBool(&pArgs->fAbortSetup))
3593 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3594 if ( (iTry++ & 0xff) == 0
3595 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3596 {
3597 if ( fIsMaster
3598 && !ASMAtomicCmpXchgU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_ABORT, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT))
3599 break; /* race #1: slave has moved on, handle timeout in loop instead. */
3600 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3601 }
3602 TSCDELTA_DBG_CHECK_LOOP();
3603 }
3604
3605 if (!fIsMaster)
3606 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* race #1 */
3607 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3608
3609/** @todo Add a resumable state to pArgs so we don't waste time if we time
3610 * out or something. Timeouts are legit, any of the two CPUs may get
3611 * interrupted. */
3612
3613 /*
3614 * Start by seeing if we have a zero delta between the two CPUs.
3615 * This should normally be the case.
3616 */
3617 rc = supdrvTscDeltaVerify(pArgs, &MySync, pOtherSync, fIsMaster, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3618 if (RT_SUCCESS(rc))
3619 {
3620 if (fIsMaster)
3621 {
3622 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3623 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3624 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3625 }
3626 }
3627 /*
3628 * If the verification didn't time out, do regular delta measurements.
3629 * We retry this until we get a reasonable value.
3630 */
3631 else if (rc != VERR_TIMEOUT)
3632 {
3633 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
3634 for (iTry = 0; iTry < 12; iTry++)
3635 {
3636 /*
3637 * Check the state before we start.
3638 */
3639 uint32_t u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3640 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3641 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3642 {
3643 TSCDELTA_DBG_SYNC_MSG(("sync/loop/%s: #0 iTry=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iTry, u32Tmp));
3644 break;
3645 }
3646
3647 /*
3648 * Do the measurements.
3649 */
3650#ifdef GIP_TSC_DELTA_METHOD_1
3651 supdrvTscDeltaMethod1Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3652#elif defined(GIP_TSC_DELTA_METHOD_2)
3653 supdrvTscDeltaMethod2Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3654#else
3655# error "huh??"
3656#endif
3657
3658 /*
3659 * Check the state.
3660 */
3661 u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3662 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3663 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3664 {
3665 if (fIsMaster)
3666 TSCDELTA_DBG_SYNC_MSG(("sync/loop/master: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3667 else
3668 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/worker: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3669 break;
3670 }
3671
3672 /*
3673 * Success? If so, stop trying. Master decides.
3674 */
3675 if (fIsMaster)
3676 {
3677 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
3678 {
3679 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3680 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3681 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/master: #9 iTry=%u MyState=%#x\n", iTry, MySync.uSyncVar));
3682 break;
3683 }
3684 }
3685 }
3686 if (fIsMaster)
3687 pArgs->iTry = iTry;
3688 }
3689
3690 /*
3691 * End the synchroniziation dance. We tell the other that we're done,
3692 * then wait for the same kind of reply.
3693 */
3694 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_FINAL);
3695 ASMAtomicWriteNullPtr(ppMySync);
3696 iTry = 0;
3697 TSCDELTA_DBG_START_LOOP();
3698 while (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_FINAL)
3699 {
3700 iTry++;
3701 if ( iTry == 0
3702 && !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuWorker->idCpu))
3703 break; /* this really shouldn't happen. */
3704 TSCDELTA_DBG_CHECK_LOOP();
3705 ASMNopPause();
3706 }
3707
3708 /*
3709 * Collect some runtime stats.
3710 */
3711 if (fIsMaster)
3712 pArgs->cElapsedMasterTscTicks = ASMReadTSC() - MySync.uTscStart;
3713 else
3714 pArgs->cElapsedWorkerTscTicks = ASMReadTSC() - MySync.uTscStart;
3715 return 0;
3716}
3717
3718/**
3719 * Callback used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
3720 * and compute the delta between them.
3721 *
3722 * @param idCpu The CPU we are current scheduled on.
3723 * @param pvUser1 Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
3724 * @param pvUser2 Unused.
3725 */
3726static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3727{
3728 supdrvMeasureTscDeltaCallbackUnwrapped(idCpu, (PSUPDRVGIPTSCDELTARGS)pvUser1);
3729}
3730
3731
/**
 * Measures the TSC delta between the master GIP CPU and one specified worker
 * CPU.
 *
 * @returns VBox status code.
 * @retval  VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED on pure measurement
 *          failure.
 * @param   pDevExt         Pointer to the device instance data.
 * @param   idxWorker       The index of the worker CPU from the GIP's array of
 *                          CPUs.
 *
 * @remarks This must be called with preemption enabled!
 */
static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
{
    int                 rc;
    int                 rc2;
    PSUPGLOBALINFOPAGE  pGip          = pDevExt->pGip;
    RTCPUID             idMaster      = pDevExt->idGipMaster;
    PSUPGIPCPU          pGipCpuWorker = &pGip->aCPUs[idxWorker];
    PSUPGIPCPU          pGipCpuMaster;
    uint32_t            iGipCpuMaster;

    /* Validate input a bit. */
    AssertReturn(pGip, VERR_INVALID_PARAMETER);
    Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
    Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));

    /*
     * Don't attempt measuring the delta for the GIP master.
     */
    if (pGipCpuWorker->idCpu == idMaster)
    {
        if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
            ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
        return VINF_SUCCESS;
    }

    /*
     * One measurement at a time, at least for now.  We might be using
     * broadcast IPIs, so be nice to the rest of the system.
     */
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    rc = RTSemMutexRequest(pDevExt->mtxTscDelta, RT_INDEFINITE_WAIT);
#else
    rc = RTSemFastMutexRequest(pDevExt->mtxTscDelta);
#endif
    if (RT_FAILURE(rc))
        return rc;

    /*
     * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
     * try pick a different master.  (This fudge only works with multi core systems.)
     * ASSUMES related threads have adjacent APIC IDs.  ASSUMES two threads per core.
     *
     * We skip this on AMDs for now as their HTT is different from intel's and
     * it doesn't seem to have any favorable effect on the results.
     *
     * If the master is offline, we need a new master too, so share the code.
     */
    iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
    AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
    pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
    if (   (   (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
            && ASMHasCpuId()
            && ASMIsValidStdRange(ASMCpuId_EAX(0))
            && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
            && !ASMIsAmdCpu()
            && pGip->cOnlineCpus > 2)
        || !RTMpIsCpuOnline(idMaster) )
    {
        /* Pick the first suitable online CPU that isn't the worker and doesn't
           share a core with either the worker or the current master. */
        uint32_t i;
        for (i = 0; i < pGip->cCpus; i++)
            if (   i != iGipCpuMaster
                && i != idxWorker
                && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
                && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
                && pGip->aCPUs[i].idCpu != NIL_RTCPUID
                && pGip->aCPUs[i].idCpu != idMaster /* paranoia starts here... */
                && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu
                && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
                && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic
                && RTMpIsCpuOnline(pGip->aCPUs[i].idCpu))
            {
                iGipCpuMaster = i;
                pGipCpuMaster = &pGip->aCPUs[i];
                idMaster = pGipCpuMaster->idCpu;
                break;
            }
    }

    if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
    {
        /*
         * Initialize data package for the RTMpOnPair callback.
         */
        PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)RTMemAllocZ(sizeof(*pArgs));
        if (pArgs)
        {
            pArgs->pWorker      = pGipCpuWorker;
            pArgs->pMaster      = pGipCpuMaster;
            pArgs->pDevExt      = pDevExt;
            pArgs->pSyncMaster  = NULL;
            pArgs->pSyncWorker  = NULL;
            pArgs->cMaxTscTicks = ASMAtomicReadU64(&pGip->u64CpuHz) / 512; /* 1953 us */

            /*
             * Do the RTMpOnPair call.  We reset i64TSCDelta first so we
             * and supdrvMeasureTscDeltaCallback can use it as a success check.
             */
            /** @todo Store the i64TSCDelta result in pArgs first?   Perhaps deals with
             *        that when doing the restart loop reorg.  */
            ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
            rc = RTMpOnPair(pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, RTMPON_F_CONCURRENT_EXEC,
                            supdrvMeasureTscDeltaCallback, pArgs, NULL);
            if (RT_SUCCESS(rc))
            {
#if 0
                SUPR0Printf("mponpair ticks: %9llu %9llu max: %9llu iTry: %u%s\n", pArgs->cElapsedMasterTscTicks,
                            pArgs->cElapsedWorkerTscTicks, pArgs->cMaxTscTicks, pArgs->iTry,
                            pArgs->fTimedOut ? " timed out" :"");
#endif
#if 0
                SUPR0Printf("rcVerify=%d iVerifyBadTscDiff=%lld cMinVerifyTscTicks=%lld cMaxVerifyTscTicks=%lld\n",
                            pArgs->rcVerify, pArgs->iVerifyBadTscDiff, pArgs->cMinVerifyTscTicks, pArgs->cMaxVerifyTscTicks);
#endif
                /* i64TSCDelta != INT64_MAX means the callback managed to record a delta. */
                if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
                {
                    /*
                     * Work the TSC delta applicability rating.  It starts
                     * optimistic in supdrvGipInit, we downgrade it here.
                     */
                    SUPGIPUSETSCDELTA enmRating;
                    if (   pGipCpuWorker->i64TSCDelta >  GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
                        || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
                        enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
                    else if (   pGipCpuWorker->i64TSCDelta >  GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
                             || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
                        enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
                    else
                        enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
                    /* Only ever downgrade (enum values increase with "worse"). */
                    if (pGip->enmUseTscDelta < enmRating)
                    {
                        AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
                        ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
                    }
                }
                else
                    rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
            }
            /** @todo return try-again if we get an offline CPU error.   */

            RTMemFree(pArgs);
        }
        else
            rc = VERR_NO_MEMORY;
    }
    else
        rc = VERR_CPU_OFFLINE;

    /*
     * We're done now.
     */
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    rc2 = RTSemMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
#else
    rc2 = RTSemFastMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
#endif
    return rc;
}
3902
3903
3904/**
3905 * Clears TSC delta related variables.
3906 *
3907 * Clears all TSC samples as well as the delta synchronization variable on the
3908 * all the per-CPU structs. Optionally also clears the per-cpu deltas too.
3909 *
3910 * @param pDevExt Pointer to the device instance data.
3911 * @param fClearDeltas Whether the deltas are also to be cleared.
3912 */
3913static void supdrvClearTscSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas)
3914{
3915 unsigned iCpu;
3916 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3917 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
3918 {
3919 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
3920 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
3921 if (fClearDeltas)
3922 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
3923 }
3924}
3925
3926
3927/**
3928 * Performs the initial measurements of the TSC deltas between CPUs.
3929 *
3930 * This is called by supdrvGipCreate or triggered by it if threaded.
3931 *
3932 * @returns VBox status code.
3933 * @param pDevExt Pointer to the device instance data.
3934 *
3935 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
3936 * idCpu, GIP's online CPU set which are populated in
3937 * supdrvGipInitOnCpu().
3938 */
3939static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt)
3940{
3941 PSUPGIPCPU pGipCpuMaster;
3942 unsigned iCpu;
3943 unsigned iOddEven;
3944 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3945 uint32_t idxMaster = UINT32_MAX;
3946 int rc = VINF_SUCCESS;
3947 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
3948
3949 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
3950
3951 /*
3952 * Pick the first CPU online as the master TSC and make it the new GIP master based
3953 * on the APIC ID.
3954 *
3955 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
3956 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
3957 * master as this point since the sync/async timer isn't created yet.
3958 */
3959 supdrvClearTscSamples(pDevExt, true /* fClearDeltas */);
3960 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
3961 {
3962 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
3963 if (idxCpu != UINT16_MAX)
3964 {
3965 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
3966 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
3967 {
3968 idxMaster = idxCpu;
3969 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
3970 break;
3971 }
3972 }
3973 }
3974 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
3975 pGipCpuMaster = &pGip->aCPUs[idxMaster];
3976 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
3977
3978 /*
3979 * If there is only a single CPU online we have nothing to do.
3980 */
3981 if (pGip->cOnlineCpus <= 1)
3982 {
3983 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
3984 return VINF_SUCCESS;
3985 }
3986
3987 /*
3988 * Loop thru the GIP CPU array and get deltas for each CPU (except the
3989 * master). We do the CPUs with the even numbered APIC IDs first so that
3990 * we've got alternative master CPUs to pick from on hyper-threaded systems.
3991 */
3992 for (iOddEven = 0; iOddEven < 2; iOddEven++)
3993 {
3994 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
3995 {
3996 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
3997 if ( iCpu != idxMaster
3998 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
3999 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4000 {
4001 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
4002 if (RT_FAILURE(rc))
4003 {
4004 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
4005 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
4006 break;
4007 }
4008
4009 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
4010 {
4011 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
4012 rc = VERR_TRY_AGAIN;
4013 break;
4014 }
4015 }
4016 }
4017 }
4018
4019 return rc;
4020}
4021
4022
4023#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4024
4025/**
4026 * Switches the TSC-delta measurement thread into the butchered state.
4027 *
4028 * @returns VBox status code.
4029 * @param pDevExt Pointer to the device instance data.
4030 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
4031 * @param pszFailed An error message to log.
4032 * @param rcFailed The error code to exit the thread with.
4033 */
4034static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
4035{
4036 if (!fSpinlockHeld)
4037 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4038
4039 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
4040 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4041 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
4042 return rcFailed;
4043}
4044
4045
/**
 * The TSC-delta measurement thread.
 *
 * Services initial and subsequent TSC-delta measurement requests driven by a
 * small state machine in pDevExt->enmTscDeltaThreadState.  The spinlock is
 * held when the state is read at the top of the loop; each case is
 * responsible for releasing it (note the deliberate fall-throughs below,
 * which keep the lock held into the next case).
 *
 * @returns VBox status code.
 * @param   hThread     The thread handle.
 * @param   pvUser      Opaque pointer to the device instance data.
 */
static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    bool fInitialMeasurement = true;
    uint32_t cConsecutiveTimeouts = 0;
    int rc = VERR_INTERNAL_ERROR_2;
    for (;;)
    {
        /*
         * Switch on the current state.
         */
        SUPDRVTSCDELTATHREADSTATE enmState;
        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
        enmState = pDevExt->enmTscDeltaThreadState;
        switch (enmState)
        {
            case kTscDeltaThreadState_Creating:
            {
                /* Acknowledge creation so supdrvTscDeltaThreadInit can proceed. */
                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
                if (RT_FAILURE(rc))
                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
                /* fall thru */
            }

            case kTscDeltaThreadState_Listening:
            {
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);

                /* Simple adaptive timeout: back off 1 -> 10 -> 100 -> 500 ms after
                   every 10 consecutive timeouts with nothing to do. */
                if (cConsecutiveTimeouts++ == 10)
                {
                    if (pDevExt->cMsTscDeltaTimeout == 1)       /* 10 ms */
                        pDevExt->cMsTscDeltaTimeout = 10;
                    else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
                        pDevExt->cMsTscDeltaTimeout = 100;
                    else if (pDevExt->cMsTscDeltaTimeout == 100) /* +500 ms */
                        pDevExt->cMsTscDeltaTimeout = 500;
                    cConsecutiveTimeouts = 0;
                }
                rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
                if (   RT_FAILURE(rc)
                    && rc != VERR_TIMEOUT)
                    return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
                RTThreadUserReset(pDevExt->hTscDeltaThread);
                break;
            }

            case kTscDeltaThreadState_WaitAndMeasure:
            {
                /* Tell the requester we're on it, then give it a moment to start waiting. */
                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
                if (RT_FAILURE(rc))
                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                pDevExt->cMsTscDeltaTimeout = 1;
                RTThreadSleep(1);
                /* fall thru */
            }

            case kTscDeltaThreadState_Measuring:
            {
                cConsecutiveTimeouts = 0;
                if (fInitialMeasurement)
                {
                    /* First run: measure all CPUs, retrying on transient errors. */
                    int cTries = 8;
                    int cMsWaitPerTry = 10;
                    fInitialMeasurement = false;
                    do
                    {
                        rc = supdrvMeasureInitialTscDeltas(pDevExt);
                        if (   RT_SUCCESS(rc)
                            || (   RT_FAILURE(rc)
                                && rc != VERR_TRY_AGAIN
                                && rc != VERR_CPU_OFFLINE))
                        {
                            break;
                        }
                        RTThreadSleep(cMsWaitPerTry);
                    } while (cTries-- > 0);
                }
                else
                {
                    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
                    unsigned iCpu;

                    /* Measure TSC-deltas only for the CPUs that are in the set. */
                    rc = VINF_SUCCESS;
                    for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
                    {
                        PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
                        if (RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
                        {
                            if (pGipCpuWorker->i64TSCDelta == INT64_MAX)
                            {
                                /* Remember the first failure but keep going. */
                                int rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
                                if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
                                    rc = rc2;
                            }
                            else
                            {
                                /*
                                 * The thread/someone must've called SUPR0TscDeltaMeasureBySetIndex,
                                 * mark the delta as fine to get the timer thread off our back.
                                 */
                                RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
                                RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
                            }
                        }
                    }
                }
                RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
                if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
                    pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                Assert(rc != VERR_NOT_AVAILABLE);   /* VERR_NOT_AVAILABLE is used as the initial value. */
                ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
                break;
            }

            case kTscDeltaThreadState_Terminating:
                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                return VINF_SUCCESS;

            case kTscDeltaThreadState_Butchered:
            default:
                return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
        }
    }

    return rc; /* not reached - all exits happen via returns inside the loop */
}
4186
4187
/**
 * Waits for the TSC-delta measurement thread to respond to a state change.
 *
 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
 *          other error code on internal error.
 *
 * @param   pDevExt         Pointer to the device instance data.
 * @param   enmCurState     The current state.
 * @param   enmNewState     The new state we're waiting for it to enter.
 */
static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
                                    SUPDRVTSCDELTATHREADSTATE enmNewState)
{
    /*
     * Wait a short while for the expected state transition.
     */
    int rc;
    RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
    RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    if (pDevExt->enmTscDeltaThreadState == enmNewState)
    {
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        rc = VINF_SUCCESS;
    }
    else if (pDevExt->enmTscDeltaThreadState == enmCurState)
    {
        /*
         * Wait longer if the state has not yet transitioned to the one we want.
         */
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
        if (   RT_SUCCESS(rc)
            || rc == VERR_TIMEOUT)
        {
            /*
             * Check the state whether we've succeeded.
             */
            SUPDRVTSCDELTATHREADSTATE enmState;
            RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
            enmState = pDevExt->enmTscDeltaThreadState;
            RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
            if (enmState == enmNewState)
                rc = VINF_SUCCESS;
            else if (enmState == enmCurState)
            {
                rc = VERR_TIMEOUT;
                OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
                            enmNewState));
            }
            else
            {
                rc = VERR_INTERNAL_ERROR;
                OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
                            enmState, enmNewState));
            }
        }
        else
            OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
    }
    else
    {
        /* Already in some third state - neither the old nor the expected new one. */
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
        rc = VERR_INTERNAL_ERROR;
    }

    return rc;
}
4256
4257
4258/**
4259 * Signals the TSC-delta thread to start measuring TSC-deltas.
4260 *
4261 * @param pDevExt Pointer to the device instance data.
4262 */
4263static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt)
4264{
4265 if (RT_LIKELY(pDevExt->hTscDeltaThread != NIL_RTTHREAD))
4266 {
4267 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4268 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4269 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4270 {
4271 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4272 }
4273 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4274 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4275 }
4276}
4277
4278
4279/**
4280 * Terminates the actual thread running supdrvTscDeltaThread().
4281 *
4282 * This is an internal worker function for supdrvTscDeltaThreadInit() and
4283 * supdrvTscDeltaTerm().
4284 *
4285 * @param pDevExt Pointer to the device instance data.
4286 */
4287static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
4288{
4289 int rc;
4290 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4291 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
4292 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4293 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4294 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
4295 if (RT_FAILURE(rc))
4296 {
4297 /* Signal a few more times before giving up. */
4298 int cTriesLeft = 5;
4299 while (--cTriesLeft > 0)
4300 {
4301 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4302 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
4303 if (rc != VERR_TIMEOUT)
4304 break;
4305 }
4306 }
4307}
4308
4309
4310/**
4311 * Initializes and spawns the TSC-delta measurement thread.
4312 *
4313 * A thread is required for servicing re-measurement requests from events like
4314 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
4315 * under all contexts on all OSs.
4316 *
4317 * @returns VBox status code.
4318 * @param pDevExt Pointer to the device instance data.
4319 *
4320 * @remarks Must only be called -after- initializing GIP and setting up MP
4321 * notifications!
4322 */
4323static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
4324{
4325 int rc;
4326 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4327 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
4328 if (RT_SUCCESS(rc))
4329 {
4330 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
4331 if (RT_SUCCESS(rc))
4332 {
4333 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
4334 pDevExt->cMsTscDeltaTimeout = 1;
4335 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
4336 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
4337 if (RT_SUCCESS(rc))
4338 {
4339 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
4340 if (RT_SUCCESS(rc))
4341 {
4342 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4343 return rc;
4344 }
4345
4346 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
4347 supdrvTscDeltaThreadTerminate(pDevExt);
4348 }
4349 else
4350 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
4351 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4352 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4353 }
4354 else
4355 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
4356 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4357 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4358 }
4359 else
4360 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
4361
4362 return rc;
4363}
4364
4365
4366/**
4367 * Terminates the TSC-delta measurement thread and cleanup.
4368 *
4369 * @param pDevExt Pointer to the device instance data.
4370 */
4371static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
4372{
4373 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
4374 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4375 {
4376 supdrvTscDeltaThreadTerminate(pDevExt);
4377 }
4378
4379 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
4380 {
4381 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4382 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4383 }
4384
4385 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4386 {
4387 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4388 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4389 }
4390
4391 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4392}
4393
4394#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4395
4396/**
4397 * Measure the TSC delta for the CPU given by its CPU set index.
4398 *
4399 * @returns VBox status code.
4400 * @retval VERR_INTERRUPTED if interrupted while waiting.
4401 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if we were unable to get a
4402 * measurment.
4403 * @retval VERR_CPU_OFFLINE if the specified CPU is offline.
4404 * @retval VERR_CPU_OFFLINE if the specified CPU is offline.
4405 *
4406 * @param pSession The caller's session. GIP must've been mapped.
4407 * @param iCpuSet The CPU set index of the CPU to measure.
4408 * @param fFlags Flags, SUP_TSCDELTA_MEASURE_F_XXX.
4409 * @param cMsWaitRetry Number of milliseconds to wait between each retry.
4410 * @param cMsWaitThread Number of milliseconds to wait for the thread to get
4411 * ready.
4412 * @param cTries Number of times to try, pass 0 for the default.
4413 */
4414SUPR0DECL(int) SUPR0TscDeltaMeasureBySetIndex(PSUPDRVSESSION pSession, uint32_t iCpuSet, uint32_t fFlags,
4415 RTMSINTERVAL cMsWaitRetry, RTMSINTERVAL cMsWaitThread, uint32_t cTries)
4416{
4417 PSUPDRVDEVEXT pDevExt;
4418 PSUPGLOBALINFOPAGE pGip;
4419 uint16_t iGipCpu;
4420 int rc;
4421#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4422 uint64_t msTsStartWait;
4423 uint32_t iWaitLoop;
4424#endif
4425
4426 /*
4427 * Validate and adjust the input.
4428 */
4429 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4430 if (!pSession->fGipReferenced)
4431 return VERR_WRONG_ORDER;
4432
4433 pDevExt = pSession->pDevExt;
4434 AssertReturn(SUP_IS_DEVEXT_VALID(pDevExt), VERR_INVALID_PARAMETER);
4435
4436 pGip = pDevExt->pGip;
4437 AssertPtrReturn(pGip, VERR_INTERNAL_ERROR_2);
4438
4439 AssertReturn(iCpuSet < RTCPUSET_MAX_CPUS, VERR_INVALID_CPU_INDEX);
4440 AssertReturn(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx), VERR_INVALID_CPU_INDEX);
4441 iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
4442 AssertReturn(iGipCpu < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4443
4444 if (fFlags & ~SUP_TSCDELTA_MEASURE_F_VALID_MASK)
4445 return VERR_INVALID_FLAGS;
4446
4447 /*
4448 * The request is a noop if the TSC delta isn't being used.
4449 */
4450 if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4451 return VINF_SUCCESS;
4452
4453 if (cTries == 0)
4454 cTries = 12;
4455 else if (cTries > 256)
4456 cTries = 256;
4457
4458 if (cMsWaitRetry == 0)
4459 cMsWaitRetry = 2;
4460 else if (cMsWaitRetry > 1000)
4461 cMsWaitRetry = 1000;
4462
4463#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4464 /*
4465 * Has the TSC already been measured and we're not forced to redo it?
4466 */
4467 if ( pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX
4468 && !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE))
4469 return VINF_SUCCESS;
4470
4471 /*
4472 * Asynchronous request? Forward it to the thread, no waiting.
4473 */
4474 if (fFlags & SUP_TSCDELTA_MEASURE_F_ASYNC)
4475 {
4476 /** @todo Async. doesn't implement options like retries, waiting. We'll need
4477 * to pass those options to the thread somehow and implement it in the
4478 * thread. Check if anyone uses/needs fAsync before implementing this. */
4479 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4480 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
4481 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4482 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4483 {
4484 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4485 rc = VINF_SUCCESS;
4486 }
4487 else if (pDevExt->enmTscDeltaThreadState != kTscDeltaThreadState_WaitAndMeasure)
4488 rc = VERR_THREAD_IS_DEAD;
4489 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4490 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4491 return VINF_SUCCESS;
4492 }
4493
4494 /*
4495 * If a TSC-delta measurement request is already being serviced by the thread,
4496 * wait 'cTries' times if a retry-timeout is provided, otherwise bail as busy.
4497 */
4498 msTsStartWait = RTTimeSystemMilliTS();
4499 for (iWaitLoop = 0;; iWaitLoop++)
4500 {
4501 uint64_t cMsElapsed;
4502 SUPDRVTSCDELTATHREADSTATE enmState;
4503 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4504 enmState = pDevExt->enmTscDeltaThreadState;
4505 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4506
4507 if (enmState == kTscDeltaThreadState_Measuring)
4508 { /* Must wait, the thread is busy. */ }
4509 else if (enmState == kTscDeltaThreadState_WaitAndMeasure)
4510 { /* Must wait, this state only says what will happen next. */ }
4511 else if (enmState == kTscDeltaThreadState_Terminating)
4512 { /* Must wait, this state only says what should happen next. */ }
4513 else
4514 break; /* All other states, the thread is either idly listening or dead. */
4515
4516 /* Wait or fail. */
4517 if (cMsWaitThread == 0)
4518 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4519 cMsElapsed = RTTimeSystemMilliTS() - msTsStartWait;
4520 if (cMsElapsed >= cMsWaitThread)
4521 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4522
4523 rc = RTThreadSleep(RT_MIN((RTMSINTERVAL)(cMsWaitThread - cMsElapsed), RT_MIN(iWaitLoop + 1, 10)));
4524 if (rc == VERR_INTERRUPTED)
4525 return rc;
4526 }
4527#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4528
4529 /*
4530 * Try measure the TSC delta the given number of times.
4531 */
4532 for (;;)
4533 {
4534 /* Unless we're forced to measure the delta, check whether it's done already. */
4535 if ( !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE)
4536 && pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX)
4537 {
4538 rc = VINF_SUCCESS;
4539 break;
4540 }
4541
4542 /* Measure it. */
4543 rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
4544 if (rc != VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED)
4545 {
4546 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
4547 break;
4548 }
4549
4550 /* Retry? */
4551 if (cTries <= 1)
4552 break;
4553 cTries--;
4554
4555 /* Always delay between retries (be nice to the rest of the system
4556 and avoid the BSOD hounds). */
4557 rc = RTThreadSleep(cMsWaitRetry);
4558 if (rc == VERR_INTERRUPTED)
4559 break;
4560 }
4561
4562 return rc;
4563}
4564
4565
4566/**
4567 * Service a TSC-delta measurement request.
4568 *
4569 * @returns VBox status code.
4570 * @param pDevExt Pointer to the device instance data.
4571 * @param pSession The support driver session.
4572 * @param pReq Pointer to the TSC-delta measurement request.
4573 */
4574int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
4575{
4576 uint32_t cTries;
4577 uint32_t iCpuSet;
4578 uint32_t fFlags;
4579 RTMSINTERVAL cMsWaitRetry;
4580
4581 /*
4582 * Validate and adjust/resolve the input so they can be passed onto SUPR0TscDeltaMeasureBySetIndex.
4583 */
4584 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
4585
4586 if (pReq->u.In.idCpu == NIL_RTCPUID)
4587 return VERR_INVALID_CPU_ID;
4588 iCpuSet = RTMpCpuIdToSetIndex(pReq->u.In.idCpu);
4589 if (iCpuSet >= RTCPUSET_MAX_CPUS)
4590 return VERR_INVALID_CPU_ID;
4591
4592 cTries = pReq->u.In.cRetries == 0 ? 0 : (uint32_t)pReq->u.In.cRetries + 1;
4593
4594 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
4595
4596 fFlags = 0;
4597 if (pReq->u.In.fAsync)
4598 fFlags |= SUP_TSCDELTA_MEASURE_F_ASYNC;
4599 if (pReq->u.In.fForce)
4600 fFlags |= SUP_TSCDELTA_MEASURE_F_FORCE;
4601
4602 return SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet, fFlags, cMsWaitRetry,
4603 cTries == 0 ? 5 * RT_MS_1SEC : cMsWaitRetry * cTries /*cMsWaitThread*/,
4604 cTries);
4605}
4606
4607
/**
 * Reads TSC with delta applied.
 *
 * Will try to resolve delta value INT64_MAX before applying it.  This is the
 * main purpose of this function, to handle the case where the delta needs to be
 * determined.
 *
 * @returns VBox status code.
 * @param   pDevExt     Pointer to the device instance data.
 * @param   pSession    The support driver session.
 * @param   pReq        Pointer to the TSC-read request.
 */
int VBOXCALL supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
{
    PSUPGLOBALINFOPAGE pGip;
    int rc;

    /*
     * Validate.  We require the client to have mapped GIP (no asserting on
     * ring-3 preconditions).
     */
    AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
    if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
        return VERR_WRONG_ORDER;
    pGip = pDevExt->pGip;
    AssertReturn(pGip, VERR_INTERNAL_ERROR_2);

    /*
     * We're usually here because we need to apply delta, but we shouldn't be
     * upset if the GIP is some different mode.
     */
    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        uint32_t cTries = 0;
        for (;;)
        {
            /*
             * Start by gathering the data, using CLI for disabling preemption
             * while we do that.
             */
            RTCCUINTREG fEFlags = ASMIntDisableFlags();
            int         iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
            int         iGipCpu;
            if (RT_LIKELY(   (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                          && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
            {
                /* Snapshot delta + APIC id + TSC atomically w.r.t. CPU migration. */
                int64_t i64Delta   = pGip->aCPUs[iGipCpu].i64TSCDelta;
                pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
                pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
                ASMSetFlags(fEFlags);

                /*
                 * If we're lucky we've got a delta, but no predictions here
                 * as this I/O control is normally only used when the TSC delta
                 * is set to INT64_MAX.
                 */
                if (i64Delta != INT64_MAX)
                {
                    pReq->u.Out.u64AdjustedTsc -= i64Delta;
                    rc = VINF_SUCCESS;
                    break;
                }

                /* Give up after a few times. */
                if (cTries >= 4)
                {
                    rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
                    break;
                }

                /* Need to measure the delta and try again. */
                rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
                Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
                /** @todo should probably delay on failure... dpc watchdogs */
            }
            else
            {
                /* This really shouldn't happen. */
                AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
                pReq->u.Out.idApic = ASMGetApicId();
                pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
                ASMSetFlags(fEFlags);
                rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
                break;
            }
        }
    }
    else
    {
        /*
         * No delta to apply. Easy. Deal with preemption the lazy way.
         */
        RTCCUINTREG fEFlags = ASMIntDisableFlags();
        int         iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
        int         iGipCpu;
        if (RT_LIKELY(   (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                      && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
            pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
        else
            pReq->u.Out.idApic = ASMGetApicId();
        pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
        ASMSetFlags(fEFlags);
        rc = VINF_SUCCESS;
    }

    return rc;
}
4715
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette