VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp@ 54346

Last change on this file since 54346 was 54345, checked in by vboxsync, 10 years ago

Cleaning up the 2nd algo.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 135.5 KB
Line 
1/* $Id: SUPDrvGip.cpp 54345 2015-02-20 20:27:23Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/uint128.h>
58#include <iprt/x86.h>
59
60#include <VBox/param.h>
61#include <VBox/log.h>
62#include <VBox/err.h>
63
64#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
65# include "dtrace/SUPDrv.h"
66#else
67/* ... */
68#endif
69
70
71/*******************************************************************************
72* Defined Constants And Macros *
73*******************************************************************************/
74/** The frequency by which we recalculate the u32UpdateHz and
75 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
76 *
77 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
78 */
79#define GIP_UPDATEHZ_RECALC_FREQ 0x800
80
81/** A reserved TSC value used for synchronization as well as measurement of
82 * TSC deltas. */
83#define GIP_TSC_DELTA_RSVD UINT64_MAX
84/** The number of TSC delta measurement loops in total (includes primer and
85 * read-time loops). */
86#define GIP_TSC_DELTA_LOOPS 96
87/** The number of cache primer loops. */
88#define GIP_TSC_DELTA_PRIMER_LOOPS 4
89/** The number of loops until we keep computing the minimum read time. */
90#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
91/** Stop measurement of TSC delta. */
92#define GIP_TSC_DELTA_SYNC_STOP 0
93/** Start measurement of TSC delta. */
94#define GIP_TSC_DELTA_SYNC_START 1
95/** Worker thread is ready for reading the TSC. */
96#define GIP_TSC_DELTA_SYNC_WORKER_READY 2
97/** Worker thread is done updating TSC delta info. */
98#define GIP_TSC_DELTA_SYNC_WORKER_DONE 3
99/** When IPRT isn't concurrent safe: Master is ready and will wait for worker
100 * with a timeout. */
101#define GIP_TSC_DELTA_SYNC_PRESTART_MASTER 4
102/** When IPRT isn't concurrent safe: Worker is ready after waiting for
103 * master with a timeout. */
104#define GIP_TSC_DELTA_SYNC_PRESTART_WORKER 5
105/** The TSC-refinement interval in seconds. */
106#define GIP_TSC_REFINE_INTERVAL 5
107/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating. */
108#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
109/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating. */
110#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
111/** The TSC delta value for the initial GIP master - 0 in regular builds.
112 * To test the delta code, flip the '#if 0' below to select the non-zero value. */
113#if 0
114# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
115#else
116# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
117#endif
118
/* Compile-time sanity: there are fewer primer loops than read-time loops, and
 * the two together are fewer than the total number of measurement loops. */
119AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
120AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
121
122/** @def VBOX_SVN_REV
123 * The makefile should define this if it can; it falls back to 0 otherwise. */
124#ifndef VBOX_SVN_REV
125# define VBOX_SVN_REV 0
126#endif
127
128#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
129# define DO_NOT_START_GIP
130#endif
131
132
133/*******************************************************************************
134* Internal Functions *
135*******************************************************************************/
/* Forward declarations.  The two timer callbacks and supdrvGipInitCpu are not
 * defined in the part of the file shown before their first use; the three
 * TSC-delta thread helpers only exist when SUPDRV_USE_TSC_DELTA_THREAD is
 * defined. */
136static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
137static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
138static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
139#ifdef SUPDRV_USE_TSC_DELTA_THREAD
140static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
141static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
142static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt);
143#endif
144
145
146/*******************************************************************************
147* Global Variables *
148*******************************************************************************/
/** Exported pointer to the global info page (GIP).  Starts out as NULL and is
 * the value handed out by SUPGetGIP().  NOTE(review): the code that assigns it
 * is not part of this excerpt. */
149DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
150
151
152
153/*
154 *
155 * Misc Common GIP Code
156 * Misc Common GIP Code
157 * Misc Common GIP Code
158 *
159 *
160 */
161
162
163/**
164 * Finds the GIP CPU index corresponding to @a idCpu.
165 *
166 * @returns GIP CPU array index, UINT32_MAX if not found.
167 * @param pGip The GIP.
168 * @param idCpu The CPU ID.
169 */
170static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
171{
172 uint32_t i;
173 for (i = 0; i < pGip->cCpus; i++)
174 if (pGip->aCPUs[i].idCpu == idCpu)
175 return i;
176 return UINT32_MAX;
177}
178
179
180/**
181 * Applies the TSC delta to the supplied raw TSC value.
182 *
183 * @returns VBox status code. (Ignored by all users, just FYI.)
184 * @param pGip Pointer to the GIP.
185 * @param puTsc Pointer to a valid TSC value before the TSC delta has been applied.
186 * @param idApic The APIC ID of the CPU @c puTsc corresponds to.
187 * @param fDeltaApplied Where to store whether the TSC delta was succesfully
188 * applied or not (optional, can be NULL).
189 *
190 * @remarks Maybe called with interrupts disabled in ring-0!
191 *
192 * @note Don't you dare change the delta calculation. If you really do, make
193 * sure you update all places where it's used (IPRT, SUPLibAll.cpp,
194 * SUPDrv.c, supdrvGipMpEvent, and more).
195 */
196DECLINLINE(int) supdrvTscDeltaApply(PSUPGLOBALINFOPAGE pGip, uint64_t *puTsc, uint16_t idApic, bool *pfDeltaApplied)
197{
198 int rc;
199
200 /*
201 * Validate input.
202 */
203 AssertPtr(puTsc);
204 AssertPtr(pGip);
205 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
206
207 /*
208 * Carefully convert the idApic into a GIPCPU entry.
209 */
210 if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
211 {
212 uint16_t iCpu = pGip->aiCpuFromApicId[idApic];
213 if (RT_LIKELY(iCpu < pGip->cCpus))
214 {
215 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
216
217 /*
218 * Apply the delta if valid.
219 */
220 if (RT_LIKELY(pGipCpu->i64TSCDelta != INT64_MAX))
221 {
222 *puTsc -= pGipCpu->i64TSCDelta;
223 if (pfDeltaApplied)
224 *pfDeltaApplied = true;
225 return VINF_SUCCESS;
226 }
227
228 rc = VINF_SUCCESS;
229 }
230 else
231 {
232 AssertMsgFailed(("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus));
233 rc = VERR_INVALID_CPU_INDEX;
234 }
235 }
236 else
237 {
238 AssertMsgFailed(("idApic=%u\n", idApic));
239 rc = VERR_INVALID_CPU_ID;
240 }
241 if (pfDeltaApplied)
242 *pfDeltaApplied = false;
243 return rc;
244}
245
246
247/*
248 *
249 * GIP Mapping and Unmapping Related Code.
250 * GIP Mapping and Unmapping Related Code.
251 * GIP Mapping and Unmapping Related Code.
252 *
253 *
254 */
255
256
257/**
258 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
259 * updating.
260 *
261 * @param pGip Pointer to the GIP.
262 * @param pGipCpu The per CPU structure for this CPU.
263 * @param u64NanoTS The current time.
264 */
265static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
266{
267 /*
268 * Here we don't really care about applying the TSC delta. The re-initialization of this
269 * value is not relevant especially while (re)starting the GIP as the first few ones will
270 * be ignored anyway, see supdrvGipDoUpdateCpu().
271 */
272 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
273 pGipCpu->u64NanoTS = u64NanoTS;
274}
275
276
277/**
278 * Set the current TSC and NanoTS value for the CPU.
279 *
280 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
281 * @param pvUser1 Pointer to the ring-0 GIP mapping.
282 * @param pvUser2 Pointer to the variable holding the current time.
283 */
284static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
285{
286 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
287 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
288
289 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
290 supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
291
292 NOREF(pvUser2);
293 NOREF(idCpu);
294}
295
296
297/**
298 * State structure for supdrvGipDetectGetGipCpuCallback.
 *
 * One instance is set up by SUPR0GipMap and shared by all CPUs running the
 * callback, which is why the callback only touches the members with atomic
 * operations.
299 */
300typedef struct SUPDRVGIPDETECTGETCPU
301{
302 /** Bitmap of APIC IDs that have been seen (initialized to zero), one bit
303 * per possible APIC ID. Used to detect duplicate APIC IDs (paranoia). */
304 uint8_t volatile bmApicId[256 / 8];
305 /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
306 * initially). Each callback ANDs in the methods it detected, so methods
 * missing on any CPU get cleared. */
307 uint32_t volatile fSupported;
308 /** The ID of the first CPU whose callback detected any kind of range or
309 * duplicate issue (initialized to NIL_RTCPUID). */
310 RTCPUID volatile idCpuProblem;
311} SUPDRVGIPDETECTGETCPU;
312/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
313typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
314
315
316/**
317 * Checks for alternative ways of getting the CPU ID.
318 *
319 * This also checks the APIC ID, CPU ID and CPU set index values against the
320 * GIP tables.
321 *
322 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
323 * @param pvUser1 Pointer to the state structure.
324 * @param pvUser2 Pointer to the GIP.
325 */
326static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
327{
328 PSUPDRVGIPDETECTGETCPU pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
329 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser2;
330 uint32_t fSupported = 0;
331 uint16_t idApic;
332 int iCpuSet;
333
334 AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
335
336 /*
337 * Check that the CPU ID and CPU set index are interchangable.
338 */
339 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
340 if ((RTCPUID)iCpuSet == idCpu)
341 {
342 AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
343 if ( iCpuSet >= 0
344 && iCpuSet < RTCPUSET_MAX_CPUS
345 && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
346 {
347 /*
348 * Check whether the IDTR.LIMIT contains a CPU number.
349 */
350#ifdef RT_ARCH_X86
351 uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
352#else
353 uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
354#endif
355 RTIDTR Idtr;
356 ASMGetIDTR(&Idtr);
357 if (Idtr.cbIdt >= cbIdt)
358 {
359 uint32_t uTmp = Idtr.cbIdt - cbIdt;
360 uTmp &= RTCPUSET_MAX_CPUS - 1;
361 if (uTmp == idCpu)
362 {
363 RTIDTR Idtr2;
364 ASMGetIDTR(&Idtr2);
365 if (Idtr2.cbIdt == Idtr.cbIdt)
366 fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
367 }
368 }
369
370 /*
371 * Check whether RDTSCP is an option.
372 */
373 if (ASMHasCpuId())
374 {
375 if ( ASMIsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
376 && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
377 {
378 uint32_t uAux;
379 ASMReadTscWithAux(&uAux);
380 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
381 {
382 ASMNopPause();
383 ASMReadTscWithAux(&uAux);
384 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
385 fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
386 }
387 }
388 }
389 }
390 }
391
392 /*
393 * Check that the APIC ID is unique.
394 */
395 idApic = ASMGetApicId();
396 if (RT_LIKELY( idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)
397 && !ASMAtomicBitTestAndSet(pState->bmApicId, idApic)))
398 fSupported |= SUPGIPGETCPU_APIC_ID;
399 else
400 {
401 AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
402 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
403 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - duplicate APIC ID.\n",
404 idCpu, iCpuSet, idApic));
405 }
406
407 /*
408 * Check that the iCpuSet is within the expected range.
409 */
410 if (RT_UNLIKELY( iCpuSet < 0
411 || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
412 || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
413 {
414 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
415 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
416 idCpu, iCpuSet, idApic));
417 }
418 else
419 {
420 RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
421 if (RT_UNLIKELY(idCpu2 != idCpu))
422 {
423 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
424 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
425 idCpu, iCpuSet, idApic, idCpu2));
426 }
427 }
428
429 /*
430 * Update the supported feature mask before we return.
431 */
432 ASMAtomicAndU32(&pState->fSupported, fSupported);
433
434 NOREF(pvUser2);
435}
436
437
438/**
439 * Increase the timer freqency on hosts where this is possible (NT).
440 *
441 * The idea is that more interrupts is better for us... Also, it's better than
442 * we increase the timer frequence, because we might end up getting inaccurate
443 * callbacks if someone else does it.
444 *
445 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
446 */
447static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
448{
449 if (pDevExt->u32SystemTimerGranularityGrant == 0)
450 {
451 uint32_t u32SystemResolution;
452 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
453 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
454 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
455 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
456 )
457 {
458 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
459 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
460 }
461 }
462}
463
464
465/**
466 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
467 *
468 * @param pDevExt Clears u32SystemTimerGranularityGrant.
469 */
470static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
471{
472 if (pDevExt->u32SystemTimerGranularityGrant)
473 {
474 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
475 AssertRC(rc2);
476 pDevExt->u32SystemTimerGranularityGrant = 0;
477 }
478}
479
480
481/**
482 * Maps the GIP into userspace and/or gets the physical address of the GIP.
483 *
 * The first session to reference the GIP (user count going from 0 to 1) also
 * (re)starts GIP updating: the host timer frequency is raised, the per-CPU
 * data is re-seeded, the available ways of getting the CPU ID are detected
 * and the GIP timer is started.  All of this happens while holding mtxGip.
 *
484 * @returns IPRT status code.
 * @retval VERR_GENERAL_FAILURE if there is no GIP.
 * @retval VERR_UNSUPPORTED_CPU if no CPU ID getter method was detected.
 * @retval VERR_INVALID_CPU_ID if an APIC ID / CPU ID / CPU set index problem
 * was detected on some CPU.
485 * @param pSession Session to which the GIP mapping should belong.
486 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
487 * @param pHCPhysGip Where to store the physical address. (optional)
488 *
489 * @remark There is no reference counting on the mapping, so one call to this function
490 * counts globally as one reference. One call to SUPR0GipUnmap() will unmap the GIP
491 * and remove the session as a GIP user.
492 */
493SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
494{
495 int rc;
496 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
497 RTR3PTR pGipR3 = NIL_RTR3PTR;
498 RTHCPHYS HCPhys = NIL_RTHCPHYS;
499 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
500
501 /*
502 * Validate the input. (Note that pSession->pDevExt has already been read above.)
503 */
504 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
505 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
506 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
507
 /* Take the GIP lock; it is held until just before the return values are written. */
508#ifdef SUPDRV_USE_MUTEX_FOR_GIP
509 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
510#else
511 RTSemFastMutexRequest(pDevExt->mtxGip);
512#endif
513 if (pDevExt->pGip)
514 {
515 /*
516 * Map it? Only if the caller wants the ring-3 address; an existing session mapping is reused.
517 */
518 rc = VINF_SUCCESS;
519 if (ppGipR3)
520 {
521 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
522 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
523 RTMEM_PROT_READ, RTR0ProcHandleSelf());
524 if (RT_SUCCESS(rc))
525 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
526 }
527
528 /*
529 * Get physical address.
530 */
531 if (pHCPhysGip && RT_SUCCESS(rc))
532 HCPhys = pDevExt->HCPhysGip;
533
534 /*
535 * Reference globally (at most once per session).
536 */
537 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
538 {
539 pSession->fGipReferenced = 1;
540 pDevExt->cGipUsers++;
541 if (pDevExt->cGipUsers == 1)
542 {
543 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
544 uint64_t u64NanoTS;
545
546 /*
547 * GIP starts/resumes updating again. On windows we bump the
548 * host timer frequency to make sure we don't get stuck in guest
549 * mode and to get better timer (and possibly clock) accuracy.
550 */
551 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
552
553 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
554
555 /*
556 * When resuming (the transaction id of CPU 0 is no longer 2), advance
 * every CPU's transaction id to the next multiple of
 * GIP_UPDATEHZ_RECALC_FREQ * 2 and zero u64NanoTSLastUpdateHz.
 * NOTE(review): presumably this restarts the update frequency
 * recalculation from a clean boundary - confirm against the update code.
557 */
558 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
559 {
560 unsigned i;
561 for (i = 0; i < pGipR0->cCpus; i++)
562 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
563 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
564 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
565 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
566 }
567
568 /*
569 * Re-seed the per-CPU TSC and NanoTS values with a timestamp one update
 * interval in the past. Only aCPUs[0] is re-initialized in invariant
 * and sync TSC mode or when a single CPU is online; otherwise every
 * CPU re-initializes its own entry.
570 */
571 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
572 if ( pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
573 || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
574 || RTMpGetOnlineCount() == 1)
575 supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
576 else
577 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
578
579 /*
580 * Detect alternative ways to figure the CPU ID in ring-3 and
581 * raw-mode context. Check the sanity of the APIC IDs, CPU IDs,
582 * and CPU set indexes while we're at it.
583 */
584 if (RT_SUCCESS(rc))
585 {
586 SUPDRVGIPDETECTGETCPU DetectState;
587 RT_BZERO((void *)&DetectState.bmApicId, sizeof(DetectState.bmApicId));
588 DetectState.fSupported = UINT32_MAX;
589 DetectState.idCpuProblem = NIL_RTCPUID;
590 rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, &DetectState, pGipR0);
591 if (DetectState.idCpuProblem == NIL_RTCPUID)
592 {
 /* UINT32_MAX means the mask was never updated; 0 means no method works on all CPUs. */
593 if ( DetectState.fSupported != UINT32_MAX
594 && DetectState.fSupported != 0)
595 {
596 if (pGipR0->fGetGipCpu != DetectState.fSupported)
597 {
598 pGipR0->fGetGipCpu = DetectState.fSupported;
599 LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", DetectState.fSupported));
600 }
601 }
602 else
603 {
604 LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
605 DetectState.fSupported));
606 rc = VERR_UNSUPPORTED_CPU;
607 }
608 }
609 else
610 {
611 LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
612 DetectState.idCpuProblem, DetectState.idCpuProblem));
613 rc = VERR_INVALID_CPU_ID;
614 }
615 }
616
617 /*
618 * Start the GIP timer if all is well. A start failure only trips
 * the assertion; rc is forced back to VINF_SUCCESS afterwards.
619 */
620 if (RT_SUCCESS(rc))
621 {
622#ifndef DO_NOT_START_GIP
623 rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
624#endif
625 rc = VINF_SUCCESS;
626 }
627
628 /*
629 * Bail out on error: drop the reference taken above, free the
 * session's ring-3 mapping and return NIL values to the caller.
630 */
631 if (RT_FAILURE(rc))
632 {
633 LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
634 pDevExt->cGipUsers = 0;
635 pSession->fGipReferenced = 0;
636 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
637 {
638 int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
639 if (RT_SUCCESS(rc2))
640 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
641 }
642 HCPhys = NIL_RTHCPHYS;
643 pGipR3 = NIL_RTR3PTR;
644 }
645 }
646 }
647 }
648 else
649 {
650 rc = VERR_GENERAL_FAILURE;
651 Log(("SUPR0GipMap: GIP is not available!\n"));
652 }
653#ifdef SUPDRV_USE_MUTEX_FOR_GIP
654 RTSemMutexRelease(pDevExt->mtxGip);
655#else
656 RTSemFastMutexRelease(pDevExt->mtxGip);
657#endif
658
659 /*
660 * Write returns. This is done on failure too, in which case the NIL values are stored.
661 */
662 if (pHCPhysGip)
663 *pHCPhysGip = HCPhys;
664 if (ppGipR3)
665 *ppGipR3 = pGipR3;
666
667#ifdef DEBUG_DARWIN_GIP
668 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
669#else
670 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
671#endif
672 return rc;
673}
674
675
676/**
677 * Unmaps any user mapping of the GIP and terminates all GIP access
678 * from this session.
679 *
680 * @returns IPRT status code.
681 * @param pSession Session to which the GIP mapping should belong.
682 */
683SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
684{
685 int rc = VINF_SUCCESS;
686 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
687#ifdef DEBUG_DARWIN_GIP
688 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
689 pSession,
690 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
691 pSession->GipMapObjR3));
692#else
693 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
694#endif
695 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
696
697#ifdef SUPDRV_USE_MUTEX_FOR_GIP
698 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
699#else
700 RTSemFastMutexRequest(pDevExt->mtxGip);
701#endif
702
703 /*
704 * Unmap anything?
705 */
706 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
707 {
708 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
709 AssertRC(rc);
710 if (RT_SUCCESS(rc))
711 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
712 }
713
714 /*
715 * Dereference global GIP.
716 */
717 if (pSession->fGipReferenced && !rc)
718 {
719 pSession->fGipReferenced = 0;
720 if ( pDevExt->cGipUsers > 0
721 && !--pDevExt->cGipUsers)
722 {
723 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
724#ifndef DO_NOT_START_GIP
725 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
726#endif
727 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
728 }
729 }
730
731#ifdef SUPDRV_USE_MUTEX_FOR_GIP
732 RTSemMutexRelease(pDevExt->mtxGip);
733#else
734 RTSemFastMutexRelease(pDevExt->mtxGip);
735#endif
736
737 return rc;
738}
739
740
741/**
742 * Gets the GIP pointer.
743 *
744 * @returns Pointer to the GIP or NULL.
745 */
746SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
747{
748 return g_pSUPGlobalInfoPage;
749}
750
751
752
753
754
755/*
756 *
757 *
758 * GIP Initialization, Termination and CPU Offline / Online Related Code.
759 * GIP Initialization, Termination and CPU Offline / Online Related Code.
760 * GIP Initialization, Termination and CPU Offline / Online Related Code.
761 *
762 *
763 */
764
765
766/**
767 * Timer callback function for TSC frequency refinement in invariant GIP mode.
768 *
769 * @param pTimer The timer.
770 * @param pvUser Opaque pointer to the device instance data.
771 * @param iTick The timer tick.
772 */
773static DECLCALLBACK(void) supdrvInitAsyncRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
774{
775 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
776 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
777 bool fDeltaApplied = false;
778 uint8_t idApic;
779 uint64_t u64DeltaNanoTS;
780 uint64_t u64DeltaTsc;
781 uint64_t u64NanoTS;
782 uint64_t u64Tsc;
783 RTCCUINTREG uFlags;
784
785 /* Paranoia. */
786 Assert(pGip);
787 Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
788
789#if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
790 u64NanoTS = RTTimeSystemNanoTS();
791 while (RTTimeSystemNanoTS() == u64NanoTS)
792 ASMNopPause();
793#endif
794 uFlags = ASMIntDisableFlags();
795 idApic = ASMGetApicId();
796 u64Tsc = ASMReadTSC();
797 u64NanoTS = RTTimeSystemNanoTS();
798 ASMSetFlags(uFlags);
799 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
800 supdrvTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
801 u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
802 u64DeltaTsc = u64Tsc - pDevExt->u64TscAnchor;
803
804 if (RT_UNLIKELY( pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO
805 && !fDeltaApplied))
806 {
807 Log(("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
808 GIP_TSC_REFINE_INTERVAL));
809 return;
810 }
811
812 /* Calculate the TSC frequency. */
813 if ( u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
814 && u64DeltaNanoTS < UINT32_MAX)
815 pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, (uint32_t)u64DeltaNanoTS);
816 else
817 {
818 RTUINT128U CpuHz, Tmp, Divisor;
819 CpuHz.s.Lo = CpuHz.s.Hi = 0;
820 RTUInt128MulU64ByU64(&Tmp, u64DeltaTsc, RT_NS_1SEC_64);
821 RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, u64DeltaNanoTS));
822 pGip->u64CpuHz = CpuHz.s.Lo;
823 }
824
825 /* Update rest of GIP. */
826 Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP().*/
827 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
828}
829
830
831/**
832 * Starts the TSC-frequency refinement phase asynchronously.
833 *
834 * @param pDevExt Pointer to the device instance data.
835 */
836static void supdrvGipInitAsyncRefineTscFreq(PSUPDRVDEVEXT pDevExt)
837{
838 uint64_t u64NanoTS;
839 RTCCUINTREG uFlags;
840 uint8_t idApic;
841 int rc;
842 PSUPGLOBALINFOPAGE pGip;
843
844 /* Validate. */
845 Assert(pDevExt);
846 Assert(pDevExt->pGip);
847 pGip = pDevExt->pGip;
848
849#ifdef SUPDRV_USE_TSC_DELTA_THREAD
850 /*
851 * If the TSC-delta thread is created, wait until it's done calculating
852 * the TSC-deltas on the relevant online CPUs before we start the TSC refinement.
853 */
854 if ( pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
855 && ASMAtomicReadS32(&pDevExt->rcTscDelta) == VERR_NOT_AVAILABLE)
856 {
857 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
858 if (rc == VERR_TIMEOUT)
859 {
860 SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
861 return;
862 }
863 }
864#endif
865
866 /*
867 * Record the TSC and NanoTS as the starting anchor point for refinement of the
868 * TSC. We deliberately avoid using SUPReadTSC() here as we want to keep the
869 * reading of the TSC and the NanoTS as close as possible.
870 */
871 u64NanoTS = RTTimeSystemNanoTS();
872 while (RTTimeSystemNanoTS() == u64NanoTS)
873 ASMNopPause();
874 uFlags = ASMIntDisableFlags();
875 idApic = ASMGetApicId();
876 pDevExt->u64TscAnchor = ASMReadTSC();
877 pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
878 ASMSetFlags(uFlags);
879 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
880 supdrvTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, NULL /* pfDeltaApplied */);
881
882 rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY,
883 supdrvInitAsyncRefineTscTimer, pDevExt);
884 if (RT_SUCCESS(rc))
885 {
886 /*
887 * Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
888 * interval as small as possible while gaining the most consistent and accurate frequency
889 * (compared to what the host OS might have measured).
890 *
891 * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
892 * same TSC frequency whenever possible so we need to keep the interval short.
893 */
894 rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
895 AssertRC(rc);
896 }
897 else
898 OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
899}
900
901
902/**
903 * Measures the TSC frequency of the system.
904 *
905 * Uses a busy-wait method (about 100 ms) when the GIP is not in invariant TSC
906 * mode, as it is intended to help push the CPU frequency up, while the invariant
 * mode uses a sleeping method (200 ms).
907 *
908 * The TSC frequency can vary on systems which are not reported as invariant.
909 * On such systems the object of this function is to find out what the nominal,
910 * maximum TSC frequency is under 'normal' CPU operation.
911 *
 * The result is stored in pGip->u64CpuHz and, unless the GIP is in async TSC
 * mode, in pGip->aCPUs[0].u64CpuHz as well. Up to four attempts are made; an
 * attempt is repeated when TSC deltas are in use but were not available for
 * the CPU(s) the two TSC samples were taken on.
 *
912 * @returns VBox status code.
 * @retval VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED if all attempts failed or
 * waiting for the TSC-delta measurements timed out.
913 * @param pDevExt Pointer to the device instance.
914 *
915 * @remarks Must be called only -after- measuring the TSC deltas.
916 */
917static int supdrvGipInitMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
918{
919 int cTriesLeft = 4;
920 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
921
922 /* Assert order: the GIP must exist and must already carry its magic value. */
923 AssertReturn(pGip, VERR_INVALID_PARAMETER);
924 AssertReturn(pGip->u32Magic == SUPGLOBALINFOPAGE_MAGIC, VERR_WRONG_ORDER);
925
926 while (cTriesLeft-- > 0)
927 {
928 RTCCUINTREG uFlags;
929 uint64_t u64NanoTsBefore;
930 uint64_t u64NanoTsAfter;
931 uint64_t u64TscBefore;
932 uint64_t u64TscAfter;
933 uint8_t idApicBefore;
934 uint8_t idApicAfter;
935
936 /*
937 * Synchronize with the host OS clock tick before reading the TSC.
938 * Especially important on older Windows version where the granularity is terrible.
939 */
940 u64NanoTsBefore = RTTimeSystemNanoTS();
941 while (RTTimeSystemNanoTS() == u64NanoTsBefore)
942 ASMNopPause();
943
 /* Take the 'before' sample (APIC ID, TSC, clock) with interrupts disabled. */
944 uFlags = ASMIntDisableFlags();
945 idApicBefore = ASMGetApicId();
946 u64TscBefore = ASMReadTSC();
947 u64NanoTsBefore = RTTimeSystemNanoTS();
948 ASMSetFlags(uFlags);
949
950 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
951 {
952 /*
953 * Sleep-wait since the TSC frequency is constant, it eases host load.
954 * Shorter interval produces more variance in the frequency (esp. Windows).
 * Afterwards wait for the clock to tick over again before taking
 * the 'after' timestamp.
955 */
956 RTThreadSleep(200);
957 u64NanoTsAfter = RTTimeSystemNanoTS();
958 while (RTTimeSystemNanoTS() == u64NanoTsAfter)
959 ASMNopPause();
960 u64NanoTsAfter = RTTimeSystemNanoTS();
961 }
962 else
963 {
964 /* Busy-wait keeping the frequency up and measure. */
965 for (;;)
966 {
967 u64NanoTsAfter = RTTimeSystemNanoTS();
968 if (u64NanoTsAfter < RT_NS_100MS + u64NanoTsBefore)
969 ASMNopPause();
970 else
971 break;
972 }
973 }
974
 /* Take the 'after' TSC sample; the 'after' clock value was read above. */
975 uFlags = ASMIntDisableFlags();
976 idApicAfter = ASMGetApicId();
977 u64TscAfter = ASMReadTSC();
978 ASMSetFlags(uFlags);
979
980 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
981 {
982 int rc;
983 bool fAppliedBefore;
984 bool fAppliedAfter;
 /* Adjust both samples by the delta of the CPU each was taken on. */
985 rc = supdrvTscDeltaApply(pGip, &u64TscBefore, idApicBefore, &fAppliedBefore); AssertRCReturn(rc, rc);
986 rc = supdrvTscDeltaApply(pGip, &u64TscAfter, idApicAfter, &fAppliedAfter); AssertRCReturn(rc, rc);
987
988 if ( !fAppliedBefore
989 || !fAppliedAfter)
990 {
991#ifdef SUPDRV_USE_TSC_DELTA_THREAD
992 /*
993 * The TSC-delta measurements are kicked-off asynchronously as each host CPU is initialized.
994 * Therefore, if we failed to have a delta for the CPU(s) we were scheduled on (idApicBefore
995 * and idApicAfter) then wait until we have TSC-delta measurements for all online CPUs and
996 * proceed. This should be triggered just once if we're rather unlucky.
997 */
998 rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
999 if (rc == VERR_TIMEOUT)
1000 {
1001 SUPR0Printf("vboxdrv: supdrvGipInitMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
1002 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
1003 }
1004#else
1005 SUPR0Printf("vboxdrv: supdrvGipInitMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
1006 idApicBefore, idApicAfter, cTriesLeft);
1007#endif
 /* Retry the measurement (as long as there are tries left). */
1008 continue;
1009 }
1010 }
1011
1012 /*
1013 * Update GIP: frequency = elapsed TSC ticks * 1e9 / elapsed nanoseconds.
1014 */
1015 pGip->u64CpuHz = ((u64TscAfter - u64TscBefore) * RT_NS_1SEC_64) / (u64NanoTsAfter - u64NanoTsBefore);
1016 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1017 pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
1018 return VINF_SUCCESS;
1019 }
1020
1021 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
1022}
1023
1024
1025/**
1026 * Finds our (@a idCpu) entry, or allocates a new one if not found.
1027 *
1028 * @returns Index of the CPU in the cache set.
1029 * @param pGip The GIP.
1030 * @param idCpu The CPU ID.
1031 */
1032static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
1033{
1034 uint32_t i, cTries;
1035
1036 /*
1037 * ASSUMES that CPU IDs are constant.
1038 */
1039 for (i = 0; i < pGip->cCpus; i++)
1040 if (pGip->aCPUs[i].idCpu == idCpu)
1041 return i;
1042
1043 cTries = 0;
1044 do
1045 {
1046 for (i = 0; i < pGip->cCpus; i++)
1047 {
1048 bool fRc;
1049 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
1050 if (fRc)
1051 return i;
1052 }
1053 } while (cTries++ < 32);
1054 AssertReleaseFailed();
1055 return i - 1;
1056}
1057
1058
1059/**
1060 * The calling CPU should be accounted as online, update GIP accordingly.
1061 *
1062 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
1063 *
1064 * @param pDevExt The device extension.
1065 * @param idCpu The CPU ID.
1066 */
1067static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1068{
1069 int iCpuSet = 0;
1070 uint16_t idApic = UINT16_MAX;
1071 uint32_t i = 0;
1072 uint64_t u64NanoTS = 0;
1073 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1074
1075 AssertPtrReturnVoid(pGip);
1076 AssertRelease(idCpu == RTMpCpuId());
1077 Assert(pGip->cPossibleCpus == RTMpGetCount());
1078
1079 /*
1080 * Do this behind a spinlock with interrupts disabled as this can fire
1081 * on all CPUs simultaneously, see @bugref{6110}.
1082 */
1083 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1084
1085 /*
1086 * Update the globals.
1087 */
1088 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
1089 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
1090 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1091 if (iCpuSet >= 0)
1092 {
1093 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1094 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
1095 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
1096 }
1097
1098 /*
1099 * Update the entry.
1100 */
1101 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
1102 i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1103 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
1104 idApic = ASMGetApicId();
1105 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
1106 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
1107 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
1108
1109 /*
1110 * Update the APIC ID and CPU set index mappings.
1111 */
1112 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
1113 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
1114
1115 /* Update the Mp online/offline counter. */
1116 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1117
1118 /* Add this CPU to the set of CPUs for which we need to calculate their TSC-deltas. */
1119 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1120 {
1121 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
1122#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1123 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
1124 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
1125 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
1126 {
1127 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
1128 }
1129 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
1130#endif
1131 }
1132
1133 /* commit it */
1134 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
1135
1136 RTSpinlockRelease(pDevExt->hGipSpinlock);
1137}
1138
1139
1140/**
1141 * The CPU should be accounted as offline, update the GIP accordingly.
1142 *
1143 * This is used by supdrvGipMpEvent.
1144 *
1145 * @param pDevExt The device extension.
1146 * @param idCpu The CPU ID.
1147 */
1148static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1149{
1150 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1151 int iCpuSet;
1152 unsigned i;
1153
1154 AssertPtrReturnVoid(pGip);
1155 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1156
1157 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1158 AssertReturnVoid(iCpuSet >= 0);
1159
1160 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
1161 AssertReturnVoid(i < pGip->cCpus);
1162 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
1163
1164 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1165 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
1166
1167 /* Update the Mp online/offline counter. */
1168 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1169
1170 /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
1171 if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
1172 {
1173 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
1174 ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
1175 }
1176
1177 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1178 {
1179 /* Reset the TSC delta, we will recalculate it lazily. */
1180 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
1181 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
1182 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
1183 }
1184
1185 /* commit it */
1186 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
1187
1188 RTSpinlockRelease(pDevExt->hGipSpinlock);
1189}
1190
1191
1192/**
1193 * Multiprocessor event notification callback.
1194 *
1195 * This is used to make sure that the GIP master gets passed on to
1196 * another CPU. It also updates the associated CPU data.
1197 *
1198 * @param enmEvent The event.
1199 * @param idCpu The cpu it applies to.
1200 * @param pvUser Pointer to the device extension.
1201 *
1202 * @remarks This function -must- fire on the newly online'd CPU for the
1203 * RTMPEVENT_ONLINE case and can fire on any CPU for the
1204 * RTMPEVENT_OFFLINE case.
1205 */
1206static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
1207{
1208 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
1209 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1210
1211 AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1212
1213 /*
1214 * Update the GIP CPU data.
1215 */
1216 if (pGip)
1217 {
1218 switch (enmEvent)
1219 {
1220 case RTMPEVENT_ONLINE:
1221 AssertRelease(idCpu == RTMpCpuId());
1222 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1223 break;
1224 case RTMPEVENT_OFFLINE:
1225 supdrvGipMpEventOffline(pDevExt, idCpu);
1226 break;
1227 }
1228 }
1229
1230 /*
1231 * Make sure there is a master GIP.
1232 */
1233 if (enmEvent == RTMPEVENT_OFFLINE)
1234 {
1235 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
1236 if (idGipMaster == idCpu)
1237 {
1238 /*
1239 * The GIP master is going offline, find a new one.
1240 */
1241 bool fIgnored;
1242 unsigned i;
1243 RTCPUID idNewGipMaster = NIL_RTCPUID;
1244 RTCPUSET OnlineCpus;
1245 RTMpGetOnlineSet(&OnlineCpus);
1246
1247 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
1248 if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
1249 {
1250 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
1251 if (idCurCpu != idGipMaster)
1252 {
1253 idNewGipMaster = idCurCpu;
1254 break;
1255 }
1256 }
1257
1258 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
1259 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
1260 NOREF(fIgnored);
1261 }
1262 }
1263}
1264
1265
1266/**
1267 * On CPU initialization callback for RTMpOnAll.
1268 *
1269 * @param idCpu The CPU ID.
1270 * @param pvUser1 The device extension.
1271 * @param pvUser2 The GIP.
1272 */
1273static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1274{
1275 /* This is good enough, even though it will update some of the globals a
1276 bit to much. */
1277 supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
1278}
1279
1280
1281/**
1282 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
1283 *
1284 * @param idCpu Ignored.
1285 * @param pvUser1 Where to put the TSC.
1286 * @param pvUser2 Ignored.
1287 */
1288static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1289{
1290 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
1291}
1292
1293
1294/**
1295 * Determine if Async GIP mode is required because of TSC drift.
1296 *
1297 * When using the default/normal timer code it is essential that the time stamp counter
1298 * (TSC) runs never backwards, that is, a read operation to the counter should return
1299 * a bigger value than any previous read operation. This is guaranteed by the latest
1300 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
1301 * case we have to choose the asynchronous timer mode.
1302 *
1303 * @param poffMin Pointer to the determined difference between different
1304 * cores (optional, can be NULL).
1305 * @return false if the time stamp counters appear to be synchronized, true otherwise.
1306 */
1307static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
1308{
1309 /*
1310 * Just iterate all the cpus 8 times and make sure that the TSC is
1311 * ever increasing. We don't bother taking TSC rollover into account.
1312 */
1313 int iEndCpu = RTMpGetArraySize();
1314 int iCpu;
1315 int cLoops = 8;
1316 bool fAsync = false;
1317 int rc = VINF_SUCCESS;
1318 uint64_t offMax = 0;
1319 uint64_t offMin = ~(uint64_t)0;
1320 uint64_t PrevTsc = ASMReadTSC();
1321
1322 while (cLoops-- > 0)
1323 {
1324 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
1325 {
1326 uint64_t CurTsc;
1327 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker, &CurTsc, NULL);
1328 if (RT_SUCCESS(rc))
1329 {
1330 if (CurTsc <= PrevTsc)
1331 {
1332 fAsync = true;
1333 offMin = offMax = PrevTsc - CurTsc;
1334 Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
1335 iCpu, cLoops, CurTsc, PrevTsc));
1336 break;
1337 }
1338
1339 /* Gather statistics (except the first time). */
1340 if (iCpu != 0 || cLoops != 7)
1341 {
1342 uint64_t off = CurTsc - PrevTsc;
1343 if (off < offMin)
1344 offMin = off;
1345 if (off > offMax)
1346 offMax = off;
1347 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
1348 }
1349
1350 /* Next */
1351 PrevTsc = CurTsc;
1352 }
1353 else if (rc == VERR_NOT_SUPPORTED)
1354 break;
1355 else
1356 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
1357 }
1358
1359 /* broke out of the loop. */
1360 if (iCpu < iEndCpu)
1361 break;
1362 }
1363
1364 if (poffMin)
1365 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
1366 Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
1367 fAsync, iEndCpu, rc, offMin, offMax));
1368#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
1369 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
1370#endif
1371 return fAsync;
1372}
1373
1374
1375/**
1376 * supdrvGipInit() worker that determines the GIP TSC mode.
1377 *
1378 * @returns The most suitable TSC mode.
1379 * @param pDevExt Pointer to the device instance data.
1380 */
1381static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
1382{
1383 uint64_t u64DiffCoresIgnored;
1384 uint32_t uEAX, uEBX, uECX, uEDX;
1385
1386 /*
1387 * Establish whether the CPU advertises TSC as invariant, we need that in
1388 * a couple of places below.
1389 */
1390 bool fInvariantTsc = false;
1391 if (ASMHasCpuId())
1392 {
1393 uEAX = ASMCpuId_EAX(0x80000000);
1394 if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
1395 {
1396 uEDX = ASMCpuId_EDX(0x80000007);
1397 if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
1398 fInvariantTsc = true;
1399 }
1400 }
1401
1402 /*
1403 * On single CPU systems, we don't need to consider ASYNC mode.
1404 */
1405 if (RTMpGetCount() <= 1)
1406 return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
1407
1408 /*
1409 * Allow the user and/or OS specific bits to force async mode.
1410 */
1411 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
1412 return SUPGIPMODE_ASYNC_TSC;
1413
1414
1415 /*
1416 * Use invariant mode if the CPU says TSC is invariant.
1417 */
1418 if (fInvariantTsc)
1419 return SUPGIPMODE_INVARIANT_TSC;
1420
1421 /*
1422 * TSC is not invariant and we're on SMP, this presents two problems:
1423 *
1424 * (1) There might be a skew between the CPU, so that cpu0
1425 * returns a TSC that is slightly different from cpu1.
1426 * This screw may be due to (2), bad TSC initialization
1427 * or slightly different TSC rates.
1428 *
1429 * (2) Power management (and other things) may cause the TSC
1430 * to run at a non-constant speed, and cause the speed
1431 * to be different on the cpus. This will result in (1).
1432 *
1433 * If any of the above is detected, we will have to use ASYNC mode.
1434 */
1435 /* (1). Try check for current differences between the cpus. */
1436 if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
1437 return SUPGIPMODE_ASYNC_TSC;
1438
1439 /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
1440 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1441 if ( ASMIsValidStdRange(uEAX)
1442 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
1443 {
1444 /* Check for APM support. */
1445 uEAX = ASMCpuId_EAX(0x80000000);
1446 if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
1447 {
1448 uEDX = ASMCpuId_EDX(0x80000007);
1449 if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
1450 return SUPGIPMODE_ASYNC_TSC;
1451 }
1452 }
1453
1454 return SUPGIPMODE_SYNC_TSC;
1455}
1456
1457
1458/**
1459 * Initializes per-CPU GIP information.
1460 *
1461 * @param pDevExt Pointer to the device instance data.
1462 * @param pGip Pointer to the GIP.
1463 * @param pCpu Pointer to which GIP CPU to initalize.
1464 * @param u64NanoTS The current nanosecond timestamp.
1465 */
1466static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
1467{
1468 /* !!! Warning !!! The GIP may not be linked to the device instance data at this point!
1469 which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
1470 pCpu->u32TransactionId = 2;
1471 pCpu->u64NanoTS = u64NanoTS;
1472 pCpu->u64TSC = ASMReadTSC();
1473 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
1474 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
1475
1476 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
1477 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
1478 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
1479 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
1480
1481 /*
1482 * We don't know the following values until we've executed updates.
1483 * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
1484 * the 2nd timer callout.
1485 */
1486 pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
1487 pCpu->u32UpdateIntervalTSC
1488 = pCpu->au32TSCHistory[0]
1489 = pCpu->au32TSCHistory[1]
1490 = pCpu->au32TSCHistory[2]
1491 = pCpu->au32TSCHistory[3]
1492 = pCpu->au32TSCHistory[4]
1493 = pCpu->au32TSCHistory[5]
1494 = pCpu->au32TSCHistory[6]
1495 = pCpu->au32TSCHistory[7]
1496 = (uint32_t)(_4G / pGip->u32UpdateHz);
1497}
1498
1499
1500/**
1501 * Initializes the GIP data.
1502 *
1503 * @param pDevExt Pointer to the device instance data.
1504 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1505 * @param HCPhys The physical address of the GIP.
1506 * @param u64NanoTS The current nanosecond timestamp.
1507 * @param uUpdateHz The update frequency.
1508 * @param uUpdateIntervalNS The update interval in nanoseconds.
1509 * @param cCpus The CPU count.
1510 */
1511static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
1512 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
1513{
1514 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
1515 unsigned i;
1516#ifdef DEBUG_DARWIN_GIP
1517 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1518#else
1519 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1520#endif
1521
1522 /*
1523 * Initialize the structure.
1524 */
1525 memset(pGip, 0, cbGip);
1526
1527 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
1528 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
1529 pGip->u32Mode = supdrvGipInitDetermineTscMode(pDevExt);
1530 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
1531 /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
1532 pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
1533 ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
1534 else
1535 pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
1536 pGip->cCpus = (uint16_t)cCpus;
1537 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
1538 pGip->u32UpdateHz = uUpdateHz;
1539 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
1540 pGip->fGetGipCpu = SUPGIPGETCPU_APIC_ID;
1541 RTCpuSetEmpty(&pGip->OnlineCpuSet);
1542 RTCpuSetEmpty(&pGip->PresentCpuSet);
1543 RTMpGetSet(&pGip->PossibleCpuSet);
1544 pGip->cOnlineCpus = RTMpGetOnlineCount();
1545 pGip->cPresentCpus = RTMpGetPresentCount();
1546 pGip->cPossibleCpus = RTMpGetCount();
1547 pGip->idCpuMax = RTMpGetMaxCpuId();
1548 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
1549 pGip->aiCpuFromApicId[i] = UINT16_MAX;
1550 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
1551 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
1552 for (i = 0; i < cCpus; i++)
1553 supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
1554
1555 /*
1556 * Link it to the device extension.
1557 */
1558 pDevExt->pGip = pGip;
1559 pDevExt->HCPhysGip = HCPhys;
1560 pDevExt->cGipUsers = 0;
1561}
1562
1563
1564/**
1565 * Creates the GIP.
1566 *
1567 * @returns VBox status code.
1568 * @param pDevExt Instance data. GIP stuff may be updated.
1569 */
1570int VBOXCALL supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
1571{
1572 PSUPGLOBALINFOPAGE pGip;
1573 RTHCPHYS HCPhysGip;
1574 uint32_t u32SystemResolution;
1575 uint32_t u32Interval;
1576 uint32_t u32MinInterval;
1577 uint32_t uMod;
1578 unsigned cCpus;
1579 int rc;
1580
1581 LogFlow(("supdrvGipCreate:\n"));
1582
1583 /* Assert order. */
1584 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
1585 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
1586 Assert(!pDevExt->pGipTimer);
1587
1588 /*
1589 * Check the CPU count.
1590 */
1591 cCpus = RTMpGetArraySize();
1592 if ( cCpus > RTCPUSET_MAX_CPUS
1593 || cCpus > 256 /* ApicId is used for the mappings */)
1594 {
1595 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
1596 return VERR_TOO_MANY_CPUS;
1597 }
1598
1599 /*
1600 * Allocate a contiguous set of pages with a default kernel mapping.
1601 */
1602 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
1603 if (RT_FAILURE(rc))
1604 {
1605 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
1606 return rc;
1607 }
1608 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
1609 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
1610
1611 /*
1612 * Allocate the TSC-delta sync struct on a separate cache line.
1613 */
1614 pDevExt->pvTscDeltaSync = RTMemAllocZ(sizeof(SUPTSCDELTASYNC) + 63);
1615 pDevExt->pTscDeltaSync = RT_ALIGN_PT(pDevExt->pvTscDeltaSync, 64, PSUPTSCDELTASYNC);
1616 Assert(RT_ALIGN_PT(pDevExt->pTscDeltaSync, 64, PSUPTSCDELTASYNC) == pDevExt->pTscDeltaSync);
1617
1618 /*
1619 * Find a reasonable update interval and initialize the structure.
1620 */
1621 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
1622 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
1623 * See @bugref{6710}. */
1624 u32MinInterval = RT_NS_10MS;
1625 u32SystemResolution = RTTimerGetSystemGranularity();
1626 u32Interval = u32MinInterval;
1627 uMod = u32MinInterval % u32SystemResolution;
1628 if (uMod)
1629 u32Interval += u32SystemResolution - uMod;
1630
1631 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
1632
1633 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
1634 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
1635 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
1636 {
1637 /* Basically, invariant Windows boxes, should never be detected as async (i.e. TSC-deltas should be 0). */
1638 OSDBGPRINT(("supdrvGipCreate: The TSC-deltas should be normalized by the host OS, but verifying shows it's not!\n"));
1639 return VERR_INTERNAL_ERROR_2;
1640 }
1641
1642 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
1643 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
1644#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1645 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1646 {
1647 /* Initialize TSC-delta measurement thread before executing any Mp event callbacks. */
1648 rc = supdrvTscDeltaThreadInit(pDevExt);
1649 }
1650#endif
1651 if (RT_SUCCESS(rc))
1652 {
1653 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
1654 if (RT_SUCCESS(rc))
1655 {
1656 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
1657 if (RT_SUCCESS(rc))
1658 {
1659#ifndef SUPDRV_USE_TSC_DELTA_THREAD
1660 uint16_t iCpu;
1661 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1662 {
1663 /*
1664 * Measure the TSC deltas now that we have MP notifications.
1665 */
1666 int cTries = 5;
1667 do
1668 {
1669 rc = supdrvMeasureInitialTscDeltas(pDevExt);
1670 if ( rc != VERR_TRY_AGAIN
1671 && rc != VERR_CPU_OFFLINE)
1672 break;
1673 } while (--cTries > 0);
1674 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1675 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
1676 }
1677 else
1678 {
1679 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1680 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
1681 }
1682#endif
1683 if (RT_SUCCESS(rc))
1684 {
1685 rc = supdrvGipInitMeasureTscFreq(pDevExt);
1686 if (RT_SUCCESS(rc))
1687 {
1688 /*
1689 * Create the timer.
1690 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
1691 */
1692 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
1693 {
1694 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
1695 supdrvGipAsyncTimer, pDevExt);
1696 if (rc == VERR_NOT_SUPPORTED)
1697 {
1698 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
1699 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
1700 }
1701 }
1702 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1703 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
1704 supdrvGipSyncAndInvariantTimer, pDevExt);
1705 if (RT_SUCCESS(rc))
1706 {
1707 /*
1708 * We're good.
1709 */
1710 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
1711 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
1712
1713 g_pSUPGlobalInfoPage = pGip;
1714 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1715 supdrvGipInitAsyncRefineTscFreq(pDevExt);
1716 return VINF_SUCCESS;
1717 }
1718
1719 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
1720 Assert(!pDevExt->pGipTimer);
1721 }
1722 else
1723 OSDBGPRINT(("supdrvGipCreate: supdrvGipInitMeasureTscFreq failed. rc=%Rrc\n", rc));
1724 }
1725 else
1726 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureInitialTscDeltas failed. rc=%Rrc\n", rc));
1727 }
1728 else
1729 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
1730 }
1731 else
1732 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
1733 }
1734 else
1735 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
1736
1737 supdrvGipDestroy(pDevExt); /* Releases timer frequency increase too. */
1738 return rc;
1739}
1740
1741
1742/**
1743 * Invalidates the GIP data upon termination.
1744 *
1745 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1746 */
1747static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
1748{
1749 unsigned i;
1750 pGip->u32Magic = 0;
1751 for (i = 0; i < pGip->cCpus; i++)
1752 {
1753 pGip->aCPUs[i].u64NanoTS = 0;
1754 pGip->aCPUs[i].u64TSC = 0;
1755 pGip->aCPUs[i].iTSCHistoryHead = 0;
1756 pGip->aCPUs[i].u64TSCSample = 0;
1757 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
1758 }
1759}
1760
1761
1762/**
1763 * Terminates the GIP.
1764 *
1765 * @param pDevExt Instance data. GIP stuff may be updated.
1766 */
1767void VBOXCALL supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
1768{
1769 int rc;
1770#ifdef DEBUG_DARWIN_GIP
1771 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
1772 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
1773 pDevExt->pGipTimer, pDevExt->GipMemObj));
1774#endif
1775
1776 /*
1777 * Stop receiving MP notifications before tearing anything else down.
1778 */
1779 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
1780
1781#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1782 /*
1783 * Terminate the TSC-delta measurement thread and resources.
1784 */
1785 supdrvTscDeltaTerm(pDevExt);
1786#endif
1787
1788 /*
1789 * Destroy the TSC-refinement one-shot timer.
1790 */
1791 if (pDevExt->pTscRefineTimer)
1792 {
1793 RTTimerDestroy(pDevExt->pTscRefineTimer);
1794 pDevExt->pTscRefineTimer = NULL;
1795 }
1796
1797 if (pDevExt->pvTscDeltaSync)
1798 {
1799 RTMemFree(pDevExt->pvTscDeltaSync);
1800 pDevExt->pTscDeltaSync = NULL;
1801 pDevExt->pvTscDeltaSync = NULL;
1802 }
1803
1804 /*
1805 * Invalid the GIP data.
1806 */
1807 if (pDevExt->pGip)
1808 {
1809 supdrvGipTerm(pDevExt->pGip);
1810 pDevExt->pGip = NULL;
1811 }
1812 g_pSUPGlobalInfoPage = NULL;
1813
1814 /*
1815 * Destroy the timer and free the GIP memory object.
1816 */
1817 if (pDevExt->pGipTimer)
1818 {
1819 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
1820 pDevExt->pGipTimer = NULL;
1821 }
1822
1823 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
1824 {
1825 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
1826 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
1827 }
1828
1829 /*
1830 * Finally, make sure we've release the system timer resolution request
1831 * if one actually succeeded and is still pending.
1832 */
1833 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
1834}
1835
1836
1837
1838
1839/*
1840 *
1841 *
1842 * GIP Update Timer Related Code
1843 * GIP Update Timer Related Code
1844 * GIP Update Timer Related Code
1845 *
1846 *
1847 */
1848
1849
1850/**
1851 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
1852 * updates all the per cpu data except the transaction id.
 *
 * The new nanosecond timestamp and TSC value are always stored.  Unless the
 * GIP is in invariant TSC mode, the TSC interval since the previous update is
 * also entered into the 8 entry TSC history, and u32UpdateIntervalTSC and
 * u64CpuHz are recalculated from that history.
1853 *
1854 * @param pDevExt The device extension.
1855 * @param pGipCpu Pointer to the per cpu data.
1856 * @param u64NanoTS The current time stamp.
1857 * @param u64TSC The current TSC.
1858 * @param iTick The current timer tick.
1859 *
1860 * @remarks Can be called with interrupts disabled!
1861 */
1862static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
1863{
1864 uint64_t u64TSCDelta;
1865 uint32_t u32UpdateIntervalTSC;
1866 uint32_t u32UpdateIntervalTSCSlack;
1867 unsigned iTSCHistoryHead;
1868 uint64_t u64CpuHz;
1869 uint32_t u32TransactionId;
1870
1871 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1872 AssertPtrReturnVoid(pGip);
1873
1874 /* Delta between this and the previous update. */
 /* (Nanoseconds since the previous update of this entry, truncated to 32 bits.) */
1875 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
1876
1877 /*
1878 * Update the NanoTS.
1879 */
1880 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
1881
1882 /*
1883 * Calc TSC delta.
 * (This is the number of TSC ticks since the previous update of this
 * entry, not the inter-CPU TSC-delta kept in i64TSCDelta.)
1884 */
1885 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
1886 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
1887
1888 /* We don't need to keep recalculating the frequency when it's invariant. */
1889 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1890 return;
1891
 /* An interval that doesn't fit into 32 bits is counted as an error and
    replaced by the current update interval value. */
1892 if (u64TSCDelta >> 32)
1893 {
1894 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
1895 pGipCpu->cErrors++;
1896 }
1897
1898 /*
1899 * On the 2nd and 3rd callout, reset the history with the current TSC
1900 * interval since the values entered by supdrvGipInit are totally off.
1901 * The interval on the 1st callout completely unreliable, the 2nd is a bit
1902 * better, while the 3rd should be most reliable.
 *
 * NOTE(review): the transaction ID values 5 and 7 tested here presumably
 * correspond to the 2nd and 3rd update given the initial value of 2 set by
 * supdrvGipInitCpu; the code advancing the ID is not part of this
 * function, so verify against the callers.
1903 */
1904 u32TransactionId = pGipCpu->u32TransactionId;
1905 if (RT_UNLIKELY( ( u32TransactionId == 5
1906 || u32TransactionId == 7)
1907 && ( iTick == 2
1908 || iTick == 3) ))
1909 {
1910 unsigned i;
1911 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
1912 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
1913 }
1914
1915 /*
1916 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
1917 * Wait until we have at least one full history since the above history reset. The
1918 * assumption is that the majority of the previous history values will be tolerable.
1919 * See @bugref{6710} comment #67.
 *
 * When the interval since the previous update is off by more than the
 * threshold (update interval / 200), the measured TSC interval is replaced
 * by the average of the 8 history entries before it is entered into the
 * history below.  This is not done in async TSC mode.
1920 */
1921 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
1922 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1923 {
1924 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
1925 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
1926 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
1927 {
 /* Average of entries 0..3 (32-bit arithmetic) and of entries 4..7
    (64-bit arithmetic), then the average of those two. */
1928 uint32_t u32;
1929 u32 = pGipCpu->au32TSCHistory[0];
1930 u32 += pGipCpu->au32TSCHistory[1];
1931 u32 += pGipCpu->au32TSCHistory[2];
1932 u32 += pGipCpu->au32TSCHistory[3];
1933 u32 >>= 2;
1934 u64TSCDelta = pGipCpu->au32TSCHistory[4];
1935 u64TSCDelta += pGipCpu->au32TSCHistory[5];
1936 u64TSCDelta += pGipCpu->au32TSCHistory[6];
1937 u64TSCDelta += pGipCpu->au32TSCHistory[7];
1938 u64TSCDelta >>= 2;
1939 u64TSCDelta += u32;
1940 u64TSCDelta >>= 1;
1941 }
1942 }
1943
1944 /*
1945 * TSC History.
 * The history is a ring of 8 entries: advance the head (wrapping by
 * masking with 7), publish it and store the interval in the new head entry.
1946 */
1947 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
1948 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
1949 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
1950 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
1951
1952 /*
1953 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
1954 *
1955 * On Windows, we have an occasional (but recurring) sour value that messed up
1956 * the history but taking only 1 interval reduces the precision overall.
1957 * However, this problem existed before the invariant mode was introduced.
 *
 * NOTE(review): this function returns early in invariant mode (see above),
 * so the SUPGIPMODE_INVARIANT_TSC part of the condition below looks
 * unreachable unless u32Mode changes while this function is executing.
1958 */
1959 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
1960 || pGip->u32UpdateHz >= 1000)
1961 {
 /* Average of all 8 history entries, using 32-bit arithmetic throughout. */
1962 uint32_t u32;
1963 u32 = pGipCpu->au32TSCHistory[0];
1964 u32 += pGipCpu->au32TSCHistory[1];
1965 u32 += pGipCpu->au32TSCHistory[2];
1966 u32 += pGipCpu->au32TSCHistory[3];
1967 u32 >>= 2;
1968 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
1969 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
1970 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
1971 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
1972 u32UpdateIntervalTSC >>= 2;
1973 u32UpdateIntervalTSC += u32;
1974 u32UpdateIntervalTSC >>= 1;
1975
1976 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
1977 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
1978 }
1979 else if (pGip->u32UpdateHz >= 90)
1980 {
 /* Average of the current interval and the preceding history entry. */
1981 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
1982 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
1983 u32UpdateIntervalTSC >>= 1;
1984
1985 /* value chosen on a 2GHz thinkpad running windows */
1986 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
1987 }
1988 else
1989 {
 /* Just the current interval. */
1990 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
1991
1992 /* This value hasn't been checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
1993 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
1994 }
 /* The published interval includes the slack, the CpuHz calculation below doesn't. */
1995 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
1996
1997 /*
1998 * CpuHz.
 * (TSC ticks per update interval * RT_NS_1SEC / update interval in ns.)
1999 */
2000 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
2001 u64CpuHz /= pGip->u32UpdateIntervalNS;
2002 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
2003}
2004
2005
2006/**
2007 * Updates the GIP.
2008 *
2009 * @param pDevExt The device extension.
2010 * @param u64NanoTS The current nanosecond timesamp.
2011 * @param u64TSC The current TSC timesamp.
2012 * @param idCpu The CPU ID.
2013 * @param iTick The current timer tick.
2014 *
2015 * @remarks Can be called with interrupts disabled!
2016 */
2017static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
2018{
2019 /*
2020 * Determine the relevant CPU data.
2021 */
2022 PSUPGIPCPU pGipCpu;
2023 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2024 AssertPtrReturnVoid(pGip);
2025
2026 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2027 pGipCpu = &pGip->aCPUs[0];
2028 else
2029 {
2030 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
2031 if (RT_UNLIKELY(iCpu >= pGip->cCpus))
2032 return;
2033 pGipCpu = &pGip->aCPUs[iCpu];
2034 if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
2035 return;
2036 }
2037
2038 /*
2039 * Start update transaction.
2040 */
2041 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2042 {
2043 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
2044 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2045 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2046 pGipCpu->cErrors++;
2047 return;
2048 }
2049
2050 /*
2051 * Recalc the update frequency every 0x800th time.
2052 */
2053 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariants hosts. */
2054 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
2055 {
2056 if (pGip->u64NanoTSLastUpdateHz)
2057 {
2058#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
2059 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
2060 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
2061 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
2062 {
2063 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
2064 * calculation on non-invariant hosts if it changes the history decision
2065 * taken in supdrvGipDoUpdateCpu(). */
2066 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
2067 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
2068 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
2069 }
2070#endif
2071 }
2072 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
2073 }
2074
2075 /*
2076 * Update the data.
2077 */
2078 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2079
2080 /*
2081 * Complete transaction.
2082 */
2083 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2084}
2085
2086
2087/**
2088 * Updates the per cpu GIP data for the calling cpu.
2089 *
2090 * @param pDevExt The device extension.
2091 * @param u64NanoTS The current nanosecond timesamp.
2092 * @param u64TSC The current TSC timesamp.
2093 * @param idCpu The CPU ID.
2094 * @param idApic The APIC id for the CPU index.
2095 * @param iTick The current timer tick.
2096 *
2097 * @remarks Can be called with interrupts disabled!
2098 */
2099static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
2100 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
2101{
2102 uint32_t iCpu;
2103 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2104
2105 /*
2106 * Avoid a potential race when a CPU online notification doesn't fire on
2107 * the onlined CPU but the tick creeps in before the event notification is
2108 * run.
2109 */
2110 if (RT_UNLIKELY(iTick == 1))
2111 {
2112 iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
2113 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
2114 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
2115 }
2116
2117 iCpu = pGip->aiCpuFromApicId[idApic];
2118 if (RT_LIKELY(iCpu < pGip->cCpus))
2119 {
2120 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
2121 if (pGipCpu->idCpu == idCpu)
2122 {
2123 /*
2124 * Start update transaction.
2125 */
2126 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2127 {
2128 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2129 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2130 pGipCpu->cErrors++;
2131 return;
2132 }
2133
2134 /*
2135 * Update the data.
2136 */
2137 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2138
2139 /*
2140 * Complete transaction.
2141 */
2142 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2143 }
2144 }
2145}
2146
2147
2148/**
2149 * Timer callback function for the sync and invariant GIP modes.
2150 *
2151 * @param pTimer The timer.
2152 * @param pvUser Opaque pointer to the device extension.
2153 * @param iTick The timer tick.
2154 */
2155static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2156{
2157 RTCCUINTREG uFlags;
2158 uint64_t u64TSC;
2159 uint64_t u64NanoTS;
2160 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2161 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2162
2163 uFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2164 u64TSC = ASMReadTSC();
2165 u64NanoTS = RTTimeSystemNanoTS();
2166
2167 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
2168 {
2169 /*
2170 * The calculations in supdrvGipUpdate() is very timing sensitive and doesn't handle
2171 * missed timer ticks. So for now it is better to use a delta of 0 and have the TSC rate
2172 * affected a bit until we get proper TSC deltas than implementing options like
2173 * rescheduling the tick to be delivered on the right CPU or missing the tick entirely.
2174 *
2175 * The likely hood of this happening is really low. On Windows, Linux, and Solaris
2176 * timers fire on the CPU they were registered/started on. Darwin timers doesn't
2177 * necessarily (they are high priority threads waiting).
2178 */
2179 Assert(!ASMIntAreEnabled());
2180 supdrvTscDeltaApply(pGip, &u64TSC, ASMGetApicId(), NULL /* pfDeltaApplied */);
2181 }
2182
2183 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
2184
2185 ASMSetFlags(uFlags);
2186
2187#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2188 if ( pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
2189 && !RTCpuSetIsEmpty(&pDevExt->TscDeltaCpuSet))
2190 {
2191 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
2192 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
2193 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
2194 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
2195 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
2196 /** @todo Do the actual poking using -- RTThreadUserSignal() */
2197 }
2198#endif
2199}
2200
2201
2202/**
2203 * Timer callback function for async GIP mode.
2204 * @param pTimer The timer.
2205 * @param pvUser Opaque pointer to the device extension.
2206 * @param iTick The timer tick.
2207 */
2208static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2209{
2210 RTCCUINTREG fOldFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2211 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2212 RTCPUID idCpu = RTMpCpuId();
2213 uint64_t u64TSC = ASMReadTSC();
2214 uint64_t NanoTS = RTTimeSystemNanoTS();
2215
2216 /** @todo reset the transaction number and whatnot when iTick == 1. */
2217 if (pDevExt->idGipMaster == idCpu)
2218 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
2219 else
2220 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
2221
2222 ASMSetFlags(fOldFlags);
2223}
2224
2225
2226
2227
2228/*
2229 *
2230 *
2231 * TSC Delta Measurements And Related Code
2232 * TSC Delta Measurements And Related Code
2233 * TSC Delta Measurements And Related Code
2234 *
2235 *
2236 */
2237
2238
/*
 * Select TSC delta measurement algorithm.
 *
 * Exactly one of GIP_TSC_DELTA_METHOD_1 and GIP_TSC_DELTA_METHOD_2 ends up
 * defined; change the '#if 1' below to '#if 0' to build method 2 instead.
 */
#if 1
# define GIP_TSC_DELTA_METHOD_1
#else
# define GIP_TSC_DELTA_METHOD_2
#endif
2247
2248
/**
 * TSC delta measurement algorithm \#2 result entry.
 *
 * supdrvTscDeltaMethod2CollectData() fills in one of these per iteration.
 */
typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
{
    /** This CPU's own sequence number, as returned by the increment done
     *  right before the TSC read. */
    uint32_t    iSeqMine;
    /** The other CPU's current sequence number, read right before the TSC read. */
    uint32_t    iSeqOther;
    /** The TSC value read for this entry. */
    uint64_t    uTsc;
} SUPDRVTSCDELTAMETHOD2ENTRY;
2258
/**
 * TSC delta measurement algorithm \#2 data.
 *
 * One instance is allocated per participating CPU (master and worker), see
 * supdrvTscDeltaMethod2Init().
 */
typedef struct SUPDRVTSCDELTAMETHOD2
{
    /** Padding to make sure the iCurSeqNo is in its own cache line.
     * ASSUMES cacheline sizes <= 128 bytes. */
    uint32_t                    au32CacheLinePaddingBefore[128 / sizeof(uint32_t)];
    /** The current sequence number of this worker. */
    uint32_t volatile           iCurSeqNo;
    /** Padding to make sure the iCurSeqNo is in its own cache line.
     * ASSUMES cacheline sizes <= 128 bytes. */
    uint32_t                    au32CacheLinePaddingAfter[128 / sizeof(uint32_t) - 1];
    /** Result table. */
    SUPDRVTSCDELTAMETHOD2ENTRY  aResults[96];
} SUPDRVTSCDELTAMETHOD2;
/** Pointer to the data for TSC delta measurement algorithm \#2. */
typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
2277
2278
/**
 * Argument package/state passed by supdrvMeasureTscDeltaOne to the RTMpOn
 * callback worker.
 */
typedef struct SUPDRVGIPTSCDELTARGS
{
    /** The device extension. */
    PSUPDRVDEVEXT   pDevExt;
    /** The GIP CPU entry of the worker, i.e. the CPU whose delta is measured. */
    PSUPGIPCPU      pWorker;
    /** The GIP CPU entry of the master the worker is measured against. */
    PSUPGIPCPU      pMaster;
    /** The CPU ID of the master; the callback compares its own CPU ID to this
     *  to decide which role to play. */
    RTCPUID         idMaster;

#if 0
    /** Method 1 data. */
    struct
    {
    } M1;
#endif

#ifdef GIP_TSC_DELTA_METHOD_2
    /** Method 2 data. */
    struct
    {
        /** The sample table filled in on the master CPU. */
        PSUPDRVTSCDELTAMETHOD2  pMasterData;
        /** The sample table filled in on the worker CPU. */
        PSUPDRVTSCDELTAMETHOD2  pWorkerData;
        /** Running total of matching sample pairs found so far. */
        uint32_t                cHits;
        /** Whether the master adds a pause to each collection iteration (set by the master). */
        bool                    fLagMaster;
        /** Whether the worker adds a pause to each collection iteration (set by the master). */
        bool                    fLagWorker;
        /** Set by the master when the result is good enough; both CPUs poll it
         *  after each round and leave the loop when it is set. */
        bool volatile           fQuitEarly;
    } M2;
#endif
} SUPDRVGIPTSCDELTARGS;
/** Pointer to the TSC delta measurement argument package. */
typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
2310
2311
/** @name Macros that implements the basic synchronization steps common to
 *        the algorithms.
 *
 * All of them operate on the 'u' member of the given sync structure.  One
 * measurement round walks it through START (master) -> WORKER_READY (other)
 * -> WORKER_DONE (other) -> STOP (master).
 *
 * The TSCDELTA_MASTER_SYNC_BEFORE and TSCDELTA_MASTER_SYNC_AFTER macros
 * require a local variable named @c uFlags in the invoking scope; it holds
 * the saved interrupt flag state between the two.
 * @{
 */
/** Master: announce START, disable interrupts (saving the flags in uFlags)
 *  and spin until the other CPU has moved the state away from START. */
#define TSCDELTA_MASTER_SYNC_BEFORE(a_pTscDeltaSync) \
    do {\
        ASMAtomicWriteU32(&(a_pTscDeltaSync)->u, GIP_TSC_DELTA_SYNC_START); \
        \
        /* Disable interrupts only in the master for as short a period \
           as possible, thanks again to Windows.  See @bugref{6710} comment #73. */ \
        uFlags = ASMIntDisableFlags(); \
        \
        while (ASMAtomicReadU32(&(a_pTscDeltaSync)->u) == GIP_TSC_DELTA_SYNC_START) \
        { /* nothing */ } \
    } while (0)
/** Master: restore the interrupt flags saved in uFlags and spin until the
 *  other CPU reports WORKER_DONE. */
#define TSCDELTA_MASTER_SYNC_AFTER(a_pTscDeltaSync) \
    do {\
        /* Sync up with worker. */ \
        ASMSetFlags(uFlags); \
        \
        while (ASMAtomicReadU32(&(a_pTscDeltaSync)->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE) \
        { /* nothing */ } \
    } while (0)
/** Master: write STOP, which releases the other CPU from its wait in
 *  TSCDELTA_OTHER_SYNC_AFTER. */
#define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pTscDeltaSync) \
    do {\
        ASMAtomicWriteU32(&(a_pTscDeltaSync)->u, GIP_TSC_DELTA_SYNC_STOP); \
    } while (0)

/** Other CPU: spin until the master announces START, evaluate
 *  @a a_MidSyncExpr, then answer with WORKER_READY. */
#define TSCDELTA_OTHER_SYNC_BEFORE(a_pTscDeltaSync, a_MidSyncExpr) \
    do { \
        while (ASMAtomicReadU32(&(a_pTscDeltaSync)->u) != GIP_TSC_DELTA_SYNC_START) \
        { /* nothing */ } \
        a_MidSyncExpr; \
        ASMAtomicWriteU32(&(a_pTscDeltaSync)->u, GIP_TSC_DELTA_SYNC_WORKER_READY); \
    } while (0)
/** Other CPU: report WORKER_DONE and wait (with pause instructions) until
 *  the master changes the state again. */
#define TSCDELTA_OTHER_SYNC_AFTER(a_pTscDeltaSync) \
    do { \
        /* Tell master we're done collecting our data. */ \
        ASMAtomicWriteU32(&(a_pTscDeltaSync)->u, GIP_TSC_DELTA_SYNC_WORKER_DONE); \
        \
        /* Wait for the master to process the data. */ \
        while (ASMAtomicReadU32(&(a_pTscDeltaSync)->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE) \
            ASMNopPause(); \
    } while (0)
/** @} */
2357
2358#ifdef GIP_TSC_DELTA_METHOD_1
2359
/**
 * TSC delta measurement algorithm \#1 (GIP_TSC_DELTA_METHOD_1).
 *
 * Runs on both the master and the worker CPU at the same time.  In each round
 * the master publishes its TSC in its GIP entry while the worker keeps reading
 * its own TSC until it sees the master's value appear; the master then
 * computes the delta from the two samples and keeps the best one in the
 * worker's GIP entry.
 *
 * We ignore the first few runs of the loop in order to prime the
 * cache.  Also, we need to be careful about using 'pause' instruction
 * in critical busy-wait loops in this code - it can cause undesired
 * behaviour with hyperthreading.
 *
 * We try to minimize the measurement error by computing the minimum
 * read time of the compare statement in the worker by taking TSC
 * measurements across it.
 *
 * It must be noted that the computed minimum read time is mostly to
 * eliminate huge deltas when the worker is too early and doesn't by
 * itself help produce more accurate deltas.  We allow two times the
 * computed minimum as an arbitrary acceptable threshold.  Therefore,
 * it is still possible to get negative deltas where there are none
 * when the worker is earlier.  As long as these occasional negative
 * deltas are lower than the time it takes to exit guest-context and
 * the OS to reschedule EMT on a different CPU we won't expose a TSC
 * that jumped backwards.  It is because of the existence of the
 * negative deltas we don't recompute the delta with the master and
 * worker interchanged to eliminate the remaining measurement error.
 *
 *
 * @param   pArgs               The argument/state data.
 * @param   pSync               The synchronization structure
 *                              (pDevExt->pTscDeltaSync).
 * @param   fIsMaster           Set if master, clear if worker.
 * @param   iTry                The attempt number (not used by this method).
 */
static void supdrvTscDeltaMethod1Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC pSync, bool fIsMaster, uint32_t iTry)
{
    PSUPGIPCPU  pGipCpuWorker   = pArgs->pWorker;
    PSUPGIPCPU  pGipCpuMaster   = pArgs->pMaster;
    /* Smallest observed time for the worker's read-and-compare; only updated on the worker. */
    uint64_t    uMinCmpReadTime = UINT64_MAX;
    unsigned    iLoop;
    NOREF(iTry);

    for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
    {
        if (fIsMaster)
        {
            /*
             * The master.
             */
            RTCCUINTREG uFlags; /* Used by the TSCDELTA_MASTER_SYNC_* macros. */
            AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
                      ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
                       pGipCpuMaster->u64TSCSample, pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, pArgs->pDevExt->idGipMaster));
            TSCDELTA_MASTER_SYNC_BEFORE(pSync);

            /* Publish our TSC; repeat in the unlikely case that the value read
               equals the reserved marker the worker is polling for. */
            do
            {
                ASMSerializeInstruction();
                ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);

            TSCDELTA_MASTER_SYNC_AFTER(pSync);

            /* Process the data. */
            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
            {
                /* The worker leaves the reserved marker in place when it rejected its sample. */
                if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
                {
                    /* Worker TSC minus the master TSC adjusted by the master's own delta. */
                    int64_t iDelta = pGipCpuWorker->u64TSCSample
                                   - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
                    /* Keep the candidate when it moves the stored delta towards
                       GIP_TSC_DELTA_INITIAL_MASTER_VALUE from its own side, or when
                       nothing has been stored yet (INT64_MAX). */
                    if (  iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
                        ? iDelta < pGipCpuWorker->i64TSCDelta
                        : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
                        pGipCpuWorker->i64TSCDelta = iDelta;
                }
            }

            /* Reset our TSC sample and tell the worker to move on. */
            ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pSync);
        }
        else
        {
            /*
             * The worker.
             */
            uint64_t uTscWorker;
            uint64_t uTscWorkerFlushed;
            uint64_t uCmpReadTime;

            ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample);     /* Warm the cache line. */
            TSCDELTA_OTHER_SYNC_BEFORE(pSync, Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD));

            /*
             * Keep reading the TSC until we notice that the master has read his. Reading
             * the TSC -after- the master has updated the memory is way too late. We thus
             * compensate by trying to measure how long it took for the worker to notice
             * the memory flushed from the master.
             */
            do
            {
                ASMSerializeInstruction();
                uTscWorker = ASMReadTSC();
            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
            ASMSerializeInstruction();
            uTscWorkerFlushed = ASMReadTSC();

            /* Time between the last TSC read before noticing and the first one after. */
            uCmpReadTime = uTscWorkerFlushed - uTscWorker;
            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
            {
                /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
                if (uCmpReadTime < (uMinCmpReadTime << 1))
                {
                    /* Good enough: hand the sample to the master and tighten the minimum. */
                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
                    if (uCmpReadTime < uMinCmpReadTime)
                        uMinCmpReadTime = uCmpReadTime;
                }
                else
                    /* Too slow: the reserved marker tells the master to skip this round. */
                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
            }
            else if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
            {
                /* Read-time calibration rounds: only track the minimum. */
                if (uCmpReadTime < uMinCmpReadTime)
                    uMinCmpReadTime = uCmpReadTime;
            }

            TSCDELTA_OTHER_SYNC_AFTER(pSync);
        }
    }

    /*
     * We must reset the worker TSC sample value in case it gets picked as a
     * GIP master later on (it's trashed above, naturally).
     */
    if (!fIsMaster)
        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
}
2495
2496
2497/**
2498 * Initializes the argument/state data belonging to algorithm \#1.
2499 *
2500 * @returns VBox status code.
2501 * @param pArgs The argument/state data.
2502 */
2503static int supdrvTscDeltaMethod1Init(PSUPDRVGIPTSCDELTARGS pArgs)
2504{
2505 NOREF(pArgs);
2506 return VINF_SUCCESS;
2507}
2508
2509
2510/**
2511 * Undoes what supdrvTscDeltaMethod1Init() did.
2512 *
2513 * @param pArgs The argument/state data.
2514 */
2515static void supdrvTscDeltaMethod1Delete(PSUPDRVGIPTSCDELTARGS pArgs)
2516{
2517 NOREF(pArgs);
2518}
2519
2520#endif /* GIP_TSC_DELTA_METHOD_1 */
2521
2522
2523#ifdef GIP_TSC_DELTA_METHOD_2
/*
 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
 */

/** Number of initial rounds per attempt that only serve as warm-up. */
# define GIP_TSC_DELTA_M2_PRIMER_LOOPS  1
/** Total number of rounds per attempt: 12 regular rounds plus the primer rounds. */
# define GIP_TSC_DELTA_M2_LOOPS         (12 + GIP_TSC_DELTA_M2_PRIMER_LOOPS)
2530
2531
2532static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs, uint32_t iLoop)
2533{
2534 PSUPDRVTSCDELTAMETHOD2 pMasterData = pArgs->M2.pMasterData;
2535 PSUPDRVTSCDELTAMETHOD2 pOtherData = pArgs->M2.pWorkerData;
2536 int64_t iMasterTscDelta = pArgs->pMaster->i64TSCDelta;
2537 int64_t iBestDelta = pArgs->pWorker->i64TSCDelta;
2538 uint32_t idxResult;
2539 uint32_t cHits = 0;
2540
2541 /*
2542 * Look for matching entries in the master and worker tables.
2543 */
2544 for (idxResult = 0; idxResult < RT_ELEMENTS(pMasterData->aResults); idxResult++)
2545 {
2546 uint32_t idxOther = pMasterData->aResults[idxResult].iSeqOther;
2547 if (idxOther & 1)
2548 {
2549 idxOther >>= 1;
2550 if (idxOther < RT_ELEMENTS(pOtherData->aResults))
2551 {
2552 if (pOtherData->aResults[idxOther].iSeqOther == pMasterData->aResults[idxResult].iSeqMine)
2553 {
2554 int64_t iDelta;
2555 iDelta = pOtherData->aResults[idxOther].uTsc
2556 - (pMasterData->aResults[idxResult].uTsc - iMasterTscDelta);
2557 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
2558 ? iDelta < iBestDelta
2559 : iDelta > iBestDelta || iBestDelta == INT64_MAX)
2560 iBestDelta = iDelta;
2561 cHits++;
2562 }
2563 }
2564 }
2565 }
2566
2567 /*
2568 * Save the results.
2569 */
2570 if (cHits > 2)
2571 pArgs->pWorker->i64TSCDelta = iBestDelta;
2572 pArgs->M2.cHits += cHits;
2573
2574 /*
2575 * Check and see if we can quit a little early. If the result is already
2576 * extremely good (+/-16 ticks seems reasonable), just stop.
2577 */
2578 if ( iBestDelta >= 0 + GIP_TSC_DELTA_INITIAL_MASTER_VALUE
2579 ? iBestDelta <= 16 + GIP_TSC_DELTA_INITIAL_MASTER_VALUE
2580 : iBestDelta >= -16 + GIP_TSC_DELTA_INITIAL_MASTER_VALUE)
2581 {
2582 /*SUPR0Printf("quitting early #1: hits=%#x iLoop=%d iBestDelta=%lld\n", cHits, iLoop, iBestDelta);*/
2583 ASMAtomicWriteBool(&pArgs->M2.fQuitEarly, true);
2584 }
2585 /*
2586 * After a while, just stop if we get sufficent hits.
2587 */
2588 else if ( iLoop >= GIP_TSC_DELTA_M2_LOOPS / 3
2589 && cHits > 8)
2590 {
2591 uint32_t const cHitsNeeded = GIP_TSC_DELTA_M2_LOOPS * RT_ELEMENTS(pArgs->M2.pMasterData->aResults) / 4; /* 25% */
2592 if ( pArgs->M2.cHits >= cHitsNeeded
2593 && ( iBestDelta >= 0 + GIP_TSC_DELTA_INITIAL_MASTER_VALUE
2594 ? iBestDelta <= GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO + GIP_TSC_DELTA_INITIAL_MASTER_VALUE
2595 : iBestDelta >= -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO + GIP_TSC_DELTA_INITIAL_MASTER_VALUE) )
2596 {
2597 /*SUPR0Printf("quitting early hits=%#x (%#x) needed=%#x iLoop=%d iBestDelta=%lld\n",
2598 pArgs->M2.cHits, cHits, cHitsNeeded, iLoop, iBestDelta);*/
2599 ASMAtomicWriteBool(&pArgs->M2.fQuitEarly, true);
2600 }
2601 }
2602}
2603
2604
/**
 * The core function of the 2nd TSC delta measurement algorithm.
 *
 * The idea here is that we have the two CPUs execute the exact same code
 * collecting a largish set of TSC samples.  The code has one data dependency on
 * the other CPU, the intention of which is to synchronize the execution as well
 * as help cross reference the two sets of TSC samples (the sequence numbers).
 *
 * The @a fLag parameter is used to modify the execution a tiny bit on one or
 * both of the CPUs.  When @a fLag differs between the CPUs, it is thought that
 * it will help with making the CPUs enter lock step execution occasionally.
 *
 * @param   pMyData         The data of the calling CPU; its sequence number is
 *                          reset and its whole result table is filled in.
 * @param   piOtherSeqNo    Pointer to the other CPU's current sequence number.
 * @param   fLag            Whether to execute a pause instruction at the end
 *                          of each iteration.
 */
static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
{
    SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
    uint32_t                    cLeft  = RT_ELEMENTS(pMyData->aResults);

    ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
    ASMSerializeInstruction();
    while (cLeft-- > 0)
    {
        uint64_t uTsc;
        /* First increment: our sequence number stays odd for the duration of the TSC read. */
        uint32_t iSeqMine  = ASMAtomicIncU32(&pMyData->iCurSeqNo);
        uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
        ASMCompilerBarrier();
        ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
        uTsc = ASMReadTSC();
        /* Second increment: even again, the TSC read is over. */
        ASMAtomicIncU32(&pMyData->iCurSeqNo);
        ASMCompilerBarrier();
        ASMSerializeInstruction();
        /* Record the sample outside the odd window. */
        pEntry->iSeqMine  = iSeqMine;
        pEntry->iSeqOther = iSeqOther;
        pEntry->uTsc      = uTsc;
        pEntry++;
        ASMSerializeInstruction();
        if (fLag)
            ASMNopPause();
    }
}
2645
2646
2647/**
2648 * TSC delta measurment algorithm \#2 (GIP_TSC_DELTA_METHOD_2).
2649 *
2650 * See supdrvTscDeltaMethod2CollectData for algorithm details.
2651 *
2652 * @param pArgs The argument/state data.
2653 * @param pSync The synchronization structure
2654 * (pDevExt->pTscDeltaSync).
2655 * @param fIsMaster Set if master, clear if worker.
2656 * @param iTry The attempt number.
2657 */
2658static void supdrvTscDeltaMethod2Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC pSync, bool fIsMaster, uint32_t iTry)
2659{
2660 unsigned iLoop;
2661
2662 if (fIsMaster)
2663 ASMAtomicWriteBool(&pArgs->M2.fQuitEarly, false);
2664
2665 for (iLoop = 0; iLoop < GIP_TSC_DELTA_M2_LOOPS; iLoop++)
2666 {
2667 if (fIsMaster)
2668 {
2669 RTCCUINTREG uFlags;
2670
2671 /*
2672 * Adjust the loop lag fudge.
2673 */
2674# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
2675 if (iLoop < GIP_TSC_DELTA_M2_PRIMER_LOOPS)
2676 {
2677 /* Lag during the priming to be nice to everyone.. */
2678 pArgs->M2.fLagMaster = true;
2679 pArgs->M2.fLagWorker = true;
2680 }
2681 else
2682# endif
2683 if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4)
2684 {
2685 /* 25 % of the body without lagging. */
2686 pArgs->M2.fLagMaster = false;
2687 pArgs->M2.fLagWorker = false;
2688 }
2689 else if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4 * 2)
2690 {
2691 /* 25 % of the body with both lagging. */
2692 pArgs->M2.fLagMaster = true;
2693 pArgs->M2.fLagWorker = true;
2694 }
2695 else
2696 {
2697 /* 50% of the body with alternating lag. */
2698 pArgs->M2.fLagMaster = (iLoop & 1) == 0;
2699 pArgs->M2.fLagWorker = (iLoop & 1) == 1;
2700 }
2701
2702 /*
2703 * Sync up with the worker and collect data.
2704 */
2705 TSCDELTA_MASTER_SYNC_BEFORE(pSync);
2706 supdrvTscDeltaMethod2CollectData(pArgs->M2.pMasterData, &pArgs->M2.pWorkerData->iCurSeqNo, pArgs->M2.fLagMaster);
2707 TSCDELTA_MASTER_SYNC_AFTER(pSync);
2708
2709 /*
2710 * Process the data.
2711 */
2712# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
2713 if (iLoop >= GIP_TSC_DELTA_M2_PRIMER_LOOPS)
2714# endif
2715 supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs, iLoop);
2716
2717 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pSync);
2718 }
2719 else
2720 {
2721 /*
2722 * The worker.
2723 */
2724 TSCDELTA_OTHER_SYNC_BEFORE(pSync, (void)0);
2725 supdrvTscDeltaMethod2CollectData(pArgs->M2.pWorkerData, &pArgs->M2.pMasterData->iCurSeqNo, pArgs->M2.fLagWorker);
2726 TSCDELTA_OTHER_SYNC_AFTER(pSync);
2727 }
2728
2729 if (ASMAtomicReadBool(&pArgs->M2.fQuitEarly))
2730 break;
2731
2732 }
2733}
2734
2735
2736/**
2737 * Initializes the argument/state data belonging to algorithm \#2.
2738 *
2739 * @returns VBox status code.
2740 * @param pArgs The argument/state data.
2741 */
2742static int supdrvTscDeltaMethod2Init(PSUPDRVGIPTSCDELTARGS pArgs)
2743{
2744 pArgs->M2.pMasterData = NULL;
2745 pArgs->M2.pWorkerData = NULL;
2746
2747 uint32_t const fFlags = /*RTMEMALLOCEX_FLAGS_ANY_CTX |*/ RTMEMALLOCEX_FLAGS_ZEROED;
2748 int rc = RTMemAllocEx(sizeof(*pArgs->M2.pWorkerData), 0, fFlags, (void **)&pArgs->M2.pWorkerData);
2749 if (RT_SUCCESS(rc))
2750 rc = RTMemAllocEx(sizeof(*pArgs->M2.pMasterData), 0, fFlags, (void **)&pArgs->M2.pMasterData);
2751 return rc;
2752}
2753
2754
2755/**
2756 * Undoes what supdrvTscDeltaMethod2Init() did.
2757 *
2758 * @param pArgs The argument/state data.
2759 */
2760static void supdrvTscDeltaMethod2Delete(PSUPDRVGIPTSCDELTARGS pArgs)
2761{
2762 RTMemFreeEx(pArgs->M2.pMasterData, sizeof(*pArgs->M2.pMasterData));
2763 RTMemFreeEx(pArgs->M2.pWorkerData, sizeof(*pArgs->M2.pWorkerData));
2764# if 0
2765 SUPR0Printf("cHits=%d m=%d w=%d\n", pArgs->M2.cHits, pArgs->pMaster->idApic, pArgs->pWorker->idApic);
2766# endif
2767}
2768
2769
2770#endif /* GIP_TSC_DELTA_METHOD_2 */
2771
2772
/**
 * Callback used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
 * and compute the delta between them.
 *
 * The callback is expected to run on both the master and the worker CPU; each
 * instance works out its role by comparing @a idCpu with the master's ID.  The
 * result ends up in the worker's GIP entry (i64TSCDelta), where INT64_MAX
 * means that no delta was obtained.
 *
 * @param   idCpu       The CPU we are currently scheduled on.
 * @param   pvUser1     Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
 * @param   pvUser2     Unused.
 *
 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
 *          read the TSC at exactly the same time on both the master and the
 *          worker CPUs. Due to DMA, bus arbitration, cache locality,
 *          contention, SMI, pipelining etc. there is no guaranteed way of
 *          doing this on x86 CPUs.
 */
static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPDRVGIPTSCDELTARGS   pArgs         = (PSUPDRVGIPTSCDELTARGS)pvUser1;
    PSUPDRVDEVEXT           pDevExt       = pArgs->pDevExt;
    PSUPTSCDELTASYNC        pSync         = pDevExt->pTscDeltaSync;
    PSUPGIPCPU              pGipCpuWorker = pArgs->pWorker;
    PSUPGIPCPU              pGipCpuMaster = pArgs->pMaster;
    RTCPUID                 idMaster      = pArgs->idMaster;
    uint32_t                iTry;

    /* A bit of paranoia first. */
    if (!pGipCpuMaster || !pGipCpuWorker)
        return;

    /* If the CPU isn't part of the measurement, return immediately. */
    if (   idCpu != idMaster
        && idCpu != pGipCpuWorker->idCpu)
        return;

    /* If the IPRT API isn't concurrent safe, the master and worker wait for each other
       with a timeout to avoid deadlocking the entire system. */
    if (!RTMpOnAllIsConcurrentSafe())
    {
        /** @todo This was introduced for Windows, but since Windows doesn't use this
         *        code path any longer (as DPC timeouts BSOD regardless of interrupts,
         *        see @bugref{6710} comment 81), eventually phase it out. */
        uint64_t       uTscNow;
        uint64_t       uTscStart;
        uint64_t const cWaitTicks = 130000; /* Arbitrary value, can be tweaked later. */

        ASMSerializeInstruction();
        uTscStart = ASMReadTSC();
        if (idCpu == idMaster)
        {
            /* Master: announce ourselves, then wait for the worker's answer. */
            ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_MASTER);
            while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_WORKER)
            {
                ASMSerializeInstruction();
                uTscNow = ASMReadTSC();
                if (uTscNow - uTscStart > cWaitTicks)
                {
                    /* Set the worker delta to indicate failure, not the master. */
                    /* NOTE(review): the sync variable is left at PRESTART_MASTER on this path;
                       presumably the caller resets it before the next measurement - confirm. */
                    ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
                    return;
                }

                ASMNopPause();
            }
        }
        else
        {
            /* Worker: wait for the master's announcement, then answer it. */
            while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_PRESTART_MASTER)
            {
                ASMSerializeInstruction();
                uTscNow = ASMReadTSC();
                if (uTscNow - uTscStart > cWaitTicks)
                {
                    /* Timed out waiting for the master: flag the measurement as failed. */
                    ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
                    return;
                }

                ASMNopPause();
            }
            ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_PRESTART_WORKER);
        }
    }

    /*
     * Retry loop.
     */
    Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
    for (iTry = 0; iTry < 12; iTry++)
    {
        /*
         * Do the measurements.
         */
#ifdef GIP_TSC_DELTA_METHOD_1
        supdrvTscDeltaMethod1Loop(pArgs, pSync, idCpu == idMaster, iTry);
#elif defined(GIP_TSC_DELTA_METHOD_2)
        supdrvTscDeltaMethod2Loop(pArgs, pSync, idCpu == idMaster, iTry);
#else
# error "huh??"
#endif

        /*
         * Success?  If so, stop trying.
         */
        if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
        {
            /* Each CPU moves its own entry from the pending set to the obtained set. */
            if (idCpu == idMaster)
            {
                RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuMaster->iCpuSet);
                RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuMaster->iCpuSet);
            }
            else
            {
                RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
                RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
            }
            break;
        }
    }
}
2890
2891
2892/**
2893 * Clears TSC delta related variables.
2894 *
2895 * Clears all TSC samples as well as the delta synchronization variable on the
2896 * all the per-CPU structs. Optionally also clears the per-cpu deltas too.
2897 *
2898 * @param pDevExt Pointer to the device instance data.
2899 * @param fClearDeltas Whether the deltas are also to be cleared.
2900 */
2901DECLINLINE(void) supdrvClearTscSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas)
2902{
2903 unsigned iCpu;
2904 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2905 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
2906 {
2907 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
2908 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
2909 if (fClearDeltas)
2910 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
2911 }
2912 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
2913}
2914
2915
2916/**
2917 * Measures the TSC delta between the master GIP CPU and one specified worker
2918 * CPU.
2919 *
2920 * @returns VBox status code.
2921 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED on pure measurement
2922 * failure.
2923 * @param pDevExt Pointer to the device instance data.
2924 * @param idxWorker The index of the worker CPU from the GIP's array of
2925 * CPUs.
2926 *
2927 * @remarks This must be called with preemption enabled!
2928 */
2929static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
2930{
2931 int rc;
2932 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2933 RTCPUID idMaster = pDevExt->idGipMaster;
2934 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
2935 PSUPGIPCPU pGipCpuMaster;
2936 uint32_t iGipCpuMaster;
2937
2938 /* Validate input a bit. */
2939 AssertReturn(pGip, VERR_INVALID_PARAMETER);
2940 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
2941 Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2942
2943 /*
2944 * Don't attempt measuring the delta for the GIP master.
2945 */
2946 if (pGipCpuWorker->idCpu == idMaster)
2947 {
2948 if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
2949 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
2950 return VINF_SUCCESS;
2951 }
2952
2953 /*
2954 * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
2955 * try pick a different master. (This fudge only works with multi core systems.)
2956 * ASSUMES related threads have adjacent APIC IDs. ASSUMES two threads per core.
2957 */
2958 iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
2959 AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
2960 pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
2961 if ( (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
2962 && ASMHasCpuId()
2963 && ASMIsValidStdRange(ASMCpuId_EAX(0))
2964 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
2965 && pGip->cOnlineCpus > 2)
2966 {
2967 uint32_t i;
2968 for (i = 0; i < pGip->cCpus; i++)
2969 if ( i != iGipCpuMaster
2970 && i != idxWorker
2971 && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
2972 && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
2973 && pGip->aCPUs[i].idCpu != NIL_RTCPUID
2974 && pGip->aCPUs[i].idCpu != idMaster /* paranoia starts here... */
2975 && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu
2976 && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
2977 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic)
2978 {
2979 iGipCpuMaster = i;
2980 pGipCpuMaster = &pGip->aCPUs[i];
2981 idMaster = pGipCpuMaster->idCpu;
2982 break;
2983 }
2984 }
2985
2986 /*
2987 * Set the master TSC as the initiator. This serializes delta measurments.
2988 */
2989 while (!ASMAtomicCmpXchgU32(&pDevExt->idTscDeltaInitiator, idMaster, NIL_RTCPUID))
2990 {
2991 /*
2992 * Sleep here rather than spin as there is a parallel measurement
2993 * being executed and that can take a good while to be done.
2994 */
2995 RTThreadSleep(1);
2996 }
2997
2998 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
2999 {
3000 /*
3001 * Initialize data package for the RTMpOnAll callback.
3002 */
3003 SUPDRVGIPTSCDELTARGS Args;
3004 RT_ZERO(Args);
3005 Args.pWorker = pGipCpuWorker;
3006 Args.pMaster = pGipCpuMaster;
3007 Args.idMaster = idMaster;
3008 Args.pDevExt = pDevExt;
3009#ifdef GIP_TSC_DELTA_METHOD_1
3010 rc = supdrvTscDeltaMethod1Init(&Args);
3011#elif defined(GIP_TSC_DELTA_METHOD_2)
3012 rc = supdrvTscDeltaMethod2Init(&Args);
3013#else
3014# error "huh?"
3015#endif
3016 if (RT_SUCCESS(rc))
3017 {
3018 /*
3019 * Fire TSC-read workers on all CPUs but only synchronize between master
3020 * and one worker to ease memory contention.
3021 */
3022 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
3023 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
3024
3025 rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, &Args, NULL);
3026 if (RT_SUCCESS(rc))
3027 {
3028 if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
3029 {
3030 /*
3031 * Work the TSC delta applicability rating. It starts
3032 * optimistic in supdrvGipInit, we downgrade it here.
3033 */
3034 SUPGIPUSETSCDELTA enmRating;
3035 if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
3036 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
3037 enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
3038 else if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
3039 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
3040 enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
3041 else
3042 enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
3043 if (pGip->enmUseTscDelta < enmRating)
3044 {
3045 AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
3046 ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
3047 }
3048 }
3049 else
3050 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
3051 }
3052 }
3053
3054#ifdef GIP_TSC_DELTA_METHOD_1
3055 supdrvTscDeltaMethod1Delete(&Args);
3056#elif defined(GIP_TSC_DELTA_METHOD_2)
3057 supdrvTscDeltaMethod2Delete(&Args);
3058#else
3059# error "huh?"
3060#endif
3061 }
3062 else
3063 rc = VERR_CPU_OFFLINE;
3064
3065 ASMAtomicWriteU32(&pDevExt->idTscDeltaInitiator, NIL_RTCPUID);
3066 return rc;
3067}
3068
3069
3070/**
3071 * Performs the initial measurements of the TSC deltas between CPUs.
3072 *
3073 * This is called by supdrvGipCreate or triggered by it if threaded.
3074 *
3075 * @returns VBox status code.
3076 * @param pDevExt Pointer to the device instance data.
3077 *
3078 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
3079 * idCpu, GIP's online CPU set which are populated in
3080 * supdrvGipInitOnCpu().
3081 */
3082static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt)
3083{
3084 PSUPGIPCPU pGipCpuMaster;
3085 unsigned iCpu;
3086 unsigned iOddEven;
3087 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3088 uint32_t idxMaster = UINT32_MAX;
3089 int rc = VINF_SUCCESS;
3090 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
3091
3092 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
3093
3094 /*
3095 * Pick the first CPU online as the master TSC and make it the new GIP master based
3096 * on the APIC ID.
3097 *
3098 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
3099 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
3100 * master as this point since the sync/async timer isn't created yet.
3101 */
3102 supdrvClearTscSamples(pDevExt, true /* fClearDeltas */);
3103 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
3104 {
3105 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
3106 if (idxCpu != UINT16_MAX)
3107 {
3108 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
3109 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
3110 {
3111 idxMaster = idxCpu;
3112 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
3113 break;
3114 }
3115 }
3116 }
3117 AssertReturn(idxMaster != UINT32_MAX, VERR_CPU_NOT_FOUND);
3118 pGipCpuMaster = &pGip->aCPUs[idxMaster];
3119 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpuMaster->idCpu);
3120
3121 /*
3122 * If there is only a single CPU online we have nothing to do.
3123 */
3124 if (pGip->cOnlineCpus <= 1)
3125 {
3126 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
3127 return VINF_SUCCESS;
3128 }
3129
3130 /*
3131 * Loop thru the GIP CPU array and get deltas for each CPU (except the
3132 * master). We do the CPUs with the even numbered APIC IDs first so that
3133 * we've got alternative master CPUs to pick from on hyper-threaded systems.
3134 */
3135 for (iOddEven = 0; iOddEven < 2; iOddEven++)
3136 {
3137 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
3138 {
3139 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
3140 if ( iCpu != idxMaster
3141 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
3142 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
3143 {
3144 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
3145 if (RT_FAILURE(rc))
3146 {
3147 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
3148 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
3149 break;
3150 }
3151
3152 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
3153 {
3154 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
3155 rc = VERR_TRY_AGAIN;
3156 break;
3157 }
3158 }
3159 }
3160 }
3161
3162 return rc;
3163}
3164
3165
3166#ifdef SUPDRV_USE_TSC_DELTA_THREAD
3167
3168/**
3169 * Switches the TSC-delta measurement thread into the butchered state.
3170 *
3171 * @returns VBox status code.
3172 * @param pDevExt Pointer to the device instance data.
3173 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
3174 * @param pszFailed An error message to log.
3175 * @param rcFailed The error code to exit the thread with.
3176 */
3177static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
3178{
3179 if (!fSpinlockHeld)
3180 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3181
3182 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
3183 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3184 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
3185 return rcFailed;
3186}
3187
3188
3189/**
3190 * The TSC-delta measurement thread.
3191 *
3192 * @returns VBox status code.
3193 * @param hThread The thread handle.
3194 * @param pvUser Opaque pointer to the device instance data.
3195 */
3196static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
3197{
3198 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3199 bool fInitialMeasurement = true;
3200 uint32_t cConsecutiveTimeouts = 0;
3201 int rc = VERR_INTERNAL_ERROR_2;
3202 for (;;)
3203 {
3204 /*
3205 * Switch on the current state.
3206 */
3207 SUPDRVTSCDELTATHREADSTATE enmState;
3208 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3209 enmState = pDevExt->enmTscDeltaThreadState;
3210 switch (enmState)
3211 {
3212 case kTscDeltaThreadState_Creating:
3213 {
3214 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
3215 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
3216 if (RT_FAILURE(rc))
3217 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
3218 /* fall thru */
3219 }
3220
3221 case kTscDeltaThreadState_Listening:
3222 {
3223 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3224
3225 /* Simple adaptive timeout. */
3226 if (cConsecutiveTimeouts++ == 10)
3227 {
3228 if (pDevExt->cMsTscDeltaTimeout == 1) /* 10 ms */
3229 pDevExt->cMsTscDeltaTimeout = 10;
3230 else if (pDevExt->cMsTscDeltaTimeout == 10) /* +100 ms */
3231 pDevExt->cMsTscDeltaTimeout = 100;
3232 else if (pDevExt->cMsTscDeltaTimeout == 100) /* +1000 ms */
3233 pDevExt->cMsTscDeltaTimeout = 500;
3234 cConsecutiveTimeouts = 0;
3235 }
3236 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
3237 if ( RT_FAILURE(rc)
3238 && rc != VERR_TIMEOUT)
3239 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
3240 RTThreadUserReset(pDevExt->hTscDeltaThread);
3241 break;
3242 }
3243
3244 case kTscDeltaThreadState_WaitAndMeasure:
3245 {
3246 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
3247 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
3248 if (RT_FAILURE(rc))
3249 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
3250 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3251 pDevExt->cMsTscDeltaTimeout = 1;
3252 RTThreadSleep(10);
3253 /* fall thru */
3254 }
3255
3256 case kTscDeltaThreadState_Measuring:
3257 {
3258 cConsecutiveTimeouts = 0;
3259 if (fInitialMeasurement)
3260 {
3261 int cTries = 8;
3262 int cMsWaitPerTry = 10;
3263 fInitialMeasurement = false;
3264 do
3265 {
3266 rc = supdrvMeasureInitialTscDeltas(pDevExt);
3267 if ( RT_SUCCESS(rc)
3268 || ( RT_FAILURE(rc)
3269 && rc != VERR_TRY_AGAIN
3270 && rc != VERR_CPU_OFFLINE))
3271 {
3272 break;
3273 }
3274 RTThreadSleep(cMsWaitPerTry);
3275 } while (cTries-- > 0);
3276 }
3277 else
3278 {
3279 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3280 unsigned iCpu;
3281
3282 /* Measure TSC-deltas only for the CPUs that are in the set. */
3283 rc = VINF_SUCCESS;
3284 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
3285 {
3286 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
3287 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX
3288 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
3289 {
3290 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
3291 }
3292 }
3293 }
3294 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3295 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
3296 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
3297 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3298 Assert(rc != VERR_NOT_AVAILABLE); /* VERR_NOT_AVAILABLE is used as the initial value. */
3299 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
3300 break;
3301 }
3302
3303 case kTscDeltaThreadState_Terminating:
3304 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
3305 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3306 return VINF_SUCCESS;
3307
3308 case kTscDeltaThreadState_Butchered:
3309 default:
3310 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
3311 }
3312 }
3313
3314 return rc;
3315}
3316
3317
3318/**
3319 * Waits for the TSC-delta measurement thread to respond to a state change.
3320 *
3321 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
3322 * other error code on internal error.
3323 *
3324 * @param pThis Pointer to the grant service instance data.
3325 * @param enmCurState The current state.
3326 * @param enmNewState The new state we're waiting for it to enter.
3327 */
3328static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
3329 SUPDRVTSCDELTATHREADSTATE enmNewState)
3330{
3331 /*
3332 * Wait a short while for the expected state transition.
3333 */
3334 int rc;
3335 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
3336 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3337 if (pDevExt->enmTscDeltaThreadState == enmNewState)
3338 {
3339 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3340 rc = VINF_SUCCESS;
3341 }
3342 else if (pDevExt->enmTscDeltaThreadState == enmCurState)
3343 {
3344 /*
3345 * Wait longer if the state has not yet transitioned to the one we want.
3346 */
3347 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3348 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
3349 if ( RT_SUCCESS(rc)
3350 || rc == VERR_TIMEOUT)
3351 {
3352 /*
3353 * Check the state whether we've succeeded.
3354 */
3355 SUPDRVTSCDELTATHREADSTATE enmState;
3356 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3357 enmState = pDevExt->enmTscDeltaThreadState;
3358 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3359 if (enmState == enmNewState)
3360 rc = VINF_SUCCESS;
3361 else if (enmState == enmCurState)
3362 {
3363 rc = VERR_TIMEOUT;
3364 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
3365 enmNewState));
3366 }
3367 else
3368 {
3369 rc = VERR_INTERNAL_ERROR;
3370 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
3371 enmState, enmNewState));
3372 }
3373 }
3374 else
3375 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
3376 }
3377 else
3378 {
3379 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3380 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
3381 rc = VERR_INTERNAL_ERROR;
3382 }
3383
3384 return rc;
3385}
3386
3387
3388/**
3389 * Waits for TSC-delta measurements to be completed for all online CPUs.
3390 *
3391 * @returns VBox status code.
3392 * @param pDevExt Pointer to the device instance data.
3393 */
3394static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
3395{
3396 int cTriesLeft = 5;
3397 int cMsTotalWait;
3398 int cMsWaited = 0;
3399 int cMsWaitGranularity = 1;
3400
3401 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3402 AssertReturn(pGip, VERR_INVALID_POINTER);
3403
3404 if (RT_UNLIKELY(pDevExt->hTscDeltaThread == NIL_RTTHREAD))
3405 return VERR_THREAD_NOT_WAITABLE;
3406
3407 cMsTotalWait = RT_MIN(pGip->cPresentCpus + 10, 200);
3408 while (cTriesLeft-- > 0)
3409 {
3410 if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
3411 return VINF_SUCCESS;
3412 RTThreadSleep(cMsWaitGranularity);
3413 cMsWaited += cMsWaitGranularity;
3414 if (cMsWaited >= cMsTotalWait)
3415 break;
3416 }
3417
3418 return VERR_TIMEOUT;
3419}
3420
3421
3422/**
3423 * Terminates the actual thread running supdrvTscDeltaThread().
3424 *
3425 * This is an internal worker function for supdrvTscDeltaThreadInit() and
3426 * supdrvTscDeltaTerm().
3427 *
3428 * @param pDevExt Pointer to the device instance data.
3429 */
3430static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
3431{
3432 int rc;
3433 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3434 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
3435 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3436 RTThreadUserSignal(pDevExt->hTscDeltaThread);
3437 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
3438 if (RT_FAILURE(rc))
3439 {
3440 /* Signal a few more times before giving up. */
3441 int cTriesLeft = 5;
3442 while (--cTriesLeft > 0)
3443 {
3444 RTThreadUserSignal(pDevExt->hTscDeltaThread);
3445 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
3446 if (rc != VERR_TIMEOUT)
3447 break;
3448 }
3449 }
3450}
3451
3452
3453/**
3454 * Initializes and spawns the TSC-delta measurement thread.
3455 *
3456 * A thread is required for servicing re-measurement requests from events like
3457 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
3458 * under all contexts on all OSs.
3459 *
3460 * @returns VBox status code.
3461 * @param pDevExt Pointer to the device instance data.
3462 *
3463 * @remarks Must only be called -after- initializing GIP and setting up MP
3464 * notifications!
3465 */
3466static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
3467{
3468 int rc;
3469 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
3470 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
3471 if (RT_SUCCESS(rc))
3472 {
3473 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
3474 if (RT_SUCCESS(rc))
3475 {
3476 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
3477 pDevExt->cMsTscDeltaTimeout = 1;
3478 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
3479 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
3480 if (RT_SUCCESS(rc))
3481 {
3482 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
3483 if (RT_SUCCESS(rc))
3484 {
3485 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
3486 return rc;
3487 }
3488
3489 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
3490 supdrvTscDeltaThreadTerminate(pDevExt);
3491 }
3492 else
3493 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
3494 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
3495 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
3496 }
3497 else
3498 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
3499 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
3500 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
3501 }
3502 else
3503 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
3504
3505 return rc;
3506}
3507
3508
3509/**
3510 * Terminates the TSC-delta measurement thread and cleanup.
3511 *
3512 * @param pDevExt Pointer to the device instance data.
3513 */
3514static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
3515{
3516 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
3517 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
3518 {
3519 supdrvTscDeltaThreadTerminate(pDevExt);
3520 }
3521
3522 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
3523 {
3524 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
3525 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
3526 }
3527
3528 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
3529 {
3530 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
3531 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
3532 }
3533
3534 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
3535}
3536
3537#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
3538
3539/**
3540 * Measure the TSC delta for the CPU given by its CPU set index.
3541 *
3542 * @returns VBox status code.
3543 * @retval VERR_INTERRUPTED if interrupted while waiting.
3544 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if we were unable to get a
3545 * measurment.
3546 * @retval VERR_CPU_OFFLINE if the specified CPU is offline.
3547 * @retval VERR_CPU_OFFLINE if the specified CPU is offline.
3548 *
3549 * @param pSession The caller's session. GIP must've been mapped.
3550 * @param iCpuSet The CPU set index of the CPU to measure.
3551 * @param fFlags Flags, SUP_TSCDELTA_MEASURE_F_XXX.
3552 * @param cMsWaitRetry Number of milliseconds to wait between each retry.
3553 * @param cMsWaitThread Number of milliseconds to wait for the thread to get
3554 * ready.
3555 * @param cTries Number of times to try, pass 0 for the default.
3556 */
3557SUPR0DECL(int) SUPR0TscDeltaMeasureBySetIndex(PSUPDRVSESSION pSession, uint32_t iCpuSet, uint32_t fFlags,
3558 RTMSINTERVAL cMsWaitRetry, RTMSINTERVAL cMsWaitThread, uint32_t cTries)
3559{
3560 PSUPDRVDEVEXT pDevExt;
3561 PSUPGLOBALINFOPAGE pGip;
3562 uint16_t iGipCpu;
3563 int rc;
3564#ifdef SUPDRV_USE_TSC_DELTA_THREAD
3565 uint64_t msTsStartWait;
3566 uint32_t iWaitLoop;
3567#endif
3568
3569 /*
3570 * Validate and adjust the input.
3571 */
3572 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
3573 if (!pSession->fGipReferenced)
3574 return VERR_WRONG_ORDER;
3575
3576 pDevExt = pSession->pDevExt;
3577 AssertReturn(SUP_IS_DEVEXT_VALID(pDevExt), VERR_INVALID_PARAMETER);
3578
3579 pGip = pDevExt->pGip;
3580 AssertPtrReturn(pGip, VERR_INTERNAL_ERROR_2);
3581
3582 AssertReturn(iCpuSet < RTCPUSET_MAX_CPUS, VERR_INVALID_CPU_INDEX);
3583 AssertReturn(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx), VERR_INVALID_CPU_INDEX);
3584 iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
3585 AssertReturn(iGipCpu < pGip->cCpus, VERR_INVALID_CPU_INDEX);
3586
3587 if (fFlags & ~SUP_TSCDELTA_MEASURE_F_VALID_MASK)
3588 return VERR_INVALID_FLAGS;
3589
3590 if (cTries == 0)
3591 cTries = 12;
3592 else if (cTries > 256)
3593 cTries = 256;
3594
3595 if (cMsWaitRetry > 1000)
3596 cMsWaitRetry = 1000;
3597
3598 /*
3599 * The request is a noop if the TSC delta isn't being used.
3600 */
3601 if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
3602 return VINF_SUCCESS;
3603
3604#ifdef SUPDRV_USE_TSC_DELTA_THREAD
3605 /*
3606 * Has the TSC already been measured and we're not forced to redo it?
3607 */
3608 if ( pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX
3609 && !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE))
3610 return VINF_SUCCESS;
3611
3612 /*
3613 * Asynchronous request? Forward it to the thread, no waiting.
3614 */
3615 if (fFlags & SUP_TSCDELTA_MEASURE_F_ASYNC)
3616 {
3617 /** @todo Async. doesn't implement options like retries, waiting. We'll need
3618 * to pass those options to the thread somehow and implement it in the
3619 * thread. Check if anyone uses/needs fAsync before implementing this. */
3620 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3621 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
3622 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
3623 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
3624 {
3625 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
3626 rc = VINF_SUCCESS;
3627 }
3628 else
3629 rc = VERR_THREAD_IS_DEAD;
3630 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3631 RTThreadUserSignal(pDevExt->hTscDeltaThread);
3632 return VINF_SUCCESS;
3633 }
3634
3635 /*
3636 * If a TSC-delta measurement request is already being serviced by the thread,
3637 * wait 'cTries' times if a retry-timeout is provided, otherwise bail as busy.
3638 */
3639 msTsStartWait = RTTimeSystemMilliTS();
3640 for (iWaitLoop = 0;; iWaitLoop++)
3641 {
3642 uint64_t cMsElapsed;
3643 SUPDRVTSCDELTATHREADSTATE enmState;
3644 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3645 enmState = pDevExt->enmTscDeltaThreadState;
3646 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3647
3648 if (enmState == kTscDeltaThreadState_Measuring)
3649 { /* Must wait, the thread is busy. */ }
3650 else if (enmState == kTscDeltaThreadState_WaitAndMeasure)
3651 { /* Must wait, this state only says what will happen next. */ }
3652 else if (enmState == kTscDeltaThreadState_Terminating)
3653 { /* Must wait, this state only says what should happen next. */ }
3654 else
3655 break; /* All other states, the thread is either idly listening or dead. */
3656
3657 /* Wait or fail. */
3658 if (cMsWaitThread == 0)
3659 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
3660 cMsElapsed = RTTimeSystemMilliTS() - msTsStartWait;
3661 if (cMsElapsed >= cMsWaitThread)
3662 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
3663
3664 rc = RTThreadSleep(RT_MIN(cMsWaitThread - cMsElapsed, RT_MIN(iWaitLoop + 1, 10)));
3665 if (rc == VERR_INTERRUPTED)
3666 return rc;
3667 }
3668#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
3669
3670 /*
3671 * Try measure the TSC delta the given number of times.
3672 */
3673 for (;;)
3674 {
3675 /* Unless we're forced to measure the delta, check whether it's done already. */
3676 if ( !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE)
3677 && pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX)
3678 {
3679 rc = VINF_SUCCESS;
3680 break;
3681 }
3682
3683 /* Measure it. */
3684 rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
3685 if (rc != VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED)
3686 {
3687 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
3688 break;
3689 }
3690
3691 /* Retry? */
3692 if (cTries <= 1)
3693 break;
3694 cTries--;
3695
3696 if (cMsWaitRetry)
3697 {
3698 rc = RTThreadSleep(cMsWaitRetry);
3699 if (rc == VERR_INTERRUPTED)
3700 break;
3701 }
3702 }
3703
3704 return rc;
3705}
3706
3707
3708/**
3709 * Service a TSC-delta measurement request.
3710 *
3711 * @returns VBox status code.
3712 * @param pDevExt Pointer to the device instance data.
3713 * @param pSession The support driver session.
3714 * @param pReq Pointer to the TSC-delta measurement request.
3715 */
3716int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
3717{
3718 uint32_t cTries;
3719 uint32_t iCpuSet;
3720 uint32_t fFlags;
3721 RTMSINTERVAL cMsWaitRetry;
3722
3723 /*
3724 * Validate and adjust/resolve the input so they can be passed onto SUPR0TscDeltaMeasureBySetIndex.
3725 */
3726 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
3727
3728 if (pReq->u.In.idCpu == NIL_RTCPUID)
3729 return VERR_INVALID_CPU_ID;
3730 iCpuSet = RTMpCpuIdToSetIndex(pReq->u.In.idCpu);
3731 if (iCpuSet >= RTCPUSET_MAX_CPUS)
3732 return VERR_INVALID_CPU_ID;
3733
3734 cTries = pReq->u.In.cRetries == 0 ? 0 : (uint32_t)pReq->u.In.cRetries + 1;
3735
3736 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
3737
3738 fFlags = 0;
3739 if (pReq->u.In.fAsync)
3740 fFlags |= SUP_TSCDELTA_MEASURE_F_ASYNC;
3741 if (pReq->u.In.fForce)
3742 fFlags |= SUP_TSCDELTA_MEASURE_F_FORCE;
3743
3744 return SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet, fFlags, cMsWaitRetry,
3745 cTries == 0 ? 5*RT_MS_1SEC : cMsWaitRetry * cTries /*cMsWaitThread*/,
3746 cTries);
3747}
3748
3749
3750/**
3751 * Reads TSC with delta applied.
3752 *
3753 * Will try to resolve delta value INT64_MAX before applying it. This is the
3754 * main purpose of this function, to handle the case where the delta needs to be
3755 * determined.
3756 *
3757 * @returns VBox status code.
3758 * @param pDevExt Pointer to the device instance data.
3759 * @param pSession The support driver session.
3760 * @param pReq Pointer to the TSC-read request.
3761 */
3762int VBOXCALL supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
3763{
3764 PSUPGLOBALINFOPAGE pGip;
3765 int rc;
3766
3767 /*
3768 * Validate. We require the client to have mapped GIP (no asserting on
3769 * ring-3 preconditions).
3770 */
3771 AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
3772 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
3773 return VERR_WRONG_ORDER;
3774 pGip = pDevExt->pGip;
3775 AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
3776
3777 /*
3778 * We're usually here because we need to apply delta, but we shouldn't be
3779 * upset if the GIP is some different mode.
3780 */
3781 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
3782 {
3783 uint32_t cTries = 0;
3784 for (;;)
3785 {
3786 /*
3787 * Start by gathering the data, using CLI for disabling preemption
3788 * while we do that.
3789 */
3790 RTCCUINTREG uFlags = ASMIntDisableFlags();
3791 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
3792 int iGipCpu;
3793 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
3794 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
3795 {
3796 int64_t i64Delta = pGip->aCPUs[iGipCpu].i64TSCDelta;
3797 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
3798 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
3799 ASMSetFlags(uFlags);
3800
3801 /*
3802 * If we're lucky we've got a delta, but no predicitions here
3803 * as this I/O control is normally only used when the TSC delta
3804 * is set to INT64_MAX.
3805 */
3806 if (i64Delta != INT64_MAX)
3807 {
3808 pReq->u.Out.u64AdjustedTsc -= i64Delta;
3809 rc = VINF_SUCCESS;
3810 break;
3811 }
3812
3813 /* Give up after a few times. */
3814 if (cTries >= 4)
3815 {
3816 rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
3817 break;
3818 }
3819
3820 /* Need to measure the delta an try again. */
3821 rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
3822 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
3823 }
3824 else
3825 {
3826 /* This really shouldn't happen. */
3827 AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
3828 pReq->u.Out.idApic = ASMGetApicId();
3829 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
3830 ASMSetFlags(uFlags);
3831 rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
3832 break;
3833 }
3834 }
3835 }
3836 else
3837 {
3838 /*
3839 * No delta to apply. Easy. Deal with preemption the lazy way.
3840 */
3841 RTCCUINTREG uFlags = ASMIntDisableFlags();
3842 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
3843 int iGipCpu;
3844 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
3845 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
3846 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
3847 else
3848 pReq->u.Out.idApic = ASMGetApicId();
3849 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
3850 ASMSetFlags(uFlags);
3851 rc = VINF_SUCCESS;
3852 }
3853
3854 return rc;
3855}
3856
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette