VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp@ 57252

Last change on this file since 57252 was 57233, checked in by vboxsync, 9 years ago

SUPDrvGip: Comment typos.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 177.6 KB
Line 
1/* $Id: SUPDrvGip.cpp 57233 2015-08-07 09:33:40Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/uint128.h>
58#include <iprt/x86.h>
59
60#include <VBox/param.h>
61#include <VBox/log.h>
62#include <VBox/err.h>
63
64#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
65# include "dtrace/SUPDrv.h"
66#else
67/* ... */
68#endif
69
70
71/*******************************************************************************
72* Defined Constants And Macros *
73*******************************************************************************/
74/** The frequency by which we recalculate the u32UpdateHz and
75 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
76 *
77 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
78 */
79#define GIP_UPDATEHZ_RECALC_FREQ 0x800
80
81/** A reserved TSC value used for synchronization as well as measurement of
82 * TSC deltas. */
83#define GIP_TSC_DELTA_RSVD UINT64_MAX
84/** The number of TSC delta measurement loops in total (includes primer and
85 * read-time loops). */
86#define GIP_TSC_DELTA_LOOPS 96
87/** The number of cache primer loops. */
88#define GIP_TSC_DELTA_PRIMER_LOOPS 4
90/** The number of loops until we keep computing the minimum read time. */
90#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
91
92/** The TSC frequency refinement period in seconds.
93 * The timer fires after 200ms, then every second, this value just says when
94 * to stop it after that. */
95#define GIP_TSC_REFINE_PERIOD_IN_SECS 12
96/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
97#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
98/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
99#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
100/** The TSC delta value for the initial GIP master - 0 in regular builds.
101 * To test the delta code this can be set to a non-zero value. */
102#if 0
103# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
104#else
105# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
106#endif
107
108AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
109AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
110
111/** @def VBOX_SVN_REV
112 * The makefile should define this if it can. */
113#ifndef VBOX_SVN_REV
114# define VBOX_SVN_REV 0
115#endif
116
117#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
118# define DO_NOT_START_GIP
119#endif
120
121
122/*******************************************************************************
123* Internal Functions *
124*******************************************************************************/
125static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
126static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
127static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask);
128static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz);
129static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas);
130#ifdef SUPDRV_USE_TSC_DELTA_THREAD
131static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
132static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
133static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll);
134#else
135static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt);
136static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
137#endif
138
139
140/*******************************************************************************
141* Global Variables *
142*******************************************************************************/
143DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
144
145
146
147/*
148 *
149 * Misc Common GIP Code
150 * Misc Common GIP Code
151 * Misc Common GIP Code
152 *
153 *
154 */
155
156
157/**
158 * Finds the GIP CPU index corresponding to @a idCpu.
159 *
160 * @returns GIP CPU array index, UINT32_MAX if not found.
161 * @param pGip The GIP.
162 * @param idCpu The CPU ID.
163 */
164static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
165{
166 uint32_t i;
167 for (i = 0; i < pGip->cCpus; i++)
168 if (pGip->aCPUs[i].idCpu == idCpu)
169 return i;
170 return UINT32_MAX;
171}
172
173
174
175/*
176 *
177 * GIP Mapping and Unmapping Related Code.
178 * GIP Mapping and Unmapping Related Code.
179 * GIP Mapping and Unmapping Related Code.
180 *
181 *
182 */
183
184
185/**
186 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
187 * updating.
188 *
189 * @param pGip Pointer to the GIP.
190 * @param pGipCpu The per CPU structure for this CPU.
191 * @param u64NanoTS The current time.
192 */
193static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
194{
195 /*
196 * Here we don't really care about applying the TSC delta. The re-initialization of this
197 * value is not relevant especially while (re)starting the GIP as the first few ones will
198 * be ignored anyway, see supdrvGipDoUpdateCpu().
199 */
200 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
201 pGipCpu->u64NanoTS = u64NanoTS;
202}
203
204
205/**
206 * Set the current TSC and NanoTS value for the CPU.
207 *
208 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
209 * @param pvUser1 Pointer to the ring-0 GIP mapping.
210 * @param pvUser2 Pointer to the variable holding the current time.
211 */
212static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
213{
214 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
215 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
216
217 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
218 supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
219
220 NOREF(pvUser2);
221 NOREF(idCpu);
222}
223
224
/**
 * State structure for supdrvGipDetectGetGipCpuCallback.
 *
 * Shared by all CPUs visited by the RTMpOnAll callback; all members are
 * therefore volatile and updated with atomic operations.
 */
typedef struct SUPDRVGIPDETECTGETCPU
{
    /** Bitmap of APIC IDs that have been seen (initialized to zero).
     * Used to detect duplicate APIC IDs (paranoia). */
    uint8_t volatile bmApicId[256 / 8];
    /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
     * initially). The callback clears the methods not detected. */
    uint32_t volatile fSupported;
    /** The first callback detecting any kind of range issues (initialized to
     * NIL_RTCPUID). */
    RTCPUID volatile idCpuProblem;
} SUPDRVGIPDETECTGETCPU;
/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
242
243
244/**
245 * Checks for alternative ways of getting the CPU ID.
246 *
247 * This also checks the APIC ID, CPU ID and CPU set index values against the
248 * GIP tables.
249 *
250 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
251 * @param pvUser1 Pointer to the state structure.
252 * @param pvUser2 Pointer to the GIP.
253 */
254static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
255{
256 PSUPDRVGIPDETECTGETCPU pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
257 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser2;
258 uint32_t fSupported = 0;
259 uint16_t idApic;
260 int iCpuSet;
261
262 AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
263
264 /*
265 * Check that the CPU ID and CPU set index are interchangable.
266 */
267 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
268 if ((RTCPUID)iCpuSet == idCpu)
269 {
270 AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
271 if ( iCpuSet >= 0
272 && iCpuSet < RTCPUSET_MAX_CPUS
273 && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
274 {
275 /*
276 * Check whether the IDTR.LIMIT contains a CPU number.
277 */
278#ifdef RT_ARCH_X86
279 uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
280#else
281 uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
282#endif
283 RTIDTR Idtr;
284 ASMGetIDTR(&Idtr);
285 if (Idtr.cbIdt >= cbIdt)
286 {
287 uint32_t uTmp = Idtr.cbIdt - cbIdt;
288 uTmp &= RTCPUSET_MAX_CPUS - 1;
289 if (uTmp == idCpu)
290 {
291 RTIDTR Idtr2;
292 ASMGetIDTR(&Idtr2);
293 if (Idtr2.cbIdt == Idtr.cbIdt)
294 fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
295 }
296 }
297
298 /*
299 * Check whether RDTSCP is an option.
300 */
301 if (ASMHasCpuId())
302 {
303 if ( ASMIsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
304 && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
305 {
306 uint32_t uAux;
307 ASMReadTscWithAux(&uAux);
308 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
309 {
310 ASMNopPause();
311 ASMReadTscWithAux(&uAux);
312 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
313 fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
314 }
315 }
316 }
317 }
318 }
319
320 /*
321 * Check that the APIC ID is unique.
322 */
323 idApic = ASMGetApicId();
324 if (RT_LIKELY( idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)
325 && !ASMAtomicBitTestAndSet(pState->bmApicId, idApic)))
326 fSupported |= SUPGIPGETCPU_APIC_ID;
327 else
328 {
329 AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
330 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
331 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - duplicate APIC ID.\n",
332 idCpu, iCpuSet, idApic));
333 }
334
335 /*
336 * Check that the iCpuSet is within the expected range.
337 */
338 if (RT_UNLIKELY( iCpuSet < 0
339 || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
340 || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
341 {
342 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
343 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
344 idCpu, iCpuSet, idApic));
345 }
346 else
347 {
348 RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
349 if (RT_UNLIKELY(idCpu2 != idCpu))
350 {
351 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
352 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
353 idCpu, iCpuSet, idApic, idCpu2));
354 }
355 }
356
357 /*
358 * Update the supported feature mask before we return.
359 */
360 ASMAtomicAndU32(&pState->fSupported, fSupported);
361
362 NOREF(pvUser2);
363}
364
365
366/**
367 * Increase the timer freqency on hosts where this is possible (NT).
368 *
369 * The idea is that more interrupts is better for us... Also, it's better than
370 * we increase the timer frequence, because we might end up getting inaccurate
371 * callbacks if someone else does it.
372 *
373 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
374 */
375static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
376{
377 if (pDevExt->u32SystemTimerGranularityGrant == 0)
378 {
379 uint32_t u32SystemResolution;
380 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
381 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
382 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
383 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
384 )
385 {
386 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
387 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
388 }
389 }
390}
391
392
393/**
394 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
395 *
396 * @param pDevExt Clears u32SystemTimerGranularityGrant.
397 */
398static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
399{
400 if (pDevExt->u32SystemTimerGranularityGrant)
401 {
402 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
403 AssertRC(rc2);
404 pDevExt->u32SystemTimerGranularityGrant = 0;
405 }
406}
407
408
/**
 * Maps the GIP into userspace and/or gets the physical address of the GIP.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 * @param   ppGipR3     Where to store the address of the ring-3 mapping. (optional)
 * @param   pHCPhysGip  Where to store the physical address. (optional)
 *
 * @remark  There is no reference counting on the mapping, so one call to this
 *          function counts globally as one reference.  One call to
 *          SUPR0GipUnmap() will unmap the GIP and remove the session as a GIP
 *          user.
 */
SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
{
    int             rc;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
    RTR3PTR         pGipR3  = NIL_RTR3PTR;
    RTHCPHYS        HCPhys  = NIL_RTHCPHYS;
    LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));

    /*
     * Validate
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
    AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif
    if (pDevExt->pGip)
    {
        /*
         * Map it into ring-3 if requested (one mapping per session, reused).
         */
        rc = VINF_SUCCESS;
        if (ppGipR3)
        {
            if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
                rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
                                       RTMEM_PROT_READ, RTR0ProcHandleSelf());
            if (RT_SUCCESS(rc))
                pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
        }

        /*
         * Get physical address.
         */
        if (pHCPhysGip && RT_SUCCESS(rc))
            HCPhys = pDevExt->HCPhysGip;

        /*
         * Reference globally.
         */
        if (!pSession->fGipReferenced && RT_SUCCESS(rc))
        {
            pSession->fGipReferenced = 1;
            pDevExt->cGipUsers++;
            if (pDevExt->cGipUsers == 1)
            {
                PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
                uint64_t u64NanoTS;

                /*
                 * GIP starts/resumes updating again.  On windows we bump the
                 * host timer frequency to make sure we don't get stuck in guest
                 * mode and to get better timer (and possibly clock) accuracy.
                 */
                LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));

                supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);

                /*
                 * When resuming updating (i.e. this is not the very first
                 * mapping), round each CPU's transaction id up to the next
                 * even GIP_UPDATEHZ_RECALC_FREQ*2 boundary and clear
                 * u64NanoTSLastUpdateHz so the update Hz statistics restart
                 * from a clean slate.
                 */
                if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
                {
                    unsigned i;
                    for (i = 0; i < pGipR0->cCpus; i++)
                        ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
                                            (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
                                            & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
                    ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
                }

                /*
                 * Re-initialize the per-CPU TSC/NanoTS baselines, backdated by
                 * one update interval.  For invariant/sync TSC modes (or a
                 * single online CPU) updating CPU 0 suffices; otherwise do it
                 * on every CPU via RTMpOnAll.
                 */
                u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
                if (   pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
                    || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
                    || RTMpGetOnlineCount() == 1)
                    supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
                else
                    RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);

                /*
                 * Detect alternative ways to figure the CPU ID in ring-3 and
                 * raw-mode context.  Check the sanity of the APIC IDs, CPU IDs,
                 * and CPU set indexes while we're at it.
                 */
                if (RT_SUCCESS(rc))
                {
                    SUPDRVGIPDETECTGETCPU DetectState;
                    RT_BZERO((void *)&DetectState.bmApicId, sizeof(DetectState.bmApicId));
                    DetectState.fSupported = UINT32_MAX;
                    DetectState.idCpuProblem = NIL_RTCPUID;
                    rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, &DetectState, pGipR0);
                    if (DetectState.idCpuProblem == NIL_RTCPUID)
                    {
                        if (   DetectState.fSupported != UINT32_MAX
                            && DetectState.fSupported != 0)
                        {
                            /* Only log when the detected method set changes. */
                            if (pGipR0->fGetGipCpu != DetectState.fSupported)
                            {
                                pGipR0->fGetGipCpu = DetectState.fSupported;
                                LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", DetectState.fSupported));
                            }
                        }
                        else
                        {
                            LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
                                    DetectState.fSupported));
                            rc = VERR_UNSUPPORTED_CPU;
                        }
                    }
                    else
                    {
                        LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
                                DetectState.idCpuProblem, DetectState.idCpuProblem));
                        rc = VERR_INVALID_CPU_ID;
                    }
                }

                /*
                 * Start the GIP timer if all is well..
                 */
                if (RT_SUCCESS(rc))
                {
#ifndef DO_NOT_START_GIP
                    rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
#endif
                    rc = VINF_SUCCESS;
                }

                /*
                 * Bail out on error: roll back the global reference and the
                 * session's ring-3 mapping so the session holds no GIP state.
                 */
                if (RT_FAILURE(rc))
                {
                    LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
                    pDevExt->cGipUsers = 0;
                    pSession->fGipReferenced = 0;
                    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
                    {
                        int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
                        if (RT_SUCCESS(rc2))
                            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
                    }
                    HCPhys = NIL_RTHCPHYS;
                    pGipR3 = NIL_RTR3PTR;
                }
            }
        }
    }
    else
    {
        rc = VERR_GENERAL_FAILURE;
        Log(("SUPR0GipMap: GIP is not available!\n"));
    }
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    /*
     * Write returns.
     */
    if (pHCPhysGip)
        *pHCPhysGip = HCPhys;
    if (ppGipR3)
        *ppGipR3 = pGipR3;

#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#else
    LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#endif
    return rc;
}
602
603
/**
 * Unmaps any user mapping of the GIP and terminates all GIP access
 * from this session.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 */
SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
{
    int             rc = VINF_SUCCESS;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
                pSession,
                pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
                pSession->GipMapObjR3));
#else
    LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
#endif
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    /*
     * GIP test-mode session?  Clear the testing flag before tearing down
     * (supdrvGipSetFlags resets pSession->fGipTestMode).
     */
    if (   pSession->fGipTestMode
        && pDevExt->pGip)
    {
        supdrvGipSetFlags(pDevExt, pSession, 0, ~SUPGIP_FLAGS_TESTING_ENABLE);
        Assert(!pSession->fGipTestMode);
    }

    /*
     * Unmap anything?  Free the session's ring-3 mapping object if present.
     */
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
        AssertRC(rc);
        if (RT_SUCCESS(rc))
            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
    }

    /*
     * Dereference global GIP.  When the last user goes away, stop the GIP
     * timer and give back the elevated system timer granularity.
     */
    if (pSession->fGipReferenced && !rc)
    {
        pSession->fGipReferenced = 0;
        if (   pDevExt->cGipUsers > 0
            && !--pDevExt->cGipUsers)
        {
            LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
#ifndef DO_NOT_START_GIP
            rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
#endif
            supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
        }
    }

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    return rc;
}
677
678
679/**
680 * Gets the GIP pointer.
681 *
682 * @returns Pointer to the GIP or NULL.
683 */
684SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
685{
686 return g_pSUPGlobalInfoPage;
687}
688
689
690
691
692
693/*
694 *
695 *
696 * GIP Initialization, Termination and CPU Offline / Online Related Code.
697 * GIP Initialization, Termination and CPU Offline / Online Related Code.
698 * GIP Initialization, Termination and CPU Offline / Online Related Code.
699 *
700 *
701 */
702
703/**
704 * Used by supdrvInitRefineInvariantTscFreqTimer and supdrvGipInitMeasureTscFreq
705 * to update the TSC frequency related GIP variables.
706 *
707 * @param pGip The GIP.
708 * @param nsElapsed The number of nano seconds elapsed.
709 * @param cElapsedTscTicks The corresponding number of TSC ticks.
710 * @param iTick The tick number for debugging.
711 */
712static void supdrvGipInitSetCpuFreq(PSUPGLOBALINFOPAGE pGip, uint64_t nsElapsed, uint64_t cElapsedTscTicks, uint32_t iTick)
713{
714 /*
715 * Calculate the frequency.
716 */
717 uint64_t uCpuHz;
718 if ( cElapsedTscTicks < UINT64_MAX / RT_NS_1SEC
719 && nsElapsed < UINT32_MAX)
720 uCpuHz = ASMMultU64ByU32DivByU32(cElapsedTscTicks, RT_NS_1SEC, (uint32_t)nsElapsed);
721 else
722 {
723 RTUINT128U CpuHz, Tmp, Divisor;
724 CpuHz.s.Lo = CpuHz.s.Hi = 0;
725 RTUInt128MulU64ByU64(&Tmp, cElapsedTscTicks, RT_NS_1SEC_64);
726 RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, nsElapsed));
727 uCpuHz = CpuHz.s.Lo;
728 }
729
730 /*
731 * Update the GIP.
732 */
733 ASMAtomicWriteU64(&pGip->u64CpuHz, uCpuHz);
734 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
735 {
736 ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, uCpuHz);
737
738 /* For inspecting the frequency calcs using tstGIP-2, debugger or similar. */
739 if (iTick + 1 < pGip->cCpus)
740 ASMAtomicWriteU64(&pGip->aCPUs[iTick + 1].u64CpuHz, uCpuHz);
741 }
742}
743
744
/**
 * Timer callback function for TSC frequency refinement in invariant GIP mode.
 *
 * This is started during driver init and fires once
 * GIP_TSC_REFINE_PERIOD_IN_SECS seconds later.
 *
 * @param   pTimer      The timer.
 * @param   pvUser      Opaque pointer to the device instance data.
 * @param   iTick       The timer tick.
 */
static DECLCALLBACK(void) supdrvInitRefineInvariantTscFreqTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PSUPDRVDEVEXT       pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE  pGip = pDevExt->pGip;
    RTCPUID             idCpu;
    uint64_t            cNsElapsed;
    uint64_t            cTscTicksElapsed;
    uint64_t            nsNow;
    uint64_t            uTsc;
    RTCCUINTREG         fEFlags;

    /* Paranoia. */
    AssertReturnVoid(pGip);
    AssertReturnVoid(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);

    /*
     * If we got a power event, stop the refinement process.
     */
    if (pDevExt->fInvTscRefinePowerEvent)
    {
        int rc = RTTimerStop(pTimer); AssertRC(rc);
        return;
    }

    /*
     * Read the TSC and time, noting which CPU we are on.
     *
     * Don't bother spinning until RTTimeSystemNanoTS changes, since on
     * systems where it matters we're in a context where we cannot waste that
     * much time (DPC watchdog, called from clock interrupt).
     *
     * Interrupts are disabled so TSC, NanoTS and CPU ID are sampled on the
     * same CPU without preemption in between.
     */
    fEFlags = ASMIntDisableFlags();
    uTsc    = ASMReadTSC();
    nsNow   = RTTimeSystemNanoTS();
    idCpu   = RTMpCpuId();
    ASMSetFlags(fEFlags);

    cNsElapsed          = nsNow - pDevExt->nsStartInvarTscRefine;
    cTscTicksElapsed    = uTsc  - pDevExt->uTscStartInvarTscRefine;

    /*
     * If the above measurement was taken on a different CPU than the one we
     * started the process on, cTscTicksElapsed will need to be adjusted with
     * the TSC deltas of both the CPUs.
     *
     * We ASSUME that the delta calculation process takes less time than the
     * TSC frequency refinement timer.  If it doesn't, we'll complain and
     * drop the frequency refinement.
     *
     * Note! We cannot entirely trust enmUseTscDelta here because it's
     *       downgraded after each delta calculation.
     */
    if (   idCpu != pDevExt->idCpuInvarTscRefine
        && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        /* Resolve both CPUs to their GIP entries and TSC deltas (INT64_MAX
           marks a delta that has not been measured yet). */
        uint32_t iStartCpuSet   = RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine);
        uint32_t iStopCpuSet    = RTMpCpuIdToSetIndex(idCpu);
        uint16_t iStartGipCpu   = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
        uint16_t iStopGipCpu    = iStopCpuSet  < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet]  : UINT16_MAX;
        int64_t  iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
        int64_t  iStopTscDelta  = iStopGipCpu  < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta  : INT64_MAX;
        if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
        {
            if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
            {
                /* cTscTicksElapsed = (uTsc - iStopTscDelta) - (pDevExt->uTscStartInvarTscRefine - iStartTscDelta); */
                cTscTicksElapsed += iStartTscDelta - iStopTscDelta;
            }
        }
        /*
         * Allow 5 times the refinement period to elapse before we give up on the TSC delta
         * calculations.
         */
        else if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * 5 * RT_NS_1SEC_64)
        {
            SUPR0Printf("vboxdrv: Failed to refine invariant TSC frequency because deltas are unavailable after %u (%u) seconds\n",
                        (uint32_t)(cNsElapsed / RT_NS_1SEC), GIP_TSC_REFINE_PERIOD_IN_SECS);
            SUPR0Printf("vboxdrv: start: %u, %u, %#llx  stop: %u, %u, %#llx\n",
                        iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
            int rc = RTTimerStop(pTimer); AssertRC(rc);
            return;
        }
    }

    /*
     * Calculate and update the CPU frequency variables in GIP.
     *
     * If there is a GIP user already and we've already refined the frequency
     * a couple of times, don't update it as we want a stable frequency value
     * for all VMs.
     */
    if (   pDevExt->cGipUsers == 0
        || cNsElapsed < RT_NS_1SEC * 2)
    {
        supdrvGipInitSetCpuFreq(pGip, cNsElapsed, cTscTicksElapsed, (uint32_t)iTick);

        /*
         * Stop the timer once we've reached the defined refinement period.
         */
        if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * RT_NS_1SEC_64)
        {
            int rc = RTTimerStop(pTimer);
            AssertRC(rc);
        }
    }
    else
    {
        /* A GIP user appeared and we are past the initial window - freeze the
           frequency by stopping the refinement timer. */
        int rc = RTTimerStop(pTimer);
        AssertRC(rc);
    }
}
868
869
870/**
871 * @callback_method_impl{FNRTPOWERNOTIFICATION}
872 */
873static DECLCALLBACK(void) supdrvGipPowerNotificationCallback(RTPOWEREVENT enmEvent, void *pvUser)
874{
875 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
876 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
877
878 /*
879 * If the TSC frequency refinement timer is running, we need to cancel it so it
880 * doesn't screw up the frequency after a long suspend.
881 *
882 * Recalculate all TSC-deltas on host resume as it may have changed, seen
883 * on Windows 7 running on the Dell Optiplex Intel Core i5-3570.
884 */
885 if (enmEvent == RTPOWEREVENT_RESUME)
886 {
887 ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
888 if ( RT_LIKELY(pGip)
889 && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
890 && !supdrvOSAreCpusOfflinedOnSuspend())
891 {
892#ifdef SUPDRV_USE_TSC_DELTA_THREAD
893 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
894#else
895 RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
896 supdrvMeasureInitialTscDeltas(pDevExt);
897#endif
898 }
899 }
900 else if (enmEvent == RTPOWEREVENT_SUSPEND)
901 ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
902}
903
904
905/**
906 * Start the TSC-frequency refinment timer for the invariant TSC GIP mode.
907 *
908 * We cannot use this in the synchronous and asynchronous tsc GIP modes because
909 * the CPU may change the TSC frequence between now and when the timer fires
910 * (supdrvInitAsyncRefineTscTimer).
911 *
912 * @param pDevExt Pointer to the device instance data.
913 * @param pGip Pointer to the GIP.
914 */
915static void supdrvGipInitStartTimerForRefiningInvariantTscFreq(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip)
916{
917 uint64_t u64NanoTS;
918 RTCCUINTREG fEFlags;
919 int rc;
920
921 /*
922 * Register a power management callback.
923 */
924 pDevExt->fInvTscRefinePowerEvent = false;
925 rc = RTPowerNotificationRegister(supdrvGipPowerNotificationCallback, pDevExt);
926 AssertRC(rc); /* ignore */
927
928 /*
929 * Record the TSC and NanoTS as the starting anchor point for refinement
930 * of the TSC. We try get as close to a clock tick as possible on systems
931 * which does not provide high resolution time.
932 */
933 u64NanoTS = RTTimeSystemNanoTS();
934 while (RTTimeSystemNanoTS() == u64NanoTS)
935 ASMNopPause();
936
937 fEFlags = ASMIntDisableFlags();
938 pDevExt->uTscStartInvarTscRefine = ASMReadTSC();
939 pDevExt->nsStartInvarTscRefine = RTTimeSystemNanoTS();
940 pDevExt->idCpuInvarTscRefine = RTMpCpuId();
941 ASMSetFlags(fEFlags);
942
943 /*
944 * Create a timer that runs on the same CPU so we won't have a depencency
945 * on the TSC-delta and can run in parallel to it. On systems that does not
946 * implement CPU specific timers we'll apply deltas in the timer callback,
947 * just like we do for CPUs going offline.
948 *
949 * The longer the refinement interval the better the accuracy, at least in
950 * theory. If it's too long though, ring-3 may already be starting its
951 * first VMs before we're done. On most systems we will be loading the
952 * support driver during boot and VMs won't be started for a while yet,
953 * it is really only a problem during development (especially with
954 * on-demand driver starting on windows).
955 *
956 * To avoid wasting time doing a long supdrvGipInitMeasureTscFreq() call
957 * to calculate the frequency during driver loading, the timer is set
958 * to fire after 200 ms the first time. It will then reschedule itself
959 * to fire every second until GIP_TSC_REFINE_PERIOD_IN_SECS has been
960 * reached or it notices that there is a user land client with GIP
961 * mapped (we want a stable frequency for all VMs).
962 */
963 rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC,
964 RTTIMER_FLAGS_CPU(RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine)),
965 supdrvInitRefineInvariantTscFreqTimer, pDevExt);
966 if (RT_SUCCESS(rc))
967 {
968 rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
969 if (RT_SUCCESS(rc))
970 return;
971 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
972 }
973
974 if (rc == VERR_CPU_OFFLINE || rc == VERR_NOT_SUPPORTED)
975 {
976 rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC, RTTIMER_FLAGS_CPU_ANY,
977 supdrvInitRefineInvariantTscFreqTimer, pDevExt);
978 if (RT_SUCCESS(rc))
979 {
980 rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
981 if (RT_SUCCESS(rc))
982 return;
983 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
984 }
985 }
986
987 pDevExt->pInvarTscRefineTimer = NULL;
988 OSDBGPRINT(("vboxdrv: Failed to create or start TSC frequency refinement timer: rc=%Rrc\n", rc));
989}
990
991
992/**
993 * @callback_method_impl{PFNRTMPWORKER,
994 * RTMpOnSpecific callback for reading TSC and time on the CPU we started
995 * the measurements on.}
996 */
997DECLCALLBACK(void) supdrvGipInitReadTscAndNanoTsOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
998{
999 RTCCUINTREG fEFlags = ASMIntDisableFlags();
1000 uint64_t *puTscStop = (uint64_t *)pvUser1;
1001 uint64_t *pnsStop = (uint64_t *)pvUser2;
1002
1003 *puTscStop = ASMReadTSC();
1004 *pnsStop = RTTimeSystemNanoTS();
1005
1006 ASMSetFlags(fEFlags);
1007}
1008
1009
/**
 * Measures the TSC frequency of the system.
 *
 * The TSC frequency can vary on systems which are not reported as invariant.
 * On such systems the object of this function is to find out what the nominal,
 * maximum TSC frequency under 'normal' CPU operation is.
 *
 * @returns VBox status code.
 * @param   pDevExt        Pointer to the device instance.
 * @param   pGip           Pointer to the GIP.
 * @param   fRough         Set if we're doing the rough calculation that the
 *                         TSC measuring code needs, where accuracy isn't all
 *                         that important (too high is better than too low).
 *                         When clear we try for best accuracy that we can
 *                         achieve in reasonably short time.
 */
static int supdrvGipInitMeasureTscFreq(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, bool fRough)
{
    uint32_t nsTimerIncr = RTTimerGetSystemGranularity();
    int cTriesLeft = fRough ? 4 : 2;
    while (cTriesLeft-- > 0)
    {
        RTCCUINTREG fEFlags;
        uint64_t    nsStart;
        uint64_t    nsStop;
        uint64_t    uTscStart;
        uint64_t    uTscStop;
        RTCPUID     idCpuStart;
        RTCPUID     idCpuStop;

        /*
         * Synchronize with the host OS clock tick on systems without high
         * resolution time API (older Windows version for example).
         */
        nsStart = RTTimeSystemNanoTS();
        while (RTTimeSystemNanoTS() == nsStart)
            ASMNopPause();

        /*
         * Read the TSC and current time, noting which CPU we're on.
         * Interrupts are disabled so the three reads belong together.
         */
        fEFlags = ASMIntDisableFlags();
        uTscStart = ASMReadTSC();
        nsStart = RTTimeSystemNanoTS();
        idCpuStart = RTMpCpuId();
        ASMSetFlags(fEFlags);

        /*
         * Delay for a while.
         */
        if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
        {
            /*
             * Sleep-wait since the TSC frequency is constant, it eases host load.
             * Shorter interval produces more variance in the frequency (esp. Windows).
             * The delay is rounded to a whole number of timer ticks (nsTimerIncr)
             * minus 100us, expressed in milliseconds: ~16ms rough / ~200ms accurate.
             */
            uint64_t msElapsed = 0;
            uint64_t msDelay = ( ((fRough ? 16 : 200) * RT_NS_1MS + nsTimerIncr - 1) / nsTimerIncr * nsTimerIncr - RT_NS_100US )
                             / RT_NS_1MS;
            do
            {
                RTThreadSleep((RTMSINTERVAL)(msDelay - msElapsed));
                nsStop = RTTimeSystemNanoTS();
                msElapsed = (nsStop - nsStart) / RT_NS_1MS;
            } while (msElapsed < msDelay);

            /* Spin until the next clock tick so the stop sample is tick-aligned
               like the start sample was. */
            while (RTTimeSystemNanoTS() == nsStop)
                ASMNopPause();
        }
        else
        {
            /*
             * Busy-wait keeping the frequency up.
             */
            do
            {
                ASMNopPause();
                nsStop = RTTimeSystemNanoTS();
            } while (nsStop - nsStart < RT_NS_100MS);
        }

        /*
         * Read the TSC and time again.
         */
        fEFlags = ASMIntDisableFlags();
        uTscStop = ASMReadTSC();
        nsStop = RTTimeSystemNanoTS();
        idCpuStop = RTMpCpuId();
        ASMSetFlags(fEFlags);

        /*
         * If the CPU changes, things get a bit complicated and what we
         * can get away with depends on the GIP mode / TSC reliability.
         */
        if (idCpuStop != idCpuStart)
        {
            bool fDoXCall = false;

            /*
             * Synchronous TSC mode: we're probably fine as it's unlikely
             * that we were rescheduled because of TSC throttling or power
             * management reasons, so just go ahead.
             */
            if (pGip->u32Mode == SUPGIPMODE_SYNC_TSC)
            {
                /* Probably ok, maybe we should retry once?. */
                Assert(pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_NOT_APPLICABLE);
            }
            /*
             * If we're just doing the rough measurement, do the cross call and
             * get on with things (we don't have deltas!).
             */
            else if (fRough)
                fDoXCall = true;
            /*
             * Invariant TSC mode: It doesn't matter if we have delta available
             * for both CPUs.  That is not something we can assume at this point.
             *
             * Note! We cannot necessarily trust enmUseTscDelta here because it's
             *       downgraded after each delta calculation and the delta
             *       calculations may not be complete yet.
             */
            else if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
            {
/** @todo This section of code is never reached atm, consider dropping it later on... */
                if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
                {
                    /* Map both CPU IDs to GIP entries to fetch their TSC deltas;
                       INT64_MAX means "delta not measured yet". */
                    uint32_t iStartCpuSet   = RTMpCpuIdToSetIndex(idCpuStart);
                    uint32_t iStopCpuSet    = RTMpCpuIdToSetIndex(idCpuStop);
                    uint16_t iStartGipCpu   = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                            ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
                    uint16_t iStopGipCpu    = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                            ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
                    int64_t  iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
                    int64_t  iStopTscDelta  = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
                    if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
                    {
                        /* Both deltas known: adjust the samples onto a common
                           TSC timeline (unless deltas are practically zero). */
                        if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
                        {
                            uTscStart -= iStartTscDelta;
                            uTscStop  -= iStopTscDelta;
                        }
                    }
                    /*
                     * Invalid CPU indexes are not caused by online/offline races, so
                     * we have to trigger driver load failure if that happens as GIP
                     * and IPRT assumptions are busted on this system.
                     */
                    else if (iStopGipCpu >= pGip->cCpus || iStartGipCpu >= pGip->cCpus)
                    {
                        SUPR0Printf("vboxdrv: Unexpected CPU index in supdrvGipInitMeasureTscFreq.\n");
                        SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
                                    iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
                        return VERR_INVALID_CPU_INDEX;
                    }
                    /*
                     * No valid deltas.  We retry, if we're on our last retry
                     * we do the cross call instead just to get a result.  The
                     * frequency will be refined in a few seconds anyways.
                     */
                    else if (cTriesLeft > 0)
                        continue;
                    else
                        fDoXCall = true;
                }
            }
            /*
             * Asynchronous TSC mode: This is bad as the reason we usually
             * use this mode is to deal with variable TSC frequencies and
             * deltas.  So, we need to get the TSC from the same CPU as
             * started it, we also need to keep that CPU busy.  So, retry
             * and fall back to the cross call on the last attempt.
             */
            else
            {
                Assert(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC);
                if (cTriesLeft > 0)
                    continue;
                fDoXCall = true;
            }

            if (fDoXCall)
            {
                /*
                 * Try read the TSC and timestamp on the start CPU.
                 */
                int rc = RTMpOnSpecific(idCpuStart, supdrvGipInitReadTscAndNanoTsOnCpu, &uTscStop, &nsStop);
                if (RT_FAILURE(rc) && (!fRough || cTriesLeft > 0))
                    continue;
            }
        }

        /*
         * Calculate the TSC frequency and update it (shared with the refinement timer).
         */
        supdrvGipInitSetCpuFreq(pGip, nsStop - nsStart, uTscStop - uTscStart, 0);
        return VINF_SUCCESS;
    }

    /* Only the accurate (!fRough) path can run out of retries and fail. */
    Assert(!fRough);
    return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
}
1212
1213
1214/**
1215 * Finds our (@a idCpu) entry, or allocates a new one if not found.
1216 *
1217 * @returns Index of the CPU in the cache set.
1218 * @param pGip The GIP.
1219 * @param idCpu The CPU ID.
1220 */
1221static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
1222{
1223 uint32_t i, cTries;
1224
1225 /*
1226 * ASSUMES that CPU IDs are constant.
1227 */
1228 for (i = 0; i < pGip->cCpus; i++)
1229 if (pGip->aCPUs[i].idCpu == idCpu)
1230 return i;
1231
1232 cTries = 0;
1233 do
1234 {
1235 for (i = 0; i < pGip->cCpus; i++)
1236 {
1237 bool fRc;
1238 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
1239 if (fRc)
1240 return i;
1241 }
1242 } while (cTries++ < 32);
1243 AssertReleaseFailed();
1244 return i - 1;
1245}
1246
1247
/**
 * The calling CPU should be accounted as online, update GIP accordingly.
 *
 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
 *
 * Must be called on the CPU being onlined (asserted below) with preemption
 * disabled; all GIP updates are done behind the GIP spinlock with interrupts
 * disabled.
 *
 * @param   pDevExt             The device extension.
 * @param   idCpu               The CPU ID.
 */
static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    int iCpuSet = 0;
    uint16_t idApic = UINT16_MAX;
    uint32_t i = 0;
    uint64_t u64NanoTS = 0;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    AssertPtrReturnVoid(pGip);
    Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
    AssertRelease(idCpu == RTMpCpuId());
    Assert(pGip->cPossibleCpus == RTMpGetCount());

    /*
     * Do this behind a spinlock with interrupts disabled as this can fire
     * on all CPUs simultaneously, see @bugref{6110}.
     */
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    /*
     * Update the globals.
     */
    ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
    ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    if (iCpuSet >= 0)
    {
        Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
        RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
        RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
    }

    /*
     * Update the entry.  The timestamp is backdated by one update interval
     * so the first GIP update for this CPU yields a sane interval.
     */
    u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
    i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);

    supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, pGip->u64CpuHz);

    idApic = ASMGetApicId();
    ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
    ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
    ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);

    /*
     * Update the APIC ID and CPU set index mappings.
     */
    ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
    ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);

    /* Add this CPU to this set of CPUs we need to calculate the TSC-delta for. */
    RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, RTMpCpuIdToSetIndex(idCpu));

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);

    /* Commit it.  The ONLINE state is written last so readers never see an
       online entry with partially initialized fields. */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}
1318
1319
1320/**
1321 * RTMpOnSpecific callback wrapper for supdrvGipMpEventOnlineOrInitOnCpu().
1322 *
1323 * @param idCpu The CPU ID we are running on.
1324 * @param pvUser1 Opaque pointer to the device instance data.
1325 * @param pvUser2 Not used.
1326 */
1327static DECLCALLBACK(void) supdrvGipMpEventOnlineCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1328{
1329 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
1330 NOREF(pvUser2);
1331 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1332}
1333
1334
1335/**
1336 * The CPU should be accounted as offline, update the GIP accordingly.
1337 *
1338 * This is used by supdrvGipMpEvent.
1339 *
1340 * @param pDevExt The device extension.
1341 * @param idCpu The CPU ID.
1342 */
1343static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1344{
1345 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1346 int iCpuSet;
1347 unsigned i;
1348
1349 AssertPtrReturnVoid(pGip);
1350 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1351
1352 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1353 AssertReturnVoid(iCpuSet >= 0);
1354
1355 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
1356 AssertReturnVoid(i < pGip->cCpus);
1357 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
1358
1359 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1360 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
1361
1362 /* Update the Mp online/offline counter. */
1363 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1364
1365 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1366 {
1367 /* Reset the TSC delta, we will recalculate it lazily. */
1368 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
1369 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
1370 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
1371 }
1372
1373 /* Commit it. */
1374 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
1375
1376 RTSpinlockRelease(pDevExt->hGipSpinlock);
1377}
1378
1379
/**
 * Multiprocessor event notification callback.
 *
 * This is used to make sure that the GIP master gets passed on to
 * another CPU.  It also updates the associated CPU data.
 *
 * @param   enmEvent            The event.
 * @param   idCpu               The cpu it applies to.
 * @param   pvUser              Pointer to the device extension.
 */
static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
{
    PSUPDRVDEVEXT      pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip    = pDevExt->pGip;

    if (pGip)
    {
        RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
        switch (enmEvent)
        {
            case RTMPEVENT_ONLINE:
            {
                /* supdrvGipMpEventOnlineOrInitOnCpu() must run on the CPU
                   being onlined; if we aren't already on it, cross call it. */
                RTThreadPreemptDisable(&PreemptState);
                if (idCpu == RTMpCpuId())
                {
                    supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
                    RTThreadPreemptRestore(&PreemptState);
                }
                else
                {
                    RTThreadPreemptRestore(&PreemptState);
                    RTMpOnSpecific(idCpu, supdrvGipMpEventOnlineCallback, pDevExt, NULL /* pvUser2 */);
                }

                /*
                 * Recompute TSC-delta for the newly online'd CPU.
                 */
                if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
                {
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
                    supdrvTscDeltaThreadStartMeasurement(pDevExt, false /* fForceAll */);
#else
                    uint32_t iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
                    supdrvMeasureTscDeltaOne(pDevExt, iCpu);
#endif
                }
                break;
            }

            case RTMPEVENT_OFFLINE:
                supdrvGipMpEventOffline(pDevExt, idCpu);
                break;
        }
    }

    /*
     * Make sure there is a master GIP.
     */
    if (enmEvent == RTMPEVENT_OFFLINE)
    {
        RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
        if (idGipMaster == idCpu)
        {
            /*
             * The GIP master is going offline, find a new one.
             * Pick the first online CPU that isn't the departing master.
             */
            bool        fIgnored;
            unsigned    i;
            RTCPUID     idNewGipMaster = NIL_RTCPUID;
            RTCPUSET    OnlineCpus;
            RTMpGetOnlineSet(&OnlineCpus);

            for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
                if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
                {
                    RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
                    if (idCurCpu != idGipMaster)
                    {
                        idNewGipMaster = idCurCpu;
                        break;
                    }
                }

            /* Compare-exchange so a concurrent master change isn't clobbered. */
            Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
            ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
            NOREF(fIgnored);
        }
    }
}
1469
1470
1471/**
1472 * On CPU initialization callback for RTMpOnAll.
1473 *
1474 * @param idCpu The CPU ID.
1475 * @param pvUser1 The device extension.
1476 * @param pvUser2 The GIP.
1477 */
1478static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1479{
1480 /* This is good enough, even though it will update some of the globals a
1481 bit to much. */
1482 supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
1483}
1484
1485
1486/**
1487 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
1488 *
1489 * @param idCpu Ignored.
1490 * @param pvUser1 Where to put the TSC.
1491 * @param pvUser2 Ignored.
1492 */
1493static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1494{
1495 Assert(RTMpCpuIdToSetIndex(idCpu) == (intptr_t)pvUser2);
1496 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
1497}
1498
1499
/**
 * Determine if Async GIP mode is required because of TSC drift.
 *
 * When using the default/normal timer code it is essential that the time stamp counter
 * (TSC) runs never backwards, that is, a read operation to the counter should return
 * a bigger value than any previous read operation.  This is guaranteed by the latest
 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4).  In any other
 * case we have to choose the asynchronous timer mode.
 *
 * @param   poffMin     Pointer to the determined difference between different
 *                      cores (optional, can be NULL).
 * @return  false if the time stamp counters appear to be synchronized, true otherwise.
 */
static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
{
    /*
     * Just iterate all the cpus 8 times and make sure that the TSC is
     * ever increasing.  We don't bother taking TSC rollover into account.
     */
    int iEndCpu = RTMpGetArraySize();
    int iCpu;
    int cLoops = 8;
    bool fAsync = false;
    int rc = VINF_SUCCESS;
    uint64_t offMax = 0;
    uint64_t offMin = ~(uint64_t)0;
    uint64_t PrevTsc = ASMReadTSC();

    while (cLoops-- > 0)
    {
        for (iCpu = 0; iCpu < iEndCpu; iCpu++)
        {
            uint64_t CurTsc;
            rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker,
                                &CurTsc, (void *)(uintptr_t)iCpu);
            if (RT_SUCCESS(rc))
            {
                /* A non-increasing reading across CPUs means the TSCs are
                   skewed; that settles it: async mode. */
                if (CurTsc <= PrevTsc)
                {
                    fAsync = true;
                    offMin = offMax = PrevTsc - CurTsc;
                    Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
                         iCpu, cLoops, CurTsc, PrevTsc));
                    break;
                }

                /* Gather statistics (except the first time). */
                if (iCpu != 0 || cLoops != 7)
                {
                    uint64_t off = CurTsc - PrevTsc;
                    if (off < offMin)
                        offMin = off;
                    if (off > offMax)
                        offMax = off;
                    Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
                }

                /* Next */
                PrevTsc = CurTsc;
            }
            else if (rc == VERR_NOT_SUPPORTED)
                break;
            else
                AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
        }

        /* broke out of the loop. */
        if (iCpu < iEndCpu)
            break;
    }

    if (poffMin)
        *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
    Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
         fAsync, iEndCpu, rc, offMin, offMax));
#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
    OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
#endif
    return fAsync;
}
1580
1581
/**
 * supdrvGipInit() worker that determines the GIP TSC mode.
 *
 * @returns The most suitable TSC mode.
 * @param   pDevExt     Pointer to the device instance data.
 */
static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
{
    uint64_t u64DiffCoresIgnored;
    uint32_t uEAX, uEBX, uECX, uEDX;

    /*
     * Establish whether the CPU advertises TSC as invariant, we need that in
     * a couple of places below.  (CPUID leaf 0x80000007, EDX TscInvariant bit.)
     */
    bool fInvariantTsc = false;
    if (ASMHasCpuId())
    {
        uEAX = ASMCpuId_EAX(0x80000000);
        if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
        {
            uEDX = ASMCpuId_EDX(0x80000007);
            if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
                fInvariantTsc = true;
        }
    }

    /*
     * On single CPU systems, we don't need to consider ASYNC mode.
     */
    if (RTMpGetCount() <= 1)
        return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;

    /*
     * Allow the user and/or OS specific bits to force async mode.
     */
    if (supdrvOSGetForcedAsyncTscMode(pDevExt))
        return SUPGIPMODE_ASYNC_TSC;

    /*
     * Use invariant mode if the CPU says TSC is invariant.
     */
    if (fInvariantTsc)
        return SUPGIPMODE_INVARIANT_TSC;

    /*
     * TSC is not invariant and we're on SMP, this presents two problems:
     *
     *   (1) There might be a skew between the CPUs, so that cpu0
     *       returns a TSC that is slightly different from cpu1.
     *       This skew may be due to (2), bad TSC initialization
     *       or slightly different TSC rates.
     *
     *   (2) Power management (and other things) may cause the TSC
     *       to run at a non-constant speed, and cause the speed
     *       to be different on the cpus. This will result in (1).
     *
     * If any of the above is detected, we will have to use ASYNC mode.
     */
    /* (1). Try check for current differences between the cpus. */
    if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
        return SUPGIPMODE_ASYNC_TSC;

    /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
    ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
    if (   ASMIsValidStdRange(uEAX)
        && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
    {
        /* Check for APM support. */
        uEAX = ASMCpuId_EAX(0x80000000);
        if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
        {
            uEDX = ASMCpuId_EDX(0x80000007);
            if (uEDX & 0x3e)  /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
                return SUPGIPMODE_ASYNC_TSC;
        }
    }

    return SUPGIPMODE_SYNC_TSC;
}
1662
1663
1664/**
1665 * Initializes per-CPU GIP information.
1666 *
1667 * @param pGip Pointer to the GIP.
1668 * @param pCpu Pointer to which GIP CPU to initalize.
1669 * @param u64NanoTS The current nanosecond timestamp.
1670 * @param uCpuHz The CPU frequency to set, 0 if the caller doesn't know.
1671 */
1672static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz)
1673{
1674 pCpu->u32TransactionId = 2;
1675 pCpu->u64NanoTS = u64NanoTS;
1676 pCpu->u64TSC = ASMReadTSC();
1677 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
1678 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
1679
1680 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
1681 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
1682 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
1683 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
1684
1685 /*
1686 * The first time we're called, we don't have a CPU frequency handy,
1687 * so pretend it's a 4 GHz CPU. On CPUs that are online, we'll get
1688 * called again and at that point we have a more plausible CPU frequency
1689 * value handy. The frequency history will also be adjusted again on
1690 * the 2nd timer callout (maybe we can skip that now?).
1691 */
1692 if (!uCpuHz)
1693 {
1694 pCpu->u64CpuHz = _4G - 1;
1695 pCpu->u32UpdateIntervalTSC = (uint32_t)((_4G - 1) / pGip->u32UpdateHz);
1696 }
1697 else
1698 {
1699 pCpu->u64CpuHz = uCpuHz;
1700 pCpu->u32UpdateIntervalTSC = (uint32_t)(uCpuHz / pGip->u32UpdateHz);
1701 }
1702 pCpu->au32TSCHistory[0]
1703 = pCpu->au32TSCHistory[1]
1704 = pCpu->au32TSCHistory[2]
1705 = pCpu->au32TSCHistory[3]
1706 = pCpu->au32TSCHistory[4]
1707 = pCpu->au32TSCHistory[5]
1708 = pCpu->au32TSCHistory[6]
1709 = pCpu->au32TSCHistory[7]
1710 = pCpu->u32UpdateIntervalTSC;
1711}
1712
1713
/**
 * Initializes the GIP data.
 *
 * @param   pDevExt             Pointer to the device instance data.
 * @param   pGip                Pointer to the read-write kernel mapping of the GIP.
 * @param   HCPhys              The physical address of the GIP.
 * @param   u64NanoTS           The current nanosecond timestamp.
 * @param   uUpdateHz           The update frequency.
 * @param   uUpdateIntervalNS   The update interval in nanoseconds.
 * @param   cCpus               The CPU count.
 */
static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
                          uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
{
    /* GIP size, rounded up to whole pages (aCPUs is a variable sized array). */
    size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
    unsigned i;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#else
    LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#endif

    /*
     * Initialize the structure.
     */
    memset(pGip, 0, cbGip);

    pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
    pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
    pGip->u32Mode = supdrvGipInitDetermineTscMode(pDevExt);
    if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
        /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
        pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
                             ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
    else
        pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
    pGip->cCpus = (uint16_t)cCpus;
    pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
    pGip->u32UpdateHz = uUpdateHz;
    pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
    pGip->fGetGipCpu = SUPGIPGETCPU_APIC_ID;
    RTCpuSetEmpty(&pGip->OnlineCpuSet);
    RTCpuSetEmpty(&pGip->PresentCpuSet);
    RTMpGetSet(&pGip->PossibleCpuSet);
    pGip->cOnlineCpus = RTMpGetOnlineCount();
    pGip->cPresentCpus = RTMpGetPresentCount();
    pGip->cPossibleCpus = RTMpGetCount();
    pGip->idCpuMax = RTMpGetMaxCpuId();
    /* Invalidate the APIC ID and CPU set index mapping tables; they are
       filled in per CPU by supdrvGipMpEventOnlineOrInitOnCpu(). */
    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
        pGip->aiCpuFromApicId[i] = UINT16_MAX;
    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
        pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
    /* CPU frequency is unknown at this point (0); it gets measured later. */
    for (i = 0; i < cCpus; i++)
        supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, 0 /*uCpuHz*/);

    /*
     * Link it to the device extension.
     */
    pDevExt->pGip = pGip;
    pDevExt->HCPhysGip = HCPhys;
    pDevExt->cGipUsers = 0;
}
1776
1777
1778/**
1779 * Creates the GIP.
1780 *
1781 * @returns VBox status code.
1782 * @param pDevExt Instance data. GIP stuff may be updated.
1783 */
1784int VBOXCALL supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
1785{
1786 PSUPGLOBALINFOPAGE pGip;
1787 RTHCPHYS HCPhysGip;
1788 uint32_t u32SystemResolution;
1789 uint32_t u32Interval;
1790 uint32_t u32MinInterval;
1791 uint32_t uMod;
1792 unsigned cCpus;
1793 int rc;
1794
1795 LogFlow(("supdrvGipCreate:\n"));
1796
1797 /*
1798 * Assert order.
1799 */
1800 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
1801 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
1802 Assert(!pDevExt->pGipTimer);
1803#ifdef SUPDRV_USE_MUTEX_FOR_GIP
1804 Assert(pDevExt->mtxGip != NIL_RTSEMMUTEX);
1805 Assert(pDevExt->mtxTscDelta != NIL_RTSEMMUTEX);
1806#else
1807 Assert(pDevExt->mtxGip != NIL_RTSEMFASTMUTEX);
1808 Assert(pDevExt->mtxTscDelta != NIL_RTSEMFASTMUTEX);
1809#endif
1810
1811 /*
1812 * Check the CPU count.
1813 */
1814 cCpus = RTMpGetArraySize();
1815 if ( cCpus > RTCPUSET_MAX_CPUS
1816 || cCpus > 256 /* ApicId is used for the mappings */)
1817 {
1818 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
1819 return VERR_TOO_MANY_CPUS;
1820 }
1821
1822 /*
1823 * Allocate a contiguous set of pages with a default kernel mapping.
1824 */
1825 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
1826 if (RT_FAILURE(rc))
1827 {
1828 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
1829 return rc;
1830 }
1831 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
1832 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
1833
1834 /*
1835 * Find a reasonable update interval and initialize the structure.
1836 */
1837 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
1838 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
1839 * See @bugref{6710}. */
1840 u32MinInterval = RT_NS_10MS;
1841 u32SystemResolution = RTTimerGetSystemGranularity();
1842 u32Interval = u32MinInterval;
1843 uMod = u32MinInterval % u32SystemResolution;
1844 if (uMod)
1845 u32Interval += u32SystemResolution - uMod;
1846
1847 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
1848
1849 /*
1850 * Important sanity check...
1851 */
1852 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
1853 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
1854 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
1855 {
1856 OSDBGPRINT(("supdrvGipCreate: Host-OS/user claims the TSC-deltas are zero but we detected async. TSC! Bad.\n"));
1857 return VERR_INTERNAL_ERROR_2;
1858 }
1859
1860 /* It doesn't make sense to do TSC-delta detection on systems we detect as async. */
1861 AssertReturn( pGip->u32Mode != SUPGIPMODE_ASYNC_TSC
1862 || pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED, VERR_INTERNAL_ERROR_3);
1863
1864 /*
1865 * Do the TSC frequency measurements.
1866 *
     * If we're in invariant TSC mode, just do a quick preliminary measurement
1868 * that the TSC-delta measurement code can use to yield cross calls.
1869 *
1870 * If we're in any of the other two modes, neither which require MP init,
1871 * notifications or deltas for the job, do the full measurement now so
1872 * that supdrvGipInitOnCpu() can populate the TSC interval and history
1873 * array with more reasonable values.
1874 */
1875 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1876 {
1877 rc = supdrvGipInitMeasureTscFreq(pDevExt, pGip, true /*fRough*/); /* cannot fail */
1878 supdrvGipInitStartTimerForRefiningInvariantTscFreq(pDevExt, pGip);
1879 }
1880 else
1881 rc = supdrvGipInitMeasureTscFreq(pDevExt, pGip, false /*fRough*/);
1882 if (RT_SUCCESS(rc))
1883 {
1884 /*
1885 * Start TSC-delta measurement thread before we start getting MP
1886 * events that will try kick it into action (includes the
1887 * RTMpOnAll/supdrvGipInitOnCpu call below).
1888 */
1889 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
1890 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
1891#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1892 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1893 rc = supdrvTscDeltaThreadInit(pDevExt);
1894#endif
1895 if (RT_SUCCESS(rc))
1896 {
1897 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
1898 if (RT_SUCCESS(rc))
1899 {
1900 /*
1901 * Do GIP initialization on all online CPUs. Wake up the
1902 * TSC-delta thread afterwards.
1903 */
1904 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
1905 if (RT_SUCCESS(rc))
1906 {
1907#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1908 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
1909#else
1910 uint16_t iCpu;
1911 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1912 {
1913 /*
1914 * Measure the TSC deltas now that we have MP notifications.
1915 */
1916 int cTries = 5;
1917 do
1918 {
1919 rc = supdrvMeasureInitialTscDeltas(pDevExt);
1920 if ( rc != VERR_TRY_AGAIN
1921 && rc != VERR_CPU_OFFLINE)
1922 break;
1923 } while (--cTries > 0);
1924 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1925 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
1926 }
1927 else
1928 {
1929 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1930 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
1931 }
1932 if (RT_SUCCESS(rc))
1933#endif
1934 {
1935 /*
1936 * Create the timer.
1937 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
1938 */
1939 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
1940 {
1941 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
1942 supdrvGipAsyncTimer, pDevExt);
1943 if (rc == VERR_NOT_SUPPORTED)
1944 {
1945 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
1946 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
1947 }
1948 }
1949 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1950 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
1951 supdrvGipSyncAndInvariantTimer, pDevExt);
1952 if (RT_SUCCESS(rc))
1953 {
1954 /*
1955 * We're good.
1956 */
1957 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
1958 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
1959
1960 g_pSUPGlobalInfoPage = pGip;
1961 return VINF_SUCCESS;
1962 }
1963
1964 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
1965 Assert(!pDevExt->pGipTimer);
1966 }
1967 }
1968 else
1969 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
1970 }
1971 else
1972 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
1973 }
1974 else
1975 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
1976 }
1977 else
1978 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureInitialTscDeltas failed. rc=%Rrc\n", rc));
1979
1980 /* Releases timer frequency increase too. */
1981 supdrvGipDestroy(pDevExt);
1982 return rc;
1983}
1984
1985
1986/**
1987 * Invalidates the GIP data upon termination.
1988 *
1989 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1990 */
1991static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
1992{
1993 unsigned i;
1994 pGip->u32Magic = 0;
1995 for (i = 0; i < pGip->cCpus; i++)
1996 {
1997 pGip->aCPUs[i].u64NanoTS = 0;
1998 pGip->aCPUs[i].u64TSC = 0;
1999 pGip->aCPUs[i].iTSCHistoryHead = 0;
2000 pGip->aCPUs[i].u64TSCSample = 0;
2001 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
2002 }
2003}
2004
2005
/**
 * Terminates the GIP.
 *
 * Tear-down order matters here: MP notifications are stopped first so no new
 * CPU online/offline events race the destruction, then the TSC-delta thread
 * (if compiled in), then the timers, then the GIP data itself.
 *
 * @param pDevExt Instance data. GIP stuff may be updated.
 */
void VBOXCALL supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
{
    int rc;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
                pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
                pDevExt->pGipTimer, pDevExt->GipMemObj));
#endif

    /*
     * Stop receiving MP notifications before tearing anything else down.
     */
    RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /*
     * Terminate the TSC-delta measurement thread and resources.
     */
    supdrvTscDeltaTerm(pDevExt);
#endif

    /*
     * Destroy the TSC-refinement timer.
     */
    if (pDevExt->pInvarTscRefineTimer)
    {
        RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
        pDevExt->pInvarTscRefineTimer = NULL;
    }

    /*
     * Invalidate the GIP data.
     */
    if (pDevExt->pGip)
    {
        supdrvGipTerm(pDevExt->pGip);
        pDevExt->pGip = NULL;
    }
    g_pSUPGlobalInfoPage = NULL;

    /*
     * Destroy the timer and free the GIP memory object.
     */
    if (pDevExt->pGipTimer)
    {
        rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
        pDevExt->pGipTimer = NULL;
    }

    if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
        pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
    }

    /*
     * Finally, make sure we've released the system timer resolution request
     * if one actually succeeded and is still pending.
     */
    supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
}
2072
2073
2074
2075
2076/*
2077 *
2078 *
2079 * GIP Update Timer Related Code
2080 * GIP Update Timer Related Code
2081 * GIP Update Timer Related Code
2082 *
2083 *
2084 */
2085
2086
/**
 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
 * updates all the per cpu data except the transaction id.
 *
 * @param   pDevExt         The device extension.
 * @param   pGipCpu         Pointer to the per cpu data.
 * @param   u64NanoTS       The current time stamp.
 * @param   u64TSC          The current TSC.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
{
    uint64_t u64TSCDelta;
    bool     fUpdateCpuHz;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    /* Delta between this and the previous update. */
    ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));

    /*
     * Update the NanoTS.
     */
    ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);

    /*
     * Calc TSC delta.
     */
    u64TSCDelta = u64TSC - pGipCpu->u64TSC;
    ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);

    /*
     * Determine if we need to update the CPU (TSC) frequency calculation.
     *
     * We don't need to keep recalculating the frequency when it's invariant,
     * unless the special tstGIP-2 testing mode is enabled.
     */
    fUpdateCpuHz = pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC;
    if (!(pGip->fFlags & SUPGIP_FLAGS_TESTING))
    { /* likely */ }
    else
    {
        uint32_t fGipFlags = pGip->fFlags;
        if (fGipFlags & (SUPGIP_FLAGS_TESTING_ENABLE | SUPGIP_FLAGS_TESTING_START))
        {
            if (fGipFlags & SUPGIP_FLAGS_TESTING_START)
            {
                /* Cache the TSC frequency before forcing updates due to test mode. */
                if (!fUpdateCpuHz)
                    pDevExt->uGipTestModeInvariantCpuHz = pGip->aCPUs[0].u64CpuHz;
                ASMAtomicAndU32(&pGip->fFlags, ~SUPGIP_FLAGS_TESTING_START);
            }
            fUpdateCpuHz = true;
        }
        else if (fGipFlags & SUPGIP_FLAGS_TESTING_STOP)
        {
            /* Restore the cached TSC frequency if any. */
            if (!fUpdateCpuHz)
            {
                Assert(pDevExt->uGipTestModeInvariantCpuHz);
                ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, pDevExt->uGipTestModeInvariantCpuHz);
            }
            ASMAtomicAndU32(&pGip->fFlags, ~(SUPGIP_FLAGS_TESTING_STOP | SUPGIP_FLAGS_TESTING));
        }
    }

    /*
     * Calculate the CPU (TSC) frequency if necessary.
     */
    if (fUpdateCpuHz)
    {
        uint64_t u64CpuHz;
        uint32_t u32UpdateIntervalTSC;
        uint32_t u32UpdateIntervalTSCSlack;
        uint32_t u32TransactionId;
        unsigned iTSCHistoryHead;

        /* A delta that doesn't fit in 32 bits means a missed/bogus tick;
           substitute the previous interval and count it as an error. */
        if (u64TSCDelta >> 32)
        {
            u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
            pGipCpu->cErrors++;
        }

        /*
         * On the 2nd and 3rd callout, reset the history with the current TSC
         * interval since the values entered by supdrvGipInit are totally off.
         * The interval on the 1st callout completely unreliable, the 2nd is a bit
         * better, while the 3rd should be most reliable.
         */
        /** @todo Could we drop this now that we initializes the history
         *        with nominal TSC frequency values? */
        u32TransactionId = pGipCpu->u32TransactionId;
        if (RT_UNLIKELY(   (   u32TransactionId == 5
                            || u32TransactionId == 7)
                        && (   iTick == 2
                            || iTick == 3) ))
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
                ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
        }

        /*
         * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
         * Wait until we have at least one full history since the above history reset. The
         * assumption is that the majority of the previous history values will be tolerable.
         * See @bugref{6710#c67}.
         */
        /** @todo Could we drop the fudging there now that we initializes the history
         *        with nominal TSC frequency values? */
        if (   u32TransactionId > 23 /* 7 + (8 * 2) */
            && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        {
            uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
            if (   pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
                || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
            {
                /* Interval looks fishy: replace the measured delta with the
                   average of the full 8-entry history (avg of two 4-entry avgs). */
                uint32_t u32;
                u32  = pGipCpu->au32TSCHistory[0];
                u32 += pGipCpu->au32TSCHistory[1];
                u32 += pGipCpu->au32TSCHistory[2];
                u32 += pGipCpu->au32TSCHistory[3];
                u32 >>= 2;
                u64TSCDelta  = pGipCpu->au32TSCHistory[4];
                u64TSCDelta += pGipCpu->au32TSCHistory[5];
                u64TSCDelta += pGipCpu->au32TSCHistory[6];
                u64TSCDelta += pGipCpu->au32TSCHistory[7];
                u64TSCDelta >>= 2;
                u64TSCDelta += u32;
                u64TSCDelta >>= 1;
            }
        }

        /*
         * TSC History.
         */
        Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
        iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
        ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
        ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);

        /*
         * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
         *
         * On Windows, we have an occasional (but recurring) sour value that messed up
         * the history but taking only 1 interval reduces the precision overall.
         */
        if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
            || pGip->u32UpdateHz >= 1000)
        {
            uint32_t u32;
            u32  = pGipCpu->au32TSCHistory[0];
            u32 += pGipCpu->au32TSCHistory[1];
            u32 += pGipCpu->au32TSCHistory[2];
            u32 += pGipCpu->au32TSCHistory[3];
            u32 >>= 2;
            u32UpdateIntervalTSC  = pGipCpu->au32TSCHistory[4];
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
            u32UpdateIntervalTSC >>= 2;
            u32UpdateIntervalTSC += u32;
            u32UpdateIntervalTSC >>= 1;

            /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
            u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
        }
        else if (pGip->u32UpdateHz >= 90)
        {
            u32UpdateIntervalTSC  = (uint32_t)u64TSCDelta;
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
            u32UpdateIntervalTSC >>= 1;

            /* value chosen on a 2GHz thinkpad running windows */
            u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
        }
        else
        {
            u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;

            /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
            u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
        }
        ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);

        /*
         * CpuHz.
         */
        u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
        u64CpuHz /= pGip->u32UpdateIntervalNS;
        ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
    }
}
2282
2283
/**
 * Updates the GIP.
 *
 * Picks the relevant per-CPU entry (entry 0 unless in async-TSC mode), brackets
 * the update in an odd/even transaction id pair so readers can detect torn
 * reads, and periodically recalculates the effective update frequency.
 *
 * @param   pDevExt         The device extension.
 * @param   u64NanoTS       The current nanosecond timestamp.
 * @param   u64TSC          The current TSC timestamp.
 * @param   idCpu           The CPU ID.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
{
    /*
     * Determine the relevant CPU data.
     */
    PSUPGIPCPU pGipCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        pGipCpu = &pGip->aCPUs[0];
    else
    {
        unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
        if (RT_UNLIKELY(iCpu >= pGip->cCpus))
            return;
        pGipCpu = &pGip->aCPUs[iCpu];
        if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
            return;
    }

    /*
     * Start update transaction.
     */
    if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
    {
        /* this can happen on win32 if we're taking too long and there are more CPUs around. shouldn't happen though. */
        AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
        ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        pGipCpu->cErrors++;
        return;
    }

    /*
     * Recalc the update frequency every 0x800th time.
     */
    if (   pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC   /* cuz we're not recalculating the frequency on invariant hosts. */
        && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
    {
        if (pGip->u64NanoTSLastUpdateHz)
        {
#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
            uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
            uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
            if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
            {
                /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
                 *        calculation on non-invariant hosts if it changes the history decision
                 *        taken in supdrvGipDoUpdateCpu(). */
                uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
                ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
                ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
            }
#endif
        }
        ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
    }

    /*
     * Update the data.
     */
    supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

    /*
     * Complete transaction.
     */
    ASMAtomicIncU32(&pGipCpu->u32TransactionId);
}
2363
2364
/**
 * Updates the per cpu GIP data for the calling cpu.
 *
 * @param   pDevExt         The device extension.
 * @param   u64NanoTS       The current nanosecond timestamp.
 * @param   u64TSC          The current TSC timestamp.
 * @param   idCpu           The CPU ID.
 * @param   idApic          The APIC id for the CPU index.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
                                  RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
{
    uint32_t iCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    /*
     * Avoid a potential race when a CPU online notification doesn't fire on
     * the onlined CPU but the tick creeps in before the event notification is
     * run.
     */
    if (RT_UNLIKELY(iTick == 1))
    {
        iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
        if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
            supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
    }

    iCpu = pGip->aiCpuFromApicId[idApic];
    if (RT_LIKELY(iCpu < pGip->cCpus))
    {
        PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
        if (pGipCpu->idCpu == idCpu)
        {
            /*
             * Start update transaction.
             */
            if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
            {
                AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
                ASMAtomicIncU32(&pGipCpu->u32TransactionId);
                pGipCpu->cErrors++;
                return;
            }

            /*
             * Update the data.
             */
            supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

            /*
             * Complete transaction.
             */
            ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        }
    }
}
2424
2425
/**
 * Timer callback function for the sync and invariant GIP modes.
 *
 * Runs with interrupts disabled for the duration of the update; the TSC is
 * read first (before RTTimeSystemNanoTS) to keep the two samples as close
 * together as possible.
 *
 * @param   pTimer      The timer.
 * @param   pvUser      Opaque pointer to the device extension.
 * @param   iTick       The timer tick.
 */
static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PSUPDRVDEVEXT      pDevExt   = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip      = pDevExt->pGip;
    RTCCUINTREG        fEFlags   = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
    uint64_t           u64TSC    = ASMReadTSC();
    uint64_t           u64NanoTS = RTTimeSystemNanoTS();

    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
    {
        /*
         * The calculations in supdrvGipUpdate() is somewhat timing sensitive,
         * missing timer ticks is not an option for GIP because the GIP users
         * will end up incrementing the time in 1ns per time getter call until
         * there is a complete timer update. So, if the delta has yet to be
         * calculated, we just pretend it is zero for now (the GIP users
         * probably won't have it for a wee while either and will do the same).
         *
         * We could maybe on some platforms try cross calling a CPU with a
         * working delta here, but it's not worth the hassle since the
         * likelihood of this happening is really low. On Windows, Linux, and
         * Solaris timers fire on the CPU they were registered/started on.
         * Darwin timers doesn't necessarily (they are high priority threads).
         */
        uint32_t iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
        uint16_t iGipCpu = RT_LIKELY(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
                         ? pGip->aiCpuFromCpuSetIdx[iCpuSet] : UINT16_MAX;
        Assert(!ASMIntAreEnabled());
        if (RT_LIKELY(iGipCpu < pGip->cCpus))
        {
            /* Apply this CPU's TSC delta unless it hasn't been measured yet (INT64_MAX). */
            int64_t iTscDelta = pGip->aCPUs[iGipCpu].i64TSCDelta;
            if (iTscDelta != INT64_MAX)
                u64TSC -= iTscDelta;
        }
    }

    supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);

    ASMSetFlags(fEFlags);
}
2473
2474
2475/**
2476 * Timer callback function for async GIP mode.
2477 * @param pTimer The timer.
2478 * @param pvUser Opaque pointer to the device extension.
2479 * @param iTick The timer tick.
2480 */
2481static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2482{
2483 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2484 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2485 RTCPUID idCpu = RTMpCpuId();
2486 uint64_t u64TSC = ASMReadTSC();
2487 uint64_t NanoTS = RTTimeSystemNanoTS();
2488
2489 /** @todo reset the transaction number and whatnot when iTick == 1. */
2490 if (pDevExt->idGipMaster == idCpu)
2491 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
2492 else
2493 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
2494
2495 ASMSetFlags(fEFlags);
2496}
2497
2498
2499
2500
2501/*
2502 *
2503 *
2504 * TSC Delta Measurements And Related Code
2505 * TSC Delta Measurements And Related Code
2506 * TSC Delta Measurements And Related Code
2507 *
2508 *
2509 */
2510
2511
/*
 * Select TSC delta measurement algorithm (exactly one of the two).
 */
#if 0
# define GIP_TSC_DELTA_METHOD_1
#else
# define GIP_TSC_DELTA_METHOD_2
#endif

/** For padding variables to keep them away from other cache lines.  Better too
 * large than too small!
 * @remarks Current AMD64 and x86 CPUs seem to use 64 bytes.  There are claims
 *          that NetBurst had 128 byte cache lines while the 486 thru Pentium
 *          III had 32 bytes cache lines. */
#define GIP_TSC_DELTA_CACHE_LINE_SIZE           128
2527
2528
/**
 * TSC delta measurement algorithm \#2 result entry.
 */
typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
{
    /** My own sequence number at the time of the TSC read. */
    uint32_t    iSeqMine;
    /** The other party's sequence number observed at the time of the TSC read. */
    uint32_t    iSeqOther;
    /** The TSC value read. */
    uint64_t    uTsc;
} SUPDRVTSCDELTAMETHOD2ENTRY;
2538
/**
 * TSC delta measurement algorithm \#2 Data.
 */
typedef struct SUPDRVTSCDELTAMETHOD2
{
    /** Padding to make sure the iCurSeqNo is in its own cache line. */
    uint64_t                    au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
    /** The current sequence number of this worker. */
    uint32_t volatile           iCurSeqNo;
    /** Padding after iCurSeqNo to fill out the rest of its cache line.
     * NOTE(review): member is uint32_t despite the 'au64' prefix - the element
     * count below matches uint32_t, so only the name is misleading. */
    uint32_t                    au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint32_t) - 1];
    /** Result table. */
    SUPDRVTSCDELTAMETHOD2ENTRY  aResults[64];
} SUPDRVTSCDELTAMETHOD2;
/** Pointer to the data for TSC delta measurement algorithm \#2 .*/
typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
2555
2556
/**
 * The TSC delta synchronization struct, version 2.
 *
 * The synchronization variable is completely isolated in its own cache line
 * (provided our max cache line size estimate is correct).
 */
typedef struct SUPTSCDELTASYNC2
{
    /** Padding to make sure uSyncVar is in its own cache line. */
    uint64_t            au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
    volatile uint32_t   uSyncVar;
    /** Sequence synchronizing variable used for post 'GO' synchronization. */
    volatile uint32_t   uSyncSeq;

    /** Padding to keep uSyncVar/uSyncSeq isolated in their cache line. */
    uint64_t            au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t) - 2];

    /** Start RDTSC value.  Put here mainly to save stack space. */
    uint64_t            uTscStart;
    /** Copy of SUPDRVGIPTSCDELTARGS::cMaxTscTicks. */
    uint64_t            cMaxTscTicks;
} SUPTSCDELTASYNC2;
AssertCompileSize(SUPTSCDELTASYNC2, GIP_TSC_DELTA_CACHE_LINE_SIZE * 2 + sizeof(uint64_t));
typedef SUPTSCDELTASYNC2 *PSUPTSCDELTASYNC2;
2583
/** @name SUPTSCDELTASYNC2::uSyncVar states.
 * @{ */
/** Prestart wait. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_WAIT    UINT32_C(0x0ffe)
/** Prestart aborted. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_ABORT   UINT32_C(0x0fff)
/** Ready (on your mark). */
#define GIP_TSC_DELTA_SYNC2_READY            UINT32_C(0x1000)
/** Steady (get set). */
#define GIP_TSC_DELTA_SYNC2_STEADY           UINT32_C(0x1001)
/** Go! */
#define GIP_TSC_DELTA_SYNC2_GO               UINT32_C(0x1002)
/** Used by the verification test. */
#define GIP_TSC_DELTA_SYNC2_GO_GO            UINT32_C(0x1003)

/** We reached the time limit. */
#define GIP_TSC_DELTA_SYNC2_TIMEOUT          UINT32_C(0x1ffe)
/** The other party won't touch the sync struct ever again. */
#define GIP_TSC_DELTA_SYNC2_FINAL            UINT32_C(0x1fff)
/** @} */
2601
2602
/**
 * Argument package/state passed by supdrvMeasureTscDeltaOne() to the RTMpOn
 * callback worker.
 *
 * Master and worker sections are kept in separate cache lines to avoid
 * false sharing between the two CPUs participating in the measurement.
 */
typedef struct SUPDRVGIPTSCDELTARGS
{
    /** The device extension.   */
    PSUPDRVDEVEXT               pDevExt;
    /** Pointer to the GIP CPU array entry for the worker. */
    PSUPGIPCPU                  pWorker;
    /** Pointer to the GIP CPU array entry for the master. */
    PSUPGIPCPU                  pMaster;
    /** The maximum number of ticks to spend in supdrvMeasureTscDeltaCallback.
     * (This is what we need a rough TSC frequency for.)  */
    uint64_t                    cMaxTscTicks;
    /** Used to abort synchronization setup. */
    bool volatile               fAbortSetup;

    /** Padding to make sure the master variables live in its own cache lines. */
    uint64_t                    au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** @name Master
     * @{ */
    /** The time the master spent in the MP worker.  */
    uint64_t                    cElapsedMasterTscTicks;
    /** The iTry value when stopped at. */
    uint32_t                    iTry;
    /** Set if the run timed out.   */
    bool volatile               fTimedOut;
    /** Pointer to the master's synchronization struct (on stack). */
    PSUPTSCDELTASYNC2 volatile  pSyncMaster;
    /** Master data union. */
    union
    {
        /** Data (master) for delta verification. */
        struct
        {
            /** Verification test TSC values for the master. */
            uint64_t volatile   auTscs[32];
        } Verify;
        /** Data (master) for measurement method \#2. */
        struct
        {
            /** Data and sequence number. */
            SUPDRVTSCDELTAMETHOD2   Data;
            /** The lag setting for the next run. */
            bool                fLag;
            /** Number of hits. */
            uint32_t            cHits;
        } M2;
    } uMaster;
    /** The verifier verdict, VINF_SUCCESS if ok, VERR_OUT_OF_RANGE if not,
     *  VERR_TRY_AGAIN on timeout. */
    int32_t                     rcVerify;
#ifdef TSCDELTA_VERIFY_WITH_STATS
    /** The maximum difference between TSC read during delta verification. */
    int64_t                     cMaxVerifyTscTicks;
    /** The minimum difference between two TSC reads during verification. */
    int64_t                     cMinVerifyTscTicks;
    /** The bad TSC diff, worker relative to master (= worker - master).
     * Negative value means the worker is behind the master.  */
    int64_t                     iVerifyBadTscDiff;
#endif
    /** @} */

    /** Padding to make sure the worker variables live in their own cache line. */
    uint64_t                    au64CacheLinePaddingBetween[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** @name Proletarian
     * @{ */
    /** Pointer to the worker's synchronization struct (on stack). */
    PSUPTSCDELTASYNC2 volatile  pSyncWorker;
    /** The time the worker spent in the MP worker.  */
    uint64_t                    cElapsedWorkerTscTicks;
    /** Worker data union. */
    union
    {
        /** Data (worker) for delta verification. */
        struct
        {
            /** Verification test TSC values for the worker. */
            uint64_t volatile   auTscs[32];
        } Verify;
        /** Data (worker) for measurement method \#2. */
        struct
        {
            /** Data and sequence number. */
            SUPDRVTSCDELTAMETHOD2   Data;
            /** The lag setting for the next run (set by master). */
            bool                fLag;
        } M2;
    } uWorker;
    /** @} */

    /** Padding to make sure the above is in its own cache line. */
    uint64_t                    au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
} SUPDRVGIPTSCDELTARGS;
typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
2702
2703
/** @name Macros that implements the basic synchronization steps common to
 *        the algorithms.
 *
 * Must be used from loop as the timeouts are implemented via 'break' statements
 * at the moment.
 *
 * @{
 */
#if defined(DEBUG_bird) /* || defined(VBOX_STRICT) */
# define TSCDELTA_DBG_VARS()            uint32_t iDbgCounter
# define TSCDELTA_DBG_START_LOOP()      do { iDbgCounter = 0; } while (0)
/* Breakpoint every 0x2000000 iterations to catch runaway sync loops in debug builds. */
# define TSCDELTA_DBG_CHECK_LOOP() \
    do { iDbgCounter++; if ((iDbgCounter & UINT32_C(0x01ffffff)) == 0) RT_BREAKPOINT(); } while (0)
#else
# define TSCDELTA_DBG_VARS()            ((void)0)
# define TSCDELTA_DBG_START_LOOP()      ((void)0)
# define TSCDELTA_DBG_CHECK_LOOP()      ((void)0)
#endif
#if 0
# define TSCDELTA_DBG_SYNC_MSG(a_Args)  SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG(a_Args)  ((void)0)
#endif
#if 0
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) ((void)0)
#endif
#if 0
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) ((void)0)
#endif
2737
2738
/**
 * Performs the pre-measurement READY/STEADY/GO handshake between master and
 * worker, leaving both sides with interrupts disabled and in (mostly)
 * lockstep execution on success.
 *
 * @returns true on success (interrupts left DISABLED, *pfEFlags holds the
 *          flags to restore), false on abort/timeout (interrupts restored).
 * @param   pMySync     This side's synchronization structure.
 * @param   pOtherSync  The other side's synchronization structure.
 * @param   fIsMaster   Set if called on the master, clear on the worker.
 * @param   pfEFlags    Where to return the saved interrupt flags.
 * @param   pArgs       The argument package (fTimedOut is set on timeout).
 */
static bool supdrvTscDeltaSync2_Before(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                       bool fIsMaster, PRTCCUINTREG pfEFlags, PSUPDRVGIPTSCDELTARGS pArgs)
{
    uint32_t        iMySeq  = fIsMaster ? 0 : 256;
    uint32_t const  iMaxSeq = iMySeq + 16;  /* For the last loop, darn linux/freebsd C-ishness. */
    uint32_t        u32Tmp;
    uint32_t        iSync2Loops = 0;
    RTCCUINTREG     fEFlags;
    TSCDELTA_DBG_VARS();

    *pfEFlags = X86_EFL_IF | X86_EFL_1; /* should shut up most nagging compilers. */

    /*
     * The master tells the worker to get on its mark.
     */
    if (fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
        { /* likely*/ }
        else
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #1 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Wait for the on your mark signal (ack in the master case). We process timeouts here.
     */
    ASMAtomicWriteU32(&(pMySync)->uSyncSeq, 0);
    for (;;)
    {
        fEFlags = ASMIntDisableFlags();
        u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY)
            break;
        ASMSetFlags(fEFlags);
        ASMNopPause();

        /* Abort? */
        if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY)
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #2 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
            return false;
        }

        /* Check for timeouts every so often (not every loop in case RDTSC is
           trapping or something).  Must check the first time around. */
#if 0 /* For debugging the timeout paths. */
        static uint32_t volatile xxx;
#endif
        if (   (   (iSync2Loops & 0x3ff) == 0
                && ASMReadTSC() - pMySync->uTscStart > pMySync->cMaxTscTicks)
#if 0 /* This is crazy, I know, but enable this code and the results are markedly better when enabled on the 1.4GHz AMD (debug). */
            || (!fIsMaster && (++xxx & 0xf) == 0)
#endif
           )
        {
            /* Try switch our own state into timeout mode so the master cannot tell us to 'GO',
               ignore the timeout if we've got the go ahead already (simpler). */
            if (ASMAtomicCmpXchgU32(&pMySync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_READY))
            {
                TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: timeout\n", fIsMaster ? "master" : "worker"));
                ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_STEADY);
                ASMAtomicWriteBool(&pArgs->fTimedOut, true);
                return false;
            }
        }
        iSync2Loops++;
    }

    /*
     * Interrupts are now disabled and will remain disabled until we do
     * TSCDELTA_MASTER_SYNC_AFTER / TSCDELTA_OTHER_SYNC_AFTER.
     */
    *pfEFlags = fEFlags;

    /*
     * The worker tells the master that it is on its mark and that the master
     * need to get into position as well.
     */
    if (!fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #3 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * The master sends the 'go' to the worker and wait for ACK.
     */
    if (fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #4 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Wait for the 'go' signal (ack in the master case).
     */
    TSCDELTA_DBG_START_LOOP();
    for (;;)
    {
        u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (u32Tmp == GIP_TSC_DELTA_SYNC2_GO)
            break;
        if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #5 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
            return false;
        }

        TSCDELTA_DBG_CHECK_LOOP();
        ASMNopPause();
    }

    /*
     * The worker acks the 'go' (shouldn't fail).
     */
    if (!fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #6 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Try enter mostly lockstep execution with it.
     *
     * Each side publishes a sequence number from a disjoint range (master 0..,
     * worker 256..) and spins until it observes the other side keeping pace;
     * UINT32_MAX signals that a side gave up, which is treated as success.
     */
    for (;;)
    {
        uint32_t iOtherSeq1, iOtherSeq2;
        ASMCompilerBarrier();
        ASMSerializeInstruction();

        ASMAtomicWriteU32(&pMySync->uSyncSeq, iMySeq);
        ASMNopPause();
        iOtherSeq1 = ASMAtomicXchgU32(&pOtherSync->uSyncSeq, iMySeq);
        ASMNopPause();
        iOtherSeq2 = ASMAtomicReadU32(&pMySync->uSyncSeq);

        ASMCompilerBarrier();
        if (iOtherSeq1 == iOtherSeq2)
            return true;

        /* Did the other guy give up? Should we give up? */
        if (   iOtherSeq1 == UINT32_MAX
            || iOtherSeq2 == UINT32_MAX)
            return true;
        if (++iMySeq >= iMaxSeq)
        {
            ASMAtomicWriteU32(&pMySync->uSyncSeq, UINT32_MAX);
            return true;
        }
        ASMNopPause();
    }
}
2915
/** Master-role wrapper for supdrvTscDeltaSync2_Before().
 * Note! Expands to a 'break' statement on failure, so it can only be used
 *       directly inside the measurement loop it is supposed to abort. */
#define TSCDELTA_MASTER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { /*likely*/ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/master: #89\n")); \
        break; \
    } else do {} while (0)
/** Worker-role wrapper for supdrvTscDeltaSync2_Before().
 * Note! Expands to a 'break' statement on failure, so it can only be used
 *       directly inside the measurement loop it is supposed to abort. */
#define TSCDELTA_OTHER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { /*likely*/ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/other: #89\n")); \
        break; \
    } else do {} while (0)
2932
2933
/**
 * Synchronization dance following a measurement round.
 *
 * Restores the interrupt flag state (interrupts were disabled by the _Before
 * step) and then waits for the partner CPU to signal that the round is over.
 * For the master this means the worker has completed its data collection; for
 * the worker it means the master is done processing the data.
 *
 * @returns true on success, false if uSyncVar ends up in an unexpected state
 *          (shouldn't ever happen).
 * @param   pMySync     My synchronization structure.
 * @param   pOtherSync  My partner's synchronization structure (not referenced
 *                      here; kept for symmetry with supdrvTscDeltaSync2_Before).
 * @param   fIsMaster   Set if master, clear if worker.
 * @param   fEFlags     The interrupt flag state to restore.
 */
static bool supdrvTscDeltaSync2_After(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                      bool fIsMaster, RTCCUINTREG fEFlags)
{
    TSCDELTA_DBG_VARS();

    /*
     * Wait for the 'ready' signal. In the master's case, this means the
     * worker has completed its data collection, while in the worker's case it
     * means the master is done processing the data and it's time for the next
     * loop iteration (or whatever).
     */
    ASMSetFlags(fEFlags);
    TSCDELTA_DBG_START_LOOP();
    for (;;)
    {
        uint32_t u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (   u32Tmp == GIP_TSC_DELTA_SYNC2_READY
            || (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY && !fIsMaster) /* kicked twice => race */ )
            return true;
        ASMNopPause();
        /* While waiting, the only other legitimate state is 'GO'. */
        if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_GO))
        { /* likely */}
        else
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #1 u32Tmp=%#x\n", u32Tmp));
            return false; /* shouldn't ever happen! */
        }
        TSCDELTA_DBG_CHECK_LOOP();
        ASMNopPause();
    }
}
2965
/** Master-role wrapper for supdrvTscDeltaSync2_After().
 * Note! Expands to a 'break' statement on failure, so it can only be used
 *       directly inside the measurement loop it is supposed to abort. */
#define TSCDELTA_MASTER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_fEFlags))) \
    { /* likely */ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/after/master: #97\n")); \
        break; \
    } else do {} while (0)

/** Used by the master after TSCDELTA_MASTER_SYNC_AFTER to release the worker
 * for the next round (READY <- GO transition on the worker's sync variable).
 * Note! Expands to a 'break' statement on failure. */
#define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pMySync, a_pOtherSync) \
    /* \
     * Tell the worker that we're done processing the data and ready for the next round. \
     */ \
    if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
    { /* likely */ } \
    else if (true)\
    { \
        TSCDELTA_DBG_SYNC_MSG(("sync/after/master: #99 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
        break; \
    } else do {} while (0)

/** Worker-role wrapper for supdrvTscDeltaSync2_After(): first acknowledges the
 * master (READY <- GO on the master's sync variable), then waits for the
 * master to finish processing via supdrvTscDeltaSync2_After().
 * Note! Expands to a 'break' statement on failure. */
#define TSCDELTA_OTHER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (true) { \
        /* \
         * Tell the master that we're done collecting data and wait for the next round to start. \
         */ \
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
        { /* likely */ } \
        else \
        { \
            ASMSetFlags(a_fEFlags); \
            TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #0 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
            break; \
        } \
        if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_fEFlags))) \
        { /* likely */ } \
        else \
        { \
            TSCDELTA_DBG_SYNC_MSG9(("sync/after/other: #98\n")); \
            break; \
        } \
    } else do {} while (0)
3008/** @} */
3009
3010
3011#ifdef GIP_TSC_DELTA_METHOD_1
3012/**
3013 * TSC delta measurement algorithm \#1 (GIP_TSC_DELTA_METHOD_1).
3014 *
3015 *
3016 * We ignore the first few runs of the loop in order to prime the
3017 * cache. Also, we need to be careful about using 'pause' instruction
3018 * in critical busy-wait loops in this code - it can cause undesired
3019 * behaviour with hyperthreading.
3020 *
3021 * We try to minimize the measurement error by computing the minimum
3022 * read time of the compare statement in the worker by taking TSC
3023 * measurements across it.
3024 *
3025 * It must be noted that the computed minimum read time is mostly to
3026 * eliminate huge deltas when the worker is too early and doesn't by
3027 * itself help produce more accurate deltas. We allow two times the
3028 * computed minimum as an arbitrary acceptable threshold. Therefore,
3029 * it is still possible to get negative deltas where there are none
3030 * when the worker is earlier. As long as these occasional negative
3031 * deltas are lower than the time it takes to exit guest-context and
3032 * the OS to reschedule EMT on a different CPU, we won't expose a TSC
3033 * that jumped backwards. It is due to the existence of the negative
3034 * deltas that we don't recompute the delta with the master and
3035 * worker interchanged to eliminate the remaining measurement error.
3036 *
3037 *
3038 * @param pArgs The argument/state data.
3039 * @param pMySync My synchronization structure.
3040 * @param pOtherSync My partner's synchronization structure.
3041 * @param fIsMaster Set if master, clear if worker.
3042 * @param iTry The attempt number.
3043 */
static void supdrvTscDeltaMethod1Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                      bool fIsMaster, uint32_t iTry)
{
    PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
    PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
    uint64_t uMinCmpReadTime = UINT64_MAX; /* Smallest time seen for the worker to notice the master's TSC store. */
    unsigned iLoop;
    NOREF(iTry);

    /* Note! The TSCDELTA_*_SYNC_* macros below contain 'break' statements and
             may terminate this loop early on synchronization failures. */
    for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
    {
        RTCCUINTREG fEFlags;
        if (fIsMaster)
        {
            /*
             * The master.
             */
            AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
                      ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
                       pGipCpuMaster->u64TSCSample, pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, pArgs->pDevExt->idGipMaster));
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /* Publish our TSC sample; loop in the (unlikely) case it equals the reserved marker. */
            do
            {
                ASMSerializeInstruction();
                ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);

            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /* Process the data. */
            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
            {
                /* The worker publishes GIP_TSC_DELTA_RSVD when it discarded its sample. */
                if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
                {
                    /* Delta = worker TSC minus the master TSC adjusted by the master's own
                       delta; keep the candidate closest to GIP_TSC_DELTA_INITIAL_MASTER_VALUE. */
                    int64_t iDelta = pGipCpuWorker->u64TSCSample
                                   - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
                    if (  iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
                        ? iDelta < pGipCpuWorker->i64TSCDelta
                        : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
                        pGipCpuWorker->i64TSCDelta = iDelta;
                }
            }

            /* Reset our TSC sample and tell the worker to move on. */
            ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker.
             */
            uint64_t uTscWorker;
            uint64_t uTscWorkerFlushed;
            uint64_t uCmpReadTime;

            ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /*
             * Keep reading the TSC until we notice that the master has read his. Reading
             * the TSC -after- the master has updated the memory is way too late. We thus
             * compensate by trying to measure how long it took for the worker to notice
             * the memory flushed from the master.
             */
            do
            {
                ASMSerializeInstruction();
                uTscWorker = ASMReadTSC();
            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
            ASMSerializeInstruction();
            uTscWorkerFlushed = ASMReadTSC();

            uCmpReadTime = uTscWorkerFlushed - uTscWorker;
            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
            {
                /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
                if (uCmpReadTime < (uMinCmpReadTime << 1))
                {
                    /* Acceptable notice latency (< 2x the observed minimum): publish the sample. */
                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
                    if (uCmpReadTime < uMinCmpReadTime)
                        uMinCmpReadTime = uCmpReadTime;
                }
                else
                    /* Outlier: tell the master to discard this round's sample. */
                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
            }
            else if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
            {
                /* Read-time calibration loops: only track the minimum notice latency. */
                if (uCmpReadTime < uMinCmpReadTime)
                    uMinCmpReadTime = uCmpReadTime;
            }

            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
    }

    TSCDELTA_DBG_SYNC_MSG9(("sync/method1loop/%s: #92 iLoop=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iLoop,
                            pMySync->uSyncVar));

    /*
     * We must reset the worker TSC sample value in case it gets picked as a
     * GIP master later on (it's trashed above, naturally).
     */
    if (!fIsMaster)
        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
}
3151#endif /* GIP_TSC_DELTA_METHOD_1 */
3152
3153
3154#ifdef GIP_TSC_DELTA_METHOD_2
3155/*
3156 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
3157 */
3158
/** Total number of measurement loops for method 2 (body loops + primer loops). */
# define GIP_TSC_DELTA_M2_LOOPS (7 + GIP_TSC_DELTA_M2_PRIMER_LOOPS)
/** Number of priming loops for method 2 (zero, i.e. priming currently disabled). */
# define GIP_TSC_DELTA_M2_PRIMER_LOOPS 0
3161
3162
3163static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs, uint32_t iLoop)
3164{
3165 int64_t iMasterTscDelta = pArgs->pMaster->i64TSCDelta;
3166 int64_t iBestDelta = pArgs->pWorker->i64TSCDelta;
3167 uint32_t idxResult;
3168 uint32_t cHits = 0;
3169
3170 /*
3171 * Look for matching entries in the master and worker tables.
3172 */
3173 for (idxResult = 0; idxResult < RT_ELEMENTS(pArgs->uMaster.M2.Data.aResults); idxResult++)
3174 {
3175 uint32_t idxOther = pArgs->uMaster.M2.Data.aResults[idxResult].iSeqOther;
3176 if (idxOther & 1)
3177 {
3178 idxOther >>= 1;
3179 if (idxOther < RT_ELEMENTS(pArgs->uWorker.M2.Data.aResults))
3180 {
3181 if (pArgs->uWorker.M2.Data.aResults[idxOther].iSeqOther == pArgs->uMaster.M2.Data.aResults[idxResult].iSeqMine)
3182 {
3183 int64_t iDelta;
3184 iDelta = pArgs->uWorker.M2.Data.aResults[idxOther].uTsc
3185 - (pArgs->uMaster.M2.Data.aResults[idxResult].uTsc - iMasterTscDelta);
3186 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3187 ? iDelta < iBestDelta
3188 : iDelta > iBestDelta || iBestDelta == INT64_MAX)
3189 iBestDelta = iDelta;
3190 cHits++;
3191 }
3192 }
3193 }
3194 }
3195
3196 /*
3197 * Save the results.
3198 */
3199 if (cHits > 2)
3200 pArgs->pWorker->i64TSCDelta = iBestDelta;
3201 pArgs->uMaster.M2.cHits += cHits;
3202}
3203
3204
3205/**
3206 * The core function of the 2nd TSC delta measurement algorithm.
3207 *
3208 * The idea here is that we have the two CPUs execute the exact same code
3209 * collecting a largish set of TSC samples. The code has one data dependency on
3210 * the other CPU which intention it is to synchronize the execution as well as
3211 * help cross references the two sets of TSC samples (the sequence numbers).
3212 *
3213 * The @a fLag parameter is used to modify the execution a tiny bit on one or
3214 * both of the CPUs. When @a fLag differs between the CPUs, it is thought that
3215 * it will help with making the CPUs enter lock step execution occasionally.
3216 *
3217 */
static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
{
    SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
    uint32_t cLeft = RT_ELEMENTS(pMyData->aResults);

    ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
    ASMSerializeInstruction();
    while (cLeft-- > 0)
    {
        uint64_t uTsc;
        /* Our sequence number is bumped both before and after the TSC read, so
           it is odd exactly while a sample is 'in flight'.  The value snooped
           from the other CPU (iSeqOther) is what the processing code uses to
           pair up samples from the two CPUs (see the low-bit check there). */
        uint32_t iSeqMine = ASMAtomicIncU32(&pMyData->iCurSeqNo);
        uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
        ASMCompilerBarrier();
        ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
        uTsc = ASMReadTSC();
        ASMAtomicIncU32(&pMyData->iCurSeqNo);
        ASMCompilerBarrier();
        ASMSerializeInstruction();
        /* Record the sample; the stores are fenced off from the TSC read above. */
        pEntry->iSeqMine = iSeqMine;
        pEntry->iSeqOther = iSeqOther;
        pEntry->uTsc = uTsc;
        pEntry++;
        ASMSerializeInstruction();
        if (fLag)
            ASMNopPause(); /* The lag fudge - see the fLag parameter docs above. */
    }
}
3245
3246
3247/**
3248 * TSC delta measurement algorithm \#2 (GIP_TSC_DELTA_METHOD_2).
3249 *
3250 * See supdrvTscDeltaMethod2CollectData for algorithm details.
3251 *
3252 * @param pArgs The argument/state data.
3253 * @param pMySync My synchronization structure.
3254 * @param pOtherSync My partner's synchronization structure.
3255 * @param fIsMaster Set if master, clear if worker.
3256 * @param iTry The attempt number.
3257 */
static void supdrvTscDeltaMethod2Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                      bool fIsMaster, uint32_t iTry)
{
    unsigned iLoop;

    /* Note! The TSCDELTA_*_SYNC_* macros below contain 'break' statements and
             may terminate this loop early on synchronization failures. */
    for (iLoop = 0; iLoop < GIP_TSC_DELTA_M2_LOOPS; iLoop++)
    {
        RTCCUINTREG fEFlags;
        if (fIsMaster)
        {
            /*
             * Adjust the loop lag fudge.  (The master decides the lag setting
             * for both CPUs; the worker just reads pArgs->uWorker.M2.fLag.)
             */
# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
            if (iLoop < GIP_TSC_DELTA_M2_PRIMER_LOOPS)
            {
                /* Lag during the priming to be nice to everyone.. */
                pArgs->uMaster.M2.fLag = true;
                pArgs->uWorker.M2.fLag = true;
            }
            else
# endif
            if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4)
            {
                /* 25 % of the body without lagging. */
                pArgs->uMaster.M2.fLag = false;
                pArgs->uWorker.M2.fLag = false;
            }
            else if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4 * 2)
            {
                /* 25 % of the body with both lagging. */
                pArgs->uMaster.M2.fLag = true;
                pArgs->uWorker.M2.fLag = true;
            }
            else
            {
                /* 50% of the body with alternating lag. */
                pArgs->uMaster.M2.fLag = (iLoop & 1) == 0;
                pArgs->uWorker.M2.fLag= (iLoop & 1) == 1;
            }

            /*
             * Sync up with the worker and collect data.
             */
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
            supdrvTscDeltaMethod2CollectData(&pArgs->uMaster.M2.Data, &pArgs->uWorker.M2.Data.iCurSeqNo, pArgs->uMaster.M2.fLag);
            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /*
             * Process the data.
             */
# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
            if (iLoop >= GIP_TSC_DELTA_M2_PRIMER_LOOPS)
# endif
                supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs, iLoop);

            /* Release the worker for the next round. */
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker.
             */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
            supdrvTscDeltaMethod2CollectData(&pArgs->uWorker.M2.Data, &pArgs->uMaster.M2.Data.iCurSeqNo, pArgs->uWorker.M2.fLag);
            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
    }
}
3327
3328#endif /* GIP_TSC_DELTA_METHOD_2 */
3329
3330
3331
/**
 * Verifies a TSC delta by taking delta-adjusted TSC readings in strict
 * alternation between the master and worker CPUs, checking that time never
 * appears to go backwards when switching between them.
 *
 * The two CPUs ping-pong via uSyncVar (GO / GO_GO transitions), so every
 * other entry in the sample tables comes from the other CPU.  The master then
 * scans the interleaved, delta-adjusted samples for negative differences.
 *
 * @returns pArgs->rcVerify on a completed round: VINF_SUCCESS when the
 *          ordering holds, VERR_OUT_OF_RANGE when a negative difference was
 *          observed.  VERR_TIMEOUT if the synchronization dance failed, in
 *          which case rcVerify is set to VERR_TRY_AGAIN.
 * @param   pArgs            The argument/state data.
 * @param   pMySync          My synchronization structure.
 * @param   pOtherSync       My partner's synchronization structure.
 * @param   fIsMaster        Set if master, clear if worker.
 * @param   iWorkerTscDelta  The worker TSC delta to verify.
 */
static int supdrvTscDeltaVerify(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync,
                                PSUPTSCDELTASYNC2 pOtherSync, bool fIsMaster, int64_t iWorkerTscDelta)
{
    /*PSUPGIPCPU pGipCpuWorker = pArgs->pWorker; - unused */
    PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
    uint32_t i;
    TSCDELTA_DBG_VARS();

    /* Note! This runs a single round; the for(;;) only exists so the
             TSCDELTA_*_SYNC_* macros can 'break' to the timeout path below. */
    for (;;)
    {
        RTCCUINTREG fEFlags;
        AssertCompile((RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) & 1) == 0);
        AssertCompile(RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) == RT_ELEMENTS(pArgs->uWorker.Verify.auTscs));

        if (fIsMaster)
        {
            uint64_t uTscWorker;
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /*
             * Collect TSC, master goes first.
             */
            for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i += 2)
            {
                /* Read, kick & wait #1. */
                uint64_t register uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
                ASMSerializeInstruction();
                pArgs->uMaster.Verify.auTscs[i] = uTsc;
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }

                /* Read, kick & wait #2. */
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
                ASMSerializeInstruction();
                pArgs->uMaster.Verify.auTscs[i + 1] = uTsc;
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
            }

            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /*
             * Process the data.
             */
#ifdef TSCDELTA_VERIFY_WITH_STATS
            pArgs->cMaxVerifyTscTicks = INT64_MIN;
            pArgs->cMinVerifyTscTicks = INT64_MAX;
            pArgs->iVerifyBadTscDiff = 0;
#endif
            ASMAtomicWriteS32(&pArgs->rcVerify, VINF_SUCCESS);
            uTscWorker = 0;
            for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i++)
            {
                /* Master vs previous worker entry. */
                uint64_t uTscMaster = pArgs->uMaster.Verify.auTscs[i] - pGipCpuMaster->i64TSCDelta;
                int64_t iDiff;
                if (i > 0)
                {
                    iDiff = uTscMaster - uTscWorker;
#ifdef TSCDELTA_VERIFY_WITH_STATS
                    if (iDiff > pArgs->cMaxVerifyTscTicks)
                        pArgs->cMaxVerifyTscTicks = iDiff;
                    if (iDiff < pArgs->cMinVerifyTscTicks)
                        pArgs->cMinVerifyTscTicks = iDiff;
#endif
                    /* A negative difference means adjusted time went backwards. */
                    if (iDiff < 0)
                    {
#ifdef TSCDELTA_VERIFY_WITH_STATS
                        pArgs->iVerifyBadTscDiff = -iDiff;
#endif
                        ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
                        break;
                    }
                }

                /* Worker vs master. */
                uTscWorker = pArgs->uWorker.Verify.auTscs[i] - iWorkerTscDelta;
                iDiff = uTscWorker - uTscMaster;
#ifdef TSCDELTA_VERIFY_WITH_STATS
                if (iDiff > pArgs->cMaxVerifyTscTicks)
                    pArgs->cMaxVerifyTscTicks = iDiff;
                if (iDiff < pArgs->cMinVerifyTscTicks)
                    pArgs->cMinVerifyTscTicks = iDiff;
#endif
                if (iDiff < 0)
                {
#ifdef TSCDELTA_VERIFY_WITH_STATS
                    pArgs->iVerifyBadTscDiff = iDiff;
#endif
                    ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
                    break;
                }
            }

            /* Done. */
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker, master leads.
             */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            for (i = 0; i < RT_ELEMENTS(pArgs->uWorker.Verify.auTscs); i += 2)
            {
                uint64_t register uTsc;

                /* Wait, Read and Kick #1. */
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
                ASMSerializeInstruction();
                pArgs->uWorker.Verify.auTscs[i] = uTsc;

                /* Wait, Read and Kick #2. */
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
                ASMSerializeInstruction();
                pArgs->uWorker.Verify.auTscs[i + 1] = uTsc;
            }

            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
        return pArgs->rcVerify;
    }

    /*
     * Timed out, please retry.
     */
    ASMAtomicWriteS32(&pArgs->rcVerify, VERR_TRY_AGAIN);
    return VERR_TIMEOUT;
}
3486
3487
3488
3489/**
3490 * Handles the special abort procedure during synchronization setup in
3491 * supdrvMeasureTscDeltaCallbackUnwrapped().
3492 *
3493 * @returns 0 (dummy, ignored)
3494 * @param pArgs Pointer to argument/state data.
3495 * @param pMySync Pointer to my sync structure.
3496 * @param fIsMaster Set if we're the master, clear if worker.
3497 * @param fTimeout Set if it's a timeout.
3498 */
3499DECL_NO_INLINE(static, int)
3500supdrvMeasureTscDeltaCallbackAbortSyncSetup(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, bool fIsMaster, bool fTimeout)
3501{
3502 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3503 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3504 TSCDELTA_DBG_VARS();
3505
3506 /*
3507 * Clear our sync pointer and make sure the abort flag is set.
3508 */
3509 ASMAtomicWriteNullPtr(ppMySync);
3510 ASMAtomicWriteBool(&pArgs->fAbortSetup, true);
3511 if (fTimeout)
3512 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
3513
3514 /*
3515 * Make sure the other party is out of there and won't be touching our
3516 * sync state again (would cause stack corruption).
3517 */
3518 TSCDELTA_DBG_START_LOOP();
3519 while (ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2) != NULL)
3520 {
3521 ASMNopPause();
3522 ASMNopPause();
3523 ASMNopPause();
3524 TSCDELTA_DBG_CHECK_LOOP();
3525 }
3526
3527 return 0;
3528}
3529
3530
3531/**
3532 * This is used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
3533 * and compute the delta between them.
3534 *
3535 * To reduce code size a good when timeout handling was added, a dummy return
3536 * value had to be added (saves 1-3 lines per timeout case), thus this
3537 * 'Unwrapped' function and the dummy 0 return value.
3538 *
3539 * @returns 0 (dummy, ignored)
3540 * @param idCpu The CPU we are current scheduled on.
3541 * @param pArgs Pointer to a parameter package.
3542 *
3543 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
3544 * read the TSC at exactly the same time on both the master and the
3545 * worker CPUs. Due to DMA, bus arbitration, cache locality,
3546 * contention, SMI, pipelining etc. there is no guaranteed way of
3547 * doing this on x86 CPUs.
3548 */
3549static int supdrvMeasureTscDeltaCallbackUnwrapped(RTCPUID idCpu, PSUPDRVGIPTSCDELTARGS pArgs)
3550{
3551 PSUPDRVDEVEXT pDevExt = pArgs->pDevExt;
3552 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3553 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3554 bool const fIsMaster = idCpu == pGipCpuMaster->idCpu;
3555 uint32_t iTry;
3556 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3557 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3558 SUPTSCDELTASYNC2 MySync;
3559 PSUPTSCDELTASYNC2 pOtherSync;
3560 int rc;
3561 TSCDELTA_DBG_VARS();
3562
3563 /* A bit of paranoia first. */
3564 if (!pGipCpuMaster || !pGipCpuWorker)
3565 return 0;
3566
3567 /*
3568 * If the CPU isn't part of the measurement, return immediately.
3569 */
3570 if ( !fIsMaster
3571 && idCpu != pGipCpuWorker->idCpu)
3572 return 0;
3573
3574 /*
3575 * Set up my synchronization stuff and wait for the other party to show up.
3576 *
3577 * We don't wait forever since the other party may be off fishing (offline,
3578 * spinning with ints disables, whatever), we must play nice to the rest of
3579 * the system as this context generally isn't one in which we will get
3580 * preempted and we may hold up a number of lower priority interrupts.
3581 */
3582 ASMAtomicWriteU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT);
3583 ASMAtomicWritePtr(ppMySync, &MySync);
3584 MySync.uTscStart = ASMReadTSC();
3585 MySync.cMaxTscTicks = pArgs->cMaxTscTicks;
3586
3587 /* Look for the partner, might not be here yet... Special abort considerations. */
3588 iTry = 0;
3589 TSCDELTA_DBG_START_LOOP();
3590 while ((pOtherSync = ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2)) == NULL)
3591 {
3592 ASMNopPause();
3593 if ( ASMAtomicReadBool(&pArgs->fAbortSetup)
3594 || !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuWorker->idCpu) )
3595 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3596 if ( (iTry++ & 0xff) == 0
3597 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3598 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3599 TSCDELTA_DBG_CHECK_LOOP();
3600 ASMNopPause();
3601 }
3602
3603 /* I found my partner, waiting to be found... Special abort considerations. */
3604 if (fIsMaster)
3605 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* parnaoia */
3606 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3607
3608 iTry = 0;
3609 TSCDELTA_DBG_START_LOOP();
3610 while (ASMAtomicReadU32(&MySync.uSyncVar) == GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)
3611 {
3612 ASMNopPause();
3613 if (ASMAtomicReadBool(&pArgs->fAbortSetup))
3614 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3615 if ( (iTry++ & 0xff) == 0
3616 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3617 {
3618 if ( fIsMaster
3619 && !ASMAtomicCmpXchgU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_ABORT, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT))
3620 break; /* race #1: slave has moved on, handle timeout in loop instead. */
3621 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3622 }
3623 TSCDELTA_DBG_CHECK_LOOP();
3624 }
3625
3626 if (!fIsMaster)
3627 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* race #1 */
3628 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3629
3630/** @todo Add a resumable state to pArgs so we don't waste time if we time
3631 * out or something. Timeouts are legit, any of the two CPUs may get
3632 * interrupted. */
3633
3634 /*
3635 * Start by seeing if we have a zero delta between the two CPUs.
3636 * This should normally be the case.
3637 */
3638 rc = supdrvTscDeltaVerify(pArgs, &MySync, pOtherSync, fIsMaster, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3639 if (RT_SUCCESS(rc))
3640 {
3641 if (fIsMaster)
3642 {
3643 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3644 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3645 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3646 }
3647 }
3648 /*
3649 * If the verification didn't time out, do regular delta measurements.
3650 * We retry this until we get a reasonable value.
3651 */
3652 else if (rc != VERR_TIMEOUT)
3653 {
3654 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
3655 for (iTry = 0; iTry < 12; iTry++)
3656 {
3657 /*
3658 * Check the state before we start.
3659 */
3660 uint32_t u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3661 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3662 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3663 {
3664 TSCDELTA_DBG_SYNC_MSG(("sync/loop/%s: #0 iTry=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iTry, u32Tmp));
3665 break;
3666 }
3667
3668 /*
3669 * Do the measurements.
3670 */
3671#ifdef GIP_TSC_DELTA_METHOD_1
3672 supdrvTscDeltaMethod1Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3673#elif defined(GIP_TSC_DELTA_METHOD_2)
3674 supdrvTscDeltaMethod2Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3675#else
3676# error "huh??"
3677#endif
3678
3679 /*
3680 * Check the state.
3681 */
3682 u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3683 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3684 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3685 {
3686 if (fIsMaster)
3687 TSCDELTA_DBG_SYNC_MSG(("sync/loop/master: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3688 else
3689 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/worker: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3690 break;
3691 }
3692
3693 /*
3694 * Success? If so, stop trying. Master decides.
3695 */
3696 if (fIsMaster)
3697 {
3698 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
3699 {
3700 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3701 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3702 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/master: #9 iTry=%u MyState=%#x\n", iTry, MySync.uSyncVar));
3703 break;
3704 }
3705 }
3706 }
3707 if (fIsMaster)
3708 pArgs->iTry = iTry;
3709 }
3710
3711 /*
3712 * End the synchronization dance. We tell the other that we're done,
3713 * then wait for the same kind of reply.
3714 */
3715 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_FINAL);
3716 ASMAtomicWriteNullPtr(ppMySync);
3717 iTry = 0;
3718 TSCDELTA_DBG_START_LOOP();
3719 while (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_FINAL)
3720 {
3721 iTry++;
3722 if ( iTry == 0
3723 && !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuWorker->idCpu))
3724 break; /* this really shouldn't happen. */
3725 TSCDELTA_DBG_CHECK_LOOP();
3726 ASMNopPause();
3727 }
3728
3729 /*
3730 * Collect some runtime stats.
3731 */
3732 if (fIsMaster)
3733 pArgs->cElapsedMasterTscTicks = ASMReadTSC() - MySync.uTscStart;
3734 else
3735 pArgs->cElapsedWorkerTscTicks = ASMReadTSC() - MySync.uTscStart;
3736 return 0;
3737}
3738
3739/**
3740 * Callback used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
3741 * and compute the delta between them.
3742 *
3743 * @param idCpu The CPU we are current scheduled on.
3744 * @param pvUser1 Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
3745 * @param pvUser2 Unused.
3746 */
3747static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3748{
3749 supdrvMeasureTscDeltaCallbackUnwrapped(idCpu, (PSUPDRVGIPTSCDELTARGS)pvUser1);
3750}
3751
3752
3753/**
3754 * Measures the TSC delta between the master GIP CPU and one specified worker
3755 * CPU.
3756 *
3757 * @returns VBox status code.
3758 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED on pure measurement
3759 * failure.
3760 * @param pDevExt Pointer to the device instance data.
3761 * @param idxWorker The index of the worker CPU from the GIP's array of
3762 * CPUs.
3763 *
3764 * @remarks This must be called with preemption enabled!
3765 */
3766static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
3767{
3768 int rc;
3769 int rc2;
3770 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3771 RTCPUID idMaster = pDevExt->idGipMaster;
3772 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
3773 PSUPGIPCPU pGipCpuMaster;
3774 uint32_t iGipCpuMaster;
3775 uint32_t u32Tmp;
3776
3777 /* Validate input a bit. */
3778 AssertReturn(pGip, VERR_INVALID_PARAMETER);
3779 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
3780 Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3781
3782 /*
3783 * Don't attempt measuring the delta for the GIP master.
3784 */
3785 if (pGipCpuWorker->idCpu == idMaster)
3786 {
3787 if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
3788 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3789 return VINF_SUCCESS;
3790 }
3791
3792 /*
3793 * One measurement at a time, at least for now. We might be using
3794 * broadcast IPIs so, so be nice to the rest of the system.
3795 */
3796#ifdef SUPDRV_USE_MUTEX_FOR_GIP
3797 rc = RTSemMutexRequest(pDevExt->mtxTscDelta, RT_INDEFINITE_WAIT);
3798#else
3799 rc = RTSemFastMutexRequest(pDevExt->mtxTscDelta);
3800#endif
3801 if (RT_FAILURE(rc))
3802 return rc;
3803
3804 /*
3805 * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
3806 * try pick a different master. (This fudge only works with multi core systems.)
3807 * ASSUMES related threads have adjacent APIC IDs. ASSUMES two threads per core.
3808 *
3809 * We skip this on AMDs for now as their HTT is different from Intel's and
3810 * it doesn't seem to have any favorable effect on the results.
3811 *
3812 * If the master is offline, we need a new master too, so share the code.
3813 */
3814 iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
3815 AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
3816 pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
3817 if ( ( (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
3818 && pGip->cOnlineCpus > 2
3819 && ASMHasCpuId()
3820 && ASMIsValidStdRange(ASMCpuId_EAX(0))
3821 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
3822 && ( !ASMIsAmdCpu()
3823 || ASMGetCpuFamily(u32Tmp = ASMCpuId_EAX(1)) > 0x15
3824 || ( ASMGetCpuFamily(u32Tmp) == 0x15 /* Piledriver+, not bulldozer (FX-4150 didn't like it). */
3825 && ASMGetCpuModelAMD(u32Tmp) >= 0x02) ) )
3826 || !RTMpIsCpuOnline(idMaster) )
3827 {
3828 uint32_t i;
3829 for (i = 0; i < pGip->cCpus; i++)
3830 if ( i != iGipCpuMaster
3831 && i != idxWorker
3832 && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
3833 && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
3834 && pGip->aCPUs[i].idCpu != NIL_RTCPUID
3835 && pGip->aCPUs[i].idCpu != idMaster /* paranoia starts here... */
3836 && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu
3837 && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
3838 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic
3839 && RTMpIsCpuOnline(pGip->aCPUs[i].idCpu))
3840 {
3841 iGipCpuMaster = i;
3842 pGipCpuMaster = &pGip->aCPUs[i];
3843 idMaster = pGipCpuMaster->idCpu;
3844 break;
3845 }
3846 }
3847
3848 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
3849 {
3850 /*
3851 * Initialize data package for the RTMpOnPair callback.
3852 */
3853 PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)RTMemAllocZ(sizeof(*pArgs));
3854 if (pArgs)
3855 {
3856 pArgs->pWorker = pGipCpuWorker;
3857 pArgs->pMaster = pGipCpuMaster;
3858 pArgs->pDevExt = pDevExt;
3859 pArgs->pSyncMaster = NULL;
3860 pArgs->pSyncWorker = NULL;
3861 pArgs->cMaxTscTicks = ASMAtomicReadU64(&pGip->u64CpuHz) / 512; /* 1953 us */
3862
3863 /*
3864 * Do the RTMpOnPair call. We reset i64TSCDelta first so we
3865 * and supdrvMeasureTscDeltaCallback can use it as a success check.
3866 */
3867 /** @todo Store the i64TSCDelta result in pArgs first? Perhaps deals with
3868 * that when doing the restart loop reorg. */
3869 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
3870 rc = RTMpOnPair(pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, RTMPON_F_CONCURRENT_EXEC,
3871 supdrvMeasureTscDeltaCallback, pArgs, NULL);
3872 if (RT_SUCCESS(rc))
3873 {
3874#if 0
3875 SUPR0Printf("mponpair ticks: %9llu %9llu max: %9llu iTry: %u%s\n", pArgs->cElapsedMasterTscTicks,
3876 pArgs->cElapsedWorkerTscTicks, pArgs->cMaxTscTicks, pArgs->iTry,
3877 pArgs->fTimedOut ? " timed out" :"");
3878#endif
3879#if 0
3880 SUPR0Printf("rcVerify=%d iVerifyBadTscDiff=%lld cMinVerifyTscTicks=%lld cMaxVerifyTscTicks=%lld\n",
3881 pArgs->rcVerify, pArgs->iVerifyBadTscDiff, pArgs->cMinVerifyTscTicks, pArgs->cMaxVerifyTscTicks);
3882#endif
3883 if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
3884 {
3885 /*
3886 * Work the TSC delta applicability rating. It starts
3887 * optimistic in supdrvGipInit, we downgrade it here.
3888 */
3889 SUPGIPUSETSCDELTA enmRating;
3890 if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
3891 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
3892 enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
3893 else if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
3894 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
3895 enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
3896 else
3897 enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
3898 if (pGip->enmUseTscDelta < enmRating)
3899 {
3900 AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
3901 ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
3902 }
3903 }
3904 else
3905 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
3906 }
3907 /** @todo return try-again if we get an offline CPU error. */
3908
3909 RTMemFree(pArgs);
3910 }
3911 else
3912 rc = VERR_NO_MEMORY;
3913 }
3914 else
3915 rc = VERR_CPU_OFFLINE;
3916
3917 /*
3918 * We're done now.
3919 */
3920#ifdef SUPDRV_USE_MUTEX_FOR_GIP
3921 rc2 = RTSemMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
3922#else
3923 rc2 = RTSemFastMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
3924#endif
3925 return rc;
3926}
3927
3928
3929/**
3930 * Resets the TSC-delta related TSC samples and optionally the deltas
3931 * themselves.
3932 *
3933 * @param pDevExt Pointer to the device instance data.
3934 * @param fResetTscDeltas Whether the TSC-deltas are also to be reset.
3935 *
3936 * @remarks This might be called while holding a spinlock!
3937 */
3938static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fResetTscDeltas)
3939{
3940 unsigned iCpu;
3941 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3942 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
3943 {
3944 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
3945 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
3946 if (fResetTscDeltas)
3947 {
3948 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpu->iCpuSet);
3949 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
3950 }
3951 }
3952}
3953
3954
3955/**
3956 * Picks an online CPU as the master TSC for TSC-delta computations.
3957 *
3958 * @returns VBox status code.
3959 * @param pDevExt Pointer to the device instance data.
3960 * @param pidxMaster Where to store the CPU array index of the chosen
3961 * master. Optional, can be NULL.
3962 */
3963static int supdrvTscPickMaster(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
3964{
3965 /*
3966 * Pick the first CPU online as the master TSC and make it the new GIP master based
3967 * on the APIC ID.
3968 *
3969 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
3970 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
3971 * master as this point since the sync/async timer isn't created yet.
3972 */
3973 unsigned iCpu;
3974 uint32_t idxMaster = UINT32_MAX;
3975 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3976 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
3977 {
3978 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
3979 if (idxCpu != UINT16_MAX)
3980 {
3981 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
3982 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
3983 {
3984 idxMaster = idxCpu;
3985 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
3986 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpu->idCpu);
3987 if (pidxMaster)
3988 *pidxMaster = idxMaster;
3989 return VINF_SUCCESS;
3990 }
3991 }
3992 }
3993 return VERR_CPU_OFFLINE;
3994}
3995
3996
3997/**
3998 * Performs the initial measurements of the TSC deltas between CPUs.
3999 *
4000 * This is called by supdrvGipCreate(), supdrvGipPowerNotificationCallback() or
4001 * triggered by it if threaded.
4002 *
4003 * @returns VBox status code.
4004 * @param pDevExt Pointer to the device instance data.
4005 *
4006 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
4007 * idCpu, GIP's online CPU set which are populated in
4008 * supdrvGipInitOnCpu().
4009 */
4010static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt)
4011{
4012 PSUPGIPCPU pGipCpuMaster;
4013 unsigned iCpu;
4014 unsigned iOddEven;
4015 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4016 uint32_t idxMaster = UINT32_MAX;
4017 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
4018
4019 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4020 supdrvTscResetSamples(pDevExt, true /* fClearDeltas */);
4021 int rc = supdrvTscPickMaster(pDevExt, &idxMaster);
4022 if (RT_FAILURE(rc))
4023 {
4024 SUPR0Printf("Failed to pick a CPU master for TSC-delta measurements rc=%Rrc\n", rc);
4025 return rc;
4026 }
4027 AssertReturn(idxMaster < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4028 pGipCpuMaster = &pGip->aCPUs[idxMaster];
4029 Assert(pDevExt->idGipMaster == pGipCpuMaster->idCpu);
4030
4031 /*
4032 * If there is only a single CPU online we have nothing to do.
4033 */
4034 if (pGip->cOnlineCpus <= 1)
4035 {
4036 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
4037 return VINF_SUCCESS;
4038 }
4039
4040 /*
4041 * Loop thru the GIP CPU array and get deltas for each CPU (except the
4042 * master). We do the CPUs with the even numbered APIC IDs first so that
4043 * we've got alternative master CPUs to pick from on hyper-threaded systems.
4044 */
4045 for (iOddEven = 0; iOddEven < 2; iOddEven++)
4046 {
4047 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4048 {
4049 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4050 if ( iCpu != idxMaster
4051 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
4052 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4053 {
4054 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
4055 if (RT_FAILURE(rc))
4056 {
4057 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
4058 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
4059 break;
4060 }
4061
4062 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
4063 {
4064 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
4065 rc = VERR_TRY_AGAIN;
4066 break;
4067 }
4068 }
4069 }
4070 }
4071
4072 return rc;
4073}
4074
4075
4076#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4077
4078/**
4079 * Switches the TSC-delta measurement thread into the butchered state.
4080 *
4081 * @returns VBox status code.
4082 * @param pDevExt Pointer to the device instance data.
4083 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
4084 * @param pszFailed An error message to log.
4085 * @param rcFailed The error code to exit the thread with.
4086 */
4087static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
4088{
4089 if (!fSpinlockHeld)
4090 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4091
4092 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
4093 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4094 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
4095 return rcFailed;
4096}
4097
4098
4099/**
4100 * The TSC-delta measurement thread.
4101 *
4102 * @returns VBox status code.
4103 * @param hThread The thread handle.
4104 * @param pvUser Opaque pointer to the device instance data.
4105 */
4106static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
4107{
4108 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
4109 uint32_t cConsecutiveTimeouts = 0;
4110 int rc = VERR_INTERNAL_ERROR_2;
4111 for (;;)
4112 {
4113 /*
4114 * Switch on the current state.
4115 */
4116 SUPDRVTSCDELTATHREADSTATE enmState;
4117 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4118 enmState = pDevExt->enmTscDeltaThreadState;
4119 switch (enmState)
4120 {
4121 case kTscDeltaThreadState_Creating:
4122 {
4123 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4124 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
4125 if (RT_FAILURE(rc))
4126 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4127 /* fall thru */
4128 }
4129
4130 case kTscDeltaThreadState_Listening:
4131 {
4132 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4133
4134 /*
4135 * Linux counts uninterruptible sleeps as load, hence we shall do a
4136 * regular, interruptible sleep here and ignore wake ups due to signals.
4137 * See task_contributes_to_load() in include/linux/sched.h in the Linux sources.
4138 */
4139 rc = RTThreadUserWaitNoResume(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
4140 if ( RT_FAILURE(rc)
4141 && rc != VERR_TIMEOUT
4142 && rc != VERR_INTERRUPTED)
4143 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
4144 RTThreadUserReset(pDevExt->hTscDeltaThread);
4145 break;
4146 }
4147
4148 case kTscDeltaThreadState_WaitAndMeasure:
4149 {
4150 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
4151 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
4152 if (RT_FAILURE(rc))
4153 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4154 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4155 RTThreadSleep(1);
4156 /* fall thru */
4157 }
4158
4159 case kTscDeltaThreadState_Measuring:
4160 {
4161 cConsecutiveTimeouts = 0;
4162 if (pDevExt->fTscThreadRecomputeAllDeltas)
4163 {
4164 int cTries = 8;
4165 int cMsWaitPerTry = 10;
4166 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4167 Assert(pGip);
4168 do
4169 {
4170 RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
4171 rc = supdrvMeasureInitialTscDeltas(pDevExt);
4172 if ( RT_SUCCESS(rc)
4173 || ( RT_FAILURE(rc)
4174 && rc != VERR_TRY_AGAIN
4175 && rc != VERR_CPU_OFFLINE))
4176 {
4177 break;
4178 }
4179 RTThreadSleep(cMsWaitPerTry);
4180 } while (cTries-- > 0);
4181 pDevExt->fTscThreadRecomputeAllDeltas = false;
4182 }
4183 else
4184 {
4185 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4186 unsigned iCpu;
4187
4188 /* Measure TSC-deltas only for the CPUs that are in the set. */
4189 rc = VINF_SUCCESS;
4190 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4191 {
4192 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4193 if (RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4194 {
4195 if (pGipCpuWorker->i64TSCDelta == INT64_MAX)
4196 {
4197 int rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
4198 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4199 rc = rc2;
4200 }
4201 else
4202 {
4203 /*
4204 * The thread/someone must've called SUPR0TscDeltaMeasureBySetIndex(),
4205 * mark the delta as fine to get the timer thread off our back.
4206 */
4207 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
4208 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
4209 }
4210 }
4211 }
4212 }
4213 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4214 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4215 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4216 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4217 Assert(rc != VERR_NOT_AVAILABLE); /* VERR_NOT_AVAILABLE is used as init value, see supdrvTscDeltaThreadInit(). */
4218 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
4219 break;
4220 }
4221
4222 case kTscDeltaThreadState_Terminating:
4223 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
4224 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4225 return VINF_SUCCESS;
4226
4227 case kTscDeltaThreadState_Butchered:
4228 default:
4229 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
4230 }
4231 }
4232
4233 return rc;
4234}
4235
4236
4237/**
4238 * Waits for the TSC-delta measurement thread to respond to a state change.
4239 *
4240 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
4241 * other error code on internal error.
4242 *
4243 * @param pThis Pointer to the grant service instance data.
4244 * @param enmCurState The current state.
4245 * @param enmNewState The new state we're waiting for it to enter.
4246 */
4247static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
4248 SUPDRVTSCDELTATHREADSTATE enmNewState)
4249{
4250 /*
4251 * Wait a short while for the expected state transition.
4252 */
4253 int rc;
4254 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
4255 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4256 if (pDevExt->enmTscDeltaThreadState == enmNewState)
4257 {
4258 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4259 rc = VINF_SUCCESS;
4260 }
4261 else if (pDevExt->enmTscDeltaThreadState == enmCurState)
4262 {
4263 /*
4264 * Wait longer if the state has not yet transitioned to the one we want.
4265 */
4266 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4267 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
4268 if ( RT_SUCCESS(rc)
4269 || rc == VERR_TIMEOUT)
4270 {
4271 /*
4272 * Check the state whether we've succeeded.
4273 */
4274 SUPDRVTSCDELTATHREADSTATE enmState;
4275 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4276 enmState = pDevExt->enmTscDeltaThreadState;
4277 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4278 if (enmState == enmNewState)
4279 rc = VINF_SUCCESS;
4280 else if (enmState == enmCurState)
4281 {
4282 rc = VERR_TIMEOUT;
4283 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
4284 enmNewState));
4285 }
4286 else
4287 {
4288 rc = VERR_INTERNAL_ERROR;
4289 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
4290 enmState, enmNewState));
4291 }
4292 }
4293 else
4294 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
4295 }
4296 else
4297 {
4298 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4299 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
4300 rc = VERR_INTERNAL_ERROR;
4301 }
4302
4303 return rc;
4304}
4305
4306
4307/**
4308 * Signals the TSC-delta thread to start measuring TSC-deltas.
4309 *
4310 * @param pDevExt Pointer to the device instance data.
4311 * @param fForceAll Force re-calculating TSC-deltas on all CPUs.
4312 */
4313static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll)
4314{
4315 if (pDevExt->hTscDeltaThread != NIL_RTTHREAD)
4316 {
4317 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4318 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4319 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4320 {
4321 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4322 if (fForceAll)
4323 pDevExt->fTscThreadRecomputeAllDeltas = true;
4324 }
4325 else if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_WaitAndMeasure
4326 && fForceAll)
4327 pDevExt->fTscThreadRecomputeAllDeltas = true;
4328 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4329 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4330 }
4331}
4332
4333
4334/**
4335 * Terminates the actual thread running supdrvTscDeltaThread().
4336 *
4337 * This is an internal worker function for supdrvTscDeltaThreadInit() and
4338 * supdrvTscDeltaTerm().
4339 *
4340 * @param pDevExt Pointer to the device instance data.
4341 */
4342static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
4343{
4344 int rc;
4345 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4346 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
4347 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4348 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4349 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
4350 if (RT_FAILURE(rc))
4351 {
4352 /* Signal a few more times before giving up. */
4353 int cTriesLeft = 5;
4354 while (--cTriesLeft > 0)
4355 {
4356 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4357 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
4358 if (rc != VERR_TIMEOUT)
4359 break;
4360 }
4361 }
4362}
4363
4364
4365/**
4366 * Initializes and spawns the TSC-delta measurement thread.
4367 *
4368 * A thread is required for servicing re-measurement requests from events like
4369 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
4370 * under all contexts on all OSs.
4371 *
4372 * @returns VBox status code.
4373 * @param pDevExt Pointer to the device instance data.
4374 *
4375 * @remarks Must only be called -after- initializing GIP and setting up MP
4376 * notifications!
4377 */
4378static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
4379{
4380 int rc;
4381 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4382 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
4383 if (RT_SUCCESS(rc))
4384 {
4385 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
4386 if (RT_SUCCESS(rc))
4387 {
4388 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
4389 pDevExt->cMsTscDeltaTimeout = 60000;
4390 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
4391 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
4392 if (RT_SUCCESS(rc))
4393 {
4394 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
4395 if (RT_SUCCESS(rc))
4396 {
4397 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4398 return rc;
4399 }
4400
4401 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
4402 supdrvTscDeltaThreadTerminate(pDevExt);
4403 }
4404 else
4405 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
4406 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4407 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4408 }
4409 else
4410 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
4411 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4412 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4413 }
4414 else
4415 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
4416
4417 return rc;
4418}
4419
4420
4421/**
4422 * Terminates the TSC-delta measurement thread and cleanup.
4423 *
4424 * @param pDevExt Pointer to the device instance data.
4425 */
4426static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
4427{
4428 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
4429 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4430 {
4431 supdrvTscDeltaThreadTerminate(pDevExt);
4432 }
4433
4434 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
4435 {
4436 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4437 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4438 }
4439
4440 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4441 {
4442 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4443 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4444 }
4445
4446 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4447}
4448
4449#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4450
4451/**
4452 * Measure the TSC delta for the CPU given by its CPU set index.
4453 *
4454 * @returns VBox status code.
4455 * @retval VERR_INTERRUPTED if interrupted while waiting.
4456 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if we were unable to get a
4457 * measurement.
4458 * @retval VERR_CPU_OFFLINE if the specified CPU is offline.
4459 *
4460 * @param pSession The caller's session. GIP must've been mapped.
4461 * @param iCpuSet The CPU set index of the CPU to measure.
4462 * @param fFlags Flags, SUP_TSCDELTA_MEASURE_F_XXX.
4463 * @param cMsWaitRetry Number of milliseconds to wait between each retry.
4464 * @param cMsWaitThread Number of milliseconds to wait for the thread to get
4465 * ready.
4466 * @param cTries Number of times to try, pass 0 for the default.
4467 */
4468SUPR0DECL(int) SUPR0TscDeltaMeasureBySetIndex(PSUPDRVSESSION pSession, uint32_t iCpuSet, uint32_t fFlags,
4469 RTMSINTERVAL cMsWaitRetry, RTMSINTERVAL cMsWaitThread, uint32_t cTries)
4470{
4471 PSUPDRVDEVEXT pDevExt;
4472 PSUPGLOBALINFOPAGE pGip;
4473 uint16_t iGipCpu;
4474 int rc;
4475#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4476 uint64_t msTsStartWait;
4477 uint32_t iWaitLoop;
4478#endif
4479
4480 /*
4481 * Validate and adjust the input.
4482 */
4483 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4484 if (!pSession->fGipReferenced)
4485 return VERR_WRONG_ORDER;
4486
4487 pDevExt = pSession->pDevExt;
4488 AssertReturn(SUP_IS_DEVEXT_VALID(pDevExt), VERR_INVALID_PARAMETER);
4489
4490 pGip = pDevExt->pGip;
4491 AssertPtrReturn(pGip, VERR_INTERNAL_ERROR_2);
4492
4493 AssertReturn(iCpuSet < RTCPUSET_MAX_CPUS, VERR_INVALID_CPU_INDEX);
4494 AssertReturn(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx), VERR_INVALID_CPU_INDEX);
4495 iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
4496 AssertReturn(iGipCpu < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4497
4498 if (fFlags & ~SUP_TSCDELTA_MEASURE_F_VALID_MASK)
4499 return VERR_INVALID_FLAGS;
4500
4501 /*
4502 * The request is a noop if the TSC delta isn't being used.
4503 */
4504 if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4505 return VINF_SUCCESS;
4506
4507 if (cTries == 0)
4508 cTries = 12;
4509 else if (cTries > 256)
4510 cTries = 256;
4511
4512 if (cMsWaitRetry == 0)
4513 cMsWaitRetry = 2;
4514 else if (cMsWaitRetry > 1000)
4515 cMsWaitRetry = 1000;
4516
4517#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4518 /*
4519 * Has the TSC already been measured and we're not forced to redo it?
4520 */
4521 if ( pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX
4522 && !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE))
4523 return VINF_SUCCESS;
4524
4525 /*
4526 * Asynchronous request? Forward it to the thread, no waiting.
4527 */
4528 if (fFlags & SUP_TSCDELTA_MEASURE_F_ASYNC)
4529 {
4530 /** @todo Async. doesn't implement options like retries, waiting. We'll need
4531 * to pass those options to the thread somehow and implement it in the
4532 * thread. Check if anyone uses/needs fAsync before implementing this. */
4533 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4534 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
4535 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4536 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4537 {
4538 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4539 rc = VINF_SUCCESS;
4540 }
4541 else if (pDevExt->enmTscDeltaThreadState != kTscDeltaThreadState_WaitAndMeasure)
4542 rc = VERR_THREAD_IS_DEAD;
4543 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4544 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4545 return VINF_SUCCESS;
4546 }
4547
4548 /*
4549 * If a TSC-delta measurement request is already being serviced by the thread,
4550 * wait 'cTries' times if a retry-timeout is provided, otherwise bail as busy.
4551 */
4552 msTsStartWait = RTTimeSystemMilliTS();
4553 for (iWaitLoop = 0;; iWaitLoop++)
4554 {
4555 uint64_t cMsElapsed;
4556 SUPDRVTSCDELTATHREADSTATE enmState;
4557 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4558 enmState = pDevExt->enmTscDeltaThreadState;
4559 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4560
4561 if (enmState == kTscDeltaThreadState_Measuring)
4562 { /* Must wait, the thread is busy. */ }
4563 else if (enmState == kTscDeltaThreadState_WaitAndMeasure)
4564 { /* Must wait, this state only says what will happen next. */ }
4565 else if (enmState == kTscDeltaThreadState_Terminating)
4566 { /* Must wait, this state only says what should happen next. */ }
4567 else
4568 break; /* All other states, the thread is either idly listening or dead. */
4569
4570 /* Wait or fail. */
4571 if (cMsWaitThread == 0)
4572 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4573 cMsElapsed = RTTimeSystemMilliTS() - msTsStartWait;
4574 if (cMsElapsed >= cMsWaitThread)
4575 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4576
4577 rc = RTThreadSleep(RT_MIN((RTMSINTERVAL)(cMsWaitThread - cMsElapsed), RT_MIN(iWaitLoop + 1, 10)));
4578 if (rc == VERR_INTERRUPTED)
4579 return rc;
4580 }
4581#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4582
4583 /*
4584 * Try measure the TSC delta the given number of times.
4585 */
4586 for (;;)
4587 {
4588 /* Unless we're forced to measure the delta, check whether it's done already. */
4589 if ( !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE)
4590 && pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX)
4591 {
4592 rc = VINF_SUCCESS;
4593 break;
4594 }
4595
4596 /* Measure it. */
4597 rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
4598 if (rc != VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED)
4599 {
4600 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
4601 break;
4602 }
4603
4604 /* Retry? */
4605 if (cTries <= 1)
4606 break;
4607 cTries--;
4608
4609 /* Always delay between retries (be nice to the rest of the system
4610 and avoid the BSOD hounds). */
4611 rc = RTThreadSleep(cMsWaitRetry);
4612 if (rc == VERR_INTERRUPTED)
4613 break;
4614 }
4615
4616 return rc;
4617}
4618
4619
4620/**
4621 * Service a TSC-delta measurement request.
4622 *
4623 * @returns VBox status code.
4624 * @param pDevExt Pointer to the device instance data.
4625 * @param pSession The support driver session.
4626 * @param pReq Pointer to the TSC-delta measurement request.
4627 */
4628int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
4629{
4630 uint32_t cTries;
4631 uint32_t iCpuSet;
4632 uint32_t fFlags;
4633 RTMSINTERVAL cMsWaitRetry;
4634
4635 /*
4636 * Validate and adjust/resolve the input so they can be passed onto SUPR0TscDeltaMeasureBySetIndex.
4637 */
4638 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
4639
4640 if (pReq->u.In.idCpu == NIL_RTCPUID)
4641 return VERR_INVALID_CPU_ID;
4642 iCpuSet = RTMpCpuIdToSetIndex(pReq->u.In.idCpu);
4643 if (iCpuSet >= RTCPUSET_MAX_CPUS)
4644 return VERR_INVALID_CPU_ID;
4645
4646 cTries = pReq->u.In.cRetries == 0 ? 0 : (uint32_t)pReq->u.In.cRetries + 1;
4647
4648 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
4649
4650 fFlags = 0;
4651 if (pReq->u.In.fAsync)
4652 fFlags |= SUP_TSCDELTA_MEASURE_F_ASYNC;
4653 if (pReq->u.In.fForce)
4654 fFlags |= SUP_TSCDELTA_MEASURE_F_FORCE;
4655
4656 return SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet, fFlags, cMsWaitRetry,
4657 cTries == 0 ? 5 * RT_MS_1SEC : cMsWaitRetry * cTries /*cMsWaitThread*/,
4658 cTries);
4659}
4660
4661
4662/**
4663 * Reads TSC with delta applied.
4664 *
4665 * Will try to resolve delta value INT64_MAX before applying it. This is the
4666 * main purpose of this function, to handle the case where the delta needs to be
4667 * determined.
4668 *
4669 * @returns VBox status code.
4670 * @param pDevExt Pointer to the device instance data.
4671 * @param pSession The support driver session.
4672 * @param pReq Pointer to the TSC-read request.
4673 */
4674int VBOXCALL supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
4675{
4676 PSUPGLOBALINFOPAGE pGip;
4677 int rc;
4678
4679 /*
4680 * Validate. We require the client to have mapped GIP (no asserting on
4681 * ring-3 preconditions).
4682 */
4683 AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
4684 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4685 return VERR_WRONG_ORDER;
4686 pGip = pDevExt->pGip;
4687 AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
4688
4689 /*
4690 * We're usually here because we need to apply delta, but we shouldn't be
4691 * upset if the GIP is some different mode.
4692 */
4693 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4694 {
4695 uint32_t cTries = 0;
4696 for (;;)
4697 {
4698 /*
4699 * Start by gathering the data, using CLI for disabling preemption
4700 * while we do that.
4701 */
4702 RTCCUINTREG fEFlags = ASMIntDisableFlags();
4703 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
4704 int iGipCpu;
4705 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
4706 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
4707 {
4708 int64_t i64Delta = pGip->aCPUs[iGipCpu].i64TSCDelta;
4709 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
4710 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
4711 ASMSetFlags(fEFlags);
4712
4713 /*
4714 * If we're lucky we've got a delta, but no predictions here
4715 * as this I/O control is normally only used when the TSC delta
4716 * is set to INT64_MAX.
4717 */
4718 if (i64Delta != INT64_MAX)
4719 {
4720 pReq->u.Out.u64AdjustedTsc -= i64Delta;
4721 rc = VINF_SUCCESS;
4722 break;
4723 }
4724
4725 /* Give up after a few times. */
4726 if (cTries >= 4)
4727 {
4728 rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
4729 break;
4730 }
4731
4732 /* Need to measure the delta an try again. */
4733 rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
4734 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
4735 /** @todo should probably delay on failure... dpc watchdogs */
4736 }
4737 else
4738 {
4739 /* This really shouldn't happen. */
4740 AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
4741 pReq->u.Out.idApic = ASMGetApicId();
4742 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
4743 ASMSetFlags(fEFlags);
4744 rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
4745 break;
4746 }
4747 }
4748 }
4749 else
4750 {
4751 /*
4752 * No delta to apply. Easy. Deal with preemption the lazy way.
4753 */
4754 RTCCUINTREG fEFlags = ASMIntDisableFlags();
4755 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
4756 int iGipCpu;
4757 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
4758 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
4759 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
4760 else
4761 pReq->u.Out.idApic = ASMGetApicId();
4762 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
4763 ASMSetFlags(fEFlags);
4764 rc = VINF_SUCCESS;
4765 }
4766
4767 return rc;
4768}
4769
4770
4771/**
4772 * Worker for supdrvIOCtl_GipSetFlags.
4773 *
4774 * @returns VBox status code.
4775 * @retval VERR_WRONG_ORDER if an enable-once-per-session flag is set again for
4776 * a session.
4777 *
4778 * @param pDevExt Pointer to the device instance data.
4779 * @param pSession The support driver session.
4780 * @param fOrMask The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
4781 * @param fAndMask The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
4782 *
4783 * @remarks Caller must own the GIP mutex.
4784 *
4785 * @remarks This function doesn't validate any of the flags.
4786 */
4787static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
4788{
4789 uint32_t cRefs;
4790 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4791 AssertMsg((fOrMask & fAndMask) == fOrMask, ("%#x & %#x\n", fOrMask, fAndMask)); /* ASSUMED by code below */
4792
4793 /*
4794 * Compute GIP test-mode flags.
4795 */
4796 if (fOrMask & SUPGIP_FLAGS_TESTING_ENABLE)
4797 {
4798 if (!pSession->fGipTestMode)
4799 {
4800 Assert(pDevExt->cGipTestModeRefs < _64K);
4801 pSession->fGipTestMode = true;
4802 cRefs = ++pDevExt->cGipTestModeRefs;
4803 if (cRefs == 1)
4804 {
4805 fOrMask |= SUPGIP_FLAGS_TESTING | SUPGIP_FLAGS_TESTING_START;
4806 fAndMask &= ~SUPGIP_FLAGS_TESTING_STOP;
4807 }
4808 }
4809 else
4810 {
4811 LogRelMax(10, ("supdrvGipSetFlags: SUPGIP_FLAGS_TESTING_ENABLE already set for this session\n"));
4812 return VERR_WRONG_ORDER;
4813 }
4814 }
4815 else if ( !(fAndMask & SUPGIP_FLAGS_TESTING_ENABLE)
4816 && pSession->fGipTestMode)
4817 {
4818 Assert(pDevExt->cGipTestModeRefs > 0);
4819 Assert(pDevExt->cGipTestModeRefs < _64K);
4820 pSession->fGipTestMode = false;
4821 cRefs = --pDevExt->cGipTestModeRefs;
4822 if (!cRefs)
4823 fOrMask |= SUPGIP_FLAGS_TESTING_STOP;
4824 else
4825 fAndMask |= SUPGIP_FLAGS_TESTING_ENABLE;
4826 }
4827
4828 /*
4829 * Commit the flags. This should be done as atomically as possible
4830 * since the flag consumers won't be holding the GIP mutex.
4831 */
4832 ASMAtomicOrU32(&pGip->fFlags, fOrMask);
4833 ASMAtomicAndU32(&pGip->fFlags, fAndMask);
4834
4835 return VINF_SUCCESS;
4836}
4837
4838
4839/**
4840 * Sets GIP test mode parameters.
4841 *
4842 * @returns VBox status code.
4843 * @param pDevExt Pointer to the device instance data.
4844 * @param pSession The support driver session.
4845 * @param fOrMask The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
4846 * @param fAndMask The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
4847 */
4848int VBOXCALL supdrvIOCtl_GipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
4849{
4850 PSUPGLOBALINFOPAGE pGip;
4851 int rc;
4852
4853 /*
4854 * Validate. We require the client to have mapped GIP (no asserting on
4855 * ring-3 preconditions).
4856 */
4857 AssertPtr(pDevExt); AssertPtr(pSession); /* paranoia^2 */
4858 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4859 return VERR_WRONG_ORDER;
4860 pGip = pDevExt->pGip;
4861 AssertReturn(pGip, VERR_INTERNAL_ERROR_3);
4862
4863 if (fOrMask & ~SUPGIP_FLAGS_VALID_MASK)
4864 return VERR_INVALID_PARAMETER;
4865 if ((fAndMask & ~SUPGIP_FLAGS_VALID_MASK) != ~SUPGIP_FLAGS_VALID_MASK)
4866 return VERR_INVALID_PARAMETER;
4867
4868 /*
4869 * Don't confuse supdrvGipSetFlags or anyone else by both setting
4870 * and clearing the same flags. AND takes precedence.
4871 */
4872 fOrMask &= fAndMask;
4873
4874 /*
4875 * Take the loader lock to avoid having to think about races between two
4876 * clients changing the flags at the same time (state is not simple).
4877 */
4878#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4879 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4880#else
4881 RTSemFastMutexRequest(pDevExt->mtxGip);
4882#endif
4883
4884 rc = supdrvGipSetFlags(pDevExt, pSession, fOrMask, fAndMask);
4885
4886#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4887 RTSemMutexRelease(pDevExt->mtxGip);
4888#else
4889 RTSemFastMutexRelease(pDevExt->mtxGip);
4890#endif
4891 return rc;
4892}
4893
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette