VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp@ 63549

Last change on this file since 63549 was 63523, checked in by vboxsync, 8 years ago

gcc 6 warning

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 178.4 KB
Line 
1/* $Id: SUPDrvGip.cpp 63523 2016-08-16 06:46:55Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
4 */
5
6/*
7 * Copyright (C) 2006-2016 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27
28/*********************************************************************************************************************************
29* Header Files *
30*********************************************************************************************************************************/
31#define LOG_GROUP LOG_GROUP_SUP_DRV
32#define SUPDRV_AGNOSTIC
33#include "SUPDrvInternal.h"
34#ifndef PAGE_SHIFT
35# include <iprt/param.h>
36#endif
37#include <iprt/asm.h>
38#include <iprt/asm-amd64-x86.h>
39#include <iprt/asm-math.h>
40#include <iprt/cpuset.h>
41#include <iprt/handletable.h>
42#include <iprt/mem.h>
43#include <iprt/mp.h>
44#include <iprt/power.h>
45#include <iprt/process.h>
46#include <iprt/semaphore.h>
47#include <iprt/spinlock.h>
48#include <iprt/thread.h>
49#include <iprt/uuid.h>
50#include <iprt/net.h>
51#include <iprt/crc.h>
52#include <iprt/string.h>
53#include <iprt/timer.h>
54#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
55# include <iprt/rand.h>
56# include <iprt/path.h>
57#endif
58#include <iprt/uint128.h>
59#include <iprt/x86.h>
60
61#include <VBox/param.h>
62#include <VBox/log.h>
63#include <VBox/err.h>
64
65#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
66# include "dtrace/SUPDrv.h"
67#else
68/* ... */
69#endif
70
71
72/*********************************************************************************************************************************
73* Defined Constants And Macros *
74*********************************************************************************************************************************/
75/** The frequency by which we recalculate the u32UpdateHz and
76 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
77 *
78 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
79 */
80#define GIP_UPDATEHZ_RECALC_FREQ 0x800
81
82/** A reserved TSC value used for synchronization as well as measurement of
83 * TSC deltas. */
84#define GIP_TSC_DELTA_RSVD UINT64_MAX
85/** The number of TSC delta measurement loops in total (includes primer and
86 * read-time loops). */
87#define GIP_TSC_DELTA_LOOPS 96
88/** The number of cache primer loops. */
89#define GIP_TSC_DELTA_PRIMER_LOOPS 4
90/** The number of loops until we keep computing the minumum read time. */
91#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
92
93/** The TSC frequency refinement period in seconds.
94 * The timer fires after 200ms, then every second, this value just says when
95 * to stop it after that. */
96#define GIP_TSC_REFINE_PERIOD_IN_SECS 12
97/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
98#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
99/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
100#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
101/** The TSC delta value for the initial GIP master - 0 in regular builds.
102 * To test the delta code this can be set to a non-zero value. */
103#if 0
104# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
105#else
106# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
107#endif
108
109AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
110AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
111
112/** @def VBOX_SVN_REV
113 * The makefile should define this if it can. */
114#ifndef VBOX_SVN_REV
115# define VBOX_SVN_REV 0
116#endif
117
118#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
119# define DO_NOT_START_GIP
120#endif
121
122
123/*********************************************************************************************************************************
124* Internal Functions *
125*********************************************************************************************************************************/
126static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
127static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
128static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask);
129static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz);
130static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas);
131#ifdef SUPDRV_USE_TSC_DELTA_THREAD
132static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
133static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
134static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll);
135#else
136static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt);
137static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
138#endif
139
140
141/*********************************************************************************************************************************
142* Global Variables *
143*********************************************************************************************************************************/
144DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
145
146
147
148/*
149 *
150 * Misc Common GIP Code
151 * Misc Common GIP Code
152 * Misc Common GIP Code
153 *
154 *
155 */
156
157
158/**
159 * Finds the GIP CPU index corresponding to @a idCpu.
160 *
161 * @returns GIP CPU array index, UINT32_MAX if not found.
162 * @param pGip The GIP.
163 * @param idCpu The CPU ID.
164 */
165static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
166{
167 uint32_t i;
168 for (i = 0; i < pGip->cCpus; i++)
169 if (pGip->aCPUs[i].idCpu == idCpu)
170 return i;
171 return UINT32_MAX;
172}
173
174
175
176/*
177 *
178 * GIP Mapping and Unmapping Related Code.
179 * GIP Mapping and Unmapping Related Code.
180 * GIP Mapping and Unmapping Related Code.
181 *
182 *
183 */
184
185
186/**
187 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
188 * updating.
189 *
190 * @param pGipCpu The per CPU structure for this CPU.
191 * @param u64NanoTS The current time.
192 */
193static void supdrvGipReInitCpu(PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
194{
195 /*
196 * Here we don't really care about applying the TSC delta. The re-initialization of this
197 * value is not relevant especially while (re)starting the GIP as the first few ones will
198 * be ignored anyway, see supdrvGipDoUpdateCpu().
199 */
200 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
201 pGipCpu->u64NanoTS = u64NanoTS;
202}
203
204
205/**
206 * Set the current TSC and NanoTS value for the CPU.
207 *
208 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
209 * @param pvUser1 Pointer to the ring-0 GIP mapping.
210 * @param pvUser2 Pointer to the variable holding the current time.
211 */
212static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
213{
214 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
215 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
216
217 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
218 supdrvGipReInitCpu(&pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
219
220 NOREF(pvUser2);
221 NOREF(idCpu);
222}
223
224
225/**
226 * State structure for supdrvGipDetectGetGipCpuCallback.
227 */
228typedef struct SUPDRVGIPDETECTGETCPU
229{
230 /** Bitmap of APIC IDs that has been seen (initialized to zero).
231 * Used to detect duplicate APIC IDs (paranoia). */
232 uint8_t volatile bmApicId[256 / 8];
233 /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
234 * initially). The callback clears the methods not detected. */
235 uint32_t volatile fSupported;
236 /** The first callback detecting any kind of range issues (initialized to
237 * NIL_RTCPUID). */
238 RTCPUID volatile idCpuProblem;
239} SUPDRVGIPDETECTGETCPU;
240/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
241typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
242
243
244/**
245 * Checks for alternative ways of getting the CPU ID.
246 *
247 * This also checks the APIC ID, CPU ID and CPU set index values against the
248 * GIP tables.
249 *
250 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
251 * @param pvUser1 Pointer to the state structure.
252 * @param pvUser2 Pointer to the GIP.
253 */
254static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
255{
256 PSUPDRVGIPDETECTGETCPU pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
257 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser2;
258 uint32_t fSupported = 0;
259 uint16_t idApic;
260 int iCpuSet;
261 NOREF(pGip);
262
263 AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
264
265 /*
266 * Check that the CPU ID and CPU set index are interchangable.
267 */
268 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
269 if ((RTCPUID)iCpuSet == idCpu)
270 {
271 AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
272 if ( iCpuSet >= 0
273 && iCpuSet < RTCPUSET_MAX_CPUS
274 && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
275 {
276 /*
277 * Check whether the IDTR.LIMIT contains a CPU number.
278 */
279#ifdef RT_ARCH_X86
280 uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
281#else
282 uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
283#endif
284 RTIDTR Idtr;
285 ASMGetIDTR(&Idtr);
286 if (Idtr.cbIdt >= cbIdt)
287 {
288 uint32_t uTmp = Idtr.cbIdt - cbIdt;
289 uTmp &= RTCPUSET_MAX_CPUS - 1;
290 if (uTmp == idCpu)
291 {
292 RTIDTR Idtr2;
293 ASMGetIDTR(&Idtr2);
294 if (Idtr2.cbIdt == Idtr.cbIdt)
295 fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
296 }
297 }
298
299 /*
300 * Check whether RDTSCP is an option.
301 */
302 if (ASMHasCpuId())
303 {
304 if ( ASMIsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
305 && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
306 {
307 uint32_t uAux;
308 ASMReadTscWithAux(&uAux);
309 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
310 {
311 ASMNopPause();
312 ASMReadTscWithAux(&uAux);
313 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
314 fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
315 }
316 }
317 }
318 }
319 }
320
321 /*
322 * Check that the APIC ID is unique.
323 */
324 idApic = ASMGetApicId();
325 if (RT_LIKELY( idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)
326 && !ASMAtomicBitTestAndSet(pState->bmApicId, idApic)))
327 fSupported |= SUPGIPGETCPU_APIC_ID;
328 else
329 {
330 AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
331 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
332 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - duplicate APIC ID.\n",
333 idCpu, iCpuSet, idApic));
334 }
335
336 /*
337 * Check that the iCpuSet is within the expected range.
338 */
339 if (RT_UNLIKELY( iCpuSet < 0
340 || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
341 || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
342 {
343 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
344 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
345 idCpu, iCpuSet, idApic));
346 }
347 else
348 {
349 RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
350 if (RT_UNLIKELY(idCpu2 != idCpu))
351 {
352 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
353 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
354 idCpu, iCpuSet, idApic, idCpu2));
355 }
356 }
357
358 /*
359 * Update the supported feature mask before we return.
360 */
361 ASMAtomicAndU32(&pState->fSupported, fSupported);
362
363 NOREF(pvUser2);
364}
365
366
367/**
368 * Increase the timer freqency on hosts where this is possible (NT).
369 *
370 * The idea is that more interrupts is better for us... Also, it's better than
371 * we increase the timer frequence, because we might end up getting inaccurate
372 * callbacks if someone else does it.
373 *
374 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
375 */
376static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
377{
378 if (pDevExt->u32SystemTimerGranularityGrant == 0)
379 {
380 uint32_t u32SystemResolution;
381 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
382 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
383 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
384 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
385 )
386 {
387#if 0 /* def VBOX_STRICT - this is somehow triggers bogus assertions on windows 10 */
388 uint32_t u32After = RTTimerGetSystemGranularity();
389 AssertMsg(u32After <= u32SystemResolution, ("u32After=%u u32SystemResolution=%u\n", u32After, u32SystemResolution));
390#endif
391 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
392 }
393 }
394}
395
396
397/**
398 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
399 *
400 * @param pDevExt Clears u32SystemTimerGranularityGrant.
401 */
402static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
403{
404 if (pDevExt->u32SystemTimerGranularityGrant)
405 {
406 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
407 AssertRC(rc2);
408 pDevExt->u32SystemTimerGranularityGrant = 0;
409 }
410}
411
412
413/**
414 * Maps the GIP into userspace and/or get the physical address of the GIP.
415 *
416 * @returns IPRT status code.
417 * @param pSession Session to which the GIP mapping should belong.
418 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
419 * @param pHCPhysGip Where to store the physical address. (optional)
420 *
421 * @remark There is no reference counting on the mapping, so one call to this function
422 * count globally as one reference. One call to SUPR0GipUnmap() is will unmap GIP
423 * and remove the session as a GIP user.
424 */
425SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
426{
427 int rc;
428 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
429 RTR3PTR pGipR3 = NIL_RTR3PTR;
430 RTHCPHYS HCPhys = NIL_RTHCPHYS;
431 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
432
433 /*
434 * Validate
435 */
436 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
437 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
438 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
439
440#ifdef SUPDRV_USE_MUTEX_FOR_GIP
441 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
442#else
443 RTSemFastMutexRequest(pDevExt->mtxGip);
444#endif
445 if (pDevExt->pGip)
446 {
447 /*
448 * Map it?
449 */
450 rc = VINF_SUCCESS;
451 if (ppGipR3)
452 {
453 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
454 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
455 RTMEM_PROT_READ, NIL_RTR0PROCESS);
456 if (RT_SUCCESS(rc))
457 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
458 }
459
460 /*
461 * Get physical address.
462 */
463 if (pHCPhysGip && RT_SUCCESS(rc))
464 HCPhys = pDevExt->HCPhysGip;
465
466 /*
467 * Reference globally.
468 */
469 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
470 {
471 pSession->fGipReferenced = 1;
472 pDevExt->cGipUsers++;
473 if (pDevExt->cGipUsers == 1)
474 {
475 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
476 uint64_t u64NanoTS;
477
478 /*
479 * GIP starts/resumes updating again. On windows we bump the
480 * host timer frequency to make sure we don't get stuck in guest
481 * mode and to get better timer (and possibly clock) accuracy.
482 */
483 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
484
485 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
486
487 /*
488 * document me
489 */
490 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
491 {
492 unsigned i;
493 for (i = 0; i < pGipR0->cCpus; i++)
494 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
495 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
496 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
497 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
498 }
499
500 /*
501 * document me
502 */
503 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
504 if ( pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
505 || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
506 || RTMpGetOnlineCount() == 1)
507 supdrvGipReInitCpu(&pGipR0->aCPUs[0], u64NanoTS);
508 else
509 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
510
511 /*
512 * Detect alternative ways to figure the CPU ID in ring-3 and
513 * raw-mode context. Check the sanity of the APIC IDs, CPU IDs,
514 * and CPU set indexes while we're at it.
515 */
516 if (RT_SUCCESS(rc))
517 {
518 SUPDRVGIPDETECTGETCPU DetectState;
519 RT_BZERO((void *)&DetectState.bmApicId, sizeof(DetectState.bmApicId));
520 DetectState.fSupported = UINT32_MAX;
521 DetectState.idCpuProblem = NIL_RTCPUID;
522 rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, &DetectState, pGipR0);
523 if (DetectState.idCpuProblem == NIL_RTCPUID)
524 {
525 if ( DetectState.fSupported != UINT32_MAX
526 && DetectState.fSupported != 0)
527 {
528 if (pGipR0->fGetGipCpu != DetectState.fSupported)
529 {
530 pGipR0->fGetGipCpu = DetectState.fSupported;
531 LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", DetectState.fSupported));
532 }
533 }
534 else
535 {
536 LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
537 DetectState.fSupported));
538 rc = VERR_UNSUPPORTED_CPU;
539 }
540 }
541 else
542 {
543 LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
544 DetectState.idCpuProblem, DetectState.idCpuProblem));
545 rc = VERR_INVALID_CPU_ID;
546 }
547 }
548
549 /*
550 * Start the GIP timer if all is well..
551 */
552 if (RT_SUCCESS(rc))
553 {
554#ifndef DO_NOT_START_GIP
555 rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
556#endif
557 rc = VINF_SUCCESS;
558 }
559
560 /*
561 * Bail out on error.
562 */
563 if (RT_FAILURE(rc))
564 {
565 LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
566 pDevExt->cGipUsers = 0;
567 pSession->fGipReferenced = 0;
568 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
569 {
570 int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
571 if (RT_SUCCESS(rc2))
572 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
573 }
574 HCPhys = NIL_RTHCPHYS;
575 pGipR3 = NIL_RTR3PTR;
576 }
577 }
578 }
579 }
580 else
581 {
582 rc = VERR_GENERAL_FAILURE;
583 Log(("SUPR0GipMap: GIP is not available!\n"));
584 }
585#ifdef SUPDRV_USE_MUTEX_FOR_GIP
586 RTSemMutexRelease(pDevExt->mtxGip);
587#else
588 RTSemFastMutexRelease(pDevExt->mtxGip);
589#endif
590
591 /*
592 * Write returns.
593 */
594 if (pHCPhysGip)
595 *pHCPhysGip = HCPhys;
596 if (ppGipR3)
597 *ppGipR3 = pGipR3;
598
599#ifdef DEBUG_DARWIN_GIP
600 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
601#else
602 LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
603#endif
604 return rc;
605}
606
607
608/**
609 * Unmaps any user mapping of the GIP and terminates all GIP access
610 * from this session.
611 *
612 * @returns IPRT status code.
613 * @param pSession Session to which the GIP mapping should belong.
614 */
615SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
616{
617 int rc = VINF_SUCCESS;
618 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
619#ifdef DEBUG_DARWIN_GIP
620 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
621 pSession,
622 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
623 pSession->GipMapObjR3));
624#else
625 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
626#endif
627 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
628
629#ifdef SUPDRV_USE_MUTEX_FOR_GIP
630 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
631#else
632 RTSemFastMutexRequest(pDevExt->mtxGip);
633#endif
634
635 /*
636 * GIP test-mode session?
637 */
638 if ( pSession->fGipTestMode
639 && pDevExt->pGip)
640 {
641 supdrvGipSetFlags(pDevExt, pSession, 0, ~SUPGIP_FLAGS_TESTING_ENABLE);
642 Assert(!pSession->fGipTestMode);
643 }
644
645 /*
646 * Unmap anything?
647 */
648 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
649 {
650 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
651 AssertRC(rc);
652 if (RT_SUCCESS(rc))
653 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
654 }
655
656 /*
657 * Dereference global GIP.
658 */
659 if (pSession->fGipReferenced && !rc)
660 {
661 pSession->fGipReferenced = 0;
662 if ( pDevExt->cGipUsers > 0
663 && !--pDevExt->cGipUsers)
664 {
665 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
666#ifndef DO_NOT_START_GIP
667 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
668#endif
669 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
670 }
671 }
672
673#ifdef SUPDRV_USE_MUTEX_FOR_GIP
674 RTSemMutexRelease(pDevExt->mtxGip);
675#else
676 RTSemFastMutexRelease(pDevExt->mtxGip);
677#endif
678
679 return rc;
680}
681
682
683/**
684 * Gets the GIP pointer.
685 *
686 * @returns Pointer to the GIP or NULL.
687 */
688SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
689{
690 return g_pSUPGlobalInfoPage;
691}
692
693
694
695
696
697/*
698 *
699 *
700 * GIP Initialization, Termination and CPU Offline / Online Related Code.
701 * GIP Initialization, Termination and CPU Offline / Online Related Code.
702 * GIP Initialization, Termination and CPU Offline / Online Related Code.
703 *
704 *
705 */
706
707/**
708 * Used by supdrvInitRefineInvariantTscFreqTimer and supdrvGipInitMeasureTscFreq
709 * to update the TSC frequency related GIP variables.
710 *
711 * @param pGip The GIP.
712 * @param nsElapsed The number of nanoseconds elapsed.
713 * @param cElapsedTscTicks The corresponding number of TSC ticks.
714 * @param iTick The tick number for debugging.
715 */
716static void supdrvGipInitSetCpuFreq(PSUPGLOBALINFOPAGE pGip, uint64_t nsElapsed, uint64_t cElapsedTscTicks, uint32_t iTick)
717{
718 /*
719 * Calculate the frequency.
720 */
721 uint64_t uCpuHz;
722 if ( cElapsedTscTicks < UINT64_MAX / RT_NS_1SEC
723 && nsElapsed < UINT32_MAX)
724 uCpuHz = ASMMultU64ByU32DivByU32(cElapsedTscTicks, RT_NS_1SEC, (uint32_t)nsElapsed);
725 else
726 {
727 RTUINT128U CpuHz, Tmp, Divisor;
728 CpuHz.s.Lo = CpuHz.s.Hi = 0;
729 RTUInt128MulU64ByU64(&Tmp, cElapsedTscTicks, RT_NS_1SEC_64);
730 RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, nsElapsed));
731 uCpuHz = CpuHz.s.Lo;
732 }
733
734 /*
735 * Update the GIP.
736 */
737 ASMAtomicWriteU64(&pGip->u64CpuHz, uCpuHz);
738 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
739 {
740 ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, uCpuHz);
741
742 /* For inspecting the frequency calcs using tstGIP-2, debugger or similar. */
743 if (iTick + 1 < pGip->cCpus)
744 ASMAtomicWriteU64(&pGip->aCPUs[iTick + 1].u64CpuHz, uCpuHz);
745 }
746}
747
748
749/**
750 * Timer callback function for TSC frequency refinement in invariant GIP mode.
751 *
752 * This is started during driver init and fires once
753 * GIP_TSC_REFINE_PERIOD_IN_SECS seconds later.
754 *
755 * @param pTimer The timer.
756 * @param pvUser Opaque pointer to the device instance data.
757 * @param iTick The timer tick.
758 */
759static DECLCALLBACK(void) supdrvInitRefineInvariantTscFreqTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
760{
761 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
762 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
763 RTCPUID idCpu;
764 uint64_t cNsElapsed;
765 uint64_t cTscTicksElapsed;
766 uint64_t nsNow;
767 uint64_t uTsc;
768 RTCCUINTREG fEFlags;
769
770 /* Paranoia. */
771 AssertReturnVoid(pGip);
772 AssertReturnVoid(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
773
774 /*
775 * If we got a power event, stop the refinement process.
776 */
777 if (pDevExt->fInvTscRefinePowerEvent)
778 {
779 int rc = RTTimerStop(pTimer); AssertRC(rc);
780 return;
781 }
782
783 /*
784 * Read the TSC and time, noting which CPU we are on.
785 *
786 * Don't bother spinning until RTTimeSystemNanoTS changes, since on
787 * systems where it matters we're in a context where we cannot waste that
788 * much time (DPC watchdog, called from clock interrupt).
789 */
790 fEFlags = ASMIntDisableFlags();
791 uTsc = ASMReadTSC();
792 nsNow = RTTimeSystemNanoTS();
793 idCpu = RTMpCpuId();
794 ASMSetFlags(fEFlags);
795
796 cNsElapsed = nsNow - pDevExt->nsStartInvarTscRefine;
797 cTscTicksElapsed = uTsc - pDevExt->uTscStartInvarTscRefine;
798
799 /*
800 * If the above measurement was taken on a different CPU than the one we
801 * started the process on, cTscTicksElapsed will need to be adjusted with
802 * the TSC deltas of both the CPUs.
803 *
804 * We ASSUME that the delta calculation process takes less time than the
805 * TSC frequency refinement timer. If it doesn't, we'll complain and
806 * drop the frequency refinement.
807 *
808 * Note! We cannot entirely trust enmUseTscDelta here because it's
809 * downgraded after each delta calculation.
810 */
811 if ( idCpu != pDevExt->idCpuInvarTscRefine
812 && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
813 {
814 uint32_t iStartCpuSet = RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine);
815 uint32_t iStopCpuSet = RTMpCpuIdToSetIndex(idCpu);
816 uint16_t iStartGipCpu = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
817 ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
818 uint16_t iStopGipCpu = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
819 ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
820 int64_t iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
821 int64_t iStopTscDelta = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
822 if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
823 {
824 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
825 {
826 /* cTscTicksElapsed = (uTsc - iStopTscDelta) - (pDevExt->uTscStartInvarTscRefine - iStartTscDelta); */
827 cTscTicksElapsed += iStartTscDelta - iStopTscDelta;
828 }
829 }
830 /*
831 * Allow 5 times the refinement period to elapse before we give up on the TSC delta
832 * calculations.
833 */
834 else if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * 5 * RT_NS_1SEC_64)
835 {
836 SUPR0Printf("vboxdrv: Failed to refine invariant TSC frequency because deltas are unavailable after %u (%u) seconds\n",
837 (uint32_t)(cNsElapsed / RT_NS_1SEC), GIP_TSC_REFINE_PERIOD_IN_SECS);
838 SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
839 iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
840 int rc = RTTimerStop(pTimer); AssertRC(rc);
841 return;
842 }
843 }
844
845 /*
846 * Calculate and update the CPU frequency variables in GIP.
847 *
848 * If there is a GIP user already and we've already refined the frequency
849 * a couple of times, don't update it as we want a stable frequency value
850 * for all VMs.
851 */
852 if ( pDevExt->cGipUsers == 0
853 || cNsElapsed < RT_NS_1SEC * 2)
854 {
855 supdrvGipInitSetCpuFreq(pGip, cNsElapsed, cTscTicksElapsed, (uint32_t)iTick);
856
857 /*
858 * Stop the timer once we've reached the defined refinement period.
859 */
860 if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * RT_NS_1SEC_64)
861 {
862 int rc = RTTimerStop(pTimer);
863 AssertRC(rc);
864 }
865 }
866 else
867 {
868 int rc = RTTimerStop(pTimer);
869 AssertRC(rc);
870 }
871}
872
873
874/**
875 * @callback_method_impl{FNRTPOWERNOTIFICATION}
876 */
877static DECLCALLBACK(void) supdrvGipPowerNotificationCallback(RTPOWEREVENT enmEvent, void *pvUser)
878{
879 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
880 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
881
882 /*
883 * If the TSC frequency refinement timer is running, we need to cancel it so it
884 * doesn't screw up the frequency after a long suspend.
885 *
886 * Recalculate all TSC-deltas on host resume as it may have changed, seen
887 * on Windows 7 running on the Dell Optiplex Intel Core i5-3570.
888 */
889 if (enmEvent == RTPOWEREVENT_RESUME)
890 {
891 ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
892 if ( RT_LIKELY(pGip)
893 && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
894 && !supdrvOSAreCpusOfflinedOnSuspend())
895 {
896#ifdef SUPDRV_USE_TSC_DELTA_THREAD
897 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
898#else
899 RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
900 supdrvMeasureInitialTscDeltas(pDevExt);
901#endif
902 }
903 }
904 else if (enmEvent == RTPOWEREVENT_SUSPEND)
905 ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
906}
907
908
909/**
910 * Start the TSC-frequency refinment timer for the invariant TSC GIP mode.
911 *
912 * We cannot use this in the synchronous and asynchronous tsc GIP modes because
913 * the CPU may change the TSC frequence between now and when the timer fires
914 * (supdrvInitAsyncRefineTscTimer).
915 *
916 * @param pDevExt Pointer to the device instance data.
917 */
918static void supdrvGipInitStartTimerForRefiningInvariantTscFreq(PSUPDRVDEVEXT pDevExt)
919{
920 uint64_t u64NanoTS;
921 RTCCUINTREG fEFlags;
922 int rc;
923
924 /*
925 * Register a power management callback.
926 */
927 pDevExt->fInvTscRefinePowerEvent = false;
928 rc = RTPowerNotificationRegister(supdrvGipPowerNotificationCallback, pDevExt);
929 AssertRC(rc); /* ignore */
930
931 /*
932 * Record the TSC and NanoTS as the starting anchor point for refinement
933 * of the TSC. We try get as close to a clock tick as possible on systems
934 * which does not provide high resolution time.
935 */
936 u64NanoTS = RTTimeSystemNanoTS();
937 while (RTTimeSystemNanoTS() == u64NanoTS)
938 ASMNopPause();
939
940 fEFlags = ASMIntDisableFlags();
941 pDevExt->uTscStartInvarTscRefine = ASMReadTSC();
942 pDevExt->nsStartInvarTscRefine = RTTimeSystemNanoTS();
943 pDevExt->idCpuInvarTscRefine = RTMpCpuId();
944 ASMSetFlags(fEFlags);
945
946 /*
947 * Create a timer that runs on the same CPU so we won't have a depencency
948 * on the TSC-delta and can run in parallel to it. On systems that does not
949 * implement CPU specific timers we'll apply deltas in the timer callback,
950 * just like we do for CPUs going offline.
951 *
952 * The longer the refinement interval the better the accuracy, at least in
953 * theory. If it's too long though, ring-3 may already be starting its
954 * first VMs before we're done. On most systems we will be loading the
955 * support driver during boot and VMs won't be started for a while yet,
956 * it is really only a problem during development (especially with
957 * on-demand driver starting on windows).
958 *
959 * To avoid wasting time doing a long supdrvGipInitMeasureTscFreq() call
960 * to calculate the frequency during driver loading, the timer is set
961 * to fire after 200 ms the first time. It will then reschedule itself
962 * to fire every second until GIP_TSC_REFINE_PERIOD_IN_SECS has been
963 * reached or it notices that there is a user land client with GIP
964 * mapped (we want a stable frequency for all VMs).
965 */
966 rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC,
967 RTTIMER_FLAGS_CPU(RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine)),
968 supdrvInitRefineInvariantTscFreqTimer, pDevExt);
969 if (RT_SUCCESS(rc))
970 {
971 rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
972 if (RT_SUCCESS(rc))
973 return;
974 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
975 }
976
977 if (rc == VERR_CPU_OFFLINE || rc == VERR_NOT_SUPPORTED)
978 {
979 rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC, RTTIMER_FLAGS_CPU_ANY,
980 supdrvInitRefineInvariantTscFreqTimer, pDevExt);
981 if (RT_SUCCESS(rc))
982 {
983 rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
984 if (RT_SUCCESS(rc))
985 return;
986 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
987 }
988 }
989
990 pDevExt->pInvarTscRefineTimer = NULL;
991 OSDBGPRINT(("vboxdrv: Failed to create or start TSC frequency refinement timer: rc=%Rrc\n", rc));
992}
993
994
995/**
996 * @callback_method_impl{PFNRTMPWORKER,
997 * RTMpOnSpecific callback for reading TSC and time on the CPU we started
998 * the measurements on.}
999 */
1000DECLCALLBACK(void) supdrvGipInitReadTscAndNanoTsOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1001{
1002 RTCCUINTREG fEFlags = ASMIntDisableFlags();
1003 uint64_t *puTscStop = (uint64_t *)pvUser1;
1004 uint64_t *pnsStop = (uint64_t *)pvUser2;
1005 RT_NOREF1(idCpu);
1006
1007 *puTscStop = ASMReadTSC();
1008 *pnsStop = RTTimeSystemNanoTS();
1009
1010 ASMSetFlags(fEFlags);
1011}
1012
1013
1014/**
1015 * Measures the TSC frequency of the system.
1016 *
1017 * The TSC frequency can vary on systems which are not reported as invariant.
1018 * On such systems the object of this function is to find out what the nominal,
1019 * maximum TSC frequency under 'normal' CPU operation.
1020 *
1021 * @returns VBox status code.
1022 * @param pGip Pointer to the GIP.
1023 * @param fRough Set if we're doing the rough calculation that the
1024 * TSC measuring code needs, where accuracy isn't all
1025 * that important (too high is better than too low).
1026 * When clear we try for best accuracy that we can
1027 * achieve in reasonably short time.
1028 */
1029static int supdrvGipInitMeasureTscFreq(PSUPGLOBALINFOPAGE pGip, bool fRough)
1030{
1031 uint32_t nsTimerIncr = RTTimerGetSystemGranularity();
1032 int cTriesLeft = fRough ? 4 : 2;
1033 while (cTriesLeft-- > 0)
1034 {
1035 RTCCUINTREG fEFlags;
1036 uint64_t nsStart;
1037 uint64_t nsStop;
1038 uint64_t uTscStart;
1039 uint64_t uTscStop;
1040 RTCPUID idCpuStart;
1041 RTCPUID idCpuStop;
1042
1043 /*
1044 * Synchronize with the host OS clock tick on systems without high
1045 * resolution time API (older Windows version for example).
1046 */
1047 nsStart = RTTimeSystemNanoTS();
1048 while (RTTimeSystemNanoTS() == nsStart)
1049 ASMNopPause();
1050
1051 /*
1052 * Read the TSC and current time, noting which CPU we're on.
1053 */
1054 fEFlags = ASMIntDisableFlags();
1055 uTscStart = ASMReadTSC();
1056 nsStart = RTTimeSystemNanoTS();
1057 idCpuStart = RTMpCpuId();
1058 ASMSetFlags(fEFlags);
1059
1060 /*
1061 * Delay for a while.
1062 */
1063 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1064 {
1065 /*
1066 * Sleep-wait since the TSC frequency is constant, it eases host load.
1067 * Shorter interval produces more variance in the frequency (esp. Windows).
1068 */
1069 uint64_t msElapsed = 0;
1070 uint64_t msDelay = ( ((fRough ? 16 : 200) * RT_NS_1MS + nsTimerIncr - 1) / nsTimerIncr * nsTimerIncr - RT_NS_100US )
1071 / RT_NS_1MS;
1072 do
1073 {
1074 RTThreadSleep((RTMSINTERVAL)(msDelay - msElapsed));
1075 nsStop = RTTimeSystemNanoTS();
1076 msElapsed = (nsStop - nsStart) / RT_NS_1MS;
1077 } while (msElapsed < msDelay);
1078
1079 while (RTTimeSystemNanoTS() == nsStop)
1080 ASMNopPause();
1081 }
1082 else
1083 {
1084 /*
1085 * Busy-wait keeping the frequency up.
1086 */
1087 do
1088 {
1089 ASMNopPause();
1090 nsStop = RTTimeSystemNanoTS();
1091 } while (nsStop - nsStart < RT_NS_100MS);
1092 }
1093
1094 /*
1095 * Read the TSC and time again.
1096 */
1097 fEFlags = ASMIntDisableFlags();
1098 uTscStop = ASMReadTSC();
1099 nsStop = RTTimeSystemNanoTS();
1100 idCpuStop = RTMpCpuId();
1101 ASMSetFlags(fEFlags);
1102
1103 /*
1104 * If the CPU changes, things get a bit complicated and what we
1105 * can get away with depends on the GIP mode / TSC reliability.
1106 */
1107 if (idCpuStop != idCpuStart)
1108 {
1109 bool fDoXCall = false;
1110
1111 /*
1112 * Synchronous TSC mode: we're probably fine as it's unlikely
1113 * that we were rescheduled because of TSC throttling or power
1114 * management reasons, so just go ahead.
1115 */
1116 if (pGip->u32Mode == SUPGIPMODE_SYNC_TSC)
1117 {
1118 /* Probably ok, maybe we should retry once?. */
1119 Assert(pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_NOT_APPLICABLE);
1120 }
1121 /*
1122 * If we're just doing the rough measurement, do the cross call and
1123 * get on with things (we don't have deltas!).
1124 */
1125 else if (fRough)
1126 fDoXCall = true;
1127 /*
1128 * Invariant TSC mode: It doesn't matter if we have delta available
1129 * for both CPUs. That is not something we can assume at this point.
1130 *
1131 * Note! We cannot necessarily trust enmUseTscDelta here because it's
1132 * downgraded after each delta calculation and the delta
1133 * calculations may not be complete yet.
1134 */
1135 else if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1136 {
1137/** @todo This section of code is never reached atm, consider dropping it later on... */
1138 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1139 {
1140 uint32_t iStartCpuSet = RTMpCpuIdToSetIndex(idCpuStart);
1141 uint32_t iStopCpuSet = RTMpCpuIdToSetIndex(idCpuStop);
1142 uint16_t iStartGipCpu = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1143 ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
1144 uint16_t iStopGipCpu = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1145 ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
1146 int64_t iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
1147 int64_t iStopTscDelta = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
1148 if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
1149 {
1150 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
1151 {
1152 uTscStart -= iStartTscDelta;
1153 uTscStop -= iStopTscDelta;
1154 }
1155 }
1156 /*
1157 * Invalid CPU indexes are not caused by online/offline races, so
1158 * we have to trigger driver load failure if that happens as GIP
1159 * and IPRT assumptions are busted on this system.
1160 */
1161 else if (iStopGipCpu >= pGip->cCpus || iStartGipCpu >= pGip->cCpus)
1162 {
1163 SUPR0Printf("vboxdrv: Unexpected CPU index in supdrvGipInitMeasureTscFreq.\n");
1164 SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
1165 iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
1166 return VERR_INVALID_CPU_INDEX;
1167 }
1168 /*
1169 * No valid deltas. We retry, if we're on our last retry
1170 * we do the cross call instead just to get a result. The
1171 * frequency will be refined in a few seconds anyway.
1172 */
1173 else if (cTriesLeft > 0)
1174 continue;
1175 else
1176 fDoXCall = true;
1177 }
1178 }
1179 /*
1180 * Asynchronous TSC mode: This is bad, as the reason we usually
1181 * use this mode is to deal with variable TSC frequencies and
1182 * deltas. So, we need to get the TSC from the same CPU as
1183 * started it, we also need to keep that CPU busy. So, retry
1184 * and fall back to the cross call on the last attempt.
1185 */
1186 else
1187 {
1188 Assert(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC);
1189 if (cTriesLeft > 0)
1190 continue;
1191 fDoXCall = true;
1192 }
1193
1194 if (fDoXCall)
1195 {
1196 /*
1197 * Try read the TSC and timestamp on the start CPU.
1198 */
1199 int rc = RTMpOnSpecific(idCpuStart, supdrvGipInitReadTscAndNanoTsOnCpu, &uTscStop, &nsStop);
1200 if (RT_FAILURE(rc) && (!fRough || cTriesLeft > 0))
1201 continue;
1202 }
1203 }
1204
1205 /*
1206 * Calculate the TSC frequency and update it (shared with the refinement timer).
1207 */
1208 supdrvGipInitSetCpuFreq(pGip, nsStop - nsStart, uTscStop - uTscStart, 0);
1209 return VINF_SUCCESS;
1210 }
1211
1212 Assert(!fRough);
1213 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
1214}
1215
1216
1217/**
1218 * Finds our (@a idCpu) entry, or allocates a new one if not found.
1219 *
1220 * @returns Index of the CPU in the cache set.
1221 * @param pGip The GIP.
1222 * @param idCpu The CPU ID.
1223 */
1224static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
1225{
1226 uint32_t i, cTries;
1227
1228 /*
1229 * ASSUMES that CPU IDs are constant.
1230 */
1231 for (i = 0; i < pGip->cCpus; i++)
1232 if (pGip->aCPUs[i].idCpu == idCpu)
1233 return i;
1234
1235 cTries = 0;
1236 do
1237 {
1238 for (i = 0; i < pGip->cCpus; i++)
1239 {
1240 bool fRc;
1241 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
1242 if (fRc)
1243 return i;
1244 }
1245 } while (cTries++ < 32);
1246 AssertReleaseFailed();
1247 return i - 1;
1248}
1249
1250
1251/**
1252 * The calling CPU should be accounted as online, update GIP accordingly.
1253 *
1254 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
1255 *
1256 * @param pDevExt The device extension.
1257 * @param idCpu The CPU ID.
1258 */
1259static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1260{
1261 int iCpuSet = 0;
1262 uint16_t idApic = UINT16_MAX;
1263 uint32_t i = 0;
1264 uint64_t u64NanoTS = 0;
1265 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1266
1267 AssertPtrReturnVoid(pGip);
1268 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1269 AssertRelease(idCpu == RTMpCpuId());
1270 Assert(pGip->cPossibleCpus == RTMpGetCount());
1271
1272 /*
1273 * Do this behind a spinlock with interrupts disabled as this can fire
1274 * on all CPUs simultaneously, see @bugref{6110}.
1275 */
1276 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1277
1278 /*
1279 * Update the globals.
1280 */
1281 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
1282 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
1283 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1284 if (iCpuSet >= 0)
1285 {
1286 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1287 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
1288 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
1289 }
1290
1291 /*
1292 * Update the entry.
1293 */
1294 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
1295 i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1296
1297 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, pGip->u64CpuHz);
1298
1299 idApic = ASMGetApicId();
1300 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
1301 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
1302 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
1303
1304 /*
1305 * Update the APIC ID and CPU set index mappings.
1306 */
1307 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
1308 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
1309
1310 /* Add this CPU to this set of CPUs we need to calculate the TSC-delta for. */
1311 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, RTMpCpuIdToSetIndex(idCpu));
1312
1313 /* Update the Mp online/offline counter. */
1314 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1315
1316 /* Commit it. */
1317 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
1318
1319 RTSpinlockRelease(pDevExt->hGipSpinlock);
1320}
1321
1322
1323/**
1324 * RTMpOnSpecific callback wrapper for supdrvGipMpEventOnlineOrInitOnCpu().
1325 *
1326 * @param idCpu The CPU ID we are running on.
1327 * @param pvUser1 Opaque pointer to the device instance data.
1328 * @param pvUser2 Not used.
1329 */
1330static DECLCALLBACK(void) supdrvGipMpEventOnlineCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1331{
1332 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
1333 NOREF(pvUser2);
1334 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1335}
1336
1337
1338/**
1339 * The CPU should be accounted as offline, update the GIP accordingly.
1340 *
1341 * This is used by supdrvGipMpEvent.
1342 *
1343 * @param pDevExt The device extension.
1344 * @param idCpu The CPU ID.
1345 */
1346static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1347{
1348 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1349 int iCpuSet;
1350 unsigned i;
1351
1352 AssertPtrReturnVoid(pGip);
1353 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1354
1355 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1356 AssertReturnVoid(iCpuSet >= 0);
1357
1358 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
1359 AssertReturnVoid(i < pGip->cCpus);
1360 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
1361
1362 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1363 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
1364
1365 /* Update the Mp online/offline counter. */
1366 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1367
1368 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1369 {
1370 /* Reset the TSC delta, we will recalculate it lazily. */
1371 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
1372 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
1373 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
1374 }
1375
1376 /* Commit it. */
1377 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
1378
1379 RTSpinlockRelease(pDevExt->hGipSpinlock);
1380}
1381
1382
1383/**
1384 * Multiprocessor event notification callback.
1385 *
1386 * This is used to make sure that the GIP master gets passed on to
1387 * another CPU. It also updates the associated CPU data.
1388 *
1389 * @param enmEvent The event.
1390 * @param idCpu The cpu it applies to.
1391 * @param pvUser Pointer to the device extension.
1392 */
1393static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
1394{
1395 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
1396 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1397
1398 if (pGip)
1399 {
1400 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
1401 switch (enmEvent)
1402 {
1403 case RTMPEVENT_ONLINE:
1404 {
1405 RTThreadPreemptDisable(&PreemptState);
1406 if (idCpu == RTMpCpuId())
1407 {
1408 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1409 RTThreadPreemptRestore(&PreemptState);
1410 }
1411 else
1412 {
1413 RTThreadPreemptRestore(&PreemptState);
1414 RTMpOnSpecific(idCpu, supdrvGipMpEventOnlineCallback, pDevExt, NULL /* pvUser2 */);
1415 }
1416
1417 /*
1418 * Recompute TSC-delta for the newly online'd CPU.
1419 */
1420 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1421 {
1422#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1423 supdrvTscDeltaThreadStartMeasurement(pDevExt, false /* fForceAll */);
1424#else
1425 uint32_t iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1426 supdrvMeasureTscDeltaOne(pDevExt, iCpu);
1427#endif
1428 }
1429 break;
1430 }
1431
1432 case RTMPEVENT_OFFLINE:
1433 supdrvGipMpEventOffline(pDevExt, idCpu);
1434 break;
1435 }
1436 }
1437
1438 /*
1439 * Make sure there is a master GIP.
1440 */
1441 if (enmEvent == RTMPEVENT_OFFLINE)
1442 {
1443 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
1444 if (idGipMaster == idCpu)
1445 {
1446 /*
1447 * The GIP master is going offline, find a new one.
1448 */
1449 bool fIgnored;
1450 unsigned i;
1451 RTCPUID idNewGipMaster = NIL_RTCPUID;
1452 RTCPUSET OnlineCpus;
1453 RTMpGetOnlineSet(&OnlineCpus);
1454
1455 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
1456 if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
1457 {
1458 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
1459 if (idCurCpu != idGipMaster)
1460 {
1461 idNewGipMaster = idCurCpu;
1462 break;
1463 }
1464 }
1465
1466 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
1467 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
1468 NOREF(fIgnored);
1469 }
1470 }
1471}
1472
1473
1474/**
1475 * On CPU initialization callback for RTMpOnAll.
1476 *
1477 * @param idCpu The CPU ID.
1478 * @param pvUser1 The device extension.
1479 * @param pvUser2 The GIP.
1480 */
1481static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1482{
1483 /* This is good enough, even though it will update some of the globals a
1484 bit to much. */
1485 supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
1486 NOREF(pvUser2);
1487}
1488
1489
1490/**
1491 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
1492 *
1493 * @param idCpu Ignored.
1494 * @param pvUser1 Where to put the TSC.
1495 * @param pvUser2 Ignored.
1496 */
1497static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1498{
1499 Assert(RTMpCpuIdToSetIndex(idCpu) == (intptr_t)pvUser2);
1500 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
1501 RT_NOREF2(idCpu, pvUser2);
1502}
1503
1504
1505/**
1506 * Determine if Async GIP mode is required because of TSC drift.
1507 *
1508 * When using the default/normal timer code it is essential that the time stamp counter
1509 * (TSC) runs never backwards, that is, a read operation to the counter should return
1510 * a bigger value than any previous read operation. This is guaranteed by the latest
1511 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
1512 * case we have to choose the asynchronous timer mode.
1513 *
1514 * @param poffMin Pointer to the determined difference between different
1515 * cores (optional, can be NULL).
1516 * @return false if the time stamp counters appear to be synchronized, true otherwise.
1517 */
1518static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
1519{
1520 /*
1521 * Just iterate all the cpus 8 times and make sure that the TSC is
1522 * ever increasing. We don't bother taking TSC rollover into account.
1523 */
1524 int iEndCpu = RTMpGetArraySize();
1525 int iCpu;
1526 int cLoops = 8;
1527 bool fAsync = false;
1528 int rc = VINF_SUCCESS;
1529 uint64_t offMax = 0;
1530 uint64_t offMin = ~(uint64_t)0;
1531 uint64_t PrevTsc = ASMReadTSC();
1532
1533 while (cLoops-- > 0)
1534 {
1535 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
1536 {
1537 uint64_t CurTsc;
1538 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker,
1539 &CurTsc, (void *)(uintptr_t)iCpu);
1540 if (RT_SUCCESS(rc))
1541 {
1542 if (CurTsc <= PrevTsc)
1543 {
1544 fAsync = true;
1545 offMin = offMax = PrevTsc - CurTsc;
1546 Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
1547 iCpu, cLoops, CurTsc, PrevTsc));
1548 break;
1549 }
1550
1551 /* Gather statistics (except the first time). */
1552 if (iCpu != 0 || cLoops != 7)
1553 {
1554 uint64_t off = CurTsc - PrevTsc;
1555 if (off < offMin)
1556 offMin = off;
1557 if (off > offMax)
1558 offMax = off;
1559 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
1560 }
1561
1562 /* Next */
1563 PrevTsc = CurTsc;
1564 }
1565 else if (rc == VERR_NOT_SUPPORTED)
1566 break;
1567 else
1568 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
1569 }
1570
1571 /* broke out of the loop. */
1572 if (iCpu < iEndCpu)
1573 break;
1574 }
1575
1576 if (poffMin)
1577 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
1578 Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
1579 fAsync, iEndCpu, rc, offMin, offMax));
1580#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
1581 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
1582#endif
1583 return fAsync;
1584}
1585
1586
1587/**
1588 * supdrvGipInit() worker that determines the GIP TSC mode.
1589 *
1590 * @returns The most suitable TSC mode.
1591 * @param pDevExt Pointer to the device instance data.
1592 */
1593static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
1594{
1595 uint64_t u64DiffCoresIgnored;
1596 uint32_t uEAX, uEBX, uECX, uEDX;
1597
1598 /*
1599 * Establish whether the CPU advertises TSC as invariant, we need that in
1600 * a couple of places below.
1601 */
1602 bool fInvariantTsc = false;
1603 if (ASMHasCpuId())
1604 {
1605 uEAX = ASMCpuId_EAX(0x80000000);
1606 if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
1607 {
1608 uEDX = ASMCpuId_EDX(0x80000007);
1609 if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
1610 fInvariantTsc = true;
1611 }
1612 }
1613
1614 /*
1615 * On single CPU systems, we don't need to consider ASYNC mode.
1616 */
1617 if (RTMpGetCount() <= 1)
1618 return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
1619
1620 /*
1621 * Allow the user and/or OS specific bits to force async mode.
1622 */
1623 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
1624 return SUPGIPMODE_ASYNC_TSC;
1625
1626 /*
1627 * Use invariant mode if the CPU says TSC is invariant.
1628 */
1629 if (fInvariantTsc)
1630 return SUPGIPMODE_INVARIANT_TSC;
1631
1632 /*
1633 * TSC is not invariant and we're on SMP, this presents two problems:
1634 *
1635 * (1) There might be a skew between the CPU, so that cpu0
1636 * returns a TSC that is slightly different from cpu1.
1637 * This screw may be due to (2), bad TSC initialization
1638 * or slightly different TSC rates.
1639 *
1640 * (2) Power management (and other things) may cause the TSC
1641 * to run at a non-constant speed, and cause the speed
1642 * to be different on the cpus. This will result in (1).
1643 *
1644 * If any of the above is detected, we will have to use ASYNC mode.
1645 */
1646 /* (1). Try check for current differences between the cpus. */
1647 if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
1648 return SUPGIPMODE_ASYNC_TSC;
1649
1650 /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
1651 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1652 if ( ASMIsValidStdRange(uEAX)
1653 && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
1654 {
1655 /* Check for APM support. */
1656 uEAX = ASMCpuId_EAX(0x80000000);
1657 if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
1658 {
1659 uEDX = ASMCpuId_EDX(0x80000007);
1660 if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
1661 return SUPGIPMODE_ASYNC_TSC;
1662 }
1663 }
1664
1665 return SUPGIPMODE_SYNC_TSC;
1666}
1667
1668
1669/**
1670 * Initializes per-CPU GIP information.
1671 *
1672 * @param pGip Pointer to the GIP.
1673 * @param pCpu Pointer to which GIP CPU to initialize.
1674 * @param u64NanoTS The current nanosecond timestamp.
1675 * @param uCpuHz The CPU frequency to set, 0 if the caller doesn't know.
1676 */
1677static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz)
1678{
1679 pCpu->u32TransactionId = 2;
1680 pCpu->u64NanoTS = u64NanoTS;
1681 pCpu->u64TSC = ASMReadTSC();
1682 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
1683 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
1684
1685 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
1686 ASMAtomicWriteU32(&pCpu->idCpu, NIL_RTCPUID);
1687 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
1688 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
1689
1690 /*
1691 * The first time we're called, we don't have a CPU frequency handy,
1692 * so pretend it's a 4 GHz CPU. On CPUs that are online, we'll get
1693 * called again and at that point we have a more plausible CPU frequency
1694 * value handy. The frequency history will also be adjusted again on
1695 * the 2nd timer callout (maybe we can skip that now?).
1696 */
1697 if (!uCpuHz)
1698 {
1699 pCpu->u64CpuHz = _4G - 1;
1700 pCpu->u32UpdateIntervalTSC = (uint32_t)((_4G - 1) / pGip->u32UpdateHz);
1701 }
1702 else
1703 {
1704 pCpu->u64CpuHz = uCpuHz;
1705 pCpu->u32UpdateIntervalTSC = (uint32_t)(uCpuHz / pGip->u32UpdateHz);
1706 }
1707 pCpu->au32TSCHistory[0]
1708 = pCpu->au32TSCHistory[1]
1709 = pCpu->au32TSCHistory[2]
1710 = pCpu->au32TSCHistory[3]
1711 = pCpu->au32TSCHistory[4]
1712 = pCpu->au32TSCHistory[5]
1713 = pCpu->au32TSCHistory[6]
1714 = pCpu->au32TSCHistory[7]
1715 = pCpu->u32UpdateIntervalTSC;
1716}
1717
1718
1719/**
1720 * Initializes the GIP data.
1721 *
1722 * @param pDevExt Pointer to the device instance data.
1723 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1724 * @param HCPhys The physical address of the GIP.
1725 * @param u64NanoTS The current nanosecond timestamp.
1726 * @param uUpdateHz The update frequency.
1727 * @param uUpdateIntervalNS The update interval in nanoseconds.
1728 * @param cCpus The CPU count.
1729 */
1730static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
1731 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
1732{
1733 size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
1734 unsigned i;
1735#ifdef DEBUG_DARWIN_GIP
1736 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1737#else
1738 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1739#endif
1740
1741 /*
1742 * Initialize the structure.
1743 */
1744 memset(pGip, 0, cbGip);
1745
1746 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
1747 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
1748 pGip->u32Mode = supdrvGipInitDetermineTscMode(pDevExt);
1749 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
1750 /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
1751 pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
1752 ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
1753 else
1754 pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
1755 pGip->cCpus = (uint16_t)cCpus;
1756 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
1757 pGip->u32UpdateHz = uUpdateHz;
1758 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
1759 pGip->fGetGipCpu = SUPGIPGETCPU_APIC_ID;
1760 RTCpuSetEmpty(&pGip->OnlineCpuSet);
1761 RTCpuSetEmpty(&pGip->PresentCpuSet);
1762 RTMpGetSet(&pGip->PossibleCpuSet);
1763 pGip->cOnlineCpus = RTMpGetOnlineCount();
1764 pGip->cPresentCpus = RTMpGetPresentCount();
1765 pGip->cPossibleCpus = RTMpGetCount();
1766 pGip->idCpuMax = RTMpGetMaxCpuId();
1767 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
1768 pGip->aiCpuFromApicId[i] = UINT16_MAX;
1769 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
1770 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
1771 for (i = 0; i < cCpus; i++)
1772 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, 0 /*uCpuHz*/);
1773
1774 /*
1775 * Link it to the device extension.
1776 */
1777 pDevExt->pGip = pGip;
1778 pDevExt->HCPhysGip = HCPhys;
1779 pDevExt->cGipUsers = 0;
1780}
1781
1782
1783/**
1784 * Creates the GIP.
1785 *
1786 * @returns VBox status code.
1787 * @param pDevExt Instance data. GIP stuff may be updated.
1788 */
1789int VBOXCALL supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
1790{
1791 PSUPGLOBALINFOPAGE pGip;
1792 RTHCPHYS HCPhysGip;
1793 uint32_t u32SystemResolution;
1794 uint32_t u32Interval;
1795 uint32_t u32MinInterval;
1796 uint32_t uMod;
1797 unsigned cCpus;
1798 int rc;
1799
1800 LogFlow(("supdrvGipCreate:\n"));
1801
1802 /*
1803 * Assert order.
1804 */
1805 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
1806 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
1807 Assert(!pDevExt->pGipTimer);
1808#ifdef SUPDRV_USE_MUTEX_FOR_GIP
1809 Assert(pDevExt->mtxGip != NIL_RTSEMMUTEX);
1810 Assert(pDevExt->mtxTscDelta != NIL_RTSEMMUTEX);
1811#else
1812 Assert(pDevExt->mtxGip != NIL_RTSEMFASTMUTEX);
1813 Assert(pDevExt->mtxTscDelta != NIL_RTSEMFASTMUTEX);
1814#endif
1815
1816 /*
1817 * Check the CPU count.
1818 */
1819 cCpus = RTMpGetArraySize();
1820 if ( cCpus > RTCPUSET_MAX_CPUS
1821#if RTCPUSET_MAX_CPUS != 256
1822 || cCpus > 256 /* ApicId is used for the mappings */
1823#endif
1824 )
1825 {
1826 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
1827 return VERR_TOO_MANY_CPUS;
1828 }
1829
1830 /*
1831 * Allocate a contiguous set of pages with a default kernel mapping.
1832 */
1833 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
1834 if (RT_FAILURE(rc))
1835 {
1836 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
1837 return rc;
1838 }
1839 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
1840 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
1841
1842 /*
1843 * Find a reasonable update interval and initialize the structure.
1844 */
1845 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
1846 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
1847 * See @bugref{6710}. */
1848 u32MinInterval = RT_NS_10MS;
1849 u32SystemResolution = RTTimerGetSystemGranularity();
1850 u32Interval = u32MinInterval;
1851 uMod = u32MinInterval % u32SystemResolution;
1852 if (uMod)
1853 u32Interval += u32SystemResolution - uMod;
1854
1855 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
1856
1857 /*
1858 * Important sanity check...
1859 */
1860 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
1861 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
1862 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
1863 {
1864 OSDBGPRINT(("supdrvGipCreate: Host-OS/user claims the TSC-deltas are zero but we detected async. TSC! Bad.\n"));
1865 return VERR_INTERNAL_ERROR_2;
1866 }
1867
1868 /* It doesn't make sense to do TSC-delta detection on systems we detect as async. */
1869 AssertReturn( pGip->u32Mode != SUPGIPMODE_ASYNC_TSC
1870 || pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED, VERR_INTERNAL_ERROR_3);
1871
1872 /*
1873 * Do the TSC frequency measurements.
1874 *
1875 * If we're in invariant TSC mode, just to a quick preliminary measurement
1876 * that the TSC-delta measurement code can use to yield cross calls.
1877 *
1878 * If we're in any of the other two modes, neither which require MP init,
1879 * notifications or deltas for the job, do the full measurement now so
1880 * that supdrvGipInitOnCpu() can populate the TSC interval and history
1881 * array with more reasonable values.
1882 */
1883 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1884 {
1885 rc = supdrvGipInitMeasureTscFreq(pGip, true /*fRough*/); /* cannot fail */
1886 supdrvGipInitStartTimerForRefiningInvariantTscFreq(pDevExt);
1887 }
1888 else
1889 rc = supdrvGipInitMeasureTscFreq(pGip, false /*fRough*/);
1890 if (RT_SUCCESS(rc))
1891 {
1892 /*
1893 * Start TSC-delta measurement thread before we start getting MP
1894 * events that will try kick it into action (includes the
1895 * RTMpOnAll/supdrvGipInitOnCpu call below).
1896 */
1897 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
1898 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
1899#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1900 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1901 rc = supdrvTscDeltaThreadInit(pDevExt);
1902#endif
1903 if (RT_SUCCESS(rc))
1904 {
1905 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
1906 if (RT_SUCCESS(rc))
1907 {
1908 /*
1909 * Do GIP initialization on all online CPUs. Wake up the
1910 * TSC-delta thread afterwards.
1911 */
1912 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
1913 if (RT_SUCCESS(rc))
1914 {
1915#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1916 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
1917#else
1918 uint16_t iCpu;
1919 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1920 {
1921 /*
1922 * Measure the TSC deltas now that we have MP notifications.
1923 */
1924 int cTries = 5;
1925 do
1926 {
1927 rc = supdrvMeasureInitialTscDeltas(pDevExt);
1928 if ( rc != VERR_TRY_AGAIN
1929 && rc != VERR_CPU_OFFLINE)
1930 break;
1931 } while (--cTries > 0);
1932 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1933 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
1934 }
1935 else
1936 {
1937 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1938 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
1939 }
1940 if (RT_SUCCESS(rc))
1941#endif
1942 {
1943 /*
1944 * Create the timer.
1945 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
1946 */
1947 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
1948 {
1949 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
1950 supdrvGipAsyncTimer, pDevExt);
1951 if (rc == VERR_NOT_SUPPORTED)
1952 {
1953 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
1954 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
1955 }
1956 }
1957 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1958 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
1959 supdrvGipSyncAndInvariantTimer, pDevExt);
1960 if (RT_SUCCESS(rc))
1961 {
1962 /*
1963 * We're good.
1964 */
1965 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
1966 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
1967
1968 g_pSUPGlobalInfoPage = pGip;
1969 return VINF_SUCCESS;
1970 }
1971
1972 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
1973 Assert(!pDevExt->pGipTimer);
1974 }
1975 }
1976 else
1977 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
1978 }
1979 else
1980 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
1981 }
1982 else
1983 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
1984 }
1985 else
1986 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureInitialTscDeltas failed. rc=%Rrc\n", rc));
1987
1988 /* Releases timer frequency increase too. */
1989 supdrvGipDestroy(pDevExt);
1990 return rc;
1991}
1992
1993
1994/**
1995 * Invalidates the GIP data upon termination.
1996 *
1997 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1998 */
1999static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
2000{
2001 unsigned i;
2002 pGip->u32Magic = 0;
2003 for (i = 0; i < pGip->cCpus; i++)
2004 {
2005 pGip->aCPUs[i].u64NanoTS = 0;
2006 pGip->aCPUs[i].u64TSC = 0;
2007 pGip->aCPUs[i].iTSCHistoryHead = 0;
2008 pGip->aCPUs[i].u64TSCSample = 0;
2009 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
2010 }
2011}
2012
2013
2014/**
2015 * Terminates the GIP.
2016 *
2017 * @param pDevExt Instance data. GIP stuff may be updated.
2018 */
2019void VBOXCALL supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
2020{
2021 int rc;
2022#ifdef DEBUG_DARWIN_GIP
2023 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
2024 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
2025 pDevExt->pGipTimer, pDevExt->GipMemObj));
2026#endif
2027
2028 /*
2029 * Stop receiving MP notifications before tearing anything else down.
2030 */
2031 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
2032
2033#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2034 /*
2035 * Terminate the TSC-delta measurement thread and resources.
2036 */
2037 supdrvTscDeltaTerm(pDevExt);
2038#endif
2039
2040 /*
2041 * Destroy the TSC-refinement timer.
2042 */
2043 if (pDevExt->pInvarTscRefineTimer)
2044 {
2045 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
2046 pDevExt->pInvarTscRefineTimer = NULL;
2047 }
2048
2049 /*
2050 * Invalid the GIP data.
2051 */
2052 if (pDevExt->pGip)
2053 {
2054 supdrvGipTerm(pDevExt->pGip);
2055 pDevExt->pGip = NULL;
2056 }
2057 g_pSUPGlobalInfoPage = NULL;
2058
2059 /*
2060 * Destroy the timer and free the GIP memory object.
2061 */
2062 if (pDevExt->pGipTimer)
2063 {
2064 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
2065 pDevExt->pGipTimer = NULL;
2066 }
2067
2068 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
2069 {
2070 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
2071 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
2072 }
2073
2074 /*
2075 * Finally, make sure we've release the system timer resolution request
2076 * if one actually succeeded and is still pending.
2077 */
2078 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
2079}
2080
2081
2082
2083
2084/*
2085 *
2086 *
2087 * GIP Update Timer Related Code
2088 * GIP Update Timer Related Code
2089 * GIP Update Timer Related Code
2090 *
2091 *
2092 */
2093
2094
2095/**
2096 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
2097 * updates all the per cpu data except the transaction id.
2098 *
2099 * @param pDevExt The device extension.
2100 * @param pGipCpu Pointer to the per cpu data.
2101 * @param u64NanoTS The current time stamp.
2102 * @param u64TSC The current TSC.
2103 * @param iTick The current timer tick.
2104 *
2105 * @remarks Can be called with interrupts disabled!
2106 */
2107static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
2108{
2109 uint64_t u64TSCDelta;
2110 bool fUpdateCpuHz;
2111 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2112 AssertPtrReturnVoid(pGip);
2113
2114 /* Delta between this and the previous update. */
2115 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
2116
2117 /*
2118 * Update the NanoTS.
2119 */
2120 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
2121
2122 /*
2123 * Calc TSC delta.
2124 */
2125 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
2126 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
2127
2128 /*
2129 * Determine if we need to update the CPU (TSC) frequency calculation.
2130 *
2131 * We don't need to keep recalculating the frequency when it's invariant,
2132 * unless the special tstGIP-2 testing mode is enabled.
2133 */
2134 fUpdateCpuHz = pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC;
2135 if (!(pGip->fFlags & SUPGIP_FLAGS_TESTING))
2136 { /* likely*/ }
2137 else
2138 {
2139 uint32_t fGipFlags = pGip->fFlags;
2140 if (fGipFlags & (SUPGIP_FLAGS_TESTING_ENABLE | SUPGIP_FLAGS_TESTING_START))
2141 {
2142 if (fGipFlags & SUPGIP_FLAGS_TESTING_START)
2143 {
2144 /* Cache the TSC frequency before forcing updates due to test mode. */
2145 if (!fUpdateCpuHz)
2146 pDevExt->uGipTestModeInvariantCpuHz = pGip->aCPUs[0].u64CpuHz;
2147 ASMAtomicAndU32(&pGip->fFlags, ~SUPGIP_FLAGS_TESTING_START);
2148 }
2149 fUpdateCpuHz = true;
2150 }
2151 else if (fGipFlags & SUPGIP_FLAGS_TESTING_STOP)
2152 {
2153 /* Restore the cached TSC frequency if any. */
2154 if (!fUpdateCpuHz)
2155 {
2156 Assert(pDevExt->uGipTestModeInvariantCpuHz);
2157 ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, pDevExt->uGipTestModeInvariantCpuHz);
2158 }
2159 ASMAtomicAndU32(&pGip->fFlags, ~(SUPGIP_FLAGS_TESTING_STOP | SUPGIP_FLAGS_TESTING));
2160 }
2161 }
2162
2163 /*
2164 * Calculate the CPU (TSC) frequency if necessary.
2165 */
2166 if (fUpdateCpuHz)
2167 {
2168 uint64_t u64CpuHz;
2169 uint32_t u32UpdateIntervalTSC;
2170 uint32_t u32UpdateIntervalTSCSlack;
2171 uint32_t u32TransactionId;
2172 unsigned iTSCHistoryHead;
2173
2174 if (u64TSCDelta >> 32)
2175 {
2176 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
2177 pGipCpu->cErrors++;
2178 }
2179
2180 /*
2181 * On the 2nd and 3rd callout, reset the history with the current TSC
2182 * interval since the values entered by supdrvGipInit are totally off.
2183 * The interval on the 1st callout completely unreliable, the 2nd is a bit
2184 * better, while the 3rd should be most reliable.
2185 */
2186 /** @todo Could we drop this now that we initializes the history
2187 * with nominal TSC frequency values? */
2188 u32TransactionId = pGipCpu->u32TransactionId;
2189 if (RT_UNLIKELY( ( u32TransactionId == 5
2190 || u32TransactionId == 7)
2191 && ( iTick == 2
2192 || iTick == 3) ))
2193 {
2194 unsigned i;
2195 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
2196 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
2197 }
2198
2199 /*
2200 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
2201 * Wait until we have at least one full history since the above history reset. The
2202 * assumption is that the majority of the previous history values will be tolerable.
2203 * See @bugref{6710#c67}.
2204 */
2205 /** @todo Could we drop the fudging there now that we initializes the history
2206 * with nominal TSC frequency values? */
2207 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
2208 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2209 {
2210 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
2211 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
2212 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
2213 {
2214 uint32_t u32;
2215 u32 = pGipCpu->au32TSCHistory[0];
2216 u32 += pGipCpu->au32TSCHistory[1];
2217 u32 += pGipCpu->au32TSCHistory[2];
2218 u32 += pGipCpu->au32TSCHistory[3];
2219 u32 >>= 2;
2220 u64TSCDelta = pGipCpu->au32TSCHistory[4];
2221 u64TSCDelta += pGipCpu->au32TSCHistory[5];
2222 u64TSCDelta += pGipCpu->au32TSCHistory[6];
2223 u64TSCDelta += pGipCpu->au32TSCHistory[7];
2224 u64TSCDelta >>= 2;
2225 u64TSCDelta += u32;
2226 u64TSCDelta >>= 1;
2227 }
2228 }
2229
2230 /*
2231 * TSC History.
2232 */
2233 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
2234 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
2235 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
2236 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
2237
2238 /*
2239 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
2240 *
2241 * On Windows, we have an occasional (but recurring) sour value that messed up
2242 * the history but taking only 1 interval reduces the precision overall.
2243 */
2244 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
2245 || pGip->u32UpdateHz >= 1000)
2246 {
2247 uint32_t u32;
2248 u32 = pGipCpu->au32TSCHistory[0];
2249 u32 += pGipCpu->au32TSCHistory[1];
2250 u32 += pGipCpu->au32TSCHistory[2];
2251 u32 += pGipCpu->au32TSCHistory[3];
2252 u32 >>= 2;
2253 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
2254 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
2255 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
2256 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
2257 u32UpdateIntervalTSC >>= 2;
2258 u32UpdateIntervalTSC += u32;
2259 u32UpdateIntervalTSC >>= 1;
2260
2261 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
2262 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
2263 }
2264 else if (pGip->u32UpdateHz >= 90)
2265 {
2266 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2267 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
2268 u32UpdateIntervalTSC >>= 1;
2269
2270 /* value chosen on a 2GHz thinkpad running windows */
2271 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
2272 }
2273 else
2274 {
2275 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2276
2277 /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
2278 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
2279 }
2280 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
2281
2282 /*
2283 * CpuHz.
2284 */
2285 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
2286 u64CpuHz /= pGip->u32UpdateIntervalNS;
2287 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
2288 }
2289}
2290
2291
2292/**
2293 * Updates the GIP.
2294 *
2295 * @param pDevExt The device extension.
2296 * @param u64NanoTS The current nanosecond timestamp.
2297 * @param u64TSC The current TSC timestamp.
2298 * @param idCpu The CPU ID.
2299 * @param iTick The current timer tick.
2300 *
2301 * @remarks Can be called with interrupts disabled!
2302 */
2303static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
2304{
2305 /*
2306 * Determine the relevant CPU data.
2307 */
2308 PSUPGIPCPU pGipCpu;
2309 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2310 AssertPtrReturnVoid(pGip);
2311
2312 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2313 pGipCpu = &pGip->aCPUs[0];
2314 else
2315 {
2316 unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
2317 if (RT_UNLIKELY(iCpu >= pGip->cCpus))
2318 return;
2319 pGipCpu = &pGip->aCPUs[iCpu];
2320 if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
2321 return;
2322 }
2323
2324 /*
2325 * Start update transaction.
2326 */
2327 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2328 {
2329 /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
2330 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2331 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2332 pGipCpu->cErrors++;
2333 return;
2334 }
2335
2336 /*
2337 * Recalc the update frequency every 0x800th time.
2338 */
2339 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariant hosts. */
2340 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
2341 {
2342 if (pGip->u64NanoTSLastUpdateHz)
2343 {
2344#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
2345 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
2346 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
2347 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
2348 {
2349 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
2350 * calculation on non-invariant hosts if it changes the history decision
2351 * taken in supdrvGipDoUpdateCpu(). */
2352 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
2353 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
2354 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
2355 }
2356#endif
2357 }
2358 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
2359 }
2360
2361 /*
2362 * Update the data.
2363 */
2364 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2365
2366 /*
2367 * Complete transaction.
2368 */
2369 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2370}
2371
2372
2373/**
2374 * Updates the per cpu GIP data for the calling cpu.
2375 *
2376 * @param pDevExt The device extension.
2377 * @param u64NanoTS The current nanosecond timestamp.
2378 * @param u64TSC The current TSC timesaver.
2379 * @param idCpu The CPU ID.
2380 * @param idApic The APIC id for the CPU index.
2381 * @param iTick The current timer tick.
2382 *
2383 * @remarks Can be called with interrupts disabled!
2384 */
2385static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
2386 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
2387{
2388 uint32_t iCpu;
2389 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2390
2391 /*
2392 * Avoid a potential race when a CPU online notification doesn't fire on
2393 * the onlined CPU but the tick creeps in before the event notification is
2394 * run.
2395 */
2396 if (RT_UNLIKELY(iTick == 1))
2397 {
2398 iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
2399 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
2400 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
2401 }
2402
2403 iCpu = pGip->aiCpuFromApicId[idApic];
2404 if (RT_LIKELY(iCpu < pGip->cCpus))
2405 {
2406 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
2407 if (pGipCpu->idCpu == idCpu)
2408 {
2409 /*
2410 * Start update transaction.
2411 */
2412 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2413 {
2414 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2415 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2416 pGipCpu->cErrors++;
2417 return;
2418 }
2419
2420 /*
2421 * Update the data.
2422 */
2423 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2424
2425 /*
2426 * Complete transaction.
2427 */
2428 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2429 }
2430 }
2431}
2432
2433
2434/**
2435 * Timer callback function for the sync and invariant GIP modes.
2436 *
2437 * @param pTimer The timer.
2438 * @param pvUser Opaque pointer to the device extension.
2439 * @param iTick The timer tick.
2440 */
2441static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2442{
2443 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2444 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2445 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2446 uint64_t u64TSC = ASMReadTSC();
2447 uint64_t u64NanoTS = RTTimeSystemNanoTS();
2448 RT_NOREF1(pTimer);
2449
2450 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
2451 {
2452 /*
2453 * The calculations in supdrvGipUpdate() is somewhat timing sensitive,
2454 * missing timer ticks is not an option for GIP because the GIP users
2455 * will end up incrementing the time in 1ns per time getter call until
2456 * there is a complete timer update. So, if the delta has yet to be
2457 * calculated, we just pretend it is zero for now (the GIP users
2458 * probably won't have it for a wee while either and will do the same).
2459 *
2460 * We could maybe on some platforms try cross calling a CPU with a
2461 * working delta here, but it's not worth the hassle since the
2462 * likelihood of this happening is really low. On Windows, Linux, and
2463 * Solaris timers fire on the CPU they were registered/started on.
2464 * Darwin timers doesn't necessarily (they are high priority threads).
2465 */
2466 uint32_t iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
2467 uint16_t iGipCpu = RT_LIKELY(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
2468 ? pGip->aiCpuFromCpuSetIdx[iCpuSet] : UINT16_MAX;
2469 Assert(!ASMIntAreEnabled());
2470 if (RT_LIKELY(iGipCpu < pGip->cCpus))
2471 {
2472 int64_t iTscDelta = pGip->aCPUs[iGipCpu].i64TSCDelta;
2473 if (iTscDelta != INT64_MAX)
2474 u64TSC -= iTscDelta;
2475 }
2476 }
2477
2478 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
2479
2480 ASMSetFlags(fEFlags);
2481}
2482
2483
2484/**
2485 * Timer callback function for async GIP mode.
2486 * @param pTimer The timer.
2487 * @param pvUser Opaque pointer to the device extension.
2488 * @param iTick The timer tick.
2489 */
2490static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2491{
2492 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2493 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2494 RTCPUID idCpu = RTMpCpuId();
2495 uint64_t u64TSC = ASMReadTSC();
2496 uint64_t NanoTS = RTTimeSystemNanoTS();
2497 RT_NOREF1(pTimer);
2498
2499 /** @todo reset the transaction number and whatnot when iTick == 1. */
2500 if (pDevExt->idGipMaster == idCpu)
2501 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
2502 else
2503 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
2504
2505 ASMSetFlags(fEFlags);
2506}
2507
2508
2509
2510
2511/*
2512 *
2513 *
2514 * TSC Delta Measurements And Related Code
2515 * TSC Delta Measurements And Related Code
2516 * TSC Delta Measurements And Related Code
2517 *
2518 *
2519 */
2520
2521
/*
 * Select TSC delta measurement algorithm.
 *
 * Method 2 is the one that gets compiled in; flip the condition to build
 * method 1 instead.
 */
#if 1
# define GIP_TSC_DELTA_METHOD_2
#else
# define GIP_TSC_DELTA_METHOD_1
#endif

/** Size used when padding variables to keep them away from other cache
 * lines.  Better too large than too small!
 * @remarks Current AMD64 and x86 CPUs seems to use 64 bytes.  There are claims
 *          that NetBurst had 128 byte cache lines while the 486 thru Pentium
 *          III had 32 bytes cache lines. */
#define GIP_TSC_DELTA_CACHE_LINE_SIZE   128
2537
2538
/**
 * TSC delta measurement algorithm \#2 result entry.
 *
 * One sample: a TSC value together with two sequence numbers.
 */
struct SUPDRVTSCDELTAMETHOD2ENTRY
{
    /** Sequence number of the recording side.
     * NOTE(review): presumably read from the recorder's own iCurSeqNo - confirm
     * against the method 2 collection code. */
    uint32_t    iSeqMine;
    /** Sequence number of the other side.
     * NOTE(review): presumably the other party's iCurSeqNo at sampling time -
     * confirm against the method 2 collection code. */
    uint32_t    iSeqOther;
    /** The TSC value of the sample. */
    uint64_t    uTsc;
};
typedef struct SUPDRVTSCDELTAMETHOD2ENTRY SUPDRVTSCDELTAMETHOD2ENTRY;
2548
2549/**
2550 * TSC delta measurement algorithm \#2 Data.
2551 */
2552typedef struct SUPDRVTSCDELTAMETHOD2
2553{
2554 /** Padding to make sure the iCurSeqNo is in its own cache line. */
2555 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2556 /** The current sequence number of this worker. */
2557 uint32_t volatile iCurSeqNo;
2558 /** Padding to make sure the iCurSeqNo is in its own cache line. */
2559 uint32_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint32_t) - 1];
2560 /** Result table. */
2561 SUPDRVTSCDELTAMETHOD2ENTRY aResults[64];
2562} SUPDRVTSCDELTAMETHOD2;
2563/** Pointer to the data for TSC delta measurement algorithm \#2 .*/
2564typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
2565
2566
2567/**
2568 * The TSC delta synchronization struct, version 2.
2569 *
2570 * The synchronization variable is completely isolated in its own cache line
2571 * (provided our max cache line size estimate is correct).
2572 */
2573typedef struct SUPTSCDELTASYNC2
2574{
2575 /** Padding to make sure the uVar1 is in its own cache line. */
2576 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2577
2578 /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
2579 volatile uint32_t uSyncVar;
2580 /** Sequence synchronizing variable used for post 'GO' synchronization. */
2581 volatile uint32_t uSyncSeq;
2582
2583 /** Padding to make sure the uVar1 is in its own cache line. */
2584 uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t) - 2];
2585
2586 /** Start RDTSC value. Put here mainly to save stack space. */
2587 uint64_t uTscStart;
2588 /** Copy of SUPDRVGIPTSCDELTARGS::cMaxTscTicks. */
2589 uint64_t cMaxTscTicks;
2590} SUPTSCDELTASYNC2;
2591AssertCompileSize(SUPTSCDELTASYNC2, GIP_TSC_DELTA_CACHE_LINE_SIZE * 2 + sizeof(uint64_t));
2592typedef SUPTSCDELTASYNC2 *PSUPTSCDELTASYNC2;
2593
/*
 * Values for SUPTSCDELTASYNC2::uSyncVar.
 *
 * supdrvTscDeltaSync2_Before() moves the variable READY -> STEADY -> GO and
 * switches it to TIMEOUT when the wait for STEADY takes too long.
 */
/** Prestart wait. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_WAIT   UINT32_C(0x00000ffe)
/** Prestart aborted. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_ABORT  UINT32_C(0x00000fff)
/** Ready (on your mark). */
#define GIP_TSC_DELTA_SYNC2_READY           UINT32_C(0x00001000)
/** Steady (get set). */
#define GIP_TSC_DELTA_SYNC2_STEADY          UINT32_C(0x00001001)
/** Go! */
#define GIP_TSC_DELTA_SYNC2_GO              UINT32_C(0x00001002)
/** Used by the verification test. */
#define GIP_TSC_DELTA_SYNC2_GO_GO           UINT32_C(0x00001003)

/** We reached the time limit. */
#define GIP_TSC_DELTA_SYNC2_TIMEOUT         UINT32_C(0x00001ffe)
/** The other party won't touch the sync struct ever again. */
#define GIP_TSC_DELTA_SYNC2_FINAL           UINT32_C(0x00001fff)
2611
2612
2613/**
2614 * Argument package/state passed by supdrvMeasureTscDeltaOne() to the RTMpOn
2615 * callback worker.
2616 * @todo add
2617 */
2618typedef struct SUPDRVGIPTSCDELTARGS
2619{
2620 /** The device extension. */
2621 PSUPDRVDEVEXT pDevExt;
2622 /** Pointer to the GIP CPU array entry for the worker. */
2623 PSUPGIPCPU pWorker;
2624 /** Pointer to the GIP CPU array entry for the master. */
2625 PSUPGIPCPU pMaster;
2626 /** The maximum number of ticks to spend in supdrvMeasureTscDeltaCallback.
2627 * (This is what we need a rough TSC frequency for.) */
2628 uint64_t cMaxTscTicks;
2629 /** Used to abort synchronization setup. */
2630 bool volatile fAbortSetup;
2631
2632 /** Padding to make sure the master variables live in its own cache lines. */
2633 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2634
2635 /** @name Master
2636 * @{ */
2637 /** The time the master spent in the MP worker. */
2638 uint64_t cElapsedMasterTscTicks;
2639 /** The iTry value when stopped at. */
2640 uint32_t iTry;
2641 /** Set if the run timed out. */
2642 bool volatile fTimedOut;
2643 /** Pointer to the master's synchronization struct (on stack). */
2644 PSUPTSCDELTASYNC2 volatile pSyncMaster;
2645 /** Master data union. */
2646 union
2647 {
2648 /** Data (master) for delta verification. */
2649 struct
2650 {
2651 /** Verification test TSC values for the master. */
2652 uint64_t volatile auTscs[32];
2653 } Verify;
2654 /** Data (master) for measurement method \#2. */
2655 struct
2656 {
2657 /** Data and sequence number. */
2658 SUPDRVTSCDELTAMETHOD2 Data;
2659 /** The lag setting for the next run. */
2660 bool fLag;
2661 /** Number of hits. */
2662 uint32_t cHits;
2663 } M2;
2664 } uMaster;
2665 /** The verifier verdict, VINF_SUCCESS if ok, VERR_OUT_OF_RANGE if not,
2666 * VERR_TRY_AGAIN on timeout. */
2667 int32_t rcVerify;
2668#ifdef TSCDELTA_VERIFY_WITH_STATS
2669 /** The maximum difference between TSC read during delta verification. */
2670 int64_t cMaxVerifyTscTicks;
2671 /** The minimum difference between two TSC reads during verification. */
2672 int64_t cMinVerifyTscTicks;
2673 /** The bad TSC diff, worker relative to master (= worker - master).
2674 * Negative value means the worker is behind the master. */
2675 int64_t iVerifyBadTscDiff;
2676#endif
2677 /** @} */
2678
2679 /** Padding to make sure the worker variables live is in its own cache line. */
2680 uint64_t au64CacheLinePaddingBetween[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2681
2682 /** @name Proletarian
2683 * @{ */
2684 /** Pointer to the worker's synchronization struct (on stack). */
2685 PSUPTSCDELTASYNC2 volatile pSyncWorker;
2686 /** The time the worker spent in the MP worker. */
2687 uint64_t cElapsedWorkerTscTicks;
2688 /** Worker data union. */
2689 union
2690 {
2691 /** Data (worker) for delta verification. */
2692 struct
2693 {
2694 /** Verification test TSC values for the worker. */
2695 uint64_t volatile auTscs[32];
2696 } Verify;
2697 /** Data (worker) for measurement method \#2. */
2698 struct
2699 {
2700 /** Data and sequence number. */
2701 SUPDRVTSCDELTAMETHOD2 Data;
2702 /** The lag setting for the next run (set by master). */
2703 bool fLag;
2704 } M2;
2705 } uWorker;
2706 /** @} */
2707
2708 /** Padding to make sure the above is in its own cache line. */
2709 uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2710} SUPDRVGIPTSCDELTARGS;
2711typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
2712
2713
/** @name Macros that implement the basic synchronization steps common to
 *        the algorithms.
 *
 * Must be used from loop as the timeouts are implemented via 'break' statements
 * at the moment.
 *
 * The TSCDELTA_DBG_* helpers defined first are debugging aids: the loop
 * counter macros only do something in DEBUG_bird builds, and each of the three
 * message macros is compiled out unless its switch below is flipped.
 *
 * @{
 */
#ifndef DEBUG_bird /* && !defined(VBOX_STRICT) */
# define TSCDELTA_DBG_VARS()            ((void)0)
# define TSCDELTA_DBG_START_LOOP()      ((void)0)
# define TSCDELTA_DBG_CHECK_LOOP()      ((void)0)
#else
# define TSCDELTA_DBG_VARS()            uint32_t iDbgCounter
# define TSCDELTA_DBG_START_LOOP()      do { iDbgCounter = 0; } while (0)
# define TSCDELTA_DBG_CHECK_LOOP() \
    do { iDbgCounter++; if ((iDbgCounter & UINT32_C(0x01ffffff)) == 0) RT_BREAKPOINT(); } while (0)
#endif

/* Synchronization failure messages; change to '#if 0' to enable. */
#if 1
# define TSCDELTA_DBG_SYNC_MSG(a_Args)  ((void)0)
#else
# define TSCDELTA_DBG_SYNC_MSG(a_Args)  SUPR0Printf a_Args
#endif

/* Second message level; change to '#if 0' to enable. */
#if 1
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) ((void)0)
#else
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) SUPR0Printf a_Args
#endif

/* Ninth message level; change to '#if 0' to enable. */
#if 1
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) ((void)0)
#else
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) SUPR0Printf a_Args
#endif
2747
2748
2749static bool supdrvTscDeltaSync2_Before(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
2750 bool fIsMaster, PRTCCUINTREG pfEFlags, PSUPDRVGIPTSCDELTARGS pArgs)
2751{
2752 uint32_t iMySeq = fIsMaster ? 0 : 256;
2753 uint32_t const iMaxSeq = iMySeq + 16; /* For the last loop, darn linux/freebsd C-ishness. */
2754 uint32_t u32Tmp;
2755 uint32_t iSync2Loops = 0;
2756 RTCCUINTREG fEFlags;
2757 TSCDELTA_DBG_VARS();
2758
2759 *pfEFlags = X86_EFL_IF | X86_EFL_1; /* should shut up most nagging compilers. */
2760
2761 /*
2762 * The master tells the worker to get on it's mark.
2763 */
2764 if (fIsMaster)
2765 {
2766 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
2767 { /* likely*/ }
2768 else
2769 {
2770 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #1 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
2771 return false;
2772 }
2773 }
2774
2775 /*
2776 * Wait for the on your mark signal (ack in the master case). We process timeouts here.
2777 */
2778 ASMAtomicWriteU32(&(pMySync)->uSyncSeq, 0);
2779 for (;;)
2780 {
2781 fEFlags = ASMIntDisableFlags();
2782 u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
2783 if (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY)
2784 break;
2785 ASMSetFlags(fEFlags);
2786 ASMNopPause();
2787
2788 /* Abort? */
2789 if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY)
2790 {
2791 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #2 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
2792 return false;
2793 }
2794
2795 /* Check for timeouts every so often (not every loop in case RDTSC is
2796 trapping or something). Must check the first time around. */
2797#if 0 /* For debugging the timeout paths. */
2798 static uint32_t volatile xxx;
2799#endif
2800 if ( ( (iSync2Loops & 0x3ff) == 0
2801 && ASMReadTSC() - pMySync->uTscStart > pMySync->cMaxTscTicks)
2802#if 0 /* This is crazy, I know, but enable this code and the results are markedly better when enabled on the 1.4GHz AMD (debug). */
2803 || (!fIsMaster && (++xxx & 0xf) == 0)
2804#endif
2805 )
2806 {
2807 /* Try switch our own state into timeout mode so the master cannot tell us to 'GO',
2808 ignore the timeout if we've got the go ahead already (simpler). */
2809 if (ASMAtomicCmpXchgU32(&pMySync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_READY))
2810 {
2811 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: timeout\n", fIsMaster ? "master" : "worker"));
2812 ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_STEADY);
2813 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
2814 return false;
2815 }
2816 }
2817 iSync2Loops++;
2818 }
2819
2820 /*
2821 * Interrupts are now disabled and will remain disabled until we do
2822 * TSCDELTA_MASTER_SYNC_AFTER / TSCDELTA_OTHER_SYNC_AFTER.
2823 */
2824 *pfEFlags = fEFlags;
2825
2826 /*
2827 * The worker tells the master that it is on its mark and that the master
2828 * need to get into position as well.
2829 */
2830 if (!fIsMaster)
2831 {
2832 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
2833 { /* likely */ }
2834 else
2835 {
2836 ASMSetFlags(fEFlags);
2837 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #3 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
2838 return false;
2839 }
2840 }
2841
2842 /*
2843 * The master sends the 'go' to the worker and wait for ACK.
2844 */
2845 if (fIsMaster)
2846 {
2847 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
2848 { /* likely */ }
2849 else
2850 {
2851 ASMSetFlags(fEFlags);
2852 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #4 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
2853 return false;
2854 }
2855 }
2856
2857 /*
2858 * Wait for the 'go' signal (ack in the master case).
2859 */
2860 TSCDELTA_DBG_START_LOOP();
2861 for (;;)
2862 {
2863 u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
2864 if (u32Tmp == GIP_TSC_DELTA_SYNC2_GO)
2865 break;
2866 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY))
2867 { /* likely */ }
2868 else
2869 {
2870 ASMSetFlags(fEFlags);
2871 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #5 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
2872 return false;
2873 }
2874
2875 TSCDELTA_DBG_CHECK_LOOP();
2876 ASMNopPause();
2877 }
2878
2879 /*
2880 * The worker acks the 'go' (shouldn't fail).
2881 */
2882 if (!fIsMaster)
2883 {
2884 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
2885 { /* likely */ }
2886 else
2887 {
2888 ASMSetFlags(fEFlags);
2889 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #6 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
2890 return false;
2891 }
2892 }
2893
2894 /*
2895 * Try enter mostly lockstep execution with it.
2896 */
2897 for (;;)
2898 {
2899 uint32_t iOtherSeq1, iOtherSeq2;
2900 ASMCompilerBarrier();
2901 ASMSerializeInstruction();
2902
2903 ASMAtomicWriteU32(&pMySync->uSyncSeq, iMySeq);
2904 ASMNopPause();
2905 iOtherSeq1 = ASMAtomicXchgU32(&pOtherSync->uSyncSeq, iMySeq);
2906 ASMNopPause();
2907 iOtherSeq2 = ASMAtomicReadU32(&pMySync->uSyncSeq);
2908
2909 ASMCompilerBarrier();
2910 if (iOtherSeq1 == iOtherSeq2)
2911 return true;
2912
2913 /* Did the other guy give up? Should we give up? */
2914 if ( iOtherSeq1 == UINT32_MAX
2915 || iOtherSeq2 == UINT32_MAX)
2916 return true;
2917 if (++iMySeq >= iMaxSeq)
2918 {
2919 ASMAtomicWriteU32(&pMySync->uSyncSeq, UINT32_MAX);
2920 return true;
2921 }
2922 ASMNopPause();
2923 }
2924}
2925
/*
 * Master side wrapper around supdrvTscDeltaSync2_Before().
 *
 * When the synchronization fails this executes a 'break' statement, so the
 * macro must be used directly inside the loop the caller wants to leave.  The
 * trailing 'else do {} while (0)' swallows the caller's semicolon and guards
 * against dangling-else surprises.
 */
#define TSCDELTA_MASTER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (!RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/master: #89\n")); \
        break; \
    } \
    else do {} while (0)
/*
 * Worker ("other") side wrapper around supdrvTscDeltaSync2_Before().
 *
 * When the synchronization fails this executes a 'break' statement, so the
 * macro must be used directly inside the loop the caller wants to leave.
 */
#define TSCDELTA_OTHER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (!RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/other: #89\n")); \
        break; \
    } \
    else do {} while (0)
2942
2943
2944static bool supdrvTscDeltaSync2_After(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
2945 bool fIsMaster, RTCCUINTREG fEFlags)
2946{
2947 TSCDELTA_DBG_VARS();
2948 RT_NOREF1(pOtherSync);
2949
2950 /*
2951 * Wait for the 'ready' signal. In the master's case, this means the
2952 * worker has completed its data collection, while in the worker's case it
2953 * means the master is done processing the data and it's time for the next
2954 * loop iteration (or whatever).
2955 */
2956 ASMSetFlags(fEFlags);
2957 TSCDELTA_DBG_START_LOOP();
2958 for (;;)
2959 {
2960 uint32_t u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
2961 if ( u32Tmp == GIP_TSC_DELTA_SYNC2_READY
2962 || (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY && !fIsMaster) /* kicked twice => race */ )
2963 return true;
2964 ASMNopPause();
2965 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_GO))
2966 { /* likely */}
2967 else
2968 {
2969 TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #1 u32Tmp=%#x\n", u32Tmp));
2970 return false; /* shouldn't ever happen! */
2971 }
2972 TSCDELTA_DBG_CHECK_LOOP();
2973 ASMNopPause();
2974 }
2975}
2976
/*
 * Master side wrapper around supdrvTscDeltaSync2_After().
 *
 * Executes 'break' on failure, so it must sit directly inside the loop the
 * caller wants to leave.
 */
#define TSCDELTA_MASTER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (!RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_fEFlags))) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/after/master: #97\n")); \
        break; \
    } \
    else do {} while (0)
2985
/*
 * Master: tell the worker that we're done processing the data and ready for
 * the next round, by switching the worker's sync variable from GO to READY.
 *
 * Executes 'break' if the worker's sync variable was not in the GO state, so
 * it must sit directly inside the loop the caller wants to leave.
 */
#define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pMySync, a_pOtherSync) \
    if (!RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
    { \
        TSCDELTA_DBG_SYNC_MSG(("sync/after/master: #99 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
        break; \
    } \
    else do {} while (0)
2997
/*
 * Worker: tell the master that we're done collecting data (master's sync
 * variable GO -> READY), then wait in supdrvTscDeltaSync2_After() for the
 * next round to start.
 *
 * Executes 'break' on either failure, so it must sit directly inside the loop
 * the caller wants to leave.  If the hand-over to the master fails, the flags
 * are restored here since supdrvTscDeltaSync2_After() is never reached.
 */
#define TSCDELTA_OTHER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (true) { \
        if (!RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
        { \
            ASMSetFlags(a_fEFlags); \
            TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #0 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
            break; \
        } \
        if (!RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_fEFlags))) \
        { \
            TSCDELTA_DBG_SYNC_MSG9(("sync/after/other: #98\n")); \
            break; \
        } \
    } else do {} while (0)
3019/** @} */
3020
3021
3022#ifdef GIP_TSC_DELTA_METHOD_1
3023/**
3024 * TSC delta measurement algorithm \#1 (GIP_TSC_DELTA_METHOD_1).
3025 *
3026 *
3027 * We ignore the first few runs of the loop in order to prime the
3028 * cache. Also, we need to be careful about using 'pause' instruction
3029 * in critical busy-wait loops in this code - it can cause undesired
3030 * behaviour with hyperthreading.
3031 *
3032 * We try to minimize the measurement error by computing the minimum
3033 * read time of the compare statement in the worker by taking TSC
3034 * measurements across it.
3035 *
3036 * It must be noted that the computed minimum read time is mostly to
3037 * eliminate huge deltas when the worker is too early and doesn't by
3038 * itself help produce more accurate deltas. We allow two times the
3039 * computed minimum as an arbitrary acceptable threshold. Therefore,
3040 * it is still possible to get negative deltas where there are none
3041 * when the worker is earlier. As long as these occasional negative
3042 * deltas are lower than the time it takes to exit guest-context and
3043 * the OS to reschedule EMT on a different CPU, we won't expose a TSC
3044 * that jumped backwards. It is due to the existence of the negative
3045 * deltas that we don't recompute the delta with the master and
3046 * worker interchanged to eliminate the remaining measurement error.
3047 *
3048 *
3049 * @param pArgs The argument/state data.
3050 * @param pMySync My synchronization structure.
3051 * @param pOtherSync My partner's synchronization structure.
3052 * @param fIsMaster Set if master, clear if worker.
3053 * @param iTry The attempt number.
3054 */
3055static void supdrvTscDeltaMethod1Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3056 bool fIsMaster, uint32_t iTry)
3057{
3058 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3059 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3060 uint64_t uMinCmpReadTime = UINT64_MAX;
3061 unsigned iLoop;
3062 NOREF(iTry);
3063
3064 for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
3065 {
3066 RTCCUINTREG fEFlags;
3067 if (fIsMaster)
3068 {
3069 /*
3070 * The master.
3071 */
3072 AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
3073 ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
3074 pGipCpuMaster->u64TSCSample, pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, pArgs->pDevExt->idGipMaster));
3075 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3076
3077 do
3078 {
3079 ASMSerializeInstruction();
3080 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
3081 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
3082
3083 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3084
3085 /* Process the data. */
3086 if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
3087 {
3088 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
3089 {
3090 int64_t iDelta = pGipCpuWorker->u64TSCSample
3091 - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
3092 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3093 ? iDelta < pGipCpuWorker->i64TSCDelta
3094 : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
3095 pGipCpuWorker->i64TSCDelta = iDelta;
3096 }
3097 }
3098
3099 /* Reset our TSC sample and tell the worker to move on. */
3100 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
3101 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3102 }
3103 else
3104 {
3105 /*
3106 * The worker.
3107 */
3108 uint64_t uTscWorker;
3109 uint64_t uTscWorkerFlushed;
3110 uint64_t uCmpReadTime;
3111
3112 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
3113 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3114
3115 /*
3116 * Keep reading the TSC until we notice that the master has read his. Reading
3117 * the TSC -after- the master has updated the memory is way too late. We thus
3118 * compensate by trying to measure how long it took for the worker to notice
3119 * the memory flushed from the master.
3120 */
3121 do
3122 {
3123 ASMSerializeInstruction();
3124 uTscWorker = ASMReadTSC();
3125 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
3126 ASMSerializeInstruction();
3127 uTscWorkerFlushed = ASMReadTSC();
3128
3129 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
3130 if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
3131 {
3132 /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
3133 if (uCmpReadTime < (uMinCmpReadTime << 1))
3134 {
3135 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
3136 if (uCmpReadTime < uMinCmpReadTime)
3137 uMinCmpReadTime = uCmpReadTime;
3138 }
3139 else
3140 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
3141 }
3142 else if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
3143 {
3144 if (uCmpReadTime < uMinCmpReadTime)
3145 uMinCmpReadTime = uCmpReadTime;
3146 }
3147
3148 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3149 }
3150 }
3151
3152 TSCDELTA_DBG_SYNC_MSG9(("sync/method1loop/%s: #92 iLoop=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iLoop,
3153 pMySync->uSyncVar));
3154
3155 /*
3156 * We must reset the worker TSC sample value in case it gets picked as a
3157 * GIP master later on (it's trashed above, naturally).
3158 */
3159 if (!fIsMaster)
3160 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
3161}
3162#endif /* GIP_TSC_DELTA_METHOD_1 */
3163
3164
3165#ifdef GIP_TSC_DELTA_METHOD_2
3166/*
3167 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
3168 */
3169
/* Number of priming rounds at the start of a method 2 measurement (none at present). */
# define GIP_TSC_DELTA_M2_PRIMER_LOOPS  0
/* Total number of rounds in a method 2 measurement: 7 body rounds plus the priming rounds. */
# define GIP_TSC_DELTA_M2_LOOPS         (7 + GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3172
3173
3174static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs)
3175{
3176 int64_t iMasterTscDelta = pArgs->pMaster->i64TSCDelta;
3177 int64_t iBestDelta = pArgs->pWorker->i64TSCDelta;
3178 uint32_t idxResult;
3179 uint32_t cHits = 0;
3180
3181 /*
3182 * Look for matching entries in the master and worker tables.
3183 */
3184 for (idxResult = 0; idxResult < RT_ELEMENTS(pArgs->uMaster.M2.Data.aResults); idxResult++)
3185 {
3186 uint32_t idxOther = pArgs->uMaster.M2.Data.aResults[idxResult].iSeqOther;
3187 if (idxOther & 1)
3188 {
3189 idxOther >>= 1;
3190 if (idxOther < RT_ELEMENTS(pArgs->uWorker.M2.Data.aResults))
3191 {
3192 if (pArgs->uWorker.M2.Data.aResults[idxOther].iSeqOther == pArgs->uMaster.M2.Data.aResults[idxResult].iSeqMine)
3193 {
3194 int64_t iDelta;
3195 iDelta = pArgs->uWorker.M2.Data.aResults[idxOther].uTsc
3196 - (pArgs->uMaster.M2.Data.aResults[idxResult].uTsc - iMasterTscDelta);
3197 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3198 ? iDelta < iBestDelta
3199 : iDelta > iBestDelta || iBestDelta == INT64_MAX)
3200 iBestDelta = iDelta;
3201 cHits++;
3202 }
3203 }
3204 }
3205 }
3206
3207 /*
3208 * Save the results.
3209 */
3210 if (cHits > 2)
3211 pArgs->pWorker->i64TSCDelta = iBestDelta;
3212 pArgs->uMaster.M2.cHits += cHits;
3213}
3214
3215
3216/**
3217 * The core function of the 2nd TSC delta measurement algorithm.
3218 *
3219 * The idea here is that we have the two CPUs execute the exact same code
3220 * collecting a largish set of TSC samples. The code has one data dependency on
3221 * the other CPU which intention it is to synchronize the execution as well as
3222 * help cross references the two sets of TSC samples (the sequence numbers).
3223 *
3224 * The @a fLag parameter is used to modify the execution a tiny bit on one or
3225 * both of the CPUs. When @a fLag differs between the CPUs, it is thought that
3226 * it will help with making the CPUs enter lock step execution occasionally.
3227 *
3228 */
3229static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
3230{
3231 SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
3232 uint32_t cLeft = RT_ELEMENTS(pMyData->aResults);
3233
3234 ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
3235 ASMSerializeInstruction();
3236 while (cLeft-- > 0)
3237 {
3238 uint64_t uTsc;
3239 uint32_t iSeqMine = ASMAtomicIncU32(&pMyData->iCurSeqNo);
3240 uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
3241 ASMCompilerBarrier();
3242 ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
3243 uTsc = ASMReadTSC();
3244 ASMAtomicIncU32(&pMyData->iCurSeqNo);
3245 ASMCompilerBarrier();
3246 ASMSerializeInstruction();
3247 pEntry->iSeqMine = iSeqMine;
3248 pEntry->iSeqOther = iSeqOther;
3249 pEntry->uTsc = uTsc;
3250 pEntry++;
3251 ASMSerializeInstruction();
3252 if (fLag)
3253 ASMNopPause();
3254 }
3255}
3256
3257
3258/**
3259 * TSC delta measurement algorithm \#2 (GIP_TSC_DELTA_METHOD_2).
3260 *
3261 * See supdrvTscDeltaMethod2CollectData for algorithm details.
3262 *
3263 * @param pArgs The argument/state data.
3264 * @param pMySync My synchronization structure.
3265 * @param pOtherSync My partner's synchronization structure.
3266 * @param fIsMaster Set if master, clear if worker.
3267 * @param iTry The attempt number.
3268 */
3269static void supdrvTscDeltaMethod2Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3270 bool fIsMaster, uint32_t iTry)
3271{
3272 unsigned iLoop;
3273 RT_NOREF1(iTry);
3274
3275 for (iLoop = 0; iLoop < GIP_TSC_DELTA_M2_LOOPS; iLoop++)
3276 {
3277 RTCCUINTREG fEFlags;
3278 if (fIsMaster)
3279 {
3280 /*
3281 * Adjust the loop lag fudge.
3282 */
3283# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3284 if (iLoop < GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3285 {
3286 /* Lag during the priming to be nice to everyone.. */
3287 pArgs->uMaster.M2.fLag = true;
3288 pArgs->uWorker.M2.fLag = true;
3289 }
3290 else
3291# endif
3292 if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4)
3293 {
3294 /* 25 % of the body without lagging. */
3295 pArgs->uMaster.M2.fLag = false;
3296 pArgs->uWorker.M2.fLag = false;
3297 }
3298 else if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4 * 2)
3299 {
3300 /* 25 % of the body with both lagging. */
3301 pArgs->uMaster.M2.fLag = true;
3302 pArgs->uWorker.M2.fLag = true;
3303 }
3304 else
3305 {
3306 /* 50% of the body with alternating lag. */
3307 pArgs->uMaster.M2.fLag = (iLoop & 1) == 0;
3308 pArgs->uWorker.M2.fLag= (iLoop & 1) == 1;
3309 }
3310
3311 /*
3312 * Sync up with the worker and collect data.
3313 */
3314 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3315 supdrvTscDeltaMethod2CollectData(&pArgs->uMaster.M2.Data, &pArgs->uWorker.M2.Data.iCurSeqNo, pArgs->uMaster.M2.fLag);
3316 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3317
3318 /*
3319 * Process the data.
3320 */
3321# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3322 if (iLoop >= GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3323# endif
3324 supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs);
3325
3326 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3327 }
3328 else
3329 {
3330 /*
3331 * The worker.
3332 */
3333 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3334 supdrvTscDeltaMethod2CollectData(&pArgs->uWorker.M2.Data, &pArgs->uMaster.M2.Data.iCurSeqNo, pArgs->uWorker.M2.fLag);
3335 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3336 }
3337 }
3338}
3339
3340#endif /* GIP_TSC_DELTA_METHOD_2 */
3341
3342
3343
3344static int supdrvTscDeltaVerify(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync,
3345 PSUPTSCDELTASYNC2 pOtherSync, bool fIsMaster, int64_t iWorkerTscDelta)
3346{
3347 /*PSUPGIPCPU pGipCpuWorker = pArgs->pWorker; - unused */
3348 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3349 uint32_t i;
3350 TSCDELTA_DBG_VARS();
3351
3352 for (;;)
3353 {
3354 RTCCUINTREG fEFlags;
3355 AssertCompile((RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) & 1) == 0);
3356 AssertCompile(RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) == RT_ELEMENTS(pArgs->uWorker.Verify.auTscs));
3357
3358 if (fIsMaster)
3359 {
3360 uint64_t uTscWorker;
3361 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3362
3363 /*
3364 * Collect TSC, master goes first.
3365 */
3366 for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i += 2)
3367 {
3368 /* Read, kick & wait #1. */
3369 uint64_t register uTsc = ASMReadTSC();
3370 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
3371 ASMSerializeInstruction();
3372 pArgs->uMaster.Verify.auTscs[i] = uTsc;
3373 TSCDELTA_DBG_START_LOOP();
3374 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
3375 {
3376 TSCDELTA_DBG_CHECK_LOOP();
3377 ASMNopPause();
3378 }
3379
3380 /* Read, kick & wait #2. */
3381 uTsc = ASMReadTSC();
3382 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
3383 ASMSerializeInstruction();
3384 pArgs->uMaster.Verify.auTscs[i + 1] = uTsc;
3385 TSCDELTA_DBG_START_LOOP();
3386 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
3387 {
3388 TSCDELTA_DBG_CHECK_LOOP();
3389 ASMNopPause();
3390 }
3391 }
3392
3393 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3394
3395 /*
3396 * Process the data.
3397 */
3398#ifdef TSCDELTA_VERIFY_WITH_STATS
3399 pArgs->cMaxVerifyTscTicks = INT64_MIN;
3400 pArgs->cMinVerifyTscTicks = INT64_MAX;
3401 pArgs->iVerifyBadTscDiff = 0;
3402#endif
3403 ASMAtomicWriteS32(&pArgs->rcVerify, VINF_SUCCESS);
3404 uTscWorker = 0;
3405 for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i++)
3406 {
3407 /* Master vs previous worker entry. */
3408 uint64_t uTscMaster = pArgs->uMaster.Verify.auTscs[i] - pGipCpuMaster->i64TSCDelta;
3409 int64_t iDiff;
3410 if (i > 0)
3411 {
3412 iDiff = uTscMaster - uTscWorker;
3413#ifdef TSCDELTA_VERIFY_WITH_STATS
3414 if (iDiff > pArgs->cMaxVerifyTscTicks)
3415 pArgs->cMaxVerifyTscTicks = iDiff;
3416 if (iDiff < pArgs->cMinVerifyTscTicks)
3417 pArgs->cMinVerifyTscTicks = iDiff;
3418#endif
3419 if (iDiff < 0)
3420 {
3421#ifdef TSCDELTA_VERIFY_WITH_STATS
3422 pArgs->iVerifyBadTscDiff = -iDiff;
3423#endif
3424 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
3425 break;
3426 }
3427 }
3428
3429 /* Worker vs master. */
3430 uTscWorker = pArgs->uWorker.Verify.auTscs[i] - iWorkerTscDelta;
3431 iDiff = uTscWorker - uTscMaster;
3432#ifdef TSCDELTA_VERIFY_WITH_STATS
3433 if (iDiff > pArgs->cMaxVerifyTscTicks)
3434 pArgs->cMaxVerifyTscTicks = iDiff;
3435 if (iDiff < pArgs->cMinVerifyTscTicks)
3436 pArgs->cMinVerifyTscTicks = iDiff;
3437#endif
3438 if (iDiff < 0)
3439 {
3440#ifdef TSCDELTA_VERIFY_WITH_STATS
3441 pArgs->iVerifyBadTscDiff = iDiff;
3442#endif
3443 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
3444 break;
3445 }
3446 }
3447
3448 /* Done. */
3449 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3450 }
3451 else
3452 {
3453 /*
3454 * The worker, master leads.
3455 */
3456 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3457
3458 for (i = 0; i < RT_ELEMENTS(pArgs->uWorker.Verify.auTscs); i += 2)
3459 {
3460 uint64_t register uTsc;
3461
3462 /* Wait, Read and Kick #1. */
3463 TSCDELTA_DBG_START_LOOP();
3464 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
3465 {
3466 TSCDELTA_DBG_CHECK_LOOP();
3467 ASMNopPause();
3468 }
3469 uTsc = ASMReadTSC();
3470 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
3471 ASMSerializeInstruction();
3472 pArgs->uWorker.Verify.auTscs[i] = uTsc;
3473
3474 /* Wait, Read and Kick #2. */
3475 TSCDELTA_DBG_START_LOOP();
3476 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
3477 {
3478 TSCDELTA_DBG_CHECK_LOOP();
3479 ASMNopPause();
3480 }
3481 uTsc = ASMReadTSC();
3482 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
3483 ASMSerializeInstruction();
3484 pArgs->uWorker.Verify.auTscs[i + 1] = uTsc;
3485 }
3486
3487 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3488 }
3489 return pArgs->rcVerify;
3490 }
3491
3492 /*
3493 * Timed out, please retry.
3494 */
3495 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_TRY_AGAIN);
3496 return VERR_TIMEOUT;
3497}
3498
3499
3500
3501/**
3502 * Handles the special abort procedure during synchronization setup in
3503 * supdrvMeasureTscDeltaCallbackUnwrapped().
3504 *
3505 * @returns 0 (dummy, ignored)
3506 * @param pArgs Pointer to argument/state data.
3507 * @param pMySync Pointer to my sync structure.
3508 * @param fIsMaster Set if we're the master, clear if worker.
3509 * @param fTimeout Set if it's a timeout.
3510 */
3511DECL_NO_INLINE(static, int)
3512supdrvMeasureTscDeltaCallbackAbortSyncSetup(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, bool fIsMaster, bool fTimeout)
3513{
3514 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3515 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3516 TSCDELTA_DBG_VARS();
3517 RT_NOREF1(pMySync);
3518
3519 /*
3520 * Clear our sync pointer and make sure the abort flag is set.
3521 */
3522 ASMAtomicWriteNullPtr(ppMySync);
3523 ASMAtomicWriteBool(&pArgs->fAbortSetup, true);
3524 if (fTimeout)
3525 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
3526
3527 /*
3528 * Make sure the other party is out of there and won't be touching our
3529 * sync state again (would cause stack corruption).
3530 */
3531 TSCDELTA_DBG_START_LOOP();
3532 while (ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2) != NULL)
3533 {
3534 ASMNopPause();
3535 ASMNopPause();
3536 ASMNopPause();
3537 TSCDELTA_DBG_CHECK_LOOP();
3538 }
3539
3540 return 0;
3541}
3542
3543
3544/**
3545 * This is used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
3546 * and compute the delta between them.
3547 *
3548 * To reduce code size a good when timeout handling was added, a dummy return
3549 * value had to be added (saves 1-3 lines per timeout case), thus this
3550 * 'Unwrapped' function and the dummy 0 return value.
3551 *
3552 * @returns 0 (dummy, ignored)
3553 * @param idCpu The CPU we are current scheduled on.
3554 * @param pArgs Pointer to a parameter package.
3555 *
3556 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
3557 * read the TSC at exactly the same time on both the master and the
3558 * worker CPUs. Due to DMA, bus arbitration, cache locality,
3559 * contention, SMI, pipelining etc. there is no guaranteed way of
3560 * doing this on x86 CPUs.
3561 */
3562static int supdrvMeasureTscDeltaCallbackUnwrapped(RTCPUID idCpu, PSUPDRVGIPTSCDELTARGS pArgs)
3563{
3564 PSUPDRVDEVEXT pDevExt = pArgs->pDevExt;
3565 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3566 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3567 bool const fIsMaster = idCpu == pGipCpuMaster->idCpu;
3568 uint32_t iTry;
3569 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3570 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3571 SUPTSCDELTASYNC2 MySync;
3572 PSUPTSCDELTASYNC2 pOtherSync;
3573 int rc;
3574 TSCDELTA_DBG_VARS();
3575
3576 /* A bit of paranoia first. */
3577 if (!pGipCpuMaster || !pGipCpuWorker)
3578 return 0;
3579
3580 /*
3581 * If the CPU isn't part of the measurement, return immediately.
3582 */
3583 if ( !fIsMaster
3584 && idCpu != pGipCpuWorker->idCpu)
3585 return 0;
3586
3587 /*
3588 * Set up my synchronization stuff and wait for the other party to show up.
3589 *
3590 * We don't wait forever since the other party may be off fishing (offline,
3591 * spinning with ints disables, whatever), we must play nice to the rest of
3592 * the system as this context generally isn't one in which we will get
3593 * preempted and we may hold up a number of lower priority interrupts.
3594 */
3595 ASMAtomicWriteU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT);
3596 ASMAtomicWritePtr(ppMySync, &MySync);
3597 MySync.uTscStart = ASMReadTSC();
3598 MySync.cMaxTscTicks = pArgs->cMaxTscTicks;
3599
3600 /* Look for the partner, might not be here yet... Special abort considerations. */
3601 iTry = 0;
3602 TSCDELTA_DBG_START_LOOP();
3603 while ((pOtherSync = ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2)) == NULL)
3604 {
3605 ASMNopPause();
3606 if ( ASMAtomicReadBool(&pArgs->fAbortSetup)
3607 || !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuMaster->idCpu) )
3608 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3609 if ( (iTry++ & 0xff) == 0
3610 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3611 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3612 TSCDELTA_DBG_CHECK_LOOP();
3613 ASMNopPause();
3614 }
3615
3616 /* I found my partner, waiting to be found... Special abort considerations. */
3617 if (fIsMaster)
3618 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* parnaoia */
3619 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3620
3621 iTry = 0;
3622 TSCDELTA_DBG_START_LOOP();
3623 while (ASMAtomicReadU32(&MySync.uSyncVar) == GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)
3624 {
3625 ASMNopPause();
3626 if (ASMAtomicReadBool(&pArgs->fAbortSetup))
3627 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3628 if ( (iTry++ & 0xff) == 0
3629 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3630 {
3631 if ( fIsMaster
3632 && !ASMAtomicCmpXchgU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_ABORT, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT))
3633 break; /* race #1: slave has moved on, handle timeout in loop instead. */
3634 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3635 }
3636 TSCDELTA_DBG_CHECK_LOOP();
3637 }
3638
3639 if (!fIsMaster)
3640 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* race #1 */
3641 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3642
3643/** @todo Add a resumable state to pArgs so we don't waste time if we time
3644 * out or something. Timeouts are legit, any of the two CPUs may get
3645 * interrupted. */
3646
3647 /*
3648 * Start by seeing if we have a zero delta between the two CPUs.
3649 * This should normally be the case.
3650 */
3651 rc = supdrvTscDeltaVerify(pArgs, &MySync, pOtherSync, fIsMaster, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3652 if (RT_SUCCESS(rc))
3653 {
3654 if (fIsMaster)
3655 {
3656 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3657 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3658 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3659 }
3660 }
3661 /*
3662 * If the verification didn't time out, do regular delta measurements.
3663 * We retry this until we get a reasonable value.
3664 */
3665 else if (rc != VERR_TIMEOUT)
3666 {
3667 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
3668 for (iTry = 0; iTry < 12; iTry++)
3669 {
3670 /*
3671 * Check the state before we start.
3672 */
3673 uint32_t u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3674 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3675 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3676 {
3677 TSCDELTA_DBG_SYNC_MSG(("sync/loop/%s: #0 iTry=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iTry, u32Tmp));
3678 break;
3679 }
3680
3681 /*
3682 * Do the measurements.
3683 */
3684#ifdef GIP_TSC_DELTA_METHOD_1
3685 supdrvTscDeltaMethod1Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3686#elif defined(GIP_TSC_DELTA_METHOD_2)
3687 supdrvTscDeltaMethod2Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3688#else
3689# error "huh??"
3690#endif
3691
3692 /*
3693 * Check the state.
3694 */
3695 u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3696 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3697 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3698 {
3699 if (fIsMaster)
3700 TSCDELTA_DBG_SYNC_MSG(("sync/loop/master: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3701 else
3702 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/worker: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3703 break;
3704 }
3705
3706 /*
3707 * Success? If so, stop trying. Master decides.
3708 */
3709 if (fIsMaster)
3710 {
3711 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
3712 {
3713 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3714 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3715 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/master: #9 iTry=%u MyState=%#x\n", iTry, MySync.uSyncVar));
3716 break;
3717 }
3718 }
3719 }
3720 if (fIsMaster)
3721 pArgs->iTry = iTry;
3722 }
3723
3724 /*
3725 * End the synchronization dance. We tell the other that we're done,
3726 * then wait for the same kind of reply.
3727 */
3728 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_FINAL);
3729 ASMAtomicWriteNullPtr(ppMySync);
3730 iTry = 0;
3731 TSCDELTA_DBG_START_LOOP();
3732 while (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_FINAL)
3733 {
3734 iTry++;
3735 if ( iTry == 0
3736 && !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuMaster->idCpu))
3737 break; /* this really shouldn't happen. */
3738 TSCDELTA_DBG_CHECK_LOOP();
3739 ASMNopPause();
3740 }
3741
3742 /*
3743 * Collect some runtime stats.
3744 */
3745 if (fIsMaster)
3746 pArgs->cElapsedMasterTscTicks = ASMReadTSC() - MySync.uTscStart;
3747 else
3748 pArgs->cElapsedWorkerTscTicks = ASMReadTSC() - MySync.uTscStart;
3749 return 0;
3750}
3751
3752/**
3753 * Callback used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
3754 * and compute the delta between them.
3755 *
3756 * @param idCpu The CPU we are current scheduled on.
3757 * @param pvUser1 Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
3758 * @param pvUser2 Unused.
3759 */
3760static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3761{
3762 supdrvMeasureTscDeltaCallbackUnwrapped(idCpu, (PSUPDRVGIPTSCDELTARGS)pvUser1);
3763 RT_NOREF1(pvUser2);
3764}
3765
3766
3767/**
3768 * Measures the TSC delta between the master GIP CPU and one specified worker
3769 * CPU.
3770 *
3771 * @returns VBox status code.
3772 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED on pure measurement
3773 * failure.
3774 * @param pDevExt Pointer to the device instance data.
3775 * @param idxWorker The index of the worker CPU from the GIP's array of
3776 * CPUs.
3777 *
3778 * @remarks This must be called with preemption enabled!
3779 */
3780static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
3781{
3782 int rc;
3783 int rc2;
3784 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3785 RTCPUID idMaster = pDevExt->idGipMaster;
3786 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
3787 PSUPGIPCPU pGipCpuMaster;
3788 uint32_t iGipCpuMaster;
3789 uint32_t u32Tmp;
3790
3791 /* Validate input a bit. */
3792 AssertReturn(pGip, VERR_INVALID_PARAMETER);
3793 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
3794 Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3795
3796 /*
3797 * Don't attempt measuring the delta for the GIP master.
3798 */
3799 if (pGipCpuWorker->idCpu == idMaster)
3800 {
3801 if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
3802 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3803 return VINF_SUCCESS;
3804 }
3805
3806 /*
3807 * One measurement at a time, at least for now. We might be using
3808 * broadcast IPIs so, so be nice to the rest of the system.
3809 */
3810#ifdef SUPDRV_USE_MUTEX_FOR_GIP
3811 rc = RTSemMutexRequest(pDevExt->mtxTscDelta, RT_INDEFINITE_WAIT);
3812#else
3813 rc = RTSemFastMutexRequest(pDevExt->mtxTscDelta);
3814#endif
3815 if (RT_FAILURE(rc))
3816 return rc;
3817
3818 /*
3819 * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
3820 * try pick a different master. (This fudge only works with multi core systems.)
3821 * ASSUMES related threads have adjacent APIC IDs. ASSUMES two threads per core.
3822 *
3823 * We skip this on AMDs for now as their HTT is different from Intel's and
3824 * it doesn't seem to have any favorable effect on the results.
3825 *
3826 * If the master is offline, we need a new master too, so share the code.
3827 */
3828 iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
3829 AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
3830 pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
3831 if ( ( (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
3832 && pGip->cOnlineCpus > 2
3833 && ASMHasCpuId()
3834 && ASMIsValidStdRange(ASMCpuId_EAX(0))
3835 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
3836 && ( !ASMIsAmdCpu()
3837 || ASMGetCpuFamily(u32Tmp = ASMCpuId_EAX(1)) > 0x15
3838 || ( ASMGetCpuFamily(u32Tmp) == 0x15 /* Piledriver+, not bulldozer (FX-4150 didn't like it). */
3839 && ASMGetCpuModelAMD(u32Tmp) >= 0x02) ) )
3840 || !RTMpIsCpuOnline(idMaster) )
3841 {
3842 uint32_t i;
3843 for (i = 0; i < pGip->cCpus; i++)
3844 if ( i != iGipCpuMaster
3845 && i != idxWorker
3846 && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
3847 && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
3848 && pGip->aCPUs[i].idCpu != NIL_RTCPUID
3849 && pGip->aCPUs[i].idCpu != idMaster /* paranoia starts here... */
3850 && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu
3851 && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
3852 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic
3853 && RTMpIsCpuOnline(pGip->aCPUs[i].idCpu))
3854 {
3855 iGipCpuMaster = i;
3856 pGipCpuMaster = &pGip->aCPUs[i];
3857 idMaster = pGipCpuMaster->idCpu;
3858 break;
3859 }
3860 }
3861
3862 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
3863 {
3864 /*
3865 * Initialize data package for the RTMpOnPair callback.
3866 */
3867 PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)RTMemAllocZ(sizeof(*pArgs));
3868 if (pArgs)
3869 {
3870 pArgs->pWorker = pGipCpuWorker;
3871 pArgs->pMaster = pGipCpuMaster;
3872 pArgs->pDevExt = pDevExt;
3873 pArgs->pSyncMaster = NULL;
3874 pArgs->pSyncWorker = NULL;
3875 pArgs->cMaxTscTicks = ASMAtomicReadU64(&pGip->u64CpuHz) / 512; /* 1953 us */
3876
3877 /*
3878 * Do the RTMpOnPair call. We reset i64TSCDelta first so we
3879 * and supdrvMeasureTscDeltaCallback can use it as a success check.
3880 */
3881 /** @todo Store the i64TSCDelta result in pArgs first? Perhaps deals with
3882 * that when doing the restart loop reorg. */
3883 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
3884 rc = RTMpOnPair(pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, RTMPON_F_CONCURRENT_EXEC,
3885 supdrvMeasureTscDeltaCallback, pArgs, NULL);
3886 if (RT_SUCCESS(rc))
3887 {
3888#if 0
3889 SUPR0Printf("mponpair ticks: %9llu %9llu max: %9llu iTry: %u%s\n", pArgs->cElapsedMasterTscTicks,
3890 pArgs->cElapsedWorkerTscTicks, pArgs->cMaxTscTicks, pArgs->iTry,
3891 pArgs->fTimedOut ? " timed out" :"");
3892#endif
3893#if 0
3894 SUPR0Printf("rcVerify=%d iVerifyBadTscDiff=%lld cMinVerifyTscTicks=%lld cMaxVerifyTscTicks=%lld\n",
3895 pArgs->rcVerify, pArgs->iVerifyBadTscDiff, pArgs->cMinVerifyTscTicks, pArgs->cMaxVerifyTscTicks);
3896#endif
3897 if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
3898 {
3899 /*
3900 * Work the TSC delta applicability rating. It starts
3901 * optimistic in supdrvGipInit, we downgrade it here.
3902 */
3903 SUPGIPUSETSCDELTA enmRating;
3904 if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
3905 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
3906 enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
3907 else if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
3908 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
3909 enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
3910 else
3911 enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
3912 if (pGip->enmUseTscDelta < enmRating)
3913 {
3914 AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
3915 ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
3916 }
3917 }
3918 else
3919 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
3920 }
3921 /** @todo return try-again if we get an offline CPU error. */
3922
3923 RTMemFree(pArgs);
3924 }
3925 else
3926 rc = VERR_NO_MEMORY;
3927 }
3928 else
3929 rc = VERR_CPU_OFFLINE;
3930
3931 /*
3932 * We're done now.
3933 */
3934#ifdef SUPDRV_USE_MUTEX_FOR_GIP
3935 rc2 = RTSemMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
3936#else
3937 rc2 = RTSemFastMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
3938#endif
3939 return rc;
3940}
3941
3942
3943/**
3944 * Resets the TSC-delta related TSC samples and optionally the deltas
3945 * themselves.
3946 *
3947 * @param pDevExt Pointer to the device instance data.
3948 * @param fResetTscDeltas Whether the TSC-deltas are also to be reset.
3949 *
3950 * @remarks This might be called while holding a spinlock!
3951 */
3952static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fResetTscDeltas)
3953{
3954 unsigned iCpu;
3955 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3956 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
3957 {
3958 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
3959 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
3960 if (fResetTscDeltas)
3961 {
3962 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpu->iCpuSet);
3963 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
3964 }
3965 }
3966}
3967
3968
3969/**
3970 * Picks an online CPU as the master TSC for TSC-delta computations.
3971 *
3972 * @returns VBox status code.
3973 * @param pDevExt Pointer to the device instance data.
3974 * @param pidxMaster Where to store the CPU array index of the chosen
3975 * master. Optional, can be NULL.
3976 */
3977static int supdrvTscPickMaster(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
3978{
3979 /*
3980 * Pick the first CPU online as the master TSC and make it the new GIP master based
3981 * on the APIC ID.
3982 *
3983 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
3984 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
3985 * master as this point since the sync/async timer isn't created yet.
3986 */
3987 unsigned iCpu;
3988 uint32_t idxMaster = UINT32_MAX;
3989 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3990 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
3991 {
3992 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
3993 if (idxCpu != UINT16_MAX)
3994 {
3995 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
3996 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
3997 {
3998 idxMaster = idxCpu;
3999 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
4000 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpu->idCpu);
4001 if (pidxMaster)
4002 *pidxMaster = idxMaster;
4003 return VINF_SUCCESS;
4004 }
4005 }
4006 }
4007 return VERR_CPU_OFFLINE;
4008}
4009
4010
4011/**
4012 * Performs the initial measurements of the TSC deltas between CPUs.
4013 *
4014 * This is called by supdrvGipCreate(), supdrvGipPowerNotificationCallback() or
4015 * triggered by it if threaded.
4016 *
4017 * @returns VBox status code.
4018 * @param pDevExt Pointer to the device instance data.
4019 *
4020 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
4021 * idCpu, GIP's online CPU set which are populated in
4022 * supdrvGipInitOnCpu().
4023 */
4024static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt)
4025{
4026 PSUPGIPCPU pGipCpuMaster;
4027 unsigned iCpu;
4028 unsigned iOddEven;
4029 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4030 uint32_t idxMaster = UINT32_MAX;
4031 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
4032
4033 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4034 supdrvTscResetSamples(pDevExt, true /* fClearDeltas */);
4035 int rc = supdrvTscPickMaster(pDevExt, &idxMaster);
4036 if (RT_FAILURE(rc))
4037 {
4038 SUPR0Printf("Failed to pick a CPU master for TSC-delta measurements rc=%Rrc\n", rc);
4039 return rc;
4040 }
4041 AssertReturn(idxMaster < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4042 pGipCpuMaster = &pGip->aCPUs[idxMaster];
4043 Assert(pDevExt->idGipMaster == pGipCpuMaster->idCpu);
4044
4045 /*
4046 * If there is only a single CPU online we have nothing to do.
4047 */
4048 if (pGip->cOnlineCpus <= 1)
4049 {
4050 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
4051 return VINF_SUCCESS;
4052 }
4053
4054 /*
4055 * Loop thru the GIP CPU array and get deltas for each CPU (except the
4056 * master). We do the CPUs with the even numbered APIC IDs first so that
4057 * we've got alternative master CPUs to pick from on hyper-threaded systems.
4058 */
4059 for (iOddEven = 0; iOddEven < 2; iOddEven++)
4060 {
4061 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4062 {
4063 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4064 if ( iCpu != idxMaster
4065 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
4066 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4067 {
4068 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
4069 if (RT_FAILURE(rc))
4070 {
4071 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
4072 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
4073 break;
4074 }
4075
4076 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
4077 {
4078 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
4079 rc = VERR_TRY_AGAIN;
4080 break;
4081 }
4082 }
4083 }
4084 }
4085
4086 return rc;
4087}
4088
4089
4090#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4091
4092/**
4093 * Switches the TSC-delta measurement thread into the butchered state.
4094 *
4095 * @returns VBox status code.
4096 * @param pDevExt Pointer to the device instance data.
4097 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
4098 * @param pszFailed An error message to log.
4099 * @param rcFailed The error code to exit the thread with.
4100 */
4101static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
4102{
4103 if (!fSpinlockHeld)
4104 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4105
4106 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
4107 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4108 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", pszFailed, rcFailed));
4109 return rcFailed;
4110}
4111
4112
4113/**
4114 * The TSC-delta measurement thread.
4115 *
4116 * @returns VBox status code.
4117 * @param hThread The thread handle.
4118 * @param pvUser Opaque pointer to the device instance data.
4119 */
4120static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
4121{
4122 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
4123 uint32_t cConsecutiveTimeouts = 0;
4124 int rc = VERR_INTERNAL_ERROR_2;
4125 for (;;)
4126 {
4127 /*
4128 * Switch on the current state.
4129 */
4130 SUPDRVTSCDELTATHREADSTATE enmState;
4131 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4132 enmState = pDevExt->enmTscDeltaThreadState;
4133 switch (enmState)
4134 {
4135 case kTscDeltaThreadState_Creating:
4136 {
4137 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4138 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
4139 if (RT_FAILURE(rc))
4140 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4141 /* fall thru */
4142 }
4143
4144 case kTscDeltaThreadState_Listening:
4145 {
4146 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4147
4148 /*
4149 * Linux counts uninterruptible sleeps as load, hence we shall do a
4150 * regular, interruptible sleep here and ignore wake ups due to signals.
4151 * See task_contributes_to_load() in include/linux/sched.h in the Linux sources.
4152 */
4153 rc = RTThreadUserWaitNoResume(hThread, pDevExt->cMsTscDeltaTimeout);
4154 if ( RT_FAILURE(rc)
4155 && rc != VERR_TIMEOUT
4156 && rc != VERR_INTERRUPTED)
4157 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
4158 RTThreadUserReset(hThread);
4159 break;
4160 }
4161
4162 case kTscDeltaThreadState_WaitAndMeasure:
4163 {
4164 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
4165 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
4166 if (RT_FAILURE(rc))
4167 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4168 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4169 RTThreadSleep(1);
4170 /* fall thru */
4171 }
4172
4173 case kTscDeltaThreadState_Measuring:
4174 {
4175 cConsecutiveTimeouts = 0;
4176 if (pDevExt->fTscThreadRecomputeAllDeltas)
4177 {
4178 int cTries = 8;
4179 int cMsWaitPerTry = 10;
4180 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4181 Assert(pGip);
4182 do
4183 {
4184 RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
4185 rc = supdrvMeasureInitialTscDeltas(pDevExt);
4186 if ( RT_SUCCESS(rc)
4187 || ( RT_FAILURE(rc)
4188 && rc != VERR_TRY_AGAIN
4189 && rc != VERR_CPU_OFFLINE))
4190 {
4191 break;
4192 }
4193 RTThreadSleep(cMsWaitPerTry);
4194 } while (cTries-- > 0);
4195 pDevExt->fTscThreadRecomputeAllDeltas = false;
4196 }
4197 else
4198 {
4199 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4200 unsigned iCpu;
4201
4202 /* Measure TSC-deltas only for the CPUs that are in the set. */
4203 rc = VINF_SUCCESS;
4204 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4205 {
4206 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4207 if (RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4208 {
4209 if (pGipCpuWorker->i64TSCDelta == INT64_MAX)
4210 {
4211 int rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
4212 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4213 rc = rc2;
4214 }
4215 else
4216 {
4217 /*
4218 * The thread/someone must've called SUPR0TscDeltaMeasureBySetIndex(),
4219 * mark the delta as fine to get the timer thread off our back.
4220 */
4221 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
4222 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
4223 }
4224 }
4225 }
4226 }
4227 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4228 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4229 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4230 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4231 Assert(rc != VERR_NOT_AVAILABLE); /* VERR_NOT_AVAILABLE is used as init value, see supdrvTscDeltaThreadInit(). */
4232 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
4233 break;
4234 }
4235
4236 case kTscDeltaThreadState_Terminating:
4237 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
4238 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4239 return VINF_SUCCESS;
4240
4241 case kTscDeltaThreadState_Butchered:
4242 default:
4243 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
4244 }
4245 }
4246 /* not reached */
4247}
4248
4249
4250/**
4251 * Waits for the TSC-delta measurement thread to respond to a state change.
4252 *
4253 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
4254 * other error code on internal error.
4255 *
4256 * @param pDevExt The device instance data.
4257 * @param enmCurState The current state.
4258 * @param enmNewState The new state we're waiting for it to enter.
4259 */
4260static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
4261 SUPDRVTSCDELTATHREADSTATE enmNewState)
4262{
4263 SUPDRVTSCDELTATHREADSTATE enmActualState;
4264 int rc;
4265
4266 /*
4267 * Wait a short while for the expected state transition.
4268 */
4269 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
4270 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4271 enmActualState = pDevExt->enmTscDeltaThreadState;
4272 if (enmActualState == enmNewState)
4273 {
4274 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4275 rc = VINF_SUCCESS;
4276 }
4277 else if (enmActualState == enmCurState)
4278 {
4279 /*
4280 * Wait longer if the state has not yet transitioned to the one we want.
4281 */
4282 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4283 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
4284 if ( RT_SUCCESS(rc)
4285 || rc == VERR_TIMEOUT)
4286 {
4287 /*
4288 * Check the state whether we've succeeded.
4289 */
4290 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4291 enmActualState = pDevExt->enmTscDeltaThreadState;
4292 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4293 if (enmActualState == enmNewState)
4294 rc = VINF_SUCCESS;
4295 else if (enmActualState == enmCurState)
4296 {
4297 rc = VERR_TIMEOUT;
4298 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmActualState=%d enmNewState=%d\n",
4299 enmActualState, enmNewState));
4300 }
4301 else
4302 {
4303 rc = VERR_INTERNAL_ERROR;
4304 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
4305 enmActualState, enmNewState));
4306 }
4307 }
4308 else
4309 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
4310 }
4311 else
4312 {
4313 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4314 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state %d when transitioning from %d to %d\n",
4315 enmActualState, enmCurState, enmNewState));
4316 rc = VERR_INTERNAL_ERROR;
4317 }
4318
4319 return rc;
4320}
4321
4322
4323/**
4324 * Signals the TSC-delta thread to start measuring TSC-deltas.
4325 *
4326 * @param pDevExt Pointer to the device instance data.
4327 * @param fForceAll Force re-calculating TSC-deltas on all CPUs.
4328 */
4329static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll)
4330{
4331 if (pDevExt->hTscDeltaThread != NIL_RTTHREAD)
4332 {
4333 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4334 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4335 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4336 {
4337 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4338 if (fForceAll)
4339 pDevExt->fTscThreadRecomputeAllDeltas = true;
4340 }
4341 else if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_WaitAndMeasure
4342 && fForceAll)
4343 pDevExt->fTscThreadRecomputeAllDeltas = true;
4344 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4345 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4346 }
4347}
4348
4349
4350/**
4351 * Terminates the actual thread running supdrvTscDeltaThread().
4352 *
4353 * This is an internal worker function for supdrvTscDeltaThreadInit() and
4354 * supdrvTscDeltaTerm().
4355 *
4356 * @param pDevExt Pointer to the device instance data.
4357 */
4358static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
4359{
4360 int rc;
4361 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4362 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
4363 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4364 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4365 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
4366 if (RT_FAILURE(rc))
4367 {
4368 /* Signal a few more times before giving up. */
4369 int cTriesLeft = 5;
4370 while (--cTriesLeft > 0)
4371 {
4372 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4373 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
4374 if (rc != VERR_TIMEOUT)
4375 break;
4376 }
4377 }
4378}
4379
4380
4381/**
4382 * Initializes and spawns the TSC-delta measurement thread.
4383 *
4384 * A thread is required for servicing re-measurement requests from events like
4385 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
4386 * under all contexts on all OSs.
4387 *
4388 * @returns VBox status code.
4389 * @param pDevExt Pointer to the device instance data.
4390 *
4391 * @remarks Must only be called -after- initializing GIP and setting up MP
4392 * notifications!
4393 */
4394static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
4395{
4396 int rc;
4397 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4398 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
4399 if (RT_SUCCESS(rc))
4400 {
4401 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
4402 if (RT_SUCCESS(rc))
4403 {
4404 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
4405 pDevExt->cMsTscDeltaTimeout = 60000;
4406 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
4407 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
4408 if (RT_SUCCESS(rc))
4409 {
4410 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
4411 if (RT_SUCCESS(rc))
4412 {
4413 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4414 return rc;
4415 }
4416
4417 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
4418 supdrvTscDeltaThreadTerminate(pDevExt);
4419 }
4420 else
4421 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
4422 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4423 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4424 }
4425 else
4426 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
4427 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4428 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4429 }
4430 else
4431 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
4432
4433 return rc;
4434}
4435
4436
4437/**
4438 * Terminates the TSC-delta measurement thread and cleanup.
4439 *
4440 * @param pDevExt Pointer to the device instance data.
4441 */
4442static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
4443{
4444 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
4445 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4446 {
4447 supdrvTscDeltaThreadTerminate(pDevExt);
4448 }
4449
4450 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
4451 {
4452 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4453 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4454 }
4455
4456 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4457 {
4458 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4459 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4460 }
4461
4462 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4463}
4464
4465#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4466
4467/**
4468 * Measure the TSC delta for the CPU given by its CPU set index.
4469 *
4470 * @returns VBox status code.
4471 * @retval VERR_INTERRUPTED if interrupted while waiting.
4472 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if we were unable to get a
4473 * measurement.
4474 * @retval VERR_CPU_OFFLINE if the specified CPU is offline.
4475 *
4476 * @param pSession The caller's session. GIP must've been mapped.
4477 * @param iCpuSet The CPU set index of the CPU to measure.
4478 * @param fFlags Flags, SUP_TSCDELTA_MEASURE_F_XXX.
4479 * @param cMsWaitRetry Number of milliseconds to wait between each retry.
4480 * @param cMsWaitThread Number of milliseconds to wait for the thread to get
4481 * ready.
4482 * @param cTries Number of times to try, pass 0 for the default.
4483 */
4484SUPR0DECL(int) SUPR0TscDeltaMeasureBySetIndex(PSUPDRVSESSION pSession, uint32_t iCpuSet, uint32_t fFlags,
4485 RTMSINTERVAL cMsWaitRetry, RTMSINTERVAL cMsWaitThread, uint32_t cTries)
4486{
4487 PSUPDRVDEVEXT pDevExt;
4488 PSUPGLOBALINFOPAGE pGip;
4489 uint16_t iGipCpu;
4490 int rc;
4491#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4492 uint64_t msTsStartWait;
4493 uint32_t iWaitLoop;
4494#endif
4495
4496 /*
4497 * Validate and adjust the input.
4498 */
4499 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4500 if (!pSession->fGipReferenced)
4501 return VERR_WRONG_ORDER;
4502
4503 pDevExt = pSession->pDevExt;
4504 AssertReturn(SUP_IS_DEVEXT_VALID(pDevExt), VERR_INVALID_PARAMETER);
4505
4506 pGip = pDevExt->pGip;
4507 AssertPtrReturn(pGip, VERR_INTERNAL_ERROR_2);
4508
4509 AssertReturn(iCpuSet < RTCPUSET_MAX_CPUS, VERR_INVALID_CPU_INDEX);
4510 AssertReturn(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx), VERR_INVALID_CPU_INDEX);
4511 iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
4512 AssertReturn(iGipCpu < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4513
4514 if (fFlags & ~SUP_TSCDELTA_MEASURE_F_VALID_MASK)
4515 return VERR_INVALID_FLAGS;
4516
4517 /*
4518 * The request is a noop if the TSC delta isn't being used.
4519 */
4520 if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4521 return VINF_SUCCESS;
4522
4523 if (cTries == 0)
4524 cTries = 12;
4525 else if (cTries > 256)
4526 cTries = 256;
4527
4528 if (cMsWaitRetry == 0)
4529 cMsWaitRetry = 2;
4530 else if (cMsWaitRetry > 1000)
4531 cMsWaitRetry = 1000;
4532
4533#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4534 /*
4535 * Has the TSC already been measured and we're not forced to redo it?
4536 */
4537 if ( pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX
4538 && !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE))
4539 return VINF_SUCCESS;
4540
4541 /*
4542 * Asynchronous request? Forward it to the thread, no waiting.
4543 */
4544 if (fFlags & SUP_TSCDELTA_MEASURE_F_ASYNC)
4545 {
4546 /** @todo Async. doesn't implement options like retries, waiting. We'll need
4547 * to pass those options to the thread somehow and implement it in the
4548 * thread. Check if anyone uses/needs fAsync before implementing this. */
4549 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4550 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
4551 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4552 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4553 {
4554 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4555 rc = VINF_SUCCESS;
4556 }
4557 else if (pDevExt->enmTscDeltaThreadState != kTscDeltaThreadState_WaitAndMeasure)
4558 rc = VERR_THREAD_IS_DEAD;
4559 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4560 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4561 return VINF_SUCCESS;
4562 }
4563
4564 /*
4565 * If a TSC-delta measurement request is already being serviced by the thread,
4566 * wait 'cTries' times if a retry-timeout is provided, otherwise bail as busy.
4567 */
4568 msTsStartWait = RTTimeSystemMilliTS();
4569 for (iWaitLoop = 0;; iWaitLoop++)
4570 {
4571 uint64_t cMsElapsed;
4572 SUPDRVTSCDELTATHREADSTATE enmState;
4573 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4574 enmState = pDevExt->enmTscDeltaThreadState;
4575 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4576
4577 if (enmState == kTscDeltaThreadState_Measuring)
4578 { /* Must wait, the thread is busy. */ }
4579 else if (enmState == kTscDeltaThreadState_WaitAndMeasure)
4580 { /* Must wait, this state only says what will happen next. */ }
4581 else if (enmState == kTscDeltaThreadState_Terminating)
4582 { /* Must wait, this state only says what should happen next. */ }
4583 else
4584 break; /* All other states, the thread is either idly listening or dead. */
4585
4586 /* Wait or fail. */
4587 if (cMsWaitThread == 0)
4588 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4589 cMsElapsed = RTTimeSystemMilliTS() - msTsStartWait;
4590 if (cMsElapsed >= cMsWaitThread)
4591 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4592
4593 rc = RTThreadSleep(RT_MIN((RTMSINTERVAL)(cMsWaitThread - cMsElapsed), RT_MIN(iWaitLoop + 1, 10)));
4594 if (rc == VERR_INTERRUPTED)
4595 return rc;
4596 }
4597#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4598
4599 /*
4600 * Try measure the TSC delta the given number of times.
4601 */
4602 for (;;)
4603 {
4604 /* Unless we're forced to measure the delta, check whether it's done already. */
4605 if ( !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE)
4606 && pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX)
4607 {
4608 rc = VINF_SUCCESS;
4609 break;
4610 }
4611
4612 /* Measure it. */
4613 rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
4614 if (rc != VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED)
4615 {
4616 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
4617 break;
4618 }
4619
4620 /* Retry? */
4621 if (cTries <= 1)
4622 break;
4623 cTries--;
4624
4625 /* Always delay between retries (be nice to the rest of the system
4626 and avoid the BSOD hounds). */
4627 rc = RTThreadSleep(cMsWaitRetry);
4628 if (rc == VERR_INTERRUPTED)
4629 break;
4630 }
4631
4632 return rc;
4633}
4634
4635
4636/**
4637 * Service a TSC-delta measurement request.
4638 *
4639 * @returns VBox status code.
4640 * @param pDevExt Pointer to the device instance data.
4641 * @param pSession The support driver session.
4642 * @param pReq Pointer to the TSC-delta measurement request.
4643 */
4644int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
4645{
4646 uint32_t cTries;
4647 uint32_t iCpuSet;
4648 uint32_t fFlags;
4649 RTMSINTERVAL cMsWaitRetry;
4650 RT_NOREF1(pDevExt);
4651
4652 /*
4653 * Validate and adjust/resolve the input so they can be passed onto SUPR0TscDeltaMeasureBySetIndex.
4654 */
4655 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
4656
4657 if (pReq->u.In.idCpu == NIL_RTCPUID)
4658 return VERR_INVALID_CPU_ID;
4659 iCpuSet = RTMpCpuIdToSetIndex(pReq->u.In.idCpu);
4660 if (iCpuSet >= RTCPUSET_MAX_CPUS)
4661 return VERR_INVALID_CPU_ID;
4662
4663 cTries = pReq->u.In.cRetries == 0 ? 0 : (uint32_t)pReq->u.In.cRetries + 1;
4664
4665 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
4666
4667 fFlags = 0;
4668 if (pReq->u.In.fAsync)
4669 fFlags |= SUP_TSCDELTA_MEASURE_F_ASYNC;
4670 if (pReq->u.In.fForce)
4671 fFlags |= SUP_TSCDELTA_MEASURE_F_FORCE;
4672
4673 return SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet, fFlags, cMsWaitRetry,
4674 cTries == 0 ? 5 * RT_MS_1SEC : cMsWaitRetry * cTries /*cMsWaitThread*/,
4675 cTries);
4676}
4677
4678
4679/**
4680 * Reads TSC with delta applied.
4681 *
4682 * Will try to resolve delta value INT64_MAX before applying it. This is the
4683 * main purpose of this function, to handle the case where the delta needs to be
4684 * determined.
4685 *
4686 * @returns VBox status code.
4687 * @param pDevExt Pointer to the device instance data.
4688 * @param pSession The support driver session.
4689 * @param pReq Pointer to the TSC-read request.
4690 */
4691int VBOXCALL supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
4692{
4693 PSUPGLOBALINFOPAGE pGip;
4694 int rc;
4695
4696 /*
4697 * Validate. We require the client to have mapped GIP (no asserting on
4698 * ring-3 preconditions).
4699 */
4700 AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
4701 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4702 return VERR_WRONG_ORDER;
4703 pGip = pDevExt->pGip;
4704 AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
4705
4706 /*
4707 * We're usually here because we need to apply delta, but we shouldn't be
4708 * upset if the GIP is some different mode.
4709 */
4710 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4711 {
4712 uint32_t cTries = 0;
4713 for (;;)
4714 {
4715 /*
4716 * Start by gathering the data, using CLI for disabling preemption
4717 * while we do that.
4718 */
4719 RTCCUINTREG fEFlags = ASMIntDisableFlags();
4720 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
4721 int iGipCpu;
4722 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
4723 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
4724 {
4725 int64_t i64Delta = pGip->aCPUs[iGipCpu].i64TSCDelta;
4726 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
4727 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
4728 ASMSetFlags(fEFlags);
4729
4730 /*
4731 * If we're lucky we've got a delta, but no predictions here
4732 * as this I/O control is normally only used when the TSC delta
4733 * is set to INT64_MAX.
4734 */
4735 if (i64Delta != INT64_MAX)
4736 {
4737 pReq->u.Out.u64AdjustedTsc -= i64Delta;
4738 rc = VINF_SUCCESS;
4739 break;
4740 }
4741
4742 /* Give up after a few times. */
4743 if (cTries >= 4)
4744 {
4745 rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
4746 break;
4747 }
4748
4749 /* Need to measure the delta an try again. */
4750 rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
4751 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
4752 /** @todo should probably delay on failure... dpc watchdogs */
4753 }
4754 else
4755 {
4756 /* This really shouldn't happen. */
4757 AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
4758 pReq->u.Out.idApic = ASMGetApicId();
4759 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
4760 ASMSetFlags(fEFlags);
4761 rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
4762 break;
4763 }
4764 }
4765 }
4766 else
4767 {
4768 /*
4769 * No delta to apply. Easy. Deal with preemption the lazy way.
4770 */
4771 RTCCUINTREG fEFlags = ASMIntDisableFlags();
4772 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
4773 int iGipCpu;
4774 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
4775 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
4776 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
4777 else
4778 pReq->u.Out.idApic = ASMGetApicId();
4779 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
4780 ASMSetFlags(fEFlags);
4781 rc = VINF_SUCCESS;
4782 }
4783
4784 return rc;
4785}
4786
4787
4788/**
4789 * Worker for supdrvIOCtl_GipSetFlags.
4790 *
4791 * @returns VBox status code.
4792 * @retval VERR_WRONG_ORDER if an enable-once-per-session flag is set again for
4793 * a session.
4794 *
4795 * @param pDevExt Pointer to the device instance data.
4796 * @param pSession The support driver session.
4797 * @param fOrMask The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
4798 * @param fAndMask The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
4799 *
4800 * @remarks Caller must own the GIP mutex.
4801 *
4802 * @remarks This function doesn't validate any of the flags.
4803 */
4804static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
4805{
4806 uint32_t cRefs;
4807 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4808 AssertMsg((fOrMask & fAndMask) == fOrMask, ("%#x & %#x\n", fOrMask, fAndMask)); /* ASSUMED by code below */
4809
4810 /*
4811 * Compute GIP test-mode flags.
4812 */
4813 if (fOrMask & SUPGIP_FLAGS_TESTING_ENABLE)
4814 {
4815 if (!pSession->fGipTestMode)
4816 {
4817 Assert(pDevExt->cGipTestModeRefs < _64K);
4818 pSession->fGipTestMode = true;
4819 cRefs = ++pDevExt->cGipTestModeRefs;
4820 if (cRefs == 1)
4821 {
4822 fOrMask |= SUPGIP_FLAGS_TESTING | SUPGIP_FLAGS_TESTING_START;
4823 fAndMask &= ~SUPGIP_FLAGS_TESTING_STOP;
4824 }
4825 }
4826 else
4827 {
4828 LogRelMax(10, ("supdrvGipSetFlags: SUPGIP_FLAGS_TESTING_ENABLE already set for this session\n"));
4829 return VERR_WRONG_ORDER;
4830 }
4831 }
4832 else if ( !(fAndMask & SUPGIP_FLAGS_TESTING_ENABLE)
4833 && pSession->fGipTestMode)
4834 {
4835 Assert(pDevExt->cGipTestModeRefs > 0);
4836 Assert(pDevExt->cGipTestModeRefs < _64K);
4837 pSession->fGipTestMode = false;
4838 cRefs = --pDevExt->cGipTestModeRefs;
4839 if (!cRefs)
4840 fOrMask |= SUPGIP_FLAGS_TESTING_STOP;
4841 else
4842 fAndMask |= SUPGIP_FLAGS_TESTING_ENABLE;
4843 }
4844
4845 /*
4846 * Commit the flags. This should be done as atomically as possible
4847 * since the flag consumers won't be holding the GIP mutex.
4848 */
4849 ASMAtomicOrU32(&pGip->fFlags, fOrMask);
4850 ASMAtomicAndU32(&pGip->fFlags, fAndMask);
4851
4852 return VINF_SUCCESS;
4853}
4854
4855
4856/**
4857 * Sets GIP test mode parameters.
4858 *
4859 * @returns VBox status code.
4860 * @param pDevExt Pointer to the device instance data.
4861 * @param pSession The support driver session.
4862 * @param fOrMask The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
4863 * @param fAndMask The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
4864 */
4865int VBOXCALL supdrvIOCtl_GipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
4866{
4867 PSUPGLOBALINFOPAGE pGip;
4868 int rc;
4869
4870 /*
4871 * Validate. We require the client to have mapped GIP (no asserting on
4872 * ring-3 preconditions).
4873 */
4874 AssertPtr(pDevExt); AssertPtr(pSession); /* paranoia^2 */
4875 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4876 return VERR_WRONG_ORDER;
4877 pGip = pDevExt->pGip;
4878 AssertReturn(pGip, VERR_INTERNAL_ERROR_3);
4879
4880 if (fOrMask & ~SUPGIP_FLAGS_VALID_MASK)
4881 return VERR_INVALID_PARAMETER;
4882 if ((fAndMask & ~SUPGIP_FLAGS_VALID_MASK) != ~SUPGIP_FLAGS_VALID_MASK)
4883 return VERR_INVALID_PARAMETER;
4884
4885 /*
4886 * Don't confuse supdrvGipSetFlags or anyone else by both setting
4887 * and clearing the same flags. AND takes precedence.
4888 */
4889 fOrMask &= fAndMask;
4890
4891 /*
4892 * Take the loader lock to avoid having to think about races between two
4893 * clients changing the flags at the same time (state is not simple).
4894 */
4895#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4896 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
4897#else
4898 RTSemFastMutexRequest(pDevExt->mtxGip);
4899#endif
4900
4901 rc = supdrvGipSetFlags(pDevExt, pSession, fOrMask, fAndMask);
4902
4903#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4904 RTSemMutexRelease(pDevExt->mtxGip);
4905#else
4906 RTSemFastMutexRelease(pDevExt->mtxGip);
4907#endif
4908 return rc;
4909}
4910
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette