VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp@ 57218

Last change on this file since 57218 was 57218, checked in by vboxsync, 10 years ago

SUPDrv, tstGIP-2: Add support for GIP flags and testing TSC frequency compatibility.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 176.1 KB
Line 
1/* $Id: SUPDrvGip.cpp 57218 2015-08-06 14:53:27Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
4 */
5
6/*
7 * Copyright (C) 2006-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * The contents of this file may alternatively be used under the terms
18 * of the Common Development and Distribution License Version 1.0
19 * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
20 * VirtualBox OSE distribution, in which case the provisions of the
21 * CDDL are applicable instead of those of the GPL.
22 *
23 * You may elect to license modified versions of this file under the
24 * terms and conditions of either the GPL or the CDDL or both.
25 */
26
27/*******************************************************************************
28* Header Files *
29*******************************************************************************/
30#define LOG_GROUP LOG_GROUP_SUP_DRV
31#define SUPDRV_AGNOSTIC
32#include "SUPDrvInternal.h"
33#ifndef PAGE_SHIFT
34# include <iprt/param.h>
35#endif
36#include <iprt/asm.h>
37#include <iprt/asm-amd64-x86.h>
38#include <iprt/asm-math.h>
39#include <iprt/cpuset.h>
40#include <iprt/handletable.h>
41#include <iprt/mem.h>
42#include <iprt/mp.h>
43#include <iprt/power.h>
44#include <iprt/process.h>
45#include <iprt/semaphore.h>
46#include <iprt/spinlock.h>
47#include <iprt/thread.h>
48#include <iprt/uuid.h>
49#include <iprt/net.h>
50#include <iprt/crc.h>
51#include <iprt/string.h>
52#include <iprt/timer.h>
53#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
54# include <iprt/rand.h>
55# include <iprt/path.h>
56#endif
57#include <iprt/uint128.h>
58#include <iprt/x86.h>
59
60#include <VBox/param.h>
61#include <VBox/log.h>
62#include <VBox/err.h>
63
64#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
65# include "dtrace/SUPDrv.h"
66#else
67/* ... */
68#endif
69
70
71/*******************************************************************************
72* Defined Constants And Macros *
73*******************************************************************************/
/** The frequency by which we recalculate the u32UpdateHz and
 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
 *
 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
 */
#define GIP_UPDATEHZ_RECALC_FREQ            0x800

/** A reserved TSC value used for synchronization as well as measurement of
 * TSC deltas. */
#define GIP_TSC_DELTA_RSVD                  UINT64_MAX
/** The number of TSC delta measurement loops in total (includes primer and
 * read-time loops). */
#define GIP_TSC_DELTA_LOOPS                 96
/** The number of cache primer loops. */
#define GIP_TSC_DELTA_PRIMER_LOOPS          4
/** The number of loops until we keep computing the minimum read time. */
#define GIP_TSC_DELTA_READ_TIME_LOOPS       24

/** The TSC frequency refinement period in seconds.
 * The timer fires after 200ms, then every second, this value just says when
 * to stop it after that. */
#define GIP_TSC_REFINE_PERIOD_IN_SECS       12
/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO    32
/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO        448
/** The TSC delta value for the initial GIP master - 0 in regular builds.
 * To test the delta code this can be set to a non-zero value. */
#if 0
# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
#else
# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
#endif

/* Sanity: the primer loops must leave room for the read-time loops, and both
   together must fit inside the total loop count. */
AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);

/** @def VBOX_SVN_REV
 * The makefile should define this if it can. */
#ifndef VBOX_SVN_REV
# define VBOX_SVN_REV 0
#endif

#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
# define DO_NOT_START_GIP
#endif
120
121
/*******************************************************************************
*   Internal Functions                                                         *
*******************************************************************************/
/* Forward declarations for the GIP timer callbacks, flag setter and the
   TSC-delta measurement helpers defined later in this file. */
static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
static int                supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask);
static void               supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz);
static void               supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas);
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
static int                supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
static void               supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
static void               supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll);
#else
static int                supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt);
static int                supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
#endif


/*******************************************************************************
*   Global Variables                                                           *
*******************************************************************************/
/** The GIP pointer handed out by SUPGetGIP(); NULL until the GIP has been
 *  created/assigned elsewhere. */
DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
144
145
146
147/*
148 *
149 * Misc Common GIP Code
150 * Misc Common GIP Code
151 * Misc Common GIP Code
152 *
153 *
154 */
155
156
157/**
158 * Finds the GIP CPU index corresponding to @a idCpu.
159 *
160 * @returns GIP CPU array index, UINT32_MAX if not found.
161 * @param pGip The GIP.
162 * @param idCpu The CPU ID.
163 */
164static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
165{
166 uint32_t i;
167 for (i = 0; i < pGip->cCpus; i++)
168 if (pGip->aCPUs[i].idCpu == idCpu)
169 return i;
170 return UINT32_MAX;
171}
172
173
174
175/*
176 *
177 * GIP Mapping and Unmapping Related Code.
178 * GIP Mapping and Unmapping Related Code.
179 * GIP Mapping and Unmapping Related Code.
180 *
181 *
182 */
183
184
/**
 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
 * updating.
 *
 * @param   pGip        Pointer to the GIP.  (Currently unused in the body.)
 * @param   pGipCpu     The per CPU structure for this CPU.
 * @param   u64NanoTS   The current time.
 */
static void supdrvGipReInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
{
    /*
     * Here we don't really care about applying the TSC delta. The re-initialization of this
     * value is not relevant especially while (re)starting the GIP as the first few ones will
     * be ignored anyway, see supdrvGipDoUpdateCpu().
     */
    /* Back-date the TSC sample by one update interval so the first real update
       after (re)start sees a plausible previous value. */
    pGipCpu->u64TSC    = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
    pGipCpu->u64NanoTS = u64NanoTS;
}
203
204
/**
 * Set the current TSC and NanoTS value for the CPU.
 *
 * Worker callback for RTMpOnAll; runs on every online CPU.
 *
 * @param   idCpu       The CPU ID. Unused - we have to use the APIC ID.
 * @param   pvUser1     Pointer to the ring-0 GIP mapping.
 * @param   pvUser2     Pointer to the variable holding the current time.
 */
static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
{
    PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
    /* Look up this CPU's GIP entry via the APIC ID of the CPU we're running on. */
    unsigned           iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];

    /* Only touch the entry if the index is in range and matches the scheduler's
       idea of which CPU this is (paranoia against stale APIC-ID mappings). */
    if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
        supdrvGipReInitCpu(pGip, &pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);

    NOREF(pvUser2);
    NOREF(idCpu);
}
223
224
/**
 * State structure for supdrvGipDetectGetGipCpuCallback.
 */
typedef struct SUPDRVGIPDETECTGETCPU
{
    /** Bitmap of APIC IDs that has been seen (initialized to zero).
     *  Used to detect duplicate APIC IDs (paranoia). */
    uint8_t volatile    bmApicId[256 / 8];
    /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
     *  initially).  The callback clears the methods not detected, so the final
     *  value is the intersection across all CPUs. */
    uint32_t volatile   fSupported;
    /** The first callback detecting any kind of range issues (initialized to
     *  NIL_RTCPUID).  Set at most once via compare-exchange by the first CPU
     *  that notices a problem. */
    RTCPUID volatile    idCpuProblem;
} SUPDRVGIPDETECTGETCPU;
/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
242
243
244/**
245 * Checks for alternative ways of getting the CPU ID.
246 *
247 * This also checks the APIC ID, CPU ID and CPU set index values against the
248 * GIP tables.
249 *
250 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
251 * @param pvUser1 Pointer to the state structure.
252 * @param pvUser2 Pointer to the GIP.
253 */
254static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
255{
256 PSUPDRVGIPDETECTGETCPU pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
257 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser2;
258 uint32_t fSupported = 0;
259 uint16_t idApic;
260 int iCpuSet;
261
262 AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
263
264 /*
265 * Check that the CPU ID and CPU set index are interchangable.
266 */
267 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
268 if ((RTCPUID)iCpuSet == idCpu)
269 {
270 AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
271 if ( iCpuSet >= 0
272 && iCpuSet < RTCPUSET_MAX_CPUS
273 && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
274 {
275 /*
276 * Check whether the IDTR.LIMIT contains a CPU number.
277 */
278#ifdef RT_ARCH_X86
279 uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
280#else
281 uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
282#endif
283 RTIDTR Idtr;
284 ASMGetIDTR(&Idtr);
285 if (Idtr.cbIdt >= cbIdt)
286 {
287 uint32_t uTmp = Idtr.cbIdt - cbIdt;
288 uTmp &= RTCPUSET_MAX_CPUS - 1;
289 if (uTmp == idCpu)
290 {
291 RTIDTR Idtr2;
292 ASMGetIDTR(&Idtr2);
293 if (Idtr2.cbIdt == Idtr.cbIdt)
294 fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
295 }
296 }
297
298 /*
299 * Check whether RDTSCP is an option.
300 */
301 if (ASMHasCpuId())
302 {
303 if ( ASMIsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
304 && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
305 {
306 uint32_t uAux;
307 ASMReadTscWithAux(&uAux);
308 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
309 {
310 ASMNopPause();
311 ASMReadTscWithAux(&uAux);
312 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
313 fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
314 }
315 }
316 }
317 }
318 }
319
320 /*
321 * Check that the APIC ID is unique.
322 */
323 idApic = ASMGetApicId();
324 if (RT_LIKELY( idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)
325 && !ASMAtomicBitTestAndSet(pState->bmApicId, idApic)))
326 fSupported |= SUPGIPGETCPU_APIC_ID;
327 else
328 {
329 AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
330 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
331 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - duplicate APIC ID.\n",
332 idCpu, iCpuSet, idApic));
333 }
334
335 /*
336 * Check that the iCpuSet is within the expected range.
337 */
338 if (RT_UNLIKELY( iCpuSet < 0
339 || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
340 || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
341 {
342 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
343 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
344 idCpu, iCpuSet, idApic));
345 }
346 else
347 {
348 RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
349 if (RT_UNLIKELY(idCpu2 != idCpu))
350 {
351 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
352 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
353 idCpu, iCpuSet, idApic, idCpu2));
354 }
355 }
356
357 /*
358 * Update the supported feature mask before we return.
359 */
360 ASMAtomicAndU32(&pState->fSupported, fSupported);
361
362 NOREF(pvUser2);
363}
364
365
366/**
367 * Increase the timer freqency on hosts where this is possible (NT).
368 *
369 * The idea is that more interrupts is better for us... Also, it's better than
370 * we increase the timer frequence, because we might end up getting inaccurate
371 * callbacks if someone else does it.
372 *
373 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
374 */
375static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
376{
377 if (pDevExt->u32SystemTimerGranularityGrant == 0)
378 {
379 uint32_t u32SystemResolution;
380 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
381 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
382 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
383 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
384 )
385 {
386 Assert(RTTimerGetSystemGranularity() <= u32SystemResolution);
387 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
388 }
389 }
390}
391
392
/**
 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
 *
 * @param   pDevExt     Clears u32SystemTimerGranularityGrant.
 */
static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
{
    /* Only release if we actually hold a grant (non-zero resolution). */
    if (pDevExt->u32SystemTimerGranularityGrant)
    {
        int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
        AssertRC(rc2);
        pDevExt->u32SystemTimerGranularityGrant = 0;
    }
}
407
408
/**
 * Maps the GIP into userspace and/or get the physical address of the GIP.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 * @param   ppGipR3     Where to store the address of the ring-3 mapping. (optional)
 * @param   pHCPhysGip  Where to store the physical address. (optional)
 *
 * @remark  There is no reference counting on the mapping, so one call to this
 *          function counts globally as one reference.  One call to
 *          SUPR0GipUnmap() will unmap GIP and remove the session as a GIP user.
 */
SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
{
    int             rc;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
    RTR3PTR         pGipR3  = NIL_RTR3PTR;
    RTHCPHYS        HCPhys  = NIL_RTHCPHYS;
    LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));

    /*
     * Validate
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
    AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);

    /* Everything below is done under the GIP mutex/fast-mutex. */
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif
    if (pDevExt->pGip)
    {
        /*
         * Map it into ring-3 if requested (read-only; cached per session).
         */
        rc = VINF_SUCCESS;
        if (ppGipR3)
        {
            if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
                rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
                                       RTMEM_PROT_READ, RTR0ProcHandleSelf());
            if (RT_SUCCESS(rc))
                pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
        }

        /*
         * Get physical address.
         */
        if (pHCPhysGip && RT_SUCCESS(rc))
            HCPhys = pDevExt->HCPhysGip;

        /*
         * Reference globally.  Only the first user (re)starts GIP updating.
         */
        if (!pSession->fGipReferenced && RT_SUCCESS(rc))
        {
            pSession->fGipReferenced = 1;
            pDevExt->cGipUsers++;
            if (pDevExt->cGipUsers == 1)
            {
                PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
                uint64_t u64NanoTS;

                /*
                 * GIP starts/resumes updating again.  On windows we bump the
                 * host timer frequency to make sure we don't get stuck in guest
                 * mode and to get better timer (and possibly clock) accuracy.
                 */
                LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));

                supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);

                /*
                 * Unless this is the very first mapping (transaction id still 2),
                 * advance every CPU's transaction id to the next multiple of
                 * GIP_UPDATEHZ_RECALC_FREQ * 2 and clear the update-Hz anchor.
                 * NOTE(review): presumably this makes the update-interval/Hz
                 * recalculation restart cleanly after a suspend - verify against
                 * supdrvGipDoUpdateCpu/supdrvGipUpdate.
                 */
                if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
                {
                    unsigned i;
                    for (i = 0; i < pGipR0->cCpus; i++)
                        ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
                                            (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
                                            & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
                    ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
                }

                /*
                 * Re-initialize the per-CPU TSC/NanoTS samples before restarting
                 * the timer, back-dating the timestamp by one update interval.
                 * In invariant/sync mode (or with a single online CPU) only
                 * aCPUs[0] needs doing; otherwise every CPU is visited.
                 */
                u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
                if (   pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
                    || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
                    || RTMpGetOnlineCount() == 1)
                    supdrvGipReInitCpu(pGipR0, &pGipR0->aCPUs[0], u64NanoTS);
                else
                    RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);

                /*
                 * Detect alternative ways to figure the CPU ID in ring-3 and
                 * raw-mode context.  Check the sanity of the APIC IDs, CPU IDs,
                 * and CPU set indexes while we're at it.
                 */
                if (RT_SUCCESS(rc))
                {
                    SUPDRVGIPDETECTGETCPU DetectState;
                    RT_BZERO((void *)&DetectState.bmApicId, sizeof(DetectState.bmApicId));
                    DetectState.fSupported   = UINT32_MAX;
                    DetectState.idCpuProblem = NIL_RTCPUID;
                    rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, &DetectState, pGipR0);
                    if (DetectState.idCpuProblem == NIL_RTCPUID)
                    {
                        if (   DetectState.fSupported != UINT32_MAX
                            && DetectState.fSupported != 0)
                        {
                            /* Publish the intersection of supported methods if it changed. */
                            if (pGipR0->fGetGipCpu != DetectState.fSupported)
                            {
                                pGipR0->fGetGipCpu = DetectState.fSupported;
                                LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", DetectState.fSupported));
                            }
                        }
                        else
                        {
                            LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
                                    DetectState.fSupported));
                            rc = VERR_UNSUPPORTED_CPU;
                        }
                    }
                    else
                    {
                        LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
                                DetectState.idCpuProblem, DetectState.idCpuProblem));
                        rc = VERR_INVALID_CPU_ID;
                    }
                }

                /*
                 * Start the GIP timer if all is well..
                 */
                if (RT_SUCCESS(rc))
                {
#ifndef DO_NOT_START_GIP
                    rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
#endif
                    rc = VINF_SUCCESS;
                }

                /*
                 * Bail out on error: undo the reference and the ring-3 mapping.
                 */
                if (RT_FAILURE(rc))
                {
                    LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
                    pDevExt->cGipUsers = 0;
                    pSession->fGipReferenced = 0;
                    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
                    {
                        int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
                        if (RT_SUCCESS(rc2))
                            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
                    }
                    HCPhys = NIL_RTHCPHYS;
                    pGipR3 = NIL_RTR3PTR;
                }
            }
        }
    }
    else
    {
        rc = VERR_GENERAL_FAILURE;
        Log(("SUPR0GipMap: GIP is not available!\n"));
    }
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    /*
     * Write returns.
     */
    if (pHCPhysGip)
        *pHCPhysGip = HCPhys;
    if (ppGipR3)
        *ppGipR3 = pGipR3;

#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#else
    LogFlow(( "SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
#endif
    return rc;
}
602
603
/**
 * Unmaps any user mapping of the GIP and terminates all GIP access
 * from this session.
 *
 * @returns IPRT status code.
 * @param   pSession    Session to which the GIP mapping should belong.
 */
SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
{
    int             rc = VINF_SUCCESS;
    PSUPDRVDEVEXT   pDevExt = pSession->pDevExt;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
                pSession,
                pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
                pSession->GipMapObjR3));
#else
    LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
#endif
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    /*
     * GIP test-mode session?  Clear the testing flag before tearing down.
     */
    if (   pSession->fGipTestMode
        && pDevExt->pGip)
    {
        uint32_t fFlags = pDevExt->pGip->fFlags;
        fFlags &= ~SUPGIP_FLAGS_TESTING_ENABLE;
        /* Expected to clear pSession->fGipTestMode as well, see the assertion. */
        supdrvGipSetFlags(pDevExt, pSession, 0, fFlags);
        Assert(!pSession->fGipTestMode);
    }

    /*
     * Unmap anything?  (Frees the per-session ring-3 mapping object.)
     */
    if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
        AssertRC(rc);
        if (RT_SUCCESS(rc))
            pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
    }

    /*
     * Dereference global GIP.  The last user stops the GIP timer and
     * releases the elevated system timer granularity.
     */
    if (pSession->fGipReferenced && !rc)
    {
        pSession->fGipReferenced = 0;
        if (   pDevExt->cGipUsers > 0
            && !--pDevExt->cGipUsers)
        {
            LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
#ifndef DO_NOT_START_GIP
            rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
#endif
            supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
        }
    }

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif

    return rc;
}
679
680
/**
 * Gets the GIP pointer.
 *
 * @returns Pointer to the GIP or NULL if it hasn't been set up yet.
 */
SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
{
    return g_pSUPGlobalInfoPage;
}
690
691
692
693
694
695/*
696 *
697 *
698 * GIP Initialization, Termination and CPU Offline / Online Related Code.
699 * GIP Initialization, Termination and CPU Offline / Online Related Code.
700 * GIP Initialization, Termination and CPU Offline / Online Related Code.
701 *
702 *
703 */
704
/**
 * Used by supdrvInitRefineInvariantTscFreqTimer and supdrvGipInitMeasureTscFreq
 * to update the TSC frequency related GIP variables.
 *
 * @param   pGip                The GIP.
 * @param   nsElapsed           The number of nano seconds elapsed.
 * @param   cElapsedTscTicks    The corresponding number of TSC ticks.
 * @param   iTick               The tick number for debugging.
 */
static void supdrvGipInitSetCpuFreq(PSUPGLOBALINFOPAGE pGip, uint64_t nsElapsed, uint64_t cElapsedTscTicks, uint32_t iTick)
{
    /*
     * Calculate the frequency: uCpuHz = cElapsedTscTicks * RT_NS_1SEC / nsElapsed.
     */
    uint64_t uCpuHz;
    if (   cElapsedTscTicks < UINT64_MAX / RT_NS_1SEC
        && nsElapsed < UINT32_MAX)
        /* Fast path: product fits in 64 bits and the divisor in 32 bits. */
        uCpuHz = ASMMultU64ByU32DivByU32(cElapsedTscTicks, RT_NS_1SEC, (uint32_t)nsElapsed);
    else
    {
        /* Slow path: do the arithmetic in 128 bits to avoid overflow. */
        RTUINT128U CpuHz, Tmp, Divisor;
        CpuHz.s.Lo = CpuHz.s.Hi = 0;
        RTUInt128MulU64ByU64(&Tmp, cElapsedTscTicks, RT_NS_1SEC_64);
        RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, nsElapsed));
        uCpuHz = CpuHz.s.Lo;
    }

    /*
     * Update the GIP.
     */
    ASMAtomicWriteU64(&pGip->u64CpuHz, uCpuHz);
    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    {
        /* Non-async modes: aCPUs[0] carries the shared frequency. */
        ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, uCpuHz);

        /* For inspecting the frequency calcs using tstGIP-2, debugger or similar. */
        if (iTick + 1 < pGip->cCpus)
            ASMAtomicWriteU64(&pGip->aCPUs[iTick + 1].u64CpuHz, uCpuHz);
    }
}
745
746
/**
 * Timer callback function for TSC frequency refinement in invariant GIP mode.
 *
 * This is started during driver init and fires once
 * GIP_TSC_REFINE_PERIOD_IN_SECS seconds later.
 *
 * @param   pTimer      The timer.
 * @param   pvUser      Opaque pointer to the device instance data.
 * @param   iTick       The timer tick.
 */
static DECLCALLBACK(void) supdrvInitRefineInvariantTscFreqTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PSUPDRVDEVEXT       pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE  pGip = pDevExt->pGip;
    RTCPUID             idCpu;
    uint64_t            cNsElapsed;
    uint64_t            cTscTicksElapsed;
    uint64_t            nsNow;
    uint64_t            uTsc;
    RTCCUINTREG         fEFlags;

    /* Paranoia. */
    AssertReturnVoid(pGip);
    AssertReturnVoid(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);

    /*
     * If we got a power event, stop the refinement process.
     */
    if (pDevExt->fInvTscRefinePowerEvent)
    {
        int rc = RTTimerStop(pTimer); AssertRC(rc);
        return;
    }

    /*
     * Read the TSC and time, noting which CPU we are on.
     *
     * Don't bother spinning until RTTimeSystemNanoTS changes, since on
     * systems where it matters we're in a context where we cannot waste that
     * much time (DPC watchdog, called from clock interrupt).
     */
    fEFlags = ASMIntDisableFlags();  /* sample TSC, clock and CPU ID back-to-back */
    uTsc    = ASMReadTSC();
    nsNow   = RTTimeSystemNanoTS();
    idCpu   = RTMpCpuId();
    ASMSetFlags(fEFlags);

    cNsElapsed          = nsNow - pDevExt->nsStartInvarTscRefine;
    cTscTicksElapsed    = uTsc  - pDevExt->uTscStartInvarTscRefine;

    /*
     * If the above measurement was taken on a different CPU than the one we
     * started the process on, cTscTicksElapsed will need to be adjusted with
     * the TSC deltas of both the CPUs.
     *
     * We ASSUME that the delta calculation process takes less time than the
     * TSC frequency refinement timer.  If it doesn't, we'll complain and
     * drop the frequency refinement.
     *
     * Note! We cannot entirely trust enmUseTscDelta here because it's
     *       downgraded after each delta calculation.
     */
    if (   idCpu != pDevExt->idCpuInvarTscRefine
        && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        /* Map both the start CPU and the current CPU to their GIP entries and
           fetch their TSC deltas (INT64_MAX == not measured yet). */
        uint32_t iStartCpuSet   = RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine);
        uint32_t iStopCpuSet    = RTMpCpuIdToSetIndex(idCpu);
        uint16_t iStartGipCpu   = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
        uint16_t iStopGipCpu    = iStopCpuSet  < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet]  : UINT16_MAX;
        int64_t  iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
        int64_t  iStopTscDelta  = iStopGipCpu  < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta  : INT64_MAX;
        if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
        {
            if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
            {
                /* cTscTicksElapsed = (uTsc - iStopTscDelta) - (pDevExt->uTscStartInvarTscRefine - iStartTscDelta); */
                cTscTicksElapsed += iStartTscDelta - iStopTscDelta;
            }
        }
        /*
         * Allow 5 times the refinement period to elapse before we give up on the TSC delta
         * calculations.
         */
        else if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * 5 * RT_NS_1SEC_64)
        {
            SUPR0Printf("vboxdrv: Failed to refine invariant TSC frequency because deltas are unavailable after %u (%u) seconds\n",
                        (uint32_t)(cNsElapsed / RT_NS_1SEC), GIP_TSC_REFINE_PERIOD_IN_SECS);
            SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
                        iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
            int rc = RTTimerStop(pTimer); AssertRC(rc);
            return;
        }
    }

    /*
     * Calculate and update the CPU frequency variables in GIP.
     *
     * If there is a GIP user already and we've already refined the frequency
     * a couple of times, don't update it as we want a stable frequency value
     * for all VMs.
     */
    if (   pDevExt->cGipUsers == 0
        || cNsElapsed < RT_NS_1SEC * 2)
    {
        supdrvGipInitSetCpuFreq(pGip, cNsElapsed, cTscTicksElapsed, (uint32_t)iTick);

        /*
         * Stop the timer once we've reached the defined refinement period.
         */
        if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * RT_NS_1SEC_64)
        {
            int rc = RTTimerStop(pTimer);
            AssertRC(rc);
        }
    }
    else
    {
        /* A user has the GIP mapped and we're past the initial window - freeze the value. */
        int rc = RTTimerStop(pTimer);
        AssertRC(rc);
    }
}
870
871
/**
 * @callback_method_impl{FNRTPOWERNOTIFICATION}
 */
static DECLCALLBACK(void) supdrvGipPowerNotificationCallback(RTPOWEREVENT enmEvent, void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    /*
     * If the TSC frequency refinement timer is running, we need to cancel it so it
     * doesn't screw up the frequency after a long suspend.
     *
     * Recalculate all TSC-deltas on host resume as it may have changed, seen
     * on Windows 7 running on the Dell Optiplex Intel Core i5-3570.
     */
    if (enmEvent == RTPOWEREVENT_RESUME)
    {
        /* Flag the refinement timer callback to stop itself. */
        ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
        if (   RT_LIKELY(pGip)
            && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
            && !supdrvOSAreCpusOfflinedOnSuspend())
        {
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
            supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
#else
            /* Queue all currently online CPUs for delta re-measurement. */
            RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
            supdrvMeasureInitialTscDeltas(pDevExt);
#endif
        }
    }
    else if (enmEvent == RTPOWEREVENT_SUSPEND)
        ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
}
905
906
907/**
 * Start the TSC-frequency refinement timer for the invariant TSC GIP mode.
 *
 * We cannot use this in the synchronous and asynchronous TSC GIP modes because
 * the CPU may change the TSC frequency between now and when the timer fires
 * (supdrvInitAsyncRefineTscTimer).
 *
 * @param   pDevExt         Pointer to the device instance data.
 * @param   pGip            Pointer to the GIP.
 */
static void supdrvGipInitStartTimerForRefiningInvariantTscFreq(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip)
{
    uint64_t    u64NanoTS;
    RTCCUINTREG fEFlags;
    int         rc;

    /*
     * Register a power management callback.  Failure here is non-fatal: we
     * only lose power-event invalidation of the refinement anchor.
     */
    pDevExt->fInvTscRefinePowerEvent = false;
    rc = RTPowerNotificationRegister(supdrvGipPowerNotificationCallback, pDevExt);
    AssertRC(rc); /* ignore */

    /*
     * Record the TSC and NanoTS as the starting anchor point for refinement
     * of the TSC.  We try to get as close to a clock tick as possible on
     * systems which do not provide high resolution time, hence the spin
     * below waiting for RTTimeSystemNanoTS() to change.
     */
    u64NanoTS = RTTimeSystemNanoTS();
    while (RTTimeSystemNanoTS() == u64NanoTS)
        ASMNopPause();

    /* Sample TSC + time + CPU id atomically w.r.t. interrupts so the three
       values belong to the same instant on the same CPU. */
    fEFlags = ASMIntDisableFlags();
    pDevExt->uTscStartInvarTscRefine = ASMReadTSC();
    pDevExt->nsStartInvarTscRefine = RTTimeSystemNanoTS();
    pDevExt->idCpuInvarTscRefine = RTMpCpuId();
    ASMSetFlags(fEFlags);

    /*
     * Create a timer that runs on the same CPU so we won't have a dependency
     * on the TSC-delta and can run in parallel to it.  On systems that do not
     * implement CPU specific timers we'll apply deltas in the timer callback,
     * just like we do for CPUs going offline.
     *
     * The longer the refinement interval the better the accuracy, at least in
     * theory.  If it's too long though, ring-3 may already be starting its
     * first VMs before we're done.  On most systems we will be loading the
     * support driver during boot and VMs won't be started for a while yet,
     * it is really only a problem during development (especially with
     * on-demand driver starting on windows).
     *
     * To avoid wasting time doing a long supdrvGipInitMeasureTscFreq() call
     * to calculate the frequency during driver loading, the timer is set
     * to fire after 200 ms the first time.  It will then reschedule itself
     * to fire every second until GIP_TSC_REFINE_PERIOD_IN_SECS has been
     * reached or it notices that there is a user land client with GIP
     * mapped (we want a stable frequency for all VMs).
     */
    rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC,
                         RTTIMER_FLAGS_CPU(RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine)),
                         supdrvInitRefineInvariantTscFreqTimer, pDevExt);
    if (RT_SUCCESS(rc))
    {
        rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
        if (RT_SUCCESS(rc))
            return;
        RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
    }

    /* Fall back to an any-CPU timer when CPU-specific timers are unavailable
       (or the anchor CPU went offline between sampling and timer creation). */
    if (rc == VERR_CPU_OFFLINE || rc == VERR_NOT_SUPPORTED)
    {
        rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC, RTTIMER_FLAGS_CPU_ANY,
                             supdrvInitRefineInvariantTscFreqTimer, pDevExt);
        if (RT_SUCCESS(rc))
        {
            rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
            if (RT_SUCCESS(rc))
                return;
            RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
        }
    }

    /* No refinement timer; the rough frequency from driver load will be used. */
    pDevExt->pInvarTscRefineTimer = NULL;
    OSDBGPRINT(("vboxdrv: Failed to create or start TSC frequency refinement timer: rc=%Rrc\n", rc));
}
992
993
994/**
995 * @callback_method_impl{PFNRTMPWORKER,
996 * RTMpOnSpecific callback for reading TSC and time on the CPU we started
997 * the measurements on.}
998 */
999DECLCALLBACK(void) supdrvGipInitReadTscAndNanoTsOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1000{
1001 RTCCUINTREG fEFlags = ASMIntDisableFlags();
1002 uint64_t *puTscStop = (uint64_t *)pvUser1;
1003 uint64_t *pnsStop = (uint64_t *)pvUser2;
1004
1005 *puTscStop = ASMReadTSC();
1006 *pnsStop = RTTimeSystemNanoTS();
1007
1008 ASMSetFlags(fEFlags);
1009}
1010
1011
/**
 * Measures the TSC frequency of the system.
 *
 * The TSC frequency can vary on systems which are not reported as invariant.
 * On such systems the object of this function is to find out what the nominal,
 * maximum TSC frequency under 'normal' CPU operation.
 *
 * @returns VBox status code.
 * @retval  VERR_INVALID_CPU_INDEX if the GIP/IPRT CPU-index mapping is busted.
 * @retval  VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED if all retries failed.
 *
 * @param   pDevExt         Pointer to the device instance.
 * @param   pGip            Pointer to the GIP.
 * @param   fRough          Set if we're doing the rough calculation that the
 *                          TSC measuring code needs, where accuracy isn't all
 *                          that important (too high is better than too low).
 *                          When clear we try for best accuracy that we can
 *                          achieve in reasonably short time.
 */
static int supdrvGipInitMeasureTscFreq(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, bool fRough)
{
    uint32_t nsTimerIncr = RTTimerGetSystemGranularity();
    int cTriesLeft = fRough ? 4 : 2;
    while (cTriesLeft-- > 0)
    {
        RTCCUINTREG fEFlags;
        uint64_t nsStart;
        uint64_t nsStop;
        uint64_t uTscStart;
        uint64_t uTscStop;
        RTCPUID idCpuStart;
        RTCPUID idCpuStop;

        /*
         * Synchronize with the host OS clock tick on systems without high
         * resolution time API (older Windows version for example).
         */
        nsStart = RTTimeSystemNanoTS();
        while (RTTimeSystemNanoTS() == nsStart)
            ASMNopPause();

        /*
         * Read the TSC and current time, noting which CPU we're on.
         * Interrupts are disabled so the three reads belong together.
         */
        fEFlags = ASMIntDisableFlags();
        uTscStart = ASMReadTSC();
        nsStart = RTTimeSystemNanoTS();
        idCpuStart = RTMpCpuId();
        ASMSetFlags(fEFlags);

        /*
         * Delay for a while.
         */
        if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
        {
            /*
             * Sleep-wait since the TSC frequency is constant, it eases host load.
             * Shorter interval produces more variance in the frequency (esp. Windows).
             * The delay is rounded up to a whole number of timer ticks (minus a
             * 100us margin) so we wake up close to a tick boundary.
             */
            uint64_t msElapsed = 0;
            uint64_t msDelay = ( ((fRough ? 16 : 200) * RT_NS_1MS + nsTimerIncr - 1) / nsTimerIncr * nsTimerIncr - RT_NS_100US )
                             / RT_NS_1MS;
            do
            {
                RTThreadSleep((RTMSINTERVAL)(msDelay - msElapsed));
                nsStop = RTTimeSystemNanoTS();
                msElapsed = (nsStop - nsStart) / RT_NS_1MS;
            } while (msElapsed < msDelay);

            /* Spin to the next tick so the stop sample sits on a tick boundary too. */
            while (RTTimeSystemNanoTS() == nsStop)
                ASMNopPause();
        }
        else
        {
            /*
             * Busy-wait keeping the frequency up (sleeping could let power
             * management clock the CPU down and skew the measurement).
             */
            do
            {
                ASMNopPause();
                nsStop = RTTimeSystemNanoTS();
            } while (nsStop - nsStart < RT_NS_100MS);
        }

        /*
         * Read the TSC and time again.
         */
        fEFlags = ASMIntDisableFlags();
        uTscStop = ASMReadTSC();
        nsStop = RTTimeSystemNanoTS();
        idCpuStop = RTMpCpuId();
        ASMSetFlags(fEFlags);

        /*
         * If the CPU changes, things get a bit complicated and what we
         * can get away with depends on the GIP mode / TSC reliability.
         */
        if (idCpuStop != idCpuStart)
        {
            bool fDoXCall = false;

            /*
             * Synchronous TSC mode: we're probably fine as it's unlikely
             * that we were rescheduled because of TSC throttling or power
             * management reasons, so just go ahead.
             */
            if (pGip->u32Mode == SUPGIPMODE_SYNC_TSC)
            {
                /* Probably ok, maybe we should retry once?. */
                Assert(pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_NOT_APPLICABLE);
            }
            /*
             * If we're just doing the rough measurement, do the cross call and
             * get on with things (we don't have deltas!).
             */
            else if (fRough)
                fDoXCall = true;
            /*
             * Invariant TSC mode: It doesn't matter if we have delta available
             * for both CPUs.  That is not something we can assume at this point.
             *
             * Note! We cannot necessarily trust enmUseTscDelta here because it's
             *       downgraded after each delta calculation and the delta
             *       calculations may not be complete yet.
             */
            else if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
            {
/** @todo This section of code is never reached atm, consider dropping it later on... */
                if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
                {
                    /* Map both CPU ids to GIP entries and fetch their TSC deltas
                       (INT64_MAX == delta not yet measured). */
                    uint32_t iStartCpuSet = RTMpCpuIdToSetIndex(idCpuStart);
                    uint32_t iStopCpuSet = RTMpCpuIdToSetIndex(idCpuStop);
                    uint16_t iStartGipCpu = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                          ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
                    uint16_t iStopGipCpu = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                                         ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
                    int64_t iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
                    int64_t iStopTscDelta = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
                    if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
                    {
                        if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
                        {
                            /* Normalize both samples to the delta-adjusted time base. */
                            uTscStart -= iStartTscDelta;
                            uTscStop -= iStopTscDelta;
                        }
                    }
                    /*
                     * Invalid CPU indexes are not caused by online/offline races, so
                     * we have to trigger driver load failure if that happens as GIP
                     * and IPRT assumptions are busted on this system.
                     */
                    else if (iStopGipCpu >= pGip->cCpus || iStartGipCpu >= pGip->cCpus)
                    {
                        SUPR0Printf("vboxdrv: Unexpected CPU index in supdrvGipInitMeasureTscFreq.\n");
                        SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
                                    iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
                        return VERR_INVALID_CPU_INDEX;
                    }
                    /*
                     * No valid deltas.  We retry, if we're on our last retry
                     * we do the cross call instead just to get a result.  The
                     * frequency will be refined in a few seconds anyway.
                     */
                    else if (cTriesLeft > 0)
                        continue;
                    else
                        fDoXCall = true;
                }
            }
            /*
             * Asynchronous TSC mode: This is bad as the reason we usually
             * use this mode is to deal with variable TSC frequencies and
             * deltas.  So, we need to get the TSC from the same CPU as
             * started it, we also need to keep that CPU busy.  So, retry
             * and fall back to the cross call on the last attempt.
             */
            else
            {
                Assert(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC);
                if (cTriesLeft > 0)
                    continue;
                fDoXCall = true;
            }

            if (fDoXCall)
            {
                /*
                 * Try read the TSC and timestamp on the start CPU.
                 */
                int rc = RTMpOnSpecific(idCpuStart, supdrvGipInitReadTscAndNanoTsOnCpu, &uTscStop, &nsStop);
                if (RT_FAILURE(rc) && (!fRough || cTriesLeft > 0))
                    continue;
            }
        }

        /*
         * Calculate the TSC frequency and update it (shared with the refinement timer).
         */
        supdrvGipInitSetCpuFreq(pGip, nsStop - nsStart, uTscStop - uTscStart, 0);
        return VINF_SUCCESS;
    }

    /* All tries exhausted; rough mode is expected to always succeed above. */
    Assert(!fRough);
    return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
}
1214
1215
1216/**
1217 * Finds our (@a idCpu) entry, or allocates a new one if not found.
1218 *
1219 * @returns Index of the CPU in the cache set.
1220 * @param pGip The GIP.
1221 * @param idCpu The CPU ID.
1222 */
1223static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
1224{
1225 uint32_t i, cTries;
1226
1227 /*
1228 * ASSUMES that CPU IDs are constant.
1229 */
1230 for (i = 0; i < pGip->cCpus; i++)
1231 if (pGip->aCPUs[i].idCpu == idCpu)
1232 return i;
1233
1234 cTries = 0;
1235 do
1236 {
1237 for (i = 0; i < pGip->cCpus; i++)
1238 {
1239 bool fRc;
1240 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
1241 if (fRc)
1242 return i;
1243 }
1244 } while (cTries++ < 32);
1245 AssertReleaseFailed();
1246 return i - 1;
1247}
1248
1249
/**
 * The calling CPU should be accounted as online, update GIP accordingly.
 *
 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
 *
 * Must be called on the CPU being onlined (asserted below) with preemption
 * disabled.
 *
 * @param   pDevExt             The device extension.
 * @param   idCpu               The CPU ID.
 */
static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
{
    int iCpuSet = 0;
    uint16_t idApic = UINT16_MAX;
    uint32_t i = 0;
    uint64_t u64NanoTS = 0;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    AssertPtrReturnVoid(pGip);
    Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
    AssertRelease(idCpu == RTMpCpuId());
    Assert(pGip->cPossibleCpus == RTMpGetCount());

    /*
     * Do this behind a spinlock with interrupts disabled as this can fire
     * on all CPUs simultaneously, see @bugref{6110}.
     */
    RTSpinlockAcquire(pDevExt->hGipSpinlock);

    /*
     * Update the globals.
     */
    ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
    ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    if (iCpuSet >= 0)
    {
        Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
        RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
        RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
    }

    /*
     * Update the entry.  Back-date the timestamp by one update interval so
     * the first real update doesn't see a zero elapsed time.
     */
    u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
    i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);

    supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, pGip->u64CpuHz);

    idApic = ASMGetApicId();
    ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
    ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
    ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);

    /*
     * Update the APIC ID and CPU set index mappings.
     */
    ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
    ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);

    /* Add this CPU to this set of CPUs we need to calculate the TSC-delta for. */
    RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, RTMpCpuIdToSetIndex(idCpu));

    /* Update the Mp online/offline counter. */
    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);

    /* Commit it: the state write is last so readers only see fully initialized entries. */
    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);

    RTSpinlockRelease(pDevExt->hGipSpinlock);
}
1320
1321
1322/**
1323 * RTMpOnSpecific callback wrapper for supdrvGipMpEventOnlineOrInitOnCpu().
1324 *
1325 * @param idCpu The CPU ID we are running on.
1326 * @param pvUser1 Opaque pointer to the device instance data.
1327 * @param pvUser2 Not used.
1328 */
1329static DECLCALLBACK(void) supdrvGipMpEventOnlineCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1330{
1331 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
1332 NOREF(pvUser2);
1333 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1334}
1335
1336
1337/**
1338 * The CPU should be accounted as offline, update the GIP accordingly.
1339 *
1340 * This is used by supdrvGipMpEvent.
1341 *
1342 * @param pDevExt The device extension.
1343 * @param idCpu The CPU ID.
1344 */
1345static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1346{
1347 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1348 int iCpuSet;
1349 unsigned i;
1350
1351 AssertPtrReturnVoid(pGip);
1352 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1353
1354 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1355 AssertReturnVoid(iCpuSet >= 0);
1356
1357 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
1358 AssertReturnVoid(i < pGip->cCpus);
1359 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
1360
1361 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1362 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
1363
1364 /* Update the Mp online/offline counter. */
1365 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1366
1367 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1368 {
1369 /* Reset the TSC delta, we will recalculate it lazily. */
1370 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
1371 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
1372 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
1373 }
1374
1375 /* Commit it. */
1376 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
1377
1378 RTSpinlockRelease(pDevExt->hGipSpinlock);
1379}
1380
1381
/**
 * Multiprocessor event notification callback.
 *
 * This is used to make sure that the GIP master gets passed on to
 * another CPU.  It also updates the associated CPU data.
 *
 * @param   enmEvent            The event.
 * @param   idCpu               The cpu it applies to.
 * @param   pvUser              Pointer to the device extension.
 */
static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    if (pGip)
    {
        RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
        switch (enmEvent)
        {
            case RTMPEVENT_ONLINE:
            {
                /* The online worker must run on the CPU that just came online;
                   if we aren't already on it, cross-call over to it. */
                RTThreadPreemptDisable(&PreemptState);
                if (idCpu == RTMpCpuId())
                {
                    supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
                    RTThreadPreemptRestore(&PreemptState);
                }
                else
                {
                    RTThreadPreemptRestore(&PreemptState);
                    RTMpOnSpecific(idCpu, supdrvGipMpEventOnlineCallback, pDevExt, NULL /* pvUser2 */);
                }

                /*
                 * Recompute TSC-delta for the newly online'd CPU.
                 */
                if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
                {
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
                    supdrvTscDeltaThreadStartMeasurement(pDevExt, false /* fForceAll */);
#else
                    uint32_t iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
                    supdrvMeasureTscDeltaOne(pDevExt, iCpu);
#endif
                }
                break;
            }

            case RTMPEVENT_OFFLINE:
                supdrvGipMpEventOffline(pDevExt, idCpu);
                break;
        }
    }

    /*
     * Make sure there is a master GIP.
     */
    if (enmEvent == RTMPEVENT_OFFLINE)
    {
        RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
        if (idGipMaster == idCpu)
        {
            /*
             * The GIP master is going offline, find a new one.
             */
            bool        fIgnored;
            unsigned    i;
            RTCPUID     idNewGipMaster = NIL_RTCPUID;
            RTCPUSET    OnlineCpus;
            RTMpGetOnlineSet(&OnlineCpus);

            /* Pick the first online CPU that isn't the departing master. */
            for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
                if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
                {
                    RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
                    if (idCurCpu != idGipMaster)
                    {
                        idNewGipMaster = idCurCpu;
                        break;
                    }
                }

            Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
            /* CmpXchg so we don't clobber a handoff done by somebody else in the meantime. */
            ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
            NOREF(fIgnored);
        }
    }
}
1471
1472
1473/**
1474 * On CPU initialization callback for RTMpOnAll.
1475 *
1476 * @param idCpu The CPU ID.
1477 * @param pvUser1 The device extension.
1478 * @param pvUser2 The GIP.
1479 */
1480static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1481{
1482 /* This is good enough, even though it will update some of the globals a
1483 bit to much. */
1484 supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
1485}
1486
1487
1488/**
1489 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
1490 *
1491 * @param idCpu Ignored.
1492 * @param pvUser1 Where to put the TSC.
1493 * @param pvUser2 Ignored.
1494 */
1495static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1496{
1497 Assert(RTMpCpuIdToSetIndex(idCpu) == (intptr_t)pvUser2);
1498 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
1499}
1500
1501
/**
 * Determine if Async GIP mode is required because of TSC drift.
 *
 * When using the default/normal timer code it is essential that the time stamp counter
 * (TSC) runs never backwards, that is, a read operation to the counter should return
 * a bigger value than any previous read operation.  This is guaranteed by the latest
 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4).  In any other
 * case we have to choose the asynchronous timer mode.
 *
 * @param   poffMin     Pointer to the determined difference between different
 *                      cores (optional, can be NULL).
 * @return  false if the time stamp counters appear to be synchronized, true otherwise.
 */
static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
{
    /*
     * Just iterate all the cpus 8 times and make sure that the TSC is
     * ever increasing.  We don't bother taking TSC rollover into account.
     */
    int      iEndCpu = RTMpGetArraySize();
    int      iCpu;
    int      cLoops  = 8;
    bool     fAsync  = false;
    int      rc      = VINF_SUCCESS;
    uint64_t offMax  = 0;
    uint64_t offMin  = ~(uint64_t)0;
    uint64_t PrevTsc = ASMReadTSC();

    while (cLoops-- > 0)
    {
        for (iCpu = 0; iCpu < iEndCpu; iCpu++)
        {
            uint64_t CurTsc;
            rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker,
                                &CurTsc, (void *)(uintptr_t)iCpu);
            if (RT_SUCCESS(rc))
            {
                /* A non-increasing TSC between two CPUs means they are out of
                   sync; conclude async mode immediately. */
                if (CurTsc <= PrevTsc)
                {
                    fAsync = true;
                    offMin = offMax = PrevTsc - CurTsc;
                    Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
                         iCpu, cLoops, CurTsc, PrevTsc));
                    break;
                }

                /* Gather statistics (except the first time, which includes setup overhead). */
                if (iCpu != 0 || cLoops != 7)
                {
                    uint64_t off = CurTsc - PrevTsc;
                    if (off < offMin)
                        offMin = off;
                    if (off > offMax)
                        offMax = off;
                    Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
                }

                /* Next */
                PrevTsc = CurTsc;
            }
            else if (rc == VERR_NOT_SUPPORTED)
                break;
            else
                AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
        }

        /* broke out of the loop. */
        if (iCpu < iEndCpu)
            break;
    }

    if (poffMin)
        *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
    Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
         fAsync, iEndCpu, rc, offMin, offMax));
#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
    OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
#endif
    return fAsync;
}
1582
1583
/**
 * supdrvGipInit() worker that determines the GIP TSC mode.
 *
 * @returns The most suitable TSC mode.
 * @param   pDevExt     Pointer to the device instance data.
 */
static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
{
    uint64_t u64DiffCoresIgnored;
    uint32_t uEAX, uEBX, uECX, uEDX;

    /*
     * Establish whether the CPU advertises TSC as invariant via CPUID leaf
     * 0x80000007 (X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR), we need that in
     * a couple of places below.
     */
    bool fInvariantTsc = false;
    if (ASMHasCpuId())
    {
        uEAX = ASMCpuId_EAX(0x80000000);
        if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
        {
            uEDX = ASMCpuId_EDX(0x80000007);
            if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
                fInvariantTsc = true;
        }
    }

    /*
     * On single CPU systems, we don't need to consider ASYNC mode.
     */
    if (RTMpGetCount() <= 1)
        return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;

    /*
     * Allow the user and/or OS specific bits to force async mode.
     */
    if (supdrvOSGetForcedAsyncTscMode(pDevExt))
        return SUPGIPMODE_ASYNC_TSC;

    /*
     * Use invariant mode if the CPU says TSC is invariant.
     */
    if (fInvariantTsc)
        return SUPGIPMODE_INVARIANT_TSC;

    /*
     * TSC is not invariant and we're on SMP, this presents two problems:
     *
     *      (1) There might be a skew between the CPU, so that cpu0
     *          returns a TSC that is slightly different from cpu1.
     *          This skew may be due to (2), bad TSC initialization
     *          or slightly different TSC rates.
     *
     *      (2) Power management (and other things) may cause the TSC
     *          to run at a non-constant speed, and cause the speed
     *          to be different on the cpus. This will result in (1).
     *
     * If any of the above is detected, we will have to use ASYNC mode.
     */
    /* (1). Try check for current differences between the cpus. */
    if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
        return SUPGIPMODE_ASYNC_TSC;

    /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
    ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
    if (   ASMIsValidStdRange(uEAX)
        && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
    {
        /* Check for APM support. */
        uEAX = ASMCpuId_EAX(0x80000000);
        if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
        {
            uEDX = ASMCpuId_EDX(0x80000007);
            if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
                return SUPGIPMODE_ASYNC_TSC;
        }
    }

    return SUPGIPMODE_SYNC_TSC;
}
1664
1665
1666/**
1667 * Initializes per-CPU GIP information.
1668 *
1669 * @param pGip Pointer to the GIP.
1670 * @param pCpu Pointer to which GIP CPU to initalize.
1671 * @param u64NanoTS The current nanosecond timestamp.
1672 * @param uCpuHz The CPU frequency to set, 0 if the caller doesn't know.
1673 */
1674static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz)
1675{
1676 pCpu->u32TransactionId = 2;
1677 pCpu->u64NanoTS = u64NanoTS;
1678 pCpu->u64TSC = ASMReadTSC();
1679 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
1680 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
1681
1682 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
1683 ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
1684 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
1685 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
1686
1687 /*
1688 * The first time we're called, we don't have a CPU frequency handy,
1689 * so pretend it's a 4 GHz CPU. On CPUs that are online, we'll get
1690 * called again and at that point we have a more plausible CPU frequency
1691 * value handy. The frequency history will also be adjusted again on
1692 * the 2nd timer callout (maybe we can skip that now?).
1693 */
1694 if (!uCpuHz)
1695 {
1696 pCpu->u64CpuHz = _4G - 1;
1697 pCpu->u32UpdateIntervalTSC = (uint32_t)((_4G - 1) / pGip->u32UpdateHz);
1698 }
1699 else
1700 {
1701 pCpu->u64CpuHz = uCpuHz;
1702 pCpu->u32UpdateIntervalTSC = (uint32_t)(uCpuHz / pGip->u32UpdateHz);
1703 }
1704 pCpu->au32TSCHistory[0]
1705 = pCpu->au32TSCHistory[1]
1706 = pCpu->au32TSCHistory[2]
1707 = pCpu->au32TSCHistory[3]
1708 = pCpu->au32TSCHistory[4]
1709 = pCpu->au32TSCHistory[5]
1710 = pCpu->au32TSCHistory[6]
1711 = pCpu->au32TSCHistory[7]
1712 = pCpu->u32UpdateIntervalTSC;
1713}
1714
1715
/**
 * Initializes the GIP data.
 *
 * @param   pDevExt             Pointer to the device instance data.
 * @param   pGip                Pointer to the read-write kernel mapping of the GIP.
 * @param   HCPhys              The physical address of the GIP.
 * @param   u64NanoTS           The current nanosecond timestamp.
 * @param   uUpdateHz           The update frequency.
 * @param   uUpdateIntervalNS   The update interval in nanoseconds.
 * @param   cCpus               The CPU count.
 */
static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
                          uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
{
    /* The GIP size is the fixed header plus one SUPGIPCPU entry per possible CPU, page aligned. */
    size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
    unsigned i;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#else
    LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
#endif

    /*
     * Initialize the structure.
     */
    memset(pGip, 0, cbGip);

    pGip->u32Magic                = SUPGLOBALINFOPAGE_MAGIC;
    pGip->u32Version              = SUPGLOBALINFOPAGE_VERSION;
    pGip->u32Mode                 = supdrvGipInitDetermineTscMode(pDevExt);
    if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
        /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
        pGip->enmUseTscDelta      = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
                                  ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
    else
        pGip->enmUseTscDelta      = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
    pGip->cCpus                   = (uint16_t)cCpus;
    pGip->cPages                  = (uint16_t)(cbGip / PAGE_SIZE);
    pGip->u32UpdateHz             = uUpdateHz;
    pGip->u32UpdateIntervalNS     = uUpdateIntervalNS;
    pGip->fGetGipCpu              = SUPGIPGETCPU_APIC_ID;
    RTCpuSetEmpty(&pGip->OnlineCpuSet);
    RTCpuSetEmpty(&pGip->PresentCpuSet);
    RTMpGetSet(&pGip->PossibleCpuSet);
    pGip->cOnlineCpus             = RTMpGetOnlineCount();
    pGip->cPresentCpus            = RTMpGetPresentCount();
    pGip->cPossibleCpus           = RTMpGetCount();
    pGip->idCpuMax                = RTMpGetMaxCpuId();
    /* Invalidate both lookup tables; they are filled in as CPUs come online. */
    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
        pGip->aiCpuFromApicId[i]    = UINT16_MAX;
    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
        pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
    /* Frequency is unknown (0) at this point; the entries are re-initialized
       per CPU by supdrvGipInitOnCpu / the online event. */
    for (i = 0; i < cCpus; i++)
        supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, 0 /*uCpuHz*/);

    /*
     * Link it to the device extension.
     */
    pDevExt->pGip      = pGip;
    pDevExt->HCPhysGip = HCPhys;
    pDevExt->cGipUsers = 0;
}
1778
1779
1780/**
1781 * Creates the GIP.
1782 *
1783 * @returns VBox status code.
1784 * @param pDevExt Instance data. GIP stuff may be updated.
1785 */
1786int VBOXCALL supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
1787{
1788 PSUPGLOBALINFOPAGE pGip;
1789 RTHCPHYS HCPhysGip;
1790 uint32_t u32SystemResolution;
1791 uint32_t u32Interval;
1792 uint32_t u32MinInterval;
1793 uint32_t uMod;
1794 unsigned cCpus;
1795 int rc;
1796
1797 LogFlow(("supdrvGipCreate:\n"));
1798
1799 /*
1800 * Assert order.
1801 */
1802 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
1803 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
1804 Assert(!pDevExt->pGipTimer);
1805#ifdef SUPDRV_USE_MUTEX_FOR_GIP
1806 Assert(pDevExt->mtxGip != NIL_RTSEMMUTEX);
1807 Assert(pDevExt->mtxTscDelta != NIL_RTSEMMUTEX);
1808#else
1809 Assert(pDevExt->mtxGip != NIL_RTSEMFASTMUTEX);
1810 Assert(pDevExt->mtxTscDelta != NIL_RTSEMFASTMUTEX);
1811#endif
1812
1813 /*
1814 * Check the CPU count.
1815 */
1816 cCpus = RTMpGetArraySize();
1817 if ( cCpus > RTCPUSET_MAX_CPUS
1818 || cCpus > 256 /* ApicId is used for the mappings */)
1819 {
1820 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, 256));
1821 return VERR_TOO_MANY_CPUS;
1822 }
1823
1824 /*
1825 * Allocate a contiguous set of pages with a default kernel mapping.
1826 */
1827 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, RT_UOFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), false /*fExecutable*/);
1828 if (RT_FAILURE(rc))
1829 {
1830 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
1831 return rc;
1832 }
1833 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
1834 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
1835
1836 /*
1837 * Find a reasonable update interval and initialize the structure.
1838 */
1839 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
1840 /** @todo figure out why using a 100Ms interval upsets timekeeping in VMs.
1841 * See @bugref{6710}. */
1842 u32MinInterval = RT_NS_10MS;
1843 u32SystemResolution = RTTimerGetSystemGranularity();
1844 u32Interval = u32MinInterval;
1845 uMod = u32MinInterval % u32SystemResolution;
1846 if (uMod)
1847 u32Interval += u32SystemResolution - uMod;
1848
1849 supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval, cCpus);
1850
1851 /*
1852 * Important sanity check...
1853 */
1854 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
1855 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
1856 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
1857 {
1858 OSDBGPRINT(("supdrvGipCreate: Host-OS/user claims the TSC-deltas are zero but we detected async. TSC! Bad.\n"));
1859 return VERR_INTERNAL_ERROR_2;
1860 }
1861
1862 /* It doesn't make sense to do TSC-delta detection on systems we detect as async. */
1863 AssertReturn( pGip->u32Mode != SUPGIPMODE_ASYNC_TSC
1864 || pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED, VERR_INTERNAL_ERROR_3);
1865
1866 /*
1867 * Do the TSC frequency measurements.
1868 *
1869 * If we're in invariant TSC mode, just to a quick preliminary measurement
1870 * that the TSC-delta measurement code can use to yield cross calls.
1871 *
1872 * If we're in any of the other two modes, neither which require MP init,
1873 * notifications or deltas for the job, do the full measurement now so
1874 * that supdrvGipInitOnCpu() can populate the TSC interval and history
1875 * array with more reasonable values.
1876 */
1877 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1878 {
1879 rc = supdrvGipInitMeasureTscFreq(pDevExt, pGip, true /*fRough*/); /* cannot fail */
1880 supdrvGipInitStartTimerForRefiningInvariantTscFreq(pDevExt, pGip);
1881 }
1882 else
1883 rc = supdrvGipInitMeasureTscFreq(pDevExt, pGip, false /*fRough*/);
1884 if (RT_SUCCESS(rc))
1885 {
1886 /*
1887 * Start TSC-delta measurement thread before we start getting MP
1888 * events that will try kick it into action (includes the
1889 * RTMpOnAll/supdrvGipInitOnCpu call below).
1890 */
1891 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
1892 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
1893#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1894 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1895 rc = supdrvTscDeltaThreadInit(pDevExt);
1896#endif
1897 if (RT_SUCCESS(rc))
1898 {
1899 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
1900 if (RT_SUCCESS(rc))
1901 {
1902 /*
1903 * Do GIP initialization on all online CPUs. Wake up the
1904 * TSC-delta thread afterwards.
1905 */
1906 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
1907 if (RT_SUCCESS(rc))
1908 {
1909#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1910 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
1911#else
1912 uint16_t iCpu;
1913 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1914 {
1915 /*
1916 * Measure the TSC deltas now that we have MP notifications.
1917 */
1918 int cTries = 5;
1919 do
1920 {
1921 rc = supdrvMeasureInitialTscDeltas(pDevExt);
1922 if ( rc != VERR_TRY_AGAIN
1923 && rc != VERR_CPU_OFFLINE)
1924 break;
1925 } while (--cTries > 0);
1926 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1927 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
1928 }
1929 else
1930 {
1931 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
1932 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
1933 }
1934 if (RT_SUCCESS(rc))
1935#endif
1936 {
1937 /*
1938 * Create the timer.
1939 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
1940 */
1941 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
1942 {
1943 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
1944 supdrvGipAsyncTimer, pDevExt);
1945 if (rc == VERR_NOT_SUPPORTED)
1946 {
1947 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
1948 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
1949 }
1950 }
1951 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
1952 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
1953 supdrvGipSyncAndInvariantTimer, pDevExt);
1954 if (RT_SUCCESS(rc))
1955 {
1956 /*
1957 * We're good.
1958 */
1959 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
1960 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
1961
1962 g_pSUPGlobalInfoPage = pGip;
1963 return VINF_SUCCESS;
1964 }
1965
1966 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
1967 Assert(!pDevExt->pGipTimer);
1968 }
1969 }
1970 else
1971 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
1972 }
1973 else
1974 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notfication. rc=%Rrc\n", rc));
1975 }
1976 else
1977 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
1978 }
1979 else
1980 OSDBGPRINT(("supdrvGipCreate: supdrvMeasureInitialTscDeltas failed. rc=%Rrc\n", rc));
1981
1982 /* Releases timer frequency increase too. */
1983 supdrvGipDestroy(pDevExt);
1984 return rc;
1985}
1986
1987
1988/**
1989 * Invalidates the GIP data upon termination.
1990 *
1991 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1992 */
1993static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
1994{
1995 unsigned i;
1996 pGip->u32Magic = 0;
1997 for (i = 0; i < pGip->cCpus; i++)
1998 {
1999 pGip->aCPUs[i].u64NanoTS = 0;
2000 pGip->aCPUs[i].u64TSC = 0;
2001 pGip->aCPUs[i].iTSCHistoryHead = 0;
2002 pGip->aCPUs[i].u64TSCSample = 0;
2003 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
2004 }
2005}
2006
2007
/**
 * Terminates the GIP.
 *
 * Tears down what supdrvGipCreate() set up: MP notifications are stopped
 * first (so nothing can kick the delta thread or timers while we destroy
 * them), then the TSC-delta thread, the refinement timer, the GIP data, the
 * GIP timer, the backing memory object, and finally any pending system timer
 * resolution request.
 *
 * @param pDevExt Instance data. GIP stuff may be updated.
 */
void VBOXCALL supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
{
    int rc;
#ifdef DEBUG_DARWIN_GIP
    OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
                pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
                pDevExt->pGipTimer, pDevExt->GipMemObj));
#endif

    /*
     * Stop receiving MP notifications before tearing anything else down.
     */
    RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /*
     * Terminate the TSC-delta measurement thread and resources.
     */
    supdrvTscDeltaTerm(pDevExt);
#endif

    /*
     * Destroy the TSC-refinement timer.
     */
    if (pDevExt->pInvarTscRefineTimer)
    {
        RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
        pDevExt->pInvarTscRefineTimer = NULL;
    }

    /*
     * Invalidate the GIP data and unpublish the global pointer.
     */
    if (pDevExt->pGip)
    {
        supdrvGipTerm(pDevExt->pGip);
        pDevExt->pGip = NULL;
    }
    g_pSUPGlobalInfoPage = NULL;

    /*
     * Destroy the timer and free the GIP memory object.
     */
    if (pDevExt->pGipTimer)
    {
        rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
        pDevExt->pGipTimer = NULL;
    }

    if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
    {
        rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
        pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
    }

    /*
     * Finally, make sure we've released the system timer resolution request
     * if one actually succeeded and is still pending.
     */
    supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
}
2074
2075
2076
2077
2078/*
2079 *
2080 *
2081 * GIP Update Timer Related Code
2082 * GIP Update Timer Related Code
2083 * GIP Update Timer Related Code
2084 *
2085 *
2086 */
2087
2088
/**
 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
 * updates all the per cpu data except the transaction id.
 *
 * Records the elapsed time since the last update, publishes the new NanoTS
 * and TSC values, and (except in invariant mode / outside test mode)
 * recalculates the per-CPU update interval and CPU frequency from the TSC
 * interval history.
 *
 * @param   pDevExt         The device extension.
 * @param   pGipCpu         Pointer to the per cpu data.
 * @param   u64NanoTS       The current time stamp.
 * @param   u64TSC          The current TSC.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
{
    uint64_t u64TSCDelta;
    uint32_t u32UpdateIntervalTSC;
    uint32_t u32UpdateIntervalTSCSlack;
    unsigned iTSCHistoryHead;
    bool fUpdateCpuHz;
    uint32_t fGipFlags;
    uint64_t u64CpuHz;
    uint32_t u32TransactionId;

    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    /* Delta between this and the previous update. */
    ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));

    /*
     * Update the NanoTS.
     */
    ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);

    /*
     * Calc TSC delta.
     */
    u64TSCDelta = u64TSC - pGipCpu->u64TSC;
    ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);

    /* Determine when we need to update the TSC frequency. */
    fUpdateCpuHz = pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC;

    /*
     * Handle GIP test mode toggle.  While testing is active we force frequency
     * updates even in invariant mode, caching/restoring the real frequency
     * around the test window.
     */
    fGipFlags = pGip->fFlags;
    if (!(fGipFlags & SUPGIP_FLAGS_TESTING))
    { /* likely*/ }
    else
    {
        if (fGipFlags & (SUPGIP_FLAGS_TESTING_ENABLE | SUPGIP_FLAGS_TESTING_START))
        {
            if (fGipFlags & SUPGIP_FLAGS_TESTING_START)
            {
                /* Cache the TSC frequency before forcing updates due to test mode. */
                if (!fUpdateCpuHz)
                    pDevExt->uGipTestModeInvariantCpuHz = pGip->aCPUs[0].u64CpuHz;
                fGipFlags &= ~SUPGIP_FLAGS_TESTING_START;
                ASMAtomicWriteU32(&pGip->fFlags, fGipFlags);
            }
            fUpdateCpuHz = true;
        }
        else if (fGipFlags & SUPGIP_FLAGS_TESTING_STOP)
        {
            /* Restore the cached TSC frequency if any. */
            if (!fUpdateCpuHz)
            {
                Assert(pDevExt->uGipTestModeInvariantCpuHz);
                ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, pDevExt->uGipTestModeInvariantCpuHz);
            }
            fGipFlags &= ~(SUPGIP_FLAGS_TESTING_STOP | SUPGIP_FLAGS_TESTING);
            ASMAtomicWriteU32(&pGip->fFlags, fGipFlags);
        }
    }

    /*
     * We don't need to keep recalculating the frequency when it's invariant, so
     * the remainder of this function is only for the sync and async TSC modes.
     */
    if (fUpdateCpuHz)
    {
        /* A delta that doesn't fit in 32 bits is bogus (missed ticks or TSC
           jump); substitute the previous interval and count it as an error. */
        if (u64TSCDelta >> 32)
        {
            u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
            pGipCpu->cErrors++;
        }

        /*
         * On the 2nd and 3rd callout, reset the history with the current TSC
         * interval since the values entered by supdrvGipInit are totally off.
         * The interval on the 1st callout completely unreliable, the 2nd is a bit
         * better, while the 3rd should be most reliable.
         */
        /** @todo Could we drop this now that we initialize the history
         *        with nominal TSC frequency values? */
        u32TransactionId = pGipCpu->u32TransactionId;
        if (RT_UNLIKELY(   (   u32TransactionId == 5
                            || u32TransactionId == 7)
                        && (   iTick == 2
                            || iTick == 3) ))
        {
            unsigned i;
            for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
                ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
        }

        /*
         * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
         * Wait until we have at least one full history since the above history reset. The
         * assumption is that the majority of the previous history values will be tolerable.
         * See @bugref{6710} comment #67.
         */
        /** @todo Could we drop the fudging here now that we initialize the history
         *        with nominal TSC frequency values? */
        if (   u32TransactionId > 23 /* 7 + (8 * 2) */
            && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        {
            uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
            if (   pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
                || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
            {
                /* Interval out of tolerance: replace this tick's delta with a
                   weighted average of the history (same weighting as below). */
                uint32_t u32;
                u32 = pGipCpu->au32TSCHistory[0];
                u32 += pGipCpu->au32TSCHistory[1];
                u32 += pGipCpu->au32TSCHistory[2];
                u32 += pGipCpu->au32TSCHistory[3];
                u32 >>= 2;
                u64TSCDelta = pGipCpu->au32TSCHistory[4];
                u64TSCDelta += pGipCpu->au32TSCHistory[5];
                u64TSCDelta += pGipCpu->au32TSCHistory[6];
                u64TSCDelta += pGipCpu->au32TSCHistory[7];
                u64TSCDelta >>= 2;
                u64TSCDelta += u32;
                u64TSCDelta >>= 1;
            }
        }

        /*
         * TSC History.
         */
        Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
        iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
        ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
        ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);

        /*
         * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
         *
         * On Windows, we have an occasional (but recurring) sour value that messed up
         * the history but taking only 1 interval reduces the precision overall.
         */
        if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
            || pGip->u32UpdateHz >= 1000)
        {
            /* Weighted average: newest 4 entries count double vs the oldest 4. */
            uint32_t u32;
            u32 = pGipCpu->au32TSCHistory[0];
            u32 += pGipCpu->au32TSCHistory[1];
            u32 += pGipCpu->au32TSCHistory[2];
            u32 += pGipCpu->au32TSCHistory[3];
            u32 >>= 2;
            u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
            u32UpdateIntervalTSC >>= 2;
            u32UpdateIntervalTSC += u32;
            u32UpdateIntervalTSC >>= 1;

            /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
            u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
        }
        else if (pGip->u32UpdateHz >= 90)
        {
            u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
            u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
            u32UpdateIntervalTSC >>= 1;

            /* value chosen on a 2GHz thinkpad running windows */
            u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
        }
        else
        {
            u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;

            /* This value hasn't been checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
            u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
        }
        ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);

        /*
         * CpuHz = ticks-per-interval scaled to one second.
         */
        u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
        u64CpuHz /= pGip->u32UpdateIntervalNS;
        ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
    }
}
2287
2288
/**
 * Updates the GIP.
 *
 * Picks the relevant per-CPU entry (entry 0 unless in async TSC mode), opens
 * an update transaction (odd transaction id = in progress), periodically
 * recalculates the update frequency, and delegates the actual per-CPU update
 * to supdrvGipDoUpdateCpu().
 *
 * @param   pDevExt         The device extension.
 * @param   u64NanoTS       The current nanosecond timestamp.
 * @param   u64TSC          The current TSC timestamp.
 * @param   idCpu           The CPU ID.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
{
    /*
     * Determine the relevant CPU data.
     */
    PSUPGIPCPU pGipCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertPtrReturnVoid(pGip);

    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
        pGipCpu = &pGip->aCPUs[0];
    else
    {
        unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
        if (RT_UNLIKELY(iCpu >= pGip->cCpus))
            return;
        pGipCpu = &pGip->aCPUs[iCpu];
        if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
            return;
    }

    /*
     * Start update transaction.
     */
    if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
    {
        /* this can happen on win32 if we're taking too long and there are more CPUs around. shouldn't happen though. */
        AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
        ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        pGipCpu->cErrors++;
        return;
    }

    /*
     * Recalc the update frequency every 0x800th time.
     */
    if (   pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariant hosts. */
        && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
    {
        if (pGip->u64NanoTSLastUpdateHz)
        {
#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
            uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
            uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
            /* Only accept sane rates; anything else is probably a scheduling hiccup. */
            if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
            {
                /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
                 *        calculation on non-invariant hosts if it changes the history decision
                 *        taken in supdrvGipDoUpdateCpu(). */
                uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
                ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
                ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
            }
#endif
        }
        /* The '| 1' keeps the value non-zero so the branch above is taken next time. */
        ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
    }

    /*
     * Update the data.
     */
    supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

    /*
     * Complete transaction.
     */
    ASMAtomicIncU32(&pGipCpu->u32TransactionId);
}
2368
2369
/**
 * Updates the per cpu GIP data for the calling cpu.
 *
 * Used in async TSC mode for every CPU that is not the GIP master.  Resolves
 * the per-CPU entry via the APIC id and performs the same transaction-guarded
 * update as supdrvGipUpdate().
 *
 * @param   pDevExt         The device extension.
 * @param   u64NanoTS       The current nanosecond timestamp.
 * @param   u64TSC          The current TSC timestamp.
 * @param   idCpu           The CPU ID.
 * @param   idApic          The APIC id for the CPU index.
 * @param   iTick           The current timer tick.
 *
 * @remarks Can be called with interrupts disabled!
 */
static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
                                  RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
{
    uint32_t iCpu;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;

    /*
     * Avoid a potential race when a CPU online notification doesn't fire on
     * the onlined CPU but the tick creeps in before the event notification is
     * run.
     */
    if (RT_UNLIKELY(iTick == 1))
    {
        iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
        if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
            supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
    }

    iCpu = pGip->aiCpuFromApicId[idApic];
    if (RT_LIKELY(iCpu < pGip->cCpus))
    {
        PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
        if (pGipCpu->idCpu == idCpu)
        {
            /*
             * Start update transaction.
             */
            if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
            {
                AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
                ASMAtomicIncU32(&pGipCpu->u32TransactionId);
                pGipCpu->cErrors++;
                return;
            }

            /*
             * Update the data.
             */
            supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);

            /*
             * Complete transaction.
             */
            ASMAtomicIncU32(&pGipCpu->u32TransactionId);
        }
    }
}
2429
2430
/**
 * Timer callback function for the sync and invariant GIP modes.
 *
 * Reads the TSC and system time with interrupts disabled, applies the
 * per-CPU TSC delta when one is known, and feeds the result to
 * supdrvGipUpdate().
 *
 * @param   pTimer      The timer.
 * @param   pvUser      Opaque pointer to the device extension.
 * @param   iTick       The timer tick.
 */
static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
    uint64_t u64TSC = ASMReadTSC();
    uint64_t u64NanoTS = RTTimeSystemNanoTS();

    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
    {
        /*
         * The calculations in supdrvGipUpdate() is somewhat timing sensitive,
         * missing timer ticks is not an option for GIP because the GIP users
         * will end up incrementing the time in 1ns per time getter call until
         * there is a complete timer update. So, if the delta has yet to be
         * calculated, we just pretend it is zero for now (the GIP users
         * probably won't have it for a wee while either and will do the same).
         *
         * We could maybe on some platforms try cross calling a CPU with a
         * working delta here, but it's not worth the hassle since the
         * likelihood of this happening is really low. On Windows, Linux, and
         * Solaris timers fire on the CPU they were registered/started on.
         * Darwin timers don't necessarily (they are high priority threads).
         */
        uint32_t iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
        uint16_t iGipCpu = RT_LIKELY(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
                         ? pGip->aiCpuFromCpuSetIdx[iCpuSet] : UINT16_MAX;
        Assert(!ASMIntAreEnabled());
        if (RT_LIKELY(iGipCpu < pGip->cCpus))
        {
            int64_t iTscDelta = pGip->aCPUs[iGipCpu].i64TSCDelta;
            if (iTscDelta != INT64_MAX)   /* INT64_MAX means the delta hasn't been measured yet. */
                u64TSC -= iTscDelta;
        }
    }

    supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);

    ASMSetFlags(fEFlags);
}
2478
2479
2480/**
2481 * Timer callback function for async GIP mode.
2482 * @param pTimer The timer.
2483 * @param pvUser Opaque pointer to the device extension.
2484 * @param iTick The timer tick.
2485 */
2486static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2487{
2488 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2489 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2490 RTCPUID idCpu = RTMpCpuId();
2491 uint64_t u64TSC = ASMReadTSC();
2492 uint64_t NanoTS = RTTimeSystemNanoTS();
2493
2494 /** @todo reset the transaction number and whatnot when iTick == 1. */
2495 if (pDevExt->idGipMaster == idCpu)
2496 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
2497 else
2498 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, ASMGetApicId(), iTick);
2499
2500 ASMSetFlags(fEFlags);
2501}
2502
2503
2504
2505
2506/*
2507 *
2508 *
2509 * TSC Delta Measurements And Related Code
2510 * TSC Delta Measurements And Related Code
2511 * TSC Delta Measurements And Related Code
2512 *
2513 *
2514 */
2515
2516
/*
 * Select TSC delta measurement algorithm (method 2 is the default).
 */
#if 0
# define GIP_TSC_DELTA_METHOD_1
#else
# define GIP_TSC_DELTA_METHOD_2
#endif

/** For padding variables to keep them away from other cache lines. Better too
 * large than too small!
 * @remarks Current AMD64 and x86 CPUs seem to use 64 bytes. There are claims
 * that NetBurst had 128 byte cache lines while the 486 thru Pentium
 * III had 32 bytes cache lines. */
#define GIP_TSC_DELTA_CACHE_LINE_SIZE 128
2532
2533
/**
 * TSC delta measurement algorithm \#2 result entry.
 */
typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
{
    /** This side's sequence number when the sample was taken. */
    uint32_t iSeqMine;
    /** The sequence number observed from the other side for this sample. */
    uint32_t iSeqOther;
    /** The TSC value sampled for this entry. */
    uint64_t uTsc;
} SUPDRVTSCDELTAMETHOD2ENTRY;

/**
 * TSC delta measurement algorithm \#2 Data.
 */
typedef struct SUPDRVTSCDELTAMETHOD2
{
    /** Padding to make sure the iCurSeqNo is in its own cache line. */
    uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
    /** The current sequence number of this worker. */
    uint32_t volatile iCurSeqNo;
    /** Padding to make sure the iCurSeqNo is in its own cache line.
     * (Note: uint32_t elements despite the 'au64' name prefix.) */
    uint32_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint32_t) - 1];
    /** Result table. */
    SUPDRVTSCDELTAMETHOD2ENTRY aResults[64];
} SUPDRVTSCDELTAMETHOD2;
/** Pointer to the data for TSC delta measurement algorithm \#2 .*/
typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;


/**
 * The TSC delta synchronization struct, version 2.
 *
 * The synchronization variable is completely isolated in its own cache line
 * (provided our max cache line size estimate is correct).
 */
typedef struct SUPTSCDELTASYNC2
{
    /** Padding to make sure the uSyncVar is in its own cache line. */
    uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
    volatile uint32_t uSyncVar;
    /** Sequence synchronizing variable used for post 'GO' synchronization. */
    volatile uint32_t uSyncSeq;

    /** Padding to make sure the uSyncVar is in its own cache line. */
    uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t) - 2];

    /** Start RDTSC value. Put here mainly to save stack space. */
    uint64_t uTscStart;
    /** Copy of SUPDRVGIPTSCDELTARGS::cMaxTscTicks. */
    uint64_t cMaxTscTicks;
} SUPTSCDELTASYNC2;
AssertCompileSize(SUPTSCDELTASYNC2, GIP_TSC_DELTA_CACHE_LINE_SIZE * 2 + sizeof(uint64_t));
typedef SUPTSCDELTASYNC2 *PSUPTSCDELTASYNC2;
2588
/** Prestart wait. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_WAIT UINT32_C(0x0ffe)
/** Prestart aborted. */
#define GIP_TSC_DELTA_SYNC2_PRESTART_ABORT UINT32_C(0x0fff)
/** Ready (on your mark). */
#define GIP_TSC_DELTA_SYNC2_READY UINT32_C(0x1000)
/** Steady (get set). */
#define GIP_TSC_DELTA_SYNC2_STEADY UINT32_C(0x1001)
/** Go! */
#define GIP_TSC_DELTA_SYNC2_GO UINT32_C(0x1002)
/** Used by the verification test. */
#define GIP_TSC_DELTA_SYNC2_GO_GO UINT32_C(0x1003)

/** We reached the time limit. */
#define GIP_TSC_DELTA_SYNC2_TIMEOUT UINT32_C(0x1ffe)
/** The other party won't touch the sync struct ever again. */
#define GIP_TSC_DELTA_SYNC2_FINAL UINT32_C(0x1fff)
2606
2607
/**
 * Argument package/state passed by supdrvMeasureTscDeltaOne() to the RTMpOn
 * callback worker.
 *
 * The master and worker sections are padded apart so the two CPUs never
 * false-share a cache line while measuring.
 */
typedef struct SUPDRVGIPTSCDELTARGS
{
    /** The device extension. */
    PSUPDRVDEVEXT pDevExt;
    /** Pointer to the GIP CPU array entry for the worker. */
    PSUPGIPCPU pWorker;
    /** Pointer to the GIP CPU array entry for the master. */
    PSUPGIPCPU pMaster;
    /** The maximum number of ticks to spend in supdrvMeasureTscDeltaCallback.
     * (This is what we need a rough TSC frequency for.) */
    uint64_t cMaxTscTicks;
    /** Used to abort synchronization setup. */
    bool volatile fAbortSetup;

    /** Padding to make sure the master variables live in their own cache lines. */
    uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** @name Master
     * @{ */
    /** The time the master spent in the MP worker. */
    uint64_t cElapsedMasterTscTicks;
    /** The iTry value when stopped at. */
    uint32_t iTry;
    /** Set if the run timed out. */
    bool volatile fTimedOut;
    /** Pointer to the master's synchronization struct (on stack). */
    PSUPTSCDELTASYNC2 volatile pSyncMaster;
    /** Master data union. */
    union
    {
        /** Data (master) for delta verification. */
        struct
        {
            /** Verification test TSC values for the master. */
            uint64_t volatile auTscs[32];
        } Verify;
        /** Data (master) for measurement method \#2. */
        struct
        {
            /** Data and sequence number. */
            SUPDRVTSCDELTAMETHOD2 Data;
            /** The lag setting for the next run. */
            bool fLag;
            /** Number of hits. */
            uint32_t cHits;
        } M2;
    } uMaster;
    /** The verifier verdict, VINF_SUCCESS if ok, VERR_OUT_OF_RANGE if not,
     * VERR_TRY_AGAIN on timeout. */
    int32_t rcVerify;
#ifdef TSCDELTA_VERIFY_WITH_STATS
    /** The maximum difference between TSC read during delta verification. */
    int64_t cMaxVerifyTscTicks;
    /** The minimum difference between two TSC reads during verification. */
    int64_t cMinVerifyTscTicks;
    /** The bad TSC diff, worker relative to master (= worker - master).
     * Negative value means the worker is behind the master. */
    int64_t iVerifyBadTscDiff;
#endif
    /** @} */

    /** Padding to make sure the worker variables live in their own cache lines. */
    uint64_t au64CacheLinePaddingBetween[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];

    /** @name Proletarian
     * @{ */
    /** Pointer to the worker's synchronization struct (on stack). */
    PSUPTSCDELTASYNC2 volatile pSyncWorker;
    /** The time the worker spent in the MP worker. */
    uint64_t cElapsedWorkerTscTicks;
    /** Worker data union. */
    union
    {
        /** Data (worker) for delta verification. */
        struct
        {
            /** Verification test TSC values for the worker. */
            uint64_t volatile auTscs[32];
        } Verify;
        /** Data (worker) for measurement method \#2. */
        struct
        {
            /** Data and sequence number. */
            SUPDRVTSCDELTAMETHOD2 Data;
            /** The lag setting for the next run (set by master). */
            bool fLag;
        } M2;
    } uWorker;
    /** @} */

    /** Padding to make sure the above is in its own cache line. */
    uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
} SUPDRVGIPTSCDELTARGS;
typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
2707
2708
/** @name Macros that implement the basic synchronization steps common to
 * the algorithms.
 *
 * Must be used from a loop as the timeouts are implemented via 'break'
 * statements at the moment.
 *
 * @{
 */
#if defined(DEBUG_bird) /* || defined(VBOX_STRICT) */
# define TSCDELTA_DBG_VARS() uint32_t iDbgCounter
# define TSCDELTA_DBG_START_LOOP() do { iDbgCounter = 0; } while (0)
# define TSCDELTA_DBG_CHECK_LOOP() \
    do { iDbgCounter++; if ((iDbgCounter & UINT32_C(0x01ffffff)) == 0) RT_BREAKPOINT(); } while (0)
#else
# define TSCDELTA_DBG_VARS() ((void)0)
# define TSCDELTA_DBG_START_LOOP() ((void)0)
# define TSCDELTA_DBG_CHECK_LOOP() ((void)0)
#endif
/* Flip the '#if 0's below to '#if 1' to get verbose sync logging at the
   corresponding verbosity level. */
#if 0
# define TSCDELTA_DBG_SYNC_MSG(a_Args) SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG(a_Args) ((void)0)
#endif
#if 0
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG2(a_Args) ((void)0)
#endif
#if 0
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) SUPR0Printf a_Args
#else
# define TSCDELTA_DBG_SYNC_MSG9(a_Args) ((void)0)
#endif
2742
2743
/**
 * Pre-measurement synchronization between the master and worker CPU.
 *
 * Performs the ready/steady/go handshake via the two SUPTSCDELTASYNC2
 * structures and then tries to enter near-lockstep execution with the other
 * side using the uSyncSeq exchange loop.
 *
 * @returns true on success with interrupts disabled and *pfEFlags holding the
 *          flags to restore; false on abort/timeout (interrupts left as on
 *          entry, pArgs->fTimedOut set on the timeout path).
 * @param   pMySync     This side's synchronization structure.
 * @param   pOtherSync  The other side's synchronization structure.
 * @param   fIsMaster   Set if the caller is the master, clear for the worker.
 * @param   pfEFlags    Where to return the saved interrupt flags.
 * @param   pArgs       The measurement argument/state package.
 */
static bool supdrvTscDeltaSync2_Before(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                       bool fIsMaster, PRTCCUINTREG pfEFlags, PSUPDRVGIPTSCDELTARGS pArgs)
{
    uint32_t iMySeq = fIsMaster ? 0 : 256;
    uint32_t const iMaxSeq = iMySeq + 16; /* For the last loop, darn linux/freebsd C-ishness. */
    uint32_t u32Tmp;
    uint32_t iSync2Loops = 0;
    RTCCUINTREG fEFlags;
    TSCDELTA_DBG_VARS();

    *pfEFlags = X86_EFL_IF | X86_EFL_1; /* should shut up most nagging compilers. */

    /*
     * The master tells the worker to get on its mark.
     */
    if (fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
        { /* likely*/ }
        else
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #1 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Wait for the on your mark signal (ack in the master case). We process timeouts here.
     */
    ASMAtomicWriteU32(&(pMySync)->uSyncSeq, 0);
    for (;;)
    {
        fEFlags = ASMIntDisableFlags();
        u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY)
            break;
        ASMSetFlags(fEFlags);
        ASMNopPause();

        /* Abort? */
        if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY)
        {
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #2 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
            return false;
        }

        /* Check for timeouts every so often (not every loop in case RDTSC is
           trapping or something). Must check the first time around. */
#if 0 /* For debugging the timeout paths. */
        static uint32_t volatile xxx;
#endif
        if (   (   (iSync2Loops & 0x3ff) == 0
                && ASMReadTSC() - pMySync->uTscStart > pMySync->cMaxTscTicks)
#if 0 /* This is crazy, I know, but enable this code and the results are markedly better when enabled on the 1.4GHz AMD (debug). */
            || (!fIsMaster && (++xxx & 0xf) == 0)
#endif
           )
        {
            /* Try switch our own state into timeout mode so the master cannot tell us to 'GO',
               ignore the timeout if we've got the go ahead already (simpler). */
            if (ASMAtomicCmpXchgU32(&pMySync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_READY))
            {
                TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: timeout\n", fIsMaster ? "master" : "worker"));
                ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_STEADY);
                ASMAtomicWriteBool(&pArgs->fTimedOut, true);
                return false;
            }
        }
        iSync2Loops++;
    }

    /*
     * Interrupts are now disabled and will remain disabled until we do
     * TSCDELTA_MASTER_SYNC_AFTER / TSCDELTA_OTHER_SYNC_AFTER.
     */
    *pfEFlags = fEFlags;

    /*
     * The worker tells the master that it is on its mark and that the master
     * needs to get into position as well.
     */
    if (!fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #3 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * The master sends the 'go' to the worker and waits for ACK.
     */
    if (fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #4 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Wait for the 'go' signal (ack in the master case).
     */
    TSCDELTA_DBG_START_LOOP();
    for (;;)
    {
        u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
        if (u32Tmp == GIP_TSC_DELTA_SYNC2_GO)
            break;
        if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #5 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
            return false;
        }

        TSCDELTA_DBG_CHECK_LOOP();
        ASMNopPause();
    }

    /*
     * The worker acks the 'go' (shouldn't fail).
     */
    if (!fIsMaster)
    {
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
        { /* likely */ }
        else
        {
            ASMSetFlags(fEFlags);
            TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #6 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
            return false;
        }
    }

    /*
     * Try enter mostly lockstep execution with it.  Each side publishes its
     * own sequence number and exchanges it with the other; equal reads mean
     * both sides observed each other at the same step.  UINT32_MAX signals
     * that a side has given up.
     */
    for (;;)
    {
        uint32_t iOtherSeq1, iOtherSeq2;
        ASMCompilerBarrier();
        ASMSerializeInstruction();

        ASMAtomicWriteU32(&pMySync->uSyncSeq, iMySeq);
        ASMNopPause();
        iOtherSeq1 = ASMAtomicXchgU32(&pOtherSync->uSyncSeq, iMySeq);
        ASMNopPause();
        iOtherSeq2 = ASMAtomicReadU32(&pMySync->uSyncSeq);

        ASMCompilerBarrier();
        if (iOtherSeq1 == iOtherSeq2)
            return true;

        /* Did the other guy give up? Should we give up? */
        if (   iOtherSeq1 == UINT32_MAX
            || iOtherSeq2 == UINT32_MAX)
            return true;
        if (++iMySeq >= iMaxSeq)
        {
            ASMAtomicWriteU32(&pMySync->uSyncSeq, UINT32_MAX);
            return true;
        }
        ASMNopPause();
    }
}
2920
/** Master-side synchronization prologue, wrapping supdrvTscDeltaSync2_Before().
 * @note Expands to a 'break' statement on failure, so it may only be used
 *       directly inside the measurement loop it is meant to abort. */
#define TSCDELTA_MASTER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { /*likely*/ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/master: #89\n")); \
        break; \
    } else do {} while (0)
/** Worker-side synchronization prologue, wrapping supdrvTscDeltaSync2_Before().
 * @note Expands to a 'break' statement on failure, so it may only be used
 *       directly inside the measurement loop it is meant to abort. */
#define TSCDELTA_OTHER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
    if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
    { /*likely*/ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/before/other: #89\n")); \
        break; \
    } else do {} while (0)
2937
2938
2939static bool supdrvTscDeltaSync2_After(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
2940 bool fIsMaster, RTCCUINTREG fEFlags)
2941{
2942 TSCDELTA_DBG_VARS();
2943
2944 /*
2945 * Wait for the 'ready' signal. In the master's case, this means the
2946 * worker has completed its data collection, while in the worker's case it
2947 * means the master is done processing the data and it's time for the next
2948 * loop iteration (or whatever).
2949 */
2950 ASMSetFlags(fEFlags);
2951 TSCDELTA_DBG_START_LOOP();
2952 for (;;)
2953 {
2954 uint32_t u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
2955 if ( u32Tmp == GIP_TSC_DELTA_SYNC2_READY
2956 || (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY && !fIsMaster) /* kicked twice => race */ )
2957 return true;
2958 ASMNopPause();
2959 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_GO))
2960 { /* likely */}
2961 else
2962 {
2963 TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #1 u32Tmp=%#x\n", u32Tmp));
2964 return false; /* shouldn't ever happen! */
2965 }
2966 TSCDELTA_DBG_CHECK_LOOP();
2967 ASMNopPause();
2968 }
2969}
2970
/** Master-side synchronization epilogue, wrapping supdrvTscDeltaSync2_After().
 * @note Expands to a 'break' statement on failure, so it may only be used
 *       directly inside the measurement loop it is meant to abort. */
#define TSCDELTA_MASTER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_fEFlags))) \
    { /* likely */ } \
    else if (true) \
    { \
        TSCDELTA_DBG_SYNC_MSG9(("sync/after/master: #97\n")); \
        break; \
    } else do {} while (0)

/** Master-side helper that signals the worker to leave
 * supdrvTscDeltaSync2_After() and start the next round (GO -> READY).
 * @note Expands to a 'break' statement when the compare-exchange fails
 *       (unexpected sync state), so loop-context only. */
#define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pMySync, a_pOtherSync) \
    /* \
     * Tell the worker that we're done processing the data and ready for the next round. \
     */ \
    if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
    { /* likely */ } \
    else if (true)\
    { \
        TSCDELTA_DBG_SYNC_MSG(("sync/after/master: #99 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
        break; \
    } else do {} while (0)

/** Worker-side synchronization epilogue: acknowledges the master's GO
 * (GO -> READY on the master's variable) and then waits in
 * supdrvTscDeltaSync2_After() for the next round to start.
 * @note Expands to 'break' statements on failure, so loop-context only. */
#define TSCDELTA_OTHER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
    if (true) { \
        /* \
         * Tell the master that we're done collecting data and wait for the next round to start. \
         */ \
        if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
        { /* likely */ } \
        else \
        { \
            ASMSetFlags(a_fEFlags); \
            TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #0 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
            break; \
        } \
        if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_fEFlags))) \
        { /* likely */ } \
        else \
        { \
            TSCDELTA_DBG_SYNC_MSG9(("sync/after/other: #98\n")); \
            break; \
        } \
    } else do {} while (0)
3013/** @} */
3014
3015
3016#ifdef GIP_TSC_DELTA_METHOD_1
3017/**
 * TSC delta measurement algorithm \#1 (GIP_TSC_DELTA_METHOD_1).
3019 *
3020 *
3021 * We ignore the first few runs of the loop in order to prime the
3022 * cache. Also, we need to be careful about using 'pause' instruction
3023 * in critical busy-wait loops in this code - it can cause undesired
3024 * behaviour with hyperthreading.
3025 *
3026 * We try to minimize the measurement error by computing the minimum
3027 * read time of the compare statement in the worker by taking TSC
3028 * measurements across it.
3029 *
3030 * It must be noted that the computed minimum read time is mostly to
3031 * eliminate huge deltas when the worker is too early and doesn't by
3032 * itself help produce more accurate deltas. We allow two times the
3033 * computed minimum as an arbitrary acceptable threshold. Therefore,
3034 * it is still possible to get negative deltas where there are none
3035 * when the worker is earlier. As long as these occasional negative
3036 * deltas are lower than the time it takes to exit guest-context and
3037 * the OS to reschedule EMT on a different CPU, we won't expose a TSC
3038 * that jumped backwards. It is due to the existence of the negative
3039 * deltas that we don't recompute the delta with the master and
3040 * worker interchanged to eliminate the remaining measurement error.
3041 *
3042 *
3043 * @param pArgs The argument/state data.
3044 * @param pMySync My synchronization structure.
3045 * @param pOtherSync My partner's synchronization structure.
3046 * @param fIsMaster Set if master, clear if worker.
3047 * @param iTry The attempt number.
3048 */
static void supdrvTscDeltaMethod1Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                      bool fIsMaster, uint32_t iTry)
{
    PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
    PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
    uint64_t uMinCmpReadTime = UINT64_MAX;  /* Smallest observed worker compare+read time, see below. */
    unsigned iLoop;
    NOREF(iTry);

    for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
    {
        RTCCUINTREG fEFlags;
        if (fIsMaster)
        {
            /*
             * The master.
             */
            AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
                      ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
                       pGipCpuMaster->u64TSCSample, pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, pArgs->pDevExt->idGipMaster));
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /* Publish our TSC reading.  Re-read in the (unlikely) event the TSC
               happens to equal the reserved marker value, since the worker is
               polling for the sample to change away from it. */
            do
            {
                ASMSerializeInstruction();
                ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);

            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /* Process the data. */
            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
            {
                /* The worker only publishes a sample (non-RSVD) when it considered
                   its read-time acceptable, see the worker branch below. */
                if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
                {
                    int64_t iDelta = pGipCpuWorker->u64TSCSample
                                   - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
                    if (  iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
                        ? iDelta < pGipCpuWorker->i64TSCDelta
                        : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
                        pGipCpuWorker->i64TSCDelta = iDelta;
                }
            }

            /* Reset our TSC sample and tell the worker to move on. */
            ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker.
             */
            uint64_t uTscWorker;
            uint64_t uTscWorkerFlushed;
            uint64_t uCmpReadTime;

            ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /*
             * Keep reading the TSC until we notice that the master has read his. Reading
             * the TSC -after- the master has updated the memory is way too late. We thus
             * compensate by trying to measure how long it took for the worker to notice
             * the memory flushed from the master.
             */
            do
            {
                ASMSerializeInstruction();
                uTscWorker = ASMReadTSC();
            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
            ASMSerializeInstruction();
            uTscWorkerFlushed = ASMReadTSC();

            uCmpReadTime = uTscWorkerFlushed - uTscWorker;
            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
            {
                /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
                /* Only publish the sample when the compare+read took less than twice
                   the minimum observed so far; otherwise mark it unusable (RSVD). */
                if (uCmpReadTime < (uMinCmpReadTime << 1))
                {
                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
                    if (uCmpReadTime < uMinCmpReadTime)
                        uMinCmpReadTime = uCmpReadTime;
                }
                else
                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
            }
            else if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
            {
                /* Read-time calibration phase: only track the minimum, publish nothing. */
                if (uCmpReadTime < uMinCmpReadTime)
                    uMinCmpReadTime = uCmpReadTime;
            }

            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
    }

    TSCDELTA_DBG_SYNC_MSG9(("sync/method1loop/%s: #92 iLoop=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iLoop,
                            pMySync->uSyncVar));

    /*
     * We must reset the worker TSC sample value in case it gets picked as a
     * GIP master later on (it's trashed above, naturally).
     */
    if (!fIsMaster)
        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
}
3156#endif /* GIP_TSC_DELTA_METHOD_1 */
3157
3158
3159#ifdef GIP_TSC_DELTA_METHOD_2
3160/*
3161 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
3162 */
3163
3164# define GIP_TSC_DELTA_M2_LOOPS (7 + GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3165# define GIP_TSC_DELTA_M2_PRIMER_LOOPS 0
3166
3167
3168static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs, uint32_t iLoop)
3169{
3170 int64_t iMasterTscDelta = pArgs->pMaster->i64TSCDelta;
3171 int64_t iBestDelta = pArgs->pWorker->i64TSCDelta;
3172 uint32_t idxResult;
3173 uint32_t cHits = 0;
3174
3175 /*
3176 * Look for matching entries in the master and worker tables.
3177 */
3178 for (idxResult = 0; idxResult < RT_ELEMENTS(pArgs->uMaster.M2.Data.aResults); idxResult++)
3179 {
3180 uint32_t idxOther = pArgs->uMaster.M2.Data.aResults[idxResult].iSeqOther;
3181 if (idxOther & 1)
3182 {
3183 idxOther >>= 1;
3184 if (idxOther < RT_ELEMENTS(pArgs->uWorker.M2.Data.aResults))
3185 {
3186 if (pArgs->uWorker.M2.Data.aResults[idxOther].iSeqOther == pArgs->uMaster.M2.Data.aResults[idxResult].iSeqMine)
3187 {
3188 int64_t iDelta;
3189 iDelta = pArgs->uWorker.M2.Data.aResults[idxOther].uTsc
3190 - (pArgs->uMaster.M2.Data.aResults[idxResult].uTsc - iMasterTscDelta);
3191 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3192 ? iDelta < iBestDelta
3193 : iDelta > iBestDelta || iBestDelta == INT64_MAX)
3194 iBestDelta = iDelta;
3195 cHits++;
3196 }
3197 }
3198 }
3199 }
3200
3201 /*
3202 * Save the results.
3203 */
3204 if (cHits > 2)
3205 pArgs->pWorker->i64TSCDelta = iBestDelta;
3206 pArgs->uMaster.M2.cHits += cHits;
3207}
3208
3209
3210/**
 * The core function of the 2nd TSC delta measurement algorithm.
3212 *
3213 * The idea here is that we have the two CPUs execute the exact same code
3214 * collecting a largish set of TSC samples. The code has one data dependency on
 * the other CPU, the intention of which is to synchronize the execution as well as
3216 * help cross references the two sets of TSC samples (the sequence numbers).
3217 *
3218 * The @a fLag parameter is used to modify the execution a tiny bit on one or
3219 * both of the CPUs. When @a fLag differs between the CPUs, it is thought that
 * it will help with making the CPUs enter lock step execution occasionally.
3221 *
3222 */
static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
{
    SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
    uint32_t cLeft = RT_ELEMENTS(pMyData->aResults);

    /* Start our sequence counter at zero; the first increment below yields 1,
       so the counter is odd exactly while we're taking a TSC sample. */
    ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
    ASMSerializeInstruction();
    while (cLeft-- > 0)
    {
        uint64_t uTsc;
        /* iSeqMine is the (odd) value covering this sample; iSeqOther is the
           partner's counter - an odd value there means the partner was
           mid-sample when we looked (see the & 1 check in the data
           processing on the master). */
        uint32_t iSeqMine = ASMAtomicIncU32(&pMyData->iCurSeqNo);
        uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
        ASMCompilerBarrier();
        ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
        uTsc = ASMReadTSC();
        /* Second increment: back to an even value, sampling done. */
        ASMAtomicIncU32(&pMyData->iCurSeqNo);
        ASMCompilerBarrier();
        ASMSerializeInstruction();
        pEntry->iSeqMine = iSeqMine;
        pEntry->iSeqOther = iSeqOther;
        pEntry->uTsc = uTsc;
        pEntry++;
        ASMSerializeInstruction();
        if (fLag)
            ASMNopPause(); /* Deliberate lag, see the fLag notes in the function docs. */
    }
}
3250
3251
3252/**
 * TSC delta measurement algorithm \#2 (GIP_TSC_DELTA_METHOD_2).
3254 *
3255 * See supdrvTscDeltaMethod2CollectData for algorithm details.
3256 *
3257 * @param pArgs The argument/state data.
3258 * @param pMySync My synchronization structure.
3259 * @param pOtherSync My partner's synchronization structure.
3260 * @param fIsMaster Set if master, clear if worker.
3261 * @param iTry The attempt number.
3262 */
static void supdrvTscDeltaMethod2Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
                                      bool fIsMaster, uint32_t iTry)
{
    unsigned iLoop;

    for (iLoop = 0; iLoop < GIP_TSC_DELTA_M2_LOOPS; iLoop++)
    {
        RTCCUINTREG fEFlags;
        if (fIsMaster)
        {
            /*
             * Adjust the loop lag fudge.  The master decides the lag settings
             * for both CPUs before each round (the worker just reads its flag).
             */
# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
            if (iLoop < GIP_TSC_DELTA_M2_PRIMER_LOOPS)
            {
                /* Lag during the priming to be nice to everyone.. */
                pArgs->uMaster.M2.fLag = true;
                pArgs->uWorker.M2.fLag = true;
            }
            else
# endif
            if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4)
            {
                /* 25 % of the body without lagging. */
                pArgs->uMaster.M2.fLag = false;
                pArgs->uWorker.M2.fLag = false;
            }
            else if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4 * 2)
            {
                /* 25 % of the body with both lagging. */
                pArgs->uMaster.M2.fLag = true;
                pArgs->uWorker.M2.fLag = true;
            }
            else
            {
                /* 50% of the body with alternating lag. */
                pArgs->uMaster.M2.fLag = (iLoop & 1) == 0;
                pArgs->uWorker.M2.fLag= (iLoop & 1) == 1;
            }

            /*
             * Sync up with the worker and collect data.
             */
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
            supdrvTscDeltaMethod2CollectData(&pArgs->uMaster.M2.Data, &pArgs->uWorker.M2.Data.iCurSeqNo, pArgs->uMaster.M2.fLag);
            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /*
             * Process the data.
             */
# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
            if (iLoop >= GIP_TSC_DELTA_M2_PRIMER_LOOPS)
# endif
                supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs, iLoop);

            /* Release the worker for the next round. */
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker.
             */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
            supdrvTscDeltaMethod2CollectData(&pArgs->uWorker.M2.Data, &pArgs->uMaster.M2.Data.iCurSeqNo, pArgs->uWorker.M2.fLag);
            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
    }
}
3332
3333#endif /* GIP_TSC_DELTA_METHOD_2 */
3334
3335
3336
/**
 * Verifies that the given worker TSC delta keeps adjusted TSC readings taken
 * alternately on the master and worker CPUs from ever appearing to go
 * backwards.
 *
 * @returns pArgs->rcVerify (VINF_SUCCESS, or VERR_OUT_OF_RANGE when a
 *          backwards step was observed) on a completed round; VERR_TIMEOUT
 *          when the synchronization dance was broken off (rcVerify is then
 *          set to VERR_TRY_AGAIN so the caller can retry).
 * @param   pArgs            The argument/state data.
 * @param   pMySync          My synchronization structure.
 * @param   pOtherSync       My partner's synchronization structure.
 * @param   fIsMaster        Set if master, clear if worker.
 * @param   iWorkerTscDelta  The candidate TSC delta for the worker CPU.
 */
static int supdrvTscDeltaVerify(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync,
                                PSUPTSCDELTASYNC2 pOtherSync, bool fIsMaster, int64_t iWorkerTscDelta)
{
    /*PSUPGIPCPU pGipCpuWorker = pArgs->pWorker; - unused */
    PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
    uint32_t i;
    TSCDELTA_DBG_VARS();

    /* Note! This for-loop never iterates: every path inside returns.  It only
             exists so the 'break' statements hidden in the TSCDELTA_*_SYNC_*
             macros land at the timeout handling code following the loop. */
    for (;;)
    {
        RTCCUINTREG fEFlags;
        AssertCompile((RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) & 1) == 0);
        AssertCompile(RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) == RT_ELEMENTS(pArgs->uWorker.Verify.auTscs));

        if (fIsMaster)
        {
            uint64_t uTscWorker;
            TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            /*
             * Collect TSC, master goes first.
             */
            /* The GO / GO_GO values on the two sync variables are used as a
               ping-pong handshake so the two CPUs read their TSCs strictly
               alternately. */
            for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i += 2)
            {
                /* Read, kick & wait #1. */
                uint64_t register uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
                ASMSerializeInstruction();
                pArgs->uMaster.Verify.auTscs[i] = uTsc;
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }

                /* Read, kick & wait #2. */
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
                ASMSerializeInstruction();
                pArgs->uMaster.Verify.auTscs[i + 1] = uTsc;
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
            }

            TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);

            /*
             * Process the data.
             */
#ifdef TSCDELTA_VERIFY_WITH_STATS
            pArgs->cMaxVerifyTscTicks = INT64_MIN;
            pArgs->cMinVerifyTscTicks = INT64_MAX;
            pArgs->iVerifyBadTscDiff = 0;
#endif
            ASMAtomicWriteS32(&pArgs->rcVerify, VINF_SUCCESS);
            uTscWorker = 0;
            for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i++)
            {
                /* Master vs previous worker entry. */
                uint64_t uTscMaster = pArgs->uMaster.Verify.auTscs[i] - pGipCpuMaster->i64TSCDelta;
                int64_t iDiff;
                if (i > 0)
                {
                    iDiff = uTscMaster - uTscWorker;
#ifdef TSCDELTA_VERIFY_WITH_STATS
                    if (iDiff > pArgs->cMaxVerifyTscTicks)
                        pArgs->cMaxVerifyTscTicks = iDiff;
                    if (iDiff < pArgs->cMinVerifyTscTicks)
                        pArgs->cMinVerifyTscTicks = iDiff;
#endif
                    /* A negative difference means the adjusted TSC appeared to
                       go backwards between the two readings => delta no good. */
                    if (iDiff < 0)
                    {
#ifdef TSCDELTA_VERIFY_WITH_STATS
                        pArgs->iVerifyBadTscDiff = -iDiff;
#endif
                        ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
                        break;
                    }
                }

                /* Worker vs master. */
                uTscWorker = pArgs->uWorker.Verify.auTscs[i] - iWorkerTscDelta;
                iDiff = uTscWorker - uTscMaster;
#ifdef TSCDELTA_VERIFY_WITH_STATS
                if (iDiff > pArgs->cMaxVerifyTscTicks)
                    pArgs->cMaxVerifyTscTicks = iDiff;
                if (iDiff < pArgs->cMinVerifyTscTicks)
                    pArgs->cMinVerifyTscTicks = iDiff;
#endif
                if (iDiff < 0)
                {
#ifdef TSCDELTA_VERIFY_WITH_STATS
                    pArgs->iVerifyBadTscDiff = iDiff;
#endif
                    ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
                    break;
                }
            }

            /* Done. */
            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
        }
        else
        {
            /*
             * The worker, master leads.
             */
            TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);

            for (i = 0; i < RT_ELEMENTS(pArgs->uWorker.Verify.auTscs); i += 2)
            {
                uint64_t register uTsc;

                /* Wait, Read and Kick #1. */
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
                ASMSerializeInstruction();
                pArgs->uWorker.Verify.auTscs[i] = uTsc;

                /* Wait, Read and Kick #2. */
                TSCDELTA_DBG_START_LOOP();
                while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
                {
                    TSCDELTA_DBG_CHECK_LOOP();
                    ASMNopPause();
                }
                uTsc = ASMReadTSC();
                ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
                ASMSerializeInstruction();
                pArgs->uWorker.Verify.auTscs[i + 1] = uTsc;
            }

            TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
        }
        return pArgs->rcVerify;
    }

    /*
     * Timed out, please retry.
     */
    ASMAtomicWriteS32(&pArgs->rcVerify, VERR_TRY_AGAIN);
    return VERR_TIMEOUT;
}
3491
3492
3493
3494/**
3495 * Handles the special abort procedure during synchronization setup in
3496 * supdrvMeasureTscDeltaCallbackUnwrapped().
3497 *
3498 * @returns 0 (dummy, ignored)
3499 * @param pArgs Pointer to argument/state data.
3500 * @param pMySync Pointer to my sync structure.
3501 * @param fIsMaster Set if we're the master, clear if worker.
3502 * @param fTimeout Set if it's a timeout.
3503 */
3504DECL_NO_INLINE(static, int)
3505supdrvMeasureTscDeltaCallbackAbortSyncSetup(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, bool fIsMaster, bool fTimeout)
3506{
3507 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3508 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3509 TSCDELTA_DBG_VARS();
3510
3511 /*
3512 * Clear our sync pointer and make sure the abort flag is set.
3513 */
3514 ASMAtomicWriteNullPtr(ppMySync);
3515 ASMAtomicWriteBool(&pArgs->fAbortSetup, true);
3516 if (fTimeout)
3517 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
3518
3519 /*
3520 * Make sure the other party is out of there and won't be touching our
3521 * sync state again (would cause stack corruption).
3522 */
3523 TSCDELTA_DBG_START_LOOP();
3524 while (ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2) != NULL)
3525 {
3526 ASMNopPause();
3527 ASMNopPause();
3528 ASMNopPause();
3529 TSCDELTA_DBG_CHECK_LOOP();
3530 }
3531
3532 return 0;
3533}
3534
3535
3536/**
3537 * This is used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
3538 * and compute the delta between them.
3539 *
 * To reduce code size a good deal when timeout handling was added, a dummy return
3541 * value had to be added (saves 1-3 lines per timeout case), thus this
3542 * 'Unwrapped' function and the dummy 0 return value.
3543 *
3544 * @returns 0 (dummy, ignored)
3545 * @param idCpu The CPU we are current scheduled on.
3546 * @param pArgs Pointer to a parameter package.
3547 *
3548 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
3549 * read the TSC at exactly the same time on both the master and the
3550 * worker CPUs. Due to DMA, bus arbitration, cache locality,
3551 * contention, SMI, pipelining etc. there is no guaranteed way of
3552 * doing this on x86 CPUs.
3553 */
3554static int supdrvMeasureTscDeltaCallbackUnwrapped(RTCPUID idCpu, PSUPDRVGIPTSCDELTARGS pArgs)
3555{
3556 PSUPDRVDEVEXT pDevExt = pArgs->pDevExt;
3557 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3558 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3559 bool const fIsMaster = idCpu == pGipCpuMaster->idCpu;
3560 uint32_t iTry;
3561 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3562 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3563 SUPTSCDELTASYNC2 MySync;
3564 PSUPTSCDELTASYNC2 pOtherSync;
3565 int rc;
3566 TSCDELTA_DBG_VARS();
3567
3568 /* A bit of paranoia first. */
3569 if (!pGipCpuMaster || !pGipCpuWorker)
3570 return 0;
3571
3572 /*
3573 * If the CPU isn't part of the measurement, return immediately.
3574 */
3575 if ( !fIsMaster
3576 && idCpu != pGipCpuWorker->idCpu)
3577 return 0;
3578
3579 /*
3580 * Set up my synchronization stuff and wait for the other party to show up.
3581 *
3582 * We don't wait forever since the other party may be off fishing (offline,
3583 * spinning with ints disables, whatever), we must play nice to the rest of
3584 * the system as this context generally isn't one in which we will get
3585 * preempted and we may hold up a number of lower priority interrupts.
3586 */
3587 ASMAtomicWriteU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT);
3588 ASMAtomicWritePtr(ppMySync, &MySync);
3589 MySync.uTscStart = ASMReadTSC();
3590 MySync.cMaxTscTicks = pArgs->cMaxTscTicks;
3591
3592 /* Look for the partner, might not be here yet... Special abort considerations. */
3593 iTry = 0;
3594 TSCDELTA_DBG_START_LOOP();
3595 while ((pOtherSync = ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2)) == NULL)
3596 {
3597 ASMNopPause();
3598 if ( ASMAtomicReadBool(&pArgs->fAbortSetup)
3599 || !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuWorker->idCpu) )
3600 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3601 if ( (iTry++ & 0xff) == 0
3602 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3603 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3604 TSCDELTA_DBG_CHECK_LOOP();
3605 ASMNopPause();
3606 }
3607
3608 /* I found my partner, waiting to be found... Special abort considerations. */
3609 if (fIsMaster)
3610 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* parnaoia */
3611 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3612
3613 iTry = 0;
3614 TSCDELTA_DBG_START_LOOP();
3615 while (ASMAtomicReadU32(&MySync.uSyncVar) == GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)
3616 {
3617 ASMNopPause();
3618 if (ASMAtomicReadBool(&pArgs->fAbortSetup))
3619 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3620 if ( (iTry++ & 0xff) == 0
3621 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3622 {
3623 if ( fIsMaster
3624 && !ASMAtomicCmpXchgU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_ABORT, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT))
3625 break; /* race #1: slave has moved on, handle timeout in loop instead. */
3626 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3627 }
3628 TSCDELTA_DBG_CHECK_LOOP();
3629 }
3630
3631 if (!fIsMaster)
3632 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* race #1 */
3633 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3634
3635/** @todo Add a resumable state to pArgs so we don't waste time if we time
3636 * out or something. Timeouts are legit, any of the two CPUs may get
3637 * interrupted. */
3638
3639 /*
3640 * Start by seeing if we have a zero delta between the two CPUs.
3641 * This should normally be the case.
3642 */
3643 rc = supdrvTscDeltaVerify(pArgs, &MySync, pOtherSync, fIsMaster, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3644 if (RT_SUCCESS(rc))
3645 {
3646 if (fIsMaster)
3647 {
3648 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3649 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3650 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3651 }
3652 }
3653 /*
3654 * If the verification didn't time out, do regular delta measurements.
3655 * We retry this until we get a reasonable value.
3656 */
3657 else if (rc != VERR_TIMEOUT)
3658 {
3659 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
3660 for (iTry = 0; iTry < 12; iTry++)
3661 {
3662 /*
3663 * Check the state before we start.
3664 */
3665 uint32_t u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3666 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3667 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3668 {
3669 TSCDELTA_DBG_SYNC_MSG(("sync/loop/%s: #0 iTry=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iTry, u32Tmp));
3670 break;
3671 }
3672
3673 /*
3674 * Do the measurements.
3675 */
3676#ifdef GIP_TSC_DELTA_METHOD_1
3677 supdrvTscDeltaMethod1Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3678#elif defined(GIP_TSC_DELTA_METHOD_2)
3679 supdrvTscDeltaMethod2Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3680#else
3681# error "huh??"
3682#endif
3683
3684 /*
3685 * Check the state.
3686 */
3687 u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3688 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3689 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3690 {
3691 if (fIsMaster)
3692 TSCDELTA_DBG_SYNC_MSG(("sync/loop/master: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3693 else
3694 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/worker: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3695 break;
3696 }
3697
3698 /*
3699 * Success? If so, stop trying. Master decides.
3700 */
3701 if (fIsMaster)
3702 {
3703 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
3704 {
3705 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3706 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3707 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/master: #9 iTry=%u MyState=%#x\n", iTry, MySync.uSyncVar));
3708 break;
3709 }
3710 }
3711 }
3712 if (fIsMaster)
3713 pArgs->iTry = iTry;
3714 }
3715
3716 /*
3717 * End the synchroniziation dance. We tell the other that we're done,
3718 * then wait for the same kind of reply.
3719 */
3720 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_FINAL);
3721 ASMAtomicWriteNullPtr(ppMySync);
3722 iTry = 0;
3723 TSCDELTA_DBG_START_LOOP();
3724 while (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_FINAL)
3725 {
3726 iTry++;
3727 if ( iTry == 0
3728 && !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuWorker->idCpu))
3729 break; /* this really shouldn't happen. */
3730 TSCDELTA_DBG_CHECK_LOOP();
3731 ASMNopPause();
3732 }
3733
3734 /*
3735 * Collect some runtime stats.
3736 */
3737 if (fIsMaster)
3738 pArgs->cElapsedMasterTscTicks = ASMReadTSC() - MySync.uTscStart;
3739 else
3740 pArgs->cElapsedWorkerTscTicks = ASMReadTSC() - MySync.uTscStart;
3741 return 0;
3742}
3743
3744/**
3745 * Callback used by supdrvMeasureInitialTscDeltas() to read the TSC on two CPUs
3746 * and compute the delta between them.
3747 *
3748 * @param idCpu The CPU we are current scheduled on.
3749 * @param pvUser1 Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
3750 * @param pvUser2 Unused.
3751 */
3752static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3753{
3754 supdrvMeasureTscDeltaCallbackUnwrapped(idCpu, (PSUPDRVGIPTSCDELTARGS)pvUser1);
3755}
3756
3757
3758/**
3759 * Measures the TSC delta between the master GIP CPU and one specified worker
3760 * CPU.
3761 *
3762 * @returns VBox status code.
3763 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED on pure measurement
3764 * failure.
3765 * @param pDevExt Pointer to the device instance data.
3766 * @param idxWorker The index of the worker CPU from the GIP's array of
3767 * CPUs.
3768 *
3769 * @remarks This must be called with preemption enabled!
3770 */
static int supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
{
    int                 rc;
    int                 rc2;
    PSUPGLOBALINFOPAGE  pGip          = pDevExt->pGip;
    RTCPUID             idMaster      = pDevExt->idGipMaster;
    PSUPGIPCPU          pGipCpuWorker = &pGip->aCPUs[idxWorker];
    PSUPGIPCPU          pGipCpuMaster;
    uint32_t            iGipCpuMaster;
    uint32_t            u32Tmp;

    /* Validate input a bit. */
    AssertReturn(pGip, VERR_INVALID_PARAMETER);
    Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
    Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));

    /*
     * Don't attempt measuring the delta for the GIP master.  Its delta is
     * always GIP_TSC_DELTA_INITIAL_MASTER_VALUE by definition.
     */
    if (pGipCpuWorker->idCpu == idMaster)
    {
        if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
            ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
        return VINF_SUCCESS;
    }

    /*
     * One measurement at a time, at least for now.  We might be using
     * broadcast IPIs, so be nice to the rest of the system.
     */
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    rc = RTSemMutexRequest(pDevExt->mtxTscDelta, RT_INDEFINITE_WAIT);
#else
    rc = RTSemFastMutexRequest(pDevExt->mtxTscDelta);
#endif
    if (RT_FAILURE(rc))
        return rc;

    /*
     * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
     * try pick a different master.  (This fudge only works with multi core systems.)
     * ASSUMES related threads have adjacent APIC IDs.  ASSUMES two threads per core.
     *
     * We skip this on AMDs for now as their HTT is different from intel's and
     * it doesn't seem to have any favorable effect on the results.
     *
     * If the master is offline, we need a new master too, so share the code.
     */
    iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
    AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
    pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
    /* (idApic & ~1) == (idApic & ~1): adjacent APIC IDs, i.e. presumably two
       threads of the same core -- see the ASSUMES above. */
    if (   (   (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
            && pGip->cOnlineCpus > 2
            && ASMHasCpuId()
            && ASMIsValidStdRange(ASMCpuId_EAX(0))
            && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
            && (   !ASMIsAmdCpu()
                || ASMGetCpuFamily(u32Tmp = ASMCpuId_EAX(1)) > 0x15
                || (   ASMGetCpuFamily(u32Tmp)   == 0x15 /* Piledriver+, not bulldozer (FX-4150 didn't like it). */
                    && ASMGetCpuModelAMD(u32Tmp) >= 0x02) ) )
        || !RTMpIsCpuOnline(idMaster) )
    {
        /* Look for a substitute master: online, already measured, and not
           sharing a core with either the worker or the current master. */
        uint32_t i;
        for (i = 0; i < pGip->cCpus; i++)
            if (   i != iGipCpuMaster
                && i != idxWorker
                && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
                && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
                && pGip->aCPUs[i].idCpu != NIL_RTCPUID
                && pGip->aCPUs[i].idCpu != idMaster /* paranoia starts here... */
                && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu
                && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
                && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic
                && RTMpIsCpuOnline(pGip->aCPUs[i].idCpu))
            {
                iGipCpuMaster = i;
                pGipCpuMaster = &pGip->aCPUs[i];
                idMaster = pGipCpuMaster->idCpu;
                break;
            }
    }

    if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
    {
        /*
         * Initialize data package for the RTMpOnPair callback.
         */
        PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)RTMemAllocZ(sizeof(*pArgs));
        if (pArgs)
        {
            pArgs->pWorker      = pGipCpuWorker;
            pArgs->pMaster      = pGipCpuMaster;
            pArgs->pDevExt      = pDevExt;
            pArgs->pSyncMaster  = NULL;
            pArgs->pSyncWorker  = NULL;
            pArgs->cMaxTscTicks = ASMAtomicReadU64(&pGip->u64CpuHz) / 512; /* 1953 us */

            /*
             * Do the RTMpOnPair call.  We reset i64TSCDelta first so we
             * and supdrvMeasureTscDeltaCallback can use it as a success check.
             */
            /** @todo Store the i64TSCDelta result in pArgs first?   Perhaps deals with
             *        that when doing the restart loop reorg. */
            ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
            rc = RTMpOnPair(pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, RTMPON_F_CONCURRENT_EXEC,
                            supdrvMeasureTscDeltaCallback, pArgs, NULL);
            if (RT_SUCCESS(rc))
            {
#if 0
                SUPR0Printf("mponpair ticks: %9llu %9llu max: %9llu iTry: %u%s\n", pArgs->cElapsedMasterTscTicks,
                            pArgs->cElapsedWorkerTscTicks, pArgs->cMaxTscTicks, pArgs->iTry,
                            pArgs->fTimedOut ? " timed out" :"");
#endif
#if 0
                SUPR0Printf("rcVerify=%d iVerifyBadTscDiff=%lld cMinVerifyTscTicks=%lld cMaxVerifyTscTicks=%lld\n",
                            pArgs->rcVerify, pArgs->iVerifyBadTscDiff, pArgs->cMinVerifyTscTicks, pArgs->cMaxVerifyTscTicks);
#endif
                /* i64TSCDelta still being INT64_MAX means the callback never
                   managed to record a delta -> measurement failure. */
                if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
                {
                    /*
                     * Work the TSC delta applicability rating.  It starts
                     * optimistic in supdrvGipInit, we downgrade it here.
                     */
                    SUPGIPUSETSCDELTA enmRating;
                    if (   pGipCpuWorker->i64TSCDelta >  GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
                        || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
                        enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
                    else if (   pGipCpuWorker->i64TSCDelta >  GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
                             || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
                        enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
                    else
                        enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
                    /* Only ever downgrade (numerically increase) the rating. */
                    if (pGip->enmUseTscDelta < enmRating)
                    {
                        AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
                        ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
                    }
                }
                else
                    rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
            }
            /** @todo return try-again if we get an offline CPU error.   */

            RTMemFree(pArgs);
        }
        else
            rc = VERR_NO_MEMORY;
    }
    else
        rc = VERR_CPU_OFFLINE;

    /*
     * We're done now.
     */
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    rc2 = RTSemMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
#else
    rc2 = RTSemFastMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
#endif
    return rc;
}
3932
3933
3934/**
3935 * Resets the TSC-delta related TSC samples and optionally the deltas
3936 * themselves.
3937 *
3938 * @param pDevExt Pointer to the device instance data.
3939 * @param fResetTscDeltas Whether the TSC-deltas are also to be reset.
3940 *
3941 * @remarks This might be called while holding a spinlock!
3942 */
3943static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fResetTscDeltas)
3944{
3945 unsigned iCpu;
3946 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3947 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
3948 {
3949 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
3950 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
3951 if (fResetTscDeltas)
3952 {
3953 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpu->iCpuSet);
3954 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
3955 }
3956 }
3957}
3958
3959
3960/**
3961 * Picks an online CPU as the master TSC for TSC-delta computations.
3962 *
3963 * @returns VBox status code.
3964 * @param pDevExt Pointer to the device instance data.
3965 * @param pidxMaster Where to store the CPU array index of the chosen
3966 * master. Optional, can be NULL.
3967 */
3968static int supdrvTscPickMaster(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
3969{
3970 /*
3971 * Pick the first CPU online as the master TSC and make it the new GIP master based
3972 * on the APIC ID.
3973 *
3974 * Technically we can simply use "idGipMaster" but doing this gives us master as CPU 0
3975 * in most cases making it nicer/easier for comparisons. It is safe to update the GIP
3976 * master as this point since the sync/async timer isn't created yet.
3977 */
3978 unsigned iCpu;
3979 uint32_t idxMaster = UINT32_MAX;
3980 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3981 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
3982 {
3983 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
3984 if (idxCpu != UINT16_MAX)
3985 {
3986 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
3987 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
3988 {
3989 idxMaster = idxCpu;
3990 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
3991 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpu->idCpu);
3992 if (pidxMaster)
3993 *pidxMaster = idxMaster;
3994 return VINF_SUCCESS;
3995 }
3996 }
3997 }
3998 return VERR_CPU_OFFLINE;
3999}
4000
4001
4002/**
4003 * Performs the initial measurements of the TSC deltas between CPUs.
4004 *
4005 * This is called by supdrvGipCreate(), supdrvGipPowerNotificationCallback() or
4006 * triggered by it if threaded.
4007 *
4008 * @returns VBox status code.
4009 * @param pDevExt Pointer to the device instance data.
4010 *
4011 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
4012 * idCpu, GIP's online CPU set which are populated in
4013 * supdrvGipInitOnCpu().
4014 */
4015static int supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt)
4016{
4017 PSUPGIPCPU pGipCpuMaster;
4018 unsigned iCpu;
4019 unsigned iOddEven;
4020 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4021 uint32_t idxMaster = UINT32_MAX;
4022 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
4023
4024 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4025 supdrvTscResetSamples(pDevExt, true /* fClearDeltas */);
4026 int rc = supdrvTscPickMaster(pDevExt, &idxMaster);
4027 if (RT_FAILURE(rc))
4028 {
4029 SUPR0Printf("Failed to pick a CPU master for TSC-delta measurements rc=%Rrc\n", rc);
4030 return rc;
4031 }
4032 AssertReturn(idxMaster < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4033 pGipCpuMaster = &pGip->aCPUs[idxMaster];
4034 Assert(pDevExt->idGipMaster == pGipCpuMaster->idCpu);
4035
4036 /*
4037 * If there is only a single CPU online we have nothing to do.
4038 */
4039 if (pGip->cOnlineCpus <= 1)
4040 {
4041 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
4042 return VINF_SUCCESS;
4043 }
4044
4045 /*
4046 * Loop thru the GIP CPU array and get deltas for each CPU (except the
4047 * master). We do the CPUs with the even numbered APIC IDs first so that
4048 * we've got alternative master CPUs to pick from on hyper-threaded systems.
4049 */
4050 for (iOddEven = 0; iOddEven < 2; iOddEven++)
4051 {
4052 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4053 {
4054 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4055 if ( iCpu != idxMaster
4056 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
4057 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4058 {
4059 rc = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
4060 if (RT_FAILURE(rc))
4061 {
4062 SUPR0Printf("supdrvMeasureTscDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u\n", rc, iCpu,
4063 pGipCpuWorker->idCpu, idxMaster, pDevExt->idGipMaster, pGipCpuMaster->idCpu);
4064 break;
4065 }
4066
4067 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
4068 {
4069 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
4070 rc = VERR_TRY_AGAIN;
4071 break;
4072 }
4073 }
4074 }
4075 }
4076
4077 return rc;
4078}
4079
4080
4081#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4082
4083/**
4084 * Switches the TSC-delta measurement thread into the butchered state.
4085 *
4086 * @returns VBox status code.
4087 * @param pDevExt Pointer to the device instance data.
4088 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
4089 * @param pszFailed An error message to log.
4090 * @param rcFailed The error code to exit the thread with.
4091 */
4092static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
4093{
4094 if (!fSpinlockHeld)
4095 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4096
4097 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
4098 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4099 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
4100 return rcFailed;
4101}
4102
4103
/**
 * The TSC-delta measurement thread.
 *
 * A state machine driven by pDevExt->enmTscDeltaThreadState.  The state is
 * always read and updated while holding pDevExt->hTscDeltaSpinlock; each
 * switch case below is entered with the spinlock held and is responsible
 * for releasing it (directly or via supdrvTscDeltaThreadButchered()).
 *
 * @returns VBox status code.
 * @param   hThread     The thread handle.
 * @param   pvUser      Opaque pointer to the device instance data.
 */
static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
{
    PSUPDRVDEVEXT   pDevExt = (PSUPDRVDEVEXT)pvUser;
    uint32_t        cConsecutiveTimeouts = 0;
    int             rc = VERR_INTERNAL_ERROR_2;
    for (;;)
    {
        /*
         * Switch on the current state.
         */
        SUPDRVTSCDELTATHREADSTATE enmState;
        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
        enmState = pDevExt->enmTscDeltaThreadState;
        switch (enmState)
        {
            case kTscDeltaThreadState_Creating:
            {
                /* Acknowledge creation to supdrvTscDeltaThreadInit() and start listening. */
                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
                if (RT_FAILURE(rc))
                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
                /* fall thru */
            }

            case kTscDeltaThreadState_Listening:
            {
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);

                /*
                 * Linux counts uninterruptible sleeps as load, hence we shall do a
                 * regular, interruptible sleep here and ignore wake ups due to signals.
                 * See task_contributes_to_load() in include/linux/sched.h in the Linux sources.
                 */
                rc = RTThreadUserWaitNoResume(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
                if (   RT_FAILURE(rc)
                    && rc != VERR_TIMEOUT
                    && rc != VERR_INTERRUPTED)
                    return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
                RTThreadUserReset(pDevExt->hTscDeltaThread);
                break;
            }

            case kTscDeltaThreadState_WaitAndMeasure:
            {
                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
                if (RT_FAILURE(rc))
                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                RTThreadSleep(1);
                /* fall thru */
            }

            case kTscDeltaThreadState_Measuring:
            {
                /* NOTE(review): this case appears to only ever be reached via the
                   fall-thru above (spinlock released); only this thread sets the
                   Measuring state, so it shouldn't be observed at the top of the
                   loop.  If it ever were, the spinlock taken before the switch
                   would still be held here -- worth confirming. */
                cConsecutiveTimeouts = 0;
                if (pDevExt->fTscThreadRecomputeAllDeltas)
                {
                    /* Recompute all deltas, retrying on transient failures
                       (CPUs going on/offline while measuring). */
                    int cTries = 8;
                    int cMsWaitPerTry = 10;
                    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
                    Assert(pGip);
                    do
                    {
                        RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
                        rc = supdrvMeasureInitialTscDeltas(pDevExt);
                        if (   RT_SUCCESS(rc)
                            || (   RT_FAILURE(rc)
                                && rc != VERR_TRY_AGAIN
                                && rc != VERR_CPU_OFFLINE))
                        {
                            break;
                        }
                        RTThreadSleep(cMsWaitPerTry);
                    } while (cTries-- > 0);
                    pDevExt->fTscThreadRecomputeAllDeltas = false;
                }
                else
                {
                    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
                    unsigned iCpu;

                    /* Measure TSC-deltas only for the CPUs that are in the set. */
                    rc = VINF_SUCCESS;
                    for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
                    {
                        PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
                        if (RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
                        {
                            if (pGipCpuWorker->i64TSCDelta == INT64_MAX)
                            {
                                /* Keep the first failure status but continue with the rest. */
                                int rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu);
                                if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
                                    rc = rc2;
                            }
                            else
                            {
                                /*
                                 * The thread/someone must've called SUPR0TscDeltaMeasureBySetIndex(),
                                 * mark the delta as fine to get the timer thread off our back.
                                 */
                                RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
                                RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
                            }
                        }
                    }
                }
                /* Go back to listening unless someone changed the state while we measured. */
                RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
                if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
                    pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                Assert(rc != VERR_NOT_AVAILABLE);  /* VERR_NOT_AVAILABLE is used as init value, see supdrvTscDeltaThreadInit(). */
                ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
                break;
            }

            case kTscDeltaThreadState_Terminating:
                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
                return VINF_SUCCESS;

            case kTscDeltaThreadState_Butchered:
            default:
                return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
        }
    }

    /* Not reached: the loop above only exits via return statements. */
    return rc;
}
4240
4241
/**
 * Waits for the TSC-delta measurement thread to respond to a state change.
 *
 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
 *          other error code on internal error.
 *
 * @param   pDevExt         Pointer to the device instance data.
 * @param   enmCurState     The current state.
 * @param   enmNewState     The new state we're waiting for it to enter.
 */
static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
                                    SUPDRVTSCDELTATHREADSTATE enmNewState)
{
    /*
     * Wait a short while for the expected state transition.
     */
    int rc;
    /* The status of this first, short wait is deliberately ignored; the state
       check below decides how to proceed. */
    RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
    RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    if (pDevExt->enmTscDeltaThreadState == enmNewState)
    {
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        rc = VINF_SUCCESS;
    }
    else if (pDevExt->enmTscDeltaThreadState == enmCurState)
    {
        /*
         * Wait longer if the state has not yet transitioned to the one we want.
         */
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
        if (   RT_SUCCESS(rc)
            || rc == VERR_TIMEOUT)
        {
            /*
             * Check the state whether we've succeeded.
             */
            SUPDRVTSCDELTATHREADSTATE enmState;
            RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
            enmState = pDevExt->enmTscDeltaThreadState;
            RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
            if (enmState == enmNewState)
                rc = VINF_SUCCESS;
            else if (enmState == enmCurState)
            {
                /* Still stuck in the old state -> report timeout. */
                rc = VERR_TIMEOUT;
                OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
                            enmNewState));
            }
            else
            {
                /* The thread went somewhere unexpected. */
                rc = VERR_INTERNAL_ERROR;
                OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
                            enmState, enmNewState));
            }
        }
        else
            OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
    }
    else
    {
        /* Neither the old nor the new state: unexpected transition. */
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
        rc = VERR_INTERNAL_ERROR;
    }

    return rc;
}
4310
4311
4312/**
4313 * Signals the TSC-delta thread to start measuring TSC-deltas.
4314 *
4315 * @param pDevExt Pointer to the device instance data.
4316 * @param fForceAll Force re-calculating TSC-deltas on all CPUs.
4317 */
4318static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll)
4319{
4320 if (pDevExt->hTscDeltaThread != NIL_RTTHREAD)
4321 {
4322 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4323 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4324 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4325 {
4326 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4327 if (fForceAll)
4328 pDevExt->fTscThreadRecomputeAllDeltas = true;
4329 }
4330 else if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_WaitAndMeasure
4331 && fForceAll)
4332 pDevExt->fTscThreadRecomputeAllDeltas = true;
4333 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4334 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4335 }
4336}
4337
4338
4339/**
4340 * Terminates the actual thread running supdrvTscDeltaThread().
4341 *
4342 * This is an internal worker function for supdrvTscDeltaThreadInit() and
4343 * supdrvTscDeltaTerm().
4344 *
4345 * @param pDevExt Pointer to the device instance data.
4346 */
4347static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
4348{
4349 int rc;
4350 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4351 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
4352 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4353 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4354 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
4355 if (RT_FAILURE(rc))
4356 {
4357 /* Signal a few more times before giving up. */
4358 int cTriesLeft = 5;
4359 while (--cTriesLeft > 0)
4360 {
4361 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4362 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
4363 if (rc != VERR_TIMEOUT)
4364 break;
4365 }
4366 }
4367}
4368
4369
4370/**
4371 * Initializes and spawns the TSC-delta measurement thread.
4372 *
4373 * A thread is required for servicing re-measurement requests from events like
4374 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
4375 * under all contexts on all OSs.
4376 *
4377 * @returns VBox status code.
4378 * @param pDevExt Pointer to the device instance data.
4379 *
4380 * @remarks Must only be called -after- initializing GIP and setting up MP
4381 * notifications!
4382 */
4383static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
4384{
4385 int rc;
4386 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4387 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
4388 if (RT_SUCCESS(rc))
4389 {
4390 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
4391 if (RT_SUCCESS(rc))
4392 {
4393 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
4394 pDevExt->cMsTscDeltaTimeout = 60000;
4395 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
4396 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
4397 if (RT_SUCCESS(rc))
4398 {
4399 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
4400 if (RT_SUCCESS(rc))
4401 {
4402 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4403 return rc;
4404 }
4405
4406 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
4407 supdrvTscDeltaThreadTerminate(pDevExt);
4408 }
4409 else
4410 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
4411 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4412 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4413 }
4414 else
4415 OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
4416 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4417 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4418 }
4419 else
4420 OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
4421
4422 return rc;
4423}
4424
4425
4426/**
4427 * Terminates the TSC-delta measurement thread and cleanup.
4428 *
4429 * @param pDevExt Pointer to the device instance data.
4430 */
4431static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
4432{
4433 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
4434 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4435 {
4436 supdrvTscDeltaThreadTerminate(pDevExt);
4437 }
4438
4439 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
4440 {
4441 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4442 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4443 }
4444
4445 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4446 {
4447 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4448 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4449 }
4450
4451 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4452}
4453
4454#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4455
/**
 * Measure the TSC delta for the CPU given by its CPU set index.
 *
 * @returns VBox status code.
 * @retval  VERR_INTERRUPTED if interrupted while waiting.
 * @retval  VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if we were unable to get a
 *          measurement.
 * @retval  VERR_CPU_OFFLINE if the specified CPU is offline.
 * @retval  VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY if the measurement thread is
 *          busy and @a cMsWaitThread has been exhausted (threaded builds only).
 *
 * @param   pSession        The caller's session.  GIP must've been mapped.
 * @param   iCpuSet         The CPU set index of the CPU to measure.
 * @param   fFlags          Flags, SUP_TSCDELTA_MEASURE_F_XXX.
 * @param   cMsWaitRetry    Number of milliseconds to wait between each retry.
 * @param   cMsWaitThread   Number of milliseconds to wait for the thread to get
 *                          ready.
 * @param   cTries          Number of times to try, pass 0 for the default.
 */
SUPR0DECL(int) SUPR0TscDeltaMeasureBySetIndex(PSUPDRVSESSION pSession, uint32_t iCpuSet, uint32_t fFlags,
                                              RTMSINTERVAL cMsWaitRetry, RTMSINTERVAL cMsWaitThread, uint32_t cTries)
{
    PSUPDRVDEVEXT       pDevExt;
    PSUPGLOBALINFOPAGE  pGip;
    uint16_t            iGipCpu;
    int                 rc;
#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    uint64_t            msTsStartWait;
    uint32_t            iWaitLoop;
#endif

    /*
     * Validate and adjust the input.
     */
    AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
    if (!pSession->fGipReferenced)
        return VERR_WRONG_ORDER;

    pDevExt = pSession->pDevExt;
    AssertReturn(SUP_IS_DEVEXT_VALID(pDevExt), VERR_INVALID_PARAMETER);

    pGip = pDevExt->pGip;
    AssertPtrReturn(pGip, VERR_INTERNAL_ERROR_2);

    AssertReturn(iCpuSet < RTCPUSET_MAX_CPUS, VERR_INVALID_CPU_INDEX);
    AssertReturn(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx), VERR_INVALID_CPU_INDEX);
    iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    AssertReturn(iGipCpu < pGip->cCpus, VERR_INVALID_CPU_INDEX);

    if (fFlags & ~SUP_TSCDELTA_MEASURE_F_VALID_MASK)
        return VERR_INVALID_FLAGS;

    /*
     * The request is a noop if the TSC delta isn't being used.
     */
    if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
        return VINF_SUCCESS;

    /* Clamp the try count and the retry interval to sane ranges. */
    if (cTries == 0)
        cTries = 12;
    else if (cTries > 256)
        cTries = 256;

    if (cMsWaitRetry == 0)
        cMsWaitRetry = 2;
    else if (cMsWaitRetry > 1000)
        cMsWaitRetry = 1000;

#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    /*
     * Has the TSC already been measured and we're not forced to redo it?
     */
    if (   pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX
        && !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE))
        return VINF_SUCCESS;

    /*
     * Asynchronous request? Forward it to the thread, no waiting.
     */
    if (fFlags & SUP_TSCDELTA_MEASURE_F_ASYNC)
    {
        /** @todo Async. doesn't implement options like retries, waiting. We'll need
         *        to pass those options to the thread somehow and implement it in the
         *        thread. Check if anyone uses/needs fAsync before implementing this. */
        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
        RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
        if (   pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
            || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
        {
            pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
            rc = VINF_SUCCESS;
        }
        else if (pDevExt->enmTscDeltaThreadState != kTscDeltaThreadState_WaitAndMeasure)
            rc = VERR_THREAD_IS_DEAD;
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
        RTThreadUserSignal(pDevExt->hTscDeltaThread);
        /* NOTE(review): rc (possibly VERR_THREAD_IS_DEAD) is computed above but
           discarded; VINF_SUCCESS is returned unconditionally.  Presumably
           intentional for a fire-and-forget async request -- worth confirming. */
        return VINF_SUCCESS;
    }

    /*
     * If a TSC-delta measurement request is already being serviced by the thread,
     * wait 'cTries' times if a retry-timeout is provided, otherwise bail as busy.
     */
    msTsStartWait = RTTimeSystemMilliTS();
    for (iWaitLoop = 0;; iWaitLoop++)
    {
        uint64_t cMsElapsed;
        SUPDRVTSCDELTATHREADSTATE enmState;
        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
        enmState = pDevExt->enmTscDeltaThreadState;
        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);

        if (enmState == kTscDeltaThreadState_Measuring)
        { /* Must wait, the thread is busy. */ }
        else if (enmState == kTscDeltaThreadState_WaitAndMeasure)
        { /* Must wait, this state only says what will happen next. */ }
        else if (enmState == kTscDeltaThreadState_Terminating)
        { /* Must wait, this state only says what should happen next. */ }
        else
            break; /* All other states, the thread is either idly listening or dead. */

        /* Wait or fail. */
        if (cMsWaitThread == 0)
            return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
        cMsElapsed = RTTimeSystemMilliTS() - msTsStartWait;
        if (cMsElapsed >= cMsWaitThread)
            return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;

        /* Back off gradually: 1..10 ms per loop, but never past the deadline. */
        rc = RTThreadSleep(RT_MIN((RTMSINTERVAL)(cMsWaitThread - cMsElapsed), RT_MIN(iWaitLoop + 1, 10)));
        if (rc == VERR_INTERRUPTED)
            return rc;
    }
#endif /* SUPDRV_USE_TSC_DELTA_THREAD */

    /*
     * Try measure the TSC delta the given number of times.
     */
    for (;;)
    {
        /* Unless we're forced to measure the delta, check whether it's done already. */
        if (   !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE)
            && pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX)
        {
            rc = VINF_SUCCESS;
            break;
        }

        /* Measure it. */
        rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
        if (rc != VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED)
        {
            Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
            break;
        }

        /* Retry? */
        if (cTries <= 1)
            break;
        cTries--;

        /* Always delay between retries (be nice to the rest of the system
           and avoid the BSOD hounds). */
        rc = RTThreadSleep(cMsWaitRetry);
        if (rc == VERR_INTERRUPTED)
            break;
    }

    return rc;
}
4623
4624
4625/**
4626 * Service a TSC-delta measurement request.
4627 *
4628 * @returns VBox status code.
4629 * @param pDevExt Pointer to the device instance data.
4630 * @param pSession The support driver session.
4631 * @param pReq Pointer to the TSC-delta measurement request.
4632 */
4633int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
4634{
4635 uint32_t cTries;
4636 uint32_t iCpuSet;
4637 uint32_t fFlags;
4638 RTMSINTERVAL cMsWaitRetry;
4639
4640 /*
4641 * Validate and adjust/resolve the input so they can be passed onto SUPR0TscDeltaMeasureBySetIndex.
4642 */
4643 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
4644
4645 if (pReq->u.In.idCpu == NIL_RTCPUID)
4646 return VERR_INVALID_CPU_ID;
4647 iCpuSet = RTMpCpuIdToSetIndex(pReq->u.In.idCpu);
4648 if (iCpuSet >= RTCPUSET_MAX_CPUS)
4649 return VERR_INVALID_CPU_ID;
4650
4651 cTries = pReq->u.In.cRetries == 0 ? 0 : (uint32_t)pReq->u.In.cRetries + 1;
4652
4653 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
4654
4655 fFlags = 0;
4656 if (pReq->u.In.fAsync)
4657 fFlags |= SUP_TSCDELTA_MEASURE_F_ASYNC;
4658 if (pReq->u.In.fForce)
4659 fFlags |= SUP_TSCDELTA_MEASURE_F_FORCE;
4660
4661 return SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet, fFlags, cMsWaitRetry,
4662 cTries == 0 ? 5 * RT_MS_1SEC : cMsWaitRetry * cTries /*cMsWaitThread*/,
4663 cTries);
4664}
4665
4666
/**
 * Reads TSC with delta applied.
 *
 * Will try to resolve delta value INT64_MAX before applying it.  This is the
 * main purpose of this function, to handle the case where the delta needs to be
 * determined.
 *
 * @returns VBox status code.
 * @param   pDevExt     Pointer to the device instance data.
 * @param   pSession    The support driver session.
 * @param   pReq        Pointer to the TSC-read request.
 */
int VBOXCALL supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
{
    PSUPGLOBALINFOPAGE pGip;
    int rc;

    /*
     * Validate.  We require the client to have mapped GIP (no asserting on
     * ring-3 preconditions).
     */
    AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
    if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
        return VERR_WRONG_ORDER;
    pGip = pDevExt->pGip;
    AssertReturn(pGip, VERR_INTERNAL_ERROR_2);

    /*
     * We're usually here because we need to apply delta, but we shouldn't be
     * upset if the GIP is some different mode.
     */
    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    {
        uint32_t cTries = 0;
        for (;;)
        {
            /*
             * Start by gathering the data, using CLI for disabling preemption
             * while we do that.  The delta, APIC ID and TSC must be sampled on
             * the same CPU, which is why interrupts stay off until all three
             * are fetched.
             */
            RTCCUINTREG fEFlags = ASMIntDisableFlags();
            int         iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
            int         iGipCpu;
            if (RT_LIKELY(   (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                          && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
            {
                int64_t i64Delta = pGip->aCPUs[iGipCpu].i64TSCDelta;
                pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
                pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
                ASMSetFlags(fEFlags);

                /*
                 * If we're lucky we've got a delta, but no predictions here
                 * as this I/O control is normally only used when the TSC delta
                 * is set to INT64_MAX (i.e. not yet measured).
                 */
                if (i64Delta != INT64_MAX)
                {
                    pReq->u.Out.u64AdjustedTsc -= i64Delta;
                    rc = VINF_SUCCESS;
                    break;
                }

                /* Give up after a few times. */
                if (cTries >= 4)
                {
                    rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
                    break;
                }

                /* Need to measure the delta and try again. */
                rc = supdrvMeasureTscDeltaOne(pDevExt, iGipCpu);
                Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
                /** @todo should probably delay on failure... dpc watchdogs */
            }
            else
            {
                /* This really shouldn't happen: the current CPU is not in the
                   GIP's CPU-set-index mapping table. */
                AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
                pReq->u.Out.idApic = ASMGetApicId();
                pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
                ASMSetFlags(fEFlags);
                rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
                break;
            }
        }
    }
    else
    {
        /*
         * No delta to apply.  Easy.  Deal with preemption the lazy way: just
         * keep interrupts off while sampling the APIC ID and TSC.
         */
        RTCCUINTREG fEFlags = ASMIntDisableFlags();
        int         iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
        int         iGipCpu;
        if (RT_LIKELY(   (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
                      && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
            pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
        else
            pReq->u.Out.idApic = ASMGetApicId();
        pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
        ASMSetFlags(fEFlags);
        rc = VINF_SUCCESS;
    }

    return rc;
}
4774
4775
4776/**
4777 * Worker for supdrvIOCtl_GipSetFlags.
4778 *
4779 * @returns VBox status code.
4780 * @param pDevExt Pointer to the device instance data.
4781 * @param pSession The support driver session.
4782 * @param fOrMask The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
4783 * @param fAndMask The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
4784 *
4785 * @remarks This function doesn't validate any of the flags.
4786 */
4787static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
4788{
4789 uint32_t cRefs;
4790 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4791
4792 /*
4793 * Compute GIP test-mode flags.
4794 */
4795 if (fOrMask & SUPGIP_FLAGS_TESTING_ENABLE)
4796 {
4797 pSession->fGipTestMode = true;
4798 cRefs = ASMAtomicIncU32(&pDevExt->cGipTestModeRefs);
4799 if (cRefs == 1)
4800 fOrMask |= SUPGIP_FLAGS_TESTING | SUPGIP_FLAGS_TESTING_START;
4801 }
4802 else
4803 {
4804 cRefs = ASMAtomicDecU32(&pDevExt->cGipTestModeRefs);
4805 pSession->fGipTestMode = false;
4806 if (!cRefs)
4807 fOrMask |= SUPGIP_FLAGS_TESTING_STOP;
4808 }
4809
4810 /*
4811 * Commit the flags.
4812 */
4813 uint32_t fFlags = ASMAtomicUoReadU32(&pGip->fFlags);
4814 fFlags |= fOrMask;
4815 fFlags &= fAndMask;
4816 ASMAtomicWriteU32(&pGip->fFlags, fFlags);
4817 return VINF_SUCCESS;
4818}
4819
4820
4821/**
4822 * Sets GIP test mode parameters.
4823 *
4824 * @returns VBox status code.
4825 * @param pDevExt Pointer to the device instance data.
4826 * @param pSession The support driver session.
4827 * @param fOrMask The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
4828 * @param fAndMask The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
4829 */
4830int VBOXCALL supdrvIOCtl_GipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
4831{
4832 PSUPGLOBALINFOPAGE pGip;
4833
4834 /*
4835 * Validate. We require the client to have mapped GIP (no asserting on
4836 * ring-3 preconditions).
4837 */
4838 AssertPtr(pDevExt); AssertPtr(pSession); /* paranoia^2 */
4839 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4840 return VERR_WRONG_ORDER;
4841 pGip = pDevExt->pGip;
4842 AssertReturn(pGip, VERR_INTERNAL_ERROR_3);
4843
4844 if (fOrMask & ~SUPGIP_FLAGS_VALID_MASK)
4845 return VERR_INVALID_PARAMETER;
4846 if ((fAndMask & ~SUPGIP_FLAGS_VALID_MASK) != ~SUPGIP_FLAGS_VALID_MASK)
4847 return VERR_INVALID_PARAMETER;
4848
4849 return supdrvGipSetFlags(pDevExt, pSession, fOrMask, fAndMask);
4850}
4851
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette