VirtualBox

source: vbox/trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp@106837

Last change on this file since 106837 was 106640, checked in by vboxsync, 3 months ago

SUPDrv: Some tiny fixes and cleanup. jiraref:VBP-1253

1/* $Id: SUPDrvGip.cpp 106640 2024-10-24 00:31:41Z vboxsync $ */
2/** @file
3 * VBoxDrv - The VirtualBox Support Driver - Common code for GIP.
4 */
5
6/*
7 * Copyright (C) 2006-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#define LOG_GROUP LOG_GROUP_SUP_DRV
42#define SUPDRV_AGNOSTIC
43#include "SUPDrvInternal.h"
44#ifndef PAGE_SHIFT
45# include <iprt/param.h>
46#endif
47#include <iprt/asm.h>
48#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
49# include <iprt/asm-amd64-x86.h>
50#elif defined(RT_ARCH_ARM64) || defined(RT_ARCH_ARM32)
51# include <iprt/asm-arm.h>
52#else
53# error "Port me!"
54#endif
55#include <iprt/asm-math.h>
56#include <iprt/cpuset.h>
57#include <iprt/handletable.h>
58#include <iprt/mem.h>
59#include <iprt/mp.h>
60#include <iprt/power.h>
61#include <iprt/process.h>
62#include <iprt/semaphore.h>
63#include <iprt/spinlock.h>
64#include <iprt/thread.h>
65#include <iprt/uuid.h>
66#include <iprt/net.h>
67#include <iprt/crc.h>
68#include <iprt/string.h>
69#include <iprt/timer.h>
70#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
71# include <iprt/rand.h>
72# include <iprt/path.h>
73#endif
74#include <iprt/uint128.h>
75#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
76# include <iprt/x86.h>
77#elif defined(RT_ARCH_ARM64)
78# include <iprt/armv8.h>
79#endif
80
81#include <VBox/param.h>
82#include <VBox/log.h>
83#include <VBox/err.h>
84
85#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
86# include "dtrace/SUPDrv.h"
87#else
88/* ... */
89#endif
90
91
92/*********************************************************************************************************************************
93* Defined Constants And Macros *
94*********************************************************************************************************************************/
95/** The interval, in GIP updates, at which we recalculate the u32UpdateHz and
96 * u32UpdateIntervalNS GIP members. The value must be a power of 2.
97 *
98 * Warning: Bumping this too high might overflow u32UpdateIntervalNS.
99 */
100#define GIP_UPDATEHZ_RECALC_FREQ 0x800
101
102/** A reserved TSC value used for synchronization as well as measurement of
103 * TSC deltas. */
104#define GIP_TSC_DELTA_RSVD UINT64_MAX
105/** The number of TSC delta measurement loops in total (includes primer and
106 * read-time loops). */
107#define GIP_TSC_DELTA_LOOPS 96
108/** The number of cache primer loops. */
109#define GIP_TSC_DELTA_PRIMER_LOOPS 4
110/** The number of loops during which we keep computing the minimum read time. */
111#define GIP_TSC_DELTA_READ_TIME_LOOPS 24
112
113/** The TSC frequency refinement period in seconds.
114 * The timer fires after 200ms, then every second, this value just says when
115 * to stop it after that. */
116#define GIP_TSC_REFINE_PERIOD_IN_SECS 12
117/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_PRACTICALLY_ZERO rating */
118#define GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO 32
119/** The TSC-delta threshold for the SUPGIPUSETSCDELTA_ROUGHLY_ZERO rating */
120#define GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO 448
121/** The TSC delta value for the initial GIP master - 0 in regular builds.
122 * To test the delta code this can be set to a non-zero value. */
123#if 0
124# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(170139095182512) /* 0x00009abd9854acb0 */
125#else
126# define GIP_TSC_DELTA_INITIAL_MASTER_VALUE INT64_C(0)
127#endif
128
129AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS < GIP_TSC_DELTA_READ_TIME_LOOPS);
130AssertCompile(GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS < GIP_TSC_DELTA_LOOPS);
131
132/** @def VBOX_SVN_REV
133 * The makefile should define this if it can. */
134#ifndef VBOX_SVN_REV
135# define VBOX_SVN_REV 0
136#endif
137
138#if 0 /* Don't start the GIP timers. Useful when debugging the IPRT timer code. */
139# define DO_NOT_START_GIP
140#endif
141
142
143/*********************************************************************************************************************************
144* Internal Functions *
145*********************************************************************************************************************************/
146static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
147static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
148static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask);
149static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz);
150static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fClearDeltas);
151#ifdef SUPDRV_USE_TSC_DELTA_THREAD
152static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
153static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
154static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll);
155#else
156static int supdrvTscMeasureInitialDeltas(PSUPDRVDEVEXT pDevExt);
157static int supdrvTscMeasureDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
158#endif
159
160
161/*********************************************************************************************************************************
162* Global Variables *
163*********************************************************************************************************************************/
164DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
165SUPR0_EXPORT_SYMBOL(g_pSUPGlobalInfoPage);
166
167
168
169/*
170 *
171 * Misc Common GIP Code
172 * Misc Common GIP Code
173 * Misc Common GIP Code
174 *
175 *
176 */
177
178
179/**
180 * Finds the GIP CPU index corresponding to @a idCpu.
181 *
182 * @returns GIP CPU array index, UINT32_MAX if not found.
183 * @param pGip The GIP.
184 * @param idCpu The CPU ID.
185 */
186static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
187{
188 uint32_t i;
189 for (i = 0; i < pGip->cCpus; i++)
190 if (pGip->aCPUs[i].idCpu == idCpu)
191 return i;
192 return UINT32_MAX;
193}
194
195
196/**
197 * Gets the APIC ID using the best available method.
198 *
199 * @returns APIC ID.
200 * @param pGip The GIP, for SUPGIPGETCPU_XXX.
201 *
202 * @note APIC ID == CPU ID on non-x86 platforms.
203 */
204DECLINLINE(uint32_t) supdrvGipGetApicId(PSUPGLOBALINFOPAGE pGip)
205{
206#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
207 if (pGip->fGetGipCpu & SUPGIPGETCPU_APIC_ID_EXT_0B)
208 return ASMGetApicIdExt0B();
209 if (pGip->fGetGipCpu & SUPGIPGETCPU_APIC_ID_EXT_8000001E)
210 return ASMGetApicIdExt8000001E();
211 return ASMGetApicId();
212
213#elif defined(RT_ARCH_ARM64) && defined(RT_OS_WINDOWS)
214 RT_NOREF(pGip);
215 return (uint32_t)ASMGetThreadIdRoEL0();
216
217#else
218# error "port me"
219#endif
220}
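/* Illustrative sketch (not part of the driver): how the APIC ID returned by
 * supdrvGipGetApicId() combines with the GIP lookup table to find the
 * caller's per-CPU entry, mirroring supdrvGipReInitCpuCallback() below.
 * Assumes preemption/interrupts are disabled so the CPU cannot change:
 * @code
 *   uint32_t const idApic = supdrvGipGetApicId(pGip);
 *   if (idApic < RT_ELEMENTS(pGip->aiCpuFromApicId))
 *   {
 *       uint16_t const iCpu = pGip->aiCpuFromApicId[idApic];
 *       if (iCpu < pGip->cCpus)
 *           return &pGip->aCPUs[iCpu];
 *   }
 *   return NULL;
 * @endcode
 */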
221
222
223/**
224 * Gets the APIC ID using the best available method, slow version.
225 *
226 * @note APIC ID == CPU ID on non-x86 platforms.
227 */
228static uint32_t supdrvGipGetApicIdSlow(void)
229{
230#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
231 uint32_t const idApic = ASMGetApicId();
232
233 /* The Intel CPU topology leaf: */
234 uint32_t uOther = ASMCpuId_EAX(0);
235 if (uOther >= UINT32_C(0xb) && RTX86IsValidStdRange(uOther))
236 {
237 uint32_t uEax = 0;
238 uint32_t uEbx = 0;
239 uint32_t uEcx = 0;
240 uint32_t uEdx = 0;
241# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
242 ASMCpuId_Idx_ECX(0xb, 0, &uEax, &uEbx, &uEcx, &uEdx);
243# else
244 ASMCpuIdExSlow(0xb, 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
245# endif
246 if ((uEcx >> 8) != 0) /* level type != invalid */
247 {
248 if ((uEdx & 0xff) == idApic)
249 return uEdx;
250 AssertMsgFailed(("ASMGetApicIdExt0B=>%#x idApic=%#x\n", uEdx, idApic));
251 }
252 }
253
254 /* The AMD leaf: */
255 uOther = ASMCpuId_EAX(UINT32_C(0x80000000));
256 if (uOther >= UINT32_C(0x8000001e) && RTX86IsValidExtRange(uOther))
257 {
258 uOther = ASMGetApicIdExt8000001E();
259 if ((uOther & 0xff) == idApic)
260 return uOther;
261 AssertMsgFailed(("ASMGetApicIdExt8000001E=>%#x idApic=%#x\n", uOther, idApic));
262 }
263 return idApic;
264
265#elif defined(RT_ARCH_ARM64) && defined(RT_OS_WINDOWS)
266 return (uint32_t)ASMGetThreadIdRoEL0();
267
268#else
269# error "port me"
270#endif
271}
272
273
274
275/*
276 *
277 * GIP Mapping and Unmapping Related Code.
278 * GIP Mapping and Unmapping Related Code.
279 * GIP Mapping and Unmapping Related Code.
280 *
281 *
282 */
283
284
285/**
286 * (Re-)initializes the per-cpu structure prior to starting or resuming the GIP
287 * updating.
288 *
289 * @param pGipCpu The per CPU structure for this CPU.
290 * @param u64NanoTS The current time.
291 */
292static void supdrvGipReInitCpu(PSUPGIPCPU pGipCpu, uint64_t u64NanoTS)
293{
294 /*
295 * Here we don't really care about applying the TSC delta. The re-initialized
296 * value is not particularly relevant while (re)starting the GIP, as the first
297 * few updates will be ignored anyway; see supdrvGipDoUpdateCpu().
298 */
299 pGipCpu->u64TSC = ASMReadTSC() - pGipCpu->u32UpdateIntervalTSC;
300 pGipCpu->u64NanoTS = u64NanoTS;
301}
302
303
304/**
305 * Set the current TSC and NanoTS value for the CPU.
306 *
307 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
308 * @param pvUser1 Pointer to the ring-0 GIP mapping.
309 * @param pvUser2 Pointer to the variable holding the current time.
310 */
311static DECLCALLBACK(void) supdrvGipReInitCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
312{
313 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser1;
314 uint32_t const idApic = supdrvGipGetApicId(pGip);
315 if (idApic < RT_ELEMENTS(pGip->aiCpuFromApicId))
316 {
317 unsigned const iCpu = pGip->aiCpuFromApicId[idApic];
318
319 if (RT_LIKELY(iCpu < pGip->cCpus && pGip->aCPUs[iCpu].idCpu == idCpu))
320 supdrvGipReInitCpu(&pGip->aCPUs[iCpu], *(uint64_t *)pvUser2);
321 else
322 LogRelMax(64, ("supdrvGipReInitCpuCallback: iCpu=%#x out of bounds (%#zx, idApic=%#x)\n",
323 iCpu, RT_ELEMENTS(pGip->aiCpuFromApicId), idApic));
324 }
325 else
326 LogRelMax(64, ("supdrvGipReInitCpuCallback: idApic=%#x out of bounds (%#zx)\n",
327 idApic, RT_ELEMENTS(pGip->aiCpuFromApicId)));
328
329 NOREF(pvUser2);
330}
331
332
333/**
334 * State structure for supdrvGipDetectGetGipCpuCallback.
335 */
336typedef struct SUPDRVGIPDETECTGETCPU
337{
338 /** Bitmap of APIC IDs that have been seen (initialized to zero).
339 * Used to detect duplicate APIC IDs (paranoia). */
340 uint8_t volatile bmApicId[4096 / 8];
341 /** Mask of supported GIP CPU getter methods (SUPGIPGETCPU_XXX) (all bits set
342 * initially). The callback clears the methods not detected. */
343 uint32_t volatile fSupported;
344 /** The ID of the first CPU for which the callback detected any kind of
345 * range issue (initialized to NIL_RTCPUID). */
346 RTCPUID volatile idCpuProblem;
347} SUPDRVGIPDETECTGETCPU;
348/** Pointer to state structure for supdrvGipDetectGetGipCpuCallback. */
349typedef SUPDRVGIPDETECTGETCPU *PSUPDRVGIPDETECTGETCPU;
350
351
352/**
353 * Checks for alternative ways of getting the CPU ID.
354 *
355 * This also checks the APIC ID, CPU ID and CPU set index values against the
356 * GIP tables.
357 *
358 * @param idCpu The CPU ID. Unused - we have to use the APIC ID.
359 * @param pvUser1 Pointer to the state structure.
360 * @param pvUser2 Pointer to the GIP.
361 */
362static DECLCALLBACK(void) supdrvGipDetectGetGipCpuCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
363{
364 PSUPDRVGIPDETECTGETCPU pState = (PSUPDRVGIPDETECTGETCPU)pvUser1;
365 PSUPGLOBALINFOPAGE pGip = (PSUPGLOBALINFOPAGE)pvUser2;
366 int const iCpuSet = RTMpCpuIdToSetIndex(idCpu);
367 uint32_t fSupported = 0;
368 uint32_t idApic;
369#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
370 uint32_t uEax, uEbx, uEcx, uEdx;
371#else
372 uint32_t const uEax = 0; /* Dummy for LogRel. */
373#endif
374 NOREF(pGip);
375
376 AssertMsg(idCpu == RTMpCpuId(), ("idCpu=%#x RTMpCpuId()=%#x\n", idCpu, RTMpCpuId())); /* paranoia^3 */
377
378#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
379 /*
380 * Check that the CPU ID and CPU set index are interchangeable.
381 */
382 if ((RTCPUID)iCpuSet == idCpu)
383 {
384 AssertCompile(RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS));
385 if ( iCpuSet >= 0
386 && iCpuSet < RTCPUSET_MAX_CPUS
387 && RT_IS_POWER_OF_TWO(RTCPUSET_MAX_CPUS))
388 {
389 PSUPGIPCPU pGipCpu = SUPGetGipCpuBySetIndex(pGip, iCpuSet);
390
391 /*
392 * Check whether the IDTR.LIMIT contains a CPU number.
393 */
394# ifdef RT_ARCH_X86
395 uint16_t const cbIdt = sizeof(X86DESC64SYSTEM) * 256;
396# else
397 uint16_t const cbIdt = sizeof(X86DESCGATE) * 256;
398# endif
399 RTIDTR Idtr;
400 ASMGetIDTR(&Idtr);
401 if (Idtr.cbIdt >= cbIdt)
402 {
403 uint32_t uTmp = Idtr.cbIdt - cbIdt;
404 uTmp &= RTCPUSET_MAX_CPUS - 1;
405 if (uTmp == idCpu)
406 {
407 RTIDTR Idtr2;
408 ASMGetIDTR(&Idtr2);
409 if (Idtr2.cbIdt == Idtr.cbIdt)
410 fSupported |= SUPGIPGETCPU_IDTR_LIMIT_MASK_MAX_SET_CPUS;
411 }
412 }
413
414 /*
415 * Check whether RDTSCP is an option.
416 */
417 if (ASMHasCpuId())
418 {
419 if ( RTX86IsValidExtRange(ASMCpuId_EAX(UINT32_C(0x80000000)))
420 && (ASMCpuId_EDX(UINT32_C(0x80000001)) & X86_CPUID_EXT_FEATURE_EDX_RDTSCP) )
421 {
422 uint32_t uAux;
423 ASMReadTscWithAux(&uAux);
424 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
425 {
426 ASMNopPause();
427 ASMReadTscWithAux(&uAux);
428 if ((uAux & (RTCPUSET_MAX_CPUS - 1)) == idCpu)
429 fSupported |= SUPGIPGETCPU_RDTSCP_MASK_MAX_SET_CPUS;
430 }
431
432 if (pGipCpu)
433 {
434 uint32_t const uGroupedAux = (uint8_t)pGipCpu->iCpuGroupMember | ((uint32_t)pGipCpu->iCpuGroup << 8);
435 if ( (uAux & UINT16_MAX) == uGroupedAux
436 && pGipCpu->iCpuGroupMember <= UINT8_MAX)
437 {
438 ASMNopPause();
439 ASMReadTscWithAux(&uAux);
440 if ((uAux & UINT16_MAX) == uGroupedAux)
441 fSupported |= SUPGIPGETCPU_RDTSCP_GROUP_IN_CH_NUMBER_IN_CL;
442 }
443 }
444 }
445 }
446 }
447 }
448
449 /*
450 * Check for extended APIC ID methods.
451 */
452 idApic = UINT32_MAX;
453 uEax = ASMCpuId_EAX(0);
454 if (uEax >= UINT32_C(0xb) && RTX86IsValidStdRange(uEax))
455 {
456# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
457 ASMCpuId_Idx_ECX(0xb, 0, &uEax, &uEbx, &uEcx, &uEdx);
458# else
459 ASMCpuIdExSlow(0xb, 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
460# endif
461 if ((uEcx >> 8) != 0) /* level type != invalid */
462 {
463 if (RT_LIKELY( uEdx < RT_ELEMENTS(pGip->aiCpuFromApicId)
464 && !ASMBitTest(pState->bmApicId, uEdx)))
465 {
466 if (uEdx == ASMGetApicIdExt0B())
467 {
468 idApic = uEdx;
469 fSupported |= SUPGIPGETCPU_APIC_ID_EXT_0B;
470 }
471 else
472 AssertMsgFailed(("%#x vs %#x\n", uEdx, ASMGetApicIdExt0B()));
473 }
474 }
475 }
476
477 uEax = ASMCpuId_EAX(UINT32_C(0x80000000));
478 if (uEax >= UINT32_C(0x8000001e) && RTX86IsValidExtRange(uEax))
479 {
480# if defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
481 ASMCpuId_Idx_ECX(UINT32_C(0x8000001e), 0, &uEax, &uEbx, &uEcx, &uEdx);
482# else
483 ASMCpuIdExSlow(UINT32_C(0x8000001e), 0, 0, 0, &uEax, &uEbx, &uEcx, &uEdx);
484# endif
485 if (uEax || uEbx || uEcx || uEdx)
486 {
487 if (RT_LIKELY( uEax < RT_ELEMENTS(pGip->aiCpuFromApicId)
488 && ( idApic == UINT32_MAX
489 || idApic == uEax)
490 && !ASMBitTest(pState->bmApicId, uEax)))
491 {
492 if (uEax == ASMGetApicIdExt8000001E())
493 {
494 idApic = uEax;
495 fSupported |= SUPGIPGETCPU_APIC_ID_EXT_8000001E;
496 }
497 else
498 AssertMsgFailed(("%#x vs %#x\n", uEax, ASMGetApicIdExt8000001E()));
499 }
500 }
501 }
502
503#else /* !defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86) */
504 idApic = supdrvGipGetApicIdSlow();
505#endif /* !defined(RT_ARCH_AMD64) && !defined(RT_ARCH_X86) */
506
507 /*
508 * Check that the APIC ID is unique.
509 */
510#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
511 uEax = ASMGetApicId();
512 if (RT_LIKELY( uEax < RT_ELEMENTS(pGip->aiCpuFromApicId)
513 && ( idApic == UINT32_MAX
514 || idApic == uEax)
515 && !ASMAtomicBitTestAndSet(pState->bmApicId, uEax)))
516 {
517 idApic = uEax;
518 fSupported |= SUPGIPGETCPU_APIC_ID;
519 }
520 else
521#endif /* defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) */
522 if ( idApic == UINT32_MAX
523 || idApic >= RT_ELEMENTS(pGip->aiCpuFromApicId) /* paranoia */
524 || ASMAtomicBitTestAndSet(pState->bmApicId, idApic))
525 {
526 AssertCompile(sizeof(pState->bmApicId) * 8 == RT_ELEMENTS(pGip->aiCpuFromApicId));
527 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
528 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x/%#x - duplicate APIC ID.\n",
529 idCpu, iCpuSet, uEax, idApic));
530 }
531
532 /*
533 * Check that the iCpuSet is within the expected range.
534 */
535 if (RT_UNLIKELY( iCpuSet < 0
536 || (unsigned)iCpuSet >= RTCPUSET_MAX_CPUS
537 || (unsigned)iCpuSet >= RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)))
538 {
539 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
540 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU set index is out of range.\n",
541 idCpu, iCpuSet, idApic));
542 }
543 else
544 {
545 RTCPUID idCpu2 = RTMpCpuIdFromSetIndex(iCpuSet);
546 if (RT_UNLIKELY(idCpu2 != idCpu))
547 {
548 ASMAtomicCmpXchgU32(&pState->idCpuProblem, idCpu, NIL_RTCPUID);
549 LogRel(("supdrvGipDetectGetGipCpuCallback: idCpu=%#x iCpuSet=%d idApic=%#x - CPU id/index roundtrip problem: %#x\n",
550 idCpu, iCpuSet, idApic, idCpu2));
551 }
552 }
553
554 /*
555 * Update the supported feature mask before we return.
556 */
557 ASMAtomicAndU32(&pState->fSupported, fSupported);
558
559 NOREF(pvUser2);
560}
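/* The IDTR.LIMIT trick above, with numbers (an illustration, not normative):
 * if the host OS programs each CPU's IDT limit as the architectural size plus
 * the CPU's set index, then on CPU 5 we would read Idtr.cbIdt = cbIdt + 5 and
 * recover uTmp = (Idtr.cbIdt - cbIdt) & (RTCPUSET_MAX_CPUS - 1) = 5.  The
 * second ASMGetIDTR() read and the cbIdt equality check guard against having
 * been rescheduled between the two reads. */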
561
562
563/**
564 * Increase the timer frequency on hosts where this is possible (NT).
565 *
566 * The idea is that more interrupts are better for us... Also, it's better that
567 * we increase the timer frequency ourselves, because we might end up getting
568 * inaccurate callbacks if someone else does it.
569 *
570 * @param pDevExt Sets u32SystemTimerGranularityGrant if increased.
571 */
572static void supdrvGipRequestHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
573{
574 if (pDevExt->u32SystemTimerGranularityGrant == 0)
575 {
576 uint32_t u32SystemResolution;
577 if ( RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 976563 /* 1024 HZ */, &u32SystemResolution))
578 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1000000 /* 1000 HZ */, &u32SystemResolution))
579 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 1953125 /* 512 HZ */, &u32SystemResolution))
580 || RT_SUCCESS_NP(RTTimerRequestSystemGranularity( 2000000 /* 500 HZ */, &u32SystemResolution))
581 )
582 {
583#if 0 /* def VBOX_STRICT - this somehow triggers bogus assertions on Windows 10 */
584 uint32_t u32After = RTTimerGetSystemGranularity();
585 AssertMsg(u32After <= u32SystemResolution, ("u32After=%u u32SystemResolution=%u\n", u32After, u32SystemResolution));
586#endif
587 pDevExt->u32SystemTimerGranularityGrant = u32SystemResolution;
588 }
589 }
590}
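/* Granularity-to-rate arithmetic for the request list above (illustrative):
 * 10^9 ns / 1024 Hz = 976 562.5 ns, rounded up to the 976 563 used here,
 * while 10^9 / 512 = 1 953 125 ns is exact.  The candidates are ordered
 * finest first, so the highest rate the host is willing to grant wins. */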
591
592
593/**
594 * Undoes supdrvGipRequestHigherTimerFrequencyFromSystem.
595 *
596 * @param pDevExt Clears u32SystemTimerGranularityGrant.
597 */
598static void supdrvGipReleaseHigherTimerFrequencyFromSystem(PSUPDRVDEVEXT pDevExt)
599{
600 if (pDevExt->u32SystemTimerGranularityGrant)
601 {
602 int rc2 = RTTimerReleaseSystemGranularity(pDevExt->u32SystemTimerGranularityGrant);
603 AssertRC(rc2);
604 pDevExt->u32SystemTimerGranularityGrant = 0;
605 }
606}
607
608
609/**
610 * Maps the GIP into userspace and/or gets the physical address of the GIP.
611 *
612 * @returns IPRT status code.
613 * @param pSession Session to which the GIP mapping should belong.
614 * @param ppGipR3 Where to store the address of the ring-3 mapping. (optional)
615 * @param pHCPhysGip Where to store the physical address. (optional)
616 *
617 * @remark There is no reference counting on the mapping, so one call to this function
618 * counts globally as one reference. One call to SUPR0GipUnmap() will unmap the GIP
619 * and remove the session as a GIP user.
620 */
621SUPR0DECL(int) SUPR0GipMap(PSUPDRVSESSION pSession, PRTR3PTR ppGipR3, PRTHCPHYS pHCPhysGip)
622{
623 int rc;
624 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
625 RTR3PTR pGipR3 = NIL_RTR3PTR;
626 RTHCPHYS HCPhys = NIL_RTHCPHYS;
627 LogFlow(("SUPR0GipMap: pSession=%p ppGipR3=%p pHCPhysGip=%p\n", pSession, ppGipR3, pHCPhysGip));
628
629 /*
630 * Validate
631 */
632 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
633 AssertPtrNullReturn(ppGipR3, VERR_INVALID_POINTER);
634 AssertPtrNullReturn(pHCPhysGip, VERR_INVALID_POINTER);
635
636#ifdef SUPDRV_USE_MUTEX_FOR_GIP
637 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
638#else
639 RTSemFastMutexRequest(pDevExt->mtxGip);
640#endif
641 if (pDevExt->pGip)
642 {
643 /*
644 * Map it?
645 */
646 rc = VINF_SUCCESS;
647 if (ppGipR3)
648 {
649 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
650 rc = RTR0MemObjMapUser(&pSession->GipMapObjR3, pDevExt->GipMemObj, (RTR3PTR)-1, 0,
651 RTMEM_PROT_READ, NIL_RTR0PROCESS);
652 if (RT_SUCCESS(rc))
653 pGipR3 = RTR0MemObjAddressR3(pSession->GipMapObjR3);
654 }
655
656 /*
657 * Get physical address.
658 */
659 if (pHCPhysGip && RT_SUCCESS(rc))
660 HCPhys = pDevExt->HCPhysGip;
661
662 /*
663 * Reference globally.
664 */
665 if (!pSession->fGipReferenced && RT_SUCCESS(rc))
666 {
667 pSession->fGipReferenced = 1;
668 pDevExt->cGipUsers++;
669 if (pDevExt->cGipUsers == 1)
670 {
671 PSUPGLOBALINFOPAGE pGipR0 = pDevExt->pGip;
672 uint64_t u64NanoTS;
673
674 /*
675 * GIP starts/resumes updating again. On Windows we bump the
676 * host timer frequency to make sure we don't get stuck in guest
677 * mode and to get better timer (and possibly clock) accuracy.
678 */
679 LogFlow(("SUPR0GipMap: Resumes GIP updating\n"));
680
681 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
682
683 /*
684 * Round each CPU's transaction ID up to the next update-Hz recalc boundary and clear the recalc anchor (skipped the very first time).
685 */
686 if (pGipR0->aCPUs[0].u32TransactionId != 2 /* not the first time */)
687 {
688 unsigned i;
689 for (i = 0; i < pGipR0->cCpus; i++)
690 ASMAtomicUoWriteU32(&pGipR0->aCPUs[i].u32TransactionId,
691 (pGipR0->aCPUs[i].u32TransactionId + GIP_UPDATEHZ_RECALC_FREQ * 2)
692 & ~(GIP_UPDATEHZ_RECALC_FREQ * 2 - 1));
693 ASMAtomicWriteU64(&pGipR0->u64NanoTSLastUpdateHz, 0);
694 }
695
696 /*
697 * Re-initialize the per-CPU TSC/NanoTS samples, backdating NanoTS by one update interval.
698 */
699 u64NanoTS = RTTimeSystemNanoTS() - pGipR0->u32UpdateIntervalNS;
700 if ( pGipR0->u32Mode == SUPGIPMODE_INVARIANT_TSC
701 || pGipR0->u32Mode == SUPGIPMODE_SYNC_TSC
702 || RTMpGetOnlineCount() == 1)
703 supdrvGipReInitCpu(&pGipR0->aCPUs[0], u64NanoTS);
704 else
705 RTMpOnAll(supdrvGipReInitCpuCallback, pGipR0, &u64NanoTS);
706
707 /*
708 * Detect alternative ways to figure the CPU ID in ring-3 and
709 * raw-mode context. Check the sanity of the APIC IDs, CPU IDs,
710 * and CPU set indexes while we're at it.
711 */
712 if (RT_SUCCESS(rc))
713 {
714 PSUPDRVGIPDETECTGETCPU pDetectState = (PSUPDRVGIPDETECTGETCPU)RTMemTmpAllocZ(sizeof(*pDetectState));
715 if (pDetectState)
716 {
717 pDetectState->fSupported = UINT32_MAX;
718 pDetectState->idCpuProblem = NIL_RTCPUID;
719 rc = RTMpOnAll(supdrvGipDetectGetGipCpuCallback, pDetectState, pGipR0);
720 if (pDetectState->idCpuProblem == NIL_RTCPUID)
721 {
722 if ( pDetectState->fSupported != UINT32_MAX
723 && pDetectState->fSupported != 0)
724 {
725 if (pGipR0->fGetGipCpu != pDetectState->fSupported)
726 {
727 pGipR0->fGetGipCpu = pDetectState->fSupported;
728 LogRel(("SUPR0GipMap: fGetGipCpu=%#x\n", pDetectState->fSupported));
729 }
730 }
731 else
732 {
733 LogRel(("SUPR0GipMap: No supported ways of getting the APIC ID or CPU number in ring-3! (%#x)\n",
734 pDetectState->fSupported));
735 rc = VERR_UNSUPPORTED_CPU;
736 }
737 }
738 else
739 {
740 LogRel(("SUPR0GipMap: APIC ID, CPU ID or CPU set index problem detected on CPU #%u (%#x)!\n",
741 pDetectState->idCpuProblem, pDetectState->idCpuProblem));
742 rc = VERR_INVALID_CPU_ID;
743 }
744 RTMemTmpFree(pDetectState);
745 }
746 else
747 rc = VERR_NO_TMP_MEMORY;
748 }
749
750 /*
751 * Start the GIP timer if all is well..
752 */
753 if (RT_SUCCESS(rc))
754 {
755#ifndef DO_NOT_START_GIP
756 rc = RTTimerStart(pDevExt->pGipTimer, 0 /* fire ASAP */); AssertRC(rc);
757#endif
758 rc = VINF_SUCCESS;
759 }
760
761 /*
762 * Bail out on error.
763 */
764 if (RT_FAILURE(rc))
765 {
766 LogRel(("SUPR0GipMap: failed rc=%Rrc\n", rc));
767 pDevExt->cGipUsers = 0;
768 pSession->fGipReferenced = 0;
769 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
770 {
771 int rc2 = RTR0MemObjFree(pSession->GipMapObjR3, false); AssertRC(rc2);
772 if (RT_SUCCESS(rc2))
773 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
774 }
775 HCPhys = NIL_RTHCPHYS;
776 pGipR3 = NIL_RTR3PTR;
777 }
778 }
779 }
780 }
781 else
782 {
783 rc = VERR_GENERAL_FAILURE;
784 Log(("SUPR0GipMap: GIP is not available!\n"));
785 }
786#ifdef SUPDRV_USE_MUTEX_FOR_GIP
787 RTSemMutexRelease(pDevExt->mtxGip);
788#else
789 RTSemFastMutexRelease(pDevExt->mtxGip);
790#endif
791
792 /*
793 * Write returns.
794 */
795 if (pHCPhysGip)
796 *pHCPhysGip = HCPhys;
797 if (ppGipR3)
798 *ppGipR3 = pGipR3;
799
800#ifdef DEBUG_DARWIN_GIP
801 OSDBGPRINT(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
802#else
803 LogFlow(("SUPR0GipMap: returns %d *pHCPhysGip=%lx pGipR3=%p\n", rc, (unsigned long)HCPhys, (void *)pGipR3));
804#endif
805 return rc;
806}
807SUPR0_EXPORT_SYMBOL(SUPR0GipMap);
808
809
810/**
811 * Unmaps any user mapping of the GIP and terminates all GIP access
812 * from this session.
813 *
814 * @returns IPRT status code.
815 * @param pSession Session to which the GIP mapping should belong.
816 */
817SUPR0DECL(int) SUPR0GipUnmap(PSUPDRVSESSION pSession)
818{
819 int rc = VINF_SUCCESS;
820 PSUPDRVDEVEXT pDevExt = pSession->pDevExt;
821#ifdef DEBUG_DARWIN_GIP
822 OSDBGPRINT(("SUPR0GipUnmap: pSession=%p pGip=%p GipMapObjR3=%p\n",
823 pSession,
824 pSession->GipMapObjR3 != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pSession->GipMapObjR3) : NULL,
825 pSession->GipMapObjR3));
826#else
827 LogFlow(("SUPR0GipUnmap: pSession=%p\n", pSession));
828#endif
829 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
830
831#ifdef SUPDRV_USE_MUTEX_FOR_GIP
832 RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
833#else
834 RTSemFastMutexRequest(pDevExt->mtxGip);
835#endif
836
837 /*
838 * GIP test-mode session?
839 */
840 if ( pSession->fGipTestMode
841 && pDevExt->pGip)
842 {
843 supdrvGipSetFlags(pDevExt, pSession, 0, ~SUPGIP_FLAGS_TESTING_ENABLE);
844 Assert(!pSession->fGipTestMode);
845 }
846
847 /*
848 * Unmap anything?
849 */
850 if (pSession->GipMapObjR3 != NIL_RTR0MEMOBJ)
851 {
852 rc = RTR0MemObjFree(pSession->GipMapObjR3, false);
853 AssertRC(rc);
854 if (RT_SUCCESS(rc))
855 pSession->GipMapObjR3 = NIL_RTR0MEMOBJ;
856 }
857
858 /*
859 * Dereference global GIP.
860 */
861 if (pSession->fGipReferenced && !rc)
862 {
863 pSession->fGipReferenced = 0;
864 if ( pDevExt->cGipUsers > 0
865 && !--pDevExt->cGipUsers)
866 {
867 LogFlow(("SUPR0GipUnmap: Suspends GIP updating\n"));
868#ifndef DO_NOT_START_GIP
869 rc = RTTimerStop(pDevExt->pGipTimer); AssertRC(rc); rc = VINF_SUCCESS;
870#endif
871 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
872 }
873 }
874
875#ifdef SUPDRV_USE_MUTEX_FOR_GIP
876 RTSemMutexRelease(pDevExt->mtxGip);
877#else
878 RTSemFastMutexRelease(pDevExt->mtxGip);
879#endif
880
881 return rc;
882}
883SUPR0_EXPORT_SYMBOL(SUPR0GipUnmap);
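/* Illustrative ring-0 usage sketch (assumes a valid pSession from session
 * creation; not taken from this file): map the GIP, read the TSC frequency,
 * then drop the reference again.
 * @code
 *   RTHCPHYS HCPhysGip = NIL_RTHCPHYS;
 *   int rc = SUPR0GipMap(pSession, NULL /*ppGipR3*/, &HCPhysGip);
 *   if (RT_SUCCESS(rc))
 *   {
 *       PSUPGLOBALINFOPAGE pGip = SUPGetGIP();
 *       if (pGip)
 *           Log(("GIP TSC frequency: %RU64 Hz\n", pGip->u64CpuHz));
 *       SUPR0GipUnmap(pSession);
 *   }
 * @endcode
 */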
884
885
886/**
887 * Gets the GIP pointer.
888 *
889 * @returns Pointer to the GIP or NULL.
890 */
891SUPDECL(PSUPGLOBALINFOPAGE) SUPGetGIP(void)
892{
893 return g_pSUPGlobalInfoPage;
894}
895
896
897
898
899
900/*
901 *
902 *
903 * GIP Initialization, Termination and CPU Offline / Online Related Code.
904 * GIP Initialization, Termination and CPU Offline / Online Related Code.
905 * GIP Initialization, Termination and CPU Offline / Online Related Code.
906 *
907 *
908 */
909
910/**
911 * Used by supdrvGipInitRefineInvariantTscFreqTimer and supdrvGipInitMeasureTscFreq
912 * to update the TSC frequency related GIP variables.
913 *
914 * @param pGip The GIP.
915 * @param nsElapsed The number of nanoseconds elapsed.
916 * @param cElapsedTscTicks The corresponding number of TSC ticks.
917 * @param iTick The tick number for debugging.
918 */
919static void supdrvGipInitSetCpuFreq(PSUPGLOBALINFOPAGE pGip, uint64_t nsElapsed, uint64_t cElapsedTscTicks, uint32_t iTick)
920{
921 /*
922 * Calculate the frequency.
923 */
924 uint64_t uCpuHz;
925 if ( cElapsedTscTicks < UINT64_MAX / RT_NS_1SEC
926 && nsElapsed < UINT32_MAX)
927 uCpuHz = ASMMultU64ByU32DivByU32(cElapsedTscTicks, RT_NS_1SEC, (uint32_t)nsElapsed);
928 else
929 {
930 RTUINT128U CpuHz, Tmp, Divisor;
931 CpuHz.s.Lo = CpuHz.s.Hi = 0;
932 RTUInt128MulU64ByU64(&Tmp, cElapsedTscTicks, RT_NS_1SEC_64);
933 RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, nsElapsed));
934 uCpuHz = CpuHz.s.Lo;
935 }
936
937 /*
938 * Update the GIP.
939 */
940 ASMAtomicWriteU64(&pGip->u64CpuHz, uCpuHz);
941 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
942 {
943 ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, uCpuHz);
944
945 /* For inspecting the frequency calcs using tstGIP-2, debugger or similar. */
946 if (iTick + 1 < pGip->cCpus)
947 ASMAtomicWriteU64(&pGip->aCPUs[iTick + 1].u64CpuHz, uCpuHz);
948 }
949}
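/* Worked example for the calculation above (illustrative): 2 600 000 000 TSC
 * ticks over nsElapsed = 1 000 000 000 ns gives
 *     uCpuHz = 2 600 000 000 * RT_NS_1SEC / 1 000 000 000 = 2.6 GHz.
 * The 64-bit fast path requires cElapsedTscTicks < UINT64_MAX / RT_NS_1SEC
 * and nsElapsed < UINT32_MAX (~4.29 s); anything larger falls back to the
 * 128-bit multiply-and-divide. */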
950
951
952/**
953 * Timer callback function for TSC frequency refinement in invariant GIP mode.
954 *
955 * This is started during driver init and fires once
956 * GIP_TSC_REFINE_PERIOD_IN_SECS seconds later.
957 *
958 * @param pTimer The timer.
959 * @param pvUser Opaque pointer to the device instance data.
960 * @param iTick The timer tick.
961 */
962static DECLCALLBACK(void) supdrvGipInitRefineInvariantTscFreqTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
963{
964 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
965 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
966 RTCPUID idCpu;
967 uint64_t cNsElapsed;
968 uint64_t cTscTicksElapsed;
969 uint64_t nsNow;
970 uint64_t uTsc;
971 RTCCUINTREG fEFlags;
972
973 /* Paranoia. */
974 AssertReturnVoid(pGip);
975 AssertReturnVoid(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
976
977 /*
978 * If we got a power event, stop the refinement process.
979 */
980 if (pDevExt->fInvTscRefinePowerEvent)
981 {
982 int rc = RTTimerStop(pTimer); AssertRC(rc);
983 return;
984 }
985
986 /*
987 * Read the TSC and time, noting which CPU we are on.
988 *
989 * Don't bother spinning until RTTimeSystemNanoTS changes, since on
990 * systems where it matters we're in a context where we cannot waste that
991 * much time (DPC watchdog, called from clock interrupt).
992 */
993 fEFlags = ASMIntDisableFlags();
994 uTsc = ASMReadTSC();
995 nsNow = RTTimeSystemNanoTS();
996 idCpu = RTMpCpuId();
997 ASMSetFlags(fEFlags);
998
999 cNsElapsed = nsNow - pDevExt->nsStartInvarTscRefine;
1000 cTscTicksElapsed = uTsc - pDevExt->uTscStartInvarTscRefine;
1001
1002 /*
1003 * If the above measurement was taken on a different CPU than the one we
1004 * started the process on, cTscTicksElapsed will need to be adjusted with
1005 * the TSC deltas of both the CPUs.
1006 *
1007 * We ASSUME that the delta calculation process takes less time than the
1008 * TSC frequency refinement timer. If it doesn't, we'll complain and
1009 * drop the frequency refinement.
1010 *
1011 * Note! We cannot entirely trust enmUseTscDelta here because it's
1012 * downgraded after each delta calculation.
1013 */
1014 if ( idCpu != pDevExt->idCpuInvarTscRefine
1015 && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1016 {
1017 uint32_t iStartCpuSet = RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine);
1018 uint32_t iStopCpuSet = RTMpCpuIdToSetIndex(idCpu);
1019 uint16_t iStartGipCpu = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1020 ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
1021 uint16_t iStopGipCpu = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1022 ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
1023 int64_t iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
1024 int64_t iStopTscDelta = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
1025 if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
1026 {
1027 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
1028 {
1029 /* cTscTicksElapsed = (uTsc - iStopTscDelta) - (pDevExt->uTscStartInvarTscRefine - iStartTscDelta); */
1030 cTscTicksElapsed += iStartTscDelta - iStopTscDelta;
1031 }
1032 }
1033 /*
1034 * Allow 5 times the refinement period to elapse before we give up on the TSC delta
1035 * calculations.
1036 */
1037 else if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * 5 * RT_NS_1SEC_64)
1038 {
1039 SUPR0Printf("vboxdrv: Failed to refine invariant TSC frequency because deltas are unavailable after %u (%u) seconds\n",
1040 (uint32_t)(cNsElapsed / RT_NS_1SEC), GIP_TSC_REFINE_PERIOD_IN_SECS);
1041 SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
1042 iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
1043 int rc = RTTimerStop(pTimer); AssertRC(rc);
1044 return;
1045 }
1046 }
1047
1048 /*
1049 * Calculate and update the CPU frequency variables in GIP.
1050 *
1051 * If there is a GIP user already and we've already refined the frequency
1052 * a couple of times, don't update it as we want a stable frequency value
1053 * for all VMs.
1054 */
1055 if ( pDevExt->cGipUsers == 0
1056 || cNsElapsed < RT_NS_1SEC * 2)
1057 {
1058 supdrvGipInitSetCpuFreq(pGip, cNsElapsed, cTscTicksElapsed, (uint32_t)iTick);
1059
1060 /*
1061 * Stop the timer once we've reached the defined refinement period.
1062 */
1063 if (cNsElapsed > GIP_TSC_REFINE_PERIOD_IN_SECS * RT_NS_1SEC_64)
1064 {
1065 int rc = RTTimerStop(pTimer);
1066 AssertRC(rc);
1067 }
1068 }
1069 else
1070 {
1071 int rc = RTTimerStop(pTimer);
1072 AssertRC(rc);
1073 }
1074}
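/* The delta adjustment in the function above, spelled out (illustrative):
 * with per-CPU deltas defined such that adjusted TSC = raw TSC - delta, the
 * elapsed tick count across a CPU migration is
 *     (uTsc - iStopTscDelta) - (uTscStartInvarTscRefine - iStartTscDelta)
 *   = (uTsc - uTscStartInvarTscRefine) + iStartTscDelta - iStopTscDelta
 * which is exactly the "+= iStartTscDelta - iStopTscDelta" applied above. */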
1075
1076
1077/**
1078 * @callback_method_impl{FNRTPOWERNOTIFICATION}
1079 */
1080static DECLCALLBACK(void) supdrvGipPowerNotificationCallback(RTPOWEREVENT enmEvent, void *pvUser)
1081{
1082 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
1083 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1084
1085 /*
1086 * If the TSC frequency refinement timer is running, we need to cancel it so it
1087 * doesn't screw up the frequency after a long suspend.
1088 *
1089 * Recalculate all TSC-deltas on host resume as it may have changed, seen
1090 * on Windows 7 running on the Dell Optiplex Intel Core i5-3570.
1091 */
1092 if (enmEvent == RTPOWEREVENT_RESUME)
1093 {
1094 ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
1095 if ( RT_LIKELY(pGip)
1096 && pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
1097 && !supdrvOSAreCpusOfflinedOnSuspend())
1098 {
1099#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1100 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
1101#else
1102 RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
1103 supdrvTscMeasureInitialDeltas(pDevExt);
1104#endif
1105 }
1106 }
1107 else if (enmEvent == RTPOWEREVENT_SUSPEND)
1108 ASMAtomicWriteBool(&pDevExt->fInvTscRefinePowerEvent, true);
1109}
1110
1111
1112/**
1113 * Start the TSC-frequency refinement timer for the invariant TSC GIP mode.
1114 *
1115 * We cannot use this in the synchronous and asynchronous TSC GIP modes because
1116 * the CPU may change the TSC frequency between now and when the timer fires
1117 * (supdrvInitAsyncRefineTscTimer).
1118 *
1119 * @param pDevExt Pointer to the device instance data.
1120 */
1121static void supdrvGipInitStartTimerForRefiningInvariantTscFreq(PSUPDRVDEVEXT pDevExt)
1122{
1123 uint64_t u64NanoTS;
1124 RTCCUINTREG fEFlags;
1125 int rc;
1126
1127 /*
1128 * Register a power management callback.
1129 */
1130 pDevExt->fInvTscRefinePowerEvent = false;
1131 rc = RTPowerNotificationRegister(supdrvGipPowerNotificationCallback, pDevExt);
1132 AssertRC(rc); /* ignore */
1133
1134 /*
1135 * Record the TSC and NanoTS as the starting anchor point for refinement
1136 * of the TSC. We try to get as close to a clock tick as possible on systems
1137 * which do not provide high resolution time.
1138 */
1139 u64NanoTS = RTTimeSystemNanoTS();
1140 while (RTTimeSystemNanoTS() == u64NanoTS)
1141 ASMNopPause();
1142
1143 fEFlags = ASMIntDisableFlags();
1144 pDevExt->uTscStartInvarTscRefine = ASMReadTSC();
1145 pDevExt->nsStartInvarTscRefine = RTTimeSystemNanoTS();
1146 pDevExt->idCpuInvarTscRefine = RTMpCpuId();
1147 ASMSetFlags(fEFlags);
1148
1149 /*
1150 * Create a timer that runs on the same CPU so we won't have a dependency
1151 * on the TSC-delta and can run in parallel to it. On systems that do not
1152 * implement CPU specific timers we'll apply deltas in the timer callback,
1153 * just like we do for CPUs going offline.
1154 *
1155 * The longer the refinement interval the better the accuracy, at least in
1156 * theory. If it's too long though, ring-3 may already be starting its
1157 * first VMs before we're done. On most systems we will be loading the
1158 * support driver during boot and VMs won't be started for a while yet,
1159 * it is really only a problem during development (especially with
1160 * on-demand driver starting on Windows).
1161 *
1162 * To avoid wasting time doing a long supdrvGipInitMeasureTscFreq() call
1163 * to calculate the frequency during driver loading, the timer is set
1164 * to fire after 200 ms the first time. It will then reschedule itself
1165 * to fire every second until GIP_TSC_REFINE_PERIOD_IN_SECS has been
1166 * reached or it notices that there is a user land client with GIP
1167 * mapped (we want a stable frequency for all VMs).
1168 */
1169 rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC,
1170 RTTIMER_FLAGS_CPU(RTMpCpuIdToSetIndex(pDevExt->idCpuInvarTscRefine)),
1171 supdrvGipInitRefineInvariantTscFreqTimer, pDevExt);
1172 if (RT_SUCCESS(rc))
1173 {
1174 rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
1175 if (RT_SUCCESS(rc))
1176 return;
1177 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
1178 }
1179
1180 if (rc == VERR_CPU_OFFLINE || rc == VERR_NOT_SUPPORTED)
1181 {
1182 rc = RTTimerCreateEx(&pDevExt->pInvarTscRefineTimer, RT_NS_1SEC, RTTIMER_FLAGS_CPU_ANY,
1183 supdrvGipInitRefineInvariantTscFreqTimer, pDevExt);
1184 if (RT_SUCCESS(rc))
1185 {
1186 rc = RTTimerStart(pDevExt->pInvarTscRefineTimer, 2*RT_NS_100MS);
1187 if (RT_SUCCESS(rc))
1188 return;
1189 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
1190 }
1191 }
1192
1193 pDevExt->pInvarTscRefineTimer = NULL;
1194 OSDBGPRINT(("vboxdrv: Failed to create or start TSC frequency refinement timer: rc=%Rrc\n", rc));
1195}
1196
1197
1198/**
1199 * @callback_method_impl{PFNRTMPWORKER,
1200 * RTMpOnSpecific callback for reading TSC and time on the CPU we started
1201 * the measurements on.}
1202 */
1203static DECLCALLBACK(void) supdrvGipInitReadTscAndNanoTsOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1204{
1205 RTCCUINTREG fEFlags = ASMIntDisableFlags();
1206 uint64_t *puTscStop = (uint64_t *)pvUser1;
1207 uint64_t *pnsStop = (uint64_t *)pvUser2;
1208 RT_NOREF1(idCpu);
1209
1210 *puTscStop = ASMReadTSC();
1211 *pnsStop = RTTimeSystemNanoTS();
1212
1213 ASMSetFlags(fEFlags);
1214}
1215
1216
1217/**
1218 * Measures the TSC frequency of the system.
1219 *
1220 * The TSC frequency can vary on systems which are not reported as invariant.
1221 * On such systems the object of this function is to find out what the nominal,
1222 * maximum TSC frequency is under 'normal' CPU operation.
1223 *
1224 * @returns VBox status code.
1225 * @param pGip Pointer to the GIP.
1226 * @param fRough Set if we're doing the rough calculation that the
1227 * TSC measuring code needs, where accuracy isn't all
1228 * that important (too high is better than too low).
1229 * When clear we try for best accuracy that we can
1230 * achieve in reasonably short time.
1231 */
1232static int supdrvGipInitMeasureTscFreq(PSUPGLOBALINFOPAGE pGip, bool fRough)
1233{
1234 uint32_t nsTimerIncr = RTTimerGetSystemGranularity();
1235 int cTriesLeft = fRough ? 4 : 2;
1236 while (cTriesLeft-- > 0)
1237 {
1238 RTCCUINTREG fEFlags;
1239 uint64_t nsStart;
1240 uint64_t nsStop;
1241 uint64_t uTscStart;
1242 uint64_t uTscStop;
1243 RTCPUID idCpuStart;
1244 RTCPUID idCpuStop;
1245
1246 /*
1247 * Synchronize with the host OS clock tick on systems without high
1248 * resolution time API (older Windows version for example).
1249 */
1250 nsStart = RTTimeSystemNanoTS();
1251 while (RTTimeSystemNanoTS() == nsStart)
1252 ASMNopPause();
1253
1254 /*
1255 * Read the TSC and current time, noting which CPU we're on.
1256 */
1257 fEFlags = ASMIntDisableFlags();
1258 uTscStart = ASMReadTSC();
1259 nsStart = RTTimeSystemNanoTS();
1260 idCpuStart = RTMpCpuId();
1261 ASMSetFlags(fEFlags);
1262
1263 /*
1264 * Delay for a while.
1265 */
1266 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1267 {
1268 /*
1269 * Sleep-wait since the TSC frequency is constant; it eases host load. A
1270 * shorter interval produces more variance in the frequency (esp. Windows).
1271 */
1272 uint64_t msElapsed = 0;
1273 uint64_t msDelay = ( ((fRough ? 16 : 200) * RT_NS_1MS + nsTimerIncr - 1) / nsTimerIncr * nsTimerIncr - RT_NS_100US )
1274 / RT_NS_1MS;
1275 do
1276 {
1277 RTThreadSleep((RTMSINTERVAL)(msDelay - msElapsed));
1278 nsStop = RTTimeSystemNanoTS();
1279 msElapsed = (nsStop - nsStart) / RT_NS_1MS;
1280 } while (msElapsed < msDelay);
1281
1282 while (RTTimeSystemNanoTS() == nsStop)
1283 ASMNopPause();
1284 }
1285 else
1286 {
1287 /*
1288 * Busy-wait keeping the frequency up.
1289 */
1290 do
1291 {
1292 ASMNopPause();
1293 nsStop = RTTimeSystemNanoTS();
1294 } while (nsStop - nsStart < RT_NS_100MS);
1295 }
1296
1297 /*
1298 * Read the TSC and time again.
1299 */
1300 fEFlags = ASMIntDisableFlags();
1301 uTscStop = ASMReadTSC();
1302 nsStop = RTTimeSystemNanoTS();
1303 idCpuStop = RTMpCpuId();
1304 ASMSetFlags(fEFlags);
1305
1306 /*
1307 * If the CPU changes, things get a bit complicated and what we
1308 * can get away with depends on the GIP mode / TSC reliability.
1309 */
1310 if (idCpuStop != idCpuStart)
1311 {
1312 bool fDoXCall = false;
1313
1314 /*
1315 * Synchronous TSC mode: we're probably fine as it's unlikely
1316 * that we were rescheduled because of TSC throttling or power
1317 * management reasons, so just go ahead.
1318 */
1319 if (pGip->u32Mode == SUPGIPMODE_SYNC_TSC)
1320 {
1321 /* Probably ok, maybe we should retry once? */
1322 Assert(pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_NOT_APPLICABLE);
1323 }
1324 /*
1325 * If we're just doing the rough measurement, do the cross call and
1326 * get on with things (we don't have deltas!).
1327 */
1328 else if (fRough)
1329 fDoXCall = true;
1330 /*
1331 * Invariant TSC mode: It matters whether we have the TSC delta available
1332 * for both CPUs; that is not something we can assume at this point.
1333 *
1334 * Note! We cannot necessarily trust enmUseTscDelta here because it's
1335 * downgraded after each delta calculation and the delta
1336 * calculations may not be complete yet.
1337 */
1338 else if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
1339 {
1340/** @todo This section of code is never reached atm, consider dropping it later on... */
1341 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1342 {
1343 uint32_t iStartCpuSet = RTMpCpuIdToSetIndex(idCpuStart);
1344 uint32_t iStopCpuSet = RTMpCpuIdToSetIndex(idCpuStop);
1345 uint16_t iStartGipCpu = iStartCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1346 ? pGip->aiCpuFromCpuSetIdx[iStartCpuSet] : UINT16_MAX;
1347 uint16_t iStopGipCpu = iStopCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
1348 ? pGip->aiCpuFromCpuSetIdx[iStopCpuSet] : UINT16_MAX;
1349 int64_t iStartTscDelta = iStartGipCpu < pGip->cCpus ? pGip->aCPUs[iStartGipCpu].i64TSCDelta : INT64_MAX;
1350 int64_t iStopTscDelta = iStopGipCpu < pGip->cCpus ? pGip->aCPUs[iStopGipCpu].i64TSCDelta : INT64_MAX;
1351 if (RT_LIKELY(iStartTscDelta != INT64_MAX && iStopTscDelta != INT64_MAX))
1352 {
1353 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
1354 {
1355 uTscStart -= iStartTscDelta;
1356 uTscStop -= iStopTscDelta;
1357 }
1358 }
1359 /*
1360 * Invalid CPU indexes are not caused by online/offline races, so
1361 * we have to trigger driver load failure if that happens as GIP
1362 * and IPRT assumptions are busted on this system.
1363 */
1364 else if (iStopGipCpu >= pGip->cCpus || iStartGipCpu >= pGip->cCpus)
1365 {
1366 SUPR0Printf("vboxdrv: Unexpected CPU index in supdrvGipInitMeasureTscFreq.\n");
1367 SUPR0Printf("vboxdrv: start: %u, %u, %#llx stop: %u, %u, %#llx\n",
1368 iStartCpuSet, iStartGipCpu, iStartTscDelta, iStopCpuSet, iStopGipCpu, iStopTscDelta);
1369 return VERR_INVALID_CPU_INDEX;
1370 }
1371 /*
1372 * No valid deltas. We retry, if we're on our last retry
1373 * we do the cross call instead just to get a result. The
1374 * frequency will be refined in a few seconds anyway.
1375 */
1376 else if (cTriesLeft > 0)
1377 continue;
1378 else
1379 fDoXCall = true;
1380 }
1381 }
1382 /*
1383 * Asynchronous TSC mode: This is bad, as the reason we usually
1384 * use this mode is to deal with variable TSC frequencies and
1385 * deltas. So, we need to get the TSC from the same CPU as
1386 * started it, we also need to keep that CPU busy. So, retry
1387 * and fall back to the cross call on the last attempt.
1388 */
1389 else
1390 {
1391 Assert(pGip->u32Mode == SUPGIPMODE_ASYNC_TSC);
1392 if (cTriesLeft > 0)
1393 continue;
1394 fDoXCall = true;
1395 }
1396
1397 if (fDoXCall)
1398 {
1399 /*
1400 * Try read the TSC and timestamp on the start CPU.
1401 */
1402 int rc = RTMpOnSpecific(idCpuStart, supdrvGipInitReadTscAndNanoTsOnCpu, &uTscStop, &nsStop);
1403 if (RT_FAILURE(rc) && (!fRough || cTriesLeft > 0))
1404 continue;
1405 }
1406 }
1407
1408 /*
1409 * Calculate the TSC frequency and update it (shared with the refinement timer).
1410 */
1411 supdrvGipInitSetCpuFreq(pGip, nsStop - nsStart, uTscStop - uTscStart, 0);
1412 return VINF_SUCCESS;
1413 }
1414
1415 Assert(!fRough);
1416 return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
1417}
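/* Worked example for the sleep-wait rounding in the function above
 * (illustrative): with a Windows-like timer granularity of
 * nsTimerIncr = 15 625 000 ns (64 Hz) and the accurate 200 ms measurement,
 * ceil(200 ms / 15.625 ms) = 13 ticks = 203.125 ms; subtracting the
 * RT_NS_100US safety margin gives 203.025 ms, i.e. msDelay = 203 ms. */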
1418
1419
1420/**
1421 * Finds our (@a idCpu) entry, or allocates a new one if not found.
1422 *
1423 * @returns Index of the CPU in the cache set.
1424 * @param pGip The GIP.
1425 * @param idCpu The CPU ID.
1426 */
1427static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
1428{
1429 uint32_t i, cTries;
1430
1431 /*
1432 * ASSUMES that CPU IDs are constant.
1433 */
1434 for (i = 0; i < pGip->cCpus; i++)
1435 if (pGip->aCPUs[i].idCpu == idCpu)
1436 return i;
1437
1438 cTries = 0;
1439 do
1440 {
1441 for (i = 0; i < pGip->cCpus; i++)
1442 {
1443 bool fRc;
1444 ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
1445 if (fRc)
1446 return i;
1447 }
1448 } while (cTries++ < 32);
1449 AssertReleaseFailed();
1450 return i - 1;
1451}
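/* The slot claiming above in brief (descriptive note): ASMAtomicCmpXchgSize
 * only stores idCpu into aCPUs[i].idCpu when the slot still holds NIL_RTCPUID,
 * so concurrent online callbacks racing over the array each claim a unique
 * entry; fRc reports whether this CPU won the slot. */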
1452
1453
1454/**
1455 * The calling CPU should be accounted as online, update GIP accordingly.
1456 *
1457 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
1458 *
1459 * @param pDevExt The device extension.
1460 * @param idCpu The CPU ID.
1461 */
1462static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1463{
1464 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1465 int iCpuSet = 0;
1466 uint32_t idApic;
1467 uint32_t i = 0;
1468 uint64_t u64NanoTS = 0;
1469
1470 AssertPtrReturnVoid(pGip);
1471 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1472 AssertRelease(idCpu == RTMpCpuId());
1473 Assert(pGip->cPossibleCpus == RTMpGetCount());
1474
1475 /*
1476 * Do this behind a spinlock with interrupts disabled as this can fire
1477 * on all CPUs simultaneously, see @bugref{6110}.
1478 */
1479 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1480
1481 /*
1482 * Update the globals.
1483 */
1484 ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
1485 ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
1486 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1487 if (iCpuSet >= 0)
1488 {
1489 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1490 RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
1491 RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
1492 }
1493
1494 /*
1495 * Update the entry.
1496 */
1497 u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
1498 i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1499
1500 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, pGip->u64CpuHz);
1501
1502 idApic = supdrvGipGetApicIdSlow();
1503 ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
1504 ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
1505 ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
1506
1507 pGip->aCPUs[i].iCpuGroup = 0;
1508 pGip->aCPUs[i].iCpuGroupMember = iCpuSet;
1509#ifdef RT_OS_WINDOWS
1510 supdrvOSGipInitGroupBitsForCpu(pDevExt, pGip, &pGip->aCPUs[i]);
1511#endif
1512
1513 /*
1514 * Update the APIC ID and CPU set index mappings.
1515 */
1516 if (idApic < RT_ELEMENTS(pGip->aiCpuFromApicId))
1517 ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
1518 else
1519 LogRelMax(64, ("supdrvGipMpEventOnlineOrInitOnCpu: idApic=%#x is out of bounds (%#zx, i=%u, iCpuSet=%d)\n",
1520 idApic, RT_ELEMENTS(pGip->aiCpuFromApicId), i, iCpuSet));
1521 if ((unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
1522 ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
1523 else
1524 LogRelMax(64, ("supdrvGipMpEventOnlineOrInitOnCpu: iCpuSet=%d is out of bounds (%#zx, i=%u, idApic=%d)\n",
1525 iCpuSet, RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx), i, idApic));
1526
1527 /* Add this CPU to this set of CPUs we need to calculate the TSC-delta for. */
1528 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, RTMpCpuIdToSetIndex(idCpu));
1529
1530 /* Update the Mp online/offline counter. */
1531 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1532
1533 /* Commit it. */
1534 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
1535
1536 RTSpinlockRelease(pDevExt->hGipSpinlock);
1537}
1538
1539
1540/**
1541 * RTMpOnSpecific callback wrapper for supdrvGipMpEventOnlineOrInitOnCpu().
1542 *
1543 * @param idCpu The CPU ID we are running on.
1544 * @param pvUser1 Opaque pointer to the device instance data.
1545 * @param pvUser2 Not used.
1546 */
1547static DECLCALLBACK(void) supdrvGipMpEventOnlineCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1548{
1549 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser1;
1550 NOREF(pvUser2);
1551 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1552}
1553
1554
1555/**
1556 * The CPU should be accounted as offline, update the GIP accordingly.
1557 *
1558 * This is used by supdrvGipMpEvent.
1559 *
1560 * @param pDevExt The device extension.
1561 * @param idCpu The CPU ID.
1562 */
1563static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1564{
1565 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1566 int iCpuSet;
1567 unsigned i;
1568
1569 AssertPtrReturnVoid(pGip);
1570 RTSpinlockAcquire(pDevExt->hGipSpinlock);
1571
1572 iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1573 AssertReturnVoid(iCpuSet >= 0);
1574
1575 i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
1576 AssertReturnVoid(i < pGip->cCpus);
1577 AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
1578
1579 Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1580 RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
1581
1582 /* Update the Mp online/offline counter. */
1583 ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1584
1585 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1586 {
1587 /* Reset the TSC delta, we will recalculate it lazily. */
1588 ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
1589 /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
1590 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
1591 }
1592
1593 /* Commit it. */
1594 ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
1595
1596 RTSpinlockRelease(pDevExt->hGipSpinlock);
1597}
1598
1599
1600/**
1601 * Multiprocessor event notification callback.
1602 *
1603 * This is used to make sure that the GIP master gets passed on to
1604 * another CPU. It also updates the associated CPU data.
1605 *
1606 * @param enmEvent The event.
1607 * @param idCpu The cpu it applies to.
1608 * @param pvUser Pointer to the device extension.
1609 */
1610static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
1611{
1612 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
1613 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1614
1615 if (pGip)
1616 {
1617 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
1618 switch (enmEvent)
1619 {
1620 case RTMPEVENT_ONLINE:
1621 {
1622 RTThreadPreemptDisable(&PreemptState);
1623 if (idCpu == RTMpCpuId())
1624 {
1625 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
1626 RTThreadPreemptRestore(&PreemptState);
1627 }
1628 else
1629 {
1630 RTThreadPreemptRestore(&PreemptState);
1631 RTMpOnSpecific(idCpu, supdrvGipMpEventOnlineCallback, pDevExt, NULL /* pvUser2 */);
1632 }
1633
1634 /*
1635 * Recompute TSC-delta for the newly online'd CPU.
1636 */
1637 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1638 {
1639#ifdef SUPDRV_USE_TSC_DELTA_THREAD
1640 supdrvTscDeltaThreadStartMeasurement(pDevExt, false /* fForceAll */);
1641#else
1642 uint32_t iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1643 supdrvTscMeasureDeltaOne(pDevExt, iCpu);
1644#endif
1645 }
1646 break;
1647 }
1648
1649 case RTMPEVENT_OFFLINE:
1650 supdrvGipMpEventOffline(pDevExt, idCpu);
1651 break;
1652 }
1653 }
1654
1655 /*
1656 * Make sure there is a master GIP.
1657 */
1658 if (enmEvent == RTMPEVENT_OFFLINE)
1659 {
1660 RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
1661 if (idGipMaster == idCpu)
1662 {
1663 /*
1664 * The GIP master is going offline, find a new one.
1665 */
1666 bool fIgnored;
1667 unsigned i;
1668 RTCPUID idNewGipMaster = NIL_RTCPUID;
1669 RTCPUSET OnlineCpus;
1670 RTMpGetOnlineSet(&OnlineCpus);
1671
1672 for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
1673 if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
1674 {
1675 RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
1676 if (idCurCpu != idGipMaster)
1677 {
1678 idNewGipMaster = idCurCpu;
1679 break;
1680 }
1681 }
1682
1683 Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
1684 ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
1685 NOREF(fIgnored);
1686 }
1687 }
1688}
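/* Illustrative sketch (registration happens elsewhere in the driver, during
 * GIP creation): this callback is hooked up via the IPRT MP notification API,
 * roughly as follows.
 * @code
 *   rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
 *   AssertRC(rc);
 *   ...
 *   RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
 * @endcode
 */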
1689
1690
1691/**
1692 * On CPU initialization callback for RTMpOnAll.
1693 *
1694 * @param idCpu The CPU ID.
1695 * @param pvUser1 The device extension.
1696 * @param pvUser2 The GIP.
1697 */
1698static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1699{
1700 /* This is good enough, even though it will update some of the globals a
1701 bit too much. */
1702 supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
1703 NOREF(pvUser2);
1704}
1705
1706
1707/**
1708 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
1709 *
1710 * @param idCpu Ignored.
1711 * @param pvUser1 Where to put the TSC.
1712 * @param pvUser2 Ignored.
1713 */
1714static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
1715{
1716 Assert(RTMpCpuIdToSetIndex(idCpu) == (intptr_t)pvUser2);
1717 ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
1718 RT_NOREF2(idCpu, pvUser2);
1719}
1720
1721
1722/**
1723 * Determine if Async GIP mode is required because of TSC drift.
1724 *
1725 * When using the default/normal timer code it is essential that the time stamp counter
1726 * (TSC) never runs backwards, that is, a read of the counter must return a value no
1727 * smaller than any previous read. This is guaranteed by the latest AMD CPUs and by
1728 * newer Intel CPUs which never enter the C2 state (P4). In any other case we have
1729 * to choose the asynchronous timer mode.
1730 *
1731 * @param poffMin Pointer to the determined difference between different
1732 * cores (optional, can be NULL).
1733 * @return false if the time stamp counters appear to be synchronized, true otherwise.
1734 */
1735static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
1736{
1737 /*
1738 * Just iterate all the cpus 8 times and make sure that the TSC is
1739 * strictly increasing. We don't bother taking TSC rollover into account.
1740 */
1741 int iEndCpu = RTMpGetArraySize();
1742 int iCpu;
1743 int cLoops = 8;
1744 bool fAsync = false;
1745 int rc = VINF_SUCCESS;
1746 uint64_t offMax = 0;
1747 uint64_t offMin = ~(uint64_t)0;
1748 uint64_t PrevTsc = ASMReadTSC();
1749
1750 while (cLoops-- > 0)
1751 {
1752 for (iCpu = 0; iCpu < iEndCpu; iCpu++)
1753 {
1754 uint64_t CurTsc;
1755 rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker,
1756 &CurTsc, (void *)(uintptr_t)iCpu);
1757 if (RT_SUCCESS(rc))
1758 {
1759 if (CurTsc <= PrevTsc)
1760 {
1761 fAsync = true;
1762 offMin = offMax = PrevTsc - CurTsc;
1763 Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
1764 iCpu, cLoops, CurTsc, PrevTsc));
1765 break;
1766 }
1767
1768 /* Gather statistics (except the first time). */
1769 if (iCpu != 0 || cLoops != 7)
1770 {
1771 uint64_t off = CurTsc - PrevTsc;
1772 if (off < offMin)
1773 offMin = off;
1774 if (off > offMax)
1775 offMax = off;
1776 Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
1777 }
1778
1779 /* Next */
1780 PrevTsc = CurTsc;
1781 }
1782 else if (rc == VERR_NOT_SUPPORTED)
1783 break;
1784 else
1785 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
1786 }
1787
1788 /* If the inner loop was broken out of, quit the outer loop too. */
1789 if (iCpu < iEndCpu)
1790 break;
1791 }
1792
1793 if (poffMin)
1794 *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
1795 Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
1796 fAsync, iEndCpu, rc, offMin, offMax));
1797#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
1798 OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
1799#endif
1800 return fAsync;
1801}
1802
1803
1804/**
1805 * supdrvGipInit() worker that determines the GIP TSC mode.
1806 *
1807 * @returns The most suitable TSC mode.
1808 * @param pDevExt Pointer to the device instance data.
1809 */
1810static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
1811{
1812#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
1813 uint64_t u64DiffCoresIgnored;
1814 uint32_t uEAX, uEBX, uECX, uEDX;
1815
1816 /*
1817 * Establish whether the CPU advertises the TSC as invariant; we need that in
1818 * a couple of places below.
1819 */
1820 bool fInvariantTsc = false;
1821 if (ASMHasCpuId())
1822 {
1823 uEAX = ASMCpuId_EAX(0x80000000);
1824 if (RTX86IsValidExtRange(uEAX) && uEAX >= 0x80000007)
1825 {
1826 uEDX = ASMCpuId_EDX(0x80000007);
1827 if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
1828 fInvariantTsc = true;
1829 }
1830 }
1831
1832 /*
1833 * On single CPU systems, we don't need to consider ASYNC mode.
1834 */
1835 if (RTMpGetCount() <= 1)
1836 return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
1837
1838 /*
1839 * Allow the user and/or OS specific bits to force async mode.
1840 */
1841 if (supdrvOSGetForcedAsyncTscMode(pDevExt))
1842 return SUPGIPMODE_ASYNC_TSC;
1843
1844 /*
1845 * Use invariant mode if the CPU says TSC is invariant.
1846 */
1847 if (fInvariantTsc)
1848 return SUPGIPMODE_INVARIANT_TSC;
1849
1850 /*
1851 * TSC is not invariant and we're on SMP, which presents two problems:
1852 *
1853 * (1) There might be a skew between the CPUs, so that cpu0
1854 * returns a TSC that is slightly different from cpu1.
1855 * This skew may be due to (2), bad TSC initialization
1856 * or slightly different TSC rates.
1857 *
1858 * (2) Power management (and other things) may cause the TSC
1859 * to run at a non-constant speed, and cause the speed
1860 * to be different on the cpus. This will result in (1).
1861 *
1862 * If any of the above is detected, we will have to use ASYNC mode.
1863 */
1864 /* (1). Try to check for current differences between the cpus. */
1865 if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
1866 return SUPGIPMODE_ASYNC_TSC;
1867
1868 /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
1869 ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
1870 if ( RTX86IsValidStdRange(uEAX)
1871 && (RTX86IsAmdCpu(uEBX, uECX, uEDX) || RTX86IsHygonCpu(uEBX, uECX, uEDX)) )
1872 {
1873 /* Check for APM support. */
1874 uEAX = ASMCpuId_EAX(0x80000000);
1875 if (RTX86IsValidExtRange(uEAX) && uEAX >= 0x80000007)
1876 {
1877 uEDX = ASMCpuId_EDX(0x80000007);
1878 if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
1879 return SUPGIPMODE_ASYNC_TSC;
1880 }
1881 }
1882
1883 return SUPGIPMODE_SYNC_TSC;
1884
1885#elif defined(RT_ARCH_ARM64)
1886 RT_NOREF(pDevExt);
1887 return SUPGIPMODE_INVARIANT_TSC;
1888
1889#else
1890# error "Port me"
1891#endif
1892}
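/*
 * Illustrative sketch (not part of the driver): the invariant TSC probe in
 * supdrvGipInitDetermineTscMode() boils down to testing CPUID leaf 0x80000007,
 * EDX bit 8 (X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR). A minimal user-mode
 * equivalent, assuming a GCC/clang style <cpuid.h> is available:
 *
 *     #include <cpuid.h>
 *     static int hasInvariantTsc(void)
 *     {
 *         unsigned uEax, uEbx, uEcx, uEdx;
 *         if (   !__get_cpuid(0x80000000, &uEax, &uEbx, &uEcx, &uEdx)
 *             || uEax < 0x80000007)
 *             return 0;
 *         __get_cpuid(0x80000007, &uEax, &uEbx, &uEcx, &uEdx);
 *         return (uEdx >> 8) & 1; // EDX[8] = invariant TSC
 *     }
 */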
1893
1894
1895/**
1896 * Initializes per-CPU GIP information.
1897 *
1898 * @param pGip Pointer to the GIP.
1899 * @param pCpu Pointer to which GIP CPU to initialize.
1900 * @param u64NanoTS The current nanosecond timestamp.
1901 * @param uCpuHz The CPU frequency to set, 0 if the caller doesn't know.
1902 */
1903static void supdrvGipInitCpu(PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS, uint64_t uCpuHz)
1904{
1905 pCpu->u32TransactionId = 2;
1906 pCpu->u64NanoTS = u64NanoTS;
1907 pCpu->u64TSC = ASMReadTSC();
1908 pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
1909 pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
1910
1911 ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
1912 ASMAtomicWriteU32(&pCpu->idCpu, NIL_RTCPUID);
1913 ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
1914 ASMAtomicWriteU16(&pCpu->iCpuGroup, 0);
1915 ASMAtomicWriteU16(&pCpu->iCpuGroupMember, UINT16_MAX);
1916 ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
1917 ASMAtomicWriteU32(&pCpu->iReservedForNumaNode, 0);
1918
1919 /*
1920 * The first time we're called, we don't have a CPU frequency handy,
1921 * so pretend it's a 4 GHz CPU. On CPUs that are online, we'll get
1922 * called again and at that point we have a more plausible CPU frequency
1923 * value handy. The frequency history will also be adjusted again on
1924 * the 2nd timer callout (maybe we can skip that now?).
1925 */
1926 if (!uCpuHz)
1927 {
1928 pCpu->u64CpuHz = _4G - 1;
1929 pCpu->u32UpdateIntervalTSC = (uint32_t)((_4G - 1) / pGip->u32UpdateHz);
1930 }
1931 else
1932 {
1933 pCpu->u64CpuHz = uCpuHz;
1934 pCpu->u32UpdateIntervalTSC = (uint32_t)(uCpuHz / pGip->u32UpdateHz);
1935 }
1936 pCpu->au32TSCHistory[0]
1937 = pCpu->au32TSCHistory[1]
1938 = pCpu->au32TSCHistory[2]
1939 = pCpu->au32TSCHistory[3]
1940 = pCpu->au32TSCHistory[4]
1941 = pCpu->au32TSCHistory[5]
1942 = pCpu->au32TSCHistory[6]
1943 = pCpu->au32TSCHistory[7]
1944 = pCpu->u32UpdateIntervalTSC;
1945}
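/*
 * Worked example for the seeding above: with uCpuHz = 3 000 000 000 (3 GHz)
 * and pGip->u32UpdateHz = 100, u32UpdateIntervalTSC becomes 3e9 / 100 =
 * 30 000 000 TSC ticks per update, and all eight TSC history slots start out
 * at that nominal value until real measurements replace them.
 */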
1946
1947
1948/**
1949 * Initializes the GIP data.
1950 *
1951 * @returns VBox status code.
1952 * @param pDevExt Pointer to the device instance data.
1953 * @param pGip Pointer to the read-write kernel mapping of the GIP.
1954 * @param HCPhys The physical address of the GIP.
1955 * @param u64NanoTS The current nanosecond timestamp.
1956 * @param uUpdateHz The update frequency.
1957 * @param uUpdateIntervalNS The update interval in nanoseconds.
1958 * @param cCpus The CPU count.
1959 * @param cbGipCpuGroups The supdrvOSGipGetGroupTableSize return value we
1960 * used when allocating the GIP structure.
1961 */
1962static int supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
1963 uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS,
1964 unsigned cCpus, size_t cbGipCpuGroups)
1965{
1966 size_t const cbGip = RT_ALIGN_Z(RT_UOFFSETOF_DYN(SUPGLOBALINFOPAGE, aCPUs[cCpus]) + cbGipCpuGroups, PAGE_SIZE);
1967 unsigned i;
1968#ifdef DEBUG_DARWIN_GIP
1969 OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1970#else
1971 LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
1972#endif
1973
1974 /*
1975 * Initialize the structure.
1976 */
1977 memset(pGip, 0, cbGip);
1978
1979 pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
1980 pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
1981 pGip->u32Mode = supdrvGipInitDetermineTscMode(pDevExt);
1982 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
1983 /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
1984 pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
1985 ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
1986 else
1987 pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
1988 pGip->cCpus = (uint16_t)cCpus;
1989 pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
1990 pGip->u32UpdateHz = uUpdateHz;
1991 pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
1992 pGip->fGetGipCpu = SUPGIPGETCPU_APIC_ID;
1993 RTCpuSetEmpty(&pGip->OnlineCpuSet);
1994 RTCpuSetEmpty(&pGip->PresentCpuSet);
1995 RTMpGetSet(&pGip->PossibleCpuSet);
1996 pGip->cOnlineCpus = RTMpGetOnlineCount();
1997 pGip->cPresentCpus = RTMpGetPresentCount();
1998 pGip->cPossibleCpus = RTMpGetCount();
1999 pGip->cPossibleCpuGroups = 1;
2000 pGip->idCpuMax = RTMpGetMaxCpuId();
2001 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
2002 pGip->aiCpuFromApicId[i] = UINT16_MAX;
2003 for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
2004 pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
2005 for (i = 0; i < RT_ELEMENTS(pGip->aoffCpuGroup); i++)
2006 pGip->aoffCpuGroup[i] = UINT32_MAX;
2007 for (i = 0; i < cCpus; i++)
2008 supdrvGipInitCpu(pGip, &pGip->aCPUs[i], u64NanoTS, 0 /*uCpuHz*/);
2009#ifdef RT_OS_WINDOWS
2010 int rc = supdrvOSInitGipGroupTable(pDevExt, pGip, cbGipCpuGroups);
2011 AssertRCReturn(rc, rc);
2012#endif
2013
2014 /*
2015 * Link it to the device extension.
2016 */
2017 pDevExt->pGip = pGip;
2018 pDevExt->HCPhysGip = HCPhys;
2019 pDevExt->cGipUsers = 0;
2020
2021 return VINF_SUCCESS;
2022}
2023
2024
2025/**
2026 * Creates the GIP.
2027 *
2028 * @returns VBox status code.
2029 * @param pDevExt Instance data. GIP stuff may be updated.
2030 */
2031int VBOXCALL supdrvGipCreate(PSUPDRVDEVEXT pDevExt)
2032{
2033 PSUPGLOBALINFOPAGE pGip;
2034 size_t cbGip;
2035 size_t cbGipCpuGroups;
2036 RTHCPHYS HCPhysGip;
2037 uint32_t u32SystemResolution;
2038 uint32_t u32Interval;
2039 uint32_t u32MinInterval;
2040 uint32_t uMod;
2041 unsigned cCpus;
2042 int rc;
2043
2044 LogFlow(("supdrvGipCreate:\n"));
2045
2046 /*
2047 * Assert order.
2048 */
2049 Assert(pDevExt->u32SystemTimerGranularityGrant == 0);
2050 Assert(pDevExt->GipMemObj == NIL_RTR0MEMOBJ);
2051 Assert(!pDevExt->pGipTimer);
2052#ifdef SUPDRV_USE_MUTEX_FOR_GIP
2053 Assert(pDevExt->mtxGip != NIL_RTSEMMUTEX);
2054 Assert(pDevExt->mtxTscDelta != NIL_RTSEMMUTEX);
2055#else
2056 Assert(pDevExt->mtxGip != NIL_RTSEMFASTMUTEX);
2057 Assert(pDevExt->mtxTscDelta != NIL_RTSEMFASTMUTEX);
2058#endif
2059
2060 /*
2061 * Check the CPU count.
2062 */
2063 cCpus = RTMpGetArraySize();
2064 if (cCpus > RT_MIN(RTCPUSET_MAX_CPUS, RT_ELEMENTS(pGip->aiCpuFromApicId)))
2065 {
2066 SUPR0Printf("VBoxDrv: Too many CPUs (%u) for the GIP (max %u)\n", cCpus, RT_MIN(RTCPUSET_MAX_CPUS, RT_ELEMENTS(pGip->aiCpuFromApicId)));
2067 return VERR_TOO_MANY_CPUS;
2068 }
2069
2070 /*
2071 * Allocate a contiguous set of pages with a default kernel mapping.
2072 */
2073#ifdef RT_OS_WINDOWS
2074 cbGipCpuGroups = supdrvOSGipGetGroupTableSize(pDevExt);
2075#else
2076 cbGipCpuGroups = 0;
2077#endif
2078 cbGip = RT_UOFFSETOF_DYN(SUPGLOBALINFOPAGE, aCPUs[cCpus]) + cbGipCpuGroups;
2079 rc = RTR0MemObjAllocCont(&pDevExt->GipMemObj, cbGip, NIL_RTHCPHYS /*PhysHighest*/, false /*fExecutable*/);
2080 if (RT_FAILURE(rc))
2081 {
2082 OSDBGPRINT(("supdrvGipCreate: failed to allocate the GIP page. rc=%d\n", rc));
2083 return rc;
2084 }
2085 pGip = (PSUPGLOBALINFOPAGE)RTR0MemObjAddress(pDevExt->GipMemObj); AssertPtr(pGip);
2086 HCPhysGip = RTR0MemObjGetPagePhysAddr(pDevExt->GipMemObj, 0); Assert(HCPhysGip != NIL_RTHCPHYS);
2087
2088 /*
2089 * Find a reasonable update interval and initialize the structure.
2090 */
2091 supdrvGipRequestHigherTimerFrequencyFromSystem(pDevExt);
2092 /** @todo figure out why using a 100ms interval upsets timekeeping in VMs.
2093 * See @bugref{6710}. */
2094 u32MinInterval = RT_NS_10MS;
2095 u32SystemResolution = RTTimerGetSystemGranularity();
2096 u32Interval = u32MinInterval;
2097 uMod = u32MinInterval % u32SystemResolution;
2098 if (uMod)
2099 u32Interval += u32SystemResolution - uMod;
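 /* Example: with u32MinInterval = 10 000 000 ns and a system timer granularity
    of 4 000 000 ns, uMod = 2 000 000 and the interval is rounded up to
    12 000 000 ns, the next multiple of the granularity. */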
2100
2101 rc = supdrvGipInit(pDevExt, pGip, HCPhysGip, RTTimeSystemNanoTS(), RT_NS_1SEC / u32Interval /*=Hz*/, u32Interval,
2102 cCpus, cbGipCpuGroups);
2103
2104 /*
2105 * Important sanity check... (Sets rc)
2106 */
2107 if (RT_UNLIKELY( pGip->enmUseTscDelta == SUPGIPUSETSCDELTA_ZERO_CLAIMED
2108 && pGip->u32Mode == SUPGIPMODE_ASYNC_TSC
2109 && !supdrvOSGetForcedAsyncTscMode(pDevExt)))
2110 {
2111 OSDBGPRINT(("supdrvGipCreate: Host-OS/user claims the TSC-deltas are zero but we detected async. TSC! Bad.\n"));
2112 rc = VERR_INTERNAL_ERROR_2;
2113 }
2114
2115 /* It doesn't make sense to do TSC-delta detection on systems we detect as async. */
2116 AssertStmt( pGip->u32Mode != SUPGIPMODE_ASYNC_TSC
2117 || pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED,
2118 rc = VERR_INTERNAL_ERROR_3);
2119
2120 /*
2121 * Do the TSC frequency measurements.
2122 *
2123 * If we're in invariant TSC mode, just do a quick preliminary measurement
2124 * that the TSC-delta measurement code can use to yield cross calls.
2125 *
2126 * If we're in any of the other two modes, neither of which requires MP init,
2127 * notifications or deltas for the job, do the full measurement now so
2128 * that supdrvGipInitOnCpu() can populate the TSC interval and history
2129 * array with more reasonable values.
2130 */
2131 if (RT_SUCCESS(rc))
2132 {
2133 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
2134 {
2135 rc = supdrvGipInitMeasureTscFreq(pGip, true /*fRough*/); /* cannot fail */
2136 supdrvGipInitStartTimerForRefiningInvariantTscFreq(pDevExt);
2137 }
2138 else
2139 rc = supdrvGipInitMeasureTscFreq(pGip, false /*fRough*/);
2140 if (RT_SUCCESS(rc))
2141 {
2142 /*
2143 * Start TSC-delta measurement thread before we start getting MP
2144 * events that will try to kick it into action (includes the
2145 * RTMpOnAll/supdrvGipInitOnCpu call below).
2146 */
2147 RTCpuSetEmpty(&pDevExt->TscDeltaCpuSet);
2148 RTCpuSetEmpty(&pDevExt->TscDeltaObtainedCpuSet);
2149#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2150 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
2151 rc = supdrvTscDeltaThreadInit(pDevExt);
2152#endif
2153 if (RT_SUCCESS(rc))
2154 {
2155 rc = RTMpNotificationRegister(supdrvGipMpEvent, pDevExt);
2156 if (RT_SUCCESS(rc))
2157 {
2158 /*
2159 * Do GIP initialization on all online CPUs. Wake up the
2160 * TSC-delta thread afterwards.
2161 */
2162 rc = RTMpOnAll(supdrvGipInitOnCpu, pDevExt, pGip);
2163 if (RT_SUCCESS(rc))
2164 {
2165#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2166 supdrvTscDeltaThreadStartMeasurement(pDevExt, true /* fForceAll */);
2167#else
2168 uint16_t iCpu;
2169 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
2170 {
2171 /*
2172 * Measure the TSC deltas now that we have MP notifications.
2173 */
2174 int cTries = 5;
2175 do
2176 {
2177 rc = supdrvTscMeasureInitialDeltas(pDevExt);
2178 if ( rc != VERR_TRY_AGAIN
2179 && rc != VERR_CPU_OFFLINE)
2180 break;
2181 } while (--cTries > 0);
2182 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
2183 Log(("supdrvTscDeltaInit: cpu[%u] delta %lld\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta));
2184 }
2185 else
2186 {
2187 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
2188 AssertMsg(!pGip->aCPUs[iCpu].i64TSCDelta, ("iCpu=%u %lld mode=%d\n", iCpu, pGip->aCPUs[iCpu].i64TSCDelta, pGip->u32Mode));
2189 }
2190 if (RT_SUCCESS(rc))
2191#endif
2192 {
2193 /*
2194 * Create the timer.
2195 * If CPU_ALL isn't supported we'll have to fall back to synchronous mode.
2196 */
2197 if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
2198 {
2199 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
2200 supdrvGipAsyncTimer, pDevExt);
2201 if (rc == VERR_NOT_SUPPORTED)
2202 {
2203 OSDBGPRINT(("supdrvGipCreate: omni timer not supported, falling back to synchronous mode\n"));
2204 pGip->u32Mode = SUPGIPMODE_SYNC_TSC;
2205 }
2206 }
2207 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2208 rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, 0 /* fFlags */,
2209 supdrvGipSyncAndInvariantTimer, pDevExt);
2210 if (RT_SUCCESS(rc))
2211 {
2212 /*
2213 * We're good.
2214 */
2215 Log(("supdrvGipCreate: %u ns interval.\n", u32Interval));
2216 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
2217
2218 g_pSUPGlobalInfoPage = pGip;
2219 return VINF_SUCCESS;
2220 }
2221
2222 OSDBGPRINT(("supdrvGipCreate: failed create GIP timer at %u ns interval. rc=%Rrc\n", u32Interval, rc));
2223 Assert(!pDevExt->pGipTimer);
2224 }
2225 }
2226 else
2227 OSDBGPRINT(("supdrvGipCreate: RTMpOnAll failed. rc=%Rrc\n", rc));
2228 }
2229 else
2230 OSDBGPRINT(("supdrvGipCreate: failed to register MP event notification. rc=%Rrc\n", rc));
2231 }
2232 else
2233 OSDBGPRINT(("supdrvGipCreate: supdrvTscDeltaInit failed. rc=%Rrc\n", rc));
2234 }
2235 else
2236 OSDBGPRINT(("supdrvGipCreate: supdrvGipInitMeasureTscFreq failed. rc=%Rrc\n", rc));
2237 }
2238
2239 /* Releases timer frequency increase too. */
2240 supdrvGipDestroy(pDevExt);
2241 return rc;
2242}
2243
2244
2245/**
2246 * Invalidates the GIP data upon termination.
2247 *
2248 * @param pGip Pointer to the read-write kernel mapping of the GIP.
2249 */
2250static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
2251{
2252 unsigned i;
2253 pGip->u32Magic = 0;
2254 for (i = 0; i < pGip->cCpus; i++)
2255 {
2256 pGip->aCPUs[i].u64NanoTS = 0;
2257 pGip->aCPUs[i].u64TSC = 0;
2258 pGip->aCPUs[i].iTSCHistoryHead = 0;
2259 pGip->aCPUs[i].u64TSCSample = 0;
2260 pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
2261 }
2262}
2263
2264
2265/**
2266 * Terminates the GIP.
2267 *
2268 * @param pDevExt Instance data. GIP stuff may be updated.
2269 */
2270void VBOXCALL supdrvGipDestroy(PSUPDRVDEVEXT pDevExt)
2271{
2272 int rc;
2273#ifdef DEBUG_DARWIN_GIP
2274 OSDBGPRINT(("supdrvGipDestroy: pDevExt=%p pGip=%p pGipTimer=%p GipMemObj=%p\n", pDevExt,
2275 pDevExt->GipMemObj != NIL_RTR0MEMOBJ ? RTR0MemObjAddress(pDevExt->GipMemObj) : NULL,
2276 pDevExt->pGipTimer, pDevExt->GipMemObj));
2277#endif
2278
2279 /*
2280 * Stop receiving MP notifications before tearing anything else down.
2281 */
2282 RTMpNotificationDeregister(supdrvGipMpEvent, pDevExt);
2283
2284#ifdef SUPDRV_USE_TSC_DELTA_THREAD
2285 /*
2286 * Terminate the TSC-delta measurement thread and resources.
2287 */
2288 supdrvTscDeltaTerm(pDevExt);
2289#endif
2290
2291 /*
2292 * Destroy the TSC-refinement timer.
2293 */
2294 if (pDevExt->pInvarTscRefineTimer)
2295 {
2296 RTTimerDestroy(pDevExt->pInvarTscRefineTimer);
2297 pDevExt->pInvarTscRefineTimer = NULL;
2298 }
2299
2300 /*
2301 * Invalidate the GIP data.
2302 */
2303 if (pDevExt->pGip)
2304 {
2305 supdrvGipTerm(pDevExt->pGip);
2306 pDevExt->pGip = NULL;
2307 }
2308 g_pSUPGlobalInfoPage = NULL;
2309
2310 /*
2311 * Destroy the timer and free the GIP memory object.
2312 */
2313 if (pDevExt->pGipTimer)
2314 {
2315 rc = RTTimerDestroy(pDevExt->pGipTimer); AssertRC(rc);
2316 pDevExt->pGipTimer = NULL;
2317 }
2318
2319 if (pDevExt->GipMemObj != NIL_RTR0MEMOBJ)
2320 {
2321 rc = RTR0MemObjFree(pDevExt->GipMemObj, true /* free mappings */); AssertRC(rc);
2322 pDevExt->GipMemObj = NIL_RTR0MEMOBJ;
2323 }
2324
2325 /*
2326 * Finally, make sure we've released the system timer resolution request
2327 * if one actually succeeded and is still pending.
2328 */
2329 supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
2330}
2331
2332
2333
2334
2335/*
2336 *
2337 *
2338 * GIP Update Timer Related Code
2339 * GIP Update Timer Related Code
2340 * GIP Update Timer Related Code
2341 *
2342 *
2343 */
2344
2345
2346/**
2347 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
2348 * updates all the per cpu data except the transaction id.
2349 *
2350 * @param pDevExt The device extension.
2351 * @param pGipCpu Pointer to the per cpu data.
2352 * @param u64NanoTS The current time stamp.
2353 * @param u64TSC The current TSC.
2354 * @param iTick The current timer tick.
2355 *
2356 * @remarks Can be called with interrupts disabled!
2357 */
2358static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
2359{
2360 uint64_t u64TSCDelta;
2361 bool fUpdateCpuHz;
2362 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2363 AssertPtrReturnVoid(pGip);
2364
2365 /* Delta between this and the previous update. */
2366 ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
2367
2368 /*
2369 * Update the NanoTS.
2370 */
2371 ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
2372
2373 /*
2374 * Calc TSC delta.
2375 */
2376 u64TSCDelta = u64TSC - pGipCpu->u64TSC;
2377 ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
2378
2379 /*
2380 * Determine if we need to update the CPU (TSC) frequency calculation.
2381 *
2382 * We don't need to keep recalculating the frequency when it's invariant,
2383 * unless the special tstGIP-2 testing mode is enabled.
2384 */
2385 fUpdateCpuHz = pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC;
2386 if (!(pGip->fFlags & SUPGIP_FLAGS_TESTING))
2387 { /* likely*/ }
2388 else
2389 {
2390 uint32_t fGipFlags = pGip->fFlags;
2391 if (fGipFlags & (SUPGIP_FLAGS_TESTING_ENABLE | SUPGIP_FLAGS_TESTING_START))
2392 {
2393 if (fGipFlags & SUPGIP_FLAGS_TESTING_START)
2394 {
2395 /* Cache the TSC frequency before forcing updates due to test mode. */
2396 if (!fUpdateCpuHz)
2397 pDevExt->uGipTestModeInvariantCpuHz = pGip->aCPUs[0].u64CpuHz;
2398 ASMAtomicAndU32(&pGip->fFlags, ~SUPGIP_FLAGS_TESTING_START);
2399 }
2400 fUpdateCpuHz = true;
2401 }
2402 else if (fGipFlags & SUPGIP_FLAGS_TESTING_STOP)
2403 {
2404 /* Restore the cached TSC frequency if any. */
2405 if (!fUpdateCpuHz)
2406 {
2407 Assert(pDevExt->uGipTestModeInvariantCpuHz);
2408 ASMAtomicWriteU64(&pGip->aCPUs[0].u64CpuHz, pDevExt->uGipTestModeInvariantCpuHz);
2409 }
2410 ASMAtomicAndU32(&pGip->fFlags, ~(SUPGIP_FLAGS_TESTING_STOP | SUPGIP_FLAGS_TESTING));
2411 }
2412 }
2413
2414 /*
2415 * Calculate the CPU (TSC) frequency if necessary.
2416 */
2417 if (fUpdateCpuHz)
2418 {
2419 uint64_t u64CpuHz;
2420 uint32_t u32UpdateIntervalTSC;
2421 uint32_t u32UpdateIntervalTSCSlack;
2422 uint32_t u32TransactionId;
2423 unsigned iTSCHistoryHead;
2424
2425 if (u64TSCDelta >> 32)
2426 {
2427 u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
2428 pGipCpu->cErrors++;
2429 }
2430
2431 /*
2432 * On the 2nd and 3rd callout, reset the history with the current TSC
2433 * interval since the values entered by supdrvGipInit are totally off.
2434 * The interval on the 1st callout is completely unreliable, the 2nd is a bit
2435 * better, while the 3rd should be most reliable.
2436 */
2437 /** @todo Could we drop this now that we initialize the history
2438 * with nominal TSC frequency values? */
2439 u32TransactionId = pGipCpu->u32TransactionId;
2440 if (RT_UNLIKELY( ( u32TransactionId == 5
2441 || u32TransactionId == 7)
2442 && ( iTick == 2
2443 || iTick == 3) ))
2444 {
2445 unsigned i;
2446 for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
2447 ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
2448 }
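 /* For reference: supdrvGipInitCpu() starts u32TransactionId at 2 and each
    update increments it twice, so the (odd) in-update value seen here is 5
    on the 2nd timer callout and 7 on the 3rd - hence the two magic values
    tested above. */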
2449
2450 /*
2451 * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
2452 * Wait until we have at least one full history since the above history reset. The
2453 * assumption is that the majority of the previous history values will be tolerable.
2454 * See @bugref{6710#c67}.
2455 */
2456 /** @todo Could we drop the fudging there now that we initialize the history
2457 * with nominal TSC frequency values? */
2458 if ( u32TransactionId > 23 /* 7 + (8 * 2) */
2459 && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2460 {
2461 uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
2462 if ( pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
2463 || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
2464 {
2465 uint32_t u32;
2466 u32 = pGipCpu->au32TSCHistory[0];
2467 u32 += pGipCpu->au32TSCHistory[1];
2468 u32 += pGipCpu->au32TSCHistory[2];
2469 u32 += pGipCpu->au32TSCHistory[3];
2470 u32 >>= 2;
2471 u64TSCDelta = pGipCpu->au32TSCHistory[4];
2472 u64TSCDelta += pGipCpu->au32TSCHistory[5];
2473 u64TSCDelta += pGipCpu->au32TSCHistory[6];
2474 u64TSCDelta += pGipCpu->au32TSCHistory[7];
2475 u64TSCDelta >>= 2;
2476 u64TSCDelta += u32;
2477 u64TSCDelta >>= 1;
2478 }
2479 }
2480
2481 /*
2482 * TSC History.
2483 */
2484 Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
2485 iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
2486 ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
2487 ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
2488
2489 /*
2490 * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
2491 *
2492 * On Windows, we have an occasional (but recurring) sour value that messes up
2493 * the history, but taking only 1 interval reduces the overall precision.
2494 */
2495 if ( pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
2496 || pGip->u32UpdateHz >= 1000)
2497 {
2498 uint32_t u32;
2499 u32 = pGipCpu->au32TSCHistory[0];
2500 u32 += pGipCpu->au32TSCHistory[1];
2501 u32 += pGipCpu->au32TSCHistory[2];
2502 u32 += pGipCpu->au32TSCHistory[3];
2503 u32 >>= 2;
2504 u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
2505 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
2506 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
2507 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
2508 u32UpdateIntervalTSC >>= 2;
2509 u32UpdateIntervalTSC += u32;
2510 u32UpdateIntervalTSC >>= 1;
2511
2512 /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
2513 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
2514 }
2515 else if (pGip->u32UpdateHz >= 90)
2516 {
2517 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2518 u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
2519 u32UpdateIntervalTSC >>= 1;
2520
2521 /* value chosen on a 2GHz thinkpad running windows */
2522 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
2523 }
2524 else
2525 {
2526 u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
2527
2528 /* This value hasn't been checked yet... waiting for OS/2 and 33Hz timers... :-) */
2529 u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
2530 }
2531 ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
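 /* Worked example of the 8-sample weighting above: the two history halves
    are averaged separately and then combined, i.e.
    ((h0+h1+h2+h3)/4 + (h4+h5+h6+h7)/4) / 2, so every sample contributes
    1/8th. With all eight samples at 30 000 000 ticks the result stays
    30 000 000, and the slack added in the invariant/1000+ Hz case is
    30 000 000 >> 14 = ~1831 ticks. */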
2532
2533 /*
2534 * CpuHz.
2535 */
2536 u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
2537 u64CpuHz /= pGip->u32UpdateIntervalNS;
2538 ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
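 /* E.g. u32UpdateIntervalTSC = 30 000 000 ticks measured over
    u32UpdateIntervalNS = 10 000 000 ns gives
    u64CpuHz = 30e6 * 1e9 / 1e7 = 3 000 000 000 Hz (3 GHz). */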
2539 }
2540}
2541
2542
2543/**
2544 * Updates the GIP.
2545 *
2546 * @param pDevExt The device extension.
2547 * @param u64NanoTS The current nanosecond timestamp.
2548 * @param u64TSC The current TSC timestamp.
2549 * @param idCpu The CPU ID.
2550 * @param iTick The current timer tick.
2551 *
2552 * @remarks Can be called with interrupts disabled!
2553 */
2554static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
2555{
2556 /*
2557 * Determine the relevant CPU data.
2558 */
2559 PSUPGIPCPU pGipCpu;
2560 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2561 AssertPtrReturnVoid(pGip);
2562
2563 if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
2564 pGipCpu = &pGip->aCPUs[0];
2565 else
2566 {
2567 unsigned iCpu;
2568 uint32_t idApic = supdrvGipGetApicId(pGip);
2569 if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
2570 { /* likely */ }
2571 else
2572 return;
2573 iCpu = pGip->aiCpuFromApicId[idApic];
2574 if (RT_LIKELY(iCpu < pGip->cCpus))
2575 { /* likely */ }
2576 else
2577 return;
2578 pGipCpu = &pGip->aCPUs[iCpu];
2579 if (RT_LIKELY(pGipCpu->idCpu == idCpu))
2580 { /* likely */ }
2581 else
2582 return;
2583 }
2584
2585 /*
2586 * Start update transaction.
2587 */
2588 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2589 {
2590 /* this can happen on win32 if we're taking too long and there are more CPUs around. shouldn't happen though. */
2591 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2592 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2593 pGipCpu->cErrors++;
2594 return;
2595 }
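 /*
  * The transaction id implements a seqlock: it is odd while an update is in
  * progress and even otherwise, so a reader must retry whenever the id is odd
  * or changes across its reads. Minimal reader sketch (illustrative only,
  * memory barriers omitted; the real ring-3 readers live elsewhere):
  *
  *     uint32_t idTrans;
  *     uint64_t u64Tsc, u64Nano;
  *     do
  *     {
  *         idTrans = pGipCpu->u32TransactionId;
  *         u64Tsc  = pGipCpu->u64TSC;
  *         u64Nano = pGipCpu->u64NanoTS;
  *     } while ((idTrans & 1) || idTrans != pGipCpu->u32TransactionId);
  */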
2596
2597 /*
2598 * Recalc the update frequency every 0x800th time.
2599 */
2600 if ( pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* cuz we're not recalculating the frequency on invariant hosts. */
2601 && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
2602 {
2603 if (pGip->u64NanoTSLastUpdateHz)
2604 {
2605#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
2606 uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
2607 uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
2608 if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
2609 {
2610 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
2611 * calculation on non-invariant hosts if it changes the history decision
2612 * taken in supdrvGipDoUpdateCpu(). */
2613 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
2614 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
2615 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
2616 }
2617#endif
2618 }
2619 ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
2620 }
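 /* Rough numbers, assuming GIP_UPDATEHZ_RECALC_FREQ is 0x800 as the comment
    above suggests: if those 0x800 intervals took u64Delta = 2 049 000 000 ns,
    then u32UpdateHz = 1e9 * 0x800 / 2.049e9 = 999 Hz and the new
    u32UpdateIntervalNS is 2.049e9 / 0x800 = ~1 000 488 ns. */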
2621
2622 /*
2623 * Update the data.
2624 */
2625 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2626
2627 /*
2628 * Complete transaction.
2629 */
2630 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2631}
2632
2633
2634/**
2635 * Updates the per cpu GIP data for the calling cpu.
2636 *
2637 * @param pDevExt The device extension.
2638 * @param u64NanoTS The current nanosecond timestamp.
2639 * @param u64TSC The current TSC timestamp.
2640 * @param idCpu The CPU ID.
2641 * @param idApic The APIC id for the CPU index.
2642 * @param iTick The current timer tick.
2643 *
2644 * @remarks Can be called with interrupts disabled!
2645 */
2646static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
2647 RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
2648{
2649 uint32_t iCpu;
2650 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2651
2652 /*
2653 * Avoid a potential race when a CPU online notification doesn't fire on
2654 * the onlined CPU but the tick creeps in before the event notification is
2655 * run.
2656 */
2657 if (RT_LIKELY(iTick != 1))
2658 { /* likely*/ }
2659 else
2660 {
2661 iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
2662 if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
2663 supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
2664 }
2665
2666 iCpu = pGip->aiCpuFromApicId[idApic];
2667 if (RT_LIKELY(iCpu < pGip->cCpus))
2668 {
2669 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
2670 if (pGipCpu->idCpu == idCpu)
2671 {
2672 /*
2673 * Start update transaction.
2674 */
2675 if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
2676 {
2677 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
2678 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2679 pGipCpu->cErrors++;
2680 return;
2681 }
2682
2683 /*
2684 * Update the data.
2685 */
2686 supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
2687
2688 /*
2689 * Complete transaction.
2690 */
2691 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
2692 }
2693 }
2694}
2695
2696
2697/**
2698 * Timer callback function for the sync and invariant GIP modes.
2699 *
2700 * @param pTimer The timer.
2701 * @param pvUser Opaque pointer to the device extension.
2702 * @param iTick The timer tick.
2703 */
2704static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2705{
2706 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2707 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
2708 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2709 uint64_t u64TSC = ASMReadTSC();
2710 uint64_t u64NanoTS = RTTimeSystemNanoTS();
2711 RT_NOREF1(pTimer);
2712
2713 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
2714 {
2715 /*
2716 * The calculations in supdrvGipUpdate() are somewhat timing sensitive,
2717 * missing timer ticks is not an option for GIP because the GIP users
2718 * will end up incrementing the time in 1ns per time getter call until
2719 * there is a complete timer update. So, if the delta has yet to be
2720 * calculated, we just pretend it is zero for now (the GIP users
2721 * probably won't have it for a wee while either and will do the same).
2722 *
2723 * We could maybe on some platforms try cross calling a CPU with a
2724 * working delta here, but it's not worth the hassle since the
2725 * likelihood of this happening is really low. On Windows, Linux, and
2726 * Solaris timers fire on the CPU they were registered/started on.
2727 * Darwin timers don't necessarily (they are high priority threads).
2728 */
2729 uint32_t iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
2730 uint16_t iGipCpu = RT_LIKELY(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx))
2731 ? pGip->aiCpuFromCpuSetIdx[iCpuSet] : UINT16_MAX;
2732 Assert(!ASMIntAreEnabled());
2733 if (RT_LIKELY(iGipCpu < pGip->cCpus))
2734 {
2735 int64_t iTscDelta = pGip->aCPUs[iGipCpu].i64TSCDelta;
2736 if (iTscDelta != INT64_MAX)
2737 u64TSC -= iTscDelta;
2738 }
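 /* Example: if this CPU's delta is +1200 (its TSC reads 1200 ticks ahead of
    the master, deltas being worker - master), subtracting it here puts the
    sample onto the master's TSC timeline. */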
2739 }
2740
2741 supdrvGipUpdate(pDevExt, u64NanoTS, u64TSC, NIL_RTCPUID, iTick);
2742
2743 ASMSetFlags(fEFlags);
2744}
2745
2746
2747/**
2748 * Timer callback function for async GIP mode.
2749 * @param pTimer The timer.
2750 * @param pvUser Opaque pointer to the device extension.
2751 * @param iTick The timer tick.
2752 */
2753static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2754{
2755 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
2756 RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* No interruptions please (real problem on S10). */
2757 RTCPUID idCpu = RTMpCpuId();
2758 uint64_t u64TSC = ASMReadTSC();
2759 uint64_t NanoTS = RTTimeSystemNanoTS();
2760 RT_NOREF1(pTimer);
2761
2762 /** @todo reset the transaction number and whatnot when iTick == 1. */
2763 if (pDevExt->idGipMaster == idCpu)
2764 supdrvGipUpdate(pDevExt, NanoTS, u64TSC, idCpu, iTick);
2765 else
2766 supdrvGipUpdatePerCpu(pDevExt, NanoTS, u64TSC, idCpu, supdrvGipGetApicId(pDevExt->pGip), iTick);
2767
2768 ASMSetFlags(fEFlags);
2769}
2770
2771
2772
2773
2774/*
2775 *
2776 *
2777 * TSC Delta Measurements And Related Code
2778 * TSC Delta Measurements And Related Code
2779 * TSC Delta Measurements And Related Code
2780 *
2781 *
2782 */
2783
2784
2785/*
2786 * Select TSC delta measurement algorithm.
2787 */
2788#if 0
2789# define GIP_TSC_DELTA_METHOD_1
2790#else
2791# define GIP_TSC_DELTA_METHOD_2
2792#endif
2793
2794/** For padding variables to keep them away from other cache lines. Better too
2795 * large than too small!
2796 * @remarks Current AMD64 and x86 CPUs seem to use 64 bytes. There are claims
2797 * that NetBurst had 128 byte cache lines while the 486 thru Pentium
2798 * III had 32 byte cache lines. */
2799#define GIP_TSC_DELTA_CACHE_LINE_SIZE 128
2800
2801
2802/**
2803 * TSC delta measurement algorithm \#2 result entry.
2804 */
2805typedef struct SUPDRVTSCDELTAMETHOD2ENTRY
2806{
2807 uint32_t iSeqMine;
2808 uint32_t iSeqOther;
2809 uint64_t uTsc;
2810} SUPDRVTSCDELTAMETHOD2ENTRY;
2811
2812/**
2813 * TSC delta measurement algorithm \#2 Data.
2814 */
2815typedef struct SUPDRVTSCDELTAMETHOD2
2816{
2817 /** Padding to make sure the iCurSeqNo is in its own cache line. */
2818 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2819 /** The current sequence number of this worker. */
2820 uint32_t volatile iCurSeqNo;
2821 /** Padding to make sure the iCurSeqNo is in its own cache line. */
2822 uint32_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint32_t) - 1];
2823 /** Result table. */
2824 SUPDRVTSCDELTAMETHOD2ENTRY aResults[64];
2825} SUPDRVTSCDELTAMETHOD2;
2826/** Pointer to the data for TSC delta measurement algorithm \#2 .*/
2827typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
2828
2829
2830/**
2831 * The TSC delta synchronization struct, version 2.
2832 *
2833 * The synchronization variable is completely isolated in its own cache line
2834 * (provided our max cache line size estimate is correct).
2835 */
2836typedef struct SUPTSCDELTASYNC2
2837{
2838 /** Padding to make sure uSyncVar is in its own cache line. */
2839 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2840
2841 /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */
2842 volatile uint32_t uSyncVar;
2843 /** Sequence synchronizing variable used for post 'GO' synchronization. */
2844 volatile uint32_t uSyncSeq;
2845
2846 /** Padding to make sure uSyncVar and uSyncSeq are in their own cache line. */
2847 uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t) - 2];
2848
2849 /** Start RDTSC value. Put here mainly to save stack space. */
2850 uint64_t uTscStart;
2851 /** Copy of SUPDRVGIPTSCDELTARGS::cMaxTscTicks. */
2852 uint64_t cMaxTscTicks;
2853} SUPTSCDELTASYNC2;
2854AssertCompileSize(SUPTSCDELTASYNC2, GIP_TSC_DELTA_CACHE_LINE_SIZE * 2 + sizeof(uint64_t));
2855typedef SUPTSCDELTASYNC2 *PSUPTSCDELTASYNC2;
2856
2857/** Prestart wait. */
2858#define GIP_TSC_DELTA_SYNC2_PRESTART_WAIT UINT32_C(0x0ffe)
2859/** Prestart aborted. */
2860#define GIP_TSC_DELTA_SYNC2_PRESTART_ABORT UINT32_C(0x0fff)
2861/** Ready (on your mark). */
2862#define GIP_TSC_DELTA_SYNC2_READY UINT32_C(0x1000)
2863/** Steady (get set). */
2864#define GIP_TSC_DELTA_SYNC2_STEADY UINT32_C(0x1001)
2865/** Go! */
2866#define GIP_TSC_DELTA_SYNC2_GO UINT32_C(0x1002)
2867/** Used by the verification test. */
2868#define GIP_TSC_DELTA_SYNC2_GO_GO UINT32_C(0x1003)
2869
2870/** We reached the time limit. */
2871#define GIP_TSC_DELTA_SYNC2_TIMEOUT UINT32_C(0x1ffe)
2872/** The other party won't touch the sync struct ever again. */
2873#define GIP_TSC_DELTA_SYNC2_FINAL UINT32_C(0x1fff)
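/*
 * Illustrative summary of the normal state progression for one measurement
 * round (see supdrvTscDeltaSync2_Before/After below); each side drives the
 * OTHER side's uSyncVar:
 *
 *      master                           worker
 *      ------                           ------
 *      worker: READY -> STEADY          wait for own STEADY
 *      wait for own STEADY (ack)        master: READY -> STEADY
 *      worker: STEADY -> GO             wait for own GO
 *      wait for own GO (ack)            master: STEADY -> GO
 *           ... mostly lockstep measurement ...
 *      worker: GO -> READY              master: GO -> READY
 *
 * TIMEOUT and FINAL are the abnormal exits, taken when the tick budget is
 * exhausted or when the other party is done with the structure for good.
 */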
2874
2875
2876/**
2877 * Argument package/state passed by supdrvTscMeasureDeltaOne() to the RTMpOn
2878 * callback worker.
2879 * @todo add
2880 */
2881typedef struct SUPDRVGIPTSCDELTARGS
2882{
2883 /** The device extension. */
2884 PSUPDRVDEVEXT pDevExt;
2885 /** Pointer to the GIP CPU array entry for the worker. */
2886 PSUPGIPCPU pWorker;
2887 /** Pointer to the GIP CPU array entry for the master. */
2888 PSUPGIPCPU pMaster;
2889 /** The maximum number of ticks to spend in supdrvTscMeasureDeltaCallback.
2890 * (This is what we need a rough TSC frequency for.) */
2891 uint64_t cMaxTscTicks;
2892 /** Used to abort synchronization setup. */
2893 bool volatile fAbortSetup;
2894
2895 /** Padding to make sure the master variables live in its own cache lines. */
2896 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2897
2898 /** @name Master
2899 * @{ */
2900 /** The time the master spent in the MP worker. */
2901 uint64_t cElapsedMasterTscTicks;
2902 /** The iTry value when stopped at. */
2903 uint32_t iTry;
2904 /** Set if the run timed out. */
2905 bool volatile fTimedOut;
2906 /** Pointer to the master's synchronization struct (on stack). */
2907 PSUPTSCDELTASYNC2 volatile pSyncMaster;
2908 /** Master data union. */
2909 union
2910 {
2911 /** Data (master) for delta verification. */
2912 struct
2913 {
2914 /** Verification test TSC values for the master. */
2915 uint64_t volatile auTscs[32];
2916 } Verify;
2917 /** Data (master) for measurement method \#2. */
2918 struct
2919 {
2920 /** Data and sequence number. */
2921 SUPDRVTSCDELTAMETHOD2 Data;
2922 /** The lag setting for the next run. */
2923 bool fLag;
2924 /** Number of hits. */
2925 uint32_t cHits;
2926 } M2;
2927 } uMaster;
2928 /** The verifier verdict, VINF_SUCCESS if ok, VERR_OUT_OF_RANGE if not,
2929 * VERR_TRY_AGAIN on timeout. */
2930 int32_t rcVerify;
2931#ifdef TSCDELTA_VERIFY_WITH_STATS
2932 /** The maximum difference between TSC read during delta verification. */
2933 int64_t cMaxVerifyTscTicks;
2934 /** The minimum difference between two TSC reads during verification. */
2935 int64_t cMinVerifyTscTicks;
2936 /** The bad TSC diff, worker relative to master (= worker - master).
2937 * Negative value means the worker is behind the master. */
2938 int64_t iVerifyBadTscDiff;
2939#endif
2940 /** @} */
2941
2942 /** Padding to make sure the worker variables live in their own cache lines. */
2943 uint64_t au64CacheLinePaddingBetween[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2944
2945 /** @name Proletarian
2946 * @{ */
2947 /** Pointer to the worker's synchronization struct (on stack). */
2948 PSUPTSCDELTASYNC2 volatile pSyncWorker;
2949 /** The time the worker spent in the MP worker. */
2950 uint64_t cElapsedWorkerTscTicks;
2951 /** Worker data union. */
2952 union
2953 {
2954 /** Data (worker) for delta verification. */
2955 struct
2956 {
2957 /** Verification test TSC values for the worker. */
2958 uint64_t volatile auTscs[32];
2959 } Verify;
2960 /** Data (worker) for measurement method \#2. */
2961 struct
2962 {
2963 /** Data and sequence number. */
2964 SUPDRVTSCDELTAMETHOD2 Data;
2965 /** The lag setting for the next run (set by master). */
2966 bool fLag;
2967 } M2;
2968 } uWorker;
2969 /** @} */
2970
2971 /** Padding to make sure the above is in its own cache line. */
2972 uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)];
2973} SUPDRVGIPTSCDELTARGS;
2974typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
2975
2976
2977/** @name Macros that implements the basic synchronization steps common to
2978 * the algorithms.
2979 *
2980 * Must be used from a loop as the timeouts are implemented via 'break' statements
2981 * at the moment.
2982 *
2983 * @{
2984 */
2985#if defined(DEBUG_bird) /* || defined(VBOX_STRICT) */
2986# define TSCDELTA_DBG_VARS() uint32_t iDbgCounter
2987# define TSCDELTA_DBG_START_LOOP() do { iDbgCounter = 0; } while (0)
2988# define TSCDELTA_DBG_CHECK_LOOP() \
2989 do { iDbgCounter++; if ((iDbgCounter & UINT32_C(0x01ffffff)) == 0) RT_BREAKPOINT(); } while (0)
2990#else
2991# define TSCDELTA_DBG_VARS() ((void)0)
2992# define TSCDELTA_DBG_START_LOOP() ((void)0)
2993# define TSCDELTA_DBG_CHECK_LOOP() ((void)0)
2994#endif
2995#if 0
2996# define TSCDELTA_DBG_SYNC_MSG(a_Args) SUPR0Printf a_Args
2997#else
2998# define TSCDELTA_DBG_SYNC_MSG(a_Args) ((void)0)
2999#endif
3000#if 0
3001# define TSCDELTA_DBG_SYNC_MSG2(a_Args) SUPR0Printf a_Args
3002#else
3003# define TSCDELTA_DBG_SYNC_MSG2(a_Args) ((void)0)
3004#endif
3005#if 0
3006# define TSCDELTA_DBG_SYNC_MSG9(a_Args) SUPR0Printf a_Args
3007#else
3008# define TSCDELTA_DBG_SYNC_MSG9(a_Args) ((void)0)
3009#endif
3010
3011
3012static bool supdrvTscDeltaSync2_Before(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3013 bool fIsMaster, PRTCCUINTREG pfEFlags, PSUPDRVGIPTSCDELTARGS pArgs)
3014{
3015 uint32_t iMySeq = fIsMaster ? 0 : 256;
3016 uint32_t const iMaxSeq = iMySeq + 16; /* For the last loop, darn linux/freebsd C-ishness. */
3017 uint32_t u32Tmp;
3018 uint32_t iSync2Loops = 0;
3019 RTCCUINTREG fEFlags;
3020 TSCDELTA_DBG_VARS();
3021
3022#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
3023 *pfEFlags = X86_EFL_IF | X86_EFL_1; /* should shut up most nagging compilers. */
3024#else
3025 *pfEFlags = 0;
3026#endif
3027
3028 /*
3029 * The master tells the worker to get on its mark.
3030 */
3031 if (fIsMaster)
3032 {
3033 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
3034 { /* likely*/ }
3035 else
3036 {
3037 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #1 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3038 return false;
3039 }
3040 }
3041
3042 /*
3043 * Wait for the on your mark signal (ack in the master case). We process timeouts here.
3044 */
3045 ASMAtomicWriteU32(&(pMySync)->uSyncSeq, 0);
3046 for (;;)
3047 {
3048 fEFlags = ASMIntDisableFlags();
3049 u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
3050 if (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY)
3051 break;
3052 ASMSetFlags(fEFlags);
3053 ASMNopPause();
3054
3055 /* Abort? */
3056 if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY)
3057 {
3058 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #2 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
3059 return false;
3060 }
3061
3062 /* Check for timeouts every so often (not every loop in case RDTSC is
3063 trapping or something). Must check the first time around. */
3064#if 0 /* For debugging the timeout paths. */
3065 static uint32_t volatile xxx;
3066#endif
3067 if ( ( (iSync2Loops & 0x3ff) == 0
3068 && ASMReadTSC() - pMySync->uTscStart > pMySync->cMaxTscTicks)
3069#if 0 /* This is crazy, I know, but enable this code and the results are markedly better when enabled on the 1.4GHz AMD (debug). */
3070 || (!fIsMaster && (++xxx & 0xf) == 0)
3071#endif
3072 )
3073 {
3074 /* Try to switch our own state into timeout mode so the master cannot tell us to 'GO',
3075 ignore the timeout if we've got the go ahead already (simpler). */
3076 if (ASMAtomicCmpXchgU32(&pMySync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_READY))
3077 {
3078 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: timeout\n", fIsMaster ? "master" : "worker"));
3079 ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_STEADY);
3080 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
3081 return false;
3082 }
3083 }
3084 iSync2Loops++;
3085 }
3086
3087 /*
3088 * Interrupts are now disabled and will remain disabled until we do
3089 * TSCDELTA_MASTER_SYNC_AFTER / TSCDELTA_OTHER_SYNC_AFTER.
3090 */
3091 *pfEFlags = fEFlags;
3092
3093 /*
3094 * The worker tells the master that it is on its mark and that the master
3095 * needs to get into position as well.
3096 */
3097 if (!fIsMaster)
3098 {
3099 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY)))
3100 { /* likely */ }
3101 else
3102 {
3103 ASMSetFlags(fEFlags);
3104 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #3 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3105 return false;
3106 }
3107 }
3108
3109 /*
3110 * The master sends the 'go' to the worker and waits for the ACK.
3111 */
3112 if (fIsMaster)
3113 {
3114 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
3115 { /* likely */ }
3116 else
3117 {
3118 ASMSetFlags(fEFlags);
3119 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #4 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3120 return false;
3121 }
3122 }
3123
3124 /*
3125 * Wait for the 'go' signal (ack in the master case).
3126 */
3127 TSCDELTA_DBG_START_LOOP();
3128 for (;;)
3129 {
3130 u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
3131 if (u32Tmp == GIP_TSC_DELTA_SYNC2_GO)
3132 break;
3133 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY))
3134 { /* likely */ }
3135 else
3136 {
3137 ASMSetFlags(fEFlags);
3138 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #5 u32Tmp=%#x\n", fIsMaster ? "master" : "worker", u32Tmp));
3139 return false;
3140 }
3141
3142 TSCDELTA_DBG_CHECK_LOOP();
3143 ASMNopPause();
3144 }
3145
3146 /*
3147 * The worker acks the 'go' (shouldn't fail).
3148 */
3149 if (!fIsMaster)
3150 {
3151 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY)))
3152 { /* likely */ }
3153 else
3154 {
3155 ASMSetFlags(fEFlags);
3156 TSCDELTA_DBG_SYNC_MSG(("sync/before/%s: #6 uSyncVar=%#x\n", fIsMaster ? "master" : "worker", pOtherSync->uSyncVar));
3157 return false;
3158 }
3159 }
3160
3161 /*
3162 * Try to enter mostly lockstep execution with the other party.
3163 */
3164 for (;;)
3165 {
3166 uint32_t iOtherSeq1, iOtherSeq2;
3167 ASMCompilerBarrier();
3168 ASMSerializeInstruction();
3169
3170 ASMAtomicWriteU32(&pMySync->uSyncSeq, iMySeq);
3171 ASMNopPause();
3172 iOtherSeq1 = ASMAtomicXchgU32(&pOtherSync->uSyncSeq, iMySeq);
3173 ASMNopPause();
3174 iOtherSeq2 = ASMAtomicReadU32(&pMySync->uSyncSeq);
3175
3176 ASMCompilerBarrier();
3177 if (iOtherSeq1 == iOtherSeq2)
3178 return true;
3179
3180 /* Did the other guy give up? Should we give up? */
3181 if ( iOtherSeq1 == UINT32_MAX
3182 || iOtherSeq2 == UINT32_MAX)
3183 return true;
3184 if (++iMySeq >= iMaxSeq)
3185 {
3186 ASMAtomicWriteU32(&pMySync->uSyncSeq, UINT32_MAX);
3187 return true;
3188 }
3189 ASMNopPause();
3190 }
3191}
3192
3193#define TSCDELTA_MASTER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
3194 if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
3195 { /*likely*/ } \
3196 else if (true) \
3197 { \
3198 TSCDELTA_DBG_SYNC_MSG9(("sync/before/master: #89\n")); \
3199 break; \
3200 } else do {} while (0)
3201#define TSCDELTA_OTHER_SYNC_BEFORE(a_pMySync, a_pOtherSync, a_pfEFlags, a_pArgs) \
3202 if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_pfEFlags, a_pArgs))) \
3203 { /*likely*/ } \
3204 else if (true) \
3205 { \
3206 TSCDELTA_DBG_SYNC_MSG9(("sync/before/other: #89\n")); \
3207 break; \
3208 } else do {} while (0)
3209
3210
3211static bool supdrvTscDeltaSync2_After(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3212 bool fIsMaster, RTCCUINTREG fEFlags)
3213{
3214 TSCDELTA_DBG_VARS();
3215 RT_NOREF1(pOtherSync);
3216
3217 /*
3218 * Wait for the 'ready' signal. In the master's case, this means the
3219 * worker has completed its data collection, while in the worker's case it
3220 * means the master is done processing the data and it's time for the next
3221 * loop iteration (or whatever).
3222 */
3223 ASMSetFlags(fEFlags);
3224 TSCDELTA_DBG_START_LOOP();
3225 for (;;)
3226 {
3227 uint32_t u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar);
3228 if ( u32Tmp == GIP_TSC_DELTA_SYNC2_READY
3229 || (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY && !fIsMaster) /* kicked twice => race */ )
3230 return true;
3231 ASMNopPause();
3232 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_GO))
3233 { /* likely */}
3234 else
3235 {
3236 TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #1 u32Tmp=%#x\n", u32Tmp));
3237 return false; /* shouldn't ever happen! */
3238 }
3239 TSCDELTA_DBG_CHECK_LOOP();
3240 ASMNopPause();
3241 }
3242}
3243
3244#define TSCDELTA_MASTER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
3245 if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, true /*fIsMaster*/, a_fEFlags))) \
3246 { /* likely */ } \
3247 else if (true) \
3248 { \
3249 TSCDELTA_DBG_SYNC_MSG9(("sync/after/master: #97\n")); \
3250 break; \
3251 } else do {} while (0)
3252
3253#define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pMySync, a_pOtherSync) \
3254 /* \
3255 * Tell the worker that we're done processing the data and ready for the next round. \
3256 */ \
3257 if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
3258 { /* likely */ } \
3259 else if (true)\
3260 { \
3261 TSCDELTA_DBG_SYNC_MSG(("sync/after/master: #99 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
3262 break; \
3263 } else do {} while (0)
3264
3265#define TSCDELTA_OTHER_SYNC_AFTER(a_pMySync, a_pOtherSync, a_fEFlags) \
3266 if (true) { \
3267 /* \
3268 * Tell the master that we're done collecting data and wait for the next round to start. \
3269 */ \
3270 if (RT_LIKELY(ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO))) \
3271 { /* likely */ } \
3272 else \
3273 { \
3274 ASMSetFlags(a_fEFlags); \
3275 TSCDELTA_DBG_SYNC_MSG(("sync/after/other: #0 uSyncVar=%#x\n", (a_pOtherSync)->uSyncVar)); \
3276 break; \
3277 } \
3278 if (RT_LIKELY(supdrvTscDeltaSync2_After(a_pMySync, a_pOtherSync, false /*fIsMaster*/, a_fEFlags))) \
3279 { /* likely */ } \
3280 else \
3281 { \
3282 TSCDELTA_DBG_SYNC_MSG9(("sync/after/other: #98\n")); \
3283 break; \
3284 } \
3285 } else do {} while (0)
3286/** @} */
3287
3288
3289#ifdef GIP_TSC_DELTA_METHOD_1
3290/**
3291 * TSC delta measurement algorithm \#1 (GIP_TSC_DELTA_METHOD_1).
3292 *
3293 *
3294 * We ignore the first few runs of the loop in order to prime the
3295 * cache. Also, we need to be careful about using the 'pause' instruction
3296 * in critical busy-wait loops in this code - it can cause undesired
3297 * behaviour with hyperthreading.
3298 *
3299 * We try to minimize the measurement error by computing the minimum
3300 * read time of the compare statement in the worker by taking TSC
3301 * measurements across it.
3302 *
3303 * It must be noted that the computed minimum read time mostly serves to
3304 * eliminate huge deltas when the worker is too early; it doesn't by
3305 * itself help produce more accurate deltas. We allow two times the
3306 * computed minimum as an arbitrary acceptable threshold. Therefore,
3307 * it is still possible to get negative deltas where there are none
3308 * when the worker is earlier. As long as these occasional negative
3309 * deltas are lower than the time it takes to exit guest-context and
3310 * the OS to reschedule EMT on a different CPU, we won't expose a TSC
3311 * that jumped backwards. It is due to the existence of the negative
3312 * deltas that we don't recompute the delta with the master and
3313 * worker interchanged to eliminate the remaining measurement error.
3314 *
3315 *
3316 * @param pArgs The argument/state data.
3317 * @param pMySync My synchronization structure.
3318 * @param pOtherSync My partner's synchronization structure.
3319 * @param fIsMaster Set if master, clear if worker.
3320 * @param iTry The attempt number.
3321 */
3322static void supdrvTscDeltaMethod1Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3323 bool fIsMaster, uint32_t iTry)
3324{
3325 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3326 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3327 uint64_t uMinCmpReadTime = UINT64_MAX;
3328 unsigned iLoop;
3329 NOREF(iTry);
3330
3331 for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
3332 {
3333 RTCCUINTREG fEFlags;
3334 if (fIsMaster)
3335 {
3336 /*
3337 * The master.
3338 */
3339 AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
3340 ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
3341 pGipCpuMaster->u64TSCSample, pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, pArgs->pDevExt->idGipMaster));
3342 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3343
3344 do
3345 {
3346 ASMSerializeInstruction();
3347 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
3348 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
3349
3350 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3351
3352 /* Process the data. */
3353 if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
3354 {
3355 if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
3356 {
3357 int64_t iDelta = pGipCpuWorker->u64TSCSample
3358 - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
3359 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3360 ? iDelta < pGipCpuWorker->i64TSCDelta
3361 : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
3362 pGipCpuWorker->i64TSCDelta = iDelta;
3363 }
3364 }
3365
3366 /* Reset our TSC sample and tell the worker to move on. */
3367 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
3368 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3369 }
3370 else
3371 {
3372 /*
3373 * The worker.
3374 */
3375 uint64_t uTscWorker;
3376 uint64_t uTscWorkerFlushed;
3377 uint64_t uCmpReadTime;
3378
3379 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample); /* Warm the cache line. */
3380 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3381
3382 /*
3383 * Keep reading the TSC until we notice that the master has read his. Reading
3384 * the TSC -after- the master has updated the memory is way too late. We thus
3385 * compensate by trying to measure how long it took for the worker to notice
3386 * the memory flushed from the master.
3387 */
3388 do
3389 {
3390 ASMSerializeInstruction();
3391 uTscWorker = ASMReadTSC();
3392 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
3393 ASMSerializeInstruction();
3394 uTscWorkerFlushed = ASMReadTSC();
3395
3396 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
3397 if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
3398 {
3399                /* This is totally arbitrary, a.k.a. I don't like it, but I have no better ideas for now. */
3400 if (uCmpReadTime < (uMinCmpReadTime << 1))
3401 {
3402 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
3403 if (uCmpReadTime < uMinCmpReadTime)
3404 uMinCmpReadTime = uCmpReadTime;
3405 }
3406 else
3407 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
3408 }
3409 else if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
3410 {
3411 if (uCmpReadTime < uMinCmpReadTime)
3412 uMinCmpReadTime = uCmpReadTime;
3413 }
3414
3415 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3416 }
3417 }
3418
3419 TSCDELTA_DBG_SYNC_MSG9(("sync/method1loop/%s: #92 iLoop=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iLoop,
3420 pMySync->uSyncVar));
3421
3422 /*
3423 * We must reset the worker TSC sample value in case it gets picked as a
3424 * GIP master later on (it's trashed above, naturally).
3425 */
3426 if (!fIsMaster)
3427 ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
3428}
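
/*
 * Restating the update rule above as a self-contained sketch (a hypothetical
 * helper for illustration only, not part of the driver; it assumes
 * GIP_TSC_DELTA_INITIAL_MASTER_VALUE is zero, i.e. the master's own delta):
 *
 * @code
 *  #include <stdint.h>
 *
 *  // Returns the delta to keep, given the current best and a new sample.
 *  // Non-negative candidates only replace a larger current value, negative
 *  // ones only a smaller one, so the stored delta is nudged towards zero.
 *  static int64_t tscDeltaKeepBest(int64_t iCur, int64_t iNew)
 *  {
 *      if (iCur == INT64_MAX)  // nothing measured yet
 *          return iNew;
 *      if (iNew >= 0)
 *          return iNew < iCur ? iNew : iCur;
 *      return iNew > iCur ? iNew : iCur;
 *  }
 * @endcode
 *
 * E.g. (made-up numbers) a worker sample of 1000 against a master sample of
 * 1040 gives iDelta = 1000 - (1040 - 0) = -40: the worker TSC looks 40 ticks
 * behind, and later loops may only replace that with something larger, i.e.
 * closer to zero.
 */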
3429#endif /* GIP_TSC_DELTA_METHOD_1 */
3430
3431
3432#ifdef GIP_TSC_DELTA_METHOD_2
3433/*
3434 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
3435 */
3436
3437# define GIP_TSC_DELTA_M2_LOOPS (7 + GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3438# define GIP_TSC_DELTA_M2_PRIMER_LOOPS 0
3439
3440
3441static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs)
3442{
3443 int64_t iMasterTscDelta = pArgs->pMaster->i64TSCDelta;
3444 int64_t iBestDelta = pArgs->pWorker->i64TSCDelta;
3445 uint32_t idxResult;
3446 uint32_t cHits = 0;
3447
3448 /*
3449 * Look for matching entries in the master and worker tables.
3450 */
3451 for (idxResult = 0; idxResult < RT_ELEMENTS(pArgs->uMaster.M2.Data.aResults); idxResult++)
3452 {
3453 uint32_t idxOther = pArgs->uMaster.M2.Data.aResults[idxResult].iSeqOther;
3454 if (idxOther & 1)
3455 {
3456 idxOther >>= 1;
3457 if (idxOther < RT_ELEMENTS(pArgs->uWorker.M2.Data.aResults))
3458 {
3459 if (pArgs->uWorker.M2.Data.aResults[idxOther].iSeqOther == pArgs->uMaster.M2.Data.aResults[idxResult].iSeqMine)
3460 {
3461 int64_t iDelta;
3462 iDelta = pArgs->uWorker.M2.Data.aResults[idxOther].uTsc
3463 - (pArgs->uMaster.M2.Data.aResults[idxResult].uTsc - iMasterTscDelta);
3464 if ( iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
3465 ? iDelta < iBestDelta
3466 : iDelta > iBestDelta || iBestDelta == INT64_MAX)
3467 iBestDelta = iDelta;
3468 cHits++;
3469 }
3470 }
3471 }
3472 }
3473
3474 /*
3475 * Save the results.
3476 */
3477 if (cHits > 2)
3478 pArgs->pWorker->i64TSCDelta = iBestDelta;
3479 pArgs->uMaster.M2.cHits += cHits;
3480}
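
/*
 * How the sequence numbers pair up: supdrvTscDeltaMethod2CollectData (below)
 * bumps iCurSeqNo once right before and once right after reading the TSC, so
 * entry i publishes the odd value 2*i + 1 while its TSC read is in flight.
 * When the value snooped from the other CPU (iSeqOther) is odd, that CPU was
 * thus in the middle of recording entry iSeqOther >> 1 at roughly the same
 * instant, and the cross-check above confirms the overlap was mutual.
 * (Made-up example: master entry 3 stores iSeqMine = 7; if it snooped
 * iSeqOther = 5, it pairs with worker entry 2, provided that entry snooped
 * iSeqOther = 7 in return.)
 */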
3481
3482
3483/**
3484 * The core function of the 2nd TSC delta measurement algorithm.
3485 *
3486 * The idea here is that we have the two CPUs execute the exact same code
3487 * collecting a largish set of TSC samples. The code has one data dependency on
3488 * the other CPU, the intention of which is to synchronize the execution as
3489 * well as to help cross-reference the two sets of TSC samples (the sequence numbers).
3490 *
3491 * The @a fLag parameter is used to modify the execution a tiny bit on one or
3492 * both of the CPUs. When @a fLag differs between the CPUs, it is thought to
3493 * help the CPUs occasionally enter lock-step execution.
3494 *
3495 */
3496static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
3497{
3498 SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
3499 uint32_t cLeft = RT_ELEMENTS(pMyData->aResults);
3500
3501 ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
3502 ASMSerializeInstruction();
3503 while (cLeft-- > 0)
3504 {
3505 uint64_t uTsc;
3506 uint32_t iSeqMine = ASMAtomicIncU32(&pMyData->iCurSeqNo);
3507 uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
3508 ASMCompilerBarrier();
3509 ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
3510 uTsc = ASMReadTSC();
3511 ASMAtomicIncU32(&pMyData->iCurSeqNo);
3512 ASMCompilerBarrier();
3513 ASMSerializeInstruction();
3514 pEntry->iSeqMine = iSeqMine;
3515 pEntry->iSeqOther = iSeqOther;
3516 pEntry->uTsc = uTsc;
3517 pEntry++;
3518 ASMSerializeInstruction();
3519 if (fLag)
3520 ASMNopPause();
3521 }
3522}
3523
3524
3525/**
3526 * TSC delta measurement algorithm \#2 (GIP_TSC_DELTA_METHOD_2).
3527 *
3528 * See supdrvTscDeltaMethod2CollectData for algorithm details.
3529 *
3530 * @param pArgs The argument/state data.
3531 * @param pMySync My synchronization structure.
3532 * @param pOtherSync My partner's synchronization structure.
3533 * @param fIsMaster Set if master, clear if worker.
3534 * @param iTry The attempt number.
3535 */
3536static void supdrvTscDeltaMethod2Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync,
3537 bool fIsMaster, uint32_t iTry)
3538{
3539 unsigned iLoop;
3540 RT_NOREF1(iTry);
3541
3542 for (iLoop = 0; iLoop < GIP_TSC_DELTA_M2_LOOPS; iLoop++)
3543 {
3544 RTCCUINTREG fEFlags;
3545 if (fIsMaster)
3546 {
3547 /*
3548 * Adjust the loop lag fudge.
3549 */
3550# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3551 if (iLoop < GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3552 {
3553                /* Lag during the priming to be nice to everyone... */
3554 pArgs->uMaster.M2.fLag = true;
3555 pArgs->uWorker.M2.fLag = true;
3556 }
3557 else
3558# endif
3559 if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4)
3560 {
3561 /* 25 % of the body without lagging. */
3562 pArgs->uMaster.M2.fLag = false;
3563 pArgs->uWorker.M2.fLag = false;
3564 }
3565 else if (iLoop < (GIP_TSC_DELTA_M2_LOOPS - GIP_TSC_DELTA_M2_PRIMER_LOOPS) / 4 * 2)
3566 {
3567 /* 25 % of the body with both lagging. */
3568 pArgs->uMaster.M2.fLag = true;
3569 pArgs->uWorker.M2.fLag = true;
3570 }
3571 else
3572 {
3573 /* 50% of the body with alternating lag. */
3574 pArgs->uMaster.M2.fLag = (iLoop & 1) == 0;
3575                pArgs->uWorker.M2.fLag = (iLoop & 1) == 1;
3576 }
3577
3578 /*
3579 * Sync up with the worker and collect data.
3580 */
3581 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3582 supdrvTscDeltaMethod2CollectData(&pArgs->uMaster.M2.Data, &pArgs->uWorker.M2.Data.iCurSeqNo, pArgs->uMaster.M2.fLag);
3583 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3584
3585 /*
3586 * Process the data.
3587 */
3588# if GIP_TSC_DELTA_M2_PRIMER_LOOPS > 0
3589 if (iLoop >= GIP_TSC_DELTA_M2_PRIMER_LOOPS)
3590# endif
3591 supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs);
3592
3593 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3594 }
3595 else
3596 {
3597 /*
3598 * The worker.
3599 */
3600 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3601 supdrvTscDeltaMethod2CollectData(&pArgs->uWorker.M2.Data, &pArgs->uMaster.M2.Data.iCurSeqNo, pArgs->uWorker.M2.fLag);
3602 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3603 }
3604 }
3605}
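
/*
 * With the current GIP_TSC_DELTA_M2_LOOPS = 7 and no primer loops, the lag
 * schedule above works out (integer division) to: loop 0 without lag, loop 1
 * with both CPUs lagging, and loops 2..6 alternating the lag between master
 * and worker, so the 25%/25%/50% split is only approximate at this loop count.
 */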
3606
3607#endif /* GIP_TSC_DELTA_METHOD_2 */
3608
3609
3610
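/**
 * Performs a quick TSC (delta) verification using ping-pong TSC reads (see
 * the explanatory note after the function body).
 *
 * @returns pArgs->rcVerify (VINF_SUCCESS or VERR_OUT_OF_RANGE) if the dance
 *          completed, VERR_TIMEOUT if the synchronization timed out (rcVerify
 *          is then set to VERR_TRY_AGAIN).
 * @param   pArgs            The argument/state data.
 * @param   pMySync          My synchronization structure.
 * @param   pOtherSync       My partner's synchronization structure.
 * @param   fIsMaster        Set if master, clear if worker.
 * @param   iWorkerTscDelta  The candidate worker TSC delta to verify.
 */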
3611static int supdrvTscDeltaVerify(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync,
3612 PSUPTSCDELTASYNC2 pOtherSync, bool fIsMaster, int64_t iWorkerTscDelta)
3613{
3614 /*PSUPGIPCPU pGipCpuWorker = pArgs->pWorker; - unused */
3615 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3616 uint32_t i;
3617 TSCDELTA_DBG_VARS();
3618
3619 for (;;)
3620 {
3621 RTCCUINTREG fEFlags;
3622 AssertCompile((RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) & 1) == 0);
3623 AssertCompile(RT_ELEMENTS(pArgs->uMaster.Verify.auTscs) == RT_ELEMENTS(pArgs->uWorker.Verify.auTscs));
3624
3625 if (fIsMaster)
3626 {
3627 uint64_t uTscWorker;
3628 TSCDELTA_MASTER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3629
3630 /*
3631 * Collect TSC, master goes first.
3632 */
3633 for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i += 2)
3634 {
3635 /* Read, kick & wait #1. */
3636 uint64_t uTsc = ASMReadTSC();
3637 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
3638 ASMSerializeInstruction();
3639 pArgs->uMaster.Verify.auTscs[i] = uTsc;
3640 TSCDELTA_DBG_START_LOOP();
3641 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
3642 {
3643 TSCDELTA_DBG_CHECK_LOOP();
3644 ASMNopPause();
3645 }
3646
3647 /* Read, kick & wait #2. */
3648 uTsc = ASMReadTSC();
3649 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
3650 ASMSerializeInstruction();
3651 pArgs->uMaster.Verify.auTscs[i + 1] = uTsc;
3652 TSCDELTA_DBG_START_LOOP();
3653 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
3654 {
3655 TSCDELTA_DBG_CHECK_LOOP();
3656 ASMNopPause();
3657 }
3658 }
3659
3660 TSCDELTA_MASTER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3661
3662 /*
3663 * Process the data.
3664 */
3665#ifdef TSCDELTA_VERIFY_WITH_STATS
3666 pArgs->cMaxVerifyTscTicks = INT64_MIN;
3667 pArgs->cMinVerifyTscTicks = INT64_MAX;
3668 pArgs->iVerifyBadTscDiff = 0;
3669#endif
3670 ASMAtomicWriteS32(&pArgs->rcVerify, VINF_SUCCESS);
3671 uTscWorker = 0;
3672 for (i = 0; i < RT_ELEMENTS(pArgs->uMaster.Verify.auTscs); i++)
3673 {
3674 /* Master vs previous worker entry. */
3675 uint64_t uTscMaster = pArgs->uMaster.Verify.auTscs[i] - pGipCpuMaster->i64TSCDelta;
3676 int64_t iDiff;
3677 if (i > 0)
3678 {
3679 iDiff = uTscMaster - uTscWorker;
3680#ifdef TSCDELTA_VERIFY_WITH_STATS
3681 if (iDiff > pArgs->cMaxVerifyTscTicks)
3682 pArgs->cMaxVerifyTscTicks = iDiff;
3683 if (iDiff < pArgs->cMinVerifyTscTicks)
3684 pArgs->cMinVerifyTscTicks = iDiff;
3685#endif
3686 if (iDiff < 0)
3687 {
3688#ifdef TSCDELTA_VERIFY_WITH_STATS
3689 pArgs->iVerifyBadTscDiff = -iDiff;
3690#endif
3691 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
3692 break;
3693 }
3694 }
3695
3696 /* Worker vs master. */
3697 uTscWorker = pArgs->uWorker.Verify.auTscs[i] - iWorkerTscDelta;
3698 iDiff = uTscWorker - uTscMaster;
3699#ifdef TSCDELTA_VERIFY_WITH_STATS
3700 if (iDiff > pArgs->cMaxVerifyTscTicks)
3701 pArgs->cMaxVerifyTscTicks = iDiff;
3702 if (iDiff < pArgs->cMinVerifyTscTicks)
3703 pArgs->cMinVerifyTscTicks = iDiff;
3704#endif
3705 if (iDiff < 0)
3706 {
3707#ifdef TSCDELTA_VERIFY_WITH_STATS
3708 pArgs->iVerifyBadTscDiff = iDiff;
3709#endif
3710 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_OUT_OF_RANGE);
3711 break;
3712 }
3713 }
3714
3715 /* Done. */
3716 TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pMySync, pOtherSync);
3717 }
3718 else
3719 {
3720 /*
3721 * The worker, master leads.
3722 */
3723 TSCDELTA_OTHER_SYNC_BEFORE(pMySync, pOtherSync, &fEFlags, pArgs);
3724
3725 for (i = 0; i < RT_ELEMENTS(pArgs->uWorker.Verify.auTscs); i += 2)
3726 {
3727 uint64_t uTsc;
3728
3729 /* Wait, Read and Kick #1. */
3730 TSCDELTA_DBG_START_LOOP();
3731 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO)
3732 {
3733 TSCDELTA_DBG_CHECK_LOOP();
3734 ASMNopPause();
3735 }
3736 uTsc = ASMReadTSC();
3737 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO_GO);
3738 ASMSerializeInstruction();
3739 pArgs->uWorker.Verify.auTscs[i] = uTsc;
3740
3741 /* Wait, Read and Kick #2. */
3742 TSCDELTA_DBG_START_LOOP();
3743 while (ASMAtomicReadU32(&pMySync->uSyncVar) == GIP_TSC_DELTA_SYNC2_GO_GO)
3744 {
3745 TSCDELTA_DBG_CHECK_LOOP();
3746 ASMNopPause();
3747 }
3748 uTsc = ASMReadTSC();
3749 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO);
3750 ASMSerializeInstruction();
3751 pArgs->uWorker.Verify.auTscs[i + 1] = uTsc;
3752 }
3753
3754 TSCDELTA_OTHER_SYNC_AFTER(pMySync, pOtherSync, fEFlags);
3755 }
3756 return pArgs->rcVerify;
3757 }
3758
3759 /*
3760 * Timed out, please retry.
3761 */
3762 ASMAtomicWriteS32(&pArgs->rcVerify, VERR_TRY_AGAIN);
3763 return VERR_TIMEOUT;
3764}
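
/*
 * What supdrvTscDeltaVerify checks, in short: master and worker take turns
 * reading the TSC in a strict ping-pong (read, kick the partner, wait), so
 * with the deltas applied the interleaved sequence
 *     master[0] <= worker[0] <= master[1] <= worker[1] <= ...
 * must be monotone.  Any negative difference means the candidate
 * iWorkerTscDelta cannot be right; rcVerify is then VERR_OUT_OF_RANGE and the
 * caller falls back to a full measurement.
 */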
3765
3766
3767
3768/**
3769 * Handles the special abort procedure during synchronization setup in
3770 * supdrvTscMeasureDeltaCallbackUnwrapped().
3771 *
3772 * @returns 0 (dummy, ignored)
3773 * @param pArgs Pointer to argument/state data.
3774 * @param pMySync Pointer to my sync structure.
3775 * @param fIsMaster Set if we're the master, clear if worker.
3776 * @param fTimeout Set if it's a timeout.
3777 */
3778DECL_NO_INLINE(static, int)
3779supdrvTscMeasureDeltaCallbackAbortSyncSetup(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC2 pMySync, bool fIsMaster, bool fTimeout)
3780{
3781 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3782 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3783 TSCDELTA_DBG_VARS();
3784 RT_NOREF1(pMySync);
3785
3786 /*
3787 * Clear our sync pointer and make sure the abort flag is set.
3788 */
3789 ASMAtomicWriteNullPtr(ppMySync);
3790 ASMAtomicWriteBool(&pArgs->fAbortSetup, true);
3791 if (fTimeout)
3792 ASMAtomicWriteBool(&pArgs->fTimedOut, true);
3793
3794 /*
3795 * Make sure the other party is out of there and won't be touching our
3796 * sync state again (would cause stack corruption).
3797 */
3798 TSCDELTA_DBG_START_LOOP();
3799 while (ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2) != NULL)
3800 {
3801 ASMNopPause();
3802 ASMNopPause();
3803 ASMNopPause();
3804 TSCDELTA_DBG_CHECK_LOOP();
3805 }
3806
3807 return 0;
3808}
3809
3810
3811/**
3812 * This is used by supdrvTscMeasureInitialDeltas() to read the TSC on two CPUs
3813 * and compute the delta between them.
3814 *
3815 * To reduce code size a good deal when timeout handling was added, a dummy
3816 * return value had to be introduced (saves 1-3 lines per timeout case);
3817 * hence this 'Unwrapped' function and the dummy 0 return value.
3818 *
3819 * @returns 0 (dummy, ignored)
3820 * @param idCpu The CPU we are currently scheduled on.
3821 * @param pArgs Pointer to a parameter package.
3822 *
3823 * @remarks Measuring TSC deltas between the CPUs is tricky because we need to
3824 * read the TSC at exactly the same time on both the master and the
3825 * worker CPUs. Due to DMA, bus arbitration, cache locality,
3826 * contention, SMI, pipelining etc. there is no guaranteed way of
3827 * doing this on x86 CPUs.
3828 */
3829static int supdrvTscMeasureDeltaCallbackUnwrapped(RTCPUID idCpu, PSUPDRVGIPTSCDELTARGS pArgs)
3830{
3831 PSUPDRVDEVEXT pDevExt = pArgs->pDevExt;
3832 PSUPGIPCPU pGipCpuWorker = pArgs->pWorker;
3833 PSUPGIPCPU pGipCpuMaster = pArgs->pMaster;
3834 bool const fIsMaster = idCpu == pGipCpuMaster->idCpu;
3835 uint32_t iTry;
3836 PSUPTSCDELTASYNC2 volatile *ppMySync = fIsMaster ? &pArgs->pSyncMaster : &pArgs->pSyncWorker;
3837 PSUPTSCDELTASYNC2 volatile *ppOtherSync = fIsMaster ? &pArgs->pSyncWorker : &pArgs->pSyncMaster;
3838 SUPTSCDELTASYNC2 MySync;
3839 PSUPTSCDELTASYNC2 pOtherSync;
3840 int rc;
3841 TSCDELTA_DBG_VARS();
3842
3843 /* A bit of paranoia first. */
3844 if (!pGipCpuMaster || !pGipCpuWorker)
3845 return 0;
3846
3847 /*
3848 * If the CPU isn't part of the measurement, return immediately.
3849 */
3850 if ( !fIsMaster
3851 && idCpu != pGipCpuWorker->idCpu)
3852 return 0;
3853
3854 /*
3855 * Set up my synchronization stuff and wait for the other party to show up.
3856 *
3857 * We don't wait forever since the other party may be off fishing (offline,
3858 * spinning with ints disabled, whatever); we must play nice to the rest of
3859 * the system as this context generally isn't one in which we will get
3860 * preempted and we may hold up a number of lower priority interrupts.
3861 */
3862 ASMAtomicWriteU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT);
3863 ASMAtomicWritePtr(ppMySync, &MySync);
3864 MySync.uTscStart = ASMReadTSC();
3865 MySync.cMaxTscTicks = pArgs->cMaxTscTicks;
3866
3867 /* Look for the partner, might not be here yet... Special abort considerations. */
3868 iTry = 0;
3869 TSCDELTA_DBG_START_LOOP();
3870 while ((pOtherSync = ASMAtomicReadPtrT(ppOtherSync, PSUPTSCDELTASYNC2)) == NULL)
3871 {
3872 ASMNopPause();
3873 if ( ASMAtomicReadBool(&pArgs->fAbortSetup)
3874 || !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuMaster->idCpu) )
3875 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3876 if ( (iTry++ & 0xff) == 0
3877 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3878 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3879 TSCDELTA_DBG_CHECK_LOOP();
3880 ASMNopPause();
3881 }
3882
3883 /* I found my partner, waiting to be found... Special abort considerations. */
3884 if (fIsMaster)
3885        if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* paranoia */
3886 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3887
3888 iTry = 0;
3889 TSCDELTA_DBG_START_LOOP();
3890 while (ASMAtomicReadU32(&MySync.uSyncVar) == GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)
3891 {
3892 ASMNopPause();
3893 if (ASMAtomicReadBool(&pArgs->fAbortSetup))
3894 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3895 if ( (iTry++ & 0xff) == 0
3896 && ASMReadTSC() - MySync.uTscStart > pArgs->cMaxTscTicks)
3897 {
3898 if ( fIsMaster
3899 && !ASMAtomicCmpXchgU32(&MySync.uSyncVar, GIP_TSC_DELTA_SYNC2_PRESTART_ABORT, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT))
3900 break; /* race #1: slave has moved on, handle timeout in loop instead. */
3901 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, true /*fTimeout*/);
3902 }
3903 TSCDELTA_DBG_CHECK_LOOP();
3904 }
3905
3906 if (!fIsMaster)
3907 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* race #1 */
3908 return supdrvTscMeasureDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/);
3909
3910/** @todo Add a resumable state to pArgs so we don't waste time if we time
3911 * out or something. Timeouts are legit, any of the two CPUs may get
3912 * interrupted. */
3913
3914 /*
3915 * Start by seeing if we have a zero delta between the two CPUs.
3916 * This should normally be the case.
3917 */
3918 rc = supdrvTscDeltaVerify(pArgs, &MySync, pOtherSync, fIsMaster, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3919 if (RT_SUCCESS(rc))
3920 {
3921 if (fIsMaster)
3922 {
3923 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
3924 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3925 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3926 }
3927 }
3928 /*
3929 * If the verification didn't time out, do regular delta measurements.
3930 * We retry this until we get a reasonable value.
3931 */
3932 else if (rc != VERR_TIMEOUT)
3933 {
3934 Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
3935 for (iTry = 0; iTry < 12; iTry++)
3936 {
3937 /*
3938 * Check the state before we start.
3939 */
3940 uint32_t u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3941 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3942 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3943 {
3944 TSCDELTA_DBG_SYNC_MSG(("sync/loop/%s: #0 iTry=%u MyState=%#x\n", fIsMaster ? "master" : "worker", iTry, u32Tmp));
3945 break;
3946 }
3947
3948 /*
3949 * Do the measurements.
3950 */
3951#ifdef GIP_TSC_DELTA_METHOD_1
3952 supdrvTscDeltaMethod1Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3953#elif defined(GIP_TSC_DELTA_METHOD_2)
3954 supdrvTscDeltaMethod2Loop(pArgs, &MySync, pOtherSync, fIsMaster, iTry);
3955#else
3956# error "huh??"
3957#endif
3958
3959 /*
3960 * Check the state.
3961 */
3962 u32Tmp = ASMAtomicReadU32(&MySync.uSyncVar);
3963 if ( u32Tmp != GIP_TSC_DELTA_SYNC2_READY
3964 && (fIsMaster || u32Tmp != GIP_TSC_DELTA_SYNC2_STEADY) /* worker may be late prepping for the next round */ )
3965 {
3966 if (fIsMaster)
3967 TSCDELTA_DBG_SYNC_MSG(("sync/loop/master: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3968 else
3969 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/worker: #1 iTry=%u MyState=%#x\n", iTry, u32Tmp));
3970 break;
3971 }
3972
3973 /*
3974 * Success? If so, stop trying. Master decides.
3975 */
3976 if (fIsMaster)
3977 {
3978 if (pGipCpuWorker->i64TSCDelta != INT64_MAX)
3979 {
3980 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
3981 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
3982 TSCDELTA_DBG_SYNC_MSG2(("sync/loop/master: #9 iTry=%u MyState=%#x\n", iTry, MySync.uSyncVar));
3983 break;
3984 }
3985 }
3986 }
3987 if (fIsMaster)
3988 pArgs->iTry = iTry;
3989 }
3990
3991 /*
3992 * End the synchronization dance. We tell the other that we're done,
3993 * then wait for the same kind of reply.
3994 */
3995 ASMAtomicWriteU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_FINAL);
3996 ASMAtomicWriteNullPtr(ppMySync);
3997 iTry = 0;
3998 TSCDELTA_DBG_START_LOOP();
3999 while (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_FINAL)
4000 {
4001 iTry++;
4002 if ( iTry == 0
4003 && !RTMpIsCpuOnline(fIsMaster ? pGipCpuWorker->idCpu : pGipCpuMaster->idCpu))
4004 break; /* this really shouldn't happen. */
4005 TSCDELTA_DBG_CHECK_LOOP();
4006 ASMNopPause();
4007 }
4008
4009 /*
4010 * Collect some runtime stats.
4011 */
4012 if (fIsMaster)
4013 pArgs->cElapsedMasterTscTicks = ASMReadTSC() - MySync.uTscStart;
4014 else
4015 pArgs->cElapsedWorkerTscTicks = ASMReadTSC() - MySync.uTscStart;
4016 return 0;
4017}
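
/*
 * For orientation, the uSyncVar values seen in this function and in
 * supdrvTscDeltaVerify: both parties start out in PRESTART_WAIT; the master
 * and then the worker CmpXchg the partner's variable to READY; the
 * measurement loops and the verify ping-pong then cycle it through
 * STEADY/GO/GO_GO via the BEFORE/AFTER sync macros; PRESTART_ABORT marks a
 * setup timeout; and FINAL ends the dance so both stacks can safely unwind.
 */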
4018
4019/**
4020 * Callback used by supdrvTscMeasureInitialDeltas() to read the TSC on two CPUs
4021 * and compute the delta between them.
4022 *
4023 * @param idCpu The CPU we are currently scheduled on.
4024 * @param pvUser1 Pointer to a parameter package (SUPDRVGIPTSCDELTARGS).
4025 * @param pvUser2 Unused.
4026 */
4027static DECLCALLBACK(void) supdrvTscMeasureDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
4028{
4029 supdrvTscMeasureDeltaCallbackUnwrapped(idCpu, (PSUPDRVGIPTSCDELTARGS)pvUser1);
4030 RT_NOREF1(pvUser2);
4031}
4032
4033
4034/**
4035 * Measures the TSC delta between the master GIP CPU and one specified worker
4036 * CPU.
4037 *
4038 * @returns VBox status code.
4039 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED on pure measurement
4040 * failure.
4041 * @param pDevExt Pointer to the device instance data.
4042 * @param idxWorker The index of the worker CPU from the GIP's array of
4043 * CPUs.
4044 *
4045 * @remarks This must be called with preemption enabled!
4046 */
4047static int supdrvTscMeasureDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker)
4048{
4049 int rc;
4050 int rc2;
4051 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4052 RTCPUID idMaster = pDevExt->idGipMaster;
4053 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[idxWorker];
4054 PSUPGIPCPU pGipCpuMaster;
4055 uint32_t iGipCpuMaster;
4056#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4057 uint32_t u32Tmp;
4058#endif
4059
4060 /* Validate input a bit. */
4061 AssertReturn(pGip, VERR_INVALID_PARAMETER);
4062 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4063 Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4064
4065 /*
4066 * Don't attempt measuring the delta for the GIP master.
4067 */
4068 if (pGipCpuWorker->idCpu == idMaster)
4069 {
4070 if (pGipCpuWorker->i64TSCDelta == INT64_MAX) /* This shouldn't happen, but just in case. */
4071 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, GIP_TSC_DELTA_INITIAL_MASTER_VALUE);
4072 return VINF_SUCCESS;
4073 }
4074
4075 /*
4076 * One measurement at a time, at least for now. We might be using
4077 * broadcast IPIs, so be nice to the rest of the system.
4078 */
4079#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4080 rc = RTSemMutexRequest(pDevExt->mtxTscDelta, RT_INDEFINITE_WAIT);
4081#else
4082 rc = RTSemFastMutexRequest(pDevExt->mtxTscDelta);
4083#endif
4084 if (RT_FAILURE(rc))
4085 return rc;
4086
4087 /*
4088 * If the CPU has hyper-threading and the APIC IDs of the master and worker are adjacent,
4089 * try to pick a different master. (This fudge only works on multi-core systems.)
4090 * ASSUMES related threads have adjacent APIC IDs. ASSUMES two threads per core.
4091 *
4092 * We skip this on AMDs for now as their HTT is different from Intel's and
4093 * it doesn't seem to have any favorable effect on the results.
4094 *
4095 * If the master is offline, we need a new master too, so share the code.
4096 */
4097 iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster);
4098 AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID);
4099 pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster];
4100#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
4101 if ( ( (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1)
4102 && pGip->cOnlineCpus > 2
4103 && ASMHasCpuId()
4104 && RTX86IsValidStdRange(ASMCpuId_EAX(0))
4105 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT)
4106 && ( !ASMIsAmdCpu()
4107 || RTX86GetCpuFamily(u32Tmp = ASMCpuId_EAX(1)) > 0x15
4108 || ( RTX86GetCpuFamily(u32Tmp) == 0x15 /* Piledriver+, not bulldozer (FX-4150 didn't like it). */
4109 && RTX86GetCpuModelAMD(u32Tmp) >= 0x02) ) )
4110 || !RTMpIsCpuOnline(idMaster) )
4111 {
4112 uint32_t i;
4113 for (i = 0; i < pGip->cCpus; i++)
4114 if ( i != iGipCpuMaster
4115 && i != idxWorker
4116 && pGip->aCPUs[i].enmState == SUPGIPCPUSTATE_ONLINE
4117 && pGip->aCPUs[i].i64TSCDelta != INT64_MAX
4118 && pGip->aCPUs[i].idCpu != NIL_RTCPUID
4119 && pGip->aCPUs[i].idCpu != idMaster /* paranoia starts here... */
4120 && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu
4121 && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic
4122 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic
4123 && RTMpIsCpuOnline(pGip->aCPUs[i].idCpu))
4124 {
4125 iGipCpuMaster = i;
4126 pGipCpuMaster = &pGip->aCPUs[i];
4127 idMaster = pGipCpuMaster->idCpu;
4128 break;
4129 }
4130 }
4131#endif /* defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86) */
4132
4133 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpuWorker->iCpuSet))
4134 {
4135 /*
4136 * Initialize data package for the RTMpOnPair callback.
4137 */
4138 PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)RTMemAllocZ(sizeof(*pArgs));
4139 if (pArgs)
4140 {
4141 pArgs->pWorker = pGipCpuWorker;
4142 pArgs->pMaster = pGipCpuMaster;
4143 pArgs->pDevExt = pDevExt;
4144 pArgs->pSyncMaster = NULL;
4145 pArgs->pSyncWorker = NULL;
4146 pArgs->cMaxTscTicks = ASMAtomicReadU64(&pGip->u64CpuHz) / 512; /* 1953 us */
4147
4148 /*
4149 * Do the RTMpOnPair call. We reset i64TSCDelta first so we
4150 * and supdrvTscMeasureDeltaCallback can use it as a success check.
4151 */
4152 /** @todo Store the i64TSCDelta result in pArgs first? Perhaps deals with
4153 * that when doing the restart loop reorg. */
4154 ASMAtomicWriteS64(&pGipCpuWorker->i64TSCDelta, INT64_MAX);
4155 rc = RTMpOnPair(pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, RTMPON_F_CONCURRENT_EXEC,
4156 supdrvTscMeasureDeltaCallback, pArgs, NULL);
4157 if (RT_SUCCESS(rc))
4158 {
4159#if 0
4160 SUPR0Printf("mponpair ticks: %9llu %9llu max: %9llu iTry: %u%s\n", pArgs->cElapsedMasterTscTicks,
4161 pArgs->cElapsedWorkerTscTicks, pArgs->cMaxTscTicks, pArgs->iTry,
4162 pArgs->fTimedOut ? " timed out" :"");
4163#endif
4164#if 0
4165 SUPR0Printf("rcVerify=%d iVerifyBadTscDiff=%lld cMinVerifyTscTicks=%lld cMaxVerifyTscTicks=%lld\n",
4166 pArgs->rcVerify, pArgs->iVerifyBadTscDiff, pArgs->cMinVerifyTscTicks, pArgs->cMaxVerifyTscTicks);
4167#endif
4168 if (RT_LIKELY(pGipCpuWorker->i64TSCDelta != INT64_MAX))
4169 {
4170 /*
4171 * Work the TSC delta applicability rating. It starts
4172 * optimistic in supdrvGipInit; we downgrade it here.
4173 */
4174 SUPGIPUSETSCDELTA enmRating;
4175 if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO
4176 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_ROUGHLY_ZERO)
4177 enmRating = SUPGIPUSETSCDELTA_NOT_ZERO;
4178 else if ( pGipCpuWorker->i64TSCDelta > GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO
4179 || pGipCpuWorker->i64TSCDelta < -GIP_TSC_DELTA_THRESHOLD_PRACTICALLY_ZERO)
4180 enmRating = SUPGIPUSETSCDELTA_ROUGHLY_ZERO;
4181 else
4182 enmRating = SUPGIPUSETSCDELTA_PRACTICALLY_ZERO;
4183 if (pGip->enmUseTscDelta < enmRating)
4184 {
4185 AssertCompile(sizeof(pGip->enmUseTscDelta) == sizeof(uint32_t));
4186 ASMAtomicWriteU32((uint32_t volatile *)&pGip->enmUseTscDelta, enmRating);
4187 }
4188 }
4189 else
4190 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
4191 }
4192 /** @todo return try-again if we get an offline CPU error. */
4193
4194 RTMemFree(pArgs);
4195 }
4196 else
4197 rc = VERR_NO_MEMORY;
4198 }
4199 else
4200 rc = VERR_CPU_OFFLINE;
4201
4202 /*
4203 * We're done now.
4204 */
4205#ifdef SUPDRV_USE_MUTEX_FOR_GIP
4206 rc2 = RTSemMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
4207#else
4208 rc2 = RTSemFastMutexRelease(pDevExt->mtxTscDelta); AssertRC(rc2);
4209#endif
4210 return rc;
4211}
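
/*
 * The rating downgrade above, spelled out (the GIP_TSC_DELTA_THRESHOLD_*
 * constants are defined elsewhere in the driver):
 *     |delta| > THRESHOLD_ROUGHLY_ZERO      -> NOT_ZERO
 *     |delta| > THRESHOLD_PRACTICALLY_ZERO  -> ROUGHLY_ZERO
 *     otherwise                             -> PRACTICALLY_ZERO
 * Since enmUseTscDelta is only ever moved towards worse ratings here, a
 * single bad CPU pair is enough to keep delta application enabled for the
 * whole system.
 */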
4212
4213
4214/**
4215 * Resets the TSC-delta related TSC samples and optionally the deltas
4216 * themselves.
4217 *
4218 * @param pDevExt Pointer to the device instance data.
4219 * @param fResetTscDeltas Whether the TSC-deltas are also to be reset.
4220 *
4221 * @remarks This might be called while holding a spinlock!
4222 */
4223static void supdrvTscResetSamples(PSUPDRVDEVEXT pDevExt, bool fResetTscDeltas)
4224{
4225 unsigned iCpu;
4226 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4227 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4228 {
4229 PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
4230 ASMAtomicWriteU64(&pGipCpu->u64TSCSample, GIP_TSC_DELTA_RSVD);
4231 if (fResetTscDeltas)
4232 {
4233 RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpu->iCpuSet);
4234 ASMAtomicWriteS64(&pGipCpu->i64TSCDelta, INT64_MAX);
4235 }
4236 }
4237}
4238
4239
4240/**
4241 * Picks an online CPU as the master TSC for TSC-delta computations.
4242 *
4243 * @returns VBox status code.
4244 * @param pDevExt Pointer to the device instance data.
4245 * @param pidxMaster Where to store the CPU array index of the chosen
4246 * master. Optional, can be NULL.
4247 */
4248static int supdrvTscPickMaster(PSUPDRVDEVEXT pDevExt, uint32_t *pidxMaster)
4249{
4250 /*
4251 * Pick the first CPU online as the master TSC and make it the new GIP master based
4252 * on the APIC ID.
4253 *
4254 * Technically we could simply use "idGipMaster", but doing it this way gives
4255 * us the master as CPU 0 in most cases, making comparisons nicer/easier. It is safe
4256 * to update the GIP master at this point since the sync/async timer isn't created yet.
4257 */
4258 unsigned iCpu;
4259 uint32_t idxMaster = UINT32_MAX;
4260 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4261 for (iCpu = 0; iCpu < RT_ELEMENTS(pGip->aiCpuFromApicId); iCpu++)
4262 {
4263 uint16_t idxCpu = pGip->aiCpuFromApicId[iCpu];
4264 if (idxCpu != UINT16_MAX)
4265 {
4266 PSUPGIPCPU pGipCpu = &pGip->aCPUs[idxCpu];
4267 if (RTCpuSetIsMemberByIndex(&pGip->OnlineCpuSet, pGipCpu->iCpuSet))
4268 {
4269 idxMaster = idxCpu;
4270 pGipCpu->i64TSCDelta = GIP_TSC_DELTA_INITIAL_MASTER_VALUE;
4271 ASMAtomicWriteSize(&pDevExt->idGipMaster, pGipCpu->idCpu);
4272 if (pidxMaster)
4273 *pidxMaster = idxMaster;
4274 return VINF_SUCCESS;
4275 }
4276 }
4277 }
4278 return VERR_CPU_OFFLINE;
4279}
4280
4281
4282/**
4283 * Performs the initial measurements of the TSC deltas between CPUs.
4284 *
4285 * This is called by supdrvGipCreate(), supdrvGipPowerNotificationCallback() or
4286 * triggered by it if threaded.
4287 *
4288 * @returns VBox status code.
4289 * @param pDevExt Pointer to the device instance data.
4290 *
4291 * @remarks Must be called only after supdrvGipInitOnCpu() as this function uses
4292 * idCpu and GIP's online CPU set, which are populated in
4293 * supdrvGipInitOnCpu().
4294 */
4295static int supdrvTscMeasureInitialDeltas(PSUPDRVDEVEXT pDevExt)
4296{
4297 PSUPGIPCPU pGipCpuMaster;
4298 unsigned iCpu;
4299 unsigned iOddEven;
4300 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4301 uint32_t idxMaster = UINT32_MAX;
4302 uint32_t cMpOnOffEvents = ASMAtomicReadU32(&pDevExt->cMpOnOffEvents);
4303
4304 Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4305    supdrvTscResetSamples(pDevExt, true /* fResetTscDeltas */);
4306 int rc = supdrvTscPickMaster(pDevExt, &idxMaster);
4307 if (RT_FAILURE(rc))
4308 {
4309 SUPR0Printf("Failed to pick a CPU master for TSC-delta measurements rc=%Rrc\n", rc);
4310 return rc;
4311 }
4312 AssertReturn(idxMaster < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4313 pGipCpuMaster = &pGip->aCPUs[idxMaster];
4314 Assert(pDevExt->idGipMaster == pGipCpuMaster->idCpu);
4315
4316 /*
4317 * If there is only a single CPU online we have nothing to do.
4318 */
4319 if (pGip->cOnlineCpus <= 1)
4320 {
4321 AssertReturn(pGip->cOnlineCpus > 0, VERR_INTERNAL_ERROR_5);
4322 return VINF_SUCCESS;
4323 }
4324
4325 /*
4326 * Loop thru the GIP CPU array and get deltas for each CPU (except the
4327 * master). We do the CPUs with the even numbered APIC IDs first so that
4328 * we've got alternative master CPUs to pick from on hyper-threaded systems.
4329 */
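    /* (Example, hypothetical 4-way box: with APIC IDs 0..3 and CPU 0 as the
       master, the even pass measures only CPU 2; by the odd pass, covering
       CPUs 1 and 3, CPU 2 already has a delta and can stand in as master
       should the HT-adjacency fudge in supdrvTscMeasureDeltaOne kick in.) */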
4330 for (iOddEven = 0; iOddEven < 2; iOddEven++)
4331 {
4332 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4333 {
4334 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4335 if ( iCpu != idxMaster
4336 && (iOddEven > 0 || (pGipCpuWorker->idApic & 1) == 0)
4337 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4338 {
4339 rc = supdrvTscMeasureDeltaOne(pDevExt, iCpu);
4340 if (RT_FAILURE(rc))
4341 {
4342                    SUPR0Printf("supdrvTscMeasureDeltaOne failed. rc=%d CPU[%u].idCpu=%u Master[%u].idCpu=%u (idGipMaster=%u)\n",
4343                                rc, iCpu, pGipCpuWorker->idCpu, idxMaster, pGipCpuMaster->idCpu, pDevExt->idGipMaster);
4344 break;
4345 }
4346
4347 if (ASMAtomicReadU32(&pDevExt->cMpOnOffEvents) != cMpOnOffEvents)
4348 {
4349 SUPR0Printf("One or more CPUs transitioned between online & offline states. I'm confused, retry...\n");
4350 rc = VERR_TRY_AGAIN;
4351 break;
4352 }
4353 }
4354 }
4355 }
4356
4357 return rc;
4358}
4359
4360
4361#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4362
4363/**
4364 * Switches the TSC-delta measurement thread into the butchered state.
4365 *
4366 * @returns VBox status code.
4367 * @param pDevExt Pointer to the device instance data.
4368 * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
4369 * @param pszFailed An error message to log.
4370 * @param rcFailed The error code to exit the thread with.
4371 */
4372static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
4373{
4374 if (!fSpinlockHeld)
4375 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4376
4377 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
4378 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4379 OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", pszFailed, rcFailed));
4380 return rcFailed;
4381}
4382
4383
4384/**
4385 * The TSC-delta measurement thread.
4386 *
4387 * @returns VBox status code.
4388 * @param hThread The thread handle.
4389 * @param pvUser Opaque pointer to the device instance data.
4390 */
4391static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
4392{
4393 PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
4394 int rc = VERR_INTERNAL_ERROR_2;
4395 for (;;)
4396 {
4397 /*
4398 * Switch on the current state.
4399 */
4400 SUPDRVTSCDELTATHREADSTATE enmState;
4401 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4402 enmState = pDevExt->enmTscDeltaThreadState;
4403 switch (enmState)
4404 {
4405 case kTscDeltaThreadState_Creating:
4406 {
4407 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4408 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
4409 if (RT_FAILURE(rc))
4410 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4411 RT_FALL_THRU();
4412 }
4413
4414 case kTscDeltaThreadState_Listening:
4415 {
4416 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4417
4418 /*
4419 * Linux counts uninterruptible sleeps as load, hence we shall do a
4420 * regular, interruptible sleep here and ignore wake ups due to signals.
4421 * See task_contributes_to_load() in include/linux/sched.h in the Linux sources.
4422 */
4423 rc = RTThreadUserWaitNoResume(hThread, pDevExt->cMsTscDeltaTimeout);
4424 if ( RT_FAILURE(rc)
4425 && rc != VERR_TIMEOUT
4426 && rc != VERR_INTERRUPTED)
4427 return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
4428 RTThreadUserReset(hThread);
4429 break;
4430 }
4431
4432 case kTscDeltaThreadState_WaitAndMeasure:
4433 {
4434 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
4435 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
4436 if (RT_FAILURE(rc))
4437 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
4438 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4439 RTThreadSleep(1);
4440 RT_FALL_THRU();
4441 }
4442
4443 case kTscDeltaThreadState_Measuring:
4444 {
4445 if (pDevExt->fTscThreadRecomputeAllDeltas)
4446 {
4447 int cTries = 8;
4448 int cMsWaitPerTry = 10;
4449 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4450 Assert(pGip);
4451 do
4452 {
4453 RTCpuSetCopy(&pDevExt->TscDeltaCpuSet, &pGip->OnlineCpuSet);
4454 rc = supdrvTscMeasureInitialDeltas(pDevExt);
4455 if ( RT_SUCCESS(rc)
4456 || ( RT_FAILURE(rc)
4457 && rc != VERR_TRY_AGAIN
4458 && rc != VERR_CPU_OFFLINE))
4459 {
4460 break;
4461 }
4462 RTThreadSleep(cMsWaitPerTry);
4463 } while (cTries-- > 0);
4464 pDevExt->fTscThreadRecomputeAllDeltas = false;
4465 }
4466 else
4467 {
4468 PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
4469 unsigned iCpu;
4470
4471 /* Measure TSC-deltas only for the CPUs that are in the set. */
4472 rc = VINF_SUCCESS;
4473 for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
4474 {
4475 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
4476 if (RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
4477 {
4478 if (pGipCpuWorker->i64TSCDelta == INT64_MAX)
4479 {
4480 int rc2 = supdrvTscMeasureDeltaOne(pDevExt, iCpu);
4481 if (RT_FAILURE(rc2) && RT_SUCCESS(rc))
4482 rc = rc2;
4483 }
4484 else
4485 {
4486 /*
4487 * The thread/someone must've called SUPR0TscDeltaMeasureBySetIndex(),
4488 * so mark the delta as fine to get the timer thread off our back.
4489 */
4490 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet);
4491 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet);
4492 }
4493 }
4494 }
4495 }
4496 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4497 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4498 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
4499 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4500 Assert(rc != VERR_NOT_AVAILABLE); /* VERR_NOT_AVAILABLE is used as init value, see supdrvTscDeltaThreadInit(). */
4501 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
4502 break;
4503 }
4504
4505 case kTscDeltaThreadState_Terminating:
4506 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
4507 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4508 return VINF_SUCCESS;
4509
4510 case kTscDeltaThreadState_Butchered:
4511 default:
4512 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
4513 }
4514 }
4515 /* not reached */
4516}
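
/*
 * State cheat sheet for the loop above (normal flow): Creating -> Listening,
 * then Listening -> WaitAndMeasure (set by requesters) -> Measuring ->
 * Listening and so on; Terminating -> Destroyed on shutdown; Butchered is
 * the terminal error state entered via supdrvTscDeltaThreadButchered().
 */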
4517
4518
4519/**
4520 * Waits for the TSC-delta measurement thread to respond to a state change.
4521 *
4522 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
4523 * other error code on internal error.
4524 *
4525 * @param pDevExt The device instance data.
4526 * @param enmCurState The current state.
4527 * @param enmNewState The new state we're waiting for it to enter.
4528 */
4529static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
4530 SUPDRVTSCDELTATHREADSTATE enmNewState)
4531{
4532 SUPDRVTSCDELTATHREADSTATE enmActualState;
4533 int rc;
4534
4535 /*
4536 * Wait a short while for the expected state transition.
4537 */
4538 RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
4539 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4540 enmActualState = pDevExt->enmTscDeltaThreadState;
4541 if (enmActualState == enmNewState)
4542 {
4543 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4544 rc = VINF_SUCCESS;
4545 }
4546 else if (enmActualState == enmCurState)
4547 {
4548 /*
4549 * Wait longer if the state has not yet transitioned to the one we want.
4550 */
4551 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4552 rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
4553 if ( RT_SUCCESS(rc)
4554 || rc == VERR_TIMEOUT)
4555 {
4556 /*
4557 * Check the state to see whether we've succeeded.
4558 */
4559 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4560 enmActualState = pDevExt->enmTscDeltaThreadState;
4561 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4562 if (enmActualState == enmNewState)
4563 rc = VINF_SUCCESS;
4564 else if (enmActualState == enmCurState)
4565 {
4566 rc = VERR_TIMEOUT;
4567 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmActualState=%d enmNewState=%d\n",
4568 enmActualState, enmNewState));
4569 }
4570 else
4571 {
4572 rc = VERR_INTERNAL_ERROR;
4573 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
4574 enmActualState, enmNewState));
4575 }
4576 }
4577 else
4578 OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
4579 }
4580 else
4581 {
4582 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4583 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state %d when transitioning from %d to %d\n",
4584 enmActualState, enmCurState, enmNewState));
4585 rc = VERR_INTERNAL_ERROR;
4586 }
4587
4588 return rc;
4589}
4590
4591
4592/**
4593 * Signals the TSC-delta thread to start measuring TSC-deltas.
4594 *
4595 * @param pDevExt Pointer to the device instance data.
4596 * @param fForceAll Force re-calculating TSC-deltas on all CPUs.
4597 */
4598static void supdrvTscDeltaThreadStartMeasurement(PSUPDRVDEVEXT pDevExt, bool fForceAll)
4599{
4600 if (pDevExt->hTscDeltaThread != NIL_RTTHREAD)
4601 {
4602 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4603 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4604 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4605 {
4606 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4607 if (fForceAll)
4608 pDevExt->fTscThreadRecomputeAllDeltas = true;
4609 }
4610 else if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_WaitAndMeasure
4611 && fForceAll)
4612 pDevExt->fTscThreadRecomputeAllDeltas = true;
4613 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4614 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4615 }
4616}
4617
4618
4619/**
4620 * Terminates the actual thread running supdrvTscDeltaThread().
4621 *
4622 * This is an internal worker function for supdrvTscDeltaThreadInit() and
4623 * supdrvTscDeltaTerm().
4624 *
4625 * @param pDevExt Pointer to the device instance data.
4626 */
4627static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
4628{
4629 int rc;
4630 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4631 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
4632 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4633 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4634 rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
4635 if (RT_FAILURE(rc))
4636 {
4637 /* Signal a few more times before giving up. */
4638 int cTriesLeft = 5;
4639 while (--cTriesLeft > 0)
4640 {
4641 RTThreadUserSignal(pDevExt->hTscDeltaThread);
4642 rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
4643 if (rc != VERR_TIMEOUT)
4644 break;
4645 }
4646 }
4647}
4648
4649
4650/**
4651 * Initializes and spawns the TSC-delta measurement thread.
4652 *
4653 * A thread is required for servicing re-measurement requests from events like
4654 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
4655 * under all contexts on all OSs.
4656 *
4657 * @returns VBox status code.
4658 * @param pDevExt Pointer to the device instance data.
4659 *
4660 * @remarks Must only be called -after- initializing GIP and setting up MP
4661 * notifications!
4662 */
4663static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
4664{
4665 int rc;
4666 Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
4667 rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
4668 if (RT_SUCCESS(rc))
4669 {
4670 rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
4671 if (RT_SUCCESS(rc))
4672 {
4673 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
4674 pDevExt->cMsTscDeltaTimeout = 60000;
4675 rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
4676 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
4677 if (RT_SUCCESS(rc))
4678 {
4679 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
4680 if (RT_SUCCESS(rc))
4681 {
4682 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4683 return rc;
4684 }
4685
4686                OSDBGPRINT(("supdrvTscDeltaThreadInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
4687 supdrvTscDeltaThreadTerminate(pDevExt);
4688 }
4689 else
4690                OSDBGPRINT(("supdrvTscDeltaThreadInit: RTThreadCreate failed. rc=%Rrc\n", rc));
4691 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4692 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4693 }
4694 else
4695            OSDBGPRINT(("supdrvTscDeltaThreadInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
4696 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4697 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4698 }
4699 else
4700        OSDBGPRINT(("supdrvTscDeltaThreadInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
4701
4702 return rc;
4703}
4704
4705
4706/**
4707 * Terminates the TSC-delta measurement thread and cleanup.
4708 *
4709 * @param pDevExt Pointer to the device instance data.
4710 */
4711static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
4712{
4713 if ( pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
4714 && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4715 {
4716 supdrvTscDeltaThreadTerminate(pDevExt);
4717 }
4718
4719 if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
4720 {
4721 RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
4722 pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
4723 }
4724
4725 if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
4726 {
4727 RTSemEventDestroy(pDevExt->hTscDeltaEvent);
4728 pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
4729 }
4730
4731 ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
4732}
4733
4734#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4735
4736/**
4737 * Measures the TSC delta for the CPU given by its CPU set index.
4738 *
4739 * @returns VBox status code.
4740 * @retval VERR_INTERRUPTED if interrupted while waiting.
4741 * @retval VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED if we were unable to get a
4742 * measurement.
4743 * @retval VERR_CPU_OFFLINE if the specified CPU is offline.
4744 *
4745 * @param pSession The caller's session. GIP must've been mapped.
4746 * @param iCpuSet The CPU set index of the CPU to measure.
4747 * @param fFlags Flags, SUP_TSCDELTA_MEASURE_F_XXX.
4748 * @param cMsWaitRetry Number of milliseconds to wait between each retry.
4749 * @param cMsWaitThread Number of milliseconds to wait for the thread to get
4750 * ready.
4751 * @param cTries Number of times to try, pass 0 for the default.
4752 */
4753SUPR0DECL(int) SUPR0TscDeltaMeasureBySetIndex(PSUPDRVSESSION pSession, uint32_t iCpuSet, uint32_t fFlags,
4754 RTMSINTERVAL cMsWaitRetry, RTMSINTERVAL cMsWaitThread, uint32_t cTries)
4755{
4756 PSUPDRVDEVEXT pDevExt;
4757 PSUPGLOBALINFOPAGE pGip;
4758 uint16_t iGipCpu;
4759 int rc;
4760#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4761 uint64_t msTsStartWait;
4762 uint32_t iWaitLoop;
4763#endif
4764
4765 /*
4766 * Validate and adjust the input.
4767 */
4768 AssertReturn(SUP_IS_SESSION_VALID(pSession), VERR_INVALID_PARAMETER);
4769 if (!pSession->fGipReferenced)
4770 return VERR_WRONG_ORDER;
4771
4772 pDevExt = pSession->pDevExt;
4773 AssertReturn(SUP_IS_DEVEXT_VALID(pDevExt), VERR_INVALID_PARAMETER);
4774
4775 pGip = pDevExt->pGip;
4776 AssertPtrReturn(pGip, VERR_INTERNAL_ERROR_2);
4777
4778 AssertReturn(iCpuSet < RTCPUSET_MAX_CPUS, VERR_INVALID_CPU_INDEX);
4779 AssertReturn(iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx), VERR_INVALID_CPU_INDEX);
4780 iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet];
4781 AssertReturn(iGipCpu < pGip->cCpus, VERR_INVALID_CPU_INDEX);
4782
4783 if (fFlags & ~SUP_TSCDELTA_MEASURE_F_VALID_MASK)
4784 return VERR_INVALID_FLAGS;
4785
4786 /*
4787 * The request is a noop if the TSC delta isn't being used.
4788 */
4789 if (pGip->enmUseTscDelta <= SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4790 return VINF_SUCCESS;
4791
4792 if (cTries == 0)
4793 cTries = 12;
4794 else if (cTries > 256)
4795 cTries = 256;
4796
4797 if (cMsWaitRetry == 0)
4798 cMsWaitRetry = 2;
4799 else if (cMsWaitRetry > 1000)
4800 cMsWaitRetry = 1000;
4801
4802#ifdef SUPDRV_USE_TSC_DELTA_THREAD
4803 /*
4804 * Has the TSC already been measured and we're not forced to redo it?
4805 */
4806 if ( pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX
4807 && !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE))
4808 return VINF_SUCCESS;
4809
4810 /*
4811 * Asynchronous request? Forward it to the thread, no waiting.
4812 */
4813 if (fFlags & SUP_TSCDELTA_MEASURE_F_ASYNC)
4814 {
4815 /** @todo Async. doesn't implement options like retries, waiting. We'll need
4816 * to pass those options to the thread somehow and implement it in the
4817 * thread. Check if anyone uses/needs fAsync before implementing this. */
4818 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4819 RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
4820 if ( pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
4821 || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
4822 {
4823 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
4824 rc = VINF_SUCCESS;
4825 }
4826        else /* Already queued (WaitAndMeasure) is fine; anything else means the thread is dead. */
4827            rc = pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_WaitAndMeasure ? VINF_SUCCESS : VERR_THREAD_IS_DEAD;
4828        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4829        RTThreadUserSignal(pDevExt->hTscDeltaThread);
4830        return rc;
4831 }
4832
4833 /*
4834 * If a TSC-delta measurement request is already being serviced by the thread,
4835 * wait up to cMsWaitThread ms for it to finish, otherwise bail as busy.
4836 */
4837 msTsStartWait = RTTimeSystemMilliTS();
4838 for (iWaitLoop = 0;; iWaitLoop++)
4839 {
4840 uint64_t cMsElapsed;
4841 SUPDRVTSCDELTATHREADSTATE enmState;
4842 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
4843 enmState = pDevExt->enmTscDeltaThreadState;
4844 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
4845
4846 if (enmState == kTscDeltaThreadState_Measuring)
4847 { /* Must wait, the thread is busy. */ }
4848 else if (enmState == kTscDeltaThreadState_WaitAndMeasure)
4849 { /* Must wait, this state only says what will happen next. */ }
4850 else if (enmState == kTscDeltaThreadState_Terminating)
4851 { /* Must wait, this state only says what should happen next. */ }
4852 else
4853 break; /* All other states, the thread is either idly listening or dead. */
4854
4855 /* Wait or fail. */
4856 if (cMsWaitThread == 0)
4857 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4858 cMsElapsed = RTTimeSystemMilliTS() - msTsStartWait;
4859 if (cMsElapsed >= cMsWaitThread)
4860 return VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY;
4861
4862 rc = RTThreadSleep(RT_MIN((RTMSINTERVAL)(cMsWaitThread - cMsElapsed), RT_MIN(iWaitLoop + 1, 10)));
4863 if (rc == VERR_INTERRUPTED)
4864 return rc;
4865 }
4866#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
4867
4868 /*
4869 * Try measure the TSC delta the given number of times.
4870 */
4871 for (;;)
4872 {
4873 /* Unless we're forced to measure the delta, check whether it's done already. */
4874 if ( !(fFlags & SUP_TSCDELTA_MEASURE_F_FORCE)
4875 && pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX)
4876 {
4877 rc = VINF_SUCCESS;
4878 break;
4879 }
4880
4881 /* Measure it. */
4882 rc = supdrvTscMeasureDeltaOne(pDevExt, iGipCpu);
4883 if (rc != VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED)
4884 {
4885 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
4886 break;
4887 }
4888
4889 /* Retry? */
4890 if (cTries <= 1)
4891 break;
4892 cTries--;
4893
4894 /* Always delay between retries (be nice to the rest of the system
4895 and avoid the BSOD hounds). */
4896 rc = RTThreadSleep(cMsWaitRetry);
4897 if (rc == VERR_INTERRUPTED)
4898 break;
4899 }
4900
4901 return rc;
4902}
4903SUPR0_EXPORT_SYMBOL(SUPR0TscDeltaMeasureBySetIndex);
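
/*
 * A ring-0 usage sketch (hypothetical caller; it assumes the session has
 * already mapped and referenced the GIP, per the validation at the top of
 * the function):
 *
 * @code
 *  uint32_t const iCpuSet = (uint32_t)RTMpCpuIdToSetIndex(RTMpCpuId());
 *  int rc = SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet,
 *                                          0,  // fFlags: no force, no async
 *                                          2,  // cMsWaitRetry
 *                                          0,  // cMsWaitThread: don't wait
 *                                          0); // cTries: default (12)
 * @endcode
 *
 * With cMsWaitThread = 0, a busy measurement thread (in
 * SUPDRV_USE_TSC_DELTA_THREAD builds) yields
 * VERR_SUPDRV_TSC_DELTA_MEASUREMENT_BUSY instead of blocking.
 */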
4904
4905
4906/**
4907 * Service a TSC-delta measurement request.
4908 *
4909 * @returns VBox status code.
4910 * @param pDevExt Pointer to the device instance data.
4911 * @param pSession The support driver session.
4912 * @param pReq Pointer to the TSC-delta measurement request.
4913 */
4914int VBOXCALL supdrvIOCtl_TscDeltaMeasure(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCDELTAMEASURE pReq)
4915{
4916 uint32_t cTries;
4917 uint32_t iCpuSet;
4918 uint32_t fFlags;
4919 RTMSINTERVAL cMsWaitRetry;
4920 RT_NOREF1(pDevExt);
4921
4922 /*
4923 * Validate and adjust/resolve the inputs so they can be passed on to SUPR0TscDeltaMeasureBySetIndex.
4924 */
4925 AssertPtr(pDevExt); AssertPtr(pSession); AssertPtr(pReq); /* paranoia^2 */
4926
4927 if (pReq->u.In.idCpu == NIL_RTCPUID)
4928 return VERR_INVALID_CPU_ID;
4929 iCpuSet = RTMpCpuIdToSetIndex(pReq->u.In.idCpu);
4930 if (iCpuSet >= RTCPUSET_MAX_CPUS)
4931 return VERR_INVALID_CPU_ID;
4932
4933 cTries = pReq->u.In.cRetries == 0 ? 0 : (uint32_t)pReq->u.In.cRetries + 1;
4934
4935 cMsWaitRetry = RT_MAX(pReq->u.In.cMsWaitRetry, 5);
4936
4937 fFlags = 0;
4938 if (pReq->u.In.fAsync)
4939 fFlags |= SUP_TSCDELTA_MEASURE_F_ASYNC;
4940 if (pReq->u.In.fForce)
4941 fFlags |= SUP_TSCDELTA_MEASURE_F_FORCE;
4942
4943 return SUPR0TscDeltaMeasureBySetIndex(pSession, iCpuSet, fFlags, cMsWaitRetry,
4944 cTries == 0 ? 5 * RT_MS_1SEC : cMsWaitRetry * cTries /*cMsWaitThread*/,
4945 cTries);
4946}
4947
4948
4949/**
4950 * Reads TSC with delta applied.
4951 *
4952 * Will try to resolve delta value INT64_MAX before applying it. This is the
4953 * Will try to resolve a delta value of INT64_MAX before applying it. This
4954 * is the main purpose of this function: handling the case where the delta
4955 * still needs to be determined.
4956 * @returns VBox status code.
4957 * @param pDevExt Pointer to the device instance data.
4958 * @param pSession The support driver session.
4959 * @param pReq Pointer to the TSC-read request.
4960 */
4961int VBOXCALL supdrvIOCtl_TscRead(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, PSUPTSCREAD pReq)
4962{
4963 PSUPGLOBALINFOPAGE pGip;
4964 int rc;
4965
4966 /*
4967 * Validate. We require the client to have mapped GIP (no asserting on
4968 * ring-3 preconditions).
4969 */
4970 AssertPtr(pDevExt); AssertPtr(pReq); AssertPtr(pSession); /* paranoia^2 */
4971 if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
4972 return VERR_WRONG_ORDER;
4973 pGip = pDevExt->pGip;
4974 AssertReturn(pGip, VERR_INTERNAL_ERROR_2);
4975
4976 /*
4977 * We're usually here because we need to apply delta, but we shouldn't be
4978 * upset if the GIP is some different mode.
4979 */
4980 if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
4981 {
4982 uint32_t cTries = 0;
4983 for (;;)
4984 {
4985 /*
4986 * Start by gathering the data, using CLI for disabling preemption
4987 * while we do that.
4988 */
4989 RTCCUINTREG fEFlags = ASMIntDisableFlags();
4990 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
4991            int iGipCpu = 0; /* gcc: may be used uninitialized */
4992 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
4993 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
4994 {
4995 int64_t i64Delta = pGip->aCPUs[iGipCpu].i64TSCDelta;
4996 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
4997 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
4998 ASMSetFlags(fEFlags);
4999
5000 /*
5001 * If we're lucky we've got a delta, but no predictions here
5002 * as this I/O control is normally only used when the TSC delta
5003 * is set to INT64_MAX.
5004 */
5005 if (i64Delta != INT64_MAX)
5006 {
5007 pReq->u.Out.u64AdjustedTsc -= i64Delta;
5008 rc = VINF_SUCCESS;
5009 break;
5010 }
5011
5012 /* Give up after a few times. */
5013 if (cTries >= 4)
5014 {
5015 rc = VWRN_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED;
5016 break;
5017 }
5018
5019 /* Need to measure the delta an try again. */
5020 rc = supdrvTscMeasureDeltaOne(pDevExt, iGipCpu);
5021 Assert(pGip->aCPUs[iGipCpu].i64TSCDelta != INT64_MAX || RT_FAILURE_NP(rc));
5022 /** @todo should probably delay on failure... dpc watchdogs */
5023 }
5024 else
5025 {
5026 /* This really shouldn't happen. */
5027 AssertMsgFailed(("idCpu=%#x iCpuSet=%#x (%d)\n", RTMpCpuId(), iCpuSet, iCpuSet));
5028 pReq->u.Out.idApic = supdrvGipGetApicIdSlow();
5029 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
5030 ASMSetFlags(fEFlags);
5031 rc = VERR_INTERNAL_ERROR_5; /** @todo change to warning. */
5032 break;
5033 }
5034 }
5035 }
5036 else
5037 {
5038 /*
5039 * No delta to apply. Easy. Deal with preemption the lazy way.
5040 */
5041 RTCCUINTREG fEFlags = ASMIntDisableFlags();
5042 int iCpuSet = RTMpCpuIdToSetIndex(RTMpCpuId());
5043 int iGipCpu = 0; /* gcc may be used uninitialized */
5044 if (RT_LIKELY( (unsigned)iCpuSet < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx)
5045 && (iGipCpu = pGip->aiCpuFromCpuSetIdx[iCpuSet]) < pGip->cCpus ))
5046 pReq->u.Out.idApic = pGip->aCPUs[iGipCpu].idApic;
5047 else
5048 pReq->u.Out.idApic = supdrvGipGetApicIdSlow();
5049 pReq->u.Out.u64AdjustedTsc = ASMReadTSC();
5050 ASMSetFlags(fEFlags);
5051 rc = VINF_SUCCESS;
5052 }
5053
5054 return rc;
5055}
5056
5057
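/*
 * Illustrative sketch only (not part of the driver): the delta application
 * rule used by supdrvIOCtl_TscRead above.  An adjusted TSC reading is the
 * raw TSC minus the per-CPU delta from the GIP, with INT64_MAX acting as
 * the "delta not yet measured" placeholder.  The helper name and the status
 * code returned for the placeholder case are made up for illustration.
 */
#if 0 /* example sketch, never compiled */
static int supdrvExampleApplyTscDelta(uint64_t uRawTsc, int64_t i64Delta, uint64_t *puAdjustedTsc)
{
    if (i64Delta == INT64_MAX)          /* placeholder: no measurement yet */
        return VERR_NOT_AVAILABLE;      /* hypothetical status for this sketch */
    *puAdjustedTsc = uRawTsc - (uint64_t)i64Delta;
    return VINF_SUCCESS;
}
#endif

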
/**
 * Worker for supdrvIOCtl_GipSetFlags.
 *
 * @returns VBox status code.
 * @retval  VERR_WRONG_ORDER if an enable-once-per-session flag is set again
 *          for a session.
 *
 * @param   pDevExt     Pointer to the device instance data.
 * @param   pSession    The support driver session.
 * @param   fOrMask     The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
 * @param   fAndMask    The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
 *
 * @remarks Caller must own the GIP mutex.
 *
 * @remarks This function doesn't validate any of the flags.
 */
static int supdrvGipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
{
    uint32_t           cRefs;
    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    AssertMsg((fOrMask & fAndMask) == fOrMask, ("%#x & %#x\n", fOrMask, fAndMask)); /* ASSUMED by code below */

    /*
     * Compute GIP test-mode flags.
     */
    if (fOrMask & SUPGIP_FLAGS_TESTING_ENABLE)
    {
        if (!pSession->fGipTestMode)
        {
            Assert(pDevExt->cGipTestModeRefs < _64K);
            pSession->fGipTestMode = true;
            cRefs = ++pDevExt->cGipTestModeRefs;
            if (cRefs == 1)
            {
                fOrMask  |= SUPGIP_FLAGS_TESTING | SUPGIP_FLAGS_TESTING_START;
                fAndMask &= ~SUPGIP_FLAGS_TESTING_STOP;
            }
        }
        else
        {
            LogRelMax(10, ("supdrvGipSetFlags: SUPGIP_FLAGS_TESTING_ENABLE already set for this session\n"));
            return VERR_WRONG_ORDER;
        }
    }
    else if (   !(fAndMask & SUPGIP_FLAGS_TESTING_ENABLE)
             && pSession->fGipTestMode)
    {
        Assert(pDevExt->cGipTestModeRefs > 0);
        Assert(pDevExt->cGipTestModeRefs < _64K);
        pSession->fGipTestMode = false;
        cRefs = --pDevExt->cGipTestModeRefs;
        if (!cRefs)
            fOrMask |= SUPGIP_FLAGS_TESTING_STOP;
        else
            fAndMask |= SUPGIP_FLAGS_TESTING_ENABLE;
    }

    /*
     * Commit the flags.  This should be done as atomically as possible
     * since the flag consumers won't be holding the GIP mutex.
     */
    ASMAtomicOrU32(&pGip->fFlags, fOrMask);
    ASMAtomicAndU32(&pGip->fFlags, fAndMask);

    return VINF_SUCCESS;
}


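/*
 * Illustrative sketch only (not part of the driver): the per-session test
 * mode life cycle enforced by supdrvGipSetFlags above.  A second enable from
 * the same session fails with VERR_WRONG_ORDER, and the device-wide
 * reference count keeps SUPGIP_FLAGS_TESTING set until the last session
 * disables it.  The function name is made up; the GIP mutex is assumed to
 * be held, as the worker requires.
 */
#if 0 /* example sketch, never compiled */
static void supdrvExampleTestModeLifeCycle(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
{
    int rc = supdrvGipSetFlags(pDevExt, pSession, SUPGIP_FLAGS_TESTING_ENABLE, UINT32_MAX);
    Assert(RT_SUCCESS(rc));         /* first enable: ref count goes to 1 */

    rc = supdrvGipSetFlags(pDevExt, pSession, SUPGIP_FLAGS_TESTING_ENABLE, UINT32_MAX);
    Assert(rc == VERR_WRONG_ORDER); /* enabling twice per session is refused */

    rc = supdrvGipSetFlags(pDevExt, pSession, 0, ~SUPGIP_FLAGS_TESTING_ENABLE);
    Assert(RT_SUCCESS(rc));         /* disable: last ref sets SUPGIP_FLAGS_TESTING_STOP */
}
#endif

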
/**
 * Sets GIP test mode parameters.
 *
 * @returns VBox status code.
 * @param   pDevExt     Pointer to the device instance data.
 * @param   pSession    The support driver session.
 * @param   fOrMask     The OR mask of the GIP flags, see SUPGIP_FLAGS_XXX.
 * @param   fAndMask    The AND mask of the GIP flags, see SUPGIP_FLAGS_XXX.
 */
int VBOXCALL supdrvIOCtl_GipSetFlags(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, uint32_t fOrMask, uint32_t fAndMask)
{
    PSUPGLOBALINFOPAGE pGip;
    int                rc;

    /*
     * Validate.  We require the client to have mapped GIP (no asserting on
     * ring-3 preconditions).
     */
    AssertPtr(pDevExt); AssertPtr(pSession); /* paranoia^2 */
    if (pSession->GipMapObjR3 == NIL_RTR0MEMOBJ)
        return VERR_WRONG_ORDER;
    pGip = pDevExt->pGip;
    AssertReturn(pGip, VERR_INTERNAL_ERROR_3);

    if (fOrMask & ~SUPGIP_FLAGS_VALID_MASK)
        return VERR_INVALID_PARAMETER;
    if ((fAndMask & ~SUPGIP_FLAGS_VALID_MASK) != ~SUPGIP_FLAGS_VALID_MASK)
        return VERR_INVALID_PARAMETER;

    /*
     * Don't confuse supdrvGipSetFlags or anyone else by both setting
     * and clearing the same flags.  AND takes precedence.
     */
    fOrMask &= fAndMask;

    /*
     * Take the GIP mutex to avoid having to think about races between two
     * clients changing the flags at the same time (state is not simple).
     */
#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRequest(pDevExt->mtxGip, RT_INDEFINITE_WAIT);
#else
    RTSemFastMutexRequest(pDevExt->mtxGip);
#endif

    rc = supdrvGipSetFlags(pDevExt, pSession, fOrMask, fAndMask);

#ifdef SUPDRV_USE_MUTEX_FOR_GIP
    RTSemMutexRelease(pDevExt->mtxGip);
#else
    RTSemFastMutexRelease(pDevExt->mtxGip);
#endif
    return rc;
}

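/*
 * Illustrative sketch only (not part of the driver): the mask semantics of
 * supdrvIOCtl_GipSetFlags above.  A flag is set by placing it in fOrMask
 * while keeping it in fAndMask, and cleared by removing it from fAndMask;
 * since AND takes precedence, a flag present in fOrMask but missing from
 * fAndMask ends up cleared.  The surrounding statements are made up for
 * illustration.
 */
#if 0 /* example sketch, never compiled */
/* Set a flag (fAndMask = UINT32_MAX preserves everything else): */
rc = supdrvIOCtl_GipSetFlags(pDevExt, pSession, SUPGIP_FLAGS_TESTING_ENABLE, UINT32_MAX);

/* Clear the same flag (fOrMask = 0, flag removed from fAndMask): */
rc = supdrvIOCtl_GipSetFlags(pDevExt, pSession, 0, ~SUPGIP_FLAGS_TESTING_ENABLE);
#endif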