VirtualBox

Changeset 54328 in vbox


Ignore:
Timestamp:
Feb 20, 2015 2:06:51 PM (10 years ago)
Author:
vboxsync
Message:

SUPDrvGip.cpp: Reordered the code.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp

    r54327 r54328  
    130130#endif
    131131
     132
    132133/*******************************************************************************
    133134*   Internal Functions                                                         *
     
    135136static DECLCALLBACK(void)   supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
    136137static DECLCALLBACK(void)   supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
    137 static DECLCALLBACK(void)   supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
    138 static void                 supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS,
    139                                           unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
    140 static DECLCALLBACK(void)   supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
    141 static void                 supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
    142 static void                 supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
    143 static void                 supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
    144                                                   RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
    145138static void                 supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
    146 static int                  supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt);
    147 static int                  supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
     139#ifdef SUPDRV_USE_TSC_DELTA_THREAD
     140static int                  supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
     141static void                 supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
     142static int                  supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt);
     143#endif
    148144
    149145
     
    153149DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
    154150
     151
     152
     153/*
     154 *
     155 * Misc Common GIP Code
     156 * Misc Common GIP Code
     157 * Misc Common GIP Code
     158 *
     159 *
     160 */
     161
     162
     163/**
     164 * Finds the GIP CPU index corresponding to @a idCpu.
     165 * Linearly scans the first pGip->cCpus entries of pGip->aCPUs and returns the index of the first entry whose idCpu matches.
     166 * @returns GIP CPU array index, UINT32_MAX if not found.
     167 * @param   pGip                The GIP.
     168 * @param   idCpu               The CPU ID.
     169 */
     170static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
     171{
     172    uint32_t i;
     173    for (i = 0; i < pGip->cCpus; i++)
     174        if (pGip->aCPUs[i].idCpu == idCpu)
     175            return i;
     176    return UINT32_MAX; /* No entry in aCPUs[0..cCpus) carries idCpu. */
     177}
     178
     179
     180/**
     181 * Applies the TSC delta to the supplied raw TSC value.
     182 * The CPU entry is looked up via pGip->aiCpuFromApicId[idApic]; its i64TSCDelta is subtracted from *puTsc unless it equals INT64_MAX.
     183 * @returns VINF_SUCCESS (also when the delta is INT64_MAX and nothing is applied), VERR_INVALID_CPU_INDEX or VERR_INVALID_CPU_ID. (Ignored by all users, just FYI.)
     184 * @param   pGip            Pointer to the GIP.
     185 * @param   puTsc           Pointer to a valid TSC value before the TSC delta has been applied.
     186 * @param   idApic          The APIC ID of the CPU @c puTsc corresponds to.
     187 * @param   pfDeltaApplied  Where to store whether the TSC delta was successfully
     188 *                          applied or not (optional, can be NULL).
     189 *
     190 * @remarks Maybe called with interrupts disabled in ring-0!
     191 *
     192 * @note    Don't you dare change the delta calculation.  If you really do, make
     193 *          sure you update all places where it's used (IPRT, SUPLibAll.cpp,
     194 *          SUPDrv.c, supdrvGipMpEvent, and more).
     195 */
     196DECLINLINE(int) supdrvTscDeltaApply(PSUPGLOBALINFOPAGE pGip, uint64_t *puTsc, uint16_t idApic, bool *pfDeltaApplied)
     197{
     198    int rc;
     199
     200    /*
     201     * Validate input.
     202     */
     203    AssertPtr(puTsc);
     204    AssertPtr(pGip);
     205    Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
     206
     207    /*
     208     * Carefully convert the idApic into a GIPCPU entry.
     209     */
     210    if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
     211    {
     212        uint16_t iCpu = pGip->aiCpuFromApicId[idApic];
     213        if (RT_LIKELY(iCpu < pGip->cCpus))
     214        {
     215            PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
     216
     217            /*
     218             * Apply the delta if valid.
     219             */
     220            if (RT_LIKELY(pGipCpu->i64TSCDelta != INT64_MAX))
     221            {
     222                *puTsc -= pGipCpu->i64TSCDelta;
     223                if (pfDeltaApplied)
     224                    *pfDeltaApplied = true;
     225                return VINF_SUCCESS;
     226            }
     227
     228            rc = VINF_SUCCESS; /* Delta is INT64_MAX: *puTsc is left unmodified and *pfDeltaApplied is set to false below, but no error is returned. */
     229        }
     230        else
     231        {
     232            AssertMsgFailed(("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus));
     233            rc = VERR_INVALID_CPU_INDEX;
     234        }
     235    }
     236    else
     237    {
     238        AssertMsgFailed(("idApic=%u\n", idApic));
     239        rc = VERR_INVALID_CPU_ID;
     240    }
     241    if (pfDeltaApplied)
     242        *pfDeltaApplied = false;
     243    return rc;
     244}
     245
     246
     247/*
     248 *
     249 * GIP Mapping and Unmapping Related Code.
     250 * GIP Mapping and Unmapping Related Code.
     251 * GIP Mapping and Unmapping Related Code.
     252 *
     253 *
     254 */
    155255
    156256
     
    649749}
    650750
     751
     752
     753
     754
     755/*
     756 *
     757 *
     758 * GIP Initialization, Termination and CPU Offline / Online Related Code.
     759 * GIP Initialization, Termination and CPU Offline / Online Related Code.
     760 * GIP Initialization, Termination and CPU Offline / Online Related Code.
     761 *
     762 *
     763 */
     764
     765
     766/**
     767 * Timer callback function for TSC frequency refinement in invariant GIP mode.
     768 * Samples the TSC and the system nanosecond time, computes both deltas against pDevExt->u64TscAnchor / u64NanoTSAnchor and stores the resulting frequency in pGip->u64CpuHz and pGip->aCPUs[0].u64CpuHz.
     769 * @param   pTimer      The timer (not referenced by this function).
     770 * @param   pvUser      Opaque pointer to the device instance data.
     771 * @param   iTick       The timer tick (not referenced by this function).
     772 */
     773static DECLCALLBACK(void) supdrvInitAsyncRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
     774{
     775    PSUPDRVDEVEXT      pDevExt = (PSUPDRVDEVEXT)pvUser;
     776    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
     777    bool               fDeltaApplied = false;
     778    uint8_t            idApic;
     779    uint64_t           u64DeltaNanoTS;
     780    uint64_t           u64DeltaTsc;
     781    uint64_t           u64NanoTS;
     782    uint64_t           u64Tsc;
     783    RTCCUINTREG        uFlags;
     784
     785    /* Paranoia. */
     786    Assert(pGip);
     787    Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
     788
     789#if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
     790    u64NanoTS = RTTimeSystemNanoTS();
     791    while (RTTimeSystemNanoTS() == u64NanoTS)
     792        ASMNopPause(); /* Spin until the system nanosecond timestamp changes. */
     793#endif
     794    uFlags    = ASMIntDisableFlags(); /* The APIC ID, TSC and nanosecond time below are read between ASMIntDisableFlags() and ASMSetFlags(). */
     795    idApic    = ASMGetApicId();
     796    u64Tsc    = ASMReadTSC();
     797    u64NanoTS = RTTimeSystemNanoTS();
     798    ASMSetFlags(uFlags);
     799    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
     800        supdrvTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
     801    u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
     802    u64DeltaTsc = u64Tsc - pDevExt->u64TscAnchor;
     803
     804    if (RT_UNLIKELY(   pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO
     805                    && !fDeltaApplied))
     806    {
     807        Log(("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
     808                    GIP_TSC_REFINE_INTERVAL));
     809        return; /* pGip->u64CpuHz is left untouched in this case. */
     810    }
     811
     812    /* Calculate the TSC frequency as u64DeltaTsc * RT_NS_1SEC / u64DeltaNanoTS; the else branch uses 128-bit arithmetic when the operands exceed the limits checked here. NOTE(review): no guard against u64DeltaNanoTS being 0 is visible - confirm it cannot happen. */
     813    if (   u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
     814        && u64DeltaNanoTS < UINT32_MAX)
     815        pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, (uint32_t)u64DeltaNanoTS);
     816    else
     817    {
     818        RTUINT128U CpuHz, Tmp, Divisor;
     819        CpuHz.s.Lo = CpuHz.s.Hi = 0;
     820        RTUInt128MulU64ByU64(&Tmp, u64DeltaTsc, RT_NS_1SEC_64);
     821        RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, u64DeltaNanoTS));
     822        pGip->u64CpuHz = CpuHz.s.Lo; /* Only the low 64 bits of the quotient are kept. */
     823    }
     824
     825    /* Update rest of GIP. */
     826    Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP().*/
     827    pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
     828}
     829
     830
     831/**
     832 * Starts the TSC-frequency refinement phase asynchronously.
     833 *
     834 * @param   pDevExt        Pointer to the device instance data.
     835 */
     836static void supdrvGipInitAsyncRefineTscFreq(PSUPDRVDEVEXT pDevExt)
     837{
     838    uint64_t            u64NanoTS;
     839    RTCCUINTREG         uFlags;
     840    uint8_t             idApic;
     841    int                 rc;
     842    PSUPGLOBALINFOPAGE  pGip;
     843
     844    /* Validate. */
     845    Assert(pDevExt);
     846    Assert(pDevExt->pGip);
     847    pGip = pDevExt->pGip;
     848
    651849#ifdef SUPDRV_USE_TSC_DELTA_THREAD
    652 
    653 /**
    654  * Switches the TSC-delta measurement thread into the butchered state.
    655  * Sets enmTscDeltaThreadState to kTscDeltaThreadState_Butchered, releases the TSC-delta spinlock (acquiring it first if the caller does not hold it), logs pszFailed together with rcFailed and returns rcFailed.
    656  * @returns VBox status code.
    657  * @param pDevExt           Pointer to the device instance data.
    658  * @param fSpinlockHeld     Whether the TSC-delta spinlock is held or not.
    659  * @param pszFailed         An error message to log.
    660  * @param rcFailed          The error code to exit the thread with.
    661  */
    662 static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
    663 {
    664     if (!fSpinlockHeld)
    665         RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    666 
    667     pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
    668     RTSpinlockRelease(pDevExt->hTscDeltaSpinlock); /* The lock is always held here, whichever side acquired it. */
    669     OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", pszFailed, rcFailed)); /* %s consumes pszFailed, %Rrc consumes rcFailed. */
    670     return rcFailed;
    671 }
    672 
    673 
    674 /**
    675  * The TSC-delta measurement thread.
    676  *
    677  * @returns VBox status code.
    678  * @param hThread   The thread handle.
    679  * @param pvUser    Opaque pointer to the device instance data.
    680  */
    681 static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
    682 {
    683     PSUPDRVDEVEXT     pDevExt = (PSUPDRVDEVEXT)pvUser;
    684     bool              fInitialMeasurement = true;
    685     uint32_t          cConsecutiveTimeouts = 0;
    686     int               rc = VERR_INTERNAL_ERROR_2;
    687     for (;;)
     850    /*
     851     * If the TSC-delta thread is created, wait until it's done calculating
     852     * the TSC-deltas on the relevant online CPUs before we start the TSC refinement.
     853     */
     854    if (   pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
     855        && ASMAtomicReadS32(&pDevExt->rcTscDelta) == VERR_NOT_AVAILABLE)
     856    {
     857        rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
     858        if (rc == VERR_TIMEOUT)
     859        {
     860            SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
     861            return;
     862        }
     863    }
     864#endif
     865
     866    /*
     867     * Record the TSC and NanoTS as the starting anchor point for refinement of the
     868     * TSC. We deliberately avoid using SUPReadTSC() here as we want to keep the
     869     * reading of the TSC and the NanoTS as close as possible.
     870     */
     871    u64NanoTS = RTTimeSystemNanoTS();
     872    while (RTTimeSystemNanoTS() == u64NanoTS)
     873        ASMNopPause();
     874    uFlags                   = ASMIntDisableFlags();
     875    idApic                   = ASMGetApicId();
     876    pDevExt->u64TscAnchor    = ASMReadTSC();
     877    pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
     878    ASMSetFlags(uFlags);
     879    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
     880        supdrvTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, NULL /* pfDeltaApplied */);
     881
     882    rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY,
     883                         supdrvInitAsyncRefineTscTimer, pDevExt);
     884    if (RT_SUCCESS(rc))
    688885    {
    689886        /*
    690          * Switch on the current state.
     887         * Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
     888         * interval as small as possible while gaining the most consistent and accurate frequency
     889         * (compared to what the host OS might have measured).
     890         *
     891         * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
     892         * same TSC frequency whenever possible so we need to keep the interval short.
    691893         */
    692         SUPDRVTSCDELTATHREADSTATE enmState;
    693         RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    694         enmState = pDevExt->enmTscDeltaThreadState;
    695         switch (enmState)
    696         {
    697             case kTscDeltaThreadState_Creating:
    698             {
    699                 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
    700                 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
    701                 if (RT_FAILURE(rc))
    702                     return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
    703                 /* fall thru */
    704             }
    705 
    706             case kTscDeltaThreadState_Listening:
    707             {
    708                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    709 
    710                 /* Simple adaptive timeout. */
    711                 if (cConsecutiveTimeouts++ == 10)
    712                 {
    713                     if (pDevExt->cMsTscDeltaTimeout == 1)           /* 10 ms */
    714                         pDevExt->cMsTscDeltaTimeout = 10;
    715                     else if (pDevExt->cMsTscDeltaTimeout == 10)     /* +100 ms */
    716                         pDevExt->cMsTscDeltaTimeout = 100;
    717                     else if (pDevExt->cMsTscDeltaTimeout == 100)    /* +1000 ms */
    718                         pDevExt->cMsTscDeltaTimeout = 500;
    719                     cConsecutiveTimeouts = 0;
    720                 }
    721                 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
    722                 if (   RT_FAILURE(rc)
    723                     && rc != VERR_TIMEOUT)
    724                     return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
    725                 RTThreadUserReset(pDevExt->hTscDeltaThread);
    726                 break;
    727             }
    728 
    729             case kTscDeltaThreadState_WaitAndMeasure:
    730             {
    731                 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
    732                 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
    733                 if (RT_FAILURE(rc))
    734                     return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
    735                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    736                 pDevExt->cMsTscDeltaTimeout = 1;
    737                 RTThreadSleep(10);
    738                 /* fall thru */
    739             }
    740 
    741             case kTscDeltaThreadState_Measuring:
    742             {
    743                 cConsecutiveTimeouts = 0;
    744                 if (fInitialMeasurement)
    745                 {
    746                     int cTries = 8;
    747                     int cMsWaitPerTry = 10;
    748                     fInitialMeasurement = false;
    749                     do
    750                     {
    751                         rc = supdrvMeasureInitialTscDeltas(pDevExt);
    752                         if (   RT_SUCCESS(rc)
    753                             || (   RT_FAILURE(rc)
    754                                 && rc != VERR_TRY_AGAIN
    755                                 && rc != VERR_CPU_OFFLINE))
    756                         {
    757                             break;
    758                         }
    759                         RTThreadSleep(cMsWaitPerTry);
    760                     } while (cTries-- > 0);
    761                 }
    762                 else
    763                 {
    764                     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    765                     unsigned iCpu;
    766 
    767                     /* Measure TSC-deltas only for the CPUs that are in the set. */
    768                     rc = VINF_SUCCESS;
    769                     for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
    770                     {
    771                         PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
    772                         if (   pGipCpuWorker->i64TSCDelta == INT64_MAX
    773                             && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
    774                         {
    775                             rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
    776                         }
    777                     }
    778                 }
    779                 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    780                 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
    781                     pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
    782                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    783                 Assert(rc != VERR_NOT_AVAILABLE);   /* VERR_NOT_AVAILABLE is used as the initial value. */
    784                 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
    785                 break;
    786             }
    787 
    788             case kTscDeltaThreadState_Terminating:
    789                 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
    790                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    791                 return VINF_SUCCESS;
    792 
    793             case kTscDeltaThreadState_Butchered:
    794             default:
    795                 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
    796         }
    797     }
    798 
    799     return rc;
    800 }
    801 
    802 
    803 /**
    804  * Waits for the TSC-delta measurement thread to respond to a state change.
    805  *
    806  * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
    807  *          other error code on internal error.
    808  *
    809  * @param   pDevExt         Pointer to the device instance data.
    810  * @param   enmCurState     The current state.
    811  * @param   enmNewState     The new state we're waiting for it to enter.
    812  */
    813 static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
    814                                     SUPDRVTSCDELTATHREADSTATE enmNewState)
    815 {
    816     /*
    817      * Wait a short while for the expected state transition.
    818      */
    819     int rc;
    820     RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
    821     RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    822     if (pDevExt->enmTscDeltaThreadState == enmNewState)
    823     {
    824         RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    825         rc = VINF_SUCCESS;
    826     }
    827     else if (pDevExt->enmTscDeltaThreadState == enmCurState)
    828     {
    829         /*
    830          * Wait longer if the state has not yet transitioned to the one we want.
    831          */
    832         RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    833         rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
    834         if (   RT_SUCCESS(rc)
    835             || rc == VERR_TIMEOUT)
    836         {
    837             /*
    838              * Check the state whether we've succeeded.
    839              */
    840             SUPDRVTSCDELTATHREADSTATE enmState;
    841             RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    842             enmState = pDevExt->enmTscDeltaThreadState;
    843             RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    844             if (enmState == enmNewState)
    845                 rc = VINF_SUCCESS;
    846             else if (enmState == enmCurState)
    847             {
    848                 rc = VERR_TIMEOUT;
    849                 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
    850                             enmNewState));
    851             }
    852             else
    853             {
    854                 rc = VERR_INTERNAL_ERROR;
    855                 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
    856                             enmState, enmNewState));
    857             }
    858         }
    859         else
    860             OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
     894        rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
     895        AssertRC(rc);
    861896    }
    862897    else
    863     {
    864         RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    865         OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
    866         rc = VERR_INTERNAL_ERROR;
    867     }
    868 
    869     return rc;
    870 }
    871 
    872 
    873 /**
    874  * Terminates the TSC-delta measurement thread.
    875  * Sets the thread state to kTscDeltaThreadState_Terminating under the spinlock, signals the thread and waits for it (50 * RT_MS_1SEC); on failure it re-signals and waits again (2 * RT_MS_1SEC per attempt). The final status is not reported to the caller.
    876  * @param   pDevExt   Pointer to the device instance data.
    877  */
    878 static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
    879 {
    880     int rc;
    881     RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    882     pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
    883     RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    884     RTThreadUserSignal(pDevExt->hTscDeltaThread);
    885     rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
    886     if (RT_FAILURE(rc))
    887     {
    888         /* Signal a few more times before giving up. */
    889         int cTriesLeft = 5; /* With the pre-decrement test below this yields at most 4 retries. */
    890         while (--cTriesLeft > 0)
    891         {
    892             RTThreadUserSignal(pDevExt->hTscDeltaThread);
    893             rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
    894             if (rc != VERR_TIMEOUT) /* Stop retrying on success or on any error other than a timeout. */
    895                 break;
    896         }
    897     }
    898 }
    899 
    900 
    901 /**
    902  * Initializes and spawns the TSC-delta measurement thread.
    903  *
    904  * A thread is required for servicing re-measurement requests from events like
    905  * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
    906  * under all contexts on all OSs.
    907  * Creates the spinlock, the event semaphore and the thread in that order; on any failure the resources created so far are destroyed again before returning.
    908  * @returns VBox status code.
    909  * @param   pDevExt           Pointer to the device instance data.
    910  *
    911  * @remarks Must only be called -after- initializing GIP and setting up MP
    912  *          notifications!
    913  */
    914 static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
    915 {
    916     int rc;
    917     Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
    918     rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
    919     if (RT_SUCCESS(rc))
    920     {
    921         rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
    922         if (RT_SUCCESS(rc))
    923         {
    924             pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
    925             pDevExt->cMsTscDeltaTimeout = 1;
    926             rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
    927                                 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
    928             if (RT_SUCCESS(rc))
    929             {
    930                 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
    931                 if (RT_SUCCESS(rc))
    932                 {
    933                     ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE); /* Start with no measurement result available. */
    934                     return rc;
    935                 }
    936 
    937                 OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
    938                 supdrvTscDeltaThreadTerminate(pDevExt); /* Then falls through to destroy the event semaphore and the spinlock. NOTE(review): hTscDeltaThread is not reset on this path - confirm callers do not rely on it. */
    939             }
    940             else
    941                 OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
    942             RTSemEventDestroy(pDevExt->hTscDeltaEvent);
    943             pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
    944         }
    945         else
    946             OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
    947         RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
    948         pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
    949     }
    950     else
    951         OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
    952 
    953     return rc;
    954 }
    955 
    956 
    957 /**
    958  * Terminates the TSC-delta measurement thread and cleanup.
    959  * The thread is only terminated when both the spinlock and the event semaphore handles are valid; each valid handle is then destroyed and reset to its NIL value, and pDevExt->rcTscDelta is reset to VERR_NOT_AVAILABLE.
    960  * @param   pDevExt         Pointer to the device instance data.
    961  */
    962 static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
    963 {
    964     if (   pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
    965         && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
    966     {
    967         supdrvTscDeltaThreadTerminate(pDevExt);
    968     }
    969 
    970     if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
    971     {
    972         RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
    973         pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
    974     }
    975 
    976     if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
    977     {
    978         RTSemEventDestroy(pDevExt->hTscDeltaEvent);
    979         pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
    980     }
    981 
    982     ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE); /* Executed unconditionally, even if nothing was destroyed above. */
    983 }
    984 
    985 
    986 /**
    987  * Waits for TSC-delta measurements to be completed for all online CPUs.
    988  * Polls until pDevExt->TscDeltaObtainedCpuSet equals pGip->OnlineCpuSet, calling RTThreadSleep(cMsWaitGranularity) between checks, for at most cTriesLeft iterations.
    989  * @returns VINF_SUCCESS when the sets are equal, VERR_INVALID_POINTER if pDevExt->pGip is NULL, VERR_THREAD_NOT_WAITABLE if hTscDeltaThread is NIL_RTTHREAD, VERR_TIMEOUT otherwise.
    990  * @param   pDevExt         Pointer to the device instance data.
    991  */
    992 static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
    993 {
    994     int cTriesLeft = 5;
    995     int cMsTotalWait;
    996     int cMsWaited = 0;
    997     int cMsWaitGranularity = 1;
    998 
    999     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    1000     AssertReturn(pGip, VERR_INVALID_POINTER);
    1001 
    1002     if (RT_UNLIKELY(pDevExt->hTscDeltaThread == NIL_RTTHREAD))
    1003         return VERR_THREAD_NOT_WAITABLE;
    1004 
    1005     cMsTotalWait = RT_MIN(pGip->cPresentCpus + 10, 200);
    1006     while (cTriesLeft-- > 0)
    1007     {
    1008         if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
    1009             return VINF_SUCCESS;
    1010         RTThreadSleep(cMsWaitGranularity);
    1011         cMsWaited += cMsWaitGranularity;
    1012         if (cMsWaited >= cMsTotalWait) /* NOTE(review): with 5 tries at 1 ms each cMsWaited peaks at 5, while cMsTotalWait is at least 10 for a non-negative cPresentCpus, so this break looks unreachable - confirm intent. */
    1013             break;
    1014     }
    1015 
    1016     return VERR_TIMEOUT;
    1017 }
    1018 
    1019 #endif /* SUPDRV_USE_TSC_DELTA_THREAD */
    1020 
    1021 /**
    1022  * Applies the TSC delta to the supplied raw TSC value.
    1023  *
    1024  * @returns VBox status code. (Ignored by all users, just FYI.)
    1025  * @param   pGip            Pointer to the GIP.
    1026  * @param   puTsc           Pointer to a valid TSC value before the TSC delta has been applied.
    1027  * @param   idApic          The APIC ID of the CPU @c puTsc corresponds to.
    1028  * @param   pfDeltaApplied  Where to store whether the TSC delta was successfully
    1029  *                          applied or not (optional, can be NULL).
    1030  *
    1031  * @remarks Maybe called with interrupts disabled in ring-0!
    1032  *
    1033  * @note    Don't you dare change the delta calculation.  If you really do, make
    1034  *          sure you update all places where it's used (IPRT, SUPLibAll.cpp,
    1035  *          SUPDrv.c, supdrvGipMpEvent, and more).
    1036  */
    1037 DECLINLINE(int) supdrvTscDeltaApply(PSUPGLOBALINFOPAGE pGip, uint64_t *puTsc, uint16_t idApic, bool *pfDeltaApplied)
    1038 {
    1039     int rc;
    1040 
    1041     /*
    1042      * Validate input.
    1043      */
    1044     AssertPtr(puTsc);
    1045     AssertPtr(pGip);
    1046     Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
    1047 
    1048     /*
    1049      * Carefully convert the idApic into a GIPCPU entry.
    1050      */
    1051     if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
    1052     {
    1053         uint16_t iCpu = pGip->aiCpuFromApicId[idApic];
    1054         if (RT_LIKELY(iCpu < pGip->cCpus))
    1055         {
    1056             PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
    1057 
    1058             /*
    1059              * Apply the delta if valid.
    1060              */
    1061             if (RT_LIKELY(pGipCpu->i64TSCDelta != INT64_MAX))
    1062             {
    1063                 *puTsc -= pGipCpu->i64TSCDelta;
    1064                 if (pfDeltaApplied)
    1065                     *pfDeltaApplied = true;
    1066                 return VINF_SUCCESS;
    1067             }
    1068 
    1069             rc = VINF_SUCCESS;
    1070         }
    1071         else
    1072         {
    1073             AssertMsgFailed(("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus));
    1074             rc = VERR_INVALID_CPU_INDEX;
    1075         }
    1076     }
    1077     else
    1078     {
    1079         AssertMsgFailed(("idApic=%u\n", idApic));
    1080         rc = VERR_INVALID_CPU_ID;
    1081     }
    1082     if (pfDeltaApplied)
    1083         *pfDeltaApplied = false;
    1084     return rc;
     898        OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
    1085899}
    1086900
     
    1101915 * @remarks Must be called only -after- measuring the TSC deltas.
    1102916 */
    1103 static int supdrvGipMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
     917static int supdrvGipInitMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
    1104918{
    1105919    int cTriesLeft = 4;
     
    1185999                if (rc == VERR_TIMEOUT)
    11861000                {
    1187                     SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
     1001                    SUPR0Printf("vboxdrv: supdrvGipInitMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
    11881002                    return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
    11891003                }
    11901004#else
    1191                 SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
     1005                SUPR0Printf("vboxdrv: supdrvGipInitMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
    11921006                            idApicBefore, idApicAfter, cTriesLeft);
    11931007#endif
     
    12101024
    12111025/**
    1212  * Timer callback function for TSC frequency refinement in invariant GIP mode.
    1213  *
    1214  * @param   pTimer      The timer.
    1215  * @param   pvUser      Opaque pointer to the device instance data.
    1216  * @param   iTick       The timer tick.
    1217  */
    1218 static DECLCALLBACK(void) supdrvRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
    1219 {
    1220     PSUPDRVDEVEXT      pDevExt = (PSUPDRVDEVEXT)pvUser;
     1026 * Finds our (@a idCpu) entry, or allocates a new one if not found.
     1027 *
     1028 * @returns Index of the CPU in the cache set.
     1029 * @param   pGip                The GIP.
     1030 * @param   idCpu               The CPU ID.
     1031 */
     1032static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
     1033{
     1034    uint32_t i, cTries;
     1035
     1036    /*
     1037     * ASSUMES that CPU IDs are constant.
              *
              * First pass: look for an entry that already carries this CPU ID.
     1038     */
     1039    for (i = 0; i < pGip->cCpus; i++)
     1040        if (pGip->aCPUs[i].idCpu == idCpu)
     1041            return i;
     1042
             /*
              * Not found: scan the table for an entry whose idCpu is still
              * NIL_RTCPUID and try to claim it with an atomic compare-and-exchange.
              * The scan is repeated a bounded number of times before giving up.
              */
     1043    cTries = 0;
     1044    do
     1045    {
     1046        for (i = 0; i < pGip->cCpus; i++)
     1047        {
     1048            bool fRc;
     1049            ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
     1050            if (fRc)
     1051                return i;
     1052        }
     1053    } while (cTries++ < 32);
             /* No entry could be claimed.  The scan loop above left i at
                pGip->cCpus, so i - 1 below is the last entry index. */
     1054    AssertReleaseFailed();
     1055    return i - 1;
     1056}
     1057
     1058
     1059/**
     1060 * The calling CPU should be accounted as online, update GIP accordingly.
     1061 *
     1062 * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
     1063 *
     1064 * @param   pDevExt             The device extension.
     1065 * @param   idCpu               The CPU ID.
     1066 */
     1067static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
     1068{
     1069    int         iCpuSet = 0;
     1070    uint16_t    idApic = UINT16_MAX;
     1071    uint32_t    i = 0;
     1072    uint64_t    u64NanoTS = 0;
    12211073    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    1222     bool               fDeltaApplied = false;
    1223     uint8_t            idApic;
    1224     uint64_t           u64DeltaNanoTS;
    1225     uint64_t           u64DeltaTsc;
    1226     uint64_t           u64NanoTS;
    1227     uint64_t           u64Tsc;
    1228     RTCCUINTREG        uFlags;
    1229 
    1230     /* Paranoia. */
    1231     Assert(pGip);
    1232     Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
    1233 
    1234 #if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
    1235     u64NanoTS = RTTimeSystemNanoTS();
    1236     while (RTTimeSystemNanoTS() == u64NanoTS)
    1237         ASMNopPause();
    1238 #endif
    1239     uFlags    = ASMIntDisableFlags();
    1240     idApic    = ASMGetApicId();
    1241     u64Tsc    = ASMReadTSC();
    1242     u64NanoTS = RTTimeSystemNanoTS();
    1243     ASMSetFlags(uFlags);
    1244     if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
    1245         supdrvTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
    1246     u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
    1247     u64DeltaTsc = u64Tsc - pDevExt->u64TscAnchor;
    1248 
    1249     if (RT_UNLIKELY(   pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO
    1250                     && !fDeltaApplied))
    1251     {
    1252         Log(("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
    1253                     GIP_TSC_REFINE_INTERVAL));
    1254         return;
    1255     }
    1256 
    1257     /* Calculate the TSC frequency. */
    1258     if (   u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
    1259         && u64DeltaNanoTS < UINT32_MAX)
    1260         pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, (uint32_t)u64DeltaNanoTS);
     1074
     1075    AssertPtrReturnVoid(pGip);
     1076    AssertRelease(idCpu == RTMpCpuId());
     1077    Assert(pGip->cPossibleCpus == RTMpGetCount());
     1078
     1079    /*
     1080     * Do this behind a spinlock with interrupts disabled as this can fire
     1081     * on all CPUs simultaneously, see @bugref{6110}.
     1082     */
     1083    RTSpinlockAcquire(pDevExt->hGipSpinlock);
     1084
     1085    /*
     1086     * Update the globals.
     1087     */
     1088    ASMAtomicWriteU16(&pGip->cPresentCpus,  RTMpGetPresentCount());
     1089    ASMAtomicWriteU16(&pGip->cOnlineCpus,   RTMpGetOnlineCount());
     1090    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
     1091    if (iCpuSet >= 0)
     1092    {
     1093        Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
     1094        RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
     1095        RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
     1096    }
     1097
     1098    /*
     1099     * Update the entry.
     1100     */
     1101    u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
     1102    i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
     1103    supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
     1104    idApic = ASMGetApicId();
     1105    ASMAtomicWriteU16(&pGip->aCPUs[i].idApic,  idApic);
     1106    ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
     1107    ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu,  idCpu);
     1108
     1109    /*
     1110     * Update the APIC ID and CPU set index mappings.
     1111     */
     1112    ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic],     i);
     1113    ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
     1114
     1115    /* Update the Mp online/offline counter. */
     1116    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
     1117
     1118    /* Add this CPU to the set of CPUs for which we need to calculate their TSC-deltas. */
     1119    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
     1120    {
     1121        RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
     1122#ifdef SUPDRV_USE_TSC_DELTA_THREAD
     1123        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
     1124        if (   pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
     1125            || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
     1126        {
     1127            pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
     1128        }
     1129        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
     1130#endif
     1131    }
     1132
     1133    /* commit it */
     1134    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
     1135
     1136    RTSpinlockRelease(pDevExt->hGipSpinlock);
     1137}
     1138
     1139
     1140/**
     1141 * The CPU should be accounted as offline, update the GIP accordingly.
     1142 *
     1143 * This is used by supdrvGipMpEvent.
     1144 *
     1145 * @param   pDevExt             The device extension.
     1146 * @param   idCpu               The CPU ID.
     1147 */
     1148static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
     1149{
     1150    PSUPGLOBALINFOPAGE  pGip = pDevExt->pGip;
     1151    int                 iCpuSet;
     1152    unsigned            i;
     1153
     1154    AssertPtrReturnVoid(pGip);
     1155    RTSpinlockAcquire(pDevExt->hGipSpinlock);
     1156
     1157    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
     1158    AssertReturnVoid(iCpuSet >= 0);
     1159
     1160    i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
     1161    AssertReturnVoid(i < pGip->cCpus);
     1162    AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
     1163
     1164    Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
     1165    RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
     1166
     1167    /* Update the Mp online/offline counter. */
     1168    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
     1169
     1170    /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
     1171    if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
     1172    {
     1173        ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
     1174        ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
     1175    }
     1176
     1177    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
     1178    {
     1179        /* Reset the TSC delta, we will recalculate it lazily. */
     1180        ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
     1181        /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
     1182        RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
     1183    }
     1184
     1185    /* commit it */
     1186    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
     1187
     1188    RTSpinlockRelease(pDevExt->hGipSpinlock);
     1189}
     1190
     1191
     1192/**
     1193 * Multiprocessor event notification callback.
     1194 *
     1195 * This is used to make sure that the GIP master gets passed on to
     1196 * another CPU.  It also updates the associated CPU data.
     1197 *
     1198 * @param   enmEvent    The event.
     1199 * @param   idCpu       The cpu it applies to.
     1200 * @param   pvUser      Pointer to the device extension.
     1201 *
     1202 * @remarks This function -must- fire on the newly online'd CPU for the
     1203 *          RTMPEVENT_ONLINE case and can fire on any CPU for the
     1204 *          RTMPEVENT_OFFLINE case.
     1205 */
     1206static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
     1207{
     1208    PSUPDRVDEVEXT       pDevExt = (PSUPDRVDEVEXT)pvUser;
     1209    PSUPGLOBALINFOPAGE  pGip    = pDevExt->pGip;
     1210
             /* This callback insists on being entered with preemption disabled. */
     1211    AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
     1212
     1213    /*
     1214     * Update the GIP CPU data.
              * Only the online and offline events are handled; the switch has no
              * default case, so any other event leaves the GIP untouched.
     1215     */
     1216    if (pGip)
     1217    {
     1218        switch (enmEvent)
     1219        {
     1220            case RTMPEVENT_ONLINE:
     1221                AssertRelease(idCpu == RTMpCpuId());
     1222                supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
     1223                break;
     1224            case RTMPEVENT_OFFLINE:
     1225                supdrvGipMpEventOffline(pDevExt, idCpu);
     1226                break;
     1227        }
     1228    }
     1229
     1230    /*
     1231     * Make sure there is a master GIP.
              * This part runs even when pGip is NULL.
     1232     */
     1233    if (enmEvent == RTMPEVENT_OFFLINE)
     1234    {
     1235        RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
     1236        if (idGipMaster == idCpu)
     1237        {
     1238            /*
     1239             * The GIP master is going offline, find a new one.
                      * The first online CPU with a different ID is picked; if there
                      * is none, idNewGipMaster stays NIL_RTCPUID.
     1240             */
     1241            bool        fIgnored;
     1242            unsigned    i;
     1243            RTCPUID     idNewGipMaster = NIL_RTCPUID;
     1244            RTCPUSET    OnlineCpus;
     1245            RTMpGetOnlineSet(&OnlineCpus);
     1246
     1247            for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
     1248                if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
     1249                {
     1250                    RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
     1251                    if (idCurCpu != idGipMaster)
     1252                    {
     1253                        idNewGipMaster = idCurCpu;
     1254                        break;
     1255                    }
     1256                }
     1257
     1258            Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
                     /* Compare-and-exchange against the value read above; the
                        outcome is deliberately ignored (fIgnored). */
     1259            ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
     1260            NOREF(fIgnored);
     1261        }
     1262    }
     1263}
     1264
     1265
     1266/**
     1267 * On CPU initialization callback for RTMpOnAll.
     1268 *
     1269 * @param   idCpu               The CPU ID.
     1270 * @param   pvUser1             The device extension.
     1271 * @param   pvUser2             The GIP.
     1272 */
     1273static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
     1274{
     1275    /* This is good enough, even though it will update some of the globals a
     1276       bit too much.  pvUser2 is not used here; the worker fetches the GIP
                from the device extension itself. */
     1277    supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
     1278}
     1279
     1280
     1281/**
     1282 * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
     1283 *
     1284 * @param   idCpu       Ignored.
     1285 * @param   pvUser1     Where to put the TSC.
     1286 * @param   pvUser2     Ignored.
     1287 */
     1288static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
     1289{
             /* Atomically store the TSC as read by the executing CPU into the
                caller's uint64_t (pvUser1). */
     1290    ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
     1291}
     1292
     1293
     1294/**
     1295 * Determine if Async GIP mode is required because of TSC drift.
     1296 *
     1297 * When using the default/normal timer code it is essential that the time stamp counter
     1298 * (TSC) runs never backwards, that is, a read operation to the counter should return
     1299 * a bigger value than any previous read operation. This is guaranteed by the latest
     1300 * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
     1301 * case we have to choose the asynchronous timer mode.
     1302 *
     1303 * @param   poffMin     Pointer to the determined difference between different
     1304 *                      cores (optional, can be NULL).
     1305 * @return  false if the time stamp counters appear to be synchronized, true otherwise.
     1306 */
     1307static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
     1308{
     1309    /*
     1310     * Just iterate all the cpus 8 times and make sure that the TSC is
     1311     * ever increasing. We don't bother taking TSC rollover into account.
     1312     */
     1313    int         iEndCpu = RTMpGetArraySize();
     1314    int         iCpu;
     1315    int         cLoops = 8;
     1316    bool        fAsync = false;
     1317    int         rc = VINF_SUCCESS;
     1318    uint64_t    offMax = 0;
     1319    uint64_t    offMin = ~(uint64_t)0;
     1320    uint64_t    PrevTsc = ASMReadTSC();
     1321
     1322    while (cLoops-- > 0)
     1323    {
     1324        for (iCpu = 0; iCpu < iEndCpu; iCpu++)
     1325        {
     1326            uint64_t CurTsc;
                     /* CurTsc is only read below when the call succeeded. */
     1327            rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker, &CurTsc, NULL);
     1328            if (RT_SUCCESS(rc))
     1329            {
                         /* A reading that is not strictly greater than the previous
                            one means async mode; record the gap and stop. */
     1330                if (CurTsc <= PrevTsc)
     1331                {
     1332                    fAsync = true;
     1333                    offMin = offMax = PrevTsc - CurTsc;
     1334                    Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
     1335                         iCpu, cLoops, CurTsc, PrevTsc));
     1336                    break;
     1337                }
     1338
     1339                /* Gather statistics (except the first time). */
                         /* cLoops was already decremented by the while condition, so
                            the value 7 identifies the first pass over the CPUs. */
     1340                if (iCpu != 0 || cLoops != 7)
     1341                {
     1342                    uint64_t off = CurTsc - PrevTsc;
     1343                    if (off < offMin)
     1344                        offMin = off;
     1345                    if (off > offMax)
     1346                        offMax = off;
     1347                    Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
     1348                }
     1349
     1350                /* Next */
     1351                PrevTsc = CurTsc;
     1352            }
                     /* VERR_NOT_SUPPORTED breaks out with iCpu < iEndCpu, which also
                        ends the outer loop below; fAsync keeps its current value. */
     1353            else if (rc == VERR_NOT_SUPPORTED)
     1354                break;
     1355            else
     1356                AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
     1357        }
     1358
     1359        /* broke out of the loop. */
     1360        if (iCpu < iEndCpu)
     1361            break;
     1362    }
     1363
     1364    if (poffMin)
     1365        *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
     1366    Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
     1367         fAsync, iEndCpu, rc, offMin, offMax));
     1368#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
     1369    OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
     1370#endif
     1371    return fAsync;
     1372}
     1373
     1374
     1375/**
     1376 * supdrvGipInit() worker that determines the GIP TSC mode.
     1377 *
     1378 * @returns The most suitable TSC mode.
     1379 * @param   pDevExt     Pointer to the device instance data.
     1380 */
     1381static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
     1382{
     1383    uint64_t u64DiffCoresIgnored;
     1384    uint32_t uEAX, uEBX, uECX, uEDX;
     1385
     1386    /*
     1387     * Establish whether the CPU advertises TSC as invariant, we need that in
     1388     * a couple of places below.
              * The flag is read from EDX of extended CPUID leaf 0x80000007, when
              * that leaf is reported as available.
     1389     */
     1390    bool fInvariantTsc = false;
     1391    if (ASMHasCpuId())
     1392    {
     1393        uEAX = ASMCpuId_EAX(0x80000000);
     1394        if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
     1395        {
     1396            uEDX = ASMCpuId_EDX(0x80000007);
     1397            if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
     1398                fInvariantTsc = true;
     1399        }
     1400    }
     1401
     1402    /*
     1403     * On single CPU systems, we don't need to consider ASYNC mode.
     1404     */
     1405    if (RTMpGetCount() <= 1)
     1406        return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
     1407
     1408    /*
     1409     * Allow the user and/or OS specific bits to force async mode.
     1410     */
     1411    if (supdrvOSGetForcedAsyncTscMode(pDevExt))
     1412        return SUPGIPMODE_ASYNC_TSC;
     1413
     1414
     1415#if 0 /** @todo enable this when i64TscDelta is applied in all places where it's needed */
     1416    /*
     1417     * Use invariant mode if the CPU says TSC is invariant.
     1418     */
     1419    if (fInvariantTsc)
     1420        return SUPGIPMODE_INVARIANT_TSC;
     1421#endif
     1422
     1423    /*
     1424     * TSC is not invariant and we're on SMP, this presents two problems:
     1425     *
     1426     *      (1) There might be a skew between the CPU, so that cpu0
     1427     *          returns a TSC that is slightly different from cpu1.
     1428     *          This skew may be due to (2), bad TSC initialization
     1429     *          or slightly different TSC rates.
     1430     *
     1431     *      (2) Power management (and other things) may cause the TSC
     1432     *          to run at a non-constant speed, and cause the speed
     1433     *          to be different on the cpus. This will result in (1).
     1434     *
     1435     * If any of the above is detected, we will have to use ASYNC mode.
              *
              * Note: while the "#if 0" block above is disabled, an invariant TSC
              * can still reach this point; it is handled after check (1) below.
     1436     */
     1437
     1438    /* (1). Try check for current differences between the cpus. */
     1439    if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
     1440        return SUPGIPMODE_ASYNC_TSC;
     1441
     1442#if 1 /** @todo remove once i64TscDelta is applied everywhere. Enable #if 0 above. */
     1443    if (fInvariantTsc)
     1444        return SUPGIPMODE_INVARIANT_TSC;
     1445#endif
     1446
     1447    /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
     1448    ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
     1449    if (   ASMIsValidStdRange(uEAX)
     1450        && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
     1451    {
     1452        /* Check for APM support. */
     1453        uEAX = ASMCpuId_EAX(0x80000000);
     1454        if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
     1455        {
     1456            uEDX = ASMCpuId_EDX(0x80000007);
     1457            if (uEDX & 0x3e)  /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
     1458                return SUPGIPMODE_ASYNC_TSC;
     1459        }
     1460    }
     1461
             /* None of the checks above asked for async or invariant mode. */
     1462    return SUPGIPMODE_SYNC_TSC;
     1463}
     1464
     1465
     1466/**
     1467 * Initializes per-CPU GIP information.
     1468 *
     1469 * @param   pDevExt     Pointer to the device instance data.
     1470 * @param   pGip        Pointer to the GIP.
     1471 * @param   pCpu        Pointer to which GIP CPU to initialize.
     1472 * @param   u64NanoTS   The current nanosecond timestamp.
     1473 */
     1474static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
     1475{
     1476    /* !!! Warning !!! The GIP may not be linked to the device instance data at this point!
     1477       which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
     1478    pCpu->u32TransactionId   = 2;
     1479    pCpu->u64NanoTS          = u64NanoTS;
     1480    pCpu->u64TSC             = ASMReadTSC();
     1481    pCpu->u64TSCSample       = GIP_TSC_DELTA_RSVD;
             /* INT64_MAX marks the delta as not yet measured when TSC deltas are
                in use; otherwise the delta is simply zero. */
     1482    pCpu->i64TSCDelta        = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
     1483
             /* Mark the entry as unassigned: invalid state, no CPU ID, no CPU set
                index and no APIC ID. */
     1484    ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
     1485    ASMAtomicWriteSize(&pCpu->idCpu,    NIL_RTCPUID);
     1486    ASMAtomicWriteS16(&pCpu->iCpuSet,   -1);
     1487    ASMAtomicWriteU16(&pCpu->idApic,    UINT16_MAX);
     1488
     1489    /*
     1490     * We don't know the following values until we've executed updates.
     1491     * So, we'll just pretend it's a 4 GHz CPU and adjust the history on
     1492     * the 2nd timer callout.
              *
              * The update interval and all eight history slots get the same
              * placeholder, _4G / u32UpdateHz.
     1493     */
     1494    pCpu->u64CpuHz          = _4G + 1; /* tstGIP-2 depends on this. */
     1495    pCpu->u32UpdateIntervalTSC
     1496        = pCpu->au32TSCHistory[0]
     1497        = pCpu->au32TSCHistory[1]
     1498        = pCpu->au32TSCHistory[2]
     1499        = pCpu->au32TSCHistory[3]
     1500        = pCpu->au32TSCHistory[4]
     1501        = pCpu->au32TSCHistory[5]
     1502        = pCpu->au32TSCHistory[6]
     1503        = pCpu->au32TSCHistory[7]
     1504        = (uint32_t)(_4G / pGip->u32UpdateHz);
     1505}
     1506
     1507
     1508/**
     1509 * Initializes the GIP data.
     1510 *
     1511 * @param   pDevExt             Pointer to the device instance data.
     1512 * @param   pGip                Pointer to the read-write kernel mapping of the GIP.
     1513 * @param   HCPhys              The physical address of the GIP.
     1514 * @param   u64NanoTS           The current nanosecond timestamp.
     1515 * @param   uUpdateHz           The update frequency.
     1516 * @param   uUpdateIntervalNS   The update interval in nanoseconds.
     1517 * @param   cCpus               The CPU count.
     1518 */
     1519static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
     1520                          uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
     1521{
     1522    size_t const    cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
     1523    unsigned        i;
     1524#ifdef DEBUG_DARWIN_GIP
     1525    OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
     1526#else
     1527    LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
     1528#endif
     1529
     1530    /*
     1531     * Initialize the structure.
     1532     */
     1533    memset(pGip, 0, cbGip);
     1534
     1535    pGip->u32Magic                = SUPGLOBALINFOPAGE_MAGIC;
     1536    pGip->u32Version              = SUPGLOBALINFOPAGE_VERSION;
     1537    pGip->u32Mode                 = supdrvGipInitDetermineTscMode(pDevExt);
     1538    if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
     1539        /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
     1540        pGip->enmUseTscDelta      = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
     1541                                  ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
    12611542    else
    1262     {
    1263         RTUINT128U CpuHz, Tmp, Divisor;
    1264         CpuHz.s.Lo = CpuHz.s.Hi = 0;
    1265         RTUInt128MulU64ByU64(&Tmp, u64DeltaTsc, RT_NS_1SEC_64);
    1266         RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, u64DeltaNanoTS));
    1267         pGip->u64CpuHz = CpuHz.s.Lo;
    1268     }
    1269 
    1270     /* Update rest of GIP. */
    1271     Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP().*/
    1272     pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
    1273 }
    1274 
    1275 
    1276 /**
    1277  * Starts the TSC-frequency refinement phase asynchronously.
    1278  *
    1279  * @param   pDevExt        Pointer to the device instance data.
    1280  */
    1281 static void supdrvRefineTscFreq(PSUPDRVDEVEXT pDevExt)
    1282 {
    1283     uint64_t            u64NanoTS;
    1284     RTCCUINTREG         uFlags;
    1285     uint8_t             idApic;
    1286     int                 rc;
    1287     PSUPGLOBALINFOPAGE  pGip;
    1288 
    1289     /* Validate. */
    1290     Assert(pDevExt);
    1291     Assert(pDevExt->pGip);
    1292     pGip = pDevExt->pGip;
    1293 
    1294 #ifdef SUPDRV_USE_TSC_DELTA_THREAD
    1295     /*
    1296      * If the TSC-delta thread is created, wait until it's done calculating
    1297      * the TSC-deltas on the relevant online CPUs before we start the TSC refinement.
    1298      */
    1299     if (   pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
    1300         && ASMAtomicReadS32(&pDevExt->rcTscDelta) == VERR_NOT_AVAILABLE)
    1301     {
    1302         rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
    1303         if (rc == VERR_TIMEOUT)
    1304         {
    1305             SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
    1306             return;
    1307         }
    1308     }
    1309 #endif
    1310 
    1311     /*
    1312      * Record the TSC and NanoTS as the starting anchor point for refinement of the
    1313      * TSC. We deliberately avoid using SUPReadTSC() here as we want to keep the
    1314      * reading of the TSC and the NanoTS as close as possible.
    1315      */
    1316     u64NanoTS = RTTimeSystemNanoTS();
    1317     while (RTTimeSystemNanoTS() == u64NanoTS)
    1318         ASMNopPause();
    1319     uFlags                   = ASMIntDisableFlags();
    1320     idApic                   = ASMGetApicId();
    1321     pDevExt->u64TscAnchor    = ASMReadTSC();
    1322     pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
    1323     ASMSetFlags(uFlags);
    1324     if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
    1325         supdrvTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, NULL /* pfDeltaApplied */);
    1326 
    1327     rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY, supdrvRefineTscTimer, pDevExt);
    1328     if (RT_SUCCESS(rc))
    1329     {
    1330         /*
    1331          * Refine the TSC frequency measurement over a long interval. Ideally, we want to keep the
    1332          * interval as small as possible while gaining the most consistent and accurate frequency
    1333          * (compared to what the host OS might have measured).
    1334          *
    1335          * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
    1336          * same TSC frequency whenever possible so we need to keep the interval short.
    1337          */
    1338         rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
    1339         AssertRC(rc);
    1340     }
    1341     else
    1342         OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
     1543        pGip->enmUseTscDelta      = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
     1544    pGip->cCpus                   = (uint16_t)cCpus;
     1545    pGip->cPages                  = (uint16_t)(cbGip / PAGE_SIZE);
     1546    pGip->u32UpdateHz             = uUpdateHz;
     1547    pGip->u32UpdateIntervalNS     = uUpdateIntervalNS;
     1548    pGip->fGetGipCpu              = SUPGIPGETCPU_APIC_ID;
     1549    RTCpuSetEmpty(&pGip->OnlineCpuSet);
     1550    RTCpuSetEmpty(&pGip->PresentCpuSet);
     1551    RTMpGetSet(&pGip->PossibleCpuSet);
     1552    pGip->cOnlineCpus             = RTMpGetOnlineCount();
     1553    pGip->cPresentCpus            = RTMpGetPresentCount();
     1554    pGip->cPossibleCpus           = RTMpGetCount();
     1555    pGip->idCpuMax                = RTMpGetMaxCpuId();
     1556    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
     1557        pGip->aiCpuFromApicId[i]    = UINT16_MAX;
     1558    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
     1559        pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
     1560    for (i = 0; i < cCpus; i++)
     1561        supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
     1562
     1563    /*
     1564     * Link it to the device extension.
     1565     */
     1566    pDevExt->pGip      = pGip;
     1567    pDevExt->HCPhysGip = HCPhys;
     1568    pDevExt->cGipUsers = 0;
    13431569}
    13441570
     
    14651691                if (RT_SUCCESS(rc))
    14661692                {
    1467                     rc = supdrvGipMeasureTscFreq(pDevExt);
     1693                    rc = supdrvGipInitMeasureTscFreq(pDevExt);
    14681694                    if (RT_SUCCESS(rc))
    14691695                    {
     
    14741700                        if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
    14751701                        {
    1476                             rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer,
    1477                                                  pDevExt);
     1702                            rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
     1703                                                 supdrvGipAsyncTimer, pDevExt);
    14781704                            if (rc == VERR_NOT_SUPPORTED)
    14791705                            {
     
    14951721                            g_pSUPGlobalInfoPage = pGip;
    14961722                            if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
    1497                                 supdrvRefineTscFreq(pDevExt);
     1723                                supdrvGipInitAsyncRefineTscFreq(pDevExt);
    14981724                            return VINF_SUCCESS;
    14991725                        }
     
    15031729                    }
    15041730                    else
    1505                         OSDBGPRINT(("supdrvGipCreate: supdrvGipMeasureTscFreq failed. rc=%Rrc\n", rc));
     1731                        OSDBGPRINT(("supdrvGipCreate: supdrvGipInitMeasureTscFreq failed. rc=%Rrc\n", rc));
    15061732                }
    15071733                else
     
    15231749
    15241750/**
     1751 * Invalidates the GIP data upon termination.
     1752 *
     1753 * @param   pGip        Pointer to the read-write kernel mapping of the GIP.
     1754 */
     1755static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
     1756{
     1757    unsigned i;
     1758    pGip->u32Magic = 0;
     1759    for (i = 0; i < pGip->cCpus; i++)
     1760    {
     1761        pGip->aCPUs[i].u64NanoTS = 0;
     1762        pGip->aCPUs[i].u64TSC = 0;
     1763        pGip->aCPUs[i].iTSCHistoryHead = 0;
     1764        pGip->aCPUs[i].u64TSCSample = 0;
     1765        pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
     1766    }
     1767}
     1768
     1769
     1770/**
    15251771 * Terminates the GIP.
    15261772 *
     
    15941840     */
    15951841    supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
     1842}
     1843
     1844
     1845
     1846
     1847/*
     1848 *
     1849 *
     1850 * GIP Update Timer Related Code
     1851 * GIP Update Timer Related Code
     1852 * GIP Update Timer Related Code
     1853 *
     1854 *
     1855 */
     1856
     1857
     1858/**
     1859 * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
     1860 * updates all the per cpu data except the transaction id.
     1861 *
     1862 * @param   pDevExt         The device extension.
     1863 * @param   pGipCpu         Pointer to the per cpu data.
     1864 * @param   u64NanoTS       The current time stamp.
     1865 * @param   u64TSC          The current TSC.
     1866 * @param   iTick           The current timer tick.
     1867 *
     1868 * @remarks Can be called with interrupts disabled!
     1869 */
     1870static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
     1871{
     1872    uint64_t    u64TSCDelta;
     1873    uint32_t    u32UpdateIntervalTSC;
     1874    uint32_t    u32UpdateIntervalTSCSlack;
     1875    unsigned    iTSCHistoryHead;
     1876    uint64_t    u64CpuHz;
     1877    uint32_t    u32TransactionId;
     1878
     1879    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
     1880    AssertPtrReturnVoid(pGip);
     1881
     1882    /* Delta between this and the previous update. */
     1883    ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
     1884
     1885    /*
     1886     * Update the NanoTS.
     1887     */
     1888    ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
     1889
     1890    /*
     1891     * Calc TSC delta.
     1892     */
     1893    u64TSCDelta = u64TSC - pGipCpu->u64TSC;
     1894    ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
     1895
     1896    /* We don't need to keep realculating the frequency when it's invariant. */
     1897    if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
     1898        return;
     1899
     1900    if (u64TSCDelta >> 32)
     1901    {
     1902        u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
     1903        pGipCpu->cErrors++;
     1904    }
     1905
     1906    /*
     1907     * On the 2nd and 3rd callout, reset the history with the current TSC
     1908     * interval since the values entered by supdrvGipInit are totally off.
     1909     * The interval on the 1st callout completely unreliable, the 2nd is a bit
     1910     * better, while the 3rd should be most reliable.
     1911     */
     1912    u32TransactionId = pGipCpu->u32TransactionId;
     1913    if (RT_UNLIKELY(   (   u32TransactionId == 5
     1914                        || u32TransactionId == 7)
     1915                    && (   iTick == 2
     1916                        || iTick == 3) ))
     1917    {
     1918        unsigned i;
     1919        for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
     1920            ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
     1921    }
     1922
     1923    /*
     1924     * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
     1925     * Wait until we have at least one full history since the above history reset. The
     1926     * assumption is that the majority of the previous history values will be tolerable.
     1927     * See @bugref{6710} comment #67.
     1928     */
     1929    if (   u32TransactionId > 23 /* 7 + (8 * 2) */
     1930        && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
     1931    {
     1932        uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
     1933        if (   pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
     1934            || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
     1935        {
     1936            uint32_t u32;
     1937            u32  = pGipCpu->au32TSCHistory[0];
     1938            u32 += pGipCpu->au32TSCHistory[1];
     1939            u32 += pGipCpu->au32TSCHistory[2];
     1940            u32 += pGipCpu->au32TSCHistory[3];
     1941            u32 >>= 2;
     1942            u64TSCDelta  = pGipCpu->au32TSCHistory[4];
     1943            u64TSCDelta += pGipCpu->au32TSCHistory[5];
     1944            u64TSCDelta += pGipCpu->au32TSCHistory[6];
     1945            u64TSCDelta += pGipCpu->au32TSCHistory[7];
     1946            u64TSCDelta >>= 2;
     1947            u64TSCDelta += u32;
     1948            u64TSCDelta >>= 1;
     1949        }
     1950    }
     1951
     1952    /*
     1953     * TSC History.
     1954     */
     1955    Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
     1956    iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
     1957    ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
     1958    ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
     1959
     1960    /*
     1961     * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
     1962     *
     1963     * On Windows, we have an occasional (but recurring) sour value that messed up
     1964     * the history but taking only 1 interval reduces the precision overall.
     1965     * However, this problem existed before the invariant mode was introduced.
     1966     */
     1967    if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
     1968        || pGip->u32UpdateHz >= 1000)
     1969    {
     1970        uint32_t u32;
     1971        u32  = pGipCpu->au32TSCHistory[0];
     1972        u32 += pGipCpu->au32TSCHistory[1];
     1973        u32 += pGipCpu->au32TSCHistory[2];
     1974        u32 += pGipCpu->au32TSCHistory[3];
     1975        u32 >>= 2;
     1976        u32UpdateIntervalTSC  = pGipCpu->au32TSCHistory[4];
     1977        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
     1978        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
     1979        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
     1980        u32UpdateIntervalTSC >>= 2;
     1981        u32UpdateIntervalTSC += u32;
     1982        u32UpdateIntervalTSC >>= 1;
     1983
     1984        /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
     1985        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
     1986    }
     1987    else if (pGip->u32UpdateHz >= 90)
     1988    {
     1989        u32UpdateIntervalTSC  = (uint32_t)u64TSCDelta;
     1990        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
     1991        u32UpdateIntervalTSC >>= 1;
     1992
     1993        /* value chosen on a 2GHz thinkpad running windows */
     1994        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
     1995    }
     1996    else
     1997    {
     1998        u32UpdateIntervalTSC  = (uint32_t)u64TSCDelta;
     1999
     2000        /* This value hasn't be checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
     2001        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
     2002    }
     2003    ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
     2004
     2005    /*
     2006     * CpuHz.
     2007     */
     2008    u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
     2009    u64CpuHz /= pGip->u32UpdateIntervalNS;
     2010    ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
     2011}
     2012
     2013
     2014/**
     2015 * Updates the GIP.
     2016 *
     2017 * @param   pDevExt         The device extension.
     2018 * @param   u64NanoTS       The current nanosecond timesamp.
     2019 * @param   u64TSC          The current TSC timesamp.
     2020 * @param   idCpu           The CPU ID.
     2021 * @param   iTick           The current timer tick.
     2022 *
     2023 * @remarks Can be called with interrupts disabled!
     2024 */
     2025static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
     2026{
     2027    /*
     2028     * Determine the relevant CPU data.
     2029     */
     2030    PSUPGIPCPU pGipCpu;
     2031    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
     2032    AssertPtrReturnVoid(pGip);
     2033
     2034    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
     2035        pGipCpu = &pGip->aCPUs[0];
     2036    else
     2037    {
     2038        unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
     2039        if (RT_UNLIKELY(iCpu >= pGip->cCpus))
     2040            return;
     2041        pGipCpu = &pGip->aCPUs[iCpu];
     2042        if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
     2043            return;
     2044    }
     2045
     2046    /*
     2047     * Start update transaction.
     2048     */
     2049    if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
     2050    {
     2051        /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
     2052        AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
     2053        ASMAtomicIncU32(&pGipCpu->u32TransactionId);
     2054        pGipCpu->cErrors++;
     2055        return;
     2056    }
     2057
     2058    /*
     2059     * Recalc the update frequency every 0x800th time.
     2060     */
     2061    if (   pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC   /* cuz we're not recalculating the frequency on invariants hosts. */
     2062        && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
     2063    {
     2064        if (pGip->u64NanoTSLastUpdateHz)
     2065        {
     2066#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
     2067            uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
     2068            uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
     2069            if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
     2070            {
     2071                /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
     2072                 *        calculation on non-invariant hosts if it changes the history decision
     2073                 *        taken in supdrvGipDoUpdateCpu(). */
     2074                uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
     2075                ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
     2076                ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
     2077            }
     2078#endif
     2079        }
     2080        ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
     2081    }
     2082
     2083    /*
     2084     * Update the data.
     2085     */
     2086    supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
     2087
     2088    /*
     2089     * Complete transaction.
     2090     */
     2091    ASMAtomicIncU32(&pGipCpu->u32TransactionId);
     2092}
     2093
     2094
     2095/**
     2096 * Updates the per cpu GIP data for the calling cpu.
     2097 *
     2098 * @param   pDevExt         The device extension.
     2099 * @param   u64NanoTS       The current nanosecond timesamp.
     2100 * @param   u64TSC          The current TSC timesamp.
     2101 * @param   idCpu           The CPU ID.
     2102 * @param   idApic          The APIC id for the CPU index.
     2103 * @param   iTick           The current timer tick.
     2104 *
     2105 * @remarks Can be called with interrupts disabled!
     2106 */
     2107static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
     2108                                  RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
     2109{
     2110    uint32_t iCpu;
     2111    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
     2112
     2113    /*
     2114     * Avoid a potential race when a CPU online notification doesn't fire on
     2115     * the onlined CPU but the tick creeps in before the event notification is
     2116     * run.
     2117     */
     2118    if (RT_UNLIKELY(iTick == 1))
     2119    {
     2120        iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
     2121        if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
     2122            supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
     2123    }
     2124
     2125    iCpu = pGip->aiCpuFromApicId[idApic];
     2126    if (RT_LIKELY(iCpu < pGip->cCpus))
     2127    {
     2128        PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
     2129        if (pGipCpu->idCpu == idCpu)
     2130        {
     2131            /*
     2132             * Start update transaction.
     2133             */
     2134            if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
     2135            {
     2136                AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
     2137                ASMAtomicIncU32(&pGipCpu->u32TransactionId);
     2138                pGipCpu->cErrors++;
     2139                return;
     2140            }
     2141
     2142            /*
     2143             * Update the data.
     2144             */
     2145            supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
     2146
     2147            /*
     2148             * Complete transaction.
     2149             */
     2150            ASMAtomicIncU32(&pGipCpu->u32TransactionId);
     2151        }
     2152    }
    15962153}
    15972154
     
    16752232
    16762233
    1677 /**
    1678  * Finds our (@a idCpu) entry, or allocates a new one if not found.
    1679  *
    1680  * @returns Index of the CPU in the cache set.
    1681  * @param   pGip                The GIP.
    1682  * @param   idCpu               The CPU ID.
    1683  */
    1684 static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
    1685 {
    1686     uint32_t i, cTries;
    1687 
    1688     /*
    1689      * ASSUMES that CPU IDs are constant.
    1690      */
    1691     for (i = 0; i < pGip->cCpus; i++)
    1692         if (pGip->aCPUs[i].idCpu == idCpu)
    1693             return i;
    1694 
    1695     cTries = 0;
    1696     do
    1697     {
    1698         for (i = 0; i < pGip->cCpus; i++)
    1699         {
    1700             bool fRc;
    1701             ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
    1702             if (fRc)
    1703                 return i;
    1704         }
    1705     } while (cTries++ < 32);
    1706     AssertReleaseFailed();
    1707     return i - 1;
    1708 }
    1709 
    1710 
    1711 /**
    1712  * Finds the GIP CPU index corresponding to @a idCpu.
    1713  *
    1714  * @returns GIP CPU array index, UINT32_MAX if not found.
    1715  * @param   pGip                The GIP.
    1716  * @param   idCpu               The CPU ID.
    1717  */
    1718 static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
    1719 {
    1720     uint32_t i;
    1721     for (i = 0; i < pGip->cCpus; i++)
    1722         if (pGip->aCPUs[i].idCpu == idCpu)
    1723             return i;
    1724     return UINT32_MAX;
    1725 }
    1726 
    1727 
    1728 /**
    1729  * The calling CPU should be accounted as online, update GIP accordingly.
    1730  *
    1731  * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
    1732  *
    1733  * @param   pDevExt             The device extension.
    1734  * @param   idCpu               The CPU ID.
    1735  */
    1736 static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
    1737 {
    1738     int         iCpuSet = 0;
    1739     uint16_t    idApic = UINT16_MAX;
    1740     uint32_t    i = 0;
    1741     uint64_t    u64NanoTS = 0;
    1742     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    1743 
    1744     AssertPtrReturnVoid(pGip);
    1745     AssertRelease(idCpu == RTMpCpuId());
    1746     Assert(pGip->cPossibleCpus == RTMpGetCount());
    1747 
    1748     /*
    1749      * Do this behind a spinlock with interrupts disabled as this can fire
    1750      * on all CPUs simultaneously, see @bugref{6110}.
    1751      */
    1752     RTSpinlockAcquire(pDevExt->hGipSpinlock);
    1753 
    1754     /*
    1755      * Update the globals.
    1756      */
    1757     ASMAtomicWriteU16(&pGip->cPresentCpus,  RTMpGetPresentCount());
    1758     ASMAtomicWriteU16(&pGip->cOnlineCpus,   RTMpGetOnlineCount());
    1759     iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    1760     if (iCpuSet >= 0)
    1761     {
    1762         Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
    1763         RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
    1764         RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
    1765     }
    1766 
    1767     /*
    1768      * Update the entry.
    1769      */
    1770     u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
    1771     i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
    1772     supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
    1773     idApic = ASMGetApicId();
    1774     ASMAtomicWriteU16(&pGip->aCPUs[i].idApic,  idApic);
    1775     ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
    1776     ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu,  idCpu);
    1777 
    1778     /*
    1779      * Update the APIC ID and CPU set index mappings.
    1780      */
    1781     ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic],     i);
    1782     ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
    1783 
    1784     /* Update the Mp online/offline counter. */
    1785     ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
    1786 
    1787     /* Add this CPU to the set of CPUs for which we need to calculate their TSC-deltas. */
    1788     if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    1789     {
    1790         RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
    1791 #ifdef SUPDRV_USE_TSC_DELTA_THREAD
    1792         RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
    1793         if (   pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
    1794             || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
    1795         {
    1796             pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
    1797         }
    1798         RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
    1799 #endif
    1800     }
    1801 
    1802     /* commit it */
    1803     ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
    1804 
    1805     RTSpinlockRelease(pDevExt->hGipSpinlock);
    1806 }
    1807 
    1808 
    1809 /**
    1810  * The CPU should be accounted as offline, update the GIP accordingly.
    1811  *
    1812  * This is used by supdrvGipMpEvent.
    1813  *
    1814  * @param   pDevExt             The device extension.
    1815  * @param   idCpu               The CPU ID.
    1816  */
    1817 static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
    1818 {
    1819     int         iCpuSet;
    1820     unsigned    i;
    1821 
    1822     PSUPGLOBALINFOPAGE pGip   = pDevExt->pGip;
    1823 
    1824     AssertPtrReturnVoid(pGip);
    1825     RTSpinlockAcquire(pDevExt->hGipSpinlock);
    1826 
    1827     iCpuSet = RTMpCpuIdToSetIndex(idCpu);
    1828     AssertReturnVoid(iCpuSet >= 0);
    1829 
    1830     i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
    1831     AssertReturnVoid(i < pGip->cCpus);
    1832     AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
    1833 
    1834     Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
    1835     RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
    1836 
    1837     /* Update the Mp online/offline counter. */
    1838     ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
    1839 
    1840     /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
    1841     if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
    1842     {
    1843         ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
    1844         ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
    1845     }
    1846 
    1847     if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
    1848     {
    1849         /* Reset the TSC delta, we will recalculate it lazily. */
    1850         ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
    1851         /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
    1852         RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
    1853     }
    1854 
    1855     /* commit it */
    1856     ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
    1857 
    1858     RTSpinlockRelease(pDevExt->hGipSpinlock);
    1859 }
    1860 
    1861 
    1862 /**
    1863  * Multiprocessor event notification callback.
    1864  *
    1865  * This is used to make sure that the GIP master gets passed on to
    1866  * another CPU.  It also updates the associated CPU data.
    1867  *
    1868  * @param   enmEvent    The event.
    1869  * @param   idCpu       The cpu it applies to.
    1870  * @param   pvUser      Pointer to the device extension.
    1871  *
    1872  * @remarks This function -must- fire on the newly online'd CPU for the
    1873  *          RTMPEVENT_ONLINE case and can fire on any CPU for the
    1874  *          RTMPEVENT_OFFLINE case.
    1875  */
    1876 static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
    1877 {
    1878     PSUPDRVDEVEXT       pDevExt = (PSUPDRVDEVEXT)pvUser;
    1879     PSUPGLOBALINFOPAGE  pGip    = pDevExt->pGip;
    1880 
    1881     AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
    1882 
    1883     /*
    1884      * Update the GIP CPU data.
    1885      */
    1886     if (pGip)
    1887     {
    1888         switch (enmEvent)
    1889         {
    1890             case RTMPEVENT_ONLINE:
    1891                 AssertRelease(idCpu == RTMpCpuId());
    1892                 supdrvGipMpEventOnline(pDevExt, idCpu);
    1893                 break;
    1894             case RTMPEVENT_OFFLINE:
    1895                 supdrvGipMpEventOffline(pDevExt, idCpu);
    1896                 break;
    1897         }
    1898     }
    1899 
    1900     /*
    1901      * Make sure there is a master GIP.
    1902      */
    1903     if (enmEvent == RTMPEVENT_OFFLINE)
    1904     {
    1905         RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
    1906         if (idGipMaster == idCpu)
    1907         {
    1908             /*
    1909              * The GIP master is going offline, find a new one.
    1910              */
    1911             bool        fIgnored;
    1912             unsigned    i;
    1913             RTCPUID     idNewGipMaster = NIL_RTCPUID;
    1914             RTCPUSET    OnlineCpus;
    1915             RTMpGetOnlineSet(&OnlineCpus);
    1916 
    1917             for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
    1918                 if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
    1919                 {
    1920                     RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
    1921                     if (idCurCpu != idGipMaster)
    1922                     {
    1923                         idNewGipMaster = idCurCpu;
    1924                         break;
    1925                     }
    1926                 }
    1927 
    1928             Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
    1929             ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
    1930             NOREF(fIgnored);
    1931         }
    1932     }
    1933 }
    1934 
    1935 
    1936 /*
     2234
     2235
     2236/*
     2237 *
     2238 *
     2239 * TSC Delta Measurements And Related Code
     2240 * TSC Delta Measurements And Related Code
     2241 * TSC Delta Measurements And Related Code
     2242 *
     2243 *
     2244 */
     2245
     2246
     2247/*
    19372248 * Select TSC delta measurement algorithm.
    19382249 */
     
    19462257#ifdef GIP_TSC_DELTA_METHOD_2
    19472258
    1948 /** 
     2259/**
    19492260 * TSC delta measurement algorithm \#2 result entry.
    19502261 */
     
    19562267} SUPDRVTSCDELTAMETHOD2ENTRY;
    19572268
    1958 /** 
     2269/**
    19592270 * TSC delta measurement algorithm \#2 Data.
    19602271 */
     
    19772288#endif /* GIP_TSC_DELTA_METHOD_2 */
    19782289
    1979 /** 
    1980  * Argument package/state passed by supdrvMeasureTscDeltaOne to the RTMpOn 
    1981  * callback worker. 
     2290/**
     2291 * Argument package/state passed by supdrvMeasureTscDeltaOne to the RTMpOn
     2292 * callback worker.
    19822293 */
    19832294typedef struct SUPDRVGIPTSCDELTARGS
     
    20012312
    20022313#ifdef GIP_TSC_DELTA_METHOD_2
    2003 /* 
     2314/*
    20042315 * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
    20052316 */
     
    20662377
    20672378/**
    2068  * The core function of the 2nd TSC delta mesurment algorithm. 
    2069  * 
    2070  * The idea here is that we have the two CPUs execute the exact same code 
    2071  * collecting a largish set of TSC samples.  The code has one data dependency on 
    2072  * the other CPU which intention it is to synchronize the execution as well as 
    2073  * help cross references the two sets of TSC samples (the sequence numbers). 
    2074  * 
     2379 * The core function of the 2nd TSC delta measurement algorithm.
     2380 *
     2381 * The idea here is that we have the two CPUs execute the exact same code
     2382 * collecting a largish set of TSC samples.  The code has one data dependency on
     2383 * the other CPU which intention it is to synchronize the execution as well as
     2384 * help cross references the two sets of TSC samples (the sequence numbers).
     2385 *
    20752386 * The @a fLag parameter is used to modify the execution a tiny bit on one or
    20762387 * both of the CPUs.  When @a fLag differs between the CPUs, it is thought that
    20772388 * it will help with making the CPUs enter lock step execution occasionally.
    2078  * 
     2389 *
    20792390 */
    20802391static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
     
    27293040
    27303041
    2731 /**
    2732  * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
    2733  *
    2734  * @param   idCpu       Ignored.
    2735  * @param   pvUser1     Where to put the TSC.
    2736  * @param   pvUser2     Ignored.
    2737  */
    2738 static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
    2739 {
    2740     ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
    2741 }
    2742 
    2743 
    2744 /**
    2745  * Determine if Async GIP mode is required because of TSC drift.
    2746  *
    2747  * When using the default/normal timer code it is essential that the time stamp counter
    2748  * (TSC) runs never backwards, that is, a read operation to the counter should return
    2749  * a bigger value than any previous read operation. This is guaranteed by the latest
    2750  * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
    2751  * case we have to choose the asynchronous timer mode.
    2752  *
    2753  * @param   poffMin     Pointer to the determined difference between different
    2754  *                      cores (optional, can be NULL).
    2755  * @return  false if the time stamp counters appear to be synchronized, true otherwise.
    2756  */
    2757 static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
    2758 {
    2759     /*
    2760      * Just iterate all the cpus 8 times and make sure that the TSC is
    2761      * ever increasing. We don't bother taking TSC rollover into account.
    2762      */
    2763     int         iEndCpu = RTMpGetArraySize();
    2764     int         iCpu;
    2765     int         cLoops = 8;
    2766     bool        fAsync = false;
    2767     int         rc = VINF_SUCCESS;
    2768     uint64_t    offMax = 0;
    2769     uint64_t    offMin = ~(uint64_t)0;
    2770     uint64_t    PrevTsc = ASMReadTSC();
    2771 
    2772     while (cLoops-- > 0)
    2773     {
    2774         for (iCpu = 0; iCpu < iEndCpu; iCpu++)
    2775         {
    2776             uint64_t CurTsc;
    2777             rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
     3042#ifdef SUPDRV_USE_TSC_DELTA_THREAD
     3043
     3044/**
     3045 * Switches the TSC-delta measurement thread into the butchered state.
     3046 *
     3047 * @returns VBox status code.
     3048 * @param pDevExt           Pointer to the device instance data.
     3049 * @param fSpinlockHeld     Whether the TSC-delta spinlock is held or not.
     3050 * @param pszFailed         An error message to log.
     3051 * @param rcFailed          The error code to exit the thread with.
     3052 */
     3053static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
     3054{
     3055    if (!fSpinlockHeld)
     3056        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
     3057
     3058    pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
     3059    RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
     3060    OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", rcFailed));
     3061    return rcFailed;
     3062}
     3063
     3064
     3065/**
     3066 * The TSC-delta measurement thread.
     3067 *
     3068 * @returns VBox status code.
     3069 * @param hThread   The thread handle.
     3070 * @param pvUser    Opaque pointer to the device instance data.
     3071 */
     3072static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
     3073{
     3074    PSUPDRVDEVEXT     pDevExt = (PSUPDRVDEVEXT)pvUser;
     3075    bool              fInitialMeasurement = true;
     3076    uint32_t          cConsecutiveTimeouts = 0;
     3077    int               rc = VERR_INTERNAL_ERROR_2;
     3078    for (;;)
     3079    {
     3080        /*
     3081         * Switch on the current state.
     3082         */
     3083        SUPDRVTSCDELTATHREADSTATE enmState;
     3084        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
     3085        enmState = pDevExt->enmTscDeltaThreadState;
     3086        switch (enmState)
     3087        {
     3088            case kTscDeltaThreadState_Creating:
     3089            {
     3090                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
     3091                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
     3092                if (RT_FAILURE(rc))
     3093                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
     3094                /* fall thru */
     3095            }
     3096
     3097            case kTscDeltaThreadState_Listening:
     3098            {
     3099                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
     3100
     3101                /* Simple adaptive timeout. */
     3102                if (cConsecutiveTimeouts++ == 10)
     3103                {
     3104                    if (pDevExt->cMsTscDeltaTimeout == 1)           /* 10 ms */
     3105                        pDevExt->cMsTscDeltaTimeout = 10;
     3106                    else if (pDevExt->cMsTscDeltaTimeout == 10)     /* +100 ms */
     3107                        pDevExt->cMsTscDeltaTimeout = 100;
     3108                    else if (pDevExt->cMsTscDeltaTimeout == 100)    /* +1000 ms */
     3109                        pDevExt->cMsTscDeltaTimeout = 500;
     3110                    cConsecutiveTimeouts = 0;
     3111                }
     3112                rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
     3113                if (   RT_FAILURE(rc)
     3114                    && rc != VERR_TIMEOUT)
     3115                    return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
     3116                RTThreadUserReset(pDevExt->hTscDeltaThread);
     3117                break;
     3118            }
     3119
     3120            case kTscDeltaThreadState_WaitAndMeasure:
     3121            {
     3122                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
     3123                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
     3124                if (RT_FAILURE(rc))
     3125                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
     3126                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
     3127                pDevExt->cMsTscDeltaTimeout = 1;
     3128                RTThreadSleep(10);
     3129                /* fall thru */
     3130            }
     3131
     3132            case kTscDeltaThreadState_Measuring:
     3133            {
     3134                cConsecutiveTimeouts = 0;
     3135                if (fInitialMeasurement)
     3136                {
     3137                    int cTries = 8;
     3138                    int cMsWaitPerTry = 10;
     3139                    fInitialMeasurement = false;
     3140                    do
     3141                    {
     3142                        rc = supdrvMeasureInitialTscDeltas(pDevExt);
     3143                        if (   RT_SUCCESS(rc)
     3144                            || (   RT_FAILURE(rc)
     3145                                && rc != VERR_TRY_AGAIN
     3146                                && rc != VERR_CPU_OFFLINE))
     3147                        {
     3148                            break;
     3149                        }
     3150                        RTThreadSleep(cMsWaitPerTry);
     3151                    } while (cTries-- > 0);
     3152                }
     3153                else
     3154                {
     3155                    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
     3156                    unsigned iCpu;
     3157
     3158                    /* Measure TSC-deltas only for the CPUs that are in the set. */
     3159                    rc = VINF_SUCCESS;
     3160                    for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
     3161                    {
     3162                        PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
     3163                        if (   pGipCpuWorker->i64TSCDelta == INT64_MAX
     3164                            && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
     3165                        {
     3166                            rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
     3167                        }
     3168                    }
     3169                }
     3170                RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
     3171                if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
     3172                    pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
     3173                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
     3174                Assert(rc != VERR_NOT_AVAILABLE);   /* VERR_NOT_AVAILABLE is used as the initial value. */
     3175                ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
     3176                break;
     3177            }
     3178
     3179            case kTscDeltaThreadState_Terminating:
     3180                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
     3181                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
     3182                return VINF_SUCCESS;
     3183
     3184            case kTscDeltaThreadState_Butchered:
     3185            default:
     3186                return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
     3187        }
     3188    }
     3189
     3190    return rc;
     3191}
     3192
     3193
     3194/**
     3195 * Waits for the TSC-delta measurement thread to respond to a state change.
     3196 *
     3197 * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
     3198 *          other error code on internal error.
     3199 *
     3200 * @param   pThis           Pointer to the grant service instance data.
     3201 * @param   enmCurState     The current state.
     3202 * @param   enmNewState     The new state we're waiting for it to enter.
     3203 */
     3204static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
     3205                                    SUPDRVTSCDELTATHREADSTATE enmNewState)
     3206{
     3207    /*
     3208     * Wait a short while for the expected state transition.
     3209     */
     3210    int rc;
     3211    RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
     3212    RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
     3213    if (pDevExt->enmTscDeltaThreadState == enmNewState)
     3214    {
     3215        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
     3216        rc = VINF_SUCCESS;
     3217    }
     3218    else if (pDevExt->enmTscDeltaThreadState == enmCurState)
     3219    {
     3220        /*
     3221         * Wait longer if the state has not yet transitioned to the one we want.
     3222         */
     3223        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
     3224        rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
     3225        if (   RT_SUCCESS(rc)
     3226            || rc == VERR_TIMEOUT)
     3227        {
     3228            /*
     3229             * Check the state whether we've succeeded.
     3230             */
     3231            SUPDRVTSCDELTATHREADSTATE enmState;
     3232            RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
     3233            enmState = pDevExt->enmTscDeltaThreadState;
     3234            RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
     3235            if (enmState == enmNewState)
     3236                rc = VINF_SUCCESS;
     3237            else if (enmState == enmCurState)
     3238            {
     3239                rc = VERR_TIMEOUT;
     3240                OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
     3241                            enmNewState));
     3242            }
     3243            else
     3244            {
     3245                rc = VERR_INTERNAL_ERROR;
     3246                OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
     3247                            enmState, enmNewState));
     3248            }
     3249        }
     3250        else
     3251            OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
     3252    }
     3253    else
     3254    {
     3255        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
     3256        OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
     3257        rc = VERR_INTERNAL_ERROR;
     3258    }
     3259
     3260    return rc;
     3261}
     3262
     3263
     3264/**
     3265 * Terminates the TSC-delta measurement thread.
     3266 *
     3267 * @param   pDevExt   Pointer to the device instance data.
     3268 */
     3269static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
     3270{
     3271    int rc;
     3272    RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
     3273    pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
     3274    RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
     3275    RTThreadUserSignal(pDevExt->hTscDeltaThread);
     3276    rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
     3277    if (RT_FAILURE(rc))
     3278    {
     3279        /* Signal a few more times before giving up. */
     3280        int cTriesLeft = 5;
     3281        while (--cTriesLeft > 0)
     3282        {
     3283            RTThreadUserSignal(pDevExt->hTscDeltaThread);
     3284            rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
     3285            if (rc != VERR_TIMEOUT)
     3286                break;
     3287        }
     3288    }
     3289}
     3290
     3291
     3292/**
     3293 * Initializes and spawns the TSC-delta measurement thread.
     3294 *
     3295 * A thread is required for servicing re-measurement requests from events like
     3296 * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
     3297 * under all contexts on all OSs.
     3298 *
     3299 * @returns VBox status code.
     3300 * @param   pDevExt           Pointer to the device instance data.
     3301 *
     3302 * @remarks Must only be called -after- initializing GIP and setting up MP
     3303 *          notifications!
     3304 */
     3305static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
     3306{
     3307    int rc;
     3308    Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
     3309    rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
     3310    if (RT_SUCCESS(rc))
     3311    {
     3312        rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
     3313        if (RT_SUCCESS(rc))
     3314        {
     3315            pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
     3316            pDevExt->cMsTscDeltaTimeout = 1;
     3317            rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
     3318                                RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
    27783319            if (RT_SUCCESS(rc))
    27793320            {
    2780                 if (CurTsc <= PrevTsc)
     3321                rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
     3322                if (RT_SUCCESS(rc))
    27813323                {
    2782                     fAsync = true;
    2783                     offMin = offMax = PrevTsc - CurTsc;
    2784                     Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
    2785                          iCpu, cLoops, CurTsc, PrevTsc));
    2786                     break;
     3324                    ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
     3325                    return rc;
    27873326                }
    27883327
    2789                 /* Gather statistics (except the first time). */
    2790                 if (iCpu != 0 || cLoops != 7)
    2791                 {
    2792                     uint64_t off = CurTsc - PrevTsc;
    2793                     if (off < offMin)
    2794                         offMin = off;
    2795                     if (off > offMax)
    2796                         offMax = off;
    2797                     Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
    2798                 }
    2799 
    2800                 /* Next */
    2801                 PrevTsc = CurTsc;
     3328                OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
     3329                supdrvTscDeltaThreadTerminate(pDevExt);
    28023330            }
    2803             else if (rc == VERR_NOT_SUPPORTED)
    2804                 break;
    28053331            else
    2806                 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
    2807         }
    2808 
    2809         /* broke out of the loop. */
    2810         if (iCpu < iEndCpu)
     3332                OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
     3333            RTSemEventDestroy(pDevExt->hTscDeltaEvent);
     3334            pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
     3335        }
     3336        else
     3337            OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
     3338        RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
     3339        pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
     3340    }
     3341    else
     3342        OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
     3343
     3344    return rc;
     3345}
     3346
     3347
     3348/**
     3349 * Terminates the TSC-delta measurement thread and cleanup.
     3350 *
     3351 * @param   pDevExt         Pointer to the device instance data.
     3352 */
     3353static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
     3354{
     3355    if (   pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
     3356        && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
     3357    {
     3358        supdrvTscDeltaThreadTerminate(pDevExt);
     3359    }
     3360
     3361    if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
     3362    {
     3363        RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
     3364        pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
     3365    }
     3366
     3367    if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
     3368    {
     3369        RTSemEventDestroy(pDevExt->hTscDeltaEvent);
     3370        pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
     3371    }
     3372
     3373    ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
     3374}
     3375
     3376
     3377/**
     3378 * Waits for TSC-delta measurements to be completed for all online CPUs.
     3379 *
     3380 * @returns VBox status code.
     3381 * @param   pDevExt         Pointer to the device instance data.
     3382 */
     3383static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
     3384{
     3385    int cTriesLeft = 5;
     3386    int cMsTotalWait;
     3387    int cMsWaited = 0;
     3388    int cMsWaitGranularity = 1;
     3389
     3390    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
     3391    AssertReturn(pGip, VERR_INVALID_POINTER);
     3392
     3393    if (RT_UNLIKELY(pDevExt->hTscDeltaThread == NIL_RTTHREAD))
     3394        return VERR_THREAD_NOT_WAITABLE;
     3395
     3396    cMsTotalWait = RT_MIN(pGip->cPresentCpus + 10, 200);
     3397    while (cTriesLeft-- > 0)
     3398    {
     3399        if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
     3400            return VINF_SUCCESS;
     3401        RTThreadSleep(cMsWaitGranularity);
     3402        cMsWaited += cMsWaitGranularity;
     3403        if (cMsWaited >= cMsTotalWait)
    28113404            break;
    28123405    }
    28133406
    2814     if (poffMin)
    2815         *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
    2816     Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
    2817          fAsync, iEndCpu, rc, offMin, offMax));
    2818 #if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
    2819     OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
    2820 #endif
    2821     return fAsync;
    2822 }
    2823 
    2824 
    2825 /**
    2826  * supdrvGipInit() worker that determines the GIP TSC mode.
    2827  *
    2828  * @returns The most suitable TSC mode.
    2829  * @param   pDevExt     Pointer to the device instance data.
    2830  */
    2831 static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
    2832 {
    2833     uint64_t u64DiffCoresIgnored;
    2834     uint32_t uEAX, uEBX, uECX, uEDX;
    2835 
    2836     /*
    2837      * Establish whether the CPU advertises TSC as invariant, we need that in
    2838      * a couple of places below.
    2839      */
    2840     bool fInvariantTsc = false;
    2841     if (ASMHasCpuId())
    2842     {
    2843         uEAX = ASMCpuId_EAX(0x80000000);
    2844         if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
    2845         {
    2846             uEDX = ASMCpuId_EDX(0x80000007);
    2847             if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
    2848                 fInvariantTsc = true;
    2849         }
    2850     }
    2851 
    2852     /*
    2853      * On single CPU systems, we don't need to consider ASYNC mode.
    2854      */
    2855     if (RTMpGetCount() <= 1)
    2856         return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
    2857 
    2858     /*
    2859      * Allow the user and/or OS specific bits to force async mode.
    2860      */
    2861     if (supdrvOSGetForcedAsyncTscMode(pDevExt))
    2862         return SUPGIPMODE_ASYNC_TSC;
    2863 
    2864 
    2865 #if 0 /** @todo enable this when i64TscDelta is applied in all places where it's needed */
    2866     /*
    2867      * Use invariant mode if the CPU says TSC is invariant.
    2868      */
    2869     if (fInvariantTsc)
    2870         return SUPGIPMODE_INVARIANT_TSC;
    2871 #endif
    2872 
    2873     /*
    2874      * TSC is not invariant and we're on SMP, this presents two problems:
    2875      *
    2876      *      (1) There might be a skew between the CPU, so that cpu0
    2877      *          returns a TSC that is slightly different from cpu1.
    2878      *          This screw may be due to (2), bad TSC initialization
    2879      *          or slightly different TSC rates.
    2880      *
    2881      *      (2) Power management (and other things) may cause the TSC
    2882      *          to run at a non-constant speed, and cause the speed
    2883      *          to be different on the cpus. This will result in (1).
    2884      *
    2885      * If any of the above is detected, we will have to use ASYNC mode.
    2886      */
    2887 
    2888     /* (1). Try check for current differences between the cpus. */
    2889     if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
    2890         return SUPGIPMODE_ASYNC_TSC;
    2891 
    2892 #if 1 /** @todo remove once i64TscDelta is applied everywhere. Enable #if 0 above. */
    2893     if (fInvariantTsc)
    2894         return SUPGIPMODE_INVARIANT_TSC;
    2895 #endif
    2896 
    2897     /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
    2898     ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
    2899     if (   ASMIsValidStdRange(uEAX)
    2900         && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
    2901     {
    2902         /* Check for APM support. */
    2903         uEAX = ASMCpuId_EAX(0x80000000);
    2904         if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
    2905         {
    2906             uEDX = ASMCpuId_EDX(0x80000007);
    2907             if (uEDX & 0x3e)  /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
    2908                 return SUPGIPMODE_ASYNC_TSC;
    2909         }
    2910     }
    2911 
    2912     return SUPGIPMODE_SYNC_TSC;
    2913 }
    2914 
    2915 
    2916 /**
    2917  * Initializes per-CPU GIP information.
    2918  *
    2919  * @param   pDevExt     Pointer to the device instance data.
    2920  * @param   pGip        Pointer to the GIP.
    2921  * @param   pCpu        Pointer to which GIP CPU to initalize.
    2922  * @param   u64NanoTS   The current nanosecond timestamp.
    2923  */
    2924 static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
    2925 {
    2926     /* !!! Warning !!! The GIP may not be linked to the device instance data at this point!
    2927        which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
    2928     pCpu->u32TransactionId   = 2;
    2929     pCpu->u64NanoTS          = u64NanoTS;
    2930     pCpu->u64TSC             = ASMReadTSC();
    2931     pCpu->u64TSCSample       = GIP_TSC_DELTA_RSVD;
    2932     pCpu->i64TSCDelta        = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
    2933 
    2934     ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
    2935     ASMAtomicWriteSize(&pCpu->idCpu,    NIL_RTCPUID);
    2936     ASMAtomicWriteS16(&pCpu->iCpuSet,   -1);
    2937     ASMAtomicWriteU16(&pCpu->idApic,    UINT16_MAX);
    2938 
    2939     /*
    2940      * We don't know the following values until we've executed updates.
    2941      * So, we'll just pretend it's a 4 GHz CPU and adjust the history it on
    2942      * the 2nd timer callout.
    2943      */
    2944     pCpu->u64CpuHz          = _4G + 1; /* tstGIP-2 depends on this. */
    2945     pCpu->u32UpdateIntervalTSC
    2946         = pCpu->au32TSCHistory[0]
    2947         = pCpu->au32TSCHistory[1]
    2948         = pCpu->au32TSCHistory[2]
    2949         = pCpu->au32TSCHistory[3]
    2950         = pCpu->au32TSCHistory[4]
    2951         = pCpu->au32TSCHistory[5]
    2952         = pCpu->au32TSCHistory[6]
    2953         = pCpu->au32TSCHistory[7]
    2954         = (uint32_t)(_4G / pGip->u32UpdateHz);
    2955 }
    2956 
    2957 
    2958 /**
    2959  * Initializes the GIP data.
    2960  *
    2961  * @param   pDevExt             Pointer to the device instance data.
    2962  * @param   pGip                Pointer to the read-write kernel mapping of the GIP.
    2963  * @param   HCPhys              The physical address of the GIP.
    2964  * @param   u64NanoTS           The current nanosecond timestamp.
    2965  * @param   uUpdateHz           The update frequency.
    2966  * @param   uUpdateIntervalNS   The update interval in nanoseconds.
    2967  * @param   cCpus               The CPU count.
    2968  */
    2969 static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
    2970                           uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
    2971 {
    2972     size_t const    cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
    2973     unsigned        i;
    2974 #ifdef DEBUG_DARWIN_GIP
    2975     OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
    2976 #else
    2977     LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
    2978 #endif
    2979 
    2980     /*
    2981      * Initialize the structure.
    2982      */
    2983     memset(pGip, 0, cbGip);
    2984 
    2985     pGip->u32Magic                = SUPGLOBALINFOPAGE_MAGIC;
    2986     pGip->u32Version              = SUPGLOBALINFOPAGE_VERSION;
    2987     pGip->u32Mode                 = supdrvGipInitDetermineTscMode(pDevExt);
    2988     if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
    2989         /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
    2990         pGip->enmUseTscDelta      = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
    2991                                   ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
    2992     else
    2993         pGip->enmUseTscDelta      = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
    2994     pGip->cCpus                   = (uint16_t)cCpus;
    2995     pGip->cPages                  = (uint16_t)(cbGip / PAGE_SIZE);
    2996     pGip->u32UpdateHz             = uUpdateHz;
    2997     pGip->u32UpdateIntervalNS     = uUpdateIntervalNS;
    2998     pGip->fGetGipCpu              = SUPGIPGETCPU_APIC_ID;
    2999     RTCpuSetEmpty(&pGip->OnlineCpuSet);
    3000     RTCpuSetEmpty(&pGip->PresentCpuSet);
    3001     RTMpGetSet(&pGip->PossibleCpuSet);
    3002     pGip->cOnlineCpus             = RTMpGetOnlineCount();
    3003     pGip->cPresentCpus            = RTMpGetPresentCount();
    3004     pGip->cPossibleCpus           = RTMpGetCount();
    3005     pGip->idCpuMax                = RTMpGetMaxCpuId();
    3006     for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
    3007         pGip->aiCpuFromApicId[i]    = UINT16_MAX;
    3008     for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
    3009         pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
    3010     for (i = 0; i < cCpus; i++)
    3011         supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
    3012 
    3013     /*
    3014      * Link it to the device extension.
    3015      */
    3016     pDevExt->pGip      = pGip;
    3017     pDevExt->HCPhysGip = HCPhys;
    3018     pDevExt->cGipUsers = 0;
    3019 }
    3020 
    3021 
    3022 /**
    3023  * On CPU initialization callback for RTMpOnAll.
    3024  *
    3025  * @param   idCpu               The CPU ID.
    3026  * @param   pvUser1             The device extension.
    3027  * @param   pvUser2             The GIP.
    3028  */
    3029 static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
    3030 {
    3031     /* This is good enough, even though it will update some of the globals a
    3032        bit to much. */
    3033     supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
    3034 }
    3035 
    3036 
    3037 /**
    3038  * Invalidates the GIP data upon termination.
    3039  *
    3040  * @param   pGip        Pointer to the read-write kernel mapping of the GIP.
    3041  */
    3042 static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
    3043 {
    3044     unsigned i;
    3045     pGip->u32Magic = 0;
    3046     for (i = 0; i < pGip->cCpus; i++)
    3047     {
    3048         pGip->aCPUs[i].u64NanoTS = 0;
    3049         pGip->aCPUs[i].u64TSC = 0;
    3050         pGip->aCPUs[i].iTSCHistoryHead = 0;
    3051         pGip->aCPUs[i].u64TSCSample = 0;
    3052         pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
    3053     }
    3054 }
    3055 
    3056 
    3057 /**
    3058  * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
    3059  * updates all the per cpu data except the transaction id.
    3060  *
    3061  * @param   pDevExt         The device extension.
    3062  * @param   pGipCpu         Pointer to the per cpu data.
    3063  * @param   u64NanoTS       The current time stamp.
    3064  * @param   u64TSC          The current TSC.
    3065  * @param   iTick           The current timer tick.
    3066  *
    3067  * @remarks Can be called with interrupts disabled!
    3068  */
    3069 static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
    3070 {
    3071     uint64_t    u64TSCDelta;
    3072     uint32_t    u32UpdateIntervalTSC;
    3073     uint32_t    u32UpdateIntervalTSCSlack;
    3074     unsigned    iTSCHistoryHead;
    3075     uint64_t    u64CpuHz;
    3076     uint32_t    u32TransactionId;
    3077 
    3078     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    3079     AssertPtrReturnVoid(pGip);
    3080 
    3081     /* Delta between this and the previous update. */
    3082     ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
    3083 
    3084     /*
    3085      * Update the NanoTS.
    3086      */
    3087     ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
    3088 
    3089     /*
    3090      * Calc TSC delta.
    3091      */
    3092     u64TSCDelta = u64TSC - pGipCpu->u64TSC;
    3093     ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
    3094 
    3095     /* We don't need to keep realculating the frequency when it's invariant. */
    3096     if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
    3097         return;
    3098 
    3099     if (u64TSCDelta >> 32)
    3100     {
    3101         u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
    3102         pGipCpu->cErrors++;
    3103     }
    3104 
    3105     /*
    3106      * On the 2nd and 3rd callout, reset the history with the current TSC
    3107      * interval since the values entered by supdrvGipInit are totally off.
    3108      * The interval on the 1st callout completely unreliable, the 2nd is a bit
    3109      * better, while the 3rd should be most reliable.
    3110      */
    3111     u32TransactionId = pGipCpu->u32TransactionId;
    3112     if (RT_UNLIKELY(   (   u32TransactionId == 5
    3113                         || u32TransactionId == 7)
    3114                     && (   iTick == 2
    3115                         || iTick == 3) ))
    3116     {
    3117         unsigned i;
    3118         for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
    3119             ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
    3120     }
    3121 
    3122     /*
    3123      * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
    3124      * Wait until we have at least one full history since the above history reset. The
    3125      * assumption is that the majority of the previous history values will be tolerable.
    3126      * See @bugref{6710} comment #67.
    3127      */
    3128     if (   u32TransactionId > 23 /* 7 + (8 * 2) */
    3129         && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    3130     {
    3131         uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
    3132         if (   pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
    3133             || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
    3134         {
    3135             uint32_t u32;
    3136             u32  = pGipCpu->au32TSCHistory[0];
    3137             u32 += pGipCpu->au32TSCHistory[1];
    3138             u32 += pGipCpu->au32TSCHistory[2];
    3139             u32 += pGipCpu->au32TSCHistory[3];
    3140             u32 >>= 2;
    3141             u64TSCDelta  = pGipCpu->au32TSCHistory[4];
    3142             u64TSCDelta += pGipCpu->au32TSCHistory[5];
    3143             u64TSCDelta += pGipCpu->au32TSCHistory[6];
    3144             u64TSCDelta += pGipCpu->au32TSCHistory[7];
    3145             u64TSCDelta >>= 2;
    3146             u64TSCDelta += u32;
    3147             u64TSCDelta >>= 1;
    3148         }
    3149     }
    3150 
    3151     /*
    3152      * TSC History.
    3153      */
    3154     Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
    3155     iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
    3156     ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
    3157     ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
    3158 
    3159     /*
    3160      * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
    3161      *
    3162      * On Windows, we have an occasional (but recurring) sour value that messed up
    3163      * the history but taking only 1 interval reduces the precision overall.
    3164      * However, this problem existed before the invariant mode was introduced.
    3165      */
    3166     if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
    3167         || pGip->u32UpdateHz >= 1000)
    3168     {
    3169         uint32_t u32;
    3170         u32  = pGipCpu->au32TSCHistory[0];
    3171         u32 += pGipCpu->au32TSCHistory[1];
    3172         u32 += pGipCpu->au32TSCHistory[2];
    3173         u32 += pGipCpu->au32TSCHistory[3];
    3174         u32 >>= 2;
    3175         u32UpdateIntervalTSC  = pGipCpu->au32TSCHistory[4];
    3176         u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
    3177         u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
    3178         u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
    3179         u32UpdateIntervalTSC >>= 2;
    3180         u32UpdateIntervalTSC += u32;
    3181         u32UpdateIntervalTSC >>= 1;
    3182 
    3183         /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
    3184         u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
    3185     }
    3186     else if (pGip->u32UpdateHz >= 90)
    3187     {
    3188         u32UpdateIntervalTSC  = (uint32_t)u64TSCDelta;
    3189         u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
    3190         u32UpdateIntervalTSC >>= 1;
    3191 
    3192         /* value chosen on a 2GHz thinkpad running windows */
    3193         u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
    3194     }
    3195     else
    3196     {
    3197         u32UpdateIntervalTSC  = (uint32_t)u64TSCDelta;
    3198 
    3199         /* This value hasn't been checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
    3200         u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
    3201     }
    3202     ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
    3203 
    3204     /*
    3205      * CpuHz.
    3206      */
    3207     u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
    3208     u64CpuHz /= pGip->u32UpdateIntervalNS;
    3209     ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
    3210 }
    3211 
    3212 
    3213 /**
    3214  * Updates the GIP.
    3215  *
    3216  * @param   pDevExt         The device extension.
    3217  * @param   u64NanoTS       The current nanosecond timesamp.
    3218  * @param   u64TSC          The current TSC timesamp.
    3219  * @param   idCpu           The CPU ID.
    3220  * @param   iTick           The current timer tick.
    3221  *
    3222  * @remarks Can be called with interrupts disabled!
    3223  */
    3224 static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
    3225 {
    3226     /*
    3227      * Determine the relevant CPU data.
    3228      */
    3229     PSUPGIPCPU pGipCpu;
    3230     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    3231     AssertPtrReturnVoid(pGip);
    3232 
    3233     if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
    3234         pGipCpu = &pGip->aCPUs[0];
    3235     else
    3236     {
    3237         unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
    3238         if (RT_UNLIKELY(iCpu >= pGip->cCpus))
    3239             return;
    3240         pGipCpu = &pGip->aCPUs[iCpu];
    3241         if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
    3242             return;
    3243     }
    3244 
    3245     /*
    3246      * Start update transaction.
    3247      */
    3248     if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
    3249     {
    3250         /* this can happen on win32 if we're taking to long and there are more CPUs around. shouldn't happen though. */
    3251         AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
    3252         ASMAtomicIncU32(&pGipCpu->u32TransactionId);
    3253         pGipCpu->cErrors++;
    3254         return;
    3255     }
    3256 
    3257     /*
    3258      * Recalc the update frequency every 0x800th time.
    3259      */
    3260     if (   pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC   /* cuz we're not recalculating the frequency on invariants hosts. */
    3261         && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
    3262     {
    3263         if (pGip->u64NanoTSLastUpdateHz)
    3264         {
    3265 #ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
    3266             uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
    3267             uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
    3268             if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
    3269             {
    3270                 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
    3271                  *        calculation on non-invariant hosts if it changes the history decision
    3272                  *        taken in supdrvGipDoUpdateCpu(). */
    3273                 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
    3274                 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
    3275                 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
    3276             }
    3277 #endif
    3278         }
    3279         ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
    3280     }
    3281 
    3282     /*
    3283      * Update the data.
    3284      */
    3285     supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
    3286 
    3287     /*
    3288      * Complete transaction.
    3289      */
    3290     ASMAtomicIncU32(&pGipCpu->u32TransactionId);
    3291 }
    3292 
    3293 
    3294 /**
    3295  * Updates the per cpu GIP data for the calling cpu.
    3296  *
    3297  * @param   pDevExt         The device extension.
    3298  * @param   u64NanoTS       The current nanosecond timesamp.
    3299  * @param   u64TSC          The current TSC timesamp.
    3300  * @param   idCpu           The CPU ID.
    3301  * @param   idApic          The APIC id for the CPU index.
    3302  * @param   iTick           The current timer tick.
    3303  *
    3304  * @remarks Can be called with interrupts disabled!
    3305  */
    3306 static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
    3307                                   RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
    3308 {
    3309     uint32_t iCpu;
    3310     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
    3311 
    3312     /*
    3313      * Avoid a potential race when a CPU online notification doesn't fire on
    3314      * the onlined CPU but the tick creeps in before the event notification is
    3315      * run.
    3316      */
    3317     if (RT_UNLIKELY(iTick == 1))
    3318     {
    3319         iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
    3320         if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
    3321             supdrvGipMpEventOnline(pDevExt, idCpu);
    3322     }
    3323 
    3324     iCpu = pGip->aiCpuFromApicId[idApic];
    3325     if (RT_LIKELY(iCpu < pGip->cCpus))
    3326     {
    3327         PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
    3328         if (pGipCpu->idCpu == idCpu)
    3329         {
    3330             /*
    3331              * Start update transaction.
    3332              */
    3333             if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
    3334             {
    3335                 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
    3336                 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
    3337                 pGipCpu->cErrors++;
    3338                 return;
    3339             }
    3340 
    3341             /*
    3342              * Update the data.
    3343              */
    3344             supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
    3345 
    3346             /*
    3347              * Complete transaction.
    3348              */
    3349             ASMAtomicIncU32(&pGipCpu->u32TransactionId);
    3350         }
    3351     }
    3352 }
     3407    return VERR_TIMEOUT;
     3408}
     3409
     3410#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
    33533411
    33543412
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette