Changeset 54328 in vbox

- Timestamp: Feb 20, 2015, 2:06:51 PM (10 years ago)
- Files: 1 edited

Legend (line prefixes used in the diff below):
- unmarked: unmodified context
- +: added in r54328
- -: removed in r54328
Hunk breaks are shown as "…".
trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp
r54327 → r54328:

 #endif


+/*******************************************************************************
 *   Internal Functions                                                         *
…
 static DECLCALLBACK(void) supdrvGipSyncAndInvariantTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
 static DECLCALLBACK(void) supdrvGipAsyncTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
-static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
-static void               supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys, uint64_t u64NanoTS,
-                                        unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus);
-static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2);
-static void               supdrvGipTerm(PSUPGLOBALINFOPAGE pGip);
-static void               supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick);
-static void               supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
-                                                RTCPUID idCpu, uint8_t idApic, uint64_t iTick);
 static void               supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS);
-static int                supdrvMeasureInitialTscDeltas(PSUPDRVDEVEXT pDevExt);
-static int                supdrvMeasureTscDeltaOne(PSUPDRVDEVEXT pDevExt, uint32_t idxWorker);
+#ifdef SUPDRV_USE_TSC_DELTA_THREAD
+static int                supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt);
+static void               supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt);
+static int                supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt);
+#endif
…
 DECLEXPORT(PSUPGLOBALINFOPAGE) g_pSUPGlobalInfoPage = NULL;
+
+
+/*
+ *
+ * Misc Common GIP Code
+ * Misc Common GIP Code
+ * Misc Common GIP Code
+ *
+ *
+ */
+
+
+/**
+ * Finds the GIP CPU index corresponding to @a idCpu.
+ *
+ * @returns GIP CPU array index, UINT32_MAX if not found.
+ * @param   pGip    The GIP.
+ * @param   idCpu   The CPU ID.
+ */
+static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
+{
+    uint32_t i;
+    for (i = 0; i < pGip->cCpus; i++)
+        if (pGip->aCPUs[i].idCpu == idCpu)
+            return i;
+    return UINT32_MAX;
+}
+
+
+/**
+ * Applies the TSC delta to the supplied raw TSC value.
+ *
+ * @returns VBox status code. (Ignored by all users, just FYI.)
+ * @param   pGip            Pointer to the GIP.
+ * @param   puTsc           Pointer to a valid TSC value before the TSC delta has been applied.
+ * @param   idApic          The APIC ID of the CPU @c puTsc corresponds to.
+ * @param   pfDeltaApplied  Where to store whether the TSC delta was successfully
+ *                          applied or not (optional, can be NULL).
+ *
+ * @remarks May be called with interrupts disabled in ring-0!
+ *
+ * @note    Don't you dare change the delta calculation.  If you really do, make
+ *          sure you update all places where it's used (IPRT, SUPLibAll.cpp,
+ *          SUPDrv.c, supdrvGipMpEvent, and more).
+ */
+DECLINLINE(int) supdrvTscDeltaApply(PSUPGLOBALINFOPAGE pGip, uint64_t *puTsc, uint16_t idApic, bool *pfDeltaApplied)
+{
+    int rc;
+
+    /*
+     * Validate input.
+     */
+    AssertPtr(puTsc);
+    AssertPtr(pGip);
+    Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
+
+    /*
+     * Carefully convert the idApic into a GIPCPU entry.
+     */
+    if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
+    {
+        uint16_t iCpu = pGip->aiCpuFromApicId[idApic];
+        if (RT_LIKELY(iCpu < pGip->cCpus))
+        {
+            PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
+
+            /*
+             * Apply the delta if valid.
+             */
+            if (RT_LIKELY(pGipCpu->i64TSCDelta != INT64_MAX))
+            {
+                *puTsc -= pGipCpu->i64TSCDelta;
+                if (pfDeltaApplied)
+                    *pfDeltaApplied = true;
+                return VINF_SUCCESS;
+            }
+
+            rc = VINF_SUCCESS;
+        }
+        else
+        {
+            AssertMsgFailed(("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus));
+            rc = VERR_INVALID_CPU_INDEX;
+        }
+    }
+    else
+    {
+        AssertMsgFailed(("idApic=%u\n", idApic));
+        rc = VERR_INVALID_CPU_ID;
+    }
+    if (pfDeltaApplied)
+        *pfDeltaApplied = false;
+    return rc;
+}
+
+
+/*
+ *
+ * GIP Mapping and Unmapping Related Code.
+ * GIP Mapping and Unmapping Related Code.
+ * GIP Mapping and Unmapping Related Code.
+ *
+ *
+ */
…
 }
+
+
+
+
+/*
+ *
+ *
+ * GIP Initialization, Termination and CPU Offline / Online Related Code.
+ * GIP Initialization, Termination and CPU Offline / Online Related Code.
+ * GIP Initialization, Termination and CPU Offline / Online Related Code.
+ *
+ *
+ */
+
+
+/**
+ * Timer callback function for TSC frequency refinement in invariant GIP mode.
+ *
+ * @param   pTimer  The timer.
+ * @param   pvUser  Opaque pointer to the device instance data.
+ * @param   iTick   The timer tick.
+ */
+static DECLCALLBACK(void) supdrvInitAsyncRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
+{
+    PSUPDRVDEVEXT       pDevExt = (PSUPDRVDEVEXT)pvUser;
+    PSUPGLOBALINFOPAGE  pGip = pDevExt->pGip;
+    bool                fDeltaApplied = false;
+    uint8_t             idApic;
+    uint64_t            u64DeltaNanoTS;
+    uint64_t            u64DeltaTsc;
+    uint64_t            u64NanoTS;
+    uint64_t            u64Tsc;
+    RTCCUINTREG         uFlags;
+
+    /* Paranoia. */
+    Assert(pGip);
+    Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
+
+#if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
+    u64NanoTS = RTTimeSystemNanoTS();
+    while (RTTimeSystemNanoTS() == u64NanoTS)
+        ASMNopPause();
+#endif
+    uFlags    = ASMIntDisableFlags();
+    idApic    = ASMGetApicId();
+    u64Tsc    = ASMReadTSC();
+    u64NanoTS = RTTimeSystemNanoTS();
+    ASMSetFlags(uFlags);
+    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
+        supdrvTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
+    u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
+    u64DeltaTsc    = u64Tsc - pDevExt->u64TscAnchor;
+
+    if (RT_UNLIKELY(   pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO
+                    && !fDeltaApplied))
+    {
+        Log(("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
+             GIP_TSC_REFINE_INTERVAL));
+        return;
+    }
+
+    /* Calculate the TSC frequency. */
+    if (   u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
+        && u64DeltaNanoTS < UINT32_MAX)
+        pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, (uint32_t)u64DeltaNanoTS);
+    else
+    {
+        RTUINT128U CpuHz, Tmp, Divisor;
+        CpuHz.s.Lo = CpuHz.s.Hi = 0;
+        RTUInt128MulU64ByU64(&Tmp, u64DeltaTsc, RT_NS_1SEC_64);
+        RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, u64DeltaNanoTS));
+        pGip->u64CpuHz = CpuHz.s.Lo;
+    }
+
+    /* Update rest of GIP. */
+    Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP(). */
+    pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
+}
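Editorial note on the frequency math in the callback above: RT_NS_1SEC is 10^9, so the fast path computes ticks * 10^9 / ns only when the multiplication cannot overflow 64 bits, and otherwise falls back to 128-bit arithmetic. A minimal user-mode sketch of that overflow-safe split (illustrative helper, not driver code; GCC/Clang's unsigned __int128 stands in for the RTUInt128 routines):

    #include <stdint.h>

    /* Hz = tscDelta * 1e9 / nsDelta, choosing the 64-bit fast path only when
       tscDelta * 1e9 provably fits in 64 bits, as the timer callback does. */
    static uint64_t calcTscHz(uint64_t tscDelta, uint64_t nsDelta)
    {
        if (tscDelta < UINT64_MAX / UINT64_C(1000000000) && nsDelta <= UINT32_MAX)
            return tscDelta * UINT64_C(1000000000) / nsDelta;       /* no overflow */
        /* wide fallback, mirroring RTUInt128MulU64ByU64 + RTUInt128Div */
        return (uint64_t)((unsigned __int128)tscDelta * UINT64_C(1000000000) / nsDelta);
    }

For example, a delta of 2,997,924,580 ticks over 1,000,000,000 ns yields roughly 2.998 GHz on either path.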
+
+
+/**
+ * Starts the TSC-frequency refinement phase asynchronously.
+ *
+ * @param   pDevExt     Pointer to the device instance data.
+ */
+static void supdrvGipInitAsyncRefineTscFreq(PSUPDRVDEVEXT pDevExt)
+{
+    uint64_t            u64NanoTS;
+    RTCCUINTREG         uFlags;
+    uint8_t             idApic;
+    int                 rc;
+    PSUPGLOBALINFOPAGE  pGip;
+
+    /* Validate. */
+    Assert(pDevExt);
+    Assert(pDevExt->pGip);
+    pGip = pDevExt->pGip;
+
 #ifdef SUPDRV_USE_TSC_DELTA_THREAD
+    /*
+     * If the TSC-delta thread is created, wait until it's done calculating
+     * the TSC-deltas on the relevant online CPUs before we start the TSC refinement.
+     */
+    if (   pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
+        && ASMAtomicReadS32(&pDevExt->rcTscDelta) == VERR_NOT_AVAILABLE)
+    {
+        rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
+        if (rc == VERR_TIMEOUT)
+        {
+            SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
+            return;
+        }
+    }
+#endif
+
+    /*
+     * Record the TSC and NanoTS as the starting anchor point for refinement of the
+     * TSC.  We deliberately avoid using SUPReadTSC() here as we want to keep the
+     * reading of the TSC and the NanoTS as close as possible.
+     */
+    u64NanoTS = RTTimeSystemNanoTS();
+    while (RTTimeSystemNanoTS() == u64NanoTS)
+        ASMNopPause();
+    uFlags = ASMIntDisableFlags();
+    idApic = ASMGetApicId();
+    pDevExt->u64TscAnchor    = ASMReadTSC();
+    pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
+    ASMSetFlags(uFlags);
+    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
+        supdrvTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, NULL /* pfDeltaApplied */);
+
+    rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY,
+                         supdrvInitAsyncRefineTscTimer, pDevExt);
+    if (RT_SUCCESS(rc))
+    {
+        /*
+         * Refine the TSC frequency measurement over a long interval.  Ideally, we want to keep the
+         * interval as small as possible while gaining the most consistent and accurate frequency
+         * (compared to what the host OS might have measured).
+         *
+         * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
+         * same TSC frequency whenever possible so we need to keep the interval short.
+         */
+        rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
+        AssertRC(rc);
+    }
+    else
+        OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
+}
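Both the anchor recording above and the refinement timer callback spin until RTTimeSystemNanoTS() ticks over before sampling the TSC/NanoTS pair, so the pair is taken right at a clock-tick boundary and up to one granule of quantization error disappears. In isolation the trick looks like this (a sketch; nowNs() is a hypothetical stand-in for the system clock read):

    #include <stdint.h>

    extern uint64_t nowNs(void);    /* hypothetical monotonic clock, like RTTimeSystemNanoTS() */

    /* Spin until the clock advances, then return the fresh edge-aligned value. */
    static uint64_t sampleAtTickEdge(void)
    {
        uint64_t ns = nowNs();
        while (nowNs() == ns)
            ;                       /* the driver uses ASMNopPause() here */
        return nowNs();
    }

The removed TSC-delta thread code, shown next, serviced the re-measurement requests this refinement phase can depend on.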
-
-/**
- * Switches the TSC-delta measurement thread into the butchered state.
- *
- * @returns VBox status code.
- * @param   pDevExt         Pointer to the device instance data.
- * @param   fSpinlockHeld   Whether the TSC-delta spinlock is held or not.
- * @param   pszFailed       An error message to log.
- * @param   rcFailed        The error code to exit the thread with.
- */
-static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
-{
-    if (!fSpinlockHeld)
-        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
-
-    pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
-    RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
-    OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", pszFailed, rcFailed));
-    return rcFailed;
-}
-
-
-/**
- * The TSC-delta measurement thread.
- *
- * @returns VBox status code.
- * @param   hThread     The thread handle.
- * @param   pvUser      Opaque pointer to the device instance data.
- */
-static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
-{
-    PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
-    bool fInitialMeasurement = true;
-    uint32_t cConsecutiveTimeouts = 0;
-    int rc = VERR_INTERNAL_ERROR_2;
-    for (;;)
-    {
-        /*
-         * Switch on the current state.
-         */
-        SUPDRVTSCDELTATHREADSTATE enmState;
-        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
-        enmState = pDevExt->enmTscDeltaThreadState;
-        switch (enmState)
-        {
-            case kTscDeltaThreadState_Creating:
-            {
-                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
-                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
-                if (RT_FAILURE(rc))
-                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
-                /* fall thru */
-            }
-
-            case kTscDeltaThreadState_Listening:
-            {
-                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
-
-                /* Simple adaptive timeout. */
-                if (cConsecutiveTimeouts++ == 10)
-                {
-                    if (pDevExt->cMsTscDeltaTimeout == 1)           /* 10 ms */
-                        pDevExt->cMsTscDeltaTimeout = 10;
-                    else if (pDevExt->cMsTscDeltaTimeout == 10)     /* +100 ms */
-                        pDevExt->cMsTscDeltaTimeout = 100;
-                    else if (pDevExt->cMsTscDeltaTimeout == 100)    /* +1000 ms */
-                        pDevExt->cMsTscDeltaTimeout = 500;
-                    cConsecutiveTimeouts = 0;
-                }
-                rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
-                if (   RT_FAILURE(rc)
-                    && rc != VERR_TIMEOUT)
-                    return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
-                RTThreadUserReset(pDevExt->hTscDeltaThread);
-                break;
-            }
-
-            case kTscDeltaThreadState_WaitAndMeasure:
-            {
-                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
-                rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
-                if (RT_FAILURE(rc))
-                    return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
-                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
-                pDevExt->cMsTscDeltaTimeout = 1;
-                RTThreadSleep(10);
-                /* fall thru */
-            }
-
-            case kTscDeltaThreadState_Measuring:
-            {
-                cConsecutiveTimeouts = 0;
-                if (fInitialMeasurement)
-                {
-                    int cTries = 8;
-                    int cMsWaitPerTry = 10;
-                    fInitialMeasurement = false;
-                    do
-                    {
-                        rc = supdrvMeasureInitialTscDeltas(pDevExt);
-                        if (   RT_SUCCESS(rc)
-                            || (   RT_FAILURE(rc)
-                                && rc != VERR_TRY_AGAIN
-                                && rc != VERR_CPU_OFFLINE))
-                        {
-                            break;
-                        }
-                        RTThreadSleep(cMsWaitPerTry);
-                    } while (cTries-- > 0);
-                }
-                else
-                {
-                    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
-                    unsigned iCpu;
-
-                    /* Measure TSC-deltas only for the CPUs that are in the set. */
-                    rc = VINF_SUCCESS;
-                    for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
-                    {
-                        PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
-                        if (   pGipCpuWorker->i64TSCDelta == INT64_MAX
-                            && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
-                        {
-                            rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
-                        }
-                    }
-                }
-                RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
-                if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
-                    pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
-                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
-                Assert(rc != VERR_NOT_AVAILABLE);   /* VERR_NOT_AVAILABLE is used as the initial value. */
-                ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
-                break;
-            }
-
-            case kTscDeltaThreadState_Terminating:
-                pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
-                RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
-                return VINF_SUCCESS;
-
-            case kTscDeltaThreadState_Butchered:
-            default:
-                return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
-        }
-    }
-
-    return rc;
-}
-
-
-/**
- * Waits for the TSC-delta measurement thread to respond to a state change.
- *
- * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
- *          other error code on internal error.
- *
- * @param   pDevExt     Pointer to the device instance data.
- * @param   enmCurState The current state.
- * @param   enmNewState The new state we're waiting for it to enter.
- */
-static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
-                                    SUPDRVTSCDELTATHREADSTATE enmNewState)
-{
-    /*
-     * Wait a short while for the expected state transition.
-     */
-    int rc;
-    RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
-    RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
-    if (pDevExt->enmTscDeltaThreadState == enmNewState)
-    {
-        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
-        rc = VINF_SUCCESS;
-    }
-    else if (pDevExt->enmTscDeltaThreadState == enmCurState)
-    {
-        /*
-         * Wait longer if the state has not yet transitioned to the one we want.
-         */
-        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
-        rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
-        if (   RT_SUCCESS(rc)
-            || rc == VERR_TIMEOUT)
-        {
-            /*
-             * Check the state whether we've succeeded.
-             */
-            SUPDRVTSCDELTATHREADSTATE enmState;
-            RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
-            enmState = pDevExt->enmTscDeltaThreadState;
-            RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
-            if (enmState == enmNewState)
-                rc = VINF_SUCCESS;
-            else if (enmState == enmCurState)
-            {
-                rc = VERR_TIMEOUT;
-                OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
-                            enmNewState));
-            }
-            else
-            {
-                rc = VERR_INTERNAL_ERROR;
-                OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
-                            enmState, enmNewState));
-            }
-        }
-        else
-            OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
-    }
-    else
-    {
-        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
-        OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
-        rc = VERR_INTERNAL_ERROR;
-    }
-
-    return rc;
-}
-
-
-/**
- * Terminates the TSC-delta measurement thread.
- *
- * @param   pDevExt     Pointer to the device instance data.
- */
-static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
-{
-    int rc;
-    RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
-    pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
-    RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
-    RTThreadUserSignal(pDevExt->hTscDeltaThread);
-    rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
-    if (RT_FAILURE(rc))
-    {
-        /* Signal a few more times before giving up. */
-        int cTriesLeft = 5;
-        while (--cTriesLeft > 0)
-        {
-            RTThreadUserSignal(pDevExt->hTscDeltaThread);
-            rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
-            if (rc != VERR_TIMEOUT)
-                break;
-        }
-    }
-}
-
-
-/**
- * Initializes and spawns the TSC-delta measurement thread.
- *
- * A thread is required for servicing re-measurement requests from events like
- * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
- * under all contexts on all OSs.
- *
- * @returns VBox status code.
- * @param   pDevExt     Pointer to the device instance data.
- *
- * @remarks Must only be called -after- initializing GIP and setting up MP
- *          notifications!
- */
-static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
-{
-    int rc;
-    Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
-    rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
-    if (RT_SUCCESS(rc))
-    {
-        rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
-        if (RT_SUCCESS(rc))
-        {
-            pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
-            pDevExt->cMsTscDeltaTimeout = 1;
-            rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
-                                RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
-            if (RT_SUCCESS(rc))
-            {
-                rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
-                if (RT_SUCCESS(rc))
-                {
-                    ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
-                    return rc;
-                }
-
-                OSDBGPRINT(("supdrvTscDeltaInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
-                supdrvTscDeltaThreadTerminate(pDevExt);
-            }
-            else
-                OSDBGPRINT(("supdrvTscDeltaInit: RTThreadCreate failed. rc=%Rrc\n", rc));
-            RTSemEventDestroy(pDevExt->hTscDeltaEvent);
-            pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
-        }
-        else
-            OSDBGPRINT(("supdrvTscDeltaInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
-        RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
-        pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
-    }
-    else
-        OSDBGPRINT(("supdrvTscDeltaInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
-
-    return rc;
-}
-
-
-/**
- * Terminates the TSC-delta measurement thread and cleanup.
- *
- * @param   pDevExt     Pointer to the device instance data.
- */
-static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
-{
-    if (   pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
-        && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
-    {
-        supdrvTscDeltaThreadTerminate(pDevExt);
-    }
-
-    if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
-    {
-        RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
-        pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
-    }
-
-    if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
-    {
-        RTSemEventDestroy(pDevExt->hTscDeltaEvent);
-        pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
-    }
-
-    ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
-}
-
-
-/**
- * Waits for TSC-delta measurements to be completed for all online CPUs.
- *
- * @returns VBox status code.
- * @param   pDevExt     Pointer to the device instance data.
- */
-static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
-{
-    int cTriesLeft = 5;
-    int cMsTotalWait;
-    int cMsWaited = 0;
-    int cMsWaitGranularity = 1;
-
-    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
-    AssertReturn(pGip, VERR_INVALID_POINTER);
-
-    if (RT_UNLIKELY(pDevExt->hTscDeltaThread == NIL_RTTHREAD))
-        return VERR_THREAD_NOT_WAITABLE;
-
-    cMsTotalWait = RT_MIN(pGip->cPresentCpus + 10, 200);
-    while (cTriesLeft-- > 0)
-    {
-        if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
-            return VINF_SUCCESS;
-        RTThreadSleep(cMsWaitGranularity);
-        cMsWaited += cMsWaitGranularity;
-        if (cMsWaited >= cMsTotalWait)
-            break;
-    }
-
-    return VERR_TIMEOUT;
-}
-
-#endif /* SUPDRV_USE_TSC_DELTA_THREAD */
-
-/**
- * Applies the TSC delta to the supplied raw TSC value.
- *
- * @returns VBox status code. (Ignored by all users, just FYI.)
- * @param   pGip            Pointer to the GIP.
- * @param   puTsc           Pointer to a valid TSC value before the TSC delta has been applied.
- * @param   idApic          The APIC ID of the CPU @c puTsc corresponds to.
- * @param   pfDeltaApplied  Where to store whether the TSC delta was successfully
- *                          applied or not (optional, can be NULL).
- *
- * @remarks May be called with interrupts disabled in ring-0!
- *
- * @note    Don't you dare change the delta calculation.  If you really do, make
- *          sure you update all places where it's used (IPRT, SUPLibAll.cpp,
- *          SUPDrv.c, supdrvGipMpEvent, and more).
- */
-DECLINLINE(int) supdrvTscDeltaApply(PSUPGLOBALINFOPAGE pGip, uint64_t *puTsc, uint16_t idApic, bool *pfDeltaApplied)
-{
-    int rc;
-
-    /*
-     * Validate input.
-     */
-    AssertPtr(puTsc);
-    AssertPtr(pGip);
-    Assert(pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
-
-    /*
-     * Carefully convert the idApic into a GIPCPU entry.
-     */
-    if (RT_LIKELY(idApic < RT_ELEMENTS(pGip->aiCpuFromApicId)))
-    {
-        uint16_t iCpu = pGip->aiCpuFromApicId[idApic];
-        if (RT_LIKELY(iCpu < pGip->cCpus))
-        {
-            PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
-
-            /*
-             * Apply the delta if valid.
-             */
-            if (RT_LIKELY(pGipCpu->i64TSCDelta != INT64_MAX))
-            {
-                *puTsc -= pGipCpu->i64TSCDelta;
-                if (pfDeltaApplied)
-                    *pfDeltaApplied = true;
-                return VINF_SUCCESS;
-            }
-
-            rc = VINF_SUCCESS;
-        }
-        else
-        {
-            AssertMsgFailed(("iCpu=%u cCpus=%u\n", iCpu, pGip->cCpus));
-            rc = VERR_INVALID_CPU_INDEX;
-        }
-    }
-    else
-    {
-        AssertMsgFailed(("idApic=%u\n", idApic));
-        rc = VERR_INVALID_CPU_ID;
-    }
-    if (pfDeltaApplied)
-        *pfDeltaApplied = false;
-    return rc;
-}
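The removed supdrvTscDeltaThreadWaitForOnlineCpus above is a bounded poll: it checks a completion predicate at 1 ms granularity with a budget scaled to the CPU count (roughly 1 ms per present CPU, capped at 200 ms). A stripped-down sketch of the same pattern, with hypothetical stand-ins for RTCpuSetIsEqual() and RTThreadSleep():

    #include <stdbool.h>

    extern bool allDeltasObtained(void);   /* stand-in for the CPU-set comparison */
    extern void sleepMs(unsigned cMs);     /* stand-in for RTThreadSleep() */

    /* Poll at fine granularity until done or the millisecond budget runs out. */
    static int waitForDeltas(unsigned cMsBudget)
    {
        unsigned cMsWaited;
        for (cMsWaited = 0; cMsWaited < cMsBudget; cMsWaited++)
        {
            if (allDeltasObtained())
                return 0;                  /* VINF_SUCCESS */
            sleepMs(1);
        }
        return -1;                         /* VERR_TIMEOUT */
    }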
…
  * @remarks Must be called only -after- measuring the TSC deltas.
  */
-static int supdrvGipMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
+static int supdrvGipInitMeasureTscFreq(PSUPDRVDEVEXT pDevExt)
 {
     int cTriesLeft = 4;
…
     if (rc == VERR_TIMEOUT)
     {
-        SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
+        SUPR0Printf("vboxdrv: supdrvGipInitMeasureTscFreq: timedout waiting for TSC-delta measurements.\n");
         return VERR_SUPDRV_TSC_FREQ_MEASUREMENT_FAILED;
     }
 #else
-    SUPR0Printf("vboxdrv: supdrvGipMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
+    SUPR0Printf("vboxdrv: supdrvGipInitMeasureTscFreq: idApicBefore=%u idApicAfter=%u cTriesLeft=%u\n",
                 idApicBefore, idApicAfter, cTriesLeft);
 #endif
…
-
-/**
- * Timer callback function for TSC frequency refinement in invariant GIP mode.
- *
- * @param   pTimer  The timer.
- * @param   pvUser  Opaque pointer to the device instance data.
- * @param   iTick   The timer tick.
- */
-static DECLCALLBACK(void) supdrvRefineTscTimer(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
-{
-    PSUPDRVDEVEXT       pDevExt = (PSUPDRVDEVEXT)pvUser;
-    PSUPGLOBALINFOPAGE  pGip = pDevExt->pGip;
-    bool                fDeltaApplied = false;
-    uint8_t             idApic;
-    uint64_t            u64DeltaNanoTS;
-    uint64_t            u64DeltaTsc;
-    uint64_t            u64NanoTS;
-    uint64_t            u64Tsc;
-    RTCCUINTREG         uFlags;
-
-    /* Paranoia. */
-    Assert(pGip);
-    Assert(pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC);
-
-#if !defined(RT_OS_OS2) /* PORTME: Disable if timers are called from clock interrupt handler or with interrupts disabled. */
-    u64NanoTS = RTTimeSystemNanoTS();
-    while (RTTimeSystemNanoTS() == u64NanoTS)
-        ASMNopPause();
-#endif
-    uFlags    = ASMIntDisableFlags();
-    idApic    = ASMGetApicId();
-    u64Tsc    = ASMReadTSC();
-    u64NanoTS = RTTimeSystemNanoTS();
-    ASMSetFlags(uFlags);
-    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
-        supdrvTscDeltaApply(pGip, &u64Tsc, idApic, &fDeltaApplied);
-    u64DeltaNanoTS = u64NanoTS - pDevExt->u64NanoTSAnchor;
-    u64DeltaTsc    = u64Tsc - pDevExt->u64TscAnchor;
-
-    if (RT_UNLIKELY(   pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO
-                    && !fDeltaApplied))
-    {
-        Log(("vboxdrv: failed to refine TSC frequency as TSC-deltas unavailable after %d seconds!\n",
-             GIP_TSC_REFINE_INTERVAL));
-        return;
-    }
-
-    /* Calculate the TSC frequency. */
-    if (   u64DeltaTsc < UINT64_MAX / RT_NS_1SEC
-        && u64DeltaNanoTS < UINT32_MAX)
-        pGip->u64CpuHz = ASMMultU64ByU32DivByU32(u64DeltaTsc, RT_NS_1SEC, (uint32_t)u64DeltaNanoTS);
-    else
-    {
-        RTUINT128U CpuHz, Tmp, Divisor;
-        CpuHz.s.Lo = CpuHz.s.Hi = 0;
-        RTUInt128MulU64ByU64(&Tmp, u64DeltaTsc, RT_NS_1SEC_64);
-        RTUInt128Div(&CpuHz, &Tmp, RTUInt128AssignU64(&Divisor, u64DeltaNanoTS));
-        pGip->u64CpuHz = CpuHz.s.Lo;
-    }
-
-    /* Update rest of GIP. */
-    Assert(pGip->u32Mode != SUPGIPMODE_ASYNC_TSC); /* See SUPGetCpuHzFromGIP(). */
-    pGip->aCPUs[0].u64CpuHz = pGip->u64CpuHz;
-}
-
-
-/**
- * Starts the TSC-frequency refinement phase asynchronously.
- *
- * @param   pDevExt     Pointer to the device instance data.
- */
-static void supdrvRefineTscFreq(PSUPDRVDEVEXT pDevExt)
-{
-    uint64_t            u64NanoTS;
-    RTCCUINTREG         uFlags;
-    uint8_t             idApic;
-    int                 rc;
-    PSUPGLOBALINFOPAGE  pGip;
-
-    /* Validate. */
-    Assert(pDevExt);
-    Assert(pDevExt->pGip);
-    pGip = pDevExt->pGip;
-
-#ifdef SUPDRV_USE_TSC_DELTA_THREAD
-    /*
-     * If the TSC-delta thread is created, wait until it's done calculating
-     * the TSC-deltas on the relevant online CPUs before we start the TSC refinement.
-     */
-    if (   pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED
-        && ASMAtomicReadS32(&pDevExt->rcTscDelta) == VERR_NOT_AVAILABLE)
-    {
-        rc = supdrvTscDeltaThreadWaitForOnlineCpus(pDevExt);
-        if (rc == VERR_TIMEOUT)
-        {
-            SUPR0Printf("vboxdrv: Skipping refinement of TSC frequency as TSC-delta measurement timed out!\n");
-            return;
-        }
-    }
-#endif
-
-    /*
-     * Record the TSC and NanoTS as the starting anchor point for refinement of the
-     * TSC.  We deliberately avoid using SUPReadTSC() here as we want to keep the
-     * reading of the TSC and the NanoTS as close as possible.
-     */
-    u64NanoTS = RTTimeSystemNanoTS();
-    while (RTTimeSystemNanoTS() == u64NanoTS)
-        ASMNopPause();
-    uFlags = ASMIntDisableFlags();
-    idApic = ASMGetApicId();
-    pDevExt->u64TscAnchor    = ASMReadTSC();
-    pDevExt->u64NanoTSAnchor = RTTimeSystemNanoTS();
-    ASMSetFlags(uFlags);
-    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_PRACTICALLY_ZERO)
-        supdrvTscDeltaApply(pGip, &pDevExt->u64TscAnchor, idApic, NULL /* pfDeltaApplied */);
-
-    rc = RTTimerCreateEx(&pDevExt->pTscRefineTimer, 0 /* one-shot */, RTTIMER_FLAGS_CPU_ANY, supdrvRefineTscTimer, pDevExt);
-    if (RT_SUCCESS(rc))
-    {
-        /*
-         * Refine the TSC frequency measurement over a long interval.  Ideally, we want to keep the
-         * interval as small as possible while gaining the most consistent and accurate frequency
-         * (compared to what the host OS might have measured).
-         *
-         * In theory, we gain more accuracy with longer intervals, but we want VMs to startup with the
-         * same TSC frequency whenever possible so we need to keep the interval short.
-         */
-        rc = RTTimerStart(pDevExt->pTscRefineTimer, GIP_TSC_REFINE_INTERVAL * RT_NS_1SEC_64);
-        AssertRC(rc);
-    }
-    else
-        OSDBGPRINT(("RTTimerCreateEx failed to create one-shot timer. rc=%Rrc\n", rc));
-}
+
+
+/**
+ * Finds our (@a idCpu) entry, or allocates a new one if not found.
+ *
+ * @returns Index of the CPU in the cache set.
+ * @param   pGip    The GIP.
+ * @param   idCpu   The CPU ID.
+ */
+static uint32_t supdrvGipFindOrAllocCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
+{
+    uint32_t i, cTries;
+
+    /*
+     * ASSUMES that CPU IDs are constant.
+     */
+    for (i = 0; i < pGip->cCpus; i++)
+        if (pGip->aCPUs[i].idCpu == idCpu)
+            return i;
+
+    cTries = 0;
+    do
+    {
+        for (i = 0; i < pGip->cCpus; i++)
+        {
+            bool fRc;
+            ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
+            if (fRc)
+                return i;
+        }
+    } while (cTries++ < 32);
+    AssertReleaseFailed();
+    return i - 1;
+}
+
+
+/**
+ * The calling CPU should be accounted as online, update GIP accordingly.
+ *
+ * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
+ *
+ * @param   pDevExt     The device extension.
+ * @param   idCpu       The CPU ID.
+ */
+static void supdrvGipMpEventOnlineOrInitOnCpu(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
+{
+    int                 iCpuSet = 0;
+    uint16_t            idApic = UINT16_MAX;
+    uint32_t            i = 0;
+    uint64_t            u64NanoTS = 0;
+    PSUPGLOBALINFOPAGE  pGip = pDevExt->pGip;
+
+    AssertPtrReturnVoid(pGip);
+    AssertRelease(idCpu == RTMpCpuId());
+    Assert(pGip->cPossibleCpus == RTMpGetCount());
+
+    /*
+     * Do this behind a spinlock with interrupts disabled as this can fire
+     * on all CPUs simultaneously, see @bugref{6110}.
+     */
+    RTSpinlockAcquire(pDevExt->hGipSpinlock);
+
+    /*
+     * Update the globals.
+     */
+    ASMAtomicWriteU16(&pGip->cPresentCpus,  RTMpGetPresentCount());
+    ASMAtomicWriteU16(&pGip->cOnlineCpus,   RTMpGetOnlineCount());
+    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
+    if (iCpuSet >= 0)
+    {
+        Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
+        RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
+        RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
+    }
+
+    /*
+     * Update the entry.
+     */
+    u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
+    i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
+    supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
+    idApic = ASMGetApicId();
+    ASMAtomicWriteU16(&pGip->aCPUs[i].idApic,  idApic);
+    ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
+    ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu,  idCpu);
+
+    /*
+     * Update the APIC ID and CPU set index mappings.
+     */
+    ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic],     i);
+    ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
+
+    /* Update the Mp online/offline counter. */
+    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
+
+    /* Add this CPU to the set of CPUs for which we need to calculate their TSC-deltas. */
+    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
+    {
+        RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
+#ifdef SUPDRV_USE_TSC_DELTA_THREAD
+        RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
+        if (   pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
+            || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
+        {
+            pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
+        }
+        RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
+#endif
+    }
+
+    /* commit it */
+    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
+
+    RTSpinlockRelease(pDevExt->hGipSpinlock);
+}
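supdrvGipFindOrAllocCpuIndexForCpuId above claims a free aCPUs[] slot with a compare-and-swap so that CPUs onlining concurrently cannot grab the same entry. A minimal sketch of that allocation scheme using C11 atomics (claimSlot() and NIL_ID are illustrative names, not driver API):

    #include <stdatomic.h>
    #include <stdint.h>

    #define NIL_ID UINT32_MAX   /* plays the role of NIL_RTCPUID */

    /* Look for an existing entry first; then CAS an empty slot to our ID.
       The CAS arbitrates between racing CPUs, exactly as in the driver. */
    static int32_t claimSlot(_Atomic uint32_t *paSlots, unsigned cSlots, uint32_t id)
    {
        unsigned i;
        for (i = 0; i < cSlots; i++)
            if (atomic_load(&paSlots[i]) == id)
                return (int32_t)i;                          /* already registered */
        for (i = 0; i < cSlots; i++)
        {
            uint32_t expected = NIL_ID;
            if (atomic_compare_exchange_strong(&paSlots[i], &expected, id))
                return (int32_t)i;                          /* claimed a free slot */
        }
        return -1;                                          /* table full */
    }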
+
+
+/**
+ * The CPU should be accounted as offline, update the GIP accordingly.
+ *
+ * This is used by supdrvGipMpEvent.
+ *
+ * @param   pDevExt     The device extension.
+ * @param   idCpu       The CPU ID.
+ */
+static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
+{
+    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
+    int                iCpuSet;
+    unsigned           i;
+
+    AssertPtrReturnVoid(pGip);
+    RTSpinlockAcquire(pDevExt->hGipSpinlock);
+
+    iCpuSet = RTMpCpuIdToSetIndex(idCpu);
+    AssertReturnVoid(iCpuSet >= 0);
+
+    i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
+    AssertReturnVoid(i < pGip->cCpus);
+    AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
+
+    Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
+    RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
+
+    /* Update the Mp online/offline counter. */
+    ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
+
+    /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
+    if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
+    {
+        ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
+        ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
+    }
+
+    if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
+    {
+        /* Reset the TSC delta, we will recalculate it lazily. */
+        ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
+        /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
+        RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
+    }
+
+    /* commit it */
+    ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
+
+    RTSpinlockRelease(pDevExt->hGipSpinlock);
+}
+
+
+/**
+ * Multiprocessor event notification callback.
+ *
+ * This is used to make sure that the GIP master gets passed on to
+ * another CPU.  It also updates the associated CPU data.
+ *
+ * @param   enmEvent    The event.
+ * @param   idCpu       The cpu it applies to.
+ * @param   pvUser      Pointer to the device extension.
+ *
+ * @remarks This function -must- fire on the newly online'd CPU for the
+ *          RTMPEVENT_ONLINE case and can fire on any CPU for the
+ *          RTMPEVENT_OFFLINE case.
+ */
+static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
+{
+    PSUPDRVDEVEXT      pDevExt = (PSUPDRVDEVEXT)pvUser;
+    PSUPGLOBALINFOPAGE pGip    = pDevExt->pGip;
+
+    AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+    /*
+     * Update the GIP CPU data.
+     */
+    if (pGip)
+    {
+        switch (enmEvent)
+        {
+            case RTMPEVENT_ONLINE:
+                AssertRelease(idCpu == RTMpCpuId());
+                supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
+                break;
+            case RTMPEVENT_OFFLINE:
+                supdrvGipMpEventOffline(pDevExt, idCpu);
+                break;
+        }
+    }
+
+    /*
+     * Make sure there is a master GIP.
+     */
+    if (enmEvent == RTMPEVENT_OFFLINE)
+    {
+        RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
+        if (idGipMaster == idCpu)
+        {
+            /*
+             * The GIP master is going offline, find a new one.
+             */
+            bool        fIgnored;
+            unsigned    i;
+            RTCPUID     idNewGipMaster = NIL_RTCPUID;
+            RTCPUSET    OnlineCpus;
+            RTMpGetOnlineSet(&OnlineCpus);
+
+            for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
+                if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
+                {
+                    RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
+                    if (idCurCpu != idGipMaster)
+                    {
+                        idNewGipMaster = idCurCpu;
+                        break;
+                    }
+                }
+
+            Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
+            ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
+            NOREF(fIgnored);
+        }
+    }
+}
+
+
+/**
+ * On CPU initialization callback for RTMpOnAll.
+ *
+ * @param   idCpu       The CPU ID.
+ * @param   pvUser1     The device extension.
+ * @param   pvUser2     The GIP.
+ */
+static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+    /* This is good enough, even though it will update some of the globals a
+       bit too much. */
+    supdrvGipMpEventOnlineOrInitOnCpu((PSUPDRVDEVEXT)pvUser1, idCpu);
+}
+
+
+/**
+ * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
+ *
+ * @param   idCpu       Ignored.
+ * @param   pvUser1     Where to put the TSC.
+ * @param   pvUser2     Ignored.
+ */
+static DECLCALLBACK(void) supdrvGipInitDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+    ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
+}
+
+
+/**
+ * Determine if Async GIP mode is required because of TSC drift.
+ *
+ * When using the default/normal timer code it is essential that the time stamp counter
+ * (TSC) never runs backwards, that is, a read operation to the counter should return
+ * a bigger value than any previous read operation.  This is guaranteed by the latest
+ * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4).  In any other
+ * case we have to choose the asynchronous timer mode.
+ *
+ * @param   poffMin     Pointer to the determined difference between different
+ *                      cores (optional, can be NULL).
+ * @return  false if the time stamp counters appear to be synchronized, true otherwise.
+ */
+static bool supdrvGipInitDetermineAsyncTsc(uint64_t *poffMin)
+{
+    /*
+     * Just iterate all the cpus 8 times and make sure that the TSC is
+     * ever increasing.  We don't bother taking TSC rollover into account.
+     */
+    int         iEndCpu = RTMpGetArraySize();
+    int         iCpu;
+    int         cLoops = 8;
+    bool        fAsync = false;
+    int         rc = VINF_SUCCESS;
+    uint64_t    offMax = 0;
+    uint64_t    offMin = ~(uint64_t)0;
+    uint64_t    PrevTsc = ASMReadTSC();
+
+    while (cLoops-- > 0)
+    {
+        for (iCpu = 0; iCpu < iEndCpu; iCpu++)
+        {
+            uint64_t CurTsc;
+            rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvGipInitDetermineAsyncTscWorker, &CurTsc, NULL);
+            if (RT_SUCCESS(rc))
+            {
+                if (CurTsc <= PrevTsc)
+                {
+                    fAsync = true;
+                    offMin = offMax = PrevTsc - CurTsc;
+                    Log(("supdrvGipInitDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
+                         iCpu, cLoops, CurTsc, PrevTsc));
+                    break;
+                }
+
+                /* Gather statistics (except the first time). */
+                if (iCpu != 0 || cLoops != 7)
+                {
+                    uint64_t off = CurTsc - PrevTsc;
+                    if (off < offMin)
+                        offMin = off;
+                    if (off > offMax)
+                        offMax = off;
+                    Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
+                }
+
+                /* Next */
+                PrevTsc = CurTsc;
+            }
+            else if (rc == VERR_NOT_SUPPORTED)
+                break;
+            else
+                AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
+        }
+
+        /* broke out of the loop. */
+        if (iCpu < iEndCpu)
+            break;
+    }
+
+    if (poffMin)
+        *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
+    Log(("supdrvGipInitDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
+         fAsync, iEndCpu, rc, offMin, offMax));
+#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
+    OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
+#endif
+    return fAsync;
+}
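The monotonicity test in supdrvGipInitDetermineAsyncTsc boils down to: sample the counter round-robin across CPUs a fixed number of times and flag any sample that is not strictly greater than the previous one. A compact sketch under that assumption (readOnCpu() is a hypothetical stand-in for the RTMpOnSpecific() + ASMReadTSC() round trip):

    #include <stdbool.h>
    #include <stdint.h>

    /* Returns false as soon as a cross-CPU sample fails to increase,
       which is the signal to fall back to SUPGIPMODE_ASYNC_TSC. */
    static bool isMonotonic(uint64_t (*readOnCpu)(int iCpu), int cCpus, int cLoops)
    {
        uint64_t uPrev = readOnCpu(0);
        for (int iLoop = 0; iLoop < cLoops; iLoop++)
            for (int iCpu = 0; iCpu < cCpus; iCpu++)
            {
                uint64_t uCur = readOnCpu(iCpu);
                if (uCur <= uPrev)
                    return false;   /* skewed or drifting TSCs */
                uPrev = uCur;
            }
        return true;
    }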
+
+
+/**
+ * supdrvGipInit() worker that determines the GIP TSC mode.
+ *
+ * @returns The most suitable TSC mode.
+ * @param   pDevExt     Pointer to the device instance data.
+ */
+static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
+{
+    uint64_t u64DiffCoresIgnored;
+    uint32_t uEAX, uEBX, uECX, uEDX;
+
+    /*
+     * Establish whether the CPU advertises TSC as invariant, we need that in
+     * a couple of places below.
+     */
+    bool fInvariantTsc = false;
+    if (ASMHasCpuId())
+    {
+        uEAX = ASMCpuId_EAX(0x80000000);
+        if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
+        {
+            uEDX = ASMCpuId_EDX(0x80000007);
+            if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
+                fInvariantTsc = true;
+        }
+    }
+
+    /*
+     * On single CPU systems, we don't need to consider ASYNC mode.
+     */
+    if (RTMpGetCount() <= 1)
+        return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
+
+    /*
+     * Allow the user and/or OS specific bits to force async mode.
+     */
+    if (supdrvOSGetForcedAsyncTscMode(pDevExt))
+        return SUPGIPMODE_ASYNC_TSC;
+
+
+#if 0 /** @todo enable this when i64TscDelta is applied in all places where it's needed */
+    /*
+     * Use invariant mode if the CPU says TSC is invariant.
+     */
+    if (fInvariantTsc)
+        return SUPGIPMODE_INVARIANT_TSC;
+#endif
+
+    /*
+     * TSC is not invariant and we're on SMP, this presents two problems:
+     *
+     *      (1) There might be a skew between the CPUs, so that cpu0
+     *          returns a TSC that is slightly different from cpu1.
+     *          This skew may be due to (2), bad TSC initialization
+     *          or slightly different TSC rates.
+     *
+     *      (2) Power management (and other things) may cause the TSC
+     *          to run at a non-constant speed, and cause the speed
+     *          to be different on the cpus. This will result in (1).
+     *
+     * If any of the above is detected, we will have to use ASYNC mode.
+     */
+
+    /* (1). Try check for current differences between the cpus. */
+    if (supdrvGipInitDetermineAsyncTsc(&u64DiffCoresIgnored))
+        return SUPGIPMODE_ASYNC_TSC;
+
+#if 1 /** @todo remove once i64TscDelta is applied everywhere. Enable #if 0 above. */
+    if (fInvariantTsc)
+        return SUPGIPMODE_INVARIANT_TSC;
+#endif
+
+    /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
+    ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
+    if (   ASMIsValidStdRange(uEAX)
+        && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
+    {
+        /* Check for APM support. */
+        uEAX = ASMCpuId_EAX(0x80000000);
+        if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
+        {
+            uEDX = ASMCpuId_EDX(0x80000007);
+            if (uEDX & 0x3e)  /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
+                return SUPGIPMODE_ASYNC_TSC;
+        }
+    }
+
+    return SUPGIPMODE_SYNC_TSC;
+}
+
+
+/**
+ * Initializes per-CPU GIP information.
+ *
+ * @param   pDevExt     Pointer to the device instance data.
+ * @param   pGip        Pointer to the GIP.
+ * @param   pCpu        Pointer to which GIP CPU to initialize.
+ * @param   u64NanoTS   The current nanosecond timestamp.
+ */
+static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
+{
+    /* !!! Warning !!! The GIP may not be linked to the device instance data at this point!
+       which is why we have 2 separate parameters.  Don't dereference pDevExt->pGip here. */
+    pCpu->u32TransactionId  = 2;
+    pCpu->u64NanoTS         = u64NanoTS;
+    pCpu->u64TSC            = ASMReadTSC();
+    pCpu->u64TSCSample      = GIP_TSC_DELTA_RSVD;
+    pCpu->i64TSCDelta       = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
+
+    ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
+    ASMAtomicWriteSize(&pCpu->idCpu,    NIL_RTCPUID);
+    ASMAtomicWriteS16(&pCpu->iCpuSet,   -1);
+    ASMAtomicWriteU16(&pCpu->idApic,    UINT16_MAX);
+
+    /*
+     * We don't know the following values until we've executed updates.
+     * So, we'll just pretend it's a 4 GHz CPU and adjust the history on
+     * the 2nd timer callout.
+     */
+    pCpu->u64CpuHz          = _4G + 1; /* tstGIP-2 depends on this. */
+    pCpu->u32UpdateIntervalTSC
+        = pCpu->au32TSCHistory[0]
+        = pCpu->au32TSCHistory[1]
+        = pCpu->au32TSCHistory[2]
+        = pCpu->au32TSCHistory[3]
+        = pCpu->au32TSCHistory[4]
+        = pCpu->au32TSCHistory[5]
+        = pCpu->au32TSCHistory[6]
+        = pCpu->au32TSCHistory[7]
+        = (uint32_t)(_4G / pGip->u32UpdateHz);
+}
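The invariant-TSC probe at the top of supdrvGipInitDetermineTscMode checks CPUID leaf 0x80000007, EDX bit 8, which is what X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR denotes. A user-mode sketch of the same check using GCC/Clang's cpuid.h wrapper (x86 only; this is an illustration of the probe, not the driver's ASMCpuId code):

    #include <cpuid.h>      /* GCC/Clang builtin wrapper */
    #include <stdbool.h>

    static bool hasInvariantTsc(void)
    {
        unsigned uEax, uEbx, uEcx, uEdx;
        /* Make sure the extended leaf range reaches 0x80000007 first. */
        if (!__get_cpuid(0x80000000, &uEax, &uEbx, &uEcx, &uEdx) || uEax < 0x80000007)
            return false;
        __get_cpuid(0x80000007, &uEax, &uEbx, &uEcx, &uEdx);
        return (uEdx & (1u << 8)) != 0;     /* "Invariant TSC" bit */
    }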
+
+
+/**
+ * Initializes the GIP data.
+ *
+ * @param   pDevExt             Pointer to the device instance data.
+ * @param   pGip                Pointer to the read-write kernel mapping of the GIP.
+ * @param   HCPhys              The physical address of the GIP.
+ * @param   u64NanoTS           The current nanosecond timestamp.
+ * @param   uUpdateHz           The update frequency.
+ * @param   uUpdateIntervalNS   The update interval in nanoseconds.
+ * @param   cCpus               The CPU count.
+ */
+static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
+                          uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
+{
+    size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
+    unsigned i;
+#ifdef DEBUG_DARWIN_GIP
+    OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
+#else
+    LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
+#endif
+
+    /*
+     * Initialize the structure.
+     */
+    memset(pGip, 0, cbGip);
+
+    pGip->u32Magic                = SUPGLOBALINFOPAGE_MAGIC;
+    pGip->u32Version              = SUPGLOBALINFOPAGE_VERSION;
+    pGip->u32Mode                 = supdrvGipInitDetermineTscMode(pDevExt);
+    if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
+        /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
+        pGip->enmUseTscDelta      = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
+                                  ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
+    else
+        pGip->enmUseTscDelta      = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
+    pGip->cCpus                   = (uint16_t)cCpus;
+    pGip->cPages                  = (uint16_t)(cbGip / PAGE_SIZE);
+    pGip->u32UpdateHz             = uUpdateHz;
+    pGip->u32UpdateIntervalNS     = uUpdateIntervalNS;
+    pGip->fGetGipCpu              = SUPGIPGETCPU_APIC_ID;
+    RTCpuSetEmpty(&pGip->OnlineCpuSet);
+    RTCpuSetEmpty(&pGip->PresentCpuSet);
+    RTMpGetSet(&pGip->PossibleCpuSet);
+    pGip->cOnlineCpus             = RTMpGetOnlineCount();
+    pGip->cPresentCpus            = RTMpGetPresentCount();
+    pGip->cPossibleCpus           = RTMpGetCount();
+    pGip->idCpuMax                = RTMpGetMaxCpuId();
+    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
+        pGip->aiCpuFromApicId[i]    = UINT16_MAX;
+    for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
+        pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
+    for (i = 0; i < cCpus; i++)
+        supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
+
+    /*
+     * Link it to the device extension.
+     */
+    pDevExt->pGip      = pGip;
+    pDevExt->HCPhysGip = HCPhys;
+    pDevExt->cGipUsers = 0;
+}
…
     if (RT_SUCCESS(rc))
     {
-        rc = supdrvGipMeasureTscFreq(pDevExt);
+        rc = supdrvGipInitMeasureTscFreq(pDevExt);
         if (RT_SUCCESS(rc))
         {
…
             if (pGip->u32Mode == SUPGIPMODE_ASYNC_TSC)
             {
-                rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL, supdrvGipAsyncTimer,
-                                     pDevExt);
+                rc = RTTimerCreateEx(&pDevExt->pGipTimer, u32Interval, RTTIMER_FLAGS_CPU_ALL,
+                                     supdrvGipAsyncTimer, pDevExt);
                 if (rc == VERR_NOT_SUPPORTED)
                 {
…
                 g_pSUPGlobalInfoPage = pGip;
                 if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
-                    supdrvRefineTscFreq(pDevExt);
+                    supdrvGipInitAsyncRefineTscFreq(pDevExt);
                 return VINF_SUCCESS;
…
         }
         else
-            OSDBGPRINT(("supdrvGipCreate: supdrvGipMeasureTscFreq failed. rc=%Rrc\n", rc));
+            OSDBGPRINT(("supdrvGipCreate: supdrvGipInitMeasureTscFreq failed. rc=%Rrc\n", rc));
     }
     else
…
 /**
+ * Invalidates the GIP data upon termination.
+ *
+ * @param   pGip        Pointer to the read-write kernel mapping of the GIP.
+ */
+static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
+{
+    unsigned i;
+    pGip->u32Magic = 0;
+    for (i = 0; i < pGip->cCpus; i++)
+    {
+        pGip->aCPUs[i].u64NanoTS       = 0;
+        pGip->aCPUs[i].u64TSC          = 0;
+        pGip->aCPUs[i].iTSCHistoryHead = 0;
+        pGip->aCPUs[i].u64TSCSample    = 0;
+        pGip->aCPUs[i].i64TSCDelta     = INT64_MAX;
+    }
+}
+
+
+/**
  * Terminates the GIP.
…
      */
     supdrvGipReleaseHigherTimerFrequencyFromSystem(pDevExt);
 }
+
+
+
+
+/*
+ *
+ *
+ * GIP Update Timer Related Code
+ * GIP Update Timer Related Code
+ * GIP Update Timer Related Code
+ *
+ *
+ */
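For reference, the sizing done in supdrvGipInit above: cbGip is RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE), i.e. the fixed header plus one SUPGIPCPU entry per possible CPU, rounded up to whole pages, and cPages is cbGip / PAGE_SIZE. A small stand-alone sketch of the same arithmetic (MY_PAGE_SIZE and the helper are illustrative assumptions; the real code uses the platform PAGE_SIZE and offsetof on the actual structure):

    #include <stddef.h>

    #define MY_PAGE_SIZE 4096u   /* assumption: 4 KiB pages */

    /* Pages needed for a header of cbHeader bytes plus cCpus entries of cbPerCpu bytes. */
    static size_t gipSizeInPages(size_t cbHeader, size_t cbPerCpu, unsigned cCpus)
    {
        size_t cbGip = cbHeader + (size_t)cCpus * cbPerCpu;     /* like offsetof(..., aCPUs[cCpus]) */
        return (cbGip + MY_PAGE_SIZE - 1) / MY_PAGE_SIZE;       /* round up to whole pages */
    }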
+
+
+/**
+ * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
+ * updates all the per cpu data except the transaction id.
+ *
+ * @param   pDevExt         The device extension.
+ * @param   pGipCpu         Pointer to the per cpu data.
+ * @param   u64NanoTS       The current time stamp.
+ * @param   u64TSC          The current TSC.
+ * @param   iTick           The current timer tick.
+ *
+ * @remarks Can be called with interrupts disabled!
+ */
+static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
+{
+    uint64_t    u64TSCDelta;
+    uint32_t    u32UpdateIntervalTSC;
+    uint32_t    u32UpdateIntervalTSCSlack;
+    unsigned    iTSCHistoryHead;
+    uint64_t    u64CpuHz;
+    uint32_t    u32TransactionId;
+
+    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
+    AssertPtrReturnVoid(pGip);
+
+    /* Delta between this and the previous update. */
+    ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
+
+    /*
+     * Update the NanoTS.
+     */
+    ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
+
+    /*
+     * Calc TSC delta.
+     */
+    u64TSCDelta = u64TSC - pGipCpu->u64TSC;
+    ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
+
+    /* We don't need to keep recalculating the frequency when it's invariant. */
+    if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
+        return;
+
+    if (u64TSCDelta >> 32)
+    {
+        u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
+        pGipCpu->cErrors++;
+    }
+
+    /*
+     * On the 2nd and 3rd callout, reset the history with the current TSC
+     * interval since the values entered by supdrvGipInit are totally off.
+     * The interval on the 1st callout is completely unreliable, the 2nd is a bit
+     * better, while the 3rd should be most reliable.
+     */
+    u32TransactionId = pGipCpu->u32TransactionId;
+    if (RT_UNLIKELY(   (   u32TransactionId == 5
+                        || u32TransactionId == 7)
+                    && (   iTick == 2
+                        || iTick == 3) ))
+    {
+        unsigned i;
+        for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
+            ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
+    }
+
+    /*
+     * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
+     * Wait until we have at least one full history since the above history reset.  The
+     * assumption is that the majority of the previous history values will be tolerable.
+     * See @bugref{6710} comment #67.
+     */
+    if (   u32TransactionId > 23 /* 7 + (8 * 2) */
+        && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
+    {
+        uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
+        if (   pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
+            || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
+        {
+            uint32_t u32;
+            u32 = pGipCpu->au32TSCHistory[0];
+            u32 += pGipCpu->au32TSCHistory[1];
+            u32 += pGipCpu->au32TSCHistory[2];
+            u32 += pGipCpu->au32TSCHistory[3];
+            u32 >>= 2;
+            u64TSCDelta = pGipCpu->au32TSCHistory[4];
+            u64TSCDelta += pGipCpu->au32TSCHistory[5];
+            u64TSCDelta += pGipCpu->au32TSCHistory[6];
+            u64TSCDelta += pGipCpu->au32TSCHistory[7];
+            u64TSCDelta >>= 2;
+            u64TSCDelta += u32;
+            u64TSCDelta >>= 1;
+        }
+    }
+
+    /*
+     * TSC History.
+     */
+    Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
+    iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
+    ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
+    ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
+
+    /*
+     * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
+     *
+     * On Windows, we have an occasional (but recurring) sour value that messes up
+     * the history, but taking only 1 interval reduces the precision overall.
+     * However, this problem existed before the invariant mode was introduced.
+     */
+    if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
+        || pGip->u32UpdateHz >= 1000)
+    {
+        uint32_t u32;
+        u32 = pGipCpu->au32TSCHistory[0];
+        u32 += pGipCpu->au32TSCHistory[1];
+        u32 += pGipCpu->au32TSCHistory[2];
+        u32 += pGipCpu->au32TSCHistory[3];
+        u32 >>= 2;
+        u32UpdateIntervalTSC = pGipCpu->au32TSCHistory[4];
+        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
+        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
+        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
+        u32UpdateIntervalTSC >>= 2;
+        u32UpdateIntervalTSC += u32;
+        u32UpdateIntervalTSC >>= 1;
+
+        /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
+        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
+    }
+    else if (pGip->u32UpdateHz >= 90)
+    {
+        u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
+        u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
+        u32UpdateIntervalTSC >>= 1;
+
+        /* value chosen on a 2GHz thinkpad running windows */
+        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
+    }
+    else
+    {
+        u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
+
+        /* This value hasn't been checked yet.. waiting for OS/2 and 33Hz timers.. :-) */
+        u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
+    }
+    ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
+
+    /*
+     * CpuHz.
+     */
+    u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
+    u64CpuHz /= pGip->u32UpdateIntervalNS;
+    ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
+}
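The 8-sample smoothing used above averages the two halves of the history ring and then averages those halves, which is arithmetically the mean of all 8 entries: ((h0+h1+h2+h3)/4 + (h4+h5+h6+h7)/4) / 2. In isolation (a sketch; the driver computes the same thing with shift operators, and the interval values are small enough that the 32-bit sums do not overflow in practice):

    #include <stdint.h>

    /* Smoothed update interval from the 8-entry TSC history ring. */
    static uint32_t smoothInterval(const uint32_t aHist[8])
    {
        uint32_t uLo = (aHist[0] + aHist[1] + aHist[2] + aHist[3]) / 4;
        uint32_t uHi = (aHist[4] + aHist[5] + aHist[6] + aHist[7]) / 4;
        return (uLo + uHi) / 2;
    }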
+
+
+/**
+ * Updates the GIP.
+ *
+ * @param   pDevExt         The device extension.
+ * @param   u64NanoTS       The current nanosecond timestamp.
+ * @param   u64TSC          The current TSC timestamp.
+ * @param   idCpu           The CPU ID.
+ * @param   iTick           The current timer tick.
+ *
+ * @remarks Can be called with interrupts disabled!
+ */
+static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
+{
+    /*
+     * Determine the relevant CPU data.
+     */
+    PSUPGIPCPU pGipCpu;
+    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
+    AssertPtrReturnVoid(pGip);
+
+    if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
+        pGipCpu = &pGip->aCPUs[0];
+    else
+    {
+        unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
+        if (RT_UNLIKELY(iCpu >= pGip->cCpus))
+            return;
+        pGipCpu = &pGip->aCPUs[iCpu];
+        if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
+            return;
+    }
+
+    /*
+     * Start update transaction.
+     */
+    if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
+    {
+        /* this can happen on win32 if we're taking too long and there are more CPUs around. shouldn't happen though. */
+        AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
+        ASMAtomicIncU32(&pGipCpu->u32TransactionId);
+        pGipCpu->cErrors++;
+        return;
+    }
+
+    /*
+     * Recalc the update frequency every 0x800th time.
+     */
+    if (   pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC    /* cuz we're not recalculating the frequency on invariant hosts. */
+        && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
+    {
+        if (pGip->u64NanoTSLastUpdateHz)
+        {
+#ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
+            uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
+            uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
+            if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
+            {
+                /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
+                 *        calculation on non-invariant hosts if it changes the history decision
+                 *        taken in supdrvGipDoUpdateCpu(). */
+                uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
+                ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
+                ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
+            }
+#endif
+        }
+        ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
+    }
+
+    /*
+     * Update the data.
+     */
+    supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
+
+    /*
+     * Complete transaction.
+     */
+    ASMAtomicIncU32(&pGipCpu->u32TransactionId);
+}
+
+
+/**
+ * Updates the per cpu GIP data for the calling cpu.
+ *
+ * @param   pDevExt         The device extension.
+ * @param   u64NanoTS       The current nanosecond timestamp.
+ * @param   u64TSC          The current TSC timestamp.
+ * @param   idCpu           The CPU ID.
+ * @param   idApic          The APIC id for the CPU index.
+ * @param   iTick           The current timer tick.
+ *
+ * @remarks Can be called with interrupts disabled!
+ */
+static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
+                                  RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
+{
+    uint32_t iCpu;
+    PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
+
+    /*
+     * Avoid a potential race when a CPU online notification doesn't fire on
+     * the onlined CPU but the tick creeps in before the event notification is
+     * run.
+     */
+    if (RT_UNLIKELY(iTick == 1))
+    {
+        iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
+        if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
+            supdrvGipMpEventOnlineOrInitOnCpu(pDevExt, idCpu);
+    }
+
+    iCpu = pGip->aiCpuFromApicId[idApic];
+    if (RT_LIKELY(iCpu < pGip->cCpus))
+    {
+        PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
+        if (pGipCpu->idCpu == idCpu)
+        {
+            /*
+             * Start update transaction.
+             */
+            if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
+            {
+                AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
+                ASMAtomicIncU32(&pGipCpu->u32TransactionId);
+                pGipCpu->cErrors++;
+                return;
+            }
+
+            /*
+             * Update the data.
+             */
+            supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
+
+            /*
+             * Complete transaction.
+             */
+            ASMAtomicIncU32(&pGipCpu->u32TransactionId);
+        }
+    }
 }
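Consumers of these per-CPU records rely on the transaction id exactly like a seqlock: the updater makes it odd while writing and even again when done, so a reader retries until it sees the same even value before and after copying the fields. A minimal reader sketch under that assumption (C11 atomics; GIPCPUSAMPLE is an illustrative cut-down of SUPGIPCPU, and the real ring-3 readers in SUPLib do additional checks):

    #include <stdatomic.h>
    #include <stdint.h>

    typedef struct
    {
        _Atomic uint32_t u32TransactionId;  /* odd while an update is in flight */
        uint64_t         u64NanoTS;
        uint64_t         u64TSC;
    } GIPCPUSAMPLE;

    /* Retry until a consistent (even, unchanged) transaction id brackets the copy. */
    static void readSample(GIPCPUSAMPLE *p, uint64_t *pNanoTS, uint64_t *pTsc)
    {
        uint32_t id;
        do
        {
            id       = atomic_load(&p->u32TransactionId);
            *pNanoTS = p->u64NanoTS;
            *pTsc    = p->u64TSC;
        } while ((id & 1) || id != atomic_load(&p->u32TransactionId));
    }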
1690      */
1691     for (i = 0; i < pGip->cCpus; i++)
1692         if (pGip->aCPUs[i].idCpu == idCpu)
1693             return i;
1694
1695     cTries = 0;
1696     do
1697     {
1698         for (i = 0; i < pGip->cCpus; i++)
1699         {
1700             bool fRc;
1701             ASMAtomicCmpXchgSize(&pGip->aCPUs[i].idCpu, idCpu, NIL_RTCPUID, fRc);
1702             if (fRc)
1703                 return i;
1704         }
1705     } while (cTries++ < 32);
1706     AssertReleaseFailed();
1707     return i - 1;
1708 }
1709
1710
1711 /**
1712  * Finds the GIP CPU index corresponding to @a idCpu.
1713  *
1714  * @returns GIP CPU array index, UINT32_MAX if not found.
1715  * @param pGip  The GIP.
1716  * @param idCpu The CPU ID.
1717  */
1718 static uint32_t supdrvGipFindCpuIndexForCpuId(PSUPGLOBALINFOPAGE pGip, RTCPUID idCpu)
1719 {
1720     uint32_t i;
1721     for (i = 0; i < pGip->cCpus; i++)
1722         if (pGip->aCPUs[i].idCpu == idCpu)
1723             return i;
1724     return UINT32_MAX;
1725 }
1726
1727
1728 /**
1729  * The calling CPU should be accounted as online, update GIP accordingly.
1730  *
1731  * This is used by supdrvGipCreate() as well as supdrvGipMpEvent().
1732  *
1733  * @param pDevExt The device extension.
1734  * @param idCpu   The CPU ID.
1735  */
1736 static void supdrvGipMpEventOnline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1737 {
1738     int iCpuSet = 0;
1739     uint16_t idApic = UINT16_MAX;
1740     uint32_t i = 0;
1741     uint64_t u64NanoTS = 0;
1742     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1743
1744     AssertPtrReturnVoid(pGip);
1745     AssertRelease(idCpu == RTMpCpuId());
1746     Assert(pGip->cPossibleCpus == RTMpGetCount());
1747
1748     /*
1749      * Do this behind a spinlock with interrupts disabled as this can fire
1750      * on all CPUs simultaneously, see @bugref{6110}.
1751      */
1752     RTSpinlockAcquire(pDevExt->hGipSpinlock);
1753
1754     /*
1755      * Update the globals.
1756      */
1757     ASMAtomicWriteU16(&pGip->cPresentCpus, RTMpGetPresentCount());
1758     ASMAtomicWriteU16(&pGip->cOnlineCpus, RTMpGetOnlineCount());
1759     iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1760     if (iCpuSet >= 0)
1761     {
1762         Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1763         RTCpuSetAddByIndex(&pGip->OnlineCpuSet, iCpuSet);
1764         RTCpuSetAddByIndex(&pGip->PresentCpuSet, iCpuSet);
1765     }
1766
1767     /*
1768      * Update the entry.
1769      */
1770     u64NanoTS = RTTimeSystemNanoTS() - pGip->u32UpdateIntervalNS;
1771     i = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
1772     supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
1773     idApic = ASMGetApicId();
1774     ASMAtomicWriteU16(&pGip->aCPUs[i].idApic, idApic);
1775     ASMAtomicWriteS16(&pGip->aCPUs[i].iCpuSet, (int16_t)iCpuSet);
1776     ASMAtomicWriteSize(&pGip->aCPUs[i].idCpu, idCpu);
1777
1778     /*
1779      * Update the APIC ID and CPU set index mappings.
1780      */
1781     ASMAtomicWriteU16(&pGip->aiCpuFromApicId[idApic], i);
1782     ASMAtomicWriteU16(&pGip->aiCpuFromCpuSetIdx[iCpuSet], i);
1783
1784     /* Update the Mp online/offline counter. */
1785     ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1786
1787     /* Add this CPU to the set of CPUs for which we need to calculate their TSC-deltas.
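       (In the SUPDRV_USE_TSC_DELTA_THREAD configuration this also nudges the
       measurement thread from the Listening/Measuring states into
       WaitAndMeasure below, so the delta for the newly onlined CPU gets
       measured promptly.)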
*/
1788     if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1789     {
1790         RTCpuSetAddByIndex(&pDevExt->TscDeltaCpuSet, iCpuSet);
1791 #ifdef SUPDRV_USE_TSC_DELTA_THREAD
1792         RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
1793         if (   pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Listening
1794             || pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
1795         {
1796             pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_WaitAndMeasure;
1797         }
1798         RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
1799 #endif
1800     }
1801
1802     /* commit it */
1803     ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_ONLINE);
1804
1805     RTSpinlockRelease(pDevExt->hGipSpinlock);
1806 }
1807
1808
1809 /**
1810  * The CPU should be accounted as offline, update the GIP accordingly.
1811  *
1812  * This is used by supdrvGipMpEvent.
1813  *
1814  * @param pDevExt The device extension.
1815  * @param idCpu   The CPU ID.
1816  */
1817 static void supdrvGipMpEventOffline(PSUPDRVDEVEXT pDevExt, RTCPUID idCpu)
1818 {
1819     int iCpuSet;
1820     unsigned i;
1821
1822     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1823
1824     AssertPtrReturnVoid(pGip);
1825     RTSpinlockAcquire(pDevExt->hGipSpinlock);
1826
1827     iCpuSet = RTMpCpuIdToSetIndex(idCpu);
1828     AssertReturnVoid(iCpuSet >= 0);
1829
1830     i = pGip->aiCpuFromCpuSetIdx[iCpuSet];
1831     AssertReturnVoid(i < pGip->cCpus);
1832     AssertReturnVoid(pGip->aCPUs[i].idCpu == idCpu);
1833
1834     Assert(RTCpuSetIsMemberByIndex(&pGip->PossibleCpuSet, iCpuSet));
1835     RTCpuSetDelByIndex(&pGip->OnlineCpuSet, iCpuSet);
1836
1837     /* Update the Mp online/offline counter. */
1838     ASMAtomicIncU32(&pDevExt->cMpOnOffEvents);
1839
1840     /* If we are the initiator going offline while measuring the TSC delta, unspin other waiting CPUs! */
1841     if (ASMAtomicReadU32(&pDevExt->idTscDeltaInitiator) == idCpu)
1842     {
1843         ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
1844         ASMAtomicWriteU64(&pGip->aCPUs[i].u64TSCSample, ~GIP_TSC_DELTA_RSVD);
1845     }
1846
1847     if (pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED)
1848     {
1849         /* Reset the TSC delta, we will recalculate it lazily. */
1850         ASMAtomicWriteS64(&pGip->aCPUs[i].i64TSCDelta, INT64_MAX);
1851         /* Remove this CPU from the set of CPUs that we have obtained the TSC deltas. */
1852         RTCpuSetDelByIndex(&pDevExt->TscDeltaObtainedCpuSet, iCpuSet);
1853     }
1854
1855     /* commit it */
1856     ASMAtomicWriteSize(&pGip->aCPUs[i].enmState, SUPGIPCPUSTATE_OFFLINE);
1857
1858     RTSpinlockRelease(pDevExt->hGipSpinlock);
1859 }
1860
1861
1862 /**
1863  * Multiprocessor event notification callback.
1864  *
1865  * This is used to make sure that the GIP master gets passed on to
1866  * another CPU. It also updates the associated CPU data.
1867  *
1868  * @param enmEvent The event.
1869  * @param idCpu    The cpu it applies to.
1870  * @param pvUser   Pointer to the device extension.
1871  *
1872  * @remarks This function -must- fire on the newly online'd CPU for the
1873  *          RTMPEVENT_ONLINE case and can fire on any CPU for the
1874  *          RTMPEVENT_OFFLINE case.
1875  */
1876 static DECLCALLBACK(void) supdrvGipMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
1877 {
1878     PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
1879     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
1880
1881     AssertRelease(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1882
1883     /*
1884      * Update the GIP CPU data.
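 * (Editorial note: per the @remarks on this callback, RTMPEVENT_ONLINE is
 * delivered on the CPU that just came online, hence the AssertRelease on
 * RTMpCpuId() below, while RTMPEVENT_OFFLINE may arrive on any CPU.)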
1885      */
1886     if (pGip)
1887     {
1888         switch (enmEvent)
1889         {
1890             case RTMPEVENT_ONLINE:
1891                 AssertRelease(idCpu == RTMpCpuId());
1892                 supdrvGipMpEventOnline(pDevExt, idCpu);
1893                 break;
1894             case RTMPEVENT_OFFLINE:
1895                 supdrvGipMpEventOffline(pDevExt, idCpu);
1896                 break;
1897         }
1898     }
1899
1900     /*
1901      * Make sure there is a master GIP.
1902      */
1903     if (enmEvent == RTMPEVENT_OFFLINE)
1904     {
1905         RTCPUID idGipMaster = ASMAtomicReadU32(&pDevExt->idGipMaster);
1906         if (idGipMaster == idCpu)
1907         {
1908             /*
1909              * The GIP master is going offline, find a new one.
1910              */
1911             bool fIgnored;
1912             unsigned i;
1913             RTCPUID idNewGipMaster = NIL_RTCPUID;
1914             RTCPUSET OnlineCpus;
1915             RTMpGetOnlineSet(&OnlineCpus);
1916
1917             for (i = 0; i < RTCPUSET_MAX_CPUS; i++)
1918                 if (RTCpuSetIsMemberByIndex(&OnlineCpus, i))
1919                 {
1920                     RTCPUID idCurCpu = RTMpCpuIdFromSetIndex(i);
1921                     if (idCurCpu != idGipMaster)
1922                     {
1923                         idNewGipMaster = idCurCpu;
1924                         break;
1925                     }
1926                 }
1927
1928             Log(("supdrvGipMpEvent: Gip master %#lx -> %#lx\n", (long)idGipMaster, (long)idNewGipMaster));
1929             ASMAtomicCmpXchgSize(&pDevExt->idGipMaster, idNewGipMaster, idGipMaster, fIgnored);
1930             NOREF(fIgnored);
1931         }
1932     }
1933 }
1934
1935
1936 /*
2234
2235
2236 /*
2237  *
2238  *
2239  * TSC Delta Measurements And Related Code
2240  * TSC Delta Measurements And Related Code
2241  * TSC Delta Measurements And Related Code
2242  *
2243  *
2244  */
2245
2246
2247 /*
1937 2248  * Select TSC delta measurement algorithm.
1938 2249  */
… …
1946 2257 #ifdef GIP_TSC_DELTA_METHOD_2
1947 2258
1948 /**
2259 /**
1949 2260  * TSC delta measurement algorithm \#2 result entry.
1950 2261  */
… …
1956 2267 } SUPDRVTSCDELTAMETHOD2ENTRY;
1957 2268
1958 /**
2269 /**
1959 2270  * TSC delta measurement algorithm \#2 Data.
1960 2271  */
… …
1977 2288 #endif /* GIP_TSC_DELTA_METHOD_2 */
1978 2289
1979 /**
1980  * Argument package/state passed by supdrvMeasureTscDeltaOne to the RTMpOn
1981  * callback worker.
2290 /**
2291  * Argument package/state passed by supdrvMeasureTscDeltaOne to the RTMpOn
2292  * callback worker.
1982 2293  */
1983 2294 typedef struct SUPDRVGIPTSCDELTARGS
… …
2001 2312
2002 2313 #ifdef GIP_TSC_DELTA_METHOD_2
2003 /*
2314 /*
2004 2315  * TSC delta measurement algorithm \#2 configuration and code - Experimental!!
2005 2316  */
… …
2066 2377
2067 2378 /**
2068  * The core function of the 2nd TSC delta measurement algorithm.
2069  *
2070  * The idea here is that we have the two CPUs execute the exact same code
2071  * collecting a largish set of TSC samples. The code has one data dependency on
2072  * the other CPU, the intention being to synchronize the execution as well as
2073  * to help cross-reference the two sets of TSC samples (the sequence numbers).
2074  *
2379  * The core function of the 2nd TSC delta measurement algorithm.
2380  *
2381  * The idea here is that we have the two CPUs execute the exact same code
2382  * collecting a largish set of TSC samples. The code has one data dependency on
2383  * the other CPU, the intention being to synchronize the execution as well as
2384  * to help cross-reference the two sets of TSC samples (the sequence numbers).
2385  *
2075 2386  * The @a fLag parameter is used to modify the execution a tiny bit on one or
2076 2387  * both of the CPUs. When @a fLag differs between the CPUs, it is thought that
2077 2388  * it will help with making the CPUs enter lock-step execution occasionally.
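 *
 * Roughly, each CPU runs a loop of this shape (an editorial sketch only; the
 * entry field names below are hypothetical, the real layout lives in
 * SUPDRVTSCDELTAMETHOD2ENTRY):
 * @code
 *     for (i = 0; i < cSamples; i++)
 *     {
 *         aEntries[i].uSeqMine  = ASMAtomicIncU32(&pMyData->iSeqNo);
 *         aEntries[i].uSeqOther = ASMAtomicReadU32(piOtherSeqNo);
 *         aEntries[i].uTsc      = ASMReadTSC();
 *     }
 * @endcode
 * Matching the sequence numbers afterwards indicates which samples were taken
 * close together in time on the two CPUs.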
2078  *
2389  *
2079 2390  */
2080 2391 static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
… …
2729 3040
2730 3041
2731 /**
2732  * Callback used by supdrvDetermineAsyncTSC to read the TSC on a CPU.
2733  *
2734  * @param idCpu   Ignored.
2735  * @param pvUser1 Where to put the TSC.
2736  * @param pvUser2 Ignored.
2737  */
2738 static DECLCALLBACK(void) supdrvDetermineAsyncTscWorker(RTCPUID idCpu, void *pvUser1, void *pvUser2)
2739 {
2740     ASMAtomicWriteU64((uint64_t volatile *)pvUser1, ASMReadTSC());
2741 }
2742
2743
2744 /**
2745  * Determine if Async GIP mode is required because of TSC drift.
2746  *
2747  * When using the default/normal timer code it is essential that the time stamp counter
2748  * (TSC) never runs backwards, that is, a read operation to the counter should return
2749  * a bigger value than any previous read operation. This is guaranteed by the latest
2750  * AMD CPUs and by newer Intel CPUs which never enter the C2 state (P4). In any other
2751  * case we have to choose the asynchronous timer mode.
2752  *
2753  * @param poffMin Pointer to the determined difference between different
2754  *                cores (optional, can be NULL).
2755  * @return false if the time stamp counters appear to be synchronized, true otherwise.
2756  */
2757 static bool supdrvDetermineAsyncTsc(uint64_t *poffMin)
2758 {
2759     /*
2760      * Just iterate all the cpus 8 times and make sure that the TSC is
2761      * ever increasing. We don't bother taking TSC rollover into account.
2762      */
2763     int iEndCpu = RTMpGetArraySize();
2764     int iCpu;
2765     int cLoops = 8;
2766     bool fAsync = false;
2767     int rc = VINF_SUCCESS;
2768     uint64_t offMax = 0;
2769     uint64_t offMin = ~(uint64_t)0;
2770     uint64_t PrevTsc = ASMReadTSC();
2771
2772     while (cLoops-- > 0)
2773     {
2774         for (iCpu = 0; iCpu < iEndCpu; iCpu++)
2775         {
2776             uint64_t CurTsc;
2777             rc = RTMpOnSpecific(RTMpCpuIdFromSetIndex(iCpu), supdrvDetermineAsyncTscWorker, &CurTsc, NULL);
3042 #ifdef SUPDRV_USE_TSC_DELTA_THREAD
3043
3044 /**
3045  * Switches the TSC-delta measurement thread into the butchered state.
3046  *
3047  * @returns VBox status code.
3048  * @param pDevExt       Pointer to the device instance data.
3049  * @param fSpinlockHeld Whether the TSC-delta spinlock is held or not.
3050  * @param pszFailed     An error message to log.
3051  * @param rcFailed      The error code to exit the thread with.
3052  */
3053 static int supdrvTscDeltaThreadButchered(PSUPDRVDEVEXT pDevExt, bool fSpinlockHeld, const char *pszFailed, int rcFailed)
3054 {
3055     if (!fSpinlockHeld)
3056         RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3057
3058     pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Butchered;
3059     RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3060     OSDBGPRINT(("supdrvTscDeltaThreadButchered: %s. rc=%Rrc\n", pszFailed, rcFailed));
3061     return rcFailed;
3062 }
3063
3064
3065 /**
3066  * The TSC-delta measurement thread.
3067  *
3068  * @returns VBox status code.
3069  * @param hThread The thread handle.
3070  * @param pvUser  Opaque pointer to the device instance data.
3071  */
3072 static DECLCALLBACK(int) supdrvTscDeltaThread(RTTHREAD hThread, void *pvUser)
3073 {
3074     PSUPDRVDEVEXT pDevExt = (PSUPDRVDEVEXT)pvUser;
3075     bool fInitialMeasurement = true;
3076     uint32_t cConsecutiveTimeouts = 0;
3077     int rc = VERR_INTERNAL_ERROR_2;
3078     for (;;)
3079     {
3080         /*
3081          * Switch on the current state.
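         * (Editorial summary of the state machine handled below: Creating ->
         * Listening; requesters move Listening/Measuring to WaitAndMeasure;
         * WaitAndMeasure -> Measuring -> back to Listening; Terminating ->
         * Destroyed; Butchered is the terminal error state.)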
3082          */
3083         SUPDRVTSCDELTATHREADSTATE enmState;
3084         RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3085         enmState = pDevExt->enmTscDeltaThreadState;
3086         switch (enmState)
3087         {
3088             case kTscDeltaThreadState_Creating:
3089             {
3090                 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
3091                 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent);
3092                 if (RT_FAILURE(rc))
3093                     return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
3094                 /* fall thru */
3095             }
3096
3097             case kTscDeltaThreadState_Listening:
3098             {
3099                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3100
3101                 /* Simple adaptive timeout. */
3102                 if (cConsecutiveTimeouts++ == 10)
3103                 {
3104                     if (pDevExt->cMsTscDeltaTimeout == 1)           /* 1 -> 10 ms */
3105                         pDevExt->cMsTscDeltaTimeout = 10;
3106                     else if (pDevExt->cMsTscDeltaTimeout == 10)     /* 10 -> 100 ms */
3107                         pDevExt->cMsTscDeltaTimeout = 100;
3108                     else if (pDevExt->cMsTscDeltaTimeout == 100)    /* 100 -> 500 ms */
3109                         pDevExt->cMsTscDeltaTimeout = 500;
3110                     cConsecutiveTimeouts = 0;
3111                 }
3112                 rc = RTThreadUserWait(pDevExt->hTscDeltaThread, pDevExt->cMsTscDeltaTimeout);
3113                 if (   RT_FAILURE(rc)
3114                     && rc != VERR_TIMEOUT)
3115                     return supdrvTscDeltaThreadButchered(pDevExt, false /* fSpinlockHeld */, "RTThreadUserWait", rc);
3116                 RTThreadUserReset(pDevExt->hTscDeltaThread);
3117                 break;
3118             }
3119
3120             case kTscDeltaThreadState_WaitAndMeasure:
3121             {
3122                 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Measuring;
3123                 rc = RTSemEventSignal(pDevExt->hTscDeltaEvent); /* (Safe on windows as long as spinlock isn't IRQ safe.) */
3124                 if (RT_FAILURE(rc))
3125                     return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "RTSemEventSignal", rc);
3126                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3127                 pDevExt->cMsTscDeltaTimeout = 1;
3128                 RTThreadSleep(10);
3129                 /* fall thru */
3130             }
3131
3132             case kTscDeltaThreadState_Measuring:
3133             {
3134                 cConsecutiveTimeouts = 0;
3135                 if (fInitialMeasurement)
3136                 {
3137                     int cTries = 8;
3138                     int cMsWaitPerTry = 10;
3139                     fInitialMeasurement = false;
3140                     do
3141                     {
3142                         rc = supdrvMeasureInitialTscDeltas(pDevExt);
3143                         if (   RT_SUCCESS(rc)
3144                             || (   RT_FAILURE(rc)
3145                                 && rc != VERR_TRY_AGAIN
3146                                 && rc != VERR_CPU_OFFLINE))
3147                         {
3148                             break;
3149                         }
3150                         RTThreadSleep(cMsWaitPerTry);
3151                     } while (cTries-- > 0);
3152                 }
3153                 else
3154                 {
3155                     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3156                     unsigned iCpu;
3157
3158                     /* Measure TSC-deltas only for the CPUs that are in the set. */
3159                     rc = VINF_SUCCESS;
3160                     for (iCpu = 0; iCpu < pGip->cCpus; iCpu++)
3161                     {
3162                         PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu];
3163                         if (   pGipCpuWorker->i64TSCDelta == INT64_MAX
3164                             && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet))
3165                         {
3166                             rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu);
3167                         }
3168                     }
3169                 }
3170                 RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3171                 if (pDevExt->enmTscDeltaThreadState == kTscDeltaThreadState_Measuring)
3172                     pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Listening;
3173                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3174                 Assert(rc != VERR_NOT_AVAILABLE); /* VERR_NOT_AVAILABLE is used as the initial value.
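                                                     Publishing the combined status below lets readers of
                                                     rcTscDelta tell "no result yet" (VERR_NOT_AVAILABLE)
                                                     apart from a completed measurement pass, successful
                                                     or not.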
*/
3175                 ASMAtomicWriteS32(&pDevExt->rcTscDelta, rc);
3176                 break;
3177             }
3178
3179             case kTscDeltaThreadState_Terminating:
3180                 pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Destroyed;
3181                 RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3182                 return VINF_SUCCESS;
3183
3184             case kTscDeltaThreadState_Butchered:
3185             default:
3186                 return supdrvTscDeltaThreadButchered(pDevExt, true /* fSpinlockHeld */, "Invalid state", VERR_INVALID_STATE);
3187         }
3188     }
3189
3190     return rc;
3191 }
3192
3193
3194 /**
3195  * Waits for the TSC-delta measurement thread to respond to a state change.
3196  *
3197  * @returns VINF_SUCCESS on success, VERR_TIMEOUT if it doesn't respond in time,
3198  *          other error code on internal error.
3199  *
3200  * @param pDevExt     Pointer to the device instance data.
3201  * @param enmCurState The current state.
3202  * @param enmNewState The new state we're waiting for it to enter.
3203  */
3204 static int supdrvTscDeltaThreadWait(PSUPDRVDEVEXT pDevExt, SUPDRVTSCDELTATHREADSTATE enmCurState,
3205                                     SUPDRVTSCDELTATHREADSTATE enmNewState)
3206 {
3207     /*
3208      * Wait a short while for the expected state transition.
3209      */
3210     int rc;
3211     RTSemEventWait(pDevExt->hTscDeltaEvent, RT_MS_1SEC);
3212     RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3213     if (pDevExt->enmTscDeltaThreadState == enmNewState)
3214     {
3215         RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3216         rc = VINF_SUCCESS;
3217     }
3218     else if (pDevExt->enmTscDeltaThreadState == enmCurState)
3219     {
3220         /*
3221          * Wait longer if the state has not yet transitioned to the one we want.
3222          */
3223         RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3224         rc = RTSemEventWait(pDevExt->hTscDeltaEvent, 50 * RT_MS_1SEC);
3225         if (   RT_SUCCESS(rc)
3226             || rc == VERR_TIMEOUT)
3227         {
3228             /*
3229              * Check whether the state transition succeeded.
3230              */
3231             SUPDRVTSCDELTATHREADSTATE enmState;
3232             RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3233             enmState = pDevExt->enmTscDeltaThreadState;
3234             RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3235             if (enmState == enmNewState)
3236                 rc = VINF_SUCCESS;
3237             else if (enmState == enmCurState)
3238             {
3239                 rc = VERR_TIMEOUT;
3240                 OSDBGPRINT(("supdrvTscDeltaThreadWait: timed out state transition. enmState=%d enmNewState=%d\n", enmState,
3241                             enmNewState));
3242             }
3243             else
3244             {
3245                 rc = VERR_INTERNAL_ERROR;
3246                 OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d, expected %d\n", enmCurState,
3247                             enmState, enmNewState));
3248             }
3249         }
3250         else
3251             OSDBGPRINT(("supdrvTscDeltaThreadWait: RTSemEventWait failed. rc=%Rrc\n", rc));
3252     }
3253     else
3254     {
3255         RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3256         OSDBGPRINT(("supdrvTscDeltaThreadWait: invalid state transition from %d to %d\n", enmCurState, enmNewState));
3257         rc = VERR_INTERNAL_ERROR;
3258     }
3259
3260     return rc;
3261 }
3262
3263
3264 /**
3265  * Terminates the TSC-delta measurement thread.
3266  *
3267  * @param pDevExt Pointer to the device instance data.
3268  */
3269 static void supdrvTscDeltaThreadTerminate(PSUPDRVDEVEXT pDevExt)
3270 {
3271     int rc;
3272     RTSpinlockAcquire(pDevExt->hTscDeltaSpinlock);
3273     pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Terminating;
3274     RTSpinlockRelease(pDevExt->hTscDeltaSpinlock);
3275     RTThreadUserSignal(pDevExt->hTscDeltaThread);
3276     rc = RTThreadWait(pDevExt->hTscDeltaThread, 50 * RT_MS_1SEC, NULL /* prc */);
3277     if (RT_FAILURE(rc))
3278     {
3279         /* Signal a few more times before giving up.
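           The thread normally sits in RTThreadUserWait with a timeout of up to
           500 ms (see the adaptive timeout in the Listening state above), so a
           handful of signal-and-wait rounds gives it a fair chance to notice
           Terminating before we give up.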
*/
3280         int cTriesLeft = 5;
3281         while (--cTriesLeft > 0)
3282         {
3283             RTThreadUserSignal(pDevExt->hTscDeltaThread);
3284             rc = RTThreadWait(pDevExt->hTscDeltaThread, 2 * RT_MS_1SEC, NULL /* prc */);
3285             if (rc != VERR_TIMEOUT)
3286                 break;
3287         }
3288     }
3289 }
3290
3291
3292 /**
3293  * Initializes and spawns the TSC-delta measurement thread.
3294  *
3295  * A thread is required for servicing re-measurement requests from events like
3296  * CPUs coming online, suspend/resume etc. as it cannot be done synchronously
3297  * under all contexts on all OSs.
3298  *
3299  * @returns VBox status code.
3300  * @param pDevExt Pointer to the device instance data.
3301  *
3302  * @remarks Must only be called -after- initializing GIP and setting up MP
3303  *          notifications!
3304  */
3305 static int supdrvTscDeltaThreadInit(PSUPDRVDEVEXT pDevExt)
3306 {
3307     int rc;
3308     Assert(pDevExt->pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED);
3309     rc = RTSpinlockCreate(&pDevExt->hTscDeltaSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, "VBoxTscSpnLck");
3310     if (RT_SUCCESS(rc))
3311     {
3312         rc = RTSemEventCreate(&pDevExt->hTscDeltaEvent);
3313         if (RT_SUCCESS(rc))
3314         {
3315             pDevExt->enmTscDeltaThreadState = kTscDeltaThreadState_Creating;
3316             pDevExt->cMsTscDeltaTimeout = 1;
3317             rc = RTThreadCreate(&pDevExt->hTscDeltaThread, supdrvTscDeltaThread, pDevExt, 0 /* cbStack */,
3318                                 RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "VBoxTscThread");
2778 3319             if (RT_SUCCESS(rc))
2779 3320             {
2780                 if (CurTsc <= PrevTsc)
3321                 rc = supdrvTscDeltaThreadWait(pDevExt, kTscDeltaThreadState_Creating, kTscDeltaThreadState_Listening);
3322                 if (RT_SUCCESS(rc))
2781 3323                 {
2782                     fAsync = true;
2783                     offMin = offMax = PrevTsc - CurTsc;
2784                     Log(("supdrvDetermineAsyncTsc: iCpu=%d cLoops=%d CurTsc=%llx PrevTsc=%llx\n",
2785                          iCpu, cLoops, CurTsc, PrevTsc));
2786                     break;
3324                     ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
3325                     return rc;
2787 3326                 }
2788 3327
2789                 /* Gather statistics (except the first time). */
2790                 if (iCpu != 0 || cLoops != 7)
2791                 {
2792                     uint64_t off = CurTsc - PrevTsc;
2793                     if (off < offMin)
2794                         offMin = off;
2795                     if (off > offMax)
2796                         offMax = off;
2797                     Log2(("%d/%d: off=%llx\n", cLoops, iCpu, off));
2798                 }
2799
2800                 /* Next */
2801                 PrevTsc = CurTsc;
3328                 OSDBGPRINT(("supdrvTscDeltaThreadInit: supdrvTscDeltaThreadWait failed. rc=%Rrc\n", rc));
3329                 supdrvTscDeltaThreadTerminate(pDevExt);
2802 3330             }
2803             else if (rc == VERR_NOT_SUPPORTED)
2804                 break;
2805 3331             else
2806                 AssertMsg(rc == VERR_CPU_NOT_FOUND || rc == VERR_CPU_OFFLINE, ("%d\n", rc));
3332                 OSDBGPRINT(("supdrvTscDeltaThreadInit: RTThreadCreate failed. rc=%Rrc\n", rc));
3333             RTSemEventDestroy(pDevExt->hTscDeltaEvent);
3334             pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
3335         }
2807         }
2808
2809         /* broke out of the loop. */
2810         if (iCpu < iEndCpu)
3336         else
3337             OSDBGPRINT(("supdrvTscDeltaThreadInit: RTSemEventCreate failed. rc=%Rrc\n", rc));
3338         RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
3339         pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
3340     }
3341     else
3342         OSDBGPRINT(("supdrvTscDeltaThreadInit: RTSpinlockCreate failed. rc=%Rrc\n", rc));
3343
3344     return rc;
3345 }
3346
3347
3348 /**
3349  * Terminates the TSC-delta measurement thread and cleans up.
3350  *
3351  * @param pDevExt Pointer to the device instance data.
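 *
 * @remarks Also safe to call on a partially initialized instance: each handle
 *          is checked against NIL before being destroyed, and rcTscDelta is
 *          reset to its initial VERR_NOT_AVAILABLE value.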
3352  */
3353 static void supdrvTscDeltaTerm(PSUPDRVDEVEXT pDevExt)
3354 {
3355     if (   pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK
3356         && pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
3357     {
3358         supdrvTscDeltaThreadTerminate(pDevExt);
3359     }
3360
3361     if (pDevExt->hTscDeltaSpinlock != NIL_RTSPINLOCK)
3362     {
3363         RTSpinlockDestroy(pDevExt->hTscDeltaSpinlock);
3364         pDevExt->hTscDeltaSpinlock = NIL_RTSPINLOCK;
3365     }
3366
3367     if (pDevExt->hTscDeltaEvent != NIL_RTSEMEVENT)
3368     {
3369         RTSemEventDestroy(pDevExt->hTscDeltaEvent);
3370         pDevExt->hTscDeltaEvent = NIL_RTSEMEVENT;
3371     }
3372
3373     ASMAtomicWriteS32(&pDevExt->rcTscDelta, VERR_NOT_AVAILABLE);
3374 }
3375
3376
3377 /**
3378  * Waits for TSC-delta measurements to be completed for all online CPUs.
3379  *
3380  * @returns VBox status code.
3381  * @param pDevExt Pointer to the device instance data.
3382  */
3383 static int supdrvTscDeltaThreadWaitForOnlineCpus(PSUPDRVDEVEXT pDevExt)
3384 {
3385     int cTriesLeft = 5;
3386     int cMsTotalWait;
3387     int cMsWaited = 0;
3388     int cMsWaitGranularity = 1;
3389
3390     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3391     AssertReturn(pGip, VERR_INVALID_POINTER);
3392
3393     if (RT_UNLIKELY(pDevExt->hTscDeltaThread == NIL_RTTHREAD))
3394         return VERR_THREAD_NOT_WAITABLE;
3395
3396     cMsTotalWait = RT_MIN(pGip->cPresentCpus + 10, 200);
3397     while (cTriesLeft-- > 0)
3398     {
3399         if (RTCpuSetIsEqual(&pDevExt->TscDeltaObtainedCpuSet, &pGip->OnlineCpuSet))
3400             return VINF_SUCCESS;
3401         RTThreadSleep(cMsWaitGranularity);
3402         cMsWaited += cMsWaitGranularity;
3403         if (cMsWaited >= cMsTotalWait)
2811 3404             break;
2812 3405     }
2813 3406
2814     if (poffMin)
2815         *poffMin = offMin; /* Almost RTMpOnSpecific profiling. */
2816     Log(("supdrvDetermineAsyncTsc: returns %d; iEndCpu=%d rc=%d offMin=%llx offMax=%llx\n",
2817          fAsync, iEndCpu, rc, offMin, offMax));
2818 #if !defined(RT_OS_SOLARIS) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
2819     OSDBGPRINT(("vboxdrv: fAsync=%d offMin=%#lx offMax=%#lx\n", fAsync, (long)offMin, (long)offMax));
2820 #endif
2821     return fAsync;
2822 }
2823
2824
2825 /**
2826  * supdrvGipInit() worker that determines the GIP TSC mode.
2827  *
2828  * @returns The most suitable TSC mode.
2829  * @param pDevExt Pointer to the device instance data.
2830  */
2831 static SUPGIPMODE supdrvGipInitDetermineTscMode(PSUPDRVDEVEXT pDevExt)
2832 {
2833     uint64_t u64DiffCoresIgnored;
2834     uint32_t uEAX, uEBX, uECX, uEDX;
2835
2836     /*
2837      * Establish whether the CPU advertises TSC as invariant; we need that in
2838      * a couple of places below.
2839      */
2840     bool fInvariantTsc = false;
2841     if (ASMHasCpuId())
2842     {
2843         uEAX = ASMCpuId_EAX(0x80000000);
2844         if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
2845         {
2846             uEDX = ASMCpuId_EDX(0x80000007);
2847             if (uEDX & X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR)
2848                 fInvariantTsc = true;
2849         }
2850     }
2851
2852     /*
2853      * On single CPU systems, we don't need to consider ASYNC mode.
2854      */
2855     if (RTMpGetCount() <= 1)
2856         return fInvariantTsc ? SUPGIPMODE_INVARIANT_TSC : SUPGIPMODE_SYNC_TSC;
2857
2858     /*
2859      * Allow the user and/or OS specific bits to force async mode.
2860      */
2861     if (supdrvOSGetForcedAsyncTscMode(pDevExt))
2862         return SUPGIPMODE_ASYNC_TSC;
2863
2864
2865 #if 0 /** @todo enable this when i64TscDelta is applied in all places where it's needed */
2866     /*
2867      * Use invariant mode if the CPU says TSC is invariant.
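     * (Editorial note: "invariant TSC" is what CPUID leaf 0x80000007 reports
     * in EDX bit 8, the X86_CPUID_AMD_ADVPOWER_EDX_TSCINVAR check used when
     * establishing fInvariantTsc above; Intel reports the same bit.)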
2868      */
2869     if (fInvariantTsc)
2870         return SUPGIPMODE_INVARIANT_TSC;
2871 #endif
2872
2873     /*
2874      * TSC is not invariant and we're on SMP, this presents two problems:
2875      *
2876      * (1) There might be a skew between the CPUs, so that cpu0
2877      *     returns a TSC that is slightly different from cpu1.
2878      *     This skew may be due to (2), bad TSC initialization
2879      *     or slightly different TSC rates.
2880      *
2881      * (2) Power management (and other things) may cause the TSC
2882      *     to run at a non-constant speed, and cause the speed
2883      *     to be different on the cpus. This will result in (1).
2884      *
2885      * If any of the above is detected, we will have to use ASYNC mode.
2886      */
2887
2888     /* (1). Try to check for current differences between the cpus. */
2889     if (supdrvDetermineAsyncTsc(&u64DiffCoresIgnored))
2890         return SUPGIPMODE_ASYNC_TSC;
2891
2892 #if 1 /** @todo remove once i64TscDelta is applied everywhere. Enable #if 0 above. */
2893     if (fInvariantTsc)
2894         return SUPGIPMODE_INVARIANT_TSC;
2895 #endif
2896
2897     /* (2) If it's an AMD CPU with power management, we won't trust its TSC. */
2898     ASMCpuId(0, &uEAX, &uEBX, &uECX, &uEDX);
2899     if (   ASMIsValidStdRange(uEAX)
2900         && ASMIsAmdCpuEx(uEBX, uECX, uEDX))
2901     {
2902         /* Check for APM support. */
2903         uEAX = ASMCpuId_EAX(0x80000000);
2904         if (ASMIsValidExtRange(uEAX) && uEAX >= 0x80000007)
2905         {
2906             uEDX = ASMCpuId_EDX(0x80000007);
2907             if (uEDX & 0x3e) /* STC|TM|THERMTRIP|VID|FID. Ignore TS. */
2908                 return SUPGIPMODE_ASYNC_TSC;
2909         }
2910     }
2911
2912     return SUPGIPMODE_SYNC_TSC;
2913 }
2914
2915
2916 /**
2917  * Initializes per-CPU GIP information.
2918  *
2919  * @param pDevExt   Pointer to the device instance data.
2920  * @param pGip      Pointer to the GIP.
2921  * @param pCpu      Pointer to the GIP CPU to initialize.
2922  * @param u64NanoTS The current nanosecond timestamp.
2923  */
2924 static void supdrvGipInitCpu(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, PSUPGIPCPU pCpu, uint64_t u64NanoTS)
2925 {
2926     /* !!! Warning !!! The GIP may not be linked to the device instance data at this point!
2927        which is why we have 2 separate parameters. Don't dereference pDevExt->pGip here. */
2928     pCpu->u32TransactionId = 2;
2929     pCpu->u64NanoTS = u64NanoTS;
2930     pCpu->u64TSC = ASMReadTSC();
2931     pCpu->u64TSCSample = GIP_TSC_DELTA_RSVD;
2932     pCpu->i64TSCDelta = pGip->enmUseTscDelta > SUPGIPUSETSCDELTA_ZERO_CLAIMED ? INT64_MAX : 0;
2933
2934     ASMAtomicWriteSize(&pCpu->enmState, SUPGIPCPUSTATE_INVALID);
2935     ASMAtomicWriteSize(&pCpu->idCpu, NIL_RTCPUID);
2936     ASMAtomicWriteS16(&pCpu->iCpuSet, -1);
2937     ASMAtomicWriteU16(&pCpu->idApic, UINT16_MAX);
2938
2939     /*
2940      * We don't know the following values until we've executed updates.
2941      * So, we'll just pretend it's a 4 GHz CPU and adjust the history on
2942      * the 2nd timer callout.
2943      */
2944     pCpu->u64CpuHz = _4G + 1; /* tstGIP-2 depends on this. */
2945     pCpu->u32UpdateIntervalTSC
2946         = pCpu->au32TSCHistory[0]
2947         = pCpu->au32TSCHistory[1]
2948         = pCpu->au32TSCHistory[2]
2949         = pCpu->au32TSCHistory[3]
2950         = pCpu->au32TSCHistory[4]
2951         = pCpu->au32TSCHistory[5]
2952         = pCpu->au32TSCHistory[6]
2953         = pCpu->au32TSCHistory[7]
2954         = (uint32_t)(_4G / pGip->u32UpdateHz);
2955 }
2956
2957
2958 /**
2959  * Initializes the GIP data.
2960  *
2961  * @param pDevExt   Pointer to the device instance data.
2962  * @param pGip      Pointer to the read-write kernel mapping of the GIP.
2963  * @param HCPhys    The physical address of the GIP.
2964  * @param u64NanoTS The current nanosecond timestamp.
2965  * @param uUpdateHz         The update frequency.
2966  * @param uUpdateIntervalNS The update interval in nanoseconds.
2967  * @param cCpus             The CPU count.
2968  */
2969 static void supdrvGipInit(PSUPDRVDEVEXT pDevExt, PSUPGLOBALINFOPAGE pGip, RTHCPHYS HCPhys,
2970                           uint64_t u64NanoTS, unsigned uUpdateHz, unsigned uUpdateIntervalNS, unsigned cCpus)
2971 {
2972     size_t const cbGip = RT_ALIGN_Z(RT_OFFSETOF(SUPGLOBALINFOPAGE, aCPUs[cCpus]), PAGE_SIZE);
2973     unsigned i;
2974 #ifdef DEBUG_DARWIN_GIP
2975     OSDBGPRINT(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
2976 #else
2977     LogFlow(("supdrvGipInit: pGip=%p HCPhys=%lx u64NanoTS=%llu uUpdateHz=%d cCpus=%u\n", pGip, (long)HCPhys, u64NanoTS, uUpdateHz, cCpus));
2978 #endif
2979
2980     /*
2981      * Initialize the structure.
2982      */
2983     memset(pGip, 0, cbGip);
2984
2985     pGip->u32Magic = SUPGLOBALINFOPAGE_MAGIC;
2986     pGip->u32Version = SUPGLOBALINFOPAGE_VERSION;
2987     pGip->u32Mode = supdrvGipInitDetermineTscMode(pDevExt);
2988     if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
2989         /*|| pGip->u32Mode == SUPGIPMODE_SYNC_TSC */)
2990         pGip->enmUseTscDelta = supdrvOSAreTscDeltasInSync() /* Allow OS override (windows). */
2991                              ? SUPGIPUSETSCDELTA_ZERO_CLAIMED : SUPGIPUSETSCDELTA_PRACTICALLY_ZERO /* downgrade later */;
2992     else
2993         pGip->enmUseTscDelta = SUPGIPUSETSCDELTA_NOT_APPLICABLE;
2994     pGip->cCpus = (uint16_t)cCpus;
2995     pGip->cPages = (uint16_t)(cbGip / PAGE_SIZE);
2996     pGip->u32UpdateHz = uUpdateHz;
2997     pGip->u32UpdateIntervalNS = uUpdateIntervalNS;
2998     pGip->fGetGipCpu = SUPGIPGETCPU_APIC_ID;
2999     RTCpuSetEmpty(&pGip->OnlineCpuSet);
3000     RTCpuSetEmpty(&pGip->PresentCpuSet);
3001     RTMpGetSet(&pGip->PossibleCpuSet);
3002     pGip->cOnlineCpus = RTMpGetOnlineCount();
3003     pGip->cPresentCpus = RTMpGetPresentCount();
3004     pGip->cPossibleCpus = RTMpGetCount();
3005     pGip->idCpuMax = RTMpGetMaxCpuId();
3006     for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromApicId); i++)
3007         pGip->aiCpuFromApicId[i] = UINT16_MAX;
3008     for (i = 0; i < RT_ELEMENTS(pGip->aiCpuFromCpuSetIdx); i++)
3009         pGip->aiCpuFromCpuSetIdx[i] = UINT16_MAX;
3010     for (i = 0; i < cCpus; i++)
3011         supdrvGipInitCpu(pDevExt, pGip, &pGip->aCPUs[i], u64NanoTS);
3012
3013     /*
3014      * Link it to the device extension.
3015      */
3016     pDevExt->pGip = pGip;
3017     pDevExt->HCPhysGip = HCPhys;
3018     pDevExt->cGipUsers = 0;
3019 }
3020
3021
3022 /**
3023  * On CPU initialization callback for RTMpOnAll.
3024  *
3025  * @param idCpu   The CPU ID.
3026  * @param pvUser1 The device extension.
3027  * @param pvUser2 The GIP.
3028  */
3029 static DECLCALLBACK(void) supdrvGipInitOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
3030 {
3031     /* This is good enough, even though it will update some of the globals a
3032        bit too much. */
3033     supdrvGipMpEventOnline((PSUPDRVDEVEXT)pvUser1, idCpu);
3034 }
3035
3036
3037 /**
3038  * Invalidates the GIP data upon termination.
3039  *
3040  * @param pGip Pointer to the read-write kernel mapping of the GIP.
3041  */
3042 static void supdrvGipTerm(PSUPGLOBALINFOPAGE pGip)
3043 {
3044     unsigned i;
3045     pGip->u32Magic = 0;
3046     for (i = 0; i < pGip->cCpus; i++)
3047     {
3048         pGip->aCPUs[i].u64NanoTS = 0;
3049         pGip->aCPUs[i].u64TSC = 0;
3050         pGip->aCPUs[i].iTSCHistoryHead = 0;
3051         pGip->aCPUs[i].u64TSCSample = 0;
3052         pGip->aCPUs[i].i64TSCDelta = INT64_MAX;
3053     }
3054 }
3055
3056
3057 /**
3058  * Worker routine for supdrvGipUpdate() and supdrvGipUpdatePerCpu() that
3059  * updates all the per cpu data except the transaction id.
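 *
 * The transaction id is handled by the callers and works like a sequence
 * lock. A reader would pair with it roughly like this (an editorial sketch
 * only):
 * @code
 *     uint32_t u32Id;
 *     do
 *     {
 *         u32Id = ASMAtomicReadU32(&pGipCpu->u32TransactionId);
 *         ...read u64NanoTS, u64TSC and friends...
 *     } while (   (u32Id & 1)
 *              || u32Id != ASMAtomicReadU32(&pGipCpu->u32TransactionId));
 * @endcode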
3060  *
3061  * @param pDevExt   The device extension.
3062  * @param pGipCpu   Pointer to the per cpu data.
3063  * @param u64NanoTS The current time stamp.
3064  * @param u64TSC    The current TSC.
3065  * @param iTick     The current timer tick.
3066  *
3067  * @remarks Can be called with interrupts disabled!
3068  */
3069 static void supdrvGipDoUpdateCpu(PSUPDRVDEVEXT pDevExt, PSUPGIPCPU pGipCpu, uint64_t u64NanoTS, uint64_t u64TSC, uint64_t iTick)
3070 {
3071     uint64_t u64TSCDelta;
3072     uint32_t u32UpdateIntervalTSC;
3073     uint32_t u32UpdateIntervalTSCSlack;
3074     unsigned iTSCHistoryHead;
3075     uint64_t u64CpuHz;
3076     uint32_t u32TransactionId;
3077
3078     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3079     AssertPtrReturnVoid(pGip);
3080
3081     /* Delta between this and the previous update. */
3082     ASMAtomicUoWriteU32(&pGipCpu->u32PrevUpdateIntervalNS, (uint32_t)(u64NanoTS - pGipCpu->u64NanoTS));
3083
3084     /*
3085      * Update the NanoTS.
3086      */
3087     ASMAtomicWriteU64(&pGipCpu->u64NanoTS, u64NanoTS);
3088
3089     /*
3090      * Calc TSC delta.
3091      */
3092     u64TSCDelta = u64TSC - pGipCpu->u64TSC;
3093     ASMAtomicWriteU64(&pGipCpu->u64TSC, u64TSC);
3094
3095     /* We don't need to keep recalculating the frequency when it's invariant. */
3096     if (pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC)
3097         return;
3098
3099     if (u64TSCDelta >> 32)
3100     {
3101         u64TSCDelta = pGipCpu->u32UpdateIntervalTSC;
3102         pGipCpu->cErrors++;
3103     }
3104
3105     /*
3106      * On the 2nd and 3rd callout, reset the history with the current TSC
3107      * interval since the values entered by supdrvGipInit are totally off.
3108      * The interval on the 1st callout is completely unreliable, the 2nd is a bit
3109      * better, while the 3rd should be most reliable.
3110      */
3111     u32TransactionId = pGipCpu->u32TransactionId;
3112     if (RT_UNLIKELY(   (   u32TransactionId == 5
3113                         || u32TransactionId == 7)
3114                     && (   iTick == 2
3115                         || iTick == 3) ))
3116     {
3117         unsigned i;
3118         for (i = 0; i < RT_ELEMENTS(pGipCpu->au32TSCHistory); i++)
3119             ASMAtomicUoWriteU32(&pGipCpu->au32TSCHistory[i], (uint32_t)u64TSCDelta);
3120     }
3121
3122     /*
3123      * Validate the NanoTS deltas between timer fires with an arbitrary threshold of 0.5%.
3124      * Wait until we have at least one full history since the above history reset. The
3125      * assumption is that the majority of the previous history values will be tolerable.
3126      * See @bugref{6710} comment #67.
3127      */
3128     if (   u32TransactionId > 23 /* 7 + (8 * 2) */
3129         && pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
3130     {
3131         uint32_t uNanoTsThreshold = pGip->u32UpdateIntervalNS / 200;
3132         if (   pGipCpu->u32PrevUpdateIntervalNS > pGip->u32UpdateIntervalNS + uNanoTsThreshold
3133             || pGipCpu->u32PrevUpdateIntervalNS < pGip->u32UpdateIntervalNS - uNanoTsThreshold)
3134         {
3135             uint32_t u32;
3136             u32  = pGipCpu->au32TSCHistory[0];
3137             u32 += pGipCpu->au32TSCHistory[1];
3138             u32 += pGipCpu->au32TSCHistory[2];
3139             u32 += pGipCpu->au32TSCHistory[3];
3140             u32 >>= 2;
3141             u64TSCDelta  = pGipCpu->au32TSCHistory[4];
3142             u64TSCDelta += pGipCpu->au32TSCHistory[5];
3143             u64TSCDelta += pGipCpu->au32TSCHistory[6];
3144             u64TSCDelta += pGipCpu->au32TSCHistory[7];
3145             u64TSCDelta >>= 2;
3146             u64TSCDelta += u32;
3147             u64TSCDelta >>= 1;
3148         }
3149     }
3150
3151     /*
3152      * TSC History.
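      *
      * (Editorial note: au32TSCHistory is an 8-entry ring buffer of per-update
      * TSC deltas; the head is advanced before the new delta is written. As a
      * worked example, a 2 GHz invariant-TSC host updating at 100 Hz records
      * intervals of ~20 000 000 ticks, so the slack added further down,
      * interval >> 14, comes to roughly 1 200 ticks, i.e. about 0.006%.)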
3153      */
3154     Assert(RT_ELEMENTS(pGipCpu->au32TSCHistory) == 8);
3155     iTSCHistoryHead = (pGipCpu->iTSCHistoryHead + 1) & 7;
3156     ASMAtomicWriteU32(&pGipCpu->iTSCHistoryHead, iTSCHistoryHead);
3157     ASMAtomicWriteU32(&pGipCpu->au32TSCHistory[iTSCHistoryHead], (uint32_t)u64TSCDelta);
3158
3159     /*
3160      * UpdateIntervalTSC = average of last 8,2,1 intervals depending on update HZ.
3161      *
3162      * On Windows, we have an occasional (but recurring) sour value that messed up
3163      * the history but taking only 1 interval reduces the precision overall.
3164      * However, this problem existed before the invariant mode was introduced.
3165      */
3166     if (   pGip->u32Mode == SUPGIPMODE_INVARIANT_TSC
3167         || pGip->u32UpdateHz >= 1000)
3168     {
3169         uint32_t u32;
3170         u32  = pGipCpu->au32TSCHistory[0];
3171         u32 += pGipCpu->au32TSCHistory[1];
3172         u32 += pGipCpu->au32TSCHistory[2];
3173         u32 += pGipCpu->au32TSCHistory[3];
3174         u32 >>= 2;
3175         u32UpdateIntervalTSC  = pGipCpu->au32TSCHistory[4];
3176         u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[5];
3177         u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[6];
3178         u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[7];
3179         u32UpdateIntervalTSC >>= 2;
3180         u32UpdateIntervalTSC += u32;
3181         u32UpdateIntervalTSC >>= 1;
3182
3183         /* Value chosen for a 2GHz Athlon64 running linux 2.6.10/11. */
3184         u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 14;
3185     }
3186     else if (pGip->u32UpdateHz >= 90)
3187     {
3188         u32UpdateIntervalTSC  = (uint32_t)u64TSCDelta;
3189         u32UpdateIntervalTSC += pGipCpu->au32TSCHistory[(iTSCHistoryHead - 1) & 7];
3190         u32UpdateIntervalTSC >>= 1;
3191
3192         /* value chosen on a 2GHz thinkpad running windows */
3193         u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 7;
3194     }
3195     else
3196     {
3197         u32UpdateIntervalTSC = (uint32_t)u64TSCDelta;
3198
3199         /* This value hasn't been checked yet... waiting for OS/2 and 33Hz timers... :-) */
3200         u32UpdateIntervalTSCSlack = u32UpdateIntervalTSC >> 6;
3201     }
3202     ASMAtomicWriteU32(&pGipCpu->u32UpdateIntervalTSC, u32UpdateIntervalTSC + u32UpdateIntervalTSCSlack);
3203
3204     /*
3205      * CpuHz.
3206      */
3207     u64CpuHz = ASMMult2xU32RetU64(u32UpdateIntervalTSC, RT_NS_1SEC);
3208     u64CpuHz /= pGip->u32UpdateIntervalNS;
3209     ASMAtomicWriteU64(&pGipCpu->u64CpuHz, u64CpuHz);
3210 }
3211
3212
3213 /**
3214  * Updates the GIP.
3215  *
3216  * @param pDevExt   The device extension.
3217  * @param u64NanoTS The current nanosecond timestamp.
3218  * @param u64TSC    The current TSC timestamp.
3219  * @param idCpu     The CPU ID.
3220  * @param iTick     The current timer tick.
3221  *
3222  * @remarks Can be called with interrupts disabled!
3223  */
3224 static void supdrvGipUpdate(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC, RTCPUID idCpu, uint64_t iTick)
3225 {
3226     /*
3227      * Determine the relevant CPU data.
3228      */
3229     PSUPGIPCPU pGipCpu;
3230     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3231     AssertPtrReturnVoid(pGip);
3232
3233     if (pGip->u32Mode != SUPGIPMODE_ASYNC_TSC)
3234         pGipCpu = &pGip->aCPUs[0];
3235     else
3236     {
3237         unsigned iCpu = pGip->aiCpuFromApicId[ASMGetApicId()];
3238         if (RT_UNLIKELY(iCpu >= pGip->cCpus))
3239             return;
3240         pGipCpu = &pGip->aCPUs[iCpu];
3241         if (RT_UNLIKELY(pGipCpu->idCpu != idCpu))
3242             return;
3243     }
3244
3245     /*
3246      * Start update transaction.
3247      */
3248     if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
3249     {
3250         /* this can happen on win32 if we're taking too long and there are more CPUs around. Shouldn't happen, though.
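           The extra increment below restores the even/odd parity of the
           transaction id, so the entry doesn't permanently look like an
           update is in flight.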
*/
3251         AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
3252         ASMAtomicIncU32(&pGipCpu->u32TransactionId);
3253         pGipCpu->cErrors++;
3254         return;
3255     }
3256
3257     /*
3258      * Recalc the update frequency every 0x800th time.
3259      */
3260     if (   pGip->u32Mode != SUPGIPMODE_INVARIANT_TSC /* since we're not recalculating the frequency on invariant hosts. */
3261         && !(pGipCpu->u32TransactionId & (GIP_UPDATEHZ_RECALC_FREQ * 2 - 2)))
3262     {
3263         if (pGip->u64NanoTSLastUpdateHz)
3264         {
3265 #ifdef RT_ARCH_AMD64 /** @todo fix 64-bit div here to work on x86 linux. */
3266             uint64_t u64Delta = u64NanoTS - pGip->u64NanoTSLastUpdateHz;
3267             uint32_t u32UpdateHz = (uint32_t)((RT_NS_1SEC_64 * GIP_UPDATEHZ_RECALC_FREQ) / u64Delta);
3268             if (u32UpdateHz <= 2000 && u32UpdateHz >= 30)
3269             {
3270                 /** @todo r=ramshankar: Changing u32UpdateHz might screw up TSC frequency
3271                  *        calculation on non-invariant hosts if it changes the history decision
3272                  *        taken in supdrvGipDoUpdateCpu(). */
3273                 uint64_t u64Interval = u64Delta / GIP_UPDATEHZ_RECALC_FREQ;
3274                 ASMAtomicWriteU32(&pGip->u32UpdateHz, u32UpdateHz);
3275                 ASMAtomicWriteU32(&pGip->u32UpdateIntervalNS, (uint32_t)u64Interval);
3276             }
3277 #endif
3278         }
3279         ASMAtomicWriteU64(&pGip->u64NanoTSLastUpdateHz, u64NanoTS | 1);
3280     }
3281
3282     /*
3283      * Update the data.
3284      */
3285     supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
3286
3287     /*
3288      * Complete transaction.
3289      */
3290     ASMAtomicIncU32(&pGipCpu->u32TransactionId);
3291 }
3292
3293
3294 /**
3295  * Updates the per cpu GIP data for the calling cpu.
3296  *
3297  * @param pDevExt   The device extension.
3298  * @param u64NanoTS The current nanosecond timestamp.
3299  * @param u64TSC    The current TSC timestamp.
3300  * @param idCpu     The CPU ID.
3301  * @param idApic    The APIC id for the CPU index.
3302  * @param iTick     The current timer tick.
3303  *
3304  * @remarks Can be called with interrupts disabled!
3305  */
3306 static void supdrvGipUpdatePerCpu(PSUPDRVDEVEXT pDevExt, uint64_t u64NanoTS, uint64_t u64TSC,
3307                                   RTCPUID idCpu, uint8_t idApic, uint64_t iTick)
3308 {
3309     uint32_t iCpu;
3310     PSUPGLOBALINFOPAGE pGip = pDevExt->pGip;
3311
3312     /*
3313      * Avoid a potential race when a CPU online notification doesn't fire on
3314      * the onlined CPU but the tick creeps in before the event notification is
3315      * run.
3316      */
3317     if (RT_UNLIKELY(iTick == 1))
3318     {
3319         iCpu = supdrvGipFindOrAllocCpuIndexForCpuId(pGip, idCpu);
3320         if (pGip->aCPUs[iCpu].enmState == SUPGIPCPUSTATE_OFFLINE)
3321             supdrvGipMpEventOnline(pDevExt, idCpu);
3322     }
3323
3324     iCpu = pGip->aiCpuFromApicId[idApic];
3325     if (RT_LIKELY(iCpu < pGip->cCpus))
3326     {
3327         PSUPGIPCPU pGipCpu = &pGip->aCPUs[iCpu];
3328         if (pGipCpu->idCpu == idCpu)
3329         {
3330             /*
3331              * Start update transaction.
3332              */
3333             if (!(ASMAtomicIncU32(&pGipCpu->u32TransactionId) & 1))
3334             {
3335                 AssertMsgFailed(("Invalid transaction id, %#x, not odd!\n", pGipCpu->u32TransactionId));
3336                 ASMAtomicIncU32(&pGipCpu->u32TransactionId);
3337                 pGipCpu->cErrors++;
3338                 return;
3339             }
3340
3341             /*
3342              * Update the data.
3343              */
3344             supdrvGipDoUpdateCpu(pDevExt, pGipCpu, u64NanoTS, u64TSC, iTick);
3345
3346             /*
3347              * Complete transaction.
3348              */
3349             ASMAtomicIncU32(&pGipCpu->u32TransactionId);
3350         }
3351     }
3352 }
3407     return VERR_TIMEOUT;
3408 }
3409
3410 #endif /* SUPDRV_USE_TSC_DELTA_THREAD */
3353 3411
3354 3412
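As a rough standalone illustration of the CpuHz arithmetic at the end of supdrvGipDoUpdateCpu (an editorial sketch, not part of the changeset; the constants are made-up sample values):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical sample values: ~20M TSC ticks per 10 ms update interval. */
        uint32_t u32UpdateIntervalTSC = 20000000;
        uint32_t u32UpdateIntervalNS  = 10000000;

        /* Mirrors: u64CpuHz = interval_tsc * RT_NS_1SEC / interval_ns. */
        uint64_t u64CpuHz = (uint64_t)u32UpdateIntervalTSC * 1000000000u / u32UpdateIntervalNS;

        printf("Estimated CPU frequency: %llu Hz\n", (unsigned long long)u64CpuHz); /* 2000000000 */
        return 0;
    }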