- Timestamp:
- Feb 23, 2015 2:35:28 AM (10 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp
r54357 r54365 2549 2549 /** Padding to make sure the uVar1 is in its own cache line. */ 2550 2550 uint64_t au64CacheLinePaddingBefore[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t)]; 2551 2551 2552 /** The synchronization variable, holds values GIP_TSC_DELTA_SYNC_*. */ 2552 2553 volatile uint32_t uSyncVar; 2553 /** Unused. */2554 volatile uint32_t u 32Padding;2554 /** Sequence synchronizing variable used for post 'GO' synchronization. */ 2555 volatile uint32_t uSyncSeq; 2555 2556 2556 2557 /** Padding to make sure the uVar1 is in its own cache line. */ 2557 uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t) - 1];2558 uint64_t au64CacheLinePaddingAfter[GIP_TSC_DELTA_CACHE_LINE_SIZE / sizeof(uint64_t) - 2]; 2558 2559 2559 2560 /** Start RDTSC value. Put here mainly to save stack space. */ 2560 2561 uint64_t uTscStart; 2562 /** Copy of SUPDRVGIPTSCDELTARGS::cMaxTscTicks. */ 2563 uint64_t cMaxTscTicks; 2561 2564 } SUPTSCDELTASYNC2; 2562 2565 AssertCompileSize(SUPTSCDELTASYNC2, GIP_TSC_DELTA_CACHE_LINE_SIZE * 2 + sizeof(uint64_t)); 2563 2566 typedef SUPTSCDELTASYNC2 *PSUPTSCDELTASYNC2; 2567 2568 /** Prestart wait. */ 2569 #define GIP_TSC_DELTA_SYNC2_PRESTART_WAIT UINT32_C(0x0ffe) 2570 /** Prestart aborted. */ 2571 #define GIP_TSC_DELTA_SYNC2_PRESTART_ABORT UINT32_C(0x0fff) 2572 /** Ready (on your mark). */ 2573 #define GIP_TSC_DELTA_SYNC2_READY UINT32_C(0x1000) 2574 /** Steady (get set). */ 2575 #define GIP_TSC_DELTA_SYNC2_STEADY UINT32_C(0x1001) 2576 /** Go! */ 2577 #define GIP_TSC_DELTA_SYNC2_GO UINT32_C(0x1002) 2578 2579 /** We reached the time limit. */ 2580 #define GIP_TSC_DELTA_SYNC2_TIMEOUT UINT32_C(0x1ffe) 2581 /** The other party won't touch the sync struct ever again. */ 2582 #define GIP_TSC_DELTA_SYNC2_FINAL UINT32_C(0x1fff) 2564 2583 2565 2584 … … 2610 2629 /** @name Macros that implements the basic synchronization steps common to 2611 2630 * the algorithms. 2631 * 2632 * Must be used from loop as the timeouts are implemented via 'break' statements 2633 * at the moment. 2634 * 2612 2635 * @{ 2613 2636 */ 2637 #if 1 2614 2638 #define TSCDELTA_MASTER_SYNC_BEFORE(a_pSync1, a_pMySync, a_pOtherSync) \ 2615 2639 do {\ … … 2652 2676 ASMNopPause(); \ 2653 2677 } while (0) 2678 #else 2679 2680 #if defined(DEBUG_bird) && defined(RT_OS_WINDOWS) 2681 # define TSCDELTA_DBG_VARS() uint32_t iDbgCounter 2682 # define TSCDELTA_DBG_START_LOOP() do {iDbgCounter = 0;} while (0) 2683 # define TSCDELTA_DBG_CHECK_LOOP() do { if (++iDbgCounter == 0) __debugbreak(); } while (0) 2684 #else 2685 # define TSCDELTA_DBG_VARS() ((void)0) 2686 # define TSCDELTA_DBG_START_LOOP() ((void)0) 2687 # define TSCDELTA_DBG_CHECK_LOOP() ((void)0) 2688 #endif 2689 2690 2691 static bool supdrvTscDeltaSync2_Before(PSUPTSCDELTASYNC2 pMySync, PSUPTSCDELTASYNC2 pOtherSync, 2692 bool fIsMaster, PRTCCUINTREG pfEFlags) 2693 { 2694 uint32_t iMySeq = fIsMaster ? 0 : 256; 2695 uint32_t const iMaxSeq = iMySeq + 16; /* For the last loop, darn linux/freebsd C-ishness. */ 2696 uint32_t u32Tmp; 2697 uint32_t iSync2Loops = 0; 2698 RTCCUINTREG fEFlags; 2699 TSCDELTA_DBG_VARS(); 2700 2701 *pfEFlags = X86_EFL_IF | X86_EFL_1; /* should shut up most nagging compilers. */ 2702 2703 /* 2704 * The master tells the worker to get on it's mark. 2705 */ 2706 if (fIsMaster) 2707 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY))) 2708 { /* likely*/ } 2709 else 2710 return false; 2711 2712 /* 2713 * Wait for the on your mark signal (ack in the master case). We process timeouts here. 2714 */ 2715 ASMAtomicWriteU32(&(pMySync)->uSyncSeq, 0); 2716 for (;;) 2717 { 2718 fEFlags = ASMIntDisableFlags(); 2719 u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar); 2720 if (u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY) 2721 break; 2722 2723 ASMSetFlags(fEFlags); 2724 ASMNopPause(); 2725 2726 /* Abort? */ 2727 if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY) 2728 break; 2729 2730 /* Check for timeouts every so often (not every loop in case RDTSC is trapping or something). */ 2731 #if 0 /* For debugging the timeout paths. */ 2732 static uint32_t volatile xxx; 2733 #endif 2734 iSync2Loops++; 2735 if ( ( (iSync2Loops & 0x3ff) == 0 2736 && ASMReadTSC() - pMySync->uTscStart > pMySync->cMaxTscTicks) 2737 #if 0 /* This is crazy, I know, but enable this code and the results are markedly better when enabled on the 1.4GHz AMD (debug). */ 2738 || (!fIsMaster && (++xxx & 0xf) == 0) 2739 #endif 2740 ) 2741 { 2742 /* Try switch our own state into timeout mode so the master cannot tell us to 'GO', 2743 ignore the timeout if we've got the go ahead already (simpler). */ 2744 if (ASMAtomicCmpXchgU32(&pMySync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_READY)) 2745 { 2746 ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_TIMEOUT, GIP_TSC_DELTA_SYNC2_STEADY); 2747 return false; 2748 } 2749 } 2750 } 2751 2752 /* 2753 * Interrupts are now disabled and will remain disabled until we do 2754 * TSCDELTA_MASTER_SYNC_AFTER / TSCDELTA_OTHER_SYNC_AFTER. 2755 */ 2756 *pfEFlags = fEFlags; 2757 2758 /* 2759 * The worker tells the master that it is on its mark and that the master 2760 * need to get into position as well. 2761 */ 2762 if (!fIsMaster) 2763 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_STEADY, GIP_TSC_DELTA_SYNC2_READY))) 2764 { /* likely */ } 2765 else 2766 { 2767 ASMSetFlags(fEFlags); 2768 return false; 2769 } 2770 2771 /* 2772 * The master sends the 'go' to the worker and wait for ACK. 2773 */ 2774 if (fIsMaster) 2775 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY))) 2776 { /* likely */ } 2777 else 2778 { 2779 ASMSetFlags(fEFlags); 2780 return false; 2781 } 2782 2783 /* 2784 * Wait for the 'go' signal (ack in the master case). 2785 */ 2786 TSCDELTA_DBG_START_LOOP(); 2787 for (;;) 2788 { 2789 u32Tmp = ASMAtomicReadU32(&pMySync->uSyncVar); 2790 if (u32Tmp == GIP_TSC_DELTA_SYNC2_GO) 2791 break; 2792 if (RT_LIKELY(u32Tmp == GIP_TSC_DELTA_SYNC2_STEADY)) 2793 { /* likely */ } 2794 else 2795 { 2796 ASMSetFlags(fEFlags); 2797 return false; 2798 } 2799 2800 TSCDELTA_DBG_CHECK_LOOP(); 2801 ASMNopPause(); 2802 } 2803 2804 /* 2805 * The worker acks the 'go' (shouldn't fail). 2806 */ 2807 if (!fIsMaster) 2808 if (RT_LIKELY(ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_GO, GIP_TSC_DELTA_SYNC2_STEADY))) 2809 { /* likely */ } 2810 else 2811 { 2812 ASMSetFlags(fEFlags); 2813 return false; 2814 } 2815 2816 /* 2817 * Try enter mostly lockstep execution with it. 2818 */ 2819 for (;;) 2820 { 2821 uint32_t iOtherSeq1, iOtherSeq2; 2822 ASMCompilerBarrier(); 2823 ASMSerializeInstruction(); 2824 2825 ASMAtomicWriteU32(&pMySync->uSyncSeq, iMySeq); 2826 ASMNopPause(); 2827 iOtherSeq1 = ASMAtomicXchgU32(&pOtherSync->uSyncSeq, iMySeq); 2828 ASMNopPause(); 2829 iOtherSeq2 = ASMAtomicReadU32(&pMySync->uSyncSeq); 2830 2831 ASMCompilerBarrier(); 2832 if (iOtherSeq1 == iOtherSeq2) 2833 return true; 2834 2835 /* Did the other guy give up? Should we give up? */ 2836 if ( iOtherSeq1 == UINT32_MAX 2837 || iOtherSeq2 == UINT32_MAX) 2838 return true; 2839 if (++iMySeq >= iMaxSeq) 2840 { 2841 ASMAtomicWriteU32(&pMySync->uSyncSeq, UINT32_MAX); 2842 return true; 2843 } 2844 ASMNopPause(); 2845 } 2846 } 2847 2848 #define TSCDELTA_MASTER_SYNC_BEFORE(a_pSync1, a_pMySync, a_pOtherSync) \ 2849 do { \ 2850 if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, true /*fMaster*/, &uFlags))) \ 2851 { /*likely*/ } \ 2852 else break; \ 2853 } while (0) 2854 #define TSCDELTA_OTHER_SYNC_BEFORE(a_pSync1, a_pMySync, a_pOtherSync, a_MidSyncExpr) \ 2855 do { \ 2856 if (RT_LIKELY(supdrvTscDeltaSync2_Before(a_pMySync, a_pOtherSync, false /*fMaster*/, &uFlags))) \ 2857 { /*likely*/ } \ 2858 else break; \ 2859 } while (0) 2860 2861 #define TSCDELTA_MASTER_SYNC_AFTER(a_pSync1, a_pMySync, a_pOtherSync) \ 2862 do {\ 2863 /* \ 2864 * Wait for the worker to give us the 'ready' signal. \ 2865 */ \ 2866 uint32_t u32Tmp; \ 2867 TSCDELTA_DBG_VARS(); \ 2868 ASMSetFlags(uFlags); \ 2869 TSCDELTA_DBG_START_LOOP(); \ 2870 l_master_wait_done: \ 2871 u32Tmp = ASMAtomicReadU32(&(a_pMySync)->uSyncVar); \ 2872 if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY) \ 2873 { \ 2874 ASMNopPause(); \ 2875 if (u32Tmp != GIP_TSC_DELTA_SYNC2_GO) \ 2876 break; /* shouldn't ever happen! */ \ 2877 TSCDELTA_DBG_CHECK_LOOP(); \ 2878 ASMNopPause(); \ 2879 goto l_master_wait_done; \ 2880 } \ 2881 } while (0) 2882 2883 #define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pSync1, a_pMySync, a_pOtherSync) \ 2884 do {\ 2885 /* \ 2886 * Tell the woker that we're done processing the data and ready for the next round. \ 2887 */ \ 2888 if (!ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO)) \ 2889 { \ 2890 ASMSetFlags(uFlags); \ 2891 break; \ 2892 } \ 2893 } while (0) 2894 2895 2896 #define TSCDELTA_OTHER_SYNC_AFTER(a_pSync1, a_pMySync, a_pOtherSync) \ 2897 do { \ 2898 /* \ 2899 * Tell the master that we're done and wait for the data to be processed and the next round to start. \ 2900 */ \ 2901 uint32_t u32Tmp; \ 2902 TSCDELTA_DBG_VARS(); \ 2903 if (!ASMAtomicCmpXchgU32(&(a_pOtherSync)->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_GO)) \ 2904 { \ 2905 ASMSetFlags(uFlags); \ 2906 break; \ 2907 } \ 2908 ASMSetFlags(uFlags); \ 2909 TSCDELTA_DBG_START_LOOP(); \ 2910 l_other_wait_done: \ 2911 u32Tmp = ASMAtomicReadU32(&(a_pMySync)->uSyncVar); \ 2912 if (u32Tmp != GIP_TSC_DELTA_SYNC2_READY) \ 2913 { \ 2914 ASMNopPause(); \ 2915 if (u32Tmp != GIP_TSC_DELTA_SYNC2_GO) \ 2916 break; /* shouldn't ever happen! */ \ 2917 TSCDELTA_DBG_CHECK_LOOP(); \ 2918 ASMNopPause(); \ 2919 goto l_other_wait_done; \ 2920 } \ 2921 } while (0) 2922 #endif 2654 2923 /** @} */ 2655 2924 … … 2699 2968 for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++) 2700 2969 { 2970 RTCCUINTREG uFlags; 2701 2971 if (fIsMaster) 2702 2972 { … … 2704 2974 * The master. 2705 2975 */ 2706 RTCCUINTREG uFlags;2707 2976 AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD, 2708 2977 ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n", … … 2965 3234 for (iLoop = 0; iLoop < GIP_TSC_DELTA_M2_LOOPS; iLoop++) 2966 3235 { 3236 RTCCUINTREG uFlags; 2967 3237 if (fIsMaster) 2968 3238 { 2969 RTCCUINTREG uFlags;2970 2971 3239 /* 2972 3240 * Adjust the loop lag fudge. … … 3070 3338 #endif /* GIP_TSC_DELTA_METHOD_2 */ 3071 3339 3072 /** Prestart wait. */3073 #define GIP_TSC_DELTA_SYNC2_PRESTART_WAIT UINT32_C(0x0ffe)3074 /** Prestart aborted. */3075 #define GIP_TSC_DELTA_SYNC2_PRESTART_ABORT UINT32_C(0x0fff)3076 /** Start testing. */3077 #define GIP_TSC_DELTA_SYNC2_START UINT32_C(0x1000)3078 3079 3080 /** The other party won't touch the sync struct ever again. */3081 #define GIP_TSC_DELTA_SYNC2_FINAL UINT32_C(0x1fff)3082 3083 3340 3084 3341 … … 3169 3426 ASMAtomicWritePtr(ppMySync, &MySync); 3170 3427 MySync.uTscStart = ASMReadTSC(); 3428 MySync.cMaxTscTicks = pArgs->cMaxTscTicks; 3171 3429 3172 3430 /* Look for the partner, might not be here yet... Special abort considerations. */ … … 3189 3447 /* I found my partner, waiting to be found... Special abort considerations. */ 3190 3448 if (fIsMaster) 3191 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_ START, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* parnaoia */3449 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* parnaoia */ 3192 3450 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/); 3193 3451 … … 3212 3470 3213 3471 if (!fIsMaster) 3214 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_ START, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* race #1 */3472 if (!ASMAtomicCmpXchgU32(&pOtherSync->uSyncVar, GIP_TSC_DELTA_SYNC2_READY, GIP_TSC_DELTA_SYNC2_PRESTART_WAIT)) /* race #1 */ 3215 3473 return supdrvMeasureTscDeltaCallbackAbortSyncSetup(pArgs, &MySync, fIsMaster, false /*fTimeout*/); 3216 3474 … … 3221 3479 for (iTry = 0; iTry < 12; iTry++) 3222 3480 { 3223 if (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_ START)3481 if (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_READY) 3224 3482 break; 3225 3483 … … 3234 3492 # error "huh??" 3235 3493 #endif 3236 if (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_ START)3494 if (ASMAtomicReadU32(&MySync.uSyncVar) != GIP_TSC_DELTA_SYNC2_READY) 3237 3495 break; 3238 3496 … … 3333 3591 * try pick a different master. (This fudge only works with multi core systems.) 3334 3592 * ASSUMES related threads have adjacent APIC IDs. ASSUMES two threads per core. 3593 * 3594 * We skip this on AMDs for now as their HTT is different from intel's and 3595 * it doesn't seem to have any favorable effect on the results. 3596 * 3597 * If the master is offline, we need a new master too, so share the code. 3335 3598 */ 3336 3599 iGipCpuMaster = supdrvGipFindCpuIndexForCpuId(pGip, idMaster); 3337 3600 AssertReturn(iGipCpuMaster < pGip->cCpus, VERR_INVALID_CPU_ID); 3338 3601 pGipCpuMaster = &pGip->aCPUs[iGipCpuMaster]; 3339 if ( (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1) 3340 && ASMHasCpuId() 3341 && ASMIsValidStdRange(ASMCpuId_EAX(0)) 3342 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT) 3343 && pGip->cOnlineCpus > 2) 3602 if ( ( (pGipCpuMaster->idApic & ~1) == (pGipCpuWorker->idApic & ~1) 3603 && ASMHasCpuId() 3604 && ASMIsValidStdRange(ASMCpuId_EAX(0)) 3605 && (ASMCpuId_EDX(1) & X86_CPUID_FEATURE_EDX_HTT) 3606 && !ASMIsAmdCpu() 3607 && pGip->cOnlineCpus > 2) 3608 || !RTMpIsCpuOnline(idMaster) ) 3344 3609 { 3345 3610 uint32_t i; … … 3353 3618 && pGip->aCPUs[i].idCpu != pGipCpuWorker->idCpu 3354 3619 && pGip->aCPUs[i].idApic != pGipCpuWorker->idApic 3355 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic) 3620 && pGip->aCPUs[i].idApic != pGipCpuMaster->idApic 3621 && RTMpIsCpuOnline(pGip->aCPUs[i].idCpu)) 3356 3622 { 3357 3623 iGipCpuMaster = i; … … 3365 3631 * Set the master TSC as the initiator. This serializes delta measurments. 3366 3632 */ 3633 /** @todo We can use a mutex or five for this now, and move it up before we 3634 * do the HTT/offline-master stuff. */ 3367 3635 while (!ASMAtomicCmpXchgU32(&pDevExt->idTscDeltaInitiator, idMaster, NIL_RTCPUID)) 3368 3636 { … … 3381 3649 * Initialize data package for the RTMpOnAll callback. 3382 3650 */ 3651 /** @todo this must be allocated, not residing on the stack. */ 3383 3652 SUPDRVGIPTSCDELTARGS Args; 3384 3653 RT_ZERO(Args); … … 3410 3679 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP); 3411 3680 3681 /** @todo Add RTMpOnPair and replace this ineffecient broadcast IPI. */ 3412 3682 rc = RTMpOnAll(supdrvMeasureTscDeltaCallback, &Args, NULL); 3413 3683 if (RT_SUCCESS(rc)) … … 3437 3707 rc = VERR_SUPDRV_TSC_DELTA_MEASUREMENT_FAILED; 3438 3708 } 3709 /** @todo return try-again if we get an offline CPU error. */ 3439 3710 } 3440 3711 … … 3696 3967 { 3697 3968 PSUPGIPCPU pGipCpuWorker = &pGip->aCPUs[iCpu]; 3698 if ( pGipCpuWorker->i64TSCDelta == INT64_MAX 3699 && RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet)) 3969 if (RTCpuSetIsMemberByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet)) 3700 3970 { 3701 rc |= supdrvMeasureTscDeltaOne(pDevExt, iCpu); 3971 if (pGipCpuWorker->i64TSCDelta == INT64_MAX) 3972 { 3973 int rc2 = supdrvMeasureTscDeltaOne(pDevExt, iCpu); 3974 if (RT_FAILURE(rc2) && RT_SUCCESS(rc)) 3975 rc = rc2; 3976 } 3977 else 3978 { 3979 /* 3980 * The thread/someone must've called SUPR0TscDeltaMeasureBySetIndex, 3981 * mark the delta as fine to get the timer thread off our back. 3982 */ 3983 RTCpuSetDelByIndex(&pDevExt->TscDeltaCpuSet, pGipCpuWorker->iCpuSet); 3984 RTCpuSetAddByIndex(&pDevExt->TscDeltaObtainedCpuSet, pGipCpuWorker->iCpuSet); 3985 } 3702 3986 } 3703 3987 }
Note:
See TracChangeset
for help on using the changeset viewer.