VirtualBox

Changeset 54342 in vbox


Ignore:
Timestamp:
Feb 20, 2015 7:14:56 PM (10 years ago)
Author:
vboxsync
Message:

SUPDrvGip.cpp: First part of multiple tsc delta algorithm reorg.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/HostDrivers/Support/SUPDrvGip.cpp

    r54339 r54342  
    22472247
    22482248
    2249 #ifdef GIP_TSC_DELTA_METHOD_2
    2250 
    22512249/**
    22522250 * TSC delta measurement algorithm \#2 result entry.
     
    22782276typedef SUPDRVTSCDELTAMETHOD2 *PSUPDRVTSCDELTAMETHOD2;
    22792277
    2280 #endif /* GIP_TSC_DELTA_METHOD_2 */
    22812278
    22822279/**
     
    22902287    PSUPGIPCPU              pMaster;
    22912288    RTCPUID                 idMaster;
     2289
     2290#if 0
     2291    /** Method 1 data. */
     2292    struct
     2293    {
     2294    } M1;
     2295#endif
     2296
    22922297#ifdef GIP_TSC_DELTA_METHOD_2
    2293     PSUPDRVTSCDELTAMETHOD2  pMasterData;
    2294     PSUPDRVTSCDELTAMETHOD2  pWorkerData;
    2295     uint32_t                cHits;
    2296     /*uint32_t                cOffByOne;*/
    2297     uint32_t                iAttempt;       /**< 1-base outer loop counter. */
    2298     bool                    fLagMaster;
    2299     bool                    fLagWorker;
     2298    struct
     2299    {
     2300        PSUPDRVTSCDELTAMETHOD2  pMasterData;
     2301        PSUPDRVTSCDELTAMETHOD2  pWorkerData;
     2302        uint32_t                cHits;
     2303        /*uint32_t                cOffByOne;*/
     2304        bool                    fLagMaster;
     2305        bool                    fLagWorker;
     2306    } M2;
    23002307#endif
    23012308} SUPDRVGIPTSCDELTARGS;
    23022309typedef SUPDRVGIPTSCDELTARGS *PSUPDRVGIPTSCDELTARGS;
     2310
     2311
     2312/** @name Macros that implement the basic synchronization steps common to
     2313 *        the algorithms.
          *
          * All macros operate on the 32-bit state word a_pTscDeltaSync->u.  The
          * handshake for one measurement round, as implemented below, is:
          *   -# Master (SYNC_BEFORE): writes GIP_TSC_DELTA_SYNC_START, disables
          *      interrupts, then spins for as long as the word still reads
          *      GIP_TSC_DELTA_SYNC_START.
          *   -# Other (SYNC_BEFORE): spins until it sees GIP_TSC_DELTA_SYNC_START,
          *      evaluates a_MidSyncExpr, then writes GIP_TSC_DELTA_SYNC_WORKER_READY.
          *   -# Both sides run their measurement code.
          *   -# Other (SYNC_AFTER): writes GIP_TSC_DELTA_SYNC_WORKER_DONE and spins
          *      (using ASMNopPause) for as long as the word keeps that value.
          *   -# Master (SYNC_AFTER): restores the saved flags, then spins until the
          *      word reads GIP_TSC_DELTA_SYNC_WORKER_DONE.
          *   -# Master (KICK_OTHER_OUT_OF_AFTER): writes GIP_TSC_DELTA_SYNC_STOP,
          *      which ends the spin in step 4.
          *
          * @note TSCDELTA_MASTER_SYNC_BEFORE and TSCDELTA_MASTER_SYNC_AFTER assign to
          *       and read from a variable named uFlags that is not a macro
          *       parameter; it must be declared in the scope where the macros are
          *       expanded.
     2314 * @{
     2315 */
     2316#define TSCDELTA_MASTER_SYNC_BEFORE(a_pTscDeltaSync) \
     2317    do {\
     2318        ASMAtomicWriteU32(&(a_pTscDeltaSync)->u, GIP_TSC_DELTA_SYNC_START); \
     2319        \
     2320        /* Disable interrupts only in the master for as short a period \
     2321           as possible, thanks again to Windows. See @bugref{6710} comment #73. */ \
     2322        uFlags = ASMIntDisableFlags(); \
     2323        \
     2324        while (ASMAtomicReadU32(&(a_pTscDeltaSync)->u) == GIP_TSC_DELTA_SYNC_START) \
     2325        { /* nothing */ } \
     2326    } while (0)
     2327#define TSCDELTA_MASTER_SYNC_AFTER(a_pTscDeltaSync) \
     2328    do {\
     2329        /* Sync up with worker. */ \
     2330        ASMSetFlags(uFlags); \
     2331        \
     2332        while (ASMAtomicReadU32(&(a_pTscDeltaSync)->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE) \
     2333        { /* nothing */ } \
     2334    } while (0)
     2335#define TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(a_pTscDeltaSync) \
     2336    do {\
     2337        ASMAtomicWriteU32(&(a_pTscDeltaSync)->u, GIP_TSC_DELTA_SYNC_STOP); \
     2338    } while (0)
     2339
     2340#define TSCDELTA_OTHER_SYNC_BEFORE(a_pTscDeltaSync, a_MidSyncExpr) \
     2341    do { \
     2342        while (ASMAtomicReadU32(&(a_pTscDeltaSync)->u) != GIP_TSC_DELTA_SYNC_START) \
     2343        { /* nothing */ } \
     2344        a_MidSyncExpr; \
     2345        ASMAtomicWriteU32(&(a_pTscDeltaSync)->u, GIP_TSC_DELTA_SYNC_WORKER_READY); \
     2346    } while (0)
     2347#define TSCDELTA_OTHER_SYNC_AFTER(a_pTscDeltaSync) \
     2348    do { \
     2349        /* Tell master we're done collecting our data. */ \
     2350        ASMAtomicWriteU32(&(a_pTscDeltaSync)->u, GIP_TSC_DELTA_SYNC_WORKER_DONE); \
     2351        \
     2352        /* Wait for the master to process the data. */ \
     2353        while (ASMAtomicReadU32(&(a_pTscDeltaSync)->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE) \
     2354            ASMNopPause(); \
     2355    } while (0)
     2356/** @} */
     2357
     2358#ifdef GIP_TSC_DELTA_METHOD_1
     2359
     2360/**
     2361 * TSC delta measurement algorithm \#1 (GIP_TSC_DELTA_METHOD_1).
     2362 *
     2363 *
     2364 * We ignore the first few runs of the loop in order to prime the
     2365 * cache. Also, we need to be careful about using 'pause' instruction
     2366 * in critical busy-wait loops in this code - it can cause undesired
     2367 * behaviour with hyperthreading.
     2368 *
     2369 * We try to minimize the measurement error by computing the minimum
     2370 * read time of the compare statement in the worker by taking TSC
     2371 * measurements across it.
     2372 *
     2373 * It must be noted that the computed minimum read time is mostly to
     2374 * eliminate huge deltas when the worker is too early and doesn't by
     2375 * itself help produce more accurate deltas. We allow two times the
     2376 * computed minimum as an arbitrary acceptable threshold. Therefore,
     2377 * it is still possible to get negative deltas where there are none
     2378 * when the worker is earlier. As long as these occasional negative
     2379 * deltas are lower than the time it takes to exit guest-context and
     2380 * the OS to reschedule EMT on a different CPU we won't expose a TSC
     2381 * that jumped backwards. It is because of the existence of the
     2382 * negative deltas we don't recompute the delta with the master and
     2383 * worker interchanged to eliminate the remaining measurement error.
     2384 *
          * One call runs GIP_TSC_DELTA_LOOPS rounds.  In each round the master
          * publishes its TSC reading in pMaster->u64TSCSample, the worker
          * publishes its own reading (or GIP_TSC_DELTA_RSVD when the reading is
          * rejected) in pWorker->u64TSCSample, and the master updates
          * pWorker->i64TSCDelta from the two samples.
     2385 *
     2386 * @param   pArgs               The argument/state data.
     2387 * @param   pSync               The synchronization structure
     2388 *                              (pDevExt->pTscDeltaSync).
     2389 * @param   fIsMaster           Set if master, clear if worker.
     2390 * @param   iTry                The attempt number.  Not referenced by this method.
     2391 */
     2392static void supdrvTscDeltaMethod1Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC pSync, bool fIsMaster, uint32_t iTry)
     2393{
     2394    PSUPGIPCPU  pGipCpuWorker   = pArgs->pWorker;
     2395    PSUPGIPCPU  pGipCpuMaster   = pArgs->pMaster;
     2396    uint64_t    uMinCmpReadTime = UINT64_MAX;
     2397    unsigned    iLoop;
     2398    NOREF(iTry);
     2399
     2400    for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
     2401    {
     2402        if (fIsMaster)
     2403        {
     2404            /*
     2405             * The master.
     2406             */
     2407            RTCCUINTREG uFlags;
     2408            AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
     2409                      ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
     2410                       pGipCpuMaster->u64TSCSample, pGipCpuMaster->idCpu, pGipCpuWorker->idCpu, pArgs->pDevExt->idGipMaster));
     2411            TSCDELTA_MASTER_SYNC_BEFORE(pSync);
     2412
                     /* Publish our TSC; repeat if the value read happens to equal the
                        GIP_TSC_DELTA_RSVD marker the worker is polling for. */
     2413            do
     2414            {
     2415                ASMSerializeInstruction();
     2416                ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
     2417            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
     2418
     2419            TSCDELTA_MASTER_SYNC_AFTER(pSync);
     2420
     2421            /* Process the data. */
     2422            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
     2423            {
     2424                if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
     2425                {
     2426                    int64_t iDelta = pGipCpuWorker->u64TSCSample
     2427                                   - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
                             /* A candidate at or above GIP_TSC_DELTA_INITIAL_MASTER_VALUE replaces
                                the stored delta only when it is smaller; any other candidate
                                replaces it when it is larger or when the stored delta is still
                                INT64_MAX. */
     2428                    if (  iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
     2429                        ? iDelta < pGipCpuWorker->i64TSCDelta
     2430                        : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
     2431                        pGipCpuWorker->i64TSCDelta = iDelta;
     2432                }
     2433            }
     2434
     2435            /* Reset our TSC sample and tell the worker to move on. */
     2436            ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
     2437            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pSync);
     2438        }
     2439        else
     2440        {
     2441            /*
     2442             * The worker.
     2443             */
     2444            uint64_t uTscWorker;
     2445            uint64_t uTscWorkerFlushed;
     2446            uint64_t uCmpReadTime;
     2447
     2448            ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample);     /* Warm the cache line. */
     2449            TSCDELTA_OTHER_SYNC_BEFORE(pSync, Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD));
     2450
     2451            /*
     2452             * Keep reading the TSC until we notice that the master has read his. Reading
     2453             * the TSC -after- the master has updated the memory is way too late. We thus
     2454             * compensate by trying to measure how long it took for the worker to notice
     2455             * the memory flushed from the master.
     2456             */
     2457            do
     2458            {
     2459                ASMSerializeInstruction();
     2460                uTscWorker = ASMReadTSC();
     2461            } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
     2462            ASMSerializeInstruction();
     2463            uTscWorkerFlushed = ASMReadTSC();
     2464
                     /* Time between the last TSC read inside the poll loop and the first
                        one after it. */
     2465            uCmpReadTime = uTscWorkerFlushed - uTscWorker;
     2466            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
     2467            {
     2468                /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
     2469                if (uCmpReadTime < (uMinCmpReadTime << 1))
     2470                {
     2471                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
     2472                    if (uCmpReadTime < uMinCmpReadTime)
     2473                        uMinCmpReadTime = uCmpReadTime;
     2474                }
     2475                else
                             /* Rejected reading: the master skips samples equal to GIP_TSC_DELTA_RSVD. */
     2476                    ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
     2477            }
     2478            else if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
     2479            {
                         /* Read-time rounds: only track the minimum, publish nothing. */
     2480                if (uCmpReadTime < uMinCmpReadTime)
     2481                    uMinCmpReadTime = uCmpReadTime;
     2482            }
     2483
     2484            TSCDELTA_OTHER_SYNC_AFTER(pSync);
     2485        }
     2486    }
     2487
     2488    /*
     2489     * We must reset the worker TSC sample value in case it gets picked as a
     2490     * GIP master later on (it's trashed above, naturally).
     2491     */
     2492    if (!fIsMaster)
     2493        ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
     2494}
     2495
     2496
     2497/**
     2498 * Initializes the argument/state data belonging to algorithm \#1.
     2499 *
          * Currently a no-op: the function only marks @a pArgs as unreferenced and
          * reports success.
          *
     2500 * @returns VBox status code.  Always VINF_SUCCESS in this implementation.
     2501 * @param   pArgs               The argument/state data.  Not touched.
     2502 */
     2503static int supdrvTscDeltaMethod1Init(PSUPDRVGIPTSCDELTARGS pArgs)
     2504{
     2505    NOREF(pArgs);
     2506    return VINF_SUCCESS;
     2507}
     2508
     2509
     2510/**
     2511 * Undoes what supdrvTscDeltaMethod1Init() did.
     2512 *
          * Currently a no-op, mirroring supdrvTscDeltaMethod1Init().
          *
     2513 * @param   pArgs               The argument/state data.  Not touched.
     2514 */
     2515static void supdrvTscDeltaMethod1Delete(PSUPDRVGIPTSCDELTARGS pArgs)
     2516{
     2517    NOREF(pArgs);
     2518}
     2519
     2520#endif /* GIP_TSC_DELTA_METHOD_1 */
    23032521
    23042522
     
    23132531# define GIP_TSC_DELTA_PRIMER_LOOPS      1
    23142532# define GIP_TSC_DELTA_READ_TIME_LOOPS   GIP_TSC_DELTA_PRIMER_LOOPS /* no read-time-loops necessary */
    2315 
    2316 
    2317 static int supdrvTscDeltaMethod2Init(PSUPDRVGIPTSCDELTARGS pArgs)
    2318 {
    2319     uint32_t const fFlags = /*RTMEMALLOCEX_FLAGS_ANY_CTX |*/ RTMEMALLOCEX_FLAGS_ZEROED;
    2320     int rc = RTMemAllocEx(sizeof(*pArgs->pMasterData), 0, fFlags, (void **)&pArgs->pWorkerData);
    2321     if (RT_SUCCESS(rc))
    2322         rc = RTMemAllocEx(sizeof(*pArgs->pMasterData), 0, fFlags, (void **)&pArgs->pMasterData);
    2323     return rc;
    2324 }
    2325 
    2326 
    2327 static void supdrvTscDeltaMethod2Term(PSUPDRVGIPTSCDELTARGS pArgs)
    2328 {
    2329     RTMemFreeEx(pArgs->pMasterData, sizeof(*pArgs->pMasterData));
    2330     RTMemFreeEx(pArgs->pWorkerData, sizeof(*pArgs->pWorkerData));
    2331     /*SUPR0Printf("cHits=%d cOffByOne=%d m=%d w=%d\n", pArgs->cHits, pArgs->cOffByOne, pArgs->pMaster->idApic, pArgs->pWorker->idApic);*/
    2332 }
    2333 
    2334 
    2335 static void supdrvTscDeltaMethod2Looped(PSUPDRVGIPTSCDELTARGS pArgs, RTCPUID idCpu, unsigned iLoop)
    2336 {
    2337     if (pArgs->idMaster == idCpu)
    2338     {
    2339         if (iLoop < GIP_TSC_DELTA_PRIMER_LOOPS)
    2340         {
    2341             if (iLoop == 0)
    2342                 pArgs->iAttempt++;
    2343 
    2344             /* Lag during the priming to be nice to everyone.. */
    2345             pArgs->fLagMaster = true;
    2346             pArgs->fLagWorker = true;
    2347         }
    2348         else if (iLoop < (GIP_TSC_DELTA_LOOPS - GIP_TSC_DELTA_PRIMER_LOOPS) / 4)
    2349         {
    2350             /* 25 % of the body without lagging. */
    2351             pArgs->fLagMaster = false;
    2352             pArgs->fLagWorker = false;
    2353         }
    2354         else if (iLoop < (GIP_TSC_DELTA_LOOPS - GIP_TSC_DELTA_PRIMER_LOOPS) / 4 * 2)
    2355         {
    2356             /* 25 % of the body with both lagging. */
    2357             pArgs->fLagMaster = true;
    2358             pArgs->fLagWorker = true;
    2359         }
    2360         else
    2361         {
    2362             /* 50% of the body with alternating lag. */
    2363             pArgs->fLagMaster = (iLoop & 1) == 0;
    2364             pArgs->fLagWorker = (iLoop & 1) == 1;
    2365         }
    2366     }
    2367 }
    2368 
    2369 
    2370 /**
    2371  * The core function of the 2nd TSC delta measurement algorithm.
    2372  *
    2373  * The idea here is that we have the two CPUs execute the exact same code
    2374  * collecting a largish set of TSC samples.  The code has one data dependency on
    2375  * the other CPU which intention it is to synchronize the execution as well as
    2376  * help cross references the two sets of TSC samples (the sequence numbers).
    2377  *
    2378  * The @a fLag parameter is used to modify the execution a tiny bit on one or
    2379  * both of the CPUs.  When @a fLag differs between the CPUs, it is thought that
    2380  * it will help with making the CPUs enter lock step execution occasionally.
    2381  *
    2382  */
    2383 static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
    2384 {
    2385     SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
    2386     uint32_t                    cLeft  = RT_ELEMENTS(pMyData->aResults);
    2387 
    2388     ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
    2389     ASMSerializeInstruction();
    2390     while (cLeft-- > 0)
    2391     {
    2392         uint64_t uTsc;
    2393         uint32_t iSeqMine  = ASMAtomicIncU32(&pMyData->iCurSeqNo);
    2394         uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
    2395         ASMCompilerBarrier();
    2396         ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
    2397         uTsc = ASMReadTSC();
    2398         ASMAtomicIncU32(&pMyData->iCurSeqNo);
    2399         ASMCompilerBarrier();
    2400         ASMSerializeInstruction();
    2401         pEntry->iSeqMine  = iSeqMine;
    2402         pEntry->iSeqOther = iSeqOther;
    2403         pEntry->uTsc      = uTsc;
    2404         pEntry++;
    2405         ASMSerializeInstruction();
    2406         if (fLag)
    2407             ASMNopPause();
    2408     }
    2409 }
    24102533
    24112534
     
    24632586    if (cHits > 0)
    24642587        *piWorkerTscDelta = iBestDelta;
    2465     pArgs->cHits     += cHits;
     2588    pArgs->M2.cHits     += cHits;
    24662589#if 0
    2467     pArgs->cOffByOne += cOffByOne;
     2590    pArgs->M2.cOffByOne += cOffByOne;
    24682591#endif
    24692592}
    24702593
    24712594
    2472 static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs, bool fFinalLoop)
     2595static void supdrvTscDeltaMethod2ProcessDataOnMaster(PSUPDRVGIPTSCDELTARGS pArgs)
    24732596{
    24742597    supdrvTscDeltaMethod2ProcessDataSet(pArgs,
    2475                                         pArgs->pMasterData,
     2598                                        pArgs->M2.pMasterData,
    24762599                                        true /*fIsMaster*/,
    2477                                         RT_ELEMENTS(pArgs->pMasterData->aResults),
    2478                                         pArgs->pWorkerData,
     2600                                        RT_ELEMENTS(pArgs->M2.pMasterData->aResults),
     2601                                        pArgs->M2.pWorkerData,
    24792602                                        pArgs->pMaster->i64TSCDelta,
    24802603                                        &pArgs->pWorker->i64TSCDelta);
    24812604
    24822605    supdrvTscDeltaMethod2ProcessDataSet(pArgs,
    2483                                         pArgs->pWorkerData,
     2606                                        pArgs->M2.pWorkerData,
    24842607                                        false /*fIsMaster*/,
    2485                                         ASMAtomicReadU32(&pArgs->pWorkerData->iCurSeqNo) >> 1,
    2486                                         pArgs->pMasterData,
     2608                                        ASMAtomicReadU32(&pArgs->M2.pWorkerData->iCurSeqNo) >> 1,
     2609                                        pArgs->M2.pMasterData,
    24872610                                        pArgs->pMaster->i64TSCDelta,
    24882611                                        &pArgs->pWorker->i64TSCDelta);
    24892612}
     2613
     2614
     2615
     2616/**
     2617 * The core function of the 2nd TSC delta measurement algorithm.
     2618 *
     2619 * The idea here is that we have the two CPUs execute the exact same code
     2620 * collecting a largish set of TSC samples.  The code has one data dependency on
     2621 * the other CPU, the intention of which is to synchronize the execution as well
     2622 * as to help cross-reference the two sets of TSC samples (the sequence numbers).
     2623 *
     2624 * The @a fLag parameter is used to modify the execution a tiny bit on one or
     2625 * both of the CPUs.  When @a fLag differs between the CPUs, it is thought that
     2626 * it will help with making the CPUs enter lock step execution occasionally.
     2627 *
          * The function resets pMyData->iCurSeqNo to zero and then fills every entry
          * of pMyData->aResults.  The sequence number is incremented twice per
          * entry, once before and once after the TSC is read.
          *
          * @param   pMyData             The calling CPU's data: receives the samples in
          *                              aResults and holds this CPU's sequence number.
          * @param   piOtherSeqNo        The other CPU's sequence number, sampled once
          *                              per entry.
          * @param   fLag                When set, ASMNopPause() is executed at the end
          *                              of each iteration.
     2628 */
     2629static void supdrvTscDeltaMethod2CollectData(PSUPDRVTSCDELTAMETHOD2 pMyData, uint32_t volatile *piOtherSeqNo, bool fLag)
     2630{
     2631    SUPDRVTSCDELTAMETHOD2ENTRY *pEntry = &pMyData->aResults[0];
     2632    uint32_t                    cLeft  = RT_ELEMENTS(pMyData->aResults);
     2633
     2634    ASMAtomicWriteU32(&pMyData->iCurSeqNo, 0);
     2635    ASMSerializeInstruction();
     2636    while (cLeft-- > 0)
     2637    {
     2638        uint64_t uTsc;
                 /* Bump our sequence number and sample the other CPU's before reading the TSC. */
     2639        uint32_t iSeqMine  = ASMAtomicIncU32(&pMyData->iCurSeqNo);
     2640        uint32_t iSeqOther = ASMAtomicReadU32(piOtherSeqNo);
     2641        ASMCompilerBarrier();
     2642        ASMSerializeInstruction(); /* Way better result than with ASMMemoryFenceSSE2() in this position! */
     2643        uTsc = ASMReadTSC();
                 /* Second bump, marking that the TSC read for this entry is done. */
     2644        ASMAtomicIncU32(&pMyData->iCurSeqNo);
     2645        ASMCompilerBarrier();
     2646        ASMSerializeInstruction();
                 /* Record the entry only after the time-critical part above. */
     2647        pEntry->iSeqMine  = iSeqMine;
     2648        pEntry->iSeqOther = iSeqOther;
     2649        pEntry->uTsc      = uTsc;
     2650        pEntry++;
     2651        ASMSerializeInstruction();
     2652        if (fLag)
     2653            ASMNopPause();
     2654    }
     2655}
     2656
     2657
     2658/**
     2659 * TSC delta measurement algorithm \#2 (GIP_TSC_DELTA_METHOD_2).
     2660 *
     2661 * See supdrvTscDeltaMethod2CollectData for algorithm details.
     2662 *
          * One call runs GIP_TSC_DELTA_LOOPS rounds.  In each round both CPUs call
          * supdrvTscDeltaMethod2CollectData() between the sync-before and sync-after
          * steps.  The master additionally chooses the lag flags for the round and,
          * once iLoop exceeds GIP_TSC_DELTA_PRIMER_LOOPS, processes the collected
          * data before releasing the worker.
          *
          * Both M2.fLagMaster and M2.fLagWorker are written by the master only, and
          * before it signals GIP_TSC_DELTA_SYNC_START; the worker reads
          * M2.fLagWorker after it has completed TSCDELTA_OTHER_SYNC_BEFORE.
          *
     2663 * @param   pArgs               The argument/state data.
     2664 * @param   pSync               The synchronization structure
     2665 *                              (pDevExt->pTscDeltaSync).
     2666 * @param   fIsMaster           Set if master, clear if worker.
     2667 * @param   iTry                The attempt number.  Not referenced in the body
          *                              (and, unlike in method \#1, not wrapped in
          *                              NOREF).
     2668 */
     2669static void supdrvTscDeltaMethod2Loop(PSUPDRVGIPTSCDELTARGS pArgs, PSUPTSCDELTASYNC pSync, bool fIsMaster, uint32_t iTry)
     2670{
     2671    unsigned iLoop;
     2672    for (iLoop = 0; iLoop < GIP_TSC_DELTA_LOOPS; iLoop++)
     2673    {
     2674        if (fIsMaster)
     2675        {
                     /* Assigned and read by the TSCDELTA_MASTER_SYNC_* macros below. */
     2676            RTCCUINTREG uFlags;
     2677
     2678            /*
     2679             * Adjust the loop lag fudge.
     2680             */
     2681            if (iLoop < GIP_TSC_DELTA_PRIMER_LOOPS)
     2682            {
     2683                /* Lag during the priming to be nice to everyone.. */
     2684                pArgs->M2.fLagMaster = true;
     2685                pArgs->M2.fLagWorker = true;
     2686            }
     2687            else if (iLoop < (GIP_TSC_DELTA_LOOPS - GIP_TSC_DELTA_PRIMER_LOOPS) / 4)
     2688            {
     2689                /* 25 % of the body without lagging. */
     2690                pArgs->M2.fLagMaster = false;
     2691                pArgs->M2.fLagWorker = false;
     2692            }
     2693            else if (iLoop < (GIP_TSC_DELTA_LOOPS - GIP_TSC_DELTA_PRIMER_LOOPS) / 4 * 2)
     2694            {
     2695                /* 25 % of the body with both lagging. */
     2696                pArgs->M2.fLagMaster = true;
     2697                pArgs->M2.fLagWorker = true;
     2698            }
     2699            else
     2700            {
     2701                /* 50% of the body with alternating lag. */
     2702                pArgs->M2.fLagMaster = (iLoop & 1) == 0;
     2703                pArgs->M2.fLagWorker = (iLoop & 1) == 1;
     2704            }
     2705
     2706            /*
     2707             * Sync up with the worker and collect data.
     2708             */
     2709            TSCDELTA_MASTER_SYNC_BEFORE(pSync);
     2710            supdrvTscDeltaMethod2CollectData(pArgs->M2.pMasterData, &pArgs->M2.pWorkerData->iCurSeqNo, pArgs->M2.fLagMaster);
     2711            TSCDELTA_MASTER_SYNC_AFTER(pSync);
     2712
     2713            /*
     2714             * Process the data.
     2715             */
     2716            if (iLoop > GIP_TSC_DELTA_PRIMER_LOOPS)
     2717                supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs);
     2718
                     /* Release the worker from its wait in TSCDELTA_OTHER_SYNC_AFTER. */
     2719            TSCDELTA_MASTER_KICK_OTHER_OUT_OF_AFTER(pSync);
     2720        }
     2721        else
     2722        {
     2723            /*
     2724             * The worker.
     2725             */
     2726            TSCDELTA_OTHER_SYNC_BEFORE(pSync, (void)0);
     2727            supdrvTscDeltaMethod2CollectData(pArgs->M2.pWorkerData, &pArgs->M2.pMasterData->iCurSeqNo, pArgs->M2.fLagWorker);
     2728            TSCDELTA_OTHER_SYNC_AFTER(pSync);
     2729        }
     2730    }
     2731}
     2732
     2733
     2734/**
     2735 * Initializes the argument/state data belonging to algorithm \#2.
     2736 *
          * Clears both M2 data pointers, then allocates the worker buffer followed
          * by the master buffer, each with RTMEMALLOCEX_FLAGS_ZEROED.  The master
          * allocation is only attempted when the worker allocation succeeded.
          *
          * NOTE(review): when the second allocation fails, the worker buffer stays
          * allocated on return; presumably the caller is expected to call
          * supdrvTscDeltaMethod2Delete() on failure as well - confirm.
          *
     2737 * @returns VBox status code.  This is the status of the last RTMemAllocEx()
          *          call made.
     2738 * @param   pArgs               The argument/state data.
     2739 */
     2740static int supdrvTscDeltaMethod2Init(PSUPDRVGIPTSCDELTARGS pArgs)
     2741{
     2742    pArgs->M2.pMasterData = NULL;
     2743    pArgs->M2.pWorkerData = NULL;
     2744
     2745    uint32_t const fFlags = /*RTMEMALLOCEX_FLAGS_ANY_CTX |*/ RTMEMALLOCEX_FLAGS_ZEROED;
     2746    int rc = RTMemAllocEx(sizeof(*pArgs->M2.pWorkerData), 0, fFlags, (void **)&pArgs->M2.pWorkerData);
     2747    if (RT_SUCCESS(rc))
     2748        rc = RTMemAllocEx(sizeof(*pArgs->M2.pMasterData), 0, fFlags, (void **)&pArgs->M2.pMasterData);
     2749    return rc;
     2750}
     2751
     2752
     2753/**
     2754 * Undoes what supdrvTscDeltaMethod2Init() did.
     2755 *
          * Passes both M2 data buffers to RTMemFreeEx(), master first.
          *
          * NOTE(review): the pointers are not reset to NULL afterwards, so they
          * are stale until supdrvTscDeltaMethod2Init() runs again.  Whether
          * RTMemFreeEx() accepts a NULL pointer (as it would get after a failed
          * init) is not visible here - confirm.
          *
     2756 * @param   pArgs               The argument/state data.
     2757 */
     2758static void supdrvTscDeltaMethod2Delete(PSUPDRVGIPTSCDELTARGS pArgs)
     2759{
     2760    RTMemFreeEx(pArgs->M2.pMasterData, sizeof(*pArgs->M2.pMasterData));
     2761    RTMemFreeEx(pArgs->M2.pWorkerData, sizeof(*pArgs->M2.pWorkerData));
     2762    /*SUPR0Printf("cHits=%d cOffByOne=%d m=%d w=%d\n", pArgs->M2.cHits, pArgs->M2.cOffByOne, pArgs->pMaster->idApic, pArgs->pWorker->idApic);*/
     2763}
     2764
    24902765
    24912766#endif /* GIP_TSC_DELTA_METHOD_2 */
     
    25052780 *          contention, SMI, pipelining etc. there is no guaranteed way of
    25062781 *          doing this on x86 CPUs.
    2507  *
    2508  *          GIP_TSC_DELTA_METHOD_1:
    2509  *          We ignore the first few runs of the loop in order to prime the
    2510  *          cache. Also, we need to be careful about using 'pause' instruction
    2511  *          in critical busy-wait loops in this code - it can cause undesired
    2512  *          behaviour with hyperthreading.
    2513  *
    2514  *          We try to minimize the measurement error by computing the minimum
    2515  *          read time of the compare statement in the worker by taking TSC
    2516  *          measurements across it.
    2517  *
    2518  *          It must be noted that the computed minimum read time is mostly to
    2519  *          eliminate huge deltas when the worker is too early and doesn't by
    2520  *          itself help produce more accurate deltas. We allow two times the
    2521  *          computed minimum as an arbitrary acceptable threshold. Therefore,
    2522  *          it is still possible to get negative deltas where there are none
    2523  *          when the worker is earlier. As long as these occasional negative
    2524  *          deltas are lower than the time it takes to exit guest-context and
    2525  *          the OS to reschedule EMT on a different CPU we won't expose a TSC
    2526  *          that jumped backwards. It is because of the existence of the
    2527  *          negative deltas we don't recompute the delta with the master and
    2528  *          worker interchanged to eliminate the remaining measurement error.
    2529  *
    2530  *          For GIP_TSC_DELTA_METHOD_2, see supdrvTscDeltaMethod2CollectData.
    25312782 */
    25322783static DECLCALLBACK(void) supdrvMeasureTscDeltaCallback(RTCPUID idCpu, void *pvUser1, void *pvUser2)
    25332784{
    2534     PSUPDRVGIPTSCDELTARGS pArgs = (PSUPDRVGIPTSCDELTARGS)pvUser1;
    2535     PSUPDRVDEVEXT      pDevExt          = pArgs->pDevExt;
    2536     PSUPGIPCPU         pGipCpuWorker    = pArgs->pWorker;
    2537     PSUPGIPCPU         pGipCpuMaster    = pArgs->pMaster;
    2538     RTCPUID            idMaster         = pArgs->idMaster;
    2539     int                cTriesLeft;
     2785    PSUPDRVGIPTSCDELTARGS   pArgs = (PSUPDRVGIPTSCDELTARGS)pvUser1;
     2786    PSUPDRVDEVEXT           pDevExt          = pArgs->pDevExt;
     2787    PSUPTSCDELTASYNC        pSync            = pDevExt->pTscDeltaSync;
     2788    PSUPGIPCPU              pGipCpuWorker    = pArgs->pWorker;
     2789    PSUPGIPCPU              pGipCpuMaster    = pArgs->pMaster;
     2790    RTCPUID                 idMaster         = pArgs->idMaster;
     2791    uint32_t                iTry;
    25402792
    25412793    /* A bit of paranoia first. */
     
    25972849
    25982850    /*
    2599      * ...
     2851     * Retry loop.
    26002852     */
    26012853    Assert(pGipCpuWorker->i64TSCDelta == INT64_MAX);
    2602     cTriesLeft = 12;
    2603     while (cTriesLeft-- > 0)
    2604     {
    2605         unsigned i;
    2606         uint64_t uMinCmpReadTime = UINT64_MAX;
    2607         for (i = 0; i < GIP_TSC_DELTA_LOOPS; i++)
    2608         {
    2609 #ifdef GIP_TSC_DELTA_METHOD_2
    2610             supdrvTscDeltaMethod2Looped(pArgs, idCpu, i);
     2854    for (iTry = 0; iTry < 12; iTry++)
     2855    {
     2856        /*
     2857         * Do the measurements.
     2858         */
     2859#ifdef GIP_TSC_DELTA_METHOD_1
     2860        supdrvTscDeltaMethod1Loop(pArgs, pSync, idCpu == idMaster, iTry);
     2861#elif defined(GIP_TSC_DELTA_METHOD_2)
     2862        supdrvTscDeltaMethod2Loop(pArgs, pSync, idCpu == idMaster, iTry);
     2863#else
     2864# error "huh??"
    26112865#endif
    2612             if (idCpu == idMaster)
    2613             {
    2614                 /*
    2615                  * The master.
    2616                  */
    2617                 RTCCUINTREG uFlags;
    2618                 AssertMsg(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD,
    2619                           ("%#llx idMaster=%#x idWorker=%#x (idGipMaster=%#x)\n",
    2620                            pGipCpuMaster->u64TSCSample, idMaster, pGipCpuWorker->idCpu, pDevExt->idGipMaster));
    2621                 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_START);
    2622 
    2623                 /* Disable interrupts only in the master for as short a period
    2624                    as possible, thanks again to Windows. See @bugref{6710} comment #73. */
    2625                 uFlags = ASMIntDisableFlags();
    2626 
    2627                 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_START)
    2628                 { /* nothing */ }
    2629 
    2630 #ifdef GIP_TSC_DELTA_METHOD_1
    2631                 do
    2632                 {
    2633                     ASMSerializeInstruction();
    2634                     ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, ASMReadTSC());
    2635                 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
    2636 
    2637 #elif defined(GIP_TSC_DELTA_METHOD_2)
    2638                 supdrvTscDeltaMethod2CollectData(pArgs->pMasterData, &pArgs->pWorkerData->iCurSeqNo, pArgs->fLagMaster);
    2639 #else
    2640 # error "tsc delta method not selected"
    2641 #endif
    2642 
    2643                 /* Sync up with worker. */
    2644                 ASMSetFlags(uFlags);
    2645 
    2646                 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_WORKER_DONE)
    2647                 { /* nothing */ }
    2648 
    2649                 /* Process the data. */
    2650                 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
    2651                 {
    2652 #ifdef GIP_TSC_DELTA_METHOD_1
    2653                     if (pGipCpuWorker->u64TSCSample != GIP_TSC_DELTA_RSVD)
    2654                     {
    2655                         int64_t iDelta = pGipCpuWorker->u64TSCSample
    2656                                        - (pGipCpuMaster->u64TSCSample - pGipCpuMaster->i64TSCDelta);
    2657                         if (  iDelta >= GIP_TSC_DELTA_INITIAL_MASTER_VALUE
    2658                             ? iDelta < pGipCpuWorker->i64TSCDelta
    2659                             : iDelta > pGipCpuWorker->i64TSCDelta || pGipCpuWorker->i64TSCDelta == INT64_MAX)
    2660                             pGipCpuWorker->i64TSCDelta = iDelta;
    2661                     }
    2662 #elif defined(GIP_TSC_DELTA_METHOD_2)
    2663                     if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
    2664                         supdrvTscDeltaMethod2ProcessDataOnMaster(pArgs, i == GIP_TSC_DELTA_LOOPS - 1);
    2665 #else
    2666 # error "tsc delta method not selected"
    2667 #endif
    2668                 }
    2669 
    2670                 /* Reset our TSC sample and tell the worker to move on. */
    2671                 ASMAtomicWriteU64(&pGipCpuMaster->u64TSCSample, GIP_TSC_DELTA_RSVD);
    2672                 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_STOP);
    2673             }
    2674             else
    2675             {
    2676                 /*
    2677                  * The worker.
    2678                  */
    2679                 uint64_t uTscWorker;
    2680                 uint64_t uTscWorkerFlushed;
    2681                 uint64_t uCmpReadTime;
    2682 
    2683                 ASMAtomicReadU64(&pGipCpuMaster->u64TSCSample);     /* Warm the cache line. */
    2684                 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) != GIP_TSC_DELTA_SYNC_START)
    2685                 { /* nothing */ }
    2686                 Assert(pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
    2687                 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_READY);
    2688 
    2689 #ifdef GIP_TSC_DELTA_METHOD_1
    2690                 /*
    2691                  * Keep reading the TSC until we notice that the master has read his. Reading
    2692                  * the TSC -after- the master has updated the memory is way too late. We thus
    2693                  * compensate by trying to measure how long it took for the worker to notice
    2694                  * the memory flushed from the master.
    2695                  */
    2696                 do
    2697                 {
    2698                     ASMSerializeInstruction();
    2699                     uTscWorker = ASMReadTSC();
    2700                 } while (pGipCpuMaster->u64TSCSample == GIP_TSC_DELTA_RSVD);
    2701                 ASMSerializeInstruction();
    2702                 uTscWorkerFlushed = ASMReadTSC();
    2703 
    2704                 uCmpReadTime = uTscWorkerFlushed - uTscWorker;
    2705                 if (i > GIP_TSC_DELTA_PRIMER_LOOPS + GIP_TSC_DELTA_READ_TIME_LOOPS)
    2706                 {
    2707                     /* This is totally arbitrary a.k.a I don't like it but I have no better ideas for now. */
    2708                     if (uCmpReadTime < (uMinCmpReadTime << 1))
    2709                     {
    2710                         ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, uTscWorker);
    2711                         if (uCmpReadTime < uMinCmpReadTime)
    2712                             uMinCmpReadTime = uCmpReadTime;
    2713                     }
    2714                     else
    2715                         ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
    2716                 }
    2717                 else if (i > GIP_TSC_DELTA_PRIMER_LOOPS)
    2718                 {
    2719                     if (uCmpReadTime < uMinCmpReadTime)
    2720                         uMinCmpReadTime = uCmpReadTime;
    2721                 }
    2722 
    2723 #elif defined(GIP_TSC_DELTA_METHOD_2)
    2724                 supdrvTscDeltaMethod2CollectData(pArgs->pWorkerData, &pArgs->pMasterData->iCurSeqNo, pArgs->fLagWorker);
    2725 #else
    2726 # error "tsc delta method not selected"
    2727 #endif
    2728 
    2729                 /* Tell master we're done collecting our data. */
    2730                 ASMAtomicWriteU32(&pDevExt->pTscDeltaSync->u, GIP_TSC_DELTA_SYNC_WORKER_DONE);
    2731 
    2732                 /* Wait for the master to process the data. */
    2733                 while (ASMAtomicReadU32(&pDevExt->pTscDeltaSync->u) == GIP_TSC_DELTA_SYNC_WORKER_DONE)
    2734                     ASMNopPause();
    2735             }
    2736         }
    2737 
    2738         /*
    2739          * We must reset the worker TSC sample value in case it gets picked as a
    2740          * GIP master later on (it's trashed above, naturally).
    2741          */
    2742         if (idCpu == idMaster)
    2743             ASMAtomicWriteU64(&pGipCpuWorker->u64TSCSample, GIP_TSC_DELTA_RSVD);
    27442866
    27452867        /*
     
    28823004        Args.pDevExt  = pDevExt;
    28833005#ifdef GIP_TSC_DELTA_METHOD_1
    2884         rc = VINF_SUCCESS;
     3006        rc = supdrvTscDeltaMethod1Init(&Args);
    28853007#elif defined(GIP_TSC_DELTA_METHOD_2)
    28863008        rc = supdrvTscDeltaMethod2Init(&Args);
     
    29263048        }
    29273049
    2928 #ifdef GIP_TSC_DELTA_METHOD_2
    2929         supdrvTscDeltaMethod2Term(&Args);
     3050#ifdef GIP_TSC_DELTA_METHOD_1
     3051        supdrvTscDeltaMethod1Delete(&Args);
     3052#elif defined(GIP_TSC_DELTA_METHOD_2)
     3053        supdrvTscDeltaMethod2Delete(&Args);
     3054#else
     3055# error "huh?"
    29303056#endif
    29313057    }
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette