VirtualBox

Changeset 101088 in vbox


Timestamp: Sep 12, 2023 10:22:20 AM
Author: vboxsync
svn:sync-xref-src-repo-rev: 159057
Message:

VMM/IEM,VMM/TM: Basic TB management and allocation rewrite. bugref:10369

Location: trunk
Files: 7 edited

  • trunk/include/VBox/vmm/iem.h

    r100970 → r101088

     VMMDECL(VBOXSTRICTRC)       IEMExecOneIgnoreLock(PVMCPUCC pVCpu);
     VMMDECL(VBOXSTRICTRC)       IEMExecLots(PVMCPUCC pVCpu, uint32_t cMaxInstructions, uint32_t cPollRate, uint32_t *pcInstructions);
    -VMMDECL(VBOXSTRICTRC)       IEMExecRecompilerThreaded(PVMCC pVM, PVMCPUCC pVCpu);
    +VMMDECL(VBOXSTRICTRC)       IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu);
     /** Statistics returned by IEMExecForExits. */
     typedef struct IEMEXECFOREXITSTATS
  • trunk/include/VBox/vmm/tm.h

    r98103 → r101088

     VMMDECL(bool)           TMTimerPollBool(PVMCC pVM, PVMCPUCC pVCpu);
    +VMMDECL(bool)           TMTimerPollBoolWith32BitMilliTS(PVMCC pVM, PVMCPUCC pVCpu, uint32_t *pmsNow);
     VMM_INT_DECL(void)      TMTimerPollVoid(PVMCC pVM, PVMCPUCC pVCpu);
     VMM_INT_DECL(uint64_t)  TMTimerPollGIP(PVMCC pVM, PVMCPUCC pVCpu, uint64_t *pu64Delta);
  • trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp

    r100869 → r101088

     #include <iprt/mem.h>
     #include <iprt/string.h>
    +#include <iprt/sort.h>
     #include <iprt/x86.h>

    …

     *********************************************************************************************************************************/
     static VBOXSTRICTRC iemThreadedTbExec(PVMCPUCC pVCpu, PIEMTB pTb);
    +static void         iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb);


    …


    +/*********************************************************************************************************************************
    +*   Translation Block Cache.                                                                                                     *
    +*********************************************************************************************************************************/
    +
    +/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.}  */
    +static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
    +{
    +    PCIEMTB const  pTb1 = (PCIEMTB)pvElement1;
    +    PCIEMTB const  pTb2 = (PCIEMTB)pvElement2;
    +    uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
    +    uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
    +    if (cMsSinceUse1 != cMsSinceUse2)
    +        return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
    +    if (pTb1->cUsed != pTb2->cUsed)
    +        return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
    +    if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
    +        return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
    +    return 0;
    +}
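Note: iemTbCacheAddWithPruning below keeps the first half of the sorted array, so this comparator effectively ranks TBs by keep-worthiness: most recently used first, then most frequently used, then native ahead of threaded (native TBs are costlier to recreate). A minimal standalone sketch of that ordering; the simplified struct, the flag values and the use of qsort() are illustrative assumptions, not the real IEMTB layout or RTSortApvShell:

    /* Standalone sketch of the pruning order (assumed simplified TB struct,
       not the real IEMTB; qsort stands in for RTSortApvShell). */
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define TB_TYPE_THREADED    1
    #define TB_TYPE_NATIVE      2

    typedef struct TB
    {
        uint32_t msLastUsed;    /* Virtual time of last use, in ms. */
        uint64_t cUsed;         /* Number of times executed. */
        uint32_t fType;         /* TB_TYPE_THREADED or TB_TYPE_NATIVE. */
    } TB;

    static uint32_t g_msNow = 10000; /* Stand-in for msRecompilerPollNow. */

    /* Same three-level ordering as iemTbCachePruneCmpTb above. */
    static int PruneCmp(const void *pv1, const void *pv2)
    {
        const TB *pTb1 = (const TB *)pv1;
        const TB *pTb2 = (const TB *)pv2;
        uint32_t const cMs1 = g_msNow - pTb1->msLastUsed;
        uint32_t const cMs2 = g_msNow - pTb2->msLastUsed;
        if (cMs1 != cMs2)
            return cMs1 < cMs2 ? -1 : 1;                   /* Most recently used first. */
        if (pTb1->cUsed != pTb2->cUsed)
            return pTb1->cUsed > pTb2->cUsed ? -1 : 1;     /* Most frequently used first. */
        if (pTb1->fType != pTb2->fType)
            return pTb1->fType == TB_TYPE_NATIVE ? -1 : 1; /* Native ahead of threaded. */
        return 0;
    }

    int main(void)
    {
        TB aTbs[3] =
        {
            { 9000, 5, TB_TYPE_THREADED },  /* Stale: sorts last, first pruning victim. */
            { 9900, 1, TB_TYPE_NATIVE   },
            { 9900, 1, TB_TYPE_THREADED },
        };
        qsort(aTbs, 3, sizeof(aTbs[0]), PruneCmp);
        for (unsigned i = 0; i < 3; i++)    /* Keepers come first. */
            printf("#%u: msLastUsed=%u cUsed=%u fType=%u\n",
                   i, aTbs[i].msLastUsed, (unsigned)aTbs[i].cUsed, aTbs[i].fType);
        return 0;
    }

With the sample data, the native TB sorts ahead of the equally aged threaded one, and the stale TB sorts last, i.e. it is the first to be freed.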
    +
    +
    +DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
    +{
    +    STAM_PROFILE_START(&pTbCache->StatPrune, a);
    +
    +    /*
    +     * First convert the collision list to an array.
    +     */
    +    PIEMTB    apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
    +    uintptr_t cInserted    = 0;
    +    PIEMTB    pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
    +    pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
    +
    +    while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
    +    {
    +        apSortedTbs[cInserted++] = pTbCollision;
    +        pTbCollision = pTbCollision->pNext;
    +    }
    +
    +    /* Free any excess (impossible). */
    +    if (RT_LIKELY(!pTbCollision))
    +        Assert(cInserted == RT_ELEMENTS(apSortedTbs));
    +    else
    +        do
    +        {
    +            PIEMTB pTbToFree = pTbCollision;
    +            pTbCollision = pTbToFree->pNext;
    +            iemTbAllocatorFree(pVCpu, pTbToFree);
    +        } while (pTbCollision);
    +
    +    /*
    +     * Sort it by most recently used and usage count.
    +     */
    +    RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
    +
    +    /* We keep half the list for now. Perhaps a bit aggressive... */
    +    uintptr_t const cKeep = cInserted / 2;
    +
    +    /* First free up the TBs we don't wish to keep (before creating the new
    +       list because otherwise the free code will scan the list for each one
    +       without ever finding it). */
    +    for (uintptr_t idx = cKeep; idx < cInserted; idx++)
    +        iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
    +
    +    /* Chain the new TB together with the ones we like to keep of the existing
    +       ones and insert this list into the hash table. */
    +    pTbCollision = pTb;
    +    for (uintptr_t idx = 0; idx < cKeep; idx++)
    +        pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
    +    pTbCollision->pNext = NULL;
    +
    +    pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
    +
    +    STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
    +}
    +
    +
    +static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
    +{
    +    uint32_t const idxHash    = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
    +    PIEMTB const   pTbOldHead = pTbCache->apHash[idxHash];
    +    if (!pTbOldHead)
    +    {
    +        pTb->pNext = NULL;
    +        pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1);  /** @todo could make 1 implicit... */
    +    }
    +    else
    +    {
    +        STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
    +        uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
    +        if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
    +        {
    +            pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
    +            pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
    +        }
    +        else
    +            iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
    +    }
    +}
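Note: the bucket heads stored in apHash are evidently not plain pointers; IEMTBCACHE_PTR_MAKE, IEMTBCACHE_PTR_GET_TB and IEMTBCACHE_PTR_GET_COUNT appear to pack the collision-chain length into the low bits of the head pointer (the AssertCompile involving IEMTBCACHE_PTR_COUNT_MASK in iemTbInit further down points the same way). A sketch of the technique, assuming a 3-bit count field; the real mask and IEMTBCACHE_PTR_MAX_COUNT may differ:

    /* Sketch of a count-tagged pointer with an assumed 3-bit field; the real
       width is whatever IEMTBCACHE_PTR_COUNT_MASK defines. */
    #include <assert.h>
    #include <stdint.h>

    #define PTR_COUNT_MASK  ((uintptr_t)7)  /* Low 3 bits => counts up to 7. */

    static void *PtrMake(void *pTb, uintptr_t cCount)
    {
        assert(!((uintptr_t)pTb & PTR_COUNT_MASK)); /* Requires aligned TBs. */
        assert(cCount <= PTR_COUNT_MASK);
        return (void *)((uintptr_t)pTb | cCount);
    }

    static void *PtrGetTb(void *pTagged)
    {
        return (void *)((uintptr_t)pTagged & ~PTR_COUNT_MASK);
    }

    static uintptr_t PtrGetCount(void *pTagged)
    {
        return (uintptr_t)pTagged & PTR_COUNT_MASK;
    }

    int main(void)
    {
        static uint64_t s_aDummyTb[4];      /* 8-byte aligned stand-in for an IEMTB. */
        void *pTagged = PtrMake(s_aDummyTb, 3);
        assert(PtrGetTb(pTagged) == (void *)s_aDummyTb);
        assert(PtrGetCount(pTagged) == 3);
        return 0;
    }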
    +
    +
    +/**
    + * Unlinks @a pTb from the hash table if found in it.
    + *
    + * @returns true if unlinked, false if not present.
    + * @param   pTbCache    The hash table.
    + * @param   pTb         The TB to remove.
    + */
    +static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
    +{
    +    uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
    +    PIEMTB         pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
    +
    +    /* At the head of the collision list? */
    +    if (pTbHash == pTb)
    +    {
    +        if (!pTb->pNext)
    +            pTbCache->apHash[idxHash] = NULL;
    +        else
    +            pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
    +                                                            IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
    +        return true;
    +    }
    +
    +    /* Search the collision list. */
    +    while (pTbHash)
    +    {
    +        PIEMTB const pNextTb = pTbHash->pNext;
    +        if (pNextTb == pTb)
    +        {
    +            pTbHash->pNext = pTb->pNext;
    +            pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbCache->apHash[idxHash],
    +                                                            IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
    +            return true;
    +        }
    +        pTbHash = pNextTb;
    +    }
    +    return false;
    +}
    +
    +
    +/**
    + * Looks up a TB for the given PC and flags in the cache.
    + *
    + * @returns Pointer to TB on success, NULL if not found.
    + * @param   pVCpu           The cross context virtual CPU structure of the
    + *                          calling thread.
    + * @param   pTbCache        The translation block cache.
    + * @param   GCPhysPc        The PC to look up a TB for.
    + * @param   fExtraFlags     The extra flags to join with IEMCPU::fExec for
    + *                          the lookup.
    + * @thread  EMT(pVCpu)
    + */
    +static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
    +                               RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
    +{
    +    uint32_t const fFlags  = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags | IEMTB_F_STATE_READY) & IEMTB_F_KEY_MASK;
    +    uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
    +    PIEMTB         pTb     = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
    +#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
    +    int            cLeft   = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
    +#endif
    +    Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p L %d\n", fFlags, GCPhysPc, idxHash, pTb, cLeft));
    +    while (pTb)
    +    {
    +        if (pTb->GCPhysPc == GCPhysPc)
    +        {
    +            if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
    +            {
    +                if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
    +                {
    +                    pTb->cUsed++;
    +                    pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
    +                    STAM_COUNTER_INC(&pTbCache->cLookupHits);
    +                    AssertMsg(cLeft > 0, ("%d\n", cLeft));
    +                    return pTb;
    +                }
    +                Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
    +            }
    +            else
    +                Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
    +        }
    +        else
    +            Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
    +
    +        pTb = pTb->pNext;
    +#ifdef VBOX_STRICT
    +        cLeft--;
    +#endif
    +    }
    +    AssertMsg(cLeft == 0, ("%d\n", cLeft));
    +    STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
    +    return pTb;
    +}
    +
    +
    +/*********************************************************************************************************************************
    +*   Translation Block Allocator.
    +*********************************************************************************************************************************/
     /*
    - * Translation block management.
    - */
    -
    -typedef struct IEMTBCACHE
    -{
    -    uint32_t cHash;
    -    uint32_t uHashMask;
    -    PIEMTB   apHash[_1M];
    -} IEMTBCACHE;
    -
    -static IEMTBCACHE g_TbCache = { _1M, _1M - 1, }; /**< Quick and dirty. */
    -
    -#define IEMTBCACHE_HASH(a_paCache, a_fTbFlags, a_GCPhysPc) \
    -    ( ((uint32_t)(a_GCPhysPc) ^ (a_fTbFlags)) & (a_paCache)->uHashMask)
    + * Translation block allocation management.
    + */
    +
    +#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
    +# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
    +    ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
    +# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
    +    ((a_idxTb) &  (a_pTbAllocator)->fChunkMask)
    +# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
    +    ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
    +#else
    +# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
    +    ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
    +# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
    +    ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
    +# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
    +    ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
    +#endif
    +/** Makes a TB index from a chunk index and TB index within that chunk. */
    +#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
    +    (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
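A worked example of the index arithmetic, with assumed values (8192 TBs per chunk, hence cChunkShift = 13 and fChunkMask = 8191 in the power-of-two case). The point of the #ifdef is that both variants compute the same mapping; the first merely does it with shifts and masks:

    /* The two IEMTBALLOC_IDX_TO_* variants agree whenever cTbsPerChunk is a
       power of two; values below are assumptions for illustration. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t const cTbsPerChunk = 8192;             /* Assumed chunk capacity. */
        uint32_t const cChunkShift  = 13;               /* log2(8192). */
        uint32_t const fChunkMask   = cTbsPerChunk - 1;
        uint32_t const idxTb        = 20000;            /* Flat TB index. */

        /* Power-of-two variant: shift and mask. */
        uint32_t const idxChunk1   = idxTb >> cChunkShift;
        uint32_t const idxInChunk1 = idxTb & fChunkMask;

        /* Generic variant: divide and subtract. */
        uint32_t const idxChunk2   = idxTb / cTbsPerChunk;
        uint32_t const idxInChunk2 = idxTb - idxChunk2 * cTbsPerChunk;

        assert(idxChunk1   == idxChunk2   && idxChunk1   == 2);
        assert(idxInChunk1 == idxInChunk2 && idxInChunk1 == 3616);

        /* And back to a flat index, as IEMTBALLOC_IDX_MAKE does. */
        assert((idxChunk1 << cChunkShift) + idxInChunk1 == idxTb);
        return 0;
    }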
    +
    +
    +/**
    + * Initializes the TB allocator and cache for an EMT.
    + *
    + * @returns VBox status code.
    + * @param   pVM         The VM handle.
    + * @param   cInitialTbs The initial number of translation blocks to
    + *                      preallocate.
    + * @param   cMaxTbs     The max number of translation blocks allowed.
    + * @thread  EMT
    + */
    +DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs)
    +{
    +    PVMCPUCC pVCpu = VMMGetCpu(pVM);
    +    Assert(!pVCpu->iem.s.pTbCacheR3);
    +    Assert(!pVCpu->iem.s.pTbAllocatorR3);
    +
    +    /*
    +     * Calculate the chunk size of the TB allocator.
    +     * The minimum chunk size is 2MiB.
    +     */
    +    AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
    +    uint32_t      cbPerChunk   = _2M;
    +    uint32_t      cTbsPerChunk = _2M / sizeof(IEMTB);
    +#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
    +    uint8_t const cTbShift     = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
    +    uint8_t       cChunkShift  = 21 - cTbShift;
    +    AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
    +#endif
    +    for (;;)
    +    {
    +        if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
    +            break;
    +        cbPerChunk  *= 2;
    +        cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
    +#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
    +        cChunkShift += 1;
    +#endif
    +    }
    +
    +    uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
    +    Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
    +    Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
    +
    +    cMaxTbs = cMaxChunks * cTbsPerChunk;
    +
    +    /*
    +     * Allocate and initialize it.
    +     */
    +    uint32_t const        c64BitWords   = RT_ALIGN_32(cMaxTbs, 64) / 64;
    +    size_t const          cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
    +    PIEMTBALLOCATOR const pTbAllocator  = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
    +    if (!pTbAllocator)
    +        return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
    +                          "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
    +                          cbTbAllocator, cMaxTbs, pVCpu->idCpu);
    +    pTbAllocator->uMagic        = IEMTBALLOCATOR_MAGIC;
    +    pTbAllocator->cMaxChunks    = (uint8_t)cMaxChunks;
    +    pTbAllocator->cTbsPerChunk  = cTbsPerChunk;
    +    pTbAllocator->cbPerChunk    = cbPerChunk;
    +    pTbAllocator->cMaxTbs       = cMaxTbs;
    +#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
    +    pTbAllocator->fChunkMask    = cTbsPerChunk - 1;
    +    pTbAllocator->cChunkShift   = cChunkShift;
    +    Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
    +#endif
    +
    +    memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
    +    pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
    +
    +    /*
    +     * Allocate the initial chunks.
    +     */
    +    for (uint32_t idxChunk = 0; ; idxChunk++)
    +    {
    +        PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
    +        if (!paTbs)
    +            return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
    +                              "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
    +                              cbPerChunk, idxChunk, pVCpu->idCpu);
    +
    +        for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
    +            paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
    +        ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
    +        pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
    +        pTbAllocator->cTotalTbs       += cTbsPerChunk;
    +
    +        if (idxChunk * cTbsPerChunk >= cInitialTbs)
    +            break;
    +    }
    +
    +    /*
    +     * Calculate the size of the hash table. We double the max TB count and
    +     * round it up to the nearest power of two.
    +     */
    +    uint32_t cCacheEntries = cMaxTbs * 2;
    +    if (!RT_IS_POWER_OF_TWO(cCacheEntries))
    +    {
    +        uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
    +        cCacheEntries = RT_BIT_32(iBitTop);
    +        Assert(cCacheEntries >= cMaxTbs * 2);
    +    }
    +
    +    size_t const      cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
    +    PIEMTBCACHE const pTbCache  = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
    +    if (!pTbCache)
    +        return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
    +                          "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
    +                          cbTbCache, cCacheEntries, pVCpu->idCpu);
    +
    +    /*
    +     * Initialize it (assumes zeroed by the allocator).
    +     */
    +    pTbCache->uMagic    = IEMTBCACHE_MAGIC;
    +    pTbCache->cHash     = cCacheEntries;
    +    pTbCache->uHashMask = cCacheEntries - 1;
    +    Assert(pTbCache->cHash > pTbCache->uHashMask);
    +    pVCpu->iem.s.pTbCacheR3 = pTbCache;
    +
    +    return VINF_SUCCESS;
    +}
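To make the sizing loop concrete, here is the same arithmetic as standalone C with assumed inputs: sizeof(IEMTB) taken as 256 bytes, a 64-entry aChunks array, and the /IEM/MaxTbCount default of 512K TBs (the real values depend on the build):

    /* Chunk sizing arithmetic from iemTbInit with assumed inputs; prints the
       derived geometry. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t const cbTb          = 256;            /* Assumed sizeof(IEMTB). */
        uint32_t const cMaxChunksCap = 64;             /* Assumed RT_ELEMENTS(aChunks). */
        uint32_t       cMaxTbs       = 512 * 1024;     /* /IEM/MaxTbCount default. */

        uint32_t cbPerChunk   = 2 * 1024 * 1024;       /* Minimum chunk size: 2 MiB. */
        uint32_t cTbsPerChunk = cbPerChunk / cbTb;
        while ((uint64_t)cTbsPerChunk * cMaxChunksCap < cMaxTbs)
        {
            cbPerChunk  *= 2;                          /* Double until cMaxTbs fits. */
            cTbsPerChunk = cbPerChunk / cbTb;
        }

        uint32_t const cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
        cMaxTbs = cMaxChunks * cTbsPerChunk;           /* Round up to whole chunks. */

        printf("cbPerChunk=%u cTbsPerChunk=%u cMaxChunks=%u cMaxTbs=%u\n",
               cbPerChunk, cTbsPerChunk, cMaxChunks, cMaxTbs);
        return 0;  /* => cbPerChunk=2097152 cTbsPerChunk=8192 cMaxChunks=64 cMaxTbs=524288 */
    }

With these inputs the minimum 2 MiB chunk already suffices: 8192 TBs per chunk times 64 chunks covers the 524288 TB maximum exactly, so the doubling loop never triggers.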
    +
    +
    +/**
    + * Inner free worker.
    + */
    +static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
    +                                    PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
    +{
    +    Assert(idxChunk < pTbAllocator->cAllocatedChunks);
    +    Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
    +    Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
    +    Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
    +
    +    /*
    +     * Unlink the TB from the hash table.
    +     */
    +    iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
    +
    +    /*
    +     * Free the TB itself.
    +     */
    +    switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
    +    {
    +        case IEMTB_F_TYPE_THREADED:
    +            pTbAllocator->cThreadedTbs -= 1;
    +            RTMemFree(pTb->Thrd.paCalls);
    +            break;
    +        case IEMTB_F_TYPE_NATIVE:
    +            pTbAllocator->cNativeTbs -= 1;
    +            RTMemFree(pTb->Native.pbCode); /// @todo native: fix me
    +            break;
    +        default:
    +            AssertFailed();
    +    }
    +    RTMemFree(pTb->pabOpcodes);
    +
    +    pTb->pNext              = NULL;
    +    pTb->fFlags             = 0;
    +    pTb->GCPhysPc           = UINT64_MAX;
    +    pTb->Gen.uPtr           = 0;
    +    pTb->Gen.uData          = 0;
    +    pTb->cbOpcodes          = 0;
    +    pTb->cbOpcodesAllocated = 0;
    +    pTb->pabOpcodes         = NULL;
    +
    +    ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
    +    Assert(pTbAllocator->cInUseTbs > 0);
    +
    +    pTbAllocator->cInUseTbs -= 1;
    +    STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
    +}
    +
    +
    +/**
    + * Frees the given TB.
    + *
    + * @param   pVCpu   The cross context virtual CPU structure of the calling
    + *                  thread.
    + * @param   pTb     The translation block to free.
    + * @thread  EMT(pVCpu)
    + */
    +static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
    +{
    +    /*
    +     * Validate state.
    +     */
    +    PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
    +    Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
    +    uint8_t const idxChunk = pTb->idxAllocChunk;
    +    AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
    +    uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
    +    AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
    +
    +    /*
    +     * Call inner worker.
    +     */
    +    iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
    +}
    +
    +
    +/**
    + * Grow the translation block allocator with another chunk.
    + */
    +static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
    +{
    +    /*
    +     * Validate state.
    +     */
    +    PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
    +    AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
    +    AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
    +    uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
    +    AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
    +
    +    /*
    +     * Allocate a new chunk and add it to the allocator.
    +     */
    +    PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
    +    AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
    +    pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
    +
    +    uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
    +    for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
    +        paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
    +    ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
    +    pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
    +    pTbAllocator->cTotalTbs       += cTbsPerChunk;
    +    pTbAllocator->iStartHint       = idxChunk * cTbsPerChunk;
    +
    +    return VINF_SUCCESS;
    +}
    +
    +
    +/**
    + * Allocates a TB from allocator with free block.
    + *
    + * This is common code to both the fast and slow allocator code paths.
    + */
    +DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
    +{
    +    Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
    +
    +    int idxTb;
    +    if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
    +        idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
    +                                pTbAllocator->cTotalTbs,
    +                                pTbAllocator->iStartHint & ~(uint32_t)63);
    +    else
    +        idxTb = -1;
    +    if (idxTb < 0)
    +    {
    +        idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
    +        AssertLogRelReturn(idxTb >= 0, NULL);
    +    }
    +    Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
    +    ASMBitSet(pTbAllocator->bmAllocated, idxTb);
    +
    +    /** @todo shift/mask optimization for power of two IEMTB sizes. */
    +    uint32_t const idxChunk     = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
    +    uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
    +    PIEMTB const   pTb          = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
    +    Assert(pTb->idxAllocChunk == idxChunk);
    +
    +    pTbAllocator->cInUseTbs        += 1;
    +    if (fThreaded)
    +        pTbAllocator->cThreadedTbs += 1;
    +    else
    +        pTbAllocator->cNativeTbs   += 1;
    +    STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
    +    return pTb;
    +}
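The allocation core is a bitmap scan with a start hint: ASMBitNextClear from the hint (rounded down to a 64-bit word boundary), falling back to a full ASMBitFirstClear scan if that fails. A simplified single-word stand-in for the strategy; the IPRT calls are replaced by a plain loop and the multi-word bitmap by one uint64_t:

    /* Single-word stand-in for the hinted bitmap scan (the real code uses
       ASMBitNextClear/ASMBitFirstClear over a multi-word bitmap). */
    #include <stdint.h>
    #include <stdio.h>

    static int BitFirstClearFrom(uint64_t bm, uint32_t cBits, uint32_t iFirst)
    {
        for (uint32_t i = iFirst; i < cBits; i++)
            if (!(bm & ((uint64_t)1 << i)))
                return (int)i;
        return -1;                                 /* No clear bit found. */
    }

    int main(void)
    {
        uint64_t bmAllocated = 0x0F;               /* TBs 0..3 are in use. */
        uint32_t const cTotal     = 16;
        uint32_t const iStartHint = 2;

        int idxTb = BitFirstClearFrom(bmAllocated, cTotal, iStartHint);
        if (idxTb < 0)                             /* Hint exhausted: rescan from 0. */
            idxTb = BitFirstClearFrom(bmAllocated, cTotal, 0);
        if (idxTb >= 0)
            bmAllocated |= (uint64_t)1 << idxTb;   /* Mark the TB as allocated. */
        printf("allocated idxTb=%d, bitmap=%#llx\n", idxTb, (unsigned long long)bmAllocated);
        return 0;                                  /* => idxTb=4, bitmap=0x1f */
    }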
    +
    +
    +/**
    + * Slow path for iemTbAllocatorAlloc.
    + */
    +static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
    +{
    +    /*
    +     * With some luck we can add another chunk.
    +     */
    +    if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
    +    {
    +        int rc = iemTbAllocatorGrow(pVCpu);
    +        if (RT_SUCCESS(rc))
    +            return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
    +    }
    +
    +    /*
    +     * We have to prune stuff. Sigh.
    +     *
    +     * This requires scanning for older TBs and kicking them out.  Not sure how to
    +     * best do this as we don't want to maintain any list of TBs ordered by last
    +     * usage time. But one reasonably simple approach would be that each time we
    +     * get here we continue a sequential scan of the allocation chunks,
    +     * considering just a smallish number of TBs and freeing a fixed portion of
    +     * them.  Say, we consider the next 128 TBs, freeing the least recently used
    +     * out of each group of 4 TBs, resulting in 32 free TBs.
    +     */
    +    STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
    +    uint32_t const msNow          = pVCpu->iem.s.msRecompilerPollNow;
    +    uint32_t       cFreedTbs      = 0;
    +#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
    +    uint32_t       idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)3;
    +#else
    +    uint32_t       idxTbPruneFrom = pTbAllocator->iPruneFrom;
    +#endif
    +    if (idxTbPruneFrom > pTbAllocator->cMaxTbs)
    +        idxTbPruneFrom = 0;
    +    for (uint32_t i = 0; i < 128; i += 4, idxTbPruneFrom += 4)
    +    {
    +        uint32_t idxChunk   = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
    +        uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
    +        PIEMTB   pTb        = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
    +        uint32_t cMsAge     = msNow - pTb->msLastUsed;
    +        for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < 4; j++, idxInChunk2++)
    +        {
    +#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
    +            if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
    +            { /* likely */ }
    +            else
    +            {
    +                idxInChunk2 = 0;
    +                idxChunk2  += 1;
    +                if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
    +                    idxChunk2 = 0;
    +            }
    +#endif
    +            PIEMTB   const pTb2    = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
    +            uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
    +            if (   cMsAge2 > cMsAge
    +                || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed)
    +                || (pTb2->fFlags & IEMTB_F_STATE_MASK) == IEMTB_F_STATE_OBSOLETE) /** @todo Consider state (and clean it up)! */
    +            {
    +                pTb        = pTb2;
    +                idxChunk   = idxChunk2;
    +                idxInChunk = idxInChunk2;
    +                cMsAge     = cMsAge2;
    +            }
    +        }
    +
    +        /* Free the TB if in the right state. */
    +        /** @todo They shall all be freeable! Otherwise we've buggered up the
    +         *        accounting.  The TB state crap needs eliminating. */
    +        if (   (pTb->fFlags & IEMTB_F_STATE_MASK) == IEMTB_F_STATE_READY
    +            || (pTb->fFlags & IEMTB_F_STATE_MASK) == IEMTB_F_STATE_OBSOLETE)
    +        {
    +            iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
    +            cFreedTbs++; /* just for safety */
    +        }
    +    }
    +    pTbAllocator->iPruneFrom = idxTbPruneFrom;
    +    STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
    +
    +    /*
    +     * Allocate a TB from the ones we've pruned.
    +     */
    +    if (cFreedTbs)
    +        return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
    +    return NULL;
    +}
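The policy described in the comment above, walking a 128-TB window in groups of 4 and freeing the stalest TB of each group, frees exactly 32 TBs per visit when every candidate is freeable. A toy version with fabricated ages and none of the real state or chunk-wrap handling:

    /* Toy version of the group pruning: 128 candidates, groups of 4, free the
       stalest of each group (ages are fabricated; no TB state handling). */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t aMsAge[128];
        for (uint32_t i = 0; i < 128; i++)
            aMsAge[i] = (i * 37) % 500;            /* Fabricated ages in ms. */

        uint32_t cFreedTbs = 0;
        for (uint32_t i = 0; i < 128; i += 4)
        {
            uint32_t iVictim = i;
            for (uint32_t j = i + 1; j < i + 4; j++)
                if (aMsAge[j] > aMsAge[iVictim])   /* Larger age = staler TB. */
                    iVictim = j;
            /* The real code calls iemTbAllocatorFreeInner() on the victim. */
            aMsAge[iVictim] = 0;
            cFreedTbs++;
        }
        printf("freed %u of 128 candidates\n", cFreedTbs);  /* Always 32. */
        return 0;
    }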
    +
    +
    +/**
    + * Allocate a translation block.
    + *
    + * @returns Pointer to block on success, NULL if we're out and unable to
    + *          free up an existing one (very unlikely once implemented).
    + * @param   pVCpu       The cross context virtual CPU structure of the calling
    + *                      thread.
    + * @param   fThreaded   Set if threaded TB being allocated, clear if native TB.
    + *                      For statistics.
    + */
    +DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
    +{
    +    PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
    +    Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
    +
    +    /* If the allocator is full, take slow code path. */
    +    if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
    +        return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
    +    return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
    +}


    …

     static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
     {
    -    PIEMTB pTb = (PIEMTB)RTMemAlloc(sizeof(IEMTB));
    +    PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
         if (pTb)
         {

    …

                     pTb->cbOpcodes              = 0;
                     pTb->pNext                  = NULL;
    -                RTListInit(&pTb->LocalList);
    +                pTb->cUsed                  = 0;
    +                pTb->msLastUsed             = pVCpu->iem.s.msRecompilerPollNow;
    +                pTb->idxAllocChunk          = UINT8_MAX;
                     pTb->GCPhysPc               = GCPhysPc;
                     pTb->x86.fAttr              = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;

    …

                     pTb->aGCPhysPages[1]        = NIL_RTGCPHYS;

    -                pVCpu->iem.s.cTbAllocs++;
                     return pTb;
                 }

    …

          * complicated later, don't worry. :-)
          */
    -    PIEMTB pTb = (PIEMTB)RTMemAlloc(sizeof(IEMTB));
    +    PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
         if (pTb)
         {
    +        uint8_t const idxAllocChunk = pTb->idxAllocChunk;
             memcpy(pTb, pTbSrc, sizeof(*pTb));
    +        pTb->idxAllocChunk = idxAllocChunk;

             unsigned const cCalls = pTbSrc->Thrd.cCalls;

    …

                     pTb->cbOpcodesAllocated     = cbOpcodes;
                     pTb->pNext                  = NULL;
    -                RTListInit(&pTb->LocalList);
    +                pTb->cUsed                  = 0;
    +                pTb->msLastUsed             = pVCpu->iem.s.msRecompilerPollNow;
                     pTb->fFlags                 = (pTbSrc->fFlags & ~IEMTB_F_STATE_MASK) | IEMTB_F_STATE_READY;

    -                pVCpu->iem.s.cTbAllocs++;
                     return pTb;
                 }
                 RTMemFree(pTb->Thrd.paCalls);
             }
    -        RTMemFree(pTb);
    +        iemTbAllocatorFree(pVCpu, pTb);
         }
         RT_NOREF(pVM);

    …

      * Adds the given TB to the hash table.
      *
    - * @param   pVM         The cross context virtual machine structure.
      * @param   pVCpu       The cross context virtual CPU structure of the calling
      *                      thread.
    + * @param   pTbCache    The cache to add it to.
      * @param   pTb         The translation block to add.
      */
    -static void iemThreadedTbAdd(PVMCC pVM, PVMCPUCC pVCpu, PIEMTB pTb)
    -{
    -    uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, pTb->fFlags, pTb->GCPhysPc);
    -    pTb->pNext = g_TbCache.apHash[idxHash];
    -    g_TbCache.apHash[idxHash] = pTb;
    +static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
    +{
    +    iemTbCacheAdd(pVCpu, pTbCache, pTb);
    +
         STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
         STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);

    …

         {
             Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
    -               pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, idxHash, pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
    +               pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
    +               pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
             for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
                 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,

    …

                        : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
         }
    -    RT_NOREF(pVM);
    -}
    -
    -
    -/**
    - * Frees the given TB.
    - *
    - * @param   pVM     The cross context virtual machine structure.
    - * @param   pVCpu   The cross context virtual CPU structure of the calling
    - *                  thread.
    - * @param   pTb     The translation block to free..
    - */
    -static void iemThreadedTbFree(PVMCC pVM, PVMCPUCC pVCpu, PIEMTB pTb)
    -{
    -    RT_NOREF(pVM);
    -    AssertPtr(pTb);
    -
    -    AssertCompile(IEMTB_F_STATE_OBSOLETE == IEMTB_F_STATE_MASK);
    -    pTb->fFlags |= IEMTB_F_STATE_OBSOLETE; /* works, both bits set */
    -
    -    /* Unlink it from the hash table: */
    -    uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, pTb->fFlags, pTb->GCPhysPc);
    -    PIEMTB pTbCur = g_TbCache.apHash[idxHash];
    -    if (pTbCur == pTb)
    -        g_TbCache.apHash[idxHash] = pTb->pNext;
    -    else
    -        while (pTbCur)
    -        {
    -            PIEMTB const pNextTb = pTbCur->pNext;
    -            if (pNextTb == pTb)
    -            {
    -                pTbCur->pNext = pTb->pNext;
    -                break;
    -            }
    -            pTbCur = pNextTb;
    -        }
    -
    -    /* Free it. */
    -    RTMemFree(pTb->Thrd.paCalls);
    -    pTb->Thrd.paCalls = NULL;
    -
    -    RTMemFree(pTb->pabOpcodes);
    -    pTb->pabOpcodes = NULL;
    -
    -    RTMemFree(pTb);
    -    pVCpu->iem.s.cTbFrees++;
     }

    …

     void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb)
     {
    -    iemThreadedTbFree(pVCpu->CTX_SUFF(pVM), pVCpu, pTb);
    -}
    -
    -
    -static PIEMTB iemThreadedTbLookup(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
    -{
    -    uint32_t const fFlags  = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags | IEMTB_F_STATE_READY;
    -    uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, fFlags, GCPhysPc);
    -    Log10(("TB lookup: idxHash=%#x fFlags=%#x GCPhysPc=%RGp\n", idxHash, fFlags, GCPhysPc));
    -    PIEMTB pTb = g_TbCache.apHash[idxHash];
    -    while (pTb)
    -    {
    -        if (pTb->GCPhysPc == GCPhysPc)
    -        {
    -            if (pTb->fFlags == fFlags)
    -            {
    -                if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
    -                {
    -#ifdef VBOX_WITH_STATISTICS
    -                    pVCpu->iem.s.cTbLookupHits++;
    -#endif
    -                    return pTb;
    -                }
    -                Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
    -            }
    -            else
    -                Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
    -        }
    -        else
    -            Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
    -
    -        pTb = pTb->pNext;
    -    }
    -    RT_NOREF(pVM);
    -    pVCpu->iem.s.cTbLookupMisses++;
    -    return pTb;
    +    iemTbAllocatorFree(pVCpu, pTb);
     }

    …

      * @param   GCPhysPc    The physical address corresponding to the current
      *                      RIP+CS.BASE.
    - * @param   fExtraFlags Extra translation block flags: IEMTB_F_TYPE_THREADED and
    - *                      maybe IEMTB_F_RIP_CHECKS.
    + * @param   fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
    + *                      IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
      */
     static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
     {
    +    Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
    +    fExtraFlags |= IEMTB_F_TYPE_THREADED;
    +
         /*
          * Get the TB we use for the recompiling.  This is a maxed-out TB so

    …

         AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);

    -    iemThreadedTbAdd(pVM, pVCpu, pTb);
    +    iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);

     #ifdef IEM_COMPILE_ONLY_MODE

    …

         {
             Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
    -        iemThreadedTbFree(pVCpu->pVMR3, pVCpu, pTb);
    +        iemThreadedTbObsolete(pVCpu, pTb);
             return VINF_SUCCESS;
         }

    …

      * Determines the extra IEMTB_F_XXX flags.
      *
    - * @returns IEMTB_F_TYPE_THREADED and maybe IEMTB_F_RIP_CHECKS.
    + * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
    + *          IEMTB_F_CS_LIM_CHECKS (or zero).
      * @param   pVCpu   The cross context virtual CPU structure of the calling
      *                  thread.

    …

     DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
     {
    -    uint32_t fRet = IEMTB_F_TYPE_THREADED;
    +    uint32_t fRet = 0;

         /*

    …


    -VMMDECL(VBOXSTRICTRC) IEMExecRecompilerThreaded(PVMCC pVM, PVMCPUCC pVCpu)
    +VMMDECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
     {
         /*

    …

          */
         iemInitExec(pVCpu, 0 /*fExecOpts*/);
    +    if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
    +    { }
    +    else
    +        pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);

         /*

    …

          * having to call setjmp for each block we're executing.
          */
    +    PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
         for (;;)
         {

    …

                     uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);

    -                pTb = iemThreadedTbLookup(pVM, pVCpu, GCPhysPc, fExtraFlags);
    +                pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
                     if (pTb)
    -                    rcStrict = iemThreadedTbExec(pVCpu, pTb);
    +                {
    +                    if (pTb->fFlags & IEMTB_F_TYPE_THREADED)
    +                        rcStrict = iemThreadedTbExec(pVCpu, pTb);
    +                    else
    +                        AssertFailedStmt(rcStrict = VERR_INTERNAL_ERROR_4);
    +                }
                     else
                         rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);

    …

                         {
                             if (RT_LIKELY(   (iIterations & cPollRate) != 0
    -                                      || !TMTimerPollBool(pVM, pVCpu)))
    +                                      || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
                             {

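The poll gate above works because cPollRate is applied as a mask, so it must be a power of two minus one; the timer is then polled only every cPollRate + 1 iterations, and with the new API each poll also refreshes msRecompilerPollNow as a side effect. A sketch of the pattern (the 511 mask and the stub are assumptions):

    /* Poll-gating pattern: only every (cPollRate + 1)th iteration reaches the
       timer poll; 511 and the stub are assumptions for illustration. */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool PollTimersStub(uint32_t *pmsNow)
    {
        *pmsNow += 1;       /* Stand-in for TMTimerPollBoolWith32BitMilliTS. */
        return false;       /* false = nothing pending, keep executing. */
    }

    int main(void)
    {
        uint32_t const cPollRate = 511;    /* Must be 2^n - 1 to act as a mask. */
        uint32_t       msNow     = 0;
        uint32_t       cPolls    = 0;

        for (uint32_t iIterations = 1; iIterations <= 2048; iIterations++)
            if ((iIterations & cPollRate) == 0)    /* Every 512th iteration... */
            {
                cPolls++;
                if (PollTimersStub(&msNow))        /* ...poll, refreshing msNow. */
                    break;                         /* Timer work pending: bail out. */
            }
        printf("polled %u times, msNow=%u\n", cPolls, msNow);  /* => 4, 4 */
        return 0;
    }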
  • trunk/src/VBox/VMM/VMMAll/TMAll.cpp

    r98103 → r101088

      * @param   pVCpu       The cross context virtual CPU structure of the calling EMT.
      * @param   pu64Delta   Where to store the delta.
    + * @param   pu64Now     Where to store the current time. Optional.
      *
      * @thread  The emulation thread.

    …

      * @remarks GIP uses ns ticks.
      */
    -DECL_FORCE_INLINE(uint64_t) tmTimerPollInternal(PVMCC pVM, PVMCPUCC pVCpu, uint64_t *pu64Delta)
    +DECL_FORCE_INLINE(uint64_t) tmTimerPollInternal(PVMCC pVM, PVMCPUCC pVCpu, uint64_t *pu64Delta, uint64_t *pu64Now)
     {
         VMCPUID idCpu = pVM->tm.s.idTimerCpu;

    …

         const uint64_t u64Now = TMVirtualGetNoCheck(pVM);
         STAM_COUNTER_INC(&pVM->tm.s.StatPoll);
    +    if (pu64Now)
    +        *pu64Now = u64Now;

         /*

    …

         AssertCompile(TMCLOCK_FREQ_VIRTUAL == 1000000000);
         uint64_t off = 0;
    -    tmTimerPollInternal(pVM, pVCpu, &off);
    +    tmTimerPollInternal(pVM, pVCpu, &off, NULL);
    +    return off == 0;
    +}
    +
    +
    +/**
    + * Set FF if we've passed the next virtual event and return virtual time as MS.
    + *
    + * This function is called before FFs are checked in the inner execution EM loops.
    + *
    + * This is used by the IEM recompiler for polling timers while also providing a
    + * free time source for recent use tracking and such.
    + *
    + * @returns true if timers are pending, false if not.
    + *
    + * @param   pVM         The cross context VM structure.
    + * @param   pVCpu       The cross context virtual CPU structure of the calling EMT.
    + * @param   pmsNow      Where to return the current virtual time in
    + *                      milliseconds.
    + * @thread  The emulation thread.
    + */
    +VMMDECL(bool) TMTimerPollBoolWith32BitMilliTS(PVMCC pVM, PVMCPUCC pVCpu, uint32_t *pmsNow)
    +{
    +    AssertCompile(TMCLOCK_FREQ_VIRTUAL == 1000000000);
    +    uint64_t off = 0;
    +    uint64_t u64Now = 0;
    +    tmTimerPollInternal(pVM, pVCpu, &off, &u64Now);
    +    *pmsNow = (uint32_t)(u64Now / RT_NS_1MS);
         return off == 0;
     }
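A 32-bit millisecond timestamp is sufficient for the recompiler because consumers such as the TB cache only compute relative ages with unsigned subtraction, so the roughly 49.7-day wraparound of the counter cancels out. A self-contained check of both the RT_NS_1MS-style conversion and the wraparound behaviour:

    /* 32-bit millisecond timestamps: conversion from the ns virtual clock and
       why unsigned age arithmetic survives the ~49.7 day wraparound. */
    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t const cNsPerMs = UINT64_C(1000000);       /* RT_NS_1MS. */
        uint64_t const nsNow    = UINT64_C(123456789012);
        uint32_t const msNow    = (uint32_t)(nsNow / cNsPerMs);
        assert(msNow == 123456);

        /* Last use just before the counter wrapped, "now" just after. */
        uint32_t const msLastUsed = UINT32_MAX - 5;
        uint32_t const msNow2     = 10;
        uint32_t const cMsAge     = msNow2 - msLastUsed;   /* Modulo 2^32. */
        assert(cMsAge == 16);                              /* Correct age. */
        return 0;
    }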
     
    …

     {
         uint64_t off;
    -    tmTimerPollInternal(pVM, pVCpu, &off);
    +    tmTimerPollInternal(pVM, pVCpu, &off, NULL);
     }

    …

     VMM_INT_DECL(uint64_t) TMTimerPollGIP(PVMCC pVM, PVMCPUCC pVCpu, uint64_t *pu64Delta)
     {
    -    return tmTimerPollInternal(pVM, pVCpu, pu64Delta);
    +    return tmTimerPollInternal(pVM, pVCpu, pu64Delta, NULL);
     }
  • trunk/src/VBox/VMM/VMMR3/EM.cpp

    r100805 → r101088

     #ifdef VBOX_WITH_IEM_RECOMPILER
                 if (pVM->em.s.fIemRecompiled)
    -                rcStrict = IEMExecRecompilerThreaded(pVM, pVCpu);
    +                rcStrict = IEMExecRecompiler(pVM, pVCpu);
                 else
     #endif
  • trunk/src/VBox/VMM/VMMR3/IEMR3.cpp

    r100857 → r101088

     VMMR3DECL(int)      IEMR3Init(PVM pVM)
     {
    -#if !defined(VBOX_VMM_TARGET_ARMV8) && !defined(VBOX_WITHOUT_CPUID_HOST_CALL)
         /*
          * Read configuration.
          */
    -    PCFGMNODE pIem = CFGMR3GetChild(CFGMR3GetRoot(pVM), "IEM");
    -
    +#if (!defined(VBOX_VMM_TARGET_ARMV8) && !defined(VBOX_WITHOUT_CPUID_HOST_CALL)) || defined(VBOX_WITH_IEM_RECOMPILER)
    +    PCFGMNODE const pIem = CFGMR3GetChild(CFGMR3GetRoot(pVM), "IEM");
    +    int rc;
    +#endif
    +
    +#if !defined(VBOX_VMM_TARGET_ARMV8) && !defined(VBOX_WITHOUT_CPUID_HOST_CALL)
         /** @cfgm{/IEM/CpuIdHostCall, boolean, false}
          * Controls whether the custom VBox specific CPUID host call interface is
          * enabled or not. */
     # ifdef DEBUG_bird
    -    int rc = CFGMR3QueryBoolDef(pIem, "CpuIdHostCall", &pVM->iem.s.fCpuIdHostCall, true);
    +    rc = CFGMR3QueryBoolDef(pIem, "CpuIdHostCall", &pVM->iem.s.fCpuIdHostCall, true);
     # else
    -    int rc = CFGMR3QueryBoolDef(pIem, "CpuIdHostCall", &pVM->iem.s.fCpuIdHostCall, false);
    +    rc = CFGMR3QueryBoolDef(pIem, "CpuIdHostCall", &pVM->iem.s.fCpuIdHostCall, false);
     # endif
         AssertLogRelRCReturn(rc, rc);
    +#endif
    +
    +#ifdef VBOX_WITH_IEM_RECOMPILER
    +    /** @cfgm{/IEM/InitialTbCount, uint32_t, 32768}
    +     * Initial (minimum) number of TBs per EMT in ring-3. */
    +    uint32_t cInitialTbs = 0;
    +    rc = CFGMR3QueryU32Def(pIem, "InitialTbCount", &cInitialTbs, _32K);
    +    AssertLogRelRCReturn(rc, rc);
    +    if (cInitialTbs < _16K || cInitialTbs > _8M)
    +        return VMSetError(pVM, VERR_OUT_OF_RANGE, RT_SRC_POS,
    +                          "InitialTbCount value %u (%#x) is out of range (min %u, max %u)", cInitialTbs, cInitialTbs, _16K, _8M);
    +
    +    /** @cfgm{/IEM/MaxTbCount, uint32_t, 524288}
    +     * Max number of TBs per EMT. */
    +    uint32_t cMaxTbs = 0;
    +    rc = CFGMR3QueryU32Def(pIem, "MaxTbCount", &cMaxTbs, _512K);
    +    AssertLogRelRCReturn(rc, rc);
    +    if (cMaxTbs < cInitialTbs || cMaxTbs > _8M)
    +        return VMSetError(pVM, VERR_OUT_OF_RANGE, RT_SRC_POS,
    +                          "MaxTbCount value %u (%#x) is out of range (min %u, max %u)", cMaxTbs, cMaxTbs, cInitialTbs, _8M);
     #endif
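Both keys live under the /IEM CFGM node. For experimentation they can presumably be overridden from the host via the usual VBoxInternal extradata mapping onto the CFGM tree; the VM name and values below are placeholders, and the keys only take effect in builds with VBOX_WITH_IEM_RECOMPILER:

    VBoxManage setextradata "MyVM" "VBoxInternal/IEM/InitialTbCount" 65536
    VBoxManage setextradata "MyVM" "VBoxInternal/IEM/MaxTbCount"     1048576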
    113136
     
    125148        pVCpu->iem.s.CodeTlb.uTlbRevision = pVCpu->iem.s.DataTlb.uTlbRevision = uInitialTlbRevision;
    126149        pVCpu->iem.s.CodeTlb.uTlbPhysRev  = pVCpu->iem.s.DataTlb.uTlbPhysRev  = uInitialTlbPhysRev;
    127 
    128 #if !defined(VBOX_VMM_TARGET_ARMV8) && defined(VBOX_WITH_NESTED_HWVIRT_VMX) /* quick fix for stupid structure duplication non-sense */
    129 
    130         STAMR3RegisterF(pVM, &pVCpu->iem.s.cInstructions,               STAMTYPE_U32,       STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    131                         "Instructions interpreted",                     "/IEM/CPU%u/cInstructions", idCpu);
    132         STAMR3RegisterF(pVM, &pVCpu->iem.s.cLongJumps,                  STAMTYPE_U32,       STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
    133                         "Number of longjmp calls",                      "/IEM/CPU%u/cLongJumps", idCpu);
    134         STAMR3RegisterF(pVM, &pVCpu->iem.s.cPotentialExits,             STAMTYPE_U32,       STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    135                         "Potential exits",                              "/IEM/CPU%u/cPotentialExits", idCpu);
    136         STAMR3RegisterF(pVM, &pVCpu->iem.s.cRetAspectNotImplemented,    STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    137                         "VERR_IEM_ASPECT_NOT_IMPLEMENTED",              "/IEM/CPU%u/cRetAspectNotImplemented", idCpu);
    138         STAMR3RegisterF(pVM, &pVCpu->iem.s.cRetInstrNotImplemented,     STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    139                         "VERR_IEM_INSTR_NOT_IMPLEMENTED",               "/IEM/CPU%u/cRetInstrNotImplemented", idCpu);
    140         STAMR3RegisterF(pVM, &pVCpu->iem.s.cRetInfStatuses,             STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    141                         "Informational statuses returned",              "/IEM/CPU%u/cRetInfStatuses", idCpu);
    142         STAMR3RegisterF(pVM, &pVCpu->iem.s.cRetErrStatuses,             STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    143                         "Error statuses returned",                      "/IEM/CPU%u/cRetErrStatuses", idCpu);
    144         STAMR3RegisterF(pVM, &pVCpu->iem.s.cbWritten,                   STAMTYPE_U32,       STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
    145                         "Approx bytes written",                         "/IEM/CPU%u/cbWritten", idCpu);
    146         STAMR3RegisterF(pVM, &pVCpu->iem.s.cPendingCommit,              STAMTYPE_U32,       STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
    147                         "Times RC/R0 had to postpone instruction committing to ring-3", "/IEM/CPU%u/cPendingCommit", idCpu);
    148 
    149 #ifdef VBOX_WITH_STATISTICS
    150         STAMR3RegisterF(pVM, &pVCpu->iem.s.CodeTlb.cTlbHits,            STAMTYPE_U64_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    151                         "Code TLB hits",                            "/IEM/CPU%u/CodeTlb-Hits", idCpu);
    152         STAMR3RegisterF(pVM, &pVCpu->iem.s.DataTlb.cTlbHits,            STAMTYPE_U64_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    153                         "Data TLB hits",                            "/IEM/CPU%u/DataTlb-Hits", idCpu);
    154 #endif
    155         STAMR3RegisterF(pVM, &pVCpu->iem.s.CodeTlb.cTlbMisses,          STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    156                         "Code TLB misses",                          "/IEM/CPU%u/CodeTlb-Misses", idCpu);
    157         STAMR3RegisterF(pVM, &pVCpu->iem.s.CodeTlb.uTlbRevision,        STAMTYPE_X64,       STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
    158                         "Code TLB revision",                        "/IEM/CPU%u/CodeTlb-Revision", idCpu);
    159         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.CodeTlb.uTlbPhysRev, STAMTYPE_X64,       STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
    160                         "Code TLB physical revision",               "/IEM/CPU%u/CodeTlb-PhysRev", idCpu);
    161         STAMR3RegisterF(pVM, &pVCpu->iem.s.CodeTlb.cTlbSlowReadPath,    STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
    162                         "Code TLB slow read path",                  "/IEM/CPU%u/CodeTlb-SlowReads", idCpu);
    163 
    164         STAMR3RegisterF(pVM, &pVCpu->iem.s.DataTlb.cTlbMisses,          STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    165                         "Data TLB misses",                          "/IEM/CPU%u/DataTlb-Misses", idCpu);
    166         STAMR3RegisterF(pVM, &pVCpu->iem.s.DataTlb.cTlbSafeReadPath,    STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    167                         "Data TLB safe read path",                  "/IEM/CPU%u/DataTlb-SafeReads", idCpu);
    168         STAMR3RegisterF(pVM, &pVCpu->iem.s.DataTlb.cTlbSafeWritePath,   STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    169                         "Data TLB safe write path",                 "/IEM/CPU%u/DataTlb-SafeWrites", idCpu);
    170         STAMR3RegisterF(pVM, &pVCpu->iem.s.DataTlb.uTlbRevision,        STAMTYPE_X64,       STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
    171                         "Data TLB revision",                        "/IEM/CPU%u/DataTlb-Revision", idCpu);
    172         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.DataTlb.uTlbPhysRev, STAMTYPE_X64,       STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
    173                         "Data TLB physical revision",               "/IEM/CPU%u/DataTlb-PhysRev", idCpu);
    174 
    175 
    176 #ifdef VBOX_WITH_IEM_RECOMPILER
    177         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.cTbExec,             STAMTYPE_U64_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
    178                         "Executed translation block",                   "/IEM/CPU%u/re/cTbExec", idCpu);
    179         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatTbExecBreaks,    STAMTYPE_COUNTER,   STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
    180                         "Times TB execution was interrupted/broken off", "/IEM/CPU%u/re/cTbExecBreaks", idCpu);
    181         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.cTbAllocs,           STAMTYPE_U64,       STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
    182                         "Translation block allocations",                "/IEM/CPU%u/re/cTbAllocs", idCpu);
    183         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.cTbFrees,            STAMTYPE_U64,       STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
    184                         "Translation block frees",                      "/IEM/CPU%u/re/cTbFrees", idCpu);
    185         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.cTbLookupHits,       STAMTYPE_U64_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
    186                         "Translation block lookup hits",                "/IEM/CPU%u/re/cTbLookupHits", idCpu);
    187         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.cTbLookupMisses,     STAMTYPE_U64_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
    188                         "Translation block lookup misses",              "/IEM/CPU%u/re/cTbLookupMisses", idCpu);
    189 
    190         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatTbThreadedCalls, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_CALLS_PER_TB,
    191                         "Calls per threaded translation block",         "/IEM/CPU%u/re/ThrdCallsPerTb", idCpu);
    192         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatTbThreadedInstr, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_INSTR_PER_TB,
    193                         "Instruction per threaded translation block",   "/IEM/CPU%u/re/ThrdInstrPerTb", idCpu);
    194 
    195         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatCheckIrqBreaks,  STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    196                         "TB breaks by CheckIrq",                        "/IEM/CPU%u/re/CheckIrqBreaks", idCpu);
    197         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatCheckModeBreaks, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    198                         "TB breaks by CheckMode",                       "/IEM/CPU%u/re/CheckModeBreaks", idCpu);
    199         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatCheckBranchMisses, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    200                         "Branch target misses",                         "/IEM/CPU%u/re/CheckTbJmpMisses", idCpu);
    201         STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatCheckNeedCsLimChecking, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
    202                         "Needing CS.LIM checking TB after branch or on page crossing", "/IEM/CPU%u/re/CheckTbNeedCsLimChecking", idCpu);
    203 #endif
    204 
    205         for (uint32_t i = 0; i < RT_ELEMENTS(pVCpu->iem.s.aStatXcpts); i++)
    206             STAMR3RegisterF(pVM, &pVCpu->iem.s.aStatXcpts[i], STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES,
    207                             "", "/IEM/CPU%u/Exceptions/%02x", idCpu, i);
    208         for (uint32_t i = 0; i < RT_ELEMENTS(pVCpu->iem.s.aStatInts); i++)
    209             STAMR3RegisterF(pVM, &pVCpu->iem.s.aStatInts[i], STAMTYPE_U32_RESET, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES,
    210                             "", "/IEM/CPU%u/Interrupts/%02x", idCpu, i);
    211 
    212 #if !defined(VBOX_VMM_TARGET_ARMV8) && defined(VBOX_WITH_STATISTICS) && !defined(DOXYGEN_RUNNING)
    213         /* Instruction statistics: */
    214 # define IEM_DO_INSTR_STAT(a_Name, a_szDesc) \
    215             STAMR3RegisterF(pVM, &pVCpu->iem.s.StatsRZ.a_Name, STAMTYPE_U32_RESET, STAMVISIBILITY_USED, \
    216                             STAMUNIT_COUNT, a_szDesc, "/IEM/CPU%u/instr-RZ/" #a_Name, idCpu); \
    217             STAMR3RegisterF(pVM, &pVCpu->iem.s.StatsR3.a_Name, STAMTYPE_U32_RESET, STAMVISIBILITY_USED, \
    218                             STAMUNIT_COUNT, a_szDesc, "/IEM/CPU%u/instr-R3/" #a_Name, idCpu);
    219 # include "IEMInstructionStatisticsTmpl.h"
    220 # undef IEM_DO_INSTR_STAT
    221 #endif
    222 
    223 #endif /* !defined(VBOX_VMM_TARGET_ARMV8) && defined(VBOX_WITH_NESTED_HWVIRT_VMX) - quick fix for stupid structure duplication non-sense */
    224150
    225151        /*
     
    285211        while (iMemMap-- > 0)
    286212            pVCpu->iem.s.aMemMappings[iMemMap].fAccess = IEM_ACCESS_INVALID;
     213    }
     214
     215
     216#ifdef VBOX_WITH_IEM_RECOMPILER
     217    /*
     218     * Initialize the TB allocator and cache (/ hash table).
     219     *
      220     * This is done by each EMT to try to get better thread/NUMA locality
      221     * for the allocations.
     222     */
     223    rc = VMR3ReqCallWait(pVM, VMCPUID_ALL, (PFNRT)iemTbInit, 3, pVM, cInitialTbs, cMaxTbs);
     224    AssertLogRelRCReturn(rc, rc);
     225#endif
     226
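To illustrate the broadcast pattern used above: VMR3ReqCallWait with VMCPUID_ALL runs the callback on each EMT in turn, so every EMT performs its own allocations. A minimal sketch follows (not part of the changeset; the callback body is a placeholder for what iemTbInit is expected to do):

    /* Sketch of a per-EMT init callback suitable for
       VMR3ReqCallWait(pVM, VMCPUID_ALL, ...); placeholder body only. */
    static DECLCALLBACK(int) sketchPerEmtInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs)
    {
        PVMCPUCC pVCpu = VMMGetCpu(pVM); /* the EMT currently executing the request */
        /* ... allocate this EMT's TB cache and TB allocator here, so the memory
           is touched (and ideally placed) by the thread that will use it ... */
        RT_NOREF(pVCpu, cInitialTbs, cMaxTbs);
        return VINF_SUCCESS;
    }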
     227    /*
     228     * Register statistics.
     229     */
     230    for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
     231    {
     232#if !defined(VBOX_VMM_TARGET_ARMV8) && defined(VBOX_WITH_NESTED_HWVIRT_VMX) /* quick fix for stupid structure duplication non-sense */
     233        PVMCPU pVCpu = pVM->apCpusR3[idCpu];
     234
     235        STAMR3RegisterF(pVM, &pVCpu->iem.s.cInstructions,               STAMTYPE_U32,       STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     236                        "Instructions interpreted",                     "/IEM/CPU%u/cInstructions", idCpu);
      237        STAMR3RegisterF(pVM, &pVCpu->iem.s.cLongJumps,                  STAMTYPE_U32,       STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     238                        "Number of longjmp calls",                      "/IEM/CPU%u/cLongJumps", idCpu);
     239        STAMR3RegisterF(pVM, &pVCpu->iem.s.cPotentialExits,             STAMTYPE_U32,       STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     240                        "Potential exits",                              "/IEM/CPU%u/cPotentialExits", idCpu);
     241        STAMR3RegisterF(pVM, &pVCpu->iem.s.cRetAspectNotImplemented,    STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     242                        "VERR_IEM_ASPECT_NOT_IMPLEMENTED",              "/IEM/CPU%u/cRetAspectNotImplemented", idCpu);
     243        STAMR3RegisterF(pVM, &pVCpu->iem.s.cRetInstrNotImplemented,     STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     244                        "VERR_IEM_INSTR_NOT_IMPLEMENTED",               "/IEM/CPU%u/cRetInstrNotImplemented", idCpu);
     245        STAMR3RegisterF(pVM, &pVCpu->iem.s.cRetInfStatuses,             STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     246                        "Informational statuses returned",              "/IEM/CPU%u/cRetInfStatuses", idCpu);
     247        STAMR3RegisterF(pVM, &pVCpu->iem.s.cRetErrStatuses,             STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     248                        "Error statuses returned",                      "/IEM/CPU%u/cRetErrStatuses", idCpu);
     249        STAMR3RegisterF(pVM, &pVCpu->iem.s.cbWritten,                   STAMTYPE_U32,       STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
     250                        "Approx bytes written",                         "/IEM/CPU%u/cbWritten", idCpu);
      251        STAMR3RegisterF(pVM, &pVCpu->iem.s.cPendingCommit,              STAMTYPE_U32,       STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     252                        "Times RC/R0 had to postpone instruction committing to ring-3", "/IEM/CPU%u/cPendingCommit", idCpu);
     253
     254# ifdef VBOX_WITH_STATISTICS
     255        STAMR3RegisterF(pVM, &pVCpu->iem.s.CodeTlb.cTlbHits,            STAMTYPE_U64_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     256                        "Code TLB hits",                            "/IEM/CPU%u/CodeTlb-Hits", idCpu);
     257        STAMR3RegisterF(pVM, &pVCpu->iem.s.DataTlb.cTlbHits,            STAMTYPE_U64_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     258                        "Data TLB hits",                            "/IEM/CPU%u/DataTlb-Hits", idCpu);
     259# endif
     260        STAMR3RegisterF(pVM, &pVCpu->iem.s.CodeTlb.cTlbMisses,          STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     261                        "Code TLB misses",                          "/IEM/CPU%u/CodeTlb-Misses", idCpu);
     262        STAMR3RegisterF(pVM, &pVCpu->iem.s.CodeTlb.uTlbRevision,        STAMTYPE_X64,       STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
     263                        "Code TLB revision",                        "/IEM/CPU%u/CodeTlb-Revision", idCpu);
     264        STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.CodeTlb.uTlbPhysRev, STAMTYPE_X64,       STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
     265                        "Code TLB physical revision",               "/IEM/CPU%u/CodeTlb-PhysRev", idCpu);
     266        STAMR3RegisterF(pVM, &pVCpu->iem.s.CodeTlb.cTlbSlowReadPath,    STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
     267                        "Code TLB slow read path",                  "/IEM/CPU%u/CodeTlb-SlowReads", idCpu);
     268
     269        STAMR3RegisterF(pVM, &pVCpu->iem.s.DataTlb.cTlbMisses,          STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     270                        "Data TLB misses",                          "/IEM/CPU%u/DataTlb-Misses", idCpu);
     271        STAMR3RegisterF(pVM, &pVCpu->iem.s.DataTlb.cTlbSafeReadPath,    STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     272                        "Data TLB safe read path",                  "/IEM/CPU%u/DataTlb-SafeReads", idCpu);
     273        STAMR3RegisterF(pVM, &pVCpu->iem.s.DataTlb.cTlbSafeWritePath,   STAMTYPE_U32_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     274                        "Data TLB safe write path",                 "/IEM/CPU%u/DataTlb-SafeWrites", idCpu);
     275        STAMR3RegisterF(pVM, &pVCpu->iem.s.DataTlb.uTlbRevision,        STAMTYPE_X64,       STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
     276                        "Data TLB revision",                        "/IEM/CPU%u/DataTlb-Revision", idCpu);
     277        STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.DataTlb.uTlbPhysRev, STAMTYPE_X64,       STAMVISIBILITY_ALWAYS, STAMUNIT_NONE,
     278                        "Data TLB physical revision",               "/IEM/CPU%u/DataTlb-PhysRev", idCpu);
     279
     280#ifdef VBOX_WITH_IEM_RECOMPILER
     281        STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.cTbExec,             STAMTYPE_U64_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
      282                        "Executed translation blocks",                  "/IEM/CPU%u/re/cTbExec", idCpu);
     283        STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatTbExecBreaks,    STAMTYPE_COUNTER,   STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES,
     284                        "Times TB execution was interrupted/broken off", "/IEM/CPU%u/re/cTbExecBreaks", idCpu);
     285
     286        PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
     287        STAMR3RegisterF(pVM, (void *)&pTbAllocator->StatAllocs,         STAMTYPE_COUNTER,   STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES,
     288                        "Translation block allocations",                "/IEM/CPU%u/re/cTbAllocs", idCpu);
     289        STAMR3RegisterF(pVM, (void *)&pTbAllocator->StatFrees,          STAMTYPE_COUNTER,   STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES,
     290                        "Translation block frees",                      "/IEM/CPU%u/re/cTbFrees", idCpu);
     291# ifdef VBOX_WITH_STATISTICS
     292        STAMR3RegisterF(pVM, (void *)&pTbAllocator->StatPrune,          STAMTYPE_PROFILE,   STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
     293                        "Time spent freeing up TBs when full at alloc", "/IEM/CPU%u/re/TbPruningAlloc", idCpu);
     294# endif
     295        STAMR3RegisterF(pVM, (void *)&pTbAllocator->cAllocatedChunks,   STAMTYPE_U16,   STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     296                        "Populated TB chunks",                          "/IEM/CPU%u/re/cTbChunks", idCpu);
     297        STAMR3RegisterF(pVM, (void *)&pTbAllocator->cMaxChunks,         STAMTYPE_U8,    STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     298                        "Max number of TB chunks",                      "/IEM/CPU%u/re/cTbChunksMax", idCpu);
     299        STAMR3RegisterF(pVM, (void *)&pTbAllocator->cTotalTbs,          STAMTYPE_U32,   STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     300                        "Total number of TBs in the allocator",         "/IEM/CPU%u/re/cTbTotal", idCpu);
     301        STAMR3RegisterF(pVM, (void *)&pTbAllocator->cMaxTbs,            STAMTYPE_U32,   STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     302                        "Max total number of TBs allowed",              "/IEM/CPU%u/re/cTbTotalMax", idCpu);
     303        STAMR3RegisterF(pVM, (void *)&pTbAllocator->cInUseTbs,          STAMTYPE_U32,   STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     304                        "Number of currently allocated TBs",            "/IEM/CPU%u/re/cTbAllocated", idCpu);
     305        STAMR3RegisterF(pVM, (void *)&pTbAllocator->cNativeTbs,         STAMTYPE_U32,   STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     306                        "Number of currently allocated native TBs",     "/IEM/CPU%u/re/cTbAllocatedNative", idCpu);
     307        STAMR3RegisterF(pVM, (void *)&pTbAllocator->cThreadedTbs,       STAMTYPE_U32,   STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     308                        "Number of currently allocated threaded TBs",   "/IEM/CPU%u/re/cTbAllocatedThreaded", idCpu);
     309
     310        PIEMTBCACHE     const pTbCache     = pVCpu->iem.s.pTbCacheR3;
     311        STAMR3RegisterF(pVM, (void *)&pTbCache->cHash,                  STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     312                        "Translation block lookup table size",          "/IEM/CPU%u/re/cTbHashTab", idCpu);
     313
     314        STAMR3RegisterF(pVM, (void *)&pTbCache->cLookupHits,            STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES,
     315                        "Translation block lookup hits",                "/IEM/CPU%u/re/cTbLookupHits", idCpu);
     316        STAMR3RegisterF(pVM, (void *)&pTbCache->cLookupMisses,          STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES,
     317                        "Translation block lookup misses",              "/IEM/CPU%u/re/cTbLookupMisses", idCpu);
     318        STAMR3RegisterF(pVM, (void *)&pTbCache->cCollisions,            STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_OCCURENCES,
     319                        "Translation block hash table collisions",      "/IEM/CPU%u/re/cTbCollisions", idCpu);
     320# ifdef VBOX_WITH_STATISTICS
     321        STAMR3RegisterF(pVM, (void *)&pTbCache->StatPrune,              STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
     322                        "Time spent shortening collision lists",        "/IEM/CPU%u/re/TbPruningCollisions", idCpu);
     323# endif
     324
     325        STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatTbThreadedCalls, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_CALLS_PER_TB,
     326                        "Calls per threaded translation block",         "/IEM/CPU%u/re/ThrdCallsPerTb", idCpu);
     327        STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatTbThreadedInstr, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_INSTR_PER_TB,
      328                        "Instructions per threaded translation block",  "/IEM/CPU%u/re/ThrdInstrPerTb", idCpu);
     329
     330        STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatCheckIrqBreaks,  STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     331                        "TB breaks by CheckIrq",                        "/IEM/CPU%u/re/CheckIrqBreaks", idCpu);
     332        STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatCheckModeBreaks, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     333                        "TB breaks by CheckMode",                       "/IEM/CPU%u/re/CheckModeBreaks", idCpu);
     334        STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatCheckBranchMisses, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
     335                        "Branch target misses",                         "/IEM/CPU%u/re/CheckTbJmpMisses", idCpu);
     336        STAMR3RegisterF(pVM, (void *)&pVCpu->iem.s.StatCheckNeedCsLimChecking, STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
      337                        "TBs needing CS.LIM checking after branch or on page crossing", "/IEM/CPU%u/re/CheckTbNeedCsLimChecking", idCpu);
     338#endif
     339
     340        for (uint32_t i = 0; i < RT_ELEMENTS(pVCpu->iem.s.aStatXcpts); i++)
     341            STAMR3RegisterF(pVM, &pVCpu->iem.s.aStatXcpts[i], STAMTYPE_COUNTER, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES,
     342                            "", "/IEM/CPU%u/Exceptions/%02x", idCpu, i);
     343        for (uint32_t i = 0; i < RT_ELEMENTS(pVCpu->iem.s.aStatInts); i++)
     344            STAMR3RegisterF(pVM, &pVCpu->iem.s.aStatInts[i], STAMTYPE_U32_RESET, STAMVISIBILITY_USED, STAMUNIT_OCCURENCES,
     345                            "", "/IEM/CPU%u/Interrupts/%02x", idCpu, i);
     346
     347# if !defined(VBOX_VMM_TARGET_ARMV8) && defined(VBOX_WITH_STATISTICS) && !defined(DOXYGEN_RUNNING)
     348        /* Instruction statistics: */
     349#  define IEM_DO_INSTR_STAT(a_Name, a_szDesc) \
     350            STAMR3RegisterF(pVM, &pVCpu->iem.s.StatsRZ.a_Name, STAMTYPE_U32_RESET, STAMVISIBILITY_USED, \
     351                            STAMUNIT_COUNT, a_szDesc, "/IEM/CPU%u/instr-RZ/" #a_Name, idCpu); \
     352            STAMR3RegisterF(pVM, &pVCpu->iem.s.StatsR3.a_Name, STAMTYPE_U32_RESET, STAMVISIBILITY_USED, \
     353                            STAMUNIT_COUNT, a_szDesc, "/IEM/CPU%u/instr-R3/" #a_Name, idCpu);
     354#  include "IEMInstructionStatisticsTmpl.h"
     355#  undef IEM_DO_INSTR_STAT
     356# endif
     357
     358#endif /* !defined(VBOX_VMM_TARGET_ARMV8) && defined(VBOX_WITH_NESTED_HWVIRT_VMX) - quick fix for stupid structure duplication non-sense */
    287359    }
    288360
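For readers unfamiliar with the statistics API used throughout this hunk: STAMR3RegisterF takes a pointer to the sample, its type, visibility and unit, a description, and a printf-style name that here expands to a per-CPU path. A hypothetical example of the same pattern (the sample name and description below are made up for illustration):

    /* With idCpu == 0 the name expands to "/IEM/CPU0/re/cMySample". */
    static uint64_t s_cMySample = 0;   /* in real code this lives in per-CPU state */
    STAMR3RegisterF(pVM, &s_cMySample, STAMTYPE_U64_RESET, STAMVISIBILITY_ALWAYS,
                    STAMUNIT_COUNT, "Hypothetical per-CPU sample",
                    "/IEM/CPU%u/re/cMySample", idCpu);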
  • trunk/src/VBox/VMM/include/IEMInternal.h

    r100966 r101088  
    712712 *       For the same reasons, we skip all of IEM_F_X86_CTX_MASK, with the
    713713 *       exception of SMM (which we don't implement). */
    714 #define IEMTB_F_KEY_MASK                ((UINT32_C(0xffffffff) & ~(IEM_F_X86_CTX_MASK | IEM_F_X86_CPL_MASK)) | IEM_F_X86_CTX_SMM)
     714#define IEMTB_F_KEY_MASK                (  (UINT32_MAX & ~(IEM_F_X86_CTX_MASK | IEM_F_X86_CPL_MASK | IEMTB_F_TYPE_MASK)) \
     715                                         | IEM_F_X86_CTX_SMM)
    715716/** @} */
    716717
     
    781782/**
    782783 * Translation block.
     784 *
      785 * The current plan is to keep the TBs and the associated lookup hash table
      786 * private to each VCpu, as that greatly simplifies TB removal (no races) and
      787 * avoids expensive atomic primitives for updating lists and other shared state.
    783788 */
    784789#pragma pack(2) /* to prevent the Thrd structure from being padded unnecessarily */
     
    786791{
    787792    /** Next block with the same hash table entry. */
    788     struct IEMTB * volatile pNext;
    789     /** List on the local VCPU for blocks. */
    790     RTLISTNODE          LocalList;
     793    struct IEMTB       *pNext;
     794    /** Usage counter. */
     795    uint32_t            cUsed;
     796    /** The IEMCPU::msRecompilerPollNow last time it was used. */
     797    uint32_t            msLastUsed;
     798    /** The allocation chunk this TB belongs to. */
     799    uint8_t             idxAllocChunk;
     800
     801    uint8_t             abUnused[3];
     802    uint32_t            uUnused;
     803
    791804
    792805    /** @name What uniquely identifies the block.
     
    822835            uint16_t            cAllocated;
    823836        } Thrd;
     837        struct
     838        {
     839            uint8_t            *pbCode;
     840            /** Amount of code that pbCode points to. */
     841            uint32_t            cbAllocated;
     842        } Native;
     843        /** Generic view for zeroing when freeing. */
     844        struct
     845        {
     846            uintptr_t           uPtr;
     847            uint32_t            uData;
     848        } Gen;
    824849    };
    825850
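The Gen view added above exists so that the free path can wipe the union without branching on the TB type. A minimal sketch of that idea (hypothetical helper, assuming the layout shown):

    /* Hypothetical helper: reset the code union via the generic view; valid
       for both threaded and native TBs since Gen overlays the other views. */
    static void sketchTbWipeCodeUnion(PIEMTB pTb)
    {
        pTb->Gen.uPtr  = 0;     /* overlays Thrd.paCalls / Native.pbCode */
        pTb->Gen.uData = 0;     /* overlays the 32 bits of counters that follow */
    }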
     
    872897AssertCompileMemberOffset(IEMTB, cbOpcodes, 52);
    873898AssertCompileMemberSize(IEMTB, aRanges[0], 6);
     899#if 1
    874900AssertCompileSize(IEMTB, 128);
     901# define IEMTB_SIZE_IS_POWER_OF_TWO /**< The IEMTB size is a power of two. */
     902#else
     903AssertCompileSize(IEMTB, 168);
     904# undef  IEMTB_SIZE_IS_POWER_OF_TWO
     905#endif
     906
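Since the 128-byte case defines IEMTB_SIZE_IS_POWER_OF_TWO, chunk geometry reduces to shift/mask arithmetic. A worked illustration, assuming the 2 MiB chunk size mentioned in the allocator doc comment further down (standalone program, not changeset code):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t const cbTb         = 128;               /* AssertCompileSize(IEMTB, 128) */
        uint32_t const cbPerChunk   = 2 * 1024 * 1024;   /* assumed 2 MiB chunk */
        uint32_t const cTbsPerChunk = cbPerChunk / cbTb; /* 16384 TBs per chunk */
        uint8_t        cChunkShift  = 0;
        while ((1u << cChunkShift) < cTbsPerChunk)
            cChunkShift++;                               /* 14 */
        uint32_t const fChunkMask   = cTbsPerChunk - 1;  /* 0x3fff */

        /* With an 8-bit chunk index and aChunks[256], the allocator can
           address 256 * 16384 = 4M translation blocks. */
        printf("cTbsPerChunk=%u cChunkShift=%u fChunkMask=%#x\n",
               cTbsPerChunk, cChunkShift, fChunkMask);
        return 0;
    }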
    875907/** Pointer to a translation block. */
    876908typedef IEMTB *PIEMTB;
    877909/** Pointer to a const translation block. */
    878910typedef IEMTB const *PCIEMTB;
     911
     912/**
     913 * A chunk of memory in the TB allocator.
     914 */
     915typedef struct IEMTBCHUNK
     916{
     917    /** Pointer to the translation blocks in this chunk. */
     918    PIEMTB          paTbs;
     919#ifdef IN_RING0
     920    /** Allocation handle. */
     921    RTR0MEMOBJ      hMemObj;
     922#endif
     923} IEMTBCHUNK;
     924
     925/**
     926 * A per-CPU translation block allocator.
     927 *
      928 * Because the IEMTBCACHE uses the lower 6 bits of the TB address to keep the
      929 * collision list length, and also for cache line alignment reasons, the TBs
      930 * must be allocated with at least 64-byte alignment.
      931 * Memory is therefore allocated using one of the page-aligned allocators.
     932 *
     933 *
     934 * To avoid wasting too much memory, it is allocated piecemeal as needed,
     935 * in chunks (IEMTBCHUNK) of 2 MiB or more.  The TB has an 8-bit chunk index
     936 * that enables us to quickly calculate the allocation bitmap position when
     937 * freeing the translation block.
     938 */
     939typedef struct IEMTBALLOCATOR
     940{
     941    /** Magic value (IEMTBALLOCATOR_MAGIC). */
     942    uint32_t        uMagic;
     943
     944#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
     945    /** Mask corresponding to cTbsPerChunk - 1. */
     946    uint32_t        fChunkMask;
     947    /** Shift count corresponding to cTbsPerChunk. */
     948    uint8_t         cChunkShift;
     949#else
     950    uint32_t        uUnused;
     951    uint8_t         bUnused;
     952#endif
     953    /** Number of chunks we're allowed to allocate. */
     954    uint8_t         cMaxChunks;
     955    /** Number of chunks currently populated. */
     956    uint16_t        cAllocatedChunks;
     957    /** Number of translation blocks per chunk. */
     958    uint32_t        cTbsPerChunk;
     959    /** Chunk size. */
     960    uint32_t        cbPerChunk;
     961
     962    /** The maximum number of TBs. */
     963    uint32_t        cMaxTbs;
     964    /** Total number of TBs in the populated chunks.
     965     * (cAllocatedChunks * cTbsPerChunk) */
     966    uint32_t        cTotalTbs;
     967    /** The current number of TBs in use.
      968     * The number of free TBs: cTotalTbs - cInUseTbs. */
     969    uint32_t        cInUseTbs;
     970    /** Statistics: Number of the cInUseTbs that are native ones. */
     971    uint32_t        cNativeTbs;
     972    /** Statistics: Number of the cInUseTbs that are threaded ones. */
     973    uint32_t        cThreadedTbs;
     974
     975    /** Where to start pruning TBs from when we're out.
     976     *  See iemTbAllocatorAllocSlow for details. */
     977    uint32_t        iPruneFrom;
     978    /** Hint about which bit to start scanning the bitmap from. */
     979    uint32_t        iStartHint;
     980
     981    /** Statistics: Number of TB allocation calls. */
     982    STAMCOUNTER     StatAllocs;
     983    /** Statistics: Number of TB free calls. */
     984    STAMCOUNTER     StatFrees;
      985    /** Statistics: Time spent pruning. */
     986    STAMPROFILE     StatPrune;
     987
     988    /** Allocation chunks. */
     989    IEMTBCHUNK      aChunks[256];
     990
      991    /** Allocation bitmap for the TBs in all possible chunks. */
     992    RT_FLEXIBLE_ARRAY_EXTENSION
     993    uint64_t        bmAllocated[RT_FLEXIBLE_ARRAY];
     994} IEMTBALLOCATOR;
     995/** Pointer to a TB allocator. */
     996typedef struct IEMTBALLOCATOR *PIEMTBALLOCATOR;
     997
     998/** Magic value for the TB allocator (Emmet Harley Cohen). */
     999#define IEMTBALLOCATOR_MAGIC        UINT32_C(0x19900525)
     1000
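A minimal sketch of how freeing can locate a TB's bit in bmAllocated from idxAllocChunk plus the TB's position within its chunk. This assumes IEMTB_SIZE_IS_POWER_OF_TWO (so cChunkShift is valid) and is an illustration only, not the changeset's actual free routine:

    /* Bit index = chunk index scaled by TBs-per-chunk, plus index in chunk. */
    static void sketchTbAllocatorFree(PIEMTBALLOCATOR pAllocator, PIEMTB pTb)
    {
        uint32_t const idxChunk   = pTb->idxAllocChunk;
        uint32_t const idxInChunk = (uint32_t)(pTb - pAllocator->aChunks[idxChunk].paTbs);
        uint32_t const idxBit     = (idxChunk << pAllocator->cChunkShift) + idxInChunk;
        ASMBitClear(pAllocator->bmAllocated, idxBit);   /* mark the slot free */
        pAllocator->cInUseTbs--;
    }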
     1001
     1002/**
     1003 * A per-CPU translation block cache (hash table).
     1004 *
      1005 * The hash table is allocated once during IEM initialization and sized to
      1006 * double the max TB count, rounded up to the nearest power of two (so we can
      1007 * use an AND mask rather than a modulo operation when hashing).
     1008 */
     1009typedef struct IEMTBCACHE
     1010{
     1011    /** Magic value (IEMTBCACHE_MAGIC). */
     1012    uint32_t        uMagic;
     1013    /** Size of the hash table.  This is a power of two. */
     1014    uint32_t        cHash;
     1015    /** The mask corresponding to cHash. */
     1016    uint32_t        uHashMask;
     1017    uint32_t        uPadding;
     1018
     1019    /** @name Statistics
     1020     * @{ */
     1021    /** Number of collisions ever. */
     1022    STAMCOUNTER     cCollisions;
     1023
     1024    /** Statistics: Number of TB lookup misses. */
     1025    STAMCOUNTER     cLookupMisses;
     1026    /** Statistics: Number of TB lookup hits (debug only). */
     1027    STAMCOUNTER     cLookupHits;
     1028    STAMCOUNTER     auPadding2[3];
     1029    /** Statistics: Collision list length pruning. */
     1030    STAMPROFILE     StatPrune;
     1031    /** @} */
     1032
     1033    /** The hash table itself.
      1034     * @note The lower 6 bits of the pointer are used for keeping the collision
     1035     *       list length, so we can take action when it grows too long.
     1036     *       This works because TBs are allocated using a 64 byte (or
     1037     *       higher) alignment from page aligned chunks of memory, so the lower
     1038     *       6 bits of the address will always be zero.
     1039     *       See IEMTBCACHE_PTR_COUNT_MASK, IEMTBCACHE_PTR_MAKE and friends.
     1040     */
     1041    RT_FLEXIBLE_ARRAY_EXTENSION
     1042    PIEMTB          apHash[RT_FLEXIBLE_ARRAY];
     1043} IEMTBCACHE;
      1045/** Pointer to a per-CPU translation block cache. */
     1045typedef IEMTBCACHE *PIEMTBCACHE;
     1046
     1047/** Magic value for IEMTBCACHE (Johnny O'Neal). */
     1048#define IEMTBCACHE_MAGIC            UINT32_C(0x19561010)
     1049
     1050/** The collision count mask for IEMTBCACHE::apHash entries. */
     1051#define IEMTBCACHE_PTR_COUNT_MASK               ((uintptr_t)0x3f)
     1052/** The max collision count for IEMTBCACHE::apHash entries before pruning. */
     1053#define IEMTBCACHE_PTR_MAX_COUNT                ((uintptr_t)0x30)
     1054/** Combine a TB pointer and a collision list length into a value for an
     1055 *  IEMTBCACHE::apHash entry. */
     1056#define IEMTBCACHE_PTR_MAKE(a_pTb, a_cCount)    (PIEMTB)((uintptr_t)(a_pTb) | (a_cCount))
      1057/** Extract the TB pointer from an IEMTBCACHE::apHash entry, masking off
      1058 *  the collision list length. */
     1059#define IEMTBCACHE_PTR_GET_TB(a_pHashEntry)     (PIEMTB)((uintptr_t)(a_pHashEntry) & ~IEMTBCACHE_PTR_COUNT_MASK)
      1060/** Extract the collision list length from an IEMTBCACHE::apHash
      1061 *  entry. */
     1062#define IEMTBCACHE_PTR_GET_COUNT(a_pHashEntry)  ((uintptr_t)(a_pHashEntry) & IEMTBCACHE_PTR_COUNT_MASK)
     1063
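A self-contained illustration of the tagged-pointer scheme behind these macros (simplified types, not changeset code): the 64-byte alignment guarantees six zero low bits, which is exactly where the collision count is stashed.

    #include <assert.h>
    #include <stdint.h>

    #define PTR_COUNT_MASK ((uintptr_t)0x3f)   /* mirrors IEMTBCACHE_PTR_COUNT_MASK */

    int main(void)
    {
        static _Alignas(64) unsigned char aTb[128];  /* stand-in for a 64-byte aligned IEMTB */
        uintptr_t const uTagged = (uintptr_t)&aTb[0] | 5;  /* IEMTBCACHE_PTR_MAKE(pTb, 5) */

        assert((uTagged & ~PTR_COUNT_MASK) == (uintptr_t)&aTb[0]); /* ..._GET_TB    */
        assert((uTagged &  PTR_COUNT_MASK) == 5);                  /* ..._GET_COUNT */
        return 0;
    }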
     1064/**
     1065 * Calculates the hash table slot for a TB from physical PC address and TB flags.
     1066 */
     1067#define IEMTBCACHE_HASH(a_paCache, a_fTbFlags, a_GCPhysPc) \
     1068    IEMTBCACHE_HASH_NO_KEY_MASK(a_paCache, (a_fTbFlags) & IEMTB_F_KEY_MASK, a_GCPhysPc)
     1069
     1070/**
     1071 * Calculates the hash table slot for a TB from physical PC address and TB
     1072 * flags, ASSUMING the caller has applied IEMTB_F_KEY_MASK to @a a_fTbFlags.
     1073 */
     1074#define IEMTBCACHE_HASH_NO_KEY_MASK(a_paCache, a_fTbFlags, a_GCPhysPc) \
     1075    (((uint32_t)(a_GCPhysPc) ^ (a_fTbFlags)) & (a_paCache)->uHashMask)
     1076
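A worked illustration of the sizing rule from the IEMTBCACHE doc comment together with the slot computation above (the max TB count and input values are assumptions):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t const cMaxTbs = 250000;        /* hypothetical max TB count */
        uint32_t       cHash   = 1;
        while (cHash < cMaxTbs * 2)             /* double, round up to power of two */
            cHash <<= 1;                        /* -> 524288 (2^19) */
        uint32_t const uHashMask = cHash - 1;

        uint64_t const GCPhysPc  = UINT64_C(0x00000000fee1c0de); /* example phys PC */
        uint32_t const fKeyFlags = 0x00000042;  /* assumed, already ANDed with IEMTB_F_KEY_MASK */
        uint32_t const idxSlot   = ((uint32_t)GCPhysPc ^ fKeyFlags) & uHashMask;
        printf("cHash=%u idxSlot=%#x\n", cHash, idxSlot);
        return 0;
    }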
    8791077
    8801078/** @name IEMBRANCHED_F_XXX - Branched indicator (IEMCPU::fTbBranched).
     
    11851383     * components as needed. */
    11861384    R3PTRTYPE(PIEMTB)       pNativeCompileTbR3;
     1385    /** Pointer to the ring-3 TB cache for this EMT. */
     1386    R3PTRTYPE(PIEMTBCACHE)  pTbCacheR3;
    11871387    /** The PC (RIP) at the start of pCurTbR3/pCurTbR0.
    11881388     * The TBs are based on physical addresses, so this is needed to correlate
    11891389     * RIP to opcode bytes stored in the TB (AMD-V / VT-x). */
    11901390    uint64_t                uCurTbStartPc;
    1191     /** Statistics: Number of TB lookup misses. */
    1192     uint64_t                cTbLookupMisses;
    1193     /** Statistics: Number of TB lookup hits (debug only). */
    1194     uint64_t                cTbLookupHits;
    11951391    /** Number of TBs executed. */
    11961392    uint64_t                cTbExec;
     
    12151411    bool                    fTbCurInstrIsSti;
    12161412    /** Space reserved for recompiler data / alignment. */
    1217     bool                    afRecompilerStuff1[2];
     1413    bool                    afRecompilerStuff1[2+4];
     1414    /** The virtual sync time at the last timer poll call. */
     1415    uint32_t                msRecompilerPollNow;
    12181416    /** Previous GCPhysInstrBuf value - only valid if fTbCrossedPage is set.   */
    12191417    RTGCPHYS                GCPhysInstrBufPrev;
     
    12251423    /** Copy of IEMCPU::uInstrBufPc after decoding a branch instruction.  */
    12261424    uint64_t                GCVirtTbBranchSrcBuf;
     1425    /** Pointer to the ring-3 TB allocator for this EMT. */
     1426    R3PTRTYPE(PIEMTBALLOCATOR) pTbAllocatorR3;
    12271427    /* Alignment. */
    1228     uint64_t                auAlignment10[6];
    1229     /** Statistics: Number of TB allocation calls. */
    1230     uint64_t                cTbAllocs;
    1231     /** Statistics: Number of TB free calls. */
    1232     uint64_t                cTbFrees;
     1428    uint64_t                auAlignment10[7];
    12331429    /** Statistics: Times TB execution was broken off before reaching the end. */
    12341430    STAMCOUNTER             StatTbExecBreaks;
     
    51055301extern const PFNIEMOP g_apfnIemThreadedRecompilerVecMap3[1024];
    51065302
     5303DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs);
    51075304void            iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb);
    51085305