VirtualBox

Changeset 106326 in vbox for trunk/src/VBox/VMM


Ignore:
Timestamp:
Oct 15, 2024 1:29:25 PM (3 months ago)
Author:
vboxsync
Message:

VMM/IEM: Two small perf tweaks in iemExecMemAllocatorPrune. bugref:10720

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp

    r106313 r106326  
    323323typedef struct IEMEXECMEMALLOCHDR
    324324{
    325     /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
    326     uint32_t        uMagic;
    327     /** The allocation chunk (for speeding up freeing). */
    328     uint32_t        idxChunk;
     325    union
     326    {
     327        struct
     328        {
     329            /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
     330            uint32_t        uMagic;
     331            /** The allocation chunk (for speeding up freeing). */
     332            uint32_t        idxChunk;
     333        };
     334        /** Combined magic and chunk index, for the pruning scanner code. */
     335        uint64_t u64MagicAndChunkIdx;
     336    };
    329337    /** Pointer to the translation block the allocation belongs to.
    330338     * This is the whole point of the header. */
     
    408416    /*
    409417     * Do the pruning.  The current approach is the severe kind.
    410      */
    411     uint64_t            cbPruned = 0;
    412     uint8_t * const     pbChunk  = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
     418     *
     419     * This is memory bound, as we must load both the allocation header and the
     420     * associated TB and then modify them. So, the CPU isn't all that utilized
     421     * here.  Try applying some prefetching to speed it up a tiny bit.
     422     */
     423    uint64_t            cbPruned            = 0;
     424    uint64_t const      u64MagicAndChunkIdx = RT_MAKE_U64(IEMEXECMEMALLOCHDR_MAGIC, idxChunk);
     425    uint8_t * const     pbChunk             = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunkRx;
    413426    while (offChunk < offPruneEnd)
    414427    {
    415428        PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk];
    416429
    417         /* Is this the start of an allocation block for TB? (We typically have
    418            one allocation at the start of each chunk for the unwind info where
    419            pTb is NULL.)  */
    420         if (   pHdr->uMagic   == IEMEXECMEMALLOCHDR_MAGIC
    421             && pHdr->pTb      != NULL
    422             && pHdr->idxChunk == idxChunk)
     430        /* Is this the start of an allocation block for a TB? (We typically
     431           have one allocation at the start of each chunk for the unwind info
     432           where pTb is NULL.)  */
     433        PIEMTB pTb;
     434        if (   pHdr->u64MagicAndChunkIdx == u64MagicAndChunkIdx
     435            && RT_LIKELY((pTb = pHdr->pTb) != NULL))
    423436        {
    424             PIEMTB const pTb = pHdr->pTb;
    425437            AssertPtr(pTb);
    426438
    427439            uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr),
    428440                                                 IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    429             AssertBreakStmt(offChunk + cbBlock <= cbChunk, offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); /* paranoia */
     441
     442            /* Prefetch the next header before freeing the current one and its TB. */
     443            /** @todo Iff the block size was part of the header in some way, this could be
     444             *        a tiny bit faster. */
     445            offChunk += cbBlock;
     446#if defined(_MSC_VER) && defined(RT_ARCH_AMD64)
     447            _mm_prefetch((char *)&pbChunk[offChunk], _MM_HINT_T0);
     448#elif defined(_MSC_VER) && defined(RT_ARCH_ARM64)
     449            __prefetch(&pbChunk[offChunk]);
     450#else
     451            __builtin_prefetch(&pbChunk[offChunk], 1 /*rw*/);
     452#endif
     453            /* Some paranoia first, though.  */
     454            AssertBreakStmt(offChunk <= cbChunk, offChunk -= cbBlock - IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
     455            cbPruned += cbBlock;
    430456
    431457            iemTbAllocatorFree(pVCpu, pTb);
    432 
    433             cbPruned += cbBlock;
    434             offChunk += cbBlock;
    435458        }
    436459        else
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette