VirtualBox

Changeset 101249 in vbox for trunk/src/VBox/VMM


Timestamp: Sep 25, 2023 12:42:13 AM (18 months ago)
Author: vboxsync
svn:sync-xref-src-repo-rev: 159232
Message: VMM/IEM: Wrote a very simple sub-allocator for the executable memory chunks. bugref:10370

File: 1 edited

  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

    r101248 r101249  
    107107*   Executable Memory Allocator                                                                                                  *
    108108*********************************************************************************************************************************/
     109/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
      110 * Use an alternative chunk sub-allocator that does not store internal data
      111 * in the chunk.
     112 *
      113 * Using RTHeapSimple is not practical on newer darwin systems where
     114 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
     115 * memory.  We would have to change the protection of the whole chunk for
     116 * every call to RTHeapSimple, which would be rather expensive.
     117 *
      118 * This alternative implementation restricts page protection modifications
     119 * to the pages backing the executable memory we just allocated.
     120 */
     121#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     122/** The chunk sub-allocation unit size in bytes. */
     123#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE      128
     124/** The chunk sub-allocation unit size as a shift factor. */
     125#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT     7
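
As a quick illustration of what these two constants mean in practice (a standalone sketch, not code from this changeset): a byte request is rounded up to whole 128-byte units before it is looked up in the per-chunk allocation bitmap.

    /* Illustration only: the rounding the sub-allocator applies to a request,
       mirroring IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE/SHIFT from the hunk above. */
    #include <stdint.h>
    #include <stdio.h>

    #define UNIT_SIZE   128u
    #define UNIT_SHIFT  7u

    static uint32_t cbToUnits(uint32_t cbReq)
    {
        return (cbReq + UNIT_SIZE - 1) >> UNIT_SHIFT;   /* round up to whole units */
    }

    int main(void)
    {
        uint32_t const cUnits = cbToUnits(200);
        printf("200 bytes -> %u units -> %u bytes reserved\n", cUnits, cUnits << UNIT_SHIFT);
        return 0;   /* prints: 200 bytes -> 2 units -> 256 bytes reserved */
    }
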
    109126
    110127#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
     
    134151typedef struct IEMEXECMEMCHUNK
    135152{
     153#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     154    /** Number of free items in this chunk. */
     155    uint32_t                cFreeUnits;
      156    /** Hint where to start searching for free space in the allocation bitmap. */
     157    uint32_t                idxFreeHint;
     158#else
    136159    /** The heap handle. */
    137160    RTHEAPSIMPLE            hHeap;
     161#endif
    138162    /** Pointer to the chunk. */
    139163    void                   *pvChunk;
     
    187211    uint64_t                cbAllocated;
    188212
     213#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     214    /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
     215     *
     216     * Since the chunk size is a power of two and the minimum chunk size is a lot
     217     * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
     218     * require a whole number of uint64_t elements in the allocation bitmap.  So,
      219     * for the sake of simplicity, they are allocated as one continuous
      220     * block. */
     221    uint64_t               *pbmAlloc;
     222    /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
     223    uint32_t                cUnitsPerChunk;
     224    /** Number of bitmap elements per chunk (for quickly locating the bitmap
      225     * portion corresponding to a chunk). */
     226    uint32_t                cBitmapElementsPerChunk;
     227#else
     189228    /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
    190229     * @{ */
     
    199238    void                   *pvAlignTweak;
    200239    /** @} */
     240#endif
     241
     242#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
     243    /** Pointer to the array of unwind info running parallel to aChunks (same
     244     * allocation as this structure, located after the bitmaps).
     245     * (For Windows, the structures must reside in 32-bit RVA distance to the
     246     * actual chunk, so they are allocated off the chunk.) */
     247    PIEMEXECMEMCHUNKEHFRAME paEhFrames;
     248#endif
    201249
    202250    /** The allocation chunks. */
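
To make the bitmap bookkeeping above concrete, here is a small standalone sketch (the 2 MiB chunk size is an assumed example value, not something stated in this changeset) of how the per-chunk unit and bitmap element counts fall out of the chunk size:

    /* Illustration only: per-chunk bitmap sizing for the alternative sub-allocator. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t const cbChunk                 = 2u * 1024u * 1024u;  /* assumed example chunk size */
        uint32_t const cUnitsPerChunk          = cbChunk >> 7;        /* 128-byte units: 16384 */
        uint32_t const cBitmapElementsPerChunk = cbChunk >> (7 + 6);  /* 64 bits per uint64_t: 256 */
        printf("units/chunk=%u bitmap qwords/chunk=%u (%u bytes)\n",
               cUnitsPerChunk, cBitmapElementsPerChunk, cBitmapElementsPerChunk * 8u);
        return 0;
    }

Because the chunk size is a power of two well above 2^(7+3) bytes, each per-chunk bitmap comes out as a whole number of uint64_t elements, which is what lets all the bitmaps be carved out of one contiguous block after aChunks.
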
     
    211259
    212260
     261static int iemExecMemAllocatorGrow(PIEMEXECMEMALLOCATOR pExecMemAllocator);
     262
     263
     264/**
     265 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
     266 * the heap statistics.
     267 */
     268static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
     269                                               uint32_t cbReq, uint32_t idxChunk)
     270{
     271    pExecMemAllocator->cAllocations += 1;
     272    pExecMemAllocator->cbAllocated  += cbReq;
     273#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     274    pExecMemAllocator->cbFree       -= cbReq;
     275#else
     276    pExecMemAllocator->cbFree       -= RT_ALIGN_32(cbReq, 64);
     277#endif
     278    pExecMemAllocator->idxChunkHint  = idxChunk;
     279
     280#ifdef RT_OS_DARWIN
     281    /*
     282     * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
     283     * on darwin.  So, we mark the pages returned as read+write after alloc and
     284     * expect the caller to call iemExecMemAllocatorReadyForUse when done
     285     * writing to the allocation.
     286     */
     287    /** @todo detect if this is necessary... it wasn't required on 10.15 or
     288     *        whatever older version it was. */
     289    int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
     290    AssertRC(rc);
     291#endif
     292
     293    return pvRet;
     294}
     295
     296
     297#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     298static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
     299                                                uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
     300{
     301    /*
     302     * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
     303     */
     304    Assert(!(cToScan & 63));
     305    Assert(!(idxFirst & 63));
     306    Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
     307    pbmAlloc += idxFirst / 64;
     308
     309    /*
      310     * Scan the bitmap for cReqUnits consecutive clear bits
     311     */
     312    /** @todo This can probably be done more efficiently for non-x86 systems. */
     313    int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
     314    while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
     315    {
     316        uint32_t idxAddBit = 1;
     317        while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
     318            idxAddBit++;
     319        if (idxAddBit >= cReqUnits)
     320        {
     321            ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
     322
     323            PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
     324            pChunk->cFreeUnits -= cReqUnits;
     325            pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
     326
     327            void * const pvRet  = (uint8_t *)pChunk->pvChunk
     328                                + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
     329
     330            return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
     331                                                    cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
     332        }
     333
     334        iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
     335    }
     336    return NULL;
     337}
     338#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
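
For readers who want the scan logic in isolation, the sketch below reimplements the same first-fit search for a run of clear bits in plain C, without IPRT's ASMBitFirstClear/ASMBitTest/ASMBitSetRange helpers. It is an illustration of the technique, not code from this changeset; the clear-range helper mirrors what iemExecMemAllocatorFree does further down.

    /* Illustration only: first-fit search for cReq consecutive clear bits. */
    #include <stdbool.h>
    #include <stdint.h>

    static bool bmTest(uint64_t const *pbm, uint32_t iBit)
    {
        return (pbm[iBit / 64] >> (iBit & 63)) & 1;
    }

    static void bmSetRange(uint64_t *pbm, uint32_t iFirst, uint32_t iEnd)
    {
        for (uint32_t i = iFirst; i < iEnd; i++)
            pbm[i / 64] |= UINT64_C(1) << (i & 63);
    }

    static void bmClearRange(uint64_t *pbm, uint32_t iFirst, uint32_t iEnd)
    {
        for (uint32_t i = iFirst; i < iEnd; i++)
            pbm[i / 64] &= ~(UINT64_C(1) << (i & 63));
    }

    /* Returns the first bit index of a run of cReq clear bits (marking the run
       as allocated), or -1 if no such run exists within the first cBits bits. */
    static int32_t bmFindAndSet(uint64_t *pbm, uint32_t cBits, uint32_t cReq)
    {
        for (uint32_t i = 0; i + cReq <= cBits; i++)
        {
            uint32_t cRun = 0;
            while (cRun < cReq && !bmTest(pbm, i + cRun))
                cRun++;
            if (cRun == cReq)
            {
                bmSetRange(pbm, i, i + cReq);
                return (int32_t)i;
            }
            i += cRun;   /* skip past the set bit that ended the run */
        }
        return -1;
    }
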
     339
     340
     341static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
     342{
     343#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     344    /*
     345     * Figure out how much to allocate.
     346     */
     347    uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
     348    if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
     349    {
     350        uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
     351        uint32_t const   idxHint  = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
     352        if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
     353        {
     354            void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
     355                                                             pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
     356            if (pvRet)
     357                return pvRet;
     358        }
     359        return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
     360                                                  RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
     361                                                  cReqUnits, idxChunk);
     362    }
     363#else
     364    void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
     365    if (pvRet)
     366        return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
     367#endif
     368    return NULL;
     369
     370}
     371
     372
     373/**
     374 * Allocates @a cbReq bytes of executable memory.
     375 *
     376 * @returns Pointer to the memory, NULL if out of memory or other problem
     377 *          encountered.
     378 * @param   pVCpu   The cross context virtual CPU structure of the calling
     379 *                  thread.
     380 * @param   cbReq   How many bytes are required.
     381 */
     382static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
     383{
     384    PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
     385    AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
     386    AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
     387
     388    /*
     389     * Adjust the request size so it'll fit the allocator alignment/whatnot.
     390     *
      391     * For the RTHeapSimple allocator this means following the logic described
      392     * in iemExecMemAllocatorGrow and attempting the allocation from one of the
      393     * existing chunks if we think we've got sufficient free memory around.
      394     *
      395     * For the alternative allocator we simply round the request up to whole units.
     396     */
     397#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     398    cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
     399#else
     400    cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
     401#endif
     402    if (cbReq <= pExecMemAllocator->cbFree)
     403    {
     404        uint32_t const cChunks      = pExecMemAllocator->cChunks;
     405        uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
     406        for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
     407        {
     408            void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
     409            if (pvRet)
     410                return pvRet;
     411        }
     412        for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
     413        {
     414            void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
     415            if (pvRet)
     416                return pvRet;
     417        }
     418    }
     419
     420    /*
     421     * Can we grow it with another chunk?
     422     */
     423    if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
     424    {
     425        int rc = iemExecMemAllocatorGrow(pExecMemAllocator);
     426        AssertLogRelRCReturn(rc, NULL);
     427
     428        uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
     429        void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
     430        if (pvRet)
     431            return pvRet;
     432        AssertFailed();
     433    }
     434
     435    /* What now? Prune native translation blocks from the cache? */
     436    AssertFailed();
     437    return NULL;
     438}
     439
     440
     441/** This is a hook that we may need later for changing memory protection back
     442 *  to readonly+exec */
     443static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
     444{
     445#ifdef RT_OS_DARWIN
     446    /* See iemExecMemAllocatorAllocTailCode for the explanation. */
     447# if 0 /** @todo getting weird EXC_BAD_INSTRUCTION exceptions, trying to figure out / work around why... */
     448    int rc2 = RTMemProtect(pv, cb, RTMEM_PROT_NONE);
     449    AssertRC(rc2); RT_NOREF(pVCpu);
     450# endif
     451    int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
     452    AssertRC(rc); RT_NOREF(pVCpu);
     453# if 0 /** @todo getting weird EXC_BAD_INSTRUCTION exceptions, trying to figure out / work around why... */
     454    ASMProbeReadBuffer(pv, cb);
     455#  ifdef RT_ARCH_ARM64
     456    __asm__ __volatile__("dmb sy\n\t"
     457                         "dsb sy\n\t"
     458                         "isb\n\t"
     459                         ::: "memory");
     460#  endif
     461# endif
     462#else
     463    RT_NOREF(pVCpu, pv, cb);
     464#endif
     465}
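
The darwin handling above implies a two-phase protocol for callers: the allocation comes back read+write, the caller writes the native code into it, and only then is the memory flipped to read+exec. A hedged caller-side sketch of that protocol follows (a fragment, not code from this changeset; the cInstrs and paSrcInstrs names are made up for the example, while the real call site is the recompiler code further down in this file):

    /* Illustration only: alloc -> write -> ready-for-use, as expected on darwin. */
    uint32_t const  cbCode = cInstrs * sizeof(IEMNATIVEINSTR);          /* hypothetical size */
    PIEMNATIVEINSTR paCode = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, cbCode);
    AssertReturn(paCode, NULL);

    memcpy(paCode, paSrcInstrs, cbCode);                 /* pages are still read+write here */

    iemExecMemAllocatorReadyForUse(pVCpu, paCode, cbCode);   /* re-protect to read+exec */
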
     466
     467
     468/**
     469 * Frees executable memory.
     470 */
     471void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
     472{
     473    PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
     474    Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
     475    Assert(pv);
     476#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     477    Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
     478#else
     479    Assert(!((uintptr_t)pv & 63));
     480#endif
     481
     482    /* Align the size as we did when allocating the block. */
     483#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     484    cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
     485#else
     486    cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
     487#endif
     488
     489    /* Free it / assert sanity. */
     490#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
     491    uint32_t const cChunks = pExecMemAllocator->cChunks;
     492    uint32_t const cbChunk = pExecMemAllocator->cbChunk;
     493    bool           fFound  = false;
     494    for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
     495    {
     496        uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
     497        fFound = offChunk < cbChunk;
     498        if (fFound)
     499        {
     500#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     501            uint32_t const idxFirst  = offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
     502            uint32_t const cReqUnits = cb       >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
     503
     504            /* Check that it's valid and free it. */
     505            uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
     506            AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
     507            for (uint32_t i = 1; i < cReqUnits; i++)
     508                AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
     509            ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
     510
     511            pExecMemAllocator->aChunks[idxChunk].cFreeUnits  += cReqUnits;
     512            pExecMemAllocator->aChunks[idxChunk].idxFreeHint  = idxFirst;
     513
     514            /* Update the stats. */
     515            pExecMemAllocator->cbAllocated  -= cb;
     516            pExecMemAllocator->cbFree       += cb;
     517            pExecMemAllocator->cAllocations -= 1;
     518            return;
     519#else
     520            Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
     521            break;
     522#endif
     523        }
     524    }
     525# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     526    AssertFailed();
     527# else
     528    Assert(fFound);
     529# endif
     530#endif
     531
      532#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
      533    /* Update stats while cb is freshly calculated. */
     534    pExecMemAllocator->cbAllocated  -= cb;
     535    pExecMemAllocator->cbFree       += RT_ALIGN_Z(cb, 64);
     536    pExecMemAllocator->cAllocations -= 1;
     537
     538    /* Free it. */
     539    RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
     540#endif
     541}
     542
     543
     544
    213545#ifdef IN_RING3
    214546# ifdef RT_OS_WINDOWS
     
    217549 * Initializes the unwind info structures for windows hosts.
    218550 */
    219 static void *
    220 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, RTHEAPSIMPLE hHeap, void *pvChunk)
     551static int
     552iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk, uint32_t idxChunk)
    221553{
    222554    /*
     
    272604    unsigned const cbUnwindInfo     = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
    273605    unsigned const cbNeeded         = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
     606#  ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     607    unsigned const cbNeededAligned  = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
     608    PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
     609        = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
     610#  else
    274611    unsigned const cbNeededAligned  = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
    275612                                    - pExecMemAllocator->cbHeapBlockHdr;
    276 
    277613    PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
    278614                                                                                                       32 /*cbAlignment*/);
    279     AssertReturn(paFunctions, NULL);
     615#  endif
     616    AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
     617    pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
    280618
    281619    /*
     
    391729 * Initializes the unwind info section for non-windows hosts.
    392730 */
    393 static PIEMEXECMEMCHUNKEHFRAME
    394 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk)
    395 {
    396     /*
    397      * Allocate the structure for the eh_frame data and associate registration stuff.
    398      */
    399     PIEMEXECMEMCHUNKEHFRAME pEhFrame = (PIEMEXECMEMCHUNKEHFRAME)RTMemAllocZ(sizeof(IEMEXECMEMCHUNKEHFRAME));
    400     AssertReturn(pEhFrame, NULL);
     731static int
     732iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk, uint32_t idxChunk)
     733{
     734    PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
     735    pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
    401736
    402737    RTPTRUNION Ptr = { pEhFrame->abEhFrame };
     
    481816    __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
    482817#  endif
    483     return pEhFrame;
     818
     819    return VINF_SUCCESS;
    484820}
    485821
     
    508844    AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
    509845
     846#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     847    int rc = VINF_SUCCESS;
     848#else
    510849    /* Initialize the heap for the chunk. */
    511850    RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
     
    559898        }
    560899        if (RT_SUCCESS(rc))
     900#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
    561901        {
     902            /*
     903             * Add the chunk.
     904             *
     905             * This must be done before the unwind init so windows can allocate
     906             * memory from the chunk when using the alternative sub-allocator.
     907             */
     908            pExecMemAllocator->aChunks[idxChunk].pvChunk      = pvChunk;
    562909#ifdef IN_RING3
    563 # ifdef RT_OS_WINDOWS
    564             void *pvUnwindInfo = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pExecMemAllocator, hHeap, pvChunk);
    565             AssertStmt(pvUnwindInfo, rc = VERR_INTERNAL_ERROR_3);
    566 # else
    567             void *pvUnwindInfo = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pExecMemAllocator, pvChunk);
    568             AssertStmt(pvUnwindInfo, rc = VERR_NO_MEMORY);
    569 # endif
     910            pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
     911#endif
     912#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     913            pExecMemAllocator->aChunks[idxChunk].hHeap        = hHeap;
     914#else
     915            pExecMemAllocator->aChunks[idxChunk].cFreeUnits   = pExecMemAllocator->cUnitsPerChunk;
     916            pExecMemAllocator->aChunks[idxChunk].idxFreeHint  = 0;
     917            memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
     918                   0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
     919#endif
     920
     921            pExecMemAllocator->cChunks      = idxChunk + 1;
     922            pExecMemAllocator->idxChunkHint = idxChunk;
     923
     924#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     925            pExecMemAllocator->cbTotal     += pExecMemAllocator->cbChunk;
     926            pExecMemAllocator->cbFree      += pExecMemAllocator->cbChunk;
     927#else
     928            size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
     929            pExecMemAllocator->cbTotal     += cbFree;
     930            pExecMemAllocator->cbFree      += cbFree;
     931#endif
     932
     933#ifdef IN_RING3
     934            /*
     935             * Initialize the unwind information (this cannot really fail atm).
     936             * (This sets pvUnwindInfo.)
     937             */
     938            rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pExecMemAllocator, pvChunk, idxChunk);
    570939            if (RT_SUCCESS(rc))
    571940#endif
    572941            {
    573                 /*
    574                  * Finalize the adding of the chunk.
    575                  */
    576                 pExecMemAllocator->aChunks[idxChunk].pvChunk      = pvChunk;
    577                 pExecMemAllocator->aChunks[idxChunk].hHeap        = hHeap;
    578 #ifdef IN_RING3
    579                 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pvUnwindInfo;
    580 #endif
    581 
    582                 pExecMemAllocator->cChunks      = idxChunk + 1;
    583                 pExecMemAllocator->idxChunkHint = idxChunk;
    584 
    585                 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
    586                 pExecMemAllocator->cbTotal     += cbFree;
    587                 pExecMemAllocator->cbFree      += cbFree;
    588 
    589942                return VINF_SUCCESS;
    590943            }
     944
     945#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
      946            /* Just in case the impossible happens, undo the above: */
     947            pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
     948            pExecMemAllocator->cbFree  -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
     949            pExecMemAllocator->cChunks  = idxChunk;
     950            memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
     951                   0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
     952            pExecMemAllocator->aChunks[idxChunk].pvChunk    = NULL;
     953            pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
     954#endif
    591955        }
    592     }
     956#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     957    }
     958#endif
    593959    RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
    594960    return rc;
     
     6521018     * Allocate and initialize the allocator instance.
    6531019     */
    654     PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR,
    655                                                                                                 aChunks[cMaxChunks]));
    656     AssertReturn(pExecMemAllocator, VERR_NO_MEMORY);
     1020    size_t       cbNeeded   = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
     1021#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     1022    size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
     1023    size_t const cbBitmap   = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
      1024    cbNeeded  = offBitmaps + cbBitmap * cMaxChunks;
     1025    AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
     1026    Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
     1027#endif
     1028#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
     1029    size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
      1030    cbNeeded  = offEhFrames + sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
     1031#endif
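
So the allocator header, the per-chunk allocation bitmaps, and (on non-Windows ring-3) the eh_frame records all live in one RTMemAllocZ block. A rough standalone sketch of that size/offset computation; the 2 MiB chunk size, the 8-chunk maximum, the header size and the eh_frame record size are assumed stand-in values, purely for illustration:

    /* Illustration only: layout of the single allocator block. */
    #include <stddef.h>
    #include <stdio.h>

    #define ALIGN_Z(cb, uAlign)   ((((cb) + (uAlign) - 1) / (uAlign)) * (uAlign))

    int main(void)
    {
        size_t const cbChunk     = 2u * 1024u * 1024u;               /* assumed example */
        size_t const cMaxChunks  = 8;                                /* assumed example */
        size_t const cbHeader    = 500;       /* stand-in for RT_UOFFSETOF_DYN(..., aChunks[cMaxChunks]) */

        size_t const offBitmaps  = ALIGN_Z(cbHeader, 64);            /* cache-line aligned */
        size_t const cbBitmap    = cbChunk >> (7 + 3);               /* one bit per 128-byte unit */
        size_t const offEhFrames = ALIGN_Z(offBitmaps + cbBitmap * cMaxChunks, 64);
        size_t const cbNeeded    = offEhFrames + 2048 * cMaxChunks;  /* 2048 stands in for sizeof(IEMEXECMEMCHUNKEHFRAME) */

        printf("offBitmaps=%zu offEhFrames=%zu cbNeeded=%zu\n", offBitmaps, offEhFrames, cbNeeded);
        return 0;
    }
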
     1032    PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
     1033    AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
     1034                          VERR_NO_MEMORY);
    6571035    pExecMemAllocator->uMagic       = IEMEXECMEMALLOCATOR_MAGIC;
    6581036    pExecMemAllocator->cbChunk      = cbChunk;
     
    6641042    pExecMemAllocator->cbFree       = 0;
    6651043    pExecMemAllocator->cbAllocated  = 0;
     1044#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     1045    pExecMemAllocator->pbmAlloc                 = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
     1046    pExecMemAllocator->cUnitsPerChunk           = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
     1047    pExecMemAllocator->cBitmapElementsPerChunk  = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
     1048    memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
     1049#endif
     1050#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
     1051    pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
     1052#endif
    6661053    for (uint32_t i = 0; i < cMaxChunks; i++)
    6671054    {
     1055#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     1056        pExecMemAllocator->aChunks[i].cFreeUnits   = 0;
     1057        pExecMemAllocator->aChunks[i].idxFreeHint  = 0;
     1058#else
    6681059        pExecMemAllocator->aChunks[i].hHeap        = NIL_RTHEAPSIMPLE;
     1060#endif
    6691061        pExecMemAllocator->aChunks[i].pvChunk      = NULL;
    6701062#ifdef IN_RING0
     
    6881080
    6891081    return VINF_SUCCESS;
    690 }
    691 
    692 /**
    693  * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
    694  * the heap statistics.
    695  */
    696 DECL_FORCE_INLINE(void *) iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
    697                                                            uint32_t cbReq, uint32_t idxChunk)
    698 {
    699     pExecMemAllocator->cAllocations += 1;
    700     pExecMemAllocator->cbAllocated  += cbReq;
    701     pExecMemAllocator->cbFree       -= RT_ALIGN_32(cbReq, 64);
    702     pExecMemAllocator->idxChunkHint  = idxChunk;
    703     return pvRet;
    704 }
    705 
    706 
    707 /**
    708  * Allocates @a cbReq bytes of executable memory.
    709  *
    710  * @returns Pointer to the memory, NULL if out of memory or other problem
    711  *          encountered.
    712  * @param   pVCpu   The cross context virtual CPU structure of the calling
    713  *                  thread.
    714  * @param   cbReq   How many bytes are required.
    715  */
    716 static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
    717 {
    718     PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
    719     AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
    720     AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
    721 
    722     /*
    723      * Adjust the request size as per the logic described in
    724      * iemExecMemAllocatorGrow and attempt to allocate it from one of the
    725      * existing chunks if we think we've got sufficient free memory around.
    726      */
    727     cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
    728     if (cbReq <= pExecMemAllocator->cbFree)
    729     {
    730         uint32_t const cChunks      = pExecMemAllocator->cChunks;
    731         uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
    732         for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
    733         {
    734             void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
    735             if (pvRet)
    736                 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
    737         }
    738         for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
    739         {
    740             void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
    741             if (pvRet)
    742                 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
    743         }
    744     }
    745 
    746     /*
    747      * Can we grow it with another chunk?
    748      */
    749     if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
    750     {
    751         int rc = iemExecMemAllocatorGrow(pExecMemAllocator);
    752         AssertLogRelRCReturn(rc, NULL);
    753 
    754         uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
    755         void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
    756         if (pvRet)
    757             return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
    758         AssertFailed();
    759     }
    760 
    761     /* What now? Prune native translation blocks from the cache? */
    762     AssertFailed();
    763     return NULL;
    764 }
    765 
    766 
    767 /** This is a hook that we may need later for changing memory protection back
    768  *  to readonly+exec */
    769 static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
    770 {
    771 #ifdef RT_OS_DARWIN
    772     int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
    773     AssertRC(rc); RT_NOREF(pVCpu);
    774 #else
    775     RT_NOREF(pVCpu, pv, cb);
    776 #endif
    777 }
    778 
    779 
    780 /**
    781  * Frees executable memory.
    782  */
    783 void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
    784 {
    785     PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
    786     Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
    787     Assert(pv);
    788 
    789     /* Align the size as we did when allocating the block. */
    790     cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
    791 
    792     /* Assert sanity if strict build. */
    793 #ifdef VBOX_STRICT
    794     uint32_t const cChunks = pExecMemAllocator->cChunks;
    795     uint32_t const cbChunk = pExecMemAllocator->cbChunk;
    796     bool           fFound  = false;
    797     for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
    798     {
    799         fFound = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk < cbChunk;
    800         if (fFound)
    801         {
    802             Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
    803             break;
    804         }
    805     }
    806     Assert(fFound);
    807 #endif
    808 
    809     /* Update stats while cb is freshly calculated.*/
    810     pExecMemAllocator->cbAllocated  -= cb;
    811     pExecMemAllocator->cbFree       += RT_ALIGN_Z(cb, 64);
    812     pExecMemAllocator->cAllocations -= 1;
    813 
    814     /* Do the actual freeing. */
    815     RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
    8161082}
    8171083
     
    15731839        iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
    15741840
     1841#if 1 /** @todo getting weird EXC_BAD_INSTRUCTION exceptions, trying to figure out / work around why... */
    15751842    PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
    15761843    AssertReturn(paFinalInstrBuf, pTb);
    15771844    memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
     1845#else
     1846    IEMNATIVEINSTR volatile * const paFinalInstrBuf
     1847        = (IEMNATIVEINSTR volatile *)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
     1848    AssertReturn(paFinalInstrBuf, pTb);
     1849    for (uint32_t i = 0; i < off; i++)
     1850        paFinalInstrBuf[i] = pReNative->pInstrBuf[i];
     1851    __asm__ __volatile__("dmb sy\n\t" ::: "memory");
     1852#endif
    15781853
    15791854    /*