Changeset 106313 in vbox for trunk/src/VBox/VMM
- Timestamp:
- Oct 14, 2024 10:45:56 PM (7 weeks ago)
- Location:
- trunk/src/VBox/VMM/VMMAll
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp
r106310 r106313 294 294 /** Total amount of memory not being usable currently due to IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE. */ 295 295 uint64_t cbUnusable; 296 /** Allocation size distribution (in alloc units; 0 is the slop bucket). */ 297 STAMCOUNTER aStatSizes[16]; 296 298 #endif 297 299 … … 501 503 * 502 504 * The complicated code below is a bit faster on arm. Reducing the per TB cost 503 * from 4255ns to 4106ns (best run out of 10). On win/ x86 the gain isn't so504 * marked, despite more full bitmap scans.505 * from 4255ns to 4106ns (best run out of 10). On win/amd64 there isn't an 506 * obvious gain here, at least not with the data currently being profiled. 505 507 */ 506 508 #if 1 … … 639 641 } 640 642 643 /* 644 * If we get down here, we have a word that isn't UINT64_MAX. 645 */ 641 646 if (uWord != 0) 642 647 { 643 648 /* 644 * Fend of large request we cannot satisfy before first.649 * Fend of large request we cannot satisfy before the first set bit. 645 650 */ 646 651 if (!a_fBig || cReqUnits < 64 + cPrevLeadingZeros) … … 648 653 #ifdef __GNUC__ 649 654 unsigned cZerosInWord = __builtin_popcountl(~uWord); 655 #elif defined(_MSC_VER) && defined(RT_ARCH_AMD64) 656 unsigned cZerosInWord = __popcnt64(~uWord); 657 #elif defined(_MSC_VER) && defined(RT_ARCH_ARM64) 658 unsigned cZerosInWord = _CountOneBits64(~uWord); 650 659 #else 651 # ifdef RT_ARCH_AMD64 652 unsigned cZerosInWord = __popcnt64(~uWord); 653 # elif defined(RT_ARCH_ARM64) 654 unsigned cZerosInWord = _CountOneBits64(~uWord); 655 # else 656 # pragma message("need popcount intrinsic or something...") /** @todo port me: Win/ARM. */ 660 # pragma message("need popcount intrinsic or something...") 657 661 unsigned cZerosInWord = 0; 658 662 for (uint64_t uTmp = ~uWords; uTmp; cZerosInWord++) 659 663 uTmp &= uTmp - 1; /* Clears the least significant bit set. 
*/ 660 # endif661 664 #endif 662 665 if (cZerosInWord + cPrevLeadingZeros >= cReqUnits) … … 737 740 if RT_CONSTEXPR_IF(!a_fBig) 738 741 return off * 64 - cPrevLeadingZeros; 739 else 742 else /* keep else */ 740 743 { 741 744 if (cPrevLeadingZeros + 64 >= cReqUnits) … … 890 893 RT_ALIGN_32(idxHint + cReqUnits, 64*4)), 891 894 cReqUnits, idxChunk, pTb, (void **)ppaExec, ppChunkCtx); 892 if (!pvRet) 893 pExecMemAllocator->cFruitlessChunkScans += 1; 894 return (PIEMNATIVEINSTR)pvRet; 895 } 896 897 898 DECL_FORCE_INLINE(PIEMNATIVEINSTR) 899 iemExecMemAllocatorAllocUnitsInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cReqUnits, PIEMTB pTb, 900 PIEMNATIVEINSTR *ppaExec, PCIEMNATIVEPERCHUNKCTX *ppChunkCtx) 901 { 902 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits) 903 return iemExecMemAllocatorAllocUnitsInChunkInner(pExecMemAllocator, idxChunk, cReqUnits, pTb, ppaExec, ppChunkCtx); 895 if (pvRet) 896 return (PIEMNATIVEINSTR)pvRet; 897 898 pExecMemAllocator->cFruitlessChunkScans += 1; 904 899 return NULL; 905 900 } … … 910 905 PIEMNATIVEINSTR *ppaExec) 911 906 { 912 return iemExecMemAllocatorAllocUnitsInChunk(pExecMemAllocator, idxChunk, iemExecMemAllocBytesToUnits(cbReq), NULL /*pTb*/, 913 ppaExec, NULL /*ppChunkCtx*/); 907 uint32_t const cReqUnits = iemExecMemAllocBytesToUnits(cbReq); 908 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits) 909 return iemExecMemAllocatorAllocUnitsInChunkInner(pExecMemAllocator, idxChunk, cReqUnits, NULL /*pTb*/, 910 ppaExec, NULL /*ppChunkCtx*/); 911 return NULL; 914 912 } 915 913 … … 938 936 939 937 uint32_t const cReqUnits = iemExecMemAllocBytesToUnits(cbReq); 938 STAM_COUNTER_INC(&pExecMemAllocator->aStatSizes[cReqUnits < RT_ELEMENTS(pExecMemAllocator->aStatSizes) ? 
cReqUnits : 0]); 940 939 for (unsigned iIteration = 0;; iIteration++) 941 940 { 942 if (cbReq <= pExecMemAllocator->cbFree) 941 if ( cbReq * 2 <= pExecMemAllocator->cbFree 942 || (cReqUnits == 1 || pExecMemAllocator->cbFree >= IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) ) 943 943 { 944 uint32_t const cChunks = pExecMemAllocator->cChunks; 945 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0; 946 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++) 944 uint32_t const cChunks = pExecMemAllocator->cChunks; 945 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0; 946 947 /* 948 * We do two passes here, the first pass we skip chunks with fewer than cReqUnits * 16, 949 * the 2nd pass we skip chunks. The second pass checks the one skipped in the first pass. 950 */ 951 for (uint32_t cMinFreePass = cReqUnits == 1 ? cReqUnits : cReqUnits * 16, cMaxFreePass = UINT32_MAX;;) 947 952 { 948 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocUnitsInChunk(pExecMemAllocator, idxChunk, cReqUnits, pTb, 949 ppaExec, ppChunkCtx); 950 if (pRet) 951 { 952 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a); 953 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++) 954 if ( pExecMemAllocator->aChunks[idxChunk].cFreeUnits >= cMinFreePass 955 && pExecMemAllocator->aChunks[idxChunk].cFreeUnits <= cMaxFreePass) 956 { 957 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocUnitsInChunkInner(pExecMemAllocator, idxChunk, 958 cReqUnits, pTb, ppaExec, ppChunkCtx); 959 if (pRet) 960 { 961 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a); 953 962 #ifdef VBOX_WITH_STATISTICS 954 pExecMemAllocator->cbUnusable += (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbReq; 955 #endif 956 return pRet; 957 } 958 } 959 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++) 960 { 961 PIEMNATIVEINSTR const pRet = 
iemExecMemAllocatorAllocUnitsInChunk(pExecMemAllocator, idxChunk, cReqUnits, pTb, 962 ppaExec, ppChunkCtx); 963 if (pRet) 964 { 965 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a); 963 pExecMemAllocator->cbUnusable += (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbReq; 964 #endif 965 return pRet; 966 } 967 } 968 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++) 969 if ( pExecMemAllocator->aChunks[idxChunk].cFreeUnits >= cMinFreePass 970 && pExecMemAllocator->aChunks[idxChunk].cFreeUnits <= cMaxFreePass) 971 { 972 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocUnitsInChunkInner(pExecMemAllocator, idxChunk, 973 cReqUnits, pTb, ppaExec, ppChunkCtx); 974 if (pRet) 975 { 976 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a); 966 977 #ifdef VBOX_WITH_STATISTICS 967 pExecMemAllocator->cbUnusable += (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbReq; 968 #endif 969 return pRet; 970 } 978 pExecMemAllocator->cbUnusable += (cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT) - cbReq; 979 #endif 980 return pRet; 981 } 982 } 983 if (cMinFreePass <= cReqUnits * 2) 984 break; 985 cMaxFreePass = cMinFreePass - 1; 986 cMinFreePass = cReqUnits * 2; 971 987 } 972 988 } … … 981 997 982 998 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1; 983 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocUnitsInChunk (pExecMemAllocator, idxChunk, cReqUnits, pTb,984 ppaExec, ppChunkCtx);999 PIEMNATIVEINSTR const pRet = iemExecMemAllocatorAllocUnitsInChunkInner(pExecMemAllocator, idxChunk, cReqUnits, pTb, 1000 ppaExec, ppChunkCtx); 985 1001 if (pRet) 986 1002 { … … 2124 2140 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL, 2125 2141 "Profiling the allocator", "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu); 2142 for (unsigned i = 1; i < RT_ELEMENTS(pExecMemAllocator->aStatSizes); i++) 2143 STAMR3RegisterFU(pUVM, &pExecMemAllocator->aStatSizes[i], STAMTYPE_COUNTER, 
STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, 2144 "Number of allocations of this number of allocation units", 2145 "/IEM/CPU%u/re/ExecMem/aSize%02u", pVCpu->idCpu, i); 2146 STAMR3RegisterFU(pUVM, &pExecMemAllocator->aStatSizes[0], STAMTYPE_COUNTER, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, 2147 "Number of allocations 16 units or larger", "/IEM/CPU%u/re/ExecMem/aSize16OrLarger", pVCpu->idCpu); 2126 2148 #endif 2127 2149 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING -
trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp
r106296 r106313 3191 3191 pTb = pTb->pNext) 3192 3192 { 3193 PIEMTB pTbCopy = iemThreadedTbDuplicate(pVM, pVCpu, pTbHead); 3193 PIEMTB pTbCopy = iemThreadedTbDuplicate(pVM, pVCpu, pTb); 3194 3194 if (!pTbCopy) 3195 3195 break;
Note:
See TracChangeset
for help on using the changeset viewer.