Changeset 106326 in vbox
- Timestamp: Oct 15, 2024 1:29:25 PM (6 weeks ago)
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp
r106313 r106326 323 323 typedef struct IEMEXECMEMALLOCHDR 324 324 { 325 /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */ 326 uint32_t uMagic; 327 /** The allocation chunk (for speeding up freeing). */ 328 uint32_t idxChunk; 325 union 326 { 327 struct 328 { 329 /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */ 330 uint32_t uMagic; 331 /** The allocation chunk (for speeding up freeing). */ 332 uint32_t idxChunk; 333 }; 334 /** Combined magic and chunk index, for the pruning scanner code. */ 335 uint64_t u64MagicAndChunkIdx; 336 }; 329 337 /** Pointer to the translation block the allocation belongs to. 330 338 * This is the whole point of the header. */ … … 408 416 /* 409 417 * Do the pruning. The current approach is the sever kind. 410 */ 411 uint64_t cbPruned = 0; 412 uint8_t * const pbChunk = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunkRx; 418 * 419 * This is memory bound, as we must load both the allocation header and the 420 * associated TB and then modify them. So, the CPU isn't all that unitilized 421 * here. Try apply some prefetching to speed it up a tiny bit. 422 */ 423 uint64_t cbPruned = 0; 424 uint64_t const u64MagicAndChunkIdx = RT_MAKE_U64(IEMEXECMEMALLOCHDR_MAGIC, idxChunk); 425 uint8_t * const pbChunk = (uint8_t *)pExecMemAllocator->aChunks[idxChunk].pvChunkRx; 413 426 while (offChunk < offPruneEnd) 414 427 { 415 428 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)&pbChunk[offChunk]; 416 429 417 /* Is this the start of an allocation block for TB? (We typically have418 one allocation at the start of each chunk for the unwind info where419 pTb is NULL.) */420 if ( pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC421 && pHdr->pTb != NULL422 && pHdr->idxChunk == idxChunk)430 /* Is this the start of an allocation block for a TB? (We typically 431 have one allocation at the start of each chunk for the unwind info 432 where pTb is NULL.) 
*/ 433 PIEMTB pTb; 434 if ( pHdr->u64MagicAndChunkIdx == u64MagicAndChunkIdx 435 && RT_LIKELY((pTb = pHdr->pTb) != NULL)) 423 436 { 424 PIEMTB const pTb = pHdr->pTb;425 437 AssertPtr(pTb); 426 438 427 439 uint32_t const cbBlock = RT_ALIGN_32(pTb->Native.cInstructions * sizeof(IEMNATIVEINSTR) + sizeof(*pHdr), 428 440 IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); 429 AssertBreakStmt(offChunk + cbBlock <= cbChunk, offChunk += IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); /* paranoia */ 441 442 /* Prefetch the next header before freeing the current one and its TB. */ 443 /** @todo Iff the block size was part of the header in some way, this could be 444 * a tiny bit faster. */ 445 offChunk += cbBlock; 446 #if defined(_MSC_VER) && defined(RT_ARCH_AMD64) 447 _mm_prefetch((char *)&pbChunk[offChunk], _MM_HINT_T0); 448 #elif defined(_MSC_VER) && defined(RT_ARCH_ARM64) 449 __prefetch(&pbChunk[offChunk]); 450 #else 451 __builtin_prefetch(&pbChunk[offChunk], 1 /*rw*/); 452 #endif 453 /* Some paranoia first, though. */ 454 AssertBreakStmt(offChunk <= cbChunk, offChunk -= cbBlock - IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); 455 cbPruned += cbBlock; 430 456 431 457 iemTbAllocatorFree(pVCpu, pTb); 432 433 cbPruned += cbBlock;434 offChunk += cbBlock;435 458 } 436 459 else
Note:
See TracChangeset
for help on using the changeset viewer.