Changeset 101249 in vbox for trunk/src/VBox/VMM
- Timestamp:
- Sep 25, 2023 12:42:13 AM (18 months ago)
- svn:sync-xref-src-repo-rev:
- 159232
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp
r101248 r101249 107 107 * Executable Memory Allocator * 108 108 *********************************************************************************************************************************/ 109 /** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 110 * Use an alternative chunk sub-allocator that does store internal data 111 * in the chunk. 112 * 113 * Using the RTHeapSimple is not practial on newer darwin systems where 114 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process 115 * memory. We would have to change the protection of the whole chunk for 116 * every call to RTHeapSimple, which would be rather expensive. 117 * 118 * This alternative implemenation let restrict page protection modifications 119 * to the pages backing the executable memory we just allocated. 120 */ 121 #define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 122 /** The chunk sub-allocation unit size in bytes. */ 123 #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128 124 /** The chunk sub-allocation unit size as a shift factor. */ 125 #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7 109 126 110 127 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS) … … 134 151 typedef struct IEMEXECMEMCHUNK 135 152 { 153 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 154 /** Number of free items in this chunk. */ 155 uint32_t cFreeUnits; 156 /** Hint were to start searching for free space in the allocation bitmap. */ 157 uint32_t idxFreeHint; 158 #else 136 159 /** The heap handle. */ 137 160 RTHEAPSIMPLE hHeap; 161 #endif 138 162 /** Pointer to the chunk. */ 139 163 void *pvChunk; … … 187 211 uint64_t cbAllocated; 188 212 213 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 214 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks). 215 * 216 * Since the chunk size is a power of two and the minimum chunk size is a lot 217 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always 218 * require a whole number of uint64_t elements in the allocation bitmap. So, 219 * for sake of simplicity, they are allocated as one continous chunk for 220 * simplicity/laziness. */ 221 uint64_t *pbmAlloc; 222 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */ 223 uint32_t cUnitsPerChunk; 224 /** Number of bitmap elements per chunk (for quickly locating the bitmap 225 * portion corresponding to an chunk). */ 226 uint32_t cBitmapElementsPerChunk; 227 #else 189 228 /** @name Tweaks to get 64 byte aligned allocats w/o unnecessary fragmentation. 190 229 * @{ */ … … 199 238 void *pvAlignTweak; 200 239 /** @} */ 240 #endif 241 242 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS) 243 /** Pointer to the array of unwind info running parallel to aChunks (same 244 * allocation as this structure, located after the bitmaps). 245 * (For Windows, the structures must reside in 32-bit RVA distance to the 246 * actual chunk, so they are allocated off the chunk.) */ 247 PIEMEXECMEMCHUNKEHFRAME paEhFrames; 248 #endif 201 249 202 250 /** The allocation chunks. */ … … 211 259 212 260 261 static int iemExecMemAllocatorGrow(PIEMEXECMEMALLOCATOR pExecMemAllocator); 262 263 264 /** 265 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating 266 * the heap statistics. 267 */ 268 static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet, 269 uint32_t cbReq, uint32_t idxChunk) 270 { 271 pExecMemAllocator->cAllocations += 1; 272 pExecMemAllocator->cbAllocated += cbReq; 273 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 274 pExecMemAllocator->cbFree -= cbReq; 275 #else 276 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64); 277 #endif 278 pExecMemAllocator->idxChunkHint = idxChunk; 279 280 #ifdef RT_OS_DARWIN 281 /* 282 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive 283 * on darwin. So, we mark the pages returned as read+write after alloc and 284 * expect the caller to call iemExecMemAllocatorReadyForUse when done 285 * writing to the allocation. 286 */ 287 /** @todo detect if this is necessary... it wasn't required on 10.15 or 288 * whatever older version it was. */ 289 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ); 290 AssertRC(rc); 291 #endif 292 293 return pvRet; 294 } 295 296 297 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 298 static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst, 299 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk) 300 { 301 /* 302 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear. 303 */ 304 Assert(!(cToScan & 63)); 305 Assert(!(idxFirst & 63)); 306 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk); 307 pbmAlloc += idxFirst / 64; 308 309 /* 310 * Scan the bitmap for cReqUnits of consequtive clear bits 311 */ 312 /** @todo This can probably be done more efficiently for non-x86 systems. */ 313 int iBit = ASMBitFirstClear(pbmAlloc, cToScan); 314 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits) 315 { 316 uint32_t idxAddBit = 1; 317 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit)) 318 idxAddBit++; 319 if (idxAddBit >= cReqUnits) 320 { 321 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits); 322 323 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk]; 324 pChunk->cFreeUnits -= cReqUnits; 325 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits; 326 327 void * const pvRet = (uint8_t *)pChunk->pvChunk 328 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT); 329 330 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, 331 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk); 332 } 333 334 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1); 335 } 336 return NULL; 337 } 338 #endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */ 339 340 341 static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq) 342 { 343 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 344 /* 345 * Figure out how much to allocate. 346 */ 347 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT; 348 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits) 349 { 350 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk]; 351 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63; 352 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk) 353 { 354 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint, 355 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk); 356 if (pvRet) 357 return pvRet; 358 } 359 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0, 360 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)), 361 cReqUnits, idxChunk); 362 } 363 #else 364 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32); 365 if (pvRet) 366 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk); 367 #endif 368 return NULL; 369 370 } 371 372 373 /** 374 * Allocates @a cbReq bytes of executable memory. 375 * 376 * @returns Pointer to the memory, NULL if out of memory or other problem 377 * encountered. 378 * @param pVCpu The cross context virtual CPU structure of the calling 379 * thread. 380 * @param cbReq How many bytes are required. 381 */ 382 static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq) 383 { 384 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3; 385 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL); 386 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL); 387 388 /* 389 * Adjust the request size so it'll fit the allocator alignment/whatnot. 390 * 391 * For the RTHeapSimple allocator this means to follow the logic described 392 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the 393 * existing chunks if we think we've got sufficient free memory around. 394 * 395 * While for the alternative one we just align it up to a whole unit size. 396 */ 397 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 398 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); 399 #else 400 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr; 401 #endif 402 if (cbReq <= pExecMemAllocator->cbFree) 403 { 404 uint32_t const cChunks = pExecMemAllocator->cChunks; 405 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0; 406 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++) 407 { 408 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq); 409 if (pvRet) 410 return pvRet; 411 } 412 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++) 413 { 414 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq); 415 if (pvRet) 416 return pvRet; 417 } 418 } 419 420 /* 421 * Can we grow it with another chunk? 422 */ 423 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks) 424 { 425 int rc = iemExecMemAllocatorGrow(pExecMemAllocator); 426 AssertLogRelRCReturn(rc, NULL); 427 428 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1; 429 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq); 430 if (pvRet) 431 return pvRet; 432 AssertFailed(); 433 } 434 435 /* What now? Prune native translation blocks from the cache? */ 436 AssertFailed(); 437 return NULL; 438 } 439 440 441 /** This is a hook that we may need later for changing memory protection back 442 * to readonly+exec */ 443 static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb) 444 { 445 #ifdef RT_OS_DARWIN 446 /* See iemExecMemAllocatorAllocTailCode for the explanation. */ 447 # if 0 /** @todo getting weird EXC_BAD_INSTRUCTION exceptions, trying to figure out / work around why... */ 448 int rc2 = RTMemProtect(pv, cb, RTMEM_PROT_NONE); 449 AssertRC(rc2); RT_NOREF(pVCpu); 450 # endif 451 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ); 452 AssertRC(rc); RT_NOREF(pVCpu); 453 # if 0 /** @todo getting weird EXC_BAD_INSTRUCTION exceptions, trying to figure out / work around why... */ 454 ASMProbeReadBuffer(pv, cb); 455 # ifdef RT_ARCH_ARM64 456 __asm__ __volatile__("dmb sy\n\t" 457 "dsb sy\n\t" 458 "isb\n\t" 459 ::: "memory"); 460 # endif 461 # endif 462 #else 463 RT_NOREF(pVCpu, pv, cb); 464 #endif 465 } 466 467 468 /** 469 * Frees executable memory. 470 */ 471 void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb) 472 { 473 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3; 474 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC); 475 Assert(pv); 476 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 477 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1))); 478 #else 479 Assert(!((uintptr_t)pv & 63)); 480 #endif 481 482 /* Align the size as we did when allocating the block. */ 483 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 484 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); 485 #else 486 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr; 487 #endif 488 489 /* Free it / assert sanity. */ 490 #if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR) 491 uint32_t const cChunks = pExecMemAllocator->cChunks; 492 uint32_t const cbChunk = pExecMemAllocator->cbChunk; 493 bool fFound = false; 494 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++) 495 { 496 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk; 497 fFound = offChunk < cbChunk; 498 if (fFound) 499 { 500 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 501 uint32_t const idxFirst = offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT; 502 uint32_t const cReqUnits = cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT; 503 504 /* Check that it's valid and free it. */ 505 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk]; 506 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst)); 507 for (uint32_t i = 1; i < cReqUnits; i++) 508 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i)); 509 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits); 510 511 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits; 512 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst; 513 514 /* Update the stats. */ 515 pExecMemAllocator->cbAllocated -= cb; 516 pExecMemAllocator->cbFree += cb; 517 pExecMemAllocator->cAllocations -= 1; 518 return; 519 #else 520 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb); 521 break; 522 #endif 523 } 524 } 525 # ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 526 AssertFailed(); 527 # else 528 Assert(fFound); 529 # endif 530 #endif 531 532 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 533 /* Update stats while cb is freshly calculated.*/ 534 pExecMemAllocator->cbAllocated -= cb; 535 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64); 536 pExecMemAllocator->cAllocations -= 1; 537 538 /* Free it. */ 539 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv); 540 #endif 541 } 542 543 544 213 545 #ifdef IN_RING3 214 546 # ifdef RT_OS_WINDOWS … … 217 549 * Initializes the unwind info structures for windows hosts. 218 550 */ 219 static void *220 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, RTHEAPSIMPLE hHeap, void *pvChunk)551 static int 552 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk, uint32_t idxChunk) 221 553 { 222 554 /* … … 272 604 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes); 273 605 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo; 606 # ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 607 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE); 608 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions 609 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned); 610 # else 274 611 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64) 275 612 - pExecMemAllocator->cbHeapBlockHdr; 276 277 613 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 278 614 32 /*cbAlignment*/); 279 AssertReturn(paFunctions, NULL); 615 # endif 616 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5); 617 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions; 280 618 281 619 /* … … 391 729 * Initializes the unwind info section for non-windows hosts. 392 730 */ 393 static PIEMEXECMEMCHUNKEHFRAME 394 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk) 395 { 396 /* 397 * Allocate the structure for the eh_frame data and associate registration stuff. 398 */ 399 PIEMEXECMEMCHUNKEHFRAME pEhFrame = (PIEMEXECMEMCHUNKEHFRAME)RTMemAllocZ(sizeof(IEMEXECMEMCHUNKEHFRAME)); 400 AssertReturn(pEhFrame, NULL); 731 static int 732 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvChunk, uint32_t idxChunk) 733 { 734 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk]; 735 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */ 401 736 402 737 RTPTRUNION Ptr = { pEhFrame->abEhFrame }; … … 481 816 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject); 482 817 # endif 483 return pEhFrame; 818 819 return VINF_SUCCESS; 484 820 } 485 821 … … 508 844 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY); 509 845 846 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 847 int rc = VINF_SUCCESS; 848 #else 510 849 /* Initialize the heap for the chunk. */ 511 850 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE; … … 559 898 } 560 899 if (RT_SUCCESS(rc)) 900 #endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */ 561 901 { 902 /* 903 * Add the chunk. 904 * 905 * This must be done before the unwind init so windows can allocate 906 * memory from the chunk when using the alternative sub-allocator. 907 */ 908 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk; 562 909 #ifdef IN_RING3 563 # ifdef RT_OS_WINDOWS 564 void *pvUnwindInfo = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pExecMemAllocator, hHeap, pvChunk); 565 AssertStmt(pvUnwindInfo, rc = VERR_INTERNAL_ERROR_3); 566 # else 567 void *pvUnwindInfo = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pExecMemAllocator, pvChunk); 568 AssertStmt(pvUnwindInfo, rc = VERR_NO_MEMORY); 569 # endif 910 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL; 911 #endif 912 #ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 913 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap; 914 #else 915 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk; 916 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0; 917 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk], 918 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk); 919 #endif 920 921 pExecMemAllocator->cChunks = idxChunk + 1; 922 pExecMemAllocator->idxChunkHint = idxChunk; 923 924 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 925 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk; 926 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk; 927 #else 928 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap); 929 pExecMemAllocator->cbTotal += cbFree; 930 pExecMemAllocator->cbFree += cbFree; 931 #endif 932 933 #ifdef IN_RING3 934 /* 935 * Initialize the unwind information (this cannot really fail atm). 936 * (This sets pvUnwindInfo.) 937 */ 938 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pExecMemAllocator, pvChunk, idxChunk); 570 939 if (RT_SUCCESS(rc)) 571 940 #endif 572 941 { 573 /*574 * Finalize the adding of the chunk.575 */576 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;577 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;578 #ifdef IN_RING3579 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pvUnwindInfo;580 #endif581 582 pExecMemAllocator->cChunks = idxChunk + 1;583 pExecMemAllocator->idxChunkHint = idxChunk;584 585 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);586 pExecMemAllocator->cbTotal += cbFree;587 pExecMemAllocator->cbFree += cbFree;588 589 942 return VINF_SUCCESS; 590 943 } 944 945 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 946 /* Just in case the impossible happens, undo the above up: */ 947 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk; 948 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT; 949 pExecMemAllocator->cChunks = idxChunk; 950 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk], 951 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk); 952 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL; 953 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0; 954 #endif 591 955 } 592 } 956 #ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 957 } 958 #endif 593 959 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk); 594 960 return rc; … … 652 1018 * Allocate and initialize the allocatore instance. 653 1019 */ 654 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, 655 aChunks[cMaxChunks])); 656 AssertReturn(pExecMemAllocator, VERR_NO_MEMORY); 1020 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]); 1021 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 1022 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE); 1023 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3); 1024 cbNeeded += cbBitmap * cMaxChunks; 1025 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10); 1026 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3)); 1027 #endif 1028 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS) 1029 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE); 1030 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks; 1031 #endif 1032 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded); 1033 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk), 1034 VERR_NO_MEMORY); 657 1035 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC; 658 1036 pExecMemAllocator->cbChunk = cbChunk; … … 664 1042 pExecMemAllocator->cbFree = 0; 665 1043 pExecMemAllocator->cbAllocated = 0; 1044 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 1045 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps); 1046 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT; 1047 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6); 1048 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */ 1049 #endif 1050 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS) 1051 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames); 1052 #endif 666 1053 for (uint32_t i = 0; i < cMaxChunks; i++) 667 1054 { 1055 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR 1056 pExecMemAllocator->aChunks[i].cFreeUnits = 0; 1057 pExecMemAllocator->aChunks[i].idxFreeHint = 0; 1058 #else 668 1059 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE; 1060 #endif 669 1061 pExecMemAllocator->aChunks[i].pvChunk = NULL; 670 1062 #ifdef IN_RING0 … … 688 1080 689 1081 return VINF_SUCCESS; 690 }691 692 /**693 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating694 * the heap statistics.695 */696 DECL_FORCE_INLINE(void *) iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,697 uint32_t cbReq, uint32_t idxChunk)698 {699 pExecMemAllocator->cAllocations += 1;700 pExecMemAllocator->cbAllocated += cbReq;701 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);702 pExecMemAllocator->idxChunkHint = idxChunk;703 return pvRet;704 }705 706 707 /**708 * Allocates @a cbReq bytes of executable memory.709 *710 * @returns Pointer to the memory, NULL if out of memory or other problem711 * encountered.712 * @param pVCpu The cross context virtual CPU structure of the calling713 * thread.714 * @param cbReq How many bytes are required.715 */716 static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)717 {718 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;719 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);720 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);721 722 /*723 * Adjust the request size as per the logic described in724 * iemExecMemAllocatorGrow and attempt to allocate it from one of the725 * existing chunks if we think we've got sufficient free memory around.726 */727 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;728 if (cbReq <= pExecMemAllocator->cbFree)729 {730 uint32_t const cChunks = pExecMemAllocator->cChunks;731 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;732 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)733 {734 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);735 if (pvRet)736 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);737 }738 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)739 {740 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);741 if (pvRet)742 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);743 }744 }745 746 /*747 * Can we grow it with another chunk?748 */749 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)750 {751 int rc = iemExecMemAllocatorGrow(pExecMemAllocator);752 AssertLogRelRCReturn(rc, NULL);753 754 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;755 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);756 if (pvRet)757 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);758 AssertFailed();759 }760 761 /* What now? Prune native translation blocks from the cache? */762 AssertFailed();763 return NULL;764 }765 766 767 /** This is a hook that we may need later for changing memory protection back768 * to readonly+exec */769 static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)770 {771 #ifdef RT_OS_DARWIN772 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);773 AssertRC(rc); RT_NOREF(pVCpu);774 #else775 RT_NOREF(pVCpu, pv, cb);776 #endif777 }778 779 780 /**781 * Frees executable memory.782 */783 void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)784 {785 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;786 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);787 Assert(pv);788 789 /* Align the size as we did when allocating the block. */790 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;791 792 /* Assert sanity if strict build. */793 #ifdef VBOX_STRICT794 uint32_t const cChunks = pExecMemAllocator->cChunks;795 uint32_t const cbChunk = pExecMemAllocator->cbChunk;796 bool fFound = false;797 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)798 {799 fFound = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk < cbChunk;800 if (fFound)801 {802 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);803 break;804 }805 }806 Assert(fFound);807 #endif808 809 /* Update stats while cb is freshly calculated.*/810 pExecMemAllocator->cbAllocated -= cb;811 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);812 pExecMemAllocator->cAllocations -= 1;813 814 /* Do the actual freeing. */815 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);816 1082 } 817 1083 … … 1573 1839 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3); 1574 1840 1841 #if 1 /** @todo getting weird EXC_BAD_INSTRUCTION exceptions, trying to figure out / work around why... */ 1575 1842 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR)); 1576 1843 AssertReturn(paFinalInstrBuf, pTb); 1577 1844 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0])); 1845 #else 1846 IEMNATIVEINSTR volatile * const paFinalInstrBuf 1847 = (IEMNATIVEINSTR volatile *)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR)); 1848 AssertReturn(paFinalInstrBuf, pTb); 1849 for (uint32_t i = 0; i < off; i++) 1850 paFinalInstrBuf[i] = pReNative->pInstrBuf[i]; 1851 __asm__ __volatile__("dmb sy\n\t" ::: "memory"); 1852 #endif 1578 1853 1579 1854 /*
Note:
See TracChangeset
for help on using the changeset viewer.