Changeset 106307 in vbox
- Timestamp: Oct 14, 2024 2:45:58 PM (6 weeks ago)
- Location: trunk/src/VBox/VMM
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
trunk/src/VBox/VMM/VMMAll/IEMAllN8veExecMem.cpp
r106297 r106307 276 276 uint32_t cBitmapElementsPerChunk; 277 277 278 /** Number of times we fruitlessly scanned a chunk for free space. */ 279 uint64_t cFruitlessChunkScans; 280 278 281 #ifdef IEMEXECMEM_ALT_SUB_WITH_ALT_PRUNING 279 282 /** The next chunk to prune in. */ … … 292 295 uint64_t cbUnusable; 293 296 #endif 294 295 297 296 298 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS) … … 474 476 /** 475 477 * Bitmap scanner code that looks for a bunch of @a cReqUnits zero bits. 476 * 478 * 477 479 * Booting win11 with a r165098 release build the average native TB size is 478 480 * around 9 units (of 256 bytes). So, it is unlikely we need to scan any … … 494 496 if (uWord == UINT64_MAX) 495 497 { 498 /* 499 * Getting thru patches of UINT64_MAX is a frequent problem when the allocator 500 * fills up, so it's definitely worth optimizing. 501 * 502 * The complicated code below is a bit faster on arm. Reducing the per TB cost 503 * from 4255ns to 4106ns (best run out of 10). On win/x86 the gain isn't so 504 * marked, despite more full bitmap scans. 505 */ 506 #if 1 507 off++; 508 uint32_t cQuads = (c64WordsToScan - off) / 4; 509 510 /* Align. 
*/ 511 if (cQuads > 1) 512 switch (((uintptr_t)&pbmAlloc[off] / sizeof(uint64_t)) & 3) 513 { 514 case 0: 515 break; 516 case 1: 517 { 518 uWord = pbmAlloc[off]; 519 uint64_t uWord1 = pbmAlloc[off + 1]; 520 uint64_t uWord2 = pbmAlloc[off + 2]; 521 if ((uWord & uWord1 & uWord2) == UINT64_MAX) 522 { 523 off += 3; 524 cQuads = (c64WordsToScan - off) / 4; 525 } 526 else if (uWord == UINT64_MAX) 527 { 528 if (uWord1 != UINT64_MAX) 529 { 530 uWord = uWord1; 531 off += 1; 532 } 533 else 534 { 535 uWord = uWord2; 536 off += 2; 537 } 538 } 539 break; 540 } 541 case 2: 542 { 543 uWord = pbmAlloc[off]; 544 uint64_t uWord1 = pbmAlloc[off + 1]; 545 if ((uWord & uWord1) == UINT64_MAX) 546 { 547 off += 2; 548 cQuads = (c64WordsToScan - off) / 4; 549 } 550 else if (uWord == UINT64_MAX) 551 { 552 uWord = uWord1; 553 off += 1; 554 } 555 break; 556 } 557 case 3: 558 uWord = pbmAlloc[off]; 559 if (uWord == UINT64_MAX) 560 { 561 off++; 562 cQuads = (c64WordsToScan - off) / 4; 563 } 564 break; 565 } 566 if (uWord == UINT64_MAX) 567 { 568 /* Looping over 32 bytes at a time. 
*/ 569 for (;;) 570 { 571 if (cQuads-- > 0) 572 { 573 uWord = pbmAlloc[off + 0]; 574 uint64_t uWord1 = pbmAlloc[off + 1]; 575 uint64_t uWord2 = pbmAlloc[off + 2]; 576 uint64_t uWord3 = pbmAlloc[off + 3]; 577 if ((uWord & uWord1 & uWord2 & uWord3) == UINT64_MAX) 578 off += 4; 579 else 580 { 581 if (uWord != UINT64_MAX) 582 { } 583 else if (uWord1 != UINT64_MAX) 584 { 585 uWord = uWord1; 586 off += 1; 587 } 588 else if (uWord2 != UINT64_MAX) 589 { 590 uWord = uWord2; 591 off += 2; 592 } 593 else 594 { 595 uWord = uWord3; 596 off += 3; 597 } 598 break; 599 } 600 } 601 else 602 { 603 if (off < c64WordsToScan) 604 { 605 uWord = pbmAlloc[off]; 606 if (uWord != UINT64_MAX) 607 break; 608 off++; 609 if (off < c64WordsToScan) 610 { 611 uWord = pbmAlloc[off]; 612 if (uWord != UINT64_MAX) 613 break; 614 off++; 615 if (off < c64WordsToScan) 616 { 617 uWord = pbmAlloc[off]; 618 if (uWord != UINT64_MAX) 619 break; 620 Assert(off + 1 == c64WordsToScan); 621 } 622 } 623 } 624 return UINT32_MAX; 625 } 626 } 627 } 628 #else 496 629 do 497 630 { … … 502 635 return UINT32_MAX; 503 636 } while (uWord == UINT64_MAX); 504 637 #endif 505 638 cPrevLeadingZeros = 0; 506 639 } … … 517 650 #else 518 651 # ifdef RT_ARCH_AMD64 519 unsigned cZerosInWord = __popcnt64(~uWord s);652 unsigned cZerosInWord = __popcnt64(~uWord); 520 653 # else 521 654 # pragma message("need popcount intrinsic or something...") /** @todo port me: Win/ARM. */ … … 624 757 { 625 758 #ifdef __GNUC__ 626 unsigned cTrailingZeros = uWord ? __builtin_ctzl(uWord) : 64;759 unsigned cTrailingZeros = __builtin_ctzl(uWord); 627 760 #else 628 unsigned cTrailingZeros = uWord ? 
ASMBitFirstSetU64(uWord);761 unsigned cTrailingZeros = ASMBitFirstSetU64(uWord) - 1; 629 762 #endif 630 763 if (cPrevLeadingZeros + (off2 - off) * 64 + cTrailingZeros >= cReqUnits) … … 719 852 } 720 853 854 pExecMemAllocator->cFruitlessChunkScans += 1; 721 855 return NULL; 722 856 } … … 1973 2107 "Bytes recovered while pruning", "/IEM/CPU%u/re/ExecMem/PruningRecovered", pVCpu->idCpu); 1974 2108 #endif 2109 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cFruitlessChunkScans, STAMTYPE_U64_RESET, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT, 2110 "Chunks fruitlessly scanned for free space", "/IEM/CPU%u/re/ExecMem/FruitlessChunkScans", pVCpu->idCpu); 1975 2111 1976 2112 return VINF_SUCCESS; -
trunk/src/VBox/VMM/testcase/tstIEMN8veProfiling.cpp
r106212 r106307 38 38 #include <VBox/vmm/uvm.h> 39 39 #include <VBox/vmm/vmm.h> 40 #include <VBox/vmm/vmmr3vtable.h> 40 41 41 42 #include <iprt/errcore.h> … … 46 47 #include <iprt/stream.h> 47 48 #include <iprt/string.h> 49 50 51 /********************************************************************************************************************************* 52 * Global Variables * 53 *********************************************************************************************************************************/ 54 static uint32_t g_cMaxTbs = 0; 55 static uint64_t g_cbMaxExecMem = 0; 56 static PCVMMR3VTABLE g_pVMM = NULL; 57 58 59 /** 60 * @callback_method_impl{FNCFGMCONSTRUCTOR} 61 */ 62 static DECLCALLBACK(int) tstIEMN8veProfilingCfgConstructor(PUVM pUVM, PVM pVM, PCVMMR3VTABLE pVMM, void *pvUser) 63 { 64 RT_NOREF(pUVM, pvUser); 65 g_pVMM = pVMM; 66 67 /* 68 * Create a default configuration tree. 69 */ 70 int rc = pVMM->pfnCFGMR3ConstructDefaultTree(pVM); 71 AssertRCReturn(rc, rc); 72 73 /* 74 * Now for our overrides. 
75 */ 76 PCFGMNODE const pRoot = pVMM->pfnCFGMR3GetRoot(pVM); 77 PCFGMNODE pIemNode = pVMM->pfnCFGMR3GetChild(pRoot, "IEM"); 78 if (!pIemNode) 79 { 80 rc = pVMM->pfnCFGMR3InsertNode(pRoot, "IEM", &pIemNode); 81 if (RT_FAILURE(rc)) 82 return RTMsgErrorRc(rc, "CFGMR3InsertNode/IEM failed: %Rrc", rc); 83 } 84 85 if (g_cMaxTbs != 0) 86 { 87 rc = pVMM->pfnCFGMR3InsertInteger(pIemNode, "MaxTbCount", g_cMaxTbs); 88 if (RT_FAILURE(rc)) 89 return RTMsgErrorRc(rc, "CFGMR3InsertInteger/MaxTbCount failed: %Rrc", rc); 90 } 91 92 if (g_cbMaxExecMem != 0) 93 { 94 rc = pVMM->pfnCFGMR3InsertInteger(pIemNode, "MaxExecMem", g_cbMaxExecMem); 95 if (RT_FAILURE(rc)) 96 return RTMsgErrorRc(rc, "CFGMR3InsertInteger/MaxExecMemaxTbCount failed: %Rrc", rc); 97 } 98 99 return VINF_SUCCESS; 100 } 101 48 102 49 103 … … 67 121 unsigned cVerbosity = 0; 68 122 uint32_t cMinTbs = 0; 123 const char *pszStats = NULL; 69 124 static const RTGETOPTDEF s_aOptions[] = 70 125 { 71 { "--verbose", 'v', RTGETOPT_REQ_NOTHING }, 72 { "--min-tbs", 'm', RTGETOPT_REQ_UINT32 }, 126 { "--verbose", 'v', RTGETOPT_REQ_NOTHING }, 127 { "--min-tbs", 'm', RTGETOPT_REQ_UINT32 }, 128 { "--stats", 's', RTGETOPT_REQ_STRING }, 129 { "--max-tb-count", 't', RTGETOPT_REQ_UINT32 }, 130 { "--max-exec-mem", 'x', RTGETOPT_REQ_UINT64 }, 73 131 }; 74 132 RTGETOPTSTATE GetState; … … 94 152 break; 95 153 154 case 's': 155 pszStats = *ValueUnion.psz ? 
ValueUnion.psz : NULL; 156 break; 157 158 case 't': 159 g_cMaxTbs = ValueUnion.u32; 160 break; 161 162 case 'x': 163 g_cbMaxExecMem = ValueUnion.u64; 164 break; 165 96 166 case 'h': 97 167 RTPrintf("Usage: %Rbn [options] <ThreadedTBsForRecompilerProfiling.sav>\n" … … 109 179 if (!pVM) 110 180 { 111 rc = VMR3Create(1 /*cCpus*/, NULL, VMCREATE_F_DRIVERLESS, NULL, NULL, NULL, NULL, &pVM, &pUVM); 181 rc = VMR3Create(1 /*cCpus*/, NULL, VMCREATE_F_DRIVERLESS, NULL, NULL, 182 tstIEMN8veProfilingCfgConstructor, NULL, &pVM, &pUVM); 112 183 if (RT_FAILURE(rc)) 113 184 { … … 116 187 } 117 188 } 118 rc = VMR3ReqCallWaitU(pUVM, 0, (PFNRT)IEMR3ThreadedProfileRecompilingSavedTbs, 119 3, pVM, ValueUnion.psz, cMinTbs); 120 if (RT_FAILURE(rc)) 121 rcExit = RTEXITCODE_FAILURE; 189 rc = g_pVMM->pfnVMR3ReqCallWaitU(pUVM, 0, (PFNRT)IEMR3ThreadedProfileRecompilingSavedTbs, 190 3, pVM, ValueUnion.psz, cMinTbs); 191 if (RT_SUCCESS(rc)) 192 { 193 if (pszStats) 194 g_pVMM->pfnSTAMR3Print(pUVM, pszStats); 195 } 196 else 197 rcExit = RTMsgErrorExitFailure("VMR3ReqCallWaitU/IEMR3ThreadedProfileRecompilingSavedTbs failed: %Rrc", 198 rc); 122 199 break; 123 200
Note: See TracChangeset for help on using the changeset viewer.