Changeset 106212 in vbox for trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp
- Timestamp:
- Oct 3, 2024 2:42:55 AM (2 months ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp
r106126 r106212 440 440 441 441 442 442 443 /********************************************************************************************************************************* 443 444 * Translation Block Cache. * … … 661 662 } 662 663 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup)); 664 # ifdef VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING 665 iemThreadedSaveTbForProfiling(pVCpu, pTb); 666 # endif 663 667 return iemNativeRecompile(pVCpu, pTb); 664 668 #else … … 2937 2941 2938 2942 /********************************************************************************************************************************* 2943 * Threaded Translation Block Saving and Restoring for Profiling the Native Recompiler * 2944 *********************************************************************************************************************************/ 2945 #if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING) 2946 # include <iprt/message.h> 2947 2948 static const SSMFIELD g_aIemThreadedTbFields[] = 2949 { 2950 SSMFIELD_ENTRY( IEMTB, cUsed), 2951 SSMFIELD_ENTRY( IEMTB, msLastUsed), 2952 SSMFIELD_ENTRY_GCPHYS(IEMTB, GCPhysPc), 2953 SSMFIELD_ENTRY( IEMTB, fFlags), 2954 SSMFIELD_ENTRY( IEMTB, x86.fAttr), 2955 SSMFIELD_ENTRY( IEMTB, cRanges), 2956 SSMFIELD_ENTRY( IEMTB, cInstructions), 2957 SSMFIELD_ENTRY( IEMTB, Thrd.cCalls), 2958 SSMFIELD_ENTRY( IEMTB, cTbLookupEntries), 2959 SSMFIELD_ENTRY( IEMTB, cbOpcodes), 2960 SSMFIELD_ENTRY( IEMTB, FlatPc), 2961 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[0]), 2962 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[1]), 2963 SSMFIELD_ENTRY_TERM() 2964 }; 2965 2966 /** 2967 * Saves a threaded TB to a dedicated saved state file. 2968 */ 2969 static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb) 2970 { 2971 /* Only VCPU #0 for now. */ 2972 if (pVCpu->idCpu != 0) 2973 return; 2974 2975 /* 2976 * Get the SSM handle, lazily opening the output file. 2977 */ 2978 PSSMHANDLE const pNil = (PSSMHANDLE)~(uintptr_t)0; Assert(!RT_VALID_PTR(pNil)); 2979 PSSMHANDLE pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling; 2980 if (pSSM && pSSM != pNil) 2981 { /* likely */ } 2982 else if (pSSM) 2983 return; 2984 else 2985 { 2986 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil; 2987 int rc = SSMR3Open("ThreadedTBsForRecompilerProfiling.sav", NULL, NULL, SSM_OPEN_F_FOR_WRITING, &pSSM); 2988 AssertLogRelRCReturnVoid(rc); 2989 2990 rc = SSMR3WriteFileHeader(pSSM, 1); 2991 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */ 2992 2993 rc = SSMR3WriteUnitBegin(pSSM, "threaded-tbs", 1, 0); 2994 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */ 2995 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pSSM; 2996 } 2997 2998 /* 2999 * Do the actual saving. 3000 */ 3001 SSMR3PutU32(pSSM, 0); /* Indicates that another TB follows. */ 3002 3003 /* The basic structure. */ 3004 SSMR3PutStructEx(pSSM, pTb, sizeof(*pTb), 0 /*fFlags*/, g_aIemThreadedTbFields, NULL); 3005 3006 /* The ranges. */ 3007 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++) 3008 { 3009 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offOpcodes); 3010 SSMR3PutU16(pSSM, pTb->aRanges[iRange].cbOpcodes); 3011 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offPhysPage | (pTb->aRanges[iRange].idxPhysPage << 14)); 3012 } 3013 3014 /* The opcodes. */ 3015 SSMR3PutMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes); 3016 3017 /* The threaded call table. */ 3018 int rc = SSMR3PutMem(pSSM, pTb->Thrd.paCalls, sizeof(*pTb->Thrd.paCalls) * pTb->Thrd.cCalls); 3019 AssertLogRelMsgStmt(RT_SUCCESS(rc), ("rc=%Rrc\n", rc), pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil); 3020 } 3021 3022 3023 /** 3024 * Called by IEMR3Term to finish any open profile files. 3025 * 3026 * @note This is not called on the EMT for @a pVCpu, but rather on the thread 3027 * driving the VM termination. 3028 */ 3029 DECLHIDDEN(void) iemThreadedSaveTbForProfilingCleanup(PVMCPU pVCpu) 3030 { 3031 PSSMHANDLE const pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling; 3032 pVCpu->iem.s.pSsmThreadedTbsForProfiling = NULL; 3033 if (RT_VALID_PTR(pSSM)) 3034 { 3035 /* Indicate that this is the end. */ 3036 SSMR3PutU32(pSSM, UINT32_MAX); 3037 3038 int rc = SSMR3WriteUnitComplete(pSSM); 3039 AssertLogRelRC(rc); 3040 rc = SSMR3WriteFileFooter(pSSM); 3041 AssertLogRelRC(rc); 3042 rc = SSMR3Close(pSSM); 3043 AssertLogRelRC(rc); 3044 } 3045 } 3046 3047 #endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER && VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING */ 3048 3049 #ifdef IN_RING3 3050 /** 3051 * API use to process what iemThreadedSaveTbForProfiling() saved. 3052 * 3053 * @note Do not mix build types or revisions. Local changes between saving the 3054 * TBs and calling this API may cause unexpected trouble. 3055 */ 3056 VMMR3DECL(int) IEMR3ThreadedProfileRecompilingSavedTbs(PVM pVM, const char *pszFilename, uint32_t cMinTbs) 3057 { 3058 # if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING) 3059 PVMCPU const pVCpu = pVM->apCpusR3[0]; 3060 3061 /* We need to keep an eye on the TB allocator. */ 3062 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3; 3063 3064 /* 3065 * Load the TBs from the file. 3066 */ 3067 PSSMHANDLE pSSM = NULL; 3068 int rc = SSMR3Open(pszFilename, NULL, NULL, 0, &pSSM); 3069 if (RT_SUCCESS(rc)) 3070 { 3071 uint32_t cTbs = 0; 3072 PIEMTB pTbHead = NULL; 3073 PIEMTB *ppTbTail = &pTbHead; 3074 uint32_t uVersion; 3075 rc = SSMR3Seek(pSSM, "threaded-tbs", 0, &uVersion); 3076 if (RT_SUCCESS(rc)) 3077 { 3078 for (;; cTbs++) 3079 { 3080 /* Check for the end tag. */ 3081 uint32_t uTag = 0; 3082 rc = SSMR3GetU32(pSSM, &uTag); 3083 AssertRCBreak(rc); 3084 if (uTag == UINT32_MAX) 3085 break; 3086 AssertBreakStmt(uTag == 0, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED); 3087 3088 /* Do we have room for another TB? */ 3089 if (pTbAllocator->cInUseTbs + 2 >= pTbAllocator->cMaxTbs) 3090 { 3091 RTMsgInfo("Too many TBs to load, stopping loading early.\n"); 3092 break; 3093 } 3094 3095 /* Allocate a new TB. */ 3096 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/); 3097 AssertBreakStmt(uTag == 0, rc = VERR_OUT_OF_RESOURCES); 3098 3099 uint8_t const idxAllocChunk = pTb->idxAllocChunk; 3100 RT_ZERO(*pTb); 3101 pTb->idxAllocChunk = idxAllocChunk; 3102 3103 rc = SSMR3GetStructEx(pSSM, pTb, sizeof(*pTb), 0, g_aIemThreadedTbFields, NULL); 3104 if (RT_SUCCESS(rc)) 3105 { 3106 AssertStmt(pTb->Thrd.cCalls > 0 && pTb->Thrd.cCalls <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED); 3107 AssertStmt(pTb->cbOpcodes > 0 && pTb->cbOpcodes <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED); 3108 AssertStmt(pTb->cRanges > 0 && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges), rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED); 3109 AssertStmt(pTb->cTbLookupEntries > 0 && pTb->cTbLookupEntries <= _1K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED); 3110 3111 if (RT_SUCCESS(rc)) 3112 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++) 3113 { 3114 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].offOpcodes); 3115 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].cbOpcodes); 3116 uint16_t uTmp = 0; 3117 rc = SSMR3GetU16(pSSM, &uTmp); 3118 AssertRCBreak(rc); 3119 pTb->aRanges[iRange].offPhysPage = uTmp & GUEST_PAGE_OFFSET_MASK; 3120 pTb->aRanges[iRange].idxPhysPage = uTmp >> 14; 3121 3122 AssertBreakStmt(pTb->aRanges[iRange].idxPhysPage <= RT_ELEMENTS(pTb->aGCPhysPages), 3123 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED); 3124 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes < pTb->cbOpcodes, 3125 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED); 3126 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes + pTb->aRanges[iRange].cbOpcodes <= pTb->cbOpcodes, 3127 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED); 3128 } 3129 3130 if (RT_SUCCESS(rc)) 3131 { 3132 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAllocZ(sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls); 3133 if (pTb->Thrd.paCalls) 3134 { 3135 size_t const cbTbLookup = pTb->cTbLookupEntries * sizeof(PIEMTB); 3136 Assert(cbTbLookup > 0); 3137 size_t const cbOpcodes = pTb->cbOpcodes; 3138 Assert(cbOpcodes > 0); 3139 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB)); 3140 uint8_t * const pbBoth = (uint8_t *)RTMemAllocZ(cbBoth); 3141 if (pbBoth) 3142 { 3143 pTb->pabOpcodes = &pbBoth[cbTbLookup]; 3144 SSMR3GetMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes); 3145 rc = SSMR3GetMem(pSSM, pTb->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls); 3146 if (RT_SUCCESS(rc)) 3147 { 3148 *ppTbTail = pTb; 3149 ppTbTail = &pTb->pNext; 3150 continue; 3151 } 3152 } 3153 else 3154 rc = VERR_NO_MEMORY; 3155 RTMemFree(pTb->Thrd.paCalls); 3156 } 3157 else 3158 rc = VERR_NO_MEMORY; 3159 } 3160 } 3161 iemTbAllocatorFree(pVCpu, pTb); 3162 break; 3163 } 3164 if (RT_FAILURE(rc)) 3165 RTMsgError("Load error: %Rrc (cTbs=%u)", rc, cTbs); 3166 } 3167 else 3168 RTMsgError("SSMR3Seek failed on '%s': %Rrc", pszFilename, rc); 3169 SSMR3Close(pSSM); 3170 if (RT_SUCCESS(rc)) 3171 { 3172 /* 3173 * Recompile the TBs. 3174 */ 3175 if (pTbHead) 3176 { 3177 RTMsgInfo("Loaded %u TBs\n", cTbs); 3178 if (cTbs < cMinTbs) 3179 { 3180 RTMsgInfo("Duplicating TBs to reach %u TB target\n", cMinTbs); 3181 for (PIEMTB pTb = pTbHead; 3182 cTbs < cMinTbs && pTbAllocator->cInUseTbs + 2 <= pTbAllocator->cMaxTbs; 3183 pTb = pTb->pNext) 3184 { 3185 PIEMTB pTbCopy = iemThreadedTbDuplicate(pVM, pVCpu, pTbHead); 3186 if (!pTbCopy) 3187 break; 3188 *ppTbTail = pTbCopy; 3189 ppTbTail = &pTbCopy->pNext; 3190 cTbs++; 3191 } 3192 } 3193 3194 PIEMTB pTbWarmup = iemThreadedTbDuplicate(pVM, pVCpu, pTbHead); 3195 if (pTbWarmup) 3196 { 3197 iemNativeRecompile(pVCpu, pTbWarmup); 3198 RTThreadSleep(512); /* to make the start visible in the profiler. */ 3199 RTMsgInfo("Ready, set, go!\n"); 3200 3201 if ((pTbWarmup->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE) 3202 { 3203 uint32_t cFailed = 0; 3204 uint64_t const nsStart = RTTimeNanoTS(); 3205 for (PIEMTB pTb = pTbHead; pTb; pTb = pTb->pNext) 3206 { 3207 iemNativeRecompile(pVCpu, pTb); 3208 if ((pTb->fFlags & IEMTB_F_TYPE_MASK) != IEMTB_F_TYPE_NATIVE) 3209 cFailed++; 3210 } 3211 uint64_t const cNsElapsed = RTTimeNanoTS() - nsStart; 3212 RTMsgInfo("Recompiled %u TBs in %'RU64 ns - averaging %'RU64 ns/TB\n", 3213 cTbs, cNsElapsed, (cNsElapsed + cTbs - 1) / cTbs); 3214 if (cFailed) 3215 { 3216 RTMsgError("Unforuntately %u TB failed!", cFailed); 3217 rc = VERR_GENERAL_FAILURE; 3218 } 3219 RTThreadSleep(128); /* Another gap in the profiler timeline. */ 3220 } 3221 else 3222 { 3223 RTMsgError("Failed to recompile the first TB!"); 3224 rc = VERR_GENERAL_FAILURE; 3225 } 3226 } 3227 else 3228 rc = VERR_NO_MEMORY; 3229 } 3230 else 3231 { 3232 RTMsgError("'%s' contains no TBs!", pszFilename); 3233 rc = VERR_NO_DATA; 3234 } 3235 } 3236 } 3237 else 3238 RTMsgError("SSMR3Open failed on '%s': %Rrc", pszFilename, rc); 3239 return rc; 3240 3241 # else 3242 RT_NOREF(pVM, pszFilename, cMinTbs); 3243 return VERR_NOT_IMPLEMENTED; 3244 # endif 3245 } 3246 #endif /* IN_RING3 */ 3247 3248 3249 /********************************************************************************************************************************* 2939 3250 * Recompiled Execution Core * 2940 3251 *********************************************************************************************************************************/
Note:
See TracChangeset
for help on using the changeset viewer.