Changeset 100736 in vbox
- Timestamp: Jul 30, 2023, 12:54:04 AM
- Location: trunk/src/VBox/VMM
- Files: 3 edited, 1 copied
trunk/src/VBox/VMM/Makefile.kmk (r100733 → r100736)

@@ -249,4 +249,5 @@
     VBoxVMM_SOURCES += \
         VMMAll/IEMAllThrdRecompiler.cpp \
+        VMMAll/IEMAllThrdTables.cpp \
         VMMAll/IEMAllThrdFuncs.cpp \
         VMMAll/IEMAllThrdFuncsBltIn.cpp
trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp (r100734 → r100736)

@@ -114,5 +114,4 @@
  *   Internal Functions                                                                      *
  *********************************************************************************************/
- static bool     iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb);
  static VBOXSTRICTRC iemThreadedTbExec(PVMCPUCC pVCpu, PIEMTB pTb);

Removed (old lines 119–304 — the macro overrides and call-emitting macros, moved to the new IEMAllThrdTables.cpp):

    /*********************************************************************************************
    *   Defined Constants And Macros                                                             *
    *********************************************************************************************/
    #define g_apfnOneByteMap    g_apfnIemThreadedRecompilerOneByteMap


    /*
     * Override IEM_MC_CALC_RM_EFF_ADDR to use iemOpHlpCalcRmEffAddrJmpEx and produce uEffAddrInfo.
     */
    #undef IEM_MC_CALC_RM_EFF_ADDR
    #ifndef IEM_WITH_SETJMP
    # define IEM_MC_CALC_RM_EFF_ADDR(a_GCPtrEff, a_bRm, a_cbImmAndRspOffset) \
        uint64_t uEffAddrInfo; \
        IEM_MC_RETURN_ON_FAILURE(iemOpHlpCalcRmEffAddrJmpEx(pVCpu, (a_bRm), (a_cbImmAndRspOffset), &(a_GCPtrEff), &uEffAddrInfo))
    #else
    # define IEM_MC_CALC_RM_EFF_ADDR(a_GCPtrEff, a_bRm, a_cbImmAndRspOffset) \
        uint64_t uEffAddrInfo; \
        ((a_GCPtrEff) = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, (a_bRm), (a_cbImmAndRspOffset), &uEffAddrInfo))
    #endif

    /*
     * Likewise override IEM_OPCODE_SKIP_RM_EFF_ADDR_BYTES so we fetch all the opcodes.
     */
    #undef IEM_OPCODE_SKIP_RM_EFF_ADDR_BYTES
    #define IEM_OPCODE_SKIP_RM_EFF_ADDR_BYTES(a_bRm) do { \
            uint64_t uEffAddrInfo; \
            (void)iemOpHlpCalcRmEffAddrJmpEx(pVCpu, bRm, 0, &uEffAddrInfo); \
        } while (0)

    /*
     * Override the IEM_MC_REL_JMP_S*_AND_FINISH macros to check for zero byte jumps.
     */
    #undef IEM_MC_REL_JMP_S8_AND_FINISH
    #define IEM_MC_REL_JMP_S8_AND_FINISH(a_i8) do { \
            Assert(pVCpu->iem.s.fTbBranched != 0); \
            if ((a_i8) == 0) \
                pVCpu->iem.s.fTbBranched |= IEMBRANCHED_F_ZERO; \
            return iemRegRipRelativeJumpS8AndFinishClearingRF(pVCpu, IEM_GET_INSTR_LEN(pVCpu), (a_i8), pVCpu->iem.s.enmEffOpSize); \
        } while (0)

    #undef IEM_MC_REL_JMP_S16_AND_FINISH
    #define IEM_MC_REL_JMP_S16_AND_FINISH(a_i16) do { \
            Assert(pVCpu->iem.s.fTbBranched != 0); \
            if ((a_i16) == 0) \
                pVCpu->iem.s.fTbBranched |= IEMBRANCHED_F_ZERO; \
            return iemRegRipRelativeJumpS16AndFinishClearingRF(pVCpu, IEM_GET_INSTR_LEN(pVCpu), (a_i16)); \
        } while (0)

    #undef IEM_MC_REL_JMP_S32_AND_FINISH
    #define IEM_MC_REL_JMP_S32_AND_FINISH(a_i32) do { \
            Assert(pVCpu->iem.s.fTbBranched != 0); \
            if ((a_i32) == 0) \
                pVCpu->iem.s.fTbBranched |= IEMBRANCHED_F_ZERO; \
            return iemRegRipRelativeJumpS32AndFinishClearingRF(pVCpu, IEM_GET_INSTR_LEN(pVCpu), (a_i32), pVCpu->iem.s.enmEffOpSize); \
        } while (0)


    /*
     * Emit call macros.
     */
    #define IEM_MC2_BEGIN_EMIT_CALLS() \
        { \
            PIEMTB const  pTb         = pVCpu->iem.s.pCurTbR3; \
            uint8_t const cbInstrMc2  = IEM_GET_INSTR_LEN(pVCpu); \
            AssertMsg(pVCpu->iem.s.offOpcode == cbInstrMc2, \
                      ("%u vs %u (%04x:%08RX64)\n", pVCpu->iem.s.offOpcode, cbInstrMc2, \
                       pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip)); \
            \
            /* No page crossing, right? */ \
            uint16_t const offOpcodeMc2 = pTb->cbOpcodes; \
            uint8_t const  idxRangeMc2  = pTb->cRanges - 1; \
            if (   !pVCpu->iem.s.fTbCrossedPage \
                && !pVCpu->iem.s.fTbCheckOpcodes \
                && !pVCpu->iem.s.fTbBranched \
                && !(pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)) \
            { \
                /** @todo Custom copy function, given range is 1 thru 15 bytes. */ \
                memcpy(&pTb->pabOpcodes[offOpcodeMc2], pVCpu->iem.s.abOpcode, pVCpu->iem.s.offOpcode); \
                pTb->cbOpcodes                       = offOpcodeMc2 + pVCpu->iem.s.offOpcode; \
                pTb->aRanges[idxRangeMc2].cbOpcodes += cbInstrMc2; \
                Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated); \
            } \
            else if (iemThreadedCompileBeginEmitCallsComplications(pVCpu, pTb)) \
            { /* likely */ } \
            else \
                return VINF_IEM_RECOMPILE_END_TB; \
            \
            do { } while (0)
    #define IEM_MC2_EMIT_CALL_0(a_enmFunction) do { \
            IEMTHREADEDFUNCS const enmFunctionCheck = a_enmFunction; RT_NOREF(enmFunctionCheck); \
            \
            PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++]; \
            pCall->enmFunction = a_enmFunction; \
            pCall->offOpcode   = offOpcodeMc2; \
            pCall->cbOpcode    = cbInstrMc2; \
            pCall->idxRange    = idxRangeMc2; \
            pCall->auParams[0] = 0; \
            pCall->auParams[1] = 0; \
            pCall->auParams[2] = 0; \
        } while (0)
    #define IEM_MC2_EMIT_CALL_1(a_enmFunction, a_uArg0) do { \
            IEMTHREADEDFUNCS const enmFunctionCheck = a_enmFunction; RT_NOREF(enmFunctionCheck); \
            uint64_t const uArg0Check = (a_uArg0); RT_NOREF(uArg0Check); \
            \
            PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++]; \
            pCall->enmFunction = a_enmFunction; \
            pCall->offOpcode   = offOpcodeMc2; \
            pCall->cbOpcode    = cbInstrMc2; \
            pCall->idxRange    = idxRangeMc2; \
            pCall->auParams[0] = a_uArg0; \
            pCall->auParams[1] = 0; \
            pCall->auParams[2] = 0; \
        } while (0)
    #define IEM_MC2_EMIT_CALL_2(a_enmFunction, a_uArg0, a_uArg1) do { \
            IEMTHREADEDFUNCS const enmFunctionCheck = a_enmFunction; RT_NOREF(enmFunctionCheck); \
            uint64_t const uArg0Check = (a_uArg0); RT_NOREF(uArg0Check); \
            uint64_t const uArg1Check = (a_uArg1); RT_NOREF(uArg1Check); \
            \
            PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++]; \
            pCall->enmFunction = a_enmFunction; \
            pCall->offOpcode   = offOpcodeMc2; \
            pCall->cbOpcode    = cbInstrMc2; \
            pCall->idxRange    = idxRangeMc2; \
            pCall->auParams[0] = a_uArg0; \
            pCall->auParams[1] = a_uArg1; \
            pCall->auParams[2] = 0; \
        } while (0)
    #define IEM_MC2_EMIT_CALL_3(a_enmFunction, a_uArg0, a_uArg1, a_uArg2) do { \
            IEMTHREADEDFUNCS const enmFunctionCheck = a_enmFunction; RT_NOREF(enmFunctionCheck); \
            uint64_t const uArg0Check = (a_uArg0); RT_NOREF(uArg0Check); \
            uint64_t const uArg1Check = (a_uArg1); RT_NOREF(uArg1Check); \
            uint64_t const uArg2Check = (a_uArg2); RT_NOREF(uArg2Check); \
            \
            PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++]; \
            pCall->enmFunction = a_enmFunction; \
            pCall->offOpcode   = offOpcodeMc2; \
            pCall->cbOpcode    = cbInstrMc2; \
            pCall->idxRange    = idxRangeMc2; \
            pCall->auParams[0] = a_uArg0; \
            pCall->auParams[1] = a_uArg1; \
            pCall->auParams[2] = a_uArg2; \
        } while (0)
    #define IEM_MC2_END_EMIT_CALLS(a_fCImplFlags) \
            Assert(pTb->cInstructions <= pTb->Thrd.cCalls); \
            if (pTb->cInstructions < 255) \
                pTb->cInstructions++; \
            uint32_t const fCImplFlagsMc2 = (a_fCImplFlags); \
            RT_NOREF(fCImplFlagsMc2); \
        } while (0)


    /*
     * IEM_MC_DEFER_TO_CIMPL_0 is easily wrapped up.
     *
     * Doing so will also take care of IEMOP_RAISE_DIVIDE_ERROR, IEMOP_RAISE_INVALID_LOCK_PREFIX,
     * IEMOP_RAISE_INVALID_OPCODE and their users.
     */
    #undef IEM_MC_DEFER_TO_CIMPL_0_RET
    #define IEM_MC_DEFER_TO_CIMPL_0_RET(a_fFlags, a_pfnCImpl) \
        return iemThreadedRecompilerMcDeferToCImpl0(pVCpu, a_fFlags, a_pfnCImpl)

    DECLINLINE(VBOXSTRICTRC) iemThreadedRecompilerMcDeferToCImpl0(PVMCPUCC pVCpu, uint32_t fFlags, PFNIEMCIMPL0 pfnCImpl)
    {
        Log8(("CImpl0: %04x:%08RX64 LB %#x: %#x %p\n",
              pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, IEM_GET_INSTR_LEN(pVCpu), fFlags, pfnCImpl));

        IEM_MC2_BEGIN_EMIT_CALLS();
        IEM_MC2_EMIT_CALL_2(kIemThreadedFunc_DeferToCImpl0, (uintptr_t)pfnCImpl, IEM_GET_INSTR_LEN(pVCpu));
        IEM_MC2_END_EMIT_CALLS(fFlags);

        /* We have to repeat work normally done by kdCImplFlags and
           ThreadedFunctionVariation.emitThreadedCallStmts here. */
        if (fFlags & (IEM_CIMPL_F_END_TB | IEM_CIMPL_F_MODE | IEM_CIMPL_F_VMEXIT | IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_REP))
            pVCpu->iem.s.fEndTb = true;

        AssertCompile(IEM_CIMPL_F_BRANCH_DIRECT      == IEMBRANCHED_F_DIRECT);
        AssertCompile(IEM_CIMPL_F_BRANCH_INDIRECT    == IEMBRANCHED_F_INDIRECT);
        AssertCompile(IEM_CIMPL_F_BRANCH_RELATIVE    == IEMBRANCHED_F_RELATIVE);
        AssertCompile(IEM_CIMPL_F_BRANCH_CONDITIONAL == IEMBRANCHED_F_CONDITIONAL);
        AssertCompile(IEM_CIMPL_F_BRANCH_FAR         == IEMBRANCHED_F_FAR);
        if (fFlags & IEM_CIMPL_F_BRANCH_ANY)
            pVCpu->iem.s.fTbBranched = fFlags & (IEM_CIMPL_F_BRANCH_ANY | IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_BRANCH_CONDITIONAL);

        return pfnCImpl(pVCpu, IEM_GET_INSTR_LEN(pVCpu));
    }

Removed (old lines 627–641 — also moved to IEMAllThrdTables.cpp):

    /**
     * Helper for indicating that we've branched.
     */
    DECL_FORCE_INLINE(void) iemThreadedSetBranched(PVMCPUCC pVCpu, uint8_t fTbBranched)
    {
        pVCpu->iem.s.fTbBranched          = fTbBranched;
        pVCpu->iem.s.GCPhysTbBranchSrcBuf = pVCpu->iem.s.GCPhysInstrBuf;
        pVCpu->iem.s.GCVirtTbBranchSrcBuf = pVCpu->iem.s.uInstrBufPc;
    }


    /*
     * Include the "annotated" IEMAllInst*.cpp.h files.
     */
    #include "IEMThreadedInstructions.cpp.h"

@@ -643,4 +440,5 @@
  /*
   * Translation block management.
   */
+
  typedef struct IEMTBCACHE

@@ -1067,7 +865,9 @@
  *  @param   pTb     The translation block being compiled.
  */
- static bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
+ bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
  {
      Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
+     if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
+         RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);

      /*
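The IEM_MC2_* macros removed above (and now living in IEMAllThrdTables.cpp) are the heart of the threaded recompiler: each decoded instruction is turned into one or more table-driven call entries appended to the current translation block. As a rough illustration of the intended usage pattern (mirroring iemThreadedRecompilerMcDeferToCImpl0 above), a decoder function built on them might look like this minimal sketch; the handler name and the kIemThreadedFunc_Example value are hypothetical, not part of the changeset:

    /* Hypothetical opcode handler: records one threaded call carrying the
       instruction length as its only parameter, then lets the compile loop
       move on to the next instruction. */
    FNIEMOP_DEF(iemOp_ExampleInstr)
    {
        IEM_MC2_BEGIN_EMIT_CALLS();                 /* copies opcode bytes, checks for complications */
        IEM_MC2_EMIT_CALL_1(kIemThreadedFunc_Example, IEM_GET_INSTR_LEN(pVCpu));
        IEM_MC2_END_EMIT_CALLS(0 /*fCImplFlags*/);  /* bumps the TB instruction count */
        return VINF_SUCCESS;
    }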
trunk/src/VBox/VMM/VMMAll/IEMAllThrdTables.cpp (copied from IEMAllThrdRecompiler.cpp, r100734 → r100736)

@@ -1,19 +1,4 @@
  /* $Id$ */
  /** @file
- * IEM - Instruction Decoding and Threaded Recompilation.
- *
- * Logging group IEM_RE_THREADED assignments:
- *      - Level 1  (Log)  : Errors, exceptions, interrupts and such major events. [same as IEM]
- *      - Flow  (LogFlow) :
- *      - Level 2  (Log2) : Basic instruction execution state info. [same as IEM]
- *      - Level 3  (Log3) : More detailed execution state info. [same as IEM]
- *      - Level 4  (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
- *      - Level 5  (Log5) : Decoding details. [same as IEM]
- *      - Level 6  (Log6) :
- *      - Level 7  (Log7) : TB obsoletion.
- *      - Level 8  (Log8) : TB compilation.
- *      - Level 9  (Log9) : TB exec.
- *      - Level 10 (Log10): TB block lookup.
- *      - Level 11 (Log11): TB block lookup details.
- *      - Level 12 (Log12): TB insertion.
+ * IEM - Instruction Decoding and Threaded Recompilation, Instruction Tables.
  */

@@ -109,10 +94,2 @@
  # error The setjmp approach must be enabled for the recompiler.
  #endif
-
-
-/*********************************************************************************************
-*   Internal Functions                                                                       *
-*********************************************************************************************/
-static bool     iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb);
-static VBOXSTRICTRC iemThreadedTbExec(PVMCPUCC pVCpu, PIEMTB pTb);

Removed (old lines 306–625 — iemOpHlpCalcRmEffAddrJmpEx stays in IEMAllThrdRecompiler.cpp, which has the matching IEM_WITH_CODE_TLB_AND_OPCODE_BUF setup; see the note in its doc comment):

    /**
     * Calculates the effective address of a ModR/M memory operand, extended version
     * for use in the recompilers.
     *
     * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
     *
     * May longjmp on internal error.
     *
     * @return  The effective address.
     * @param   pVCpu               The cross context virtual CPU structure of the calling thread.
     * @param   bRm                 The ModRM byte.
     * @param   cbImmAndRspOffset   - First byte: The size of any immediate
     *                                following the effective address opcode bytes
     *                                (only for RIP relative addressing).
     *                              - Second byte: RSP displacement (for POP [ESP]).
     * @param   puInfo              Extra info: 32-bit displacement (bits 31:0) and
     *                              SIB byte (bits 39:32).
     *
     * @note This must be defined in a source file with matching
     *       IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
     *       or implemented differently...
     */
    RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
    {
        Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
    # define SET_SS_DEF() \
        do \
        { \
            if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
                pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
        } while (0)

        if (!IEM_IS_64BIT_CODE(pVCpu))
        {
            /** @todo Check the effective address size crap! */
            if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
            {
                uint16_t u16EffAddr;

                /* Handle the disp16 form with no registers first. */
                if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
                {
                    IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
                    *puInfo = u16EffAddr;
                }
                else
                {
                    /* Get the displacement. */
                    switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
                    {
                        case 0:  u16EffAddr = 0;                             break;
                        case 1:  IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
                        case 2:  IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);       break;
                        default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
                    }
                    *puInfo = u16EffAddr;

                    /* Add the base and index registers to the disp. */
                    switch (bRm & X86_MODRM_RM_MASK)
                    {
                        case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
                        case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
                        case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
                        case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
                        case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
                        case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
                        case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
                        case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
                    }
                }

                Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
                return u16EffAddr;
            }

            Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
            uint32_t u32EffAddr;
            uint64_t uInfo;

            /* Handle the disp32 form with no registers first. */
            if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
            {
                IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
                uInfo = u32EffAddr;
            }
            else
            {
                /* Get the register (or SIB) value. */
                uInfo = 0;
                switch ((bRm & X86_MODRM_RM_MASK))
                {
                    case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
                    case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
                    case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
                    case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
                    case 4: /* SIB */
                    {
                        uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
                        uInfo = (uint64_t)bSib << 32;

                        /* Get the index and scale it. */
                        switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
                        {
                            case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
                            case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
                            case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
                            case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
                            case 4: u32EffAddr = 0; /*none */ break;
                            case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
                            case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
                            case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
                            IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
                        }
                        u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;

                        /* add base */
                        switch (bSib & X86_SIB_BASE_MASK)
                        {
                            case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
                            case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
                            case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
                            case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
                            case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
                            case 5:
                                if ((bRm & X86_MODRM_MOD_MASK) != 0)
                                {
                                    u32EffAddr += pVCpu->cpum.GstCtx.ebp;
                                    SET_SS_DEF();
                                }
                                else
                                {
                                    uint32_t u32Disp;
                                    IEM_OPCODE_GET_NEXT_U32(&u32Disp);
                                    u32EffAddr += u32Disp;
                                    uInfo      |= u32Disp;
                                }
                                break;
                            case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
                            case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
                            IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
                        }
                        break;
                    }
                    case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
                    case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
                    case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
                    IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
                }

                /* Get and add the displacement. */
                switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
                {
                    case 0:
                        break;
                    case 1:
                    {
                        int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
                        u32EffAddr += i8Disp;
                        uInfo      |= (uint32_t)(int32_t)i8Disp;
                        break;
                    }
                    case 2:
                    {
                        uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
                        u32EffAddr += u32Disp;
                        uInfo      |= u32Disp;
                        break;
                    }
                    default:
                        AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
                }
            }

            *puInfo = uInfo;
            Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
            return u32EffAddr;
        }

        uint64_t u64EffAddr;
        uint64_t uInfo;

        /* Handle the rip+disp32 form with no registers first. */
        if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
        {
            IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
            uInfo = (uint32_t)u64EffAddr;
            u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
        }
        else
        {
            /* Get the register (or SIB) value. */
            uInfo = 0;
            switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
            {
                case  0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
                case  1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
                case  2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
                case  3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
                case  5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
                case  6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
                case  7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
                case  8: u64EffAddr = pVCpu->cpum.GstCtx.r8;  break;
                case  9: u64EffAddr = pVCpu->cpum.GstCtx.r9;  break;
                case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
                case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
                case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
                case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
                case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
                /* SIB */
                case 4:
                case 12:
                {
                    uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
                    uInfo = (uint64_t)bSib << 32;

                    /* Get the index and scale it. */
                    switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
                    {
                        case  0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
                        case  1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
                        case  2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
                        case  3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
                        case  4: u64EffAddr = 0; /*none */ break;
                        case  5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
                        case  6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
                        case  7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
                        case  8: u64EffAddr = pVCpu->cpum.GstCtx.r8;  break;
                        case  9: u64EffAddr = pVCpu->cpum.GstCtx.r9;  break;
                        case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
                        case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
                        case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
                        case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
                        case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
                        case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
                        IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
                    }
                    u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;

                    /* add base */
                    switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
                    {
                        case  0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
                        case  1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
                        case  2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
                        case  3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
                        case  4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
                        case  6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
                        case  7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
                        case  8: u64EffAddr += pVCpu->cpum.GstCtx.r8;  break;
                        case  9: u64EffAddr += pVCpu->cpum.GstCtx.r9;  break;
                        case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
                        case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
                        case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
                        case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
                        case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
                        /* complicated encodings */
                        case 5:
                        case 13:
                            if ((bRm & X86_MODRM_MOD_MASK) != 0)
                            {
                                if (!pVCpu->iem.s.uRexB)
                                {
                                    u64EffAddr += pVCpu->cpum.GstCtx.rbp;
                                    SET_SS_DEF();
                                }
                                else
                                    u64EffAddr += pVCpu->cpum.GstCtx.r13;
                            }
                            else
                            {
                                uint32_t u32Disp;
                                IEM_OPCODE_GET_NEXT_U32(&u32Disp);
                                u64EffAddr += (int32_t)u32Disp;
                                uInfo      |= u32Disp;
                            }
                            break;
                        IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
                    }
                    break;
                }
                IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
            }

            /* Get and add the displacement. */
            switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
            {
                case 0:
                    break;
                case 1:
                {
                    int8_t i8Disp;
                    IEM_OPCODE_GET_NEXT_S8(&i8Disp);
                    u64EffAddr += i8Disp;
                    uInfo      |= (uint32_t)(int32_t)i8Disp;
                    break;
                }
                case 2:
                {
                    uint32_t u32Disp;
                    IEM_OPCODE_GET_NEXT_U32(&u32Disp);
                    u64EffAddr += (int32_t)u32Disp;
                    uInfo      |= u32Disp;
                    break;
                }
                IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
            }

        }

        *puInfo = uInfo;
        if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
        {
            Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
            return u64EffAddr;
        }
        Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
        Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
        return u64EffAddr & UINT32_MAX;
    }

@@ -627,15 +285,15 @@ (unchanged, collapsed by the viewer)
  /**
  …  (iemThreadedSetBranched and the "annotated" instruction-table include, unchanged)
  #include "IEMThreadedInstructions.cpp.h"

Removed (old lines 643–1715 — the translation block management and execution code, which stays behind in IEMAllThrdRecompiler.cpp):

    /*
     * Translation block management.
     */
    typedef struct IEMTBCACHE
    {
        uint32_t cHash;
        uint32_t uHashMask;
        PIEMTB   apHash[_64K];
    } IEMTBCACHE;

    static IEMTBCACHE g_TbCache = { _64K, 0xffff, }; /**< Quick and dirty. */

    #define IEMTBCACHE_HASH(a_paCache, a_fTbFlags, a_GCPhysPc) \
        ( ((uint32_t)(a_GCPhysPc) ^ (a_fTbFlags)) & (a_paCache)->uHashMask)


    /**
     * Allocate a translation block for threaded recompilation.
     *
     * @returns Pointer to the translation block on success, NULL on failure.
     * @param   pVM         The cross context virtual machine structure.
     * @param   pVCpu       The cross context virtual CPU structure of the calling
     *                      thread.
     * @param   GCPhysPc    The physical address corresponding to RIP + CS.BASE.
     * @param   fExtraFlags Extra flags (IEMTB_F_XXX).
     */
    static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
    {
        /*
         * Just using the heap for now.  Will make this more efficient and
         * complicated later, don't worry. :-)
         */
        PIEMTB pTb = (PIEMTB)RTMemAlloc(sizeof(IEMTB));
        if (pTb)
        {
            unsigned const cCalls = 128;
            pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
            if (pTb->Thrd.paCalls)
            {
                pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16); /* This will be reallocated later. */
                if (pTb->pabOpcodes)
                {
                    pTb->Thrd.cAllocated    = cCalls;
                    pTb->cbOpcodesAllocated = cCalls * 16;
                    pTb->Thrd.cCalls        = 0;
                    pTb->cbOpcodes          = 0;
                    pTb->pNext              = NULL;
                    RTListInit(&pTb->LocalList);
                    pTb->GCPhysPc           = GCPhysPc;
                    pTb->x86.fAttr          = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
                    pTb->fFlags             = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
                    pTb->cInstructions      = 0;

                    /* Init the first opcode range. */
                    pTb->cRanges            = 1;
                    pTb->aRanges[0].cbOpcodes   = 0;
                    pTb->aRanges[0].offOpcodes  = 0;
                    pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
                    pTb->aRanges[0].u2Unused    = 0;
                    pTb->aRanges[0].idxPhysPage = 0;
                    pTb->aGCPhysPages[0]    = NIL_RTGCPHYS;
                    pTb->aGCPhysPages[1]    = NIL_RTGCPHYS;

                    pVCpu->iem.s.cTbAllocs++;
                    return pTb;
                }
                RTMemFree(pTb->Thrd.paCalls);
            }
            RTMemFree(pTb);
        }
        RT_NOREF(pVM);
        return NULL;
    }


    /**
     * Frees pTb.
     *
     * @param   pVM     The cross context virtual machine structure.
     * @param   pVCpu   The cross context virtual CPU structure of the calling
     *                  thread.
     * @param   pTb     The translation block to free.
     */
    static void iemThreadedTbFree(PVMCC pVM, PVMCPUCC pVCpu, PIEMTB pTb)
    {
        RT_NOREF(pVM);
        AssertPtr(pTb);

        AssertCompile(IEMTB_F_STATE_OBSOLETE == IEMTB_F_STATE_MASK);
        pTb->fFlags |= IEMTB_F_STATE_OBSOLETE; /* works, both bits set */

        /* Unlink it from the hash table: */
        uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, pTb->fFlags, pTb->GCPhysPc);
        PIEMTB pTbCur = g_TbCache.apHash[idxHash];
        if (pTbCur == pTb)
            g_TbCache.apHash[idxHash] = pTb->pNext;
        else
            while (pTbCur)
            {
                PIEMTB const pNextTb = pTbCur->pNext;
                if (pNextTb == pTb)
                {
                    pTbCur->pNext = pTb->pNext;
                    break;
                }
                pTbCur = pNextTb;
            }

        /* Free it. */
        RTMemFree(pTb->Thrd.paCalls);
        pTb->Thrd.paCalls = NULL;

        RTMemFree(pTb->pabOpcodes);
        pTb->pabOpcodes = NULL;

        RTMemFree(pTb);
        pVCpu->iem.s.cTbFrees++;
    }


    /**
     * Called by opcode verifier functions when they detect a problem.
     */
    void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb)
    {
        iemThreadedTbFree(pVCpu->CTX_SUFF(pVM), pVCpu, pTb);
    }


    static PIEMTB iemThreadedTbLookup(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
    {
        uint32_t const fFlags  = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags | IEMTB_F_STATE_READY;
        uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, fFlags, GCPhysPc);
        Log10(("TB lookup: idxHash=%#x fFlags=%#x GCPhysPc=%RGp\n", idxHash, fFlags, GCPhysPc));
        PIEMTB pTb = g_TbCache.apHash[idxHash];
        while (pTb)
        {
            if (pTb->GCPhysPc == GCPhysPc)
            {
                if (pTb->fFlags == fFlags)
                {
                    if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
                    {
    #ifdef VBOX_WITH_STATISTICS
                        pVCpu->iem.s.cTbLookupHits++;
    #endif
                        return pTb;
                    }
                    Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
                }
                else
                    Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
            }
            else
                Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));

            pTb = pTb->pNext;
        }
        RT_NOREF(pVM);
        pVCpu->iem.s.cTbLookupMisses++;
        return pTb;
    }


    static void iemThreadedTbAdd(PVMCC pVM, PVMCPUCC pVCpu, PIEMTB pTb)
    {
        uint32_t const idxHash = IEMTBCACHE_HASH(&g_TbCache, pTb->fFlags, pTb->GCPhysPc);
        pTb->pNext = g_TbCache.apHash[idxHash];
        g_TbCache.apHash[idxHash] = pTb;
        STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
        STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
        if (LogIs12Enabled())
        {
            Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
                   pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, idxHash, pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
            for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
                Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
                       pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
                       pTb->aRanges[idxRange].idxPhysPage == 0
                       ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
                       : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
        }
        RT_NOREF(pVM);
    }


    /*
     * Real code.
     */

    #ifdef LOG_ENABLED
    /**
     * Logs the current instruction.
     * @param   pVCpu       The cross context virtual CPU structure of the calling EMT.
     * @param   pszFunction The IEM function doing the execution.
     */
    static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction) RT_NOEXCEPT
    {
    # ifdef IN_RING3
        if (LogIs2Enabled())
        {
            char szInstr[256];
            uint32_t cbInstr = 0;
            DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
                               DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
                               szInstr, sizeof(szInstr), &cbInstr);

            PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
            Log2(("**** %s fExec=%x pTb=%p\n"
                  " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
                  " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
                  " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
                  " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
                  " %s\n"
                  , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3,
                  pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
                  pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
                  pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
                  pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
                  pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
                  szInstr));

            if (LogIs3Enabled())
                DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL);
        }
        else
    # endif
            LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
                     pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
    }
    #endif /* LOG_ENABLED */


    static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
    {
        RT_NOREF(pVM, pVCpu);
        return rcStrict;
    }


    /**
     * Initializes the decoder state when compiling TBs.
     *
     * This presumes that fExec has already been initialized.
     *
     * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
     * to apply fixes to them as well.
     *
     * @param   pVCpu   The cross context virtual CPU structure of the calling
     *                  thread.
     * @param   fReInit Clear for the first call for a TB, set for subsequent calls
     *                  from inside the compile loop where we can skip a couple of
     *                  things.
     */
    DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit)
    {
        /* ASSUMES: That iemInitExec was already called and that anyone changing
           CPU state affecting the fExec bits since then will have updated fExec! */
        AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
                  ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));

        IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);

        /* Decoder state: */
        pVCpu->iem.s.enmDefAddrMode     = enmMode;  /** @todo check if this is correct... */
        pVCpu->iem.s.enmEffAddrMode     = enmMode;
        if (enmMode != IEMMODE_64BIT)
        {
            pVCpu->iem.s.enmDefOpSize   = enmMode;  /** @todo check if this is correct... */
            pVCpu->iem.s.enmEffOpSize   = enmMode;
        }
        else
        {
            pVCpu->iem.s.enmDefOpSize   = IEMMODE_32BIT;
            pVCpu->iem.s.enmEffOpSize   = IEMMODE_32BIT;
        }
        pVCpu->iem.s.fPrefixes          = 0;
        pVCpu->iem.s.uRexReg            = 0;
        pVCpu->iem.s.uRexB              = 0;
        pVCpu->iem.s.uRexIndex          = 0;
        pVCpu->iem.s.idxPrefix          = 0;
        pVCpu->iem.s.uVex3rdReg         = 0;
        pVCpu->iem.s.uVexLength         = 0;
        pVCpu->iem.s.fEvexStuff         = 0;
        pVCpu->iem.s.iEffSeg            = X86_SREG_DS;
        pVCpu->iem.s.offModRm           = 0;
        pVCpu->iem.s.iNextMapping       = 0;

        if (!fReInit)
        {
            pVCpu->iem.s.cActiveMappings    = 0;
            pVCpu->iem.s.rcPassUp           = VINF_SUCCESS;
            pVCpu->iem.s.fEndTb             = false;
            pVCpu->iem.s.fTbCheckOpcodes    = false;
            pVCpu->iem.s.fTbBranched        = IEMBRANCHED_F_NO;
            pVCpu->iem.s.fTbCrossedPage     = false;
        }
        else
        {
            Assert(pVCpu->iem.s.cActiveMappings == 0);
            Assert(pVCpu->iem.s.rcPassUp        == VINF_SUCCESS);
            Assert(pVCpu->iem.s.fEndTb          == false);
            Assert(pVCpu->iem.s.fTbCrossedPage  == false);
        }

    #ifdef DBGFTRACE_ENABLED
        switch (IEM_GET_CPU_MODE(pVCpu))
        {
            case IEMMODE_64BIT:
                RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
                break;
            case IEMMODE_32BIT:
                RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
                break;
            case IEMMODE_16BIT:
                RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
                break;
        }
    #endif
    }


    /**
     * Initializes the opcode fetcher when starting the compilation.
     *
     * @param   pVCpu   The cross context virtual CPU structure of the calling
     *                  thread.
     */
    DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
    {
        /* Almost everything is done by iemGetPcWithPhysAndCode() already.  We just need to initialize the index into abOpcode. */
    #ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
        pVCpu->iem.s.offOpcode = 0;
    #else
        RT_NOREF(pVCpu);
    #endif
    }


    /**
     * Re-initializes the opcode fetcher between instructions while compiling.
     *
     * @param   pVCpu   The cross context virtual CPU structure of the calling
     *                  thread.
     */
    DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
    {
        if (pVCpu->iem.s.pbInstrBuf)
        {
            uint64_t off = pVCpu->cpum.GstCtx.rip;
            Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
            off += pVCpu->cpum.GstCtx.cs.u64Base;
            off -= pVCpu->iem.s.uInstrBufPc;
            if (off < pVCpu->iem.s.cbInstrBufTotal)
            {
                pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
                pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
                if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
                    pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
                else
                    pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
            }
            else
            {
                pVCpu->iem.s.pbInstrBuf       = NULL;
                pVCpu->iem.s.offInstrNextByte = 0;
                pVCpu->iem.s.offCurInstrStart = 0;
                pVCpu->iem.s.cbInstrBuf       = 0;
                pVCpu->iem.s.cbInstrBufTotal  = 0;
                pVCpu->iem.s.GCPhysInstrBuf   = NIL_RTGCPHYS;
            }
        }
        else
        {
            pVCpu->iem.s.offInstrNextByte = 0;
            pVCpu->iem.s.offCurInstrStart = 0;
            pVCpu->iem.s.cbInstrBuf       = 0;
            pVCpu->iem.s.cbInstrBufTotal  = 0;
    #ifdef VBOX_STRICT
            pVCpu->iem.s.GCPhysInstrBuf   = NIL_RTGCPHYS;
    #endif
        }
    #ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
        pVCpu->iem.s.offOpcode = 0;
    #endif
    }


    DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
    {
        switch (cbInstr)
        {
            default: AssertMsgFailed(("%#x\n", cbInstr));               RT_FALL_THROUGH();
            case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14];             RT_FALL_THROUGH();
            case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13];             RT_FALL_THROUGH();
            case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12];             RT_FALL_THROUGH();
            case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11];             RT_FALL_THROUGH();
            case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10];             RT_FALL_THROUGH();
            case 10: pbDst[9]  = pVCpu->iem.s.abOpcode[9];              RT_FALL_THROUGH();
            case 9:  pbDst[8]  = pVCpu->iem.s.abOpcode[8];              RT_FALL_THROUGH();
            case 8:  pbDst[7]  = pVCpu->iem.s.abOpcode[7];              RT_FALL_THROUGH();
            case 7:  pbDst[6]  = pVCpu->iem.s.abOpcode[6];              RT_FALL_THROUGH();
            case 6:  pbDst[5]  = pVCpu->iem.s.abOpcode[5];              RT_FALL_THROUGH();
            case 5:  pbDst[4]  = pVCpu->iem.s.abOpcode[4];              RT_FALL_THROUGH();
            case 4:  pbDst[3]  = pVCpu->iem.s.abOpcode[3];              RT_FALL_THROUGH();
            case 3:  pbDst[2]  = pVCpu->iem.s.abOpcode[2];              RT_FALL_THROUGH();
            case 2:  pbDst[1]  = pVCpu->iem.s.abOpcode[1];              RT_FALL_THROUGH();
            case 1:  pbDst[0]  = pVCpu->iem.s.abOpcode[0];              break;
        }
    }


    /**
     * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
     *
     *      - CS LIM check required.
     *      - Must recheck opcode bytes.
     *      - Previous instruction branched.
     *      - TLB load detected, probably due to page crossing.
     *
     * @returns true if everything went well, false if we're out of space in the TB
     *          (e.g. opcode ranges).
     * @param   pVCpu   The cross context virtual CPU structure of the calling
     *                  thread.
     * @param   pTb     The translation block being compiled.
     */
    static bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
    {
        Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);

        /*
         * Prepare the call now, even before we know if we can accept the instruction
         * in this TB.  This allows us to amend parameters w/o making every case suffer.
         */
        uint8_t const  cbInstr   = IEM_GET_INSTR_LEN(pVCpu);
        uint16_t const offOpcode = pTb->cbOpcodes;
        uint8_t        idxRange  = pTb->cRanges - 1;

        PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
        pCall->offOpcode   = offOpcode;
        pCall->idxRange    = idxRange;
        pCall->cbOpcode    = cbInstr;
        pCall->auParams[0] = cbInstr;
        pCall->auParams[1] = idxRange;
        pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;

        /** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
         *        gotten onto.  If we do, stop */

        /*
         * Case 1: We've branched (RIP changed).
         *
         *         Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
         *                      Req: 1 extra range, no extra phys.
         *
         *         Sub-case 1b: Different page but no page boundary crossing, so TLB load
         *                      necessary (fTbCrossedPage is true).
         *                      Req: 1 extra range, probably 1 extra phys page entry.
         *
         *         Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
         *                      but in addition we cross into the following page and require
         *                      another TLB load.
         *                      Req: 2 extra ranges, probably 2 extra phys page entries.
         *
         *         Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
         *                      the following page (thus fTbCrossedPage is true).
         *                      Req: 2 extra ranges, probably 1 extra phys page entry.
         *
         * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
         *       it may trigger "spuriously" from the CPU point of view because of
         *       physical page changes that'll invalidate the physical TLB and trigger a
         *       call to the function.  In theory this shouldn't be a big deal, just a bit
         *       of performance loss as we'll pick the LoadingTlb variants.
         *
         * Note! We do not currently optimize branching to the next instruction (sorry
         *       32-bit PIC code).  We could maybe do that in the branching code that
         *       sets (or not) fTbBranched.
         */
        /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
         *        variant in win 3.1 code and the call variant in 32-bit linux PIC
         *        code.  This'll require filtering out far jmps and calls, as they
         *        load CS which should technically be considered indirect since the
         *        GDT/LDT entry's base address can be modified independently from
         *        the code. */
        if (pVCpu->iem.s.fTbBranched != 0)
        {
            if (   !pVCpu->iem.s.fTbCrossedPage       /* 1a */
                || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
            {
                /* 1a + 1b - instruction fully within the branched to page. */
                Assert(pVCpu->iem.s.offCurInstrStart >= 0);
                Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);

                if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
                {
                    /* Check that we've got a free range. */
                    idxRange += 1;
                    if (idxRange < RT_ELEMENTS(pTb->aRanges))
                    { /* likely */ }
                    else
                    {
                        Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
                        return false;
                    }
                    pCall->idxRange    = idxRange;
                    pCall->auParams[1] = idxRange;
                    pCall->auParams[2] = 0;

                    /* Check that we've got a free page slot. */
                    AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
                    RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
                    if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
                        pTb->aRanges[idxRange].idxPhysPage = 0;
                    else if (   pTb->aGCPhysPages[0] == NIL_RTGCPHYS
                             || pTb->aGCPhysPages[0] == GCPhysNew)
                    {
                        pTb->aGCPhysPages[0] = GCPhysNew;
                        pTb->aRanges[idxRange].idxPhysPage = 1;
                    }
                    else if (   pTb->aGCPhysPages[1] == NIL_RTGCPHYS
                             || pTb->aGCPhysPages[1] == GCPhysNew)
                    {
                        pTb->aGCPhysPages[1] = GCPhysNew;
                        pTb->aRanges[idxRange].idxPhysPage = 2;
                    }
                    else
                    {
                        Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
                        return false;
                    }

                    /* Finish setting up the new range. */
                    pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
                    pTb->aRanges[idxRange].offOpcodes  = offOpcode;
                    pTb->aRanges[idxRange].cbOpcodes   = cbInstr;
                    pTb->aRanges[idxRange].u2Unused    = 0;
                    pTb->cRanges++;
                }
                else
                {
                    Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
                    pTb->aRanges[idxRange].cbOpcodes += cbInstr;
                }

                /* Determine which function we need to load & check.
                   Note! For jumps to a new page, we'll set both fTbBranched and
                         fTbCrossedPage to avoid unnecessary TLB work for intra
                         page branching */
                if (   (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
                    || pVCpu->iem.s.fTbCrossedPage)
                    pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
                                       ? kIemThreadedFunc_CheckCsLimAndOpcodesLoadingTlb
                                       : kIemThreadedFunc_CheckOpcodesLoadingTlb;
                else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
                    pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
                                       ? kIemThreadedFunc_CheckCsLimAndPcAndOpcodes
                                       : kIemThreadedFunc_CheckPcAndOpcodes;
                else
                {
                    Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
                    pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
                                       ? kIemThreadedFunc_CheckCsLimAndOpcodes
                                       : kIemThreadedFunc_CheckOpcodes;
                }
            }
            else
            {
                /* 1c + 1d - instruction crosses pages. */
                Assert(pVCpu->iem.s.offCurInstrStart < 0);
                Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);

                /* Lazy bird: Check that this isn't case 1c, since we've already
                              loaded the first physical address.  End the TB and
                              make it a case 2b instead.

                              Hmm. Too much bother to detect, so just do the same
                              with case 1d as well. */
    #if 0       /** @todo get back to this later when we've got the actual branch code in
                 *        place. */
                uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;

                /* Check that we've got two free ranges. */
                if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
                { /* likely */ }
                else
                    return false;
                idxRange += 1;
                pCall->idxRange    = idxRange;
                pCall->auParams[1] = idxRange;
                pCall->auParams[2] = 0;

                /* ... */

    #else
                Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
                return false;
    #endif
            }
        }

        /*
         * Case 2: Page crossing.
         *
         *         Sub-case 2a: The instruction starts on the first byte in the next page.
         *
         *         Sub-case 2b: The instruction has opcode bytes in both the current and
         *                      following page.
         *
         * Both cases require a new range table entry and probably a new physical
         * page entry.  The difference is in which functions to emit and whether to
         * add bytes to the current range.
         */
        else if (pVCpu->iem.s.fTbCrossedPage)
        {
            /* Check that we've got a free range. */
            idxRange += 1;
            if (idxRange < RT_ELEMENTS(pTb->aRanges))
            { /* likely */ }
            else
            {
                Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
                return false;
            }

            /* Check that we've got a free page slot. */
            AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
            RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
            if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
                pTb->aRanges[idxRange].idxPhysPage = 0;
            else if (   pTb->aGCPhysPages[0] == NIL_RTGCPHYS
                     || pTb->aGCPhysPages[0] == GCPhysNew)
            {
                pTb->aGCPhysPages[0] = GCPhysNew;
                pTb->aRanges[idxRange].idxPhysPage = 1;
            }
            else if (   pTb->aGCPhysPages[1] == NIL_RTGCPHYS
                     || pTb->aGCPhysPages[1] == GCPhysNew)
            {
                pTb->aGCPhysPages[1] = GCPhysNew;
                pTb->aRanges[idxRange].idxPhysPage = 2;
            }
            else
            {
                Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
                return false;
            }

            if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
            {
                Assert(pVCpu->iem.s.offCurInstrStart == 0);
                pCall->idxRange    = idxRange;
                pCall->auParams[1] = idxRange;
                pCall->auParams[2] = 0;

                /* Finish setting up the new range. */
                pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
                pTb->aRanges[idxRange].offOpcodes  = offOpcode;
                pTb->aRanges[idxRange].cbOpcodes   = cbInstr;
                pTb->aRanges[idxRange].u2Unused    = 0;
                pTb->cRanges++;

                /* Determine which function we need to load & check. */
                pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
                                   ? kIemThreadedFunc_CheckCsLimAndOpcodesOnNewPageLoadingTlb
                                   : kIemThreadedFunc_CheckOpcodesOnNewPageLoadingTlb;
            }
            else
            {
                Assert(pVCpu->iem.s.offCurInstrStart < 0);
                Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
                uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
                pCall->auParams[0] |= (uint64_t)cbStartPage << 32;

                /* We're good.  Split the instruction over the old and new range table entries. */
                pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;

                pTb->aRanges[idxRange].offPhysPage = 0;
                pTb->aRanges[idxRange].offOpcodes  = offOpcode + cbStartPage;
                pTb->aRanges[idxRange].cbOpcodes   = cbInstr - cbStartPage;
                pTb->aRanges[idxRange].u2Unused    = 0;
                pTb->cRanges++;

                /* Determine which function we need to load & check. */
                if (pVCpu->iem.s.fTbCheckOpcodes)
                    pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
                                       ? kIemThreadedFunc_CheckCsLimAndOpcodesAcrossPageLoadingTlb
                                       : kIemThreadedFunc_CheckOpcodesAcrossPageLoadingTlb;
                else
                    pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
                                       ? kIemThreadedFunc_CheckCsLimAndOpcodesOnNextPageLoadingTlb
                                       : kIemThreadedFunc_CheckOpcodesOnNextPageLoadingTlb;
            }
        }

        /*
         * Regular case: No new range required.
         */
        else
        {
            Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
            if (pVCpu->iem.s.fTbCheckOpcodes)
                pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
                                   ? kIemThreadedFunc_CheckCsLimAndOpcodes
                                   : kIemThreadedFunc_CheckOpcodes;
            else
                pCall->enmFunction = kIemThreadedFunc_CheckCsLim;

            iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
            pTb->cbOpcodes                    = offOpcode + cbInstr;
            pTb->aRanges[idxRange].cbOpcodes += cbInstr;
            Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);
        }

        /*
         * Commit the call.
         */
        pTb->Thrd.cCalls++;

        /*
         * Clear state.
         */
        pVCpu->iem.s.fTbBranched     = IEMBRANCHED_F_NO;
        pVCpu->iem.s.fTbCrossedPage  = false;
        pVCpu->iem.s.fTbCheckOpcodes = false;

        /*
         * Copy opcode bytes.
         */
        iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
        pTb->cbOpcodes = offOpcode + cbInstr;
        Assert(pTb->cbOpcodes <= pTb->cbOpcodesAllocated);

        return true;
    }


    /**
     * Compiles a new TB and executes it.
     *
     * We combine compilation and execution here as it makes for simpler code flow
     * in the main loop and it allows interpreting while compiling if we want to
     * explore that option.
     *
     * @returns Strict VBox status code.
     * @param   pVM         The cross context virtual machine structure.
     * @param   pVCpu       The cross context virtual CPU structure of the calling
     *                      thread.
     * @param   GCPhysPc    The physical address corresponding to the current
     *                      RIP+CS.BASE.
     * @param   fExtraFlags Extra translation block flags: IEMTB_F_TYPE_THREADED and
     *                      maybe IEMTB_F_RIP_CHECKS.
     */
    static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
    {
        /*
         * Allocate a new translation block.
         */
        PIEMTB pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags | IEMTB_F_STATE_COMPILING);
        AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);

        /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
           functions may get at it. */
        pVCpu->iem.s.pCurTbR3 = pTb;

        /*
         * Now for the recompilation.  (This mimics IEMExecLots in many ways.)
         */
        iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/);
        iemThreadedCompileInitOpcodeFetching(pVCpu);
        VBOXSTRICTRC rcStrict;
        for (;;)
        {
            /* Process the next instruction. */
    #ifdef LOG_ENABLED
            iemThreadedLogCurInstr(pVCpu, "CC");
            uint16_t const uCsLog  = pVCpu->cpum.GstCtx.cs.Sel;
            uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
    #endif
            uint8_t  b; IEM_OPCODE_GET_FIRST_U8(&b);
            uint16_t const cCallsPrev = pTb->Thrd.cCalls;

            rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
            if (   rcStrict == VINF_SUCCESS
                && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
                && !pVCpu->iem.s.fEndTb)
            {
                Assert(pTb->Thrd.cCalls > cCallsPrev);
                Assert(cCallsPrev - pTb->Thrd.cCalls < 5);

                pVCpu->iem.s.cInstructions++;
            }
            else
            {
                Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
                      uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
                if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
                    rcStrict = VINF_SUCCESS;

                if (pTb->Thrd.cCalls > 0)
                {
                    if (cCallsPrev != pTb->Thrd.cCalls)
                        pVCpu->iem.s.cInstructions++;
                    break;
                }

                pVCpu->iem.s.pCurTbR3 = NULL;
                iemThreadedTbFree(pVM, pVCpu, pTb);
                return iemExecStatusCodeFiddling(pVCpu, rcStrict);
            }

            /* Still space in the TB? */
            if (   pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
                && pTb->cbOpcodes + 16 <= pTb->cbOpcodesAllocated)
                iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/);
            else
            {
                Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
                      uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
                break;
            }
            iemThreadedCompileReInitOpcodeFetching(pVCpu);
        }

        /*
         * Complete the TB and link it.
         */
        pTb->fFlags = (pTb->fFlags & ~IEMTB_F_STATE_MASK) | IEMTB_F_STATE_READY;
        iemThreadedTbAdd(pVM, pVCpu, pTb);

    #ifdef IEM_COMPILE_ONLY_MODE
        /*
         * Execute the translation block.
         */
    #endif

        return iemExecStatusCodeFiddling(pVCpu, rcStrict);
    }


    /**
     * Executes a translation block.
     *
     * @returns Strict VBox status code.
     * @param   pVCpu   The cross context virtual CPU structure of the calling
     *                  thread.
     * @param   pTb     The translation block to execute.
     */
    static VBOXSTRICTRC iemThreadedTbExec(PVMCPUCC pVCpu, PIEMTB pTb)
    {
        /* Check the opcodes in the first page before starting execution. */
        Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
        Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
        if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
        { /* likely */ }
        else
        {
            Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
            iemThreadedTbFree(pVCpu->pVMR3, pVCpu, pTb);
            return VINF_SUCCESS;
        }

        /* Set the current TB so CIMPL functions may get at it. */
        pVCpu->iem.s.pCurTbR3 = pTb;
        pVCpu->iem.s.cTbExec++;

        /* The execution loop. */
        PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
        uint32_t             cCallsLeft = pTb->Thrd.cCalls;
        while (cCallsLeft-- > 0)
        {
    #ifdef LOG_ENABLED
            iemThreadedLogCurInstr(pVCpu, "EX");
            Log9(("%04x:%08RX64: #%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
                  pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->enmFunction, g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
    #endif
            VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
                                                                                              pCallEntry->auParams[0],
                                                                                              pCallEntry->auParams[1],
                                                                                              pCallEntry->auParams[2]);

            if (RT_LIKELY(   rcStrict == VINF_SUCCESS
                          && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
                pCallEntry++;
            else
            {
                pVCpu->iem.s.pCurTbR3 = NULL;

                /* Some status codes are just to get us out of this loop and
                   continue in a different translation block. */
                if (rcStrict == VINF_IEM_REEXEC_MODE_CHANGED)
                    return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
                return iemExecStatusCodeFiddling(pVCpu, rcStrict);
            }
        }

        pVCpu->iem.s.pCurTbR3 = NULL;
        return VINF_SUCCESS;
    }


    /**
     * This is called when the PC doesn't match the current pbInstrBuf.
     *
     * Upon return, we're ready for opcode fetching.  But please note that
     * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
     * MMIO or unassigned).
     */
    static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
    {
        pVCpu->iem.s.pbInstrBuf       = NULL;
        pVCpu->iem.s.offCurInstrStart = 0;
        pVCpu->iem.s.offInstrNextByte = 0;
        iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
        return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
    }


    /** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
    DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
    {
        /*
         * Set uCurTbStartPc to RIP and calc the effective PC.
         */
        uint64_t uPc = pVCpu->cpum.GstCtx.rip;
        pVCpu->iem.s.uCurTbStartPc = uPc;
        Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
        uPc += pVCpu->cpum.GstCtx.cs.u64Base;

        /*
         * Advance within the current buffer (PAGE) when possible.
         */
        if (pVCpu->iem.s.pbInstrBuf)
        {
            uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
            if (off < pVCpu->iem.s.cbInstrBufTotal)
            {
                pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
                pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
                if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
                    pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
                else
                    pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;

                return pVCpu->iem.s.GCPhysInstrBuf + off;
            }
        }
        return iemGetPcWithPhysAndCodeMissed(pVCpu);
    }


    /**
     * Determines the extra IEMTB_F_XXX flags.
     *
     * @returns IEMTB_F_TYPE_THREADED and maybe IEMTB_F_RIP_CHECKS.
     * @param   pVCpu   The cross context virtual CPU structure of the calling
     *                  thread.
     */
    DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
    {
        /*
         * Return IEMTB_F_RIP_CHECKS if the current PC is invalid or if it is
         * likely to go invalid before the end of the translation block.
         */
        if (IEM_IS_64BIT_CODE(pVCpu))
            return IEMTB_F_TYPE_THREADED;

        if (RT_LIKELY(   pVCpu->cpum.GstCtx.eip < pVCpu->cpum.GstCtx.cs.u32Limit
                      && pVCpu->cpum.GstCtx.eip - pVCpu->cpum.GstCtx.cs.u32Limit >= X86_PAGE_SIZE))
            return IEMTB_F_TYPE_THREADED;

        return IEMTB_F_TYPE_THREADED | IEMTB_F_CS_LIM_CHECKS;
    }


    VMMDECL(VBOXSTRICTRC) IEMExecRecompilerThreaded(PVMCC pVM, PVMCPUCC pVCpu)
    {
        /*
         * See if there is an interrupt pending in TRPM, inject it if we can.
         */
        if (!TRPMHasTrap(pVCpu))
        { /* likely */ }
        else
        {
            VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
            if (RT_LIKELY(rcStrict == VINF_SUCCESS))
            { /* likely */ }
            else
                return rcStrict;
        }

        /*
         * Init the execution environment.
         */
        iemInitExec(pVCpu, 0 /*fExecOpts*/);

        /*
         * Run-loop.
         *
         * If we're using setjmp/longjmp we combine all the catching here to avoid
         * having to call setjmp for each block we're executing.
         */
        for (;;)
        {
            PIEMTB       pTb = NULL;
            VBOXSTRICTRC rcStrict;
            IEM_TRY_SETJMP(pVCpu, rcStrict)
            {
                uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
                for (uint32_t iIterations = 0; ; iIterations++)
                {
                    /* Translate PC to physical address, we'll need this for both lookup and compilation. */
                    RTGCPHYS const GCPhysPc    = iemGetPcWithPhysAndCode(pVCpu);
                    uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);

                    pTb = iemThreadedTbLookup(pVM, pVCpu, GCPhysPc, fExtraFlags);
                    if (pTb)
                        rcStrict = iemThreadedTbExec(pVCpu, pTb);
                    else
                        rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
                    if (rcStrict == VINF_SUCCESS)
                    {
                        Assert(pVCpu->iem.s.cActiveMappings == 0);

                        uint64_t fCpu = pVCpu->fLocalForcedActions;
                        fCpu &= VMCPU_FF_ALL_MASK & ~(  VMCPU_FF_PGM_SYNC_CR3
                                                      | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
                                                      | VMCPU_FF_TLB_FLUSH
                                                      | VMCPU_FF_UNHALT );
                        if (RT_LIKELY(   (   !fCpu
                                          || (   !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
                                              && !pVCpu->cpum.GstCtx.rflags.Bits.u1IF) )
                                      && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
                        {
                            if (RT_LIKELY(   (iIterations & cPollRate) != 0
                                          || !TMTimerPollBool(pVM, pVCpu)))
                            {
                            }
                            else
                                return VINF_SUCCESS;
                        }
                        else
                            return VINF_SUCCESS;
                    }
                    else
                        return rcStrict;
                }
            }
            IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
            {
                pVCpu->iem.s.cLongJumps++;
                if (pVCpu->iem.s.cActiveMappings > 0)
                    iemMemRollback(pVCpu);

                /* If pTb isn't NULL we're in iemThreadedTbExec. */
                if (!pTb)
                {
                    /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode. */
                    pTb = pVCpu->iem.s.pCurTbR3;
                    if (pTb)
                    {
                        /* If the pCurTbR3 block is in compiling state, we're in iemThreadedCompile,
                           otherwise it's iemThreadedTbExec inside iemThreadedCompile (compile option). */
                        if ((pTb->fFlags & IEMTB_F_STATE_MASK) == IEMTB_F_STATE_COMPILING)
                            return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
                    }
                }
                return rcStrict;
            }
            IEM_CATCH_LONGJMP_END(pVCpu);
        }
    }
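The TB management code removed from the copy above implements a classic threaded-interpreter pattern: compilation fills an array of (function, parameters) entries, and iemThreadedTbExec then walks that array until an entry signals a state change. A stripped-down model of that dispatch loop, with all IEM specifics removed (the types and names below are illustrative, not from the changeset):

    /* Minimal sketch of the threaded-call dispatch used by iemThreadedTbExec:
       each entry names a function resolved from a fixed table plus up to three
       64-bit parameters. */
    #include <stdint.h>

    typedef int (*PFNTHRDCALL)(void *pvCtx, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2);

    typedef struct THRDCALLENTRY
    {
        PFNTHRDCALL pfn;            /* in IEM: looked up via pCallEntry->enmFunction */
        uint64_t    auParams[3];
    } THRDCALLENTRY;

    /* Returns 0 (the VINF_SUCCESS analogue) if the whole block ran to
       completion, otherwise the status of the call that requested a stop. */
    static int thrdRunBlock(void *pvCtx, THRDCALLENTRY const *paCalls, uint32_t cCalls)
    {
        for (uint32_t i = 0; i < cCalls; i++)
        {
            int rc = paCalls[i].pfn(pvCtx, paCalls[i].auParams[0],
                                    paCalls[i].auParams[1], paCalls[i].auParams[2]);
            if (rc != 0)
                return rc;      /* e.g. mode changed, TB obsolete, interrupt */
        }
        return 0;
    }

The design choice this illustrates: because every entry has the same signature, the compiled block needs no code generation at all, only table indices and parameters, which is what makes the threaded recompiler much simpler than a native one.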
trunk/src/VBox/VMM/include/IEMInternal.h (r100734 → r100736)

@@ -4948,3 +4948,10 @@
  IEM_CIMPL_PROTO_1(iemCImpl_Hypercall, uint16_t, uDisOpcode); /* both */

+ extern const PFNIEMOP g_apfnIemInterpretOnlyOneByteMap[256];
+
+ /*
+  * Recompiler related stuff.
+  */
+ extern const PFNIEMOP g_apfnIemThreadedRecompilerOneByteMap[256];
+
  void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb);

@@ -4990,7 +4997,6 @@
  (PVMCPU pVCpu, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2));

-
-
- extern const PFNIEMOP g_apfnIemInterpretOnlyOneByteMap[256];
+ bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb);
+

  /** @} */
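Both one-byte maps declared here are 256-entry tables of decoder functions indexed by the first opcode byte; the compile loop in IEMAllThrdRecompiler.cpp dispatches through the threaded variant. In fragment form (the macros and the table are IEM's own, only the framing here is a sketch):

    /* Table-driven decode, as done inside iemThreadedCompile's for(;;) loop. */
    uint8_t b;
    IEM_OPCODE_GET_FIRST_U8(&b);   /* fetch the first opcode byte */
    VBOXSTRICTRC rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);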