Changeset 103728 in vbox
- Timestamp: Mar 7, 2024 12:11:33 PM
- Location: trunk/src/VBox/VMM
- Files: 4 edited
trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp
r103671 r103728 133 133 static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, 134 134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg); 135 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR 136 static uint32_t iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg, 137 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz); 138 # endif 135 139 static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative); 136 140 #endif … … 3015 3019 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow; 3016 3020 #endif 3021 3022 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR 3023 # ifdef RT_ARCH_ARM64 3024 /* 3025 * Arm64 has 32 128-bit registers only, in order to support emulating 256-bit registers we pair 3026 * two real registers statically to one virtual for now, leaving us with only 16 256-bit registers. 3027 * We always pair v0 with v1, v2 with v3, etc. so we mark the higher register as fixed here during init 3028 * and the register allocator assumes that it will be always free when the lower is picked. 3029 */ 3030 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa); 3031 # else 3032 uint32_t const fFixedAdditional = 0; 3033 # endif 3034 3035 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK 3036 | fFixedAdditional 3037 # if IEMNATIVE_HST_SIMD_REG_COUNT < 32 3038 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U) 3039 # endif 3040 ; 3041 pReNative->Core.bmHstSimdRegsWithGstShadow = 0; 3042 pReNative->Core.bmGstSimdRegShadows = 0; 3043 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0; 3044 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0; 3045 3046 /* Full host register reinit: */ 3047 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++) 3048 { 3049 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0; 3050 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid; 3051 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid; 3052 } 3053 3054 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional; 3055 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1) 3056 { 3057 fRegs &= ~RT_BIT_32(idxReg); 3058 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved; 3059 } 3060 3061 #ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0 3062 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp; 3063 #endif 3064 3065 #endif 3066 3017 3067 return pReNative; 3018 3068 } … … 3436 3486 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev; 3437 3487 } 3488 3489 3490 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR 3491 /** 3492 * Debug Info: Record info about guest register shadowing. 
3493 */ 3494 static void iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg, 3495 uint8_t idxHstSimdReg = UINT8_MAX, uint8_t idxHstSimdRegPrev = UINT8_MAX) 3496 { 3497 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo); 3498 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing; 3499 pEntry->GuestSimdRegShadowing.uUnused = 0; 3500 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg; 3501 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg; 3502 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev; 3503 } 3504 # endif 3438 3505 3439 3506 … … 5134 5201 5135 5202 5203 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR 5204 /********************************************************************************************************************************* 5205 * SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) * 5206 *********************************************************************************************************************************/ 5207 5208 /** 5209 * Info about shadowed guest SIMD register values. 5210 * @see IEMNATIVEGSTSIMDREG 5211 */ 5212 static struct 5213 { 5214 /** Offset in VMCPU of XMM (low 128-bit) registers. */ 5215 uint32_t offXmm; 5216 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */ 5217 uint32_t offYmm; 5218 /** Name (for logging). */ 5219 const char *pszName; 5220 } const g_aGstSimdShadowInfo[] = 5221 { 5222 #define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \ 5223 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg]) 5224 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", }, 5225 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", }, 5226 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", }, 5227 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", }, 5228 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", }, 5229 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", }, 5230 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", }, 5231 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", }, 5232 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", }, 5233 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", }, 5234 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", }, 5235 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", }, 5236 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", }, 5237 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", }, 5238 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", }, 5239 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", }, 5240 #undef CPUMCTX_OFF_AND_SIZE 5241 }; 5242 AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End); 5243 5244 5245 #ifdef LOG_ENABLED 5246 /** Host CPU SIMD register names. 
*/ 5247 DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] = 5248 { 5249 #ifdef RT_ARCH_AMD64 5250 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15" 5251 #elif RT_ARCH_ARM64 5252 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", 5253 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", 5254 #else 5255 # error "port me" 5256 #endif 5257 }; 5258 #endif 5259 5260 5261 DECL_FORCE_INLINE(uint8_t) iemNativeSimdRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdReg, 5262 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT 5263 { 5264 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg); 5265 5266 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = enmWhat; 5267 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0; 5268 RT_NOREF(idxVar); 5269 return idxSimdReg; 5270 } 5271 5272 5273 /** 5274 * Frees a temporary SIMD register. 5275 * 5276 * Any shadow copies of guest registers assigned to the host register will not 5277 * be flushed by this operation. 5278 */ 5279 DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT 5280 { 5281 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)); 5282 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp); 5283 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg); 5284 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n", 5285 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)); 5286 } 5287 5288 5289 /** 5290 * Locate a register, possibly freeing one up. 5291 * 5292 * This ASSUMES the caller has done the minimal/optimal allocation checks and 5293 * failed. 5294 * 5295 * @returns Host register number on success. Returns UINT8_MAX if no registers 5296 * found, the caller is supposed to deal with this and raise a 5297 * allocation type specific status code (if desired). 5298 * 5299 * @throws VBox status code if we're run into trouble spilling a variable of 5300 * recording debug info. Does NOT throw anything if we're out of 5301 * registers, though. 5302 */ 5303 static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile, 5304 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK) 5305 { 5306 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree); 5307 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK)); 5308 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK)); 5309 5310 AssertFailed(); 5311 5312 /* 5313 * Try a freed register that's shadowing a guest register. 5314 */ 5315 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask; 5316 if (fRegs) 5317 { 5318 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar); 5319 5320 #if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */ 5321 /* 5322 * When we have livness information, we use it to kick out all shadowed 5323 * guest register that will not be needed any more in this TB. If we're 5324 * lucky, this may prevent us from ending up here again. 5325 * 5326 * Note! We must consider the previous entry here so we don't free 5327 * anything that the current threaded function requires (current 5328 * entry is produced by the next threaded function). 
5329 */ 5330 uint32_t const idxCurCall = pReNative->idxCurCall; 5331 if (idxCurCall > 0) 5332 { 5333 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1]; 5334 5335 # ifndef IEMLIVENESS_EXTENDED_LAYOUT 5336 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */ 5337 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2); 5338 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */ 5339 #else 5340 /* Construct a mask of the registers not in the read or write state. 5341 Note! We could skips writes, if they aren't from us, as this is just 5342 a hack to prevent trashing registers that have just been written 5343 or will be written when we retire the current instruction. */ 5344 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64 5345 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64 5346 & IEMLIVENESSBIT_MASK; 5347 #endif 5348 /* Merge EFLAGS. */ 5349 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */ 5350 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */ 5351 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */ 5352 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1; 5353 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags); 5354 5355 /* If it matches any shadowed registers. */ 5356 if (pReNative->Core.bmGstRegShadows & fToFreeMask) 5357 { 5358 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed); 5359 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask); 5360 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */ 5361 5362 /* See if we've got any unshadowed registers we can return now. */ 5363 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow; 5364 if (fUnshadowedRegs) 5365 { 5366 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped); 5367 return (fPreferVolatile 5368 ? ASMBitFirstSetU32(fUnshadowedRegs) 5369 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK 5370 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs)) 5371 - 1; 5372 } 5373 } 5374 } 5375 #endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */ 5376 5377 unsigned const idxReg = (fPreferVolatile 5378 ? ASMBitFirstSetU32(fRegs) 5379 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK 5380 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs)) 5381 - 1; 5382 5383 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0); 5384 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows) 5385 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows); 5386 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)); 5387 Assert(pReNative->Core.aHstSimdRegs[idxReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid); 5388 5389 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg); 5390 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows; 5391 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0; 5392 return idxReg; 5393 } 5394 5395 /* 5396 * Try free up a variable that's in a register. 5397 * 5398 * We do two rounds here, first evacuating variables we don't need to be 5399 * saved on the stack, then in the second round move things to the stack. 
5400 */ 5401 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar); 5402 AssertReleaseFailed(); /** @todo */ 5403 #if 0 5404 for (uint32_t iLoop = 0; iLoop < 2; iLoop++) 5405 { 5406 uint32_t fVars = pReNative->Core.bmSimdVars; 5407 while (fVars) 5408 { 5409 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1; 5410 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg; 5411 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs) 5412 && (RT_BIT_32(idxReg) & fRegMask) 5413 && ( iLoop == 0 5414 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack 5415 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack) 5416 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired) 5417 { 5418 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)); 5419 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows) 5420 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows); 5421 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End)); 5422 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)) 5423 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)); 5424 5425 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack) 5426 { 5427 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar)); 5428 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg); 5429 } 5430 5431 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX; 5432 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg); 5433 5434 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg); 5435 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows; 5436 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0; 5437 return idxReg; 5438 } 5439 fVars &= ~RT_BIT_32(idxVar); 5440 } 5441 } 5442 #else 5443 RT_NOREF(poff); 5444 #endif 5445 5446 return UINT8_MAX; 5447 } 5448 5449 5450 /** 5451 * Marks host SIMD register @a idxHstSimdReg as containing a shadow copy of guest 5452 * SIMD register @a enmGstSimdReg. 5453 * 5454 * ASSUMES that caller has made sure @a enmGstSimdReg is not associated with any 5455 * host register before calling. 5456 */ 5457 DECL_FORCE_INLINE(void) 5458 iemNativeSimdRegMarkAsGstSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off) 5459 { 5460 Assert(!(pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))); 5461 Assert(!pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows); 5462 Assert((unsigned)enmGstSimdReg < (unsigned)kIemNativeGstSimdReg_End); 5463 5464 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxHstSimdReg; 5465 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows |= RT_BIT_64(enmGstSimdReg); 5466 pReNative->Core.bmGstSimdRegShadows |= RT_BIT_64(enmGstSimdReg); 5467 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxHstSimdReg); 5468 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO 5469 iemNativeDbgInfoAddNativeOffset(pReNative, off); 5470 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxHstSimdReg); 5471 #else 5472 RT_NOREF(off); 5473 #endif 5474 } 5475 5476 5477 /** 5478 * Transfers the guest SIMD register shadow claims of @a enmGstSimdReg from @a idxSimdRegFrom 5479 * to @a idxSimdRegTo. 
5480 */ 5481 DECL_FORCE_INLINE(void) 5482 iemNativeSimdRegTransferGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdRegFrom, uint8_t idxSimdRegTo, 5483 IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off) 5484 { 5485 Assert(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & RT_BIT_64(enmGstSimdReg)); 5486 Assert(pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] == idxSimdRegFrom); 5487 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows) 5488 == pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows 5489 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End)); 5490 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows) 5491 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows); 5492 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdRegFrom)) 5493 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows)); 5494 Assert( pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded 5495 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].enmLoaded); 5496 5497 5498 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstSimdReg); 5499 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows = fGstRegShadowsFrom; 5500 if (!fGstRegShadowsFrom) 5501 { 5502 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdRegFrom); 5503 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid; 5504 } 5505 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxSimdRegTo); 5506 pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows |= RT_BIT_64(enmGstSimdReg); 5507 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxSimdRegTo; 5508 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO 5509 iemNativeDbgInfoAddNativeOffset(pReNative, off); 5510 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxSimdRegTo, idxSimdRegFrom); 5511 #else 5512 RT_NOREF(off); 5513 #endif 5514 } 5515 5516 5517 /** 5518 * Clear any guest register shadow claims from @a idxHstSimdReg. 5519 * 5520 * The register does not need to be shadowing any guest registers. 
5521 */ 5522 DECL_FORCE_INLINE(void) 5523 iemNativeSimdRegClearGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, uint32_t off) 5524 { 5525 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows) 5526 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows 5527 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End)); 5528 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstSimdReg)) 5529 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)); 5530 Assert( !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyLo128) 5531 && !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyHi128)); 5532 5533 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO 5534 uint64_t fGstRegs = pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows; 5535 if (fGstRegs) 5536 { 5537 Assert(fGstRegs < RT_BIT_64(kIemNativeGstSimdReg_End)); 5538 iemNativeDbgInfoAddNativeOffset(pReNative, off); 5539 while (fGstRegs) 5540 { 5541 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1; 5542 fGstRegs &= ~RT_BIT_64(iGstReg); 5543 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, (IEMNATIVEGSTSIMDREG)iGstReg, UINT8_MAX, idxHstSimdReg); 5544 } 5545 } 5546 #else 5547 RT_NOREF(off); 5548 #endif 5549 5550 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstSimdReg); 5551 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows; 5552 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0; 5553 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid; 5554 } 5555 5556 5557 /** 5558 * Flushes a set of guest register shadow copies. 5559 * 5560 * This is usually done after calling a threaded function or a C-implementation 5561 * of an instruction. 5562 * 5563 * @param pReNative The native recompile state. 5564 * @param fGstSimdRegs Set of guest SIMD registers to flush. 5565 */ 5566 DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT 5567 { 5568 /* 5569 * Reduce the mask by what's currently shadowed 5570 */ 5571 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows; 5572 fGstSimdRegs &= bmGstSimdRegShadows; 5573 if (fGstSimdRegs) 5574 { 5575 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs; 5576 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew)); 5577 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew; 5578 if (bmGstSimdRegShadowsNew) 5579 { 5580 /* 5581 * Partial. 
5582 */ 5583 do 5584 { 5585 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1; 5586 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg]; 5587 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows)); 5588 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg)); 5589 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg)); 5590 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg)); 5591 5592 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg); 5593 fGstSimdRegs &= ~fInThisHstReg; 5594 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg; 5595 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew; 5596 if (!fGstRegShadowsNew) 5597 { 5598 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg); 5599 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid; 5600 } 5601 } while (fGstSimdRegs != 0); 5602 } 5603 else 5604 { 5605 /* 5606 * Clear all. 5607 */ 5608 do 5609 { 5610 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1; 5611 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg]; 5612 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows)); 5613 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg)); 5614 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg)); 5615 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg)); 5616 5617 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg)); 5618 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0; 5619 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid; 5620 } while (fGstSimdRegs != 0); 5621 pReNative->Core.bmHstSimdRegsWithGstShadow = 0; 5622 } 5623 } 5624 } 5625 5626 5627 /** 5628 * Allocates a temporary host SIMD register. 5629 * 5630 * This may emit code to save register content onto the stack in order to free 5631 * up a register. 5632 * 5633 * @returns The host register number; throws VBox status code on failure, 5634 * so no need to check the return value. 5635 * @param pReNative The native recompile state. 5636 * @param poff Pointer to the variable with the code buffer position. 5637 * This will be update if we need to move a variable from 5638 * register to stack in order to satisfy the request. 5639 * @param fPreferVolatile Whether to prefer volatile over non-volatile 5640 * registers (@c true, default) or the other way around 5641 * (@c false, for iemNativeRegAllocTmpForGuestReg()). 5642 */ 5643 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/) 5644 { 5645 /* 5646 * Try find a completely unused register, preferably a call-volatile one. 5647 */ 5648 uint8_t idxSimdReg; 5649 uint32_t fRegs = ~pReNative->Core.bmHstRegs 5650 & ~pReNative->Core.bmHstRegsWithGstShadow 5651 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK); 5652 if (fRegs) 5653 { 5654 if (fPreferVolatile) 5655 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK 5656 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1; 5657 else 5658 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK 5659 ? 
fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1; 5660 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0); 5661 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg))); 5662 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg])); 5663 } 5664 else 5665 { 5666 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile); 5667 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP)); 5668 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg])); 5669 } 5670 5671 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid); 5672 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp); 5673 } 5674 5675 5676 /** 5677 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable 5678 * registers. 5679 * 5680 * @returns The host register number; throws VBox status code on failure, 5681 * so no need to check the return value. 5682 * @param pReNative The native recompile state. 5683 * @param poff Pointer to the variable with the code buffer position. 5684 * This will be update if we need to move a variable from 5685 * register to stack in order to satisfy the request. 5686 * @param fRegMask Mask of acceptable registers. 5687 * @param fPreferVolatile Whether to prefer volatile over non-volatile 5688 * registers (@c true, default) or the other way around 5689 * (@c false, for iemNativeRegAllocTmpForGuestReg()). 5690 */ 5691 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask, 5692 bool fPreferVolatile /*= true*/) 5693 { 5694 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK)); 5695 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK)); 5696 5697 /* 5698 * Try find a completely unused register, preferably a call-volatile one. 5699 */ 5700 uint8_t idxSimdReg; 5701 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs 5702 & ~pReNative->Core.bmHstSimdRegsWithGstShadow 5703 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK) 5704 & fRegMask; 5705 if (fRegs) 5706 { 5707 if (fPreferVolatile) 5708 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK 5709 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1; 5710 else 5711 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK 5712 ? 
fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1; 5713 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0); 5714 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg))); 5715 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg])); 5716 } 5717 else 5718 { 5719 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask); 5720 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP)); 5721 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg])); 5722 } 5723 5724 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid); 5725 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp); 5726 } 5727 5728 5729 static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst, 5730 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst) 5731 { 5732 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */ 5733 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == enmLoadSzDst 5734 || pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == kIemNativeGstSimdRegLdStSz_256) 5735 { 5736 # ifdef RT_ARCH_ARM64 5737 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */ 5738 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1)); 5739 # endif 5740 5741 switch (enmLoadSzDst) 5742 { 5743 case kIemNativeGstSimdRegLdStSz_256: 5744 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc); 5745 case kIemNativeGstSimdRegLdStSz_Low128: 5746 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc); 5747 case kIemNativeGstSimdRegLdStSz_High128: 5748 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1); 5749 default: 5750 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE)); 5751 } 5752 5753 pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded = enmLoadSzDst; 5754 return off; 5755 } 5756 else 5757 { 5758 /* Complicated stuff where the source is currently missing something, later. */ 5759 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE)); 5760 } 5761 5762 return off; 5763 } 5764 5765 5766 /** 5767 * Allocates a temporary host SIMD register for keeping a guest 5768 * SIMD register value. 5769 * 5770 * Since we may already have a register holding the guest register value, 5771 * code will be emitted to do the loading if that's not the case. Code may also 5772 * be emitted if we have to free up a register to satify the request. 5773 * 5774 * @returns The host register number; throws VBox status code on failure, so no 5775 * need to check the return value. 5776 * @param pReNative The native recompile state. 5777 * @param poff Pointer to the variable with the code buffer 5778 * position. This will be update if we need to move a 5779 * variable from register to stack in order to satisfy 5780 * the request. 5781 * @param enmGstSimdReg The guest SIMD register that will is to be updated. 5782 * @param enmIntendedUse How the caller will be using the host register. 
5783 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any 5784 * register is okay (default). The ASSUMPTION here is 5785 * that the caller has already flushed all volatile 5786 * registers, so this is only applied if we allocate a 5787 * new register. 5788 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent 5789 */ 5790 DECL_HIDDEN_THROW(uint8_t) 5791 iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg, 5792 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/, 5793 bool fNoVolatileRegs /*= false*/) 5794 { 5795 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End); 5796 #if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */ 5797 AssertMsg( pReNative->idxCurCall == 0 5798 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite 5799 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) 5800 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate 5801 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) 5802 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ), 5803 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))); 5804 #endif 5805 #if defined(LOG_ENABLED) || defined(VBOX_STRICT) 5806 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" }; 5807 #endif 5808 uint32_t const fRegMask = !fNoVolatileRegs 5809 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK 5810 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK; 5811 5812 /* 5813 * First check if the guest register value is already in a host register. 5814 */ 5815 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg)) 5816 { 5817 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg]; 5818 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)); 5819 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg)); 5820 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)); 5821 5822 /* It's not supposed to be allocated... */ 5823 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg))) 5824 { 5825 /* 5826 * If the register will trash the guest shadow copy, try find a 5827 * completely unused register we can use instead. If that fails, 5828 * we need to disassociate the host reg from the guest reg. 5829 */ 5830 /** @todo would be nice to know if preserving the register is in any way helpful. */ 5831 /* If the purpose is calculations, try duplicate the register value as 5832 we'll be clobbering the shadow. 
*/ 5833 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation 5834 && ( ~pReNative->Core.bmHstSimdRegs 5835 & ~pReNative->Core.bmHstSimdRegsWithGstShadow 5836 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK))) 5837 { 5838 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask); 5839 5840 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz); 5841 5842 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n", 5843 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, 5844 g_apszIemNativeHstSimdRegNames[idxRegNew])); 5845 idxSimdReg = idxRegNew; 5846 } 5847 /* If the current register matches the restrictions, go ahead and allocate 5848 it for the caller. */ 5849 else if (fRegMask & RT_BIT_32(idxSimdReg)) 5850 { 5851 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg); 5852 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp; 5853 if (enmIntendedUse != kIemNativeGstRegUse_Calculation) 5854 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n", 5855 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse])); 5856 else 5857 { 5858 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff); 5859 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n", 5860 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName)); 5861 } 5862 } 5863 /* Otherwise, allocate a register that satisfies the caller and transfer 5864 the shadowing if compatible with the intended use. (This basically 5865 means the call wants a non-volatile register (RSP push/pop scenario).) */ 5866 else 5867 { 5868 Assert(fNoVolatileRegs); 5869 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg), 5870 !fNoVolatileRegs 5871 && enmIntendedUse == kIemNativeGstRegUse_Calculation); 5872 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz); 5873 if (enmIntendedUse != kIemNativeGstRegUse_Calculation) 5874 { 5875 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff); 5876 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transfering %s to %s for guest %s %s\n", 5877 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew], 5878 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse])); 5879 } 5880 else 5881 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n", 5882 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, 5883 g_apszIemNativeHstSimdRegNames[idxRegNew])); 5884 idxSimdReg = idxRegNew; 5885 } 5886 } 5887 else 5888 { 5889 /* 5890 * Oops. Shadowed guest register already allocated! 5891 * 5892 * Allocate a new register, copy the value and, if updating, the 5893 * guest shadow copy assignment to the new register. 5894 */ 5895 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate 5896 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite, 5897 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n", 5898 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse])); 5899 5900 /** @todo share register for readonly access. 
*/ 5901 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, 5902 enmIntendedUse == kIemNativeGstRegUse_Calculation); 5903 5904 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite) 5905 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz); 5906 else 5907 { 5908 /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */ 5909 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz; 5910 } 5911 5912 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate 5913 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite) 5914 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n", 5915 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, 5916 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse])); 5917 else 5918 { 5919 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff); 5920 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n", 5921 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, 5922 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse])); 5923 } 5924 idxSimdReg = idxRegNew; 5925 } 5926 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */ 5927 5928 #ifdef VBOX_STRICT 5929 /* Strict builds: Check that the value is correct. */ 5930 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz); 5931 #endif 5932 5933 return idxSimdReg; 5934 } 5935 5936 /* 5937 * Allocate a new register, load it with the guest value and designate it as a copy of the 5938 */ 5939 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation); 5940 5941 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite) 5942 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz); 5943 else 5944 { 5945 /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */ 5946 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz; 5947 } 5948 5949 if (enmIntendedUse != kIemNativeGstRegUse_Calculation) 5950 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff); 5951 5952 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n", 5953 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse])); 5954 5955 return idxRegNew; 5956 } 5957 5958 5959 /** 5960 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association. 5961 * 5962 * @returns New code bufferoffset. 5963 * @param pReNative The native recompile state. 5964 * @param off Current code buffer position. 5965 * @param idxGstSimdReg The guest SIMD register to flush. 
5966 */ 5967 static uint32_t iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGstSimdReg) 5968 { 5969 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg]; 5970 5971 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n", 5972 g_aGstSimdShadowInfo[idxGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg], 5973 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg), 5974 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg))); 5975 5976 #ifdef RT_ARCH_AMD64 5977 # error "Port me" 5978 #elif defined(RT_ARCH_ARM64) 5979 /* ASSUMING there are two consecutive host registers to store the potential 256-bit guest register. */ 5980 Assert(!(idxHstSimdReg & 0x1)); 5981 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg)) 5982 { 5983 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256 5984 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128); 5985 off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offXmm); 5986 } 5987 5988 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg)) 5989 { 5990 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256 5991 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128); 5992 off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg + 1, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm); 5993 } 5994 #endif 5995 5996 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, idxGstSimdReg); 5997 return off; 5998 } 5999 6000 #endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */ 6001 6002 6003 6004 /********************************************************************************************************************************* 6005 * Code emitters for flushing pending guest register writes and sanity checks * 6006 *********************************************************************************************************************************/ 6007 5136 6008 /** 5137 6009 * Flushes delayed write of a specific guest register. … … 5149 6021 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */ 5150 6022 #endif 6023 6024 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR 6025 if ( enmClass == kIemNativeGstRegRef_XReg 6026 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg)) 6027 { 6028 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxReg); 6029 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now, that the referenced register doesn't change). */ 6030 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg]; 6031 6032 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off); 6033 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg))); 6034 } 6035 #endif 5151 6036 RT_NOREF(pReNative, enmClass, idxReg); 5152 6037 return off; … … 5163 6048 * RIP updates, since these are the most common ones. 
5164 6049 */ 5165 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/) 6050 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/, 6051 bool fFlushShadows /*= true*/) 5166 6052 { 5167 6053 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING … … 5170 6056 #else 5171 6057 RT_NOREF(pReNative, fGstShwExcept); 6058 #endif 6059 6060 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR 6061 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */ 6062 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++) 6063 { 6064 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg) 6065 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))); 6066 6067 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)) 6068 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxGstSimdReg); 6069 6070 if ( fFlushShadows 6071 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)) 6072 { 6073 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg]; 6074 6075 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off); 6076 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg))); 6077 } 6078 } 6079 #else 6080 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows); 5172 6081 #endif 5173 6082 … … 5277 6186 } 5278 6187 6188 6189 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR 6190 /** 6191 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg. 6192 * 6193 * @returns New code buffer offset on success, UINT32_MAX on failure. 6194 * @param pReNative The recompiler state. 6195 * @param off The current code buffer position. 6196 * @param idxHstSimdReg The host register to load the guest register value into. 6197 * @param enmGstSimdReg The guest register to load. 6198 * @param enmLoadSz The load size of the register. 6199 * 6200 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg, 6201 * that is something the caller needs to do if applicable. 
6202 */ 6203 DECL_HIDDEN_THROW(uint32_t) 6204 iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg, 6205 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz) 6206 { 6207 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo)); 6208 6209 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz; 6210 switch (enmLoadSz) 6211 { 6212 case kIemNativeGstSimdRegLdStSz_256: 6213 return iemNativeEmitSimdLoadVecRegFromVCpuU256(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm, 6214 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm); 6215 case kIemNativeGstSimdRegLdStSz_Low128: 6216 return iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm); 6217 case kIemNativeGstSimdRegLdStSz_High128: 6218 return iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm); 6219 default: 6220 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE)); 6221 } 6222 } 6223 #endif 5279 6224 5280 6225 #ifdef VBOX_STRICT … … 5431 6376 return off; 5432 6377 } 6378 6379 6380 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR 6381 /** 6382 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same 6383 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint 6384 * instruction if that's not the case. 6385 * 6386 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0. 6387 * Trashes EFLAGS on AMD64. 6388 */ 6389 static uint32_t 6390 iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg, 6391 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz) 6392 { 6393 # ifdef RT_ARCH_AMD64 6394 # error "Port me!" 
6395 # elif defined(RT_ARCH_ARM64) 6396 /* mov vectmp0, [gstreg] */ 6397 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz); 6398 6399 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256) 6400 { 6401 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5); 6402 /* eor vectmp0, vectmp0, idxSimdReg */ 6403 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg); 6404 /* cnt vectmp0, vectmp0, #0*/ 6405 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0); 6406 /* umov tmp0, vectmp0.D[0] */ 6407 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, 6408 0 /*idxElem*/, kArmv8InstrUmovSz_U64); 6409 /* cbz tmp0, +1 */ 6410 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0); 6411 /* brk #0x1000+enmGstReg */ 6412 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000)); 6413 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off); 6414 } 6415 6416 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256) 6417 { 6418 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5); 6419 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */ 6420 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg); 6421 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/ 6422 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1); 6423 /* umov tmp0, (vectmp0 + 1).D[0] */ 6424 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, 6425 0 /*idxElem*/, kArmv8InstrUmovSz_U64); 6426 /* cbz tmp0, +1 */ 6427 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0); 6428 /* brk #0x1000+enmGstReg */ 6429 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000)); 6430 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off); 6431 } 6432 6433 # else 6434 # error "Port me!" 6435 # endif 6436 return off; 6437 } 6438 # endif 5433 6439 #endif /* VBOX_STRICT */ 5434 6440 … … 7086 8092 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? 7087 8093 */ 7088 off = iemNativeRegFlushPendingWrites(pReNative, off );8094 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/); 7089 8095 7090 8096 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING … … 7147 8153 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? 7148 8154 */ 7149 off = iemNativeRegFlushPendingWrites(pReNative, off );8155 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/); 7150 8156 7151 8157 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING … … 10970 11976 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg); 10971 11977 10972 /** @todo r=aeichner This needs to be done as soon as we shadow SSE registers in host registers, needs 10973 * figuring out the semantics on how this is tracked. 10974 * For now this is safe though as the reference will directly operate on the CPUMCTX 10975 * structure so the value can't get out of sync. 
10976 */ 10977 #if 0 11978 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR 10978 11979 /* If it's not a const reference we need to flush the shadow copy of the register now. */ 10979 11980 if (!fConst) 10980 iemNative RegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_XREG(iXReg)));11981 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg))); 10981 11982 #else 10982 11983 RT_NOREF(fConst); … … 14056 15057 14057 15058 15059 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR 15060 /********************************************************************************************************************************* 15061 * Emitters for SSE/AVX specific operations. * 15062 *********************************************************************************************************************************/ 15063 15064 #define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \ 15065 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc) 15066 15067 /** Emits code for IEM_MC_FETCH_FSW. */ 15068 DECL_INLINE_THROW(uint32_t) 15069 iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc) 15070 { 15071 /* Allocate destination and source register. */ 15072 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst), 15073 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite); 15074 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc), 15075 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly); 15076 15077 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc); 15078 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXRegDst); 15079 /* We don't need to write everything back here as the destination is marked as dirty and will be flushed automatically. */ 15080 15081 /* Free but don't flush the source and destination register. */ 15082 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst); 15083 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc); 15084 15085 return off; 15086 } 15087 #endif 15088 15089 14058 15090 /********************************************************************************************************************************* 14059 15091 * The native code generator functions for each MC block. 
* … … 14169 15201 ENTRY(cpum.GstCtx.eflags), 14170 15202 ENTRY(cpum.GstCtx.uRipInhibitInt), 15203 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR 15204 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]), 15205 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]), 15206 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]), 15207 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]), 15208 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]), 15209 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]), 15210 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]), 15211 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]), 15212 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]), 15213 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]), 15214 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]), 15215 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]), 15216 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]), 15217 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]), 15218 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]), 15219 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]), 15220 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]), 15221 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]), 15222 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]), 15223 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]), 15224 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]), 15225 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]), 15226 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]), 15227 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]), 15228 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]), 15229 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]), 15230 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]), 15231 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]), 15232 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]), 15233 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]), 15234 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]), 15235 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15]) 15236 #endif 14171 15237 #undef ENTRY 14172 15238 }; … … 14502 15568 continue; 14503 15569 } 15570 15571 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR 15572 case kIemTbDbgEntryType_GuestSimdRegShadowing: 15573 { 15574 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry]; 15575 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName; 15576 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX) 15577 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg, 15578 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]); 15579 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX) 15580 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg, 15581 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]); 15582 else 15583 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg, 15584 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg], 15585 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]); 15586 continue; 15587 } 15588 #endif 14504 15589 14505 15590 case kIemTbDbgEntryType_Label: … … 15262 16347 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg)); 15263 16348 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp()); 15264 # if def DEBUG_bird16349 # if defined(DEBUG_bird) || defined(DEBUG_aeichner) 15265 16350 RTLogFlush(NULL); 15266 16351 # endif -
trunk/src/VBox/VMM/include/IEMInternal.h
r103700 r103728
92 92 # define IEMNATIVE_WITH_DELAYED_PC_UPDATING
93 93 #endif
94
95 /** @def IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
96 * Enables the SIMD register allocator @bugref{10614}. */
97 //# define IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
98 /** Enables access to even callee saved registers. */
99 //# define IEMNATIVE_WITH_SIMD_REG_ACCESS_ALL_REGISTERS
94 100
95 101 /** @def VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
… …
964 970 /** Info about a host register shadowing a guest register. */
965 971 kIemTbDbgEntryType_GuestRegShadowing,
972 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
973 /** Info about a host SIMD register shadowing a guest SIMD register. */
974 kIemTbDbgEntryType_GuestSimdRegShadowing,
975 #endif
966 976 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
967 977 /** Info about a delayed RIP update. */
… …
1039 1049 uint32_t idxHstRegPrev : 8;
1040 1050 } GuestRegShadowing;
1051
1052 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1053 struct
1054 {
1055 /* kIemTbDbgEntryType_GuestSimdRegShadowing. */
1056 uint32_t uType : 4;
1057 uint32_t uUnused : 4;
1058 /** The guest register being shadowed (IEMNATIVEGSTSIMDREG). */
1059 uint32_t idxGstSimdReg : 8;
1060 /** The host new register number, UINT8_MAX if dropped. */
1061 uint32_t idxHstSimdReg : 8;
1062 /** The previous host register number, UINT8_MAX if new. */
1063 uint32_t idxHstSimdRegPrev : 8;
1064 } GuestSimdRegShadowing;
1065 #endif
1041 1066
1042 1067 #ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
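A quick sanity check on the new debug-info entry: the five bit-fields of GuestSimdRegShadowing add up to exactly 32 bits, so the struct stays the size of a single 32-bit word like the other IEMTBDBGENTRY variants. The standalone sketch below (not part of the changeset; assumes a C11 compiler where such bit-fields pack into one uint32_t, which holds on the compilers VBox targets) expresses that as a compile-time assertion:

    #include <assert.h>
    #include <stdint.h>

    /* Illustrative stand-in for the GuestSimdRegShadowing debug entry above. */
    struct GuestSimdRegShadowingModel
    {
        uint32_t uType             : 4;
        uint32_t uUnused           : 4;
        uint32_t idxGstSimdReg     : 8;
        uint32_t idxHstSimdReg     : 8;
        uint32_t idxHstSimdRegPrev : 8;
    };

    static_assert(sizeof(struct GuestSimdRegShadowingModel) == sizeof(uint32_t),
                  "4 + 4 + 8 + 8 + 8 bits pack into one 32-bit word");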
trunk/src/VBox/VMM/include/IEMN8veRecompiler.h
r103671 r103728
157 157 * Mask GPRs with fixes assignments, either by us or dictated by the CPU/OS
158 158 * architecture. */
159 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
160 /** @def IEMNATIVE_SIMD_REG_FIXED_MASK
161 * Mask SIMD registers with fixed assignments, either by us or dictated by the CPU/OS
162 * architecture. */
163 /** @def IEMNATIVE_SIMD_REG_FIXED_TMP0
164 * Dedicated temporary SIMD register. */
165 #endif
159 166 #if defined(RT_ARCH_AMD64) && !defined(DOXYGEN_RUNNING)
160 167 # define IEMNATIVE_REG_FIXED_PVMCPU X86_GREG_xBX
… …
164 171 | RT_BIT_32(X86_GREG_xSP) \
165 172 | RT_BIT_32(X86_GREG_xBP) )
173
174 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
175 # if defined(IEMNATIVE_WITH_SIMD_REG_ACCESS_ALL_REGISTERS) || !defined(_MSC_VER)
176 # define IEMNATIVE_SIMD_REG_FIXED_MASK 0
177 # else
178 /** On Windows xmm6 through xmm15 are marked as callee saved. */
179 # define IEMNATIVE_SIMD_REG_FIXED_MASK (UINT32_C(0xffc0))
180 # endif
181 # endif
166 182
167 183 #elif defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
… …
186 202 | IEMNATIVE_REG_FIXED_MASK_ADD)
187 203
204 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
205 # define IEMNATIVE_SIMD_REG_FIXED_TMP0 ARMV8_A64_REG_Q30
206 # if defined(IEMNATIVE_WITH_SIMD_REG_ACCESS_ALL_REGISTERS)
207 # define IEMNATIVE_SIMD_REG_FIXED_MASK RT_BIT_32(ARMV8_A64_REG_Q30)
208 # else
209 /** arm64 declares the low 64-bit of v8-v15 as callee saved. */
210 # define IEMNATIVE_SIMD_REG_FIXED_MASK ( UINT32_C(0xff00) \
211 | RT_BIT_32(ARMV8_A64_REG_Q30))
212 # endif
213 # endif
214
188 215 #else
189 216 # error "port me"
… …
227 254 | RT_BIT_32(X86_GREG_x10) \
228 255 | RT_BIT_32(X86_GREG_x11) )
256 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
257 /* xmm0 - xmm5 are marked as volatile. */
258 # define IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK (UINT32_C(0x3f))
259 # endif
260
229 261 # else
230 262 # define IEMNATIVE_CALL_ARG_GREG_COUNT 6
… …
250 282 | RT_BIT_32(X86_GREG_x10) \
251 283 | RT_BIT_32(X86_GREG_x11) )
284 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
285 /* xmm0 - xmm15 are marked as volatile. */
286 # define IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK (UINT32_C(0xffff))
287 # endif
252 288 # endif
… …
289 325 | RT_BIT_32(ARMV8_A64_REG_X16) \
290 326 | RT_BIT_32(ARMV8_A64_REG_X17) )
327 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
328 /* The low 64 bits of v8 - v15 are marked as callee saved but the rest is volatile,
329 * so to simplify our life a bit we just mark everything as volatile. */
330 # define IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK (UINT32_C(0xffffffff))
331 # endif
291 332
292 333 #endif
… …
306 347 * Mask corresponding to IEMNATIVE_HST_GREG_COUNT that can be applied to
307 348 * inverted register masks and such to get down to a correct set of regs. */
349 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
350 /** @def IEMNATIVE_HST_SIMD_REG_COUNT
351 * Number of host SIMD registers we track. */
352 /** @def IEMNATIVE_HST_SIMD_REG_MASK
353 * Mask corresponding to IEMNATIVE_HST_SIMD_REG_COUNT that can be applied to
354 * inverted register masks and such to get down to a correct set of regs. */
355 #endif
308 356 #ifdef RT_ARCH_AMD64
309 357 # define IEMNATIVE_HST_GREG_COUNT 16
310 358 # define IEMNATIVE_HST_GREG_MASK UINT32_C(0xffff)
311 359
360 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
361 # define IEMNATIVE_HST_SIMD_REG_COUNT 16
362 # define IEMNATIVE_HST_SIMD_REG_MASK UINT32_C(0xffff)
363 # endif
364
312 365 #elif defined(RT_ARCH_ARM64)
313 366 # define IEMNATIVE_HST_GREG_COUNT 32
314 367 # define IEMNATIVE_HST_GREG_MASK UINT32_MAX
368
369 # ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
370 # define IEMNATIVE_HST_SIMD_REG_COUNT 32
371 # define IEMNATIVE_HST_SIMD_REG_MASK UINT32_MAX
372 # endif
373
315 374 #else
316 375 # error "Port me!"
… …
735 794 /** @} */
736 795
796
797 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
798 /**
799 * Guest registers that can be shadowed in host SIMD registers.
800 *
801 * @todo r=aeichner Liveness tracking
802 * @todo r=aeichner Given that we can only track xmm/ymm here does this actually make sense?
803 */
804 typedef enum IEMNATIVEGSTSIMDREG : uint8_t
805 {
806 kIemNativeGstSimdReg_SimdRegFirst = 0,
807 kIemNativeGstSimdReg_SimdRegLast = kIemNativeGstSimdReg_SimdRegFirst + 15,
808 kIemNativeGstSimdReg_End
809 } IEMNATIVEGSTSIMDREG;
810
811 /** @name Helpers for converting register numbers to IEMNATIVEGSTSIMDREG values.
812 * @{ */
813 #define IEMNATIVEGSTSIMDREG_SIMD(a_iSimdReg) ((IEMNATIVEGSTSIMDREG)(kIemNativeGstSimdReg_SimdRegFirst + (a_iSimdReg)))
814 /** @} */
815
816 /**
817 * The load/store size for a SIMD guest register.
818 */
819 typedef enum IEMNATIVEGSTSIMDREGLDSTSZ : uint8_t
820 {
821 /** Invalid size. */
822 kIemNativeGstSimdRegLdStSz_Invalid = 0,
823 /** Loads the low 128-bit of a guest SIMD register. */
824 kIemNativeGstSimdRegLdStSz_Low128,
825 /** Loads the high 128-bit of a guest SIMD register. */
826 kIemNativeGstSimdRegLdStSz_High128,
827 /** Loads the whole 256-bits of a guest SIMD register. */
828 kIemNativeGstSimdRegLdStSz_256,
829 /** End value. */
830 kIemNativeGstSimdRegLdStSz_End
831 } IEMNATIVEGSTSIMDREGLDSTSZ;
832 #endif
833
834
737 835 /**
738 836 * Intended use statement for iemNativeRegAllocTmpForGuestReg().
… …
912 1010
913 1011
1012 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1013 /**
1014 * Host SIMD register entry - this tracks a virtual 256-bit register split into two 128-bit
1015 * halves; on architectures where there is no 256-bit register available this entry will track
1016 * two adjacent 128-bit host registers.
1017 *
1018 * The actual allocation status is kept in IEMRECOMPILERSTATE::bmHstSimdRegs.
1019 */
1020 typedef struct IEMNATIVEHSTSIMDREG
1021 {
1022 /** Set of guest registers this one shadows.
1023 *
1024 * Using a bitmap here so we can designate the same host register as a copy
1025 * for more than one guest register. This is expected to be useful in
1026 * situations where one value is copied to several registers in a sequence.
1027 * If the mapping is 1:1, then we'd have to pick which side of a 'MOV SRC,DST'
1028 * sequence we'd want to let this register follow to be a copy of and there
1029 * will always be places where we'd be picking the wrong one.
1030 */
1031 uint64_t fGstRegShadows;
1032 /** What is being kept in this register. */
1033 IEMNATIVEWHAT enmWhat;
1034 /** Flag what is currently loaded, low 128-bits, high 128-bits or complete 256-bits. */
1035 IEMNATIVEGSTSIMDREGLDSTSZ enmLoaded;
1036 /** Alignment padding. */
1037 uint8_t abAlign[6];
1038 } IEMNATIVEHSTSIMDREG;
1039 #endif
1040
1041
914 1042 /**
915 1043 * Core state for the native recompiler, that is, things that needs careful
… …
935 1063 uint64_t bmGstRegShadows;
936 1064
1065 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1066 /** Allocation bitmap for aHstSimdRegs. */
1067 uint32_t bmHstSimdRegs;
1068
1069 /** Bitmap marking which host SIMD register contains guest SIMD register shadow copies.
1070 * This is used during register allocation to try preserve copies. */
1071 uint32_t bmHstSimdRegsWithGstShadow;
1072 /** Bitmap marking valid entries in aidxSimdGstRegShadows. */
1073 uint64_t bmGstSimdRegShadows;
1074 /** Bitmap marking whether the low 128-bit of the shadowed guest register are dirty and need writeback. */
1075 uint64_t bmGstSimdRegShadowDirtyLo128;
1076 /** Bitmap marking whether the high 128-bit of the shadowed guest register are dirty and need writeback. */
1077 uint64_t bmGstSimdRegShadowDirtyHi128;
1078 #endif
1079
937 1080 union
938 1081 {
… …
953 1096 * there are no duplicate copies or ambiguities like that). */
954 1097 uint8_t aidxGstRegShadows[kIemNativeGstReg_End];
1098 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1099 /** Maps a guest SIMD register to a host SIMD register (indexed by IEMNATIVEGSTSIMDREG).
1100 * Entries are only valid if the corresponding bit in bmGstSimdRegShadows is set.
1101 * (A shadow copy of a guest register can only be held in one host register,
1102 * there are no duplicate copies or ambiguities like that). */
1103 uint8_t aidxGstSimdRegShadows[kIemNativeGstSimdReg_End];
1104 #endif
955 1105
956 1106 /** Host register allocation tracking. */
957 1107 IEMNATIVEHSTREG aHstRegs[IEMNATIVE_HST_GREG_COUNT];
1108 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1109 /** Host SIMD register allocation tracking. */
1110 IEMNATIVEHSTSIMDREG aHstSimdRegs[IEMNATIVE_HST_SIMD_REG_COUNT];
1111 #endif
958 1112
959 1113 /** Variables and arguments. */
… …
982 1136 # define IEMNATIVE_VAR_IDX_UNPACK(a_idxVar) (a_idxVar)
983 1137 # define IEMNATIVE_VAR_IDX_PACK(a_idxVar) (a_idxVar)
1138 #endif
1139
1140
1141 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1142 /** Clear the dirty state of the given guest SIMD register. */
1143 # define IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(a_pReNative, a_iSimdReg) \
1144 do { \
1145 (a_pReNative)->Core.bmGstSimdRegShadowDirtyLo128 &= ~RT_BIT_64(a_iSimdReg); \
1146 (a_pReNative)->Core.bmGstSimdRegShadowDirtyHi128 &= ~RT_BIT_64(a_iSimdReg); \
1147 } while (0)
1148
1149 /** Returns whether the low 128-bits of the given guest SIMD register are dirty. */
1150 # define IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(a_pReNative, a_iSimdReg) \
1151 RT_BOOL((a_pReNative)->Core.bmGstSimdRegShadowDirtyLo128 & RT_BIT_64(a_iSimdReg))
1152 /** Returns whether the high 128-bits of the given guest SIMD register are dirty. */
1153 # define IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(a_pReNative, a_iSimdReg) \
1154 RT_BOOL((a_pReNative)->Core.bmGstSimdRegShadowDirtyHi128 & RT_BIT_64(a_iSimdReg))
1155 /** Returns whether the given guest SIMD register is dirty. */
1156 # define IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(a_pReNative, a_iSimdReg) \
1157 RT_BOOL(((a_pReNative)->Core.bmGstSimdRegShadowDirtyLo128 | (a_pReNative)->Core.bmGstSimdRegShadowDirtyHi128) & RT_BIT_64(a_iSimdReg))
1158
1159 /** Set the low 128-bits of the given guest SIMD register to the dirty state. */
1160 # define IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(a_pReNative, a_iSimdReg) \
1161 ((a_pReNative)->Core.bmGstSimdRegShadowDirtyLo128 |= RT_BIT_64(a_iSimdReg))
1162 /** Set the high 128-bits of the given guest SIMD register to the dirty state. */
1163 # define IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(a_pReNative, a_iSimdReg) \
1164 ((a_pReNative)->Core.bmGstSimdRegShadowDirtyHi128 |= RT_BIT_64(a_iSimdReg))
984 1165 #endif
985 1166
… …
1238 1419 DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT;
1239 1420 DECLHIDDEN(void) iemNativeRegFreeAndFlushMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegMask) RT_NOEXCEPT;
1240 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExept = 0);
1421 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExept = 0, bool fFlushShadows = true);
1241 1422 DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs,
1242 1423 uint32_t fKeepVars = 0);
… …
1264 1445 DECL_HIDDEN_THROW(uint32_t) iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1265 1446 PCIEMTHRDEDCALLENTRY pCallEntry);
1447
1448 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1449 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile = true);
1450 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
1451 bool fPreferVolatile = true);
1452 DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
1453 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse = kIemNativeGstRegUse_ReadOnly,
1454 bool fNoVolatileRegs = false);
1455 DECL_HIDDEN_THROW(uint32_t) iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1456 uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
1457 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
1458 #endif
1266 1459
1267 1460 extern DECL_HIDDEN_DATA(const char * const) g_apszIemNativeHstRegNames[];
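For the Windows/AMD64 values above, the fixed and volatile SIMD masks are simply the bit sets for xmm6-xmm15 (callee saved) and xmm0-xmm5 (volatile). The standalone sketch below (illustrative only, not part of the changeset) recomputes and checks the two constants:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        /* Bits 6..15: xmm6..xmm15, callee saved in the Windows calling convention. */
        uint32_t fFixed = 0;
        for (unsigned iReg = 6; iReg <= 15; iReg++)
            fFixed |= UINT32_C(1) << iReg;
        assert(fFixed == UINT32_C(0xffc0));       /* IEMNATIVE_SIMD_REG_FIXED_MASK */

        /* Bits 0..5: xmm0..xmm5, volatile in the Windows calling convention. */
        uint32_t fVolatile = 0;
        for (unsigned iReg = 0; iReg <= 5; iReg++)
            fVolatile |= UINT32_C(1) << iReg;
        assert(fVolatile == UINT32_C(0x3f));      /* IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK */

        return 0;
    }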
trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h
r103646 r103728
6822 6822
6823 6823
6824 #ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6825
6826 /**
6827 * Emits a 128-bit vector register store to a VCpu value.
6828 */
6829 DECL_FORCE_INLINE_THROW(uint32_t)
6830 iemNativeEmitSimdStoreVecRegToVCpuU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
6831 {
6832 #ifdef RT_ARCH_AMD64
6833 AssertReleaseFailed();
6834 #elif defined(RT_ARCH_ARM64)
6835 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
6836
6837 #else
6838 # error "port me"
6839 #endif
6840 return off;
6841 }
6842
6843
6844 /**
6845 * Emits a 128-bit vector register store to a VCpu value.
6846 */
6847 DECL_INLINE_THROW(uint32_t)
6848 iemNativeEmitSimdStoreVecRegToVCpuU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
6849 {
6850 #ifdef RT_ARCH_AMD64
6851 off = iemNativeEmitSimdStoreVecRegToVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iVecReg, offVCpu);
6852 #elif defined(RT_ARCH_ARM64)
6853 off = iemNativeEmitSimdStoreVecRegToVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
6854 #else
6855 # error "port me"
6856 #endif
6857 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6858 return off;
6859 }
6860
6861
6862 /**
6863 * Emits a 128-bit vector register load of a VCpu value.
6864 */
6865 DECL_FORCE_INLINE_THROW(uint32_t)
6866 iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
6867 {
6868 #ifdef RT_ARCH_AMD64
6869 AssertReleaseFailed();
6870 #elif defined(RT_ARCH_ARM64)
6871 off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
6872
6873 #else
6874 # error "port me"
6875 #endif
6876 return off;
6877 }
6878
6879
6880 /**
6881 * Emits a 128-bit vector register load of a VCpu value.
6882 */
6883 DECL_INLINE_THROW(uint32_t)
6884 iemNativeEmitSimdLoadVecRegFromVCpuU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
6885 {
6886 #ifdef RT_ARCH_AMD64
6887 off = iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iVecReg, offVCpu);
6888 #elif defined(RT_ARCH_ARM64)
6889 off = iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
6890 #else
6891 # error "port me"
6892 #endif
6893 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6894 return off;
6895 }
6896
6897
6898 /**
6899 * Emits a 256-bit vector register store to a VCpu value.
6900 */
6901 DECL_FORCE_INLINE_THROW(uint32_t)
6902 iemNativeEmitSimdStoreVecRegToVCpuU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpuLow, uint32_t offVCpuHigh)
6903 {
6904 #ifdef RT_ARCH_AMD64
6905 AssertReleaseFailed();
6906 #elif defined(RT_ARCH_ARM64)
6907 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
6908 Assert(!(iVecReg & 0x1));
6909 off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, iVecReg, offVCpuLow);
6910 off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, iVecReg + 1, offVCpuHigh);
6911 #else
6912 # error "port me"
6913 #endif
6914 return off;
6915 }
6916
6917
6918 /**
6919 * Emits a 256-bit vector register load of a VCpu value.
6920 */
6921 DECL_FORCE_INLINE_THROW(uint32_t)
6922 iemNativeEmitSimdLoadVecRegFromVCpuU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpuLow, uint32_t offVCpuHigh)
6923 {
6924 #ifdef RT_ARCH_AMD64
6925 AssertReleaseFailed();
6926 #elif defined(RT_ARCH_ARM64)
6927 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
6928 Assert(!(iVecReg & 0x1));
6929 off = iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, iVecReg, offVCpuLow);
6930 off = iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, iVecReg + 1, offVCpuHigh);
6931 #else
6932 # error "port me"
6933 #endif
6934 return off;
6935 }
6936
6937
6938 /**
6939 * Emits a vecdst = vecsrc load.
6940 */
6941 DECL_FORCE_INLINE(uint32_t)
6942 iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
6943 {
6944 #ifdef RT_ARCH_AMD64
6945 /* movdqu vecdst, vecsrc */
6946 pCodeBuf[off++] = 0xf3;
6947
6948 if ((iVecRegDst | iVecRegSrc) >= 8)
6949 pCodeBuf[off++] = iVecRegDst < 8 ? X86_OP_REX_B
6950 : iVecRegSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
6951 : X86_OP_REX_R;
6952 pCodeBuf[off++] = 0x0f;
6953 pCodeBuf[off++] = 0x6f;
6954 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegSrc & 7);
6955
6956 #elif defined(RT_ARCH_ARM64)
6957 /* mov dst, src; alias for: orr dst, src, src */
6958 pCodeBuf[off++] = Armv8A64MkVecInstrOrr(iVecRegDst, iVecRegSrc, iVecRegSrc);
6959
6960 #else
6961 # error "port me"
6962 #endif
6963 return off;
6964 }
6965
6966
6967 /**
6968 * Emits a vecdst = vecsrc load, 128-bit.
6969 */
6970 DECL_INLINE_THROW(uint32_t)
6971 iemNativeEmitSimdLoadVecRegFromVecRegU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
6972 {
6973 #ifdef RT_ARCH_AMD64
6974 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 3), off, iVecRegDst, iVecRegSrc);
6975 #elif defined(RT_ARCH_ARM64)
6976 off = iemNativeEmitSimdLoadVecRegFromVecRegU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iVecRegSrc);
6977 #else
6978 # error "port me"
6979 #endif
6980 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6981 return off;
6982 }
6983
6984
6985 /**
6986 * Emits a vecdst = vecsrc load, 256-bit.
6987 */
6988 DECL_INLINE_THROW(uint32_t)
6989 iemNativeEmitSimdLoadVecRegFromVecRegU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iVecRegSrc)
6990 {
6991 #ifdef RT_ARCH_AMD64
6992 AssertReleaseFailed();
6993 #elif defined(RT_ARCH_ARM64)
6994 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
6995 Assert(!(iVecRegDst & 0x1)); Assert(!(iVecRegSrc & 0x1));
6996 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst, iVecRegSrc );
6997 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, iVecRegDst + 1, iVecRegSrc + 1);
6998 #else
6999 # error "port me"
7000 #endif
7001 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7002 return off;
7003 }
7004
7005 #endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
7006
6824 7007 /** @} */
6825 7008
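As a cross-check of the AMD64 path in iemNativeEmitSimdLoadVecRegFromVecRegU128Ex, the sketch below re-implements the same movdqu encoding logic with local stand-in REX constants (0x41 for REX.B and 0x44 for REX.R are assumed here rather than taken from the VBox headers) and prints the bytes for a copy of xmm9 into xmm1; the expected sequence is f3 41 0f 6f c9. Illustrative only, not part of the changeset:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed REX prefix values; the real code uses X86_OP_REX_B / X86_OP_REX_R. */
    #define MY_REX_B 0x41
    #define MY_REX_R 0x44

    /* Minimal re-implementation of the movdqu xmmDst, xmmSrc encoder for illustration. */
    static unsigned emitMovdquRegReg(uint8_t *pb, uint8_t iDst, uint8_t iSrc)
    {
        unsigned off = 0;
        pb[off++] = 0xf3;                               /* mandatory prefix for movdqu */
        if ((iDst | iSrc) >= 8)                         /* extended registers need a REX prefix */
            pb[off++] = iDst < 8  ? MY_REX_B
                      : iSrc >= 8 ? MY_REX_R | MY_REX_B
                      :             MY_REX_R;
        pb[off++] = 0x0f;                               /* two-byte opcode escape */
        pb[off++] = 0x6f;                               /* movdqu xmm, xmm/m128 */
        pb[off++] = (uint8_t)(0xc0 | ((iDst & 7) << 3) | (iSrc & 7)); /* ModRM, register form */
        return off;
    }

    int main(void)
    {
        uint8_t ab[8];
        unsigned cb = emitMovdquRegReg(ab, 1 /*xmm1*/, 9 /*xmm9*/);
        for (unsigned i = 0; i < cb; i++)
            printf("%02x ", ab[i]);                     /* expected: f3 41 0f 6f c9 */
        printf("\n");
        return 0;
    }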