Changeset 103894 in vbox for trunk/src/VBox/VMM
- Timestamp: Mar 18, 2024, 1:48:31 PM
- svn:sync-xref-src-repo-rev: 162266
- Location: trunk/src/VBox/VMM
- Files: 5 edited
trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py
(r103865 → r103894)

@@ -3099,5 +3099,5 @@
     'IEM_MC_FETCH_XREG_PAIR_XMM':      (McBlock.parseMcGeneric,           False, False, False,        ),
     'IEM_MC_FETCH_YREG_U128':          (McBlock.parseMcGeneric,           False, False, False,        ),
-    'IEM_MC_FETCH_YREG_U256':          (McBlock.parseMcGeneric,           False, False, False,        ),
+    'IEM_MC_FETCH_YREG_U256':          (McBlock.parseMcGeneric,           False, False, g_fNativeSimd),
     'IEM_MC_FETCH_YREG_U32':           (McBlock.parseMcGeneric,           False, False, g_fNativeSimd),
     'IEM_MC_FETCH_YREG_U64':           (McBlock.parseMcGeneric,           False, False, g_fNativeSimd),
@@ -3343,5 +3343,5 @@
     'IEM_MC_STORE_YREG_U128':          (McBlock.parseMcGeneric,           True,  True,  False,        ),
     'IEM_MC_STORE_YREG_U128_ZX_VLMAX': (McBlock.parseMcGeneric,           True,  True,  False,        ),
-    'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric,           True,  True,  False,        ),
+    'IEM_MC_STORE_YREG_U256_ZX_VLMAX': (McBlock.parseMcGeneric,           True,  True,  g_fNativeSimd),
     'IEM_MC_STORE_YREG_U32_ZX_VLMAX':  (McBlock.parseMcGeneric,           True,  True,  g_fNativeSimd),
     'IEM_MC_STORE_YREG_U64_ZX_VLMAX':  (McBlock.parseMcGeneric,           True,  True,  g_fNativeSimd),
trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h
(r103891 → r103894)

@@ -7508,4 +7508,56 @@
 
 
+#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
+    off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
+
+
+/** Emits code for IEM_MC_FETCH_YREG_U256. */
+DECL_INLINE_THROW(uint32_t)
+iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
+{
+    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
+    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
+
+    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
+                                                                          kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
+    uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
+
+    off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
+
+    /* Free but don't flush the source register. */
+    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
+    iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
+
+    return off;
+}
+
+
+#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
+    off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
+
+
+/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
+DECL_INLINE_THROW(uint32_t)
+iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
+{
+    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
+    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
+
+    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
+                                                                          kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
+    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitalized*/);
+
+    off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
+    IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iYRegDst);
+    IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYRegDst);
+
+    /* Free but don't flush the source register. */
+    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
+    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
+
+    return off;
+}
+
+
 
 /*********************************************************************************************************************************
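For context, the two hunks above are what make the 256-bit YMM fetch/store microcode statements recompilable natively (matching the g_fNativeSimd flags flipped in IEMAllInstPython.py). A hedged sketch of the kind of MC block they serve, loosely modeled on the VEX register-move handlers; the exact macro sequence and the IEM_MC_BEGIN signature vary between VirtualBox revisions, so treat the code below as illustrative rather than decoder source from this changeset:

    /* Illustrative ymm-to-ymm move; decoder glue omitted. */
    IEM_MC_BEGIN(0, 1, IEM_MC_F_NOT_286_OR_OLDER, 0);
    IEM_MC_LOCAL(RTUINT256U, u256Tmp);             /* 256-bit variable, hence the sizeof(RTUINT256U) asserts above */
    IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT();
    IEM_MC_PREPARE_AVX_USAGE();
    IEM_MC_FETCH_YREG_U256(u256Tmp, IEM_GET_MODRM_RM(pVCpu, bRm));           /* -> iemNativeEmitSimdFetchYregU256 */
    IEM_MC_STORE_YREG_U256_ZX_VLMAX(IEM_GET_MODRM_REG(pVCpu, bRm), u256Tmp); /* -> iemNativeEmitSimdStoreYregU256ZxVlmax */
    IEM_MC_ADVANCE_RIP_AND_FINISH();
    IEM_MC_END();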
trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp
(r103876 → r103894)

@@ -3093,4 +3093,5 @@
         pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
         pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
+        pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
         pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
     }
@@ -3930,4 +3931,7 @@
     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+    Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
+#endif
     RT_NOREF(pszCaller);
 
@@ -4466,4 +4470,7 @@
                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
     Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+    Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
+#endif
 
     if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
@@ -4583,5 +4590,8 @@
     uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
-    Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
+    Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+    Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
+#endif
 
     pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
@@ -4609,3 +4619,61 @@
     }
 }
+
+
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+# ifdef LOG_ENABLED
+/** Host CPU SIMD register names. */
+DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
+{
+# ifdef RT_ARCH_AMD64
+    "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
+# elif RT_ARCH_ARM64
+    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",  "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
+    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
+# else
+#  error "port me"
+# endif
+};
+# endif
+
+
+/**
+ * Frees a SIMD register assigned to a variable.
+ *
+ * The register will be disassociated from the variable.
+ */
+DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
+{
+    Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
+    Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
+    uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
+    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
+    Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
+    Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
+
+    pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
+    pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
+    if (!fFlushShadows)
+        Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
+               g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
+    else
+    {
+        pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
+        uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
+        pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
+        pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
+        uint64_t fGstRegShadows = fGstRegShadowsOld;
+        while (fGstRegShadows)
+        {
+            unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
+            fGstRegShadows &= ~RT_BIT_64(idxGstReg);
+
+            Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
+            pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
+        }
+        Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
+               g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
+    }
+}
+#endif
 
@@ -4952,20 +5020,4 @@
 };
 AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
-
-
-#ifdef LOG_ENABLED
-/** Host CPU SIMD register names. */
-DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
-{
-#ifdef RT_ARCH_AMD64
-    "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
-#elif RT_ARCH_ARM64
-    "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",  "v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
-    "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
-#else
-# error "port me"
-#endif
-};
-#endif
 
 
@@ -6904,4 +6956,7 @@
     pReNative->Core.aVars[idxVar].fRegAcquired = false;
     pReNative->Core.aVars[idxVar].u.uValue = 0;
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+    pReNative->Core.aVars[idxVar].fSimdReg = false;
+#endif
     return idxVar;
 }
@@ -6977,5 +7032,5 @@
     uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
     uint32_t bmStack = ~pReNative->Core.bmStack;
-    while (bmStack != UINT32_MAX)
+    while (bmStack != 0)
     {
         /** @todo allocate from the top to reduce BP displacement. */
@@ -6993,5 +7048,5 @@
             }
         }
-        bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
+        bmStack &= ~(fBitAlignMask << (iSlot & ~fBitAlignMask));
     }
     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
@@ -7298,4 +7353,8 @@
     pVar->idxReg = idxReg;
 
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+    pVar->fSimdReg = false;
+#endif
+
     /*
      * Load it off the stack if we've got a stack slot.
@@ -7324,4 +7383,129 @@
     return idxReg;
 }
+
+
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+/**
+ * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
+ * fixed till we call iemNativeVarRegisterRelease.
+ *
+ * @returns The host register number.
+ * @param   pReNative       The recompiler state.
+ * @param   idxVar          The variable.
+ * @param   poff            Pointer to the instruction buffer offset.
+ *                          In case a register needs to be freed up or the value
+ *                          loaded off the stack.
+ * @param   fInitialized    Set if the variable must already have been initialized.
+ *                          Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
+ *                          the case.
+ * @param   idxRegPref      Preferred SIMD register number or UINT8_MAX.
+ */
+DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
+                                                           bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
+{
+    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
+    PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
+    Assert(   pVar->cbVar == sizeof(RTUINT128U)
+           || pVar->cbVar == sizeof(RTUINT256U));
+    Assert(!pVar->fRegAcquired);
+
+    uint8_t idxReg = pVar->idxReg;
+    if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
+    {
+        Assert(   pVar->enmKind > kIemNativeVarKind_Invalid
+               && pVar->enmKind < kIemNativeVarKind_End);
+        pVar->fRegAcquired = true;
+        return idxReg;
+    }
+
+    /*
+     * If the kind of variable has not yet been set, default to 'stack'.
+     */
+    Assert(   pVar->enmKind >= kIemNativeVarKind_Invalid
+           && pVar->enmKind < kIemNativeVarKind_End);
+    if (pVar->enmKind == kIemNativeVarKind_Invalid)
+        iemNativeVarSetKindToStack(pReNative, idxVar);
+
+    /*
+     * We have to allocate a register for the variable, even if its a stack one
+     * as we don't know if there are modification being made to it before its
+     * finalized (todo: analyze and insert hints about that?).
+     *
+     * If we can, we try get the correct register for argument variables. This
+     * is assuming that most argument variables are fetched as close as possible
+     * to the actual call, so that there aren't any interfering hidden calls
+     * (memory accesses, etc) inbetween.
+     *
+     * If we cannot or it's a variable, we make sure no argument registers
+     * that will be used by this MC block will be allocated here, and we always
+     * prefer non-volatile registers to avoid needing to spill stuff for internal
+     * call.
+     */
+    /** @todo Detect too early argument value fetches and warn about hidden
+     *        calls causing less optimal code to be generated in the python script. */
+
+    uint8_t const uArgNo = pVar->uArgNo;
+    Assert(uArgNo == UINT8_MAX); /* No SIMD registers as arguments for now. */
+
+    /* SIMD is bit simpler for now because there is no support for arguments. */
+    if (   idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
+        || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
+    {
+        uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
+        uint32_t const fRegs        = ~pReNative->Core.bmHstSimdRegs
+                                    & ~pReNative->Core.bmHstSimdRegsWithGstShadow
+                                    & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
+                                    & fNotArgsMask;
+        if (fRegs)
+        {
+            idxReg = (uint8_t)ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
+                                               ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
+            Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
+            Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
+            Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
+        }
+        else
+        {
+            idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
+                                                   IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
+            AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
+            Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
+        }
+    }
+    else
+    {
+        idxReg = idxRegPref;
+        AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
+        Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
+    }
+    iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
+
+    pVar->fSimdReg = true;
+    pVar->idxReg   = idxReg;
+
+    /*
+     * Load it off the stack if we've got a stack slot.
+     */
+    uint8_t const idxStackSlot = pVar->idxStackSlot;
+    if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
+    {
+        Assert(fInitialized);
+        int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
+        switch (pVar->cbVar)
+        {
+            case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
+            default: AssertFailed(); RT_FALL_THRU();
+            case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
+        }
+    }
+    else
+    {
+        Assert(idxStackSlot == UINT8_MAX);
+        AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
+    }
+    pVar->fRegAcquired = true;
+    return idxReg;
+}
+#endif
 
 
@@ -7514,4 +7698,65 @@
         } while (fHstRegs);
     }
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+    fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
+    if (fHstRegs)
+    {
+        do
+        {
+            unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
+            fHstRegs &= ~RT_BIT_32(idxHstReg);
+
+            /*
+             * Guest registers are flushed to CPUMCTX at the moment and don't need allocating a stack slot
+             * which would be more difficult due to spanning multiple stack slots and different sizes
+             * (besides we only have a limited amount of slots at the moment). Fixed temporary registers
+             * don't need saving.
+             */
+            if (   pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
+                || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
+                continue;
+
+            Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
+
+            uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
+            IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
+            AssertStmt(   IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
+                       && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
+                       && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
+                       && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
+                       && (   pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
+                           || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
+                       IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
+            switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
+            {
+                case kIemNativeVarKind_Stack:
+                {
+                    /* Temporarily spill the variable register. */
+                    uint8_t const cbVar        = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
+                    uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
+                    Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
+                           idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
+                    if (cbVar == sizeof(RTUINT128U))
+                        off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
+                    else
+                        off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
+                    continue;
+                }
+
+                case kIemNativeVarKind_Immediate:
+                case kIemNativeVarKind_VarRef:
+                case kIemNativeVarKind_GstRegRef:
+                    /* It is weird to have any of these loaded at this point. */
+                    AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
+                    continue;
+
+                case kIemNativeVarKind_End:
+                case kIemNativeVarKind_Invalid:
+                    break;
+            }
+            AssertFailed();
+        } while (fHstRegs);
+    }
+#endif
     return off;
 }
@@ -7586,4 +7831,59 @@
         } while (fHstRegs);
     }
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+    fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
+    if (fHstRegs)
+    {
+        do
+        {
+            unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
+            fHstRegs &= ~RT_BIT_32(idxHstReg);
+
+            if (   pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
+                || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
+                continue;
+            Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
+
+            uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
+            IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
+            AssertStmt(   IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
+                       && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
+                       && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
+                       && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
+                       && (   pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
+                           || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
+                       IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
+            switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
+            {
+                case kIemNativeVarKind_Stack:
+                {
+                    /* Unspill the variable register. */
+                    uint8_t const cbVar        = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
+                    uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
+                    Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
+                           idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
+
+                    if (cbVar == sizeof(RTUINT128U))
+                        off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
+                    else
+                        off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
+                    continue;
+                }
+
+                case kIemNativeVarKind_Immediate:
+                case kIemNativeVarKind_VarRef:
+                case kIemNativeVarKind_GstRegRef:
+                    /* It is weird to have any of these loaded at this point. */
+                    AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
+                    continue;
+
+                case kIemNativeVarKind_End:
+                case kIemNativeVarKind_Invalid:
+                    break;
+            }
+            AssertFailed();
+        } while (fHstRegs);
+    }
+#endif
     return off;
 }
@@ -7631,4 +7931,15 @@
     /* Free the host register first if any assigned. */
     uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+    if (   idxHstReg != UINT8_MAX
+        && pReNative->Core.aVars[idxVar].fSimdReg)
+    {
+        Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
+        Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
+        pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
+        pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
+    }
+    else
+#endif
     if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
     {
@@ -7832,4 +8143,31 @@
     {
         uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+        if (   idxRegOld != UINT8_MAX
+            && pReNative->Core.aVars[idxVar].fSimdReg)
+        {
+            Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
+            Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
+
+            uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
+            Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
+                   idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
+                   idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
+            if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
+                off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
+            else
+                off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
+
+            Assert(!(  (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
+                     & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
+
+            pReNative->Core.aVars[idxVar].idxReg        = UINT8_MAX;
+            pReNative->Core.bmHstSimdRegs              &= ~RT_BIT_32(idxRegOld);
+            pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
+            pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
+            pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
+        }
+        else
+#endif
         if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
         {
@@ -7964,4 +8302,20 @@
                     int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
                     uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
+# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+                    bool const    fSimdReg       = pReNative->Core.aVars[idxOtherVar].fSimdReg;
+                    uint8_t const cbVar          = pReNative->Core.aVars[idxOtherVar].cbVar;
+                    if (   fSimdReg
+                        && idxRegOther != UINT8_MAX)
+                    {
+                        Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
+                        if (cbVar == sizeof(RTUINT128U))
+                            off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
+                        else
+                            off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
+                        iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
+                        Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
+                    }
+                    else
+# endif
                     if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
                     {
@@ -8050,4 +8404,20 @@
                 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
                 uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+                bool const    fSimdReg       = pReNative->Core.aVars[idxOtherVar].fSimdReg;
+                uint8_t const cbVar          = pReNative->Core.aVars[idxOtherVar].cbVar;
+                if (   fSimdReg
+                    && idxRegOther != UINT8_MAX)
+                {
+                    Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
+                    if (cbVar == sizeof(RTUINT128U))
+                        off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
+                    else
+                        off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
+                    iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
+                    Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
+                }
+                else
+#endif
                 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
                 {
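One detail worth calling out in the file above: besides the SIMD variable plumbing, it carries two one-character bug fixes in the stack-slot allocator scan (old lines 6979 and 6995). After inverting bmStack, set bits mean free slots, so the loop must run while any bit is set (!= 0, not != UINT32_MAX), and a rejected candidate's bits must be cleared, not set. A minimal standalone sketch of the corrected scan, with the alignment handling simplified away; the function and parameter names are illustrative, not from the VirtualBox sources:

    #include <stdint.h>
    #include <strings.h> /* POSIX ffs() */

    /* Bit i of bmUsed set = stack slot i is taken; fAllocMask covers the number
     * of consecutive slots needed (e.g. 0x3 for a two-slot variable). */
    static int exampleFindFreeSlots(uint32_t bmUsed, uint32_t fAllocMask)
    {
        uint32_t bmFree = ~bmUsed;                  /* set bits = free slots */
        while (bmFree != 0)                         /* fixed: was 'bmFree != UINT32_MAX' */
        {
            int const iSlot = ffs(bmFree) - 1;      /* lowest free slot */
            if (((bmFree >> iSlot) & fAllocMask) == fAllocMask)
                return iSlot;                       /* enough adjacent free slots */
            bmFree &= ~(fAllocMask << iSlot);       /* fixed: was '|=', which set bits */
        }
        return -1;                                  /* cf. VERR_IEM_VAR_OUT_OF_STACK_SLOTS */
    }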
trunk/src/VBox/VMM/include/IEMN8veRecompiler.h
(r103876 → r103894)

@@ -189,5 +189,5 @@
 #  define IEMNATIVE_SIMD_REG_FIXED_TMP0 5 /* xmm5/ymm5 */
 #  if defined(IEMNATIVE_WITH_SIMD_REG_ACCESS_ALL_REGISTERS) || !defined(_MSC_VER)
-#   define IEMNATIVE_SIMD_REG_FIXED_MASK RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
+#   define IEMNATIVE_SIMD_REG_FIXED_MASK (RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0))
 #  else
 /** On Windows xmm6 through xmm15 are marked as callee saved. */
@@ -977,7 +977,16 @@
      * @todo not sure what this really is for... */
     IEMNATIVEGSTREG enmGstReg;
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+    /** Flag whether this variable is held in a SIMD register (only supported for 128-bit and 256-bit variables),
+     * only valid when idxReg is not UINT8_MAX. */
+    bool fSimdReg : 1;
+    /** Set if the registered is currently used exclusively, false if the
+     * variable is idle and the register can be grabbed. */
+    bool fRegAcquired : 1;
+#else
     /** Set if the registered is currently used exclusively, false if the
      * variable is idle and the register can be grabbed. */
     bool fRegAcquired;
+#endif
 
     union
@@ -1095,7 +1104,9 @@
     /** What is being kept in this register. */
     IEMNATIVEWHAT enmWhat;
+    /** Variable index (packed) if holding a variable, otherwise UINT8_MAX. */
+    uint8_t idxVar;
     /** Flag what is currently loaded, low 128-bits, high 128-bits or complete 256-bits. */
     IEMNATIVEGSTSIMDREGLDSTSZ enmLoaded;
     /** Alignment padding. */
-    uint8_t abAlign[6];
+    uint8_t abAlign[5];
 } IEMNATIVEHSTSIMDREG;
 #endif
@@ -1519,4 +1530,5 @@
 DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT;
 DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT;
+DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, bool fFlushShadows) RT_NOEXCEPT;
 DECLHIDDEN(void) iemNativeRegFreeAndFlushMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegMask) RT_NOEXCEPT;
 DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs,
@@ -1566,4 +1578,8 @@
 DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
                                                        bool fInitialized = false, uint8_t idxRegPref = UINT8_MAX);
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
+                                                           bool fInitialized = false, uint8_t idxRegPref = UINT8_MAX);
+#endif
 DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
                                                                   IEMNATIVEGSTREG enmGstReg, uint32_t *poff);
@@ -1849,4 +1865,13 @@
     pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = false;
 }
+
+
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+DECL_INLINE_THROW(void) iemNativeVarSimdRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
+{
+    Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
+    iemNativeVarRegisterRelease(pReNative, idxVar);
+}
+#endif
 
 
@@ -2233,6 +2258,6 @@
 
     pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat        = enmWhat;
+    pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar         = idxVar;
     pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
-    RT_NOREF(idxVar);
     return idxSimdReg;
 }
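A side note on the IEMNATIVEVAR change above: fRegAcquired becomes a single-bit bitfield so the new fSimdReg flag can sit beside it without growing the structure. A quick illustrative sketch (not the real struct; bool bitfield packing is formally implementation-defined, though the usual ABIs pack both flags into one byte):

    #include <assert.h>
    #include <stdbool.h>

    typedef struct EXAMPLEFLAGS
    {
        bool fSimdReg     : 1;  /* variable lives in a SIMD register      */
        bool fRegAcquired : 1;  /* register is exclusively held right now */
    } EXAMPLEFLAGS;

    /* Both flags share the storage one plain bool occupied before. */
    static_assert(sizeof(EXAMPLEFLAGS) == sizeof(bool), "flags grew the struct");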
trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h
(r103892 → r103894)

@@ -2106,4 +2106,56 @@
 
 
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+/**
+ * Emits a 128-bit vector register load instruction with an BP relative source address.
+ */
+DECL_FORCE_INLINE_THROW(uint32_t)
+iemNativeEmitLoadVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
+{
+#ifdef RT_ARCH_AMD64
+    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 9);
+
+    /* movdqu reg128, mem128 */
+    pbCodeBuf[off++] = 0xf3;
+    if (iVecRegDst >= 8)
+        pbCodeBuf[off++] = X86_OP_REX_R;
+    pbCodeBuf[off++] = 0x0f;
+    pbCodeBuf[off++] = 0x6f;
+    return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
+#elif defined(RT_ARCH_ARM64)
+    return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
+#else
+# error "port me"
+#endif
+}
+
+
+/**
+ * Emits a 256-bit vector register load instruction with an BP relative source address.
+ */
+DECL_FORCE_INLINE_THROW(uint32_t)
+iemNativeEmitLoadVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, int32_t offDisp)
+{
+#ifdef RT_ARCH_AMD64
+    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
+
+    /* vmovdqu reg256, mem256 */
+    pbCodeBuf[off++] = X86_OP_VEX2;
+    pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegDst >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
+    pbCodeBuf[off++] = 0x6f;
+    return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegDst, offDisp, pReNative);
+#elif defined(RT_ARCH_ARM64)
+    /* ASSUMES two consecutive vector registers for the 256-bit value. */
+    Assert(!(iVecRegDst & 0x1));
+    off = iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst, offDisp, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
+    return iemNativeEmitGprByBpLdSt(pReNative, off, iVecRegDst + 1, offDisp + sizeof(RTUINT128U), kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
+#else
+# error "port me"
+#endif
+}
+
+#endif
+
+
 /**
  * Emits a load effective address to a GRP with an BP relative source address.
@@ -2251,4 +2303,89 @@
     return iemNativeEmitStoreGprByBp(pReNative, off, offDisp, IEMNATIVE_REG_FIXED_TMP0);
 }
+
+
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+/**
+ * Emits a 128-bit vector register store with an BP relative destination address.
+ *
+ * @note May trash IEMNATIVE_REG_FIXED_TMP0.
+ */
+DECL_INLINE_THROW(uint32_t)
+iemNativeEmitStoreVecRegByBpU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
+{
+#ifdef RT_ARCH_AMD64
+    /* movdqu [rbp + offDisp], vecsrc */
+    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
+    pbCodeBuf[off++] = 0xf3;
+    if (iVecRegSrc >= 8)
+        pbCodeBuf[off++] = X86_OP_REX_R;
+    pbCodeBuf[off++] = 0x0f;
+    pbCodeBuf[off++] = 0x7f;
+    return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
+
+#elif defined(RT_ARCH_ARM64)
+    if (offDisp >= 0 && offDisp < 4096 * 8 && !((uint32_t)offDisp & 7))
+    {
+        /* str w/ unsigned imm12 (scaled) */
+        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
+        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc,
+                                                      ARMV8_A64_REG_BP, (uint32_t)offDisp / 8);
+    }
+    else if (offDisp >= -256 && offDisp <= 256)
+    {
+        /* stur w/ signed imm9 (unscaled) */
+        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
+        pu32CodeBuf[off++] = Armv8A64MkInstrSturLdur(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP, offDisp);
+    }
+    else if ((uint32_t)-offDisp < (unsigned)_4K)
+    {
+        /* Use temporary indexing register w/ sub uimm12. */
+        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
+        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0,
+                                                         ARMV8_A64_REG_BP, (uint32_t)-offDisp);
+        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, IEMNATIVE_REG_FIXED_TMP0, 0);
+    }
+    else
+    {
+        /* Use temporary indexing register. */
+        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, (uint32_t)offDisp);
+        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
+        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(kArmv8A64InstrLdStType_St_Vr_128, iVecRegSrc, ARMV8_A64_REG_BP,
+                                                       IEMNATIVE_REG_FIXED_TMP0, kArmv8A64InstrLdStExtend_Sxtw);
+    }
+    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
+    return off;
+
+#else
+# error "Port me!"
+#endif
+}
+
+
+/**
+ * Emits a 256-bit vector register store with an BP relative destination address.
+ *
+ * @note May trash IEMNATIVE_REG_FIXED_TMP0.
+ */
+DECL_INLINE_THROW(uint32_t)
+iemNativeEmitStoreVecRegByBpU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, int32_t offDisp, uint8_t iVecRegSrc)
+{
+#ifdef RT_ARCH_AMD64
+    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
+
+    /* vmovdqu mem256, reg256 */
+    pbCodeBuf[off++] = X86_OP_VEX2;
+    pbCodeBuf[off++] = X86_OP_VEX2_BYTE1_MAKE_NO_VVVV(iVecRegSrc >= 8, true /*f256BitAvx*/, X86_OP_VEX2_BYTE1_P_0F3H);
+    pbCodeBuf[off++] = 0x7f;
+    return iemNativeEmitGprByBpDisp(pbCodeBuf, off, iVecRegSrc, offDisp, pReNative);
+#elif defined(RT_ARCH_ARM64)
+    Assert(!(iVecRegSrc & 0x1));
+    off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp, iVecRegSrc);
+    return iemNativeEmitStoreVecRegByBpU128(pReNative, off, offDisp + sizeof(RTUINT128U), iVecRegSrc + 1);
+#else
+# error "Port me!"
+#endif
+}
+#endif
 
 
@@ -7155,5 +7292,5 @@
 
 /**
- * Emits code to load the variable address into an argument GRP.
+ * Emits code to load the variable address into an argument GPR.
  *
  * This only works for uninitialized and stack variables.
@@ -7173,4 +7310,21 @@
 
     uint8_t const idxRegVar = pVar->idxReg;
+#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
+    if (   idxRegVar != UINT8_MAX
+        && pVar->fSimdReg)
+    {
+        Assert(idxRegVar < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
+        Assert(pVar->cbVar == sizeof(RTUINT128U) || pVar->cbVar == sizeof(RTUINT256U));
+
+        if (pVar->cbVar == sizeof(RTUINT128U))
+            off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDisp, idxRegVar);
+        else
+            off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDisp, idxRegVar);
+
+        iemNativeSimdRegFreeVar(pReNative, idxRegVar, fFlushShadows);
+        Assert(pVar->idxReg == UINT8_MAX);
+    }
+    else
+#endif
     if (idxRegVar < RT_ELEMENTS(pReNative->Core.aHstRegs))
     {
@@ -7400,9 +7554,9 @@
 {
 #ifdef RT_ARCH_AMD64
-    off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
+    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
 #elif defined(RT_ARCH_ARM64)
     /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
     Assert(!(iVecReg & 0x1));
-    off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
+    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
 #else
 # error "port me"
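As a concreteness check on the AMD64 vmovdqu paths added above (the X86_OP_VEX2 / X86_OP_VEX2_BYTE1_MAKE_NO_VVVV route): the two-byte VEX form fits a 256-bit store to a small BP-relative slot in five bytes. The helper below is an illustrative standalone sketch of the byte layout, using plain x86 encoding facts rather than VirtualBox code:

    #include <stddef.h>
    #include <stdint.h>

    /* Emits 'vmovdqu [rbp+disp8], ymmN' for N in 0..7. */
    static size_t exampleEmitVmovdquStoreYmm(uint8_t *pb, uint8_t iYmmSrc, int8_t disp8)
    {
        size_t off = 0;
        pb[off++] = 0xc5;                             /* 2-byte VEX prefix */
        pb[off++] = 0xfe;                             /* ~R=1 (src < ymm8), ~vvvv=1111b, L=1 (256-bit), pp=10b (F3) */
        pb[off++] = 0x7f;                             /* opcode: vmovdqu mem, reg */
        pb[off++] = (uint8_t)(0x45 | (iYmmSrc << 3)); /* ModRM: mod=01 ([rbp]+disp8), reg=src, rm=101 */
        pb[off++] = (uint8_t)disp8;
        return off;                                   /* 5 bytes emitted */
    }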