Changeset 103777 in vbox
- Timestamp:
- Mar 11, 2024 4:42:51 PM (11 months ago)
- Location:
- trunk
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/include/iprt/armv8.h
r103754 r103777 4071 4071 4072 4072 4073 /** Armv8 UMOV vector element size. */4074 typedef enum ARMV8INSTRUMOV SZ4075 { 4076 kArmv8InstrUmov Sz_U8 = 0, /**< Byte. */4077 kArmv8InstrUmov Sz_U16 = 1, /**< Halfword. */4078 kArmv8InstrUmov Sz_U32 = 2, /**< 32-bit. */4079 kArmv8InstrUmov Sz_U64 = 3 /**< 64-bit (only valid when the destination is a 64-bit register. */4080 } ARMV8INSTRUMOV SZ;4073 /** Armv8 UMOV/INS vector element size. */ 4074 typedef enum ARMV8INSTRUMOVINSSZ 4075 { 4076 kArmv8InstrUmovInsSz_U8 = 0, /**< Byte. */ 4077 kArmv8InstrUmovInsSz_U16 = 1, /**< Halfword. */ 4078 kArmv8InstrUmovInsSz_U32 = 2, /**< 32-bit. */ 4079 kArmv8InstrUmovInsSz_U64 = 3 /**< 64-bit (only valid when the destination is a 64-bit register. */ 4080 } ARMV8INSTRUMOVINSSZ; 4081 4081 4082 4082 … … 4088 4088 * @param iVecRegSrc The vector source register. 4089 4089 * @param idxElem The element index. 4090 * @param enmSz Element size of the source evctor register.4090 * @param enmSz Element size of the source vector register. 4091 4091 * @param fDst64Bit Flag whether the destination register is 64-bit (true) or 32-bit (false). 
4092 4092 */ 4093 4093 DECL_FORCE_INLINE(uint32_t) Armv8A64MkVecInstrUmov(uint32_t iRegDst, uint32_t iVecRegSrc, uint8_t idxElem, 4094 ARMV8INSTRUMOV SZ enmSz = kArmv8InstrUmovSz_U64, bool fDst64Bit = true)4094 ARMV8INSTRUMOVINSSZ enmSz = kArmv8InstrUmovInsSz_U64, bool fDst64Bit = true) 4095 4095 { 4096 4096 Assert(iRegDst < 32); Assert(iVecRegSrc < 32); 4097 Assert((fDst64Bit && enmSz == kArmv8InstrUmov Sz_U64) || (!fDst64Bit && enmSz != kArmv8InstrUmovSz_U64));4098 Assert( (enmSz == kArmv8InstrUmov Sz_U8 && idxElem < 16)4099 || (enmSz == kArmv8InstrUmov Sz_U16 && idxElem < 8)4100 || (enmSz == kArmv8InstrUmov Sz_U32 && idxElem < 4)4101 || (enmSz == kArmv8InstrUmov Sz_U64 && idxElem < 2));4097 Assert((fDst64Bit && enmSz == kArmv8InstrUmovInsSz_U64) || (!fDst64Bit && enmSz != kArmv8InstrUmovInsSz_U64)); 4098 Assert( (enmSz == kArmv8InstrUmovInsSz_U8 && idxElem < 16) 4099 || (enmSz == kArmv8InstrUmovInsSz_U16 && idxElem < 8) 4100 || (enmSz == kArmv8InstrUmovInsSz_U32 && idxElem < 4) 4101 || (enmSz == kArmv8InstrUmovInsSz_U64 && idxElem < 2)); 4102 4102 4103 4103 return UINT32_C(0x0e003c00) … … 4110 4110 4111 4111 4112 /** 4113 * A64: Encodes INS (vector, register). 4114 * 4115 * @returns The encoded instruction. 4116 * @param iVecRegDst The vector register to put the result into. 4117 * @param iRegSrc The source register. 4118 * @param idxElem The element index for the destination. 4119 * @param enmSz Element size of the destination vector register. 4120 * 4121 * @note This instruction assumes a 32-bit W<n> register for all non-64-bit vector sizes. 
4122 */ 4123 DECL_FORCE_INLINE(uint32_t) Armv8A64MkVecInstrIns(uint32_t iVecRegDst, uint32_t iRegSrc, uint8_t idxElem, 4124 ARMV8INSTRUMOVINSSZ enmSz = kArmv8InstrUmovInsSz_U64) 4125 { 4126 Assert(iRegSrc < 32); Assert(iVecRegDst < 32); 4127 Assert( (enmSz == kArmv8InstrUmovInsSz_U8 && idxElem < 16) 4128 || (enmSz == kArmv8InstrUmovInsSz_U16 && idxElem < 8) 4129 || (enmSz == kArmv8InstrUmovInsSz_U32 && idxElem < 4) 4130 || (enmSz == kArmv8InstrUmovInsSz_U64 && idxElem < 2)); 4131 4132 return UINT32_C(0x4e001c00) 4133 | ((uint32_t)idxElem << (16 + enmSz + 1)) 4134 | (RT_BIT_32(enmSz) << 16) 4135 | (iRegSrc << 5) 4136 | iVecRegDst; 4137 } 4138 4139 4112 4140 /** Armv8 vector compare to zero vector element size. */ 4113 4141 typedef enum ARMV8INSTRVECCMPZEROSZ -
trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp
r103775 r103777 5768 5768 5769 5769 5770 /** 5771 * Sets the indicator for which part of the given SIMD register has valid data loaded. 5772 * 5773 * @param pReNative The native recompile state. 5774 * @param idxHstSimdReg The host SIMD register to update the state for. 5775 * @param enmLoadSz The load size to set. 5776 */ 5777 DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz) 5778 { 5779 /* Everything valid already? -> nothing to do. */ 5780 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256) 5781 return; 5782 5783 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid) 5784 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz; 5785 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz) 5786 { 5787 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128 5788 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128) 5789 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128 5790 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)); 5791 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256; 5792 } 5793 } 5794 5795 5770 5796 static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst, 5771 5797 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst) 5772 5798 { 5773 5799 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. 
*/ 5774 if ( pReNative->Core.aHstSimdRegs[idxHstSimdReg Dst].enmLoaded == enmLoadSzDst5775 || pReNative->Core.aHstSimdRegs[idxHstSimdReg Dst].enmLoaded == kIemNativeGstSimdRegLdStSz_256)5800 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst 5801 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256) 5776 5802 { 5777 5803 # ifdef RT_ARCH_ARM64 … … 5780 5806 # endif 5781 5807 5782 switch (enmLoadSzDst)5808 if (idxHstSimdRegDst != idxHstSimdRegSrc) 5783 5809 { 5784 case kIemNativeGstSimdRegLdStSz_256: 5785 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc); 5786 break; 5787 case kIemNativeGstSimdRegLdStSz_Low128: 5788 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc); 5789 break; 5790 case kIemNativeGstSimdRegLdStSz_High128: 5791 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1); 5792 break; 5793 default: 5794 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE)); 5810 switch (enmLoadSzDst) 5811 { 5812 case kIemNativeGstSimdRegLdStSz_256: 5813 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc); 5814 break; 5815 case kIemNativeGstSimdRegLdStSz_Low128: 5816 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc); 5817 break; 5818 case kIemNativeGstSimdRegLdStSz_High128: 5819 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1); 5820 break; 5821 default: 5822 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE)); 5823 } 5824 5825 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst); 5795 5826 } 5796 5797 pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded = enmLoadSzDst;5798 5827 } 5799 5828 else … … 5895 5924 
pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp; 5896 5925 if (enmIntendedUse != kIemNativeGstRegUse_Calculation) 5926 { 5927 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite) 5928 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz); 5929 else 5930 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz); 5897 5931 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n", 5898 5932 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse])); 5933 } 5899 5934 else 5900 5935 { … … 5948 5983 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz); 5949 5984 else 5950 { 5951 /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */ 5952 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz; 5953 } 5985 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz); 5954 5986 5955 5987 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate … … 5971 6003 #ifdef VBOX_STRICT 5972 6004 /* Strict builds: Check that the value is correct. */ 5973 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz); 6005 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite) 6006 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz); 5974 6007 #endif 5975 6008 … … 5985 6018 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz); 5986 6019 else 5987 { 5988 /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. 
*/ 5989 5990 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz; 5990 5991 } 6020 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz); 5991 6021 5992 6022 if (enmIntendedUse != kIemNativeGstRegUse_Calculation) … … 6208 6238 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo)); 6209 6239 6210 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;6240 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz); 6211 6241 switch (enmLoadSz) 6212 6242 { … … 6392 6422 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz) 6393 6423 { 6424 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */ 6425 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256 6426 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg) 6427 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))) 6428 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 6429 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)) 6430 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 6431 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))) 6432 return off; 6433 6394 6434 # ifdef RT_ARCH_AMD64 6395 6435 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. 
*/ … … 6476 6516 /* umov tmp0, vectmp0.D[0] */ 6477 6517 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, 6478 0 /*idxElem*/, kArmv8InstrUmov Sz_U64);6518 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64); 6479 6519 /* cbz tmp0, +1 */ 6480 6520 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0); … … 6492 6532 /* umov tmp0, (vectmp0 + 1).D[0] */ 6493 6533 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, 6494 0 /*idxElem*/, kArmv8InstrUmov Sz_U64);6534 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64); 6495 6535 /* cbz tmp0, +1 */ 6496 6536 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0); … … 15201 15241 /* Free but don't flush the source register. */ 15202 15242 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc); 15243 iemNativeVarRegisterRelease(pReNative, idxDstVar); 15244 15245 return off; 15246 } 15247 15248 15249 #define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \ 15250 off = iemNativeEmitSimdStoreXregU64(pReNative, off, a_iXReg, a_u64Value, a_iQWord) 15251 15252 /** Emits code for IEM_MC_STORE_XREG_U64. 
*/ 15253 DECL_INLINE_THROW(uint32_t) 15254 iemNativeEmitSimdStoreXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iQWord) 15255 { 15256 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar); 15257 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t)); 15258 15259 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg), 15260 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate); 15261 15262 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off); 15263 15264 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQWord); 15265 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg); 15266 15267 /* Free but don't flush the destination register. */ 15268 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst); 15203 15269 iemNativeVarRegisterRelease(pReNative, idxDstVar); 15204 15270 -
trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h
r103763 r103777 7105 7105 #elif defined(RT_ARCH_ARM64) 7106 7106 /* umov gprdst, vecsrc[iQWord] */ 7107 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmov Sz_U64);7107 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64); 7108 7108 #else 7109 7109 # error "port me" … … 7152 7152 #elif defined(RT_ARCH_ARM64) 7153 7153 /* umov gprdst, vecsrc[iDWord] */ 7154 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmov Sz_U32, false /*fDst64Bit*/);7154 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/); 7155 7155 #else 7156 7156 # error "port me" … … 7172 7172 #elif defined(RT_ARCH_ARM64) 7173 7173 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord); 7174 #else 7175 # error "port me" 7176 #endif 7177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off); 7178 return off; 7179 } 7180 7181 7182 /** 7183 * Emits a vecdst[x] = gprsrc store, 64-bit. 7184 */ 7185 DECL_FORCE_INLINE(uint32_t) 7186 iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord) 7187 { 7188 #ifdef RT_ARCH_AMD64 7189 /* pinsrq vecsrc, gpr, #iQWord (ASSUMES SSE4.1). */ 7190 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7191 pCodeBuf[off++] = X86_OP_REX_W 7192 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7193 | (iGprSrc < 8 ? 
0 : X86_OP_REX_B); 7194 pCodeBuf[off++] = 0x0f; 7195 pCodeBuf[off++] = 0x3a; 7196 pCodeBuf[off++] = 0x22; 7197 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7198 pCodeBuf[off++] = iQWord; 7199 #elif defined(RT_ARCH_ARM64) 7200 /* ins vecdst[iQWord], gpr */ 7201 pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64); 7202 #else 7203 # error "port me" 7204 #endif 7205 return off; 7206 } 7207 7208 7209 /** 7210 * Emits a vecdst[x] = gprsrc store, 64-bit. 7211 */ 7212 DECL_INLINE_THROW(uint32_t) 7213 iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord) 7214 { 7215 Assert(iQWord <= 1); 7216 7217 #ifdef RT_ARCH_AMD64 7218 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord); 7219 #elif defined(RT_ARCH_ARM64) 7220 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord); 7174 7221 #else 7175 7222 # error "port me"
Note:
See TracChangeset
for help on using the changeset viewer.