VirtualBox

Changeset 103758 in vbox


Timestamp: Mar 11, 2024 11:01:51 AM
Author: vboxsync
svn:sync-xref-src-repo-rev: 162127
Message:
VMM/IEM: More amd64 fixes for SIMD enablement, bs3-cpu-instr-3 works now, bugref:10614

Location: trunk
Files: 3 edited

  • trunk/include/iprt/x86.h

    --- trunk/include/iprt/x86.h (r103668)
    +++ trunk/include/iprt/x86.h (r103758)
    @@ -5179,6 +5179,31 @@
     #define X86_OP_REX_R            UINT8_C(0x44)
     #define X86_OP_REX_W            UINT8_C(0x48)
    -/** @} */
    -
    +#define X86_OP_VEX3             UINT8_C(0xc4)
    +#define X86_OP_VEX2             UINT8_C(0xc5)
    +/** @} */
    +
    +/** @name X86_OP_VEX3_XXX - 3-byte VEX prefix helpers.
    + * @{ */
    +#define X86_OP_VEX3_BYTE1_MAP_MASK      0x1f
    +#define X86_OP_VEX3_BYTE1_B             RT_BIT(5)
    +#define X86_OP_VEX3_BYTE1_X             RT_BIT(6)
    +#define X86_OP_VEX3_BYTE1_R             RT_BIT(7)
    +
    +#define X86_OP_VEX3_BYTE3_P_MASK        0x3
    +# define X86_OP_VEX3_BYTE3_P_NO_PRF     0
    +# define X86_OP_VEX3_BYTE3_P_066H       1
    +# define X86_OP_VEX3_BYTE3_P_0F3H       2
    +# define X86_OP_VEX3_BYTE3_P_0F2H       3
    +#define X86_OP_VEX3_BYTE3_L             RT_BIT(2)
    +#define X86_OP_VEX3_BYTE3_VVVV_MASK     0x78
    +#define X86_OP_VEX3_BYTE3_VVVV_SHIFT    3
    +#define X86_OP_VEX3_BYTE3_W             RT_BIT(7)
    +
    +#define X86_OP_VEX3_BYTE3_MAKE(a_f64BitOpSz, a_iSrcReg, a_f256BitAvx, a_fPrf) \
    +    (  ((a_f64BitOpSz) ? X86_OP_VEX3_BYTE3_W : 0) \
    +     | ((~(uint8_t)(a_iSrcReg) & 0xf) << X86_OP_VEX3_BYTE3_VVVV_SHIFT) \
    +     | ((a_f256BitAvx) ? X86_OP_VEX3_BYTE3_L : 0) \
    +     | ((a_fPrf) & X86_OP_VEX3_BYTE3_P_MASK))
    +/** @} */
     
     /** @} */
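The new X86_OP_VEX3_BYTE3_MAKE helper packs the W, vvvv, L and pp fields of the third byte of a 3-byte VEX prefix. Below is a minimal standalone sketch (MY_BIT and the shortened names are stand-ins, not VBox code) checking that the helper reproduces the 0x7d byte the emitters further down hard-code for vextracti128 (W=0, vvvv=1111b, L=1, pp=01):

    /* Standalone sketch of the VEX3 byte-3 composition; not VBox code. */
    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define MY_BIT(n)              (1U << (n))   /* stand-in for RT_BIT */
    #define VEX3_BYTE3_P_MASK      0x3
    #define VEX3_BYTE3_P_066H      1             /* pp=01: 0x66 prefix */
    #define VEX3_BYTE3_L           MY_BIT(2)     /* 256-bit operation  */
    #define VEX3_BYTE3_VVVV_SHIFT  3
    #define VEX3_BYTE3_W           MY_BIT(7)

    #define VEX3_BYTE3_MAKE(a_f64BitOpSz, a_iSrcReg, a_f256BitAvx, a_fPrf) \
        (  ((a_f64BitOpSz) ? VEX3_BYTE3_W : 0) \
         | ((~(uint8_t)(a_iSrcReg) & 0xf) << VEX3_BYTE3_VVVV_SHIFT) \
         | ((a_f256BitAvx) ? VEX3_BYTE3_L : 0) \
         | ((a_fPrf) & VEX3_BYTE3_P_MASK))

    int main(void)
    {
        /* vvvv = ~0 & 0xf = 1111b (register 0 / unused), L=1, pp=01. */
        uint8_t const b = VEX3_BYTE3_MAKE(0 /*W*/, 0 /*vvvv reg*/, 1 /*256-bit*/, VEX3_BYTE3_P_066H);
        assert(b == 0x7d);
        printf("VEX3 byte3 = %#x\n", b);
        return 0;
    }

vvvv is stored inverted, so an unused source register (index 0) encodes as 1111b; that is why the fixed 0x7d byte works for instructions that take no vvvv operand.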
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

    --- trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp (r103750)
    +++ trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp (r103758)
    @@ -5979,36 +5979,17 @@
                IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg)));
     
    -#ifdef RT_ARCH_AMD64
         if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg))
         {
             Assert(   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
                    || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
    -        off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offXmm);
    +        off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offXmm);
         }
     
         if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg))
         {
             Assert(   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
                    || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
    -        AssertReleaseFailed();
    -        //off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm);
    -    }
    -#elif defined(RT_ARCH_ARM64)
    -    /* ASSUMING there are two consecutive host registers to store the potential 256-bit guest register. */
    -    Assert(!(idxHstSimdReg & 0x1));
    -    if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg))
    -    {
    -        Assert(   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
    -               || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
    -        off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offXmm);
    -    }
    -
    -    if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg))
    -    {
    -        Assert(   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
    -               || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
    -        off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg + 1, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm);
    -    }
    -#endif
    +        off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm);
    +    }
     
         IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, idxGstSimdReg);
    @@ -6229,10 +6210,10 @@
         {
             case kIemNativeGstSimdRegLdStSz_256:
    -            return iemNativeEmitSimdLoadVecRegFromVCpuU256(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm,
    -                                                           g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
    +            off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
    +            return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
             case kIemNativeGstSimdRegLdStSz_Low128:
    -            return iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
    +            return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
             case kIemNativeGstSimdRegLdStSz_High128:
    -            return iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
    +            return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
             default:
                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
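With this change both architectures share one flush path: a 256-bit guest SIMD register is shadowed as two 128-bit halves with separate dirty bits, and only the dirty halves are written back to the guest context before the dirty state is cleared. A toy model of that bookkeeping (plain C for illustration; none of the types below are VBox code):

    /* Toy model of the lo/hi dirty tracking this changeset relies on. */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    typedef struct { uint64_t au64[2]; } U128;   /* stand-in for RTUINT128U */

    typedef struct
    {
        U128 xmm;    /* low 128 bits in the guest CPU context  */
        U128 ymmHi;  /* high 128 bits in the guest CPU context */
    } GSTREG;

    typedef struct
    {
        U128    aHalves[2]; /* host shadow copy: [0]=low, [1]=high */
        uint8_t fDirtyLo;
        uint8_t fDirtyHi;
    } SHADOW;

    static void flushShadow(GSTREG *pGst, SHADOW *pShadow)
    {
        if (pShadow->fDirtyLo)
            pGst->xmm = pShadow->aHalves[0];    /* ~ iemNativeEmitSimdStoreVecRegToVCpuLowU128  */
        if (pShadow->fDirtyHi)
            pGst->ymmHi = pShadow->aHalves[1];  /* ~ iemNativeEmitSimdStoreVecRegToVCpuHighU128 */
        pShadow->fDirtyLo = pShadow->fDirtyHi = 0; /* ~ IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY */
    }

    int main(void)
    {
        GSTREG gst; SHADOW sh;
        memset(&gst, 0, sizeof(gst)); memset(&sh, 0, sizeof(sh));
        sh.aHalves[0].au64[0] = 0x1122334455667788u;
        sh.fDirtyLo = 1;  /* only the low half was modified */
        flushShadow(&gst, &sh);
        printf("xmm.lo=%#llx ymmHi.lo=%#llx\n",
               (unsigned long long)gst.xmm.au64[0], (unsigned long long)gst.ymmHi.au64[0]);
        return 0;
    }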
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

    --- trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h (r103750)
    +++ trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h (r103758)
    @@ -6828,5 +6828,5 @@
      */
     DECL_FORCE_INLINE_THROW(uint32_t)
    -iemNativeEmitSimdStoreVecRegToVCpuU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
    +iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
     {
     #ifdef RT_ARCH_AMD64
    @@ -6852,10 +6852,56 @@
      */
     DECL_INLINE_THROW(uint32_t)
    -iemNativeEmitSimdStoreVecRegToVCpuU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
    -{
    -#ifdef RT_ARCH_AMD64
    -    off = iemNativeEmitSimdStoreVecRegToVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
    -#elif defined(RT_ARCH_ARM64)
    -    off = iemNativeEmitSimdStoreVecRegToVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
    +iemNativeEmitSimdStoreVecRegToVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
    +{
    +#ifdef RT_ARCH_AMD64
    +    off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
    +#elif defined(RT_ARCH_ARM64)
    +    off = iemNativeEmitSimdStoreVecRegToVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
    +#else
    +# error "port me"
    +#endif
    +    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    +    return off;
    +}
    +
    +
    +/**
    + * Emits a high 128-bit vector register store to a VCpu value.
    + */
    +DECL_FORCE_INLINE_THROW(uint32_t)
    +iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
    +{
    +#ifdef RT_ARCH_AMD64
    +    /* vextracti128 mem128, reg128, 1 */ /* ASSUMES AVX2 support. */
    +    pCodeBuf[off++] = X86_OP_VEX3;
    +    if (iVecReg >= 8)
    +        pCodeBuf[off++] = 0x63;
    +    else
    +        pCodeBuf[off++] = 0xe3;
    +    pCodeBuf[off++] = 0x7d;
    +    pCodeBuf[off++] = 0x39;
    +    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
    +    pCodeBuf[off++] = 0x01; /* Immediate */
    +#elif defined(RT_ARCH_ARM64)
    +    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_St_Vr_128, sizeof(RTUINT128U));
    +#else
    +# error "port me"
    +#endif
    +    return off;
    +}
    +
    +
    +/**
    + * Emits a high 128-bit vector register store to a VCpu value.
    + */
    +DECL_INLINE_THROW(uint32_t)
    +iemNativeEmitSimdStoreVecRegToVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
    +{
    +#ifdef RT_ARCH_AMD64
    +    off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
    +#elif defined(RT_ARCH_ARM64)
    +    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    +    Assert(!(iVecReg & 0x1));
    +    off = iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
     #else
     # error "port me"
    @@ -6870,5 +6916,5 @@
      */
     DECL_FORCE_INLINE_THROW(uint32_t)
    -iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
    +iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
     {
     #ifdef RT_ARCH_AMD64
    @@ -6894,57 +6940,60 @@
      */
     DECL_INLINE_THROW(uint32_t)
    -iemNativeEmitSimdLoadVecRegFromVCpuU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
    -{
    -#ifdef RT_ARCH_AMD64
    -    off = iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
    -#elif defined(RT_ARCH_ARM64)
    -    off = iemNativeEmitSimdLoadVecRegFromVCpuU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
    -#else
    -# error "port me"
    -#endif
    -    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    -    return off;
    -}
    -
    -
    -#if 0 /* unused */
    -/**
    - * Emits a 256-bit vector register store to a VCpu value.
    +iemNativeEmitSimdLoadVecRegFromVCpuLowU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
    +{
    +#ifdef RT_ARCH_AMD64
    +    off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 9), off, iVecReg, offVCpu);
    +#elif defined(RT_ARCH_ARM64)
    +    off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg, offVCpu);
    +#else
    +# error "port me"
    +#endif
    +    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    +    return off;
    +}
    +
    +
    +/**
    + * Emits a high 128-bit vector register load of a VCpu value.
      */
     DECL_FORCE_INLINE_THROW(uint32_t)
    -iemNativeEmitSimdStoreVecRegToVCpuU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpuLow, uint32_t offVCpuHigh)
    -{
    -#ifdef RT_ARCH_AMD64
    -    AssertReleaseFailed();
    +iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
    +{
    +#ifdef RT_ARCH_AMD64
    +    /* vinserti128 ymm, ymm, mem128, 1. */ /* ASSUMES AVX2 support */
    +    pCodeBuf[off++] = X86_OP_VEX3;
    +    if (iVecReg >= 8)
    +        pCodeBuf[off++] = 0x63;
    +    else
    +        pCodeBuf[off++] = 0xe3;
    +    pCodeBuf[off++] = X86_OP_VEX3_BYTE3_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE3_P_066H);
    +    pCodeBuf[off++] = 0x38;
    +    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
    +    pCodeBuf[off++] = 0x01; /* Immediate */
    +#elif defined(RT_ARCH_ARM64)
    +    off = iemNativeEmitGprByVCpuLdStEx(pCodeBuf, off, iVecReg, offVCpu, kArmv8A64InstrLdStType_Ld_Vr_128, sizeof(RTUINT128U));
    +#else
    +# error "port me"
    +#endif
    +    return off;
    +}
    +
    +
    +/**
    + * Emits a high 128-bit vector register load of a VCpu value.
    + */
    +DECL_INLINE_THROW(uint32_t)
    +iemNativeEmitSimdLoadVecRegFromVCpuHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpu)
    +{
    +#ifdef RT_ARCH_AMD64
    +    off = iemNativeEmitSimdLoadVecRegFromVCpuHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 10), off, iVecReg, offVCpu);
     #elif defined(RT_ARCH_ARM64)
         /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
         Assert(!(iVecReg & 0x1));
    -    off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, iVecReg,     offVCpuLow);
    -    off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, iVecReg + 1, offVCpuHigh);
    -#else
    -# error "port me"
    -#endif
    -    return off;
    -}
    -#endif
    -
    -
    -/**
    - * Emits a 256-bit vector register load of a VCpu value.
    - */
    -DECL_FORCE_INLINE_THROW(uint32_t)
    -iemNativeEmitSimdLoadVecRegFromVCpuU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecReg, uint32_t offVCpuLow, uint32_t offVCpuHigh)
    -{
    -#ifdef RT_ARCH_AMD64
    -    AssertReleaseFailed();
    -    RT_NOREF(pReNative, off, iVecReg, offVCpuLow, offVCpuHigh);
    -#elif defined(RT_ARCH_ARM64)
    -    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
    -    Assert(!(iVecReg & 0x1));
    -    off = iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, iVecReg,     offVCpuLow);
    -    off = iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, iVecReg + 1, offVCpuHigh);
    -#else
    -# error "port me"
    -#endif
    +    off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg + 1, offVCpu);
    +#else
    +# error "port me"
    +#endif
    +    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
         return off;
     }
    @@ -7009,5 +7058,5 @@
         if (iVecRegDst >= 8 && iVecRegSrc >= 8)
         {
    -        pbCodeBuf[off++] = 0xc4;
    +        pbCodeBuf[off++] = X86_OP_VEX3;
             pbCodeBuf[off++] = 0x41;
             pbCodeBuf[off++] = 0x7d;
    @@ -7017,5 +7066,5 @@
         else
         {
    -        pbCodeBuf[off++] = 0xc5;                                               /* Two byte VEX prefix */
    +        pbCodeBuf[off++] = X86_OP_VEX2;
             pbCodeBuf[off++] = (iVecRegSrc >= 8 || iVecRegDst >= 8) ? 0x7d : 0xfd;
             pbCodeBuf[off++] = iVecRegSrc >= 8 ? 0x7f : 0x6f;
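On amd64 the new high-half helpers encode vextracti128/vinserti128 with a 3-byte VEX prefix. A standalone sketch of the bytes iemNativeEmitSimdStoreVecRegToVCpuHighU128Ex produces for "vextracti128 mem128, ymm0, 1" (not the VBox emitter itself; the ModRM and displacement bytes normally produced by iemNativeEmitGprByVCpuDisp are elided since they depend on the VCpu offset, so the printed sequence is not a complete instruction):

    /* Standalone sketch of the vextracti128 prefix/opcode bytes; not VBox code. */
    #include <stdint.h>
    #include <stdio.h>

    static unsigned emitVextracti128Bytes(uint8_t *pb, unsigned off, uint8_t iVecReg)
    {
        pb[off++] = 0xc4;                       /* X86_OP_VEX3 */
        pb[off++] = iVecReg >= 8 ? 0x63 : 0xe3; /* ~R ~X ~B | map 0F3A */
        pb[off++] = 0x7d;                       /* W=0, vvvv=1111, L=1, pp=66 */
        pb[off++] = 0x39;                       /* vextracti128 opcode */
        /* ... ModRM + displacement selecting [rVCpu + offVCpu] would go here ... */
        pb[off++] = 0x01;                       /* imm8: extract the high 128-bit lane */
        return off;
    }

    int main(void)
    {
        uint8_t ab[8];
        unsigned const cb = emitVextracti128Bytes(ab, 0, 0 /*ymm0*/);
        for (unsigned i = 0; i < cb; i++)
            printf("%02x ", ab[i]);            /* prints: c4 e3 7d 39 01 */
        printf("\n");
        return 0;
    }

With iVecReg >= 8 the second prefix byte drops the inverted VEX.R bit (0xe3 becomes 0x63), extending the register field the same way REX.R does; the same pattern appears in the existing vmovdqa paths above, which this changeset switches over to the named X86_OP_VEX3/X86_OP_VEX2 constants.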