VirtualBox

Changeset 103777 in vbox


Ignore:
Timestamp:
Mar 11, 2024 4:42:51 PM (11 months ago)
Author:
vboxsync
Message:

VMM/IEM: Implement emitter for IEM_MC_STORE_XREG_U64() and some fixes to the SIMD register load state handling, bugref:10614

Location:
trunk
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/include/iprt/armv8.h

    r103754 r103777  
    40714071
    40724072
    4073 /** Armv8 UMOV vector element size.    */
    4074 typedef enum ARMV8INSTRUMOVSZ
    4075 {
    4076     kArmv8InstrUmovSz_U8  = 0, /**< Byte. */
    4077     kArmv8InstrUmovSz_U16 = 1, /**< Halfword. */
    4078     kArmv8InstrUmovSz_U32 = 2, /**< 32-bit. */
    4079     kArmv8InstrUmovSz_U64 = 3  /**< 64-bit (only valid when the destination is a 64-bit register. */
    4080 } ARMV8INSTRUMOVSZ;
     4073/** Armv8 UMOV/INS vector element size.    */
     4074typedef enum ARMV8INSTRUMOVINSSZ
     4075{
     4076    kArmv8InstrUmovInsSz_U8  = 0, /**< Byte. */
     4077    kArmv8InstrUmovInsSz_U16 = 1, /**< Halfword. */
     4078    kArmv8InstrUmovInsSz_U32 = 2, /**< 32-bit. */
     4079    kArmv8InstrUmovInsSz_U64 = 3  /**< 64-bit (only valid when the destination is a 64-bit register). */
     4080} ARMV8INSTRUMOVINSSZ;
    40814081
    40824082
     
    40884088 * @param   iVecRegSrc  The vector source register.
    40894089 * @param   idxElem     The element index.
    4090  * @param   enmSz       Element size of the source evctor register.
     4090 * @param   enmSz       Element size of the source vector register.
    40914091 * @param   fDst64Bit   Flag whether the destination register is 64-bit (true) or 32-bit (false).
    40924092 */
    40934093DECL_FORCE_INLINE(uint32_t) Armv8A64MkVecInstrUmov(uint32_t iRegDst, uint32_t iVecRegSrc, uint8_t idxElem,
    4094                                                    ARMV8INSTRUMOVSZ enmSz = kArmv8InstrUmovSz_U64, bool fDst64Bit = true)
     4094                                                   ARMV8INSTRUMOVINSSZ enmSz = kArmv8InstrUmovInsSz_U64, bool fDst64Bit = true)
    40954095{
    40964096    Assert(iRegDst < 32); Assert(iVecRegSrc < 32);
    4097     Assert((fDst64Bit && enmSz == kArmv8InstrUmovSz_U64) || (!fDst64Bit && enmSz != kArmv8InstrUmovSz_U64));
    4098     Assert(   (enmSz == kArmv8InstrUmovSz_U8 && idxElem < 16)
    4099            || (enmSz == kArmv8InstrUmovSz_U16 && idxElem < 8)
    4100            || (enmSz == kArmv8InstrUmovSz_U32 && idxElem < 4)
    4101            || (enmSz == kArmv8InstrUmovSz_U64 && idxElem < 2));
     4097    Assert((fDst64Bit && enmSz == kArmv8InstrUmovInsSz_U64) || (!fDst64Bit && enmSz != kArmv8InstrUmovInsSz_U64));
     4098    Assert(   (enmSz == kArmv8InstrUmovInsSz_U8 && idxElem < 16)
     4099           || (enmSz == kArmv8InstrUmovInsSz_U16 && idxElem < 8)
     4100           || (enmSz == kArmv8InstrUmovInsSz_U32 && idxElem < 4)
     4101           || (enmSz == kArmv8InstrUmovInsSz_U64 && idxElem < 2));
    41024102
    41034103    return UINT32_C(0x0e003c00)
     
    41104110
    41114111
     4112/**
     4113 * A64: Encodes INS (vector, register).
     4114 *
     4115 * @returns The encoded instruction.
     4116 * @param   iVecRegDst  The vector register to put the result into.
     4117 * @param   iRegSrc     The source register.
     4118 * @param   idxElem     The element index for the destination.
     4119 * @param   enmSz       Element size of the destination vector register.
     4120 *
     4121 * @note This instruction assumes a 32-bit W<n> register for all non-64-bit element sizes.
     4122 */
     4123DECL_FORCE_INLINE(uint32_t) Armv8A64MkVecInstrIns(uint32_t iVecRegDst, uint32_t iRegSrc, uint8_t idxElem,
     4124                                                  ARMV8INSTRUMOVINSSZ enmSz = kArmv8InstrUmovInsSz_U64)
     4125{
     4126    Assert(iRegSrc < 32); Assert(iVecRegDst < 32);
     4127    Assert(   (enmSz == kArmv8InstrUmovInsSz_U8 && idxElem < 16)
     4128           || (enmSz == kArmv8InstrUmovInsSz_U16 && idxElem < 8)
     4129           || (enmSz == kArmv8InstrUmovInsSz_U32 && idxElem < 4)
     4130           || (enmSz == kArmv8InstrUmovInsSz_U64 && idxElem < 2));
     4131
     4132    return UINT32_C(0x4e001c00)
     4133         | ((uint32_t)idxElem << (16 + enmSz + 1))
     4134         | (RT_BIT_32(enmSz) << 16)
     4135         | (iRegSrc << 5)
     4136         | iVecRegDst;
     4137}
     4138
     4139
    41124140/** Armv8 vector compare to zero vector element size.    */
    41134141typedef enum ARMV8INSTRVECCMPZEROSZ
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

    r103775 r103777  
    57685768
    57695769
     5770/**
     5771 * Sets the indicator for which part of the given SIMD register has valid data loaded.
     5772 *
     5773 * @param   pReNative       The native recompile state.
     5774 * @param   idxHstSimdReg   The host SIMD register to update the state for.
     5775 * @param   enmLoadSz       The load size to set.
     5776 */
     5777DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
     5778{
     5779    /* Everything valid already? -> nothing to do. */
     5780    if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
     5781        return;
     5782
     5783    if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
     5784        pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
     5785    else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
     5786    {
     5787        Assert(   (   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
     5788                   && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
     5789               || (   pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
     5790                   && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
     5791        pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
     5792    }
     5793}
     5794
     5795
    57705796static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
    57715797                                                            uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
    57725798{
    57735799    /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
    5774     if (   pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == enmLoadSzDst
    5775         || pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
     5800    if (   pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
     5801        || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
    57765802    {
    57775803# ifdef RT_ARCH_ARM64
     
    57805806# endif
    57815807
    5782         switch (enmLoadSzDst)
     5808        if (idxHstSimdRegDst != idxHstSimdRegSrc)
    57835809        {
    5784             case kIemNativeGstSimdRegLdStSz_256:
    5785                 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
    5786                 break;
    5787             case kIemNativeGstSimdRegLdStSz_Low128:
    5788                 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
    5789                 break;
    5790             case kIemNativeGstSimdRegLdStSz_High128:
    5791                 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
    5792                 break;
    5793             default:
    5794                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
     5810            switch (enmLoadSzDst)
     5811            {
     5812                case kIemNativeGstSimdRegLdStSz_256:
     5813                    off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
     5814                    break;
     5815                case kIemNativeGstSimdRegLdStSz_Low128:
     5816                    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
     5817                    break;
     5818                case kIemNativeGstSimdRegLdStSz_High128:
     5819                    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
     5820                    break;
     5821                default:
     5822                    AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
     5823            }
     5824
     5825            iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
    57955826        }
    5796 
    5797         pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded = enmLoadSzDst;
    57985827    }
    57995828    else
     
    58955924                pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
    58965925                if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
     5926                {
     5927                    if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
     5928                        *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
     5929                    else
     5930                        iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
    58975931                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
    58985932                           g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
     5933                }
    58995934                else
    59005935                {
     
    59485983                *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
    59495984            else
    5950             {
    5951                 /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */
    5952                 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
    5953             }
     5985                iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
    59545986
    59555987            if (   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
     
    59716003#ifdef VBOX_STRICT
    59726004        /* Strict builds: Check that the value is correct. */
    5973         *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
     6005        if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
     6006            *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
    59746007#endif
    59756008
     
    59856018        *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
    59866019    else
    5987     {
    5988         /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */
    5989         pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
    5990     }
     6020        iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
    59916021
    59926022    if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
     
    62086238    Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
    62096239
    6210     pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
     6240    iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
    62116241    switch (enmLoadSz)
    62126242    {
     
    63926422                                    IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
    63936423{
     6424    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
     6425    if (   (   enmLoadSz == kIemNativeGstSimdRegLdStSz_256
     6426            && (   IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
     6427                || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
     6428        || (   enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
     6429            && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
     6430        || (   enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
     6431            && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
     6432        return off;
     6433
    63946434#  ifdef RT_ARCH_AMD64
    63956435    Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
     
    64766516        /* umov tmp0, vectmp0.D[0] */
    64776517        pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
    6478                                                     0 /*idxElem*/, kArmv8InstrUmovSz_U64);
     6518                                                    0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
    64796519        /* cbz tmp0, +1 */
    64806520        pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
     
    64926532        /* umov tmp0, (vectmp0 + 1).D[0] */
    64936533        pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
    6494                                                     0 /*idxElem*/, kArmv8InstrUmovSz_U64);
     6534                                                    0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
    64956535        /* cbz tmp0, +1 */
    64966536        pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
     
    1520115241    /* Free but don't flush the source register. */
    1520215242    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
     15243    iemNativeVarRegisterRelease(pReNative, idxDstVar);
     15244
     15245    return off;
     15246}
     15247
     15248
     15249#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
     15250    off = iemNativeEmitSimdStoreXregU64(pReNative, off, a_iXReg, a_u64Value, a_iQWord)
     15251
     15252/** Emits code for IEM_MC_STORE_XREG_U64. */
     15253DECL_INLINE_THROW(uint32_t)
     15254iemNativeEmitSimdStoreXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar, uint8_t iQWord)
     15255{
     15256    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
     15257    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
     15258
     15259    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
     15260                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
     15261
     15262    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
     15263
     15264    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQWord);
     15265    IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
     15266
     15267    /* Free but don't flush the destination register. */
     15268    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    1520315269    iemNativeVarRegisterRelease(pReNative, idxDstVar);
    1520415270
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

    r103763 r103777  
    71057105#elif defined(RT_ARCH_ARM64)
    71067106    /* umov gprdst, vecsrc[iQWord] */
    7107     pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovSz_U64);
     7107    pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iQWord, kArmv8InstrUmovInsSz_U64);
    71087108#else
    71097109# error "port me"
     
    71527152#elif defined(RT_ARCH_ARM64)
    71537153    /* umov gprdst, vecsrc[iDWord] */
    7154     pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovSz_U32, false /*fDst64Bit*/);
     7154    pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
    71557155#else
    71567156# error "port me"
     
    71727172#elif defined(RT_ARCH_ARM64)
    71737173    off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iVecRegSrc, iDWord);
     7174#else
     7175# error "port me"
     7176#endif
     7177    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     7178    return off;
     7179}
     7180
     7181
     7182/**
     7183 * Emits a vecdst[x] = gprsrc store, 64-bit.
     7184 */
     7185DECL_FORCE_INLINE(uint32_t)
     7186iemNativeEmitSimdStoreGprToVecRegU64Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
     7187{
     7188#ifdef RT_ARCH_AMD64
     7189    /* pinsrq vecdst, gpr, #iQWord (ASSUMES SSE4.1). */
     7190    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     7191    pCodeBuf[off++] =   X86_OP_REX_W
     7192                      | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
     7193                      | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
     7194    pCodeBuf[off++] = 0x0f;
     7195    pCodeBuf[off++] = 0x3a;
     7196    pCodeBuf[off++] = 0x22;
     7197    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
     7198    pCodeBuf[off++] = iQWord;
     7199#elif defined(RT_ARCH_ARM64)
     7200    /* ins vecdst[iQWord], gpr */
     7201    pCodeBuf[off++] = Armv8A64MkVecInstrIns(iVecRegDst, iGprSrc, iQWord, kArmv8InstrUmovInsSz_U64);
     7202#else
     7203# error "port me"
     7204#endif
     7205    return off;
     7206}
     7207
     7208
     7209/**
     7210 * Emits a vecdst[x] = gprsrc store, 64-bit.
     7211 */
     7212DECL_INLINE_THROW(uint32_t)
     7213iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
     7214{
     7215    Assert(iQWord <= 1);
     7216
     7217#ifdef RT_ARCH_AMD64
     7218    off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord);
     7219#elif defined(RT_ARCH_ARM64)
     7220    off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
    71747221#else
    71757222# error "port me"
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette