VirtualBox

Changeset 104133 in vbox for trunk/src/VBox/VMM/include


Ignore:
Timestamp:
Apr 3, 2024 12:03:22 PM (10 months ago)
Author:
vboxsync
Message:

VMM/IEM: Convert the 256-bit vmovsldup/vmovshdup/vmovddup emulations to microcode, bugref:10641

Location:
trunk/src/VBox/VMM/include
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/include/IEMInternal.h

    r104132 r104133  
    35913591/** @name Media (SSE/MMX/AVX) operation: Sort this later
    35923592 * @{ */
    3593 IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc));
    3594 IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc));
    3595 IEM_DECL_IMPL_DEF(void, iemAImpl_vmovshdup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc));
    3596 IEM_DECL_IMPL_DEF(void, iemAImpl_vmovshdup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc));
    3597 IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc));
    3598 IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc));
    3599 
    36003593IEM_DECL_IMPL_DEF(void, iemAImpl_pmovsxbw_u128,(PRTUINT128U puDst, uint64_t uSrc));
    36013594IEM_DECL_IMPL_DEF(void, iemAImpl_vpmovsxbw_u128,(PRTUINT128U puDst, uint64_t uSrc));
  • trunk/src/VBox/VMM/include/IEMMc.h

    r104129 r104133  
    702702         pVCpu->cpum.GstCtx.XState.u.YmmHi.aYmmHi[iYRegDstTmp].au64[1] = (a_u256Src).au64[3]; \
    703703         IEM_MC_INT_CLEAR_ZMM_256_UP(iYRegDstTmp); \
     704    } while (0)
     705#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
     706    do { uintptr_t const iYRegDstTmp    = (a_iYRegDst); \
     707         if ((a_iDwDst) < 4) \
     708            pVCpu->cpum.GstCtx.XState.x87.aXMM[(iYRegDstTmp)].au32[(a_iDwDst)] = (a_u256Value).au32[(a_iDwSrc)]; \
     709         else \
     710            pVCpu->cpum.GstCtx.XState.u.YmmHi.aYmmHi[(iYRegDstTmp)].au32[(a_iDwDst) - 4] = (a_u256Value).au32[(a_iDwSrc)]; \
     711    } while (0)
     712#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
     713    do { uintptr_t const iYRegDstTmp    = (a_iYRegDst); \
     714         if ((a_iQwDst) < 2) \
     715            pVCpu->cpum.GstCtx.XState.x87.aXMM[(iYRegDstTmp)].au64[(a_iQwDst)] = (a_u256Value).au64[(a_iQwDst)]; \
     716         else \
     717            pVCpu->cpum.GstCtx.XState.u.YmmHi.aYmmHi[(iYRegDstTmp)].au64[(a_iQwDst) - 4] = (a_u256Value).au64[(a_iQwDst)]; \
     718    } while (0)
     719#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
     720    do { uintptr_t const iYRegDstTmp    = (a_iYRegDst); \
     721         if ((a_iQword) < 2) \
     722            pVCpu->cpum.GstCtx.XState.x87.aXMM[(iYRegDstTmp)].au64[(a_iQword)] = (a_u64Value); \
     723         else \
     724            pVCpu->cpum.GstCtx.XState.u.YmmHi.aYmmHi[(iYRegDstTmp)].au64[(a_iQword) - 2] = (a_u64Value); \
    704725    } while (0)
    705726
     
    889910         IEM_MC_INT_CLEAR_ZMM_256_UP(iYRegDstTmp); \
    890911    } while (0)
     912
     913#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
     914    do { IEM_MC_INT_CLEAR_ZMM_256_UP(a_iYReg); } while (0)
    891915
    892916#ifndef IEM_WITH_SETJMP
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

    r104099 r104133  
    82868286    if (iQWord >= 2)
    82878287    {
    8288         /** @todo Currently not used. */
    8289         AssertReleaseFailed();
     8288        /*
     8289         * vpextrq doesn't work on the upper 128-bits.
     8290         * So we use the following sequence:
     8291         *     vextracti128 vectmp0, vecsrc, 1
     8292         *     pextrq       gpr, vectmp0, #(iQWord - 2)
     8293         */
     8294        /* vextracti128 */
     8295        pCodeBuf[off++] = X86_OP_VEX3;
     8296        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
     8297        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
     8298        pCodeBuf[off++] = 0x39;
     8299        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
     8300        pCodeBuf[off++] = 0x1;
     8301
     8302        /* pextrq gpr, vectmp0, #(iQWord - 2) (ASSUMES SSE4.1). */
     8303        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     8304        pCodeBuf[off++] =   X86_OP_REX_W
     8305                          | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
     8306                          | (iGprDst < 8 ? 0 : X86_OP_REX_B);
     8307        pCodeBuf[off++] = 0x0f;
     8308        pCodeBuf[off++] = 0x3a;
     8309        pCodeBuf[off++] = 0x16;
     8310        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
     8311        pCodeBuf[off++] = iQWord - 2;
    82908312    }
    82918313    else
     
    83478369    if (iDWord >= 4)
    83488370    {
    8349         /** @todo Currently not used. */
    8350         AssertReleaseFailed();
     8371        /*
     8372         * vpextrd doesn't work on the upper 128-bits.
     8373         * So we use the following sequence:
     8374         *     vextracti128 vectmp0, vecsrc, 1
     8375         *     pextrd       gpr, vectmp0, #(iDWord - 4)
     8376         */
     8377        /* vextracti128 */
     8378        pCodeBuf[off++] = X86_OP_VEX3;
     8379        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8);
     8380        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
     8381        pCodeBuf[off++] = 0x39;
     8382        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
     8383        pCodeBuf[off++] = 0x1;
     8384
     8385        /* pextrd gpr, vectmp0, #(iDWord - 4) (ASSUMES SSE4.1). */
     8386        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     8387        if (iGprDst >= 8 || IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
     8388            pCodeBuf[off++] =   (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
     8389                              | (iGprDst < 8 ? 0 : X86_OP_REX_B);
     8390        pCodeBuf[off++] = 0x0f;
     8391        pCodeBuf[off++] = 0x3a;
     8392        pCodeBuf[off++] = 0x16;
     8393        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7);
     8394        pCodeBuf[off++] = iDWord - 4;
    83518395    }
    83528396    else
     
    83648408    }
    83658409#elif defined(RT_ARCH_ARM64)
     8410    Assert(iDWord < 4);
     8411
    83668412    /* umov gprdst, vecsrc[iDWord] */
    83678413    pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/);
     
    83828428
    83838429#ifdef RT_ARCH_AMD64
    8384     off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iDWord);
     8430    off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 15), off, iGprDst, iVecRegSrc, iDWord);
    83858431#elif defined(RT_ARCH_ARM64)
    83868432    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
     
    85278573{
    85288574#ifdef RT_ARCH_AMD64
    8529     /* pinsrq vecsrc, gpr, #iQWord (ASSUMES SSE4.1). */
    8530     pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    8531     pCodeBuf[off++] =   X86_OP_REX_W
    8532                       | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
    8533                       | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    8534     pCodeBuf[off++] = 0x0f;
    8535     pCodeBuf[off++] = 0x3a;
    8536     pCodeBuf[off++] = 0x22;
    8537     pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    8538     pCodeBuf[off++] = iQWord;
     8575    if (iQWord >= 2)
     8576    {
     8577        /*
     8578         * vpinsrq doesn't work on the upper 128-bits.
     8579         * So we use the following sequence:
     8580         *     vextracti128 vectmp0, vecdst, 1
     8581         *     pinsrq       vectmp0, gpr, #(iQWord - 2)
     8582         *     vinserti128  vecdst, vectmp0, 1
     8583         */
     8584        /* vextracti128 */
     8585        pCodeBuf[off++] = X86_OP_VEX3;
     8586        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
     8587        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
     8588        pCodeBuf[off++] = 0x39;
     8589        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
     8590        pCodeBuf[off++] = 0x1;
     8591
     8592        /* pinsrq */
     8593        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     8594        pCodeBuf[off++] =   X86_OP_REX_W
     8595                          | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
     8596                          | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
     8597        pCodeBuf[off++] = 0x0f;
     8598        pCodeBuf[off++] = 0x3a;
     8599        pCodeBuf[off++] = 0x22;
     8600        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
     8601        pCodeBuf[off++] = iQWord - 2;
     8602
     8603        /* vinserti128 */
     8604        pCodeBuf[off++] = X86_OP_VEX3;
     8605        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
     8606        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
     8607        pCodeBuf[off++] = 0x38;
     8608        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
     8609        pCodeBuf[off++] = 0x01; /* Immediate */
     8610    }
     8611    else
     8612    {
     8613        /* pinsrq vecsrc, gpr, #iQWord (ASSUMES SSE4.1). */
     8614        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     8615        pCodeBuf[off++] =   X86_OP_REX_W
     8616                          | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
     8617                          | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
     8618        pCodeBuf[off++] = 0x0f;
     8619        pCodeBuf[off++] = 0x3a;
     8620        pCodeBuf[off++] = 0x22;
     8621        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
     8622        pCodeBuf[off++] = iQWord;
     8623    }
    85398624#elif defined(RT_ARCH_ARM64)
    85408625    /* ins vecsrc[iQWord], gpr */
     
    85538638iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord)
    85548639{
    8555     Assert(iQWord <= 1);
    8556 
    8557 #ifdef RT_ARCH_AMD64
    8558     off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord);
    8559 #elif defined(RT_ARCH_ARM64)
    8560     off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord);
     8640    Assert(iQWord <= 3);
     8641
     8642#ifdef RT_ARCH_AMD64
     8643    off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iQWord);
     8644#elif defined(RT_ARCH_ARM64)
     8645    Assert(!(iVecRegDst & 0x1));
     8646    if (iQWord >= 2)
     8647        off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iQWord - 2);
     8648    else
     8649        off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst,     iGprSrc, iQWord);
    85618650#else
    85628651# error "port me"
     
    85748663{
    85758664#ifdef RT_ARCH_AMD64
    8576     /* pinsrd vecsrc, gpr, #iDWord (ASSUMES SSE4.1). */
    8577     pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    8578     if (iVecRegDst >= 8 || iGprSrc >= 8)
    8579         pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
    8580                           | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    8581     pCodeBuf[off++] = 0x0f;
    8582     pCodeBuf[off++] = 0x3a;
    8583     pCodeBuf[off++] = 0x22;
    8584     pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    8585     pCodeBuf[off++] = iDWord;
     8665    if (iDWord >= 4)
     8666    {
     8667        /*
     8668         * vpinsrd doesn't work on the upper 128-bits.
     8669         * So we use the following sequence:
     8670         *     vextracti128 vectmp0, vecdst, 1
     8671         *     pinsrd       vectmp0, gpr, #(iDWord - 4)
     8672         *     vinserti128  vecdst, vectmp0, 1
     8673         */
     8674        /* vextracti128 */
     8675        pCodeBuf[off++] = X86_OP_VEX3;
     8676        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
     8677        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H);
     8678        pCodeBuf[off++] = 0x39;
     8679        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
     8680        pCodeBuf[off++] = 0x1;
     8681
     8682        /* pinsrd */
     8683        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     8684        if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || iGprSrc >= 8)
     8685            pCodeBuf[off++] =   (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
     8686                              | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
     8687        pCodeBuf[off++] = 0x0f;
     8688        pCodeBuf[off++] = 0x3a;
     8689        pCodeBuf[off++] = 0x22;
     8690        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7);
     8691        pCodeBuf[off++] = iDWord - 4;
     8692
     8693        /* vinserti128 */
     8694        pCodeBuf[off++] = X86_OP_VEX3;
     8695        pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8);
     8696        pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H);
     8697        pCodeBuf[off++] = 0x38;
     8698        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
     8699        pCodeBuf[off++] = 0x01; /* Immediate */
     8700    }
     8701    else
     8702    {
     8703        /* pinsrd vecsrc, gpr, #iDWord (ASSUMES SSE4.1). */
     8704        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     8705        if (iVecRegDst >= 8 || iGprSrc >= 8)
     8706            pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
     8707                              | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
     8708        pCodeBuf[off++] = 0x0f;
     8709        pCodeBuf[off++] = 0x3a;
     8710        pCodeBuf[off++] = 0x22;
     8711        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
     8712        pCodeBuf[off++] = iDWord;
     8713    }
    85868714#elif defined(RT_ARCH_ARM64)
    85878715    /* ins vecsrc[iDWord], gpr */
     
    86008728iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord)
    86018729{
    8602     Assert(iDWord <= 3);
    8603 
    8604 #ifdef RT_ARCH_AMD64
    8605     off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iDWord);
    8606 #elif defined(RT_ARCH_ARM64)
    8607     off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord);
     8730    Assert(iDWord <= 7);
     8731
     8732#ifdef RT_ARCH_AMD64
     8733    off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iDWord);
     8734#elif defined(RT_ARCH_ARM64)
     8735    Assert(!(iVecRegDst & 0x1));
     8736    if (iDWord >= 4)
     8737        off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iDWord - 4);
     8738    else
     8739        off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst,     iGprSrc, iDWord);
    86088740#else
    86098741# error "port me"
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette