VirtualBox

Changeset 103816 in vbox


Ignore:
Timestamp:
Mar 13, 2024 8:31:43 AM (9 months ago)
Author:
vboxsync
Message:

VMM/IEM: Implement native emitter for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(), bugref:10614

Location:
trunk/src/VBox/VMM
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py

    r103815 r103816  
    29602960    'IEM_MC_BEGIN':                                              (McBlock.parseMcBegin,             False, False, True,  ),
    29612961    'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX':                        (McBlock.parseMcGeneric,           True,  True,  False, ),
    2962     'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX':                        (McBlock.parseMcGeneric,           True,  True,  False, ),
     2962    'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX':                        (McBlock.parseMcGeneric,           True,  True,  g_fNativeSimd),
    29632963    'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX':                        (McBlock.parseMcGeneric,           True,  True,  g_fNativeSimd),
    29642964    'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX':                         (McBlock.parseMcGeneric,           True,  True,  False, ),
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h

    r103815 r103816  
    68326832
    68336833
     6834#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
     6835    off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
     6836
     6837/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
     6838DECL_INLINE_THROW(uint32_t)
     6839iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
     6840{
     6841    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
     6842    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
     6843
     6844    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
     6845                                                                          kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
     6846
     6847    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
     6848
     6849    off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
     6850    off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
     6851    IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg);
     6852    IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg);
     6853
     6854    /* Free but don't flush the source register. */
     6855    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
     6856    iemNativeVarRegisterRelease(pReNative, idxSrcVar);
     6857
     6858    return off;
     6859}
     6860
     6861
    68346862#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
    68356863    off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

    r103815 r103816  
    72627262{
    72637263#ifdef RT_ARCH_AMD64
    7264     /* pinsrq vecsrc, gpr, #iQWord (ASSUMES SSE4.1). */
     7264    /* pinsrd vecsrc, gpr, #iDWord (ASSUMES SSE4.1). */
    72657265    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    72667266    if (iVecRegDst >= 8 || iGprSrc >= 8)
     
    73897389#elif defined(RT_ARCH_ARM64)
    73907390    off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg);
     7391#else
     7392# error "port me"
     7393#endif
     7394    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     7395    return off;
     7396}
     7397
     7398
     7399/**
     7400 * Emits a vecdst = gprsrc broadcast, 32-bit.
     7401 */
     7402DECL_FORCE_INLINE(uint32_t)
     7403iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
     7404{
     7405#ifdef RT_ARCH_AMD64
     7406    /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears,
     7407     *        vbroadcast needs a memory operand or another xmm register to work... */
     7408
     7409    /* pinsrd vecsrc, gpr, #0 (ASSUMES SSE4.1). */
     7410    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     7411    if (iVecRegDst >= 8 || iGprSrc >= 8)
     7412        pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
     7413                          | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
     7414    pCodeBuf[off++] = 0x0f;
     7415    pCodeBuf[off++] = 0x3a;
     7416    pCodeBuf[off++] = 0x22;
     7417    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
     7418    pCodeBuf[off++] = 0x00;
     7419
     7420    if (f256Bit)
     7421    {
     7422        /* When broadcasting the entire ymm register we can use vbroadcastss now. */
     7423        /* vbroadcastss ymm, xmm (ASSUMES AVX2). */
     7424        pCodeBuf[off++] = X86_OP_VEX3;
     7425        pCodeBuf[off++] =   X86_OP_VEX3_BYTE1_X
     7426                          | (  iVecRegDst >= 8
     7427                             ? 0
     7428                             : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
     7429        pCodeBuf[off++] = 0x7d;
     7430        pCodeBuf[off++] = 0x18;
     7431        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
     7432    }
     7433    else
     7434    {
     7435        /* pinsrd vecsrc, gpr, #1 (ASSUMES SSE4.1). */
     7436        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     7437        if (iVecRegDst >= 8 || iGprSrc >= 8)
     7438            pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
     7439                              | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
     7440        pCodeBuf[off++] = 0x0f;
     7441        pCodeBuf[off++] = 0x3a;
     7442        pCodeBuf[off++] = 0x22;
     7443        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
     7444        pCodeBuf[off++] = 0x00;
     7445
     7446        /* pinsrd vecsrc, gpr, #2 (ASSUMES SSE4.1). */
     7447        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     7448        if (iVecRegDst >= 8 || iGprSrc >= 8)
     7449            pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
     7450                              | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
     7451        pCodeBuf[off++] = 0x0f;
     7452        pCodeBuf[off++] = 0x3a;
     7453        pCodeBuf[off++] = 0x22;
     7454        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
     7455        pCodeBuf[off++] = 0x00;
     7456
     7457        /* pinsrd vecsrc, gpr, #3 (ASSUMES SSE4.1). */
     7458        pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     7459        if (iVecRegDst >= 8 || iGprSrc >= 8)
     7460            pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
     7461                              | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
     7462        pCodeBuf[off++] = 0x0f;
     7463        pCodeBuf[off++] = 0x3a;
     7464        pCodeBuf[off++] = 0x22;
     7465        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
     7466        pCodeBuf[off++] = 0x00;
     7467    }
     7468#elif defined(RT_ARCH_ARM64)
     7469    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
     7470    Assert(!(iVecRegDst & 0x1) || !f256Bit);
     7471
     7472    /* dup vecsrc, gpr */
     7473    pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32);
     7474    if (f256Bit)
     7475        pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32);
     7476#else
     7477# error "port me"
     7478#endif
     7479    return off;
     7480}
     7481
     7482
     7483/**
     7484 * Emits a vecdst[x] = gprsrc broadcast, 32-bit.
     7485 */
     7486DECL_INLINE_THROW(uint32_t)
     7487iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false)
     7488{
     7489#ifdef RT_ARCH_AMD64
     7490    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 12 : 28), off, iVecRegDst, iGprSrc, f256Bit);
     7491#elif defined(RT_ARCH_ARM64)
     7492    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
    73917493#else
    73927494# error "port me"
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette