VirtualBox

Changeset 105172 in vbox


Timestamp: Jul 7, 2024 12:59:59 PM
Author: vboxsync
svn:sync-xref-src-repo-rev: 163814

Message:
VMM/IEM: Implement native emitters for pmullw, paddusb, paddusw, bugref:10652

Location: trunk
Files: 3 edited

Legend: unchanged context lines are shown indented; removed lines are prefixed with -, added lines with +, and … marks lines skipped by the changeset viewer.
  • trunk/include/iprt/armv8.h (r105171 → r105172)

      } ARMV8INSTRVECARITHSZ;

    - /**
    -  * A64: Encodes ADD/SUB (vector, register).
    -  *
    -  * @returns The encoded instruction.
    -  * @param   fSub        Flag whther this is an addition (false) or subtraction (true) instruction.
    +
    + /** Armv8 vector arithmetic operation. */
    + typedef enum
    + {
    +     kArmv8VecInstrArithOp_Add           =                 RT_BIT_32(15),                                               /**< ADD   */
    +     kArmv8VecInstrArithOp_Sub           = RT_BIT_32(29) | RT_BIT_32(15),                                               /**< SUB   */
    +     kArmv8VecInstrArithOp_UnsignSat_Add = RT_BIT_32(29) |                                               RT_BIT_32(11), /**< UQADD */
    +     kArmv8VecInstrArithOp_UnsignSat_Sub = RT_BIT_32(29) |                 RT_BIT_32(13)               | RT_BIT_32(11), /**< UQSUB */
    +     kArmv8VecInstrArithOp_SignSat_Add   =                                                               RT_BIT_32(11), /**< SQADD */
    +     kArmv8VecInstrArithOp_SignSat_Sub   =                                 RT_BIT_32(13)               | RT_BIT_32(11), /**< SQSUB */
    +     kArmv8VecInstrArithOp_Mul           =                 RT_BIT_32(15) |               RT_BIT_32(12) | RT_BIT_32(11)  /**< MUL   */
    + } ARMV8INSTRVECARITHOP;
    +
    +
    + /**
    +  * A64: Encodes an arithmetic operation (vector, register).
    +  *
    +  * @returns The encoded instruction.
    +  * @param   enmOp       The operation to encode.
       * @param   iVecRegDst  The vector register to put the result into.
       * @param   iVecRegSrc1 The first vector source register.
    …
       *                      or just the low 64-bit (false).
       */
    - DECL_FORCE_INLINE(uint32_t) Armv8A64MkVecInstrAddSub(bool fSub, uint32_t iVecRegDst, uint32_t iVecRegSrc1, uint32_t iVecRegSrc2,
    -                                                      ARMV8INSTRVECARITHSZ enmSz, bool f128Bit = true)
    + DECL_FORCE_INLINE(uint32_t) Armv8A64MkVecInstrArithOp(ARMV8INSTRVECARITHOP enmOp, uint32_t iVecRegDst, uint32_t iVecRegSrc1, uint32_t iVecRegSrc2,
    +                                                       ARMV8INSTRVECARITHSZ enmSz, bool f128Bit = true)
      {
          Assert(iVecRegDst < 32); Assert(iVecRegSrc1 < 32); Assert(iVecRegSrc2 < 32);

    -     return UINT32_C(0x0e208400)
    +     return UINT32_C(0x0e200400)
    +          | (uint32_t)enmOp
               | ((uint32_t)f128Bit << 30)
    -          | ((uint32_t)fSub    << 29)
               | ((uint32_t)enmSz   << 22)
               | (iVecRegSrc2 << 16)
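
The new encoder builds a standard A64 "three registers, same type" SIMD instruction: the UINT32_C(0x0e200400) base word supplies the fixed bits, enmOp contributes the U bit and opcode bits, and the Q, size and register fields are OR'ed on top. The standalone sketch below recombines those same bit patterns for UQADD V0.16B, V0.16B, V1.16B without using the VBox headers; the placement of iVecRegSrc1/iVecRegDst (bits 9:5 and 4:0) and the value 0 for kArmv8VecInstrArithSz_8 are assumptions based on the A64 encoding, since that part of the function is truncated in the hunk above.

    /* Standalone sketch (plain C, no VBox headers): rebuilds the UQADD encoding
     * from the same bit patterns the enum above uses.  The Src1/Dst field
     * positions (bits 9:5 and 4:0) and the 8-bit element size value 0 are
     * assumptions, not taken verbatim from the (truncated) hunk. */
    #include <stdint.h>
    #include <stdio.h>

    #define MY_BIT32(a_iBit) (UINT32_C(1) << (a_iBit))   /* stand-in for RT_BIT_32 */

    int main(void)
    {
        uint32_t const fUqAdd = MY_BIT32(29) | MY_BIT32(11);  /* kArmv8VecInstrArithOp_UnsignSat_Add */
        uint32_t const uInstr = UINT32_C(0x0e200400)          /* fixed bits of the encoding          */
                              | fUqAdd                        /* U bit + opcode bits                 */
                              | (UINT32_C(1) << 30)           /* Q=1: operate on all 128 bits        */
                              | (UINT32_C(0) << 22)           /* size=00: 8-bit elements (assumed)   */
                              | (UINT32_C(1) << 16)           /* Vm = v1                             */
                              | (UINT32_C(0) <<  5)           /* Vn = v0 (assumed field position)    */
                              |  UINT32_C(0);                 /* Vd = v0 (assumed field position)    */
        printf("uqadd v0.16b, v0.16b, v1.16b = %#010x\n", uInstr);  /* prints 0x6e210c00 */
        return 0;
    }
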
  • trunk/src/VBox/VMM/VMMAll/IEMAllInstTwoByte0f.cpp.h (r105170 → r105172)

      {
          IEMOP_MNEMONIC2(RM, PMULLW, pmullw, Vx, Wx, DISOPTYPE_HARMLESS | DISOPTYPE_X86_SSE, IEMOPHINT_IGNORES_OP_SIZES);
    -     return FNIEMOP_CALL_1(iemOpCommonSse2Opt_FullFull_To_Full, iemAImpl_pmullw_u128);
    +     SSE2_OPT_BODY_FullFull_To_Full(pmullw, iemAImpl_pmullw_u128, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64);
      }

    …
      {
          IEMOP_MNEMONIC2(RM, PADDUSB, paddusb, Vx, Wx, DISOPTYPE_HARMLESS | DISOPTYPE_X86_SSE, IEMOPHINT_IGNORES_OP_SIZES);
    -     return FNIEMOP_CALL_1(iemOpCommonSse2Opt_FullFull_To_Full, iemAImpl_paddusb_u128);
    +     SSE2_OPT_BODY_FullFull_To_Full(paddusb, iemAImpl_paddusb_u128, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64);
      }

    …
      {
          IEMOP_MNEMONIC2(RM, PADDUSW, paddusw, Vx, Wx, DISOPTYPE_HARMLESS | DISOPTYPE_X86_SSE, IEMOPHINT_IGNORES_OP_SIZES);
    -     return FNIEMOP_CALL_1(iemOpCommonSse2Opt_FullFull_To_Full, iemAImpl_paddusw_u128);
    +     SSE2_OPT_BODY_FullFull_To_Full(paddusw, iemAImpl_paddusw_u128, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64);
      }
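
The C fallbacks referenced here (iemAImpl_pmullw_u128, iemAImpl_paddusb_u128, iemAImpl_paddusw_u128) implement the SSE semantics the new native emitters must reproduce: PMULLW keeps the low 16 bits of each 16-bit product, while PADDUSB/PADDUSW perform lane-wise unsigned saturating adds, which is exactly what the A64 UQADD instruction does. The snippet below is a minimal standalone reference for the saturating-add case; it is not the actual VBox helper code.

    /* Minimal reference semantics for PADDUSB (not the VBox iemAImpl_* code):
     * add the 16 byte lanes of two 128-bit operands with unsigned saturation,
     * clamping each result at 0xff -- the behaviour UQADD.16B provides natively. */
    #include <stdint.h>

    static void paddusb_ref(uint8_t abDst[16], const uint8_t abSrc[16])
    {
        for (unsigned iLane = 0; iLane < 16; iLane++)
        {
            unsigned const uSum = (unsigned)abDst[iLane] + abSrc[iLane];
            abDst[iLane] = uSum > 0xff ? 0xff : (uint8_t)uSum;
        }
    }
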
  • trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h (r105170 → r105172)

      /**
    -  * Common emitter for the paddX/psubX instructions.
    -  */
    - #ifdef RT_ARCH_AMD64
    - # define IEMNATIVE_NATIVE_EMIT_ADD_SUB_U128(a_Instr, a_fSub, a_ArmElemSz, a_bOpcX86) \
    +  * Common emitter for packed arithmetic instructions.
    +  */
    + #ifdef RT_ARCH_AMD64
    + # define IEMNATIVE_NATIVE_EMIT_ARITH_OP_U128(a_Instr, a_enmArmOp, a_ArmElemSz, a_bOpcX86) \
          DECL_INLINE_THROW(uint32_t) \
          RT_CONCAT3(iemNativeEmit_,a_Instr,_rr_u128)(PIEMRECOMPILERSTATE pReNative, uint32_t off, \
    …
          typedef int ignore_semicolon
      #elif defined(RT_ARCH_ARM64)
    - # define IEMNATIVE_NATIVE_EMIT_ADD_SUB_U128(a_Instr, a_fSub, a_ArmElemSz, a_bOpcX86) \
    + # define IEMNATIVE_NATIVE_EMIT_ARITH_OP_U128(a_Instr, a_enmArmOp, a_ArmElemSz, a_bOpcX86) \
          DECL_INLINE_THROW(uint32_t) \
          RT_CONCAT3(iemNativeEmit_,a_Instr,_rr_u128)(PIEMRECOMPILERSTATE pReNative, uint32_t off, \
    …
                                                                                    kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly); \
              PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1); \
    -         pCodeBuf[off++] = Armv8A64MkVecInstrAddSub((a_fSub), idxSimdRegDst, idxSimdRegDst, idxSimdRegSrc, (a_ArmElemSz)); \
    +         pCodeBuf[off++] = Armv8A64MkVecInstrArithOp((a_enmArmOp), idxSimdRegDst, idxSimdRegDst, idxSimdRegSrc, (a_ArmElemSz)); \
              iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst); \
              iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc); \
    …
              uint8_t const idxSimdRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/); \
              PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1); \
    -         pCodeBuf[off++] = Armv8A64MkVecInstrAddSub((a_fSub), idxSimdRegDst, idxSimdRegDst, idxSimdRegSrc, (a_ArmElemSz)); \
    +         pCodeBuf[off++] = Armv8A64MkVecInstrArithOp((a_enmArmOp), idxSimdRegDst, idxSimdRegDst, idxSimdRegSrc, (a_ArmElemSz)); \
              iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst); \
              iemNativeVarRegisterRelease(pReNative, idxVarSrc); \
    …
      #endif

    - IEMNATIVE_NATIVE_EMIT_ADD_SUB_U128(paddb, false /*a_fSub*/, kArmv8VecInstrArithSz_8,  0xfc);
    - IEMNATIVE_NATIVE_EMIT_ADD_SUB_U128(paddw, false /*a_fSub*/, kArmv8VecInstrArithSz_16, 0xfd);
    - IEMNATIVE_NATIVE_EMIT_ADD_SUB_U128(paddd, false /*a_fSub*/, kArmv8VecInstrArithSz_32, 0xfe);
    - IEMNATIVE_NATIVE_EMIT_ADD_SUB_U128(paddq, false /*a_fSub*/, kArmv8VecInstrArithSz_64, 0xd4);
    -
    - IEMNATIVE_NATIVE_EMIT_ADD_SUB_U128(psubb, true  /*a_fSub*/, kArmv8VecInstrArithSz_8,  0xf8);
    - IEMNATIVE_NATIVE_EMIT_ADD_SUB_U128(psubw, true  /*a_fSub*/, kArmv8VecInstrArithSz_16, 0xf9);
    - IEMNATIVE_NATIVE_EMIT_ADD_SUB_U128(psubd, true  /*a_fSub*/, kArmv8VecInstrArithSz_32, 0xfa);
    - IEMNATIVE_NATIVE_EMIT_ADD_SUB_U128(psubq, true  /*a_fSub*/, kArmv8VecInstrArithSz_64, 0xfb);
    + /*
    +  * PADDx.
    +  */
    + IEMNATIVE_NATIVE_EMIT_ARITH_OP_U128(paddb, kArmv8VecInstrArithOp_Add, kArmv8VecInstrArithSz_8,  0xfc);
    + IEMNATIVE_NATIVE_EMIT_ARITH_OP_U128(paddw, kArmv8VecInstrArithOp_Add, kArmv8VecInstrArithSz_16, 0xfd);
    + IEMNATIVE_NATIVE_EMIT_ARITH_OP_U128(paddd, kArmv8VecInstrArithOp_Add, kArmv8VecInstrArithSz_32, 0xfe);
    + IEMNATIVE_NATIVE_EMIT_ARITH_OP_U128(paddq, kArmv8VecInstrArithOp_Add, kArmv8VecInstrArithSz_64, 0xd4);
    +
    + /*
    +  * PSUBx.
    +  */
    + IEMNATIVE_NATIVE_EMIT_ARITH_OP_U128(psubb, kArmv8VecInstrArithOp_Sub, kArmv8VecInstrArithSz_8,  0xf8);
    + IEMNATIVE_NATIVE_EMIT_ARITH_OP_U128(psubw, kArmv8VecInstrArithOp_Sub, kArmv8VecInstrArithSz_16, 0xf9);
    + IEMNATIVE_NATIVE_EMIT_ARITH_OP_U128(psubd, kArmv8VecInstrArithOp_Sub, kArmv8VecInstrArithSz_32, 0xfa);
    + IEMNATIVE_NATIVE_EMIT_ARITH_OP_U128(psubq, kArmv8VecInstrArithOp_Sub, kArmv8VecInstrArithSz_64, 0xfb);
    +
    + /*
    +  * PADDUSx.
    +  */
    + IEMNATIVE_NATIVE_EMIT_ARITH_OP_U128(paddusb, kArmv8VecInstrArithOp_UnsignSat_Add, kArmv8VecInstrArithSz_8,  0xdc);
    + IEMNATIVE_NATIVE_EMIT_ARITH_OP_U128(paddusw, kArmv8VecInstrArithOp_UnsignSat_Add, kArmv8VecInstrArithSz_16, 0xdd);
    +
    + /*
    +  * PMULLx.
    +  */
    + IEMNATIVE_NATIVE_EMIT_ARITH_OP_U128(pmullw,  kArmv8VecInstrArithOp_Mul, kArmv8VecInstrArithSz_16, 0xd5);
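
Each instantiation pastes the instruction name into an emitter function name via RT_CONCAT3 (visible in the macro body above), so this hunk adds iemNativeEmit_paddusb_rr_u128, iemNativeEmit_paddusw_rr_u128 and iemNativeEmit_pmullw_rr_u128: on ARM64 each emits a single UQADD.16B, UQADD.8H or MUL.8H respectively (per the enum comments in armv8.h), while on AMD64 the a_bOpcX86 arguments correspond to the 0x0f-map opcode bytes 0xdc, 0xdd and 0xd5. The standalone sketch below only illustrates the name-pasting pattern with hypothetical MY_* macros; it is not the real emitter body.

    /* Illustration of the RT_CONCAT3-style name generation only (hypothetical
     * MY_* names, no VBox types): each instantiation line defines one
     * iemNativeEmit_<instr>_rr_u128 style function. */
    #include <stdio.h>

    #define MY_CONCAT3_HLP(a,b,c)  a##b##c
    #define MY_CONCAT3(a,b,c)      MY_CONCAT3_HLP(a,b,c)

    #define MY_EMIT_ARITH_OP_U128(a_Instr, a_szArm64) \
        static const char *MY_CONCAT3(iemNativeEmit_,a_Instr,_rr_u128)(void) \
        { return "native " a_szArm64; }

    MY_EMIT_ARITH_OP_U128(paddusb, "uqadd.16b")  /* -> iemNativeEmit_paddusb_rr_u128() */
    MY_EMIT_ARITH_OP_U128(pmullw,  "mul.8h")     /* -> iemNativeEmit_pmullw_rr_u128()  */

    int main(void)
    {
        printf("%s\n", iemNativeEmit_paddusb_rr_u128());  /* prints "native uqadd.16b" */
        printf("%s\n", iemNativeEmit_pmullw_rr_u128());   /* prints "native mul.8h"    */
        return 0;
    }
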