VirtualBox

Changeset 103821 in vbox


Ignore:
Timestamp:
Mar 13, 2024 10:54:07 AM (13 months ago)
Author:
vboxsync
svn:sync-xref-src-repo-rev:
162190
Message:

VMM/IEM: Use vpbroadcast, some VEX3 prefix cleanups, bugref:10614

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/include/iprt/x86.h

    r103758 r103821  
    51905190#define X86_OP_VEX3_BYTE1_R             RT_BIT(7)
    51915191
    5192 #define X86_OP_VEX3_BYTE3_P_MASK        0x3
    5193 # define X86_OP_VEX3_BYTE3_P_NO_PRF     0
    5194 # define X86_OP_VEX3_BYTE3_P_066H       1
    5195 # define X86_OP_VEX3_BYTE3_P_0F3H       2
    5196 # define X86_OP_VEX3_BYTE3_P_0F2H       3
    5197 #define X86_OP_VEX3_BYTE3_L             RT_BIT(2)
    5198 #define X86_OP_VEX3_BYTE3_VVVV_MASK     0x78
    5199 #define X86_OP_VEX3_BYTE3_VVVV_SHIFT    3
    5200 #define X86_OP_VEX3_BYTE3_W             RT_BIT(7)
    5201 
    5202 #define X86_OP_VEX3_BYTE3_MAKE(a_f64BitOpSz, a_iSrcReg, a_f256BitAvx, a_fPrf) \
    5203     (  ((a_f64BitOpSz) ? X86_OP_VEX3_BYTE3_W : 0) \
    5204      | (~((uint8_t)(a_iSrcReg) & 0xf)) \
    5205      | ((a_f256BitAvx) ? X86_OP_VEX3_BYTE3_L : 0) \
    5206      | ((a_fPrf) & X86_OP_VEX3_BYTE3_P_MASK))
     5192#define X86_OP_VEX3_BYTE2_P_MASK        0x3
     5193# define X86_OP_VEX3_BYTE2_P_NO_PRF     0
     5194# define X86_OP_VEX3_BYTE2_P_066H       1
     5195# define X86_OP_VEX3_BYTE2_P_0F3H       2
     5196# define X86_OP_VEX3_BYTE2_P_0F2H       3
     5197#define X86_OP_VEX3_BYTE2_L             RT_BIT(2)
     5198#define X86_OP_VEX3_BYTE2_VVVV_MASK     0x78
     5199#define X86_OP_VEX3_BYTE2_VVVV_SHIFT    3
     5200#define X86_OP_VEX3_BYTE2_VVVV_NONE     15
     5201#define X86_OP_VEX3_BYTE2_W             RT_BIT(7)
     5202
     5203#define X86_OP_VEX3_BYTE2_MAKE(a_f64BitOpSz, a_iSrcReg, a_f256BitAvx, a_fPrf) \
     5204    (  ((a_f64BitOpSz) ? X86_OP_VEX3_BYTE2_W : 0) \
     5205     | (~((uint8_t)(a_iSrcReg) & 0xf) << X86_OP_VEX3_BYTE2_VVVV_SHIFT) \
     5206     | ((a_f256BitAvx) ? X86_OP_VEX3_BYTE2_L : 0) \
     5207     | ((a_fPrf) & X86_OP_VEX3_BYTE2_P_MASK))
     5208
     5209#define X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(a_f64BitOpSz, a_f256BitAvx, a_fPrf) \
     5210    (  ((a_f64BitOpSz) ? X86_OP_VEX3_BYTE2_W : 0) \
     5211     | (X86_OP_VEX3_BYTE2_VVVV_NONE << X86_OP_VEX3_BYTE2_VVVV_SHIFT) \
     5212     | ((a_f256BitAvx) ? X86_OP_VEX3_BYTE2_L : 0) \
     5213     | ((a_fPrf) & X86_OP_VEX3_BYTE2_P_MASK))
    52075214/** @} */
    52085215
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

    r103819 r103821  
    69676967    else
    69686968        pCodeBuf[off++] = 0xe3;
    6969     pCodeBuf[off++] = X86_OP_VEX3_BYTE3_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE3_P_066H);
     6969    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H);
    69706970    pCodeBuf[off++] = 0x38;
    69716971    off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu);
     
    74187418    pCodeBuf[off++] = 0x00;
    74197419
    7420     if (f256Bit)
    7421     {
    7422         /* When broadcasting the entire ymm register we can use vbroadcastss now. */
    7423         /* vbroadcastss ymm, xmm (ASSUMES AVX2). */
    7424         pCodeBuf[off++] = X86_OP_VEX3;
    7425         pCodeBuf[off++] =   X86_OP_VEX3_BYTE1_X
    7426                           | 0x02                 /* opcode map. */
    7427                           | (  iVecRegDst >= 8
    7428                              ? 0
    7429                              : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    7430         pCodeBuf[off++] = 0x7d;
    7431         pCodeBuf[off++] = 0x18;
    7432         pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
    7433     }
    7434     else
    7435     {
    7436         /* pinsrd vecdst, gpr, #1 (ASSUMES SSE4.1). */
    7437         pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    7438         if (iVecRegDst >= 8 || iGprSrc >= 8)
    7439             pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
    7440                               | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    7441         pCodeBuf[off++] = 0x0f;
    7442         pCodeBuf[off++] = 0x3a;
    7443         pCodeBuf[off++] = 0x22;
    7444         pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    7445         pCodeBuf[off++] = 0x01;
    7446 
    7447         /* pinsrd vecdst, gpr, #2 (ASSUMES SSE4.1). */
    7448         pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    7449         if (iVecRegDst >= 8 || iGprSrc >= 8)
    7450             pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
    7451                               | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    7452         pCodeBuf[off++] = 0x0f;
    7453         pCodeBuf[off++] = 0x3a;
    7454         pCodeBuf[off++] = 0x22;
    7455         pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    7456         pCodeBuf[off++] = 0x02;
    7457 
    7458         /* pinsrd vecdst, gpr, #3 (ASSUMES SSE4.1). */
    7459         pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    7460         if (iVecRegDst >= 8 || iGprSrc >= 8)
    7461             pCodeBuf[off++] =   (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
    7462                               | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    7463         pCodeBuf[off++] = 0x0f;
    7464         pCodeBuf[off++] = 0x3a;
    7465         pCodeBuf[off++] = 0x22;
    7466         pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    7467         pCodeBuf[off++] = 0x03;
    7468     }
     7420    /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */
     7421    pCodeBuf[off++] = X86_OP_VEX3;
     7422    pCodeBuf[off++] =   X86_OP_VEX3_BYTE1_X
     7423                      | 0x02                 /* opcode map. */
     7424                      | (  iVecRegDst >= 8
     7425                         ? 0
     7426                         : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
     7427    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
     7428    pCodeBuf[off++] = 0x58;
     7429    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
    74697430#elif defined(RT_ARCH_ARM64)
    74707431    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
     
    74897450{
    74907451#ifdef RT_ARCH_AMD64
    7491     off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 12 : 28), off, iVecRegDst, iGprSrc, f256Bit);
     7452    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit);
    74927453#elif defined(RT_ARCH_ARM64)
    74937454    off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit);
     
    75217482    pCodeBuf[off++] = 0x00;
    75227483
    7523     if (f256Bit)
    7524     {
    7525         /* When broadcasting the entire ymm register we can use vbroadcastsd now. */
    7526         /* vbroadcastsd ymm, xmm (ASSUMES AVX2). */
    7527         pCodeBuf[off++] = X86_OP_VEX3;
    7528         pCodeBuf[off++] =   X86_OP_VEX3_BYTE1_X
    7529                           | 0x02                 /* opcode map. */
    7530                           | (  iVecRegDst >= 8
    7531                              ? 0
    7532                              : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
    7533         pCodeBuf[off++] = 0x7d;
    7534         pCodeBuf[off++] = 0x19;
    7535         pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
    7536     }
    7537     else
    7538     {
    7539         /* pinsrq vecdst, gpr, #1 (ASSUMES SSE4.1). */
    7540         pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    7541         pCodeBuf[off++] =   X86_OP_REX_W
    7542                           | (iVecRegDst < 8 ? 0 : X86_OP_REX_R)
    7543                           | (iGprSrc < 8 ? 0 : X86_OP_REX_B);
    7544         pCodeBuf[off++] = 0x0f;
    7545         pCodeBuf[off++] = 0x3a;
    7546         pCodeBuf[off++] = 0x22;
    7547         pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7);
    7548         pCodeBuf[off++] = 0x01;
    7549     }
     7484    /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */
     7485    pCodeBuf[off++] = X86_OP_VEX3;
     7486    pCodeBuf[off++] =   X86_OP_VEX3_BYTE1_X
     7487                      | 0x02                 /* opcode map. */
     7488                      | (  iVecRegDst >= 8
     7489                         ? 0
     7490                         : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R);
     7491    pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H);
     7492    pCodeBuf[off++] = 0x59;
     7493    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7);
    75507494#elif defined(RT_ARCH_ARM64)
    75517495    /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
Note: See TracChangeset for help on using the changeset viewer.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette