Changeset 103821 in vbox
- Timestamp:
- Mar 13, 2024 10:54:07 AM (13 months ago)
- svn:sync-xref-src-repo-rev:
- 162190
- Location:
- trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/include/iprt/x86.h
#define X86_OP_VEX3_BYTE1_R                     RT_BIT(7)

/** Mask of the two-bit prefix selector field (bits 1:0) in VEX3 byte 2. */
#define X86_OP_VEX3_BYTE2_P_MASK                0x3
/* NOTE(review): the _0F3H / _0F2H names presumably refer to the F3h / F2h
   prefixes; the spellings are kept unchanged so existing users still compile. */
# define X86_OP_VEX3_BYTE2_P_NO_PRF             0
# define X86_OP_VEX3_BYTE2_P_066H               1
# define X86_OP_VEX3_BYTE2_P_0F3H               2
# define X86_OP_VEX3_BYTE2_P_0F2H               3
/** The L bit (bit 2); X86_OP_VEX3_BYTE2_MAKE sets it for 256-bit AVX operation. */
#define X86_OP_VEX3_BYTE2_L                     RT_BIT(2)
/** Mask of the four-bit VVVV register field (bits 6:3). */
#define X86_OP_VEX3_BYTE2_VVVV_MASK             0x78
/** Shift count of the VVVV register field. */
#define X86_OP_VEX3_BYTE2_VVVV_SHIFT            3
/** Unshifted VVVV field value used when the instruction has no VVVV operand. */
#define X86_OP_VEX3_BYTE2_VVVV_NONE             15
/** The W bit (bit 7); X86_OP_VEX3_BYTE2_MAKE sets it for 64-bit operand size. */
#define X86_OP_VEX3_BYTE2_W                     RT_BIT(7)

/**
 * Composes byte 2 of a three-byte VEX prefix for an instruction that uses the
 * VVVV register operand.
 *
 * The register index is stored complemented in the VVVV field.  The complement
 * is done in unsigned arithmetic and masked down to four bits before shifting,
 * so the result never spills into the W bit (bit 7) or above and no negative
 * value is ever left-shifted.
 *
 * @returns The prefix byte value (always in the range 0..0xff).
 * @param   a_f64BitOpSz    Non-zero/true to set the W bit.
 * @param   a_iSrcReg       Register index for the VVVV field; only the low
 *                          four bits are used.
 * @param   a_f256BitAvx    Non-zero/true to set the L bit.
 * @param   a_fPrf          Prefix selector, one of the X86_OP_VEX3_BYTE2_P_XXX
 *                          values; masked with X86_OP_VEX3_BYTE2_P_MASK.
 */
#define X86_OP_VEX3_BYTE2_MAKE(a_f64BitOpSz, a_iSrcReg, a_f256BitAvx, a_fPrf) \
    (   ((a_f64BitOpSz) ? X86_OP_VEX3_BYTE2_W : 0) \
      | ((~(unsigned)(a_iSrcReg) & 0xfU) << X86_OP_VEX3_BYTE2_VVVV_SHIFT) \
      | ((a_f256BitAvx) ? X86_OP_VEX3_BYTE2_L : 0) \
      | ((a_fPrf) & X86_OP_VEX3_BYTE2_P_MASK))

/**
 * Composes byte 2 of a three-byte VEX prefix for an instruction that does not
 * use the VVVV register operand; the field is filled with
 * X86_OP_VEX3_BYTE2_VVVV_NONE.
 *
 * @returns The prefix byte value (always in the range 0..0xff).
 * @param   a_f64BitOpSz    Non-zero/true to set the W bit.
 * @param   a_f256BitAvx    Non-zero/true to set the L bit.
 * @param   a_fPrf          Prefix selector, one of the X86_OP_VEX3_BYTE2_P_XXX
 *                          values; masked with X86_OP_VEX3_BYTE2_P_MASK.
 */
#define X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(a_f64BitOpSz, a_f256BitAvx, a_fPrf) \
    (   ((a_f64BitOpSz) ? X86_OP_VEX3_BYTE2_W : 0) \
      | (X86_OP_VEX3_BYTE2_VVVV_NONE << X86_OP_VEX3_BYTE2_VVVV_SHIFT) \
      | ((a_f256BitAvx) ? X86_OP_VEX3_BYTE2_L : 0) \
      | ((a_fPrf) & X86_OP_VEX3_BYTE2_P_MASK))
/** @} */
trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h
r103819 r103821 6967 6967 else 6968 6968 pCodeBuf[off++] = 0xe3; 6969 pCodeBuf[off++] = X86_OP_VEX3_BYTE 3_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE3_P_066H);6969 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecReg, true, X86_OP_VEX3_BYTE2_P_066H); 6970 6970 pCodeBuf[off++] = 0x38; 6971 6971 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, iVecReg, offVCpu); … … 7418 7418 pCodeBuf[off++] = 0x00; 7419 7419 7420 if (f256Bit) 7421 { 7422 /* When broadcasting the entire ymm register we can use vbroadcastss now. */ 7423 /* vbroadcastss ymm, xmm (ASSUMES AVX2). */ 7424 pCodeBuf[off++] = X86_OP_VEX3; 7425 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X 7426 | 0x02 /* opcode map. */ 7427 | ( iVecRegDst >= 8 7428 ? 0 7429 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R); 7430 pCodeBuf[off++] = 0x7d; 7431 pCodeBuf[off++] = 0x18; 7432 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7); 7433 } 7434 else 7435 { 7436 /* pinsrd vecdst, gpr, #1 (ASSUMES SSE4.1). */ 7437 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7438 if (iVecRegDst >= 8 || iGprSrc >= 8) 7439 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7440 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7441 pCodeBuf[off++] = 0x0f; 7442 pCodeBuf[off++] = 0x3a; 7443 pCodeBuf[off++] = 0x22; 7444 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7445 pCodeBuf[off++] = 0x01; 7446 7447 /* pinsrd vecdst, gpr, #2 (ASSUMES SSE4.1). */ 7448 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7449 if (iVecRegDst >= 8 || iGprSrc >= 8) 7450 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7451 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7452 pCodeBuf[off++] = 0x0f; 7453 pCodeBuf[off++] = 0x3a; 7454 pCodeBuf[off++] = 0x22; 7455 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7456 pCodeBuf[off++] = 0x02; 7457 7458 /* pinsrd vecdst, gpr, #3 (ASSUMES SSE4.1). */ 7459 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7460 if (iVecRegDst >= 8 || iGprSrc >= 8) 7461 pCodeBuf[off++] = (iVecRegDst < 8 ? 
0 : X86_OP_REX_R) 7462 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7463 pCodeBuf[off++] = 0x0f; 7464 pCodeBuf[off++] = 0x3a; 7465 pCodeBuf[off++] = 0x22; 7466 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7467 pCodeBuf[off++] = 0x03; 7468 } 7420 /* vpbroadcastd {y,x}mm, xmm (ASSUMES AVX2). */ 7421 pCodeBuf[off++] = X86_OP_VEX3; 7422 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X 7423 | 0x02 /* opcode map. */ 7424 | ( iVecRegDst >= 8 7425 ? 0 7426 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R); 7427 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H); 7428 pCodeBuf[off++] = 0x58; 7429 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7); 7469 7430 #elif defined(RT_ARCH_ARM64) 7470 7431 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */ … … 7489 7450 { 7490 7451 #ifdef RT_ARCH_AMD64 7491 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 12 : 28), off, iVecRegDst, iGprSrc, f256Bit);7452 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 12), off, iVecRegDst, iGprSrc, f256Bit); 7492 7453 #elif defined(RT_ARCH_ARM64) 7493 7454 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit); … … 7521 7482 pCodeBuf[off++] = 0x00; 7522 7483 7523 if (f256Bit) 7524 { 7525 /* When broadcasting the entire ymm register we can use vbroadcastsd now. */ 7526 /* vbroadcastsd ymm, xmm (ASSUMES AVX2). */ 7527 pCodeBuf[off++] = X86_OP_VEX3; 7528 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X 7529 | 0x02 /* opcode map. */ 7530 | ( iVecRegDst >= 8 7531 ? 0 7532 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R); 7533 pCodeBuf[off++] = 0x7d; 7534 pCodeBuf[off++] = 0x19; 7535 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7); 7536 } 7537 else 7538 { 7539 /* pinsrq vecdst, gpr, #1 (ASSUMES SSE4.1). 
*/ 7540 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7541 pCodeBuf[off++] = X86_OP_REX_W 7542 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7543 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7544 pCodeBuf[off++] = 0x0f; 7545 pCodeBuf[off++] = 0x3a; 7546 pCodeBuf[off++] = 0x22; 7547 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7548 pCodeBuf[off++] = 0x01; 7549 } 7484 /* vpbroadcastq {y,x}mm, xmm (ASSUMES AVX2). */ 7485 pCodeBuf[off++] = X86_OP_VEX3; 7486 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X 7487 | 0x02 /* opcode map. */ 7488 | ( iVecRegDst >= 8 7489 ? 0 7490 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R); 7491 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, f256Bit, X86_OP_VEX3_BYTE2_P_066H); 7492 pCodeBuf[off++] = 0x59; 7493 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7); 7550 7494 #elif defined(RT_ARCH_ARM64) 7551 7495 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
Note: See TracChangeset for help on using the changeset viewer.