Changeset 103816 in vbox
- Timestamp:
- Mar 13, 2024 8:31:43 AM (9 months ago)
- Location:
- trunk/src/VBox/VMM
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py
r103815 r103816 2960 2960 'IEM_MC_BEGIN': (McBlock.parseMcBegin, False, False, True, ), 2961 2961 'IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ), 2962 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False,),2962 'IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd), 2963 2963 'IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, g_fNativeSimd), 2964 2964 'IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX': (McBlock.parseMcGeneric, True, True, False, ), -
trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h
r103815 r103816 6832 6832 6833 6833 6834 #define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \ 6835 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src) 6836 6837 /** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */ 6838 DECL_INLINE_THROW(uint32_t) 6839 iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar) 6840 { 6841 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar); 6842 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t)); 6843 6844 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg), 6845 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite); 6846 6847 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off); 6848 6849 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/); 6850 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst); 6851 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXReg); 6852 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iXReg); 6853 6854 /* Free but don't flush the source register. */ 6855 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst); 6856 iemNativeVarRegisterRelease(pReNative, idxSrcVar); 6857 6858 return off; 6859 } 6860 6861 6834 6862 #define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \ 6835 6863 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src) -
trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h
r103815 r103816 7262 7262 { 7263 7263 #ifdef RT_ARCH_AMD64 7264 /* pinsr q vecsrc, gpr, #iQWord (ASSUMES SSE4.1). */7264 /* pinsrd vecsrc, gpr, #iDWord (ASSUMES SSE4.1). */ 7265 7265 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7266 7266 if (iVecRegDst >= 8 || iGprSrc >= 8) … … 7389 7389 #elif defined(RT_ARCH_ARM64) 7390 7390 off = iemNativeEmitSimdZeroVecRegHighU128Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecReg); 7391 #else 7392 # error "port me" 7393 #endif 7394 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off); 7395 return off; 7396 } 7397 7398 7399 /** 7400 * Emits a vecdst = gprsrc broadcast, 32-bit. 7401 */ 7402 DECL_FORCE_INLINE(uint32_t) 7403 iemNativeEmitSimdBroadcastGprToVecRegU32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false) 7404 { 7405 #ifdef RT_ARCH_AMD64 7406 /** @todo If anyone has a better idea on how to do this more efficiently I'm all ears, 7407 * vbroadcast needs a memory operand or another xmm register to work... */ 7408 7409 /* pinsrd vecsrc, gpr, #0 (ASSUMES SSE4.1). */ 7410 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7411 if (iVecRegDst >= 8 || iGprSrc >= 8) 7412 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7413 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7414 pCodeBuf[off++] = 0x0f; 7415 pCodeBuf[off++] = 0x3a; 7416 pCodeBuf[off++] = 0x22; 7417 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7418 pCodeBuf[off++] = 0x00; 7419 7420 if (f256Bit) 7421 { 7422 /* When broadcasting the entire ymm register we can use vbroadcastss now. */ 7423 /* vbroadcastss ymm, xmm (ASSUMES AVX2). */ 7424 pCodeBuf[off++] = X86_OP_VEX3; 7425 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_X 7426 | ( iVecRegDst >= 8 7427 ? 0 7428 : X86_OP_VEX3_BYTE1_B | X86_OP_VEX3_BYTE1_R); 7429 pCodeBuf[off++] = 0x7d; 7430 pCodeBuf[off++] = 0x18; 7431 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iVecRegDst & 7); 7432 } 7433 else 7434 { 7435 /* pinsrd vecsrc, gpr, #1 (ASSUMES SSE4.1). */ 7436 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7437 if (iVecRegDst >= 8 || iGprSrc >= 8) 7438 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7439 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7440 pCodeBuf[off++] = 0x0f; 7441 pCodeBuf[off++] = 0x3a; 7442 pCodeBuf[off++] = 0x22; 7443 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7444 pCodeBuf[off++] = 0x00; 7445 7446 /* pinsrd vecsrc, gpr, #2 (ASSUMES SSE4.1). */ 7447 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7448 if (iVecRegDst >= 8 || iGprSrc >= 8) 7449 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7450 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7451 pCodeBuf[off++] = 0x0f; 7452 pCodeBuf[off++] = 0x3a; 7453 pCodeBuf[off++] = 0x22; 7454 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7455 pCodeBuf[off++] = 0x00; 7456 7457 /* pinsrd vecsrc, gpr, #3 (ASSUMES SSE4.1). */ 7458 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 7459 if (iVecRegDst >= 8 || iGprSrc >= 8) 7460 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 7461 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 7462 pCodeBuf[off++] = 0x0f; 7463 pCodeBuf[off++] = 0x3a; 7464 pCodeBuf[off++] = 0x22; 7465 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 7466 pCodeBuf[off++] = 0x00; 7467 } 7468 #elif defined(RT_ARCH_ARM64) 7469 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */ 7470 Assert(!(iVecRegDst & 0x1) || !f256Bit); 7471 7472 /* dup vecsrc, gpr */ 7473 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst, iGprSrc, kArmv8InstrUmovInsSz_U32); 7474 if (f256Bit) 7475 pCodeBuf[off++] = Armv8A64MkVecInstrDup(iVecRegDst + 1, iGprSrc, kArmv8InstrUmovInsSz_U32); 7476 #else 7477 # error "port me" 7478 #endif 7479 return off; 7480 } 7481 7482 7483 /** 7484 * Emits a vecdst[x] = gprsrc broadcast, 32-bit. 7485 */ 7486 DECL_INLINE_THROW(uint32_t) 7487 iemNativeEmitSimdBroadcastGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, bool f256Bit = false) 7488 { 7489 #ifdef RT_ARCH_AMD64 7490 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 12 : 28), off, iVecRegDst, iGprSrc, f256Bit); 7491 #elif defined(RT_ARCH_ARM64) 7492 off = iemNativeEmitSimdBroadcastGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, f256Bit ? 2 : 1), off, iVecRegDst, iGprSrc, f256Bit); 7391 7493 #else 7392 7494 # error "port me"
Note:
See TracChangeset
for help on using the changeset viewer.