Changeset 104133 in vbox for trunk/src/VBox/VMM/include
- Timestamp:
- Apr 3, 2024 12:03:22 PM (10 months ago)
- Location:
- trunk/src/VBox/VMM/include
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/VBox/VMM/include/IEMInternal.h
r104132 r104133 3591 3591 /** @name Media (SSE/MMX/AVX) operation: Sort this later 3592 3592 * @{ */ 3593 IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc));3594 IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc));3595 IEM_DECL_IMPL_DEF(void, iemAImpl_vmovshdup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc));3596 IEM_DECL_IMPL_DEF(void, iemAImpl_vmovshdup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc));3597 IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc));3598 IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc));3599 3600 3593 IEM_DECL_IMPL_DEF(void, iemAImpl_pmovsxbw_u128,(PRTUINT128U puDst, uint64_t uSrc)); 3601 3594 IEM_DECL_IMPL_DEF(void, iemAImpl_vpmovsxbw_u128,(PRTUINT128U puDst, uint64_t uSrc)); -
trunk/src/VBox/VMM/include/IEMMc.h
/*
 * Changes r104129 -> r104133, reconstructed from the two diff hunks.
 *
 * Hunk 1 (new lines 702-725) is inserted directly after the closing
 * "} while (0)" of a preceding macro whose name is outside this view; the
 * visible tail of that macro stores (a_u256Src).au64[3] into
 * aYmmHi[iYRegDstTmp].au64[1] and then invokes
 * IEM_MC_INT_CLEAR_ZMM_256_UP(iYRegDstTmp).
 *
 * Hunk 2 (new lines 910-916) is inserted after another macro ending the same
 * way and is followed in the file by "#ifndef IEM_WITH_SETJMP".
 *
 * All macros below expect a 'pVCpu' variable in the expanding scope.
 */

/**
 * Copies one dword of a 256-bit value into one dword of a guest YMM register.
 *
 * Destination dwords below 4 are written to XState.x87.aXMM[], the remaining
 * ones to XState.u.YmmHi.aYmmHi[].  Nothing else in the register is modified
 * (IEM_MC_INT_CLEAR_ZMM_256_UP is not invoked).
 *
 * @param   a_iYRegDst   Index of the destination YMM register.
 * @param   a_iDwDst     Destination dword index; evaluated more than once.
 * @param   a_u256Value  256-bit source value providing an au32 member.
 * @param   a_iDwSrc     Index of the source dword within a_u256Value.
 */
#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
    do { uintptr_t const iYRegDstTmp = (a_iYRegDst); \
         if ((a_iDwDst) < 4) \
            pVCpu->cpum.GstCtx.XState.x87.aXMM[(iYRegDstTmp)].au32[(a_iDwDst)] = (a_u256Value).au32[(a_iDwSrc)]; \
         else \
            pVCpu->cpum.GstCtx.XState.u.YmmHi.aYmmHi[(iYRegDstTmp)].au32[(a_iDwDst) - 4] = (a_u256Value).au32[(a_iDwSrc)]; \
    } while (0)

/**
 * Copies one qword of a 256-bit value into one qword of a guest YMM register.
 *
 * Destination qwords below 2 are written to XState.x87.aXMM[], the remaining
 * ones to XState.u.YmmHi.aYmmHi[].  Nothing else in the register is modified
 * (IEM_MC_INT_CLEAR_ZMM_256_UP is not invoked).
 *
 * Fixes relative to r104133:
 *  - The source element is selected by a_iQwSrc (it used to be a_iQwDst,
 *    leaving the a_iQwSrc parameter unused).
 *  - The high-half index is a_iQwDst - 2, same as IEM_MC_STORE_YREG_U64
 *    (it used to be a_iQwDst - 4, i.e. before the start of the array for
 *    qwords 2 and 3).
 *
 * @param   a_iYRegDst   Index of the destination YMM register.
 * @param   a_iQwDst     Destination qword index; evaluated more than once.
 * @param   a_u256Value  256-bit source value providing an au64 member.
 * @param   a_iQwSrc     Index of the source qword within a_u256Value.
 */
#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
    do { uintptr_t const iYRegDstTmp = (a_iYRegDst); \
         if ((a_iQwDst) < 2) \
            pVCpu->cpum.GstCtx.XState.x87.aXMM[(iYRegDstTmp)].au64[(a_iQwDst)] = (a_u256Value).au64[(a_iQwSrc)]; \
         else \
            pVCpu->cpum.GstCtx.XState.u.YmmHi.aYmmHi[(iYRegDstTmp)].au64[(a_iQwDst) - 2] = (a_u256Value).au64[(a_iQwSrc)]; \
    } while (0)

/**
 * Stores a 64-bit value into one qword of a guest YMM register.
 *
 * Qwords below 2 are written to XState.x87.aXMM[], the remaining ones to
 * XState.u.YmmHi.aYmmHi[].  Nothing else in the register is modified
 * (IEM_MC_INT_CLEAR_ZMM_256_UP is not invoked).
 *
 * @param   a_iYRegDst   Index of the destination YMM register.
 * @param   a_iQword     Destination qword index; evaluated more than once.
 * @param   a_u64Value   The value to store.
 */
#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
    do { uintptr_t const iYRegDstTmp = (a_iYRegDst); \
         if ((a_iQword) < 2) \
            pVCpu->cpum.GstCtx.XState.x87.aXMM[(iYRegDstTmp)].au64[(a_iQword)] = (a_u64Value); \
         else \
            pVCpu->cpum.GstCtx.XState.u.YmmHi.aYmmHi[(iYRegDstTmp)].au64[(a_iQword) - 2] = (a_u64Value); \
    } while (0)

/**
 * Statement wrapper around IEM_MC_INT_CLEAR_ZMM_256_UP for the given register.
 *
 * @param   a_iYReg      Register index handed on to IEM_MC_INT_CLEAR_ZMM_256_UP.
 */
#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
    do { IEM_MC_INT_CLEAR_ZMM_256_UP(a_iYReg); } while (0)
trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h
r104099 r104133 8286 8286 if (iQWord >= 2) 8287 8287 { 8288 /** @todo Currently not used. */ 8289 AssertReleaseFailed(); 8288 /* 8289 * vpextrq doesn't work on the upper 128-bits. 8290 * So we use the following sequence: 8291 * vextracti128 vectmp0, vecsrc, 1 8292 * pextrd gpr, vectmp0, #(iQWord - 2) 8293 */ 8294 /* vextracti128 */ 8295 pCodeBuf[off++] = X86_OP_VEX3; 8296 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8); 8297 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H); 8298 pCodeBuf[off++] = 0x39; 8299 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7); 8300 pCodeBuf[off++] = 0x1; 8301 8302 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */ 8303 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 8304 pCodeBuf[off++] = X86_OP_REX_W 8305 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R) 8306 | (iGprDst < 8 ? 0 : X86_OP_REX_B); 8307 pCodeBuf[off++] = 0x0f; 8308 pCodeBuf[off++] = 0x3a; 8309 pCodeBuf[off++] = 0x16; 8310 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7); 8311 pCodeBuf[off++] = iQWord - 2; 8290 8312 } 8291 8313 else … … 8347 8369 if (iDWord >= 4) 8348 8370 { 8349 /** @todo Currently not used. */ 8350 AssertReleaseFailed(); 8371 /* 8372 * vpextrd doesn't work on the upper 128-bits. 
8373 * So we use the following sequence: 8374 * vextracti128 vectmp0, vecsrc, 1 8375 * pextrd gpr, vectmp0, #(iDWord - 4) 8376 */ 8377 /* vextracti128 */ 8378 pCodeBuf[off++] = X86_OP_VEX3; 8379 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegSrc >= 8); 8380 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H); 8381 pCodeBuf[off++] = 0x39; 8382 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegSrc & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7); 8383 pCodeBuf[off++] = 0x1; 8384 8385 /* pextrd gpr, vecsrc, #iDWord (ASSUMES SSE4.1). */ 8386 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 8387 if (iGprDst >= 8 || IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8) 8388 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R) 8389 | (iGprDst < 8 ? 0 : X86_OP_REX_B); 8390 pCodeBuf[off++] = 0x0f; 8391 pCodeBuf[off++] = 0x3a; 8392 pCodeBuf[off++] = 0x16; 8393 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprDst & 7); 8394 pCodeBuf[off++] = iDWord - 4; 8351 8395 } 8352 8396 else … … 8364 8408 } 8365 8409 #elif defined(RT_ARCH_ARM64) 8410 Assert(iDWord < 4); 8411 8366 8412 /* umov gprdst, vecsrc[iDWord] */ 8367 8413 pCodeBuf[off++] = Armv8A64MkVecInstrUmov(iGprDst, iVecRegSrc, iDWord, kArmv8InstrUmovInsSz_U32, false /*fDst64Bit*/); … … 8382 8428 8383 8429 #ifdef RT_ARCH_AMD64 8384 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iVecRegSrc, iDWord);8430 off = iemNativeEmitSimdLoadGprFromVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 15), off, iGprDst, iVecRegSrc, iDWord); 8385 8431 #elif defined(RT_ARCH_ARM64) 8386 8432 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */ … … 8527 8573 { 8528 8574 #ifdef RT_ARCH_AMD64 8529 /* pinsrq vecsrc, gpr, #iQWord (ASSUMES SSE4.1). 
*/ 8530 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 8531 pCodeBuf[off++] = X86_OP_REX_W 8532 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 8533 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 8534 pCodeBuf[off++] = 0x0f; 8535 pCodeBuf[off++] = 0x3a; 8536 pCodeBuf[off++] = 0x22; 8537 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 8538 pCodeBuf[off++] = iQWord; 8575 if (iQWord >= 2) 8576 { 8577 /* 8578 * vpinsrq doesn't work on the upper 128-bits. 8579 * So we use the following sequence: 8580 * vextracti128 vectmp0, vecdst, 1 8581 * pinsrq vectmp0, gpr, #(iQWord - 2) 8582 * vinserti128 vecdst, vectmp0, 1 8583 */ 8584 /* vextracti128 */ 8585 pCodeBuf[off++] = X86_OP_VEX3; 8586 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8); 8587 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H); 8588 pCodeBuf[off++] = 0x39; 8589 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7); 8590 pCodeBuf[off++] = 0x1; 8591 8592 /* pinsrq */ 8593 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 8594 pCodeBuf[off++] = X86_OP_REX_W 8595 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R) 8596 | (iGprSrc < 8 ? 
0 : X86_OP_REX_B); 8597 pCodeBuf[off++] = 0x0f; 8598 pCodeBuf[off++] = 0x3a; 8599 pCodeBuf[off++] = 0x22; 8600 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7); 8601 pCodeBuf[off++] = iQWord - 2; 8602 8603 /* vinserti128 */ 8604 pCodeBuf[off++] = X86_OP_VEX3; 8605 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8); 8606 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H); 8607 pCodeBuf[off++] = 0x38; 8608 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7); 8609 pCodeBuf[off++] = 0x01; /* Immediate */ 8610 } 8611 else 8612 { 8613 /* pinsrq vecsrc, gpr, #iQWord (ASSUMES SSE4.1). */ 8614 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 8615 pCodeBuf[off++] = X86_OP_REX_W 8616 | (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 8617 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 8618 pCodeBuf[off++] = 0x0f; 8619 pCodeBuf[off++] = 0x3a; 8620 pCodeBuf[off++] = 0x22; 8621 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 8622 pCodeBuf[off++] = iQWord; 8623 } 8539 8624 #elif defined(RT_ARCH_ARM64) 8540 8625 /* ins vecsrc[iQWord], gpr */ … … 8553 8638 iemNativeEmitSimdStoreGprToVecRegU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iQWord) 8554 8639 { 8555 Assert(iQWord <= 1); 8556 8557 #ifdef RT_ARCH_AMD64 8558 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iQWord); 8559 #elif defined(RT_ARCH_ARM64) 8560 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord); 8640 Assert(iQWord <= 3); 8641 8642 #ifdef RT_ARCH_AMD64 8643 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iQWord); 8644 #elif defined(RT_ARCH_ARM64) 8645 Assert(!(iVecRegDst & 0x1)); 8646 if 
(iQWord >= 2) 8647 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iQWord - 2); 8648 else 8649 off = iemNativeEmitSimdStoreGprToVecRegU64Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iQWord); 8561 8650 #else 8562 8651 # error "port me" … … 8574 8663 { 8575 8664 #ifdef RT_ARCH_AMD64 8576 /* pinsrd vecsrc, gpr, #iDWord (ASSUMES SSE4.1). */ 8577 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 8578 if (iVecRegDst >= 8 || iGprSrc >= 8) 8579 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 8580 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 8581 pCodeBuf[off++] = 0x0f; 8582 pCodeBuf[off++] = 0x3a; 8583 pCodeBuf[off++] = 0x22; 8584 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 8585 pCodeBuf[off++] = iDWord; 8665 if (iDWord >= 4) 8666 { 8667 /* 8668 * vpinsrq doesn't work on the upper 128-bits. 8669 * So we use the following sequence: 8670 * vextracti128 vectmp0, vecdst, 1 8671 * pinsrd vectmp0, gpr, #(iDword - 4) 8672 * vinserti128 vecdst, vectmp0, 1 8673 */ 8674 /* vextracti128 */ 8675 pCodeBuf[off++] = X86_OP_VEX3; 8676 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8); 8677 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false, true, X86_OP_VEX3_BYTE2_P_066H); 8678 pCodeBuf[off++] = 0x39; 8679 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7); 8680 pCodeBuf[off++] = 0x1; 8681 8682 /* pinsrd */ 8683 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 8684 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || iGprSrc >= 8) 8685 pCodeBuf[off++] = (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R) 8686 | (iGprSrc < 8 ? 
0 : X86_OP_REX_B); 8687 pCodeBuf[off++] = 0x0f; 8688 pCodeBuf[off++] = 0x3a; 8689 pCodeBuf[off++] = 0x22; 8690 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, iGprSrc & 7); 8691 pCodeBuf[off++] = iDWord - 4; 8692 8693 /* vinserti128 */ 8694 pCodeBuf[off++] = X86_OP_VEX3; 8695 pCodeBuf[off++] = X86_OP_VEX3_BYTE1_MAKE(0x3, IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8, false, iVecRegDst >= 8); 8696 pCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE(false, iVecRegDst, true, X86_OP_VEX3_BYTE2_P_066H); 8697 pCodeBuf[off++] = 0x38; 8698 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7); 8699 pCodeBuf[off++] = 0x01; /* Immediate */ 8700 } 8701 else 8702 { 8703 /* pinsrd vecsrc, gpr, #iDWord (ASSUMES SSE4.1). */ 8704 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP; 8705 if (iVecRegDst >= 8 || iGprSrc >= 8) 8706 pCodeBuf[off++] = (iVecRegDst < 8 ? 0 : X86_OP_REX_R) 8707 | (iGprSrc < 8 ? 0 : X86_OP_REX_B); 8708 pCodeBuf[off++] = 0x0f; 8709 pCodeBuf[off++] = 0x3a; 8710 pCodeBuf[off++] = 0x22; 8711 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iVecRegDst & 7, iGprSrc & 7); 8712 pCodeBuf[off++] = iDWord; 8713 } 8586 8714 #elif defined(RT_ARCH_ARM64) 8587 8715 /* ins vecsrc[iDWord], gpr */ … … 8600 8728 iemNativeEmitSimdStoreGprToVecRegU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iVecRegDst, uint8_t iGprSrc, uint8_t iDWord) 8601 8729 { 8602 Assert(iDWord <= 3); 8603 8604 #ifdef RT_ARCH_AMD64 8605 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 7), off, iVecRegDst, iGprSrc, iDWord); 8606 #elif defined(RT_ARCH_ARM64) 8607 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord); 8730 Assert(iDWord <= 7); 8731 8732 #ifdef RT_ARCH_AMD64 8733 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 19), off, iVecRegDst, iGprSrc, iDWord); 8734 #elif defined(RT_ARCH_ARM64) 8735 Assert(!(iVecRegDst 
& 0x1)); 8736 if (iDWord >= 4) 8737 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst + 1, iGprSrc, iDWord - 4); 8738 else 8739 off = iemNativeEmitSimdStoreGprToVecRegU32Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iVecRegDst, iGprSrc, iDWord); 8608 8740 #else 8609 8741 # error "port me"
Note:
See TracChangeset
for help on using the changeset viewer.