Changeset 96247 in vbox

Timestamp:

Aug 17, 2022 9:08:30 AM (2 years ago)

Author:

vboxsync

Message:

VMM/IEM: Start implementing floating point SSE instructions using addps, added some new infrastructure bits (mostly untested), bugref:9898

Location:

trunk/src/VBox/VMM

Files:

: 7 edited

VMMAll/IEMAll.cpp (modified) (2 diffs)
VMMAll/IEMAllAImpl.asm (modified) (1 diff)
VMMAll/IEMAllAImplC.cpp (modified) (1 diff)
VMMAll/IEMAllInstructionsTwoByte0f.cpp.h (modified) (2 diffs)
include/IEMInternal.h (modified) (3 diffs)
include/IEMMc.h (modified) (7 diffs)
testcase/tstIEMCheckMc.cpp (modified) (5 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/src/VBox/VMM/VMMAll/IEMAll.cpp

-              r95575
+              r96247
+/**
+ * Raises a SIMD floating-point exception, \#XF(0)/\#XM(0) - vector 19.
+ *
+ * @returns Strict status code as returned by iemRaiseXcptOrInt.
+ * @param   pVCpu   The cross context virtual CPU structure of the calling thread.
+ */
+VBOXSTRICTRC iemRaiseSimdFpException(PVMCPUCC pVCpu) RT_NOEXCEPT
+{
+    VBOXSTRICTRC const rcStrict = iemRaiseXcptOrInt(pVCpu, 0, X86_XCPT_XF, IEM_XCPT_FLAGS_T_CPU_XCPT, 0, 0);
+    return rcStrict;
+}
 /** Accessed via IEMOP_RAISE_DIVIDE_ERROR.   */
 IEM_CIMPL_DEF_0(iemCImplRaiseDivideError)
 …
     iemFpuUpdateOpcodeAndIpWorker(pVCpu, pFpuCtx);
     iemFpuStackPushOverflowOnly(pVCpu, pFpuCtx);
+}
+/** @}  */
+/** @name   SSE+AVX SIMD access and helpers.
+ *
+ * @{
+ */
+/**
+ * Commits an SSE result to the guest state.
+ *
+ * The result value is written to the given XMM register and the exception
+ * flag bits (X86_MXCSR_XCPT_FLAGS) of the result MXCSR are OR'ed into the
+ * guest MXCSR.  No other MXCSR bits are taken from the result.
+ *
+ * @param   pVCpu               The cross context virtual CPU structure of the calling thread.
+ * @param   pResult             The result to store.
+ * @param   iXmmReg             Which SIMD XMM register to store the result in.
+ */
+void iemSseStoreResult(PVMCPUCC pVCpu, PCIEMSSERESULT pResult, uint8_t iXmmReg) RT_NOEXCEPT
+{
+    PX86FXSTATE const pFpuCtx    = &pVCpu->cpum.GstCtx.XState.x87;
+    uint32_t const    fXcptFlags = pResult->MXCSR & X86_MXCSR_XCPT_FLAGS;
+
+    pFpuCtx->aXMM[iXmmReg] = pResult->uResult;
+    pFpuCtx->MXCSR        |= fXcptFlags;
+}

trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm

-              r96130
+              r96247
 IEMIMPL_V_PMOV_SZ_X pmovzxdq
+;;
+; Assembly view of the C structure IEMSSERESULT (IEMInternal.h). TODO: find a better place for this.
+;
+struc IEMSSERESULT
+    .uResult      resd 4
+    .MXCSR        resd 1
+endstruc
+;;
+; Assembly view of the C structure IEMAVX128RESULT (IEMInternal.h). TODO: find a better place for this.
+;
+struc IEMAVX128RESULT
+    .uResult      resd 4
+    .MXCSR        resd 1
+endstruc
+;;
+; Assembly view of the C structure IEMAVX256RESULT (IEMInternal.h). TODO: find a better place for this.
+;
+struc IEMAVX256RESULT
+    .uResult      resd 8
+    .MXCSR        resd 1
+endstruc
+;;
+; Saves the current MXCSR on the stack and loads MXCSR with the guest's FZ, DAZ and
+; rounding control bits plus all the X86_MXCSR_XCPT_MASK bits set.
+;
+; @uses     T0; leaves 4 bytes on the stack holding the original MXCSR value.
+; @param    1       Expression giving the address of the FXSTATE of the guest.
+; @note     xSP stays lowered by 4 on exit; SSE_ST_FXSTATE_MXCSR reloads and pops the saved value.
+%macro SSE_LD_FXSTATE_MXCSR 1
+        sub     xSP, 4
+        stmxcsr [xSP]
+        mov     T0_32, [%1 + X86FXSTATE.MXCSR]
+        and     T0_32, X86_MXCSR_FZ | X86_MXCSR_RC_MASK | X86_MXCSR_DAZ
+        or      T0_32, X86_MXCSR_XCPT_MASK
+        sub     xSP, 4
+        mov     [xSP], T0_32
+        ldmxcsr [xSP]
+        add     xSP, 4
+%endmacro
+;;
+; Returns the guest MXCSR OR'ed with the newly raised exception flags and reloads the MXCSR saved on the stack.
+;
+; @uses     4 bytes of scratch stack for reading the current MXCSR, T0, T1.
+; @param    1       Expression giving the address where to return the MXCSR value.
+; @param    2       Expression giving the address of the FXSTATE of the guest.
+;
+; @note Pops the 4 byte MXCSR save slot left by SSE_LD_FXSTATE_MXCSR, restoring the stack pointer.
+;
+%macro SSE_ST_FXSTATE_MXCSR 2
+        sub     xSP, 4
+        stmxcsr [xSP]
+        mov     T0_32, [xSP]
+        add     xSP, 4
+        ; Merge the status bits into the original MXCSR value.
+        mov     T1_32, [%2 + X86FXSTATE.MXCSR]
+        and     T0_32, X86_MXCSR_XCPT_FLAGS
+        or      T0_32, T1_32
+        mov     [%1 + IEMSSERESULT.MXCSR], T0_32
+        ldmxcsr [xSP]
+        add     xSP, 4
+%endmacro
+;;
+; Saves the current MXCSR on the stack and loads MXCSR with only the guest's FZ, DAZ
+; and rounding control bits.
+;
+; @uses     T0; leaves 4 bytes on the stack holding the original MXCSR value.
+; @param    1       Expression giving the address of the FXSTATE of the guest.
+; @note     NOTE(review): unlike SSE_LD_FXSTATE_MXCSR the exception mask bits are not set here - confirm this is intended.
+%macro AVX_LD_XSAVEAREA_MXCSR 1
+        sub     xSP, 4
+        stmxcsr [xSP]
+        mov     T0_32, [%1 + X86FXSTATE.MXCSR]
+        and     T0_32, X86_MXCSR_FZ | X86_MXCSR_RC_MASK | X86_MXCSR_DAZ
+        sub     xSP, 4
+        mov     [xSP], T0_32
+        ldmxcsr [xSP]
+        add     xSP, 4
+%endmacro
+;;
+; Stores the current MXCSR in the AVX128 result and reloads the MXCSR value saved on the stack.
+;
+; @param    1       Expression giving the address where to return the MXCSR value.
+;
+; @note Pops the 4 byte MXCSR save slot, restoring the stack pointer.
+;
+%macro AVX128_ST_XSAVEAREA_MXCSR 1
+        stmxcsr [%1 + IEMAVX128RESULT.MXCSR]
+        ldmxcsr [xSP]
+        add     xSP, 4
+%endmacro
+;;
+; Stores the current MXCSR in the AVX256 result and reloads the MXCSR value saved on the stack.
+;
+; @param    1       Expression giving the address where to return the MXCSR value.
+;
+; @note Pops the 4 byte MXCSR save slot, restoring the stack pointer.
+;
+%macro AVX256_ST_XSAVEAREA_MXCSR 1
+        stmxcsr [%1 + IEMAVX256RESULT.MXCSR]
+        ldmxcsr [xSP]
+        add     xSP, 4
+%endmacro
+;;
+; Floating point instruction working on two full sized registers.
+;
+; Generates three functions for the given instruction: the SSE form
+; (iemAImpl_<instr>_u128) and the AVX forms (iemAImpl_v<instr>_u128 and
+; iemAImpl_v<instr>_u256).  Each loads MXCSR from the guest state, executes
+; the instruction on copies of the operands, stores value and MXCSR in the
+; result structure and reloads the MXCSR value saved on entry.
+;
+; @param    1       The instruction
+;
+; @param    A0      FPU context (FXSTATE or XSAVEAREA).
+; @param    A1      Where to return the result including the MXCSR value.
+; @param    A2      Pointer to the first media register size operand (input/output).
+; @param    A3      Pointer to the second media register size operand (input).
+;
+%macro IEMIMPL_FP_F2 1
+BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
+        PROLOGUE_4_ARGS
+        IEMIMPL_SSE_PROLOGUE
+        SSE_LD_FXSTATE_MXCSR A0
+        movdqu   xmm0, [A2]
+        movdqu   xmm1, [A3]
+        %1       xmm0, xmm1
+        movdqu   [A1 + IEMSSERESULT.uResult], xmm0
+        SSE_ST_FXSTATE_MXCSR A1, A0
+        IEMIMPL_SSE_EPILOGUE            ; pairs with IEMIMPL_SSE_PROLOGUE above
+        EPILOGUE_4_ARGS
+ENDPROC iemAImpl_ %+ %1 %+ _u128
+BEGINPROC_FASTCALL iemAImpl_v %+ %1 %+ _u128, 12
+        PROLOGUE_4_ARGS
+        IEMIMPL_AVX_PROLOGUE
+        AVX_LD_XSAVEAREA_MXCSR A0
+        vmovdqu  xmm0, [A2]
+        vmovdqu  xmm1, [A3]
+        v %+ %1  xmm0, xmm0, xmm1
+        vmovdqu  [A1 + IEMAVX128RESULT.uResult], xmm0
+        AVX128_ST_XSAVEAREA_MXCSR A1
+        IEMIMPL_AVX_EPILOGUE            ; pairs with IEMIMPL_AVX_PROLOGUE above
+        EPILOGUE_4_ARGS
+ENDPROC iemAImpl_v %+ %1 %+ _u128
+BEGINPROC_FASTCALL iemAImpl_v %+ %1 %+ _u256, 12
+        PROLOGUE_4_ARGS
+        IEMIMPL_AVX_PROLOGUE
+        AVX_LD_XSAVEAREA_MXCSR A0
+        vmovdqu  ymm0, [A2]
+        vmovdqu  ymm1, [A3]
+        v %+ %1  ymm0, ymm0, ymm1
+        vmovdqu  [A1 + IEMAVX256RESULT.uResult], ymm0
+        AVX256_ST_XSAVEAREA_MXCSR A1
+        IEMIMPL_AVX_EPILOGUE            ; pairs with IEMIMPL_AVX_PROLOGUE above
+        EPILOGUE_4_ARGS
+ENDPROC iemAImpl_v %+ %1 %+ _u256
+%endmacro
+IEMIMPL_FP_F2 addps

trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp

-              r96115
+              r96247
+}
+/**
+ * Repackages an IPRT 32-bit (single precision) floating point value as a
+ * SoftFloat float32_t.
+ *
+ * Pure representation change, the raw bits are copied unmodified.
+ *
+ * @returns The value in SoftFloat representation.
+ * @param   pr32Val     The IPRT value to convert.
+ */
+DECLINLINE(float32_t) iemFpSoftF32FromIprt(PCRTFLOAT32U pr32Val)
+{
+    float32_t r32Soft;
+    r32Soft.v = pr32Val->u;
+    return r32Soft;
+}
+/**
+ * Repackages a SoftFloat float32_t as an IPRT 32-bit (single precision)
+ * floating point value (RTFLOAT32U).
+ *
+ * Pure representation change, the raw bits are copied unmodified.
+ *
+ * @returns pr32Dst.
+ * @param   pr32Dst     Where to store the IPRT value.
+ * @param   r32XSrc     The SoftFloat value to convert.
+ */
+DECLINLINE(PRTFLOAT32U) iemFpSoftF32ToIprt(PRTFLOAT32U pr32Dst, float32_t const r32XSrc)
+{
+    PRTFLOAT32U const pr32Ret = pr32Dst;
+    pr32Ret->u = r32XSrc.v;
+    return pr32Ret;
+}
+/**
+ * Repackages an IPRT 64-bit (double precision) floating point value as a
+ * SoftFloat float64_t.
+ *
+ * Pure representation change, the raw bits are copied unmodified.
+ *
+ * @returns The value in SoftFloat representation.
+ * @param   pr64Val     The IPRT value to convert.
+ */
+DECLINLINE(float64_t) iemFpSoftF64FromIprt(PCRTFLOAT64U pr64Val)
+{
+    float64_t r64Soft;
+    r64Soft.v = pr64Val->u;
+    return r64Soft;
+}
+/**
+ * Repackages a SoftFloat float64_t as an IPRT 64-bit (double precision)
+ * floating point value (RTFLOAT64U).
+ *
+ * Pure representation change, the raw bits are copied unmodified.
+ *
+ * @returns pr64Dst.
+ * @param   pr64Dst     Where to store the IPRT value.
+ * @param   r64XSrc     The SoftFloat value to convert.
+ */
+DECLINLINE(PRTFLOAT64U) iemFpSoftF64ToIprt(PRTFLOAT64U pr64Dst, float64_t const r64XSrc)
+{
+    PRTFLOAT64U const pr64Ret = pr64Dst;
+    pr64Ret->u = r64XSrc.v;
+    return pr64Ret;
+}
+/** Initializer for the SoftFloat state structure. */
+# define IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_MXCSR(a_Mxcsr) \
+    { \
+        softfloat_tininess_afterRounding, \
+          ((a_Mxcsr) & X86_MXCSR_RC_MASK) == X86_MXCSR_RC_NEAREST ? (uint8_t)softfloat_round_near_even \
+        : ((a_Mxcsr) & X86_MXCSR_RC_MASK) == X86_MXCSR_RC_UP      ? (uint8_t)softfloat_round_max \
+        : ((a_Mxcsr) & X86_MXCSR_RC_MASK) == X86_MXCSR_RC_DOWN    ? (uint8_t)softfloat_round_min \
+        :                                                           (uint8_t)softfloat_round_minMag, \
+, \
+        (uint8_t)(((a_Mxcsr) & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT), /* Matches X86_FSW_?E */\
+/* Rounding precision, not relevant for SIMD. */ \
+    }
+/**
+ * Helper for transfering exception to MXCSR and setting the result value
+ * accordingly.
+ *
+ * @returns Updated MXCSR.
+ * @param   pSoftState      The SoftFloat state following the operation.
+ * @param   r32Result       The result of the SoftFloat operation.
+ * @param   pr32Result      Where to store the result for IEM.
+ * @param   fMxcsr          The original MXCSR value.
+ * @param   pr32Src1        The first source operand (for setting #DE under certain circumstances).
+ * @param   pr32Src2        The second source operand (for setting #DE under certain circumstances).
+ */
+DECLINLINE(uint32_t) iemSseSoftStateAndR32ToMxcsrAndIprtResult(softfloat_state_t const *pSoftState, float32_t r32Result,
+                                                               PRTFLOAT32U pr32Result, uint32_t fMxcsr,
+                                                               PCRTFLOAT32U pr32Src1, PCRTFLOAT32U pr32Src2)
+{
+    uint8_t fXcpt = pSoftState->exceptionFlags;
+    if (   (fMxcsr & X86_MXCSR_FZ)
+        && RTFLOAT32U_IS_SUBNORMAL((PRTFLOAT32U)&r32Result))
+    {
+        /* Underflow masked and flush to zero is set. */
+        iemFpSoftF32ToIprt(pr32Result, r32Result);
+        pr32Result->s.uFraction = 0;
+        pr32Result->s.uExponent = 0;
+        fXcpt |= X86_MXCSR_UE | X86_MXCSR_PE;
+    }
+    else
+        iemFpSoftF32ToIprt(pr32Result, r32Result);
+    /* If DAZ is set \#DE is never set. */
+    if (fMxcsr & X86_MXCSR_DAZ)
+        fXcpt &= ~X86_MXCSR_DE;
+    else /* Need to set \#DE when either the result or one of the source operands is a De-normal (softfloat doesn't do this always). */
+        fXcpt |=   (   RTFLOAT32U_IS_SUBNORMAL(pr32Result)
+                    || RTFLOAT32U_IS_SUBNORMAL(pr32Src1)
+                    || RTFLOAT32U_IS_SUBNORMAL(pr32Src2))
+                 ? X86_MXCSR_DE
+                 : 0;
+    return fMxcsr | (fXcpt & X86_MXCSR_XCPT_FLAGS);
+}
+#ifdef IEM_WITHOUT_ASSEMBLY
+/**
+ * Copies a floating point input value, honouring the Denormals-as-zero flag
+ * in MXCSR: with DAZ set a subnormal input is replaced by a zero of the same
+ * sign, otherwise the value is copied unmodified.
+ *
+ * @param   pr32Val         Where to store the result.
+ * @param   fMxcsr          The input MXCSR value.
+ * @param   pr32Src         The value to use.
+ */
+DECLINLINE(void) iemSsePrepareValueR32(PRTFLOAT32U pr32Val, uint32_t fMxcsr, PCRTFLOAT32U pr32Src)
+{
+    bool const fDaz = (fMxcsr & X86_MXCSR_DAZ) != 0;
+    if (!fDaz || !RTFLOAT32U_IS_SUBNORMAL(pr32Src))
+    {
+        /* Nothing to adjust, plain copy. */
+        *pr32Val = *pr32Src;
+        return;
+    }
+
+    /* De-normals are changed to 0, keeping the sign. */
+    pr32Val->s.uExponent = 0;
+    pr32Val->s.uFraction = 0;
+    pr32Val->s.fSign     = pr32Src->s.fSign;
+}
+/**
+ * Validates the given input operands returning whether the operation can continue or whether one
+ * of the source operands contains a NaN value, setting the output accordingly.
+ *
+ * @returns Flag whether the operation can continue (true) or whether a NaN value was detected in one of the operands (false).
+ * @param   pr32Res         Where to store the result in case the operation can't continue.
+ * @param   pr32Val1        The first input operand.
+ * @param   pr32Val2        The second input operand.
+ * @param   pfMxcsr         Where to return the modified MXCSR state when false is returned.
+ */
+DECLINLINE(bool) iemSseCheckInputBinaryR32(PRTFLOAT32U pr32Res, PCRTFLOAT32U pr32Val1, PCRTFLOAT32U pr32Val2, uint32_t *pfMxcsr)
+{
+    uint8_t cQNan = RTFLOAT32U_IS_QUIET_NAN(pr32Val1) + RTFLOAT32U_IS_QUIET_NAN(pr32Val2);
+    uint8_t cSNan = RTFLOAT32U_IS_SIGNALLING_NAN(pr32Val1) + RTFLOAT32U_IS_SIGNALLING_NAN(pr32Val2);
+    if (cSNan + cQNan == 2)
+    {
+        /* Both values are either SNan or QNan, first operand is placed into the result and converted to a QNan. */
+        *pr32Res = *pr32Val1;
+        pr32Res->s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
+        *pfMxcsr |= (cSNan ? X86_MXCSR_IE : 0);
+        return false;
+    }
+    else if (cSNan)
+    {
+        /* One operand is an SNan and placed into the result, converting it to a QNan. */
+        *pr32Res = RTFLOAT32U_IS_SIGNALLING_NAN(pr32Val1) ? *pr32Val1 : *pr32Val2;
+        pr32Res->s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
+        *pfMxcsr |= X86_MXCSR_IE;
+        return false;
+    }
+    else if (cQNan)
+    {
+        /* The QNan operand is placed into the result. */
+        *pr32Res = RTFLOAT32U_IS_QUIET_NAN(pr32Val1) ? *pr32Val1 : *pr32Val2;
+        return false;
+    }
+    Assert(!cQNan && !cSNan);
+    return true;
+}
+#endif
+/**
+ * ADDPS
+ */
+#ifdef IEM_WITHOUT_ASSEMBLY
+/**
+ * Adds one pair of single precision values for ADDPS.
+ *
+ * @returns The MXCSR value including any exception flags raised.
+ * @param   pr32Res         Where to store the sum (or the NaN result).
+ * @param   fMxcsr          The input MXCSR value.
+ * @param   pr32Val1        The first operand.
+ * @param   pr32Val2        The second operand.
+ */
+static uint32_t iemAImpl_addps_u128_worker(PRTFLOAT32U pr32Res, uint32_t fMxcsr, PCRTFLOAT32U pr32Val1, PCRTFLOAT32U pr32Val2)
+{
+    /* NaN operands are handled entirely by the input check, result and flags included. */
+    if (iemSseCheckInputBinaryR32(pr32Res, pr32Val1, pr32Val2, &fMxcsr))
+    {
+        RTFLOAT32U r32Src1, r32Src2;
+        iemSsePrepareValueR32(&r32Src1, fMxcsr, pr32Val1);
+        iemSsePrepareValueR32(&r32Src2, fMxcsr, pr32Val2);
+
+        softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_MXCSR(fMxcsr);
+        float32_t const   r32Sum    = f32_add(iemFpSoftF32FromIprt(&r32Src1), iemFpSoftF32FromIprt(&r32Src2), &SoftState);
+        fMxcsr = iemSseSoftStateAndR32ToMxcsrAndIprtResult(&SoftState, r32Sum, pr32Res, fMxcsr, &r32Src1, &r32Src2);
+    }
+    return fMxcsr;
+}
+/**
+ * C implementation of ADDPS: adds the four packed single precision values of
+ * the two sources.
+ *
+ * @param   pFpuState       The FPU state, supplies the input MXCSR.
+ * @param   pResult         Where to return the four sums and the resulting MXCSR
+ *                          (input MXCSR plus the exception flags of all four additions).
+ * @param   puSrc1          The first source operand.
+ * @param   puSrc2          The second source operand.
+ */
+IEM_DECL_IMPL_DEF(void, iemAImpl_addps_u128,(PX86FXSTATE pFpuState, PIEMSSERESULT pResult, PCX86XMMREG puSrc1, PCX86XMMREG puSrc2))
+{
+    /* The first element must assign, not OR: the caller hands in an uninitialized result structure. */
+    pResult->MXCSR  = iemAImpl_addps_u128_worker(&pResult->uResult.ar32[0], pFpuState->MXCSR, &puSrc1->ar32[0], &puSrc2->ar32[0]);
+    pResult->MXCSR |= iemAImpl_addps_u128_worker(&pResult->uResult.ar32[1], pFpuState->MXCSR, &puSrc1->ar32[1], &puSrc2->ar32[1]);
+    pResult->MXCSR |= iemAImpl_addps_u128_worker(&pResult->uResult.ar32[2], pFpuState->MXCSR, &puSrc1->ar32[2], &puSrc2->ar32[2]);
+    pResult->MXCSR |= iemAImpl_addps_u128_worker(&pResult->uResult.ar32[3], pFpuState->MXCSR, &puSrc1->ar32[3], &puSrc2->ar32[3]);
+}
+#endif

trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsTwoByte0f.cpp.h

-              r96109
+              r96247
         IEM_MC_REF_XREG_U128(puDst, IEM_GET_MODRM_REG(pVCpu, bRm));
         IEM_MC_CALL_VOID_AIMPL_2(pfnU128, puDst, puSrc);
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    return VINF_SUCCESS;
+}
+/**
+ * Common worker for SSE instructions on the forms:
+ *      pxx{s,d}    xmm1, xmm2/mem128
+ *
+ * Proper alignment of the 128-bit operand is enforced.
+ * Exceptions type 2. SSE cpuid checks.
+ *
+ * @sa iemOpCommonSse41_FullFull_To_Full, iemOpCommonSse2_FullFull_To_Full
+ */
+FNIEMOP_DEF_1(iemOpCommonSseFp_FullFull_To_Full, PFNIEMAIMPLFPSSEF2U128, pfnU128)
+{
+    uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm);
+    if (IEM_IS_MODRM_REG_MODE(bRm))
+    {
+        /*
+         * Register, register.
+         */
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_BEGIN(3, 1);
+        IEM_MC_LOCAL(IEMSSERESULT,          SseRes);
+        IEM_MC_ARG_LOCAL_REF(PIEMSSERESULT, pSseRes,        SseRes,     0);
+        IEM_MC_ARG(PCX86XMMREG,             pSrc1,                      1);
+        IEM_MC_ARG(PCX86XMMREG,             pSrc2,                      2);
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_XMM_CONST(pSrc1, IEM_GET_MODRM_REG(pVCpu, bRm));
+        IEM_MC_REF_XREG_XMM_CONST(pSrc2, IEM_GET_MODRM_RM(pVCpu, bRm));
+        IEM_MC_CALL_SSE_AIMPL_3(pfnU128, pSseRes, pSrc1, pSrc2);
+        IEM_MC_STORE_SSE_RESULT(SseRes, IEM_GET_MODRM_REG(pVCpu, bRm));
+        IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT();
+        IEM_MC_ADVANCE_RIP();
+        IEM_MC_END();
+    }
+    else
+    {
+        /*
+         * Register, memory.
+         */
+        IEM_MC_BEGIN(3, 2);
+        IEM_MC_LOCAL(IEMSSERESULT,          SseRes);
+        IEM_MC_LOCAL(X86XMMREG,             uSrc2);
+        IEM_MC_ARG_LOCAL_REF(PIEMSSERESULT, pSseRes,        SseRes,     0);
+        IEM_MC_ARG(PCX86XMMREG,             pSrc1,                      1);
+        IEM_MC_ARG_LOCAL_REF(PCX86XMMREG,   pSrc2, uSrc2,               2);
+        IEM_MC_LOCAL(RTGCPTR, GCPtrEffSrc);
+        IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0);
+        IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
+        IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT();
+        IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(uSrc2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
+        IEM_MC_PREPARE_SSE_USAGE();
+        IEM_MC_REF_XREG_XMM_CONST(pSrc1, IEM_GET_MODRM_REG(pVCpu, bRm));
+        IEM_MC_CALL_SSE_AIMPL_3(pfnU128, pSseRes, pSrc1, pSrc2);
+        IEM_MC_STORE_SSE_RESULT(SseRes, IEM_GET_MODRM_REG(pVCpu, bRm));
+        IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT();
         IEM_MC_ADVANCE_RIP();
 …
 /** Opcode      0x0f 0x58 - addps Vps, Wps */
+FNIEMOP_DEF(iemOp_addps_Vps_Wps)
+{
+    IEMOP_MNEMONIC2(RM, ADDPS, addps, Vps, Wps, DISOPTYPE_HARMLESS, 0);
+    return FNIEMOP_CALL_1(iemOpCommonSseFp_FullFull_To_Full, iemAImpl_addps_u128);
+}
 /** Opcode 0x66 0x0f 0x58 - addpd Vpd, Wpd */
 FNIEMOP_STUB(iemOp_addpd_Vpd_Wpd);

trunk/src/VBox/VMM/include/IEMInternal.h

-              r96115
+              r96247
+/** @name SSE/AVX single/double precision floating point operations.
+ * @{ */
+/**
+ * An SSE result.
+ */
+typedef struct IEMSSERESULT
+{
+    /** The output value. */
+    X86XMMREG       uResult;
+    /** The output MXCSR value (iemSseStoreResult only merges its exception flags into the guest MXCSR). */
+    uint32_t        MXCSR;
+} IEMSSERESULT;
+AssertCompileMemberOffset(IEMSSERESULT, MXCSR, 128 / 8);
+/** Pointer to an SSE result. */
+typedef IEMSSERESULT *PIEMSSERESULT;
+/** Pointer to a const SSE result. */
+typedef IEMSSERESULT const *PCIEMSSERESULT;
+/**
+ * An AVX128 result.
+ */
+typedef struct IEMAVX128RESULT
+{
+    /** The output value. */
+    X86XMMREG       uResult;
+    /** The output status (MXCSR value). */
+    uint32_t        MXCSR;
+} IEMAVX128RESULT;
+AssertCompileMemberOffset(IEMAVX128RESULT, MXCSR, 128 / 8);
+/** Pointer to an AVX128 result. */
+typedef IEMAVX128RESULT *PIEMAVX128RESULT;
+/** Pointer to a const AVX128 result. */
+typedef IEMAVX128RESULT const *PCIEMAVX128RESULT;
+/**
+ * An AVX256 result.
+ */
+typedef struct IEMAVX256RESULT
+{
+    /** The output value. */
+    X86YMMREG       uResult;
+    /** The output status (MXCSR value). */
+    uint32_t        MXCSR;
+} IEMAVX256RESULT;
+AssertCompileMemberOffset(IEMAVX256RESULT, MXCSR, 256 / 8);
+/** Pointer to an AVX256 result. */
+typedef IEMAVX256RESULT *PIEMAVX256RESULT;
+/** Pointer to a const AVX256 result. */
+typedef IEMAVX256RESULT const *PCIEMAVX256RESULT;
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPSSEF2U128,(PX86FXSTATE pFpuState, PIEMSSERESULT pResult, PCX86XMMREG puSrc1, PCX86XMMREG puSrc2));
+typedef FNIEMAIMPLFPSSEF2U128  *PFNIEMAIMPLFPSSEF2U128;
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPAVXF3U128,(PX86XSAVEAREA pExtState, PIEMAVX128RESULT pResult, PCX86XMMREG puSrc1, PCX86XMMREG puSrc2));
+typedef FNIEMAIMPLFPAVXF3U128  *PFNIEMAIMPLFPAVXF3U128;
+typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPAVXF3U256,(PX86XSAVEAREA pExtState, PIEMAVX256RESULT pResult, PCX86YMMREG puSrc1, PCX86YMMREG puSrc2));
+typedef FNIEMAIMPLFPAVXF3U256  *PFNIEMAIMPLFPAVXF3U256;
+FNIEMAIMPLFPSSEF2U128 iemAImpl_addps_u128;
+FNIEMAIMPLFPAVXF3U128 iemAImpl_vaddps_u128, iemAImpl_vaddps_u128_fallback;
+FNIEMAIMPLFPAVXF3U256 iemAImpl_vaddps_u256, iemAImpl_vaddps_u256_fallback;
+/** @} */
 /** @name C instruction implementations for anything slightly complicated.
  * @{ */
 …
 DECL_NO_RETURN(void)    iemRaiseAlignmentCheckExceptionJmp(PVMCPUCC pVCpu)  RT_NOEXCEPT;
 #endif
+VBOXSTRICTRC            iemRaiseSimdFpException(PVMCPUCC pVCpu) RT_NOEXCEPT;
 IEM_CIMPL_DEF_0(iemCImplRaiseDivideError);
 …
 void            iemFpuStackPushOverflow(PVMCPUCC pVCpu) RT_NOEXCEPT;
 void            iemFpuStackPushOverflowWithMemOp(PVMCPUCC pVCpu, uint8_t iEffSeg, RTGCPTR GCPtrEff) RT_NOEXCEPT;
+/** @} */
+/** @name SSE+AVX SIMD access and helpers.
+ * @{ */
+void            iemSseStoreResult(PVMCPUCC pVCpu, PCIEMSSERESULT pResult, uint8_t iXmmReg) RT_NOEXCEPT;
 /** @} */

trunk/src/VBox/VMM/include/IEMMc.h

-              r95540
+              r96247
         if (!IEM_IS_CANONICAL(a_u64Addr)) \
             return iemRaiseGeneralProtectionFault0(pVCpu); \
+    } while (0)
+#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
+    do { \
+        if ((  ((pVCpu->cpum.GstCtx.XState.x87.MXCSR & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT) \
+             & (pVCpu->cpum.GstCtx.XState.x87.MXCSR & X86_MXCSR_XCPT_FLAGS)) != 0) \
+        { \
+            if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)\
+                return iemRaiseSimdFpException(pVCpu); \
+            else \
+                return iemRaiseUndefinedOpcode(pVCpu); \
+        } \
     } while (0)
 …
 #define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
     (a_pu128Dst) = ((PCRTUINT128U)&pVCpu->cpum.GstCtx.XState.x87.aXMM[(a_iXReg)].uXmm)
+/** Points a_pXmmDst at the guest XMM register a_iXReg (whole register entry). */
+#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
+    (a_pXmmDst) = &(pVCpu->cpum.GstCtx.XState.x87.aXMM[(a_iXReg)])
 #define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
     (a_pu64Dst) = ((uint64_t const *)&pVCpu->cpum.GstCtx.XState.x87.aXMM[(a_iXReg)].au64[0])
 …
 # define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
     IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU128AlignedSse(pVCpu, &(a_u128Dst), (a_iSeg), (a_GCPtrMem)))
+/** Fetches memory into the uXmm member of an X86XMMREG local via iemMemFetchDataU128. */
+# define IEM_MC_FETCH_MEM_XMM(a_XmmDst, a_iSeg, a_GCPtrMem) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU128(pVCpu, &((a_XmmDst).uXmm), (a_iSeg), (a_GCPtrMem)))
+/** Same fetch as IEM_MC_FETCH_MEM_XMM (both expand to iemMemFetchDataU128). */
+# define IEM_MC_FETCH_MEM_XMM_NO_AC(a_XmmDst, a_iSeg, a_GCPtrMem) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU128(pVCpu, &((a_XmmDst).uXmm), (a_iSeg), (a_GCPtrMem)))
+/** Fetches memory into the uXmm member of an X86XMMREG local via iemMemFetchDataU128AlignedSse. */
+# define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_XmmDst, a_iSeg, a_GCPtrMem) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU128AlignedSse(pVCpu, &((a_XmmDst).uXmm), (a_iSeg), (a_GCPtrMem)))
 #else
 # define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
 …
 # define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
     iemMemFetchDataU128AlignedSseJmp(pVCpu, &(a_u128Dst), (a_iSeg), (a_GCPtrMem))
+/** Fetches memory into the uXmm member of an X86XMMREG local via iemMemFetchDataU128Jmp. */
+# define IEM_MC_FETCH_MEM_XMM(a_XmmDst, a_iSeg, a_GCPtrMem) \
+    iemMemFetchDataU128Jmp(pVCpu, &((a_XmmDst).uXmm), (a_iSeg), (a_GCPtrMem))
+/** Same fetch as IEM_MC_FETCH_MEM_XMM (both expand to iemMemFetchDataU128Jmp). */
+# define IEM_MC_FETCH_MEM_XMM_NO_AC(a_XmmDst, a_iSeg, a_GCPtrMem) \
+    iemMemFetchDataU128Jmp(pVCpu, &((a_XmmDst).uXmm), (a_iSeg), (a_GCPtrMem))
+/** Fetches memory into the uXmm member of an X86XMMREG local via iemMemFetchDataU128AlignedSseJmp. */
+# define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_XmmDst, a_iSeg, a_GCPtrMem) \
+    iemMemFetchDataU128AlignedSseJmp(pVCpu, &((a_XmmDst).uXmm), (a_iSeg), (a_GCPtrMem))
 #endif
 …
 # define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
     IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU256AlignedSse(pVCpu, &(a_u256Dst), (a_iSeg), (a_GCPtrMem)))
+/** Fetches memory into the ymm member of a_YmmDst via iemMemFetchDataU256. */
+# define IEM_MC_FETCH_MEM_YMM(a_YmmDst, a_iSeg, a_GCPtrMem) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU256(pVCpu, &((a_YmmDst).ymm), (a_iSeg), (a_GCPtrMem)))
+/** Same fetch as IEM_MC_FETCH_MEM_YMM (both expand to iemMemFetchDataU256). */
+# define IEM_MC_FETCH_MEM_YMM_NO_AC(a_YmmDst, a_iSeg, a_GCPtrMem) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU256(pVCpu, &((a_YmmDst).ymm), (a_iSeg), (a_GCPtrMem)))
+/** Fetches memory into the ymm member of a_YmmDst via iemMemFetchDataU256AlignedSse. */
+# define IEM_MC_FETCH_MEM_YMM_ALIGN_AVX(a_YmmDst, a_iSeg, a_GCPtrMem) \
+    IEM_MC_RETURN_ON_FAILURE(iemMemFetchDataU256AlignedSse(pVCpu, &((a_YmmDst).ymm), (a_iSeg), (a_GCPtrMem)))
 #else
 # define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
 …
 # define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
     iemMemFetchDataU256AlignedSseJmp(pVCpu, &(a_u256Dst), (a_iSeg), (a_GCPtrMem))
+/** Fetches memory into the ymm member of a_YmmDst via iemMemFetchDataU256Jmp. */
+# define IEM_MC_FETCH_MEM_YMM(a_YmmDst, a_iSeg, a_GCPtrMem) \
+    iemMemFetchDataU256Jmp(pVCpu, &((a_YmmDst).ymm), (a_iSeg), (a_GCPtrMem))
+/** Same fetch as IEM_MC_FETCH_MEM_YMM (both expand to iemMemFetchDataU256Jmp). */
+# define IEM_MC_FETCH_MEM_YMM_NO_AC(a_YmmDst, a_iSeg, a_GCPtrMem) \
+    iemMemFetchDataU256Jmp(pVCpu, &((a_YmmDst).ymm), (a_iSeg), (a_GCPtrMem))
+/** Fetches memory into the ymm member of a_YmmDst via iemMemFetchDataU256AlignedSseJmp. */
+# define IEM_MC_FETCH_MEM_YMM_ALIGN_AVX(a_YmmDst, a_iSeg, a_GCPtrMem) \
+    iemMemFetchDataU256AlignedSseJmp(pVCpu, &((a_YmmDst).ymm), (a_iSeg), (a_GCPtrMem))
 #endif
 …
 #define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() iemFpuActualizeStateForChange(pVCpu)
+/** Stores an SSE SIMD result in the given XMM register and merges its
+ * exception flags into the guest MXCSR (see iemSseStoreResult). */
+#define IEM_MC_STORE_SSE_RESULT(a_SseData, a_iXmmReg) \
+    iemSseStoreResult(pVCpu, &(a_SseData), (a_iXmmReg))
 /** Prepares for using the SSE state.
  * Ensures that we can use the host SSE/FPU in the current context (RC+R0.

trunk/src/VBox/VMM/testcase/tstIEMCheckMc.cpp

-              r96109
+              r96247
 #define iemAImpl_unpckhpd_u128          NULL
+#define iemAImpl_addps_u128             NULL
 /** @}  */
 …
 #define IEM_MC_MAYBE_RAISE_FSGSBASE_XCPT()              do { (void)fMcBegin; } while (0)
 #define IEM_MC_MAYBE_RAISE_NON_CANONICAL_ADDR_GP0(a_u64Addr) do { (void)fMcBegin; } while (0)
+#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() do { (void)fMcBegin; } while (0)
 #define IEM_MC_LOCAL(a_Type, a_Name) (void)fMcBegin; \
 …
 #define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg)     do { (a_pu128Dst) = (PCRTUINT128U)((uintptr_t)0);       CHK_PTYPE(PCRTUINT128U, a_pu128Dst);    (void)fSseWrite; (void)fMcBegin; } while (0)
 #define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg)       do { (a_pu64Dst)  = (uint64_t const *)((uintptr_t)0);   CHK_PTYPE(uint64_t const *, a_pu64Dst); (void)fSseWrite; (void)fMcBegin; } while (0)
+#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg)       do { (a_pXmmDst) = (PCX86XMMREG)((uintptr_t)0);         CHK_PTYPE(PCX86XMMREG, a_pXmmDst);      (void)fSseWrite; (void)fMcBegin; } while (0)
 #define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc)       do { (void)fSseWrite; (void)fMcBegin; } while (0)
 …
 #define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem)      do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(RTUINT128U, a_u128Dst); (void)fMcBegin; } while (0)
 #define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem)  do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(RTUINT128U, a_u128Dst); (void)fMcBegin; } while (0)
+#define IEM_MC_FETCH_MEM_XMM(a_XmmDst, a_iSeg, a_GCPtrMem)              do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(X86XMMREG, a_XmmDst); (void)fMcBegin; } while (0)
+#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_XmmDst, a_iSeg, a_GCPtrMem)        do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(X86XMMREG, a_XmmDst); (void)fMcBegin; } while (0)
+#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_XmmDst, a_iSeg, a_GCPtrMem)    do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(X86XMMREG, a_XmmDst); (void)fMcBegin; } while (0)
 #define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem)            do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(RTUINT256U, a_u256Dst); (void)fMcBegin; } while (0)
 #define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem)      do { CHK_GCPTR(a_GCPtrMem); CHK_TYPE(RTUINT256U, a_u256Dst); (void)fMcBegin; } while (0)
 …
 #define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ()   (void)fMcBegin; const int fFpuRead = 1, fSseRead = 1
 #define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() (void)fMcBegin; const int fFpuRead = 1, fFpuWrite = 1, fSseRead = 1, fSseWrite = 1
+#define IEM_MC_STORE_SSE_RESULT(a_SseData, a_iXmmReg)                                           do { (void)fSseWrite; (void)fMcBegin; } while (0)
 #define IEM_MC_PREPARE_SSE_USAGE()              (void)fMcBegin; const int fSseRead = 1, fSseWrite = 1, fSseHost = 1
 #define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ()   (void)fMcBegin; const int fSseRead = 1

Note: See TracChangeset for help on using the changeset viewer.