VirtualBox

Browse Source

Changeset 96384 in vbox

Timestamp:

Aug 20, 2022 8:51:52 PM (2 years ago)

Author:

vboxsync

Message:

VMM/IEM: Implement sqrtps/sqrtpd/sqrtss/sqrtsd instructions, bugref:9898

Location:

trunk/src/VBox/VMM

Files:

: 5 edited

VMMAll/IEMAllAImpl.asm (modified) (6 diffs)
VMMAll/IEMAllAImplC.cpp (modified) (2 diffs)
VMMAll/IEMAllInstructionsTwoByte0f.cpp.h (modified) (1 diff)
include/IEMInternal.h (modified) (4 diffs)
testcase/tstIEMCheckMc.cpp (modified) (2 diffs)

Legend:

: Unmodified
: Added
: Removed

trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm

-              r96382
+              r96384
+;
 ; @param    1       The instruction
+; @param    2       Flag whether the AVX variant of the instruction takes two or three operands
+;
 ; @param    A0      FPU context (FXSTATE or XSAVEAREA).
 …
 ; @param    A3      Pointer to the second media register size operand (input).
+;
 %macro IEMIMPL_FP_F2 1
+%macro IEMIMPL_FP_F2 2
 BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
         PROLOGUE_4_ARGS
 …
 ENDPROC iemAImpl_ %+ %1 %+ _u128
+ %if %2 == 3
 BEGINPROC_FASTCALL iemAImpl_v %+ %1 %+ _u128, 12
         PROLOGUE_4_ARGS
 …
         EPILOGUE_4_ARGS
 ENDPROC iemAImpl_v %+ %1 %+ _u256
+%endmacro
+IEMIMPL_FP_F2 addps
+IEMIMPL_FP_F2 addpd
+IEMIMPL_FP_F2 mulps
+IEMIMPL_FP_F2 mulpd
+IEMIMPL_FP_F2 subps
+IEMIMPL_FP_F2 subpd
+IEMIMPL_FP_F2 minps
+IEMIMPL_FP_F2 minpd
+IEMIMPL_FP_F2 divps
+IEMIMPL_FP_F2 divpd
+IEMIMPL_FP_F2 maxps
+IEMIMPL_FP_F2 maxpd
+IEMIMPL_FP_F2 haddps
+IEMIMPL_FP_F2 haddpd
+IEMIMPL_FP_F2 hsubps
+IEMIMPL_FP_F2 hsubpd
+ %else
+BEGINPROC_FASTCALL iemAImpl_v %+ %1 %+ _u128, 12 ; 128-bit variant for AVX instructions that take two operands (macro parameter 2 != 3).
+        PROLOGUE_4_ARGS
+        IEMIMPL_AVX_PROLOGUE
+        AVX_LD_XSAVEAREA_MXCSR A0                ; NOTE(review): presumably loads MXCSR from the FPU context in A0 - confirm against the macro.
+        vmovdqu  xmm0, [A2]                      ; Load the first input operand.
+        vmovdqu  xmm1, [A3]                      ; Load the second input operand.
+        v %+ %1  xmm0, xmm1                      ; Two-operand form: xmm0 is the destination, xmm1 the source.
+        vmovdqu  [A1 + IEMAVX128RESULT.uResult], xmm0 ; Store the value into the result structure pointed to by A1.
+        AVX128_ST_XSAVEAREA_MXCSR A1             ; NOTE(review): presumably stores the resulting MXCSR into the result at A1 - confirm.
+        IEMIMPL_AVX_PROLOGUE                     ; NOTE(review): second PROLOGUE right before the epilogue; presumably an epilogue counterpart was intended - confirm.
+        EPILOGUE_4_ARGS
+ENDPROC iemAImpl_v %+ %1 %+ _u128
+BEGINPROC_FASTCALL iemAImpl_v %+ %1 %+ _u256, 12 ; 256-bit variant for AVX instructions that take two operands (macro parameter 2 != 3).
+        PROLOGUE_4_ARGS
+        IEMIMPL_AVX_PROLOGUE
+        AVX_LD_XSAVEAREA_MXCSR A0                ; NOTE(review): presumably loads MXCSR from the FPU context in A0 - confirm against the macro.
+        vmovdqu  ymm0, [A2]                      ; Load the first input operand.
+        vmovdqu  ymm1, [A3]                      ; Load the second input operand.
+        v %+ %1  ymm0, ymm1                      ; Two-operand form: ymm0 is the destination, ymm1 the source.
+        vmovdqu  [A1 + IEMAVX256RESULT.uResult], ymm0 ; Store the value into the result structure pointed to by A1.
+        AVX256_ST_XSAVEAREA_MXCSR A1             ; NOTE(review): presumably stores the resulting MXCSR into the result at A1 - confirm.
+        IEMIMPL_AVX_PROLOGUE                     ; NOTE(review): second PROLOGUE right before the epilogue; presumably an epilogue counterpart was intended - confirm.
+        EPILOGUE_4_ARGS
+ENDPROC iemAImpl_v %+ %1 %+ _u256
+ %endif
+%endmacro
+IEMIMPL_FP_F2 addps, 3
+IEMIMPL_FP_F2 addpd, 3
+IEMIMPL_FP_F2 mulps, 3
+IEMIMPL_FP_F2 mulpd, 3
+IEMIMPL_FP_F2 subps, 3
+IEMIMPL_FP_F2 subpd, 3
+IEMIMPL_FP_F2 minps, 3
+IEMIMPL_FP_F2 minpd, 3
+IEMIMPL_FP_F2 divps, 3
+IEMIMPL_FP_F2 divpd, 3
+IEMIMPL_FP_F2 maxps, 3
+IEMIMPL_FP_F2 maxpd, 3
+IEMIMPL_FP_F2 haddps, 3
+IEMIMPL_FP_F2 haddpd, 3
+IEMIMPL_FP_F2 hsubps, 3
+IEMIMPL_FP_F2 hsubpd, 3
+;;
+; These are actually unary operations, but to keep things simple they
+; are treated as binary for now, so that the output result always stays
+; in sync with the register the result might get written
+; to.
+IEMIMPL_FP_F2 sqrtps, 2
+IEMIMPL_FP_F2 sqrtpd, 2
 …
 IEMIMPL_FP_F2_R32 maxss
 IEMIMPL_FP_F2_R32 cvtss2sd
+IEMIMPL_FP_F2_R32 sqrtss
 …
 IEMIMPL_FP_F2_R64 maxsd
 IEMIMPL_FP_F2_R64 cvtsd2ss
+IEMIMPL_FP_F2_R64 sqrtsd

trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp

-              r96382
+              r96384
     return false;
+}
+/**
+ * Checks whether the single-precision input operand of a unary operation is a NaN and,
+ * if it is, sets the output accordingly so the caller can skip the actual calculation.
+ *
+ * @returns true if a NaN value was detected in the operand and the result has been set (the operation must not continue),
+ *          false if the operand is not a NaN and the operation can continue.
+ * @param   pr32Res         Where to store the result in case the operation can't continue (written only when true is returned).
+ * @param   pr32Val         The input operand.
+ * @param   pfMxcsr         The MXCSR value to update; X86_MXCSR_IE is ORed in only for a signalling NaN (true is returned), otherwise it is left untouched.
+ */
+DECLINLINE(bool) iemSseUnaryValIsNaNR32(PRTFLOAT32U pr32Res, PCRTFLOAT32U pr32Val, uint32_t *pfMxcsr)
+{
+    if (RTFLOAT32U_IS_SIGNALLING_NAN(pr32Val))
+    {
+        /* The operand is an SNaN: it is placed into the result and converted to a QNaN by setting the top fraction bit, and X86_MXCSR_IE is raised. */
+        *pr32Res = *pr32Val;
+        pr32Res->s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
+        *pfMxcsr |= X86_MXCSR_IE;
+        return true;
+    }
+    else if (RTFLOAT32U_IS_QUIET_NAN(pr32Val))
+    {
+        /* The QNaN operand is placed into the result unchanged; MXCSR is not modified. */
+        *pr32Res = *pr32Val;
+        return true;
+    }
+    return false;
+}
+/**
+ * Checks whether the double-precision input operand of a unary operation is a NaN and,
+ * if it is, sets the output accordingly so the caller can skip the actual calculation.
+ *
+ * @returns true if a NaN value was detected in the operand and the result has been set (the operation must not continue),
+ *          false if the operand is not a NaN and the operation can continue.
+ * @param   pr64Res         Where to store the result in case the operation can't continue (written only when true is returned).
+ * @param   pr64Val         The input operand.
+ * @param   pfMxcsr         The MXCSR value to update; X86_MXCSR_IE is ORed in only for a signalling NaN (true is returned), otherwise it is left untouched.
+ */
+DECLINLINE(bool) iemSseUnaryValIsNaNR64(PRTFLOAT64U pr64Res, PCRTFLOAT64U pr64Val, uint32_t *pfMxcsr)
+{
+    if (RTFLOAT64U_IS_SIGNALLING_NAN(pr64Val))
+    {
+        /* The operand is an SNaN: it is placed into the result and converted to a QNaN by setting the top fraction bit, and X86_MXCSR_IE is raised. */
+        *pr64Res = *pr64Val;
+        pr64Res->s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
+        *pfMxcsr |= X86_MXCSR_IE;
+        return true;
+    }
+    else if (RTFLOAT64U_IS_QUIET_NAN(pr64Val))
+    {
+        /* The QNaN operand is placed into the result unchanged; MXCSR is not modified. */
+        *pr64Res = *pr64Val;
+        return true;
+    }
+    return false;
+}
 #endif
 …
+}
 #endif
+/**
+ * SQRTPS
+ *
+ * C implementation, only compiled when IEM_WITHOUT_ASSEMBLY is defined.
+ *
+ * iemAImpl_sqrtps_u128_worker computes the square root of one single-precision
+ * value and returns the MXCSR value for that element:
+ *  - NaN inputs are handled by iemSseUnaryValIsNaNR32.
+ *  - A zero (after iemSsePrepareValueR32) is returned as is.
+ *  - Any other value with the sign bit set yields g_ar32QNaN[1] and X86_MXCSR_IE.
+ *  - Everything else goes through f32_sqrt.
+ *
+ * iemAImpl_sqrtps_u128 applies the worker to all four elements of puSrc2 and
+ * ORs the per-element MXCSR values together; puSrc1 is not used.
+ */
+#ifdef IEM_WITHOUT_ASSEMBLY
+static uint32_t iemAImpl_sqrtps_u128_worker(PRTFLOAT32U pr32Res, uint32_t fMxcsr, PCRTFLOAT32U pr32Val)
+{
+    if (iemSseUnaryValIsNaNR32(pr32Res, pr32Val, &fMxcsr)) /* NaN input: result and MXCSR have already been set by the helper. */
+        return fMxcsr;
+    RTFLOAT32U r32Src;
+    uint32_t fDe = iemSsePrepareValueR32(&r32Src, fMxcsr, pr32Val); /* NOTE(review): presumably prepares the input according to MXCSR (e.g. denormal handling) and returns the denormal flag - confirm against the helper. */
+    if (RTFLOAT32U_IS_ZERO(&r32Src))
+    {
+        *pr32Res = r32Src; /* A zero is passed through unchanged; fDe is not merged on this path. */
+        return fMxcsr;
+    }
+    else if (r32Src.s.fSign)
+    {
+        *pr32Res = g_ar32QNaN[1]; /* Non-NaN, non-zero value with the sign bit set: the result is the QNaN at index 1 of g_ar32QNaN. */
+        return fMxcsr | X86_MXCSR_IE;
+    }
+    softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_MXCSR(fMxcsr);
+    float32_t r32Result = f32_sqrt(iemFpSoftF32FromIprt(&r32Src), &SoftState);
+    return iemSseSoftStateAndR32ToMxcsrAndIprtResult(&SoftState, r32Result, pr32Res, fMxcsr | fDe); /* fDe is merged into the MXCSR passed on for the final result. */
+}
+IEM_DECL_IMPL_DEF(void, iemAImpl_sqrtps_u128,(PX86FXSTATE pFpuState, PIEMSSERESULT pResult, PCX86XMMREG puSrc1, PCX86XMMREG puSrc2))
+{
+    RT_NOREF(puSrc1); /* Only the second source operand is read. */
+    pResult->MXCSR  = iemAImpl_sqrtps_u128_worker(&pResult->uResult.ar32[0], pFpuState->MXCSR, &puSrc2->ar32[0]); /* Every element starts from the same pFpuState->MXCSR value. */
+    pResult->MXCSR |= iemAImpl_sqrtps_u128_worker(&pResult->uResult.ar32[1], pFpuState->MXCSR, &puSrc2->ar32[1]); /* Per-element MXCSR values are ORed together. */
+    pResult->MXCSR |= iemAImpl_sqrtps_u128_worker(&pResult->uResult.ar32[2], pFpuState->MXCSR, &puSrc2->ar32[2]);
+    pResult->MXCSR |= iemAImpl_sqrtps_u128_worker(&pResult->uResult.ar32[3], pFpuState->MXCSR, &puSrc2->ar32[3]);
+}
+#endif
+/**
+ * SQRTSS
+ *
+ * C implementation, only compiled when IEM_WITHOUT_ASSEMBLY is defined.
+ *
+ * Element 0 of the result is computed from pr32Src2 by the SQRTPS worker
+ * (iemAImpl_sqrtps_u128_worker); elements 1 to 3 are copied from puSrc1.
+ * The returned MXCSR is the one produced by the worker for element 0.
+ */
+#ifdef IEM_WITHOUT_ASSEMBLY
+IEM_DECL_IMPL_DEF(void, iemAImpl_sqrtss_u128_r32,(PX86FXSTATE pFpuState, PIEMSSERESULT pResult, PCX86XMMREG puSrc1, PCRTFLOAT32U pr32Src2))
+{
+    pResult->MXCSR = iemAImpl_sqrtps_u128_worker(&pResult->uResult.ar32[0], pFpuState->MXCSR, pr32Src2); /* Scalar square root into element 0. */
+    pResult->uResult.ar32[1] = puSrc1->ar32[1]; /* The remaining elements are taken over from the first source operand. */
+    pResult->uResult.ar32[2] = puSrc1->ar32[2];
+    pResult->uResult.ar32[3] = puSrc1->ar32[3];
+}
+#endif
+/**
+ * SQRTPD
+ *
+ * C implementation, only compiled when IEM_WITHOUT_ASSEMBLY is defined.
+ *
+ * iemAImpl_sqrtpd_u128_worker computes the square root of one double-precision
+ * value and returns the MXCSR value for that element:
+ *  - NaN inputs are handled by iemSseUnaryValIsNaNR64.
+ *  - A zero (after iemSsePrepareValueR64) is returned as is.
+ *  - Any other value with the sign bit set yields g_ar64QNaN[1] and X86_MXCSR_IE.
+ *  - Everything else goes through f64_sqrt.
+ *
+ * iemAImpl_sqrtpd_u128 applies the worker to both elements of puSrc2 and
+ * ORs the per-element MXCSR values together; puSrc1 is not used.
+ */
+#ifdef IEM_WITHOUT_ASSEMBLY
+static uint32_t iemAImpl_sqrtpd_u128_worker(PRTFLOAT64U pr64Res, uint32_t fMxcsr, PCRTFLOAT64U pr64Val)
+{
+    if (iemSseUnaryValIsNaNR64(pr64Res, pr64Val, &fMxcsr)) /* NaN input: result and MXCSR have already been set by the helper. */
+        return fMxcsr;
+    RTFLOAT64U r64Src;
+    uint32_t fDe = iemSsePrepareValueR64(&r64Src, fMxcsr, pr64Val); /* NOTE(review): presumably prepares the input according to MXCSR (e.g. denormal handling) and returns the denormal flag - confirm against the helper. */
+    if (RTFLOAT64U_IS_ZERO(&r64Src))
+    {
+        *pr64Res = r64Src; /* A zero is passed through unchanged; fDe is not merged on this path. */
+        return fMxcsr;
+    }
+    else if (r64Src.s.fSign)
+    {
+        *pr64Res = g_ar64QNaN[1]; /* Non-NaN, non-zero value with the sign bit set: the result is the QNaN at index 1 of g_ar64QNaN. */
+        return fMxcsr | X86_MXCSR_IE;
+    }
+    softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_MXCSR(fMxcsr);
+    float64_t r64Result = f64_sqrt(iemFpSoftF64FromIprt(&r64Src), &SoftState);
+    return iemSseSoftStateAndR64ToMxcsrAndIprtResult(&SoftState, r64Result, pr64Res, fMxcsr | fDe); /* fDe is merged into the MXCSR passed on for the final result. */
+}
+IEM_DECL_IMPL_DEF(void, iemAImpl_sqrtpd_u128,(PX86FXSTATE pFpuState, PIEMSSERESULT pResult, PCX86XMMREG puSrc1, PCX86XMMREG puSrc2))
+{
+    RT_NOREF(puSrc1); /* Only the second source operand is read. */
+    pResult->MXCSR  = iemAImpl_sqrtpd_u128_worker(&pResult->uResult.ar64[0], pFpuState->MXCSR, &puSrc2->ar64[0]); /* Every element starts from the same pFpuState->MXCSR value. */
+    pResult->MXCSR |= iemAImpl_sqrtpd_u128_worker(&pResult->uResult.ar64[1], pFpuState->MXCSR, &puSrc2->ar64[1]); /* Per-element MXCSR values are ORed together. */
+}
+#endif
+/**
+ * SQRTSD
+ *
+ * C implementation, only compiled when IEM_WITHOUT_ASSEMBLY is defined.
+ *
+ * Element 0 of the result is computed from pr64Src2 by the SQRTPD worker
+ * (iemAImpl_sqrtpd_u128_worker); element 1 is copied from puSrc1.
+ * The returned MXCSR is the one produced by the worker for element 0.
+ */
+#ifdef IEM_WITHOUT_ASSEMBLY
+IEM_DECL_IMPL_DEF(void, iemAImpl_sqrtsd_u128_r64,(PX86FXSTATE pFpuState, PIEMSSERESULT pResult, PCX86XMMREG puSrc1, PCRTFLOAT64U pr64Src2))
+{
+    pResult->MXCSR = iemAImpl_sqrtpd_u128_worker(&pResult->uResult.ar64[0], pFpuState->MXCSR, pr64Src2); /* Scalar square root into element 0. */
+    pResult->uResult.ar64[1] = puSrc1->ar64[1]; /* The upper element is taken over from the first source operand. */
+}
+#endif

trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsTwoByte0f.cpp.h

-              r96382
+              r96384
 /*  Opcode 0xf2 0x0f 0x50 - invalid */
 /** Opcode      0x0f 0x51 - sqrtps Vps, Wps */
+FNIEMOP_STUB(iemOp_sqrtps_Vps_Wps); /* NOTE(review): presumably the pre-change stub that this changeset removes; the added/removed markers are not distinguishable in this rendering - confirm. */
+FNIEMOP_DEF(iemOp_sqrtps_Vps_Wps)
+{
+    IEMOP_MNEMONIC2(RM, SQRTPS, sqrtps, Vps, Wps, DISOPTYPE_HARMLESS, 0);
+    return FNIEMOP_CALL_1(iemOpCommonSseFp_FullFull_To_Full, iemAImpl_sqrtps_u128); /* Hands off to the common SSE full/full-to-full handler with the sqrtps implementation. */
+}
 /** Opcode 0x66 0x0f 0x51 - sqrtpd Vpd, Wpd */
+FNIEMOP_STUB(iemOp_sqrtpd_Vpd_Wpd); /* NOTE(review): presumably the pre-change stub that this changeset removes; the added/removed markers are not distinguishable in this rendering - confirm. */
+FNIEMOP_DEF(iemOp_sqrtpd_Vpd_Wpd)
+{
+    IEMOP_MNEMONIC2(RM, SQRTPD, sqrtpd, Vpd, Wpd, DISOPTYPE_HARMLESS, 0);
+    return FNIEMOP_CALL_1(iemOpCommonSse2Fp_FullFull_To_Full, iemAImpl_sqrtpd_u128); /* Hands off to the common SSE2 full/full-to-full handler with the sqrtpd implementation. */
+}
 /** Opcode 0xf3 0x0f 0x51 - sqrtss Vss, Wss */
+FNIEMOP_STUB(iemOp_sqrtss_Vss_Wss); /* NOTE(review): presumably the pre-change stub that this changeset removes; the added/removed markers are not distinguishable in this rendering - confirm. */
+FNIEMOP_DEF(iemOp_sqrtss_Vss_Wss)
+{
+    IEMOP_MNEMONIC2(RM, SQRTSS, sqrtss, Vss, Wss, DISOPTYPE_HARMLESS, 0);
+    return FNIEMOP_CALL_1(iemOpCommonSseFp_FullR32_To_Full, iemAImpl_sqrtss_u128_r32); /* Hands off to the common SSE full/r32-to-full handler with the sqrtss implementation. */
+}
 /** Opcode 0xf2 0x0f 0x51 - sqrtsd Vsd, Wsd */
+FNIEMOP_STUB(iemOp_sqrtsd_Vsd_Wsd); /* NOTE(review): presumably the pre-change stub that this changeset removes; the added/removed markers are not distinguishable in this rendering - confirm. */
+FNIEMOP_DEF(iemOp_sqrtsd_Vsd_Wsd)
+{
+    IEMOP_MNEMONIC2(RM, SQRTSD, sqrtsd, Vsd, Wsd, DISOPTYPE_HARMLESS, 0);
+    return FNIEMOP_CALL_1(iemOpCommonSse2Fp_FullR64_To_Full, iemAImpl_sqrtsd_u128_r64); /* Hands off to the common SSE2 full/r64-to-full handler with the sqrtsd implementation. */
+}
 /** Opcode      0x0f 0x52 - rsqrtps Vps, Wps */

trunk/src/VBox/VMM/include/IEMInternal.h

-              r96382
+              r96384
 FNIEMAIMPLFPSSEF2U128 iemAImpl_hsubps_u128;
 FNIEMAIMPLFPSSEF2U128 iemAImpl_hsubpd_u128;
+FNIEMAIMPLFPSSEF2U128 iemAImpl_sqrtps_u128;
+FNIEMAIMPLFPSSEF2U128 iemAImpl_sqrtpd_u128;
 FNIEMAIMPLFPSSEF2U128R32 iemAImpl_addss_u128_r32;
 …
 FNIEMAIMPLFPSSEF2U128R32 iemAImpl_cvtss2sd_u128_r32;
 FNIEMAIMPLFPSSEF2U128R64 iemAImpl_cvtsd2ss_u128_r64;
+FNIEMAIMPLFPSSEF2U128R32 iemAImpl_sqrtss_u128_r32;
+FNIEMAIMPLFPSSEF2U128R64 iemAImpl_sqrtsd_u128_r64;
 FNIEMAIMPLFPAVXF3U128 iemAImpl_vaddps_u128, iemAImpl_vaddps_u128_fallback;
 …
 FNIEMAIMPLFPAVXF3U128 iemAImpl_vhsubps_u128, iemAImpl_vhsubps_u128_fallback;
 FNIEMAIMPLFPAVXF3U128 iemAImpl_vhsubpd_u128, iemAImpl_vhsubpd_u128_fallback;
+FNIEMAIMPLFPAVXF3U128 iemAImpl_vsqrtps_u128, iemAImpl_vsqrtps_u128_fallback;
+FNIEMAIMPLFPAVXF3U128 iemAImpl_vsqrtpd_u128, iemAImpl_vsqrtpd_u128_fallback;
 FNIEMAIMPLFPAVXF3U128R32 iemAImpl_vaddss_u128_r32, iemAImpl_vaddss_u128_r32_fallback;
 …
 FNIEMAIMPLFPAVXF3U128R32 iemAImpl_vmaxss_u128_r32, iemAImpl_vmaxss_u128_r32_fallback;
 FNIEMAIMPLFPAVXF3U128R64 iemAImpl_vmaxsd_u128_r64, iemAImpl_vmaxsd_u128_r64_fallback;
+FNIEMAIMPLFPAVXF3U128R32 iemAImpl_vsqrtss_u128_r32, iemAImpl_vsqrtss_u128_r32_fallback;
+FNIEMAIMPLFPAVXF3U128R64 iemAImpl_vsqrtsd_u128_r64, iemAImpl_vsqrtsd_u128_r64_fallback;
 FNIEMAIMPLFPAVXF3U256 iemAImpl_vaddps_u256, iemAImpl_vaddps_u256_fallback;

trunk/src/VBox/VMM/testcase/tstIEMCheckMc.cpp

-              r96382
+              r96384
 #define iemAImpl_hsubps_u128            NULL
 #define iemAImpl_hsubpd_u128            NULL
+#define iemAImpl_sqrtps_u128            NULL
+#define iemAImpl_sqrtpd_u128            NULL
 #define iemAImpl_addss_u128_r32         NULL
 …
 #define iemAImpl_maxss_u128_r32         NULL
 #define iemAImpl_maxsd_u128_r64         NULL
+#define iemAImpl_sqrtss_u128_r32        NULL
+#define iemAImpl_sqrtsd_u128_r64        NULL
 #define iemAImpl_cvtss2sd_u128_r32      NULL

Note: See TracChangeset for help on using the changeset viewer.

Download in other formats:

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette