VirtualBox

Changeset 96384 in vbox


Ignore:
Timestamp:
Aug 20, 2022 8:51:52 PM (2 years ago)
Author:
vboxsync
Message:

VMM/IEM: Implement sqrtps/sqrtpd/sqrtss/sqrtsd instructions, bugref:9898

Location:
trunk/src/VBox/VMM
Files:
5 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm

    r96382 r96384  
    45324532;
    45334533; @param    1       The instruction
     4534; @param    2       Flag whether the AVX variant of the instruction takes two or three operands
    45344535;
    45354536; @param    A0      FPU context (FXSTATE or XSAVEAREA).
     
    45384539; @param    A3      Pointer to the second media register size operand (input).
    45394540;
    4540 %macro IEMIMPL_FP_F2 1
     4541%macro IEMIMPL_FP_F2 2
    45414542BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
    45424543        PROLOGUE_4_ARGS
     
    45544555ENDPROC iemAImpl_ %+ %1 %+ _u128
    45554556
     4557 %if %2 == 3
    45564558BEGINPROC_FASTCALL iemAImpl_v %+ %1 %+ _u128, 12
    45574559        PROLOGUE_4_ARGS
     
    45834585        EPILOGUE_4_ARGS
    45844586ENDPROC iemAImpl_v %+ %1 %+ _u256
    4585 %endmacro
    4586 
    4587 IEMIMPL_FP_F2 addps
    4588 IEMIMPL_FP_F2 addpd
    4589 IEMIMPL_FP_F2 mulps
    4590 IEMIMPL_FP_F2 mulpd
    4591 IEMIMPL_FP_F2 subps
    4592 IEMIMPL_FP_F2 subpd
    4593 IEMIMPL_FP_F2 minps
    4594 IEMIMPL_FP_F2 minpd
    4595 IEMIMPL_FP_F2 divps
    4596 IEMIMPL_FP_F2 divpd
    4597 IEMIMPL_FP_F2 maxps
    4598 IEMIMPL_FP_F2 maxpd
    4599 IEMIMPL_FP_F2 haddps
    4600 IEMIMPL_FP_F2 haddpd
    4601 IEMIMPL_FP_F2 hsubps
    4602 IEMIMPL_FP_F2 hsubpd
     4587 %else
     4588BEGINPROC_FASTCALL iemAImpl_v %+ %1 %+ _u128, 12
     4589        PROLOGUE_4_ARGS
     4590        IEMIMPL_AVX_PROLOGUE
     4591        AVX_LD_XSAVEAREA_MXCSR A0                       ; presumably loads the guest MXCSR from the context in A0 - macro body not shown here, TODO confirm
     4592
     4593        vmovdqu  xmm0, [A2]                             ; first operand; note the two-operand instruction below uses xmm0 as destination only
     4594        vmovdqu  xmm1, [A3]                             ; second operand (the actual input)
     4595        v %+ %1  xmm0, xmm1                             ; two-operand AVX form: destination xmm0, source xmm1
     4596        vmovdqu  [A1 + IEMAVX128RESULT.uResult], xmm0   ; store the value into the uResult member of the result structure at A1
     4597
     4598        AVX128_ST_XSAVEAREA_MXCSR A1                    ; presumably records the resulting MXCSR in the result structure at A1 - TODO confirm
     4599        IEMIMPL_AVX_PROLOGUE                            ; NOTE(review): the prologue macro is invoked a second time on the exit path - confirm whether an epilogue counterpart was intended
     4600        EPILOGUE_4_ARGS
     4601ENDPROC iemAImpl_v %+ %1 %+ _u128
     4602
     4603BEGINPROC_FASTCALL iemAImpl_v %+ %1 %+ _u256, 12
     4604        PROLOGUE_4_ARGS
     4605        IEMIMPL_AVX_PROLOGUE
     4606        AVX_LD_XSAVEAREA_MXCSR A0                       ; presumably loads the guest MXCSR from the context in A0 - macro body not shown here, TODO confirm
     4607
     4608        vmovdqu  ymm0, [A2]                             ; first operand; note the two-operand instruction below uses ymm0 as destination only
     4609        vmovdqu  ymm1, [A3]                             ; second operand (the actual input)
     4610        v %+ %1  ymm0, ymm1                             ; two-operand AVX form: destination ymm0, source ymm1
     4611        vmovdqu  [A1 + IEMAVX256RESULT.uResult], ymm0   ; store the value into the uResult member of the result structure at A1
     4612
     4613        AVX256_ST_XSAVEAREA_MXCSR A1                    ; presumably records the resulting MXCSR in the result structure at A1 - TODO confirm
     4614        IEMIMPL_AVX_PROLOGUE                            ; NOTE(review): the prologue macro is invoked a second time on the exit path - confirm whether an epilogue counterpart was intended
     4615        EPILOGUE_4_ARGS
     4616ENDPROC iemAImpl_v %+ %1 %+ _u256
     4617 %endif
     4618%endmacro
     4619
     4620IEMIMPL_FP_F2 addps, 3
     4621IEMIMPL_FP_F2 addpd, 3
     4622IEMIMPL_FP_F2 mulps, 3
     4623IEMIMPL_FP_F2 mulpd, 3
     4624IEMIMPL_FP_F2 subps, 3
     4625IEMIMPL_FP_F2 subpd, 3
     4626IEMIMPL_FP_F2 minps, 3
     4627IEMIMPL_FP_F2 minpd, 3
     4628IEMIMPL_FP_F2 divps, 3
     4629IEMIMPL_FP_F2 divpd, 3
     4630IEMIMPL_FP_F2 maxps, 3
     4631IEMIMPL_FP_F2 maxpd, 3
     4632IEMIMPL_FP_F2 haddps, 3
     4633IEMIMPL_FP_F2 haddpd, 3
     4634IEMIMPL_FP_F2 hsubps, 3
     4635IEMIMPL_FP_F2 hsubpd, 3
     4636
     4637;;
     4638; These are actually unary operations but to keep it simple
     4639; we treat them as binary for now, so the output result is
     4640; always in sync with the register where the result might get written
     4641; to.
     4642IEMIMPL_FP_F2 sqrtps, 2
     4643IEMIMPL_FP_F2 sqrtpd, 2
    46034644
    46044645
     
    46524693IEMIMPL_FP_F2_R32 maxss
    46534694IEMIMPL_FP_F2_R32 cvtss2sd
     4695IEMIMPL_FP_F2_R32 sqrtss
    46544696
    46554697
     
    47034745IEMIMPL_FP_F2_R64 maxsd
    47044746IEMIMPL_FP_F2_R64 cvtsd2ss
     4747IEMIMPL_FP_F2_R64 sqrtsd
  • trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp

    r96382 r96384  
    1413414134    return false;
    1413514135}
     14136
     14137
     14138/**
     14139 * Checks the single-precision input operand of a unary operation for NaN values, reporting whether
     14140 * the operation can continue or whether the operand is a NaN, in which case the output is set accordingly.
     14141 *
     14142 * @returns Flag whether the operation can continue (false) or whether a NaN value was detected in the operand (true).
     14143 * @param   pr32Res         Where to store the result in case the operation can't continue (only written when true is returned).
     14144 * @param   pr32Val         The input operand.
     14145 * @param   pfMxcsr         The MXCSR value to update; X86_MXCSR_IE is ORed in when the operand is a signalling NaN (true is returned in that case).
     14146 */
     14147DECLINLINE(bool) iemSseUnaryValIsNaNR32(PRTFLOAT32U pr32Res, PCRTFLOAT32U pr32Val, uint32_t *pfMxcsr)
     14148{
     14149    if (RTFLOAT32U_IS_SIGNALLING_NAN(pr32Val))
     14150    {
     14151        /* The operand is an SNaN: copy it into the result, set the topmost fraction bit to turn it into a QNaN and flag IE. */
     14152        *pr32Res = *pr32Val;
     14153        pr32Res->s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
     14154        *pfMxcsr |= X86_MXCSR_IE;
     14155        return true;
     14156    }
     14157    else if (RTFLOAT32U_IS_QUIET_NAN(pr32Val))
     14158    {
     14159        /* The QNaN operand is copied into the result unchanged; MXCSR is left untouched. */
     14160        *pr32Res = *pr32Val;
     14161        return true;
     14162    }
     14163
     14164    return false;
     14165}
     14166
     14167
     14168/**
     14169 * Checks the double-precision input operand of a unary operation for NaN values, reporting whether
     14170 * the operation can continue or whether the operand is a NaN, in which case the output is set accordingly.
     14171 *
     14172 * @returns Flag whether the operation can continue (false) or whether a NaN value was detected in the operand (true).
     14173 * @param   pr64Res         Where to store the result in case the operation can't continue (only written when true is returned).
     14174 * @param   pr64Val         The input operand.
     14175 * @param   pfMxcsr         The MXCSR value to update; X86_MXCSR_IE is ORed in when the operand is a signalling NaN (true is returned in that case).
     14176 */
     14177DECLINLINE(bool) iemSseUnaryValIsNaNR64(PRTFLOAT64U pr64Res, PCRTFLOAT64U pr64Val, uint32_t *pfMxcsr)
     14178{
     14179    if (RTFLOAT64U_IS_SIGNALLING_NAN(pr64Val))
     14180    {
     14181        /* The operand is an SNaN: copy it into the result, set the topmost fraction bit to turn it into a QNaN and flag IE. */
     14182        *pr64Res = *pr64Val;
     14183        pr64Res->s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
     14184        *pfMxcsr |= X86_MXCSR_IE;
     14185        return true;
     14186    }
     14187    else if (RTFLOAT64U_IS_QUIET_NAN(pr64Val))
     14188    {
     14189        /* The QNaN operand is copied into the result unchanged; MXCSR is left untouched. */
     14190        *pr64Res = *pr64Val;
     14191        return true;
     14192    }
     14193
     14194    return false;
     14195}
    1413614196#endif
    1413714197
     
    1481114871}
    1481214872#endif
     14873
     14874
     14875/**
     14876 * SQRTPS
           *
           * The worker below computes the square root of a single single-precision element:
           *  - NaN operands are handled by iemSseUnaryValIsNaNR32() and end the operation early.
           *  - A zero value (after iemSsePrepareValueR32()) is passed through unchanged.
           *  - Any other value with the sign bit set produces g_ar32QNaN[1] and raises X86_MXCSR_IE.
           *  - Everything else is computed by SoftFloat's f32_sqrt().
           *
           * @returns The MXCSR value for this element; callers OR it into the result MXCSR.
           * @param   pr32Res         Where to store the result.
           * @param   fMxcsr          The MXCSR value to start from.
           * @param   pr32Val         The input operand.
     14877 */
     14878#ifdef IEM_WITHOUT_ASSEMBLY
     14879static uint32_t iemAImpl_sqrtps_u128_worker(PRTFLOAT32U pr32Res, uint32_t fMxcsr, PCRTFLOAT32U pr32Val)
     14880{
     14881    if (iemSseUnaryValIsNaNR32(pr32Res, pr32Val, &fMxcsr)) /* NaN operand: the helper has already stored the result and updated fMxcsr. */
     14882        return fMxcsr;
     14883
     14884    RTFLOAT32U r32Src;
     14885    uint32_t fDe = iemSsePrepareValueR32(&r32Src, fMxcsr, pr32Val); /* r32Src is the prepared operand; fDe is presumably the denormal flag to merge - helper not shown here, TODO confirm. */
     14886    if (RTFLOAT32U_IS_ZERO(&r32Src)) /* Checked before the sign test below, so a zero never takes the invalid-operation path. */
     14887    {
     14888        *pr32Res = r32Src;
     14889        return fMxcsr;
     14890    }
     14891    else if (r32Src.s.fSign) /* Non-zero, non-NaN value with the sign bit set: invalid operation. */
     14892    {
     14893        *pr32Res = g_ar32QNaN[1];
     14894        return fMxcsr | X86_MXCSR_IE;
     14895    }
     14896
     14897    softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_MXCSR(fMxcsr);
     14898    float32_t r32Result = f32_sqrt(iemFpSoftF32FromIprt(&r32Src), &SoftState);
     14899    return iemSseSoftStateAndR32ToMxcsrAndIprtResult(&SoftState, r32Result, pr32Res, fMxcsr | fDe); /* fDe is only merged on this path. */
     14900}
     14901
     14902
     14903IEM_DECL_IMPL_DEF(void, iemAImpl_sqrtps_u128,(PX86FXSTATE pFpuState, PIEMSSERESULT pResult, PCX86XMMREG puSrc1, PCX86XMMREG puSrc2))
     14904{
     14905    RT_NOREF(puSrc1); /* Unary operation: only puSrc2 is read. */
     14906
               /* Each of the four elements is processed independently, starting from the MXCSR in pFpuState;
                  the MXCSR values returned by the worker are ORed together into pResult->MXCSR. */
     14907    pResult->MXCSR  = iemAImpl_sqrtps_u128_worker(&pResult->uResult.ar32[0], pFpuState->MXCSR, &puSrc2->ar32[0]);
     14908    pResult->MXCSR |= iemAImpl_sqrtps_u128_worker(&pResult->uResult.ar32[1], pFpuState->MXCSR, &puSrc2->ar32[1]);
     14909    pResult->MXCSR |= iemAImpl_sqrtps_u128_worker(&pResult->uResult.ar32[2], pFpuState->MXCSR, &puSrc2->ar32[2]);
     14910    pResult->MXCSR |= iemAImpl_sqrtps_u128_worker(&pResult->uResult.ar32[3], pFpuState->MXCSR, &puSrc2->ar32[3]);
     14911}
     14912#endif
     14913
     14914
     14915/**
     14916 * SQRTSS
           *
           * Scalar variant: element 0 of the result is the square root of *pr32Src2 (computed by the
           * SQRTPS worker), elements 1 thru 3 are copied from puSrc1.  The result MXCSR is whatever
           * the worker returns for element 0.
     14917 */
     14918#ifdef IEM_WITHOUT_ASSEMBLY
     14919IEM_DECL_IMPL_DEF(void, iemAImpl_sqrtss_u128_r32,(PX86FXSTATE pFpuState, PIEMSSERESULT pResult, PCX86XMMREG puSrc1, PCRTFLOAT32U pr32Src2))
     14920{
     14921    pResult->MXCSR = iemAImpl_sqrtps_u128_worker(&pResult->uResult.ar32[0], pFpuState->MXCSR, pr32Src2);
     14922    pResult->uResult.ar32[1] = puSrc1->ar32[1]; /* The upper three elements pass through from the first source. */
     14923    pResult->uResult.ar32[2] = puSrc1->ar32[2];
     14924    pResult->uResult.ar32[3] = puSrc1->ar32[3];
     14925}
     14926#endif
     14927
     14928
     14929/**
     14930 * SQRTPD
           *
           * The worker below computes the square root of a single double-precision element:
           *  - NaN operands are handled by iemSseUnaryValIsNaNR64() and end the operation early.
           *  - A zero value (after iemSsePrepareValueR64()) is passed through unchanged.
           *  - Any other value with the sign bit set produces g_ar64QNaN[1] and raises X86_MXCSR_IE.
           *  - Everything else is computed by SoftFloat's f64_sqrt().
           *
           * @returns The MXCSR value for this element; callers OR it into the result MXCSR.
           * @param   pr64Res         Where to store the result.
           * @param   fMxcsr          The MXCSR value to start from.
           * @param   pr64Val         The input operand.
     14931 */
     14932#ifdef IEM_WITHOUT_ASSEMBLY
     14933static uint32_t iemAImpl_sqrtpd_u128_worker(PRTFLOAT64U pr64Res, uint32_t fMxcsr, PCRTFLOAT64U pr64Val)
     14934{
     14935    if (iemSseUnaryValIsNaNR64(pr64Res, pr64Val, &fMxcsr)) /* NaN operand: the helper has already stored the result and updated fMxcsr. */
     14936        return fMxcsr;
     14937
     14938    RTFLOAT64U r64Src;
     14939    uint32_t fDe = iemSsePrepareValueR64(&r64Src, fMxcsr, pr64Val); /* r64Src is the prepared operand; fDe is presumably the denormal flag to merge - helper not shown here, TODO confirm. */
     14940    if (RTFLOAT64U_IS_ZERO(&r64Src)) /* Checked before the sign test below, so a zero never takes the invalid-operation path. */
     14941    {
     14942        *pr64Res = r64Src;
     14943        return fMxcsr;
     14944    }
     14945    else if (r64Src.s.fSign) /* Non-zero, non-NaN value with the sign bit set: invalid operation. */
     14946    {
     14947        *pr64Res = g_ar64QNaN[1];
     14948        return fMxcsr | X86_MXCSR_IE;
     14949    }
     14950
     14951    softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_MXCSR(fMxcsr);
     14952    float64_t r64Result = f64_sqrt(iemFpSoftF64FromIprt(&r64Src), &SoftState);
     14953    return iemSseSoftStateAndR64ToMxcsrAndIprtResult(&SoftState, r64Result, pr64Res, fMxcsr | fDe); /* fDe is only merged on this path. */
     14954}
     14955
     14956
     14957IEM_DECL_IMPL_DEF(void, iemAImpl_sqrtpd_u128,(PX86FXSTATE pFpuState, PIEMSSERESULT pResult, PCX86XMMREG puSrc1, PCX86XMMREG puSrc2))
     14958{
     14959    RT_NOREF(puSrc1); /* Unary operation: only puSrc2 is read. */
     14960
               /* Both elements are processed independently, starting from the MXCSR in pFpuState;
                  the MXCSR values returned by the worker are ORed together into pResult->MXCSR. */
     14961    pResult->MXCSR  = iemAImpl_sqrtpd_u128_worker(&pResult->uResult.ar64[0], pFpuState->MXCSR, &puSrc2->ar64[0]);
     14962    pResult->MXCSR |= iemAImpl_sqrtpd_u128_worker(&pResult->uResult.ar64[1], pFpuState->MXCSR, &puSrc2->ar64[1]);
     14963}
     14964#endif
     14965
     14966
     14967/**
     14968 * SQRTSD
           *
           * Scalar variant: element 0 of the result is the square root of *pr64Src2 (computed by the
           * SQRTPD worker), element 1 is copied from puSrc1.  The result MXCSR is whatever the worker
           * returns for element 0.
     14969 */
     14970#ifdef IEM_WITHOUT_ASSEMBLY
     14971IEM_DECL_IMPL_DEF(void, iemAImpl_sqrtsd_u128_r64,(PX86FXSTATE pFpuState, PIEMSSERESULT pResult, PCX86XMMREG puSrc1, PCRTFLOAT64U pr64Src2))
     14972{
     14973    pResult->MXCSR = iemAImpl_sqrtpd_u128_worker(&pResult->uResult.ar64[0], pFpuState->MXCSR, pr64Src2);
     14974    pResult->uResult.ar64[1] = puSrc1->ar64[1]; /* The upper element passes through from the first source. */
     14975}
     14976#endif
  • trunk/src/VBox/VMM/VMMAll/IEMAllInstructionsTwoByte0f.cpp.h

    r96382 r96384  
    40154015/*  Opcode 0xf2 0x0f 0x50 - invalid */
    40164016
     4017
    40174018/** Opcode      0x0f 0x51 - sqrtps Vps, Wps */
    4018 FNIEMOP_STUB(iemOp_sqrtps_Vps_Wps);
     4019FNIEMOP_DEF(iemOp_sqrtps_Vps_Wps)
     4020{
     4021    IEMOP_MNEMONIC2(RM, SQRTPS, sqrtps, Vps, Wps, DISOPTYPE_HARMLESS, 0);
              /* Hands the sqrtps implementation to the shared SSE full,full -> full helper (helper body not shown here). */
     4022    return FNIEMOP_CALL_1(iemOpCommonSseFp_FullFull_To_Full, iemAImpl_sqrtps_u128);
     4023}
     4024
     4025
    40194026/** Opcode 0x66 0x0f 0x51 - sqrtpd Vpd, Wpd */
    4020 FNIEMOP_STUB(iemOp_sqrtpd_Vpd_Wpd);
     4027FNIEMOP_DEF(iemOp_sqrtpd_Vpd_Wpd)
     4028{
     4029    IEMOP_MNEMONIC2(RM, SQRTPD, sqrtpd, Vpd, Wpd, DISOPTYPE_HARMLESS, 0);
              /* Hands the sqrtpd implementation to the shared SSE2 full,full -> full helper (helper body not shown here). */
     4030    return FNIEMOP_CALL_1(iemOpCommonSse2Fp_FullFull_To_Full, iemAImpl_sqrtpd_u128);
     4031}
     4032
     4033
    40214034/** Opcode 0xf3 0x0f 0x51 - sqrtss Vss, Wss */
    4022 FNIEMOP_STUB(iemOp_sqrtss_Vss_Wss);
     4035FNIEMOP_DEF(iemOp_sqrtss_Vss_Wss)
     4036{
     4037    IEMOP_MNEMONIC2(RM, SQRTSS, sqrtss, Vss, Wss, DISOPTYPE_HARMLESS, 0);
              /* Hands the sqrtss implementation to the shared SSE full,r32 -> full helper (helper body not shown here). */
     4038    return FNIEMOP_CALL_1(iemOpCommonSseFp_FullR32_To_Full, iemAImpl_sqrtss_u128_r32);
     4039}
     4040
     4041
    40234042/** Opcode 0xf2 0x0f 0x51 - sqrtsd Vsd, Wsd */
    4024 FNIEMOP_STUB(iemOp_sqrtsd_Vsd_Wsd);
     4043FNIEMOP_DEF(iemOp_sqrtsd_Vsd_Wsd)
     4044{
     4045    IEMOP_MNEMONIC2(RM, SQRTSD, sqrtsd, Vsd, Wsd, DISOPTYPE_HARMLESS, 0);
              /* Hands the sqrtsd implementation to the shared SSE2 full,r64 -> full helper (helper body not shown here). */
     4046    return FNIEMOP_CALL_1(iemOpCommonSse2Fp_FullR64_To_Full, iemAImpl_sqrtsd_u128_r64);
     4047}
     4048
    40254049
    40264050/** Opcode      0x0f 0x52 - rsqrtps Vps, Wps */
  • trunk/src/VBox/VMM/include/IEMInternal.h

    r96382 r96384  
    24422442FNIEMAIMPLFPSSEF2U128 iemAImpl_hsubps_u128;
    24432443FNIEMAIMPLFPSSEF2U128 iemAImpl_hsubpd_u128;
     2444FNIEMAIMPLFPSSEF2U128 iemAImpl_sqrtps_u128;
     2445FNIEMAIMPLFPSSEF2U128 iemAImpl_sqrtpd_u128;
    24442446
    24452447FNIEMAIMPLFPSSEF2U128R32 iemAImpl_addss_u128_r32;
     
    24572459FNIEMAIMPLFPSSEF2U128R32 iemAImpl_cvtss2sd_u128_r32;
    24582460FNIEMAIMPLFPSSEF2U128R64 iemAImpl_cvtsd2ss_u128_r64;
     2461FNIEMAIMPLFPSSEF2U128R32 iemAImpl_sqrtss_u128_r32;
     2462FNIEMAIMPLFPSSEF2U128R64 iemAImpl_sqrtsd_u128_r64;
    24592463
    24602464FNIEMAIMPLFPAVXF3U128 iemAImpl_vaddps_u128, iemAImpl_vaddps_u128_fallback;
     
    24742478FNIEMAIMPLFPAVXF3U128 iemAImpl_vhsubps_u128, iemAImpl_vhsubps_u128_fallback;
    24752479FNIEMAIMPLFPAVXF3U128 iemAImpl_vhsubpd_u128, iemAImpl_vhsubpd_u128_fallback;
     2480FNIEMAIMPLFPAVXF3U128 iemAImpl_vsqrtps_u128, iemAImpl_vsqrtps_u128_fallback;
     2481FNIEMAIMPLFPAVXF3U128 iemAImpl_vsqrtpd_u128, iemAImpl_vsqrtpd_u128_fallback;
    24762482
    24772483FNIEMAIMPLFPAVXF3U128R32 iemAImpl_vaddss_u128_r32, iemAImpl_vaddss_u128_r32_fallback;
     
    24872493FNIEMAIMPLFPAVXF3U128R32 iemAImpl_vmaxss_u128_r32, iemAImpl_vmaxss_u128_r32_fallback;
    24882494FNIEMAIMPLFPAVXF3U128R64 iemAImpl_vmaxsd_u128_r64, iemAImpl_vmaxsd_u128_r64_fallback;
     2495FNIEMAIMPLFPAVXF3U128R32 iemAImpl_vsqrtss_u128_r32, iemAImpl_vsqrtss_u128_r32_fallback;
     2496FNIEMAIMPLFPAVXF3U128R64 iemAImpl_vsqrtsd_u128_r64, iemAImpl_vsqrtsd_u128_r64_fallback;
    24892497
    24902498FNIEMAIMPLFPAVXF3U256 iemAImpl_vaddps_u256, iemAImpl_vaddps_u256_fallback;
  • trunk/src/VBox/VMM/testcase/tstIEMCheckMc.cpp

    r96382 r96384  
    478478#define iemAImpl_hsubps_u128            NULL
    479479#define iemAImpl_hsubpd_u128            NULL
     480#define iemAImpl_sqrtps_u128            NULL
     481#define iemAImpl_sqrtpd_u128            NULL
    480482
    481483#define iemAImpl_addss_u128_r32         NULL
     
    491493#define iemAImpl_maxss_u128_r32         NULL
    492494#define iemAImpl_maxsd_u128_r64         NULL
     495#define iemAImpl_sqrtss_u128_r32        NULL
     496#define iemAImpl_sqrtsd_u128_r64        NULL
    493497
    494498#define iemAImpl_cvtss2sd_u128_r32      NULL
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette