VirtualBox

Ignore:
Timestamp:
Apr 2, 2024 12:37:36 PM (10 months ago)
Author:
vboxsync
Message:

VMM/IEM: Rework MXCSR handling for SSE instructions, bugref:10641

The old approach of referencing the X86FXSTATE and accessing the MXCSR value there
prevents us from keeping the MXCSR shadowed in a host register for SIMD guest code,
causing unnecessary memory accesses. It also prevents us from skipping the writeback of dirty
guest registers, because the instruction helpers would have access to the CPUMCTX structure.

The new approach passes the guest MXCSR as the first argument of the helper callback, and
the helper returns the MXCSR with the new exception flags set as its return value.
With this, the helpers only work on the arguments supplied and don't access anything in CPUMCTX
directly, which allows the recompiler to avoid flushing pending register writes unless they get
used.

As a bonus, this also gets rid of the IEMSSERESULT struct, which was required because the helpers
are limited to 4 arguments due to restrictions on x86 for the assembly helpers in IEMAllAImpl.asm.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h

    r104107 r104129  
    11501150#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
    11511151#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
    1152     off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off)
     1152    off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
    11531153
    11541154/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
    11551155DECL_INLINE_THROW(uint32_t)
    1156 iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    1157 {
     1156iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
     1157{
     1158    /*
     1159     * Make sure we don't have any outstanding guest register writes as we may
     1160     * raise an \#UD or \#XF and all guest register must be up to date in CPUMCTX.
     1161     */
     1162    off = iemNativeRegFlushPendingWrites(pReNative, off);
     1163
     1164#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
     1165    off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
     1166#else
     1167    RT_NOREF(idxInstr);
     1168#endif
     1169
    11581170    uint8_t const idxLabelRaiseSseAvxFpRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseAvxFpRelated);
    11591171    uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
     
    85508562
    85518563
    8552 #define IEM_MC_SSE_UPDATE_MXCSR(a_fMxcsr) \
    8553     off = iemNativeEmitSimdSseUpdateMxcsr(pReNative, off, a_fMxcsr)
    8554 
    8555 /** Emits code for IEM_MC_SSE_UPDATE_MXCSR. */
    8556 DECL_INLINE_THROW(uint32_t)
    8557 iemNativeEmitSimdSseUpdateMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxMxCsrVar)
    8558 {
    8559     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxMxCsrVar);
    8560     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxMxCsrVar, sizeof(uint32_t));
    8561 
    8562     uint8_t const idxRegMxCsr    = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ForUpdate);
    8563     uint8_t const idxVarRegMxCsr = iemNativeVarRegisterAcquire(pReNative, idxMxCsrVar, &off, true /*fInitalized*/);
    8564     uint8_t const idxVarRegTmp   = iemNativeRegAllocTmp(pReNative, &off);
    8565 
    8566     /** @todo r=aeichner I think it would be safe to spare the temporary register and trash
    8567      *                   the variable MXCSR register as it isn't used afterwards in the microcode block anyway.
    8568      *                   Needs verification though, so play it safe for now.
    8569      */
    8570     /* mov tmp, varmxcsr */
    8571     off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarRegTmp, idxVarRegMxCsr);
    8572     /* and tmp, X86_MXCSR_XCPT_FLAGS */
    8573     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarRegTmp, X86_MXCSR_XCPT_FLAGS);
    8574     /* or mxcsr, tmp */
    8575     off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxVarRegTmp);
    8576 
    8577     /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
    8578     off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
    8579 
    8580     /* Free but don't flush the MXCSR register. */
    8581     iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
    8582     iemNativeVarRegisterRelease(pReNative, idxMxCsrVar);
    8583     iemNativeRegFreeTmp(pReNative, idxVarRegTmp);
    8584 
    8585     return off;
    8586 }
    8587 
    8588 
    85898564#define IEM_MC_STORE_SSE_RESULT(a_SseData, a_iXmmReg) \
    85908565    off = iemNativeEmitSimdSseStoreResult(pReNative, off, a_SseData, a_iXmmReg)
     
    85928567/** Emits code for IEM_MC_STORE_SSE_RESULT. */
    85938568DECL_INLINE_THROW(uint32_t)
    8594 iemNativeEmitSimdSseStoreResult(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSseDataVar, uint8_t iXReg)
    8595 {
    8596     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSseDataVar);
    8597     IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSseDataVar, sizeof(IEMSSERESULT));
    8598 
    8599     /** @todo r=aeichner We probably need to rework this MC statement and the users to make thing more efficient. */
    8600     uint8_t const idxSimdRegDst    = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
    8601                                                                              kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
    8602     uint8_t const idxRegMxCsr      = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ForUpdate);
    8603     uint8_t const idxVarRegResAddr = iemNativeRegAllocTmp(pReNative, &off);
    8604     uint8_t const idxRegTmp        = iemNativeRegAllocTmp(pReNative, &off);
    8605 
    8606     off = iemNativeEmitLoadArgGregWithVarAddr(pReNative, off, idxVarRegResAddr, idxSseDataVar, false /*fFlushShadows*/);
    8607 
    8608     /* Update MXCSR. */
    8609     off = iemNativeEmitLoadGprByGprU32(pReNative, off, idxRegTmp, idxVarRegResAddr, RT_UOFFSETOF_DYN(IEMSSERESULT, MXCSR));
    8610     /* tmp &= X86_MXCSR_XCPT_FLAGS. */
    8611     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS);
    8612     /* mxcsr |= tmp */
    8613     off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
    8614 
    8615     /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
    8616     off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
     8569iemNativeEmitSimdSseStoreResult(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSseRes, uint8_t iXReg)
     8570{
     8571    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSseRes);
     8572    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSseRes, sizeof(X86XMMREG));
     8573
     8574    /* The ForUpdate is important as we might end up not writing the result value to the register in case of an unmasked exception. */
     8575    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
     8576                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
     8577    uint8_t const idxVarRegRes  = iemNativeVarSimdRegisterAcquire(pReNative, idxSseRes, &off, true /*fInitalized*/);
     8578    uint8_t const idxRegMxCsr   = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
     8579    uint8_t const idxRegTmp     = iemNativeRegAllocTmp(pReNative, &off);
    86178580
    86188581    /* Update the value if there is no unmasked exception. */
     
    86318594    uint32_t offFixup = off;
    86328595    off = iemNativeEmitJnzToFixed(pReNative, off, off);
    8633     AssertCompileMemberSize(IEMSSERESULT, uResult, sizeof(RTFLOAT128U));
    8634     off = iemNativeEmitLoadVecRegByGprU128(pReNative, off, idxSimdRegDst, idxVarRegResAddr, RT_UOFFSETOF_DYN(IEMSSERESULT, uResult));
     8596    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarRegRes);
    86358597    iemNativeFixupFixedJump(pReNative, offFixup, off);
    86368598
    86378599    /* Free but don't flush the shadowed register. */
     8600    iemNativeVarRegisterRelease(pReNative, idxSseRes);
    86388601    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    86398602    iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
    8640     iemNativeRegFreeTmp(pReNative, idxVarRegResAddr);
    86418603    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    86428604
     
    86558617iemNativeEmitCallSseAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
    86568618{
     8619    /* Grab the MXCSR register, it must not be call volatile or we end up freeing it when setting up the call below. */
     8620    uint8_t const  idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
     8621                                                                 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
     8622    AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
     8623
    86578624    /*
    86588625     * Need to do the FPU preparation.
     
    86638630     * Do all the call setup and cleanup.
    86648631     */
    8665     off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS);
     8632    off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS, false /*fFlushPendingWrites*/);
    86668633
    86678634    /*
    8668      * Load the XState::x87 pointer.
     8635     * Load the MXCSR register into the first argument and mask out the current exception flags.
    86698636     */
    8670     off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, kIemNativeGstRegRef_X87, 0 /*idxRegInClass*/);
     8637    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
     8638    off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
    86718639
    86728640    /*
     
    86758643    off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
    86768644
     8645    /*
     8646     * The updated MXCSR is in the return register.
     8647     */
     8648    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);
     8649
     8650#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
     8651    /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
     8652    off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
     8653#endif
     8654    iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
     8655
    86778656    return off;
    86788657}
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette