VirtualBox

Ignore:
Timestamp:
Jul 24, 2024 2:49:29 PM (6 months ago)
Author:
vboxsync
Message:

VMM/IEM: Basic infrastructure to natively recompile SIMD floating point instructions, bugref:10652

SIMD floating point operation behavior depends on the guest MXCSR value, which needs to be written to the
host's floating point control register (MXCSR on x86, FPCR on arm64, which needs conversion) and needs to be
restored to the host's value when the TB finishes execution to avoid inconsistencies in case the guest
changes MXCSR. The ARM implementation does not conform to the x86 behavior because default NaN values have
the sign bit clear on arm64 while it is set on x86. There are rounding differences as well, and earlier
ARMv8 revisions don't support the FPCR.FIZ and FPCR.AH features. It should still work out as long as the guest
doesn't try to do funny stuff.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h

    r105489 r105490  
    7979#endif
    8080
     81#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
     82# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
     83#endif
    8184
    8285
     
    33653368                                                    | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
    33663369                                                    | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
     3370# endif
     3371
     3372# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
     3373    /* Mark the host floating point control register as not synced if MXCSR is modified. */
     3374    if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
     3375        pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
    33673376# endif
    33683377#endif
     
    87568765DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
    87578766{
    8758     /** @todo this needs a lot more work later. */
     8767#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
    87598768    RT_NOREF(pReNative, fForChange);
     8769#else
     8770    if (   !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
     8771        && fForChange)
     8772    {
     8773# ifdef RT_ARCH_AMD64
     8774
     8775        /* Need to save the host MXCSR the first time, and clear the exception flags. */
     8776        if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
     8777        {
     8778            PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
     8779
     8780            /* stmxcsr */
     8781            if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
     8782                pbCodeBuf[off++] = X86_OP_REX_B;
     8783            pbCodeBuf[off++] = 0x0f;
     8784            pbCodeBuf[off++] = 0xae;
     8785            pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
     8786            pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
     8787            pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
     8788            pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
     8789            pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
     8790            IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     8791
     8792            pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
     8793        }
     8794
     8795        uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
     8796        uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
     8797
     8798        /*
     8799         * Mask any exceptions and clear the exception status and save into MXCSR,
     8800         * taking a detour through memory here because ldmxcsr/stmxcsr don't support
     8801         * a register source/target (sigh).
     8802         */
     8803        off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
     8804        off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
     8805        off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
     8806        off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     8807
     8808        PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
     8809
     8810        /* ldmxcsr */
     8811        if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
     8812            pbCodeBuf[off++] = X86_OP_REX_B;
     8813        pbCodeBuf[off++] = 0x0f;
     8814        pbCodeBuf[off++] = 0xae;
     8815        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
     8816        pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     8817        pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     8818        pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     8819        pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     8820        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     8821
     8822        iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
     8823        iemNativeRegFreeTmp(pReNative, idxRegTmp);
     8824
     8825# elif defined(RT_ARCH_ARM64)
     8826        uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
     8827
     8828        /* Need to save the host floating point control register the first time, clear FPSR. */
     8829        if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
     8830        {
     8831            PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
     8832            pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
     8833            pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
     8834            off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
     8835            pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
     8836        }
     8837
     8838        /*
     8839         * Translate MXCSR to FPCR.
     8840         *
     8841         * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
     8842         * FEAT_AFP on arm64 for granted (My M2 Macbook doesn't has it). So we can't map
     8843         * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
     8844         * We can only use FPCR.FZ which will flush inputs _and_ output de-normals to zero.
     8845         */
     8846        /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
     8847         *        and implement alternate handling if FEAT_AFP is present. */
     8848        uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
     8849
     8850        PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
     8851
     8852        /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
     8853        pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
     8854
     8855        /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
     8856        pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
     8857        pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp,              idxRegTmp, X86_MXCSR_FZ_BIT);
     8858        pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
     8859        pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
     8860
     8861        /*
     8862         * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
     8863         *
     8864         * Value    MXCSR   FPCR
     8865         *   0       RN      RN
     8866         *   1       R-      R+
     8867         *   2       R+      R-
     8868         *   3       RZ      RZ
     8869         *
     8870         * Conversion can be achieved by switching bit positions
     8871         */
     8872        pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
     8873        pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
     8874        pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
     8875        pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
     8876
     8877        /* Write the value to FPCR. */
     8878        pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
     8879
     8880        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     8881        iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
     8882        iemNativeRegFreeTmp(pReNative, idxRegTmp);
     8883# else
     8884#  error "Port me"
     8885# endif
     8886        pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
     8887    }
     8888#endif
    87608889    return off;
    87618890}
     
    998010109    AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
    998110110
     10111#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
    998210112    /*
    998310113     * Need to do the FPU preparation.
    998410114     */
    998510115    off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
     10116#endif
    998610117
    998710118    /*
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette