VirtualBox

Changeset 105491 in vbox for trunk


Ignore:
Timestamp:
Jul 24, 2024 2:51:20 PM (6 months ago)
Author:
vboxsync
Message:

VMM/IEM: Implement native emitters for addps, addpd, mulps and subps, bugref:10652

Due to differences in rounding and default NaN behavior, bs3-cpu-instr-4 doesn't pass
with this on an ARM host. This needs some tweaking in the testcase.

Location:
trunk/src/VBox/VMM
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/VBox/VMM/VMMAll/IEMAllInstTwoByte0f.cpp.h

    r105488 r105491  
    749749    }
    750750}
     751
     752
     753/**
     754 * A body preprocessor variant of iemOpCommonSseFp_FullFull_To_Full in order
     755 * to support native emitters for certain instructions.
     756 */
     757#define SSE_FP_BODY_FullFull_To_Full(a_Ins, a_pImplExpr, a_fRegNativeArchs, a_fMemNativeArchs) \
     758        PFNIEMAIMPLFPSSEF2U128 const pfnU128 = (a_pImplExpr); \
     759        uint8_t bRm; IEM_OPCODE_GET_NEXT_U8(&bRm); \
     760        if (IEM_IS_MODRM_REG_MODE(bRm)) \
     761        { \
     762            /* \
     763             * XMM, XMM. \
     764             */ \
     765            IEM_MC_BEGIN(IEM_MC_F_NOT_286_OR_OLDER, 0); \
     766            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX_EX(fSse); \
     767            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT(); \
     768            IEM_MC_PREPARE_SSE_USAGE(); \
     769            IEM_MC_NATIVE_IF(a_fRegNativeArchs) { \
     770                IEM_MC_NATIVE_EMIT_2_EX(RT_CONCAT3(iemNativeEmit_,a_Ins,_rr_u128), IEM_GET_MODRM_REG(pVCpu, bRm), IEM_GET_MODRM_RM(pVCpu, bRm)); \
     771            } IEM_MC_NATIVE_ELSE() { \
     772                IEM_MC_LOCAL(X86XMMREG,             SseRes); \
     773                IEM_MC_ARG_LOCAL_REF(PX86XMMREG,    pSseRes,        SseRes,     0); \
     774                IEM_MC_ARG(PCX86XMMREG,             pSrc1,                      1); \
     775                IEM_MC_REF_XREG_XMM_CONST(pSrc1, IEM_GET_MODRM_REG(pVCpu, bRm)); \
     776                IEM_MC_ARG(PCX86XMMREG,             pSrc2,                      2); \
     777                IEM_MC_REF_XREG_XMM_CONST(pSrc2, IEM_GET_MODRM_RM(pVCpu, bRm)); \
     778                IEM_MC_CALL_SSE_AIMPL_3(pfnU128, pSseRes, pSrc1, pSrc2); \
     779                IEM_MC_STORE_XREG_XMM(IEM_GET_MODRM_REG(pVCpu, bRm), SseRes); \
     780            } IEM_MC_NATIVE_ENDIF(); \
     781            IEM_MC_ADVANCE_RIP_AND_FINISH(); \
     782            IEM_MC_END(); \
     783        } \
     784        else \
     785        { \
     786            /* \
     787             * XMM, [mem128]. \
     788             */ \
     789            IEM_MC_BEGIN(IEM_MC_F_NOT_286_OR_OLDER, 0); \
     790            IEM_MC_LOCAL(X86XMMREG,                 uSrc2); \
     791            IEM_MC_LOCAL(RTGCPTR,                   GCPtrEffSrc); \
     792            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffSrc, bRm, 0); \
     793            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX_EX(fSse); \
     794            IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT(); \
     795            IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(uSrc2, pVCpu->iem.s.iEffSeg, GCPtrEffSrc); \
     796            IEM_MC_PREPARE_SSE_USAGE(); \
     797            IEM_MC_NATIVE_IF(a_fRegNativeArchs) { \
     798                IEM_MC_NATIVE_EMIT_2_EX(RT_CONCAT3(iemNativeEmit_,a_Ins,_rv_u128), IEM_GET_MODRM_REG(pVCpu, bRm), uSrc2); \
     799            } IEM_MC_NATIVE_ELSE() { \
     800                IEM_MC_LOCAL(X86XMMREG,             SseRes); \
     801                IEM_MC_ARG_LOCAL_REF(PX86XMMREG,    pSseRes,        SseRes,     0); \
     802                IEM_MC_ARG(PCX86XMMREG,             pSrc1,                      1); \
     803                IEM_MC_ARG_LOCAL_REF(PCX86XMMREG,   pSrc2, uSrc2,               2); \
     804                IEM_MC_REF_XREG_XMM_CONST(pSrc1, IEM_GET_MODRM_REG(pVCpu, bRm)); \
     805                IEM_MC_CALL_SSE_AIMPL_3(pfnU128, pSseRes, pSrc1, pSrc2); \
     806                IEM_MC_STORE_XREG_XMM(IEM_GET_MODRM_REG(pVCpu, bRm), SseRes); \
     807            } IEM_MC_NATIVE_ENDIF(); \
     808            IEM_MC_ADVANCE_RIP_AND_FINISH(); \
     809            IEM_MC_END(); \
     810        } void(0)
    751811
    752812
     
    53725432{
    53735433    IEMOP_MNEMONIC2(RM, ADDPS, addps, Vps, Wps, DISOPTYPE_HARMLESS, 0);
    5374     return FNIEMOP_CALL_1(iemOpCommonSseFp_FullFull_To_Full, iemAImpl_addps_u128);
     5434    SSE_FP_BODY_FullFull_To_Full(addps, iemAImpl_addps_u128, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64);
    53755435}
    53765436
     
    53805440{
    53815441    IEMOP_MNEMONIC2(RM, ADDPD, addpd, Vpd, Wpd, DISOPTYPE_HARMLESS, 0);
    5382     return FNIEMOP_CALL_1(iemOpCommonSse2Fp_FullFull_To_Full, iemAImpl_addpd_u128);
     5442    SSE_FP_BODY_FullFull_To_Full(addpd, iemAImpl_addpd_u128, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64);
    53835443}
    53845444
     
    54045464{
    54055465    IEMOP_MNEMONIC2(RM, MULPS, mulps, Vps, Wps, DISOPTYPE_HARMLESS, 0);
    5406     return FNIEMOP_CALL_1(iemOpCommonSseFp_FullFull_To_Full, iemAImpl_mulps_u128);
     5466    SSE_FP_BODY_FullFull_To_Full(mulps, iemAImpl_mulps_u128, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64);
    54075467}
    54085468
     
    55435603{
    55445604    IEMOP_MNEMONIC2(RM, SUBPS, subps, Vps, Wps, DISOPTYPE_HARMLESS, 0);
    5545     return FNIEMOP_CALL_1(iemOpCommonSseFp_FullFull_To_Full, iemAImpl_subps_u128);
     5605    SSE_FP_BODY_FullFull_To_Full(subps, iemAImpl_subps_u128, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64, RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64);
    55465606}
    55475607
  • trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veEmit-x86.h

    r105484 r105491  
    25732573IEMNATIVE_NATIVE_EMIT_PMOV_S_Z_U128(pmovsxdq, false, kArmv8InstrShiftSz_U32, 0x25);
    25742574
     2575
     2576/**
     2577 * Updates the MXCSR exception flags, raising any unmasked exceptions.
     2578 */
     2579DECL_INLINE_THROW(uint32_t)
     2580iemNativeEmitMxcsrUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr, uint8_t const idxSimdGstRegDst, uint8_t const idxSimdRegRes)
     2581{
     2582    uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ForUpdate);
     2583    uint8_t const idxRegMxCsrXcptFlags = iemNativeRegAllocTmp(pReNative, &off);
     2584    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
     2585
     2586#ifdef RT_ARCH_AMD64
     2587    PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
     2588
     2589    /* stmxcsr */
     2590    if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
     2591        pbCodeBuf[off++] = X86_OP_REX_B;
     2592    pbCodeBuf[off++] = 0x0f;
     2593    pbCodeBuf[off++] = 0xae;
     2594    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
     2595    pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     2596    pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     2597    pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     2598    pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     2599
     2600    /* Load MXCSR, mask everything except status flags and or into guest MXCSR. */
     2601    off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     2602
     2603    /* Store the flags in the MXCSR xcpt flags register. */
     2604    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsrXcptFlags, idxRegTmp);
     2605    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegMxCsrXcptFlags, X86_MXCSR_XCPT_FLAGS);
     2606
     2607    /* Clear the status flags in the temporary copy and write it back to MXCSR. */
     2608    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, ~X86_MXCSR_XCPT_FLAGS);
     2609    off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     2610
     2611    pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
     2612
     2613    /* ldmxcsr */
     2614    if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
     2615        pbCodeBuf[off++] = X86_OP_REX_B;
     2616    pbCodeBuf[off++] = 0x0f;
     2617    pbCodeBuf[off++] = 0xae;
     2618    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
     2619    pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     2620    pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     2621    pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     2622    pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
     2623
     2624#elif defined(RT_ARCH_ARM64)
     2625    PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
     2626    pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegMxCsrXcptFlags, ARMV8_AARCH64_SYSREG_FPSR);
     2627    pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);      /* Clear FPSR for next instruction. */
     2628    pu32CodeBuf[off++] = Armv8A64MkInstrUxtb(idxRegMxCsrXcptFlags, idxRegMxCsrXcptFlags);       /* Ensure there are only the exception flags set (clears QC, and any possible NZCV flags). */
     2629
     2630    /*
     2631     * The exception flags layout differs between MXCSR and FPSR of course:
     2632     *
     2633     * Bit  FPSR        MXCSR
     2634     *  0   IOC  ------> IE
     2635     *
     2636     *  1   DZC  ----    DE <-+
     2637     *               \        |
     2638     *  2   OFC  ---  -> ZE   |
     2639     *              \         |
     2640     *  3   UFC  --  --> OE   |
     2641     *             \          |
     2642     *  4   IXC  -  ---> UE   |
     2643     *            \           |
     2644     *  5          ----> PE   |
     2645     *  6                     |
     2646     *  7   IDC --------------+
     2647     */
     2648    pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegMxCsrXcptFlags, 1);    /* Shift the block of flags starting at DZC to the least significant bits. */
     2649    pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegMxCsrXcptFlags, idxRegTmp, 2, 4);    /* Insert DZC, OFC, UFC and IXC into the MXCSR positions. */
     2650    pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegMxCsrXcptFlags, 6);    /* Shift IDC (now at 6) into the LSB. */
     2651    pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegMxCsrXcptFlags, idxRegTmp, 1, 1);    /* Insert IDC into the MXCSR positions. */
     2652#else
     2653# error "Port me"
     2654#endif
     2655
     2656    /*
     2657     * If PE is set together with OE/UE and neither are masked
     2658     * PE needs to be cleared, because on real hardware
     2659     * an exception is generated with only OE/UE being set,
     2660     * but because we mask all exceptions PE will get set as well.
     2661     */
     2662    /** @todo On ARM we can combine the load+and into one and instruction. */
     2663    /** @todo r=aeichner Can this be done more optimal? */
     2664    uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
     2665    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsrXcptFlags);
     2666    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_OE | X86_MXCSR_UE);
     2667    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp2, idxRegMxCsr);
     2668    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp2, X86_MXCSR_OM | X86_MXCSR_UM);
     2669    off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp2, X86_MXCSR_XCPT_MASK_SHIFT);
     2670    off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp2, idxRegTmp2, false /*f64Bit*/);
     2671    off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp2, idxRegTmp);
     2672    off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp2, X86_MXCSR_OE | X86_MXCSR_UE);
     2673
     2674    uint32_t offFixup = off;
     2675    off = iemNativeEmitJzToFixed(pReNative, off, off);
     2676    off = iemNativeEmitBitClearInGpr32(pReNative, off, idxRegMxCsrXcptFlags, X86_MXCSR_PE_BIT);
     2677    iemNativeFixupFixedJump(pReNative, offFixup, off);
     2678    iemNativeRegFreeTmp(pReNative, idxRegTmp2);
     2679
     2680
     2681    /* Set the MXCSR flags now. */
     2682    off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegMxCsrXcptFlags);
     2683
     2684    /*
     2685     * Make sure we don't have any outstanding guest register writes as we may
     2686     * raise an \#UD or \#XF and all guest register must be up to date in CPUMCTX.
     2687     */
     2688    off = iemNativeRegFlushPendingWrites(pReNative, off);
     2689
     2690#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
     2691    off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
     2692#else
     2693    RT_NOREF(idxInstr);
     2694#endif
     2695
     2696    /* Check whether an exception is pending and only update the guest SIMD register if it isn't. */
     2697    /* mov tmp, varmxcsr */
     2698    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
     2699    /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
     2700    off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
     2701    /* tmp = ~tmp */
     2702    off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
     2703    /* tmp &= mxcsr */
     2704    off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegMxCsrXcptFlags, idxRegTmp);
     2705    off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegMxCsrXcptFlags, X86_MXCSR_XCPT_FLAGS,
     2706                                                         kIemNativeLabelType_RaiseSseAvxFpRelated);
     2707
     2708    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
     2709                                                                          kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
     2710
     2711    /* Move result to guest SIMD register (at this point there is no exception being raised). */
     2712    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegRes);
     2713
     2714    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     2715    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
     2716    iemNativeRegFreeTmp(pReNative, idxRegTmp);
     2717    iemNativeRegFreeTmp(pReNative, idxRegMxCsrXcptFlags);
     2718    iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
     2719    return off;
     2720}
     2721
     2722
     2723/**
     2724 * Common emitter for packed floating point instructions with 3 operands - register, register variant.
     2725 */
     2726DECL_INLINE_THROW(uint32_t) iemNativeEmitSimdFp3OpCommon_rr_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr,
     2727                                                                 uint8_t const idxSimdGstRegDst, uint8_t const idxSimdGstRegSrc,
     2728#ifdef RT_ARCH_AMD64
     2729                                                                 uint8_t const bPrefixX86, uint8_t const bOpcX86
     2730#elif defined(RT_ARCH_ARM64)
     2731                                                                 ARMV8INSTRVECFPOP const enmFpOp, ARMV8INSTRVECFPSZ const enmFpSz
     2732#endif
     2733                                                                 )
     2734{
     2735    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
     2736                                                                         kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
     2737    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegSrc),
     2738                                                                         kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
     2739
     2740#ifdef RT_ARCH_AMD64
     2741    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdRegDst);
     2742    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
     2743    if (bPrefixX86 != 0)
     2744        pCodeBuf[off++] = bPrefixX86;
     2745    if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || idxSimdRegSrc >= 8)
     2746        pCodeBuf[off++] =   (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0)
     2747                          | (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
     2748    pCodeBuf[off++] = 0x0f;
     2749    pCodeBuf[off++] = bOpcX86;
     2750    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, idxSimdRegSrc & 7);
     2751#elif defined(RT_ARCH_ARM64)
     2752    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
     2753    pCodeBuf[off++] = Armv8A64MkVecInstrFp3Op(enmFpOp, enmFpSz, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdRegDst, idxSimdRegSrc);
     2754#else
     2755# error "Port me"
     2756#endif
     2757    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
     2758    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
     2759    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     2760    return iemNativeEmitMxcsrUpdate(pReNative, off, idxInstr, idxSimdGstRegDst, IEMNATIVE_SIMD_REG_FIXED_TMP0);
     2761}
     2762
     2763
     2764/**
     2765 * Common emitter for packed floating point instructions with 3 operands - register, local variable variant.
     2766 */
     2767DECL_INLINE_THROW(uint32_t) iemNativeEmitSimdFp3OpCommon_rv_u128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr,
     2768                                                                 uint8_t const idxSimdGstRegDst, uint8_t const idxVarSrc,
     2769#ifdef RT_ARCH_AMD64
     2770                                                                 uint8_t const bPrefixX86, uint8_t const bOpcX86
     2771#elif defined(RT_ARCH_ARM64)
     2772                                                                 ARMV8INSTRVECFPOP const enmFpOp, ARMV8INSTRVECFPSZ const enmFpSz
     2773#endif
     2774                                                                 )
     2775{
     2776    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(idxSimdGstRegDst),
     2777                                                                         kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
     2778    uint8_t const idxSimdRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxVarSrc, &off, true /*fInitialized*/);
     2779
     2780#ifdef RT_ARCH_AMD64
     2781    off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdRegDst);
     2782    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
     2783    if (bPrefixX86 != 0)
     2784        pCodeBuf[off++] = bPrefixX86;
     2785    if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 || idxSimdRegSrc >= 8)
     2786        pCodeBuf[off++] =   (idxSimdRegSrc >= 8 ? X86_OP_REX_B : 0)
     2787                          | (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
     2788    pCodeBuf[off++] = 0x0f;
     2789    pCodeBuf[off++] = bOpcX86;
     2790    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7, idxSimdRegSrc & 7);
     2791#elif defined(RT_ARCH_ARM64)
     2792    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
     2793    pCodeBuf[off++] = Armv8A64MkVecInstrFp3Op(enmFpOp, enmFpSz, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdRegDst, idxSimdRegSrc);
     2794#else
     2795# error "Port me"
     2796#endif
     2797    iemNativeVarRegisterRelease(pReNative, idxVarSrc);
     2798    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
     2799    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     2800    return iemNativeEmitMxcsrUpdate(pReNative, off, idxInstr, idxSimdGstRegDst, IEMNATIVE_SIMD_REG_FIXED_TMP0);
     2801}
     2802
     2803
     2804/**
     2805 * Common emitter for packed floating point instructions with 3 operands.
     2806 */
     2807#ifdef RT_ARCH_AMD64
     2808# define IEMNATIVE_NATIVE_EMIT_FP_3OP_U128(a_Instr, a_enmArmOp, a_ArmElemSz, a_bPrefixX86, a_bOpcX86) \
     2809    DECL_FORCE_INLINE_THROW(uint32_t) \
     2810    RT_CONCAT3(iemNativeEmit_,a_Instr,_rr_u128)(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr, \
     2811                                                uint8_t const idxSimdGstRegDst, uint8_t const idxSimdGstRegSrc) \
     2812    { \
     2813        return iemNativeEmitSimdFp3OpCommon_rr_u128(pReNative, off, idxInstr, idxSimdGstRegDst, idxSimdGstRegSrc, \
     2814                                                    a_bPrefixX86, a_bOpcX86); \
     2815    } \
     2816    DECL_FORCE_INLINE_THROW(uint32_t) \
     2817    RT_CONCAT3(iemNativeEmit_,a_Instr,_rv_u128)(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr, \
     2818                                                uint8_t const idxSimdGstRegDst, uint8_t const idxVarSrc) \
     2819    { \
     2820        return iemNativeEmitSimdFp3OpCommon_rv_u128(pReNative, off, idxInstr, idxSimdGstRegDst, idxVarSrc, \
     2821                                                    a_bPrefixX86, a_bOpcX86); \
     2822    } \
     2823    typedef int ignore_semicolon
     2824#elif defined(RT_ARCH_ARM64)
     2825# define IEMNATIVE_NATIVE_EMIT_FP_3OP_U128(a_Instr, a_enmArmOp, a_ArmElemSz, a_bPrefixX86, a_bOpcX86) \
     2826    DECL_FORCE_INLINE_THROW(uint32_t) \
     2827    RT_CONCAT3(iemNativeEmit_,a_Instr,_rr_u128)(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr, \
     2828                                                uint8_t const idxSimdGstRegDst, uint8_t const idxSimdGstRegSrc) \
     2829    { \
     2830        return iemNativeEmitSimdFp3OpCommon_rr_u128(pReNative, off, idxInstr, idxSimdGstRegDst, idxSimdGstRegSrc, \
     2831                                                    a_enmArmOp, a_ArmElemSz); \
     2832    } \
     2833    DECL_FORCE_INLINE_THROW(uint32_t) \
     2834    RT_CONCAT3(iemNativeEmit_,a_Instr,_rv_u128)(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxInstr, \
     2835                                                uint8_t const idxSimdGstRegDst, uint8_t const idxVarSrc) \
     2836    { \
     2837        return iemNativeEmitSimdFp3OpCommon_rv_u128(pReNative, off, idxInstr, idxSimdGstRegDst, idxVarSrc, \
     2838                                                    a_enmArmOp, a_ArmElemSz); \
     2839    } \
     2840    typedef int ignore_semicolon
     2841#else
     2842# error "Port me"
     2843#endif
     2844
     2845
     2846IEMNATIVE_NATIVE_EMIT_FP_3OP_U128(mulps, kArmv8VecInstrFpOp_Mul, kArmv8VecInstrFpSz_4x_Single, 0, 0x59);
     2847IEMNATIVE_NATIVE_EMIT_FP_3OP_U128(addps, kArmv8VecInstrFpOp_Add, kArmv8VecInstrFpSz_4x_Single, 0, 0x58);
     2848IEMNATIVE_NATIVE_EMIT_FP_3OP_U128(addpd, kArmv8VecInstrFpOp_Add, kArmv8VecInstrFpSz_2x_Double, X86_OP_PRF_SIZE_OP, 0x58);
     2849IEMNATIVE_NATIVE_EMIT_FP_3OP_U128(subps, kArmv8VecInstrFpOp_Sub, kArmv8VecInstrFpSz_4x_Single, 0, 0x5c);
     2850
    25752851#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
    25762852
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

    r105318 r105491  
    59845984
    59855985/*********************************************************************************************************************************
     5986*   Bitfield manipulation                                                                                                        *
     5987*********************************************************************************************************************************/
     5988
     5989/**
     5990 * Emits code for clearing.
     5991 */
     5992DECL_FORCE_INLINE(uint32_t)
     5993iemNativeEmitBitClearInGpr32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const iGpr, uint8_t iBit)
     5994{
     5995    Assert(iBit < 32);
     5996
     5997#if defined(RT_ARCH_AMD64)
     5998    /* btr r32, imm8 */
     5999    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
     6000
     6001    if (iGpr >= 8)
     6002        pbCodeBuf[off++] = X86_OP_REX_B;
     6003    pbCodeBuf[off++] = 0x0f;
     6004    pbCodeBuf[off++] = 0xba;
     6005    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 6, iGpr & 7);
     6006    pbCodeBuf[off++] = iBit;
     6007#elif defined(RT_ARCH_ARM64)
     6008    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
     6009
     6010    pu32CodeBuf[off++] = Armv8A64MkInstrBfc(iGpr, iBit /*offFirstBit*/, 1 /*cBits*/, true /*f64Bit*/);
     6011#else
     6012# error "Port me"
     6013#endif
     6014
     6015    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
     6016    return off;
     6017}
     6018
     6019
     6020/*********************************************************************************************************************************
    59866021*   Compare and Testing                                                                                                          *
    59876022*********************************************************************************************************************************/
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette