VirtualBox

Changeset 102765 in vbox for trunk/src/VBox/VMM


Timestamp:
Jan 4, 2024 7:01:46 PM
Author:
vboxsync
Message:

VMM/IEM: Reworking native translation of IEM_MC_*PUSH* in prep for doing TLB lookups. bugref:10371

Location:
trunk/src/VBox/VMM
Files:
3 edited

Legend:

  ' '  unmodified
  +    added
  -    removed
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

--- r102757
+++ r102765

@@ -3667,4 +3667,5 @@
         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
 
+        /* It's not supposed to be allocated... */
         if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
         {
@@ -3675,4 +3676,6 @@
              */
             /** @todo would be nice to know if preserving the register is in any way helpful. */
+            /* If the purpose is calculations, try duplicating the register value as
+               we'll be clobbering the shadow. */
             if (   enmIntendedUse == kIemNativeGstRegUse_Calculation
                 && (  ~pReNative->Core.bmHstRegs
@@ -3689,5 +3692,7 @@
                 idxReg = idxRegNew;
             }
-            else
+            /* If the current register matches the restrictions, go ahead and allocate
+               it for the caller. */
+            else if (fRegMask & RT_BIT_32(idxReg))
             {
                 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
@@ -3704,7 +3709,36 @@
                 }
             }
+            /* Otherwise, allocate a register that satisfies the caller and transfer
+               the shadowing if compatible with the intended use.  (This basically
+               means the caller wants a non-volatile register (RSP push/pop scenario).) */
+            else
+            {
+                Assert(fNoVolatileRegs);
+                uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
+                                                                    !fNoVolatileRegs
+                                                                 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
+                *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
+                if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
+                {
+                    iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
+                    Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
+                           g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
+                           g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
+                }
+                else
+                    Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
+                           g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
+                           g_apszIemNativeHstRegNames[idxRegNew]));
+                idxReg = idxRegNew;
+            }
         }
         else
         {
+            /*
+             * Oops. Shadowed guest register already allocated!
+             *
+             * Allocate a new register, copy the value and, if updating, the
+             * guest shadow copy assignment to the new register.
+             */
             AssertMsg(   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
                       && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
@@ -3712,8 +3746,4 @@
                        idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
 
-            /*
-             * Allocate a new register, copy the value and, if updating, the
-             * guest shadow copy assignment to the new register.
-             */
             /** @todo share register for readonly access. */
             uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
@@ -11417,5 +11447,5 @@
                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32
                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlat32PushU32SReg
-                   : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 16, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU16
+                   : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU16
                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlat64PushU64
                    : UINT64_C(0xc000b000a0009000) ));
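(Aside, not part of the changeset: the helper-selection table above and the new emitter code below both key off cBitsVarAndFlat, a u32 packed with RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0). The fix in this hunk corrects a row that was packed as (64, 16, ...) - a 64-bit push in 16-bit flat mode, which does not exist - to (16, 64, ...), the 16-bit push in flat 64-bit mode that iemNativeHlpStackFlat64PushU16 implements. A minimal, self-contained sketch of the packing convention; RT_MAKE_U32_FROM_U8 and RT_BYTE1..3 are real IPRT macros, the local macro here just keeps the sketch standalone:)

    #include <stdint.h>

    /* Sketch: how cBitsVarAndFlat is packed and unpacked; byte 0 is least
       significant, matching RT_MAKE_U32_FROM_U8.  See the +11490 hunk below
       for the real consumers (RT_BYTE1/RT_BYTE2/RT_BYTE3). */
    #define MK_U32_FROM_U8(b0, b1, b2, b3) \
        (  (uint32_t)(uint8_t)(b0)         | ((uint32_t)(uint8_t)(b1) <<  8) \
         | ((uint32_t)(uint8_t)(b2) << 16) | ((uint32_t)(uint8_t)(b3) << 24))

    int main(void)
    {
        uint32_t const cBitsVarAndFlat = MK_U32_FROM_U8(16, 64, 0, 0); /* push word, flat 64-bit */
        uint8_t  const cbMem     = (uint8_t)cBitsVarAndFlat / 8;          /* operand size -> 2 bytes      */
        uint8_t  const cBitsFlat = (uint8_t)(cBitsVarAndFlat >> 8);       /* 0 (segmented), 32 or 64      */
        int      const fSeg      = (uint8_t)(cBitsVarAndFlat >> 16) != 0; /* pushing a segment register?  */
        return cbMem == 2 && cBitsFlat == 64 && !fSeg ? 0 : 1;
    }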
@@ -11447,17 +11477,4 @@
 
     /*
-     * Move/spill/flush stuff out of call-volatile registers, keeping whatever
-     * idxVarValue might be occupying.
-     *
-     * This is the easy way out. We could contain this to the tlb-miss branch
-     * by saving and restoring active stuff here.
-     */
-    /** @todo save+restore active registers and maybe guest shadows in tlb-miss.  */
-    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarValue));
-
-    /* For now, flush any shadow copy of the xSP register. */
-    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(X86_GREG_xSP)));
-
-    /*
     * Define labels and allocate the result register (trying for the return
     * register if we can).
@@ -11468,12 +11485,67 @@
 
     /*
-     * First we try to go via the TLB.
-     */
-//pReNative->pInstrBuf[off++] = 0xcc;
-    /** @todo later. */
-    RT_NOREF(cBitsVarAndFlat);
-
-    /*
-     * Call helper to do the popping.
+     * First we calculate the new RSP and the effective stack pointer value.
+     * For 64-bit mode and flat 32-bit these two are the same.
+     */
+    uint8_t const cbMem       = RT_BYTE1(cBitsVarAndFlat) / 8;
+    uint8_t const cBitsFlat   = RT_BYTE2(cBitsVarAndFlat);      RT_NOREF(cBitsFlat);
+    bool const    fSeg        = RT_BYTE3(cBitsVarAndFlat) != 0; RT_NOREF(fSeg);
+    uint8_t const idxRegRsp   = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
+                                                                kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
+    uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
+    if (cBitsFlat != 0)
+    {
+        Assert(idxRegEffSp == idxRegRsp);
+        Assert(cBitsFlat == 32 || cBitsFlat == 64);
+        Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
+        if (cBitsFlat == 64)
+            off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
+        else
+            off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
+    }
+    else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
+    {
+        Assert(idxRegEffSp != idxRegRsp);
+        uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
+                                                                     kIemNativeGstRegUse_ReadOnly);
+#ifdef RT_ARCH_AMD64
+        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 48);
+#else
+        PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
+#endif
+        off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
+        iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
+        uint32_t const offFixupJumpTo16BitSp = off;
+        off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
+        /* have_32bit_sp: */
+        off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
+        off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
+        uint32_t const offFixupJumpToEnd = off;
+        off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, off /*8-bit suffices*/);
+
+        /** @todo Put snippet before TlbMiss. */
+        /* have_16bit_sp: */
+        iemNativeFixupFixedJump(pReNative, offFixupJumpTo16BitSp, off);
+#ifdef RT_ARCH_AMD64
+        off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
+        off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
+#else
+        /* sub regeff, regrsp, #cbMem */
+        pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
+        /* and regeff, regeff, #0xffff */
+        Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
+        pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0,  false /*f64Bit*/);
+        /* bfi regrsp, regeff, 0, 16 - copies bits 15:0 of idxRegEffSp into bits 15:0 of idxRegRsp. */
+        pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 15, 0, false /*f64Bit*/);
+#endif
+        /* sp_update_end: */
+        iemNativeFixupFixedJump(pReNative, offFixupJumpToEnd, off);
+        IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
+    }
+
+    /*
+     * TlbMiss:
+     *
+     * Call helper to do the pushing.
      */
     iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
@@ -11485,4 +11557,12 @@
 #endif
 
+    /* Save variables in volatile registers. */
+    uint32_t const fHstRegsNotToSave = 0/*TlbState.getRegsNotToSave()*/
+                                     | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
+                                     | (  pReNative->Core.aVars[idxVarValue].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
+                                        ? RT_BIT_32(pReNative->Core.aVars[idxVarValue].idxReg) : 0);
+    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
+
+
     /* IEMNATIVE_CALL_ARG1_GREG = idxVarValue (first) */
     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarValue,
@@ -11495,8 +11575,22 @@
     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
 
+    /* Restore variables and guest shadow registers to volatile registers. */
+    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
+    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0/*TlbState.getActiveRegsWithShadows()*/);
+
     /* The value variable is implicitly flushed. */
     iemNativeVarFreeLocal(pReNative, idxVarValue);
 
+    /*
+     * TlbDone:
+     *
+     * Commit the new RSP value.
+     */
     iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
+
+    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
+    iemNativeRegFreeTmp(pReNative, idxRegRsp);
+    if (idxRegEffSp != idxRegRsp)
+        iemNativeRegFreeTmp(pReNative, idxRegEffSp);
 
     return off;
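(Aside, not part of the changeset: the new non-flat branch above sizes the stack pointer by SS.ATTR.D - with a 32-bit stack segment it simply subtracts from ESP, while with a 16-bit one only the low 16 bits of RSP may change, which is what the SUB/AND/BFI sequence (ARM64) and the 16-bit SUB plus MOVZX load (AMD64) implement. A C model of the RSP arithmetic being emitted; the function name is invented for the sketch:)

    #include <stdint.h>

    /* Returns the updated RSP for a push of cbMem bytes; mirrors the
       have_32bit_sp / have_16bit_sp paths emitted above. */
    static uint64_t sketchPushAdjustRsp(uint64_t uRsp, uint8_t cbMem, int fSs32Bit)
    {
        if (fSs32Bit)
            return (uint32_t)(uRsp - cbMem);              /* have_32bit_sp: 32-bit subtract, upper bits zeroed */
        uint16_t const uNewSp = (uint16_t)(uRsp - cbMem); /* have_16bit_sp: subtract with 16-bit wrap-around   */
        return (uRsp & ~(uint64_t)0xffff) | uNewSp;       /* merge the low 16 bits back (the BFI on ARM64)     */
    }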
  • trunk/src/VBox/VMM/include/IEMN8veRecompiler.h

--- r102737
+++ r102765

@@ -824,5 +824,5 @@
                                                             IEMNATIVEGSTREG enmGstReg,
                                                             IEMNATIVEGSTREGUSE enmIntendedUse = kIemNativeGstRegUse_ReadOnly,
-                                                            bool fNoVoltileRegs = false);
+                                                            bool fNoVolatileRegs = false);
 DECL_HIDDEN_THROW(uint8_t)  iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
                                                                             IEMNATIVEGSTREG enmGstReg);
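(Aside, not part of the changeset: besides fixing the fNoVolatileRegs spelling, this prototype is the allocator parameter the reworked push code relies on. The new push emitter in the first file passes it so that the guest RSP stays in a host register that survives the TLB-miss helper call, quoting the +11493 hunk above:)

    /* From the new push emitter: request a non-volatile host register for
       the guest RSP so it survives the helper call in the TlbMiss path. */
    uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
                                                              kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);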
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerEmit.h

--- r102756
+++ r102765

@@ -978,28 +978,44 @@
  */
 DECL_INLINE_THROW(uint32_t)
-iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
+iemNativeEmitLoadGprFromGpr16Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
 {
 #ifdef RT_ARCH_AMD64
     /* movzx Gv,Ew */
-    uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
     if ((iGprDst | iGprSrc) >= 8)
-        pbCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
-                         : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
-                         :                X86_OP_REX_R;
-    pbCodeBuf[off++] = 0x0f;
-    pbCodeBuf[off++] = 0xb7;
-    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
+        pCodeBuf[off++] = iGprDst < 8  ? X86_OP_REX_B
+                        : iGprSrc >= 8 ? X86_OP_REX_R | X86_OP_REX_B
+                        :                X86_OP_REX_R;
+    pCodeBuf[off++] = 0x0f;
+    pCodeBuf[off++] = 0xb7;
+    pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprDst & 7, iGprSrc & 7);
 
 #elif defined(RT_ARCH_ARM64)
     /* and gprdst, gprsrc, #0xffff */
-    uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
 # if 1
     Assert(Armv8A64ConvertImmRImmS2Mask32(0x0f, 0) == UINT16_MAX);
-    pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
+    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x0f, 0, false /*f64Bit*/);
 # else
     Assert(Armv8A64ConvertImmRImmS2Mask64(0x4f, 0) == UINT16_MAX);
-    pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
+    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprSrc, 0x4f, 0);
 # endif
 
+#else
+# error "port me"
+#endif
+    return off;
+}
+
+
+/**
+ * Emits a gprdst = gprsrc[15:0] load.
+ * @note Bits 63 thru 16 are cleared.
+ */
+DECL_INLINE_THROW(uint32_t)
+iemNativeEmitLoadGprFromGpr16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, uint8_t iGprSrc)
+{
+#ifdef RT_ARCH_AMD64
+    off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 4), off, iGprDst, iGprSrc);
+#elif defined(RT_ARCH_ARM64)
+    off = iemNativeEmitLoadGprFromGpr16Ex(iemNativeInstrBufEnsure(pReNative, off, 1), off, iGprDst, iGprSrc);
 #else
 # error "port me"
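(Aside, not part of the changeset: this hunk shows the refactoring pattern applied throughout the file - the instruction-emitting body moves into an ...Ex variant that writes into a caller-supplied, pre-ensured PIEMNATIVEINSTR buffer, and the original function becomes a thin ensure-then-delegate wrapper. That lets call sites like the new push emitter ensure one large buffer up front and emit a whole snippet into it. A self-contained sketch of the shape; every name below is an invented stand-in, not VBox API:)

    #include <stdint.h>

    typedef uint8_t IEMNATIVEINSTR, *PIEMNATIVEINSTR;

    static IEMNATIVEINSTR g_abInstrBuf[64];   /* stand-in for the recompiler's instruction buffer */

    /* Stand-in for iemNativeInstrBufEnsure(): guarantee room for cInstrs more units. */
    static PIEMNATIVEINSTR sketchBufEnsure(uint32_t off, uint32_t cInstrs)
    {
        (void)off; (void)cInstrs;             /* a real version would grow the buffer as needed */
        return g_abInstrBuf;
    }

    /* Ex flavor: writes into a buffer the caller has already ensured. */
    static uint32_t sketchEmitNopEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off)
    {
        pCodeBuf[off++] = 0x90;               /* x86 NOP, purely illustrative */
        return off;
    }

    /* The original entry point becomes an ensure-then-delegate wrapper. */
    static uint32_t sketchEmitNop(uint32_t off)
    {
        return sketchEmitNopEx(sketchBufEnsure(off, 1), off);
    }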
@@ -2568,36 +2584,120 @@
 
 
-#ifdef RT_ARCH_AMD64
 /**
  * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
- */
-DECL_INLINE_THROW(uint32_t)
-iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend)
-{
-    /* sub gprdst, imm8/imm32 */
-    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
-    if (iGprDst < 8)
-        pbCodeBuf[off++] = X86_OP_REX_W;
-    else
-        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_B;
-    if (iSubtrahend < 128 && iSubtrahend >= -128)
-    {
-        pbCodeBuf[off++] = 0x83;
-        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
-        pbCodeBuf[off++] = (uint8_t)iSubtrahend;
-    }
-    else
-    {
-        pbCodeBuf[off++] = 0x81;
-        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
-        pbCodeBuf[off++] = RT_BYTE1(iSubtrahend);
-        pbCodeBuf[off++] = RT_BYTE2(iSubtrahend);
-        pbCodeBuf[off++] = RT_BYTE3(iSubtrahend);
-        pbCodeBuf[off++] = RT_BYTE4(iSubtrahend);
-    }
-    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
-    return off;
-}
-#endif
+ *
+ * This will optimize using DEC/INC/whatever, so try to avoid flag dependencies.
+ *
+ * @note Larger constants will require a temporary register.  Failing to specify
+ *       one when needed will trigger fatal assertion / throw.
+ */
+DECL_FORCE_INLINE_THROW(uint32_t)
+iemNativeEmitSubGprImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
+                         uint8_t iGprTmp = UINT8_MAX)
+{
+#ifdef RT_ARCH_AMD64
+    pCodeBuf[off++] = iGprDst >= 8 ? X86_OP_REX_W | X86_OP_REX_B : X86_OP_REX_W;
+    if (iSubtrahend == 1)
+    {
+        /* dec r/m64 */
+        pCodeBuf[off++] = 0xff;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
+    }
+    else if (iSubtrahend == -1)
+    {
+        /* inc r/m64 */
+        pCodeBuf[off++] = 0xff;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
+    }
+    else if ((int8_t)iSubtrahend == iSubtrahend)
+    {
+        /* sub r/m64, imm8 */
+        pCodeBuf[off++] = 0x83;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
+        pCodeBuf[off++] = (uint8_t)iSubtrahend;
+    }
+    else if ((int32_t)iSubtrahend == iSubtrahend)
+    {
+        /* sub r/m64, imm32 */
+        pCodeBuf[off++] = 0x81;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
+        pCodeBuf[off++] = RT_BYTE1((uint64_t)iSubtrahend);
+        pCodeBuf[off++] = RT_BYTE2((uint64_t)iSubtrahend);
+        pCodeBuf[off++] = RT_BYTE3((uint64_t)iSubtrahend);
+        pCodeBuf[off++] = RT_BYTE4((uint64_t)iSubtrahend);
+    }
+    else if (iGprTmp != UINT8_MAX)
+    {
+        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off - 1, iGprTmp, (uint64_t)iSubtrahend);
+        /* sub r/m64, r64 */
+        pCodeBuf[off++] = X86_OP_REX_W | (iGprDst < 8 ? 0 : X86_OP_REX_B) | (iGprTmp < 8 ? 0 : X86_OP_REX_R);
+        pCodeBuf[off++] = 0x29;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, iGprTmp & 7, iGprDst & 7);
+    }
+    else
+# ifdef IEM_WITH_THROW_CATCH
+        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
+# else
+        AssertReleaseFailedStmt(off = UINT32_MAX);
+# endif
+
+#elif defined(RT_ARCH_ARM64)
+    uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
+    if (uAbsSubtrahend < 4096)
+    {
+        if (iSubtrahend >= 0)
+            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend);
+        else
+            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend);
+    }
+    else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
+    {
+        if (iSubtrahend >= 0)
+            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
+                                                       true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
+        else
+            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
+                                                       true /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
+    }
+    else if (iGprTmp != UINT8_MAX)
+    {
+        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, iGprTmp, (uint64_t)iSubtrahend);
+        pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
+    }
+    else
+# ifdef IEM_WITH_THROW_CATCH
+        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
+# else
+        AssertReleaseFailedStmt(off = UINT32_MAX);
+# endif
+
+#else
+# error "Port me"
+#endif
+    return off;
+}
+
+
+/**
+ * Emits a 64-bit GPR subtract with a signed immediate subtrahend.
+ *
+ * @note Larger constants will require a temporary register.  Failing to specify
+ *       one when needed will trigger fatal assertion / throw.
+ */
+DECL_INLINE_THROW(uint32_t)
+iemNativeEmitSubGprImm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int64_t iSubtrahend,
+                       uint8_t iGprTmp = UINT8_MAX)
+
+{
+#ifdef RT_ARCH_AMD64
+    off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 13), off, iGprDst, iSubtrahend, iGprTmp);
+#elif defined(RT_ARCH_ARM64)
+    off = iemNativeEmitSubGprImmEx(iemNativeInstrBufEnsure(pReNative, off, 5), off, iGprDst, iSubtrahend, iGprTmp);
+#else
+# error "Port me"
+#endif
+    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
+    return off;
+}
 
 
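(Aside, not part of the changeset: the new iemNativeEmitSubGprImmEx picks between three ARM64 encodings - a raw 12-bit unsigned immediate, the same immediate shifted left 12 bits, or a temporary register - and flips SUB to ADD for negative subtrahends. A sketch mirroring the selection conditions above; the names are invented:)

    #include <stdint.h>

    typedef enum { SUB_UIMM12, SUB_UIMM12_LSL12, SUB_VIA_TMP_REG } SUBENC;

    /* Mirrors the if/else ladder on the ARM64 side of iemNativeEmitSubGprImmEx. */
    static SUBENC sketchPickSubEncoding(int64_t iSubtrahend)
    {
        uint32_t const uAbs = (uint32_t)(iSubtrahend < 0 ? -(uint64_t)iSubtrahend
                                                         :  (uint64_t)iSubtrahend);
        if (uAbs < 4096)                              /* fits the raw imm12 (ADD for negatives) */
            return SUB_UIMM12;
        if (uAbs <= 0xfff000 && !(uAbs & 0xfff))      /* fits imm12 shifted left by 12          */
            return SUB_UIMM12_LSL12;
        return SUB_VIA_TMP_REG;                       /* load into iGprTmp, then SUB (register) */
    }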
@@ -2669,5 +2769,5 @@
     {
         off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
-        pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp);
+        pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
     }
     else
@@ -2677,4 +2777,113 @@
         AssertReleaseFailedStmt(off = UINT32_MAX);
 # endif
+
+#else
+# error "Port me"
+#endif
+    return off;
+}
+
+
+/**
+ * Emits a 32-bit GPR subtract with a signed immediate subtrahend.
+ *
+ * @note ARM64: Larger constants will require a temporary register.  Failing to
+ *       specify one when needed will trigger fatal assertion / throw.
+ */
+DECL_INLINE_THROW(uint32_t)
+iemNativeEmitSubGpr32Imm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int32_t iSubtrahend,
+                         uint8_t iGprTmp = UINT8_MAX)
+
+{
+#ifdef RT_ARCH_AMD64
+    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 7), off, iGprDst, iSubtrahend, iGprTmp);
+#elif defined(RT_ARCH_ARM64)
+    off = iemNativeEmitSubGpr32ImmEx(iemNativeInstrBufEnsure(pReNative, off, 3), off, iGprDst, iSubtrahend, iGprTmp);
+#else
+# error "Port me"
+#endif
+    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
+    return off;
+}
+
+
+/**
+ * Emits a 16-bit GPR subtract with a signed immediate subtrahend.
+ *
+ * This will optimize using DEC/INC/whatever and ARM64 will not set flags,
+ * so it is not suitable as a base for conditional jumps.
+ *
+ * @note ARM64: Will update the entire register.
+ * @note AMD64: May only update the lower 16 bits of the register.
+ * @note ARM64: Larger constants will require a temporary register.  Failing to
+ *       specify one when needed will trigger fatal assertion / throw.
+ */
+DECL_FORCE_INLINE_THROW(uint32_t)
+iemNativeEmitSubGpr16ImmEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprDst, int16_t iSubtrahend,
+                           uint8_t iGprTmp = UINT8_MAX)
+{
+#ifdef RT_ARCH_AMD64
+    pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
+    if (iGprDst >= 8)
+        pCodeBuf[off++] = X86_OP_REX_B;
+    if (iSubtrahend == 1)
+    {
+        /* dec r/m16 */
+        pCodeBuf[off++] = 0xff;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, iGprDst & 7);
+    }
+    else if (iSubtrahend == -1)
+    {
+        /* inc r/m16 */
+        pCodeBuf[off++] = 0xff;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
+    }
+    else if ((int8_t)iSubtrahend == iSubtrahend)
+    {
+        /* sub r/m16, imm8 */
+        pCodeBuf[off++] = 0x83;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
+        pCodeBuf[off++] = (uint8_t)iSubtrahend;
+    }
+    else
+    {
+        /* sub r/m16, imm16 */
+        pCodeBuf[off++] = 0x81;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, iGprDst & 7);
+        pCodeBuf[off++] = RT_BYTE1((uint16_t)iSubtrahend);
+        pCodeBuf[off++] = RT_BYTE2((uint16_t)iSubtrahend);
+    }
+    RT_NOREF(iGprTmp);
+
+#elif defined(RT_ARCH_ARM64)
+    uint32_t uAbsSubtrahend = RT_ABS(iSubtrahend);
+    if (uAbsSubtrahend < 4096)
+    {
+        if (iSubtrahend >= 0)
+            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
+        else
+            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend, false /*f64Bit*/);
+    }
+    else if (uAbsSubtrahend <= 0xfff000 && !(uAbsSubtrahend & 0xfff))
+    {
+        if (iSubtrahend >= 0)
+            pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
+                                                       false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
+        else
+            pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(iGprDst, iGprDst, uAbsSubtrahend >> 12,
+                                                       false /*f64Bit*/, false /*fSetFlags*/, true /*fShift*/);
+    }
+    else if (iGprTmp != UINT8_MAX)
+    {
+        off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, iGprTmp, (uint32_t)iSubtrahend);
+        pCodeBuf[off++] = Armv8A64MkInstrSubReg(iGprDst, iGprDst, iGprTmp, false /*f64Bit*/);
+    }
+    else
+# ifdef IEM_WITH_THROW_CATCH
+        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
+# else
+        AssertReleaseFailedStmt(off = UINT32_MAX);
+# endif
+    pCodeBuf[off++] = Armv8A64MkInstrAndImm(iGprDst, iGprDst, 15, 0, false /*f64Bit*/);
 
 #else
@@ -5115,12 +5324,15 @@
 
 #elif defined(RT_ARCH_ARM64)
-
-    if (false)
-    {
-        /** @todo figure out how to work the immr / N:imms constants. */
-    }
-    else
-    {
-        /* ands Zr, iGprSrc, iTmpReg */
+    uint32_t uImmR     = 0;
+    uint32_t uImmNandS = 0;
+    if (Armv8A64ConvertMask64ToImmRImmS(fBits, &uImmNandS, &uImmR))
+    {
+        /* ands xzr, iGprSrc, #fBits */
+        uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
+        pu32CodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR);
+    }
+    else
+    {
+        /* ands xzr, iGprSrc, iTmpReg */
         uint8_t const iTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBits);
         uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
@@ -5138,5 +5350,5 @@
 
 /**
- * Emits a test for any of the bits from @a fBits in the lower 8 bits of
+ * Emits a test for any of the bits from @a fBits in the lower 32 bits of
  * @a iGprSrc, setting CPU flags accordingly.
  *
@@ -5146,10 +5358,67 @@
  */
 DECL_FORCE_INLINE_THROW(uint32_t)
+iemNativeEmitTestAnyBitsInGpr32Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint32_t fBits)
+{
+    Assert(fBits != 0);
+
+#ifdef RT_ARCH_AMD64
+    if (fBits <= UINT8_MAX)
+    {
+        /* test Eb, imm8 */
+        if (iGprSrc >= 4)
+            pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
+        pCodeBuf[off++] = 0xf6;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
+        pCodeBuf[off++] = (uint8_t)fBits;
+    }
+    else
+    {
+        /* test Ev, imm32 */
+        if (iGprSrc >= 8)
+            pCodeBuf[off++] = X86_OP_REX_B;
+        pCodeBuf[off++] = 0xf7;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprSrc & 7);
+        pCodeBuf[off++] = RT_BYTE1(fBits);
+        pCodeBuf[off++] = RT_BYTE2(fBits);
+        pCodeBuf[off++] = RT_BYTE3(fBits);
+        pCodeBuf[off++] = RT_BYTE4(fBits);
+    }
+
+#elif defined(RT_ARCH_ARM64)
+    /* ands xzr, src, #fBits */
+    uint32_t uImmR     = 0;
+    uint32_t uImmNandS = 0;
+    if (Armv8A64ConvertMask32ToImmRImmS(fBits, &uImmNandS, &uImmR))
+        pCodeBuf[off++] = Armv8A64MkInstrAndsImm(ARMV8_A64_REG_XZR, iGprSrc, uImmNandS, uImmR, false /*f64Bit*/);
+    else
+# ifdef IEM_WITH_THROW_CATCH
+        AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IEM_IPE_9));
+# else
+        AssertReleaseFailedStmt(off = UINT32_MAX);
+# endif
+
+#else
+# error "Port me!"
+#endif
+    return off;
+}
+
+
+
+/**
+ * Emits a test for any of the bits from @a fBits in the lower 8 bits of
+ * @a iGprSrc, setting CPU flags accordingly.
+ *
+ * @note For ARM64 this only supports @a fBits values that can be expressed
+ *       using the two 6-bit immediates of the ANDS instruction.  The caller
+ *       must make sure this is possible!
+ */
+DECL_FORCE_INLINE_THROW(uint32_t)
 iemNativeEmitTestAnyBitsInGpr8Ex(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t iGprSrc, uint8_t fBits)
 {
     Assert(fBits != 0);
 
+#ifdef RT_ARCH_AMD64
     /* test Eb, imm8 */
-#ifdef RT_ARCH_AMD64
     if (iGprSrc >= 4)
         pCodeBuf[off++] = iGprSrc >= 8 ? X86_OP_REX_B : X86_OP_REX;
@@ -5159,5 +5428,5 @@
 
 #elif defined(RT_ARCH_ARM64)
-    /* ands xzr, src, [tmp|#imm] */
+    /* ands xzr, src, #fBits */
     uint32_t uImmR     = 0;
     uint32_t uImmNandS = 0;
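(Aside, not part of the changeset: the ANDS-immediate fast path added above only works for masks that Armv8A64ConvertMask32ToImmRImmS can encode as an ARM64 logical immediate, i.e. a possibly repeating, rotated run of ones. The masks these emitters actually see - single attribute bits such as X86DESCATTR_D and runs like 0xffff - always qualify. A sketch of the single-run, non-repeating subset of that test; this is a re-derivation for illustration, not the IPRT implementation:)

    #include <stdbool.h>
    #include <stdint.h>

    /* True if some rotation of fBits is a contiguous run of ones, i.e. the
       element-size-32 subset of ARM64 logical immediates.  (The full encoder
       also accepts repeating 2/4/8/16-bit patterns; all-zero and all-one
       values are never encodable.) */
    static bool sketchIsSingleRunLogicalImm32(uint32_t fBits)
    {
        if (fBits == 0 || fBits == UINT32_MAX)
            return false;
        for (unsigned iRot = 0; iRot < 32; iRot++)
        {
            uint32_t const uRot = (fBits >> iRot) | (uint32_t)(fBits << ((32 - iRot) & 31));
            if ((uRot & (uRot + 1)) == 0)   /* 2^n - 1: a run of ones from bit 0 */
                return true;
        }
        return false;
    }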