VirtualBox

Timestamp:
Aug 14, 2024 1:57:57 PM
Author:
vboxsync
Message:

VMM/IEM,TM: Do full-TB looping. Redid timer polling in the recompiler. Rewrote the Blt_CheckIrq code, eliminating a conditional. Fixed some TLB related assertions. Moved some IEMCPU members around in hope of better cache-locality. bugref:10656
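The "eliminating a conditional" part of the message refers to the trick documented in the new common worker in the diff below: the masked local force flags are ORed with the global ones so a single zero test covers both sets, and because the two IRQ flags sit in bits 0 and 1 and are unused among the global flags, comparing the combined value against 3 separates IRQ-only work from everything else. A minimal standalone sketch of that control flow (all flag values here are invented; only the APIC | PIC == 3 layout comes from the diff's AssertCompile):

/* Standalone sketch (not VBox code) of the combined force-flag check. */
#include <cstdint>
#include <cstdio>

static uint64_t const FF_IRQ_APIC = 0x1;  /* stand-in for VMCPU_FF_INTERRUPT_APIC */
static uint64_t const FF_IRQ_PIC  = 0x2;  /* stand-in for VMCPU_FF_INTERRUPT_PIC */
static uint64_t const FF_UNWANTED = 0x70; /* stand-in for the masked-out SYNC_CR3/TLB_FLUSH/UNHALT flags */

/* Returns true when the TB must be left (the ReturnBreakFF exit). */
static bool CheckIrqsCombined(uint64_t fLocalFFs, uint64_t fGlobalFFs, bool fEflIf, bool fInhibitShadow)
{
    /* One OR and one zero test replace separate local/global conditionals.
       This relies on the global flags never using bits 0 and 1. */
    uint64_t const fFlags = (fLocalFFs & ~FF_UNWANTED) | fGlobalFFs;
    if (!fFlags)
        return false;                            /* nothing_pending */
    if (fFlags > (FF_IRQ_APIC | FF_IRQ_PIC))
        return true;                             /* more than just IRQ FFs pending */
    /* Only IRQ FFs: exit only if the guest can actually take the interrupt. */
    return fEflIf && !fInhibitShadow;
}

int main()
{
    std::printf("%d\n", CheckIrqsCombined(FF_IRQ_PIC, 0, true,  false)); /* 1: deliverable IRQ */
    std::printf("%d\n", CheckIrqsCombined(FF_IRQ_PIC, 0, false, false)); /* 0: masked by IF=0 */
    std::printf("%d\n", CheckIrqsCombined(0, 0x100,   false, false));    /* 1: a global FF is pending */
    return 0;
}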

File:
1 edited

Legend:

In the diff below, unchanged context lines are prefixed with a space, added lines with "+", removed lines with "-", and hunk gaps are marked with "…".
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompBltIn.cpp
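The diff folds what used to be an open-coded CheckIrq body into one force-inlined worker templated on two compile-time bools, instantiated three ways, so each thin builtin compiles to a specialized body with the unused branch removed. A simplified sketch of the pattern (the names and the emitted-size stand-ins are invented, not the VBox API):

#include <cstdint>
#include <cstdio>

template<bool const a_fCheckTimers, bool const a_fCheckIrqs>
static inline uint32_t emitChecksCommon(uint32_t off)
{
    if (a_fCheckTimers)   /* compile-time constant, so the branch is folded away */
        off += 2;         /* stand-in for emitting the poll-counter decrement */
    if (a_fCheckIrqs)
        off += 10;        /* stand-in for emitting the force-flag/EFLAGS.IF checks */
    return off;
}

/* Three thin builtins, mirroring CheckIrq, CheckTimers and CheckTimersAndIrq: */
static uint32_t emitCheckIrq(uint32_t off)          { return emitChecksCommon<false, true>(off); }
static uint32_t emitCheckTimers(uint32_t off)       { return emitChecksCommon<true, false>(off); }
static uint32_t emitCheckTimersAndIrq(uint32_t off) { return emitChecksCommon<true, true>(off); }

int main()
{
    std::printf("%u %u %u\n", emitCheckIrq(0), emitCheckTimers(0), emitCheckTimersAndIrq(0)); /* 10 2 12 */
    return 0;
}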

r105271 → r105673

 
 /**
+ * Worker for the CheckIrq, CheckTimers and CheckTimersAndIrq builtins below.
+ */
+template<bool const a_fCheckTimers, bool const a_fCheckIrqs>
+DECL_FORCE_INLINE(uint32_t) iemNativeRecompFunc_BltIn_CheckTimersAndIrqsCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off)
+{
+    uint8_t const         idxEflReg  = !a_fCheckIrqs ? UINT8_MAX
+                                     : iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
+                                                                       kIemNativeGstRegUse_ReadOnly);
+    uint8_t const         idxTmpReg1 = iemNativeRegAllocTmp(pReNative, &off);
+    uint8_t const         idxTmpReg2 = a_fCheckIrqs ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
+    PIEMNATIVEINSTR const pCodeBuf   = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 72 : 32);
+
+    /*
+     * First we decrement the timer poll counter, if so desired.
+     */
+    if (a_fCheckTimers)
+    {
+# ifdef RT_ARCH_AMD64
+        /* dec  [rbx + cIrqChecksTillNextPoll] */
+        pCodeBuf[off++] = 0xff;
+        off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, RT_UOFFSETOF(VMCPU, iem.s.cIrqChecksTillNextPoll));
+
+        /* jz   ReturnBreakFF */
+        off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_ReturnBreakFF, kIemNativeInstrCond_e);
+
+# elif defined(RT_ARCH_ARM64)
+        AssertCompile(RTASSERT_OFFSET_OF(VMCPU, iem.s.cIrqChecksTillNextPoll) < _4K * sizeof(uint32_t));
+        off = iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, idxTmpReg1, RT_UOFFSETOF(VMCPU, iem.s.cIrqChecksTillNextPoll));
+        pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxTmpReg1, idxTmpReg1, 1, false /*f64Bit*/);
+        off = iemNativeEmitStoreGprToVCpuU32Ex(pCodeBuf, off, idxTmpReg1, RT_UOFFSETOF(VMCPU, iem.s.cIrqChecksTillNextPoll));
+
+        /* cbz reg1, ReturnBreakFF */
+        off = iemNativeEmitTestIfGprIsZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg1, false /*f64Bit*/,
+                                                      kIemNativeLabelType_ReturnBreakFF);
+
+# else
+#  error "port me"
+# endif
+    }
+
+    /*
+     * Second, check forced flags, if so desired.
+     *
+     * We OR them together to save a conditional.  A trick here is that the
+     * two IRQ flags are unused in the global flags, so we can still use the
+     * resulting value to check for suppressed interrupts.
+     */
+    if (a_fCheckIrqs)
+    {
+        /* Load VMCPU::fLocalForcedActions first and mask it.  We can simplify the
+           masking by ASSUMING none of the unwanted flags are located above bit 30.  */
+        uint64_t const fUnwantedCpuFFs = VMCPU_FF_PGM_SYNC_CR3
+                                       | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
+                                       | VMCPU_FF_TLB_FLUSH
+                                       | VMCPU_FF_UNHALT;
+        AssertCompile(fUnwantedCpuFFs < RT_BIT_64(31));
+        off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg1, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
+# if defined(RT_ARCH_AMD64)
+        /* and reg1, ~fUnwantedCpuFFs */
+        pCodeBuf[off++] = idxTmpReg1 >= 8 ? X86_OP_REX_B | X86_OP_REX_W : X86_OP_REX_W;
+        pCodeBuf[off++] = 0x81;
+        pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, idxTmpReg1 & 7);
+        *(uint32_t *)&pCodeBuf[off] = ~(uint32_t)fUnwantedCpuFFs;
+        off += 4;
+
+# else
+        off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpReg2, ~fUnwantedCpuFFs);
+        off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, idxTmpReg1, idxTmpReg2);
+# endif
+
+        /* OR in VM::fGlobalForcedActions.  We access the member via pVCpu.
+           No need to mask anything here.  Unfortunately, it's a 32-bit
+           variable, so we can't OR it directly on x86. */
+        AssertCompile(VM_FF_ALL_MASK == UINT32_MAX);
+        intptr_t const offGlobalForcedActions = (intptr_t)&pReNative->pVCpu->CTX_SUFF(pVM)->fGlobalForcedActions
+                                              - (intptr_t)pReNative->pVCpu;
+        Assert((int32_t)offGlobalForcedActions == offGlobalForcedActions);
+
+# ifdef RT_ARCH_AMD64
+        if (idxTmpReg2 >= 8)
+            pCodeBuf[off++] = X86_OP_REX_R;
+        pCodeBuf[off++] = 0x8b; /* mov */
+        off = iemNativeEmitGprByVCpuSignedDisp(pCodeBuf, off, idxTmpReg2, (int32_t)offGlobalForcedActions);
+
+        /* or reg1, reg2 */
+        off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, idxTmpReg1, idxTmpReg2);
+
+        /* jz nothing_pending */
+        uint32_t const offFixup1 = off;
+        off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 64, kIemNativeInstrCond_e);
+
+# elif defined(RT_ARCH_ARM64)
+        off = iemNativeEmitGprBySignedVCpuLdStEx(pCodeBuf, off, idxTmpReg2, (int32_t)offGlobalForcedActions,
+                                                 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
+        off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, idxTmpReg1, idxTmpReg2);
+
+        /* cbz nothing_pending */
+        uint32_t const offFixup1 = off;
+        off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(pCodeBuf, off, idxTmpReg1, true /*f64Bit*/,
+                                                                   false /*fJmpIfNotZero*/, off + 16);
+# else
+#  error "port me"
+# endif
+
+        /* More than just IRQ FFs pending? */
+        AssertCompile((VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC) == 3);
+        /* cmp reg1, 3 */
+        off = iemNativeEmitCmpGprWithImmEx(pCodeBuf, off, idxTmpReg1, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC);
+        /* ja ReturnBreakFF */
+        off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_ReturnBreakFF, kIemNativeInstrCond_nbe);
+
+        /*
+         * Okay, we've only got pending IRQ related FFs: Can we dispatch IRQs?
+         *
+         * ASSUME that the shadow flags are cleared when they ought to be cleared,
+         * so we can skip the RIP check.
+         */
+        AssertCompile(CPUMCTX_INHIBIT_SHADOW < RT_BIT_32(31));
+        /* reg1 = efl & (IF | INHIBIT_SHADOW) */
+        off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, idxTmpReg1, idxEflReg, X86_EFL_IF | CPUMCTX_INHIBIT_SHADOW);
+        /* reg1 ^= IF */
+        off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg1, X86_EFL_IF);
+
+# ifdef RT_ARCH_AMD64
+        /* jz   ReturnBreakFF */
+        off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_ReturnBreakFF, kIemNativeInstrCond_e);
+
+# elif defined(RT_ARCH_ARM64)
+        /* cbz  reg1, ReturnBreakFF */
+        off = iemNativeEmitTestIfGprIsZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg1, false /*f64Bit*/,
+                                                      kIemNativeLabelType_ReturnBreakFF);
+# else
+#  error "port me"
+# endif
+        /*
+         * nothing_pending:
+         */
+        iemNativeFixupFixedJump(pReNative, offFixup1, off);
+    }
+
+    IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
+
+    /*
+     * Cleanup.
+     */
+    iemNativeRegFreeTmp(pReNative, idxTmpReg1);
+    if (a_fCheckIrqs)
+    {
+        iemNativeRegFreeTmp(pReNative, idxTmpReg2);
+        iemNativeRegFreeTmp(pReNative, idxEflReg);
+    }
+    else
+    {
+        Assert(idxTmpReg2 == UINT8_MAX);
+        Assert(idxEflReg == UINT8_MAX);
+    }
+
+    return off;
+}
+
+
+/**
  * Built-in function that checks for pending interrupts that can be delivered or
  * forced action flags.
…
 IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
 {
-    RT_NOREF(pCallEntry);
-
     BODY_FLUSH_PENDING_WRITES();
-
-    /* It's too convenient to use iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet below
-       and I'm too lazy to create a 'Fixed' version of that one. */
-    uint32_t const idxLabelVmCheck = iemNativeLabelCreate(pReNative, kIemNativeLabelType_CheckIrq,
-                                                          UINT32_MAX, pReNative->uCheckIrqSeqNo++);
-
-    /* Again, we need to load the extended EFLAGS before we actually need them
-       in case we jump.  We couldn't use iemNativeRegAllocTmpForGuestReg if we
-       loaded them inside the check, as the shadow state would not be correct
-       when the code branches before the load.  Ditto PC. */
-    uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
-                                                              kIemNativeGstRegUse_ReadOnly);
-
-    uint8_t const idxPcReg  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
-
-    uint8_t idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
-
-    /*
-     * Start by checking the local forced actions of the EMT we're on for IRQs
-     * and other FFs that needs servicing.
-     */
-    /** @todo this isn't even close to the NMI and interrupt conditions in EM! */
-    /* Load FFs in to idxTmpReg and AND with all relevant flags. */
-    off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
-    off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
-                                   VMCPU_FF_ALL_MASK & ~(  VMCPU_FF_PGM_SYNC_CR3
-                                                         | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
-                                                         | VMCPU_FF_TLB_FLUSH
-                                                         | VMCPU_FF_UNHALT ),
-                                   true /*fSetFlags*/);
-    /* If we end up with ZERO in idxTmpReg there is nothing to do.*/
-    uint32_t const offFixupJumpToVmCheck1 = off;
-    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
-
-    /* Some relevant FFs are set, but if's only APIC or/and PIC being set,
-       these may be supressed by EFLAGS.IF or CPUMIsInInterruptShadow. */
-    off = iemNativeEmitAndGprByImm(pReNative, off, idxTmpReg,
-                                   ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC), true /*fSetFlags*/);
-    /* Return VINF_IEM_REEXEC_BREAK if other FFs are set. */
-    off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakFF);
-
-    /* So, it's only interrupt releated FFs and we need to see if IRQs are being
-       suppressed by the CPU or not. */
-    off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, X86_EFL_IF_BIT, idxLabelVmCheck);
-    off = iemNativeEmitTestAnyBitsInGprAndTbExitIfNoneSet(pReNative, off, idxEflReg, CPUMCTX_INHIBIT_SHADOW,
-                                                          kIemNativeLabelType_ReturnBreakFF);
-
-    /* We've got shadow flags set, so we must check that the PC they are valid
-       for matches our current PC value. */
-    /** @todo AMD64 can do this more efficiently w/o loading uRipInhibitInt into
-     *        a register. */
-    off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.uRipInhibitInt));
-    off = iemNativeEmitTestIfGprNotEqualGprAndTbExit(pReNative, off, idxTmpReg, idxPcReg,
-                                                     kIemNativeLabelType_ReturnBreakFF);
-
-    /*
-     * Now check the force flags of the VM.
-     */
-    iemNativeLabelDefine(pReNative, idxLabelVmCheck, off);
-    iemNativeFixupFixedJump(pReNative, offFixupJumpToVmCheck1, off);
-    off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, CTX_SUFF(pVM))); /* idxTmpReg = pVM */
-    off = iemNativeEmitLoadGprByGprU32(pReNative, off, idxTmpReg, idxTmpReg, RT_UOFFSETOF(VMCC, fGlobalForcedActions));
-    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, VM_FF_ALL_MASK, true /*fSetFlags*/);
-    off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakFF);
-
-    /** @todo STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckIrqBreaks); */
-
-    /*
-     * We're good, no IRQs or FFs pending.
-     */
-    iemNativeRegFreeTmp(pReNative, idxTmpReg);
-    iemNativeRegFreeTmp(pReNative, idxEflReg);
-    iemNativeRegFreeTmp(pReNative, idxPcReg);
-
-    /*
-     * Note down that we've been here, so we can skip FFs + IRQ checks when
-     * doing direct linking.
-     */
+    off = iemNativeRecompFunc_BltIn_CheckTimersAndIrqsCommon<false, true>(pReNative, off);
+
+    /* Note down that we've been here, so we can skip FFs + IRQ checks when
+       doing direct linking. */
 #ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
     pReNative->idxLastCheckIrqCallNo = pReNative->idxCurCall;
+    RT_NOREF(pCallEntry);
 #else
     pReNative->idxLastCheckIrqCallNo = pCallEntry - pReNative->pTbOrg->Thrd.paCalls;
…
 
 IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckIrq)
+{
+    IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
+    IEM_LIVENESS_RAW_EFLAGS_ONE_INPUT(pOutgoing, fEflOther);
+    RT_NOREF(pCallEntry);
+}
+
+
+/**
+ * Built-in function that works the cIrqChecksTillNextPoll counter on direct TB
+ * linking, like loop-jumps.
+ */
+IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckTimers)
+{
+    BODY_FLUSH_PENDING_WRITES();
+    RT_NOREF(pCallEntry);
+    return iemNativeRecompFunc_BltIn_CheckTimersAndIrqsCommon<true, false>(pReNative, off);
+}
+
+IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckTimers)
+{
+    IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
+    RT_NOREF(pCallEntry);
+}
+
+
+/**
+ * Combined BltIn_CheckTimers + BltIn_CheckIrq for direct linking.
+ */
+IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckTimersAndIrq)
+{
+    BODY_FLUSH_PENDING_WRITES();
+    RT_NOREF(pCallEntry);
+    return iemNativeRecompFunc_BltIn_CheckTimersAndIrqsCommon<true, true>(pReNative, off);
+}
+
+IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckTimersAndIrq)
 {
     IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
…
 #endif
 
+
+/**
+ * Built-in function for jumping in the call sequence.
+ */
+IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_Jump)
+{
+    PCIEMTB const  pTb         = pReNative->pTbOrg;
+    Assert(pCallEntry->auParams[1] == 0 && pCallEntry->auParams[2] == 0);
+    Assert(pCallEntry->auParams[0] < pTb->Thrd.cCalls);
+#if 1
+    RT_NOREF(pCallEntry, pTb);
+
+# ifdef VBOX_WITH_STATISTICS
+    /* Increment StatNativeTbExitLoopFullTb. */
+    uint32_t const offStat = RT_UOFFSETOF(VMCPU, iem.s.StatNativeTbExitLoopFullTb);
+#  ifdef RT_ARCH_AMD64
+    off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offStat);
+#  else
+    uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
+    uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
+    off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offStat);
+    iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
+    iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
+#  endif
+# endif
+# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
+    /** @todo
+    off = iemNativeEmitAddU32CounterInVCpuEx(pReNative, off, pTb->cInstructions, RT_UOFFSETOF(VMCPUCC, iem.s.cInstructions));
+    */
+# endif
+
+    /* Jump to the start of the TB. */
+    uint32_t idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_LoopJumpTarget);
+    AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6)); /** @todo better status */
+    return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
+#else
+    RT_NOREF(pReNative, pCallEntry, pTb);
+    return off;
+#endif
+}
+
+IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_Jump)
+{
+    IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
+    RT_NOREF(pCallEntry);
+}
+
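A note on the redone timer polling visible above: the emitted code merely decrements the per-VCPU iem.s.cIrqChecksTillNextPoll counter at each check point and takes the ReturnBreakFF exit when it hits zero, deferring the actual timer work to the exit path. A rough standalone model of that shape (the rearm value and the servicing function are invented assumptions, not VBox code):

#include <cstdint>
#include <cstdio>

struct FakeVCpu { uint32_t cIrqChecksTillNextPoll; }; /* models iem.s.cIrqChecksTillNextPoll */

/* What the emitted dec/jz (or sub/cbz) sequence does: true means "leave the TB". */
static bool checkTimersPoint(FakeVCpu *pVCpu)
{
    return --pVCpu->cIrqChecksTillNextPoll == 0;
}

/* Exit-path stand-in: service the virtual timers and rearm the counter. */
static void serviceTimersAndRearm(FakeVCpu *pVCpu)
{
    std::puts("polling timers on the ReturnBreakFF exit");
    pVCpu->cIrqChecksTillNextPoll = 4; /* invented rearm value */
}

int main()
{
    FakeVCpu vcpu = { 4 };
    for (int i = 0; i < 12; i++)       /* stands in for looping within a full TB */
        if (checkTimersPoint(&vcpu))
            serviceTimersAndRearm(&vcpu);
    return 0;
}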