VirtualBox

Changeset 102847 in vbox for trunk/src/VBox/VMM


Timestamp:
Jan 11, 2024 2:41:51 PM
Author:
vboxsync
svn:sync-xref-src-repo-rev:
161046
Message:

VMM/IEM: Moved the TLB lookup emitter to a common header file so it can be shared with the IEMAllN8veRecompBltIn.cpp code for code TLB lookups. bugref:10371
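For context, the sharing pattern this sets up looks roughly like the sketch below (not part of the changeset itself; the IEMAllN8veRecompBltIn.cpp include is the stated follow-up and is assumed here):

    /* IEMAllN8veRecompiler.cpp now, IEMAllN8veRecompBltIn.cpp later for code TLB lookups: */
    #include "IEMN8veRecompiler.h"
    #include "IEMN8veRecompilerEmit.h"
    #include "IEMN8veRecompilerTlbLookup.h"   /* IEMNATIVEEMITTLBSTATE + iemNativeEmitTlbLookup */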

Location:
trunk/src/VBox/VMM
Files:
2 edited
1 copied

  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

    r102846 r102847  
    100100#include "IEMN8veRecompiler.h"
    101101#include "IEMN8veRecompilerEmit.h"
     102#include "IEMN8veRecompilerTlbLookup.h"
    102103#include "IEMNativeFunctions.h"
    103 
    104 
    105 /*
    106  * TLB Lookup config.
    107  */
    108 #if (defined(RT_ARCH_AMD64) && 1) || (defined(RT_ARCH_ARM64) && 1)
    109 # define IEMNATIVE_WITH_TLB_LOOKUP
    110 #endif
    111 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    112 # define IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    113 #endif
    114 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    115 # define IEMNATIVE_WITH_TLB_LOOKUP_STORE
    116 #endif
    117 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    118 # define IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    119 #endif
    120 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    121 # define IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    122 #endif
    123 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    124 # define IEMNATIVE_WITH_TLB_LOOKUP_POP
    125 #endif
    126104
    127105
     
    71327110
    71337111/**
    7134  * Releases the variable's register.
    7135  *
    7136  * The register must have been previously acquired calling
    7137  * iemNativeVarRegisterAcquire(), iemNativeVarRegisterAcquireForGuestReg() or
    7138  * iemNativeVarRegisterSetAndAcquire().
    7139  */
    7140 DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    7141 {
    7142     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    7143     Assert(pReNative->Core.aVars[idxVar].fRegAcquired);
    7144     pReNative->Core.aVars[idxVar].fRegAcquired = false;
    7145 }
    7146 
    7147 
    7148 /**
    71497112 * Makes sure variable @a idxVar has a register assigned to it and that it stays
    71507113 * fixed till we call iemNativeVarRegisterRelease.
     
    71627125 */
    71637126DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
    7164                                                        bool fInitialized = false, uint8_t idxRegPref = UINT8_MAX)
     7127                                                       bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
    71657128{
    71667129    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
     
    1034410307
    1034510308/**
    10346  * This must be instantiated *before* branching off to the lookup code,
    10347  * so that register spilling and whatnot happens for everyone.
    10348  */
    10349 typedef struct IEMNATIVEEMITTLBSTATE
    10350 {
    10351     bool const      fSkip;
    10352     uint8_t const   idxRegPtrHlp;   /**< We don't support immediate variables with register assignment, so this is a tmp reg alloc. */
    10353     uint8_t const   idxRegPtr;
    10354     uint8_t const   idxRegSegBase;
    10355     uint8_t const   idxRegSegLimit;
    10356     uint8_t const   idxRegSegAttrib;
    10357     uint8_t const   idxReg1;
    10358     uint8_t const   idxReg2;
    10359 #if defined(RT_ARCH_ARM64)
    10360     uint8_t const   idxReg3;
    10361 #endif
    10362     uint64_t const  uAbsPtr;
    10363 
    10364     IEMNATIVEEMITTLBSTATE(PIEMRECOMPILERSTATE a_pReNative, uint32_t *a_poff, uint8_t a_idxVarGCPtrMem,
    10365                           uint8_t a_iSegReg, uint8_t a_cbMem, uint8_t a_offDisp = 0)
    10366 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    10367         /* 32-bit and 64-bit wraparound will require special handling, so skip these for absolute addresses. */
    10368         :           fSkip(   a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
    10369                           &&   (  (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT
    10370                                 ? (uint64_t)(UINT32_MAX - a_cbMem - a_offDisp)
    10371                                 : (uint64_t)(UINT64_MAX - a_cbMem - a_offDisp))
    10372                              < a_pReNative->Core.aVars[a_idxVarGCPtrMem].u.uValue)
    10373 #else
    10374         :           fSkip(true)
    10375 #endif
    10376 #if defined(RT_ARCH_AMD64) /* got good immediate encoding, otherwise we just load the address in a reg immediately. */
    10377         ,    idxRegPtrHlp(UINT8_MAX)
    10378 #else
    10379         ,    idxRegPtrHlp(   a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind != kIemNativeVarKind_Immediate
    10380                           || fSkip
    10381                           ? UINT8_MAX
    10382                           : iemNativeRegAllocTmpImm(a_pReNative, a_poff, a_pReNative->Core.aVars[a_idxVarGCPtrMem].u.uValue) )
    10383 #endif
    10384         ,       idxRegPtr(a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind != kIemNativeVarKind_Immediate && !fSkip
    10385                           ? iemNativeVarRegisterAcquire(a_pReNative, a_idxVarGCPtrMem, a_poff,
    10386                                                         true /*fInitialized*/, IEMNATIVE_CALL_ARG2_GREG)
    10387                           : idxRegPtrHlp)
    10388         ,   idxRegSegBase(a_iSegReg == UINT8_MAX || fSkip
    10389                           ? UINT8_MAX
    10390                           : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_BASE(a_iSegReg)))
    10391         ,  idxRegSegLimit((a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT) || fSkip
    10392                           ? UINT8_MAX
    10393                           : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_LIMIT(a_iSegReg)))
    10394         , idxRegSegAttrib((a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT) || fSkip
    10395                           ? UINT8_MAX
    10396                           : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_ATTRIB(a_iSegReg)))
    10397         ,         idxReg1(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
    10398         ,         idxReg2(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
    10399 #if defined(RT_ARCH_ARM64)
    10400         ,         idxReg3(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
    10401 #endif
    10402         ,         uAbsPtr(  a_pReNative->Core.aVars[a_idxVarGCPtrMem].enmKind != kIemNativeVarKind_Immediate || fSkip
    10403                           ? UINT64_MAX
    10404                           : a_pReNative->Core.aVars[a_idxVarGCPtrMem].u.uValue)
    10405 
    10406     {
    10407         RT_NOREF(a_cbMem, a_offDisp);
    10408     }
    10409 
    10410     /* Alternative constructor for PUSH and POP where we don't have a GCPtrMem
    10411        variable, only a register derived from the guest RSP. */
    10412     IEMNATIVEEMITTLBSTATE(PIEMRECOMPILERSTATE a_pReNative, uint8_t a_idxRegPtr, uint32_t *a_poff,
    10413                           uint8_t a_iSegReg, uint8_t a_cbMem)
    10414 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    10415         :           fSkip(false)
    10416 #else
    10417         :           fSkip(true)
    10418 #endif
    10419         ,    idxRegPtrHlp(UINT8_MAX)
    10420         ,       idxRegPtr(a_idxRegPtr)
    10421         ,   idxRegSegBase(a_iSegReg == UINT8_MAX || fSkip
    10422                           ? UINT8_MAX
    10423                           : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_BASE(a_iSegReg)))
    10424         ,  idxRegSegLimit((a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT) || fSkip
    10425                           ? UINT8_MAX
    10426                           : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_LIMIT(a_iSegReg)))
    10427         , idxRegSegAttrib((a_iSegReg == UINT8_MAX || (a_pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_64BIT) || fSkip
    10428                           ? UINT8_MAX
    10429                           : iemNativeRegAllocTmpForGuestReg(a_pReNative, a_poff, IEMNATIVEGSTREG_SEG_ATTRIB(a_iSegReg)))
    10430         ,         idxReg1(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
    10431         ,         idxReg2(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
    10432 #if defined(RT_ARCH_ARM64)
    10433         ,         idxReg3(!fSkip ? iemNativeRegAllocTmp(a_pReNative, a_poff) : UINT8_MAX)
    10434 #endif
    10435         ,         uAbsPtr(UINT64_MAX)
    10436 
    10437     {
    10438         RT_NOREF_PV(a_cbMem);
    10439     }
    10440 
    10441     void freeRegsAndReleaseVars(PIEMRECOMPILERSTATE a_pReNative, uint8_t idxVarGCPtrMem = UINT8_MAX) const
    10442     {
    10443         if (idxRegPtr != UINT8_MAX)
    10444         {
    10445             if (idxRegPtrHlp == UINT8_MAX)
    10446             {
    10447                 if (idxVarGCPtrMem != UINT8_MAX)
    10448                     iemNativeVarRegisterRelease(a_pReNative, idxVarGCPtrMem);
    10449             }
    10450             else
    10451             {
    10452                 Assert(idxRegPtrHlp == idxRegPtr);
    10453                 iemNativeRegFreeTmpImm(a_pReNative, idxRegPtrHlp);
    10454             }
    10455         }
    10456         else
    10457             Assert(idxRegPtrHlp == UINT8_MAX);
    10458         if (idxRegSegBase != UINT8_MAX)
    10459             iemNativeRegFreeTmp(a_pReNative, idxRegSegBase);
    10460         if (idxRegSegLimit != UINT8_MAX)
    10461         {
    10462             iemNativeRegFreeTmp(a_pReNative, idxRegSegLimit);
    10463             iemNativeRegFreeTmp(a_pReNative, idxRegSegAttrib);
    10464         }
    10465         else
    10466             Assert(idxRegSegAttrib == UINT8_MAX);
    10467 #if defined(RT_ARCH_ARM64)
    10468         iemNativeRegFreeTmp(a_pReNative, idxReg3);
    10469 #endif
    10470         iemNativeRegFreeTmp(a_pReNative, idxReg2);
    10471         iemNativeRegFreeTmp(a_pReNative, idxReg1);
    10472 
    10473     }
    10474 
    10475     uint32_t getRegsNotToSave() const
    10476     {
    10477         if (!fSkip)
    10478             return RT_BIT_32(idxReg1)
    10479                  | RT_BIT_32(idxReg2)
    10480 #if defined(RT_ARCH_ARM64)
    10481                  | RT_BIT_32(idxReg3)
    10482 #endif
    10483                  ;
    10484         return 0;
    10485     }
    10486 
    10487     /** This is only for avoiding assertions. */
    10488     uint32_t getActiveRegsWithShadows() const
    10489     {
    10490 #ifdef VBOX_STRICT
    10491         if (!fSkip)
    10492             return RT_BIT_32(idxRegSegBase) | RT_BIT_32(idxRegSegLimit) | RT_BIT_32(idxRegSegAttrib);
    10493 #endif
    10494         return 0;
    10495     }
    10496 } IEMNATIVEEMITTLBSTATE;
    10497 
    10498 
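As a rough illustration of how the state and the emitter below work together (a sketch assembled from the call sites later in this changeset; label creation and the variable names are assumptions):

    /* Instantiate *before* branching off, so register spilling happens for all code paths. */
    IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
    /* ... create the TlbLookup/TlbMiss labels and emit the slow-path branch ... */
    off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
                                 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
    /* Afterwards, release the registers and the GCPtrMem variable the state acquired: */
    TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);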
    10499 /**
    1050010309 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
    1050110310 */
     
    1054410353    RT_BREAKPOINT();
    1054510354}
    10546 DECLASM(void) iemNativeHlpAsmSafeWrapCheckTlbLookup(void);
    10547 
    10548 
    10549 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    10550 DECL_INLINE_THROW(uint32_t)
    10551 iemNativeEmitTlbLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEEMITTLBSTATE const * const pTlbState,
    10552                        uint8_t iSegReg, uint8_t cbMem, uint8_t fAlignMask, uint32_t fAccess,
    10553                        uint32_t idxLabelTlbLookup, uint32_t idxLabelTlbMiss, uint8_t idxRegMemResult,
    10554                        uint8_t offDisp = 0)
    10555 {
    10556     Assert(!pTlbState->fSkip);
    10557 # if defined(RT_ARCH_AMD64)
    10558     uint8_t * const  pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 512);
    10559 # elif defined(RT_ARCH_ARM64)
    10560     uint32_t * const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
    10561 # endif
    10562 
    10563     /*
    10564      * The expand-down check isn't used all that much, so we emit it here to keep
    10565      * the main lookup path straighter.
    10566      */
    10567     /* check_expand_down: ; complicated! */
    10568     uint32_t const offCheckExpandDown = off;
    10569     uint32_t       offFixupLimitDone  = 0;
    10570     if (iSegReg != UINT8_MAX && (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT)
    10571     {
    10572 off = iemNativeEmitBrkEx(pCodeBuf, off, 1); /** @todo this needs testing */
    10573         /* cmp  seglim, regptr */
    10574         if (pTlbState->idxRegPtr != UINT8_MAX && offDisp == 0)
    10575             off = iemNativeEmitCmpGpr32WithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit, pTlbState->idxRegPtr);
    10576         else if (pTlbState->idxRegPtr == UINT8_MAX)
    10577             off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxRegSegLimit,
    10578                                                  (uint32_t)(pTlbState->uAbsPtr + offDisp));
    10579         else if (cbMem == 1)
    10580             off = iemNativeEmitCmpGpr32WithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit, pTlbState->idxReg2);
    10581         else
    10582         {   /* use idxRegMemResult to calc the displaced address. */
    10583             off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off, idxRegMemResult, pTlbState->idxRegPtr, offDisp);
    10584             off = iemNativeEmitCmpGpr32WithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit, idxRegMemResult);
    10585         }
    10586         /* ja  tlbmiss */
    10587         off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
    10588 
    10589         /* reg1 = segattr & X86DESCATTR_D (0x4000) */
    10590         off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxRegSegAttrib, X86DESCATTR_D);
    10591         /* xor  reg1, X86DESCATTR_D */
    10592         off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_D);
    10593         /* shl  reg1, 2 (16 - 14) */
    10594         AssertCompile((X86DESCATTR_D << 2) == UINT32_C(0x10000));
    10595         off = iemNativeEmitShiftGpr32LeftEx(pCodeBuf, off, pTlbState->idxReg1, 2);
    10596         /* dec  reg1 (=> 0xffff if D=0; 0xffffffff if D=1) */
    10597         off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, 1);
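        /* Worked example (sketch, not in the source): D=1: 0x4000 ^ 0x4000 = 0, << 2 = 0, - 1 = 0xffffffff
           (32-bit limit mask); D=0: 0 ^ 0x4000 = 0x4000, << 2 = 0x10000, - 1 = 0x0000ffff (16-bit limit mask). */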
    10598         /* cmp  reg1, reg2 (64-bit) / imm (32-bit) */
    10599         if (pTlbState->idxRegPtr != UINT8_MAX)
    10600             off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1,
    10601                                                cbMem > 1 || offDisp != 0 ? pTlbState->idxReg2 : pTlbState->idxRegPtr);
    10602         else
    10603             off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1,
    10604                                                  (uint32_t)(pTlbState->uAbsPtr + offDisp + cbMem - 1)); /* fSkip=true on overflow. */
    10605         /* jbe  tlbmiss */
    10606         off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_be);
    10607         /* jmp  limitdone */
    10608         offFixupLimitDone = off;
    10609         off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, off /* ASSUME short jump suffices */);
    10610     }
    10611 
    10612     /*
    10613      * tlblookup:
    10614      */
    10615     iemNativeLabelDefine(pReNative, idxLabelTlbLookup, off);
    10616 # if defined(RT_ARCH_ARM64) && 0
    10617     off = iemNativeEmitBrkEx(pCodeBuf, off, 0);
    10618 # endif
    10619 
    10620     /*
    10621      * 1. Segmentation.
    10622      *
    10623      * 1a. Check segment limit and attributes if non-flat 32-bit code.  This is complicated.
    10624      */
    10625     if (iSegReg != UINT8_MAX && (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT)
    10626     {
    10627         /* Check that we've got a segment loaded and that it allows the access.
    10628            For write access this means a writable data segment.
    10629            For read-only accesses this means a readable code segment or any data segment. */
    10630         if (fAccess & IEM_ACCESS_TYPE_WRITE)
    10631         {
    10632             uint32_t const fMustBe1 = X86DESCATTR_P        | X86DESCATTR_DT    | X86_SEL_TYPE_WRITE;
    10633             uint32_t const fMustBe0 = X86DESCATTR_UNUSABLE | X86_SEL_TYPE_CODE;
    10634             /* reg1 = segattrs & (must1|must0) */
    10635             off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,
    10636                                                   pTlbState->idxRegSegAttrib, fMustBe1 | fMustBe0);
    10637             /* cmp reg1, must1 */
    10638             AssertCompile(fMustBe1 <= UINT16_MAX);
    10639             off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, fMustBe1);
    10640             /* jne tlbmiss */
    10641             off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
    10642         }
    10643         else
    10644         {
    10645             /*  U  | !P |!DT |!CD | RW |
    10646                 16 |  8 |  4 |  3 |  1 |
    10647               -------------------------------
    10648                 0  |  0 |  0 |  0 |  0 | execute-only code segment. - must be excluded
    10649                 0  |  0 |  0 |  0 |  1 | execute-read code segment.
    10650                 0  |  0 |  0 |  1 |  0 | read-only data segment.
    10651                 0  |  0 |  0 |  1 |  1 | read-write data segment.   - last valid combination
    10652             */
    10653             /* reg1 = segattrs & (relevant attributes) */
    10654             off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxRegSegAttrib,
    10655                                                     X86DESCATTR_UNUSABLE | X86DESCATTR_P | X86DESCATTR_DT
    10656                                                   | X86_SEL_TYPE_CODE    | X86_SEL_TYPE_WRITE);
    10657             /* xor reg1, X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_CODE ; place C=1 RW=0 at the bottom & limit the range.
    10658                                             ; EO-code=0,  ER-code=2, RO-data=8, RW-data=10 */
    10659 #ifdef RT_ARCH_ARM64
    10660             off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_DT | X86_SEL_TYPE_CODE);
    10661             off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1, X86DESCATTR_P);
    10662 #else
    10663             off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, pTlbState->idxReg1,
    10664                                                X86DESCATTR_P | X86DESCATTR_DT | X86_SEL_TYPE_CODE);
    10665 #endif
    10666             /* sub reg1, X86_SEL_TYPE_WRITE ; EO-code=-2, ER-code=0, RO-data=6, RW-data=8 */
    10667             off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_WRITE /* ER-code */);
    10668             /* cmp reg1, X86_SEL_TYPE_CODE | X86_SEL_TYPE_WRITE */
    10669             AssertCompile(X86_SEL_TYPE_CODE == 8);
    10670             off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_CODE);
    10671             /* ja  tlbmiss */
    10672             off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
    10673         }
    10674 
    10675         /* If we're accessing more than one byte or if we're working with a non-zero offDisp,
    10676            put the last address we'll be accessing in idxReg2 (64-bit). */
    10677         if ((cbMem > 1 || offDisp != 0) && pTlbState->idxRegPtr != UINT8_MAX)
    10678         {
    10679             if (!offDisp)
    10680                 /* reg2 = regptr + cbMem - 1; 64-bit result so we can fend off wraparounds/overflows. */
    10681                 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, pTlbState->idxReg2,/*=*/ pTlbState->idxRegPtr,/*+*/ cbMem - 1);
    10682             else
    10683             {
    10684                 /* reg2 = (uint32_t)(regptr + offDisp) + cbMem - 1. */
    10685                 off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off,
    10686                                                        pTlbState->idxReg2,/*=*/ pTlbState->idxRegPtr,/*+*/ + offDisp);
    10687                 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, pTlbState->idxReg2, cbMem - 1);
    10688             }
    10689         }
    10690 
    10691         /*
    10692          * Check the limit.  If this is a write access, we know that it's a
    10693          * data segment and includes the expand_down bit.  For read-only accesses
    10694          * we need to check that code/data=0 and expanddown=1 before continuing.
    10695          */
    10696         if (fAccess & IEM_ACCESS_TYPE_WRITE)
    10697         {
    10698             /* test segattrs, X86_SEL_TYPE_DOWN */
    10699             AssertCompile(X86_SEL_TYPE_DOWN < 128);
    10700             off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, pTlbState->idxRegSegAttrib, X86_SEL_TYPE_DOWN);
    10701             /* jnz  check_expand_down */
    10702             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offCheckExpandDown, kIemNativeInstrCond_ne);
    10703         }
    10704         else
    10705         {
    10706             /* reg1 = segattr & (code | down) */
    10707             off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,
    10708                                                   pTlbState->idxRegSegAttrib, X86_SEL_TYPE_CODE | X86_SEL_TYPE_DOWN);
    10709             /* cmp reg1, down */
    10710             off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, X86_SEL_TYPE_DOWN);
    10711             /* je check_expand_down */
    10712             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, offCheckExpandDown, kIemNativeInstrCond_e);
    10713         }
    10714 
    10715         /* expand_up:
    10716            cmp  seglim, regptr/reg2/imm */
    10717         if (pTlbState->idxRegPtr != UINT8_MAX)
    10718             off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxRegSegLimit,
    10719                                                cbMem > 1 || offDisp != 0 ? pTlbState->idxReg2 : pTlbState->idxRegPtr);
    10720         else
    10721             off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxRegSegLimit,
    10722                                                  (uint32_t)pTlbState->uAbsPtr + offDisp + cbMem - 1U); /* fSkip=true on overflow. */
    10723         /* jbe  tlbmiss */
    10724         off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_be);
    10725 
    10726         /* limitdone: */
    10727         iemNativeFixupFixedJump(pReNative, offFixupLimitDone, off);
    10728     }
    10729 
    10730     /* 1b. Add the segment base.  We use idxRegMemResult for the ptr register if
    10731            this step is required or if the address is a constant (simplicity) or
    10732            if offDisp is non-zero. */
    10733     uint8_t const idxRegFlatPtr = iSegReg != UINT8_MAX || pTlbState->idxRegPtr == UINT8_MAX || offDisp != 0
    10734                                 ? idxRegMemResult : pTlbState->idxRegPtr;
    10735     if (iSegReg != UINT8_MAX)
    10736     {
    10737         Assert(idxRegFlatPtr != pTlbState->idxRegPtr);
    10738         /* regflat = segbase + regptr/imm */
    10739         if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
    10740         {
    10741             Assert(iSegReg >= X86_SREG_FS);
    10742             if (pTlbState->idxRegPtr != UINT8_MAX)
    10743             {
    10744                 off = iemNativeEmitGprEqGprPlusGprEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase, pTlbState->idxRegPtr);
    10745                 if (offDisp != 0)
    10746                     off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxRegFlatPtr, offDisp);
    10747             }
    10748             else
    10749                 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase,
    10750                                                      pTlbState->uAbsPtr + offDisp);
    10751         }
    10752         else if (pTlbState->idxRegPtr != UINT8_MAX)
    10753         {
    10754             off = iemNativeEmitGpr32EqGprPlusGprEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegSegBase, pTlbState->idxRegPtr);
    10755             if (offDisp != 0)
    10756                 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegFlatPtr, offDisp);
    10757         }
    10758         else
    10759             off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr,
    10760                                                    pTlbState->idxRegSegBase, (uint32_t)pTlbState->uAbsPtr + offDisp);
    10761     }
    10762     else if (pTlbState->idxRegPtr == UINT8_MAX)
    10763     {
    10764         if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
    10765             off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->uAbsPtr + offDisp);
    10766         else
    10767             off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxRegFlatPtr, (uint32_t)pTlbState->uAbsPtr + offDisp);
    10768     }
    10769     else if (offDisp != 0)
    10770     {
    10771         Assert(idxRegFlatPtr != pTlbState->idxRegPtr);
    10772         if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
    10773             off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegPtr, offDisp);
    10774         else
    10775             off = iemNativeEmitGpr32EqGprPlusImmEx(pCodeBuf, off, idxRegFlatPtr, pTlbState->idxRegPtr, offDisp);
    10776     }
    10777     else
    10778         Assert(idxRegFlatPtr == pTlbState->idxRegPtr);
    10779 
    10780     /*
    10781      * 2. Check that the address doesn't cross a page boundary and doesn't have alignment issues.
    10782      *
    10783      * 2a. Alignment check using fAlignMask.
    10784      */
    10785     if (fAlignMask)
    10786     {
    10787         Assert(RT_IS_POWER_OF_TWO(fAlignMask + 1));
    10788         Assert(fAlignMask < 128);
    10789         /* test regflat, fAlignMask */
    10790         off = iemNativeEmitTestAnyBitsInGpr8Ex(pCodeBuf, off, idxRegFlatPtr, fAlignMask);
    10791         /* jnz tlbmiss */
    10792         off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
    10793     }
    10794 
    10795     /*
    10796      * 2b. Check that it's not crossing a page boundary. This is implicit in
    10797      *     the previous test if the alignment is the same as or larger than the type.
    10798      */
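    /* Example (sketch): for cbMem = 4 with fAlignMask = 3, any address that passed the alignment
       test is 4-byte aligned, so its page offset is at most 0xffc and the access cannot cross a page. */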
    10799     if (cbMem > fAlignMask + 1)
    10800     {
    10801         /* reg1 = regflat & 0xfff */
    10802         off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, pTlbState->idxReg1,/*=*/ idxRegFlatPtr,/*&*/ GUEST_PAGE_OFFSET_MASK);
    10803         /* cmp reg1, GUEST_PAGE_SIZE - cbMem */
    10804         off = iemNativeEmitCmpGpr32WithImmEx(pCodeBuf, off, pTlbState->idxReg1, GUEST_PAGE_SIZE - cbMem);
    10805         /* ja  tlbmiss */
    10806         off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
    10807     }
    10808 
    10809     /*
    10810      * 3. TLB lookup.
    10811      *
    10812      * 3a. Calculate the TLB tag value (IEMTLB_CALC_TAG).
    10813      *     In 64-bit mode we will also check for non-canonical addresses here.
    10814      */
    10815     if ((pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT)
    10816     {
    10817 # if defined(RT_ARCH_AMD64)
    10818         /* mov reg1, regflat */
    10819         off = iemNativeEmitLoadGprFromGprEx(pCodeBuf, off, pTlbState->idxReg1, idxRegFlatPtr);
    10820         /* rol reg1, 16 */
    10821         off = iemNativeEmitRotateGprLeftEx(pCodeBuf, off, pTlbState->idxReg1, 16);
    10822         /** @todo Would 'movsx reg2, word reg1' and working on reg2 in dwords be faster? */
    10823         /* inc word reg1 */
    10824         pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    10825         if (pTlbState->idxReg1 >= 8)
    10826             pCodeBuf[off++] = X86_OP_REX_B;
    10827         pCodeBuf[off++] = 0xff;
    10828         pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, pTlbState->idxReg1 & 7);
    10829         /* cmp word reg1, 1 */
    10830         pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    10831         if (pTlbState->idxReg1 >= 8)
    10832             pCodeBuf[off++] = X86_OP_REX_B;
    10833         pCodeBuf[off++] = 0x83;
    10834         pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, pTlbState->idxReg1 & 7);
    10835         pCodeBuf[off++] = 1;
    10836         /* ja  tlbmiss */
    10837         off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_nbe);
    10838         /* shr reg1, 16 + GUEST_PAGE_SHIFT */
    10839         off = iemNativeEmitShiftGprRightEx(pCodeBuf, off, pTlbState->idxReg1, 16 + GUEST_PAGE_SHIFT);
    10840 
    10841 # elif defined(RT_ARCH_ARM64)
    10842         /* lsr  reg1, regflat, #48 */
    10843         pCodeBuf[off++] = Armv8A64MkInstrLsrImm(pTlbState->idxReg1, idxRegFlatPtr, 48);
    10844         /* add  reg1, reg1, #1 */
    10845         pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(pTlbState->idxReg1, pTlbState->idxReg1, 1, false /*f64Bit*/);
    10846         /* tst  reg1, #0xfffe */
    10847         Assert(Armv8A64ConvertImmRImmS2Mask32(14, 31) == 0xfffe);
    10848         pCodeBuf[off++] = Armv8A64MkInstrTstImm(pTlbState->idxReg1, 14, 31,  false /*f64Bit*/);
    10849         /* b.ne tlbmiss */
    10850         off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
    10851 
    10852         /* ubfx reg1, regflat, #12, #36 */
    10853         pCodeBuf[off++] = Armv8A64MkInstrUbfx(pTlbState->idxReg1, idxRegFlatPtr, GUEST_PAGE_SHIFT, 48 - GUEST_PAGE_SHIFT);
    10854 # else
    10855 #  error "Port me"
    10856 # endif
    10857     }
    10858     else
    10859     {
    10860         /* reg1 = (uint32_t)(regflat >> 12) */
    10861         off = iemNativeEmitGpr32EqGprShiftRightImmEx(pCodeBuf, off, pTlbState->idxReg1, idxRegFlatPtr, GUEST_PAGE_SHIFT);
    10862     }
    10863     /* or  reg1, [qword pVCpu->iem.s.DataTlb.uTlbRevision] */
    10864 # if defined(RT_ARCH_AMD64)
    10865     pCodeBuf[off++] = pTlbState->idxReg1 < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
    10866     pCodeBuf[off++] = 0x0b; /* OR r64,r/m64 */
    10867     off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, pTlbState->idxReg1, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbRevision));
    10868 # else
    10869     off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg3, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbRevision));
    10870     off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
    10871 # endif
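    /* In C this should correspond to IEMTLB_CALC_TAG (sketch; the macro in IEMInternal.h is authoritative):
           uTag = ((GCPtrFlat << 16) >> (GUEST_PAGE_SHIFT + 16)) | pVCpu->iem.s.DataTlb.uTlbRevision;
       with the 64-bit path folding the canonical-address check into the same instruction sequence. */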
    10872 
    10873     /*
    10874      * 3b. Calc pTlbe.
    10875      */
    10876 # if defined(RT_ARCH_AMD64)
    10877     /* movzx reg2, byte reg1 */
    10878     off = iemNativeEmitLoadGprFromGpr8Ex(pCodeBuf, off, pTlbState->idxReg2, pTlbState->idxReg1);
    10879     /* shl   reg2, 5 ; reg2 *= sizeof(IEMTLBENTRY) */
    10880     AssertCompileSize(IEMTLBENTRY, 32);
    10881     off = iemNativeEmitShiftGprLeftEx(pCodeBuf, off, pTlbState->idxReg2, 5);
    10882     /* lea   reg2, [pVCpu->iem.s.DataTlb.aEntries + reg2] */
    10883     AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU < 8);
    10884     pCodeBuf[off++] = pTlbState->idxReg2 < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_X | X86_OP_REX_R;
    10885     pCodeBuf[off++] = 0x8d;
    10886     pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, pTlbState->idxReg2 & 7, 4 /*SIB*/);
    10887     pCodeBuf[off++] = X86_SIB_MAKE(IEMNATIVE_REG_FIXED_PVMCPU & 7, pTlbState->idxReg2 & 7, 0);
    10888     pCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPUCC,  iem.s.DataTlb.aEntries));
    10889     pCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPUCC,  iem.s.DataTlb.aEntries));
    10890     pCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPUCC,  iem.s.DataTlb.aEntries));
    10891     pCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPUCC,  iem.s.DataTlb.aEntries));
    10892 
    10893 # elif defined(RT_ARCH_ARM64)
    10894     /* reg2 = (reg1 & 0xff) << 5 */
    10895     pCodeBuf[off++] = Armv8A64MkInstrUbfiz(pTlbState->idxReg2, pTlbState->idxReg1, 5, 8);
    10896     /* reg2 += offsetof(VMCPUCC, iem.s.DataTlb.aEntries) */
    10897     off = iemNativeEmitAddGprImmEx(pCodeBuf, off, pTlbState->idxReg2, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.aEntries),
    10898                                    pTlbState->idxReg3 /*iGprTmp*/);
    10899     /* reg2 += pVCpu */
    10900     off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, pTlbState->idxReg2, IEMNATIVE_REG_FIXED_PVMCPU);
    10901 # else
    10902 #  error "Port me"
    10903 # endif
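    /* I.e. in C (sketch): pTlbe = &pVCpu->iem.s.DataTlb.aEntries[(uint8_t)uTag]; the
       AssertCompileSize(IEMTLBENTRY, 32) above makes the scaling a simple shift by 5. */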
    10904 
    10905     /*
    10906      * 3c. Compare the TLBE.uTag with the one from 2a (reg1).
    10907      */
    10908 # if defined(RT_ARCH_AMD64)
    10909     /* cmp reg1, [reg2] */
    10910     pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R) | (pTlbState->idxReg2 < 8 ? 0 : X86_OP_REX_B);
    10911     pCodeBuf[off++] = 0x3b;
    10912     off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag));
    10913 # elif defined(RT_ARCH_ARM64)
    10914     off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg3, pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, uTag));
    10915     off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
    10916 # else
    10917 #  error "Port me"
    10918 # endif
    10919     /* jne tlbmiss */
    10920     off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
    10921 
    10922     /*
    10923      * 4. Check TLB page table level access flags and physical page revision #.
    10924      */
    10925     /* mov reg1, mask */
    10926     AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
    10927     uint64_t const fNoUser = (((pReNative->fExec >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK) + 1) & IEMTLBE_F_PT_NO_USER;
    10928     uint64_t       fTlbe   = IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3 | IEMTLBE_F_PG_UNASSIGNED | IEMTLBE_F_PT_NO_ACCESSED
    10929                            | fNoUser;
    10930     if (fAccess & IEM_ACCESS_TYPE_READ)
    10931         fTlbe |= IEMTLBE_F_PG_NO_READ;
    10932     if (fAccess & IEM_ACCESS_TYPE_WRITE)
    10933         fTlbe |= IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PG_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY;
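    /* fNoUser worked through (sketch): with IEMTLBE_F_PT_NO_USER == 4, CPL 3 gives (3 + 1) & 4 = 4,
       pulling the entry's PT_NO_USER bit into the comparison; CPL 0..2 give (CPL + 1) & 4 = 0. */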
    10934     off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, pTlbState->idxReg1, fTlbe);
    10935 # if defined(RT_ARCH_AMD64)
    10936     /* and reg1, [reg2->fFlagsAndPhysRev] */
    10937     pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R) | (pTlbState->idxReg2 < 8 ? 0 : X86_OP_REX_B);
    10938     pCodeBuf[off++] = 0x23;
    10939     off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1,
    10940                                     pTlbState->idxReg2, RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
    10941 
    10942     /* cmp reg1, [pVCpu->iem.s.DataTlb.uTlbPhysRev] */
    10943     pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_R);
    10944     pCodeBuf[off++] = 0x3b;
    10945     off = iemNativeEmitGprByGprDisp(pCodeBuf, off, pTlbState->idxReg1, IEMNATIVE_REG_FIXED_PVMCPU,
    10946                                     RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbPhysRev));
    10947 # elif defined(RT_ARCH_ARM64)
    10948     off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg3, pTlbState->idxReg2,
    10949                                          RT_UOFFSETOF(IEMTLBENTRY, fFlagsAndPhysRev));
    10950     pCodeBuf[off++] = Armv8A64MkInstrAnd(pTlbState->idxReg1, pTlbState->idxReg1, pTlbState->idxReg3);
    10951     off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, pTlbState->idxReg3, RT_UOFFSETOF(VMCPUCC, iem.s.DataTlb.uTlbPhysRev));
    10952     off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg3);
    10953 # else
    10954 #  error "Port me"
    10955 # endif
    10956     /* jne tlbmiss */
    10957     off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelTlbMiss, kIemNativeInstrCond_ne);
    10958 
    10959     /*
    10960      * 5. Check that pbMappingR3 isn't NULL (paranoia) and calculate the
    10961      *    resulting pointer.
    10962      */
    10963     /* mov  reg1, [reg2->pbMappingR3] */
    10964     off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, pTlbState->idxReg1, pTlbState->idxReg2,
    10965                                          RT_UOFFSETOF(IEMTLBENTRY, pbMappingR3));
    10966     /* if (!reg1) goto tlbmiss; */
    10967     /** @todo eliminate the need for this test? */
    10968     off = iemNativeEmitTestIfGprIsZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, pTlbState->idxReg1,
    10969                                                       true /*f64Bit*/, idxLabelTlbMiss);
    10970 
    10971     if (idxRegFlatPtr == idxRegMemResult) /* See step 1b. */
    10972     {
    10973         /* and result, 0xfff */
    10974         off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxRegMemResult, GUEST_PAGE_OFFSET_MASK);
    10975     }
    10976     else
    10977     {
    10978         Assert(idxRegFlatPtr == pTlbState->idxRegPtr);
    10979         /* result = regflat & 0xfff */
    10980         off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, idxRegMemResult, idxRegFlatPtr, GUEST_PAGE_OFFSET_MASK);
    10981     }
    10982     /* add result, reg1 */
    10983     off = iemNativeEmitAddTwoGprsEx(pCodeBuf, off, idxRegMemResult, pTlbState->idxReg1);
    10984 
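    /* Steps 4 and 5 as plain C (sketch of the fast path, not part of the source):
           if (   (pTlbe->fFlagsAndPhysRev & fTlbe) == pVCpu->iem.s.DataTlb.uTlbPhysRev
               && pTlbe->pbMappingR3 != NULL)
               pbResult = pTlbe->pbMappingR3 + (GCPtrFlat & GUEST_PAGE_OFFSET_MASK);
           else
               goto tlbmiss; */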
    10985 # if 0
    10986     /*
    10987      * To verify the result we call a helper function.
    10988      *
    10989      * It's like the state logging, so parameters are passed on the stack.
    10990      * iemNativeHlpAsmSafeWrapCheckTlbLookup(pVCpu, result, addr, seg | (cbMem << 8) | (fAccess << 16))
    10991      */
    10992 #  ifdef RT_ARCH_AMD64
    10993     /* push     seg | (cbMem << 8) | (fAccess << 16) */
    10994     pCodeBuf[off++] = 0x68;
    10995     pCodeBuf[off++] = iSegReg;
    10996     pCodeBuf[off++] = cbMem;
    10997     pCodeBuf[off++] = RT_BYTE1(fAccess);
    10998     pCodeBuf[off++] = RT_BYTE2(fAccess);
    10999     /* push     pTlbState->idxRegPtr / immediate address. */
    11000     if (pTlbState->idxRegPtr != UINT8_MAX)
    11001     {
    11002         if (pTlbState->idxRegPtr >= 8)
    11003             pCodeBuf[off++] = X86_OP_REX_B;
    11004         pCodeBuf[off++] = 0x50 + (pTlbState->idxRegPtr & 7);
    11005     }
    11006     else
    11007     {
    11008         off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, pTlbState->idxReg1, pTlbState->uAbsPtr);
    11009         if (pTlbState->idxReg1 >= 8)
    11010             pCodeBuf[off++] = X86_OP_REX_B;
    11011         pCodeBuf[off++] = 0x50 + (pTlbState->idxReg1 & 7);
    11012     }
    11013     /* push     idxRegMemResult */
    11014     if (idxRegMemResult >= 8)
    11015         pCodeBuf[off++] = X86_OP_REX_B;
    11016     pCodeBuf[off++] = 0x50 + (idxRegMemResult & 7);
    11017     /* push     pVCpu */
    11018     pCodeBuf[off++] = 0x50 + IEMNATIVE_REG_FIXED_PVMCPU;
    11019     /* mov      reg1, helper */
    11020     off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, pTlbState->idxReg1, (uintptr_t)iemNativeHlpAsmSafeWrapCheckTlbLookup);
    11021     /* call     [reg1] */
    11022     pCodeBuf[off++] = X86_OP_REX_W | (pTlbState->idxReg1 < 8 ? 0 : X86_OP_REX_B);
    11023     pCodeBuf[off++] = 0xff;
    11024     pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, pTlbState->idxReg1 & 7);
    11025     /* The stack is cleaned up by the helper function. */
    11026 
    11027 #  else
    11028 #   error "Port me"
    11029 #  endif
    11030 # endif
    11031 
    11032     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11033 
    11034     return off;
    11035 }
    11036 #endif /* IEMNATIVE_WITH_TLB_LOOKUP */
     10355
     10356/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
    1103710357
    1103810358
     
    1130010620         * TlbLookup:
    1130110621         */
    11302         off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
    11303                                      enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
    11304                                      idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
     10622        off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
     10623                                           enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
     10624                                           idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
    1130510625
    1130610626        /*
     
    1202311343         * TlbLookup:
    1202411344         */
    12025         off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1, IEM_ACCESS_TYPE_WRITE,
    12026                                      idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
     11345        off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
     11346                                           IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
    1202711347
    1202811348        /*
     
    1237111691         * TlbLookup:
    1237211692         */
    12373         off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
    12374                                      idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
     11693        off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
     11694                                           idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
    1237511695
    1237611696        /*
     
    1288912209         * TlbLookup:
    1289012210         */
    12891         off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
    12892                                      idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
     12211        off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
     12212                                           idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
    1289312213# ifdef VBOX_WITH_STATISTICS
    1289412214        off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
  • trunk/src/VBox/VMM/include/IEMN8veRecompiler.h

    r102846 r102847  
    847847
    848848DECL_HIDDEN_THROW(uint8_t)  iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
     849DECL_HIDDEN_THROW(uint8_t)  iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
     850                                                        bool fInitialized = false, uint8_t idxRegPref = UINT8_MAX);
    849851DECL_HIDDEN_THROW(uint8_t)  iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
    850852                                                                   IEMNATIVEGSTREG enmGstReg, uint32_t *poff);
     
    853855DECL_HIDDEN_THROW(uint32_t) iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    854856                                                                       uint32_t fHstRegsNotToSave);
    855 
    856857
    857858DECL_HIDDEN_THROW(uint32_t) iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
     
    961962}
    962963
     964
     965/**
     966 * Releases the variable's register.
     967 *
     968 * The register must have been previously acquired calling
     969 * iemNativeVarRegisterAcquire(), iemNativeVarRegisterAcquireForGuestReg() or
     970 * iemNativeVarRegisterSetAndAcquire().
     971 */
     972DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
     973{
     974    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
     975    Assert(pReNative->Core.aVars[idxVar].fRegAcquired);
     976    pReNative->Core.aVars[idxVar].fRegAcquired = false;
     977}
     978
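A minimal usage sketch of the acquire/release pairing (the emitting code in between and the names are assumed):

    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
    /* ... emit instructions that use idxVarReg ... */
    iemNativeVarRegisterRelease(pReNative, idxVar);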
    963979/** @} */
    964980
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerTlbLookup.h

    r102846 r102847  
    11/* $Id$ */
    22/** @file
    3  * IEM - Native Recompiler
    4  *
    5  * Logging group IEM_RE_NATIVE assignments:
    6  *      - Level 1  (Log)  : ...
    7  *      - Flow  (LogFlow) : ...
    8  *      - Level 2  (Log2) : Details calls as they're recompiled.
    9  *      - Level 3  (Log3) : Disassemble native code after recompiling.
    10  *      - Level 4  (Log4) : ...
    11  *      - Level 5  (Log5) : ...
    12  *      - Level 6  (Log6) : ...
    13  *      - Level 7  (Log7) : ...
    14  *      - Level 8  (Log8) : ...
    15  *      - Level 9  (Log9) : ...
    16  *      - Level 10 (Log10): ...
    17  *      - Level 11 (Log11): Variable allocator.
    18  *      - Level 12 (Log12): Register allocator.
     3 * IEM - Interpreted Execution Manager - Native Recompiler TLB Lookup Code Emitter.
    194 */
    205
    216/*
    22  * Copyright (C) 2023 Oracle and/or its affiliates.
     7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
    238 *
    249 * This file is part of VirtualBox base platform packages, as
     
    4126 */
    4227
    43 
    44 /*********************************************************************************************************************************
    45 *   Header Files                                                                                                                 *
    46 *********************************************************************************************************************************/
    47 #define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
    48 #define IEM_WITH_OPAQUE_DECODER_STATE
    49 #define VMCPU_INCL_CPUM_GST_CTX
    50 #define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
    51 #include <VBox/vmm/iem.h>
    52 #include <VBox/vmm/cpum.h>
    53 #include <VBox/vmm/dbgf.h>
    54 #include "IEMInternal.h"
    55 #include <VBox/vmm/vmcc.h>
    56 #include <VBox/log.h>
    57 #include <VBox/err.h>
    58 #include <VBox/dis.h>
    59 #include <VBox/param.h>
    60 #include <iprt/assert.h>
    61 #include <iprt/heap.h>
    62 #include <iprt/mem.h>
    63 #include <iprt/string.h>
    64 #if   defined(RT_ARCH_AMD64)
    65 # include <iprt/x86.h>
    66 #elif defined(RT_ARCH_ARM64)
    67 # include <iprt/armv8.h>
    68 #endif
    69 
    70 #ifdef RT_OS_WINDOWS
    71 # include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
    72 extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
    73 extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
    74 #else
    75 # include <iprt/formats/dwarf.h>
    76 # if defined(RT_OS_DARWIN)
    77 #  include <libkern/OSCacheControl.h>
    78 #  define IEMNATIVE_USE_LIBUNWIND
    79 extern "C" void  __register_frame(const void *pvFde);
    80 extern "C" void  __deregister_frame(const void *pvFde);
    81 # else
    82 #  ifdef DEBUG_bird /** @todo not thread safe yet */
    83 #   define IEMNATIVE_USE_GDB_JIT
    84 #  endif
    85 #  ifdef IEMNATIVE_USE_GDB_JIT
    86 #   include <iprt/critsect.h>
    87 #   include <iprt/once.h>
    88 #   include <iprt/formats/elf64.h>
    89 #  endif
    90 extern "C" void  __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
    91 extern "C" void *__deregister_frame_info(void *pvBegin);           /* (returns pvObj from __register_frame_info call) */
    92 # endif
    93 #endif
    94 #ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    95 # include "/opt/local/include/capstone/capstone.h"
    96 #endif
    97 
    98 #include "IEMInline.h"
    99 #include "IEMThreadedFunctions.h"
     28#ifndef VMM_INCLUDED_SRC_include_IEMN8veRecompilerTlbLookup_h
     29#define VMM_INCLUDED_SRC_include_IEMN8veRecompilerTlbLookup_h
     30#ifndef RT_WITHOUT_PRAGMA_ONCE
     31# pragma once
     32#endif
     33
    10034#include "IEMN8veRecompiler.h"
    10135#include "IEMN8veRecompilerEmit.h"
    102 #include "IEMNativeFunctions.h"
    103 
     36
     37
     38/** @defgroup grp_iem_n8ve_re_tlblookup Native Recompiler TLB Lookup Code Emitter
     39 * @ingroup grp_iem_n8ve_re
     40 * @{
     41 */
    10442
    10543/*
    10644 * TLB Lookup config.
    10745 */
    108 #if (defined(RT_ARCH_AMD64) && 1) || (defined(RT_ARCH_ARM64) && 1)
     46#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64)
    10947# define IEMNATIVE_WITH_TLB_LOOKUP
    11048#endif
     
    12563#endif
    12664
    127 
    128 /*
    129  * Narrow down configs here to avoid wasting time on unused configs.
    130  * Note! Same checks in IEMAllThrdRecompiler.cpp.
    131  */
    132 
    133 #ifndef IEM_WITH_CODE_TLB
    134 # error The code TLB must be enabled for the recompiler.
    135 #endif
    136 
    137 #ifndef IEM_WITH_DATA_TLB
    138 # error The data TLB must be enabled for the recompiler.
    139 #endif
    140 
    141 #ifndef IEM_WITH_SETJMP
    142 # error The setjmp approach must be enabled for the recompiler.
    143 #endif
    144 
    145 /** @todo eliminate this clang build hack. */
    146 #if RT_CLANG_PREREQ(4, 0)
    147 # pragma GCC diagnostic ignored "-Wunused-function"
    148 #endif
    149 
    150 
    151 /*********************************************************************************************************************************
    152 *   Internal Functions                                                                                                           *
    153 *********************************************************************************************************************************/
    154 #ifdef VBOX_STRICT
    155 static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    156                                                 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
    157 static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
    158 #endif
    159 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    160 static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
    161 static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
    162 #endif
    163 DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
    164 DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
    165                                                             IEMNATIVEGSTREG enmGstReg, uint32_t off);
    166 DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
    167 
    168 
    169 /*********************************************************************************************************************************
    170 *   Executable Memory Allocator                                                                                                  *
    171 *********************************************************************************************************************************/
    172 /** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    173  * Use an alternative chunk sub-allocator that does not store internal data
    174  * in the chunk.
    175  *
    176  * Using the RTHeapSimple is not practical on newer darwin systems where
    177  * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
    178  * memory.  We would have to change the protection of the whole chunk for
    179  * every call to RTHeapSimple, which would be rather expensive.
    180  *
    181  * This alternative implementation lets us restrict page protection modifications
    182  * to the pages backing the executable memory we just allocated.
    183  */
    184 #define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    185 /** The chunk sub-allocation unit size in bytes. */
    186 #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE      128
    187 /** The chunk sub-allocation unit size as a shift factor. */
    188 #define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT     7
    189 
    190 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    191 # ifdef IEMNATIVE_USE_GDB_JIT
    192 #   define IEMNATIVE_USE_GDB_JIT_ET_DYN
    193 
    194 /** GDB JIT: Code entry.   */
    195 typedef struct GDBJITCODEENTRY
    196 {
    197     struct GDBJITCODEENTRY *pNext;
    198     struct GDBJITCODEENTRY *pPrev;
    199     uint8_t                *pbSymFile;
    200     uint64_t                cbSymFile;
    201 } GDBJITCODEENTRY;
    202 
    203 /** GDB JIT: Actions. */
    204 typedef enum GDBJITACTIONS : uint32_t
    205 {
    206     kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
    207 } GDBJITACTIONS;
    208 
    209 /** GDB JIT: Descriptor. */
    210 typedef struct GDBJITDESCRIPTOR
    211 {
    212     uint32_t            uVersion;
    213     GDBJITACTIONS       enmAction;
    214     GDBJITCODEENTRY    *pRelevant;
    215     GDBJITCODEENTRY    *pHead;
    216     /** Our addition: */
    217     GDBJITCODEENTRY    *pTail;
    218 } GDBJITDESCRIPTOR;
    219 
    220 /** GDB JIT: Our simple symbol file data. */
    221 typedef struct GDBJITSYMFILE
    222 {
    223     Elf64_Ehdr          EHdr;
    224 #  ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
    225     Elf64_Shdr          aShdrs[5];
    226 #  else
    227     Elf64_Shdr          aShdrs[7];
    228     Elf64_Phdr          aPhdrs[2];
    229 #  endif
    230     /** The dwarf ehframe data for the chunk. */
    231     uint8_t             abEhFrame[512];
    232     char                szzStrTab[128];
    233     Elf64_Sym           aSymbols[3];
    234 #  ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    235     Elf64_Sym           aDynSyms[2];
    236     Elf64_Dyn           aDyn[6];
    237 #  endif
    238 } GDBJITSYMFILE;
    239 
    240 extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
    241 extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
    242 
    243 /** Init once for g_IemNativeGdbJitLock. */
    244 static RTONCE     g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
    245 /** Init once for the critical section. */
    246 static RTCRITSECT g_IemNativeGdbJitLock;
    247 
    248 /** GDB reads the info here. */
    249 GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
    250 
    251 /** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
    252 DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
    253 {
    254     ASMNopPause();
    255 }
    256 
    257 /** @callback_method_impl{FNRTONCE} */
    258 static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
    259 {
    260     RT_NOREF(pvUser);
    261     return RTCritSectInit(&g_IemNativeGdbJitLock);
    262 }
    263 
    264 
    265 # endif /* IEMNATIVE_USE_GDB_JIT */
    266 
    267 /**
    268  * Per-chunk unwind info for non-windows hosts.
    269  */
    270 typedef struct IEMEXECMEMCHUNKEHFRAME
    271 {
    272 # ifdef IEMNATIVE_USE_LIBUNWIND
    273     /** The offset of the FDA into abEhFrame. */
    274     uintptr_t               offFda;
    275 # else
    276     /** 'struct object' storage area. */
    277     uint8_t                 abObject[1024];
    278 # endif
    279 #  ifdef IEMNATIVE_USE_GDB_JIT
    280 #   if 0
    281     /** The GDB JIT 'symbol file' data. */
    282     GDBJITSYMFILE           GdbJitSymFile;
    283 #   endif
    284     /** The GDB JIT list entry. */
    285     GDBJITCODEENTRY         GdbJitEntry;
    286 #  endif
    287     /** The dwarf ehframe data for the chunk. */
    288     uint8_t                 abEhFrame[512];
    289 } IEMEXECMEMCHUNKEHFRAME;
    290 /** Pointer to per-chunk info for non-windows hosts. */
    291 typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
    292 #endif
    293 
    294 
    295 /**
    296  * A chunk of executable memory.
    297  */
    298 typedef struct IEMEXECMEMCHUNK
    299 {
    300 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    301     /** Number of free items in this chunk. */
    302     uint32_t                cFreeUnits;
    303     /** Hint where to start searching for free space in the allocation bitmap. */
    304     uint32_t                idxFreeHint;
    305 #else
    306     /** The heap handle. */
    307     RTHEAPSIMPLE            hHeap;
    308 #endif
    309     /** Pointer to the chunk. */
    310     void                   *pvChunk;
    311 #ifdef IN_RING3
    312     /**
    313      * Pointer to the unwind information.
    314      *
    315      * This is used during C++ throw and longjmp (windows and probably most other
    316      * platforms).  Some debuggers (windbg) make use of it as well.
    317      *
    318      * Windows: This is allocated from hHeap on windows because (at least for
    319      *          AMD64) the UNWIND_INFO structure address in the
    320      *          RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
    321      *
    322      * Others:  Allocated from the regular heap to avoid unnecessary executable data
    323      *          structures.  This points to an IEMEXECMEMCHUNKEHFRAME structure. */
    324     void                   *pvUnwindInfo;
    325 #elif defined(IN_RING0)
    326     /** Allocation handle. */
    327     RTR0MEMOBJ              hMemObj;
    328 #endif
    329 } IEMEXECMEMCHUNK;
    330 /** Pointer to a memory chunk. */
    331 typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
    332 
    333 
    334 /**
    335  * Executable memory allocator for the native recompiler.
    336  */
    337 typedef struct IEMEXECMEMALLOCATOR
    338 {
    339     /** Magic value (IEMEXECMEMALLOCATOR_MAGIC).  */
    340     uint32_t                uMagic;
    341 
    342     /** The chunk size. */
    343     uint32_t                cbChunk;
    344     /** The maximum number of chunks. */
    345     uint32_t                cMaxChunks;
    346     /** The current number of chunks. */
    347     uint32_t                cChunks;
    348     /** Hint where to start looking for available memory. */
    349     uint32_t                idxChunkHint;
    350     /** Statistics: Current number of allocations. */
    351     uint32_t                cAllocations;
    352 
    353     /** The total amount of memory available. */
    354     uint64_t                cbTotal;
    355     /** Total amount of free memory. */
    356     uint64_t                cbFree;
    357     /** Total amount of memory allocated. */
    358     uint64_t                cbAllocated;
    359 
    360 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    361     /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
    362      *
    363      * Since the chunk size is a power of two and the minimum chunk size is a lot
    364      * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
    365      * require a whole number of uint64_t elements in the allocation bitmap.  So,
    366      * for simplicity's sake, the bitmaps are allocated as one contiguous block
    367      * (see the worked example after the structure). */
    368     uint64_t               *pbmAlloc;
    369     /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
    370     uint32_t                cUnitsPerChunk;
    371     /** Number of bitmap elements per chunk (for quickly locating the bitmap
    372      * portion corresponding to a chunk). */
    373     uint32_t                cBitmapElementsPerChunk;
    374 #else
    375     /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
    376      * @{ */
    377     /** The size of the heap internal block header.  This is used to adjust the
    378      * request memory size to make sure there is exactly enough room for a header at
    379      * the end of the blocks we allocate before the next 64 byte alignment line. */
    380     uint32_t                cbHeapBlockHdr;
    381     /** The size of the initial heap allocation required to make sure the first
    382      *  allocation is correctly aligned. */
    383     uint32_t                cbHeapAlignTweak;
    384     /** The alignment tweak allocation address. */
    385     void                   *pvAlignTweak;
    386     /** @} */
    387 #endif
    388 
    389 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    390     /** Pointer to the array of unwind info running parallel to aChunks (same
    391      * allocation as this structure, located after the bitmaps).
    392      * (For Windows, the structures must reside within 32-bit RVA distance of the
    393      * actual chunk, so they are allocated off the chunk.) */
    394     PIEMEXECMEMCHUNKEHFRAME paEhFrames;
    395 #endif
    396 
    397     /** The allocation chunks. */
    398     RT_FLEXIBLE_ARRAY_EXTENSION
    399     IEMEXECMEMCHUNK         aChunks[RT_FLEXIBLE_ARRAY];
    400 } IEMEXECMEMALLOCATOR;
    401 /** Pointer to an executable memory allocator. */
    402 typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
    403 
    404 /** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
    405 #define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
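/* Worked example for the pbmAlloc layout described in IEMEXECMEMALLOCATOR,
   assuming a 2 MiB chunk: _2M / IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE gives
   cUnitsPerChunk = 16384, i.e. cBitmapElementsPerChunk = 16384 / 64 = 256
   uint64_t elements, so the allocation bitmap for a given chunk starts at
   pbmAlloc[256 * idxChunk]. */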
    406 
    407 
    408 static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
    409 
    410 
    411 /**
    412  * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
    413  * the heap statistics.
    414  */
    415 static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
    416                                                uint32_t cbReq, uint32_t idxChunk)
    417 {
    418     pExecMemAllocator->cAllocations += 1;
    419     pExecMemAllocator->cbAllocated  += cbReq;
    420 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    421     pExecMemAllocator->cbFree       -= cbReq;
    422 #else
    423     pExecMemAllocator->cbFree       -= RT_ALIGN_32(cbReq, 64);
    424 #endif
    425     pExecMemAllocator->idxChunkHint  = idxChunk;
    426 
    427 #ifdef RT_OS_DARWIN
    428     /*
    429      * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
    430      * on darwin.  So, we mark the pages returned as read+write after alloc and
    431      * expect the caller to call iemExecMemAllocatorReadyForUse when done
    432      * writing to the allocation.
    433      *
    434      * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
    435      * for details.
    436      */
    437     /** @todo detect if this is necessary... it wasn't required on 10.15 or
    438      *        whatever older version it was. */
    439     int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
    440     AssertRC(rc);
    441 #endif
    442 
    443     return pvRet;
    444 }
    445 
    446 
    447 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    448 static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
    449                                                 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
    450 {
    451     /*
    452      * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
    453      */
    454     Assert(!(cToScan & 63));
    455     Assert(!(idxFirst & 63));
    456     Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
    457     pbmAlloc += idxFirst / 64;
    458 
    459     /*
    460      * Scan the bitmap for cReqUnits consecutive clear bits
    461      */
    462     /** @todo This can probably be done more efficiently for non-x86 systems. */
    463     int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
    464     while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
    465     {
    466         uint32_t idxAddBit = 1;
    467         while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
    468             idxAddBit++;
    469         if (idxAddBit >= cReqUnits)
    470         {
    471             ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
    472 
    473             PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
    474             pChunk->cFreeUnits -= cReqUnits;
    475             pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
    476 
    477             void * const pvRet  = (uint8_t *)pChunk->pvChunk
    478                                 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
    479 
    480             return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
    481                                                     cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
    482         }
    483 
    484         iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
    485     }
    486     return NULL;
    487 }
    488 #endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
    489 
    490 
    491 static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
    492 {
    493 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    494     /*
    495      * Figure out how much to allocate.
    496      */
    497     uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    498     if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
    499     {
    500         uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
    501         uint32_t const   idxHint  = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
    502         if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
    503         {
    504             void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
    505                                                              pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
    506             if (pvRet)
    507                 return pvRet;
    508         }
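        /* Wrap around and rescan from the start of the chunk; the RT_MIN bound in
           the call below extends the scan slightly past the hint so that a free
           run straddling the hint is still found. */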
    509         return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
    510                                                   RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
    511                                                   cReqUnits, idxChunk);
    512     }
    513 #else
    514     void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
    515     if (pvRet)
    516         return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
    517 #endif
    518     return NULL;
    519 
    520 }
    521 
    522 
    523 /**
    524  * Allocates @a cbReq bytes of executable memory.
    525  *
    526  * @returns Pointer to the memory, NULL if out of memory or other problem
    527  *          encountered.
    528  * @param   pVCpu   The cross context virtual CPU structure of the calling
    529  *                  thread.
    530  * @param   cbReq   How many bytes are required.
    531  */
    532 static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
    533 {
    534     PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
    535     AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
    536     AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
    537 
    538 
    539     for (unsigned iIteration = 0;; iIteration++)
    540     {
    541         /*
    542          * Adjust the request size so it'll fit the allocator alignment/whatnot.
    543          *
    544          * For the RTHeapSimple allocator this means following the logic described
    545          * in iemExecMemAllocatorGrow and attempting to allocate it from one of the
    546          * existing chunks if we think we've got sufficient free memory around.
    547          *
    548          * For the alternative one we just align it up to a whole number of units.
    549          */
    550 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    551         cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    552 #else
    553         cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
    554 #endif
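        /* Example: a 200 byte request becomes 256 bytes (two 128 byte units) with
           the alternative allocator, and 224 bytes with RTHeapSimple (224 bytes
           plus the 32 byte block header is 256, ending on a 64 byte line). */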
    555         if (cbReq <= pExecMemAllocator->cbFree)
    556         {
    557             uint32_t const cChunks      = pExecMemAllocator->cChunks;
    558             uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
    559             for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
    560             {
    561                 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
    562                 if (pvRet)
    563                     return pvRet;
    564             }
    565             for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
    566             {
    567                 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
    568                 if (pvRet)
    569                     return pvRet;
    570             }
    571         }
    572 
    573         /*
    574          * Can we grow it with another chunk?
    575          */
    576         if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
    577         {
    578             int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
    579             AssertLogRelRCReturn(rc, NULL);
    580 
    581             uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
    582             void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
    583             if (pvRet)
    584                 return pvRet;
    585             AssertFailed();
    586         }
    587 
    588         /*
    589          * Try prune native TBs once.
    590          */
    591         if (iIteration == 0)
    592             iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
    593         else
    594         {
    595             /** @todo stats...   */
    596             return NULL;
    597         }
    598     }
    599 
    600 }
    601 
    602 
    603 /** This is a hook that we may need later for changing memory protection back
    604  *  to readonly+exec */
    605 static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
    606 {
    607 #ifdef RT_OS_DARWIN
    608     /* See iemExecMemAllocatorAllocTailCode for the explanation. */
    609     int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
    610     AssertRC(rc); RT_NOREF(pVCpu);
    611 
    612     /*
    613      * Flush the instruction cache:
    614      *      https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
    615      */
    616     /* sys_dcache_flush(pv, cb); - not necessary */
    617     sys_icache_invalidate(pv, cb);
    618 #else
    619     RT_NOREF(pVCpu, pv, cb);
    620 #endif
    621 }
    622 
    623 
    624 /**
    625  * Frees executable memory.
    626  */
    627 void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
    628 {
    629     PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
    630     Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
    631     Assert(pv);
    632 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    633     Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
    634 #else
    635     Assert(!((uintptr_t)pv & 63));
    636 #endif
    637 
    638     /* Align the size as we did when allocating the block. */
    639 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    640     cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    641 #else
    642     cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
    643 #endif
    644 
    645     /* Free it / assert sanity. */
    646 #if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
    647     uint32_t const cChunks = pExecMemAllocator->cChunks;
    648     uint32_t const cbChunk = pExecMemAllocator->cbChunk;
    649     bool           fFound  = false;
    650     for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
    651     {
    652         uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
    653         fFound = offChunk < cbChunk;
    654         if (fFound)
    655         {
    656 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    657             uint32_t const idxFirst  = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    658             uint32_t const cReqUnits = (uint32_t)cb       >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    659 
    660             /* Check that it's valid and free it. */
    661             uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
    662             AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
    663             for (uint32_t i = 1; i < cReqUnits; i++)
    664                 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
    665             ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
    666 
    667             pExecMemAllocator->aChunks[idxChunk].cFreeUnits  += cReqUnits;
    668             pExecMemAllocator->aChunks[idxChunk].idxFreeHint  = idxFirst;
    669 
    670             /* Update the stats. */
    671             pExecMemAllocator->cbAllocated  -= cb;
    672             pExecMemAllocator->cbFree       += cb;
    673             pExecMemAllocator->cAllocations -= 1;
    674             return;
    675 #else
    676             Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
    677             break;
    678 #endif
    679         }
    680     }
    681 # ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    682     AssertFailed();
    683 # else
    684     Assert(fFound);
    685 # endif
    686 #endif
    687 
    688 #ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    689     /* Update stats while cb is freshly calculated. */
    690     pExecMemAllocator->cbAllocated  -= cb;
    691     pExecMemAllocator->cbFree       += RT_ALIGN_Z(cb, 64);
    692     pExecMemAllocator->cAllocations -= 1;
    693 
    694     /* Free it. */
    695     RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
    696 #endif
    697 }
    698 
    699 
    700 
    701 #ifdef IN_RING3
    702 # ifdef RT_OS_WINDOWS
    703 
    704 /**
    705  * Initializes the unwind info structures for windows hosts.
    706  */
    707 static int
    708 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
    709                                                      void *pvChunk, uint32_t idxChunk)
    710 {
    711     RT_NOREF(pVCpu);
    712 
    713     /*
    714      * The AMD64 unwind opcodes.
    715      *
    716      * This is a program that starts with RSP after a RET instruction that
    717      * ends up in recompiled code, and the operations we describe here will
    718      * restore all non-volatile registers and bring RSP back to where our
    719      * RET address is.  This means it's reverse order from what happens in
    720      * the prologue.
    721      *
    722      * Note! Using a frame register approach here both because we have one
    723      *       and mainly because the UWOP_ALLOC_LARGE argument values
    724      *       would be a pain to write initializers for.  On the positive
    725      *       side, we're impervious to changes in the stack variable
    726      *       area and can deal with dynamic stack allocations if necessary.
    727      */
    728     static const IMAGE_UNWIND_CODE s_aOpcodes[] =
    729     {
    730         { { 16, IMAGE_AMD64_UWOP_SET_FPREG,     0 } },              /* RSP  = RBP - FrameOffset * 0x10 (0x60) */
    731         { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL,   0 } },              /* RSP += 8; */
    732         { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x15 } },   /* R15  = [RSP]; RSP += 8; */
    733         { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x14 } },   /* R14  = [RSP]; RSP += 8; */
    734         { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x13 } },   /* R13  = [RSP]; RSP += 8; */
    735         { {  8, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_x12 } },   /* R12  = [RSP]; RSP += 8; */
    736         { {  7, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xDI } },   /* RDI  = [RSP]; RSP += 8; */
    737         { {  6, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xSI } },   /* RSI  = [RSP]; RSP += 8; */
    738         { {  5, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xBX } },   /* RBX  = [RSP]; RSP += 8; */
    739         { {  4, IMAGE_AMD64_UWOP_PUSH_NONVOL,   X86_GREG_xBP } },   /* RBP  = [RSP]; RSP += 8; */
    740     };
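    /* The first field of each entry above is the prologue code offset at which
       the unwind operation takes effect, so the entries run from the end of the
       prologue back towards its start. */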
    741     union
    742     {
    743         IMAGE_UNWIND_INFO Info;
    744         uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
    745     } s_UnwindInfo =
    746     {
    747         {
    748             /* .Version = */        1,
    749             /* .Flags = */          0,
    750             /* .SizeOfProlog = */   16, /* whatever */
    751             /* .CountOfCodes = */   RT_ELEMENTS(s_aOpcodes),
    752             /* .FrameRegister = */  X86_GREG_xBP,
    753             /* .FrameOffset = */    (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
    754         }
    755     };
    756     AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
    757     AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
    758 
    759     /*
    760      * Calc how much space we need and allocate it off the exec heap.
    761      */
    762     unsigned const cFunctionEntries = 1;
    763     unsigned const cbUnwindInfo     = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
    764     unsigned const cbNeeded         = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
    765 #  ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    766     unsigned const cbNeededAligned  = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    767     PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
    768         = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
    769 #  else
    770     unsigned const cbNeededAligned  = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
    771                                     - pExecMemAllocator->cbHeapBlockHdr;
    772     PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)
    773         RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbNeededAligned, 32 /*cbAlignment*/);
    774 #  endif
    775     AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
    776     pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
    777 
    778     /*
    779      * Initialize the structures.
    780      */
    781     PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
    782 
    783     paFunctions[0].BeginAddress         = 0;
    784     paFunctions[0].EndAddress           = pExecMemAllocator->cbChunk;
    785     paFunctions[0].UnwindInfoAddress    = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
    786 
    787     memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
    788     memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
    789 
    790     /*
    791      * Register it.
    792      */
    793     uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
    794     AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
    795 
    796     return VINF_SUCCESS;
    797 }
    798 
    799 
    800 # else /* !RT_OS_WINDOWS */
    801 
    802 /**
    803  * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
    804  */
    805 DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
    806 {
    807     if (iValue >= 64)
    808     {
    809         Assert(iValue < 0x2000);
    810         *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
    811         *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
    812     }
    813     else if (iValue >= 0)
    814         *Ptr.pb++ = (uint8_t)iValue;
    815     else if (iValue > -64)
    816         *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
    817     else
    818     {
    819         Assert(iValue > -0x2000);
    820         *Ptr.pb++ = ((uint8_t)iValue & 0x7f)        | 0x80;
    821         *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
    822     }
    823     return Ptr;
    824 }
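/* Sample encodings produced by the above, for reference:
       iemDwarfPutLeb128(Ptr, -8)  emits 0x78       (single byte, sign bit 0x40 set)
       iemDwarfPutLeb128(Ptr, 200) emits 0xc8 0x01  (continuation bit 0x80 set) */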
    825 
    826 
    827 /**
    828  * Emits an ULEB128 encoded value (up to 64-bit wide).
    829  */
    830 DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
    831 {
    832     while (uValue >= 0x80)
    833     {
    834         *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
    835         uValue  >>= 7;
    836     }
    837     *Ptr.pb++ = (uint8_t)uValue;
    838     return Ptr;
    839 }
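/* Sample encoding, for reference: iemDwarfPutUleb128(Ptr, 300) emits 0xac 0x02. */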
    840 
    841 
    842 /**
    843  * Emits a CFA rule as register @a uReg + offset @a off.
    844  */
    845 DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
    846 {
    847     *Ptr.pb++ = DW_CFA_def_cfa;
    848     Ptr = iemDwarfPutUleb128(Ptr, uReg);
    849     Ptr = iemDwarfPutUleb128(Ptr, off);
    850     return Ptr;
    851 }
    852 
    853 
    854 /**
    855  * Emits a register (@a uReg) save location:
    856  *      CFA + @a off * data_alignment_factor
    857  */
    858 DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
    859 {
    860     if (uReg < 0x40)
    861         *Ptr.pb++ = DW_CFA_offset | uReg;
    862     else
    863     {
    864         *Ptr.pb++ = DW_CFA_offset_extended;
    865         Ptr = iemDwarfPutUleb128(Ptr, uReg);
    866     }
    867     Ptr = iemDwarfPutUleb128(Ptr, off);
    868     return Ptr;
    869 }
    870 
    871 
    872 #  if 0 /* unused */
    873 /**
    874  * Emits a register (@a uReg) save location, using signed offset:
    875  *      CFA + @a offSigned * data_alignment_factor
    876  */
    877 DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
    878 {
    879     *Ptr.pb++ = DW_CFA_offset_extended_sf;
    880     Ptr = iemDwarfPutUleb128(Ptr, uReg);
    881     Ptr = iemDwarfPutLeb128(Ptr, offSigned);
    882     return Ptr;
    883 }
    884 #  endif
    885 
    886 
    887 /**
    888  * Initializes the unwind info section for non-windows hosts.
    889  */
    890 static int
    891 iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
    892                                                      void *pvChunk, uint32_t idxChunk)
    893 {
    894     PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
    895     pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
    896 
    897     RTPTRUNION Ptr = { pEhFrame->abEhFrame };
    898 
    899     /*
    900      * Generate the CIE first.
    901      */
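    /* The resulting abEhFrame layout is roughly:
     *    [u32 CIE length][u32 0 = CIE id][version, augmentation, alignment
     *     factors, RA column, initial CFA instructions, NOP padding]
     *    [u32 FDE length][u32 back-offset to the CIE][start PC][PC range][NOPs]
     *    [u32 0][u32 0]                    <- terminator entry */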
    902 #  ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
    903     uint8_t const iDwarfVer = 3;
    904 #  else
    905     uint8_t const iDwarfVer = 4;
    906 #  endif
    907     RTPTRUNION const PtrCie = Ptr;
    908     *Ptr.pu32++ = 123;                                      /* The CIE length will be determined later. */
    909     *Ptr.pu32++ = 0 /*UINT32_MAX*/;                         /* I'm a CIE in .eh_frame speak. */
    910     *Ptr.pb++   = iDwarfVer;                                /* DWARF version */
    911     *Ptr.pb++   = 0;                                        /* Augmentation. */
    912     if (iDwarfVer >= 4)
    913     {
    914         *Ptr.pb++   = sizeof(uintptr_t);                    /* Address size. */
    915         *Ptr.pb++   = 0;                                    /* Segment selector size. */
    916     }
    917 #  ifdef RT_ARCH_AMD64
    918     Ptr = iemDwarfPutLeb128(Ptr, 1);                        /* Code alignment factor (LEB128 = 1). */
    919 #  else
    920     Ptr = iemDwarfPutLeb128(Ptr, 4);                        /* Code alignment factor (LEB128 = 4). */
    921 #  endif
    922     Ptr = iemDwarfPutLeb128(Ptr, -8);                       /* Data alignment factor (LEB128 = -8). */
    923 #  ifdef RT_ARCH_AMD64
    924     Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA);          /* Return address column (ULEB128) */
    925 #  elif defined(RT_ARCH_ARM64)
    926     Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR);          /* Return address column (ULEB128) */
    927 #  else
    928 #   error "port me"
    929 #  endif
    930     /* Initial instructions: */
    931 #  ifdef RT_ARCH_AMD64
    932     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16);   /* CFA     = RBP + 0x10 - first stack parameter */
    933     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA,  1);    /* Ret RIP = [CFA + 1*-8] */
    934     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2);    /* RBP     = [CFA + 2*-8] */
    935     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3);    /* RBX     = [CFA + 3*-8] */
    936     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4);    /* R12     = [CFA + 4*-8] */
    937     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5);    /* R13     = [CFA + 5*-8] */
    938     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6);    /* R14     = [CFA + 6*-8] */
    939     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7);    /* R15     = [CFA + 7*-8] */
    940 #  elif defined(RT_ARCH_ARM64)
    941 #   if 1
    942     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP,  16);   /* CFA     = BP + 0x10 - first stack parameter */
    943 #   else
    944     Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP,  IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
    945 #   endif
    946     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR,   1);   /* Ret PC  = [CFA + 1*-8] */
    947     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP,   2);   /* Ret BP  = [CFA + 2*-8] */
    948     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28,  3);   /* X28     = [CFA + 3*-8] */
    949     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27,  4);   /* X27     = [CFA + 4*-8] */
    950     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26,  5);   /* X26     = [CFA + 5*-8] */
    951     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25,  6);   /* X25     = [CFA + 6*-8] */
    952     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24,  7);   /* X24     = [CFA + 7*-8] */
    953     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23,  8);   /* X23     = [CFA + 8*-8] */
    954     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22,  9);   /* X22     = [CFA + 9*-8] */
    955     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10);   /* X21     = [CFA +10*-8] */
    956     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11);   /* X20     = [CFA +11*-8] */
    957     Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12);   /* X19     = [CFA +12*-8] */
    958     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    959     /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
    960 #  else
    961 #   error "port me"
    962 #  endif
    963     while ((Ptr.u - PtrCie.u) & 3)
    964         *Ptr.pb++ = DW_CFA_nop;
    965     /* Finalize the CIE size. */
    966     *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
    967 
    968     /*
    969      * Generate an FDE for the whole chunk area.
    970      */
    971 #  ifdef IEMNATIVE_USE_LIBUNWIND
    972     pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
    973 #  endif
    974     RTPTRUNION const PtrFde = Ptr;
    975     *Ptr.pu32++ = 123;                                      /* The FDE length will be determined later. */
    976     *Ptr.pu32   = Ptr.u - PtrCie.u;                         /* Negated self relative CIE address. */
    977     Ptr.pu32++;
    978     *Ptr.pu64++ = (uintptr_t)pvChunk;                       /* Absolute start PC of this FDE. */
    979     *Ptr.pu64++ = pExecMemAllocator->cbChunk;               /* PC range length for this FDE. */
    980 #  if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
    981     *Ptr.pb++ = DW_CFA_nop;
    982 #  endif
    983     while ((Ptr.u - PtrFde.u) & 3)
    984         *Ptr.pb++ = DW_CFA_nop;
    985     /* Finalize the FDE size. */
    986     *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
    987 
    988     /* Terminator entry. */
    989     *Ptr.pu32++ = 0;
    990     *Ptr.pu32++ = 0;            /* just to be sure... */
    991     Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
    992 
    993     /*
    994      * Register it.
    995      */
    996 #  ifdef IEMNATIVE_USE_LIBUNWIND
    997     __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
    998 #  else
    999     memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
    1000     __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
    1001 #  endif
    1002 
    1003 #  ifdef IEMNATIVE_USE_GDB_JIT
    1004     /*
    1005      * Now for telling GDB about this (experimental).
    1006      *
    1007      * This seems to work best with ET_DYN.
    1008      */
    1009     unsigned const cbNeeded        = sizeof(GDBJITSYMFILE);
    1010 #   ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1011     unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
    1012     GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
    1013 #   else
    1014     unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
    1015                                    - pExecMemAllocator->cbHeapBlockHdr;
    1016     GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbNeededAligned, 32 /*cbAlignment*/);
    1017 #   endif
    1018     AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
    1019     unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
    1020 
    1021     RT_ZERO(*pSymFile);
    1022 
    1023     /*
    1024      * The ELF header:
    1025      */
    1026     pSymFile->EHdr.e_ident[0]           = ELFMAG0;
    1027     pSymFile->EHdr.e_ident[1]           = ELFMAG1;
    1028     pSymFile->EHdr.e_ident[2]           = ELFMAG2;
    1029     pSymFile->EHdr.e_ident[3]           = ELFMAG3;
    1030     pSymFile->EHdr.e_ident[EI_VERSION]  = EV_CURRENT;
    1031     pSymFile->EHdr.e_ident[EI_CLASS]    = ELFCLASS64;
    1032     pSymFile->EHdr.e_ident[EI_DATA]     = ELFDATA2LSB;
    1033     pSymFile->EHdr.e_ident[EI_OSABI]    = ELFOSABI_NONE;
    1034 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1035     pSymFile->EHdr.e_type               = ET_DYN;
    1036 #   else
    1037     pSymFile->EHdr.e_type               = ET_REL;
    1038 #   endif
    1039 #   ifdef RT_ARCH_AMD64
    1040     pSymFile->EHdr.e_machine            = EM_AMD64;
    1041 #   elif defined(RT_ARCH_ARM64)
    1042     pSymFile->EHdr.e_machine            = EM_AARCH64;
    1043 #   else
    1044 #    error "port me"
    1045 #   endif
    1046     pSymFile->EHdr.e_version            = 1; /*?*/
    1047     pSymFile->EHdr.e_entry              = 0;
    1048 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1049     pSymFile->EHdr.e_phoff              = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
    1050 #   else
    1051     pSymFile->EHdr.e_phoff              = 0;
    1052 #   endif
    1053     pSymFile->EHdr.e_shoff              = sizeof(pSymFile->EHdr);
    1054     pSymFile->EHdr.e_flags              = 0;
    1055     pSymFile->EHdr.e_ehsize             = sizeof(pSymFile->EHdr);
    1056 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1057     pSymFile->EHdr.e_phentsize          = sizeof(pSymFile->aPhdrs[0]);
    1058     pSymFile->EHdr.e_phnum              = RT_ELEMENTS(pSymFile->aPhdrs);
    1059 #   else
    1060     pSymFile->EHdr.e_phentsize          = 0;
    1061     pSymFile->EHdr.e_phnum              = 0;
    1062 #   endif
    1063     pSymFile->EHdr.e_shentsize          = sizeof(pSymFile->aShdrs[0]);
    1064     pSymFile->EHdr.e_shnum              = RT_ELEMENTS(pSymFile->aShdrs);
    1065     pSymFile->EHdr.e_shstrndx           = 0; /* set later */
    1066 
    1067     uint32_t offStrTab = 0;
    1068 #define APPEND_STR(a_szStr) do { \
    1069         memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
    1070         offStrTab += sizeof(a_szStr); \
    1071         Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    1072     } while (0)
    1073 #define APPEND_STR_FMT(a_szStr, ...) do { \
    1074         offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
    1075         offStrTab++; \
    1076         Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
    1077     } while (0)
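    /* E.g. APPEND_STR(".eh_frame") copies the string including its terminator and
       advances offStrTab by sizeof(".eh_frame") = 10 bytes. */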
    1078 
    1079     /*
    1080      * Section headers.
    1081      */
    1082     /* Section header #0: NULL */
    1083     unsigned i = 0;
    1084     APPEND_STR("");
    1085     RT_ZERO(pSymFile->aShdrs[i]);
    1086     i++;
    1087 
    1088     /* Section header: .eh_frame */
    1089     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1090     APPEND_STR(".eh_frame");
    1091     pSymFile->aShdrs[i].sh_type         = SHT_PROGBITS;
    1092     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC | SHF_EXECINSTR;
    1093 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1094     pSymFile->aShdrs[i].sh_offset
    1095         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
    1096 #   else
    1097     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)&pSymFile->abEhFrame[0];
    1098     pSymFile->aShdrs[i].sh_offset       = 0;
    1099 #   endif
    1100 
    1101     pSymFile->aShdrs[i].sh_size         = sizeof(pEhFrame->abEhFrame);
    1102     pSymFile->aShdrs[i].sh_link         = 0;
    1103     pSymFile->aShdrs[i].sh_info         = 0;
    1104     pSymFile->aShdrs[i].sh_addralign    = 1;
    1105     pSymFile->aShdrs[i].sh_entsize      = 0;
    1106     memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
    1107     i++;
    1108 
    1109     /* Section header: .shstrtab */
    1110     unsigned const iShStrTab = i;
    1111     pSymFile->EHdr.e_shstrndx           = iShStrTab;
    1112     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1113     APPEND_STR(".shstrtab");
    1114     pSymFile->aShdrs[i].sh_type         = SHT_STRTAB;
    1115     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1116 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1117     pSymFile->aShdrs[i].sh_offset
    1118         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
    1119 #   else
    1120     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)&pSymFile->szzStrTab[0];
    1121     pSymFile->aShdrs[i].sh_offset       = 0;
    1122 #   endif
    1123     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->szzStrTab);
    1124     pSymFile->aShdrs[i].sh_link         = 0;
    1125     pSymFile->aShdrs[i].sh_info         = 0;
    1126     pSymFile->aShdrs[i].sh_addralign    = 1;
    1127     pSymFile->aShdrs[i].sh_entsize      = 0;
    1128     i++;
    1129 
    1130     /* Section header: .symtab */
    1131     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1132     APPEND_STR(".symtab");
    1133     pSymFile->aShdrs[i].sh_type         = SHT_SYMTAB;
    1134     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1135     pSymFile->aShdrs[i].sh_offset
    1136         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
    1137     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aSymbols);
    1138     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1139     pSymFile->aShdrs[i].sh_info         = RT_ELEMENTS(pSymFile->aSymbols);
    1140     pSymFile->aShdrs[i].sh_addralign    = sizeof(pSymFile->aSymbols[0].st_value);
    1141     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aSymbols[0]);
    1142     i++;
    1143 
    1144 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1145     /* Section header: .dynsym */
    1146     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1147     APPEND_STR(".dynsym");
    1148     pSymFile->aShdrs[i].sh_type         = SHT_DYNSYM;
    1149     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1150     pSymFile->aShdrs[i].sh_offset
    1151         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    1152     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aDynSyms);
    1153     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1154     pSymFile->aShdrs[i].sh_info         = RT_ELEMENTS(pSymFile->aDynSyms);
    1155     pSymFile->aShdrs[i].sh_addralign    = sizeof(pSymFile->aDynSyms[0].st_value);
    1156     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aDynSyms[0]);
    1157     i++;
    1158 #   endif
    1159 
    1160 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1161     /* Section header: .dynamic */
    1162     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1163     APPEND_STR(".dynamic");
    1164     pSymFile->aShdrs[i].sh_type         = SHT_DYNAMIC;
    1165     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
    1166     pSymFile->aShdrs[i].sh_offset
    1167         = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    1168     pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aDyn);
    1169     pSymFile->aShdrs[i].sh_link         = iShStrTab;
    1170     pSymFile->aShdrs[i].sh_info         = 0;
    1171     pSymFile->aShdrs[i].sh_addralign    = 1;
    1172     pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aDyn[0]);
    1173     i++;
    1174 #   endif
    1175 
    1176     /* Section header: .text */
    1177     unsigned const iShText = i;
    1178     pSymFile->aShdrs[i].sh_name         = offStrTab;
    1179     APPEND_STR(".text");
    1180     pSymFile->aShdrs[i].sh_type         = SHT_PROGBITS;
    1181     pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC | SHF_EXECINSTR;
    1182 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
    1183     pSymFile->aShdrs[i].sh_offset
    1184         = pSymFile->aShdrs[i].sh_addr   = sizeof(GDBJITSYMFILE);
    1185 #   else
    1186     pSymFile->aShdrs[i].sh_addr         = (uintptr_t)(pSymFile + 1);
    1187     pSymFile->aShdrs[i].sh_offset       = 0;
    1188 #   endif
    1189     pSymFile->aShdrs[i].sh_size         = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
    1190     pSymFile->aShdrs[i].sh_link         = 0;
    1191     pSymFile->aShdrs[i].sh_info         = 0;
    1192     pSymFile->aShdrs[i].sh_addralign    = 1;
    1193     pSymFile->aShdrs[i].sh_entsize      = 0;
    1194     i++;
    1195 
    1196     Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
    1197 
    1198 #   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
    1199     /*
    1200      * The program headers:
    1201      */
    1202     /* Everything in a single LOAD segment: */
    1203     i = 0;
    1204     pSymFile->aPhdrs[i].p_type          = PT_LOAD;
    1205     pSymFile->aPhdrs[i].p_flags         = PF_X | PF_R;
    1206     pSymFile->aPhdrs[i].p_offset
    1207         = pSymFile->aPhdrs[i].p_vaddr
    1208         = pSymFile->aPhdrs[i].p_paddr   = 0;
    1209     pSymFile->aPhdrs[i].p_filesz         /* Size of segment in file. */
    1210         = pSymFile->aPhdrs[i].p_memsz   = pExecMemAllocator->cbChunk - offSymFileInChunk;
    1211     pSymFile->aPhdrs[i].p_align         = HOST_PAGE_SIZE;
    1212     i++;
    1213     /* The .dynamic segment. */
    1214     pSymFile->aPhdrs[i].p_type          = PT_DYNAMIC;
    1215     pSymFile->aPhdrs[i].p_flags         = PF_R;
    1216     pSymFile->aPhdrs[i].p_offset
    1217         = pSymFile->aPhdrs[i].p_vaddr
    1218         = pSymFile->aPhdrs[i].p_paddr   = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
    1219     pSymFile->aPhdrs[i].p_filesz         /* Size of segment in file. */
    1220         = pSymFile->aPhdrs[i].p_memsz   = sizeof(pSymFile->aDyn);
    1221     pSymFile->aPhdrs[i].p_align         = sizeof(pSymFile->aDyn[0].d_tag);
    1222     i++;
    1223 
    1224     Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
    1225 
    1226     /*
    1227      * The dynamic section:
    1228      */
    1229     i = 0;
    1230     pSymFile->aDyn[i].d_tag             = DT_SONAME;
    1231     pSymFile->aDyn[i].d_un.d_val        = offStrTab;
    1232     APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
    1233     i++;
    1234     pSymFile->aDyn[i].d_tag             = DT_STRTAB;
    1235     pSymFile->aDyn[i].d_un.d_ptr        = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
    1236     i++;
    1237     pSymFile->aDyn[i].d_tag             = DT_STRSZ;
    1238     pSymFile->aDyn[i].d_un.d_val        = sizeof(pSymFile->szzStrTab);
    1239     i++;
    1240     pSymFile->aDyn[i].d_tag             = DT_SYMTAB;
    1241     pSymFile->aDyn[i].d_un.d_ptr        = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
    1242     i++;
    1243     pSymFile->aDyn[i].d_tag             = DT_SYMENT;
    1244     pSymFile->aDyn[i].d_un.d_val        = sizeof(pSymFile->aDynSyms[0]);
    1245     i++;
    1246     pSymFile->aDyn[i].d_tag             = DT_NULL;
    1247     i++;
    1248     Assert(i == RT_ELEMENTS(pSymFile->aDyn));
    1249 #   endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
    1250 
    1251     /*
    1252      * Symbol tables:
    1253      */
    1254     /** @todo gdb doesn't seem to really like this ...   */
    1255     i = 0;
    1256     pSymFile->aSymbols[i].st_name       = 0;
    1257     pSymFile->aSymbols[i].st_shndx      = SHN_UNDEF;
    1258     pSymFile->aSymbols[i].st_value      = 0;
    1259     pSymFile->aSymbols[i].st_size       = 0;
    1260     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
    1261     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1262 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1263     pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
    1264 #   endif
    1265     i++;
    1266 
    1267     pSymFile->aSymbols[i].st_name       = 0;
    1268     pSymFile->aSymbols[i].st_shndx      = SHN_ABS;
    1269     pSymFile->aSymbols[i].st_value      = 0;
    1270     pSymFile->aSymbols[i].st_size       = 0;
    1271     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
    1272     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1273     i++;
    1274 
    1275     pSymFile->aSymbols[i].st_name       = offStrTab;
    1276     APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
    1277 #   if 0
    1278     pSymFile->aSymbols[i].st_shndx      = iShText;
    1279     pSymFile->aSymbols[i].st_value      = 0;
    1280 #   else
    1281     pSymFile->aSymbols[i].st_shndx      = SHN_ABS;
    1282     pSymFile->aSymbols[i].st_value      = (uintptr_t)(pSymFile + 1);
    1283 #   endif
    1284     pSymFile->aSymbols[i].st_size       = pSymFile->aShdrs[iShText].sh_size;
    1285     pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
    1286     pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
    1287 #   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
    1288     pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
    1289     pSymFile->aDynSyms[1].st_value      = (uintptr_t)(pSymFile + 1);
    1290 #   endif
    1291     i++;
    1292 
    1293     Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
    1294     Assert(offStrTab < sizeof(pSymFile->szzStrTab));
    1295 
    1296     /*
    1297      * The GDB JIT entry and informing GDB.
    1298      */
    1299     pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
    1300 #   if 1
    1301     pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
    1302 #   else
    1303     pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
    1304 #   endif
    1305 
    1306     RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
    1307     RTCritSectEnter(&g_IemNativeGdbJitLock);
    1308     pEhFrame->GdbJitEntry.pNext      = NULL;
    1309     pEhFrame->GdbJitEntry.pPrev      = __jit_debug_descriptor.pTail;
    1310     if (__jit_debug_descriptor.pTail)
    1311         __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
    1312     else
    1313         __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
    1314     __jit_debug_descriptor.pTail     = &pEhFrame->GdbJitEntry;
    1315     __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
    1316 
    1317     /* Notify GDB: */
    1318     __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
    1319     __jit_debug_register_code();
    1320     __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
    1321     RTCritSectLeave(&g_IemNativeGdbJitLock);
    1322 
    1323 #  else  /* !IEMNATIVE_USE_GDB_JIT */
    1324     RT_NOREF(pVCpu);
    1325 #  endif /* !IEMNATIVE_USE_GDB_JIT */
    1326 
    1327     return VINF_SUCCESS;
    1328 }
    1329 
    1330 # endif /* !RT_OS_WINDOWS */
    1331 #endif /* IN_RING3 */
    1332 
    1333 
    1334 /**
    1335  * Adds another chunk to the executable memory allocator.
    1336  *
    1337  * This is used by the init code for the initial allocation and later by the
    1338  * regular allocator function when it's out of memory.
    1339  */
    1340 static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
    1341 {
    1342     /* Check that we have room for growth. */
    1343     uint32_t const idxChunk = pExecMemAllocator->cChunks;
    1344     AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
    1345 
    1346     /* Allocate a chunk. */
    1347 #ifdef RT_OS_DARWIN
    1348     void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
    1349 #else
    1350     void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
    1351 #endif
    1352     AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
    1353 
    1354 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1355     int rc = VINF_SUCCESS;
    1356 #else
    1357     /* Initialize the heap for the chunk. */
    1358     RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
    1359     int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
    1360     AssertRC(rc);
    1361     if (RT_SUCCESS(rc))
    1362     {
    1363         /*
    1364          * We want the memory to be aligned on a 64 byte boundary, so the first time thru
    1365          * here we do some exploratory allocations to see how we can achieve this.
    1366          * On subsequent runs we only make an initial adjustment allocation, if
    1367          * necessary.
    1368          *
    1369          * Since we own the heap implementation, we know that the internal block
    1370          * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
    1371          * so all we need to do wrt allocation size adjustments is to add 32 bytes
    1372          * to the size, align up by 64 bytes, and subtract 32 bytes.
    1373          *
    1374          * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
    1375          * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
    1376          * allocation to force subsequent allocations to return 64 byte aligned
    1377          * user areas.
    1378          */
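        /* Concretely: the anchor block occupies bytes 0..63 of the chunk, the
           64 byte tweak allocation plus its 32 byte header then ends at offset
           160, so the next block's user area (after its own 32 byte header)
           starts at offset 192, which is 64 byte aligned. */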
    1379         if (!pExecMemAllocator->cbHeapBlockHdr)
    1380         {
    1381             pExecMemAllocator->cbHeapBlockHdr   = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
    1382             pExecMemAllocator->cbHeapAlignTweak = 64;
    1383             pExecMemAllocator->pvAlignTweak     = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
    1384                                                                     32 /*cbAlignment*/);
    1385             AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
    1386 
    1387             void *pvTest1 = RTHeapSimpleAlloc(hHeap,
    1388                                                 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
    1389                                               - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
    1390             AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
    1391             AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
    1392 
    1393             void *pvTest2 = RTHeapSimpleAlloc(hHeap,
    1394                                                 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
    1395                                               - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
    1396             AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
    1397             AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
    1398 
    1399             RTHeapSimpleFree(hHeap, pvTest2);
    1400             RTHeapSimpleFree(hHeap, pvTest1);
    1401         }
    1402         else
    1403         {
    1404             pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap,  pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
    1405             AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
    1406         }
    1407         if (RT_SUCCESS(rc))
    1408 #endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
    1409         {
    1410             /*
    1411              * Add the chunk.
    1412              *
    1413              * This must be done before the unwind init so windows can allocate
    1414              * memory from the chunk when using the alternative sub-allocator.
    1415              */
    1416             pExecMemAllocator->aChunks[idxChunk].pvChunk      = pvChunk;
    1417 #ifdef IN_RING3
    1418             pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
    1419 #endif
    1420 #ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1421             pExecMemAllocator->aChunks[idxChunk].hHeap        = hHeap;
    1422 #else
    1423             pExecMemAllocator->aChunks[idxChunk].cFreeUnits   = pExecMemAllocator->cUnitsPerChunk;
    1424             pExecMemAllocator->aChunks[idxChunk].idxFreeHint  = 0;
    1425             memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
    1426                    0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
    1427 #endif
    1428 
    1429             pExecMemAllocator->cChunks      = idxChunk + 1;
    1430             pExecMemAllocator->idxChunkHint = idxChunk;
    1431 
    1432 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1433             pExecMemAllocator->cbTotal     += pExecMemAllocator->cbChunk;
    1434             pExecMemAllocator->cbFree      += pExecMemAllocator->cbChunk;
    1435 #else
    1436             size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
    1437             pExecMemAllocator->cbTotal     += cbFree;
    1438             pExecMemAllocator->cbFree      += cbFree;
    1439 #endif
    1440 
    1441 #ifdef IN_RING3
    1442             /*
    1443              * Initialize the unwind information (this cannot really fail atm).
    1444              * (This sets pvUnwindInfo.)
    1445              */
    1446             rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
    1447             if (RT_SUCCESS(rc))
    1448 #endif
    1449             {
    1450                 return VINF_SUCCESS;
    1451             }
    1452 
    1453 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1454             /* Just in case the impossible happens, undo the above: */
    1455             pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
    1456             pExecMemAllocator->cbFree  -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    1457             pExecMemAllocator->cChunks  = idxChunk;
    1458             memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
    1459                    0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
    1460             pExecMemAllocator->aChunks[idxChunk].pvChunk    = NULL;
    1461             pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
    1462 #endif
    1463         }
    1464 #ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1465     }
    1466 #endif
    1467     RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
    1468     RT_NOREF(pVCpu);
    1469     return rc;
    1470 }
    1471 
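Editor's note: a minimal standalone sketch of the alignment arithmetic probed by pvTest1/pvTest2 above, assuming the RTHeapSimple block header really is sizeof(void *) * 4 bytes as the code states. Requesting RT_ALIGN_32(cb + cbHeapBlockHdr, 64) - cbHeapBlockHdr bytes makes the header plus user area fill whole 64-byte units, so each user pointer stays 64-byte aligned once the initial tweak allocation has set the phase:

    /* Hypothetical illustration, not part of the changeset. */
    #include <assert.h>
    #include <stdint.h>

    #define MY_ALIGN_32(cb, uAlign) (((cb) + ((uAlign) - 1)) & ~(uint32_t)((uAlign) - 1))

    int main(void)
    {
        uint32_t const cbHdr = sizeof(void *) * 4;  /* 32 bytes on a 64-bit host. */
        for (uint32_t cbReq = 1; cbReq < 4096; cbReq++)
        {
            /* Same formula as the pvTest1/pvTest2 requests above: */
            uint32_t const cbAlloc = MY_ALIGN_32(cbReq + cbHdr, 64) - cbHdr;
            /* Header + user area span a whole number of 64-byte units, so
               back-to-back blocks keep their user areas 64-byte aligned. */
            assert((cbAlloc + cbHdr) % 64 == 0);
        }
        return 0;
    }
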
    1472 
    1473 /**
    1474  * Initializes the executable memory allocator for native recompilation on the
    1475  * calling EMT.
    1476  *
    1477  * @returns VBox status code.
    1478  * @param   pVCpu       The cross context virtual CPU structure of the calling
    1479  *                      thread.
    1480  * @param   cbMax       The max size of the allocator.
    1481  * @param   cbInitial   The initial allocator size.
    1482  * @param   cbChunk     The chunk size, 0 or UINT32_MAX for default (@a cbMax
    1483  *                      dependent).
    1484  */
    1485 int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
    1486 {
    1487     /*
    1488      * Validate input.
    1489      */
    1490     AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
    1491     AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
    1492     AssertLogRelMsgReturn(   cbChunk == UINT32_MAX
    1493                           || cbChunk == 0
    1494                           || (   RT_IS_POWER_OF_TWO(cbChunk)
    1495                               && cbChunk >= _1M
    1496                               && cbChunk <= _256M
    1497                               && cbChunk <= cbMax),
    1498                           ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
    1499                           VERR_OUT_OF_RANGE);
    1500 
    1501     /*
    1502      * Adjust/figure out the chunk size.
    1503      */
    1504     if (cbChunk == 0 || cbChunk == UINT32_MAX)
    1505     {
    1506         if (cbMax >= _256M)
    1507             cbChunk = _64M;
    1508         else
    1509         {
    1510             if (cbMax < _16M)
    1511                 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
    1512             else
    1513                 cbChunk = (uint32_t)cbMax / 4;
    1514             if (!RT_IS_POWER_OF_TWO(cbChunk))
    1515                 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
    1516         }
    1517     }
    1518 
    1519     if (cbChunk > cbMax)
    1520         cbMax = cbChunk;
    1521     else
    1522         cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
    1523     uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
    1524     AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
    1525 
    1526     /*
    1527      * Allocate and initialize the allocator instance.
    1528      */
    1529     size_t       cbNeeded   = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
    1530 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1531     size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
    1532     size_t const cbBitmap   = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
    1533     cbNeeded += cbBitmap * cMaxChunks;
    1534     AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
    1535     Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
    1536 #endif
    1537 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    1538     size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
    1539     cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
    1540 #endif
    1541     PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
    1542     AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
    1543                           VERR_NO_MEMORY);
    1544     pExecMemAllocator->uMagic       = IEMEXECMEMALLOCATOR_MAGIC;
    1545     pExecMemAllocator->cbChunk      = cbChunk;
    1546     pExecMemAllocator->cMaxChunks   = cMaxChunks;
    1547     pExecMemAllocator->cChunks      = 0;
    1548     pExecMemAllocator->idxChunkHint = 0;
    1549     pExecMemAllocator->cAllocations = 0;
    1550     pExecMemAllocator->cbTotal      = 0;
    1551     pExecMemAllocator->cbFree       = 0;
    1552     pExecMemAllocator->cbAllocated  = 0;
    1553 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1554     pExecMemAllocator->pbmAlloc                 = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
    1555     pExecMemAllocator->cUnitsPerChunk           = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
    1556     pExecMemAllocator->cBitmapElementsPerChunk  = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
    1557     memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
    1558 #endif
    1559 #if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
    1560     pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
    1561 #endif
    1562     for (uint32_t i = 0; i < cMaxChunks; i++)
    1563     {
    1564 #ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
    1565         pExecMemAllocator->aChunks[i].cFreeUnits   = 0;
    1566         pExecMemAllocator->aChunks[i].idxFreeHint  = 0;
    1567 #else
    1568         pExecMemAllocator->aChunks[i].hHeap        = NIL_RTHEAPSIMPLE;
    1569 #endif
    1570         pExecMemAllocator->aChunks[i].pvChunk      = NULL;
    1571 #ifdef IN_RING0
    1572         pExecMemAllocator->aChunks[i].hMemObj      = NIL_RTR0MEMOBJ;
    1573 #else
    1574         pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
    1575 #endif
    1576     }
    1577     pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
    1578 
    1579     /*
    1580      * Do the initial allocations.
    1581      */
    1582     while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
    1583     {
    1584         int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
    1585         AssertLogRelRCReturn(rc, rc);
    1586     }
    1587 
    1588     pExecMemAllocator->idxChunkHint = 0;
    1589 
    1590     return VINF_SUCCESS;
    1591 }
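
Editor's note: the cbChunk defaulting above is worth a worked example; below is a sketch of the same rules under a hypothetical helper name, not an API from the changeset. For cbMax = 96M the middle branch gives 96M / 4 = 24M, which the ASMBitLastSetU32/RT_BIT_32 step rounds up to the next power of two, 32M; for cbMax >= 256M the chunk size is simply 64M:

    /* Hypothetical restatement of the defaulting rules above. */
    static uint32_t iemExecMemSketchDefaultChunkSize(uint64_t cbMax)
    {
        uint32_t cbChunk;
        if (cbMax >= _256M)
            cbChunk = _64M;
        else
        {
            if (cbMax < _16M)
                cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
            else
                cbChunk = (uint32_t)cbMax / 4;
            if (!RT_IS_POWER_OF_TWO(cbChunk))
                cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk)); /* round up to a power of two */
        }
        return cbChunk; /* the caller still rounds cbMax up/down against this */
    }
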
    1592 
    1593 
    1594 /*********************************************************************************************************************************
    1595 *   Native Recompilation                                                                                                         *
    1596 *********************************************************************************************************************************/
    1597 
    1598 
    1599 /**
    1600  * Used by TB code when encountering a non-zero status or rcPassUp after a call.
    1601  */
    1602 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
    1603 {
    1604     pVCpu->iem.s.cInstructions += idxInstr;
    1605     return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
    1606 }
    1607 
    1608 
    1609 /**
    1610  * Used by TB code when it wants to raise a \#GP(0).
    1611  */
    1612 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
    1613 {
    1614     iemRaiseGeneralProtectionFault0Jmp(pVCpu);
    1615 #ifndef _MSC_VER
    1616     return VINF_IEM_RAISED_XCPT; /* not reached */
    1617 #endif
    1618 }
    1619 
    1620 
    1621 /**
    1622  * Used by TB code when detecting opcode changes.
    1623  * @see iemThreadeFuncWorkerObsoleteTb
    1624  */
    1625 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
    1626 {
    1627     /* We set fSafeToFree to false because we're being called in the context
    1628        of a TB callback function, which for native TBs means we cannot release
    1629        the executable memory until we've returned our way back to iemTbExec, as
    1630        that return path goes via the native code generated for the TB. */
    1631     Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
    1632     iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
    1633     return VINF_IEM_REEXEC_BREAK;
    1634 }
    1635 
    1636 
    1637 /**
    1638  * Used by TB code when we need to switch to a TB with CS.LIM checking.
    1639  */
    1640 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
    1641 {
    1642     Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
    1643           pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
    1644           (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
    1645           pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
    1646     STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
    1647     return VINF_IEM_REEXEC_BREAK;
    1648 }
    1649 
    1650 
    1651 /**
    1652  * Used by TB code when we missed a PC check after a branch.
    1653  */
    1654 IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
    1655 {
    1656     Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
    1657           pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
    1658           pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
    1659           pVCpu->iem.s.pbInstrBuf));
    1660     STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
    1661     return VINF_IEM_REEXEC_BREAK;
    1662 }
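
Editor's note: the GCPhysWithOffset value logged above is derived rather than stored; spelled out (hypothetical local names, same fields as the Log7 statement), the current flat PC is cs.u64Base + rip, and since pbInstrBuf maps guest-physical GCPhysInstrBuf at flat address uInstrBufPc, the delta indexes into that mapping:

    /* Hypothetical restatement of the logged address computation. */
    uint64_t const uFlatPc          = pVCpu->cpum.GstCtx.cs.u64Base + pVCpu->cpum.GstCtx.rip;
    RTGCPHYS const GCPhysWithOffset = pVCpu->iem.s.GCPhysInstrBuf
                                    + (uFlatPc - pVCpu->iem.s.uInstrBufPc);
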
    1663 
    1664 
    1665 
    1666 /*********************************************************************************************************************************
    1667 *   Helpers: Segmented memory fetches and stores.                                                                                *
    1668 *********************************************************************************************************************************/
    1669 
    1670 /**
    1671  * Used by TB code to load unsigned 8-bit data w/ segmentation.
    1672  */
    1673 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1674 {
    1675 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1676     return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1677 #else
    1678     return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1679 #endif
    1680 }
    1681 
    1682 
    1683 /**
    1684  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1685  * to 16 bits.
    1686  */
    1687 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1688 {
    1689 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1690     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1691 #else
    1692     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1693 #endif
    1694 }
    1695 
    1696 
    1697 /**
    1698  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1699  * to 32 bits.
    1700  */
    1701 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1702 {
    1703 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1704     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1705 #else
    1706     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1707 #endif
    1708 }
    1709 
    1710 /**
    1711  * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
    1712  * to 64 bits.
    1713  */
    1714 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1715 {
    1716 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1717     return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1718 #else
    1719     return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
    1720 #endif
    1721 }
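
Editor's note: the cast chains in these helpers extend in two distinct steps, and the distinction matters; here is a minimal standalone check (plain C, not part of the changeset) of what the two shapes evaluate to:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint8_t const bFetched = 0x80; /* -128 when viewed as signed 8-bit */
        /* Sign-extend 8 -> 32 bits, then zero-extend 32 -> 64, as in the _Sx_U32 helpers: */
        assert(((uint64_t)(uint32_t)(int32_t)(int8_t)bFetched) == UINT64_C(0x00000000FFFFFF80));
        /* Sign-extend straight to 64 bits, as in the _Sx_U64 helpers: */
        assert(((uint64_t)(int64_t)(int8_t)bFetched)           == UINT64_C(0xFFFFFFFFFFFFFF80));
        return 0;
    }
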
    1722 
    1723 
    1724 /**
    1725  * Used by TB code to load unsigned 16-bit data w/ segmentation.
    1726  */
    1727 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1728 {
    1729 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1730     return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1731 #else
    1732     return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
    1733 #endif
    1734 }
    1735 
    1736 
    1737 /**
    1738  * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
    1739  * to 32 bits.
    1740  */
    1741 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1742 {
    1743 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1744     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1745 #else
    1746     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
    1747 #endif
    1748 }
    1749 
    1750 
    1751 /**
    1752  * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
    1753  * to 64 bits.
    1754  */
    1755 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1756 {
    1757 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1758     return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1759 #else
    1760     return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
    1761 #endif
    1762 }
    1763 
    1764 
    1765 /**
    1766  * Used by TB code to load unsigned 32-bit data w/ segmentation.
    1767  */
    1768 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1769 {
    1770 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1771     return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1772 #else
    1773     return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
    1774 #endif
    1775 }
    1776 
    1777 
    1778 /**
    1779  * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
    1780  * to 64 bits.
    1781  */
    1782 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1783 {
    1784 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1785     return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1786 #else
    1787     return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
    1788 #endif
    1789 }
    1790 
    1791 
    1792 /**
    1793  * Used by TB code to load unsigned 64-bit data w/ segmentation.
    1794  */
    1795 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
    1796 {
    1797 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1798     return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
    1799 #else
    1800     return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
    1801 #endif
    1802 }
    1803 
    1804 
    1805 /**
    1806  * Used by TB code to store unsigned 8-bit data w/ segmentation.
    1807  */
    1808 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
    1809 {
    1810 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    1811     iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
    1812 #else
    1813     iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
    1814 #endif
    1815 }
    1816 
    1817 
    1818 /**
    1819  * Used by TB code to store unsigned 16-bit data w/ segmentation.
    1820  */
    1821 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
    1822 {
    1823 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    1824     iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
    1825 #else
    1826     iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
    1827 #endif
    1828 }
    1829 
    1830 
    1831 /**
    1832  * Used by TB code to store unsigned 32-bit data w/ segmentation.
    1833  */
    1834 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
    1835 {
    1836 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    1837     iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
    1838 #else
    1839     iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
    1840 #endif
    1841 }
    1842 
    1843 
    1844 /**
    1845  * Used by TB code to store unsigned 64-bit data w/ segmentation.
    1846  */
    1847 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
    1848 {
    1849 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    1850     iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
    1851 #else
    1852     iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
    1853 #endif
    1854 }
    1855 
    1856 
    1857 
    1858 /**
    1859  * Used by TB code to store an unsigned 16-bit value onto a generic stack.
    1860  */
    1861 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
    1862 {
    1863 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    1864     iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
    1865 #else
    1866     iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
    1867 #endif
    1868 }
    1869 
    1870 
    1871 /**
    1872  * Used by TB code to store an unsigned 32-bit value onto a generic stack.
    1873  */
    1874 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    1875 {
    1876 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    1877     iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
    1878 #else
    1879     iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
    1880 #endif
    1881 }
    1882 
    1883 
    1884 /**
    1885  * Used by TB code to store a 32-bit selector value onto a generic stack.
    1886  *
    1887  * Intel CPUs don't write a whole dword, hence the special function.
    1888  */
    1889 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    1890 {
    1891 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    1892     iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
    1893 #else
    1894     iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
    1895 #endif
    1896 }
    1897 
    1898 
    1899 /**
    1900  * Used by TB code to store an unsigned 64-bit value onto a generic stack.
    1901  */
    1902 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
    1903 {
    1904 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    1905     iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
    1906 #else
    1907     iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
    1908 #endif
    1909 }
    1910 
    1911 
    1912 /**
    1913  * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
    1914  */
    1915 IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1916 {
    1917 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    1918     return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
    1919 #else
    1920     return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
    1921 #endif
    1922 }
    1923 
    1924 
    1925 /**
    1926  * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
    1927  */
    1928 IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1929 {
    1930 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    1931     return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
    1932 #else
    1933     return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
    1934 #endif
    1935 }
    1936 
    1937 
    1938 /**
    1939  * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
    1940  */
    1941 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1942 {
    1943 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    1944     return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
    1945 #else
    1946     return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
    1947 #endif
    1948 }
    1949 
    1950 
    1951 
    1952 /*********************************************************************************************************************************
    1953 *   Helpers: Flat memory fetches and stores.                                                                                     *
    1954 *********************************************************************************************************************************/
    1955 
    1956 /**
    1957  * Used by TB code to load unsigned 8-bit data w/ flat address.
    1958  * @note Zero extending the value to 64-bit to simplify assembly.
    1959  */
    1960 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1961 {
    1962 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1963     return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    1964 #else
    1965     return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    1966 #endif
    1967 }
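
Editor's note: all of these flat helpers reuse the segmented Safe workers, passing UINT8_MAX as iSegReg to mean "no segment applied"; a segmented caller passes a real segment register index instead. A two-line sketch of the contrast (hypothetical locals, same worker as above):

    uint8_t bSeg  = iemMemFetchDataU8SafeJmp(pVCpu, X86_SREG_DS, GCPtrMem); /* DS-relative  */
    uint8_t bFlat = iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX,   GCPtrMem); /* flat address */
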
    1968 
    1969 
    1970 /**
    1971  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    1972  * to 16 bits.
    1973  * @note Zero extending the value to 64-bit to simplify assembly.
    1974  */
    1975 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1976 {
    1977 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1978     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    1979 #else
    1980     return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    1981 #endif
    1982 }
    1983 
    1984 
    1985 /**
    1986  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    1987  * to 32 bits.
    1988  * @note Zero extending the value to 64-bit to simplify assembly.
    1989  */
    1990 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    1991 {
    1992 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    1993     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    1994 #else
    1995     return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    1996 #endif
    1997 }
    1998 
    1999 
    2000 /**
    2001  * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
    2002  * to 64 bits.
    2003  */
    2004 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2005 {
    2006 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2007     return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2008 #else
    2009     return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
    2010 #endif
    2011 }
    2012 
    2013 
    2014 /**
    2015  * Used by TB code to load unsigned 16-bit data w/ flat address.
    2016  * @note Zero extending the value to 64-bit to simplify assembly.
    2017  */
    2018 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2019 {
    2020 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2021     return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2022 #else
    2023     return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
    2024 #endif
    2025 }
    2026 
    2027 
    2028 /**
    2029  * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
    2030  * to 32 bits.
    2031  * @note Zero extending the value to 64-bit to simplify assembly.
    2032  */
    2033 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2034 {
    2035 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2036     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2037 #else
    2038     return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
    2039 #endif
    2040 }
    2041 
    2042 
    2043 /**
    2044  * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
    2045  * to 64 bits.
    2047  */
    2048 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2049 {
    2050 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2051     return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2052 #else
    2053     return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
    2054 #endif
    2055 }
    2056 
    2057 
    2058 /**
    2059  * Used by TB code to load unsigned 32-bit data w/ flat address.
    2060  * @note Zero extending the value to 64-bit to simplify assembly.
    2061  */
    2062 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2063 {
    2064 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2065     return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2066 #else
    2067     return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
    2068 #endif
    2069 }
    2070 
    2071 
    2072 /**
    2073  * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
    2074  * to 64 bits.
    2076  */
    2077 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2078 {
    2079 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2080     return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2081 #else
    2082     return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
    2083 #endif
    2084 }
    2085 
    2086 
    2087 /**
    2088  * Used by TB code to load unsigned 64-bit data w/ flat address.
    2089  */
    2090 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2091 {
    2092 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
    2093     return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
    2094 #else
    2095     return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
    2096 #endif
    2097 }
    2098 
    2099 
    2100 /**
    2101  * Used by TB code to store unsigned 8-bit data w/ flat address.
    2102  */
    2103 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
    2104 {
    2105 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2106     iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
    2107 #else
    2108     iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
    2109 #endif
    2110 }
    2111 
    2112 
    2113 /**
    2114  * Used by TB code to store unsigned 16-bit data w/ flat address.
    2115  */
    2116 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
    2117 {
    2118 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2119     iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
    2120 #else
    2121     iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
    2122 #endif
    2123 }
    2124 
    2125 
    2126 /**
    2127  * Used by TB code to store unsigned 32-bit data w/ flat address.
    2128  */
    2129 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2130 {
    2131 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2132     iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
    2133 #else
    2134     iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
    2135 #endif
    2136 }
    2137 
    2138 
    2139 /**
    2140  * Used by TB code to store unsigned 64-bit data w/ flat address.
    2141  */
    2142 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
    2143 {
    2144 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
    2145     iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
    2146 #else
    2147     iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
    2148 #endif
    2149 }
    2150 
    2151 
    2152 
    2153 /**
    2154  * Used by TB code to store an unsigned 16-bit value onto a flat stack.
    2155  */
    2156 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
    2157 {
    2158 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2159     iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
    2160 #else
    2161     iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
    2162 #endif
    2163 }
    2164 
    2165 
    2166 /**
    2167  * Used by TB code to store an unsigned 32-bit value onto a flat stack.
    2168  */
    2169 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2170 {
    2171 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2172     iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
    2173 #else
    2174     iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
    2175 #endif
    2176 }
    2177 
    2178 
    2179 /**
    2180  * Used by TB code to store a segment selector value onto a flat stack.
    2181  *
    2182  * Intel CPUs don't write a whole dword, hence the special function.
    2183  */
    2184 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
    2185 {
    2186 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2187     iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
    2188 #else
    2189     iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
    2190 #endif
    2191 }
    2192 
    2193 
    2194 /**
    2195  * Used by TB code to store an unsigned 64-bit value onto a flat stack.
    2196  */
    2197 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
    2198 {
    2199 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
    2200     iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
    2201 #else
    2202     iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
    2203 #endif
    2204 }
    2205 
    2206 
    2207 /**
    2208  * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
    2209  */
    2210 IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2211 {
    2212 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2213     return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
    2214 #else
    2215     return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
    2216 #endif
    2217 }
    2218 
    2219 
    2220 /**
    2221  * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
    2222  */
    2223 IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2224 {
    2225 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2226     return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
    2227 #else
    2228     return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
    2229 #endif
    2230 }
    2231 
    2232 
    2233 /**
    2234  * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
    2235  */
    2236 IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
    2237 {
    2238 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
    2239     return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
    2240 #else
    2241     return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
    2242 #endif
    2243 }
    2244 
    2245 
    2246 
    2247 /*********************************************************************************************************************************
    2248 *   Helpers: Segmented memory mapping.                                                                                           *
    2249 *********************************************************************************************************************************/
    2250 
    2251 /**
    2252  * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
    2253  */
    2254 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2255                                                                RTGCPTR GCPtrMem, uint8_t iSegReg))
    2256 {
    2257 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2258     return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2259 #else
    2260     return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2261 #endif
    2262 }
    2263 
    2264 
    2265 /**
    2266  * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
    2267  */
    2268 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2269                                                                RTGCPTR GCPtrMem, uint8_t iSegReg))
    2270 {
    2271 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2272     return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2273 #else
    2274     return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2275 #endif
    2276 }
    2277 
    2278 
    2279 /**
    2280  * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
    2281  */
    2282 IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2283                                                                      RTGCPTR GCPtrMem, uint8_t iSegReg))
    2284 {
    2285 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2286     return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2287 #else
    2288     return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2289 #endif
    2290 }
    2291 
    2292 
    2293 /**
    2294  * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
    2295  */
    2296 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2297                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2298 {
    2299 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2300     return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2301 #else
    2302     return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2303 #endif
    2304 }
    2305 
    2306 
    2307 /**
    2308  * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
    2309  */
    2310 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2311                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2312 {
    2313 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2314     return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2315 #else
    2316     return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2317 #endif
    2318 }
    2319 
    2320 
    2321 /**
    2322  * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
    2323  */
    2324 IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2325                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2326 {
    2327 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2328     return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2329 #else
    2330     return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2331 #endif
    2332 }
    2333 
    2334 
    2335 /**
    2336  * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
    2337  */
    2338 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2339                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2340 {
    2341 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2342     return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2343 #else
    2344     return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2345 #endif
    2346 }
    2347 
    2348 
    2349 /**
    2350  * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
    2351  */
    2352 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2353                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2354 {
    2355 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2356     return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2357 #else
    2358     return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2359 #endif
    2360 }
    2361 
    2362 
    2363 /**
    2364  * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
    2365  */
    2366 IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2367                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2368 {
    2369 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2370     return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2371 #else
    2372     return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2373 #endif
    2374 }
    2375 
    2376 
    2377 /**
    2378  * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
    2379  */
    2380 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2381                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2382 {
    2383 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2384     return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2385 #else
    2386     return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2387 #endif
    2388 }
    2389 
    2390 
    2391 /**
    2392  * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
    2393  */
    2394 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2395                                                                  RTGCPTR GCPtrMem, uint8_t iSegReg))
    2396 {
    2397 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2398     return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2399 #else
    2400     return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2401 #endif
    2402 }
    2403 
    2404 
    2405 /**
    2406  * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
    2407  */
    2408 IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2409                                                                        RTGCPTR GCPtrMem, uint8_t iSegReg))
    2410 {
    2411 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2412     return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2413 #else
    2414     return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2415 #endif
    2416 }
    2417 
    2418 
    2419 /**
    2420  * Used by TB code to map 80-bit float data writeonly w/ segmentation.
    2421  */
    2422 IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2423                                                                    RTGCPTR GCPtrMem, uint8_t iSegReg))
    2424 {
    2425 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2426     return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2427 #else
    2428     return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2429 #endif
    2430 }
    2431 
    2432 
    2433 /**
    2434  * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
    2435  */
    2436 IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2437                                                                   RTGCPTR GCPtrMem, uint8_t iSegReg))
    2438 {
    2439 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2440     return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2441 #else
    2442     return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2443 #endif
    2444 }
    2445 
    2446 
    2447 /**
    2448  * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
    2449  */
    2450 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2451                                                                     RTGCPTR GCPtrMem, uint8_t iSegReg))
    2452 {
    2453 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2454     return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2455 #else
    2456     return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2457 #endif
    2458 }
    2459 
    2460 
    2461 /**
    2462  * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
    2463  */
    2464 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2465                                                                     RTGCPTR GCPtrMem, uint8_t iSegReg))
    2466 {
    2467 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2468     return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2469 #else
    2470     return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2471 #endif
    2472 }
    2473 
    2474 
    2475 /**
    2476  * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
    2477  */
    2478 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
    2479                                                                           RTGCPTR GCPtrMem, uint8_t iSegReg))
    2480 {
    2481 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2482     return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2483 #else
    2484     return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
    2485 #endif
    2486 }
    2487 
    2488 
    2489 /*********************************************************************************************************************************
    2490 *   Helpers: Flat memory mapping.                                                                                                *
    2491 *********************************************************************************************************************************/
    2492 
    2493 /**
    2494  * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
    2495  */
    2496 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2497 {
    2498 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2499     return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2500 #else
    2501     return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2502 #endif
    2503 }
    2504 
    2505 
    2506 /**
    2507  * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
    2508  */
    2509 IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2510 {
    2511 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2512     return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2513 #else
    2514     return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2515 #endif
    2516 }
    2517 
    2518 
    2519 /**
    2520  * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
    2521  */
    2522 IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2523 {
    2524 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2525     return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2526 #else
    2527     return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2528 #endif
    2529 }
    2530 
    2531 
    2532 /**
    2533  * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
    2534  */
    2535 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2536 {
    2537 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2538     return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2539 #else
    2540     return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2541 #endif
    2542 }
    2543 
    2544 
    2545 /**
    2546  * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
    2547  */
    2548 IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2549 {
    2550 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2551     return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2552 #else
    2553     return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2554 #endif
    2555 }
    2556 
    2557 
    2558 /**
    2559  * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
    2560  */
    2561 IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2562 {
    2563 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2564     return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2565 #else
    2566     return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2567 #endif
    2568 }
    2569 
    2570 
    2571 /**
    2572  * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
    2573  */
    2574 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2575 {
    2576 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2577     return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2578 #else
    2579     return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2580 #endif
    2581 }
    2582 
    2583 
    2584 /**
    2585  * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
    2586  */
    2587 IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2588 {
    2589 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2590     return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2591 #else
    2592     return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2593 #endif
    2594 }
    2595 
    2596 
    2597 /**
    2598  * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
    2599  */
    2600 IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2601 {
    2602 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2603     return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2604 #else
    2605     return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2606 #endif
    2607 }
    2608 
    2609 
    2610 /**
    2611  * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
    2612  */
    2613 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2614 {
    2615 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2616     return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2617 #else
    2618     return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2619 #endif
    2620 }
    2621 
    2622 
    2623 /**
    2624  * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
    2625  */
    2626 IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2627 {
    2628 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2629     return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2630 #else
    2631     return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2632 #endif
    2633 }
    2634 
    2635 
    2636 /**
    2637  * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
    2638  */
    2639 IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2640 {
    2641 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2642     return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2643 #else
    2644     return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2645 #endif
    2646 }
    2647 
    2648 
    2649 /**
    2650  * Used by TB code to map 80-bit float data writeonly w/ flat address.
    2651  */
    2652 IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2653 {
    2654 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2655     return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2656 #else
    2657     return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2658 #endif
    2659 }
    2660 
    2661 
    2662 /**
    2663  * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
    2664  */
    2665 IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2666 {
    2667 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2668     return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2669 #else
    2670     return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2671 #endif
    2672 }
    2673 
    2674 
    2675 /**
    2676  * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
    2677  */
    2678 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2679 {
    2680 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2681     return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2682 #else
    2683     return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2684 #endif
    2685 }
    2686 
    2687 
    2688 /**
    2689  * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
    2690  */
    2691 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2692 {
    2693 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2694     return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2695 #else
    2696     return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2697 #endif
    2698 }
    2699 
    2700 
    2701 /**
    2702  * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
    2703  */
    2704 IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
    2705 {
    2706 #ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
    2707     return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
    2708 #else
    2709     return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
    2710 #endif
    2711 }
    2712 
    2713 
    2714 /*********************************************************************************************************************************
    2715 *   Helpers: Commit, rollback & unmap                                                                                            *
    2716 *********************************************************************************************************************************/
    2717 
    2718 /**
    2719  * Used by TB code to commit and unmap a read-write memory mapping.
    2720  */
    2721 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2722 {
    2723     return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
    2724 }
    2725 
    2726 
    2727 /**
    2728  * Used by TB code to commit and unmap a write-only memory mapping.
    2729  */
    2730 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2731 {
    2732     return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
    2733 }
    2734 
    2735 
    2736 /**
    2737  * Used by TB code to commit and unmap a read-only memory mapping.
    2738  */
    2739 IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
    2740 {
    2741     return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
    2742 }
    2743 
    2744 
    2745 /**
    2746  * Reinitializes the native recompiler state.
    2747  *
    2748  * Called before starting a new recompile job.
    2749  */
    2750 static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
    2751 {
    2752     pReNative->cLabels                     = 0;
    2753     pReNative->bmLabelTypes                = 0;
    2754     pReNative->cFixups                     = 0;
    2755 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2756     pReNative->pDbgInfo->cEntries          = 0;
    2757 #endif
    2758     pReNative->pTbOrg                      = pTb;
    2759     pReNative->cCondDepth                  = 0;
    2760     pReNative->uCondSeqNo                  = 0;
    2761     pReNative->uCheckIrqSeqNo              = 0;
    2762     pReNative->uTlbSeqNo                   = 0;
    2763 
    2764     pReNative->Core.bmHstRegs              = IEMNATIVE_REG_FIXED_MASK
    2765 #if IEMNATIVE_HST_GREG_COUNT < 32
    2766                                            | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
    2767 #endif
    2768                                            ;
    2769     pReNative->Core.bmHstRegsWithGstShadow = 0;
    2770     pReNative->Core.bmGstRegShadows        = 0;
    2771     pReNative->Core.bmVars                 = 0;
    2772     pReNative->Core.bmStack                = 0;
    2773     AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
    2774     pReNative->Core.u64ArgVars             = UINT64_MAX;
    2775 
    2776     AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 9);
    2777     pReNative->aidxUniqueLabels[0]         = UINT32_MAX;
    2778     pReNative->aidxUniqueLabels[1]         = UINT32_MAX;
    2779     pReNative->aidxUniqueLabels[2]         = UINT32_MAX;
    2780     pReNative->aidxUniqueLabels[3]         = UINT32_MAX;
    2781     pReNative->aidxUniqueLabels[4]         = UINT32_MAX;
    2782     pReNative->aidxUniqueLabels[5]         = UINT32_MAX;
    2783     pReNative->aidxUniqueLabels[6]         = UINT32_MAX;
    2784     pReNative->aidxUniqueLabels[7]         = UINT32_MAX;
    2785     pReNative->aidxUniqueLabels[8]         = UINT32_MAX;
    2786 
    2787     /* Full host register reinit: */
    2788     for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
    2789     {
    2790         pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
    2791         pReNative->Core.aHstRegs[i].enmWhat        = kIemNativeWhat_Invalid;
    2792         pReNative->Core.aHstRegs[i].idxVar         = UINT8_MAX;
    2793     }
    2794 
    2795     uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
    2796                    & ~(  RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
    2797 #ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
    2798                        | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
    2799 #endif
    2800 #ifdef IEMNATIVE_REG_FIXED_TMP0
    2801                        | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
    2802 #endif
    2803                       );
    2804     for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
    2805     {
    2806         fRegs &= ~RT_BIT_32(idxReg);
    2807         pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
    2808     }
    2809 
    2810     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat     = kIemNativeWhat_pVCpuFixed;
    2811 #ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
    2812     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat   = kIemNativeWhat_pCtxFixed;
    2813 #endif
    2814 #ifdef IEMNATIVE_REG_FIXED_TMP0
    2815     pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat       = kIemNativeWhat_FixedTmp;
    2816 #endif
    2817     return pReNative;
    2818 }
    2819 
    2820 
    2821 /**
    2822  * Allocates and initializes the native recompiler state.
    2823  *
    2824  * This is called the first time an EMT wants to recompile something.
    2825  *
    2826  * @returns Pointer to the new recompiler state.
    2827  * @param   pVCpu   The cross context virtual CPU structure of the calling
    2828  *                  thread.
    2829  * @param   pTb     The TB that's about to be recompiled.
    2830  * @thread  EMT(pVCpu)
    2831  */
    2832 static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
    2833 {
    2834     VMCPU_ASSERT_EMT(pVCpu);
    2835 
    2836     PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
    2837     AssertReturn(pReNative, NULL);
    2838 
    2839     /*
    2840      * Try allocate all the buffers and stuff we need.
    2841      */
    2842     pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
    2843     pReNative->paLabels  = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
    2844     pReNative->paFixups  = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
    2845 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2846     pReNative->pDbgInfo  = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
    2847 #endif
    2848     if (RT_LIKELY(   pReNative->pInstrBuf
    2849                   && pReNative->paLabels
    2850                   && pReNative->paFixups)
    2851 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2852         && pReNative->pDbgInfo
    2853 #endif
    2854        )
    2855     {
    2856         /*
    2857          * Set the buffer & array sizes on success.
    2858          */
    2859         pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
    2860         pReNative->cLabelsAlloc   = _8K;
    2861         pReNative->cFixupsAlloc   = _16K;
    2862 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2863         pReNative->cDbgInfoAlloc  = _16K;
    2864 #endif
    2865 
    2866         /* Other constant stuff: */
    2867         pReNative->pVCpu          = pVCpu;
    2868 
    2869         /*
    2870          * Done, just need to save it and reinit it.
    2871          */
    2872         pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
    2873         return iemNativeReInit(pReNative, pTb);
    2874     }
    2875 
    2876     /*
    2877      * Failed. Cleanup and return.
    2878      */
    2879     AssertFailed();
    2880     RTMemFree(pReNative->pInstrBuf);
    2881     RTMemFree(pReNative->paLabels);
    2882     RTMemFree(pReNative->paFixups);
    2883 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2884     RTMemFree(pReNative->pDbgInfo);
    2885 #endif
    2886     RTMemFree(pReNative);
    2887     return NULL;
    2888 }
    2889 
    2890 
    2891 /**
    2892  * Creates a label.
    2893  *
    2894  * If the label does not yet have a defined position,
    2895  * call iemNativeLabelDefine() later to set it.
    2896  *
    2897  * @returns Label ID. Throws VBox status code on failure, so no need to check
    2898  *          the return value.
    2899  * @param   pReNative   The native recompile state.
    2900  * @param   enmType     The label type.
    2901  * @param   offWhere    The instruction offset of the label.  UINT32_MAX if the
    2902  *                      label is not yet defined (default).
    2903  * @param   uData       Data associated with the label.  Only applicable to
    2904  *                      certain types of labels.  Default is zero.
    2905  */
    2906 DECL_HIDDEN_THROW(uint32_t)
    2907 iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
    2908                      uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
    2909 {
    2910     Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
    2911 
    2912     /*
    2913      * Locate existing label definition.
    2914      *
    2915      * This is only allowed for forward declarations where offWhere=UINT32_MAX
    2916      * and uData is zero.
    2917      */
    2918     PIEMNATIVELABEL paLabels = pReNative->paLabels;
    2919     uint32_t const  cLabels  = pReNative->cLabels;
    2920     if (   pReNative->bmLabelTypes & RT_BIT_64(enmType)
    2921 #ifndef VBOX_STRICT
    2922         && enmType  <  kIemNativeLabelType_FirstWithMultipleInstances
    2923         && offWhere == UINT32_MAX
    2924         && uData    == 0
    2925 #endif
    2926         )
    2927     {
    2928 #ifndef VBOX_STRICT
    2929         AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
    2930                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    2931         uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
    2932         if (idxLabel < pReNative->cLabels)
    2933             return idxLabel;
    2934 #else
    2935         for (uint32_t i = 0; i < cLabels; i++)
    2936             if (   paLabels[i].enmType == enmType
    2937                 && paLabels[i].uData   == uData)
    2938             {
    2939                 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    2940                 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    2941                 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
    2942                 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
    2943                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    2944                 return i;
    2945             }
    2946         AssertStmt(   enmType >= kIemNativeLabelType_FirstWithMultipleInstances
    2947                    || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
    2948 #endif
    2949     }
    2950 
    2951     /*
    2952      * Make sure we've got room for another label.
    2953      */
    2954     if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
    2955     { /* likely */ }
    2956     else
    2957     {
    2958         uint32_t cNew = pReNative->cLabelsAlloc;
    2959         AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
    2960         AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
    2961         cNew *= 2;
    2962         AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* The IEMNATIVEFIXUP::idxLabel type restricts this. */
    2963         paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
    2964         AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
    2965         pReNative->paLabels     = paLabels;
    2966         pReNative->cLabelsAlloc = cNew;
    2967     }
    2968 
    2969     /*
    2970      * Define a new label.
    2971      */
    2972     paLabels[cLabels].off     = offWhere;
    2973     paLabels[cLabels].enmType = enmType;
    2974     paLabels[cLabels].uData   = uData;
    2975     pReNative->cLabels = cLabels + 1;
    2976 
    2977     Assert((unsigned)enmType < 64);
    2978     pReNative->bmLabelTypes |= RT_BIT_64(enmType);
    2979 
    2980     if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
    2981     {
    2982         Assert(uData == 0);
    2983         pReNative->aidxUniqueLabels[enmType] = cLabels;
    2984     }
    2985 
    2986     if (offWhere != UINT32_MAX)
    2987     {
    2988 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    2989         iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
    2990         iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
    2991 #endif
    2992     }
    2993     return cLabels;
    2994 }
    2995 
    2996 
    2997 /**
    2998  * Defines the location of an existing label.
    2999  *
    3000  * @param   pReNative   The native recompile state.
    3001  * @param   idxLabel    The label to define.
    3002  * @param   offWhere    The position.
    3003  */
    3004 DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
    3005 {
    3006     AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
    3007     PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
    3008     AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
    3009     pLabel->off = offWhere;
    3010 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3011     iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
    3012     iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
    3013 #endif
    3014 }
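
/*
 * Usage sketch (illustrative only, not compiled): the typical forward-branch
 * pattern.  A label is created without a position, branches get fixed up
 * against it, and the position is supplied once code generation reaches the
 * target.  The label type below is merely a plausible example.
 */
#if 0
    /* Forward declare the target; its position stays UINT32_MAX until defined. */
    uint32_t const idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, 0 /*uData*/);
    /* ... emit a conditional branch and record a fixup against idxLabelElse ... */
    /* Later, when the target instruction offset 'off' is known: */
    iemNativeLabelDefine(pReNative, idxLabelElse, off);
#endif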
    3015 
    3016 
    3017 /**
    3018  * Looks up a label.
    3019  *
    3020  * @returns Label ID if found, UINT32_MAX if not.
    3021  */
    3022 static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
    3023                                    uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
    3024 {
    3025     Assert((unsigned)enmType < 64);
    3026     if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
    3027     {
    3028         if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
    3029             return pReNative->aidxUniqueLabels[enmType];
    3030 
    3031         PIEMNATIVELABEL paLabels = pReNative->paLabels;
    3032         uint32_t const  cLabels  = pReNative->cLabels;
    3033         for (uint32_t i = 0; i < cLabels; i++)
    3034             if (   paLabels[i].enmType == enmType
    3035                 && paLabels[i].uData   == uData
    3036                 && (   paLabels[i].off == offWhere
    3037                     || offWhere        == UINT32_MAX
    3038                     || paLabels[i].off == UINT32_MAX))
    3039                 return i;
    3040     }
    3041     return UINT32_MAX;
    3042 }
    3043 
    3044 
    3045 /**
    3046  * Adds a fixup.
    3047  *
    3048  * @throws  VBox status code (int) on failure.
    3049  * @param   pReNative   The native recompile state.
    3050  * @param   offWhere    The instruction offset of the fixup location.
    3051  * @param   idxLabel    The target label ID for the fixup.
    3052  * @param   enmType     The fixup type.
    3053  * @param   offAddend   Fixup addend if applicable to the type. Default is 0.
    3054  */
    3055 DECL_HIDDEN_THROW(void)
    3056 iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
    3057                   IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
    3058 {
    3059     Assert(idxLabel <= UINT16_MAX);
    3060     Assert((unsigned)enmType <= UINT8_MAX);
    3061 
    3062     /*
    3063      * Make sure we've got room.
    3064      */
    3065     PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
    3066     uint32_t const  cFixups  = pReNative->cFixups;
    3067     if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
    3068     { /* likely */ }
    3069     else
    3070     {
    3071         uint32_t cNew = pReNative->cFixupsAlloc;
    3072         AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
    3073         AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
    3074         cNew *= 2;
    3075         AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
    3076         paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
    3077         AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
    3078         pReNative->paFixups     = paFixups;
    3079         pReNative->cFixupsAlloc = cNew;
    3080     }
    3081 
    3082     /*
    3083      * Add the fixup.
    3084      */
    3085     paFixups[cFixups].off       = offWhere;
    3086     paFixups[cFixups].idxLabel  = (uint16_t)idxLabel;
    3087     paFixups[cFixups].enmType   = enmType;
    3088     paFixups[cFixups].offAddend = offAddend;
    3089     pReNative->cFixups = cFixups + 1;
    3090 }
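
/*
 * Usage sketch (illustrative only, not compiled): recording a fixup right
 * after emitting a branch with a dummy displacement.  The -4 addend follows
 * the AMD64 rel32 convention; other fixup types take other addends.
 */
#if 0
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseGp0, UINT32_MAX /*offWhere*/, 0 /*uData*/);
    /* ... emit e.g. a jcc with a zero rel32 displacement ending at 'off' ... */
    iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4 /*offAddend*/);
    /* The final assembly pass patches the displacement once the label offset is known. */
#endif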
    3091 
    3092 
    3093 /**
    3094  * Slow code path for iemNativeInstrBufEnsure.
    3095  */
    3096 DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
    3097 {
    3098     /* Double the buffer size till we meet the request. */
    3099     uint32_t cNew = pReNative->cInstrBufAlloc;
    3100     AssertReturn(cNew > 0, NULL);
    3101     do
    3102         cNew *= 2;
    3103     while (cNew < off + cInstrReq);
    3104 
    3105     uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
    3106 #ifdef RT_ARCH_ARM64
    3107     uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
    3108 #else
    3109     uint32_t const cbMaxInstrBuf = _2M;
    3110 #endif
    3111     AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
    3112 
    3113     void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
    3114     AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
    3115 
    3116 #ifdef VBOX_STRICT
    3117     pReNative->offInstrBufChecked = off + cInstrReq;
    3118 #endif
    3119     pReNative->cInstrBufAlloc     = cNew;
    3120     return pReNative->pInstrBuf   = (PIEMNATIVEINSTR)pvNew;
    3121 }
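
/*
 * Usage sketch (illustrative only, not compiled): emitters go through the
 * inline iemNativeInstrBufEnsure wrapper, which only drops into the slow path
 * above when the buffer lacks room for the requested instruction units.
 */
#if 0
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1 /*cInstrReq*/);
    pCodeBuf[off++] = UINT32_C(0xd503201f); /* e.g. an ARM64 NOP; at most cInstrReq units may be written */
#endif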
    3122 
    3123 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3124 
    3125 /**
    3126  * Grows the static debug info array used during recompilation.
    3127  *
    3128  * @returns Pointer to the new debug info block; throws VBox status code on
    3129  *          failure, so no need to check the return value.
    3130  */
    3131 DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
    3132 {
    3133     uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
    3134     AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
    3135     pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
    3136     AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
    3137     pReNative->pDbgInfo      = pDbgInfo;
    3138     pReNative->cDbgInfoAlloc = cNew;
    3139     return pDbgInfo;
    3140 }
    3141 
    3142 
    3143 /**
    3144  * Adds a new, uninitialized debug info entry, returning a pointer to it.
    3145  */
    3146 DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
    3147 {
    3148     if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
    3149     { /* likely */ }
    3150     else
    3151         pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
    3152     return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
    3153 }
    3154 
    3155 
    3156 /**
    3157  * Debug Info: Adds a native offset record, if necessary.
    3158  */
    3159 static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    3160 {
    3161     PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
    3162 
    3163     /*
    3164      * Search backwards to see if we've got a similar record already.
    3165      */
    3166     uint32_t idx     = pDbgInfo->cEntries;
    3167     uint32_t idxStop = idx > 8 ? idx - 8 : 0;
    3168     while (idx-- > idxStop)
    3169         if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
    3170         {
    3171             if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
    3172                 return;
    3173             AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
    3174                        IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
    3175             break;
    3176         }
    3177 
    3178     /*
    3179      * Add it.
    3180      */
    3181     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
    3182     pEntry->NativeOffset.uType     = kIemTbDbgEntryType_NativeOffset;
    3183     pEntry->NativeOffset.offNative = off;
    3184 }
    3185 
    3186 
    3187 /**
    3188  * Debug Info: Record info about a label.
    3189  */
    3190 static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
    3191 {
    3192     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3193     pEntry->Label.uType    = kIemTbDbgEntryType_Label;
    3194     pEntry->Label.uUnused  = 0;
    3195     pEntry->Label.enmLabel = (uint8_t)enmType;
    3196     pEntry->Label.uData    = uData;
    3197 }
    3198 
    3199 
    3200 /**
    3201  * Debug Info: Record info about a threaded call.
    3202  */
    3203 static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
    3204 {
    3205     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3206     pEntry->ThreadedCall.uType       = kIemTbDbgEntryType_ThreadedCall;
    3207     pEntry->ThreadedCall.fRecompiled = fRecompiled;
    3208     pEntry->ThreadedCall.uUnused     = 0;
    3209     pEntry->ThreadedCall.enmCall     = (uint16_t)enmCall;
    3210 }
    3211 
    3212 
    3213 /**
    3214  * Debug Info: Record info about a new guest instruction.
    3215  */
    3216 static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
    3217 {
    3218     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3219     pEntry->GuestInstruction.uType   = kIemTbDbgEntryType_GuestInstruction;
    3220     pEntry->GuestInstruction.uUnused = 0;
    3221     pEntry->GuestInstruction.fExec   = fExec;
    3222 }
    3223 
    3224 
    3225 /**
    3226  * Debug Info: Record info about guest register shadowing.
    3227  */
    3228 static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
    3229                                                  uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
    3230 {
    3231     PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
    3232     pEntry->GuestRegShadowing.uType         = kIemTbDbgEntryType_GuestRegShadowing;
    3233     pEntry->GuestRegShadowing.uUnused       = 0;
    3234     pEntry->GuestRegShadowing.idxGstReg     = enmGstReg;
    3235     pEntry->GuestRegShadowing.idxHstReg     = idxHstReg;
    3236     pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
    3237 }
    3238 
    3239 #endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
    3240 
    3241 
    3242 /*********************************************************************************************************************************
    3243 *   Register Allocator                                                                                                           *
    3244 *********************************************************************************************************************************/
    3245 
    3246 /**
    3247  * Register parameter indexes (indexed by argument number).
    3248  */
    3249 DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
    3250 {
    3251     IEMNATIVE_CALL_ARG0_GREG,
    3252     IEMNATIVE_CALL_ARG1_GREG,
    3253     IEMNATIVE_CALL_ARG2_GREG,
    3254     IEMNATIVE_CALL_ARG3_GREG,
    3255 #if defined(IEMNATIVE_CALL_ARG4_GREG)
    3256     IEMNATIVE_CALL_ARG4_GREG,
    3257 # if defined(IEMNATIVE_CALL_ARG5_GREG)
    3258     IEMNATIVE_CALL_ARG5_GREG,
    3259 #  if defined(IEMNATIVE_CALL_ARG6_GREG)
    3260     IEMNATIVE_CALL_ARG6_GREG,
    3261 #   if defined(IEMNATIVE_CALL_ARG7_GREG)
    3262     IEMNATIVE_CALL_ARG7_GREG,
    3263 #   endif
    3264 #  endif
    3265 # endif
    3266 #endif
    3267 };
    3268 
    3269 /**
    3270  * Call register masks indexed by argument count.
    3271  */
    3272 DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
    3273 {
    3274     0,
    3275     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
    3276     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
    3277     RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
    3278       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3279     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
    3280 #if defined(IEMNATIVE_CALL_ARG4_GREG)
    3281       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3282     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
    3283 # if defined(IEMNATIVE_CALL_ARG5_GREG)
    3284       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3285     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
    3286 #  if defined(IEMNATIVE_CALL_ARG6_GREG)
    3287       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3288     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
    3289     | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
    3290 #   if defined(IEMNATIVE_CALL_ARG7_GREG)
    3291       RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
    3292     | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
    3293     | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
    3294 #   endif
    3295 #  endif
    3296 # endif
    3297 #endif
    3298 };
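
/*
 * Sketch (illustrative only, not compiled): both tables are indexed by
 * argument count/number, e.g. to determine which host registers must be
 * vacated before loading up the arguments for a helper call.
 */
#if 0
    uint32_t const fArgRegs = g_afIemNativeCallRegs[3];   /* mask covering ARG0..ARG2 */
    uint8_t  const idxReg1  = g_aidxIemNativeCallRegs[1]; /* host register carrying argument 1 */
    /* ... flush/spill whatever currently occupies fArgRegs ... */
#endif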
    3299 
    3300 #ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    3301 /**
    3302  * BP offset of the stack argument slots.
    3303  *
    3304  * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
    3305  * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
    3306  */
    3307 DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
    3308 {
    3309     IEMNATIVE_FP_OFF_STACK_ARG0,
    3310 # ifdef IEMNATIVE_FP_OFF_STACK_ARG1
    3311     IEMNATIVE_FP_OFF_STACK_ARG1,
    3312 # endif
    3313 # ifdef IEMNATIVE_FP_OFF_STACK_ARG2
    3314     IEMNATIVE_FP_OFF_STACK_ARG2,
    3315 # endif
    3316 # ifdef IEMNATIVE_FP_OFF_STACK_ARG3
    3317     IEMNATIVE_FP_OFF_STACK_ARG3,
    3318 # endif
    3319 };
    3320 AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
    3321 #endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
    3322 
    3323 /**
    3324  * Info about shadowed guest register values.
    3325  * @see IEMNATIVEGSTREG
    3326  */
    3327 static struct
    3328 {
    3329     /** Offset in VMCPU. */
    3330     uint32_t    off;
    3331     /** The field size. */
    3332     uint8_t     cb;
    3333     /** Name (for logging). */
    3334     const char *pszName;
    3335 } const g_aGstShadowInfo[] =
    3336 {
    3337 #define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
    3338     /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */  { CPUMCTX_OFF_AND_SIZE(rax),                "rax", },
    3339     /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */  { CPUMCTX_OFF_AND_SIZE(rcx),                "rcx", },
    3340     /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */  { CPUMCTX_OFF_AND_SIZE(rdx),                "rdx", },
    3341     /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */  { CPUMCTX_OFF_AND_SIZE(rbx),                "rbx", },
    3342     /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */  { CPUMCTX_OFF_AND_SIZE(rsp),                "rsp", },
    3343     /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */  { CPUMCTX_OFF_AND_SIZE(rbp),                "rbp", },
    3344     /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */  { CPUMCTX_OFF_AND_SIZE(rsi),                "rsi", },
    3345     /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */  { CPUMCTX_OFF_AND_SIZE(rdi),                "rdi", },
    3346     /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */  { CPUMCTX_OFF_AND_SIZE(r8),                 "r8", },
    3347     /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */  { CPUMCTX_OFF_AND_SIZE(r9),                 "r9", },
    3348     /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */  { CPUMCTX_OFF_AND_SIZE(r10),                "r10", },
    3349     /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */  { CPUMCTX_OFF_AND_SIZE(r11),                "r11", },
    3350     /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */  { CPUMCTX_OFF_AND_SIZE(r12),                "r12", },
    3351     /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */  { CPUMCTX_OFF_AND_SIZE(r13),                "r13", },
    3352     /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */  { CPUMCTX_OFF_AND_SIZE(r14),                "r14", },
    3353     /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */  { CPUMCTX_OFF_AND_SIZE(r15),                "r15", },
    3354     /* [kIemNativeGstReg_Pc] = */                       { CPUMCTX_OFF_AND_SIZE(rip),                "rip", },
    3355     /* [kIemNativeGstReg_EFlags] = */                   { CPUMCTX_OFF_AND_SIZE(eflags),             "eflags", },
    3356     /* [kIemNativeGstReg_SegSelFirst + 0] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel),      "es", },
    3357     /* [kIemNativeGstReg_SegSelFirst + 1] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel),      "cs", },
    3358     /* [kIemNativeGstReg_SegSelFirst + 2] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel),      "ss", },
    3359     /* [kIemNativeGstReg_SegSelFirst + 3] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel),      "ds", },
    3360     /* [kIemNativeGstReg_SegSelFirst + 4] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel),      "fs", },
    3361     /* [kIemNativeGstReg_SegSelFirst + 5] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel),      "gs", },
    3362     /* [kIemNativeGstReg_SegBaseFirst + 0] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base),  "es_base", },
    3363     /* [kIemNativeGstReg_SegBaseFirst + 1] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base),  "cs_base", },
    3364     /* [kIemNativeGstReg_SegBaseFirst + 2] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base),  "ss_base", },
    3365     /* [kIemNativeGstReg_SegBaseFirst + 3] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base),  "ds_base", },
    3366     /* [kIemNativeGstReg_SegBaseFirst + 4] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base),  "fs_base", },
    3367     /* [kIemNativeGstReg_SegBaseFirst + 5] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base),  "gs_base", },
    3368     /* [kIemNativeGstReg_SegLimitFirst + 0] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
    3369     /* [kIemNativeGstReg_SegLimitFirst + 1] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
    3370     /* [kIemNativeGstReg_SegLimitFirst + 2] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
    3371     /* [kIemNativeGstReg_SegLimitFirst + 3] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
    3372     /* [kIemNativeGstReg_SegLimitFirst + 4] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
    3373     /* [kIemNativeGstReg_SegLimitFirst + 5] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
    3374     /* [kIemNativeGstReg_SegAttribFirst + 0] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u),   "es_attrib", },
    3375     /* [kIemNativeGstReg_SegAttribFirst + 1] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u),   "cs_attrib", },
    3376     /* [kIemNativeGstReg_SegAttribFirst + 2] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u),   "ss_attrib", },
    3377     /* [kIemNativeGstReg_SegAttribFirst + 3] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u),   "ds_attrib", },
    3378     /* [kIemNativeGstReg_SegAttribFirst + 4] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u),   "fs_attrib", },
    3379     /* [kIemNativeGstReg_SegAttribFirst + 5] = */       { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u),   "gs_attrib", },
    3380 #undef CPUMCTX_OFF_AND_SIZE
    3381 };
    3382 AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
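
/*
 * Sketch (illustrative only, not compiled): the table drives both logging and
 * the emitting of loads/stores for shadowed guest registers, e.g. loading the
 * guest RIP into a host register via the VMCPU offset (emitter helper name as
 * used elsewhere in the IEM native code):
 */
#if 0
    Assert(g_aGstShadowInfo[kIemNativeGstReg_Pc].cb == sizeof(uint64_t));
    off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg,
                                          g_aGstShadowInfo[kIemNativeGstReg_Pc].off);
#endif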
    3383 
    3384 
    3385 /** Host CPU general purpose register names. */
    3386 DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
    3387 {
    3388 #ifdef RT_ARCH_AMD64
    3389     "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
    3390 #elif defined(RT_ARCH_ARM64)
    3391     "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",  "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
    3392     "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp",  "lr",  "sp/xzr",
    3393 #else
    3394 # error "port me"
    3395 #endif
    3396 };
    3397 
    3398 
    3399 DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
    3400                                                      IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
    3401 {
    3402     pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    3403 
    3404     pReNative->Core.aHstRegs[idxReg].enmWhat        = enmWhat;
    3405     pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3406     pReNative->Core.aHstRegs[idxReg].idxVar         = idxVar;
    3407     return (uint8_t)idxReg;
    3408 }
    3409 
    3410 
    3411 /**
    3412  * Tries to locate a suitable register in the given register mask.
    3413  *
    3414  * This ASSUMES the caller has done the minimal/optimal allocation checks and
    3415  * failed.
    3416  *
    3417  * @returns Host register number on success, returns UINT8_MAX on failure.
    3418  */
    3419 static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
    3420 {
    3421     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    3422     uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
    3423     if (fRegs)
    3424     {
    3425         /** @todo pick better here:    */
    3426         unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
    3427 
    3428         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    3429         Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    3430                == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    3431         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    3432 
    3433         pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    3434         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    3435         pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3436         return idxReg;
    3437     }
    3438     return UINT8_MAX;
    3439 }
    3440 
    3441 
    3442 /**
    3443  * Locate a register, possibly freeing one up.
    3444  *
    3445  * This ASSUMES the caller has done the minimal/optimal allocation checks and
    3446  * failed.
    3447  *
    3448  * @returns Host register number on success.  Returns UINT8_MAX if no register
    3449  *          is found, in which case the caller is supposed to deal with it and
    3450  *          raise an allocation-type-specific status code (if desired).
    3451  *
    3452  * @throws  VBox status code if we run into trouble spilling a variable or
    3453  *          recording debug info.  Does NOT throw anything if we're out of
    3454  *          registers, though.
    3455  */
    3456 static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
    3457                                          uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
    3458 {
    3459     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    3460     Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
    3461 
    3462     /*
    3463      * Try a free register that's shadowing a guest register.
    3464      */
    3465     uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
    3466     if (fRegs)
    3467     {
    3468         unsigned const idxReg = (fPreferVolatile
    3469                                  ? ASMBitFirstSetU32(fRegs)
    3470                                  : ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3471                                                     ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK: fRegs))
    3472                               - 1;
    3473 
    3474         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    3475         Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    3476                == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    3477         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    3478 
    3479         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    3480         pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    3481         pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3482         return idxReg;
    3483     }
    3484 
    3485     /*
    3486      * Try to free up a variable that's in a register.
    3487      *
    3488      * We do two rounds here: first evacuating variables that don't need to be
    3489      * saved on the stack, then in the second round moving things onto the stack.
    3490      */
    3491     for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
    3492     {
    3493         uint32_t fVars = pReNative->Core.bmVars;
    3494         while (fVars)
    3495         {
    3496             uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
    3497             uint8_t const  idxReg = pReNative->Core.aVars[idxVar].idxReg;
    3498             if (   idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
    3499                 && (RT_BIT_32(idxReg) & fRegMask)
    3500                 && (  iLoop == 0
    3501                     ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
    3502                     : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    3503                 && !pReNative->Core.aVars[idxVar].fRegAcquired)
    3504             {
    3505                 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
    3506                 Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
    3507                        == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    3508                 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3509                 Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
    3510                        == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
    3511 
    3512                 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    3513                 {
    3514                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    3515                     *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
    3516                 }
    3517 
    3518                 pReNative->Core.aVars[idxVar].idxReg    = UINT8_MAX;
    3519                 pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxReg);
    3520 
    3521                 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    3522                 pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    3523                 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    3524                 return idxReg;
    3525             }
    3526             fVars &= ~RT_BIT_32(idxVar);
    3527         }
    3528     }
    3529 
    3530     return UINT8_MAX;
    3531 }
    3532 
    3533 
    3534 /**
    3535  * Reassigns a variable to a different register specified by the caller.
    3536  *
    3537  * @returns The new code buffer position.
    3538  * @param   pReNative       The native recompile state.
    3539  * @param   off             The current code buffer position.
    3540  * @param   idxVar          The variable index.
    3541  * @param   idxRegOld       The old host register number.
    3542  * @param   idxRegNew       The new host register number.
    3543  * @param   pszCaller       The caller for logging.
    3544  */
    3545 static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
    3546                                     uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
    3547 {
    3548     Assert(pReNative->Core.aVars[idxVar].idxReg == idxRegOld);
    3549     RT_NOREF(pszCaller);
    3550 
    3551     iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
    3552 
    3553     uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    3554     Log12(("%s: moving idxVar=%d from %s to %s (fGstRegShadows=%RX64)\n",
    3555            pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
    3556     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
    3557 
    3558     pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
    3559     pReNative->Core.aHstRegs[idxRegNew].enmWhat        = kIemNativeWhat_Var;
    3560     pReNative->Core.aHstRegs[idxRegNew].idxVar         = idxVar;
    3561     if (fGstRegShadows)
    3562     {
    3563         pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
    3564                                                | RT_BIT_32(idxRegNew);
    3565         while (fGstRegShadows)
    3566         {
    3567             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    3568             fGstRegShadows &= ~RT_BIT_64(idxGstReg);
    3569 
    3570             Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
    3571             pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
    3572         }
    3573     }
    3574 
    3575     pReNative->Core.aVars[idxVar].idxReg = (uint8_t)idxRegNew;
    3576     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    3577     pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
    3578     return off;
    3579 }
    3580 
    3581 
    3582 /**
    3583  * Moves a variable to a different register or spills it onto the stack.
    3584  *
    3585  * This must be a stack variable (kIemNativeVarKind_Stack) because the other
    3586  * kinds can easily be recreated if needed later.
    3587  *
    3588  * @returns The new code buffer position.
    3589  * @param   pReNative       The native recompile state.
    3590  * @param   off             The current code buffer position.
    3591  * @param   idxVar          The variable index.
    3592  * @param   fForbiddenRegs  Mask of the forbidden registers.  Defaults to
    3593  *                          call-volatile registers.
    3594  */
    3595 static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
    3596                                                 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
    3597 {
    3598     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    3599     Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
    3600     Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
    3601 
    3602     uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
    3603     Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
    3604     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
    3605     Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
    3606     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
    3607            == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
    3608     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3609     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
    3610            == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
    3611 
    3612 
    3613     /** @todo Add statistics on this. */
    3614     /** @todo Implement basic variable liveness analysis (python) so variables
    3615      * can be freed immediately once no longer used.  As it stands, we risk
    3616      * trashing registers and stack space on dead variables. */
    3617 
    3618     /*
    3619      * First try move it to a different register, as that's cheaper.
    3620      */
    3621     fForbiddenRegs |= RT_BIT_32(idxRegOld);
    3622     fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
    3623     uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
    3624     if (fRegs)
    3625     {
    3626         /* Avoid using shadow registers, if possible. */
    3627         if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
    3628             fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
    3629         unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
    3630         return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
    3631     }
    3632 
    3633     /*
    3634      * Otherwise we must spill the register onto the stack.
    3635      */
    3636     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    3637     Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    3638            idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    3639     off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    3640 
    3641     pReNative->Core.aVars[idxVar].idxReg    = UINT8_MAX;
    3642     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
    3643     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxRegOld);
    3644     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    3645     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    3646     return off;
    3647 }
    3648 
    3649 
    3650 /**
    3651  * Allocates a temporary host general purpose register.
    3652  *
    3653  * This may emit code to save register content onto the stack in order to free
    3654  * up a register.
    3655  *
    3656  * @returns The host register number; throws VBox status code on failure,
    3657  *          so no need to check the return value.
    3658  * @param   pReNative       The native recompile state.
    3659  * @param   poff            Pointer to the variable with the code buffer position.
    3660  *                          This will be updated if we need to move a variable from
    3661  *                          register to stack in order to satisfy the request.
    3662  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    3663  *                          registers (@c true, default) or the other way around
    3664  *                          (@c false, for iemNativeRegAllocTmpForGuestReg()).
    3665  */
    3666 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
    3667 {
    3668     /*
    3669      * Try to find a completely unused register, preferably a call-volatile one.
    3670      */
    3671     uint8_t  idxReg;
    3672     uint32_t fRegs = ~pReNative->Core.bmHstRegs
    3673                    & ~pReNative->Core.bmHstRegsWithGstShadow
    3674                    & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
    3675     if (fRegs)
    3676     {
    3677         if (fPreferVolatile)
    3678             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3679                                                 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    3680         else
    3681             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3682                                                 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    3683         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    3684         Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    3685     }
    3686     else
    3687     {
    3688         idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
    3689         AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    3690     }
    3691     return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
    3692 }
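
/*
 * Usage sketch (illustrative only, not compiled): the typical temporary
 * register lifecycle inside an emitter, with iemNativeRegFreeTmp as the
 * matching release helper.
 */
#if 0
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1000));
    /* ... use idxTmpReg ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
#endif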
    3693 
    3694 
    3695 /**
    3696  * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
    3697  * registers.
    3698  *
    3699  * @returns The host register number; throws VBox status code on failure,
    3700  *          so no need to check the return value.
    3701  * @param   pReNative       The native recompile state.
    3702  * @param   poff            Pointer to the variable with the code buffer position.
    3703  *                          This will be updated if we need to move a variable from
    3704  *                          register to stack in order to satisfy the request.
    3705  * @param   fRegMask        Mask of acceptable registers.
    3706  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    3707  *                          registers (@c true, default) or the other way around
    3708  *                          (@c false, for iemNativeRegAllocTmpForGuestReg()).
    3709  */
    3710 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
    3711                                                   bool fPreferVolatile /*= true*/)
    3712 {
    3713     Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
    3714     Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
    3715 
    3716     /*
    3717      * Try to find a completely unused register, preferably a call-volatile one.
    3718      */
    3719     uint8_t  idxReg;
    3720     uint32_t fRegs = ~pReNative->Core.bmHstRegs
    3721                    & ~pReNative->Core.bmHstRegsWithGstShadow
    3722                    & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
    3723                    & fRegMask;
    3724     if (fRegs)
    3725     {
    3726         if (fPreferVolatile)
    3727             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3728                                                 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    3729         else
    3730             idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    3731                                                 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    3732         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    3733         Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    3734     }
    3735     else
    3736     {
    3737         idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
    3738         AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
    3739     }
    3740     return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
    3741 }
    3742 
    3743 
    3744 /**
    3745  * Allocates a temporary register for loading an immediate value into.
    3746  *
    3747  * This will emit code to load the immediate, unless there happens to be an
    3748  * unused register with the value already loaded.
    3749  *
    3750  * The caller will not modify the returned register, it must be considered
    3751  * read-only.  Free using iemNativeRegFreeTmpImm.
    3752  *
    3753  * @returns The host register number; throws VBox status code on failure, so no
    3754  *          need to check the return value.
    3755  * @param   pReNative       The native recompile state.
    3756  * @param   poff            Pointer to the variable with the code buffer position.
    3757  * @param   uImm            The immediate value that the register must hold upon
    3758  *                          return.
    3759  * @param   fPreferVolatile Whether to prefer volatile over non-volatile
    3760  *                          registers (@c true, default) or the other way around
    3761  *                          (@c false).
    3762  *
    3763  * @note    Reusing immediate values has not been implemented yet.
    3764  */
    3765 DECL_HIDDEN_THROW(uint8_t)
    3766 iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
    3767 {
    3768     uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
    3769     *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
    3770     return idxReg;
    3771 }
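
/*
 * Usage sketch (illustrative only, not compiled): the immediate variant must
 * be treated as read-only by the caller and paired with iemNativeRegFreeTmpImm.
 */
#if 0
    uint8_t const idxRegMask = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xfff));
    /* ... compare/test against idxRegMask without modifying it ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegMask);
#endif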
    3772 
    3773 
    3774 /**
    3775  * Marks host register @a idxHstReg as containing a shadow copy of guest
    3776  * register @a enmGstReg.
    3777  *
    3778  * ASSUMES that caller has made sure @a enmGstReg is not associated with any
    3779  * host register before calling.
    3780  */
    3781 DECL_FORCE_INLINE(void)
    3782 iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    3783 {
    3784     Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
    3785     Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    3786     Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
    3787 
    3788     pReNative->Core.aidxGstRegShadows[enmGstReg]       = idxHstReg;
    3789     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
    3790     pReNative->Core.bmGstRegShadows                   |= RT_BIT_64(enmGstReg);
    3791     pReNative->Core.bmHstRegsWithGstShadow            |= RT_BIT_32(idxHstReg);
    3792 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3793     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    3794     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
    3795 #else
    3796     RT_NOREF(off);
    3797 #endif
    3798 }
    3799 
    3800 
    3801 /**
    3802  * Clear any guest register shadow claims from @a idxHstReg.
    3803  *
    3804  * The register does not need to be shadowing any guest registers.
    3805  */
    3806 DECL_FORCE_INLINE(void)
    3807 iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
    3808 {
    3809     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    3810               == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
    3811            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3812     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
    3813            == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    3814 
    3815 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3816     uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    3817     if (fGstRegs)
    3818     {
    3819         Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
    3820         iemNativeDbgInfoAddNativeOffset(pReNative, off);
    3821         while (fGstRegs)
    3822         {
    3823             unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    3824             fGstRegs &= ~RT_BIT_64(iGstReg);
    3825             iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
    3826         }
    3827     }
    3828 #else
    3829     RT_NOREF(off);
    3830 #endif
    3831 
    3832     pReNative->Core.bmHstRegsWithGstShadow            &= ~RT_BIT_32(idxHstReg);
    3833     pReNative->Core.bmGstRegShadows                   &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    3834     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    3835 }
    3836 
    3837 
    3838 /**
    3839  * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
    3840  * and global overview flags.
    3841  */
    3842 DECL_FORCE_INLINE(void)
    3843 iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    3844 {
    3845     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3846     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    3847               == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
    3848            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3849     Assert(pReNative->Core.bmGstRegShadows                    & RT_BIT_64(enmGstReg));
    3850     Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    3851     Assert(pReNative->Core.bmHstRegsWithGstShadow             & RT_BIT_32(idxHstReg));
    3852 
    3853 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3854     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    3855     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
    3856 #else
    3857     RT_NOREF(off);
    3858 #endif
    3859 
    3860     uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
    3861     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
    3862     if (!fGstRegShadowsNew)
    3863         pReNative->Core.bmHstRegsWithGstShadow        &= ~RT_BIT_32(idxHstReg);
    3864     pReNative->Core.bmGstRegShadows                   &= ~RT_BIT_64(enmGstReg);
    3865 }
    3866 
    3867 
    3868 /**
    3869  * Clear any guest register shadow claim for @a enmGstReg.
    3870  */
    3871 DECL_FORCE_INLINE(void)
    3872 iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
    3873 {
    3874     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3875     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    3876     {
    3877         Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
    3878         iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
    3879     }
    3880 }
    3881 
    3882 
    3883 /**
    3884  * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
    3885  * as the new shadow of it.
    3886  */
    3887 DECL_FORCE_INLINE(void)
    3888 iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
    3889                                        IEMNATIVEGSTREG enmGstReg, uint32_t off)
    3890 {
    3891     Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3892     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    3893     {
    3894         Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
    3895         if (pReNative->Core.aidxGstRegShadows[enmGstReg] == idxHstRegNew)
    3896             return;
    3897         iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
    3898     }
    3899     iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
    3900 }
    3901 
    3902 
    3903 /**
    3904  * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
    3905  * to @a idxRegTo.
    3906  */
    3907 DECL_FORCE_INLINE(void)
    3908 iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
    3909                                     IEMNATIVEGSTREG enmGstReg, uint32_t off)
    3910 {
    3911     Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
    3912     Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
    3913     Assert(      (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
    3914               == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
    3915            && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    3916     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
    3917            == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
    3918     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
    3919            == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
    3920 
    3921     uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
    3922     pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows  = fGstRegShadowsFrom;
    3923     if (!fGstRegShadowsFrom)
    3924         pReNative->Core.bmHstRegsWithGstShadow          &= ~RT_BIT_32(idxRegFrom);
    3925     pReNative->Core.bmHstRegsWithGstShadow              |= RT_BIT_32(idxRegTo);
    3926     pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows   |= RT_BIT_64(enmGstReg);
    3927     pReNative->Core.aidxGstRegShadows[enmGstReg]         = idxRegTo;
    3928 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    3929     iemNativeDbgInfoAddNativeOffset(pReNative, off);
    3930     iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
    3931 #else
    3932     RT_NOREF(off);
    3933 #endif
    3934 }
    3935 
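The mark/clear/transfer helpers above all maintain the same four-way bookkeeping: a global guest-shadow mask, a global host-with-shadow mask, a guest-to-host index array, and a per-host guest set. A standalone model of that bookkeeping in plain C (made-up sizes, not VBox types), covering mark and transfer:

/* Standalone model (plain C, not VBox code) of the bookkeeping kept in sync
   by the helpers above.  Field names mirror the real ones; sizes are made up.
   Note: unlike the real mark helper (which assigns, per the @todo above),
   this model ORs the guest bit into the per-host set. */
#include <assert.h>
#include <stdint.h>

static uint64_t g_bmGstRegShadows;                  /* guest regs that have a shadow */
static uint32_t g_bmHstRegsWithGstShadow;           /* host regs holding shadows */
static uint8_t  g_aidxGstRegShadows[64];            /* guest -> host index */
static uint64_t g_afGstRegShadows[16];              /* host -> set of shadowed guest regs */

static void MarkShadow(uint8_t idxHst, unsigned idxGst)
{
    assert(!(g_bmGstRegShadows & ((uint64_t)1 << idxGst)));     /* caller cleared it first */
    g_aidxGstRegShadows[idxGst]  = idxHst;
    g_afGstRegShadows[idxHst]   |= (uint64_t)1 << idxGst;
    g_bmGstRegShadows           |= (uint64_t)1 << idxGst;
    g_bmHstRegsWithGstShadow    |= (uint32_t)1 << idxHst;
}

static void TransferShadow(uint8_t idxFrom, uint8_t idxTo, unsigned idxGst)
{
    assert(g_afGstRegShadows[idxFrom] & ((uint64_t)1 << idxGst));
    g_afGstRegShadows[idxFrom] &= ~((uint64_t)1 << idxGst);
    if (!g_afGstRegShadows[idxFrom])                            /* last shadow gone? */
        g_bmHstRegsWithGstShadow &= ~((uint32_t)1 << idxFrom);
    g_bmHstRegsWithGstShadow    |= (uint32_t)1 << idxTo;
    g_afGstRegShadows[idxTo]    |= (uint64_t)1 << idxGst;
    g_aidxGstRegShadows[idxGst]  = idxTo;
}

int main(void)
{
    MarkShadow(3, 7);                               /* host reg 3 shadows guest reg 7 */
    TransferShadow(3, 5, 7);                        /* move the claim to host reg 5 */
    assert(g_aidxGstRegShadows[7] == 5);
    assert(g_bmHstRegsWithGstShadow == (1u << 5));  /* host reg 3 dropped from the set */
    return 0;
}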
    3936 
    3937 /**
    3938  * Allocates a temporary host general purpose register for keeping a guest
    3939  * register value.
    3940  *
    3941  * Since we may already have a register holding the guest register value,
    3942  * code will be emitted to do the loading if that's not the case. Code may also
    3943  * be emitted if we have to free up a register to satisfy the request.
    3944  *
    3945  * @returns The host register number; throws VBox status code on failure, so no
    3946  *          need to check the return value.
    3947  * @param   pReNative       The native recompile state.
    3948  * @param   poff            Pointer to the variable with the code buffer
    3949  *                          position. This will be updated if we need to move a
    3950  *                          variable from register to stack in order to satisfy
    3951  *                          the request.
    3952  * @param   enmGstReg       The guest register that is to be updated.
    3953  * @param   enmIntendedUse  How the caller will be using the host register.
    3954  * @param   fNoVolatileRegs Set if no volatile register allowed, clear if any
    3955  *                          register is okay (default).  The ASSUMPTION here is
    3956  *                          that the caller has already flushed all volatile
    3957  *                          registers, so this is only applied if we allocate a
    3958  *                          new register.
    3959  * @sa      iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
    3960  */
    3961 DECL_HIDDEN_THROW(uint8_t)
    3962 iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
    3963                                 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
    3964                                 bool fNoVolatileRegs /*= false*/)
    3965 {
    3966     Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
    3967 #if defined(LOG_ENABLED) || defined(VBOX_STRICT)
    3968     static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
    3969 #endif
    3970     uint32_t const fRegMask = !fNoVolatileRegs
    3971                             ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
    3972                             : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    3973 
    3974     /*
    3975      * First check if the guest register value is already in a host register.
    3976      */
    3977     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    3978     {
    3979         uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
    3980         Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    3981         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    3982         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    3983 
    3984         /* It's not supposed to be allocated... */
    3985         if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
    3986         {
    3987             /*
    3988              * If the register will trash the guest shadow copy, try to find a
    3989              * completely unused register we can use instead.  If that fails,
    3990              * we need to disassociate the host reg from the guest reg.
    3991              */
    3992             /** @todo would be nice to know if preserving the register is in any way helpful. */
    3993             /* If the purpose is calculations, try to duplicate the register value as
    3994                we'll be clobbering the shadow. */
    3995             if (   enmIntendedUse == kIemNativeGstRegUse_Calculation
    3996                 && (  ~pReNative->Core.bmHstRegs
    3997                     & ~pReNative->Core.bmHstRegsWithGstShadow
    3998                     & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
    3999             {
    4000                 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
    4001 
    4002                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    4003 
    4004                 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
    4005                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4006                        g_apszIemNativeHstRegNames[idxRegNew]));
    4007                 idxReg = idxRegNew;
    4008             }
    4009             /* If the current register matches the restrictions, go ahead and allocate
    4010                it for the caller. */
    4011             else if (fRegMask & RT_BIT_32(idxReg))
    4012             {
    4013                 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    4014                 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
    4015                 pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4016                 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    4017                     Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
    4018                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    4019                 else
    4020                 {
    4021                     iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    4022                     Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
    4023                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    4024                 }
    4025             }
    4026             /* Otherwise, allocate a register that satisfies the caller and transfer
    4027                the shadowing if compatible with the intended use.  (This basically
    4028                means the call wants a non-volatile register (RSP push/pop scenario).) */
    4029             else
    4030             {
    4031                 Assert(fNoVolatileRegs);
    4032                 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
    4033                                                                     !fNoVolatileRegs
    4034                                                                  && enmIntendedUse == kIemNativeGstRegUse_Calculation);
    4035                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    4036                 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    4037                 {
    4038                     iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
    4039                     Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
    4040                            g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
    4041                            g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    4042                 }
    4043                 else
    4044                     Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
    4045                            g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4046                            g_apszIemNativeHstRegNames[idxRegNew]));
    4047                 idxReg = idxRegNew;
    4048             }
    4049         }
    4050         else
    4051         {
    4052             /*
    4053              * Oops. Shadowed guest register already allocated!
    4054              *
    4055              * Allocate a new register, copy the value and, if updating, the
    4056              * guest shadow copy assignment to the new register.
    4057              */
    4058             AssertMsg(   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    4059                       && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
    4060                       ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
    4061                        idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
    4062 
    4063             /** @todo share register for readonly access. */
    4064             uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
    4065                                                              enmIntendedUse == kIemNativeGstRegUse_Calculation);
    4066 
    4067             if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    4068                 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
    4069 
    4070             if (   enmIntendedUse != kIemNativeGstRegUse_ForUpdate
    4071                 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    4072                 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
    4073                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4074                        g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    4075             else
    4076             {
    4077                 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
    4078                 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
    4079                        g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
    4080                        g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
    4081             }
    4082             idxReg = idxRegNew;
    4083         }
    4084         Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
    4085 
    4086 #ifdef VBOX_STRICT
    4087         /* Strict builds: Check that the value is correct. */
    4088         *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
    4089 #endif
    4090 
    4091         return idxReg;
    4092     }
    4093 
    4094     /*
    4095      * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
    4096      */
    4097     uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
    4098 
    4099     if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
    4100         *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
    4101 
    4102     if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
    4103         iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
    4104     Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
    4105            g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
    4106 
    4107     return idxRegNew;
    4108 }
    4109 
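A hypothetical call-site sketch for the allocator above; enmSomeGstReg stands in for whatever IEMNATIVEGSTREG value the caller is working on:

/* Hypothetical call-site sketch, not from this changeset.  enmSomeGstReg
   is a placeholder for the caller's IEMNATIVEGSTREG value. */
uint8_t const idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, enmSomeGstReg,
                                                       kIemNativeGstRegUse_ForUpdate);
/* ... emit code that reads, modifies and writes back idxReg ... */
iemNativeRegFreeTmp(pReNative, idxReg);     /* shadow bookkeeping stays intact */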
    4110 
    4111 /**
    4112  * Allocates a temporary host general purpose register that already holds the
    4113  * given guest register value.
    4114  *
    4115  * The use case for this function is places where the shadowing state cannot be
    4116  * modified due to branching and such.  This will fail if we don't have a
    4117  * current shadow copy handy or if it's incompatible.  The only code that will
    4118  * be emitted here is value checking code in strict builds.
    4119  *
    4120  * The intended use can only be readonly!
    4121  *
    4122  * @returns The host register number, UINT8_MAX if not present.
    4123  * @param   pReNative       The native recompile state.
    4124  * @param   poff            Pointer to the instruction buffer offset.
    4125  *                          Will be updated in strict builds if a register is
    4126  *                          found.
    4127  * @param   enmGstReg       The guest register that is to be read.
    4128  * @note    In strict builds, this may throw instruction buffer growth failures.
    4129  *          Non-strict builds will not throw anything.
    4130  * @sa iemNativeRegAllocTmpForGuestReg
    4131  */
    4132 DECL_HIDDEN_THROW(uint8_t)
    4133 iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
    4134 {
    4135     Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
    4136 
    4137     /*
    4138      * First check if the guest register value is already in a host register.
    4139      */
    4140     if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    4141     {
    4142         uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
    4143         Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4144         Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
    4145         Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
    4146 
    4147         if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
    4148         {
    4149             /*
    4150              * We only do readonly use here, so easy compared to the other
    4151              * variant of this code.
    4152              */
    4153             pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
    4154             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
    4155             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4156             Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
    4157                    g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    4158 
    4159 #ifdef VBOX_STRICT
    4160             /* Strict builds: Check that the value is correct. */
    4161             *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
    4162 #else
    4163             RT_NOREF(poff);
    4164 #endif
    4165             return idxReg;
    4166         }
    4167     }
    4168 
    4169     return UINT8_MAX;
    4170 }
    4171 
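A hypothetical sketch of the branch-safe variant; since no load is emitted on a miss, the UINT8_MAX return must be handled:

/* Hypothetical sketch, not from this changeset: the branch-safe variant
   emits no load on a miss, so the UINT8_MAX return must be handled. */
uint8_t const idxReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, enmSomeGstReg);
if (idxReg != UINT8_MAX)
{
    /* ... read-only use of idxReg ... */
    iemNativeRegFreeTmp(pReNative, idxReg);
}
/* else: fall back to a path that leaves the shadowing state untouched. */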
    4172 
    4173 DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar);
    4174 
    4175 
    4176 /**
    4177  * Allocates argument registers for a function call.
    4178  *
    4179  * @returns New code buffer offset on success; throws VBox status code on failure, so no
    4180  *          need to check the return value.
    4181  * @param   pReNative   The native recompile state.
    4182  * @param   off         The current code buffer offset.
    4183  * @param   cArgs       The number of arguments the function call takes.
    4184  */
    4185 DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
    4186 {
    4187     AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
    4188                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
    4189     Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
    4190     Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
    4191 
    4192     if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
    4193         cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
    4194     else if (cArgs == 0)
    4195         return off;
    4196 
    4197     /*
    4198      * Do we get lucky and all registers are free and not shadowing anything?
    4199      */
    4200     if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
    4201         for (uint32_t i = 0; i < cArgs; i++)
    4202         {
    4203             uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
    4204             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
    4205             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4206             Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    4207         }
    4208     /*
    4209      * Okay, not lucky so we have to free up the registers.
    4210      */
    4211     else
    4212         for (uint32_t i = 0; i < cArgs; i++)
    4213         {
    4214             uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
    4215             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
    4216             {
    4217                 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
    4218                 {
    4219                     case kIemNativeWhat_Var:
    4220                     {
    4221                         uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
    4222                         AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars),
    4223                                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
    4224                         Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
    4225                         Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
    4226 
    4227                         if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
    4228                             pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    4229                         else
    4230                         {
    4231                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    4232                             Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    4233                         }
    4234                         break;
    4235                     }
    4236 
    4237                     case kIemNativeWhat_Tmp:
    4238                     case kIemNativeWhat_Arg:
    4239                     case kIemNativeWhat_rc:
    4240                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
    4241                     default:
    4242                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
    4243                 }
    4244 
    4245             }
    4246             if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
    4247             {
    4248                 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
    4249                 Assert(   (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
    4250                        == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
    4251                 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    4252                 pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    4253                 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    4254             }
    4255             else
    4256                 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    4257             pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
    4258             pReNative->Core.aHstRegs[idxReg].idxVar  = UINT8_MAX;
    4259         }
    4260     pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
    4261     return off;
    4262 }
    4263 
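The lucky-path test above folds "allocated" and "holding shadows" into a single mask compare; a standalone illustration in plain C (register masks and the arg-reg table are invented):

/* Standalone illustration (plain C) of the lucky-path test in
   iemNativeRegAllocArgs; masks and the arg-reg table are invented. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t const afCallRegs[4]          = { 0x00, 0x80, 0xc0, 0x1c0 }; /* invented arg-reg masks */
    uint32_t const bmHstRegs              = 1u << 4;    /* host reg 4 is allocated */
    uint32_t const bmHstRegsWithGstShadow = 1u << 6;    /* host reg 6 holds a guest shadow */
    unsigned const cArgs                  = 2;          /* needs regs 6 and 7 (mask 0xc0) */

    if (((bmHstRegs | bmHstRegsWithGstShadow) & afCallRegs[cArgs]) == 0)
        printf("lucky: arguments go straight into the call registers\n");
    else
        printf("unlucky: host reg 6 must have its shadow flushed first\n");
    return 0;
}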
    4264 
    4265 DECL_HIDDEN_THROW(uint8_t)  iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
    4266 
    4267 
    4268 #if 0
    4269 /**
    4270  * Frees a register assignment of any type.
    4271  *
    4272  * @param   pReNative       The native recompile state.
    4273  * @param   idxHstReg       The register to free.
    4274  *
    4275  * @note    Does not update variables.
    4276  */
    4277 DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    4278 {
    4279     Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4280     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    4281     Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
    4282     Assert(   pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
    4283            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
    4284            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
    4285            || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
    4286     Assert(   pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
    4287            || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
    4288            || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
    4289     Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    4290            == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    4291     Assert(   RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
    4292            == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    4293 
    4294     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxHstReg);
    4295     /* no flushing, right:
    4296     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    4297     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4298     pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4299     */
    4300 }
    4301 #endif
    4302 
    4303 
    4304 /**
    4305  * Frees a temporary register.
    4306  *
    4307  * Any shadow copies of guest registers assigned to the host register will not
    4308  * be flushed by this operation.
    4309  */
    4310 DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    4311 {
    4312     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    4313     Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
    4314     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    4315     Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
    4316            g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    4317 }
    4318 
    4319 
    4320 /**
    4321  * Frees a temporary immediate register.
    4322  *
    4323  * It is assumed that the caller has not modified the register, so it still holds
    4324  * the same value as when it was allocated via iemNativeRegAllocTmpImm().
    4325  */
    4326 DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
    4327 {
    4328     iemNativeRegFreeTmp(pReNative, idxHstReg);
    4329 }
    4330 
    4331 
    4332 /**
    4333  * Frees a register assigned to a variable.
    4334  *
    4335  * The register will be disassociated from the variable.
    4336  */
    4337 DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
    4338 {
    4339     Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
    4340     Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
    4341     uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    4342     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    4343     Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
    4344 
    4345     pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    4346     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    4347     if (!fFlushShadows)
    4348         Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%d\n",
    4349                g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
    4350     else
    4351     {
    4352         pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    4353         uint64_t const fGstRegShadowsOld        = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4354         pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4355         pReNative->Core.bmGstRegShadows        &= ~fGstRegShadowsOld;
    4356         uint64_t       fGstRegShadows           = fGstRegShadowsOld;
    4357         while (fGstRegShadows)
    4358         {
    4359             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    4360             fGstRegShadows &= ~RT_BIT_64(idxGstReg);
    4361 
    4362             Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
    4363             pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
    4364         }
    4365         Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%d\n",
    4366                g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
    4367     }
    4368 }
    4369 
    4370 
    4371 /**
    4372  * Called right before emitting a call instruction to move anything important
    4373  * out of call-volatile registers, free and flush the call-volatile registers,
    4374  * optionally freeing argument variables.
    4375  *
    4376  * @returns New code buffer offset; throws VBox status code on failure.
    4377  * @param   pReNative       The native recompile state.
    4378  * @param   off             The code buffer offset.
    4379  * @param   cArgs           The number of arguments the function call takes.
    4380  *                          It is presumed that the host register part of these has
    4381  *                          been allocated as such already and won't need moving,
    4382  *                          just freeing.
    4383  * @param   fKeepVars       Mask of variables that should keep their register
    4384  *                          assignments.  Caller must take care to handle these.
    4385  */
    4386 DECL_HIDDEN_THROW(uint32_t)
    4387 iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
    4388 {
    4389     Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
    4390 
    4391     /* fKeepVars will reduce this mask. */
    4392     uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    4393 
    4394     /*
    4395      * Move anything important out of volatile registers.
    4396      */
    4397     if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
    4398         cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
    4399     uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
    4400 #ifdef IEMNATIVE_REG_FIXED_TMP0
    4401                          & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
    4402 #endif
    4403                          & ~g_afIemNativeCallRegs[cArgs];
    4404 
    4405     fRegsToMove &= pReNative->Core.bmHstRegs;
    4406     if (!fRegsToMove)
    4407     { /* likely */ }
    4408     else
    4409     {
    4410         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
    4411         while (fRegsToMove != 0)
    4412         {
    4413             unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
    4414             fRegsToMove &= ~RT_BIT_32(idxReg);
    4415 
    4416             switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
    4417             {
    4418                 case kIemNativeWhat_Var:
    4419                 {
    4420                     uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
    4421                     Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
    4422                     Assert(pReNative->Core.bmVars & RT_BIT_32(idxVar));
    4423                     Assert(pReNative->Core.aVars[idxVar].idxReg == idxReg);
    4424                     if (!(RT_BIT_32(idxVar) & fKeepVars))
    4425                     {
    4426                         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%d enmKind=%d idxReg=%d\n",
    4427                                idxVar, pReNative->Core.aVars[idxVar].enmKind, pReNative->Core.aVars[idxVar].idxReg));
    4428                         if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
    4429                             pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    4430                         else
    4431                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    4432                     }
    4433                     else
    4434                         fRegsToFree &= ~RT_BIT_32(idxReg);
    4435                     continue;
    4436                 }
    4437 
    4438                 case kIemNativeWhat_Arg:
    4439                     AssertMsgFailed(("What?!?: %u\n", idxReg));
    4440                     continue;
    4441 
    4442                 case kIemNativeWhat_rc:
    4443                 case kIemNativeWhat_Tmp:
    4444                     AssertMsgFailed(("Missing free: %u\n", idxReg));
    4445                     continue;
    4446 
    4447                 case kIemNativeWhat_FixedTmp:
    4448                 case kIemNativeWhat_pVCpuFixed:
    4449                 case kIemNativeWhat_pCtxFixed:
    4450                 case kIemNativeWhat_FixedReserved:
    4451                 case kIemNativeWhat_Invalid:
    4452                 case kIemNativeWhat_End:
    4453                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
    4454             }
    4455             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
    4456         }
    4457     }
    4458 
    4459     /*
    4460      * Do the actual freeing.
    4461      */
    4462     if (pReNative->Core.bmHstRegs & fRegsToFree)
    4463         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
    4464                pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
    4465     pReNative->Core.bmHstRegs &= ~fRegsToFree;
    4466 
    4467     /* If there are guest register shadows in any call-volatile register, we
    4468        have to clear the corresponding guest register masks for each register. */
    4469     uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
    4470     if (fHstRegsWithGstShadow)
    4471     {
    4472         Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
    4473                pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
    4474         pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
    4475         do
    4476         {
    4477             unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
    4478             fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
    4479 
    4480             AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
    4481             pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
    4482             pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
    4483         } while (fHstRegsWithGstShadow != 0);
    4484     }
    4485 
    4486     return off;
    4487 }
    4488 
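A hypothetical ordering sketch for a helper call, based on the documented roles of the functions in this file; argument loading and the call emission itself are elided:

/* Hypothetical ordering sketch, not from this changeset. */
off = iemNativeRegFlushPendingWrites(pReNative, off);                    /* guest state must hit memory first */
off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs, 0);   /* vacate the call-volatile registers */
/* ... load the argument registers and emit the call ... */
off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);  /* reload the clobbered shadows */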
    4489 
    4490 /**
    4491  * Flushes a set of guest register shadow copies.
    4492  *
    4493  * This is usually done after calling a threaded function or a C-implementation
    4494  * of an instruction.
    4495  *
    4496  * @param   pReNative       The native recompile state.
    4497  * @param   fGstRegs        Set of guest registers to flush.
    4498  */
    4499 DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
    4500 {
    4501     /*
    4502      * Reduce the mask by what's currently shadowed
    4503      */
    4504     uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
    4505     fGstRegs &= bmGstRegShadowsOld;
    4506     if (fGstRegs)
    4507     {
    4508         uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
    4509         Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
    4510         pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
    4511         if (bmGstRegShadowsNew)
    4512         {
    4513             /*
    4514              * Partial.
    4515              */
    4516             do
    4517             {
    4518                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    4519                 uint8_t const  idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    4520                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    4521                 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
    4522                 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    4523 
    4524                 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
    4525                 fGstRegs &= ~fInThisHstReg;
    4526                 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
    4527                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
    4528                 if (!fGstRegShadowsNew)
    4529                     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    4530             } while (fGstRegs != 0);
    4531         }
    4532         else
    4533         {
    4534             /*
    4535              * Clear all.
    4536              */
    4537             do
    4538             {
    4539                 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    4540                 uint8_t const  idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    4541                 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    4542                 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
    4543                 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
    4544 
    4545                 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
    4546                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4547             } while (fGstRegs != 0);
    4548             pReNative->Core.bmHstRegsWithGstShadow = 0;
    4549         }
    4550     }
    4551 }
    4552 
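A standalone model of the partial path above: walk the requested guest-register mask and, for each hit, clear every requested guest register that shares the same host register in one step (plain C, not VBox code; __builtin_ctzll stands in for ASMBitFirstSetU64(x) - 1):

/* Standalone model (plain C, not VBox code) of the partial path in
   iemNativeRegFlushGuestShadows. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint64_t bmGstRegShadows        = 0x103;        /* guest 0,1 on host 2; guest 8 on host 5 */
    uint32_t bmHstRegsWithGstShadow = (1u << 2) | (1u << 5);
    uint8_t  aidxGstRegShadows[64]  = { [0] = 2, [1] = 2, [8] = 5 };
    uint64_t afGstRegShadows[16]    = { [2] = 0x003, [5] = 0x100 };

    uint64_t fGstRegs = 0x101 & bmGstRegShadows;    /* request: flush guest regs 0 and 8 */
    bmGstRegShadows &= ~fGstRegs;
    while (fGstRegs)
    {
        unsigned const idxGstReg     = (unsigned)__builtin_ctzll(fGstRegs);
        uint8_t  const idxHstReg     = aidxGstRegShadows[idxGstReg];
        uint64_t const fInThisHstReg = (afGstRegShadows[idxHstReg] & fGstRegs) | ((uint64_t)1 << idxGstReg);
        fGstRegs                    &= ~fInThisHstReg;
        afGstRegShadows[idxHstReg]  &= ~fInThisHstReg;
        if (!afGstRegShadows[idxHstReg])
            bmHstRegsWithGstShadow  &= ~((uint32_t)1 << idxHstReg);
    }
    assert(bmGstRegShadows == 0x002);               /* guest reg 1 kept its shadow... */
    assert(bmHstRegsWithGstShadow == (1u << 2));    /* ...so host reg 2 stays in the set */
    return 0;
}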
    4553 
    4554 /**
    4555  * Flushes guest register shadow copies held by a set of host registers.
    4556  *
    4557  * This is used with the TLB lookup code for ensuring that we don't carry on
    4558  * with any guest shadows in volatile registers, as these will get corrupted by
    4559  * a TLB miss.
    4560  *
    4561  * @param   pReNative       The native recompile state.
    4562  * @param   fHstRegs        Set of host registers to flush guest shadows for.
    4563  */
    4564 DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
    4565 {
    4566     /*
    4567      * Reduce the mask by what's currently shadowed.
    4568      */
    4569     uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
    4570     fHstRegs &= bmHstRegsWithGstShadowOld;
    4571     if (fHstRegs)
    4572     {
    4573         uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
    4574         Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
    4575                fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
    4576         pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
    4577         if (bmHstRegsWithGstShadowNew)
    4578         {
    4579             /*
    4580              * Partial (likely).
    4581              */
    4582             uint64_t fGstShadows = 0;
    4583             do
    4584             {
    4585                 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    4586                 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
    4587                 Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    4588                        == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    4589 
    4590                 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4591                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4592                 fHstRegs &= ~RT_BIT_32(idxHstReg);
    4593             } while (fHstRegs != 0);
    4594             pReNative->Core.bmGstRegShadows &= ~fGstShadows;
    4595         }
    4596         else
    4597         {
    4598             /*
    4599              * Clear all.
    4600              */
    4601             do
    4602             {
    4603                 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    4604                 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
    4605                 Assert(   (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
    4606                        == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
    4607 
    4608                 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
    4609                 fHstRegs &= ~RT_BIT_32(idxHstReg);
    4610             } while (fHstRegs != 0);
    4611             pReNative->Core.bmGstRegShadows = 0;
    4612         }
    4613     }
    4614 }
    4615 
    4616 
    4617 /**
    4618  * Restores guest shadow copies in volatile registers.
    4619  *
    4620  * This is used after calling a helper function (think TLB miss) to restore the
    4621  * register state of volatile registers.
    4622  *
    4623  * @param   pReNative               The native recompile state.
    4624  * @param   off                     The code buffer offset.
    4625  * @param   fHstRegsActiveShadows   Set of host registers which are allowed to
    4626  *                                  be active (allocated) w/o asserting. Hack.
    4627  * @see     iemNativeVarSaveVolatileRegsPreHlpCall(),
    4628  *          iemNativeVarRestoreVolatileRegsPostHlpCall()
    4629  */
    4630 DECL_HIDDEN_THROW(uint32_t)
    4631 iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
    4632 {
    4633     uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    4634     if (fHstRegs)
    4635     {
    4636         Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
    4637         do
    4638         {
    4639             unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    4640 
    4641             /* It's not fatal if a register is active holding a variable that
    4642                shadows a guest register, ASSUMING all pending guest register
    4643                writes were flushed prior to the helper call. However, we'll be
    4644                emitting duplicate restores, so it wastes code space. */
    4645             Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
    4646             RT_NOREF(fHstRegsActiveShadows);
    4647 
    4648             uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4649             Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
    4650             AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
    4651                        IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
    4652 
    4653             unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
    4654             off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
    4655 
    4656             fHstRegs &= ~RT_BIT_32(idxHstReg);
    4657         } while (fHstRegs != 0);
    4658     }
    4659     return off;
    4660 }
    4661 
    4662 
    4663 /**
    4664  * Flushes delayed write of a specific guest register.
    4665  *
    4666  * This must be called prior to calling CImpl functions and any helpers that use
    4667  * the guest state (like raising exceptions) and such.
    4668  *
    4669  * This optimization has not yet been implemented.  The first target would be
    4670  * RIP updates, since these are the most common ones.
    4671  */
    4672 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    4673                                                                   IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
    4674 {
    4675     RT_NOREF(pReNative, enmClass, idxReg);
    4676     return off;
    4677 }
    4678 
    4679 
    4680 /**
    4681  * Flushes any delayed guest register writes.
    4682  *
    4683  * This must be called prior to calling CImpl functions and any helpers that use
    4684  * the guest state (like raising exceptions) and such.
    4685  *
    4686  * This optimization has not yet been implemented.  The first target would be
    4687  * RIP updates, since these are the most common ones.
    4688  */
    4689 DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    4690 {
    4691     RT_NOREF(pReNative, off);
    4692     return off;
    4693 }
    4694 
    4695 
    4696 #ifdef VBOX_STRICT
    4697 /**
    4698  * Does internal register allocator sanity checks.
    4699  */
    4700 static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
    4701 {
    4702     /*
    4703      * Iterate host registers building a guest shadowing set.
    4704      */
    4705     uint64_t bmGstRegShadows        = 0;
    4706     uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
    4707     AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
    4708     while (bmHstRegsWithGstShadow)
    4709     {
    4710         unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
    4711         Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    4712         bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
    4713 
    4714         uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
    4715         AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
    4716         AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
    4717         bmGstRegShadows |= fThisGstRegShadows;
    4718         while (fThisGstRegShadows)
    4719         {
    4720             unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
    4721             fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
    4722             AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
    4723                       ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
    4724                        idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
    4725         }
    4726     }
    4727     AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
    4728               ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
    4729                bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
    4730 
    4731     /*
    4732      * Now the other way around, checking the guest to host index array.
    4733      */
    4734     bmHstRegsWithGstShadow = 0;
    4735     bmGstRegShadows        = pReNative->Core.bmGstRegShadows;
    4736     Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
    4737     while (bmGstRegShadows)
    4738     {
    4739         unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
    4740         Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
    4741         bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
    4742 
    4743         uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    4744         AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
    4745         AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
    4746                   ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
    4747                    idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
    4748         bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
    4749     }
    4750     AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
    4751               ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
    4752                bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
    4753 }
    4754 #endif
    4755 
    4756 
    4757 /*********************************************************************************************************************************
    4758 *   Code Emitters (larger snippets)                                                                                              *
    4759 *********************************************************************************************************************************/
    4760 
    4761 /**
    4762  * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
    4763  * extending to 64-bit width.
    4764  *
    4765  * @returns New code buffer offset on success; throws VBox status code on failure.
    4766  * @param   pReNative   The native recompile state.
    4767  * @param   off         The current code buffer position.
    4768  * @param   idxHstReg   The host register to load the guest register value into.
    4769  * @param   enmGstReg   The guest register to load.
    4770  *
    4771  * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
    4772  *       that is something the caller needs to do if applicable.
    4773  */
    4774 DECL_HIDDEN_THROW(uint32_t)
    4775 iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
    4776 {
    4777     Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
    4778     Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
    4779 
    4780     switch (g_aGstShadowInfo[enmGstReg].cb)
    4781     {
    4782         case sizeof(uint64_t):
    4783             return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    4784         case sizeof(uint32_t):
    4785             return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    4786         case sizeof(uint16_t):
    4787             return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    4788 #if 0 /* not present in the table. */
    4789         case sizeof(uint8_t):
    4790             return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
    4791 #endif
    4792         default:
    4793             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
    4794     }
    4795 }
    4796 
    4797 
    4798 #ifdef VBOX_STRICT
    4799 /**
    4800  * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
    4801  *
    4802  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    4803  *       Trashes EFLAGS on AMD64.
    4804  */
    4805 static uint32_t
    4806 iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
    4807 {
    4808 # ifdef RT_ARCH_AMD64
    4809     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
    4810 
    4811     /* rol reg64, 32 */
    4812     pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    4813     pbCodeBuf[off++] = 0xc1;
    4814     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    4815     pbCodeBuf[off++] = 32;
    4816 
    4817     /* test reg32, ffffffffh */
    4818     if (idxReg >= 8)
    4819         pbCodeBuf[off++] = X86_OP_REX_B;
    4820     pbCodeBuf[off++] = 0xf7;
    4821     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    4822     pbCodeBuf[off++] = 0xff;
    4823     pbCodeBuf[off++] = 0xff;
    4824     pbCodeBuf[off++] = 0xff;
    4825     pbCodeBuf[off++] = 0xff;
    4826 
    4827     /* je/jz +1 */
    4828     pbCodeBuf[off++] = 0x74;
    4829     pbCodeBuf[off++] = 0x01;
    4830 
    4831     /* int3 */
    4832     pbCodeBuf[off++] = 0xcc;
    4833 
    4834     /* rol reg64, 32 */
    4835     pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    4836     pbCodeBuf[off++] = 0xc1;
    4837     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    4838     pbCodeBuf[off++] = 32;
    4839 
    4840 # elif defined(RT_ARCH_ARM64)
    4841     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    4842     /* lsr tmp0, reg64, #32 */
    4843     pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
    4844     /* cbz tmp0, +1 */
    4845     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    4846     /* brk #0x1100 */
    4847     pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
    4848 
    4849 # else
    4850 #  error "Port me!"
    4851 # endif
    4852     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    4853     return off;
    4854 }
    4855 #endif /* VBOX_STRICT */
    4856 
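The rol/test/rol sequence on AMD64 (and the lsr on ARM64) implements the simple predicate below at runtime; a standalone restatement in plain C of what the emitted code traps on:

/* Standalone restatement (plain C) of the predicate the emitted sequence
   checks: trap if any of the high 32 bits are set.  The AMD64 variant
   rotates by 32 so a plain 32-bit test can see the high half, then rotates
   back to leave the register value unchanged. */
#include <assert.h>
#include <stdint.h>

static void Top32BitsClearCheck(uint64_t uReg)
{
    assert((uReg >> 32) == 0);      /* the emitted int3/brk fires when this fails */
}

int main(void)
{
    Top32BitsClearCheck(UINT32_MAX);            /* fine: high half is clear */
    /*Top32BitsClearCheck(UINT64_MAX);*/        /* would trap, like the emitted int3 */
    return 0;
}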
    4857 
    4858 #ifdef VBOX_STRICT
    4859 /**
    4860  * Emitting code that checks that the content of register @a idxReg is the same
    4861  * as what's in the guest register @a enmGstReg, resulting in a breakpoint
    4862  * instruction if that's not the case.
    4863  *
    4864  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    4865  *       Trashes EFLAGS on AMD64.
    4866  */
    4867 static uint32_t
    4868 iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
    4869 {
    4870 # ifdef RT_ARCH_AMD64
    4871     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    4872 
    4873     /* cmp reg, [mem] */
    4874     if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
    4875     {
    4876         if (idxReg >= 8)
    4877             pbCodeBuf[off++] = X86_OP_REX_R;
    4878         pbCodeBuf[off++] = 0x38;
    4879     }
    4880     else
    4881     {
    4882         if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
    4883             pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
    4884         else
    4885         {
    4886             if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
    4887                 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    4888             else
    4889                 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
    4890                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
    4891             if (idxReg >= 8)
    4892                 pbCodeBuf[off++] = X86_OP_REX_R;
    4893         }
    4894         pbCodeBuf[off++] = 0x39;
    4895     }
    4896     off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
    4897 
    4898     /* je/jz +1 */
    4899     pbCodeBuf[off++] = 0x74;
    4900     pbCodeBuf[off++] = 0x01;
    4901 
    4902     /* int3 */
    4903     pbCodeBuf[off++] = 0xcc;
    4904 
    4905     /* For values smaller than the register size, we must check that the rest
    4906        of the register is all zeros. */
    4907     if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
    4908     {
    4909         /* test reg64, imm32 */
    4910         pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
    4911         pbCodeBuf[off++] = 0xf7;
    4912         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
    4913         pbCodeBuf[off++] = 0;
    4914         pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
    4915         pbCodeBuf[off++] = 0xff;
    4916         pbCodeBuf[off++] = 0xff;
    4917 
    4918         /* je/jz +1 */
    4919         pbCodeBuf[off++] = 0x74;
    4920         pbCodeBuf[off++] = 0x01;
    4921 
    4922         /* int3 */
    4923         pbCodeBuf[off++] = 0xcc;
    4924         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    4925     }
    4926     else
    4927     {
    4928         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    4929         if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
    4930             iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
    4931     }
    4932 
    4933 # elif defined(RT_ARCH_ARM64)
    4934     /* mov TMP0, [gstreg] */
    4935     off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
    4936 
    4937     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    4938     /* sub tmp0, tmp0, idxReg */
    4939     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
    4940     /* cbz tmp0, +1 */
    4941     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
    4942     /* brk #0x1000+enmGstReg */
    4943     pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
    4944     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    4945 
    4946 # else
    4947 #  error "Port me!"
    4948 # endif
    4949     return off;
    4950 }
    4951 #endif /* VBOX_STRICT */
    4952 
    4953 
    4954 #ifdef VBOX_STRICT
    4955 /**
    4956  * Emits code that checks that IEMCPU::fExec matches @a fExec for all
    4957  * important bits.
    4958  *
    4959  * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
    4960  *       Trashes EFLAGS on AMD64.
    4961  */
    4962 static uint32_t
    4963 iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
    4964 {
    4965     uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
    4966     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
    4967     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
    4968     off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
    4969 
    4970 # ifdef RT_ARCH_AMD64
    4971     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    4972 
    4973     /* je/jz +1 */
    4974     pbCodeBuf[off++] = 0x74;
    4975     pbCodeBuf[off++] = 0x01;
    4976 
    4977     /* int3 */
    4978     pbCodeBuf[off++] = 0xcc;
    4979 
    4980 # elif defined(RT_ARCH_ARM64)
    4981     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    4982 
    4983     /* b.eq +1 */
    4984     pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
    4985     /* brk #0x2000 */
    4986     pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
    4987 
    4988 # else
    4989 #  error "Port me!"
    4990 # endif
    4991     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    4992 
    4993     iemNativeRegFreeTmp(pReNative, idxRegTmp);
    4994     return off;
    4995 }
    4996 #endif /* VBOX_STRICT */
    4997 
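/* C sketch of the strict-build assertion above (iemSketchExecFlagsMatch is a
   hypothetical name, not emitted code): the masked IEMCPU::fExec must match
   the key bits the translation block was compiled for. */
static bool iemSketchExecFlagsMatch(uint32_t fExecCpu, uint32_t fExecTb)
{
    return (fExecCpu & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
        == (fExecTb & IEMTB_F_KEY_MASK);
}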
    4998 
    4999 /**
    5000  * Emits code for checking the return code of a call and rcPassUp, returning
    5001  * from the code if either is non-zero.
    5002  */
    5003 DECL_HIDDEN_THROW(uint32_t)
    5004 iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
    5005 {
    5006 #ifdef RT_ARCH_AMD64
    5007     /*
    5008      * AMD64: eax = call status code.
    5009      */
    5010 
    5011     /* edx = rcPassUp */
    5012     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
    5013 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5014     off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
    5015 # endif
    5016 
    5017     /* edx = eax | rcPassUp */
    5018     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    5019     pbCodeBuf[off++] = 0x0b;                    /* or edx, eax */
    5020     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
    5021     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    5022 
    5023     /* Jump to non-zero status return path. */
    5024     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
    5025 
    5026     /* done. */
    5027 
    5028 #elif defined(RT_ARCH_ARM64)
    5029     /*
    5030      * ARM64: w0 = call status code.
    5031      */
    5032 # ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5033     off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
    5034 # endif
    5035     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
    5036 
    5037     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    5038 
    5039     pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
    5040 
    5041     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
    5042     iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
    5043     pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
    5044 
    5045 #else
    5046 # error "port me"
    5047 #endif
    5048     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    5049     return off;
    5050 }
    5051 
    5052 
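/* Rough C equivalent of the test emitted above for both targets
   (iemSketchNeedsRcFiddling is a hypothetical name, not emitted code): OR the
   call status with rcPassUp and take the NonZeroRetOrPassUp path if the
   result is non-zero. */
static bool iemSketchNeedsRcFiddling(int32_t rcStrict, int32_t rcPassUp)
{
    return ((uint32_t)rcStrict | (uint32_t)rcPassUp) != 0;
}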
    5053 /**
    5054  * Emits code to check if the content of @a idxAddrReg is a canonical address,
    5055  * raising a \#GP(0) if it isn't.
    5056  *
    5057  * @returns New code buffer offset; throws VBox status code on error.
    5058  * @param   pReNative       The native recompile state.
    5059  * @param   off             The code buffer offset.
    5060  * @param   idxAddrReg      The host register with the address to check.
    5061  * @param   idxInstr        The current instruction.
    5062  */
    5063 DECL_HIDDEN_THROW(uint32_t)
    5064 iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
    5065 {
    5066     /*
    5067      * Make sure we don't have any outstanding guest register writes as we may
    5068      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
    5069      */
    5070     off = iemNativeRegFlushPendingWrites(pReNative, off);
    5071 
    5072 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5073     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    5074 #else
    5075     RT_NOREF(idxInstr);
    5076 #endif
    5077 
    5078 #ifdef RT_ARCH_AMD64
    5079     /*
    5080      * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
    5081      *     return raisexcpt();
    5082      * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
    5083      */
    5084     uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    5085 
    5086     off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
    5087     off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
    5088     off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
    5089     off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
    5090     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    5091 
    5092     iemNativeRegFreeTmp(pReNative, iTmpReg);
    5093 
    5094 #elif defined(RT_ARCH_ARM64)
    5095     /*
    5096      * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
    5097      *     return raisexcpt();
    5098      * ----
    5099      *     mov     x1, 0x800000000000
    5100      *     add     x1, x0, x1
    5101      *     cmp     xzr, x1, lsr 48
    5102      *     b.ne    .Lraisexcpt
    5103      */
    5104     uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    5105 
    5106     off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
    5107     off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
    5108     off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
    5109     off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    5110 
    5111     iemNativeRegFreeTmp(pReNative, iTmpReg);
    5112 
    5113 #else
    5114 # error "Port me"
    5115 #endif
    5116     return off;
    5117 }
    5118 
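/* Both variants implement the same 48-bit canonical test.  A standalone C
   sketch of the AMD64 trick (iemSketchIsCanonical is a hypothetical name, not
   emitted code): adding 0x8000 to the high dword wraps both valid ranges
   (0x00000000..0x00007fff and 0xffff8000..0xffffffff) into 0x0000..0xffff,
   so shifting right by 16 yields zero exactly for canonical addresses: */
static bool iemSketchIsCanonical(uint64_t uAddr)
{
    return (((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
}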
    5119 
    5120 /**
    5121  * Emits code to check if the content of @a idxAddrReg is within the limit of
    5122  * idxSegReg, raising a \#GP(0) if it isn't.
    5123  *
    5124  * @returns New code buffer offset; throws VBox status code on error.
    5125  * @param   pReNative       The native recompile state.
    5126  * @param   off             The code buffer offset.
    5127  * @param   idxAddrReg      The host register (32-bit) with the address to
    5128  *                          check.
    5129  * @param   idxSegReg       The segment register (X86_SREG_XXX) to check
    5130  *                          against.
    5131  * @param   idxInstr        The current instruction.
    5132  */
    5133 DECL_HIDDEN_THROW(uint32_t)
    5134 iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    5135                                                     uint8_t idxAddrReg, uint8_t idxSegReg, uint8_t idxInstr)
    5136 {
    5137     /*
    5138      * Make sure we don't have any outstanding guest register writes as we may
    5139      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
    5140      */
    5141     off = iemNativeRegFlushPendingWrites(pReNative, off);
    5142 
    5143 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5144     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    5145 #else
    5146     RT_NOREF(idxInstr);
    5147 #endif
    5148 
    5149     /** @todo implement expand down/whatnot checking */
    5150     AssertStmt(idxSegReg == X86_SREG_CS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_CASE_NOT_IMPLEMENTED_1));
    5151 
    5152     uint8_t const iTmpLimReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
    5153                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + idxSegReg),
    5154                                                                kIemNativeGstRegUse_ForUpdate);
    5155 
    5156     off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, iTmpLimReg);
    5157     off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
    5158 
    5159     iemNativeRegFreeTmp(pReNative, iTmpLimReg);
    5160     return off;
    5161 }
    5162 
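/* The guest-level check this corresponds to (CS only, expand-down segments
   not yet handled) is a plain unsigned compare against the segment limit;
   iemSketchIsWithinCsLimit is a hypothetical name, not emitted code: */
static bool iemSketchIsWithinCsLimit(uint32_t uAddr, uint32_t uCsLimit)
{
    return uAddr <= uCsLimit; /* 'ja' takes the RaiseGp0 path otherwise */
}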
    5163 
    5164 /**
    5165  * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
    5166  *
    5167  * @returns The flush mask.
    5168  * @param   fCImpl          The IEM_CIMPL_F_XXX flags.
    5169  * @param   fGstShwFlush    The starting flush mask.
    5170  */
    5171 DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
    5172 {
    5173     if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
    5174         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_CS)
    5175                      |  RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_CS)
    5176                      |  RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
    5177     if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
    5178         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
    5179                      |  RT_BIT_64(kIemNativeGstReg_SegSelFirst   + X86_SREG_SS)
    5180                      |  RT_BIT_64(kIemNativeGstReg_SegBaseFirst  + X86_SREG_SS)
    5181                      |  RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
    5182     else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
    5183         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
    5184     if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
    5185         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
    5186     return fGstShwFlush;
    5187 }
    5188 
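/* Example use (sketch; fFlushExample is a hypothetical variable): a far
   branch adds the CS selector/base/limit shadow bits to whatever the caller
   already wanted flushed: */
uint64_t const fFlushExample = iemNativeCImplFlagsToGuestShadowFlushMask(IEM_CIMPL_F_BRANCH_FAR,
                                                                         RT_BIT_64(kIemNativeGstReg_Pc));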
    5189 
    5190 /**
    5191  * Emits a call to a CImpl function or something similar.
    5192  */
    5193 DECL_HIDDEN_THROW(uint32_t)
    5194 iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
    5195                        uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
    5196 {
    5197     /*
    5198      * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
    5199      * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
    5200      */
    5201     fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
    5202                                                              fGstShwFlush
    5203                                                              | RT_BIT_64(kIemNativeGstReg_Pc)
    5204                                                              | RT_BIT_64(kIemNativeGstReg_EFlags));
    5205     iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
    5206 
    5207     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
    5208 
    5209     /*
    5210      * Load the parameters.
    5211      */
    5212 #if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
    5213     /* Special code for the hidden VBOXSTRICTRC pointer. */
    5214     off = iemNativeEmitLoadGprFromGpr(  pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5215     off = iemNativeEmitLoadGprImm64(    pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
    5216     if (cAddParams > 0)
    5217         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
    5218     if (cAddParams > 1)
    5219         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
    5220     if (cAddParams > 2)
    5221         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
    5222     off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    5223 
    5224 #else
    5225     AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
    5226     off = iemNativeEmitLoadGprFromGpr(  pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5227     off = iemNativeEmitLoadGprImm64(    pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
    5228     if (cAddParams > 0)
    5229         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
    5230     if (cAddParams > 1)
    5231         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
    5232     if (cAddParams > 2)
    5233 # if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
    5234         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
    5235 # else
    5236         off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
    5237 # endif
    5238 #endif
    5239 
    5240     /*
    5241      * Make the call.
    5242      */
    5243     off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
    5244 
    5245 #if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
    5246     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    5247 #endif
    5248 
    5249     /*
    5250      * Check the status code.
    5251      */
    5252     return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
    5253 }
    5254 
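/* Why the Windows + VBOXSTRICTRC_STRICT_ENABLED path above juggles an extra
   argument: VBOXSTRICTRC is then a class, and the MSVC x64 ABI returns it via
   a hidden pointer passed as the first argument, so the call roughly behaves
   like this sketch (hypothetical layout, not emitted code):
       VBOXSTRICTRC rcStrict;                     // lives in the shadow area
       pfnCImpl(&rcStrict, pVCpu, cbInstr, ...);  // hidden return pointer first
   The 32-bit status is then read back from that stack slot after the call. */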
    5255 
    5256 /**
    5257  * Emits a call to a threaded worker function.
    5258  */
    5259 DECL_HIDDEN_THROW(uint32_t)
    5260 iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
    5261 {
    5262     iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
    5263     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
    5264 
    5265 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5266     /* The threaded function may throw / long jmp, so set the current instruction
    5267        number if we're counting. */
    5268     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    5269 #endif
    5270 
    5271     uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
    5272 
    5273 #ifdef RT_ARCH_AMD64
    5274     /* Load the parameters and emit the call. */
    5275 # ifdef RT_OS_WINDOWS
    5276 #  ifndef VBOXSTRICTRC_STRICT_ENABLED
    5277     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
    5278     if (cParams > 0)
    5279         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
    5280     if (cParams > 1)
    5281         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
    5282     if (cParams > 2)
    5283         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
    5284 #  else  /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
    5285     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
    5286     if (cParams > 0)
    5287         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
    5288     if (cParams > 1)
    5289         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
    5290     if (cParams > 2)
    5291     {
    5292         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
    5293         off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
    5294     }
    5295     off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    5296 #  endif /* VBOXSTRICTRC_STRICT_ENABLED */
    5297 # else
    5298     off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
    5299     if (cParams > 0)
    5300         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
    5301     if (cParams > 1)
    5302         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
    5303     if (cParams > 2)
    5304         off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
    5305 # endif
    5306 
    5307     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
    5308 
    5309 # if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
    5310     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    5311 # endif
    5312 
    5313 #elif defined(RT_ARCH_ARM64)
    5314     /*
    5315      * ARM64:
    5316      */
    5317     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5318     if (cParams > 0)
    5319         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
    5320     if (cParams > 1)
    5321         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
    5322     if (cParams > 2)
    5323         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
    5324 
    5325     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
    5326 
    5327 #else
    5328 # error "port me"
    5329 #endif
    5330 
    5331     /*
    5332      * Check the status code.
    5333      */
    5334     off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
    5335 
    5336     return off;
    5337 }
    5338 
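/* Net effect, sketched in C (not emitted code; unused parameters are simply
   not loaded):
       rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](
                      pVCpu, pCallEntry->auParams[0], pCallEntry->auParams[1],
                      pCallEntry->auParams[2]);
   followed by the rc/rcPassUp check emitted by
   iemNativeEmitCheckCallRetAndPassUp above. */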
    5339 
    5340 /**
    5341  * Emits the code at the CheckBranchMiss label.
    5342  */
    5343 static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5344 {
    5345     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
    5346     if (idxLabel != UINT32_MAX)
    5347     {
    5348         iemNativeLabelDefine(pReNative, idxLabel, off);
    5349 
    5350         /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
    5351         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5352         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
    5353 
    5354         /* jump back to the return sequence. */
    5355         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5356     }
    5357     return off;
    5358 }
    5359 
    5360 
    5361 /**
    5362  * Emits the code at the NeedCsLimChecking label.
    5363  */
    5364 static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5365 {
    5366     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
    5367     if (idxLabel != UINT32_MAX)
    5368     {
    5369         iemNativeLabelDefine(pReNative, idxLabel, off);
    5370 
    5371         /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
    5372         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5373         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
    5374 
    5375         /* jump back to the return sequence. */
    5376         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5377     }
    5378     return off;
    5379 }
    5380 
    5381 
    5382 /**
    5383  * Emits the code at the ObsoleteTb label.
    5384  */
    5385 static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5386 {
    5387     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
    5388     if (idxLabel != UINT32_MAX)
    5389     {
    5390         iemNativeLabelDefine(pReNative, idxLabel, off);
    5391 
    5392         /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
    5393         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5394         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
    5395 
    5396         /* jump back to the return sequence. */
    5397         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5398     }
    5399     return off;
    5400 }
    5401 
    5402 
    5403 /**
    5404  * Emits the code at the RaiseGP0 label.
    5405  */
    5406 static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5407 {
    5408     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
    5409     if (idxLabel != UINT32_MAX)
    5410     {
    5411         iemNativeLabelDefine(pReNative, idxLabel, off);
    5412 
    5413         /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
    5414         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5415         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
    5416 
    5417         /* jump back to the return sequence. */
    5418         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5419     }
    5420     return off;
    5421 }
    5422 
    5423 
    5424 /**
    5425  * Emits the code at the ReturnWithFlags label (returns
    5426  * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
    5427  */
    5428 static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5429 {
    5430     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
    5431     if (idxLabel != UINT32_MAX)
    5432     {
    5433         iemNativeLabelDefine(pReNative, idxLabel, off);
    5434 
    5435         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
    5436 
    5437         /* jump back to the return sequence. */
    5438         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5439     }
    5440     return off;
    5441 }
    5442 
    5443 
    5444 /**
    5445  * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
    5446  */
    5447 static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5448 {
    5449     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
    5450     if (idxLabel != UINT32_MAX)
    5451     {
    5452         iemNativeLabelDefine(pReNative, idxLabel, off);
    5453 
    5454         off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
    5455 
    5456         /* jump back to the return sequence. */
    5457         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5458     }
    5459     return off;
    5460 }
    5461 
    5462 
    5463 /**
    5464  * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
    5465  */
    5466 static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
    5467 {
    5468     /*
    5469      * Generate the rc + rcPassUp fiddling code if needed.
    5470      */
    5471     uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
    5472     if (idxLabel != UINT32_MAX)
    5473     {
    5474         iemNativeLabelDefine(pReNative, idxLabel, off);
    5475 
    5476         /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
    5477 #ifdef RT_ARCH_AMD64
    5478 # ifdef RT_OS_WINDOWS
    5479 #  ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5480         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8,  X86_GREG_xCX); /* cl = instruction number */
    5481 #  endif
    5482         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
    5483         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
    5484 # else
    5485         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
    5486         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
    5487 #  ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5488         off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
    5489 #  endif
    5490 # endif
    5491 # ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    5492         off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
    5493 # endif
    5494 
    5495 #else
    5496         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
    5497         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    5498         /* IEMNATIVE_CALL_ARG2_GREG is already set. */
    5499 #endif
    5500 
    5501         off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
    5502         off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
    5503     }
    5504     return off;
    5505 }
    5506 
    5507 
    5508 /**
    5509  * Emits a standard epilog.
    5510  */
    5511 static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
    5512 {
    5513     *pidxReturnLabel = UINT32_MAX;
    5514 
    5515     /*
    5516      * Successful return, so clear the return register (eax, w0).
    5517      */
    5518     off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
    5519 
    5520     /*
    5521      * Define label for common return point.
    5522      */
    5523     uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
    5524     *pidxReturnLabel = idxReturn;
    5525 
    5526     /*
    5527      * Restore registers and return.
    5528      */
    5529 #ifdef RT_ARCH_AMD64
    5530     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
    5531 
    5532     /* Reposition rsp at the r15 restore point. */
    5533     pbCodeBuf[off++] = X86_OP_REX_W;
    5534     pbCodeBuf[off++] = 0x8d;                    /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
    5535     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
    5536     pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
    5537 
    5538     /* Pop non-volatile registers and return */
    5539     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r15 */
    5540     pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
    5541     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r14 */
    5542     pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
    5543     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r13 */
    5544     pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
    5545     pbCodeBuf[off++] = X86_OP_REX_B;            /* pop r12 */
    5546     pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
    5547 # ifdef RT_OS_WINDOWS
    5548     pbCodeBuf[off++] = 0x58 + X86_GREG_xDI;     /* pop rdi */
    5549     pbCodeBuf[off++] = 0x58 + X86_GREG_xSI;     /* pop rsi */
    5550 # endif
    5551     pbCodeBuf[off++] = 0x58 + X86_GREG_xBX;     /* pop rbx */
    5552     pbCodeBuf[off++] = 0xc9;                    /* leave */
    5553     pbCodeBuf[off++] = 0xc3;                    /* ret */
    5554     pbCodeBuf[off++] = 0xcc;                    /* int3 poison */
    5555 
    5556 #elif defined(RT_ARCH_ARM64)
    5557     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    5558 
    5559     /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
    5560     AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
    5561     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
    5562                                                  ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
    5563                                                  IEMNATIVE_FRAME_VAR_SIZE / 8);
    5564     /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
    5565     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5566                                                  ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
    5567     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5568                                                  ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
    5569     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5570                                                  ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
    5571     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5572                                                  ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
    5573     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5574                                                  ARMV8_A64_REG_BP,  ARMV8_A64_REG_LR,  ARMV8_A64_REG_SP, 10);
    5575     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    5576 
    5577     /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ;  */
    5578     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
    5579     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
    5580                                                      IEMNATIVE_FRAME_SAVE_REG_SIZE);
    5581 
    5582     /* retab / ret */
    5583 # ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
    5584     if (1)
    5585         pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
    5586     else
    5587 # endif
    5588         pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
    5589 
    5590 #else
    5591 # error "port me"
    5592 #endif
    5593     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    5594 
    5595     return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
    5596 }
    5597 
    5598 
    5599 /**
    5600  * Emits a standard prolog.
    5601  */
    5602 static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    5603 {
    5604 #ifdef RT_ARCH_AMD64
    5605     /*
    5606      * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
    5607      * reserving 64 bytes for stack variables plus 4 non-register argument
    5608      * slots.  Fixed register assignment: xBX = pVCpu.
    5609      *
    5610      * Since we always do the same register spilling, we can use the same
    5611      * unwind description for all the code.
    5612      */
    5613     uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    5614     pbCodeBuf[off++] = 0x50 + X86_GREG_xBP;     /* push rbp */
    5615     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbp, rsp */
    5616     pbCodeBuf[off++] = 0x8b;
    5617     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
    5618     pbCodeBuf[off++] = 0x50 + X86_GREG_xBX;     /* push rbx */
    5619     AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
    5620 # ifdef RT_OS_WINDOWS
    5621     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbx, rcx ; RBX = pVCpu */
    5622     pbCodeBuf[off++] = 0x8b;
    5623     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
    5624     pbCodeBuf[off++] = 0x50 + X86_GREG_xSI;     /* push rsi */
    5625     pbCodeBuf[off++] = 0x50 + X86_GREG_xDI;     /* push rdi */
    5626 # else
    5627     pbCodeBuf[off++] = X86_OP_REX_W;            /* mov rbx, rdi ; RBX = pVCpu */
    5628     pbCodeBuf[off++] = 0x8b;
    5629     pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
    5630 # endif
    5631     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r12 */
    5632     pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
    5633     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r13 */
    5634     pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
    5635     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r14 */
    5636     pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
    5637     pbCodeBuf[off++] = X86_OP_REX_B;            /* push r15 */
    5638     pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
    5639 
    5640     off = iemNativeEmitSubGprImm(pReNative, off,    /* sub rsp, byte 28h */
    5641                                  X86_GREG_xSP,
    5642                                    IEMNATIVE_FRAME_ALIGN_SIZE
    5643                                  + IEMNATIVE_FRAME_VAR_SIZE
    5644                                  + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
    5645                                  + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
    5646     AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
    5647     AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
    5648     AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
    5649 
    5650 #elif defined(RT_ARCH_ARM64)
    5651     /*
    5652      * We set up a stack frame exactly like on x86, only we have to push the
    5653      * return address ourselves here.  We save all non-volatile registers.
    5654      */
    5655     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    5656 
    5657 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've
    5658                       * been unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.
    5659                       * It's definitely the dwarf stepping code, but until that's found it's very tedious to figure out
    5660                       * whether it's in any way conditional, so we just emit this instruction now and hope for the best... */
    5661     /* pacibsp */
    5662     pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
    5663 # endif
    5664 
    5665     /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
    5666     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
    5667     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
    5668                                                  ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
    5669                                                  -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
    5670     /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
    5671     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5672                                                  ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
    5673     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5674                                                  ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
    5675     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5676                                                  ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
    5677     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5678                                                  ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
    5679     /* Save the BP and LR (ret address) registers at the top of the frame. */
    5680     pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
    5681                                                  ARMV8_A64_REG_BP,  ARMV8_A64_REG_LR,  ARMV8_A64_REG_SP, 10);
    5682     AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    5683     /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
    5684     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
    5685                                                      ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
    5686 
    5687     /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ;  Allocate the variable area from SP. */
    5688     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
    5689 
    5690     /* mov r28, r0  */
    5691     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
    5692     /* mov r27, r1  */
    5693     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
    5694 
    5695 #else
    5696 # error "port me"
    5697 #endif
    5698     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    5699     return off;
    5700 }
    5701 
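/* Rough sketch of the resulting AMD64 frame (offsets relative to RBP; the
   IEMNATIVE_FP_OFF_* defines are authoritative, this is just an assumed
   illustration for the Windows variant):
       [rbp+08h]   return address
       [rbp+00h]   saved rbp
       [rbp-08h]   saved rbx (= pVCpu)
       ...         saved rsi, rdi and r12-r15
       [rbp-38h]   last push (IEMNATIVE_FP_OFF_LAST_PUSH)
       [rsp...]    variable area plus stack/shadow argument slots. */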
    5702 
    5703 
    5704 
    5705 /*********************************************************************************************************************************
    5706 *   Emitters for IEM_MC_BEGIN and IEM_MC_END.                                                                                    *
    5707 *********************************************************************************************************************************/
    5708 
    5709 #define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
    5710     { \
    5711         Assert(pReNative->Core.bmVars     == 0); \
    5712         Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
    5713         Assert(pReNative->Core.bmStack    == 0); \
    5714         pReNative->fMc    = (a_fMcFlags); \
    5715         pReNative->fCImpl = (a_fCImplFlags); \
    5716         pReNative->cArgs  = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
    5717 
    5718 /** We have to get to the end in recompilation mode, as otherwise we won't
    5719  * generate code for all the IEM_MC_IF_XXX branches. */
    5720 #define IEM_MC_END() \
    5721         iemNativeVarFreeAll(pReNative); \
    5722     } return off
    5723 
    5724 
    5725 
    5726 /*********************************************************************************************************************************
    5727 *   Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                               *
    5728 *********************************************************************************************************************************/
    5729 
    5730 #define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
    5731     pReNative->fMc    = 0; \
    5732     pReNative->fCImpl = (a_fFlags); \
    5733     return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
    5734 
    5735 
    5736 #define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
    5737     pReNative->fMc    = 0; \
    5738     pReNative->fCImpl = (a_fFlags); \
    5739     return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
    5740 
    5741 DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    5742                                                     uint8_t idxInstr, uint64_t a_fGstShwFlush,
    5743                                                     uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
    5744 {
    5745     return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
    5746 }
    5747 
    5748 
    5749 #define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
    5750     pReNative->fMc    = 0; \
    5751     pReNative->fCImpl = (a_fFlags); \
    5752     return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
    5753                                    (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
    5754 
    5755 DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    5756                                                     uint8_t idxInstr, uint64_t a_fGstShwFlush,
    5757                                                     uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
    5758 {
    5759     return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
    5760 }
    5761 
    5762 
    5763 #define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
    5764     pReNative->fMc    = 0; \
    5765     pReNative->fCImpl = (a_fFlags); \
    5766     return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
    5767                                    (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
    5768 
    5769 DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    5770                                                     uint8_t idxInstr, uint64_t a_fGstShwFlush,
    5771                                                     uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
    5772                                                     uint64_t uArg2)
    5773 {
    5774     return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
    5775 }
    5776 
    5777 
    5778 
    5779 /*********************************************************************************************************************************
    5780 *   Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX)                                                     *
    5781 *********************************************************************************************************************************/
    5782 
    5783 /** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
    5784  *  and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
    5785 DECL_INLINE_THROW(uint32_t)
    5786 iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    5787 {
    5788     /*
    5789      * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
    5790      * return with a special status code and make the execution loop deal with
    5791      * this.  If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
    5792      * exception and won't continue execution.  While CPUMCTX_DBG_DBGF_MASK
    5793      * could continue w/o interruption, it will probably drop into the
    5794      * debugger, so it's not worth the effort of trying to service it here and
    5795      * we just lump it in with the handling of the others.
    5796      *
    5797      * To simplify the code and the register state management even more (wrt the
    5798      * immediate in the AND operation), we always update the flags and skip the
    5799      * extra check and its associated conditional jump.
    5800      */
    5801     AssertCompile(   (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
    5802                   <= UINT32_MAX);
    5803     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    5804                                                              kIemNativeGstRegUse_ForUpdate);
    5805     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
    5806                                                              X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
    5807                                                              iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
    5808     off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
    5809     off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
    5810 
    5811     /* Free but don't flush the EFLAGS register. */
    5812     iemNativeRegFreeTmp(pReNative, idxEflReg);
    5813 
    5814     return off;
    5815 }
    5816 
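/* C sketch of the guest-visible behaviour emitted above (compare
   iemRegFinishClearingRF; iemSketchFinishClearingRF is a hypothetical name,
   not emitted code): */
static int iemSketchFinishClearingRF(uint32_t *pfEFlags)
{
    if (*pfEFlags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
        return VINF_IEM_REEXEC_FINISH_WITH_FLAGS; /* let the execution loop deal with it */
    *pfEFlags &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
    return VINF_SUCCESS;
}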
    5817 
    5818 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
    5819     off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr))
    5820 
    5821 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr) \
    5822     IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr); \
    5823     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5824 
    5825 /** Same as iemRegAddToRip64AndFinishingNoFlags. */
    5826 DECL_INLINE_THROW(uint32_t)
    5827 iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    5828 {
    5829     /* Allocate a temporary PC register. */
    5830     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    5831 
    5832     /* Perform the addition and store the result. */
    5833     off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
    5834     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    5835 
    5836     /* Free but don't flush the PC register. */
    5837     iemNativeRegFreeTmp(pReNative, idxPcReg);
    5838 
    5839     return off;
    5840 }
    5841 
    5842 
    5843 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
    5844     off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr))
    5845 
    5846 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr) \
    5847     IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr); \
    5848     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5849 
    5850 /** Same as iemRegAddToEip32AndFinishingNoFlags. */
    5851 DECL_INLINE_THROW(uint32_t)
    5852 iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    5853 {
    5854     /* Allocate a temporary PC register. */
    5855     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    5856 
    5857     /* Perform the addition and store the result. */
    5858     off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
    5859     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    5860 
    5861     /* Free but don't flush the PC register. */
    5862     iemNativeRegFreeTmp(pReNative, idxPcReg);
    5863 
    5864     return off;
    5865 }
    5866 
    5867 
    5868 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
    5869     off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr))
    5870 
    5871 #define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr) \
    5872     IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr); \
    5873     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5874 
    5875 /** Same as iemRegAddToIp16AndFinishingNoFlags. */
    5876 DECL_INLINE_THROW(uint32_t)
    5877 iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    5878 {
    5879     /* Allocate a temporary PC register. */
    5880     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    5881 
    5882     /* Perform the addition and store the result. */
    5883     off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
    5884     off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    5885     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    5886 
    5887     /* Free but don't flush the PC register. */
    5888     iemNativeRegFreeTmp(pReNative, idxPcReg);
    5889 
    5890     return off;
    5891 }
    5892 
    5893 
    5894 
    5895 /*********************************************************************************************************************************
    5896 *   Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX).                                *
    5897 *********************************************************************************************************************************/
    5898 
    5899 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize) \
    5900     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
    5901                                                             (a_enmEffOpSize), pCallEntry->idxInstr)
    5902 
    5903 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
    5904     IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize); \
    5905     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5906 
    5907 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
    5908     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
    5909                                                             IEMMODE_16BIT, pCallEntry->idxInstr)
    5910 
    5911 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
    5912     IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
    5913     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5914 
    5915 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr) \
    5916     off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
    5917                                                             IEMMODE_64BIT, pCallEntry->idxInstr)
    5918 
    5919 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr) \
    5920     IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr); \
    5921     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5922 
    5923 /** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
    5924  *  iemRegRip64RelativeJumpS16AndFinishNoFlags and
    5925  *  iemRegRip64RelativeJumpS32AndFinishNoFlags. */
    5926 DECL_INLINE_THROW(uint32_t)
    5927 iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
    5928                                                   int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
    5929 {
    5930     Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
    5931 
    5932     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    5933     off = iemNativeRegFlushPendingWrites(pReNative, off);
    5934 
    5935     /* Allocate a temporary PC register. */
    5936     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    5937 
    5938     /* Perform the addition. */
    5939     off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
    5940 
    5941     if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
    5942     {
    5943         /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
    5944         off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    5945     }
    5946     else
    5947     {
    5948         /* Just truncate the result to 16-bit IP. */
    5949         Assert(enmEffOpSize == IEMMODE_16BIT);
    5950         off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    5951     }
    5952     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    5953 
    5954     /* Free but don't flush the PC register. */
    5955     iemNativeRegFreeTmp(pReNative, idxPcReg);
    5956 
    5957     return off;
    5958 }
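/*
 * Editorial note: an illustrative sketch (made-up helper name) of what the
 * emitted code computes for the 64-bit relative jump.  The canonical test
 * mirrors iemNativeEmitCheckGprCanonicalMaybeRaiseGp0: adding 2^47 folds both
 * canonical ranges below 2^48, so anything at or above 2^48 afterwards is
 * non-canonical.
 */
static int iemSketchRip64RelJump(uint64_t uRipOld, uint8_t cbInstr, int32_t offDisp,
                                 bool f16BitOpSize, uint64_t *puRipNew)
{
    uint64_t uRipNew = uRipOld + cbInstr + (int64_t)offDisp;
    if (f16BitOpSize)
        uRipNew &= UINT16_MAX;                              /* truncate to 16-bit IP */
    else if ((uRipNew + UINT64_C(0x800000000000)) >> 48)
        return -1;                                          /* would raise #GP(0) and exit the TB */
    *puRipNew = uRipNew;
    return 0;
}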
    5959 
    5960 
    5961 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize) \
    5962     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
    5963                                                             (a_enmEffOpSize), pCallEntry->idxInstr)
    5964 
    5965 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize) \
    5966     IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize); \
    5967     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5968 
    5969 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
    5970     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
    5971                                                             IEMMODE_16BIT, pCallEntry->idxInstr)
    5972 
    5973 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
    5974     IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
    5975     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5976 
    5977 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
    5978     off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
    5979                                                             IEMMODE_32BIT, pCallEntry->idxInstr)
    5980 
    5981 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
    5982     IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
    5983     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    5984 
    5985 /** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
    5986  *  iemRegEip32RelativeJumpS16AndFinishNoFlags and
    5987  *  iemRegEip32RelativeJumpS32AndFinishNoFlags. */
    5988 DECL_INLINE_THROW(uint32_t)
    5989 iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
    5990                                                   int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
    5991 {
    5992     Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
    5993 
    5994     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    5995     off = iemNativeRegFlushPendingWrites(pReNative, off);
    5996 
    5997     /* Allocate a temporary PC register. */
    5998     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    5999 
    6000     /* Perform the addition. */
    6001     off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
    6002 
    6003     /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
    6004     if (enmEffOpSize == IEMMODE_16BIT)
    6005         off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    6006 
    6007     /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
    6008     off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
    6009 
    6010     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    6011 
    6012     /* Free but don't flush the PC register. */
    6013     iemNativeRegFreeTmp(pReNative, idxPcReg);
    6014 
    6015     return off;
    6016 }
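/*
 * Editorial note: the matching sketch for the 32-bit variant above
 * (hypothetical helper), with a plain compare standing in for
 * iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0.
 */
static int iemSketchEip32RelJump(uint32_t uEipOld, uint8_t cbInstr, int32_t offDisp,
                                 bool f16BitOpSize, uint32_t cbLimitCs, uint32_t *puEipNew)
{
    uint32_t uEipNew = uEipOld + cbInstr + offDisp;
    if (f16BitOpSize)
        uEipNew &= UINT16_MAX;          /* 16-bit operand size: truncate to IP */
    if (uEipNew > cbLimitCs)
        return -1;                      /* beyond the CS limit: would raise #GP(0) and exit the TB */
    *puEipNew = uEipNew;
    return 0;
}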
    6017 
    6018 
    6019 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr) \
    6020     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr)
    6021 
    6022 #define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr) \
    6023     IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr); \
    6024     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6025 
    6026 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
    6027     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr)
    6028 
    6029 #define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
    6030     IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
    6031     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6032 
    6033 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr) \
    6034     off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr)
    6035 
    6036 #define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr) \
    6037     IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr); \
    6038     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6039 
    6040 /** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
    6041 DECL_INLINE_THROW(uint32_t)
    6042 iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    6043                                                  uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
    6044 {
    6045     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    6046     off = iemNativeRegFlushPendingWrites(pReNative, off);
    6047 
    6048     /* Allocate a temporary PC register. */
    6049     uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
    6050 
    6051     /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
    6052     off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
    6053     off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
    6054     off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
    6055     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    6056 
    6057     /* Free but don't flush the PC register. */
    6058     iemNativeRegFreeTmp(pReNative, idxPcReg);
    6059 
    6060     return off;
    6061 }
    6062 
    6063 
    6064 
    6065 /*********************************************************************************************************************************
    6066 *   Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                   *
    6067 *********************************************************************************************************************************/
    6068 
    6069 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
    6070 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
    6071     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
    6072 
    6073 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
    6074 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
    6075     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
    6076 
    6077 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
    6078 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
    6079     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP),  true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
    6080 
    6081 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
    6082  *  clears flags. */
    6083 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
    6084     IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
    6085     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6086 
    6087 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
    6088  *  clears flags. */
    6089 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
    6090     IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
    6091     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6092 
    6093 /** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
    6094  *  clears flags. */
    6095 #define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
    6096     IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
    6097     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6098 
    6099 #undef IEM_MC_SET_RIP_U16_AND_FINISH
    6100 
    6101 
    6102 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
    6103 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
    6104     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
    6105 
    6106 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
    6107 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
    6108     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP),  true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
    6109 
    6110 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
    6111  *  clears flags. */
    6112 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
    6113     IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
    6114     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6115 
    6116 /** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
    6117  *  and clears flags. */
    6118 #define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
    6119     IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
    6120     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6121 
    6122 #undef IEM_MC_SET_RIP_U32_AND_FINISH
    6123 
    6124 
    6125 /** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
    6126 #define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
    6127     off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP),  true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
    6128 
    6129 /** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
    6130  *  and clears flags. */
    6131 #define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
    6132     IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
    6133     off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
    6134 
    6135 #undef IEM_MC_SET_RIP_U64_AND_FINISH
    6136 
    6137 
    6138 /** Same as iemRegRipJumpU16AndFinishNoFlags,
    6139  *  iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
    6140 DECL_INLINE_THROW(uint32_t)
    6141 iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
    6142                             uint8_t idxInstr, uint8_t cbVar)
    6143 {
    6144     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
    6145     Assert(pReNative->Core.aVars[idxVarPc].cbVar == cbVar);
    6146 
    6147     /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
    6148     off = iemNativeRegFlushPendingWrites(pReNative, off);
    6149 
    6150     /* Get a register with the new PC loaded from idxVarPc.
    6151        Note! This ASSUMES that the high bits of the GPR are zeroed. */
    6152     uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
    6153 
    6154     /* Check limit (may #GP(0) + exit TB). */
    6155     if (!f64Bit)
    6156         off = iemNativeEmitCheckGpr32AgainstSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, X86_SREG_CS, idxInstr);
    6157     /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
    6158     else if (cbVar > sizeof(uint32_t))
    6159         off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
    6160 
    6161     /* Store the result. */
    6162     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    6163 
    6164     iemNativeVarRegisterRelease(pReNative, idxVarPc);
    6165     /** @todo implicitly free the variable? */
    6166 
    6167     return off;
    6168 }
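/*
 * Editorial note: a sketch (hypothetical helper) of the validation dispatch
 * above.  The canonical check is only emitted when the PC variable is wider
 * than 32 bits, since a zero-extended 16/32-bit value can never be
 * non-canonical.
 */
static bool iemSketchRipJumpIsValid(uint64_t uNewPc, bool f64Bit, uint8_t cbVar, uint32_t cbLimitCs)
{
    if (!f64Bit)
        return uNewPc <= cbLimitCs;                             /* CS limit check, may #GP(0) */
    if (cbVar > sizeof(uint32_t))
        return !((uNewPc + UINT64_C(0x800000000000)) >> 48);    /* canonical check, may #GP(0) */
    return true;
}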
    6169 
    6170 
    6171 
    6172 /*********************************************************************************************************************************
    6173 *   Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF)                                                         *
    6174 *********************************************************************************************************************************/
    6175 
    6176 /**
    6177  * Pushes an IEM_MC_IF_XXX onto the condition stack.
    6178  *
    6179  * @returns Pointer to the condition stack entry on success, NULL on failure
    6180  *          (too many nestings)
    6181  */
    6182 DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
    6183 {
    6184     uint32_t const idxStack = pReNative->cCondDepth;
    6185     AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
    6186 
    6187     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
    6188     pReNative->cCondDepth = (uint8_t)(idxStack + 1);
    6189 
    6190     uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
    6191     pEntry->fInElse       = false;
    6192     pEntry->idxLabelElse  = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
    6193     pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
    6194 
    6195     return pEntry;
    6196 }
    6197 
    6198 
    6199 /**
    6200  * Start of the if-block, snapshotting the register and variable state.
    6201  */
    6202 DECL_INLINE_THROW(void)
    6203 iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
    6204 {
    6205     Assert(offIfBlock != UINT32_MAX);
    6206     Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
    6207     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
    6208     Assert(!pEntry->fInElse);
    6209 
    6210     /* Define the start of the IF block if requested or for disassembly purposes. */
    6211     if (idxLabelIf != UINT32_MAX)
    6212         iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
    6213 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    6214     else
    6215         iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
    6216 #else
    6217     RT_NOREF(offIfBlock);
    6218 #endif
    6219 
    6220     /* Copy the initial state so we can restore it in the 'else' block. */
    6221     pEntry->InitialState = pReNative->Core;
    6222 }
    6223 
    6224 
    6225 #define IEM_MC_ELSE() } while (0); \
    6226         off = iemNativeEmitElse(pReNative, off); \
    6227         do {
    6228 
    6229 /** Emits code related to IEM_MC_ELSE. */
    6230 DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    6231 {
    6232     /* Check sanity and get the conditional stack entry. */
    6233     Assert(off != UINT32_MAX);
    6234     Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
    6235     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
    6236     Assert(!pEntry->fInElse);
    6237 
    6238     /* Jump to the endif */
    6239     off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
    6240 
    6241     /* Define the else label and enter the else part of the condition. */
    6242     iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
    6243     pEntry->fInElse = true;
    6244 
    6245     /* Snapshot the core state so we can do a merge at the endif and restore
    6246        the snapshot we took at the start of the if-block. */
    6247     pEntry->IfFinalState = pReNative->Core;
    6248     pReNative->Core = pEntry->InitialState;
    6249 
    6250     return off;
    6251 }
    6252 
    6253 
    6254 #define IEM_MC_ENDIF() } while (0); \
    6255         off = iemNativeEmitEndIf(pReNative, off)
    6256 
    6257 /** Emits code related to IEM_MC_ENDIF. */
    6258 DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    6259 {
    6260     /* Check sanity and get the conditional stack entry. */
    6261     Assert(off != UINT32_MAX);
    6262     Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
    6263     PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
    6264 
    6265     /*
    6266      * Now we have to find common ground with the core state at the end of the
    6267      * if-block.  Use the smallest common denominator and just drop anything
    6268      * that isn't the same in both states.
    6269      */
    6270     /** @todo We could, maybe, shuffle registers around if we thought it helpful,
    6271      *        which is why we're doing this at the end of the else-block.
    6272      *        But we'd need more info about the future for that to be worth the effort.
    6273     PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
    6274     if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
    6275     {
    6276         /* shadow guest stuff first. */
    6277         uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
    6278         if (fGstRegs)
    6279         {
    6280             Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
    6281             do
    6282             {
    6283                 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
    6284                 fGstRegs &= ~RT_BIT_64(idxGstReg);
    6285 
    6286                 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
    6287                 if (  !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
    6288                     || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
    6289                 {
    6290                     Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
    6291                            g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
    6292                     iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
    6293                 }
    6294             } while (fGstRegs);
    6295         }
    6296         else
    6297             Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
    6298 
    6299         /* Check variables next. For now we must require them to be identical
    6300            or stuff we can recreate. */
    6301         Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
    6302         uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
    6303         if (fVars)
    6304         {
    6305             uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
    6306             do
    6307             {
    6308                 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
    6309                 fVars &= ~RT_BIT_32(idxVar);
    6310 
    6311                 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
    6312                 {
    6313                     if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
    6314                         continue;
    6315                     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
    6316                     {
    6317                         uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    6318                         if (idxHstReg != UINT8_MAX)
    6319                         {
    6320                             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    6321                             pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    6322                             Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u\n",
    6323                                    g_apszIemNativeHstRegNames[idxHstReg], idxVar));
    6324                         }
    6325                         continue;
    6326                     }
    6327                 }
    6328                 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
    6329                     continue;
    6330 
    6331                 /* Irreconcilable, so drop it. */
    6332                 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    6333                 if (idxHstReg != UINT8_MAX)
    6334                 {
    6335                     pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    6336                     pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    6337                     Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u (also dropped)\n",
    6338                            g_apszIemNativeHstRegNames[idxHstReg], idxVar));
    6339                 }
    6340                 Log11(("iemNativeEmitEndIf: Freeing variable #%u\n", idxVar));
    6341                 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
    6342             } while (fVars);
    6343         }
    6344 
    6345         /* Finally, check that the host register allocations match. */
    6346         AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
    6347                       ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
    6348                        pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
    6349                       IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
    6350     }
    6351 
    6352     /*
    6353      * Define the endif label and maybe the else one if we're still in the 'if' part.
    6354      */
    6355     if (!pEntry->fInElse)
    6356         iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
    6357     else
    6358         Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
    6359     iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
    6360 
    6361     /* Pop the conditional stack. */
    6362     pReNative->cCondDepth -= 1;
    6363 
    6364     return off;
    6365 }
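/*
 * Editorial note: for orientation, how the IEM_MC_IF_XXX / IEM_MC_ELSE /
 * IEM_MC_ENDIF macros above nest in an MC block.  Source such as
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ...if body...
 *      } IEM_MC_ELSE() {
 *          ...else body...
 *      } IEM_MC_ENDIF();
 *
 * expands to roughly:
 *
 *      off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF);
 *      do { { ...if body... } } while (0);
 *      off = iemNativeEmitElse(pReNative, off);
 *      do { { ...else body... } } while (0);
 *      off = iemNativeEmitEndIf(pReNative, off);
 *
 * so each emitter runs exactly at a block boundary, where it can snapshot or
 * reconcile the register allocator state as described above.
 */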
    6366 
    6367 
    6368 #define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
    6369         off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
    6370         do {
    6371 
    6372 /** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
    6373 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
    6374 {
    6375     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6376 
    6377     /* Get the eflags. */
    6378     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    6379                                                               kIemNativeGstRegUse_ReadOnly);
    6380 
    6381     /* Test and jump. */
    6382     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
    6383 
    6384     /* Free but don't flush the EFlags register. */
    6385     iemNativeRegFreeTmp(pReNative, idxEflReg);
    6386 
    6387     /* Make a copy of the core state now as we start the if-block. */
    6388     iemNativeCondStartIfBlock(pReNative, off);
    6389 
    6390     return off;
    6391 }
    6392 
    6393 
    6394 #define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
    6395         off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
    6396         do {
    6397 
    6398 /** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
    6399 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
    6400 {
    6401     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6402 
    6403     /* Get the eflags. */
    6404     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    6405                                                               kIemNativeGstRegUse_ReadOnly);
    6406 
    6407     /* Test and jump. */
    6408     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
    6409 
    6410     /* Free but don't flush the EFlags register. */
    6411     iemNativeRegFreeTmp(pReNative, idxEflReg);
    6412 
    6413     /* Make a copy of the core state now as we start the if-block. */
    6414     iemNativeCondStartIfBlock(pReNative, off);
    6415 
    6416     return off;
    6417 }
    6418 
    6419 
    6420 #define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
    6421         off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
    6422         do {
    6423 
    6424 /** Emits code for IEM_MC_IF_EFL_BIT_SET. */
    6425 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
    6426 {
    6427     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6428 
    6429     /* Get the eflags. */
    6430     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    6431                                                               kIemNativeGstRegUse_ReadOnly);
    6432 
    6433     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    6434     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    6435 
    6436     /* Test and jump. */
    6437     off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
    6438 
    6439     /* Free but don't flush the EFlags register. */
    6440     iemNativeRegFreeTmp(pReNative, idxEflReg);
    6441 
    6442     /* Make a copy of the core state now as we start the if-block. */
    6443     iemNativeCondStartIfBlock(pReNative, off);
    6444 
    6445     return off;
    6446 }
    6447 
    6448 
    6449 #define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
    6450         off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
    6451         do {
    6452 
    6453 /** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
    6454 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
    6455 {
    6456     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6457 
    6458     /* Get the eflags. */
    6459     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    6460                                                               kIemNativeGstRegUse_ReadOnly);
    6461 
    6462     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    6463     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    6464 
    6465     /* Test and jump. */
    6466     off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
    6467 
    6468     /* Free but don't flush the EFlags register. */
    6469     iemNativeRegFreeTmp(pReNative, idxEflReg);
    6470 
    6471     /* Make a copy of the core state now as we start the if-block. */
    6472     iemNativeCondStartIfBlock(pReNative, off);
    6473 
    6474     return off;
    6475 }
    6476 
    6477 
    6478 #define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2)         \
    6479     off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
    6480     do {
    6481 
    6482 #define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2)         \
    6483     off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
    6484     do {
    6485 
    6486 /** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
    6487 DECL_INLINE_THROW(uint32_t)
    6488 iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    6489                                   uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
    6490 {
    6491     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6492 
    6493     /* Get the eflags. */
    6494     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    6495                                                               kIemNativeGstRegUse_ReadOnly);
    6496 
    6497     unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
    6498     Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
    6499 
    6500     unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
    6501     Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
    6502     Assert(iBitNo1 != iBitNo2);
    6503 
    6504 #ifdef RT_ARCH_AMD64
    6505     uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
    6506 
    6507     off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    6508     if (iBitNo1 > iBitNo2)
    6509         off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
    6510     else
    6511         off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
    6512     off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    6513 
    6514 #elif defined(RT_ARCH_ARM64)
    6515     uint8_t const    idxTmpReg   = iemNativeRegAllocTmp(pReNative, &off);
    6516     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    6517 
    6518     /* and tmpreg, eflreg, #1<<iBitNo1 */
    6519     pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
    6520 
    6521     /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
    6522     if (iBitNo1 > iBitNo2)
    6523         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    6524                                                 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
    6525     else
    6526         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    6527                                                 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
    6528 
    6529     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6530 
    6531 #else
    6532 # error "Port me"
    6533 #endif
    6534 
    6535     /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
    6536     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
    6537                                                      pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
    6538 
    6539     /* Free but don't flush the EFlags and tmp registers. */
    6540     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    6541     iemNativeRegFreeTmp(pReNative, idxEflReg);
    6542 
    6543     /* Make a copy of the core state now as we start the if-block. */
    6544     iemNativeCondStartIfBlock(pReNative, off);
    6545 
    6546     return off;
    6547 }
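/*
 * Editorial note: the shift+xor trick above restated as plain C (hypothetical
 * helper).  After masking out bit 1, shifting it to bit 2's position and
 * xoring with EFLAGS, bit 2 of the temporary is set iff the two flag bits
 * differ -- which is exactly what the final bit-test branch looks at.
 */
static bool iemSketchEflBitsDiffer(uint32_t fEfl, unsigned iBitNo1, unsigned iBitNo2)
{
    uint32_t uTmp = fEfl & RT_BIT_32(iBitNo1);
    uTmp  = iBitNo1 > iBitNo2 ? uTmp >> (iBitNo1 - iBitNo2) : uTmp << (iBitNo2 - iBitNo1);
    uTmp ^= fEfl;
    return RT_BOOL(uTmp & RT_BIT_32(iBitNo2));
}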
    6548 
    6549 
    6550 #define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
    6551     off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
    6552     do {
    6553 
    6554 #define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
    6555     off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
    6556     do {
    6557 
    6558 /** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
    6559  *  IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
    6560 DECL_INLINE_THROW(uint32_t)
    6561 iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
    6562                                               uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
    6563 {
    6564     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6565 
    6566     /* We need an if-block label for the non-inverted variant. */
    6567     uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
    6568                                                                  pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
    6569 
    6570     /* Get the eflags. */
    6571     uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    6572                                                               kIemNativeGstRegUse_ReadOnly);
    6573 
    6574     /* Translate the flag masks to bit numbers. */
    6575     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    6576     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    6577 
    6578     unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
    6579     Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
    6580     Assert(iBitNo1 != iBitNo);
    6581 
    6582     unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
    6583     Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
    6584     Assert(iBitNo2 != iBitNo);
    6585     Assert(iBitNo2 != iBitNo1);
    6586 
    6587 #ifdef RT_ARCH_AMD64
    6588     uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
    6589 #elif defined(RT_ARCH_ARM64)
    6590     uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    6591 #endif
    6592 
    6593     /* Check for the lone bit first. */
    6594     if (!fInverted)
    6595         off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
    6596     else
    6597         off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
    6598 
    6599     /* Then extract and compare the other two bits. */
    6600 #ifdef RT_ARCH_AMD64
    6601     off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    6602     if (iBitNo1 > iBitNo2)
    6603         off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
    6604     else
    6605         off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
    6606     off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
    6607 
    6608 #elif defined(RT_ARCH_ARM64)
    6609     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    6610 
    6611     /* and tmpreg, eflreg, #1<<iBitNo1 */
    6612     pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
    6613 
    6614     /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
    6615     if (iBitNo1 > iBitNo2)
    6616         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    6617                                                 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
    6618     else
    6619         pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
    6620                                                 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
    6621 
    6622     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    6623 
    6624 #else
    6625 # error "Port me"
    6626 #endif
    6627 
    6628     /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
    6629     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
    6630                                                      pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
    6631 
    6632     /* Free but don't flush the EFlags and tmp registers. */
    6633     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    6634     iemNativeRegFreeTmp(pReNative, idxEflReg);
    6635 
    6636     /* Make a copy of the core state now as we start the if-block. */
    6637     iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
    6638 
    6639     return off;
    6640 }
    6641 
    6642 
    6643 #define IEM_MC_IF_CX_IS_NZ() \
    6644     off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
    6645     do {
    6646 
    6647 /** Emits code for IEM_MC_IF_CX_IS_NZ. */
    6648 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
    6649 {
    6650     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6651 
    6652     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    6653                                                                  kIemNativeGstRegUse_ReadOnly);
    6654     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
    6655     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    6656 
    6657     iemNativeCondStartIfBlock(pReNative, off);
    6658     return off;
    6659 }
    6660 
    6661 
    6662 #define IEM_MC_IF_ECX_IS_NZ() \
    6663     off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
    6664     do {
    6665 
    6666 #define IEM_MC_IF_RCX_IS_NZ() \
    6667     off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
    6668     do {
    6669 
    6670 /** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
    6671 DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
    6672 {
    6673     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6674 
    6675     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    6676                                                                  kIemNativeGstRegUse_ReadOnly);
    6677     off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
    6678     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    6679 
    6680     iemNativeCondStartIfBlock(pReNative, off);
    6681     return off;
    6682 }
    6683 
    6684 
    6685 #define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
    6686     off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
    6687     do {
    6688 
    6689 #define IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
    6690     off = iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
    6691     do {
    6692 
    6693 /** Emits code for IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_SET and IEM_MC_IF_CX_IS_NZ_AND_EFL_BIT_NOT_SET. */
    6694 DECL_INLINE_THROW(uint32_t)
    6695 iemNativeEmitIfCxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
    6696 {
    6697     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6698 
    6699     /* We have to load both RCX and EFLAGS before we can start branching,
    6700        otherwise we'll end up in the else-block with an inconsistent
    6701        register allocator state.
    6702        Doing EFLAGS first as it's more likely to be loaded, right? */
    6703     uint8_t const idxEflReg    = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    6704                                                                  kIemNativeGstRegUse_ReadOnly);
    6705     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    6706                                                                  kIemNativeGstRegUse_ReadOnly);
    6707 
    6708     /** @todo we could reduce this to a single branch instruction by spending a
    6709      *        temporary register and some setnz stuff.  Not sure if loops are
    6710      *        worth it. */
    6711     /* Check CX. */
    6712     off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
    6713 
    6714     /* Check the EFlags bit. */
    6715     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    6716     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    6717     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
    6718                                                      !fCheckIfSet /*fJmpIfSet*/);
    6719 
    6720     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    6721     iemNativeRegFreeTmp(pReNative, idxEflReg);
    6722 
    6723     iemNativeCondStartIfBlock(pReNative, off);
    6724     return off;
    6725 }
    6726 
    6727 
    6728 #define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
    6729     off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
    6730     do {
    6731 
    6732 #define IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
    6733     off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
    6734     do {
    6735 
    6736 #define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET(a_fBit) \
    6737     off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
    6738     do {
    6739 
    6740 #define IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET(a_fBit) \
    6741     off = iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
    6742     do {
    6743 
    6744 /** Emits code for IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_SET,
    6745  *  IEM_MC_IF_ECX_IS_NZ_AND_EFL_BIT_NOT_SET,
    6746  *  IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_SET and
    6747  *  IEM_MC_IF_RCX_IS_NZ_AND_EFL_BIT_NOT_SET. */
    6748 DECL_INLINE_THROW(uint32_t)
    6749 iemNativeEmitIfRcxEcxIsNotZeroAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    6750                                                uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
    6751 {
    6752     PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
    6753 
    6754     /* We have to load both RCX and EFLAGS before we can start branching,
    6755        otherwise we'll end up in the else-block with an inconsistent
    6756        register allocator state.
    6757        Doing EFLAGS first as it's more likely to be loaded, right? */
    6758     uint8_t const idxEflReg    = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
    6759                                                                  kIemNativeGstRegUse_ReadOnly);
    6760     uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
    6761                                                                  kIemNativeGstRegUse_ReadOnly);
    6762 
    6763     /** @todo we could reduce this to a single branch instruction by spending a
    6764      *        temporary register and some setnz stuff.  Not sure if loops are
    6765      *        worth it. */
    6766     /* Check RCX/ECX. */
    6767     off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
    6768 
    6769     /* Check the EFlags bit. */
    6770     unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
    6771     Assert(RT_BIT_32(iBitNo) == fBitInEfl);
    6772     off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
    6773                                                      !fCheckIfSet /*fJmpIfSet*/);
    6774 
    6775     iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
    6776     iemNativeRegFreeTmp(pReNative, idxEflReg);
    6777 
    6778     iemNativeCondStartIfBlock(pReNative, off);
    6779     return off;
    6780 }
    6781 
    6782 
    6783 
    6784 /*********************************************************************************************************************************
    6785 *   Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++                                                            *
    6786 *********************************************************************************************************************************/
    6787 /** Number of hidden arguments for CIMPL calls.
    6788  * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
    6789 #if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
    6790 # define IEM_CIMPL_HIDDEN_ARGS 3
    6791 #else
    6792 # define IEM_CIMPL_HIDDEN_ARGS 2
    6793 #endif
    6794 
    6795 #define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
    6796     uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
    6797 
    6798 #define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
    6799     uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
    6800 
    6801 #define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
    6802     uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
    6803 
    6804 #define IEM_MC_LOCAL(a_Type, a_Name) \
    6805     uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
    6806 
    6807 #define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
    6808     uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
    6809 
    6810 
    6811 /**
    6812  * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
    6813  */
    6814 DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
    6815 {
    6816     if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
    6817         return IEM_CIMPL_HIDDEN_ARGS;
    6818     if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
    6819         return 1;
    6820     return 0;
    6821 }
    6822 
    6823 
    6824 /**
    6825  * Internal work that allocates a variable with kind set to
    6826  * kIemNativeVarKind_Invalid and no current stack allocation.
    6827  *
    6828  * The kind will either be set by the caller or later when the variable is first
    6829  * assigned a value.
    6830  */
    6831 static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
    6832 {
    6833     Assert(cbType > 0 && cbType <= 64);
    6834     unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
    6835     AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
    6836     pReNative->Core.bmVars |= RT_BIT_32(idxVar);
    6837     pReNative->Core.aVars[idxVar].enmKind        = kIemNativeVarKind_Invalid;
    6838     pReNative->Core.aVars[idxVar].cbVar          = cbType;
    6839     pReNative->Core.aVars[idxVar].idxStackSlot   = UINT8_MAX;
    6840     pReNative->Core.aVars[idxVar].idxReg         = UINT8_MAX;
    6841     pReNative->Core.aVars[idxVar].uArgNo         = UINT8_MAX;
    6842     pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
    6843     pReNative->Core.aVars[idxVar].enmGstReg      = kIemNativeGstReg_End;
    6844     pReNative->Core.aVars[idxVar].fRegAcquired   = false;
    6845     pReNative->Core.aVars[idxVar].u.uValue       = 0;
    6846     return idxVar;
    6847 }
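/*
 * Editorial note: the slot allocation above is the classic inverted-bitmap
 * scan: ASMBitFirstSetU32(~bmVars) - 1 returns the lowest clear bit, i.e. the
 * first free variable index.  A standalone sketch (made-up helper):
 */
static int iemSketchAllocFromBitmap(uint32_t *pbmInUse)
{
    uint32_t const bmFree = ~*pbmInUse;
    if (!bmFree)
        return -1;                                      /* all 32 slots taken */
    unsigned const idx = ASMBitFirstSetU32(bmFree) - 1; /* ASMBitFirstSetU32 is 1-based */
    *pbmInUse |= RT_BIT_32(idx);
    return (int)idx;
}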
    6848 
    6849 
    6850 /**
    6851  * Internal work that allocates an argument variable w/o setting enmKind.
    6852  */
    6853 static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
    6854 {
    6855     iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
    6856     AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
    6857     AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
    6858 
    6859     uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
    6860     pReNative->Core.aidxArgVars[iArgNo]  = idxVar;
    6861     pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
    6862     return idxVar;
    6863 }
    6864 
    6865 
    6866 /**
    6867  * Gets the stack slot for a stack variable, allocating one if necessary.
    6868  *
    6869  * Calling this function implies that the stack slot will contain a valid
    6870  * variable value.  The caller deals with any register currently assigned to the
    6871  * variable, typically by spilling it into the stack slot.
    6872  *
    6873  * @returns The stack slot number.
    6874  * @param   pReNative   The recompiler state.
    6875  * @param   idxVar      The variable.
    6876  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS
    6877  */
    6878 DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    6879 {
    6880     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    6881     Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
    6882 
    6883     /* Already got a slot? */
    6884     uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
    6885     if (idxStackSlot != UINT8_MAX)
    6886     {
    6887         Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
    6888         return idxStackSlot;
    6889     }
    6890 
    6891     /*
    6892      * A single slot is easy to allocate.
    6893      * Allocate them from the top end, closest to BP, to reduce the displacement.
    6894      */
    6895     if (pReNative->Core.aVars[idxVar].cbVar <= sizeof(uint64_t))
    6896     {
    6897         unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
    6898         AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    6899         pReNative->Core.bmStack |= RT_BIT_32(iSlot);
    6900         pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
    6901         Log11(("iemNativeVarSetKindToStack: idxVar=%d iSlot=%#x\n", idxVar, iSlot));
    6902         return (uint8_t)iSlot;
    6903     }
    6904 
    6905     /*
    6906      * We need more than one stack slot.
    6907      *
    6908      * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
    6909      */
    6910     AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
    6911     Assert(pReNative->Core.aVars[idxVar].cbVar <= 64);
    6912     uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pReNative->Core.aVars[idxVar].cbVar) - 4) - 1;
    6913     uint32_t       fBitAllocMask = RT_BIT_32((pReNative->Core.aVars[idxVar].cbVar + 7) >> 3) - 1;
    6914     uint32_t       bmStack       = ~pReNative->Core.bmStack;
    6915     while (bmStack != UINT32_MAX)
    6916     {
    6917 /** @todo allocate from the top to reduce BP displacement. */
    6918         unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
    6919         AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    6920         if (!(iSlot & fBitAlignMask))
    6921         {
    6922             if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
    6923             {
    6924                 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
    6925                 pReNative->Core.aVars[idxVar].idxStackSlot = (uint8_t)iSlot;
    6926                 Log11(("iemNativeVarGetStackSlot: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n",
    6927                        idxVar, iSlot, fBitAllocMask, pReNative->Core.aVars[idxVar].cbVar));
    6928                 return (uint8_t)iSlot;
    6929             }
    6930         }
    6931         bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
    6932     }
    6933     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    6934 }
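/*
 * Editorial note: a worked instance of the mask arithmetic above.  For a
 * 32 byte variable (four 8-byte slots): ASMBitLastSetU32(32) = 6, so
 * fBitAlignMask = RT_BIT_32(6 - 4) - 1 = 3 (the slot index must be a multiple
 * of four) and fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf (four
 * consecutive slots) -- matching the 16 -> 1; 32 -> 3; 64 -> 7 table in the
 * comment above.
 */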
    6935 
    6936 
    6937 /**
    6938  * Changes the variable to a stack variable.
    6939  *
    6940  * Currently this is only possible to do the first time the variable is used;
    6941  * switching later can be implemented but hasn't been done.
    6942  *
    6943  * @param   pReNative   The recompiler state.
    6944  * @param   idxVar      The variable.
    6945  * @throws  VERR_IEM_VAR_IPE_2
    6946  */
    6947 static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    6948 {
    6949     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    6950     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
    6951     {
    6952         /* We could in theory transition from immediate to stack as well, but it
    6953            would involve the caller doing work storing the value on the stack. So,
    6954            till that's required we only allow transition from invalid. */
    6955         AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
    6956                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    6957         AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    6958         pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Stack;
    6959 
    6960         /* Note! We don't allocate a stack slot here, that's only done when a
    6961                  slot is actually needed to hold a variable value. */
    6962     }
    6963 }
    6964 
    6965 
    6966 /**
    6967  * Sets the variable to a constant value.
    6968  *
    6969  * This does not require stack storage as we know the value and can always
    6970  * reload it, unless of course it's referenced.
    6971  *
    6972  * @param   pReNative   The recompiler state.
    6973  * @param   idxVar      The variable.
    6974  * @param   uValue      The immediate value.
    6975  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
    6976  */
    6977 static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
    6978 {
    6979     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    6980     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Immediate)
    6981     {
    6982         /* Only simple transitions for now. */
    6983         AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
    6984                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    6985         pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Immediate;
    6986     }
    6987     AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    6988 
    6989     pReNative->Core.aVars[idxVar].u.uValue = uValue;
    6990     AssertMsg(   pReNative->Core.aVars[idxVar].cbVar >= sizeof(uint64_t)
    6991               || pReNative->Core.aVars[idxVar].u.uValue < RT_BIT_64(pReNative->Core.aVars[idxVar].cbVar * 8),
    6992               ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pReNative->Core.aVars[idxVar].cbVar, uValue));
    6993 }
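
A quick numeric illustration of the range assertion above (editorial, with made-up values):

/* Editorial example: cbVar = 2 requires uValue < RT_BIT_64(16) = 0x10000.
   A sign-extended imm16 such as (uint64_t)(int64_t)(int16_t)0xffff equals
   0xffffffffffffffff and would (correctly) trip the assertion, which
   typically means the caller forgot to truncate. */
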
    6994 
    6995 
    6996 /**
    6997  * Sets the variable to a reference (pointer) to @a idxOtherVar.
    6998  *
    6999  * This does not require stack storage as we know the value and can always
    7000  * reload it.  Loading is postponed till needed.
    7001  *
    7002  * @param   pReNative   The recompiler state.
    7003  * @param   idxVar      The variable.
    7004  * @param   idxOtherVar The variable to take the (stack) address of.
    7005  *
    7006  * @throws  VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
    7007  */
    7008 static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
    7009 {
    7010     Assert(idxVar      < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
    7011     Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
    7012 
    7013     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
    7014     {
    7015         /* Only simple transitions for now. */
    7016         AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
    7017                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    7018         pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
    7019     }
    7020     AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    7021 
    7022     pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar;
    7023 
    7024     /* Update the other variable, ensure it's a stack variable. */
    7025     /** @todo handle variables with const values... that'll go boom now. */
    7026     pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
    7027     iemNativeVarSetKindToStack(pReNative, idxOtherVar);
    7028 }
    7029 
    7030 
    7031 /**
    7032  * Sets the variable to a reference (pointer) to a guest register reference.
    7033  *
    7034  * This does not require stack storage as we know the value and can always
    7035  * reload it.  Loading is postponed till needed.
    7036  *
    7037  * @param   pReNative       The recompiler state.
    7038  * @param   idxVar          The variable.
    7039  * @param   enmRegClass     The class guest registers to reference.
    7040  * @param   idxReg          The register within @a enmRegClass to reference.
    7041  *
    7042  * @throws  VERR_IEM_VAR_IPE_2
    7043  */
    7044 static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
    7045                                            IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
    7046 {
    7047     Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
    7048 
    7049     if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_GstRegRef)
    7050     {
    7051         /* Only simple transitions for now. */
    7052         AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
    7053                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    7054         pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_GstRegRef;
    7055     }
    7056     AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
    7057 
    7058     pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass = enmRegClass;
    7059     pReNative->Core.aVars[idxVar].u.GstRegRef.idx      = idxReg;
    7060 }
    7061 
    7062 
    7063 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
    7064 {
    7065     return iemNativeArgAllocInt(pReNative, iArgNo, cbType);
    7066 }
    7067 
    7068 
    7069 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
    7070 {
    7071     uint8_t const idxVar = iemNativeArgAllocInt(pReNative, iArgNo, cbType);
    7072 
    7073     /* Since we're using a generic uint64_t value type, we must truncate it if
    7074        the variable is smaller, otherwise we may end up with too large a value
    7075        when scaling up an imm8 w/ sign-extension.
    7076 
    7077        This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
    7078        in the BIOS, bx=1) when running on ARM, because clang expects 16-bit
    7079        register parameters to have bits 16 and up set to zero.  Instead of
    7080        setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
    7081        CF value in the result.  */
    7082     switch (cbType)
    7083     {
    7084         case sizeof(uint8_t):   uValue &= UINT64_C(0xff); break;
    7085         case sizeof(uint16_t):  uValue &= UINT64_C(0xffff); break;
    7086         case sizeof(uint32_t):  uValue &= UINT64_C(0xffffffff); break;
    7087     }
    7088     iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
    7089     return idxVar;
    7090 }
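
To make the scenario from the comment concrete, here is a minimal sketch (editorial, not part of the source) of what the masking prevents:

/* Editorial sketch of the truncation above: */
uint64_t uValue = (uint64_t)(int64_t)(int16_t)0xffff;  /* sign-extended imm16: 0xffffffffffffffff */
uValue &= UINT64_C(0xffff);                            /* cbType == sizeof(uint16_t): back to 0xffff */
/* Without the masking, an arm64 helper compiled by clang, which assumes
   16-bit parameters arrive zero-extended, would see the stray upper bits. */
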
    7091 
    7092 
    7093 DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
    7094 {
    7095     AssertStmt(   idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
    7096                && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
    7097                && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
    7098                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
    7099 
    7100     uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
    7101     iemNativeVarSetKindToLocalRef(pReNative, idxArgVar, idxOtherVar);
    7102     return idxArgVar;
    7103 }
    7104 
    7105 
    7106 DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
    7107 {
    7108     uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
    7109     /* Don't set to stack now, leave that to the first use as for instance
    7110        IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
    7111     return idxVar;
    7112 }
    7113 
    7114 
    7115 DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
    7116 {
    7117     uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
    7118 
    7119     /* Since we're using a generic uint64_t value type, we must truncate it if
    7120        the variable is smaller, otherwise we may end up with too large a value
    7121        when scaling up an imm8 w/ sign-extension. */
    7122     switch (cbType)
    7123     {
    7124         case sizeof(uint8_t):   uValue &= UINT64_C(0xff); break;
    7125         case sizeof(uint16_t):  uValue &= UINT64_C(0xffff); break;
    7126         case sizeof(uint32_t):  uValue &= UINT64_C(0xffffffff); break;
    7127     }
    7128     iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
    7129     return idxVar;
    7130 }
    7131 
    7132 
    7133 /**
    7134  * Releases the variable's register.
    7135  *
    7136  * The register must have been previously acquired calling
    7137  * iemNativeVarRegisterAcquire(), iemNativeVarRegisterAcquireForGuestReg() or
    7138  * iemNativeVarRegisterSetAndAcquire().
    7139  */
    7140 DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    7141 {
    7142     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    7143     Assert(pReNative->Core.aVars[idxVar].fRegAcquired);
    7144     pReNative->Core.aVars[idxVar].fRegAcquired = false;
    7145 }
    7146 
    7147 
    7148 /**
    7149  * Makes sure variable @a idxVar has a register assigned to it and that it stays
    7150  * fixed till we call iemNativeVarRegisterRelease.
    7151  *
    7152  * @returns The host register number.
    7153  * @param   pReNative   The recompiler state.
    7154  * @param   idxVar      The variable.
    7155  * @param   poff        Pointer to the instruction buffer offset.
    7156  *                      In case a register needs to be freed up or the value
    7157  *                      loaded off the stack.
    7158  * @param  fInitialized Set if the variable must already have been initialized.
    7159  *                      Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
    7160  *                      the case.
    7161  * @param  idxRegPref   Preferred register number or UINT8_MAX.
    7162  */
    7163 DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
    7164                                                        bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
    7165 {
    7166     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    7167     Assert(pReNative->Core.aVars[idxVar].cbVar <= 8);
    7168     Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
    7169 
    7170     uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
    7171     if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    7172     {
    7173         Assert(   pReNative->Core.aVars[idxVar].enmKind > kIemNativeVarKind_Invalid
    7174                && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
    7175         pReNative->Core.aVars[idxVar].fRegAcquired = true;
    7176         return idxReg;
    7177     }
    7178 
    7179     /*
    7180      * If the kind of variable has not yet been set, default to 'stack'.
    7181      */
    7182     Assert(   pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid
    7183            && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End);
    7184     if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid)
    7185         iemNativeVarSetKindToStack(pReNative, idxVar);
    7186 
    7187     /*
    7188      * We have to allocate a register for the variable, even if it's a stack
    7189      * one, as we don't know if there are modifications being made to it before
    7190      * it's finalized (todo: analyze and insert hints about that?).
    7191      *
    7192      * If we can, we try to get the correct register for argument variables.
    7193      * This is assuming that most argument variables are fetched as close as
    7194      * possible to the actual call, so that there aren't any interfering hidden
    7195      * calls (memory accesses, etc) in between.
    7196      *
    7197      * If we cannot, or it's a local variable, we make sure no argument registers
    7198      * that will be used by this MC block are allocated here, and we always
    7199      * prefer non-volatile registers to avoid needing to spill stuff for
    7200      * internal calls.
    7201      */
    7202     /** @todo Detect too early argument value fetches and warn about hidden
    7203      * calls causing less optimal code to be generated in the python script. */
    7204 
    7205     uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
    7206     if (   uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
    7207         && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
    7208     {
    7209         idxReg = g_aidxIemNativeCallRegs[uArgNo];
    7210         iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    7211         Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
    7212     }
    7213     else if (   idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
    7214              || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
    7215     {
    7216         uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
    7217         uint32_t const fRegs        = ~pReNative->Core.bmHstRegs
    7218                                     & ~pReNative->Core.bmHstRegsWithGstShadow
    7219                                     & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
    7220                                     & fNotArgsMask;
    7221         if (fRegs)
    7222         {
    7223             /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
    7224             idxReg = (uint8_t)ASMBitLastSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
    7225                                                ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
    7226             Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
    7227             Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
    7228             Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
    7229         }
    7230         else
    7231         {
    7232             idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
    7233                                                IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
    7234             AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
    7235             Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
    7236         }
    7237     }
    7238     else
    7239     {
    7240         idxReg = idxRegPref;
    7241         iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
    7242         Log11(("iemNativeVarRegisterAcquire: idxVar=%u idxReg=%u (preferred)\n", idxVar, idxReg));
    7243     }
    7244     iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
    7245     pReNative->Core.aVars[idxVar].idxReg = idxReg;
    7246 
    7247     /*
    7248      * Load it off the stack if we've got a stack slot.
    7249      */
    7250     uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
    7251     if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
    7252     {
    7253         Assert(fInitialized);
    7254         int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
    7255         switch (pReNative->Core.aVars[idxVar].cbVar)
    7256         {
    7257             case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
    7258             case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
    7259             case 3: AssertFailed(); RT_FALL_THRU();
    7260             case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
    7261             default: AssertFailed(); RT_FALL_THRU();
    7262             case 8: *poff = iemNativeEmitLoadGprByBp(   pReNative, *poff, idxReg, offDispBp); break;
    7263         }
    7264     }
    7265     else
    7266     {
    7267         Assert(idxStackSlot == UINT8_MAX);
    7268         AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    7269     }
    7270     pReNative->Core.aVars[idxVar].fRegAcquired = true;
    7271     return idxReg;
    7272 }
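
A typical acquire/use/release sequence in an emitter might look like this (editorial sketch; idxVarValue and idxRegDst are hypothetical names, 'off' is the usual code buffer cursor):

/* Editorial usage sketch (hypothetical variable/register names): */
uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/);
off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxReg);  /* use the pinned register */
iemNativeVarRegisterRelease(pReNative, idxVarValue);  /* unpin; the register assignment itself stays */
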
    7273 
    7274 
    7275 /**
    7276  * The value of variable @a idxVar will be written in full to the @a enmGstReg
    7277  * guest register.
    7278  *
    7279  * This function makes sure there is a register for it and sets it to be the
    7280  * current shadow copy of @a enmGstReg.
    7281  *
    7282  * @returns The host register number.
    7283  * @param   pReNative   The recompiler state.
    7284  * @param   idxVar      The variable.
    7285  * @param   enmGstReg   The guest register this variable will be written to
    7286  *                      after this call.
    7287  * @param   poff        Pointer to the instruction buffer offset.
    7288  *                      In case a register needs to be freed up or if the
    7289  *                      variable content needs to be loaded off the stack.
    7290  *
    7291  * @note    We DO NOT expect @a idxVar to be an argument variable,
    7292  *          because this function can only be used in the commit stage of
    7293  *          an instruction.
    7294  */
    7295 DECL_HIDDEN_THROW(uint8_t)
    7296 iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
    7297 {
    7298     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    7299     Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
    7300     AssertMsgStmt(   pReNative->Core.aVars[idxVar].cbVar <= 8
    7301                   && (   pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate
    7302                       || pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack),
    7303                   ("idxVar=%d cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pReNative->Core.aVars[idxVar].cbVar,
    7304                    pReNative->Core.aVars[idxVar].enmKind, g_aGstShadowInfo[enmGstReg].pszName),
    7305                   IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
    7306 
    7307     /*
    7308      * This shouldn't ever be used for arguments, unless it's in a weird else
    7309      * branch that doesn't do any calling and even then it's questionable.
    7310      *
    7311      * However, in case someone writes crazy wrong MC code and does register
    7312      * updates before making calls, just use the regular register allocator to
    7313      * ensure we get a register suitable for the intended argument number.
    7314      */
    7315     AssertStmt(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
    7316 
    7317     /*
    7318      * If there is already a register for the variable, we transfer/set the
    7319      * guest shadow copy assignment to it.
    7320      */
    7321     uint8_t idxReg = pReNative->Core.aVars[idxVar].idxReg;
    7322     if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    7323     {
    7324         if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
    7325         {
    7326             uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
    7327             iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
    7328             Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
    7329                    g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
    7330         }
    7331         else
    7332         {
    7333             iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
    7334             Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
    7335                    g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
    7336         }
    7337         /** @todo figure this one out. We need some way of making sure the register isn't
    7338          * modified after this point, just in case we start writing crappy MC code. */
    7339         pReNative->Core.aVars[idxVar].enmGstReg    = enmGstReg;
    7340         pReNative->Core.aVars[idxVar].fRegAcquired = true;
    7341         return idxReg;
    7342     }
    7343     Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
    7344 
    7345     /*
    7346      * Because this is supposed to be the commit stage, we just tag along with the
    7347      * temporary register allocator and upgrade it to a variable register.
    7348      */
    7349     idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
    7350     Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
    7351     Assert(pReNative->Core.aHstRegs[idxReg].idxVar  == UINT8_MAX);
    7352     pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
    7353     pReNative->Core.aHstRegs[idxReg].idxVar  = idxVar;
    7354     pReNative->Core.aVars[idxVar].idxReg     = idxReg;
    7355 
    7356     /*
    7357      * Now we need to load the register value.
    7358      */
    7359     if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Immediate)
    7360         *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pReNative->Core.aVars[idxVar].u.uValue);
    7361     else
    7362     {
    7363         uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    7364         int32_t const offDispBp    = iemNativeStackCalcBpDisp(idxStackSlot);
    7365         switch (pReNative->Core.aVars[idxVar].cbVar)
    7366         {
    7367             case sizeof(uint64_t):
    7368                 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
    7369                 break;
    7370             case sizeof(uint32_t):
    7371                 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
    7372                 break;
    7373             case sizeof(uint16_t):
    7374                 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
    7375                 break;
    7376             case sizeof(uint8_t):
    7377                 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
    7378                 break;
    7379             default:
    7380                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
    7381         }
    7382     }
    7383 
    7384     pReNative->Core.aVars[idxVar].fRegAcquired = true;
    7385     return idxReg;
    7386 }
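
Commit-stage usage would follow this shape (editorial sketch; enmGstReg and idxVarDst are hypothetical stand-ins for whatever guest register the instruction overwrites in full and the variable holding the new value):

/* Editorial sketch: acquire the variable's register as the new shadow of the
   guest register being fully overwritten; no reload of the old guest value
   is emitted since every bit of it gets replaced. */
uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarDst, enmGstReg, &off);
/* ... emit code that computes the final value in idxVarReg ... */
iemNativeVarRegisterRelease(pReNative, idxVarDst);
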
    7387 
    7388 
    7389 /**
    7390  * Sets the host register for @a idxVarRc to @a idxReg.
    7391  *
    7392  * The register must not be allocated. Any guest register shadowing will be
    7393  * implicitly dropped by this call.
    7394  *
    7395  * The variable must not have any register associated with it (causes
    7396  * VERR_IEM_VAR_IPE_10 to be raised).  Conversion to a stack variable is
    7397  * implied.
    7398  *
    7399  * @returns idxReg
    7400  * @param   pReNative   The recompiler state.
    7401  * @param   idxVar      The variable.
    7402  * @param   idxReg      The host register (typically IEMNATIVE_CALL_RET_GREG).
    7403  * @param   off         For recording in debug info.
    7404  *
    7405  * @throws  VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
    7406  */
    7407 DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
    7408 {
    7409     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    7410     Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
    7411     Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
    7412     AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
    7413     AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
    7414 
    7415     iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
    7416     iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
    7417 
    7418     iemNativeVarSetKindToStack(pReNative, idxVar);
    7419     pReNative->Core.aVars[idxVar].idxReg = idxReg;
    7420 
    7421     return idxReg;
    7422 }
    7423 
    7424 
    7425 /**
    7426  * A convenience wrapper that also marks the register as acquired.
    7427  */
    7428 DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
    7429                                                              uint8_t idxReg, uint32_t *poff)
    7430 {
    7431     idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
    7432     pReNative->Core.aVars[idxVar].fRegAcquired = true;
    7433     return idxReg;
    7434 }
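
The intended use is binding a helper's return value (editorial sketch; idxVarRc as in the doc comment above):

/* Editorial sketch: after emitting a helper call, adopt the value left in
   IEMNATIVE_CALL_RET_GREG as the backing register of the result variable,
   avoiding a register-to-register move. */
uint8_t const idxRegRc = iemNativeVarRegisterSetAndAcquire(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, &off);
/* ... use idxRegRc ... */
iemNativeVarRegisterRelease(pReNative, idxVarRc);
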
    7435 
    7436 
    7437 /**
    7438  * Emit code to save volatile registers prior to a call to a helper (TLB miss).
    7439  *
    7440  * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
    7441  * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
    7442  * requirement of flushing anything in volatile host registers when making a
    7443  * call.
    7444  *
    7445  * @returns New @a off value.
    7446  * @param   pReNative           The recompiler state.
    7447  * @param   off                 The code buffer position.
    7448  * @param   fHstRegsNotToSave   Set of registers not to save & restore.
    7449  */
    7450 DECL_HIDDEN_THROW(uint32_t)
    7451 iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
    7452 {
    7453     uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
    7454     if (fHstRegs)
    7455     {
    7456         do
    7457         {
    7458             unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    7459             fHstRegs &= ~RT_BIT_32(idxHstReg);
    7460 
    7461             if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
    7462             {
    7463                 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    7464                 AssertStmt(   idxVar < RT_ELEMENTS(pReNative->Core.aVars)
    7465                            && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
    7466                            && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
    7467                            IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_12));
    7468                 switch (pReNative->Core.aVars[idxVar].enmKind)
    7469                 {
    7470                     case kIemNativeVarKind_Stack:
    7471                     {
    7472                         /* Temporarily spill the variable register. */
    7473                         uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    7474                         Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%d/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    7475                                idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    7476                         off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
    7477                         continue;
    7478                     }
    7479 
    7480                     case kIemNativeVarKind_Immediate:
    7481                     case kIemNativeVarKind_VarRef:
    7482                     case kIemNativeVarKind_GstRegRef:
    7483                         /* It is weird to have any of these loaded at this point. */
    7484                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_13));
    7485                         continue;
    7486 
    7487                     case kIemNativeVarKind_End:
    7488                     case kIemNativeVarKind_Invalid:
    7489                         break;
    7490                 }
    7491                 AssertFailed();
    7492             }
    7493             else
    7494             {
    7495                 /*
    7496                  * Allocate a temporary stack slot and spill the register to it.
    7497                  */
    7498                 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
    7499                 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
    7500                            IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
    7501                 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
    7502                 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
    7503                 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
    7504                        idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    7505                 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
    7506             }
    7507         } while (fHstRegs);
    7508     }
    7509     return off;
    7510 }
    7511 
    7512 
    7513 /**
    7514  * Emit code to restore volatile registers after a call to a helper.
    7515  *
    7516  * @returns New @a off value.
    7517  * @param   pReNative           The recompiler state.
    7518  * @param   off                 The code buffer position.
    7519  * @param   fHstRegsNotToSave   Set of registers not to save & restore.
    7520  * @see     iemNativeVarSaveVolatileRegsPreHlpCall(),
    7521  *          iemNativeRegRestoreGuestShadowsInVolatileRegs()
    7522  */
    7523 DECL_HIDDEN_THROW(uint32_t)
    7524 iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
    7525 {
    7526     uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
    7527     if (fHstRegs)
    7528     {
    7529         do
    7530         {
    7531             unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
    7532             fHstRegs &= ~RT_BIT_32(idxHstReg);
    7533 
    7534             if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
    7535             {
    7536                 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
    7537                 AssertStmt(   idxVar < RT_ELEMENTS(pReNative->Core.aVars)
    7538                            && (pReNative->Core.bmVars & RT_BIT_32(idxVar))
    7539                            && pReNative->Core.aVars[idxVar].idxReg == idxHstReg,
    7540                            IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_12));
    7541                 switch (pReNative->Core.aVars[idxVar].enmKind)
    7542                 {
    7543                     case kIemNativeVarKind_Stack:
    7544                     {
    7545                         /* Unspill the variable register. */
    7546                         uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    7547                         Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%d/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
    7548                                idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    7549                         off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
    7550                         continue;
    7551                     }
    7552 
    7553                     case kIemNativeVarKind_Immediate:
    7554                     case kIemNativeVarKind_VarRef:
    7555                     case kIemNativeVarKind_GstRegRef:
    7556                         /* It is weird to have any of these loaded at this point. */
    7557                         AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative,  VERR_IEM_VAR_IPE_13));
    7558                         continue;
    7559 
    7560                     case kIemNativeVarKind_End:
    7561                     case kIemNativeVarKind_Invalid:
    7562                         break;
    7563                 }
    7564                 AssertFailed();
    7565             }
    7566             else
    7567             {
    7568                 /*
    7569                  * Restore from temporary stack slot.
    7570                  */
    7571                 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
    7572                 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
    7573                 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
    7574                 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
    7575 
    7576                 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
    7577             }
    7578         } while (fHstRegs);
    7579     }
    7580     return off;
    7581 }
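
Together the two workers bracket a helper call like this (editorial sketch; fHstRegsNotToSave is whatever mask of registers the caller re-creates afterwards anyway):

/* Editorial sketch of the save/call/restore pairing on a TLB-miss path: */
off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
/* ... load the helper arguments and emit the actual call ... */
off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
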
    7582 
    7583 
    7584 /**
    7585  * Worker that frees the stack slots for variable @a idxVar if any allocated.
    7586  *
    7587  * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
    7588  */
    7589 DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    7590 {
    7591     uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
    7592     if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
    7593     {
    7594         uint8_t const  cbVar      = pReNative->Core.aVars[idxVar].cbVar;
    7595         uint8_t const  cSlots     = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
    7596         uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
    7597         Assert(cSlots > 0);
    7598         Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
    7599         Log11(("iemNativeVarFreeStackSlots: idxVar=%d iSlot=%#x/%#x (cbVar=%#x)\n", idxVar, idxStackSlot, fAllocMask, cbVar));
    7600         pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
    7601         pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
    7602     }
    7603     else
    7604         Assert(idxStackSlot == UINT8_MAX);
    7605 }
    7606 
    7607 
    7608 /**
    7609  * Worker that frees a single variable.
    7610  *
    7611  * ASSUMES that @a idxVar is valid.
    7612  */
    7613 DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    7614 {
    7615     Assert(   pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid  /* Including invalid as we may have unused */
    7616            && pReNative->Core.aVars[idxVar].enmKind <  kIemNativeVarKind_End);    /* variables in conditional branches. */
    7617     Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
    7618 
    7619     /* Free the host register first if any assigned. */
    7620     uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    7621     if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    7622     {
    7623         Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
    7624         pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
    7625         pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    7626     }
    7627 
    7628     /* Free argument mapping. */
    7629     uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
    7630     if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
    7631         pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
    7632 
    7633     /* Free the stack slots. */
    7634     iemNativeVarFreeStackSlots(pReNative, idxVar);
    7635 
    7636     /* Free the actual variable. */
    7637     pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
    7638     pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
    7639 }
    7640 
    7641 
    7642 /**
    7643  * Worker for iemNativeVarFreeAll that's called when there is anything to do.
    7644  */
    7645 DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
    7646 {
    7647     while (bmVars != 0)
    7648     {
    7649         uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
    7650         bmVars &= ~RT_BIT_32(idxVar);
    7651 
    7652 #if 1 /** @todo optimize by simplifying this later... */
    7653         iemNativeVarFreeOneWorker(pReNative, idxVar);
    7654 #else
    7655         /* Only need to free the host register, the rest is done as bulk updates below. */
    7656         uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
    7657         if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    7658         {
    7659             Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == idxVar);
    7660             pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
    7661             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
    7662         }
    7663 #endif
    7664     }
    7665 #if 0 /** @todo optimize by simplifying this later... */
    7666     pReNative->Core.bmVars     = 0;
    7667     pReNative->Core.bmStack    = 0;
    7668     pReNative->Core.u64ArgVars = UINT64_MAX;
    7669 #endif
    7670 }
    7671 
    7672 
    7673 /**
    7674  * This is called by IEM_MC_END() to clean up all variables.
    7675  */
    7676 DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
    7677 {
    7678     uint32_t const bmVars = pReNative->Core.bmVars;
    7679     if (bmVars != 0)
    7680         iemNativeVarFreeAllSlow(pReNative, bmVars);
    7681     Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
    7682     Assert(pReNative->Core.bmStack    == 0);
    7683 }
    7684 
    7685 
    7686 #define IEM_MC_FREE_LOCAL(a_Name)   iemNativeVarFreeLocal(pReNative, a_Name)
    7687 
    7688 /**
    7689  * This is called by IEM_MC_FREE_LOCAL.
    7690  */
    7691 DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    7692 {
    7693     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    7694     Assert(pReNative->Core.aVars[idxVar].uArgNo == UINT8_MAX);
    7695     iemNativeVarFreeOneWorker(pReNative, idxVar);
    7696 }
    7697 
    7698 
    7699 #define IEM_MC_FREE_ARG(a_Name)     iemNativeVarFreeArg(pReNative, a_Name)
    7700 
    7701 /**
    7702  * This is called by IEM_MC_FREE_ARG.
    7703  */
    7704 DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
    7705 {
    7706     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
    7707     Assert(pReNative->Core.aVars[idxVar].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
    7708     iemNativeVarFreeOneWorker(pReNative, idxVar);
    7709 }
    7710 
    7711 
    7712 #define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
    7713 
    7714 /**
    7715  * This is called by IEM_MC_ASSIGN_TO_SMALLER.
    7716  */
    7717 DECL_INLINE_THROW(uint32_t)
    7718 iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
    7719 {
    7720     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
    7721     AssertStmt(pReNative->Core.aVars[idxVarDst].enmKind == kIemNativeVarKind_Invalid,
    7722                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    7723     Assert(   pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint16_t)
    7724            || pReNative->Core.aVars[idxVarDst].cbVar == sizeof(uint32_t));
    7725 
    7726     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
    7727     AssertStmt(   pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Stack
    7728                || pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate,
    7729                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    7730 
    7731     Assert(pReNative->Core.aVars[idxVarDst].cbVar < pReNative->Core.aVars[idxVarSrc].cbVar);
    7732 
    7733     /*
    7734      * Special case for immediates.
    7735      */
    7736     if (pReNative->Core.aVars[idxVarSrc].enmKind == kIemNativeVarKind_Immediate)
    7737     {
    7738         switch (pReNative->Core.aVars[idxVarDst].cbVar)
    7739         {
    7740             case sizeof(uint16_t):
    7741                 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
    7742                 break;
    7743             case sizeof(uint32_t):
    7744                 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pReNative->Core.aVars[idxVarSrc].u.uValue);
    7745                 break;
    7746             default: AssertFailed(); break;
    7747         }
    7748     }
    7749     else
    7750     {
    7751         /*
    7752          * The generic solution for now.
    7753          */
    7754         /** @todo optimize this by having the python script make sure the source
    7755          *        variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
    7756          *        statement.   Then we could just transfer the register assignments. */
    7757         uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
    7758         uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
    7759         switch (pReNative->Core.aVars[idxVarDst].cbVar)
    7760         {
    7761             case sizeof(uint16_t):
    7762                 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
    7763                 break;
    7764             case sizeof(uint32_t):
    7765                 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
    7766                 break;
    7767             default: AssertFailed(); break;
    7768         }
    7769         iemNativeVarRegisterRelease(pReNative, idxVarSrc);
    7770         iemNativeVarRegisterRelease(pReNative, idxVarDst);
    7771     }
    7772     return off;
    7773 }
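
For the immediate special case above no code is emitted at all (editorial illustration):

/* Editorial illustration: a 32-bit immediate source 0x12345678 assigned to a
   16-bit destination simply recreates the destination as the constant
   (uint16_t)0x12345678 == 0x5678, while the generic path acquires registers
   for both variables and emits a 16- or 32-bit register move. */
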
    7774 
    7775 
    7776 
    7777 /*********************************************************************************************************************************
    7778 *   Emitters for IEM_MC_CALL_CIMPL_XXX                                                                                           *
    7779 *********************************************************************************************************************************/
    7780 
    7781 /**
    7782  * Emits code to load a reference to the given guest register into @a idxGprDst.
    7783  */
    7784 DECL_INLINE_THROW(uint32_t)
    7785 iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
    7786                                IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
    7787 {
    7788     /*
    7789      * Get the offset relative to the CPUMCTX structure.
    7790      */
    7791     uint32_t offCpumCtx;
    7792     switch (enmClass)
    7793     {
    7794         case kIemNativeGstRegRef_Gpr:
    7795             Assert(idxRegInClass < 16);
    7796             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
    7797             break;
    7798 
    7799         case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
    7800             Assert(idxRegInClass < 4);
    7801             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
    7802             break;
    7803 
    7804         case kIemNativeGstRegRef_EFlags:
    7805             Assert(idxRegInClass == 0);
    7806             offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
    7807             break;
    7808 
    7809         case kIemNativeGstRegRef_MxCsr:
    7810             Assert(idxRegInClass == 0);
    7811             offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
    7812             break;
    7813 
    7814         case kIemNativeGstRegRef_FpuReg:
    7815             Assert(idxRegInClass < 8);
    7816             AssertFailed(); /** @todo what kind of indexing? */
    7817             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
    7818             break;
    7819 
    7820         case kIemNativeGstRegRef_MReg:
    7821             Assert(idxRegInClass < 8);
    7822             AssertFailed(); /** @todo what kind of indexing? */
    7823             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
    7824             break;
    7825 
    7826         case kIemNativeGstRegRef_XReg:
    7827             Assert(idxRegInClass < 16);
    7828             offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
    7829             break;
    7830 
    7831         default:
    7832             AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
    7833     }
    7834 
    7835     /*
    7836      * Load the value into the destination register.
    7837      */
    7838 #ifdef RT_ARCH_AMD64
    7839     off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
    7840 
    7841 #elif defined(RT_ARCH_ARM64)
    7842     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    7843     Assert(offCpumCtx < 4096);
    7844     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
    7845 
    7846 #else
    7847 # error "Port me!"
    7848 #endif
    7849 
    7850     return off;
    7851 }
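
To make the offset arithmetic concrete (editorial example, derived from the switch above):

/* Editorial example: offCpumCtx for CH, i.e. kIemNativeGstRegRef_GprHighByte
   with idxRegInClass = 1 (the high byte of CX):
       offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + 1 * sizeof(CPUMCTXGREG);
   On AMD64 the LEA then adds RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) since it is
   relative to pVCpu; on ARM64 the fixed PCPUMCTX register already points at
   the guest context, hence the bare ADD with a 12-bit immediate (offCpumCtx
   is asserted to be below 4096). */
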
    7852 
    7853 
    7854 /**
    7855  * Common code for CIMPL and AIMPL calls.
    7856  *
    7857  * These are calls that use argument variables and such.  They should not be
    7858  * confused with internal calls required to implement an MC operation,
    7859  * like a TLB load and similar.
    7860  *
    7861  * Upon return all that is left to do is to load any hidden arguments and
    7862  * perform the call. All argument variables are freed.
    7863  *
    7864  * @returns New code buffer offset; throws VBox status code on error.
    7865  * @param   pReNative       The native recompile state.
    7866  * @param   off             The code buffer offset.
    7867  * @param   cArgs           The total number of arguments (includes hidden
    7868  *                          count).
    7869  * @param   cHiddenArgs     The number of hidden arguments.  The hidden
    7870  *                          arguments must not have any variable declared for
    7871  *                          them, whereas all the regular arguments must
    7872  *                          (tstIEMCheckMc ensures this).
    7873  */
    7874 DECL_HIDDEN_THROW(uint32_t)
    7875 iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
    7876 {
    7877 #ifdef VBOX_STRICT
    7878     /*
    7879      * Assert sanity.
    7880      */
    7881     Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
    7882     Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
    7883     for (unsigned i = 0; i < cHiddenArgs; i++)
    7884         Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
    7885     for (unsigned i = cHiddenArgs; i < cArgs; i++)
    7886     {
    7887         Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
    7888         Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
    7889     }
    7890     iemNativeRegAssertSanity(pReNative);
    7891 #endif
    7892 
    7893     /*
    7894      * Before we do anything else, go over variables that are referenced and
    7895      * make sure they are not in a register.
    7896      */
    7897     uint32_t bmVars = pReNative->Core.bmVars;
    7898     if (bmVars)
    7899     {
    7900         do
    7901         {
    7902             uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
    7903             bmVars &= ~RT_BIT_32(idxVar);
    7904 
    7905             if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
    7906             {
    7907                 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
    7908                 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
    7909                 {
    7910                     uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
    7911                     Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
    7912                            idxVar, idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
    7913                            idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
    7914                     off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
    7915 
    7916                     pReNative->Core.aVars[idxVar].idxReg    = UINT8_MAX;
    7917                     pReNative->Core.bmHstRegs              &= ~RT_BIT_32(idxRegOld);
    7918                     pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
    7919                     pReNative->Core.bmGstRegShadows        &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
    7920                     pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
    7921                 }
    7922             }
    7923         } while (bmVars != 0);
    7924 #if 0 //def VBOX_STRICT
    7925         iemNativeRegAssertSanity(pReNative);
    7926 #endif
    7927     }
    7928 
    7929     uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
    7930 
    7931     /*
    7932      * First, go over the host registers that will be used for arguments and make
    7933      * sure they either hold the desired argument or are free.
    7934      */
    7935     if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
    7936     {
    7937         for (uint32_t i = 0; i < cRegArgs; i++)
    7938         {
    7939             uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
    7940             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
    7941             {
    7942                 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
    7943                 {
    7944                     uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
    7945                     Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
    7946                     Assert(pReNative->Core.aVars[idxVar].idxReg == idxArgReg);
    7947                     uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
    7948                     if (uArgNo == i)
    7949                     { /* perfect */ }
    7950                     /* The variable allocator logic should make sure this is impossible,
    7951                        except for when the return register is used as a parameter (ARM,
    7952                        but not x86). */
    7953 #if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
    7954                     else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
    7955                     {
    7956 # ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    7957 #  error "Implement this"
    7958 # endif
    7959                         Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
    7960                         uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
    7961                         AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
    7962                                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
    7963                         off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
    7964                     }
    7965 #endif
    7966                     else
    7967                     {
    7968                         AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
    7969 
    7970                         if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
    7971                             off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
    7972                         else
    7973                         {
    7974                             /* just free it, can be reloaded if used again */
    7975                             pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    7976                             pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
    7977                             iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
    7978                         }
    7979                     }
    7980                 }
    7981                 else
    7982                     AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
    7983                                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
    7984             }
    7985         }
    7986 #if 0 //def VBOX_STRICT
    7987         iemNativeRegAssertSanity(pReNative);
    7988 #endif
    7989     }
    7990 
    7991     Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
    7992 
    7993 #ifdef IEMNATIVE_FP_OFF_STACK_ARG0
    7994     /*
    7995      * If there are any stack arguments, make sure they are in their place as well.
    7996      *
    7997      * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
    7998      * the caller) will be loading it later and it must be free (see the first loop).
    7999      */
    8000     if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
    8001     {
    8002         for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
    8003         {
    8004             uint8_t const idxVar    = pReNative->Core.aidxArgVars[i];
    8005             int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
    8006             if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8007             {
    8008                 Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
    8009                 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].idxReg);
    8010                 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg);
    8011                 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
    8012             }
    8013             else
    8014             {
    8015                 /* Use ARG0 as temp for stuff we need registers for. */
    8016                 switch (pReNative->Core.aVars[idxVar].enmKind)
    8017                 {
    8018                     case kIemNativeVarKind_Stack:
    8019                     {
    8020                         uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
    8021                         AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    8022                         off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
    8023                                                        iemNativeStackCalcBpDisp(idxStackSlot));
    8024                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    8025                         continue;
    8026                     }
    8027 
    8028                     case kIemNativeVarKind_Immediate:
    8029                         off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pReNative->Core.aVars[idxVar].u.uValue);
    8030                         continue;
    8031 
    8032                     case kIemNativeVarKind_VarRef:
    8033                     {
    8034                         uint8_t const idxOtherVar    = pReNative->Core.aVars[idxVar].u.idxRefVar;
    8035                         Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
    8036                         uint8_t const idxStackSlot   = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
    8037                         int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
    8038                         uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
    8039                         if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8040                         {
    8041                             off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
    8042                             iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
    8043                             Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
    8044                         }
    8045                         Assert(   pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
    8046                                && pReNative->Core.aVars[idxOtherVar].idxReg       == UINT8_MAX);
    8047                         off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
    8048                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    8049                         continue;
    8050                     }
    8051 
    8052                     case kIemNativeVarKind_GstRegRef:
    8053                         off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
    8054                                                              pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
    8055                                                              pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
    8056                         off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
    8057                         continue;
    8058 
    8059                     case kIemNativeVarKind_Invalid:
    8060                     case kIemNativeVarKind_End:
    8061                         break;
    8062                 }
    8063                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
    8064             }
    8065         }
    8066 # if 0 //def VBOX_STRICT
    8067         iemNativeRegAssertSanity(pReNative);
    8068 # endif
    8069     }
    8070 #else
    8071     AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
    8072 #endif
    8073 
    8074     /*
    8075      * Make sure the argument variables are loaded into their respective registers.
    8076      *
    8077      * We can optimize this by ASSUMING that any register allocations are for
     8078      * registers that have already been loaded and are ready.  The previous step
    8079      * saw to that.
    8080      */
    8081     if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
    8082     {
    8083         for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
    8084         {
    8085             uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
    8086             if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
    8087                 Assert(   pReNative->Core.aHstRegs[idxArgReg].idxVar == pReNative->Core.aidxArgVars[i]
    8088                        && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
    8089                        && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
    8090             else
    8091             {
    8092                 uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
    8093                 if (pReNative->Core.aVars[idxVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8094                 {
    8095                     Assert(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
    8096                     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].idxReg);
    8097                     pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pReNative->Core.aVars[idxVar].idxReg))
    8098                                               | RT_BIT_32(idxArgReg);
    8099                     pReNative->Core.aVars[idxVar].idxReg = idxArgReg;
    8100                 }
    8101                 else
    8102                 {
    8103                     /* Use ARG0 as temp for stuff we need registers for. */
    8104                     switch (pReNative->Core.aVars[idxVar].enmKind)
    8105                     {
    8106                         case kIemNativeVarKind_Stack:
    8107                         {
    8108                             uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
    8109                             AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    8110                             off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
    8111                             continue;
    8112                         }
    8113 
    8114                         case kIemNativeVarKind_Immediate:
    8115                             off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pReNative->Core.aVars[idxVar].u.uValue);
    8116                             continue;
    8117 
    8118                         case kIemNativeVarKind_VarRef:
    8119                         {
    8120                             uint8_t const idxOtherVar    = pReNative->Core.aVars[idxVar].u.idxRefVar;
    8121                             Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
    8122                             uint8_t const idxStackSlot   = iemNativeVarGetStackSlot(pReNative, idxOtherVar);
    8123                             int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
    8124                             uint8_t const idxRegOther    = pReNative->Core.aVars[idxOtherVar].idxReg;
    8125                             if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8126                             {
    8127                                 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
    8128                                 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
    8129                                 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
    8130                             }
    8131                             Assert(   pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
    8132                                    && pReNative->Core.aVars[idxOtherVar].idxReg       == UINT8_MAX);
    8133                             off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
    8134                             continue;
    8135                         }
    8136 
    8137                         case kIemNativeVarKind_GstRegRef:
    8138                             off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
    8139                                                                  pReNative->Core.aVars[idxVar].u.GstRegRef.enmClass,
    8140                                                                  pReNative->Core.aVars[idxVar].u.GstRegRef.idx);
    8141                             continue;
    8142 
    8143                         case kIemNativeVarKind_Invalid:
    8144                         case kIemNativeVarKind_End:
    8145                             break;
    8146                     }
    8147                     AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
    8148                 }
    8149             }
    8150         }
    8151 #if 0 //def VBOX_STRICT
    8152         iemNativeRegAssertSanity(pReNative);
    8153 #endif
    8154     }
    8155 #ifdef VBOX_STRICT
    8156     else
    8157         for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
    8158         {
    8159             Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
    8160             Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
    8161         }
    8162 #endif
    8163 
    8164     /*
    8165      * Free all argument variables (simplified).
    8166      * Their lifetime always expires with the call they are for.
    8167      */
    8168     /** @todo Make the python script check that arguments aren't used after
    8169      *        IEM_MC_CALL_XXXX. */
     8170     /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
     8171      *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
    8172      *        an argument value.  There is also some FPU stuff. */
    8173     for (uint32_t i = cHiddenArgs; i < cArgs; i++)
    8174     {
    8175         uint8_t const idxVar = pReNative->Core.aidxArgVars[i];
    8176         Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
    8177 
    8178         /* no need to free registers: */
    8179         AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
    8180                   ?    pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
    8181                     || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
    8182                   : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
    8183                   ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
    8184                    i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
    8185 
    8186         pReNative->Core.aidxArgVars[i] = UINT8_MAX;
    8187         pReNative->Core.bmVars        &= ~RT_BIT_32(idxVar);
    8188         iemNativeVarFreeStackSlots(pReNative, idxVar);
    8189     }
    8190     Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
    8191 
    8192     /*
    8193      * Flush volatile registers as we make the call.
    8194      */
    8195     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
    8196 
    8197     return off;
    8198 }
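
/*
 * Illustration (a sketch, not shipped code): the IEMNATIVE_FP_OFF_STACK_ARG0
 * path above only matters on hosts with few argument registers (win.amd64
 * passes just four in GPRs).  There, a call with more arguments than register
 * slots spills argument i to the frame at
 * g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT],
 * using IEMNATIVE_CALL_ARG0_GREG as scratch since the caller only loads that
 * register afterwards.
 */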
    8199 
    8200 
    8201 /** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
    8202 DECL_HIDDEN_THROW(uint32_t)
    8203 iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
    8204                              uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
    8205 
    8206 {
    8207     /*
    8208      * Do all the call setup and cleanup.
    8209      */
    8210     off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
    8211 
    8212     /*
    8213      * Load the two or three hidden arguments.
    8214      */
    8215 #if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
    8216     off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    8217     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    8218     off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
    8219 #else
    8220     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    8221     off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
    8222 #endif
    8223 
    8224     /*
    8225      * Make the call and check the return code.
    8226      *
    8227      * Shadow PC copies are always flushed here, other stuff depends on flags.
     8228      * Segment and general purpose registers are explicitly flushed via the
    8229      * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
    8230      * macros.
    8231      */
    8232     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
    8233 #if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
    8234     off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
    8235 #endif
    8236     fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
    8237     if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls.  */
    8238         fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
    8239     iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
    8240 
    8241     return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
    8242 }
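
/*
 * Sketch of the resulting C-level call, assuming the usual CIMPL signature
 * (hidden arguments first, then the explicit ones):
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, a0, a1, ...);
 * In the VBOXSTRICTRC_STRICT_ENABLED + win.amd64 case above, VBOXSTRICTRC is
 * returned via a hidden buffer pointer instead, which is why ARG0 gets the
 * address of a shadow stack slot and the result is reloaded from that slot
 * after the call.
 */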
    8243 
    8244 
    8245 #define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
    8246     off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
    8247 
    8248 /** Emits code for IEM_MC_CALL_CIMPL_1. */
    8249 DECL_INLINE_THROW(uint32_t)
    8250 iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    8251                         uintptr_t pfnCImpl, uint8_t idxArg0)
    8252 {
    8253     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    8254     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
    8255 }
    8256 
    8257 
    8258 #define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
    8259     off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
    8260 
    8261 /** Emits code for IEM_MC_CALL_CIMPL_2. */
    8262 DECL_INLINE_THROW(uint32_t)
    8263 iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    8264                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
    8265 {
    8266     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    8267     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    8268     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
    8269 }
    8270 
    8271 
    8272 #define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
    8273     off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
    8274                                   (uintptr_t)a_pfnCImpl, a0, a1, a2)
    8275 
    8276 /** Emits code for IEM_MC_CALL_CIMPL_3. */
    8277 DECL_INLINE_THROW(uint32_t)
    8278 iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    8279                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
    8280 {
    8281     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    8282     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    8283     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
    8284     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
    8285 }
    8286 
    8287 
    8288 #define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
    8289     off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
    8290                                   (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
    8291 
    8292 /** Emits code for IEM_MC_CALL_CIMPL_4. */
    8293 DECL_INLINE_THROW(uint32_t)
    8294 iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    8295                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
    8296 {
    8297     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    8298     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    8299     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
    8300     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
    8301     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
    8302 }
    8303 
    8304 
    8305 #define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
    8306     off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
    8307                                   (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
    8308 
     8309 /** Emits code for IEM_MC_CALL_CIMPL_5. */
    8310 DECL_INLINE_THROW(uint32_t)
    8311 iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
    8312                         uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
    8313 {
    8314     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
    8315     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
    8316     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
    8317     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
    8318     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
    8319     return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
    8320 }
    8321 
    8322 
    8323 /** Recompiler debugging: Flush guest register shadow copies. */
    8324 #define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
    8325 
    8326 
    8327 
    8328 /*********************************************************************************************************************************
    8329 *   Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX                                                            *
    8330 *********************************************************************************************************************************/
    8331 
    8332 /**
    8333  * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
    8334  */
    8335 DECL_INLINE_THROW(uint32_t)
    8336 iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    8337                              uintptr_t pfnAImpl, uint8_t cArgs)
    8338 {
    8339     if (idxVarRc != UINT8_MAX)
    8340     {
    8341         IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
    8342         AssertStmt(pReNative->Core.aVars[idxVarRc].uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
    8343         AssertStmt(pReNative->Core.aVars[idxVarRc].cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
    8344     }
    8345 
    8346     /*
    8347      * Do all the call setup and cleanup.
    8348      */
    8349     off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
    8350 
    8351     /*
    8352      * Make the call and update the return code variable if we've got one.
    8353      */
    8354     off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
    8355     if (idxVarRc < RT_ELEMENTS(pReNative->Core.aVars))
    8356     {
    8357 pReNative->pInstrBuf[off++] = 0xcc; /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
    8358         iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
    8359     }
    8360 
    8361     return off;
    8362 }
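
/*
 * Note on the return value handling above: no move instruction is emitted
 * for idxVarRc; the variable is simply rebound to IEMNATIVE_CALL_RET_GREG
 * (rax on amd64, x0 on arm64) via iemNativeVarRegisterSet, so the value
 * stays where the ABI put it until something forces a spill.
 */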
    8363 
    8364 
    8365 
    8366 #define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
    8367     off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
    8368 
    8369 #define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
    8370     off = iemNativeEmitCallAImpl0(pReNative, off, a_rc,                   (uintptr_t)(a_pfn))
    8371 
    8372 /** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
    8373 DECL_INLINE_THROW(uint32_t)
    8374 iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
    8375 {
    8376     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
    8377 }
    8378 
    8379 
    8380 #define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
    8381     off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
    8382 
    8383 #define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
    8384     off = iemNativeEmitCallAImpl1(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0)
    8385 
    8386 /** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
    8387 DECL_INLINE_THROW(uint32_t)
    8388 iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
    8389 {
    8390     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    8391     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
    8392 }
    8393 
    8394 
    8395 #define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
    8396     off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
    8397 
    8398 #define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
    8399     off = iemNativeEmitCallAImpl2(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0, a1)
    8400 
    8401 /** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
    8402 DECL_INLINE_THROW(uint32_t)
    8403 iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    8404                         uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
    8405 {
    8406     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    8407     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
    8408     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
    8409 }
    8410 
    8411 
    8412 #define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
    8413     off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
    8414 
    8415 #define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
    8416     off = iemNativeEmitCallAImpl3(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0, a1, a2)
    8417 
    8418 /** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
    8419 DECL_INLINE_THROW(uint32_t)
    8420 iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    8421                         uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
    8422 {
    8423     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    8424     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
    8425     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
    8426     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
    8427 }
    8428 
    8429 
    8430 #define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
    8431     off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
    8432 
    8433 #define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
    8434     off = iemNativeEmitCallAImpl4(pReNative, off, a_rc,                   (uintptr_t)(a_pfn), a0, a1, a2, a3)
    8435 
    8436 /** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
    8437 DECL_INLINE_THROW(uint32_t)
    8438 iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
    8439                         uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
    8440 {
    8441     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
    8442     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
    8443     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
    8444     IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
    8445     return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
    8446 }
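
/*
 * Usage sketch (hypothetical helper name, otherwise the standard pattern):
 *      IEM_MC_LOCAL(uint32_t, fEFlagsRet);
 *      IEM_MC_CALL_AIMPL_2(fEFlagsRet, iemAImpl_example_u32, pu32Dst, u32Src);
 * The argument variables die with the call (see the common worker above),
 * while fEFlagsRet comes back bound to the host return register.
 */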
    8447 
    8448 
    8449 
    8450 /*********************************************************************************************************************************
    8451 *   Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX).                                                       *
    8452 *********************************************************************************************************************************/
    8453 
    8454 #define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
    8455     off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst,  a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
    8456 
    8457 #define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
    8458     off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
    8459 
    8460 #define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
    8461     off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
    8462 
    8463 #define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
    8464     off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
    8465 
    8466 
    8467 /** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
    8468  *  IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
    8469 DECL_INLINE_THROW(uint32_t)
    8470 iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
    8471 {
    8472     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    8473     Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
    8474     Assert(iGRegEx < 20);
    8475 
    8476     /* Same discussion as in iemNativeEmitFetchGregU16 */
    8477     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    8478                                                                   kIemNativeGstRegUse_ReadOnly);
    8479 
    8480     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    8481     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    8482 
    8483     /* The value is zero-extended to the full 64-bit host register width. */
    8484     if (iGRegEx < 16)
    8485         off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    8486     else
    8487         off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
    8488 
    8489     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    8490     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    8491     return off;
    8492 }
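
/*
 * On the iGRegEx encoding assumed above: 0..15 pick the low byte of the
 * corresponding GPR (AL..R15B), while 16..19 pick the legacy high-byte
 * registers (16=AH, 17=CH, 18=DH, 19=BH).  That is why the shadow register
 * is looked up with 'iGRegEx & 15' and the second range goes through the
 * Gpr8Hi loader.
 */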
    8493 
    8494 
    8495 #define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
    8496     off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
    8497 
    8498 #define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
    8499     off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
    8500 
    8501 #define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
    8502     off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
    8503 
    8504 /** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
    8505 DECL_INLINE_THROW(uint32_t)
    8506 iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
    8507 {
    8508     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    8509     Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
    8510     Assert(iGRegEx < 20);
    8511 
    8512     /* Same discussion as in iemNativeEmitFetchGregU16 */
    8513     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    8514                                                                   kIemNativeGstRegUse_ReadOnly);
    8515 
    8516     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    8517     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    8518 
    8519     if (iGRegEx < 16)
    8520     {
    8521         switch (cbSignExtended)
    8522         {
    8523             case sizeof(uint16_t):
    8524                 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    8525                 break;
    8526             case sizeof(uint32_t):
    8527                 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    8528                 break;
    8529             case sizeof(uint64_t):
    8530                 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
    8531                 break;
    8532             default: AssertFailed(); break;
    8533         }
    8534     }
    8535     else
    8536     {
    8537         off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
    8538         switch (cbSignExtended)
    8539         {
    8540             case sizeof(uint16_t):
    8541                 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
    8542                 break;
    8543             case sizeof(uint32_t):
    8544                 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
    8545                 break;
    8546             case sizeof(uint64_t):
    8547                 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
    8548                 break;
    8549             default: AssertFailed(); break;
    8550         }
    8551     }
    8552 
    8553     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    8554     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    8555     return off;
    8556 }
    8557 
    8558 
    8559 
    8560 #define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
    8561     off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
    8562 
    8563 #define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
    8564     off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
    8565 
    8566 #define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
    8567     off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
    8568 
    8569 /** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
    8570 DECL_INLINE_THROW(uint32_t)
    8571 iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
    8572 {
    8573     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    8574     Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF(cbZeroExtended);
    8575     Assert(iGReg < 16);
    8576 
    8577     /*
    8578      * We can either just load the low 16-bit of the GPR into a host register
    8579      * for the variable, or we can do so via a shadow copy host register. The
    8580      * latter will avoid having to reload it if it's being stored later, but
    8581      * will waste a host register if it isn't touched again.  Since we don't
     8582      * know what's going to happen, we choose the latter for now.
    8583      */
    8584     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8585                                                                   kIemNativeGstRegUse_ReadOnly);
    8586 
    8587     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    8588     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    8589     off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
    8590     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    8591 
    8592     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    8593     return off;
    8594 }
    8595 
    8596 
    8597 #define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
    8598     off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
    8599 
    8600 #define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
    8601     off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
    8602 
    8603 /** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
    8604 DECL_INLINE_THROW(uint32_t)
    8605 iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
    8606 {
    8607     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    8608     Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbSignExtended);
    8609     Assert(iGReg < 16);
    8610 
    8611     /*
    8612      * We can either just load the low 16-bit of the GPR into a host register
    8613      * for the variable, or we can do so via a shadow copy host register. The
    8614      * latter will avoid having to reload it if it's being stored later, but
    8615      * will waste a host register if it isn't touched again.  Since we don't
     8616      * know what's going to happen, we choose the latter for now.
    8617      */
    8618     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8619                                                                   kIemNativeGstRegUse_ReadOnly);
    8620 
    8621     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    8622     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    8623     if (cbSignExtended == sizeof(uint32_t))
    8624         off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
    8625     else
    8626     {
    8627         Assert(cbSignExtended == sizeof(uint64_t));
    8628         off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
    8629     }
    8630     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    8631 
    8632     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    8633     return off;
    8634 }
    8635 
    8636 
    8637 #define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
    8638     off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
    8639 
    8640 #define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
    8641     off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
    8642 
     8643 /** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
    8644 DECL_INLINE_THROW(uint32_t)
    8645 iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
    8646 {
    8647     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    8648     Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbZeroExtended); RT_NOREF_PV(cbZeroExtended);
    8649     Assert(iGReg < 16);
    8650 
    8651     /*
     8652      * We can either just load the low 32-bit of the GPR into a host register
    8653      * for the variable, or we can do so via a shadow copy host register. The
    8654      * latter will avoid having to reload it if it's being stored later, but
    8655      * will waste a host register if it isn't touched again.  Since we don't
     8656      * know what's going to happen, we choose the latter for now.
    8657      */
    8658     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8659                                                                   kIemNativeGstRegUse_ReadOnly);
    8660 
    8661     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    8662     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    8663     off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
    8664     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    8665 
    8666     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    8667     return off;
    8668 }
    8669 
    8670 
    8671 #define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
    8672     off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
    8673 
     8674 /** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
    8675 DECL_INLINE_THROW(uint32_t)
    8676 iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
    8677 {
    8678     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    8679     Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
    8680     Assert(iGReg < 16);
    8681 
    8682     /*
    8683      * We can either just load the low 32-bit of the GPR into a host register
    8684      * for the variable, or we can do so via a shadow copy host register. The
    8685      * latter will avoid having to reload it if it's being stored later, but
    8686      * will waste a host register if it isn't touched again.  Since we don't
     8687      * know what's going to happen, we choose the latter for now.
    8688      */
    8689     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8690                                                                   kIemNativeGstRegUse_ReadOnly);
    8691 
    8692     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    8693     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    8694     off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
    8695     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    8696 
    8697     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    8698     return off;
    8699 }
    8700 
    8701 
    8702 #define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
    8703     off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
    8704 
    8705 #define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
    8706     off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
    8707 
    8708 /** Emits code for IEM_MC_FETCH_GREG_U64 (and the
    8709  *  IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
    8710 DECL_INLINE_THROW(uint32_t)
    8711 iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
    8712 {
    8713     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    8714     Assert(pReNative->Core.aVars[idxDstVar].cbVar == sizeof(uint64_t));
    8715     Assert(iGReg < 16);
    8716 
    8717     uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8718                                                                   kIemNativeGstRegUse_ReadOnly);
    8719 
    8720     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    8721     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    8722     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
    8723     /** @todo name the register a shadow one already? */
    8724     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    8725 
    8726     iemNativeRegFreeTmp(pReNative, idxGstFullReg);
    8727     return off;
    8728 }
    8729 
    8730 
    8731 
    8732 /*********************************************************************************************************************************
    8733 *   Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX).                                                        *
    8734 *********************************************************************************************************************************/
    8735 
    8736 #define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
    8737     off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
    8738 
    8739 /** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
    8740 DECL_INLINE_THROW(uint32_t)
    8741 iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
    8742 {
    8743     Assert(iGRegEx < 20);
    8744     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    8745                                                                  kIemNativeGstRegUse_ForUpdate);
    8746 #ifdef RT_ARCH_AMD64
    8747     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
    8748 
    8749     /* To the lowest byte of the register: mov r8, imm8 */
    8750     if (iGRegEx < 16)
    8751     {
    8752         if (idxGstTmpReg >= 8)
    8753             pbCodeBuf[off++] = X86_OP_REX_B;
    8754         else if (idxGstTmpReg >= 4)
    8755             pbCodeBuf[off++] = X86_OP_REX;
    8756         pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
    8757         pbCodeBuf[off++] = u8Value;
    8758     }
     8759     /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
    8760     else if (idxGstTmpReg < 4)
    8761     {
    8762         pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
    8763         pbCodeBuf[off++] = u8Value;
    8764     }
    8765     else
    8766     {
    8767         /* ror reg64, 8 */
    8768         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    8769         pbCodeBuf[off++] = 0xc1;
    8770         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    8771         pbCodeBuf[off++] = 8;
    8772 
    8773         /* mov reg8, imm8  */
    8774         if (idxGstTmpReg >= 8)
    8775             pbCodeBuf[off++] = X86_OP_REX_B;
    8776         else if (idxGstTmpReg >= 4)
    8777             pbCodeBuf[off++] = X86_OP_REX;
    8778         pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
    8779         pbCodeBuf[off++] = u8Value;
    8780 
    8781         /* rol reg64, 8 */
    8782         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    8783         pbCodeBuf[off++] = 0xc1;
    8784         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    8785         pbCodeBuf[off++] = 8;
    8786     }
    8787 
    8788 #elif defined(RT_ARCH_ARM64)
    8789     uint8_t const    idxImmReg   = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
    8790     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    8791     if (iGRegEx < 16)
    8792         /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
    8793         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
    8794     else
    8795         /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
    8796         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
    8797     iemNativeRegFreeTmp(pReNative, idxImmReg);
    8798 
    8799 #else
    8800 # error "Port me!"
    8801 #endif
    8802 
    8803     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    8804 
    8805     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
    8806 
    8807     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8808     return off;
    8809 }
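
/*
 * Worked example for the rotate path above (a sketch): writing 0x42 to the
 * AH-position byte when the guest RAX shadow lives in r9, which has no
 * high-byte encoding:
 *      ror r9, 8       ; 49 c1 c9 08 - bits 15:8 rotate down to 7:0
 *      mov r9b, 0x42   ; 41 b1 42
 *      rol r9, 8       ; 49 c1 c1 08 - restore the original byte order
 */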
    8810 
    8811 
    8812 #define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
    8813     off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
    8814 
    8815 /** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
    8816 DECL_INLINE_THROW(uint32_t)
    8817 iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
    8818 {
    8819     Assert(iGRegEx < 20);
    8820     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    8821 
    8822     /*
     8823      * If it's a constant value (unlikely) we treat this as an
    8824      * IEM_MC_STORE_GREG_U8_CONST statement.
    8825      */
    8826     if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
    8827     { /* likely */ }
    8828     else
    8829     {
    8830         AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
    8831                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    8832         return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pReNative->Core.aVars[idxValueVar].u.uValue);
    8833     }
    8834 
    8835     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
    8836                                                                  kIemNativeGstRegUse_ForUpdate);
    8837     uint8_t const    idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
    8838 
    8839 #ifdef RT_ARCH_AMD64
    8840     /* To the lowest byte of the register: mov reg8, reg8(r/m) */
    8841     if (iGRegEx < 16)
    8842     {
    8843         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
    8844         if (idxGstTmpReg >= 8 || idxVarReg >= 8)
    8845             pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
    8846         else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
    8847             pbCodeBuf[off++] = X86_OP_REX;
    8848         pbCodeBuf[off++] = 0x8a;
    8849         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
    8850     }
     8851     /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
    8852     else if (idxGstTmpReg < 4 && idxVarReg < 4)
    8853     {
    8854         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
    8855         pbCodeBuf[off++] = 0x8a;
    8856         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
    8857     }
    8858     else
    8859     {
    8860         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
    8861 
    8862         /* ror reg64, 8 */
    8863         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    8864         pbCodeBuf[off++] = 0xc1;
    8865         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    8866         pbCodeBuf[off++] = 8;
    8867 
    8868         /* mov reg8, reg8(r/m)  */
    8869         if (idxGstTmpReg >= 8 || idxVarReg >= 8)
    8870             pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
    8871         else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
    8872             pbCodeBuf[off++] = X86_OP_REX;
    8873         pbCodeBuf[off++] = 0x8a;
    8874         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
    8875 
    8876         /* rol reg64, 8 */
    8877         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
    8878         pbCodeBuf[off++] = 0xc1;
    8879         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    8880         pbCodeBuf[off++] = 8;
    8881     }
    8882 
    8883 #elif defined(RT_ARCH_ARM64)
    8884     /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
    8885             or
    8886        bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
    8887     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    8888     if (iGRegEx < 16)
    8889         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
    8890     else
    8891         pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
    8892 
    8893 #else
    8894 # error "Port me!"
    8895 #endif
    8896     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    8897 
    8898     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    8899 
    8900     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
    8901     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8902     return off;
    8903 }
    8904 
    8905 
    8906 
    8907 #define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
    8908     off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
    8909 
     8910 /** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
    8911 DECL_INLINE_THROW(uint32_t)
    8912 iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
    8913 {
    8914     Assert(iGReg < 16);
    8915     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8916                                                                  kIemNativeGstRegUse_ForUpdate);
    8917 #ifdef RT_ARCH_AMD64
    8918     /* mov reg16, imm16 */
    8919     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    8920     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    8921     if (idxGstTmpReg >= 8)
    8922         pbCodeBuf[off++] = X86_OP_REX_B;
    8923     pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
    8924     pbCodeBuf[off++] = RT_BYTE1(uValue);
    8925     pbCodeBuf[off++] = RT_BYTE2(uValue);
    8926 
    8927 #elif defined(RT_ARCH_ARM64)
    8928     /* movk xdst, #uValue, lsl #0 */
    8929     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    8930     pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
    8931 
    8932 #else
    8933 # error "Port me!"
    8934 #endif
    8935 
    8936     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    8937 
    8938     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    8939     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    8940     return off;
    8941 }
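
/*
 * Both encodings above are merge-stores: the immediate replaces bits 15:0 of
 * the 64-bit shadow register and leaves bits 63:16 alone (operand-size
 * prefixed mov on x86, movk with lsl #0 on arm64), matching the x86 rule
 * that 16-bit GPR writes preserve the upper bits.
 */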
    8942 
    8943 
    8944 #define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
    8945     off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
    8946 
    8947 /** Emits code for IEM_MC_STORE_GREG_U16. */
    8948 DECL_INLINE_THROW(uint32_t)
    8949 iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
    8950 {
    8951     Assert(iGReg < 16);
    8952     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    8953 
    8954     /*
     8955      * If it's a constant value (unlikely) we treat this as an
    8956      * IEM_MC_STORE_GREG_U16_CONST statement.
    8957      */
    8958     if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
    8959     { /* likely */ }
    8960     else
    8961     {
    8962         AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
    8963                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    8964         return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pReNative->Core.aVars[idxValueVar].u.uValue);
    8965     }
    8966 
    8967     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    8968                                                                  kIemNativeGstRegUse_ForUpdate);
    8969 
    8970 #ifdef RT_ARCH_AMD64
    8971     /* mov reg16, reg16 or [mem16] */
    8972     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
    8973     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    8974     if (pReNative->Core.aVars[idxValueVar].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
    8975     {
    8976         if (idxGstTmpReg >= 8 || pReNative->Core.aVars[idxValueVar].idxReg >= 8)
    8977             pbCodeBuf[off++] = (idxGstTmpReg >= 8                              ? X86_OP_REX_R : 0)
    8978                              | (pReNative->Core.aVars[idxValueVar].idxReg >= 8 ? X86_OP_REX_B : 0);
    8979         pbCodeBuf[off++] = 0x8b;
    8980         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pReNative->Core.aVars[idxValueVar].idxReg & 7);
    8981     }
    8982     else
    8983     {
    8984         uint8_t const idxStackSlot = pReNative->Core.aVars[idxValueVar].idxStackSlot;
    8985         AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
    8986         if (idxGstTmpReg >= 8)
    8987             pbCodeBuf[off++] = X86_OP_REX_R;
    8988         pbCodeBuf[off++] = 0x8b;
    8989         off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
    8990     }
    8991 
    8992 #elif defined(RT_ARCH_ARM64)
    8993     /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
    8994     uint8_t const    idxVarReg   = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
    8995     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    8996     pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
    8997     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    8998 
    8999 #else
    9000 # error "Port me!"
    9001 #endif
    9002 
    9003     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9004 
    9005     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9006     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9007     return off;
    9008 }
    9009 
    9010 
    9011 #define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
    9012     off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
    9013 
    9014 /** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
    9015 DECL_INLINE_THROW(uint32_t)
    9016 iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
    9017 {
    9018     Assert(iGReg < 16);
    9019     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9020                                                                  kIemNativeGstRegUse_ForFullWrite);
    9021     off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
    9022     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9023     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9024     return off;
    9025 }
    9026 
    9027 
    9028 #define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
    9029     off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
    9030 
    9031 /** Emits code for IEM_MC_STORE_GREG_U32. */
    9032 DECL_INLINE_THROW(uint32_t)
    9033 iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
    9034 {
    9035     Assert(iGReg < 16);
    9036     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    9037 
    9038     /*
     9039      * If it's a constant value (unlikely) we treat this as an
    9040      * IEM_MC_STORE_GREG_U32_CONST statement.
    9041      */
    9042     if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
    9043     { /* likely */ }
    9044     else
    9045     {
    9046         AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
    9047                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    9048         return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pReNative->Core.aVars[idxValueVar].u.uValue);
    9049     }
    9050 
    9051     /*
    9052      * For the rest we allocate a guest register for the variable and write
    9053      * it to the CPUMCTX structure.
    9054      */
    9055     uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
    9056     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9057 #ifdef VBOX_STRICT
    9058     off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
    9059 #endif
    9060     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    9061     return off;
    9062 }
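
/*
 * A minimal standalone sketch of the guest semantics relied on above: on
 * x86-64 a 32-bit GPR write zero-extends into bits 63:32, which is why the
 * emitter can store the full 64-bit shadow register and, in strict builds,
 * merely assert that the top half is already clear.  The demo function name
 * is made up for illustration; it is not part of the VBox sources.
 */
#include <stdint.h>

static inline uint64_t demoStoreGregU32(uint32_t uValue)
{
    /* The previous register value does not matter: bits 63:32 become zero. */
    return (uint64_t)uValue;
}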
    9063 
    9064 
    9065 #define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
    9066     off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
    9067 
    9068 /** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
    9069 DECL_INLINE_THROW(uint32_t)
    9070 iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
    9071 {
    9072     Assert(iGReg < 16);
    9073     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9074                                                                  kIemNativeGstRegUse_ForFullWrite);
    9075     off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
    9076     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9077     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9078     return off;
    9079 }
    9080 
    9081 
    9082 #define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
    9083     off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
    9084 
    9085 /** Emits code for IEM_MC_STORE_GREG_U64. */
    9086 DECL_INLINE_THROW(uint32_t)
    9087 iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
    9088 {
    9089     Assert(iGReg < 16);
    9090     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
    9091 
    9092     /*
    9093      * If it's a constant value (unlikely) we treat this as an
    9094      * IEM_MC_STORE_GREG_U64_CONST statement.
    9095      */
    9096     if (pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Stack)
    9097     { /* likely */ }
    9098     else
    9099     {
    9100         AssertStmt(pReNative->Core.aVars[idxValueVar].enmKind == kIemNativeVarKind_Immediate,
    9101                    IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    9102         return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pReNative->Core.aVars[idxValueVar].u.uValue);
    9103     }
    9104 
    9105     /*
    9106      * For the rest we allocate a guest register for the variable and write
    9107      * it to the CPUMCTX structure.
    9108      */
    9109     uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
    9110     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9111     iemNativeVarRegisterRelease(pReNative, idxValueVar);
    9112     return off;
    9113 }
    9114 
    9115 
    9116 #define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
    9117     off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
    9118 
    9119 /** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
    9120 DECL_INLINE_THROW(uint32_t)
    9121 iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
    9122 {
    9123     Assert(iGReg < 16);
    9124     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9125                                                                  kIemNativeGstRegUse_ForUpdate);
    9126     off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
    9127     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9128     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9129     return off;
    9130 }
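
/*
 * Sketch of what IEM_MC_CLEAR_HIGH_GREG_U64 computes, using the same trick
 * as the emitter above: a 32-bit register-to-register move (modelled here
 * with a plain mask) zero-extends and thus clears bits 63:32.  The helper
 * name is hypothetical, not a VBox API.
 */
#include <stdint.h>

static inline uint64_t demoClearHighGregU64(uint64_t uGReg)
{
    return uGReg & UINT32_MAX;  /* equivalent of 'mov r32, r32' on the register itself */
}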
    9131 
    9132 
    9133 /*********************************************************************************************************************************
    9134 *   General purpose register manipulation (add, sub).                                                                            *
    9135 *********************************************************************************************************************************/
    9136 
    9137 #define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
    9138     off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
    9139 
    9140 /** Emits code for IEM_MC_ADD_GREG_U16. */
    9141 DECL_INLINE_THROW(uint32_t)
    9142 iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
    9143 {
    9144     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9145                                                                  kIemNativeGstRegUse_ForUpdate);
    9146 
    9147 #ifdef RT_ARCH_AMD64
    9148     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
    9149     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    9150     if (idxGstTmpReg >= 8)
    9151         pbCodeBuf[off++] = X86_OP_REX_B;
    9152     if (uAddend == 1)
    9153     {
    9154         pbCodeBuf[off++] = 0xff; /* inc */
    9155         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    9156     }
    9157     else
    9158     {
    9159         pbCodeBuf[off++] = 0x81; /* add */
    9160         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    9161         pbCodeBuf[off++] = uAddend;
    9162         pbCodeBuf[off++] = 0;
    9163     }
    9164 
    9165 #else
    9166     uint8_t const    idxTmpReg   = iemNativeRegAllocTmp(pReNative, &off);
    9167     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    9168 
    9169     /* add tmp, gstgrp, uAddend */
    9170     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
    9171 
    9172     /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
    9173     pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
    9174 
    9175     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    9176 #endif
    9177 
    9178     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9179 
    9180     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9181 
    9182     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9183     return off;
    9184 }
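
/*
 * Standalone model of the 16-bit add semantics implemented above: only
 * bits 15:0 are updated while bits 63:16 of the guest register are
 * preserved, which is what the ARM64 path achieves with the add + bfi
 * pair.  The function name is illustrative only.
 */
#include <stdint.h>

static inline uint64_t demoAddGregU16(uint64_t uGReg, uint8_t uAddend)
{
    uint16_t const uResult = (uint16_t)((uint16_t)uGReg + uAddend); /* wraps at 16 bits */
    return (uGReg & ~(uint64_t)UINT16_MAX) | uResult;               /* merge like bfi   */
}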
    9185 
    9186 
    9187 #define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
    9188     off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
    9189 
    9190 #define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
    9191     off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
    9192 
    9193 /** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
    9194 DECL_INLINE_THROW(uint32_t)
    9195 iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
    9196 {
    9197     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9198                                                                  kIemNativeGstRegUse_ForUpdate);
    9199 
    9200 #ifdef RT_ARCH_AMD64
    9201     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    9202     if (f64Bit)
    9203         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
    9204     else if (idxGstTmpReg >= 8)
    9205         pbCodeBuf[off++] = X86_OP_REX_B;
    9206     if (uAddend == 1)
    9207     {
    9208         pbCodeBuf[off++] = 0xff; /* inc */
    9209         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    9210     }
    9211     else if (uAddend < 128)
    9212     {
    9213         pbCodeBuf[off++] = 0x83; /* add */
    9214         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    9215         pbCodeBuf[off++] = RT_BYTE1(uAddend);
    9216     }
    9217     else
    9218     {
    9219         pbCodeBuf[off++] = 0x81; /* add */
    9220         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
    9221         pbCodeBuf[off++] = RT_BYTE1(uAddend);
    9222         pbCodeBuf[off++] = 0;
    9223         pbCodeBuf[off++] = 0;
    9224         pbCodeBuf[off++] = 0;
    9225     }
    9226 
    9227 #else
    9228     /* add gstgrp, gstgrp, uAddend */
    9229     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9230     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
    9231 
    9232 #endif
    9233 
    9234     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9235 
    9236     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9237 
    9238     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9239     return off;
    9240 }
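
/*
 * Sketch of the AMD64 immediate-form selection above (illustrative helper,
 * not VBox code): 'inc' is the shortest encoding for +1; the sign-extended
 * imm8 form (0x83 /0) only works for addends below 128 since uAddend is
 * unsigned; anything else falls back to the imm32 form (0x81 /0).
 */
#include <stdint.h>

static inline unsigned demoAddImmInstrLength(uint8_t uAddend, bool fRexPrefix)
{
    unsigned const cbRex = fRexPrefix ? 1 : 0;  /* REX.W and/or REX.B, one byte */
    if (uAddend == 1)
        return cbRex + 2;   /* ff /0    - inc r/m                      */
    if (uAddend < 128)
        return cbRex + 3;   /* 83 /0 ib - add r/m, imm8 (sign-extends) */
    return cbRex + 6;       /* 81 /0 id - add r/m, imm32               */
}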
    9241 
    9242 
    9243 
    9244 #define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
    9245     off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
    9246 
    9247 /** Emits code for IEM_MC_SUB_GREG_U16. */
    9248 DECL_INLINE_THROW(uint32_t)
    9249 iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
    9250 {
    9251     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9252                                                                  kIemNativeGstRegUse_ForUpdate);
    9253 
    9254 #ifdef RT_ARCH_AMD64
    9255     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
    9256     pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
    9257     if (idxGstTmpReg >= 8)
    9258         pbCodeBuf[off++] = X86_OP_REX_B;
    9259     if (uSubtrahend == 1)
    9260     {
    9261         pbCodeBuf[off++] = 0xff; /* dec */
    9262         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    9263     }
    9264     else
    9265     {
    9266         pbCodeBuf[off++] = 0x81; /* sub */
    9267         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
    9268         pbCodeBuf[off++] = uSubtrahend;
    9269         pbCodeBuf[off++] = 0;
    9270     }
    9271 
    9272 #else
    9273     uint8_t const    idxTmpReg   = iemNativeRegAllocTmp(pReNative, &off);
    9274     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
    9275 
    9276     /* sub tmp, gstgrp, uSubtrahend */
    9277     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
    9278 
    9279     /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
    9280     pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
    9281 
    9282     iemNativeRegFreeTmp(pReNative, idxTmpReg);
    9283 #endif
    9284 
    9285     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9286 
    9287     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9288 
    9289     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9290     return off;
    9291 }
    9292 
    9293 
    9294 #define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
    9295     off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
    9296 
    9297 #define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
    9298     off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
    9299 
    9300 /** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
    9301 DECL_INLINE_THROW(uint32_t)
    9302 iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
    9303 {
    9304     uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
    9305                                                                  kIemNativeGstRegUse_ForUpdate);
    9306 
    9307 #ifdef RT_ARCH_AMD64
    9308     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    9309     if (f64Bit)
    9310         pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
    9311     else if (idxGstTmpReg >= 8)
    9312         pbCodeBuf[off++] = X86_OP_REX_B;
    9313     if (uSubtrahend == 1)
    9314     {
    9315         pbCodeBuf[off++] = 0xff; /* dec */
    9316         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
    9317     }
    9318     else if (uSubtrahend < 128)
    9319     {
    9320         pbCodeBuf[off++] = 0x83; /* sub */
    9321         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
    9322         pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
    9323     }
    9324     else
    9325     {
    9326         pbCodeBuf[off++] = 0x81; /* sub */
    9327         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
    9328         pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
    9329         pbCodeBuf[off++] = 0;
    9330         pbCodeBuf[off++] = 0;
    9331         pbCodeBuf[off++] = 0;
    9332     }
    9333 
    9334 #else
    9335     /* sub gstgrp, gstgrp, uSubtrahend */
    9336     uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9337     pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
    9338 
    9339 #endif
    9340 
    9341     IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9342 
    9343     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
    9344 
    9345     iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
    9346     return off;
    9347 }
    9348 
    9349 
    9350 
    9351 /*********************************************************************************************************************************
    9352 *   EFLAGS                                                                                                                       *
    9353 *********************************************************************************************************************************/
    9354 
    9355 #define IEM_MC_FETCH_EFLAGS(a_EFlags) \
    9356     off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags)
    9357 
    9358 /** Handles IEM_MC_FETCH_EFLAGS. */
    9359 DECL_INLINE_THROW(uint32_t)
    9360 iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
    9361 {
    9362     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
    9363     Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
    9364 
    9365     uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
    9366     iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
    9367     off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
    9368     iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
    9369     return off;
    9370 }
    9371 
    9372 
    9373 #define IEM_MC_COMMIT_EFLAGS(a_EFlags) \
    9374     off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags)
    9375 
    9376 /** Handles IEM_MC_COMMIT_EFLAGS. */
    9377 DECL_INLINE_THROW(uint32_t)
    9378 iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
    9379 {
    9380     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
    9381     Assert(pReNative->Core.aVars[idxVarEFlags].cbVar == sizeof(uint32_t));
    9382 
    9383     uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
    9384 
    9385 #ifdef VBOX_STRICT
    9386     off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
    9387     uint32_t offFixup = off;
    9388     off = iemNativeEmitJnzToFixed(pReNative, off, off);
    9389     off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
    9390     iemNativeFixupFixedJump(pReNative, offFixup, off);
    9391 
    9392     off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
    9393     offFixup = off;
    9394     off = iemNativeEmitJzToFixed(pReNative, off, off);
    9395     off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
    9396     iemNativeFixupFixedJump(pReNative, offFixup, off);
    9397 #endif
    9398 
    9399     iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
    9400     off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
    9401     iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
    9402     return off;
    9403 }
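
/*
 * Rough standalone model of the strict-build sanity check above, assuming
 * the usual architectural reserved bits of EFLAGS: bit 1 reads as one
 * (RA1) while bits 3, 5, 15 and 22..31 read as zero.  The exact masks the
 * recompiler uses (X86_EFL_RA1_MASK, X86_EFL_RAZ_MASK and
 * CPUMX86EFLAGS_HW_MASK_32) live in the VBox headers; the constants below
 * are hand-written stand-ins for illustration.
 */
#include <stdint.h>

static inline bool demoEflagsLooksValid(uint32_t fEfl)
{
    uint32_t const fRa1Mask = UINT32_C(0x00000002);  /* bit 1: reserved, always one          */
    uint32_t const fRazMask = UINT32_C(0xffc08028);  /* bits 3, 5, 15, 22..31: reserved zero */
    return (fEfl & fRa1Mask) == fRa1Mask
        && (fEfl & fRazMask) == 0;
}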
    9404 
    9405 
    9406 
    9407 /*********************************************************************************************************************************
    9408 *   Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).                                                               *
    9409 *********************************************************************************************************************************/
    9410 
    9411 #define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
    9412     off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
    9413 
    9414 #define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
    9415     off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
    9416 
    9417 #define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
    9418     off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
    9419 
    9420 
    9421 /** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
    9422  *  IEM_MC_FETCH_SREG_ZX_U64. */
    9423 DECL_INLINE_THROW(uint32_t)
    9424 iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
    9425 {
    9426     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
    9427     Assert(pReNative->Core.aVars[idxDstVar].cbVar == cbVar); RT_NOREF(cbVar);
    9428     Assert(iSReg < X86_SREG_COUNT);
    9429 
    9430     /*
    9431      * For now, we will not create a shadow copy of a selector.  The rationale
    9432      * is that since we do not recompile the popping and loading of segment
    9433      * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
    9434      * pushing and moving to registers, there is only a small chance that the
    9435      * shadow copy will be accessed again before the register is reloaded.  One
    9436      * scenario would be nested calls in 16-bit code, but I doubt it's worth
    9437      * the extra register pressure atm.
    9438      *
    9439      * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
    9440      * and iemNativeVarRegisterAcquire for a load scenario. We only got the
    9441      * store scenario covered at present (r160730).
    9442      */
    9443     iemNativeVarSetKindToStack(pReNative, idxDstVar);
    9444     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
    9445     off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
    9446     iemNativeVarRegisterRelease(pReNative, idxDstVar);
    9447     return off;
    9448 }
    9449 
    9450 
    9451 
    9452 /*********************************************************************************************************************************
    9453 *   Register references.                                                                                                         *
    9454 *********************************************************************************************************************************/
    9455 
    9456 #define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
    9457     off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
    9458 
    9459 #define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
    9460     off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
    9461 
    9462 /** Handles IEM_MC_REF_GREG_U8[_CONST]. */
    9463 DECL_INLINE_THROW(uint32_t)
    9464 iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
    9465 {
    9466     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
    9467     Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
    9468     Assert(iGRegEx < 20);
    9469 
    9470     if (iGRegEx < 16)
    9471         iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
    9472     else
    9473         iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
    9474 
    9475     /* If we've delayed writing back the register value, flush it now. */
    9476     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
    9477 
    9478     /* If it's not a const reference we need to flush the shadow copy of the register now. */
    9479     if (!fConst)
    9480         iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
    9481 
    9482     return off;
    9483 }
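
/*
 * Illustration of the extended 8-bit register indexing handled above:
 * iGRegEx values 0..15 address the low bytes (AL..R15L) while 16..19
 * address the legacy high bytes (AH, CH, DH, BH) of the first four GPRs,
 * hence the 'iGRegEx < 20' assertion and the '& 15' masking.  The helper
 * is a made-up fetch counterpart, for illustration only.
 */
#include <stdint.h>

static inline uint8_t demoFetchGReg8Ex(uint64_t const auGRegs[16], uint8_t iGRegEx)
{
    if (iGRegEx < 16)
        return (uint8_t)auGRegs[iGRegEx];            /* AL, CL, ..., R15L */
    return (uint8_t)(auGRegs[iGRegEx & 15] >> 8);    /* AH, CH, DH, BH    */
}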
    9484 
    9485 #define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
    9486     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
    9487 
    9488 #define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
    9489     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
    9490 
    9491 #define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
    9492     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
    9493 
    9494 #define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
    9495     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
    9496 
    9497 #define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
    9498     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
    9499 
    9500 #define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
    9501     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
    9502 
    9503 #define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
    9504     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
    9505 
    9506 #define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
    9507     off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
    9508 
    9509 #define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
    9510     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
    9511 
    9512 #define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
    9513     off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
    9514 
    9515 /** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
    9516 DECL_INLINE_THROW(uint32_t)
    9517 iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
    9518 {
    9519     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
    9520     Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
    9521     Assert(iGReg < 16);
    9522 
    9523     iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
    9524 
    9525     /* If we've delayed writing back the register value, flush it now. */
    9526     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
    9527 
    9528     /* If it's not a const reference we need to flush the shadow copy of the register now. */
    9529     if (!fConst)
    9530         iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
    9531 
    9532     return off;
    9533 }
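
/*
 * Conceptual model of the flushing done by the reference emitters above
 * (hypothetical types and names, for illustration only): handing out a
 * pointer into the guest context forces any pending host-register value to
 * be written back first, and a non-const reference additionally drops the
 * cached shadow so it cannot go stale when the callee writes through the
 * pointer.
 */
#include <stdint.h>

struct DemoGRegCache
{
    uint64_t auGRegs[16];       /* authoritative values (CPUMCTX analogue) */
    uint64_t uShadow;           /* host register caching one guest GPR     */
    int      iShadowedGReg;     /* which GPR uShadow mirrors, -1 for none  */
};

static uint64_t *demoRefGReg(DemoGRegCache *pCache, int iGReg, bool fConst)
{
    if (pCache->iShadowedGReg == iGReg)
    {
        pCache->auGRegs[iGReg] = pCache->uShadow;    /* flush pending write */
        if (!fConst)
            pCache->iShadowedGReg = -1;              /* invalidate shadow   */
    }
    return &pCache->auGRegs[iGReg];
}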
    9534 
    9535 
    9536 #define IEM_MC_REF_EFLAGS(a_pEFlags) \
    9537     off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
    9538 
    9539 /** Handles IEM_MC_REF_EFLAGS. */
    9540 DECL_INLINE_THROW(uint32_t)
    9541 iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
    9542 {
    9543     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
    9544     Assert(pReNative->Core.aVars[idxVarRef].cbVar == sizeof(void *));
    9545 
    9546     iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
    9547 
    9548     /* If we've delayed writing back the register value, flush it now. */
    9549     off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
    9550 
    9551     /* If there is a shadow copy of guest EFLAGS, flush it now. */
    9552     iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
    9553 
    9554     return off;
    9555 }
    9556 
    9557 
    9558 /*********************************************************************************************************************************
    9559 *   Effective Address Calculation                                                                                                *
    9560 *********************************************************************************************************************************/
    9561 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
    9562     off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
    9563 
    9564 /** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
    9565  * @sa iemOpHlpCalcRmEffAddrThreadedAddr16  */
    9566 DECL_INLINE_THROW(uint32_t)
    9567 iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    9568                                          uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
    9569 {
    9570     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
    9571 
    9572     /*
    9573      * Handle the disp16 form with no registers first.
    9574      *
    9575      * Convert to an immediate value, as that'll delay the register allocation
    9576      * and assignment till the memory access / call / whatever and we can use
    9577      * a more appropriate register (or none at all).
    9578      */
    9579     if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
    9580     {
    9581         iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
    9582         return off;
    9583     }
    9584 
    9585     /* Determine the displacement. */
    9586     uint16_t u16EffAddr;
    9587     switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    9588     {
    9589         case 0:  u16EffAddr = 0;                        break;
    9590         case 1:  u16EffAddr = (int16_t)(int8_t)u16Disp; break;
    9591         case 2:  u16EffAddr = u16Disp;                  break;
    9592         default: AssertFailedStmt(u16EffAddr = 0);
    9593     }
    9594 
    9595     /* Determine the registers involved. */
    9596     uint8_t idxGstRegBase;
    9597     uint8_t idxGstRegIndex;
    9598     switch (bRm & X86_MODRM_RM_MASK)
    9599     {
    9600         case 0:
    9601             idxGstRegBase  = X86_GREG_xBX;
    9602             idxGstRegIndex = X86_GREG_xSI;
    9603             break;
    9604         case 1:
    9605             idxGstRegBase  = X86_GREG_xBX;
    9606             idxGstRegIndex = X86_GREG_xDI;
    9607             break;
    9608         case 2:
    9609             idxGstRegBase  = X86_GREG_xBP;
    9610             idxGstRegIndex = X86_GREG_xSI;
    9611             break;
    9612         case 3:
    9613             idxGstRegBase  = X86_GREG_xBP;
    9614             idxGstRegIndex = X86_GREG_xDI;
    9615             break;
    9616         case 4:
    9617             idxGstRegBase  = X86_GREG_xSI;
    9618             idxGstRegIndex = UINT8_MAX;
    9619             break;
    9620         case 5:
    9621             idxGstRegBase  = X86_GREG_xDI;
    9622             idxGstRegIndex = UINT8_MAX;
    9623             break;
    9624         case 6:
    9625             idxGstRegBase  = X86_GREG_xBP;
    9626             idxGstRegIndex = UINT8_MAX;
    9627             break;
    9628 #ifdef _MSC_VER  /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
    9629         default:
    9630 #endif
    9631         case 7:
    9632             idxGstRegBase  = X86_GREG_xBX;
    9633             idxGstRegIndex = UINT8_MAX;
    9634             break;
    9635     }
    9636 
    9637     /*
    9638      * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
    9639      */
    9640     uint8_t const idxRegRet   = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    9641     uint8_t const idxRegBase  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
    9642                                                                kIemNativeGstRegUse_ReadOnly);
    9643     uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
    9644                               ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
    9645                                                                kIemNativeGstRegUse_ReadOnly)
    9646                               : UINT8_MAX;
    9647 #ifdef RT_ARCH_AMD64
    9648     if (idxRegIndex == UINT8_MAX)
    9649     {
    9650         if (u16EffAddr == 0)
    9651         {
    9652             /* movzx ret, base */
    9653             off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
    9654         }
    9655         else
    9656         {
    9657             /* lea ret32, [base64 + disp32] */
    9658             Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
    9659             uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    9660             if (idxRegRet >= 8 || idxRegBase >= 8)
    9661                 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
    9662             pbCodeBuf[off++] = 0x8d;
    9663             if (idxRegBase != X86_GREG_x12 /*SIB*/)
    9664                 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
    9665             else
    9666             {
    9667                 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
    9668                 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
    9669             }
    9670             pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
    9671             pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
    9672             pbCodeBuf[off++] = 0;
    9673             pbCodeBuf[off++] = 0;
    9674             IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9675 
    9676             off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
    9677         }
    9678     }
    9679     else
    9680     {
    9681         /* lea ret32, [index64 + base64 (+ disp32)] */
    9682         Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
    9683         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    9684         if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
    9685             pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    9686                              | (idxRegBase  >= 8 ? X86_OP_REX_B : 0)
    9687                              | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
    9688         pbCodeBuf[off++] = 0x8d;
    9689         uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
    9690         pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    9691         pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
    9692         if (bMod == X86_MOD_MEM4)
    9693         {
    9694             pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
    9695             pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
    9696             pbCodeBuf[off++] = 0;
    9697             pbCodeBuf[off++] = 0;
    9698         }
    9699         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9700         off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
    9701     }
    9702 
    9703 #elif defined(RT_ARCH_ARM64)
    9704     uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
    9705     if (u16EffAddr == 0)
    9706     {
    9707         if (idxRegIndex == UINT8_MAX)
    9708             pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
    9709         else
    9710         {
    9711             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
    9712             pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
    9713         }
    9714     }
    9715     else
    9716     {
    9717         if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
    9718             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
    9719         else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
    9720             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
    9721                                                              (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
    9722         else
    9723         {
    9724             pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
    9725             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
    9726         }
    9727         if (idxRegIndex != UINT8_MAX)
    9728             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
    9729         pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
    9730     }
    9731 
    9732 #else
    9733 # error "port me"
    9734 #endif
    9735 
    9736     if (idxRegIndex != UINT8_MAX)
    9737         iemNativeRegFreeTmp(pReNative, idxRegIndex);
    9738     iemNativeRegFreeTmp(pReNative, idxRegBase);
    9739     iemNativeVarRegisterRelease(pReNative, idxVarRet);
    9740     return off;
    9741 }
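
/*
 * Plain C model of the 16-bit ModR/M effective-address calculation the
 * emitter above generates code for, using the same base/index mapping and
 * wrapping at 16 bits.  demoCalcEffAddr16 and its register-file parameter
 * are made up for illustration.
 */
#include <stdint.h>

static uint16_t demoCalcEffAddr16(uint8_t bRm, uint16_t u16Disp, uint16_t const auGRegs16[8])
{
    if ((bRm & UINT8_C(0xc7)) == 6)     /* mod=0, r/m=6: just disp16 */
        return u16Disp;

    uint16_t uEffAddr;
    switch ((bRm >> 6) & 3)             /* mod selects the displacement size */
    {
        case 0:  uEffAddr = 0; break;
        case 1:  uEffAddr = (uint16_t)(int16_t)(int8_t)u16Disp; break;
        default: uEffAddr = u16Disp; break;
    }

    static int8_t const s_aiBase[8]  = { 3, 3, 5, 5, 6, 7, 5, 3 };  /* BX BX BP BP SI DI BP BX */
    static int8_t const s_aiIndex[8] = { 6, 7, 6, 7,-1,-1,-1,-1 };  /* SI DI SI DI  -  -  -  - */
    uEffAddr = (uint16_t)(uEffAddr + auGRegs16[s_aiBase[bRm & 7]]);
    if (s_aiIndex[bRm & 7] >= 0)
        uEffAddr = (uint16_t)(uEffAddr + auGRegs16[s_aiIndex[bRm & 7]]);
    return uEffAddr;
}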
    9742 
    9743 
    9744 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
    9745     off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
    9746 
    9747 /** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
    9748  * @see iemOpHlpCalcRmEffAddrThreadedAddr32  */
    9749 DECL_INLINE_THROW(uint32_t)
    9750 iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    9751                                          uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
    9752 {
    9753     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
    9754 
    9755     /*
    9756      * Handle the disp32 form with no registers first.
    9757      *
    9758      * Convert to an immediate value, as that'll delay the register allocation
    9759      * and assignment till the memory access / call / whatever and we can use
    9760      * a more appropriate register (or none at all).
    9761      */
    9762     if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
    9763     {
    9764         iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
    9765         return off;
    9766     }
    9767 
    9768     /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
    9769     uint32_t u32EffAddr = 0;
    9770     switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    9771     {
    9772         case 0: break;
    9773         case 1: u32EffAddr = (int8_t)u32Disp; break;
    9774         case 2: u32EffAddr = u32Disp; break;
    9775         default: AssertFailed();
    9776     }
    9777 
    9778     /* Get the register (or SIB) value. */
    9779     uint8_t idxGstRegBase  = UINT8_MAX;
    9780     uint8_t idxGstRegIndex = UINT8_MAX;
    9781     uint8_t cShiftIndex    = 0;
    9782     switch (bRm & X86_MODRM_RM_MASK)
    9783     {
    9784         case 0: idxGstRegBase = X86_GREG_xAX; break;
    9785         case 1: idxGstRegBase = X86_GREG_xCX; break;
    9786         case 2: idxGstRegBase = X86_GREG_xDX; break;
    9787         case 3: idxGstRegBase = X86_GREG_xBX; break;
    9788         case 4: /* SIB */
    9789         {
    9790             /* index w/ scaling. */
    9791             cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
    9792             switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
    9793             {
    9794                 case 0: idxGstRegIndex = X86_GREG_xAX; break;
    9795                 case 1: idxGstRegIndex = X86_GREG_xCX; break;
    9796                 case 2: idxGstRegIndex = X86_GREG_xDX; break;
    9797                 case 3: idxGstRegIndex = X86_GREG_xBX; break;
    9798                 case 4: cShiftIndex    = 0; /*no index*/ break;
    9799                 case 5: idxGstRegIndex = X86_GREG_xBP; break;
    9800                 case 6: idxGstRegIndex = X86_GREG_xSI; break;
    9801                 case 7: idxGstRegIndex = X86_GREG_xDI; break;
    9802             }
    9803 
    9804             /* base */
    9805             switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
    9806             {
    9807                 case 0: idxGstRegBase = X86_GREG_xAX; break;
    9808                 case 1: idxGstRegBase = X86_GREG_xCX; break;
    9809                 case 2: idxGstRegBase = X86_GREG_xDX; break;
    9810                 case 3: idxGstRegBase = X86_GREG_xBX; break;
    9811                 case 4:
    9812                     idxGstRegBase     = X86_GREG_xSP;
    9813                     u32EffAddr       += uSibAndRspOffset >> 8;
    9814                     break;
    9815                 case 5:
    9816                     if ((bRm & X86_MODRM_MOD_MASK) != 0)
    9817                         idxGstRegBase = X86_GREG_xBP;
    9818                     else
    9819                     {
    9820                         Assert(u32EffAddr == 0);
    9821                         u32EffAddr    = u32Disp;
    9822                     }
    9823                     break;
    9824                 case 6: idxGstRegBase = X86_GREG_xSI; break;
    9825                 case 7: idxGstRegBase = X86_GREG_xDI; break;
    9826             }
    9827             break;
    9828         }
    9829         case 5: idxGstRegBase = X86_GREG_xBP; break;
    9830         case 6: idxGstRegBase = X86_GREG_xSI; break;
    9831         case 7: idxGstRegBase = X86_GREG_xDI; break;
    9832     }
    9833 
    9834     /*
    9835      * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
    9836      * the start of the function.
    9837      */
    9838     if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
    9839     {
    9840         iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
    9841         return off;
    9842     }
    9843 
    9844     /*
    9845      * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
    9846      */
    9847     uint8_t const idxRegRet   = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    9848     uint8_t       idxRegBase  = idxGstRegBase == UINT8_MAX ? UINT8_MAX
    9849                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
    9850                                                                 kIemNativeGstRegUse_ReadOnly);
    9851     uint8_t       idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
    9852                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
    9853                                                                kIemNativeGstRegUse_ReadOnly);
    9854 
    9855     /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
    9856     if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
    9857     {
    9858         idxRegBase  = idxRegIndex;
    9859         idxRegIndex = UINT8_MAX;
    9860     }
    9861 
    9862 #ifdef RT_ARCH_AMD64
    9863     if (idxRegIndex == UINT8_MAX)
    9864     {
    9865         if (u32EffAddr == 0)
    9866         {
    9867             /* mov ret, base */
    9868             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
    9869         }
    9870         else
    9871         {
    9872             /* lea ret32, [base64 + disp32] */
    9873             Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
    9874             uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    9875             if (idxRegRet >= 8 || idxRegBase >= 8)
    9876                 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
    9877             pbCodeBuf[off++] = 0x8d;
    9878             uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
    9879             if (idxRegBase != X86_GREG_x12 /*SIB*/)
    9880                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
    9881             else
    9882             {
    9883                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    9884                 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
    9885             }
    9886             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    9887             if (bMod == X86_MOD_MEM4)
    9888             {
    9889                 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    9890                 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    9891                 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    9892             }
    9893             IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9894         }
    9895     }
    9896     else
    9897     {
    9898         Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
    9899         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    9900         if (idxRegBase == UINT8_MAX)
    9901         {
    9902             /* lea ret32, [(index64 << cShiftIndex) + disp32] */
    9903             if (idxRegRet >= 8 || idxRegIndex >= 8)
    9904                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    9905                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
    9906             pbCodeBuf[off++] = 0x8d;
    9907             pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
    9908             pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
    9909             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    9910             pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    9911             pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    9912             pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    9913         }
    9914         else
    9915         {
    9916             /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
    9917             if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
    9918                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    9919                                  | (idxRegBase  >= 8 ? X86_OP_REX_B : 0)
    9920                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
    9921             pbCodeBuf[off++] = 0x8d;
    9922             uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
    9923                                : (int8_t)u32EffAddr == (int32_t)u32EffAddr           ? X86_MOD_MEM1 : X86_MOD_MEM4;
    9924             pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    9925             pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
    9926             if (bMod != X86_MOD_MEM0)
    9927             {
    9928                 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    9929                 if (bMod == X86_MOD_MEM4)
    9930                 {
    9931                     pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    9932                     pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    9933                     pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    9934                 }
    9935             }
    9936         }
    9937         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    9938     }
    9939 
    9940 #elif defined(RT_ARCH_ARM64)
    9941     if (u32EffAddr == 0)
    9942     {
    9943         if (idxRegIndex == UINT8_MAX)
    9944             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
    9945         else if (idxRegBase == UINT8_MAX)
    9946         {
    9947             if (cShiftIndex == 0)
    9948                 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
    9949             else
    9950             {
    9951                 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9952                 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
    9953             }
    9954         }
    9955         else
    9956         {
    9957             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9958             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
    9959                                                           false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
    9960         }
    9961     }
    9962     else
    9963     {
    9964         if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
    9965         {
    9966             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9967             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
    9968         }
    9969         else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
    9970         {
    9971             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9972             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
    9973                                                              (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
    9974         }
    9975         else
    9976         {
    9977             off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
    9978             if (idxRegBase != UINT8_MAX)
    9979             {
    9980                 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9981                 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
    9982             }
    9983         }
    9984         if (idxRegIndex != UINT8_MAX)
    9985         {
    9986             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    9987             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
    9988                                                           false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
    9989         }
    9990     }
    9991 
    9992 #else
    9993 # error "port me"
    9994 #endif
    9995 
    9996     if (idxRegIndex != UINT8_MAX)
    9997         iemNativeRegFreeTmp(pReNative, idxRegIndex);
    9998     if (idxRegBase != UINT8_MAX)
    9999         iemNativeRegFreeTmp(pReNative, idxRegBase);
    10000     iemNativeVarRegisterRelease(pReNative, idxVarRet);
    10001     return off;
    10002 }
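
/*
 * Plain C model of the 32-bit decode above, including the two SIB special
 * cases: base=ESP adds the extra stack-pointer offset carried in bits 15:8
 * of uSibAndRspOffset (the 'pop [esp]' hack), and base=5 with mod=0 means
 * no base register, just disp32.  Names are illustrative, not VBox APIs.
 */
#include <stdint.h>

static uint32_t demoCalcEffAddr32(uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp,
                                  uint32_t const auGRegs[8])
{
    uint8_t const bMod = (bRm >> 6) & 3;
    if (bMod == 0 && (bRm & 7) == 5)
        return u32Disp;                                     /* disp32 only */

    uint32_t uEffAddr = bMod == 1 ? (uint32_t)(int8_t)u32Disp : bMod == 2 ? u32Disp : 0;
    if ((bRm & 7) != 4)
        return uEffAddr + auGRegs[bRm & 7];                 /* plain base register */

    /* SIB byte: scale in bits 7:6, index in bits 5:3, base in bits 2:0. */
    uint8_t const cShift = (uSibAndRspOffset >> 6) & 3;
    uint8_t const iIndex = (uSibAndRspOffset >> 3) & 7;
    uint8_t const iBase  =  uSibAndRspOffset       & 7;
    if (iIndex != 4)                                        /* index=4: no index */
        uEffAddr += auGRegs[iIndex] << cShift;
    if (iBase == 5 && bMod == 0)
        uEffAddr += u32Disp;                                /* no base, disp32 */
    else
    {
        uEffAddr += auGRegs[iBase];
        if (iBase == 4)                                     /* ESP: pop [esp] adjustment */
            uEffAddr += uSibAndRspOffset >> 8;
    }
    return uEffAddr;
}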
    10003 
    10004 
    10005 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    10006     off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
    10007                                                    a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
    10008 
    10009 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    10010     off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
    10011                                                    a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
    10012 
    10013 #define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
    10014     off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
    10015                                                    a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
    10016 
    10017 /**
    10018  * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
    10019  *
    10020  * @returns New off.
    10021  * @param   pReNative           The native recompiler state.
    10022  * @param   off                 The current offset into the instruction buffer.
    10023  * @param   bRmEx               The ModRM byte but with bit 3 set to REX.B and
    10024  *                              bit 4 to REX.X.  The two bits are part of the
    10025  *                              REG sub-field, which isn't needed in this
    10026  *                              function.
    10027  * @param   uSibAndRspOffset    Two parts:
    10028  *                                - The first 8 bits make up the SIB byte.
    10029  *                                - The next 8 bits are the fixed RSP/ESP offset
    10030  *                                  in case of a pop [xSP].
    10031  * @param   u32Disp             The displacement byte/word/dword, if any.
    10032  * @param   cbInstr             The size of the fully decoded instruction. Used
    10033  *                              for RIP relative addressing.
    10034  * @param   idxVarRet           The result variable number.
    10035  * @param   f64Bit              Whether to use a 64-bit or 32-bit address size
    10036  *                              when calculating the address.
    10037  *
    10038  * @see iemOpHlpCalcRmEffAddrThreadedAddr64
    10039  */
    10040 DECL_INLINE_THROW(uint32_t)
    10041 iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
    10042                                          uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
    10043 {
    10044     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
    10045 
    10046     /*
    10047      * Special case the rip + disp32 form first.
    10048      */
    10049     if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
    10050     {
    10051         uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    10052         uint8_t const idxRegPc  = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
    10053                                                                   kIemNativeGstRegUse_ReadOnly);
    10054 #ifdef RT_ARCH_AMD64
    10055         if (f64Bit)
    10056         {
    10057             int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
    10058             if ((int32_t)offFinalDisp == offFinalDisp)
    10059                 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
    10060             else
    10061             {
    10062                 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
    10063                 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
    10064             }
    10065         }
    10066         else
    10067             off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
    10068 
    10069 #elif defined(RT_ARCH_ARM64)
    10070         if (f64Bit)
    10071             off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
    10072                                                                  (int64_t)(int32_t)u32Disp + cbInstr);
    10073         else
    10074             off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
    10075                                                                    (int32_t)u32Disp + cbInstr);
    10076 
    10077 #else
    10078 # error "Port me!"
    10079 #endif
    10080         iemNativeRegFreeTmp(pReNative, idxRegPc);
    10081         iemNativeVarRegisterRelease(pReNative, idxVarRet);
    10082         return off;
    10083     }
    10084 
    10085     /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
    10086     int64_t i64EffAddr = 0;
    10087     switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    10088     {
    10089         case 0: break;
    10090         case 1: i64EffAddr = (int8_t)u32Disp; break;
    10091         case 2: i64EffAddr = (int32_t)u32Disp; break;
    10092         default: AssertFailed();
    10093     }
    10094 
    10095     /* Get the register (or SIB) value. */
    10096     uint8_t idxGstRegBase  = UINT8_MAX;
    10097     uint8_t idxGstRegIndex = UINT8_MAX;
    10098     uint8_t cShiftIndex    = 0;
    10099     if ((bRmEx & X86_MODRM_RM_MASK) != 4)
    10100         idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
    10101     else /* SIB: */
    10102     {
    10103         /* index w/ scaling. */
    10104         cShiftIndex    = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
    10105         idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
    10106                        | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
    10107         if (idxGstRegIndex == 4)
    10108         {
    10109             /* no index */
    10110             cShiftIndex    = 0;
    10111             idxGstRegIndex = UINT8_MAX;
    10112         }
    10113 
    10114         /* base */
    10115         idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
    10116         if (idxGstRegBase == 4)
    10117         {
    10118             /* pop [rsp] hack */
    10119             i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
    10120         }
    10121         else if (   (idxGstRegBase & X86_SIB_BASE_MASK) == 5
    10122                  && (bRmEx & X86_MODRM_MOD_MASK) == 0)
    10123         {
    10124             /* mod=0 and base=5 -> disp32, no base reg. */
    10125             Assert(i64EffAddr == 0);
    10126             i64EffAddr    = (int32_t)u32Disp;
    10127             idxGstRegBase = UINT8_MAX;
    10128         }
    10129     }
    10130 
    10131     /*
    10132      * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
    10133      * the start of the function.
    10134      */
    10135     if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
    10136     {
    10137         if (f64Bit)
    10138             iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
    10139         else
    10140             iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
    10141         return off;
    10142     }
    10143 
    10144     /*
    10145      * Now emit code that calculates:
    10146      *      idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
    10147      * or if !f64Bit:
    10148      *      idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
    10149      */
    10150     uint8_t const idxRegRet   = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
    10151     uint8_t       idxRegBase  = idxGstRegBase == UINT8_MAX ? UINT8_MAX
    10152                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
    10153                                                                 kIemNativeGstRegUse_ReadOnly);
    10154     uint8_t       idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
    10155                               : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
    10156                                                                kIemNativeGstRegUse_ReadOnly);
    10157 
    10158     /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
    10159     if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
    10160     {
    10161         idxRegBase  = idxRegIndex;
    10162         idxRegIndex = UINT8_MAX;
    10163     }
    10164 
    10165 #ifdef RT_ARCH_AMD64
    10166     uint8_t bFinalAdj;
    10167     if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
    10168         bFinalAdj = 0; /* likely */
    10169     else
    10170     {
    10171         /* pop [rsp] with a problematic disp32 value.  Split out the
    10172            RSP offset and add it separately afterwards (bFinalAdj). */
    10173         /** @todo testcase: pop [rsp] with problematic disp32 (mod4).   */
    10174         Assert(idxGstRegBase == X86_GREG_xSP);
    10175         Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
    10176         bFinalAdj   = (uint8_t)(uSibAndRspOffset >> 8);
    10177         Assert(bFinalAdj != 0);
    10178         i64EffAddr -= bFinalAdj;
    10179         Assert((int32_t)i64EffAddr == i64EffAddr);
    10180     }
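
A hedged worked example of when this split triggers (all values assumed): for a pop [rsp]-style operand with disp32 = 0x7FFFFFFC and an RSP offset of 8 already folded in, the sum no longer fits a sign-extended disp32, so the offset is peeled back off:

    /* i64EffAddr = 0x7FFFFFFC + 8 = 0x80000004   -> the (int32_t) cast would change the value */
    /* bFinalAdj  = 8; i64EffAddr -= 8 -> 0x7FFFFFFC -> fits disp32 again                      */
    /* emitted:  lea ret, [rsp + 0x7FFFFFFC]  followed by  add ret, 8                          */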
    10181     uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
    10182 //pReNative->pInstrBuf[off++] = 0xcc;
    10183 
    10184     if (idxRegIndex == UINT8_MAX)
    10185     {
    10186         if (u32EffAddr == 0)
    10187         {
    10188             /* mov ret, base */
    10189             if (f64Bit)
    10190                 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
    10191             else
    10192                 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
    10193         }
    10194         else
    10195         {
    10196             /* lea ret, [base + disp32] */
    10197             Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
    10198             uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    10199             if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
    10200                 pbCodeBuf[off++] = (idxRegRet  >= 8 ? X86_OP_REX_R : 0)
    10201                                  | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
    10202                                  | (f64Bit          ? X86_OP_REX_W : 0);
    10203             pbCodeBuf[off++] = 0x8d;
    10204             uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
    10205             if (idxRegBase != X86_GREG_x12 /*SIB*/)
    10206                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
    10207             else
    10208             {
    10209                 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    10210                 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
    10211             }
    10212             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    10213             if (bMod == X86_MOD_MEM4)
    10214             {
    10215                 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    10216                 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    10217                 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    10218             }
    10219             IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    10220         }
    10221     }
    10222     else
    10223     {
    10224         Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
    10225         uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
    10226         if (idxRegBase == UINT8_MAX)
    10227         {
    10228             /* lea ret, [(index64 << cShiftIndex) + disp32] */
    10229             if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
    10230                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    10231                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
    10232                                  | (f64Bit           ? X86_OP_REX_W : 0);
    10233             pbCodeBuf[off++] = 0x8d;
    10234             pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
    10235             pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
    10236             pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    10237             pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    10238             pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    10239             pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    10240         }
    10241         else
    10242         {
    10243             /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
    10244             if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
    10245                 pbCodeBuf[off++] = (idxRegRet   >= 8 ? X86_OP_REX_R : 0)
    10246                                  | (idxRegBase  >= 8 ? X86_OP_REX_B : 0)
    10247                                  | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
    10248                                  | (f64Bit           ? X86_OP_REX_W : 0);
    10249             pbCodeBuf[off++] = 0x8d;
    10250             uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
    10251                                : (int8_t)u32EffAddr == (int32_t)u32EffAddr           ? X86_MOD_MEM1 : X86_MOD_MEM4;
    10252             pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
    10253             pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
    10254             if (bMod != X86_MOD_MEM0)
    10255             {
    10256                 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
    10257                 if (bMod == X86_MOD_MEM4)
    10258                 {
    10259                     pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
    10260                     pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
    10261                     pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
    10262                 }
    10263             }
    10264         }
    10265         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    10266     }
    10267 
    10268     if (!bFinalAdj)
    10269     { /* likely */ }
    10270     else
    10271     {
    10272         Assert(f64Bit);
    10273         off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
    10274     }
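
For concreteness, a hedged example of the bytes the lea path above produces for idxRegRet=RAX, idxRegBase=RBX, u32EffAddr=0x10, f64Bit=true (encoding worked out from the x86 manual, not traced from a run):

    48 8D 43 10        lea rax, [rbx + 0x10]
    ;  48 = REX.W, 8D = lea, 43 = ModRM(mod=01 disp8, reg=rax, rm=rbx), 10 = disp8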
    10275 
    10276 #elif defined(RT_ARCH_ARM64)
    10277     if (i64EffAddr == 0)
    10278     {
    10279         uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    10280         if (idxRegIndex == UINT8_MAX)
    10281             pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
    10282         else if (idxRegBase != UINT8_MAX)
    10283             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
    10284                                                           f64Bit, false /*fSetFlags*/, cShiftIndex);
    10285         else
    10286         {
    10287             Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
    10288             pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
    10289         }
    10290     }
    10291     else
    10292     {
    10293         if (f64Bit)
    10294         { /* likely */ }
    10295         else
    10296             i64EffAddr = (int32_t)i64EffAddr;
    10297 
    10298         if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
    10299         {
    10300             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    10301             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
    10302         }
    10303         else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
    10304         {
    10305             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    10306             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
    10307         }
    10308         else
    10309         {
    10310             if (f64Bit)
    10311                 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
    10312             else
    10313                 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
    10314             if (idxRegBase != UINT8_MAX)
    10315             {
    10316                 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    10317                 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
    10318             }
    10319         }
    10320         if (idxRegIndex != UINT8_MAX)
    10321         {
    10322             uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    10323             pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
    10324                                                           f64Bit, false /*fSetFlags*/, cShiftIndex);
    10325         }
    10326     }
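
The three-way split above follows from ARM64 ADD/SUB (immediate) only accepting a 12-bit unsigned immediate; schematically (a sketch, not the emitter's exact output):

    i64EffAddr in [0, 4095]     ->  ADD  dst, base, #imm             (one instruction)
    i64EffAddr in [-4095, -1]   ->  SUB  dst, base, #-imm            (one instruction)
    anything else               ->  load the immediate into dst (MOVZ/MOVK...), then ADD dst, dst, base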
    10327 
    10328 #else
    10329 # error "port me"
    10330 #endif
    10331 
    10332     if (idxRegIndex != UINT8_MAX)
    10333         iemNativeRegFreeTmp(pReNative, idxRegIndex);
    10334     if (idxRegBase != UINT8_MAX)
    10335         iemNativeRegFreeTmp(pReNative, idxRegBase);
    10336     iemNativeVarRegisterRelease(pReNative, idxVarRet);
    10337     return off;
    10338 }
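
Putting the pieces together, a hedged end-to-end example (decoded per the x86 manual, not traced through the recompiler) for a guest operand like [rbx + rsi*4 + 0x20]:

    ModRM:   mod=01 (disp8), rm=100 (SIB follows)
    SIB:     scale=2, index=rsi, base=rbx
    decoded: idxGstRegBase=rbx, idxGstRegIndex=rsi, cShiftIndex=2, i64EffAddr=0x20
    emitted (AMD64 host): lea ret, [rbx + rsi*4 + 0x20]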
    10339 
    10340 
    10341 /*********************************************************************************************************************************
    10342 *   TLB Lookup.                                                                                                                  *
    10343 *********************************************************************************************************************************/
    1034465
    1034566/**
…
    10496217} IEMNATIVEEMITTLBSTATE;
    10497218
    10498 
    10499 /**
    10500  * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
    10501  */
    10502 DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
    10503 {
    10504     uint8_t const  iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
    10505     uint8_t const  cbMem   = RT_BYTE2(uSegAndSizeAndAccess);
    10506     uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
    10507     Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
    10508 
    10509     /* Do the lookup manually. */
    10510     RTGCPTR const      GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
    10511     uint64_t const     uTag      = IEMTLB_CALC_TAG(    &pVCpu->iem.s.DataTlb, GCPtrFlat);
    10512     PIEMTLBENTRY const pTlbe     = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
    10513     if (RT_LIKELY(pTlbe->uTag == uTag))
    10514     {
    10515         /*
    10516          * Check TLB page table level access flags.
    10517          */
    10518         AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
    10519         uint64_t const fNoUser          = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
    10520         uint64_t const fNoWriteNoDirty  = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
    10521                                         : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
    10522         uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & (  IEMTLBE_F_PHYS_REV       | IEMTLBE_F_NO_MAPPINGR3
    10523                                                                      | IEMTLBE_F_PG_UNASSIGNED
    10524                                                                      | IEMTLBE_F_PT_NO_ACCESSED
    10525                                                                      | fNoWriteNoDirty          | fNoUser);
    10526         uint64_t const uTlbPhysRev      = pVCpu->iem.s.DataTlb.uTlbPhysRev;
    10527         if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
    10528         {
    10529             /*
    10530              * Return the address.
    10531              */
    10532             uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
    10533             if ((uintptr_t)pbAddr == uResult)
    10534                 return;
    10535             RT_NOREF(cbMem);
    10536             AssertFailed();
    10537         }
    10538         else
    10539             AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
    10540                              fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
    10541     }
    10542     else
    10543         AssertFailed();
    10544     RT_BREAKPOINT();
    10545 }
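
For orientation, the manual lookup above mirrors the regular IEM data TLB scheme; a hedged sketch of the tag math (the authoritative IEMTLB_CALC_TAG / IEMTLB_TAG_TO_ENTRY macros live elsewhere in IEM, and the details below are approximate):

    /* Sketch, not the real macros: tag = page number | TLB revision; entry = tag masked to the table size. */
    uint64_t const uTagSketch  = (GCPtrFlat >> GUEST_PAGE_SHIFT) | pVCpu->iem.s.DataTlb.uTlbRevision;
    PIEMTLBENTRY   pTlbeSketch = &pVCpu->iem.s.DataTlb.aEntries[  uTagSketch
                                                                & (RT_ELEMENTS(pVCpu->iem.s.DataTlb.aEntries) - 1)];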
    10546219DECLASM(void) iemNativeHlpAsmSafeWrapCheckTlbLookup(void);
    10547220
    10548221
    10549222#ifdef IEMNATIVE_WITH_TLB_LOOKUP
     223template<bool const a_fDataTlb>
    10550224DECL_INLINE_THROW(uint32_t)
    10551225iemNativeEmitTlbLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEEMITTLBSTATE const * const pTlbState,
…
    10568242    uint32_t const offCheckExpandDown = off;
    10569243    uint32_t       offFixupLimitDone  = 0;
    10570     if (iSegReg != UINT8_MAX && (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT)
     244    if (a_fDataTlb && iSegReg != UINT8_MAX && (pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) != IEMMODE_64BIT)
    10571245    {
    10572246off = iemNativeEmitBrkEx(pCodeBuf, off, 1); /** @todo this needs testing */
…
    11037711
    11038712
    11039 /*********************************************************************************************************************************
    11040 *   Memory fetches and stores common                                                                                             *
    11041 *********************************************************************************************************************************/
    11042 
    11043 typedef enum IEMNATIVEMITMEMOP
    11044 {
    11045     kIemNativeEmitMemOp_Store = 0,
    11046     kIemNativeEmitMemOp_Fetch,
    11047     kIemNativeEmitMemOp_Fetch_Zx_U16,
    11048     kIemNativeEmitMemOp_Fetch_Zx_U32,
    11049     kIemNativeEmitMemOp_Fetch_Zx_U64,
    11050     kIemNativeEmitMemOp_Fetch_Sx_U16,
    11051     kIemNativeEmitMemOp_Fetch_Sx_U32,
    11052     kIemNativeEmitMemOp_Fetch_Sx_U64
    11053 } IEMNATIVEMITMEMOP;
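
The Zx/Sx variants differ only in how the loaded value is widened into the destination; a hedged illustration of the intended semantics in plain C (pbMem is an assumed host pointer to the guest data):

    /* kIemNativeEmitMemOp_Fetch_Zx_U64 with cbMem=2: zero-extend a 16-bit load to 64 bits. */
    uint64_t const uZx = (uint64_t)*(uint16_t const *)pbMem;
    /* kIemNativeEmitMemOp_Fetch_Sx_U64 with cbMem=2: sign-extend a 16-bit load to 64 bits. */
    uint64_t const uSx = (uint64_t)(int64_t)*(int16_t const *)pbMem;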
    11054 
    11055 /** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
    11056  * as well as IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
    11057  * (with iSegReg = UINT8_MAX). */
    11058 DECL_INLINE_THROW(uint32_t)
    11059 iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off,  uint8_t idxVarValue, uint8_t iSegReg,
    11060                                      uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
    11061                                      uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
    11062 {
    11063     /*
    11064      * Assert sanity.
    11065      */
    11066     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
    11067     Assert(   enmOp != kIemNativeEmitMemOp_Store
    11068            || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Immediate
    11069            || pReNative->Core.aVars[idxVarValue].enmKind == kIemNativeVarKind_Stack);
    11070     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
    11071     AssertStmt(   pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
    11072                || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
    11073                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    11074     Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
    11075     Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
    11076     AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
    11077 #ifdef VBOX_STRICT
    11078     if (iSegReg == UINT8_MAX)
    11079     {
    11080         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    11081                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    11082                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    11083         switch (cbMem)
    11084         {
    11085             case 1:
    11086                 Assert(   pfnFunction
    11087                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
    11088                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    11089                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    11090                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    11091                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
    11092                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
    11093                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
    11094                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
    11095                            : UINT64_C(0xc000b000a0009000) ));
    11096                 break;
    11097             case 2:
    11098                 Assert(   pfnFunction
    11099                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
    11100                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
    11101                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
    11102                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
    11103                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
    11104                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
    11105                            : UINT64_C(0xc000b000a0009000) ));
    11106                 break;
    11107             case 4:
    11108                 Assert(   pfnFunction
    11109                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
    11110                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
    11111                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
    11112                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
    11113                            : UINT64_C(0xc000b000a0009000) ));
    11114                 break;
    11115             case 8:
    11116                 Assert(    pfnFunction
    11117                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
    11118                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
    11119                            : UINT64_C(0xc000b000a0009000) ));
    11120                 break;
    11121         }
    11122     }
    11123     else
    11124     {
    11125         Assert(iSegReg < 6);
    11126         switch (cbMem)
    11127         {
    11128             case 1:
    11129                 Assert(   pfnFunction
    11130                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU8
    11131                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU8
    11132                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
    11133                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
    11134                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
    11135                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
    11136                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
    11137                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
    11138                            : UINT64_C(0xc000b000a0009000) ));
    11139                 break;
    11140             case 2:
    11141                 Assert(   pfnFunction
    11142                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU16
    11143                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU16
    11144                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
    11145                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
    11146                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
    11147                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
    11148                            : UINT64_C(0xc000b000a0009000) ));
    11149                 break;
    11150             case 4:
    11151                 Assert(   pfnFunction
    11152                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU32
    11153                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU32
    11154                            : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
    11155                            : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
    11156                            : UINT64_C(0xc000b000a0009000) ));
    11157                 break;
    11158             case 8:
    11159                 Assert(    pfnFunction
    11160                        == (  enmOp == kIemNativeEmitMemOp_Store        ? (uintptr_t)iemNativeHlpMemStoreDataU64
    11161                            : enmOp == kIemNativeEmitMemOp_Fetch        ? (uintptr_t)iemNativeHlpMemFetchDataU64
    11162                            : UINT64_C(0xc000b000a0009000) ));
    11163                 break;
    11164         }
    11165     }
    11166 #endif
    11167 
    11168 #ifdef VBOX_STRICT
    11169     /*
    11170      * Check that the fExec flags we've got make sense.
    11171      */
    11172     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    11173 #endif
    11174 
    11175     /*
    11176      * To keep things simple we have to commit any pending writes first as we
    11177      * may end up making calls.
    11178      */
    11179     /** @todo we could postpone this till we make the call and reload the
    11180      * registers after returning from the call. Not sure if that's sensible or
    11181      * not, though. */
    11182     off = iemNativeRegFlushPendingWrites(pReNative, off);
    11183 
    11184 #ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    11185     /*
    11186      * Move/spill/flush stuff out of call-volatile registers.
    11187      * This is the easy way out. We could contain this to the tlb-miss branch
    11188      * by saving and restoring active stuff here.
    11189      */
    11190     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
    11191 #endif
    11192 
    11193     /*
    11194      * Define labels and allocate the result register (trying for the return
    11195      * register if we can).
    11196      */
    11197     uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
    11198     uint8_t  const idxRegValueFetch  = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
    11199                                      : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
    11200                                      ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
    11201                                      : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
    11202     IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
    11203     uint8_t  const idxRegValueStore  =    !TlbState.fSkip
    11204                                        && enmOp == kIemNativeEmitMemOp_Store
    11205                                        && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
    11206                                      ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
    11207                                      : UINT8_MAX;
    11208     uint32_t const idxRegMemResult   = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
    11209     uint32_t const idxLabelTlbLookup = !TlbState.fSkip
    11210                                      ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
    11211                                      : UINT32_MAX;
    11212 
    11213     /*
    11214      * Jump to the TLB lookup code.
    11215      */
    11216     if (!TlbState.fSkip)
    11217         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
    11218 
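
From here on the emitted code follows a fixed shape; a hedged sketch of the final layout (labels as in the source, instructions schematic):

            jmp   TlbLookup          ; omitted entirely when TlbState.fSkip
    TlbMiss:
            ; save volatiles, load helper arguments
            call  pfnFunction        ; the helper performs the access (and may raise #PF etc.)
            ; restore volatiles / guest shadows
            jmp   TlbDone
    TlbLookup:
            ; inline TLB probe; jumps back to TlbMiss on any mismatch
            ; on hit: host address in idxRegMemResult, then the inline load/store below
    TlbDone: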
    11219     /*
    11220      * TlbMiss:
    11221      *
    11222      * Call helper to do the fetching.
    11223      * We flush all guest register shadow copies here.
    11224      */
    11225     uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
    11226 
    11227 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    11228     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    11229 #else
    11230     RT_NOREF(idxInstr);
    11231 #endif
    11232 
    11233 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    11234     /* Save variables in volatile registers. */
    11235     uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
    11236                                      | (idxRegMemResult  != UINT8_MAX ? RT_BIT_32(idxRegMemResult)  : 0)
    11237                                      | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
    11238     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    11239 #endif
    11240 
    11241     /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
    11242     uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    11243     if (enmOp == kIemNativeEmitMemOp_Store)
    11244     {
    11245         uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
    11246         off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
    11247 #ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    11248                                                         IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    11249 #else
    11250                                                         IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
    11251         fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
    11252 #endif
    11253     }
    11254 
    11255     /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
    11256     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
    11257 #ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    11258                                                     fVolGregMask);
    11259 #else
    11260                                                     fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
    11261 #endif
    11262 
    11263     if (iSegReg != UINT8_MAX)
    11264     {
    11265         /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
    11266         AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
    11267         off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
    11268     }
    11269 
    11270     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    11271     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    11272 
    11273     /* Done setting up parameters, make the call. */
    11274     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    11275 
    11276     /*
    11277      * Put the result in the right register if this is a fetch.
    11278      */
    11279     if (enmOp != kIemNativeEmitMemOp_Store)
    11280     {
    11281         Assert(idxRegValueFetch == pReNative->Core.aVars[idxVarValue].idxReg);
    11282         if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
    11283             off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
    11284     }
    11285 
    11286 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    11287     /* Restore variables and guest shadow registers to volatile registers. */
    11288     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
    11289     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
    11290 #endif
    11291 
    11292 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    11293     if (!TlbState.fSkip)
    11294     {
    11295         /* end of TlbMiss - Jump to the done label. */
    11296         uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    11297         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
    11298 
    11299         /*
    11300          * TlbLookup:
    11301          */
    11302         off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
    11303                                      enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
    11304                                      idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
    11305 
    11306         /*
    11307          * Emit code to do the actual storing / fetching.
    11308          */
    11309         PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
    11310 # ifdef VBOX_WITH_STATISTICS
    11311         off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
    11312                                                   enmOp == kIemNativeEmitMemOp_Store
    11313                                                   ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
    11314                                                   : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
    11315 # endif
    11316         switch (enmOp)
    11317         {
    11318             case kIemNativeEmitMemOp_Store:
    11319                 if (pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate)
    11320                 {
    11321                     switch (cbMem)
    11322                     {
    11323                         case 1:
    11324                             off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
    11325                             break;
    11326                         case 2:
    11327                             off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
    11328                             break;
    11329                         case 4:
    11330                             off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
    11331                             break;
    11332                         case 8:
    11333                             off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
    11334                             break;
    11335                         default:
    11336                             AssertFailed();
    11337                     }
    11338                 }
    11339                 else
    11340                 {
    11341                     switch (cbMem)
    11342                     {
    11343                         case 1:
    11344                             off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off,
    11345                                                                 (uint8_t)pReNative->Core.aVars[idxVarValue].u.uValue,
    11346                                                                 idxRegMemResult, TlbState.idxReg1);
    11347                             break;
    11348                         case 2:
    11349                             off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
    11350                                                                  (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
    11351                                                                  idxRegMemResult, TlbState.idxReg1);
    11352                             break;
    11353                         case 4:
    11354                             off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
    11355                                                                  (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
    11356                                                                  idxRegMemResult, TlbState.idxReg1);
    11357                             break;
    11358                         case 8:
    11359                             off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
    11360                                                                  idxRegMemResult, TlbState.idxReg1);
    11361                             break;
    11362                         default:
    11363                             AssertFailed();
    11364                     }
    11365                 }
    11366                 break;
    11367 
    11368             case kIemNativeEmitMemOp_Fetch:
    11369             case kIemNativeEmitMemOp_Fetch_Zx_U16:
    11370             case kIemNativeEmitMemOp_Fetch_Zx_U32:
    11371             case kIemNativeEmitMemOp_Fetch_Zx_U64:
    11372                 switch (cbMem)
    11373                 {
    11374                     case 1:
    11375                         off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11376                         break;
    11377                     case 2:
    11378                         off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11379                         break;
    11380                     case 4:
    11381                         off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11382                         break;
    11383                     case 8:
    11384                         off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11385                         break;
    11386                     default:
    11387                         AssertFailed();
    11388                 }
    11389                 break;
    11390 
    11391             case kIemNativeEmitMemOp_Fetch_Sx_U16:
    11392                 Assert(cbMem == 1);
    11393                 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11394                 break;
    11395 
    11396             case kIemNativeEmitMemOp_Fetch_Sx_U32:
    11397                 Assert(cbMem == 1 || cbMem == 2);
    11398                 if (cbMem == 1)
    11399                     off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11400                 else
    11401                     off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11402                 break;
    11403 
    11404             case kIemNativeEmitMemOp_Fetch_Sx_U64:
    11405                 switch (cbMem)
    11406                 {
    11407                     case 1:
    11408                         off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11409                         break;
    11410                     case 2:
    11411                         off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11412                         break;
    11413                     case 4:
    11414                         off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
    11415                         break;
    11416                     default:
    11417                         AssertFailed();
    11418                 }
    11419                 break;
    11420 
    11421             default:
    11422                 AssertFailed();
    11423         }
    11424 
    11425         iemNativeRegFreeTmp(pReNative, idxRegMemResult);
    11426 
    11427         /*
    11428          * TlbDone:
    11429          */
    11430         iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    11431 
    11432         TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
    11433 
    11434 # ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    11435         /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
    11436         iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    11437 # endif
    11438     }
    11439 #else
    11440     RT_NOREF(fAlignMask, idxLabelTlbMiss);
    11441 #endif
    11442 
    11443     if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
    11444         iemNativeVarRegisterRelease(pReNative, idxVarValue);
    11445     return off;
    11446 }
    11447 
    11448 
    11449 
    11450 /*********************************************************************************************************************************
    11451 *   Memory fetches (IEM_MEM_FETCH_XXX).                                                                                          *
    11452 *********************************************************************************************************************************/
    11453 
    11454 /* 8-bit segmented: */
    11455 #define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
    11456     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
    11457                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
    11458                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
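
For context, these IEM_MC_* macros expand inside the generated MC blocks; a hedged sketch of how a fetch might be used (macro arity and variable names are approximate, for illustration only):

    /* Hypothetical MC-block fragment, e.g. for a movzx-from-memory style instruction: */
    IEM_MC_BEGIN(0, 2);
    IEM_MC_LOCAL(uint8_t, u8Value);
    IEM_MC_FETCH_MEM_U8(u8Value, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
    /* ... widen/store u8Value to the destination register, advance RIP ... */
    IEM_MC_END();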
    11459 
    11460 #define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
    11461     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    11462                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
    11463                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    11464 
    11465 #define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    11466     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    11467                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    11468                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    11469 
    11470 #define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11471     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11472                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    11473                                                (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
    11474 
    11475 #define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
    11476     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    11477                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
    11478                                                (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
    11479 
    11480 #define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    11481     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    11482                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    11483                                                (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
    11484 
    11485 #define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11486     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11487                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    11488                                                (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
    11489 
    11490 /* 16-bit segmented: */
    11491 #define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
    11492     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    11493                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11494                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
    11495 
    11496 #define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
    11497     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
    11498                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11499                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
    11500 
    11501 #define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    11502     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    11503                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    11504                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
    11505 
    11506 #define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11507     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11508                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    11509                                                (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
    11510 
    11511 #define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    11512     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    11513                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    11514                                                (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
    11515 
    11516 #define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11517     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11518                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    11519                                                (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
    11520 
    11521 
    11522 /* 32-bit segmented: */
    11523 #define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
    11524     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    11525                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11526                                                (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
    11527 
    11528 #define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
    11529     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
    11530                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11531                                                (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
    11532 
    11533 #define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11534     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11535                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    11536                                                (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
    11537 
    11538 #define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11539     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11540                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    11541                                                (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
    11542 
    11543 
    11544 /* 64-bit segmented: */
    11545 #define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
    11546     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
    11547                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11548                                                (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
    11549 
    11550 
    11551 
    11552 /* 8-bit flat: */
    11553 #define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
    11554     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
    11555                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
    11556                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    11557 
    11558 #define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
    11559     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    11560                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
    11561                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    11562 
    11563 #define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
    11564     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    11565                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    11566                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    11567 
    11568 #define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
    11569     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    11570                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    11571                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
    11572 
    11573 #define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
    11574     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    11575                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
    11576                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
    11577 
    11578 #define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
    11579     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    11580                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    11581                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
    11582 
    11583 #define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
    11584     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    11585                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    11586                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
    11587 
    11588 
    11589 /* 16-bit flat: */
    11590 #define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
    11591     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    11592                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11593                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
    11594 
    11595 #define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
    11596     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
    11597                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11598                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
    11599 
    11600 #define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
    11601     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    11602                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
    11603                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
    11604 
    11605 #define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
    11606     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    11607                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    11608                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
    11609 
    11610 #define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
    11611     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    11612                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
    11613                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
    11614 
    11615 #define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
    11616     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    11617                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    11618                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
    11619 
    11620 /* 32-bit flat: */
    11621 #define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
    11622     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    11623                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11624                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
    11625 
    11626 #define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
    11627     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
    11628                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11629                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
    11630 
    11631 #define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
    11632     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    11633                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
    11634                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
    11635 
    11636 #define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
    11637     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    11638                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
    11639                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
    11640 
    11641 /* 64-bit flat: */
    11642 #define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
    11643     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
    11644                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
    11645                                                (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
    11646 
    11647 
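Each IEM_MC_FETCH_MEM[_FLAT]_* wrapper above expands to a single call of the common emitter, with the operand size, alignment mask, operation kind and TLB-miss helper baked in. As a hedged illustration, this is what IEM_MC_FETCH_MEM_FLAT_U32 turns into inside a generated recompiler function (pReNative, off and pCallEntry are assumed in scope, exactly as the macros require):

    /* The MC statement ... */
    IEM_MC_FETCH_MEM_FLAT_U32(u32Dst, GCPtrMem);
    /* ... is textually equivalent to: */
    off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, u32Dst, UINT8_MAX /*iSegReg: flat*/, GCPtrMem,
                                               sizeof(uint32_t), sizeof(uint32_t) - 1 /*fAlignMask*/,
                                               kIemNativeEmitMemOp_Fetch,
                                               (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr);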
    11648 
    11649 /*********************************************************************************************************************************
    11650 *   Memory stores (IEM_MEM_STORE_XXX).                                                                                           *
    11651 *********************************************************************************************************************************/
    11652 
    11653 #define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
    11654     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
    11655                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
    11656                                                (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
    11657 
    11658 #define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
    11659     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
    11660                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
    11661                                                (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
    11662 
    11663 #define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
    11664     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
    11665                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
    11666                                                (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
    11667 
    11668 #define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
    11669     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
    11670                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
    11671                                                (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
    11672 
    11673 
    11674 #define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
    11675     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
    11676                                                sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
    11677                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
    11678 
    11679 #define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
    11680     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
    11681                                                sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
    11682                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
    11683 
    11684 #define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
    11685     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
    11686                                                sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
    11687                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
    11688 
    11689 #define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
    11690     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
    11691                                                sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
    11692                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
    11693 
    11694 
    11695 #define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
    11696     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    11697                                                (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
    11698 
    11699 #define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
    11700     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    11701                                                (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
    11702 
    11703 #define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
    11704     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    11705                                                (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
    11706 
    11707 #define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
    11708     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    11709                                                (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
    11710 
    11711 
    11712 #define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
    11713     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    11714                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
    11715 
    11716 #define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
    11717     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    11718                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
    11719 
    11720 #define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
    11721     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    11722                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
    11723 
    11724 #define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
    11725     off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    11726                                                (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
    11727 
    11728 /** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
    11729  *  IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
    11730 DECL_INLINE_THROW(uint32_t)
    11731 iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
    11732                                     uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
    11733 {
    11734     /*
    11735      * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
    11736      * to do the grunt work.
    11737      */
    11738     uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
    11739     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
    11740                                                cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
    11741                                                pfnFunction, idxInstr);
    11742     iemNativeVarFreeLocal(pReNative, idxVarConstValue);
    11743     return off;
    11744 }
    11745 
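So a constant store is simply the variable store with a throwaway const variable wrapped around the immediate. A hedged sketch of what, say, IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, 0x1234) amounts to once both the macro and the helper above are applied (0x1234 is an arbitrary example value):

    uint8_t const idxVarConst = iemNativeVarAllocConst(pReNative, sizeof(uint16_t), 0x1234);
    off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConst, a_iSeg, a_GCPtrMem,
                                               sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store,
                                               (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr);
    iemNativeVarFreeLocal(pReNative, idxVarConst);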
    11746 
    11747 
    11748 /*********************************************************************************************************************************
    11749 *   Stack Accesses.                                                                                                              *
    11750 *********************************************************************************************************************************/
    11751 /*                                                     RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
    11752 #define IEM_MC_PUSH_U16(a_u16Value) \
    11753     off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16,  0, 0, 0), \
    11754                                  (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
    11755 #define IEM_MC_PUSH_U32(a_u32Value) \
    11756     off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32,  0, 0, 0), \
    11757                                  (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
    11758 #define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
    11759     off = iemNativeEmitStackPush(pReNative, off, a_uSegVal,  RT_MAKE_U32_FROM_U8(32,  0, 1, 0), \
    11760                                  (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
    11761 #define IEM_MC_PUSH_U64(a_u64Value) \
    11762     off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64,  0, 0, 0), \
    11763                                  (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
    11764 
    11765 #define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
    11766     off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
    11767                                  (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
    11768 #define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
    11769     off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
    11770                                  (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
    11771 #define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
    11772     off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
    11773                                  (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
    11774 
    11775 #define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
    11776     off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
    11777                                  (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
    11778 #define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
    11779     off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
    11780                                  (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
    11781 
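As the comment above the macros notes, the second parameter packs four bytes: RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0). The emitters below unpack it with RT_BYTE1/2/3. A worked example for IEM_MC_FLAT64_PUSH_U16, i.e. RT_MAKE_U32_FROM_U8(16, 64, 0, 0):

    uint32_t const cBitsVarAndFlat = RT_MAKE_U32_FROM_U8(16, 64, 0, 0);   /* == UINT32_C(0x00004010)   */
    uint8_t  const cbMem     = RT_BYTE1(cBitsVarAndFlat) / 8;  /* 2: a 16-bit value is pushed          */
    uint8_t  const cBitsFlat = RT_BYTE2(cBitsVarAndFlat);      /* 64: flat 64-bit stack (0 = segmented) */
    bool     const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0; /* false: not a segment-register push   */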
    11782 
    11783 DECL_FORCE_INLINE_THROW(uint32_t)
    11784 iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
    11785 {
    11786     /* Use16BitSp: */
    11787 #ifdef RT_ARCH_AMD64
    11788     off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
    11789     off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    11790 #else
    11791     /* sub regeff, regrsp, #cbMem */
    11792     pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
    11793     /* and regeff, regeff, #0xffff */
    11794     Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
    11795     pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0,  false /*f64Bit*/);
    11796     /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp, keeping the other RSP bits as is. */
    11797     pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
    11798 #endif
    11799     return off;
    11800 }
    11801 
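Both code paths above implement the same 16-bit stack-pointer rule: only SP (bits 15:0 of RSP) is decremented and used as the effective address, while bits 63:16 are left untouched. A plain C model of the sequence (a hypothetical helper for illustration, not emitter code):

    static uint64_t iemStackPushUse16SpModel(uint64_t uRsp, uint8_t cbMem, uint64_t *puEffSp)
    {
        uint16_t const uNewSp = (uint16_t)((uint16_t)uRsp - cbMem); /* 16-bit subtract, wraps at zero */
        *puEffSp = uNewSp;                                /* effective address, zero-extended         */
        return (uRsp & ~(uint64_t)0xffff) | uNewSp;       /* RSP bits 63:16 are preserved             */
    }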
    11802 
    11803 DECL_FORCE_INLINE(uint32_t)
    11804 iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
    11805 {
    11806     /* Use32BitSp: */
    11807     off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
    11808     off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    11809     return off;
    11810 }
    11811 
    11812 
    11813 /** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
    11814 DECL_INLINE_THROW(uint32_t)
    11815 iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
    11816                        uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
    11817 {
    11818     /*
    11819      * Assert sanity.
    11820      */
    11821     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
    11822 #ifdef VBOX_STRICT
    11823     if (RT_BYTE2(cBitsVarAndFlat) != 0)
    11824     {
    11825         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    11826                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    11827                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    11828         Assert(   pfnFunction
    11829                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
    11830                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
    11831                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
    11832                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
    11833                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
    11834                    : UINT64_C(0xc000b000a0009000) ));
    11835     }
    11836     else
    11837         Assert(   pfnFunction
    11838                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
    11839                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
    11840                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
    11841                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
    11842                    : UINT64_C(0xc000b000a0009000) ));
    11843 #endif
    11844 
    11845 #ifdef VBOX_STRICT
    11846     /*
    11847      * Check that the fExec flags we've got make sense.
    11848      */
    11849     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    11850 #endif
    11851 
    11852     /*
    11853      * To keep things simple we have to commit any pending writes first as we
    11854      * may end up making calls.
    11855      */
    11856     /** @todo we could postpone this till we make the call and reload the
    11857      * registers after returning from the call. Not sure if that's sensible or
    11858      * not, though. */
    11859     off = iemNativeRegFlushPendingWrites(pReNative, off);
    11860 
    11861     /*
    11862      * First we calculate the new RSP and the effective stack pointer value.
    11863      * For 64-bit mode and flat 32-bit these two are the same.
    11864      * (Code structure is very similar to that of POP)
    11865      */
    11866     uint8_t const cbMem       = RT_BYTE1(cBitsVarAndFlat) / 8;
    11867     bool const    fIsSegReg   = RT_BYTE3(cBitsVarAndFlat) != 0;
    11868     bool const    fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
    11869     uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
    11870                               ? cbMem : sizeof(uint16_t);
    11871     uint8_t const cBitsFlat   = RT_BYTE2(cBitsVarAndFlat);      RT_NOREF(cBitsFlat);
    11872     uint8_t const idxRegRsp   = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
    11873                                                                 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
    11874     uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
    11875     uint32_t      offFixupJumpToUseOtherBitSp = UINT32_MAX;
    11876     if (cBitsFlat != 0)
    11877     {
    11878         Assert(idxRegEffSp == idxRegRsp);
    11879         Assert(cBitsFlat == 32 || cBitsFlat == 64);
    11880         Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
    11881         if (cBitsFlat == 64)
    11882             off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
    11883         else
    11884             off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
    11885     }
    11886     else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
    11887     {
    11888         Assert(idxRegEffSp != idxRegRsp);
    11889         uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
    11890                                                                      kIemNativeGstRegUse_ReadOnly);
    11891 #ifdef RT_ARCH_AMD64
    11892         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    11893 #else
    11894         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    11895 #endif
    11896         off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
    11897         iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
    11898         offFixupJumpToUseOtherBitSp = off;
    11899         if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
    11900         {
    11901             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
    11902             off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    11903         }
    11904         else
    11905         {
    11906             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
    11907             off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    11908         }
    11909         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11910     }
    11911     /* SpUpdateEnd: */
    11912     uint32_t const offLabelSpUpdateEnd = off;
    11913 
    11914     /*
    11915      * Okay, now prepare for the TLB lookup and jump to the lookup code (or
    11916      * straight to the TlbMiss routine if we're skipping the lookup).
    11917      */
    11918     uint8_t const  iSegReg           = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
    11919     IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
    11920     uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
    11921     uint32_t const idxLabelTlbMiss   = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
    11922     uint32_t const idxLabelTlbLookup = !TlbState.fSkip
    11923                                      ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
    11924                                      : UINT32_MAX;
    11925     uint8_t const  idxRegValue       =    !TlbState.fSkip
    11926                                        && pReNative->Core.aVars[idxVarValue].enmKind != kIemNativeVarKind_Immediate
    11927                                      ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
    11928                                                                    IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
    11929                                      : UINT8_MAX;
    11930     uint8_t const  idxRegMemResult   = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
    11931 
    11932 
    11933     if (!TlbState.fSkip)
    11934         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
    11935     else
    11936         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
    11937 
    11938     /*
    11939      * Use16BitSp:
    11940      */
    11941     if (cBitsFlat == 0)
    11942     {
    11943 #ifdef RT_ARCH_AMD64
    11944         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    11945 #else
    11946         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    11947 #endif
    11948         iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
    11949         if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
    11950             off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    11951         else
    11952             off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    11953         off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
    11954         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    11955     }
    11956 
    11957     /*
    11958      * TlbMiss:
    11959      *
    11960      * Call helper to do the pushing.
    11961      */
    11962     iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
    11963 
    11964 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    11965     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    11966 #else
    11967     RT_NOREF(idxInstr);
    11968 #endif
    11969 
    11970     /* Save variables in volatile registers. */
    11971     uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
    11972                                      | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
    11973                                      | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
    11974                                      | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
    11975     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    11976 
    11977     if (   idxRegValue == IEMNATIVE_CALL_ARG1_GREG
    11978         && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
    11979     {
    11980         /* Swap them using ARG0 as temp register: */
    11981         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
    11982         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
    11983         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
    11984     }
    11985     else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
    11986     {
    11987         /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
    11988         off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
    11989                                                         0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    11990 
    11991         /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
    11992         if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
    11993             off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
    11994     }
    11995     else
    11996     {
    11997         /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
    11998         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
    11999 
    12000         /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
    12001         off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
    12002                                                         IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
    12003     }
    12004 
    12005     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    12006     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    12007 
    12008     /* Done setting up parameters, make the call. */
    12009     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    12010 
    12011     /* Restore variables and guest shadow registers to volatile registers. */
    12012     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
    12013     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
    12014 
    12015 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    12016     if (!TlbState.fSkip)
    12017     {
    12018         /* end of TlbMiss - Jump to the done label. */
    12019         uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    12020         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
    12021 
    12022         /*
    12023          * TlbLookup:
    12024          */
    12025         off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1, IEM_ACCESS_TYPE_WRITE,
    12026                                      idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
    12027 
    12028         /*
    12029          * Emit code to do the actual storing / fetching.
    12030          */
    12031         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
    12032 # ifdef VBOX_WITH_STATISTICS
    12033         off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
    12034                                                   RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
    12035 # endif
    12036         if (idxRegValue != UINT8_MAX)
    12037         {
    12038             switch (cbMemAccess)
    12039             {
    12040                 case 2:
    12041                     off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
    12042                     break;
    12043                 case 4:
    12044                     if (!fIsIntelSeg)
    12045                         off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
    12046                     else
    12047                     {
    12048                         /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
    12049                            PUSH FS in real mode, so we have to try to emulate that here.
    12050                            We borrow the now unused idxReg1 from the TLB lookup code here. */
    12051                         uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
    12052                                                                                             kIemNativeGstReg_EFlags);
    12053                         if (idxRegEfl != UINT8_MAX)
    12054                         {
    12055 # ifdef RT_ARCH_AMD64
    12056                             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
    12057                             off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
    12058                                                              UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
    12059 # else
    12060                             off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
    12061                                                                   off, TlbState.idxReg1, idxRegEfl,
    12062                                                                   UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
    12063 # endif
    12064                             iemNativeRegFreeTmp(pReNative, idxRegEfl);
    12065                         }
    12066                         else
    12067                         {
    12068                             off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
    12069                                                                   RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
    12070                             off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
    12071                                                              UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
    12072                         }
    12073                         /* ASSUMES the upper half of idxRegValue is ZERO. */
    12074                         off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
    12075                         off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
    12076                     }
    12077                     break;
    12078                 case 8:
    12079                     off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
    12080                     break;
    12081                 default:
    12082                     AssertFailed();
    12083             }
    12084         }
    12085         else
    12086         {
    12087             switch (cbMemAccess)
    12088             {
    12089                 case 2:
    12090                     off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off,
    12091                                                          (uint16_t)pReNative->Core.aVars[idxVarValue].u.uValue,
    12092                                                          idxRegMemResult, TlbState.idxReg1);
    12093                     break;
    12094                 case 4:
    12095                     Assert(!fIsSegReg);
    12096                     off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off,
    12097                                                          (uint32_t)pReNative->Core.aVars[idxVarValue].u.uValue,
    12098                                                          idxRegMemResult, TlbState.idxReg1);
    12099                     break;
    12100                 case 8:
    12101                     off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pReNative->Core.aVars[idxVarValue].u.uValue,
    12102                                                          idxRegMemResult, TlbState.idxReg1);
    12103                     break;
    12104                 default:
    12105                     AssertFailed();
    12106             }
    12107         }
    12108 
    12109         iemNativeRegFreeTmp(pReNative, idxRegMemResult);
    12110         TlbState.freeRegsAndReleaseVars(pReNative);
    12111 
    12112         /*
    12113          * TlbDone:
    12114          *
    12115          * Commit the new RSP value.
    12116          */
    12117         iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    12118     }
    12119 #endif /* IEMNATIVE_WITH_TLB_LOOKUP */
    12120 
    12121     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
    12122     iemNativeRegFreeTmp(pReNative, idxRegRsp);
    12123     if (idxRegEffSp != idxRegRsp)
    12124         iemNativeRegFreeTmp(pReNative, idxRegEffSp);
    12125 
    12126     /* The value variable is implicitly flushed. */
    12127     if (idxRegValue != UINT8_MAX)
    12128         iemNativeVarRegisterRelease(pReNative, idxVarValue);
    12129     iemNativeVarFreeLocal(pReNative, idxVarValue);
    12130 
    12131     return off;
    12132 }
    12133 
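To summarize the layout iemNativeEmitStackPush produces, here is a hedged sketch of the generated code shape (approximate order; labels as in the comments above, register names symbolic):

    /*
     *   <update RSP and EffSp>            ; flat: plain sub; segmented: test SS attr D-bit,
     *                                     ;   then 32-bit or (via fixup) 16-bit SP arithmetic
     * SpUpdateEnd:
     *   jmp TlbLookup                     ; or jmp TlbMiss when TlbState.fSkip
     * TlbMiss:
     *   <save volatile regs>
     *   call pfnFunction(pVCpu, EffSp, value)
     *   <restore regs and guest shadows>
     *   jmp TlbDone
     * TlbLookup:                          ; emitted by iemNativeEmitTlbLookup
     *   <translate EffSp; on any miss jmp TlbMiss>
     *   <inline store of the value (incl. the Intel real-mode segment-push quirk)>
     * TlbDone:
     *   <commit RSP to CPUMCTX, free registers>
     */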
    12134 
    12135 
    12136 /*                                                     RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
    12137 #define IEM_MC_POP_GREG_U16(a_iGReg) \
    12138     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16,  0, 0, 0), \
    12139                                     (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
    12140 #define IEM_MC_POP_GREG_U32(a_iGReg) \
    12141     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32,  0, 0, 0), \
    12142                                     (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
    12143 #define IEM_MC_POP_GREG_U64(a_iGReg) \
    12144     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64,  0, 0, 0), \
    12145                                     (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
    12146 
    12147 #define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
    12148     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
    12149                                     (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
    12150 #define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
    12151     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
    12152                                     (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
    12153 
    12154 #define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
    12155     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
    12156                                     (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
    12157 #define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
    12158     off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
    12159                                     (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
    12160 
    12161 
    12162 DECL_FORCE_INLINE_THROW(uint32_t)
    12163 iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
    12164                              uint8_t idxRegTmp)
    12165 {
    12166     /* Use16BitSp: */
    12167 #ifdef RT_ARCH_AMD64
    12168     off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    12169     off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
    12170     RT_NOREF(idxRegTmp);
    12171 #else
    12172     /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
    12173     pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
    12174     /* add tmp, regrsp, #cbMem */
    12175     pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
    12176     /* and tmp, tmp, #0xffff */
    12177     Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
    12178     pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0,  false /*f64Bit*/);
    12179     /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
    12180     pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
    12181 #endif
    12182     return off;
    12183 }
    12184 
    12185 
    12186 DECL_FORCE_INLINE(uint32_t)
    12187 iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
    12188 {
    12189     /* Use32BitSp: */
    12190     off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
    12191     off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
    12192     return off;
    12193 }
    12194 
    12195 
    12196 /** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
    12197 DECL_INLINE_THROW(uint32_t)
    12198 iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
    12199                           uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
    12200 {
    12201     /*
    12202      * Assert sanity.
    12203      */
    12204     Assert(idxGReg < 16);
    12205 #ifdef VBOX_STRICT
    12206     if (RT_BYTE2(cBitsVarAndFlat) != 0)
    12207     {
    12208         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    12209                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    12210                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    12211         Assert(   pfnFunction
    12212                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
    12213                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
    12214                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
    12215                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
    12216                    : UINT64_C(0xc000b000a0009000) ));
    12217     }
    12218     else
    12219         Assert(   pfnFunction
    12220                == (  cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
    12221                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
    12222                    : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
    12223                    : UINT64_C(0xc000b000a0009000) ));
    12224 #endif
    12225 
    12226 #ifdef VBOX_STRICT
    12227     /*
    12228      * Check that the fExec flags we've got make sense.
    12229      */
    12230     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    12231 #endif
    12232 
    12233     /*
    12234      * To keep things simple we have to commit any pending writes first as we
    12235      * may end up making calls.
    12236      */
    12237     off = iemNativeRegFlushPendingWrites(pReNative, off);
    12238 
    12239     /*
    12240      * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
    12241      * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
    12242      * directly as the effective stack pointer.
    12243      * (Code structure is very similar to that of PUSH)
    12244      */
    12245     uint8_t const cbMem           = RT_BYTE1(cBitsVarAndFlat) / 8;
    12246     uint8_t const cBitsFlat       = RT_BYTE2(cBitsVarAndFlat);      RT_NOREF(cBitsFlat);
    12247     uint8_t const idxRegRsp       = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
    12248                                                                     kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
    12249     uint8_t const idxRegEffSp     = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
    12250     /** @todo can do a better job picking the register here. For cbMem >= 4 this
    12251      *        will be the resulting register value. */
    12252     uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too.  */
    12253 
    12254     uint32_t      offFixupJumpToUseOtherBitSp = UINT32_MAX;
    12255     if (cBitsFlat != 0)
    12256     {
    12257         Assert(idxRegEffSp == idxRegRsp);
    12258         Assert(cBitsFlat == 32 || cBitsFlat == 64);
    12259         Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
    12260     }
    12261     else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
    12262     {
    12263         Assert(idxRegEffSp != idxRegRsp);
    12264         uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
    12265                                                                      kIemNativeGstRegUse_ReadOnly);
    12266 #ifdef RT_ARCH_AMD64
    12267         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    12268 #else
    12269         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    12270 #endif
    12271         off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
    12272         iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
    12273         offFixupJumpToUseOtherBitSp = off;
    12274         if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
    12275         {
    12276 /** @todo can skip idxRegRsp updating when popping ESP.   */
    12277             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
    12278             off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    12279         }
    12280         else
    12281         {
    12282             off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
    12283             off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
    12284         }
    12285         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    12286     }
    12287     /* SpUpdateEnd: */
    12288     uint32_t const offLabelSpUpdateEnd = off;
    12289 
    12290     /*
    12291      * Okay, now prepare for the TLB lookup and jump to the lookup code (or
    12292      * straight to the TlbMiss routine if we're skipping the lookup).
    12293      */
    12294     uint8_t const  iSegReg           = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
    12295     IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
    12296     uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
    12297     uint32_t const idxLabelTlbMiss   = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
    12298     uint32_t const idxLabelTlbLookup = !TlbState.fSkip
    12299                                      ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
    12300                                      : UINT32_MAX;
    12301 
    12302     if (!TlbState.fSkip)
    12303         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
    12304     else
    12305         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
    12306 
    12307     /*
    12308      * Use16BitSp:
    12309      */
    12310     if (cBitsFlat == 0)
    12311     {
    12312 #ifdef RT_ARCH_AMD64
    12313         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    12314 #else
    12315         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
    12316 #endif
    12317         iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
    12318         if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
    12319             off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
    12320         else
    12321             off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
    12322         off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
    12323         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    12324     }
    12325 
    12326     /*
    12327      * TlbMiss:
    12328      *
    12329      * Call helper to do the popping.
    12330      */
    12331     iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
    12332 
    12333 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    12334     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    12335 #else
    12336     RT_NOREF(idxInstr);
    12337 #endif
    12338 
    12339     uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
    12340                                      | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
    12341                                      | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
    12342     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    12343 
    12344 
    12345     /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
    12346     if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
    12347         off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
    12348 
    12349     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    12350     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    12351 
    12352     /* Done setting up parameters, make the call. */
    12353     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    12354 
    12355     /* Move the return register content to idxRegMemResult. */
    12356     if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
    12357         off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
    12358 
    12359     /* Restore variables and guest shadow registers to volatile registers. */
    12360     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
    12361     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
    12362 
    12363 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    12364     if (!TlbState.fSkip)
    12365     {
    12366         /* end of TlbMiss - Jump to the done label. */
    12367         uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    12368         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
    12369 
    12370         /*
    12371          * TlbLookup:
    12372          */
    12373         off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
    12374                                      idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
    12375 
    12376         /*
    12377          * Emit code to load the value (the address in idxRegMemResult is replaced by the loaded value).
    12378          */
    12379         PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
    12380 # ifdef VBOX_WITH_STATISTICS
    12381         off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
    12382                                                   RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
    12383 # endif
    12384         switch (cbMem)
    12385         {
    12386             case 2:
    12387                 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
    12388                 break;
    12389             case 4:
    12390                 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
    12391                 break;
    12392             case 8:
    12393                 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
    12394                 break;
    12395             default:
    12396                 AssertFailed();
    12397         }
    12398 
    12399         TlbState.freeRegsAndReleaseVars(pReNative);
    12400 
    12401         /*
    12402          * TlbDone:
    12403          *
    12404          * Set the new RSP value (FLAT accesses need to calculate it first) and
    12405          * commit the popped register value.
    12406          */
    12407         iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    12408     }
    12409 #endif /* IEMNATIVE_WITH_TLB_LOOKUP */
    12410 
    12411     if (idxGReg != X86_GREG_xSP)
    12412     {
    12413         /* Set the register. */
    12414         if (cbMem >= sizeof(uint32_t))
    12415         {
    12416             iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult,  IEMNATIVEGSTREG_GPR(idxGReg), off);
    12417             off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
    12418                                                  RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
    12419         }
    12420         else
    12421         {
    12422             Assert(cbMem == sizeof(uint16_t));
    12423             uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
    12424                                                                       kIemNativeGstRegUse_ForUpdate);
    12425             off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
    12426             off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
    12427             iemNativeRegFreeTmp(pReNative, idxRegDst);
    12428         }
    12429 
    12430         /* Complete RSP calculation for FLAT mode. */
    12431         if (idxRegEffSp == idxRegRsp)
    12432         {
    12433             if (cBitsFlat == 64)
    12434                 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
    12435             else
    12436                 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
    12437         }
    12438     }
    12439     else
    12440     {
    12441         /* We're popping RSP, ESP or SP. Only the latter needs a bit of extra work, of course. */
    12442         if (cbMem == sizeof(uint64_t))
    12443             off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
    12444         else if (cbMem == sizeof(uint32_t))
    12445             off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
    12446         else
    12447         {
    12448             if (idxRegEffSp == idxRegRsp)
    12449             {
    12450                 if (cBitsFlat == 64)
    12451                     off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
    12452                 else
    12453                     off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
    12454             }
    12455             off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
    12456         }
    12457     }
    12458     off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
    12459 
    12460     iemNativeRegFreeTmp(pReNative, idxRegRsp);
    12461     if (idxRegEffSp != idxRegRsp)
    12462         iemNativeRegFreeTmp(pReNative, idxRegEffSp);
    12463     iemNativeRegFreeTmp(pReNative, idxRegMemResult);
    12464 
    12465     return off;
    12466 }
    12467 
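The idxGReg == X86_GREG_xSP branch above implements the x86 rule that a pop into the stack pointer overrides the post-increment: the popped value becomes the new stack pointer. A plain C model (a hypothetical helper for illustration; uRspAfterInc is RSP after the pop increment has been applied):

    static uint64_t iemPopIntoSpModel(uint64_t uRspAfterInc, uint64_t uPopped, uint8_t cbMem)
    {
        if (cbMem == 8)
            return uPopped;                     /* POP RSP: the popped value replaces RSP        */
        if (cbMem == 4)
            return (uint32_t)uPopped;           /* POP ESP: the popped value, zero-extended      */
        return (uRspAfterInc & ~(uint64_t)0xffff)
             | (uint16_t)uPopped;               /* POP SP: merged into bits 15:0, 63:16 kept     */
    }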
    12468 
    12469 
    12470 /*********************************************************************************************************************************
    12471 *   Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX).                                                                      *
    12472 *********************************************************************************************************************************/
    12473 
    12474 #define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12475     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    12476                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE,  0 /*fAlignMask*/, \
    12477                                     (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
    12478 
    12479 #define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12480     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    12481                                     IEM_ACCESS_TYPE_WRITE,  0 /*fAlignMask*/, \
    12482                                     (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr)
    12483 
    12484 #define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12485     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
    12486                                     IEM_ACCESS_TYPE_READ,  0 /*fAlignMask*/, \
    12487                                     (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
    12488 
    12489 
    12490 #define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12491     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    12492                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    12493                                     (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
    12494 
    12495 #define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12496     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    12497                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    12498                                     (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr)
    12499 
    12500 #define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12501     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
    12502                                     IEM_ACCESS_TYPE_READ,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    12503                                     (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
    12504 
    12505 #define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12506     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
    12507                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    12508                                     (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr)
    12509 
    12510 
    12511 #define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12512     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    12513                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    12514                                     (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
    12515 
    12516 #define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12517     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    12518                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    12519                                     (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr)
    12520 
    12521 #define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12522     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
    12523                                     IEM_ACCESS_TYPE_READ,  sizeof(uint32_t) - 1 /*fAlignMask*/, \
    12524                                     (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
    12525 
    12526 #define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12527     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
    12528                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    12529                                     (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr)
    12530 
    12531 
    12532 #define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12533     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    12534                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    12535                                     (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
    12536 
    12537 #define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12538     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    12539                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    12540                                     (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr)
    12541 
    12542 #define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12543     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
    12544                                     IEM_ACCESS_TYPE_READ,  sizeof(uint64_t) - 1 /*fAlignMask*/, \
    12545                                     (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
    12546 
    12547 #define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12548     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
    12549                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    12550                                     (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr)
    12551 
    12552 
    12553 #define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12554     off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
    12555                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    12556                                     (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr)
    12557 
    12558 #define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12559     off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
    12560                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
    12561                                     (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr)
    12562 
    12563 
    12564 #define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12565     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    12566                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    12567                                     (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
    12568 
    12569 #define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12570     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    12571                                     IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    12572                                     (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr)
    12573 
    12574 #define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
    12575     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
    12576                                     IEM_ACCESS_TYPE_READ,  sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    12577                                     (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
    12578 
    12579 
    12580 
    12581 #define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    12582     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    12583                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE,  0 /*fAlignMask*/, \
    12584                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
    12585 
    12586 #define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    12587     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    12588                                     IEM_ACCESS_TYPE_WRITE,  0 /*fAlignMask*/, \
    12589                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr)
    12590 
    12591 #define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
    12592     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
    12593                                     IEM_ACCESS_TYPE_READ,  0 /*fAlignMask*/, \
    12594                                     (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
    12595 
    12596 
    12597 #define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    12598     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    12599                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    12600                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
    12601 
    12602 #define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    12603     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    12604                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    12605                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr)
    12606 
    12607 #define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
    12608     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
    12609                                     IEM_ACCESS_TYPE_READ,  sizeof(uint16_t) - 1 /*fAlignMask*/, \
    12610                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
    12611 
    12612 #define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
    12613     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
    12614                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint16_t) - 1 /*fAlignMask*/, \
    12615                                     (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr)
    12616 
    12617 
    12618 #define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    12619     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    12620                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    12621                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
    12622 
    12623 #define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    12624     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    12625                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    12626                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr)
    12627 
    12628 #define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
    12629     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
    12630                                     IEM_ACCESS_TYPE_READ,  sizeof(uint32_t) - 1 /*fAlignMask*/, \
    12631                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
    12632 
    12633 #define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
    12634     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
    12635                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint32_t) - 1 /*fAlignMask*/, \
    12636                                     (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr)
    12637 
    12638 
    12639 #define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    12640     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    12641                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    12642                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
    12643 
    12644 #define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    12645     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    12646                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    12647                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr)
    12648 
    12649 #define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
    12650     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
    12651                                     IEM_ACCESS_TYPE_READ,  sizeof(uint64_t) - 1 /*fAlignMask*/, \
    12652                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
    12653 
    12654 #define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
    12655     off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
    12656                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    12657                                     (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr)
    12658 
    12659 
    12660 #define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
    12661     off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
    12662                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, \
    12663                                     (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr)
    12664 
    12665 #define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
    12666     off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
    12667                                     IEM_ACCESS_TYPE_WRITE, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
    12668                                     (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr)
    12669 
    12670 
    12671 #define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    12672     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    12673                                     IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    12674                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
    12675 
    12676 #define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    12677     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    12678                                     IEM_ACCESS_TYPE_WRITE, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    12679                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr)
    12680 
    12681 #define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
    12682     off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
    12683                                     IEM_ACCESS_TYPE_READ,  sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
    12684                                     (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
    12685 
    12686 
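/* Illustration (not from the original sources): all of the IEM_MC_MEM_MAP_* and
   IEM_MC_MEM_FLAT_MAP_* wrappers above funnel into iemNativeEmitMemMapCommon()
   below.  For instance, IEM_MC_MEM_FLAT_MAP_U16_RW expands to a single statement
   along the lines of:

       off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo,
                                       UINT8_MAX /*iSegReg*/, a_GCPtrMem, sizeof(uint16_t),
                                       IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE,
                                       sizeof(uint16_t) - 1 /*fAlignMask*/,
                                       (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr);

   The FLAT variants differ from the segmented ones only in hardcoding UINT8_MAX
   as the segment register and selecting an iemNativeHlpMemFlatMapDataXxx helper. */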
    12687 DECL_INLINE_THROW(uint32_t)
    12688 iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
    12689                           uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
    12690                           uintptr_t pfnFunction, uint8_t idxInstr)
    12691 {
    12692     /*
    12693      * Assert sanity.
    12694      */
    12695     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
    12696     AssertStmt(   pReNative->Core.aVars[idxVarMem].enmKind == kIemNativeVarKind_Invalid
    12697                && pReNative->Core.aVars[idxVarMem].cbVar   == sizeof(void *),
    12698                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    12699 
    12700     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
    12701     AssertStmt(   pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Invalid
    12702                && pReNative->Core.aVars[idxVarUnmapInfo].cbVar   == sizeof(uint8_t),
    12703                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    12704 
    12705     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
    12706     AssertStmt(   pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Immediate
    12707                || pReNative->Core.aVars[idxVarGCPtrMem].enmKind == kIemNativeVarKind_Stack,
    12708                IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
    12709 
    12710     Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
    12711 
    12712     AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
    12713 
    12714 #ifdef VBOX_STRICT
    12715 # define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
    12716         (  ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
    12717          ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
    12718          : ((a_fAccess) & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_READ \
    12719          ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
    12720 
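    /* Example (illustrative): IEM_MAP_HLP_FN(IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, iemNativeHlpMemFlatMapDataU32)
       evaluates to (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, i.e. the Rw/Ro/Wo
       suffix is picked to match the access mask. */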
    12721     if (iSegReg == UINT8_MAX)
    12722     {
    12723         Assert(   (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
    12724                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
    12725                || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
    12726         switch (cbMem)
    12727         {
    12728             case 1:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
    12729             case 2:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
    12730             case 4:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
    12731             case 8:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
    12732             case 10:
    12733                 Assert(   pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
    12734                        || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
    12735                 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
    12736                 break;
    12737             case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
    12738 # if 0
    12739             case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
    12740             case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
    12741 # endif
    12742             default: AssertFailed(); break;
    12743         }
    12744     }
    12745     else
    12746     {
    12747         Assert(iSegReg < 6);
    12748         switch (cbMem)
    12749         {
    12750             case 1:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
    12751             case 2:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
    12752             case 4:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
    12753             case 8:  Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
    12754             case 10:
    12755                 Assert(   pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
    12756                        || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
    12757                 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
    12758                 break;
    12759             case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
    12760 # if 0
    12761             case 32: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU256)); break;
    12762             case 64: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU512)); break;
    12763 # endif
    12764             default: AssertFailed(); break;
    12765         }
    12766     }
    12767 # undef IEM_MAP_HLP_FN
    12768 #endif
    12769 
    12770 #ifdef VBOX_STRICT
    12771     /*
    12772      * Check that the fExec flags we've got make sense.
    12773      */
    12774     off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
    12775 #endif
    12776 
    12777     /*
    12778      * To keep things simple we have to commit any pending writes first as we
    12779      * may end up making calls.
    12780      */
    12781     off = iemNativeRegFlushPendingWrites(pReNative, off);
    12782 
    12783 #ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    12784     /*
    12785      * Move/spill/flush stuff out of call-volatile registers.
    12786      * This is the easy way out. We could contain this to the tlb-miss branch
    12787      * by saving and restoring active stuff here.
    12788      */
    12789     /** @todo save+restore active registers and maybe guest shadows in tlb-miss.  */
    12790     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
    12791 #endif
    12792 
    12793     /* The bUnmapInfo variable will get a register in the tlb-hit code path,
    12794        while the tlb-miss codepath will temporarily put it on the stack.
    12795        Set the type to stack here so we don't need to do it twice below. */
    12796     iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
    12797     uint8_t const idxRegUnmapInfo   = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
    12798     /** @todo use a tmp register from TlbState, since they'll be free after tlb
    12799      *        lookup is done. */
    12800 
    12801     /*
    12802      * Define labels and allocate the result register (trying for the return
    12803      * register if we can).
    12804      */
    12805     uint16_t const uTlbSeqNo         = pReNative->uTlbSeqNo++;
    12806     uint8_t  const idxRegMemResult   = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
    12807                                      ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
    12808                                      : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
    12809     IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
    12810     uint32_t const idxLabelTlbLookup = !TlbState.fSkip
    12811                                      ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
    12812                                      : UINT32_MAX;
    12813 //off=iemNativeEmitBrk(pReNative, off, 0);
    12814     /*
    12815      * Jump to the TLB lookup code.
    12816      */
    12817     if (!TlbState.fSkip)
    12818         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
    12819 
    12820     /*
    12821      * TlbMiss:
    12822      *
    12823      * Call helper to do the fetching.
    12824      * We flush all guest register shadow copies here.
    12825      */
    12826     uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
    12827 
    12828 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    12829     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    12830 #else
    12831     RT_NOREF(idxInstr);
    12832 #endif
    12833 
    12834 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    12835     /* Save variables in volatile registers. */
    12836     uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
    12837     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    12838 #endif
    12839 
    12840     /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
    12841     off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
    12842 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    12843                                                     IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
    12844 #else
    12845                                                     IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    12846 #endif
    12847 
    12848     /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
    12849     if (iSegReg != UINT8_MAX)
    12850     {
    12851         AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
    12852         off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
    12853     }
    12854 
    12855     /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stack slot address, the result is loaded into a register after the call. */
    12856     int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
    12857     off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
    12858 
    12859     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    12860     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    12861 
    12862     /* Done setting up parameters, make the call. */
    12863     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    12864 
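    /* Rough shape of the mapping helper implied by the argument setup above
       (inferred here for illustration, not quoted from the helper headers):
           pvMem = pfnFunction(pVCpu, &bUnmapInfo, GCPtrMem[, iSegReg]);
       with the host pointer returned in IEMNATIVE_CALL_RET_GREG and the unmap
       cookie written to the bUnmapInfo stack slot addressed by ARG1. */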
    12865     /*
    12866      * Put the output in the right registers.
    12867      */
    12868     Assert(idxRegMemResult == pReNative->Core.aVars[idxVarMem].idxReg);
    12869     if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
    12870         off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
    12871 
    12872 #ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    12873     /* Restore variables and guest shadow registers to volatile registers. */
    12874     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
    12875     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
    12876 #endif
    12877 
    12878     Assert(pReNative->Core.aVars[idxVarUnmapInfo].idxReg == idxRegUnmapInfo);
    12879     off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
    12880 
    12881 #ifdef IEMNATIVE_WITH_TLB_LOOKUP
    12882     if (!TlbState.fSkip)
    12883     {
    12884         /* end of TlbMiss - jump to the done label. */
    12885         uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
    12886         off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
    12887 
    12888         /*
    12889          * TlbLookup:
    12890          */
    12891         off = iemNativeEmitTlbLookup(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
    12892                                      idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
    12893 # ifdef VBOX_WITH_STATISTICS
    12894         off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
    12895                                                 RT_UOFFSETOF(VMCPUCC,  iem.s.StatNativeTlbHitsForMapped));
    12896 # endif
    12897 
    12898         /* [idxVarUnmapInfo] = 0; */
    12899         off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
    12900 
    12901         /*
    12902          * TlbDone:
    12903          */
    12904         iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
    12905 
    12906         TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
    12907 
    12908 # ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
    12909         /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
    12910         iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    12911 # endif
    12912     }
    12913 #else
    12914     RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
    12915 #endif
    12916 
    12917     iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
    12918     iemNativeVarRegisterRelease(pReNative, idxVarMem);
    12919 
    12920     return off;
    12921 }
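/* Sketch of the native code layout the above produces when IEMNATIVE_WITH_TLB_LOOKUP
   is active (hand-drawn summary for illustration):

       jmp   TlbLookup                 ; inline code branches straight to the lookup
   TlbMiss:
       <save volatile regs / set up args>
       call  pfnFunction               ; slow path: helper performs the mapping
       <fetch bUnmapInfo from its stack slot, restore regs>
       jmp   TlbDone
   TlbLookup:
       <TLB probe; on miss jmp TlbMiss>
       mov   idxRegUnmapInfo, 0        ; TLB hit: direct mapping, nothing to unmap
   TlbDone:

   The hit path thus leaves bUnmapInfo = 0, which the commit-and-unmap emitter
   further down uses to skip the unmap helper call entirely. */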
    12922 
    12923 
    12924 #define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
    12925     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE, \
    12926                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
    12927 
    12928 #define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
    12929     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_WRITE, \
    12930                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
    12931 
    12932 #define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
    12933     off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_TYPE_READ, \
    12934                                          (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
    12935 
    12936 DECL_INLINE_THROW(uint32_t)
    12937 iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
    12938                                uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
    12939 {
    12940     /*
    12941      * Assert sanity.
    12942      */
    12943     IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
    12944     Assert(pReNative->Core.aVars[idxVarUnmapInfo].enmKind == kIemNativeVarKind_Stack);
    12945     Assert(   pReNative->Core.aVars[idxVarUnmapInfo].idxReg       < RT_ELEMENTS(pReNative->Core.aHstRegs)
    12946            || pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
    12947 #ifdef VBOX_STRICT
    12948     switch (fAccess & IEM_ACCESS_TYPE_MASK)
    12949     {
    12950         case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE: Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
    12951         case IEM_ACCESS_TYPE_WRITE:                        Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
    12952         case IEM_ACCESS_TYPE_READ:                         Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
    12953         default: AssertFailed();
    12954     }
    12955 #else
    12956     RT_NOREF(fAccess);
    12957 #endif
    12958 
    12959     /*
    12960      * To keep things simple we have to commit any pending writes first as we
    12961      * may end up making calls (there shouldn't be any at this point, so this
    12962      * is just for consistency).
    12963      */
    12964     /** @todo we could postpone this till we make the call and reload the
    12965      * registers after returning from the call. Not sure if that's sensible or
    12966      * not, though. */
    12967     off = iemNativeRegFlushPendingWrites(pReNative, off);
    12968 
    12969     /*
    12970      * Move/spill/flush stuff out of call-volatile registers.
    12971      *
    12972      * We exclude any register holding the bUnmapInfo variable, as we'll be
    12973      * checking it after returning from the call and will free it afterwards.
    12974      */
    12975     /** @todo save+restore active registers and maybe guest shadows in miss
    12976      *        scenario. */
    12977     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
    12978 
    12979     /*
    12980      * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
    12981      * to call the unmap helper function.
    12982      *
    12983      * The likelihood of it being zero is higher than for the TLB hit when doing
    12984      * the mapping, as a TLB miss for a well-aligned and unproblematic memory
    12985      * access should also end up with a mapping that won't need special unmapping.
    12986      */
    12987     /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case!  That
    12988      *        should speed up things for the pure interpreter as well when TLBs
    12989      *        are enabled. */
    12990 #ifdef RT_ARCH_AMD64
    12991     if (pReNative->Core.aVars[idxVarUnmapInfo].idxReg == UINT8_MAX)
    12992     {
    12993         /* test byte [rbp - xxx], 0ffh  */
    12994         uint8_t * const pbCodeBuf    = iemNativeInstrBufEnsure(pReNative, off, 7);
    12995         pbCodeBuf[off++] = 0xf6;
    12996         uint8_t const   idxStackSlot = pReNative->Core.aVars[idxVarUnmapInfo].idxStackSlot;
    12997         off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
    12998         pbCodeBuf[off++] = 0xff;
    12999         IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
    13000     }
    13001     else
    13002 #endif
    13003     {
    13004         uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
    13005                                                               true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
    13006         off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
    13007         iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
    13008     }
    13009     uint32_t const offJmpFixup = off;
    13010     off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
    13011 
    13012     /*
    13013      * Call the unmap helper function.
    13014      */
    13015 #ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
    13016     off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
    13017 #else
    13018     RT_NOREF(idxInstr);
    13019 #endif
    13020 
    13021     /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
    13022     off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
    13023                                                0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    13024 
    13025     /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
    13026     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    13027 
    13028     /* Done setting up parameters, make the call. */
    13029     off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
    13030 
    13031     /* The bUnmapInfo variable is implicitly freed by these MCs. */
    13032     iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
    13033 
    13034     /*
    13035      * Done, just fixup the jump for the non-call case.
    13036      */
    13037     iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
    13038 
    13039     return off;
    13040 }
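/* Runtime shape of the code emitted above (illustrative):

       test  byte [bUnmapInfo], 0ffh
       jz    .done                     ; zero means a direct (TLB hit) mapping
       call  iemNativeHlpMemCommitAndUnmapXxx   ; pVCpu in ARG0, bUnmapInfo in ARG1
   .done:

   so only mappings that actually need special unmapping pay for the call. */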
    13041 
    13042 
    13043 
    13044 /*********************************************************************************************************************************
    13045 *   State and Exceptions                                                                                                         *
    13046 *********************************************************************************************************************************/
    13047 
    13048 #define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE()     off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    13049 #define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ()       off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
    13050 
    13051 #define IEM_MC_PREPARE_SSE_USAGE()                  off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    13052 #define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE()     off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    13053 #define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ()       off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
    13054 
    13055 #define IEM_MC_PREPARE_AVX_USAGE()                  off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    13056 #define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE()     off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
    13057 #define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ()       off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
    13058 
    13059 
    13060 DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
    13061 {
    13062     /** @todo this needs a lot more work later. */
    13063     RT_NOREF(pReNative, fForChange);
    13064     return off;
    13065 }
    13066 
    13067 
    13068 /*********************************************************************************************************************************
    13069 *   The native code generator functions for each MC block.                                                                       *
    13070 *********************************************************************************************************************************/
    13071 
    13072 
    13073 /*
    13074  * Include g_apfnIemNativeRecompileFunctions and associated functions.
    13075  *
    13076  * This should probably live in it's own file later, but lets see what the
    13077  * This should probably live in its own file later, but let's see what the
    13078  */
    13079 #include "IEMNativeFunctions.cpp.h"
    13080 
    13081 
    13082 
    13083 /*********************************************************************************************************************************
    13084 *   Recompiler Core.                                                                                                             *
    13085 *********************************************************************************************************************************/
    13086 
    13087 
    13088 /** @callback_method_impl{FNDISREADBYTES, Dummy.} */
    13089 static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
    13090 {
    13091     RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
    13092     pDis->cbCachedInstr += cbMaxRead;
    13093     RT_NOREF(cbMinRead);
    13094     return VERR_NO_DATA;
    13095 }
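/* The dummy above zero-fills the request and returns VERR_NO_DATA so that
   DISInstrWithPrefetchedBytes() in the disassembly loop below never reads past
   the opcode bytes recorded in the TB. */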
    13096 
    13097 
    13098 /**
    13099  * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) into a string.
    13100  * @returns pszBuf.
    13101  * @param   fFlags  The flags.
    13102  * @param   pszBuf  The output buffer.
    13103  * @param   cbBuf   The output buffer size.  At least 32 bytes.
    13104  */
    13105 DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
    13106 {
    13107     Assert(cbBuf >= 32);
    13108     static RTSTRTUPLE const s_aModes[] =
    13109     {
    13110         /* [00] = */ { RT_STR_TUPLE("16BIT") },
    13111         /* [01] = */ { RT_STR_TUPLE("32BIT") },
    13112         /* [02] = */ { RT_STR_TUPLE("!2!") },
    13113         /* [03] = */ { RT_STR_TUPLE("!3!") },
    13114         /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
    13115         /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
    13116         /* [06] = */ { RT_STR_TUPLE("!6!") },
    13117         /* [07] = */ { RT_STR_TUPLE("!7!") },
    13118         /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
    13119         /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
    13120         /* [0a] = */ { RT_STR_TUPLE("64BIT") },
    13121         /* [0b] = */ { RT_STR_TUPLE("!b!") },
    13122         /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
    13123         /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
    13124         /* [0e] = */ { RT_STR_TUPLE("!e!") },
    13125         /* [0f] = */ { RT_STR_TUPLE("!f!") },
    13126         /* [10] = */ { RT_STR_TUPLE("!10!") },
    13127         /* [11] = */ { RT_STR_TUPLE("!11!") },
    13128         /* [12] = */ { RT_STR_TUPLE("!12!") },
    13129         /* [13] = */ { RT_STR_TUPLE("!13!") },
    13130         /* [14] = */ { RT_STR_TUPLE("!14!") },
    13131         /* [15] = */ { RT_STR_TUPLE("!15!") },
    13132         /* [16] = */ { RT_STR_TUPLE("!16!") },
    13133         /* [17] = */ { RT_STR_TUPLE("!17!") },
    13134         /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
    13135         /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
    13136         /* [1a] = */ { RT_STR_TUPLE("!1a!") },
    13137         /* [1b] = */ { RT_STR_TUPLE("!1b!") },
    13138         /* [1c] = */ { RT_STR_TUPLE("!1c!") },
    13139         /* [1d] = */ { RT_STR_TUPLE("!1d!") },
    13140         /* [1e] = */ { RT_STR_TUPLE("!1e!") },
    13141         /* [1f] = */ { RT_STR_TUPLE("!1f!") },
    13142     };
    13143     AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
    13144     memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
    13145     size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
    13146 
    13147     pszBuf[off++] = ' ';
    13148     pszBuf[off++] = 'C';
    13149     pszBuf[off++] = 'P';
    13150     pszBuf[off++] = 'L';
    13151     pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
    13152     Assert(off < 32);
    13153 
    13154     fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
    13155 
    13156     static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
    13157     {
    13158         { RT_STR_TUPLE("BYPASS_HANDLERS"),      IEM_F_BYPASS_HANDLERS    },
    13159         { RT_STR_TUPLE("PENDING_BRK_INSTR"),    IEM_F_PENDING_BRK_INSTR  },
    13160         { RT_STR_TUPLE("PENDING_BRK_DATA"),     IEM_F_PENDING_BRK_DATA   },
    13161         { RT_STR_TUPLE("PENDING_BRK_X86_IO"),   IEM_F_PENDING_BRK_X86_IO },
    13162         { RT_STR_TUPLE("X86_DISREGARD_LOCK"),   IEM_F_X86_DISREGARD_LOCK },
    13163         { RT_STR_TUPLE("X86_CTX_VMX"),          IEM_F_X86_CTX_VMX        },
    13164         { RT_STR_TUPLE("X86_CTX_SVM"),          IEM_F_X86_CTX_SVM        },
    13165         { RT_STR_TUPLE("X86_CTX_IN_GUEST"),     IEM_F_X86_CTX_IN_GUEST   },
    13166         { RT_STR_TUPLE("X86_CTX_SMM"),          IEM_F_X86_CTX_SMM        },
    13167         { RT_STR_TUPLE("INHIBIT_SHADOW"),       IEMTB_F_INHIBIT_SHADOW   },
    13168         { RT_STR_TUPLE("INHIBIT_NMI"),          IEMTB_F_INHIBIT_NMI      },
    13169         { RT_STR_TUPLE("CS_LIM_CHECKS"),        IEMTB_F_CS_LIM_CHECKS    },
    13170         { RT_STR_TUPLE("TYPE_THREADED"),        IEMTB_F_TYPE_THREADED    },
    13171         { RT_STR_TUPLE("TYPE_NATIVE"),          IEMTB_F_TYPE_NATIVE      },
    13172     };
    13173     if (fFlags)
    13174         for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
    13175             if (s_aFlags[i].fFlag & fFlags)
    13176             {
    13177                 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
    13178                 pszBuf[off++] = ' ';
    13179                 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
    13180                 off += s_aFlags[i].cchName;
    13181                 fFlags &= ~s_aFlags[i].fFlag;
    13182                 if (!fFlags)
    13183                     break;
    13184             }
    13185     pszBuf[off] = '\0';
    13186 
    13187     return pszBuf;
    13188 }
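/* Minimal usage sketch (example only; the output shown is illustrative):
       char szBuf[64];
       pHlp->pfnPrintf(pHlp, "fFlags=%s\n", iemTbFlagsToString(pTb->fFlags, szBuf, sizeof(szBuf)));
   For a 64-bit CPL0 native TB this yields something like "64BIT CPL0 TYPE_NATIVE". */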
    13189 
    13190 
    13191 DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
    13192 {
    13193     AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
    13194 #if defined(RT_ARCH_AMD64)
    13195     static const char * const a_apszMarkers[] =
    13196     {
    13197         /*[0]=*/ "unknown0",        "CheckCsLim",           "ConsiderLimChecking",  "CheckOpcodes",
    13198         /*[4]=*/ "PcAfterBranch",   "LoadTlbForNewPage",    "LoadTlbAfterBranch"
    13199     };
    13200 #endif
    13201 
    13202     char                    szDisBuf[512];
    13203     DISSTATE                Dis;
    13204     PCIEMNATIVEINSTR const  paNative      = pTb->Native.paInstructions;
    13205     uint32_t const          cNative       = pTb->Native.cInstructions;
    13206     uint32_t                offNative     = 0;
    13207 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    13208     PCIEMTBDBG const        pDbgInfo      = pTb->pDbgInfo;
    13209 #endif
    13210     DISCPUMODE              enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
    13211                                           : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
    13212                                           :                                                            DISCPUMODE_64BIT;
    13213 #if   defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    13214     DISCPUMODE const        enmHstCpuMode = DISCPUMODE_64BIT;
    13215 #elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    13216     DISCPUMODE const        enmHstCpuMode = DISCPUMODE_ARMV8_A64;
    13217 #elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
    13218 # error "Port me"
    13219 #else
    13220     csh                     hDisasm       = ~(size_t)0;
    13221 # if defined(RT_ARCH_AMD64)
    13222     cs_err                  rcCs          = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
    13223 # elif defined(RT_ARCH_ARM64)
    13224     cs_err                  rcCs          = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
    13225 # else
    13226 #  error "Port me"
    13227 # endif
    13228     AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
    13229 #endif
    13230 
    13231     /*
    13232      * Print TB info.
    13233      */
    13234     pHlp->pfnPrintf(pHlp,
    13235                     "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
    13236                     "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
    13237                     pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
    13238                     pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
    13239 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    13240     if (pDbgInfo && pDbgInfo->cEntries > 1)
    13241     {
    13242         Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
    13243 
    13244         /*
    13245          * This disassembly is driven by the debug info which follows the native
    13246          * code and indicates when it starts with the next guest instructions,
    13247          * where labels are and such things.
    13248          */
    13249         uint32_t                idxThreadedCall  = 0;
    13250         uint32_t                fExec            = pTb->fFlags & UINT32_C(0x00ffffff);
    13251         uint8_t                 idxRange         = UINT8_MAX;
    13252         uint8_t const           cRanges          = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
    13253         uint32_t                offRange         = 0;
    13254         uint32_t                offOpcodes       = 0;
    13255         uint32_t const          cbOpcodes        = pTb->cbOpcodes;
    13256         RTGCPHYS                GCPhysPc         = pTb->GCPhysPc;
    13257         uint32_t const          cDbgEntries      = pDbgInfo->cEntries;
    13258         uint32_t                iDbgEntry        = 1;
    13259         uint32_t                offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
    13260 
    13261         while (offNative < cNative)
    13262         {
    13263             /* If we're at or have passed the point where the next chunk of debug
    13264                info starts, process it. */
    13265             if (offDbgNativeNext <= offNative)
    13266             {
    13267                 offDbgNativeNext = UINT32_MAX;
    13268                 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
    13269                 {
    13270                     switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
    13271                     {
    13272                         case kIemTbDbgEntryType_GuestInstruction:
    13273                         {
    13274                             /* Did the exec flag change? */
    13275                             if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
    13276                             {
    13277                                 pHlp->pfnPrintf(pHlp,
    13278                                                 "  fExec change %#08x -> %#08x %s\n",
    13279                                                 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
    13280                                                 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
    13281                                                                    szDisBuf, sizeof(szDisBuf)));
    13282                                 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
    13283                                 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
    13284                                               : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
    13285                                               :                                                      DISCPUMODE_64BIT;
    13286                             }
    13287 
                            /* New opcode range? We need to fend off a spurious debug info entry here for cases
    13289                                where the compilation was aborted before the opcode was recorded and the actual
    13290                                instruction was translated to a threaded call.  This may happen when we run out
    13291                                of ranges, or when some complicated interrupts/FFs are found to be pending or
    13292                                similar.  So, we just deal with it here rather than in the compiler code as it
    13293                                is a lot simpler to do here. */
    13294                             if (   idxRange == UINT8_MAX
    13295                                 || idxRange >= cRanges
    13296                                 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
    13297                             {
    13298                                 idxRange += 1;
    13299                                 if (idxRange < cRanges)
    13300                                     offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
    13301                                 else
    13302                                     continue;
    13303                                 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
    13304                                 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
    13305                                          + (pTb->aRanges[idxRange].idxPhysPage == 0
    13306                                             ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
    13307                                             : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
    13308                                 pHlp->pfnPrintf(pHlp, "  Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
    13309                                                 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
    13310                                                 pTb->aRanges[idxRange].idxPhysPage);
    13311                                 GCPhysPc += offRange;
    13312                             }
    13313 
    13314                             /* Disassemble the instruction. */
    13315                             //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
    13316                             uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
    13317                             uint32_t      cbInstr    = 1;
    13318                             int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
    13319                                                                  &pTb->pabOpcodes[offOpcodes], cbInstrMax,
    13320                                                                  iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
    13321                             if (RT_SUCCESS(rc))
    13322                             {
    13323                                 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    13324                                                              DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    13325                                                              | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    13326                                                              NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    13327 
    13328                                 static unsigned const s_offMarker  = 55;
    13329                                 static char const     s_szMarker[] = " ; <--- guest";
    13330                                 if (cch < s_offMarker)
    13331                                 {
    13332                                     memset(&szDisBuf[cch], ' ', s_offMarker - cch);
    13333                                     cch = s_offMarker;
    13334                                 }
    13335                                 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
    13336                                     memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
    13337 
    13338                                 pHlp->pfnPrintf(pHlp, "  %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
    13339                             }
    13340                             else
    13341                             {
    13342                                 pHlp->pfnPrintf(pHlp, "  %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
    13343                                                 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
    13344                                 cbInstr = 1;
    13345                             }
    13346                             GCPhysPc   += cbInstr;
    13347                             offOpcodes += cbInstr;
    13348                             offRange   += cbInstr;
    13349                             continue;
    13350                         }
    13351 
    13352                         case kIemTbDbgEntryType_ThreadedCall:
    13353                             pHlp->pfnPrintf(pHlp,
    13354                                             "  Call #%u to %s (%u args) - %s\n",
    13355                                             idxThreadedCall,
    13356                                             g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
    13357                                             g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
    13358                                             pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
    13359                             idxThreadedCall++;
    13360                             continue;
    13361 
    13362                         case kIemTbDbgEntryType_GuestRegShadowing:
    13363                         {
    13364                             PCIEMTBDBGENTRY const pEntry    = &pDbgInfo->aEntries[iDbgEntry];
    13365                             const char * const    pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
    13366                             if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
    13367                                 pHlp->pfnPrintf(pHlp, "  Guest register %s != host register %s\n", pszGstReg,
    13368                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
    13369                             else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
    13370                                 pHlp->pfnPrintf(pHlp, "  Guest register %s == host register %s\n", pszGstReg,
    13371                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
    13372                             else
    13373                                 pHlp->pfnPrintf(pHlp, "  Guest register %s == host register %s (previously in %s)\n", pszGstReg,
    13374                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
    13375                                                 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
    13376                             continue;
    13377                         }
    13378 
    13379                         case kIemTbDbgEntryType_Label:
    13380                         {
    13381                             const char *pszName    = "what_the_fudge";
    13382                             const char *pszComment = "";
    13383                             bool        fNumbered  = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
    13384                             switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
    13385                             {
    13386                                 case kIemNativeLabelType_Return:
    13387                                     pszName = "Return";
    13388                                     break;
    13389                                 case kIemNativeLabelType_ReturnBreak:
    13390                                     pszName = "ReturnBreak";
    13391                                     break;
    13392                                 case kIemNativeLabelType_ReturnWithFlags:
    13393                                     pszName = "ReturnWithFlags";
    13394                                     break;
    13395                                 case kIemNativeLabelType_NonZeroRetOrPassUp:
    13396                                     pszName = "NonZeroRetOrPassUp";
    13397                                     break;
    13398                                 case kIemNativeLabelType_RaiseGp0:
    13399                                     pszName = "RaiseGp0";
    13400                                     break;
    13401                                 case kIemNativeLabelType_ObsoleteTb:
    13402                                     pszName = "ObsoleteTb";
    13403                                     break;
    13404                                 case kIemNativeLabelType_NeedCsLimChecking:
    13405                                     pszName = "NeedCsLimChecking";
    13406                                     break;
    13407                                 case kIemNativeLabelType_CheckBranchMiss:
    13408                                     pszName = "CheckBranchMiss";
    13409                                     break;
    13410                                 case kIemNativeLabelType_If:
    13411                                     pszName = "If";
    13412                                     fNumbered = true;
    13413                                     break;
    13414                                 case kIemNativeLabelType_Else:
    13415                                     pszName = "Else";
    13416                                     fNumbered = true;
    13417                                     pszComment = "   ; regs state restored pre-if-block";
    13418                                     break;
    13419                                 case kIemNativeLabelType_Endif:
    13420                                     pszName = "Endif";
    13421                                     fNumbered = true;
    13422                                     break;
    13423                                 case kIemNativeLabelType_CheckIrq:
    13424                                     pszName = "CheckIrq_CheckVM";
    13425                                     fNumbered = true;
    13426                                     break;
    13427                                 case kIemNativeLabelType_TlbLookup:
    13428                                     pszName = "TlbLookup";
    13429                                     fNumbered = true;
    13430                                     break;
    13431                                 case kIemNativeLabelType_TlbMiss:
    13432                                     pszName = "TlbMiss";
    13433                                     fNumbered = true;
    13434                                     break;
    13435                                 case kIemNativeLabelType_TlbDone:
    13436                                     pszName = "TlbDone";
    13437                                     fNumbered = true;
    13438                                     break;
    13439                                 case kIemNativeLabelType_Invalid:
    13440                                 case kIemNativeLabelType_End:
    13441                                     break;
    13442                             }
    13443                             if (fNumbered)
    13444                                 pHlp->pfnPrintf(pHlp, "  %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
    13445                             else
    13446                                 pHlp->pfnPrintf(pHlp, "  %s:\n", pszName);
    13447                             continue;
    13448                         }
    13449 
    13450                         case kIemTbDbgEntryType_NativeOffset:
    13451                             offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
    13452                             Assert(offDbgNativeNext > offNative);
    13453                             break;
    13454 
    13455                         default:
    13456                             AssertFailed();
    13457                     }
    13458                     iDbgEntry++;
    13459                     break;
    13460                 }
    13461             }
    13462 
    13463             /*
    13464              * Disassemble the next native instruction.
    13465              */
    13466             PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
    13467 # ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    13468             uint32_t               cbInstr    = sizeof(paNative[0]);
    13469             int const              rc         = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
    13470             if (RT_SUCCESS(rc))
    13471             {
    13472 #  if defined(RT_ARCH_AMD64)
    13473                 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
    13474                 {
    13475                     uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
    13476                     if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
    13477                         pHlp->pfnPrintf(pHlp, "    %p: nop ; marker: call #%u to %s (%u args) - %s\n",
    13478                                         pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
    13479                                         g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
    13480                                         uInfo & 0x8000 ? "recompiled" : "todo");
    13481                     else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
    13482                         pHlp->pfnPrintf(pHlp, "    %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
    13483                     else
    13484                         pHlp->pfnPrintf(pHlp, "    %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
    13485                 }
    13486                 else
    13487 #  endif
    13488                 {
    13489 #  ifdef RT_ARCH_AMD64
    13490                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    13491                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    13492                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    13493                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    13494 #  elif defined(RT_ARCH_ARM64)
    13495                     DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
    13496                                      DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    13497                                      NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    13498 #  else
    13499 #   error "Port me"
    13500 #  endif
    13501                     pHlp->pfnPrintf(pHlp, "    %p: %s\n", pNativeCur, szDisBuf);
    13502                 }
    13503             }
    13504             else
    13505             {
    13506 #  if defined(RT_ARCH_AMD64)
    13507                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %Rrc\n",
    13508                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
    13509 #  elif defined(RT_ARCH_ARM64)
    13510                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
    13511 #  else
    13512 #   error "Port me"
    13513 #  endif
    13514                 cbInstr = sizeof(paNative[0]);
    13515             }
    13516             offNative += cbInstr / sizeof(paNative[0]);
    13517 
    13518 #  else  /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    13519             cs_insn *pInstr;
    13520             size_t   cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
    13521                                          (uintptr_t)pNativeCur, 1, &pInstr);
    13522             if (cInstrs > 0)
    13523             {
    13524                 Assert(cInstrs == 1);
    13525 #  if defined(RT_ARCH_AMD64)
    13526                 pHlp->pfnPrintf(pHlp, "    %p: %.*Rhxs %-7s %s\n",
    13527                                 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
    13528 #  else
    13529                 pHlp->pfnPrintf(pHlp, "    %p: %#010RX32 %-7s %s\n",
    13530                                 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
    13531 #  endif
    13532                 offNative += pInstr->size / sizeof(*pNativeCur);
    13533                 cs_free(pInstr, cInstrs);
    13534             }
    13535             else
    13536             {
    13537 #  if defined(RT_ARCH_AMD64)
    13538                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %d\n",
    13539                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
    13540 #  else
    13541                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
    13542 #  endif
    13543                 offNative++;
    13544             }
    13545 # endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    13546         }
    13547     }
    13548     else
    13549 #endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
    13550     {
    13551         /*
    13552          * No debug info, just disassemble the x86 code and then the native code.
    13553          *
    13554          * First the guest code:
    13555          */
    13556         for (unsigned i = 0; i < pTb->cRanges; i++)
    13557         {
    13558             RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
    13559                               + (pTb->aRanges[i].idxPhysPage == 0
    13560                                  ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
    13561                                  : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
    13562             pHlp->pfnPrintf(pHlp, "  Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
    13563                             i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
    13564             unsigned       off       = pTb->aRanges[i].offOpcodes;
    13565             /** @todo this doesn't work when crossing pages!   */
    13566             unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
    13567             while (off < cbOpcodes)
    13568             {
    13569                 uint32_t cbInstr = 1;
    13570                 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
    13571                                                      &pTb->pabOpcodes[off], cbOpcodes - off,
    13572                                                      iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
    13573                 if (RT_SUCCESS(rc))
    13574                 {
    13575                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    13576                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    13577                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    13578                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    13579                     pHlp->pfnPrintf(pHlp, "    %RGp: %s\n", GCPhysPc, szDisBuf);
    13580                     GCPhysPc += cbInstr;
    13581                     off      += cbInstr;
    13582                 }
    13583                 else
    13584                 {
    13585                     pHlp->pfnPrintf(pHlp, "    %RGp: %.*Rhxs - disassembly failure %Rrc\n",
    13586                                     GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
    13587                     break;
    13588                 }
    13589             }
    13590         }
    13591 
    13592         /*
    13593          * Then the native code:
    13594          */
    13595         pHlp->pfnPrintf(pHlp, "  Native code %p L %#x\n", paNative, cNative);
    13596         while (offNative < cNative)
    13597         {
    13598             PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
    13599 # ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    13600             uint32_t               cbInstr    = sizeof(paNative[0]);
    13601             int const              rc         = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
    13602             if (RT_SUCCESS(rc))
    13603             {
    13604 #  if defined(RT_ARCH_AMD64)
    13605                 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
    13606                 {
    13607                     uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
    13608                     if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
    13609                         pHlp->pfnPrintf(pHlp, "\n    %p: nop ; marker: call #%u to %s (%u args) - %s\n",
    13610                                         pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
    13611                                         g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
    13612                                         uInfo & 0x8000 ? "recompiled" : "todo");
    13613                     else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
    13614                         pHlp->pfnPrintf(pHlp, "    %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
    13615                     else
    13616                         pHlp->pfnPrintf(pHlp, "    %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
    13617                 }
    13618                 else
    13619 #  endif
    13620                 {
    13621 #  ifdef RT_ARCH_AMD64
    13622                     DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
    13623                                     DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
    13624                                     | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    13625                                     NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    13626 #  elif defined(RT_ARCH_ARM64)
    13627                     DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
    13628                                      DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
    13629                                      NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
    13630 #  else
    13631 #   error "Port me"
    13632 #  endif
    13633                     pHlp->pfnPrintf(pHlp, "    %p: %s\n", pNativeCur, szDisBuf);
    13634                 }
    13635             }
    13636             else
    13637             {
    13638 #  if defined(RT_ARCH_AMD64)
    13639                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %Rrc\n",
    13640                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
    13641 #  else
    13642                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
    13643 #  endif
    13644                 cbInstr = sizeof(paNative[0]);
    13645             }
    13646             offNative += cbInstr / sizeof(paNative[0]);
    13647 
    13648 # else  /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    13649             cs_insn *pInstr;
    13650             size_t   cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
    13651                                          (uintptr_t)pNativeCur, 1, &pInstr);
    13652             if (cInstrs > 0)
    13653             {
    13654                 Assert(cInstrs == 1);
    13655 #  if defined(RT_ARCH_AMD64)
    13656                 pHlp->pfnPrintf(pHlp, "    %p: %.*Rhxs %-7s %s\n",
    13657                                 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
    13658 #  else
    13659                 pHlp->pfnPrintf(pHlp, "    %p: %#010RX32 %-7s %s\n",
    13660                                 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
    13661 #  endif
    13662                 offNative += pInstr->size / sizeof(*pNativeCur);
    13663                 cs_free(pInstr, cInstrs);
    13664             }
    13665             else
    13666             {
    13667 #  if defined(RT_ARCH_AMD64)
    13668                 pHlp->pfnPrintf(pHlp, "    %p:  %.*Rhxs - disassembly failure %d\n",
    13669                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
    13670 #  else
    13671                 pHlp->pfnPrintf(pHlp, "    %p:  %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
    13672 #  endif
    13673                 offNative++;
    13674             }
    13675 # endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
    13676         }
    13677     }
    13678 
    13679 #ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
    13680     /* Cleanup. */
    13681     cs_close(&hDisasm);
    13682 #endif
    13683 }
    13684 
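The marker NOPs decoded above (AMD64 only) carry a uint32_t payload at byte offset 3 of a 7-byte NOP; as the RT_MAKE_U32 call in iemNativeRecompile below shows, the low word packs the call index with bit 15 set for recompiled calls, while the high word holds the threaded function number. A minimal decoding sketch, assuming the usual iprt/cdefs.h macros; the struct and helper names are illustrative and not part of the source:

/* Illustrative only: unpacks uInfo = RT_MAKE_U32(idxCall | (fRecompiled ? 0x8000 : 0), enmFunction). */
typedef struct MYMARKERINFO
{
    uint16_t idxCall;       /* Call number within the TB (bits 0..14). */
    bool     fRecompiled;   /* Bit 15: recompiled rather than a threaded-call stub. */
    uint16_t enmFunction;   /* The IEMTHREADEDFUNCS value (high word). */
} MYMARKERINFO;

DECLINLINE(MYMARKERINFO) myDecodeMarkerInfo(uint32_t uInfo)
{
    MYMARKERINFO Info;
    Info.idxCall     = (uint16_t)(uInfo & 0x7fff);
    Info.fRecompiled = RT_BOOL(uInfo & 0x8000);
    Info.enmFunction = RT_HIWORD(uInfo);
    return Info;
}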
    13685 
    13686 /**
    13687  * Recompiles the given threaded TB into a native one.
    13688  *
    13689  * In case of failure the translation block will be returned as-is.
    13690  *
    13691  * @returns pTb.
    13692  * @param   pVCpu   The cross context virtual CPU structure of the calling
    13693  *                  thread.
    13694  * @param   pTb     The threaded translation block to recompile to native.
    13695  */
    13696 DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
    13697 {
    13698     STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
    13699 
    13700     /*
    13701      * The first time through, we allocate the recompiler state; the other times
    13702      * we just need to reset it before using it again.
    13703      */
    13704     PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    13705     if (RT_LIKELY(pReNative))
    13706         iemNativeReInit(pReNative, pTb);
    13707     else
    13708     {
    13709         pReNative = iemNativeInit(pVCpu, pTb);
    13710         AssertReturn(pReNative, pTb);
    13711     }
    13712 
    13713     /*
    13714      * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
    13715      * so we can abort if an error occurs.
    13716      */
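    /*
     * A sketch of the pattern used below, assuming the macros wrap either C++
     * try/catch or setjmp/longjmp (their bodies are not part of this change):
     *
     *     IEMNATIVE_TRY_SETJMP(pReNative, rc)
     *     {
     *         ... emitters may throw / longjmp on failure ...
     *     }
     *     IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
     *     {
     *         ... error path, rc holds the VBox status code ...
     *     }
     *     IEMNATIVE_CATCH_LONGJMP_END(pReNative);
     */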
    13717     uint32_t        cCallsLeft = pTb->Thrd.cCalls;
    13718 #ifdef LOG_ENABLED
    13719     uint32_t const  cCallsOrg  = cCallsLeft;
    13720 #endif
    13721     uint32_t        off        = 0;
    13722     int             rc         = VINF_SUCCESS;
    13723     IEMNATIVE_TRY_SETJMP(pReNative, rc)
    13724     {
    13725         /*
    13726          * Emit prolog code (fixed).
    13727          */
    13728         off = iemNativeEmitProlog(pReNative, off);
    13729 
    13730         /*
    13731          * Convert the calls to native code.
    13732          */
    13733 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    13734         int32_t              iGstInstr        = -1;
    13735 #endif
    13736 #ifndef VBOX_WITHOUT_RELEASE_STATISTICS
    13737         uint32_t             cThreadedCalls   = 0;
    13738         uint32_t             cRecompiledCalls = 0;
    13739 #endif
    13740         PCIEMTHRDEDCALLENTRY pCallEntry       = pTb->Thrd.paCalls;
    13741         pReNative->fExec                      = pTb->fFlags & IEMTB_F_IEM_F_MASK;
    13742         while (cCallsLeft-- > 0)
    13743         {
    13744             PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
    13745 
    13746             /*
    13747              * Debug info and assembly markup.
    13748              */
    13749             if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
    13750                 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
    13751 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    13752             iemNativeDbgInfoAddNativeOffset(pReNative, off);
    13753             if (iGstInstr < (int32_t)pCallEntry->idxInstr)
    13754             {
    13755                 if (iGstInstr < (int32_t)pTb->cInstructions)
    13756                     iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
    13757                 else
    13758                     Assert(iGstInstr == pTb->cInstructions);
    13759                 iGstInstr = pCallEntry->idxInstr;
    13760             }
    13761             iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
    13762 #endif
    13763 #if defined(VBOX_STRICT)
    13764             off = iemNativeEmitMarker(pReNative, off,
    13765                                       RT_MAKE_U32((pTb->Thrd.cCalls - cCallsLeft - 1) | (pfnRecom ? 0x8000 : 0),
    13766                                                   pCallEntry->enmFunction));
    13767             iemNativeRegAssertSanity(pReNative);
    13768 #endif
    13771 
    13772             /*
    13773              * Actual work.
    13774              */
    13775             Log2(("%u[%u]: %s%s\n", pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr,
    13776                   g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)"));
    13777             if (pfnRecom) /** @todo stats on this.   */
    13778             {
    13779                 off = pfnRecom(pReNative, off, pCallEntry);
    13780                 STAM_REL_STATS({cRecompiledCalls++;});
    13781             }
    13782             else
    13783             {
    13784                 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
    13785                 STAM_REL_STATS({cThreadedCalls++;});
    13786             }
    13787             Assert(off <= pReNative->cInstrBufAlloc);
    13788             Assert(pReNative->cCondDepth == 0);
    13789 
    13790             /*
    13791              * Advance.
    13792              */
    13793             pCallEntry++;
    13794         }
    13795 
    13796         STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
    13797         STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded,   cThreadedCalls);
    13798         if (!cThreadedCalls)
    13799             STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
    13800 
    13801         /*
    13802          * Emit the epilog code.
    13803          */
    13804         uint32_t idxReturnLabel;
    13805         off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
    13806 
    13807         /*
    13808          * Generate special jump labels.
    13809          */
    13810         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
    13811             off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
    13812         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
    13813             off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
    13814         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
    13815             off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
    13816         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
    13817             off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
    13818         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
    13819             off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
    13820         if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
    13821             off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
    13822     }
    13823     IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
    13824     {
    13825         Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
    13826         return pTb;
    13827     }
    13828     IEMNATIVE_CATCH_LONGJMP_END(pReNative);
    13829     Assert(off <= pReNative->cInstrBufAlloc);
    13830 
    13831     /*
    13832      * Make sure all labels have been defined.
    13833      */
    13834     PIEMNATIVELABEL const paLabels = pReNative->paLabels;
    13835 #ifdef VBOX_STRICT
    13836     uint32_t const        cLabels  = pReNative->cLabels;
    13837     for (uint32_t i = 0; i < cLabels; i++)
    13838         AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
    13839 #endif
    13840 
    13841     /*
    13842      * Allocate executable memory, copy over the code we've generated.
    13843      */
    13844     PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
    13845     if (pTbAllocator->pDelayedFreeHead)
    13846         iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
    13847 
    13848     PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
    13849     AssertReturn(paFinalInstrBuf, pTb);
    13850     memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
    13851 
    13852     /*
    13853      * Apply fixups.
    13854      */
    13855     PIEMNATIVEFIXUP const paFixups   = pReNative->paFixups;
    13856     uint32_t const        cFixups    = pReNative->cFixups;
    13857     for (uint32_t i = 0; i < cFixups; i++)
    13858     {
    13859         Assert(paFixups[i].off < off);
    13860         Assert(paFixups[i].idxLabel < cLabels);
    13861         AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
    13862                   ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
    13863                    paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
    13864         RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
    13865         switch (paFixups[i].enmType)
    13866         {
    13867 #if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
    13868             case kIemNativeFixupType_Rel32:
    13869                 Assert(paFixups[i].off + 4 <= off);
    13870                 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    13871                 continue;
    13872 
    13873 #elif defined(RT_ARCH_ARM64)
    13874             case kIemNativeFixupType_RelImm26At0:
    13875             {
    13876                 Assert(paFixups[i].off < off);
    13877                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    13878                 Assert(offDisp >= -262144 && offDisp < 262144);
    13879                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
    13880                 continue;
    13881             }
    13882 
    13883             case kIemNativeFixupType_RelImm19At5:
    13884             {
    13885                 Assert(paFixups[i].off < off);
    13886                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    13887                 Assert(offDisp >= -262144 && offDisp < 262144);
    13888                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
    13889                 continue;
    13890             }
    13891 
    13892             case kIemNativeFixupType_RelImm14At5:
    13893             {
    13894                 Assert(paFixups[i].off < off);
    13895                 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
    13896                 Assert(offDisp >= -8192 && offDisp < 8192);
    13897                 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
    13898                 continue;
    13899             }
    13900 
    13901 #endif
    13902             case kIemNativeFixupType_Invalid:
    13903             case kIemNativeFixupType_End:
    13904                 break;
    13905         }
    13906         AssertFailed();
    13907     }
    13908 
    13909     iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
    13910     STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
    13911 
    13912     /*
    13913      * Convert the translation block.
    13914      */
    13915     RTMemFree(pTb->Thrd.paCalls);
    13916     pTb->Native.paInstructions  = paFinalInstrBuf;
    13917     pTb->Native.cInstructions   = off;
    13918     pTb->fFlags                 = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
    13919 #ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    13920     pTb->pDbgInfo               = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
    13921                                                       RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
    13922 #endif
    13923 
    13924     Assert(pTbAllocator->cThreadedTbs > 0);
    13925     pTbAllocator->cThreadedTbs -= 1;
    13926     pTbAllocator->cNativeTbs   += 1;
    13927     Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
    13928 
    13929 #ifdef LOG_ENABLED
    13930     /*
    13931      * Disassemble to the log if enabled.
    13932      */
    13933     if (LogIs3Enabled())
    13934     {
    13935         Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
    13936         iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
    13937 # ifdef DEBUG_bird
    13938         RTLogFlush(NULL);
    13939 # endif
    13940     }
    13941 #endif
    13942     /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
    13943 
    13944     STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
    13945     return pTb;
    13946 }
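The fixup loop above resolves label references directly in paFinalInstrBuf: the AMD64 Rel32 case stores the byte distance from the fixup to the label (with offAddend typically absorbing the rel32 end-of-instruction adjustment), while the ARM64 cases mask an instruction-unit displacement into the branch immediate fields. A standalone sketch of the imm26 case; the function name is illustrative, and the range comment reflects the field width rather than the recompiler's tighter asserts:

/* Illustrative only: mirrors the kIemNativeFixupType_RelImm26At0 patching above.
   offDisp = label offset - fixup offset + addend, in 32-bit instruction units;
   the signed 26-bit field gives a +/-2^25 instruction reach. */
static void myPatchRelImm26At0(uint32_t *pu32Instr, int32_t offDisp)
{
    *pu32Instr = (*pu32Instr & UINT32_C(0xfc000000))    /* keep the B/BL opcode bits */
               | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
}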
    13947 
  • trunk/src/VBox/VMM/include/IEMN8veRecompilerTlbLookup.h

     713/** @} */
     714
     715#endif /* !VMM_INCLUDED_SRC_include_IEMN8veRecompilerTlbLookup_h */
     716