VirtualBox

Changeset 101484 in vbox for trunk/src/VBox/VMM


Timestamp: Oct 18, 2023 1:32:17 AM
Author: vboxsync
Message: VMM/IEM: Basic register allocator sketches that incorporate simple skipping of guest register value loads. Sketched out variable and argument management. Start telling GDB about our jitted code to help with backtraces. ++ bugref:10371

Location: trunk/src/VBox/VMM
Files: 10 edited
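
For context on the "telling GDB about our jitted code" part of the message: the new IEMNATIVE_USE_GDB_JIT code in IEMAllN8veRecompiler.cpp builds a small in-memory ELF image per executable-memory chunk and announces it through GDB's documented JIT interface. Below is a minimal sketch of that interface using the canonical names from the GDB manual rather than the VBox GDBJITDESCRIPTOR/GDBJITCODEENTRY ones; registerJitObjectWithGdb() is a hypothetical helper that only illustrates the handshake (the changeset additionally serializes registration with a critical section and keeps a pTail pointer for appending).

    /* Minimal sketch of GDB's JIT registration interface (see the GDB manual,
       "JIT Compilation Interface").  Not VBox code; illustrative only. */
    #include <stdint.h>

    typedef enum { JIT_NOACTION = 0, JIT_REGISTER_FN, JIT_UNREGISTER_FN } jit_actions_t;

    struct jit_code_entry
    {
        struct jit_code_entry *next_entry;
        struct jit_code_entry *prev_entry;
        const char            *symfile_addr;  /* in-memory ELF image describing the jitted code */
        uint64_t               symfile_size;
    };

    struct jit_descriptor
    {
        uint32_t               version;       /* must be 1 */
        uint32_t               action_flag;   /* one of jit_actions_t */
        struct jit_code_entry *relevant_entry;
        struct jit_code_entry *first_entry;
    };

    /* GDB plants a breakpoint on this symbol; it must not be inlined away. */
    extern "C" void __attribute__((noinline)) __jit_debug_register_code(void) { }

    /* GDB finds the list of jitted objects through this symbol. */
    extern "C" struct jit_descriptor __jit_debug_descriptor = { 1, JIT_NOACTION, nullptr, nullptr };

    /* Hypothetical helper: announce one in-memory ELF object to the debugger. */
    static void registerJitObjectWithGdb(struct jit_code_entry *pEntry, const void *pvElf, uint64_t cbElf)
    {
        pEntry->symfile_addr = (const char *)pvElf;
        pEntry->symfile_size = cbElf;
        pEntry->prev_entry   = nullptr;
        pEntry->next_entry   = __jit_debug_descriptor.first_entry;
        if (pEntry->next_entry)
            pEntry->next_entry->prev_entry = pEntry;
        __jit_debug_descriptor.first_entry    = pEntry;
        __jit_debug_descriptor.relevant_entry = pEntry;
        __jit_debug_descriptor.action_flag    = JIT_REGISTER_FN;
        __jit_debug_register_code();          /* GDB reads the descriptor when this breakpoint hits */
        __jit_debug_descriptor.action_flag    = JIT_NOACTION;
    }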

  • trunk/src/VBox/VMM/VMMAll/IEMAllInstOneByte.cpp.h

    r101387 r101484  
    57645764    /*
    57655765     * If rm is denoting a register, no more instruction bytes.
     5766     *
     5767     * Note! Using IEMOP_MOV_SW_EV_REG_BODY here to specify different
     5768     *       IEM_CIMPL_F_XXX values depending on the CPU mode and target
     5769     *       register. This is a restriction of the current recompiler
     5770     *       approach.
    57665771     */
    57675772    if (IEM_IS_MODRM_REG_MODE(bRm))
    57685773    {
    5769         /** @todo Only set IEM_CIMPL_F_INHIBIT_SHADOW when it actually applies... */
    5770         IEM_MC_BEGIN(2, 0, 0, 0);
    5771         IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
    5772         IEM_MC_ARG_CONST(uint8_t, iSRegArg, iSegReg, 0);
    5773         IEM_MC_ARG(uint16_t,      u16Value,          1);
    5774         IEM_MC_FETCH_GREG_U16(u16Value, IEM_GET_MODRM_RM(pVCpu, bRm));
    5775         if (iSRegArg >= X86_SREG_FS || !IEM_IS_32BIT_CODE(pVCpu))
    5776             IEM_MC_CALL_CIMPL_2(IEM_CIMPL_F_INHIBIT_SHADOW,                    iemCImpl_load_SReg, iSRegArg, u16Value);
     5774#define IEMOP_MOV_SW_EV_REG_BODY(a_fCImplFlags) \
     5775            IEM_MC_BEGIN(2, 0, 0, a_fCImplFlags); \
     5776            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX(); \
     5777            IEM_MC_ARG_CONST(uint8_t, iSRegArg, iSegReg, 0); \
     5778            IEM_MC_ARG(uint16_t,      u16Value,          1); \
     5779            IEM_MC_FETCH_GREG_U16(u16Value, IEM_GET_MODRM_RM(pVCpu, bRm)); \
     5780            IEM_MC_CALL_CIMPL_2(a_fCImplFlags, iemCImpl_load_SReg, iSRegArg, u16Value); \
     5781            IEM_MC_END()
     5782
     5783        if (iSegReg == X86_SREG_SS)
     5784        {
     5785            if (IEM_IS_32BIT_CODE(pVCpu))
     5786            {
     5787                IEMOP_MOV_SW_EV_REG_BODY(IEM_CIMPL_F_INHIBIT_SHADOW | IEM_CIMPL_F_MODE);
     5788            }
     5789            else
     5790            {
     5791                IEMOP_MOV_SW_EV_REG_BODY(IEM_CIMPL_F_INHIBIT_SHADOW);
     5792            }
     5793        }
     5794        else if (iSegReg >= X86_SREG_FS || !IEM_IS_32BIT_CODE(pVCpu))
     5795        {
     5796            IEMOP_MOV_SW_EV_REG_BODY(0);
     5797        }
    57775798        else
    5778             IEM_MC_CALL_CIMPL_2(IEM_CIMPL_F_INHIBIT_SHADOW | IEM_CIMPL_F_MODE, iemCImpl_load_SReg, iSRegArg, u16Value);
    5779         IEM_MC_END();
     5799        {
     5800            IEMOP_MOV_SW_EV_REG_BODY(IEM_CIMPL_F_MODE);
     5801        }
     5802#undef IEMOP_MOV_SW_EV_REG_BODY
    57805803    }
    57815804    else
     
    57855808         * regardless of operand size prefixes.
    57865809         */
    5787         /** @todo Only set IEM_CIMPL_F_INHIBIT_SHADOW when it actually applies... */
    5788         IEM_MC_BEGIN(2, 1, 0, 0);
    5789         IEM_MC_ARG_CONST(uint8_t, iSRegArg, iSegReg, 0);
    5790         IEM_MC_ARG(uint16_t,      u16Value,          1);
    5791         IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst);
    5792         IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0);
    5793         IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX();
    5794         IEM_MC_FETCH_MEM_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst);
    5795         if (iSRegArg >= X86_SREG_FS || !IEM_IS_32BIT_CODE(pVCpu))
    5796             IEM_MC_CALL_CIMPL_2(IEM_CIMPL_F_INHIBIT_SHADOW,                    iemCImpl_load_SReg, iSRegArg, u16Value);
     5810#define IEMOP_MOV_SW_EV_MEM_BODY(a_fCImplFlags) \
     5811            IEM_MC_BEGIN(2, 1, 0, a_fCImplFlags); \
     5812            IEM_MC_ARG_CONST(uint8_t, iSRegArg, iSegReg, 0); \
     5813            IEM_MC_ARG(uint16_t,      u16Value,          1); \
     5814            IEM_MC_LOCAL(RTGCPTR, GCPtrEffDst); \
     5815            IEM_MC_CALC_RM_EFF_ADDR(GCPtrEffDst, bRm, 0); \
     5816            IEMOP_HLP_DONE_DECODING_NO_LOCK_PREFIX(); \
     5817            IEM_MC_FETCH_MEM_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffDst); \
     5818            IEM_MC_CALL_CIMPL_2(a_fCImplFlags, iemCImpl_load_SReg, iSRegArg, u16Value); \
     5819            IEM_MC_END()
     5820
     5821        if (iSegReg == X86_SREG_SS)
     5822        {
     5823            if (IEM_IS_32BIT_CODE(pVCpu))
     5824            {
     5825                IEMOP_MOV_SW_EV_MEM_BODY(IEM_CIMPL_F_INHIBIT_SHADOW | IEM_CIMPL_F_MODE);
     5826            }
     5827            else
     5828            {
     5829                IEMOP_MOV_SW_EV_MEM_BODY(IEM_CIMPL_F_INHIBIT_SHADOW);
     5830            }
     5831        }
     5832        else if (iSegReg >= X86_SREG_FS || !IEM_IS_32BIT_CODE(pVCpu))
     5833        {
     5834            IEMOP_MOV_SW_EV_MEM_BODY(0);
     5835        }
    57975836        else
    5798             IEM_MC_CALL_CIMPL_2(IEM_CIMPL_F_INHIBIT_SHADOW | IEM_CIMPL_F_MODE, iemCImpl_load_SReg, iSRegArg, u16Value);
    5799         IEM_MC_END();
     5837        {
     5838            IEMOP_MOV_SW_EV_MEM_BODY(IEM_CIMPL_F_MODE);
     5839        }
     5840#undef IEMOP_MOV_SW_EV_MEM_BODY
    58005841    }
    58015842}
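
The note at the top of this hunk explains that IEMOP_MOV_SW_EV_REG_BODY exists so that different IEM_CIMPL_F_XXX values can be baked in per CPU mode and target segment register. Boiled down, the flag selection that the new IEMOP_MOV_SW_EV_REG_BODY/IEMOP_MOV_SW_EV_MEM_BODY invocations encode looks roughly like this (illustrative sketch only, not part of the changeset; movSRegCImplFlags() is a hypothetical helper and the constants come from iprt/x86.h and IEMInternal.h):

    /* Hypothetical helper summarizing the flag choices made above; not VBox code. */
    static uint32_t movSRegCImplFlags(uint8_t iSegReg, bool fIs32BitCode)
    {
        uint32_t fFlags = 0;
        if (iSegReg == X86_SREG_SS)
            fFlags |= IEM_CIMPL_F_INHIBIT_SHADOW;   /* mov ss starts an interrupt shadow */
        if (fIs32BitCode && iSegReg < X86_SREG_FS)
            fFlags |= IEM_CIMPL_F_MODE;             /* ES/SS/DS loads in 32-bit code need a mode-related recheck */
        return fFlags;
    }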
  • trunk/src/VBox/VMM/VMMAll/IEMAllInstPython.py

    r101387 r101484  
    20032003        ## IEM_MC_BEGIN: Argument count.
    20042004        self.cArgs        = -1;
     2005        ## IEM_MC_ARG, IEM_MC_ARG_CONST, IEM_MC_ARG_LOCAL_REF, IEM_MC_ARG_LOCAL_EFLAGS.
     2006        self.aoArgs       = []              # type: List[McStmtArg]
    20052007        ## IEM_MC_BEGIN: Locals count.
    20062008        self.cLocals      = -1;
     2009        ## IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, IEM_MC_ARG_LOCAL_EFLAGS.
     2010        self.aoLocals     = []              # type: List[McStmtVar]
    20072011        ## IEM_MC_BEGIN: IEM_MC_F_XXX dictionary
    20082012        self.dsMcFlags    = {}              # type: Dict[str, bool]
     
    20792083        """ IEM_MC_ARG """
    20802084        oSelf.checkStmtParamCount(sName, asParams, 3);
    2081         return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));
     2085        oStmt = McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[2]));
     2086        oSelf.aoArgs.append(oStmt);
     2087        return oStmt;
    20822088
    20832089    @staticmethod
     
    20852091        """ IEM_MC_ARG_CONST """
    20862092        oSelf.checkStmtParamCount(sName, asParams, 4);
    2087         return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);
     2093        oStmt = McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sConstValue = asParams[2]);
     2094        oSelf.aoArgs.append(oStmt);
     2095        return oStmt;
    20882096
    20892097    @staticmethod
     
    20912099        """ IEM_MC_ARG_LOCAL_REF """
    20922100        oSelf.checkStmtParamCount(sName, asParams, 4);
    2093         return McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');
     2101        oStmt = McStmtArg(sName, asParams, asParams[0], asParams[1], int(asParams[3]), sRef = asParams[2], sRefType = 'local');
     2102        oSelf.aoArgs.append(oStmt);
     2103        return oStmt;
    20942104
    20952105    @staticmethod
     
    20982108        oSelf.checkStmtParamCount(sName, asParams, 3);
    20992109        # Note! We split this one up into IEM_MC_LOCAL_VAR and IEM_MC_ARG_LOCAL_REF.
    2100         return (
    2101             McStmtVar('IEM_MC_LOCAL', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]),
    2102             McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
    2103                       'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local'),
    2104         );
     2110        oStmtLocal = McStmtVar('IEM_MC_LOCAL', ['uint32_t', asParams[1],], 'uint32_t', asParams[1]);
     2111        oSelf.aoLocals.append(oStmtLocal);
     2112        oStmtArg   = McStmtArg('IEM_MC_ARG_LOCAL_REF', ['uint32_t *', asParams[0], asParams[1], asParams[2]],
     2113                               'uint32_t *', asParams[0], int(asParams[2]), sRef = asParams[1], sRefType = 'local');
     2114        oSelf.aoArgs.append(oStmtArg);
     2115        return (oStmtLocal, oStmtArg,);
     2116
     2117    @staticmethod
     2118    def parseMcImplicitAvxAArgs(oSelf, sName, asParams):
     2119        """ IEM_MC_IMPLICIT_AVX_AIMPL_ARGS """
     2120        oSelf.checkStmtParamCount(sName, asParams, 0);
     2121        # Note! Translate to IEM_MC_ARG_CONST
     2122        oStmt = McStmtArg('IEM_MC_ARG_CONST', ['PX86XSAVEAREA', 'pXState', '&pVCpu->cpum.GstCtx.XState', '0'],
     2123                          'PX86XSAVEAREA', 'pXState', 0,  '&pVCpu->cpum.GstCtx.XState');
     2124        oSelf.aoArgs.append(oStmt);
     2125        return oStmt;
    21052126
    21062127    @staticmethod
     
    21082129        """ IEM_MC_LOCAL """
    21092130        oSelf.checkStmtParamCount(sName, asParams, 2);
    2110         return McStmtVar(sName, asParams, asParams[0], asParams[1]);
     2131        oStmt = McStmtVar(sName, asParams, asParams[0], asParams[1]);
     2132        oSelf.aoLocals.append(oStmt);
     2133        return oStmt;
    21112134
    21122135    @staticmethod
     
    21142137        """ IEM_MC_LOCAL_CONST """
    21152138        oSelf.checkStmtParamCount(sName, asParams, 3);
    2116         return McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);
     2139        oStmt = McStmtVar(sName, asParams, asParams[0], asParams[1], sConstValue = asParams[2]);
     2140        oSelf.aoLocals.append(oStmt);
     2141        return oStmt;
    21172142
    21182143    @staticmethod
     
    28812906    'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80':                   (McBlock.parseMcGenericCond,       True,  False, ),
    28822907    'IEM_MC_IF_TWO_FPUREGS_NOT_EMPTY_REF_R80_FIRST':             (McBlock.parseMcGenericCond,       True,  False, ),
    2883     'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS':                            (McBlock.parseMcGeneric,           False, False, ),
     2908    'IEM_MC_IMPLICIT_AVX_AIMPL_ARGS':                            (McBlock.parseMcImplicitAvxAArgs,  False, False, ),
    28842909    'IEM_MC_INT_CLEAR_ZMM_256_UP':                               (McBlock.parseMcGeneric,           True,  False, ),
    28852910    'IEM_MC_LOCAL':                                              (McBlock.parseMcLocal,             False, False, ),
  • trunk/src/VBox/VMM/VMMAll/IEMAllInstTwoByte0f.cpp.h

    r101448 r101484  
    1061510615     *        assuming that it will be ignored. Would be nice to have a few
    1061610616     *        test for this. */
     10617
     10618    /** @todo There should be no difference in the behaviour whether REX.W is
     10619     *        present or not... */
     10620
    1061710621    /*
    1061810622     * If rm is denoting a register, no more instruction bytes.
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8vePython.py

    r101387 r101484  
    5252
    5353    'IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16':               (None, True,  False, ),
    54     'IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32':               (None, True,  False, ), # True ),
     54    'IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32':               (None, True,  True, ),
    5555    'IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64':               (None, True,  False, ),
    5656
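
The bulk of the change lands in IEMAllN8veRecompiler.cpp below. The "simple skipping of guest register value loads" from the commit message comes from tracking which guest registers currently have a shadow copy in a host register, so a later use of the same guest register inside a block can reuse the host copy instead of reloading it from CPUMCTX. A much simplified sketch of the idea (not the actual VBox code; allocTmpHostReg() and emitLoadFromCpumCtx() are stand-in stubs for iemNativeRegAllocTmp() and iemNativeEmitLoadGprWithGstShadowReg()):

    /* Simplified sketch of guest-register shadowing; illustrative only. */
    #include <stdint.h>

    struct AllocatorState
    {
        uint64_t bmGstRegShadows;          /* bitmap: guest regs currently shadowed by a host reg */
        uint8_t  aidxGstRegShadows[64];    /* guest reg index -> host reg holding the shadow copy */
    };

    static uint8_t allocTmpHostReg(void)                  { return 0; /* stub: pick a free host register */ }
    static void    emitLoadFromCpumCtx(uint8_t, unsigned) {           /* stub: emit a load from CPUMCTX  */ }

    /* Return a host register holding the guest register's value, loading it only if needed. */
    static uint8_t allocForGuestReg(AllocatorState *pState, unsigned idxGstReg)
    {
        if (pState->bmGstRegShadows & (UINT64_C(1) << idxGstReg))
            return pState->aidxGstRegShadows[idxGstReg];  /* shadow exists: skip the load entirely */

        uint8_t const idxHstReg = allocTmpHostReg();
        emitLoadFromCpumCtx(idxHstReg, idxGstReg);
        pState->bmGstRegShadows             |= UINT64_C(1) << idxGstReg;
        pState->aidxGstRegShadows[idxGstReg] = idxHstReg;
        return idxHstReg;
    }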
  • trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp

    r101387 r101484  
    77 *      - Flow  (LogFlow) : ...
    88 *      - Level 2  (Log2) : ...
    9  *      - Level 3  (Log3) : ...
     9 *      - Level 3  (Log3) : Disassemble native code after recompiling.
    1010 *      - Level 4  (Log4) : ...
    1111 *      - Level 5  (Log5) : ...
     
    1616 *      - Level 10 (Log10): ...
    1717 *      - Level 11 (Log11): ...
    18  *      - Level 12 (Log12): ...
     18 *      - Level 12 (Log12): Register allocator
    1919 */
    2020
     
    4545*   Header Files                                                                                                                 *
    4646*********************************************************************************************************************************/
    47 #define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
     47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
    4848#define IEM_WITH_OPAQUE_DECODER_STATE
    4949#define VMCPU_INCL_CPUM_GST_CTX
     
    7878extern "C" void  __deregister_frame(const void *pvFde);
    7979# else
     80#  ifdef DEBUG_bird /** @todo not thread safe yet */
     81#   define IEMNATIVE_USE_GDB_JIT
     82#  endif
     83#  ifdef IEMNATIVE_USE_GDB_JIT
     84#   include <iprt/critsect.h>
     85#   include <iprt/once.h>
     86#   include <iprt/formats/elf64.h>
     87#  endif
    8088extern "C" void  __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
    8189extern "C" void *__deregister_frame_info(void *pvBegin);           /* (returns pvObj from __register_frame_info call) */
     
    129137
    130138#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
     139# ifdef IEMNATIVE_USE_GDB_JIT
     140#   define IEMNATIVE_USE_GDB_JIT_ET_DYN
     141
     142/** GDB JIT: Code entry.   */
     143typedef struct GDBJITCODEENTRY
     144{
     145    struct GDBJITCODEENTRY *pNext;
     146    struct GDBJITCODEENTRY *pPrev;
     147    uint8_t                *pbSymFile;
     148    uint64_t                cbSymFile;
     149} GDBJITCODEENTRY;
     150
     151/** GDB JIT: Actions. */
     152typedef enum GDBJITACTIONS : uint32_t
     153{
     154    kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
     155} GDBJITACTIONS;
     156
     157/** GDB JIT: Descriptor. */
     158typedef struct GDBJITDESCRIPTOR
     159{
     160    uint32_t            uVersion;
     161    GDBJITACTIONS       enmAction;
     162    GDBJITCODEENTRY    *pRelevant;
     163    GDBJITCODEENTRY    *pHead;
     164    /** Our addition: */
     165    GDBJITCODEENTRY    *pTail;
     166} GDBJITDESCRIPTOR;
     167
     168/** GDB JIT: Our simple symbol file data. */
     169typedef struct GDBJITSYMFILE
     170{
     171    Elf64_Ehdr          EHdr;
     172#  ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
     173    Elf64_Shdr          aShdrs[5];
     174#  else
     175    Elf64_Shdr          aShdrs[6];
     176    Elf64_Phdr          aPhdrs[3];
     177#  endif
     178    /** The dwarf ehframe data for the chunk. */
     179    uint8_t             abEhFrame[512];
     180    char                szzStrTab[128];
     181    Elf64_Sym           aSymbols[1];
     182#  ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
     183    Elf64_Dyn           aDyn[6];
     184#  endif
     185} GDBJITSYMFILE;
     186
     187extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
     188extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
     189
     190/** Init once for g_IemNativeGdbJitLock. */
     191static RTONCE     g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
     192/** Init once for the critical section. */
     193static RTCRITSECT g_IemNativeGdbJitLock;
     194
     195/** GDB reads the info here. */
     196GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
     197
     198/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
     199DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
     200{
     201    ASMNopPause();
     202}
     203
     204/** @callback_method_impl{FNRTONCE} */
     205static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
     206{
     207    RT_NOREF(pvUser);
     208    return RTCritSectInit(&g_IemNativeGdbJitLock);
     209}
     210
     211
     212# endif /* IEMNATIVE_USE_GDB_JIT */
     213
    131214/**
    132215 * Per-chunk unwind info for non-windows hosts.
     
    138221    uintptr_t               offFda;
    139222# else
    140     /** struct object storage area. */
     223    /** 'struct object' storage area. */
    141224    uint8_t                 abObject[1024];
    142225# endif
     226#  ifdef IEMNATIVE_USE_GDB_JIT
     227#   if 0
     228    /** The GDB JIT 'symbol file' data. */
     229    GDBJITSYMFILE           GdbJitSymFile;
     230#   endif
     231    /** The GDB JIT list entry. */
     232    GDBJITCODEENTRY         GdbJitEntry;
     233#  endif
    143234    /** The dwarf ehframe data for the chunk. */
    144235    uint8_t                 abEhFrame[512];
     
    841932#  endif
    842933
     934#  ifdef IEMNATIVE_USE_GDB_JIT
     935    /*
     936     * Now for telling GDB about this (experimental).
     937     *
     938     * This seems to work best with ET_DYN.
     939     */
     940    unsigned const cbNeeded        = sizeof(GDBJITSYMFILE);
     941#   ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
     942    unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
     943    GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
     944#   else
     945    unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
     946                                   - pExecMemAllocator->cbHeapBlockHdr;
     947    GDBJITSYMFILE * const pSymFile = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
     948#   endif
     949    AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
     950    unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
     951
     952    RT_ZERO(*pSymFile);
     953    /* The ELF header: */
     954    pSymFile->EHdr.e_ident[0]           = ELFMAG0;
     955    pSymFile->EHdr.e_ident[1]           = ELFMAG1;
     956    pSymFile->EHdr.e_ident[2]           = ELFMAG2;
     957    pSymFile->EHdr.e_ident[3]           = ELFMAG3;
     958    pSymFile->EHdr.e_ident[EI_VERSION]  = EV_CURRENT;
     959    pSymFile->EHdr.e_ident[EI_CLASS]    = ELFCLASS64;
     960    pSymFile->EHdr.e_ident[EI_DATA]     = ELFDATA2LSB;
     961    pSymFile->EHdr.e_ident[EI_OSABI]    = ELFOSABI_NONE;
     962#   ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
     963    pSymFile->EHdr.e_type               = ET_DYN;
     964#   else
     965    pSymFile->EHdr.e_type               = ET_REL;
     966#   endif
     967#   ifdef RT_ARCH_AMD64
     968    pSymFile->EHdr.e_machine            = EM_AMD64;
     969#   elif defined(RT_ARCH_ARM64)
     970    pSymFile->EHdr.e_machine            = EM_AARCH64;
     971#   else
     972#    error "port me"
     973#   endif
     974    pSymFile->EHdr.e_version            = 1; /*?*/
     975    pSymFile->EHdr.e_entry              = 0;
     976#   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
     977    pSymFile->EHdr.e_phoff              = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
     978#   else
     979    pSymFile->EHdr.e_phoff              = 0;
     980#   endif
     981    pSymFile->EHdr.e_shoff              = sizeof(pSymFile->EHdr);
     982    pSymFile->EHdr.e_flags              = 0;
     983    pSymFile->EHdr.e_ehsize             = sizeof(pSymFile->EHdr);
     984#   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
     985    pSymFile->EHdr.e_phentsize          = sizeof(pSymFile->aPhdrs[0]);
     986    pSymFile->EHdr.e_phnum              = RT_ELEMENTS(pSymFile->aPhdrs);
     987#   else
     988    pSymFile->EHdr.e_phentsize          = 0;
     989    pSymFile->EHdr.e_phnum              = 0;
     990#   endif
     991    pSymFile->EHdr.e_shentsize          = sizeof(pSymFile->aShdrs[0]);
     992    pSymFile->EHdr.e_shnum              = RT_ELEMENTS(pSymFile->aShdrs);
     993    pSymFile->EHdr.e_shstrndx           = 0; /* set later */
     994
     995    uint32_t offStrTab = 0;
     996#define APPEND_STR(a_szStr) do { \
     997        memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
     998        offStrTab += sizeof(a_szStr); \
     999    } while (0)
     1000    /* Section header #0: NULL */
     1001    unsigned i = 0;
     1002    APPEND_STR("");
     1003    RT_ZERO(pSymFile->aShdrs[i]);
     1004    i++;
     1005
     1006    /* Section header: .eh_frame */
     1007    pSymFile->aShdrs[i].sh_name         = offStrTab;
     1008    APPEND_STR(".eh_frame");
     1009    pSymFile->aShdrs[i].sh_type         = SHT_PROGBITS;
     1010    pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC | SHF_EXECINSTR;
     1011#   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
     1012    pSymFile->aShdrs[i].sh_offset
     1013        = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
     1014#   else
     1015    pSymFile->aShdrs[i].sh_addr         = (uintptr_t)&pSymFile->abEhFrame[0];
     1016    pSymFile->aShdrs[i].sh_offset       = 0;
     1017#   endif
     1018
     1019    pSymFile->aShdrs[i].sh_size         = sizeof(pEhFrame->abEhFrame);
     1020    pSymFile->aShdrs[i].sh_link         = 0;
     1021    pSymFile->aShdrs[i].sh_info         = 0;
     1022    pSymFile->aShdrs[i].sh_addralign    = 1;
     1023    pSymFile->aShdrs[i].sh_entsize      = 0;
     1024    memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
     1025    i++;
     1026
     1027    /* Section header: .shstrtab */
     1028    unsigned const iShStrTab = i;
     1029    pSymFile->EHdr.e_shstrndx           = iShStrTab;
     1030    pSymFile->aShdrs[i].sh_name         = offStrTab;
     1031    APPEND_STR(".shstrtab");
     1032    pSymFile->aShdrs[i].sh_type         = SHT_STRTAB;
     1033    pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
     1034#   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
     1035    pSymFile->aShdrs[i].sh_offset
     1036        = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
     1037#   else
     1038    pSymFile->aShdrs[i].sh_addr         = (uintptr_t)&pSymFile->szzStrTab[0];
     1039    pSymFile->aShdrs[i].sh_offset       = 0;
     1040#   endif
     1041    pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->szzStrTab);
     1042    pSymFile->aShdrs[i].sh_link         = 0;
     1043    pSymFile->aShdrs[i].sh_info         = 0;
     1044    pSymFile->aShdrs[i].sh_addralign    = 1;
     1045    pSymFile->aShdrs[i].sh_entsize      = 0;
     1046    i++;
     1047
     1048    /* Section header: .symbols */
     1049    pSymFile->aShdrs[i].sh_name         = offStrTab;
     1050#   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
     1051    APPEND_STR(".dynsym");
     1052    pSymFile->aShdrs[i].sh_type         = SHT_DYNSYM;
     1053#   else
     1054    APPEND_STR(".symtab");
     1055    pSymFile->aShdrs[i].sh_type         = SHT_SYMTAB;
     1056#   endif
     1057    pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
     1058    pSymFile->aShdrs[i].sh_offset
     1059        = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
     1060    pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aSymbols);
     1061    pSymFile->aShdrs[i].sh_link         = iShStrTab;
     1062    pSymFile->aShdrs[i].sh_info         = RT_ELEMENTS(pSymFile->aSymbols);
     1063    pSymFile->aShdrs[i].sh_addralign    = sizeof(pSymFile->aSymbols[0].st_value);
     1064    pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aSymbols[0]);
     1065    i++;
     1066
     1067#   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
     1068    /* Section header: .dynamic */
     1069    pSymFile->aShdrs[i].sh_name         = offStrTab;
     1070    APPEND_STR(".dynamic");
     1071    pSymFile->aShdrs[i].sh_type         = SHT_DYNAMIC;
     1072    pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC;
     1073    pSymFile->aShdrs[i].sh_offset
     1074        = pSymFile->aShdrs[i].sh_addr   = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
     1075    pSymFile->aShdrs[i].sh_size         = sizeof(pSymFile->aDyn);
     1076    pSymFile->aShdrs[i].sh_link         = iShStrTab;
     1077    pSymFile->aShdrs[i].sh_info         = 0;
     1078    pSymFile->aShdrs[i].sh_addralign    = 1;
     1079    pSymFile->aShdrs[i].sh_entsize      = sizeof(pSymFile->aDyn[0]);
     1080    i++;
     1081#   endif
     1082
     1083    /* Section header: .text */
     1084    pSymFile->aShdrs[i].sh_name         = offStrTab;
     1085    APPEND_STR(".text");
     1086    pSymFile->aShdrs[i].sh_type         = SHT_PROGBITS;
     1087    pSymFile->aShdrs[i].sh_flags        = SHF_ALLOC | SHF_EXECINSTR;
     1088#   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
     1089    pSymFile->aShdrs[i].sh_offset
     1090        = pSymFile->aShdrs[i].sh_addr   = sizeof(GDBJITSYMFILE);
     1091#   else
     1092    pSymFile->aShdrs[i].sh_addr         = (uintptr_t)(pSymFile + 1);
     1093    pSymFile->aShdrs[i].sh_offset       = 0;
     1094#   endif
     1095    pSymFile->aShdrs[i].sh_size         = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
     1096    pSymFile->aShdrs[i].sh_link         = 0;
     1097    pSymFile->aShdrs[i].sh_info         = 0;
     1098    pSymFile->aShdrs[i].sh_addralign    = 1;
     1099    pSymFile->aShdrs[i].sh_entsize      = 0;
     1100    i++;
     1101
     1102    Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
     1103
     1104#   if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
     1105    /*
     1106     * The program headers:
     1107     */
     1108    /* Headers and whatnot up to .dynamic: */
     1109    i = 0;
     1110    pSymFile->aPhdrs[i].p_type          = PT_LOAD;
     1111    pSymFile->aPhdrs[i].p_flags         = PF_X | PF_R;
     1112    pSymFile->aPhdrs[i].p_offset
     1113        = pSymFile->aPhdrs[i].p_vaddr
     1114        = pSymFile->aPhdrs[i].p_paddr   = 0;
     1115    pSymFile->aPhdrs[i].p_filesz         /* Size of segment in file. */
     1116        = pSymFile->aPhdrs[i].p_memsz   = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
     1117    pSymFile->aPhdrs[i].p_align         = HOST_PAGE_SIZE;
     1118    i++;
     1119    /* .dynamic */
     1120    pSymFile->aPhdrs[i].p_type          = PT_DYNAMIC;
     1121    pSymFile->aPhdrs[i].p_flags         = PF_R;
     1122    pSymFile->aPhdrs[i].p_offset
     1123        = pSymFile->aPhdrs[i].p_vaddr
     1124        = pSymFile->aPhdrs[i].p_paddr   = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
     1125    pSymFile->aPhdrs[i].p_filesz         /* Size of segment in file. */
     1126        = pSymFile->aPhdrs[i].p_memsz   = sizeof(pSymFile->aDyn);
     1127    pSymFile->aPhdrs[i].p_align         = sizeof(pSymFile->aDyn[0].d_tag);
     1128    i++;
     1129    /* The rest of the chunk. */
     1130    pSymFile->aPhdrs[i].p_type          = PT_LOAD;
     1131    pSymFile->aPhdrs[i].p_flags         = PF_X | PF_R;
     1132    pSymFile->aPhdrs[i].p_offset
     1133        = pSymFile->aPhdrs[i].p_vaddr
     1134        = pSymFile->aPhdrs[i].p_paddr   = sizeof(GDBJITSYMFILE);
     1135    pSymFile->aPhdrs[i].p_filesz         /* Size of segment in file. */
     1136        = pSymFile->aPhdrs[i].p_memsz   = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
     1137    pSymFile->aPhdrs[i].p_align         = 1;
     1138    i++;
     1139
     1140    Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
     1141
     1142    /* The dynamic section: */
     1143    i = 0;
     1144    pSymFile->aDyn[i].d_tag             = DT_SONAME;
     1145    pSymFile->aDyn[i].d_un.d_val        = offStrTab;
     1146    APPEND_STR("iem-native.so");
     1147    i++;
     1148    pSymFile->aDyn[i].d_tag             = DT_STRTAB;
     1149    pSymFile->aDyn[i].d_un.d_ptr        = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
     1150    i++;
     1151    pSymFile->aDyn[i].d_tag             = DT_STRSZ;
     1152    pSymFile->aDyn[i].d_un.d_val        = sizeof(pSymFile->szzStrTab);
     1153    i++;
     1154    pSymFile->aDyn[i].d_tag             = DT_SYMTAB;
     1155    pSymFile->aDyn[i].d_un.d_ptr        = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
     1156    i++;
     1157    pSymFile->aDyn[i].d_tag             = DT_SYMENT;
     1158    pSymFile->aDyn[i].d_un.d_val        = sizeof(pSymFile->aSymbols[0]);
     1159    i++;
     1160    pSymFile->aDyn[i].d_tag             = DT_NULL;
     1161    i++;
     1162    Assert(i == RT_ELEMENTS(pSymFile->aDyn));
     1163#   endif
     1164
     1165    /* Symbol table: */
     1166    i = 0;
     1167    pSymFile->aSymbols[i].st_name       = offStrTab;
     1168    APPEND_STR("iem_exec_chunk");
     1169    pSymFile->aSymbols[i].st_shndx      = SHN_ABS;
     1170    pSymFile->aSymbols[i].st_value      = (uintptr_t)pvChunk;
     1171    pSymFile->aSymbols[i].st_size       = pExecMemAllocator->cbChunk;
     1172    pSymFile->aSymbols[i].st_info       = ELF64_ST_INFO(STB_LOCAL, STT_FUNC);
     1173    pSymFile->aSymbols[i].st_other      = 0 /* STV_DEFAULT */;
     1174    i++;
     1175    Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
     1176    Assert(offStrTab < sizeof(pSymFile->szzStrTab));
     1177
     1178    /* The GDB JIT entry: */
     1179    pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
     1180#   if 1
     1181    pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
     1182#   else
     1183    pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
     1184#   endif
     1185
     1186    RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
     1187    RTCritSectEnter(&g_IemNativeGdbJitLock);
     1188    pEhFrame->GdbJitEntry.pNext      = NULL;
     1189    pEhFrame->GdbJitEntry.pPrev      = __jit_debug_descriptor.pTail;
     1190    if (__jit_debug_descriptor.pTail)
     1191        __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
     1192    else
     1193        __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
     1194    __jit_debug_descriptor.pTail     = &pEhFrame->GdbJitEntry;
     1195    __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
     1196
     1197    /* Notify GDB: */
     1198    __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
     1199    __jit_debug_register_code();
     1200    __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
     1201    RTCritSectLeave(&g_IemNativeGdbJitLock);
     1202
     1203    RT_BREAKPOINT();
     1204#  endif
     1205
    8431206    return VINF_SUCCESS;
    8441207}
     
    11321495    pReNative->cFixups   = 0;
    11331496    pReNative->pTbOrg    = pTb;
     1497
     1498    pReNative->bmHstRegs              = IEMNATIVE_REG_FIXED_MASK
     1499#if IEMNATIVE_HST_GREG_COUNT < 32
     1500                                      | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
     1501#endif
     1502                                      ;
     1503    pReNative->bmHstRegsWithGstShadow = 0;
     1504    pReNative->bmGstRegShadows        = 0;
     1505    pReNative->bmVars                 = 0;
     1506    pReNative->u64ArgVars             = UINT64_MAX;
     1507
     1508    /* Full host register reinit: */
     1509    for (unsigned i = 0; i < RT_ELEMENTS(pReNative->aHstRegs); i++)
     1510    {
     1511        pReNative->aHstRegs[i].fGstRegShadows = 0;
     1512        pReNative->aHstRegs[i].enmWhat        = kIemNativeWhat_Invalid;
     1513        pReNative->aHstRegs[i].idxVar         = UINT8_MAX;
     1514    }
     1515
     1516    uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
     1517                   & ~(  RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
     1518#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
     1519                       | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
     1520#endif
     1521#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
     1522                       | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
     1523#endif
     1524                      );
     1525    for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
     1526    {
     1527        fRegs &= ~RT_BIT_32(idxReg);
     1528        pReNative->aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_FixedReserved;
     1529    }
     1530
     1531    pReNative->aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat     = kIemNativeWhat_pVCpuFixed;
     1532#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
     1533    pReNative->aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat   = kIemNativeWhat_pCtxFixed;
     1534#endif
     1535#ifdef IEMNATIVE_REG_FIXED_TMP0
     1536    pReNative->aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat       = kIemNativeWhat_FixedTmp;
     1537#endif
    11341538    return pReNative;
    11351539}
     
    13451749
    13461750/**
     1751 * Register parameter indexes (indexed by argument number).
     1752 */
     1753DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
     1754{
     1755    IEMNATIVE_CALL_ARG0_GREG,
     1756    IEMNATIVE_CALL_ARG1_GREG,
     1757    IEMNATIVE_CALL_ARG2_GREG,
     1758    IEMNATIVE_CALL_ARG3_GREG,
     1759#if defined(IEMNATIVE_CALL_ARG4_GREG)
     1760    IEMNATIVE_CALL_ARG4_GREG,
     1761# if defined(IEMNATIVE_CALL_ARG5_GREG)
     1762    IEMNATIVE_CALL_ARG5_GREG,
     1763#  if defined(IEMNATIVE_CALL_ARG6_GREG)
     1764    IEMNATIVE_CALL_ARG6_GREG,
     1765#   if defined(IEMNATIVE_CALL_ARG7_GREG)
     1766    IEMNATIVE_CALL_ARG7_GREG,
     1767#   endif
     1768#  endif
     1769# endif
     1770#endif
     1771};
     1772
     1773/**
     1774 * Call register masks indexed by argument count.
     1775 */
     1776DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
     1777{
     1778    0,
     1779    RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
     1780    RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
     1781    RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
     1782      RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
     1783    | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
     1784#if defined(IEMNATIVE_CALL_ARG4_GREG)
     1785      RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
     1786    | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
     1787# if defined(IEMNATIVE_CALL_ARG5_GREG)
     1788      RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
     1789    | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
     1790#  if defined(IEMNATIVE_CALL_ARG6_GREG)
     1791      RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
     1792    | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
     1793    | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
     1794#   if defined(IEMNATIVE_CALL_ARG7_GREG)
     1795      RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
     1796    | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
     1797    | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
     1798#   endif
     1799#  endif
     1800# endif
     1801#endif
     1802};
     1803
     1804
     1805DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
     1806                                                     IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
     1807{
     1808    pReNative->bmHstRegs |= RT_BIT_32(idxReg);
     1809
     1810    pReNative->aHstRegs[idxReg].enmWhat        = enmWhat;
     1811    pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
     1812    pReNative->aHstRegs[idxReg].idxVar         = idxVar;
     1813    return (uint8_t)idxReg;
     1814}
     1815
     1816
     1817/**
     1818 * Locate a register, possibly freeing one up.
     1819 *
     1820 * This ASSUMES the caller has done the minimal/optimal allocation checks and
     1821 * failed.
     1822 */
     1823static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fAllowVolatile) RT_NOEXCEPT
     1824{
     1825    uint32_t fRegMask = fAllowVolatile
     1826                      ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
     1827                      : IEMNATIVE_HST_GREG_MASK & ~(IEMNATIVE_REG_FIXED_MASK | IEMNATIVE_CALL_VOLATILE_GREG_MASK);
     1828
     1829    /*
     1830     * Try a freed register that's shadowing a guest register
     1831     */
     1832    uint32_t fRegs = ~pReNative->bmHstRegs & fRegMask;
     1833    if (fRegs)
     1834    {
     1835        /** @todo pick better here:    */
     1836        unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
     1837
     1838        Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
     1839        Assert(   (pReNative->aHstRegs[idxReg].fGstRegShadows & pReNative->bmGstRegShadows)
     1840               == pReNative->aHstRegs[idxReg].fGstRegShadows);
     1841        Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
     1842
     1843        pReNative->bmGstRegShadows        &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
     1844        pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
     1845        pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
     1846        return idxReg;
     1847    }
     1848
     1849    /*
     1850     * Try free up a variable that's in a register.
     1851     *
     1852     * We do two rounds here, first evacuating variables we don't need to be
     1853     * saved on the stack, then in the second round move things to the stack.
     1854     */
     1855    for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
     1856    {
     1857        uint32_t fVars = pReNative->bmVars;
     1858        while (fVars)
     1859        {
     1860            uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
     1861            uint8_t const  idxReg = pReNative->aVars[idxVar].idxReg;
     1862            if (   idxReg < RT_ELEMENTS(pReNative->aHstRegs)
     1863                && (RT_BIT_32(idxReg) & fRegMask)
     1864                && (  iLoop == 0
     1865                    ? pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack
     1866                    : pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack))
     1867            {
     1868                Assert(pReNative->bmHstRegs & RT_BIT_32(idxReg));
     1869                Assert(   (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxReg].fGstRegShadows)
     1870                       == pReNative->aHstRegs[idxReg].fGstRegShadows);
     1871                Assert(   RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
     1872                       == RT_BOOL(pReNative->aHstRegs[idxReg].fGstRegShadows));
     1873
     1874                if (pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
     1875                {
     1876                    AssertReturn(pReNative->aVars[idxVar].idxStackSlot != UINT8_MAX, UINT8_MAX);
     1877                    uint32_t off = *poff;
     1878                    *poff = off = iemNativeEmitStoreGprByBp(pReNative, off,
     1879                                                              pReNative->aVars[idxVar].idxStackSlot * sizeof(uint64_t)
     1880                                                            - IEMNATIVE_FP_OFF_STACK_VARS,
     1881                                                            idxReg);
     1882                    AssertReturn(off != UINT32_MAX, UINT8_MAX);
     1883                }
     1884
     1885                pReNative->aVars[idxVar].idxReg    = UINT8_MAX;
     1886                pReNative->bmGstRegShadows        &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
     1887                pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
     1888                pReNative->bmHstRegs              &= ~RT_BIT_32(idxReg);
     1889                return idxReg;
     1890            }
     1891            fVars &= ~RT_BIT_32(idxVar);
     1892        }
     1893    }
     1894
     1895    AssertFailedReturn(UINT8_MAX);
     1896}
     1897
     1898
     1899/**
     1900 * Moves a variable to a different register or spills it onto the stack.
     1901 *
     1902 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
     1903 * kinds can easily be recreated if needed later.
     1904 *
     1905 * @returns The new code buffer position, UINT32_MAX on failure.
     1906 * @param   pReNative       The native recompile state.
     1907 * @param   off             The current code buffer position.
     1908 * @param   idxVar          The variable index.
     1909 * @param   fForbiddenRegs  Mask of the forbidden registers.  Defaults to
     1910 *                          call-volatile registers.
     1911 */
     1912static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
     1913                                                uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
     1914{
     1915    Assert(idxVar < RT_ELEMENTS(pReNative->aVars));
     1916    Assert(pReNative->aVars[idxVar].enmKind == kIemNativeVarKind_Stack);
     1917
     1918    uint8_t const idxRegOld = pReNative->aVars[idxVar].idxReg;
     1919    Assert(idxRegOld < RT_ELEMENTS(pReNative->aHstRegs));
     1920    Assert(pReNative->bmHstRegs & RT_BIT_32(idxRegOld));
     1921    Assert(pReNative->aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
     1922    Assert(   (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxRegOld].fGstRegShadows)
     1923           == pReNative->aHstRegs[idxRegOld].fGstRegShadows);
     1924    Assert(   RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
     1925           == RT_BOOL(pReNative->aHstRegs[idxRegOld].fGstRegShadows));
     1926
     1927
     1928    /** @todo Add statistics on this.*/
     1929    /** @todo Implement basic variable liveness analysis (python) so variables
     1930     * can be freed immediately once no longer used.  This has the potential to
     1931     * be trashing registers and stack for dead variables. */
     1932
     1933    /*
     1934     * First try move it to a different register, as that's cheaper.
     1935     */
     1936    fForbiddenRegs |= RT_BIT_32(idxRegOld);
     1937    fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
     1938    uint32_t fRegs = ~pReNative->bmHstRegs & ~fForbiddenRegs;
     1939    if (fRegs)
     1940    {
     1941        /* Avoid using shadow registers, if possible. */
     1942        if (fRegs & ~pReNative->bmHstRegsWithGstShadow)
     1943            fRegs &= ~pReNative->bmHstRegsWithGstShadow;
     1944        unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
     1945
     1946        uint64_t fGstRegShadows = pReNative->aHstRegs[idxRegOld].fGstRegShadows;
     1947        pReNative->aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
     1948        pReNative->aHstRegs[idxRegNew].enmWhat        = kIemNativeWhat_Var;
     1949        pReNative->aHstRegs[idxRegNew].idxVar         = idxVar;
     1950        if (fGstRegShadows)
     1951        {
     1952            pReNative->bmHstRegsWithGstShadow |= RT_BIT_32(idxRegNew);
     1953            while (fGstRegShadows)
     1954            {
     1955                unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows);
     1956                fGstRegShadows &= ~RT_BIT_64(idxGstReg);
     1957
     1958                Assert(pReNative->aidxGstRegShadows[idxGstReg] == idxRegOld);
     1959                pReNative->aidxGstRegShadows[idxGstReg] = idxRegNew;
     1960            }
     1961        }
     1962
     1963        pReNative->aVars[idxVar].idxReg = (uint8_t)idxRegNew;
     1964        pReNative->bmHstRegs           |= RT_BIT_32(idxRegNew);
     1965    }
     1966    /*
     1967     * Otherwise we must spill the register onto the stack.
     1968     */
     1969    else
     1970    {
     1971        AssertReturn(pReNative->aVars[idxVar].idxStackSlot != UINT8_MAX, UINT32_MAX);
     1972        off = iemNativeEmitStoreGprByBp(pReNative, off,
     1973                                        pReNative->aVars[idxVar].idxStackSlot * sizeof(uint64_t) - IEMNATIVE_FP_OFF_STACK_VARS,
     1974                                        idxRegOld);
     1975        AssertReturn(off != UINT32_MAX, UINT32_MAX);
     1976
     1977        pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
     1978        pReNative->bmGstRegShadows        &= ~pReNative->aHstRegs[idxRegOld].fGstRegShadows;
     1979    }
     1980
     1981    pReNative->bmHstRegs &= ~RT_BIT_32(idxRegOld);
     1982    pReNative->aHstRegs[idxRegOld].fGstRegShadows = 0;
     1983    return off;
     1984}
     1985
     1986
     1987/**
     1988 * Allocates a temporary host general purpose register.
     1989 *
     1990 * This may emit code to save register content onto the stack in order to free
     1991 * up a register.
     1992 *
     1993 * @returns The host register number, UINT8_MAX on failure.
     1994 * @param   pReNative       The native recompile state.
     1995 * @param   poff            Pointer to the variable with the code buffer position.
     1996 *                          This will be update if we need to move a variable from
     1997 *                          register to stack in order to satisfy the request.
     1998 * @param   fPreferVolatile Wheter to prefer volatile over non-volatile
     1999 *                          registers (@c true, default) or the other way around
     2000 *                          (@c false, for iemNativeRegAllocTmpForGuestReg()).
     2001 */
     2002DECLHIDDEN(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
     2003                                         bool fPreferVolatile /*= true*/) RT_NOEXCEPT
     2004{
     2005    /*
     2006     * Try find a completely unused register, preferably a call-volatile one.
     2007     */
     2008    uint8_t  idxReg;
     2009    uint32_t fRegs = ~pReNative->bmHstRegs
     2010                   & ~pReNative->bmHstRegsWithGstShadow
     2011                   & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
     2012    if (fRegs)
     2013    {
     2014fPreferVolatile = false; /// @todo DO NOT COMMIT THIS
     2015        if (fPreferVolatile)
     2016            idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
     2017                                                ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
     2018        else
     2019            idxReg = (uint8_t)ASMBitFirstSetU32(  fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
     2020                                                ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
     2021        Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
     2022        Assert(!(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
     2023    }
     2024    else
     2025    {
     2026        idxReg = iemNativeRegAllocFindFree(pReNative, poff, true /*fAllowVolatile*/);
     2027        AssertReturn(idxReg != UINT8_MAX, UINT8_MAX);
     2028    }
     2029    return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
     2030}
     2031
     2032
     2033/**
     2034 * Info about shadowed guest register values.
     2035 * @see IEMNATIVEGSTREG
     2036 */
     2037static struct
     2038{
     2039    /** Offset in VMCPU. */
     2040    uint32_t    off;
     2041    /** The field size. */
     2042    uint8_t     cb;
     2043    /** Name (for logging). */
     2044    const char *pszName;
     2045} const g_aGstShadowInfo[] =
     2046{
     2047#define CPUMCTX_OFF_AND_SIZE(a_Reg) RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
     2048    /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */  { CPUMCTX_OFF_AND_SIZE(rax),                "rax", },
     2049    /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */  { CPUMCTX_OFF_AND_SIZE(rcx),                "rcx", },
     2050    /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */  { CPUMCTX_OFF_AND_SIZE(rdx),                "rdx", },
     2051    /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */  { CPUMCTX_OFF_AND_SIZE(rbx),                "rbx", },
     2052    /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */  { CPUMCTX_OFF_AND_SIZE(rsp),                "rsp", },
     2053    /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */  { CPUMCTX_OFF_AND_SIZE(rbp),                "rbp", },
     2054    /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */  { CPUMCTX_OFF_AND_SIZE(rsi),                "rsi", },
     2055    /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */  { CPUMCTX_OFF_AND_SIZE(rdi),                "rdi", },
     2056    /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */  { CPUMCTX_OFF_AND_SIZE(r8),                 "r8", },
     2057    /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */  { CPUMCTX_OFF_AND_SIZE(r9),                 "r9", },
     2058    /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */  { CPUMCTX_OFF_AND_SIZE(r10),                "r10", },
     2059    /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */  { CPUMCTX_OFF_AND_SIZE(r11),                "r11", },
     2060    /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */  { CPUMCTX_OFF_AND_SIZE(r12),                "r12", },
     2061    /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */  { CPUMCTX_OFF_AND_SIZE(r13),                "r13", },
     2062    /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */  { CPUMCTX_OFF_AND_SIZE(r14),                "r14", },
     2063    /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */  { CPUMCTX_OFF_AND_SIZE(r15),                "r15", },
     2064    /* [kIemNativeGstReg_Pc] = */                       { CPUMCTX_OFF_AND_SIZE(rip),                "rip", },
     2065    /* [kIemNativeGstReg_Rflags] = */                   { CPUMCTX_OFF_AND_SIZE(rflags),             "rflags", },
     2066    /* [18] = */                                        { UINT32_C(0xfffffff7),                  0, NULL, },
     2067    /* [19] = */                                        { UINT32_C(0xfffffff5),                  0, NULL, },
     2068    /* [20] = */                                        { UINT32_C(0xfffffff3),                  0, NULL, },
     2069    /* [21] = */                                        { UINT32_C(0xfffffff1),                  0, NULL, },
     2070    /* [22] = */                                        { UINT32_C(0xffffffef),                  0, NULL, },
     2071    /* [23] = */                                        { UINT32_C(0xffffffed),                  0, NULL, },
     2072    /* [kIemNativeGstReg_SegSelFirst + 0] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel),      "es", },
     2073    /* [kIemNativeGstReg_SegSelFirst + 1] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel),      "cs", },
     2074    /* [kIemNativeGstReg_SegSelFirst + 2] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel),      "ss", },
     2075    /* [kIemNativeGstReg_SegSelFirst + 3] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel),      "ds", },
     2076    /* [kIemNativeGstReg_SegSelFirst + 4] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel),      "fs", },
     2077    /* [kIemNativeGstReg_SegSelFirst + 5] = */          { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel),      "gs", },
     2078    /* [kIemNativeGstReg_SegBaseFirst + 0] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base),  "es_base", },
     2079    /* [kIemNativeGstReg_SegBaseFirst + 1] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base),  "cs_base", },
     2080    /* [kIemNativeGstReg_SegBaseFirst + 2] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base),  "ss_base", },
     2081    /* [kIemNativeGstReg_SegBaseFirst + 3] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base),  "ds_base", },
     2082    /* [kIemNativeGstReg_SegBaseFirst + 4] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base),  "fs_base", },
     2083    /* [kIemNativeGstReg_SegBaseFirst + 5] = */         { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base),  "gs_base", },
     2084    /* [kIemNativeGstReg_SegLimitFirst + 0] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
     2085    /* [kIemNativeGstReg_SegLimitFirst + 1] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
     2086    /* [kIemNativeGstReg_SegLimitFirst + 2] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
     2087    /* [kIemNativeGstReg_SegLimitFirst + 3] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
     2088    /* [kIemNativeGstReg_SegLimitFirst + 4] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
     2089    /* [kIemNativeGstReg_SegLimitFirst + 5] = */        { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
     2090#undef CPUMCTX_OFF_AND_SIZE
     2091};
     2092AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
     2093
     2094
     2095/** Host CPU general purpose register names. */
     2096const char * const g_apszIemNativeHstRegNames[] =
     2097{
     2098#ifdef RT_ARCH_AMD64
     2099    "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
     2100#elif RT_ARCH_ARM64
     2101    "x0",  "x1",  "x2",  "x3",  "x4",  "x5",  "x6",  "x7",  "x8",  "x9",  "x10", "x11", "x12", "x13", "x14", "x15",
     2102    "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp",  "lr",  "sp/xzr",
     2103#else
     2104# error "port me"
     2105#endif
     2106};
     2107
     2108/**
     2109 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
     2110 * extending to 64-bit width.
     2111 *
     2112 * @returns New code buffer offset on success, UINT32_MAX on failure.
     2113 * @param   pReNative   .
     2114 * @param   off         The current code buffer position.
     2115 * @param   idxHstReg   The host register to load the guest register value into.
     2116 * @param   enmGstReg   The guest register to load.
     2117 *
     2118 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
     2119 *       that is something the caller needs to do if applicable.
     2120 */
     2121DECLHIDDEN(uint32_t) iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off,
     2122                                                          uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
     2123{
     2124    Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
     2125    Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
     2126
     2127    switch (g_aGstShadowInfo[enmGstReg].cb)
     2128    {
     2129        case sizeof(uint64_t):
     2130            return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
     2131        case sizeof(uint32_t):
     2132            return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
     2133        case sizeof(uint16_t):
     2134            return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
     2135#if 0 /* not present in the table. */
     2136        case sizeof(uint8_t):
     2137            return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
     2138#endif
     2139        default:
     2140            AssertFailedReturn(UINT32_MAX);
     2141    }
     2142}
     2143
     2144
     2145#ifdef VBOX_STRICT
     2146/**
     2147 * Emitting code that checks that the content of register @a idxReg is the same
     2148 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
     2149 * instruction if that's not the case.
     2150 *
     2151 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
     2152 *       Trashes EFLAGS on AMD64.
     2153 */
     2154static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
     2155                                                uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
     2156{
     2157# ifdef RT_ARCH_AMD64
     2158    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
     2159    AssertReturn(pbCodeBuf, UINT32_MAX);
     2160
     2161    /* cmp reg, [mem] */
     2162    if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
     2163    {
     2164        if (idxReg >= 8)
     2165            pbCodeBuf[off++] = X86_OP_REX_R;
     2166        pbCodeBuf[off++] = 0x38;
     2167    }
     2168    else
     2169    {
     2170        if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
     2171            pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
     2172        else
     2173        {
     2174            if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
     2175                pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     2176            else
     2177                AssertReturn(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t), UINT32_MAX);
     2178            if (idxReg >= 8)
     2179                pbCodeBuf[off++] = X86_OP_REX_R;
     2180        }
     2181        pbCodeBuf[off++] = 0x39;
     2182    }
     2183    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
     2184
     2185    /* je/jz +1 */
     2186    pbCodeBuf[off++] = 0x74;
     2187    pbCodeBuf[off++] = 0x01;
     2188
     2189    /* int3 */
     2190    pbCodeBuf[off++] = 0xcc;
     2191
     2192    /* For values smaller than the register size, we must check that the rest
     2193       of the register is all zeros. */
     2194    if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
     2195    {
     2196        /* test reg64, imm32 */
     2197        pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
     2198        pbCodeBuf[off++] = 0xf7;
     2199        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
     2200        pbCodeBuf[off++] = 0;
     2201        pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
     2202        pbCodeBuf[off++] = 0xff;
     2203        pbCodeBuf[off++] = 0xff;
     2204
     2205        /* je/jz +1 */
     2206        pbCodeBuf[off++] = 0x74;
     2207        pbCodeBuf[off++] = 0x01;
     2208
     2209        /* int3 */
     2210        pbCodeBuf[off++] = 0xcc;
     2211    }
     2212    else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
     2213    {
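                  /* For 32-bit values the cmp above only checked the low dword, so verify that
                     the upper half of the host register is zero (proper zero extension): rotate
                     it into the low dword, test it, then rotate it back. */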
     2214        /* rol reg64, 32 */
     2215        pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
     2216        pbCodeBuf[off++] = 0xc1;
     2217        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
     2218        pbCodeBuf[off++] = 32;
     2219
     2220        /* test reg32, ffffffffh */
     2221        if (idxReg >= 8)
     2222            pbCodeBuf[off++] = X86_OP_REX_B;
     2223        pbCodeBuf[off++] = 0xf7;
     2224        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
     2225        pbCodeBuf[off++] = 0xff;
     2226        pbCodeBuf[off++] = 0xff;
     2227        pbCodeBuf[off++] = 0xff;
     2228        pbCodeBuf[off++] = 0xff;
     2229
     2230        /* je/jz +1 */
     2231        pbCodeBuf[off++] = 0x74;
     2232        pbCodeBuf[off++] = 0x01;
     2233
     2234        /* int3 */
     2235        pbCodeBuf[off++] = 0xcc;
     2236
     2237        /* rol reg64, 32 */
     2238        pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
     2239        pbCodeBuf[off++] = 0xc1;
     2240        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
     2241        pbCodeBuf[off++] = 32;
     2242    }
     2243
     2244# elif defined(RT_ARCH_ARM64)
     2245    /* mov TMP0, [gstreg] */
     2246    off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
     2247
     2248    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
     2249    AssertReturn(pu32CodeBuf, UINT32_MAX);
     2250    /* sub tmp0, tmp0, idxReg */
     2251    pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
     2252    /* cbz tmp0, +1 */
     2253    pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 1, IEMNATIVE_REG_FIXED_TMP0);
     2254    /* brk #0x1000+enmGstReg */
     2255    pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
     2256
     2257# else
     2258#  error "Port me!"
     2259# endif
     2260    return off;
     2261}
     2262#endif /* VBOX_STRICT */
     2263
     2264
     2265/**
     2266 * Marks host register @a idxHstReg as containing a shadow copy of guest
     2267 * register @a enmGstReg.
     2268 *
     2269 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
     2270 * host register before calling.
     2271 */
     2272DECL_FORCE_INLINE(void)
     2273iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
     2274{
     2275    Assert(!(pReNative->bmGstRegShadows & RT_BIT_64(enmGstReg)));
     2276
     2277    pReNative->aidxGstRegShadows[enmGstReg]       = idxHstReg;
     2278    pReNative->aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg);
     2279    pReNative->bmGstRegShadows                   |= RT_BIT_64(enmGstReg);
     2280    pReNative->bmHstRegsWithGstShadow            |= RT_BIT_32(idxHstReg);
     2281}
     2282
     2283
     2284/**
      2285 * Clears any guest register shadow claims from @a idxHstReg.
     2286 *
     2287 * The register does not need to be shadowing any guest registers.
     2288 */
     2289DECL_FORCE_INLINE(void)
     2290iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg)
     2291{
     2292    Assert(   (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxHstReg].fGstRegShadows)
     2293           == pReNative->aHstRegs[idxHstReg].fGstRegShadows);
     2294    Assert(   RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
     2295           == RT_BOOL(pReNative->aHstRegs[idxHstReg].fGstRegShadows));
     2296
     2297    pReNative->bmHstRegsWithGstShadow            &= ~RT_BIT_32(idxHstReg);
     2298    pReNative->bmGstRegShadows                   &= ~pReNative->aHstRegs[idxHstReg].fGstRegShadows;
     2299    pReNative->aHstRegs[idxHstReg].fGstRegShadows = 0;
     2300}
     2301
     2302
     2303/**
     2304 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
     2305 * to @a idxRegTo.
     2306 */
     2307DECL_FORCE_INLINE(void)
     2308iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo, IEMNATIVEGSTREG enmGstReg)
     2309{
     2310    Assert(pReNative->aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
     2311    Assert(   (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxRegFrom].fGstRegShadows)
     2312           == pReNative->aHstRegs[idxRegFrom].fGstRegShadows);
     2313    Assert(   RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
     2314           == RT_BOOL(pReNative->aHstRegs[idxRegFrom].fGstRegShadows));
     2315
      2316    pReNative->aHstRegs[idxRegFrom].fGstRegShadows &= ~RT_BIT_64(enmGstReg);
              if (!pReNative->aHstRegs[idxRegFrom].fGstRegShadows)
                  pReNative->bmHstRegsWithGstShadow          &= ~RT_BIT_32(idxRegFrom);
              pReNative->bmHstRegsWithGstShadow              |= RT_BIT_32(idxRegTo);
      2317    pReNative->aHstRegs[idxRegTo].fGstRegShadows   |= RT_BIT_64(enmGstReg);
      2318    pReNative->aidxGstRegShadows[enmGstReg]         = idxRegTo;
     2319}
     2320
     2321
     2322
     2323/**
     2324 * Intended use statement for iemNativeRegAllocTmpForGuestReg().
     2325 */
     2326typedef enum IEMNATIVEGSTREGUSE
     2327{
     2328    /** The usage is read-only, the register holding the guest register
     2329     * shadow copy will not be modified by the caller. */
     2330    kIemNativeGstRegUse_ReadOnly = 0,
     2331    /** The caller will update the guest register (think: PC += cbInstr).
     2332     * The guest shadow copy will follow the returned register. */
     2333    kIemNativeGstRegUse_ForUpdate,
     2334    /** The caller will use the guest register value as input in a calculation
     2335     * and the host register will be modified.
     2336     * This means that the returned host register will not be marked as a shadow
     2337     * copy of the guest register. */
     2338    kIemNativeGstRegUse_Calculation
     2339} IEMNATIVEGSTREGUSE;
     2340
     2341/**
     2342 * Allocates a temporary host general purpose register for updating a guest
     2343 * register value.
     2344 *
     2345 * Since we may already have a register holding the guest register value,
     2346 * code will be emitted to do the loading if that's not the case. Code may also
      2347 * be emitted if we have to free up a register to satisfy the request.
     2348 *
     2349 * @returns The host register number, UINT8_MAX on failure.
     2350 * @param   pReNative       The native recompile state.
     2351 * @param   poff            Pointer to the variable with the code buffer
      2352 *                          position. This will be updated if we need to move a
     2353 *                          variable from register to stack in order to satisfy
     2354 *                          the request.
      2355 * @param   enmGstReg       The guest register that is to be updated.
     2356 * @param   enmIntendedUse  How the caller will be using the host register.
     2357 */
     2358DECLHIDDEN(uint8_t) iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
     2359                                                    IEMNATIVEGSTREG enmGstReg, IEMNATIVEGSTREGUSE enmIntendedUse) RT_NOEXCEPT
     2360{
     2361    Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
     2362#ifdef LOG_ENABLED
     2363    static const char * const s_pszIntendedUse[] = { "fetch", "update", "destructive calc" };
     2364#endif
     2365
     2366    /*
     2367     * First check if the guest register value is already in a host register.
     2368     */
     2369    if (pReNative->bmGstRegShadows & RT_BIT_64(enmGstReg))
     2370    {
     2371        uint8_t idxReg = pReNative->aidxGstRegShadows[enmGstReg];
     2372        Assert(idxReg < RT_ELEMENTS(pReNative->aHstRegs));
     2373        Assert(pReNative->aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
     2374        Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
     2375
     2376        if (!(pReNative->bmHstRegs & RT_BIT_32(idxReg)))
     2377        {
     2378            /*
      2379             * If the intended use will trash the guest shadow copy, try to find a
     2380             * completely unused register we can use instead.  If that fails,
     2381             * we need to disassociate the host reg from the guest reg.
     2382             */
     2383            /** @todo would be nice to know if preserving the register is in any way helpful. */
     2384            if (   enmIntendedUse == kIemNativeGstRegUse_Calculation
     2385                && (  ~pReNative->bmHstRegs
     2386                    & ~pReNative->bmHstRegsWithGstShadow
     2387                    & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
     2388            {
     2389                uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff);
     2390                Assert(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs));
     2391
     2392                uint32_t off = *poff;
     2393                *poff = off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxReg);
     2394                AssertReturn(off != UINT32_MAX, UINT8_MAX);
     2395
     2396                Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
     2397                       g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
     2398                       g_apszIemNativeHstRegNames[idxRegNew]));
     2399                idxReg = idxRegNew;
     2400            }
     2401            else
     2402            {
     2403                pReNative->bmHstRegs |= RT_BIT_32(idxReg);
     2404                pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
     2405                pReNative->aHstRegs[idxReg].idxVar  = UINT8_MAX;
     2406                if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
     2407                    Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
     2408                           g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
     2409                else
     2410                {
     2411                    iemNativeRegClearGstRegShadowing(pReNative, idxReg);
     2412                    Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
     2413                           g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
     2414                }
     2415            }
     2416        }
     2417        else
     2418        {
     2419            AssertMsg(enmIntendedUse != kIemNativeGstRegUse_ForUpdate,
     2420                      ("This shouldn't happen: idxReg=%d enmGstReg=%d\n", idxReg, enmGstReg));
     2421
     2422            /*
     2423             * Allocate a new register, copy the value and, if updating, the
     2424             * guest shadow copy assignment to the new register.
     2425             */
     2426            /** @todo share register for readonly access. */
     2427            uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
     2428            AssertReturn(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs), UINT8_MAX);
     2429
     2430            uint32_t off = *poff;
     2431            *poff = off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxReg);
     2432            AssertReturn(off != UINT32_MAX, UINT8_MAX);
     2433
     2434            if (enmIntendedUse != kIemNativeGstRegUse_ForUpdate)
     2435                Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
     2436                       g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
     2437                       g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
     2438            else
     2439            {
     2440                iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg);
     2441                Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for update\n",
     2442                       g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
     2443                       g_apszIemNativeHstRegNames[idxRegNew]));
     2444            }
     2445            idxReg = idxRegNew;
     2446        }
     2447
     2448#ifdef VBOX_STRICT
     2449        /* Strict builds: Check that the value is correct. */
     2450        uint32_t off = *poff;
     2451        *poff = off = iemNativeEmitGuestRegValueCheck(pReNative, off, idxReg, enmGstReg);
     2452        AssertReturn(off != UINT32_MAX, UINT8_MAX);
     2453#endif
     2454
     2455        return idxReg;
     2456    }
     2457
     2458    /*
      2459     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
     2460     */
     2461    uint8_t const idxRegNew = iemNativeRegAllocTmp(pReNative, poff, enmIntendedUse == kIemNativeGstRegUse_Calculation);
     2462    AssertReturn(idxRegNew < RT_ELEMENTS(pReNative->aHstRegs), UINT8_MAX);
     2463
     2464    uint32_t off = *poff;
     2465    *poff = off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxRegNew, enmGstReg);
     2466    AssertReturn(off != UINT32_MAX, UINT8_MAX);
     2467
     2468    if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
     2469        iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg);
     2470    Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
     2471           g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
     2472
     2473    return idxRegNew;
     2474}
     2475
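Illustrative sketch (not part of the changeset): how the three IEMNATIVEGSTREGUSE modes are meant to be used by callers. The PC-advance emitter further down uses the kIemNativeGstRegUse_ForUpdate variant for real; this shows the read-only case.

              /* Read-only use: the returned register keeps shadowing the guest PC and must not
                 be modified by the caller. */
              uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                                       kIemNativeGstRegUse_ReadOnly);
              AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
              /* ... use idxPcReg strictly as a source operand ... */
              iemNativeRegFreeTmp(pReNative, idxPcReg); /* frees the register, keeps the shadow copy */
              /* kIemNativeGstRegUse_Calculation would instead return a register that is NOT marked
                 as a shadow copy, so it may be clobbered freely. */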
     2476
     2477DECLHIDDEN(uint8_t)         iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar) RT_NOEXCEPT;
     2478
     2479
     2480/**
     2481 * Allocates argument registers for a function call.
     2482 *
      2483 * @returns True on success, false on failure.
     2484 * @param   pReNative   The native recompile state.
     2485 * @param   off         The current code buffer offset.
     2486 * @param   cArgs       The number of arguments the function call takes.
     2487 */
     2488DECLHIDDEN(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs) RT_NOEXCEPT
     2489{
     2490    AssertReturn(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT, false);
     2491    Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
     2492    Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
     2493
     2494    if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
     2495        cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
     2496    else if (cArgs == 0)
     2497        return true;
     2498
     2499    /*
      2500     * Are we lucky and all the registers are free and not shadowing anything?
     2501     */
     2502    if (((pReNative->bmHstRegs | pReNative->bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
     2503        for (uint32_t i = 0; i < cArgs; i++)
     2504        {
     2505            uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
     2506            pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
     2507            pReNative->aHstRegs[idxReg].idxVar  = UINT8_MAX;
     2508            Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
     2509        }
     2510    /*
     2511     * Okay, not lucky so we have to free up the registers.
     2512     */
     2513    else
     2514        for (uint32_t i = 0; i < cArgs; i++)
     2515        {
     2516            uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
     2517            if (pReNative->bmHstRegs & RT_BIT_32(idxReg))
     2518            {
     2519                switch (pReNative->aHstRegs[idxReg].enmWhat)
     2520                {
     2521                    case kIemNativeWhat_Var:
     2522                    {
     2523                        uint8_t const idxVar = pReNative->aHstRegs[idxReg].idxVar;
     2524                        AssertReturn(idxVar < RT_ELEMENTS(pReNative->aVars), false);
     2525                        Assert(pReNative->aVars[idxVar].idxReg == idxReg);
     2526                        Assert(pReNative->bmVars & RT_BIT_32(idxVar));
     2527
     2528                        if (pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
     2529                            pReNative->aVars[idxVar].idxReg = UINT8_MAX;
     2530                        else
     2531                        {
     2532                            off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
     2533                            AssertReturn(off != UINT32_MAX, false);
     2534                            Assert(!(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
     2535                        }
     2536                        break;
     2537                    }
     2538
     2539                    case kIemNativeWhat_Tmp:
     2540                    case kIemNativeWhat_Arg:
     2541                    case kIemNativeWhat_rc:
     2542                        AssertFailedReturn(false);
     2543                    default:
     2544                        AssertFailedReturn(false);
     2545                }
     2546
     2547            }
     2548            if (pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
     2549            {
     2550                Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
     2551                Assert(   (pReNative->aHstRegs[idxReg].fGstRegShadows & pReNative->bmGstRegShadows)
     2552                       == pReNative->aHstRegs[idxReg].fGstRegShadows);
     2553                pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
     2554                pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
     2555            }
     2556            else
     2557                Assert(pReNative->aHstRegs[idxReg].fGstRegShadows == 0);
     2558            pReNative->aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
     2559            pReNative->aHstRegs[idxReg].idxVar  = UINT8_MAX;
     2560        }
     2561    pReNative->bmHstRegs |= g_afIemNativeCallRegs[cArgs];
     2562    return true;
     2563}
     2564
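Illustrative sketch (not part of the changeset; uMyParam is a placeholder): combining the argument register allocation above with the IEMNATIVE_CALL_ARGn_GREG loads used by the threaded-call emitter below.

              /* Reserve the first two call argument registers ... */
              AssertReturn(iemNativeRegAllocArgs(pReNative, off, 2), UINT32_MAX);
              /* ... and load them: pVCpu first, a hypothetical immediate parameter second. */
              off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
              off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, uMyParam /* placeholder */);
              AssertReturn(off != UINT32_MAX, UINT32_MAX);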
     2565
     2566DECLHIDDEN(uint8_t)         iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT;
     2567
     2568
     2569#if 0
     2570/**
     2571 * Frees a register assignment of any type.
     2572 *
     2573 * @param   pReNative       The native recompile state.
     2574 * @param   idxHstReg       The register to free.
     2575 *
     2576 * @note    Does not update variables.
     2577 */
     2578DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
     2579{
     2580    Assert(idxHstReg < RT_ELEMENTS(pReNative->aHstRegs));
     2581    Assert(pReNative->bmHstRegs & RT_BIT_32(idxHstReg));
     2582    Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
     2583    Assert(   pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
     2584           || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
     2585           || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
     2586           || pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
     2587    Assert(   pReNative->aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
     2588           || pReNative->aVars[pReNative->aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
     2589           || (pReNative->bmVars & RT_BIT_32(pReNative->aHstRegs[idxHstReg].idxVar)));
     2590    Assert(   (pReNative->bmGstRegShadows & pReNative->aHstRegs[idxHstReg].fGstRegShadows)
     2591           == pReNative->aHstRegs[idxHstReg].fGstRegShadows);
     2592    Assert(   RT_BOOL(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
     2593           == RT_BOOL(pReNative->aHstRegs[idxHstReg].fGstRegShadows));
     2594
     2595    pReNative->bmHstRegs              &= ~RT_BIT_32(idxHstReg);
     2596    /* no flushing, right:
     2597    pReNative->bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
     2598    pReNative->bmGstRegShadows        &= ~pReNative->aHstRegs[idxHstReg].fGstRegShadows;
     2599    pReNative->aHstRegs[idxHstReg].fGstRegShadows = 0;
     2600    */
     2601}
     2602#endif
     2603
     2604
     2605/**
     2606 * Frees a temporary register.
     2607 *
     2608 * Any shadow copies of guest registers assigned to the host register will not
     2609 * be flushed by this operation.
     2610 */
     2611DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
     2612{
     2613    Assert(pReNative->bmHstRegs & RT_BIT_32(idxHstReg));
     2614    Assert(pReNative->aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
     2615    pReNative->bmHstRegs &= ~RT_BIT_32(idxHstReg);
     2616    Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
     2617           g_apszIemNativeHstRegNames[idxHstReg], pReNative->aHstRegs[idxHstReg].fGstRegShadows));
     2618}
     2619
     2620
     2621/**
     2622 * Called right before emitting a call instruction to move anything important
     2623 * out of call-volatile registers, free and flush the call-volatile registers,
     2624 * optionally freeing argument variables.
     2625 *
     2626 * @returns New code buffer offset, UINT32_MAX on failure.
     2627 * @param   pReNative       The native recompile state.
     2628 * @param   off             The code buffer offset.
     2629 * @param   cArgs           The number of arguments the function call takes.
      2630 *                          It is presumed that the host registers for these have
      2631 *                          already been allocated as argument registers and won't
      2632 *                          need moving, just freeing.
     2633 * @param   fFreeArgVars    Whether to free argument variables for the call.
     2634 */
     2635DECLHIDDEN(uint32_t) iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off,
     2636                                                           uint8_t cArgs, bool fFreeArgVars) RT_NOEXCEPT
     2637{
     2638    /*
     2639     * Free argument variables first (simplified).
     2640     */
     2641    AssertReturn(cArgs <= RT_ELEMENTS(pReNative->aidxArgVars), UINT32_MAX);
     2642    if (fFreeArgVars && cArgs > 0)
     2643    {
     2644        for (uint32_t i = 0; i < cArgs; i++)
     2645        {
     2646            uint8_t idxVar = pReNative->aidxArgVars[i];
     2647            if (idxVar < RT_ELEMENTS(pReNative->aVars))
     2648            {
     2649                pReNative->aidxArgVars[i] = UINT8_MAX;
     2650                pReNative->bmVars        &= ~RT_BIT_32(idxVar);
     2651                Assert(   pReNative->aVars[idxVar].idxReg
     2652                       == (i < RT_ELEMENTS(g_aidxIemNativeCallRegs) ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
     2653            }
     2654        }
     2655        Assert(pReNative->u64ArgVars == UINT64_MAX);
     2656    }
     2657
     2658    /*
     2659     * Move anything important out of volatile registers.
     2660     */
     2661    if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
     2662        cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
     2663    uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
     2664#ifdef IEMNATIVE_REG_FIXED_TMP0
     2665                         & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
     2666#endif
     2667                         & ~g_afIemNativeCallRegs[cArgs];
     2668
     2669    fRegsToMove &= pReNative->bmHstRegs;
     2670    if (!fRegsToMove)
     2671    { /* likely */ }
     2672    else
     2673        while (fRegsToMove != 0)
     2674        {
     2675            unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
     2676            fRegsToMove &= ~RT_BIT_32(idxReg);
     2677
     2678            switch (pReNative->aHstRegs[idxReg].enmWhat)
     2679            {
     2680                case kIemNativeWhat_Var:
     2681                {
     2682                    uint8_t const idxVar = pReNative->aHstRegs[idxReg].idxVar;
     2683                    Assert(idxVar < RT_ELEMENTS(pReNative->aVars));
     2684                    Assert(pReNative->bmVars & RT_BIT_32(idxVar));
     2685                    Assert(pReNative->aVars[idxVar].idxReg == idxReg);
     2686                    if (pReNative->aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
     2687                        pReNative->aVars[idxVar].idxReg = UINT8_MAX;
     2688                    else
     2689                    {
     2690                        off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
     2691                        AssertReturn(off != UINT32_MAX, UINT32_MAX);
     2692                    }
     2693                    continue;
     2694                }
     2695
     2696                case kIemNativeWhat_Arg:
     2697                    AssertMsgFailed(("What?!?: %u\n", idxReg));
     2698                    continue;
     2699
     2700                case kIemNativeWhat_rc:
     2701                case kIemNativeWhat_Tmp:
     2702                    AssertMsgFailed(("Missing free: %u\n", idxReg));
     2703                    continue;
     2704
     2705                case kIemNativeWhat_FixedTmp:
     2706                case kIemNativeWhat_pVCpuFixed:
     2707                case kIemNativeWhat_pCtxFixed:
     2708                case kIemNativeWhat_FixedReserved:
     2709                case kIemNativeWhat_Invalid:
     2710                case kIemNativeWhat_End:
     2711                    AssertFailedReturn(UINT32_MAX);
     2712            }
     2713            AssertFailedReturn(UINT32_MAX);
     2714        }
     2715
     2716    /*
     2717     * Do the actual freeing.
     2718     */
     2719    pReNative->bmHstRegs &= ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
     2720
     2721    /* If there are guest register shadows in any call-volatile register, we
      2722       have to clear the corresponding guest register masks for each register. */
     2723    uint32_t fHstRegsWithGstShadow = pReNative->bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
     2724    if (fHstRegsWithGstShadow)
     2725    {
     2726        pReNative->bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
     2727        do
     2728        {
     2729            unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
      2730            fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
     2731
     2732            Assert(pReNative->aHstRegs[idxReg].fGstRegShadows != 0);
     2733            pReNative->bmGstRegShadows &= ~pReNative->aHstRegs[idxReg].fGstRegShadows;
     2734            pReNative->aHstRegs[idxReg].fGstRegShadows = 0;
     2735        } while (fHstRegsWithGstShadow != 0);
     2736    }
     2737
     2738    return off;
     2739}
     2740
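Illustrative sketch (not part of the changeset): the intended ordering at a call site, mirroring what the threaded-call emitter below does.

              /* 1. Spill/flush whatever lives in the call-volatile registers. */
              off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/, false /*fFreeArgVars*/);
              AssertReturn(off != UINT32_MAX, UINT32_MAX);
              /* 2. Load the argument registers (pVCpu, parameters, ...). */
              off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
              /* 3. Emit the call itself (target address load and call emission elided here). */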
     2741
     2742/**
     13472743 * Emits code for checking the return code of a call and rcPassUp, returning
     13482744 * from the code if either is non-zero.
     
    13632759    AssertReturn(pbCodeBuf, UINT32_MAX);
    13642760
    1365     /* edx = eax | rcPassUp*/
     2761    /* edx = eax | rcPassUp */
    13662762    pbCodeBuf[off++] = 0x0b;                    /* or edx, eax */
    13672763    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
     
    14932889    AssertReturn(off != UINT32_MAX, UINT32_MAX);
    14942890#endif
     2891/** @todo Must flush all shadow guest registers as well. */
     2892    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4, false /*fFreeArgVars*/);
    14952893    uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
    14962894
     
    15002898#  ifndef VBOXSTRICTRC_STRICT_ENABLED
    15012899    off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
    1502     AssertReturn(off != UINT32_MAX, UINT32_MAX);
    15032900    if (cParams > 0)
    1504     {
    15052901        off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
    1506         AssertReturn(off != UINT32_MAX, UINT32_MAX);
    1507     }
    15082902    if (cParams > 1)
    1509     {
    15102903        off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
    1511         AssertReturn(off != UINT32_MAX, UINT32_MAX);
    1512     }
    15132904    if (cParams > 2)
    1514     {
    15152905        off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
    1516         AssertReturn(off != UINT32_MAX, UINT32_MAX);
    1517     }
    15182906#  else  /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
    15192907    off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
    1520     AssertReturn(off != UINT32_MAX, UINT32_MAX);
    15212908    if (cParams > 0)
    1522     {
    15232909        off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
    1524         AssertReturn(off != UINT32_MAX, UINT32_MAX);
    1525     }
    15262910    if (cParams > 1)
    1527     {
    15282911        off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
    1529         AssertReturn(off != UINT32_MAX, UINT32_MAX);
    1530     }
    15312912    if (cParams > 2)
    1532     {
    15332913        off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
    1534         AssertReturn(off != UINT32_MAX, UINT32_MAX);
    1535     }
    15362914    off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
    1537     AssertReturn(off != UINT32_MAX, UINT32_MAX);
    15382915    off = iemNativeEmitLeaGrpByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
    1539     AssertReturn(off != UINT32_MAX, UINT32_MAX);
    15402916#  endif /* VBOXSTRICTRC_STRICT_ENABLED */
    15412917# else
    15422918    off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
    1543     AssertReturn(off != UINT32_MAX, UINT32_MAX);
    15442919    if (cParams > 0)
    1545     {
    15462920        off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
    1547         AssertReturn(off != UINT32_MAX, UINT32_MAX);
    1548     }
    15492921    if (cParams > 1)
    1550     {
    15512922        off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
    1552         AssertReturn(off != UINT32_MAX, UINT32_MAX);
    1553     }
    15542923    if (cParams > 2)
    1555     {
    15562924        off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
    1557         AssertReturn(off != UINT32_MAX, UINT32_MAX);
    1558     }
    15592925# endif
    15602926    off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xAX, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
    1561     AssertReturn(off != UINT32_MAX, UINT32_MAX);
    15622927
    15632928    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
     
    17673132    /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ;  */
    17683133    AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
    1769     pu32CodeBuf[off++] = Armv8A64MkInstrAddSub(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE);
     3134    pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
     3135                                                     IEMNATIVE_FRAME_SAVE_REG_SIZE);
    17703136
    17713137    /* retab / ret */
     
    18723238    AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
    18733239    /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
    1874     pu32CodeBuf[off++] = Armv8A64MkInstrAddSub(false /*fSub*/, ARMV8_A64_REG_BP,
    1875                                                ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
     3240    pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
     3241                                                     ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
    18763242
    18773243    /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ;  Allocate the variable area from SP. */
    1878     pu32CodeBuf[off++] = Armv8A64MkInstrAddSub(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
     3244    pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
    18793245
    18803246    /* mov r28, r0  */
    18813247    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
     3248    /* mov r27, r1  */
     3249    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
    18823250
    18833251#else
     
    19303298#endif
    19313299}
    1932 
    1933 
    1934 /** Same as iemRegAddToEip32AndFinishingClearingRF. */
    1935 DECLINLINE(uint32_t) iemNativeEmitAddToEip32AndFinishingClearingRF(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
    1936 {
    1937     /* Increment RIP. */
    1938     pVCpu->cpum.GstCtx.rip = (uint32_t)(pVCpu->cpum.GstCtx.eip + cbInstr);
    1939 
    1940     /* Consider flags. */
    1941     return iemNativeEmitFinishClearingRF(pReNative, off);
    1942 }
    1943 #endif
     3300#endif
     3301
     3302
     3303/** Same as iemRegAddToEip32AndFinishingNoFlags. */
     3304DECLINLINE(uint32_t) iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
     3305{
     3306    /* Allocate a temporary PC register. */
     3307    /** @todo this is not strictly required on AMD64, we could emit alternative
     3308     *        code here if we don't get a tmp register... */
     3309    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
     3310    AssertReturn(idxPcReg != UINT8_MAX, UINT32_MAX);
     3311
     3312    /* Perform the addition and store the result. */
     3313    off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
     3314    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
     3315
     3316    /* Free but don't flush the PC register. */
     3317    iemNativeRegFreeTmp(pReNative, idxPcReg);
     3318
     3319    return off;
     3320}
    19443321
    19453322/*
     
    19603337
    19613338
    1962 #define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fFlags)      {
    1963 
    1964 #define IEM_MC_END()                                    } AssertFailedReturn(UINT32_MAX /* shouldn't be reached! */)
     3339#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
     3340    {
     3341
     3342#define IEM_MC_END() \
     3343    } AssertFailedReturn(UINT32_MAX /* shouldn't be reached! */)
    19653344
    19663345#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr) \
    1967     return iemNativeEmitAddToIp16AndFinishingClearingRF(pReNative, off, a_cbInstr)
     3346    return iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, a_cbInstr)
    19683347
    19693348#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr) \
    1970     return iemNativeEmitAddToEip32AndFinishingClearingRF(pReNative, off, a_cbInstr)
     3349    return iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, a_cbInstr)
    19713350
    19723351#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr) \
    1973     return iemNativeEmitAddToRip64AndFinishingClearingRF(pReNative, off, a_cbInstr)
     3352    return iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, a_cbInstr)
    19743353
    19753354
     
    20393418        PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
    20403419        if (pfnRecom) /** @todo stats on this.   */
     3420        {
     3421            //STAM_COUNTER_INC()
    20413422            off = pfnRecom(pReNative, off, pCallEntry);
     3423        }
    20423424        else
    20433425            off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
     
    21103492
    21113493    iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
     3494#ifdef LOG_ENABLED
     3495    if (LogIs3Enabled())
     3496    {
     3497
     3498    }
     3499#endif
    21123500
    21133501    /*
  • trunk/src/VBox/VMM/VMMAll/IEMAllThrdPython.py

    r101387 r101484  
    5555
    5656g_kdTypeInfo = {
    57     # type name:    (cBits, fSigned, C-type       )
    58     'int8_t':       (    8,    True, 'int8_t',    ),
    59     'int16_t':      (   16,    True, 'int16_t',   ),
    60     'int32_t':      (   32,    True, 'int32_t',   ),
    61     'int64_t':      (   64,    True, 'int64_t',   ),
    62     'uint4_t':      (    4,   False, 'uint8_t',   ),
    63     'uint8_t':      (    8,   False, 'uint8_t',   ),
    64     'uint16_t':     (   16,   False, 'uint16_t', ),
    65     'uint32_t':     (   32,   False, 'uint32_t', ),
    66     'uint64_t':     (   64,   False, 'uint64_t', ),
    67     'uintptr_t':    (   64,   False, 'uintptr_t', ), # ASSUMES 64-bit host pointer size.
    68     'bool':         (    1,   False, 'bool',      ),
    69     'IEMMODE':      (    2,   False, 'IEMMODE',   ),
     57    # type name:        (cBits, fSigned, C-type      )
     58    'int8_t':           (    8,    True, 'int8_t',   ),
     59    'int16_t':          (   16,    True, 'int16_t',  ),
     60    'int32_t':          (   32,    True, 'int32_t',  ),
     61    'int64_t':          (   64,    True, 'int64_t',  ),
     62    'uint4_t':          (    4,   False, 'uint8_t',  ),
     63    'uint8_t':          (    8,   False, 'uint8_t',  ),
     64    'uint16_t':         (   16,   False, 'uint16_t', ),
     65    'uint32_t':         (   32,   False, 'uint32_t', ),
     66    'uint64_t':         (   64,   False, 'uint64_t', ),
     67    'uintptr_t':        (   64,   False, 'uintptr_t',), # ASSUMES 64-bit host pointer size.
     68    'bool':             (    1,   False, 'bool',     ),
     69    'IEMMODE':          (    2,   False, 'IEMMODE',  ),
    7070};
     71
     72# Only for getTypeBitCount/variables.
     73g_kdTypeInfo2 = {
     74    'RTFLOAT32U':       (   32,   False, 'RTFLOAT32U',      ),
     75    'RTFLOAT64U':       (   64,   False, 'RTFLOAT64U',      ),
     76    'RTUINT64U':        (   64,   False, 'RTUINT64U',       ),
     77    'RTGCPTR':          (   64,   False, 'RTGCPTR',         ),
     78    'RTPBCD80U':        (   80,   False, 'RTPBCD80U',       ),
     79    'RTFLOAT80U':       (   80,   False, 'RTFLOAT80U',      ),
     80    'IEMFPURESULT':     (80+16,   False, 'IEMFPURESULT',    ),
     81    'IEMFPURESULTTWO':  (80+16+80,False, 'IEMFPURESULTTWO', ),
     82    'RTUINT128U':       (  128,   False, 'RTUINT128U',      ),
     83    'X86XMMREG':        (  128,   False, 'X86XMMREG',       ),
     84    'IEMSSERESULT':     ( 128+32, False, 'IEMSSERESULT',    ),
     85    'IEMMEDIAF2XMMSRC': (  256,   False, 'IEMMEDIAF2XMMSRC',),
     86    'RTUINT256U':       (  256,   False, 'RTUINT256U',      ),
     87    'IEMPCMPISTRXSRC':  (  256,   False, 'IEMPCMPISTRXSRC', ),
     88    'IEMPCMPESTRXSRC':  (  384,   False, 'IEMPCMPESTRXSRC', ),
     89} | g_kdTypeInfo;
     90
     91def getTypeBitCount(sType):
     92    """
     93    Translate a type to size in bits
     94    """
     95    if sType in g_kdTypeInfo2:
     96        return g_kdTypeInfo2[sType][0];
     97    if '*' in sType or sType[0] == 'P':
     98        return 64;
     99    #raise Exception('Unknown type: %s' % (sType,));
     100    print('error: Unknown type: %s' % (sType,));
     101    return 64;
    71102
    72103g_kdIemFieldToType = {
     
    17521783                  % (cNative * 100.0 / cTotal, cNative, cTotal));
    17531784
     1785        # Gather arguments + variable statistics for the MC blocks.
     1786        cMaxArgs         = 0;
     1787        cMaxVars         = 0;
     1788        cMaxVarsAndArgs  = 0;
     1789        cbMaxArgs        = 0;
     1790        cbMaxVars        = 0;
     1791        cbMaxVarsAndArgs = 0;
     1792        for oThreadedFunction in self.aoThreadedFuncs:
     1793            if oThreadedFunction.oMcBlock.cLocals >= 0:
     1794                assert oThreadedFunction.oMcBlock.cArgs >= 0;
     1795                cMaxVars        = max(cMaxVars, oThreadedFunction.oMcBlock.cLocals);
     1796                cMaxArgs        = max(cMaxArgs, oThreadedFunction.oMcBlock.cArgs);
     1797                cMaxVarsAndArgs = max(cMaxVarsAndArgs, oThreadedFunction.oMcBlock.cLocals + oThreadedFunction.oMcBlock.cArgs);
     1798                # Calc stack allocation size:
     1799                cbArgs = 0;
     1800                for oArg in oThreadedFunction.oMcBlock.aoArgs:
     1801                    cbArgs += (getTypeBitCount(oArg.sType) + 63) // 64 * 8;
     1802                cbVars = 0;
     1803                for oVar in oThreadedFunction.oMcBlock.aoLocals:
      1804                    cbVars += (getTypeBitCount(oVar.sType) + 63) // 64 * 8;
     1805                cbMaxVars        = max(cbMaxVars, cbVars);
     1806                cbMaxArgs        = max(cbMaxArgs, cbArgs);
     1807                cbMaxVarsAndArgs = max(cbMaxVarsAndArgs, cbVars + cbArgs);
     1808                if cbMaxVarsAndArgs >= 0xc0:
     1809                    raise Exception('%s potentially uses too much stack: cbMaxVars=%#x cbMaxArgs=%#x'
     1810                                    % (oThreadedFunction.oMcBlock.oFunction.sName, cbMaxVars, cbMaxArgs,));
     1811
     1812        print('debug: max vars+args: %u bytes / %u; max vars: %u bytes / %u; max args: %u bytes / %u'
     1813              % (cbMaxVarsAndArgs, cMaxVarsAndArgs, cbMaxVars, cMaxVars, cbMaxArgs, cMaxArgs,));
     1814
    17541815        return True;
    17551816
  • trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp

    r101387 r101484  
    22692269        iemThreadedLogCurInstr(pVCpu, "EXn", 0);
    22702270# endif
     2271# ifdef RT_ARCH_AMD64
    22712272        VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
     2273# else
     2274        VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
     2275# endif
    22722276        if (RT_LIKELY(   rcStrict == VINF_SUCCESS
    22732277                      && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
  • trunk/src/VBox/VMM/include/IEMInternal.h

    r101448 r101484  
    547547
    548548/** @name IEM_MC_F_XXX - MC block flags/clues.
     549 * @todo Merge with IEM_CIMPL_F_XXX
    549550 * @{ */
    550551#define IEM_MC_F_ONLY_8086          RT_BIT_32(0)
     
    559560#define IEM_MC_F_64BIT              RT_BIT_32(6)
    560561#define IEM_MC_F_NOT_64BIT          RT_BIT_32(7)
     562/** @} */
     563
     564/** @name IEM_CIMPL_F_XXX - State change clues for CIMPL calls.
     565 *
     566 * These clues are mainly for the recompiler, so that it can emit correct code.
     567 *
      568 * They are processed by the python script, which also automatically
      569 * calculates flags for MC blocks based on the statements, extending the use of
      570 * these flags to describe MC block behavior to the recompiler core.  The python
      571 * script passes the flags to the IEM_MC2_END_EMIT_CALLS macro, but mainly for
      572 * error checking purposes.  The script emits the necessary fEndTb = true and
      573 * similar statements, as this reduces compile time a tiny bit.
     574 *
     575 * @{ */
     576/** Flag set if direct branch, clear if absolute or indirect. */
     577#define IEM_CIMPL_F_BRANCH_DIRECT        RT_BIT_32(0)
     578/** Flag set if indirect branch, clear if direct or relative.
     579 * This is also used for all system control transfers (SYSCALL, SYSRET, INT, ++)
     580 * as well as for return instructions (RET, IRET, RETF). */
     581#define IEM_CIMPL_F_BRANCH_INDIRECT      RT_BIT_32(1)
     582/** Flag set if relative branch, clear if absolute or indirect. */
     583#define IEM_CIMPL_F_BRANCH_RELATIVE      RT_BIT_32(2)
     584/** Flag set if conditional branch, clear if unconditional. */
     585#define IEM_CIMPL_F_BRANCH_CONDITIONAL   RT_BIT_32(3)
     586/** Flag set if it's a far branch (changes CS). */
     587#define IEM_CIMPL_F_BRANCH_FAR           RT_BIT_32(4)
     588/** Convenience: Testing any kind of branch. */
     589#define IEM_CIMPL_F_BRANCH_ANY          (IEM_CIMPL_F_BRANCH_DIRECT | IEM_CIMPL_F_BRANCH_INDIRECT | IEM_CIMPL_F_BRANCH_RELATIVE)
     590
     591/** Execution flags may change (IEMCPU::fExec). */
     592#define IEM_CIMPL_F_MODE                RT_BIT_32(5)
     593/** May change significant portions of RFLAGS. */
     594#define IEM_CIMPL_F_RFLAGS              RT_BIT_32(6)
     595/** May change the status bits (X86_EFL_STATUS_BITS) in RFLAGS. */
     596#define IEM_CIMPL_F_STATUS_FLAGS        RT_BIT_32(7)
     597/** May trigger interrupt shadowing. */
     598#define IEM_CIMPL_F_INHIBIT_SHADOW      RT_BIT_32(8)
      599/** May enable interrupts, so recheck IRQ immediately after executing
     600 *  the instruction. */
     601#define IEM_CIMPL_F_CHECK_IRQ_AFTER     RT_BIT_32(9)
     602/** May disable interrupts, so recheck IRQ immediately before executing the
     603 *  instruction. */
     604#define IEM_CIMPL_F_CHECK_IRQ_BEFORE    RT_BIT_32(10)
     605/** Convenience: Check for IRQ both before and after an instruction. */
     606#define IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER (IEM_CIMPL_F_CHECK_IRQ_BEFORE | IEM_CIMPL_F_CHECK_IRQ_AFTER)
     607/** May trigger a VM exit (treated like IEM_CIMPL_F_MODE atm). */
     608#define IEM_CIMPL_F_VMEXIT              RT_BIT_32(11)
     609/** May modify FPU state.
     610 * @todo Not sure if this is useful yet.  */
     611#define IEM_CIMPL_F_FPU                 RT_BIT_32(12)
     612/** REP prefixed instruction which may yield before updating PC.
     613 * @todo Not sure if this is useful, REP functions now return non-zero
     614 *       status if they don't update the PC. */
     615#define IEM_CIMPL_F_REP                 RT_BIT_32(13)
     616/** I/O instruction.
     617 * @todo Not sure if this is useful yet.  */
     618#define IEM_CIMPL_F_IO                  RT_BIT_32(14)
     619/** Force end of TB after the instruction. */
     620#define IEM_CIMPL_F_END_TB              RT_BIT_32(15)
     621/** Convenience: Raise exception (technically unnecessary, since it shouldn't return VINF_SUCCESS). */
     622#define IEM_CIMPL_F_XCPT \
     623    (IEM_CIMPL_F_BRANCH_INDIRECT | IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_MODE | IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_VMEXIT)
    561624/** @} */
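Illustrative combination (not part of the header; the flag set for any real instruction is decided by its decoder body):

    /* E.g. an instruction that inhibits the interrupt shadow and requires an IRQ
       recheck right after it has executed: */
    uint32_t const fCImplFlags = IEM_CIMPL_F_INHIBIT_SHADOW | IEM_CIMPL_F_CHECK_IRQ_AFTER;
    /* The recompiler core can then test whole categories of behaviour, e.g. clues
       that may force the translation block to end: */
    bool const fMayEndTb = RT_BOOL(fCImplFlags & (IEM_CIMPL_F_BRANCH_ANY | IEM_CIMPL_F_MODE | IEM_CIMPL_F_END_TB));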
    562625
     
    779842typedef IEMTHRDEDCALLENTRY const *PCIEMTHRDEDCALLENTRY;
    780843
    781 /** Native IEM TB 'function' typedef.
    782  * This will throw/longjmp on occation.  */
     844/**
     845 * Native IEM TB 'function' typedef.
     846 *
      847 * This will throw/longjmp on occasion.
     848 *
     849 * @note    AMD64 doesn't have that many non-volatile registers and does sport
      850 *          32-bit address displacements, so we don't need pCtx.
     851 *
     852 *          On ARM64 pCtx allows us to directly address the whole register
     853 *          context without requiring a separate indexing register holding the
     854 *          offset. This saves an instruction loading the offset for each guest
     855 *          CPU context access, at the cost of a non-volatile register.
     856 *          Fortunately, ARM64 has quite a lot more registers.
     857 */
     858typedef
     859#ifdef RT_ARCH_AMD64
     860int FNIEMTBNATIVE(PVMCPUCC pVCpu)
     861#else
     862int FNIEMTBNATIVE(PVMCPUCC pVCpu, PCPUMCTX pCtx)
     863#endif
    783864#if RT_CPLUSPLUS_PREREQ(201700)
    784 typedef int FNIEMTBNATIVE(PVMCPUCC pVCpu) IEM_NOEXCEPT_MAY_LONGJMP;
    785 #else
    786 typedef int FNIEMTBNATIVE(PVMCPUCC pVCpu);
    787 #endif
     865    IEM_NOEXCEPT_MAY_LONGJMP
     866#endif
     867    ;
    788868/** Pointer to a native IEM TB entry point function.
     789869 * This will throw/longjmp on occasion.  */
  • trunk/src/VBox/VMM/include/IEMMc.h

    r101387 r101484  
    196196#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg)       a_Type const a_Name = (a_Value)
    197197#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg)   a_Type const a_Name = &(a_Local)
     198/** @note IEMAllInstPython.py duplicates the expansion. */
    198199#define IEM_MC_ARG_LOCAL_EFLAGS(a_pName, a_Name, a_iArg) \
    199200    uint32_t a_Name; \
     
    19721973#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3)  (a_rc) = (a_pfn)((a0), (a1), (a2), (a3))
    19731974
    1974 /** @name IEM_CIMPL_F_XXX - State change clues for CIMPL calls.
    1975  *
    1976  * These clues are mainly for the recompiler, so that it can emit correct code.
    1977  *
    1978  * They are processed by the python script and which also automatically
    1979  * calculates flags for MC blocks based on the statements, extending the use of
    1980  * these flags to describe MC block behavior to the recompiler core.  The python
    1981  * script pass the flags to the IEM_MC2_END_EMIT_CALLS macro, but mainly for
    1982  * error checking purposes.  The script emits the necessary fEndTb = true and
    1983  * similar statements as this reduces compile time a tiny bit.
    1984  *
    1985  * @{ */
    1986 /** Flag set if direct branch, clear if absolute or indirect. */
    1987 #define IEM_CIMPL_F_BRANCH_DIRECT        RT_BIT_32(0)
    1988 /** Flag set if indirect branch, clear if direct or relative.
    1989  * This is also used for all system control transfers (SYSCALL, SYSRET, INT, ++)
    1990  * as well as for return instructions (RET, IRET, RETF). */
    1991 #define IEM_CIMPL_F_BRANCH_INDIRECT      RT_BIT_32(1)
    1992 /** Flag set if relative branch, clear if absolute or indirect. */
    1993 #define IEM_CIMPL_F_BRANCH_RELATIVE      RT_BIT_32(2)
    1994 /** Flag set if conditional branch, clear if unconditional. */
    1995 #define IEM_CIMPL_F_BRANCH_CONDITIONAL   RT_BIT_32(3)
    1996 /** Flag set if it's a far branch (changes CS). */
    1997 #define IEM_CIMPL_F_BRANCH_FAR           RT_BIT_32(4)
    1998 /** Convenience: Testing any kind of branch. */
    1999 #define IEM_CIMPL_F_BRANCH_ANY          (IEM_CIMPL_F_BRANCH_DIRECT | IEM_CIMPL_F_BRANCH_INDIRECT | IEM_CIMPL_F_BRANCH_RELATIVE)
    2000 
    2001 /** Execution flags may change (IEMCPU::fExec). */
    2002 #define IEM_CIMPL_F_MODE                RT_BIT_32(5)
    2003 /** May change significant portions of RFLAGS. */
    2004 #define IEM_CIMPL_F_RFLAGS              RT_BIT_32(6)
    2005 /** May change the status bits (X86_EFL_STATUS_BITS) in RFLAGS. */
    2006 #define IEM_CIMPL_F_STATUS_FLAGS        RT_BIT_32(7)
    2007 /** May trigger interrupt shadowing. */
    2008 #define IEM_CIMPL_F_INHIBIT_SHADOW      RT_BIT_32(8)
    2009 /** May enable interrupts, so recheck IRQ immediately afterwards executing
    2010  *  the instruction. */
    2011 #define IEM_CIMPL_F_CHECK_IRQ_AFTER     RT_BIT_32(9)
    2012 /** May disable interrupts, so recheck IRQ immediately before executing the
    2013  *  instruction. */
    2014 #define IEM_CIMPL_F_CHECK_IRQ_BEFORE    RT_BIT_32(10)
    2015 /** Convenience: Check for IRQ both before and after an instruction. */
    2016 #define IEM_CIMPL_F_CHECK_IRQ_BEFORE_AND_AFTER (IEM_CIMPL_F_CHECK_IRQ_BEFORE | IEM_CIMPL_F_CHECK_IRQ_AFTER)
    2017 /** May trigger a VM exit (treated like IEM_CIMPL_F_MODE atm). */
    2018 #define IEM_CIMPL_F_VMEXIT              RT_BIT_32(11)
    2019 /** May modify FPU state.
    2020  * @todo Not sure if this is useful yet.  */
    2021 #define IEM_CIMPL_F_FPU                 RT_BIT_32(12)
    2022 /** REP prefixed instruction which may yield before updating PC.
    2023  * @todo Not sure if this is useful, REP functions now return non-zero
    2024  *       status if they don't update the PC. */
    2025 #define IEM_CIMPL_F_REP                 RT_BIT_32(13)
    2026 /** I/O instruction.
    2027  * @todo Not sure if this is useful yet.  */
    2028 #define IEM_CIMPL_F_IO                  RT_BIT_32(14)
    2029 /** Force end of TB after the instruction. */
    2030 #define IEM_CIMPL_F_END_TB              RT_BIT_32(15)
    2031 /** Convenience: Raise exception (technically unnecessary, since it shouldn't return VINF_SUCCESS). */
    2032 #define IEM_CIMPL_F_XCPT \
    2033     (IEM_CIMPL_F_BRANCH_INDIRECT | IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_MODE | IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_VMEXIT)
    2034 /** @} */
    20351975
    20361976/** @def IEM_MC_CALL_CIMPL_HLP_RET
     
    24432383
    24442384/** Declares implicit arguments for IEM_MC_CALL_AVX_AIMPL_2,
    2445  *  IEM_MC_CALL_AVX_AIMPL_3, IEM_MC_CALL_AVX_AIMPL_4, ... */
     2385 *  IEM_MC_CALL_AVX_AIMPL_3, IEM_MC_CALL_AVX_AIMPL_4, ...
     2386 * @note IEMAllInstPython.py duplicates the expansion.  */
    24462387#define IEM_MC_IMPLICIT_AVX_AIMPL_ARGS() \
    24472388    IEM_MC_ARG_CONST(PX86XSAVEAREA, pXState, &pVCpu->cpum.GstCtx.XState, 0)
  • trunk/src/VBox/VMM/include/IEMN8veRecompiler.h

    r101387 r101484  
    4141 *
    4242 * @{  */
    43 /** The size of the area for stack variables and spills and stuff. */
    44 #define IEMNATIVE_FRAME_VAR_SIZE            0x40
     43/** The size of the area for stack variables and spills and stuff.
     44 * @note This limit is duplicated in the python script(s). */
     45#define IEMNATIVE_FRAME_VAR_SIZE            0xc0
    4546#ifdef RT_ARCH_AMD64
    4647/** Number of stack arguments slots for calls made from the frame. */
     
    113114 * @{ */
    114115/** @def IEMNATIVE_REG_FIXED_PVMCPU
    115  * The register number hold in pVCpu pointer.  */
     116 * The number of the register holding the pVCpu pointer.  */
     117/** @def IEMNATIVE_REG_FIXED_PCPUMCTX
     118 * The number of the register holding the &pVCpu->cpum.GstCtx pointer.
      119 * @note This is not available on AMD64, only on ARM64. */
    116120/** @def IEMNATIVE_REG_FIXED_TMP0
    117121 * Dedicated temporary register.
    118122 * @todo replace this by a register allocator and content tracker.  */
     123/** @def IEMNATIVE_REG_FIXED_MASK
      124 * Mask of GPRs with fixed assignments, either made by us or dictated by the CPU/OS
     125 * architecture. */
    119126#ifdef RT_ARCH_AMD64
    120127# define IEMNATIVE_REG_FIXED_PVMCPU         X86_GREG_xBX
    121128# define IEMNATIVE_REG_FIXED_TMP0           X86_GREG_x11
    122 
    123 #elif defined(RT_ARCH_ARM64)
     129# define IEMNATIVE_REG_FIXED_MASK          (  RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU) \
     130                                            | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0) \
     131                                            | RT_BIT_32(X86_GREG_xSP) \
     132                                            | RT_BIT_32(X86_GREG_xBP) )
     133
     134#elif defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
    124135# define IEMNATIVE_REG_FIXED_PVMCPU         ARMV8_A64_REG_X28
     136# define IEMNATIVE_REG_FIXED_PCPUMCTX       ARMV8_A64_REG_X27
    125137# define IEMNATIVE_REG_FIXED_TMP0           ARMV8_A64_REG_X15
     138# define IEMNATIVE_REG_FIXED_MASK           (  RT_BIT_32(ARMV8_A64_REG_SP) \
     139                                             | RT_BIT_32(ARMV8_A64_REG_LR) \
     140                                             | RT_BIT_32(ARMV8_A64_REG_BP) \
     141                                             | RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU) \
     142                                             | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX) \
     143                                             | RT_BIT_32(ARMV8_A64_REG_X18) \
     144                                             | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0) )
    126145
    127146#else
     
    144163/** @def IEMNATIVE_CALL_ARG3_GREG
    145164 * The general purpose register carrying argument \#3. */
     165/** @def IEMNATIVE_CALL_VOLATILE_GREG_MASK
     166 * Mask of registers the callee will not save and may trash. */
    146167#ifdef RT_ARCH_AMD64
    147168# define IEMNATIVE_CALL_RET_GREG             X86_GREG_xAX
     
    153174#  define IEMNATIVE_CALL_ARG2_GREG          X86_GREG_x8
    154175#  define IEMNATIVE_CALL_ARG3_GREG          X86_GREG_x9
     176#  define IEMNATIVE_CALL_VOLATILE_GREG_MASK (  RT_BIT_32(X86_GREG_xAX) \
     177                                             | RT_BIT_32(X86_GREG_xCX) \
     178                                             | RT_BIT_32(X86_GREG_xDX) \
     179                                             | RT_BIT_32(X86_GREG_x8) \
     180                                             | RT_BIT_32(X86_GREG_x9) \
     181                                             | RT_BIT_32(X86_GREG_x10) \
     182                                             | RT_BIT_32(X86_GREG_x11) )
    155183# else
    156184#  define IEMNATIVE_CALL_ARG_GREG_COUNT     6
     
    161189#  define IEMNATIVE_CALL_ARG4_GREG          X86_GREG_x8
    162190#  define IEMNATIVE_CALL_ARG5_GREG          X86_GREG_x9
     191#  define IEMNATIVE_CALL_VOLATILE_GREG_MASK (  RT_BIT_32(X86_GREG_xAX) \
     192                                             | RT_BIT_32(X86_GREG_xCX) \
     193                                             | RT_BIT_32(X86_GREG_xDX) \
     194                                             | RT_BIT_32(X86_GREG_xDI) \
     195                                             | RT_BIT_32(X86_GREG_xSI) \
     196                                             | RT_BIT_32(X86_GREG_x8) \
     197                                             | RT_BIT_32(X86_GREG_x9) \
     198                                             | RT_BIT_32(X86_GREG_x10) \
     199                                             | RT_BIT_32(X86_GREG_x11) )
    163200# endif
    164201
     
    174211# define IEMNATIVE_CALL_ARG6_GREG           ARMV8_A64_REG_X6
    175212# define IEMNATIVE_CALL_ARG7_GREG           ARMV8_A64_REG_X7
     213# define IEMNATIVE_CALL_VOLATILE_GREG_MASK  (  RT_BIT_32(ARMV8_A64_REG_X0) \
     214                                             | RT_BIT_32(ARMV8_A64_REG_X1) \
     215                                             | RT_BIT_32(ARMV8_A64_REG_X2) \
     216                                             | RT_BIT_32(ARMV8_A64_REG_X3) \
     217                                             | RT_BIT_32(ARMV8_A64_REG_X4) \
     218                                             | RT_BIT_32(ARMV8_A64_REG_X5) \
     219                                             | RT_BIT_32(ARMV8_A64_REG_X6) \
     220                                             | RT_BIT_32(ARMV8_A64_REG_X7) \
     221                                             | RT_BIT_32(ARMV8_A64_REG_X8) \
     222                                             | RT_BIT_32(ARMV8_A64_REG_X9) \
     223                                             | RT_BIT_32(ARMV8_A64_REG_X10) \
     224                                             | RT_BIT_32(ARMV8_A64_REG_X11) \
     225                                             | RT_BIT_32(ARMV8_A64_REG_X12) \
     226                                             | RT_BIT_32(ARMV8_A64_REG_X13) \
     227                                             | RT_BIT_32(ARMV8_A64_REG_X14) \
     228                                             | RT_BIT_32(ARMV8_A64_REG_X15) \
     229                                             | RT_BIT_32(ARMV8_A64_REG_X16) \
     230                                             | RT_BIT_32(ARMV8_A64_REG_X17) )
    176231
    177232#endif
    178233
    179234/** @} */
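The volatile mask above is what a caller has to flush before emitting a helper call; a hedged sketch using the register-management API declared further down in this header:

    /* Sketch: give up everything the callee is allowed to trash before the call. */
    iemNativeRegFreeAndFlushMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);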
     235
     236
     237/** @def IEMNATIVE_HST_GREG_COUNT
      238 * Number of host general purpose registers we track. */
     239/** @def IEMNATIVE_HST_GREG_MASK
     240 * Mask corresponding to IEMNATIVE_HST_GREG_COUNT that can be applied to
     241 * inverted register masks and such to get down to a correct set of regs. */
     242#ifdef RT_ARCH_AMD64
     243# define IEMNATIVE_HST_GREG_COUNT           16
     244# define IEMNATIVE_HST_GREG_MASK            UINT32_C(0xffff)
     245
     246#elif defined(RT_ARCH_ARM64)
     247# define IEMNATIVE_HST_GREG_COUNT           32
     248# define IEMNATIVE_HST_GREG_MASK            UINT32_MAX
     249#else
     250# error "Port me!"
     251#endif
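A minimal sketch (assumed, not part of the changeset) of how these masks combine with the allocation bitmap bmHstRegs introduced below to pick a free candidate register:

    /* Sketch: candidates are in range, not fixed, and not currently allocated. */
    uint32_t const fFree  = ~pReNative->bmHstRegs & ~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK;
    uint8_t  const idxReg = fFree ? (uint8_t)(ASMBitFirstSetU32(fFree) - 1) : UINT8_MAX; /* IPRT: 1-based, 0 = none */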
     252
    180253
    181254/** Native code generator label types. */
     
    232305typedef IEMNATIVEFIXUP *PIEMNATIVEFIXUP;
    233306
     307
     308/**
     309 * Guest registers that can be shadowed in GPRs.
     310 */
     311typedef enum IEMNATIVEGSTREG : uint8_t
     312{
     313    kIemNativeGstReg_GprFirst      = 0,
     314    kIemNativeGstReg_GprLast       = 15,
     315    kIemNativeGstReg_Pc,
     316    kIemNativeGstReg_Rflags,
     317    /* gap: 18..23 */
     318    kIemNativeGstReg_SegSelFirst   = 24,
     319    kIemNativeGstReg_SegSelLast    = 29,
     320    kIemNativeGstReg_SegBaseFirst  = 30,
     321    kIemNativeGstReg_SegBaseLast   = 35,
     322    kIemNativeGstReg_SegLimitFirst = 36,
     323    kIemNativeGstReg_SegLimitLast  = 41,
     324    kIemNativeGstReg_End
     325} IEMNATIVEGSTREG;
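The First/Last pairs are meant to be indexed arithmetically; for example (a sketch based only on the enum layout above, with iSegReg being a hypothetical X86_SREG_XXX index):

    /* Sketch: map segment register index 0..5 to its shadow-tracking entry. */
    IEMNATIVEGSTREG const enmGstSReg = (IEMNATIVEGSTREG)(kIemNativeGstReg_SegSelFirst + iSegReg);
    Assert(enmGstSReg <= kIemNativeGstReg_SegSelLast);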
     326
     327/**
     328 * Guest registers (classes) that can be referenced.
     329 */
     330typedef enum IEMNATIVEGSTREGREF : uint8_t
     331{
     332    kIemNativeGstRegRef_Invalid = 0,
     333    kIemNativeGstRegRef_Gpr,
      334    kIemNativeGstRegRef_GprHighByte,    /**< AH, CH, DH, BH */
     335    kIemNativeGstRegRef_EFlags,
     336    kIemNativeGstRegRef_MxCsr,
     337    kIemNativeGstRegRef_FpuReg,
     338    kIemNativeGstRegRef_MReg,
     339    kIemNativeGstRegRef_XReg,
     340    kIemNativeGstRegRef_YReg,
     341    kIemNativeGstRegRef_End
     342} IEMNATIVEGSTREGREF;
     343
     344
     345/** Variable kinds. */
     346typedef enum IEMNATIVEVARKIND : uint8_t
     347{
     348    /** Customary invalid zero value. */
     349    kIemNativeVarKind_Invalid = 0,
     350    /** This is either in a register or on the stack. */
     351    kIemNativeVarKind_Stack,
     352    /** Immediate value - loaded into register when needed, or can live on the
     353     *  stack if referenced (in theory). */
     354    kIemNativeVarKind_Immediate,
     355    /** Variable reference - loaded into register when needed, never stack. */
     356    kIemNativeVarKind_VarRef,
     357    /** Guest register reference - loaded into register when needed, never stack. */
     358    kIemNativeVarKind_GstRegRef,
     359    /** End of valid values. */
     360    kIemNativeVarKind_End
     361} IEMNATIVEVARKIND;
     362
     363
     364/** Variable or argument. */
     365typedef struct IEMNATIVEVAR
     366{
     367    /** The kind of variable. */
     368    IEMNATIVEVARKIND    enmKind;
     369    /** The variable size in bytes. */
     370    uint8_t             cbVar;
      371    /** The first stack slot (uint64_t), except for immediates and references
     372     *  where it usually is UINT8_MAX. */
     373    uint8_t             idxStackSlot;
     374    /** The host register allocated for the variable, UINT8_MAX if not. */
     375    uint8_t             idxReg;
     376    /** The argument number if argument, UINT8_MAX if regular variable. */
     377    uint8_t             uArgNo;
     378    /** If referenced, the index of the variable referencing this one, otherwise
     379     *  UINT8_MAX.  A referenced variable must only be placed on the stack and
     380     *  must be either kIemNativeVarKind_Stack or kIemNativeVarKind_Immediate. */
     381    uint8_t             idxReferrerVar;
     382    /** Guest register being shadowed here, kIemNativeGstReg_End(/UINT8_MAX) if not. */
     383    IEMNATIVEGSTREG     enmGstReg;
     384    uint8_t             bAlign;
     385
     386    union
     387    {
     388        /** kIemNativeVarKind_Immediate: The immediate value. */
     389        uint64_t            uValue;
     390        /** kIemNativeVarKind_VarRef: The index of the variable being referenced. */
     391        uint8_t             idxRefVar;
      392        /** kIemNativeVarKind_GstRegRef: The guest register being referenced. */
     393        struct
     394        {
     395            /** The class of register. */
     396            IEMNATIVEGSTREGREF  enmClass;
     397            /** Index within the class. */
     398            uint8_t             idx;
     399        } GstRegRef;
     400    } u;
     401} IEMNATIVEVAR;
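How an entry might be filled in for an immediate-value variable (a hedged sketch; idxVar and uImmValue are hypothetical, and the actual init helpers are not part of this hunk):

    /* Sketch: variable idxVar becomes a 32-bit immediate constant. */
    IEMNATIVEVAR *pVar    = &pReNative->aVars[idxVar];
    pVar->enmKind         = kIemNativeVarKind_Immediate;
    pVar->cbVar           = sizeof(uint32_t);
    pVar->idxStackSlot    = UINT8_MAX;            /* no stack slot unless it gets referenced */
    pVar->idxReg          = UINT8_MAX;            /* loaded into a register only when needed */
    pVar->uArgNo          = UINT8_MAX;            /* regular variable, not an argument */
    pVar->idxReferrerVar  = UINT8_MAX;
    pVar->enmGstReg       = kIemNativeGstReg_End;
    pVar->u.uValue        = uImmValue;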
     402
     403/** What is being kept in a host register. */
     404typedef enum IEMNATIVEWHAT : uint8_t
     405{
     406    /** The traditional invalid zero value. */
     407    kIemNativeWhat_Invalid = 0,
     408    /** Mapping a variable (IEMNATIVEHSTREG::idxVar). */
     409    kIemNativeWhat_Var,
      410    /** Temporary register, this is typically freed when an MC completes. */
     411    kIemNativeWhat_Tmp,
      412    /** Call argument w/o a variable mapping.  This is freed (via
     413     * IEMNATIVE_CALL_VOLATILE_GREG_MASK) after the call is emitted. */
     414    kIemNativeWhat_Arg,
     415    /** Return status code.
     416     * @todo not sure if we need this... */
     417    kIemNativeWhat_rc,
     418    /** The fixed pVCpu (PVMCPUCC) register.
     419     * @todo consider offsetting this on amd64 to use negative offsets to access
     420     *       more members using 8-byte disp. */
     421    kIemNativeWhat_pVCpuFixed,
     422    /** The fixed pCtx (PCPUMCTX) register.
     423     * @todo consider offsetting this on amd64 to use negative offsets to access
     424     *       more members using 8-byte disp. */
     425    kIemNativeWhat_pCtxFixed,
     426    /** Fixed temporary register. */
     427    kIemNativeWhat_FixedTmp,
     428    /** Register reserved by the CPU or OS architecture. */
     429    kIemNativeWhat_FixedReserved,
     430    /** End of valid values. */
     431    kIemNativeWhat_End
     432} IEMNATIVEWHAT;
     433
     434/**
     435 * Host general register entry.
     436 *
     437 * The actual allocation status is kept in IEMRECOMPILERSTATE::bmHstRegs.
     438 *
      439 * @todo Track immediate values in host registers similarly to how we track the
      440 *       guest register shadow copies. For it to be really helpful, though,
     441 *       we probably need to know which will be reused and put them into
     442 *       non-volatile registers, otherwise it's going to be more or less
     443 *       restricted to an instruction or two.
     444 */
     445typedef struct IEMNATIVEHSTREG
     446{
     447    /** Set of guest registers this one shadows.
     448     *
     449     * Using a bitmap here so we can designate the same host register as a copy
     450     * for more than one guest register.  This is expected to be useful in
     451     * situations where one value is copied to several registers in a sequence.
     452     * If the mapping is 1:1, then we'd have to pick which side of a 'MOV SRC,DST'
     453     * sequence we'd want to let this register follow to be a copy of and there
     454     * will always be places where we'd be picking the wrong one.
     455     */
     456    uint64_t        fGstRegShadows;
     457    /** What is being kept in this register. */
     458    IEMNATIVEWHAT   enmWhat;
     459    /** Variable index if holding a variable, otherwise UINT8_MAX. */
     460    uint8_t         idxVar;
     461    /** Alignment padding. */
     462    uint8_t         abAlign[6];
     463} IEMNATIVEHSTREG;
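What recording a shadow copy would look like, as a hedged sketch using the bookkeeping members of IEMRECOMPILERSTATE introduced below (idxHstReg and enmGstReg are hypothetical inputs):

    /* Sketch: host register idxHstReg now holds a shadow copy of guest register enmGstReg. */
    pReNative->aHstRegs[idxHstReg].fGstRegShadows |= RT_BIT_64(enmGstReg);
    pReNative->aidxGstRegShadows[enmGstReg]        = idxHstReg;
    pReNative->bmGstRegShadows                    |= RT_BIT_64(enmGstReg);
    pReNative->bmHstRegsWithGstShadow             |= RT_BIT_32(idxHstReg);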
     464
     465
    234466/**
    235467 * Native recompiler state.
     
    260492    /** The translation block being recompiled. */
    261493    PCIEMTB                     pTbOrg;
     494
      495    /** Allocation bitmap for aHstRegs. */
     496    uint32_t                    bmHstRegs;
     497
      498    /** Bitmap marking which host registers contain guest register shadow copies.
      499     * This is used during register allocation to try to preserve copies.  */
     500    uint32_t                    bmHstRegsWithGstShadow;
     501    /** Bitmap marking valid entries in aidxGstRegShadows. */
     502    uint64_t                    bmGstRegShadows;
     503
     504    /** Allocation bitmap for aVars. */
     505    uint32_t                    bmVars;
     506    uint32_t                    u32Align;
     507    union
     508    {
      509        /** Indexes of the argument variables, UINT8_MAX if not valid. */
     510        uint8_t                 aidxArgVars[8];
     511        /** For more efficient resetting. */
     512        uint64_t                u64ArgVars;
     513    };
     514
     515    /** Host register allocation tracking. */
     516    IEMNATIVEHSTREG             aHstRegs[IEMNATIVE_HST_GREG_COUNT];
      517    /** Maps a guest register to a host GPR (indexed by IEMNATIVEGSTREG).
      518     * Entries are only valid if the corresponding bit in bmGstRegShadows is set.
      519     * (A shadow copy of a guest register can only be held in one host register;
      520     * there are no duplicate copies or ambiguities like that.) */
     521    uint8_t                     aidxGstRegShadows[kIemNativeGstReg_End];
     522    /** Variables and arguments. */
     523    IEMNATIVEVAR                aVars[16];
    262524} IEMRECOMPILERSTATE;
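The three shadow-tracking members are redundant views of the same mapping; a hedged sketch of the consistency they presumably have to maintain:

    /* Sketch: every shadowed guest register must be mirrored in both bitmaps and in aHstRegs. */
    for (unsigned i = 0; i < (unsigned)kIemNativeGstReg_End; i++)
        if (pReNative->bmGstRegShadows & RT_BIT_64(i))
        {
            uint8_t const idxHstReg = pReNative->aidxGstRegShadows[i];
            Assert(pReNative->bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
            Assert(pReNative->aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(i));
        }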
    263525/** Pointer to a native recompiler state. */
     
    293555DECLHIDDEN(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off,
    294556                                                        uint32_t cInstrReq) RT_NOEXCEPT;
     557
     558DECLHIDDEN(uint8_t)         iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
     559                                                 bool fPreferVolatile = true) RT_NOEXCEPT;
     560DECLHIDDEN(uint8_t)         iemNativeRegAllocTmpForGuest(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
     561                                                         IEMNATIVEGSTREG enmGstReg) RT_NOEXCEPT;
     562DECLHIDDEN(uint8_t)         iemNativeRegAllocVar(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t idxVar) RT_NOEXCEPT;
     563DECLHIDDEN(uint32_t)        iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs) RT_NOEXCEPT;
     564DECLHIDDEN(uint8_t)         iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT;
     565DECLHIDDEN(void)            iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT;
     566DECLHIDDEN(void)            iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT;
     567DECLHIDDEN(void)            iemNativeRegFreeAndFlushMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegMask) RT_NOEXCEPT;
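A hypothetical call sequence, consistent with the prototypes above (assuming UINT8_MAX signals allocation failure), that fetches a guest GPR into a host register and releases it again:

    /* Sketch: shadow guest RAX in a temporary host register, emit code with it, then free it. */
    uint8_t const idxHstReg = iemNativeRegAllocTmpForGuest(pReNative, &off, kIemNativeGstReg_GprFirst /* RAX */);
    AssertReturn(idxHstReg != UINT8_MAX, UINT32_MAX);
    /* code that reads the guest value from idxHstReg would be emitted here */
    iemNativeRegFreeTmp(pReNative, idxHstReg);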
    295568
    296569DECLHIDDEN(uint32_t)        iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
     
    468741
    469742
    470 /**
    471  * Emits a 32-bit GPR load of a VCpu value.
    472  */
    473 DECLINLINE(uint32_t) iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
    474 {
    475 #ifdef RT_ARCH_AMD64
    476     uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    477     AssertReturn(pbCodeBuf, UINT32_MAX);
    478 
    479     /* mov reg32, mem32 */
    480     if (iGpr >= 8)
    481         pbCodeBuf[off++] = X86_OP_REX_R;
    482     pbCodeBuf[off++] = 0x8b;
     743#ifdef RT_ARCH_AMD64
     744/**
     745 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
     746 */
     747DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByVCpuDisp(uint8_t *pbCodeBuf, uint32_t off, uint8_t iGprReg, uint32_t offVCpu)
     748{
    483749    if (offVCpu < 128)
    484750    {
    485         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGpr & 7, IEMNATIVE_REG_FIXED_PVMCPU);
    486         pbCodeBuf[off++] = (uint8_t)offVCpu;
    487     }
    488     else
    489     {
    490         pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGpr & 7, IEMNATIVE_REG_FIXED_PVMCPU);
    491         pbCodeBuf[off++] = RT_BYTE1(offVCpu);
    492         pbCodeBuf[off++] = RT_BYTE2(offVCpu);
    493         pbCodeBuf[off++] = RT_BYTE3(offVCpu);
    494         pbCodeBuf[off++] = RT_BYTE4(offVCpu);
    495     }
    496 
    497 #elif RT_ARCH_ARM64
     751        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
     752        pbCodeBuf[off++] = (uint8_t)(int8_t)offVCpu;
     753    }
     754    else
     755    {
     756        pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, iGprReg & 7, IEMNATIVE_REG_FIXED_PVMCPU);
     757        pbCodeBuf[off++] = RT_BYTE1((uint32_t)offVCpu);
     758        pbCodeBuf[off++] = RT_BYTE2((uint32_t)offVCpu);
     759        pbCodeBuf[off++] = RT_BYTE3((uint32_t)offVCpu);
     760        pbCodeBuf[off++] = RT_BYTE4((uint32_t)offVCpu);
     761    }
     762    return off;
     763}
     764#elif RT_ARCH_ARM64
     765/**
     766 * Common bit of iemNativeEmitLoadGprFromVCpuU64 and friends.
     767 */
     768DECL_FORCE_INLINE(uint32_t) iemNativeEmitGprByVCpuLdSt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprReg,
     769                                                       uint32_t offVCpu, ARMV8A64INSTRLDSTTYPE enmOperation, unsigned cbData)
     770{
    498771    /*
    499772     * There are a couple of ldr variants that takes an immediate offset, so
     
    501774     * help with the addressing.
    502775     */
    503     if (offVCpu < _16K)
     776    if (offVCpu < _4K * cbData && !(offVCpu & (cbData - 1)))
    504777    {
    505778        /* Use the unsigned variant of ldr Wt, [<Xn|SP>, #off]. */
    506779        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    507780        AssertReturn(pu32CodeBuf, UINT32_MAX);
    508         pu32CodeBuf[off++] = UINT32_C(0xb9400000) | (offVCpu << 10) | (IEMNATIVE_REG_FIXED_PVMCPU << 5) | iGpr;
      781        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, offVCpu / cbData);
     782    }
     783    else if (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx) < (unsigned)(_4K * cbData) && !(offVCpu & (cbData - 1)))
     784    {
     785        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
     786        AssertReturn(pu32CodeBuf, UINT32_MAX);
      787        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PCPUMCTX,
     788                                                      (offVCpu - RT_UOFFSETOF(VMCPU, cpum.GstCtx)) / cbData);
    509789    }
    510790    else
    511791    {
    512792        /* The offset is too large, so we must load it into a register and use
    513            ldr Wt, [<Xn|SP>, (<Wm>|<Xm>). */
     793           ldr Wt, [<Xn|SP>, (<Wm>|<Xm>)]. */
     514794        /** @todo reduce offVCpu by >> 3 or >> 2 if it saves instructions? */
    515795        off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offVCpu);
     796
    516797        uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
    517798        AssertReturn(pu32CodeBuf, UINT32_MAX);
    518         pu32CodeBuf[off++] = UINT32_C(0xb8600800) | ((uint32_t)IEMNATIVE_REG_FIXED_TMP0 << 16)
    519                            | ((uint32_t)IEMNATIVE_REG_FIXED_PVMCPU << 5) | iGpr;
    520     }
      799        pu32CodeBuf[off++] = Armv8A64MkInstrStLdRegIdx(enmOperation, iGprReg, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_REG_FIXED_TMP0);
     800    }
     801    return off;
     802}
     803#endif
     804
     805
     806/**
     807 * Emits a 64-bit GPR load of a VCpu value.
     808 */
     809DECLINLINE(uint32_t) iemNativeEmitLoadGprFromVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
     810{
     811#ifdef RT_ARCH_AMD64
     812    /* mov reg64, mem64 */
     813    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
     814    AssertReturn(pbCodeBuf, UINT32_MAX);
     815    if (iGpr < 8)
     816        pbCodeBuf[off++] = X86_OP_REX_W;
     817    else
     818        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
     819    pbCodeBuf[off++] = 0x8b;
      820    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
     821
     822#elif RT_ARCH_ARM64
     823    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Dword, sizeof(uint64_t));
     824
     825#else
     826# error "port me"
     827#endif
     828    return off;
     829}
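For instance, a hedged usage sketch loading the guest RIP into the fixed temporary register (the field offset is assumed from the VMCPU layout referenced above):

    /* Sketch: tmp0 = pVCpu->cpum.GstCtx.rip */
    off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
    AssertReturn(off != UINT32_MAX, UINT32_MAX);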
     830
     831
     832/**
     833 * Emits a 32-bit GPR load of a VCpu value.
     834 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
     835 */
     836DECLINLINE(uint32_t) iemNativeEmitLoadGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
     837{
     838#ifdef RT_ARCH_AMD64
     839    /* mov reg32, mem32 */
     840    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
     841    AssertReturn(pbCodeBuf, UINT32_MAX);
     842    if (iGpr >= 8)
     843        pbCodeBuf[off++] = X86_OP_REX_R;
     844    pbCodeBuf[off++] = 0x8b;
     845    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
     846
     847#elif RT_ARCH_ARM64
     848    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
     849
     850#else
     851# error "port me"
     852#endif
     853    return off;
     854}
     855
     856
     857/**
     858 * Emits a 16-bit GPR load of a VCpu value.
     859 * @note Bits 16 thru 63 in the GPR will be zero after the operation.
     860 */
     861DECLINLINE(uint32_t) iemNativeEmitLoadGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
     862{
     863#ifdef RT_ARCH_AMD64
     864    /* movzx reg32, mem16 */
     865    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
     866    AssertReturn(pbCodeBuf, UINT32_MAX);
     867    if (iGpr >= 8)
     868        pbCodeBuf[off++] = X86_OP_REX_R;
     869    pbCodeBuf[off++] = 0x0f;
     870    pbCodeBuf[off++] = 0xb7;
     871    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
     872
     873#elif RT_ARCH_ARM64
     874    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Half, sizeof(uint16_t));
     875
     876#else
     877# error "port me"
     878#endif
     879    return off;
     880}
     881
     882
     883/**
      884 * Emits an 8-bit GPR load of a VCpu value.
     885 * @note Bits 8 thru 63 in the GPR will be zero after the operation.
     886 */
     887DECLINLINE(uint32_t) iemNativeEmitLoadGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
     888{
     889#ifdef RT_ARCH_AMD64
     890    /* movzx reg32, mem8 */
     891    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
     892    AssertReturn(pbCodeBuf, UINT32_MAX);
     893    if (iGpr >= 8)
     894        pbCodeBuf[off++] = X86_OP_REX_R;
     895    pbCodeBuf[off++] = 0x0f;
     896    pbCodeBuf[off++] = 0xb6;
     897    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
     898
     899#elif RT_ARCH_ARM64
     900    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_Ld_Byte, sizeof(uint8_t));
     901
     902#else
     903# error "port me"
     904#endif
     905    return off;
     906}
     907
     908
     909/**
     910 * Emits a store of a GPR value to a 64-bit VCpu field.
     911 */
     912DECLINLINE(uint32_t) iemNativeEmitStoreGprToVCpuU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
     913{
     914#ifdef RT_ARCH_AMD64
     915    /* mov mem64, reg64 */
     916    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
     917    AssertReturn(pbCodeBuf, UINT32_MAX);
     918    if (iGpr < 8)
     919        pbCodeBuf[off++] = X86_OP_REX_W;
     920    else
     921        pbCodeBuf[off++] = X86_OP_REX_W | X86_OP_REX_R;
     922    pbCodeBuf[off++] = 0x89;
      923    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
     924
     925#elif RT_ARCH_ARM64
     926    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Dword, sizeof(uint64_t));
     927
     928#else
     929# error "port me"
     930#endif
     931    return off;
     932}
     933
     934
     935/**
     936 * Emits a store of a GPR value to a 32-bit VCpu field.
     937 */
     938DECLINLINE(uint32_t) iemNativeEmitStoreGprFromVCpuU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
     939{
     940#ifdef RT_ARCH_AMD64
     941    /* mov mem32, reg32 */
     942    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
     943    AssertReturn(pbCodeBuf, UINT32_MAX);
     944    if (iGpr >= 8)
     945        pbCodeBuf[off++] = X86_OP_REX_R;
     946    pbCodeBuf[off++] = 0x89;
     947    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
     948
     949#elif RT_ARCH_ARM64
     950    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Word, sizeof(uint32_t));
     951
     952#else
     953# error "port me"
     954#endif
     955    return off;
     956}
     957
     958
     959/**
     960 * Emits a store of a GPR value to a 16-bit VCpu field.
     961 */
     962DECLINLINE(uint32_t) iemNativeEmitStoreGprFromVCpuU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
     963{
     964#ifdef RT_ARCH_AMD64
     965    /* mov mem16, reg16 */
     966    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
     967    AssertReturn(pbCodeBuf, UINT32_MAX);
     968    pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
     969    if (iGpr >= 8)
     970        pbCodeBuf[off++] = X86_OP_REX_R;
     971    pbCodeBuf[off++] = 0x89;
     972    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
     973
     974#elif RT_ARCH_ARM64
     975    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Half, sizeof(uint16_t));
     976
     977#else
     978# error "port me"
     979#endif
     980    return off;
     981}
     982
     983
     984/**
      985 * Emits a store of a GPR value to an 8-bit VCpu field.
     986 */
     987DECLINLINE(uint32_t) iemNativeEmitStoreGprFromVCpuU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGpr, uint32_t offVCpu)
     988{
     989#ifdef RT_ARCH_AMD64
     990    /* mov mem8, reg8 */
     991    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
     992    AssertReturn(pbCodeBuf, UINT32_MAX);
     993    if (iGpr >= 8)
     994        pbCodeBuf[off++] = X86_OP_REX_R;
     995    pbCodeBuf[off++] = 0x88;
     996    off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, iGpr, offVCpu);
     997
     998#elif RT_ARCH_ARM64
     999    off = iemNativeEmitGprByVCpuLdSt(pReNative, off, iGpr, offVCpu, kArmv8A64InstrLdStType_St_Byte, sizeof(uint8_t));
    5211000
    5221001#else
     
    7391218    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
    7401219    AssertReturn(pbCodeBuf, UINT32_MAX);
    741     if (iGprDst < 7)
     1220    if (iGprDst < 8)
    7421221        pbCodeBuf[off++] = X86_OP_REX_W;
    7431222    else
     
    7621241#endif
    7631242
     1243
     1244/**
      1245 * Emits a 32-bit GPR addition with an 8-bit signed immediate.
     1246 * @note Bits 32 thru 63 in the GPR will be zero after the operation.
     1247 */
      1248DECLINLINE(uint32_t) iemNativeEmitAddGpr32Imm8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGprDst, int8_t iImm8)
     1249{
     1250#if defined(RT_ARCH_AMD64)
     1251    uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
     1252    AssertReturn(pbCodeBuf, UINT32_MAX);
     1253    if (iGprDst >= 8)
     1254        pbCodeBuf[off++] = X86_OP_REX_B;
     1255    pbCodeBuf[off++] = 0x83;
     1256    pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, iGprDst & 7);
     1257    pbCodeBuf[off++] = (uint8_t)iImm8;
     1258
     1259#elif defined(RT_ARCH_ARM64)
     1260    uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
     1261    AssertReturn(pu32CodeBuf, UINT32_MAX);
     1262    if (iImm8 >= 0)
     1263        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, iGprDst, iGprDst, (uint8_t)iImm8, false /*f64Bit*/);
     1264    else
     1265        pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, iGprDst, iGprDst, (uint8_t)-iImm8, false /*f64Bit*/);
     1266
     1267#else
     1268# error "Port me"
     1269#endif
     1270    return off;
     1271}
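Putting the emitters together, a hedged sketch that increments a hypothetical 32-bit VCpu counter field (offCounter is an assumed offset, not an actual structure member):

    /* Sketch: counter += 1, done as load / add / store through the fixed temporary register. */
    off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offCounter);
    AssertReturn(off != UINT32_MAX, UINT32_MAX);
    off = iemNativeEmitAddGpr32Imm8(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, 1);
    AssertReturn(off != UINT32_MAX, UINT32_MAX);
    off = iemNativeEmitStoreGprFromVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, offCounter);
    AssertReturn(off != UINT32_MAX, UINT32_MAX);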
     1272
    7641273/** @} */
    7651274